From d6da7f4d7be430557c7bc2397a490ef1307f8748 Mon Sep 17 00:00:00 2001
From: justin
Date: Mon, 4 May 2026 11:02:49 -0500
Subject: [PATCH] Use Word VM PDF converter instead of raw LibreOffice in base handler

_convert_to_pdf() now calls pdf_converter.convert_to_pdf() which tries the
Windows Word VM via MinIO first (pixel-perfect), falling back to LibreOffice
headless automatically when the VM is unavailable.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/workers/services/base_handler.py | 30 ++++++------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/scripts/workers/services/base_handler.py b/scripts/workers/services/base_handler.py
index 56389d8..a76f7b6 100644
--- a/scripts/workers/services/base_handler.py
+++ b/scripts/workers/services/base_handler.py
@@ -164,33 +164,17 @@ class BaseServiceHandler(ABC):
     # ------------------------------------------------------------------ #
 
     def _convert_to_pdf(self, docx_path: str) -> str:
-        """Convert a DOCX file to PDF using LibreOffice.
+        """Convert a DOCX file to PDF.
+
+        Uses the Word VM via MinIO for pixel-perfect conversion when available,
+        falls back to LibreOffice headless automatically.
 
         Returns the path to the generated PDF file.
         """
-        output_dir = str(Path(docx_path).parent)
-        result = subprocess.run(
-            [
-                "libreoffice",
-                "--headless",
-                "--convert-to",
-                "pdf",
-                "--outdir",
-                output_dir,
-                docx_path,
-            ],
-            capture_output=True,
-            text=True,
-            timeout=120,
-        )
-        if result.returncode != 0:
-            raise RuntimeError(
-                f"LibreOffice conversion failed: {result.stderr}"
-            )
+        from scripts.document_gen.pdf_converter import convert_to_pdf
 
-        pdf_path = str(Path(docx_path).with_suffix(".pdf"))
-        if not Path(pdf_path).exists():
-            raise FileNotFoundError(f"PDF not generated: {pdf_path}")
+        pdf_path_obj = convert_to_pdf(docx_path)
+        pdf_path = str(pdf_path_obj)
 
         logger.info("Converted %s → %s", docx_path, pdf_path)
         return pdf_path