Use Word VM PDF converter instead of raw LibreOffice in base handler

_convert_to_pdf() now calls pdf_converter.convert_to_pdf() which tries
the Windows Word VM via MinIO first (pixel-perfect), falling back to
LibreOffice headless automatically when the VM is unavailable.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
justin 2026-05-04 11:02:49 -05:00
parent 9ca6094984
commit d6da7f4d7b

View file

@@ -164,33 +164,17 @@ class BaseServiceHandler(ABC):
# ------------------------------------------------------------------ #
# NOTE(review): this span is a rendered diff of `_convert_to_pdf` whose +/-
# markers, blank lines and indentation were lost. Lines from the old and the
# new body are interleaved, so it is NOT runnable as written.
#
# Two implementations are visible:
#   * A LibreOffice path: computes the DOCX's parent directory, runs
#     `libreoffice --headless --convert-to pdf --outdir <dir> <docx>` through
#     subprocess.run with a 120-second timeout, raises RuntimeError on a
#     non-zero return code, derives the PDF path by swapping the suffix to
#     ".pdf", and raises FileNotFoundError if that file does not exist.
#   * A converter path: imports `convert_to_pdf` from
#     scripts.document_gen.pdf_converter inside the function, calls it with
#     `docx_path`, and converts the result to `str`.
# Both end by logging the source and destination paths and returning the PDF
# path as a string.
#
# Presumably the LibreOffice path (and the first docstring summary line) is
# the removed side and the converter path is the added side, per the commit
# message -- confirm against the repository before relying on either.
#
# NOTE(review): the logger format string "Converted %s%s" has no separator
# between the two paths; possibly a character was dropped during extraction
# -- TODO confirm against the original file.
def _convert_to_pdf(self, docx_path: str) -> str:
"""Convert a DOCX file to PDF using LibreOffice.
"""Convert a DOCX file to PDF.
Uses the Word VM via MinIO for pixel-perfect conversion when available,
falls back to LibreOffice headless automatically.
Returns the path to the generated PDF file.
"""
output_dir = str(Path(docx_path).parent)
result = subprocess.run(
[
"libreoffice",
"--headless",
"--convert-to",
"pdf",
"--outdir",
output_dir,
docx_path,
],
capture_output=True,
text=True,
timeout=120,
)
if result.returncode != 0:
raise RuntimeError(
f"LibreOffice conversion failed: {result.stderr}"
)
from scripts.document_gen.pdf_converter import convert_to_pdf
pdf_path = str(Path(docx_path).with_suffix(".pdf"))
if not Path(pdf_path).exists():
raise FileNotFoundError(f"PDF not generated: {pdf_path}")
pdf_path_obj = convert_to_pdf(docx_path)
pdf_path = str(pdf_path_obj)
logger.info("Converted %s%s", docx_path, pdf_path)
return pdf_path