#!/usr/bin/env python3
"""Convert compliance guide markdown files to branded PDFs and upload to MinIO."""

import os
import re
import subprocess
import sys
from html import escape

sys.path.insert(0, "/app")

# NOTE(review): the HTML markup inside the string literals of this file was
# reconstructed from a copy whose tags had been stripped. Tag names follow the
# CSS selectors and the surrounding logic; the footer's inline styling and the
# document head are best guesses — confirm against the branded template.

# Markdown guides to convert, relative to the current working directory.
GUIDES = [
    "scripts/document_gen/templates/guides/dno_list_enforcement.md",
    "scripts/document_gen/templates/guides/kyc_procedures.md",
    "scripts/document_gen/templates/guides/material_change_procedures.md",
    "scripts/document_gen/templates/guides/traceback_response.md",
]

# Company footer appended to the end of every generated document.
FOOTER = (
    '<hr><p style="text-align: center; font-size: 9pt; color: #64748b;">'
    "Performance West Inc. · performancewest.net · 1-888-411-0383<br>"
    "525 Randall Ave Ste 100-1195, Cheyenne, WY 82001</p>"
)

# Stylesheet embedded in the <head> of every generated document.
CSS = """
body { font-family: Arial, sans-serif; font-size: 11pt; line-height: 1.6; max-width: 700px; margin: 40px auto; color: #333; }
h1 { color: #1a2744; border-bottom: 3px solid #059669; padding-bottom: 8px; }
h2 { color: #1a2744; margin-top: 24px; }
h3 { color: #1e3a5f; }
table { border-collapse: collapse; width: 100%; margin: 12px 0; }
th { background: #1a2744; color: white; padding: 8px 12px; text-align: left; font-size: 10pt; }
td { padding: 8px 12px; border-bottom: 1px solid #e2e8f0; font-size: 10pt; }
code { background: #f1f5f9; padding: 2px 6px; border-radius: 3px; font-size: 10pt; }
pre { background: #f1f5f9; padding: 12px; border-radius: 6px; overflow-x: auto; font-size: 9pt; }
ul, ol { padding-left: 20px; }
li { margin-bottom: 4px; }
strong { color: #111; }
"""

# Inline markdown patterns, compiled once because they run for every line.
_BOLD_RE = re.compile(r"\*\*(.+?)\*\*")
_CODE_RE = re.compile(r"`(.+?)`")
_LINK_RE = re.compile(r"\[(.+?)\]\((.+?)\)")

# Longest prefix first so "#### " is not mistaken for a shorter heading.
_HEADINGS = (("#### ", "h4"), ("### ", "h3"), ("## ", "h2"), ("# ", "h1"))


def _inline(text: str) -> str:
    """Convert **bold**, `code` and [label](url) spans in *text* to HTML."""
    text = _BOLD_RE.sub(r"<strong>\1</strong>", text)
    text = _CODE_RE.sub(r"<code>\1</code>", text)
    return _LINK_RE.sub(r'<a href="\2">\1</a>', text)


def _split_row(line: str) -> list:
    """Split a markdown table row into stripped cell strings.

    The leading pipe is always dropped; the trailing pipe is dropped only if
    present, so a row written without a closing pipe keeps its last cell.
    """
    row = line.strip()[1:]
    if row.endswith("|"):
        row = row[:-1]
    return [cell.strip() for cell in row.split("|")]


def md_to_html(md_text: str) -> str:
    """Render a small markdown subset as a complete, styled HTML document.

    Supported constructs: fenced code blocks, pipe tables (the first row is
    the header), ``#`` to ``####`` headings, ``- `` bullet lists, ``---``
    horizontal rules, paragraphs, and inline bold / code / links. Each
    non-empty plain line becomes its own paragraph.

    Args:
        md_text: The markdown source.

    Returns:
        The HTML document as a single string, with CSS embedded in the head
        and FOOTER appended before the closing body tag.
    """
    html_parts = [
        '<html><head><meta charset="utf-8">'
        f"<style>{CSS}</style></head><body>"
    ]
    in_code = False
    in_table = False
    in_list = False

    for line in md_text.splitlines():
        if in_code:
            if line.startswith("```"):
                html_parts.append("</pre>")
                in_code = False
            else:
                # Code is shown literally, so markup characters must not be
                # interpreted by the HTML renderer.
                html_parts.append(escape(line, quote=False))
            continue

        # Close any open table/list before emitting a different element, so a
        # heading or code block directly after one is not nested inside it.
        if in_table and not line.startswith("|"):
            html_parts.append("</table>")
            in_table = False
        if in_list and not line.startswith("- "):
            html_parts.append("</ul>")
            in_list = False

        if line.startswith("```"):
            html_parts.append("<pre>")
            in_code = True
            continue

        # Table separator row (| --- | --- |) carries no content.
        if line.startswith("|") and "---" in line:
            continue

        if line.startswith("|"):
            cells = [_inline(c) for c in _split_row(line)]
            if not in_table:
                header = "".join(f"<th>{c}</th>" for c in cells)
                html_parts.append(f"<table><tr>{header}</tr>")
                in_table = True
            else:
                body = "".join(f"<td>{c}</td>" for c in cells)
                html_parts.append(f"<tr>{body}</tr>")
            continue

        heading = next((h for h in _HEADINGS if line.startswith(h[0])), None)
        if heading is not None:
            prefix, tag = heading
            html_parts.append(f"<{tag}>{_inline(line[len(prefix):])}</{tag}>")
            continue

        if line.startswith("- "):
            if not in_list:
                html_parts.append("<ul>")
                in_list = True
            html_parts.append(f"<li>{_inline(line[2:])}</li>")
            continue

        stripped = line.strip()
        if stripped == "---":
            html_parts.append("<hr>")
        elif stripped:
            html_parts.append(f"<p>{_inline(line)}</p>")

    # Close whatever is still open at end of input (including an unterminated
    # code fence) so the footer is never swallowed by an open element.
    if in_code:
        html_parts.append("</pre>")
    if in_table:
        html_parts.append("</table>")
    if in_list:
        html_parts.append("</ul>")

    html_parts.append(FOOTER)
    html_parts.append("</body></html>")
    return "\n".join(html_parts)


def main() -> None:
    """Convert every guide in GUIDES to HTML and PDF, then upload the PDF.

    For each guide: write the HTML next to the markdown file, run headless
    LibreOffice to produce a PDF in the same directory, and upload the PDF to
    MinIO under ``guides/``. A missing guide, a failed conversion or a failed
    upload is reported on stdout and does not stop the remaining guides.
    """
    for md_path in GUIDES:
        if not os.path.exists(md_path):
            print(f"SKIP: {md_path} not found")
            continue

        # Explicit UTF-8: the footer contains non-ASCII characters, and the
        # locale default encoding may not be able to represent them.
        with open(md_path, encoding="utf-8") as f:
            md_text = f.read()

        title = md_text.split("\n")[0].replace("# ", "").strip()

        # Swap only the extension; str.replace would also rewrite ".md"
        # occurring elsewhere in the path.
        base_path = os.path.splitext(md_path)[0]
        html_path = base_path + ".html"
        pdf_path = base_path + ".pdf"
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(md_to_html(md_text))

        # Convert to PDF
        pdf_dir = os.path.dirname(html_path)
        try:
            r = subprocess.run(
                ["libreoffice", "--headless", "--convert-to", "pdf",
                 "--outdir", pdf_dir, html_path],
                capture_output=True,
                timeout=30,
            )
        except (OSError, subprocess.TimeoutExpired) as e:
            # LibreOffice missing or hung: report and move on to the next guide.
            print(f"FAIL: {title} — PDF not generated. stderr: {e}")
            continue

        if os.path.exists(pdf_path):
            try:
                # Imported lazily so a missing MinIO client only degrades the
                # run to "local only" instead of failing at startup.
                from scripts.document_gen.minio_client import MinioStorage

                storage = MinioStorage()
                remote = f"guides/{os.path.basename(pdf_path)}"
                storage.upload(pdf_path, remote)
                print(f"OK: {title} -> {remote}")
            except Exception as e:
                # Deliberate best-effort: the PDF exists locally either way.
                print(f"OK (local only): {title} -> {pdf_path} (MinIO: {e})")
        else:
            stderr = r.stderr.decode("utf-8", "replace")[:200]
            print(f"FAIL: {title} — PDF not generated. stderr: {stderr}")


if __name__ == "__main__":
    main()