new-site/scripts/workers/convert_guides.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

163 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""Convert compliance guide markdown files to branded PDFs and upload to MinIO."""
import os
import re
import subprocess
import sys
sys.path.insert(0, "/app")
# Markdown guide sources to convert. The paths are relative and are opened
# exactly as written, so they resolve against the current working directory.
_GUIDE_TEMPLATE_DIR = "scripts/document_gen/templates/guides"
_GUIDE_STEMS = (
    "dno_list_enforcement",
    "kyc_procedures",
    "material_change_procedures",
    "traceback_response",
)
GUIDES = [f"{_GUIDE_TEMPLATE_DIR}/{stem}.md" for stem in _GUIDE_STEMS]
# HTML fragment appended to the end of every generated guide: a centred,
# small-print contact block separated from the content by a top border.
_FOOTER_STYLE = (
    "margin-top:40px;padding-top:16px;border-top:1px solid #e2e8f0;"
    "font-size:9pt;color:#94a3b8;text-align:center;"
)
_FOOTER_LINES = (
    "Performance West Inc. &middot; performancewest.net &middot; 1-888-411-0383",
    "525 Randall Ave Ste 100-1195, Cheyenne, WY 82001",
)
FOOTER = '<div style="' + _FOOTER_STYLE + '">' + "<br>".join(_FOOTER_LINES) + "</div>"
# Stylesheet inlined into the <style> element of every generated guide.
# One rule per entry; the empty first and last entries reproduce the leading
# and trailing newline of the stylesheet text.
CSS = "\n".join(
    (
        "",
        "body { font-family: Arial, sans-serif; font-size: 11pt; line-height: 1.6; max-width: 700px; margin: 40px auto; color: #333; }",
        "h1 { color: #1a2744; border-bottom: 3px solid #059669; padding-bottom: 8px; }",
        "h2 { color: #1a2744; margin-top: 24px; }",
        "h3 { color: #1e3a5f; }",
        "table { border-collapse: collapse; width: 100%; margin: 12px 0; }",
        "th { background: #1a2744; color: white; padding: 8px 12px; text-align: left; font-size: 10pt; }",
        "td { padding: 8px 12px; border-bottom: 1px solid #e2e8f0; font-size: 10pt; }",
        "code { background: #f1f5f9; padding: 2px 6px; border-radius: 3px; font-size: 10pt; }",
        "pre { background: #f1f5f9; padding: 12px; border-radius: 6px; overflow-x: auto; font-size: 9pt; }",
        "ul, ol { padding-left: 20px; }",
        "li { margin-bottom: 4px; }",
        "strong { color: #111; }",
        "",
    )
)
def _render_inline(text: str) -> str:
    """Convert inline Markdown (**bold**, `code`, [label](url)) to HTML tags."""
    text = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", text)
    text = re.sub(r"`(.+?)`", r"<code>\1</code>", text)
    return re.sub(r"\[(.+?)\]\((.+?)\)", r'<a href="\2">\1</a>', text)


def _split_table_row(line: str) -> list:
    """Return the stripped cell texts of one pipe-delimited table row."""
    row = line.strip()[1:]  # drop the leading pipe
    # The closing pipe is optional; only drop it when present so that the
    # last cell of a row written without one is not lost.
    if row.endswith("|"):
        row = row[:-1]
    return [cell.strip() for cell in row.split("|")]


def md_to_html(md_text: str) -> str:
    """Render a small Markdown subset as a complete, styled HTML document.

    Recognised line-level constructs:

    * fenced code blocks (lines starting with three backticks) -> ``<pre>``;
      their content is HTML-escaped and otherwise left untouched
    * pipe tables: the first row becomes ``<th>`` cells, later rows ``<td>``
      cells, and a separator row made only of pipes, dashes, colons and
      whitespace is dropped
    * headings ``#`` to ``####`` -> ``<h1>`` to ``<h4>``
    * ``- `` bullet items -> ``<ul>``, ``N. `` numbered items -> ``<ol>``
    * a line that is exactly ``---`` -> ``<hr>``
    * any other non-blank line -> its own ``<p>``

    Inline ``**bold**``, backtick code and ``[label](url)`` links are
    converted in headings, table cells, list items and paragraphs.

    A list is closed with the tag it was opened with, and any open list,
    table or code block is closed before another block starts and at the end
    of the input, so the emitted elements are always balanced.

    Args:
        md_text: Markdown source. Both ``\\n`` and ``\\r\\n`` line endings
            are accepted.

    Returns:
        One HTML string: a document head embedding ``CSS``, the converted
        body, ``FOOTER`` and the closing tags, joined with newlines.
    """
    from html import escape  # function-scope import keeps this block self-contained

    html_parts = [f'<!DOCTYPE html><html><head><meta charset="utf-8"><style>{CSS}</style></head><body>']
    in_code = False
    in_table = False
    list_tag = None  # "ul" or "ol" while a list is open, otherwise None

    def close_list() -> None:
        nonlocal list_tag
        if list_tag is not None:
            html_parts.append(f"</{list_tag}>")
            list_tag = None

    def close_table() -> None:
        nonlocal in_table
        if in_table:
            html_parts.append("</table>")
            in_table = False

    def add_list_item(tag: str, text: str) -> None:
        nonlocal list_tag
        # Switching between bullet and numbered items starts a new list.
        if list_tag != tag:
            close_list()
            html_parts.append(f"<{tag}>")
            list_tag = tag
        html_parts.append(f"<li>{_render_inline(text)}</li>")

    for line in md_text.splitlines():
        # Fenced code block delimiters toggle literal mode.
        if line.startswith("```"):
            if in_code:
                html_parts.append("</pre>")
            else:
                close_list()
                close_table()
                html_parts.append("<pre>")
            in_code = not in_code
            continue
        if in_code:
            # Literal text: only escape what would otherwise be read as markup.
            html_parts.append(escape(line, quote=False))
            continue

        # Table rows (with or without a space after the leading pipe).
        if line.startswith("|"):
            close_list()
            # Separator row such as |---|:---:| carries no content.
            if "---" in line and not line.strip("|-: \t"):
                continue
            cells = [_render_inline(cell) for cell in _split_table_row(line)]
            if in_table:
                html_parts.append("<tr>" + "".join(f"<td>{c}</td>" for c in cells) + "</tr>")
            else:
                html_parts.append("<table><tr>" + "".join(f"<th>{c}</th>" for c in cells) + "</tr>")
                in_table = True
            continue
        close_table()

        # Headings, levels 1-4; five or more '#' fall through to a paragraph.
        heading = re.match(r"(#{1,4}) (.*)", line)
        if heading:
            close_list()
            level = len(heading.group(1))
            html_parts.append(f"<h{level}>{_render_inline(heading.group(2))}</h{level}>")
            continue

        # List items.
        if line.startswith("- "):
            add_list_item("ul", line[2:])
            continue
        numbered = re.match(r"\d+\. ", line)
        if numbered:
            add_list_item("ol", line[numbered.end():])
            continue

        # Anything else (including a blank line) ends the current list.
        close_list()
        stripped = line.strip()
        if stripped == "---":
            html_parts.append("<hr>")
        elif stripped:
            html_parts.append(f"<p>{_render_inline(line)}</p>")

    # Balance whatever is still open at end of input.
    if in_code:
        html_parts.append("</pre>")
    close_table()
    close_list()
    html_parts.append(FOOTER)
    html_parts.append("</body></html>")
    return "\n".join(html_parts)
def main():
    """Convert each guide in GUIDES to HTML and PDF and upload the PDF to MinIO.

    For every path in ``GUIDES`` that exists:

    1. read the Markdown as UTF-8 and render it with ``md_to_html``;
    2. write the HTML next to the source (same name, ``.html`` suffix);
    3. run headless LibreOffice to produce a PDF in the same directory;
    4. if this run produced a PDF, upload it as ``guides/<file>.pdf`` through
       ``MinioStorage``; an upload problem is reported but the local PDF is
       kept (best effort).

    One status line (``SKIP`` / ``OK`` / ``OK (local only)`` / ``FAIL``) is
    printed per guide. A failure on one guide never stops the others.
    """
    for md_path in GUIDES:
        if not os.path.exists(md_path):
            print(f"SKIP: {md_path} not found")
            continue
        # The generated HTML declares charset=utf-8, so read and write with
        # that encoding instead of whatever the locale default happens to be.
        with open(md_path, encoding="utf-8") as f:
            md_text = f.read()
        first_line = md_text.split("\n", 1)[0]
        title = first_line.lstrip("#").strip() or os.path.basename(md_path)

        # Swap only the extension; str.replace would also rewrite ".md"
        # occurring anywhere else in the path.
        stem = os.path.splitext(md_path)[0]
        html_path = stem + ".html"
        pdf_path = stem + ".pdf"
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(md_to_html(md_text))

        # Remember the state of any PDF left by an earlier run so that a
        # failed conversion is not mistaken for a success.
        try:
            previous_mtime = os.stat(pdf_path).st_mtime_ns
        except FileNotFoundError:
            previous_mtime = None

        # Convert to PDF
        pdf_dir = os.path.dirname(html_path) or "."
        try:
            r = subprocess.run(
                ["libreoffice", "--headless", "--convert-to", "pdf", "--outdir", pdf_dir, html_path],
                capture_output=True, timeout=30,
            )
        except (OSError, subprocess.TimeoutExpired) as e:
            # Missing binary or a hung conversion: report and move on.
            print(f"FAIL: {title} — PDF conversion did not run: {e}")
            continue

        try:
            pdf_is_fresh = os.stat(pdf_path).st_mtime_ns != previous_mtime
        except FileNotFoundError:
            pdf_is_fresh = False

        if pdf_is_fresh:
            try:
                # Imported lazily so a missing or broken storage module only
                # downgrades the result to "local only".
                from scripts.document_gen.minio_client import MinioStorage
                storage = MinioStorage()
                remote = f"guides/{os.path.basename(pdf_path)}"
                storage.upload(pdf_path, remote)
                print(f"OK: {title} -> {remote}")
            except Exception as e:
                print(f"OK (local only): {title} -> {pdf_path} (MinIO: {e})")
        else:
            stderr_text = r.stderr.decode("utf-8", errors="replace")[:200]
            print(f"FAIL: {title} — PDF not generated. stderr: {stderr_text}")


if __name__ == "__main__":
    main()