new-site/scripts/document_gen/docx_builder.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

222 lines
8.2 KiB
Python

"""
DOCX template builder using python-docx + Jinja2.
Templates use Jinja2 placeholders: {{ variable_name }}
Supports:
- Simple variable substitution
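- Conditional sections ({% if ... %}) — each paragraph is rendered on its own, so a block must open and close within a single paragraph
- Loops for member tables ({% for member in members %}) — likewise rendered inside one paragraph; table rows are not duplicated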
- Section insertion (replace a placeholder paragraph with multi-paragraph LLM output)
"""
from __future__ import annotations
import copy
import logging
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Any
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from jinja2 import Template
# Module-level logger; records are emitted under the "document_gen.docx" name.
LOG = logging.getLogger("document_gen.docx")
# Directory that holds the .docx templates. Read from the TEMPLATES_DIR
# environment variable, falling back to /app/scripts/templates when it is unset.
TEMPLATES_DIR = Path(os.getenv("TEMPLATES_DIR", "/app/scripts/templates"))
class DocxBuilder:
    """Build DOCX documents from templates with variable substitution.

    A builder wraps one python-docx ``Document`` loaded from ``TEMPLATES_DIR``.
    Every mutating method returns ``self`` so calls can be chained, e.g.
    ``DocxBuilder("report").set_variables(v).fill().save(path)``.

    Jinja2 rendering is done one paragraph at a time, so a ``{% ... %}`` block
    has to open and close inside the same paragraph.
    """

    def __init__(self, template_name: str):
        """Load a DOCX template by name (e.g., 'operating-agreement').

        Args:
            template_name: File stem of the template; ``.docx`` is appended
                and the file is looked up under ``TEMPLATES_DIR``.

        Raises:
            FileNotFoundError: If the resolved template file does not exist.
        """
        self.template_path = TEMPLATES_DIR / f"{template_name}.docx"
        if not self.template_path.exists():
            raise FileNotFoundError(f"Template not found: {self.template_path}")
        self.doc = Document(str(self.template_path))
        self.variables: dict[str, Any] = {}

    def set_variables(self, variables: dict[str, Any]) -> "DocxBuilder":
        """Set template variables for substitution.

        The mapping is stored by reference (not copied) and replaces any
        previously set variables.

        Returns:
            This builder, for chaining.
        """
        self.variables = variables
        return self

    def fill(self) -> "DocxBuilder":
        """Fill all Jinja2 placeholders in the document.

        Covers body paragraphs, tables (including tables nested inside cells),
        and the header and footer of every section.

        Returns:
            This builder, for chaining.
        """
        self._fill_container(self.doc)
        for section in self.doc.sections:
            self._fill_container(section.header)
            self._fill_container(section.footer)
        return self

    def _fill_container(self, container) -> None:
        """Render every paragraph in *container*, recursing into its tables.

        *container* is anything exposing ``paragraphs`` and ``tables`` (the
        document itself, a header/footer, or a table cell).
        """
        for para in container.paragraphs:
            self._fill_paragraph(para)
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    # Cells are containers too, which is what picks up
                    # nested tables.
                    self._fill_container(cell)

    def _fill_paragraph(self, para):
        """Replace Jinja2 placeholders in a paragraph, preserving formatting.

        The paragraph's full text is rendered as one Jinja2 template. The
        result is written into the first run (keeping that run's formatting)
        and all later runs are emptied. Render errors are logged and the
        paragraph is left unchanged rather than aborting the whole document.
        """
        full_text = para.text
        if "{{" not in full_text and "{%" not in full_text:
            return

        try:
            rendered = Template(full_text).render(**self.variables)
        except Exception as e:
            # Deliberate best-effort: one broken placeholder must not stop
            # the rest of the document from being generated.
            LOG.warning(
                "Template render error in paragraph %r: %s", full_text[:80], e
            )
            return

        if rendered == full_text:
            return

        runs = para.runs
        if runs:
            # A placeholder may be split across several runs; collapse the
            # rendered text into the first run and blank the others.
            runs[0].text = rendered
            for run in runs[1:]:
                run.text = ""
        else:
            para.text = rendered

    def insert_section(self, placeholder: str, content: str) -> "DocxBuilder":
        """Replace a placeholder paragraph with multi-paragraph content.

        Used for LLM-generated sections — the placeholder (e.g.,
        '{{findings_section}}') is replaced with multiple paragraphs of text.
        Only the first body paragraph containing *placeholder* is replaced.

        Args:
            placeholder: Text to search for in the document's body paragraphs.
            content: Replacement text; blank lines separate paragraphs.

        Returns:
            This builder, for chaining. If the placeholder is not found a
            warning is logged and the document is left unchanged.
        """
        for para in self.doc.paragraphs:
            if placeholder not in para.text:
                continue

            # Split on blank lines, tolerating whitespace-only separator
            # lines and CRLF line endings.
            chunks = [
                chunk.strip()
                for chunk in re.split(r"\n\s*\n", content.strip())
            ]
            chunks = [chunk for chunk in chunks if chunk]

            para.text = chunks[0] if chunks else ""

            # Advance the anchor after each insertion so the paragraphs keep
            # their original order; always inserting right after the
            # placeholder paragraph would reverse them.
            anchor = para._element
            for chunk in chunks[1:]:
                # Copy only the XML element (keeps paragraph-level
                # formatting) rather than the proxy and its parent chain.
                new_element = copy.deepcopy(para._element)
                new_para = type(para)(new_element, para._parent)
                new_para.text = chunk
                anchor.addnext(new_element)
                anchor = new_element
            return self

        LOG.warning("Placeholder not found: %s", placeholder)
        return self

    def _add_centered_line(
        self,
        text: str,
        size,
        color=None,
        bold=None,
        space_before=None,
    ):
        """Append a centered single-run paragraph and return it.

        ``color``, ``bold`` and ``space_before`` are applied only when given.
        """
        para = self.doc.add_paragraph()
        if space_before is not None:
            para.paragraph_format.space_before = space_before
        run = para.add_run(text)
        run.font.size = size
        if color is not None:
            run.font.color.rgb = color
        if bold is not None:
            run.font.bold = bold
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        return para

    def add_cover_page(
        self,
        title: str,
        subtitle: str = "",
        client_name: str = "",
        order_number: str = "",
        date: str = "",
    ) -> "DocxBuilder":
        """Add a branded cover page at the beginning of the document.

        Args:
            title: Main heading of the cover page.
            subtitle: Optional line below the title; omitted when empty.
            client_name: Optional "Prepared for" line; omitted when empty.
            order_number: Optional order reference shown next to the date.
            date: Date text; defaults to today formatted like
                "April 27, 2026".

        Returns:
            This builder, for chaining.
        """
        brand_blue = RGBColor(0x2D, 0x4E, 0x78)  # pw-700
        muted_gray = RGBColor(0x9C, 0xA3, 0xAF)

        # Keep track of every paragraph created here. Their number depends on
        # the optional arguments, and the body's last child is not guaranteed
        # to be one of them, so slicing the tail of the body is not reliable.
        cover = []

        # Spacer
        spacer = self.doc.add_paragraph()
        # Spacing lives on paragraph_format; setting it on the paragraph
        # object itself has no effect on the document.
        spacer.paragraph_format.space_after = Pt(72)
        cover.append(spacer)

        # Title
        cover.append(
            self._add_centered_line(title, Pt(28), color=brand_blue, bold=True)
        )

        # Subtitle
        if subtitle:
            cover.append(
                self._add_centered_line(
                    subtitle, Pt(14), color=RGBColor(0x6B, 0x72, 0x80)
                )
            )

        # Client info
        if client_name:
            cover.append(
                self._add_centered_line(
                    f"Prepared for: {client_name}",
                    Pt(12),
                    space_before=Pt(36),
                )
            )

        # Order number + date
        meta_parts = []
        if order_number:
            meta_parts.append(f"Order: {order_number}")
        meta_parts.append(f"Date: {date or datetime.now().strftime('%B %d, %Y')}")
        cover.append(
            self._add_centered_line(
                " | ".join(meta_parts), Pt(10), color=muted_gray
            )
        )

        # Performance West branding
        cover.append(
            self._add_centered_line(
                "Performance West Inc.",
                Pt(10),
                color=brand_blue,
                space_before=Pt(48),
            )
        )
        cover.append(
            self._add_centered_line(
                "525 Randall Ave Ste 100-1195, Cheyenne, WY 82001 | 1-888-411-0383",
                Pt(8),
                color=muted_gray,
            )
        )

        # Page break after cover
        cover.append(self.doc.add_page_break())

        # The paragraphs were appended at the end of the body; move exactly
        # those elements to the front, preserving their order.
        body = self.doc.element.body
        for index, para in enumerate(cover):
            body.insert(index, para._element)
        return self

    def add_disclaimer(self, text: str = "") -> "DocxBuilder":
        """Add a disclaimer paragraph at the end of the document.

        Args:
            text: Disclaimer wording; the standard Performance West
                disclaimer is used when empty.

        Returns:
            This builder, for chaining.
        """
        default = (
            "DISCLAIMER: This document is prepared by Performance West Inc. for compliance consulting purposes only. "
            "It does not constitute legal advice, legal representation, or create an attorney-client relationship. "
            "For legal matters, consult a licensed attorney in your jurisdiction."
        )
        para = self.doc.add_paragraph()
        # Spacing must be set through paragraph_format to take effect.
        para.paragraph_format.space_before = Pt(24)
        run = para.add_run(text or default)
        run.font.size = Pt(8)
        run.font.italic = True
        run.font.color.rgb = RGBColor(0x9C, 0xA3, 0xAF)
        return self

    def save(self, output_path: str | Path) -> Path:
        """Save the filled document to a file.

        Missing parent directories are created first.

        Args:
            output_path: Destination file path.

        Returns:
            The destination as a ``Path``.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        self.doc.save(str(output_path))
        LOG.info("DOCX saved: %s", output_path)
        return output_path