Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
443 lines
17 KiB
Python
443 lines
17 KiB
Python
"""Abstract base class for all service handlers.

Every compliance service (FLSA audit, handbook review, etc.) inherits from
this class and implements the ``process()`` method.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
from abc import ABC, abstractmethod
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
logger = logging.getLogger(__name__)

# Location of the DOCX templates. The TEMPLATES_DIR environment variable wins;
# otherwise fall back to a "templates" folder three directory levels above
# this module's resolved path.
_default_templates_dir = Path(__file__).resolve().parent.parent.parent / "templates"
TEMPLATES_DIR = os.environ.get("TEMPLATES_DIR", str(_default_templates_dir))

# LLM settings, each overridable through an environment variable of the same
# name. Temperature and token limit are parsed at import time, so a
# non-numeric value in the environment raises ValueError on import.
LLM_API_URL = os.environ.get(
    "LLM_API_URL", "https://api.openai.com/v1/chat/completions"
)
LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o")
LLM_TEMPERATURE = float(os.environ.get("LLM_TEMPERATURE", "0.3"))
LLM_MAX_TOKENS = int(os.environ.get("LLM_MAX_TOKENS", "4096"))
|
|
|
|
|
|
class BaseServiceHandler(ABC):
|
|
"""Base class for compliance service handlers."""
|
|
|
|
SERVICE_SLUG: str = ""
|
|
SERVICE_NAME: str = ""
|
|
TEMPLATE_NAME: str = "" # DOCX template filename in TEMPLATES_DIR
|
|
REQUIRES_LLM: bool = False
|
|
|
|
def __init__(self) -> None:
|
|
self.templates_dir = Path(TEMPLATES_DIR)
|
|
self._work_dir: str | None = None
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# Abstract interface
|
|
# ------------------------------------------------------------------ #
|
|
|
|
@abstractmethod
|
|
async def process(self, order_data: dict) -> list[str]:
|
|
"""Process an order and return list of generated file paths.
|
|
|
|
Implementations should:
|
|
1. Load the DOCX template
|
|
2. (Optionally) call the LLM to generate section content
|
|
3. Fill the template with variables/content
|
|
4. Save as DOCX and convert to PDF
|
|
5. Return list of file paths
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# Helpers
|
|
# ------------------------------------------------------------------ #
|
|
|
|
def _build_output_path(self, order_number: str, filename: str) -> str:
|
|
"""Build the MinIO object path for output files."""
|
|
return f"compliance/{order_number}/{filename}"
|
|
|
|
def _get_template_path(self, template_name: str | None = None) -> Path:
|
|
"""Resolve the full path to a DOCX template."""
|
|
name = template_name or self.TEMPLATE_NAME
|
|
path = self.templates_dir / name
|
|
if not path.exists():
|
|
raise FileNotFoundError(f"Template not found: {path}")
|
|
return path
|
|
|
|
def _make_work_dir(self) -> str:
|
|
"""Create a temporary working directory for generated files."""
|
|
if self._work_dir is None:
|
|
self._work_dir = tempfile.mkdtemp(prefix=f"pw_{self.SERVICE_SLUG}_")
|
|
return self._work_dir
|
|
|
|
def _output_filename(self, order_number: str, ext: str = "docx") -> str:
|
|
"""Generate a consistent output filename."""
|
|
date_str = datetime.now().strftime("%Y%m%d")
|
|
slug = self.SERVICE_SLUG.replace("-", "_")
|
|
return f"{slug}_{order_number}_{date_str}.{ext}"
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# Template filling (python-docx)
|
|
# ------------------------------------------------------------------ #
|
|
|
|
def _fill_template(
|
|
self,
|
|
template_path: Path,
|
|
variables: dict[str, str],
|
|
output_path: str,
|
|
) -> str:
|
|
"""Open a DOCX template, replace {{variable}} placeholders, and save.
|
|
|
|
Supports replacement in paragraphs and table cells.
|
|
Returns the output path.
|
|
"""
|
|
from docx import Document
|
|
|
|
doc = Document(str(template_path))
|
|
|
|
def _replace_in_paragraph(paragraph: Any) -> None:
|
|
for key, value in variables.items():
|
|
placeholder = "{{" + key + "}}"
|
|
if placeholder in paragraph.text:
|
|
# Preserve formatting: replace in runs
|
|
for run in paragraph.runs:
|
|
if placeholder in run.text:
|
|
run.text = run.text.replace(placeholder, value)
|
|
|
|
for paragraph in doc.paragraphs:
|
|
_replace_in_paragraph(paragraph)
|
|
|
|
for table in doc.tables:
|
|
for row in table.rows:
|
|
for cell in row.cells:
|
|
for paragraph in cell.paragraphs:
|
|
_replace_in_paragraph(paragraph)
|
|
|
|
doc.save(output_path)
|
|
logger.info("Filled template → %s", output_path)
|
|
return output_path
|
|
|
|
def _add_sections_to_doc(
|
|
self,
|
|
doc_path: str,
|
|
sections: dict[str, str],
|
|
) -> str:
|
|
"""Append named sections (from LLM output) to an existing DOCX.
|
|
|
|
Each section gets a heading followed by the generated content.
|
|
Returns the (modified) doc_path.
|
|
"""
|
|
from docx import Document
|
|
from docx.shared import Pt
|
|
|
|
doc = Document(doc_path)
|
|
|
|
for section_name, content in sections.items():
|
|
heading = section_name.replace("_", " ").title()
|
|
doc.add_heading(heading, level=2)
|
|
|
|
for paragraph_text in content.split("\n\n"):
|
|
paragraph_text = paragraph_text.strip()
|
|
if not paragraph_text:
|
|
continue
|
|
p = doc.add_paragraph(paragraph_text)
|
|
for run in p.runs:
|
|
run.font.size = Pt(11)
|
|
|
|
doc.save(doc_path)
|
|
return doc_path
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# PDF conversion (LibreOffice headless)
|
|
# ------------------------------------------------------------------ #
|
|
|
|
def _convert_to_pdf(self, docx_path: str) -> str:
|
|
"""Convert a DOCX file to PDF using LibreOffice.
|
|
|
|
Returns the path to the generated PDF file.
|
|
"""
|
|
output_dir = str(Path(docx_path).parent)
|
|
result = subprocess.run(
|
|
[
|
|
"libreoffice",
|
|
"--headless",
|
|
"--convert-to",
|
|
"pdf",
|
|
"--outdir",
|
|
output_dir,
|
|
docx_path,
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=120,
|
|
)
|
|
if result.returncode != 0:
|
|
raise RuntimeError(
|
|
f"LibreOffice conversion failed: {result.stderr}"
|
|
)
|
|
|
|
pdf_path = str(Path(docx_path).with_suffix(".pdf"))
|
|
if not Path(pdf_path).exists():
|
|
raise FileNotFoundError(f"PDF not generated: {pdf_path}")
|
|
|
|
logger.info("Converted %s → %s", docx_path, pdf_path)
|
|
return pdf_path
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# Entity intake request — pause order until client provides entity data
|
|
# ------------------------------------------------------------------ #
|
|
|
|
def _request_entity_intake(self, order_data: dict) -> None:
|
|
"""Pause the order and email the client to complete entity intake.
|
|
|
|
Called when an order is dispatched but the entity data (company name,
|
|
FRN, officer, etc.) is missing. Sends the client to the intake wizard.
|
|
|
|
For batch orders, only the first handler to call this sends the email;
|
|
subsequent handlers for the same batch skip the email (but still pause
|
|
their own order).
|
|
"""
|
|
import psycopg2
|
|
|
|
order_number = order_data.get("name", "")
|
|
customer_email = order_data.get("customer_email", "")
|
|
customer_name = order_data.get("customer_name", "")
|
|
frn = (order_data.get("intake_data") or {}).get("frn", "")
|
|
batch_id = order_data.get("batch_id")
|
|
|
|
# Update order status to pending intake
|
|
try:
|
|
conn = psycopg2.connect(os.environ.get("DATABASE_URL", ""))
|
|
cur = conn.cursor()
|
|
cur.execute(
|
|
"""UPDATE compliance_orders
|
|
SET payment_status = 'pending_intake',
|
|
notes = COALESCE(notes, '') || %s
|
|
WHERE order_number = %s""",
|
|
[f"\nPaused: entity data missing ({datetime.now().isoformat()})", order_number],
|
|
)
|
|
conn.commit()
|
|
cur.close()
|
|
conn.close()
|
|
except Exception as exc:
|
|
logger.warning("Could not update order status: %s", exc)
|
|
|
|
# For batch orders, only the first order (lowest order_number) sends the
|
|
# intake email. Others just pause silently. This avoids the race condition
|
|
# where concurrent handlers all check DB before any has committed.
|
|
if batch_id and customer_email:
|
|
try:
|
|
conn2 = psycopg2.connect(os.environ.get("DATABASE_URL", ""))
|
|
cur2 = conn2.cursor()
|
|
cur2.execute(
|
|
"""SELECT MIN(order_number) FROM compliance_orders
|
|
WHERE batch_id = %s""",
|
|
(batch_id,),
|
|
)
|
|
first_order = cur2.fetchone()[0]
|
|
cur2.close()
|
|
conn2.close()
|
|
if first_order and order_number != first_order:
|
|
logger.info(
|
|
"Skipping intake email for %s — batch %s will send from %s",
|
|
order_number, batch_id, first_order,
|
|
)
|
|
return
|
|
except Exception:
|
|
pass # If check fails, send the email anyway
|
|
|
|
# Email the client
|
|
if customer_email:
|
|
try:
|
|
import PyJWT as pyjwt
|
|
except ImportError:
|
|
try:
|
|
import jwt as pyjwt
|
|
except ImportError:
|
|
logger.warning("No JWT library available — cannot send intake link")
|
|
return
|
|
|
|
secret = os.environ.get("CUSTOMER_JWT_SECRET", "changeme")
|
|
domain = os.environ.get("DOMAIN", "performancewest.net")
|
|
token = pyjwt.encode(
|
|
{"order_id": order_number, "order_type": "compliance", "email": customer_email},
|
|
secret, algorithm="HS256",
|
|
)
|
|
|
|
# For batch orders, build a generic intake email listing all services;
|
|
# for single orders, link directly to the service intake page.
|
|
if batch_id:
|
|
# Get all service names in this batch
|
|
batch_services = []
|
|
try:
|
|
conn3 = psycopg2.connect(os.environ.get("DATABASE_URL", ""))
|
|
cur3 = conn3.cursor()
|
|
cur3.execute(
|
|
"SELECT service_name FROM compliance_orders WHERE batch_id = %s ORDER BY order_number",
|
|
(batch_id,),
|
|
)
|
|
batch_services = [r[0] for r in cur3.fetchall()]
|
|
cur3.close()
|
|
conn3.close()
|
|
except Exception:
|
|
batch_services = [self.SERVICE_NAME]
|
|
|
|
services_html = "".join(f"<li>{s}</li>" for s in batch_services)
|
|
# Link to the first service's intake page
|
|
intake_url = f"https://{domain}/order/{self.SERVICE_SLUG}?token={token}&frn={frn}"
|
|
service_label = "FCC Compliance Services"
|
|
extra_text = (
|
|
f"<p>Your order includes:</p><ul style='margin:8px 0 16px 20px'>{services_html}</ul>"
|
|
f"<p>We'll start with the first filing — the intake form collects information "
|
|
f"needed for all services in your order.</p>"
|
|
)
|
|
else:
|
|
intake_url = f"https://{domain}/order/{self.SERVICE_SLUG}?token={token}&frn={frn}"
|
|
service_label = self.SERVICE_NAME
|
|
extra_text = ""
|
|
|
|
try:
|
|
import smtplib
|
|
from email.mime.text import MIMEText
|
|
from email.mime.multipart import MIMEMultipart
|
|
|
|
first_name = customer_name.split(" ")[0] if customer_name else "there"
|
|
subject = f"Action Required — Complete your {service_label} intake"
|
|
body = (
|
|
f"<h2>We need a few more details</h2>"
|
|
f"<p>Hi {first_name},</p>"
|
|
f"<p>Thank you for your order. To prepare your <strong>{service_label}</strong> "
|
|
f"filing, we need some additional information about your company.</p>"
|
|
f"{extra_text}"
|
|
f"<p>Please click below to complete the intake form — it takes about 2 minutes.</p>"
|
|
f"<p><a href='{intake_url}' style='display:inline-block;background:#1e3a5f;color:#fff;"
|
|
f"padding:12px 28px;border-radius:6px;text-decoration:none;font-weight:600;'>"
|
|
f"Complete Intake Form →</a></p>"
|
|
f"<p style='font-size:12px;color:#9ca3af;'>Order: {batch_id or order_number}</p>"
|
|
)
|
|
|
|
smtp_host = os.environ.get("SMTP_HOST", "co.carrierone.com")
|
|
smtp_port = int(os.environ.get("SMTP_PORT", "587"))
|
|
smtp_user = os.environ.get("SMTP_USER", "")
|
|
smtp_pass = os.environ.get("SMTP_PASS", "")
|
|
smtp_from = os.environ.get("SMTP_FROM", "Performance West <noreply@performancewest.net>")
|
|
if smtp_user and smtp_pass:
|
|
msg = MIMEMultipart("alternative")
|
|
msg["Subject"] = subject
|
|
msg["From"] = smtp_from
|
|
msg["To"] = customer_email
|
|
msg["Reply-To"] = "info@performancewest.net"
|
|
msg.attach(MIMEText(body, "html"))
|
|
with smtplib.SMTP(smtp_host, smtp_port) as server:
|
|
server.starttls()
|
|
server.login(smtp_user, smtp_pass)
|
|
server.send_message(msg)
|
|
logger.info("Entity intake email sent to %s for %s", customer_email, order_number)
|
|
except Exception as exc:
|
|
logger.warning("Could not send intake email: %s", exc)
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# LLM interaction
|
|
# ------------------------------------------------------------------ #
|
|
|
|
async def _call_llm(
|
|
self,
|
|
system_prompt: str,
|
|
user_prompt: str,
|
|
temperature: float | None = None,
|
|
max_tokens: int | None = None,
|
|
) -> str:
|
|
"""Call the LLM API and return the generated text."""
|
|
import httpx
|
|
|
|
headers = {
|
|
"Authorization": f"Bearer {LLM_API_KEY}",
|
|
"Content-Type": "application/json",
|
|
}
|
|
payload = {
|
|
"model": LLM_MODEL,
|
|
"temperature": temperature or LLM_TEMPERATURE,
|
|
"max_tokens": max_tokens or LLM_MAX_TOKENS,
|
|
"messages": [
|
|
{"role": "system", "content": system_prompt},
|
|
{"role": "user", "content": user_prompt},
|
|
],
|
|
}
|
|
|
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
|
response = await client.post(LLM_API_URL, json=payload, headers=headers)
|
|
response.raise_for_status()
|
|
|
|
data = response.json()
|
|
return data["choices"][0]["message"]["content"]
|
|
|
|
async def _generate_sections(
|
|
self,
|
|
system_prompt: str,
|
|
sections: list[dict[str, str]],
|
|
context: str,
|
|
) -> dict[str, str]:
|
|
"""Generate multiple document sections via the LLM.
|
|
|
|
Args:
|
|
system_prompt: The service-specific system prompt.
|
|
sections: List of dicts with 'name' and 'prompt' keys.
|
|
context: Order/customer context to include in each prompt.
|
|
|
|
Returns:
|
|
Dict mapping section names to generated content.
|
|
"""
|
|
results: dict[str, str] = {}
|
|
for section in sections:
|
|
user_prompt = (
|
|
f"Context:\n{context}\n\n"
|
|
f"Section: {section['name']}\n\n"
|
|
f"{section['prompt']}"
|
|
)
|
|
content = await self._call_llm(system_prompt, user_prompt)
|
|
results[section["name"]] = content
|
|
logger.info(
|
|
"Generated section '%s' (%d chars)",
|
|
section["name"],
|
|
len(content),
|
|
)
|
|
return results
|
|
|
|
# ------------------------------------------------------------------ #
|
|
# Context extraction
|
|
# ------------------------------------------------------------------ #
|
|
|
|
def _extract_order_context(self, order_data: dict) -> str:
|
|
"""Build a text context block from order data for LLM prompts."""
|
|
lines = [
|
|
f"Order Number: {order_data.get('name', 'N/A')}",
|
|
f"Customer: {order_data.get('customer_name', order_data.get('customer', 'N/A'))}",
|
|
f"Service: {self.SERVICE_NAME}",
|
|
]
|
|
|
|
# Include custom fields if present
|
|
for key in [
|
|
"custom_company_size",
|
|
"custom_industry",
|
|
"custom_state",
|
|
"custom_notes",
|
|
"custom_intake_data",
|
|
]:
|
|
val = order_data.get(key)
|
|
if val:
|
|
label = key.replace("custom_", "").replace("_", " ").title()
|
|
lines.append(f"{label}: {val}")
|
|
|
|
return "\n".join(lines)
|