"""Abstract base class for all service handlers. Every compliance service (FLSA audit, handbook review, etc.) inherits from this class and implements the ``process()`` method. """ from __future__ import annotations import logging import os import subprocess import tempfile from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path from typing import Any logger = logging.getLogger(__name__) # Directory containing DOCX templates TEMPLATES_DIR = os.getenv( "TEMPLATES_DIR", str(Path(__file__).resolve().parent.parent.parent / "templates"), ) # LLM configuration LLM_API_URL = os.getenv("LLM_API_URL", "https://api.openai.com/v1/chat/completions") LLM_API_KEY = os.getenv("LLM_API_KEY", "") LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o") LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.3")) LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "4096")) class BaseServiceHandler(ABC): """Base class for compliance service handlers.""" SERVICE_SLUG: str = "" SERVICE_NAME: str = "" TEMPLATE_NAME: str = "" # DOCX template filename in TEMPLATES_DIR REQUIRES_LLM: bool = False def __init__(self) -> None: self.templates_dir = Path(TEMPLATES_DIR) self._work_dir: str | None = None # ------------------------------------------------------------------ # # Abstract interface # ------------------------------------------------------------------ # @abstractmethod async def process(self, order_data: dict) -> list[str]: """Process an order and return list of generated file paths. Implementations should: 1. Load the DOCX template 2. (Optionally) call the LLM to generate section content 3. Fill the template with variables/content 4. Save as DOCX and convert to PDF 5. Return list of file paths """ raise NotImplementedError # ------------------------------------------------------------------ # # Helpers # ------------------------------------------------------------------ # def _build_output_path(self, order_number: str, filename: str) -> str: """Build the MinIO object path for output files.""" return f"compliance/{order_number}/{filename}" def _get_template_path(self, template_name: str | None = None) -> Path: """Resolve the full path to a DOCX template.""" name = template_name or self.TEMPLATE_NAME path = self.templates_dir / name if not path.exists(): raise FileNotFoundError(f"Template not found: {path}") return path def _make_work_dir(self) -> str: """Create a temporary working directory for generated files.""" if self._work_dir is None: self._work_dir = tempfile.mkdtemp(prefix=f"pw_{self.SERVICE_SLUG}_") return self._work_dir def _output_filename(self, order_number: str, ext: str = "docx") -> str: """Generate a consistent output filename.""" date_str = datetime.now().strftime("%Y%m%d") slug = self.SERVICE_SLUG.replace("-", "_") return f"{slug}_{order_number}_{date_str}.{ext}" # ------------------------------------------------------------------ # # Template filling (python-docx) # ------------------------------------------------------------------ # def _fill_template( self, template_path: Path, variables: dict[str, str], output_path: str, ) -> str: """Open a DOCX template, replace {{variable}} placeholders, and save. Supports replacement in paragraphs and table cells. Returns the output path. """ from docx import Document doc = Document(str(template_path)) def _replace_in_paragraph(paragraph: Any) -> None: for key, value in variables.items(): placeholder = "{{" + key + "}}" if placeholder in paragraph.text: # Preserve formatting: replace in runs for run in paragraph.runs: if placeholder in run.text: run.text = run.text.replace(placeholder, value) for paragraph in doc.paragraphs: _replace_in_paragraph(paragraph) for table in doc.tables: for row in table.rows: for cell in row.cells: for paragraph in cell.paragraphs: _replace_in_paragraph(paragraph) doc.save(output_path) logger.info("Filled template → %s", output_path) return output_path def _add_sections_to_doc( self, doc_path: str, sections: dict[str, str], ) -> str: """Append named sections (from LLM output) to an existing DOCX. Each section gets a heading followed by the generated content. Returns the (modified) doc_path. """ from docx import Document from docx.shared import Pt doc = Document(doc_path) for section_name, content in sections.items(): heading = section_name.replace("_", " ").title() doc.add_heading(heading, level=2) for paragraph_text in content.split("\n\n"): paragraph_text = paragraph_text.strip() if not paragraph_text: continue p = doc.add_paragraph(paragraph_text) for run in p.runs: run.font.size = Pt(11) doc.save(doc_path) return doc_path # ------------------------------------------------------------------ # # PDF conversion (LibreOffice headless) # ------------------------------------------------------------------ # def _convert_to_pdf(self, docx_path: str) -> str: """Convert a DOCX file to PDF using LibreOffice. Returns the path to the generated PDF file. """ output_dir = str(Path(docx_path).parent) result = subprocess.run( [ "libreoffice", "--headless", "--convert-to", "pdf", "--outdir", output_dir, docx_path, ], capture_output=True, text=True, timeout=120, ) if result.returncode != 0: raise RuntimeError( f"LibreOffice conversion failed: {result.stderr}" ) pdf_path = str(Path(docx_path).with_suffix(".pdf")) if not Path(pdf_path).exists(): raise FileNotFoundError(f"PDF not generated: {pdf_path}") logger.info("Converted %s → %s", docx_path, pdf_path) return pdf_path # ------------------------------------------------------------------ # # Entity intake request — pause order until client provides entity data # ------------------------------------------------------------------ # def _request_entity_intake(self, order_data: dict) -> None: """Pause the order and email the client to complete entity intake. Called when an order is dispatched but the entity data (company name, FRN, officer, etc.) is missing. Sends the client to the intake wizard. For batch orders, only the first handler to call this sends the email; subsequent handlers for the same batch skip the email (but still pause their own order). """ import psycopg2 order_number = order_data.get("name", "") customer_email = order_data.get("customer_email", "") customer_name = order_data.get("customer_name", "") frn = (order_data.get("intake_data") or {}).get("frn", "") batch_id = order_data.get("batch_id") # Update order status to pending intake try: conn = psycopg2.connect(os.environ.get("DATABASE_URL", "")) cur = conn.cursor() cur.execute( """UPDATE compliance_orders SET payment_status = 'pending_intake', notes = COALESCE(notes, '') || %s WHERE order_number = %s""", [f"\nPaused: entity data missing ({datetime.now().isoformat()})", order_number], ) conn.commit() cur.close() conn.close() except Exception as exc: logger.warning("Could not update order status: %s", exc) # For batch orders, only the first order (lowest order_number) sends the # intake email. Others just pause silently. This avoids the race condition # where concurrent handlers all check DB before any has committed. if batch_id and customer_email: try: conn2 = psycopg2.connect(os.environ.get("DATABASE_URL", "")) cur2 = conn2.cursor() cur2.execute( """SELECT MIN(order_number) FROM compliance_orders WHERE batch_id = %s""", (batch_id,), ) first_order = cur2.fetchone()[0] cur2.close() conn2.close() if first_order and order_number != first_order: logger.info( "Skipping intake email for %s — batch %s will send from %s", order_number, batch_id, first_order, ) return except Exception: pass # If check fails, send the email anyway # Email the client if customer_email: try: import PyJWT as pyjwt except ImportError: try: import jwt as pyjwt except ImportError: logger.warning("No JWT library available — cannot send intake link") return secret = os.environ.get("CUSTOMER_JWT_SECRET", "changeme") domain = os.environ.get("DOMAIN", "performancewest.net") token = pyjwt.encode( {"order_id": order_number, "order_type": "compliance", "email": customer_email}, secret, algorithm="HS256", ) # For batch orders, build a generic intake email listing all services; # for single orders, link directly to the service intake page. if batch_id: # Get all service names in this batch batch_services = [] try: conn3 = psycopg2.connect(os.environ.get("DATABASE_URL", "")) cur3 = conn3.cursor() cur3.execute( "SELECT service_name FROM compliance_orders WHERE batch_id = %s ORDER BY order_number", (batch_id,), ) batch_services = [r[0] for r in cur3.fetchall()] cur3.close() conn3.close() except Exception: batch_services = [self.SERVICE_NAME] services_html = "".join(f"
  • {s}
  • " for s in batch_services) # Link to the first service's intake page intake_url = f"https://{domain}/order/{self.SERVICE_SLUG}?token={token}&frn={frn}" service_label = "FCC Compliance Services" extra_text = ( f"

    Your order includes:

    " f"

    We'll start with the first filing — the intake form collects information " f"needed for all services in your order.

    " ) else: intake_url = f"https://{domain}/order/{self.SERVICE_SLUG}?token={token}&frn={frn}" service_label = self.SERVICE_NAME extra_text = "" try: import smtplib from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart first_name = customer_name.split(" ")[0] if customer_name else "there" subject = f"Action Required — Complete your {service_label} intake" body = ( f"

    We need a few more details

    " f"

    Hi {first_name},

    " f"

    Thank you for your order. To prepare your {service_label} " f"filing, we need some additional information about your company.

    " f"{extra_text}" f"

    Please click below to complete the intake form — it takes about 2 minutes.

    " f"

    " f"Complete Intake Form →

    " f"

    Order: {batch_id or order_number}

    " ) smtp_host = os.environ.get("SMTP_HOST", "co.carrierone.com") smtp_port = int(os.environ.get("SMTP_PORT", "587")) smtp_user = os.environ.get("SMTP_USER", "") smtp_pass = os.environ.get("SMTP_PASS", "") smtp_from = os.environ.get("SMTP_FROM", "Performance West ") if smtp_user and smtp_pass: msg = MIMEMultipart("alternative") msg["Subject"] = subject msg["From"] = smtp_from msg["To"] = customer_email msg["Reply-To"] = "info@performancewest.net" msg.attach(MIMEText(body, "html")) with smtplib.SMTP(smtp_host, smtp_port) as server: server.starttls() server.login(smtp_user, smtp_pass) server.send_message(msg) logger.info("Entity intake email sent to %s for %s", customer_email, order_number) except Exception as exc: logger.warning("Could not send intake email: %s", exc) # ------------------------------------------------------------------ # # LLM interaction # ------------------------------------------------------------------ # async def _call_llm( self, system_prompt: str, user_prompt: str, temperature: float | None = None, max_tokens: int | None = None, ) -> str: """Call the LLM API and return the generated text.""" import httpx headers = { "Authorization": f"Bearer {LLM_API_KEY}", "Content-Type": "application/json", } payload = { "model": LLM_MODEL, "temperature": temperature or LLM_TEMPERATURE, "max_tokens": max_tokens or LLM_MAX_TOKENS, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ], } async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post(LLM_API_URL, json=payload, headers=headers) response.raise_for_status() data = response.json() return data["choices"][0]["message"]["content"] async def _generate_sections( self, system_prompt: str, sections: list[dict[str, str]], context: str, ) -> dict[str, str]: """Generate multiple document sections via the LLM. Args: system_prompt: The service-specific system prompt. sections: List of dicts with 'name' and 'prompt' keys. context: Order/customer context to include in each prompt. Returns: Dict mapping section names to generated content. """ results: dict[str, str] = {} for section in sections: user_prompt = ( f"Context:\n{context}\n\n" f"Section: {section['name']}\n\n" f"{section['prompt']}" ) content = await self._call_llm(system_prompt, user_prompt) results[section["name"]] = content logger.info( "Generated section '%s' (%d chars)", section["name"], len(content), ) return results # ------------------------------------------------------------------ # # Context extraction # ------------------------------------------------------------------ # def _extract_order_context(self, order_data: dict) -> str: """Build a text context block from order data for LLM prompts.""" lines = [ f"Order Number: {order_data.get('name', 'N/A')}", f"Customer: {order_data.get('customer_name', order_data.get('customer', 'N/A'))}", f"Service: {self.SERVICE_NAME}", ] # Include custom fields if present for key in [ "custom_company_size", "custom_industry", "custom_state", "custom_notes", "custom_intake_data", ]: val = order_data.get(key) if val: label = key.replace("custom_", "").replace("_", " ").title() lines.append(f"{label}: {val}") return "\n".join(lines)