# new-site/scripts/workers/services/base_handler.py

"""Abstract base class for all service handlers.

Every compliance service (FLSA audit, handbook review, etc.) inherits from
this class and implements the ``process()`` method.
"""

from __future__ import annotations

import logging
import os
import subprocess
import tempfile
from abc import ABC, abstractmethod
from datetime import datetime
from pathlib import Path
from typing import Any

logger = logging.getLogger(__name__)

# Where the DOCX templates live. The TEMPLATES_DIR environment variable wins;
# otherwise fall back to a ``templates`` directory located three ``parent``
# hops above this file's resolved path.
TEMPLATES_DIR = os.environ.get(
    "TEMPLATES_DIR",
    str(Path(__file__).resolve().parent.parent.parent / "templates"),
)

# LLM settings, each overridable through an environment variable of the same
# name. The numeric ones are parsed at import time, so a malformed value
# raises ValueError as soon as the module is loaded.
LLM_API_URL = os.environ.get(
    "LLM_API_URL", "https://api.openai.com/v1/chat/completions"
)
LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o")
LLM_TEMPERATURE = float(os.environ.get("LLM_TEMPERATURE", "0.3"))
LLM_MAX_TOKENS = int(os.environ.get("LLM_MAX_TOKENS", "4096"))


class BaseServiceHandler(ABC):
    """Base class for compliance service handlers."""

    SERVICE_SLUG: str = ""
    SERVICE_NAME: str = ""
    TEMPLATE_NAME: str = ""  # DOCX template filename in TEMPLATES_DIR
    REQUIRES_LLM: bool = False

    def __init__(self) -> None:
        """Initialise per-handler state.

        Records the template directory (from the module-level
        ``TEMPLATES_DIR`` setting) and starts without a working directory;
        ``_make_work_dir()`` creates one on first use.
        """
        # No temp directory yet — allocated lazily by _make_work_dir().
        self._work_dir: str | None = None
        self.templates_dir = Path(TEMPLATES_DIR)

    # ------------------------------------------------------------------ #
    # Abstract interface
    # ------------------------------------------------------------------ #

    @abstractmethod
    async def process(self, order_data: dict) -> list[str]:
        """Process an order and return list of generated file paths.

        Implementations should:
        1. Load the DOCX template
        2. (Optionally) call the LLM to generate section content
        3. Fill the template with variables/content
        4. Save as DOCX and convert to PDF
        5. Return list of file paths
        """
        raise NotImplementedError

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #

    def _build_output_path(self, order_number: str, filename: str) -> str:
        """Build the MinIO object path for output files."""
        return f"compliance/{order_number}/{filename}"

    def _get_template_path(self, template_name: str | None = None) -> Path:
        """Resolve the full path to a DOCX template."""
        name = template_name or self.TEMPLATE_NAME
        path = self.templates_dir / name
        if not path.exists():
            raise FileNotFoundError(f"Template not found: {path}")
        return path

    def _make_work_dir(self) -> str:
        """Create a temporary working directory for generated files."""
        if self._work_dir is None:
            self._work_dir = tempfile.mkdtemp(prefix=f"pw_{self.SERVICE_SLUG}_")
        return self._work_dir

    def _output_filename(self, order_number: str, ext: str = "docx") -> str:
        """Generate a consistent output filename."""
        date_str = datetime.now().strftime("%Y%m%d")
        slug = self.SERVICE_SLUG.replace("-", "_")
        return f"{slug}_{order_number}_{date_str}.{ext}"

    # ------------------------------------------------------------------ #
    # Template filling (python-docx)
    # ------------------------------------------------------------------ #

    def _fill_template(
        self,
        template_path: Path,
        variables: dict[str, str],
        output_path: str,
    ) -> str:
        """Open a DOCX template, replace {{variable}} placeholders, and save.

        Replacement is applied to body paragraphs and to paragraphs inside
        top-level table cells. A placeholder is replaced even when the
        template stores it split across several runs (which word processors
        commonly do after edits or spell-checking); the replacement text is
        placed in the first run the placeholder touches, so it takes that
        run's formatting. Runs that are not touched are left unchanged.

        Args:
            template_path: Path of the DOCX template to load.
            variables: Mapping of placeholder name (without braces) to
                replacement value. ``None`` is rendered as an empty string;
                other non-string values are converted with ``str()``.
            output_path: Where the filled document is written.

        Returns:
            ``output_path``.
        """
        from docx import Document

        doc = Document(str(template_path))

        # Build the (placeholder, replacement) pairs once, not per paragraph.
        replacements = [
            ("{{" + key + "}}", "" if value is None else str(value))
            for key, value in variables.items()
        ]

        def _replace_in_paragraph(paragraph: Any) -> None:
            for placeholder, replacement in replacements:
                if placeholder not in paragraph.text:
                    continue

                runs = paragraph.runs
                texts = [run.text for run in runs]
                joined = "".join(texts)
                search_from = 0

                while True:
                    start = joined.find(placeholder, search_from)
                    if start == -1:
                        break
                    end = start + len(placeholder)

                    # Walk the runs, cutting the [start, end) span out of
                    # every run it overlaps and inserting the replacement
                    # into the first of them.
                    offset = 0
                    inserted = False
                    for index, text in enumerate(texts):
                        run_start = offset
                        run_end = offset + len(text)
                        offset = run_end
                        if run_end <= start or run_start >= end:
                            continue
                        head = text[: max(start, run_start) - run_start]
                        tail = text[min(end, run_end) - run_start:]
                        texts[index] = (
                            head + ("" if inserted else replacement) + tail
                        )
                        inserted = True

                    joined = "".join(texts)
                    # Resume after the inserted text so a replacement that
                    # itself contains the placeholder is not expanded again.
                    search_from = start + len(replacement)

                # Only write back runs whose text actually changed.
                for run, new_text in zip(runs, texts):
                    if run.text != new_text:
                        run.text = new_text

        for paragraph in doc.paragraphs:
            _replace_in_paragraph(paragraph)

        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        _replace_in_paragraph(paragraph)

        doc.save(output_path)
        logger.info("Filled template → %s", output_path)
        return output_path

    def _add_sections_to_doc(
        self,
        doc_path: str,
        sections: dict[str, str],
    ) -> str:
        """Append generated sections to an existing DOCX, in place.

        For every entry in ``sections`` a level-2 heading is added (the key
        with underscores replaced by spaces, title-cased), followed by one
        paragraph per blank-line-separated chunk of the content, set in
        11 pt. Chunks that are empty after stripping are skipped.

        Args:
            doc_path: Path of the DOCX to open and overwrite.
            sections: Mapping of section name to section text.

        Returns:
            ``doc_path`` (the file is modified in place).
        """
        from docx import Document
        from docx.shared import Pt

        document = Document(doc_path)
        body_size = Pt(11)

        for raw_name, body in sections.items():
            document.add_heading(raw_name.replace("_", " ").title(), level=2)

            chunks = (chunk.strip() for chunk in body.split("\n\n"))
            for text in filter(None, chunks):
                added = document.add_paragraph(text)
                for run in added.runs:
                    run.font.size = body_size

        document.save(doc_path)
        return doc_path

    # ------------------------------------------------------------------ #
    # PDF conversion (LibreOffice headless)
    # ------------------------------------------------------------------ #

    def _convert_to_pdf(self, docx_path: str) -> str:
        """Convert a DOCX file to PDF using headless LibreOffice.

        The PDF is written next to the source file, with the same stem and
        a ``.pdf`` suffix. Any PDF already present at that location is
        removed first, so that the existence check after the conversion
        proves this run produced the file rather than an earlier one.

        Args:
            docx_path: Path of the DOCX file to convert.

        Returns:
            The path of the generated PDF file.

        Raises:
            RuntimeError: If LibreOffice exits with a non-zero status.
            FileNotFoundError: If the expected PDF is missing afterwards.
            subprocess.TimeoutExpired: If the conversion exceeds 120 seconds.
        """
        source = Path(docx_path)
        pdf = source.with_suffix(".pdf")

        # Drop a stale result from a previous run; never delete the input
        # itself if it was (mistakenly) given with a .pdf suffix.
        if pdf != source:
            pdf.unlink(missing_ok=True)

        result = subprocess.run(
            [
                "libreoffice",
                "--headless",
                "--convert-to",
                "pdf",
                "--outdir",
                str(source.parent),
                docx_path,
            ],
            capture_output=True,
            text=True,
            timeout=120,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"LibreOffice conversion failed: {result.stderr}"
            )

        pdf_path = str(pdf)
        # A zero exit status alone is not trusted; confirm the file exists.
        if not pdf.exists():
            raise FileNotFoundError(f"PDF not generated: {pdf_path}")

        logger.info("Converted %s → %s", docx_path, pdf_path)
        return pdf_path

    # ------------------------------------------------------------------ #
    # Entity intake request — pause order until client provides entity data
    # ------------------------------------------------------------------ #

    def _request_entity_intake(self, order_data: dict) -> None:
        """Pause the order and email the client to complete entity intake.

        Called when an order is dispatched but the entity data (company name,
        FRN, officer, etc.) is missing. Sends the client to the intake wizard.

        Steps, each of which is best-effort (failures are logged, not raised):

        1. Mark the order ``pending_intake`` in ``compliance_orders`` and
           append a timestamped note.
        2. For batch orders, only the order with the lowest ``order_number``
           in the batch sends the email; the others stop after step 1. If
           that lookup fails, the email is sent anyway.
        3. Build a signed (HS256) JWT intake link and email it to the
           customer. Nothing is sent when no customer email is known, when
           no JWT library is installed, or when SMTP credentials are not
           configured.

        Args:
            order_data: Order record. Keys read here: ``name`` (order
                number), ``customer_email``, ``customer_name``,
                ``intake_data`` (for ``frn``) and ``batch_id``.
        """
        import html
        from contextlib import closing
        from urllib.parse import urlencode

        import psycopg2

        order_number = order_data.get("name", "")
        customer_email = order_data.get("customer_email", "")
        customer_name = order_data.get("customer_name", "")
        frn = (order_data.get("intake_data") or {}).get("frn", "")
        batch_id = order_data.get("batch_id")
        database_url = os.environ.get("DATABASE_URL", "")

        # Update order status to pending intake. closing() guarantees the
        # connection and cursor are released even when the statement fails.
        try:
            with closing(psycopg2.connect(database_url)) as conn:
                with closing(conn.cursor()) as cur:
                    cur.execute(
                        """UPDATE compliance_orders
                   SET payment_status = 'pending_intake',
                       notes = COALESCE(notes, '') || %s
                   WHERE order_number = %s""",
                        [
                            f"\nPaused: entity data missing ({datetime.now().isoformat()})",
                            order_number,
                        ],
                    )
                conn.commit()
        except Exception as exc:
            logger.warning("Could not update order status: %s", exc)

        # For batch orders, only the first order (lowest order_number) sends the
        # intake email. Others just pause silently. This avoids the race condition
        # where concurrent handlers all check DB before any has committed.
        if batch_id and customer_email:
            first_order = None
            try:
                with closing(psycopg2.connect(database_url)) as conn:
                    with closing(conn.cursor()) as cur:
                        cur.execute(
                            """SELECT MIN(order_number) FROM compliance_orders
                       WHERE batch_id = %s""",
                            (batch_id,),
                        )
                        row = cur.fetchone()
                first_order = row[0] if row else None
            except Exception as exc:
                # If the check fails, send the email anyway — but say so.
                logger.warning(
                    "Could not determine intake email sender for batch %s "
                    "(order %s); sending anyway: %s",
                    batch_id, order_number, exc,
                )
            if first_order and order_number != first_order:
                logger.info(
                    "Skipping intake email for %s — batch %s will send from %s",
                    order_number, batch_id, first_order,
                )
                return

        # Without an address there is nobody to email.
        if not customer_email:
            return

        try:
            import PyJWT as pyjwt
        except ImportError:
            try:
                import jwt as pyjwt
            except ImportError:
                logger.warning("No JWT library available — cannot send intake link")
                return

        secret = os.environ.get("CUSTOMER_JWT_SECRET", "changeme")
        if secret == "changeme":
            # SECURITY: tokens signed with the well-known default can be
            # forged by anyone. The fallback is kept for compatibility, but
            # it must not go unnoticed.
            logger.warning(
                "CUSTOMER_JWT_SECRET is not configured — signing intake token "
                "with the insecure default secret"
            )
        domain = os.environ.get("DOMAIN", "performancewest.net")
        token = pyjwt.encode(
            {"order_id": order_number, "order_type": "compliance", "email": customer_email},
            secret, algorithm="HS256",
        )
        # PyJWT 1.x returns bytes; 2.x returns str.
        if isinstance(token, bytes):
            token = token.decode("ascii")

        # Both single and batch orders link to this handler's intake page.
        # urlencode() keeps unusual characters in frn from breaking the URL.
        query = urlencode({"token": token, "frn": frn})
        intake_url = f"https://{domain}/order/{self.SERVICE_SLUG}?{query}"

        # For batch orders, build a generic intake email listing all services;
        # for single orders, name the service directly.
        if batch_id:
            # Get all service names in this batch
            try:
                with closing(psycopg2.connect(database_url)) as conn:
                    with closing(conn.cursor()) as cur:
                        cur.execute(
                            "SELECT service_name FROM compliance_orders WHERE batch_id = %s ORDER BY order_number",
                            (batch_id,),
                        )
                        batch_services = [row[0] for row in cur.fetchall()]
            except Exception as exc:
                logger.warning(
                    "Could not list services for batch %s: %s", batch_id, exc
                )
                batch_services = [self.SERVICE_NAME]

            services_html = "".join(
                f"<li>{html.escape(str(service))}</li>" for service in batch_services
            )
            service_label = "FCC Compliance Services"
            extra_text = (
                f"<p>Your order includes:</p><ul style='margin:8px 0 16px 20px'>{services_html}</ul>"
                f"<p>We'll start with the first filing — the intake form collects information "
                f"needed for all services in your order.</p>"
            )
        else:
            service_label = self.SERVICE_NAME
            extra_text = ""

        try:
            import smtplib
            from email.mime.text import MIMEText
            from email.mime.multipart import MIMEMultipart

            name_parts = (customer_name or "").split()
            first_name = name_parts[0] if name_parts else "there"
            subject = f"Action Required — Complete your {service_label} intake"
            # Everything interpolated into the HTML body is escaped; the URL
            # is escaped with quote=True because it sits inside an attribute.
            body = (
                f"<h2>We need a few more details</h2>"
                f"<p>Hi {html.escape(first_name)},</p>"
                f"<p>Thank you for your order. To prepare your <strong>{html.escape(service_label)}</strong> "
                f"filing, we need some additional information about your company.</p>"
                f"{extra_text}"
                f"<p>Please click below to complete the intake form — it takes about 2 minutes.</p>"
                f"<p><a href='{html.escape(intake_url, quote=True)}' style='display:inline-block;background:#1e3a5f;color:#fff;"
                f"padding:12px 28px;border-radius:6px;text-decoration:none;font-weight:600;'>"
                f"Complete Intake Form →</a></p>"
                f"<p style='font-size:12px;color:#9ca3af;'>Order: {html.escape(str(batch_id or order_number))}</p>"
            )

            smtp_host = os.environ.get("SMTP_HOST", "co.carrierone.com")
            smtp_port = int(os.environ.get("SMTP_PORT", "587"))
            smtp_user = os.environ.get("SMTP_USER", "")
            smtp_pass = os.environ.get("SMTP_PASS", "")
            smtp_from = os.environ.get("SMTP_FROM", "Performance West <noreply@performancewest.net>")
            if smtp_user and smtp_pass:
                msg = MIMEMultipart("alternative")
                msg["Subject"] = subject
                msg["From"] = smtp_from
                msg["To"] = customer_email
                msg["Reply-To"] = "info@performancewest.net"
                msg.attach(MIMEText(body, "html"))
                # A timeout keeps an unresponsive mail server from hanging
                # the worker indefinitely.
                with smtplib.SMTP(smtp_host, smtp_port, timeout=30) as server:
                    server.starttls()
                    server.login(smtp_user, smtp_pass)
                    server.send_message(msg)
                logger.info("Entity intake email sent to %s for %s", customer_email, order_number)
            else:
                # Do not claim success when nothing was sent.
                logger.warning(
                    "SMTP credentials not configured — intake email to %s for %s was not sent",
                    customer_email, order_number,
                )
        except Exception as exc:
            logger.warning("Could not send intake email: %s", exc)

    # ------------------------------------------------------------------ #
    # LLM interaction
    # ------------------------------------------------------------------ #

    async def _call_llm(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> str:
        """Call the LLM chat-completions API and return the generated text.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.
            temperature: Sampling temperature. ``None`` selects the
                configured ``LLM_TEMPERATURE``; an explicit ``0`` / ``0.0``
                is honoured.
            max_tokens: Completion token limit. ``None`` (or ``0``) selects
                the configured ``LLM_MAX_TOKENS``.

        Returns:
            The ``content`` of the first choice's message in the response.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
            KeyError, IndexError: If the response body lacks the expected
                ``choices[0].message.content`` structure.
        """
        import httpx

        headers = {
            "Authorization": f"Bearer {LLM_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": LLM_MODEL,
            # Compare against None, not truthiness: 0.0 is a valid
            # temperature and must not be replaced by the default.
            "temperature": LLM_TEMPERATURE if temperature is None else temperature,
            # Falsy fallback kept on purpose: a limit of 0 is not meaningful.
            "max_tokens": max_tokens or LLM_MAX_TOKENS,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        }

        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(LLM_API_URL, json=payload, headers=headers)
            response.raise_for_status()

        data = response.json()
        return data["choices"][0]["message"]["content"]

    async def _generate_sections(
        self,
        system_prompt: str,
        sections: list[dict[str, str]],
        context: str,
    ) -> dict[str, str]:
        """Ask the LLM for each requested section, one call at a time.

        Every section gets its own user prompt made of the shared context,
        the section name, and the section's own instructions. Calls are
        awaited sequentially in the order given.

        Args:
            system_prompt: The service-specific system prompt.
            sections: Section specs, each a dict with ``name`` and
                ``prompt`` keys.
            context: Order/customer context repeated in every prompt.

        Returns:
            Mapping of section name to the text generated for it.
        """
        generated: dict[str, str] = {}

        for spec in sections:
            name = spec["name"]
            instructions = spec["prompt"]
            prompt = "Context:\n{}\n\nSection: {}\n\n{}".format(
                context, name, instructions
            )

            text = await self._call_llm(system_prompt, prompt)
            generated[name] = text
            logger.info("Generated section '%s' (%d chars)", name, len(text))

        return generated

    # ------------------------------------------------------------------ #
    # Context extraction
    # ------------------------------------------------------------------ #

    def _extract_order_context(self, order_data: dict) -> str:
        """Build a text context block from order data for LLM prompts."""
        lines = [
            f"Order Number: {order_data.get('name', 'N/A')}",
            f"Customer: {order_data.get('customer_name', order_data.get('customer', 'N/A'))}",
            f"Service: {self.SERVICE_NAME}",
        ]

        # Include custom fields if present
        for key in [
            "custom_company_size",
            "custom_industry",
            "custom_state",
            "custom_notes",
            "custom_intake_data",
        ]:
            val = order_data.get(key)
            if val:
                label = key.replace("custom_", "").replace("_", " ").title()
                lines.append(f"{label}: {val}")

        return "\n".join(lines)