"""
LLM content writer for compliance report sections.
Uses Ollama (local LLM) to generate analysis and prose for compliance reports.
Each service type provides its own system prompt and section templates.
"""
from __future__ import annotations
import json
import logging
import os
from typing import Any
import httpx
LOG = logging.getLogger("document_gen.llm")
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:7b")
class LLMWriter:
"""Generate compliance report content using a local LLM."""
def __init__(self, model: str = DEFAULT_MODEL):
self.model = model
self.base_url = OLLAMA_HOST
self.client = httpx.Client(timeout=300.0) # 5 min timeout for long generations

    def generate_section(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.3,
        max_tokens: int = 4096,
    ) -> str:
        """Generate a single section of a compliance report.

        Args:
            system_prompt: System instructions (compliance rules, format requirements)
            user_prompt: The specific section to generate (includes customer data)
            temperature: Lower = more factual, higher = more creative
            max_tokens: Maximum output length

        Returns:
            Generated text content for the section
        """
        LOG.info("Generating section (model=%s, temp=%.1f)...", self.model, temperature)
        try:
            response = self.client.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": self.model,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ],
                    "options": {
                        "temperature": temperature,
                        "num_predict": max_tokens,
                    },
                    "stream": False,
                },
            )
            response.raise_for_status()
            data = response.json()
            content = data.get("message", {}).get("content", "")
            LOG.info("Generated %d characters", len(content))
            return content.strip()
        except httpx.HTTPError as e:
            LOG.error("Ollama request failed: %s", e)
            raise RuntimeError(f"LLM generation failed: {e}") from e
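
    # Illustrative single-section call (the prompt strings here are
    # placeholders, not values from a real service template):
    #
    #   writer = LLMWriter()
    #   text = writer.generate_section(
    #       system_prompt="You are a telecom compliance analyst.",
    #       user_prompt="Draft the executive summary for the customer data below...",
    #       temperature=0.2,
    #   )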

    def generate_report(
        self,
        service_type: str,
        customer_data: dict[str, Any],
        sections: list[dict[str, str]],
        system_prompt: str,
    ) -> dict[str, str]:
        """Generate all sections of a compliance report.

        Args:
            service_type: Service identifier (e.g., 'flsa_audit')
            customer_data: Customer and order information
            sections: List of {"name": "section_name", "prompt": "section-specific instructions"}
            system_prompt: Base system prompt for this service type

        Returns:
            Dict mapping section names to generated content
        """
        results: dict[str, str] = {}
        customer_json = json.dumps(customer_data, indent=2)
        for section in sections:
            section_name = section["name"]
            section_prompt = section["prompt"]
            user_prompt = (
                f"SERVICE: {service_type}\n"
                f"SECTION: {section_name}\n\n"
                f"CUSTOMER DATA:\n{customer_json}\n\n"
                f"INSTRUCTIONS:\n{section_prompt}"
            )
            try:
                content = self.generate_section(
                    system_prompt=system_prompt,
                    user_prompt=user_prompt,
                    temperature=0.3,
                )
                results[section_name] = content
                LOG.info("Section '%s' generated (%d chars)", section_name, len(content))
            except Exception as e:
                LOG.error("Section '%s' failed: %s", section_name, e)
                results[section_name] = f"[GENERATION FAILED: {e}]"
        return results

    def health_check(self) -> bool:
        """Check if Ollama is reachable and the model is available."""
        try:
            resp = self.client.get(f"{self.base_url}/api/tags")
            if resp.status_code != 200:
                return False
            models = resp.json().get("models", [])
            model_names = [m.get("name", "") for m in models]
            available = any(self.model in name for name in model_names)
            if not available:
                LOG.warning("Model %s not found. Available: %s", self.model, model_names)
            return available
        except Exception as e:
            LOG.error("Ollama health check failed: %s", e)
            return False
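

# Minimal usage sketch (assumes Ollama is running locally; the section prompt
# and customer fields below are illustrative placeholders, not values from a
# real service template):
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    writer = LLMWriter()
    if not writer.health_check():
        raise SystemExit(f"Ollama unreachable at {OLLAMA_HOST} or model '{DEFAULT_MODEL}' missing")

    report = writer.generate_report(
        service_type="flsa_audit",
        customer_data={"company": "Example Telecom LLC", "state": "TX"},
        sections=[
            {"name": "executive_summary", "prompt": "Summarize the compliance findings in plain language."},
        ],
        system_prompt="You are a telecom compliance analyst. Use only the customer data provided.",
    )
    for name, text in report.items():
        print(f"== {name} ==\n{text}\n")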