Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
270 lines
9.8 KiB
Python
270 lines
9.8 KiB
Python
"""Traffic Study generator — PDF + XLSX deliverable.
|
|
|
|
Takes a fully-rolled ``cdr_traffic_studies`` row and produces:
|
|
* a signed-ready DOCX (converted to PDF downstream) for the customer's
|
|
audit file, with methodology statement + both Block 5 regional
|
|
tables + revenue-vs-minutes cross-check
|
|
* an XLSX "working doc" with per-period rollups and the same cells
|
|
that will drop into the 499-A E-File session
|
|
|
|
Produced by ``CDRAnalysisHandler`` at the end of a reporting period.
|
|
Pre-existing infrastructure reused:
|
|
* python-docx for the DOCX
|
|
* openpyxl for the XLSX
|
|
* scripts.document_gen.templates.base_handler pattern for styling
|
|
|
|
No classification happens here — this module only formats numbers that
|
|
the ingester + classifier already wrote into cdr_calls + cdr_traffic_studies.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
from decimal import Decimal
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Module logger; name mirrors the package path for filterable log output.
LOG = logging.getLogger("document_gen.cdr_traffic_study")

# Optional third-party dependencies: the module must stay importable even
# when python-docx / openpyxl are missing, so each generator below can
# degrade to a logged no-op (returning None) instead of crashing at import.
try:
    from docx import Document
    from docx.shared import Pt, Inches, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH
except ImportError:
    LOG.warning("python-docx not installed — traffic study generation unavailable")
    Document = None  # type: ignore[assignment,misc]

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Border, Side
except ImportError:
    LOG.warning("openpyxl not installed — xlsx export unavailable")
    Workbook = None  # type: ignore[assignment,misc]


# Heading color for the DOCX; RGBColor is only defined when python-docx
# imported successfully, hence the guard on Document.
NAVY = RGBColor(0x1A, 0x27, 0x44) if Document else None
|
|
|
|
|
|
def _pct(value) -> str:
|
|
if value is None:
|
|
return "—"
|
|
return f"{float(value):.2f}%"
|
|
|
|
|
|
def _dollars(cents: Optional[int]) -> str:
|
|
if cents is None:
|
|
return "—"
|
|
return f"${cents/100:,.2f}"
|
|
|
|
|
|
def _minutes(seconds: Optional[int]) -> str:
|
|
if seconds is None:
|
|
return "—"
|
|
return f"{seconds/60:,.0f}"
|
|
|
|
|
|
# ─── DOCX ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
def generate_traffic_study_docx(
    *,
    study: dict,
    entity_name: str,
    frn: str = "",
    filer_id_499: str = "",
    output_path: str,
) -> Optional[str]:
    """Build the customer-facing traffic-study DOCX from a rolled-up study row.

    Args:
        study: ``cdr_traffic_studies`` row as a dict. Must contain
            ``reporting_year`` and ``reporting_period`` (direct indexing —
            KeyError if absent); all other fields are read with ``.get()``
            and render as "—" / 0 when missing.
        entity_name: Legal entity name shown in the title and certification.
        frn: FCC Registration Number for the info line ("N/A" when empty).
        filer_id_499: Form 499 Filer ID for the info line ("N/A" when empty).
        output_path: Destination path; parent directories are created.

    Returns:
        The saved file path as a string, or ``None`` when python-docx is
        not installed (logged as an error).
    """
    # Guard: python-docx import may have failed at module load.
    if Document is None:
        LOG.error("python-docx not installed")
        return None

    doc = Document()
    # Uniform 1" top/bottom, 1.25" side margins on every section.
    for section in doc.sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1.25)
        section.right_margin = Inches(1.25)

    # Title line: study name + period, centered, 14pt bold, brand navy.
    title = doc.add_paragraph()
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    r = title.add_run(f"Telecommunications Traffic Study — {study['reporting_year']} {study['reporting_period']}")
    r.font.size = Pt(14)
    r.bold = True
    r.font.color.rgb = NAVY

    # Subtitle: entity name, centered.
    sub = doc.add_paragraph()
    sub.alignment = WD_ALIGN_PARAGRAPH.CENTER
    sub_r = sub.add_run(entity_name)
    sub_r.font.size = Pt(12)
    sub_r.bold = True

    # Small gray info line: identifiers + generation date.
    info = doc.add_paragraph()
    info_r = info.add_run(
        f"FRN: {frn or 'N/A'} | 499 Filer ID: {filer_id_499 or 'N/A'} | "
        f"Generated: {datetime.now().strftime('%B %d, %Y')}"
    )
    info_r.font.size = Pt(9)
    info_r.font.color.rgb = RGBColor(0x55, 0x55, 0x55)

    doc.add_paragraph()

    # Methodology: two boilerplate paragraphs plus any study-specific
    # methodology text stored on the row (empty strings are skipped).
    doc.add_heading("Methodology", level=1)
    method_paragraphs = [
        (
            f"This study analyzes {study.get('total_calls', 0):,} call detail records "
            f"covering {study['reporting_year']} {study['reporting_period']}. "
            "Each call was classified by endpoint geography using NANP area-code "
            "records and FCC country-code assignments (47 CFR § 54.706 definitions). "
            "Jurisdictional buckets are: interstate, intrastate, international, and "
            "indeterminate (records where one or both endpoints could not be "
            "resolved to a country/state)."
        ),
        (
            "Revenue-based weighting is used where the source CDR carries per-call "
            "billing amounts. Minutes-weighted percentages are provided as a "
            "cross-check. Records are five-year retained per 47 CFR § 54.711(a) "
            "and available for USAC audit on request."
        ),
        (study.get("methodology") or ""),
    ]
    for text in method_paragraphs:
        if text:
            doc.add_paragraph(text)

    # Jurisdictional table: revenue-weighted vs minutes-weighted percentages
    # for each of the four buckets; missing values render as "—" via _pct.
    doc.add_heading("Jurisdictional Breakdown", level=1)
    juris_table = doc.add_table(rows=1, cols=3)
    juris_table.style = "Table Grid"
    hdr = juris_table.rows[0].cells
    hdr[0].text = "Category"
    hdr[1].text = "Revenue-weighted"
    hdr[2].text = "Minutes-weighted"
    for label, key_rev, key_min in [
        ("Interstate", "interstate_pct", "interstate_pct_minutes"),
        ("Intrastate", "intrastate_pct", "intrastate_pct_minutes"),
        ("International", "international_pct", "international_pct_minutes"),
        ("Indeterminate", "indeterminate_pct", "indeterminate_pct_minutes"),
    ]:
        row = juris_table.add_row().cells
        row[0].text = label
        row[1].text = _pct(study.get(key_rev))
        row[2].text = _pct(study.get(key_min))

    # Wholesale vs retail split — seconds in the study row, rendered as
    # whole minutes (÷ 60) for the 499 Block 3 / Block 4-A narrative.
    doc.add_heading("Block 3 vs. Block 4-A Allocation", level=1)
    w_min = study.get("wholesale_minutes") or 0
    r_min = study.get("retail_minutes") or 0
    doc.add_paragraph(
        f"Wholesale (carrier-to-carrier, Block 3): {w_min/60:,.0f} minutes\n"
        f"Retail (end-user, Block 4-A): {r_min/60:,.0f} minutes"
    )

    # Block 5 regional — BOTH views (originating-state and billing-state),
    # each a two-column region/percentage table sorted by region name.
    for label, key in [
        ("Block 5 — by originating state of caller", "orig_state_regions_json"),
        ("Block 5 — by customer billing-address state", "billing_state_regions_json"),
    ]:
        doc.add_heading(label, level=1)
        regions = (study.get(key) or {})
        if not regions:
            doc.add_paragraph("(no data for this view)")
            continue
        table = doc.add_table(rows=1, cols=2)
        table.style = "Table Grid"
        h = table.rows[0].cells
        h[0].text = "Region"
        h[1].text = "% of Total"
        for region_name, pct_val in sorted(regions.items()):
            row = table.add_row().cells
            row[0].text = region_name
            row[1].text = _pct(pct_val)

    # Certification block with a wet-signature area (two blank paragraphs,
    # then an underscore rule, signer label, entity, and date line).
    doc.add_heading("Certification", level=1)
    doc.add_paragraph(
        f"I certify that this traffic study accurately reflects the "
        f"telecommunications usage of {entity_name} during the reporting "
        f"period. The underlying CDRs are retained for five years and "
        f"available on request."
    )
    for _ in range(2):
        doc.add_paragraph()
    doc.add_paragraph("_" * 45)
    doc.add_paragraph("Authorized Officer")
    doc.add_paragraph(entity_name)
    doc.add_paragraph(f"Date: {datetime.now().strftime('%B %d, %Y')}")

    # Persist: ensure the target directory exists, save, return the path.
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)
    doc.save(str(out))
    return str(out)
|
|
|
|
|
|
# ─── XLSX (admin working doc) ───────────────────────────────────────────
|
|
|
|
|
|
def generate_traffic_study_xlsx(
    *,
    study: dict,
    entity_name: str,
    output_path: str,
) -> Optional[str]:
    """Build the admin working-doc XLSX (summary + two Block 5 sheets).

    Args:
        study: ``cdr_traffic_studies`` row as a dict. ``reporting_year``
            and ``reporting_period`` are required (direct indexing); all
            other fields fall back to 0 / None via ``.get()``.
        entity_name: Entity name shown in the Summary sheet title.
        output_path: Destination path; parent directories are created.

    Returns:
        The saved file path as a string, or ``None`` when openpyxl is
        not installed (logged as an error).
    """
    # Guard: openpyxl import may have failed at module load.
    if Workbook is None:
        LOG.error("openpyxl not installed")
        return None

    wb = Workbook()
    # Drop the auto-created default sheet so only our named sheets remain.
    default = wb.active
    wb.remove(default)

    # Summary sheet: title, period, then label/value rows starting at row 4.
    ws = wb.create_sheet("Summary")
    ws["A1"] = f"Traffic Study — {entity_name}"
    ws["A1"].font = Font(bold=True, size=14, color="1A2744")
    ws["A2"] = f"{study['reporting_year']} {study['reporting_period']}"
    # ("", "") pairs are deliberate visual spacer rows.
    rows = [
        ("Total calls", study.get("total_calls") or 0),
        ("Total minutes", (study.get("total_minutes") or 0)),
        ("Total revenue (cents)", study.get("total_revenue_cents") or 0),
        ("", ""),
        ("Interstate % (revenue-weighted)", study.get("interstate_pct")),
        ("Intrastate % (revenue-weighted)", study.get("intrastate_pct")),
        ("International % (revenue-weighted)", study.get("international_pct")),
        ("Indeterminate % (revenue-weighted)", study.get("indeterminate_pct")),
        ("", ""),
        ("Interstate % (minutes-weighted)", study.get("interstate_pct_minutes")),
        ("Intrastate % (minutes-weighted)", study.get("intrastate_pct_minutes")),
        ("International % (minutes-weighted)", study.get("international_pct_minutes")),
        ("Indeterminate % (minutes-weighted)", study.get("indeterminate_pct_minutes")),
        ("", ""),
        # Stored as seconds on the row; shown as minutes here.
        ("Wholesale minutes (Block 3)", (study.get("wholesale_minutes") or 0) / 60),
        ("Retail minutes (Block 4-A)", (study.get("retail_minutes") or 0) / 60),
    ]
    for i, (label, value) in enumerate(rows, start=4):
        ws.cell(row=i, column=1, value=label)
        ws.cell(row=i, column=2, value=value)
    ws.column_dimensions["A"].width = 45
    ws.column_dimensions["B"].width = 22

    # Regional breakdowns: one sheet per Block 5 view, region rows sorted
    # by name. Percent cells hold the raw number (e.g. 12.5) and use a
    # literal-"%" number format, so no /100 scaling is applied.
    for sheet_name, key in [
        ("Block 5 — Orig State", "orig_state_regions_json"),
        ("Block 5 — Billing State", "billing_state_regions_json"),
    ]:
        rs = wb.create_sheet(sheet_name)
        rs.cell(row=1, column=1, value="Region").font = Font(bold=True)
        rs.cell(row=1, column=2, value="% of Total").font = Font(bold=True)
        regions = study.get(key) or {}
        for i, (name, pct) in enumerate(sorted(regions.items()), start=2):
            rs.cell(row=i, column=1, value=name)
            rs.cell(row=i, column=2, value=float(pct) if pct is not None else None)
            rs.cell(row=i, column=2).number_format = '0.00"%"'
        rs.column_dimensions["A"].width = 25
        rs.column_dimensions["B"].width = 15

    # Persist: ensure the target directory exists, save, return the path.
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)
    wb.save(str(out))
    return str(out)
|