new-site/scripts/workers/icc_adapters/common.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

175 lines
6.6 KiB
Python

"""Shared base + normalized row type for ICC revenue adapters.
Each concrete adapter parses a carrier-specific interconnection or
settlement artifact (CABS BOS, X12 EDI 810, iconectiv 8YY query report,
international settlement TAS, wholesale SIP CSV, etc.) into a stream of
``IccRevenueLine`` dataclass instances, which the ingester then bulk-
inserts into ``icc_revenue_lines``.
The contract mirrors ``scripts/workers/cdr_adapters/base.py``:
* ``BaseICCAdapter.iter_rows(local_path) -> Iterator[IccRevenueLine]``
* ``ValidationError`` is raised for a single malformed row; the ingester
catches it and increments ``rows_rejected`` without halting iteration.
* ``natural_key_hash`` produces a stable SHA-256 identity used as the
dedup key in ``icc_revenue_lines``.
"""
from __future__ import annotations
import hashlib
import logging
import re
from dataclasses import dataclass, field
from typing import Any, Dict, Iterator, Optional
logger = logging.getLogger(__name__)
class ValidationError(Exception):
    """Signal that one row of an ICC artifact failed structural validation.

    Adapters raise this for a single malformed row; per the module contract
    the ingester catches it row-by-row and bumps ``rows_rejected`` instead of
    aborting the upload.

    Attributes:
        reason_code: Short machine-friendly tag (e.g. ``"bad_revenue"``,
            ``"missing_ocn"``) intended for logs and evidence payloads.
        detail: Optional human-readable elaboration; defaults to ``""``.
    """

    def __init__(self, reason_code: str, detail: str = ""):
        # Keep both parts available separately so callers can branch on the
        # tag without parsing the rendered message.
        self.reason_code = reason_code
        self.detail = detail
        message = f"{reason_code}: {detail}"
        super().__init__(message)
@dataclass
class IccRevenueLine:
    """One normalized Inter-Carrier-Compensation revenue line (pre-classification).

    ``icc_category`` is expected to be one of the values enforced by the
    ``icc_revenue_lines.icc_category`` CHECK constraint:

    ``term_switched_access`` | ``orig_switched_access`` | ``8yy_orig_access`` |
    ``transit`` | ``special_access`` | ``intl_settlement`` |
    ``wholesale_sip`` | ``access_stim`` | ``other``.

    The dataclass itself does not validate the category.
    """

    # --- required fields -------------------------------------------------
    profile_id: int
    reporting_year: int
    icc_category: str
    counterparty_legal_name: str
    revenue_cents: int  # integer cents; sign is preserved

    # --- optional fields -------------------------------------------------
    reporting_quarter: Optional[int] = None  # 1..4, or None for annual
    counterparty_ocn: Optional[str] = None
    counterparty_country: str = "US"  # ISO-2 code; "US" means domestic
    minutes_of_use: Optional[int] = None
    source_line_no: Optional[int] = None
    raw_row: Dict[str, Any] = field(default_factory=dict)

    def natural_key_hash(self) -> str:
        """Return the SHA-256 hex digest that serves as this row's dedup key.

        The digest is taken over the ``|``-joined UTF-8 encoding of::

            icc_category | counterparty tag | year | quarter |
            revenue_cents | minutes_of_use

        where the counterparty tag is the OCN when one is set, otherwise
        the legal name, stripped and upper-cased. A ``None`` quarter or
        minutes value contributes an empty segment.
        """
        # Prefer the OCN; fall back to the legal name, then to "".
        identity = self.counterparty_ocn or self.counterparty_legal_name or ""

        quarter = "" if self.reporting_quarter is None else str(self.reporting_quarter)
        minutes = "" if self.minutes_of_use is None else str(self.minutes_of_use)

        segments = (
            self.icc_category,
            identity.strip().upper(),
            str(self.reporting_year),
            quarter,
            str(self.revenue_cents),
            minutes,
        )

        digest = hashlib.sha256()
        digest.update("|".join(segments).encode("utf-8"))
        return digest.hexdigest()
class BaseICCAdapter:
    """Abstract ICC adapter. Subclasses implement ``iter_rows()``.

    Besides the abstract entry point, the class offers ``parse_cents`` and
    ``parse_int`` coercion helpers. Both report unusable input exclusively
    through :class:`ValidationError` so a per-row ``except ValidationError``
    in the caller is sufficient; no raw ``ValueError`` / ``OverflowError``
    escapes from them.
    """

    SOURCE_FORMAT: str = ""  # matches icc_ingestion_uploads.source_format

    def __init__(self, profile_id: int, reporting_year: int):
        """Store the profile and reporting year this adapter instance serves."""
        self.profile_id = profile_id
        self.reporting_year = reporting_year

    # ------------------------------------------------------------------
    # Abstract
    # ------------------------------------------------------------------
    def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield one ``IccRevenueLine`` per revenue line in the artifact.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Helpers for subclasses
    # ------------------------------------------------------------------
    # Characters that carry no numeric meaning in a currency string.
    _CENTS_CLEAN_RE = re.compile(r"[\s,$]")

    @staticmethod
    def _float_to_cents(amount: float, original: Any) -> int:
        """Scale a float dollar amount to integer cents, rejecting NaN/inf."""
        try:
            # round() raises ValueError for NaN and OverflowError for +/-inf
            # (including a finite amount that overflows when scaled by 100).
            return int(round(amount * 100))
        except (ValueError, OverflowError) as exc:
            raise ValidationError(
                "bad_revenue", f"non-finite revenue: {original!r}"
            ) from exc

    @classmethod
    def parse_cents(cls, val: Any) -> int:
        """Convert a currency/number input into signed integer cents.

        Accepts ``1234``, ``"1,234.56"``, ``"$1,234.56"``, ``" $-97.10 "``,
        accounting-negative parentheses with or without a currency sign
        (``"(123.45)"``, ``"$(123.45)"``), trailing ``CR`` (credit, treated
        as negative) / ``DR`` (debit) suffixes, floats, and anything whose
        ``str()`` is such a string (e.g. ``Decimal``).

        Args:
            val: Raw cell value from the source artifact.

        Returns:
            The amount in cents. ``None``, booleans, empty strings and a
            lone sign all yield ``0``. ``int`` inputs are treated as whole
            currency units and scaled exactly.

        Raises:
            ValidationError: ``reason_code='bad_revenue'`` when the value is
                not numeric or is NaN / infinite.
        """
        if val is None or isinstance(val, bool):  # avoid treating True as 1
            return 0
        if isinstance(val, int):
            # Exact integer arithmetic; a float round-trip would lose
            # precision on very large values.
            return val * 100
        if isinstance(val, float):
            return cls._float_to_cents(val, val)

        text = str(val).strip()
        if not text:
            return 0

        # Strip whitespace, thousands separators and "$" first so that the
        # parenthesis check also recognises forms such as "$(123.45)".
        cleaned = cls._CENTS_CLEAN_RE.sub("", text)

        negative = False
        if cleaned.startswith("(") and cleaned.endswith(")"):
            negative = True
            cleaned = cleaned[1:-1]

        # Strip trailing "CR" (credit) / "DR" (debit) suffixes seen in invoices
        suffix = cleaned[-2:].upper()
        if suffix == "CR":
            negative = True
            cleaned = cleaned[:-2]
        elif suffix == "DR":
            cleaned = cleaned[:-2]

        if cleaned in ("", "-", "+"):
            return 0

        try:
            amount = float(cleaned)
        except ValueError as exc:
            raise ValidationError(
                "bad_revenue", f"unparseable revenue: {val!r}"
            ) from exc

        if negative:
            # -abs() keeps "(-5.00)" negative instead of flipping it back.
            amount = -abs(amount)
        return cls._float_to_cents(amount, val)

    @staticmethod
    def parse_int(val: Any, default: int = 0) -> int:
        """Convert a number-like value into an int with robust coercion.

        Accepts ints, floats (truncated toward zero), and numeric strings
        with commas/spaces. Integer-looking strings are parsed exactly;
        other numeric strings (``"12.9"``, ``"1e3"``) go through ``float``
        and are truncated.

        Args:
            val: Raw cell value from the source artifact.
            default: Value returned for ``None``, booleans, empty strings
                and a lone sign.

        Returns:
            The coerced integer, or ``default`` as described above.

        Raises:
            ValidationError: ``reason_code='bad_integer'`` when the value is
                not numeric or is NaN / infinite.
        """
        if val is None or isinstance(val, bool):
            return default
        if isinstance(val, int):
            return val
        if isinstance(val, float):
            try:
                return int(val)
            except (ValueError, OverflowError) as exc:  # NaN / +/-inf
                raise ValidationError(
                    "bad_integer", f"unparseable int: {val!r}"
                ) from exc

        text = str(val).strip()
        if not text:
            return default
        cleaned = text.replace(",", "").replace(" ", "")
        if cleaned in ("-", "+"):
            return default

        # Fast, exact path for plain integer strings of any magnitude.
        try:
            return int(cleaned)
        except ValueError:
            pass

        # Fallback for decimal / exponent notation; truncates toward zero.
        try:
            return int(float(cleaned))
        except (ValueError, OverflowError) as exc:
            raise ValidationError(
                "bad_integer", f"unparseable int: {val!r}"
            ) from exc