"""Shared base + normalized row type for ICC revenue adapters.

Each concrete adapter parses a carrier-specific interconnection or settlement
artifact (CABS BOS, X12 EDI 810, iconectiv 8YY query report, international
settlement TAS, wholesale SIP CSV, etc.) into a stream of ``IccRevenueLine``
dataclass instances, which the ingester then bulk-inserts into
``icc_revenue_lines``.

The contract mirrors ``scripts/workers/cdr_adapters/base.py``:

* ``BaseICCAdapter.iter_rows(local_path) -> Iterator[IccRevenueLine]``
* ``ValidationError`` is raised for a single malformed row; the ingester
  catches it and increments ``rows_rejected`` without halting iteration.
* ``natural_key_hash`` produces a stable SHA-256 identity used as the dedup
  key in ``icc_revenue_lines``.
"""

from __future__ import annotations

import hashlib
import logging
import math
import re
from dataclasses import dataclass, field
from typing import Any, Dict, Iterator, Optional

logger = logging.getLogger(__name__)


class ValidationError(Exception):
    """Raised by an ICC adapter when a single row fails structural validation.

    The ingester catches this per-row and increments ``rows_rejected`` on the
    upload. ``reason_code`` is a short machine-friendly tag (e.g.
    ``"bad_revenue"``, ``"missing_ocn"``) used in logs + evidence payloads.
    """

    def __init__(self, reason_code: str, detail: str = ""):
        super().__init__(f"{reason_code}: {detail}")
        self.reason_code = reason_code
        self.detail = detail


@dataclass
class IccRevenueLine:
    """Normalized Inter-Carrier-Compensation revenue line, pre-classification.

    The ``icc_category`` must be one of the enum values enforced by the
    ``icc_revenue_lines.icc_category`` CHECK constraint:
    ``term_switched_access`` | ``orig_switched_access`` | ``8yy_orig_access``
    | ``transit`` | ``special_access`` | ``intl_settlement`` |
    ``wholesale_sip`` | ``access_stim`` | ``other``.
    """

    profile_id: int
    reporting_year: int
    icc_category: str
    counterparty_legal_name: str
    revenue_cents: int  # signed integer cents
    reporting_quarter: Optional[int] = None  # 1..4 or None for annual
    counterparty_ocn: Optional[str] = None
    counterparty_country: str = "US"  # ISO-2; "US" covers domestic
    minutes_of_use: Optional[int] = None
    source_line_no: Optional[int] = None
    raw_row: Dict[str, Any] = field(default_factory=dict)

    def natural_key_hash(self) -> str:
        """Stable SHA-256 hex digest used as the per-row dedup key.

        Built from the tuple of fields that together uniquely identify a
        revenue line within a (profile, year, quarter) scope:
        ``icc_category | counterparty_ocn-or-name | year | quarter |
        revenue_cents | minutes_of_use``.

        The counterparty component is the OCN when present, otherwise the
        legal name; it is stripped and upper-cased so cosmetic differences do
        not produce distinct keys. ``None`` quarter / minutes are encoded as
        empty strings.
        """
        # NOTE: the exact layout of ``basis`` is part of the persisted dedup
        # identity -- changing it would invalidate previously stored hashes.
        counterparty_tag = (
            self.counterparty_ocn or self.counterparty_legal_name or ""
        ).strip().upper()
        basis = "|".join(
            [
                self.icc_category,
                counterparty_tag,
                str(self.reporting_year),
                str(self.reporting_quarter if self.reporting_quarter is not None else ""),
                str(self.revenue_cents),
                str(self.minutes_of_use if self.minutes_of_use is not None else ""),
            ]
        )
        return hashlib.sha256(basis.encode("utf-8")).hexdigest()


class BaseICCAdapter:
    """Abstract ICC adapter.

    Subclasses implement ``iter_rows()``.
    """

    SOURCE_FORMAT: str = ""  # matches icc_ingestion_uploads.source_format

    def __init__(self, profile_id: int, reporting_year: int):
        self.profile_id = profile_id
        self.reporting_year = reporting_year

    # ------------------------------------------------------------------
    # Abstract
    # ------------------------------------------------------------------
    def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield one ``IccRevenueLine`` per revenue line in the artifact."""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Helpers for subclasses
    # ------------------------------------------------------------------
    # Characters dropped before numeric parsing: whitespace, thousands
    # separators and the dollar sign.
    _CENTS_CLEAN_RE = re.compile(r"[\s,$]")

    @staticmethod
    def _scaled_cents(amount: float, original: Any) -> int:
        """Scale a dollar amount to integer cents, rejecting NaN/infinity.

        ``original`` is only used to build the error detail.
        """
        scaled = amount * 100
        # NaN, +/-inf, and finite values that overflow once scaled cannot be
        # converted by int(); surface them as a per-row rejection instead of
        # letting ValueError/OverflowError escape to the ingester.
        if not math.isfinite(scaled):
            raise ValidationError("bad_revenue", f"non-finite revenue: {original!r}")
        return int(round(scaled))

    @classmethod
    def parse_cents(cls, val: Any) -> int:
        """Convert a currency/number input into signed integer cents.

        Accepts ``1234``, ``"1,234.56"``, ``"$1,234.56"``, ``" $-97.10 "``,
        ``"(123.45)"`` (accounting-negative parens), trailing ``CR``/``DR``
        markers, floats, Decimals. Fractions of a cent are rounded with the
        built-in ``round``.

        Returns ``0`` on empty inputs (``None``, ``""``, a bare sign) and on
        booleans.

        Raises :class:`ValidationError` (``reason_code='bad_revenue'``) for
        garbage that can't be parsed, including non-finite values such as
        ``"nan"`` or ``float("inf")``.
        """
        # bool is an int subclass; never treat True as 1 dollar.
        if val is None or isinstance(val, bool):
            return 0
        if isinstance(val, int):
            # Exact integer arithmetic: no float round-trip for whole dollars.
            return int(val) * 100
        if isinstance(val, float):
            return cls._scaled_cents(val, val)

        s = str(val).strip()
        if not s:
            return 0

        negative = False
        if s.startswith("(") and s.endswith(")"):
            # Accounting notation: parentheses denote a negative amount.
            negative = True
            s = s[1:-1]

        cleaned = cls._CENTS_CLEAN_RE.sub("", s)

        # Strip trailing "CR" (credit) / "DR" (debit) suffixes seen in invoices
        suffix = cleaned[-2:].upper()
        if suffix == "CR":
            negative = True
            cleaned = cleaned[:-2]
        elif suffix == "DR":
            cleaned = cleaned[:-2]

        if cleaned in ("", "-", "+"):
            return 0

        try:
            amount = float(cleaned)
        except ValueError as exc:
            raise ValidationError("bad_revenue", f"unparseable revenue: {val!r}") from exc

        if negative:
            # Parens / CR force a negative result regardless of any inner sign.
            amount = -abs(amount)
        return cls._scaled_cents(amount, val)

    @staticmethod
    def parse_int(val: Any, default: int = 0) -> int:
        """Convert a number-like value into an int with robust coercion.

        Accepts ints, floats, numeric strings with commas/whitespace, and
        empty/``None`` (→ ``default``). Booleans and a bare sign also yield
        ``default``. Fractional values are truncated toward zero.

        Raises :class:`ValidationError` (``reason_code='bad_integer'``) for
        non-numeric garbage, including NaN and infinity.
        """
        if val is None or isinstance(val, bool):
            return default
        if isinstance(val, int):
            return val

        if isinstance(val, float):
            number: Any = val
        else:
            s = str(val).strip()
            if not s:
                return default
            cleaned = s.replace(",", "").replace(" ", "")
            if cleaned in ("-", "+"):
                return default
            try:
                # Exact path first so large integer strings keep full
                # precision instead of passing through a float.
                return int(cleaned)
            except ValueError:
                number = cleaned  # fall back to float parsing ("12.7", "1e3")

        try:
            return int(float(number))
        except (ValueError, OverflowError) as exc:
            # ValueError: unparseable text or NaN; OverflowError: infinity.
            raise ValidationError("bad_integer", f"unparseable int: {val!r}") from exc