Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
175 lines
6.6 KiB
Python
175 lines
6.6 KiB
Python
"""Shared base + normalized row type for ICC revenue adapters.
|
|
|
|
Each concrete adapter parses a carrier-specific interconnection or
|
|
settlement artifact (CABS BOS, X12 EDI 810, iconectiv 8YY query report,
|
|
international settlement TAS, wholesale SIP CSV, etc.) into a stream of
|
|
``IccRevenueLine`` dataclass instances, which the ingester then bulk-
|
|
inserts into ``icc_revenue_lines``.
|
|
|
|
The contract mirrors ``scripts/workers/cdr_adapters/base.py``:
|
|
|
|
* ``BaseICCAdapter.iter_rows(local_path) -> Iterator[IccRevenueLine]``
|
|
* ``ValidationError`` is raised for a single malformed row; the ingester
|
|
catches it and increments ``rows_rejected`` without halting iteration.
|
|
* ``natural_key_hash`` produces a stable SHA-256 identity used as the
|
|
dedup key in ``icc_revenue_lines``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import logging
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, Iterator, Optional
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ValidationError(Exception):
    """Signal that one row of an ICC artifact failed structural validation.

    Adapters raise this for an individual malformed row. The ingester is
    expected to catch it row-by-row and bump ``rows_rejected`` on the upload
    instead of aborting the whole file.

    Attributes:
        reason_code: Short machine-friendly tag (e.g. ``"bad_revenue"``,
            ``"missing_ocn"``) used in logs + evidence payloads.
        detail: Optional free-form explanation; defaults to ``""``.

    The exception message is always ``"<reason_code>: <detail>"``.
    """

    def __init__(self, reason_code: str, detail: str = ""):
        # Compose the human-readable message once, then hand it to Exception.
        message = f"{reason_code}: {detail}"
        super().__init__(message)
        # Keep the structured parts available to callers that catch the error.
        self.reason_code = reason_code
        self.detail = detail
|
|
|
|
|
|
@dataclass
class IccRevenueLine:
    """One normalized Inter-Carrier-Compensation revenue row, before classification.

    ``icc_category`` is expected to be one of the values allowed by the
    ``icc_revenue_lines.icc_category`` CHECK constraint:
    ``term_switched_access`` | ``orig_switched_access`` | ``8yy_orig_access`` |
    ``transit`` | ``special_access`` | ``intl_settlement`` |
    ``wholesale_sip`` | ``access_stim`` | ``other``.
    The dataclass itself does not validate the category.
    """

    # --- required fields -------------------------------------------------
    profile_id: int
    reporting_year: int
    icc_category: str
    counterparty_legal_name: str
    revenue_cents: int  # signed integer cents

    # --- optional fields -------------------------------------------------
    reporting_quarter: Optional[int] = None  # 1..4 or None for annual
    counterparty_ocn: Optional[str] = None
    counterparty_country: str = "US"  # ISO-2; "US" covers domestic
    minutes_of_use: Optional[int] = None
    source_line_no: Optional[int] = None
    raw_row: Dict[str, Any] = field(default_factory=dict)

    def natural_key_hash(self) -> str:
        """Return the SHA-256 hex digest that serves as the per-row dedup key.

        The digest covers, joined with ``|`` in this order:
        ``icc_category``, the counterparty tag (OCN if truthy, otherwise the
        legal name; stripped and upper-cased), ``reporting_year``,
        ``reporting_quarter``, ``revenue_cents`` and ``minutes_of_use``.
        A ``None`` quarter or minutes value contributes an empty string.
        ``profile_id`` is not part of the digest.
        """
        # Prefer the OCN; fall back to the legal name, then to an empty tag.
        who = self.counterparty_ocn or self.counterparty_legal_name or ""
        who = who.strip().upper()

        quarter_text = "" if self.reporting_quarter is None else str(self.reporting_quarter)
        minutes_text = "" if self.minutes_of_use is None else str(self.minutes_of_use)

        parts = (
            self.icc_category,
            who,
            str(self.reporting_year),
            quarter_text,
            str(self.revenue_cents),
            minutes_text,
        )
        hasher = hashlib.sha256()
        hasher.update("|".join(parts).encode("utf-8"))
        return hasher.hexdigest()
|
|
|
|
|
|
class BaseICCAdapter:
    """Abstract ICC adapter. Subclasses implement ``iter_rows()``.

    An adapter is bound to one ``profile_id`` and ``reporting_year`` and
    turns a carrier-specific artifact into ``IccRevenueLine`` objects. The
    class also provides ``parse_cents`` / ``parse_int`` coercion helpers.
    Both helpers signal unusable input exclusively through
    ``ValidationError`` so that callers can reject a single row without
    aborting the whole file.
    """

    SOURCE_FORMAT: str = ""  # matches icc_ingestion_uploads.source_format

    def __init__(self, profile_id: int, reporting_year: int):
        self.profile_id = profile_id
        self.reporting_year = reporting_year

    # ------------------------------------------------------------------
    # Abstract
    # ------------------------------------------------------------------

    def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield one ``IccRevenueLine`` per revenue line in the artifact.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Helpers for subclasses
    # ------------------------------------------------------------------

    # Characters dropped before numeric parsing: whitespace, commas, "$".
    _CENTS_CLEAN_RE = re.compile(r"[\s,$]")

    @staticmethod
    def _amount_to_cents(amount: float, original: Any) -> int:
        """Scale a float amount to integer cents, rejecting NaN/infinity.

        ``round()`` raises ``ValueError`` for NaN and ``OverflowError`` for
        infinities; both are translated into ``ValidationError`` so they
        never escape as unexpected exception types.
        """
        try:
            return int(round(amount * 100))
        except (ValueError, OverflowError) as exc:
            raise ValidationError("bad_revenue", f"unparseable revenue: {original!r}") from exc

    @classmethod
    def parse_cents(cls, val: Any) -> int:
        """Convert a currency/number input into signed integer cents.

        Accepts ``1234``, ``"1,234.56"``, ``"$1,234.56"``, ``" $-97.10 "``,
        ``"(123.45)"`` (accounting-negative parens), trailing ``CR``/``DR``
        markers, floats, Decimals.

        Args:
            val: Raw cell value. Non-numeric objects are parsed via ``str()``.

        Returns:
            Signed integer cents. ``0`` for ``None``, booleans, and inputs
            that are empty (or a bare sign) after cleaning.

        Raises:
            ValidationError: ``reason_code='bad_revenue'`` for text that
                can't be parsed, and for NaN / infinite values (``float()``
                accepts ``"nan"`` and ``"inf"``, but they have no cents
                representation).
        """
        if val is None:
            return 0
        if isinstance(val, bool):  # avoid treating True as 1
            return 0
        if isinstance(val, int):
            # Exact integer arithmetic; no float round-trip needed.
            return int(val) * 100
        if isinstance(val, float):
            return cls._amount_to_cents(val, val)

        s = str(val).strip()
        if not s:
            return 0

        negative = False
        if s.startswith("(") and s.endswith(")"):
            # Accounting notation: parentheses mean a negative amount.
            negative = True
            s = s[1:-1]

        cleaned = cls._CENTS_CLEAN_RE.sub("", s)

        # Strip trailing "CR" (credit) / "DR" (debit) suffixes seen in invoices
        upper = cleaned.upper()
        if upper.endswith("CR"):
            negative = True
            cleaned = cleaned[:-2]
        elif upper.endswith("DR"):
            cleaned = cleaned[:-2]

        if cleaned in ("", "-", "+"):
            return 0

        try:
            f = float(cleaned)
        except ValueError as exc:
            raise ValidationError("bad_revenue", f"unparseable revenue: {val!r}") from exc

        if negative:
            # -abs() so that "(-5)" or "-5CR" stay negative instead of flipping.
            f = -abs(f)
        return cls._amount_to_cents(f, val)

    @staticmethod
    def parse_int(val: Any, default: int = 0) -> int:
        """Convert a number-like value into an int with robust coercion.

        Accepts ints, floats, numeric strings with commas/whitespace, and
        empty/``None`` (→ ``default``). Fractional values are truncated
        toward zero.

        Args:
            val: Raw cell value. Non-numeric objects are parsed via ``str()``.
            default: Returned for ``None``, booleans, empty strings and a
                bare ``"-"`` / ``"+"``.

        Returns:
            The parsed integer, or ``default``.

        Raises:
            ValidationError: ``reason_code='bad_integer'`` for non-numeric
                garbage, including NaN and infinite values.
        """
        if val is None:
            return default
        if isinstance(val, bool):
            return default
        if isinstance(val, int):
            return val
        if isinstance(val, float):
            try:
                return int(val)
            except (ValueError, OverflowError) as exc:
                # int(nan) -> ValueError, int(inf) -> OverflowError
                raise ValidationError("bad_integer", f"unparseable int: {val!r}") from exc

        s = str(val).strip()
        if not s:
            return default
        cleaned = s.replace(",", "").replace(" ", "")
        if cleaned in ("-", "+"):
            return default

        # Plain integer text is parsed exactly; going through float would
        # silently lose precision above 2**53.
        try:
            return int(cleaned)
        except ValueError:
            pass

        # Fall back to float for forms such as "12.0" or "1e3".
        try:
            return int(float(cleaned))
        except (ValueError, OverflowError) as exc:
            raise ValidationError("bad_integer", f"unparseable int: {val!r}") from exc
|