new-site/scripts/workers/icc_adapters/international_settlement.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

300 lines
12 KiB
Python

"""International carrier settlement adapter (ITU TAS + IC&SS).
Parses international inter-carrier settlement statements from either
the ITU-T D.150-compliant Traffic and Accounting Statement (TAS) tab-
delimited export, or the legacy IC&SS (International Carrier Settlement
& Statement) fixed-width format used by older clearinghouses.
ITU TAS — tab-delimited
-----------------------
First row is a header; required columns (case-insensitive, leading/trailing
whitespace ignored):
destination_country ISO-2 or country name
outbound_minutes integer
outbound_revenue_usd decimal
inbound_minutes integer
inbound_revenue_usd decimal
Each row yields **two** ``IccRevenueLine`` records — one for outbound
(revenue we *paid* the partner, which reduces net intl_settlement) and
one for inbound (revenue we *received*). Both are stored with
``icc_category='intl_settlement'`` and ``counterparty_country=<ISO-2>``;
the ``raw_row.direction`` tag distinguishes them for downstream analytics.
Outbound revenue is stored as a **negative** signed integer so that sum()
on the category yields net settlement.
IC&SS — fixed-width
-------------------
Record layout (positions 1-based):
01..02 record type ("SR" = settlement row)
03..05 destination country (ISO-3)
06..15 outbound minutes (10 digits)
16..27 outbound revenue, implied 2 decimals (12 digits)
28..37 inbound minutes
38..49 inbound revenue, implied 2 decimals
50..55 period YYMMDD
Detection
---------
Layout is sniffed from the first non-blank line: if that line contains a
tab character the file is parsed as TAS; otherwise it is parsed as IC&SS.
A file with no non-blank lines is treated as TAS with no rows.
Deferred
--------
* Traffic-termination rate reconciliation (expected-vs-actual)
* ISO-3 to ISO-2 mapping table beyond the small built-in set — unknown
ISO-3 codes are stored raw in ``raw_row.iso3`` and ``counterparty_country``
is set to the first two characters as a best-effort fallback
"""
from __future__ import annotations
import csv
import logging
from typing import Iterator, Optional
from .common import BaseICCAdapter, IccRevenueLine, ValidationError
logger = logging.getLogger(__name__)

# ISO 3166-1 alpha-3 → alpha-2 lookup covering the most common settlement
# partners. Deliberately small; move it to an external table once it stops
# being readable at a glance.
_ISO3_TO_ISO2 = {
    # North America
    "USA": "US", "CAN": "CA", "MEX": "MX",
    # Western, Northern and Central Europe
    "GBR": "GB", "DEU": "DE", "FRA": "FR", "ITA": "IT",
    "ESP": "ES", "NLD": "NL", "BEL": "BE", "CHE": "CH",
    "AUT": "AT", "SWE": "SE", "NOR": "NO", "DNK": "DK",
    "FIN": "FI", "IRL": "IE", "POL": "PL", "PRT": "PT",
    # East Asia, Oceania and India
    "JPN": "JP", "KOR": "KR", "CHN": "CN", "HKG": "HK", "TWN": "TW",
    "SGP": "SG", "AUS": "AU", "NZL": "NZ", "IND": "IN",
    # South America
    "BRA": "BR", "ARG": "AR", "CHL": "CL", "COL": "CO", "PER": "PE",
    # Africa and Middle East
    "ZAF": "ZA", "EGY": "EG", "ISR": "IL", "ARE": "AE", "SAU": "SA",
    "TUR": "TR",
    # Eastern Europe
    "RUS": "RU", "UKR": "UA",
    # Southeast Asia
    "PHL": "PH", "IDN": "ID", "THA": "TH", "VNM": "VN",
}
class InternationalSettlementAdapter(BaseICCAdapter):
    """Parse ITU TAS (tab-delimited) and IC&SS (fixed-width) settlement files.

    Every settlement row is emitted as two ``IccRevenueLine`` records with
    ``icc_category='intl_settlement'``: an outbound line whose revenue is
    negated and an inbound line whose revenue is kept as parsed, so that
    summing the category yields the net settlement. ``raw_row['direction']``
    distinguishes the two.

    The instance attributes ``profile_id`` and ``reporting_year`` and the
    helpers ``parse_int`` / ``parse_cents`` are read from ``self``; they are
    presumably supplied by ``BaseICCAdapter`` (not visible in this module).
    """

    SOURCE_FORMAT_TAS = "itu_tas"
    SOURCE_FORMAT_ICSS = "icss"
    # Default slug. The concrete layout is not taken from this value: it is
    # sniffed per file by ``iter_rows``. Registration under both slugs is
    # expected to happen in ``__init__.py``.
    SOURCE_FORMAT = "itu_tas"

    # Common English country names (upper-cased, single-spaced) → ISO-2.
    # Covers the same partners as ``_ISO3_TO_ISO2`` so that a TAS file using
    # names is not mapped by "first two letters" (which would turn "China"
    # into "CH" and "Germany" into "GE").
    _COUNTRY_NAME_TO_ISO2 = {
        "UNITED STATES": "US",
        "UNITED STATES OF AMERICA": "US",
        "CANADA": "CA",
        "MEXICO": "MX",
        "UNITED KINGDOM": "GB",
        "GREAT BRITAIN": "GB",
        "GERMANY": "DE",
        "FRANCE": "FR",
        "ITALY": "IT",
        "SPAIN": "ES",
        "NETHERLANDS": "NL",
        "BELGIUM": "BE",
        "SWITZERLAND": "CH",
        "AUSTRIA": "AT",
        "SWEDEN": "SE",
        "NORWAY": "NO",
        "DENMARK": "DK",
        "FINLAND": "FI",
        "IRELAND": "IE",
        "POLAND": "PL",
        "PORTUGAL": "PT",
        "JAPAN": "JP",
        "SOUTH KOREA": "KR",
        "KOREA, REPUBLIC OF": "KR",
        "CHINA": "CN",
        "HONG KONG": "HK",
        "TAIWAN": "TW",
        "SINGAPORE": "SG",
        "AUSTRALIA": "AU",
        "NEW ZEALAND": "NZ",
        "INDIA": "IN",
        "BRAZIL": "BR",
        "ARGENTINA": "AR",
        "CHILE": "CL",
        "COLOMBIA": "CO",
        "PERU": "PE",
        "SOUTH AFRICA": "ZA",
        "EGYPT": "EG",
        "ISRAEL": "IL",
        "UNITED ARAB EMIRATES": "AE",
        "SAUDI ARABIA": "SA",
        "TURKEY": "TR",
        "TURKIYE": "TR",
        "RUSSIA": "RU",
        "RUSSIAN FEDERATION": "RU",
        "UKRAINE": "UA",
        "PHILIPPINES": "PH",
        "INDONESIA": "ID",
        "THAILAND": "TH",
        "VIETNAM": "VN",
        "VIET NAM": "VN",
    }

    def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield revenue lines from *local_path*, auto-detecting the layout.

        Raises:
            ValidationError: propagated from the layout-specific parser.
        """
        if self._sniff_layout(local_path) == "tas":
            yield from self._iter_tas(local_path)
        else:
            yield from self._iter_icss(local_path)

    @staticmethod
    def _sniff_layout(path: str) -> str:
        """Return ``"tas"`` or ``"icss"`` based on the first non-blank line.

        A tab anywhere in that line means TAS; anything else means IC&SS.
        A file with no non-blank lines is reported as ``"tas"`` (which then
        yields no rows).
        """
        # latin-1 maps every byte to a character, so sniffing cannot fail on
        # decoding regardless of the file's real encoding.
        with open(path, "r", encoding="latin-1", errors="replace") as fh:
            for line in fh:
                if line.strip():
                    return "tas" if "\t" in line else "icss"
        return "tas"

    # ------------------------------------------------------------------
    # TAS — tab-delimited
    # ------------------------------------------------------------------
    _TAS_REQUIRED = (
        "destination_country",
        "outbound_minutes",
        "outbound_revenue_usd",
        "inbound_minutes",
        "inbound_revenue_usd",
    )

    def _iter_tas(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield an outbound and an inbound line for every TAS data row.

        Header names are matched case-insensitively after stripping
        whitespace. Blank lines before the header and between data rows are
        skipped. ``source_line_no`` is the physical line number in the file.

        Raises:
            ValidationError: ``bad_header`` when a required column is absent,
                ``missing_country`` when a data row has no country, plus
                whatever ``parse_int`` / ``parse_cents`` raise.
        """
        # utf-8-sig drops a leading BOM; with plain utf-8 the BOM would stick
        # to the first header name and trigger a spurious "bad_header".
        with open(
            local_path, "r", encoding="utf-8-sig", errors="replace", newline=""
        ) as fh:
            reader = csv.reader(fh, delimiter="\t")

            # First row with any non-blank cell is the header. This mirrors
            # _sniff_layout, which also ignores leading blank lines.
            header = next(
                (row for row in reader if any(cell.strip() for cell in row)),
                None,
            )
            if header is None:
                return

            # canonical column name → position (last duplicate wins)
            column_at = {
                name.strip().lower(): pos for pos, name in enumerate(header)
            }
            missing = [
                req for req in self._TAS_REQUIRED if req not in column_at
            ]
            if missing:
                raise ValidationError(
                    "bad_header",
                    f"ITU TAS file missing required columns: {missing}",
                )

            def field(row: list, name: str) -> Optional[str]:
                # Short rows yield None for the columns they lack.
                pos = column_at[name]
                return row[pos] if pos < len(row) else None

            for row in reader:
                if not row:  # completely empty line
                    continue
                # Capture now: line_num reflects the row just read and stays
                # correct across skipped blank lines and multi-line fields.
                line_no = reader.line_num

                country = (field(row, "destination_country") or "").strip()
                if not country:
                    raise ValidationError(
                        "missing_country", f"TAS row {line_no} has no country"
                    )
                iso2 = self._country_to_iso2(country)

                out_rev_raw = field(row, "outbound_revenue_usd")
                in_rev_raw = field(row, "inbound_revenue_usd")
                out_min = self.parse_int(field(row, "outbound_minutes"))
                out_rev = self.parse_cents(out_rev_raw)
                in_min = self.parse_int(field(row, "inbound_minutes"))
                in_rev = self.parse_cents(in_rev_raw)

                # Outbound = we pay partner → negative net
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    icc_category="intl_settlement",
                    counterparty_legal_name=country,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=-out_rev,
                    minutes_of_use=out_min or None,
                    source_line_no=line_no,
                    raw_row={
                        "direction": "outbound",
                        "country_raw": country,
                        "iso2": iso2,
                        "outbound_minutes": out_min,
                        "outbound_revenue_usd": out_rev_raw,
                    },
                )
                # Inbound = we receive revenue → positive
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    icc_category="intl_settlement",
                    counterparty_legal_name=country,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=in_rev,
                    minutes_of_use=in_min or None,
                    source_line_no=line_no,
                    raw_row={
                        "direction": "inbound",
                        "country_raw": country,
                        "iso2": iso2,
                        "inbound_minutes": in_min,
                        "inbound_revenue_usd": in_rev_raw,
                    },
                )

    # ------------------------------------------------------------------
    # IC&SS — fixed-width
    # ------------------------------------------------------------------
    # (first, last) column of each field, 1-based and inclusive.
    _ICSS_REC = (1, 2)
    _ICSS_COUNTRY = (3, 5)
    _ICSS_OUT_MIN = (6, 15)
    _ICSS_OUT_REV = (16, 27)
    _ICSS_IN_MIN = (28, 37)
    _ICSS_IN_REV = (38, 49)
    _ICSS_PERIOD = (50, 55)

    @staticmethod
    def _slice(line: str, span: tuple[int, int]) -> str:
        """Return the characters of *line* covered by a 1-based inclusive span."""
        first, last = span
        return line[first - 1:last]

    @staticmethod
    def _implied_cents(raw: str) -> int:
        """Parse a fixed-width amount with two implied decimals into cents.

        Blank input is 0. A single leading ``+`` or ``-`` is honoured.

        Raises:
            ValidationError: ``bad_revenue`` when the remainder is not made
                up solely of ASCII digits.
        """
        digits = raw.strip()
        if not digits:
            return 0
        negative = digits[0] == "-"
        if digits[0] in "+-":
            digits = digits[1:]
        # isdigit() alone accepts characters such as "²" (0xB2 in latin-1),
        # which int() then rejects with a bare ValueError.
        if not (digits.isascii() and digits.isdigit()):
            raise ValidationError("bad_revenue", f"ICSS revenue non-numeric: {raw!r}")
        cents = int(digits)
        return -cents if negative else cents

    @staticmethod
    def _month_to_quarter(month: int) -> Optional[int]:
        """Map a month (1-12) to its quarter (1-4); ``None`` if out of range."""
        if 1 <= month <= 12:
            return (month - 1) // 3 + 1
        return None

    def _iter_icss(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield an outbound and an inbound line for every IC&SS ``SR`` record.

        Lines whose record type is not ``SR`` are ignored. ``SR`` lines
        shorter than the full layout are skipped with a warning. The
        reporting quarter is derived from the month digits of the YYMMDD
        period field and is ``None`` when they cannot be parsed.

        Raises:
            ValidationError: ``bad_revenue`` from ``_implied_cents``, plus
                whatever ``parse_int`` raises.
        """
        required_len = self._ICSS_PERIOD[1]
        with open(local_path, "r", encoding="latin-1", errors="replace") as fh:
            for lineno, line in enumerate(fh, start=1):
                record = line.rstrip("\r\n")
                if self._slice(record, self._ICSS_REC).strip().upper() != "SR":
                    continue
                if len(record) < required_len:
                    # Still skipped (best-effort), but no longer silently.
                    logger.warning(
                        "ICSS line %d: settlement record is %d chars, "
                        "expected at least %d; skipped",
                        lineno, len(record), required_len,
                    )
                    continue

                iso3 = self._slice(record, self._ICSS_COUNTRY).strip().upper()
                out_min = self.parse_int(self._slice(record, self._ICSS_OUT_MIN))
                out_rev = self._implied_cents(self._slice(record, self._ICSS_OUT_REV))
                in_min = self.parse_int(self._slice(record, self._ICSS_IN_MIN))
                in_rev = self._implied_cents(self._slice(record, self._ICSS_IN_REV))
                period = self._slice(record, self._ICSS_PERIOD).strip()

                quarter = None
                if len(period) >= 4:
                    try:
                        # YYMMDD → characters 3-4 are the month
                        quarter = self._month_to_quarter(int(period[2:4]))
                    except ValueError:
                        quarter = None

                iso2 = _ISO3_TO_ISO2.get(iso3)
                if iso2 is None:
                    # Best-effort fallback: first two characters of the raw
                    # code, or "US" when the field is blank.
                    iso2 = iso3[:2] if iso3 else "US"
                    logger.warning(
                        "ICSS line %d: unmapped country code %r; using %r",
                        lineno, iso3, iso2,
                    )

                # Outbound = we pay partner → negative net
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    reporting_quarter=quarter,
                    icc_category="intl_settlement",
                    counterparty_legal_name=iso3,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=-out_rev,
                    minutes_of_use=out_min or None,
                    source_line_no=lineno,
                    raw_row={
                        "direction": "outbound",
                        "iso3": iso3,
                        "iso2": iso2,
                        "period": period,
                    },
                )
                # Inbound = we receive revenue → positive
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    reporting_quarter=quarter,
                    icc_category="intl_settlement",
                    counterparty_legal_name=iso3,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=in_rev,
                    minutes_of_use=in_min or None,
                    source_line_no=lineno,
                    raw_row={
                        "direction": "inbound",
                        "iso3": iso3,
                        "iso2": iso2,
                        "period": period,
                    },
                )

    # ------------------------------------------------------------------
    # Country normalization
    # ------------------------------------------------------------------
    @staticmethod
    def _country_to_iso2(value: str) -> str:
        """Normalize a TAS country cell to a two-letter code.

        Resolution order: a two-letter alphabetic value is returned
        upper-cased; a known ISO-3 code is mapped through ``_ISO3_TO_ISO2``;
        a known English country name is mapped through
        ``_COUNTRY_NAME_TO_ISO2``; anything else falls back to its first two
        alphabetic characters (or ``"US"`` when it has none) and is logged.
        """
        v = value.strip().upper()
        if len(v) == 2 and v.isalpha():
            return v
        if len(v) == 3 and v in _ISO3_TO_ISO2:
            return _ISO3_TO_ISO2[v]

        # Collapse internal whitespace so "New  Zealand" still matches.
        by_name = InternationalSettlementAdapter._COUNTRY_NAME_TO_ISO2.get(
            " ".join(v.split())
        )
        if by_name is not None:
            return by_name

        # Fall back to first two alphabetic characters
        guess = "".join(ch for ch in v if ch.isalpha())[:2] or "US"
        logger.warning(
            "Unrecognized country %r; falling back to %r", value, guess
        )
        return guess