"""International carrier settlement adapter (ITU TAS + IC&SS).

Parses international inter-carrier settlement statements from either the
ITU-T D.150-compliant Traffic and Accounting Statement (TAS) tab-
delimited export, or the legacy IC&SS (International Carrier Settlement &
Statement) fixed-width format used by older clearinghouses.

ITU TAS — tab-delimited
-----------------------
First row is a header; required columns (case-insensitive, leading/trailing
whitespace ignored):

    destination_country     ISO-2 or country name
    outbound_minutes        integer
    outbound_revenue_usd    decimal
    inbound_minutes         integer
    inbound_revenue_usd     decimal

Each row yields **two** ``IccRevenueLine`` records — one for outbound
(revenue we *paid* the partner, which reduces net intl_settlement) and one
for inbound (revenue we *received*). Both are stored with
``icc_category='intl_settlement'`` and ``counterparty_country`` set to the
derived ISO-2 code; the ``raw_row.direction`` tag distinguishes them for
downstream analytics. Outbound revenue is stored as a **negative** signed
integer so that sum() on the category yields net settlement.

IC&SS — fixed-width
-------------------
Record layout (positions 1-based):

    01..02  record type ("SR" = settlement row)
    03..05  destination country (ISO-3)
    06..15  outbound minutes (10 digits)
    16..27  outbound revenue, implied 2 decimals (12 digits)
    28..37  inbound minutes
    38..49  inbound revenue, implied 2 decimals
    50..55  period YYMMDD

Detection
---------
Layout is sniffed from the first non-blank line: presence of a literal
``\\t`` tab character in that line → TAS; otherwise IC&SS. A file with no
non-blank lines is treated as TAS with no rows.

Deferred
--------
* Traffic-termination rate reconciliation (expected-vs-actual)
* ISO-3 to ISO-2 mapping table beyond the small built-in set — unknown
  ISO-3 codes are stored raw in ``raw_row.iso3`` and
  ``counterparty_country`` is set to the first two characters as a
  best-effort fallback
"""

from __future__ import annotations

import csv
import logging
from typing import Iterator, Optional

from .common import BaseICCAdapter, IccRevenueLine, ValidationError

logger = logging.getLogger(__name__)

# Minimal ISO-3 → ISO-2 mapping for the most common settlement partners.
# Extend via an external table when the list outgrows readability.
_ISO3_TO_ISO2 = {
    "USA": "US", "CAN": "CA", "MEX": "MX", "GBR": "GB", "DEU": "DE",
    "FRA": "FR", "ITA": "IT", "ESP": "ES", "NLD": "NL", "BEL": "BE",
    "CHE": "CH", "AUT": "AT", "SWE": "SE", "NOR": "NO", "DNK": "DK",
    "FIN": "FI", "IRL": "IE", "POL": "PL", "PRT": "PT", "JPN": "JP",
    "KOR": "KR", "CHN": "CN", "HKG": "HK", "TWN": "TW", "SGP": "SG",
    "AUS": "AU", "NZL": "NZ", "IND": "IN", "BRA": "BR", "ARG": "AR",
    "CHL": "CL", "COL": "CO", "PER": "PE", "ZAF": "ZA", "EGY": "EG",
    "ISR": "IL", "ARE": "AE", "SAU": "SA", "TUR": "TR", "RUS": "RU",
    "UKR": "UA", "PHL": "PH", "IDN": "ID", "THA": "TH", "VNM": "VN",
}


class InternationalSettlementAdapter(BaseICCAdapter):
    """Adapter yielding ``IccRevenueLine`` records from TAS or IC&SS files.

    Every settlement row produces two records (outbound, then inbound);
    outbound revenue is negated so a plain sum gives the net settlement.
    """

    SOURCE_FORMAT_TAS = "itu_tas"
    SOURCE_FORMAT_ICSS = "icss"
    # Default slug is the TAS one. NOTE(review): an earlier comment described
    # the generic SOURCE_FORMAT as "left blank" and said the adapter is
    # registered under both slugs in ``__init__.py`` — confirm against the
    # registry. The layout is sniffed per file regardless of this value.
    SOURCE_FORMAT = "itu_tas"

    def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Yield revenue lines from *local_path*, auto-detecting the layout.

        Raises:
            ValidationError: propagated from the layout-specific parser.
        """
        if self._sniff_layout(local_path) == "tas":
            yield from self._iter_tas(local_path)
        else:
            yield from self._iter_icss(local_path)

    @staticmethod
    def _sniff_layout(path: str) -> str:
        """Return ``"tas"`` or ``"icss"`` based on the first non-blank line.

        A tab anywhere in that line means TAS; anything else means IC&SS.
        """
        # latin-1 maps every byte to a character, so sniffing never fails on
        # encoding regardless of which layout the file turns out to be.
        with open(path, "r", encoding="latin-1", errors="replace") as fh:
            for line in fh:
                if not line.strip():
                    continue
                return "tas" if "\t" in line else "icss"
        return "tas"  # empty file → treat as TAS no-rows

    # ------------------------------------------------------------------
    # TAS — tab-delimited
    # ------------------------------------------------------------------

    _TAS_REQUIRED = (
        "destination_country",
        "outbound_minutes",
        "outbound_revenue_usd",
        "inbound_minutes",
        "inbound_revenue_usd",
    )

    def _iter_tas(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Parse a tab-delimited TAS export, two records per data row.

        Raises:
            ValidationError: ``bad_header`` when a required column is absent,
                ``missing_country`` when a row has no destination country,
                or whatever ``parse_int`` / ``parse_cents`` raise for bad
                numeric cells.
        """
        # "utf-8-sig" is identical to "utf-8" except that it drops a leading
        # BOM, which would otherwise stick to the first header name and make
        # the required-column check fail.
        with open(
            local_path, "r", encoding="utf-8-sig", errors="replace", newline=""
        ) as fh:
            reader = csv.DictReader(fh, delimiter="\t")
            if not reader.fieldnames:
                return

            # Normalize header keys to lowercase/stripped, then invert to get
            # canonical_name → raw_column_name.
            header_map = {
                name: name.strip().lower() for name in reader.fieldnames
            }
            canon = {norm: raw_name for raw_name, norm in header_map.items()}

            missing = [req for req in self._TAS_REQUIRED if req not in canon]
            if missing:
                raise ValidationError(
                    "bad_header",
                    f"ITU TAS file missing required columns: {missing}",
                )

            country_col = canon["destination_country"]
            out_min_col = canon["outbound_minutes"]
            out_rev_col = canon["outbound_revenue_usd"]
            in_min_col = canon["inbound_minutes"]
            in_rev_col = canon["inbound_revenue_usd"]

            for raw in reader:
                # Physical line number of the record. Unlike a record counter
                # it stays correct when csv skips blank lines; for a quoted
                # field spanning lines it is the record's last line.
                line_no = reader.line_num

                country = (raw.get(country_col) or "").strip()
                if not country:
                    raise ValidationError(
                        "missing_country", f"TAS row {line_no} has no country"
                    )
                iso2 = self._country_to_iso2(country)

                # parse_int / parse_cents are not defined in this class;
                # presumably inherited from BaseICCAdapter — their
                # ValidationError (if any) propagates unchanged.
                out_min = self.parse_int(raw.get(out_min_col))
                out_rev = self.parse_cents(raw.get(out_rev_col))
                in_min = self.parse_int(raw.get(in_min_col))
                in_rev = self.parse_cents(raw.get(in_rev_col))

                # Outbound = we pay partner → negative net
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    icc_category="intl_settlement",
                    counterparty_legal_name=country,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=-out_rev,
                    minutes_of_use=out_min or None,  # 0 is stored as None
                    source_line_no=line_no,
                    raw_row={
                        "direction": "outbound",
                        "country_raw": country,
                        "iso2": iso2,
                        "outbound_minutes": out_min,
                        "outbound_revenue_usd": raw.get(out_rev_col),
                    },
                )
                # Inbound = we receive revenue → positive
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    icc_category="intl_settlement",
                    counterparty_legal_name=country,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=in_rev,
                    minutes_of_use=in_min or None,  # 0 is stored as None
                    source_line_no=line_no,
                    raw_row={
                        "direction": "inbound",
                        "country_raw": country,
                        "iso2": iso2,
                        "inbound_minutes": in_min,
                        "inbound_revenue_usd": raw.get(in_rev_col),
                    },
                )

    # ------------------------------------------------------------------
    # IC&SS — fixed-width
    # ------------------------------------------------------------------

    # 1-based inclusive (start, end) column spans of each field.
    _ICSS_REC = (1, 2)
    _ICSS_COUNTRY = (3, 5)
    _ICSS_OUT_MIN = (6, 15)
    _ICSS_OUT_REV = (16, 27)
    _ICSS_IN_MIN = (28, 37)
    _ICSS_IN_REV = (38, 49)
    _ICSS_PERIOD = (50, 55)

    @staticmethod
    def _slice(line: str, span: tuple[int, int]) -> str:
        """Return the substring of *line* for a 1-based inclusive *span*."""
        start, end = span
        return line[start - 1:end]

    @staticmethod
    def _implied_cents(raw: str) -> int:
        """Convert an implied-2-decimal field to integer cents.

        The digits already are the cent amount, so no scaling is applied.
        A blank field is 0; one leading ``+`` or ``-`` sign is honoured.

        Raises:
            ValidationError: ``bad_revenue`` when the field is not made up
                solely of ASCII digits after the optional sign.
        """
        cleaned = raw.strip()
        if not cleaned:
            return 0
        negative = cleaned[0] == "-"
        if cleaned[0] in "+-":
            cleaned = cleaned[1:]
        # str.isdigit() alone also accepts characters such as "²" (which
        # latin-1 decoding can produce) that int() rejects with a bare
        # ValueError; requiring ASCII keeps the failure a ValidationError.
        if not (cleaned.isascii() and cleaned.isdigit()):
            raise ValidationError(
                "bad_revenue", f"ICSS revenue non-numeric: {raw!r}"
            )
        cents = int(cleaned)
        return -cents if negative else cents

    @staticmethod
    def _month_to_quarter(month: int) -> Optional[int]:
        """Map a calendar month (1-12) to its quarter (1-4), else ``None``."""
        if 1 <= month <= 3:
            return 1
        if 4 <= month <= 6:
            return 2
        if 7 <= month <= 9:
            return 3
        if 10 <= month <= 12:
            return 4
        return None

    def _iter_icss(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Parse a fixed-width IC&SS file, two records per ``SR`` line.

        Lines shorter than the full record width and lines whose record type
        is not ``SR`` are skipped.

        Raises:
            ValidationError: ``bad_revenue`` for a non-numeric revenue field,
                or whatever ``parse_int`` raises for a bad minutes field.
        """
        min_width = self._ICSS_PERIOD[1]
        with open(local_path, "r", encoding="latin-1", errors="replace") as fh:
            for lineno, line in enumerate(fh, start=1):
                record = line.rstrip("\r\n")
                if len(record) < min_width:
                    # Still skipped, but a cut-off settlement row means
                    # revenue is missing from the totals, so leave a trace.
                    if self._slice(record, self._ICSS_REC).strip().upper() == "SR":
                        logger.warning(
                            "ICSS line %d: settlement record is only %d "
                            "characters (expected %d); skipped",
                            lineno,
                            len(record),
                            min_width,
                        )
                    continue
                rec = self._slice(record, self._ICSS_REC).strip().upper()
                if rec != "SR":
                    continue

                iso3 = self._slice(record, self._ICSS_COUNTRY).strip().upper()
                out_min = self.parse_int(self._slice(record, self._ICSS_OUT_MIN))
                out_rev = self._implied_cents(
                    self._slice(record, self._ICSS_OUT_REV)
                )
                in_min = self.parse_int(self._slice(record, self._ICSS_IN_MIN))
                in_rev = self._implied_cents(
                    self._slice(record, self._ICSS_IN_REV)
                )
                period = self._slice(record, self._ICSS_PERIOD).strip()

                # Period is YYMMDD, so the month is characters 3-4. An
                # unparseable or out-of-range month leaves the quarter unset.
                quarter = None
                if len(period) >= 4:
                    try:
                        quarter = self._month_to_quarter(int(period[2:4]))
                    except ValueError:
                        quarter = None

                # Unknown ISO-3 → first two characters; blank country → "US".
                iso2 = _ISO3_TO_ISO2.get(iso3, iso3[:2] if iso3 else "US")

                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    reporting_quarter=quarter,
                    icc_category="intl_settlement",
                    counterparty_legal_name=iso3,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=-out_rev,  # outbound is paid → negative
                    minutes_of_use=out_min or None,  # 0 is stored as None
                    source_line_no=lineno,
                    raw_row={
                        "direction": "outbound",
                        "iso3": iso3,
                        "iso2": iso2,
                        "period": period,
                    },
                )
                yield IccRevenueLine(
                    profile_id=self.profile_id,
                    reporting_year=self.reporting_year,
                    reporting_quarter=quarter,
                    icc_category="intl_settlement",
                    counterparty_legal_name=iso3,
                    counterparty_ocn=None,
                    counterparty_country=iso2,
                    revenue_cents=in_rev,
                    minutes_of_use=in_min or None,  # 0 is stored as None
                    source_line_no=lineno,
                    raw_row={
                        "direction": "inbound",
                        "iso3": iso3,
                        "iso2": iso2,
                        "period": period,
                    },
                )

    # ------------------------------------------------------------------
    # Country normalization
    # ------------------------------------------------------------------

    @staticmethod
    def _country_to_iso2(value: str) -> str:
        """Best-effort conversion of a country cell to a two-letter code.

        Two-letter alphabetic input is returned upper-cased; a known ISO-3
        code is mapped through the built-in table; anything else falls back
        to its first two alphabetic characters, or ``"US"`` if it has none.
        The fallback is a guess, not a lookup: a full country name such as
        "Germany" comes back as "GE".
        """
        v = value.strip().upper()
        if len(v) == 2 and v.isalpha():
            return v
        if len(v) == 3 and v in _ISO3_TO_ISO2:
            return _ISO3_TO_ISO2[v]
        # Fall back to first two alphabetic characters
        letters = "".join(ch for ch in v if ch.isalpha())[:2]
        return letters or "US"