"""iconectiv 8YY Query Report adapter. Parses the monthly 8YY query-counts-and-revenue report produced by iconectiv (formerly Ericsson / Somos toll-free number administration). The report drives the database-query component of 8YY originating access charges — each query to the 8YY routing database is billable at a tariff-specified per-query rate. Supported layouts ----------------- * **XML** (default, `.xml`): ```` root with a single ```` followed by N ```` elements, each containing ````, ```` (optional), ```` (integer count), and ```` (decimal USD). * **QRY pipe-delimited** (`.qry`): one record per line, fields separated by ``|``: ``OCN|QUERIES|REVENUE|PERIOD`` where PERIOD is ``YYYYMM``. Header row ``OCN|QUERIES|REVENUE|PERIOD`` is tolerated and skipped. Output ------ Every record yields ``icc_category='8yy_orig_access'``, ``minutes_of_use=None`` (8YY database queries are discrete events, not duration), ``counterparty_country='US'``. Reporting quarter is derived from the record's period month. Deferred -------- * XML schema validation / multiple ```` blocks per file * Multi-currency revenue (assumes USD) * Query-type breakdown (POTS Dip vs LRN Dip) — treated as one category """ from __future__ import annotations import csv import logging import os from typing import Iterator from .common import BaseICCAdapter, IccRevenueLine, ValidationError logger = logging.getLogger(__name__) try: from lxml import etree as _etree # pragma: no cover _USE_LXML = True except ImportError: import xml.etree.ElementTree as _etree _USE_LXML = False class IconectivQRYAdapter(BaseICCAdapter): SOURCE_FORMAT = "8yy_qry" @staticmethod def _month_to_quarter(month: int) -> int | None: if 1 <= month <= 3: return 1 if 4 <= month <= 6: return 2 if 7 <= month <= 9: return 3 if 10 <= month <= 12: return 4 return None def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]: ext = os.path.splitext(local_path)[1].lower() if ext in (".xml",): yield from self._iter_xml(local_path) return # Sniff by content when extension is unknown with open(local_path, "rb") as fh: head = fh.read(256).lstrip() if head.startswith(b"<"): yield from self._iter_xml(local_path) else: yield from self._iter_pipe(local_path) # ------------------------------------------------------------------ # XML layout # ------------------------------------------------------------------ def _iter_xml(self, local_path: str) -> Iterator[IccRevenueLine]: try: tree = _etree.parse(local_path) except Exception as exc: raise ValidationError("bad_xml", f"could not parse XML: {exc}") from exc root = tree.getroot() period_el = root.find(".//Period") period_year = self.reporting_year period_month: int | None = None if period_el is not None: try: period_year = int(period_el.get("year") or self.reporting_year) except ValueError: period_year = self.reporting_year try: pm = period_el.get("month") period_month = int(pm) if pm else None except ValueError: period_month = None quarter = self._month_to_quarter(period_month) if period_month else None lineno = 0 for q in root.iter("Query"): lineno += 1 ocn_el = q.find("OCN") rev_el = q.find("Revenue") queries_el = q.find("Queries") name_el = q.find("CarrierName") ocn = (ocn_el.text or "").strip() if ocn_el is not None else "" revenue_text = (rev_el.text or "").strip() if rev_el is not None else "" queries_text = (queries_el.text or "").strip() if queries_el is not None else "" name = (name_el.text or "").strip() if name_el is not None else "" if not ocn and not name: raise ValidationError("missing_ocn", f"Query row {lineno} has no OCN or CarrierName") try: revenue_cents = self.parse_cents(revenue_text) query_count = self.parse_int(queries_text) if queries_text else None except ValidationError: raise yield IccRevenueLine( profile_id=self.profile_id, reporting_year=period_year, reporting_quarter=quarter, icc_category="8yy_orig_access", counterparty_legal_name=name or ocn, counterparty_ocn=ocn or None, counterparty_country="US", revenue_cents=revenue_cents, minutes_of_use=None, source_line_no=lineno, raw_row={ "ocn": ocn, "name": name, "queries": query_count, "revenue_raw": revenue_text, "period_year": period_year, "period_month": period_month, }, ) # ------------------------------------------------------------------ # Pipe-delimited layout # ------------------------------------------------------------------ def _iter_pipe(self, local_path: str) -> Iterator[IccRevenueLine]: with open(local_path, "r", encoding="utf-8", errors="replace", newline="") as fh: reader = csv.reader(fh, delimiter="|") for i, row in enumerate(reader, start=1): if not row or not any(cell.strip() for cell in row): continue # Skip the header row if i == 1 and row[0].strip().upper() == "OCN": continue if len(row) < 4: raise ValidationError( "bad_row", f"8yy QRY row {i} has {len(row)} fields, expected 4", ) ocn = row[0].strip() queries_raw = row[1].strip() revenue_raw = row[2].strip() period_raw = row[3].strip() period_year = self.reporting_year period_month: int | None = None if len(period_raw) >= 6: try: period_year = int(period_raw[:4]) period_month = int(period_raw[4:6]) except ValueError: pass quarter = self._month_to_quarter(period_month) if period_month else None yield IccRevenueLine( profile_id=self.profile_id, reporting_year=period_year, reporting_quarter=quarter, icc_category="8yy_orig_access", counterparty_legal_name=ocn, counterparty_ocn=ocn or None, counterparty_country="US", revenue_cents=self.parse_cents(revenue_raw), minutes_of_use=None, source_line_no=i, raw_row={ "ocn": ocn, "queries_raw": queries_raw, "revenue_raw": revenue_raw, "period_raw": period_raw, }, )