new-site/scripts/workers/cdr_adapters/generic_csv.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

131 lines
5.4 KiB
Python

"""Generic CSV adapter — configurable column mapping.
For switches that don't match any of the specific presets or for
customers whose mediation layer emits a custom CSV, the profile stores
a column mapping in ``format_config`` JSONB and this adapter maps it
into the normalized CDR row.
Example ``format_config`` (set by customer via portal):
{
"start_time": "call_date",
"caller_number": "source",
"called_number": "destination",
"duration_sec": "billsec",
"billed_amount": "charge_usd",
"trunk_group": "trunk",
"account_id": "accountcode",
"direction": "direction",
"disposition": "disposition",
"customer_type_override": "cust_type",
"call_id": "uniqueid",
"ts_format": "%Y-%m-%d %H:%M:%S",
"encoding": "utf-8",
"delimiter": ","
}
"""
from __future__ import annotations
import csv
import logging
from datetime import datetime
from typing import Iterator
from .base import BaseCDRAdapter, CDRRow, ValidationError
logger = logging.getLogger(__name__)
class GenericCSVAdapter(BaseCDRAdapter):
    """Adapter for customer-defined CSV layouts.

    The switch profile's ``format_config`` names the source column for each
    normalized CDR field (see the module docstring for an example mapping).
    Required fields must be mapped; optional fields come through as ``None``
    when unmapped or when the cell is empty/whitespace.
    """

    FORMAT_SLUG = "generic_csv"

    # Required mapping keys: the profile's format_config MUST name a source
    # column for each of these.
    REQUIRED_MAPPING_KEYS = (
        "start_time", "caller_number", "called_number", "duration_sec",
    )
    OPTIONAL_MAPPING_KEYS = (
        "billed_amount", "trunk_group", "account_id", "direction",
        "disposition", "customer_type_override", "call_id",
    )

    def _check_mapping(self) -> None:
        """Raise ``ValidationError("bad_mapping", ...)`` if any required
        mapping key is absent or empty in the profile config."""
        missing = [k for k in self.REQUIRED_MAPPING_KEYS if not self.profile_config.get(k)]
        if missing:
            raise ValidationError(
                "bad_mapping",
                f"generic_csv profile config missing required keys: {missing}",
            )

    @staticmethod
    def _cell(raw_row: dict, column: str | None) -> str | None:
        """Return the stripped value of *column* in *raw_row*.

        None when the column is unmapped (``column`` falsy), absent from the
        row, or blank after stripping.  Single dict lookup — replaces the
        previous repeated ``raw_row.get(...)`` inline pattern, and unifies
        blank handling (a whitespace-only cell is treated as missing for
        every optional field, not just the lowercased ones).
        """
        if not column:
            return None
        value = raw_row.get(column)
        if value is None:
            return None
        value = value.strip()
        return value or None

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield normalized :class:`CDRRow` objects from the CSV at *local_path*.

        Raises:
            ValidationError("bad_mapping"): before reading, if the profile
                config lacks a required column mapping.
            ValidationError: re-raised from ``validate_row`` so the ingester
                can quarantine the row.
            ValidationError("unparseable_row"): wraps any other per-row
                parse failure, chained from the original exception.
        """
        self._check_mapping()
        cfg = self.profile_config
        encoding = cfg.get("encoding", "utf-8")
        delimiter = cfg.get("delimiter", ",")
        ts_format = cfg.get("ts_format")
        col = {
            "start_time": cfg["start_time"],
            "caller_number": cfg["caller_number"],
            "called_number": cfg["called_number"],
            "duration_sec": cfg["duration_sec"],
        }
        # Optional source column names — None when not mapped.  cfg.get()
        # guarantees every key is present in `opt`, so plain indexing below.
        opt = {k: cfg.get(k) for k in self.OPTIONAL_MAPPING_KEYS}
        with open(local_path, "r", encoding=encoding, errors="replace", newline="") as fh:
            reader = csv.DictReader(fh, delimiter=delimiter)
            for i, raw_row in enumerate(reader, start=1):
                try:
                    start_time = self.parse_ts(raw_row.get(col["start_time"]), ts_format)
                    duration = self.parse_duration(raw_row.get(col["duration_sec"]))
                    caller = (raw_row.get(col["caller_number"]) or "").strip()
                    called = (raw_row.get(col["called_number"]) or "").strip()
                    direction = self._cell(raw_row, opt["direction"])
                    disposition = self._cell(raw_row, opt["disposition"])
                    cust_type = self._cell(raw_row, opt["customer_type_override"])
                    row = CDRRow(
                        start_time=start_time,
                        caller_number=caller,
                        called_number=called,
                        duration_sec=duration,
                        billed_amount_cents=(
                            self.parse_cents(raw_row.get(opt["billed_amount"]))
                            if opt["billed_amount"] else None
                        ),
                        # Currency only meaningful when an amount is mapped.
                        billed_currency=(cfg.get("currency", "USD")
                                         if opt["billed_amount"] else None),
                        trunk_group_id=self._cell(raw_row, opt["trunk_group"]),
                        customer_account_id=self._cell(raw_row, opt["account_id"]),
                        call_direction=direction.lower() if direction else None,
                        disposition=disposition.lower() if disposition else None,
                        customer_type_override=cust_type.lower() if cust_type else None,
                        # Fix: a whitespace-only call_id cell previously
                        # produced natural_key="" (the truthy-raw check passed,
                        # then .strip() emptied it); now it falls through to
                        # the synthesized caller|called|ts|duration key.
                        natural_key=(
                            self._cell(raw_row, opt["call_id"])
                            or f"{caller}|{called}|{start_time.isoformat()}|{duration}"
                        ),
                        source_file=local_path,
                        source_row=i,
                        raw=dict(raw_row),
                    )
                    self.validate_row(row)
                    yield row
                except ValidationError:
                    raise  # let ingester catch + quarantine
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc