Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
f8cd37ac8c
1823 changed files with 145167 additions and 0 deletions
34
scripts/workers/cdr_adapters/__init__.py
Normal file
34
scripts/workers/cdr_adapters/__init__.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
"""CDR format adapters.
|
||||
|
||||
Each adapter parses a switch-specific CDR file format into a normalized
|
||||
``CDRRow`` stream consumed by the ingester. Selection is driven by the
|
||||
``cdr_ingestion_profiles.format`` column (or inferred from a switch preset).
|
||||
|
||||
Contract: ``BaseCDRAdapter.iter_rows(path_or_bytes) -> Iterator[CDRRow]``
|
||||
plus ``Adapter.FORMAT_SLUG`` and required-column metadata used by the
|
||||
validator.
|
||||
"""
|
||||
|
||||
from .base import BaseCDRAdapter, CDRRow, ValidationError
|
||||
from .generic_csv import GenericCSVAdapter
|
||||
from .asterisk import AsteriskAdapter
|
||||
from .freeswitch import FreeSWITCHAdapter
|
||||
from .netsapiens import NetSapiensAdapter
|
||||
|
||||
# Registry of adapter classes keyed by format slug. Per the package docstring,
# the slug comes from the ``cdr_ingestion_profiles.format`` column (or is
# inferred from a switch preset); each key equals the class's ``FORMAT_SLUG``.
ADAPTERS: dict[str, type[BaseCDRAdapter]] = {
    "generic_csv": GenericCSVAdapter,
    "asterisk": AsteriskAdapter,
    "freeswitch": FreeSWITCHAdapter,
    "netsapiens": NetSapiensAdapter,
}

# Public names re-exported by ``from cdr_adapters import *``.
__all__ = [
    "ADAPTERS",
    "BaseCDRAdapter",
    "CDRRow",
    "ValidationError",
    "GenericCSVAdapter",
    "AsteriskAdapter",
    "FreeSWITCHAdapter",
    "NetSapiensAdapter",
]
|
||||
95
scripts/workers/cdr_adapters/asterisk.py
Normal file
95
scripts/workers/cdr_adapters/asterisk.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
"""Asterisk CDR adapter.
|
||||
|
||||
Parses the standard Asterisk `Master.csv` format. Default headerless layout
|
||||
(v1.4+):
|
||||
|
||||
accountcode, src, dst, dcontext, clid, channel, dstchannel, lastapp,
|
||||
lastdata, start, answer, end, duration, billsec, disposition,
|
||||
amaflags, uniqueid, userfield
|
||||
|
||||
We read both headerless and header'd variants. The ``uniqueid`` column is
|
||||
Asterisk's per-call UUID and makes a perfect natural dedup key.
|
||||
|
||||
Per-call revenue — Asterisk's built-in CDR does not include a charge
|
||||
column. Customers using ``cdr_asteriskcosts`` / ``cdr_addon_mysql`` /
|
||||
``cel_custom`` typically add columns for rate and billed amount; those
|
||||
are consumed via the generic_csv adapter with a preset mapping.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from typing import Iterator
|
||||
|
||||
from .base import BaseCDRAdapter, CDRRow, ValidationError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Column order assumed for header-less ``Master.csv`` files (the layout listed
# in the module docstring). Passed to ``csv.DictReader`` as ``fieldnames`` when
# the first line of the file is not recognised as a header row.
_DEFAULT_HEADERS = [
    "accountcode", "src", "dst", "dcontext", "clid", "channel",
    "dstchannel", "lastapp", "lastdata", "start", "answer", "end",
    "duration", "billsec", "disposition", "amaflags", "uniqueid", "userfield",
]
|
||||
|
||||
|
||||
class AsteriskAdapter(BaseCDRAdapter):
    """Adapter for Asterisk ``Master.csv`` CDR files.

    Accepts both the header-less default layout (columns taken from
    ``_DEFAULT_HEADERS``) and files whose first line is a header row.
    """

    FORMAT_SLUG = "asterisk"

    @staticmethod
    def _looks_like_header(first_line: str) -> bool:
        """Return True when ``first_line`` parses as a column-header row.

        The line is parsed as CSV and matched on whole cell values, so a data
        row that merely *contains* the text "start" (for example inside
        ``lastdata`` or a context name) is not mistaken for a header.
        """
        try:
            cells = next(csv.reader([first_line]), [])
        except csv.Error:
            return False
        names = {cell.strip().lower() for cell in cells}
        return "start" in names and bool(names & {"src", "dst", "clid"})

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one validated ``CDRRow`` per record in ``local_path``.

        Raises:
            ValidationError: for the first row that fails parsing or
                ``validate_row``; any other exception raised while building a
                row is re-raised as ``ValidationError("unparseable_row")``.

        NOTE(review): an exception leaving a generator ends the iteration, so
        rows after the first invalid one are not produced by this iterator.
        Confirm the ingester accounts for that.
        """
        # utf-8-sig drops a leading BOM (and is a no-op without one) so the BOM
        # cannot leak into the first header name or the first accountcode.
        with open(local_path, "r", encoding="utf-8-sig", errors="replace", newline="") as fh:
            # Peek the first line: if it is a header row, use it; otherwise
            # fall back to the default Asterisk column order.
            first = fh.readline()
            fh.seek(0)
            if self._looks_like_header(first):
                reader = csv.DictReader(fh)
                # Normalise header spelling so the lower-case lookups below
                # also match "Start", " src", etc.
                reader.fieldnames = [
                    (name or "").strip().lower() for name in (reader.fieldnames or [])
                ]
            else:
                reader = csv.DictReader(fh, fieldnames=_DEFAULT_HEADERS)

            for i, raw in enumerate(reader, start=1):
                try:
                    # Prefer billsec (answered portion) over duration for 499-A
                    billsec_raw = raw.get("billsec") or raw.get("duration") or "0"
                    duration = self.parse_duration(billsec_raw)
                    start = self.parse_ts(raw.get("start"))
                    caller = (raw.get("src") or raw.get("clid") or "").strip()
                    called = (raw.get("dst") or "").strip()
                    unique_id = (raw.get("uniqueid") or "").strip()
                    # Asterisk writes "NO ANSWER" with a space; CDRRow documents
                    # the token as "no_answer", so fold spaces to underscores.
                    disposition = (
                        (raw.get("disposition") or "").strip().lower().replace(" ", "_")
                    )

                    row = CDRRow(
                        start_time=start,
                        caller_number=caller,
                        called_number=called,
                        duration_sec=duration,
                        trunk_group_id=_extract_trunk(raw.get("channel") or raw.get("dstchannel")),
                        customer_account_id=(raw.get("accountcode") or "").strip() or None,
                        disposition=disposition or None,
                        # Without a uniqueid, fall back to a composite key.
                        natural_key=unique_id or f"{caller}|{called}|{start.isoformat()}|{duration}",
                        source_file=local_path,
                        source_row=i,
                        raw=dict(raw),
                    )
                    self.validate_row(row)
                    yield row
                except ValidationError:
                    raise
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc
|
||||
|
||||
|
||||
def _extract_trunk(channel: str | None) -> str | None:
|
||||
"""Pull a trunk-group identifier from an Asterisk channel string.
|
||||
|
||||
Asterisk channels look like: ``SIP/trunk-mycarrier-0000abcd`` or
|
||||
``PJSIP/outbound-trunk/out-sip:+14155551212@...``. The portion right
|
||||
after the protocol is a stable trunk id for bucketing.
|
||||
"""
|
||||
if not channel:
|
||||
return None
|
||||
parts = channel.split("/")
|
||||
if len(parts) < 2:
|
||||
return None
|
||||
token = parts[1].split("-")[0]
|
||||
return token or None
|
||||
202
scripts/workers/cdr_adapters/base.py
Normal file
202
scripts/workers/cdr_adapters/base.py
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
"""Base CDR adapter — shared interface + normalized row type + validation.
|
||||
|
||||
All format adapters inherit from ``BaseCDRAdapter`` and implement
|
||||
``iter_rows()``. The ingester sees only this interface, so the classifier
|
||||
and quarantine logic are adapter-agnostic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime
|
||||
from typing import Iterator, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ValidationError(Exception):
    """Structural-validation failure for a single CDR row.

    Carries a machine-readable ``reason_code`` plus a free-form ``detail``.
    The ingester catches this and routes the offending row into
    ``cdr_quarantine`` with ``reason_code``.
    """

    def __init__(self, reason_code: str, detail: str = ""):
        # Keep both parts addressable; the exception text is "<code>: <detail>".
        self.reason_code = reason_code
        self.detail = detail
        message = f"{reason_code}: {detail}"
        super().__init__(message)
|
||||
|
||||
|
||||
@dataclass
class CDRRow:
    """One call record in adapter-independent form, before classification."""

    # --- Mandatory: every adapter fills these in -------------------------
    start_time: datetime
    caller_number: str
    called_number: str
    duration_sec: int

    # --- Revenue (strongly preferred; drives revenue-first attribution) --
    billed_amount_cents: Optional[int] = None
    billed_currency: Optional[str] = None

    # --- Optional hints that improve bucketing / accuracy ----------------
    call_direction: Optional[str] = None  # inbound|outbound
    disposition: Optional[str] = None  # answered|no_answer|busy|failed
    trunk_group_id: Optional[str] = None
    customer_account_id: Optional[str] = None
    customer_type_override: Optional[str] = None  # per-row wholesale/retail tag

    # --- Provenance, filled in by the adapter ----------------------------
    natural_key: str = ""  # adapter-specific uniqueness key
    source_file: Optional[str] = None
    source_row: Optional[int] = None

    # --- Original payload, kept for quarantine re-processing -------------
    raw: dict = field(default_factory=dict)

    def natural_key_hash(self, profile_id: int) -> str:
        """Return the SHA-1 hex digest of ``"<profile_id>|<natural_key>"``.

        This is the stable dedup key used in cdr_calls.
        """
        digest = hashlib.sha1()
        digest.update(f"{profile_id}|{self.natural_key}".encode("utf-8"))
        return digest.hexdigest()

    def to_db_tuple(self, profile_id: int) -> dict:
        """Return all fields as a dict plus ``profile_id`` and ``natural_key_hash``."""
        payload = asdict(self)
        payload["profile_id"] = profile_id
        payload["natural_key_hash"] = self.natural_key_hash(profile_id)
        return payload
|
||||
|
||||
|
||||
class BaseCDRAdapter:
    """Abstract adapter. Subclasses implement ``iter_rows()``.

    Besides the abstract hook, the class provides the structural row
    validator and the tolerant value parsers (``parse_duration``,
    ``parse_cents``, ``parse_ts``) that subclasses call while building rows.
    """

    FORMAT_SLUG: str = ""  # matches cdr_ingestion_profiles.format
    REQUIRED_FIELDS: tuple[str, ...] = ("start_time", "caller_number", "called_number", "duration_sec")

    def __init__(self, profile_config: Optional[dict] = None):
        # A missing config behaves like an empty mapping.
        self.profile_config = profile_config or {}

    # ------------------------------------------------------------------
    # Abstract
    # ------------------------------------------------------------------

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one CDRRow per call record in the file."""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Shared row validation — used by every subclass before yield
    # ------------------------------------------------------------------

    def validate_row(self, row: CDRRow) -> None:
        """Raise ValidationError if the row fails structural checks.

        Checks, in order: ``start_time`` is a ``datetime``; at least one of
        caller/called is set; ``duration_sec`` is present and within
        0..86400 seconds; the start year lies between 2000 and next year.
        """
        # Function-scope import: the module header imports only ``datetime``.
        from datetime import timezone

        if not isinstance(row.start_time, datetime):
            raise ValidationError("missing_start_time", "start_time absent or unparseable")
        if not row.caller_number and not row.called_number:
            # We need at least one endpoint to classify. Some inbound-only
            # switches omit the caller; that's fine as long as called is set.
            raise ValidationError("missing_endpoints", "neither caller nor called number present")
        if row.duration_sec is None:
            raise ValidationError("missing_duration", "duration_sec absent")
        if row.duration_sec < 0:
            raise ValidationError("bad_duration", f"negative duration {row.duration_sec}")
        if row.duration_sec > 86400:
            raise ValidationError("bad_duration", f"duration > 24h: {row.duration_sec}")

        # Sanity: start_time within a reasonable window. Only the year is
        # compared, so the tzinfo is dropped rather than converted.
        current_year = datetime.now(timezone.utc).year
        if row.start_time.tzinfo is not None:
            start_naive = row.start_time.replace(tzinfo=None)
        else:
            start_naive = row.start_time
        if start_naive.year < 2000 or start_naive.year > current_year + 1:
            raise ValidationError(
                "bad_start_time",
                f"start_time out of range: {row.start_time.isoformat()}",
            )

    # ------------------------------------------------------------------
    # Helpers for subclasses
    # ------------------------------------------------------------------

    @staticmethod
    def parse_duration(value) -> int:
        """Normalize duration values (seconds, ms, or H:MM:SS) to whole seconds.

        ``None``, ``""`` and whitespace-only strings count as 0. Numeric
        strings above 100 000 are treated as milliseconds. Fractions are
        truncated, including a fractional seconds part in ``H:MM:SS.fff``.

        Raises:
            ValidationError: ``bad_duration`` when the value cannot be
                interpreted as a number or clock string.
        """
        if value is None or value == "":
            return 0
        if isinstance(value, (int, float)):
            try:
                return int(value)
            except (ValueError, OverflowError) as exc:  # NaN / infinity
                raise ValidationError("bad_duration", f"unparseable duration: {value}") from exc
        s = str(value).strip()
        if not s:
            return 0
        try:
            if ":" in s:
                parts = s.split(":")
                # "MM:SS" -> pad the missing hours with zero.
                while len(parts) < 3:
                    parts.insert(0, "0")
                h, m, sec = parts[:3]
                return int(h) * 3600 + int(m) * 60 + int(float(sec))
            v = float(s)
            # Heuristic: > 100k usually means milliseconds
            if v > 100_000:
                return int(v / 1000)
            return int(v)
        except (ValueError, OverflowError) as exc:
            raise ValidationError("bad_duration", f"unparseable duration: {s}") from exc

    @staticmethod
    def parse_cents(value, *, currency: str = "USD") -> Optional[int]:
        """Turn a revenue amount string into integer cents.

        Commas and ``$`` are stripped before parsing. Returns ``None`` if the
        value is empty, non-numeric, or not finite (``nan`` / ``inf``).
        ``currency`` is accepted for interface compatibility and is not used
        in the conversion.
        """
        if value is None or value == "":
            return None
        s = str(value).replace(",", "").replace("$", "").strip()
        if not s:
            return None
        try:
            # round() raises ValueError for NaN and OverflowError for infinity.
            return int(round(float(s) * 100))
        except (ValueError, OverflowError):
            return None

    @staticmethod
    def parse_ts(value, fmt: Optional[str] = None) -> datetime:
        """Parse a timestamp. Accepts ISO-8601, common CDR formats, or Unix epoch.

        An explicit ``fmt`` is tried first so that all-digit layouts such as
        ``%Y%m%d%H%M%S`` are not mistaken for an epoch value. Epoch values
        (seconds, or milliseconds when > 10**12) are returned as naive UTC.
        Strings carrying an explicit UTC offset that only ``fromisoformat``
        understands come back timezone-aware.

        Raises:
            ValidationError: ``missing_start_time`` when the value is absent,
                empty, or matches none of the supported layouts.
        """
        # Function-scope import: the module header imports only ``datetime``.
        from datetime import timezone

        if isinstance(value, datetime):
            return value
        if value is None:
            raise ValidationError("missing_start_time", "empty timestamp")
        s = str(value).strip()
        if not s:
            raise ValidationError("missing_start_time", "empty timestamp")
        if fmt:
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                pass
        # Unix epoch
        if s.isdigit():
            try:
                epoch = int(s)
                if epoch > 10**12:  # ms
                    epoch = epoch // 1000
                return datetime.fromtimestamp(epoch, timezone.utc).replace(tzinfo=None)
            except (ValueError, OverflowError, OSError):
                pass
        # Try several common formats
        for trial in (
            "%Y-%m-%d %H:%M:%S",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%dT%H:%M:%SZ",
            "%Y-%m-%dT%H:%M:%S.%f",
            "%Y-%m-%dT%H:%M:%S.%fZ",
            "%m/%d/%Y %H:%M:%S",
            "%d/%m/%Y %H:%M:%S",
        ):
            try:
                return datetime.strptime(s, trial)
            except ValueError:
                continue
        # Finally, try Python's fromisoformat
        try:
            return datetime.fromisoformat(s.replace("Z", "+00:00"))
        except ValueError as exc:
            raise ValidationError("missing_start_time", f"unparseable timestamp {s!r}") from exc
|
||||
80
scripts/workers/cdr_adapters/freeswitch.py
Normal file
80
scripts/workers/cdr_adapters/freeswitch.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""FreeSWITCH CDR adapter.
|
||||
|
||||
Handles the standard ``mod_cdr_csv`` output format:
|
||||
|
||||
"caller_id_name","caller_id_number","destination_number","context",
|
||||
"start_stamp","answer_stamp","end_stamp","duration","billsec",
|
||||
"hangup_cause","uuid","bleg_uuid","accountcode"
|
||||
|
||||
Billed amount is populated via ``mod_nibblebill`` when installed — the
|
||||
additional columns ``nibble_total_billed`` / ``nibble_bill_amount`` /
|
||||
``nibble_rate`` land in the same CSV. We pick them up when present.
|
||||
|
||||
``uuid`` is FreeSWITCH's unique call identifier and makes a perfect
|
||||
natural key for dedup.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from typing import Iterator
|
||||
|
||||
from .base import BaseCDRAdapter, CDRRow, ValidationError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FreeSWITCHAdapter(BaseCDRAdapter):
    """Adapter for FreeSWITCH ``mod_cdr_csv`` files.

    Reads files that start with a header row as well as header-less files,
    for which the column order listed in the module docstring is assumed.
    """

    FORMAT_SLUG = "freeswitch"

    # Columns we check for per-call billed amount (mod_nibblebill output)
    _BILLED_COLUMNS = (
        "nibble_total_billed",
        "nibble_bill_amount",
        "billed_amount",
        "total_charge",
        "charge",
    )

    # Column order assumed when the file has no header row (the layout
    # documented at the top of this module).
    _DEFAULT_HEADERS = (
        "caller_id_name", "caller_id_number", "destination_number", "context",
        "start_stamp", "answer_stamp", "end_stamp", "duration", "billsec",
        "hangup_cause", "uuid", "bleg_uuid", "accountcode",
    )

    @classmethod
    def _looks_like_header(cls, first_line: str) -> bool:
        """Return True when ``first_line`` names at least two known columns.

        Matching is on whole, case-insensitive cell values so that ordinary
        data rows are not mistaken for a header.
        """
        try:
            cells = next(csv.reader([first_line]), [])
        except csv.Error:
            return False
        names = {cell.strip().lower() for cell in cells}
        return len(names & set(cls._DEFAULT_HEADERS)) >= 2

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one validated ``CDRRow`` per record in ``local_path``.

        Raises:
            ValidationError: for the first row that fails parsing or
                ``validate_row``; any other exception raised while building a
                row is re-raised as ``ValidationError("unparseable_row")``.

        NOTE(review): an exception leaving a generator ends the iteration, so
        rows after the first invalid one are not produced by this iterator.
        Confirm the ingester accounts for that.
        """
        # utf-8-sig drops a leading BOM so it cannot corrupt the first column.
        with open(local_path, "r", encoding="utf-8-sig", errors="replace", newline="") as fh:
            first = fh.readline()
            fh.seek(0)
            if self._looks_like_header(first):
                reader = csv.DictReader(fh)
                # Normalise header spelling for the lower-case lookups below.
                reader.fieldnames = [
                    (name or "").strip().lower() for name in (reader.fieldnames or [])
                ]
            else:
                # Without this fallback the first call record of a header-less
                # file would be consumed as column names.
                reader = csv.DictReader(fh, fieldnames=list(self._DEFAULT_HEADERS))

            for i, raw in enumerate(reader, start=1):
                try:
                    start_raw = raw.get("start_stamp") or raw.get("answer_stamp")
                    start = self.parse_ts(start_raw)
                    # Prefer the billed seconds; fall back to total duration.
                    duration_raw = raw.get("billsec") or raw.get("duration") or "0"
                    duration = self.parse_duration(duration_raw)
                    caller = (raw.get("caller_id_number") or "").strip()
                    called = (raw.get("destination_number") or "").strip()
                    call_uuid = (raw.get("uuid") or "").strip()

                    # First billed-amount column that parses wins.
                    billed_cents = None
                    for col in self._BILLED_COLUMNS:
                        if raw.get(col):
                            billed_cents = self.parse_cents(raw[col])
                            if billed_cents is not None:
                                break

                    row = CDRRow(
                        start_time=start,
                        caller_number=caller,
                        called_number=called,
                        duration_sec=duration,
                        billed_amount_cents=billed_cents,
                        billed_currency=("USD" if billed_cents is not None else None),
                        trunk_group_id=(raw.get("context") or "").strip() or None,
                        customer_account_id=(raw.get("accountcode") or "").strip() or None,
                        disposition=(raw.get("hangup_cause") or "").strip().lower() or None,
                        # Without a uuid, fall back to a composite key.
                        natural_key=call_uuid or f"{caller}|{called}|{start.isoformat()}|{duration}",
                        source_file=local_path,
                        source_row=i,
                        raw=dict(raw),
                    )
                    self.validate_row(row)
                    yield row
                except ValidationError:
                    raise
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc
|
||||
131
scripts/workers/cdr_adapters/generic_csv.py
Normal file
131
scripts/workers/cdr_adapters/generic_csv.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""Generic CSV adapter — configurable column mapping.
|
||||
|
||||
For switches that don't match any of the specific presets or for
|
||||
customers whose mediation layer emits a custom CSV, the profile stores
|
||||
a column mapping in ``format_config`` JSONB and this adapter maps it
|
||||
into the normalized CDR row.
|
||||
|
||||
Example ``format_config`` (set by customer via portal):
|
||||
|
||||
{
|
||||
"start_time": "call_date",
|
||||
"caller_number": "source",
|
||||
"called_number": "destination",
|
||||
"duration_sec": "billsec",
|
||||
"billed_amount": "charge_usd",
|
||||
"trunk_group": "trunk",
|
||||
"account_id": "accountcode",
|
||||
"direction": "direction",
|
||||
"disposition": "disposition",
|
||||
"customer_type_override": "cust_type",
|
||||
"call_id": "uniqueid",
|
||||
"ts_format": "%Y-%m-%d %H:%M:%S",
|
||||
"encoding": "utf-8",
|
||||
"delimiter": ","
|
||||
}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Iterator
|
||||
|
||||
from .base import BaseCDRAdapter, CDRRow, ValidationError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GenericCSVAdapter(BaseCDRAdapter):
    """CSV adapter whose column names come from the profile's mapping.

    ``profile_config`` maps normalized field names to source column names
    and may also set ``ts_format``, ``encoding``, ``delimiter`` and
    ``currency`` (see the module docstring for an example).
    """

    FORMAT_SLUG = "generic_csv"

    # Required mapping keys: the profile's format_config MUST name a source
    # column for each of these.
    REQUIRED_MAPPING_KEYS = (
        "start_time", "caller_number", "called_number", "duration_sec",
    )
    OPTIONAL_MAPPING_KEYS = (
        "billed_amount", "trunk_group", "account_id", "direction",
        "disposition", "customer_type_override", "call_id",
    )

    def _check_mapping(self) -> None:
        """Raise ``ValidationError("bad_mapping")`` if a required key is unmapped."""
        missing = [k for k in self.REQUIRED_MAPPING_KEYS if not self.profile_config.get(k)]
        if missing:
            raise ValidationError(
                "bad_mapping",
                f"generic_csv profile config missing required keys: {missing}",
            )

    @staticmethod
    def _cell(raw_row: dict, column) -> str:
        """Return the stripped text of ``column`` ('' when unmapped or absent)."""
        if not column:
            return ""
        value = raw_row.get(column)
        return value.strip() if isinstance(value, str) else ""

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one validated ``CDRRow`` per data row of ``local_path``.

        Raises:
            ValidationError: ``bad_mapping`` for an unusable profile config
                (missing keys, unknown encoding, invalid delimiter, mapped
                start-time column absent from the header); otherwise the
                first row-level failure, with unexpected exceptions re-raised
                as ``unparseable_row``.

        NOTE(review): an exception leaving a generator ends the iteration, so
        rows after the first invalid one are not produced by this iterator.
        Confirm the ingester accounts for that.
        """
        self._check_mapping()
        cfg = self.profile_config
        # ``or`` also covers keys that are present but null/empty.
        encoding = str(cfg.get("encoding") or "utf-8")
        if encoding.lower().replace("_", "-") in ("utf-8", "utf8"):
            # Drop a leading BOM so it cannot corrupt the first header name.
            encoding = "utf-8-sig"
        delimiter = cfg.get("delimiter") or ","
        ts_format = cfg.get("ts_format")
        currency = cfg.get("currency", "USD")

        col = {key: cfg[key] for key in self.REQUIRED_MAPPING_KEYS}
        # Optional source column names — None when not mapped
        opt = {key: cfg.get(key) for key in self.OPTIONAL_MAPPING_KEYS}

        try:
            fh = open(local_path, "r", encoding=encoding, errors="replace", newline="")
        except LookupError as exc:
            raise ValidationError("bad_mapping", f"unknown encoding: {encoding!r}") from exc

        with fh:
            try:
                reader = csv.DictReader(fh, delimiter=delimiter)
                header = reader.fieldnames or []
            except (TypeError, csv.Error) as exc:
                raise ValidationError(
                    "bad_mapping",
                    f"cannot read CSV header with delimiter {delimiter!r}: {exc}",
                ) from exc
            # Every row would fail on the timestamp anyway; report the real
            # cause once instead of a per-row "missing_start_time".
            if header and col["start_time"] not in header:
                raise ValidationError(
                    "bad_mapping",
                    f"mapped start_time column {col['start_time']!r} not found in CSV header",
                )

            for i, raw_row in enumerate(reader, start=1):
                try:
                    start_time = self.parse_ts(raw_row.get(col["start_time"]), ts_format)
                    duration = self.parse_duration(raw_row.get(col["duration_sec"]))
                    caller = (raw_row.get(col["caller_number"]) or "").strip()
                    called = (raw_row.get(col["called_number"]) or "").strip()

                    billed_cents = (
                        self.parse_cents(raw_row.get(opt["billed_amount"]))
                        if opt["billed_amount"] else None
                    )

                    row = CDRRow(
                        start_time=start_time,
                        caller_number=caller,
                        called_number=called,
                        duration_sec=duration,
                        billed_amount_cents=billed_cents,
                        # Only label a currency when an amount was parsed.
                        billed_currency=(currency if billed_cents is not None else None),
                        trunk_group_id=self._cell(raw_row, opt["trunk_group"]) or None,
                        customer_account_id=self._cell(raw_row, opt["account_id"]) or None,
                        call_direction=self._cell(raw_row, opt["direction"]).lower() or None,
                        disposition=self._cell(raw_row, opt["disposition"]).lower() or None,
                        customer_type_override=(
                            self._cell(raw_row, opt["customer_type_override"]).lower() or None
                        ),
                        # A blank or whitespace-only call id must not become
                        # the key: every such row would share one dedup hash.
                        natural_key=(
                            self._cell(raw_row, opt["call_id"])
                            or f"{caller}|{called}|{start_time.isoformat()}|{duration}"
                        ),
                        source_file=local_path,
                        source_row=i,
                        raw=dict(raw_row),
                    )
                    self.validate_row(row)
                    yield row
                except ValidationError:
                    raise  # let ingester catch + quarantine
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc
|
||||
111
scripts/workers/cdr_adapters/netsapiens.py
Normal file
111
scripts/workers/cdr_adapters/netsapiens.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""NetSapiens CDRv2 adapter (NDJSON / JSON array).
|
||||
|
||||
NetSapiens emits CDRs either as a JSON array (via the ``/cdr`` REST
|
||||
endpoint with paginated pages) or as newline-delimited JSON (via
|
||||
streaming export). Both shapes use the same record schema; this adapter
|
||||
accepts either.
|
||||
|
||||
Key fields (NetSapiens CDRv2):
|
||||
|
||||
orig_from_uri, orig_to_uri -> caller / called SIP URIs
|
||||
orig_callid, term_callid -> two call-legs (we stitch on
|
||||
the term_callid where present)
|
||||
time_start, time_answer, time_release -> timestamps (ISO-8601)
|
||||
duration -> seconds on the billed leg
|
||||
charge / cost / rate -> per-call revenue
|
||||
orig_sub, term_sub -> subscriber identifiers
|
||||
orig_carrier, term_carrier -> trunk/carrier IDs
|
||||
release_code -> disposition
|
||||
|
||||
Natural key: NetSapiens distinguishes orig and term call legs. We use
|
||||
``term_callid`` when present (the billed leg), falling back to
|
||||
``orig_callid``. That dedups the two-leg SBC emission cleanly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Iterator
|
||||
|
||||
from .base import BaseCDRAdapter, CDRRow, ValidationError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NetSapiensAdapter(BaseCDRAdapter):
    """Adapter for NetSapiens CDRv2 exports (JSON array or NDJSON)."""

    FORMAT_SLUG = "netsapiens"

    @staticmethod
    def _text(value) -> str:
        """Return ``value`` as a stripped string ('' for ``None``).

        JSON fields may arrive as numbers, so everything is coerced before
        ``strip()`` is applied.
        """
        if value is None:
            return ""
        return str(value).strip()

    @classmethod
    def _first_text(cls, record: dict, *keys: str) -> str:
        """Return the first non-blank value among ``keys`` ('' if none)."""
        for key in keys:
            text = cls._text(record.get(key))
            if text:
                return text
        return ""

    @staticmethod
    def _iter_records(fh):
        """Yield ``(row_number, record)`` pairs from an open text file.

        The first non-whitespace character selects the shape: ``[`` means a
        JSON array, anything else newline-delimited JSON (blank lines are
        skipped and not counted).

        Raises:
            ValidationError: ``unparseable_row`` when the JSON cannot be decoded.
        """
        ch = fh.read(1)
        while ch and ch.isspace():
            ch = fh.read(1)
        fh.seek(0)

        if ch == "[":
            try:
                records = json.load(fh)
            except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
                raise ValidationError("unparseable_row", f"invalid JSON array: {exc}") from exc
            yield from enumerate(records, start=1)
            return

        row_number = 0
        for line in fh:
            if not line.strip():
                continue
            row_number += 1
            try:
                record = json.loads(line)
            except ValueError as exc:
                raise ValidationError(
                    "unparseable_row", f"invalid JSON in record {row_number}: {exc}"
                ) from exc
            yield row_number, record

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one validated ``CDRRow`` per record in ``local_path``.

        Raises:
            ValidationError: for undecodable JSON, a record that is not a JSON
                object, or the first row that fails parsing/validation; other
                exceptions are re-raised as ``unparseable_row``.

        NOTE(review): an exception leaving a generator ends the iteration, so
        records after the first invalid one are not produced by this iterator.
        Confirm the ingester accounts for that.
        """
        # utf-8-sig drops a leading BOM, which would otherwise hide the "[".
        with open(local_path, "r", encoding="utf-8-sig", errors="replace") as fh:
            for i, record in self._iter_records(fh):
                try:
                    if not isinstance(record, dict):
                        raise ValidationError(
                            "unparseable_row",
                            f"expected a JSON object, got {type(record).__name__}",
                        )
                    start = self.parse_ts(record.get("time_start") or record.get("start_time"))
                    duration = self.parse_duration(record.get("duration") or 0)
                    caller = _extract_uri_number(record.get("orig_from_uri") or record.get("from_uri"))
                    called = _extract_uri_number(record.get("orig_to_uri") or record.get("to_uri"))

                    # First revenue field that parses wins.
                    billed = None
                    for col in ("charge", "cost", "total_charge"):
                        if record.get(col) not in (None, ""):
                            billed = self.parse_cents(record[col])
                            if billed is not None:
                                break

                    # Prefer term_callid (billed leg) as the natural key —
                    # collapses ingress+egress legs of a single call.
                    nkey = (
                        self._first_text(record, "term_callid", "orig_callid")
                        or f"{caller}|{called}|{start.isoformat()}|{duration}"
                    )

                    row = CDRRow(
                        start_time=start,
                        caller_number=caller,
                        called_number=called,
                        duration_sec=duration,
                        billed_amount_cents=billed,
                        billed_currency=("USD" if billed is not None else None),
                        trunk_group_id=self._first_text(record, "term_carrier", "orig_carrier") or None,
                        customer_account_id=self._first_text(record, "orig_sub", "term_sub") or None,
                        disposition=self._text(record.get("release_code")).lower() or None,
                        call_direction=self._text(record.get("direction")).lower() or None,
                        natural_key=nkey,
                        source_file=local_path,
                        source_row=i,
                        raw=record,
                    )
                    self.validate_row(row)
                    yield row
                except ValidationError:
                    raise
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc
|
||||
|
||||
|
||||
def _extract_uri_number(uri: str | None) -> str:
|
||||
"""Pull the user portion out of a SIP URI."""
|
||||
if not uri:
|
||||
return ""
|
||||
s = str(uri).strip()
|
||||
if s.startswith("sip:") or s.startswith("sips:"):
|
||||
s = s.split(":", 1)[1]
|
||||
if "@" in s:
|
||||
s = s.split("@", 1)[0]
|
||||
# Strip parameters like ";user=phone"
|
||||
if ";" in s:
|
||||
s = s.split(";", 1)[0]
|
||||
return s.strip()
|
||||
Loading…
Add table
Add a link
Reference in a new issue