new-site/scripts/workers/cdr_adapters/asterisk.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

95 lines
3.8 KiB
Python

"""Asterisk CDR adapter.
Parses the standard Asterisk `Master.csv` format. Default headerless layout
(v1.4+):
accountcode, src, dst, dcontext, clid, channel, dstchannel, lastapp,
lastdata, start, answer, end, duration, billsec, disposition,
amaflags, uniqueid, userfield
We read both headerless and header'd variants. The ``uniqueid`` column is
Asterisk's per-channel unique identifier (``[systemname-]epoch.sequence``,
not a UUID) and is used as the natural dedup key whenever it is present.
Per-call revenue — Asterisk's built-in CDR does not include a charge
column. Customers using ``cdr_asteriskcosts`` / ``cdr_addon_mysql`` /
``cel_custom`` typically add columns for rate and billed amount; those
are consumed via the generic_csv adapter with a preset mapping.
"""
from __future__ import annotations
import csv
import logging
from typing import Iterator
from .base import BaseCDRAdapter, CDRRow, ValidationError
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Column names applied, by position, to headerless ``Master.csv`` files.
# Order is significant: the name at index N labels the CSV field at index N.
_DEFAULT_HEADERS = [
    # Parties and routing
    "accountcode",
    "src",
    "dst",
    "dcontext",
    "clid",
    "channel",
    "dstchannel",
    "lastapp",
    "lastdata",
    # Timestamps
    "start",
    "answer",
    "end",
    # Durations and outcome
    "duration",
    "billsec",
    "disposition",
    "amaflags",
    # Identification
    "uniqueid",
    "userfield",
]
class AsteriskAdapter(BaseCDRAdapter):
    """CDR adapter for Asterisk ``Master.csv`` files.

    Supports the headerless default layout (columns are assumed to follow
    ``_DEFAULT_HEADERS`` by position) as well as files whose first record is
    a header row naming the columns.
    """

    FORMAT_SLUG = "asterisk"

    @staticmethod
    def _sniff_header(first_line: str) -> list[str] | None:
        """Return normalised column names if *first_line* is a header row.

        The line is tokenised with the ``csv`` module and each field is
        stripped and lower-cased. It counts as a header only when a field is
        exactly ``start`` and at least one further field is another known
        Asterisk column name. Matching whole fields (rather than searching
        the raw line for the substring "start") keeps data rows such as
        ``...,"from-start",...`` from being mistaken for a header.

        Returns:
            The normalised field names, or ``None`` when the line is data.
        """
        if not first_line.strip():
            return None
        try:
            fields = next(csv.reader([first_line]), [])
        except csv.Error:
            # Malformed first line: treat it as data and let row parsing
            # surface the problem.
            return None
        names = [field.strip().lower() for field in fields]
        recognised = frozenset(_DEFAULT_HEADERS).intersection(names)
        if "start" in recognised and len(recognised) >= 2:
            return names
        return None

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one ``CDRRow`` per CSV record in *local_path*.

        Args:
            local_path: Filesystem path of the CDR CSV file to read.

        Yields:
            A ``CDRRow`` for each record, after ``self.validate_row`` has
            accepted it. ``source_row`` is the 1-based index of the data
            record (a header row, if present, is not counted).

        Raises:
            ValidationError: re-raised unchanged if raised while building or
                validating a row; any other exception from that step is
                wrapped as ``ValidationError("unparseable_row", ...)``.
        """
        # "utf-8-sig" decodes plain UTF-8 identically but drops a leading
        # BOM, which would otherwise be glued onto the first column name
        # (header'd files) or the first accountcode value (headerless files).
        with open(
            local_path, "r", encoding="utf-8-sig", errors="replace", newline=""
        ) as fh:
            # Peek the first line: if it is a header row, use its (normalised)
            # names; otherwise fall back to the default Asterisk column order.
            first = fh.readline()
            fh.seek(0)
            header = self._sniff_header(first)
            if header is not None:
                reader = csv.DictReader(fh, fieldnames=header)
                # Consume the header record itself so it is not emitted as data.
                next(reader, None)
            else:
                reader = csv.DictReader(fh, fieldnames=_DEFAULT_HEADERS)

            for i, raw in enumerate(reader, start=1):
                try:
                    # Prefer billsec (answered portion) over duration for 499-A
                    billsec_raw = raw.get("billsec") or raw.get("duration") or "0"
                    duration = self.parse_duration(billsec_raw)
                    start = self.parse_ts(raw.get("start"))
                    caller = (raw.get("src") or raw.get("clid") or "").strip()
                    called = (raw.get("dst") or "").strip()
                    unique_id = (raw.get("uniqueid") or "").strip()
                    row = CDRRow(
                        start_time=start,
                        caller_number=caller,
                        called_number=called,
                        duration_sec=duration,
                        trunk_group_id=_extract_trunk(
                            raw.get("channel") or raw.get("dstchannel")
                        ),
                        customer_account_id=(raw.get("accountcode") or "").strip() or None,
                        disposition=(raw.get("disposition") or "").strip().lower() or None,
                        # Fall back to a composite key when uniqueid is blank.
                        natural_key=unique_id
                        or f"{caller}|{called}|{start.isoformat()}|{duration}",
                        source_file=local_path,
                        source_row=i,
                        raw=dict(raw),
                    )
                    self.validate_row(row)
                except ValidationError:
                    raise
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc
                else:
                    # Yield outside the guarded region so an exception thrown
                    # into the generator by the consumer is not misreported
                    # as an unparseable row.
                    yield row
def _extract_trunk(channel: str | None) -> str | None:
    """Pull a trunk-group identifier from an Asterisk channel string.

    Asterisk channels look like ``SIP/trunk-mycarrier-0000abcd`` or
    ``PJSIP/outbound-trunk/out-sip:+14155551212@...``. The segment right
    after the technology prefix names the peer/endpoint; live channel names
    append a per-call ``-<hex>`` instance suffix (plus ``;1``/``;2`` on
    Local channel legs). Only that suffix is removed, so hyphens that belong
    to the peer name survive and ``trunk-mycarrier`` is not collapsed into
    ``trunk``.

    Args:
        channel: Raw channel string, or ``None``/empty when absent.

    Returns:
        The peer/endpoint name, or ``None`` when *channel* is empty, has no
        ``/`` separator, or yields an empty name.
    """
    if not channel:
        return None
    parts = channel.split("/")
    if len(parts) < 2:
        return None

    # Drop a trailing ";N" leg marker before looking for the instance suffix.
    token = parts[1].partition(";")[0]

    # Strip one trailing "-<hex digits>" group if present; anything else after
    # the last hyphen is treated as part of the peer name and kept.
    hex_digits = "0123456789abcdefABCDEF"
    head, sep, suffix = token.rpartition("-")
    if sep and suffix and all(ch in hex_digits for ch in suffix):
        token = head

    return token or None