# new-site/scripts/workers/cdr_adapters/asterisk.py

"""Asterisk CDR adapter.

Parses the standard Asterisk `Master.csv` format. Default headerless layout
(v1.4+):

    accountcode, src, dst, dcontext, clid, channel, dstchannel, lastapp,
    lastdata, start, answer, end, duration, billsec, disposition,
    amaflags, uniqueid, userfield

We read both the headerless layout and variants with a header row. The
``uniqueid`` column is Asterisk's unique call identifier (not an RFC 4122
UUID) and serves as the natural dedup key.

Per-call revenue — Asterisk's built-in CDR does not include a charge
column. Customers using ``cdr_asteriskcosts`` / ``cdr_addon_mysql`` /
``cel_custom`` typically add columns for rate and billed amount; those
are consumed via the generic_csv adapter with a preset mapping.
"""

from __future__ import annotations

import csv
import logging
from typing import Iterator

from .base import BaseCDRAdapter, CDRRow, ValidationError

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Column order assumed for headerless files. Passed as ``fieldnames`` to
# ``csv.DictReader`` when the first line of the file is not recognised as a
# header row, so the order here must match the on-disk column order.
_DEFAULT_HEADERS = [
    "accountcode", "src", "dst", "dcontext", "clid", "channel",
    "dstchannel", "lastapp", "lastdata", "start", "answer", "end",
    "duration", "billsec", "disposition", "amaflags", "uniqueid", "userfield",
]


class AsteriskAdapter(BaseCDRAdapter):
    """CDR adapter for Asterisk ``Master.csv`` style files.

    Accepts both the headerless layout (columns are assumed to follow
    ``_DEFAULT_HEADERS``) and files whose first row names the columns.
    """

    FORMAT_SLUG = "asterisk"

    def iter_rows(self, local_path: str) -> Iterator[CDRRow]:
        """Yield one validated ``CDRRow`` per CSV record in *local_path*.

        The first line is inspected to decide whether the file carries a
        header row. Header names are lower-cased and stripped so that
        ``Start`` or `` src`` resolve to the same keys the row builder reads.

        Args:
            local_path: Path of the CSV file to read.

        Yields:
            A ``CDRRow`` for every record, after ``self.validate_row`` has
            accepted it. ``source_row`` counts data records from 1 (a header
            row, when present, is not counted).

        Raises:
            ValidationError: Propagated unchanged when raised while building
                or validating a row; any other exception from that step is
                wrapped as ``ValidationError("unparseable_row", ...)``.
        """
        # utf-8-sig decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise end up glued to the first column name/value.
        with open(local_path, "r", encoding="utf-8-sig", errors="replace", newline="") as fh:
            # Peek the first line, then rewind so the reader sees it again.
            first_line = fh.readline()
            fh.seek(0)
            if self._looks_like_header(first_line):
                reader = csv.DictReader(fh)
                # Reading ``fieldnames`` consumes the header row; assigning
                # the normalised names back makes lookups case-insensitive.
                reader.fieldnames = [
                    (name or "").strip().lower() for name in (reader.fieldnames or [])
                ]
            else:
                reader = csv.DictReader(fh, fieldnames=_DEFAULT_HEADERS)

            for i, raw in enumerate(reader, start=1):
                try:
                    row = self._build_row(raw, local_path, i)
                    self.validate_row(row)
                except ValidationError:
                    raise
                except Exception as exc:
                    raise ValidationError("unparseable_row", str(exc)) from exc
                # Yield outside the try so an exception thrown into the
                # generator by the consumer is not mislabelled as a bad row.
                yield row

    @staticmethod
    def _looks_like_header(first_line: str) -> bool:
        """Return True when *first_line* is a row of column names.

        The line is parsed as CSV and compared cell-by-cell against the known
        column names. A plain substring test would misfire on data rows such
        as one whose ``lastapp`` is ``StartMusicOnHold``.
        """
        try:
            cells = next(csv.reader([first_line]), [])
        except csv.Error:
            return False
        names = {cell.strip().lower() for cell in cells}
        known = names.intersection(_DEFAULT_HEADERS)
        # Require ``start`` plus two more known names: a lone cell equal to
        # "start" can legitimately occur in data (e.g. an extension name).
        return "start" in known and len(known) >= 3

    def _build_row(self, raw: dict, local_path: str, row_number: int) -> CDRRow:
        """Map one ``csv.DictReader`` record onto a ``CDRRow``.

        ``parse_duration`` and ``parse_ts`` are not defined in this class;
        presumably they come from ``BaseCDRAdapter`` (confirm there).
        """
        # Prefer billsec (answered portion) over duration for 499-A
        billsec_raw = raw.get("billsec") or raw.get("duration") or "0"
        duration = self.parse_duration(billsec_raw)
        start = self.parse_ts(raw.get("start"))
        caller = (raw.get("src") or raw.get("clid") or "").strip()
        called = (raw.get("dst") or "").strip()
        unique_id = (raw.get("uniqueid") or "").strip()

        return CDRRow(
            start_time=start,
            caller_number=caller,
            called_number=called,
            duration_sec=duration,
            trunk_group_id=_extract_trunk(raw.get("channel") or raw.get("dstchannel")),
            customer_account_id=(raw.get("accountcode") or "").strip() or None,
            disposition=(raw.get("disposition") or "").strip().lower() or None,
            # Fall back to a composite key only when uniqueid is absent.
            natural_key=unique_id or f"{caller}|{called}|{start.isoformat()}|{duration}",
            source_file=local_path,
            source_row=row_number,
            raw=dict(raw),
        )


def _extract_trunk(channel: str | None) -> str | None:
    """Pull a trunk-group identifier from an Asterisk channel string.

    Asterisk channels look like: ``SIP/trunk-mycarrier-0000abcd`` or
    ``PJSIP/outbound-trunk/out-sip:+14155551212@...``. The portion right
    after the protocol is a stable trunk id for bucketing.
    """
    if not channel:
        return None
    parts = channel.split("/")
    if len(parts) < 2:
        return None
    token = parts[1].split("-")[0]
    return token or None