new-site/api/migrations/050_cdr_ingestion.sql
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

256 lines
12 KiB
SQL

-- 050: CDR Ingestion + Traffic Study
--
-- Populated by scripts.workers.cdr_ingester + cdr_puller (see
-- /home/justin/.claude/plans/swirling-napping-sonnet.md for the full design).
--
-- Flow: customer pushes CDRs via SFTPGo OR we pull via a switch preset /
-- generic transport → raw files land in MinIO cdr-uploads/{customer_id}/
-- raw/ → ingester parses (adapter) → validates → dedups → classifies →
-- writes cdr_calls (PG hot path) + parquet (MinIO bulk) → traffic study
-- summarizes into cdr_traffic_studies → pre-fills 499-A workbook.
--
-- Paywall: cdr_study_access_grants gates classified output behind payment
-- for that reporting year's 499-A filing.
--
-- Quotas: cdr_usage_meters tracks bytes + row counts; storage_plan on
-- the profile drives overage billing.
-- ── Ingestion profiles ──────────────────────────────────────────────────
-- One row per telecom_entity that ingests CDRs; the UNIQUE constraint on
-- telecom_entity_id at the end of the table enforces the one-to-one rule.
CREATE TABLE IF NOT EXISTS cdr_ingestion_profiles (
    id                              SERIAL PRIMARY KEY,
    customer_id                     INTEGER NOT NULL REFERENCES customers(id),
    telecom_entity_id               INTEGER NOT NULL REFERENCES telecom_entities(id),

    -- Switch preset chosen from the portal dropdown. A non-NULL value
    -- determines both the transport and the CDR format; NULL means "Other",
    -- in which case the manual transport/format columns below apply.
    switch_preset                   TEXT
        CHECK (switch_preset IS NULL OR switch_preset IN (
            'netsapiens', 'freeswitch', 'asterisk', 'kazoo', 'ribbon',
            'metaswitch', 'sansay', 'broadworks', 'grandstream',
            'fortysix_labs', 'sip_navigator'
        )),

    -- Adapter slug; matches a module under scripts/workers/cdr_adapters/.
    format                          TEXT NOT NULL,
    -- Column mappings consumed by the generic_csv adapter.
    format_config                   JSONB DEFAULT '{}',

    -- Push path: the customer uploads to our server through SFTPGo.
    sftpgo_enabled                  BOOLEAN DEFAULT FALSE,
    sftpgo_username                 TEXT,
    sftpgo_password_hash            TEXT,
    -- Default quota is 5 GiB (5 * 1024^3 bytes).
    sftpgo_quota_bytes              BIGINT DEFAULT 5368709120,

    -- Pull path: we fetch from the customer's switch over a generic
    -- transport. Only used when switch_preset IS NULL, because presets
    -- carry their own configuration fields.
    pull_enabled                    BOOLEAN DEFAULT FALSE,
    pull_transport                  TEXT
        CHECK (pull_transport IS NULL OR pull_transport IN (
            'sftp', 'ftp', 'ftps', 'https', 's3', 'api', 'scrape'
        )),
    pull_host                       TEXT,
    pull_port                       INTEGER,
    pull_remote_glob                TEXT,
    -- Presumably a 5-field cron expression (02:00 daily) — confirm the
    -- timezone the puller evaluates it in.
    pull_cron                       TEXT DEFAULT '0 2 * * *',
    -- Docname of the ERPNext Sensitive ID record.
    pull_sensitive_id               TEXT,
    -- Extras specific to the selected preset (API host, account_id, etc.).
    preset_config                   JSONB DEFAULT '{}',

    -- Bookkeeping for the most recent fetch and connectivity test.
    last_fetched_at                 TIMESTAMPTZ,
    last_fetched_mtime              TIMESTAMPTZ,
    last_test_at                    TIMESTAMPTZ,
    last_test_ok                    BOOLEAN,
    last_test_error                 TEXT,

    -- State of the customer's billing address; feeds the Block 5
    -- billing-region report (both-report requirement).
    billing_state                   TEXT,

    -- Revenue attribution prefers per-call gross revenue taken from the
    -- CDR. Estimating from minutes alone is an explicit opt-in meant for
    -- flat-rate line service carriers or switches without charge data.
    minutes_only_estimation_enabled BOOLEAN DEFAULT FALSE,
    flat_monthly_revenue_cents      BIGINT,

    -- Storage quota plan. The filing service includes 10 GB / 10 M rows;
    -- higher-volume customers subscribe to a tier.
    storage_plan                    TEXT NOT NULL DEFAULT 'included'
        CHECK (storage_plan IN (
            'included', 'tier1', 'tier2', 'tier3', 'enterprise'
        )),
    -- compliance_orders.order_number of the currently active plan.
    storage_plan_order              TEXT,
    over_quota_policy               TEXT NOT NULL DEFAULT 'notify'
        CHECK (over_quota_policy IN ('notify', 'block', 'auto_upgrade')),

    created_at                      TIMESTAMPTZ DEFAULT NOW(),
    updated_at                      TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (telecom_entity_id)
);

-- Lookup of all profiles owned by a customer.
CREATE INDEX IF NOT EXISTS idx_cdr_profiles_customer
    ON cdr_ingestion_profiles (customer_id);
-- ── Uploads ─────────────────────────────────────────────────────────────
-- File-level tracking: one row per raw CDR file received for a profile.
CREATE TABLE IF NOT EXISTS cdr_ingestion_uploads (
    id                    SERIAL PRIMARY KEY,
    profile_id            INTEGER NOT NULL REFERENCES cdr_ingestion_profiles(id),

    -- Channel the file arrived through.
    source                TEXT NOT NULL
        CHECK (source IN ('sftpgo', 'pull', 'browser', 'webhook')),

    -- Location and content hash of the raw file as stored in MinIO.
    raw_minio_path        TEXT NOT NULL,
    raw_sha256            TEXT NOT NULL,
    normalized_minio_path TEXT,
    summary_json          JSONB,

    -- Processing state; new rows start out as 'pending'.
    status                TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN (
            'pending', 'processing', 'done',
            'failed', 'duplicate', 'quarantined',
            'quota_exceeded'
        )),
    -- Self-reference to the upload this one duplicates, if any.
    duplicate_of_id       INTEGER REFERENCES cdr_ingestion_uploads(id),

    -- Row-level outcome counters for the file.
    row_count             INTEGER,
    rows_accepted         INTEGER,
    rows_quarantined      INTEGER,
    rows_dropped_dupes    INTEGER,
    error                 TEXT,

    created_at            TIMESTAMPTZ DEFAULT NOW(),
    processed_at          TIMESTAMPTZ,

    -- A given file hash can be recorded only once per profile.
    -- NOTE(review): this also prevents inserting a second row with the same
    -- hash and status = 'duplicate' for the same profile — confirm how the
    -- ingester is expected to record duplicates.
    UNIQUE (profile_id, raw_sha256)
);

-- Newest-first listing of a profile's uploads.
CREATE INDEX IF NOT EXISTS idx_cdr_uploads_profile_created
    ON cdr_ingestion_uploads (profile_id, created_at DESC);

-- Partial index covering only rows that still need work.
CREATE INDEX IF NOT EXISTS idx_cdr_uploads_status
    ON cdr_ingestion_uploads (status)
    WHERE status IN ('pending', 'processing');
-- ── Wholesale / retail bucket mappings ──────────────────────────────────
-- Maps a trunk group or account id seen in a profile's CDRs to either the
-- wholesale or the retail bucket.
CREATE TABLE IF NOT EXISTS cdr_bucket_mappings (
    id                SERIAL PRIMARY KEY,
    profile_id        INTEGER NOT NULL REFERENCES cdr_ingestion_profiles(id),

    -- Which CDR attribute match_value is compared against.
    match_type        TEXT NOT NULL
        CHECK (match_type IN ('trunk_group', 'account_id')),
    match_value       TEXT NOT NULL,

    bucket            TEXT NOT NULL
        CHECK (bucket IN ('wholesale', 'retail')),
    override_priority INTEGER DEFAULT 0,

    -- At most one mapping per (profile, attribute, value).
    UNIQUE (profile_id, match_type, match_value)
);
-- ── Per-period traffic studies ──────────────────────────────────────────
-- One summary row per (profile, reporting year, reporting period).
--
-- Percentage columns are NUMERIC(7,4). The earlier NUMERIC(6,4) allows only
-- two integer digits (max 99.9999), so a share of exactly 100 on a 0–100
-- scale would be rejected with a numeric overflow. NUMERIC(7,4) accepts
-- every value NUMERIC(6,4) did, so fractional (0–1) writers are unaffected.
-- NOTE(review): confirm which scale the traffic-study writer uses.
-- NOTE: CREATE TABLE IF NOT EXISTS does not alter an existing table; a
-- database where this table already exists needs a follow-up
-- ALTER TABLE ... ALTER COLUMN ... TYPE NUMERIC(7,4) to pick up the change.
CREATE TABLE IF NOT EXISTS cdr_traffic_studies (
    id                         SERIAL PRIMARY KEY,
    profile_id                 INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year             INT NOT NULL,
    reporting_period           TEXT NOT NULL
        CHECK (reporting_period IN ('Q1','Q2','Q3','Q4','ANNUAL')),

    -- Period totals.
    total_calls                BIGINT,
    total_minutes              BIGINT,
    total_revenue_cents        BIGINT,

    -- Revenue-weighted percentages (preferred).
    interstate_pct             NUMERIC(7,4),
    intrastate_pct             NUMERIC(7,4),
    international_pct          NUMERIC(7,4),
    indeterminate_pct          NUMERIC(7,4),

    -- Minutes-weighted percentages (cross-check; or primary if
    -- minutes_only_estimation_enabled).
    interstate_pct_minutes     NUMERIC(7,4),
    intrastate_pct_minutes     NUMERIC(7,4),
    international_pct_minutes  NUMERIC(7,4),
    indeterminate_pct_minutes  NUMERIC(7,4),

    -- Bucketed minutes.
    wholesale_minutes          BIGINT,
    retail_minutes             BIGINT,

    -- Block 5 regional: both reports produced side-by-side.
    orig_state_regions_json    JSONB,
    billing_state_regions_json JSONB,

    methodology                TEXT,
    -- MinIO locations of the rendered study artifacts.
    pdf_minio_path             TEXT,
    xlsx_minio_path            TEXT,
    generated_at               TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(profile_id, reporting_year, reporting_period)
);
-- ── Classified calls (hot-path PG table; bulk storage is parquet in MinIO) ──
-- One row per classified call. Per-profile de-duplication is enforced by the
-- unique index on (profile_id, natural_key_hash) below.
CREATE TABLE IF NOT EXISTS cdr_calls (
    id                   BIGSERIAL PRIMARY KEY,
    profile_id           INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    upload_id            INT NOT NULL REFERENCES cdr_ingestion_uploads(id),
    natural_key_hash     TEXT NOT NULL,   -- SHA-1 of adapter natural key
    start_time           TIMESTAMPTZ NOT NULL,
    duration_sec         INT,
    billed_amount_cents  BIGINT,          -- per-call revenue (NULL = unknown)
    billed_currency      TEXT,
    trunk_group_id       TEXT,
    customer_account_id  TEXT,
    customer_type        TEXT,            -- wholesale|retail|unknown
    call_direction       TEXT,            -- inbound|outbound
    caller_npa           TEXT,
    caller_state         TEXT,
    caller_country       TEXT,
    called_npa           TEXT,
    called_state         TEXT,
    called_country       TEXT,
    jurisdiction         TEXT,            -- interstate|intrastate|international|local|indeterminate
    orig_state_region    TEXT,
    billing_state_region TEXT
);

-- De-duplication key: a natural key may appear only once per profile.
CREATE UNIQUE INDEX IF NOT EXISTS uq_cdr_calls_natural_key
    ON cdr_calls(profile_id, natural_key_hash);

-- Time-range scans within a profile.
CREATE INDEX IF NOT EXISTS idx_cdr_calls_profile_start
    ON cdr_calls(profile_id, start_time);

-- Per-jurisdiction filtering within a profile.
CREATE INDEX IF NOT EXISTS idx_cdr_calls_profile_juris
    ON cdr_calls(profile_id, jurisdiction);

-- Index on the upload_id foreign key, matching idx_cdr_quarantine_upload on
-- the sibling quarantine table. Without it, finding the calls that came from
-- one upload — and the referential check PostgreSQL runs when a row in
-- cdr_ingestion_uploads is deleted — has to scan this whole table.
CREATE INDEX IF NOT EXISTS idx_cdr_calls_upload
    ON cdr_calls(upload_id);
-- ── Quarantine ──────────────────────────────────────────────────────────
-- Holds rows that failed validation, with the raw payload and the reason.
CREATE TABLE IF NOT EXISTS cdr_quarantine (
    id            BIGSERIAL PRIMARY KEY,
    upload_id     INTEGER NOT NULL REFERENCES cdr_ingestion_uploads(id),
    source_row    INTEGER,
    raw_payload   JSONB,
    reason_code   TEXT NOT NULL,
    reason_detail TEXT,
    created_at    TIMESTAMPTZ DEFAULT NOW()
);

-- Fetch every quarantined row belonging to one upload.
CREATE INDEX IF NOT EXISTS idx_cdr_quarantine_upload
    ON cdr_quarantine (upload_id);
-- ── Paywall: per-year access grants ─────────────────────────────────────
--
-- Rows are written by the checkout.ts payment-complete hook whenever one of
-- the gating service slugs is purchased (fcc-499a, fcc-499a-499q,
-- fcc-full-compliance, cdr-analysis). A grant's existence unlocks the
-- classified study for its reporting year. The admin view ignores grants.
CREATE TABLE IF NOT EXISTS cdr_study_access_grants (
    id               SERIAL PRIMARY KEY,
    profile_id       INTEGER NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year   INTEGER NOT NULL,
    -- compliance_orders.order_number of the order that produced the grant.
    granted_by_order TEXT NOT NULL,
    granted_at       TIMESTAMPTZ DEFAULT NOW(),

    -- The same order can grant a given profile/year only once.
    UNIQUE (profile_id, reporting_year, granted_by_order)
);

-- "Does this profile have any grant for this year?" lookup.
-- NOTE(review): (profile_id, reporting_year) is also the leading prefix of
-- the index backing the UNIQUE constraint above, so this index may be
-- redundant — kept here to leave the schema unchanged.
CREATE INDEX IF NOT EXISTS idx_cdr_grants_profile_year
    ON cdr_study_access_grants (profile_id, reporting_year);
-- ── Usage meters (quota tracking) ───────────────────────────────────────
-- One meter per (profile, reporting year), holding stored bytes and
-- ingested row counts plus the timestamps of the quota warnings.
CREATE TABLE IF NOT EXISTS cdr_usage_meters (
    id               SERIAL PRIMARY KEY,
    profile_id       INTEGER NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year   INTEGER NOT NULL,

    -- Running totals; both start at zero.
    bytes_stored     BIGINT DEFAULT 0,
    rows_ingested    BIGINT DEFAULT 0,
    last_measured_at TIMESTAMPTZ DEFAULT NOW(),

    -- Presumably set when the 80% / 100% quota warnings go out — confirm
    -- against the worker that writes them.
    warned_80pct_at  TIMESTAMPTZ,
    warned_100pct_at TIMESTAMPTZ,

    UNIQUE (profile_id, reporting_year)
);
-- ── Back-reference from telecom_entities ────────────────────────────────
-- Nullable pointer from an entity to its ingestion profile. The column is
-- added only when missing, so re-running this migration is harmless.
ALTER TABLE telecom_entities
    ADD COLUMN IF NOT EXISTS cdr_ingestion_profile_id INTEGER
        REFERENCES cdr_ingestion_profiles(id);