-- 050: CDR Ingestion + Traffic Study
--
-- Populated by scripts.workers.cdr_ingester + cdr_puller (see
-- /home/justin/.claude/plans/swirling-napping-sonnet.md for the full design).
--
-- Flow: customer pushes CDRs via SFTPGo OR we pull via a switch preset /
-- generic transport → raw files land in MinIO
-- cdr-uploads/{customer_id}/raw/ → ingester parses (adapter) → validates →
-- dedups → classifies → writes cdr_calls (PG hot path) + parquet (MinIO
-- bulk) → traffic study summarizes into cdr_traffic_studies → pre-fills
-- 499-A workbook.
--
-- Paywall: cdr_study_access_grants gates classified output behind payment
-- for that reporting year's 499-A filing.
--
-- Quotas: cdr_usage_meters tracks bytes + row counts; storage_plan on
-- the profile drives overage billing.
--
-- Every statement uses IF NOT EXISTS, so the file can be re-applied
-- without error. It depends on customers(id) and telecom_entities(id)
-- already existing.

-- ── Profiles (one per telecom_entity that ingests CDRs) ──────────────────
CREATE TABLE IF NOT EXISTS cdr_ingestion_profiles (
    id                  SERIAL PRIMARY KEY,
    customer_id         INT NOT NULL REFERENCES customers(id),
    telecom_entity_id   INT NOT NULL REFERENCES telecom_entities(id),

    -- Known-switch preset (customer picks from portal dropdown). When set,
    -- drives both the transport AND the CDR format automatically. NULL =
    -- "Other" with manual transport/format config below.
    switch_preset       TEXT CHECK (switch_preset IS NULL OR switch_preset IN (
                            'netsapiens','freeswitch','asterisk','kazoo','ribbon',
                            'metaswitch','sansay','broadworks','grandstream',
                            'fortysix_labs','sip_navigator'
                        )),

    -- CDR format adapter (slug matches scripts/workers/cdr_adapters/)
    format              TEXT NOT NULL,
    format_config       JSONB DEFAULT '{}',     -- column mappings for generic_csv

    -- SFTPGo push (customer → our server)
    sftpgo_enabled      BOOLEAN DEFAULT FALSE,
    sftpgo_username     TEXT,
    sftpgo_password_hash TEXT,
    sftpgo_quota_bytes  BIGINT DEFAULT 5368709120,  -- 5 GB default (5 * 1024^3 bytes)

    -- Generic transport pull (us → customer's switch) — used when
    -- switch_preset IS NULL. Presets carry their own config fields.
    pull_enabled        BOOLEAN DEFAULT FALSE,
    pull_transport      TEXT CHECK (pull_transport IS NULL OR pull_transport IN
                            ('sftp','ftp','ftps','https','s3','api','scrape')),
    pull_host           TEXT,
    pull_port           INT,
    pull_remote_glob    TEXT,
    -- Cron expression; the default fires at minute 0 of hour 2 every day.
    -- NOTE(review): time zone is whatever the scheduler uses — confirm.
    pull_cron           TEXT DEFAULT '0 2 * * *',
    pull_sensitive_id   TEXT,                   -- ERPNext Sensitive ID docname
    preset_config       JSONB DEFAULT '{}',     -- preset-specific extras (API host, account_id, etc.)

    last_fetched_at     TIMESTAMPTZ,
    last_fetched_mtime  TIMESTAMPTZ,
    last_test_at        TIMESTAMPTZ,
    last_test_ok        BOOLEAN,
    last_test_error     TEXT,

    -- Customer's billing-address state — used for the Block 5
    -- billing-region report (both-report requirement).
    billing_state       TEXT,

    -- Revenue attribution: per-call gross revenue from the CDR is preferred.
    -- Minutes-only estimation is an explicit opt-in for flat-rate line
    -- service carriers (or switches without charge data).
    minutes_only_estimation_enabled BOOLEAN DEFAULT FALSE,
    flat_monthly_revenue_cents      BIGINT,

    -- Storage quota plan. Filing service includes 10 GB / 10 M rows;
    -- customers with higher volumes subscribe to a tier.
    storage_plan        TEXT NOT NULL DEFAULT 'included'
                        CHECK (storage_plan IN ('included','tier1','tier2','tier3','enterprise')),
    storage_plan_order  TEXT,                   -- compliance_orders.order_number of active plan
    over_quota_policy   TEXT NOT NULL DEFAULT 'notify'
                        CHECK (over_quota_policy IN ('notify','block','auto_upgrade')),

    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW(),

    -- Enforces the "one profile per telecom_entity" rule.
    UNIQUE(telecom_entity_id)
);

CREATE INDEX IF NOT EXISTS idx_cdr_profiles_customer
    ON cdr_ingestion_profiles(customer_id);

-- ── Uploads (file-level tracking) ───────────────────────────────────────
CREATE TABLE IF NOT EXISTS cdr_ingestion_uploads (
    id                      SERIAL PRIMARY KEY,
    profile_id              INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    source                  TEXT NOT NULL
                            CHECK (source IN ('sftpgo','pull','browser','webhook')),
    raw_minio_path          TEXT NOT NULL,
    raw_sha256              TEXT NOT NULL,
    normalized_minio_path   TEXT,
    summary_json            JSONB,
    status                  TEXT NOT NULL DEFAULT 'pending'
                            CHECK (status IN ('pending','processing','done',
                                              'failed','duplicate','quarantined',
                                              'quota_exceeded')),
    duplicate_of_id         INT REFERENCES cdr_ingestion_uploads(id),
    row_count               INT,
    rows_accepted           INT,
    rows_quarantined        INT,
    rows_dropped_dupes      INT,
    error                   TEXT,
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    processed_at            TIMESTAMPTZ,

    -- File-level dedup: the same bytes can be registered once per profile.
    -- NOTE(review): this constraint rejects a second row with the same hash
    -- for the same profile, so a status='duplicate' row cannot share both
    -- profile_id and raw_sha256 with the upload named in duplicate_of_id.
    -- Confirm how the ingester records same-profile re-uploads.
    UNIQUE(profile_id, raw_sha256)
);

CREATE INDEX IF NOT EXISTS idx_cdr_uploads_profile_created
    ON cdr_ingestion_uploads(profile_id, created_at DESC);

-- Partial index: only rows still in flight are indexed.
CREATE INDEX IF NOT EXISTS idx_cdr_uploads_status
    ON cdr_ingestion_uploads(status)
    WHERE status IN ('pending','processing');

-- ── Wholesale / retail bucket mappings ──────────────────────────────────
CREATE TABLE IF NOT EXISTS cdr_bucket_mappings (
    id                  SERIAL PRIMARY KEY,
    profile_id          INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    match_type          TEXT NOT NULL
                        CHECK (match_type IN ('trunk_group','account_id')),
    match_value         TEXT NOT NULL,
    bucket              TEXT NOT NULL
                        CHECK (bucket IN ('wholesale','retail')),
    override_priority   INT DEFAULT 0,
    UNIQUE(profile_id, match_type, match_value)
);

-- ── Per-period traffic studies ──────────────────────────────────────────
CREATE TABLE IF NOT EXISTS cdr_traffic_studies (
    id                  SERIAL PRIMARY KEY,
    profile_id          INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year      INT NOT NULL,
    reporting_period    TEXT NOT NULL
                        CHECK (reporting_period IN ('Q1','Q2','Q3','Q4','ANNUAL')),

    total_calls         BIGINT,
    total_minutes       BIGINT,
    total_revenue_cents BIGINT,

    -- NOTE(review): NUMERIC(6,4) allows at most two digits before the
    -- decimal point (max 99.9999). That is fine if the *_pct columns hold
    -- fractions in 0..1, but a literal 100.0000 would raise a numeric
    -- overflow. Confirm the scale the traffic-study writer uses.

    -- Revenue-weighted percentages (preferred)
    interstate_pct      NUMERIC(6,4),
    intrastate_pct      NUMERIC(6,4),
    international_pct   NUMERIC(6,4),
    indeterminate_pct   NUMERIC(6,4),

    -- Minutes-weighted percentages (cross-check; or primary if
    -- minutes_only_estimation_enabled)
    interstate_pct_minutes      NUMERIC(6,4),
    intrastate_pct_minutes      NUMERIC(6,4),
    international_pct_minutes   NUMERIC(6,4),
    indeterminate_pct_minutes   NUMERIC(6,4),

    -- Bucketed minutes
    wholesale_minutes   BIGINT,
    retail_minutes      BIGINT,

    -- Block 5 regional: both reports produced side-by-side
    orig_state_regions_json     JSONB,
    billing_state_regions_json  JSONB,

    methodology         TEXT,
    pdf_minio_path      TEXT,
    xlsx_minio_path     TEXT,
    generated_at        TIMESTAMPTZ DEFAULT NOW(),

    -- One study row per profile per reporting period.
    UNIQUE(profile_id, reporting_year, reporting_period)
);

-- ── Classified calls (hot-path PG table; bulk storage is parquet in MinIO) ──
CREATE TABLE IF NOT EXISTS cdr_calls (
    id                  BIGSERIAL PRIMARY KEY,
    profile_id          INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    upload_id           INT NOT NULL REFERENCES cdr_ingestion_uploads(id),
    natural_key_hash    TEXT NOT NULL,      -- SHA-1 of adapter natural key
    start_time          TIMESTAMPTZ NOT NULL,
    duration_sec        INT,
    billed_amount_cents BIGINT,             -- per-call revenue (NULL = unknown)
    billed_currency     TEXT,
    trunk_group_id      TEXT,
    customer_account_id TEXT,
    customer_type       TEXT,               -- wholesale|retail|unknown
    call_direction      TEXT,               -- inbound|outbound
    caller_npa          TEXT,
    caller_state        TEXT,
    caller_country      TEXT,
    called_npa          TEXT,
    called_state        TEXT,
    called_country      TEXT,
    jurisdiction        TEXT,               -- interstate|intrastate|international|local|indeterminate
    orig_state_region   TEXT,
    billing_state_region TEXT
);

-- Row-level dedup: a call's natural key may appear once per profile.
CREATE UNIQUE INDEX IF NOT EXISTS uq_cdr_calls_natural_key
    ON cdr_calls(profile_id, natural_key_hash);
CREATE INDEX IF NOT EXISTS idx_cdr_calls_profile_start
    ON cdr_calls(profile_id, start_time);
CREATE INDEX IF NOT EXISTS idx_cdr_calls_profile_juris
    ON cdr_calls(profile_id, jurisdiction);

-- NOTE(review): cdr_calls.upload_id is a foreign key with no supporting
-- index (cdr_quarantine.upload_id has one). PostgreSQL does not index the
-- referencing side automatically, so deleting or re-keying an upload, or
-- filtering calls by upload, scans this table. If that access pattern
-- exists, add the index in a follow-up migration (CREATE INDEX
-- CONCURRENTLY) rather than here, to avoid locking writes on a populated
-- table.

-- ── Quarantine: rows that failed validation ─────────────────────────────
CREATE TABLE IF NOT EXISTS cdr_quarantine (
    id              BIGSERIAL PRIMARY KEY,
    upload_id       INT NOT NULL REFERENCES cdr_ingestion_uploads(id),
    source_row      INT,
    raw_payload     JSONB,
    reason_code     TEXT NOT NULL,
    reason_detail   TEXT,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_cdr_quarantine_upload
    ON cdr_quarantine(upload_id);

-- ── Paywall: per-year access grants ─────────────────────────────────────
--
-- Populated by the checkout.ts payment-complete hook on any of the
-- gating service slugs (fcc-499a, fcc-499a-499q, fcc-full-compliance,
-- cdr-analysis). Presence of a grant unlocks the classified study for
-- that reporting year. Admin view ignores grants.
CREATE TABLE IF NOT EXISTS cdr_study_access_grants (
    id                  SERIAL PRIMARY KEY,
    profile_id          INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year      INT NOT NULL,
    granted_by_order    TEXT NOT NULL,      -- compliance_orders.order_number
    granted_at          TIMESTAMPTZ DEFAULT NOW(),

    -- Several orders may each grant the same year; each order grants it once.
    UNIQUE(profile_id, reporting_year, granted_by_order)
);

-- NOTE(review): the UNIQUE constraint above is backed by an index whose
-- leading columns are already (profile_id, reporting_year), so this index
-- is likely redundant. Kept so existing deployments stay unchanged.
CREATE INDEX IF NOT EXISTS idx_cdr_grants_profile_year
    ON cdr_study_access_grants(profile_id, reporting_year);

-- ── Usage meters (quota tracking) ───────────────────────────────────────
CREATE TABLE IF NOT EXISTS cdr_usage_meters (
    id                  SERIAL PRIMARY KEY,
    profile_id          INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year      INT NOT NULL,
    bytes_stored        BIGINT DEFAULT 0,
    rows_ingested       BIGINT DEFAULT 0,
    last_measured_at    TIMESTAMPTZ DEFAULT NOW(),
    warned_80pct_at     TIMESTAMPTZ,
    warned_100pct_at    TIMESTAMPTZ,

    -- One meter row per profile per reporting year.
    UNIQUE(profile_id, reporting_year)
);

-- ── Link back from telecom_entities ─────────────────────────────────────
-- Nullable back-reference; the authoritative link is
-- cdr_ingestion_profiles.telecom_entity_id (NOT NULL + UNIQUE).
ALTER TABLE telecom_entities
    ADD COLUMN IF NOT EXISTS cdr_ingestion_profile_id INT
        REFERENCES cdr_ingestion_profiles(id);