Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
256 lines
12 KiB
SQL
256 lines
12 KiB
SQL
-- 050: CDR Ingestion + Traffic Study
--
-- Populated by scripts.workers.cdr_ingester + cdr_puller (see
-- /home/justin/.claude/plans/swirling-napping-sonnet.md for the full design).
--
-- Flow: customer pushes CDRs via SFTPGo OR we pull via a switch preset /
-- generic transport → raw files land in MinIO cdr-uploads/{customer_id}/
-- raw/ → ingester parses (adapter) → validates → dedups → classifies →
-- writes cdr_calls (PG hot path) + parquet (MinIO bulk) → traffic study
-- summarizes into cdr_traffic_studies → pre-fills 499-A workbook.
--
-- Paywall: cdr_study_access_grants gates classified output behind payment
-- for that reporting year's 499-A filing.
--
-- Quotas: cdr_usage_meters tracks bytes + row counts; storage_plan on
-- the profile drives overage billing.

-- ── Profiles (one per telecom_entity that ingests CDRs) ──────────────────
--
-- Each row configures CDR ingestion for a single telecom entity: which
-- format adapter to use, how files arrive (SFTPGo push and/or scheduled
-- pull), how revenue is attributed, and which storage plan applies.
-- The table-level UNIQUE on telecom_entity_id enforces the one-per-entity rule.

CREATE TABLE IF NOT EXISTS cdr_ingestion_profiles (
    id                  SERIAL PRIMARY KEY,
    customer_id         INTEGER NOT NULL REFERENCES customers (id),
    telecom_entity_id   INTEGER NOT NULL REFERENCES telecom_entities (id),

    -- Known-switch preset chosen from the portal dropdown. A non-NULL
    -- value determines both the transport and the CDR format; NULL means
    -- "Other", i.e. the manual transport/format settings below apply.
    switch_preset       TEXT
        CHECK (
            switch_preset IS NULL
            OR switch_preset IN (
                'netsapiens', 'freeswitch', 'asterisk', 'kazoo', 'ribbon',
                'metaswitch', 'sansay', 'broadworks', 'grandstream',
                'fortysix_labs', 'sip_navigator'
            )
        ),

    -- Adapter slug; must match a module under scripts/workers/cdr_adapters/.
    format              TEXT NOT NULL,
    -- Column mappings consumed by the generic_csv adapter.
    format_config       JSONB DEFAULT '{}',

    -- Push path: the customer uploads to our SFTPGo server.
    sftpgo_enabled        BOOLEAN DEFAULT FALSE,
    sftpgo_username       TEXT,
    sftpgo_password_hash  TEXT,
    -- Default quota is 5 GiB (5 * 1024^3 bytes).
    sftpgo_quota_bytes    BIGINT DEFAULT 5368709120,

    -- Pull path: we fetch from the customer's switch. These generic
    -- fields are used when switch_preset IS NULL; presets keep their own
    -- settings in preset_config.
    pull_enabled        BOOLEAN DEFAULT FALSE,
    pull_transport      TEXT
        CHECK (
            pull_transport IS NULL
            OR pull_transport IN
                ('sftp', 'ftp', 'ftps', 'https', 's3', 'api', 'scrape')
        ),
    pull_host           TEXT,
    pull_port           INTEGER,
    pull_remote_glob    TEXT,
    -- Cron expression for the pull schedule; default is 02:00 every day.
    pull_cron           TEXT DEFAULT '0 2 * * *',
    -- Docname of the ERPNext "Sensitive ID" record.
    pull_sensitive_id   TEXT,
    -- Preset-specific extras (API host, account_id, etc.).
    preset_config       JSONB DEFAULT '{}',
    last_fetched_at     TIMESTAMPTZ,
    last_fetched_mtime  TIMESTAMPTZ,
    last_test_at        TIMESTAMPTZ,
    last_test_ok        BOOLEAN,
    last_test_error     TEXT,

    -- State of the customer's billing address; feeds the Block 5
    -- billing-region report (both regional reports are required).
    billing_state       TEXT,

    -- Revenue attribution. Per-call gross revenue taken from the CDR is
    -- the preferred source; minutes-only estimation must be opted into
    -- explicitly (flat-rate line service carriers, or switches that emit
    -- no charge data).
    minutes_only_estimation_enabled  BOOLEAN DEFAULT FALSE,
    flat_monthly_revenue_cents       BIGINT,

    -- Storage quota plan. The filing service includes 10 GB / 10 M rows;
    -- higher-volume customers subscribe to a tier.
    storage_plan        TEXT NOT NULL DEFAULT 'included'
        CHECK (storage_plan IN
            ('included', 'tier1', 'tier2', 'tier3', 'enterprise')),
    -- compliance_orders.order_number of the currently active plan.
    storage_plan_order  TEXT,
    over_quota_policy   TEXT NOT NULL DEFAULT 'notify'
        CHECK (over_quota_policy IN ('notify', 'block', 'auto_upgrade')),

    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (telecom_entity_id)
);

-- Lookup of all profiles belonging to a customer.
CREATE INDEX IF NOT EXISTS idx_cdr_profiles_customer
    ON cdr_ingestion_profiles (customer_id);


-- ── Uploads (file-level tracking) ───────────────────────────────────────
--
-- One row per raw CDR file received for a profile, with its processing
-- status and per-file row counters.
--
-- NOTE(review): UNIQUE (profile_id, raw_sha256) prevents a second row for
-- a byte-identical file on the same profile, yet the table also has a
-- 'duplicate' status and duplicate_of_id. Confirm how duplicate rows are
-- meant to be recorded.

CREATE TABLE IF NOT EXISTS cdr_ingestion_uploads (
    id                     SERIAL PRIMARY KEY,
    profile_id             INTEGER NOT NULL
        REFERENCES cdr_ingestion_profiles (id),
    -- Channel through which the file arrived.
    source                 TEXT NOT NULL
        CHECK (source IN ('sftpgo', 'pull', 'browser', 'webhook')),
    -- Object path and SHA-256 digest of the file as received.
    raw_minio_path         TEXT NOT NULL,
    raw_sha256             TEXT NOT NULL,
    normalized_minio_path  TEXT,
    summary_json           JSONB,
    status                 TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN (
            'pending', 'processing', 'done',
            'failed', 'duplicate', 'quarantined',
            'quota_exceeded'
        )),
    -- Self-reference to the upload this one duplicates, if any.
    duplicate_of_id        INTEGER REFERENCES cdr_ingestion_uploads (id),
    row_count              INTEGER,
    rows_accepted          INTEGER,
    rows_quarantined       INTEGER,
    rows_dropped_dupes     INTEGER,
    error                  TEXT,
    created_at             TIMESTAMPTZ DEFAULT NOW(),
    processed_at           TIMESTAMPTZ,

    UNIQUE (profile_id, raw_sha256)
);

-- Newest-first listing of a profile's uploads.
CREATE INDEX IF NOT EXISTS idx_cdr_uploads_profile_created
    ON cdr_ingestion_uploads (profile_id, created_at DESC);

-- Partial index: only rows still in 'pending' or 'processing' are indexed.
CREATE INDEX IF NOT EXISTS idx_cdr_uploads_status
    ON cdr_ingestion_uploads (status)
    WHERE status IN ('pending', 'processing');


-- ── Wholesale / retail bucket mappings ──────────────────────────────────
--
-- Per-profile rules assigning a trunk group or account id to the
-- wholesale or retail bucket. A given (match_type, match_value) pair can
-- appear at most once per profile.

CREATE TABLE IF NOT EXISTS cdr_bucket_mappings (
    id                 SERIAL PRIMARY KEY,
    profile_id         INTEGER NOT NULL
        REFERENCES cdr_ingestion_profiles (id),
    -- Which CDR field match_value is compared against.
    match_type         TEXT NOT NULL
        CHECK (match_type IN ('trunk_group', 'account_id')),
    match_value        TEXT NOT NULL,
    bucket             TEXT NOT NULL
        CHECK (bucket IN ('wholesale', 'retail')),
    override_priority  INTEGER DEFAULT 0,

    UNIQUE (profile_id, match_type, match_value)
);


-- ── Per-period traffic studies ──────────────────────────────────────────
--
-- One summary row per (profile, reporting_year, reporting_period) holding
-- totals, jurisdiction splits, bucketed minutes, the two Block 5 regional
-- breakdowns, and the paths of the generated PDF/XLSX artifacts.

CREATE TABLE IF NOT EXISTS cdr_traffic_studies (
    id                   SERIAL PRIMARY KEY,
    profile_id           INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    reporting_year       INT NOT NULL,
    reporting_period     TEXT NOT NULL
        CHECK (reporting_period IN ('Q1','Q2','Q3','Q4','ANNUAL')),
    total_calls          BIGINT,
    total_minutes        BIGINT,
    total_revenue_cents  BIGINT,

    -- Percentage columns are NUMERIC(7,4), not NUMERIC(6,4): precision 6
    -- with scale 4 leaves two integer digits (max 99.9999), so a study
    -- that is 100% one jurisdiction would raise "numeric field overflow"
    -- if values are stored on a 0-100 scale. NUMERIC(7,4) accepts every
    -- value the narrower type did, plus 100.0000.
    -- NOTE(review): the scale (0-1 vs 0-100) is decided by the writer;
    -- confirm. CREATE TABLE IF NOT EXISTS does not alter an existing
    -- table, so databases that already ran this migration need a separate
    -- ALTER TABLE ... ALTER COLUMN ... TYPE NUMERIC(7,4).

    -- Revenue-weighted percentages (preferred)
    interstate_pct       NUMERIC(7,4),
    intrastate_pct       NUMERIC(7,4),
    international_pct    NUMERIC(7,4),
    indeterminate_pct    NUMERIC(7,4),

    -- Minutes-weighted percentages (cross-check; or primary if
    -- minutes_only_estimation_enabled)
    interstate_pct_minutes     NUMERIC(7,4),
    intrastate_pct_minutes     NUMERIC(7,4),
    international_pct_minutes  NUMERIC(7,4),
    indeterminate_pct_minutes  NUMERIC(7,4),

    -- Bucketed minutes
    wholesale_minutes    BIGINT,
    retail_minutes       BIGINT,

    -- Block 5 regional: both reports produced side-by-side
    orig_state_regions_json     JSONB,
    billing_state_regions_json  JSONB,

    methodology          TEXT,
    pdf_minio_path       TEXT,
    xlsx_minio_path      TEXT,
    generated_at         TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(profile_id, reporting_year, reporting_period)
);


-- ── Classified calls (hot-path PG table; bulk storage is parquet in MinIO) ──
--
-- One row per classified call. Rows are de-duplicated per profile through
-- the unique index on (profile_id, natural_key_hash).

CREATE TABLE IF NOT EXISTS cdr_calls (
    id                    BIGSERIAL PRIMARY KEY,
    profile_id            INT NOT NULL REFERENCES cdr_ingestion_profiles(id),
    upload_id             INT NOT NULL REFERENCES cdr_ingestion_uploads(id),
    natural_key_hash      TEXT NOT NULL,  -- SHA-1 of adapter natural key
    start_time            TIMESTAMPTZ NOT NULL,
    duration_sec          INT,
    billed_amount_cents   BIGINT,         -- per-call revenue (NULL = unknown)
    billed_currency       TEXT,
    trunk_group_id        TEXT,
    customer_account_id   TEXT,
    -- The value sets below are documented conventions only; no CHECK
    -- constraint enforces them.
    customer_type         TEXT,           -- wholesale|retail|unknown
    call_direction        TEXT,           -- inbound|outbound
    caller_npa            TEXT,
    caller_state          TEXT,
    caller_country        TEXT,
    called_npa            TEXT,
    called_state          TEXT,
    called_country        TEXT,
    jurisdiction          TEXT,           -- interstate|intrastate|international|local|indeterminate
    orig_state_region     TEXT,
    billing_state_region  TEXT
);

-- Dedup key: the same natural key cannot be stored twice for a profile.
CREATE UNIQUE INDEX IF NOT EXISTS uq_cdr_calls_natural_key
    ON cdr_calls(profile_id, natural_key_hash);

-- Time-range scans within a profile.
CREATE INDEX IF NOT EXISTS idx_cdr_calls_profile_start
    ON cdr_calls(profile_id, start_time);

-- Per-jurisdiction lookups within a profile.
CREATE INDEX IF NOT EXISTS idx_cdr_calls_profile_juris
    ON cdr_calls(profile_id, jurisdiction);

-- Index the upload_id foreign key, mirroring idx_cdr_quarantine_upload.
-- Without it, finding the calls of one upload -- and the referential check
-- PostgreSQL runs when a cdr_ingestion_uploads row is deleted or its id
-- updated -- has to scan this table.
CREATE INDEX IF NOT EXISTS idx_cdr_calls_upload
    ON cdr_calls(upload_id);


-- ── Quarantine: rows that failed validation ─────────────────────────────
--
-- Rejected CDR rows are kept alongside a reason code and tied back to the
-- upload they came from.

CREATE TABLE IF NOT EXISTS cdr_quarantine (
    id             BIGSERIAL PRIMARY KEY,
    upload_id      INTEGER NOT NULL
        REFERENCES cdr_ingestion_uploads (id),
    source_row     INTEGER,
    raw_payload    JSONB,
    reason_code    TEXT NOT NULL,
    reason_detail  TEXT,
    created_at     TIMESTAMPTZ DEFAULT NOW()
);

-- Fetch every quarantined row belonging to one upload.
CREATE INDEX IF NOT EXISTS idx_cdr_quarantine_upload
    ON cdr_quarantine (upload_id);


-- ── Paywall: per-year access grants ─────────────────────────────────────
--
-- Rows are inserted by the checkout.ts payment-complete hook whenever one
-- of the gating service slugs is purchased (fcc-499a, fcc-499a-499q,
-- fcc-full-compliance, cdr-analysis). If a grant exists, the classified
-- study for that reporting year is unlocked. The admin view does not
-- consult grants.

CREATE TABLE IF NOT EXISTS cdr_study_access_grants (
    id                SERIAL PRIMARY KEY,
    profile_id        INTEGER NOT NULL
        REFERENCES cdr_ingestion_profiles (id),
    reporting_year    INTEGER NOT NULL,
    -- compliance_orders.order_number of the order that produced the grant.
    granted_by_order  TEXT NOT NULL,
    granted_at        TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (profile_id, reporting_year, granted_by_order)
);

-- "Is there any grant for this profile and year?" lookup.
-- NOTE(review): (profile_id, reporting_year) is a leading prefix of the
-- index backing the UNIQUE constraint above, so this index may be
-- redundant; confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_cdr_grants_profile_year
    ON cdr_study_access_grants (profile_id, reporting_year);


-- ── Usage meters (quota tracking) ───────────────────────────────────────
--
-- One counter row per (profile, reporting_year): stored bytes, ingested
-- rows, and timestamps for the 80% / 100% warnings.

CREATE TABLE IF NOT EXISTS cdr_usage_meters (
    id                 SERIAL PRIMARY KEY,
    profile_id         INTEGER NOT NULL
        REFERENCES cdr_ingestion_profiles (id),
    reporting_year     INTEGER NOT NULL,
    bytes_stored       BIGINT DEFAULT 0,
    rows_ingested      BIGINT DEFAULT 0,
    last_measured_at   TIMESTAMPTZ DEFAULT NOW(),
    -- NULL until the corresponding warning is recorded (columns carry no default).
    warned_80pct_at    TIMESTAMPTZ,
    warned_100pct_at   TIMESTAMPTZ,

    UNIQUE (profile_id, reporting_year)
);


-- ── Link back from telecom_entities ─────────────────────────────────────
--
-- Adds a nullable pointer from a telecom entity to its ingestion profile.
-- ADD COLUMN IF NOT EXISTS makes this safe to re-run.

ALTER TABLE telecom_entities
    ADD COLUMN IF NOT EXISTS cdr_ingestion_profile_id INTEGER
        REFERENCES cdr_ingestion_profiles (id);