new-site/scripts/workers/fcc_499_filer_import.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

193 lines
6.7 KiB
Python

"""
Import FCC Form 499 Filer Database Excel dump into PostgreSQL.
Download the Excel file from https://apps.fcc.gov/cgb/form499/499a.cfm
("Excel Dump of all 499 filings") then run:
python3 scripts/workers/fcc_499_filer_import.py /path/to/499_dump.xlsx
Or copy to the server and run:
scp 499_dump.xlsx deploy@207.174.124.71:/tmp/
ssh deploy@207.174.124.71 "cd /opt/performancewest && docker compose exec -T workers python3 /app/scripts/workers/fcc_499_filer_import.py /tmp/499_dump.xlsx"
"""
from __future__ import annotations
import csv
import logging
import os
import sys
from datetime import datetime, timezone
LOG = logging.getLogger("workers.fcc_499_import")
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://pw:pw_dev_2026@207.174.124.71:5433/performancewest")
def import_excel(path: str) -> int:
"""Import an Excel (.xlsx) file into fcc_499_filers table."""
try:
import openpyxl
except ImportError:
LOG.error("openpyxl not installed. Install with: pip install openpyxl")
sys.exit(1)
LOG.info("Reading %s", path)
wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
ws = wb.active
rows = list(ws.iter_rows(values_only=True))
if len(rows) < 2:
LOG.error("File has fewer than 2 rows — no data")
return 0
# Normalize headers
headers = [str(h or "").strip() for h in rows[0]]
LOG.info("Headers (%d): %s", len(headers), headers[:10])
return _upsert_rows(headers, rows[1:])
def import_csv(path: str) -> int:
"""Import a CSV file into fcc_499_filers table."""
LOG.info("Reading %s", path)
with open(path, "r", encoding="utf-8-sig") as f:
reader = csv.reader(f)
rows_list = list(reader)
if len(rows_list) < 2:
LOG.error("File has fewer than 2 rows — no data")
return 0
headers = [h.strip() for h in rows_list[0]]
LOG.info("Headers (%d): %s", len(headers), headers[:10])
return _upsert_rows(headers, rows_list[1:])
def _upsert_rows(headers: list[str], data_rows: list) -> int:
"""Upsert rows into fcc_499_filers. Returns count."""
import psycopg2
# Build a column map — the FCC Excel uses various header names
col_map = {}
for i, h in enumerate(headers):
hl = h.lower().replace(" ", "_").replace("-", "_")
if "filer" in hl and "id" in hl:
col_map["filer_id"] = i
elif hl in ("frn", "coresid", "fcc_registration_number", "cores_id"):
col_map["frn"] = i
elif "legal" in hl and "name" in hl:
col_map["legal_name"] = i
elif "trade" in hl and "name" in hl:
col_map["trade_name"] = i
elif hl == "state" or (hl == "hq_state" and "state" not in col_map):
col_map["state"] = i
elif "principal" in hl and "communication" in hl:
col_map["service_type"] = i
elif "service" in hl and "type" in hl:
col_map["service_type"] = i
elif "holding" in hl and "company" in hl:
col_map["holding_company"] = i
elif "operational" in hl and "status" in hl:
col_map["status"] = i
elif hl == "company_name" and "legal_name" not in col_map:
col_map["legal_name"] = i
LOG.info("Column map: %s", col_map)
if "filer_id" not in col_map and "legal_name" not in col_map:
LOG.error("Could not find filer_id or legal_name columns. Headers: %s", headers)
return 0
conn = psycopg2.connect(DATABASE_URL)
now = datetime.now(timezone.utc)
count = 0
with conn.cursor() as cur:
for row in data_rows:
def get(field):
idx = col_map.get(field)
if idx is None or idx >= len(row):
return None
val = row[idx]
return str(val).strip() if val is not None else None
filer_id = get("filer_id")
legal_name = get("legal_name")
if not filer_id or not legal_name:
continue
frn = get("frn")
if frn:
frn = frn.replace(".0", "").strip() # Excel sometimes adds .0
if len(frn) < 10:
frn = frn.zfill(10)
cur.execute("""
INSERT INTO fcc_499_filers (filer_id, frn, legal_name, trade_name, state,
service_type, holding_company, status, last_scraped_at, updated_at)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (filer_id) DO UPDATE SET
frn = COALESCE(EXCLUDED.frn, fcc_499_filers.frn),
legal_name = EXCLUDED.legal_name,
trade_name = COALESCE(EXCLUDED.trade_name, fcc_499_filers.trade_name),
state = COALESCE(EXCLUDED.state, fcc_499_filers.state),
service_type = COALESCE(EXCLUDED.service_type, fcc_499_filers.service_type),
holding_company = COALESCE(EXCLUDED.holding_company, fcc_499_filers.holding_company),
status = COALESCE(EXCLUDED.status, fcc_499_filers.status),
last_scraped_at = %s,
updated_at = %s
""", (
filer_id, frn or None, legal_name,
get("trade_name"), (get("state") or "")[:2] or None,
get("service_type"), get("holding_company"),
get("status") or "Active",
now, now, now, now,
))
count += 1
if count % 1000 == 0:
conn.commit()
LOG.info("%d rows", count)
conn.commit()
# Report totals
with conn.cursor() as cur:
cur.execute("SELECT COUNT(*) FROM fcc_499_filers")
total = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM fcc_499_filers WHERE frn IS NOT NULL")
with_frn = cur.fetchone()[0]
conn.close()
LOG.info("Import complete: %d rows upserted. Total in DB: %d (%d with FRN)", count, total, with_frn)
return count
def main():
if len(sys.argv) < 2:
print("Usage: python3 scripts/workers/fcc_499_filer_import.py <path-to-excel-or-csv>")
print()
print("Download the Excel dump from: https://apps.fcc.gov/cgb/form499/499a.cfm")
sys.exit(1)
path = sys.argv[1]
if not os.path.exists(path):
LOG.error("File not found: %s", path)
sys.exit(1)
if path.endswith(".xlsx") or path.endswith(".xls"):
count = import_excel(path)
elif path.endswith(".csv"):
count = import_csv(path)
else:
LOG.error("Unsupported file format. Use .xlsx, .xls, or .csv")
sys.exit(1)
if count == 0:
sys.exit(1)
if __name__ == "__main__":
main()