Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
193 lines
6.7 KiB
Python
193 lines
6.7 KiB
Python
"""
|
|
Import FCC Form 499 Filer Database Excel dump into PostgreSQL.
|
|
|
|
Download the Excel file from https://apps.fcc.gov/cgb/form499/499a.cfm
|
|
("Excel Dump of all 499 filings") then run:
|
|
|
|
python3 scripts/workers/fcc_499_filer_import.py /path/to/499_dump.xlsx
|
|
|
|
Or copy to the server and run:
|
|
scp 499_dump.xlsx deploy@207.174.124.71:/tmp/
|
|
ssh deploy@207.174.124.71 "cd /opt/performancewest && docker compose exec -T workers python3 /app/scripts/workers/fcc_499_filer_import.py /tmp/499_dump.xlsx"
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import logging
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
|
|
LOG = logging.getLogger("workers.fcc_499_import")
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
|
|
|
|
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://pw:pw_dev_2026@207.174.124.71:5433/performancewest")
|
|
|
|
|
|
def import_excel(path: str) -> int:
|
|
"""Import an Excel (.xlsx) file into fcc_499_filers table."""
|
|
try:
|
|
import openpyxl
|
|
except ImportError:
|
|
LOG.error("openpyxl not installed. Install with: pip install openpyxl")
|
|
sys.exit(1)
|
|
|
|
LOG.info("Reading %s", path)
|
|
wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
|
|
ws = wb.active
|
|
rows = list(ws.iter_rows(values_only=True))
|
|
|
|
if len(rows) < 2:
|
|
LOG.error("File has fewer than 2 rows — no data")
|
|
return 0
|
|
|
|
# Normalize headers
|
|
headers = [str(h or "").strip() for h in rows[0]]
|
|
LOG.info("Headers (%d): %s", len(headers), headers[:10])
|
|
|
|
return _upsert_rows(headers, rows[1:])
|
|
|
|
|
|
def import_csv(path: str) -> int:
|
|
"""Import a CSV file into fcc_499_filers table."""
|
|
LOG.info("Reading %s", path)
|
|
with open(path, "r", encoding="utf-8-sig") as f:
|
|
reader = csv.reader(f)
|
|
rows_list = list(reader)
|
|
|
|
if len(rows_list) < 2:
|
|
LOG.error("File has fewer than 2 rows — no data")
|
|
return 0
|
|
|
|
headers = [h.strip() for h in rows_list[0]]
|
|
LOG.info("Headers (%d): %s", len(headers), headers[:10])
|
|
|
|
return _upsert_rows(headers, rows_list[1:])
|
|
|
|
|
|
def _upsert_rows(headers: list[str], data_rows: list) -> int:
|
|
"""Upsert rows into fcc_499_filers. Returns count."""
|
|
import psycopg2
|
|
|
|
# Build a column map — the FCC Excel uses various header names
|
|
col_map = {}
|
|
for i, h in enumerate(headers):
|
|
hl = h.lower().replace(" ", "_").replace("-", "_")
|
|
if "filer" in hl and "id" in hl:
|
|
col_map["filer_id"] = i
|
|
elif hl in ("frn", "coresid", "fcc_registration_number", "cores_id"):
|
|
col_map["frn"] = i
|
|
elif "legal" in hl and "name" in hl:
|
|
col_map["legal_name"] = i
|
|
elif "trade" in hl and "name" in hl:
|
|
col_map["trade_name"] = i
|
|
elif hl == "state" or (hl == "hq_state" and "state" not in col_map):
|
|
col_map["state"] = i
|
|
elif "principal" in hl and "communication" in hl:
|
|
col_map["service_type"] = i
|
|
elif "service" in hl and "type" in hl:
|
|
col_map["service_type"] = i
|
|
elif "holding" in hl and "company" in hl:
|
|
col_map["holding_company"] = i
|
|
elif "operational" in hl and "status" in hl:
|
|
col_map["status"] = i
|
|
elif hl == "company_name" and "legal_name" not in col_map:
|
|
col_map["legal_name"] = i
|
|
|
|
LOG.info("Column map: %s", col_map)
|
|
|
|
if "filer_id" not in col_map and "legal_name" not in col_map:
|
|
LOG.error("Could not find filer_id or legal_name columns. Headers: %s", headers)
|
|
return 0
|
|
|
|
conn = psycopg2.connect(DATABASE_URL)
|
|
now = datetime.now(timezone.utc)
|
|
count = 0
|
|
|
|
with conn.cursor() as cur:
|
|
for row in data_rows:
|
|
def get(field):
|
|
idx = col_map.get(field)
|
|
if idx is None or idx >= len(row):
|
|
return None
|
|
val = row[idx]
|
|
return str(val).strip() if val is not None else None
|
|
|
|
filer_id = get("filer_id")
|
|
legal_name = get("legal_name")
|
|
if not filer_id or not legal_name:
|
|
continue
|
|
|
|
frn = get("frn")
|
|
if frn:
|
|
frn = frn.replace(".0", "").strip() # Excel sometimes adds .0
|
|
if len(frn) < 10:
|
|
frn = frn.zfill(10)
|
|
|
|
cur.execute("""
|
|
INSERT INTO fcc_499_filers (filer_id, frn, legal_name, trade_name, state,
|
|
service_type, holding_company, status, last_scraped_at, updated_at)
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
|
ON CONFLICT (filer_id) DO UPDATE SET
|
|
frn = COALESCE(EXCLUDED.frn, fcc_499_filers.frn),
|
|
legal_name = EXCLUDED.legal_name,
|
|
trade_name = COALESCE(EXCLUDED.trade_name, fcc_499_filers.trade_name),
|
|
state = COALESCE(EXCLUDED.state, fcc_499_filers.state),
|
|
service_type = COALESCE(EXCLUDED.service_type, fcc_499_filers.service_type),
|
|
holding_company = COALESCE(EXCLUDED.holding_company, fcc_499_filers.holding_company),
|
|
status = COALESCE(EXCLUDED.status, fcc_499_filers.status),
|
|
last_scraped_at = %s,
|
|
updated_at = %s
|
|
""", (
|
|
filer_id, frn or None, legal_name,
|
|
get("trade_name"), (get("state") or "")[:2] or None,
|
|
get("service_type"), get("holding_company"),
|
|
get("status") or "Active",
|
|
now, now, now, now,
|
|
))
|
|
count += 1
|
|
|
|
if count % 1000 == 0:
|
|
conn.commit()
|
|
LOG.info(" … %d rows", count)
|
|
|
|
conn.commit()
|
|
|
|
# Report totals
|
|
with conn.cursor() as cur:
|
|
cur.execute("SELECT COUNT(*) FROM fcc_499_filers")
|
|
total = cur.fetchone()[0]
|
|
cur.execute("SELECT COUNT(*) FROM fcc_499_filers WHERE frn IS NOT NULL")
|
|
with_frn = cur.fetchone()[0]
|
|
|
|
conn.close()
|
|
LOG.info("Import complete: %d rows upserted. Total in DB: %d (%d with FRN)", count, total, with_frn)
|
|
return count
|
|
|
|
|
|
def main():
|
|
if len(sys.argv) < 2:
|
|
print("Usage: python3 scripts/workers/fcc_499_filer_import.py <path-to-excel-or-csv>")
|
|
print()
|
|
print("Download the Excel dump from: https://apps.fcc.gov/cgb/form499/499a.cfm")
|
|
sys.exit(1)
|
|
|
|
path = sys.argv[1]
|
|
if not os.path.exists(path):
|
|
LOG.error("File not found: %s", path)
|
|
sys.exit(1)
|
|
|
|
if path.endswith(".xlsx") or path.endswith(".xls"):
|
|
count = import_excel(path)
|
|
elif path.endswith(".csv"):
|
|
count = import_csv(path)
|
|
else:
|
|
LOG.error("Unsupported file format. Use .xlsx, .xls, or .csv")
|
|
sys.exit(1)
|
|
|
|
if count == 0:
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|