Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
274 lines
10 KiB
Python
274 lines
10 KiB
Python
"""
|
|
NANPA area-code importer.
|
|
|
|
One-shot (re-runnable) loader for `nanpa_area_codes`. Fetches the public
|
|
NANPA "Geographic Area Code Number Report" — a CSV listing every
|
|
active/assigned NANP area code and the geography it serves.
|
|
|
|
Source: https://www.nationalnanpa.com/enas/geoAreaCodeNumberReport.do
|
|
(the site exposes the data as CSV behind a simple GET; we mirror the
|
|
fields we care about into our PG table.)
|
|
|
|
Fallback: if the NANPA endpoint is unreachable, we have a bundled
|
|
snapshot at scripts/data/nanpa_area_codes_snapshot.csv that we fall
|
|
back to so classification still works in dev / offline environments.
|
|
|
|
Coverage:
|
|
- All 50 US states + DC + territories (PR, VI, GU, AS, MP)
|
|
- Canada (for international classification)
|
|
- Caribbean NANP jurisdictions (1-242 Bahamas, 1-246 Barbados, etc. —
|
|
these are "NANP but not US" and classify as international)
|
|
|
|
Usage:
|
|
python -m scripts.workers.cdr_npa_importer
|
|
python -m scripts.workers.cdr_npa_importer --snapshot-only # skip network
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import csv
|
|
import io
|
|
import logging
|
|
import os
|
|
import sys
|
|
import urllib.request
|
|
from pathlib import Path
|
|
from typing import Iterable
|
|
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
|
|
# Module logger + root logging config. basicConfig is called at import time so
# the worker logs to stdout when run via `python -m` (captured by systemd /
# container logs); it is a no-op if the root logger is already configured.
log = logging.getLogger("cdr_npa_importer")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)

# NANPA report endpoint. Overridable via NANPA_CSV_URL for testing or if the
# site moves the report; `?csv=Y` asks the report servlet for CSV output.
NANPA_URL = os.environ.get(
    "NANPA_CSV_URL",
    "https://www.nationalnanpa.com/enas/displayNpaCityReport.do"
    "?csv=Y",
)
# Bundled offline snapshot: <repo>/scripts/data/nanpa_area_codes_snapshot.csv
# (two levels up from this file — scripts/workers/ -> scripts/).
SNAPSHOT_PATH = (
    Path(__file__).resolve().parent.parent / "data" / "nanpa_area_codes_snapshot.csv"
)
# Postgres DSN; required — main() aborts when empty.
DATABASE_URL = os.environ.get("DATABASE_URL", "")
|
|
|
|
|
|
# NANP jurisdictions that are NOT the United States — classifying their
# area codes as "international" is critical for correct 499-A splits.
# Source: ITU-T E.164 + NANPA country-code assignments.
#
# NOTE: a handful of entries below are actually US territories (marked
# inline); they live in this table so the NPA lookup is a single dict, and
# _US_TERRITORY_NPAS below flips them back to country=US at resolution time.
_NANP_NON_US_BY_NPA: dict[str, tuple[str, str]] = {
    # NPA: (ISO-2 country, region label)
    "242": ("BS", "Bahamas"),
    "246": ("BB", "Barbados"),
    "264": ("AI", "Anguilla"),
    "268": ("AG", "Antigua and Barbuda"),
    "284": ("VG", "British Virgin Islands"),
    "340": ("VI", "US Virgin Islands"),  # actually US territory — handled below
    "345": ("KY", "Cayman Islands"),
    "441": ("BM", "Bermuda"),
    "473": ("GD", "Grenada"),
    "649": ("TC", "Turks and Caicos"),
    "658": ("JM", "Jamaica"),  # overlay of 876
    "664": ("MS", "Montserrat"),
    "670": ("MP", "Northern Mariana Islands"),  # US territory
    "671": ("GU", "Guam"),  # US territory
    "684": ("AS", "American Samoa"),  # US territory
    "721": ("SX", "Sint Maarten"),
    "758": ("LC", "Saint Lucia"),
    "767": ("DM", "Dominica"),
    "784": ("VC", "Saint Vincent and the Grenadines"),
    "787": ("PR", "Puerto Rico"),  # US territory
    "809": ("DO", "Dominican Republic"),
    "829": ("DO", "Dominican Republic"),
    "849": ("DO", "Dominican Republic"),
    "868": ("TT", "Trinidad and Tobago"),
    "869": ("KN", "Saint Kitts and Nevis"),
    "876": ("JM", "Jamaica"),
    "939": ("PR", "Puerto Rico"),  # US territory
}
# US territories (still country=US, not international): these NPAs resolve to
# country "US" with the ISO-2 code from the table above in the state slot.
_US_TERRITORY_NPAS = {"340", "670", "671", "684", "787", "939"}
|
|
|
|
|
|
def _fetch_csv(url: str) -> str | None:
    """Download the NANPA CSV report; return its text, or None on any failure.

    Every exception is swallowed and logged deliberately — the caller falls
    back to the bundled snapshot when this returns None.
    """
    try:
        request = urllib.request.Request(
            url,
            headers={"User-Agent": "Mozilla/5.0 (PerformanceWest NPA Importer)"},
        )
        with urllib.request.urlopen(request, timeout=30) as response:
            payload = response.read()
    except Exception as exc:  # broad on purpose: any failure means "use snapshot"
        log.warning("NANPA fetch failed (%s): %s — falling back to snapshot", url, exc)
        return None
    return payload.decode("utf-8", errors="replace")
|
|
|
|
|
|
def _parse_nanpa_csv(csv_text: str) -> Iterable[dict]:
    """Yield normalized rows from the NANPA CSV.

    The public NANPA report varies slightly across the two endpoints
    (geoAreaCodeNumberReport vs. displayNpaCityReport). We accept either
    and key fields we care about: NPA, Location (state/province/country),
    Status. Rows without a 3-digit numeric NPA are skipped (header rows,
    footers, non-geographic codes like 500/700/800/900 toll-free).
    """

    def first_of(record: dict, *columns: str) -> str:
        # First non-empty value among the column-name variants, stripped.
        for column in columns:
            value = record.get(column)
            if value:
                return value.strip()
        return ""

    for record in csv.DictReader(io.StringIO(csv_text)):
        npa = first_of(record, "NPA", "Area Code", "npa")
        if len(npa) != 3 or not npa.isdigit():
            continue  # header/footer rows and malformed lines

        status = first_of(record, "Status", "status").upper()
        if status in {"UNASSIGNED", "RETURNED", "RESERVED"}:
            continue  # not an in-service code

        location = first_of(
            record, "Location", "Jurisdiction", "Service Area Description"
        )
        country, state, note = _resolve_geography(npa, location)
        yield {
            "npa": npa,
            "state": state,
            "country": country,
            "note": note or location,
        }
|
|
|
|
|
|
# Canadian NPAs (geographic + "600" non-geographic). Hoisted to a module-level
# frozenset so the 54-element set is built once, not rebuilt on every
# _resolve_geography() call.
_CANADIAN_NPAS: frozenset[str] = frozenset({
    "204", "226", "236", "249", "250", "263", "289", "306", "343", "354",
    "365", "367", "368", "382", "403", "416", "418", "428", "431", "437",
    "438", "450", "468", "474", "506", "514", "519", "548", "579", "581",
    "584", "587", "600", "604", "613", "639", "647", "672", "683", "705",
    "709", "742", "753", "778", "780", "782", "807", "819", "825", "867",
    "873", "879", "902", "905",
})


def _resolve_geography(npa: str, location: str) -> tuple[str, str | None, str]:
    """Return (country_code, state_code_or_None, note) for an NPA.

    Resolution order:
      1. Hardcoded NANP non-US table (Caribbean + US territories) — wins
         outright over the free-text location.
      2. Canadian NPA set, or a "canada" substring in the location text.
      3. Default: US, with the state extracted from the location text.
    """
    # Hardcoded NANP non-US lookup wins (covers Caribbean + territories)
    if npa in _NANP_NON_US_BY_NPA:
        country, name = _NANP_NON_US_BY_NPA[npa]
        if npa in _US_TERRITORY_NPAS:
            # US territories stay country=US; the ISO-2 code ("PR", "GU",
            # ...) goes in the state slot.
            return "US", country, name
        return country, None, name

    # Canadian area codes — location typically includes the province name
    # or "Canada".
    if npa in _CANADIAN_NPAS or "canada" in location.lower():
        return "CA", None, location or "Canada"

    # Default: US. State is extracted from the free-text location column.
    return "US", _extract_us_state(location), location
|
|
|
|
|
|
# USPS state/territory codes — used to extract state from the free-text
|
|
# "Location" column ("CALIFORNIA" / "New York" / "CA" / "NY").
|
|
_USPS_STATES = {
|
|
"AL":"ALABAMA","AK":"ALASKA","AZ":"ARIZONA","AR":"ARKANSAS","CA":"CALIFORNIA",
|
|
"CO":"COLORADO","CT":"CONNECTICUT","DE":"DELAWARE","DC":"DISTRICT OF COLUMBIA",
|
|
"FL":"FLORIDA","GA":"GEORGIA","HI":"HAWAII","ID":"IDAHO","IL":"ILLINOIS",
|
|
"IN":"INDIANA","IA":"IOWA","KS":"KANSAS","KY":"KENTUCKY","LA":"LOUISIANA",
|
|
"ME":"MAINE","MD":"MARYLAND","MA":"MASSACHUSETTS","MI":"MICHIGAN","MN":"MINNESOTA",
|
|
"MS":"MISSISSIPPI","MO":"MISSOURI","MT":"MONTANA","NE":"NEBRASKA","NV":"NEVADA",
|
|
"NH":"NEW HAMPSHIRE","NJ":"NEW JERSEY","NM":"NEW MEXICO","NY":"NEW YORK",
|
|
"NC":"NORTH CAROLINA","ND":"NORTH DAKOTA","OH":"OHIO","OK":"OKLAHOMA","OR":"OREGON",
|
|
"PA":"PENNSYLVANIA","RI":"RHODE ISLAND","SC":"SOUTH CAROLINA","SD":"SOUTH DAKOTA",
|
|
"TN":"TENNESSEE","TX":"TEXAS","UT":"UTAH","VT":"VERMONT","VA":"VIRGINIA",
|
|
"WA":"WASHINGTON","WV":"WEST VIRGINIA","WI":"WISCONSIN","WY":"WYOMING",
|
|
}
|
|
|
|
|
|
def _extract_us_state(location: str) -> str | None:
|
|
if not location:
|
|
return None
|
|
up = location.upper()
|
|
# Try explicit 2-char token first
|
|
for token in up.replace(",", " ").split():
|
|
if token in _USPS_STATES:
|
|
return token
|
|
# Then try full-name match
|
|
for code, full_name in _USPS_STATES.items():
|
|
if full_name in up:
|
|
return code
|
|
return None
|
|
|
|
|
|
def _load_snapshot() -> str | None:
    """Return the bundled snapshot CSV's text, or None when none is bundled."""
    snapshot = SNAPSHOT_PATH
    if snapshot.exists():
        return snapshot.read_text(encoding="utf-8", errors="replace")
    return None
|
|
|
|
|
|
def import_rows(rows: Iterable[dict]) -> int:
    """Upsert into nanpa_area_codes. Returns count written.

    Each row dict must carry "npa", "state", "country" and may carry "note".
    Duplicated NPAs are fine — later rows win via the ON CONFLICT update,
    and execute_batch preserves input order.

    Improvements over the row-at-a-time version: batches the upserts with
    psycopg2.extras.execute_batch (already imported by this module, one
    round-trip per page instead of one per row), and runs inside the
    connection's transaction context so a failure rolls back cleanly before
    the connection is closed.
    """
    upsert_sql = """
        INSERT INTO nanpa_area_codes (npa, state, country, note, updated_at)
        VALUES (%s, %s, %s, %s, NOW())
        ON CONFLICT (npa) DO UPDATE SET
            state = EXCLUDED.state,
            country = EXCLUDED.country,
            note = EXCLUDED.note,
            updated_at = NOW()
    """
    params = [
        (row["npa"], row["state"], row["country"], row.get("note")) for row in rows
    ]
    conn = psycopg2.connect(DATABASE_URL)
    try:
        # `with conn:` commits on success and rolls back on exception; it does
        # NOT close the connection — the finally below does that.
        with conn, conn.cursor() as cur:
            psycopg2.extras.execute_batch(cur, upsert_sql, params)
    finally:
        conn.close()
    return len(params)
|
|
|
|
|
|
def main() -> None:
    """CLI entry point: fetch (or load snapshot) NANPA data, parse, upsert.

    Exits 1 when DATABASE_URL is unset or no CSV source is available.
    """
    parser = argparse.ArgumentParser(description="Import NANPA area codes.")
    parser.add_argument(
        "--snapshot-only",
        action="store_true",
        help="Skip the network fetch; use the bundled snapshot CSV only.",
    )
    args = parser.parse_args()

    if not DATABASE_URL:
        log.error("DATABASE_URL not set — aborting")
        sys.exit(1)

    # Prefer the live NANPA feed; fall back to the bundled snapshot.
    csv_text: str | None = None
    if not args.snapshot_only:
        csv_text = _fetch_csv(NANPA_URL)
    if csv_text is None:
        csv_text = _load_snapshot()
    if csv_text is None:
        log.error(
            "No NANPA data available — network fetch failed and no snapshot at %s",
            SNAPSHOT_PATH,
        )
        sys.exit(1)

    rows = list(_parse_nanpa_csv(csv_text))
    log.info("Parsed %d NPA rows from input", len(rows))

    # Ensure the hardcoded NANP non-US set is present even if the public CSV
    # misses it (the NANPA report sometimes omits rows). Only backfill NPAs
    # the CSV did NOT supply: CSV-sourced rows were already classified through
    # the same table by _resolve_geography, and skipping present ones avoids
    # redundant duplicate upserts (which also inflated the written count).
    seen_npas = {row["npa"] for row in rows}
    for npa, (country, name) in _NANP_NON_US_BY_NPA.items():
        if npa in seen_npas:
            continue
        is_territory = npa in _US_TERRITORY_NPAS
        rows.append(
            {
                "npa": npa,
                "state": country if is_territory else None,
                "country": "US" if is_territory else country,
                "note": name,
            }
        )

    written = import_rows(rows)
    log.info("Upserted %d NPA records", written)


if __name__ == "__main__":
    main()
|