new-site/scripts/workers/usac_import.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

297 lines
12 KiB
Python

"""
USAC 499-A Data Importer
Logs into the USAC E-File system with delegated access credentials,
navigates to a filer's most recent Form 499-A submission, and extracts:
- Entity identification (name, EIN, address, contact)
- Service categories (Line 105)
- Revenue data (Blocks 3-4)
- Filing history
Prerequisites:
- USAC E-File credentials (USAC_USERNAME / USAC_PASSWORD env vars)
- The client must have granted access to our account
Usage:
python3 -m scripts.workers.usac_import --filer-id 812345
# Via job server:
POST /jobs {"action": "import_499a", "filer_id": "812345"}
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import os
import re
import sys
from datetime import datetime
LOG = logging.getLogger("workers.usac_import")
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
USAC_URL = "https://forms.universalservice.org"
USAC_USERNAME = os.getenv("USAC_USERNAME", "")
USAC_PASSWORD = os.getenv("USAC_PASSWORD", "")
async def import_499a(filer_id: str) -> dict:
    """Import Form 499-A data from USAC E-File for a given filer ID.

    Logs in with the module-level USAC_USERNAME/USAC_PASSWORD credentials,
    opens the filer search page, and scrapes entity identification,
    service categories (Line 105), and revenue lines (Blocks 3-4) from the
    most recent 499-A filing visible to our delegated-access account.

    Args:
        filer_id: USAC 499 Filer ID (numeric string).

    Returns:
        A dict with keys: success, filer_id, entity, service_categories,
        revenue_lines, filing_year, classification, error. On failure,
        "success" is False and "error" describes what went wrong.
    """
    if not USAC_USERNAME or not USAC_PASSWORD:
        return {"success": False, "error": "USAC credentials not configured. Set USAC_USERNAME and USAC_PASSWORD."}
    # Imported lazily so the module can be loaded without playwright installed.
    from playwright.async_api import async_playwright
    result = {
        "success": False,
        "filer_id": filer_id,
        "entity": {},
        "service_categories": [],
        "revenue_lines": [],
        "filing_year": None,
        "classification": {},
        "error": None,
    }

    async def _scrape_form(page) -> None:
        # Shared by both navigation paths: pull all form data from the
        # currently loaded page into `result`.
        result["entity"] = await _extract_entity_info(page)
        result["service_categories"] = await _extract_service_categories(page)
        result["revenue_lines"] = await _extract_revenue_lines(page)

    def _filing_year(page_text: str) -> str:
        # Best-effort: find a 20xx year adjacent to "Form"/"Filing"/"499";
        # default to the prior calendar year (499-A reports prior-year revenue).
        year_match = re.search(r"(?:20\d{2})\s*(?:Form|Filing|499)", page_text)
        return year_match.group(0)[:4] if year_match else str(datetime.now().year - 1)

    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        context = await browser.new_context(
            viewport={"width": 1280, "height": 900},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        )
        page = await context.new_page()
        try:
            # Step 1: Login to USAC E-File
            LOG.info("[%s] Navigating to USAC E-File login", filer_id)
            await page.goto(f"{USAC_URL}/login", wait_until="domcontentloaded", timeout=30000)
            await page.wait_for_timeout(2000)
            # Fill login form; selectors are defensive because the USAC
            # markup has changed before.
            username_field = page.locator('input[name="username"], input[name="userId"], input[type="text"]').first
            password_field = page.locator('input[name="password"], input[type="password"]').first
            if await username_field.count() == 0 or await password_field.count() == 0:
                result["error"] = "Could not find USAC login form — site may have changed"
                return result
            await username_field.fill(USAC_USERNAME)
            await password_field.fill(USAC_PASSWORD)
            # Submit login
            submit_btn = page.locator('button[type="submit"], input[type="submit"], button:has-text("Log In"), button:has-text("Sign In")').first
            await submit_btn.click()
            await page.wait_for_timeout(5000)
            # Heuristic login check: still on a login URL, or an error banner
            # near the top of the document, means credentials were rejected.
            if "login" in page.url.lower() or "error" in (await page.content()).lower()[:500]:
                result["error"] = "USAC login failed — check credentials or access delegation"
                return result
            LOG.info("[%s] Logged into USAC E-File", filer_id)
            # Step 2: Navigate to the filer's 499-A via the search URL pattern
            search_url = f"{USAC_URL}/Form499/search?filerId={filer_id}"
            await page.goto(search_url, wait_until="domcontentloaded", timeout=30000)
            await page.wait_for_timeout(3000)
            page_text = await page.inner_text("body")
            if filer_id in page_text or "499-A" in page_text:
                # Filer data is visible directly on the search results page.
                LOG.info("[%s] Found filer data on USAC", filer_id)
                await _scrape_form(page)
                result["filing_year"] = _filing_year(page_text)
                result["success"] = True
                LOG.info("[%s] Successfully imported 499-A data for year %s", filer_id, result["filing_year"])
            else:
                # Fallback: click into the filer's account row, then scrape.
                filer_link = page.locator(f'a:has-text("{filer_id}"), td:has-text("{filer_id}")')
                if await filer_link.count() > 0:
                    await filer_link.first.click()
                    await page.wait_for_timeout(3000)
                    await _scrape_form(page)
                    # BUGFIX: this path previously left filing_year as None;
                    # derive it from the freshly loaded page like the primary path.
                    result["filing_year"] = _filing_year(await page.inner_text("body"))
                    result["success"] = True
                else:
                    result["error"] = f"Filer ID {filer_id} not found on USAC. Verify access has been granted."
        except Exception as e:
            LOG.error("[%s] Import failed: %s", filer_id, e)
            result["error"] = str(e)
        finally:
            await browser.close()
    return result
async def _extract_entity_info(page) -> dict:
    """Scrape entity identification fields from a 499-A form page.

    Tries labeled-text regexes against the rendered body first, then
    falls back to reading values directly out of named form inputs
    (which overwrite any regex hits for the same field). Returns
    whatever subset of fields could be found.
    """
    info: dict = {}
    body_text = await page.inner_text("body")
    # Candidate regexes per field; the first one that matches wins.
    patterns = {
        "legal_name": [r"Legal\s*Name[:\s]*([^\n]+)", r"Entity\s*Name[:\s]*([^\n]+)", r"Company\s*Name[:\s]*([^\n]+)"],
        "ein": [r"EIN[:\s]*([\d-]+)", r"Tax\s*ID[:\s]*([\d-]+)"],
        "frn": [r"FRN[:\s]*(\d{10})", r"FCC\s*Registration[:\s]*(\d{10})"],
        "filer_id": [r"499\s*(?:ID|Filer)[:\s]*(\d+)", r"Filer\s*(?:ID|Number)[:\s]*(\d+)"],
        "address": [r"(?:Principal|Business)\s*Address[:\s]*([^\n]+)"],
        "city": [r"City[:\s]*([A-Za-z\s]+?)(?:\s{2,}|,|\n)"],
        "state": [r"State[:\s]*([A-Z]{2})"],
        "zip": [r"(?:Zip|ZIP|Postal)[:\s]*(\d{5}(?:-\d{4})?)"],
        "contact_name": [r"Contact[:\s]*(?:Name)?[:\s]*([^\n,]+)"],
        "contact_phone": [r"(?:Contact\s*)?Phone[:\s]*([\d\(\)\-\s\+]+)"],
        "contact_email": [r"(?:Contact\s*)?Email[:\s]*([\w\.\-\+]+@[\w\.\-]+)"],
    }
    for field, candidates in patterns.items():
        for rx in candidates:
            if (m := re.search(rx, body_text, re.IGNORECASE)) is not None:
                info[field] = m.group(1).strip()
                break
    # Form inputs can carry values the rendered text does not show.
    for field_id in ["legalName", "entityName", "ein", "frn", "address1", "city", "state", "zip"]:
        try:
            widget = page.locator(f'input[name*="{field_id}" i], input[id*="{field_id}" i]')
            if await widget.count() > 0:
                value = await widget.first.input_value()
                if value:
                    # camelCase -> snake_case, e.g. legalName -> legal_name
                    snake_key = re.sub(r'([A-Z])', r'_\1', field_id).lower().strip('_')
                    info[snake_key] = value
        except Exception:
            pass
    return info
async def _extract_service_categories(page) -> list[str]:
    """Extract service category selections (Line 105) from the form.

    Matches known category keywords against the page text, then against
    the labels of checked checkboxes/radios. Returns internal category
    IDs without duplicates.
    """
    categories = []
    text = await page.inner_text("body")
    # Map internal category IDs to the keywords USAC uses on the form.
    category_map = {
        "interconnected_voip": ["Interconnected VoIP", "iVoIP"],
        "non_interconnected_voip": ["Non-Interconnected VoIP"],
        "clec": ["CLEC", "CAP/CLEC", "Competitive Local"],
        "ixc": ["IXC", "Interexchange"],
        "wireless": ["Cellular", "PCS", "SMR", "Wireless"],
        "prepaid": ["Prepaid"],
        "conferencing": ["Audio Bridging", "Conferencing"],
        "satellite": ["Satellite"],
    }
    # PERF: lowercase the (potentially large) body text once instead of
    # recomputing it for every keyword comparison.
    text_lower = text.lower()
    for cat_id, keywords in category_map.items():
        if any(kw.lower() in text_lower for kw in keywords):
            categories.append(cat_id)
    # Also check checkboxes/radios on the form
    try:
        checked = await page.evaluate("""() => {
const cats = [];
document.querySelectorAll('input[type="checkbox"]:checked, input[type="radio"]:checked').forEach(el => {
const label = el.closest('label')?.textContent || el.name || '';
if (label) cats.push(label.trim());
});
return cats;
}""")
        for label in (checked or []):
            label_lower = label.lower()
            for cat_id, keywords in category_map.items():
                if cat_id not in categories and any(kw.lower() in label_lower for kw in keywords):
                    categories.append(cat_id)
    except Exception:
        # page.evaluate can fail (frames, navigation); the text matching
        # above already produced a best-effort result.
        pass
    return categories
async def _extract_revenue_lines(page) -> list[dict]:
    """Extract revenue data from Blocks 3-4 of the 499-A form.

    Revenue lines are numbered 303-314 (Block 3) and 403-418 (Block 4).
    Tries form inputs first, then falls back to parsing the page text.

    Returns:
        A list of {"line": str, "value": float} dicts for non-zero lines.
    """
    revenue_lines = []
    # BUGFIX: the previous hard-coded list stopped Block 4 at line 414,
    # silently dropping lines 415-418. Ranges are inclusive of the end line.
    for line_num in [*range(303, 315), *range(403, 419)]:
        try:
            # Try input fields keyed by line number under several naming schemes.
            for selector in [
                f'input[name*="{line_num}"]',
                f'input[id*="line{line_num}"]',
                f'input[id*="Line{line_num}"]',
                f'td:has-text("Line {line_num}") + td input',
            ]:
                el = page.locator(selector)
                if await el.count() > 0:
                    val = await el.first.input_value()
                    if val and val.strip():
                        # Strip currency formatting before parsing.
                        amount = float(val.replace(",", "").replace("$", ""))
                        # Skip zero lines consistently (previously "0" was
                        # skipped but "0.00"/"$0" slipped through).
                        if amount > 0:
                            revenue_lines.append({"line": str(line_num), "value": amount})
                    break
        except Exception:
            # Bad selector match or non-numeric value — skip this line.
            pass
    # Fallback: parse "Line NNN: $X,XXX.XX" patterns out of the page text.
    if not revenue_lines:
        text = await page.inner_text("body")
        for match in re.finditer(r"Line\s*(\d{3})[:\s]*\$?([\d,]+(?:\.\d{2})?)", text):
            line_num = match.group(1)
            value = float(match.group(2).replace(",", ""))
            if value > 0:
                revenue_lines.append({"line": line_num, "value": value})
    return revenue_lines
# ── CLI entry point ───────────────────────────────────────────────────
async def main():
    """CLI entry point: import one filer's 499-A data and print it as JSON.

    Exits with status 1 when the import reports failure.
    """
    cli = argparse.ArgumentParser(description="Import 499-A data from USAC E-File")
    cli.add_argument("--filer-id", required=True, help="USAC 499 Filer ID")
    opts = cli.parse_args()
    data = await import_499a(opts.filer_id)
    print(json.dumps(data, indent=2))
    if not data["success"]:
        sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())