Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
297 lines
12 KiB
Python
297 lines
12 KiB
Python
"""
|
|
USAC 499-A Data Importer
|
|
|
|
Logs into the USAC E-File system with delegated access credentials,
|
|
navigates to a filer's most recent Form 499-A submission, and extracts:
|
|
- Entity identification (name, EIN, address, contact)
|
|
- Service categories (Line 105)
|
|
- Revenue data (Blocks 3-4)
|
|
- Filing history
|
|
|
|
Prerequisites:
|
|
- USAC E-File credentials (USAC_USERNAME / USAC_PASSWORD env vars)
|
|
- The client must have granted access to our account
|
|
|
|
Usage:
|
|
python3 -m scripts.workers.usac_import --filer-id 812345
|
|
|
|
# Via job server:
|
|
POST /jobs {"action": "import_499a", "filer_id": "812345"}
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
LOG = logging.getLogger("workers.usac_import")
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
|
|
|
|
USAC_URL = "https://forms.universalservice.org"
|
|
USAC_USERNAME = os.getenv("USAC_USERNAME", "")
|
|
USAC_PASSWORD = os.getenv("USAC_PASSWORD", "")
|
|
|
|
|
|
async def import_499a(filer_id: str) -> dict:
|
|
"""Import Form 499-A data from USAC E-File for a given filer ID.
|
|
|
|
Returns a dict with entity info, service categories, and revenue lines,
|
|
or an error dict if import fails.
|
|
"""
|
|
if not USAC_USERNAME or not USAC_PASSWORD:
|
|
return {"success": False, "error": "USAC credentials not configured. Set USAC_USERNAME and USAC_PASSWORD."}
|
|
|
|
from playwright.async_api import async_playwright
|
|
|
|
result = {
|
|
"success": False,
|
|
"filer_id": filer_id,
|
|
"entity": {},
|
|
"service_categories": [],
|
|
"revenue_lines": [],
|
|
"filing_year": None,
|
|
"classification": {},
|
|
"error": None,
|
|
}
|
|
|
|
async with async_playwright() as pw:
|
|
browser = await pw.chromium.launch(headless=True)
|
|
context = await browser.new_context(
|
|
viewport={"width": 1280, "height": 900},
|
|
user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
|
|
)
|
|
page = await context.new_page()
|
|
|
|
try:
|
|
# Step 1: Login to USAC E-File
|
|
LOG.info("[%s] Navigating to USAC E-File login", filer_id)
|
|
await page.goto(f"{USAC_URL}/login", wait_until="domcontentloaded", timeout=30000)
|
|
await page.wait_for_timeout(2000)
|
|
|
|
# Fill login form
|
|
username_field = page.locator('input[name="username"], input[name="userId"], input[type="text"]').first
|
|
password_field = page.locator('input[name="password"], input[type="password"]').first
|
|
|
|
if await username_field.count() == 0 or await password_field.count() == 0:
|
|
result["error"] = "Could not find USAC login form — site may have changed"
|
|
return result
|
|
|
|
await username_field.fill(USAC_USERNAME)
|
|
await password_field.fill(USAC_PASSWORD)
|
|
|
|
# Submit login
|
|
submit_btn = page.locator('button[type="submit"], input[type="submit"], button:has-text("Log In"), button:has-text("Sign In")').first
|
|
await submit_btn.click()
|
|
await page.wait_for_timeout(5000)
|
|
|
|
# Check login success
|
|
if "login" in page.url.lower() or "error" in (await page.content()).lower()[:500]:
|
|
result["error"] = "USAC login failed — check credentials or access delegation"
|
|
return result
|
|
|
|
LOG.info("[%s] Logged into USAC E-File", filer_id)
|
|
|
|
# Step 2: Navigate to the filer's 499-A
|
|
# USAC E-File has a filer search or direct URL pattern
|
|
search_url = f"{USAC_URL}/Form499/search?filerId={filer_id}"
|
|
await page.goto(search_url, wait_until="domcontentloaded", timeout=30000)
|
|
await page.wait_for_timeout(3000)
|
|
|
|
# Try to find the most recent 499-A filing
|
|
# Look for filing year links or the form data directly
|
|
page_text = await page.inner_text("body")
|
|
|
|
# If we can see filer data, extract it
|
|
if filer_id in page_text or "499-A" in page_text:
|
|
LOG.info("[%s] Found filer data on USAC", filer_id)
|
|
|
|
# Step 3: Extract entity information
|
|
entity = await _extract_entity_info(page)
|
|
result["entity"] = entity
|
|
|
|
# Step 4: Extract service categories
|
|
categories = await _extract_service_categories(page)
|
|
result["service_categories"] = categories
|
|
|
|
# Step 5: Extract revenue lines
|
|
revenue = await _extract_revenue_lines(page)
|
|
result["revenue_lines"] = revenue
|
|
|
|
# Step 6: Determine filing year
|
|
year_match = re.search(r"(?:20\d{2})\s*(?:Form|Filing|499)", page_text)
|
|
result["filing_year"] = year_match.group(0)[:4] if year_match else str(datetime.now().year - 1)
|
|
|
|
result["success"] = True
|
|
LOG.info("[%s] Successfully imported 499-A data for year %s", filer_id, result["filing_year"])
|
|
else:
|
|
# Try clicking into the filer's account
|
|
filer_link = page.locator(f'a:has-text("{filer_id}"), td:has-text("{filer_id}")')
|
|
if await filer_link.count() > 0:
|
|
await filer_link.first.click()
|
|
await page.wait_for_timeout(3000)
|
|
|
|
entity = await _extract_entity_info(page)
|
|
result["entity"] = entity
|
|
categories = await _extract_service_categories(page)
|
|
result["service_categories"] = categories
|
|
revenue = await _extract_revenue_lines(page)
|
|
result["revenue_lines"] = revenue
|
|
result["success"] = True
|
|
else:
|
|
result["error"] = f"Filer ID {filer_id} not found on USAC. Verify access has been granted."
|
|
|
|
except Exception as e:
|
|
LOG.error("[%s] Import failed: %s", filer_id, e)
|
|
result["error"] = str(e)
|
|
finally:
|
|
await browser.close()
|
|
|
|
return result
|
|
|
|
|
|
async def _extract_entity_info(page) -> dict:
|
|
"""Extract entity identification from the 499-A form page."""
|
|
entity = {}
|
|
text = await page.inner_text("body")
|
|
|
|
# Common patterns for USAC form fields
|
|
patterns = {
|
|
"legal_name": [r"Legal\s*Name[:\s]*([^\n]+)", r"Entity\s*Name[:\s]*([^\n]+)", r"Company\s*Name[:\s]*([^\n]+)"],
|
|
"ein": [r"EIN[:\s]*([\d-]+)", r"Tax\s*ID[:\s]*([\d-]+)"],
|
|
"frn": [r"FRN[:\s]*(\d{10})", r"FCC\s*Registration[:\s]*(\d{10})"],
|
|
"filer_id": [r"499\s*(?:ID|Filer)[:\s]*(\d+)", r"Filer\s*(?:ID|Number)[:\s]*(\d+)"],
|
|
"address": [r"(?:Principal|Business)\s*Address[:\s]*([^\n]+)"],
|
|
"city": [r"City[:\s]*([A-Za-z\s]+?)(?:\s{2,}|,|\n)"],
|
|
"state": [r"State[:\s]*([A-Z]{2})"],
|
|
"zip": [r"(?:Zip|ZIP|Postal)[:\s]*(\d{5}(?:-\d{4})?)"],
|
|
"contact_name": [r"Contact[:\s]*(?:Name)?[:\s]*([^\n,]+)"],
|
|
"contact_phone": [r"(?:Contact\s*)?Phone[:\s]*([\d\(\)\-\s\+]+)"],
|
|
"contact_email": [r"(?:Contact\s*)?Email[:\s]*([\w\.\-\+]+@[\w\.\-]+)"],
|
|
}
|
|
|
|
for field, pats in patterns.items():
|
|
for pat in pats:
|
|
match = re.search(pat, text, re.IGNORECASE)
|
|
if match:
|
|
entity[field] = match.group(1).strip()
|
|
break
|
|
|
|
# Also try to extract from form inputs if the page has a form
|
|
for field_id in ["legalName", "entityName", "ein", "frn", "address1", "city", "state", "zip"]:
|
|
try:
|
|
el = page.locator(f'input[name*="{field_id}" i], input[id*="{field_id}" i]')
|
|
if await el.count() > 0:
|
|
val = await el.first.input_value()
|
|
if val:
|
|
key = re.sub(r'([A-Z])', r'_\1', field_id).lower().strip('_')
|
|
entity[key] = val
|
|
except Exception:
|
|
pass
|
|
|
|
return entity
|
|
|
|
|
|
async def _extract_service_categories(page) -> list[str]:
|
|
"""Extract service category selections (Line 105) from the form."""
|
|
categories = []
|
|
text = await page.inner_text("body")
|
|
|
|
# Check for each known category in the page text or checked checkboxes
|
|
category_map = {
|
|
"interconnected_voip": ["Interconnected VoIP", "iVoIP"],
|
|
"non_interconnected_voip": ["Non-Interconnected VoIP"],
|
|
"clec": ["CLEC", "CAP/CLEC", "Competitive Local"],
|
|
"ixc": ["IXC", "Interexchange"],
|
|
"wireless": ["Cellular", "PCS", "SMR", "Wireless"],
|
|
"prepaid": ["Prepaid"],
|
|
"conferencing": ["Audio Bridging", "Conferencing"],
|
|
"satellite": ["Satellite"],
|
|
}
|
|
|
|
for cat_id, keywords in category_map.items():
|
|
for kw in keywords:
|
|
if kw.lower() in text.lower():
|
|
categories.append(cat_id)
|
|
break
|
|
|
|
# Also check checkboxes/radios on the form
|
|
try:
|
|
checked = await page.evaluate("""() => {
|
|
const cats = [];
|
|
document.querySelectorAll('input[type="checkbox"]:checked, input[type="radio"]:checked').forEach(el => {
|
|
const label = el.closest('label')?.textContent || el.name || '';
|
|
if (label) cats.push(label.trim());
|
|
});
|
|
return cats;
|
|
}""")
|
|
for label in (checked or []):
|
|
for cat_id, keywords in category_map.items():
|
|
if any(kw.lower() in label.lower() for kw in keywords) and cat_id not in categories:
|
|
categories.append(cat_id)
|
|
except Exception:
|
|
pass
|
|
|
|
return categories
|
|
|
|
|
|
async def _extract_revenue_lines(page) -> list[dict]:
|
|
"""Extract revenue data from Blocks 3-4 of the 499-A form."""
|
|
revenue_lines = []
|
|
|
|
# Try to extract from table cells or form inputs
|
|
# Revenue lines are numbered 303-314 (Block 3) and 403-418 (Block 4)
|
|
for line_num in [303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
|
|
403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414]:
|
|
try:
|
|
# Try input fields with line number
|
|
for selector in [
|
|
f'input[name*="{line_num}"]',
|
|
f'input[id*="line{line_num}"]',
|
|
f'input[id*="Line{line_num}"]',
|
|
f'td:has-text("Line {line_num}") + td input',
|
|
]:
|
|
el = page.locator(selector)
|
|
if await el.count() > 0:
|
|
val = await el.first.input_value()
|
|
if val and val.strip() and val != "0":
|
|
revenue_lines.append({
|
|
"line": str(line_num),
|
|
"value": float(val.replace(",", "").replace("$", "")),
|
|
})
|
|
break
|
|
except Exception:
|
|
pass
|
|
|
|
# Fallback: try to parse revenue from page text
|
|
if not revenue_lines:
|
|
text = await page.inner_text("body")
|
|
for match in re.finditer(r"Line\s*(\d{3})[:\s]*\$?([\d,]+(?:\.\d{2})?)", text):
|
|
line_num = match.group(1)
|
|
value = float(match.group(2).replace(",", ""))
|
|
if value > 0:
|
|
revenue_lines.append({"line": line_num, "value": value})
|
|
|
|
return revenue_lines
|
|
|
|
|
|
# ── CLI entry point ───────────────────────────────────────────────────
|
|
|
|
async def main():
|
|
parser = argparse.ArgumentParser(description="Import 499-A data from USAC E-File")
|
|
parser.add_argument("--filer-id", required=True, help="USAC 499 Filer ID")
|
|
args = parser.parse_args()
|
|
|
|
result = await import_499a(args.filer_id)
|
|
print(json.dumps(result, indent=2))
|
|
|
|
if not result["success"]:
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main())
|