""" USAC 499-A Data Importer Logs into the USAC E-File system with delegated access credentials, navigates to a filer's most recent Form 499-A submission, and extracts: - Entity identification (name, EIN, address, contact) - Service categories (Line 105) - Revenue data (Blocks 3-4) - Filing history Prerequisites: - USAC E-File credentials (USAC_USERNAME / USAC_PASSWORD env vars) - The client must have granted access to our account Usage: python3 -m scripts.workers.usac_import --filer-id 812345 # Via job server: POST /jobs {"action": "import_499a", "filer_id": "812345"} """ from __future__ import annotations import argparse import asyncio import json import logging import os import re import sys from datetime import datetime LOG = logging.getLogger("workers.usac_import") logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s") USAC_URL = "https://forms.universalservice.org" USAC_USERNAME = os.getenv("USAC_USERNAME", "") USAC_PASSWORD = os.getenv("USAC_PASSWORD", "") async def import_499a(filer_id: str) -> dict: """Import Form 499-A data from USAC E-File for a given filer ID. Returns a dict with entity info, service categories, and revenue lines, or an error dict if import fails. """ if not USAC_USERNAME or not USAC_PASSWORD: return {"success": False, "error": "USAC credentials not configured. Set USAC_USERNAME and USAC_PASSWORD."} from playwright.async_api import async_playwright result = { "success": False, "filer_id": filer_id, "entity": {}, "service_categories": [], "revenue_lines": [], "filing_year": None, "classification": {}, "error": None, } async with async_playwright() as pw: browser = await pw.chromium.launch(headless=True) context = await browser.new_context( viewport={"width": 1280, "height": 900}, user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", ) page = await context.new_page() try: # Step 1: Login to USAC E-File LOG.info("[%s] Navigating to USAC E-File login", filer_id) await page.goto(f"{USAC_URL}/login", wait_until="domcontentloaded", timeout=30000) await page.wait_for_timeout(2000) # Fill login form username_field = page.locator('input[name="username"], input[name="userId"], input[type="text"]').first password_field = page.locator('input[name="password"], input[type="password"]').first if await username_field.count() == 0 or await password_field.count() == 0: result["error"] = "Could not find USAC login form — site may have changed" return result await username_field.fill(USAC_USERNAME) await password_field.fill(USAC_PASSWORD) # Submit login submit_btn = page.locator('button[type="submit"], input[type="submit"], button:has-text("Log In"), button:has-text("Sign In")').first await submit_btn.click() await page.wait_for_timeout(5000) # Check login success if "login" in page.url.lower() or "error" in (await page.content()).lower()[:500]: result["error"] = "USAC login failed — check credentials or access delegation" return result LOG.info("[%s] Logged into USAC E-File", filer_id) # Step 2: Navigate to the filer's 499-A # USAC E-File has a filer search or direct URL pattern search_url = f"{USAC_URL}/Form499/search?filerId={filer_id}" await page.goto(search_url, wait_until="domcontentloaded", timeout=30000) await page.wait_for_timeout(3000) # Try to find the most recent 499-A filing # Look for filing year links or the form data directly page_text = await page.inner_text("body") # If we can see filer data, extract it if filer_id in page_text or "499-A" in page_text: LOG.info("[%s] Found filer data on USAC", filer_id) # Step 3: Extract entity information entity = await _extract_entity_info(page) result["entity"] = entity # Step 4: Extract service categories categories = await _extract_service_categories(page) result["service_categories"] = categories # Step 5: Extract revenue lines revenue = await _extract_revenue_lines(page) result["revenue_lines"] = revenue # Step 6: Determine filing year year_match = re.search(r"(?:20\d{2})\s*(?:Form|Filing|499)", page_text) result["filing_year"] = year_match.group(0)[:4] if year_match else str(datetime.now().year - 1) result["success"] = True LOG.info("[%s] Successfully imported 499-A data for year %s", filer_id, result["filing_year"]) else: # Try clicking into the filer's account filer_link = page.locator(f'a:has-text("{filer_id}"), td:has-text("{filer_id}")') if await filer_link.count() > 0: await filer_link.first.click() await page.wait_for_timeout(3000) entity = await _extract_entity_info(page) result["entity"] = entity categories = await _extract_service_categories(page) result["service_categories"] = categories revenue = await _extract_revenue_lines(page) result["revenue_lines"] = revenue result["success"] = True else: result["error"] = f"Filer ID {filer_id} not found on USAC. Verify access has been granted." except Exception as e: LOG.error("[%s] Import failed: %s", filer_id, e) result["error"] = str(e) finally: await browser.close() return result async def _extract_entity_info(page) -> dict: """Extract entity identification from the 499-A form page.""" entity = {} text = await page.inner_text("body") # Common patterns for USAC form fields patterns = { "legal_name": [r"Legal\s*Name[:\s]*([^\n]+)", r"Entity\s*Name[:\s]*([^\n]+)", r"Company\s*Name[:\s]*([^\n]+)"], "ein": [r"EIN[:\s]*([\d-]+)", r"Tax\s*ID[:\s]*([\d-]+)"], "frn": [r"FRN[:\s]*(\d{10})", r"FCC\s*Registration[:\s]*(\d{10})"], "filer_id": [r"499\s*(?:ID|Filer)[:\s]*(\d+)", r"Filer\s*(?:ID|Number)[:\s]*(\d+)"], "address": [r"(?:Principal|Business)\s*Address[:\s]*([^\n]+)"], "city": [r"City[:\s]*([A-Za-z\s]+?)(?:\s{2,}|,|\n)"], "state": [r"State[:\s]*([A-Z]{2})"], "zip": [r"(?:Zip|ZIP|Postal)[:\s]*(\d{5}(?:-\d{4})?)"], "contact_name": [r"Contact[:\s]*(?:Name)?[:\s]*([^\n,]+)"], "contact_phone": [r"(?:Contact\s*)?Phone[:\s]*([\d\(\)\-\s\+]+)"], "contact_email": [r"(?:Contact\s*)?Email[:\s]*([\w\.\-\+]+@[\w\.\-]+)"], } for field, pats in patterns.items(): for pat in pats: match = re.search(pat, text, re.IGNORECASE) if match: entity[field] = match.group(1).strip() break # Also try to extract from form inputs if the page has a form for field_id in ["legalName", "entityName", "ein", "frn", "address1", "city", "state", "zip"]: try: el = page.locator(f'input[name*="{field_id}" i], input[id*="{field_id}" i]') if await el.count() > 0: val = await el.first.input_value() if val: key = re.sub(r'([A-Z])', r'_\1', field_id).lower().strip('_') entity[key] = val except Exception: pass return entity async def _extract_service_categories(page) -> list[str]: """Extract service category selections (Line 105) from the form.""" categories = [] text = await page.inner_text("body") # Check for each known category in the page text or checked checkboxes category_map = { "interconnected_voip": ["Interconnected VoIP", "iVoIP"], "non_interconnected_voip": ["Non-Interconnected VoIP"], "clec": ["CLEC", "CAP/CLEC", "Competitive Local"], "ixc": ["IXC", "Interexchange"], "wireless": ["Cellular", "PCS", "SMR", "Wireless"], "prepaid": ["Prepaid"], "conferencing": ["Audio Bridging", "Conferencing"], "satellite": ["Satellite"], } for cat_id, keywords in category_map.items(): for kw in keywords: if kw.lower() in text.lower(): categories.append(cat_id) break # Also check checkboxes/radios on the form try: checked = await page.evaluate("""() => { const cats = []; document.querySelectorAll('input[type="checkbox"]:checked, input[type="radio"]:checked').forEach(el => { const label = el.closest('label')?.textContent || el.name || ''; if (label) cats.push(label.trim()); }); return cats; }""") for label in (checked or []): for cat_id, keywords in category_map.items(): if any(kw.lower() in label.lower() for kw in keywords) and cat_id not in categories: categories.append(cat_id) except Exception: pass return categories async def _extract_revenue_lines(page) -> list[dict]: """Extract revenue data from Blocks 3-4 of the 499-A form.""" revenue_lines = [] # Try to extract from table cells or form inputs # Revenue lines are numbered 303-314 (Block 3) and 403-418 (Block 4) for line_num in [303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414]: try: # Try input fields with line number for selector in [ f'input[name*="{line_num}"]', f'input[id*="line{line_num}"]', f'input[id*="Line{line_num}"]', f'td:has-text("Line {line_num}") + td input', ]: el = page.locator(selector) if await el.count() > 0: val = await el.first.input_value() if val and val.strip() and val != "0": revenue_lines.append({ "line": str(line_num), "value": float(val.replace(",", "").replace("$", "")), }) break except Exception: pass # Fallback: try to parse revenue from page text if not revenue_lines: text = await page.inner_text("body") for match in re.finditer(r"Line\s*(\d{3})[:\s]*\$?([\d,]+(?:\.\d{2})?)", text): line_num = match.group(1) value = float(match.group(2).replace(",", "")) if value > 0: revenue_lines.append({"line": line_num, "value": value}) return revenue_lines # ── CLI entry point ─────────────────────────────────────────────────── async def main(): parser = argparse.ArgumentParser(description="Import 499-A data from USAC E-File") parser.add_argument("--filer-id", required=True, help="USAC 499 Filer ID") args = parser.parse_args() result = await import_499a(args.filer_id) print(json.dumps(result, indent=2)) if not result["success"]: sys.exit(1) if __name__ == "__main__": asyncio.run(main())