"""Undetected Playwright launcher for FCC / USAC / BDC portals. All automated filing handlers (RMD, CPNI, Form 499-A, BDC) go through this helper instead of importing ``playwright`` directly. We prefer ``patchright`` (a drop-in Playwright replacement that patches ``navigator.webdriver``, CDP leakage, runtime-enable fingerprints, and the ``--disable-blink-features= AutomationControlled`` artifact) and fall back to vanilla Playwright with the same stealth init scripts we use in ``scripts/formation/base.py`` if patchright is not installed. State formation portals that sit behind Incapsula/Akamai (Nevada, Delaware, etc.) should also use this helper — see ``docs/state-automation-status.md`` for the list. Optional residential proxy support: set ``UNDETECTED_PROXY_URL`` in the environment (e.g. ``http://user:pass@proxy.example.com:8080``) and pass ``use_proxy=True`` when launching. """ from __future__ import annotations import logging import os import random from contextlib import asynccontextmanager from typing import TYPE_CHECKING, AsyncIterator if TYPE_CHECKING: from playwright.async_api import Browser, BrowserContext, Page, Playwright logger = logging.getLogger(__name__) # Prefer patchright; fall back to playwright with manual stealth patches. _USING_PATCHRIGHT = False try: from patchright.async_api import async_playwright # type: ignore _USING_PATCHRIGHT = True logger.info("undetected_browser: using patchright") except ImportError: from playwright.async_api import async_playwright # type: ignore logger.warning( "undetected_browser: patchright not installed — falling back to " "vanilla playwright with home-grown stealth patches. Install with " "`pip install patchright` and run `patchright install chromium` " "for best results against bot-detection-heavy portals." ) # Common modern Chrome UAs. We rotate between a handful so that a burst of # concurrent submissions doesn't all look like the same client. 
_USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
]

# Common viewports; one is chosen at random per launch to vary the
# fingerprint (the sizes themselves are used as-is, no extra jitter).
_VIEWPORTS = [
    {"width": 1280, "height": 900},
    {"width": 1440, "height": 900},
    {"width": 1536, "height": 864},
    {"width": 1920, "height": 1080},
]

# Init script run on every page — only used on the vanilla-playwright path;
# patchright handles all of these patches (and many more) internally.
#
# NOTE: the original ``permissions.query`` is invoked with ``.call(...)`` so it
# keeps its ``Permissions`` receiver. Calling the detached native function
# directly fails with "Illegal invocation" for every permission other than
# 'notifications'.
_STEALTH_INIT_SCRIPT = """
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
Object.defineProperty(navigator, 'plugins', {
  get: () => [1, 2, 3, 4, 5].map(() => ({ name: 'Chrome PDF Plugin' })),
});
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
window.chrome = { runtime: {} };
const originalQuery = window.navigator.permissions && window.navigator.permissions.query;
if (originalQuery) {
  window.navigator.permissions.query = (parameters) =>
    parameters.name === 'notifications'
      ? Promise.resolve({ state: Notification.permission })
      : originalQuery.call(window.navigator.permissions, parameters);
}
"""


def _proxy_config() -> dict | None:
    """Read ``UNDETECTED_PROXY_URL`` and turn it into a Playwright proxy dict.

    Returns:
        ``None`` when the variable is unset or blank; otherwise a dict with a
        ``server`` key and, when present in the URL, ``username`` and
        ``password`` keys.

    Raises:
        ValueError: if the URL has no hostname or carries an invalid port.
    """
    # Imported locally so this helper stays self-contained.
    from urllib.parse import unquote, urlparse

    raw_url = os.environ.get("UNDETECTED_PROXY_URL", "").strip()
    if not raw_url:
        return None

    # Accept the short ``host:port`` / ``user:pass@host:port`` form. Without a
    # scheme, urlparse would treat the host as the scheme and yield no
    # hostname at all.
    if "://" not in raw_url:
        raw_url = f"http://{raw_url}"

    parsed = urlparse(raw_url)
    host = parsed.hostname
    if not host:
        # The URL is deliberately not echoed: it may embed credentials.
        raise ValueError(
            "UNDETECTED_PROXY_URL has no hostname; expected something like "
            "http://user:pass@proxy.example.com:8080"
        )
    # ``hostname`` strips the brackets from IPv6 literals; restore them so the
    # rebuilt server URL stays well-formed.
    if ":" in host:
        host = f"[{host}]"

    # Playwright's proxy dict supports: server, username, password, bypass
    server = f"{parsed.scheme}://{host}"
    if parsed.port:
        server += f":{parsed.port}"

    cfg: dict = {"server": server}
    # Credentials inside a URL are percent-encoded; the proxy dict carries
    # them as separate plain fields, so decode them here.
    if parsed.username:
        cfg["username"] = unquote(parsed.username)
    if parsed.password:
        cfg["password"] = unquote(parsed.password)
    return cfg


async def launch_context(
    playwright: "Playwright",
    *,
    headless: bool = True,
    use_proxy: bool = False,
    timezone_id: str = "America/New_York",
    locale: str = "en-US",
    storage_state: str | None = None,
) -> "tuple[Browser, BrowserContext]":
    """Launch a Chromium browser + context with stealth settings.

    Args:
        playwright: A started Playwright (or patchright) instance.
        headless: Forwarded to ``chromium.launch``.
        use_proxy: When true, attach the proxy described by
            ``UNDETECTED_PROXY_URL``; a warning is logged and no proxy is
            used if the variable is unset.
        timezone_id: Timezone for the new context.
        locale: Locale for the new context.
        storage_state: Optional storage state forwarded to ``new_context``.

    Returns:
        ``(browser, context)`` — caller is responsible for closing both
        (prefer the :func:`undetected_browser` context manager instead).

    Raises:
        ValueError: if ``use_proxy`` is true and ``UNDETECTED_PROXY_URL`` is
            malformed.
    """
    # Build the context options first so a bad proxy URL fails before any
    # browser process has been started.
    context_kwargs: dict = {
        # Copy so the shared module-level viewport dict is never handed out.
        "viewport": dict(random.choice(_VIEWPORTS)),
        "user_agent": random.choice(_USER_AGENTS),
        "locale": locale,
        "timezone_id": timezone_id,
        "java_script_enabled": True,
    }
    if use_proxy:
        proxy = _proxy_config()
        if proxy:
            context_kwargs["proxy"] = proxy
        else:
            logger.warning(
                "undetected_browser: use_proxy=True but UNDETECTED_PROXY_URL is unset"
            )
    if storage_state:
        context_kwargs["storage_state"] = storage_state

    launch_args = [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
    ]
    # On the vanilla-playwright path we add the extra flag that hides the
    # AutomationControlled fingerprint. Patchright already does this (and
    # adding the flag with patchright is harmless but redundant).
    if not _USING_PATCHRIGHT:
        launch_args.append("--disable-blink-features=AutomationControlled")

    browser = await playwright.chromium.launch(
        headless=headless,
        args=launch_args,
    )
    try:
        context = await browser.new_context(**context_kwargs)
        if not _USING_PATCHRIGHT:
            await context.add_init_script(_STEALTH_INIT_SCRIPT)
    except BaseException:
        # Don't leave the browser running when context setup fails or the
        # task is cancelled; the caller never received a handle to close.
        await browser.close()
        raise
    return browser, context


@asynccontextmanager
async def undetected_browser(
    *,
    headless: bool = True,
    use_proxy: bool = False,
    timezone_id: str = "America/New_York",
    locale: str = "en-US",
    storage_state: str | None = None,
) -> AsyncIterator["tuple[BrowserContext, Page]"]:
    """Async context manager yielding a (context, page) pair.

    All keyword arguments are forwarded unchanged to :func:`launch_context`.
    The context and the browser are both closed on exit, including when the
    body raises.

    Example::

        async with undetected_browser(headless=False) as (ctx, page):
            await page.goto("https://apps.fcc.gov/rmd/")
            ...
    """
    async with async_playwright() as pw:
        browser, context = await launch_context(
            pw,
            headless=headless,
            use_proxy=use_proxy,
            timezone_id=timezone_id,
            locale=locale,
            storage_state=storage_state,
        )
        try:
            page = await context.new_page()
            yield context, page
        finally:
            # Nested so the browser is still closed when closing the context
            # itself raises.
            try:
                await context.close()
            finally:
                await browser.close()


# ─── Human-like interaction helpers (lifted from scripts/formation/base.py) ──


async def human_delay(min_s: float = 1.0, max_s: float = 3.0) -> None:
    """Random delay to appear human. Mirrors the formation base helper.

    Sleeps for a duration drawn uniformly from ``min_s`` to ``max_s`` seconds.
    """
    import asyncio

    await asyncio.sleep(random.uniform(min_s, max_s))


async def type_slowly(page: "Page", selector: str, text: str, delay_ms: int = 50) -> None:
    """Type text character-by-character with jitter.

    Clicks ``selector`` first, then issues one ``page.type`` call per
    character using a delay of ``delay_ms`` plus a random 0–30 ms.
    """
    await page.click(selector)
    for char in text:
        await page.type(selector, char, delay=delay_ms + random.randint(0, 30))


def is_using_patchright() -> bool:
    """Return True if patchright is the active backend."""
    return _USING_PATCHRIGHT