"""Probe whether our undetected (patchright) browser can reach NPPES / PECOS and how detectable it looks. Honest, no assertions from memory — it visits real endpoints and a fingerprint-detection page and prints what it sees. Run: python3 scripts/probe_npi_undetected.py """ import asyncio import os import sys sys.path.insert(0, "scripts") from workers.services.telecom.undetected_browser import ( # noqa: E402 undetected_browser, is_using_patchright, ) # Route healthcare (NPPES/PECOS/I&A) traffic through the residential SOCKS # proxy (username "performancewest"). Set HEALTHCARE_PROXY_URL=1 (or any # truthy value) to force it; the proxy is also used automatically whenever # HEALTHCARE_PROXY_URL is configured with a real URL. USE_HEALTHCARE_PROXY = bool(os.environ.get("HEALTHCARE_PROXY_URL", "").strip()) TARGETS = [ # NPPES public registry UI (where NPI lookups/updates happen) ("NPPES registry", "https://npiregistry.cms.hhs.gov/"), # NPPES public API (already used by our free tool — sanity check) ("NPPES API", "https://npiregistry.cms.hhs.gov/api/?version=2.1&number=1234567893"), # PECOS / I&A login surface (Identity & Access) ("PECOS portal", "https://pecos.cms.hhs.gov/pecos/login.do"), ("I&A portal", "https://nppes.cms.hhs.gov/IAWeb/login.do"), ] # Public bot-detection fingerprint check. SANNYSOFT = "https://bot.sannysoft.com/" async def probe(headless: bool): print(f"\n{'='*60}\nbackend = {'patchright' if is_using_patchright() else 'vanilla-playwright'} | headless={headless}\n{'='*60}") async with undetected_browser( headless=headless, use_proxy="HEALTHCARE_PROXY_URL" if USE_HEALTHCARE_PROXY else False, ) as (ctx, page): # 1. navigator.webdriver + a couple of fingerprint signals try: await page.goto("about:blank") fp = await page.evaluate("""() => ({ webdriver: navigator.webdriver, plugins: navigator.plugins.length, languages: navigator.languages, chrome: typeof window.chrome, ua: navigator.userAgent, })""") print("fingerprint:", fp) except Exception as e: print("fingerprint eval failed:", e) # 2. real target reachability for name, url in TARGETS: try: resp = await page.goto(url, wait_until="domcontentloaded", timeout=30000) status = resp.status if resp else "?" title = await page.title() body = (await page.content())[:400].lower() blocked = any(w in body for w in [ "access denied", "are you a human", "captcha", "blocked", "incapsula", "akamai", "unusual traffic", "request unsuccessful", ]) print(f" [{status}] {name:14} blocked={blocked} title={title[:60]!r}") except Exception as e: print(f" [ERR] {name:14} {type(e).__name__}: {str(e)[:80]}") # 3. sannysoft fingerprint scorecard (count red FAILs) try: await page.goto(SANNYSOFT, wait_until="networkidle", timeout=30000) await asyncio.sleep(2) fails = await page.evaluate("""() => { const rows = [...document.querySelectorAll('tr')]; const bad = []; for (const r of rows) { const cls = r.className || ''; const txt = r.innerText.replace(/\\s+/g,' ').trim(); if (/fail|warn/i.test(cls)) bad.push(txt.slice(0,80)); } return bad; }""") if fails: print(f" sannysoft FAIL/WARN rows ({len(fails)}):") for f in fails: print(f" - {f}") else: print(" sannysoft: no FAIL/WARN rows detected (clean)") except Exception as e: print(" sannysoft check failed:", e) async def main(): await probe(headless=True) if __name__ == "__main__": asyncio.run(main())