new-site/scripts/probe_npi_undetected.py
justin 5cfe9702e2 Add Healthcare/NPI section to nav dropdown across all static pages
The site's pre-rendered public/**/index.html pages each embed their own copy
of the Services mega-dropdown and do not read src/partials/nav.html, so the
earlier nav.html-only edit never appeared. inject_healthcare_nav.py adds the
canonical Healthcare block (Medicare Revalidation, Medicare Enrollment, NPI/
NPPES Services, free NPI Compliance Check) to the desktop Column 3 + mobile
menu of all 80 static pages. Idempotent.
2026-06-05 03:05:19 -05:00

90 lines
3.6 KiB
Python

"""Probe whether our undetected (patchright) browser can reach NPPES / PECOS
and how detectable it looks. Honest, no assertions from memory — it visits
real endpoints and a fingerprint-detection page and prints what it sees.
Run: python3 scripts/probe_npi_undetected.py
"""
import asyncio
import sys
sys.path.insert(0, "scripts")
from workers.services.telecom.undetected_browser import ( # noqa: E402
undetected_browser, is_using_patchright,
)
# Origin shared by the public NPI registry UI and its JSON API.
_NPI_REGISTRY_ORIGIN = "https://npiregistry.cms.hhs.gov/"

# (label, url) pairs visited in order by probe(); the label is only used
# in the printed report.
TARGETS = [
    # NPPES public registry UI (where NPI lookups/updates happen)
    ("NPPES registry", _NPI_REGISTRY_ORIGIN),
    # NPPES public API (already used by our free tool — sanity check)
    ("NPPES API", _NPI_REGISTRY_ORIGIN + "api/?version=2.1&number=1234567893"),
    # PECOS / I&A login surface (Identity & Access)
    ("PECOS portal", "https://pecos.cms.hhs.gov/pecos/login.do"),
    ("I&A portal", "https://nppes.cms.hhs.gov/IAWeb/login.do"),
]

# Public bot-detection fingerprint check.
SANNYSOFT = "https://bot.sannysoft.com/"
async def probe(headless: bool):
    """Run the reachability / detectability probe once and print the findings.

    Opens one browser session via ``undetected_browser`` and performs three
    steps on the same page. Every step (and every individual target) is
    wrapped in its own ``try`` so one failure does not stop the remaining
    checks:

    1. Evaluate a handful of ``navigator`` / ``window`` fingerprint signals.
    2. Load every URL in ``TARGETS`` and report the HTTP status, the page
       title and whether the first 400 characters of the HTML contain a
       known block-page marker.
    3. Load the ``SANNYSOFT`` scorecard and list the rows flagged fail/warn.

    Args:
        headless: Passed through to ``undetected_browser(headless=...)``.

    Returns:
        None. All results are written to stdout with ``print``.
    """
    rule = "=" * 60
    backend = "patchright" if is_using_patchright() else "vanilla-playwright"
    print(f"\n{rule}\nbackend = {backend} | headless={headless}\n{rule}")

    # Substrings (lower-case) that mark a WAF / bot-wall interstitial.
    # Built once here rather than on every loop iteration.
    block_markers = (
        "access denied", "are you a human", "captcha", "blocked",
        "incapsula", "akamai", "unusual traffic", "request unsuccessful",
    )

    async with undetected_browser(headless=headless) as (ctx, page):
        # 1. navigator.webdriver + a couple of fingerprint signals
        try:
            await page.goto("about:blank")
            fp = await page.evaluate("""() => ({
                webdriver: navigator.webdriver,
                plugins: navigator.plugins.length,
                languages: navigator.languages,
                chrome: typeof window.chrome,
                ua: navigator.userAgent,
            })""")
            print("fingerprint:", fp)
        except Exception as e:
            print("fingerprint eval failed:", e)

        # 2. real target reachability
        for name, url in TARGETS:
            try:
                resp = await page.goto(url, wait_until="domcontentloaded", timeout=30000)
                # goto() may yield no response object; report "?" in that case.
                status = resp.status if resp else "?"
                title = await page.title()
                # Only the head of the document is scanned for block markers.
                head = (await page.content())[:400].lower()
                blocked = any(marker in head for marker in block_markers)
                print(f" [{status}] {name:14} blocked={blocked} title={title[:60]!r}")
            except Exception as e:
                print(f" [ERR] {name:14} {type(e).__name__}: {str(e)[:80]}")

        # 3. sannysoft fingerprint scorecard (count red FAILs)
        try:
            await page.goto(SANNYSOFT, wait_until="networkidle", timeout=30000)
            # Give the page's own detection scripts a moment to fill the table.
            await asyncio.sleep(2)
            # NOTE(review): the scorecard appears to put its passed/failed/warn
            # class on the result <td>, not on the <tr>; matching the row class
            # alone would then never fire and always report "clean". Match on
            # the row OR any of its direct cells (superset of the old check).
            flagged = await page.evaluate("""() => {
                const verdict = /fail|warn/i;
                const bad = [];
                for (const row of document.querySelectorAll('tr')) {
                    const classes = [row, ...row.cells]
                        .map(el => el.className || '')
                        .join(' ');
                    if (!verdict.test(classes)) continue;
                    const txt = row.innerText.replace(/\\s+/g, ' ').trim();
                    bad.push(txt.slice(0, 80));
                }
                return bad;
            }""")
            if flagged:
                print(f" sannysoft FAIL/WARN rows ({len(flagged)}):")
                for row_text in flagged:
                    print(f" - {row_text}")
            else:
                print(" sannysoft: no FAIL/WARN rows detected (clean)")
        except Exception as e:
            print(" sannysoft check failed:", e)
async def main():
    """Entry coroutine: run a single probe pass with a headless browser."""
    await probe(headless=True)
# Start the event loop only when run as a script, never on import.
if __name__ == "__main__":
    asyncio.run(main())