feat(healthcare): route NPPES/PECOS Playwright flows through residential SOCKS proxy
CMS healthcare portals (NPPES, PECOS, I&A) block datacenter IPs, so the healthcare browser automation needs to egress via the residential proxy on hg409y7ez04.sn.mynetname.net (username 'performancewest'). - undetected_browser: use_proxy now accepts an env-var name, so callers can select a domain-specific proxy. _proxy_config(proxy_env) reads it and falls back to UNDETECTED_PROXY_URL. Healthcare uses 'HEALTHCARE_PROXY_URL'. - probe_npi_undetected: launches with use_proxy='HEALTHCARE_PROXY_URL' when set. - npi_provider: documents that the (future) automated NPPES/PECOS flows must use the healthcare proxy. - Plumb HEALTHCARE_PROXY_URL (+ UNDETECTED_PROXY_URL fallback) through the ansible env template and docker-compose workers env. The credential itself is NOT in the repo. Set the full URL in the ansible vault as vault_healthcare_proxy_url: socks5://performancewest:<password>@hg409y7ez04.sn.mynetname.net:<port> Verified parsing + Playwright proxy-dict wiring with a unit test.
This commit is contained in:
parent
bd9a70607f
commit
17318f6e7d
5 changed files with 70 additions and 10 deletions
|
|
@ -107,6 +107,10 @@ services:
|
||||||
- CRYPTO_SWEEP_ADMIN_EMAIL=${ADMIN_EMAIL:-ops@performancewest.net}
|
- CRYPTO_SWEEP_ADMIN_EMAIL=${ADMIN_EMAIL:-ops@performancewest.net}
|
||||||
- USAC_USERNAME=${USAC_USERNAME}
|
- USAC_USERNAME=${USAC_USERNAME}
|
||||||
- USAC_PASSWORD=${USAC_PASSWORD}
|
- USAC_PASSWORD=${USAC_PASSWORD}
|
||||||
|
# Residential SOCKS proxy for healthcare (NPPES/PECOS) Playwright flows.
|
||||||
|
# Username "performancewest"; full URL set in .env via the ansible vault.
|
||||||
|
- HEALTHCARE_PROXY_URL=${HEALTHCARE_PROXY_URL:-}
|
||||||
|
- UNDETECTED_PROXY_URL=${UNDETECTED_PROXY_URL:-}
|
||||||
- ANYTIME_MAILBOX_SIGNUP_EMAIL=${ANYTIME_MAILBOX_SIGNUP_EMAIL:-noreply@performancewest.net}
|
- ANYTIME_MAILBOX_SIGNUP_EMAIL=${ANYTIME_MAILBOX_SIGNUP_EMAIL:-noreply@performancewest.net}
|
||||||
- ANYTIME_MAILBOX_SIGNUP_PHONE=${ANYTIME_MAILBOX_SIGNUP_PHONE}
|
- ANYTIME_MAILBOX_SIGNUP_PHONE=${ANYTIME_MAILBOX_SIGNUP_PHONE}
|
||||||
- ANYTIME_MAILBOX_DEFAULT_PASSWORD=${ANYTIME_MAILBOX_DEFAULT_PASSWORD}
|
- ANYTIME_MAILBOX_DEFAULT_PASSWORD=${ANYTIME_MAILBOX_DEFAULT_PASSWORD}
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,16 @@ HESTIA_URL={{ vault_hestia_url | default('https://cp.carrierone.com:8083') }}
|
||||||
HESTIA_USER={{ vault_hestia_user | default('admin') }}
|
HESTIA_USER={{ vault_hestia_user | default('admin') }}
|
||||||
HESTIA_PASS={{ vault_hestia_pass | default('') }}
|
HESTIA_PASS={{ vault_hestia_pass | default('') }}
|
||||||
|
|
||||||
|
# ── Residential proxy (healthcare NPPES/PECOS automation) ────────────────────
|
||||||
|
# CMS healthcare portals (NPPES, PECOS, I&A) block datacenter IPs, so the
|
||||||
|
# Playwright healthcare flows route through a residential SOCKS proxy.
|
||||||
|
# Format: socks5://performancewest:<password>@hg409y7ez04.sn.mynetname.net:<port>
|
||||||
|
# (username is "performancewest"). Set the full URL in the ansible vault as
|
||||||
|
# vault_healthcare_proxy_url. Leave blank to run without a proxy.
|
||||||
|
# UNDETECTED_PROXY_URL is the generic fallback used by FCC/state flows. If
# vault_undetected_proxy_url is unset it defaults to vault_healthcare_proxy_url.
|
||||||
|
HEALTHCARE_PROXY_URL={{ vault_healthcare_proxy_url | default('') }}
|
||||||
|
UNDETECTED_PROXY_URL={{ vault_undetected_proxy_url | default(vault_healthcare_proxy_url | default('')) }}
|
||||||
|
|
||||||
# ── Application URLs ──────────────────────────────────────────────────────────
|
# ── Application URLs ──────────────────────────────────────────────────────────
|
||||||
DOMAIN=https://{{ domain }}
|
DOMAIN=https://{{ domain }}
|
||||||
SITE_URL=https://{{ domain }}
|
SITE_URL=https://{{ domain }}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ real endpoints and a fingerprint-detection page and prints what it sees.
|
||||||
Run: python3 scripts/probe_npi_undetected.py
|
Run: python3 scripts/probe_npi_undetected.py
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, "scripts")
|
sys.path.insert(0, "scripts")
|
||||||
|
|
@ -12,6 +13,12 @@ from workers.services.telecom.undetected_browser import ( # noqa: E402
|
||||||
undetected_browser, is_using_patchright,
|
undetected_browser, is_using_patchright,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Route healthcare (NPPES/PECOS/I&A) traffic through the residential SOCKS
|
||||||
|
# proxy (username "performancewest"). The proxy is enabled whenever
|
||||||
|
# HEALTHCARE_PROXY_URL is non-empty; its value is read as the proxy URL
|
||||||
|
# itself (e.g. socks5://user:pass@host:port), so set a real URL, not a flag.
|
||||||
|
USE_HEALTHCARE_PROXY = bool(os.environ.get("HEALTHCARE_PROXY_URL", "").strip())
|
||||||
|
|
||||||
TARGETS = [
|
TARGETS = [
|
||||||
# NPPES public registry UI (where NPI lookups/updates happen)
|
# NPPES public registry UI (where NPI lookups/updates happen)
|
||||||
("NPPES registry", "https://npiregistry.cms.hhs.gov/"),
|
("NPPES registry", "https://npiregistry.cms.hhs.gov/"),
|
||||||
|
|
@ -28,7 +35,10 @@ SANNYSOFT = "https://bot.sannysoft.com/"
|
||||||
|
|
||||||
async def probe(headless: bool):
|
async def probe(headless: bool):
|
||||||
print(f"\n{'='*60}\nbackend = {'patchright' if is_using_patchright() else 'vanilla-playwright'} | headless={headless}\n{'='*60}")
|
print(f"\n{'='*60}\nbackend = {'patchright' if is_using_patchright() else 'vanilla-playwright'} | headless={headless}\n{'='*60}")
|
||||||
async with undetected_browser(headless=headless) as (ctx, page):
|
async with undetected_browser(
|
||||||
|
headless=headless,
|
||||||
|
use_proxy="HEALTHCARE_PROXY_URL" if USE_HEALTHCARE_PROXY else False,
|
||||||
|
) as (ctx, page):
|
||||||
# 1. navigator.webdriver + a couple of fingerprint signals
|
# 1. navigator.webdriver + a couple of fingerprint signals
|
||||||
try:
|
try:
|
||||||
await page.goto("about:blank")
|
await page.goto("about:blank")
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,11 @@ NPI + intake details for a human to file in CMS PECOS / NPPES. This mirrors the
|
||||||
FCC auto-filing-off safety default — no automated submission to government
|
FCC auto-filing-off safety default — no automated submission to government
|
||||||
portals until the Playwright flows are proven.
|
portals until the Playwright flows are proven.
|
||||||
|
|
||||||
|
When the Playwright NPPES/PECOS flows are enabled, they must route through the
|
||||||
|
residential SOCKS proxy (CMS blocks datacenter IPs) by launching with
|
||||||
|
``undetected_browser(use_proxy="HEALTHCARE_PROXY_URL")`` — the credential
|
||||||
|
(username ``performancewest``) is configured via HEALTHCARE_PROXY_URL in .env.
|
||||||
|
|
||||||
Covers slugs:
|
Covers slugs:
|
||||||
npi-revalidation Medicare PECOS revalidation (5-yr cycle)
|
npi-revalidation Medicare PECOS revalidation (5-yr cycle)
|
||||||
npi-reactivation reactivate a deactivated NPI
|
npi-reactivation reactivate a deactivated NPI
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,11 @@ etc.) should also use this helper — see ``docs/state-automation-status.md``
|
||||||
for the list.
|
for the list.
|
||||||
|
|
||||||
Optional residential proxy support: set ``UNDETECTED_PROXY_URL`` in the
|
Optional residential proxy support: set ``UNDETECTED_PROXY_URL`` in the
|
||||||
environment (e.g. ``http://user:pass@proxy.example.com:8080``) and pass
|
environment (e.g. ``socks5://user:pass@proxy.example.com:1080``) and pass
|
||||||
``use_proxy=True`` when launching.
|
``use_proxy=True`` when launching. Healthcare NPPES/PECOS flows use a
|
||||||
|
dedicated ``HEALTHCARE_PROXY_URL`` (residential SOCKS proxy, username
|
||||||
|
``performancewest``) via ``use_proxy="HEALTHCARE_PROXY_URL"``; it falls back
|
||||||
|
to ``UNDETECTED_PROXY_URL`` if the healthcare-specific var is unset.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -87,9 +90,23 @@ if (originalQuery) {
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def _proxy_config() -> dict | None:
|
def _proxy_config(proxy_env: str = "UNDETECTED_PROXY_URL") -> dict | None:
|
||||||
"""Read UNDETECTED_PROXY_URL and turn it into a Playwright proxy dict."""
|
"""Read a proxy URL env var and turn it into a Playwright proxy dict.
|
||||||
url = os.environ.get("UNDETECTED_PROXY_URL", "").strip()
|
|
||||||
|
``proxy_env`` names the environment variable to read (default
|
||||||
|
``UNDETECTED_PROXY_URL``). Callers that need a dedicated upstream — e.g.
|
||||||
|
the healthcare NPPES/PECOS flows, which route through the residential
|
||||||
|
SOCKS proxy — pass ``use_proxy="HEALTHCARE_PROXY_URL"`` so the credential
|
||||||
|
is configured in exactly one place (the env / ansible vault).
|
||||||
|
|
||||||
|
The URL may be ``http://`` or ``socks5://`` and may embed credentials:
|
||||||
|
``socks5://user:pass@host:port``.
|
||||||
|
"""
|
||||||
|
url = os.environ.get(proxy_env, "").strip()
|
||||||
|
# Allow a single shared residential proxy to back several domain-specific
|
||||||
|
# env names: if the requested var is unset, fall back to the generic one.
|
||||||
|
if not url and proxy_env != "UNDETECTED_PROXY_URL":
|
||||||
|
url = os.environ.get("UNDETECTED_PROXY_URL", "").strip()
|
||||||
if not url:
|
if not url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -113,13 +130,17 @@ async def launch_context(
|
||||||
playwright: "Playwright",
|
playwright: "Playwright",
|
||||||
*,
|
*,
|
||||||
headless: bool = True,
|
headless: bool = True,
|
||||||
use_proxy: bool = False,
|
use_proxy: "bool | str" = False,
|
||||||
timezone_id: str = "America/New_York",
|
timezone_id: str = "America/New_York",
|
||||||
locale: str = "en-US",
|
locale: str = "en-US",
|
||||||
storage_state: str | None = None,
|
storage_state: str | None = None,
|
||||||
) -> "tuple[Browser, BrowserContext]":
|
) -> "tuple[Browser, BrowserContext]":
|
||||||
"""Launch a Chromium browser + context with stealth settings.
|
"""Launch a Chromium browser + context with stealth settings.
|
||||||
|
|
||||||
|
``use_proxy`` may be ``True`` (read ``UNDETECTED_PROXY_URL``) or the name
|
||||||
|
of a specific env var, e.g. ``"HEALTHCARE_PROXY_URL"`` for the residential
|
||||||
|
proxy used by the NPPES/PECOS healthcare flows.
|
||||||
|
|
||||||
Returns ``(browser, context)`` — caller is responsible for closing both
|
Returns ``(browser, context)`` — caller is responsible for closing both
|
||||||
(prefer the :func:`undetected_browser` context manager instead).
|
(prefer the :func:`undetected_browser` context manager instead).
|
||||||
"""
|
"""
|
||||||
|
|
@ -148,12 +169,18 @@ async def launch_context(
|
||||||
"java_script_enabled": True,
|
"java_script_enabled": True,
|
||||||
}
|
}
|
||||||
if use_proxy:
|
if use_proxy:
|
||||||
proxy = _proxy_config()
|
proxy_env = use_proxy if isinstance(use_proxy, str) else "UNDETECTED_PROXY_URL"
|
||||||
|
proxy = _proxy_config(proxy_env)
|
||||||
if proxy:
|
if proxy:
|
||||||
context_kwargs["proxy"] = proxy
|
context_kwargs["proxy"] = proxy
|
||||||
|
logger.info(
|
||||||
|
"undetected_browser: routing through proxy %s (from %s)",
|
||||||
|
proxy.get("server"),
|
||||||
|
proxy_env,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"undetected_browser: use_proxy=True but UNDETECTED_PROXY_URL is unset"
|
"undetected_browser: use_proxy set but %s is unset", proxy_env
|
||||||
)
|
)
|
||||||
if storage_state:
|
if storage_state:
|
||||||
context_kwargs["storage_state"] = storage_state
|
context_kwargs["storage_state"] = storage_state
|
||||||
|
|
@ -170,13 +197,17 @@ async def launch_context(
|
||||||
async def undetected_browser(
|
async def undetected_browser(
|
||||||
*,
|
*,
|
||||||
headless: bool = True,
|
headless: bool = True,
|
||||||
use_proxy: bool = False,
|
use_proxy: "bool | str" = False,
|
||||||
timezone_id: str = "America/New_York",
|
timezone_id: str = "America/New_York",
|
||||||
locale: str = "en-US",
|
locale: str = "en-US",
|
||||||
storage_state: str | None = None,
|
storage_state: str | None = None,
|
||||||
) -> AsyncIterator["tuple[BrowserContext, Page]"]:
|
) -> AsyncIterator["tuple[BrowserContext, Page]"]:
|
||||||
"""Async context manager yielding a (context, page) pair.
|
"""Async context manager yielding a (context, page) pair.
|
||||||
|
|
||||||
|
``use_proxy`` may be ``True`` (read ``UNDETECTED_PROXY_URL``) or the name
|
||||||
|
of a specific env var, e.g. ``"HEALTHCARE_PROXY_URL"`` for the residential
|
||||||
|
proxy the NPPES/PECOS flows route through.
|
||||||
|
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
async with undetected_browser(headless=False) as (ctx, page):
|
async with undetected_browser(headless=False) as (ctx, page):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue