new-site/scripts/workers/cdr_presets/_scrape_base.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

86 lines
3.3 KiB
Python

"""Shared skeleton for Playwright-scrape presets.
Switches that don't expose an API for CDR export (Metaswitch iCM, Sansay
SSM, BroadWorks OCS web, some Cataleya SIP Navigator deployments) need a
login + download flow driven by Playwright. The selectors vary per
deployment version, so each subclass locks its own login URL and
post-login navigation; the base here handles the undetected browser
launch + download capture.
Until live recon finalizes the selectors, each concrete preset's
``fetch()`` raises NotImplementedError with a clear instruction; the
cdr_puller catches that, creates an admin ToDo, and the admin runs the
download manually (or a PW engineer extends the preset against the
specific deployment).
"""
from __future__ import annotations
import asyncio
import logging
from datetime import datetime
from typing import Iterable, Optional
from .base import BasePreset, CredentialField, FetchedFile
# Module-level logger; ScrapePreset.fetch() logs through it before re-raising
# NotImplementedError.
logger = logging.getLogger(__name__)
class ScrapePreset(BasePreset):
    """Skeleton for Playwright-driven presets.

    Subclasses set ``LOGIN_URL`` / ``CDR_DOWNLOAD_URL`` and override
    ``_run_scrape`` with the deployment-specific flow. ``fetch`` is the
    synchronous entry point: it drives ``_run_scrape`` on a private event
    loop and returns the collected files as a list.
    """

    TRANSPORT_METHOD = "scrape"
    LOGIN_URL: str = ""  # subclass sets (e.g. https://icm.example/admin)
    CDR_DOWNLOAD_URL: str = ""  # subclass sets
    CREDENTIAL_FIELDS = (
        CredentialField("admin_url", "Web admin URL", "text",
                        help="Root URL of the management UI."),
        CredentialField("username", "Admin username", "text"),
        CredentialField("password", "Admin password", "password", sensitive=True),
    )

    async def _run_scrape(self, cfg: dict, secrets: dict, since: Optional[datetime]) -> Iterable[FetchedFile]:
        """Subclasses implement the Playwright flow here.

        An override may be either an async generator (``yield`` each file)
        or a coroutine that returns an iterable of files; ``_to_async_list``
        accepts both shapes.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} requires live-session recon before it can "
            "automate CDR download. Until selectors are locked, the puller will "
            "file an admin ToDo instructing a human to export + upload manually."
        )

    def validate(self, profile_config: dict, secrets: dict) -> tuple[bool, str]:
        """Check that the configured admin URL answers an HTTP HEAD request.

        Args:
            profile_config: profile settings; ``admin_url`` is read from it,
                falling back to the class-level ``LOGIN_URL``.
            secrets: unused by this check.

        Returns:
            ``(ok, message)`` where ``message`` describes the outcome.
        """
        # Without a concrete flow, we at least confirm the admin URL is reachable.
        import urllib.request

        url = profile_config.get("admin_url") or self.LOGIN_URL
        if not url:
            return False, "admin_url not configured"
        try:
            req = urllib.request.Request(url, method="HEAD")
            with urllib.request.urlopen(req, timeout=15) as resp:
                return True, f"admin URL reachable (HTTP {resp.status})"
        except Exception as exc:
            # NOTE(review): urlopen raises HTTPError for 4xx/5xx responses, so a
            # server that answers HEAD with e.g. 401/405 is reported here as
            # "unreachable" even though it responded. Confirm that is intended.
            return False, f"admin URL unreachable: {exc}"

    def fetch(
        self,
        profile_config: dict,
        secrets: dict,
        since: Optional[datetime],
    ) -> Iterable[FetchedFile]:
        """Run the scrape synchronously and return the fetched files as a list.

        A fresh event loop is created for the call and closed afterwards.

        Raises:
            NotImplementedError: when ``_run_scrape`` has not been implemented;
                logged as a warning, then re-raised for the caller to handle.
        """
        try:
            loop = asyncio.new_event_loop()
            try:
                return list(loop.run_until_complete(
                    self._to_async_list(profile_config, secrets, since)
                ))
            finally:
                try:
                    # Finalize any async generators still pending on this loop
                    # before it is closed (asyncio.run() does the same).
                    loop.run_until_complete(loop.shutdown_asyncgens())
                finally:
                    loop.close()
        except NotImplementedError as exc:
            logger.warning("%s: %s", self.__class__.__name__, exc)
            raise

    async def _to_async_list(self, cfg, secrets, since):
        """Collect everything ``_run_scrape`` produces into a list.

        ``_run_scrape`` may be an async generator or a plain coroutine. The
        base implementation is a plain coroutine, and ``async for`` over a
        coroutine object raises ``TypeError`` instead of surfacing the
        intended ``NotImplementedError``, so the two shapes are told apart
        here before consuming the result.
        """
        produced = self._run_scrape(cfg, secrets, since)
        if hasattr(produced, "__aiter__"):
            # Async-generator override: drain it item by item.
            return [item async for item in produced]
        if hasattr(produced, "__await__"):
            # Coroutine override (including the base stub, which raises here).
            produced = await produced
        return list(produced or ())