new-site/scripts/formation/base.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

388 lines
14 KiB
Python

"""
Base class for state Secretary of State (SoS) portal automation.

Each state adapter inherits from StatePortal and must implement:
  - search_name()      -> Check business name availability
  - file_llc()         -> File LLC Articles of Organization
  - file_corporation() -> File Articles of Incorporation

Adapters may also override the default implementations, which only log a
warning and return a placeholder result:
  - check_status()     -> Check filing status
  - download_docs()    -> Download filed documents

All state adapters use Playwright for browser automation.
The base class provides shared utilities: screenshot capture, retry logic,
CAPTCHA detection, error reporting, and state-specific delay injection
(to appear human-paced).
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import random
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field, asdict
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Optional
from playwright.async_api import Browser, BrowserContext, Page
# Undetected Playwright launcher (patchright + stealth fallback). Shared with
# the FCC / USAC / BDC compliance filing handlers.
from scripts.workers.services.telecom.undetected_browser import (
launch_context as _undetected_launch_context,
)
# Keep async_playwright import available for backwards compat (tests may patch
# this symbol). When the helper is in use, prefer the shared launcher.
try:
from patchright.async_api import async_playwright # type: ignore
except ImportError:
from playwright.async_api import async_playwright # type: ignore
# Root logger for the formation package; StatePortal instances log under
# "formation.<STATE_CODE>", i.e. as children of this logger.
LOG = logging.getLogger("formation")

# Directory that receives debug/audit screenshots. Override the default with
# the SCREENSHOTS_DIR environment variable.
SCREENSHOTS_DIR = Path(os.environ.get("SCREENSHOTS_DIR", "/tmp/formation-screenshots"))

# Created at import time (including missing parents); a no-op if it exists.
SCREENSHOTS_DIR.mkdir(exist_ok=True, parents=True)
class EntityType(str, Enum):
    """Kinds of business entity a formation order can request.

    The ``str`` mix-in makes every member compare equal to its raw string
    value (``EntityType.LLC == "llc"``).
    """

    LLC = "llc"
    CORPORATION = "corporation"
    # Corporation plus the IRS Form 2553 election.
    S_CORP = "s_corp"
class FilingStatus(str, Enum):
    """Lifecycle states recorded for a name search or filing.

    The ``str`` mix-in makes every member compare equal to its raw string
    value, so members can be compared against plain status strings.
    """

    # Default state of a freshly created order.
    PENDING = "pending"

    # Name-search outcomes.
    NAME_AVAILABLE = "name_available"
    NAME_UNAVAILABLE = "name_unavailable"

    # Filing progress.
    SUBMITTED = "submitted"
    PROCESSING = "processing"
    FILED = "filed"

    # Failure outcomes.
    REJECTED = "rejected"
    ERROR = "error"
@dataclass
class NameSearchResult:
    """Outcome of a business-name availability search.

    Only ``available`` is required; every other field has a default.
    """

    available: bool
    exact_match: bool = False
    # Fresh list per instance (never shared between results).
    similar_names: list[str] = field(default_factory=list)
    state_code: str = ""
    searched_name: str = ""
    # Naive UTC creation time in ISO 8601 form, filled in automatically.
    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())
    raw_response: str = ""
@dataclass
class Member:
    """A person attached to a formation order, with their postal address."""

    name: str
    address: str
    city: str
    state: str
    zip_code: str
    # Role label: Member, Manager, Organizer, Director, etc.
    title: str = "Member"
    ownership_pct: float = 0.0
    # True for the person who signs the formation documents.
    is_organizer: bool = False
@dataclass
class FormationOrder:
    """All information needed to file a business entity in any state.

    The first four fields are required; everything else has a default. The
    "Results" fields at the bottom start empty and are meant to be filled in
    while the filing runs.

    The card number, expiry and CVV are excluded from the generated
    ``repr()`` so that logging or printing an order (or an order appearing in
    a traceback) does not expose card data. They are still regular
    constructor arguments and still appear in ``dataclasses.asdict()``.
    """

    order_id: str
    state_code: str
    entity_type: EntityType
    entity_name: str
    entity_name_alt: str = ""  # Backup name if primary unavailable

    # Management
    management_type: str = "member_managed"  # member_managed or manager_managed (LLC)
    purpose: str = "Any lawful business activity"

    # People
    members: list[Member] = field(default_factory=list)
    registered_agent_name: str = "Northwest Registered Agent"
    registered_agent_address: str = ""  # Populated per-state from NW RA

    # Addresses
    principal_address: str = ""
    principal_city: str = ""
    principal_state: str = ""
    principal_zip: str = ""
    mailing_address: str = ""
    mailing_city: str = ""
    mailing_state: str = ""
    mailing_zip: str = ""

    # Corp-specific
    shares_authorized: int = 10000  # Default for corp formation (BC flat fee, no per-share cost)
    par_value: float = 0.0  # 0 = no par value
    fiscal_year_end: str = "12/31"

    # Regulatory contact (for CRTC letter — populated from provisioned Canadian identity)
    regulatory_contact_name: str = "Regulatory Director"
    regulatory_contact_email: str = ""  # regulatory@{.ca domain}
    regulatory_contact_phone: str = ""  # Canadian DID from Flowroute

    # Options
    expedited: bool = False
    effective_date: str = ""  # Empty = immediate, else future date

    # Payment (Relay virtual debit card — loaded from ERPNext Sensitive ID at runtime)
    # repr=False keeps the secret card details out of repr()/log output.
    payment_card_number: str = field(default="", repr=False)  # Populated by worker before filing
    payment_card_exp: str = field(default="", repr=False)  # MM/YY
    payment_card_cvv: str = field(default="", repr=False)
    payment_card_name: str = "Performance West Inc"
    payment_card_zip: str = "82001"  # Cheyenne, WY billing zip

    # Results (populated during filing)
    status: FilingStatus = FilingStatus.PENDING
    state_filing_number: str = ""
    filed_at: str = ""
    confirmation_number: str = ""
    documents: list[str] = field(default_factory=list)  # File paths
    error_message: str = ""
@dataclass
class FilingResult:
    """Outcome of a single filing attempt against a state portal.

    ``success``, ``status``, ``state_code`` and ``entity_name`` are required;
    the remaining fields default to empty values.
    """

    success: bool
    status: FilingStatus
    state_code: str
    entity_name: str
    filing_number: str = ""
    confirmation_number: str = ""
    error_message: str = ""
    screenshot_path: str = ""
    # Fresh list per instance (never shared between results).
    documents: list[str] = field(default_factory=list)
    # Naive UTC creation time in ISO 8601 form, filled in automatically.
    timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat())

    def to_dict(self) -> dict:
        """Return the result as a plain dict keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping
class StatePortal(ABC):
    """Base class for all state SOS portal automations.

    Subclasses fill in the class-level portal metadata and implement the
    abstract ``search_name``, ``file_llc`` and ``file_corporation`` methods.
    ``check_status`` and ``download_docs`` have default implementations that
    only log a warning; override them where the portal supports it.

    Browser lifecycle: ``start_browser()`` starts a Playwright driver and
    opens a page; ``close_browser()`` closes the context and browser and
    stops that driver. ``close_browser()`` is safe to call more than once.
    """

    STATE_CODE: str = ""
    STATE_NAME: str = ""
    PORTAL_NAME: str = ""
    PORTAL_URL: str = ""
    SUPPORTS_LLC: bool = True
    SUPPORTS_CORP: bool = True
    SUPPORTS_ONLINE_FILING: bool = True
    SUPPORTS_NAME_SEARCH: bool = True

    # NW Registered Agent address for this state (populated by subclass)
    NWRA_ADDRESS: str = ""
    NWRA_CITY: str = ""
    NWRA_STATE: str = ""
    NWRA_ZIP: str = ""

    # CSS selectors probed by detect_captcha(), in order. Subclasses may
    # override or extend this tuple for portal-specific challenges.
    CAPTCHA_SELECTORS: tuple[str, ...] = (
        "iframe[src*='recaptcha']",
        "iframe[src*='hcaptcha']",
        ".g-recaptcha",
        ".h-captcha",
        "#captcha",
        "[class*='captcha']",
        "iframe[src*='challenge']",
    )

    def __init__(self):
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        # Playwright driver handle, kept so close_browser() can stop it.
        self._playwright = None
        self.log = logging.getLogger(f"formation.{self.STATE_CODE}")

    async def start_browser(self, headless: bool = True) -> Page:
        """Launch browser with undetected/stealth settings.

        Uses the shared patchright-based launcher in
        ``scripts/workers/services/telecom/undetected_browser.py`` so that
        state SoS portals and FCC/USAC filing handlers share one stealth
        implementation.

        Args:
            headless: Passed through to the shared launcher.

        Returns:
            The newly opened page (also stored on ``self.page``).

        Raises:
            Whatever the launcher or ``new_page()`` raises; in that case
            everything started so far is torn down before re-raising.
        """
        self._playwright = await async_playwright().start()
        try:
            self.browser, self.context = await _undetected_launch_context(
                self._playwright,
                headless=headless,
                timezone_id="America/Denver",
            )
            self.page = await self.context.new_page()
        except BaseException:
            # Includes task cancellation: never leave a driver process or a
            # half-opened browser behind when start-up does not complete.
            await self.close_browser()
            raise
        return self.page

    async def close_browser(self):
        """Shut down the context, the browser and the Playwright driver.

        Handles are cleared first so the instance never points at closed
        objects and a repeated call is a no-op. Each teardown step runs even
        if an earlier one raises; the exception still propagates afterwards.
        """
        context, browser = self.context, self.browser
        # getattr: tolerate subclasses whose __init__ skipped super().__init__().
        playwright = getattr(self, "_playwright", None)
        self.page = None
        self.context = None
        self.browser = None
        self._playwright = None

        try:
            if context:
                await context.close()
        finally:
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()

    async def screenshot(self, label: str) -> str:
        """Capture a full-page screenshot for debugging/audit trail.

        Args:
            label: Short tag embedded in the file name.

        Returns:
            Path of the saved PNG, or ``""`` when no page is open.
        """
        if not self.page:
            return ""
        # UTC timestamp with one-second resolution.
        ts = time.strftime("%Y%m%d_%H%M%S", time.gmtime())
        path = SCREENSHOTS_DIR / f"{self.STATE_CODE}_{label}_{ts}.png"
        await self.page.screenshot(path=str(path), full_page=True)
        self.log.info("Screenshot saved: %s", path)
        return str(path)

    async def human_delay(self, min_s: float = 1.0, max_s: float = 3.0):
        """Sleep for a random duration in ``[min_s, max_s]`` seconds."""
        await asyncio.sleep(random.uniform(min_s, max_s))

    async def type_slowly(self, selector: str, text: str, delay_ms: int = 50):
        """Type text character by character with random delays.

        Clicks ``selector`` first, then types each character separately with
        ``delay_ms`` plus 0-30 ms of jitter. Does nothing when no page is open.
        """
        if not self.page:
            return
        await self.page.click(selector)
        for char in text:
            await self.page.type(selector, char, delay=delay_ms + random.randint(0, 30))

    async def safe_click(self, selector: str, timeout: int = 10000):
        """Wait for ``selector`` to appear, pause briefly, then click it.

        Args:
            selector: Element to click.
            timeout: Maximum wait for the element, in milliseconds.

        Does nothing when no page is open; a wait timeout propagates.
        """
        if not self.page:
            return
        await self.page.wait_for_selector(selector, timeout=timeout)
        await self.human_delay(0.3, 0.8)
        await self.page.click(selector)

    async def detect_captcha(self) -> bool:
        """Check if a CAPTCHA is present on the page.

        Probes each entry of ``CAPTCHA_SELECTORS`` and returns ``True`` on the
        first match. Best-effort: a selector whose lookup raises is logged at
        debug level and skipped. Returns ``False`` when no page is open.
        """
        if not self.page:
            return False
        for sel in self.CAPTCHA_SELECTORS:
            try:
                el = await self.page.query_selector(sel)
            except Exception as exc:
                self.log.debug("CAPTCHA probe failed for %s: %s", sel, exc)
                continue
            if el:
                self.log.warning("CAPTCHA detected: %s", sel)
                return True
        return False

    # --- Abstract methods — each state implements these ---

    @abstractmethod
    async def search_name(self, name: str) -> NameSearchResult:
        """Search for business name availability in this state."""
        ...

    @abstractmethod
    async def file_llc(self, order: FormationOrder) -> FilingResult:
        """File LLC Articles of Organization."""
        ...

    @abstractmethod
    async def file_corporation(self, order: FormationOrder) -> FilingResult:
        """File Articles of Incorporation."""
        ...

    async def file_entity(self, order: FormationOrder) -> FilingResult:
        """Route to correct filing method based on entity type.

        LLC orders go to ``file_llc``; CORPORATION and S_CORP orders go to
        ``file_corporation``. Any other entity type yields a failed
        ``FilingResult`` with status ERROR instead of raising.
        """
        if order.entity_type == EntityType.LLC:
            return await self.file_llc(order)
        if order.entity_type in (EntityType.CORPORATION, EntityType.S_CORP):
            return await self.file_corporation(order)
        return FilingResult(
            success=False,
            status=FilingStatus.ERROR,
            state_code=self.STATE_CODE,
            entity_name=order.entity_name,
            error_message=f"Unsupported entity type: {order.entity_type}",
        )

    async def check_status(self, filing_number: str) -> FilingStatus:
        """Check the status of a previously submitted filing.

        Default stub: logs a warning and returns ``FilingStatus.PENDING``.
        """
        self.log.warning("check_status not implemented for %s", self.STATE_CODE)
        return FilingStatus.PENDING

    async def enter_payment(
        self,
        order: FormationOrder,
        selectors: dict[str, str],
    ) -> bool:
        """Enter Relay virtual debit card payment on a state portal payment form.

        Common payment form selectors (vary by state, passed from config):
            card_number_field, card_exp_field (or the pair
            card_exp_month_field + card_exp_year_field), card_cvv_field,
            card_name_field, card_zip_field, submit_payment_btn

        A field whose selector is missing or empty is skipped. Screenshots
        are taken before entry, after filling, and after submission (or on
        error).

        Args:
            order: FormationOrder with payment card details populated
            selectors: Dict of CSS selectors for the payment form fields

        Returns:
            True if every step ran without raising. False if no page is open,
            the order has no card number, or any step raised (the error is
            logged and a "payment_error" screenshot is taken).
        """
        if not self.page:
            self.log.error("No browser page open for payment")
            return False
        if not order.payment_card_number:
            self.log.error("No payment card number on order — card not loaded from ERPNext")
            return False

        await self.screenshot("payment_before")
        # The order carries no fee amount, so identify the payment by order id.
        self.log.info("Entering payment for %s (order %s)", order.entity_name, order.order_id)

        try:
            # Card number
            if selectors.get("card_number_field"):
                await self.type_slowly(selectors["card_number_field"], order.payment_card_number, delay_ms=40)
                await self.human_delay(0.3, 0.6)

            # Expiration (some states split into month/year, some have one field)
            if selectors.get("card_exp_field"):
                await self.type_slowly(selectors["card_exp_field"], order.payment_card_exp, delay_ms=40)
                await self.human_delay(0.2, 0.5)
            elif selectors.get("card_exp_month_field") and selectors.get("card_exp_year_field"):
                # Expects "MM/YY"; any other shape raises and is handled below.
                month, year = order.payment_card_exp.split("/")
                await self.page.select_option(selectors["card_exp_month_field"], month.strip())
                await self.page.select_option(selectors["card_exp_year_field"], year.strip())
                await self.human_delay(0.2, 0.5)

            # CVV
            if selectors.get("card_cvv_field"):
                await self.type_slowly(selectors["card_cvv_field"], order.payment_card_cvv, delay_ms=40)
                await self.human_delay(0.2, 0.5)

            # Name on card
            if selectors.get("card_name_field"):
                await self.type_slowly(selectors["card_name_field"], order.payment_card_name, delay_ms=30)
                await self.human_delay(0.2, 0.5)

            # Billing ZIP
            if selectors.get("card_zip_field"):
                await self.type_slowly(selectors["card_zip_field"], order.payment_card_zip, delay_ms=30)
                await self.human_delay(0.2, 0.5)

            # NOTE(review): this full-page capture is taken with the card
            # fields filled in, so the saved image may contain card data —
            # confirm that is acceptable for the screenshot directory.
            await self.screenshot("payment_filled")

            # Submit payment
            if selectors.get("submit_payment_btn"):
                await self.safe_click(selectors["submit_payment_btn"])
                await self.page.wait_for_load_state("networkidle", timeout=30000)
                await self.human_delay(2.0, 4.0)  # Payment processing delay

            await self.screenshot("payment_after")
            self.log.info("Payment submitted for %s", order.entity_name)
            return True
        except Exception as e:
            self.log.error("Payment entry failed: %s", e)
            await self.screenshot("payment_error")
            return False

    async def download_docs(self, filing_number: str) -> list[str]:
        """Download filed documents. Returns list of file paths.

        Default stub: logs a warning and returns an empty list.
        """
        self.log.warning("download_docs not implemented for %s", self.STATE_CODE)
        return []