""" Base class for state Secretary of State portal automation. Each state adapter inherits from StatePortal and implements: - search_name() -> Check business name availability - file_llc() -> File LLC Articles of Organization - file_corporation() -> File Articles of Incorporation - check_status() -> Check filing status - download_docs() -> Download filed documents All state adapters use Playwright for browser automation. The base class provides shared utilities: screenshot capture, retry logic, CAPTCHA detection, error reporting, and state-specific delay injection (to appear human-paced). """ from __future__ import annotations import asyncio import json import logging import os import random import time from abc import ABC, abstractmethod from dataclasses import dataclass, field, asdict from datetime import datetime from enum import Enum from pathlib import Path from typing import Optional from playwright.async_api import Browser, BrowserContext, Page # Undetected Playwright launcher (patchright + stealth fallback). Shared with # the FCC / USAC / BDC compliance filing handlers. from scripts.workers.services.telecom.undetected_browser import ( launch_context as _undetected_launch_context, ) # Keep async_playwright import available for backwards compat (tests may patch # this symbol). When the helper is in use, prefer the shared launcher. 
try:
    from patchright.async_api import async_playwright  # type: ignore
except ImportError:
    from playwright.async_api import async_playwright  # type: ignore

LOG = logging.getLogger("formation")

SCREENSHOTS_DIR = Path(os.getenv("SCREENSHOTS_DIR", "/tmp/formation-screenshots"))
SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same value as the deprecated ``datetime.utcnow()`` (no
    tzinfo attached), so ISO strings and ``strftime`` output keep their
    existing format.
    """
    # Local import: the file-level import block only brings in ``datetime``.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class EntityType(str, Enum):
    """Kinds of business entity that can be filed."""

    LLC = "llc"
    CORPORATION = "corporation"
    S_CORP = "s_corp"  # Corp + IRS 2553 election


class FilingStatus(str, Enum):
    """Lifecycle states of a formation filing."""

    PENDING = "pending"
    NAME_AVAILABLE = "name_available"
    NAME_UNAVAILABLE = "name_unavailable"
    SUBMITTED = "submitted"
    PROCESSING = "processing"
    FILED = "filed"
    REJECTED = "rejected"
    ERROR = "error"


@dataclass
class NameSearchResult:
    """Outcome of a business-name availability search."""

    available: bool
    exact_match: bool = False
    similar_names: list[str] = field(default_factory=list)
    state_code: str = ""
    searched_name: str = ""
    # Naive-UTC ISO-8601 timestamp taken when the result object is created.
    timestamp: str = field(default_factory=lambda: _utcnow().isoformat())
    raw_response: str = ""


@dataclass
class Member:
    """A person attached to the entity (member, manager, organizer, ...)."""

    name: str
    address: str
    city: str
    state: str
    zip_code: str
    title: str = "Member"  # Member, Manager, Organizer, Director, etc.
    ownership_pct: float = 0.0
    is_organizer: bool = False  # Signs the formation docs


@dataclass
class FormationOrder:
    """All information needed to file a business entity in any state."""

    order_id: str
    state_code: str
    entity_type: EntityType
    entity_name: str
    entity_name_alt: str = ""  # Backup name if primary unavailable

    # Management
    management_type: str = "member_managed"  # member_managed or manager_managed (LLC)
    purpose: str = "Any lawful business activity"

    # People
    members: list[Member] = field(default_factory=list)
    registered_agent_name: str = "Northwest Registered Agent"
    registered_agent_address: str = ""  # Populated per-state from NW RA

    # Addresses
    principal_address: str = ""
    principal_city: str = ""
    principal_state: str = ""
    principal_zip: str = ""
    mailing_address: str = ""
    mailing_city: str = ""
    mailing_state: str = ""
    mailing_zip: str = ""

    # Corp-specific
    shares_authorized: int = 10000  # Default for corp formation (BC flat fee, no per-share cost)
    par_value: float = 0.0  # 0 = no par value
    fiscal_year_end: str = "12/31"

    # Regulatory contact (for CRTC letter — populated from provisioned Canadian identity)
    regulatory_contact_name: str = "Regulatory Director"
    regulatory_contact_email: str = ""  # regulatory@{.ca domain}
    regulatory_contact_phone: str = ""  # Canadian DID from Flowroute

    # Options
    expedited: bool = False
    effective_date: str = ""  # Empty = immediate, else future date

    # Payment (Relay virtual debit card — loaded from ERPNext Sensitive ID at runtime)
    payment_card_number: str = ""  # Populated by worker before filing
    payment_card_exp: str = ""  # MM/YY
    payment_card_cvv: str = ""
    payment_card_name: str = "Performance West Inc"
    payment_card_zip: str = "82001"  # Cheyenne, WY billing zip

    # Results (populated during filing)
    status: FilingStatus = FilingStatus.PENDING
    state_filing_number: str = ""
    filed_at: str = ""
    confirmation_number: str = ""
    documents: list[str] = field(default_factory=list)  # File paths
    error_message: str = ""


@dataclass
class FilingResult:
    """Outcome of a filing attempt, as returned by the ``file_*`` methods."""

    success: bool
    status: FilingStatus
    state_code: str
    entity_name: str
    filing_number: str = ""
    confirmation_number: str = ""
    error_message: str = ""
    screenshot_path: str = ""
    documents: list[str] = field(default_factory=list)
    # Naive-UTC ISO-8601 timestamp taken when the result object is created.
    timestamp: str = field(default_factory=lambda: _utcnow().isoformat())

    def to_dict(self) -> dict:
        """Return the result as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)


class StatePortal(ABC):
    """Base class for all state SOS portal automations."""

    STATE_CODE: str = ""
    STATE_NAME: str = ""
    PORTAL_NAME: str = ""
    PORTAL_URL: str = ""
    SUPPORTS_LLC: bool = True
    SUPPORTS_CORP: bool = True
    SUPPORTS_ONLINE_FILING: bool = True
    SUPPORTS_NAME_SEARCH: bool = True

    # NW Registered Agent address for this state (populated by subclass)
    NWRA_ADDRESS: str = ""
    NWRA_CITY: str = ""
    NWRA_STATE: str = ""
    NWRA_ZIP: str = ""

    def __init__(self):
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        # Playwright driver handle; kept so close_browser() can stop it.
        self._playwright = None
        self.log = logging.getLogger(f"formation.{self.STATE_CODE}")

    async def start_browser(self, headless: bool = True) -> Page:
        """Launch browser with undetected/stealth settings.

        Uses the shared patchright-based launcher in
        ``scripts/workers/services/telecom/undetected_browser.py`` so that
        state SoS portals and FCC/USAC filing handlers share one stealth
        implementation.

        Args:
            headless: Passed through to the shared launcher.

        Returns:
            The newly opened page (also stored on ``self.page``).

        Raises:
            Whatever the launcher or ``new_page()`` raises; anything already
            started is torn down before the exception propagates.
        """
        # Keep the driver handle: without it the Playwright driver process
        # can never be stopped and leaks on every run.
        self._playwright = await async_playwright().start()
        try:
            self.browser, self.context = await _undetected_launch_context(
                self._playwright,
                headless=headless,
                timezone_id="America/Denver",
            )
            self.page = await self.context.new_page()
        except BaseException:
            # Launch failed part-way: release whatever was started.
            await self.close_browser()
            raise
        return self.page

    async def close_browser(self):
        """Shut down the context, the browser and the Playwright driver.

        Every stage is attempted even if an earlier one raises (the first
        exception still propagates). References are cleared up front, so the
        instance never holds handles to closed objects and a repeated call is
        a no-op.
        """
        context = self.context
        browser = self.browser
        # getattr: tolerate subclasses whose __init__ skipped super().__init__().
        playwright = getattr(self, "_playwright", None)

        self.page = None
        self.context = None
        self.browser = None
        self._playwright = None

        try:
            if context:
                await context.close()
        finally:
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()

    async def screenshot(self, label: str) -> str:
        """Capture screenshot for debugging/audit trail.

        Args:
            label: Short tag embedded in the file name.

        Returns:
            Path of the saved PNG, or ``""`` when no page is open.
        """
        if not self.page:
            return ""
        ts = _utcnow().strftime("%Y%m%d_%H%M%S")
        path = SCREENSHOTS_DIR / f"{self.STATE_CODE}_{label}_{ts}.png"
        await self.page.screenshot(path=str(path), full_page=True)
        self.log.info("Screenshot saved: %s", path)
        return str(path)

    async def human_delay(self, min_s: float = 1.0, max_s: float = 3.0):
        """Sleep for a random duration in ``[min_s, max_s]`` seconds to appear human."""
        delay = random.uniform(min_s, max_s)
        await asyncio.sleep(delay)

    async def type_slowly(self, selector: str, text: str, delay_ms: int = 50):
        """Type text character by character with random delays.

        Args:
            selector: Selector of the input to click and type into.
            text: Text to enter.
            delay_ms: Base per-character delay; 0-30 ms of jitter is added.
        """
        if not self.page:
            return
        await self.page.click(selector)
        for char in text:
            await self.page.type(selector, char, delay=delay_ms + random.randint(0, 30))

    async def safe_click(self, selector: str, timeout: int = 10000):
        """Wait for ``selector`` (up to ``timeout`` ms), pause briefly, then click it."""
        if not self.page:
            return
        await self.page.wait_for_selector(selector, timeout=timeout)
        await self.human_delay(0.3, 0.8)
        await self.page.click(selector)

    async def detect_captcha(self) -> bool:
        """Check if a CAPTCHA is present on the page.

        Returns:
            True as soon as any known CAPTCHA selector matches; False when
            none match or no page is open.
        """
        if not self.page:
            return False
        captcha_selectors = (
            "iframe[src*='recaptcha']",
            "iframe[src*='hcaptcha']",
            ".g-recaptcha",
            ".h-captcha",
            "#captcha",
            "[class*='captcha']",
            "iframe[src*='challenge']",
        )
        for sel in captcha_selectors:
            try:
                el = await self.page.query_selector(sel)
            except Exception as exc:
                # Best-effort probe: a failing selector must not abort the
                # scan, but leave a trace instead of swallowing it silently.
                self.log.debug("CAPTCHA probe failed for %s: %s", sel, exc)
                continue
            if el:
                self.log.warning("CAPTCHA detected: %s", sel)
                return True
        return False

    # --- Abstract methods — each state implements these ---

    @abstractmethod
    async def search_name(self, name: str) -> NameSearchResult:
        """Search for business name availability in this state."""
        ...

    @abstractmethod
    async def file_llc(self, order: FormationOrder) -> FilingResult:
        """File LLC Articles of Organization."""
        ...

    @abstractmethod
    async def file_corporation(self, order: FormationOrder) -> FilingResult:
        """File Articles of Incorporation."""
        ...

    async def file_entity(self, order: FormationOrder) -> FilingResult:
        """Route to correct filing method based on entity type.

        LLC orders go to ``file_llc``; CORPORATION and S_CORP orders go to
        ``file_corporation``. Any other entity type yields a failed
        ``FilingResult`` with status ``ERROR``.
        """
        if order.entity_type == EntityType.LLC:
            return await self.file_llc(order)
        if order.entity_type in (EntityType.CORPORATION, EntityType.S_CORP):
            return await self.file_corporation(order)
        return FilingResult(
            success=False,
            status=FilingStatus.ERROR,
            state_code=self.STATE_CODE,
            entity_name=order.entity_name,
            error_message=f"Unsupported entity type: {order.entity_type}",
        )

    async def check_status(self, filing_number: str) -> FilingStatus:
        """Check the status of a previously submitted filing.

        Default implementation only logs a warning and returns ``PENDING``.
        """
        self.log.warning("check_status not implemented for %s", self.STATE_CODE)
        return FilingStatus.PENDING

    async def enter_payment(
        self,
        order: FormationOrder,
        selectors: dict[str, str],
    ) -> bool:
        """Enter Relay virtual debit card payment on a state portal payment form.

        Common payment form selectors (vary by state, passed from config):
            card_number_field, card_exp_field, card_cvv_field,
            card_name_field, card_zip_field, submit_payment_btn
        A split expiration is supported through ``card_exp_month_field`` plus
        ``card_exp_year_field`` when ``card_exp_field`` is not given.

        Args:
            order: FormationOrder with payment card details populated
            selectors: Dict of CSS selectors for the payment form fields

        Returns:
            True if payment fields were filled and submitted successfully.
            False when no page is open, the order carries no card number, or
            any step of the form interaction fails.
        """
        if not self.page:
            self.log.error("No browser page open for payment")
            return False

        if not order.payment_card_number:
            self.log.error("No payment card number on order — card not loaded from ERPNext")
            return False

        # The order carries no fee amount, so only the entity name is logged.
        # (Formatting order.status with %.2f made every call emit a logging
        # error instead of this message.)
        self.log.info("Entering payment for %s", order.entity_name)

        try:
            # NOTE(review): the payment_* screenshots capture the payment form
            # as rendered and are written to SCREENSHOTS_DIR; the
            # "payment_filled" one is taken after the card fields are
            # populated. Confirm that persisting these images is acceptable,
            # or mask the card fields before capturing.
            await self.screenshot("payment_before")

            # Card number
            if selectors.get("card_number_field"):
                await self.type_slowly(
                    selectors["card_number_field"], order.payment_card_number, delay_ms=40
                )
                await self.human_delay(0.3, 0.6)

            # Expiration (some states split into month/year, some have one field)
            if selectors.get("card_exp_field"):
                await self.type_slowly(
                    selectors["card_exp_field"], order.payment_card_exp, delay_ms=40
                )
                await self.human_delay(0.2, 0.5)
            elif selectors.get("card_exp_month_field") and selectors.get("card_exp_year_field"):
                # Expects "MM/YY"; any other shape raises ValueError, which is
                # reported by the handler below.
                month, year = order.payment_card_exp.split("/")
                await self.page.select_option(selectors["card_exp_month_field"], month.strip())
                await self.page.select_option(selectors["card_exp_year_field"], year.strip())
                await self.human_delay(0.2, 0.5)

            # CVV
            if selectors.get("card_cvv_field"):
                await self.type_slowly(
                    selectors["card_cvv_field"], order.payment_card_cvv, delay_ms=40
                )
                await self.human_delay(0.2, 0.5)

            # Name on card
            if selectors.get("card_name_field"):
                await self.type_slowly(
                    selectors["card_name_field"], order.payment_card_name, delay_ms=30
                )
                await self.human_delay(0.2, 0.5)

            # Billing ZIP
            if selectors.get("card_zip_field"):
                await self.type_slowly(
                    selectors["card_zip_field"], order.payment_card_zip, delay_ms=30
                )
                await self.human_delay(0.2, 0.5)

            await self.screenshot("payment_filled")

            # Submit payment
            if selectors.get("submit_payment_btn"):
                await self.safe_click(selectors["submit_payment_btn"])
                await self.page.wait_for_load_state("networkidle", timeout=30000)
                await self.human_delay(2.0, 4.0)  # Payment processing delay

            await self.screenshot("payment_after")
            self.log.info("Payment submitted for %s", order.entity_name)
            return True

        except Exception as e:
            self.log.error("Payment entry failed: %s", e)
            try:
                await self.screenshot("payment_error")
            except Exception as shot_err:
                # The page may be gone (crash/navigation); the diagnostic
                # screenshot must not turn the documented False return into
                # an exception.
                self.log.warning("Could not capture payment_error screenshot: %s", shot_err)
            return False

    async def download_docs(self, filing_number: str) -> list[str]:
        """Download filed documents. Returns list of file paths.

        Default implementation only logs a warning and returns an empty list.
        """
        self.log.warning("download_docs not implemented for %s", self.STATE_CODE)
        return []