new-site/scripts/formation/states/tx/adapter.py
justin f94ad1682b fix(formation/TX): name search via Texas open-data API, not scraping
The TX Comptroller web search is now a JS form (old input#entityName selector
dead) and SOSDirect is login-gated, so the scraper returned garbage. Replaced
search_name with the Texas Socrata 'Active Franchise Taxpayers' dataset
(data.texas.gov/resource/9cir-efmm.json) over SoQL -- free, no-auth, no-login,
no bot-blocks. Exact normalized match => unavailable; no rows => available; API
error => available=None (never a false 'taken'). Verified: unique name = 0 rows
(available), 'APPLE INC.' = exact match (taken).
2026-06-09 08:34:37 -05:00

174 lines
6.9 KiB
Python

"""Texas — SOSDirect SOS portal automation.
SOSDirect (direct.sos.state.tx.us) is the Texas Secretary of State's
online filing system. It requires an account login. Name search needs no
login: it queries the Comptroller's "Active Franchise Taxpayers" open dataset
on data.texas.gov rather than the Comptroller's Taxable Entity Search web form.
Key URLs:
Name search: https://data.texas.gov/resource/9cir-efmm.json (open-data API; replaces the JS-only web form at https://mycpa.cpa.state.tx.us/coa/Index.html)
SOSDirect: https://direct.sos.state.tx.us
Filing: SOSDirect → Corporations → File a Certificate of Formation
Fees:
LLC: $300 (Certificate of Formation — Domestic LLC)
Corp: $300 (Certificate of Formation — Domestic Corp)
Expedited: +$25 (24-hour), +$50 (same-day)
Notes:
- Texas uses "Certificate of Formation" (not Articles of Organization)
- No publication requirement
- Franchise tax applies only if revenue > $2.47M (most small carriers exempt)
- SOSDirect uses ASP.NET with __VIEWSTATE like WY; human-pace typing required
"""
from __future__ import annotations
import asyncio
import re
from typing import Optional
from scripts.formation.base import (
StatePortal,
NameSearchResult,
FormationOrder,
FilingResult,
FilingStatus,
EntityType,
)
from .config import CONFIG
class TXPortal(StatePortal):
    """Texas adapter: open-data name search plus SOSDirect filing stubs.

    Name availability is checked against the Texas open-data (Socrata) API,
    which needs no login. The two filing methods are stubs: they return a
    PENDING ``FilingResult`` whose message tells an admin to file manually
    on SOSDirect.
    """

    STATE_CODE = "TX"
    STATE_NAME = "Texas"
    PORTAL_NAME = "SOSDirect"
    PORTAL_URL = "https://direct.sos.state.tx.us"

    # Registered-agent address fields, read from the state config.
    NWRA_ADDRESS = CONFIG["registered_agent"]["street"]
    NWRA_CITY = CONFIG["registered_agent"]["city"]
    NWRA_STATE = CONFIG["registered_agent"]["state"]
    NWRA_ZIP = CONFIG["registered_agent"]["zip"]

    # Name-search tuning (private; not part of the StatePortal interface).
    _NAME_SEARCH_URL = "https://data.texas.gov/resource/9cir-efmm.json"
    _NAME_SEARCH_PREFIX_LEN = 20    # chars of the normalized name sent to LIKE
    _NAME_SEARCH_ROW_LIMIT = 1000   # rows requested per query
    _NAME_SEARCH_TIMEOUT = 20       # seconds, passed to urlopen
    _NAME_SEARCH_MAX_SIMILAR = 10   # similar names returned to the caller

    # ── Name Search (Texas open-data API — no login) ────────────────────

    @staticmethod
    def _normalize_name(raw: str) -> str:
        """Return *raw* upper-cased, without commas/periods, whitespace collapsed."""
        depunctuated = raw.upper().replace(",", "").replace(".", "")
        # split()/join collapses every whitespace run and trims both ends, so
        # "Acme,  Inc." and "ACME INC" compare equal.
        return " ".join(depunctuated.split())

    async def search_name(self, name: str) -> NameSearchResult:
        """Search Texas business name availability via the Texas open-data API.

        Uses the Comptroller "Active Franchise Taxpayers" dataset on the state
        Socrata portal (data.texas.gov, dataset 9cir-efmm) over SoQL. An entity
        name appearing as an *active* franchise taxpayer is a strong "name in
        use" signal; absence => available (subject to a final SOSDirect
        confirmation when the order is actually filed).

        Availability semantics:
            - exact (normalized) match -> available=False, exact_match=True
            - only similar names       -> available=True, similar_names listed
            - no rows                  -> available=True
            - empty / punctuation-only name, API error, or a full result page
              with no exact match      -> available=None ("could not
              determine"; never a false "taken")

        Args:
            name: Entity name as entered by the customer.

        Returns:
            A ``NameSearchResult`` for state ``"TX"``. This method does not
            raise for lookup failures; they are reported as ``available=None``
            with the error text in ``raw_response``.
        """
        import json as _json
        import urllib.parse as _url
        import urllib.request as _req

        target = self._normalize_name(name or "")
        if not target:
            # An empty needle would become LIKE '%%', which matches every row
            # and would report a blank name as "available".
            return NameSearchResult(
                available=None,
                state_code="TX",
                searched_name=name,
                raw_response="Error: empty entity name",
            )

        limit = self._NAME_SEARCH_ROW_LIMIT
        # SoQL: substring LIKE on the first ~20 normalized chars so we catch
        # the exact name plus close variants. Single quotes are doubled to
        # keep the SoQL string literal well-formed.
        # NOTE(review): the needle has commas/periods stripped but the
        # server-side column does not, so a dataset name punctuated inside
        # these first characters (e.g. "A.B.C. ...") will not be returned.
        needle = target[: self._NAME_SEARCH_PREFIX_LEN].replace("'", "''")
        where = f"upper(taxpayer_name) like '%{needle}%'"
        api = (
            self._NAME_SEARCH_URL
            + "?"
            + _url.urlencode(
                {"$where": where, "$limit": str(limit), "$select": "taxpayer_name"}
            )
        )
        timeout = self._NAME_SEARCH_TIMEOUT

        def _fetch() -> list[dict]:
            req = _req.Request(
                api, headers={"User-Agent": "PerformanceWest formation name-check"}
            )
            with _req.urlopen(req, timeout=timeout) as resp:
                payload = _json.loads(resp.read().decode("utf-8"))
            if not isinstance(payload, list):
                raise ValueError(
                    f"unexpected API payload type: {type(payload).__name__}"
                )
            return payload

        try:
            # urllib is blocking, so run it on the default executor to keep
            # the event loop free while the request is in flight.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(None, _fetch)
            similar = [
                r.get("taxpayer_name", "").strip()
                for r in rows
                if r.get("taxpayer_name")
            ]
            exact = any(self._normalize_name(s) == target for s in similar)
            shortlist = similar[: self._NAME_SEARCH_MAX_SIMILAR]

            if not exact and len(rows) >= limit:
                # The page is full, so an exact match may exist beyond the
                # rows we received; do not claim the name is available.
                return NameSearchResult(
                    available=None,
                    exact_match=False,
                    similar_names=shortlist,
                    state_code="TX",
                    searched_name=name,
                    raw_response=(
                        f"texas-open-data 9cir-efmm: {len(similar)}+ match(es), "
                        f"truncated at {limit} rows; inconclusive"
                    ),
                )

            return NameSearchResult(
                available=not exact,
                exact_match=exact,
                similar_names=shortlist,
                state_code="TX",
                searched_name=name,
                raw_response=f"texas-open-data 9cir-efmm: {len(similar)} match(es)",
            )
        except Exception as exc:
            # Deliberate best-effort boundary: any network, HTTP, or parsing
            # failure becomes available=None => "could not determine"
            # (never a false "taken").
            return NameSearchResult(
                available=None,
                state_code="TX",
                searched_name=name,
                raw_response=f"Error: {exc}",
            )

    # ── Filing stubs (SOSDirect — requires login) ───────────────────────

    def _manual_filing_stub(
        self, order: FormationOrder, document: str, fee: object
    ) -> FilingResult:
        """Build the PENDING result that asks an admin to file *document* manually."""
        return FilingResult(
            success=False,
            status=FilingStatus.PENDING,
            state_code="TX",
            entity_name=order.entity_name,
            error_message=(
                "TX SOSDirect adapter selectors pending verification. "
                "Admin: file manually at https://direct.sos.state.tx.us "
                f"— {document}, ${fee}."
            ),
        )

    # ── LLC Filing (SOSDirect — requires login) ─────────────────────────

    async def file_llc(self, order: FormationOrder) -> FilingResult:
        """File a Certificate of Formation for a Texas LLC via SOSDirect.

        Intended SOSDirect flow (not yet automated):
            1. Login with SOS account credentials
            2. Navigate: Corporations → File a Document → Certificate of Formation
            3. Select entity type: Domestic Limited Liability Company (Form 205)
            4. Fill form fields (name, RA, members, management type, purpose)
            5. Pay $300 ($325 for 24hr expedited, $350 for same-day)
            6. Capture confirmation + filing number

        Selectors need verification against the live portal. The form is a
        multi-step ASP.NET WebForms wizard.

        Args:
            order: The formation order; only ``order.entity_name`` is read here.

        Returns:
            A ``FilingResult`` with ``success=False`` and status PENDING whose
            ``error_message`` carries manual-filing instructions and the LLC
            fee from ``CONFIG``.
        """
        # TODO: Verify selectors against live SOSDirect portal session.
        # Until then this is a stub; presumably the caller turns the PENDING
        # result into an admin todo for manual filing — confirm against caller.
        return self._manual_filing_stub(
            order,
            "LLC Certificate of Formation (Form 205)",
            CONFIG["fees"]["llc"],
        )

    # ── Corporation Filing ───────────────────────────────────────────────

    async def file_corporation(self, order: FormationOrder) -> FilingResult:
        """File a Certificate of Formation for a Texas corporation.

        Same SOSDirect flow as the LLC filing but selects:
            Domestic For-Profit Corporation (Form 201) or
            Domestic Nonprofit Corporation (Form 202).

        Args:
            order: The formation order; only ``order.entity_name`` is read here.

        Returns:
            A ``FilingResult`` with ``success=False`` and status PENDING whose
            ``error_message`` carries manual-filing instructions and the
            corporation fee from ``CONFIG``.
        """
        return self._manual_filing_stub(
            order,
            "Corp Certificate of Formation",
            CONFIG["fees"]["corporation"],
        )
def adapter() -> TXPortal:
    """Build and return a fresh Texas portal adapter instance."""
    portal = TXPortal()
    return portal