mcs150: complete all-variant field mapping (150/150B/150C)

Adds the previously-unmapped fields so every variant fills fully:
- Q25 hazmat C/S/B/NB matrix (HAZMAT_ROW_MAP x HAZMAT_COL_MAP, 156 boxes)
- MCS-150B states-of-operation checkboxes (full name or 2-letter code), HMSP
  Hazard/Permit/Security radios, and accident count (32accidentNumber)
- MCS-150C intermodal equipment counts (20owned/leased/serviced) + correct
  field renumbering (17dunbrad/18irs/19eMail) + USDOT Button + named-export
  Reason/Mailing radios
- Structured fleet via intake['vehicles'] = {vehicle_type: {owned, term_leased,
  trip_leased}} across all Q26 vehicle rows; non-CMV count; cell/fax; second
  officer
- _set_button now resolves a candidate tuple against each field's actual export
  states, so numeric (/0../4) and named (/Yes,/Biennial...) radios both work

verify_mcs150_variants.py exercises all three variants end-to-end: ALL PASS.
This commit is contained in:
justin 2026-06-10 13:55:55 -05:00
parent 96f31e7c31
commit 38739e023c
2 changed files with 307 additions and 58 deletions

View file

@ -108,6 +108,76 @@ CARGO_TYPE_MAP = {
"other": "24ccBox",
}
# Question 25 (Hazardous Materials) is a grid of checkboxes: one row per
# commodity and four columns per row (C=Carrier, S=Shipper, B=Bulk in cargo
# tanks, NB=Non-Bulk in packages). A box's AcroForm field name is built as
# "25" + row letters + column code + "Box".
#
# HAZMAT_ROW_MAP: intake commodity key -> row letters. As written, the letters
# run a..z and then aa..mm without skipping any.
# NOTE(review): if header rows in the printed list consume letters, this map
# needs gaps -- confirm against the template's actual field names.
HAZMAT_ROW_MAP = {
    "div_1_1": "a",
    "div_1_2": "b",
    "div_1_3": "c",
    "div_1_4": "d",
    "div_1_5": "e",
    "div_1_6": "f",
    "div_2_1_flam_gas": "g",
    "div_2_1_lpg": "h",
    "div_2_1_methane": "i",
    "div_2_2": "j",
    "div_2_3a": "k",
    "div_2_3b": "l",
    "div_2_3c": "m",
    "div_2_3d": "n",
    "class_3": "o",
    "comb_liq": "p",
    "div_4_1": "q",
    "div_4_2": "r",
    "div_4_3": "s",
    "div_5_1": "t",
    "div_5_2": "u",
    "div_6_1a": "v",
    "div_6_1b": "w",
    "div_6_1_liquid": "x",
    "div_6_1_solid": "y",
    "div_6_2_infectious": "z",
    "div_6_2_select": "aa",
    "class_7": "bb",
    "hrcq": "cc",
    "class_8": "dd",
    "class_8a": "ee",
    "class_8b": "ff",
    "class_9": "gg",
    "elevated_temp": "hh",
    "infectious_waste": "ii",
    "marine_pollutants": "jj",
    "hazardous_sub_rq": "kk",
    "hazardous_waste": "ll",
    "ltd_qty": "mm",
}

# HAZMAT_COL_MAP: intake role key -> column code used inside the field name.
HAZMAT_COL_MAP = {
    "carrier": "C",
    "shipper": "S",
    "bulk": "B",
    "non_bulk": "NB",
}
# Q26 vehicle-count rows. Keys are the vehicle-type names accepted in the
# intake; values are the AcroForm field-name prefix for that row. The three
# count columns are addressed as prefix + "Own", prefix + "Term" and
# prefix + "Trip" (owned / term-leased / trip-leased). Prefixes are used
# verbatim, including the "-" and "+" characters some of them contain.
VEHICLE_TYPE_PREFIX = {
    "straight": "straight",
    "tractor": "tractor",
    "trailer": "trailer",
    "hazmat_truck": "haztruck",
    "hazmat_trailer": "haztrail",
    "motorcoach": "coach",
    "school_bus_1_8": "school1-8",
    "school_bus_9_15": "school9-15",
    "school_bus_16": "school16+",
    "bus_16": "bus16+",
    "van_1_8": "van1-8",
    "van_9_15": "van9-15",
    "limo_1_8": "limo1-8",
    "limo_9_15": "limo9-15",
    "limo_16": "limo16+",
}
# Two-letter postal code -> full name, covering the 50 states plus the
# District of Columbia and Puerto Rico. Lets intake data use either form.
US_STATE_BY_CODE = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "DC": "District of Columbia",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "PR": "Puerto Rico",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}

# Full names, in the order listed above. On the MCS-150B each "states of
# operation" checkbox field is named with the full state name, so these
# strings double as field names.
US_STATES = list(US_STATE_BY_CODE.values())
def determine_form_type(intake: dict) -> str:
"""Determine which MCS-150 form to use.
@ -192,6 +262,14 @@ def fill_mcs150(intake: dict, order_number: str = "") -> str:
field_updates = {}
# ── Text fields ──────────────────────────────────────────────────
# The 150C form renumbers a few identification fields (D&B/EIN/email shift
# up by one because it has no MC/MX line). Pick the right field names per
# variant so the values land in the correct boxes.
if form_type == "mcs150c":
ein_field, email_field, dunbrad_field = "18irsNumber", "19eMail", "17dunbradNumber"
else:
ein_field, email_field, dunbrad_field = "19irsNumber", "20eMail", "18dunbradNumber"
field_updates["1bizName"] = intake.get("legal_name", "")
field_updates["2dbaName"] = intake.get("dba_name", "")
field_updates["3principalStreet"] = intake.get("address_street", "")
@ -204,32 +282,64 @@ def fill_mcs150(intake: dict, order_number: str = "") -> str:
field_updates["16usdotNumber"] = intake.get("dot_number", "")
field_updates["usdotNumber"] = intake.get("dot_number", "") # duplicate field
field_updates["17mcmxNumber"] = intake.get("mc_number", "")
field_updates["19irsNumber"] = intake.get("ein", "")
field_updates["20eMail"] = intake.get("email", "")
field_updates["21carrierMileage"] = str(intake.get("annual_miles", ""))
field_updates[dunbrad_field] = intake.get("dun_bradstreet", "")
field_updates[ein_field] = intake.get("ein", "")
field_updates[email_field] = intake.get("email", "")
field_updates["21carrierMileage"] = str(intake.get("annual_miles", "") or "")
# Mailing address (if different)
# Mailing address (if different from principal place of business)
if intake.get("mailing_street"):
field_updates["8mailStreet"] = intake.get("mailing_street", "")
field_updates["9mailCity"] = intake.get("mailing_city", "")
field_updates["10mailState"] = intake.get("mailing_state", "")
field_updates["11mailZip"] = intake.get("mailing_zip", "")
# Fleet/drivers
field_updates["totalDrivers"] = str(intake.get("drivers", ""))
field_updates["totalCDL"] = str(intake.get("cdl_drivers", intake.get("drivers", "")))
# Fleet / drivers (base + 150B)
if intake.get("drivers") not in (None, ""):
field_updates["totalDrivers"] = str(intake.get("drivers"))
field_updates["totalCDL"] = str(intake.get("cdl_drivers", intake.get("drivers")))
# Vehicle counts — straight trucks and tractors are most common
power_units = intake.get("power_units", "")
vehicle_type = intake.get("primary_vehicle_type", "straight")
if vehicle_type == "tractor":
field_updates["tractorOwn"] = str(power_units)
# Vehicle counts (Q26). Accept a structured `vehicles` dict of
# {vehicle_type: {owned, term_leased, trip_leased}} for full fleets; fall
# back to a single power-unit count on the primary vehicle type.
vehicles = intake.get("vehicles") or {}
if isinstance(vehicles, dict) and vehicles:
for vtype, counts in vehicles.items():
prefix = VEHICLE_TYPE_PREFIX.get(vtype)
if not prefix or not isinstance(counts, dict):
continue
for col, suffix in (("owned", "Own"), ("term_leased", "Term"),
("trip_leased", "Trip")):
val = counts.get(col)
if val not in (None, "", 0, "0"):
field_updates[f"{prefix}{suffix}"] = str(val)
else:
field_updates["straightOwn"] = str(power_units)
power_units = intake.get("power_units", "")
prefix = VEHICLE_TYPE_PREFIX.get(
intake.get("primary_vehicle_type", "straight"), "straight")
if power_units not in (None, ""):
field_updates[f"{prefix}Own"] = str(power_units)
# Officers
# Non-CMV count, if provided.
if intake.get("non_cmv_vehicles") not in (None, ""):
field_updates["non-CMV"] = str(intake.get("non_cmv_vehicles"))
# Intermodal equipment counts (150C only).
if form_type == "mcs150c":
for key, fld in (("iep_owned", "20owned"), ("iep_leased", "20leased"),
("iep_serviced", "20serviced")):
if intake.get(key) not in (None, ""):
field_updates[fld] = str(intake.get(key))
# HMSP accident count (150B only).
if form_type == "mcs150b" and intake.get("hmsp_accident_count") not in (None, ""):
field_updates["32accidentNumber"] = str(intake.get("hmsp_accident_count"))
# Officers (up to two named on the form)
field_updates["officerName1"] = intake.get("signer_name", "")
field_updates["officerTitle1"] = intake.get("signer_title", "")
field_updates["officerName2"] = intake.get("officer2_name", "")
field_updates["officerTitle2"] = intake.get("officer2_title", "")
# Certification
field_updates["certifyName"] = intake.get("signer_name", "")
@ -261,6 +371,29 @@ def fill_mcs150(intake: dict, order_number: str = "") -> str:
if cargo in CARGO_TYPE_MAP:
checkbox_on[CARGO_TYPE_MAP[cargo]] = True
# Q25 Hazardous materials matrix. `hazmat_materials` is a list of
# {commodity, roles:[carrier|shipper|bulk|non_bulk]} (or a dict
# {commodity: [roles]}). Each commodity row x role column = one box.
hazmat_materials = intake.get("hazmat_materials") or {}
if isinstance(hazmat_materials, list):
hazmat_materials = {m.get("commodity"): m.get("roles", [])
for m in hazmat_materials if isinstance(m, dict)}
for commodity, roles in (hazmat_materials or {}).items():
row = HAZMAT_ROW_MAP.get(commodity)
if not row:
continue
for role in roles or []:
col = HAZMAT_COL_MAP.get(role)
if col:
checkbox_on[f"25{row}{col}Box"] = True
# MCS-150B: states of operation (full names or 2-letter codes).
if form_type == "mcs150b":
for st in intake.get("operating_states", []) or []:
name = US_STATE_BY_CODE.get(str(st).upper(), st)
if name in US_STATES:
checkbox_on[name] = True
# Q29 Passenger Carrier Compliance Certification "YES" box. Only motor
# passenger carriers certify here -- leave it unchecked for freight/property
# carriers. (The Q31 perjury declaration is made via the signature, not a
@ -273,31 +406,41 @@ def fill_mcs150(intake: dict, order_number: str = "") -> str:
if is_passenger:
checkbox_on["certifyBox"] = True
# ── Radio-button groups (value /0../4, not /Yes) ─────────────────
# These are single-select radio fields; the selected option index is the
# field value.
# ── Radio-button groups ──────────────────────────────────────────
# These are single-select radios. _set_button resolves the semantic value
# against each field's actual export states, so it works whether the form
# uses numeric (/0../4) exports (base/150B) or named exports (150C).
radio_values = {}
# Reason for filing (form REASON FOR FILING). 0=New Application,
# 1=Biennial Update or Changes, 2=Out of Business, 3=Reapplication,
# 4=Reactivate. Map the service slug / explicit reason to the index.
reason_map = {
"new_application": "0",
"biennial_update": "1",
"out_of_business": "2",
"reapplication": "3",
"reactivate": "4",
}
# Reason for filing. Semantic -> {numeric index, named export} so it maps on
# both export styles.
reason = intake.get("reason_for_filing", "biennial_update")
radio_values["Reason Button"] = reason_map.get(reason, "1")
reason_choices = {
"new_application": ("0", "New Application"),
"biennial_update": ("1", "Biennial Update or Changes"),
"out_of_business": ("2", "Out of Business Notification"),
"reapplication": ("3", "Reapplication"),
"reactivate": ("4", "Reactivate"),
}
radio_values["Reason Button"] = reason_choices.get(reason, reason_choices["biennial_update"])
# Mailing address: 0 = same as principal place of business, 1 = different
# address provided below.
radio_values["Mailing Button"] = "1" if intake.get("mailing_street") else "0"
# Mailing address: same-as-principal vs separate mailing address below.
if intake.get("mailing_street"):
radio_values["Mailing Button"] = ("1", "Please enter mailing address below.")
else:
radio_values["Mailing Button"] = ("0", "Same as Principal Address")
# Q28 Is USDOT registration currently revoked? On this form the options
# render Yes (index 0) then No (index 1), so No = "1".
radio_values["Revoke Button"] = "0" if intake.get("usdot_revoked") == "yes" else "1"
# Q28 Is USDOT registration currently revoked? Options render Yes then No.
revoked = intake.get("usdot_revoked") == "yes"
radio_values["Revoke Button"] = ("0", "Yes") if revoked else ("1", "No")
# 150C asks the same as a USDOT Button (named Yes/No).
radio_values["USDOT Button"] = ("Yes",) if revoked else ("No",)
# MCS-150B HMSP questions (Hazard/Permit/Security): default No (index 1).
if form_type == "mcs150b":
radio_values["Hazard Button"] = ("0",) if intake.get("hmsp_hazard") == "yes" else ("1",)
radio_values["Permit Button"] = ("0",) if intake.get("hmsp_has_permit") == "yes" else ("1",)
radio_values["Security Button"] = ("0",) if intake.get("hmsp_security_plan") == "yes" else ("1",)
# ── Apply fields to PDF ──────────────────────────────────────────
# Apply text-field values to every page. The template ships with only the
@ -323,14 +466,24 @@ def fill_mcs150(intake: dict, order_number: str = "") -> str:
# explicit checkmark overlay (AcroForm appearances render inconsistently).
marks: list = []
def _set_button(field_name: str, on_value: str) -> None:
"""Set a /Btn field (checkbox or radio) to ``on_value`` (e.g. 'Yes' or
'1'). Handles both flat widgets and parent/kid radio groups."""
target = "/" + on_value.lstrip("/")
def _set_button(field_name: str, candidates) -> None:
"""Set a /Btn field (checkbox or radio) to the first of ``candidates``
that matches one of the field's actual export states. ``candidates`` may
be a single string or a tuple of strings (e.g. ('1', 'No') to cover both
numeric and named export styles). Handles flat checkboxes and
parent/kid radio groups."""
if isinstance(candidates, str):
candidates = (candidates,)
wanted = ["/" + str(c).lstrip("/") for c in candidates]
# Collect every widget annotation belonging to this field, plus the
# union of all of its export states, so we can resolve which candidate
# to use.
widgets = []
all_states: set = set()
for page in writer.pages:
for annot in page.get("/Annots", []) or []:
obj = annot.get_object()
# Resolve the field name from this widget or its parent.
name = obj.get("/T")
parent = obj.get("/Parent")
pobj = parent.get_object() if parent else None
@ -338,29 +491,37 @@ def fill_mcs150(intake: dict, order_number: str = "") -> str:
name = pobj.get("/T")
if name is None or str(name) != field_name:
continue
# Determine the appearance state this widget represents.
states = []
ap = obj.get("/AP")
on_states = []
if ap:
n = ap.get_object().get("/N")
if n is not None:
on_states = [str(k) for k in n.get_object().keys() if str(k) != "/Off"]
# Set the field value on the field object (parent for radios).
field_obj = pobj if pobj is not None else obj
field_obj[NameObject("/V")] = NameObject(target)
# The widget is "on" only if its own appearance state matches.
if target in on_states or (not on_states and target == "/Yes"):
obj[NameObject("/AS")] = NameObject(target)
# Record the on-widget rectangle + page so we can draw the
# mark directly onto the page content (see overlay below).
# AcroForm /AP appearances are unreliable across viewers
# (poppler/Preview regenerate them and lose the ZapfDingbats
# checkmark), so we stamp our own mark to guarantee it shows.
rect = obj.get("/Rect")
if rect is not None:
marks.append((page, [float(x) for x in rect]))
else:
obj[NameObject("/AS")] = NameObject("/Off")
states = [str(k) for k in n.get_object().keys() if str(k) != "/Off"]
all_states.update(states)
widgets.append((page, obj, pobj, states))
if not widgets:
return
# Choose the candidate that this field actually supports.
target = next((w for w in wanted if w in all_states), None)
if target is None:
target = "/Yes" if "/Yes" in all_states else (wanted[0] if wanted else "/Yes")
for page, obj, pobj, states in widgets:
field_obj = pobj if pobj is not None else obj
field_obj[NameObject("/V")] = NameObject(target)
if target in states or (not states and target == "/Yes"):
obj[NameObject("/AS")] = NameObject(target)
# Record the on-widget rectangle + page so we can draw the mark
# directly onto the page content. AcroForm /AP appearances are
# unreliable across viewers (poppler/Preview regenerate them and
# lose the ZapfDingbats checkmark), so we stamp our own mark.
rect = obj.get("/Rect")
if rect is not None:
marks.append((page, [float(x) for x in rect]))
else:
obj[NameObject("/AS")] = NameObject("/Off")
for field_name, checked in checkbox_on.items():
if checked:

View file

@ -0,0 +1,88 @@
"""Smoke-test all three MCS-150 variants fill the right fields/boxes.
Fills each variant, then reads the filled PDF's AcroForm field values (/V)
to confirm the variant populates its expected fields, including the 150B
hazmat-matrix + states-of-operation and 150C intermodal numbering.
"""
import sys
sys.path.insert(0, "/app")
from pypdf import PdfReader
from scripts.document_gen.templates.mcs150_pdf_filler import fill_mcs150
# Intake shared by every variant below; the 150B/150C cases copy it with
# dict(BASE, ...) and add their variant-specific keys.
BASE = {
    "legal_name": "ALLENS SCRAP METAL LLC",
    "dot_number": "1609564",
    "ein": "264625451",
    "address_street": "3838 DANNY RD",
    "address_city": "LORIS",
    "address_state": "SC",
    "address_zip": "29569",
    "phone": "8435551234",
    "email": "m@allenscrapmetal.com",
    "annual_miles": "42000",
    "carrier_operation": "private_property",
    "cargo_types": ["metal", "general"],
    "interstate_intrastate": "interstate",
    "power_units": "2",
    "drivers": "2",
    "signer_name": "Mitchell W Allen",
    "signer_title": "Owner",
}
def btn_on(fields):
    """Return ``{field name: str(/V)}`` for every button field that is on.

    A field is included when its ``/FT`` is ``/Btn`` and its ``/V`` is
    truthy and not ``/Off``.
    """
    selected = {}
    for name, field in fields.items():
        if str(field.get("/FT")) != "/Btn":
            continue
        value = field.get("/V")
        if not value or str(value) == "/Off":
            continue
        selected[name] = str(value)
    return selected
def tx(fields, name):
    """Return the ``/V`` value of field ``name``, or None when the field is
    missing, falsy, or has no ``/V`` entry."""
    entry = fields.get(name)
    if not entry:
        return None
    return entry.get("/V")
def check(label, cond):
    """Print one OK/MISS line for ``label`` and return the outcome.

    ``cond`` may be any truthy/falsy value (callers pass things like
    ``value and other == "x"``, which yields None or "" rather than False
    when the first operand is missing). The result is always a real bool so
    that ``ok &= check(...)`` reports a MISS instead of raising TypeError on
    ``bool & None`` / ``bool & str``.
    """
    passed = bool(cond)
    print(f" [{'OK ' if passed else 'MISS'}] {label}")
    return passed
# Running verdict for the whole script: AND-ed with each check() result.
ok = True
# ── Base MCS-150 ─────────────────────────────────────────────────────
print("=== MCS-150 (base) ===")
# Fill using the unmodified BASE intake; fill_mcs150's str return value is
# handed straight to PdfReader, then the AcroForm fields are read back.
r = PdfReader(fill_mcs150(BASE, "CO-V150"))
f = r.get_fields() or {}
b = btn_on(f)
ok &= check(f"3 pages (got {len(r.pages)})", len(r.pages) == 3)
ok &= check("Q22 interstate (22aBox)", b.get("22aBox") == "/Yes")
ok &= check("Q23 private property (23cBox)", b.get("23cBox") == "/Yes")
ok &= check("Q24 cargo metal+general", b.get("24aBox") == "/Yes" and b.get("24cBox") == "/Yes")
# The radio groups are compared against numeric export states here.
ok &= check("Reason=Biennial", b.get("Reason Button") == "/1")
ok &= check("Mailing=same", b.get("Mailing Button") == "/0")
ok &= check("Revoke=No (/1)", b.get("Revoke Button") == "/1")
# NOTE(review): `tx(...) and ...` passes the first operand through as-is when
# it is falsy (None / ""), so this line relies on check() returning a real
# bool for `ok &=` to work.
ok &= check("email + mileage set", tx(f, "20eMail") and tx(f, "21carrierMileage") == "42000")
# BASE is a freight intake, so the passenger certification box must stay off.
ok &= check("certifyBox NOT set (freight)", "certifyBox" not in b)
# ── MCS-150B (hazmat permit) ─────────────────────────────────────────
print("=== MCS-150B (hazmat permit) ===")
# BASE plus hazmat-permit inputs: operating states given as 2-letter codes,
# hazmat_materials in its dict form ({commodity: [roles]}), and HMSP answers.
# Presumably hazmat/needs_hmsp is what selects the 150B template -- the
# selection logic is not part of this script.
hz = dict(BASE, hazmat="yes", needs_hmsp=True,
          interstate_intrastate="intrastate_hazmat",
          operating_states=["SC", "NC", "GA"],
          hazmat_materials={"class_3": ["carrier", "bulk"], "class_8": ["shipper"]},
          hmsp_accident_count="0", hmsp_security_plan="yes",
          cargo_types=["chemicals"])
r = PdfReader(fill_mcs150(hz, "CO-V150B"))
f = r.get_fields() or {}
b = btn_on(f)
ok &= check(f"4 pages (got {len(r.pages)})", len(r.pages) == 4)
# State checkboxes are looked up under the full state name, not the code.
ok &= check("states SC/NC/GA", all(b.get(s) == "/Yes" for s in ("South Carolina", "North Carolina", "Georgia")))
# Hazmat boxes follow 25{row}{COL}Box: class_3 is row "o", class_8 is row "dd".
ok &= check("hazmat class_3 carrier+bulk", b.get("25oCBox") == "/Yes" and b.get("25oBBox") == "/Yes")
ok &= check("hazmat class_8 shipper", b.get("25ddSBox") == "/Yes")
# An accident count of "0" must still be written (it is not treated as blank).
ok &= check("accident count 0", tx(f, "32accidentNumber") == "0")
ok &= check("Security Button yes (/0)", b.get("Security Button") == "/0")
# ── MCS-150C (intermodal) ────────────────────────────────────────────
print("=== MCS-150C (intermodal) ===")
# BASE plus the intermodal-equipment-provider flag and its three counts.
iep = dict(BASE, is_intermodal_equipment_provider=True,
           iep_owned="10", iep_leased="3", iep_serviced="2")
r = PdfReader(fill_mcs150(iep, "CO-V150C"))
f = r.get_fields() or {}
b = btn_on(f)
ok &= check(f"2 pages (got {len(r.pages)})", len(r.pages) == 2)
ok &= check("legal name set", tx(f, "1bizName") == "ALLENS SCRAP METAL LLC")
# On the 150C the EIN and email fields are 18irsNumber / 19eMail rather than
# the 19irsNumber / 20eMail names checked for the base form.
ok &= check("EIN in 18irsNumber (150C numbering)", tx(f, "18irsNumber") == "264625451")
ok &= check("email in 19eMail (150C numbering)", bool(tx(f, "19eMail")))
ok &= check("iep counts 10/3/2", tx(f, "20owned") == "10" and tx(f, "20leased") == "3" and tx(f, "20serviced") == "2")
# Either export style is accepted for the reason radio: named or numeric.
ok &= check("Reason named-export resolves", b.get("Reason Button") in ("/Biennial Update or Changes", "/1"))
ok &= check("USDOT Button = No", b.get("USDOT Button") == "/No")
# Summary line, then exit status 0 when every check passed and 1 otherwise.
print("\n" + ("ALL PASS" if ok else "FAILURES ABOVE"))
sys.exit(0 if ok else 1)