"""MCS-150 Official PDF Form Filler. Fills the official FMCSA MCS-150/150B/150C fillable PDF forms using intake data from the order. Produces a ready-to-fax or electronically submit PDF. Forms stored at (FORM pages only -- the FMCSA instruction/example pages are trimmed off the source PDFs so the filled output is fax/submit-ready and never includes the instruction pages): docs/MCS-150 Form.pdf — standard (3 pages, 289 fields) docs/MCS-150B Form.pdf — hazmat safety permit (4 pages, 349 fields) docs/MCS-150C Form.pdf — intermodal equipment (2 pages, 33 fields) Usage: from scripts.document_gen.templates.mcs150_pdf_filler import fill_mcs150 pdf_path = fill_mcs150(intake_data, order_number="CO-12345") """ from __future__ import annotations import logging import os import tempfile from datetime import datetime from pathlib import Path from copy import copy LOG = logging.getLogger("document_gen.mcs150_pdf_filler") try: from pypdf import PdfReader, PdfWriter from pypdf.generic import NameObject, BooleanObject, TextStringObject except ImportError: LOG.warning("pypdf not installed — MCS-150 PDF filling unavailable") PdfReader = None # Path to the official forms DOCS_DIR = Path(__file__).resolve().parent.parent.parent.parent / "docs" FORMS = { "mcs150": DOCS_DIR / "MCS-150 Form.pdf", "mcs150b": DOCS_DIR / "MCS-150B Form.pdf", "mcs150c": DOCS_DIR / "MCS-150C Form.pdf", } # ── Field mappings ──────────────────────────────────────────────────── # Question 22: COMPANY OPERATIONS (interstate / intrastate classification). # A=Interstate Carrier, B=Intrastate Hazmat Carrier, C=Intrastate Non-Hazmat # Carrier, D=Interstate Hazmat Shipper, E=Intrastate Hazmat Shipper. COMPANY_OPERATION_MAP = { "interstate": "22aBox", "intrastate_hazmat": "22bBox", "intrastate_non_hazmat": "22cBox", } # Question 23: OPERATION CLASSIFICATIONS (how the carrier operates). 
# A=Authorized For-Hire, B=Exempt For-Hire, C=Private Property,
# D=Private Passengers (Business), E=Private Passengers (Non-Business),
# F=Migrant, G=U.S. Mail, H=Federal Govt, I=State Govt, J=Local Govt,
# K=Indian Tribe.
CARRIER_OP_MAP = {
    "authorized_for_hire": "23aBox",
    "exempt_for_hire": "23bBox",
    "private_property": "23cBox",
    "private_passengers_business": "23dBox",
    "private_passengers_non_business": "23eBox",
    "migrant": "23fBox",
    "us_mail": "23gBox",
    "federal_government": "23hBox",
    "state_government": "23iBox",
    "local_government": "23jBox",
    "indian_tribe": "23kBox",
    # Legacy synonym kept so older intake values still map sensibly; it
    # shares the box used by "private_passengers_business".
    "private_passengers": "23dBox",
}

# Question 24: Cargo Types (checkboxes — a through z, aa through dd).
# The entries below stop at 24ccBox ("other").
CARGO_TYPE_MAP = {
    "general": "24aBox",
    "household": "24bBox",
    "metal": "24cBox",
    "motor_vehicles": "24dBox",
    "drivetow": "24eBox",
    "logs": "24fBox",
    "building_materials": "24gBox",
    "mobile_homes": "24hBox",
    "machinery": "24iBox",
    "fresh_produce": "24jBox",
    "liquids": "24kBox",
    "intermodal": "24lBox",
    "passengers": "24mBox",
    "oilfield": "24nBox",
    "livestock": "24oBox",
    "grain": "24pBox",
    "coal": "24qBox",
    "meat": "24rBox",
    "garbage": "24sBox",
    "chemicals": "24tBox",
    "commodities_dry": "24uBox",
    "refrigerated": "24vBox",
    "beverages": "24wBox",
    "paper": "24xBox",
    "utilities": "24yBox",
    "farm_supplies": "24zBox",
    "construction": "24aaBox",
    "water_well": "24bbBox",
    "other": "24ccBox",
}

# Question 25: Hazardous Materials matrix. Each commodity row has four columns:
# C=Carrier, S=Shipper, B=Bulk (cargo tanks), NB=Non-Bulk (in packages). The
# AcroForm fields are 25{row}{COL}Box where COL in {C,S,B,NB}. The row letters
# follow the printed list A..Z then AA..MM (with a few gaps for headers).
# Intake commodity key -> row letter(s) used in the 25{row}{COL}Box field name.
HAZMAT_ROW_MAP = {
    "div_1_1": "a",
    "div_1_2": "b",
    "div_1_3": "c",
    "div_1_4": "d",
    "div_1_5": "e",
    "div_1_6": "f",
    "div_2_1_flam_gas": "g",
    "div_2_1_lpg": "h",
    "div_2_1_methane": "i",
    "div_2_2": "j",
    "div_2_3a": "k",
    "div_2_3b": "l",
    "div_2_3c": "m",
    "div_2_3d": "n",
    "class_3": "o",
    "comb_liq": "p",
    "div_4_1": "q",
    "div_4_2": "r",
    "div_4_3": "s",
    "div_5_1": "t",
    "div_5_2": "u",
    "div_6_1a": "v",
    "div_6_1b": "w",
    "div_6_1_liquid": "x",
    "div_6_1_solid": "y",
    "div_6_2_infectious": "z",
    "div_6_2_select": "aa",
    "class_7": "bb",
    "hrcq": "cc",
    "class_8": "dd",
    "class_8a": "ee",
    "class_8b": "ff",
    "class_9": "gg",
    "elevated_temp": "hh",
    "infectious_waste": "ii",
    "marine_pollutants": "jj",
    "hazardous_sub_rq": "kk",
    "hazardous_waste": "ll",
    "ltd_qty": "mm",
}

# Intake role name -> column code used in the 25{row}{COL}Box field name.
HAZMAT_COL_MAP = {
    "carrier": "C",
    "shipper": "S",
    "bulk": "B",
    "non_bulk": "NB",
}

# Vehicle-count rows (Q26): map an intake vehicle-type key to the field prefix
# used for Owned/Term-leased/Trip-leased columns ({prefix}Own/{prefix}Term/
# {prefix}Trip).
VEHICLE_TYPE_PREFIX = {
    "straight": "straight",
    "tractor": "tractor",
    "trailer": "trailer",
    "hazmat_truck": "haztruck",
    "hazmat_trailer": "haztrail",
    "motorcoach": "coach",
    "school_bus_1_8": "school1-8",
    "school_bus_9_15": "school9-15",
    "school_bus_16": "school16+",
    "bus_16": "bus16+",
    "van_1_8": "van1-8",
    "van_9_15": "van9-15",
    "limo_1_8": "limo1-8",
    "limo_9_15": "limo9-15",
    "limo_16": "limo16+",
}

# US states / territories -> the MCS-150B "states of operation" checkbox field
# name (the field name is the full state name).
US_STATES = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
    "Connecticut", "Delaware", "District of Columbia", "Florida", "Georgia",
    "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
    "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
    "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire",
    "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota",
    "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island",
    "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming",
]

# Accept two-letter codes too.
US_STATE_BY_CODE = {
    "AL": "Alabama", "AK": "Alaska", "AZ": "Arizona", "AR": "Arkansas",
    "CA": "California", "CO": "Colorado", "CT": "Connecticut", "DE": "Delaware",
    "DC": "District of Columbia", "FL": "Florida", "GA": "Georgia",
    "HI": "Hawaii", "ID": "Idaho", "IL": "Illinois", "IN": "Indiana",
    "IA": "Iowa", "KS": "Kansas", "KY": "Kentucky", "LA": "Louisiana",
    "ME": "Maine", "MD": "Maryland", "MA": "Massachusetts", "MI": "Michigan",
    "MN": "Minnesota", "MS": "Mississippi", "MO": "Missouri", "MT": "Montana",
    "NE": "Nebraska", "NV": "Nevada", "NH": "New Hampshire", "NJ": "New Jersey",
    "NM": "New Mexico", "NY": "New York", "NC": "North Carolina",
    "ND": "North Dakota", "OH": "Ohio", "OK": "Oklahoma", "OR": "Oregon",
    "PA": "Pennsylvania", "PR": "Puerto Rico", "RI": "Rhode Island",
    "SC": "South Carolina", "SD": "South Dakota", "TN": "Tennessee",
    "TX": "Texas", "UT": "Utah", "VT": "Vermont", "VA": "Virginia",
    "WA": "Washington", "WV": "West Virginia", "WI": "Wisconsin",
    "WY": "Wyoming",
}

# Case-insensitive index of the canonical names, so intake values such as
# "texas" or " Texas " still resolve to the exact checkbox field name.
_STATE_NAME_BY_FOLDED = {name.casefold(): name for name in US_STATES}

# (intake key, field-name suffix) for the three Q26 vehicle-count columns.
_VEHICLE_COLUMNS = (("owned", "Own"), ("term_leased", "Term"), ("trip_leased", "Trip"))


def determine_form_type(intake: dict) -> str:
    """Determine which MCS-150 form to use.

    Args:
        intake: Intake dict. Reads ``is_intermodal_equipment_provider``,
            ``hazmat`` and ``needs_hmsp``.

    Returns:
        ``'mcs150c'`` when the intermodal flag is truthy; otherwise
        ``'mcs150b'`` when ``hazmat == "yes"`` and ``needs_hmsp`` is truthy;
        otherwise ``'mcs150'``.
    """
    if intake.get("is_intermodal_equipment_provider"):
        return "mcs150c"
    if intake.get("hazmat") == "yes" and intake.get("needs_hmsp"):
        return "mcs150b"
    return "mcs150"


def _text(value) -> str:
    """Return ``value`` as text, mapping ``None`` to "" (never the text "None")."""
    return "" if value is None else str(value)


def _as_list(value) -> list:
    """Normalise an intake value that should be a list.

    ``None`` and "" become ``[]``; a bare string becomes a one-item list
    (instead of being iterated character by character); any other iterable is
    materialised; a non-iterable scalar is wrapped.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value else []
    try:
        return list(value)
    except TypeError:
        return [value]


def _lookup(mapping: dict, key):
    """Return ``mapping[key]`` for string keys, else ``None``.

    All field maps are keyed by strings; rejecting other types up front avoids
    a ``TypeError`` when intake supplies an unhashable value (list/dict).
    """
    return mapping.get(key) if isinstance(key, str) else None


def _resolve_state_name(value) -> str:
    """Return the canonical full state name for a 2-letter code or a name.

    Matching ignores case and surrounding whitespace. Returns "" when the value
    is not a known state/territory.
    """
    token = _text(value).strip()
    by_code = US_STATE_BY_CODE.get(token.upper())
    if by_code:
        return by_code
    return _STATE_NAME_BY_FOLDED.get(token.casefold(), "")


def _safe_filename_part(value, fallback: str = "unknown") -> str:
    """Reduce ``value`` to ASCII letters, digits, '-' and '_' for a filename.

    Intake is user-supplied, so path separators and dots are dropped to keep
    the output inside the temp directory. Returns ``fallback`` when nothing
    usable remains.
    """
    cleaned = "".join(
        ch for ch in _text(value) if (ch.isascii() and ch.isalnum()) or ch in "-_"
    )
    return cleaned or fallback


def _vehicle_count_fields(intake: dict) -> dict:
    """Build the Q26 vehicle-count text fields.

    Accepts a structured ``vehicles`` dict of
    ``{vehicle_type: {owned, term_leased, trip_leased}}`` for full fleets;
    otherwise falls back to a single ``power_units`` count on the primary
    vehicle type plus a flat ``trailers`` count in the Owned column.
    """
    fields = {}
    vehicles = intake.get("vehicles") or {}
    if isinstance(vehicles, dict) and vehicles:
        for vtype, counts in vehicles.items():
            prefix = _lookup(VEHICLE_TYPE_PREFIX, vtype)
            if not prefix or not isinstance(counts, dict):
                continue
            for col, suffix in _VEHICLE_COLUMNS:
                val = counts.get(col)
                if val not in (None, "", 0, "0"):
                    fields[f"{prefix}{suffix}"] = str(val)
        return fields

    power_units = intake.get("power_units", "")
    prefix = _lookup(VEHICLE_TYPE_PREFIX, intake.get("primary_vehicle_type", "straight"))
    prefix = prefix or "straight"
    if power_units not in (None, ""):
        fields[f"{prefix}Own"] = str(power_units)
    # Simple intake also collects a flat trailer count (no owned/leased
    # breakdown). Default trailers to the Owned column of the trailer row.
    trailers = intake.get("trailers", "")
    if trailers not in (None, "", 0, "0"):
        fields["trailerOwn"] = str(trailers)
    return fields


def _build_text_fields(intake: dict, form_type: str, now: datetime) -> dict:
    """Map intake data to ``{text field name: value}`` for the chosen form.

    Empty values are left in the result; the caller drops them before writing.
    ``now`` supplies the certification date.
    """
    # The 150C form renumbers a few identification fields (D&B/EIN/email shift
    # up by one because it has no MC/MX line). Pick the right field names per
    # variant so the values land in the correct boxes.
    if form_type == "mcs150c":
        dunbrad_field, ein_field, email_field = "17dunbradNumber", "18irsNumber", "19eMail"
    else:
        dunbrad_field, ein_field, email_field = "18dunbradNumber", "19irsNumber", "20eMail"

    dot_number = intake.get("dot_number", "")
    signer_name = intake.get("signer_name", "")
    signer_title = intake.get("signer_title", "")

    fields = {
        "1bizName": intake.get("legal_name", ""),
        "2dbaName": intake.get("dba_name", ""),
        "3principalStreet": intake.get("address_street", ""),
        "4principalCity": intake.get("address_city", ""),
        "5principalState": intake.get("address_state", ""),
        "6principalZip": intake.get("address_zip", ""),
        "13bizPhone": intake.get("phone", ""),
        "14cellPhone": intake.get("cell_phone", ""),
        "15faxNumber": intake.get("fax", ""),
        "16usdotNumber": dot_number,
        "usdotNumber": dot_number,  # duplicate field
        "17mcmxNumber": intake.get("mc_number", ""),
        dunbrad_field: intake.get("dun_bradstreet", ""),
        ein_field: intake.get("ein", ""),
        email_field: intake.get("email", ""),
        "21carrierMileage": str(intake.get("annual_miles", "") or ""),
        # Officers (up to two named on the form)
        "officerName1": signer_name,
        "officerTitle1": signer_title,
        "officerName2": intake.get("officer2_name", ""),
        "officerTitle2": intake.get("officer2_title", ""),
        # Certification
        "certifyName": signer_name,
        "certifyTitle": signer_title,
        "certifyDate": now.strftime("%m/%d/%Y"),
    }

    # Mailing address (if different from principal place of business)
    if intake.get("mailing_street"):
        fields["8mailStreet"] = intake.get("mailing_street", "")
        fields["9mailCity"] = intake.get("mailing_city", "")
        fields["10mailState"] = intake.get("mailing_state", "")
        fields["11mailZip"] = intake.get("mailing_zip", "")

    # Fleet / drivers (base + 150B). CDL count defaults to the total driver
    # count; a present-but-empty cdl_drivers is treated like a missing one so
    # the literal text "None" is never written into the form.
    drivers = intake.get("drivers")
    if drivers not in (None, ""):
        cdl_drivers = intake.get("cdl_drivers")
        fields["totalDrivers"] = str(drivers)
        fields["totalCDL"] = str(drivers if cdl_drivers in (None, "") else cdl_drivers)

    fields.update(_vehicle_count_fields(intake))

    # Non-CMV count, if provided.
    non_cmv = intake.get("non_cmv_vehicles")
    if non_cmv not in (None, ""):
        fields["non-CMV"] = str(non_cmv)

    # Intermodal equipment counts (150C only).
    if form_type == "mcs150c":
        for key, fld in (("iep_owned", "20owned"),
                         ("iep_leased", "20leased"),
                         ("iep_serviced", "20serviced")):
            count = intake.get(key)
            if count not in (None, ""):
                fields[fld] = str(count)

    # HMSP accident count (150B only).
    accidents = intake.get("hmsp_accident_count")
    if form_type == "mcs150b" and accidents not in (None, ""):
        fields["32accidentNumber"] = str(accidents)

    # Interstate/intrastate mileage. _text() keeps a None annual_miles from
    # being rendered as the string "None".
    scope = intake.get("interstate_intrastate", "")
    if scope == "interstate":
        fields["interWithin"] = _text(intake.get("annual_miles"))
    elif scope in ("intrastate_hazmat", "intrastate_non_hazmat"):
        fields["intraWithin"] = _text(intake.get("annual_miles"))

    return fields


def _build_checkboxes(intake: dict, form_type: str) -> list:
    """Return the de-duplicated, ordered names of checkbox fields to turn on."""
    names = []

    # Q22 Company operations (interstate / intrastate classification)
    company_box = _lookup(COMPANY_OPERATION_MAP, intake.get("interstate_intrastate", ""))
    if company_box:
        names.append(company_box)

    # Q23 Operation classifications (for-hire / private / government / etc.)
    carrier_op = intake.get("carrier_operation", "")
    carrier_box = _lookup(CARRIER_OP_MAP, carrier_op)
    if carrier_box:
        names.append(carrier_box)

    # Q24 Cargo types (tolerates a missing/None list or a single string)
    for cargo in _as_list(intake.get("cargo_types")):
        cargo_box = _lookup(CARGO_TYPE_MAP, cargo)
        if cargo_box:
            names.append(cargo_box)

    # Q25 Hazardous materials matrix. `hazmat_materials` is a list of
    # {commodity, roles:[carrier|shipper|bulk|non_bulk]} (or a dict
    # {commodity: [roles]}). Each commodity row x role column = one box.
    # Any other shape is ignored rather than raising.
    hazmat_materials = intake.get("hazmat_materials") or {}
    if isinstance(hazmat_materials, (list, tuple)):
        by_commodity = {}
        for entry in hazmat_materials:
            if isinstance(entry, dict) and isinstance(entry.get("commodity"), str):
                # A later entry for the same commodity replaces an earlier one.
                by_commodity[entry["commodity"]] = entry.get("roles", [])
        hazmat_materials = by_commodity
    elif not isinstance(hazmat_materials, dict):
        hazmat_materials = {}
    for commodity, roles in hazmat_materials.items():
        row = _lookup(HAZMAT_ROW_MAP, commodity)
        if not row:
            continue
        for role in _as_list(roles):
            col = _lookup(HAZMAT_COL_MAP, role)
            if col:
                names.append(f"25{row}{col}Box")

    # MCS-150B: states of operation (full names or 2-letter codes).
    if form_type == "mcs150b":
        for entry in _as_list(intake.get("operating_states")):
            state_name = _resolve_state_name(entry)
            if state_name:
                names.append(state_name)

    # Q29 Passenger Carrier Compliance Certification "YES" box. Only motor
    # passenger carriers certify here -- leave it unchecked for freight/property
    # carriers. (The Q31 perjury declaration is made via the signature, not a
    # checkbox.)
    is_passenger = (
        intake.get("is_passenger_carrier") == "yes"
        or carrier_op in ("private_passengers_business",
                          "private_passengers_non_business",
                          "private_passengers")
    )
    if is_passenger:
        names.append("certifyBox")

    return list(dict.fromkeys(names))


def _build_radio_values(intake: dict, form_type: str) -> dict:
    """Return ``{radio field name: candidate export values}``.

    Each value is a tuple of candidate export names; ``_set_button`` picks the
    first one the field actually supports, so numeric (/0../4) and named
    exports are both covered.
    """
    radio_values = {}

    # Reason for filing. Semantic -> (numeric index, named export).
    reason_choices = {
        "new_application": ("0", "New Application"),
        "biennial_update": ("1", "Biennial Update or Changes"),
        "out_of_business": ("2", "Out of Business Notification"),
        "reapplication": ("3", "Reapplication"),
        "reactivate": ("4", "Reactivate"),
    }
    reason = intake.get("reason_for_filing", "biennial_update")
    radio_values["Reason Button"] = (
        _lookup(reason_choices, reason) or reason_choices["biennial_update"]
    )

    # Mailing address: same-as-principal vs separate mailing address below.
    if intake.get("mailing_street"):
        radio_values["Mailing Button"] = ("1", "Please enter mailing address below.")
    else:
        radio_values["Mailing Button"] = ("0", "Same as Principal Address")

    # Q28 Is USDOT registration currently revoked? Options render Yes then No.
    revoked = intake.get("usdot_revoked") == "yes"
    radio_values["Revoke Button"] = ("0", "Yes") if revoked else ("1", "No")
    # 150C asks the same as a USDOT Button (named Yes/No).
    radio_values["USDOT Button"] = ("Yes",) if revoked else ("No",)

    # MCS-150B HMSP questions (Hazard/Permit/Security): default No (index 1).
    if form_type == "mcs150b":
        for field_name, key in (("Hazard Button", "hmsp_hazard"),
                                ("Permit Button", "hmsp_has_permit"),
                                ("Security Button", "hmsp_security_plan")):
            radio_values[field_name] = ("0",) if intake.get(key) == "yes" else ("1",)

    return radio_values


def _set_button(writer, field_name: str, candidates, marks: list) -> None:
    """Set a /Btn field (checkbox or radio) to the first supported candidate.

    ``candidates`` may be a single string or a tuple of strings (e.g.
    ``('1', 'No')`` to cover both numeric and named export styles). Handles
    flat checkboxes and parent/kid radio groups. Every widget that ends up
    "on" has its ``(page, rect)`` appended to ``marks`` so the caller can stamp
    a visible mark. Does nothing when the field is not present in ``writer``.
    """
    if isinstance(candidates, str):
        candidates = (candidates,)
    wanted = ["/" + str(c).lstrip("/") for c in candidates]

    # Collect every widget annotation belonging to this field, plus the union
    # of all of its export states, so we can resolve which candidate to use.
    widgets = []
    all_states = set()
    for page in writer.pages:
        for annot in page.get("/Annots", []) or []:
            obj = annot.get_object()
            name = obj.get("/T")
            parent = obj.get("/Parent")
            pobj = parent.get_object() if parent else None
            # Radio kids usually carry no /T of their own; use the parent's.
            if name is None and pobj is not None:
                name = pobj.get("/T")
            if name is None or str(name) != field_name:
                continue
            states = []
            ap = obj.get("/AP")
            if ap:
                n = ap.get_object().get("/N")
                if n is not None:
                    states = [str(k) for k in n.get_object().keys()
                              if str(k) != "/Off"]
            all_states.update(states)
            widgets.append((page, obj, pobj, states))
    if not widgets:
        return

    # Choose the candidate that this field actually supports.
    target = next((w for w in wanted if w in all_states), None)
    if target is None:
        target = "/Yes" if "/Yes" in all_states else (wanted[0] if wanted else "/Yes")

    for page, obj, pobj, states in widgets:
        field_obj = pobj if pobj is not None else obj
        field_obj[NameObject("/V")] = NameObject(target)
        if target in states or (not states and target == "/Yes"):
            obj[NameObject("/AS")] = NameObject(target)
            # Record the on-widget rectangle + page so the mark can be drawn
            # directly onto the page content. Malformed rects are skipped so
            # they cannot break the overlay step later.
            rect = obj.get("/Rect")
            if rect is not None:
                coords = [float(x) for x in rect]
                if len(coords) == 4:
                    marks.append((page, coords))
        else:
            obj[NameObject("/AS")] = NameObject("/Off")


def _stamp_check_marks(writer, marks: list) -> None:
    """Draw an explicit "X" mark inside each (page, rect) in ``marks``.

    AcroForm checkbox/radio appearances render inconsistently across viewers
    (poppler, Preview), so the mark is burned into the page content to make it
    show on screen and in the faxed/printed output. Rects are grouped per page
    so a single overlay is merged onto each affected page. If reportlab is
    unavailable the function logs a warning and returns without stamping.
    """
    if not marks:
        return
    try:
        import io
        from reportlab.pdfgen import canvas as _canvas
        from pypdf import PdfReader as _PdfReader
    except Exception as exc:  # reportlab missing — fall back to AcroForm only
        LOG.warning("Checkmark overlay unavailable (%s); relying on AcroForm", exc)
        return

    # Group rects by their page object's index in the writer.
    page_index = {id(p): i for i, p in enumerate(writer.pages)}
    by_page = {}
    for page, rect in marks:
        idx = page_index.get(id(page))
        if idx is not None:
            by_page.setdefault(idx, []).append(rect)

    for idx, rects in by_page.items():
        page = writer.pages[idx]
        pw = float(page.mediabox.width)
        ph = float(page.mediabox.height)
        buf = io.BytesIO()
        c = _canvas.Canvas(buf, pagesize=(pw, ph))
        c.setLineWidth(1.3)
        for rect in rects:
            if len(rect) != 4:
                continue
            # A /Rect may list its two corners in either order; normalise so
            # the inset always shrinks the X into the box.
            left, right = sorted((rect[0], rect[2]))
            bottom, top = sorted((rect[1], rect[3]))
            # Draw an "X" that fills most of the box with a small inset.
            inset = max(1.2, (right - left) * 0.18)
            c.line(left + inset, bottom + inset, right - inset, top - inset)
            c.line(left + inset, top - inset, right - inset, bottom + inset)
        c.save()
        buf.seek(0)
        overlay = _PdfReader(buf).pages[0]
        page.merge_page(overlay)


def fill_mcs150(intake: dict, order_number: str = "") -> str:
    """Fill the official MCS-150 PDF form.

    Args:
        intake: Dict with all MCS-150 fields from intake form.
        order_number: Order number. Accepted for API compatibility; it is not
            currently used in the output path or file name.

    Returns:
        Path to the filled PDF, written into a fresh temporary directory that
        the caller is responsible for cleaning up.

    Raises:
        ImportError: pypdf is not installed.
        FileNotFoundError: the template PDF for the selected form is missing.
    """
    if PdfReader is None:
        raise ImportError("pypdf not installed")

    form_type = determine_form_type(intake)
    form_path = FORMS[form_type]
    if not form_path.exists():
        raise FileNotFoundError(f"MCS-150 form not found: {form_path}")

    reader = PdfReader(str(form_path))
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)

    # One timestamp for both the certification date and the file name.
    now = datetime.now()
    field_updates = _build_text_fields(intake, form_type, now)
    checkbox_on = _build_checkboxes(intake, form_type)
    radio_values = _build_radio_values(intake, form_type)

    # ── Apply fields to PDF ──────────────────────────────────────────
    # Apply text-field values to every page. The template ships with only the
    # fillable FORM pages, so we update across all pages (field names not
    # present on a given page are expected to be ignored by pypdf).
    # auto_regenerate=True additionally asks pypdf to flag the form so viewers
    # rebuild field appearances -- see the pypdf docs for the exact semantics
    # in the installed version. Empty values are dropped and the rest coerced
    # to str so a numeric intake value cannot break the page update.
    text_values = {name: str(value) for name, value in field_updates.items() if value}
    for page in writer.pages:
        try:
            writer.update_page_form_field_values(
                page, text_values, auto_regenerate=True,
            )
        except Exception as exc:
            # Best-effort per page, but surfaced at warning level: a failure
            # here means the page is returned unfilled.
            LOG.warning("Form field apply on page failed: %s", exc)

    # Apply checkbox fields (value /Yes) and radio groups (value /0../n) by
    # walking the widget annotations. ``marks`` collects (page, rect) of every
    # "on" widget so an explicit mark can be stamped afterwards.
    marks = []
    for field_name in checkbox_on:
        try:
            _set_button(writer, field_name, "Yes", marks)
        except Exception as exc:
            LOG.warning("Checkbox %s set failed: %s", field_name, exc)
    for field_name, value in radio_values.items():
        try:
            _set_button(writer, field_name, value, marks)
        except Exception as exc:
            LOG.warning("Radio %s set failed: %s", field_name, exc)

    # Stamp an explicit mark onto every "on" checkbox/radio so it shows in
    # every viewer and in the faxed/printed output.
    _stamp_check_marks(writer, marks)

    # Force viewers to (re)generate field appearance streams from the values we
    # set. Without /NeedAppearances the template's blank /AP streams can stay
    # in place, so the typed values are present in /V but render as empty.
    try:
        catalog = writer._root_object
        if "/AcroForm" in catalog:
            acro = catalog["/AcroForm"]
            acro_obj = acro.get_object() if hasattr(acro, "get_object") else acro
            acro_obj[NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as exc:
        LOG.warning("Could not set NeedAppearances: %s", exc)

    # Save. The DOT number comes from user intake, so it is sanitised before
    # being embedded in the file name.
    work_dir = tempfile.mkdtemp(prefix="pw_mcs150_")
    dot = _safe_filename_part(intake.get("dot_number"))
    date_str = now.strftime("%Y%m%d")
    filename = f"MCS150_DOT{dot}_{date_str}_filled.pdf"
    filepath = os.path.join(work_dir, filename)
    with open(filepath, "wb") as f:
        writer.write(f)

    LOG.info("Filled MCS-150 (%s) → %s", form_type, filepath)
    return filepath


if __name__ == "__main__":
    # Manual smoke test: fill the standard form with sample data.
    test_intake = {
        "legal_name": "ADAMS LUMBER INC",
        "dba_name": "Adams Trucking",
        "dot_number": "1157913",
        "mc_number": "MC-456789",
        "address_street": "123 Timber Lane",
        "address_city": "Portland",
        "address_state": "OR",
        "address_zip": "97201",
        "phone": "(503) 555-1234",
        "email": "mark@adamslumber.com",
        "entity_type": "corporation",
        "carrier_operation": "authorized_for_hire",
        "interstate_intrastate": "interstate",
        "hazmat": "no",
        "power_units": "5",
        "drivers": "6",
        "annual_miles": "250000",
        "cargo_types": ["general", "building_materials", "logs"],
        "signer_name": "Mark Adams",
        "signer_title": "President",
    }
    path = fill_mcs150(test_intake, order_number="CO-TEST123")
    print(f"Generated: {path}")
    print(f"Size: {os.path.getsize(path)} bytes")