Fixes a batch of missing fields that the FMCSA census does not provide and that the filler was mis-mapping:
- Corrected the question->field mapping to match the actual form: Q22 = COMPANY OPERATIONS (interstate/intrastate, 22xBox), Q23 = OPERATION CLASSIFICATIONS (for-hire/private/govt, 23xBox). These were swapped, and the bogus entity-type->23xBox map (no entity-type question exists on this form revision) was removed.
- Added proper radio-group handling for Reason for Filing (Biennial Update), Mailing address (same as principal vs. below), and Q28 USDOT-revoked, with correct option indices (these are /0../n radios, not /Yes checkboxes; the old code set them to /Yes and never selected the right option).
- Map interstate/intrastate from the FMCSA census carrierOperationCode, and populate email/phone/mileage/cargo from intake.
- AcroForm checkbox/radio appearances use a ZapfDingbats glyph that poppler/Preview fail to render (the value is set but the box looks empty). Now stamp an explicit X overlay into the page content for every 'on' box so it shows in every viewer and in the faxed output.
431 lines
18 KiB
Python
431 lines
18 KiB
Python
"""MCS-150 Official PDF Form Filler.

Fills the official FMCSA MCS-150/150B/150C fillable PDF forms using
intake data from the order. Produces a PDF that is ready to fax or to
submit electronically.

Forms stored at (FORM pages only -- the FMCSA instruction/example pages are
trimmed off the source PDFs so the filled output is fax/submit-ready and never
includes the instruction pages):
    docs/MCS-150 Form.pdf   — standard (3 pages, 289 fields)
    docs/MCS-150B Form.pdf  — hazmat safety permit (4 pages, 349 fields)
    docs/MCS-150C Form.pdf  — intermodal equipment (2 pages, 33 fields)

Usage:
    from scripts.document_gen.templates.mcs150_pdf_filler import fill_mcs150
    pdf_path = fill_mcs150(intake_data, order_number="CO-12345")
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from copy import copy
|
|
|
|
LOG = logging.getLogger("document_gen.mcs150_pdf_filler")
|
|
|
|
try:
|
|
from pypdf import PdfReader, PdfWriter
|
|
from pypdf.generic import NameObject, BooleanObject, TextStringObject
|
|
except ImportError:
|
|
LOG.warning("pypdf not installed — MCS-150 PDF filling unavailable")
|
|
PdfReader = None
|
|
|
|
# Location of the official fillable PDFs: the "docs" directory found four
# levels above this module's resolved path.
DOCS_DIR = Path(__file__).resolve().parent.parent.parent.parent.joinpath("docs")

# Form key (as returned by determine_form_type) -> template PDF path.
FORMS = dict(
    mcs150=DOCS_DIR / "MCS-150 Form.pdf",
    mcs150b=DOCS_DIR / "MCS-150B Form.pdf",
    mcs150c=DOCS_DIR / "MCS-150C Form.pdf",
)
|
|
|
|
# ── Field mappings ────────────────────────────────────────────────────
|
|
|
|
# Question 22 -- COMPANY OPERATIONS (interstate / intrastate classification).
# Form options: A=Interstate Carrier, B=Intrastate Hazmat Carrier,
# C=Intrastate Non-Hazmat Carrier, D=Interstate Hazmat Shipper,
# E=Intrastate Hazmat Shipper.
# Only the three carrier options (A-C) are mapped from intake values; the
# shipper options (D, E) have no intake value here.
COMPANY_OPERATION_MAP = dict(
    interstate="22aBox",
    intrastate_hazmat="22bBox",
    intrastate_non_hazmat="22cBox",
)
|
|
|
|
# Question 23 -- OPERATION CLASSIFICATIONS (how the carrier operates).
# Form options: A=Authorized For-Hire, B=Exempt For-Hire, C=Private Property,
# D=Private Passengers (Business), E=Private Passengers (Non-Business),
# F=Migrant, G=U.S. Mail, H=Federal Govt, I=State Govt, J=Local Govt,
# K=Indian Tribe.
CARRIER_OP_MAP = dict(
    authorized_for_hire="23aBox",
    exempt_for_hire="23bBox",
    private_property="23cBox",
    private_passengers_business="23dBox",
    private_passengers_non_business="23eBox",
    migrant="23fBox",
    us_mail="23gBox",
    federal_government="23hBox",
    state_government="23iBox",
    local_government="23jBox",
    indian_tribe="23kBox",
)

# Legacy synonym so older intake values still map sensibly; it is only added
# when the key is not already defined above.
if "private_passengers" not in CARRIER_OP_MAP:
    CARRIER_OP_MAP["private_passengers"] = "23dBox"
|
|
|
|
# Question 24: Cargo classifications (checkboxes -- a through z, then aa
# through dd: 30 boxes in total).
#
# The official form lists "U.S. Mail" as option T, between Garbage (S) and
# Chemicals (U). The previous table omitted it, so every entry from
# "chemicals" onward pointed one box too early (e.g. "chemicals" checked the
# U.S. Mail box) and the table stopped at 24ccBox instead of 24ddBox.
#
# NOTE(review): this assumes the PDF field names follow the form lettering
# (24aBox ... 24zBox, 24aaBox ... 24ddBox) -- confirm against a field dump of
# docs/MCS-150 Form.pdf.
CARGO_TYPE_MAP = {
    "general": "24aBox",
    "household": "24bBox",
    "metal": "24cBox",
    "motor_vehicles": "24dBox",
    "drivetow": "24eBox",
    "logs": "24fBox",
    "building_materials": "24gBox",
    "mobile_homes": "24hBox",
    "machinery": "24iBox",
    "fresh_produce": "24jBox",
    "liquids": "24kBox",
    "intermodal": "24lBox",
    "passengers": "24mBox",
    "oilfield": "24nBox",
    "livestock": "24oBox",
    "grain": "24pBox",
    "coal": "24qBox",
    "meat": "24rBox",
    "garbage": "24sBox",
    "us_mail": "24tBox",
    "chemicals": "24uBox",
    "commodities_dry": "24vBox",
    "refrigerated": "24wBox",
    "beverages": "24xBox",
    "paper": "24yBox",
    "utilities": "24zBox",
    "farm_supplies": "24aaBox",
    "construction": "24bbBox",
    "water_well": "24ccBox",
    "other": "24ddBox",
}
|
|
|
|
|
|
def determine_form_type(intake: dict) -> str:
    """Pick the MCS-150 variant that matches the intake data.

    Args:
        intake: Intake dict. Reads ``is_intermodal_equipment_provider``,
            ``hazmat`` and ``needs_hmsp``.

    Returns:
        ``"mcs150c"`` when ``is_intermodal_equipment_provider`` is truthy;
        otherwise ``"mcs150b"`` when ``hazmat`` equals ``"yes"`` and
        ``needs_hmsp`` is truthy; otherwise ``"mcs150"``.
    """
    # The intermodal flag wins over everything else.
    if intake.get("is_intermodal_equipment_provider"):
        return "mcs150c"

    hazmat_permit_needed = intake.get("hazmat") == "yes" and intake.get("needs_hmsp")
    return "mcs150b" if hazmat_permit_needed else "mcs150"
|
|
|
|
|
|
def _stamp_check_marks(writer, marks: list) -> None:
|
|
"""Draw an explicit "X" mark inside each (page, rect) in ``marks``.
|
|
|
|
AcroForm checkbox/radio appearances render inconsistently across viewers
|
|
(poppler, Preview), so we burn the mark into the page content to guarantee
|
|
it shows on screen and in the faxed/printed output. Grouped per page so we
|
|
merge a single overlay onto each affected page.
|
|
"""
|
|
if not marks:
|
|
return
|
|
try:
|
|
import io
|
|
from reportlab.pdfgen import canvas as _canvas
|
|
from pypdf import PdfReader as _PdfReader
|
|
except Exception as exc: # reportlab missing — fall back to AcroForm only
|
|
LOG.warning("Checkmark overlay unavailable (%s); relying on AcroForm", exc)
|
|
return
|
|
|
|
# Group rects by their page object's index in the writer.
|
|
page_index = {id(p): i for i, p in enumerate(writer.pages)}
|
|
by_page: dict[int, list] = {}
|
|
for page, rect in marks:
|
|
idx = page_index.get(id(page))
|
|
if idx is not None:
|
|
by_page.setdefault(idx, []).append(rect)
|
|
|
|
for idx, rects in by_page.items():
|
|
page = writer.pages[idx]
|
|
pw = float(page.mediabox.width)
|
|
ph = float(page.mediabox.height)
|
|
buf = io.BytesIO()
|
|
c = _canvas.Canvas(buf, pagesize=(pw, ph))
|
|
c.setLineWidth(1.3)
|
|
for (x0, y0, x1, y1) in rects:
|
|
# Draw an "X" that fills most of the box with a small inset.
|
|
inset = max(1.2, (x1 - x0) * 0.18)
|
|
c.line(x0 + inset, y0 + inset, x1 - inset, y1 - inset)
|
|
c.line(x0 + inset, y1 - inset, x1 - inset, y0 + inset)
|
|
c.save()
|
|
buf.seek(0)
|
|
overlay = _PdfReader(buf).pages[0]
|
|
page.merge_page(overlay)
|
|
|
|
|
|
def _mcs150_text(value) -> str:
    """Return ``value`` as form text; ``None`` becomes "" rather than "None"."""
    return "" if value is None else str(value)


def _mcs150_resolve(obj):
    """Return the target of ``obj`` if it is an indirect PDF reference, else ``obj``."""
    return obj.get_object() if hasattr(obj, "get_object") else obj


def _mcs150_text_fields(intake: dict, now: datetime) -> dict:
    """Map intake values to text-field values (PDF field name -> non-empty str)."""

    def text(key: str) -> str:
        return _mcs150_text(intake.get(key))

    dot_number = text("dot_number")
    annual_miles = text("annual_miles")
    signer_name = text("signer_name")
    signer_title = text("signer_title")

    # CDL driver count falls back to the total driver count when not supplied.
    cdl_drivers = intake.get("cdl_drivers")
    if cdl_drivers is None:
        cdl_drivers = intake.get("drivers")

    fields = {
        "1bizName": text("legal_name"),
        "2dbaName": text("dba_name"),
        "3principalStreet": text("address_street"),
        "4principalCity": text("address_city"),
        "5principalState": text("address_state"),
        "6principalZip": text("address_zip"),
        "13bizPhone": text("phone"),
        "14cellPhone": text("cell_phone"),
        "15faxNumber": text("fax"),
        "16usdotNumber": dot_number,
        "usdotNumber": dot_number,  # duplicate field
        "17mcmxNumber": text("mc_number"),
        "19irsNumber": text("ein"),
        "20eMail": text("email"),
        "21carrierMileage": annual_miles,
        # Fleet/drivers
        "totalDrivers": text("drivers"),
        "totalCDL": _mcs150_text(cdl_drivers),
        # Officers
        "officerName1": signer_name,
        "officerTitle1": signer_title,
        # Certification
        "certifyName": signer_name,
        "certifyTitle": signer_title,
        "certifyDate": now.strftime("%m/%d/%Y"),
    }

    # Mailing address (only when a different mailing street was provided)
    if intake.get("mailing_street"):
        fields["8mailStreet"] = text("mailing_street")
        fields["9mailCity"] = text("mailing_city")
        fields["10mailState"] = text("mailing_state")
        fields["11mailZip"] = text("mailing_zip")

    # Vehicle counts — all power units go to the "owned" column of the primary
    # vehicle type; anything other than "tractor" counts as straight trucks.
    if intake.get("primary_vehicle_type", "straight") == "tractor":
        fields["tractorOwn"] = text("power_units")
    else:
        fields["straightOwn"] = text("power_units")

    # Interstate/intrastate mileage
    operation = intake.get("interstate_intrastate", "")
    if operation == "interstate":
        fields["interWithin"] = annual_miles
    elif operation in ("intrastate_hazmat", "intrastate_non_hazmat"):
        fields["intraWithin"] = annual_miles

    # Empty values are dropped so blank template fields are left untouched.
    return {name: value for name, value in fields.items() if value}


def _mcs150_checkboxes(intake: dict) -> list:
    """Return the names of the checkbox fields (on-value /Yes) to switch on."""
    names = []

    # Q22 Company operations (interstate / intrastate classification)
    company_op = intake.get("interstate_intrastate", "")
    if isinstance(company_op, str) and company_op in COMPANY_OPERATION_MAP:
        names.append(COMPANY_OPERATION_MAP[company_op])

    # Q23 Operation classifications (for-hire / private / government / etc.)
    carrier_op = intake.get("carrier_operation", "")
    if isinstance(carrier_op, str) and carrier_op in CARRIER_OP_MAP:
        names.append(CARRIER_OP_MAP[carrier_op])

    # Q24 Cargo types. Tolerate a missing/None value and a single bare string
    # (iterating a string would otherwise walk its characters and match nothing).
    cargo_types = intake.get("cargo_types") or []
    if isinstance(cargo_types, str):
        cargo_types = [cargo_types]
    for cargo in cargo_types:
        if isinstance(cargo, str) and cargo in CARGO_TYPE_MAP:
            names.append(CARGO_TYPE_MAP[cargo])

    # The bottom certification ("I ... certify ...") box is always checked --
    # the client signs the perjury certification.
    names.append("certifyBox")

    # De-duplicate while keeping first-seen order.
    return list(dict.fromkeys(names))


def _mcs150_radio_values(intake: dict) -> dict:
    """Return radio-group field name -> selected option index (as a string)."""
    # Reason for filing (form REASON FOR FILING). 0=New Application,
    # 1=Biennial Update or Changes, 2=Out of Business, 3=Reapplication,
    # 4=Reactivate. Unknown or missing reasons default to the biennial update.
    reason_map = {
        "new_application": "0",
        "biennial_update": "1",
        "out_of_business": "2",
        "reapplication": "3",
        "reactivate": "4",
    }
    reason = intake.get("reason_for_filing", "biennial_update")
    return {
        "Reason Button": reason_map.get(reason, "1"),
        # Mailing address: 0 = same as principal place of business,
        # 1 = different address provided below.
        "Mailing Button": "1" if intake.get("mailing_street") else "0",
        # Q28 Is USDOT registration currently revoked? On this form the options
        # render Yes (index 0) then No (index 1), so No = "1".
        "Revoke Button": "0" if intake.get("usdot_revoked") == "yes" else "1",
    }


def _mcs150_widgets_by_field(writer) -> dict:
    """Index every widget annotation by field name (its own /T, else its parent's)."""
    index: dict = {}
    for page in writer.pages:
        annots = _mcs150_resolve(page.get("/Annots"))
        if not annots:
            continue
        for ref in annots:
            widget = _mcs150_resolve(ref)
            if not isinstance(widget, dict):
                continue  # null or otherwise malformed annotation entry
            parent = _mcs150_resolve(widget.get("/Parent"))
            if not isinstance(parent, dict):
                parent = None
            # The object that carries /T is the field itself: the widget for
            # flat fields, the parent for radio kids that have no /T of their own.
            name = _mcs150_resolve(widget.get("/T"))
            field_obj = widget
            if name is None and parent is not None:
                name = _mcs150_resolve(parent.get("/T"))
                field_obj = parent
            if name is None:
                continue
            index.setdefault(str(name), []).append((page, widget, field_obj))
    return index


def _mcs150_set_button(entries, on_value: str, marks: list) -> None:
    """Set one /Btn field (checkbox or radio) to ``on_value`` (e.g. 'Yes' or '1').

    ``entries`` are the ``(page, widget, field_obj)`` tuples of that field. The
    field's /V is set, each widget's /AS is switched to the value or to /Off,
    and the page + rect of every "on" widget is appended to ``marks``.
    """
    target = "/" + on_value.lstrip("/")
    for page, widget, field_obj in entries:
        # Appearance states this widget can show, other than /Off.
        on_states = []
        ap = _mcs150_resolve(widget.get("/AP"))
        if ap:
            normal = _mcs150_resolve(ap.get("/N"))
            if normal is not None:
                on_states = [str(k) for k in normal.keys() if str(k) != "/Off"]

        field_obj[NameObject("/V")] = NameObject(target)

        # The widget is "on" only if its own appearance state matches; a widget
        # without any appearance states is treated as a plain /Yes checkbox.
        if target in on_states or (not on_states and target == "/Yes"):
            widget[NameObject("/AS")] = NameObject(target)
            rect = _mcs150_resolve(widget.get("/Rect"))
            if rect is not None:
                marks.append((page, [float(_mcs150_resolve(x)) for x in rect]))
        else:
            widget[NameObject("/AS")] = NameObject("/Off")


def fill_mcs150(intake: dict, order_number: str = "") -> str:
    """Fill the official MCS-150 PDF form.

    Picks the form variant with ``determine_form_type``, writes the text,
    checkbox and radio values from ``intake`` into a copy of the template,
    stamps an explicit mark on every "on" box, and saves the result into a
    fresh temporary directory.

    Args:
        intake: Dict with all MCS-150 fields from intake form. Values may be
            strings or numbers; ``None`` is treated as "not provided".
        order_number: Order number. Accepted for API compatibility; it is not
            currently used in the output filename or the PDF.

    Returns:
        Path to the filled PDF
        (``<tmpdir>/MCS150_DOT<dot>_<YYYYMMDD>_filled.pdf``).

    Raises:
        ImportError: pypdf is not installed.
        FileNotFoundError: the template PDF for the selected form is missing.
    """
    if PdfReader is None:
        raise ImportError("pypdf not installed")

    form_type = determine_form_type(intake)
    form_path = FORMS[form_type]
    if not form_path.exists():
        raise FileNotFoundError(f"MCS-150 form not found: {form_path}")

    reader = PdfReader(str(form_path))
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)

    # One timestamp for both the certification date and the filename.
    now = datetime.now()

    # ── Text fields ──────────────────────────────────────────────────
    # Applied to every page; auto_regenerate=True asks pypdf to build
    # appearance streams from the values so viewers that ignore
    # /NeedAppearances still render the text. Failures are best-effort but
    # logged as warnings, since a failed page means a silently blank form.
    text_values = _mcs150_text_fields(intake, now)
    for page in writer.pages:
        try:
            writer.update_page_form_field_values(
                page, text_values, auto_regenerate=True,
            )
        except Exception as exc:
            LOG.warning("Form field apply on page failed: %s", exc)

    # ── Checkboxes (value /Yes) and radio groups (value /0../n) ──────
    # ``marks`` collects (page, rect) of every "on" widget so an explicit mark
    # can be stamped afterwards (AcroForm appearances render inconsistently).
    marks: list = []
    try:
        widgets_by_field = _mcs150_widgets_by_field(writer)
    except Exception as exc:
        LOG.warning("Could not index form widgets: %s", exc)
        widgets_by_field = {}

    for field_name in _mcs150_checkboxes(intake):
        try:
            _mcs150_set_button(widgets_by_field.get(field_name, ()), "Yes", marks)
        except Exception as exc:
            LOG.warning("Checkbox %s set failed: %s", field_name, exc)

    for field_name, value in _mcs150_radio_values(intake).items():
        try:
            _mcs150_set_button(widgets_by_field.get(field_name, ()), value, marks)
        except Exception as exc:
            LOG.warning("Radio %s set failed: %s", field_name, exc)

    # Stamp an explicit mark onto every "on" checkbox/radio so it shows in
    # every viewer and in the faxed/printed output.
    _stamp_check_marks(writer, marks)

    # Ask viewers to (re)generate field appearance streams from the values we
    # set, so typed values are not hidden behind the template's blank widgets.
    try:
        catalog = writer._root_object
        if "/AcroForm" in catalog:
            acro = catalog["/AcroForm"]
            acro_obj = acro.get_object() if hasattr(acro, "get_object") else acro
            acro_obj[NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as exc:
        LOG.warning("Could not set NeedAppearances: %s", exc)

    # ── Save ─────────────────────────────────────────────────────────
    # The DOT number comes from intake, so keep only filename-safe characters
    # (no path separators) and fall back to "unknown" when nothing is left.
    raw_dot = _mcs150_text(intake.get("dot_number"))
    safe_dot = "".join(ch for ch in raw_dot if ch.isalnum() or ch in "-_") or "unknown"

    work_dir = tempfile.mkdtemp(prefix="pw_mcs150_")
    date_str = now.strftime("%Y%m%d")
    filename = f"MCS150_DOT{safe_dot}_{date_str}_filled.pdf"
    filepath = os.path.join(work_dir, filename)

    with open(filepath, "wb") as f:
        writer.write(f)

    LOG.info("Filled MCS-150 (%s) → %s", form_type, filepath)
    return filepath
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: fill the form from a sample intake and report where
    # the PDF was written and how large it is.
    test_intake = dict(
        legal_name="ADAMS LUMBER INC",
        dba_name="Adams Trucking",
        dot_number="1157913",
        mc_number="MC-456789",
        address_street="123 Timber Lane",
        address_city="Portland",
        address_state="OR",
        address_zip="97201",
        phone="(503) 555-1234",
        email="mark@adamslumber.com",
        entity_type="corporation",
        carrier_operation="authorized_for_hire",
        interstate_intrastate="interstate",
        hazmat="no",
        power_units="5",
        drivers="6",
        annual_miles="250000",
        cargo_types=["general", "building_materials", "logs"],
        signer_name="Mark Adams",
        signer_title="President",
    )

    path = fill_mcs150(test_intake, order_number="CO-TEST123")
    print(f"Generated: {path}")
    size_in_bytes = os.path.getsize(path)
    print(f"Size: {size_in_bytes} bytes")
|