certifyBox is the Q29 Passenger Carrier Compliance Certification YES box (page 3, y=530), not a general perjury checkbox. It was being checked unconditionally, which wrongly marked freight/property carriers as passenger carriers. Now only check it when the carrier is a passenger carrier; the Q31 perjury declaration is made via the signature.
439 lines
18 KiB
Python
439 lines
18 KiB
Python
"""MCS-150 Official PDF Form Filler.
|
|
|
|
Fills the official FMCSA MCS-150/150B/150C fillable PDF forms using
|
|
intake data from the order. Produces a ready-to-fax or electronically
|
|
submit PDF.
|
|
|
|
Forms stored at (FORM pages only -- the FMCSA instruction/example pages are
|
|
trimmed off the source PDFs so the filled output is fax/submit-ready and never
|
|
includes the instruction pages):
|
|
docs/MCS-150 Form.pdf — standard (3 pages, 289 fields)
|
|
docs/MCS-150B Form.pdf — hazmat safety permit (4 pages, 349 fields)
|
|
docs/MCS-150C Form.pdf — intermodal equipment (2 pages, 33 fields)
|
|
|
|
Usage:
|
|
from scripts.document_gen.templates.mcs150_pdf_filler import fill_mcs150
|
|
pdf_path = fill_mcs150(intake_data, order_number="CO-12345")
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from copy import copy
|
|
|
|
# Module logger; the name is fixed (not __name__) so log routing stays stable.
LOG = logging.getLogger("document_gen.mcs150_pdf_filler")

# pypdf is an optional dependency. When it is missing the module must still
# import cleanly; fill_mcs150() checks ``PdfReader is None`` and raises then.
try:
    from pypdf import PdfReader, PdfWriter
    from pypdf.generic import NameObject, BooleanObject, TextStringObject
except ImportError:
    LOG.warning("pypdf not installed — MCS-150 PDF filling unavailable")
    # Bind every name the import would have provided, not just PdfReader, so
    # any later reference fails on an explicit None check rather than with a
    # NameError.
    PdfReader = PdfWriter = None
    NameObject = BooleanObject = TextStringObject = None
|
|
|
|
# Folder holding the official form PDFs: four ``.parent`` hops up from this
# file, then into "docs".
DOCS_DIR = Path(__file__).resolve().parent.parent.parent.parent / "docs"

# Form-type key (the values determine_form_type returns) -> template PDF path.
FORMS = dict(
    mcs150=DOCS_DIR / "MCS-150 Form.pdf",
    mcs150b=DOCS_DIR / "MCS-150B Form.pdf",
    mcs150c=DOCS_DIR / "MCS-150C Form.pdf",
)
|
|
|
|
# ── Field mappings ────────────────────────────────────────────────────
|
|
|
|
# Question 22 -- COMPANY OPERATIONS (interstate / intrastate classification).
# Form options: A=Interstate Carrier, B=Intrastate Hazmat Carrier,
# C=Intrastate Non-Hazmat Carrier, D=Interstate Hazmat Shipper,
# E=Intrastate Hazmat Shipper. Only the three carrier options (A-C) are mapped
# from intake values; the shipper options (D, E) have no intake slug here.
COMPANY_OPERATION_MAP = dict(
    interstate="22aBox",
    intrastate_hazmat="22bBox",
    intrastate_non_hazmat="22cBox",
)
|
|
|
|
# Question 23 -- OPERATION CLASSIFICATIONS (how the carrier operates).
# Each pair is (intake slug, PDF checkbox field); the letter in the field name
# is the option letter printed on the form:
#   A=Authorized For-Hire, B=Exempt For-Hire, C=Private Property,
#   D=Private Passengers (Business), E=Private Passengers (Non-Business),
#   F=Migrant, G=U.S. Mail, H=Federal Govt, I=State Govt, J=Local Govt,
#   K=Indian Tribe.
CARRIER_OP_MAP = dict(
    [
        ("authorized_for_hire", "23aBox"),
        ("exempt_for_hire", "23bBox"),
        ("private_property", "23cBox"),
        ("private_passengers_business", "23dBox"),
        ("private_passengers_non_business", "23eBox"),
        ("migrant", "23fBox"),
        ("us_mail", "23gBox"),
        ("federal_government", "23hBox"),
        ("state_government", "23iBox"),
        ("local_government", "23jBox"),
        ("indian_tribe", "23kBox"),
    ]
)

# Legacy synonym: older intake payloads send plain "private_passengers", which
# is treated as the business variant (option D).
if "private_passengers" not in CARRIER_OP_MAP:
    CARRIER_OP_MAP["private_passengers"] = "23dBox"
|
|
|
|
# Question 24: Cargo Classifications (checkboxes a through z, then aa through
# dd -- 30 boxes in the order printed on the form).
#
# The form lists "T. U.S. Mail" between "S. Garbage, Refuse, Trash" and
# "U. Chemicals". The previous table omitted it, so every entry from
# "chemicals" onward pointed one box too early (e.g. chemicals ticked the
# U.S. Mail box) and the last box (dd, Other) was unreachable.
# NOTE(review): field names 24tBox..24ddBox follow the existing
# 24<letter>Box pattern -- confirm against the PDF's AcroForm field list.
CARGO_TYPE_MAP = {
    "general": "24aBox",             # A. General Freight
    "household": "24bBox",           # B. Household Goods
    "metal": "24cBox",               # C. Metal: Sheets, Coils, Rolls
    "motor_vehicles": "24dBox",      # D. Motor Vehicles
    "drivetow": "24eBox",            # E. Drive Away/Towaway
    "logs": "24fBox",                # F. Logs, Poles, Beams, Lumber
    "building_materials": "24gBox",  # G. Building Materials
    "mobile_homes": "24hBox",        # H. Mobile Homes
    "machinery": "24iBox",           # I. Machinery, Large Objects
    "fresh_produce": "24jBox",       # J. Fresh Produce
    "liquids": "24kBox",             # K. Liquids/Gases
    "intermodal": "24lBox",          # L. Intermodal Containers
    "passengers": "24mBox",          # M. Passengers
    "oilfield": "24nBox",            # N. Oil Field Equipment
    "livestock": "24oBox",           # O. Livestock
    "grain": "24pBox",               # P. Grain, Feed, Hay
    "coal": "24qBox",                # Q. Coal/Coke
    "meat": "24rBox",                # R. Meat
    "garbage": "24sBox",             # S. Garbage, Refuse, Trash
    "us_mail": "24tBox",             # T. U.S. Mail
    "chemicals": "24uBox",           # U. Chemicals
    "commodities_dry": "24vBox",     # V. Commodities Dry Bulk
    "refrigerated": "24wBox",        # W. Refrigerated Food
    "beverages": "24xBox",           # X. Beverages
    "paper": "24yBox",               # Y. Paper Products
    "utilities": "24zBox",           # Z. Utility
    "farm_supplies": "24aaBox",      # AA. Farm Supplies
    "construction": "24bbBox",       # BB. Construction
    "water_well": "24ccBox",         # CC. Water Well
    "other": "24ddBox",              # DD. Other
}
|
|
|
|
|
|
def determine_form_type(intake: dict) -> str:
    """Choose the MCS-150 variant that matches the intake answers.

    Intermodal equipment providers take precedence; otherwise a hazmat carrier
    that needs a safety permit gets the B form; everyone else gets the
    standard form.

    Returns:
        One of ``'mcs150'``, ``'mcs150b'`` or ``'mcs150c'`` (the keys of
        ``FORMS``).
    """
    if intake.get("is_intermodal_equipment_provider"):
        chosen = "mcs150c"
    elif intake.get("hazmat") == "yes" and intake.get("needs_hmsp"):
        chosen = "mcs150b"
    else:
        chosen = "mcs150"
    return chosen
|
|
|
|
|
|
def _stamp_check_marks(writer, marks: list) -> None:
    """Burn an "X" into the page content for every ``(page, rect)`` in ``marks``.

    Checkbox/radio appearances stored in the AcroForm do not render the same
    way in every viewer (poppler, Preview), so the mark is drawn straight onto
    the page to make sure it is visible on screen and in faxed/printed copies.

    Args:
        writer: The pypdf writer whose pages are being stamped.
        marks: ``(page_object, [x0, y0, x1, y1])`` pairs, one per "on" widget.
            Pairs whose page object is not one of ``writer.pages`` are skipped.

    The function is a no-op when ``marks`` is empty, and logs a warning and
    returns when the overlay libraries cannot be imported.
    """
    if not marks:
        return

    try:
        import io
        from reportlab.pdfgen import canvas as _canvas
        from pypdf import PdfReader as _PdfReader
    except Exception as exc:  # reportlab missing — fall back to AcroForm only
        LOG.warning("Checkmark overlay unavailable (%s); relying on AcroForm", exc)
        return

    # Page objects are matched by identity, so look up each one's position in
    # the writer once up front.
    position_of = {}
    for position, candidate in enumerate(writer.pages):
        position_of[id(candidate)] = position

    # Collect the rectangles per page so each page gets a single overlay.
    rects_for = {}
    for mark_page, mark_rect in marks:
        position = position_of.get(id(mark_page))
        if position is None:
            continue
        if position not in rects_for:
            rects_for[position] = []
        rects_for[position].append(mark_rect)

    for position, page_rects in rects_for.items():
        target = writer.pages[position]
        width = float(target.mediabox.width)
        height = float(target.mediabox.height)

        # Render a transparent one-page PDF the same size as the target page.
        stream = io.BytesIO()
        pen = _canvas.Canvas(stream, pagesize=(width, height))
        pen.setLineWidth(1.3)
        for xa, ya, xb, yb in page_rects:
            # Two diagonals, pulled in slightly from the box edges.
            pad = max(1.2, (xb - xa) * 0.18)
            pen.line(xa + pad, ya + pad, xb - pad, yb - pad)
            pen.line(xa + pad, yb - pad, xb - pad, ya + pad)
        pen.save()

        stream.seek(0)
        target.merge_page(_PdfReader(stream).pages[0])
|
|
|
|
|
|
def _as_text(value) -> str:
    """Return ``value`` as form text, mapping ``None`` to ``""``.

    A plain ``str(None)`` would type the literal word "None" into the form.
    """
    return "" if value is None else str(value)


def _mcs150_text_fields(intake: dict) -> dict:
    """Build ``{pdf_field_name: text}`` for the form's text inputs.

    Every value is a ``str``. Empty strings are kept here; the caller drops
    them before writing so blank answers leave the template untouched.
    """

    def text(key: str) -> str:
        return _as_text(intake.get(key))

    annual_miles = text("annual_miles")
    fields = {
        "1bizName": text("legal_name"),
        "2dbaName": text("dba_name"),
        "3principalStreet": text("address_street"),
        "4principalCity": text("address_city"),
        "5principalState": text("address_state"),
        "6principalZip": text("address_zip"),
        "13bizPhone": text("phone"),
        "14cellPhone": text("cell_phone"),
        "15faxNumber": text("fax"),
        "16usdotNumber": text("dot_number"),
        "usdotNumber": text("dot_number"),  # duplicate field
        "17mcmxNumber": text("mc_number"),
        "19irsNumber": text("ein"),
        "20eMail": text("email"),
        "21carrierMileage": annual_miles,
    }

    # Mailing address (only when a separate mailing street was supplied).
    if intake.get("mailing_street"):
        fields["8mailStreet"] = text("mailing_street")
        fields["9mailCity"] = text("mailing_city")
        fields["10mailState"] = text("mailing_state")
        fields["11mailZip"] = text("mailing_zip")

    # Fleet/drivers: the CDL count falls back to the total driver count when
    # it was not supplied (missing or None).
    cdl_drivers = intake.get("cdl_drivers")
    if cdl_drivers is None:
        cdl_drivers = intake.get("drivers")
    fields["totalDrivers"] = text("drivers")
    fields["totalCDL"] = _as_text(cdl_drivers)

    # Vehicle counts: all power units are reported as owned tractors or owned
    # straight trucks depending on the primary vehicle type.
    power_units = text("power_units")
    if intake.get("primary_vehicle_type", "straight") == "tractor":
        fields["tractorOwn"] = power_units
    else:
        fields["straightOwn"] = power_units

    # Officer and certification blocks share the signer's details.
    signer_name = text("signer_name")
    signer_title = text("signer_title")
    fields["officerName1"] = signer_name
    fields["officerTitle1"] = signer_title
    fields["certifyName"] = signer_name
    fields["certifyTitle"] = signer_title
    fields["certifyDate"] = datetime.now().strftime("%m/%d/%Y")

    # Interstate/intrastate mileage.
    operation = intake.get("interstate_intrastate", "")
    if operation == "interstate":
        fields["interWithin"] = annual_miles
    elif operation in ("intrastate_hazmat", "intrastate_non_hazmat"):
        fields["intraWithin"] = annual_miles

    return fields


def _mcs150_checkboxes(intake: dict) -> list:
    """Return the checkbox field names to tick (value ``/Yes``), de-duplicated."""
    names = []

    # Q22 Company operations (interstate / intrastate classification).
    company_op = intake.get("interstate_intrastate", "")
    if company_op in COMPANY_OPERATION_MAP:
        names.append(COMPANY_OPERATION_MAP[company_op])

    # Q23 Operation classifications (for-hire / private / government / etc.).
    carrier_op = intake.get("carrier_operation", "")
    if carrier_op in CARRIER_OP_MAP:
        names.append(CARRIER_OP_MAP[carrier_op])

    # Q24 Cargo types. Tolerate a missing/None list and a single bare string
    # (which would otherwise be iterated character by character).
    cargo_types = intake.get("cargo_types") or []
    if isinstance(cargo_types, str):
        cargo_types = [cargo_types]
    names.extend(CARGO_TYPE_MAP[c] for c in cargo_types if c in CARGO_TYPE_MAP)

    # Q29 Passenger Carrier Compliance Certification "YES" box. Only motor
    # passenger carriers certify here -- leave it unchecked for freight/property
    # carriers. (The Q31 perjury declaration is made via the signature, not a
    # checkbox.)
    is_passenger = intake.get("is_passenger_carrier") == "yes" or carrier_op in (
        "private_passengers_business",
        "private_passengers_non_business",
        "private_passengers",
    )
    if is_passenger:
        names.append("certifyBox")

    return list(dict.fromkeys(names))


def _mcs150_radio_values(intake: dict) -> dict:
    """Return ``{radio_group_name: option_index}`` for the single-select groups.

    Radio groups on this form take the selected option's index ("0".."4") as
    their value rather than ``/Yes``.
    """
    # Reason for filing: 0=New Application, 1=Biennial Update or Changes,
    # 2=Out of Business, 3=Reapplication, 4=Reactivate. Unknown reasons fall
    # back to the biennial update.
    reason_index = {
        "new_application": "0",
        "biennial_update": "1",
        "out_of_business": "2",
        "reapplication": "3",
        "reactivate": "4",
    }
    reason = intake.get("reason_for_filing", "biennial_update")

    return {
        "Reason Button": reason_index.get(reason, "1"),
        # 0 = same as principal place of business, 1 = different address.
        "Mailing Button": "1" if intake.get("mailing_street") else "0",
        # Q28 registration revoked? Options render Yes (0) then No (1).
        "Revoke Button": "0" if intake.get("usdot_revoked") == "yes" else "1",
    }


def fill_mcs150(intake: dict, order_number: str = "") -> str:
    """Fill the official MCS-150 PDF form and write it to a fresh temp directory.

    Args:
        intake: Dict with the MCS-150 answers from the intake form.
        order_number: Order number, included in the log line only. The output
            filename is derived from the DOT number and today's date.

    Returns:
        Path to the filled PDF.

    Raises:
        ImportError: pypdf is not installed.
        FileNotFoundError: the template PDF for the chosen form type is missing.
    """
    if PdfReader is None:
        raise ImportError("pypdf not installed")

    form_type = determine_form_type(intake)
    form_path = FORMS[form_type]
    if not form_path.exists():
        raise FileNotFoundError(f"MCS-150 form not found: {form_path}")

    reader = PdfReader(str(form_path))
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)

    # ── Text fields ──────────────────────────────────────────────────
    # Apply to every page (the template contains only the fillable form
    # pages). auto_regenerate=True asks pypdf to build appearance streams from
    # the values so viewers that ignore /NeedAppearances still show the text.
    text_values = {k: v for k, v in _mcs150_text_fields(intake).items() if v}
    for page_number, page in enumerate(writer.pages, start=1):
        try:
            writer.update_page_form_field_values(
                page, text_values, auto_regenerate=True,
            )
        except Exception as exc:
            # Best effort, but loud: a failure here means a blank-looking form.
            LOG.warning("Form field apply on page %d failed: %s", page_number, exc)

    # ── Checkboxes and radio groups ──────────────────────────────────
    # ``marks`` collects (page, rect) of every "on" widget so an explicit mark
    # can be stamped onto the page afterwards.
    marks: list = []

    def _set_button(field_name: str, on_value: str) -> bool:
        """Set a /Btn field (checkbox or radio) to ``on_value`` ('Yes', '1', ...).

        Handles both flat widgets and parent/kid radio groups. Returns True
        when at least one widget with that field name was found.
        """
        target = "/" + on_value.lstrip("/")
        found = False
        for page in writer.pages:
            annots = page.get("/Annots")
            # /Annots may be stored as an indirect reference; resolve it.
            annots = annots.get_object() if annots is not None else []
            for annot in annots or []:
                obj = annot.get_object()
                # Resolve the field name from this widget or its parent.
                name = obj.get("/T")
                parent = obj.get("/Parent")
                pobj = parent.get_object() if parent else None
                if name is None and pobj is not None:
                    name = pobj.get("/T")
                if name is None or str(name) != field_name:
                    continue
                found = True
                # Appearance states this widget can show, other than /Off.
                ap = obj.get("/AP")
                on_states = []
                if ap:
                    n = ap.get_object().get("/N")
                    if n is not None:
                        on_states = [
                            str(k) for k in n.get_object().keys() if str(k) != "/Off"
                        ]
                # Set the field value on the field object (parent for radios).
                field_obj = pobj if pobj is not None else obj
                field_obj[NameObject("/V")] = NameObject(target)
                # The widget is "on" only if its own appearance state matches.
                if target in on_states or (not on_states and target == "/Yes"):
                    obj[NameObject("/AS")] = NameObject(target)
                    rect = obj.get("/Rect")
                    if rect is not None:
                        marks.append((page, [float(x) for x in rect]))
                else:
                    obj[NameObject("/AS")] = NameObject("/Off")
        return found

    buttons = [(name, "Yes") for name in _mcs150_checkboxes(intake)]
    buttons.extend(_mcs150_radio_values(intake).items())
    for field_name, value in buttons:
        try:
            if not _set_button(field_name, value):
                LOG.debug("Button field %s not present on %s", field_name, form_type)
        except Exception as exc:
            LOG.warning("Button field %s set failed: %s", field_name, exc)

    # Draw the marks into the page content so they show in every viewer and in
    # the faxed/printed output.
    _stamp_check_marks(writer, marks)

    # Ask viewers to (re)generate field appearance streams from the values set
    # above; otherwise the template's blank appearances can hide the data.
    try:
        catalog = writer._root_object
        if "/AcroForm" in catalog:
            acro = catalog["/AcroForm"]
            acro_obj = acro.get_object() if hasattr(acro, "get_object") else acro
            acro_obj[NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as exc:
        LOG.warning("Could not set NeedAppearances: %s", exc)

    # ── Save ─────────────────────────────────────────────────────────
    work_dir = tempfile.mkdtemp(prefix="pw_mcs150_")
    # The DOT number comes from user intake: keep only filename-safe
    # characters so it cannot inject path separators, and fall back to
    # "unknown" when it is missing or empty.
    raw_dot = _as_text(intake.get("dot_number")).strip()
    dot = "".join(ch for ch in raw_dot if ch.isalnum() or ch in "-_") or "unknown"
    date_str = datetime.now().strftime("%Y%m%d")
    filename = f"MCS150_DOT{dot}_{date_str}_filled.pdf"
    filepath = os.path.join(work_dir, filename)

    with open(filepath, "wb") as f:
        writer.write(f)

    LOG.info(
        "Filled MCS-150 (%s) for order %s → %s",
        form_type, order_number or "n/a", filepath,
    )
    return filepath
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: fill the form for a sample interstate for-hire
    # freight carrier and report where the PDF landed.
    sample_company = {
        "legal_name": "ADAMS LUMBER INC",
        "dba_name": "Adams Trucking",
        "dot_number": "1157913",
        "mc_number": "MC-456789",
        "entity_type": "corporation",
    }
    sample_contact = {
        "address_street": "123 Timber Lane",
        "address_city": "Portland",
        "address_state": "OR",
        "address_zip": "97201",
        "phone": "(503) 555-1234",
        "email": "mark@adamslumber.com",
    }
    sample_operation = {
        "carrier_operation": "authorized_for_hire",
        "interstate_intrastate": "interstate",
        "hazmat": "no",
        "power_units": "5",
        "drivers": "6",
        "annual_miles": "250000",
        "cargo_types": ["general", "building_materials", "logs"],
    }
    sample_signer = {
        "signer_name": "Mark Adams",
        "signer_title": "President",
    }
    sample_intake = {
        **sample_company,
        **sample_contact,
        **sample_operation,
        **sample_signer,
    }

    output_pdf = fill_mcs150(sample_intake, order_number="CO-TEST123")
    output_size = os.stat(output_pdf).st_size
    print(f"Generated: {output_pdf}")
    print(f"Size: {output_size} bytes")
|