- fill_mcs150 now uses auto_regenerate=True so pypdf writes appearance streams for every text field. Preview/Chrome ignore /NeedAppearances and were showing blank widgets over the values; generated /AP streams make the text render in all viewers. - New verify_mcs150.py reads each widget's /AP /N appearance stream (the literal drawn glyphs) to confirm expected values actually render, since the container has no OCR/raster tooling. Exits non-zero on any miss.
112 lines
3.9 KiB
Python
112 lines
3.9 KiB
Python
"""Verify an MCS-150 PDF actually RENDERS the expected values.
|
|
|
|
OCR tooling (tesseract/pdftoppm/fitz) is not available in the workers
|
|
container, so instead of rasterising we verify what a viewer would draw by
|
|
reading each text widget's *appearance stream* (/AP /N), which is the literal
|
|
content the PDF renders. After fill_mcs150 with auto_regenerate=True, pypdf
|
|
writes these streams, so their presence + content is ground truth that the
|
|
value will be visible (not just sitting in /V under a blank widget).
|
|
|
|
Usage:
|
|
python3 -m scripts.document_gen.templates.verify_mcs150 [DOT_NUMBER]
|
|
|
|
Exits non-zero if any expected value is missing from the rendered output.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
|
|
from pypdf import PdfReader
|
|
|
|
from .mcs150_pdf_filler import fill_mcs150
|
|
|
|
|
|
# Body of a PDF literal string: a backslash escape, or any byte other than an
# unescaped ")" that would terminate the string.
_PDF_LITERAL_BODY = rb"(?:\\.|[^\\)])*"

# "(text) Tj" -- show one string.
_TJ_SINGLE_RE = re.compile(rb"\((" + _PDF_LITERAL_BODY + rb")\)\s*Tj", re.DOTALL)

# "[(a) -20 (b)] TJ" -- show an array of strings interleaved with kerning
# numbers.  Strings are matched as whole units so a "]" inside one does not
# end the array early.
_TJ_ARRAY_RE = re.compile(
    rb"\[((?:\(" + _PDF_LITERAL_BODY + rb"\)|[^\]()])*)\]\s*TJ", re.DOTALL
)

# One literal string inside a TJ array.
_PDF_LITERAL_RE = re.compile(rb"\((" + _PDF_LITERAL_BODY + rb")\)", re.DOTALL)

# A backslash escape inside a literal string: up to three octal digits, or a
# single following byte (CRLF counts as one line continuation).
_PDF_ESCAPE_RE = re.compile(rb"\\(?:([0-7]{1,3})|(\r\n|.))", re.DOTALL)

_PDF_NAMED_ESCAPES = {
    b"n": b"\n",
    b"r": b"\r",
    b"t": b"\t",
    b"b": b"\b",
    b"f": b"\f",
}


def _unescape_pdf_literal(body: bytes) -> bytes:
    """Resolve backslash escapes in the body of a PDF literal string."""

    def _resolve(match: re.Match) -> bytes:
        octal, other = match.group(1), match.group(2)
        if octal is not None:
            # High-order overflow is ignored, as the PDF spec prescribes.
            return bytes([int(octal, 8) & 0xFF])
        if other in (b"\n", b"\r", b"\r\n"):
            # Backslash + end-of-line is a line continuation: contributes nothing.
            return b""
        # \( \) \\ and unknown escapes resolve to the escaped byte itself.
        return _PDF_NAMED_ESCAPES.get(other, other)

    return _PDF_ESCAPE_RE.sub(_resolve, body)


def _drawn_text(raw: bytes) -> str:
    """Return the text shown by the Tj / TJ operators of a content stream.

    All Tj strings come first, followed by all TJ array strings (the same
    ordering the verifier has always used); hex strings are not decoded.
    """
    pieces = [_unescape_pdf_literal(m) for m in _TJ_SINGLE_RE.findall(raw)]
    for array_body in _TJ_ARRAY_RE.findall(raw):
        pieces.extend(
            _unescape_pdf_literal(s) for s in _PDF_LITERAL_RE.findall(array_body)
        )
    return b"".join(pieces).decode("latin-1")


def _widget_text(widget, name: str) -> str:
    """Return the stripped text drawn by one widget's /AP /N stream ("" if none)."""
    ap = widget.get("/AP")
    if not ap:
        return ""
    normal = ap.get_object().get("/N")
    if normal is None:
        return ""
    stream = normal.get_object()
    if not hasattr(stream, "get_data"):
        # /N is not a content stream here (e.g. a dictionary of on/off
        # appearance states), so there is no drawn text to read.
        return ""
    try:
        raw = stream.get_data()
    except Exception as exc:  # best effort: an unreadable stream counts as blank
        print(
            f"WARN: could not decode appearance stream for {name}: {exc}",
            file=sys.stderr,
        )
        return ""
    return _drawn_text(raw).strip()


def _appearance_text(reader: "PdfReader") -> dict[str, str]:
    """Return {field_name: rendered text from its /AP /N stream}.

    Walks every annotation on every page of *reader*.  Annotations without a
    /T entry are skipped; annotations with a /T but no readable normal
    appearance map to an empty string.  When several annotations share a
    name, the last one visited wins.

    PDF literal-string escapes (``\\(``, ``\\)``, ``\\\\``, octal codes, ...)
    are resolved, so the returned text can be compared directly with the
    value that was filled in.
    """
    out: dict[str, str] = {}
    for page in reader.pages:
        annots = page.get("/Annots")
        if not annots:
            continue
        for ref in annots:
            widget = ref.get_object()
            name = widget.get("/T")
            if name is None:
                continue
            key = str(name)
            out[key] = _widget_text(widget, key)
    return out
|
|
|
|
|
|
def _normalize_for_compare(value: object) -> str:
    """Upper-case *value* as text and drop all whitespace for a lenient match."""
    return "".join(str(value).split()).upper()


def main() -> int:
    """Fill an MCS-150 for one DOT number and verify the values render.

    The DOT number comes from ``sys.argv[1]`` (default ``"1609564"``).  The
    carrier record is fetched, used as the intake for ``fill_mcs150``, and the
    resulting PDF's appearance streams are compared with the intake values.

    Returns:
        0 if every checked field renders its expected value, 1 if at least
        one does not, 2 if no carrier record could be fetched.
    """
    dot = sys.argv[1] if len(sys.argv) > 1 else "1609564"

    # Simulate enrichment by pulling the FMCSA census the same way the handler
    # does, then fill.
    from scripts.workers.services.mcs150_update import MCS150UpdateService

    svc = MCS150UpdateService()
    census = svc._fetch_carrier_record(dot)
    if not census:
        print(f"FAIL: no FMCSA census for DOT {dot}")
        return 2

    intake = dict(census)
    intake.setdefault("signer_name", "Test Signer")
    intake.setdefault("signer_title", "Owner")

    path = fill_mcs150(intake, order_number="CO-VERIFY")
    reader = PdfReader(path)
    rendered = _appearance_text(reader)

    # What we expect to be visible on the rendered form.
    checks = {
        "1bizName": intake.get("legal_name", ""),
        "3principalStreet": intake.get("address_street", ""),
        "4principalCity": intake.get("address_city", ""),
        "5principalState": intake.get("address_state", ""),
        "6principalZip": intake.get("address_zip", ""),
        "16usdotNumber": intake.get("dot_number", ""),
        "19irsNumber": intake.get("ein", ""),
        "certifyName": intake.get("signer_name", ""),
    }

    print(f"=== MCS-150 render verification for DOT {dot} ===")
    print(f"PDF: {path}")
    print(f"census fields: {sorted(census.keys())}\n")

    failures = []
    for field, expected in checks.items():
        if not expected:
            # Nothing to compare against; say so, so a run that checked very
            # little is not mistaken for a thorough pass.
            print(f"  [SKIP] {field}: no expected value in intake")
            continue
        got = rendered.get(field, "")
        # Census values are not guaranteed to be str (e.g. a numeric DOT
        # number), so compare their text form, ignoring case and whitespace.
        ok = _normalize_for_compare(expected) in _normalize_for_compare(got)
        flag = "OK  " if ok else "MISS"
        print(f"  [{flag}] {field}: expected '{expected}' | rendered '{got}'")
        if not ok:
            failures.append(field)

    if failures:
        print(f"\nFAIL: {len(failures)} field(s) not rendered: {failures}")
        return 1
    print("\nPASS: all expected values render in their appearance streams.")
    return 0
|
|
|
|
|
|
# Script entry point: hand main()'s status code back to the shell.
if __name__ == "__main__":
    raise SystemExit(main())
|