new-site/scripts/document_gen/traffic_study_stamper.py
justin f8cd37ac8c Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers,
document generators, FCC compliance tools, Canada CRTC formation,
Ansible infrastructure, and deployment scripts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 06:54:22 -05:00

303 lines
11 KiB
Python

"""
Traffic Study Page Stamper for FCC Form 499-A filings.
2026 Form 499-A Section IV.C.5.h requires carriers that submit a
traffic study (in lieu of electing a safe-harbor allocation) to stamp
every page of the study with a one-line header identifying the Filer
ID, Company Name, and Affiliated Filers Name. USAC uses this header
to match the study back to the 499-A submission and to verify
consistency across affiliated filers.
Primary path
------------
Try to generate a text overlay via ``reportlab`` and merge it onto
each page of the source PDF with ``pypdf``. Each overlay PDF matches
the media-box size of its corresponding source page so that the
merge is geometrically correct.
Fallback
--------
If ``reportlab`` is not installed (or the overlay cannot be built or
merged for a page), fall back to a best-effort stamp that appends a
small text-drawing content stream to the page with ``pypdf``. If
``pypdf`` itself is missing, or the stamping run raises an unexpected
error, copy the source PDF to the output path unchanged, log a warning,
and return the output path (the 499-A submission plan requires the
filing to proceed regardless).
Usage
-----
from scripts.document_gen.traffic_study_stamper import stamp_pages
out = stamp_pages(
pdf_path="/data/traffic_study.pdf",
output_path="/data/traffic_study.stamped.pdf",
filer_id="812345",
company_name="Acme Telco LLC",
affiliated_filers_name="Acme Holdings",
)
"""
from __future__ import annotations
import io
import logging
import shutil
from pathlib import Path
from typing import Optional
LOG = logging.getLogger("document_gen.traffic_study_stamper")
# ── PDF core (required) ─────────────────────────────────────────────
try:
from pypdf import PdfReader, PdfWriter, PageObject
from pypdf.generic import (
ContentStream,
NameObject,
NumberObject,
TextStringObject,
)
_HAS_PYPDF = True
except ImportError:
LOG.warning("pypdf not installed — traffic study stamping unavailable")
PdfReader = None # type: ignore[assignment,misc]
PdfWriter = None # type: ignore[assignment,misc]
PageObject = None # type: ignore[assignment,misc]
_HAS_PYPDF = False
# ── Reportlab (preferred overlay path; optional) ────────────────────
try:
from reportlab.pdfgen import canvas as _rl_canvas # type: ignore
_HAS_REPORTLAB = True
except ImportError:
_rl_canvas = None # type: ignore[assignment]
_HAS_REPORTLAB = False
# Layout constants for the stamp line. All values are in PDF points
# (1/72 inch) and are shared by the reportlab overlay path and the
# pypdf content-stream fallback.

# Font size used when drawing the stamp text.
STAMP_FONT_PT = 8
# Subtracted from the page height to get the text baseline's y coordinate.
STAMP_MARGIN_Y_PT = 20 # distance from top of page
# x coordinate at which the stamp text starts.
STAMP_MARGIN_X_PT = 36 # 0.5 inch from left
def _format_stamp(filer_id: str, company_name: str, affiliated_filers_name: str) -> str:
"""Build the stamp-text one-liner per Form 499-A Section IV.C.5.h."""
return (
f"Filer ID {filer_id or '\u2014'} | "
f"{company_name or '\u2014'} | "
f"Affiliated Filers: {affiliated_filers_name or '\u2014'}"
)
def _overlay_reportlab(
    stamp_text: str, width: float, height: float
) -> Optional[bytes]:
    """Render a single-page overlay PDF that carries only the stamp line.

    The overlay page is created with size ``(width, height)`` and the text
    is drawn in Helvetica at ``STAMP_FONT_PT``, starting ``STAMP_MARGIN_X_PT``
    from the left edge and ``STAMP_MARGIN_Y_PT`` below the top edge.

    Returns:
        The overlay PDF as bytes, or ``None`` when reportlab is not
        importable or building the overlay raises.
    """
    reportlab_usable = _HAS_REPORTLAB and _rl_canvas is not None
    if not reportlab_usable:
        return None

    try:
        sink = io.BytesIO()
        overlay = _rl_canvas.Canvas(sink, pagesize=(width, height))
        overlay.setFont("Helvetica", STAMP_FONT_PT)
        # reportlab's origin is the bottom-left corner, so a header near
        # the top of the page is positioned relative to the page height.
        baseline_y = height - STAMP_MARGIN_Y_PT
        overlay.drawString(STAMP_MARGIN_X_PT, baseline_y, stamp_text)
        overlay.showPage()
        overlay.save()
    except Exception as exc:  # pragma: no cover
        LOG.warning("reportlab overlay build failed: %s", exc)
        return None
    else:
        return sink.getvalue()
def _apply_overlay_via_pypdf(
    page: "PageObject",  # type: ignore[name-defined]
    overlay_pdf_bytes: bytes,
) -> None:
    """Merge the first page of an in-memory overlay PDF onto ``page``.

    ``page`` is modified in place. An overlay document that contains no
    pages is ignored and ``page`` is left untouched.
    """
    from pypdf import PdfReader as _Reader

    overlay_pages = _Reader(io.BytesIO(overlay_pdf_bytes)).pages
    if overlay_pages:
        page.merge_page(overlay_pages[0])
def _stamp_via_content_stream(
    page: "PageObject",  # type: ignore[name-defined]
    stamp_text: str,
    page_height: float,
) -> bool:
    """Stamp ``page`` without reportlab by appending a raw content stream.

    A short text-drawing content stream is added after the page's existing
    content, and a Helvetica font resource for it is registered on the page.

    Args:
        page: pypdf page to modify in place.
        stamp_text: Text of the one-line stamp.
        page_height: Page height in points; the baseline is placed
            ``STAMP_MARGIN_Y_PT`` below it.

    Returns:
        True on success, False if any exception was raised (the exception
        is logged as a warning, never propagated).
    """
    try:
        from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject

        # Escape parentheses / backslashes per PDF literal-string syntax.
        safe = (
            stamp_text.replace("\\", "\\\\")
            .replace("(", "\\(")
            .replace(")", "\\)")
        )
        y = page_height - STAMP_MARGIN_Y_PT

        # A dedicated resource name is used instead of the very common
        # "/F1": if the page already defines /F1 (often an embedded or
        # subset font), reusing it would draw the stamp with that font.
        font_key = "/TSStampHelv"

        # Encode as cp1252 to match the /WinAnsiEncoding declared on the
        # font below, so the em-dash placeholder and accented names map
        # to the right glyphs. Characters outside cp1252 become "?".
        # The coordinate is fixed-point because PDF numbers may not use
        # exponent notation.
        stream = (
            f"q BT {font_key} {STAMP_FONT_PT} Tf "
            f"{STAMP_MARGIN_X_PT} {y:.2f} Td ({safe}) Tj ET Q"
        ).encode("cp1252", errors="replace")

        new_cs = ContentStream(None, None)
        new_cs.set_data(stream)

        # Ensure /Resources /Font contains the stamp font.
        resources = page.get("/Resources")
        if isinstance(resources, IndirectObject):
            resources = resources.get_object()
        if resources is None:
            resources = DictionaryObject()
            page[NameObject("/Resources")] = resources
        fonts = resources.get("/Font")
        if isinstance(fonts, IndirectObject):
            fonts = fonts.get_object()
        if fonts is None:
            fonts = DictionaryObject()
            resources[NameObject("/Font")] = fonts
        # The font dictionary may be shared between pages, so an earlier
        # page may already have registered the stamp font.
        if font_key not in fonts:
            fonts[NameObject(font_key)] = DictionaryObject(
                {
                    NameObject("/Type"): NameObject("/Font"),
                    NameObject("/Subtype"): NameObject("/Type1"),
                    NameObject("/BaseFont"): NameObject("/Helvetica"),
                    NameObject("/Encoding"): NameObject("/WinAnsiEncoding"),
                }
            )

        existing = page.get_contents()
        if existing is None:
            page[NameObject("/Contents")] = new_cs
        else:
            try:
                # Wrap the original content in q ... Q so any
                # transformation matrix or other graphics state it leaves
                # behind cannot move or distort the stamp drawn after it.
                combined = ContentStream(None, None)
                combined.set_data(
                    b"q\n" + existing.get_data() + b"\nQ\n" + stream
                )
                page[NameObject("/Contents")] = combined
            except Exception:
                # Last-ditch: keep the original stream object and append
                # the stamp stream after it via a /Contents array.
                page[NameObject("/Contents")] = ArrayObject([existing, new_cs])
        return True
    except Exception as exc:
        LOG.warning("pypdf content-stream stamping failed: %s", exc)
        return False
def _copy_source_unchanged(src: Path, out: Path) -> None:
    """Copy ``src`` to ``out``; a no-op when both already name the same file."""
    try:
        shutil.copyfile(src, out)
    except shutil.SameFileError:
        # In-place run: the unmodified source already sits at the output path.
        LOG.warning("source and output are the same file; left unchanged: %s", out)


def stamp_pages(
    pdf_path: str,
    output_path: str,
    filer_id: str,
    company_name: str,
    affiliated_filers_name: str = "\u2014",
) -> str:
    """
    Stamp every page of ``pdf_path`` with a one-line header containing
    the Filer ID, Company Name, and Affiliated Filers Name, and write the
    result to ``output_path``.

    For each page the reportlab overlay is tried first (when reportlab is
    importable); if it cannot be built or merged, the pypdf content-stream
    fallback is used. Pages that neither method could stamp are still
    written, and are reported in a warning.

    This function is best-effort by design. The Form 499-A filing plan
    requires that the submission proceed even when fancy stamping fails
    (e.g., in a constrained environment missing ``reportlab``). If pypdf
    is missing, or the run raises an unexpected error, the source PDF is
    copied verbatim to the output path and a warning is logged.

    Args:
        pdf_path: Path of the source PDF.
        output_path: Path to write; missing parent directories are created.
        filer_id: Filer ID for the stamp.
        company_name: Company name for the stamp.
        affiliated_filers_name: Affiliated filers name for the stamp.

    Returns:
        ``output_path`` as a string.

    Raises:
        FileNotFoundError: ``pdf_path`` does not exist.
        Exception: whatever the fallback verbatim copy raised, if that
            copy itself failed.
    """
    # Validate the input before touching the filesystem, so a bad source
    # path does not leave freshly created output directories behind.
    src = Path(pdf_path)
    if not src.exists():
        raise FileNotFoundError(f"source PDF not found: {pdf_path}")
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)

    stamp_text = _format_stamp(filer_id, company_name, affiliated_filers_name)

    if not _HAS_PYPDF:
        LOG.warning(
            "pypdf unavailable — copying source PDF unchanged to %s", out
        )
        _copy_source_unchanged(src, out)
        return str(out)

    try:
        reader = PdfReader(str(src))
        writer = PdfWriter()
        overlay_mode = "reportlab" if _HAS_REPORTLAB else "content_stream"
        fallback_pages = 0  # pages where reportlab was expected but not used
        unstamped_pages = []  # 1-based numbers of pages nothing could stamp

        for page_no, page in enumerate(reader.pages, start=1):
            mb = page.mediabox
            width = float(mb.width)
            height = float(mb.height)
            stamped = False
            if overlay_mode == "reportlab":
                overlay_bytes = _overlay_reportlab(stamp_text, width, height)
                if overlay_bytes:
                    try:
                        _apply_overlay_via_pypdf(page, overlay_bytes)
                        stamped = True
                    except Exception as exc:
                        LOG.warning(
                            "overlay merge failed on page %d; "
                            "falling back to content stream: %s", page_no, exc
                        )
            if not stamped:
                if _stamp_via_content_stream(page, stamp_text, height):
                    if overlay_mode == "reportlab":
                        fallback_pages += 1
                else:
                    unstamped_pages.append(page_no)
            writer.add_page(page)

        # Serialize fully in memory before opening the output file, so a
        # failure while building the PDF cannot leave a truncated file
        # (which would destroy the source when stamping in place).
        buf = io.BytesIO()
        writer.write(buf)
        out.write_bytes(buf.getvalue())

        if unstamped_pages:
            LOG.warning(
                "%d page(s) of %s could not be stamped and were written "
                "unstamped (filer=%s): pages %s",
                len(unstamped_pages), out, filer_id, unstamped_pages,
            )
        if overlay_mode != "reportlab":
            LOG.warning(
                "reportlab not available — used pypdf content-stream fallback "
                "to stamp %s (filer=%s).", out, filer_id
            )
        elif fallback_pages:
            LOG.warning(
                "Traffic study stamped: %s (filer=%s); %d page(s) used the "
                "pypdf content-stream fallback instead of the reportlab overlay",
                out, filer_id, fallback_pages,
            )
        else:
            LOG.info(
                "Traffic study stamped via reportlab overlay: %s (filer=%s)",
                out, filer_id,
            )
        return str(out)
    except Exception as exc:
        LOG.warning(
            "traffic-study stamping failed (%s); copying source unchanged "
            "to preserve filing timeline.", exc
        )
        try:
            _copy_source_unchanged(src, out)
        except Exception as exc2:  # pragma: no cover
            LOG.error("fallback copy also failed: %s", exc2)
            raise
        return str(out)
if __name__ == "__main__":  # pragma: no cover
    # Command-line entry point: stamp one PDF and print the output path.
    import argparse

    logging.basicConfig(level=logging.INFO)
    # The description is the first paragraph of the module docstring.
    # Under ``python -OO`` docstrings are stripped and __doc__ is None,
    # so fall back to an empty description instead of crashing.
    ap = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n", 1)[0])
    ap.add_argument("source_pdf")
    ap.add_argument("output_pdf")
    ap.add_argument("--filer-id", required=True)
    ap.add_argument("--company-name", required=True)
    ap.add_argument("--affiliated-filers-name", default="\u2014")
    args = ap.parse_args()
    p = stamp_pages(
        pdf_path=args.source_pdf,
        output_path=args.output_pdf,
        filer_id=args.filer_id,
        company_name=args.company_name,
        affiliated_filers_name=args.affiliated_filers_name,
    )
    print(p)