"""Carrier invoice PDF adapter (v2 stub).

Real-world carrier invoices frequently arrive only as PDF — either as
structured text or scanned images. Extracting line-item revenue from
arbitrary carrier PDFs (CenturyLink, Verizon, AT&T, Lumen, Frontier,
Windstream, etc.) requires an OCR+layout-aware pipeline that is out of
scope for the v1 ICC ingester.

This class is a **registered stub** so that the upload endpoint can
accept the ``carrier_invoice_pdf`` source format and persist the blob
into MinIO. When the ingester dispatches to this adapter, ``iter_rows``
raises ``NotImplementedError`` as soon as the returned generator is first
iterated; the ingester catches that in its outer try/except and marks the
upload ``status='failed'`` with an explanatory error message, letting
customers see "awaiting v2 PDF parser" in the portal without losing the
file.

v2 plan
-------
* ``pdfplumber`` for text extraction + table reconstruction
* Vendor-specific anchor templates (e.g. "Total Switched Access Charges")
* Fall back to Anthropic Claude vision on scanned-image pages
* OCR output manually QA'd by the Accounting-Advisor role before
  ``rows_accepted`` is trusted
"""

from __future__ import annotations

import logging
from typing import Iterator

from .common import BaseICCAdapter, IccRevenueLine

logger = logging.getLogger(__name__)


class CarrierInvoicePDFAdapter(BaseICCAdapter):
    """Placeholder adapter for the ``carrier_invoice_pdf`` source format.

    No PDF parsing is implemented here: iterating the result of
    :meth:`iter_rows` always fails with ``NotImplementedError``.
    """

    # Source-format identifier this adapter answers to.
    SOURCE_FORMAT = "carrier_invoice_pdf"

    def iter_rows(self, local_path: str) -> Iterator[IccRevenueLine]:
        """Return a generator that fails on first iteration.

        Because the body contains a ``yield``, calling this method does not
        raise by itself; the exception surfaces when the caller first
        advances the returned generator.

        Args:
            local_path: Path to the PDF on local disk. Not read by this stub.

        Raises:
            NotImplementedError: Always, on the first ``next()`` of the
                returned generator.
        """
        reason = (
            "carrier_invoice_pdf parsing is deferred to v2 — upload accepted "
            "and stored, but no line items have been extracted. "
            "See the module docstring for the planned OCR pipeline."
        )
        raise NotImplementedError(reason)
        # Never executed; its presence alone turns this method into a
        # generator function so the return type matches the annotation.
        yield  # pragma: no cover