trucking: A/B/C coupon price test (20/30/40% off) + SpamAssassin harness

- CAMPAIGN_COUPON_AB_PCTS="20,30,40" mints one daily code per arm; each carrier is bucketed by a stable sha256(email) hash so the split is even (~33/33/33 verified over 30k) and stable across re-sends (no arm-hopping). - Each arm's code stores its own percent in discount_codes, so the advertised discount always matches what checkout applies; redemptions are countable per code (marker campaign-daily:<date>:<pct>). - Empty/unset keeps legacy single-arm behavior (COUPON_PCT, legacy marker). - coupon_attribs() now takes per-recipient pct. - Tests: scripts/tests/test_coupon_ab.py (5 pass). SpamAssassin: both main campaigns (186/188) score 0.0 HAM across all 3 arms, coupon block renders clean; harness saved for re-runs.
2026-06-20 16:41:47 -05:00 · 2026-06-20 16:41:47 -05:00 · 6fce3ec9eb
commit 6fce3ec9eb
parent 1acae2f20c
4 changed files with 390 additions and 26 deletions
--- a/scripts/build_trucking_campaigns.py
+++ b/scripts/build_trucking_campaigns.py
@ -21,6 +21,7 @@ from __future__ import annotations
 import argparse
 import base64
 import hashlib
 import json
 import logging
 import os
@ -166,6 +167,19 @@ def lp_slug_for(campaign_type: str, phy_state: str | None = None) -> str:
 # bring the daily deal back. Reversible, no template or DB changes needed.
 COUPON_ENABLED = os.getenv("CAMPAIGN_ENABLE_COUPON", "0") in ("1", "true", "yes")
 COUPON_PCT = int(os.getenv("CAMPAIGN_COUPON_PCT", "40"))
 # A/B/C price test: when set to a comma list of percents (e.g. "20,30,40") each
 # carrier is deterministically bucketed by a stable hash of their email into one
 # arm, getting that arm's own daily code. Because each code stores its own
 # percent in discount_codes, the discount the email advertises always matches the
 # discount checkout actually applies, and redemptions are measurable per code
 # (description marker campaign-daily:<date>:<pct>). Empty/unset = single-arm test
 # at COUPON_PCT (legacy behavior). The split is even and stable per carrier, so a
 # given carrier always sees the same percent across re-sends (no arm-hopping).
 COUPON_AB_PCTS = tuple(
    int(p.strip())
    for p in os.getenv("CAMPAIGN_COUPON_AB_PCTS", "").split(",")
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # superscript digits that int() rejects, which would turn a typo in the
    # env var into a ValueError at import time instead of a skipped entry.
    if p.strip().isdecimal()
 )
 # Eligible slugs = every discountable service a trucking campaign can link to.
 # Pass-through-only slugs (boc3-filing $25 passthrough, etc.) are intentionally
 # eligible too: the discount math only touches the service-fee portion, so a
@ -267,13 +281,23 @@ def _random_coupon_code() -> str:
    return "".join(secrets.choice(_COUPON_ALPHABET) for _ in range(5))
-def get_or_create_daily_coupon(conn, send_date: date) -> str:
+def get_or_create_daily_coupon(conn, send_date: date, pct: int | None = None) -> str:
-    """Return the 5-letter coupon code for `send_date`, creating it if needed.
+    """Return the 5-letter coupon code for `send_date` at `pct`% off, minting it
    if needed.
-    Idempotent: a marker in `description` (campaign-daily:<date>) lets a re-run
+    Idempotent: a marker in `description` lets a re-run on the same day reuse the
-    on the same day reuse the existing code instead of minting a duplicate.
+    existing code instead of minting a duplicate. Single-arm runs use the legacy
    marker `campaign-daily:<date>`; A/B arms use `campaign-daily:<date>:<pct>` so
    each percent gets its own stable, separately-countable code.
    """
-    marker = f"campaign-daily:{send_date.isoformat()}"
+    pct = COUPON_PCT if pct is None else pct
    # Keep the legacy marker for the single-arm (no A/B) case so historical
    # idempotency/lookups still work; A/B arms get a percent-suffixed marker.
    marker = (
        f"campaign-daily:{send_date.isoformat()}"
        if not COUPON_AB_PCTS
        else f"campaign-daily:{send_date.isoformat()}:{pct}"
    )
    cur = conn.cursor()
    cur.execute("SELECT code FROM discount_codes WHERE description = %s LIMIT 1", (marker,))
    row = cur.fetchone()
@ -299,26 +323,61 @@ def get_or_create_daily_coupon(conn, send_date: date) -> str:
                ON CONFLICT (code) DO NOTHING
                RETURNING code
                """,
-                (code, marker, COUPON_PCT, COUPON_SLUGS, starts, expires),
+                (code, marker, pct, COUPON_SLUGS, starts, expires),
            )
            r = cur.fetchone()
            if r:
                conn.commit()
                LOG.info("[coupon] daily code %s (%d%% off, expires %s ET)",
-                         code, COUPON_PCT, expires.isoformat())
+                         code, pct, expires.isoformat())
                return r[0]
        except Exception:
            conn.rollback()
    raise RuntimeError("could not mint a unique daily coupon code")
-def coupon_attribs(coupon_code: str | None) -> dict:
+def get_or_create_daily_coupons(conn, send_date: date) -> dict[int, str]:
-    """Merge fields for the same-day deal, blank when no coupon is active."""
+    """Mint (or reuse) every coupon arm for `send_date`.
    Returns a mapping of percent -> code. With no A/B test configured this is a
    single arm {COUPON_PCT: code}; with CAMPAIGN_COUPON_AB_PCTS="20,30,40" it
    returns one code per percent so recipients can be split across arms.
    """
    pcts = list(COUPON_AB_PCTS) if COUPON_AB_PCTS else [COUPON_PCT]
    return {pct: get_or_create_daily_coupon(conn, send_date, pct) for pct in pcts}
 def pick_coupon_for_email(email: str, daily_coupons: dict[int, str] | None) -> tuple[str, str]:
    """Choose the coupon arm for one recipient.

    Returns ``(code, pct_str)``, or ``("", "")`` when `daily_coupons` is empty
    or None. With a single arm that arm is always returned. With several arms
    the recipient is bucketed by the SHA-256 digest of their trimmed,
    lower-cased email taken modulo the number of arms (arms ordered by
    ascending percent), so the same address always maps to the same arm.
    """
    if not daily_coupons:
        return "", ""
    arms = sorted(daily_coupons)
    arm_index = 0
    if len(arms) > 1:
        # Normalize first so case or stray whitespace cannot move a carrier
        # into a different arm.
        normalized = (email or "").strip().lower()
        digest = hashlib.sha256(normalized.encode()).hexdigest()
        arm_index = int(digest, 16) % len(arms)
    chosen = arms[arm_index]
    return daily_coupons[chosen], str(chosen)
 def coupon_attribs(coupon_code: str | None, coupon_pct: str | None = None) -> dict:
    """Merge fields for the same-day deal, blank when no coupon is active.
    `coupon_pct` is passed per-recipient during an A/B test so the advertised
    percent matches the arm's actual code; it falls back to the global COUPON_PCT
    for single-arm sends.
    """
    if not coupon_code:
        return {"coupon_code": "", "coupon_pct": "", "coupon_expires": ""}
    return {
        "coupon_code": coupon_code,
-        "coupon_pct": str(COUPON_PCT),
+        "coupon_pct": coupon_pct or str(COUPON_PCT),
        # Human-readable cutoff for the email body.
        "coupon_expires": "11:59 PM ET tonight",
    }
@ -1096,16 +1155,21 @@ def run(send_date: date, dry_run: bool = False, preview: bool = False,
        warmup_cap: bool = True) -> None:
    conn = psycopg2.connect(DB_URL)
-    # Mint (or reuse) the same-day coupon for this send date so every campaign
+    # Mint (or reuse) the same-day coupon(s) for this send date so every campaign
-    # in the run shares one expiring code. Preview/dry runs skip the write, and
+    # in the run shares the same expiring code(s). Preview/dry runs skip the write,
-    # the daily deal is disabled by default (see COUPON_ENABLED) -- when off we
+    # and the daily deal is disabled by default (see COUPON_ENABLED) -- when off we
-    # send at normal price (empty coupon_code -> template's no-deal branch).
+    # send at normal price (empty coupon_code -> template's no-deal branch). When
-    daily_coupon = None
+    # CAMPAIGN_COUPON_AB_PCTS is set we mint one code per percent arm and split
    # recipients across them by a stable hash of their email.
    daily_coupons: dict[int, str] | None = None
    if COUPON_ENABLED and not dry_run and not preview:
        try:
-            daily_coupon = get_or_create_daily_coupon(conn, send_date)
+            daily_coupons = get_or_create_daily_coupons(conn, send_date)
            if COUPON_AB_PCTS:
                LOG.info("[coupon] A/B test arms: %s",
                         ", ".join(f"{p}%={c}" for p, c in sorted(daily_coupons.items())))
        except Exception as exc:  # noqa: BLE001
-            LOG.warning("[coupon] could not mint daily coupon: %s (sending without)", exc)
+            LOG.warning("[coupon] could not mint daily coupon(s): %s (sending without)", exc)
    elif not COUPON_ENABLED:
        LOG.info("[coupon] disabled (CAMPAIGN_ENABLE_COUPON unset) — sending at normal price")
@ -1240,24 +1304,25 @@ def run(send_date: date, dry_run: bool = False, preview: bool = False,
            # sample carrier's attribs) so the real audience is never touched.
            if preview:
                r0 = rows[0]
                p_code, p_pct = pick_coupon_for_email(TEST_EMAIL, daily_coupons)
                subscribers = [{
                    "email": TEST_EMAIL,
                    "name": r0[2] or "Sample Carrier",
                    "attribs": {"dot_number": r0[0], "company": r0[2] or "", "state": r0[3] or "",
-                                "lp_link": lp_link_with_coupon(campaign_type, r0[4], daily_coupon),
+                                "lp_link": lp_link_with_coupon(campaign_type, r0[4], p_code),
-                                **coupon_attribs(daily_coupon)},
+                                **coupon_attribs(p_code, p_pct)},
                }]
            else:
-                subscribers = [
+                subscribers = []
-                    {
+                for row in rows:
                    c_code, c_pct = pick_coupon_for_email(row[1], daily_coupons)
                    subscribers.append({
                        "email": row[1],
                        "name": row[2] or row[1],
                        "attribs": {"dot_number": row[0], "company": row[2] or "", "state": row[3] or "",
-                                    "lp_link": lp_link_with_coupon(campaign_type, row[4], daily_coupon),
+                                    "lp_link": lp_link_with_coupon(campaign_type, row[4], c_code),
-                                    **coupon_attribs(daily_coupon)},
+                                    **coupon_attribs(c_code, c_pct)},
-                    }
+                    })
                    for row in rows
                ]
            # Create list + add subscribers
            list_id = create_list(list_name)
--- a/scripts/tests/sa_coupon_test.py
+++ b/scripts/tests/sa_coupon_test.py
@ -0,0 +1,123 @@
 #!/usr/bin/env python3
 """Render a trucking campaign body with coupon merge-tags filled in and score it
 through SpamAssassin. Run on the prod host (has spamassassin + listmonk DB).
 Usage: python3 /tmp/sa_coupon_test.py <campaign_id> <pct> <code>
 """
 import html
 import re
 import subprocess
 import sys
 from email.message import EmailMessage
 from email.utils import formatdate, make_msgid
 # Optional positional arguments: campaign id, discount percent, coupon code.
 CID = sys.argv[1] if len(sys.argv) > 1 else "186"
 PCT = sys.argv[2] if len(sys.argv) > 2 else "40"
 # Plain five-letter sample code. The previous default expression
 # ("KQ7MTN".replace("6", "K")[:5] or "KQMTN") evaluated to "KQ7MT": the
 # replace was a no-op and the `or` fallback could never be reached.
 CODE = sys.argv[3] if len(sys.argv) > 3 else "KQMTN"
 def psql(q):
    """Run SQL `q` with psql inside the Postgres container and return its output.

    The command targets the ``listmonk`` database as user ``pw`` with ``-tA``
    and returns stdout with trailing newlines removed. A non-zero exit raises
    ``subprocess.CalledProcessError``.
    """
    command = [
        "docker", "exec", "performancewest-api-postgres-1",
        "psql", "-U", "pw", "-d", "listmonk", "-tA", "-c", q,
    ]
    raw_output = subprocess.check_output(command, text=True)
    return raw_output.rstrip("\n")
 # One psql call per column, in this order; CID is interpolated into the SQL
 # text as given on the command line.
 subject, from_email, body = (
    psql(f"SELECT {column} FROM campaigns WHERE id={CID};")
    for column in ("subject", "from_email", "body")
 )
 # --- Sample subscriber attribs (mirrors build_trucking_campaigns.coupon_attribs) ---
 # Fixed sample carrier; only the coupon fields vary with the CLI arguments.
 attribs = dict(
    dot_number="1228791",
    company="BLUE RIDGE FREIGHT LLC",
    state="CT",
    lp_link=f"https://performancewest.net/order/mcs150-update?code={CODE}",
    coupon_code=CODE,
    coupon_pct=PCT,
    coupon_expires="11:59 PM ET tonight",
 )
 # --- Minimal listmonk template renderer: {{ if .Subscriber.Attribs.X }}..{{ else }}..{{ end }} + {{ .Subscriber.Attribs.X }} ---
 def render(tpl: str, at: dict) -> str:
    """Render the small subset of listmonk template syntax this harness needs.

    Resolves non-nested ``{{ if .Subscriber.Attribs.key }}A{{ else }}B{{ end }}``
    conditionals (``{{ else }}`` optional), substitutes
    ``{{ .Subscriber.Attribs.key }}`` with the HTML-escaped value from `at`
    (missing keys render as an empty string), replaces the unsubscribe
    placeholder with a stand-in URL and strips every remaining ``{{ ... }}``
    tag. Returns the rendered string.
    """
    cond_pat = re.compile(
        r"\{\{\s*if\s+\.Subscriber\.Attribs\.(\w+)\s*\}\}(.*?)"
        r"(?:\{\{\s*else\s*\}\}(.*?))?\{\{\s*end\s*\}\}",
        re.DOTALL,
    )
    def repl(m):
        key, t_branch, f_branch = m.group(1), m.group(2), m.group(3) or ""
        # An attribute counts as set when its string form is non-blank.
        return t_branch if str(at.get(key, "")).strip() else f_branch
    # Repeat until a pass changes nothing, so conditionals exposed by an
    # earlier substitution are resolved as well.
    prev = None
    while prev != tpl:
        prev = tpl
        tpl = cond_pat.sub(repl, tpl)
    # Resolve simple value tags.
    def val(m):
        return html.escape(str(at.get(m.group(1), "")))
    tpl = re.sub(r"\{\{\s*\.Subscriber\.Attribs\.(\w+)\s*\}\}", val, tpl)
    # Unsubscribe placeholder -> realistic stand-in. Matched with flexible
    # whitespace: an exact-text match would miss `{{UnsubscribeURL}}`, and the
    # catch-all strip below would then leave an empty link in the scored mail.
    tpl = re.sub(
        r"\{\{\s*UnsubscribeURL\s*\}\}",
        "https://performancewest.net/unsubscribe/abc123",
        tpl,
    )
    tpl = re.sub(r"\{\{[^}]*\}\}", "", tpl)  # strip any remaining tags
    return tpl
 rendered_subject = render(subject, attribs)
 rendered_body = render(body, attribs)
 # --- Assemble a realistic MIME email (what the recipient's filter sees) ---
 # from_email is like 'Performance West <noreply@performancewest.net>'
 m_from = from_email
 msg = EmailMessage()
 # Headers are set in this fixed order.
 for header_name, header_value in (
    ("From", m_from),
    ("To", "dispatch@blueridgefreight.com"),
    ("Subject", rendered_subject),
    ("Date", formatdate(localtime=True)),
    ("Message-ID", make_msgid(domain="performancewest.net")),
    ("List-Unsubscribe", "<https://performancewest.net/unsubscribe/abc123>, <mailto:unsub@performancewest.net>"),
    ("List-Unsubscribe-Post", "List-Unsubscribe=One-Click"),
    ("Precedence", "bulk"),
 ):
    msg[header_name] = header_value
 # Plain-text alternative: drop tags crudely, collapse runs of 3+ newlines,
 # then decode HTML entities. The HTML part is attached after it.
 text_alt = html.unescape(
    re.sub(r"\n{3,}", "\n\n", re.sub(r"<[^>]+>", "", rendered_body))
 ).strip()
 msg.set_content(text_alt)
 msg.add_alternative(rendered_body, subtype="html")
 raw = msg.as_bytes()
 # --- Score through SpamAssassin (-t test mode; network tests skipped for determinism) ---
 proc = subprocess.run(
    ["spamassassin", "-t", "-L"],  # -L = local tests only (no DNS/network), -t = test mode
    input=raw, capture_output=True,
 )
 scored = proc.stdout.decode("utf-8", "replace")
 # Pull the score + the rule hits from the rewritten headers.
 score_line = ""
 rules = []
 for line in scored.splitlines():
    if line.startswith("X-Spam-Status:"):
        score_line = line
    if line.startswith("X-Spam-Report:") or line.startswith("\tpts") or re.match(r"^\s+[-0-9.]+\s+[A-Z0-9_]+\s", line):
        rules.append(line.rstrip())
 print("=" * 70)
 print(f"Campaign {CID}  arm={PCT}%  code={CODE}")
 print(f"Subject: {rendered_subject}")
 print("=" * 70)
 print(score_line or "(no X-Spam-Status header found)")
 print("-" * 70)
 m = re.search(r"score=([-0-9.]+)", score_line)
 if m:
    print(f"SCORE: {m.group(1)}   (SpamAssassin default spam threshold = 5.0)")
 # Show the detailed report block. The terminator is a lookahead so it is not
 # part of the match; consuming it made the printed report end with a dangling
 # "X-Spam-" fragment from the next header.
 rep = re.search(r"X-Spam-Report:(.*?)(?=\nX-Spam-|\n\n)", scored, re.DOTALL)
 if rep:
    print(rep.group(0))
 else:
    # Fallback: print any rule lines we collected.
    for r in rules:
        print(r)
--- a/scripts/tests/sa_coupon_test_local.py
+++ b/scripts/tests/sa_coupon_test_local.py
@ -0,0 +1,100 @@
 #!/usr/bin/env python3
 """Render a trucking campaign body with coupon merge-tags filled in and score it
 through local SpamAssassin (4.0.1). Reads the body HTML from a file.
 Usage: sa_coupon_test_local.py <body.html> <subject> [pct] [code] [--verbose]
 """
 import html
 import re
 import subprocess
 import sys
 from email.message import EmailMessage
 from email.utils import formatdate, make_msgid
 # Positional arguments, with the --verbose flag (checked further down via
 # sys.argv) filtered out first so it can never be mistaken for <pct> or <code>.
 _positional = [arg for arg in sys.argv[1:] if arg != "--verbose"]
 if len(_positional) < 2:
    sys.exit("Usage: sa_coupon_test_local.py <body.html> <subject> [pct] [code] [--verbose]")
 BODY_FILE = _positional[0]
 SUBJECT_TPL = _positional[1]
 PCT = _positional[2] if len(_positional) > 2 else "40"
 CODE = _positional[3] if len(_positional) > 3 else "KQMTN"
 # Read the body template and close the file handle straight away.
 with open(BODY_FILE, encoding="utf-8") as _body_fh:
    body = _body_fh.read()
 # Fixed sample carrier; only the coupon fields vary with the CLI arguments.
 attribs = dict(
    dot_number="1228791",
    company="BLUE RIDGE FREIGHT LLC",
    state="CT",
    lp_link=f"https://performancewest.net/order/mcs150-update?code={CODE}",
    coupon_code=CODE,
    coupon_pct=PCT,
    coupon_expires="11:59 PM ET tonight",
 )
 def render(tpl: str, at: dict) -> str:
    """Render the small subset of listmonk template syntax this harness needs.

    Resolves non-nested ``{{ if .Subscriber.Attribs.key }}A{{ else }}B{{ end }}``
    conditionals (``{{ else }}`` optional), substitutes
    ``{{ .Subscriber.Attribs.key }}`` with the HTML-escaped value from `at`
    (missing keys render as an empty string), replaces the unsubscribe
    placeholder with a stand-in URL and strips every remaining ``{{ ... }}``
    tag. Returns the rendered string.
    """
    cond_pat = re.compile(
        r"\{\{\s*if\s+\.Subscriber\.Attribs\.(\w+)\s*\}\}(.*?)"
        r"(?:\{\{\s*else\s*\}\}(.*?))?\{\{\s*end\s*\}\}",
        re.DOTALL,
    )
    def repl(m):
        key, t_branch, f_branch = m.group(1), m.group(2), m.group(3) or ""
        # An attribute counts as set when its string form is non-blank.
        return t_branch if str(at.get(key, "")).strip() else f_branch
    # Repeat until a pass changes nothing, so conditionals exposed by an
    # earlier substitution are resolved as well.
    prev = None
    while prev != tpl:
        prev = tpl
        tpl = cond_pat.sub(repl, tpl)
    def val(m):
        return html.escape(str(at.get(m.group(1), "")))
    tpl = re.sub(r"\{\{\s*\.Subscriber\.Attribs\.(\w+)\s*\}\}", val, tpl)
    # Unsubscribe placeholder -> stand-in URL. Matched with flexible
    # whitespace: an exact-text match would miss `{{UnsubscribeURL}}`, and the
    # catch-all strip below would then leave an empty link in the scored mail.
    tpl = re.sub(
        r"\{\{\s*UnsubscribeURL\s*\}\}",
        "https://performancewest.net/unsubscribe/abc123",
        tpl,
    )
    tpl = re.sub(r"\{\{[^}]*\}\}", "", tpl)
    return tpl
 rendered_subject = render(SUBJECT_TPL, attribs)
 rendered_body = render(body, attribs)
 msg = EmailMessage()
 # Headers are set in this fixed order.
 for header_name, header_value in (
    ("From", "Performance West <noreply@performancewest.net>"),
    ("To", "dispatch@blueridgefreight.com"),
    ("Subject", rendered_subject),
    ("Date", formatdate(localtime=True)),
    ("Message-ID", make_msgid(domain="performancewest.net")),
    ("List-Unsubscribe", "<https://performancewest.net/unsubscribe/abc123>, <mailto:unsub@performancewest.net>"),
    ("List-Unsubscribe-Post", "List-Unsubscribe=One-Click"),
    ("Precedence", "bulk"),
 ):
    msg[header_name] = header_value
 # Plain-text alternative: drop tags crudely, collapse runs of 3+ newlines,
 # then decode HTML entities. The HTML part is attached after it.
 text_alt = html.unescape(
    re.sub(r"\n{3,}", "\n\n", re.sub(r"<[^>]+>", "", rendered_body))
 ).strip()
 msg.set_content(text_alt)
 msg.add_alternative(rendered_body, subtype="html")
 raw = msg.as_bytes()
 # Score the message with SpamAssassin (-t test mode, -L local tests only).
 proc = subprocess.run(["spamassassin", "-t", "-L"], input=raw, capture_output=True)
 scored = proc.stdout.decode("utf-8", "replace")
 score_line = ""
 for line in scored.splitlines():
    if line.startswith("X-Spam-Status:"):
        score_line = line
        break
 m = re.search(r"score=([-0-9.]+)", score_line)
 score = m.group(1) if m else "?"
 # Without a status header nothing was scored, so the verdict is UNKNOWN
 # rather than HAM: a failed run must not read as a clean result.
 if not score_line:
    verdict = "UNKNOWN"
 elif score_line.startswith("X-Spam-Status: Yes"):
    verdict = "SPAM"
 else:
    verdict = "HAM"
 print(f"arm={PCT}% code={CODE}  ->  SCORE {score}  [{verdict}]   subj: {rendered_subject}")
 if verdict == "UNKNOWN":
    print(
        f"spamassassin produced no X-Spam-Status header (exit code {proc.returncode})",
        file=sys.stderr,
    )
 # Detailed rule report.
 rep = re.search(r"X-Spam-Report:(.*?)\n(?:[A-Za-z-]+:|\n)", scored, re.DOTALL)
 if "--verbose" in sys.argv:
    if rep:
        print(rep.group(1))
    else:
        # No report header found: print every line that looks like a
        # "<points> <RULE_NAME>" entry.
        for line in scored.splitlines():
            if re.match(r"^\s*[-0-9.]+\s+[A-Z0-9_]+", line):
                print("   ", line.strip())
--- a/scripts/tests/test_coupon_ab.py
+++ b/scripts/tests/test_coupon_ab.py
@ -0,0 +1,76 @@
 #!/usr/bin/env python3
 """Unit tests for the trucking same-day coupon A/B/C price test.
 Verifies that CAMPAIGN_COUPON_AB_PCTS produces an even, stable per-email split,
 that the advertised percent always matches the arm's actual code (so the email
 never promises a discount checkout won't honor), and that the off / single-arm
 states behave. Pure-function tests, no DB required (psycopg2 is stubbed).
 Run: CAMPAIGN_COUPON_AB_PCTS="20,30,40" python3 scripts/tests/test_coupon_ab.py
 """
 from __future__ import annotations
 import importlib.util
 import os
 import sys
 import types
 from collections import Counter
 from pathlib import Path
 # Provide the three-arm configuration before the builder is loaded below;
 # setdefault leaves an already-exported value untouched.
 os.environ.setdefault("CAMPAIGN_COUPON_AB_PCTS", "20,30,40")
 # Stub the DB driver so the builder imports without a live Postgres.
 sys.modules.setdefault("psycopg2", types.ModuleType("psycopg2"))
 # Load the builder script by file path (it sits one directory above this
 # tests folder) and execute it under the module name "btc".
 _SCRIPT = Path(__file__).resolve().parents[1] / "build_trucking_campaigns.py"
 _spec = importlib.util.spec_from_file_location("btc", _SCRIPT)
 btc = importlib.util.module_from_spec(_spec)
 _spec.loader.exec_module(btc)
 def test_arms_parsed():
    """The configured comma list is parsed into a tuple of int percents."""
    expected_arms = (20, 30, 40)
    assert btc.COUPON_AB_PCTS == expected_arms
 def test_even_stable_and_code_matches_pct():
    """Over 30,000 synthetic addresses the split is roughly even, repeatable,
    and every returned code belongs to the returned percent."""
    arms = {20: "AAAAA", 30: "BBBBB", 40: "CCCCC"}
    tally: Counter = Counter()
    for i in range(30000):
        address = f"user{i}@carrier{i % 500}.com"
        assigned = btc.pick_coupon_for_email(address, arms)
        code, pct = assigned
        tally[pct] += 1
        # Advertised percent must match the arm's real code.
        assert arms[int(pct)] == code
        # Stable: asking again for the same address gives the same arm.
        assert btc.pick_coupon_for_email(address, arms) == assigned
    total = sum(tally.values())
    # Each arm should be within ~2 points of an even third.
    for pct in ("20", "30", "40"):
        share = tally[pct] / total
        assert 0.31 <= share <= 0.353, (pct, share)
 def test_off_state():
    """With no coupons (None or an empty mapping) the pick is a pair of blanks."""
    for no_coupons in (None, {}):
        assert btc.pick_coupon_for_email("x@y.com", no_coupons) == ("", "")
 def test_single_arm():
    """A single-arm mapping always yields that arm's code and percent."""
    only_arm = {40: "ZZZZZ"}
    picked = btc.pick_coupon_for_email("a@b.com", only_arm)
    assert picked == ("ZZZZZ", "40")
 def test_coupon_attribs_reflects_pct():
    """coupon_attribs advertises the per-recipient percent, falls back to the
    global COUPON_PCT when none is passed, and blanks every field when there
    is no active code."""
    a = btc.coupon_attribs("BBBBB", "30")
    assert a["coupon_code"] == "BBBBB"
    assert a["coupon_pct"] == "30"
    # Single-arm callers omit the percent (or pass None): the global applies.
    assert btc.coupon_attribs("BBBBB")["coupon_pct"] == str(btc.COUPON_PCT)
    assert btc.coupon_attribs("BBBBB", None)["coupon_pct"] == str(btc.COUPON_PCT)
    # No code -> every merge field is blank, whatever percent was passed.
    blank = {"coupon_code": "", "coupon_pct": "", "coupon_expires": ""}
    assert btc.coupon_attribs("", "30") == blank
    assert btc.coupon_attribs(None) == blank
 if __name__ == "__main__":
    # Minimal runner: call every module-level test_* function in name order
    # and report each one; a failing assert aborts the run with its traceback.
    test_names = [name for name in sorted(globals()) if name.startswith("test_")]
    for test_name in test_names:
        globals()[test_name]()
        print(f"ok  {test_name}")
    print(f"\nAll {len(test_names)} coupon A/B tests passed.")