diff --git a/scripts/build_trucking_campaigns.py b/scripts/build_trucking_campaigns.py index 9084a34..5df95ab 100644 --- a/scripts/build_trucking_campaigns.py +++ b/scripts/build_trucking_campaigns.py @@ -21,6 +21,7 @@ from __future__ import annotations import argparse import base64 +import hashlib import json import logging import os @@ -166,6 +167,19 @@ def lp_slug_for(campaign_type: str, phy_state: str | None = None) -> str: # bring the daily deal back. Reversible, no template or DB changes needed. COUPON_ENABLED = os.getenv("CAMPAIGN_ENABLE_COUPON", "0") in ("1", "true", "yes") COUPON_PCT = int(os.getenv("CAMPAIGN_COUPON_PCT", "40")) +# A/B/C price test: when set to a comma list of percents (e.g. "20,30,40") each +# carrier is deterministically bucketed by a stable hash of their email into one +# arm, getting that arm's own daily code. Because each code stores its own +# percent in discount_codes, the discount the email advertises always matches the +# discount checkout actually applies, and redemptions are measurable per code +# (description marker campaign-daily:<date>:<pct>). Empty/unset = single-arm test +# at COUPON_PCT (legacy behavior). The split is even and stable per carrier, so a +# given carrier always sees the same percent across re-sends (no arm-hopping). +COUPON_AB_PCTS = tuple( + int(p.strip()) + for p in os.getenv("CAMPAIGN_COUPON_AB_PCTS", "").split(",") + if p.strip().isdigit() +) # Eligible slugs = every discountable service a trucking campaign can link to. # Pass-through-only slugs (boc3-filing $25 passthrough, etc.) are intentionally # eligible too: the discount math only touches the service-fee portion, so a @@ -267,13 +281,23 @@ def _random_coupon_code() -> str: return "".join(secrets.choice(_COUPON_ALPHABET) for _ in range(5)) -def get_or_create_daily_coupon(conn, send_date: date) -> str: - """Return the 5-letter coupon code for `send_date`, creating it if needed.
+def get_or_create_daily_coupon(conn, send_date: date, pct: int | None = None) -> str: + """Return the 5-letter coupon code for `send_date` at `pct`% off, minting it + if needed. - Idempotent: a marker in `description` (campaign-daily:) lets a re-run - on the same day reuse the existing code instead of minting a duplicate. + Idempotent: a marker in `description` lets a re-run on the same day reuse the + existing code instead of minting a duplicate. Single-arm runs use the legacy + marker `campaign-daily:<date>`; A/B arms use `campaign-daily:<date>:<pct>` so + each percent gets its own stable, separately-countable code. """ - marker = f"campaign-daily:{send_date.isoformat()}" + pct = COUPON_PCT if pct is None else pct + # Keep the legacy marker for the single-arm (no A/B) case so historical + # idempotency/lookups still work; A/B arms get a percent-suffixed marker. + marker = ( + f"campaign-daily:{send_date.isoformat()}" + if not COUPON_AB_PCTS + else f"campaign-daily:{send_date.isoformat()}:{pct}" + ) cur = conn.cursor() cur.execute("SELECT code FROM discount_codes WHERE description = %s LIMIT 1", (marker,)) row = cur.fetchone() @@ -299,26 +323,61 @@ def get_or_create_daily_coupon(conn, send_date: date) -> str: ON CONFLICT (code) DO NOTHING RETURNING code """, - (code, marker, COUPON_PCT, COUPON_SLUGS, starts, expires), + (code, marker, pct, COUPON_SLUGS, starts, expires), ) r = cur.fetchone() if r: conn.commit() LOG.info("[coupon] daily code %s (%d%% off, expires %s ET)", - code, COUPON_PCT, expires.isoformat()) + code, pct, expires.isoformat()) return r[0] except Exception: conn.rollback() raise RuntimeError("could not mint a unique daily coupon code") -def coupon_attribs(coupon_code: str | None) -> dict: - """Merge fields for the same-day deal, blank when no coupon is active.""" +def get_or_create_daily_coupons(conn, send_date: date) -> dict[int, str]: + """Mint (or reuse) every coupon arm for `send_date`. + + Returns a mapping of percent -> code.
With no A/B test configured this is a + single arm {COUPON_PCT: code}; with CAMPAIGN_COUPON_AB_PCTS="20,30,40" it + returns one code per percent so recipients can be split across arms. + """ + pcts = list(COUPON_AB_PCTS) if COUPON_AB_PCTS else [COUPON_PCT] + return {pct: get_or_create_daily_coupon(conn, send_date, pct) for pct in pcts} + + +def pick_coupon_for_email(email: str, daily_coupons: dict[int, str] | None) -> tuple[str, str]: + """Deterministically assign a carrier to one coupon arm by a stable hash of + their email. Returns (code, pct_str); ("", "") when coupons are off. + + The hash makes the split even and *stable*: the same carrier always lands in + the same arm across re-sends, so an A/B comparison isn't polluted by a carrier + seeing 20% one day and 40% the next. + """ + if not daily_coupons: + return "", "" + pcts = sorted(daily_coupons.keys()) + if len(pcts) == 1: + pct = pcts[0] + return daily_coupons[pct], str(pct) + h = hashlib.sha256((email or "").strip().lower().encode()).hexdigest() + pct = pcts[int(h, 16) % len(pcts)] + return daily_coupons[pct], str(pct) + + +def coupon_attribs(coupon_code: str | None, coupon_pct: str | None = None) -> dict: + """Merge fields for the same-day deal, blank when no coupon is active. + + `coupon_pct` is passed per-recipient during an A/B test so the advertised + percent matches the arm's actual code; it falls back to the global COUPON_PCT + for single-arm sends. + """ if not coupon_code: return {"coupon_code": "", "coupon_pct": "", "coupon_expires": ""} return { "coupon_code": coupon_code, - "coupon_pct": str(COUPON_PCT), + "coupon_pct": coupon_pct or str(COUPON_PCT), # Human-readable cutoff for the email body. 
"coupon_expires": "11:59 PM ET tonight", } @@ -1096,16 +1155,21 @@ def run(send_date: date, dry_run: bool = False, preview: bool = False, warmup_cap: bool = True) -> None: conn = psycopg2.connect(DB_URL) - # Mint (or reuse) the same-day coupon for this send date so every campaign - # in the run shares one expiring code. Preview/dry runs skip the write, and - # the daily deal is disabled by default (see COUPON_ENABLED) -- when off we - # send at normal price (empty coupon_code -> template's no-deal branch). - daily_coupon = None + # Mint (or reuse) the same-day coupon(s) for this send date so every campaign + # in the run shares the same expiring code(s). Preview/dry runs skip the write, + # and the daily deal is disabled by default (see COUPON_ENABLED) -- when off we + # send at normal price (empty coupon_code -> template's no-deal branch). When + # CAMPAIGN_COUPON_AB_PCTS is set we mint one code per percent arm and split + # recipients across them by a stable hash of their email. + daily_coupons: dict[int, str] | None = None if COUPON_ENABLED and not dry_run and not preview: try: - daily_coupon = get_or_create_daily_coupon(conn, send_date) + daily_coupons = get_or_create_daily_coupons(conn, send_date) + if COUPON_AB_PCTS: + LOG.info("[coupon] A/B test arms: %s", + ", ".join(f"{p}%={c}" for p, c in sorted(daily_coupons.items()))) except Exception as exc: # noqa: BLE001 - LOG.warning("[coupon] could not mint daily coupon: %s (sending without)", exc) + LOG.warning("[coupon] could not mint daily coupon(s): %s (sending without)", exc) elif not COUPON_ENABLED: LOG.info("[coupon] disabled (CAMPAIGN_ENABLE_COUPON unset) — sending at normal price") @@ -1240,24 +1304,25 @@ def run(send_date: date, dry_run: bool = False, preview: bool = False, # sample carrier's attribs) so the real audience is never touched. 
if preview: r0 = rows[0] + p_code, p_pct = pick_coupon_for_email(TEST_EMAIL, daily_coupons) subscribers = [{ "email": TEST_EMAIL, "name": r0[2] or "Sample Carrier", "attribs": {"dot_number": r0[0], "company": r0[2] or "", "state": r0[3] or "", - "lp_link": lp_link_with_coupon(campaign_type, r0[4], daily_coupon), - **coupon_attribs(daily_coupon)}, + "lp_link": lp_link_with_coupon(campaign_type, r0[4], p_code), + **coupon_attribs(p_code, p_pct)}, }] else: - subscribers = [ - { + subscribers = [] + for row in rows: + c_code, c_pct = pick_coupon_for_email(row[1], daily_coupons) + subscribers.append({ "email": row[1], "name": row[2] or row[1], "attribs": {"dot_number": row[0], "company": row[2] or "", "state": row[3] or "", - "lp_link": lp_link_with_coupon(campaign_type, row[4], daily_coupon), - **coupon_attribs(daily_coupon)}, - } - for row in rows - ] + "lp_link": lp_link_with_coupon(campaign_type, row[4], c_code), + **coupon_attribs(c_code, c_pct)}, + }) # Create list + add subscribers list_id = create_list(list_name) diff --git a/scripts/tests/sa_coupon_test.py b/scripts/tests/sa_coupon_test.py new file mode 100644 index 0000000..82f8623 --- /dev/null +++ b/scripts/tests/sa_coupon_test.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +"""Render a trucking campaign body with coupon merge-tags filled in and score it +through SpamAssassin. Run on the prod host (has spamassassin + listmonk DB). 
+ +Usage: python3 /tmp/sa_coupon_test.py [campaign_id] [pct] [code] +""" +import html +import re +import subprocess +import sys +from email.message import EmailMessage +from email.utils import formatdate, make_msgid + +CID = sys.argv[1] if len(sys.argv) > 1 else "186" +PCT = sys.argv[2] if len(sys.argv) > 2 else "40" +CODE = sys.argv[3] if len(sys.argv) > 3 else "KQ7MTN".replace("6", "K")[:5] or "KQMTN" + + +def psql(q): + out = subprocess.check_output( + ["docker", "exec", "performancewest-api-postgres-1", + "psql", "-U", "pw", "-d", "listmonk", "-tA", "-c", q], + text=True, + ) + return out.rstrip("\n") + + +subject = psql(f"SELECT subject FROM campaigns WHERE id={CID};") +from_email = psql(f"SELECT from_email FROM campaigns WHERE id={CID};") +body = psql(f"SELECT body FROM campaigns WHERE id={CID};") + +# --- Sample subscriber attribs (mirrors build_trucking_campaigns.coupon_attribs) --- +attribs = { + "dot_number": "1228791", + "company": "BLUE RIDGE FREIGHT LLC", + "state": "CT", + "lp_link": f"https://performancewest.net/order/mcs150-update?code={CODE}", + "coupon_code": CODE, + "coupon_pct": PCT, + "coupon_expires": "11:59 PM ET tonight", +} + +# --- Minimal listmonk template renderer: {{ if .Subscriber.Attribs.X }}..{{ else }}..{{ end }} + {{ .Subscriber.Attribs.X }} --- +def render(tpl: str, at: dict) -> str: + # Resolve {{ if .Subscriber.Attribs.key }}TRUE{{ else }}FALSE{{ end }} (no nesting). + pat = re.compile( + r"\{\{\s*if\s+\.Subscriber\.Attribs\.(\w+)\s*\}\}(.*?)" + r"(?:\{\{\s*else\s*\}\}(.*?))?\{\{\s*end\s*\}\}", + re.DOTALL, + ) + def repl(m): + key, t_branch, f_branch = m.group(1), m.group(2), m.group(3) or "" + return t_branch if str(at.get(key, "")).strip() else f_branch + prev = None + while prev != tpl: + prev = tpl + tpl = pat.sub(repl, tpl) + # Resolve simple value tags.
+ def val(m): + return html.escape(str(at.get(m.group(1), ""))) + tpl = re.sub(r"\{\{\s*\.Subscriber\.Attribs\.(\w+)\s*\}\}", val, tpl) + # Listmonk unsubscribe/tracking placeholders -> realistic stand-ins. + tpl = tpl.replace("{{ UnsubscribeURL }}", "https://performancewest.net/unsubscribe/abc123") + tpl = re.sub(r"\{\{[^}]*\}\}", "", tpl) # strip any remaining tags + return tpl + +rendered_subject = render(subject, attribs) +rendered_body = render(body, attribs) + +# --- Assemble a realistic MIME email (what the recipient's filter sees) --- +m_from = from_email +# from_email is like 'Performance West ' +msg = EmailMessage() +msg["From"] = m_from +msg["To"] = "dispatch@blueridgefreight.com" +msg["Subject"] = rendered_subject +msg["Date"] = formatdate(localtime=True) +msg["Message-ID"] = make_msgid(domain="performancewest.net") +msg["List-Unsubscribe"] = ", " +msg["List-Unsubscribe-Post"] = "List-Unsubscribe=One-Click" +msg["Precedence"] = "bulk" + +# plain-text altbody (strip tags crudely) + html +text_alt = re.sub(r"<[^>]+>", "", rendered_body) +text_alt = html.unescape(re.sub(r"\n{3,}", "\n\n", text_alt)).strip() +msg.set_content(text_alt) +msg.add_alternative(rendered_body, subtype="html") + +raw = msg.as_bytes() + +# --- Score through SpamAssassin (-t test mode; network tests skipped for determinism) --- +proc = subprocess.run( + ["spamassassin", "-t", "-L"], # -L = local tests only (no DNS/network), -t = test mode + input=raw, capture_output=True, +) +scored = proc.stdout.decode("utf-8", "replace") + +# Pull the score + the rule hits from the rewritten headers. 
+score_line = "" +rules = [] +for line in scored.splitlines(): + if line.startswith("X-Spam-Status:"): + score_line = line + if line.startswith("X-Spam-Report:") or line.startswith("\tpts") or re.match(r"^\s+[-0-9.]+\s+[A-Z0-9_]+\s", line): + rules.append(line.rstrip()) + +print("=" * 70) +print(f"Campaign {CID} arm={PCT}% code={CODE}") +print(f"Subject: {rendered_subject}") +print("=" * 70) +print(score_line or "(no X-Spam-Status header found)") +print("-" * 70) +m = re.search(r"score=([-0-9.]+)", score_line) +if m: + print(f"SCORE: {m.group(1)} (SpamAssassin default spam threshold = 5.0)") +# Show the detailed report block. +rep = re.search(r"X-Spam-Report:(.*?)(?:\nX-Spam-|\n\n)", scored, re.DOTALL) +if rep: + print(rep.group(0)) +else: + # Fallback: print any rule lines we collected. + for r in rules: + print(r) diff --git a/scripts/tests/sa_coupon_test_local.py b/scripts/tests/sa_coupon_test_local.py new file mode 100644 index 0000000..487e594 --- /dev/null +++ b/scripts/tests/sa_coupon_test_local.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +"""Render a trucking campaign body with coupon merge-tags filled in and score it +through local SpamAssassin (4.0.1). Reads the body HTML from a file. 
+ +Usage: sa_coupon_test_local.py <body_file> <subject_tpl> [pct] [code] +""" +import html +import re +import subprocess +import sys +from email.message import EmailMessage +from email.utils import formatdate, make_msgid + +BODY_FILE = sys.argv[1] +SUBJECT_TPL = sys.argv[2] +PCT = sys.argv[3] if len(sys.argv) > 3 else "40" +CODE = sys.argv[4] if len(sys.argv) > 4 else "KQMTN" + +body = open(BODY_FILE, encoding="utf-8").read() + +attribs = { + "dot_number": "1228791", + "company": "BLUE RIDGE FREIGHT LLC", + "state": "CT", + "lp_link": f"https://performancewest.net/order/mcs150-update?code={CODE}", + "coupon_code": CODE, + "coupon_pct": PCT, + "coupon_expires": "11:59 PM ET tonight", +} + + +def render(tpl: str, at: dict) -> str: + pat = re.compile( + r"\{\{\s*if\s+\.Subscriber\.Attribs\.(\w+)\s*\}\}(.*?)" + r"(?:\{\{\s*else\s*\}\}(.*?))?\{\{\s*end\s*\}\}", + re.DOTALL, + ) + + def repl(m): + key, t_branch, f_branch = m.group(1), m.group(2), m.group(3) or "" + return t_branch if str(at.get(key, "")).strip() else f_branch + + prev = None + while prev != tpl: + prev = tpl + tpl = pat.sub(repl, tpl) + + def val(m): + return html.escape(str(at.get(m.group(1), ""))) + + tpl = re.sub(r"\{\{\s*\.Subscriber\.Attribs\.(\w+)\s*\}\}", val, tpl) + tpl = tpl.replace("{{ UnsubscribeURL }}", "https://performancewest.net/unsubscribe/abc123") + tpl = re.sub(r"\{\{[^}]*\}\}", "", tpl) + return tpl + + +rendered_subject = render(SUBJECT_TPL, attribs) +rendered_body = render(body, attribs) + +msg = EmailMessage() +msg["From"] = "Performance West " +msg["To"] = "dispatch@blueridgefreight.com" +msg["Subject"] = rendered_subject +msg["Date"] = formatdate(localtime=True) +msg["Message-ID"] = make_msgid(domain="performancewest.net") +msg["List-Unsubscribe"] = ", " +msg["List-Unsubscribe-Post"] = "List-Unsubscribe=One-Click" +msg["Precedence"] = "bulk" + +text_alt = re.sub(r"<[^>]+>", "", rendered_body) +text_alt = html.unescape(re.sub(r"\n{3,}", "\n\n", text_alt)).strip() +msg.set_content(text_alt) +msg.add_alternative(rendered_body,
subtype="html") + +raw = msg.as_bytes() + +proc = subprocess.run(["spamassassin", "-t", "-L"], input=raw, capture_output=True) +scored = proc.stdout.decode("utf-8", "replace") + +score_line = "" +for line in scored.splitlines(): + if line.startswith("X-Spam-Status:"): + score_line = line + break + +m = re.search(r"score=([-0-9.]+)", score_line) +score = m.group(1) if m else "?" +verdict = "SPAM" if score_line.startswith("X-Spam-Status: Yes") else "HAM" + +print(f"arm={PCT}% code={CODE} -> SCORE {score} [{verdict}] subj: {rendered_subject}") + +# Detailed rule report. +rep = re.search(r"X-Spam-Report:(.*?)\n(?:[A-Za-z-]+:|\n)", scored, re.DOTALL) +if "--verbose" in sys.argv: + if rep: + print(rep.group(1)) + else: + for line in scored.splitlines(): + if re.match(r"^\s*[-0-9.]+\s+[A-Z0-9_]+", line): + print(" ", line.strip()) diff --git a/scripts/tests/test_coupon_ab.py b/scripts/tests/test_coupon_ab.py new file mode 100644 index 0000000..72f1223 --- /dev/null +++ b/scripts/tests/test_coupon_ab.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +"""Unit tests for the trucking same-day coupon A/B/C price test. + +Verifies that CAMPAIGN_COUPON_AB_PCTS produces an even, stable per-email split, +that the advertised percent always matches the arm's actual code (so the email +never promises a discount checkout won't honor), and that the off / single-arm +states behave. Pure-function tests, no DB required (psycopg2 is stubbed). + +Run: CAMPAIGN_COUPON_AB_PCTS="20,30,40" python3 scripts/tests/test_coupon_ab.py +""" +from __future__ import annotations + +import importlib.util +import os +import sys +import types +from collections import Counter +from pathlib import Path + +os.environ.setdefault("CAMPAIGN_COUPON_AB_PCTS", "20,30,40") + +# Stub the DB driver so the builder imports without a live Postgres. 
+sys.modules.setdefault("psycopg2", types.ModuleType("psycopg2")) + +_SCRIPT = Path(__file__).resolve().parents[1] / "build_trucking_campaigns.py" +_spec = importlib.util.spec_from_file_location("btc", _SCRIPT) +btc = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(btc) + + +def test_arms_parsed(): + assert btc.COUPON_AB_PCTS == (20, 30, 40) + + +def test_even_stable_and_code_matches_pct(): + coupons = {20: "AAAAA", 30: "BBBBB", 40: "CCCCC"} + emails = [f"user{i}@carrier{i % 500}.com" for i in range(30000)] + counts: Counter = Counter() + for e in emails: + code, pct = btc.pick_coupon_for_email(e, coupons) + counts[pct] += 1 + # Advertised percent must match the arm's real code. + assert coupons[int(pct)] == code + # Stable: same input -> same arm. + assert btc.pick_coupon_for_email(e, coupons) == (code, pct) + total = sum(counts.values()) + # Each arm should be within ~2 points of an even third. + for pct in ("20", "30", "40"): + share = counts[pct] / total + assert 0.31 <= share <= 0.353, (pct, share) + + +def test_off_state(): + assert btc.pick_coupon_for_email("x@y.com", None) == ("", "") + assert btc.pick_coupon_for_email("x@y.com", {}) == ("", "") + + +def test_single_arm(): + assert btc.pick_coupon_for_email("a@b.com", {40: "ZZZZZ"}) == ("ZZZZZ", "40") + + +def test_coupon_attribs_reflects_pct(): + a = btc.coupon_attribs("BBBBB", "30") + assert a["coupon_code"] == "BBBBB" + assert a["coupon_pct"] == "30" + assert btc.coupon_attribs("", "30") == { + "coupon_code": "", "coupon_pct": "", "coupon_expires": "" + } + + +if __name__ == "__main__": + fns = [v for k, v in sorted(globals().items()) if k.startswith("test_")] + for fn in fns: + fn() + print(f"ok {fn.__name__}") + print(f"\nAll {len(fns)} coupon A/B tests passed.")