mail: DMARC aggregate-report parser + dedicated dmarc@ mailbox ingestion
Tool 2 of the deliverability monitoring pair (Tool 1 = mail_reputation_monitor). DMARC rua reports from dozens of operators (Google, Yahoo, Comcast, Cox, Bell, Mimecast, Cisco ESA, GMX, mail.com, ...) were landing in ops@ (dmarc@ was a DL), burying real mail and never parsed. Now ingested + queryable: - dmarc@performancewest.net converted DL -> dedicated Carbonio mailbox; isolated IMAP creds in server .env, surfaced to workers in docker-compose.yml (mirrors OPS_IMAP_*). 29 historical reports moved ops@ -> dmarc@ via IMAP. - scripts/dmarc_report_parser.py: IMAP fetch unseen -> decompress .gz/.zip/.xml (namespace-agnostic: classic + urn:ietf:params:xml:ns:dmarc-2.0 GMX/mail.com) -> parse aggregate XML -> upsert dmarc_report (keyed (org_name,report_id), no-op on re-parse) + dmarc_record per source IP. dmarc_pass = dkim_aligned OR spf_aligned. Marks \Seen. --dry-run/--all/--alert (7d per-IP summary + Telegram if one of OUR IPs <95% pass, or EXTERNAL IP sends >=20 failing msgs as us = spoofing under p=reject). psycopg2 imported lazily so --dry-run runs without the driver. - api/migrations/102_dmarc_aggregate.sql: dmarc_report + dmarc_record tables. - infra/cron/pw-dmarc-parser: 06:20 UTC daily --alert (after reputation, before scrub). - docs/deliverability.md: DMARC section DONE; query examples. Verified: dry-run --all parses all 28 reports (1 non-report test probe), 0 unknown after the namespace fix.
This commit is contained in:
parent
b45332b5f7
commit
8e5590b492
5 changed files with 509 additions and 8 deletions
66
api/migrations/102_dmarc_aggregate.sql
Normal file
66
api/migrations/102_dmarc_aggregate.sql
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
-- DMARC aggregate (rua) report ingestion.
|
||||
--
|
||||
-- WHY: DMARC aggregate reports (RFC 7489) are the authoritative, cross-operator
|
||||
-- view of who is sending mail AS us (header_from = performancewest.net / its
|
||||
-- subdomains) and whether that mail passes SPF + DKIM alignment. Every major
|
||||
-- receiver (Google, Yahoo, Comcast, Cox, Bell, Mimecast, Cisco ESA, GMX, mail.com,
|
||||
-- ...) emails one zipped/gzipped XML per day to rua=mailto:dmarc@performancewest.net.
|
||||
-- Reading them by hand is hopeless (dozens/day). This turns them into queryable
|
||||
-- per-source-IP / per-domain SPF+DKIM+DMARC pass-fail trends so we can SEE:
|
||||
-- * our own senders (.94 bulk / .107 hcout / .71 transactional / .15 relay) all
|
||||
-- passing alignment (DKIM d=send. selector send, d=root selector mail) -- the
|
||||
-- recent deliverability fixes were exactly about this; and
|
||||
-- * any UNKNOWN IP sending as us that fails -- i.e. spoofing or a forgotten relay,
|
||||
-- which is reputation poison under p=reject.
|
||||
--
|
||||
-- Populated by scripts/dmarc_report_parser.py (IMAP fetch dmarc@ -> unzip -> parse
|
||||
-- XML -> upsert). Idempotent: each report is keyed by (org_name, report_id) and
|
||||
-- re-parsing the same report is a no-op (ON CONFLICT DO NOTHING).
|
||||
|
||||
-- One row per aggregate report (the <report_metadata> + <policy_published>).
|
||||
CREATE TABLE IF NOT EXISTS dmarc_report (
    -- Surrogate key; dmarc_record.report_id points here.
    id            BIGSERIAL,

    -- <report_metadata>
    org_name      TEXT        NOT NULL,  -- reporting operator (google.com, yahoo.com, ...)
    org_email     TEXT,                  -- contact email carried in the report
    report_id     TEXT        NOT NULL,  -- report id assigned by the operator
    date_begin    TIMESTAMPTZ,           -- start of the report window (from epoch)
    date_end      TIMESTAMPTZ,           -- end of the report window

    -- <policy_published>
    policy_domain TEXT,                  -- <policy_published><domain>
    policy_p      TEXT,                  -- published policy: none|quarantine|reject
    policy_sp     TEXT,                  -- subdomain policy
    policy_adkim  TEXT,                  -- DKIM alignment mode: r|s
    policy_aspf   TEXT,                  -- SPF alignment mode: r|s
    policy_pct    INTEGER,               -- % of mail the policy applies to

    -- Row creation time; filled in by the database when the column is omitted.
    received_at   TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Constraint names are spelled out and equal the names PostgreSQL would
    -- generate on its own, so the resulting schema is unchanged.
    CONSTRAINT dmarc_report_pkey
        PRIMARY KEY (id),
    -- Natural key of a report: one row per (operator, operator's report id).
    CONSTRAINT dmarc_report_org_name_report_id_key
        UNIQUE (org_name, report_id)
);
|
||||
|
||||
-- One row per <record> inside a report (a distinct source_ip + auth result combo).
|
||||
CREATE TABLE IF NOT EXISTS dmarc_record (
    id            BIGSERIAL,
    -- Parent row in dmarc_report (its surrogate id, not the operator's text report id).
    report_id     BIGINT  NOT NULL,

    -- <row>
    source_ip     TEXT    NOT NULL,            -- IP that sent the mail
    msg_count     INTEGER DEFAULT 0 NOT NULL,  -- messages from this IP in the report window

    -- <policy_evaluated>
    disposition   TEXT,                        -- DMARC disposition applied: none|quarantine|reject
    dkim_aligned  TEXT,                        -- policy_evaluated/dkim: pass|fail
    spf_aligned   TEXT,                        -- policy_evaluated/spf: pass|fail
    dmarc_pass    BOOLEAN,                     -- derived: dkim_aligned=pass OR spf_aligned=pass

    -- <identifiers>
    header_from   TEXT,                        -- identifiers/header_from
    envelope_from TEXT,                        -- identifiers/envelope_from

    -- <auth_results> (raw values as reported)
    dkim_domain   TEXT,                        -- auth_results/dkim/domain
    dkim_selector TEXT,                        -- auth_results/dkim/selector
    dkim_result   TEXT,                        -- auth_results/dkim/result (raw)
    spf_domain    TEXT,                        -- auth_results/spf/domain
    spf_result    TEXT,                        -- auth_results/spf/result (raw)

    -- Constraint names are spelled out and equal the names PostgreSQL would
    -- generate on its own, so the resulting schema is unchanged.
    CONSTRAINT dmarc_record_pkey
        PRIMARY KEY (id),
    -- Deleting a report removes its records with it.
    CONSTRAINT dmarc_record_report_id_fkey
        FOREIGN KEY (report_id) REFERENCES dmarc_report (id) ON DELETE CASCADE
);
|
||||
|
||||
-- Lookup indexes: report window / operator on the header table, parent report
-- and sending IP on the per-record table.
CREATE INDEX IF NOT EXISTS idx_dmarc_report_window ON dmarc_report (date_begin);
CREATE INDEX IF NOT EXISTS idx_dmarc_report_org ON dmarc_report (org_name);
CREATE INDEX IF NOT EXISTS idx_dmarc_record_report ON dmarc_record (report_id);
CREATE INDEX IF NOT EXISTS idx_dmarc_record_ip ON dmarc_record (source_ip);

-- Partial index covering only failing rows. It is keyed on source_ip rather
-- than on dmarc_pass: every row admitted by the predicate has dmarc_pass =
-- false, so keying on that column would store a single constant value and the
-- index could not narrow or order anything within the failures. With
-- source_ip as the key it still serves any query whose WHERE clause implies
-- dmarc_pass = false, and additionally supports per-IP failure lookups.
-- NOTE: because of IF NOT EXISTS, a database that already has an index with
-- this name keeps its existing definition until it is dropped and recreated.
CREATE INDEX IF NOT EXISTS idx_dmarc_record_fail ON dmarc_record (source_ip) WHERE dmarc_pass = false;
|
||||
|
||||
-- Table descriptions stored in the database catalog alongside the schema.
COMMENT ON TABLE dmarc_report
    IS 'DMARC aggregate (rua) report headers. One row per operator report, keyed (org_name, report_id). Populated by scripts/dmarc_report_parser.py.';

COMMENT ON TABLE dmarc_record
    IS 'Per-source-IP rows inside each DMARC aggregate report: SPF/DKIM alignment + raw auth results. dmarc_pass = dkim_aligned=pass OR spf_aligned=pass.';
|
||||
Loading…
Add table
Add a link
Reference in a new issue