email: add plaintext MIME part + stable Message-ID hostname
Two deliverability hardening fixes from the email audit:
1. Plaintext (altbody): all campaigns were HTML-only. Listmonk only emits
multipart/alternative when altbody is set, and HTML-only bulk mail is a
spam-score signal. New scripts/_email_plaintext.py renders a readable
text/plain part from the HTML body (dependency-free; preserves Listmonk
{{ .Subscriber }}/{{ UnsubscribeURL }} template tags, turns links into
'text (url)'). Wired into the trucking builder (and thus UCR + IFTA, which
reuse create_and_schedule_campaign) and the healthcare builder.
2. Stable container hostname: Listmonk derived its Message-ID from the random
docker container id -> @localhost.localdomain (spam-score signal). Pin both
listmonk + listmonk-hc hostname to perfwest.performancewest.net, matching
Listmonk's SMTP hello_hostname.
Part of the email-deliverability incident hardening.
This commit is contained in:
parent
2e4388a803
commit
a32a3b05a0
4 changed files with 133 additions and 3 deletions
105
scripts/_email_plaintext.py
Normal file
105
scripts/_email_plaintext.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Shared HTML -> plaintext conversion for outbound campaigns.
|
||||
|
||||
Every campaign we build was HTML-only (no plaintext MIME part). A missing
|
||||
text/plain alternative is a spam-score signal: legitimate bulk senders ship
|
||||
multipart/alternative, and several filters (and most "this looks like spam"
|
||||
heuristics) penalise HTML-only mail. It also degrades the experience for
|
||||
plaintext-only clients and accessibility tooling.
|
||||
|
||||
Listmonk only emits multipart/alternative when a campaign's `altbody` is set;
|
||||
otherwise it sends text/html alone. So we generate a readable plaintext
|
||||
rendition from the HTML body and pass it as `altbody`.
|
||||
|
||||
This is intentionally dependency-free (no bs4/html2text on the prod box): a
|
||||
small, well-tested regex pipeline that:
|
||||
- drops <script>/<style>/<head> blocks,
|
||||
- turns <a href=...>text</a> into "text (url)" so links survive,
|
||||
- maps <br>, </p>, </div>, <li>, headings, <tr> to newlines,
|
||||
- prefixes <li> with "- ",
|
||||
- strips all remaining tags,
|
||||
- unescapes HTML entities,
|
||||
- collapses runs of blank lines / trailing whitespace.
|
||||
|
||||
Listmonk template tags ({{ .Subscriber... }}, {{ UnsubscribeURL }}) are left
|
||||
untouched so they still render per-subscriber in the plaintext part too.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html as _html
|
||||
import re
|
||||
|
||||
__all__ = ["html_to_text"]
|
||||
|
||||
_RE_FLAGS = re.IGNORECASE | re.DOTALL

# Whole blocks whose *content* must be discarded, not just the tags.
_DROP_BLOCKS = re.compile(
    r"<(script|style|head|title|noscript)\b[^>]*>.*?</\1>", _RE_FLAGS
)
# HTML comments (Listmonk/MSO conditional comments etc.).
_COMMENTS = re.compile(r"<!--.*?-->", _RE_FLAGS)
# <a href="URL" ...>TEXT</a> -> "TEXT (URL)". The mailto:/tel:/in-page-anchor
# special cases are handled in _anchor_repl. The href value may contain a
# Listmonk template action with its own quotes, e.g.
# href="{{ TrackLink "https://example.com" }}", so a whole {{ ... }} action is
# consumed as one unit instead of stopping at the first inner quote (which
# would leave an unterminated "{{" in the plaintext template).
_ANCHORS = re.compile(
    r'<a\b[^>]*?\bhref\s*=\s*(?P<quote>["\'])'
    r'(?P<url>(?:\{\{.*?\}\}|(?!(?P=quote)|\{\{).)+?)(?P=quote)'
    r'[^>]*>(?P<text>.*?)</a>',
    _RE_FLAGS,
)
# Tags that should become a line break. Attributes are allowed: email markup
# routinely ships <hr style="..."> / <br clear="all">.
_BR = re.compile(r"<br\b[^>]*>", re.IGNORECASE)
_BLOCK_END = re.compile(
    r"</(p|div|h[1-6]|tr|table|ul|ol|blockquote|section|header|footer)>",
    re.IGNORECASE,
)
_LI = re.compile(r"<li\b[^>]*>", re.IGNORECASE)
_HR = re.compile(r"<hr\b[^>]*>", re.IGNORECASE)
_ANY_TAG = re.compile(r"<[^>]+>")
_MANY_BLANKS = re.compile(r"\n[ \t]*\n[ \t]*(\n[ \t]*)+")
_MANY_SPACES = re.compile(r"[ \t]{2,}")


def _anchor_repl(m: "re.Match[str]") -> str:
    """Render one ``_ANCHORS`` match as plaintext.

    Rules, in order:
    - ``mailto:`` / ``tel:`` -> the bare address (query string dropped),
      prefixed with the link text when the text adds information.
    - in-page anchors (``#...``) -> the link text only.
    - no link text, or text identical to the URL -> the URL alone.
    - otherwise -> ``"text (url)"``. Templated hrefs such as
      ``{{ UnsubscribeURL }}`` take this path so the per-subscriber link still
      renders in the plaintext part.
    """
    url = m.group("url").strip()
    # Link text may contain inline markup (<b>, <span>, <img>); keep text only.
    text = _ANY_TAG.sub("", m.group("text")).strip()
    low = url.lower()
    if low.startswith(("mailto:", "tel:")):
        # Drop the scheme and any ?subject=... query.
        addr = url.split(":", 1)[1].split("?", 1)[0]
        if text and text != addr:
            return f"{text} ({addr})"
        return addr
    if low.startswith("#"):
        return text
    if not text or text == url:
        return url
    return f"{text} ({url})"


def html_to_text(html: str) -> str:
    """Convert an HTML email body to a readable text/plain rendition.

    Listmonk template tags are preserved verbatim.

    Args:
        html: The HTML body. Falsy input (``""`` / ``None``) is accepted.

    Returns:
        The plaintext rendition terminated by a single newline, or ``""``
        when the input is empty or contains no visible text, so callers can
        test the result for truthiness before setting ``altbody``.
    """
    if not html:
        return ""
    s = html
    s = _DROP_BLOCKS.sub("", s)
    s = _COMMENTS.sub("", s)
    s = _ANCHORS.sub(_anchor_repl, s)
    s = _HR.sub("\n----------\n", s)
    s = _BR.sub("\n", s)
    s = _LI.sub("\n- ", s)
    s = _BLOCK_END.sub("\n", s)
    s = _ANY_TAG.sub("", s)
    # Unescape only after tag stripping so an escaped "&lt;b&gt;" survives as
    # literal text instead of being removed as a tag.
    s = _html.unescape(s)
    # &nbsp; spacers become U+00A0; treat them as ordinary spaces so they
    # collapse like any other padding.
    s = s.replace("\xa0", " ")
    # Trim every line *before* collapsing blank runs. splitlines() also
    # normalises CRLF/CR, so Windows line endings and whitespace-only spacer
    # lines cannot hide a run of blank lines from _MANY_BLANKS. Stripping both
    # sides removes the HTML source indentation as well.
    s = "\n".join(
        _MANY_SPACES.sub(" ", line.strip()) for line in s.splitlines()
    )
    s = _MANY_BLANKS.sub("\n\n", s)
    text = s.strip()
    return f"{text}\n" if text else ""
|
||||
Loading…
Add table
Add a link
Reference in a new issue