email: handle unquoted hrefs in plaintext converter + add tests

The anchor regex only matched quoted hrefs, so an unquoted href (href=URL) caused the URL to be dropped from the plaintext part. It now handles double-quoted, single-quoted, and unquoted hrefs. Added scripts/test_email_plaintext.py (11 cases: link forms, mailto, template-tag preservation, tag stripping, entity unescape, blank-line collapse).
2026-06-17 20:28:15 -05:00 · 2026-06-17 20:28:15 -05:00 · 466460112b
commit 466460112b
parent 4dc5690666
2 changed files with 95 additions and 4 deletions
--- a/scripts/_email_plaintext.py
+++ b/scripts/_email_plaintext.py
@@ -39,9 +39,13 @@ _DROP_BLOCKS = re.compile(
 )
 # HTML comments (Listmonk/MSO conditional comments etc.).
 _COMMENTS = re.compile(r"<!--.*?-->", _RE_FLAGS)
-# <a href="URL" ...>TEXT</a>  ->  TEXT (URL)   (skip mailto:/tel:/anchors/templated)
+# <a href="URL" ...>TEXT</a>  ->  TEXT (URL)   (skip mailto:/tel:/anchors)
+# Handles quoted ("..." / '...') and unquoted (href=URL) hrefs.
 _ANCHORS = re.compile(
-    r'<a\b[^>]*?\bhref\s*=\s*["\']([^"\']+)["\'][^>]*>(.*?)</a>', _RE_FLAGS
+    r'<a\b[^>]*?\bhref\s*=\s*'
+    r'(?:"([^"]*)"|\'([^\']*)\'|([^\s">]+))'
+    r'[^>]*>(.*?)</a>',
+    _RE_FLAGS,
 )
 # Tags that should become a line break.
 _BR = re.compile(r"<br\s*/?>", re.IGNORECASE)
@@ -58,8 +62,9 @@ _MANY_SPACES = re.compile(r"[ \t]{2,}")


 def _anchor_repl(m: "re.Match[str]") -> str:
-    url = m.group(1).strip()
-    text = _ANY_TAG.sub("", m.group(2)).strip()
+    # href is whichever of the 3 alternatives matched (double/single/unquoted).
+    url = (m.group(1) or m.group(2) or m.group(3) or "").strip()
+    text = _ANY_TAG.sub("", m.group(4)).strip()
    low = url.lower()
    # mailto:/tel: -> surface the address (with link text if it adds info).
    # Bare in-page anchors -> keep text only. Templated hrefs (e.g.