#!/usr/bin/env bash
# End-to-end test for the analytics email-scanner / headless filter.
#
# Verifies pw-bot-filter.js (a) correctly flags headless/automated browsers as
# bots across several scanner disguises, and (b) wires the Umami before-send
# hook so flagged traffic is dropped. Run from the repo root:
#
# bash site/tests/bot-filter.test.sh
#
# Requires: chromium (or google-chrome) + python3. Skips cleanly if absent.
# `-u` aborts on unset variables and `pipefail` makes a pipeline report a
# failing stage. `-e` is not enabled (presumably on purpose: failed checks are
# tallied and the run continues to the summary).
set -uo pipefail

# Filter under test, resolved relative to the parent of this script's directory.
FILTER="$(cd "$(dirname "$0")/.." && pwd)/public/js/pw-bot-filter.js"

# First Chromium-family binary found on PATH; empty when none is installed.
CHROME="$(command -v chromium || command -v chromium-browser || command -v google-chrome || true)"
if [ -z "$CHROME" ]; then echo "SKIP: no chromium/chrome found"; exit 0; fi
# python3 serves the probe page and parses the results, so its absence is a
# skip as well rather than a string of confusing failures.
if ! command -v python3 >/dev/null 2>&1; then echo "SKIP: no python3 found"; exit 0; fi
[ -f "$FILTER" ] || { echo "FAIL: $FILTER missing"; exit 1; }

# Scratch directory that gets served over HTTP; removed again by cleanup().
WORK="$(mktemp -d)" || { echo "FAIL: could not create temp dir"; exit 1; }
# PID of the background HTTP server; stays empty until the server is started.
SRV_PID=""
# Stop the background server (when one was started) and delete the scratch
# directory. Reads the globals SRV_PID and WORK.
cleanup() {
  if [ -n "$SRV_PID" ]; then
    # The server may already be gone; a failed kill is not an error here.
    kill "$SRV_PID" 2>/dev/null
  fi
  rm -rf "$WORK"
}
# Tear down the server and the scratch directory on every exit path.
trap cleanup EXIT
cp "$FILTER" "$WORK/pw-bot-filter.js" || { echo "FAIL: could not stage $FILTER"; exit 1; }
# NOTE(review): the probe page written below contains only the text "t", while
# run() looks for a {...} payload in the dumped DOM. The page body looks
# truncated; confirm against the intended markup.
cat > "$WORK/probe.html" <<'HTML'
t
HTML
# `exec` replaces the subshell, so $! is the PID of python itself.
( cd "$WORK" && exec python3 -m http.server 8791 >/dev/null 2>&1 ) &
SRV_PID=$!

# Wait until the server hands back the staged filter byte-for-byte instead of
# sleeping a fixed second. This removes the start-up race and also catches a
# foreign process already listening on the port.
srv_probe='import http.client, sys
with open(sys.argv[1], "rb") as fh:
    want = fh.read()
conn = http.client.HTTPConnection("localhost", 8791, timeout=1)
conn.request("GET", "/pw-bot-filter.js")
resp = conn.getresponse()
sys.exit(0 if resp.status == 200 and resp.read() == want else 1)'
srv_ready=0
for ((srv_try = 0; srv_try < 50; srv_try++)); do
  # Stop polling early once the server process has exited (e.g. bind failed).
  kill -0 "$SRV_PID" 2>/dev/null || break
  if python3 -c "$srv_probe" "$WORK/pw-bot-filter.js" 2>/dev/null; then
    srv_ready=1
    break
  fi
  sleep 0.1
done
if [ "$srv_ready" -ne 1 ]; then
  echo "FAIL: http.server did not come up on port 8791"
  exit 1
fi
# Load the probe page in headless Chrome and print the first {...} span found
# in the dumped DOM (per the original note, the JSON the page puts in its title).
# Arguments: extra Chrome flags, passed through unchanged
# Outputs:   the first {...} match on stdout; nothing when there is no match
run() {
  local probe_url="http://localhost:8791/probe.html"
  local -a base_flags=(--headless=new --no-sandbox --disable-dev-shm-usage)
  # Chrome's stderr is discarded; only the DOM dump enters the pipeline.
  "$CHROME" "${base_flags[@]}" "$@" --dump-dom "$probe_url" 2>/dev/null \
    | grep -oE '\{.*\}' \
    | head -n 1
}
# Running tallies, bumped by check() and by the hook-wiring assertion.
PASS=0
FAIL=0
#######################################
# Assert that one key of a probe result holds the expected value and tally it.
# Globals:   PASS, FAIL (one of them is incremented)
# Arguments: $1 - test name shown in the report
#            $2 - JSON object text (as produced by run)
#            $3 - key to inspect (optional, default: isBot)
#            $4 - expected value as Python prints it, e.g. True / False / None
#                 (optional, default: True)
# Outputs:   one PASS/FAIL line on stdout
#######################################
check() {
  local name="$1" json="$2" key="${3:-isBot}" want="${4:-True}"
  local got
  # Empty or unparseable JSON makes python exit without output, so $got stays
  # empty and the check is reported as a failure.
  got="$(printf '%s' "$json" \
    | python3 -c 'import sys,json;print(json.loads(sys.stdin.read()).get(sys.argv[1]))' "$key" 2>/dev/null)"
  if [ "$got" = "$want" ]; then
    echo "PASS $name ($key=$got)"
    PASS=$((PASS+1))
  else
    echo "FAIL $name ($key=$got; json=$json)"
    FAIL=$((FAIL+1))
  fi
}
# --- isBot detection across scanner disguises ---------------------------------
# Case 1: plain --headless=new with no disguise.
check "headless-new default" "$(run)"

# Case 2: full Windows desktop-Chrome UA plus an ordinary window size
# (sophisticated scanner).
ua_full='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
check "spoofed UA + normal window" \
  "$(run --window-size=1366,768 --user-agent="$ua_full")"

# Case 3: shorter spoofed UA with a 1x1 window.
ua_short='Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36'
check "spoofed UA + tiny window" \
  "$(run --window-size=1,1 --user-agent="$ua_short")"

# --- Hook wiring, probed on a default run -------------------------------------
# The probe result must report hook == "function" and drop == true.
HJSON="$(run)"
HK="$(printf '%s' "$HJSON" | python3 -c "import sys,json;d=json.loads(sys.stdin.read());print(d.get('hook'),d.get('drop'))" 2>/dev/null)"
case "$HK" in
  "function True")
    echo "PASS umamiBeforeSend drops bot events"
    PASS=$((PASS+1))
    ;;
  *)
    echo "FAIL hook wiring ($HK)"
    FAIL=$((FAIL+1))
    ;;
esac

# Summary line, preceded by a blank line.
printf '\n%s passed, %s failed\n' "$PASS" "$FAIL"
# Status of this final test is non-zero when any check failed.
(( FAIL == 0 ))