#!/usr/bin/env bash
# End-to-end test for the analytics email-scanner / headless filter.
#
# Verifies pw-bot-filter.js (a) correctly flags headless/automated browsers as
# bots across several scanner disguises, and (b) wires the Umami before-send
# hook so flagged traffic is dropped. Run from the repo root:
#
#   bash site/tests/bot-filter.test.sh
#
# Requires: chromium (or google-chrome) + python3. Skips cleanly if absent.
#
# Environment:
#   BOT_FILTER_TEST_PORT  TCP port for the throwaway HTTP server (default 8791).
#
# Exit status: 0 when every check passes (or the test is skipped), non-zero
# otherwise.

# No `-e` on purpose: individual checks are tallied in PASS/FAIL and the final
# test decides the exit status, so one failing command must not abort the run.
set -uo pipefail

FILTER="$(cd "$(dirname "$0")/.." && pwd)/public/js/pw-bot-filter.js"
PORT="${BOT_FILTER_TEST_PORT:-8791}"

CHROME="$(command -v chromium || command -v chromium-browser || command -v google-chrome || true)"
if [ -z "$CHROME" ]; then
  echo "SKIP: no chromium/chrome found"
  exit 0
fi
# python3 both serves the probe page and parses the JSON results, so without
# it nothing below can work; skip instead of reporting misleading failures.
if ! command -v python3 >/dev/null 2>&1; then
  echo "SKIP: python3 not found"
  exit 0
fi
[ -f "$FILTER" ] || { echo "FAIL: $FILTER missing"; exit 1; }

WORK="$(mktemp -d)" || { echo "FAIL: mktemp failed"; exit 1; }
SRV_PID=""

# Stop the background HTTP server (if one was started) and remove the scratch
# directory. Runs on every exit path via the EXIT trap.
cleanup() {
  if [ -n "$SRV_PID" ]; then
    kill "$SRV_PID" 2>/dev/null
    wait "$SRV_PID" 2>/dev/null # reap the server so it does not linger
  fi
  rm -rf -- "$WORK"
}
trap cleanup EXIT

cp "$FILTER" "$WORK/pw-bot-filter.js"

# NOTE(review): the probe page body below is reproduced exactly as it appears
# in this copy of the file, where it looks like its HTML markup was stripped.
# Judging by run() and the checks further down, the page is expected to load
# pw-bot-filter.js and expose a JSON object with `isBot`, `hook` and `drop`
# keys in the DOM (e.g. the page title) -- confirm against the original.
cat > "$WORK/probe.html" <<'HTML'
t
HTML

# `exec` replaces the subshell with python3, so $! is the server's own PID.
( cd "$WORK" && exec python3 -m http.server "$PORT" >/dev/null 2>&1 ) &
SRV_PID=$!

# Wait (up to ~5 s) until the server accepts connections instead of sleeping a
# fixed second. Bail out early if the server process has already died, which
# is what happens when the port is already in use.
server_ready=0
for _ in {1..50}; do
  kill -0 "$SRV_PID" 2>/dev/null || break
  if python3 -c 'import socket, sys
socket.create_connection(("localhost", int(sys.argv[1])), timeout=1).close()' \
    "$PORT" 2>/dev/null; then
    server_ready=1
    break
  fi
  sleep 0.1
done
if [ "$server_ready" -ne 1 ] || ! kill -0 "$SRV_PID" 2>/dev/null; then
  echo "FAIL: test HTTP server did not start on port $PORT"
  exit 1
fi

# run [chrome-flag...]
# Loads the probe page in headless Chrome with the given extra flags and
# prints the first {...} JSON object found in the dumped DOM (nothing if the
# browser fails or no object is present).
run() {
  "$CHROME" --headless=new --no-sandbox --disable-dev-shm-usage "$@" \
    --dump-dom "http://localhost:${PORT}/probe.html" 2>/dev/null \
    | grep -oE '\{.*\}' | head -1
}

PASS=0
FAIL=0

# check NAME JSON
# Records a PASS when JSON parses and its `isBot` value is true; otherwise
# records a FAIL and prints the raw JSON to help diagnosis.
check() {
  local name="$1" json="$2" got
  got="$(printf '%s' "$json" \
    | python3 -c "import sys,json;print(json.loads(sys.stdin.read()).get('isBot'))" 2>/dev/null)"
  if [ "$got" = "True" ]; then
    echo "PASS  $name (isBot=True)"
    PASS=$((PASS + 1))
  else
    echo "FAIL  $name (isBot=$got; json=$json)"
    FAIL=$((FAIL + 1))
  fi
}

# 1. default headless-new
check "headless-new default" "$(run)"

# 2. spoofed Windows UA + normal window (sophisticated scanner)
check "spoofed UA + normal window" "$(run --window-size=1366,768 \
  --user-agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')"

# 3. spoofed UA + tiny window
check "spoofed UA + tiny window" "$(run --window-size=1,1 \
  --user-agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36')"

# hook wiring on the default run
HJSON="$(run)"
HK="$(printf '%s' "$HJSON" \
  | python3 -c "import sys,json;d=json.loads(sys.stdin.read());print(d.get('hook'),d.get('drop'))" 2>/dev/null)"
if [ "$HK" = "function True" ]; then
  echo "PASS  umamiBeforeSend drops bot events"
  PASS=$((PASS + 1))
else
  echo "FAIL  hook wiring ($HK)"
  FAIL=$((FAIL + 1))
fi

echo ""
echo "$PASS passed, $FAIL failed"
[ "$FAIL" -eq 0 ]