Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
551 lines
21 KiB
Python
551 lines
21 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
reddit-monitor.py — Monitor Reddit for compliance-related questions relevant to
|
|
Performance West, generate helpful replies with Ollama (qwen2.5:3b), and post them.
|
|
|
|
Targets: r/smallbusiness, r/Entrepreneur, r/tax, r/legaladvice,
|
|
r/Bookkeeping, r/accounting, r/humanresources, r/QuickBooks, r/IRS,
|
|
r/ecommerce, r/marketing, r/realestateinvesting, r/restaurateur,
|
|
r/construction, r/antiwork, r/EmploymentLaw, r/freelance, r/startups,
|
|
r/payroll
|
|
|
|
State: ~/.reddit-monitor-state.json
|
|
Log: ~/logs/reddit-monitor.log
|
|
"""
|
|
|
|
import os, sys, json, time, re, random, logging, urllib.request, urllib.parse, fcntl
|
|
from pathlib import Path
|
|
|
|
# Single-instance lock — acquire before anything else including logging setup.
# The handle is deliberately kept open at module level for the life of the
# process: the flock is released only when the process exits, so a second
# concurrent instance fails LOCK_NB and bails out immediately.
_LOCK_FILE = open("/tmp/reddit-monitor.lock", "w")
try:
    # Exclusive, non-blocking: never wait for a running instance to finish.
    fcntl.flock(_LOCK_FILE, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
    sys.exit(0)  # Another instance running — exit silently

# Make sibling modules (alert, product_facts, ollama_client, gap_tracker)
# importable when this script is run from anywhere.
sys.path.insert(0, os.path.dirname(__file__))
|
|
from alert import alert_account_broken
|
|
from product_facts import get_product_facts
|
|
import ollama_client
|
|
from gap_tracker import log_gap
|
|
from datetime import datetime, timezone
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Reddit "script"-type app credentials, read from the environment.
# Empty strings (missing env vars) are caught later in reddit_auth().
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID", "")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "")
REDDIT_USERNAME = os.environ.get("REDDIT_USERNAME", "")
REDDIT_PASSWORD = os.environ.get("REDDIT_PASSWORD", "")
REDDIT_USER_AGENT = "PerfWestBot/1.0 (by /u/performancewest)"

# Persisted run state (seen/replied ids, daily counters) and log directory.
STATE_FILE = Path.home() / ".reddit-monitor-state.json"
LOG_DIR = Path.home() / "logs"
LOG_DIR.mkdir(exist_ok=True)

# Rate limits
MAX_REPLIES_PER_RUN = 3        # hard cap on replies per invocation
MAX_REPLIES_PER_SUBREDDIT = 1  # per-subreddit cap (counter resets daily)
PAUSE_BETWEEN_MIN = 5  # minutes — lower bound of the pause between replies
PAUSE_BETWEEN_MAX = 15  # minutes — upper bound of the pause between replies
DAILY_LIMIT = 10  # total replies allowed per UTC day
MAX_AGE_DAYS = 7  # ignore posts older than this
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subreddits to monitor
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Subreddits scanned each run, grouped by expected signal quality.
# Subscriber counts in the comments are point-in-time estimates, not live data.
SUBREDDITS = [
    # TIER 1 — Highest volume, business owners asking compliance questions
    "smallbusiness",         # 470K — constant contractor/LLC/compliance posts
    "Entrepreneur",          # 470K — formation, contractor, privacy questions
    "tax",                   # 841K — 1099 vs W-2 daily, misclassification gold
    "legaladvice",           # 1.6M — employee-side misclassif posts (shows employer risk)
    # TIER 2 — Professionals who refer clients + direct compliance Q&A
    "Bookkeeping",           # 75K — 1099 processing, payroll compliance, QBO/Xero
    "accounting",            # 1.2M — broad but huge; contractor classification
    "humanresources",        # 107K — FLSA, handbooks, discrimination, HR policies
    "QuickBooks",            # 37K — payroll/1099 compliance in QB context
    "IRS",                   # 442K — enforcement notices, compliance questions
    # TIER 3 — Industry-specific (highest misclassification/wage-hour risk)
    "ecommerce",             # 91K — CCPA, privacy policies, SMS marketing
    "marketing",             # 141K — TCPA, SMS consent, DNC
    "realestateinvesting",   # contractor classification, entity formation
    "restaurateur",          # wage-hour violations (huge in food service)
    "construction",          # contractor misclassification (#1 violating industry)
    "antiwork",              # 1.6M — misclassif/wage theft posts get massive engagement
    "EmploymentLaw",         # 7.1K — small but 100% signal, every post is compliance
    "freelance",             # the "other side" of contractor misclassification
    "startups",              # 1.2M — business formation, early compliance
    "payroll",               # payroll tax compliance, misclassification
]
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Keyword triggers by compliance category
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Keyword triggers by compliance category. Matching (see matches_keywords)
# is case-insensitive substring search over title + selftext, so multi-word
# phrases must appear verbatim in the post to count as a hit.
COMPLIANCE_KEYWORDS = {
    "flsa": [
        "FLSA", "wage and hour", "overtime violation", "exempt vs nonexempt",
        "minimum wage", "off the clock", "meal break violation",
        "unpaid overtime", "salary threshold", "wage theft",
        "DOL audit", "Department of Labor",
    ],
    "misclassification": [
        "1099 vs W-2", "1099 vs W2", "independent contractor",
        "misclassification", "misclassified", "contractor or employee",
        "IC vs employee", "gig worker classification",
        "pay contractor", "paying 1099", "1099 worker",
        "contractor to employee", "should I 1099",
    ],
    "discrimination": [
        "workplace discrimination", "harassment policy", "Title VII",
        "ADA compliance", "hostile work environment", "DEI policy",
        "pay equity", "retaliation claim", "EEOC",
    ],
    "privacy": [
        "CCPA", "CPRA", "privacy policy", "data privacy", "opt-out request",
        "cookie consent", "data breach notification", "biometric data",
        "privacy compliance", "do not sell", "consumer rights request",
    ],
    "tcpa": [
        "TCPA", "robocall", "SMS marketing", "text message consent",
        "do not call", "DNC list", "autodialer",
        "prior express written consent", "one-to-one consent",
        "SMS campaign sued", "text marketing compliance",
    ],
    "corporate": [
        "LLC formation", "form an LLC", "register a business",
        "annual report filing", "registered agent", "foreign qualification",
        "state registration", "business formation", "incorporate",
        "S-Corp election", "C-Corp vs S-Corp", "EIN",
        "operating agreement", "good standing",
    ],
    "telecom": [
        "FCC 499A", "STIR/SHAKEN", "telecom compliance",
        "IPES registration", "ISP registration", "robocall attestation",
        "FCC registration", "CLEC", "telecom license",
    ],
    "payroll": [
        "payroll compliance", "payroll tax", "W-4", "Form 941",
        "employer taxes", "FUTA", "SUTA", "withholding",
        "QuickBooks payroll", "Xero payroll", "payroll setup",
    ],
}
|
|
|
|
# Flatten every category's keyword list into one sequence for quick scanning.
ALL_KEYWORDS = [kw for kws in COMPLIANCE_KEYWORDS.values() for kw in kws]
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# System prompt
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def build_system_prompt() -> str:
    """Build the LLM system prompt for reply generation.

    The prompt combines: the Justin/Performance West persona, the
    authoritative product facts (interpolated at call time via
    get_product_facts(), so the prompt always reflects current facts),
    qualification rules that make the model answer "SKIP: <reason>" for
    out-of-scope posts, and style rules for the actual reply text.
    """
    return f"""You are Justin, the owner of Performance West (https://performancewest.net),
a compliance consulting firm helping US small and mid-size businesses navigate
employment, privacy, TCPA, corporate, and telecom compliance.

=== PRODUCT FACTS (authoritative — use these exactly, never claim anything not listed) ===
{get_product_facts()}
=== END PRODUCT FACTS ===

You are replying to a Reddit post where someone has a compliance-related question.

QUALIFICATION RULES — if skipping, respond ONLY with "SKIP: <one sentence reason>".
Skip if ANY of these are true:
- The person needs legal advice or legal representation (we are consultants, not attorneys)
- The person mentions they already have an attorney handling this
- The person is involved in active litigation (plaintiff or defendant)
- The person is outside the US (we only serve US businesses)
- The person is at a large enterprise (500+ employees — not our market)
- The question is about tax preparation or CPA-level tax advice
- The question is clearly academic or a student assignment
- Performance West's services would not meaningfully help their specific problem
- The compliance area is NOT covered by our services

REPLY RULES (only if not SKIPped):
- Be genuinely helpful and educational — answer their question first
- Explain the compliance concept clearly in plain language
- Only mention Performance West if it's directly relevant to their situation
- If we have a free tool (FLSA calculator, privacy policy generator, contractor quiz),
  mention it naturally — people love free resources
- Never provide legal advice or say "you should do X" — instead explain what the
  regulations generally require and suggest they get professional guidance
- Keep it conversational and helpful, not salesy
- Stay under 250 words
- Sign off with a new line and "-- Justin"
- Do NOT use markdown headers or bullet lists — Reddit comments should feel natural"""
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Logging
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Log to stdout (captured by whatever launches the script) AND to a
# persistent file under ~/logs so runs can be audited after the fact.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOG_DIR / "reddit-monitor.log"),
    ],
)
log = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# State
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def load_state():
    """Load persisted monitor state from STATE_FILE.

    Returns a dict guaranteed to contain every key the rest of the script
    indexes directly (seen_ids, replied_ids, daily_count, daily_reset,
    subreddit_counts). Keys missing from an older/partial state file are
    filled from defaults, and a corrupt, unreadable, or non-dict file
    falls back to a fresh default state instead of raising.
    """
    defaults = {
        "seen_ids": [],
        "replied_ids": [],
        "daily_count": 0,
        "daily_reset": "",
        "subreddit_counts": {},
    }
    if STATE_FILE.exists():
        try:
            loaded = json.loads(STATE_FILE.read_text())
        except (OSError, json.JSONDecodeError):
            # Corrupt or unreadable state file — start fresh rather than crash.
            return defaults
        if isinstance(loaded, dict):
            # Merge over defaults so main()'s direct indexing (e.g.
            # state["daily_count"]) never hits a KeyError on old formats.
            return {**defaults, **loaded}
    return defaults
|
|
|
|
def save_state(state):
    """Persist *state* to STATE_FILE as pretty-printed JSON."""
    serialized = json.dumps(state, indent=2)
    STATE_FILE.write_text(serialized)
|
|
|
|
def reset_daily_if_needed(state):
    """Zero the per-day counters when the UTC date has rolled over.

    Compares the stored "daily_reset" stamp against today's UTC date
    (YYYY-MM-DD); on a new day the daily reply count and the per-subreddit
    counts are cleared. Mutates and returns *state*.
    """
    current_day = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    if state.get("daily_reset") == current_day:
        return state
    state.update(daily_count=0, daily_reset=current_day, subreddit_counts={})
    return state
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Reddit OAuth
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Module-level OAuth token cache, managed by reddit_auth(): the token is
# reused until _token_expiry (epoch seconds); 0 forces a fetch on first use.
_access_token = None
_token_expiry = 0
|
|
|
|
def reddit_auth():
    """Get a Reddit OAuth2 access token using script-type app credentials.

    Uses the password grant with HTTP Basic (client_id:client_secret)
    authentication, caches the token in the module globals, and returns
    the cached token until shortly before expiry. Returns None when
    credentials are missing from the environment or the request fails.
    """
    global _access_token, _token_expiry

    # Serve from cache while the token is still valid.
    if _access_token and time.time() < _token_expiry:
        return _access_token

    if not REDDIT_CLIENT_ID or not REDDIT_CLIENT_SECRET:
        log.error("Reddit credentials not set in environment")
        return None

    data = urllib.parse.urlencode({
        "grant_type": "password",
        "username": REDDIT_USERNAME,
        "password": REDDIT_PASSWORD,
    }).encode()

    # HTTP Basic auth with client_id:client_secret
    import base64
    credentials = base64.b64encode(f"{REDDIT_CLIENT_ID}:{REDDIT_CLIENT_SECRET}".encode()).decode()

    req = urllib.request.Request(
        "https://www.reddit.com/api/v1/access_token",
        data=data,
        headers={
            "Authorization": f"Basic {credentials}",
            "User-Agent": REDDIT_USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            resp = json.loads(r.read())
            _access_token = resp.get("access_token")
            # Expire the cache 60s early so a token never dies mid-request.
            _token_expiry = time.time() + resp.get("expires_in", 3600) - 60
            return _access_token
    except Exception as e:
        # Broad on purpose: any failure (network, JSON, HTTP) means "no token".
        log.error(f"Reddit auth failed: {e}")
        return None
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Reddit API helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def reddit_get(path, params=None):
    """GET request to the Reddit OAuth API.

    Returns the decoded JSON response, or {} on auth failure or any
    request error (callers treat {} as "no data"). A 401/403 is assumed
    to mean the account/token is broken and triggers an alert.
    """
    token = reddit_auth()
    if not token:
        return {}

    p = urllib.parse.urlencode(params or {})
    url = f"https://oauth.reddit.com{path}"
    if p:
        url += f"?{p}"

    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {token}",
            "User-Agent": REDDIT_USER_AGENT,
        },
    )

    # NOTE(review): urllib.error is only resolvable here because importing
    # urllib.request imports it as a side effect — consider adding an
    # explicit `import urllib.error` at the top of the file.
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            return json.loads(r.read())
    except urllib.error.HTTPError as e:
        # Truncate the error body for the log line.
        body = e.read().decode("utf-8", errors="replace")[:300]
        log.warning(f"Reddit GET {path}: {e.code} {body}")
        if e.code in (401, 403):
            alert_account_broken("reddit-monitor", "Reddit", f"HTTP {e.code}", body)
        return {}
    except Exception as e:
        log.warning(f"Reddit GET {path}: {e}")
        return {}
|
|
|
|
|
|
def reddit_post_comment(thing_id, text):
    """Post a comment reply to a Reddit submission or comment.

    *thing_id* is the target's fullname (e.g. "t3_xxxxx" for a submission).
    Returns the decoded JSON response on success, {} when no auth token is
    available, or {"error": ..., "body"/str detail} on failure so callers
    can branch on result.get("error"). A 401/403 triggers an alert.
    """
    token = reddit_auth()
    if not token:
        return {}

    data = urllib.parse.urlencode({
        "thing_id": thing_id,
        "text": text,
    }).encode()

    req = urllib.request.Request(
        "https://oauth.reddit.com/api/comment",
        data=data,
        headers={
            "Authorization": f"Bearer {token}",
            "User-Agent": REDDIT_USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            return json.loads(r.read())
    except urllib.error.HTTPError as e:
        # Keep more of the body here than in reddit_get — posting failures
        # carry useful detail (ban/rate-limit text) that main() inspects.
        body = e.read().decode("utf-8", errors="replace")[:500]
        log.warning(f"Reddit POST comment: {e.code} {body}")
        if e.code in (401, 403):
            alert_account_broken("reddit-monitor", "Reddit", f"HTTP {e.code} on comment", body)
        return {"error": e.code, "body": body}
    except Exception as e:
        log.warning(f"Reddit POST comment: {e}")
        return {"error": str(e)}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Keyword matching
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def matches_keywords(title: str, body: str) -> list[str]:
    """Return the compliance categories whose keywords appear in the post.

    Performs a case-insensitive substring search of every keyword against
    title + body; a category is included once if any of its keywords hit.
    Categories come back in COMPLIANCE_KEYWORDS insertion order.
    """
    haystack = f"{title} {body}".lower()
    return [
        category
        for category, keywords in COMPLIANCE_KEYWORDS.items()
        if any(kw.lower() in haystack for kw in keywords)
    ]
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Ollama
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def generate_reply(title: str, body_text: str, categories: list[str], subreddit: str) -> str:
    """Generate a Reddit reply for a matched post via the Ollama client.

    Builds a user prompt from the post (body truncated to 800 chars to
    bound prompt size) plus the matched categories, and pairs it with the
    system prompt. The returned text may begin with "SKIP:" when the model
    declines per the qualification rules — callers must check for that.
    """
    categories_str = ", ".join(categories)
    prompt = f"""Reddit post in r/{subreddit}:

Title: {title}

Body (first 800 chars): {body_text[:800]}

Matched compliance categories: {categories_str}

Write a helpful Reddit comment reply. Answer their question first, then mention Performance West only if directly relevant."""

    return ollama_client.generate(prompt, system=build_system_prompt(), max_tokens=350)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Post logger
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def log_post(platform, target_url, target_title, content):
    """Append one JSON-lines audit record of a posted reply to ~/logs/posts.log."""
    record = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "platform": platform,
        "url": target_url,
        "title": target_title,
        "content": content,
    }
    line = json.dumps(record) + "\n"
    with open(LOG_DIR / "posts.log", "a") as f:
        f.write(line)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main():
    """Run one monitoring pass: scan subreddits, generate and post replies.

    Flow: load/reset state -> verify Reddit auth and Ollama tunnel -> scan
    shuffled subreddits for keyword-matching posts -> LLM-generate a reply
    (or SKIP) -> post it, respecting per-run, per-subreddit, and daily
    limits -> persist state. The Ollama tunnel is always torn down.
    """
    log.info("=== Reddit monitor starting ===")

    state = load_state()
    state = reset_daily_if_needed(state)

    if state["daily_count"] >= DAILY_LIMIT:
        log.info(f"Daily reply limit reached ({DAILY_LIMIT}). Skipping.")
        return

    # Verify Reddit auth works before doing anything else.
    token = reddit_auth()
    if not token:
        log.error("Cannot authenticate with Reddit. Aborting.")
        alert_account_broken("reddit-monitor", "Reddit", "OAuth authentication failed")
        return

    if not ollama_client.start_tunnel():
        log.error("Cannot reach Ollama. Aborting.")
        return
    ollama_client.warmup()

    try:
        seen_ids = set(state.get("seen_ids", []))
        replied_ids = set(state.get("replied_ids", []))
        subreddit_counts = state.get("subreddit_counts", {})
        replies_this_run = 0
        cutoff = time.time() - (MAX_AGE_DAYS * 86400)  # oldest acceptable created_utc

        # Shuffle subreddits to avoid always hitting the same ones first.
        subs = list(SUBREDDITS)
        random.shuffle(subs)

        for subreddit in subs:
            if replies_this_run >= MAX_REPLIES_PER_RUN:
                break
            if state["daily_count"] >= DAILY_LIMIT:
                break
            # NOTE(review): subreddit_counts is cleared daily (see
            # reset_daily_if_needed), not per run, so this is effectively a
            # per-day cap — the "this run" wording in the log is loose.
            if subreddit_counts.get(subreddit, 0) >= MAX_REPLIES_PER_SUBREDDIT:
                log.info(f"r/{subreddit}: already replied this run, skipping")
                continue

            log.info(f"Scanning r/{subreddit}...")

            # Fetch the newest posts from the subreddit.
            resp = reddit_get(f"/r/{subreddit}/new", {"limit": 25})
            posts = resp.get("data", {}).get("children", [])

            for post_wrapper in posts:
                if replies_this_run >= MAX_REPLIES_PER_RUN:
                    break
                if state["daily_count"] >= DAILY_LIMIT:
                    break

                post = post_wrapper.get("data", {})
                post_id = post.get("id", "")
                fullname = post.get("name", "")  # t3_xxxxx
                title = post.get("title", "")
                body = post.get("selftext", "")
                created = post.get("created_utc", 0)
                permalink = post.get("permalink", "")
                post_url = f"https://reddit.com{permalink}" if permalink else ""
                num_comments = post.get("num_comments", 0)

                # Skip if already processed in a previous run.
                if post_id in seen_ids or post_id in replied_ids:
                    continue

                # Skip if too old.
                if created < cutoff:
                    seen_ids.add(post_id)
                    continue

                # Skip link-only posts (no meaningful selftext to reply to).
                if not body or len(body.strip()) < 30:
                    seen_ids.add(post_id)
                    continue

                # Check keyword match against the compliance categories.
                categories = matches_keywords(title, body)
                if not categories:
                    seen_ids.add(post_id)
                    continue

                # Mark seen now so a failed reply attempt is not retried.
                seen_ids.add(post_id)
                log.info(f"  [r/{subreddit}] '{title[:65]}' (id:{post_id}) categories:{categories}")

                # Generate reply with the LLM.
                try:
                    reply = generate_reply(title, body, categories, subreddit)
                except Exception as e:
                    log.warning(f"  Ollama error: {e}")
                    continue

                # The model answers "SKIP: <reason>" for out-of-scope posts.
                if not reply or reply.strip().upper().startswith("SKIP"):
                    # Strip the 4-char "SKIP" prefix plus separator punctuation.
                    skip_reason = reply.strip()[4:].strip(" :-") if reply and len(reply.strip()) > 4 else ""
                    log.info(f"  -> Skipped: {skip_reason or 'not relevant'}")
                    if skip_reason:
                        # Record the gap so uncovered demand can be reviewed.
                        log_gap("Reddit", post_url, title, body[:300], skip_reason)
                    continue

                # Post the reply.
                log.info(f"  -> Posting reply ({len(reply)} chars)...")
                result = reddit_post_comment(fullname, reply)

                # Check for success — Reddit returns nested jquery structure;
                # our helper injects an "error" key on HTTP/network failure.
                if result.get("error"):
                    err = str(result.get("error", ""))
                    detail = result.get("body", "")
                    log.warning(f"  -> Failed to post: {err}")
                    # These substrings indicate account-level trouble, not a
                    # one-off failure — escalate via alert.
                    if any(x in str(detail).lower() for x in [
                        "forbidden", "banned", "suspended", "rate limit",
                        "unauthorized", "invalid_grant",
                    ]):
                        alert_account_broken("reddit-monitor", "Reddit", err, detail)
                    continue

                # Success — record the reply and bump all counters.
                log.info(f"  -> Posted reply to r/{subreddit}")
                log_post("Reddit", post_url, title, reply)
                replied_ids.add(post_id)
                replies_this_run += 1
                state["daily_count"] += 1
                subreddit_counts[subreddit] = subreddit_counts.get(subreddit, 0) + 1

                # Human-like pause between replies.
                if replies_this_run < MAX_REPLIES_PER_RUN:
                    pause = random.uniform(
                        PAUSE_BETWEEN_MIN * 60,
                        PAUSE_BETWEEN_MAX * 60,
                    )
                    log.info(f"  -> Pausing {pause/60:.1f} min before next reply...")
                    time.sleep(pause)

                break  # Move to next subreddit after replying (1 per sub)

            # Don't hammer Reddit API between subreddit scans.
            time.sleep(2)

        # Save state — trim seen_ids to prevent unbounded growth.
        # NOTE(review): sets are unordered, so the [-5000:] trim drops
        # arbitrary ids, not necessarily the oldest — confirm acceptable.
        state["seen_ids"] = list(seen_ids)[-5000:]
        state["replied_ids"] = list(replied_ids)
        state["subreddit_counts"] = subreddit_counts
        save_state(state)
        log.info(f"=== Done. {replies_this_run} replies posted this run. ===")

    finally:
        # Always tear down the Ollama tunnel, even on an unexpected error.
        ollama_client.stop_tunnel()


if __name__ == "__main__":
    main()
|