#!/usr/bin/env python3
"""
reddit-monitor.py — Monitor Reddit for compliance-related questions relevant to
Performance West, generate helpful replies with Ollama (qwen2.5:3b), and post them.

Targets:
  r/smallbusiness, r/Entrepreneur, r/tax, r/legaladvice, r/Bookkeeping,
  r/accounting, r/humanresources, r/QuickBooks, r/IRS, r/ecommerce,
  r/marketing, r/realestateinvesting, r/restaurateur, r/construction,
  r/antiwork, r/EmploymentLaw, r/freelance, r/startups, r/payroll

State: ~/.reddit-monitor-state.json
Log:   ~/logs/reddit-monitor.log
"""

import base64
import fcntl
import json
import logging
import os
import random
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

# Single-instance lock — acquire before anything else including logging setup.
# The handle stays bound at module level so the lock is held for the whole run.
_LOCK_FILE = open("/tmp/reddit-monitor.lock", "w")
try:
    fcntl.flock(_LOCK_FILE, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
    sys.exit(0)  # Another instance running — exit silently

# Project-local helpers live next to this script.
sys.path.insert(0, os.path.dirname(__file__))
from alert import alert_account_broken
from product_facts import get_product_facts
import ollama_client
from gap_tracker import log_gap

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID", "")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "")
REDDIT_USERNAME = os.environ.get("REDDIT_USERNAME", "")
REDDIT_PASSWORD = os.environ.get("REDDIT_PASSWORD", "")
REDDIT_USER_AGENT = "PerfWestBot/1.0 (by /u/performancewest)"

STATE_FILE = Path.home() / ".reddit-monitor-state.json"
LOG_DIR = Path.home() / "logs"
LOG_DIR.mkdir(exist_ok=True)

# Rate limits
MAX_REPLIES_PER_RUN = 3
MAX_REPLIES_PER_SUBREDDIT = 1
PAUSE_BETWEEN_MIN = 5   # minutes
PAUSE_BETWEEN_MAX = 15  # minutes
DAILY_LIMIT = 10
MAX_AGE_DAYS = 7

# Upper bound on remembered post IDs so the state file cannot grow forever.
MAX_SEEN_IDS = 5000

# ---------------------------------------------------------------------------
# Subreddits to monitor
# ---------------------------------------------------------------------------

SUBREDDITS = [
    # TIER 1 — Highest volume, business owners asking compliance questions
    "smallbusiness",        # 470K — constant contractor/LLC/compliance posts
    "Entrepreneur",         # 470K — formation, contractor, privacy questions
    "tax",                  # 841K — 1099 vs W-2 daily, misclassification gold
    "legaladvice",          # 1.6M — employee-side misclassif posts (shows employer risk)
    # TIER 2 — Professionals who refer clients + direct compliance Q&A
    "Bookkeeping",          # 75K — 1099 processing, payroll compliance, QBO/Xero
    "accounting",           # 1.2M — broad but huge; contractor classification
    "humanresources",       # 107K — FLSA, handbooks, discrimination, HR policies
    "QuickBooks",           # 37K — payroll/1099 compliance in QB context
    "IRS",                  # 442K — enforcement notices, compliance questions
    # TIER 3 — Industry-specific (highest misclassification/wage-hour risk)
    "ecommerce",            # 91K — CCPA, privacy policies, SMS marketing
    "marketing",            # 141K — TCPA, SMS consent, DNC
    "realestateinvesting",  # contractor classification, entity formation
    "restaurateur",         # wage-hour violations (huge in food service)
    "construction",         # contractor misclassification (#1 violating industry)
    "antiwork",             # 1.6M — misclassif/wage theft posts get massive engagement
    "EmploymentLaw",        # 7.1K — small but 100% signal, every post is compliance
    "freelance",            # the "other side" of contractor misclassification
    "startups",             # 1.2M — business formation, early compliance
    "payroll",              # payroll tax compliance, misclassification
]

# ---------------------------------------------------------------------------
# Keyword triggers by compliance category
# ---------------------------------------------------------------------------

COMPLIANCE_KEYWORDS = {
    "flsa": [
        "FLSA", "wage and hour", "overtime violation", "exempt vs nonexempt",
        "minimum wage", "off the clock", "meal break violation", "unpaid overtime",
        "salary threshold", "wage theft", "DOL audit", "Department of Labor",
    ],
    "misclassification": [
        "1099 vs W-2", "1099 vs W2", "independent contractor", "misclassification",
        "misclassified", "contractor or employee", "IC vs employee",
        "gig worker classification", "pay contractor", "paying 1099", "1099 worker",
        "contractor to employee", "should I 1099",
    ],
    "discrimination": [
        "workplace discrimination", "harassment policy", "Title VII",
        "ADA compliance", "hostile work environment", "DEI policy", "pay equity",
        "retaliation claim", "EEOC",
    ],
    "privacy": [
        "CCPA", "CPRA", "privacy policy", "data privacy", "opt-out request",
        "cookie consent", "data breach notification", "biometric data",
        "privacy compliance", "do not sell", "consumer rights request",
    ],
    "tcpa": [
        "TCPA", "robocall", "SMS marketing", "text message consent", "do not call",
        "DNC list", "autodialer", "prior express written consent",
        "one-to-one consent", "SMS campaign sued", "text marketing compliance",
    ],
    "corporate": [
        "LLC formation", "form an LLC", "register a business",
        "annual report filing", "registered agent", "foreign qualification",
        "state registration", "business formation", "incorporate",
        "S-Corp election", "C-Corp vs S-Corp", "EIN", "operating agreement",
        "good standing",
    ],
    "telecom": [
        "FCC 499A", "STIR/SHAKEN", "telecom compliance", "IPES registration",
        "ISP registration", "robocall attestation", "FCC registration", "CLEC",
        "telecom license",
    ],
    "payroll": [
        "payroll compliance", "payroll tax", "W-4", "Form 941", "employer taxes",
        "FUTA", "SUTA", "withholding", "QuickBooks payroll", "Xero payroll",
        "payroll setup",
    ],
}

# Flatten all keywords for quick scanning
ALL_KEYWORDS = [kw for kws in COMPLIANCE_KEYWORDS.values() for kw in kws]

# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------


def build_system_prompt() -> str:
    """Return the LLM system prompt with the current product facts embedded."""
    return f"""You are Justin, the owner of Performance West (https://performancewest.net), a compliance consulting firm helping US small and mid-size businesses navigate employment, privacy, TCPA, corporate, and telecom compliance.

=== PRODUCT FACTS (authoritative — use these exactly, never claim anything not listed) ===
{get_product_facts()}
=== END PRODUCT FACTS ===

You are replying to a Reddit post where someone has a compliance-related question.

QUALIFICATION RULES — if skipping, respond ONLY with "SKIP: ".
Skip if ANY of these are true:
- The person needs legal advice or legal representation (we are consultants, not attorneys)
- The person mentions they already have an attorney handling this
- The person is involved in active litigation (plaintiff or defendant)
- The person is outside the US (we only serve US businesses)
- The person is at a large enterprise (500+ employees — not our market)
- The question is about tax preparation or CPA-level tax advice
- The question is clearly academic or a student assignment
- Performance West's services would not meaningfully help their specific problem
- The compliance area is NOT covered by our services

REPLY RULES (only if not SKIPped):
- Be genuinely helpful and educational — answer their question first
- Explain the compliance concept clearly in plain language
- Only mention Performance West if it's directly relevant to their situation
- If we have a free tool (FLSA calculator, privacy policy generator, contractor quiz), mention it naturally — people love free resources
- Never provide legal advice or say "you should do X" — instead explain what the regulations generally require and suggest they get professional guidance
- Keep it conversational and helpful, not salesy
- Stay under 250 words
- Sign off with a new line and "-- Justin"
- Do NOT use markdown headers or bullet lists — Reddit comments should feel natural"""


# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------

logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOG_DIR / "reddit-monitor.log"),
    ],
)
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# State
# ---------------------------------------------------------------------------


def _default_state() -> dict:
    """Return a fresh state dict containing every key the monitor reads."""
    return {
        "seen_ids": [],
        "replied_ids": [],
        "daily_count": 0,
        "daily_reset": "",
        "subreddit_counts": {},
    }


def load_state():
    """Load persisted state from STATE_FILE, falling back to defaults.

    Keys missing from the file are filled in from the defaults so callers can
    index the result directly. An unreadable or malformed file is logged and
    treated as empty state rather than aborting the run.
    """
    state = _default_state()
    if not STATE_FILE.exists():
        return state
    try:
        loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both JSON decode errors and bad encodings.
        log.warning("Could not read state file %s (%s); starting fresh", STATE_FILE, e)
        return state
    if not isinstance(loaded, dict):
        log.warning("State file %s is not a JSON object; starting fresh", STATE_FILE)
        return state
    state.update(loaded)
    return state


def save_state(state):
    """Write *state* to STATE_FILE as JSON.

    The data goes to a sibling temp file first and is then moved into place
    with os.replace, so an interrupted write cannot leave a truncated file.
    """
    tmp_path = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_path.write_text(json.dumps(state, indent=2), encoding="utf-8")
    os.replace(tmp_path, STATE_FILE)


def reset_daily_if_needed(state):
    """Zero the daily counters when the UTC date has changed; return *state*."""
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    if state.get("daily_reset") != today:
        state["daily_count"] = 0
        state["daily_reset"] = today
        state["subreddit_counts"] = {}
    return state


def _persist_state(state, seen_ids, replied_ids, subreddit_counts):
    """Copy the in-memory tracking collections into *state* and save it.

    *seen_ids* and *replied_ids* are insertion-ordered dicts used as ordered
    sets, so trimming keeps the most recently added IDs.
    """
    state["seen_ids"] = list(seen_ids)[-MAX_SEEN_IDS:]
    state["replied_ids"] = list(replied_ids)
    state["subreddit_counts"] = subreddit_counts
    save_state(state)


# ---------------------------------------------------------------------------
# Reddit OAuth
# ---------------------------------------------------------------------------

_access_token = None
_token_expiry = 0


def reddit_auth():
    """Get Reddit OAuth2 access token using script-type app credentials.

    Returns the cached token while it is still valid, otherwise requests a new
    one with the password grant. Returns None when credentials are missing or
    the request fails.
    """
    global _access_token, _token_expiry

    if _access_token and time.time() < _token_expiry:
        return _access_token

    if not all((REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME, REDDIT_PASSWORD)):
        log.error("Reddit credentials not set in environment")
        return None

    data = urllib.parse.urlencode({
        "grant_type": "password",
        "username": REDDIT_USERNAME,
        "password": REDDIT_PASSWORD,
    }).encode()

    # HTTP Basic auth with client_id:client_secret
    credentials = base64.b64encode(
        f"{REDDIT_CLIENT_ID}:{REDDIT_CLIENT_SECRET}".encode()
    ).decode()

    req = urllib.request.Request(
        "https://www.reddit.com/api/v1/access_token",
        data=data,
        headers={
            "Authorization": f"Basic {credentials}",
            "User-Agent": REDDIT_USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            resp = json.loads(r.read())
    except Exception as e:
        log.error("Reddit auth failed: %s", e)
        return None

    token = resp.get("access_token") if isinstance(resp, dict) else None
    if not token:
        # A successful HTTP response can still lack a token; surface why.
        reason = resp.get("error", "no access_token") if isinstance(resp, dict) else resp
        log.error("Reddit auth failed: %s", reason)
        _access_token = None
        return None

    _access_token = token
    # Refresh a minute early so a token never expires mid-request.
    _token_expiry = time.time() + resp.get("expires_in", 3600) - 60
    return _access_token


# ---------------------------------------------------------------------------
# Reddit API helpers
# ---------------------------------------------------------------------------


def reddit_get(path, params=None):
    """GET request to Reddit OAuth API.

    Returns the decoded JSON response, or an empty dict when authentication or
    the request fails. HTTP 401/403 additionally raise an account alert.
    """
    token = reddit_auth()
    if not token:
        return {}

    url = f"https://oauth.reddit.com{path}"
    query = urllib.parse.urlencode(params or {})
    if query:
        url += f"?{query}"

    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {token}",
            "User-Agent": REDDIT_USER_AGENT,
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            return json.loads(r.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")[:300]
        log.warning("Reddit GET %s: %s %s", path, e.code, body)
        if e.code in (401, 403):
            alert_account_broken("reddit-monitor", "Reddit", f"HTTP {e.code}", body)
        return {}
    except Exception as e:
        log.warning("Reddit GET %s: %s", path, e)
        return {}


def _extract_api_errors(result):
    """Return the ``json.errors`` list from a Reddit API response, or []."""
    if not isinstance(result, dict):
        return []
    payload = result.get("json")
    if not isinstance(payload, dict):
        return []
    return payload.get("errors") or []


def reddit_post_comment(thing_id, text):
    """Post a comment reply to a Reddit submission or comment.

    Returns the decoded JSON response on success. On any failure returns a dict
    with a truthy "error" key (and usually a "body" with details) — this
    includes a missing access token and errors that Reddit reports inside an
    HTTP 200 response body.
    """
    token = reddit_auth()
    if not token:
        # Must be reported as an error; an empty dict would read as success.
        return {"error": "no_token", "body": "could not obtain Reddit access token"}

    data = urllib.parse.urlencode({
        # api_type=json makes Reddit return {"json": {"errors": [...], ...}}
        # so rejected comments can be detected.
        "api_type": "json",
        "thing_id": thing_id,
        "text": text,
    }).encode()

    req = urllib.request.Request(
        "https://oauth.reddit.com/api/comment",
        data=data,
        headers={
            "Authorization": f"Bearer {token}",
            "User-Agent": REDDIT_USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            result = json.loads(r.read())
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")[:500]
        log.warning("Reddit POST comment: %s %s", e.code, body)
        if e.code in (401, 403):
            alert_account_broken(
                "reddit-monitor", "Reddit", f"HTTP {e.code} on comment", body
            )
        return {"error": e.code, "body": body}
    except Exception as e:
        log.warning("Reddit POST comment: %s", e)
        return {"error": str(e)}

    api_errors = _extract_api_errors(result)
    if api_errors:
        body = json.dumps(api_errors)[:500]
        log.warning("Reddit POST comment rejected: %s", body)
        first = api_errors[0]
        code = first[0] if isinstance(first, (list, tuple)) and first else "api_error"
        return {"error": str(code), "body": body}
    return result


# ---------------------------------------------------------------------------
# Keyword matching
# ---------------------------------------------------------------------------

# Keywords this short are acronyms ("EIN", "W-4", "FLSA") and must match as
# whole words; as raw substrings they hit ordinary words ("EIN" in "being").
_SHORT_KEYWORD_LEN = 4


def _compile_keyword(keyword):
    """Compile *keyword* into a pattern applied to lower-cased text.

    Short keywords match only as standalone tokens (optionally pluralised with
    a trailing "s"); longer phrases keep plain substring matching so inflected
    forms such as "independent contractors" still match.
    """
    escaped = re.escape(keyword.lower())
    if len(keyword) <= _SHORT_KEYWORD_LEN:
        return re.compile(rf"(?<![a-z0-9]){escaped}s?(?![a-z0-9])")
    return re.compile(escaped)


_KEYWORD_PATTERNS = {
    category: [_compile_keyword(kw) for kw in keywords]
    for category, keywords in COMPLIANCE_KEYWORDS.items()
}


def matches_keywords(title: str, body: str) -> list[str]:
    """Return list of matched compliance categories.

    Matching is case-insensitive over the title and body combined; each
    category appears at most once, in COMPLIANCE_KEYWORDS order.
    """
    text = f"{title or ''} {body or ''}".lower()
    return [
        category
        for category, patterns in _KEYWORD_PATTERNS.items()
        if any(pattern.search(text) for pattern in patterns)
    ]


# ---------------------------------------------------------------------------
# Ollama
# ---------------------------------------------------------------------------


def generate_reply(title: str, body_text: str, categories: list[str], subreddit: str) -> str:
    """Ask the LLM for a reply to the given post.

    Only the first 800 characters of the body are sent. The result is whatever
    ``ollama_client.generate`` returns for the prompt and system prompt.
    """
    categories_str = ", ".join(categories)
    prompt = f"""Reddit post in r/{subreddit}:

Title: {title}

Body (first 800 chars):
{body_text[:800]}

Matched compliance categories: {categories_str}

Write a helpful Reddit comment reply. Answer their question first, then mention Performance West only if directly relevant."""
    return ollama_client.generate(prompt, system=build_system_prompt(), max_tokens=350)


# ---------------------------------------------------------------------------
# Post logger
# ---------------------------------------------------------------------------


def log_post(platform, target_url, target_title, content):
    """Append one JSON line describing a posted reply to ~/logs/posts.log."""
    posts_log = LOG_DIR / "posts.log"
    entry = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "platform": platform,
        "url": target_url,
        "title": target_title,
        "content": content,
    }
    with open(posts_log, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry) + "\n")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

# Substrings in an error body that indicate an account-level problem.
_ACCOUNT_ERROR_MARKERS = (
    "forbidden", "banned", "suspended", "rate limit", "ratelimit",
    "unauthorized", "invalid_grant",
)


def _skip_reason(reply):
    """Return the text following a leading "SKIP" marker, or "" if none."""
    stripped = reply.strip() if reply else ""
    return stripped[4:].strip(" :-") if len(stripped) > 4 else ""


def _limits_reached(state, replies_this_run):
    """Return True once the per-run or the daily reply limit has been hit."""
    return replies_this_run >= MAX_REPLIES_PER_RUN or state["daily_count"] >= DAILY_LIMIT


def main():
    """Scan the monitored subreddits once and reply to qualifying posts.

    Honors the per-run, per-subreddit and daily reply limits, pauses between
    replies, and persists state after every successful reply as well as on
    exit so an interrupted run does not reply to the same post twice.
    """
    log.info("=== Reddit monitor starting ===")

    state = reset_daily_if_needed(load_state())

    if state["daily_count"] >= DAILY_LIMIT:
        log.info("Daily reply limit reached (%s). Skipping.", DAILY_LIMIT)
        return

    # Verify Reddit auth works
    if not reddit_auth():
        log.error("Cannot authenticate with Reddit. Aborting.")
        alert_account_broken("reddit-monitor", "Reddit", "OAuth authentication failed")
        return

    if not ollama_client.start_tunnel():
        log.error("Cannot reach Ollama. Aborting.")
        return

    # Insertion-ordered dicts act as ordered sets: O(1) membership, and the
    # oldest IDs are the ones dropped when the list is trimmed.
    seen_ids = dict.fromkeys(state.get("seen_ids", []))
    replied_ids = dict.fromkeys(state.get("replied_ids", []))
    subreddit_counts = state.get("subreddit_counts", {})
    replies_this_run = 0

    try:
        ollama_client.warmup()
        cutoff = time.time() - (MAX_AGE_DAYS * 86400)

        # Shuffle subreddits to avoid always hitting the same ones first
        subs = list(SUBREDDITS)
        random.shuffle(subs)

        for subreddit in subs:
            if _limits_reached(state, replies_this_run):
                break
            if subreddit_counts.get(subreddit, 0) >= MAX_REPLIES_PER_SUBREDDIT:
                # The counter is persisted and reset daily, not per run.
                log.info("r/%s: daily per-subreddit reply limit reached, skipping", subreddit)
                continue

            log.info("Scanning r/%s...", subreddit)

            # Fetch new posts from the subreddit
            resp = reddit_get(f"/r/{subreddit}/new", {"limit": 25})
            posts = (resp.get("data") or {}).get("children") or []

            for post_wrapper in posts:
                if _limits_reached(state, replies_this_run):
                    break

                post = post_wrapper.get("data", {})
                post_id = post.get("id", "")
                fullname = post.get("name", "")  # t3_xxxxx
                title = post.get("title") or ""
                body = post.get("selftext") or ""
                created = post.get("created_utc", 0)
                permalink = post.get("permalink", "")
                post_url = f"https://reddit.com{permalink}" if permalink else ""

                # Skip if already processed
                if post_id in seen_ids or post_id in replied_ids:
                    continue
                # Every outcome below counts as "seen", so record it once here.
                seen_ids[post_id] = None

                # Skip if too old
                if created < cutoff:
                    continue

                # Skip link-only posts (no selftext)
                if len(body.strip()) < 30:
                    continue

                # Check keyword match
                categories = matches_keywords(title, body)
                if not categories:
                    continue

                log.info(
                    " [r/%s] '%s' (id:%s) categories:%s",
                    subreddit, title[:65], post_id, categories,
                )

                # Generate reply with LLM
                try:
                    reply = generate_reply(title, body, categories, subreddit)
                except Exception as e:
                    log.warning(" Ollama error: %s", e)
                    continue

                if not reply or not reply.strip() or reply.strip().upper().startswith("SKIP"):
                    skip_reason = _skip_reason(reply)
                    log.info(" -> Skipped: %s", skip_reason or "not relevant")
                    if skip_reason:
                        log_gap("Reddit", post_url, title, body[:300], skip_reason)
                    continue

                # Post the reply
                log.info(" -> Posting reply (%s chars)...", len(reply))
                result = reddit_post_comment(fullname, reply)

                if result.get("error"):
                    err = str(result.get("error", ""))
                    detail = result.get("body", "")
                    log.warning(" -> Failed to post: %s", err)
                    detail_lower = str(detail).lower()
                    if any(marker in detail_lower for marker in _ACCOUNT_ERROR_MARKERS):
                        alert_account_broken("reddit-monitor", "Reddit", err, detail)
                    continue

                # Success
                log.info(" -> Posted reply to r/%s", subreddit)
                log_post("Reddit", post_url, title, reply)
                replied_ids[post_id] = None
                replies_this_run += 1
                state["daily_count"] += 1
                subreddit_counts[subreddit] = subreddit_counts.get(subreddit, 0) + 1

                # Persist before the long pause so a kill during the sleep
                # cannot cause a duplicate reply on the next run.
                _persist_state(state, seen_ids, replied_ids, subreddit_counts)

                # Human-like pause between replies (pointless once a limit is hit)
                if not _limits_reached(state, replies_this_run):
                    pause = random.uniform(
                        PAUSE_BETWEEN_MIN * 60,
                        PAUSE_BETWEEN_MAX * 60,
                    )
                    log.info(" -> Pausing %.1f min before next reply...", pause / 60)
                    time.sleep(pause)

                break  # Move to next subreddit after replying (1 per sub)

            # Don't hammer Reddit API
            time.sleep(2)

        log.info("=== Done. %s replies posted this run. ===", replies_this_run)

    finally:
        # Save state — seen_ids is trimmed to prevent unbounded growth. Runs on
        # errors and interrupts too; the tunnel is closed even if saving fails.
        try:
            _persist_state(state, seen_ids, replied_ids, subreddit_counts)
        finally:
            ollama_client.stop_tunnel()


if __name__ == "__main__":
    main()