Initial commit — Performance West telecom compliance platform
Includes: API (Express/TypeScript), Astro site, Python workers, document generators, FCC compliance tools, Canada CRTC formation, Ansible infrastructure, and deployment scripts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
f8cd37ac8c
1823 changed files with 145167 additions and 0 deletions
519
scripts/workers/amb_location_scraper.py
Normal file
519
scripts/workers/amb_location_scraper.py
Normal file
|
|
@ -0,0 +1,519 @@
|
|||
"""
|
||||
Anytime Mailbox Location Scraper (BC + ON)

Scrapes all British Columbia and Ontario virtual mailbox locations from
anytimemailbox.com, extracts pricing, and upserts the results into the
amb_locations PostgreSQL table.

Deactivates sold-out locations. Detects price changes and sends an admin alert.

Schedule: daily via cron (0 6 * * *)
Usage: python3 scripts/workers/amb_location_scraper.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import smtplib
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from typing import Optional
|
||||
|
||||
import psycopg2
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# Root logging is configured at import time so the cron run logs to stderr.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
)
LOG = logging.getLogger("workers.amb_scraper")

# --- Database / site configuration (overridable through the environment) ---
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://pw:pw@localhost:5432/performancewest")
DOMAIN = os.environ.get("DOMAIN", "performancewest.net")

# --- Outbound mail settings used for the admin alert ---
SMTP_HOST = os.environ.get("SMTP_HOST", "co.carrierone.com")
SMTP_PORT = int(os.environ.get("SMTP_PORT", "587"))
SMTP_USER = os.environ.get("SMTP_USER", "noreply@performancewest.net")
SMTP_PASS = os.environ.get("SMTP_PASS", "")  # empty value disables alert emails
SMTP_FROM = os.environ.get("SMTP_FROM", "Performance West <noreply@performancewest.net>")
ADMIN_EMAIL = os.environ.get("ADMIN_EMAIL", "ops@performancewest.net")

# Province code -> Anytime Mailbox listing page. Iteration order is the scrape order.
AMB_PROVINCE_URLS = dict(
    BC="https://www.anytimemailbox.com/l/canada/british-columbia",
    ON="https://www.anytimemailbox.com/l/canada/ontario",
)
|
||||
|
||||
|
||||
def slugify(text: str) -> str:
    """Convert an address (or any free text) to a URL-safe slug.

    The text is lower-cased and trimmed, characters other than ASCII letters,
    digits, whitespace, underscores and hyphens are dropped, runs of
    whitespace/underscores become a single hyphen, and leading/trailing
    hyphens are removed.

    Args:
        text: Source text, e.g. ``"702 Russell Ave-Vancouver"``.

    Returns:
        The slug, e.g. ``"702-russell-ave-vancouver"``; ``""`` for empty input.
    """
    slug = text.lower().strip()
    # Underscores must survive this step: the next substitution is what turns
    # them into hyphens. Stripping them here would glue words together.
    slug = re.sub(r"[^a-z0-9\s_-]", "", slug)
    slug = re.sub(r"[\s_]+", "-", slug)
    return re.sub(r"-+", "-", slug).strip("-")
|
||||
|
||||
|
||||
def parse_price_text(text: str) -> int:
    """Extract the first dollar amount from text and return it in cents.

    Handles text such as ``'$9.99/mo'``, ``'$99/yr'`` or ``'$ 1,234.5'``.
    The amount is converted with integer arithmetic; going through
    ``float(x) * 100`` truncates some prices (19.99 becomes 1998) because of
    binary floating-point representation.

    Args:
        text: Arbitrary text that may contain a ``$``-prefixed amount.

    Returns:
        The amount in cents, or ``0`` when no amount is found.
    """
    match = re.search(r"\$\s?([\d,]+(?:\.\d{1,2})?)", text)
    if not match:
        return 0

    whole, _, fraction = match.group(1).replace(",", "").partition(".")
    if not whole and not fraction:
        # The pattern can match a lone thousands separator such as "$,".
        return 0
    # Right-pad the fraction so "9.9" reads as 90 cents, not 9.
    return int(whole or "0") * 100 + int((fraction + "00")[:2])
|
||||
|
||||
|
||||
async def scrape_province_locations(province: str) -> list[dict]:
    """Scrape all Anytime Mailbox locations for a given province.

    Opens the province listing page configured in ``AMB_PROVINCE_URLS`` in
    headless Chromium, collects every link whose URL contains ``/s/``
    (individual location pages), and visits each one with
    ``_scrape_single_location``.

    Args:
        province: Key into ``AMB_PROVINCE_URLS`` (e.g. ``"BC"`` or ``"ON"``).

    Returns:
        One dict per successfully scraped location. Empty when the province
        has no configured URL or nothing could be scraped. A location page
        that fails to scrape is logged and skipped.

    Raises:
        Exception: Errors raised while launching the browser or loading the
            province listing page propagate to the caller; the browser is
            closed first.
    """
    url_page = AMB_PROVINCE_URLS.get(province)
    if not url_page:
        LOG.error("No AMB URL configured for province: %s", province)
        return []

    locations = []

    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            LOG.info("[%s] Navigating to AMB page: %s", province, url_page)
            await page.goto(url_page, wait_until="domcontentloaded", timeout=60000)
            await page.wait_for_timeout(3000)

            # AMB uses /s/city-address URLs for individual locations.
            location_urls_raw = await page.evaluate("""() => {
                const links = document.querySelectorAll('a[href]');
                return [...links]
                    .map(a => a.href)
                    .filter(h => h.includes('/s/') && !h.includes('#'));
            }""")
            location_urls_raw = list(dict.fromkeys(location_urls_raw))  # dedupe, keep order
            LOG.info("[%s] Found %d raw /s/ location URLs", province, len(location_urls_raw))

            # Fallback: card-based approach. Only the first selector that
            # matches anything is used.
            if not location_urls_raw:
                card_selectors = [
                    ".location-card",
                    "[data-testid='location-card']",
                    ".LocationCard",
                    "a[href*='/l/canada/']",
                    ".search-results-list a",
                    "article a[href*='anytimemailbox.com/l/']",
                ]
                for sel in card_selectors:
                    cards = await page.query_selector_all(sel)
                    if cards:
                        LOG.info("[%s] Fallback: found %d cards with selector: %s", province, len(cards), sel)
                        for card in cards:
                            href = await card.get_attribute("href") or ""
                            # Prefer a nested /s/ link over the card's own href.
                            inner_a = await card.query_selector("a[href*='/s/']")
                            if inner_a:
                                href = await inner_a.get_attribute("href") or ""
                            if href and "/s/" in href:
                                if not href.startswith("http"):
                                    href = f"https://www.anytimemailbox.com{href}"
                                location_urls_raw.append(href)
                        break

            location_urls = list(dict.fromkeys(u for u in location_urls_raw if "/s/" in u))
            LOG.info("[%s] Found %d unique location URLs to scrape", province, len(location_urls))

            # Visit each location page to get address + pricing
            for url in location_urls:
                try:
                    loc = await _scrape_single_location(page, url, province)
                    if loc:
                        locations.append(loc)
                except Exception as e:
                    LOG.warning("[%s] Failed to scrape %s: %s", province, url, e)
        finally:
            # Close the browser even when navigation or scraping raises.
            await browser.close()

    LOG.info("[%s] Scraped %d locations total", province, len(locations))
    return locations
|
||||
|
||||
|
||||
def _amount_to_cents(amount: str) -> int:
    """Convert a matched decimal amount such as ``"1,169.99"`` to integer cents.

    Uses integer arithmetic rather than ``int(float(x) * 100)``, which
    truncates some values (19.99 becomes 1998) due to binary floating point.
    """
    whole, _, fraction = amount.replace(",", "").partition(".")
    # Right-pad the fraction so "16.5" reads as 50 cents, not 5.
    return int(whole or "0") * 100 + int((fraction + "00")[:2])


async def _scrape_single_location(page, url: str, province: str = "BC") -> Optional[dict]:
    """Scrape a single AMB location page for address, pricing, and operator name.

    AMB pages show prices in CAD. The scraped CAD amounts are converted to USD
    here with the rate from the ``CAD_TO_USD_RATE`` environment variable
    (default ``0.72``), and the USD values in cents are what is returned.
    The gb-block-layout-column elements contain plan cards with text like:
        "BronzeC$ 14.99 / month SelectC$ 169.99 / year Select..."

    operator_name is the legal business name of the mailbox operator at this
    location (e.g. "Regus", "iPostal1", "The UPS Store"). It appears in the
    page <title>, h1, or a prominent heading before the address block.

    Args:
        page: Playwright page used for navigation; it is navigated to ``url``
            and may be left on a signup page after the availability probe.
        url: Location page URL (``/s/<city>-<address>``).
        province: Province code copied into the result.

    Returns:
        A dict with keys ``slug``, ``name``, ``full_address``, ``city``,
        ``province``, ``postal_code``, ``provider_url``, ``plan_name``,
        ``monthly_price_usd``, ``yearly_price_usd`` (both in cents),
        ``available_units`` (0 = sold out, -1 = unknown) and
        ``operator_name``; ``None`` when no address can be determined.
    """
    await page.goto(url, wait_until="networkidle", timeout=30000)
    await page.wait_for_timeout(2000)

    # Full visible text; used for address, postal code, price fallback and
    # sold-out detection.
    page_text = await page.inner_text("body")

    # ── Operator name ────────────────────────────────────────────────────────
    # AMB location pages have the operator/business name as the primary heading.
    # Strategy (in priority order):
    #   1. <h1> tag — most reliable
    #   2. <title> before " - Anytime Mailbox" suffix
    #   3. The line immediately before the street address in page_text
    #      (AMB shows: "Regus\n329 Howe St\n...")
    operator_name: Optional[str] = None

    try:
        # 1. h1 element
        h1_el = await page.query_selector("h1")
        if h1_el:
            h1_text = (await h1_el.inner_text()).strip()
            # Exclude generic headings that are just the address
            if h1_text and not re.match(r"^\d+\s+", h1_text) and len(h1_text) < 80:
                operator_name = h1_text

        # 2. Page title: "Regus | 329 Howe St, Vancouver, BC | Anytime Mailbox"
        if not operator_name:
            title = await page.title()
            title_parts = re.split(r"\s*[\|\-–]\s*", title)
            for part in title_parts:
                part = part.strip()
                if (part
                        and "anytime mailbox" not in part.lower()
                        and not re.match(r"^\d+\s+", part)
                        and len(part) < 60):
                    operator_name = part
                    break

        # 3. Line immediately before the street number in page_text
        if not operator_name:
            lines_text = [l.strip() for l in page_text.splitlines() if l.strip()]
            for idx, line in enumerate(lines_text):
                if re.match(r"^\d+\s+[\w]", line) and idx > 0:
                    candidate = lines_text[idx - 1]
                    # Must look like a business name: not a postal code, not a
                    # price, not a form label, and reasonably short
                    if (candidate
                            and len(candidate) < 80
                            and not re.match(r"^[A-Z]\d[A-Z]", candidate)
                            and not re.search(r"C\$|\$\d", candidate)
                            and candidate.lower() not in ("your name", "name", "address", "company")):
                        operator_name = candidate
                        break
    except Exception as e:
        # Operator name is optional; never fail the whole location over it.
        LOG.warning("Could not extract operator_name from %s: %s", url, e)

    if operator_name:
        LOG.info(" operator_name: %s", operator_name)
    else:
        LOG.warning(" Could not determine operator_name for %s", url)

    # Extract street address from page text
    # AMB pages show: "YOUR NAME\n702 Russell Ave\nB438 Unit #MAILBOX\nVancouver, BC V5P 3V6"
    addr_match = re.search(r"(\d+\s+[\w\s]+?(?:St|Ave|Dr|Rd|Blvd|Way|Drive|Street|Avenue|Road|Highway|Hwy)[\w\s]*?)[\n#]", page_text, re.IGNORECASE)
    if addr_match:
        address = addr_match.group(1).strip()
    else:
        # Fallback: extract from URL slug
        # e.g. "vancouver-5307-victoria-drive" → "5307 Victoria Drive"
        url_tail = url.rstrip("/").split("/")[-1]
        # Remove city prefix: keep everything from the first all-digit part on
        parts = url_tail.split("-")
        addr_parts = []
        found_digit = False
        for part in parts:
            if re.match(r"^\d+$", part):
                found_digit = True
            if found_digit:
                addr_parts.append(part)
        if addr_parts:
            address = " ".join(addr_parts).title()
        else:
            LOG.warning("Could not extract address from %s", url)
            return None

    # City from URL: /s/vancouver-... or /s/kelowna-...
    # NOTE(review): only the leading run of letters is used, so a hyphenated
    # city slug (e.g. "north-vancouver-...") yields just its first word, and
    # the "Vancouver" default applies regardless of province.
    url_slug = url.rstrip("/").split("/")[-1]
    city_match = re.match(r"([a-z]+)", url_slug)
    city = city_match.group(1).title() if city_match else "Vancouver"

    # Postal code
    postal_match = re.search(r"\b([A-Z]\d[A-Z]\s?\d[A-Z]\d)\b", page_text)
    postal_code = postal_match.group(1) if postal_match else ""

    # Pricing: extract from plan cards (gb-block-layout-column or similar)
    # Formats seen:
    #   "BronzeC$ 14.99 / month SelectC$ 169.99 / year Select..."
    #   "The 2026 PlanC$ 16.00 / month Select..."  (single plan, no yearly)
    plan_texts = await page.evaluate("""() => {
        const cols = document.querySelectorAll('.gb-block-layout-column, [class*="plan"], [class*="price"]');
        return [...cols].map(c => c.textContent.replace(/[\\s]+/g, ' ').trim()).filter(t => /C\\$/.test(t));
    }""")

    monthly_cad_cents = 0
    yearly_cad_cents = 0

    # Priority: Bronze (non-promo) → any named plan → first plan with C$
    plan_priority = ["bronze", "basic", "starter", "standard", "the 2026", "silver"]

    for target in plan_priority:
        for pt in plan_texts:
            if target in pt.lower() and "promo" not in pt.lower():
                mo_match = re.search(r"C\$\s?([\d,]+(?:\.\d{1,2})?)\s*/\s*month", pt, re.IGNORECASE)
                if mo_match:
                    monthly_cad_cents = _amount_to_cents(mo_match.group(1))
                yr_match = re.search(r"C\$\s?([\d,]+(?:\.\d{1,2})?)\s*/\s*year", pt, re.IGNORECASE)
                if yr_match:
                    yearly_cad_cents = _amount_to_cents(yr_match.group(1))
                if monthly_cad_cents or yearly_cad_cents:
                    break
        if monthly_cad_cents or yearly_cad_cents:
            break

    # Last resort: grab the first C$ price of each period from the page
    if not monthly_cad_cents and not yearly_cad_cents:
        all_prices = re.findall(r"C\$\s?([\d,]+(?:\.\d{1,2})?)\s*/\s*(month|year)", page_text, re.IGNORECASE)
        for amount_str, period in all_prices:
            cents = _amount_to_cents(amount_str)
            if period.lower() in ("month",) and not monthly_cad_cents:
                monthly_cad_cents = cents
            elif period.lower() in ("year",) and not yearly_cad_cents:
                yearly_cad_cents = cents

    # No yearly plan advertised: assume twelve monthly payments.
    if not yearly_cad_cents and monthly_cad_cents:
        yearly_cad_cents = monthly_cad_cents * 12

    # Convert CAD to USD using a fixed approximate rate (scraper stores USD)
    # The FX rate is updated daily by the API's fx.ts module; here we use a
    # conservative estimate. The order form will show the exact USD at order time.
    CAD_TO_USD = float(os.getenv("CAD_TO_USD_RATE", "0.72"))
    monthly_usd_cents = int(monthly_cad_cents * CAD_TO_USD)
    yearly_usd_cents = int(yearly_cad_cents * CAD_TO_USD)

    # Check mailbox availability — look for sold out / no availability indicators
    available_units = -1  # -1 = unknown
    avail_text = page_text.lower()
    if any(kw in avail_text for kw in ["sold out", "no mailboxes available", "currently unavailable", "waitlist", "no units available"]):
        available_units = 0
        LOG.warning(" %s: SOLD OUT — no mailboxes available", address)
    else:
        # Try to click into signup flow to count available unit numbers
        try:
            for sel in ['button:has-text("Select")', 'a:has-text("Select")']:
                btn = await page.query_selector(sel)
                if btn and await btn.is_visible():
                    await btn.click()
                    break
            await page.wait_for_timeout(2000)

            # Look for mailbox number dropdown/select
            unit_count = await page.evaluate("""() => {
                const selects = document.querySelectorAll('select');
                for (const sel of selects) {
                    const opts = [...sel.options].filter(o => o.value && o.value !== '');
                    if (opts.length > 0) return opts.length;
                }
                // Check for radio buttons or list items
                const radios = document.querySelectorAll('input[type="radio"][name*="mailbox"], input[type="radio"][name*="unit"]');
                if (radios.length > 0) return radios.length;
                return -1;
            }""")
            available_units = unit_count if isinstance(unit_count, int) else -1
        except Exception as e:
            # Best-effort probe: keep availability as unknown, but leave a trace.
            LOG.debug(" Availability probe failed for %s: %s", url, e)

    slug = slugify(f"{address}-{city}")

    LOG.info(" %s: %s, %s — C$%.2f/yr (US$%.2f/yr), C$%.2f/mo, units=%s",
             slug, address, city,
             yearly_cad_cents / 100, yearly_usd_cents / 100,
             monthly_cad_cents / 100,
             "sold_out" if available_units == 0 else str(available_units) if available_units > 0 else "unknown")

    return {
        "slug": slug,
        "name": address,
        "full_address": f"{address}, {city}, {province} {postal_code}",
        "city": city,
        "province": province,
        "postal_code": postal_code,
        "provider_url": url,
        # NOTE(review): always reported as "Bronze", even when the price came
        # from another plan in plan_priority or from the page-text fallback.
        "plan_name": "Bronze",
        "monthly_price_usd": monthly_usd_cents,
        "yearly_price_usd": yearly_usd_cents,
        "available_units": available_units,
        "operator_name": operator_name,
    }
|
||||
|
||||
|
||||
def upsert_locations(locations: list[dict], province: str = "BC") -> list[dict]:
    """Write scraped locations to ``amb_locations`` and report what changed.

    Each location is looked up by slug: existing rows are updated, unknown
    slugs are inserted. Afterwards every active row of ``province`` whose slug
    was not part of this scrape is deactivated. Everything runs in one
    transaction that is committed at the end.

    Args:
        locations: Location dicts as produced by ``_scrape_single_location``.
        province: Province code that scopes the "not seen" deactivation.

    Returns:
        Change records for the admin alert: one per existing location that
        just became sold out (``"sold_out": True``) and one per existing
        location whose monthly or yearly price differs from the stored value.
    """
    connection = psycopg2.connect(DATABASE_URL)
    stamp = datetime.now(timezone.utc)
    detected = []

    try:
        with connection.cursor() as cursor:
            for entry in locations:
                slug = entry["slug"]
                # 0 = sold out (hide from the order form); -1 = unknown and
                # >0 = has units, both of which keep the location active.
                unit_total = entry.get("available_units", -1)
                keep_active = unit_total != 0

                cursor.execute(
                    "SELECT yearly_price_usd, monthly_price_usd, is_active FROM amb_locations WHERE slug = %s",
                    (slug,),
                )
                row = cursor.fetchone()

                if not row:
                    # First time this slug is seen: plain insert, no change record.
                    cursor.execute("""
                        INSERT INTO amb_locations (slug, name, full_address, city, province, postal_code,
                            provider_url, plan_name, monthly_price_usd, yearly_price_usd,
                            available_units, is_active, operator_name, last_scraped_at, created_at, updated_at)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        slug, entry["name"], entry["full_address"], entry["city"], entry["province"],
                        entry["postal_code"], entry["provider_url"], entry["plan_name"],
                        entry["monthly_price_usd"], entry["yearly_price_usd"],
                        unit_total, keep_active, entry.get("operator_name"), stamp, stamp, stamp,
                    ))
                    LOG.info("NEW LOCATION: %s — %s, %s — $%d/yr", slug, entry["name"], entry["city"], entry["yearly_price_usd"] // 100)
                    continue

                prev_yearly, prev_monthly, prev_active = row
                price_differs = not (
                    prev_yearly == entry["yearly_price_usd"]
                    and prev_monthly == entry["monthly_price_usd"]
                )

                # price_changed_at only moves when the price differs, and a
                # missing operator_name never overwrites a stored one.
                cursor.execute("""
                    UPDATE amb_locations SET
                        name = %s, full_address = %s, city = %s, province = %s,
                        postal_code = %s, provider_url = %s, plan_name = %s,
                        monthly_price_usd = %s, yearly_price_usd = %s,
                        available_units = %s,
                        is_active = %s, last_scraped_at = %s,
                        price_changed_at = CASE WHEN %s THEN %s ELSE price_changed_at END,
                        operator_name = COALESCE(%s, operator_name),
                        updated_at = %s
                    WHERE slug = %s
                """, (
                    entry["name"], entry["full_address"], entry["city"], entry["province"],
                    entry["postal_code"], entry["provider_url"], entry["plan_name"],
                    entry["monthly_price_usd"], entry["yearly_price_usd"],
                    unit_total, keep_active, stamp,
                    price_differs, stamp,
                    entry.get("operator_name"),
                    stamp, slug,
                ))

                record = {
                    "slug": slug,
                    "name": entry["name"],
                    "old_yearly": prev_yearly,
                    "new_yearly": entry["yearly_price_usd"],
                    "old_monthly": prev_monthly,
                    "new_monthly": entry["monthly_price_usd"],
                }

                if prev_active and not keep_active:
                    detected.append({**record, "sold_out": True})
                    LOG.warning("SOLD OUT: %s — no mailboxes available, deactivated", slug)

                if price_differs:
                    detected.append(record)
                    LOG.warning("PRICE CHANGE: %s — yearly $%d → $%d", slug, prev_yearly // 100, entry["yearly_price_usd"] // 100)

            # Mark locations not seen in this scrape as inactive (scoped to province)
            seen_slugs = [entry["slug"] for entry in locations]
            if seen_slugs:
                cursor.execute(
                    "UPDATE amb_locations SET is_active = FALSE, updated_at = %s "
                    "WHERE slug != ALL(%s) AND province = %s AND is_active = TRUE",
                    (stamp, seen_slugs, province),
                )
                dropped = cursor.rowcount
                if dropped:
                    LOG.warning("[%s] Deactivated %d locations not found in scrape", province, dropped)

        connection.commit()
    finally:
        connection.close()

    return detected
|
||||
|
||||
|
||||
def send_price_change_alert(changes: list[dict]):
    """Send the admin an email summarising price changes and sold-out locations.

    Nothing is sent when ``changes`` is empty. When ``SMTP_PASS`` is not
    configured the alert is skipped with a warning instead of silently.
    SMTP failures are logged and never raised.

    Args:
        changes: Change records from ``upsert_locations``. Each has ``slug``,
            ``name``, ``old_yearly``, ``new_yearly``, ``old_monthly`` and
            ``new_monthly`` (prices in cents), plus ``sold_out`` when the
            location was deactivated for lack of mailboxes.
    """
    if not changes:
        return
    if not SMTP_PASS:
        LOG.warning("SMTP_PASS not set — skipping alert email for %d change(s)", len(changes))
        return

    def _usd(cents) -> str:
        # Show cents so sub-dollar changes stay visible; a missing stored
        # price is rendered as n/a rather than raising.
        return "n/a" if cents is None else f"${cents / 100:.2f}"

    lines = []
    for c in changes:
        if c.get("sold_out"):
            lines.append(f" SOLD OUT: {c['name']} ({c['slug']}) — no mailboxes available, location deactivated")
        else:
            lines.append(
                f" {c['name']} ({c['slug']}): "
                f"yearly {_usd(c['old_yearly'])} → {_usd(c['new_yearly'])}, "
                f"monthly {_usd(c['old_monthly'])} → {_usd(c['new_monthly'])}"
            )

    body = (
        f"Anytime Mailbox price changes detected on {datetime.now(timezone.utc).strftime('%Y-%m-%d')}:\n\n"
        + "\n".join(lines)
        + "\n\nPlease review and update any affected pending orders."
        + f"\n\nhttps://{DOMAIN}/admin"
    )

    msg = MIMEMultipart()
    msg["From"] = SMTP_FROM
    msg["To"] = ADMIN_EMAIL
    msg["Subject"] = f"[PW Alert] Anytime Mailbox price change — {len(changes)} location(s)"
    msg.attach(MIMEText(body, "plain"))

    try:
        with smtplib.SMTP(SMTP_HOST, SMTP_PORT, timeout=30) as server:
            server.ehlo()
            server.starttls()
            server.ehlo()  # re-identify over the encrypted channel
            server.login(SMTP_USER, SMTP_PASS)
            server.sendmail(SMTP_USER, [ADMIN_EMAIL], msg.as_string())
        LOG.info("Sent price change alert to %s", ADMIN_EMAIL)
    except Exception as e:
        # Alerting is best-effort: the scrape results are already committed.
        LOG.error("Failed to send price change alert: %s", e)
|
||||
|
||||
|
||||
async def main():
    """Scrape every configured province, persist results, and alert on changes.

    Provinces are processed in ``AMB_PROVINCE_URLS`` order. A failure in one
    province is logged and does not stop the others, so changes already
    committed for earlier provinces are still reported to the admin.

    Raises:
        Exception: The first error raised by a province run is re-raised
            after all provinces were attempted and the alert was sent, so
            the process still exits with a failure.
    """
    all_changes = []
    total_locations = 0
    first_error = None

    for province in AMB_PROVINCE_URLS:
        LOG.info("=" * 50)
        LOG.info("Starting AMB %s location scrape", province)
        LOG.info("=" * 50)

        try:
            locations = await scrape_province_locations(province)

            if not locations:
                LOG.error("[%s] No locations scraped — check if AMB site changed or province page moved", province)
                continue

            changes = upsert_locations(locations, province)
        except Exception as exc:
            LOG.exception("[%s] Province run failed; continuing with remaining provinces", province)
            if first_error is None:
                first_error = exc
            continue

        all_changes.extend(changes)
        total_locations += len(locations)

        LOG.info("[%s] Processed %d locations, %d changes", province, len(locations), len(changes))

    if all_changes:
        LOG.warning("%d total price/availability changes detected", len(all_changes))
        send_price_change_alert(all_changes)
    else:
        LOG.info("No price or availability changes detected")

    LOG.info("AMB scrape complete: %d locations across %d provinces", total_locations, len(AMB_PROVINCE_URLS))

    if first_error is not None:
        # Surface the failure to the caller (and cron) once alerts are out.
        raise first_error
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: drive the async scrape to completion.
    asyncio.run(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue