Fix long-running PG transactions in RMD scrapers
Both scrapers held a cursor/transaction open while making slow HTTP requests to FCC ServiceNow and company websites, leaving the connection "idle in transaction" for 10+ minutes and triggering the PostgresSlowQueries alert. Fix: fetch all candidate rows upfront, commit the read transaction immediately, then process each row with its own short UPDATE+COMMIT cycle, so that no transaction stays open across network I/O.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5e74c1dcb9
commit
eee2aa497b
2 changed files with 38 additions and 31 deletions
|
|
@@ -113,15 +113,18 @@ def run_email_research(conn: psycopg2.extensions.connection) -> int:
|
||||||
2. Try FCC CORES lookup by FRN
|
2. Try FCC CORES lookup by FRN
|
||||||
3. Try guessing the company website and scraping contact emails
|
3. Try guessing the company website and scraping contact emails
|
||||||
"""
|
"""
|
||||||
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
|
# Fetch all IDs upfront then close cursor — don't hold a transaction
|
||||||
cur.execute("""
|
# open while doing slow HTTP requests (CORES, website scraping)
|
||||||
SELECT r.id, r.rmd_number, r.frn, r.business_name, r.business_address
|
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||||
FROM fcc_rmd_removed r
|
cur.execute("""
|
||||||
WHERE r.contact_email IS NULL
|
SELECT r.id, r.rmd_number, r.frn, r.business_name, r.business_address
|
||||||
AND r.business_name NOT LIKE '[%'
|
FROM fcc_rmd_removed r
|
||||||
ORDER BY r.id
|
WHERE r.contact_email IS NULL
|
||||||
""")
|
AND r.business_name NOT LIKE '[%'
|
||||||
rows = cur.fetchall()
|
ORDER BY r.id
|
||||||
|
""")
|
||||||
|
rows = cur.fetchall()
|
||||||
|
conn.commit() # close the read transaction immediately
|
||||||
LOG.info("Researching emails for %d removed carriers …", len(rows))
|
LOG.info("Researching emails for %d removed carriers …", len(rows))
|
||||||
|
|
||||||
found = 0
|
found = 0
|
||||||
|
|
|
||||||
|
|
@@ -304,18 +304,21 @@ def run_email_scrape(
|
||||||
|
|
||||||
Returns the number of emails successfully fetched.
|
Returns the number of emails successfully fetched.
|
||||||
"""
|
"""
|
||||||
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
|
# Fetch all IDs upfront then close the cursor — don't hold a transaction
|
||||||
query = """
|
# open while doing slow HTTP requests to ServiceNow
|
||||||
SELECT id, rmd_number, servicenow_sys_id
|
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||||
FROM fcc_rmd
|
query = """
|
||||||
WHERE contact_email IS NULL
|
SELECT id, rmd_number, servicenow_sys_id
|
||||||
AND servicenow_sys_id IS NOT NULL
|
FROM fcc_rmd
|
||||||
ORDER BY rmd_number
|
WHERE contact_email IS NULL
|
||||||
"""
|
AND servicenow_sys_id IS NOT NULL
|
||||||
if limit:
|
ORDER BY rmd_number
|
||||||
query += f" LIMIT {int(limit)}"
|
"""
|
||||||
cur.execute(query)
|
if limit:
|
||||||
rows = cur.fetchall()
|
query += f" LIMIT {int(limit)}"
|
||||||
|
cur.execute(query)
|
||||||
|
rows = cur.fetchall()
|
||||||
|
conn.commit() # close the read transaction immediately
|
||||||
|
|
||||||
LOG.info("Fetching email for %d records via SP API …", len(rows))
|
LOG.info("Fetching email for %d records via SP API …", len(rows))
|
||||||
scraped = 0
|
scraped = 0
|
||||||
|
|
@@ -340,16 +343,17 @@ def run_email_scrape(
|
||||||
email_val = _fetch_email_via_sp_api(sys_id, session)
|
email_val = _fetch_email_via_sp_api(sys_id, session)
|
||||||
|
|
||||||
if email_val:
|
if email_val:
|
||||||
cur.execute(
|
with conn.cursor() as ucur:
|
||||||
"""
|
ucur.execute(
|
||||||
UPDATE fcc_rmd
|
"""
|
||||||
SET contact_email = %s,
|
UPDATE fcc_rmd
|
||||||
contact_email_scraped_at = now(),
|
SET contact_email = %s,
|
||||||
updated_at = now()
|
contact_email_scraped_at = now(),
|
||||||
WHERE id = %s
|
updated_at = now()
|
||||||
""",
|
WHERE id = %s
|
||||||
(email_val, row["id"]),
|
""",
|
||||||
)
|
(email_val, row["id"]),
|
||||||
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
scraped += 1
|
scraped += 1
|
||||||
if i <= 10 or i % 500 == 0:
|
if i <= 10 or i % 500 == 0:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue