diff --git a/scripts/workers/fcc_rmd_removed_scraper.py b/scripts/workers/fcc_rmd_removed_scraper.py index 24daf64..f5e855f 100644 --- a/scripts/workers/fcc_rmd_removed_scraper.py +++ b/scripts/workers/fcc_rmd_removed_scraper.py @@ -113,15 +113,18 @@ def run_email_research(conn: psycopg2.extensions.connection) -> int: 2. Try FCC CORES lookup by FRN 3. Try guessing the company website and scraping contact emails """ - cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) - cur.execute(""" - SELECT r.id, r.rmd_number, r.frn, r.business_name, r.business_address - FROM fcc_rmd_removed r - WHERE r.contact_email IS NULL - AND r.business_name NOT LIKE '[%' - ORDER BY r.id - """) - rows = cur.fetchall() + # Fetch all IDs upfront then close cursor — don't hold a transaction + # open while doing slow HTTP requests (CORES, website scraping) + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute(""" + SELECT r.id, r.rmd_number, r.frn, r.business_name, r.business_address + FROM fcc_rmd_removed r + WHERE r.contact_email IS NULL + AND r.business_name NOT LIKE '[%' + ORDER BY r.id + """) + rows = cur.fetchall() + conn.commit() # close the read transaction immediately LOG.info("Researching emails for %d removed carriers …", len(rows)) found = 0 diff --git a/scripts/workers/fcc_rmd_scraper.py b/scripts/workers/fcc_rmd_scraper.py index 8433e35..9b49ceb 100644 --- a/scripts/workers/fcc_rmd_scraper.py +++ b/scripts/workers/fcc_rmd_scraper.py @@ -304,18 +304,21 @@ def run_email_scrape( Returns the number of emails successfully fetched. """ - cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) - query = """ - SELECT id, rmd_number, servicenow_sys_id - FROM fcc_rmd - WHERE contact_email IS NULL - AND servicenow_sys_id IS NOT NULL - ORDER BY rmd_number - """ - if limit: - query += f" LIMIT {int(limit)}" - cur.execute(query) - rows = cur.fetchall() + # Fetch all IDs upfront then close the cursor — don't hold a transaction + # open while doing slow HTTP requests to ServiceNow + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + query = """ + SELECT id, rmd_number, servicenow_sys_id + FROM fcc_rmd + WHERE contact_email IS NULL + AND servicenow_sys_id IS NOT NULL + ORDER BY rmd_number + """ + if limit: + query += f" LIMIT {int(limit)}" + cur.execute(query) + rows = cur.fetchall() + conn.commit() # close the read transaction immediately LOG.info("Fetching email for %d records via SP API …", len(rows)) scraped = 0 @@ -340,16 +343,17 @@ def run_email_scrape( email_val = _fetch_email_via_sp_api(sys_id, session) if email_val: - cur.execute( - """ - UPDATE fcc_rmd - SET contact_email = %s, - contact_email_scraped_at = now(), - updated_at = now() - WHERE id = %s - """, - (email_val, row["id"]), - ) + with conn.cursor() as ucur: + ucur.execute( + """ + UPDATE fcc_rmd + SET contact_email = %s, + contact_email_scraped_at = now(), + updated_at = now() + WHERE id = %s + """, + (email_val, row["id"]), + ) conn.commit() scraped += 1 if i <= 10 or i % 500 == 0: