enrichment
This commit is contained in:
166
fetch_missing_bid_history.py
Normal file
166
fetch_missing_bid_history.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""
|
||||
Fetch bid history for existing lots that have bids but no bid history records.
Reads cached lot pages to get lot UUIDs, then calls the bid history API.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
||||
|
||||
import asyncio
|
||||
from cache import CacheManager
|
||||
import sqlite3
|
||||
import zlib
|
||||
import json
|
||||
import re
|
||||
from bid_history_client import fetch_bid_history, parse_bid_history
|
||||
|
||||
async def fetch_missing_bid_history():
    """Backfill bid history for lots that have bids but no history records.

    Steps, in order:

    1. Select every row of ``lots`` with ``bid_count > 0`` whose ``lot_id``
       does not appear in ``bid_history``.
    2. Scan cached pages whose URL contains ``/l/``, decompress each one and
       read ``props.pageProps.lot`` from its ``__NEXT_DATA__`` JSON to build a
       ``displayId -> id`` (lot_id -> UUID) mapping.
    3. For each selected lot with a known UUID, call ``fetch_bid_history``,
       parse the response with ``parse_bid_history``, update the bid timing
       columns on ``lots`` and store the records via
       ``CacheManager.save_bid_history``.
    4. Print a summary and the resulting bid-history coverage.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)

    # try/finally so the connection is released on every exit path,
    # including an unexpected exception or Ctrl-C.
    try:
        cursor = conn.cursor()

        # Get lots with bids but no bid history.
        # The subquery filters NULLs: in SQL, `x NOT IN (..., NULL)` is never
        # true, so a single NULL lot_id would make this query return nothing.
        cursor.execute("""
            SELECT l.lot_id, l.bid_count
            FROM lots l
            WHERE l.bid_count > 0
            AND l.lot_id NOT IN (
                SELECT DISTINCT lot_id FROM bid_history WHERE lot_id IS NOT NULL
            )
            ORDER BY l.bid_count DESC
        """)

        lots_to_fetch = cursor.fetchall()
        print(f"Found {len(lots_to_fetch)} lots with bids but no bid history")

        if not lots_to_fetch:
            print("No lots to process!")
            return

        # Build mapping from lot_id to lot UUID from cached pages
        print("Building lot_id -> UUID mapping from cache...")

        cursor.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/l/%'
        """)

        # Compiled once instead of being looked up on every cached page.
        next_data_re = re.compile(
            r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
        )

        lot_id_to_uuid = {}
        total_cached = 0
        unreadable = 0

        for url, content_blob in cursor:
            total_cached += 1

            if total_cached % 100 == 0:
                print(f"Processed {total_cached} cached pages...", end='\r')

            try:
                content = zlib.decompress(content_blob).decode('utf-8')
                match = next_data_re.search(content)

                if not match:
                    continue

                data = json.loads(match.group(1))
                lot = data.get('props', {}).get('pageProps', {}).get('lot', {})

                if not lot:
                    continue

                lot_display_id = lot.get('displayId')
                lot_uuid = lot.get('id')

                if lot_display_id and lot_uuid:
                    lot_id_to_uuid[lot_display_id] = lot_uuid

            except (zlib.error, ValueError, TypeError, AttributeError):
                # Best-effort scan: skip pages that are not valid zlib data,
                # not UTF-8, not valid JSON (both are ValueError subclasses),
                # or whose JSON does not have the expected nested-dict shape.
                # Unlike a bare `except:`, this lets KeyboardInterrupt and
                # SystemExit propagate.
                unreadable += 1
                continue

        print(f"\n\nBuilt UUID mapping for {len(lot_id_to_uuid)} lots")
        if unreadable:
            print(f"Skipped {unreadable} cached pages that could not be parsed")

        # Fetch bid history for each lot
        print("\nFetching bid history from API...")

        fetched = 0
        failed = 0
        no_uuid = 0

        for lot_id, bid_count in lots_to_fetch:
            lot_uuid = lot_id_to_uuid.get(lot_id)

            if not lot_uuid:
                # No cached page yielded a UUID for this lot; no API call made.
                no_uuid += 1
                continue

            try:
                print(f"\nFetching bid history for {lot_id} ({bid_count} bids)...")
                bid_history = await fetch_bid_history(lot_uuid)

                if bid_history:
                    bid_data = parse_bid_history(bid_history, lot_id)

                    # Update lots table with bid intelligence
                    cursor.execute("""
                        UPDATE lots
                        SET first_bid_time = ?,
                            last_bid_time = ?,
                            bid_velocity = ?
                        WHERE lot_id = ?
                    """, (
                        bid_data['first_bid_time'],
                        bid_data['last_bid_time'],
                        bid_data['bid_velocity'],
                        lot_id
                    ))

                    # Commit before handing over to the cache manager.
                    # NOTE(review): save_bid_history cannot use this function's
                    # connection, so it presumably writes through its own
                    # (confirm). SQLite allows a single writer, so leaving our
                    # UPDATE uncommitted could block that write with
                    # "database is locked".
                    conn.commit()

                    # Save bid history records
                    cache.save_bid_history(lot_id, bid_data['bid_records'])

                    fetched += 1
                    print(f"  Saved {len(bid_data['bid_records'])} bid records")
                    print(f"  Bid velocity: {bid_data['bid_velocity']:.2f} bids/hour")

                    # Report progress every 10 lots
                    if fetched % 10 == 0:
                        print(f"\nProgress: {fetched}/{len(lots_to_fetch)} lots processed...")
                else:
                    failed += 1

            except Exception as e:
                # One bad lot must not abort the whole backfill.
                print(f"  Error fetching bid history for {lot_id}: {e}")
                failed += 1

            # Rate limit to be respectful. Applied after every API attempt,
            # not only successful ones, so a run of failures or empty
            # responses does not hit the API back-to-back.
            await asyncio.sleep(0.5)

        conn.commit()

        print("\n\nComplete!")
        print(f"Total lots to process: {len(lots_to_fetch)}")
        print(f"Successfully fetched: {fetched}")
        print(f"Failed: {failed}")
        print(f"No UUID found: {no_uuid}")

        # Verify fix
        cursor.execute("""
            SELECT COUNT(DISTINCT lot_id) FROM bid_history
        """)
        lots_with_history = cursor.fetchone()[0]

        cursor.execute("""
            SELECT COUNT(*) FROM lots WHERE bid_count > 0
        """)
        lots_with_bids = cursor.fetchone()[0]

        # Guard the division in case the table changed underneath us.
        coverage = lots_with_history / lots_with_bids * 100 if lots_with_bids else 0.0

        print(f"\nLots with bids: {lots_with_bids}")
        print(f"Lots with bid history: {lots_with_history}")
        print(f"Coverage: {coverage:.1f}%")
    finally:
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: create the backfill coroutine and run it to
    # completion on a fresh event loop.
    backfill = fetch_missing_bid_history()
    asyncio.run(backfill)
|
||||
Reference in New Issue
Block a user