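# Source listing metadata (from the code viewer; not part of the program):
#   path:     scaev/fetch_missing_bid_history.py
#   modified: 2025-12-07 02:20:14 +01:00
#   size:     167 lines, 4.8 KiB
#   language: Python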

"""
Fetch bid history for existing lots that have bids but no bid history records.
Reads cached lot pages to get lot UUIDs, then calls bid history API.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import asyncio
from cache import CacheManager
import sqlite3
import zlib
import json
import re
from bid_history_client import fetch_bid_history, parse_bid_history
# Matches the Next.js hydration payload (<script id="__NEXT_DATA__">) that
# cached lot pages embed; compiled once because it runs for every cached page.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
)


def _extract_lot_ids(content_blob):
    """Return ``(display_id, uuid)`` read from one cached page, or ``None``.

    ``content_blob`` is the zlib-compressed, UTF-8 encoded page body as stored
    in the ``cache.content`` column. ``None`` is returned when the page has no
    ``__NEXT_DATA__`` script, no ``props.pageProps.lot`` object, or when either
    ``displayId`` or ``id`` is missing/empty.

    Malformed input (bad compression, invalid UTF-8/JSON, unexpected JSON
    shape) raises; the caller decides how to handle that.
    """
    content = zlib.decompress(content_blob).decode('utf-8')
    match = _NEXT_DATA_RE.search(content)
    if not match:
        return None
    data = json.loads(match.group(1))
    lot = data.get('props', {}).get('pageProps', {}).get('lot', {})
    if not lot:
        return None
    lot_display_id = lot.get('displayId')
    lot_uuid = lot.get('id')
    if lot_display_id and lot_uuid:
        return lot_display_id, lot_uuid
    return None


def _build_uuid_mapping(cursor):
    """Scan cached lot pages and return a ``{display_id: uuid}`` dict.

    Pages that cannot be decoded or parsed are skipped (best effort).
    """
    cursor.execute("""
        SELECT url, content
        FROM cache
        WHERE url LIKE '%/l/%'
    """)
    lot_id_to_uuid = {}
    for total_cached, (_url, content_blob) in enumerate(cursor, start=1):
        if total_cached % 100 == 0:
            print(f"Processed {total_cached} cached pages...", end='\r')
        try:
            ids = _extract_lot_ids(content_blob)
            if ids is not None:
                lot_id_to_uuid[ids[0]] = ids[1]
        except Exception:
            # Deliberately best-effort: one corrupt page must not abort the
            # scan. ``Exception`` (not a bare ``except``) so that Ctrl-C and
            # SystemExit still stop the script.
            continue
    return lot_id_to_uuid


def _print_coverage(cursor):
    """Print how many lots with bids now have bid history rows."""
    cursor.execute("""
        SELECT COUNT(DISTINCT lot_id) FROM bid_history
    """)
    lots_with_history = cursor.fetchone()[0]
    cursor.execute("""
        SELECT COUNT(*) FROM lots WHERE bid_count > 0
    """)
    lots_with_bids = cursor.fetchone()[0]
    print(f"\nLots with bids: {lots_with_bids}")
    print(f"Lots with bid history: {lots_with_history}")
    if lots_with_bids:
        print(f"Coverage: {lots_with_history/lots_with_bids*100:.1f}%")
    else:
        # Avoid ZeroDivisionError if the table changed underneath us.
        print("Coverage: n/a (no lots with bids)")


async def fetch_missing_bid_history(*, request_delay=0.5):
    """Fetch bid history for lots that have bids but no history records.

    Steps:
      1. Select lots whose ``bid_count`` is positive and whose ``lot_id`` has
         no row in ``bid_history``.
      2. Build a display-id -> UUID mapping from the cached lot pages.
      3. For every lot with a known UUID, await ``fetch_bid_history(uuid)``,
         run the result through ``parse_bid_history`` and store
         ``first_bid_time`` / ``last_bid_time`` / ``bid_velocity`` on the lot
         plus the ``bid_records`` via ``CacheManager.save_bid_history``.
      4. Print a summary and the resulting coverage.

    Args:
        request_delay: Seconds to sleep after each API attempt (successful or
            not) so the remote service is not hammered. Defaults to 0.5.

    Returns:
        None. Progress and results are reported on stdout.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Get lots with bids but no bid history
        cursor.execute("""
            SELECT l.lot_id, l.bid_count
            FROM lots l
            WHERE l.bid_count > 0
            AND l.lot_id NOT IN (SELECT DISTINCT lot_id FROM bid_history)
            ORDER BY l.bid_count DESC
        """)
        lots_to_fetch = cursor.fetchall()
        print(f"Found {len(lots_to_fetch)} lots with bids but no bid history")
        if not lots_to_fetch:
            print("No lots to process!")
            return

        # Build mapping from lot_id to lot UUID from cached pages
        print("Building lot_id -> UUID mapping from cache...")
        lot_id_to_uuid = _build_uuid_mapping(cursor)
        print(f"\n\nBuilt UUID mapping for {len(lot_id_to_uuid)} lots")

        # Fetch bid history for each lot
        print("\nFetching bid history from API...")
        fetched = 0
        failed = 0
        no_uuid = 0
        for lot_id, bid_count in lots_to_fetch:
            lot_uuid = lot_id_to_uuid.get(lot_id)
            if not lot_uuid:
                # No cached page for this lot, so no API call is made.
                no_uuid += 1
                continue

            try:
                print(f"\nFetching bid history for {lot_id} ({bid_count} bids)...")
                bid_history = await fetch_bid_history(lot_uuid)
                if bid_history:
                    bid_data = parse_bid_history(bid_history, lot_id)

                    # Update lots table with bid intelligence
                    cursor.execute("""
                        UPDATE lots
                        SET first_bid_time = ?,
                        last_bid_time = ?,
                        bid_velocity = ?
                        WHERE lot_id = ?
                    """, (
                        bid_data['first_bid_time'],
                        bid_data['last_bid_time'],
                        bid_data['bid_velocity'],
                        lot_id,
                    ))
                    # Commit before handing off to CacheManager.
                    # NOTE(review): if save_bid_history writes through its own
                    # connection (not visible from here), an open write
                    # transaction on this connection would make it fail with
                    # "database is locked" - confirm against CacheManager.
                    conn.commit()

                    # Save bid history records
                    cache.save_bid_history(lot_id, bid_data['bid_records'])
                    fetched += 1
                    print(f" Saved {len(bid_data['bid_records'])} bid records")
                    print(f" Bid velocity: {bid_data['bid_velocity']:.2f} bids/hour")

                    if fetched % 10 == 0:
                        print(f"\nProgress: {fetched}/{len(lots_to_fetch)} lots processed...")
                else:
                    print(f" No bid history returned for {lot_id}")
                    failed += 1
            except Exception as e:
                print(f" Error fetching bid history for {lot_id}: {e}")
                failed += 1

            # Rate limit to be respectful - after every attempt, including
            # failures, so an erroring API is not retried back-to-back.
            await asyncio.sleep(request_delay)

        conn.commit()
        print("\n\nComplete!")
        print(f"Total lots to process: {len(lots_to_fetch)}")
        print(f"Successfully fetched: {fetched}")
        print(f"Failed: {failed}")
        print(f"No UUID found: {no_uuid}")

        # Verify fix
        _print_coverage(cursor)
    finally:
        # Always release the connection, even if a query above raised.
        conn.close()
if __name__ == "__main__":
    # Script entry point: create the coroutine, then drive it to completion
    # on a fresh event loop.
    backfill_job = fetch_missing_bid_history()
    asyncio.run(backfill_job)