""" Fetch bid history for existing lots that have bids but no bid history records. Reads cached lot pages to get lot UUIDs, then calls bid history API. """ import sys import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) import asyncio from cache import CacheManager import sqlite3 import zlib import json import re from bid_history_client import fetch_bid_history, parse_bid_history async def fetch_missing_bid_history(): """Fetch bid history for lots that have bids but no history records""" cache = CacheManager() conn = sqlite3.connect(cache.db_path) cursor = conn.cursor() # Get lots with bids but no bid history cursor.execute(""" SELECT l.lot_id, l.bid_count FROM lots l WHERE l.bid_count > 0 AND l.lot_id NOT IN (SELECT DISTINCT lot_id FROM bid_history) ORDER BY l.bid_count DESC """) lots_to_fetch = cursor.fetchall() print(f"Found {len(lots_to_fetch)} lots with bids but no bid history") if not lots_to_fetch: print("No lots to process!") conn.close() return # Build mapping from lot_id to lot UUID from cached pages print("Building lot_id -> UUID mapping from cache...") cursor.execute(""" SELECT url, content FROM cache WHERE url LIKE '%/l/%' """) lot_id_to_uuid = {} total_cached = 0 for url, content_blob in cursor: total_cached += 1 if total_cached % 100 == 0: print(f"Processed {total_cached} cached pages...", end='\r') try: content = zlib.decompress(content_blob).decode('utf-8') match = re.search(r']*id="__NEXT_DATA__"[^>]*>(.+?)', content, re.DOTALL) if not match: continue data = json.loads(match.group(1)) lot = data.get('props', {}).get('pageProps', {}).get('lot', {}) if not lot: continue lot_display_id = lot.get('displayId') lot_uuid = lot.get('id') if lot_display_id and lot_uuid: lot_id_to_uuid[lot_display_id] = lot_uuid except: continue print(f"\n\nBuilt UUID mapping for {len(lot_id_to_uuid)} lots") # Fetch bid history for each lot print("\nFetching bid history from API...") fetched = 0 failed = 0 no_uuid = 0 for lot_id, bid_count in lots_to_fetch: lot_uuid = lot_id_to_uuid.get(lot_id) if not lot_uuid: no_uuid += 1 continue try: print(f"\nFetching bid history for {lot_id} ({bid_count} bids)...") bid_history = await fetch_bid_history(lot_uuid) if bid_history: bid_data = parse_bid_history(bid_history, lot_id) # Update lots table with bid intelligence cursor.execute(""" UPDATE lots SET first_bid_time = ?, last_bid_time = ?, bid_velocity = ? WHERE lot_id = ? """, ( bid_data['first_bid_time'], bid_data['last_bid_time'], bid_data['bid_velocity'], lot_id )) # Save bid history records cache.save_bid_history(lot_id, bid_data['bid_records']) fetched += 1 print(f" Saved {len(bid_data['bid_records'])} bid records") print(f" Bid velocity: {bid_data['bid_velocity']:.2f} bids/hour") # Commit every 10 lots if fetched % 10 == 0: conn.commit() print(f"\nProgress: {fetched}/{len(lots_to_fetch)} lots processed...") # Rate limit to be respectful await asyncio.sleep(0.5) else: failed += 1 except Exception as e: print(f" Error fetching bid history for {lot_id}: {e}") failed += 1 continue conn.commit() print(f"\n\nComplete!") print(f"Total lots to process: {len(lots_to_fetch)}") print(f"Successfully fetched: {fetched}") print(f"Failed: {failed}") print(f"No UUID found: {no_uuid}") # Verify fix cursor.execute(""" SELECT COUNT(DISTINCT lot_id) FROM bid_history """) lots_with_history = cursor.fetchone()[0] cursor.execute(""" SELECT COUNT(*) FROM lots WHERE bid_count > 0 """) lots_with_bids = cursor.fetchone()[0] print(f"\nLots with bids: {lots_with_bids}") print(f"Lots with bid history: {lots_with_history}") print(f"Coverage: {lots_with_history/lots_with_bids*100:.1f}%") conn.close() if __name__ == "__main__": asyncio.run(fetch_missing_bid_history())