""" Enrich existing lots with new intelligence fields: - followers_count - estimated_min_price / estimated_max_price - lot_condition - appearance Reads from cached lot pages __NEXT_DATA__ JSON """ import sys import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) import asyncio from cache import CacheManager import sqlite3 import zlib import json import re from graphql_client import fetch_lot_bidding_data, format_bid_data async def enrich_existing_lots(): """Enrich existing lots with new fields from GraphQL API""" cache = CacheManager() conn = sqlite3.connect(cache.db_path) cursor = conn.cursor() # Get all lot IDs cursor.execute("SELECT lot_id FROM lots") lot_ids = [r[0] for r in cursor.fetchall()] print(f"Found {len(lot_ids)} lots to enrich") print("Fetching enrichment data from GraphQL API...") print("This will take ~{:.1f} minutes (0.5s rate limit)".format(len(lot_ids) * 0.5 / 60)) enriched = 0 failed = 0 no_data = 0 for i, lot_id in enumerate(lot_ids): if (i + 1) % 10 == 0: print(f"Progress: {i+1}/{len(lot_ids)} ({enriched} enriched, {no_data} no data, {failed} failed)", end='\r') try: # Fetch from GraphQL API bidding_data = await fetch_lot_bidding_data(lot_id) if bidding_data: formatted_data = format_bid_data(bidding_data) # Update lot with new fields cursor.execute(""" UPDATE lots SET followers_count = ?, estimated_min_price = ?, estimated_max_price = ?, lot_condition = ?, appearance = ? WHERE lot_id = ? """, ( formatted_data.get('followers_count', 0), formatted_data.get('estimated_min_price'), formatted_data.get('estimated_max_price'), formatted_data.get('lot_condition', ''), formatted_data.get('appearance', ''), lot_id )) enriched += 1 # Commit every 50 lots if enriched % 50 == 0: conn.commit() else: no_data += 1 # Rate limit await asyncio.sleep(0.5) except Exception as e: failed += 1 continue conn.commit() print(f"\n\nComplete!") print(f"Total lots: {len(lot_ids)}") print(f"Enriched: {enriched}") print(f"No data: {no_data}") print(f"Failed: {failed}") # Show statistics cursor.execute("SELECT COUNT(*) FROM lots WHERE followers_count > 0") with_followers = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM lots WHERE estimated_min_price IS NOT NULL") with_estimates = cursor.fetchone()[0] cursor.execute("SELECT COUNT(*) FROM lots WHERE lot_condition IS NOT NULL AND lot_condition != ''") with_condition = cursor.fetchone()[0] print(f"\nEnrichment statistics:") print(f" Lots with followers_count: {with_followers} ({with_followers/len(lot_ids)*100:.1f}%)") print(f" Lots with estimated prices: {with_estimates} ({with_estimates/len(lot_ids)*100:.1f}%)") print(f" Lots with condition: {with_condition} ({with_condition/len(lot_ids)*100:.1f}%)") conn.close() if __name__ == "__main__": print("WARNING: This will make ~16,800 API calls at 0.5s intervals (~2.3 hours)") print("Press Ctrl+C to cancel, or wait 5 seconds to continue...") import time try: time.sleep(5) except KeyboardInterrupt: print("\nCancelled") sys.exit(0) asyncio.run(enrich_existing_lots())