"""
Fix auctions table by replacing with correct data from cached auction pages.
The auctions table currently has wrong auction_ids (numeric instead of displayId).
"""

import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from cache import CacheManager
import sqlite3
import zlib
import json
import re
from datetime import datetime

# Captures the JSON payload of the __NEXT_DATA__ script element of a page.
# The closing tag is part of the pattern on purpose: a lazy group with
# nothing after it would stop after a single character and the captured
# text would never be valid JSON.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
)

# Number of per-page failures echoed in the final summary.
_MAX_ERROR_SAMPLES = 5


def _format_closing_time(min_end_date):
    """Return *min_end_date* formatted as ``YYYY-MM-DD HH:MM:SS``.

    A falsy value yields ``''``.  A value that cannot be parsed as an ISO
    timestamp is returned unchanged so the raw data is not lost.
    """
    if not min_end_date:
        return ''
    try:
        # fromisoformat() on older Python versions rejects a trailing 'Z'.
        parsed = datetime.fromisoformat(min_end_date.replace('Z', '+00:00'))
        return parsed.strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, TypeError, AttributeError):
        return min_end_date


def _format_location(viewing_days):
    """Build a ``"City, CC"`` string from the first viewing-day entry.

    Returns ``''`` when *viewing_days* is not a non-empty list or its first
    entry is not a dict.  Missing or ``None`` city/country values are
    treated as empty, so a partial location is returned rather than
    failing the whole auction.
    """
    if not isinstance(viewing_days, list) or not viewing_days:
        return ''
    first_day = viewing_days[0]
    if not isinstance(first_day, dict):
        return ''
    city = first_day.get('city') or ''
    country = (first_day.get('countryCode') or '').upper()
    if city and country:
        return f"{city}, {country}"
    return city or country


def _parse_auction_page(content_blob):
    """Extract auction fields from one zlib-compressed cached page.

    Returns:
        A tuple ``(auction_id, title, location, lots_count,
        first_lot_closing)``.

    Raises:
        ValueError: the page has no ``__NEXT_DATA__`` script, no auction
            object, or no ``displayId`` (``json.JSONDecodeError`` and
            ``UnicodeDecodeError`` are ``ValueError`` subclasses too).
        zlib.error: the blob is not valid zlib data.
    """
    content = zlib.decompress(content_blob).decode('utf-8')
    match = _NEXT_DATA_RE.search(content)
    if not match:
        raise ValueError("no __NEXT_DATA__ script found")

    data = json.loads(match.group(1))
    auction = data.get('props', {}).get('pageProps', {}).get('auction', {})
    if not auction:
        raise ValueError("no auction object in pageProps")

    auction_id = auction.get('displayId')
    if not auction_id:
        raise ValueError("auction has no displayId")

    return (
        auction_id,
        auction.get('name', ''),
        _format_location(auction.get('viewingDays', [])),
        auction.get('lotCount', 0),
        _format_closing_time(auction.get('minEndDate', '')),
    )


def _report_orphaned_lots(cursor):
    """Print how many lots still reference an auction_id absent from auctions."""
    cursor.execute("""
        SELECT COUNT(*) FROM lots
        WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
        AND auction_id != ''
    """)
    orphaned = cursor.fetchone()[0]
    print(f"Orphaned lots remaining: {orphaned}")

    if orphaned == 0:
        print("\nSUCCESS! All lots now have matching auctions!")
        return

    # Show sample of remaining orphans
    cursor.execute("""
        SELECT lot_id, auction_id FROM lots
        WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
        AND auction_id != ''
        LIMIT 5
    """)
    print("\nSample remaining orphaned lots:")
    for lot_id, auction_id in cursor.fetchall():
        print(f" {lot_id} -> auction_id: {auction_id}")

    # Show what auction_ids we do have
    cursor.execute("SELECT auction_id FROM auctions LIMIT 10")
    print("\nSample auction_ids in auctions table:")
    for row in cursor.fetchall():
        print(f" {row[0]}")


def fix_auctions_table():
    """Rebuild auctions table from cached auction pages.

    Deletes every row of ``auctions`` and re-inserts one row per cached
    page whose URL contains ``/a/``, keyed by the auction's ``displayId``.
    The delete and the inserts are committed together, so an unexpected
    failure leaves the previous table contents in place.  Pages that
    cannot be parsed are counted as errors and skipped; a few of the
    failures are printed in the summary.  Finally prints how many lots
    still lack a matching auction.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Not committed on its own: the table must not be left empty if
        # the rebuild below fails part-way.
        print("Clearing auctions table...")
        cursor.execute("DELETE FROM auctions")

        # Get all auction pages from cache
        cursor.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/a/%'
        """)
        auction_pages = cursor.fetchall()
        page_count = len(auction_pages)
        print(f"Found {page_count} auction pages in cache")

        total = 0
        inserted = 0
        errors = 0
        error_samples = []

        print("Extracting auction data from cached pages...")
        for total, (url, content_blob) in enumerate(auction_pages, start=1):
            if total % 10 == 0:
                print(f"Processed {total}/{page_count}...", end='\r')

            try:
                (auction_id, title, location,
                 lots_count, first_lot_closing) = _parse_auction_page(content_blob)
                scraped_at = datetime.now().isoformat()
                cursor.execute("""
                    INSERT OR REPLACE INTO auctions
                    (auction_id, url, title, location, lots_count,
                     first_lot_closing_time, scraped_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (auction_id, url, title, location, lots_count,
                      first_lot_closing, scraped_at))
            except Exception as exc:
                # Deliberately best-effort: one bad page must not abort
                # the rebuild.  The failure is recorded, not swallowed.
                errors += 1
                if len(error_samples) < _MAX_ERROR_SAMPLES:
                    error_samples.append(f"{url}: {exc!r}")
                continue
            inserted += 1

        conn.commit()

        print("\n\nComplete!")
        print(f"Total auction pages processed: {total}")
        print(f"Auctions inserted: {inserted}")
        print(f"Errors: {errors}")
        if error_samples:
            print("\nSample errors:")
            for sample in error_samples:
                print(f" {sample}")

        # Verify fix
        cursor.execute("SELECT COUNT(*) FROM auctions")
        total_auctions = cursor.fetchone()[0]
        print(f"\nTotal auctions in table: {total_auctions}")

        _report_orphaned_lots(cursor)
    finally:
        # Closing without a commit discards any pending transaction.
        conn.close()


if __name__ == "__main__":
    fix_auctions_table()