"""
Fix orphaned lots by updating auction_id from UUID to displayId.

This migration reads cached lot pages and extracts the correct auction displayId.
"""

import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from cache import CacheManager
import sqlite3
import zlib
import json
import re

# Matches the JSON payload of the `<script id="__NEXT_DATA__">` element.
# The closing `</script>` anchor is required: without it the lazy `(.+?)`
# group would capture a single character and every page would fail to parse.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>',
    re.DOTALL,
)

# Failures that mean "this cached page is unusable", as opposed to a bug in
# the migration itself: bad zlib stream, non-UTF-8 bytes / invalid JSON
# (both ValueError subclasses), a NULL blob, or JSON that is not shaped
# like nested objects.
_PAGE_PARSE_ERRORS = (zlib.error, ValueError, TypeError, AttributeError)

# How many failing URLs to show in the summary.
_MAX_ERROR_SAMPLES = 5


def _extract_display_ids(content_blob):
    """Return ``(lot displayId, auction displayId)`` for one cached page.

    Args:
        content_blob: zlib-compressed, UTF-8 encoded page content as stored
            in the ``cache.content`` column.

    Returns:
        A 2-tuple of display ids, or ``None`` when the page has no
        ``__NEXT_DATA__`` payload or the payload lacks a lot, an auction,
        or either ``displayId``.

    Raises:
        zlib.error, ValueError, TypeError, AttributeError: if the blob cannot
            be decompressed, decoded, or parsed as the expected JSON shape.
    """
    html = zlib.decompress(content_blob).decode('utf-8')
    match = _NEXT_DATA_RE.search(html)
    if not match:
        return None

    data = json.loads(match.group(1))
    page_props = data.get('props', {}).get('pageProps', {})
    lot = page_props.get('lot', {})
    auction = page_props.get('auction', {})
    if not lot or not auction:
        return None

    lot_display_id = lot.get('displayId')
    auction_display_id = auction.get('displayId')
    if lot_display_id and auction_display_id:
        return lot_display_id, auction_display_id
    return None


def fix_orphaned_lots():
    """Update lot auction_id from UUID to auction displayId.

    Steps:
      1. Select lots whose ``auction_id`` is longer than 20 characters
         (treated as UUIDs).
      2. Scan cached pages whose URL contains ``/l/`` and build a mapping
         from lot displayId to auction displayId.
      3. Rewrite ``lots.auction_id`` for every selected lot found in the
         mapping, then commit.
      4. Report how many lots still reference an ``auction_id`` that is not
         present in the ``auctions`` table.

    Progress and the final summary are printed to stdout. The database
    connection is always closed, even if a step raises; uncommitted updates
    are then discarded.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Get all lots that need fixing (have UUID auction_id)
        cursor.execute("""
            SELECT l.lot_id, l.auction_id
            FROM lots l
            WHERE length(l.auction_id) > 20  -- UUID is longer than displayId like "A1-12345"
        """)
        lots_to_fix = dict(cursor.fetchall())

        print(f"Found {len(lots_to_fix)} lots with UUID auction_id that need fixing")

        if not lots_to_fix:
            print("No lots to fix!")
            return

        # Build mapping from lot displayId to auction displayId from cached pages
        print("Building lot displayId -> auction displayId mapping from cache...")

        cursor.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/l/%'
        """)

        lot_to_auction_map = {}
        total = 0
        errors = 0
        error_samples = []

        for url, content_blob in cursor:
            total += 1

            if total % 100 == 0:
                print(f"Processing cached pages... {total}", end='\r')

            try:
                ids = _extract_display_ids(content_blob)
            except _PAGE_PARSE_ERRORS as exc:
                # Best-effort: one bad cached page must not abort the run,
                # but keep a few samples so the failures can be diagnosed.
                errors += 1
                if len(error_samples) < _MAX_ERROR_SAMPLES:
                    error_samples.append((url, exc))
                continue

            if ids is not None:
                lot_display_id, auction_display_id = ids
                lot_to_auction_map[lot_display_id] = auction_display_id

        print(f"\n\nBuilt mapping for {len(lot_to_auction_map)} lots")
        print(f"Errors while parsing: {errors}")
        for url, exc in error_samples:
            print(f" {url}: {type(exc).__name__}: {exc}")

        # Now update the lots table
        print("\nUpdating lots table...")
        updated = 0
        not_found = 0

        # NOTE(review): this lookup only matches if lots.lot_id stores the
        # same value as the page's lot displayId — confirm against the schema.
        for lot_id in lots_to_fix:
            new_auction_id = lot_to_auction_map.get(lot_id)
            if new_auction_id is not None:
                cursor.execute("""
                    UPDATE lots
                    SET auction_id = ?
                    WHERE lot_id = ?
                """, (new_auction_id, lot_id))
                updated += 1
            else:
                not_found += 1

            if (updated + not_found) % 100 == 0:
                print(f"Updated: {updated}, not found: {not_found}", end='\r')

        conn.commit()

        print("\n\nComplete!")
        print(f"Total cached pages processed: {total}")
        print(f"Lots updated with auction displayId: {updated}")
        print(f"Lots not found in cache: {not_found}")
        print(f"Parse errors: {errors}")

        # Verify fix
        # NOTE(review): NOT IN matches no rows if auctions.auction_id contains
        # a NULL — presumably the column is NOT NULL; confirm.
        cursor.execute("""
            SELECT COUNT(*) FROM lots
            WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
        """)
        orphaned = cursor.fetchone()[0]

        print(f"\nOrphaned lots remaining: {orphaned}")

        if orphaned > 0:
            # Show sample of remaining orphans
            cursor.execute("""
                SELECT lot_id, auction_id FROM lots
                WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
                LIMIT 5
            """)
            print("\nSample remaining orphaned lots:")
            for lot_id, auction_id in cursor.fetchall():
                print(f" {lot_id} -> auction_id: {auction_id}")
    finally:
        # Release the database handle on every path, including early return
        # and unexpected errors.
        conn.close()


if __name__ == "__main__":
    fix_orphaned_lots()