Files
scaev/fix_orphaned_lots.py
2025-12-07 02:20:14 +01:00

137 lines
4.0 KiB
Python

"""
Fix orphaned lots by updating auction_id from UUID to displayId.
This migration reads cached lot pages and extracts the correct auction displayId.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from cache import CacheManager
import sqlite3
import zlib
import json
import re
def fix_orphaned_lots():
    """Rewrite each lot's auction_id from an auction UUID to the auction displayId.

    Lots were stored pointing at an auction UUID while the auctions table is
    keyed by displayId (e.g. "A1-12345"), leaving those lots orphaned. This
    migration re-parses cached lot pages (the embedded __NEXT_DATA__ JSON) to
    recover the lot displayId -> auction displayId mapping, then updates the
    lots table in place and reports how many orphans remain.

    Side effects: mutates the `lots` table in the cache database, prints
    progress to stdout. Returns None.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    # Compile once instead of re-scanning the pattern source per cached page.
    next_data_re = re.compile(
        r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
    )
    try:
        cursor = conn.cursor()

        # Get all lots that need fixing (have UUID auction_id).
        cursor.execute("""
            SELECT l.lot_id, l.auction_id
            FROM lots l
            WHERE length(l.auction_id) > 20 -- UUID is longer than displayId like "A1-12345"
        """)
        lots_to_fix = {lot_id: auction_uuid for lot_id, auction_uuid in cursor.fetchall()}
        print(f"Found {len(lots_to_fix)} lots with UUID auction_id that need fixing")

        if not lots_to_fix:
            print("No lots to fix!")
            return

        # Build mapping from lot displayId to auction displayId from cached pages.
        print("Building lot displayId -> auction displayId mapping from cache...")
        cursor.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/l/%'
        """)
        lot_to_auction_map = {}
        total = 0
        errors = 0
        # Iterate the cursor lazily: cached page blobs may be large, so avoid
        # materializing every row at once with fetchall().
        for _url, content_blob in cursor:
            total += 1
            if total % 100 == 0:
                print(f"Processing cached pages... {total}", end='\r')
            try:
                # Decompress and parse the page's __NEXT_DATA__ payload.
                content = zlib.decompress(content_blob).decode('utf-8')
                match = next_data_re.search(content)
                if not match:
                    continue
                data = json.loads(match.group(1))
                page_props = data.get('props', {}).get('pageProps', {})
                lot = page_props.get('lot', {})
                auction = page_props.get('auction', {})
                if not lot or not auction:
                    continue
                lot_display_id = lot.get('displayId')
                auction_display_id = auction.get('displayId')
                if lot_display_id and auction_display_id:
                    lot_to_auction_map[lot_display_id] = auction_display_id
            except Exception:
                # Best-effort migration: a corrupt or unparseable cached page
                # is counted and skipped rather than aborting the whole run.
                errors += 1

        print(f"\n\nBuilt mapping for {len(lot_to_auction_map)} lots")
        print(f"Errors while parsing: {errors}")

        # Now update the lots table. NOTE(review): this assumes lot_id equals
        # the lot's displayId (the mapping key) — consistent with how the
        # lookup below is written, but verify against the schema.
        print("\nUpdating lots table...")
        updated = 0
        not_found = 0
        for lot_id in lots_to_fix:  # the stored UUID value itself is never needed
            new_auction_id = lot_to_auction_map.get(lot_id)
            if new_auction_id is not None:
                cursor.execute("""
                UPDATE lots
                SET auction_id = ?
                WHERE lot_id = ?
            """, (new_auction_id, lot_id))
                updated += 1
            else:
                not_found += 1
            if (updated + not_found) % 100 == 0:
                print(f"Updated: {updated}, not found: {not_found}", end='\r')
        conn.commit()

        print(f"\n\nComplete!")
        print(f"Total cached pages processed: {total}")
        print(f"Lots updated with auction displayId: {updated}")
        print(f"Lots not found in cache: {not_found}")
        print(f"Parse errors: {errors}")

        # Verify fix: any lot whose auction_id has no row in auctions is still
        # orphaned (either its page was never cached or parsing failed).
        cursor.execute("""
            SELECT COUNT(*) FROM lots
            WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
        """)
        orphaned = cursor.fetchone()[0]
        print(f"\nOrphaned lots remaining: {orphaned}")

        if orphaned > 0:
            # Show sample of remaining orphans to aid manual follow-up.
            cursor.execute("""
                SELECT lot_id, auction_id FROM lots
                WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
                LIMIT 5
            """)
            print("\nSample remaining orphaned lots:")
            for lot_id, auction_id in cursor.fetchall():
                print(f" {lot_id} -> auction_id: {auction_id}")
    finally:
        # Close the connection even if the migration raises part-way through.
        conn.close()
# Script entry point: run the migration only when executed directly,
# not when imported. (Indentation of the guarded call restored.)
if __name__ == "__main__":
    fix_orphaned_lots()