137 lines
4.0 KiB
Python
137 lines
4.0 KiB
Python
"""
Fix orphaned lots by updating auction_id from UUID to displayId.

This migration reads the cached lot pages, extracts each lot's auction
displayId from the embedded __NEXT_DATA__ JSON, and writes it back to
lots.auction_id.
"""
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
|
from cache import CacheManager
|
|
import sqlite3
|
|
import zlib
|
|
import json
|
|
import re
|
|
|
|
def fix_orphaned_lots():
    """Rewrite ``lots.auction_id`` from an auction UUID to the auction displayId.

    Steps:
      1. Select every lot whose ``auction_id`` is longer than 20 characters
         (treated as a UUID rather than a displayId such as "A1-12345").
      2. Scan cached pages whose URL contains ``/l/`` and build a
         lot displayId -> auction displayId mapping from the ``__NEXT_DATA__``
         JSON embedded in each page.
      3. Update every selected lot found in that mapping, then commit once.
      4. Print how many lots still reference an auction that is missing from
         the ``auctions`` table, with up to five samples.

    Progress and results are printed to stdout; nothing is returned.

    The database connection is always closed, even when a query fails. All
    updates are committed together, so if an error occurs before the commit
    the pending (uncommitted) updates are discarded when the connection closes.

    Raises:
        sqlite3.Error: if any of the queries fails.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Get all lots that need fixing (have UUID auction_id)
        cursor.execute("""
            SELECT l.lot_id, l.auction_id
            FROM lots l
            WHERE length(l.auction_id) > 20  -- UUID is longer than displayId like "A1-12345"
        """)
        lots_to_fix = {lot_id: auction_uuid for lot_id, auction_uuid in cursor.fetchall()}
        print(f"Found {len(lots_to_fix)} lots with UUID auction_id that need fixing")

        if not lots_to_fix:
            print("No lots to fix!")
            return

        # Build mapping from lot displayId to auction displayId from cached pages
        print("Building lot displayId -> auction displayId mapping from cache...")
        lot_to_auction_map, total, errors = _build_lot_to_auction_map(cursor)
        print(f"\n\nBuilt mapping for {len(lot_to_auction_map)} lots")
        print(f"Errors while parsing: {errors}")

        # Now update the lots table
        print("\nUpdating lots table...")
        updated, not_found = _apply_auction_ids(cursor, lots_to_fix, lot_to_auction_map)
        conn.commit()

        print("\n\nComplete!")
        print(f"Total cached pages processed: {total}")
        print(f"Lots updated with auction displayId: {updated}")
        print(f"Lots not found in cache: {not_found}")
        print(f"Parse errors: {errors}")

        # Verify fix
        _report_remaining_orphans(cursor)
    finally:
        # Release the database handle on every exit path (early return, error).
        conn.close()


def _extract_display_ids(content_blob, next_data_re):
    """Return ``(lot displayId, auction displayId)`` for one cached page, or None.

    ``content_blob`` is the zlib-compressed, UTF-8 encoded page body and
    ``next_data_re`` the compiled pattern capturing the ``__NEXT_DATA__`` script
    payload. None is returned when the page has no such script, or when the
    lot/auction objects or their displayIds are missing or empty.

    Raises:
        zlib.error, ValueError, AttributeError, TypeError: when the blob cannot
            be decompressed/decoded/parsed or the JSON has an unexpected shape.
    """
    content = zlib.decompress(content_blob).decode('utf-8')
    match = next_data_re.search(content)
    if not match:
        return None

    data = json.loads(match.group(1))
    page_props = data.get('props', {}).get('pageProps', {})

    lot = page_props.get('lot', {})
    auction = page_props.get('auction', {})
    if not lot or not auction:
        return None

    lot_display_id = lot.get('displayId')
    auction_display_id = auction.get('displayId')
    if lot_display_id and auction_display_id:
        return lot_display_id, auction_display_id
    return None


def _build_lot_to_auction_map(cursor):
    """Scan cached lot pages and return ``(mapping, total, errors)``.

    ``mapping`` maps lot displayId -> auction displayId, ``total`` is the number
    of cached pages read and ``errors`` the number of pages that could not be
    decoded or parsed (those pages are skipped).
    """
    # Compiled once up front instead of being looked up for every page.
    next_data_re = re.compile(
        r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
    )

    cursor.execute("""
        SELECT url, content
        FROM cache
        WHERE url LIKE '%/l/%'
    """)

    lot_to_auction_map = {}
    total = 0
    errors = 0

    for _url, content_blob in cursor:
        total += 1
        if total % 100 == 0:
            print(f"Processing cached pages... {total}", end='\r')

        try:
            ids = _extract_display_ids(content_blob, next_data_re)
            if ids is not None:
                lot_display_id, auction_display_id = ids
                lot_to_auction_map[lot_display_id] = auction_display_id
        except (zlib.error, ValueError, AttributeError, TypeError):
            # Expected per-page failures: corrupt/missing blob (zlib.error,
            # TypeError), bad UTF-8 or JSON (both ValueError subclasses), or a
            # payload whose structure is not the nested dicts we expect
            # (AttributeError / TypeError). Count the page and keep going.
            errors += 1

    return lot_to_auction_map, total, errors


def _apply_auction_ids(cursor, lot_ids, lot_to_auction_map):
    """Update ``lots.auction_id`` for each lot id present in the mapping.

    Returns ``(updated, not_found)``. The caller is responsible for committing.
    """
    # NOTE(review): the lookup assumes lots.lot_id holds the same value as the
    # lot displayId found in the cached pages -- confirm against the schema.
    updated = 0
    not_found = 0

    for lot_id in lot_ids:
        # Mapping values are always non-empty, so None reliably means "absent".
        new_auction_id = lot_to_auction_map.get(lot_id)
        if new_auction_id is None:
            not_found += 1
        else:
            cursor.execute("""
                UPDATE lots
                SET auction_id = ?
                WHERE lot_id = ?
            """, (new_auction_id, lot_id))
            updated += 1

        if (updated + not_found) % 100 == 0:
            print(f"Updated: {updated}, not found: {not_found}", end='\r')

    return updated, not_found


def _report_remaining_orphans(cursor):
    """Print the number of lots whose auction_id is absent from ``auctions``.

    When any remain, up to five sample rows are printed as well.
    """
    # `x NOT IN (subquery)` is never true once the subquery yields a NULL, so a
    # single NULL auctions.auction_id would make this check report zero
    # orphans. Filtering NULLs out keeps the check meaningful and changes
    # nothing when the column contains no NULLs.
    cursor.execute("""
        SELECT COUNT(*) FROM lots
        WHERE auction_id NOT IN (
            SELECT auction_id FROM auctions WHERE auction_id IS NOT NULL
        )
    """)
    orphaned = cursor.fetchone()[0]

    print(f"\nOrphaned lots remaining: {orphaned}")

    if orphaned > 0:
        # Show sample of remaining orphans
        cursor.execute("""
            SELECT lot_id, auction_id FROM lots
            WHERE auction_id NOT IN (
                SELECT auction_id FROM auctions WHERE auction_id IS NOT NULL
            )
            LIMIT 5
        """)
        print("\nSample remaining orphaned lots:")
        for lot_id, auction_id in cursor.fetchall():
            print(f" {lot_id} -> auction_id: {auction_id}")
|
|
|
|
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    fix_orphaned_lots()
|