enrichment
This commit is contained in:
136
fix_orphaned_lots.py
Normal file
136
fix_orphaned_lots.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Fix orphaned lots by updating auction_id from UUID to displayId.
|
||||
This migration reads cached lot pages and extracts the correct auction displayId.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
||||
|
||||
from cache import CacheManager
|
||||
import sqlite3
|
||||
import zlib
|
||||
import json
|
||||
import re
|
||||
|
||||
# Captures the JSON payload inside the <script id="__NEXT_DATA__"> tag.
# Compiled once here instead of being looked up for every cached page.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
)

# Failures that mean "this cached page is malformed" rather than "this script
# is broken". Anything outside this tuple is allowed to propagate.
_PAGE_PARSE_ERRORS = (
    zlib.error,            # blob is not valid zlib data
    UnicodeDecodeError,    # decompressed bytes are not UTF-8
    json.JSONDecodeError,  # script tag content is not valid JSON
    AttributeError,        # JSON has an unexpected shape (e.g. null/list, not object)
    TypeError,             # blob is NULL / not bytes-like, or an id is unhashable
)

# How many failing URLs to show in the summary.
_MAX_ERROR_SAMPLES = 5


def _extract_display_ids(content_blob):
    """Return ``(lot_display_id, auction_display_id)`` from one cached page.

    Returns ``None`` when the page has no ``__NEXT_DATA__`` script tag, when
    ``props.pageProps`` lacks a non-empty ``lot`` or ``auction`` entry, or
    when either entry has no truthy ``displayId``.

    Raises:
        One of ``_PAGE_PARSE_ERRORS`` when the blob cannot be decompressed,
        decoded, or parsed, or when the JSON does not have the expected shape.
    """
    html = zlib.decompress(content_blob).decode('utf-8')
    match = _NEXT_DATA_RE.search(html)
    if not match:
        return None

    data = json.loads(match.group(1))
    page_props = data.get('props', {}).get('pageProps', {})
    lot = page_props.get('lot', {})
    auction = page_props.get('auction', {})
    if not lot or not auction:
        return None

    lot_display_id = lot.get('displayId')
    auction_display_id = auction.get('displayId')
    if lot_display_id and auction_display_id:
        return lot_display_id, auction_display_id
    return None


def fix_orphaned_lots() -> None:
    """Update lot auction_id from UUID to auction displayId.

    Steps:
      1. Select every row of ``lots`` whose ``auction_id`` is longer than 20
         characters (treated as a UUID).
      2. Scan cached pages whose URL contains ``/l/`` and read the lot and
         auction ``displayId`` out of each page's ``__NEXT_DATA__`` payload.
      3. Rewrite ``lots.auction_id`` for every selected lot found in that
         mapping, then commit.
      4. Print a summary, including how many lots still reference an
         ``auction_id`` that is absent from ``auctions``.

    The connection is closed on every exit path. The updates are committed in
    a single ``commit()`` after the loop, so a failure before that point
    leaves them uncommitted.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Get all lots that need fixing (have UUID auction_id)
        cursor.execute("""
            SELECT l.lot_id, l.auction_id
            FROM lots l
            WHERE length(l.auction_id) > 20  -- UUID is longer than displayId like "A1-12345"
        """)

        lots_to_fix = dict(cursor.fetchall())
        print(f"Found {len(lots_to_fix)} lots with UUID auction_id that need fixing")

        if not lots_to_fix:
            print("No lots to fix!")
            return

        # Build mapping from lot displayId to auction displayId from cached pages
        print("Building lot displayId -> auction displayId mapping from cache...")

        cursor.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/l/%'
        """)

        lot_to_auction_map = {}
        total = 0
        errors = 0
        error_samples = []

        for url, content_blob in cursor:
            total += 1

            if total % 100 == 0:
                print(f"Processing cached pages... {total}", end='\r')

            try:
                ids = _extract_display_ids(content_blob)
                if ids is not None:
                    lot_display_id, auction_display_id = ids
                    lot_to_auction_map[lot_display_id] = auction_display_id
            except _PAGE_PARSE_ERRORS as exc:
                # Best effort: one bad page must not abort the migration, but
                # keep a few samples so failures are visible, not silent.
                errors += 1
                if len(error_samples) < _MAX_ERROR_SAMPLES:
                    error_samples.append((url, exc))

        print(f"\n\nBuilt mapping for {len(lot_to_auction_map)} lots")
        print(f"Errors while parsing: {errors}")
        for url, exc in error_samples:
            print(f"  {url}: {type(exc).__name__}: {exc}")

        # Now update the lots table
        print("\nUpdating lots table...")
        updated = 0
        not_found = 0

        # Only the lot ids are needed here; the old UUID values are not used.
        for lot_id in lots_to_fix:
            new_auction_id = lot_to_auction_map.get(lot_id)
            if new_auction_id is not None:
                cursor.execute("""
                    UPDATE lots
                    SET auction_id = ?
                    WHERE lot_id = ?
                """, (new_auction_id, lot_id))
                updated += 1
            else:
                not_found += 1

            if (updated + not_found) % 100 == 0:
                print(f"Updated: {updated}, not found: {not_found}", end='\r')

        conn.commit()

        print("\n\nComplete!")
        print(f"Total cached pages processed: {total}")
        print(f"Lots updated with auction displayId: {updated}")
        print(f"Lots not found in cache: {not_found}")
        print(f"Parse errors: {errors}")

        # Verify fix
        # NOTE(review): NOT IN matches nothing if auctions.auction_id ever
        # contains a NULL — confirm that column is never NULL.
        cursor.execute("""
            SELECT COUNT(*) FROM lots
            WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
        """)
        orphaned = cursor.fetchone()[0]

        print(f"\nOrphaned lots remaining: {orphaned}")

        if orphaned > 0:
            # Show sample of remaining orphans
            cursor.execute("""
                SELECT lot_id, auction_id FROM lots
                WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
                LIMIT 5
            """)
            print("\nSample remaining orphaned lots:")
            for lot_id, auction_id in cursor.fetchall():
                print(f"  {lot_id} -> auction_id: {auction_id}")
    finally:
        # Release the database handle even when a query or parse step raises.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Run the migration only when executed as a script, not when imported.
    fix_orphaned_lots()
|
||||
Reference in New Issue
Block a user