156 lines
4.8 KiB
Python
156 lines
4.8 KiB
Python
"""
|
|
Fix auctions table by replacing with correct data from cached auction pages.
|
|
The auctions table currently has wrong auction_ids (numeric instead of displayId).
|
|
"""
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
|
from cache import CacheManager
|
|
import sqlite3
|
|
import zlib
|
|
import json
|
|
import re
|
|
from datetime import datetime
|
|
|
|
# Captures the JSON payload inside the <script id="__NEXT_DATA__"> tag of a cached page.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL
)

# How many per-page failure messages are echoed in the final summary.
_MAX_REPORTED_FAILURES = 5

_INSERT_AUCTION_SQL = """
    INSERT OR REPLACE INTO auctions
    (auction_id, url, title, location, lots_count, first_lot_closing_time, scraped_at)
    VALUES (?, ?, ?, ?, ?, ?, ?)
"""

# Shared filter for lots whose auction_id has no matching row in auctions.
_ORPHANED_LOTS_WHERE = """
    WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
    AND auction_id != ''
"""


def _format_location(viewing_days):
    """Return "City, CC" from the first viewing day, or '' when it is unavailable."""
    if not isinstance(viewing_days, list) or not viewing_days:
        return ''
    first_day = viewing_days[0]
    if not isinstance(first_day, dict):
        return ''
    # `or ''` guards against JSON nulls, which .get()'s default does not cover.
    city = first_day.get('city') or ''
    country = (first_day.get('countryCode') or '').upper()
    return f"{city}, {country}" if city and country else (city or country)


def _format_closing_time(min_end_date):
    """Return min_end_date as 'YYYY-MM-DD HH:MM:SS', or the raw value if it cannot be parsed."""
    if not min_end_date:
        return ''
    try:
        # A trailing 'Z' is rewritten because older fromisoformat() versions reject it.
        parsed = datetime.fromisoformat(min_end_date.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        # Not an ISO-8601 string: keep whatever the page supplied.
        return min_end_date
    return parsed.strftime('%Y-%m-%d %H:%M:%S')


def _extract_auction_row(url, content_blob):
    """Turn one cached page (url + zlib-compressed HTML) into an auctions row tuple.

    Raises ValueError when the page carries no usable auction data; decoding and
    JSON errors from the standard library propagate unchanged.
    """
    html = zlib.decompress(content_blob).decode('utf-8')
    match = _NEXT_DATA_RE.search(html)
    if not match:
        raise ValueError("no __NEXT_DATA__ script found")

    data = json.loads(match.group(1))
    page_props = (data.get('props') or {}).get('pageProps') or {}
    auction = page_props.get('auction') or {}
    if not auction:
        raise ValueError("no auction object in pageProps")

    auction_id = auction.get('displayId')
    if not auction_id:
        raise ValueError("auction has no displayId")

    return (
        auction_id,
        url,
        auction.get('name', ''),
        _format_location(auction.get('viewingDays', [])),
        auction.get('lotCount', 0),
        _format_closing_time(auction.get('minEndDate', '')),
        datetime.now().isoformat(),
    )


def _report_orphaned_lots(cursor):
    """Print how many lots still reference a missing auction, with samples if any."""
    cursor.execute("SELECT COUNT(*) FROM lots" + _ORPHANED_LOTS_WHERE)
    orphaned = cursor.fetchone()[0]
    print(f"Orphaned lots remaining: {orphaned}")

    if orphaned == 0:
        print("\nSUCCESS! All lots now have matching auctions!")
        return

    # Show sample of remaining orphans
    cursor.execute(
        "SELECT lot_id, auction_id FROM lots" + _ORPHANED_LOTS_WHERE + " LIMIT 5"
    )
    print("\nSample remaining orphaned lots:")
    for lot_id, auction_id in cursor.fetchall():
        print(f"  {lot_id} -> auction_id: {auction_id}")

    # Show what auction_ids we do have
    cursor.execute("SELECT auction_id FROM auctions LIMIT 10")
    print("\nSample auction_ids in auctions table:")
    for (auction_id,) in cursor.fetchall():
        print(f"  {auction_id}")


def fix_auctions_table(db_path=None):
    """Rebuild the auctions table from cached auction pages.

    Every row of ``auctions`` is deleted and replaced by rows extracted from the
    ``__NEXT_DATA__`` JSON of cached pages whose URL contains ``/a/``, keyed by
    the auction's ``displayId``. The delete and all inserts share a single
    transaction, so an interrupted or failed run leaves the previous table
    contents in place. Progress, a summary, and an orphaned-lots check are
    printed to stdout.

    Args:
        db_path: Path of the SQLite database to repair. When None (the
            default) the path is taken from ``CacheManager().db_path``.

    Returns:
        None.

    Raises:
        sqlite3.Error: If a query outside the per-page insert fails (for
            example a missing table). Pages that cannot be parsed or inserted
            are counted as errors and do not abort the run.
    """
    if db_path is None:
        db_path = CacheManager().db_path

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        inserted = 0
        errors = 0
        failure_samples = []

        try:
            # Clear existing auctions table (left uncommitted until the rebuild is done)
            print("Clearing auctions table...")
            cursor.execute("DELETE FROM auctions")

            # Get all auction pages from cache
            cursor.execute("SELECT url, content FROM cache WHERE url LIKE '%/a/%'")
            auction_pages = cursor.fetchall()
            total = len(auction_pages)
            print(f"Found {total} auction pages in cache")

            print("Extracting auction data from cached pages...")
            for processed, (url, content_blob) in enumerate(auction_pages, start=1):
                if processed % 10 == 0:
                    print(f"Processed {processed}/{total}...", end='\r')
                try:
                    cursor.execute(
                        _INSERT_AUCTION_SQL, _extract_auction_row(url, content_blob)
                    )
                except Exception as exc:
                    # Best-effort: one bad page must not abort the rebuild,
                    # but keep a few reasons so the summary is actionable.
                    errors += 1
                    if len(failure_samples) < _MAX_REPORTED_FAILURES:
                        failure_samples.append(
                            (url, f"{type(exc).__name__}: {exc}")
                        )
                    continue
                inserted += 1
        except BaseException:
            # Includes KeyboardInterrupt: restore the table as it was before the DELETE.
            conn.rollback()
            raise
        conn.commit()

        print("\n\nComplete!")
        print(f"Total auction pages processed: {total}")
        print(f"Auctions inserted: {inserted}")
        print(f"Errors: {errors}")
        for url, reason in failure_samples:
            print(f"  {url}: {reason}")

        # Verify fix
        cursor.execute("SELECT COUNT(*) FROM auctions")
        total_auctions = cursor.fetchone()[0]
        print(f"\nTotal auctions in table: {total_auctions}")

        _report_orphaned_lots(cursor)
    finally:
        conn.close()
|
|
|
|
# Script entry point: run the rebuild only when this file is executed directly.
if __name__ == "__main__":
    fix_auctions_table()
|