Files
scaev/enrich_existing_lots.py
2025-12-07 02:20:14 +01:00

121 lines
3.7 KiB
Python

"""
Enrich existing lots with new intelligence fields:
- followers_count
- estimated_min_price / estimated_max_price
- lot_condition
- appearance
Fetches the data for each lot from the GraphQL API (rate-limited) and writes it to the lots table
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import asyncio
from cache import CacheManager
import sqlite3
import zlib
import json
import re
from graphql_client import fetch_lot_bidding_data, format_bid_data
def _percentage(count, total):
    """Return ``count`` as a percentage of ``total``, or 0.0 when ``total`` is 0."""
    return count / total * 100 if total else 0.0


async def enrich_existing_lots():
    """Enrich every row of the ``lots`` table with fields from the GraphQL API.

    For each ``lot_id`` found in the ``lots`` table this calls
    ``fetch_lot_bidding_data`` and, when it returns a truthy result, stores
    the values produced by ``format_bid_data`` in the ``followers_count``,
    ``estimated_min_price``, ``estimated_max_price``, ``lot_condition`` and
    ``appearance`` columns.

    Requests are spaced 0.5 seconds apart, whether or not the previous request
    succeeded. Updates are committed every 50 enriched lots and once more when
    the loop ends, including when it is interrupted. A failure on a single lot
    is counted, reported on stderr and does not stop the run.

    Progress, a summary and column-coverage statistics are printed to stdout.
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Get all lot IDs
        cursor.execute("SELECT lot_id FROM lots")
        lot_ids = [row[0] for row in cursor.fetchall()]
        total = len(lot_ids)

        print(f"Found {total} lots to enrich")
        print("Fetching enrichment data from GraphQL API...")
        print("This will take ~{:.1f} minutes (0.5s rate limit)".format(total * 0.5 / 60))

        enriched = 0
        failed = 0
        no_data = 0

        try:
            for position, lot_id in enumerate(lot_ids, start=1):
                if position % 10 == 0:
                    print(f"Progress: {position}/{total} ({enriched} enriched, {no_data} no data, {failed} failed)", end='\r')

                try:
                    # Fetch from GraphQL API
                    bidding_data = await fetch_lot_bidding_data(lot_id)

                    if bidding_data:
                        formatted_data = format_bid_data(bidding_data)

                        # Update lot with new fields
                        cursor.execute("""
                            UPDATE lots
                            SET followers_count = ?,
                                estimated_min_price = ?,
                                estimated_max_price = ?,
                                lot_condition = ?,
                                appearance = ?
                            WHERE lot_id = ?
                        """, (
                            formatted_data.get('followers_count', 0),
                            formatted_data.get('estimated_min_price'),
                            formatted_data.get('estimated_max_price'),
                            formatted_data.get('lot_condition', ''),
                            formatted_data.get('appearance', ''),
                            lot_id
                        ))
                        enriched += 1

                        # Commit every 50 lots
                        if enriched % 50 == 0:
                            conn.commit()
                    else:
                        no_data += 1
                except Exception as e:
                    # Best effort: keep going, but say which lot failed and why
                    # so systematic errors do not go unnoticed.
                    failed += 1
                    print(f"\nFailed to enrich lot {lot_id}: {e!r}", file=sys.stderr)

                # Rate limit (applied after failures too, so errors do not
                # turn into back-to-back requests)
                await asyncio.sleep(0.5)
        finally:
            # Persist the updates made so far, even if the run was interrupted.
            conn.commit()

        print("\n\nComplete!")
        print(f"Total lots: {total}")
        print(f"Enriched: {enriched}")
        print(f"No data: {no_data}")
        print(f"Failed: {failed}")

        # Show statistics
        cursor.execute("SELECT COUNT(*) FROM lots WHERE followers_count > 0")
        with_followers = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM lots WHERE estimated_min_price IS NOT NULL")
        with_estimates = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM lots WHERE lot_condition IS NOT NULL AND lot_condition != ''")
        with_condition = cursor.fetchone()[0]

        # _percentage guards against an empty lots table (total == 0).
        print("\nEnrichment statistics:")
        print(f" Lots with followers_count: {with_followers} ({_percentage(with_followers, total):.1f}%)")
        print(f" Lots with estimated prices: {with_estimates} ({_percentage(with_estimates, total):.1f}%)")
        print(f" Lots with condition: {with_condition} ({_percentage(with_condition, total):.1f}%)")
    finally:
        conn.close()
if __name__ == "__main__":
    import time

    # Announce the cost of the run and give the operator a short window to
    # abort with Ctrl+C before any API call is made.
    print("WARNING: This will make ~16,800 API calls at 0.5s intervals (~2.3 hours)")
    print("Press Ctrl+C to cancel, or wait 5 seconds to continue...")

    try:
        time.sleep(5)
    except KeyboardInterrupt:
        # Cancelled during the grace period: exit cleanly with status 0.
        print("\nCancelled")
        sys.exit(0)
    else:
        # Grace period elapsed without interruption: start the enrichment.
        asyncio.run(enrich_existing_lots())