121 lines
3.7 KiB
Python
121 lines
3.7 KiB
Python
"""
|
|
Enrich existing lots with new intelligence fields:
|
|
- followers_count
|
|
- estimated_min_price / estimated_max_price
|
|
- lot_condition
|
|
- appearance
|
|
|
|
Fetches the data for each lot from the GraphQL API (via graphql_client) and stores it in the lots table
|
|
"""
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
|
import asyncio
|
|
from cache import CacheManager
|
|
import sqlite3
|
|
import zlib
|
|
import json
|
|
import re
|
|
from graphql_client import fetch_lot_bidding_data, format_bid_data
|
|
|
|
async def enrich_existing_lots():
    """Enrich every row of the ``lots`` table with data from the GraphQL API.

    For each ``lot_id`` found in the SQLite database at
    ``CacheManager().db_path``, the bidding data is fetched with
    ``fetch_lot_bidding_data``, passed through ``format_bid_data`` and the
    resulting ``followers_count``, ``estimated_min_price``,
    ``estimated_max_price``, ``lot_condition`` and ``appearance`` values are
    written back to that row.

    Progress and a final summary are printed to stdout. A failure on a single
    lot is reported on stderr and counted, but does not abort the run.
    Requests are spaced 0.5 seconds apart, including after a failed request.
    Updates are committed every 50 enriched lots and once more when the loop
    ends, even if it is interrupted; the connection is always closed.

    Returns:
        None
    """
    cache = CacheManager()
    conn = sqlite3.connect(cache.db_path)
    try:
        cursor = conn.cursor()

        # Get all lot IDs
        cursor.execute("SELECT lot_id FROM lots")
        lot_ids = [row[0] for row in cursor.fetchall()]
        total = len(lot_ids)

        print(f"Found {total} lots to enrich")
        print("Fetching enrichment data from GraphQL API...")
        print("This will take ~{:.1f} minutes (0.5s rate limit)".format(total * 0.5 / 60))

        enriched = 0
        failed = 0
        no_data = 0

        try:
            for i, lot_id in enumerate(lot_ids):
                if (i + 1) % 10 == 0:
                    print(f"Progress: {i+1}/{len(lot_ids)} ({enriched} enriched, {no_data} no data, {failed} failed)", end='\r')

                try:
                    # Fetch from GraphQL API
                    bidding_data = await fetch_lot_bidding_data(lot_id)

                    if bidding_data:
                        formatted_data = format_bid_data(bidding_data)

                        # Update lot with new fields
                        cursor.execute(
                            """
                            UPDATE lots
                            SET followers_count = ?,
                                estimated_min_price = ?,
                                estimated_max_price = ?,
                                lot_condition = ?,
                                appearance = ?
                            WHERE lot_id = ?
                            """,
                            (
                                formatted_data.get('followers_count', 0),
                                formatted_data.get('estimated_min_price'),
                                formatted_data.get('estimated_max_price'),
                                formatted_data.get('lot_condition', ''),
                                formatted_data.get('appearance', ''),
                                lot_id,
                            ),
                        )

                        enriched += 1

                        # Commit every 50 lots
                        if enriched % 50 == 0:
                            conn.commit()
                    else:
                        no_data += 1
                except Exception as exc:
                    # Best effort: one bad lot must not stop the run, but the
                    # reason is reported instead of being silently dropped.
                    failed += 1
                    print(f"\nFailed to enrich lot {lot_id}: {exc!r}", file=sys.stderr)

                # Rate limit -- applied after failures too, so a failing API
                # is not hit again immediately.
                await asyncio.sleep(0.5)
        finally:
            # Keep whatever was enriched so far, even when the loop is
            # interrupted (e.g. Ctrl+C / task cancellation).
            conn.commit()

        print("\n\nComplete!")
        print(f"Total lots: {total}")
        print(f"Enriched: {enriched}")
        print(f"No data: {no_data}")
        print(f"Failed: {failed}")

        # Show statistics
        cursor.execute("SELECT COUNT(*) FROM lots WHERE followers_count > 0")
        with_followers = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM lots WHERE estimated_min_price IS NOT NULL")
        with_estimates = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM lots WHERE lot_condition IS NOT NULL AND lot_condition != ''")
        with_condition = cursor.fetchone()[0]

        def _percent(count):
            # An empty lots table would otherwise divide by zero.
            return count / total * 100 if total else 0.0

        print("\nEnrichment statistics:")
        print(f" Lots with followers_count: {with_followers} ({_percent(with_followers):.1f}%)")
        print(f" Lots with estimated prices: {with_estimates} ({_percent(with_estimates):.1f}%)")
        print(f" Lots with condition: {with_condition} ({_percent(with_condition):.1f}%)")
    finally:
        conn.close()
|
|
|
|
if __name__ == "__main__":
    import time

    # Announce the cost of the run, then leave a 5-second window to back out.
    for notice in (
        "WARNING: This will make ~16,800 API calls at 0.5s intervals (~2.3 hours)",
        "Press Ctrl+C to cancel, or wait 5 seconds to continue...",
    ):
        print(notice)

    try:
        time.sleep(5)
    except KeyboardInterrupt:
        # Ctrl+C during the grace period: exit cleanly without starting.
        print("\nCancelled")
        sys.exit(0)
    else:
        asyncio.run(enrich_existing_lots())
|