enrich data
This commit is contained in:
173
src/cache.py
173
src/cache.py
@@ -19,8 +19,9 @@ class CacheManager:
|
||||
self._init_db()
|
||||
|
||||
def _init_db(self):
|
||||
"""Initialize cache and data storage database"""
|
||||
"""Initialize cache and data storage database with consolidated schema"""
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
# Cache table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS cache (
|
||||
url TEXT PRIMARY KEY,
|
||||
@@ -32,6 +33,8 @@ class CacheManager:
|
||||
conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_timestamp ON cache(timestamp)
|
||||
""")
|
||||
|
||||
# Auctions table - consolidated schema
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS auctions (
|
||||
auction_id TEXT PRIMARY KEY,
|
||||
@@ -40,9 +43,18 @@ class CacheManager:
|
||||
location TEXT,
|
||||
lots_count INTEGER,
|
||||
first_lot_closing_time TEXT,
|
||||
scraped_at TEXT
|
||||
scraped_at TEXT,
|
||||
city TEXT,
|
||||
country TEXT,
|
||||
type TEXT,
|
||||
lot_count INTEGER DEFAULT 0,
|
||||
closing_time TEXT,
|
||||
discovered_at INTEGER
|
||||
)
|
||||
""")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)")
|
||||
|
||||
# Lots table - consolidated schema with all fields from working database
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS lots (
|
||||
lot_id TEXT PRIMARY KEY,
|
||||
@@ -50,8 +62,6 @@ class CacheManager:
|
||||
url TEXT UNIQUE,
|
||||
title TEXT,
|
||||
current_bid TEXT,
|
||||
starting_bid TEXT,
|
||||
minimum_bid TEXT,
|
||||
bid_count INTEGER,
|
||||
closing_time TEXT,
|
||||
viewing_time TEXT,
|
||||
@@ -60,9 +70,54 @@ class CacheManager:
|
||||
description TEXT,
|
||||
category TEXT,
|
||||
scraped_at TEXT,
|
||||
sale_id INTEGER,
|
||||
manufacturer TEXT,
|
||||
type TEXT,
|
||||
year INTEGER,
|
||||
currency TEXT DEFAULT 'EUR',
|
||||
closing_notified INTEGER DEFAULT 0,
|
||||
starting_bid TEXT,
|
||||
minimum_bid TEXT,
|
||||
status TEXT,
|
||||
brand TEXT,
|
||||
model TEXT,
|
||||
attributes_json TEXT,
|
||||
first_bid_time TEXT,
|
||||
last_bid_time TEXT,
|
||||
bid_velocity REAL,
|
||||
bid_increment REAL,
|
||||
year_manufactured INTEGER,
|
||||
condition_score REAL,
|
||||
condition_description TEXT,
|
||||
serial_number TEXT,
|
||||
damage_description TEXT,
|
||||
followers_count INTEGER DEFAULT 0,
|
||||
estimated_min_price REAL,
|
||||
estimated_max_price REAL,
|
||||
lot_condition TEXT,
|
||||
appearance TEXT,
|
||||
estimated_min REAL,
|
||||
estimated_max REAL,
|
||||
next_bid_step_cents INTEGER,
|
||||
condition TEXT,
|
||||
category_path TEXT,
|
||||
city_location TEXT,
|
||||
country_code TEXT,
|
||||
bidding_status TEXT,
|
||||
packaging TEXT,
|
||||
quantity INTEGER,
|
||||
vat REAL,
|
||||
buyer_premium_percentage REAL,
|
||||
remarks TEXT,
|
||||
reserve_price REAL,
|
||||
reserve_met INTEGER,
|
||||
view_count INTEGER,
|
||||
FOREIGN KEY (auction_id) REFERENCES auctions(auction_id)
|
||||
)
|
||||
""")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)")
|
||||
|
||||
# Images table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS images (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -70,86 +125,28 @@ class CacheManager:
|
||||
url TEXT,
|
||||
local_path TEXT,
|
||||
downloaded INTEGER DEFAULT 0,
|
||||
labels TEXT,
|
||||
processed_at INTEGER,
|
||||
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
|
||||
)
|
||||
""")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_images_lot_id ON images(lot_id)")
|
||||
|
||||
# Add new columns to auctions table if they don't exist
|
||||
cursor = conn.execute("PRAGMA table_info(auctions)")
|
||||
auction_columns = {row[1] for row in cursor.fetchall()}
|
||||
# Remove duplicates before creating unique index
|
||||
conn.execute("""
|
||||
DELETE FROM images
|
||||
WHERE id NOT IN (
|
||||
SELECT MIN(id)
|
||||
FROM images
|
||||
GROUP BY lot_id, url
|
||||
)
|
||||
""")
|
||||
conn.execute("""
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_lot_url
|
||||
ON images(lot_id, url)
|
||||
""")
|
||||
|
||||
if 'city' not in auction_columns:
|
||||
conn.execute("ALTER TABLE auctions ADD COLUMN city TEXT")
|
||||
if 'country' not in auction_columns:
|
||||
conn.execute("ALTER TABLE auctions ADD COLUMN country TEXT")
|
||||
if 'type' not in auction_columns:
|
||||
conn.execute("ALTER TABLE auctions ADD COLUMN type TEXT")
|
||||
if 'lot_count' not in auction_columns:
|
||||
conn.execute("ALTER TABLE auctions ADD COLUMN lot_count INTEGER DEFAULT 0")
|
||||
if 'closing_time' not in auction_columns:
|
||||
conn.execute("ALTER TABLE auctions ADD COLUMN closing_time TEXT")
|
||||
if 'discovered_at' not in auction_columns:
|
||||
conn.execute("ALTER TABLE auctions ADD COLUMN discovered_at INTEGER")
|
||||
|
||||
# Add index for country filtering
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)")
|
||||
|
||||
# Add new columns to lots table if they don't exist
|
||||
cursor = conn.execute("PRAGMA table_info(lots)")
|
||||
columns = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
if 'starting_bid' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN starting_bid TEXT")
|
||||
if 'minimum_bid' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN minimum_bid TEXT")
|
||||
if 'status' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN status TEXT")
|
||||
if 'brand' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN brand TEXT")
|
||||
if 'model' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN model TEXT")
|
||||
if 'attributes_json' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN attributes_json TEXT")
|
||||
|
||||
# Bidding intelligence fields
|
||||
if 'first_bid_time' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN first_bid_time TEXT")
|
||||
if 'last_bid_time' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN last_bid_time TEXT")
|
||||
if 'bid_velocity' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN bid_velocity REAL")
|
||||
if 'bid_increment' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN bid_increment REAL")
|
||||
|
||||
# Valuation intelligence fields
|
||||
if 'year_manufactured' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN year_manufactured INTEGER")
|
||||
if 'condition_score' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN condition_score REAL")
|
||||
if 'condition_description' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN condition_description TEXT")
|
||||
if 'serial_number' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN serial_number TEXT")
|
||||
if 'manufacturer' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN manufacturer TEXT")
|
||||
if 'damage_description' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN damage_description TEXT")
|
||||
|
||||
# NEW: High-value API fields
|
||||
if 'followers_count' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN followers_count INTEGER DEFAULT 0")
|
||||
if 'estimated_min_price' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN estimated_min_price REAL")
|
||||
if 'estimated_max_price' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN estimated_max_price REAL")
|
||||
if 'lot_condition' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN lot_condition TEXT")
|
||||
if 'appearance' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN appearance TEXT")
|
||||
if 'scraped_at_timestamp' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN scraped_at_timestamp INTEGER")
|
||||
|
||||
# Create bid_history table
|
||||
# Bid history table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS bid_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -163,33 +160,15 @@ class CacheManager:
|
||||
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
|
||||
)
|
||||
""")
|
||||
|
||||
conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_bid_history_lot_time
|
||||
ON bid_history(lot_id, bid_time)
|
||||
""")
|
||||
|
||||
conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_bid_history_bidder
|
||||
ON bid_history(bidder_id)
|
||||
""")
|
||||
|
||||
# Remove duplicates before creating unique index
|
||||
# Keep the row with the smallest id (first occurrence) for each (lot_id, url) pair
|
||||
conn.execute("""
|
||||
DELETE FROM images
|
||||
WHERE id NOT IN (
|
||||
SELECT MIN(id)
|
||||
FROM images
|
||||
GROUP BY lot_id, url
|
||||
)
|
||||
""")
|
||||
|
||||
# Now create the unique index
|
||||
conn.execute("""
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_lot_url
|
||||
ON images(lot_id, url)
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
def get(self, url: str, max_age_hours: int = 24) -> Optional[Dict]:
|
||||
|
||||
Reference in New Issue
Block a user