enrichment

This commit is contained in:
Tour
2025-12-07 02:20:14 +01:00
parent 08bf112c3f
commit 765361d582
9 changed files with 1096 additions and 5 deletions

View File

@@ -115,6 +115,18 @@ class CacheManager:
if 'damage_description' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN damage_description TEXT")
# NEW: High-value API fields
if 'followers_count' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN followers_count INTEGER DEFAULT 0")
if 'estimated_min_price' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN estimated_min_price REAL")
if 'estimated_max_price' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN estimated_max_price REAL")
if 'lot_condition' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN lot_condition TEXT")
if 'appearance' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN appearance TEXT")
# Create bid_history table
conn.execute("""
CREATE TABLE IF NOT EXISTS bid_history (
@@ -239,8 +251,9 @@ class CacheManager:
first_bid_time, last_bid_time, bid_velocity, bid_increment,
year_manufactured, condition_score, condition_description,
serial_number, manufacturer, damage_description,
followers_count, estimated_min_price, estimated_max_price, lot_condition, appearance,
scraped_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
lot_data['lot_id'],
lot_data.get('auction_id', ''),
@@ -270,6 +283,11 @@ class CacheManager:
lot_data.get('serial_number', ''),
lot_data.get('manufacturer', ''),
lot_data.get('damage_description', ''),
lot_data.get('followers_count', 0),
lot_data.get('estimated_min_price'),
lot_data.get('estimated_max_price'),
lot_data.get('lot_condition', ''),
lot_data.get('appearance', ''),
lot_data['scraped_at']
))
conn.commit()

View File

@@ -32,6 +32,14 @@ LOT_BIDDING_QUERY = """
query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
estimatedFullPrice {
min {
cents
currency
}
max {
cents
currency
}
saleTerm
}
lot {
@@ -55,6 +63,9 @@ query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platfo
markupPercentage
biddingStatus
bidsCount
followersCount
condition
appearance
startDate
endDate
assignedExplicitly
@@ -193,6 +204,23 @@ def format_bid_data(lot_details: Dict) -> Dict:
}
status = status_map.get(minimum_bid_met, '')
# Extract estimated prices. Each bound is an object carrying a `cents`
# amount, which is divided by 100 to get the price in major currency units.
# `.get(key, {})` only applies its default when the key is missing; if the
# response contains the key with a null value the result is None, so
# normalise with `or {}` (and ignore any non-dict payload) before reading
# the bounds.
estimated_full_price = lot_details.get('estimatedFullPrice') or {}
if not isinstance(estimated_full_price, dict):
    estimated_full_price = {}
estimated_min_obj = estimated_full_price.get('min')
estimated_max_obj = estimated_full_price.get('max')
# None means "no estimate available" for that bound.
estimated_min = None
estimated_max = None
if estimated_min_obj and isinstance(estimated_min_obj, dict):
    cents = estimated_min_obj.get('cents')
    if cents is not None:
        estimated_min = cents / 100.0
if estimated_max_obj and isinstance(estimated_max_obj, dict):
    cents = estimated_max_obj.get('cents')
    if cents is not None:
        estimated_max = cents / 100.0
return {
'current_bid': current_bid,
'starting_bid': starting_bid,
@@ -203,6 +231,12 @@ def format_bid_data(lot_details: Dict) -> Dict:
'vat_percentage': lot.get('vat', 0),
'status': status,
'auction_id': lot.get('auctionId', ''),
# NEW: High-value intelligence fields
'followers_count': lot.get('followersCount', 0),
'estimated_min_price': estimated_min,
'estimated_max_price': estimated_max,
'lot_condition': lot.get('condition', ''),
'appearance': lot.get('appearance', ''),
}

View File

@@ -109,7 +109,8 @@ class DataParser:
page_props = data.get('props', {}).get('pageProps', {})
if 'lot' in page_props:
return self._parse_lot_json(page_props.get('lot', {}), url)
# Pass both lot and auction data (auction is included in lot pages)
return self._parse_lot_json(page_props.get('lot', {}), url, page_props.get('auction'))
if 'auction' in page_props:
return self._parse_auction_json(page_props.get('auction', {}), url)
return None
@@ -118,8 +119,14 @@ class DataParser:
print(f" → Error parsing __NEXT_DATA__: {e}")
return None
def _parse_lot_json(self, lot_data: Dict, url: str) -> Dict:
"""Parse lot data from JSON"""
def _parse_lot_json(self, lot_data: Dict, url: str, auction_data: Optional[Dict] = None) -> Dict:
"""Parse lot data from JSON
Args:
lot_data: Lot object from __NEXT_DATA__
url: Page URL
auction_data: Optional auction object (included in lot pages)
"""
location_data = lot_data.get('location', {})
city = location_data.get('city', '')
country = location_data.get('countryCode', '').upper()
@@ -145,10 +152,16 @@ class DataParser:
category = lot_data.get('category', {})
category_name = category.get('name', '') if isinstance(category, dict) else ''
# Get auction displayId from auction data if available (lot pages include auction)
# Otherwise fall back to the UUID auctionId
auction_id = lot_data.get('auctionId', '')
if auction_data and auction_data.get('displayId'):
auction_id = auction_data.get('displayId')
return {
'type': 'lot',
'lot_id': lot_data.get('displayId', ''),
'auction_id': lot_data.get('auctionId', ''),
'auction_id': auction_id,
'url': url,
'title': lot_data.get('title', ''),
'current_bid': current_bid_str,