enrichment
This commit is contained in:
20
src/cache.py
20
src/cache.py
@@ -115,6 +115,18 @@ class CacheManager:
|
||||
if 'damage_description' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN damage_description TEXT")
|
||||
|
||||
# NEW: High-value API fields
|
||||
if 'followers_count' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN followers_count INTEGER DEFAULT 0")
|
||||
if 'estimated_min_price' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN estimated_min_price REAL")
|
||||
if 'estimated_max_price' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN estimated_max_price REAL")
|
||||
if 'lot_condition' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN lot_condition TEXT")
|
||||
if 'appearance' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN appearance TEXT")
|
||||
|
||||
# Create bid_history table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS bid_history (
|
||||
@@ -239,8 +251,9 @@ class CacheManager:
|
||||
first_bid_time, last_bid_time, bid_velocity, bid_increment,
|
||||
year_manufactured, condition_score, condition_description,
|
||||
serial_number, manufacturer, damage_description,
|
||||
followers_count, estimated_min_price, estimated_max_price, lot_condition, appearance,
|
||||
scraped_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
lot_data['lot_id'],
|
||||
lot_data.get('auction_id', ''),
|
||||
@@ -270,6 +283,11 @@ class CacheManager:
|
||||
lot_data.get('serial_number', ''),
|
||||
lot_data.get('manufacturer', ''),
|
||||
lot_data.get('damage_description', ''),
|
||||
lot_data.get('followers_count', 0),
|
||||
lot_data.get('estimated_min_price'),
|
||||
lot_data.get('estimated_max_price'),
|
||||
lot_data.get('lot_condition', ''),
|
||||
lot_data.get('appearance', ''),
|
||||
lot_data['scraped_at']
|
||||
))
|
||||
conn.commit()
|
||||
|
||||
@@ -32,6 +32,14 @@ LOT_BIDDING_QUERY = """
|
||||
query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
|
||||
lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
|
||||
estimatedFullPrice {
|
||||
min {
|
||||
cents
|
||||
currency
|
||||
}
|
||||
max {
|
||||
cents
|
||||
currency
|
||||
}
|
||||
saleTerm
|
||||
}
|
||||
lot {
|
||||
@@ -55,6 +63,9 @@ query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platfo
|
||||
markupPercentage
|
||||
biddingStatus
|
||||
bidsCount
|
||||
followersCount
|
||||
condition
|
||||
appearance
|
||||
startDate
|
||||
endDate
|
||||
assignedExplicitly
|
||||
@@ -193,6 +204,23 @@ def format_bid_data(lot_details: Dict) -> Dict:
|
||||
}
|
||||
status = status_map.get(minimum_bid_met, '')
|
||||
|
||||
# Extract estimated prices
|
||||
estimated_full_price = lot_details.get('estimatedFullPrice', {})
|
||||
estimated_min_obj = estimated_full_price.get('min')
|
||||
estimated_max_obj = estimated_full_price.get('max')
|
||||
|
||||
estimated_min = None
|
||||
estimated_max = None
|
||||
if estimated_min_obj and isinstance(estimated_min_obj, dict):
|
||||
cents = estimated_min_obj.get('cents')
|
||||
if cents is not None:
|
||||
estimated_min = cents / 100.0
|
||||
|
||||
if estimated_max_obj and isinstance(estimated_max_obj, dict):
|
||||
cents = estimated_max_obj.get('cents')
|
||||
if cents is not None:
|
||||
estimated_max = cents / 100.0
|
||||
|
||||
return {
|
||||
'current_bid': current_bid,
|
||||
'starting_bid': starting_bid,
|
||||
@@ -203,6 +231,12 @@ def format_bid_data(lot_details: Dict) -> Dict:
|
||||
'vat_percentage': lot.get('vat', 0),
|
||||
'status': status,
|
||||
'auction_id': lot.get('auctionId', ''),
|
||||
# NEW: High-value intelligence fields
|
||||
'followers_count': lot.get('followersCount', 0),
|
||||
'estimated_min_price': estimated_min,
|
||||
'estimated_max_price': estimated_max,
|
||||
'lot_condition': lot.get('condition', ''),
|
||||
'appearance': lot.get('appearance', ''),
|
||||
}
|
||||
|
||||
|
||||
|
||||
21
src/parse.py
21
src/parse.py
@@ -109,7 +109,8 @@ class DataParser:
|
||||
page_props = data.get('props', {}).get('pageProps', {})
|
||||
|
||||
if 'lot' in page_props:
|
||||
return self._parse_lot_json(page_props.get('lot', {}), url)
|
||||
# Pass both lot and auction data (auction is included in lot pages)
|
||||
return self._parse_lot_json(page_props.get('lot', {}), url, page_props.get('auction'))
|
||||
if 'auction' in page_props:
|
||||
return self._parse_auction_json(page_props.get('auction', {}), url)
|
||||
return None
|
||||
@@ -118,8 +119,14 @@ class DataParser:
|
||||
print(f" → Error parsing __NEXT_DATA__: {e}")
|
||||
return None
|
||||
|
||||
def _parse_lot_json(self, lot_data: Dict, url: str) -> Dict:
|
||||
"""Parse lot data from JSON"""
|
||||
def _parse_lot_json(self, lot_data: Dict, url: str, auction_data: Optional[Dict] = None) -> Dict:
|
||||
"""Parse lot data from JSON
|
||||
|
||||
Args:
|
||||
lot_data: Lot object from __NEXT_DATA__
|
||||
url: Page URL
|
||||
auction_data: Optional auction object (included in lot pages)
|
||||
"""
|
||||
location_data = lot_data.get('location', {})
|
||||
city = location_data.get('city', '')
|
||||
country = location_data.get('countryCode', '').upper()
|
||||
@@ -145,10 +152,16 @@ class DataParser:
|
||||
category = lot_data.get('category', {})
|
||||
category_name = category.get('name', '') if isinstance(category, dict) else ''
|
||||
|
||||
# Get auction displayId from auction data if available (lot pages include auction)
|
||||
# Otherwise fall back to the UUID auctionId
|
||||
auction_id = lot_data.get('auctionId', '')
|
||||
if auction_data and auction_data.get('displayId'):
|
||||
auction_id = auction_data.get('displayId')
|
||||
|
||||
return {
|
||||
'type': 'lot',
|
||||
'lot_id': lot_data.get('displayId', ''),
|
||||
'auction_id': lot_data.get('auctionId', ''),
|
||||
'auction_id': auction_id,
|
||||
'url': url,
|
||||
'title': lot_data.get('title', ''),
|
||||
'current_bid': current_bid_str,
|
||||
|
||||
Reference in New Issue
Block a user