enrich data
This commit is contained in:
121
src/bid_history_client.py
Normal file
121
src/bid_history_client.py
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Client for fetching bid history from Troostwijk REST API
|
||||
"""
|
||||
import aiohttp
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
BID_HISTORY_ENDPOINT = "https://shared-api.tbauctions.com/bidmanagement/lots/{lot_uuid}/bidding-history"
|
||||
|
||||
|
||||
async def fetch_bid_history(lot_uuid: str, page_size: int = 100) -> Optional[List[Dict]]:
    """
    Fetch the complete bid history for a lot, following API pagination.

    Args:
        lot_uuid: The lot UUID (from GraphQL response)
        page_size: Number of bids per page

    Returns:
        List of bid dictionaries; a partial list if a page after the first
        fails; None if the first page fails or no bids were returned.
    """
    all_bids: List[Dict] = []
    page_number = 1
    url = BID_HISTORY_ENDPOINT.format(lot_uuid=lot_uuid)

    try:
        # aiohttp deprecated plain-int timeouts; ClientTimeout is the
        # supported form and applies to every request on the session.
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            while True:
                params = {"pageNumber": page_number, "pageSize": page_size}

                async with session.get(url, params=params) as response:
                    if response.status != 200:
                        # First page failing means no data at all; a later
                        # failure returns what was collected so far.
                        return None if page_number == 1 else all_bids

                    data = await response.json()
                    all_bids.extend(data.get('results', []))

                    if not data.get('hasNext', False):
                        break
                    page_number += 1

        return all_bids if all_bids else None

    except Exception as e:
        # Best-effort: network/JSON failures are reported and treated as
        # "no history" rather than aborting the scrape.
        print(f" Bid history fetch failed: {e}")
        return None
|
||||
|
||||
|
||||
def parse_bid_history(bid_history: List[Dict], lot_id: str) -> Dict:
    """
    Parse raw bid history into a database-ready structure.

    Args:
        bid_history: Raw bid history from the REST API
        lot_id: The lot display ID (e.g., "A1-28505-5")

    Returns:
        Dict with 'bid_records' (row dicts for the bid_history table),
        'first_bid_time' / 'last_bid_time' ("%Y-%m-%d %H:%M:%S" strings
        or None) and 'bid_velocity' (bids per hour, rounded to 2 decimals).
    """
    if not bid_history:
        return {
            'bid_records': [],
            'first_bid_time': None,
            'last_bid_time': None,
            'bid_velocity': 0.0
        }

    bid_records = []
    for bid in bid_history:
        # Amounts arrive in integer cents; convert to a float amount.
        bid_amount_cents = bid.get('currentBid', {}).get('cents', 0)
        bid_records.append({
            'lot_id': lot_id,
            'bid_amount': bid_amount_cents / 100.0 if bid_amount_cents else 0.0,
            'bid_time': bid.get('createdAt', ''),
            'is_autobid': bid.get('autoBid', False),
            'bidder_id': bid.get('buyerId', ''),
            'bidder_number': bid.get('buyerNumber', 0)
        })

    # Parse ISO timestamps (e.g. "2025-12-04T17:17:45.694698Z").
    # Records with a missing or malformed timestamp are skipped for the
    # metrics but still kept in bid_records.
    bid_times = []
    for record in bid_records:
        try:
            dt = datetime.fromisoformat(record['bid_time'].replace('Z', '+00:00'))
            bid_times.append(dt)
        except (ValueError, AttributeError, TypeError):
            # was a bare except: — only tolerate bad/absent timestamp values
            pass

    first_bid_time = None
    last_bid_time = None
    bid_velocity = 0.0

    if bid_times:
        bid_times.sort()
        first_bid_time = bid_times[0].strftime('%Y-%m-%d %H:%M:%S')
        last_bid_time = bid_times[-1].strftime('%Y-%m-%d %H:%M:%S')

        # Velocity = bids per hour over the observed time span.
        if len(bid_times) > 1:
            time_span = (bid_times[-1] - bid_times[0]).total_seconds() / 3600
            if time_span > 0:
                bid_velocity = len(bid_times) / time_span

    return {
        'bid_records': bid_records,
        'first_bid_time': first_bid_time,
        'last_bid_time': last_bid_time,
        'bid_velocity': round(bid_velocity, 2)
    }
|
||||
104
src/cache.py
104
src/cache.py
@@ -82,6 +82,63 @@ class CacheManager:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN starting_bid TEXT")
|
||||
if 'minimum_bid' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN minimum_bid TEXT")
|
||||
if 'status' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN status TEXT")
|
||||
if 'brand' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN brand TEXT")
|
||||
if 'model' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN model TEXT")
|
||||
if 'attributes_json' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN attributes_json TEXT")
|
||||
|
||||
# Bidding intelligence fields
|
||||
if 'first_bid_time' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN first_bid_time TEXT")
|
||||
if 'last_bid_time' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN last_bid_time TEXT")
|
||||
if 'bid_velocity' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN bid_velocity REAL")
|
||||
if 'bid_increment' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN bid_increment REAL")
|
||||
|
||||
# Valuation intelligence fields
|
||||
if 'year_manufactured' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN year_manufactured INTEGER")
|
||||
if 'condition_score' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN condition_score REAL")
|
||||
if 'condition_description' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN condition_description TEXT")
|
||||
if 'serial_number' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN serial_number TEXT")
|
||||
if 'manufacturer' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN manufacturer TEXT")
|
||||
if 'damage_description' not in columns:
|
||||
conn.execute("ALTER TABLE lots ADD COLUMN damage_description TEXT")
|
||||
|
||||
# Create bid_history table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS bid_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
lot_id TEXT NOT NULL,
|
||||
bid_amount REAL NOT NULL,
|
||||
bid_time TEXT NOT NULL,
|
||||
is_autobid INTEGER DEFAULT 0,
|
||||
bidder_id TEXT,
|
||||
bidder_number INTEGER,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
|
||||
)
|
||||
""")
|
||||
|
||||
conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_bid_history_lot_time
|
||||
ON bid_history(lot_id, bid_time)
|
||||
""")
|
||||
|
||||
conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_bid_history_bidder
|
||||
ON bid_history(bidder_id)
|
||||
""")
|
||||
|
||||
# Remove duplicates before creating unique index
|
||||
# Keep the row with the smallest id (first occurrence) for each (lot_id, url) pair
|
||||
@@ -178,8 +235,12 @@ class CacheManager:
|
||||
INSERT OR REPLACE INTO lots
|
||||
(lot_id, auction_id, url, title, current_bid, starting_bid, minimum_bid,
|
||||
bid_count, closing_time, viewing_time, pickup_date, location, description,
|
||||
category, scraped_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
category, status, brand, model, attributes_json,
|
||||
first_bid_time, last_bid_time, bid_velocity, bid_increment,
|
||||
year_manufactured, condition_score, condition_description,
|
||||
serial_number, manufacturer, damage_description,
|
||||
scraped_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
lot_data['lot_id'],
|
||||
lot_data.get('auction_id', ''),
|
||||
@@ -195,10 +256,49 @@ class CacheManager:
|
||||
lot_data.get('location', ''),
|
||||
lot_data.get('description', ''),
|
||||
lot_data.get('category', ''),
|
||||
lot_data.get('status', ''),
|
||||
lot_data.get('brand', ''),
|
||||
lot_data.get('model', ''),
|
||||
lot_data.get('attributes_json', ''),
|
||||
lot_data.get('first_bid_time'),
|
||||
lot_data.get('last_bid_time'),
|
||||
lot_data.get('bid_velocity'),
|
||||
lot_data.get('bid_increment'),
|
||||
lot_data.get('year_manufactured'),
|
||||
lot_data.get('condition_score'),
|
||||
lot_data.get('condition_description', ''),
|
||||
lot_data.get('serial_number', ''),
|
||||
lot_data.get('manufacturer', ''),
|
||||
lot_data.get('damage_description', ''),
|
||||
lot_data['scraped_at']
|
||||
))
|
||||
conn.commit()
|
||||
|
||||
def save_bid_history(self, lot_id: str, bid_records: List[Dict]):
    """Persist bid history rows for a lot, replacing any existing ones."""
    if not bid_records:
        return

    # Build all parameter tuples up front so the insert is a single
    # executemany call.
    rows = [
        (
            record['lot_id'],
            record['bid_amount'],
            record['bid_time'],
            1 if record['is_autobid'] else 0,
            record['bidder_id'],
            record['bidder_number'],
        )
        for record in bid_records
    ]

    with sqlite3.connect(self.db_path) as conn:
        # Replace-all semantics: drop the lot's previous history first.
        conn.execute("DELETE FROM bid_history WHERE lot_id = ?", (lot_id,))
        conn.executemany(
            """
            INSERT INTO bid_history
            (lot_id, bid_amount, bid_time, is_autobid, bidder_id, bidder_number)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            rows,
        )
        conn.commit()
|
||||
|
||||
def save_images(self, lot_id: str, image_urls: List[str]):
|
||||
"""Save image URLs for a lot (prevents duplicates via unique constraint)"""
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
|
||||
@@ -7,6 +7,27 @@ from typing import Dict, Optional
|
||||
|
||||
GRAPHQL_ENDPOINT = "https://storefront.tbauctions.com/storefront/graphql"
|
||||
|
||||
AUCTION_QUERY = """
|
||||
query AuctionData($auctionId: TbaUuid!, $locale: String!, $platform: Platform!) {
|
||||
auction(id: $auctionId, locale: $locale, platform: $platform) {
|
||||
id
|
||||
displayId
|
||||
viewingDays {
|
||||
startDate
|
||||
endDate
|
||||
city
|
||||
countryCode
|
||||
}
|
||||
collectionDays {
|
||||
startDate
|
||||
endDate
|
||||
city
|
||||
countryCode
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
LOT_BIDDING_QUERY = """
|
||||
query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
|
||||
lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
|
||||
@@ -44,6 +65,42 @@ query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platfo
|
||||
"""
|
||||
|
||||
|
||||
async def fetch_auction_data(auction_id: str) -> Optional[Dict]:
    """
    Fetch auction data (viewing/pickup times) from the GraphQL API.

    Args:
        auction_id: The auction UUID

    Returns:
        Dict with auction data, or None when the request fails or the
        response carries no auction object.
    """
    payload = {
        "query": AUCTION_QUERY,
        "variables": {
            "auctionId": auction_id,
            "locale": "nl",
            "platform": "TWK",
        },
    }

    try:
        # aiohttp deprecated plain-int timeouts; use ClientTimeout.
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(GRAPHQL_ENDPOINT, json=payload) as response:
                if response.status != 200:
                    return None
                data = await response.json()
                # `or None` collapses a missing/empty auction object to None.
                return data.get('data', {}).get('auction', {}) or None
    except Exception:
        # Best-effort fetch: any network/JSON error means "no data" and the
        # caller falls back to HTML-scraped values.
        return None
|
||||
|
||||
|
||||
async def fetch_lot_bidding_data(lot_display_id: str) -> Optional[Dict]:
|
||||
"""
|
||||
Fetch lot bidding data from GraphQL API
|
||||
@@ -127,6 +184,15 @@ def format_bid_data(lot_details: Dict) -> Dict:
|
||||
return ''
|
||||
return ''
|
||||
|
||||
# Format status from minimumBidAmountMet
|
||||
minimum_bid_met = lot.get('minimumBidAmountMet', '')
|
||||
status_map = {
|
||||
'NO_MINIMUM_BID_AMOUNT': 'Geen Minimumprijs',
|
||||
'MINIMUM_BID_AMOUNT_NOT_MET': 'Minimumprijs nog niet gehaald',
|
||||
'MINIMUM_BID_AMOUNT_MET': 'Minimumprijs gehaald'
|
||||
}
|
||||
status = status_map.get(minimum_bid_met, '')
|
||||
|
||||
return {
|
||||
'current_bid': current_bid,
|
||||
'starting_bid': starting_bid,
|
||||
@@ -135,4 +201,209 @@ def format_bid_data(lot_details: Dict) -> Dict:
|
||||
'closing_time': format_timestamp(end_date),
|
||||
'bidding_status': lot.get('biddingStatus', ''),
|
||||
'vat_percentage': lot.get('vat', 0),
|
||||
'status': status,
|
||||
'auction_id': lot.get('auctionId', ''),
|
||||
}
|
||||
|
||||
|
||||
def format_auction_data(auction: Dict) -> Dict:
    """
    Extract viewing/pickup times from auction data.

    Args:
        auction: Auction data from GraphQL; 'viewingDays'/'collectionDays'
            are lists of {startDate, endDate, city, countryCode} dicts.

    Returns:
        Dict with 'viewing_time' and 'pickup_date' rendered as Dutch
        strings (e.g. "vr 05 dec 2025 van 09:00 tot 12:00", with an
        optional "city, COUNTRY" second line); '' when data is missing
        or malformed.
    """
    from datetime import datetime

    def format_days(days_list):
        # Only the first day entry is rendered; later entries are ignored.
        if not days_list or not isinstance(days_list, list):
            return ''

        first_day = days_list[0]
        start_ts = first_day.get('startDate')
        end_ts = first_day.get('endDate')
        city = first_day.get('city', '')
        country = first_day.get('countryCode', '').upper()

        if not start_ts or not end_ts:
            return ''

        try:
            # NOTE(review): naive fromtimestamp interprets the values in the
            # local timezone — assumed to be epoch seconds; confirm upstream.
            start_dt = datetime.fromtimestamp(start_ts)
            end_dt = datetime.fromtimestamp(end_ts)

            # Format: "vr 05 dec 2025 van 09:00 tot 12:00"
            days_nl = ['ma', 'di', 'wo', 'do', 'vr', 'za', 'zo']
            months_nl = ['jan', 'feb', 'mrt', 'apr', 'mei', 'jun',
                         'jul', 'aug', 'sep', 'okt', 'nov', 'dec']

            day_name = days_nl[start_dt.weekday()]
            month_name = months_nl[start_dt.month - 1]

            time_str = f"{day_name} {start_dt.day:02d} {month_name} {start_dt.year} van {start_dt.strftime('%H:%M')} tot {end_dt.strftime('%H:%M')}"

            if city:
                location = f"{city}, {country}" if country else city
                return f"{time_str}\n{location}"

            return time_str
        except Exception:
            # was a bare except: — never swallow SystemExit/KeyboardInterrupt
            return ''

    return {
        'viewing_time': format_days(auction.get('viewingDays', [])),
        'pickup_date': format_days(auction.get('collectionDays', []))
    }
|
||||
|
||||
|
||||
def extract_attributes_from_lot_json(lot_json: Dict) -> Dict:
    """
    Extract brand, model, and the raw attribute list from lot JSON.

    Args:
        lot_json: The lot object from __NEXT_DATA__

    Returns:
        Dict with 'brand', 'model' and 'attributes_json' (the attribute
        list serialized as JSON, '' when there are no attributes).
    """
    import json

    attributes = lot_json.get('attributes', [])
    if not isinstance(attributes, list):
        return {'brand': '', 'model': '', 'attributes_json': ''}

    brand = ''
    model = ''

    # Brand/model live as named entries in the attributes array, under
    # Dutch or English attribute names.
    for attr in attributes:
        if not isinstance(attr, dict):
            continue

        # `or ''` guards against an explicit null name in the payload,
        # which would otherwise crash on .lower().
        name = str(attr.get('name') or '').lower()
        value = attr.get('value', '')

        if name in ('brand', 'merk', 'fabrikant', 'manufacturer'):
            brand = value
        elif name in ('model', 'type'):
            model = value

    return {
        'brand': brand,
        'model': model,
        'attributes_json': json.dumps(attributes) if attributes else ''
    }
|
||||
|
||||
|
||||
def extract_enriched_attributes(lot_json: Dict, page_data: Dict) -> Dict:
    """
    Extract enriched valuation attributes from lot data.

    Args:
        lot_json: The lot object from __NEXT_DATA__
        page_data: Already parsed page data (title, description, brand)

    Returns:
        Dict with 'year_manufactured' (int or None),
        'condition_description', 'condition_score' (0-10 float or None),
        'serial_number', 'manufacturer' and 'damage_description'
        (strings, '' when not found).
    """
    import re

    attributes = lot_json.get('attributes', [])
    title = page_data.get('title', '')
    description = page_data.get('description', '')

    # Initialize
    year_manufactured = None
    condition_description = ''
    condition_score = None
    serial_number = ''
    manufacturer = ''
    damage_description = ''

    # Pass 1: structured attributes (Dutch/English attribute names).
    for attr in attributes:
        if not isinstance(attr, dict):
            continue

        # `or ''` guards an explicit null name, matching the sibling
        # extract_attributes_from_lot_json handling.
        name = str(attr.get('name') or '').lower()
        value = str(attr.get('value', ''))

        if name in ['jaar', 'year', 'bouwjaar', 'productiejaar']:
            try:
                year_manufactured = int(re.search(r'\d{4}', value).group())
            except (AttributeError, ValueError):
                # was a bare except: — no 4-digit run found; leave year unset
                pass

        elif name in ['conditie', 'condition', 'staat']:
            condition_description = value
            # Map condition wording to a 0-10 score. Order matters:
            # multi-word phrases ("zeer goed") must precede their
            # substrings ("goed").
            condition_map = {
                'nieuw': 10.0, 'new': 10.0,
                'als nieuw': 9.5, 'like new': 9.5,
                'uitstekend': 9.0, 'excellent': 9.0,
                'zeer goed': 8.0, 'very good': 8.0,
                'goed': 7.0, 'good': 7.0,
                'redelijk': 6.0, 'fair': 6.0,
                'matig': 5.0, 'moderate': 5.0,
                'slecht': 3.0, 'poor': 3.0,
                'defect': 1.0, 'defective': 1.0
            }
            for key, score in condition_map.items():
                if key in value.lower():
                    condition_score = score
                    break

        elif name in ['serienummer', 'serial', 'serial number', 'artikelnummer']:
            serial_number = value

        elif name in ['fabrikant', 'manufacturer', 'merk', 'brand']:
            manufacturer = value

    # Pass 2: fall back to a 4-digit year (1900-2099) in the title.
    if not year_manufactured:
        year_match = re.search(r'\b(19|20)\d{2}\b', title)
        if year_match:
            # The pattern guarantees four digits, so int() cannot fail here.
            year_manufactured = int(year_match.group())

    # Pass 3: pull the first sentence mentioning damage from the description.
    damage_keywords = ['schade', 'damage', 'beschadigd', 'damaged', 'defect', 'broken', 'kapot']
    if description:
        for keyword in damage_keywords:
            if keyword in description.lower():
                sentences = description.split('.')
                for sentence in sentences:
                    if keyword in sentence.lower():
                        damage_description = sentence.strip()
                        break
                break

    # Pass 4: condition fields from __NEXT_DATA__ when attributes had none.
    if not condition_description:
        lot_condition = lot_json.get('condition', '')
        if lot_condition and lot_condition != 'NOT_CHECKED':
            condition_description = lot_condition

        lot_appearance = lot_json.get('appearance', '')
        if lot_appearance and lot_appearance != 'NOT_CHECKED':
            if condition_description:
                condition_description += f", {lot_appearance}"
            else:
                condition_description = lot_appearance

    return {
        'year_manufactured': year_manufactured,
        'condition_description': condition_description,
        'condition_score': condition_score,
        'serial_number': serial_number,
        'manufacturer': manufacturer or page_data.get('brand', ''),  # Fallback to brand
        'damage_description': damage_description
    }
|
||||
|
||||
@@ -19,7 +19,13 @@ from config import (
|
||||
)
|
||||
from cache import CacheManager
|
||||
from parse import DataParser
|
||||
from graphql_client import fetch_lot_bidding_data, format_bid_data
|
||||
from graphql_client import (
|
||||
fetch_lot_bidding_data, format_bid_data,
|
||||
fetch_auction_data, format_auction_data,
|
||||
extract_attributes_from_lot_json,
|
||||
extract_enriched_attributes
|
||||
)
|
||||
from bid_history_client import fetch_bid_history, parse_bid_history
|
||||
|
||||
class TroostwijkScraper:
|
||||
"""Main scraper class for Troostwijk Auctions"""
|
||||
@@ -183,6 +189,31 @@ class TroostwijkScraper:
|
||||
print(f" Type: LOT")
|
||||
print(f" Title: {page_data.get('title', 'N/A')[:60]}...")
|
||||
|
||||
# Extract ALL data from __NEXT_DATA__ lot object
|
||||
import json
|
||||
import re
|
||||
lot_json = None
|
||||
lot_uuid = None
|
||||
|
||||
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group(1))
|
||||
lot_json = data.get('props', {}).get('pageProps', {}).get('lot', {})
|
||||
if lot_json:
|
||||
# Basic attributes
|
||||
attrs = extract_attributes_from_lot_json(lot_json)
|
||||
page_data.update(attrs)
|
||||
|
||||
# Enriched attributes (year, condition, etc.)
|
||||
enriched = extract_enriched_attributes(lot_json, page_data)
|
||||
page_data.update(enriched)
|
||||
|
||||
# Get lot UUID for bid history
|
||||
lot_uuid = lot_json.get('id')
|
||||
except:
|
||||
pass
|
||||
|
||||
# Fetch bidding data from GraphQL API
|
||||
lot_id = page_data.get('lot_id')
|
||||
print(f" Fetching bidding data from API...")
|
||||
@@ -190,11 +221,39 @@ class TroostwijkScraper:
|
||||
|
||||
if bidding_data:
|
||||
formatted_data = format_bid_data(bidding_data)
|
||||
# Update page_data with real bidding info
|
||||
page_data.update(formatted_data)
|
||||
print(f" Bid: {page_data.get('current_bid', 'N/A')}")
|
||||
print(f" Bid Count: {page_data.get('bid_count', 0)}")
|
||||
print(f" Closing: {page_data.get('closing_time', 'N/A')}")
|
||||
print(f" Status: {page_data.get('status', 'N/A')}")
|
||||
|
||||
# Extract bid increment from nextBidStepInCents
|
||||
lot_details_lot = bidding_data.get('lot', {})
|
||||
next_step_cents = lot_details_lot.get('nextBidStepInCents')
|
||||
if next_step_cents:
|
||||
page_data['bid_increment'] = next_step_cents / 100.0
|
||||
|
||||
# Get lot UUID if not already extracted
|
||||
if not lot_uuid:
|
||||
lot_uuid = lot_details_lot.get('id')
|
||||
|
||||
# Fetch bid history for intelligence
|
||||
if lot_uuid and page_data.get('bid_count', 0) > 0:
|
||||
print(f" Fetching bid history...")
|
||||
bid_history = await fetch_bid_history(lot_uuid)
|
||||
if bid_history:
|
||||
bid_data = parse_bid_history(bid_history, lot_id)
|
||||
page_data.update(bid_data)
|
||||
print(f" Bid velocity: {bid_data['bid_velocity']} bids/hour")
|
||||
|
||||
# Save bid history to database
|
||||
self.cache.save_bid_history(lot_id, bid_data['bid_records'])
|
||||
|
||||
# Fetch auction data for viewing/pickup times if we have auction_id
|
||||
auction_id = page_data.get('auction_id')
|
||||
if auction_id:
|
||||
auction_data = await fetch_auction_data(auction_id)
|
||||
if auction_data:
|
||||
auction_times = format_auction_data(auction_data)
|
||||
page_data.update(auction_times)
|
||||
else:
|
||||
print(f" Bid: {page_data.get('current_bid', 'N/A')} (from HTML)")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user