enrich data

This commit is contained in:
Tour
2025-12-07 01:59:45 +01:00
parent d09ee5574f
commit 08bf112c3f
9 changed files with 1750 additions and 32 deletions

370
explore_api_fields.py Normal file
View File

@@ -0,0 +1,370 @@
"""
Explore API responses to identify additional fields available for intelligence.
Tests GraphQL and REST API responses for field coverage.
"""
import asyncio
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import json
import aiohttp
from graphql_client import fetch_lot_bidding_data, GRAPHQL_ENDPOINT
from bid_history_client import fetch_bid_history, BID_HISTORY_ENDPOINT
# Introspection query shared by every type lookup; ``%s`` is the type name.
# The query text contains no other ``%`` so plain %-formatting is safe here.
_TYPE_INTROSPECTION_QUERY = """
query IntrospectionQuery {
__type(name: "%s") {
name
fields {
name
type {
name
kind
ofType {
name
kind
}
}
}
}
}
"""


def _field_type_name(type_ref):
    """Return a printable name for an introspection ``type`` object.

    The response may hold JSON null for ``name`` or ``ofType`` (the GraphQL
    introspection schema makes both nullable), and ``dict.get(key, default)``
    does not apply its default to a key that is present with a null value.
    Every lookup is therefore guarded with ``or``. The query only unwraps one
    ``ofType`` level, so anything still unnamed is reported as ``'Complex'``.
    """
    type_ref = type_ref or {}
    inner = type_ref.get('ofType') or {}
    return type_ref.get('name') or inner.get('name') or 'Complex'


async def _print_type_fields(session, type_name, heading):
    """Introspect *type_name* via *session* and print its fields under *heading*."""
    try:
        async with session.post(
            GRAPHQL_ENDPOINT,
            json={
                "query": _TYPE_INTROSPECTION_QUERY % type_name,
                "variables": {},
            },
            headers={"Content-Type": "application/json"},
        ) as response:
            if response.status != 200:
                print(f"Failed with status {response.status}")
                return
            data = await response.json()
            type_info = (data.get('data') or {}).get('__type')
            if not type_info:
                # Previously silent; say so, otherwise an unknown type looks
                # the same as a type without fields.
                print(f"\nNo schema information returned for type {type_name}")
                return
            print(heading)
            # ``fields`` can be null in the response, hence ``or []``.
            for field in type_info.get('fields') or []:
                print(f" - {field['name']}: {_field_type_name(field.get('type'))}")
            print()
    except Exception as e:
        # Best-effort exploration: report the problem and carry on with the
        # next lookup instead of aborting the script.
        print(f"Error: {e}")


async def explore_graphql_schema():
    """Print the fields the GraphQL schema exposes on ``LotDetails`` and ``Lot``.

    Sends one ``__type`` introspection query per type to ``GRAPHQL_ENDPOINT``
    and prints a ``- field: type`` line for every field returned. Non-200
    responses and errors raised while requesting or decoding are printed to
    stdout rather than raised.
    """
    print("=" * 80)
    print("GRAPHQL SCHEMA EXPLORATION")
    print("=" * 80)
    async with aiohttp.ClientSession() as session:
        # Introspection query for LotDetails type
        await _print_type_fields(session, "LotDetails", "\nLotDetails available fields:")
        # Also try Lot type
        await _print_type_fields(session, "Lot", "\nLot type available fields:")
async def test_graphql_full_query():
    """Run one wide ``lotDetails`` query and dump the raw JSON reply.

    Posts ``ComprehensiveLotQuery`` for a fixed lot display ID to
    ``GRAPHQL_ENDPOINT``. On HTTP 200 the decoded JSON is pretty-printed;
    otherwise the status code and response text are printed. Exceptions
    raised during the request are printed, not propagated.
    """
    banner = "=" * 80
    print(banner)
    print("GRAPHQL FULL QUERY TEST")
    print(banner)

    # Test with a real lot ID (example from database).
    lot_id = "A1-34731-107"
    comprehensive_query = """
query ComprehensiveLotQuery($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
lot {
id
displayId
title
description
currentBidAmount { cents currency }
initialAmount { cents currency }
nextMinimalBid { cents currency }
bidsCount
startDate
endDate
minimumBidAmountMet
lotNumber
auctionId
lotState
location {
city
countryCode
}
viewingDays {
city
countryCode
addressLine1
addressLine2
endDate
startDate
}
collectionDays {
city
countryCode
addressLine1
addressLine2
endDate
startDate
}
images {
url
thumbnailUrl
}
attributes {
name
value
}
}
}
}
"""
    request_body = {
        "query": comprehensive_query,
        "variables": {
            "lotDisplayId": lot_id,
            "locale": "nl_NL",
            "platform": "WEB",
        },
    }
    request_headers = {"Content-Type": "application/json"}

    async with aiohttp.ClientSession() as http:
        try:
            async with http.post(
                GRAPHQL_ENDPOINT,
                json=request_body,
                headers=request_headers,
            ) as reply:
                if reply.status != 200:
                    print(f"Failed with status {reply.status}")
                    print(await reply.text())
                    return
                payload = await reply.json()
                print(f"\nFull GraphQL response for {lot_id}:")
                print(json.dumps(payload, indent=2))
                print()
        except Exception as err:
            print(f"Error: {err}")
async def test_bid_history_response():
    """Fetch bid history for the most-bid lot in the database and list its fields.

    Steps:
      1. Read the lot with the highest ``bid_count`` and its cached page from
         the SQLite database at ``CacheManager().db_path``.
      2. Extract the lot UUID from the zlib-compressed cached page content.
      3. Call ``fetch_bid_history`` with that UUID, print the first three
         records as JSON, then the sorted union of keys over all records.

    Each step that yields nothing prints a short message and returns.
    """
    import re
    import sqlite3
    import zlib
    from contextlib import closing

    from cache import CacheManager

    print("=" * 80)
    print("BID HISTORY API TEST")
    print("=" * 80)

    # Do all database work up front and close the connection before the
    # network call, so it is neither held open across the await nor leaked
    # if a later step raises. ``closing`` is needed because a sqlite3
    # connection used directly as a context manager does not close itself.
    cache = CacheManager()
    lot_row = page_row = None
    with closing(sqlite3.connect(cache.db_path)) as conn:
        cursor = conn.cursor()
        # Find a lot with bids
        cursor.execute("""
SELECT lot_id, url FROM lots
WHERE bid_count > 0
ORDER BY bid_count DESC
LIMIT 1
""")
        lot_row = cursor.fetchone()
        if lot_row:
            # The UUID is not in the lots row; it has to come from the cached page.
            cursor.execute("SELECT content FROM cache WHERE url = ?", (lot_row[1],))
            page_row = cursor.fetchone()

    if not lot_row:
        print("No lot with bids found in database")
        return
    lot_id = lot_row[0]
    if not page_row:
        print(f"No cached page found for lot {lot_id}")
        return

    content = zlib.decompress(page_row[0]).decode('utf-8')
    match = re.search(r'"lot":\s*\{[^}]*"id":\s*"([^"]+)"', content)
    if not match:
        print(f"Could not find lot UUID in cached page for lot {lot_id}")
        return
    lot_uuid = match.group(1)
    print(f"\nTesting with lot {lot_id} (UUID: {lot_uuid})")

    # Fetch bid history
    bid_history = await fetch_bid_history(lot_uuid)
    if not bid_history:
        print("No bid history found")
        return

    print("\nBid history sample (first 3 records):")
    for number, bid in enumerate(bid_history[:3], start=1):
        print(f"\nBid {number}:")
        print(json.dumps(bid, indent=2))

    print("\n\nAll available fields in bid records:")
    all_keys = set()
    for bid in bid_history:
        all_keys.update(bid.keys())
    for key in sorted(all_keys):
        print(f" - {key}")
async def check_auction_api():
    """Probe the GraphQL endpoint for an ``auctionDetails`` query.

    Reads one non-null ``auction_id`` from the ``lots`` table of the SQLite
    database at ``CacheManager().db_path`` and posts the ``AuctionDetails``
    query for it to ``GRAPHQL_ENDPOINT``. On HTTP 200 the decoded JSON is
    pretty-printed; otherwise the status code and response text are printed.
    Exceptions raised during the request are printed, not propagated.
    """
    import sqlite3
    from contextlib import closing

    from cache import CacheManager

    print("=" * 80)
    print("AUCTION API EXPLORATION")
    print("=" * 80)
    auction_query = """
query AuctionDetails($auctionId: String!, $locale: String!, $platform: Platform!) {
auctionDetails(auctionId: $auctionId, locale: $locale, platform: $platform) {
auction {
id
title
description
startDate
endDate
firstLotEndDate
location {
city
countryCode
}
viewingDays {
city
countryCode
startDate
endDate
addressLine1
addressLine2
}
collectionDays {
city
countryCode
startDate
endDate
addressLine1
addressLine2
}
}
}
}
"""
    # Get an auction ID from database. The connection is closed before the
    # HTTP request so it is not held open across the await and cannot leak
    # if the request raises; ``closing`` is used because a sqlite3 connection
    # as a context manager does not close itself.
    cache = CacheManager()
    with closing(sqlite3.connect(cache.db_path)) as conn:
        cursor = conn.cursor()
        # Get auction ID from a lot
        cursor.execute("SELECT DISTINCT auction_id FROM lots WHERE auction_id IS NOT NULL LIMIT 1")
        result = cursor.fetchone()

    if not result:
        # Previously silent; make the skipped probe visible.
        print("No auction ID found in database")
        return

    auction_id = result[0]
    print(f"\nTesting with auction {auction_id}")
    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(
                GRAPHQL_ENDPOINT,
                json={
                    "query": auction_query,
                    "variables": {
                        "auctionId": auction_id,
                        "locale": "nl_NL",
                        "platform": "WEB",
                    },
                },
                headers={"Content-Type": "application/json"},
            ) as response:
                if response.status == 200:
                    data = await response.json()
                    print("\nAuction API response:")
                    print(json.dumps(data, indent=2))
                else:
                    print(f"Failed with status {response.status}")
                    print(await response.text())
        except Exception as e:
            # Best-effort probe: report and return rather than crash the run.
            print(f"Error: {e}")
async def main():
    """Run every exploration in sequence, then print the field summary."""
    explorations = (
        explore_graphql_schema,
        test_graphql_full_query,
        test_bid_history_response,
        check_auction_api,
    )
    # Strictly one after another, so each section's output stays contiguous.
    for explore in explorations:
        await explore()

    rule = "=" * 80
    summary = """
CURRENTLY CAPTURED:
- Lot bidding data: current_bid, starting_bid, minimum_bid, bid_count, closing_time
- Lot attributes: brand, model, manufacturer, year, condition, serial_number
- Bid history: bid_amount, bid_time, bidder_id, is_autobid
- Bid intelligence: first_bid_time, last_bid_time, bid_velocity, bid_increment
- Images: URLs and local paths
POTENTIALLY AVAILABLE (TO CHECK):
- Viewing/collection times with full address and date ranges
- Lot location details (city, country)
- Lot state/status
- Image thumbnails
- More detailed attributes
NOT AVAILABLE:
- Watch count (not exposed in API)
- Reserve price (not exposed in API)
- Estimated min/max value (not exposed in API)
- Bidder identities (anonymized)
"""
    print(f"\n{rule}")
    print("SUMMARY: AVAILABLE DATA FIELDS")
    print(rule)
    print(summary)
if __name__ == "__main__":
    # Script entry point: run all explorations on a fresh event loop.
    asyncio.run(main())