Files
scaev/scrape_fresh_auction.py
2025-12-07 00:25:25 +01:00

52 lines
1.6 KiB
Python

#!/usr/bin/env python3
"""Scrape a fresh auction page to see the lots array structure"""
import asyncio
import json
import re
from playwright.async_api import async_playwright
async def main():
    """Scrape the first listed auction and print a summary of its lots array.

    Loads the auctions index page, follows the first ``/a/...`` link found in
    the rendered HTML, extracts the ``__NEXT_DATA__`` JSON from the auction
    page, and prints the auction name, the number of lots, and a truncated
    dump of the first lot.

    Returns:
        None. Results and "not found" diagnostics are printed to stdout.

    Raises:
        json.JSONDecodeError: If the ``__NEXT_DATA__`` payload is not valid
            JSON. Errors raised by Playwright (for example navigation
            failures) also propagate to the caller.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        # try/finally so the browser is closed on the early returns and on
        # errors too, not only when the happy path runs to completion.
        try:
            page = await browser.new_page()

            # Get first auction
            await page.goto("https://www.troostwijkauctions.com/auctions", wait_until='networkidle')
            content = await page.content()

            # Find first auction link
            match = re.search(r'href="(/a/[^"]+)"', content)
            if not match:
                print("No auction found")
                return

            auction_url = f"https://www.troostwijkauctions.com{match.group(1)}"
            print(f"Scraping: {auction_url}\n")

            await page.goto(auction_url, wait_until='networkidle')
            content = await page.content()

            # Extract __NEXT_DATA__
            match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
            if not match:
                print("No __NEXT_DATA__ found")
                return

            data = json.loads(match.group(1))
            # `or {}` also covers keys that are present but null in the JSON.
            page_props = (data.get('props') or {}).get('pageProps') or {}
            _print_auction_summary(page_props)
        finally:
            await browser.close()


def _print_auction_summary(page_props):
    """Print the auction name, lot count, and a preview of the first lot.

    Args:
        page_props: The ``props.pageProps`` mapping taken from the page's
            ``__NEXT_DATA__`` JSON.
    """
    auction = page_props.get('auction')
    if not auction:
        # Report it like the other "not found" paths instead of exiting silently.
        print("No auction in pageProps")
        return

    # `or` fallbacks keep null values from crashing the slice / len() below.
    name = auction.get('name') or ''
    lots = auction.get('lots') or []

    print(f"Auction: {name[:50]}...")
    print(f"Lots in array: {len(lots)}")

    if lots:
        print("\nFIRST LOT:")
        # Truncated so a single large lot object does not flood the terminal.
        print(json.dumps(lots[0], indent=2)[:1500])
# Script entry point: run the scrape only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())