#!/usr/bin/env python3
"""Scrape a fresh auction page to see the lots array structure.

Debugging aid: opens the auctions listing, follows the first auction link
found in the rendered HTML, and prints a short summary of the ``auction``
object embedded in that page's ``__NEXT_DATA__`` JSON payload, including a
truncated dump of the first lot.
"""

import asyncio
import json
import re

from playwright.async_api import async_playwright

BASE_URL = "https://www.troostwijkauctions.com"

# First href pointing at an auction detail page (paths of the form /a/...).
_AUCTION_LINK_RE = re.compile(r'href="(/a/[^"]+)"')

# Body of the <script id="__NEXT_DATA__"> element. The closing </script>
# anchor is required: without it the lazy group would capture one character.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>',
    re.DOTALL,
)


def _extract_next_data(html):
    """Return the parsed ``__NEXT_DATA__`` JSON from *html*, or None if absent.

    Raises:
        json.JSONDecodeError: if the script tag is present but its body is
            not valid JSON.
    """
    found = _NEXT_DATA_RE.search(html)
    if found is None:
        return None
    return json.loads(found.group(1))


def _print_auction_summary(page_props):
    """Print the auction name, lot count and first lot from *page_props*."""
    if 'auction' not in page_props:
        return

    auction = page_props['auction']
    # `or` guards against keys that are present but null in the JSON.
    name = auction.get('name') or ''
    lots = auction.get('lots') or []

    print(f"Auction: {name[:50]}...")
    print(f"Lots in array: {len(lots)}")

    if lots:
        print("\nFIRST LOT:")
        # Cap the dump so a very large lot object does not flood the terminal.
        print(json.dumps(lots[0], indent=2)[:1500])


async def _inspect_first_auction(page):
    """Navigate *page* to the first listed auction and print its lot data."""
    await page.goto(f"{BASE_URL}/auctions", wait_until='networkidle')
    listing_html = await page.content()

    link = _AUCTION_LINK_RE.search(listing_html)
    if link is None:
        print("No auction found")
        return

    auction_url = f"{BASE_URL}{link.group(1)}"
    print(f"Scraping: {auction_url}\n")

    await page.goto(auction_url, wait_until='networkidle')
    auction_html = await page.content()

    data = _extract_next_data(auction_html)
    if data is None:
        print("No __NEXT_DATA__ found")
        return

    page_props = data.get('props', {}).get('pageProps', {})
    _print_auction_summary(page_props)


async def main():
    """Launch headless Chromium, inspect the first auction, then clean up."""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await _inspect_first_auction(page)
        finally:
            # Close the browser on every exit path, including the early
            # returns above and any exception raised while scraping.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(main())