#!/usr/bin/env python3
"""Check for Apollo state or other embedded data"""

import asyncio
import json
import re

from playwright.async_api import async_playwright

# Auction page whose rendered HTML is inspected.
AUCTION_URL = (
    "https://www.troostwijkauctions.com/a/"
    "woonunits-generatoren-reinigingsmachines-en-zakelijke-goederen-A1-37889"
)

# (regex, label) pairs; group 1 of each regex is the payload to show.
# The script-tag pattern is anchored on both the opening and closing tag:
# a lazy group with nothing after it would capture a single character.
EMBEDDED_DATA_PATTERNS = [
    (r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', "NEXT_DATA"),
    (r'window\.__APOLLO_STATE__\s*=\s*({.+?});', "APOLLO_STATE"),
    (r'"lots"\s*:\s*\[(.+?)\]', "LOTS_ARRAY"),
]

# Captures the body of every non-empty <script> element.
SCRIPT_PATTERN = re.compile(r'<script[^>]*>(.+?)</script>', re.DOTALL)

# A script is reported only if it mentions all of these (case-insensitive).
LOT_TERMS = ('lot', 'bid', 'end')

# Upper bound on how many matching scripts are printed.
MAX_SCRIPT_PREVIEWS = 5

RULE = '=' * 60


def _report_embedded_data(content: str) -> None:
    """Print the first match of each known embedded-data pattern in *content*.

    The LOTS_ARRAY payload is an array interior rather than a complete JSON
    document, so it is shown as a raw 500-character preview. The other
    payloads are parsed as JSON and pretty-printed (first 2000 characters);
    if parsing fails, a raw 1000-character preview is printed instead.
    """
    for pattern, name in EMBEDDED_DATA_PATTERNS:
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            continue

        print(f"\n{RULE}")
        print(f"FOUND: {name}")
        print(f"{RULE}")

        payload = match.group(1)
        if name == "LOTS_ARRAY":
            print(f"Preview: {payload[:500]}")
            continue

        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            # Best effort: the capture may be truncated or not pure JSON.
            print(f"Preview: {payload[:1000]}")
        else:
            print(json.dumps(data, indent=2)[:2000])


def _report_lot_scripts(content: str) -> None:
    """Print previews of scripts in *content* that mention every LOT_TERMS term.

    At most MAX_SCRIPT_PREVIEWS scripts are printed; the number shown in each
    heading is the script's position among all scripts found on the page.
    """
    print(f"\n{RULE}")
    print("SEARCHING FOR LOT DATA IN ALL SCRIPTS")
    print(f"{RULE}")

    shown = 0
    for index, script in enumerate(SCRIPT_PATTERN.findall(content)):
        lowered = script.lower()  # lower-case once, not once per term
        if not all(term in lowered for term in LOT_TERMS):
            continue

        print(f"\nScript #{index} (first 500 chars):")
        print(script[:500])

        shown += 1
        if shown >= MAX_SCRIPT_PREVIEWS:  # Limit output
            break


async def main() -> None:
    """Load the auction page in headless Chromium and report embedded data.

    Navigates to AUCTION_URL, waits for the "networkidle" state, grabs the
    page HTML, and then prints whatever embedded data structures and
    lot-related scripts are found in it.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(AUCTION_URL, wait_until='networkidle')
            content = await page.content()
        finally:
            # Close the browser even if navigation or content retrieval fails.
            await browser.close()

    _report_embedded_data(content)
    _report_lot_scripts(content)


if __name__ == "__main__":
    asyncio.run(main())