55 lines
2.0 KiB
Python
55 lines
2.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Check for Apollo state or other embedded data"""
|
|
import asyncio
|
|
import json
|
|
import re
|
|
from playwright.async_api import async_playwright
|
|
|
|
# Auction page inspected when main() is called without an explicit URL.
_DEFAULT_URL = "https://www.troostwijkauctions.com/a/woonunits-generatoren-reinigingsmachines-en-zakelijke-goederen-A1-37889"

# (regex, label) pairs describing the embedded data structures to look for.
# The captures are non-greedy, so a capture may stop before the end of a
# nested JSON value; the reporting code falls back to a raw preview then.
_EMBEDDED_DATA_PATTERNS = [
    (r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', "NEXT_DATA"),
    (r'window\.__APOLLO_STATE__\s*=\s*({.+?});', "APOLLO_STATE"),
    (r'"lots"\s*:\s*\[(.+?)\]', "LOTS_ARRAY"),
]

# A script is reported only if it mentions all of these (case-insensitive).
_LOT_TERMS = ('lot', 'bid', 'end')

_BANNER = '=' * 60


def _report_embedded_data(content):
    """Print a preview of each known embedded data structure found in *content*."""
    for pattern, name in _EMBEDDED_DATA_PATTERNS:
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            continue

        print(f"\n{_BANNER}")
        print(f"FOUND: {name}")
        print(_BANNER)

        payload = match.group(1)
        if name == "LOTS_ARRAY":
            # The capture is the inside of an array, not a JSON document,
            # so it is shown raw instead of being parsed.
            print(f"Preview: {payload[:500]}")
            continue

        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            # Best effort: show the raw text when the capture is not valid JSON.
            print(f"Preview: {payload[:1000]}")
        else:
            print(json.dumps(data, indent=2)[:2000])


def _report_lot_scripts(content, max_matches=5):
    """Print the start of each <script> body that mentions every lot term.

    At most *max_matches* scripts are printed, counted by matches shown
    rather than by the script's position in the page.
    """
    print(f"\n{_BANNER}")
    print("SEARCHING FOR LOT DATA IN ALL SCRIPTS")
    print(_BANNER)

    scripts = re.findall(r'<script[^>]*>(.+?)</script>', content, re.DOTALL)
    shown = 0
    for i, script in enumerate(scripts):
        lowered = script.lower()  # lower-case once, not once per term
        if not all(term in lowered for term in _LOT_TERMS):
            continue

        print(f"\nScript #{i} (first 500 chars):")
        print(script[:500])

        shown += 1
        if shown >= max_matches:  # Limit output
            break


async def main(url=_DEFAULT_URL):
    """Load an auction page headlessly and print any embedded data it carries.

    The rendered HTML is searched for a ``__NEXT_DATA__`` script, a
    ``window.__APOLLO_STATE__`` assignment and a ``"lots"`` array, and then
    for any script body mentioning "lot", "bid" and "end". Results are
    printed to stdout; nothing is returned.

    Args:
        url: Page to inspect. Defaults to the auction page this script was
            written against.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until='networkidle')
            content = await page.content()

            _report_embedded_data(content)
            _report_lot_scripts(content)
        finally:
            # Close the browser even if navigation or analysis raises.
            await browser.close()
|
|
|
|
if __name__ == "__main__":
    # Probe the page only when run as a script, never on import.
    probe = main()
    asyncio.run(probe)
|