GraphQL integration, data correctness

This commit is contained in:
Tour
2025-12-07 00:25:25 +01:00
parent 8c5f6016ec
commit 71567fd965
17 changed files with 1037 additions and 13 deletions

54
check_apollo_state.py Normal file
View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""Check for Apollo state or other embedded data"""
import asyncio
import json
import re
from playwright.async_api import async_playwright
def _print_banner(title):
    """Print *title* framed above and below by a 60-character '=' rule."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(title)
    print(rule)


def _report_embedded_data(content):
    """Search *content* for known embedded data blobs and print a preview.

    Three patterns are tried: the ``__NEXT_DATA__`` script tag, a
    ``window.__APOLLO_STATE__`` assignment, and a raw ``"lots": [...]``
    array. The first two are parsed as JSON and pretty-printed (truncated
    to 2000 characters); when the captured text is not valid JSON, or for
    the lots array, the raw captured text is previewed instead.
    """
    patterns = [
        (r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', "NEXT_DATA"),
        (r'window\.__APOLLO_STATE__\s*=\s*({.+?});', "APOLLO_STATE"),
        (r'"lots"\s*:\s*\[(.+?)\]', "LOTS_ARRAY"),
    ]
    for pattern, name in patterns:
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            continue
        _print_banner(f"FOUND: {name}")
        payload = match.group(1)
        if name == "LOTS_ARRAY":
            # The capture is the inside of the array, not a JSON document,
            # so it is only ever shown raw.
            print(f"Preview: {payload[:500]}")
            continue
        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            # The non-greedy capture can stop early and yield invalid JSON;
            # fall back to showing the raw text.
            print(f"Preview: {payload[:1000]}")
        else:
            print(json.dumps(data, indent=2)[:2000])


def _report_lot_scripts(content, max_previews=5):
    """Print the first 500 characters of scripts that mention lot data.

    A script qualifies when its lower-cased body contains all of the
    substrings ``lot``, ``bid`` and ``end``. At most *max_previews*
    qualifying scripts are printed.
    """
    _print_banner("SEARCHING FOR LOT DATA IN ALL SCRIPTS")
    scripts = re.findall(r'<script[^>]*>(.+?)</script>', content, re.DOTALL)
    shown = 0
    for i, script in enumerate(scripts):
        lowered = script.lower()
        if not all(term in lowered for term in ('lot', 'bid', 'end')):
            continue
        print(f"\nScript #{i} (first 500 chars):")
        print(script[:500])
        # Limit output by the number of previews printed, not by the
        # script's position in the page.
        shown += 1
        if shown >= max_previews:
            break


async def main():
    """Fetch one auction page in headless Chromium and report embedded data.

    Loads the page, grabs the rendered HTML, then prints any Next.js /
    Apollo / lots-array data found in it followed by previews of script
    tags that look like they carry lot data.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto("https://www.troostwijkauctions.com/a/woonunits-generatoren-reinigingsmachines-en-zakelijke-goederen-A1-37889", wait_until='networkidle')
            content = await page.content()
        finally:
            # Always release the browser, even if navigation fails.
            await browser.close()

    # Only the captured HTML string is needed from here on.
    _report_embedded_data(content)
    _report_lot_scripts(content)
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, so that
    # importing this module does not launch a browser.
    asyncio.run(main())