GraphQL integrate, data correctness
This commit is contained in:
54
check_apollo_state.py
Normal file
54
check_apollo_state.py
Normal file
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check for Apollo state or other embedded data"""
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# Auction page that is inspected when main() is called without arguments.
_DEFAULT_AUCTION_URL = "https://www.troostwijkauctions.com/a/woonunits-generatoren-reinigingsmachines-en-zakelijke-goederen-A1-37889"


def _print_banner(title: str) -> None:
    """Print *title* framed by two 60-character ``=`` rules."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(title)
    print(rule)


def _report_embedded_data(content: str) -> None:
    """Search *content* for known embedded-data patterns and print a preview of each hit."""
    # Each entry: (regex with one capture group, label used in the output).
    patterns = [
        (r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', "NEXT_DATA"),
        (r'window\.__APOLLO_STATE__\s*=\s*({.+?});', "APOLLO_STATE"),
        (r'"lots"\s*:\s*\[(.+?)\]', "LOTS_ARRAY"),
    ]

    for pattern, name in patterns:
        match = re.search(pattern, content, re.DOTALL)
        if not match:
            continue

        _print_banner(f"FOUND: {name}")
        payload = match.group(1)

        if name == "LOTS_ARRAY":
            # The capture is the inside of an array, not a JSON document on
            # its own, so it is only previewed, never parsed.
            print(f"Preview: {payload[:500]}")
            continue

        try:
            data = json.loads(payload)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError. The non-greedy regex
            # may cut the payload short, so fall back to a raw preview.
            print(f"Preview: {payload[:1000]}")
        else:
            print(json.dumps(data, indent=2)[:2000])


def _report_lot_scripts(content: str, max_scripts: int) -> None:
    """Print the start of up to *max_scripts* inline scripts mentioning lot, bid and end."""
    _print_banner("SEARCHING FOR LOT DATA IN ALL SCRIPTS")

    scripts = re.findall(r'<script[^>]*>(.+?)</script>', content, re.DOTALL)
    terms = ('lot', 'bid', 'end')
    shown = 0
    for i, script in enumerate(scripts):
        lowered = script.lower()
        if not all(term in lowered for term in terms):
            continue

        # "#{i}" is the script's position among all matched <script> tags.
        print(f"\nScript #{i} (first 500 chars):")
        print(script[:500])

        # Limit output by the number of scripts actually printed, not by the
        # script's index in the page.
        shown += 1
        if shown >= max_scripts:
            break


async def main(url: str = _DEFAULT_AUCTION_URL, max_scripts: int = 5) -> None:
    """Load an auction page in headless Chromium and print any embedded data found.

    The rendered HTML is searched for a ``__NEXT_DATA__`` script tag, a
    ``window.__APOLLO_STATE__`` assignment and a ``"lots"`` array, then for
    inline scripts that mention "lot", "bid" and "end".

    Args:
        url: Page to load; defaults to the auction URL this script was
            written against.
        max_scripts: Maximum number of matching inline scripts to print.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until='networkidle')
            content = await page.content()
        finally:
            # Close the browser even when navigation or content retrieval fails.
            await browser.close()

    # All analysis works on the captured HTML string; no browser needed.
    _report_embedded_data(content)
    _report_lot_scripts(content, max_scripts)
|
||||
|
||||
# Script entry point: run the async main() coroutine to completion when this
# file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user