enrich data

This commit is contained in:
Tour
2025-12-07 01:59:45 +01:00
parent d09ee5574f
commit 08bf112c3f
9 changed files with 1750 additions and 32 deletions

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Find viewing/pickup in actual HTML"""
import asyncio
from playwright.async_api import async_playwright
import re
# Lot page inspected when main() is called without an explicit URL.
_DEFAULT_LOT_URL = "https://www.troostwijkauctions.com/l/woonunit-type-tp-4-b-6m-nr-102-A1-37889-102"


def _print_first_match(lines, keywords, label):
    """Print the context around the first line containing any keyword.

    The match is case-insensitive. The printed context runs from one line
    before the match to four lines after it, clamped to the list bounds.

    Args:
        lines: Page text already split into individual lines.
        keywords: Lowercase substrings to look for.
        label: Section name shown in the "FOUND ..." header.

    Returns:
        True if a matching line was found and printed, otherwise False.
    """
    for i, line in enumerate(lines):
        lowered = line.lower()  # lowercase once, not once per keyword
        if any(keyword in lowered for keyword in keywords):
            # Slicing clamps the upper bound, so only the lower needs max().
            context = lines[max(0, i - 1):i + 5]
            print(f"FOUND {label}:")
            for c in context:
                print(f" {c}")
            print()
            return True
    return False


async def main(url=_DEFAULT_LOT_URL):
    """Load a lot page and print its viewing and pickup sections.

    Opens the page in headless Chromium, reads the rendered text of the
    document body, and prints the first line (with surrounding context)
    that mentions viewing and the first that mentions pickup/collection.

    Args:
        url: Lot page to inspect. Defaults to a lot that should have
            viewing times.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until='networkidle')
            # innerText gives the rendered text rather than raw markup.
            text_content = await page.evaluate("document.body.innerText")
        finally:
            # Close the browser even when navigation or evaluation fails.
            await browser.close()

    print("Searching for viewing/pickup patterns...\n")
    lines = text_content.split('\n')

    # "Bezichtigingen" section
    _print_first_match(lines, ('bezichtig', 'viewing'), "Bezichtigingen")
    # "Ophalen" section
    _print_first_match(lines, ('ophalen', 'collection', 'pickup'), "Ophalen")
if __name__ == "__main__":
    # Only drive the coroutine when run as a script, so importing this
    # module never launches a browser.
    asyncio.run(main())