50 lines
1.9 KiB
Python
50 lines
1.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Inspect a lot page's HTML to find the viewing_time and pickup_date values."""
|
|
import asyncio
import re

from playwright.async_api import async_playwright
|
|
|
|
# Lot page that is inspected when the caller does not supply a URL.
DEFAULT_LOT_URL = (
    "https://www.troostwijkauctions.com/l/"
    "woonunit-type-tp-4-b-6m-nr-102-A1-37889-102"
)

# Regexes tried against the raw page HTML, keyed by the label printed for
# them. Each has exactly one capture group, so re.findall() yields strings.
_FIELD_PATTERNS = {
    'Bezichtigingen': r'Bezichtigingen.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
    'viewing': r'(?i)viewing.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
    'Ophalen': r'Ophalen.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
    'pickup': r'(?i)pickup.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
    'Status': r'Status\s+([^<]+)',
}

# Headings whose first following HTML element is dumped verbatim.
_SECTION_LABELS = ('Bezichtigingen', 'Ophalen')


async def _fetch_page_html(url):
    """Load *url* in headless Chromium and return the page's HTML content."""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until='networkidle')
            return await page.content()
        finally:
            # Close the browser even when navigation or content retrieval
            # raises (e.g. a timeout), not only on the success path.
            await browser.close()


def _print_pattern_matches(content):
    """Print up to three matches (first 200 chars each) per field pattern."""
    for name, pattern in _FIELD_PATTERNS.items():
        # DOTALL lets '.*?' span line breaks in the HTML. MULTILINE is not
        # needed because none of the patterns use '^' or '$'.
        matches = re.findall(pattern, content, re.DOTALL)
        if not matches:
            continue
        print(f"\n{name}:")
        for match in matches[:3]:
            print(f" {match[:200]}")


def _print_section(label, content):
    """Print the first 500 chars from *label* through the next closed tag."""
    print(f"\n\nSearching for '{label}' section:")
    section = re.search(
        rf'{re.escape(label)}.*?<.*?>(.*?)</.*?>', content, re.DOTALL
    )
    if section:
        print(section.group(0)[:500])


async def main(url: str = DEFAULT_LOT_URL) -> None:
    """Fetch a lot page and print candidate viewing/pickup snippets.

    Args:
        url: Address of the lot page to inspect. Defaults to the known
            sample lot in ``DEFAULT_LOT_URL``.

    The page is rendered with headless Chromium; the resulting HTML is then
    searched with the regexes in ``_FIELD_PATTERNS`` and the sections named
    in ``_SECTION_LABELS``. Results are written to stdout only.
    """
    content = await _fetch_page_html(url)

    print("Searching for patterns...")
    print("=" * 60)
    _print_pattern_matches(content)

    # Also dump the raw markup that follows each section heading.
    for label in _SECTION_LABELS:
        _print_section(label, content)
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: run the async entry point to completion.
    asyncio.run(main())
|