enrich data
This commit is contained in:
49
inspect_lot_html.py
Normal file
49
inspect_lot_html.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Inspect a lot page HTML to find viewing_time and pickup_date"""
|
||||
import asyncio
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
async def main():
    """Fetch one known lot page and print regex hits for viewing/pickup info.

    Launches headless Chromium through Playwright, loads a hard-coded lot
    URL, grabs the rendered HTML and closes the browser. It then prints:

    * up to three matches (each truncated to 200 characters) for every
      entry in a small table of exploratory regexes, and
    * the first 500 characters of the first 'Bezichtigingen' and 'Ophalen'
      sections that are followed by an HTML element.

    This is a diagnostic helper: it only prints to stdout and returns None.
    Errors raised by Playwright (navigation failure, timeout, ...) propagate
    to the caller after the browser has been closed.
    """
    # Local import so this function does not depend on a file-level import.
    import re

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            # Use the known lot
            await page.goto(
                "https://www.troostwijkauctions.com/l/woonunit-type-tp-4-b-6m-nr-102-A1-37889-102",
                wait_until='networkidle',
            )
            content = await page.content()
        finally:
            # Always release the browser, even when navigation fails; only
            # the captured HTML string is needed from here on.
            await browser.close()

    print("Searching for patterns...")
    print("="*60)

    # Shared capture group: "<dd> <mon> <yyyy> van <hh:mm> tot <hh:mm>".
    # NOTE(review): 'van'/'tot' look like Dutch for 'from'/'until' -- confirm
    # against the live page text.
    date_range = r'(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})'

    # Search for viewing time patterns
    patterns = {
        'Bezichtigingen': r'Bezichtigingen.*?' + date_range,
        'viewing': r'(?i)viewing.*?' + date_range,
        'Ophalen': r'Ophalen.*?' + date_range,
        'pickup': r'(?i)pickup.*?' + date_range,
        'Status': r'Status\s+([^<]+)',
    }

    # DOTALL lets '.*?' span line breaks between a label and its date.
    flags = re.DOTALL | re.MULTILINE
    for name, pattern in patterns.items():
        matches = re.findall(pattern, content, flags)
        if not matches:
            continue
        print(f"\n{name}:")
        # Every pattern has exactly one group, so each match is a plain str.
        for match in matches[:3]:
            print(f" {match[:200]}")

    # Also look for structured data
    print("\n\nSearching for 'Bezichtigingen' section:")
    bez_match = re.search(r'Bezichtigingen.*?<.*?>(.*?)</.*?>', content, re.DOTALL)
    if bez_match:
        print(bez_match.group(0)[:500])

    print("\n\nSearching for 'Ophalen' section:")
    oph_match = re.search(r'Ophalen.*?<.*?>(.*?)</.*?>', content, re.DOTALL)
    if oph_match:
        print(oph_match.group(0)[:500])
|
||||
|
||||
# Script entry point: run the async inspection only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user