52 lines
1.9 KiB
Python
52 lines
1.9 KiB
Python
#!/usr/bin/env python3
|
|
import sys
|
|
import os
|
|
# Absolute path of the directory one level above this script's own directory.
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Put <parent>/src first and <parent> second on the import search path
# (presumably where the `scraper` and `config` modules imported below live).
sys.path[:0] = [os.path.join(parent_dir, 'src'), parent_dir]
|
|
|
|
import asyncio
|
|
from scraper import TroostwijkScraper
|
|
import config
|
|
import os
|
|
|
|
async def test():
    """Crawl one hard-coded lot page in online mode and print the parsed fields.

    Launches headless Chromium through Playwright, wraps the scraper's
    ``parser.parse_page`` so the parser's raw result is printed as it is
    produced, runs ``scraper.crawl_page`` on the URL, and finally prints
    the ``description`` (first 100 characters), ``closing_time``,
    ``bid_count`` and ``status`` of the returned page data.

    Side effects: sets ``SCAEV_OFFLINE=0`` in the process environment and
    ``config.OFFLINE = False``; performs live network access.

    Returns:
        None. All results are written to stdout.
    """
    # Force online mode
    # NOTE(review): `config` is already imported when the env var is set, so
    # presumably the explicit attribute assignments are what take effect.
    os.environ['SCAEV_OFFLINE'] = '0'
    config.OFFLINE = False

    scraper = TroostwijkScraper()
    scraper.offline = False

    # Imported here so Playwright is only required when the check actually runs.
    from playwright.async_api import async_playwright

    url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12"

    # Add debug logging to parser
    original_parse = scraper.parser.parse_page

    def debug_parse(content, url):
        """Delegate to the real parser and echo the fields under inspection."""
        result = original_parse(content, url)
        if result:
            description = result.get('description')
            print("PARSER OUTPUT:")
            print(f" description: {description[:100] if description else 'EMPTY'}")
            print(f" closing_time: {result.get('closing_time', 'NONE')}")
            print(f" bid_count: {result.get('bid_count', 'NONE')}")
        return result

    scraper.parser.parse_page = debug_parse

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            page = await context.new_page()
            page_data = await scraper.crawl_page(page, url)
        finally:
            # Close the browser even when page setup or the crawl raises.
            await browser.close()

    # A falsy result (e.g. None) is reported with the same placeholders the
    # per-field fallbacks would produce.
    fields = page_data or {}
    description = fields.get('description')

    print("\nFINAL page_data:")
    print(f" description: {description[:100] if description else 'EMPTY'}")
    print(f" closing_time: {fields.get('closing_time', 'NONE')}")
    print(f" bid_count: {fields.get('bid_count', 'NONE')}")
    print(f" status: {fields.get('status', 'NONE')}")
|
|
|
|
# Run the live crawl only when executed as a script; merely importing this
# module (for example by a test collector) must not launch a browser.
if __name__ == "__main__":
    asyncio.run(test())
|