#!/usr/bin/env python3
"""Debug script: crawl one Troostwijk auction page online and print key fields.

The script forces the scraper out of offline mode, wraps the scraper's
``parser.parse_page`` so the raw parser result is printed, crawls a single
hard-coded URL with headless Chromium, and finally prints the ``page_data``
returned by ``TroostwijkScraper.crawl_page``.
"""
import asyncio
import os
import sys

# Make the parent of this script's directory, and its ``src`` sub-directory,
# importable so that ``scraper`` and ``config`` resolve when run directly.
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, parent_dir)
sys.path.insert(0, os.path.join(parent_dir, 'src'))

from scraper import TroostwijkScraper  # noqa: E402  (needs sys.path set up above)
import config  # noqa: E402

# Page crawled by this debug run.
TEST_URL = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12"


def _description_preview(data):
    """Return the first 100 characters of ``data['description']`` or ``'EMPTY'``.

    ``'EMPTY'`` is returned when ``data`` itself is falsy or when it has no
    truthy ``description`` entry.
    """
    description = data.get('description') if data else None
    return description[:100] if description else 'EMPTY'


def _field(data, key):
    """Return ``data[key]``, or ``'NONE'`` when ``data`` is falsy or lacks ``key``."""
    return data.get(key, 'NONE') if data else 'NONE'


async def test() -> None:
    """Crawl ``TEST_URL`` in online mode and print parser and final output."""
    # Force online mode. ``config`` is already imported at this point, so the
    # module flag and the scraper attribute are overridden explicitly as well
    # as the environment variable.
    os.environ['SCAEV_OFFLINE'] = '0'
    config.OFFLINE = False

    scraper = TroostwijkScraper()
    scraper.offline = False

    # Add debug logging to parser: wrap parse_page so its result is printed
    # before being handed back to the scraper unchanged.
    original_parse = scraper.parser.parse_page

    def debug_parse(content, url):
        result = original_parse(content, url)
        if result:
            print("PARSER OUTPUT:")
            print(f"  description: {_description_preview(result)}")
            print(f"  closing_time: {_field(result, 'closing_time')}")
            print(f"  bid_count: {_field(result, 'bid_count')}")
        return result

    scraper.parser.parse_page = debug_parse

    # Imported lazily so Playwright is only loaded when the crawl actually runs.
    from playwright.async_api import async_playwright

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            page = await context.new_page()
            page_data = await scraper.crawl_page(page, TEST_URL)
        finally:
            # Close the browser even when the crawl raises.
            await browser.close()

    print("\nFINAL page_data:")
    print(f"  description: {_description_preview(page_data)}")
    print(f"  closing_time: {_field(page_data, 'closing_time')}")
    print(f"  bid_count: {_field(page_data, 'bid_count')}")
    print(f"  status: {_field(page_data, 'status')}")


if __name__ == "__main__":
    asyncio.run(test())