#!/usr/bin/env python3
"""Test to validate that all expected fields are populated after scraping."""
import sys
import os
import asyncio
import sqlite3

# Add parent and src directories to the path
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, parent_dir)
sys.path.insert(0, os.path.join(parent_dir, 'src'))

# Force online mode before importing
os.environ['SCAEV_OFFLINE'] = '0'

from scraper import TroostwijkScraper
import config


async def test_lot_has_all_fields():
    """Test that a lot page has all expected fields populated."""
    print("\n" + "=" * 60)
    print("TEST: Lot has all required fields")
    print("=" * 60)

    # Use the example lot reported by the user
    test_url = "https://www.troostwijkauctions.com/l/radaway-idea-black-dwj-doucheopstelling-A1-39956-18"

    # Ensure we're not in offline mode
    config.OFFLINE = False
    scraper = TroostwijkScraper()
    scraper.offline = False

    print(f"\n[1] Scraping: {test_url}")

    # Start Playwright and scrape the lot page
    from playwright.async_api import async_playwright
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()
        page_data = await scraper.crawl_page(page, test_url)
        await browser.close()

    if not page_data:
        print("  [FAIL] No data returned")
        return False

    print("\n[2] Validating fields...")

    # Fields that MUST have values (critical for auction functionality)
    required_fields = {
        'closing_time': 'Closing time',
        'current_bid': 'Current bid',
        'bid_count': 'Bid count',
        'status': 'Status',
    }

    # Fields that SHOULD have values but may legitimately be empty
    optional_fields = {
        'description': 'Description',
    }

    missing_fields = []
    empty_fields = []
    optional_missing = []

    # Check required fields
    for field, label in required_fields.items():
        value = page_data.get(field)
        if value is None:
            missing_fields.append(label)
            print(f"  [FAIL] {label}: MISSING (None)")
        elif value in ('', 0, 'No bids'):
            # Special case: 'No bids' is only acceptable if bid_count is 0
            if field == 'current_bid' and page_data.get('bid_count', 0) == 0:
                print(f"  [PASS] {label}: '{value}' (acceptable - no bids)")
            else:
                empty_fields.append(label)
                print(f"  [FAIL] {label}: EMPTY ('{value}')")
        else:
            print(f"  [PASS] {label}: {value}")

    # Check optional fields (warn but don't fail)
    for field, label in optional_fields.items():
        value = page_data.get(field)
        if not value:
            optional_missing.append(label)
            print(f"  [WARN] {label}: EMPTY (may be legitimate)")
        else:
            print(f"  [PASS] {label}: {value[:50]}...")

    # Check that the database row matches what the page reported
    print("\n[3] Checking database entry...")
    conn = sqlite3.connect(scraper.cache.db_path)
    cursor = conn.cursor()
    cursor.execute("""
        SELECT closing_time, current_bid, bid_count, description, status
        FROM lots WHERE url = ?
    """, (test_url,))
    row = cursor.fetchone()
    conn.close()

    if row:
        db_closing, db_bid, db_count, db_desc, db_status = row
        print(f"  DB closing_time: {db_closing or 'EMPTY'}")
        print(f"  DB current_bid: {db_bid or 'EMPTY'}")
        print(f"  DB bid_count: {db_count}")
        print(f"  DB description: {db_desc[:50] if db_desc else 'EMPTY'}...")
        print(f"  DB status: {db_status or 'EMPTY'}")

        # Verify the DB entry matches page_data
        if db_closing != page_data.get('closing_time'):
            print("  [WARN] DB closing_time doesn't match page_data")
        if db_count != page_data.get('bid_count'):
            print("  [WARN] DB bid_count doesn't match page_data")
    else:
        print("  [WARN] No database entry found")

    print("\n" + "=" * 60)
    if missing_fields or empty_fields:
        print(f"[FAIL] Missing fields: {', '.join(missing_fields)}")
        print(f"[FAIL] Empty fields: {', '.join(empty_fields)}")
        if optional_missing:
            print(f"[WARN] Optional missing: {', '.join(optional_missing)}")
        return False
    else:
        print("[PASS] All required fields are populated")
        if optional_missing:
            print(f"[WARN] Optional missing: {', '.join(optional_missing)}")
        return True


async def test_lot_with_description():
    """Test that a lot with a description preserves it."""
    print("\n" + "=" * 60)
    print("TEST: Lot with description")
    print("=" * 60)

    # Use a lot known to have a description
    test_url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12"

    config.OFFLINE = False
    scraper = TroostwijkScraper()
    scraper.offline = False

    print(f"\n[1] Scraping: {test_url}")

    from playwright.async_api import async_playwright
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()
        page_data = await scraper.crawl_page(page, test_url)
        await browser.close()

    if not page_data:
        print("  [FAIL] No data returned")
        return False

    print("\n[2] Checking description...")
    description = page_data.get('description', '')
    if not description:
        print("  [FAIL] Description is empty")
        return False
    else:
        print(f"  [PASS] Description: {description[:100]}...")
        return True


async def main():
    """Run all tests."""
    print("\n" + "=" * 60)
    print("MISSING FIELDS TEST SUITE")
    print("=" * 60)

    test1 = await test_lot_has_all_fields()
    test2 = await test_lot_with_description()

    print("\n" + "=" * 60)
    if test1 and test2:
        print("ALL TESTS PASSED")
    else:
        print("SOME TESTS FAILED")
        if not test1:
            print("  - test_lot_has_all_fields FAILED")
        if not test2:
            print("  - test_lot_with_description FAILED")
    print("=" * 60 + "\n")

    return 0 if (test1 and test2) else 1


if __name__ == '__main__':
    exit_code = asyncio.run(main())
    sys.exit(exit_code)