209 lines
6.3 KiB
Python
209 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test to validate that all expected fields are populated after scraping
|
|
"""
|
|
import sys
|
|
import os
|
|
import asyncio
|
|
import sqlite3
|
|
|
|
# Make the directory above this file, and its ``src`` sub-directory, importable.
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
# Prepend both entries in one step; ``src`` ends up ahead of the parent
# directory, exactly as two successive ``insert(0, ...)`` calls would leave it.
sys.path[:0] = [os.path.join(parent_dir, 'src'), parent_dir]

# Request online mode through the environment before the scraper modules are
# imported (presumably they read SCAEV_OFFLINE at import time -- confirm).
os.environ['SCAEV_OFFLINE'] = '0'
|
|
|
|
from scraper import TroostwijkScraper
|
|
import config
|
|
|
|
|
|
async def test_lot_has_all_fields():
    """Scrape one live lot page and check that its critical fields are populated.

    The page is fetched with ``TroostwijkScraper.crawl_page`` through a headless
    Chromium page, so this needs network access and a Playwright browser.
    After validating the returned ``page_data``, the matching row of the
    ``lots`` table in the scraper's SQLite cache is printed and compared with
    it; database mismatches (or a missing row) only produce warnings.

    Returns:
        bool: ``True`` when every required field holds a usable value,
        ``False`` when nothing was scraped or a required field is missing
        or empty.
    """

    print("\n" + "="*60)
    print("TEST: Lot has all required fields")
    print("="*60)

    # Use the example lot from user
    test_url = "https://www.troostwijkauctions.com/l/radaway-idea-black-dwj-doucheopstelling-A1-39956-18"

    # Ensure we're not in offline mode
    config.OFFLINE = False

    scraper = TroostwijkScraper()
    scraper.offline = False

    print(f"\n[1] Scraping: {test_url}")

    # Start playwright and scrape
    from playwright.async_api import async_playwright
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        page_data = await scraper.crawl_page(page, test_url)

        await browser.close()

    if not page_data:
        print(" [FAIL] No data returned")
        return False

    print(f"\n[2] Validating fields...")

    # Fields that MUST have values (critical for auction functionality)
    required_fields = {
        'closing_time': 'Closing time',
        'current_bid': 'Current bid',
        'bid_count': 'Bid count',
        'status': 'Status',
    }

    # Fields that SHOULD have values but may legitimately be empty
    optional_fields = {
        'description': 'Description',
    }

    missing_fields = []
    empty_fields = []
    optional_missing = []

    # Check required fields
    for field, label in required_fields.items():
        value = page_data.get(field)

        if value is None:
            missing_fields.append(label)
            print(f" [FAIL] {label}: MISSING (None)")
        elif field == 'bid_count' and value == 0:
            # A count of zero is a real, populated value (the lot simply has
            # no bids yet). Treating it as "empty" would make the no-bids
            # special case for current_bid below impossible to pass.
            print(f" [PASS] {label}: {value}")
        elif value == '' or value == 0 or value == 'No bids':
            # Special case: 'No bids' is only acceptable if bid_count is 0
            if field == 'current_bid' and page_data.get('bid_count', 0) == 0:
                print(f" [PASS] {label}: '{value}' (acceptable - no bids)")
            else:
                empty_fields.append(label)
                print(f" [FAIL] {label}: EMPTY ('{value}')")
        else:
            print(f" [PASS] {label}: {value}")

    # Check optional fields (warn but don't fail)
    for field, label in optional_fields.items():
        value = page_data.get(field)
        if value is None or value == '':
            optional_missing.append(label)
            print(f" [WARN] {label}: EMPTY (may be legitimate)")
        else:
            print(f" [PASS] {label}: {value[:50]}...")

    # Check database
    print(f"\n[3] Checking database entry...")
    conn = sqlite3.connect(scraper.cache.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT closing_time, current_bid, bid_count, description, status
            FROM lots WHERE url = ?
        """, (test_url,))
        row = cursor.fetchone()
    finally:
        # Release the connection even when the query itself raises.
        conn.close()

    if row:
        db_closing, db_bid, db_count, db_desc, db_status = row
        print(f" DB closing_time: {db_closing or 'EMPTY'}")
        print(f" DB current_bid: {db_bid or 'EMPTY'}")
        print(f" DB bid_count: {db_count}")
        print(f" DB description: {db_desc[:50] if db_desc else 'EMPTY'}...")
        print(f" DB status: {db_status or 'EMPTY'}")

        # Verify DB matches page_data
        if db_closing != page_data.get('closing_time'):
            print(f" [WARN] DB closing_time doesn't match page_data")
        if db_count != page_data.get('bid_count'):
            print(f" [WARN] DB bid_count doesn't match page_data")
    else:
        print(f" [WARN] No database entry found")

    print(f"\n" + "="*60)
    if missing_fields or empty_fields:
        print(f"[FAIL] Missing fields: {', '.join(missing_fields)}")
        print(f"[FAIL] Empty fields: {', '.join(empty_fields)}")
        if optional_missing:
            print(f"[WARN] Optional missing: {', '.join(optional_missing)}")
        return False
    else:
        print("[PASS] All required fields are populated")
        if optional_missing:
            print(f"[WARN] Optional missing: {', '.join(optional_missing)}")
        return True
|
|
|
|
|
|
async def test_lot_with_description():
    """Scrape a lot that is expected to have a description and check it is kept.

    Fetches the lot page through ``TroostwijkScraper.crawl_page`` on a headless
    Chromium page, then looks at the ``description`` entry of the result.

    Returns:
        bool: ``True`` when a non-empty description came back, ``False`` when
        the scrape returned nothing or the description is empty.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("TEST: Lot with description")
    print(divider)

    # A lot known to have a description
    lot_url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12"

    config.OFFLINE = False

    lot_scraper = TroostwijkScraper()
    lot_scraper.offline = False

    print(f"\n[1] Scraping: {lot_url}")

    from playwright.async_api import async_playwright
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch(headless=True)
        browser_context = await chromium.new_context()
        tab = await browser_context.new_page()

        scraped = await lot_scraper.crawl_page(tab, lot_url)

        await chromium.close()

    # Guard: nothing to inspect if the crawl produced no data.
    if not scraped:
        print(" [FAIL] No data returned")
        return False

    print("\n[2] Checking description...")
    text = scraped.get('description', '')

    # Any falsy description (None, empty string) counts as missing.
    if text:
        print(f" [PASS] Description: {text[:100]}...")
        return True

    print(" [FAIL] Description is empty")
    return False
|
|
|
|
|
|
async def main():
    """Run both scraping tests in sequence and print a summary.

    Returns:
        int: ``0`` when both tests report success, otherwise ``1`` (intended
        for use as the process exit code).
    """
    divider = "=" * 60
    print("\n" + divider)
    print("MISSING FIELDS TEST SUITE")
    print(divider)

    fields_ok = await test_lot_has_all_fields()
    description_ok = await test_lot_with_description()

    # Pair each outcome with the line to print when that test did not pass.
    outcomes = (
        (fields_ok, " - test_lot_has_all_fields FAILED"),
        (description_ok, " - test_lot_with_description FAILED"),
    )
    failure_lines = [line for passed, line in outcomes if not passed]

    print("\n" + divider)
    if failure_lines:
        print("SOME TESTS FAILED")
        for line in failure_lines:
            print(line)
    else:
        print("ALL TESTS PASSED")
    print(divider + "\n")

    if failure_lines:
        return 1
    return 0
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: drive the async suite to completion and use its
    # return value as the process exit status.
    sys.exit(asyncio.run(main()))
|