move.venv
This commit is contained in:
51
test/test_description_simple.py
Normal file
51
test/test_description_simple.py
Normal file
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
"""Debug script: scrape one Troostwijk lot in online mode and dump parser output.

Monkey-patches the parser's parse_page so the raw parser result (description,
closing_time, bid_count) is printed before the scraper post-processes it, then
prints the same fields from the final page_data returned by crawl_page.
"""
import asyncio
import os
import sys

# Make the project root and its src/ package importable when run from test/.
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, parent_dir)
sys.path.insert(0, os.path.join(parent_dir, 'src'))

from scraper import TroostwijkScraper
import config


async def test():
    """Scrape a single known lot and print parser vs. final field values."""
    # Force online mode (env var for subprocess-style checks, flag for config).
    os.environ['SCAEV_OFFLINE'] = '0'
    config.OFFLINE = False

    scraper = TroostwijkScraper()
    scraper.offline = False

    from playwright.async_api import async_playwright
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12"

        # Wrap the parser so we can inspect what it extracts before the
        # scraper post-processes/caches the result.
        original_parse = scraper.parser.parse_page

        def debug_parse(content, url):
            result = original_parse(content, url)
            if result:
                print("PARSER OUTPUT:")
                print(f" description: {result.get('description', 'NONE')[:100] if result.get('description') else 'EMPTY'}")
                print(f" closing_time: {result.get('closing_time', 'NONE')}")
                print(f" bid_count: {result.get('bid_count', 'NONE')}")
            return result

        scraper.parser.parse_page = debug_parse

        page_data = await scraper.crawl_page(page, url)

        await browser.close()

        # crawl_page may return None on failure; guard every access.
        print("\nFINAL page_data:")
        print(f" description: {page_data.get('description', 'NONE')[:100] if page_data and page_data.get('description') else 'EMPTY'}")
        print(f" closing_time: {page_data.get('closing_time', 'NONE') if page_data else 'NONE'}")
        print(f" bid_count: {page_data.get('bid_count', 'NONE') if page_data else 'NONE'}")
        print(f" status: {page_data.get('status', 'NONE') if page_data else 'NONE'}")


if __name__ == '__main__':
    # Guard so importing this module does not launch a browser session.
    asyncio.run(test())
|
||||
208
test/test_missing_fields.py
Normal file
208
test/test_missing_fields.py
Normal file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test to validate that all expected fields are populated after scraping
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import asyncio
|
||||
import sqlite3
|
||||
|
||||
# Add parent and src directory to path
|
||||
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
sys.path.insert(0, parent_dir)
|
||||
sys.path.insert(0, os.path.join(parent_dir, 'src'))
|
||||
|
||||
# Force online mode before importing
|
||||
os.environ['SCAEV_OFFLINE'] = '0'
|
||||
|
||||
from scraper import TroostwijkScraper
|
||||
import config
|
||||
|
||||
|
||||
async def test_lot_has_all_fields():
    """Scrape one known lot online and verify all required fields are populated.

    Required fields (closing_time, current_bid, bid_count, status) must be
    non-empty; description is optional and only produces a warning.  The lot's
    cached database row is also compared against the in-memory page_data.

    Returns:
        bool: True when every required field is populated, False otherwise.
    """

    print("\n" + "="*60)
    print("TEST: Lot has all required fields")
    print("="*60)

    # Use the example lot from user
    test_url = "https://www.troostwijkauctions.com/l/radaway-idea-black-dwj-doucheopstelling-A1-39956-18"

    # Ensure we're not in offline mode
    config.OFFLINE = False

    scraper = TroostwijkScraper()
    scraper.offline = False

    print(f"\n[1] Scraping: {test_url}")

    # Start playwright and scrape
    from playwright.async_api import async_playwright
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        page_data = await scraper.crawl_page(page, test_url)

        await browser.close()

    if not page_data:
        print(" [FAIL] No data returned")
        return False

    print("\n[2] Validating fields...")

    # Fields that MUST have values (critical for auction functionality)
    required_fields = {
        'closing_time': 'Closing time',
        'current_bid': 'Current bid',
        'bid_count': 'Bid count',
        'status': 'Status',
    }

    # Fields that SHOULD have values but may legitimately be empty
    optional_fields = {
        'description': 'Description',
    }

    missing_fields = []
    empty_fields = []
    optional_missing = []

    # Check required fields
    for field, label in required_fields.items():
        value = page_data.get(field)

        if value is None:
            missing_fields.append(label)
            print(f" [FAIL] {label}: MISSING (None)")
        elif value == '' or value == 0 or value == 'No bids':
            # A lot with zero bids is a legitimate state: both the 'No bids'
            # placeholder for current_bid AND a bid_count of 0 are acceptable
            # then.  (Bug fix: previously bid_count == 0 was always flagged
            # EMPTY, contradicting the current_bid special case.)
            if field in ('current_bid', 'bid_count') and page_data.get('bid_count', 0) == 0:
                print(f" [PASS] {label}: '{value}' (acceptable - no bids)")
            else:
                empty_fields.append(label)
                print(f" [FAIL] {label}: EMPTY ('{value}')")
        else:
            print(f" [PASS] {label}: {value}")

    # Check optional fields (warn but don't fail)
    for field, label in optional_fields.items():
        value = page_data.get(field)
        if value is None or value == '':
            optional_missing.append(label)
            print(f" [WARN] {label}: EMPTY (may be legitimate)")
        else:
            print(f" [PASS] {label}: {value[:50]}...")

    # Cross-check the persisted row against what the scraper returned.
    print("\n[3] Checking database entry...")
    conn = sqlite3.connect(scraper.cache.db_path)
    cursor = conn.cursor()
    cursor.execute("""
        SELECT closing_time, current_bid, bid_count, description, status
        FROM lots WHERE url = ?
    """, (test_url,))
    row = cursor.fetchone()
    conn.close()

    if row:
        db_closing, db_bid, db_count, db_desc, db_status = row
        print(f" DB closing_time: {db_closing or 'EMPTY'}")
        print(f" DB current_bid: {db_bid or 'EMPTY'}")
        print(f" DB bid_count: {db_count}")
        print(f" DB description: {db_desc[:50] if db_desc else 'EMPTY'}...")
        print(f" DB status: {db_status or 'EMPTY'}")

        # Verify DB matches page_data (mismatch is a warning, not a failure)
        if db_closing != page_data.get('closing_time'):
            print(" [WARN] DB closing_time doesn't match page_data")
        if db_count != page_data.get('bid_count'):
            print(" [WARN] DB bid_count doesn't match page_data")
    else:
        print(" [WARN] No database entry found")

    print("\n" + "="*60)
    if missing_fields or empty_fields:
        print(f"[FAIL] Missing fields: {', '.join(missing_fields)}")
        print(f"[FAIL] Empty fields: {', '.join(empty_fields)}")
        if optional_missing:
            print(f"[WARN] Optional missing: {', '.join(optional_missing)}")
        return False
    else:
        print("[PASS] All required fields are populated")
        if optional_missing:
            print(f"[WARN] Optional missing: {', '.join(optional_missing)}")
        return True
|
||||
|
||||
|
||||
async def test_lot_with_description():
    """Scrape a lot known to carry a description and check it is preserved.

    Returns:
        bool: True when page_data contains a non-empty description.
    """

    banner = "=" * 60
    print("\n" + banner)
    print("TEST: Lot with description")
    print(banner)

    # This lot is known to have a non-empty description on the live site.
    lot_url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12"

    config.OFFLINE = False

    scraper = TroostwijkScraper()
    scraper.offline = False

    print(f"\n[1] Scraping: {lot_url}")

    from playwright.async_api import async_playwright
    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        data = await scraper.crawl_page(page, lot_url)

        await browser.close()

    # Guard clause: nothing scraped means the test cannot proceed.
    if not data:
        print(" [FAIL] No data returned")
        return False

    print("\n[2] Checking description...")
    desc = data.get('description', '')

    if not desc:
        print(" [FAIL] Description is empty")
        return False

    print(f" [PASS] Description: {desc[:100]}...")
    return True
|
||||
|
||||
|
||||
async def main():
    """Run both field-validation tests and return a process exit code (0/1)."""
    line = "=" * 60
    print("\n" + line)
    print("MISSING FIELDS TEST SUITE")
    print(line)

    # Both tests always run, even if the first one fails.
    ok_fields = await test_lot_has_all_fields()
    ok_desc = await test_lot_with_description()
    all_passed = ok_fields and ok_desc

    print("\n" + line)
    if all_passed:
        print("ALL TESTS PASSED")
    else:
        print("SOME TESTS FAILED")
        if not ok_fields:
            print(" - test_lot_has_all_fields FAILED")
        if not ok_desc:
            print(" - test_lot_with_description FAILED")
    print(line + "\n")

    return 0 if all_passed else 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Propagate the suite result (0 = all passed, 1 = failure) as exit status.
    sys.exit(asyncio.run(main()))
|
||||
Reference in New Issue
Block a user