enrich data
This commit is contained in:
95
test_comprehensive.py
Normal file
95
test_comprehensive.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test comprehensive data enrichment"""
|
||||
import asyncio
|
||||
import sys
|
||||
sys.path.insert(0, 'src')
|
||||
|
||||
from scraper import TroostwijkScraper
|
||||
|
||||
def _print_banner(heading):
    """Print *heading* framed by two 60-character ``=`` rules."""
    rule = '=' * 60
    print(f"\n{rule}")
    print(heading)
    print(rule)


def _print_extraction(result):
    """Print the bidding and valuation fields of a crawl result.

    Args:
        result: Mapping returned by ``scraper.crawl_page``; only ``.get`` is
            used, so missing keys are tolerated.
    """
    # ``dict.get(key, default)`` still returns None when the key exists with
    # a None value, so fall back with ``or`` before slicing / formatting.
    title = result.get('title') or ''
    bid_increment = result.get('bid_increment') or 0
    damage = result.get('damage_description')
    damage_preview = damage[:50] if damage else 'N/A'

    _print_banner("COMPREHENSIVE DATA EXTRACTION:")
    print(f"Lot ID: {result.get('lot_id')}")
    print(f"Title: {title[:50]}...")

    print("\n[Bidding Intelligence]")
    print(f" Status: {result.get('status')}")
    print(f" Current Bid: {result.get('current_bid')}")
    print(f" Starting Bid: {result.get('starting_bid')}")
    print(f" Bid Increment: EUR {bid_increment:.2f}")
    print(f" Bid Count: {result.get('bid_count')}")
    print(f" First Bid: {result.get('first_bid_time', 'N/A')}")
    print(f" Last Bid: {result.get('last_bid_time', 'N/A')}")
    print(f" Bid Velocity: {result.get('bid_velocity', 0)} bids/hour")

    print("\n[Valuation Intelligence]")
    print(f" Brand: {result.get('brand', 'N/A')}")
    print(f" Model: {result.get('model', 'N/A')}")
    print(f" Year: {result.get('year_manufactured', 'N/A')}")
    print(f" Manufacturer: {result.get('manufacturer', 'N/A')}")
    print(f" Condition Score: {result.get('condition_score', 'N/A')}")
    print(f" Condition: {result.get('condition_description', 'N/A')}")
    print(f" Serial#: {result.get('serial_number', 'N/A')}")
    print(f" Damage: {damage_preview}...")


def _verify_database(db_path, lot_id):
    """Print what the ``lots`` and ``bid_history`` tables hold for *lot_id*.

    Args:
        db_path: Filesystem path of the SQLite database to inspect.
        lot_id: Value matched against the ``lot_id`` column of both tables.
    """
    import sqlite3
    from contextlib import closing

    # A sqlite3 connection used as a context manager only manages the
    # transaction; ``closing`` guarantees the handle is released even when a
    # query raises.
    with closing(sqlite3.connect(db_path)) as conn:
        # Check lot data
        lot_row = conn.execute("""
            SELECT bid_velocity, first_bid_time, year_manufactured, condition_score
            FROM lots
            WHERE lot_id = ?
        """, (lot_id,)).fetchone()

        if lot_row:
            _print_banner("DATABASE VERIFICATION (lots table):")
            print(f" Bid Velocity: {lot_row[0]}")
            print(f" First Bid Time: {lot_row[1]}")
            print(f" Year: {lot_row[2]}")
            print(f" Condition Score: {lot_row[3]}")

        # Check bid history
        bid_row = conn.execute("""
            SELECT COUNT(*), MIN(bid_time), MAX(bid_time), SUM(is_autobid)
            FROM bid_history
            WHERE lot_id = ?
        """, (lot_id,)).fetchone()

        if bid_row and bid_row[0] > 0:
            _print_banner("DATABASE VERIFICATION (bid_history table):")
            print(f" Total Bids Stored: {bid_row[0]}")
            print(f" First Bid: {bid_row[1]}")
            print(f" Last Bid: {bid_row[2]}")
            print(f" Autobids: {bid_row[3]}")


async def main(
    lot_url="https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5",
    db_path='/mnt/okcomputer/output/cache.db',
):
    """Crawl one lot page, print the extracted fields, then check the database.

    Args:
        lot_url: Lot page to crawl. The default is the lot the original test
            used ("lot that has bids").
        db_path: SQLite database inspected after the crawl.
            NOTE(review): presumably this is the cache that ``crawl_page``
            writes to -- confirm against the scraper.
    """
    scraper = TroostwijkScraper()

    from playwright.async_api import async_playwright

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            )

            print("Testing comprehensive extraction\n")
            result = await scraper.crawl_page(page, lot_url)

            if result:
                _print_extraction(result)
        finally:
            # Close the browser even when page creation or the crawl fails.
            await browser.close()

    if not result:
        # Without a crawl result there is no lot_id to look up.
        print("No data extracted; skipping database verification.")
        return

    # Verify database
    _verify_database(db_path, result.get('lot_id'))
|
||||
|
||||
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, not on import.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user