GraphQL integration, data correctness
This commit is contained in:
49
test_concurrent_images.py
Normal file
49
test_concurrent_images.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test concurrent image downloads"""
|
||||
import asyncio
|
||||
import time
|
||||
import sys
|
||||
sys.path.insert(0, 'src')
|
||||
|
||||
from scraper import TroostwijkScraper
|
||||
|
||||
async def main():
    """Time one lot crawl and report image-download statistics.

    Launches headless Chromium through Playwright, crawls a single
    hard-coded lot URL with ``TroostwijkScraper.crawl_page`` and prints the
    elapsed wall-clock time, the number of images in the result and a rough
    speed-up estimate.
    """
    scraper = TroostwijkScraper()

    # Local import kept from the original script.
    from playwright.async_api import async_playwright

    # Guess at how long one image would take if fetched sequentially; it is
    # only used for the speed-up estimate. NOTE(review): not a measured
    # value — confirm or replace with a real sequential baseline.
    assumed_sequential_seconds_per_image = 0.5

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )

            # Test with a lot that has multiple images
            lot_url = "https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5"

            print("Testing concurrent image downloads\n")
            print(f"Lot: {lot_url}\n")

            # perf_counter() is monotonic, so the measured interval cannot be
            # skewed by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            result = await scraper.crawl_page(page, lot_url)
            elapsed = time.perf_counter() - start_time

            print(f"\n{'='*60}")
            print("TIMING RESULTS:")
            print(f"{'='*60}")
            print(f"Total time: {elapsed:.2f}s")

            # crawl_page's return contract is not visible from this script;
            # tolerate a falsy result (e.g. None on a failed crawl) instead of
            # raising AttributeError before the report is printed.
            images = (result or {}).get('images') or []
            image_count = len(images)
            print(f"Images: {image_count}")

            if image_count > 1:
                print(f"Time per image: {elapsed/image_count:.2f}s (if sequential)")
                print(f"Actual time: {elapsed:.2f}s (concurrent!)")
                if elapsed > 0:
                    speedup = (image_count * assumed_sequential_seconds_per_image) / elapsed
                else:
                    speedup = 1
                print(f"Speedup factor: {speedup:.1f}x")
        finally:
            # Close the browser even when the crawl or the report raises.
            await browser.close()
|
||||
|
||||
if __name__ == "__main__":
    # Run the async test driver only when executed as a script.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user