GraphQL integration, data correctness

This commit is contained in:
Tour
2025-12-07 00:36:57 +01:00
parent 71567fd965
commit bb7f4bbe9d
6 changed files with 357 additions and 23 deletions

View File

@@ -32,15 +32,14 @@ class TroostwijkScraper:
self.last_request_time = 0
self.download_images = DOWNLOAD_IMAGES
async def _download_image(self, url: str, lot_id: str, index: int) -> Optional[str]:
"""Download an image and save it locally"""
async def _download_image(self, session: 'aiohttp.ClientSession', url: str, lot_id: str, index: int) -> Optional[str]:
"""Download an image and save it locally (without rate limiting - concurrent within lot)"""
if not self.download_images:
return None
try:
import aiohttp
lot_dir = Path(IMAGES_DIR) / lot_id
lot_dir.mkdir(exist_ok=True)
lot_dir.mkdir(parents=True, exist_ok=True)
ext = url.split('.')[-1].split('?')[0]
if ext not in ['jpg', 'jpeg', 'png', 'gif', 'webp']:
@@ -50,22 +49,19 @@ class TroostwijkScraper:
if filepath.exists():
return str(filepath)
await self._rate_limit()
async with session.get(url, timeout=30) as response:
if response.status == 200:
content = await response.read()
with open(filepath, 'wb') as f:
f.write(content)
async with aiohttp.ClientSession() as session:
async with session.get(url, timeout=30) as response:
if response.status == 200:
content = await response.read()
with open(filepath, 'wb') as f:
f.write(content)
with sqlite3.connect(self.cache.db_path) as conn:
conn.execute("UPDATE images\n"
"SET local_path = ?, downloaded = 1\n"
"WHERE lot_id = ? AND url = ?\n"
"", (str(filepath), lot_id, url))
conn.commit()
return str(filepath)
with sqlite3.connect(self.cache.db_path) as conn:
conn.execute("UPDATE images\n"
"SET local_path = ?, downloaded = 1\n"
"WHERE lot_id = ? AND url = ?\n"
"", (str(filepath), lot_id, url))
conn.commit()
return str(filepath)
except Exception as e:
print(f" ERROR downloading image: {e}")
@@ -211,10 +207,17 @@ class TroostwijkScraper:
print(f" Images: {len(images)}")
if self.download_images:
for i, img_url in enumerate(images):
local_path = await self._download_image(img_url, page_data['lot_id'], i)
if local_path:
print(f" Downloaded: {Path(local_path).name}")
# Download all images concurrently for this lot
import aiohttp
async with aiohttp.ClientSession() as session:
download_tasks = [
self._download_image(session, img_url, page_data['lot_id'], i)
for i, img_url in enumerate(images)
]
results = await asyncio.gather(*download_tasks, return_exceptions=True)
downloaded_count = sum(1 for r in results if r and not isinstance(r, Exception))
print(f" Downloaded: {downloaded_count}/{len(images)} images")
return page_data