#!/usr/bin/env python3
"""
Verification script to check image download status and duplicates
Run this after deployment to verify the scraper is working correctly
"""

import sqlite3
import sys
from pathlib import Path
from typing import Optional

DB_PATH = "/mnt/okcomputer/output/cache.db"

# Maximum number of example rows printed by the duplicate and sample sections.
_SAMPLE_LIMIT = 5

# One row per (lot_id, url) pair that occurs more than once in `images`.
_DUPLICATE_GROUPS_SQL = """
    SELECT lot_id, url, COUNT(*) AS dup_count
    FROM images
    GROUP BY lot_id, url
    HAVING COUNT(*) > 1
"""


def _report_success_rate(conn: sqlite3.Connection) -> None:
    """Print total / downloaded / not-downloaded counts and the success rate."""
    print("\n[*] Download Success Rate:")
    # SUM() over zero rows yields NULL (and so does the division by a zero
    # COUNT), so every aggregate is wrapped in COALESCE; otherwise an empty
    # table would hand None to the ":," format spec and raise TypeError.
    total, downloaded, failed, success_rate = conn.execute("""
        SELECT
            COUNT(*) AS total_images,
            COALESCE(SUM(CASE WHEN downloaded = 1 THEN 1 ELSE 0 END), 0) AS downloaded,
            COALESCE(SUM(CASE WHEN downloaded = 0 THEN 1 ELSE 0 END), 0) AS failed,
            COALESCE(ROUND(100.0 * SUM(downloaded) / COUNT(*), 2), 0.0) AS success_rate
        FROM images
    """).fetchone()
    print(f" Total images: {total:,}")
    print(f" Downloaded: {downloaded:,}")
    print(f" Not downloaded: {failed:,}")
    print(f" Success rate: {success_rate}%")


def _report_duplicates(conn: sqlite3.Connection) -> None:
    """Print how many (lot_id, url) pairs are duplicated, with a few examples."""
    print("\n[*] Duplicate Check:")
    # Count every duplicated pair separately from the example query: taking
    # len() of a LIMITed result would cap the reported total at the limit.
    duplicate_total = conn.execute(
        f"SELECT COUNT(*) FROM ({_DUPLICATE_GROUPS_SQL})"
    ).fetchone()[0]
    if not duplicate_total:
        print(" [+] No duplicates found!")
        return

    print(f" [!] Found {duplicate_total} duplicate entries!")
    # Worst offenders first; the tie-breakers keep the output deterministic.
    examples = conn.execute(
        _DUPLICATE_GROUPS_SQL + " ORDER BY dup_count DESC, lot_id, url LIMIT ?",
        (_SAMPLE_LIMIT,),
    )
    for lot_id, url, count in examples:
        # GROUP BY puts NULL urls into one group, so url may be None here.
        url_text = "" if url is None else str(url)
        print(f" {lot_id}: {url_text[:50]}... (x{count})")


def _report_local_paths(conn: sqlite3.Connection) -> None:
    """Print how many downloaded images have a non-empty local_path."""
    print("\n[*] File System Verification:")
    files_with_path = conn.execute("""
        SELECT COUNT(*)
        FROM images
        WHERE downloaded = 1 AND local_path IS NOT NULL AND local_path != ''
    """).fetchone()[0]
    print(f" Images with local_path: {files_with_path:,}")


def _report_samples(conn: sqlite3.Connection) -> None:
    """Print a few downloaded images and whether their file exists on disk."""
    print("\n[*] Sample Downloaded Images:")
    # Empty paths are excluded (matching the count above): Path('') resolves
    # to the current directory, which always exists and would show a bogus [+].
    samples = conn.execute(
        """
        SELECT lot_id, local_path
        FROM images
        WHERE downloaded = 1 AND local_path IS NOT NULL AND local_path != ''
        LIMIT ?
        """,
        (_SAMPLE_LIMIT,),
    )
    for lot_id, path in samples:
        marker = "[+]" if Path(str(path)).exists() else "[!]"
        print(f" {marker} {lot_id}: {path}")


def verify_database(db_path: Optional[str] = None) -> None:
    """Run verification queries on the database and print a report.

    Args:
        db_path: Path of the SQLite cache database to inspect. When omitted
            (or None) the module-level ``DB_PATH`` is used, so existing
            zero-argument callers behave as before.

    Raises:
        SystemExit: With exit code 1 when the database file does not exist.
        sqlite3.Error: When a query fails (for example, the ``images`` table
            is missing); the connection is still closed in that case.
    """
    # Resolve the default at call time so a reassigned DB_PATH is honoured.
    target = DB_PATH if db_path is None else db_path

    # Check first: sqlite3.connect() would silently create a missing file.
    if not Path(target).exists():
        print(f"❌ Database not found: {target}")
        sys.exit(1)

    conn = sqlite3.connect(str(target))
    try:
        print("=" * 60)
        print("IMAGE DOWNLOAD VERIFICATION")
        print("=" * 60)

        _report_success_rate(conn)
        _report_duplicates(conn)
        _report_local_paths(conn)
        _report_samples(conn)
    finally:
        # Release the database handle even when one of the queries raises.
        conn.close()

    print("\n" + "=" * 60)
    print("VERIFICATION COMPLETE")
    print("=" * 60)


if __name__ == "__main__":
    verify_database()