enrich data

This commit is contained in:
Tour
2025-12-07 16:26:30 +01:00
parent fd69faebcc
commit b1905164bd
40 changed files with 76 additions and 3605 deletions

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
"""Check for Apollo state or other embedded data"""
import asyncio
import json
import re
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
await page.goto("https://www.troostwijkauctions.com/a/woonunits-generatoren-reinigingsmachines-en-zakelijke-goederen-A1-37889", wait_until='networkidle')
content = await page.content()
# Look for embedded data structures
patterns = [
(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', "NEXT_DATA"),
(r'window\.__APOLLO_STATE__\s*=\s*({.+?});', "APOLLO_STATE"),
(r'"lots"\s*:\s*\[(.+?)\]', "LOTS_ARRAY"),
]
for pattern, name in patterns:
match = re.search(pattern, content, re.DOTALL)
if match:
print(f"\n{'='*60}")
print(f"FOUND: {name}")
print(f"{'='*60}")
try:
if name == "LOTS_ARRAY":
print(f"Preview: {match.group(1)[:500]}")
else:
data = json.loads(match.group(1))
print(json.dumps(data, indent=2)[:2000])
except:
print(f"Preview: {match.group(1)[:1000]}")
# Also check for any script tags with "lot" and "bid" and "end"
print(f"\n{'='*60}")
print("SEARCHING FOR LOT DATA IN ALL SCRIPTS")
print(f"{'='*60}")
scripts = re.findall(r'<script[^>]*>(.+?)</script>', content, re.DOTALL)
for i, script in enumerate(scripts):
if all(term in script.lower() for term in ['lot', 'bid', 'end']):
print(f"\nScript #{i} (first 500 chars):")
print(script[:500])
if i > 3: # Limit output
break
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
"""Check current data quality in cache.db"""
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
print("=" * 60)
print("CURRENT DATA QUALITY CHECK")
print("=" * 60)
# Check lots table
print("\n[*] Sample Lot Data:")
cursor = conn.execute("""
SELECT lot_id, current_bid, bid_count, closing_time
FROM lots
LIMIT 10
""")
for row in cursor:
print(f" Lot: {row[0]}")
print(f" Current Bid: {row[1]}")
print(f" Bid Count: {row[2]}")
print(f" Closing Time: {row[3]}")
# Check auctions table
print("\n[*] Sample Auction Data:")
cursor = conn.execute("""
SELECT auction_id, title, closing_time, first_lot_closing_time
FROM auctions
LIMIT 5
""")
for row in cursor:
print(f" Auction: {row[0]}")
print(f" Title: {row[1][:50]}...")
print(f" Closing Time: {row[2] if len(row) > 2 else 'N/A'}")
print(f" First Lot Closing: {row[3]}")
# Data completeness stats
print("\n[*] Data Completeness:")
cursor = conn.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN current_bid IS NULL OR current_bid = '' THEN 1 ELSE 0 END) as missing_current_bid,
SUM(CASE WHEN closing_time IS NULL OR closing_time = '' THEN 1 ELSE 0 END) as missing_closing_time,
SUM(CASE WHEN bid_count IS NULL OR bid_count = 0 THEN 1 ELSE 0 END) as zero_bid_count
FROM lots
""")
row = cursor.fetchone()
print(f" Total lots: {row[0]:,}")
print(f" Missing current_bid: {row[1]:,} ({100*row[1]/row[0]:.1f}%)")
print(f" Missing closing_time: {row[2]:,} ({100*row[2]/row[0]:.1f}%)")
print(f" Zero bid_count: {row[3]:,} ({100*row[3]/row[0]:.1f}%)")
conn.close()
print("\n" + "=" * 60)

View File

@@ -1,67 +0,0 @@
#!/usr/bin/env python3
"""Check if GraphQL has viewing/pickup data"""
import asyncio
import json
import sys
sys.path.insert(0, 'src')
from graphql_client import GRAPHQL_ENDPOINT
import aiohttp
# Expanded query to check for all available fields
EXTENDED_QUERY = """
query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
lot {
id
displayId
auctionId
currentBidAmount { cents currency }
initialAmount { cents currency }
nextMinimalBid { cents currency }
bidsCount
startDate
endDate
# Try to find viewing/pickup fields
viewingDays { startDate endDate city countryCode }
collectionDays { startDate endDate city countryCode }
pickupDays { startDate endDate city countryCode }
}
auction {
id
displayId
viewingDays { startDate endDate city countryCode }
collectionDays { startDate endDate city countryCode }
}
}
}
"""
async def main():
variables = {
"lotDisplayId": "A1-28505-5",
"locale": "nl",
"platform": "TWK"
}
payload = {
"query": EXTENDED_QUERY,
"variables": variables
}
try:
async with aiohttp.ClientSession() as session:
async with session.post(GRAPHQL_ENDPOINT, json=payload, timeout=30) as response:
if response.status == 200:
data = await response.json()
print("Full GraphQL Response:")
print(json.dumps(data, indent=2))
else:
print(f"Error: {response.status}")
print(await response.text())
except Exception as e:
print(f"Exception: {e}")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,72 +0,0 @@
"""Check how lots link to auctions"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from cache import CacheManager
import sqlite3
import zlib
import json
import re
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Get a lot page from cache
cursor.execute("SELECT url, content FROM cache WHERE url LIKE '%/l/%' LIMIT 1")
url, content_blob = cursor.fetchone()
content = zlib.decompress(content_blob).decode('utf-8')
# Extract __NEXT_DATA__
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
data = json.loads(match.group(1))
props = data.get('props', {}).get('pageProps', {})
print("PageProps keys:", list(props.keys()))
lot = props.get('lot', {})
print("\nLot data:")
print(f" displayId: {lot.get('displayId')}")
print(f" auctionId (UUID): {lot.get('auctionId')}")
# Check if auction data is also included
auction = props.get('auction')
if auction:
print("\nAuction data IS included in lot page!")
print(f" Auction displayId: {auction.get('displayId')}")
print(f" Auction id (UUID): {auction.get('id')}")
print(f" Auction name: {auction.get('name', '')[:60]}")
else:
print("\nAuction data NOT included in lot page")
print("Need to look up auction by UUID")
# Check if we can find the auction by UUID
lot_auction_uuid = lot.get('auctionId')
if lot_auction_uuid:
# Try to find auction page with this UUID
cursor.execute("""
SELECT url, content FROM cache
WHERE url LIKE '%/a/%'
LIMIT 10
""")
found_match = False
for auction_url, auction_content_blob in cursor.fetchall():
auction_content = zlib.decompress(auction_content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', auction_content, re.DOTALL)
if match:
auction_data = json.loads(match.group(1))
auction_obj = auction_data.get('props', {}).get('pageProps', {}).get('auction', {})
if auction_obj.get('id') == lot_auction_uuid:
print(f"\n✓ Found matching auction!")
print(f" Auction displayId: {auction_obj.get('displayId')}")
print(f" Auction UUID: {auction_obj.get('id')}")
print(f" Auction URL: {auction_url}")
found_match = True
break
if not found_match:
print(f"\n✗ Could not find auction with UUID {lot_auction_uuid} in first 10 cached auctions")
conn.close()

View File

@@ -1,36 +0,0 @@
#!/usr/bin/env python3
"""Check viewing time data"""
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
# Check if viewing_time has data
cursor = conn.execute("""
SELECT viewing_time, pickup_date
FROM lots
WHERE viewing_time IS NOT NULL AND viewing_time != ''
LIMIT 5
""")
rows = cursor.fetchall()
print("Existing viewing_time data:")
for r in rows:
print(f" Viewing: {r[0]}")
print(f" Pickup: {r[1]}")
print()
# Check overall completeness
cursor = conn.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN viewing_time IS NOT NULL AND viewing_time != '' THEN 1 ELSE 0 END) as has_viewing,
SUM(CASE WHEN pickup_date IS NOT NULL AND pickup_date != '' THEN 1 ELSE 0 END) as has_pickup
FROM lots
""")
row = cursor.fetchone()
print(f"Completeness:")
print(f" Total lots: {row[0]}")
print(f" Has viewing_time: {row[1]} ({100*row[1]/row[0]:.1f}%)")
print(f" Has pickup_date: {row[2]} ({100*row[2]/row[0]:.1f}%)")
conn.close()

View File

@@ -1,35 +0,0 @@
#!/usr/bin/env python3
"""Check if viewing time is in the GraphQL response"""
import asyncio
import json
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
responses = []
async def capture_response(response):
if 'graphql' in response.url and 'LotBiddingData' in await response.text():
try:
body = await response.json()
responses.append(body)
except:
pass
page.on('response', capture_response)
await page.goto("https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5", wait_until='networkidle')
await asyncio.sleep(2)
if responses:
print("Full LotBiddingData Response:")
print("="*60)
print(json.dumps(responses[0], indent=2))
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,69 +0,0 @@
#!/usr/bin/env python3
"""Debug lot data structure from cached page"""
import sqlite3
import zlib
import json
import re
import sys
sys.path.insert(0, 'src')
from parse import DataParser
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
# Get a recent lot page
cursor = conn.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/l/%'
ORDER BY timestamp DESC
LIMIT 1
""")
row = cursor.fetchone()
if not row:
print("No lot pages found")
exit(1)
url, content_blob = row
content = zlib.decompress(content_blob).decode('utf-8')
parser = DataParser()
result = parser.parse_page(content, url)
if result:
print(f"URL: {url}")
print(f"\nParsed Data:")
print(f" type: {result.get('type')}")
print(f" lot_id: {result.get('lot_id')}")
print(f" title: {result.get('title', '')[:50]}...")
print(f" current_bid: {result.get('current_bid')}")
print(f" bid_count: {result.get('bid_count')}")
print(f" closing_time: {result.get('closing_time')}")
print(f" location: {result.get('location')}")
# Also dump the raw JSON
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if match:
data = json.loads(match.group(1))
page_props = data.get('props', {}).get('pageProps', {})
if 'lot' in page_props:
lot = page_props['lot']
print(f"\nRAW __NEXT_DATA__.lot keys: {list(lot.keys())}")
print(f"\nSearching for bid/timing fields...")
# Deep search for these fields
def deep_search(obj, prefix=""):
if isinstance(obj, dict):
for k, v in obj.items():
if any(term in k.lower() for term in ['bid', 'end', 'close', 'date', 'time']):
print(f" {prefix}{k}: {v}")
if isinstance(v, (dict, list)):
deep_search(v, prefix + k + ".")
elif isinstance(obj, list) and len(obj) > 0:
deep_search(obj[0], prefix + "[0].")
deep_search(lot)
conn.close()

View File

@@ -1,65 +0,0 @@
#!/usr/bin/env python3
"""Deep inspect lot JSON for viewing/pickup data"""
import sqlite3
import zlib
import json
import re
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/l/%'
ORDER BY timestamp DESC
LIMIT 1
""")
row = cursor.fetchone()
url, content_blob = row
content = zlib.decompress(content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
data = json.loads(match.group(1))
lot = data.get('props', {}).get('pageProps', {}).get('lot', {})
print(f"Inspecting: {url}\n")
# Check onboarding
if 'onboarding' in lot:
print("ONBOARDING:")
print(json.dumps(lot['onboarding'], indent=2))
print()
# Check attributes
if 'attributes' in lot:
print("ATTRIBUTES:")
attrs = lot['attributes']
print(json.dumps(attrs[:3] if isinstance(attrs, list) else attrs, indent=2))
print()
# Check condition
if 'condition' in lot:
print("CONDITION:")
print(json.dumps(lot['condition'], indent=2))
print()
# Check appearance
if 'appearance' in lot:
print("APPEARANCE:")
print(json.dumps(lot['appearance'], indent=2))
print()
# Check location
if 'location' in lot:
print("LOCATION:")
print(json.dumps(lot['location'], indent=2))
print()
# Check for any field with "view", "pick", "collect", "date", "time"
print("\nFIELDS WITH VIEWING/PICKUP/TIME:")
for key in lot.keys():
if any(term in key.lower() for term in ['view', 'pick', 'collect', 'date', 'time', 'day']):
print(f" {key}: {lot[key]}")
conn.close()

View File

@@ -1,120 +0,0 @@
"""
Enrich existing lots with new intelligence fields:
- followers_count
- estimated_min_price / estimated_max_price
- lot_condition
- appearance
Reads from cached lot pages __NEXT_DATA__ JSON
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import asyncio
from cache import CacheManager
import sqlite3
import zlib
import json
import re
from graphql_client import fetch_lot_bidding_data, format_bid_data
async def enrich_existing_lots():
"""Enrich existing lots with new fields from GraphQL API"""
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Get all lot IDs
cursor.execute("SELECT lot_id FROM lots")
lot_ids = [r[0] for r in cursor.fetchall()]
print(f"Found {len(lot_ids)} lots to enrich")
print("Fetching enrichment data from GraphQL API...")
print("This will take ~{:.1f} minutes (0.5s rate limit)".format(len(lot_ids) * 0.5 / 60))
enriched = 0
failed = 0
no_data = 0
for i, lot_id in enumerate(lot_ids):
if (i + 1) % 10 == 0:
print(f"Progress: {i+1}/{len(lot_ids)} ({enriched} enriched, {no_data} no data, {failed} failed)", end='\r')
try:
# Fetch from GraphQL API
bidding_data = await fetch_lot_bidding_data(lot_id)
if bidding_data:
formatted_data = format_bid_data(bidding_data)
# Update lot with new fields
cursor.execute("""
UPDATE lots
SET followers_count = ?,
estimated_min_price = ?,
estimated_max_price = ?,
lot_condition = ?,
appearance = ?
WHERE lot_id = ?
""", (
formatted_data.get('followers_count', 0),
formatted_data.get('estimated_min_price'),
formatted_data.get('estimated_max_price'),
formatted_data.get('lot_condition', ''),
formatted_data.get('appearance', ''),
lot_id
))
enriched += 1
# Commit every 50 lots
if enriched % 50 == 0:
conn.commit()
else:
no_data += 1
# Rate limit
await asyncio.sleep(0.5)
except Exception as e:
failed += 1
continue
conn.commit()
print(f"\n\nComplete!")
print(f"Total lots: {len(lot_ids)}")
print(f"Enriched: {enriched}")
print(f"No data: {no_data}")
print(f"Failed: {failed}")
# Show statistics
cursor.execute("SELECT COUNT(*) FROM lots WHERE followers_count > 0")
with_followers = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(*) FROM lots WHERE estimated_min_price IS NOT NULL")
with_estimates = cursor.fetchone()[0]
cursor.execute("SELECT COUNT(*) FROM lots WHERE lot_condition IS NOT NULL AND lot_condition != ''")
with_condition = cursor.fetchone()[0]
print(f"\nEnrichment statistics:")
print(f" Lots with followers_count: {with_followers} ({with_followers/len(lot_ids)*100:.1f}%)")
print(f" Lots with estimated prices: {with_estimates} ({with_estimates/len(lot_ids)*100:.1f}%)")
print(f" Lots with condition: {with_condition} ({with_condition/len(lot_ids)*100:.1f}%)")
conn.close()
if __name__ == "__main__":
print("WARNING: This will make ~16,800 API calls at 0.5s intervals (~2.3 hours)")
print("Press Ctrl+C to cancel, or wait 5 seconds to continue...")
import time
try:
time.sleep(5)
except KeyboardInterrupt:
print("\nCancelled")
sys.exit(0)
asyncio.run(enrich_existing_lots())

View File

@@ -1,370 +0,0 @@
"""
Explore API responses to identify additional fields available for intelligence.
Tests GraphQL and REST API responses for field coverage.
"""
import asyncio
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import json
import aiohttp
from graphql_client import fetch_lot_bidding_data, GRAPHQL_ENDPOINT
from bid_history_client import fetch_bid_history, BID_HISTORY_ENDPOINT
async def explore_graphql_schema():
"""Query GraphQL schema to see all available fields"""
print("=" * 80)
print("GRAPHQL SCHEMA EXPLORATION")
print("=" * 80)
# Introspection query for LotDetails type
introspection_query = """
query IntrospectionQuery {
__type(name: "LotDetails") {
name
fields {
name
type {
name
kind
ofType {
name
kind
}
}
}
}
}
"""
async with aiohttp.ClientSession() as session:
try:
async with session.post(
GRAPHQL_ENDPOINT,
json={
"query": introspection_query,
"variables": {}
},
headers={"Content-Type": "application/json"}
) as response:
if response.status == 200:
data = await response.json()
lot_type = data.get('data', {}).get('__type')
if lot_type:
print("\nLotDetails available fields:")
for field in lot_type.get('fields', []):
field_name = field['name']
field_type = field['type'].get('name') or field['type'].get('ofType', {}).get('name', 'Complex')
print(f" - {field_name}: {field_type}")
print()
else:
print(f"Failed with status {response.status}")
except Exception as e:
print(f"Error: {e}")
# Also try Lot type
introspection_query_lot = """
query IntrospectionQuery {
__type(name: "Lot") {
name
fields {
name
type {
name
kind
ofType {
name
kind
}
}
}
}
}
"""
async with aiohttp.ClientSession() as session:
try:
async with session.post(
GRAPHQL_ENDPOINT,
json={
"query": introspection_query_lot,
"variables": {}
},
headers={"Content-Type": "application/json"}
) as response:
if response.status == 200:
data = await response.json()
lot_type = data.get('data', {}).get('__type')
if lot_type:
print("\nLot type available fields:")
for field in lot_type.get('fields', []):
field_name = field['name']
field_type = field['type'].get('name') or field['type'].get('ofType', {}).get('name', 'Complex')
print(f" - {field_name}: {field_type}")
print()
except Exception as e:
print(f"Error: {e}")
async def test_graphql_full_query():
"""Test a comprehensive GraphQL query to see all returned data"""
print("=" * 80)
print("GRAPHQL FULL QUERY TEST")
print("=" * 80)
# Test with a real lot ID
lot_id = "A1-34731-107" # Example from database
comprehensive_query = """
query ComprehensiveLotQuery($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
lot {
id
displayId
title
description
currentBidAmount { cents currency }
initialAmount { cents currency }
nextMinimalBid { cents currency }
bidsCount
startDate
endDate
minimumBidAmountMet
lotNumber
auctionId
lotState
location {
city
countryCode
}
viewingDays {
city
countryCode
addressLine1
addressLine2
endDate
startDate
}
collectionDays {
city
countryCode
addressLine1
addressLine2
endDate
startDate
}
images {
url
thumbnailUrl
}
attributes {
name
value
}
}
}
}
"""
async with aiohttp.ClientSession() as session:
try:
async with session.post(
GRAPHQL_ENDPOINT,
json={
"query": comprehensive_query,
"variables": {
"lotDisplayId": lot_id,
"locale": "nl_NL",
"platform": "WEB"
}
},
headers={"Content-Type": "application/json"}
) as response:
if response.status == 200:
data = await response.json()
print(f"\nFull GraphQL response for {lot_id}:")
print(json.dumps(data, indent=2))
print()
else:
print(f"Failed with status {response.status}")
print(await response.text())
except Exception as e:
print(f"Error: {e}")
async def test_bid_history_response():
"""Test bid history API to see all returned fields"""
print("=" * 80)
print("BID HISTORY API TEST")
print("=" * 80)
# Get a lot with bids from database
import sqlite3
from cache import CacheManager
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Find a lot with bids
cursor.execute("""
SELECT lot_id, url FROM lots
WHERE bid_count > 0
ORDER BY bid_count DESC
LIMIT 1
""")
result = cursor.fetchone()
if result:
lot_id, url = result
# Extract UUID from URL
import re
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>', url)
# We need to get UUID from cached page
cursor.execute("SELECT content FROM cache WHERE url = ?", (url,))
page_result = cursor.fetchone()
if page_result:
import zlib
content = zlib.decompress(page_result[0]).decode('utf-8')
match = re.search(r'"lot":\s*\{[^}]*"id":\s*"([^"]+)"', content)
if match:
lot_uuid = match.group(1)
print(f"\nTesting with lot {lot_id} (UUID: {lot_uuid})")
# Fetch bid history
bid_history = await fetch_bid_history(lot_uuid)
if bid_history:
print(f"\nBid history sample (first 3 records):")
for i, bid in enumerate(bid_history[:3]):
print(f"\nBid {i+1}:")
print(json.dumps(bid, indent=2))
print(f"\n\nAll available fields in bid records:")
if bid_history:
all_keys = set()
for bid in bid_history:
all_keys.update(bid.keys())
for key in sorted(all_keys):
print(f" - {key}")
else:
print("No bid history found")
conn.close()
async def check_auction_api():
"""Check if there's an auction details API"""
print("=" * 80)
print("AUCTION API EXPLORATION")
print("=" * 80)
auction_query = """
query AuctionDetails($auctionId: String!, $locale: String!, $platform: Platform!) {
auctionDetails(auctionId: $auctionId, locale: $locale, platform: $platform) {
auction {
id
title
description
startDate
endDate
firstLotEndDate
location {
city
countryCode
}
viewingDays {
city
countryCode
startDate
endDate
addressLine1
addressLine2
}
collectionDays {
city
countryCode
startDate
endDate
addressLine1
addressLine2
}
}
}
}
"""
# Get an auction ID from database
import sqlite3
from cache import CacheManager
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Get auction ID from a lot
cursor.execute("SELECT DISTINCT auction_id FROM lots WHERE auction_id IS NOT NULL LIMIT 1")
result = cursor.fetchone()
if result:
auction_id = result[0]
print(f"\nTesting with auction {auction_id}")
async with aiohttp.ClientSession() as session:
try:
async with session.post(
GRAPHQL_ENDPOINT,
json={
"query": auction_query,
"variables": {
"auctionId": auction_id,
"locale": "nl_NL",
"platform": "WEB"
}
},
headers={"Content-Type": "application/json"}
) as response:
if response.status == 200:
data = await response.json()
print("\nAuction API response:")
print(json.dumps(data, indent=2))
else:
print(f"Failed with status {response.status}")
print(await response.text())
except Exception as e:
print(f"Error: {e}")
conn.close()
async def main():
"""Run all API explorations"""
await explore_graphql_schema()
await test_graphql_full_query()
await test_bid_history_response()
await check_auction_api()
print("\n" + "=" * 80)
print("SUMMARY: AVAILABLE DATA FIELDS")
print("=" * 80)
print("""
CURRENTLY CAPTURED:
- Lot bidding data: current_bid, starting_bid, minimum_bid, bid_count, closing_time
- Lot attributes: brand, model, manufacturer, year, condition, serial_number
- Bid history: bid_amount, bid_time, bidder_id, is_autobid
- Bid intelligence: first_bid_time, last_bid_time, bid_velocity, bid_increment
- Images: URLs and local paths
POTENTIALLY AVAILABLE (TO CHECK):
- Viewing/collection times with full address and date ranges
- Lot location details (city, country)
- Lot state/status
- Image thumbnails
- More detailed attributes
NOT AVAILABLE:
- Watch count (not exposed in API)
- Reserve price (not exposed in API)
- Estimated min/max value (not exposed in API)
- Bidder identities (anonymized)
""")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,93 +0,0 @@
#!/usr/bin/env python3
"""Explore the actual auction schema"""
import asyncio
import aiohttp
import json
GRAPHQL_ENDPOINT = "https://storefront.tbauctions.com/storefront/graphql"
# Try different field structures
QUERIES = {
"viewingDays_simple": """
query AuctionData($auctionId: TbaUuid!, $locale: String!, $platform: Platform!) {
auction(id: $auctionId, locale: $locale, platform: $platform) {
viewingDays {
city
countryCode
}
}
}
""",
"viewingDays_with_times": """
query AuctionData($auctionId: TbaUuid!, $locale: String!, $platform: Platform!) {
auction(id: $auctionId, locale: $locale, platform: $platform) {
viewingDays {
from
to
city
}
}
}
""",
"full_auction": """
query AuctionData($auctionId: TbaUuid!, $locale: String!, $platform: Platform!) {
auction(id: $auctionId, locale: $locale, platform: $platform) {
id
displayId
biddingStatus
buyersPremium
viewingDays {
city
countryCode
from
to
}
collectionDays {
city
countryCode
from
to
}
}
}
"""
}
async def test_query(name, query, auction_id):
variables = {
"auctionId": auction_id,
"locale": "nl",
"platform": "TWK"
}
payload = {
"query": query,
"variables": variables
}
async with aiohttp.ClientSession() as session:
async with session.post(GRAPHQL_ENDPOINT, json=payload, timeout=30) as response:
data = await response.json()
print(f"\n{'='*60}")
print(f"QUERY: {name}")
print(f"{'='*60}")
if 'errors' in data:
print("ERRORS:")
for error in data['errors']:
print(f" {error}")
else:
print("SUCCESS:")
print(json.dumps(data, indent=2))
async def main():
# Test with the auction we know exists
auction_id = "9d5d9d6b-94de-4147-b523-dfa512d85dfa"
for name, query in QUERIES.items():
await test_query(name, query, auction_id)
await asyncio.sleep(0.5)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,141 +0,0 @@
#!/usr/bin/env python3
"""
Export only NEW auctions/lots that haven't been sent to server yet
Prevents UNIQUE constraint errors on server import
"""
import sqlite3
import json
import csv
from datetime import datetime
from pathlib import Path
DB_PATH = "C:/mnt/okcomputer/output/cache.db"
OUTPUT_DIR = Path("C:/mnt/okcomputer/output")
SYNC_STATE_FILE = OUTPUT_DIR / ".server_sync_state"
def get_last_export_timestamp():
"""Get timestamp of last successful export to server"""
if SYNC_STATE_FILE.exists():
return int(SYNC_STATE_FILE.read_text().strip())
return 0
def save_export_timestamp(timestamp: int):
"""Save timestamp of successful export"""
SYNC_STATE_FILE.write_text(str(timestamp))
def export_new_data():
"""Export only records that are NEW since last server import"""
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
last_export = get_last_export_timestamp()
current_time = int(datetime.now().timestamp())
print("="*60)
print("INCREMENTAL EXPORT FOR SERVER")
print("="*60)
print(f"Last export: {datetime.fromtimestamp(last_export).strftime('%Y-%m-%d %H:%M:%S') if last_export else 'Never (will export ALL)'}")
print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
print()
# Get new auctions (discovered_at > last_export)
if last_export == 0:
# First run: export all
cursor.execute("SELECT * FROM auctions ORDER BY auction_id")
else:
# Subsequent runs: only new ones
cursor.execute("""
SELECT * FROM auctions
WHERE discovered_at > ?
ORDER BY auction_id
""", (last_export,))
new_auctions = [dict(row) for row in cursor.fetchall()]
# Get new lots (scraped_at_timestamp > last_export)
if last_export == 0:
cursor.execute("SELECT * FROM lots ORDER BY lot_id")
else:
cursor.execute("""
SELECT * FROM lots
WHERE scraped_at_timestamp > ?
ORDER BY lot_id
""", (last_export,))
new_lots = [dict(row) for row in cursor.fetchall()]
conn.close()
# Export to server-ready files
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
files_created = []
# Export auctions
if new_auctions:
auctions_csv = OUTPUT_DIR / f'auctions_{timestamp}.csv'
auctions_json = OUTPUT_DIR / f'auctions_{timestamp}.json'
with open(auctions_csv, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=new_auctions[0].keys())
writer.writeheader()
writer.writerows(new_auctions)
with open(auctions_json, 'w', encoding='utf-8') as f:
json.dump(new_auctions, f, indent=2, ensure_ascii=False)
files_created.extend([auctions_csv, auctions_json])
print(f"✓ Exported {len(new_auctions)} auctions")
print(f" CSV: {auctions_csv}")
print(f" JSON: {auctions_json}")
else:
print("✓ No new auctions to export")
# Export lots
if new_lots:
lots_csv = OUTPUT_DIR / f'lots_{timestamp}.csv'
lots_json = OUTPUT_DIR / f'lots_{timestamp}.json'
with open(lots_csv, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=new_lots[0].keys())
writer.writeheader()
writer.writerows(new_lots)
with open(lots_json, 'w', encoding='utf-8') as f:
json.dump(new_lots, f, indent=2, ensure_ascii=False)
files_created.extend([lots_csv, lots_json])
print(f"✓ Exported {len(new_lots)} lots")
print(f" CSV: {lots_csv}")
print(f" JSON: {lots_json}")
else:
print("✓ No new lots to export")
# Save sync state
if new_auctions or new_lots:
save_export_timestamp(current_time)
print()
print("="*60)
print("EXPORT COMPLETE")
print("="*60)
print(f"New auctions: {len(new_auctions)}")
print(f"New lots: {len(new_lots)}")
print()
print("Next export will only include records newer than:")
print(f" {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
else:
print()
print("="*60)
print("NOTHING TO EXPORT")
print("="*60)
print("All data already exported to server")
return {
'auctions': len(new_auctions),
'lots': len(new_lots),
'files': [str(f) for f in files_created]
}
if __name__ == "__main__":
export_new_data()

View File

@@ -1,53 +0,0 @@
#!/usr/bin/env python3
"""Extract the GraphQL query being used"""
import asyncio
import json
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
graphql_requests = []
async def capture_request(request):
if 'graphql' in request.url:
graphql_requests.append({
'url': request.url,
'method': request.method,
'post_data': request.post_data,
'headers': dict(request.headers)
})
page.on('request', capture_request)
await page.goto("https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5", wait_until='networkidle')
await asyncio.sleep(2)
print(f"Captured {len(graphql_requests)} GraphQL requests\n")
for i, req in enumerate(graphql_requests):
print(f"{'='*60}")
print(f"REQUEST #{i+1}")
print(f"{'='*60}")
print(f"URL: {req['url']}")
print(f"Method: {req['method']}")
if req['post_data']:
try:
data = json.loads(req['post_data'])
print(f"\nQuery Name: {data.get('operationName', 'N/A')}")
print(f"\nVariables:")
print(json.dumps(data.get('variables', {}), indent=2))
print(f"\nQuery:")
print(data.get('query', '')[:1000])
except:
print(f"\nPOST Data: {req['post_data'][:500]}")
print()
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,45 +0,0 @@
#!/usr/bin/env python3
"""Find viewing/pickup in actual HTML"""
import asyncio
from playwright.async_api import async_playwright
import re
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
# Try a lot that should have viewing times
await page.goto("https://www.troostwijkauctions.com/l/woonunit-type-tp-4-b-6m-nr-102-A1-37889-102", wait_until='networkidle')
# Get text content
text_content = await page.evaluate("document.body.innerText")
print("Searching for viewing/pickup patterns...\n")
# Look for "Bezichtigingen" section
lines = text_content.split('\n')
for i, line in enumerate(lines):
if 'bezichtig' in line.lower() or 'viewing' in line.lower():
# Print surrounding context
context = lines[max(0, i-1):min(len(lines), i+5)]
print("FOUND Bezichtigingen:")
for c in context:
print(f" {c}")
print()
break
# Look for "Ophalen" section
for i, line in enumerate(lines):
if 'ophalen' in line.lower() or 'collection' in line.lower() or 'pickup' in line.lower():
context = lines[max(0, i-1):min(len(lines), i+5)]
print("FOUND Ophalen:")
for c in context:
print(f" {c}")
print()
break
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,166 +0,0 @@
"""
Fetch bid history for existing lots that have bids but no bid history records.
Reads cached lot pages to get lot UUIDs, then calls bid history API.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import asyncio
from cache import CacheManager
import sqlite3
import zlib
import json
import re
from bid_history_client import fetch_bid_history, parse_bid_history
async def fetch_missing_bid_history():
"""Fetch bid history for lots that have bids but no history records"""
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Get lots with bids but no bid history
cursor.execute("""
SELECT l.lot_id, l.bid_count
FROM lots l
WHERE l.bid_count > 0
AND l.lot_id NOT IN (SELECT DISTINCT lot_id FROM bid_history)
ORDER BY l.bid_count DESC
""")
lots_to_fetch = cursor.fetchall()
print(f"Found {len(lots_to_fetch)} lots with bids but no bid history")
if not lots_to_fetch:
print("No lots to process!")
conn.close()
return
# Build mapping from lot_id to lot UUID from cached pages
print("Building lot_id -> UUID mapping from cache...")
cursor.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/l/%'
""")
lot_id_to_uuid = {}
total_cached = 0
for url, content_blob in cursor:
total_cached += 1
if total_cached % 100 == 0:
print(f"Processed {total_cached} cached pages...", end='\r')
try:
content = zlib.decompress(content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
continue
data = json.loads(match.group(1))
lot = data.get('props', {}).get('pageProps', {}).get('lot', {})
if not lot:
continue
lot_display_id = lot.get('displayId')
lot_uuid = lot.get('id')
if lot_display_id and lot_uuid:
lot_id_to_uuid[lot_display_id] = lot_uuid
except:
continue
print(f"\n\nBuilt UUID mapping for {len(lot_id_to_uuid)} lots")
# Fetch bid history for each lot
print("\nFetching bid history from API...")
fetched = 0
failed = 0
no_uuid = 0
for lot_id, bid_count in lots_to_fetch:
lot_uuid = lot_id_to_uuid.get(lot_id)
if not lot_uuid:
no_uuid += 1
continue
try:
print(f"\nFetching bid history for {lot_id} ({bid_count} bids)...")
bid_history = await fetch_bid_history(lot_uuid)
if bid_history:
bid_data = parse_bid_history(bid_history, lot_id)
# Update lots table with bid intelligence
cursor.execute("""
UPDATE lots
SET first_bid_time = ?,
last_bid_time = ?,
bid_velocity = ?
WHERE lot_id = ?
""", (
bid_data['first_bid_time'],
bid_data['last_bid_time'],
bid_data['bid_velocity'],
lot_id
))
# Save bid history records
cache.save_bid_history(lot_id, bid_data['bid_records'])
fetched += 1
print(f" Saved {len(bid_data['bid_records'])} bid records")
print(f" Bid velocity: {bid_data['bid_velocity']:.2f} bids/hour")
# Commit every 10 lots
if fetched % 10 == 0:
conn.commit()
print(f"\nProgress: {fetched}/{len(lots_to_fetch)} lots processed...")
# Rate limit to be respectful
await asyncio.sleep(0.5)
else:
failed += 1
except Exception as e:
print(f" Error fetching bid history for {lot_id}: {e}")
failed += 1
continue
conn.commit()
print(f"\n\nComplete!")
print(f"Total lots to process: {len(lots_to_fetch)}")
print(f"Successfully fetched: {fetched}")
print(f"Failed: {failed}")
print(f"No UUID found: {no_uuid}")
# Verify fix
cursor.execute("""
SELECT COUNT(DISTINCT lot_id) FROM bid_history
""")
lots_with_history = cursor.fetchone()[0]
cursor.execute("""
SELECT COUNT(*) FROM lots WHERE bid_count > 0
""")
lots_with_bids = cursor.fetchone()[0]
print(f"\nLots with bids: {lots_with_bids}")
print(f"Lots with bid history: {lots_with_history}")
print(f"Coverage: {lots_with_history/lots_with_bids*100:.1f}%")
conn.close()
if __name__ == "__main__":
asyncio.run(fetch_missing_bid_history())

View File

@@ -1,64 +0,0 @@
#!/usr/bin/env python3
"""Find the API endpoint by monitoring network requests"""
import asyncio
import json
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
requests = []
responses = []
async def log_request(request):
if any(term in request.url for term in ['api', 'graphql', 'lot', 'auction', 'bid']):
requests.append({
'url': request.url,
'method': request.method,
'headers': dict(request.headers),
'post_data': request.post_data
})
async def log_response(response):
if any(term in response.url for term in ['api', 'graphql', 'lot', 'auction', 'bid']):
try:
body = await response.text()
responses.append({
'url': response.url,
'status': response.status,
'body': body[:1000]
})
except:
pass
page.on('request', log_request)
page.on('response', log_response)
print("Loading lot page...")
await page.goto("https://www.troostwijkauctions.com/l/woonunit-type-tp-4-b-6m-nr-102-A1-37889-102", wait_until='networkidle')
# Wait for dynamic content
await asyncio.sleep(3)
print(f"\nFound {len(requests)} relevant requests")
print(f"Found {len(responses)} relevant responses\n")
for req in requests[:10]:
print(f"REQUEST: {req['method']} {req['url']}")
if req['post_data']:
print(f" POST DATA: {req['post_data'][:200]}")
print("\n" + "="*60 + "\n")
for resp in responses[:10]:
print(f"RESPONSE: {resp['url']}")
print(f" Status: {resp['status']}")
print(f" Body: {resp['body'][:300]}")
print()
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,70 +0,0 @@
#!/usr/bin/env python3
"""Find API endpoint using a valid lot from database"""
import asyncio
import sqlite3
from playwright.async_api import async_playwright
# Get a valid lot URL
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("SELECT url FROM lots WHERE url LIKE '%/l/%' LIMIT 5")
lot_urls = [row[0] for row in cursor.fetchall()]
conn.close()
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
api_calls = []
async def log_response(response):
url = response.url
# Look for API calls
if ('api' in url.lower() or 'graphql' in url.lower() or
'/v2/' in url or '/v3/' in url or '/v4/' in url or
'query' in url.lower() or 'mutation' in url.lower()):
try:
body = await response.text()
api_calls.append({
'url': url,
'status': response.status,
'body': body
})
print(f"\nAPI: {url}")
except:
pass
page.on('response', log_response)
for lot_url in lot_urls[:2]:
print(f"\n{'='*60}")
print(f"Loading: {lot_url}")
print(f"{'='*60}")
try:
await page.goto(lot_url, wait_until='networkidle', timeout=30000)
await asyncio.sleep(2)
# Check if page has bid info
content = await page.content()
if 'currentBid' in content or 'Current bid' in content or 'Huidig bod' in content:
print("[+] Page contains bid information")
break
except Exception as e:
print(f"[!] Error: {e}")
continue
print(f"\n\n{'='*60}")
print(f"CAPTURED {len(api_calls)} API CALLS")
print(f"{'='*60}")
for call in api_calls:
print(f"\n{call['url']}")
print(f"Status: {call['status']}")
if 'json' in call['body'][:100].lower() or call['body'].startswith('{'):
print(f"Body (first 500 chars): {call['body'][:500]}")
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env python3
"""Find an auction page with lots data"""
import sqlite3
import zlib
import json
import re
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/a/%'
""")
for row in cursor:
url, content_blob = row
content = zlib.decompress(content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
continue
data = json.loads(match.group(1))
page_props = data.get('props', {}).get('pageProps', {})
if 'auction' in page_props:
auction = page_props['auction']
lots = auction.get('lots', [])
if lots and len(lots) > 0:
print(f"Found auction with {len(lots)} lots: {url}\n")
lot = lots[0]
print(f"SAMPLE LOT FROM AUCTION.LOTS[]:")
print(f" displayId: {lot.get('displayId')}")
print(f" title: {lot.get('title', '')[:50]}...")
print(f" urlSlug: {lot.get('urlSlug')}")
print(f"\nBIDDING FIELDS:")
for key in ['currentBid', 'highestBid', 'startingBid', 'minimumBidAmount', 'bidCount', 'numberOfBids']:
print(f" {key}: {lot.get(key)}")
print(f"\nTIMING FIELDS:")
for key in ['endDate', 'startDate', 'closingTime']:
print(f" {key}: {lot.get(key)}")
print(f"\nALL KEYS: {list(lot.keys())[:30]}...")
break
conn.close()

View File

@@ -1,155 +0,0 @@
"""
Fix auctions table by replacing with correct data from cached auction pages.
The auctions table currently has wrong auction_ids (numeric instead of displayId).
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from cache import CacheManager
import sqlite3
import zlib
import json
import re
from datetime import datetime
def fix_auctions_table():
"""Rebuild auctions table from cached auction pages"""
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Clear existing auctions table
print("Clearing auctions table...")
cursor.execute("DELETE FROM auctions")
conn.commit()
# Get all auction pages from cache
cursor.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/a/%'
""")
auction_pages = cursor.fetchall()
print(f"Found {len(auction_pages)} auction pages in cache")
total = 0
inserted = 0
errors = 0
print("Extracting auction data from cached pages...")
for url, content_blob in auction_pages:
total += 1
if total % 10 == 0:
print(f"Processed {total}/{len(auction_pages)}...", end='\r')
try:
# Decompress and parse __NEXT_DATA__
content = zlib.decompress(content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
errors += 1
continue
data = json.loads(match.group(1))
page_props = data.get('props', {}).get('pageProps', {})
auction = page_props.get('auction', {})
if not auction:
errors += 1
continue
# Extract auction data
auction_id = auction.get('displayId')
if not auction_id:
errors += 1
continue
title = auction.get('name', '')
# Get location
location = ''
viewing_days = auction.get('viewingDays', [])
if viewing_days and isinstance(viewing_days, list) and len(viewing_days) > 0:
loc = viewing_days[0]
city = loc.get('city', '')
country = loc.get('countryCode', '').upper()
location = f"{city}, {country}" if city and country else (city or country)
lots_count = auction.get('lotCount', 0)
# Get first lot closing time
first_lot_closing = ''
min_end_date = auction.get('minEndDate', '')
if min_end_date:
# Format timestamp
try:
dt = datetime.fromisoformat(min_end_date.replace('Z', '+00:00'))
first_lot_closing = dt.strftime('%Y-%m-%d %H:%M:%S')
except:
first_lot_closing = min_end_date
scraped_at = datetime.now().isoformat()
# Insert into auctions table
cursor.execute("""
INSERT OR REPLACE INTO auctions
(auction_id, url, title, location, lots_count, first_lot_closing_time, scraped_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (auction_id, url, title, location, lots_count, first_lot_closing, scraped_at))
inserted += 1
except Exception as e:
errors += 1
continue
conn.commit()
print(f"\n\nComplete!")
print(f"Total auction pages processed: {total}")
print(f"Auctions inserted: {inserted}")
print(f"Errors: {errors}")
# Verify fix
cursor.execute("SELECT COUNT(*) FROM auctions")
total_auctions = cursor.fetchone()[0]
print(f"\nTotal auctions in table: {total_auctions}")
cursor.execute("""
SELECT COUNT(*) FROM lots
WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
AND auction_id != ''
""")
orphaned = cursor.fetchone()[0]
print(f"Orphaned lots remaining: {orphaned}")
if orphaned == 0:
print("\nSUCCESS! All lots now have matching auctions!")
else:
# Show sample of remaining orphans
cursor.execute("""
SELECT lot_id, auction_id FROM lots
WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
AND auction_id != ''
LIMIT 5
""")
print("\nSample remaining orphaned lots:")
for lot_id, auction_id in cursor.fetchall():
print(f" {lot_id} -> auction_id: {auction_id}")
# Show what auction_ids we do have
cursor.execute("SELECT auction_id FROM auctions LIMIT 10")
print("\nSample auction_ids in auctions table:")
for row in cursor.fetchall():
print(f" {row[0]}")
conn.close()
if __name__ == "__main__":
fix_auctions_table()

View File

@@ -1,136 +0,0 @@
"""
Fix orphaned lots by updating auction_id from UUID to displayId.
This migration reads cached lot pages and extracts the correct auction displayId.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from cache import CacheManager
import sqlite3
import zlib
import json
import re
def fix_orphaned_lots():
"""Update lot auction_id from UUID to auction displayId"""
cache = CacheManager()
conn = sqlite3.connect(cache.db_path)
cursor = conn.cursor()
# Get all lots that need fixing (have UUID auction_id)
cursor.execute("""
SELECT l.lot_id, l.auction_id
FROM lots l
WHERE length(l.auction_id) > 20 -- UUID is longer than displayId like "A1-12345"
""")
lots_to_fix = {lot_id: auction_uuid for lot_id, auction_uuid in cursor.fetchall()}
print(f"Found {len(lots_to_fix)} lots with UUID auction_id that need fixing")
if not lots_to_fix:
print("No lots to fix!")
conn.close()
return
# Build mapping from lot displayId to auction displayId from cached pages
print("Building lot displayId -> auction displayId mapping from cache...")
cursor.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/l/%'
""")
lot_to_auction_map = {}
total = 0
errors = 0
for url, content_blob in cursor:
total += 1
if total % 100 == 0:
print(f"Processing cached pages... {total}", end='\r')
try:
# Decompress and parse __NEXT_DATA__
content = zlib.decompress(content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
continue
data = json.loads(match.group(1))
page_props = data.get('props', {}).get('pageProps', {})
lot = page_props.get('lot', {})
auction = page_props.get('auction', {})
if not lot or not auction:
continue
lot_display_id = lot.get('displayId')
auction_display_id = auction.get('displayId')
if lot_display_id and auction_display_id:
lot_to_auction_map[lot_display_id] = auction_display_id
except Exception as e:
errors += 1
continue
print(f"\n\nBuilt mapping for {len(lot_to_auction_map)} lots")
print(f"Errors while parsing: {errors}")
# Now update the lots table
print("\nUpdating lots table...")
updated = 0
not_found = 0
for lot_id, old_auction_uuid in lots_to_fix.items():
if lot_id in lot_to_auction_map:
new_auction_id = lot_to_auction_map[lot_id]
cursor.execute("""
UPDATE lots
SET auction_id = ?
WHERE lot_id = ?
""", (new_auction_id, lot_id))
updated += 1
else:
not_found += 1
if (updated + not_found) % 100 == 0:
print(f"Updated: {updated}, not found: {not_found}", end='\r')
conn.commit()
print(f"\n\nComplete!")
print(f"Total cached pages processed: {total}")
print(f"Lots updated with auction displayId: {updated}")
print(f"Lots not found in cache: {not_found}")
print(f"Parse errors: {errors}")
# Verify fix
cursor.execute("""
SELECT COUNT(*) FROM lots
WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
""")
orphaned = cursor.fetchone()[0]
print(f"\nOrphaned lots remaining: {orphaned}")
if orphaned > 0:
# Show sample of remaining orphans
cursor.execute("""
SELECT lot_id, auction_id FROM lots
WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
LIMIT 5
""")
print("\nSample remaining orphaned lots:")
for lot_id, auction_id in cursor.fetchall():
print(f" {lot_id} -> auction_id: {auction_id}")
conn.close()
if __name__ == "__main__":
fix_orphaned_lots()

View File

@@ -1,69 +0,0 @@
#!/usr/bin/env python3
"""Extract and inspect __NEXT_DATA__ from a cached lot page"""
import sqlite3
import zlib
import json
import re
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
# Get a cached auction page
cursor = conn.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/a/%'
LIMIT 1
""")
row = cursor.fetchone()
if not row:
print("No cached lot pages found")
exit(1)
url, content_blob = row
print(f"Inspecting: {url}\n")
# Decompress
content = zlib.decompress(content_blob).decode('utf-8')
# Extract __NEXT_DATA__
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
print("No __NEXT_DATA__ found")
exit(1)
data = json.loads(match.group(1))
page_props = data.get('props', {}).get('pageProps', {})
if 'auction' in page_props:
auction = page_props['auction']
print("AUCTION DATA STRUCTURE:")
print("=" * 60)
print(f"displayId: {auction.get('displayId')}")
print(f"name: {auction.get('name', '')[:50]}...")
print(f"lots count: {len(auction.get('lots', []))}")
if auction.get('lots'):
lot = auction['lots'][0]
print(f"\nFIRST LOT STRUCTURE:")
print(f" displayId: {lot.get('displayId')}")
print(f" title: {lot.get('title', '')[:50]}...")
print(f"\n BIDDING:")
print(f" currentBid: {lot.get('currentBid')}")
print(f" highestBid: {lot.get('highestBid')}")
print(f" startingBid: {lot.get('startingBid')}")
print(f" minimumBidAmount: {lot.get('minimumBidAmount')}")
print(f" bidCount: {lot.get('bidCount')}")
print(f" numberOfBids: {lot.get('numberOfBids')}")
print(f" TIMING:")
print(f" endDate: {lot.get('endDate')}")
print(f" startDate: {lot.get('startDate')}")
print(f" closingTime: {lot.get('closingTime')}")
print(f" ALL KEYS: {list(lot.keys())}")
print(f"\nAUCTION TIMING:")
print(f" minEndDate: {auction.get('minEndDate')}")
print(f" maxEndDate: {auction.get('maxEndDate')}")
print(f" ALL KEYS: {list(auction.keys())}")
conn.close()

View File

@@ -1,49 +0,0 @@
#!/usr/bin/env python3
"""Inspect a lot page HTML to find viewing_time and pickup_date"""
import asyncio
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
# Use the known lot
await page.goto("https://www.troostwijkauctions.com/l/woonunit-type-tp-4-b-6m-nr-102-A1-37889-102", wait_until='networkidle')
content = await page.content()
print("Searching for patterns...")
print("="*60)
# Search for viewing time patterns
import re
patterns = {
'Bezichtigingen': r'Bezichtigingen.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
'viewing': r'(?i)viewing.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
'Ophalen': r'Ophalen.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
'pickup': r'(?i)pickup.*?(\d{2}\s+\w{3}\s+\d{4}\s+van\s+\d{2}:\d{2}\s+tot\s+\d{2}:\d{2})',
'Status': r'Status\s+([^<]+)',
}
for name, pattern in patterns.items():
matches = re.findall(pattern, content, re.DOTALL | re.MULTILINE)
if matches:
print(f"\n{name}:")
for match in matches[:3]:
print(f" {match[:200]}")
# Also look for structured data
print("\n\nSearching for 'Bezichtigingen' section:")
bez_match = re.search(r'Bezichtigingen.*?<.*?>(.*?)</.*?>', content, re.DOTALL)
if bez_match:
print(bez_match.group(0)[:500])
print("\n\nSearching for 'Ophalen' section:")
oph_match = re.search(r'Ophalen.*?<.*?>(.*?)</.*?>', content, re.DOTALL)
if oph_match:
print(oph_match.group(0)[:500])
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,45 +0,0 @@
#!/usr/bin/env python3
"""Intercept API calls to find where lot data comes from"""
import asyncio
import json
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=False)
page = await browser.new_page()
# Track API calls
api_calls = []
async def handle_response(response):
if 'api' in response.url.lower() or 'graphql' in response.url.lower():
try:
body = await response.json()
api_calls.append({
'url': response.url,
'status': response.status,
'body': body
})
print(f"\nAPI CALL: {response.url}")
print(f"Status: {response.status}")
if 'lot' in response.url.lower() or 'auction' in response.url.lower():
print(f"Body preview: {json.dumps(body, indent=2)[:500]}")
except:
pass
page.on('response', handle_response)
# Visit auction page
print("Loading auction page...")
await page.goto("https://www.troostwijkauctions.com/a/woonunits-generatoren-reinigingsmachines-en-zakelijke-goederen-A1-37889", wait_until='networkidle')
# Wait a bit for lazy loading
await asyncio.sleep(5)
print(f"\n\nCaptured {len(api_calls)} API calls")
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,148 +0,0 @@
#!/usr/bin/env python3
"""
Migrate existing lot data to extract missing enriched fields
"""
import sqlite3
import json
import re
from datetime import datetime
import sys
sys.path.insert(0, 'src')
from graphql_client import extract_enriched_attributes, extract_attributes_from_lot_json
DB_PATH = "/mnt/okcomputer/output/cache.db"
def migrate_lot_attributes():
"""Extract attributes from cached lot pages"""
print("="*60)
print("MIGRATING EXISTING LOT DATA")
print("="*60)
conn = sqlite3.connect(DB_PATH)
# Get cached lot pages
cursor = conn.execute("""
SELECT url, content, timestamp
FROM cache
WHERE url LIKE '%/l/%'
ORDER BY timestamp DESC
""")
import zlib
updated_count = 0
for url, content_blob, timestamp in cursor:
try:
# Get lot_id from URL
lot_id_match = re.search(r'/l/.*?([A-Z]\d+-\d+-\d+)', url)
if not lot_id_match:
lot_id_match = re.search(r'([A-Z]\d+-\d+-\d+)', url)
if not lot_id_match:
continue
lot_id = lot_id_match.group(1)
# Check if lot exists in database
lot_cursor = conn.execute("SELECT lot_id, title, description FROM lots WHERE lot_id = ?", (lot_id,))
lot_row = lot_cursor.fetchone()
if not lot_row:
continue
_, title, description = lot_row
# Decompress and parse __NEXT_DATA__
content = zlib.decompress(content_blob).decode('utf-8')
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
continue
data = json.loads(match.group(1))
lot_json = data.get('props', {}).get('pageProps', {}).get('lot', {})
if not lot_json:
continue
# Extract basic attributes
attrs = extract_attributes_from_lot_json(lot_json)
# Extract enriched attributes
page_data = {'title': title, 'description': description, 'brand': attrs.get('brand', '')}
enriched = extract_enriched_attributes(lot_json, page_data)
# Merge
all_attrs = {**attrs, **enriched}
# Update database
conn.execute("""
UPDATE lots
SET brand = ?,
model = ?,
attributes_json = ?,
year_manufactured = ?,
condition_score = ?,
condition_description = ?,
serial_number = ?,
manufacturer = ?,
damage_description = ?
WHERE lot_id = ?
""", (
all_attrs.get('brand', ''),
all_attrs.get('model', ''),
all_attrs.get('attributes_json', ''),
all_attrs.get('year_manufactured'),
all_attrs.get('condition_score'),
all_attrs.get('condition_description', ''),
all_attrs.get('serial_number', ''),
all_attrs.get('manufacturer', ''),
all_attrs.get('damage_description', ''),
lot_id
))
updated_count += 1
if updated_count % 100 == 0:
print(f" Processed {updated_count} lots...")
conn.commit()
except Exception as e:
print(f" Error processing {url}: {e}")
continue
conn.commit()
print(f"\n✓ Updated {updated_count} lots with enriched attributes")
# Show stats
cursor = conn.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN year_manufactured IS NOT NULL THEN 1 ELSE 0 END) as has_year,
SUM(CASE WHEN condition_score IS NOT NULL THEN 1 ELSE 0 END) as has_condition,
SUM(CASE WHEN manufacturer != '' THEN 1 ELSE 0 END) as has_manufacturer,
SUM(CASE WHEN brand != '' THEN 1 ELSE 0 END) as has_brand,
SUM(CASE WHEN model != '' THEN 1 ELSE 0 END) as has_model
FROM lots
""")
stats = cursor.fetchone()
print(f"\nENRICHMENT STATISTICS:")
print(f" Total lots: {stats[0]:,}")
print(f" Has year: {stats[1]:,} ({100*stats[1]/stats[0]:.1f}%)")
print(f" Has condition: {stats[2]:,} ({100*stats[2]/stats[0]:.1f}%)")
print(f" Has manufacturer: {stats[3]:,} ({100*stats[3]/stats[0]:.1f}%)")
print(f" Has brand: {stats[4]:,} ({100*stats[4]/stats[0]:.1f}%)")
print(f" Has model: {stats[5]:,} ({100*stats[5]/stats[0]:.1f}%)")
conn.close()
def main():
print("\nStarting migration of existing data...")
print(f"Database: {DB_PATH}\n")
migrate_lot_attributes()
print(f"\n{'='*60}")
print("MIGRATION COMPLETE")
print(f"{'='*60}\n")
if __name__ == "__main__":
main()

View File

@@ -1,51 +0,0 @@
#!/usr/bin/env python3
"""Scrape a fresh auction page to see the lots array structure"""
import asyncio
import json
import re
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
# Get first auction
await page.goto("https://www.troostwijkauctions.com/auctions", wait_until='networkidle')
content = await page.content()
# Find first auction link
match = re.search(r'href="(/a/[^"]+)"', content)
if not match:
print("No auction found")
return
auction_url = f"https://www.troostwijkauctions.com{match.group(1)}"
print(f"Scraping: {auction_url}\n")
await page.goto(auction_url, wait_until='networkidle')
content = await page.content()
# Extract __NEXT_DATA__
match = re.search(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', content, re.DOTALL)
if not match:
print("No __NEXT_DATA__ found")
return
data = json.loads(match.group(1))
page_props = data.get('props', {}).get('pageProps', {})
if 'auction' in page_props:
auction = page_props['auction']
print(f"Auction: {auction.get('name', '')[:50]}...")
print(f"Lots in array: {len(auction.get('lots', []))}")
if auction.get('lots'):
lot = auction['lots'][0]
print(f"\nFIRST LOT:")
print(json.dumps(lot, indent=2)[:1500])
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,47 +0,0 @@
#!/usr/bin/env python3
"""Search cached pages for viewing/pickup text"""
import sqlite3
import zlib
import re
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/l/%'
ORDER BY timestamp DESC
LIMIT 20
""")
for url, content_blob in cursor:
try:
content = zlib.decompress(content_blob).decode('utf-8')
# Look for viewing/pickup patterns
if 'bezichtig' in content.lower() or 'ophalen' in content.lower():
print(f"\n{'='*60}")
print(f"URL: {url}")
print(f"{'='*60}")
# Extract sections with context
patterns = [
(r'(Bezichtigingen?.*?(?:\n.*?){0,5})', 'VIEWING'),
(r'(Ophalen.*?(?:\n.*?){0,5})', 'PICKUP'),
]
for pattern, label in patterns:
matches = re.findall(pattern, content, re.IGNORECASE | re.DOTALL)
if matches:
print(f"\n{label}:")
for match in matches[:1]: # First match
# Clean up HTML
clean = re.sub(r'<[^>]+>', ' ', match)
clean = re.sub(r'\s+', ' ', clean).strip()
print(f" {clean[:200]}")
break # Found one, that's enough
except Exception:  # skip cache entries that fail to decompress or decode
continue
conn.close()

View File

@@ -1,47 +0,0 @@
# PowerShell script to create Windows Task Scheduler job for Scaev Monitor
# Run as Administrator
$TaskName = "ScaevAuctionMonitor"
$ScriptPath = "C:\vibe\scaev\src\monitor.py"
$PythonPath = "python3" # Adjust if needed
$WorkingDir = "C:\vibe\scaev"
# Create the action (run Python script)
$Action = New-ScheduledTaskAction -Execute $PythonPath `
-Argument "$ScriptPath 30" `
-WorkingDirectory $WorkingDir
# Trigger: On system startup
$TriggerStartup = New-ScheduledTaskTrigger -AtStartup
# Settings
$Settings = New-ScheduledTaskSettingsSet `
-AllowStartIfOnBatteries `
-DontStopIfGoingOnBatteries `
-StartWhenAvailable `
-RestartCount 3 `
-RestartInterval (New-TimeSpan -Minutes 5)
# Principal: Run with highest privileges
$Principal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
# Register the task
Register-ScheduledTask `
-TaskName $TaskName `
-Action $Action `
-Trigger $TriggerStartup `
-Settings $Settings `
-Principal $Principal `
-Description "Scaev auction monitor - polls for new auctions every 30 minutes" `
-Force
Write-Host "`nTask '$TaskName' created successfully!" -ForegroundColor Green
Write-Host "`nTo manage the task:"
Write-Host " 1. Open Task Scheduler (taskschd.msc)"
Write-Host " 2. Find 'ScaevAuctionMonitor' in Task Scheduler Library"
Write-Host " 3. Right-click to Run, Stop, or Disable"
Write-Host "`nOr use PowerShell commands:"
Write-Host " Start-ScheduledTask -TaskName '$TaskName'"
Write-Host " Stop-ScheduledTask -TaskName '$TaskName'"
Write-Host " Disable-ScheduledTask -TaskName '$TaskName'"
Write-Host " Get-ScheduledTask -TaskName '$TaskName' | Get-ScheduledTaskInfo"

View File

@@ -1,49 +0,0 @@
#!/usr/bin/env python3
"""Show migration statistics"""
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN year_manufactured IS NOT NULL THEN 1 ELSE 0 END) as has_year,
SUM(CASE WHEN condition_score IS NOT NULL THEN 1 ELSE 0 END) as has_condition,
SUM(CASE WHEN manufacturer != '' THEN 1 ELSE 0 END) as has_manufacturer,
SUM(CASE WHEN brand != '' THEN 1 ELSE 0 END) as has_brand,
SUM(CASE WHEN model != '' THEN 1 ELSE 0 END) as has_model
FROM lots
""")
stats = cursor.fetchone()
print("="*60)
print("MIGRATION RESULTS")
print("="*60)
print(f"\nTotal lots: {stats[0]:,}")
print(f"Has year: {stats[1]:,} ({100*stats[1]/stats[0]:.1f}%)")
print(f"Has condition: {stats[2]:,} ({100*stats[2]/stats[0]:.1f}%)")
print(f"Has manufacturer: {stats[3]:,} ({100*stats[3]/stats[0]:.1f}%)")
print(f"Has brand: {stats[4]:,} ({100*stats[4]/stats[0]:.1f}%)")
print(f"Has model: {stats[5]:,} ({100*stats[5]/stats[0]:.1f}%)")
# Show sample enriched data
print(f"\n{'='*60}")
print("SAMPLE ENRICHED LOTS")
print(f"{'='*60}")
cursor = conn.execute("""
SELECT lot_id, year_manufactured, manufacturer, model, condition_score
FROM lots
WHERE year_manufactured IS NOT NULL OR manufacturer != ''
LIMIT 5
""")
for row in cursor:
print(f"\n{row[0]}:")
print(f" Year: {row[1]}")
print(f" Manufacturer: {row[2]}")
print(f" Model: {row[3]}")
print(f" Condition: {row[4]}")
conn.close()

View File

@@ -19,8 +19,9 @@ class CacheManager:
self._init_db()
def _init_db(self):
"""Initialize cache and data storage database"""
"""Initialize cache and data storage database with consolidated schema"""
with sqlite3.connect(self.db_path) as conn:
# Cache table
conn.execute("""
CREATE TABLE IF NOT EXISTS cache (
url TEXT PRIMARY KEY,
@@ -32,6 +33,8 @@ class CacheManager:
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_timestamp ON cache(timestamp)
""")
# Auctions table - consolidated schema
conn.execute("""
CREATE TABLE IF NOT EXISTS auctions (
auction_id TEXT PRIMARY KEY,
@@ -40,9 +43,18 @@ class CacheManager:
location TEXT,
lots_count INTEGER,
first_lot_closing_time TEXT,
scraped_at TEXT
scraped_at TEXT,
city TEXT,
country TEXT,
type TEXT,
lot_count INTEGER DEFAULT 0,
closing_time TEXT,
discovered_at INTEGER
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)")
# Lots table - consolidated schema with all fields from working database
conn.execute("""
CREATE TABLE IF NOT EXISTS lots (
lot_id TEXT PRIMARY KEY,
@@ -50,8 +62,6 @@ class CacheManager:
url TEXT UNIQUE,
title TEXT,
current_bid TEXT,
starting_bid TEXT,
minimum_bid TEXT,
bid_count INTEGER,
closing_time TEXT,
viewing_time TEXT,
@@ -60,9 +70,54 @@ class CacheManager:
description TEXT,
category TEXT,
scraped_at TEXT,
sale_id INTEGER,
manufacturer TEXT,
type TEXT,
year INTEGER,
currency TEXT DEFAULT 'EUR',
closing_notified INTEGER DEFAULT 0,
starting_bid TEXT,
minimum_bid TEXT,
status TEXT,
brand TEXT,
model TEXT,
attributes_json TEXT,
first_bid_time TEXT,
last_bid_time TEXT,
bid_velocity REAL,
bid_increment REAL,
year_manufactured INTEGER,
condition_score REAL,
condition_description TEXT,
serial_number TEXT,
damage_description TEXT,
followers_count INTEGER DEFAULT 0,
estimated_min_price REAL,
estimated_max_price REAL,
lot_condition TEXT,
appearance TEXT,
estimated_min REAL,
estimated_max REAL,
next_bid_step_cents INTEGER,
condition TEXT,
category_path TEXT,
city_location TEXT,
country_code TEXT,
bidding_status TEXT,
packaging TEXT,
quantity INTEGER,
vat REAL,
buyer_premium_percentage REAL,
remarks TEXT,
reserve_price REAL,
reserve_met INTEGER,
view_count INTEGER,
FOREIGN KEY (auction_id) REFERENCES auctions(auction_id)
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)")
# Images table
conn.execute("""
CREATE TABLE IF NOT EXISTS images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -70,86 +125,28 @@ class CacheManager:
url TEXT,
local_path TEXT,
downloaded INTEGER DEFAULT 0,
labels TEXT,
processed_at INTEGER,
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_images_lot_id ON images(lot_id)")
# Add new columns to auctions table if they don't exist
cursor = conn.execute("PRAGMA table_info(auctions)")
auction_columns = {row[1] for row in cursor.fetchall()}
# Remove duplicates before creating unique index
conn.execute("""
DELETE FROM images
WHERE id NOT IN (
SELECT MIN(id)
FROM images
GROUP BY lot_id, url
)
""")
conn.execute("""
CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_lot_url
ON images(lot_id, url)
""")
if 'city' not in auction_columns:
conn.execute("ALTER TABLE auctions ADD COLUMN city TEXT")
if 'country' not in auction_columns:
conn.execute("ALTER TABLE auctions ADD COLUMN country TEXT")
if 'type' not in auction_columns:
conn.execute("ALTER TABLE auctions ADD COLUMN type TEXT")
if 'lot_count' not in auction_columns:
conn.execute("ALTER TABLE auctions ADD COLUMN lot_count INTEGER DEFAULT 0")
if 'closing_time' not in auction_columns:
conn.execute("ALTER TABLE auctions ADD COLUMN closing_time TEXT")
if 'discovered_at' not in auction_columns:
conn.execute("ALTER TABLE auctions ADD COLUMN discovered_at INTEGER")
# Add index for country filtering
conn.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)")
# Add new columns to lots table if they don't exist
cursor = conn.execute("PRAGMA table_info(lots)")
columns = {row[1] for row in cursor.fetchall()}
if 'starting_bid' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN starting_bid TEXT")
if 'minimum_bid' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN minimum_bid TEXT")
if 'status' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN status TEXT")
if 'brand' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN brand TEXT")
if 'model' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN model TEXT")
if 'attributes_json' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN attributes_json TEXT")
# Bidding intelligence fields
if 'first_bid_time' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN first_bid_time TEXT")
if 'last_bid_time' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN last_bid_time TEXT")
if 'bid_velocity' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN bid_velocity REAL")
if 'bid_increment' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN bid_increment REAL")
# Valuation intelligence fields
if 'year_manufactured' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN year_manufactured INTEGER")
if 'condition_score' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN condition_score REAL")
if 'condition_description' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN condition_description TEXT")
if 'serial_number' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN serial_number TEXT")
if 'manufacturer' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN manufacturer TEXT")
if 'damage_description' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN damage_description TEXT")
# NEW: High-value API fields
if 'followers_count' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN followers_count INTEGER DEFAULT 0")
if 'estimated_min_price' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN estimated_min_price REAL")
if 'estimated_max_price' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN estimated_max_price REAL")
if 'lot_condition' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN lot_condition TEXT")
if 'appearance' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN appearance TEXT")
if 'scraped_at_timestamp' not in columns:
conn.execute("ALTER TABLE lots ADD COLUMN scraped_at_timestamp INTEGER")
# Create bid_history table
# Bid history table
conn.execute("""
CREATE TABLE IF NOT EXISTS bid_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -163,33 +160,15 @@ class CacheManager:
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_bid_history_lot_time
ON bid_history(lot_id, bid_time)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_bid_history_bidder
ON bid_history(bidder_id)
""")
# Remove duplicates before creating unique index
# Keep the row with the smallest id (first occurrence) for each (lot_id, url) pair
conn.execute("""
DELETE FROM images
WHERE id NOT IN (
SELECT MIN(id)
FROM images
GROUP BY lot_id, url
)
""")
# Now create the unique index
conn.execute("""
CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_lot_url
ON images(lot_id, url)
""")
conn.commit()
def get(self, url: str, max_age_hours: int = 24) -> Optional[Dict]:
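Worth noting: CREATE TABLE IF NOT EXISTS leaves an existing lots table untouched, so a database created before this commit will not pick up columns that exist only in the consolidated schema (sale_id, next_bid_step_cents, reserve_price, and so on). A minimal backfill sketch, assuming sqlite3 and a hypothetical helper name ensure_columns:

import sqlite3

def ensure_columns(db_path: str, table: str, columns: dict) -> None:
    """Add any missing columns to an existing table (no-op for columns already present)."""
    with sqlite3.connect(db_path) as conn:
        existing = {row[1] for row in conn.execute(f"PRAGMA table_info({table})")}
        for name, decl in columns.items():
            if name not in existing:
                conn.execute(f"ALTER TABLE {table} ADD COLUMN {name} {decl}")
        conn.commit()

# Example: backfill a few of the lot fields introduced by the consolidated schema
ensure_columns("/mnt/okcomputer/output/cache.db", "lots", {
    "sale_id": "INTEGER",
    "next_bid_step_cents": "INTEGER",
    "reserve_price": "REAL",
})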

View File

@@ -1,256 +0,0 @@
#!/usr/bin/env python3
"""
Sync local database updates to server-compatible format
Creates incremental exports with only NEW or UPDATED records
"""
import sqlite3
import json
import csv
from datetime import datetime
from pathlib import Path
DB_PATH = "C:/mnt/okcomputer/output/cache.db"
OUTPUT_DIR = Path("C:/mnt/okcomputer/output")
def fill_missing_auction_fields():
"""Fill in missing fields in auctions table from scraped data"""
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
print("Filling missing auction fields...")
# Update closing_time from first_lot_closing_time
cursor.execute("""
UPDATE auctions
SET closing_time = first_lot_closing_time
WHERE closing_time IS NULL AND first_lot_closing_time IS NOT NULL
""")
updated = cursor.rowcount
print(f" ✓ Updated {updated} closing_time fields")
# Parse location to extract city and country
cursor.execute("""
SELECT auction_id, location
FROM auctions
WHERE location IS NOT NULL AND (city IS NULL OR country IS NULL)
""")
locations = cursor.fetchall()
city_updates = 0
for auction_id, location in locations:
if not location:
continue
# Parse "City, COUNTRY" or "City, Region, COUNTRY"
parts = [p.strip() for p in location.split(',')]
if len(parts) >= 2:
city = parts[0]
country = parts[-1]
cursor.execute("""
UPDATE auctions
SET city = ?, country = ?
WHERE auction_id = ?
""", (city, country, auction_id))
city_updates += 1
print(f" ✓ Updated {city_updates} city/country fields")
# Set type to 'online' for all (Troostwijk is an online platform)
cursor.execute("""
UPDATE auctions
SET type = 'online'
WHERE type IS NULL
""")
type_updates = cursor.rowcount
print(f" ✓ Updated {type_updates} type fields")
conn.commit()
conn.close()
print(f"✓ Auction fields updated\n")
def get_last_sync_timestamp():
"""Get timestamp of last successful sync"""
sync_file = OUTPUT_DIR / ".last_sync"
if sync_file.exists():
return int(sync_file.read_text().strip())
return 0
def save_sync_timestamp(timestamp: int):
"""Save timestamp of successful sync"""
sync_file = OUTPUT_DIR / ".last_sync"
sync_file.write_text(str(timestamp))
def export_incremental():
"""Export only records that are new or updated since last sync"""
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
last_sync = get_last_sync_timestamp()
current_time = int(datetime.now().timestamp())
print(f"Last sync: {datetime.fromtimestamp(last_sync).strftime('%Y-%m-%d %H:%M:%S') if last_sync else 'Never'}")
print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
# Get new/updated auctions
cursor.execute("""
SELECT * FROM auctions
WHERE discovered_at IS NULL OR discovered_at > ?
ORDER BY auction_id
""", (last_sync,))
new_auctions = [dict(row) for row in cursor.fetchall()]
# Get new/updated lots
cursor.execute("""
SELECT * FROM lots
WHERE scraped_at_timestamp IS NULL OR scraped_at_timestamp > ?
ORDER BY lot_id
""", (last_sync,))
new_lots = [dict(row) for row in cursor.fetchall()]
conn.close()
# Export to timestamped files
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
results = {
'auctions': 0,
'lots': 0,
'files': {}
}
# Export auctions if any new
if new_auctions:
auctions_csv = OUTPUT_DIR / f'auctions_update_{timestamp}.csv'
auctions_json = OUTPUT_DIR / f'auctions_update_{timestamp}.json'
with open(auctions_csv, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=new_auctions[0].keys())
writer.writeheader()
writer.writerows(new_auctions)
with open(auctions_json, 'w', encoding='utf-8') as f:
json.dump(new_auctions, f, indent=2, ensure_ascii=False)
results['auctions'] = len(new_auctions)
results['files']['auctions_csv'] = str(auctions_csv)
results['files']['auctions_json'] = str(auctions_json)
print(f"\n✓ Exported {len(new_auctions)} new/updated auctions")
print(f" CSV: {auctions_csv}")
print(f" JSON: {auctions_json}")
# Export lots if any new
if new_lots:
lots_csv = OUTPUT_DIR / f'lots_update_{timestamp}.csv'
lots_json = OUTPUT_DIR / f'lots_update_{timestamp}.json'
with open(lots_csv, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=new_lots[0].keys())
writer.writeheader()
writer.writerows(new_lots)
with open(lots_json, 'w', encoding='utf-8') as f:
json.dump(new_lots, f, indent=2, ensure_ascii=False)
results['lots'] = len(new_lots)
results['files']['lots_csv'] = str(lots_csv)
results['files']['lots_json'] = str(lots_json)
print(f"\n✓ Exported {len(new_lots)} new/updated lots")
print(f" CSV: {lots_csv}")
print(f" JSON: {lots_json}")
if not new_auctions and not new_lots:
print("\n✓ No new updates since last sync")
return results
def create_upsert_export():
"""Create SQL script for server to UPSERT (update or insert) data"""
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
last_sync = get_last_sync_timestamp()
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# Get new/updated auctions
cursor.execute("""
SELECT * FROM auctions
WHERE discovered_at IS NULL OR discovered_at > ?
""", (last_sync,))
new_auctions = [dict(row) for row in cursor.fetchall()]
if new_auctions:
sql_file = OUTPUT_DIR / f'upsert_auctions_{timestamp}.sql'
with open(sql_file, 'w', encoding='utf-8') as f:
f.write("-- UPSERT script for auctions (updates existing, inserts new)\n\n")
for auction in new_auctions:
# Create INSERT OR REPLACE statement
columns = list(auction.keys())
placeholders = []
for col, val in auction.items():
if val is None:
placeholders.append("NULL")
elif isinstance(val, (int, float)):
placeholders.append(str(val))
else:
# Escape single quotes
escaped = str(val).replace("'", "''")
placeholders.append(f"'{escaped}'")
f.write(f"INSERT OR REPLACE INTO auctions ({', '.join(columns)})\n")
f.write(f"VALUES ({', '.join(placeholders)});\n\n")
print(f"\n✓ Created UPSERT SQL script: {sql_file}")
print(f" Server can execute this to avoid constraint errors")
conn.close()
def main():
"""Main sync process"""
print("="*60)
print("DATABASE SYNC UTILITY")
print("="*60)
print(f"Database: {DB_PATH}")
print(f"Output: {OUTPUT_DIR}")
print("="*60)
# Step 1: Fill missing fields
fill_missing_auction_fields()
# Step 2: Export incremental updates
print("Exporting incremental updates...")
results = export_incremental()
# Step 3: Create UPSERT SQL (prevents constraint errors on server)
if results['auctions'] > 0:
create_upsert_export()
# Step 4: Save sync timestamp
current_time = int(datetime.now().timestamp())
save_sync_timestamp(current_time)
print("\n" + "="*60)
print("SYNC COMPLETE")
print("="*60)
print(f"New auctions: {results['auctions']}")
print(f"New lots: {results['lots']}")
if results['files']:
print("\nFiles ready for server import:")
for key, path in results['files'].items():
print(f" {key}: {path}")
print("\nNext sync will only export records newer than:")
print(f" {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
if __name__ == "__main__":
main()
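create_upsert_export writes literal INSERT OR REPLACE statements and escapes quotes by hand; if the import side also runs Python with sqlite3, parameterized statements sidestep the escaping entirely. A hedged sketch of a loader for the JSON export (the function name and file handling are assumptions, not part of this script):

import json
import sqlite3

def upsert_auctions_from_json(db_path: str, json_path: str) -> None:
    """UPSERT an auctions_update_*.json export using parameterized SQL."""
    with open(json_path, encoding="utf-8") as f:
        auctions = json.load(f)
    if not auctions:
        return
    columns = list(auctions[0].keys())
    sql = (f"INSERT OR REPLACE INTO auctions ({', '.join(columns)}) "
           f"VALUES ({', '.join('?' for _ in columns)})")
    with sqlite3.connect(db_path) as conn:
        conn.executemany(sql, [tuple(a.get(c) for c in columns) for a in auctions])
        conn.commit()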

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env python3
"""Test auction data fetch"""
import asyncio
import json
import sys
sys.path.insert(0, 'src')
from graphql_client import fetch_auction_data, format_auction_data
async def main():
auction_id = "9d5d9d6b-94de-4147-b523-dfa512d85dfa"
print(f"Fetching auction: {auction_id}\n")
auction_data = await fetch_auction_data(auction_id)
if auction_data:
print("Raw Auction Data:")
print(json.dumps(auction_data, indent=2))
print("\n\nFormatted:")
formatted = format_auction_data(auction_data)
print(f"Viewing: {formatted['viewing_time']}")
print(f"Pickup: {formatted['pickup_date']}")
else:
print("No auction data returned")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,59 +0,0 @@
#!/usr/bin/env python3
"""Test if the auction query works at all"""
import asyncio
import aiohttp
import json
GRAPHQL_ENDPOINT = "https://storefront.tbauctions.com/storefront/graphql"
# Try a simpler query first
SIMPLE_QUERY = """
query AuctionData($auctionId: TbaUuid!, $locale: String!, $platform: Platform!) {
auction(id: $auctionId, locale: $locale, platform: $platform) {
id
displayId
viewingDays {
startDate
endDate
city
countryCode
}
collectionDays {
startDate
endDate
city
countryCode
}
}
}
"""
async def main():
auction_id = "9d5d9d6b-94de-4147-b523-dfa512d85dfa"
variables = {
"auctionId": auction_id,
"locale": "nl",
"platform": "TWK"
}
payload = {
"query": SIMPLE_QUERY,
"variables": variables
}
async with aiohttp.ClientSession() as session:
async with session.post(GRAPHQL_ENDPOINT, json=payload, timeout=30) as response:
print(f"Status: {response.status}")
text = await response.text()
print(f"Response: {text}")
try:
data = await response.json()
print(f"\nParsed:")
print(json.dumps(data, indent=2))
except Exception:  # response body was not valid JSON
pass
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,95 +0,0 @@
#!/usr/bin/env python3
"""Test comprehensive data enrichment"""
import asyncio
import sys
sys.path.insert(0, 'src')
from scraper import TroostwijkScraper
async def main():
scraper = TroostwijkScraper()
from playwright.async_api import async_playwright
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page(
viewport={'width': 1920, 'height': 1080},
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
)
# Test with lot that has bids
lot_url = "https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5"
print(f"Testing comprehensive extraction\n")
result = await scraper.crawl_page(page, lot_url)
if result:
print(f"\n{'='*60}")
print("COMPREHENSIVE DATA EXTRACTION:")
print(f"{'='*60}")
print(f"Lot ID: {result.get('lot_id')}")
print(f"Title: {result.get('title', '')[:50]}...")
print(f"\n[Bidding Intelligence]")
print(f" Status: {result.get('status')}")
print(f" Current Bid: {result.get('current_bid')}")
print(f" Starting Bid: {result.get('starting_bid')}")
print(f" Bid Increment: EUR {result.get('bid_increment', 0):.2f}")
print(f" Bid Count: {result.get('bid_count')}")
print(f" First Bid: {result.get('first_bid_time', 'N/A')}")
print(f" Last Bid: {result.get('last_bid_time', 'N/A')}")
print(f" Bid Velocity: {result.get('bid_velocity', 0)} bids/hour")
print(f"\n[Valuation Intelligence]")
print(f" Brand: {result.get('brand', 'N/A')}")
print(f" Model: {result.get('model', 'N/A')}")
print(f" Year: {result.get('year_manufactured', 'N/A')}")
print(f" Manufacturer: {result.get('manufacturer', 'N/A')}")
print(f" Condition Score: {result.get('condition_score', 'N/A')}")
print(f" Condition: {result.get('condition_description', 'N/A')}")
print(f" Serial#: {result.get('serial_number', 'N/A')}")
print(f" Damage: {result.get('damage_description', 'N/A')[:50] if result.get('damage_description') else 'N/A'}...")
await browser.close()
# Verify database
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
# Check lot data
cursor = conn.execute("""
SELECT bid_velocity, first_bid_time, year_manufactured, condition_score
FROM lots
WHERE lot_id = ?
""", (result.get('lot_id'),))
row = cursor.fetchone()
if row:
print(f"\n{'='*60}")
print("DATABASE VERIFICATION (lots table):")
print(f"{'='*60}")
print(f" Bid Velocity: {row[0]}")
print(f" First Bid Time: {row[1]}")
print(f" Year: {row[2]}")
print(f" Condition Score: {row[3]}")
# Check bid history
cursor = conn.execute("""
SELECT COUNT(*), MIN(bid_time), MAX(bid_time), SUM(is_autobid)
FROM bid_history
WHERE lot_id = ?
""", (result.get('lot_id'),))
row = cursor.fetchone()
if row and row[0] > 0:
print(f"\n{'='*60}")
print("DATABASE VERIFICATION (bid_history table):")
print(f"{'='*60}")
print(f" Total Bids Stored: {row[0]}")
print(f" First Bid: {row[1]}")
print(f" Last Bid: {row[2]}")
print(f" Autobids: {row[3]}")
conn.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,49 +0,0 @@
#!/usr/bin/env python3
"""Test concurrent image downloads"""
import asyncio
import time
import sys
sys.path.insert(0, 'src')
from scraper import TroostwijkScraper
async def main():
scraper = TroostwijkScraper()
from playwright.async_api import async_playwright
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page(
viewport={'width': 1920, 'height': 1080},
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
)
# Test with a lot that has multiple images
lot_url = "https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5"
print(f"Testing concurrent image downloads\n")
print(f"Lot: {lot_url}\n")
start_time = time.time()
result = await scraper.crawl_page(page, lot_url)
elapsed = time.time() - start_time
print(f"\n{'='*60}")
print(f"TIMING RESULTS:")
print(f"{'='*60}")
print(f"Total time: {elapsed:.2f}s")
image_count = len(result.get('images', []))
print(f"Images: {image_count}")
if image_count > 1:
print(f"Time per image: {elapsed/image_count:.2f}s (if sequential)")
print(f"Actual time: {elapsed:.2f}s (concurrent!)")
speedup = (image_count * 0.5) / elapsed if elapsed > 0 else 1
print(f"Speedup factor: {speedup:.1f}x")
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,66 +0,0 @@
#!/usr/bin/env python3
"""Test the full scraper with one lot"""
import asyncio
import sys
sys.path.insert(0, 'src')
from scraper import TroostwijkScraper
async def main():
scraper = TroostwijkScraper()
from playwright.async_api import async_playwright
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page(
viewport={'width': 1920, 'height': 1080},
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
)
# Test with a known lot
lot_url = "https://www.troostwijkauctions.com/l/%25282x%2529-duo-bureau-160x168-cm-A1-28505-5"
print(f"Testing with: {lot_url}\n")
result = await scraper.crawl_page(page, lot_url)
if result:
print(f"\n{'='*60}")
print("FINAL RESULT:")
print(f"{'='*60}")
print(f"Lot ID: {result.get('lot_id')}")
print(f"Title: {result.get('title', '')[:50]}...")
print(f"Current Bid: {result.get('current_bid')}")
print(f"Starting Bid: {result.get('starting_bid')}")
print(f"Minimum Bid: {result.get('minimum_bid')}")
print(f"Bid Count: {result.get('bid_count')}")
print(f"Closing Time: {result.get('closing_time')}")
print(f"Viewing Time: {result.get('viewing_time', 'N/A')}")
print(f"Pickup Date: {result.get('pickup_date', 'N/A')}")
print(f"Location: {result.get('location')}")
await browser.close()
# Verify database
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("""
SELECT current_bid, starting_bid, minimum_bid, bid_count, closing_time
FROM lots
WHERE lot_id = 'A1-28505-5'
""")
row = cursor.fetchone()
conn.close()
if row:
print(f"\n{'='*60}")
print("DATABASE VERIFICATION:")
print(f"{'='*60}")
print(f"Current Bid: {row[0]}")
print(f"Starting Bid: {row[1]}")
print(f"Minimum Bid: {row[2]}")
print(f"Bid Count: {row[3]}")
print(f"Closing Time: {row[4]}")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env python3
"""Test the updated scraper with GraphQL integration"""
import asyncio
import sys
sys.path.insert(0, 'src')
from graphql_client import fetch_lot_bidding_data, format_bid_data
async def main():
# Test with known lot ID
lot_id = "A1-28505-5"
print(f"Testing GraphQL API with lot: {lot_id}\n")
bidding_data = await fetch_lot_bidding_data(lot_id)
if bidding_data:
print("Raw GraphQL Response:")
print("="*60)
import json
print(json.dumps(bidding_data, indent=2))
print("\n\nFormatted Data:")
print("="*60)
formatted = format_bid_data(bidding_data)
for key, value in formatted.items():
print(f" {key}: {value}")
else:
print("Failed to fetch bidding data")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,43 +0,0 @@
#!/usr/bin/env python3
"""Test scraping a single live lot page"""
import asyncio
import sys
sys.path.insert(0, 'src')
from scraper import TroostwijkScraper
async def main():
scraper = TroostwijkScraper()
from playwright.async_api import async_playwright
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
# Get a lot URL from the database
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("SELECT url FROM lots LIMIT 1")
row = cursor.fetchone()
conn.close()
if not row:
print("No lots in database")
return
lot_url = row[0]
print(f"Fetching: {lot_url}\n")
result = await scraper.crawl_page(page, lot_url)
if result:
print(f"\nExtracted Data:")
print(f" current_bid: {result.get('current_bid')}")
print(f" bid_count: {result.get('bid_count')}")
print(f" closing_time: {result.get('closing_time')}")
await browser.close()
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,64 +0,0 @@
#!/usr/bin/env python3
"""Test the new fields extraction"""
import asyncio
import sys
sys.path.insert(0, 'src')
from scraper import TroostwijkScraper
async def main():
scraper = TroostwijkScraper()
from playwright.async_api import async_playwright
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page(
viewport={'width': 1920, 'height': 1080},
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
)
# Test with lot that has attributes
lot_url = "https://www.troostwijkauctions.com/l/47-5kg-hexagon-dumbbell-%25282x%2529-A1-40668-34"
print(f"Testing new fields with: {lot_url}\n")
result = await scraper.crawl_page(page, lot_url)
if result:
print(f"\n{'='*60}")
print("EXTRACTED FIELDS:")
print(f"{'='*60}")
print(f"Lot ID: {result.get('lot_id')}")
print(f"Title: {result.get('title', '')[:50]}...")
print(f"Status: {result.get('status')}")
print(f"Brand: {result.get('brand')}")
print(f"Model: {result.get('model')}")
print(f"Viewing Time: {result.get('viewing_time', 'N/A')}")
print(f"Pickup Date: {result.get('pickup_date', 'N/A')}")
print(f"Attributes: {result.get('attributes_json', '')[:100]}...")
await browser.close()
# Verify database
import sqlite3
conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
cursor = conn.execute("""
SELECT status, brand, model, viewing_time, pickup_date
FROM lots
WHERE lot_id = ?
""", (result.get('lot_id'),))
row = cursor.fetchone()
conn.close()
if row:
print(f"\n{'='*60}")
print("DATABASE VERIFICATION:")
print(f"{'='*60}")
print(f"Status: {row[0]}")
print(f"Brand: {row[1]}")
print(f"Model: {row[2]}")
print(f"Viewing: {row[3][:100] if row[3] else 'N/A'}...")
print(f"Pickup: {row[4][:100] if row[4] else 'N/A'}...")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,306 +0,0 @@
"""
Validate data quality and completeness in the database.
Checks if scraped data matches expectations and API capabilities.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import sqlite3
from datetime import datetime
from typing import Dict, List, Tuple
from cache import CacheManager
cache = CacheManager()
DB_PATH = cache.db_path
def get_db_stats() -> Dict:
"""Get comprehensive database statistics"""
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
stats = {}
# Total counts
stats['total_auctions'] = cursor.execute("SELECT COUNT(*) FROM auctions").fetchone()[0]
stats['total_lots'] = cursor.execute("SELECT COUNT(*) FROM lots").fetchone()[0]
stats['total_images'] = cursor.execute("SELECT COUNT(*) FROM images").fetchone()[0]
stats['total_bid_history'] = cursor.execute("SELECT COUNT(*) FROM bid_history").fetchone()[0]
# Auctions completeness
cursor.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN title IS NOT NULL AND title != '' THEN 1 ELSE 0 END) as has_title,
SUM(CASE WHEN lots_count IS NOT NULL THEN 1 ELSE 0 END) as has_lots_count,
SUM(CASE WHEN closing_time IS NOT NULL THEN 1 ELSE 0 END) as has_closing_time,
SUM(CASE WHEN first_lot_closing_time IS NOT NULL THEN 1 ELSE 0 END) as has_first_lot_closing
FROM auctions
""")
row = cursor.fetchone()
stats['auctions'] = {
'total': row[0],
'has_title': row[1],
'has_lots_count': row[2],
'has_closing_time': row[3],
'has_first_lot_closing': row[4]
}
# Lots completeness - Core fields
cursor.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN title IS NOT NULL AND title != '' THEN 1 ELSE 0 END) as has_title,
SUM(CASE WHEN current_bid IS NOT NULL THEN 1 ELSE 0 END) as has_current_bid,
SUM(CASE WHEN starting_bid IS NOT NULL THEN 1 ELSE 0 END) as has_starting_bid,
SUM(CASE WHEN minimum_bid IS NOT NULL THEN 1 ELSE 0 END) as has_minimum_bid,
SUM(CASE WHEN bid_count IS NOT NULL AND bid_count > 0 THEN 1 ELSE 0 END) as has_bids,
SUM(CASE WHEN closing_time IS NOT NULL THEN 1 ELSE 0 END) as has_closing_time,
SUM(CASE WHEN status IS NOT NULL AND status != '' THEN 1 ELSE 0 END) as has_status
FROM lots
""")
row = cursor.fetchone()
stats['lots_core'] = {
'total': row[0],
'has_title': row[1],
'has_current_bid': row[2],
'has_starting_bid': row[3],
'has_minimum_bid': row[4],
'has_bids': row[5],
'has_closing_time': row[6],
'has_status': row[7]
}
# Lots completeness - Enriched fields
cursor.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN brand IS NOT NULL AND brand != '' THEN 1 ELSE 0 END) as has_brand,
SUM(CASE WHEN model IS NOT NULL AND model != '' THEN 1 ELSE 0 END) as has_model,
SUM(CASE WHEN manufacturer IS NOT NULL AND manufacturer != '' THEN 1 ELSE 0 END) as has_manufacturer,
SUM(CASE WHEN year_manufactured IS NOT NULL THEN 1 ELSE 0 END) as has_year,
SUM(CASE WHEN condition_score IS NOT NULL THEN 1 ELSE 0 END) as has_condition_score,
SUM(CASE WHEN condition_description IS NOT NULL AND condition_description != '' THEN 1 ELSE 0 END) as has_condition_desc,
SUM(CASE WHEN serial_number IS NOT NULL AND serial_number != '' THEN 1 ELSE 0 END) as has_serial,
SUM(CASE WHEN damage_description IS NOT NULL AND damage_description != '' THEN 1 ELSE 0 END) as has_damage
FROM lots
""")
row = cursor.fetchone()
stats['lots_enriched'] = {
'total': row[0],
'has_brand': row[1],
'has_model': row[2],
'has_manufacturer': row[3],
'has_year': row[4],
'has_condition_score': row[5],
'has_condition_desc': row[6],
'has_serial': row[7],
'has_damage': row[8]
}
# Lots completeness - Bid intelligence
cursor.execute("""
SELECT
COUNT(*) as total,
SUM(CASE WHEN first_bid_time IS NOT NULL THEN 1 ELSE 0 END) as has_first_bid_time,
SUM(CASE WHEN last_bid_time IS NOT NULL THEN 1 ELSE 0 END) as has_last_bid_time,
SUM(CASE WHEN bid_velocity IS NOT NULL THEN 1 ELSE 0 END) as has_bid_velocity,
SUM(CASE WHEN bid_increment IS NOT NULL THEN 1 ELSE 0 END) as has_bid_increment
FROM lots
""")
row = cursor.fetchone()
stats['lots_bid_intelligence'] = {
'total': row[0],
'has_first_bid_time': row[1],
'has_last_bid_time': row[2],
'has_bid_velocity': row[3],
'has_bid_increment': row[4]
}
# Bid history stats
cursor.execute("""
SELECT
COUNT(DISTINCT lot_id) as lots_with_history,
COUNT(*) as total_bids,
SUM(CASE WHEN is_autobid = 1 THEN 1 ELSE 0 END) as autobids,
SUM(CASE WHEN bidder_id IS NOT NULL THEN 1 ELSE 0 END) as has_bidder_id
FROM bid_history
""")
row = cursor.fetchone()
stats['bid_history'] = {
'lots_with_history': row[0],
'total_bids': row[1],
'autobids': row[2],
'has_bidder_id': row[3]
}
# Image stats
cursor.execute("""
SELECT
COUNT(DISTINCT lot_id) as lots_with_images,
COUNT(*) as total_images,
SUM(CASE WHEN downloaded = 1 THEN 1 ELSE 0 END) as downloaded_images,
SUM(CASE WHEN local_path IS NOT NULL THEN 1 ELSE 0 END) as has_local_path
FROM images
""")
row = cursor.fetchone()
stats['images'] = {
'lots_with_images': row[0],
'total_images': row[1],
'downloaded_images': row[2],
'has_local_path': row[3]
}
conn.close()
return stats
def check_data_quality() -> List[Tuple[str, str, str]]:
"""Check for data quality issues"""
issues = []
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# Check for lots without auction
cursor.execute("""
SELECT COUNT(*) FROM lots
WHERE auction_id NOT IN (SELECT auction_id FROM auctions)
""")
orphaned_lots = cursor.fetchone()[0]
if orphaned_lots > 0:
issues.append(("ERROR", "Orphaned Lots", f"{orphaned_lots} lots without matching auction"))
# Check for lots with bids but no bid history
cursor.execute("""
SELECT COUNT(*) FROM lots
WHERE bid_count > 0
AND lot_id NOT IN (SELECT DISTINCT lot_id FROM bid_history)
""")
missing_history = cursor.fetchone()[0]
if missing_history > 0:
issues.append(("WARNING", "Missing Bid History", f"{missing_history} lots have bids but no bid history records"))
# Check for lots with closing time in past but still active
cursor.execute("""
SELECT COUNT(*) FROM lots
WHERE closing_time IS NOT NULL
AND closing_time < datetime('now')
AND status NOT LIKE '%gesloten%'
""")
past_closing = cursor.fetchone()[0]
if past_closing > 0:
issues.append(("INFO", "Past Closing Time", f"{past_closing} lots have closing time in past"))
# Check for duplicate lot_ids
cursor.execute("""
SELECT lot_id, COUNT(*) FROM lots
GROUP BY lot_id
HAVING COUNT(*) > 1
""")
duplicates = cursor.fetchall()
if duplicates:
issues.append(("ERROR", "Duplicate Lot IDs", f"{len(duplicates)} duplicate lot_id values found"))
# Check for lots without images
cursor.execute("""
SELECT COUNT(*) FROM lots
WHERE lot_id NOT IN (SELECT DISTINCT lot_id FROM images)
""")
no_images = cursor.fetchone()[0]
if no_images > 0:
issues.append(("WARNING", "No Images", f"{no_images} lots have no images"))
conn.close()
return issues
def print_validation_report():
"""Print comprehensive validation report"""
print("=" * 80)
print("DATABASE VALIDATION REPORT")
print("=" * 80)
print()
stats = get_db_stats()
# Overall counts
print("OVERALL COUNTS:")
print(f" Auctions: {stats['total_auctions']:,}")
print(f" Lots: {stats['total_lots']:,}")
print(f" Images: {stats['total_images']:,}")
print(f" Bid History Records: {stats['total_bid_history']:,}")
print()
# Auctions completeness
print("AUCTIONS COMPLETENESS:")
a = stats['auctions']
print(f" Title: {a['has_title']:,} / {a['total']:,} ({a['has_title']/a['total']*100:.1f}%)")
print(f" Lots Count: {a['has_lots_count']:,} / {a['total']:,} ({a['has_lots_count']/a['total']*100:.1f}%)")
print(f" Closing Time: {a['has_closing_time']:,} / {a['total']:,} ({a['has_closing_time']/a['total']*100:.1f}%)")
print(f" First Lot Closing: {a['has_first_lot_closing']:,} / {a['total']:,} ({a['has_first_lot_closing']/a['total']*100:.1f}%)")
print()
# Lots core completeness
print("LOTS CORE FIELDS:")
l = stats['lots_core']
print(f" Title: {l['has_title']:,} / {l['total']:,} ({l['has_title']/l['total']*100:.1f}%)")
print(f" Current Bid: {l['has_current_bid']:,} / {l['total']:,} ({l['has_current_bid']/l['total']*100:.1f}%)")
print(f" Starting Bid: {l['has_starting_bid']:,} / {l['total']:,} ({l['has_starting_bid']/l['total']*100:.1f}%)")
print(f" Minimum Bid: {l['has_minimum_bid']:,} / {l['total']:,} ({l['has_minimum_bid']/l['total']*100:.1f}%)")
print(f" Has Bids (>0): {l['has_bids']:,} / {l['total']:,} ({l['has_bids']/l['total']*100:.1f}%)")
print(f" Closing Time: {l['has_closing_time']:,} / {l['total']:,} ({l['has_closing_time']/l['total']*100:.1f}%)")
print(f" Status: {l['has_status']:,} / {l['total']:,} ({l['has_status']/l['total']*100:.1f}%)")
print()
# Lots enriched fields
print("LOTS ENRICHED FIELDS:")
e = stats['lots_enriched']
print(f" Brand: {e['has_brand']:,} / {e['total']:,} ({e['has_brand']/e['total']*100:.1f}%)")
print(f" Model: {e['has_model']:,} / {e['total']:,} ({e['has_model']/e['total']*100:.1f}%)")
print(f" Manufacturer: {e['has_manufacturer']:,} / {e['total']:,} ({e['has_manufacturer']/e['total']*100:.1f}%)")
print(f" Year: {e['has_year']:,} / {e['total']:,} ({e['has_year']/e['total']*100:.1f}%)")
print(f" Condition Score: {e['has_condition_score']:,} / {e['total']:,} ({e['has_condition_score']/e['total']*100:.1f}%)")
print(f" Condition Desc: {e['has_condition_desc']:,} / {e['total']:,} ({e['has_condition_desc']/e['total']*100:.1f}%)")
print(f" Serial Number: {e['has_serial']:,} / {e['total']:,} ({e['has_serial']/e['total']*100:.1f}%)")
print(f" Damage Desc: {e['has_damage']:,} / {e['total']:,} ({e['has_damage']/e['total']*100:.1f}%)")
print()
# Bid intelligence
print("LOTS BID INTELLIGENCE:")
b = stats['lots_bid_intelligence']
print(f" First Bid Time: {b['has_first_bid_time']:,} / {b['total']:,} ({b['has_first_bid_time']/b['total']*100:.1f}%)")
print(f" Last Bid Time: {b['has_last_bid_time']:,} / {b['total']:,} ({b['has_last_bid_time']/b['total']*100:.1f}%)")
print(f" Bid Velocity: {b['has_bid_velocity']:,} / {b['total']:,} ({b['has_bid_velocity']/b['total']*100:.1f}%)")
print(f" Bid Increment: {b['has_bid_increment']:,} / {b['total']:,} ({b['has_bid_increment']/b['total']*100:.1f}%)")
print()
# Bid history
print("BID HISTORY:")
h = stats['bid_history']
print(f" Lots with History: {h['lots_with_history']:,}")
print(f" Total Bid Records: {h['total_bids']:,}")
print(f" Autobids: {h['autobids']:,} ({h['autobids']/max(h['total_bids'],1)*100:.1f}%)")
print(f" Has Bidder ID: {h['has_bidder_id']:,} ({h['has_bidder_id']/max(h['total_bids'],1)*100:.1f}%)")
print()
# Images
print("IMAGES:")
i = stats['images']
print(f" Lots with Images: {i['lots_with_images']:,}")
print(f" Total Images: {i['total_images']:,}")
print(f" Downloaded: {i['downloaded_images']:,} ({i['downloaded_images']/max(i['total_images'],1)*100:.1f}%)")
print(f" Has Local Path: {i['has_local_path']:,} ({i['has_local_path']/max(i['total_images'],1)*100:.1f}%)")
print()
# Data quality issues
print("=" * 80)
print("DATA QUALITY ISSUES:")
print("=" * 80)
issues = check_data_quality()
if issues:
for severity, category, message in issues:
print(f" [{severity}] {category}: {message}")
else:
print(" No issues found!")
print()
if __name__ == "__main__":
print_validation_report()
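The auctions and lots sections above divide by the raw totals while the bid-history and image sections guard with max(..., 1); a small helper would make the percentage lines uniform and safe on an empty database (a sketch; the name pct is illustrative):

def pct(part: int, whole: int) -> str:
    """Format 'part / whole (xx.x%)' without dividing by zero on an empty table."""
    return f"{part:,} / {whole:,} ({100.0 * part / max(whole, 1):.1f}%)"

# e.g. print(f"  Title: {pct(l['has_title'], l['total'])}")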

View File

@@ -1,92 +0,0 @@
#!/usr/bin/env python3
"""
Verification script to check image download status and duplicates
Run this after deployment to verify the scraper is working correctly
"""
import sqlite3
import sys
from pathlib import Path
DB_PATH = "/mnt/okcomputer/output/cache.db"
def verify_database():
"""Run verification queries on the database"""
if not Path(DB_PATH).exists():
print(f"❌ Database not found: {DB_PATH}")
sys.exit(1)
conn = sqlite3.connect(DB_PATH)
print("=" * 60)
print("IMAGE DOWNLOAD VERIFICATION")
print("=" * 60)
# Check download success rate
print("\n[*] Download Success Rate:")
cursor = conn.execute("""
SELECT
COUNT(*) as total_images,
SUM(CASE WHEN downloaded = 1 THEN 1 ELSE 0 END) as downloaded,
SUM(CASE WHEN downloaded = 0 THEN 1 ELSE 0 END) as failed,
ROUND(100.0 * SUM(downloaded) / COUNT(*), 2) as success_rate
FROM images
""")
row = cursor.fetchone()
print(f" Total images: {row[0]:,}")
print(f" Downloaded: {row[1]:,}")
print(f" Not downloaded: {row[2]:,}")
print(f" Success rate: {row[3]}%")
# Check for duplicates
print("\n[*] Duplicate Check:")
cursor = conn.execute("""
SELECT lot_id, url, COUNT(*) as dup_count
FROM images
GROUP BY lot_id, url
HAVING COUNT(*) > 1
LIMIT 5
""")
duplicates = cursor.fetchall()
if duplicates:
print(f" [!] Found {len(duplicates)} duplicate entries!")
for lot_id, url, count in duplicates:
print(f" {lot_id}: {url[:50]}... (x{count})")
else:
print(" [+] No duplicates found!")
# Verify file system
print("\n[*] File System Verification:")
cursor = conn.execute("""
SELECT COUNT(*)
FROM images
WHERE downloaded = 1
AND local_path IS NOT NULL
AND local_path != ''
""")
files_with_path = cursor.fetchone()[0]
print(f" Images with local_path: {files_with_path:,}")
# Sample some downloaded images
print("\n[*] Sample Downloaded Images:")
cursor = conn.execute("""
SELECT lot_id, local_path
FROM images
WHERE downloaded = 1
AND local_path IS NOT NULL
LIMIT 5
""")
samples = cursor.fetchall()
for lot_id, path in samples:
exists = "[+]" if Path(path).exists() else "[!]"
print(f" {exists} {lot_id}: {path}")
conn.close()
print("\n" + "=" * 60)
print("VERIFICATION COMPLETE")
print("=" * 60)
if __name__ == "__main__":
verify_database()