From 8a2b005d4ac755953696a16cf682366ebc88c421 Mon Sep 17 00:00:00 2001
From: Tour
Date: Tue, 9 Dec 2025 07:11:09 +0100
Subject: [PATCH] Preserve parser fields when merging API bid data; add field tests

Merge API bidding data into page_data without clobbering fields the
__NEXT_DATA__ parser already filled (description, category, images),
carry closing_time and status through the cache path, drop the unused
estimatedFullPrice block from LOT_BIDDING_QUERY, and add two tests that
validate field coverage.
---
 src/cache.py                    |   2 +-
 src/graphql_client.py           |  11 --
 src/scraper.py                  |  18 ++-
 test/test_description_simple.py |  51 ++++++++
 test/test_missing_fields.py     | 208 ++++++++++++++++++++++++++++++++
 5 files changed, 276 insertions(+), 14 deletions(-)
 create mode 100644 test/test_description_simple.py
 create mode 100644 test/test_missing_fields.py

diff --git a/src/cache.py b/src/cache.py
index 43081cf..b3ff940 100644
--- a/src/cache.py
+++ b/src/cache.py
@@ -315,7 +315,7 @@ class CacheManager:
                 (url, compressed_content, time.time(), status_code)
             )
             conn.commit()
-            print(f" → Cached: {url} (compressed {ratio:.1f}%)")
+            print(f" -> Cached: {url} (compressed {ratio:.1f}%)")
 
     def clear_old(self, max_age_hours: int = 168):
         """Clear old cache entries to prevent database bloat"""
diff --git a/src/graphql_client.py b/src/graphql_client.py
index b2991c2..654d4bd 100644
--- a/src/graphql_client.py
+++ b/src/graphql_client.py
@@ -31,17 +31,6 @@ query AuctionData($auctionId: TbaUuid!, $locale: String!, $platform: Platform!)
 LOT_BIDDING_QUERY = """
 query LotBiddingData($lotDisplayId: String!, $locale: String!, $platform: Platform!) {
   lotDetails(displayId: $lotDisplayId, locale: $locale, platform: $platform) {
-    estimatedFullPrice {
-      min {
-        cents
-        currency
-      }
-      max {
-        cents
-        currency
-      }
-      saleTerm
-    }
     lot {
       id
       displayId
diff --git a/src/scraper.py b/src/scraper.py
index 3af4b70..4a256ab 100644
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -358,7 +358,7 @@ class TroostwijkScraper:
         conn = sqlite3.connect(self.cache.db_path)
         cursor = conn.cursor()
         cursor.execute("""
-            SELECT followers_count, estimated_min_price, current_bid, bid_count, closing_time
+            SELECT followers_count, estimated_min_price, current_bid, bid_count, closing_time, status
             FROM lots WHERE lot_id = ?
         """, (lot_id,))
         existing = cursor.fetchone()
@@ -377,6 +377,8 @@ class TroostwijkScraper:
                 page_data['estimated_min_price'] = existing[1]
                 page_data['current_bid'] = existing[2] or page_data.get('current_bid', 'No bids')
                 page_data['bid_count'] = existing[3] or 0
+                page_data['closing_time'] = existing[4]  # Add closing_time
+                page_data['status'] = existing[5] or ''  # Add status
                 bidding_data = None
                 bid_history_data = None
             else:
@@ -439,7 +441,19 @@ class TroostwijkScraper:
 
         if bidding_data:
             formatted_data = format_bid_data(bidding_data)
-            page_data.update(formatted_data)
+
+            # Merge data intelligently - don't overwrite existing fields
+            # Parser (from __NEXT_DATA__) has: description, category, images
+            # API has: current_bid, bid_count, closing_time, status, followers, estimates
+            # Keep parser data, enhance with API data
+            for key, value in formatted_data.items():
+                # Only update if current value is missing/empty
+                current_value = page_data.get(key)
+                if current_value is None or current_value == '' or current_value == 0 or current_value == 'No bids':
+                    page_data[key] = value
+                # Special case: always update bid_count if API has higher value
+                elif key == 'bid_count' and isinstance(value, int) and value > current_value:
+                    page_data[key] = value
 
         # Enhanced logging with new intelligence fields
         print(f" Bid: {page_data.get('current_bid', 'N/A')}")
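Note on the merge loop above: values already parsed from __NEXT_DATA__ win
unless they are one of the "missing" sentinels (None, '', 0, 'No bids');
bid_count is the one field the API is allowed to raise on its own. A minimal
standalone sketch of the same precedence rule, for reference only (the helper
name merge_lot_fields and the sample dicts are illustrative, not part of the
patch):

    def merge_lot_fields(page_data: dict, api_data: dict) -> dict:
        """Overlay API fields onto parser fields without clobbering real values."""
        MISSING = (None, '', 0, 'No bids')  # sentinels meaning "parser had nothing"
        merged = dict(page_data)
        for key, value in api_data.items():
            current = merged.get(key)
            if current in MISSING:
                merged[key] = value
            elif key == 'bid_count' and isinstance(value, int) and value > current:
                merged[key] = value  # the API may know about newer bids
        return merged

    # merge_lot_fields({'description': 'Shower door', 'bid_count': 2},
    #                  {'description': '', 'bid_count': 5, 'status': 'open'})
    # keeps the parser description, raises bid_count to 5, and adds status.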
""", (lot_id,)) existing = cursor.fetchone() @@ -377,6 +377,8 @@ class TroostwijkScraper: page_data['estimated_min_price'] = existing[1] page_data['current_bid'] = existing[2] or page_data.get('current_bid', 'No bids') page_data['bid_count'] = existing[3] or 0 + page_data['closing_time'] = existing[4] # Add closing_time + page_data['status'] = existing[5] or '' # Add status bidding_data = None bid_history_data = None else: @@ -439,7 +441,19 @@ class TroostwijkScraper: if bidding_data: formatted_data = format_bid_data(bidding_data) - page_data.update(formatted_data) + + # Merge data intelligently - don't overwrite existing fields + # Parser (from __NEXT_DATA__) has: description, category, images + # API has: current_bid, bid_count, closing_time, status, followers, estimates + # Keep parser data, enhance with API data + for key, value in formatted_data.items(): + # Only update if current value is missing/empty + current_value = page_data.get(key) + if current_value is None or current_value == '' or current_value == 0 or current_value == 'No bids': + page_data[key] = value + # Special case: always update bid_count if API has higher value + elif key == 'bid_count' and isinstance(value, int) and value > current_value: + page_data[key] = value # Enhanced logging with new intelligence fields print(f" Bid: {page_data.get('current_bid', 'N/A')}") diff --git a/test/test_description_simple.py b/test/test_description_simple.py new file mode 100644 index 0000000..f167a79 --- /dev/null +++ b/test/test_description_simple.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import sys +import os +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +sys.path.insert(0, parent_dir) +sys.path.insert(0, os.path.join(parent_dir, 'src')) + +import asyncio +from scraper import TroostwijkScraper +import config +import os + +async def test(): + # Force online mode + os.environ['SCAEV_OFFLINE'] = '0' + config.OFFLINE = False + + scraper = TroostwijkScraper() + scraper.offline = False + + from playwright.async_api import async_playwright + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12" + + # Add debug logging to parser + original_parse = scraper.parser.parse_page + def debug_parse(content, url): + result = original_parse(content, url) + if result: + print(f"PARSER OUTPUT:") + print(f" description: {result.get('description', 'NONE')[:100] if result.get('description') else 'EMPTY'}") + print(f" closing_time: {result.get('closing_time', 'NONE')}") + print(f" bid_count: {result.get('bid_count', 'NONE')}") + return result + scraper.parser.parse_page = debug_parse + + page_data = await scraper.crawl_page(page, url) + + await browser.close() + + print(f"\nFINAL page_data:") + print(f" description: {page_data.get('description', 'NONE')[:100] if page_data and page_data.get('description') else 'EMPTY'}") + print(f" closing_time: {page_data.get('closing_time', 'NONE') if page_data else 'NONE'}") + print(f" bid_count: {page_data.get('bid_count', 'NONE') if page_data else 'NONE'}") + print(f" status: {page_data.get('status', 'NONE') if page_data else 'NONE'}") + +asyncio.run(test()) diff --git a/test/test_missing_fields.py b/test/test_missing_fields.py new file mode 100644 index 0000000..14c417a --- /dev/null +++ b/test/test_missing_fields.py @@ -0,0 +1,208 @@ +#!/usr/bin/env 
diff --git a/test/test_missing_fields.py b/test/test_missing_fields.py
new file mode 100644
index 0000000..14c417a
--- /dev/null
+++ b/test/test_missing_fields.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+"""
+Test to validate that all expected fields are populated after scraping
+"""
+import sys
+import os
+import asyncio
+import sqlite3
+
+# Add parent and src directory to path
+parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+sys.path.insert(0, parent_dir)
+sys.path.insert(0, os.path.join(parent_dir, 'src'))
+
+# Force online mode before importing
+os.environ['SCAEV_OFFLINE'] = '0'
+
+from scraper import TroostwijkScraper
+import config
+
+
+async def test_lot_has_all_fields():
+    """Test that a lot page has all expected fields populated"""
+
+    print("\n" + "="*60)
+    print("TEST: Lot has all required fields")
+    print("="*60)
+
+    # Use the example lot from user
+    test_url = "https://www.troostwijkauctions.com/l/radaway-idea-black-dwj-doucheopstelling-A1-39956-18"
+
+    # Ensure we're not in offline mode
+    config.OFFLINE = False
+
+    scraper = TroostwijkScraper()
+    scraper.offline = False
+
+    print(f"\n[1] Scraping: {test_url}")
+
+    # Start playwright and scrape
+    from playwright.async_api import async_playwright
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        context = await browser.new_context()
+        page = await context.new_page()
+
+        page_data = await scraper.crawl_page(page, test_url)
+
+        await browser.close()
+
+    if not page_data:
+        print(" [FAIL] No data returned")
+        return False
+
+    print(f"\n[2] Validating fields...")
+
+    # Fields that MUST have values (critical for auction functionality)
+    required_fields = {
+        'closing_time': 'Closing time',
+        'current_bid': 'Current bid',
+        'bid_count': 'Bid count',
+        'status': 'Status',
+    }
+
+    # Fields that SHOULD have values but may legitimately be empty
+    optional_fields = {
+        'description': 'Description',
+    }
+
+    missing_fields = []
+    empty_fields = []
+    optional_missing = []
+
+    # Check required fields
+    for field, label in required_fields.items():
+        value = page_data.get(field)
+
+        if value is None:
+            missing_fields.append(label)
+            print(f" [FAIL] {label}: MISSING (None)")
+        elif value == '' or value == 0 or value == 'No bids':
+            # Special case: 'No bids' is only acceptable if bid_count is 0
+            if field == 'current_bid' and page_data.get('bid_count', 0) == 0:
+                print(f" [PASS] {label}: '{value}' (acceptable - no bids)")
+            else:
+                empty_fields.append(label)
+                print(f" [FAIL] {label}: EMPTY ('{value}')")
+        else:
+            print(f" [PASS] {label}: {value}")
+
+    # Check optional fields (warn but don't fail)
+    for field, label in optional_fields.items():
+        value = page_data.get(field)
+        if value is None or value == '':
+            optional_missing.append(label)
+            print(f" [WARN] {label}: EMPTY (may be legitimate)")
+        else:
+            print(f" [PASS] {label}: {value[:50]}...")
+
+    # Check database
+    print(f"\n[3] Checking database entry...")
+    conn = sqlite3.connect(scraper.cache.db_path)
+    cursor = conn.cursor()
+    cursor.execute("""
+        SELECT closing_time, current_bid, bid_count, description, status
+        FROM lots WHERE url = ?
+ """, (test_url,)) + row = cursor.fetchone() + conn.close() + + if row: + db_closing, db_bid, db_count, db_desc, db_status = row + print(f" DB closing_time: {db_closing or 'EMPTY'}") + print(f" DB current_bid: {db_bid or 'EMPTY'}") + print(f" DB bid_count: {db_count}") + print(f" DB description: {db_desc[:50] if db_desc else 'EMPTY'}...") + print(f" DB status: {db_status or 'EMPTY'}") + + # Verify DB matches page_data + if db_closing != page_data.get('closing_time'): + print(f" [WARN] DB closing_time doesn't match page_data") + if db_count != page_data.get('bid_count'): + print(f" [WARN] DB bid_count doesn't match page_data") + else: + print(f" [WARN] No database entry found") + + print(f"\n" + "="*60) + if missing_fields or empty_fields: + print(f"[FAIL] Missing fields: {', '.join(missing_fields)}") + print(f"[FAIL] Empty fields: {', '.join(empty_fields)}") + if optional_missing: + print(f"[WARN] Optional missing: {', '.join(optional_missing)}") + return False + else: + print("[PASS] All required fields are populated") + if optional_missing: + print(f"[WARN] Optional missing: {', '.join(optional_missing)}") + return True + + +async def test_lot_with_description(): + """Test that a lot with description preserves it""" + + print("\n" + "="*60) + print("TEST: Lot with description") + print("="*60) + + # Use a lot known to have description + test_url = "https://www.troostwijkauctions.com/l/used-dometic-seastar-tfxchx8641p-top-mount-engine-control-liver-A1-39684-12" + + config.OFFLINE = False + + scraper = TroostwijkScraper() + scraper.offline = False + + print(f"\n[1] Scraping: {test_url}") + + from playwright.async_api import async_playwright + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context() + page = await context.new_page() + + page_data = await scraper.crawl_page(page, test_url) + + await browser.close() + + if not page_data: + print(" [FAIL] No data returned") + return False + + print(f"\n[2] Checking description...") + description = page_data.get('description', '') + + if not description or description == '': + print(f" [FAIL] Description is empty") + return False + else: + print(f" [PASS] Description: {description[:100]}...") + return True + + +async def main(): + """Run all tests""" + print("\n" + "="*60) + print("MISSING FIELDS TEST SUITE") + print("="*60) + + test1 = await test_lot_has_all_fields() + test2 = await test_lot_with_description() + + print("\n" + "="*60) + if test1 and test2: + print("ALL TESTS PASSED") + else: + print("SOME TESTS FAILED") + if not test1: + print(" - test_lot_has_all_fields FAILED") + if not test2: + print(" - test_lot_with_description FAILED") + print("="*60 + "\n") + + return 0 if (test1 and test2) else 1 + + +if __name__ == '__main__': + exit_code = asyncio.run(main()) + sys.exit(exit_code)