#!/usr/bin/env python3
"""Debug lot data structure from cached page.

Pulls the most recently cached lot page (URL containing '/l/') out of the
zlib-compressed sqlite cache, runs it through the project's DataParser, and
prints both the parsed fields and the raw __NEXT_DATA__ lot payload so
bid/timing fields can be located by eye.
"""
import sqlite3
import zlib
import json
import re
import sys

sys.path.insert(0, 'src')
from parse import DataParser

conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')

# Get a recent lot page
cursor = conn.execute("""
    SELECT url, content FROM cache
    WHERE url LIKE '%/l/%'
    ORDER BY timestamp DESC
    LIMIT 1
""")
row = cursor.fetchone()
if not row:
    print("No lot pages found")
    conn.close()  # don't leak the connection on the early-exit path
    sys.exit(1)   # sys.exit, not the site-injected exit() builtin

url, content_blob = row
# Cached pages are stored zlib-compressed; decode back to HTML text.
content = zlib.decompress(content_blob).decode('utf-8')

parser = DataParser()
result = parser.parse_page(content, url)

if result:
    print(f"URL: {url}")
    print(f"\nParsed Data:")
    print(f"  type: {result.get('type')}")
    print(f"  lot_id: {result.get('lot_id')}")
    print(f"  title: {result.get('title', '')[:50]}...")
    print(f"  current_bid: {result.get('current_bid')}")
    print(f"  bid_count: {result.get('bid_count')}")
    print(f"  closing_time: {result.get('closing_time')}")
    print(f"  location: {result.get('location')}")

# Also dump the raw JSON.
# NOTE: the previous pattern was missing the opening '<script[^' prefix and
# the closing '</script>' anchor, so the non-greedy group captured a single
# character and json.loads always failed.
match = re.search(
    r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>',
    content,
    re.DOTALL,
)
if match:
    data = json.loads(match.group(1))
    page_props = data.get('props', {}).get('pageProps', {})
    if 'lot' in page_props:
        lot = page_props['lot']
        print(f"\nRAW __NEXT_DATA__.lot keys: {list(lot.keys())}")
        print(f"\nSearching for bid/timing fields...")

        # Deep search for these fields
        def deep_search(obj, prefix=""):
            """Recursively print any key that looks bid/time-related.

            For lists, only the first element is inspected — enough for a
            debug dump of homogeneous arrays.
            """
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if any(term in k.lower() for term in ['bid', 'end', 'close', 'date', 'time']):
                        print(f"  {prefix}{k}: {v}")
                    if isinstance(v, (dict, list)):
                        deep_search(v, prefix + k + ".")
            elif isinstance(obj, list) and len(obj) > 0:
                deep_search(obj[0], prefix + "[0].")

        deep_search(lot)

conn.close()