Files
scaev/debug_lot_structure.py
2025-12-07 00:25:25 +01:00

70 lines
2.0 KiB
Python

#!/usr/bin/env python3
"""Debug lot data structure from cached page"""
import sqlite3
import zlib
import json
import re
import sys

sys.path.insert(0, 'src')

# Compiled once: pulls the Next.js hydration payload out of a cached HTML page.
NEXT_DATA_RE = re.compile(r'<script[^>]*id="__NEXT_DATA__"[^>]*>(.+?)</script>', re.DOTALL)

# Substrings that mark a key as bid/timing-related during the deep search.
SEARCH_TERMS = ('bid', 'end', 'close', 'date', 'time')


def deep_search(obj, prefix=""):
    """Recursively print every dict key that looks bid/timing-related.

    Dicts are walked fully; for lists only the first element is inspected
    (items are presumed homogeneous — this is a debug aid, not a complete
    traversal). Matches are printed with a dotted path prefix.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            if any(term in k.lower() for term in SEARCH_TERMS):
                print(f" {prefix}{k}: {v}")
            if isinstance(v, (dict, list)):
                deep_search(v, prefix + k + ".")
    elif isinstance(obj, list) and len(obj) > 0:
        deep_search(obj[0], prefix + "[0].")


def _dump_parsed(result, url):
    """Print the fields DataParser extracted from the lot page."""
    print(f"URL: {url}")
    print(f"\nParsed Data:")
    print(f" type: {result.get('type')}")
    print(f" lot_id: {result.get('lot_id')}")
    print(f" title: {result.get('title', '')[:50]}...")
    print(f" current_bid: {result.get('current_bid')}")
    print(f" bid_count: {result.get('bid_count')}")
    print(f" closing_time: {result.get('closing_time')}")
    print(f" location: {result.get('location')}")


def _dump_raw_next_data(content):
    """Extract the raw __NEXT_DATA__ JSON and report bid/timing fields of the lot."""
    match = NEXT_DATA_RE.search(content)
    if not match:
        return
    data = json.loads(match.group(1))
    page_props = data.get('props', {}).get('pageProps', {})
    if 'lot' in page_props:
        lot = page_props['lot']
        print(f"\nRAW __NEXT_DATA__.lot keys: {list(lot.keys())}")
        print(f"\nSearching for bid/timing fields...")
        deep_search(lot)


def main():
    """Fetch the most recent cached lot page, parse it, and dump both views."""
    # Project import deferred so the module can be imported (e.g. for testing)
    # without 'src' being resolvable; running the script is unchanged.
    from parse import DataParser

    conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
    try:
        # Get a recent lot page
        cursor = conn.execute("""
SELECT url, content
FROM cache
WHERE url LIKE '%/l/%'
ORDER BY timestamp DESC
LIMIT 1
""")
        row = cursor.fetchone()
        if not row:
            print("No lot pages found")
            # was bare exit(1): that name comes from the `site` module and is
            # not guaranteed; sys.exit is the reliable form.
            sys.exit(1)
        url, content_blob = row
        content = zlib.decompress(content_blob).decode('utf-8')

        result = DataParser().parse_page(content, url)
        if result:
            _dump_parsed(result, url)
        # Also dump the raw JSON — unconditionally, since it does not depend
        # on the parse result and is most useful when the parser fails.
        _dump_raw_next_data(content)
    finally:
        # Original leaked the connection on the early-exit and error paths.
        conn.close()


if __name__ == "__main__":
    main()