Files
scaev/search_cached_viewing.py
2025-12-07 01:59:45 +01:00

48 lines
1.4 KiB
Python

#!/usr/bin/env python3
"""Search cached pages for viewing/pickup text"""
import sqlite3
import zlib
import re
# Location of the page cache. The query below reads a ``cache`` table with
# ``url``, ``content`` and ``timestamp`` columns from it.
DB_PATH = '/mnt/okcomputer/output/cache.db'

# (compiled pattern, label) pairs used to pull a snippet out of a page.
# Each pattern captures the keyword, the rest of its line, and up to five
# following lines. The quantifiers are deliberately greedy and DOTALL is
# deliberately off: with lazy ``.*?`` in front of an optional group the regex
# engine is satisfied by the bare keyword and no context is ever captured.
# ``Bezichtiging(?:en)?`` accepts both the singular and the plural form
# (``Bezichtigingen?`` would only make the final "n" optional).
SECTION_PATTERNS = [
    (re.compile(r'(Bezichtiging(?:en)?.*(?:\n.*){0,5})', re.IGNORECASE), 'VIEWING'),
    (re.compile(r'(Ophalen.*(?:\n.*){0,5})', re.IGNORECASE), 'PICKUP'),
]

_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


def extract_sections(content, max_chars=200):
    """Return ``(label, snippet)`` pairs for viewing/pickup text in *content*.

    For every entry in ``SECTION_PATTERNS`` only the first match is used.
    HTML tags are replaced by spaces, runs of whitespace are collapsed to a
    single space, and the result is cut to *max_chars* characters.

    Args:
        content: Decoded page text (typically HTML).
        max_chars: Maximum length of each returned snippet.

    Returns:
        A list of ``(label, snippet)`` tuples in ``SECTION_PATTERNS`` order;
        empty when no pattern matches.
    """
    sections = []
    for pattern, label in SECTION_PATTERNS:
        found = pattern.search(content)
        if found is None:
            continue
        text = _TAG_RE.sub(' ', found.group(1))
        text = _WHITESPACE_RE.sub(' ', text).strip()
        sections.append((label, text[:max_chars]))
    return sections


def search_cached_viewing(db_path=DB_PATH, limit=20):
    """Print viewing/pickup snippets from the most recent cached pages.

    Reads the *limit* newest rows of the ``cache`` table whose URL contains
    ``/l/``, zlib-decompresses and UTF-8-decodes each ``content`` value, and
    prints the extracted snippets for the first page that mentions
    'bezichtig' or 'ophalen' (case-insensitive).

    Args:
        db_path: Path of the SQLite cache database.
        limit: Maximum number of rows to inspect.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/l/%'
            ORDER BY timestamp DESC
            LIMIT ?
        """, (limit,))
        for url, content_blob in cursor:
            # Best effort: skip rows whose payload is missing, not
            # zlib-compressed, or not valid UTF-8 -- but let real errors
            # (and Ctrl-C) propagate instead of swallowing everything.
            try:
                content = zlib.decompress(content_blob).decode('utf-8')
            except (zlib.error, UnicodeDecodeError, TypeError):
                continue

            lowered = content.lower()
            if 'bezichtig' not in lowered and 'ophalen' not in lowered:
                continue

            print(f"\n{'='*60}")
            print(f"URL: {url}")
            print(f"{'='*60}")
            for label, snippet in extract_sections(content):
                print(f"\n{label}:")
                print(f" {snippet}")

            # Found one, that's enough.
            # NOTE(review): assumes the intent is to stop after the first
            # page with a keyword hit -- confirm against the original layout.
            break
    finally:
        # Release the database handle even if the query or printing fails.
        conn.close()


if __name__ == "__main__":
    search_cached_viewing()