48 lines
1.4 KiB
Python
48 lines
1.4 KiB
Python
#!/usr/bin/env python3
"""Search cached pages for viewing/pickup text.

Reads the 20 most recently cached pages whose URL contains ``/l/`` from the
SQLite cache, decompresses each one with zlib, and prints the VIEWING and
PICKUP snippets of the first page that mentions either keyword.
"""
import re
import sqlite3
import sys
import zlib

CACHE_DB_PATH = '/mnt/okcomputer/output/cache.db'

# Maximum number of characters printed per snippet (after HTML clean-up).
SNIPPET_CHARS = 200

# Each pattern grabs the keyword, the remainder of its line and up to five
# following lines.  The quantifiers are deliberately greedy and newline-bounded:
# a lazy ``.*?`` with nothing after it matches the empty string, which would
# reduce the "context" to the bare keyword.
SECTION_PATTERNS = (
    ('VIEWING', re.compile(r'Bezichtiging(?:en)?[^\n]*(?:\n[^\n]*){0,5}',
                           re.IGNORECASE)),
    ('PICKUP', re.compile(r'Ophalen[^\n]*(?:\n[^\n]*){0,5}',
                          re.IGNORECASE)),
)


def decode_page(content_blob) -> str:
    """Return the page text stored in a cache row.

    Args:
        content_blob: zlib-compressed, UTF-8 encoded page content.

    Raises:
        zlib.error: the blob is not valid zlib data.
        UnicodeDecodeError: the decompressed bytes are not valid UTF-8.
        TypeError: the blob is not bytes-like (e.g. ``None``).
    """
    return zlib.decompress(content_blob).decode('utf-8')


def clean_snippet(fragment: str, limit: int = SNIPPET_CHARS) -> str:
    """Replace HTML tags with spaces, collapse whitespace and truncate."""
    text = re.sub(r'<[^>]+>', ' ', fragment)
    text = re.sub(r'\s+', ' ', text).strip()
    return text[:limit]


def extract_sections(content: str) -> list:
    """Return ``(label, snippet)`` pairs for the first hit of each pattern.

    Labels appear in the order of ``SECTION_PATTERNS``; a pattern that does
    not occur in *content* contributes nothing.
    """
    sections = []
    for label, pattern in SECTION_PATTERNS:
        hit = pattern.search(content)  # only the first match is reported
        if hit:
            sections.append((label, clean_snippet(hit.group(0))))
    return sections


def main() -> None:
    """Scan the most recent cached pages and print the first one with hits."""
    conn = sqlite3.connect(CACHE_DB_PATH)
    try:
        cursor = conn.execute("""
            SELECT url, content
            FROM cache
            WHERE url LIKE '%/l/%'
            ORDER BY timestamp DESC
            LIMIT 20
        """)

        for url, content_blob in cursor:
            try:
                content = decode_page(content_blob)
            except (zlib.error, UnicodeDecodeError, TypeError) as exc:
                # Best effort: an unreadable row must not abort the scan,
                # but it should not disappear silently either.
                print(f"Skipping {url}: {exc}", file=sys.stderr)
                continue

            # Cheap substring prefilter before running the regexes.
            lowered = content.lower()
            if 'bezichtig' not in lowered and 'ophalen' not in lowered:
                continue

            print(f"\n{'='*60}")
            print(f"URL: {url}")
            print(f"{'='*60}")

            for label, snippet in extract_sections(content):
                print(f"\n{label}:")
                print(f" {snippet}")

            break  # Found one, that's enough
    finally:
        # Release the database handle even if the query or a print fails.
        conn.close()


if __name__ == "__main__":
    main()