#!/usr/bin/env python3
"""
Migrate uncompressed cache entries to compressed format.

This script zlib-compresses the ``content`` of every row in the ``cache``
table whose ``compressed`` flag is 0 or NULL, and sets the flag to 1.
"""

import contextlib
import os
import sqlite3
import time
import zlib
from typing import Optional

CACHE_DB = "/mnt/okcomputer/output/cache.db"

# Number of successfully compressed entries between intermediate commits.
_COMMIT_INTERVAL = 100

_BYTES_PER_MB = 1024 * 1024


def _resolve_db_path(db_path: Optional[str]) -> str:
    """Return *db_path*, falling back to the module-level ``CACHE_DB``."""
    # Resolved at call time so reassigning CACHE_DB still takes effect.
    return CACHE_DB if db_path is None else db_path


def _reduction_percent(original_size: int, compressed_size: int) -> float:
    """Return the size reduction in percent, or 0.0 when nothing was measured."""
    if original_size <= 0:
        # Only empty payloads so far; avoid dividing by zero.
        return 0.0
    return (1 - compressed_size / original_size) * 100


def migrate_cache(db_path: Optional[str] = None) -> None:
    """Compress all uncompressed cache entries.

    Rows whose ``compressed`` column is 0 or NULL have their ``content``
    replaced by its zlib-compressed form (level 9) and ``compressed`` set
    to 1. Text content is encoded as UTF-8 before compression. Progress and
    a final summary are printed to stdout.

    Args:
        db_path: Path of the SQLite database to migrate. Defaults to
            ``CACHE_DB``.

    A failure on an individual entry is reported and that entry is skipped;
    the remaining entries are still processed.
    """
    # sqlite3's own connection context manager only commits / rolls back the
    # transaction; closing() is what actually releases the connection.
    with contextlib.closing(sqlite3.connect(_resolve_db_path(db_path))) as conn:
        uncompressed = conn.execute(
            "SELECT url, content FROM cache WHERE compressed = 0 OR compressed IS NULL"
        ).fetchall()

        if not uncompressed:
            print("✓ No uncompressed entries found. All cache is already compressed!")
            return

        print(f"Found {len(uncompressed)} uncompressed cache entries")
        print("Starting compression...")

        total_original_size = 0
        total_compressed_size = 0
        compressed_count = 0

        for url, content in uncompressed:
            try:
                # Handle both text and bytes
                if isinstance(content, str):
                    content_bytes = content.encode('utf-8')
                else:
                    content_bytes = content

                original_size = len(content_bytes)
                compressed_content = zlib.compress(content_bytes, level=9)

                conn.execute(
                    "UPDATE cache SET content = ?, compressed = 1 WHERE url = ?",
                    (compressed_content, url)
                )
            except Exception as e:
                # Deliberate best-effort: one bad entry must not abort the
                # whole migration, so report it and move on.
                print(f" ERROR compressing {url}: {e}")
                continue

            # Bookkeeping stays outside the try so a reporting problem can
            # never be misreported as a compression failure for this URL.
            total_original_size += original_size
            total_compressed_size += len(compressed_content)
            compressed_count += 1

            if compressed_count % _COMMIT_INTERVAL == 0:
                conn.commit()
                ratio = _reduction_percent(total_original_size, total_compressed_size)
                print(f" Compressed {compressed_count}/{len(uncompressed)} entries... "
                      f"({ratio:.1f}% reduction so far)")

        # Final commit
        conn.commit()

    # Calculate final statistics
    ratio = _reduction_percent(total_original_size, total_compressed_size)
    size_saved_mb = (total_original_size - total_compressed_size) / _BYTES_PER_MB

    print("\n" + "="*60)
    print("MIGRATION COMPLETE")
    print("="*60)
    print(f"Entries compressed: {compressed_count}")
    print(f"Original size: {total_original_size / _BYTES_PER_MB:.2f} MB")
    print(f"Compressed size: {total_compressed_size / _BYTES_PER_MB:.2f} MB")
    print(f"Space saved: {size_saved_mb:.2f} MB")
    print(f"Compression ratio: {ratio:.1f}%")
    print("="*60)


def verify_migration(db_path: Optional[str] = None) -> bool:
    """Verify all entries are compressed.

    Prints the number of compressed and uncompressed rows.

    Args:
        db_path: Path of the SQLite database to check. Defaults to
            ``CACHE_DB``.

    Returns:
        True when no row has ``compressed`` equal to 0 or NULL, else False.
    """
    with contextlib.closing(sqlite3.connect(_resolve_db_path(db_path))) as conn:
        uncompressed_count = conn.execute(
            "SELECT COUNT(*) FROM cache WHERE compressed = 0 OR compressed IS NULL"
        ).fetchone()[0]
        compressed_count = conn.execute(
            "SELECT COUNT(*) FROM cache WHERE compressed = 1"
        ).fetchone()[0]

    print("\nVERIFICATION:")
    print(f" Compressed entries: {compressed_count}")
    print(f" Uncompressed entries: {uncompressed_count}")

    if uncompressed_count == 0:
        print(" ✓ All cache entries are compressed!")
        return True

    print(" ✗ Some entries are still uncompressed")
    return False


def get_db_size(db_path: Optional[str] = None) -> float:
    """Get current database file size.

    Args:
        db_path: Path of the database file. Defaults to ``CACHE_DB``.

    Returns:
        The file size in MB (1024 * 1024 bytes), or 0 when the file cannot
        be stat'ed (e.g. it does not exist).
    """
    try:
        return os.path.getsize(_resolve_db_path(db_path)) / _BYTES_PER_MB
    except OSError:
        return 0.0


def main() -> None:
    """Run the migration against ``CACHE_DB`` and report sizes and timing."""
    print("Cache Compression Migration Tool")
    print("="*60)

    # Show initial DB size
    initial_size = get_db_size()
    print(f"Initial database size: {initial_size:.2f} MB\n")

    # Run migration
    start_time = time.time()
    migrate_cache()
    elapsed = time.time() - start_time
    print(f"\nTime taken: {elapsed:.2f} seconds")

    # Verify
    verify_migration()

    # Show final DB size. The file itself does not shrink until VACUUM is
    # run, which is why the hint below is printed.
    final_size = get_db_size()
    print(f"\nFinal database size: {final_size:.2f} MB")
    print(f"Database size reduced by: {initial_size - final_size:.2f} MB")

    print("\n✓ Migration complete! You can now run VACUUM to reclaim disk space:")
    print(f" sqlite3 {CACHE_DB} 'VACUUM;'")


if __name__ == "__main__":
    main()