first
This commit is contained in:
139
script/migrate_compress_cache.py
Normal file
139
script/migrate_compress_cache.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migrate uncompressed cache entries to compressed format
|
||||
This script compresses all cache entries where compressed=0
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import zlib
|
||||
import time
|
||||
|
||||
CACHE_DB = "/mnt/okcomputer/output/cache.db"
|
||||
|
||||
def migrate_cache(db_path=None):
    """Compress all uncompressed cache entries.

    Selects every row of the ``cache`` table whose ``compressed`` flag is 0
    or NULL, zlib-compresses its ``content`` (text is encoded as UTF-8 first)
    and writes it back with ``compressed = 1``. Work is committed every 100
    successfully compressed rows and once more at the end. Progress and a
    final summary are printed to stdout.

    Args:
        db_path: Path of the SQLite database to migrate. Defaults to the
            module-level ``CACHE_DB`` when omitted or ``None``.

    Returns:
        None.
    """
    from contextlib import closing

    if db_path is None:
        db_path = CACHE_DB

    # ``with sqlite3.connect(...)`` only scopes a transaction; it does not
    # close the connection. ``closing`` guarantees the handle is released.
    with closing(sqlite3.connect(db_path)) as conn:
        # Get uncompressed entries
        cursor = conn.execute(
            "SELECT url, content FROM cache WHERE compressed = 0 OR compressed IS NULL"
        )
        uncompressed = cursor.fetchall()

        if not uncompressed:
            print("✓ No uncompressed entries found. All cache is already compressed!")
            return

        print(f"Found {len(uncompressed)} uncompressed cache entries")
        print("Starting compression...")

        total_original_size = 0
        total_compressed_size = 0
        compressed_count = 0

        for url, content in uncompressed:
            try:
                # Handle both text and bytes
                if isinstance(content, str):
                    content_bytes = content.encode('utf-8')
                else:
                    content_bytes = content

                original_size = len(content_bytes)

                # Compress
                compressed_content = zlib.compress(content_bytes, level=9)
                compressed_size = len(compressed_content)

                # Update in database
                conn.execute(
                    "UPDATE cache SET content = ?, compressed = 1 WHERE url = ?",
                    (compressed_content, url)
                )

                total_original_size += original_size
                total_compressed_size += compressed_size
                compressed_count += 1

                if compressed_count % 100 == 0:
                    conn.commit()
                    # Guarded ratio: the first 100 entries may all be empty,
                    # which previously raised ZeroDivisionError here and was
                    # misreported as a compression error for this URL.
                    ratio = _reduction_percent(total_original_size, total_compressed_size)
                    print(f" Compressed {compressed_count}/{len(uncompressed)} entries... "
                          f"({ratio:.1f}% reduction so far)")

            except Exception as e:
                # Deliberate best-effort per-row boundary: report the failing
                # entry and keep migrating the rest.
                print(f" ERROR compressing {url}: {e}")
                continue

        # Final commit
        conn.commit()

    # Calculate final statistics
    ratio = _reduction_percent(total_original_size, total_compressed_size)
    size_saved_mb = (total_original_size - total_compressed_size) / (1024 * 1024)

    print("\n" + "="*60)
    print("MIGRATION COMPLETE")
    print("="*60)
    print(f"Entries compressed: {compressed_count}")
    print(f"Original size: {total_original_size / (1024*1024):.2f} MB")
    print(f"Compressed size: {total_compressed_size / (1024*1024):.2f} MB")
    print(f"Space saved: {size_saved_mb:.2f} MB")
    print(f"Compression ratio: {ratio:.1f}%")
    print("="*60)


def _reduction_percent(original_size, compressed_size):
    """Return the size reduction as a percentage, or 0 when nothing was measured."""
    if original_size <= 0:
        return 0
    return (1 - compressed_size / original_size) * 100
|
||||
|
||||
def verify_migration(db_path=None):
    """Verify all entries are compressed.

    Counts the rows of the ``cache`` table whose ``compressed`` flag is 0 or
    NULL and the rows whose flag is 1, prints both counts, and reports
    whether any uncompressed rows remain.

    Args:
        db_path: Path of the SQLite database to check. Defaults to the
            module-level ``CACHE_DB`` when omitted or ``None``.

    Returns:
        True if no row has ``compressed`` equal to 0 or NULL, else False.
    """
    from contextlib import closing

    if db_path is None:
        db_path = CACHE_DB

    # ``with sqlite3.connect(...)`` would leave the connection open after the
    # block; ``closing`` releases it as soon as the counts are read.
    with closing(sqlite3.connect(db_path)) as conn:
        cursor = conn.execute(
            "SELECT COUNT(*) FROM cache WHERE compressed = 0 OR compressed IS NULL"
        )
        uncompressed_count = cursor.fetchone()[0]

        cursor = conn.execute("SELECT COUNT(*) FROM cache WHERE compressed = 1")
        compressed_count = cursor.fetchone()[0]

    print("\nVERIFICATION:")
    print(f" Compressed entries: {compressed_count}")
    print(f" Uncompressed entries: {uncompressed_count}")

    if uncompressed_count == 0:
        print(" ✓ All cache entries are compressed!")
        return True

    print(" ✗ Some entries are still uncompressed")
    return False
|
||||
|
||||
def get_db_size(db_path=None):
    """Get current database file size.

    Args:
        db_path: Path of the database file to measure. Defaults to the
            module-level ``CACHE_DB`` when omitted or ``None``.

    Returns:
        The file size in mebibytes as a float, or 0.0 when the file does not
        exist or cannot be stat'ed.
    """
    import os

    if db_path is None:
        db_path = CACHE_DB

    # Ask for the size directly instead of exists()-then-getsize(): the file
    # could disappear between the two calls.
    try:
        size_bytes = os.path.getsize(db_path)
    except OSError:
        return 0.0
    return size_bytes / (1024 * 1024)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: report the database size, run the migration,
    # verify the result, and report the size again.
    print("Cache Compression Migration Tool")
    print("="*60)

    # Show initial DB size
    initial_size = get_db_size()
    print(f"Initial database size: {initial_size:.2f} MB\n")

    # Run migration (timed with a monotonic clock so that system clock
    # adjustments cannot distort the elapsed time)
    start_time = time.perf_counter()
    migrate_cache()
    elapsed = time.perf_counter() - start_time

    print(f"\nTime taken: {elapsed:.2f} seconds")

    # Verify
    all_compressed = verify_migration()

    # Show final DB size
    final_size = get_db_size()
    print(f"\nFinal database size: {final_size:.2f} MB")
    print(f"Database size reduced by: {initial_size - final_size:.2f} MB")

    if not all_compressed:
        # Do not claim success when verification found leftover entries;
        # signal the failure to the caller through the exit status.
        print("\n✗ Migration incomplete: some entries could not be compressed (see errors above).")
        raise SystemExit(1)

    print("\n✓ Migration complete! You can now run VACUUM to reclaim disk space:")
    # Build the hint from CACHE_DB so it cannot drift from the path actually used.
    print(f" sqlite3 {CACHE_DB} 'VACUUM;'")
|
||||
Reference in New Issue
Block a user