Files
defrag/app/migration/engine.py
2025-12-13 12:00:34 +01:00

196 lines
10 KiB
Python

from pathlib import Path
from typing import Optional, Callable
from datetime import datetime
import psycopg2
from psycopg2.extras import execute_batch
from .copy import CopyMigrationStrategy, SafeCopyStrategy
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
from ..shared.config import DatabaseConfig, ProcessingConfig
from ..shared.logger import ProgressLogger
class MigrationEngine:
    """Plans, executes, journals, and verifies file-migration operations.

    Migration work is recorded in a PostgreSQL ``operations`` table; actual
    file movement is delegated to copy/hardlink/symlink strategy objects.
    Usable as a context manager (closes the DB connection on exit).
    """

    def __init__(self, db_config: DatabaseConfig, processing_config: ProcessingConfig, logger: ProgressLogger, target_base: Path):
        """Store configuration and construct the migration strategies.

        Args:
            db_config: PostgreSQL connection settings (host/port/db/user/password).
            processing_config: processing options; ``verify_operations`` is read
                when executing operations.
            logger: progress logger used for all output.
            target_base: root directory under which migrated files are placed.
        """
        self.db_config = db_config
        self.processing_config = processing_config
        self.logger = logger
        # Coerce to Path so string arguments also work.
        self.target_base = Path(target_base)
        # Lazily-opened psycopg2 connection; see _get_connection().
        self._connection = None
        self.copy_strategy = SafeCopyStrategy(logger=logger)
        self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
        self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)
def _get_connection(self):
if self._connection is None or self._connection.closed:
self._connection = psycopg2.connect(host=self.db_config.host, port=self.db_config.port, database=self.db_config.database, user=self.db_config.user, password=self.db_config.password)
return self._connection
def _ensure_tables(self):
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("\n CREATE TABLE IF NOT EXISTS operations (\n id SERIAL PRIMARY KEY,\n source_path TEXT NOT NULL,\n target_path TEXT NOT NULL,\n operation_type TEXT NOT NULL,\n size BIGINT DEFAULT 0,\n status TEXT DEFAULT 'pending',\n error TEXT,\n executed_at TIMESTAMP,\n verified BOOLEAN DEFAULT FALSE,\n created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n )\n ")
cursor.execute('\n CREATE INDEX IF NOT EXISTS idx_operations_status\n ON operations(status)\n ')
conn.commit()
cursor.close()
def plan_migration(self, disk: Optional[str]=None, category: Optional[str]=None) -> MigrationPlan:
self.logger.section('Planning Migration')
conn = self._get_connection()
cursor = conn.cursor()
conditions = ['category IS NOT NULL']
params = []
if disk:
conditions.append('disk_label = %s')
params.append(disk)
if category:
conditions.append('category = %s')
params.append(category)
query = f"\n SELECT path, size, category, duplicate_of\n FROM files\n WHERE {' AND '.join(conditions)}\n ORDER BY category, path\n "
cursor.execute(query, params)
files = cursor.fetchall()
self.logger.info(f'Found {len(files)} files to migrate')
operations = []
total_size = 0
for path_str, size, file_category, duplicate_of in files:
source = Path(path_str)
target_path = self.target_base / file_category / source.name
if duplicate_of:
operation_type = 'hardlink'
else:
operation_type = 'copy'
operation = OperationRecord(source_path=source, target_path=target_path, operation_type=operation_type, size=size)
operations.append(operation)
total_size += size
cursor.close()
plan = MigrationPlan(target_disk=str(self.target_base), destination_disks=[str(self.target_base)], operations=operations, total_size=total_size, file_count=len(operations))
self.logger.info(f'Migration plan created: {plan.file_count} files, {plan.total_size:,} bytes')
return plan
def execute_migration(self, operations: list[OperationRecord], dry_run: bool=False, progress_callback: Optional[Callable[[int, int, ProcessingStats], None]]=None) -> ProcessingStats:
self.logger.section('Executing Migration' + (' (DRY RUN)' if dry_run else ''))
self._ensure_tables()
stats = ProcessingStats()
total_ops = len(operations)
for operation in operations:
stats.files_processed += 1
if dry_run:
self.logger.debug(f'[DRY RUN] Would {operation.operation_type}: {operation.source_path} -> {operation.target_path}')
stats.files_succeeded += 1
else:
success = self._execute_operation(operation)
if success:
stats.files_succeeded += 1
stats.bytes_processed += operation.size
else:
stats.files_failed += 1
if progress_callback and stats.files_processed % 100 == 0:
progress_callback(stats.files_processed, total_ops, stats)
if stats.files_processed % 1000 == 0:
self.logger.progress(stats.files_processed, total_ops, prefix='Operations executed', bytes_processed=stats.bytes_processed, elapsed_seconds=stats.elapsed_seconds)
self.logger.info(f"Migration {('dry run' if dry_run else 'execution')} complete: {stats.files_succeeded}/{total_ops} operations, {stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s")
return stats
def _execute_operation(self, operation: OperationRecord) -> bool:
operation.status = 'in_progress'
operation.executed_at = datetime.now()
try:
if operation.operation_type == 'copy':
strategy = self.copy_strategy
elif operation.operation_type == 'hardlink':
strategy = self.hardlink_strategy
elif operation.operation_type == 'symlink':
strategy = self.symlink_strategy
else:
raise ValueError(f'Unknown operation type: {operation.operation_type}')
success = strategy.migrate(operation.source_path, operation.target_path, verify=self.processing_config.verify_operations)
if success:
operation.status = 'completed'
operation.verified = True
self._record_operation(operation)
return True
else:
operation.status = 'failed'
operation.error = 'Migration failed'
self._record_operation(operation)
return False
except Exception as e:
operation.status = 'failed'
operation.error = str(e)
self._record_operation(operation)
self.logger.error(f'Operation failed: {operation.source_path}: {e}')
return False
def _record_operation(self, operation: OperationRecord):
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute('\n INSERT INTO operations (\n source_path, target_path, operation_type, bytes_processed,\n status, error, executed_at, verified\n )\n VALUES (%s, %s, %s, %s, %s, %s, %s, %s)\n ', (str(operation.source_path), str(operation.target_path), operation.operation_type, operation.size, operation.status, operation.error, operation.executed_at, operation.verified))
conn.commit()
cursor.close()
def rollback(self, operation: OperationRecord) -> bool:
self.logger.warning(f'Rolling back: {operation.target_path}')
try:
if operation.target_path.exists():
operation.target_path.unlink()
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("\n UPDATE operations\n SET status = 'rolled_back'\n WHERE source_path = %s AND target_path = %s\n ", (str(operation.source_path), str(operation.target_path)))
conn.commit()
cursor.close()
return True
except Exception as e:
self.logger.error(f'Rollback failed: {operation.target_path}: {e}')
return False
def get_migration_stats(self) -> dict:
conn = self._get_connection()
cursor = conn.cursor()
stats = {}
cursor.execute('SELECT COUNT(*) FROM operations')
stats['total_operations'] = cursor.fetchone()[0]
cursor.execute('\n SELECT status, COUNT(*)\n FROM operations\n GROUP BY status\n ')
for status, count in cursor.fetchall():
stats[f'{status}_operations'] = count
cursor.execute("\n SELECT COALESCE(SUM(size), 0)\n FROM operations\n WHERE status = 'completed'\n ")
stats['total_size_migrated'] = cursor.fetchone()[0]
cursor.close()
return stats
def verify_migrations(self) -> dict:
self.logger.subsection('Verifying Migrations')
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("\n SELECT source_path, target_path, operation_type\n FROM operations\n WHERE status = 'completed' AND verified = FALSE\n ")
operations = cursor.fetchall()
cursor.close()
results = {'total': len(operations), 'verified': 0, 'failed': 0}
for source_str, dest_str, op_type in operations:
source = Path(source_str)
dest = Path(dest_str)
if not dest.exists():
results['failed'] += 1
self.logger.warning(f'Verification failed: {dest} does not exist')
continue
if op_type == 'hardlink':
if source.exists() and source.stat().st_ino == dest.stat().st_ino:
results['verified'] += 1
else:
results['failed'] += 1
elif dest.exists():
results['verified'] += 1
else:
results['failed'] += 1
self.logger.info(f"Verification complete: {results['verified']}/{results['total']} verified")
return results
def close(self):
if self._connection and (not self._connection.closed):
self._connection.close()
    def __enter__(self):
        """Context-manager entry: yield the engine itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context-manager exit: close the DB connection; exceptions propagate
        (returns None, i.e. nothing is suppressed)."""
        self.close()