"""Migration engine"""

from pathlib import Path
from typing import Optional, Callable
from datetime import datetime

import psycopg2
from psycopg2.extras import execute_batch

from .copy import CopyMigrationStrategy, SafeCopyStrategy
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
from ..shared.config import DatabaseConfig, ProcessingConfig
from ..shared.logger import ProgressLogger


class MigrationEngine:
    """Engine for migrating files

    Plans file operations from the ``files`` table, executes them through
    the copy / hardlink / symlink strategies and records every outcome in
    the ``operations`` table.
    """

    def __init__(
        self,
        db_config: DatabaseConfig,
        processing_config: ProcessingConfig,
        logger: ProgressLogger,
        target_base: Path
    ):
        """Initialize migration engine

        Args:
            db_config: Database configuration
            processing_config: Processing configuration
            logger: Progress logger
            target_base: Target base directory for migrations
        """
        self.db_config = db_config
        self.processing_config = processing_config
        self.logger = logger
        self.target_base = Path(target_base)
        # Connection is opened lazily by _get_connection()
        self._connection = None

        # Initialize strategies
        self.copy_strategy = SafeCopyStrategy(logger=logger)
        self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
        self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)

    def _get_connection(self):
        """Get or create database connection

        Returns:
            The cached psycopg2 connection; a new one is opened when none
            exists yet or the cached one has been closed.
        """
        if self._connection is None or self._connection.closed:
            self._connection = psycopg2.connect(
                host=self.db_config.host,
                port=self.db_config.port,
                database=self.db_config.database,
                user=self.db_config.user,
                password=self.db_config.password
            )
        return self._connection

    def _ensure_tables(self):
        """Ensure migration tables exist"""
        conn = self._get_connection()

        # `with conn` commits on success and rolls back on error (the
        # connection stays open); the cursor context closes the cursor.
        with conn, conn.cursor() as cursor:
            # Create operations table
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS operations (
                    id SERIAL PRIMARY KEY,
                    source_path TEXT NOT NULL,
                    dest_path TEXT NOT NULL,
                    operation_type TEXT NOT NULL,
                    size BIGINT DEFAULT 0,
                    status TEXT DEFAULT 'pending',
                    error TEXT,
                    executed_at TIMESTAMP,
                    verified BOOLEAN DEFAULT FALSE,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)

            # Create index on status
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_operations_status
                ON operations(status)
            """)

    def plan_migration(
        self,
        disk: Optional[str] = None,
        category: Optional[str] = None
    ) -> MigrationPlan:
        """Plan migration for files

        Every categorized row of the ``files`` table becomes one operation:
        rows with a ``duplicate_of`` value are planned as hardlinks, all
        others as copies.

        Args:
            disk: Optional disk filter
            category: Optional category filter

        Returns:
            MigrationPlan with planned operations
        """
        self.logger.section("Planning Migration")

        # Build query; the condition fragments are fixed literals and all
        # user-supplied values are bound as parameters.
        conditions = ["category IS NOT NULL"]
        params = []

        if disk:
            conditions.append("disk = %s")
            params.append(disk)

        if category:
            conditions.append("category = %s")
            params.append(category)

        where_clause = ' AND '.join(conditions)
        query = f"""
            SELECT path, size, category, duplicate_of
            FROM files
            WHERE {where_clause}
            ORDER BY category, path
        """

        conn = self._get_connection()
        # Context managers release the cursor and end the transaction even
        # when the query fails, so the connection stays usable afterwards.
        with conn, conn.cursor() as cursor:
            cursor.execute(query, params)
            files = cursor.fetchall()

        self.logger.info(f"Found {len(files)} files to migrate")

        operations = []
        total_size = 0

        for path_str, size, file_category, duplicate_of in files:
            source = Path(path_str)

            # A NULL size in the database would break the running totals
            size = size or 0

            # Determine destination.
            # NOTE(review): only the file name is kept, so two sources with
            # the same name in one category map to the same destination.
            dest_path = self.target_base / file_category / source.name

            # Hardlink duplicates, copy unique files
            operation_type = 'hardlink' if duplicate_of else 'copy'

            operations.append(OperationRecord(
                source_path=source,
                dest_path=dest_path,
                operation_type=operation_type,
                size=size
            ))
            total_size += size

        plan = MigrationPlan(
            target_disk=str(self.target_base),
            destination_disks=[str(self.target_base)],
            operations=operations,
            total_size=total_size,
            file_count=len(operations)
        )

        self.logger.info(
            f"Migration plan created: {plan.file_count} files, "
            f"{plan.total_size:,} bytes"
        )

        return plan

    def execute_migration(
        self,
        operations: list[OperationRecord],
        dry_run: bool = False,
        progress_callback: Optional[Callable[[int, int, ProcessingStats], None]] = None
    ) -> ProcessingStats:
        """Execute migration operations

        Args:
            operations: List of operations to execute
            dry_run: Whether to perform a dry run
            progress_callback: Optional callback for progress updates; called
                as ``callback(processed, total, stats)`` every 100 operations
                and once more at the end when the last batch is partial

        Returns:
            ProcessingStats with execution statistics
        """
        self.logger.section("Executing Migration" + (" (DRY RUN)" if dry_run else ""))

        self._ensure_tables()

        stats = ProcessingStats()
        total_ops = len(operations)

        for operation in operations:
            stats.files_processed += 1

            if dry_run:
                # In dry run, just log what would happen
                self.logger.debug(
                    f"[DRY RUN] Would {operation.operation_type}: "
                    f"{operation.source_path} -> {operation.dest_path}"
                )
                stats.files_succeeded += 1
            elif self._execute_operation(operation):
                stats.files_succeeded += 1
                # Tolerate operations built without a size
                stats.bytes_processed += operation.size or 0
            else:
                stats.files_failed += 1

            # Progress callback
            if progress_callback and stats.files_processed % 100 == 0:
                progress_callback(stats.files_processed, total_ops, stats)

            # Log progress
            if stats.files_processed % 1000 == 0:
                self.logger.progress(
                    stats.files_processed,
                    total_ops,
                    prefix="Operations executed",
                    bytes_processed=stats.bytes_processed,
                    elapsed_seconds=stats.elapsed_seconds
                )

        # Report the final partial batch so callers see the run reach 100%
        if progress_callback and stats.files_processed % 100 != 0:
            progress_callback(stats.files_processed, total_ops, stats)

        self.logger.info(
            f"Migration {'dry run' if dry_run else 'execution'} complete: "
            f"{stats.files_succeeded}/{total_ops} operations, "
            f"{stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s"
        )

        return stats

    def _select_strategy(self, operation_type: str):
        """Return the strategy object for an operation type

        Args:
            operation_type: One of 'copy', 'hardlink' or 'symlink'

        Returns:
            The matching migration strategy

        Raises:
            ValueError: If the operation type is unknown
        """
        strategies = {
            'copy': self.copy_strategy,
            'hardlink': self.hardlink_strategy,
            'symlink': self.symlink_strategy,
        }
        try:
            return strategies[operation_type]
        except KeyError:
            raise ValueError(f"Unknown operation type: {operation_type}") from None

    def _execute_operation(self, operation: OperationRecord) -> bool:
        """Execute a single migration operation

        The file operation and its database record are handled separately:
        a failure to write the record is logged but does not change the
        result, which always reflects what happened on the file system.

        Args:
            operation: Operation to execute; its status, error, executed_at
                and verified fields are updated in place

        Returns:
            True if successful
        """
        operation.status = 'in_progress'
        operation.executed_at = datetime.now()
        verify = self.processing_config.verify_operations

        try:
            # Select strategy based on operation type and run it
            strategy = self._select_strategy(operation.operation_type)
            success = bool(strategy.migrate(
                operation.source_path,
                operation.dest_path,
                verify=verify
            ))
        except Exception as e:
            # Any strategy failure is reported per operation so that one
            # bad file does not abort the whole migration.
            success = False
            operation.status = 'failed'
            operation.error = str(e)
            self.logger.error(f"Operation failed: {operation.source_path}: {e}")
        else:
            if success:
                operation.status = 'completed'
                # Only claim verification when the strategy was asked to
                # verify; otherwise verify_migrations() must still check it.
                operation.verified = bool(verify)
            else:
                operation.status = 'failed'
                operation.error = "Migration failed"

        try:
            self._record_operation(operation)
        except psycopg2.Error as e:
            self.logger.error(
                f"Failed to record operation: {operation.source_path}: {e}"
            )

        return success

    def _record_operation(self, operation: OperationRecord):
        """Record operation in database

        Args:
            operation: Operation to record
        """
        conn = self._get_connection()

        # Commit on success, roll back on error, always close the cursor
        with conn, conn.cursor() as cursor:
            cursor.execute("""
                INSERT INTO operations (
                    source_path, dest_path, operation_type, size,
                    status, error, executed_at, verified
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            """, (
                str(operation.source_path),
                str(operation.dest_path),
                operation.operation_type,
                operation.size,
                operation.status,
                operation.error,
                operation.executed_at,
                operation.verified
            ))

    def rollback(self, operation: OperationRecord) -> bool:
        """Rollback a migration operation

        Removes the destination file and marks the matching rows of the
        ``operations`` table as 'rolled_back'.

        Args:
            operation: Operation to rollback

        Returns:
            True if rollback successful
        """
        self.logger.warning(f"Rolling back: {operation.dest_path}")

        try:
            # Remove destination. missing_ok avoids an exists() pre-check,
            # which follows symlinks and would skip a dangling link.
            operation.dest_path.unlink(missing_ok=True)

            # Update database
            conn = self._get_connection()
            with conn, conn.cursor() as cursor:
                cursor.execute("""
                    UPDATE operations
                    SET status = 'rolled_back'
                    WHERE source_path = %s AND dest_path = %s
                """, (str(operation.source_path), str(operation.dest_path)))

            return True

        except Exception as e:
            self.logger.error(f"Rollback failed: {operation.dest_path}: {e}")
            return False

    def get_migration_stats(self) -> dict:
        """Get migration statistics

        Returns:
            Dictionary with 'total_operations', one '<status>_operations'
            count per status present in the table, and 'total_size_migrated'
            (sum of sizes of completed operations)
        """
        conn = self._get_connection()
        stats = {}

        with conn, conn.cursor() as cursor:
            # Total operations
            cursor.execute("SELECT COUNT(*) FROM operations")
            stats['total_operations'] = cursor.fetchone()[0]

            # Operations by status
            cursor.execute("""
                SELECT status, COUNT(*)
                FROM operations
                GROUP BY status
            """)
            for status, count in cursor.fetchall():
                stats[f'{status}_operations'] = count

            # Total size migrated
            cursor.execute("""
                SELECT COALESCE(SUM(size), 0)
                FROM operations
                WHERE status = 'completed'
            """)
            stats['total_size_migrated'] = cursor.fetchone()[0]

        return stats

    def verify_migrations(self) -> dict:
        """Verify completed migrations

        Checks every completed operation that is not yet marked verified:
        the destination must exist; a hardlink must refer to the same file
        as its source; any other operation must match the source's size
        when the source is still present.

        Returns:
            Dictionary with verification results ('total', 'verified',
            'failed')
        """
        self.logger.subsection("Verifying Migrations")

        conn = self._get_connection()
        with conn, conn.cursor() as cursor:
            cursor.execute("""
                SELECT source_path, dest_path, operation_type
                FROM operations
                WHERE status = 'completed' AND verified = FALSE
            """)
            operations = cursor.fetchall()

        results = {
            'total': len(operations),
            'verified': 0,
            'failed': 0
        }

        for source_str, dest_str, op_type in operations:
            source = Path(source_str)
            dest = Path(dest_str)

            # Verify destination exists
            if not dest.exists():
                results['failed'] += 1
                self.logger.warning(f"Verification failed: {dest} does not exist")
                continue

            # Verify based on operation type
            if op_type == 'hardlink':
                # samefile compares device and inode; inode numbers alone
                # are only unique within one file system.
                ok = source.exists() and source.samefile(dest)
            elif source.exists():
                # Destination must have the same size as the source
                ok = source.stat().st_size == dest.stat().st_size
            else:
                # Source is gone, so there is nothing to compare against;
                # the destination existing is the only available evidence.
                ok = True

            if ok:
                results['verified'] += 1
            else:
                results['failed'] += 1
                self.logger.warning(
                    f"Verification failed: {dest} does not match {source}"
                )

        self.logger.info(
            f"Verification complete: {results['verified']}/{results['total']} verified"
        )

        return results

    def close(self):
        """Close database connection"""
        if self._connection and not self._connection.closed:
            self._connection.close()

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()