"""Migration engine.

Plans and executes file migrations (copy / hardlink / symlink) into a
target directory tree, recording every attempted operation in a
PostgreSQL ``operations`` table for auditing, verification and rollback.
"""

from pathlib import Path
from typing import Optional, Callable
from datetime import datetime

import psycopg2
from psycopg2.extras import execute_batch

from .copy import CopyMigrationStrategy, SafeCopyStrategy
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
from ..shared.config import DatabaseConfig, ProcessingConfig
from ..shared.logger import ProgressLogger


class MigrationEngine:
    """Plan and execute file migrations into ``target_base``.

    Unique files are copied; files marked as duplicates (``duplicate_of``
    set in the ``files`` table) are hardlinked. Each attempted operation
    is persisted to the ``operations`` table so runs can be audited,
    verified and rolled back. Usable as a context manager (closes the
    database connection on exit).
    """

    def __init__(self, db_config: DatabaseConfig, processing_config: ProcessingConfig,
                 logger: ProgressLogger, target_base: Path):
        """Store configuration and build the three migration strategies.

        Args:
            db_config: PostgreSQL connection parameters.
            processing_config: Behavioral switches (e.g. ``verify_operations``).
            logger: Progress/diagnostic logger.
            target_base: Root directory migrated files are placed under.
        """
        self.db_config = db_config
        self.processing_config = processing_config
        self.logger = logger
        self.target_base = Path(target_base)
        # Connection is opened lazily and re-opened if it was closed.
        self._connection = None
        self.copy_strategy = SafeCopyStrategy(logger=logger)
        self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
        self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)

    def _get_connection(self):
        """Return a live psycopg2 connection, (re)connecting if necessary."""
        if self._connection is None or self._connection.closed:
            self._connection = psycopg2.connect(
                host=self.db_config.host,
                port=self.db_config.port,
                database=self.db_config.database,
                user=self.db_config.user,
                password=self.db_config.password,
            )
        return self._connection

    def _ensure_tables(self):
        """Create the ``operations`` audit table and its status index if absent."""
        conn = self._get_connection()
        # Cursor as context manager guarantees it is closed even on error.
        with conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS operations (
                    id SERIAL PRIMARY KEY,
                    source_path TEXT NOT NULL,
                    target_path TEXT NOT NULL,
                    operation_type TEXT NOT NULL,
                    size BIGINT DEFAULT 0,
                    status TEXT DEFAULT 'pending',
                    error TEXT,
                    executed_at TIMESTAMP,
                    verified BOOLEAN DEFAULT FALSE,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_operations_status
                ON operations(status)
            """)
            conn.commit()

    def plan_migration(self, disk: Optional[str] = None,
                       category: Optional[str] = None) -> MigrationPlan:
        """Build a :class:`MigrationPlan` from the ``files`` table.

        Files with ``duplicate_of`` set become ``hardlink`` operations,
        all others become ``copy`` operations. Only files with a non-NULL
        category are considered.

        Args:
            disk: Optional ``disk_label`` filter.
            category: Optional ``category`` filter.

        Returns:
            A plan containing one :class:`OperationRecord` per file.
        """
        self.logger.section('Planning Migration')
        conn = self._get_connection()

        # Build the WHERE clause from fixed, code-controlled fragments;
        # user-supplied values go through parameterized placeholders.
        conditions = ['category IS NOT NULL']
        params: list = []
        if disk:
            conditions.append('disk_label = %s')
            params.append(disk)
        if category:
            conditions.append('category = %s')
            params.append(category)

        query = f"""
            SELECT path, size, category, duplicate_of
            FROM files
            WHERE {' AND '.join(conditions)}
            ORDER BY category, path
        """
        with conn.cursor() as cursor:
            cursor.execute(query, params)
            files = cursor.fetchall()

        self.logger.info(f'Found {len(files)} files to migrate')

        operations = []
        total_size = 0
        for path_str, size, file_category, duplicate_of in files:
            source = Path(path_str)
            # NOTE(review): two files sharing a basename within one category
            # map to the same target path — confirm upstream guarantees
            # uniqueness, or later files will overwrite earlier ones.
            target_path = self.target_base / file_category / source.name
            operation_type = 'hardlink' if duplicate_of else 'copy'
            operations.append(OperationRecord(
                source_path=source,
                target_path=target_path,
                operation_type=operation_type,
                size=size,
            ))
            total_size += size

        plan = MigrationPlan(
            target_disk=str(self.target_base),
            destination_disks=[str(self.target_base)],
            operations=operations,
            total_size=total_size,
            file_count=len(operations),
        )
        self.logger.info(
            f'Migration plan created: {plan.file_count} files, {plan.total_size:,} bytes')
        return plan

    def execute_migration(self, operations: list[OperationRecord], dry_run: bool = False,
                          progress_callback: Optional[Callable[[int, int, ProcessingStats], None]] = None
                          ) -> ProcessingStats:
        """Execute (or simulate) a list of planned operations.

        Args:
            operations: Operations to perform, in order.
            dry_run: When True, only log what would happen; nothing is
                written and nothing is recorded in the database.
            progress_callback: Invoked every 100 operations with
                ``(done, total, stats)``.

        Returns:
            Aggregated :class:`ProcessingStats` for the run.
        """
        self.logger.section('Executing Migration' + (' (DRY RUN)' if dry_run else ''))
        self._ensure_tables()

        stats = ProcessingStats()
        total_ops = len(operations)

        for operation in operations:
            stats.files_processed += 1

            if dry_run:
                self.logger.debug(
                    f'[DRY RUN] Would {operation.operation_type}: '
                    f'{operation.source_path} -> {operation.target_path}')
                stats.files_succeeded += 1
            else:
                success = self._execute_operation(operation)
                if success:
                    stats.files_succeeded += 1
                    stats.bytes_processed += operation.size
                else:
                    stats.files_failed += 1

            if progress_callback and stats.files_processed % 100 == 0:
                progress_callback(stats.files_processed, total_ops, stats)

            if stats.files_processed % 1000 == 0:
                self.logger.progress(
                    stats.files_processed, total_ops,
                    prefix='Operations executed',
                    bytes_processed=stats.bytes_processed,
                    elapsed_seconds=stats.elapsed_seconds)

        self.logger.info(
            f"Migration {('dry run' if dry_run else 'execution')} complete: "
            f"{stats.files_succeeded}/{total_ops} operations, "
            f"{stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s")
        return stats

    def _execute_operation(self, operation: OperationRecord) -> bool:
        """Run a single operation via its strategy and record the outcome.

        Mutates ``operation`` in place (status / error / executed_at /
        verified) and persists a row regardless of success.

        Returns:
            True on success, False on failure (never raises).
        """
        operation.status = 'in_progress'
        operation.executed_at = datetime.now()
        try:
            # Dispatch to the strategy matching the planned operation type.
            if operation.operation_type == 'copy':
                strategy = self.copy_strategy
            elif operation.operation_type == 'hardlink':
                strategy = self.hardlink_strategy
            elif operation.operation_type == 'symlink':
                strategy = self.symlink_strategy
            else:
                raise ValueError(f'Unknown operation type: {operation.operation_type}')

            success = strategy.migrate(
                operation.source_path, operation.target_path,
                verify=self.processing_config.verify_operations)

            if success:
                operation.status = 'completed'
                operation.verified = True
                self._record_operation(operation)
                return True
            operation.status = 'failed'
            operation.error = 'Migration failed'
            self._record_operation(operation)
            return False
        except Exception as e:
            # Record the failure so the audit trail stays complete, then
            # report False instead of propagating — callers count failures.
            operation.status = 'failed'
            operation.error = str(e)
            self._record_operation(operation)
            self.logger.error(f'Operation failed: {operation.source_path}: {e}')
            return False

    def _record_operation(self, operation: OperationRecord):
        """Persist one operation's outcome to the ``operations`` table."""
        conn = self._get_connection()
        with conn.cursor() as cursor:
            # BUGFIX: the table column is `size` (see _ensure_tables), not
            # `bytes_processed` — the old INSERT raised UndefinedColumn.
            cursor.execute("""
                INSERT INTO operations (
                    source_path, target_path, operation_type, size,
                    status, error, executed_at, verified
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            """, (
                str(operation.source_path),
                str(operation.target_path),
                operation.operation_type,
                operation.size,
                operation.status,
                operation.error,
                operation.executed_at,
                operation.verified,
            ))
            conn.commit()

    def rollback(self, operation: OperationRecord) -> bool:
        """Undo one operation: delete its target and mark it rolled back.

        Returns:
            True on success, False if the unlink or DB update failed.
        """
        self.logger.warning(f'Rolling back: {operation.target_path}')
        try:
            if operation.target_path.exists():
                operation.target_path.unlink()
            conn = self._get_connection()
            with conn.cursor() as cursor:
                cursor.execute("""
                    UPDATE operations
                    SET status = 'rolled_back'
                    WHERE source_path = %s AND target_path = %s
                """, (str(operation.source_path), str(operation.target_path)))
                conn.commit()
            return True
        except Exception as e:
            self.logger.error(f'Rollback failed: {operation.target_path}: {e}')
            return False

    def get_migration_stats(self) -> dict:
        """Summarize the ``operations`` table.

        Returns:
            Dict with ``total_operations``, one ``<status>_operations``
            count per status, and ``total_size_migrated`` (bytes of
            completed operations).
        """
        conn = self._get_connection()
        stats: dict = {}
        with conn.cursor() as cursor:
            cursor.execute('SELECT COUNT(*) FROM operations')
            stats['total_operations'] = cursor.fetchone()[0]

            cursor.execute("""
                SELECT status, COUNT(*)
                FROM operations
                GROUP BY status
            """)
            for status, count in cursor.fetchall():
                stats[f'{status}_operations'] = count

            cursor.execute("""
                SELECT COALESCE(SUM(size), 0)
                FROM operations
                WHERE status = 'completed'
            """)
            stats['total_size_migrated'] = cursor.fetchone()[0]
        return stats

    def verify_migrations(self) -> dict:
        """Re-check completed-but-unverified operations on disk.

        Hardlinks are verified by comparing inode numbers with the source;
        other operation types only by target existence.

        Returns:
            Dict with ``total``, ``verified`` and ``failed`` counts.

        NOTE(review): this does not flip ``verified`` to TRUE in the DB,
        so the same rows are re-checked on every call — confirm intended.
        """
        self.logger.subsection('Verifying Migrations')
        conn = self._get_connection()
        with conn.cursor() as cursor:
            cursor.execute("""
                SELECT source_path, target_path, operation_type
                FROM operations
                WHERE status = 'completed' AND verified = FALSE
            """)
            operations = cursor.fetchall()

        results = {'total': len(operations), 'verified': 0, 'failed': 0}
        for source_str, dest_str, op_type in operations:
            source = Path(source_str)
            dest = Path(dest_str)
            if not dest.exists():
                results['failed'] += 1
                self.logger.warning(f'Verification failed: {dest} does not exist')
                continue
            if op_type == 'hardlink':
                # A hardlink shares the source's inode.
                if source.exists() and source.stat().st_ino == dest.stat().st_ino:
                    results['verified'] += 1
                else:
                    results['failed'] += 1
            else:
                # dest.exists() was already established above.
                results['verified'] += 1

        self.logger.info(
            f"Verification complete: {results['verified']}/{results['total']} verified")
        return results

    def close(self):
        """Close the database connection if it is open."""
        if self._connection and not self._connection.closed:
            self._connection.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()