196 lines
10 KiB
Python
196 lines
10 KiB
Python
from pathlib import Path
|
|
from typing import Optional, Callable
|
|
from datetime import datetime
|
|
import psycopg2
|
|
from psycopg2.extras import execute_batch
|
|
from .copy import CopyMigrationStrategy, SafeCopyStrategy
|
|
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
|
|
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
|
|
from ..shared.config import DatabaseConfig, ProcessingConfig
|
|
from ..shared.logger import ProgressLogger
|
|
|
|
class MigrationEngine:
    """Plans, executes, records, and verifies file migrations into a target tree.

    Outcomes of every operation are persisted to a PostgreSQL `operations`
    table so runs can be audited, verified, and rolled back later.
    """

    def __init__(self, db_config: DatabaseConfig, processing_config: ProcessingConfig, logger: ProgressLogger, target_base: Path):
        """Store configuration and build one strategy object per operation type.

        Args:
            db_config: PostgreSQL connection settings.
            processing_config: Behavior toggles (e.g. ``verify_operations``).
            logger: Progress/diagnostic logger shared with the strategies.
            target_base: Root directory files are migrated under.
        """
        self.db_config = db_config
        self.processing_config = processing_config
        self.logger = logger
        # Normalize to Path so callers may pass a plain string.
        self.target_base = Path(target_base)
        # Opened lazily by _get_connection().
        self._connection = None
        # Strategies for the three supported operation types.
        self.copy_strategy = SafeCopyStrategy(logger=logger)
        self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
        self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)
|
|
|
|
def _get_connection(self):
|
|
if self._connection is None or self._connection.closed:
|
|
self._connection = psycopg2.connect(host=self.db_config.host, port=self.db_config.port, database=self.db_config.database, user=self.db_config.user, password=self.db_config.password)
|
|
return self._connection
|
|
|
|
def _ensure_tables(self):
|
|
conn = self._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("\n CREATE TABLE IF NOT EXISTS operations (\n id SERIAL PRIMARY KEY,\n source_path TEXT NOT NULL,\n target_path TEXT NOT NULL,\n operation_type TEXT NOT NULL,\n size BIGINT DEFAULT 0,\n status TEXT DEFAULT 'pending',\n error TEXT,\n executed_at TIMESTAMP,\n verified BOOLEAN DEFAULT FALSE,\n created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n )\n ")
|
|
cursor.execute('\n CREATE INDEX IF NOT EXISTS idx_operations_status\n ON operations(status)\n ')
|
|
conn.commit()
|
|
cursor.close()
|
|
|
|
def plan_migration(self, disk: Optional[str]=None, category: Optional[str]=None) -> MigrationPlan:
|
|
self.logger.section('Planning Migration')
|
|
conn = self._get_connection()
|
|
cursor = conn.cursor()
|
|
conditions = ['category IS NOT NULL']
|
|
params = []
|
|
if disk:
|
|
conditions.append('disk_label = %s')
|
|
params.append(disk)
|
|
if category:
|
|
conditions.append('category = %s')
|
|
params.append(category)
|
|
query = f"\n SELECT path, size, category, duplicate_of\n FROM files\n WHERE {' AND '.join(conditions)}\n ORDER BY category, path\n "
|
|
cursor.execute(query, params)
|
|
files = cursor.fetchall()
|
|
self.logger.info(f'Found {len(files)} files to migrate')
|
|
operations = []
|
|
total_size = 0
|
|
for path_str, size, file_category, duplicate_of in files:
|
|
source = Path(path_str)
|
|
target_path = self.target_base / file_category / source.name
|
|
if duplicate_of:
|
|
operation_type = 'hardlink'
|
|
else:
|
|
operation_type = 'copy'
|
|
operation = OperationRecord(source_path=source, target_path=target_path, operation_type=operation_type, size=size)
|
|
operations.append(operation)
|
|
total_size += size
|
|
cursor.close()
|
|
plan = MigrationPlan(target_disk=str(self.target_base), destination_disks=[str(self.target_base)], operations=operations, total_size=total_size, file_count=len(operations))
|
|
self.logger.info(f'Migration plan created: {plan.file_count} files, {plan.total_size:,} bytes')
|
|
return plan
|
|
|
|
def execute_migration(self, operations: list[OperationRecord], dry_run: bool=False, progress_callback: Optional[Callable[[int, int, ProcessingStats], None]]=None) -> ProcessingStats:
|
|
self.logger.section('Executing Migration' + (' (DRY RUN)' if dry_run else ''))
|
|
self._ensure_tables()
|
|
stats = ProcessingStats()
|
|
total_ops = len(operations)
|
|
for operation in operations:
|
|
stats.files_processed += 1
|
|
if dry_run:
|
|
self.logger.debug(f'[DRY RUN] Would {operation.operation_type}: {operation.source_path} -> {operation.target_path}')
|
|
stats.files_succeeded += 1
|
|
else:
|
|
success = self._execute_operation(operation)
|
|
if success:
|
|
stats.files_succeeded += 1
|
|
stats.bytes_processed += operation.size
|
|
else:
|
|
stats.files_failed += 1
|
|
if progress_callback and stats.files_processed % 100 == 0:
|
|
progress_callback(stats.files_processed, total_ops, stats)
|
|
if stats.files_processed % 1000 == 0:
|
|
self.logger.progress(stats.files_processed, total_ops, prefix='Operations executed', bytes_processed=stats.bytes_processed, elapsed_seconds=stats.elapsed_seconds)
|
|
self.logger.info(f"Migration {('dry run' if dry_run else 'execution')} complete: {stats.files_succeeded}/{total_ops} operations, {stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s")
|
|
return stats
|
|
|
|
def _execute_operation(self, operation: OperationRecord) -> bool:
|
|
operation.status = 'in_progress'
|
|
operation.executed_at = datetime.now()
|
|
try:
|
|
if operation.operation_type == 'copy':
|
|
strategy = self.copy_strategy
|
|
elif operation.operation_type == 'hardlink':
|
|
strategy = self.hardlink_strategy
|
|
elif operation.operation_type == 'symlink':
|
|
strategy = self.symlink_strategy
|
|
else:
|
|
raise ValueError(f'Unknown operation type: {operation.operation_type}')
|
|
success = strategy.migrate(operation.source_path, operation.target_path, verify=self.processing_config.verify_operations)
|
|
if success:
|
|
operation.status = 'completed'
|
|
operation.verified = True
|
|
self._record_operation(operation)
|
|
return True
|
|
else:
|
|
operation.status = 'failed'
|
|
operation.error = 'Migration failed'
|
|
self._record_operation(operation)
|
|
return False
|
|
except Exception as e:
|
|
operation.status = 'failed'
|
|
operation.error = str(e)
|
|
self._record_operation(operation)
|
|
self.logger.error(f'Operation failed: {operation.source_path}: {e}')
|
|
return False
|
|
|
|
def _record_operation(self, operation: OperationRecord):
|
|
conn = self._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute('\n INSERT INTO operations (\n source_path, target_path, operation_type, bytes_processed,\n status, error, executed_at, verified\n )\n VALUES (%s, %s, %s, %s, %s, %s, %s, %s)\n ', (str(operation.source_path), str(operation.target_path), operation.operation_type, operation.size, operation.status, operation.error, operation.executed_at, operation.verified))
|
|
conn.commit()
|
|
cursor.close()
|
|
|
|
def rollback(self, operation: OperationRecord) -> bool:
|
|
self.logger.warning(f'Rolling back: {operation.target_path}')
|
|
try:
|
|
if operation.target_path.exists():
|
|
operation.target_path.unlink()
|
|
conn = self._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("\n UPDATE operations\n SET status = 'rolled_back'\n WHERE source_path = %s AND target_path = %s\n ", (str(operation.source_path), str(operation.target_path)))
|
|
conn.commit()
|
|
cursor.close()
|
|
return True
|
|
except Exception as e:
|
|
self.logger.error(f'Rollback failed: {operation.target_path}: {e}')
|
|
return False
|
|
|
|
def get_migration_stats(self) -> dict:
|
|
conn = self._get_connection()
|
|
cursor = conn.cursor()
|
|
stats = {}
|
|
cursor.execute('SELECT COUNT(*) FROM operations')
|
|
stats['total_operations'] = cursor.fetchone()[0]
|
|
cursor.execute('\n SELECT status, COUNT(*)\n FROM operations\n GROUP BY status\n ')
|
|
for status, count in cursor.fetchall():
|
|
stats[f'{status}_operations'] = count
|
|
cursor.execute("\n SELECT COALESCE(SUM(size), 0)\n FROM operations\n WHERE status = 'completed'\n ")
|
|
stats['total_size_migrated'] = cursor.fetchone()[0]
|
|
cursor.close()
|
|
return stats
|
|
|
|
def verify_migrations(self) -> dict:
|
|
self.logger.subsection('Verifying Migrations')
|
|
conn = self._get_connection()
|
|
cursor = conn.cursor()
|
|
cursor.execute("\n SELECT source_path, target_path, operation_type\n FROM operations\n WHERE status = 'completed' AND verified = FALSE\n ")
|
|
operations = cursor.fetchall()
|
|
cursor.close()
|
|
results = {'total': len(operations), 'verified': 0, 'failed': 0}
|
|
for source_str, dest_str, op_type in operations:
|
|
source = Path(source_str)
|
|
dest = Path(dest_str)
|
|
if not dest.exists():
|
|
results['failed'] += 1
|
|
self.logger.warning(f'Verification failed: {dest} does not exist')
|
|
continue
|
|
if op_type == 'hardlink':
|
|
if source.exists() and source.stat().st_ino == dest.stat().st_ino:
|
|
results['verified'] += 1
|
|
else:
|
|
results['failed'] += 1
|
|
elif dest.exists():
|
|
results['verified'] += 1
|
|
else:
|
|
results['failed'] += 1
|
|
self.logger.info(f"Verification complete: {results['verified']}/{results['total']} verified")
|
|
return results
|
|
|
|
def close(self):
|
|
if self._connection and (not self._connection.closed):
|
|
self._connection.close()
|
|
|
|
def __enter__(self):
|
|
return self
|
|
|
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
self.close()
|