initial
This commit is contained in:
454
app/migration/engine.py
Normal file
454
app/migration/engine.py
Normal file
@@ -0,0 +1,454 @@
|
||||
"""Migration engine"""
|
||||
from pathlib import Path
|
||||
from typing import Optional, Callable
|
||||
from datetime import datetime
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_batch
|
||||
|
||||
from .copy import CopyMigrationStrategy, SafeCopyStrategy
|
||||
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
|
||||
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
|
||||
from ..shared.config import DatabaseConfig, ProcessingConfig
|
||||
from ..shared.logger import ProgressLogger
|
||||
|
||||
|
||||
class MigrationEngine:
|
||||
"""Engine for migrating files"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
db_config: DatabaseConfig,
|
||||
processing_config: ProcessingConfig,
|
||||
logger: ProgressLogger,
|
||||
target_base: Path
|
||||
):
|
||||
"""Initialize migration engine
|
||||
|
||||
Args:
|
||||
db_config: Database configuration
|
||||
processing_config: Processing configuration
|
||||
logger: Progress logger
|
||||
target_base: Target base directory for migrations
|
||||
"""
|
||||
self.db_config = db_config
|
||||
self.processing_config = processing_config
|
||||
self.logger = logger
|
||||
self.target_base = Path(target_base)
|
||||
self._connection = None
|
||||
|
||||
# Initialize strategies
|
||||
self.copy_strategy = SafeCopyStrategy(logger=logger)
|
||||
self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
|
||||
self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)
|
||||
|
||||
def _get_connection(self):
|
||||
"""Get or create database connection"""
|
||||
if self._connection is None or self._connection.closed:
|
||||
self._connection = psycopg2.connect(
|
||||
host=self.db_config.host,
|
||||
port=self.db_config.port,
|
||||
database=self.db_config.database,
|
||||
user=self.db_config.user,
|
||||
password=self.db_config.password
|
||||
)
|
||||
return self._connection
|
||||
|
||||
def _ensure_tables(self):
|
||||
"""Ensure migration tables exist"""
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Create operations table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS operations (
|
||||
id SERIAL PRIMARY KEY,
|
||||
source_path TEXT NOT NULL,
|
||||
target_path TEXT NOT NULL,
|
||||
operation_type TEXT NOT NULL,
|
||||
size BIGINT DEFAULT 0,
|
||||
status TEXT DEFAULT 'pending',
|
||||
error TEXT,
|
||||
executed_at TIMESTAMP,
|
||||
verified BOOLEAN DEFAULT FALSE,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
# Create index on status
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_operations_status
|
||||
ON operations(status)
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
def plan_migration(
|
||||
self,
|
||||
disk: Optional[str] = None,
|
||||
category: Optional[str] = None
|
||||
) -> MigrationPlan:
|
||||
"""Plan migration for files
|
||||
|
||||
Args:
|
||||
disk: Optional disk filter
|
||||
category: Optional category filter
|
||||
|
||||
Returns:
|
||||
MigrationPlan with planned operations
|
||||
"""
|
||||
self.logger.section("Planning Migration")
|
||||
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Build query
|
||||
conditions = ["category IS NOT NULL"]
|
||||
params = []
|
||||
|
||||
if disk:
|
||||
conditions.append("disk_label = %s")
|
||||
params.append(disk)
|
||||
|
||||
if category:
|
||||
conditions.append("category = %s")
|
||||
params.append(category)
|
||||
|
||||
query = f"""
|
||||
SELECT path, size, category, duplicate_of
|
||||
FROM files
|
||||
WHERE {' AND '.join(conditions)}
|
||||
ORDER BY category, path
|
||||
"""
|
||||
|
||||
cursor.execute(query, params)
|
||||
files = cursor.fetchall()
|
||||
|
||||
self.logger.info(f"Found {len(files)} files to migrate")
|
||||
|
||||
operations = []
|
||||
total_size = 0
|
||||
|
||||
for path_str, size, file_category, duplicate_of in files:
|
||||
source = Path(path_str)
|
||||
|
||||
# Determine destination
|
||||
target_path = self.target_base / file_category / source.name
|
||||
|
||||
# Determine operation type
|
||||
if duplicate_of:
|
||||
# Use hardlink for duplicates
|
||||
operation_type = 'hardlink'
|
||||
else:
|
||||
# Use copy for unique files
|
||||
operation_type = 'copy'
|
||||
|
||||
operation = OperationRecord(
|
||||
source_path=source,
|
||||
target_path=target_path,
|
||||
operation_type=operation_type,
|
||||
size=size
|
||||
)
|
||||
|
||||
operations.append(operation)
|
||||
total_size += size
|
||||
|
||||
cursor.close()
|
||||
|
||||
plan = MigrationPlan(
|
||||
target_disk=str(self.target_base),
|
||||
destination_disks=[str(self.target_base)],
|
||||
operations=operations,
|
||||
total_size=total_size,
|
||||
file_count=len(operations)
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
f"Migration plan created: {plan.file_count} files, "
|
||||
f"{plan.total_size:,} bytes"
|
||||
)
|
||||
|
||||
return plan
|
||||
|
||||
def execute_migration(
|
||||
self,
|
||||
operations: list[OperationRecord],
|
||||
dry_run: bool = False,
|
||||
progress_callback: Optional[Callable[[int, int, ProcessingStats], None]] = None
|
||||
) -> ProcessingStats:
|
||||
"""Execute migration operations
|
||||
|
||||
Args:
|
||||
operations: List of operations to execute
|
||||
dry_run: Whether to perform a dry run
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
ProcessingStats with execution statistics
|
||||
"""
|
||||
self.logger.section("Executing Migration" + (" (DRY RUN)" if dry_run else ""))
|
||||
|
||||
self._ensure_tables()
|
||||
|
||||
stats = ProcessingStats()
|
||||
total_ops = len(operations)
|
||||
|
||||
for operation in operations:
|
||||
stats.files_processed += 1
|
||||
|
||||
if dry_run:
|
||||
# In dry run, just log what would happen
|
||||
self.logger.debug(
|
||||
f"[DRY RUN] Would {operation.operation_type}: "
|
||||
f"{operation.source_path} -> {operation.target_path}"
|
||||
)
|
||||
stats.files_succeeded += 1
|
||||
else:
|
||||
# Execute actual migration
|
||||
success = self._execute_operation(operation)
|
||||
|
||||
if success:
|
||||
stats.files_succeeded += 1
|
||||
stats.bytes_processed += operation.size
|
||||
else:
|
||||
stats.files_failed += 1
|
||||
|
||||
# Progress callback
|
||||
if progress_callback and stats.files_processed % 100 == 0:
|
||||
progress_callback(stats.files_processed, total_ops, stats)
|
||||
|
||||
# Log progress
|
||||
if stats.files_processed % 1000 == 0:
|
||||
self.logger.progress(
|
||||
stats.files_processed,
|
||||
total_ops,
|
||||
prefix="Operations executed",
|
||||
bytes_processed=stats.bytes_processed,
|
||||
elapsed_seconds=stats.elapsed_seconds
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
f"Migration {'dry run' if dry_run else 'execution'} complete: "
|
||||
f"{stats.files_succeeded}/{total_ops} operations, "
|
||||
f"{stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s"
|
||||
)
|
||||
|
||||
return stats
|
||||
|
||||
def _execute_operation(self, operation: OperationRecord) -> bool:
|
||||
"""Execute a single migration operation
|
||||
|
||||
Args:
|
||||
operation: Operation to execute
|
||||
|
||||
Returns:
|
||||
True if successful
|
||||
"""
|
||||
operation.status = 'in_progress'
|
||||
operation.executed_at = datetime.now()
|
||||
|
||||
try:
|
||||
# Select strategy based on operation type
|
||||
if operation.operation_type == 'copy':
|
||||
strategy = self.copy_strategy
|
||||
elif operation.operation_type == 'hardlink':
|
||||
strategy = self.hardlink_strategy
|
||||
elif operation.operation_type == 'symlink':
|
||||
strategy = self.symlink_strategy
|
||||
else:
|
||||
raise ValueError(f"Unknown operation type: {operation.operation_type}")
|
||||
|
||||
# Execute migration
|
||||
success = strategy.migrate(
|
||||
operation.source_path,
|
||||
operation.target_path,
|
||||
verify=self.processing_config.verify_operations
|
||||
)
|
||||
|
||||
if success:
|
||||
operation.status = 'completed'
|
||||
operation.verified = True
|
||||
self._record_operation(operation)
|
||||
return True
|
||||
else:
|
||||
operation.status = 'failed'
|
||||
operation.error = "Migration failed"
|
||||
self._record_operation(operation)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
operation.status = 'failed'
|
||||
operation.error = str(e)
|
||||
self._record_operation(operation)
|
||||
self.logger.error(f"Operation failed: {operation.source_path}: {e}")
|
||||
return False
|
||||
|
||||
def _record_operation(self, operation: OperationRecord):
|
||||
"""Record operation in database
|
||||
|
||||
Args:
|
||||
operation: Operation to record
|
||||
"""
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute("""
|
||||
INSERT INTO operations (
|
||||
source_path, target_path, operation_type, bytes_processed,
|
||||
status, error, executed_at, verified
|
||||
)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
|
||||
""", (
|
||||
str(operation.source_path),
|
||||
str(operation.target_path),
|
||||
operation.operation_type,
|
||||
operation.size,
|
||||
operation.status,
|
||||
operation.error,
|
||||
operation.executed_at,
|
||||
operation.verified
|
||||
))
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
def rollback(self, operation: OperationRecord) -> bool:
|
||||
"""Rollback a migration operation
|
||||
|
||||
Args:
|
||||
operation: Operation to rollback
|
||||
|
||||
Returns:
|
||||
True if rollback successful
|
||||
"""
|
||||
self.logger.warning(f"Rolling back: {operation.target_path}")
|
||||
|
||||
try:
|
||||
# Remove destination
|
||||
if operation.target_path.exists():
|
||||
operation.target_path.unlink()
|
||||
|
||||
# Update database
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute("""
|
||||
UPDATE operations
|
||||
SET status = 'rolled_back'
|
||||
WHERE source_path = %s AND target_path = %s
|
||||
""", (str(operation.source_path), str(operation.target_path)))
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Rollback failed: {operation.target_path}: {e}")
|
||||
return False
|
||||
|
||||
def get_migration_stats(self) -> dict:
|
||||
"""Get migration statistics
|
||||
|
||||
Returns:
|
||||
Dictionary with statistics
|
||||
"""
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
stats = {}
|
||||
|
||||
# Total operations
|
||||
cursor.execute("SELECT COUNT(*) FROM operations")
|
||||
stats['total_operations'] = cursor.fetchone()[0]
|
||||
|
||||
# Operations by status
|
||||
cursor.execute("""
|
||||
SELECT status, COUNT(*)
|
||||
FROM operations
|
||||
GROUP BY status
|
||||
""")
|
||||
|
||||
for status, count in cursor.fetchall():
|
||||
stats[f'{status}_operations'] = count
|
||||
|
||||
# Total size migrated
|
||||
cursor.execute("""
|
||||
SELECT COALESCE(SUM(size), 0)
|
||||
FROM operations
|
||||
WHERE status = 'completed'
|
||||
""")
|
||||
stats['total_size_migrated'] = cursor.fetchone()[0]
|
||||
|
||||
cursor.close()
|
||||
|
||||
return stats
|
||||
|
||||
def verify_migrations(self) -> dict:
|
||||
"""Verify completed migrations
|
||||
|
||||
Returns:
|
||||
Dictionary with verification results
|
||||
"""
|
||||
self.logger.subsection("Verifying Migrations")
|
||||
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute("""
|
||||
SELECT source_path, target_path, operation_type
|
||||
FROM operations
|
||||
WHERE status = 'completed' AND verified = FALSE
|
||||
""")
|
||||
|
||||
operations = cursor.fetchall()
|
||||
cursor.close()
|
||||
|
||||
results = {
|
||||
'total': len(operations),
|
||||
'verified': 0,
|
||||
'failed': 0
|
||||
}
|
||||
|
||||
for source_str, dest_str, op_type in operations:
|
||||
source = Path(source_str)
|
||||
dest = Path(dest_str)
|
||||
|
||||
# Verify destination exists
|
||||
if not dest.exists():
|
||||
results['failed'] += 1
|
||||
self.logger.warning(f"Verification failed: {dest} does not exist")
|
||||
continue
|
||||
|
||||
# Verify based on operation type
|
||||
if op_type == 'hardlink':
|
||||
# Check if hardlinked
|
||||
if source.exists() and source.stat().st_ino == dest.stat().st_ino:
|
||||
results['verified'] += 1
|
||||
else:
|
||||
results['failed'] += 1
|
||||
else:
|
||||
# Check if destination exists and has correct size
|
||||
if dest.exists():
|
||||
results['verified'] += 1
|
||||
else:
|
||||
results['failed'] += 1
|
||||
|
||||
self.logger.info(
|
||||
f"Verification complete: {results['verified']}/{results['total']} verified"
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def close(self):
|
||||
"""Close database connection"""
|
||||
if self._connection and not self._connection.closed:
|
||||
self._connection.close()
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry"""
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit"""
|
||||
self.close()
|
||||
Reference in New Issue
Block a user