"""Migration engine"""
|
|
from pathlib import Path
|
|
from typing import Optional, Callable
|
|
from datetime import datetime
|
|
import psycopg2
|
|
from psycopg2.extras import execute_batch
|
|
|
|
from .copy import CopyMigrationStrategy, SafeCopyStrategy
|
|
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
|
|
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
|
|
from ..shared.config import DatabaseConfig, ProcessingConfig
|
|
from ..shared.logger import ProgressLogger
|
|
|
|
|
|
class MigrationEngine:
    """Engine for migrating files"""

    def __init__(
        self,
        db_config: DatabaseConfig,
        processing_config: ProcessingConfig,
        logger: ProgressLogger,
        target_base: Path
    ):
        """Initialize migration engine

        Args:
            db_config: Database configuration
            processing_config: Processing configuration
            logger: Progress logger
            target_base: Target base directory for migrations
        """
        self.db_config = db_config
        self.processing_config = processing_config
        self.logger = logger
        self.target_base = Path(target_base)
        self._connection = None

        # Initialize strategies
        self.copy_strategy = SafeCopyStrategy(logger=logger)
        self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
        self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)

    def _get_connection(self):
        """Get or create database connection"""
        if self._connection is None or self._connection.closed:
            self._connection = psycopg2.connect(
                host=self.db_config.host,
                port=self.db_config.port,
                database=self.db_config.database,
                user=self.db_config.user,
                password=self.db_config.password
            )
        return self._connection

    def _ensure_tables(self):
        """Ensure migration tables exist"""
        conn = self._get_connection()
        cursor = conn.cursor()

        # Create operations table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS operations (
                id SERIAL PRIMARY KEY,
                source_path TEXT NOT NULL,
                dest_path TEXT NOT NULL,
                operation_type TEXT NOT NULL,
                size BIGINT DEFAULT 0,
                status TEXT DEFAULT 'pending',
                error TEXT,
                executed_at TIMESTAMP,
                verified BOOLEAN DEFAULT FALSE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create index on status
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_operations_status
            ON operations(status)
        """)

        conn.commit()
        cursor.close()

    def plan_migration(
        self,
        disk: Optional[str] = None,
        category: Optional[str] = None
    ) -> MigrationPlan:
        """Plan migration for files

        Args:
            disk: Optional disk filter
            category: Optional category filter

        Returns:
            MigrationPlan with planned operations
        """
        self.logger.section("Planning Migration")

        conn = self._get_connection()
        cursor = conn.cursor()

        # Build query
        conditions = ["category IS NOT NULL"]
        params = []

        if disk:
            conditions.append("disk = %s")
            params.append(disk)

        if category:
            conditions.append("category = %s")
            params.append(category)

        query = f"""
            SELECT path, size, category, duplicate_of
            FROM files
            WHERE {' AND '.join(conditions)}
            ORDER BY category, path
        """

        cursor.execute(query, params)
        files = cursor.fetchall()

        self.logger.info(f"Found {len(files)} files to migrate")

        operations = []
        total_size = 0

        for path_str, size, file_category, duplicate_of in files:
            source = Path(path_str)

            # Determine destination
            dest_path = self.target_base / file_category / source.name

            # Determine operation type
            if duplicate_of:
                # Use hardlink for duplicates
                operation_type = 'hardlink'
            else:
                # Use copy for unique files
                operation_type = 'copy'

            operation = OperationRecord(
                source_path=source,
                dest_path=dest_path,
                operation_type=operation_type,
                size=size
            )

            operations.append(operation)
            total_size += size

        cursor.close()

        plan = MigrationPlan(
            target_disk=str(self.target_base),
            destination_disks=[str(self.target_base)],
            operations=operations,
            total_size=total_size,
            file_count=len(operations)
        )

        self.logger.info(
            f"Migration plan created: {plan.file_count} files, "
            f"{plan.total_size:,} bytes"
        )

        return plan

    def execute_migration(
        self,
        operations: list[OperationRecord],
        dry_run: bool = False,
        progress_callback: Optional[Callable[[int, int, ProcessingStats], None]] = None
    ) -> ProcessingStats:
        """Execute migration operations

        Args:
            operations: List of operations to execute
            dry_run: Whether to perform a dry run
            progress_callback: Optional callback for progress updates

        Returns:
            ProcessingStats with execution statistics
        """
        self.logger.section("Executing Migration" + (" (DRY RUN)" if dry_run else ""))

        self._ensure_tables()

        stats = ProcessingStats()
        total_ops = len(operations)

        for operation in operations:
            stats.files_processed += 1

            if dry_run:
                # In dry run, just log what would happen
                self.logger.debug(
                    f"[DRY RUN] Would {operation.operation_type}: "
                    f"{operation.source_path} -> {operation.dest_path}"
                )
                stats.files_succeeded += 1
            else:
                # Execute actual migration
                success = self._execute_operation(operation)

                if success:
                    stats.files_succeeded += 1
                    stats.bytes_processed += operation.size
                else:
                    stats.files_failed += 1

            # Progress callback
            if progress_callback and stats.files_processed % 100 == 0:
                progress_callback(stats.files_processed, total_ops, stats)

            # Log progress
            if stats.files_processed % 1000 == 0:
                self.logger.progress(
                    stats.files_processed,
                    total_ops,
                    prefix="Operations executed",
                    bytes_processed=stats.bytes_processed,
                    elapsed_seconds=stats.elapsed_seconds
                )

        self.logger.info(
            f"Migration {'dry run' if dry_run else 'execution'} complete: "
            f"{stats.files_succeeded}/{total_ops} operations, "
            f"{stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s"
        )

        return stats

    def _execute_operation(self, operation: OperationRecord) -> bool:
        """Execute a single migration operation

        Args:
            operation: Operation to execute

        Returns:
            True if successful
        """
        operation.status = 'in_progress'
        operation.executed_at = datetime.now()

        try:
            # Select strategy based on operation type
            if operation.operation_type == 'copy':
                strategy = self.copy_strategy
            elif operation.operation_type == 'hardlink':
                strategy = self.hardlink_strategy
            elif operation.operation_type == 'symlink':
                strategy = self.symlink_strategy
            else:
                raise ValueError(f"Unknown operation type: {operation.operation_type}")

            # Execute migration
            success = strategy.migrate(
                operation.source_path,
                operation.dest_path,
                verify=self.processing_config.verify_operations
            )

            if success:
                operation.status = 'completed'
                operation.verified = True
                self._record_operation(operation)
                return True
            else:
                operation.status = 'failed'
                operation.error = "Migration failed"
                self._record_operation(operation)
                return False

        except Exception as e:
            operation.status = 'failed'
            operation.error = str(e)
            self._record_operation(operation)
            self.logger.error(f"Operation failed: {operation.source_path}: {e}")
            return False

    def _record_operation(self, operation: OperationRecord):
        """Record operation in database

        Args:
            operation: Operation to record
        """
        conn = self._get_connection()
        cursor = conn.cursor()

        cursor.execute("""
            INSERT INTO operations (
                source_path, dest_path, operation_type, size,
                status, error, executed_at, verified
            )
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            str(operation.source_path),
            str(operation.dest_path),
            operation.operation_type,
            operation.size,
            operation.status,
            operation.error,
            operation.executed_at,
            operation.verified
        ))

        conn.commit()
        cursor.close()

    def rollback(self, operation: OperationRecord) -> bool:
        """Rollback a migration operation

        Args:
            operation: Operation to rollback

        Returns:
            True if rollback successful
        """
        self.logger.warning(f"Rolling back: {operation.dest_path}")

        try:
            # Remove destination
            if operation.dest_path.exists():
                operation.dest_path.unlink()

            # Update database
            conn = self._get_connection()
            cursor = conn.cursor()

            cursor.execute("""
                UPDATE operations
                SET status = 'rolled_back'
                WHERE source_path = %s AND dest_path = %s
            """, (str(operation.source_path), str(operation.dest_path)))

            conn.commit()
            cursor.close()

            return True

        except Exception as e:
            self.logger.error(f"Rollback failed: {operation.dest_path}: {e}")
            return False

    def get_migration_stats(self) -> dict:
        """Get migration statistics

        Returns:
            Dictionary with statistics
        """
        conn = self._get_connection()
        cursor = conn.cursor()

        stats = {}

        # Total operations
        cursor.execute("SELECT COUNT(*) FROM operations")
        stats['total_operations'] = cursor.fetchone()[0]

        # Operations by status
        cursor.execute("""
            SELECT status, COUNT(*)
            FROM operations
            GROUP BY status
        """)

        for status, count in cursor.fetchall():
            stats[f'{status}_operations'] = count

        # Total size migrated
        cursor.execute("""
            SELECT COALESCE(SUM(size), 0)
            FROM operations
            WHERE status = 'completed'
        """)
        stats['total_size_migrated'] = cursor.fetchone()[0]

        cursor.close()

        return stats

    def verify_migrations(self) -> dict:
        """Verify completed migrations

        Returns:
            Dictionary with verification results
        """
        self.logger.subsection("Verifying Migrations")

        conn = self._get_connection()
        cursor = conn.cursor()

        cursor.execute("""
            SELECT source_path, dest_path, operation_type
            FROM operations
            WHERE status = 'completed' AND verified = FALSE
        """)

        operations = cursor.fetchall()
        cursor.close()

        results = {
            'total': len(operations),
            'verified': 0,
            'failed': 0
        }

        for source_str, dest_str, op_type in operations:
            source = Path(source_str)
            dest = Path(dest_str)

            # Verify destination exists
            if not dest.exists():
                results['failed'] += 1
                self.logger.warning(f"Verification failed: {dest} does not exist")
                continue

            # Verify based on operation type
            if op_type == 'hardlink':
                # Hardlinks must share the same inode as the source
                if source.exists() and source.stat().st_ino == dest.stat().st_ino:
                    results['verified'] += 1
                else:
                    results['failed'] += 1
            else:
                # For copy/symlink operations, the existence check is sufficient
                if dest.exists():
                    results['verified'] += 1
                else:
                    results['failed'] += 1

        self.logger.info(
            f"Verification complete: {results['verified']}/{results['total']} verified"
        )

        return results

    def close(self):
        """Close database connection"""
        if self._connection and not self._connection.closed:
            self._connection.close()

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()
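

# Example usage (illustrative sketch only): how a caller might plan, dry-run and
# then execute a migration using the context-manager support above. The constructor
# arguments for DatabaseConfig, ProcessingConfig and ProgressLogger are assumptions,
# not the actual signatures from ..shared.config / ..shared.logger -- adjust to the
# real definitions.
#
#     with MigrationEngine(
#         db_config=DatabaseConfig(host="localhost", port=5432, database="files",
#                                  user="migrator", password="secret"),
#         processing_config=ProcessingConfig(verify_operations=True),
#         logger=ProgressLogger(),
#         target_base=Path("/mnt/target"),
#     ) as engine:
#         plan = engine.plan_migration(category="photos")
#         dry_stats = engine.execute_migration(plan.operations, dry_run=True)
#         if dry_stats.files_failed == 0:
#             engine.execute_migration(plan.operations)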