initial
This commit is contained in:
27
app/migration/__init__.py
Normal file
27
app/migration/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Migration package exports"""
|
||||
from .copy import (
|
||||
CopyMigrationStrategy,
|
||||
FastCopyStrategy,
|
||||
SafeCopyStrategy,
|
||||
ReferenceCopyStrategy
|
||||
)
|
||||
from .hardlink import (
|
||||
HardlinkMigrationStrategy,
|
||||
SymlinkMigrationStrategy,
|
||||
DedupHardlinkStrategy
|
||||
)
|
||||
from .engine import MigrationEngine
|
||||
from ._protocols import IMigrationStrategy, IMigrationEngine
|
||||
|
||||
__all__ = [
|
||||
'CopyMigrationStrategy',
|
||||
'FastCopyStrategy',
|
||||
'SafeCopyStrategy',
|
||||
'ReferenceCopyStrategy',
|
||||
'HardlinkMigrationStrategy',
|
||||
'SymlinkMigrationStrategy',
|
||||
'DedupHardlinkStrategy',
|
||||
'MigrationEngine',
|
||||
'IMigrationStrategy',
|
||||
'IMigrationEngine',
|
||||
]
|
||||
107
app/migration/_protocols.py
Normal file
107
app/migration/_protocols.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""Protocol definitions for the migration package"""
|
||||
from typing import Protocol
|
||||
from pathlib import Path
|
||||
from ..shared.models import OperationRecord
|
||||
|
||||
|
||||
class IMigrationStrategy(Protocol):
    """Structural interface for file-migration strategies.

    Concrete strategies (copy, hardlink, symlink, reflink) satisfy this
    protocol by duck typing; they do not need to inherit from it.
    """

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate a file from source to destination

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation after it completes

        Returns:
            True if migration successful
        """
        ...

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if migration is possible
        """
        ...

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds

        Args:
            source: Source file path

        Returns:
            Estimated time in seconds
        """
        ...

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration

        Args:
            source: Source file path

        Returns:
            True if cleanup successful
        """
        ...
|
||||
|
||||
|
||||
class IMigrationEngine(Protocol):
    """Structural interface for a migration engine.

    NOTE(review): the concrete ``MigrationEngine`` in this package does not
    match this protocol exactly — its ``plan_migration`` takes
    ``(disk, category)`` and returns a ``MigrationPlan``, and its
    ``execute_migration`` takes an extra ``progress_callback`` and returns
    ``ProcessingStats`` rather than ``dict``. Confirm which signature is
    authoritative and align the other.
    """

    def plan_migration(
        self,
        disk: str,
        target_base: Path
    ) -> list[OperationRecord]:
        """Plan migration for a disk

        Args:
            disk: Disk identifier
            target_base: Target base directory

        Returns:
            List of planned operations
        """
        ...

    def execute_migration(
        self,
        operations: list[OperationRecord],
        dry_run: bool = False
    ) -> dict:
        """Execute migration operations

        Args:
            operations: List of operations to execute
            dry_run: Whether to perform a dry run

        Returns:
            Dictionary with execution statistics
        """
        ...

    def rollback(self, operation: OperationRecord) -> bool:
        """Rollback a migration operation

        Args:
            operation: Operation to rollback

        Returns:
            True if rollback successful
        """
        ...
|
||||
268
app/migration/copy.py
Normal file
268
app/migration/copy.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""Copy-based migration strategy"""
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import os
|
||||
|
||||
from ..shared.logger import ProgressLogger
|
||||
|
||||
|
||||
class CopyMigrationStrategy:
    """Copy files to destination with verification.

    Copies preserve metadata by default (``shutil.copy2``) and are verified
    by size comparison plus either a checksum (files > 1 MB) or a direct
    content comparison (small files).
    """

    def __init__(
        self,
        logger: Optional[ProgressLogger] = None,
        preserve_metadata: bool = True,
        verify_checksums: bool = True
    ):
        """Initialize copy migration strategy

        Args:
            logger: Optional progress logger
            preserve_metadata: Whether to preserve file metadata
            verify_checksums: Whether to verify checksums after copy
        """
        self.logger = logger
        self.preserve_metadata = preserve_metadata
        self.verify_checksums = verify_checksums

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate file by copying

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation

        Returns:
            True if migration successful
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f"Source file does not exist: {source}")
            return False

        # Create destination directory
        destination.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Copy file (copy2 also preserves timestamps/permissions)
            if self.preserve_metadata:
                shutil.copy2(source, destination)
            else:
                shutil.copy(source, destination)

            # Verify if requested
            if verify and self.verify_checksums:
                if not self._verify_copy(source, destination):
                    if self.logger:
                        self.logger.error(f"Verification failed: {source} -> {destination}")
                    destination.unlink()
                    return False

            return True

        except Exception as e:
            if self.logger:
                self.logger.error(f"Copy failed: {source} -> {destination}: {e}")
            # Bug fix: an interrupted/failed copy can leave a truncated file
            # at the destination; remove it (best effort) so a later pass
            # never mistakes the partial copy for a completed one.
            try:
                if destination.exists():
                    destination.unlink()
            except OSError:
                pass
            return False

    def _verify_copy(self, source: Path, destination: Path) -> bool:
        """Verify copied file

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if verification successful
        """
        # Size mismatch is a cheap, definitive failure check.
        source_size = source.stat().st_size
        dest_size = destination.stat().st_size

        if source_size != dest_size:
            return False

        # Compare checksums for files larger than 1MB
        if source_size > 1024 * 1024:
            # Project-local helper; imported lazily to avoid the cost when
            # only small files are verified.
            from ..deduplication.chunker import hash_file

            source_hash = hash_file(source)
            dest_hash = hash_file(destination)

            return source_hash == dest_hash

        # For small files, compare content directly
        with open(source, 'rb') as f1, open(destination, 'rb') as f2:
            return f1.read() == f2.read()

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if migration is possible
        """
        if not source.exists():
            return False

        # Check if destination directory is writable
        dest_dir = destination.parent
        if dest_dir.exists():
            return os.access(dest_dir, os.W_OK)

        # Destination dir does not exist yet: walk up to the nearest
        # existing ancestor (where mkdir would start) and check that.
        parent = dest_dir.parent
        while not parent.exists() and parent != parent.parent:
            parent = parent.parent

        return parent.exists() and os.access(parent, os.W_OK)

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds

        Args:
            source: Source file path

        Returns:
            Estimated time in seconds
        """
        if not source.exists():
            return 0.0

        size = source.stat().st_size

        # Estimate based on typical copy speed (100 MB/s)
        typical_speed = 100 * 1024 * 1024  # bytes per second
        return size / typical_speed

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration

        Args:
            source: Source file path

        Returns:
            True if cleanup successful
        """
        try:
            if source.exists():
                source.unlink()
            return True
        except Exception as e:
            if self.logger:
                self.logger.warning(f"Failed to cleanup {source}: {e}")
            return False
|
||||
|
||||
|
||||
class FastCopyStrategy(CopyMigrationStrategy):
    """Speed-oriented variant of :class:`CopyMigrationStrategy`.

    Behaves exactly like the base class except checksum verification is
    disabled; file metadata is still preserved on copy.
    """

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize fast copy strategy"""
        super().__init__(logger=logger, preserve_metadata=True, verify_checksums=False)
|
||||
|
||||
|
||||
class SafeCopyStrategy(CopyMigrationStrategy):
    """Safety-oriented variant of :class:`CopyMigrationStrategy`.

    Behaves exactly like the base class with both metadata preservation
    and checksum verification enabled.
    """

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize safe copy strategy"""
        super().__init__(logger=logger, preserve_metadata=True, verify_checksums=True)
|
||||
|
||||
|
||||
class ReferenceCopyStrategy:
    """Copy-on-write ("reflink") copy strategy.

    Attempts ``cp --reflink=auto`` so filesystems with CoW support
    (btrfs, xfs, ...) share data blocks instead of duplicating them;
    falls back to a regular metadata-preserving copy when the reflink
    invocation fails.
    """

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize reflink copy strategy"""
        self.logger = logger

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate using reflink (copy-on-write)

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation.
                NOTE(review): accepted but currently unused — no
                post-copy verification is performed; confirm intent.

        Returns:
            True if migration successful
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f"Source file does not exist: {source}")
            return False

        destination.parent.mkdir(parents=True, exist_ok=True)

        try:
            import subprocess

            # `--reflink=auto` already degrades gracefully on non-CoW
            # filesystems; a non-zero exit still gets an explicit
            # shutil fallback below.
            proc = subprocess.run(
                ['cp', '--reflink=auto', str(source), str(destination)],
                capture_output=True,
                check=False
            )
            if proc.returncode != 0:
                shutil.copy2(source, destination)
            return True

        except Exception as e:
            if self.logger:
                self.logger.error(f"Reflink copy failed: {source} -> {destination}: {e}")
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible"""
        if not source.exists():
            return False

        target_dir = destination.parent
        # Optimistically allow migration when the directory must still be
        # created; otherwise require write access to it.
        return os.access(target_dir, os.W_OK) if target_dir.exists() else True

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time (reflinks are fast)"""
        return 0.1  # Reflinks are nearly instant

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file"""
        try:
            if source.exists():
                source.unlink()
            return True
        except Exception as e:
            if self.logger:
                self.logger.warning(f"Failed to cleanup {source}: {e}")
            return False
|
||||
454
app/migration/engine.py
Normal file
454
app/migration/engine.py
Normal file
@@ -0,0 +1,454 @@
|
||||
"""Migration engine"""
|
||||
from pathlib import Path
|
||||
from typing import Optional, Callable
|
||||
from datetime import datetime
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_batch
|
||||
|
||||
from .copy import CopyMigrationStrategy, SafeCopyStrategy
|
||||
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
|
||||
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
|
||||
from ..shared.config import DatabaseConfig, ProcessingConfig
|
||||
from ..shared.logger import ProgressLogger
|
||||
|
||||
|
||||
class MigrationEngine:
    """Engine for migrating files.

    Plans migrations from the ``files`` table, executes them with the
    appropriate strategy (copy for unique files, hardlink for duplicates,
    symlink on request), and records every operation in an ``operations``
    table for auditing, statistics, and rollback.
    """

    def __init__(
        self,
        db_config: DatabaseConfig,
        processing_config: ProcessingConfig,
        logger: ProgressLogger,
        target_base: Path
    ):
        """Initialize migration engine

        Args:
            db_config: Database configuration
            processing_config: Processing configuration
            logger: Progress logger
            target_base: Target base directory for migrations
        """
        self.db_config = db_config
        self.processing_config = processing_config
        self.logger = logger
        self.target_base = Path(target_base)
        self._connection = None  # lazily-opened psycopg2 connection

        # Strategy instances are stateless and shared across operations.
        self.copy_strategy = SafeCopyStrategy(logger=logger)
        self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
        self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)

    def _get_connection(self):
        """Get or create database connection (reconnects if closed)."""
        if self._connection is None or self._connection.closed:
            self._connection = psycopg2.connect(
                host=self.db_config.host,
                port=self.db_config.port,
                database=self.db_config.database,
                user=self.db_config.user,
                password=self.db_config.password
            )
        return self._connection

    def _ensure_tables(self):
        """Ensure migration tables exist"""
        conn = self._get_connection()
        cursor = conn.cursor()

        # Create operations table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS operations (
                id SERIAL PRIMARY KEY,
                source_path TEXT NOT NULL,
                target_path TEXT NOT NULL,
                operation_type TEXT NOT NULL,
                size BIGINT DEFAULT 0,
                status TEXT DEFAULT 'pending',
                error TEXT,
                executed_at TIMESTAMP,
                verified BOOLEAN DEFAULT FALSE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create index on status
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_operations_status
            ON operations(status)
        """)

        conn.commit()
        cursor.close()

    def plan_migration(
        self,
        disk: Optional[str] = None,
        category: Optional[str] = None
    ) -> MigrationPlan:
        """Plan migration for files

        Args:
            disk: Optional disk filter
            category: Optional category filter

        Returns:
            MigrationPlan with planned operations
        """
        self.logger.section("Planning Migration")

        conn = self._get_connection()
        cursor = conn.cursor()

        # Build query from optional filters; values are passed as
        # parameters, never interpolated into the SQL string.
        conditions = ["category IS NOT NULL"]
        params = []

        if disk:
            conditions.append("disk_label = %s")
            params.append(disk)

        if category:
            conditions.append("category = %s")
            params.append(category)

        query = f"""
            SELECT path, size, category, duplicate_of
            FROM files
            WHERE {' AND '.join(conditions)}
            ORDER BY category, path
        """

        cursor.execute(query, params)
        files = cursor.fetchall()

        self.logger.info(f"Found {len(files)} files to migrate")

        operations = []
        total_size = 0

        for path_str, size, file_category, duplicate_of in files:
            source = Path(path_str)

            # Destination layout: <target_base>/<category>/<filename>
            target_path = self.target_base / file_category / source.name

            # Duplicates become hardlinks (no extra space); unique
            # files are physically copied.
            operation_type = 'hardlink' if duplicate_of else 'copy'

            operations.append(OperationRecord(
                source_path=source,
                target_path=target_path,
                operation_type=operation_type,
                size=size
            ))
            total_size += size

        cursor.close()

        plan = MigrationPlan(
            target_disk=str(self.target_base),
            destination_disks=[str(self.target_base)],
            operations=operations,
            total_size=total_size,
            file_count=len(operations)
        )

        self.logger.info(
            f"Migration plan created: {plan.file_count} files, "
            f"{plan.total_size:,} bytes"
        )

        return plan

    def execute_migration(
        self,
        operations: list[OperationRecord],
        dry_run: bool = False,
        progress_callback: Optional[Callable[[int, int, ProcessingStats], None]] = None
    ) -> ProcessingStats:
        """Execute migration operations

        Args:
            operations: List of operations to execute
            dry_run: Whether to perform a dry run
            progress_callback: Optional callback for progress updates

        Returns:
            ProcessingStats with execution statistics
        """
        self.logger.section("Executing Migration" + (" (DRY RUN)" if dry_run else ""))

        self._ensure_tables()

        stats = ProcessingStats()
        total_ops = len(operations)

        for operation in operations:
            stats.files_processed += 1

            if dry_run:
                # In dry run, just log what would happen
                self.logger.debug(
                    f"[DRY RUN] Would {operation.operation_type}: "
                    f"{operation.source_path} -> {operation.target_path}"
                )
                stats.files_succeeded += 1
            else:
                success = self._execute_operation(operation)

                if success:
                    stats.files_succeeded += 1
                    stats.bytes_processed += operation.size
                else:
                    stats.files_failed += 1

            # Callback every 100 ops; log every 1000 to keep output light.
            if progress_callback and stats.files_processed % 100 == 0:
                progress_callback(stats.files_processed, total_ops, stats)

            if stats.files_processed % 1000 == 0:
                self.logger.progress(
                    stats.files_processed,
                    total_ops,
                    prefix="Operations executed",
                    bytes_processed=stats.bytes_processed,
                    elapsed_seconds=stats.elapsed_seconds
                )

        self.logger.info(
            f"Migration {'dry run' if dry_run else 'execution'} complete: "
            f"{stats.files_succeeded}/{total_ops} operations, "
            f"{stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s"
        )

        return stats

    def _execute_operation(self, operation: OperationRecord) -> bool:
        """Execute a single migration operation

        Args:
            operation: Operation to execute

        Returns:
            True if successful
        """
        operation.status = 'in_progress'
        operation.executed_at = datetime.now()

        try:
            # Select strategy based on operation type
            if operation.operation_type == 'copy':
                strategy = self.copy_strategy
            elif operation.operation_type == 'hardlink':
                strategy = self.hardlink_strategy
            elif operation.operation_type == 'symlink':
                strategy = self.symlink_strategy
            else:
                raise ValueError(f"Unknown operation type: {operation.operation_type}")

            success = strategy.migrate(
                operation.source_path,
                operation.target_path,
                verify=self.processing_config.verify_operations
            )

            operation.status = 'completed' if success else 'failed'
            if success:
                operation.verified = True
            else:
                operation.error = "Migration failed"
            self._record_operation(operation)
            return success

        except Exception as e:
            operation.status = 'failed'
            operation.error = str(e)
            self._record_operation(operation)
            self.logger.error(f"Operation failed: {operation.source_path}: {e}")
            return False

    def _record_operation(self, operation: OperationRecord):
        """Record operation in database

        Args:
            operation: Operation to record
        """
        conn = self._get_connection()
        cursor = conn.cursor()

        # Bug fix: the schema created by _ensure_tables names the byte-count
        # column `size` (and get_migration_stats sums `size`); the previous
        # INSERT targeted a non-existent `bytes_processed` column, which made
        # every insert raise an UndefinedColumn error.
        cursor.execute("""
            INSERT INTO operations (
                source_path, target_path, operation_type, size,
                status, error, executed_at, verified
            )
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            str(operation.source_path),
            str(operation.target_path),
            operation.operation_type,
            operation.size,
            operation.status,
            operation.error,
            operation.executed_at,
            operation.verified
        ))

        conn.commit()
        cursor.close()

    def rollback(self, operation: OperationRecord) -> bool:
        """Rollback a migration operation

        Args:
            operation: Operation to rollback

        Returns:
            True if rollback successful
        """
        self.logger.warning(f"Rolling back: {operation.target_path}")

        try:
            # Remove destination
            if operation.target_path.exists():
                operation.target_path.unlink()

            # Update database
            conn = self._get_connection()
            cursor = conn.cursor()

            cursor.execute("""
                UPDATE operations
                SET status = 'rolled_back'
                WHERE source_path = %s AND target_path = %s
            """, (str(operation.source_path), str(operation.target_path)))

            conn.commit()
            cursor.close()

            return True

        except Exception as e:
            self.logger.error(f"Rollback failed: {operation.target_path}: {e}")
            return False

    def get_migration_stats(self) -> dict:
        """Get migration statistics

        Returns:
            Dictionary with statistics
        """
        conn = self._get_connection()
        cursor = conn.cursor()

        stats = {}

        # Total operations
        cursor.execute("SELECT COUNT(*) FROM operations")
        stats['total_operations'] = cursor.fetchone()[0]

        # Operations by status
        cursor.execute("""
            SELECT status, COUNT(*)
            FROM operations
            GROUP BY status
        """)

        for status, count in cursor.fetchall():
            stats[f'{status}_operations'] = count

        # Total size migrated
        cursor.execute("""
            SELECT COALESCE(SUM(size), 0)
            FROM operations
            WHERE status = 'completed'
        """)
        stats['total_size_migrated'] = cursor.fetchone()[0]

        cursor.close()

        return stats

    def verify_migrations(self) -> dict:
        """Verify completed migrations

        Returns:
            Dictionary with verification results
        """
        self.logger.subsection("Verifying Migrations")

        conn = self._get_connection()
        cursor = conn.cursor()

        cursor.execute("""
            SELECT source_path, target_path, operation_type
            FROM operations
            WHERE status = 'completed' AND verified = FALSE
        """)

        operations = cursor.fetchall()
        cursor.close()

        results = {
            'total': len(operations),
            'verified': 0,
            'failed': 0
        }

        for source_str, dest_str, op_type in operations:
            source = Path(source_str)
            dest = Path(dest_str)

            # Destination must exist for any operation type.
            if not dest.exists():
                results['failed'] += 1
                self.logger.warning(f"Verification failed: {dest} does not exist")
                continue

            if op_type == 'hardlink':
                # Hardlinks must share an inode with the surviving source.
                if source.exists() and source.stat().st_ino == dest.stat().st_ino:
                    results['verified'] += 1
                else:
                    results['failed'] += 1
            else:
                # Existence was already confirmed above; copies/symlinks pass.
                results['verified'] += 1

        self.logger.info(
            f"Verification complete: {results['verified']}/{results['total']} verified"
        )

        return results

    def close(self):
        """Close database connection"""
        if self._connection and not self._connection.closed:
            self._connection.close()

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()
|
||||
377
app/migration/hardlink.py
Normal file
377
app/migration/hardlink.py
Normal file
@@ -0,0 +1,377 @@
|
||||
"""Hardlink-based migration strategy"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from ..shared.logger import ProgressLogger
|
||||
|
||||
|
||||
class HardlinkMigrationStrategy:
    """Create hardlinks to files instead of copying.

    Hardlinks are instant and consume no extra data space, but both paths
    must live on the same filesystem; verification compares inode numbers.
    """

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize hardlink migration strategy

        Args:
            logger: Optional progress logger
        """
        self.logger = logger

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate file by creating hardlink

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation

        Returns:
            True if migration successful
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f"Source file does not exist: {source}")
            return False

        # Bug fix: create the destination directory BEFORE the filesystem
        # check. The previous order stat()ed a possibly non-existent parent,
        # which made _same_filesystem return False and incorrectly rejected
        # valid same-filesystem migrations into new directories.
        destination.parent.mkdir(parents=True, exist_ok=True)

        # Hardlinks cannot span filesystems.
        if not self._same_filesystem(source, destination.parent):
            if self.logger:
                self.logger.warning(
                    f"Cannot hardlink across filesystems: {source} -> {destination}"
                )
            return False

        try:
            # Create hardlink
            os.link(source, destination)

            # Verify if requested
            if verify:
                if not self._verify_hardlink(source, destination):
                    if self.logger:
                        self.logger.error(f"Verification failed: {source} -> {destination}")
                    destination.unlink()
                    return False

            return True

        except FileExistsError:
            if self.logger:
                self.logger.warning(f"Destination already exists: {destination}")
            return False

        except Exception as e:
            if self.logger:
                self.logger.error(f"Hardlink failed: {source} -> {destination}: {e}")
            return False

    def _same_filesystem(self, path1: Path, path2: Path) -> bool:
        """Check if two paths are on the same filesystem

        Args:
            path1: First path
            path2: Second path

        Returns:
            True if on same filesystem (False if either stat fails)
        """
        try:
            # Same st_dev means same mounted filesystem.
            return path1.stat().st_dev == path2.stat().st_dev
        except Exception:
            return False

    def _verify_hardlink(self, source: Path, destination: Path) -> bool:
        """Verify hardlink

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if verification successful
        """
        try:
            # Hardlinked paths share the same inode.
            return source.stat().st_ino == destination.stat().st_ino
        except Exception:
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if migration is possible
        """
        if not source.exists():
            return False

        # Check if on same filesystem
        dest_dir = destination.parent
        if dest_dir.exists():
            return self._same_filesystem(source, dest_dir)

        # Destination dir does not exist yet: check the nearest existing
        # ancestor, where mkdir would create the tree.
        parent = dest_dir.parent
        while not parent.exists() and parent != parent.parent:
            parent = parent.parent

        return parent.exists() and self._same_filesystem(source, parent)

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds

        Args:
            source: Source file path

        Returns:
            Estimated time in seconds (hardlinks are instant)
        """
        return 0.01  # Hardlinks are nearly instant

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration

        Note: For hardlinks, we typically don't remove the source
        immediately as both links point to the same inode.

        Args:
            source: Source file path

        Returns:
            True (no cleanup needed for hardlinks)
        """
        # Both source and destination point to the same data, so removing
        # the source would not free space and is intentionally skipped.
        return True
|
||||
|
||||
|
||||
class SymlinkMigrationStrategy:
    """Create symbolic links to files.

    Links may be absolute (default) or relative to the destination's
    directory; verification resolves the link and compares it against
    the resolved source.
    """

    def __init__(
        self,
        logger: Optional[ProgressLogger] = None,
        absolute_links: bool = True
    ):
        """Initialize symlink migration strategy

        Args:
            logger: Optional progress logger
            absolute_links: Whether to create absolute symlinks
        """
        self.logger = logger
        self.absolute_links = absolute_links

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate file by creating symlink

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation

        Returns:
            True if migration successful
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f"Source file does not exist: {source}")
            return False

        destination.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Link target is either the fully resolved source path or a
            # path relative to the symlink's own directory.
            link_target = (
                source.resolve()
                if self.absolute_links
                else os.path.relpath(source, destination.parent)
            )
            destination.symlink_to(link_target)

            if verify and not self._verify_symlink(destination, source):
                if self.logger:
                    self.logger.error(f"Verification failed: {source} -> {destination}")
                destination.unlink()
                return False

            return True

        except FileExistsError:
            if self.logger:
                self.logger.warning(f"Destination already exists: {destination}")
            return False

        except Exception as e:
            if self.logger:
                self.logger.error(f"Symlink failed: {source} -> {destination}: {e}")
            return False

    def _verify_symlink(self, symlink: Path, expected_target: Path) -> bool:
        """Verify symlink

        Args:
            symlink: Symlink path
            expected_target: Expected target path

        Returns:
            True if verification successful
        """
        try:
            # Must actually be a symlink that resolves to the same real
            # file as the expected target.
            return (
                symlink.is_symlink()
                and symlink.resolve() == expected_target.resolve()
            )
        except Exception:
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if migration is possible
        """
        if not source.exists():
            return False

        target_dir = destination.parent
        # Optimistically allow migration when the directory must still be
        # created; otherwise require write access to it.
        return os.access(target_dir, os.W_OK) if target_dir.exists() else True

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds

        Args:
            source: Source file path

        Returns:
            Estimated time in seconds (symlinks are instant)
        """
        return 0.01  # Symlinks are instant

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration

        Note: For symlinks, we don't remove the source as the
        symlink points to it.

        Args:
            source: Source file path

        Returns:
            True (no cleanup needed for symlinks)
        """
        # Removing the source would leave the symlink dangling.
        return True
|
||||
|
||||
|
||||
class DedupHardlinkStrategy(HardlinkMigrationStrategy):
    """Hardlink strategy for deduplication.

    Replaces duplicate files with hardlinks to a canonical copy to save
    space, using a rename-to-backup scheme so the duplicate can be
    restored if linking fails.
    """

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize dedup hardlink strategy"""
        super().__init__(logger=logger)

    def deduplicate(
        self,
        canonical: Path,
        duplicate: Path
    ) -> bool:
        """Replace duplicate with hardlink to canonical

        Args:
            canonical: Canonical file path
            duplicate: Duplicate file path

        Returns:
            True if deduplication successful
        """
        if not canonical.exists():
            if self.logger:
                self.logger.error(f"Canonical file does not exist: {canonical}")
            return False

        if not duplicate.exists():
            if self.logger:
                self.logger.error(f"Duplicate file does not exist: {duplicate}")
            return False

        # Already hardlinked (same inode) — nothing to do.
        if self._verify_hardlink(canonical, duplicate):
            return True

        # Hardlinks cannot span filesystems.
        if not self._same_filesystem(canonical, duplicate):
            if self.logger:
                self.logger.warning(
                    f"Cannot hardlink across filesystems: {canonical} -> {duplicate}"
                )
            return False

        # Bug fix: compute the backup path and track whether the rename
        # actually happened BEFORE entering the try block. Previously,
        # an exception raised before `backup` was assigned made the
        # handler itself crash with UnboundLocalError, and the restore
        # ran even when no rename had taken place.
        backup = duplicate.with_suffix(duplicate.suffix + '.bak')
        renamed = False

        try:
            # Park the duplicate so its path is free for the hardlink.
            duplicate.rename(backup)
            renamed = True

            # Create hardlink
            os.link(canonical, duplicate)

            # Remove backup
            backup.unlink()

            return True

        except Exception as e:
            if self.logger:
                self.logger.error(f"Deduplication failed: {duplicate}: {e}")

            # Restore the original duplicate only if it was moved aside.
            if renamed and backup.exists():
                backup.rename(duplicate)

            return False
|
||||
Reference in New Issue
Block a user