clean up code

This commit is contained in:
mike
2025-12-13 12:00:34 +01:00
parent 2b2c575385
commit 7ce8c8c73d
10 changed files with 158 additions and 1471 deletions

View File

@@ -1,27 +1,5 @@
"""Migration package exports"""
from .copy import (
CopyMigrationStrategy,
FastCopyStrategy,
SafeCopyStrategy,
ReferenceCopyStrategy
)
from .hardlink import (
HardlinkMigrationStrategy,
SymlinkMigrationStrategy,
DedupHardlinkStrategy
)
from .copy import CopyMigrationStrategy, FastCopyStrategy, SafeCopyStrategy, ReferenceCopyStrategy
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy, DedupHardlinkStrategy
from .engine import MigrationEngine
from ._protocols import IMigrationStrategy, IMigrationEngine
__all__ = [
'CopyMigrationStrategy',
'FastCopyStrategy',
'SafeCopyStrategy',
'ReferenceCopyStrategy',
'HardlinkMigrationStrategy',
'SymlinkMigrationStrategy',
'DedupHardlinkStrategy',
'MigrationEngine',
'IMigrationStrategy',
'IMigrationEngine',
]
__all__ = ['CopyMigrationStrategy', 'FastCopyStrategy', 'SafeCopyStrategy', 'ReferenceCopyStrategy', 'HardlinkMigrationStrategy', 'SymlinkMigrationStrategy', 'DedupHardlinkStrategy', 'MigrationEngine', 'IMigrationStrategy', 'IMigrationEngine']

View File

@@ -1,107 +1,28 @@
"""Protocol definitions for the migration package"""
from typing import Protocol
from pathlib import Path
from ..shared.models import OperationRecord
class IMigrationStrategy(Protocol):
    """Protocol for migration strategies."""

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Migrate a file from source to destination.

        Args:
            source: Source file path.
            destination: Destination file path.
            verify: Whether to verify the operation.

        Returns:
            True if migration successful.
        """
        ...

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible.

        Args:
            source: Source file path.
            destination: Destination file path.

        Returns:
            True if migration is possible.
        """
        ...

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds.

        Args:
            source: Source file path.

        Returns:
            Estimated time in seconds.
        """
        ...

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration.

        Args:
            source: Source file path.

        Returns:
            True if cleanup successful.
        """
        ...
class IMigrationEngine(Protocol):
    """Protocol for migration engine."""

    # NOTE(review): MigrationEngine (imported from .engine by the package
    # __init__) declares plan_migration(disk, category) -> MigrationPlan and
    # execute_migration(...) -> ProcessingStats, which do not match the
    # signatures below -- confirm which side is authoritative.

    def plan_migration(self, disk: str, target_base: Path) -> list[OperationRecord]:
        """Plan migration for a disk.

        Args:
            disk: Disk identifier.
            target_base: Target base directory.

        Returns:
            List of planned operations.
        """
        ...

    def execute_migration(self, operations: list[OperationRecord], dry_run: bool = False) -> dict:
        """Execute migration operations.

        Args:
            operations: List of operations to execute.
            dry_run: Whether to perform a dry run.

        Returns:
            Dictionary with execution statistics.
        """
        ...

    def rollback(self, operation: OperationRecord) -> bool:
        """Rollback a migration operation.

        Args:
            operation: Operation to rollback.

        Returns:
            True if rollback successful.
        """
        ...

View File

@@ -1,268 +1,129 @@
"""Copy-based migration strategy"""
import shutil
from pathlib import Path
from typing import Optional
import os
from ..shared.logger import ProgressLogger
class CopyMigrationStrategy:
    """Copy files to destination with verification."""

    def __init__(
        self,
        logger: Optional[ProgressLogger] = None,
        preserve_metadata: bool = True,
        verify_checksums: bool = True,
    ):
        """Initialize copy migration strategy.

        Args:
            logger: Optional progress logger.
            preserve_metadata: Whether to preserve file metadata.
            verify_checksums: Whether to verify checksums after copy.
        """
        self.logger = logger
        self.preserve_metadata = preserve_metadata
        self.verify_checksums = verify_checksums

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Migrate file by copying.

        Args:
            source: Source file path.
            destination: Destination file path.
            verify: Whether to verify the operation.

        Returns:
            True if migration successful, False on any failure (logged when a
            logger is configured).
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False
        try:
            # Directory creation lives inside the try so that a failure here is
            # reported as a failed copy instead of escaping as an exception.
            destination.parent.mkdir(parents=True, exist_ok=True)
            if self.preserve_metadata:
                shutil.copy2(source, destination)
            else:
                shutil.copy(source, destination)
            # Verification needs both the per-call flag and the instance setting.
            if verify and self.verify_checksums:
                if not self._verify_copy(source, destination):
                    if self.logger:
                        self.logger.error(f'Verification failed: {source} -> {destination}')
                    # Do not leave a copy behind that failed verification.
                    destination.unlink()
                    return False
            return True
        except Exception as e:
            if self.logger:
                self.logger.error(f'Copy failed: {source} -> {destination}: {e}')
            return False

    def _verify_copy(self, source: Path, destination: Path) -> bool:
        """Verify copied file.

        Args:
            source: Source file path.
            destination: Destination file path.

        Returns:
            True if verification successful.
        """
        source_size = source.stat().st_size
        dest_size = destination.stat().st_size
        if source_size != dest_size:
            return False
        # Files larger than 1 MiB are compared by hash.
        if source_size > 1024 * 1024:
            from ..deduplication.chunker import hash_file
            source_hash = hash_file(source)
            dest_hash = hash_file(destination)
            return source_hash == dest_hash
        # Small files are compared byte-for-byte.
        with open(source, 'rb') as f1, open(destination, 'rb') as f2:
            return f1.read() == f2.read()

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible.

        Args:
            source: Source file path.
            destination: Destination file path.

        Returns:
            True if the source exists and the destination directory (or its
            nearest existing ancestor) is writable.
        """
        if not source.exists():
            return False
        dest_dir = destination.parent
        if dest_dir.exists():
            return os.access(dest_dir, os.W_OK)
        # Walk up to the nearest existing ancestor; the filesystem root is its
        # own parent, which terminates the loop.
        parent = dest_dir.parent
        while not parent.exists() and parent != parent.parent:
            parent = parent.parent
        return parent.exists() and os.access(parent, os.W_OK)

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds.

        Args:
            source: Source file path.

        Returns:
            Estimated time in seconds (0.0 when the source does not exist).
        """
        if not source.exists():
            return 0.0
        size = source.stat().st_size
        # Assumed typical copy speed: 100 MB/s, expressed in bytes per second.
        typical_speed = 100 * 1024 * 1024
        return size / typical_speed

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration.

        Args:
            source: Source file path.

        Returns:
            True if cleanup successful (including when the source is already gone).
        """
        try:
            if source.exists():
                source.unlink()
            return True
        except Exception as e:
            if self.logger:
                self.logger.warning(f'Failed to cleanup {source}: {e}')
            return False
class FastCopyStrategy(CopyMigrationStrategy):
    """Fast copy strategy without verification."""

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize fast copy strategy.

        Args:
            logger: Optional progress logger.
        """
        super().__init__(logger=logger, preserve_metadata=True, verify_checksums=False)
class SafeCopyStrategy(CopyMigrationStrategy):
    """Safe copy strategy with full verification."""

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize safe copy strategy.

        Args:
            logger: Optional progress logger.
        """
        super().__init__(logger=logger, preserve_metadata=True, verify_checksums=True)
class ReferenceCopyStrategy:
    """Create reference copy using reflinks (CoW) if supported."""

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize reflink copy strategy.

        Args:
            logger: Optional progress logger.
        """
        self.logger = logger

    @staticmethod
    def _try_reflink(source: Path, destination: Path) -> bool:
        """Run ``cp --reflink=auto`` and report whether it succeeded."""
        import subprocess
        try:
            # '--' stops option parsing so a path starting with '-' is not
            # mistaken for a cp option.
            result = subprocess.run(
                ['cp', '--reflink=auto', '--', str(source), str(destination)],
                capture_output=True,
                check=False,
            )
        except OSError:
            # No usable `cp` binary: let the caller fall back to a plain copy.
            return False
        return result.returncode == 0

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Migrate using reflink (copy-on-write), falling back to a regular copy.

        Args:
            source: Source file path.
            destination: Destination file path.
            verify: Accepted for interface compatibility; this strategy
                performs no verification step.

        Returns:
            True if migration successful.
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
            if not self._try_reflink(source, destination):
                # cp failed or is unavailable: fall back to a regular copy.
                shutil.copy2(source, destination)
            return True
        except Exception as e:
            if self.logger:
                self.logger.error(f'Reflink copy failed: {source} -> {destination}: {e}')
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible.

        Returns:
            False if the source is missing or an existing destination directory
            is not writable; True otherwise.
        """
        if not source.exists():
            return False
        dest_dir = destination.parent
        if dest_dir.exists():
            return os.access(dest_dir, os.W_OK)
        return True

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds (constant; reflinks are nearly instant)."""
        return 0.1

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file.

        Returns:
            True if the source was removed or was already absent.
        """
        try:
            if source.exists():
                source.unlink()
            return True
        except Exception as e:
            if self.logger:
                self.logger.warning(f'Failed to cleanup {source}: {e}')
            return False

View File

@@ -1,254 +1,100 @@
"""Migration engine"""
from pathlib import Path
from typing import Optional, Callable
from datetime import datetime
import psycopg2
from psycopg2.extras import execute_batch
from .copy import CopyMigrationStrategy, SafeCopyStrategy
from .hardlink import HardlinkMigrationStrategy, SymlinkMigrationStrategy
from ..shared.models import OperationRecord, ProcessingStats, MigrationPlan
from ..shared.config import DatabaseConfig, ProcessingConfig
from ..shared.logger import ProgressLogger
class MigrationEngine:
"""Engine for migrating files"""
def __init__(
    self,
    db_config: DatabaseConfig,
    processing_config: ProcessingConfig,
    logger: ProgressLogger,
    target_base: Path,
):
    """Initialize migration engine.

    Args:
        db_config: Database configuration.
        processing_config: Processing configuration.
        logger: Progress logger.
        target_base: Target base directory for migrations.
    """
    self.db_config = db_config
    self.processing_config = processing_config
    self.logger = logger
    self.target_base = Path(target_base)
    # No connection is opened here; _get_connection() creates it on first use.
    self._connection = None
    # One strategy instance per supported operation type.
    self.copy_strategy = SafeCopyStrategy(logger=logger)
    self.hardlink_strategy = HardlinkMigrationStrategy(logger=logger)
    self.symlink_strategy = SymlinkMigrationStrategy(logger=logger)
def _get_connection(self):
"""Get or create database connection"""
if self._connection is None or self._connection.closed:
self._connection = psycopg2.connect(
host=self.db_config.host,
port=self.db_config.port,
database=self.db_config.database,
user=self.db_config.user,
password=self.db_config.password
)
self._connection = psycopg2.connect(host=self.db_config.host, port=self.db_config.port, database=self.db_config.database, user=self.db_config.user, password=self.db_config.password)
return self._connection
def _ensure_tables(self):
"""Ensure migration tables exist"""
conn = self._get_connection()
cursor = conn.cursor()
# Create operations table
cursor.execute("""
CREATE TABLE IF NOT EXISTS operations (
id SERIAL PRIMARY KEY,
source_path TEXT NOT NULL,
target_path TEXT NOT NULL,
operation_type TEXT NOT NULL,
size BIGINT DEFAULT 0,
status TEXT DEFAULT 'pending',
error TEXT,
executed_at TIMESTAMP,
verified BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create index on status
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_operations_status
ON operations(status)
""")
cursor.execute("\n CREATE TABLE IF NOT EXISTS operations (\n id SERIAL PRIMARY KEY,\n source_path TEXT NOT NULL,\n target_path TEXT NOT NULL,\n operation_type TEXT NOT NULL,\n size BIGINT DEFAULT 0,\n status TEXT DEFAULT 'pending',\n error TEXT,\n executed_at TIMESTAMP,\n verified BOOLEAN DEFAULT FALSE,\n created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n )\n ")
cursor.execute('\n CREATE INDEX IF NOT EXISTS idx_operations_status\n ON operations(status)\n ')
conn.commit()
cursor.close()
def plan_migration(
    self,
    disk: Optional[str] = None,
    category: Optional[str] = None,
) -> MigrationPlan:
    """Plan migration for files.

    Args:
        disk: Optional disk filter (matched against ``disk_label``).
        category: Optional category filter.

    Returns:
        MigrationPlan with planned operations.
    """
    self.logger.section('Planning Migration')
    conn = self._get_connection()

    # Only fixed SQL fragments are interpolated; values are passed as parameters.
    conditions = ['category IS NOT NULL']
    params = []
    if disk:
        conditions.append('disk_label = %s')
        params.append(disk)
    if category:
        conditions.append('category = %s')
        params.append(category)
    query = f"""
        SELECT path, size, category, duplicate_of
        FROM files
        WHERE {' AND '.join(conditions)}
        ORDER BY category, path
    """

    cursor = conn.cursor()
    try:
        cursor.execute(query, params)
        files = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
    self.logger.info(f'Found {len(files)} files to migrate')

    operations = []
    total_size = 0
    for path_str, size, file_category, duplicate_of in files:
        source = Path(path_str)
        # NOTE(review): only the file name is kept, so two files with the same
        # name in one category map to the same target -- confirm this is intended.
        target_path = self.target_base / file_category / source.name
        # Duplicates become hardlinks; unique files are copied.
        operation_type = 'hardlink' if duplicate_of else 'copy'
        operations.append(
            OperationRecord(
                source_path=source,
                target_path=target_path,
                operation_type=operation_type,
                size=size,
            )
        )
        total_size += size

    plan = MigrationPlan(
        target_disk=str(self.target_base),
        destination_disks=[str(self.target_base)],
        operations=operations,
        total_size=total_size,
        file_count=len(operations),
    )
    self.logger.info(f'Migration plan created: {plan.file_count} files, {plan.total_size:,} bytes')
    return plan
def execute_migration(
    self,
    operations: list[OperationRecord],
    dry_run: bool = False,
    progress_callback: Optional[Callable[[int, int, ProcessingStats], None]] = None,
) -> ProcessingStats:
    """Execute migration operations.

    Args:
        operations: List of operations to execute.
        dry_run: Whether to perform a dry run (operations are only logged).
        progress_callback: Optional callback for progress updates, called as
            ``(files_processed, total_ops, stats)`` every 100 operations and
            once more at the end if the total is not a multiple of 100.

    Returns:
        ProcessingStats with execution statistics.
    """
    self.logger.section('Executing Migration' + (' (DRY RUN)' if dry_run else ''))
    self._ensure_tables()
    stats = ProcessingStats()
    total_ops = len(operations)

    for operation in operations:
        stats.files_processed += 1
        if dry_run:
            self.logger.debug(
                f'[DRY RUN] Would {operation.operation_type}: '
                f'{operation.source_path} -> {operation.target_path}'
            )
            stats.files_succeeded += 1
        elif self._execute_operation(operation):
            stats.files_succeeded += 1
            stats.bytes_processed += operation.size
        else:
            stats.files_failed += 1

        if progress_callback and stats.files_processed % 100 == 0:
            progress_callback(stats.files_processed, total_ops, stats)
        if stats.files_processed % 1000 == 0:
            self.logger.progress(
                stats.files_processed,
                total_ops,
                prefix='Operations executed',
                bytes_processed=stats.bytes_processed,
                elapsed_seconds=stats.elapsed_seconds,
            )

    # Report the final partial batch, which the every-100 rule above would skip.
    if progress_callback and stats.files_processed % 100 != 0:
        progress_callback(stats.files_processed, total_ops, stats)

    mode = 'dry run' if dry_run else 'execution'
    self.logger.info(
        f'Migration {mode} complete: {stats.files_succeeded}/{total_ops} operations, '
        f'{stats.bytes_processed:,} bytes in {stats.elapsed_seconds:.1f}s'
    )
    return stats
def _execute_operation(self, operation: OperationRecord) -> bool:
"""Execute a single migration operation
Args:
operation: Operation to execute
Returns:
True if successful
"""
operation.status = 'in_progress'
operation.executed_at = datetime.now()
try:
# Select strategy based on operation type
if operation.operation_type == 'copy':
strategy = self.copy_strategy
elif operation.operation_type == 'hardlink':
@@ -256,15 +102,8 @@ class MigrationEngine:
elif operation.operation_type == 'symlink':
strategy = self.symlink_strategy
else:
raise ValueError(f"Unknown operation type: {operation.operation_type}")
# Execute migration
success = strategy.migrate(
operation.source_path,
operation.target_path,
verify=self.processing_config.verify_operations
)
raise ValueError(f'Unknown operation type: {operation.operation_type}')
success = strategy.migrate(operation.source_path, operation.target_path, verify=self.processing_config.verify_operations)
if success:
operation.status = 'completed'
operation.verified = True
@@ -272,183 +111,85 @@ class MigrationEngine:
return True
else:
operation.status = 'failed'
operation.error = "Migration failed"
operation.error = 'Migration failed'
self._record_operation(operation)
return False
except Exception as e:
operation.status = 'failed'
operation.error = str(e)
self._record_operation(operation)
self.logger.error(f"Operation failed: {operation.source_path}: {e}")
self.logger.error(f'Operation failed: {operation.source_path}: {e}')
return False
def _record_operation(self, operation: OperationRecord):
"""Record operation in database
Args:
operation: Operation to record
"""
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("""
INSERT INTO operations (
source_path, target_path, operation_type, bytes_processed,
status, error, executed_at, verified
)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
""", (
str(operation.source_path),
str(operation.target_path),
operation.operation_type,
operation.size,
operation.status,
operation.error,
operation.executed_at,
operation.verified
))
cursor.execute('\n INSERT INTO operations (\n source_path, target_path, operation_type, bytes_processed,\n status, error, executed_at, verified\n )\n VALUES (%s, %s, %s, %s, %s, %s, %s, %s)\n ', (str(operation.source_path), str(operation.target_path), operation.operation_type, operation.size, operation.status, operation.error, operation.executed_at, operation.verified))
conn.commit()
cursor.close()
def rollback(self, operation: OperationRecord) -> bool:
"""Rollback a migration operation
Args:
operation: Operation to rollback
Returns:
True if rollback successful
"""
self.logger.warning(f"Rolling back: {operation.target_path}")
self.logger.warning(f'Rolling back: {operation.target_path}')
try:
# Remove destination
if operation.target_path.exists():
operation.target_path.unlink()
# Update database
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("""
UPDATE operations
SET status = 'rolled_back'
WHERE source_path = %s AND target_path = %s
""", (str(operation.source_path), str(operation.target_path)))
cursor.execute("\n UPDATE operations\n SET status = 'rolled_back'\n WHERE source_path = %s AND target_path = %s\n ", (str(operation.source_path), str(operation.target_path)))
conn.commit()
cursor.close()
return True
except Exception as e:
self.logger.error(f"Rollback failed: {operation.target_path}: {e}")
self.logger.error(f'Rollback failed: {operation.target_path}: {e}')
return False
def get_migration_stats(self) -> dict:
"""Get migration statistics
Returns:
Dictionary with statistics
"""
conn = self._get_connection()
cursor = conn.cursor()
stats = {}
# Total operations
cursor.execute("SELECT COUNT(*) FROM operations")
cursor.execute('SELECT COUNT(*) FROM operations')
stats['total_operations'] = cursor.fetchone()[0]
# Operations by status
cursor.execute("""
SELECT status, COUNT(*)
FROM operations
GROUP BY status
""")
cursor.execute('\n SELECT status, COUNT(*)\n FROM operations\n GROUP BY status\n ')
for status, count in cursor.fetchall():
stats[f'{status}_operations'] = count
# Total size migrated
cursor.execute("""
SELECT COALESCE(SUM(size), 0)
FROM operations
WHERE status = 'completed'
""")
cursor.execute("\n SELECT COALESCE(SUM(size), 0)\n FROM operations\n WHERE status = 'completed'\n ")
stats['total_size_migrated'] = cursor.fetchone()[0]
cursor.close()
return stats
def verify_migrations(self) -> dict:
"""Verify completed migrations
Returns:
Dictionary with verification results
"""
self.logger.subsection("Verifying Migrations")
self.logger.subsection('Verifying Migrations')
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT source_path, target_path, operation_type
FROM operations
WHERE status = 'completed' AND verified = FALSE
""")
cursor.execute("\n SELECT source_path, target_path, operation_type\n FROM operations\n WHERE status = 'completed' AND verified = FALSE\n ")
operations = cursor.fetchall()
cursor.close()
results = {
'total': len(operations),
'verified': 0,
'failed': 0
}
results = {'total': len(operations), 'verified': 0, 'failed': 0}
for source_str, dest_str, op_type in operations:
source = Path(source_str)
dest = Path(dest_str)
# Verify destination exists
if not dest.exists():
results['failed'] += 1
self.logger.warning(f"Verification failed: {dest} does not exist")
self.logger.warning(f'Verification failed: {dest} does not exist')
continue
# Verify based on operation type
if op_type == 'hardlink':
# Check if hardlinked
if source.exists() and source.stat().st_ino == dest.stat().st_ino:
results['verified'] += 1
else:
results['failed'] += 1
elif dest.exists():
results['verified'] += 1
else:
# Check if destination exists and has correct size
if dest.exists():
results['verified'] += 1
else:
results['failed'] += 1
self.logger.info(
f"Verification complete: {results['verified']}/{results['total']} verified"
)
results['failed'] += 1
self.logger.info(f"Verification complete: {results['verified']}/{results['total']} verified")
return results
def close(self):
"""Close database connection"""
if self._connection and not self._connection.closed:
if self._connection and (not self._connection.closed):
self._connection.close()
def __enter__(self):
"""Context manager entry"""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit"""
self.close()

View File

@@ -1,90 +1,43 @@
"""Hardlink-based migration strategy"""
import os
from pathlib import Path
from typing import Optional
from ..shared.logger import ProgressLogger
class HardlinkMigrationStrategy:
"""Create hardlinks to files instead of copying"""
def __init__(self, logger: Optional[ProgressLogger] = None):
"""Initialize hardlink migration strategy
Args:
logger: Optional progress logger
"""
def __init__(self, logger: Optional[ProgressLogger]=None):
self.logger = logger
def migrate(
self,
source: Path,
destination: Path,
verify: bool = True
) -> bool:
"""Migrate file by creating hardlink
Args:
source: Source file path
destination: Destination file path
verify: Whether to verify the operation
Returns:
True if migration successful
"""
def migrate(self, source: Path, destination: Path, verify: bool=True) -> bool:
if not source.exists():
if self.logger:
self.logger.error(f"Source file does not exist: {source}")
self.logger.error(f'Source file does not exist: {source}')
return False
# Check if source and destination are on same filesystem
if not self._same_filesystem(source, destination.parent):
if self.logger:
self.logger.warning(
f"Cannot hardlink across filesystems: {source} -> {destination}"
)
self.logger.warning(f'Cannot hardlink across filesystems: {source} -> {destination}')
return False
# Create destination directory
destination.parent.mkdir(parents=True, exist_ok=True)
try:
# Create hardlink
os.link(source, destination)
# Verify if requested
if verify:
if not self._verify_hardlink(source, destination):
if self.logger:
self.logger.error(f"Verification failed: {source} -> {destination}")
self.logger.error(f'Verification failed: {source} -> {destination}')
destination.unlink()
return False
return True
except FileExistsError:
if self.logger:
self.logger.warning(f"Destination already exists: {destination}")
self.logger.warning(f'Destination already exists: {destination}')
return False
except Exception as e:
if self.logger:
self.logger.error(f"Hardlink failed: {source} -> {destination}: {e}")
self.logger.error(f'Hardlink failed: {source} -> {destination}: {e}')
return False
def _same_filesystem(self, path1: Path, path2: Path) -> bool:
"""Check if two paths are on the same filesystem
Args:
path1: First path
path2: Second path
Returns:
True if on same filesystem
"""
try:
# Get device IDs
stat1 = path1.stat()
stat2 = path2.stat()
return stat1.st_dev == stat2.st_dev
@@ -92,286 +45,117 @@ class HardlinkMigrationStrategy:
return False
def _verify_hardlink(self, source: Path, destination: Path) -> bool:
"""Verify hardlink
Args:
source: Source file path
destination: Destination file path
Returns:
True if verification successful
"""
try:
# Check if they have the same inode
source_stat = source.stat()
dest_stat = destination.stat()
return source_stat.st_ino == dest_stat.st_ino
except Exception:
return False
def can_migrate(self, source: Path, destination: Path) -> bool:
"""Check if migration is possible
Args:
source: Source file path
destination: Destination file path
Returns:
True if migration is possible
"""
if not source.exists():
return False
# Check if on same filesystem
dest_dir = destination.parent
if dest_dir.exists():
return self._same_filesystem(source, dest_dir)
# Check parent directories
parent = dest_dir.parent
while not parent.exists() and parent != parent.parent:
parent = parent.parent
return parent.exists() and self._same_filesystem(source, parent)
def estimate_time(self, source: Path) -> float:
"""Estimate migration time in seconds
Args:
source: Source file path
Returns:
Estimated time in seconds (hardlinks are instant)
"""
return 0.01 # Hardlinks are nearly instant
return 0.01
def cleanup(self, source: Path) -> bool:
"""Cleanup source file after successful migration
Note: For hardlinks, we typically don't remove the source
immediately as both links point to the same inode.
Args:
source: Source file path
Returns:
True (no cleanup needed for hardlinks)
"""
# For hardlinks, we don't remove the source
# Both source and destination point to the same data
return True
class SymlinkMigrationStrategy:
    """Create symbolic links to files."""

    def __init__(self, logger: Optional[ProgressLogger] = None, absolute_links: bool = True):
        """Initialize symlink migration strategy.

        Args:
            logger: Optional progress logger.
            absolute_links: Whether to create absolute symlinks.
        """
        self.logger = logger
        self.absolute_links = absolute_links

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Migrate file by creating symlink.

        Args:
            source: Source file path.
            destination: Destination file path.
            verify: Whether to verify the operation.

        Returns:
            True if migration successful.
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False
        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            if self.absolute_links:
                target = source.resolve()
            else:
                # Relative links are expressed from the link's own directory.
                target = os.path.relpath(source, destination.parent)
            destination.symlink_to(target)
            if verify:
                if not self._verify_symlink(destination, source):
                    if self.logger:
                        self.logger.error(f'Verification failed: {source} -> {destination}')
                    # Do not leave a link behind that failed verification.
                    destination.unlink()
                    return False
            return True
        except FileExistsError:
            if self.logger:
                self.logger.warning(f'Destination already exists: {destination}')
            return False
        except Exception as e:
            if self.logger:
                self.logger.error(f'Symlink failed: {source} -> {destination}: {e}')
            return False

    def _verify_symlink(self, symlink: Path, expected_target: Path) -> bool:
        """Verify symlink.

        Args:
            symlink: Symlink path.
            expected_target: Expected target path.

        Returns:
            True if ``symlink`` is a symlink resolving to the same path as
            ``expected_target``; False otherwise or on any error.
        """
        try:
            if not symlink.is_symlink():
                return False
            resolved = symlink.resolve()
            expected = expected_target.resolve()
            return resolved == expected
        except Exception:
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible.

        Args:
            source: Source file path.
            destination: Destination file path.

        Returns:
            False if the source is missing or an existing destination directory
            is not writable; True otherwise.
        """
        if not source.exists():
            return False
        dest_dir = destination.parent
        if dest_dir.exists():
            return os.access(dest_dir, os.W_OK)
        return True

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds.

        Args:
            source: Source file path (not inspected).

        Returns:
            Constant estimate in seconds (symlinks are instant).
        """
        return 0.01

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration.

        For symlinks the source is not removed, because the symlink points to it.

        Args:
            source: Source file path (left untouched).

        Returns:
            True (no cleanup needed for symlinks).
        """
        return True
class DedupHardlinkStrategy(HardlinkMigrationStrategy):
    """Hardlink strategy for deduplication.

    Creates hardlinks for duplicate files to save space.
    """

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Initialize dedup hardlink strategy.

        Args:
            logger: Optional progress logger.
        """
        super().__init__(logger=logger)

    def deduplicate(self, canonical: Path, duplicate: Path) -> bool:
        """Replace duplicate with hardlink to canonical.

        The new link is created under a temporary name next to ``duplicate``
        and then moved over it with ``os.replace``. This keeps ``duplicate``
        present throughout and never touches an unrelated ``<name>.bak`` file.

        Args:
            canonical: Canonical file path.
            duplicate: Duplicate file path.

        Returns:
            True if deduplication successful (or the files were already linked).
        """
        if not canonical.exists():
            if self.logger:
                self.logger.error(f'Canonical file does not exist: {canonical}')
            return False
        if not duplicate.exists():
            if self.logger:
                self.logger.error(f'Duplicate file does not exist: {duplicate}')
            return False
        # Nothing to do when both names already refer to the same file.
        if self._verify_hardlink(canonical, duplicate):
            return True
        if not self._same_filesystem(canonical, duplicate):
            if self.logger:
                self.logger.warning(f'Cannot hardlink across filesystems: {canonical} -> {duplicate}')
            return False

        staging = duplicate.with_name(f'.{duplicate.name}.dedup-{os.getpid()}.tmp')
        staged = False
        try:
            # os.link refuses to overwrite, so an existing file is never clobbered.
            os.link(canonical, staging)
            staged = True
            os.replace(staging, duplicate)
            return True
        except Exception as e:
            if self.logger:
                self.logger.error(f'Deduplication failed: {duplicate}: {e}')
            # Remove only a staging link this call created; the duplicate is untouched.
            if staged:
                try:
                    staging.unlink()
                except OSError:
                    pass
            return False