"""Hardlink-based migration strategy""" import os from pathlib import Path from typing import Optional from ..shared.logger import ProgressLogger class HardlinkMigrationStrategy: """Create hardlinks to files instead of copying""" def __init__(self, logger: Optional[ProgressLogger] = None): """Initialize hardlink migration strategy Args: logger: Optional progress logger """ self.logger = logger def migrate( self, source: Path, destination: Path, verify: bool = True ) -> bool: """Migrate file by creating hardlink Args: source: Source file path destination: Destination file path verify: Whether to verify the operation Returns: True if migration successful """ if not source.exists(): if self.logger: self.logger.error(f"Source file does not exist: {source}") return False # Check if source and destination are on same filesystem if not self._same_filesystem(source, destination.parent): if self.logger: self.logger.warning( f"Cannot hardlink across filesystems: {source} -> {destination}" ) return False # Create destination directory destination.parent.mkdir(parents=True, exist_ok=True) try: # Create hardlink os.link(source, destination) # Verify if requested if verify: if not self._verify_hardlink(source, destination): if self.logger: self.logger.error(f"Verification failed: {source} -> {destination}") destination.unlink() return False return True except FileExistsError: if self.logger: self.logger.warning(f"Destination already exists: {destination}") return False except Exception as e: if self.logger: self.logger.error(f"Hardlink failed: {source} -> {destination}: {e}") return False def _same_filesystem(self, path1: Path, path2: Path) -> bool: """Check if two paths are on the same filesystem Args: path1: First path path2: Second path Returns: True if on same filesystem """ try: # Get device IDs stat1 = path1.stat() stat2 = path2.stat() return stat1.st_dev == stat2.st_dev except Exception: return False def _verify_hardlink(self, source: Path, destination: Path) -> bool: """Verify hardlink Args: source: Source file path destination: Destination file path Returns: True if verification successful """ try: # Check if they have the same inode source_stat = source.stat() dest_stat = destination.stat() return source_stat.st_ino == dest_stat.st_ino except Exception: return False def can_migrate(self, source: Path, destination: Path) -> bool: """Check if migration is possible Args: source: Source file path destination: Destination file path Returns: True if migration is possible """ if not source.exists(): return False # Check if on same filesystem dest_dir = destination.parent if dest_dir.exists(): return self._same_filesystem(source, dest_dir) # Check parent directories parent = dest_dir.parent while not parent.exists() and parent != parent.parent: parent = parent.parent return parent.exists() and self._same_filesystem(source, parent) def estimate_time(self, source: Path) -> float: """Estimate migration time in seconds Args: source: Source file path Returns: Estimated time in seconds (hardlinks are instant) """ return 0.01 # Hardlinks are nearly instant def cleanup(self, source: Path) -> bool: """Cleanup source file after successful migration Note: For hardlinks, we typically don't remove the source immediately as both links point to the same inode. Args: source: Source file path Returns: True (no cleanup needed for hardlinks) """ # For hardlinks, we don't remove the source # Both source and destination point to the same data return True class SymlinkMigrationStrategy: """Create symbolic links to files""" def __init__( self, logger: Optional[ProgressLogger] = None, absolute_links: bool = True ): """Initialize symlink migration strategy Args: logger: Optional progress logger absolute_links: Whether to create absolute symlinks """ self.logger = logger self.absolute_links = absolute_links def migrate( self, source: Path, destination: Path, verify: bool = True ) -> bool: """Migrate file by creating symlink Args: source: Source file path destination: Destination file path verify: Whether to verify the operation Returns: True if migration successful """ if not source.exists(): if self.logger: self.logger.error(f"Source file does not exist: {source}") return False # Create destination directory destination.parent.mkdir(parents=True, exist_ok=True) try: # Determine link target if self.absolute_links: target = source.resolve() else: # Create relative symlink target = os.path.relpath(source, destination.parent) # Create symlink destination.symlink_to(target) # Verify if requested if verify: if not self._verify_symlink(destination, source): if self.logger: self.logger.error(f"Verification failed: {source} -> {destination}") destination.unlink() return False return True except FileExistsError: if self.logger: self.logger.warning(f"Destination already exists: {destination}") return False except Exception as e: if self.logger: self.logger.error(f"Symlink failed: {source} -> {destination}: {e}") return False def _verify_symlink(self, symlink: Path, expected_target: Path) -> bool: """Verify symlink Args: symlink: Symlink path expected_target: Expected target path Returns: True if verification successful """ try: # Check if it's a symlink if not symlink.is_symlink(): return False # Resolve and compare resolved = symlink.resolve() expected = expected_target.resolve() return resolved == expected except Exception: return False def can_migrate(self, source: Path, destination: Path) -> bool: """Check if migration is possible Args: source: Source file path destination: Destination file path Returns: True if migration is possible """ if not source.exists(): return False # Check if destination directory is writable dest_dir = destination.parent if dest_dir.exists(): return os.access(dest_dir, os.W_OK) return True def estimate_time(self, source: Path) -> float: """Estimate migration time in seconds Args: source: Source file path Returns: Estimated time in seconds (symlinks are instant) """ return 0.01 # Symlinks are instant def cleanup(self, source: Path) -> bool: """Cleanup source file after successful migration Note: For symlinks, we don't remove the source as the symlink points to it. Args: source: Source file path Returns: True (no cleanup needed for symlinks) """ # For symlinks, we don't remove the source return True class DedupHardlinkStrategy(HardlinkMigrationStrategy): """Hardlink strategy for deduplication Creates hardlinks for duplicate files to save space. """ def __init__(self, logger: Optional[ProgressLogger] = None): """Initialize dedup hardlink strategy""" super().__init__(logger=logger) def deduplicate( self, canonical: Path, duplicate: Path ) -> bool: """Replace duplicate with hardlink to canonical Args: canonical: Canonical file path duplicate: Duplicate file path Returns: True if deduplication successful """ if not canonical.exists(): if self.logger: self.logger.error(f"Canonical file does not exist: {canonical}") return False if not duplicate.exists(): if self.logger: self.logger.error(f"Duplicate file does not exist: {duplicate}") return False # Check if already hardlinked if self._verify_hardlink(canonical, duplicate): return True # Check if on same filesystem if not self._same_filesystem(canonical, duplicate): if self.logger: self.logger.warning( f"Cannot hardlink across filesystems: {canonical} -> {duplicate}" ) return False try: # Create temporary backup backup = duplicate.with_suffix(duplicate.suffix + '.bak') duplicate.rename(backup) # Create hardlink os.link(canonical, duplicate) # Remove backup backup.unlink() return True except Exception as e: if self.logger: self.logger.error(f"Deduplication failed: {duplicate}: {e}") # Restore from backup if backup.exists(): backup.rename(duplicate) return False