import os
from pathlib import Path
from typing import Optional

from ..shared.logger import ProgressLogger


class HardlinkMigrationStrategy:
    """Migrate a file by creating a hardlink to it at the destination path."""

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Store the optional logger used to report warnings and errors."""
        self.logger = logger

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Create ``destination`` as a hardlink to ``source``.

        Missing parent directories of ``destination`` are created, but only
        after the nearest existing ancestor has been confirmed to live on the
        same device as ``source``.

        Args:
            source: Existing file to link to.
            destination: Path of the new link; must not exist yet.
            verify: When true, confirm after linking that both paths refer to
                the same file and remove the new link if they do not.

        Returns:
            True if the link was created (and verified, when requested).
            False if the source is missing, the paths are on different
            filesystems, the destination already exists, linking raised, or
            verification failed.
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False

        # The destination directory may not exist yet, so compare devices
        # against the closest ancestor that does; stat() on a missing
        # directory would otherwise be misreported as a cross-filesystem move.
        anchor = self._nearest_existing_ancestor(destination.parent)
        if not self._same_filesystem(source, anchor):
            if self.logger:
                self.logger.warning(
                    f'Cannot hardlink across filesystems: {source} -> {destination}'
                )
            return False

        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            os.link(source, destination)
            if verify and not self._verify_hardlink(source, destination):
                if self.logger:
                    self.logger.error(f'Verification failed: {source} -> {destination}')
                destination.unlink()
                return False
            return True
        except FileExistsError:
            if self.logger:
                self.logger.warning(f'Destination already exists: {destination}')
            return False
        except Exception as e:
            if self.logger:
                self.logger.error(f'Hardlink failed: {source} -> {destination}: {e}')
            return False

    @staticmethod
    def _nearest_existing_ancestor(path: Path) -> Path:
        """Return ``path`` or its closest ancestor that exists.

        Stops at the top of the path (where ``parent`` is the path itself),
        so the result may still not exist; callers must handle that.
        """
        current = path
        while not current.exists() and current != current.parent:
            current = current.parent
        return current

    def _same_filesystem(self, path1: Path, path2: Path) -> bool:
        """Return True if both paths report the same ``st_dev``.

        Any error while calling ``stat()`` (for example a missing path) is
        treated as "not the same filesystem".
        """
        try:
            return path1.stat().st_dev == path2.stat().st_dev
        except Exception:
            return False

    def _verify_hardlink(self, source: Path, destination: Path) -> bool:
        """Return True if both paths refer to the same file.

        Inode numbers are only unique per device, so the device id is
        compared together with the inode number. Any ``stat()`` error yields
        False.
        """
        try:
            source_stat = source.stat()
            dest_stat = destination.stat()
        except Exception:
            return False
        return (source_stat.st_dev, source_stat.st_ino) == (
            dest_stat.st_dev,
            dest_stat.st_ino,
        )

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Return True if ``source`` exists and can be hardlinked at ``destination``.

        The device check is made against the destination directory, or its
        nearest existing ancestor when the directory has not been created yet.
        """
        if not source.exists():
            return False
        anchor = self._nearest_existing_ancestor(destination.parent)
        return anchor.exists() and self._same_filesystem(source, anchor)

    def estimate_time(self, source: Path) -> float:
        """Return a fixed estimate of 0.01 (presumably seconds) regardless of ``source``."""
        return 0.01

    def cleanup(self, source: Path) -> bool:
        """Do nothing and report success."""
        return True


class SymlinkMigrationStrategy:
    """Migrate a file by creating a symbolic link to it at the destination path."""

    def __init__(self, logger: Optional[ProgressLogger] = None, absolute_links: bool = True):
        """Store the optional logger and the link style.

        Args:
            logger: Optional logger used to report warnings and errors.
            absolute_links: When true, links point at the resolved absolute
                source path; otherwise at a path relative to the link's
                directory.
        """
        self.logger = logger
        self.absolute_links = absolute_links

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Create ``destination`` as a symlink pointing at ``source``.

        Args:
            source: Existing path the link should point to.
            destination: Path of the new symlink; must not exist yet.
            verify: When true, confirm the link resolves to ``source`` and
                remove it if it does not.

        Returns:
            True if the symlink was created (and verified, when requested).
            False if the source is missing, the destination already exists,
            creating the link raised, or verification failed.
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False

        destination.parent.mkdir(parents=True, exist_ok=True)
        try:
            if self.absolute_links:
                target = source.resolve()
            else:
                # Relative targets are interpreted from the directory that
                # contains the link, not from the current working directory.
                target = os.path.relpath(source, destination.parent)
            destination.symlink_to(target)
            if verify and not self._verify_symlink(destination, source):
                if self.logger:
                    self.logger.error(f'Verification failed: {source} -> {destination}')
                destination.unlink()
                return False
            return True
        except FileExistsError:
            if self.logger:
                self.logger.warning(f'Destination already exists: {destination}')
            return False
        except Exception as e:
            if self.logger:
                self.logger.error(f'Symlink failed: {source} -> {destination}: {e}')
            return False

    def _verify_symlink(self, symlink: Path, expected_target: Path) -> bool:
        """Return True if ``symlink`` is a symlink resolving to ``expected_target``.

        Both sides are compared after ``resolve()``; any error yields False.
        """
        try:
            if not symlink.is_symlink():
                return False
            return symlink.resolve() == expected_target.resolve()
        except Exception:
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Return True if ``source`` exists and the destination directory is usable.

        An existing destination directory must be writable; a directory that
        does not exist yet is accepted without further checks.
        """
        if not source.exists():
            return False
        dest_dir = destination.parent
        if dest_dir.exists():
            return os.access(dest_dir, os.W_OK)
        return True

    def estimate_time(self, source: Path) -> float:
        """Return a fixed estimate of 0.01 (presumably seconds) regardless of ``source``."""
        return 0.01

    def cleanup(self, source: Path) -> bool:
        """Do nothing and report success."""
        return True


class DedupHardlinkStrategy(HardlinkMigrationStrategy):
    """Replace a duplicate file with a hardlink to its canonical copy."""

    def __init__(self, logger: Optional[ProgressLogger] = None):
        """Forward the optional logger to the hardlink strategy base class."""
        super().__init__(logger=logger)

    def deduplicate(self, canonical: Path, duplicate: Path) -> bool:
        """Make ``duplicate`` a hardlink to ``canonical``.

        The link is first created under a temporary name next to
        ``duplicate`` and then moved over it with ``os.replace``, so
        ``duplicate`` is left untouched if any step fails.

        Args:
            canonical: Existing file that should be kept.
            duplicate: Existing file to replace with a link to ``canonical``.

        Returns:
            True if the paths already were, or now are, the same file.
            False if either path is missing, they are on different
            filesystems, or linking/replacing raised.
        """
        if not canonical.exists():
            if self.logger:
                self.logger.error(f'Canonical file does not exist: {canonical}')
            return False
        if not duplicate.exists():
            if self.logger:
                self.logger.error(f'Duplicate file does not exist: {duplicate}')
            return False
        if self._verify_hardlink(canonical, duplicate):
            return True
        if not self._same_filesystem(canonical, duplicate):
            if self.logger:
                self.logger.warning(
                    f'Cannot hardlink across filesystems: {canonical} -> {duplicate}'
                )
            return False

        # Stage the link under a process-specific name in the same directory;
        # os.link refuses to overwrite, so an unrelated file is never clobbered.
        staging = duplicate.with_name(f'{duplicate.name}.{os.getpid()}.dedup-tmp')
        staged = False
        try:
            os.link(canonical, staging)
            staged = True
            os.replace(staging, duplicate)
            return True
        except Exception as e:
            if self.logger:
                self.logger.error(f'Deduplication failed: {duplicate}: {e}')
            return False
        finally:
            if staged:
                # After a successful replace the staging name is already gone;
                # otherwise remove the leftover link we created ourselves.
                try:
                    staging.unlink()
                except OSError:
                    pass