Files
defrag/app/migration/hardlink.py
2025-12-13 12:00:34 +01:00

162 lines
5.9 KiB
Python

import os
from pathlib import Path
from typing import Optional
from ..shared.logger import ProgressLogger
class HardlinkMigrationStrategy:
    """Migrate files by creating hard links instead of copying data.

    Failures are reported through boolean return values and, when a logger
    was supplied, through log messages rather than raised exceptions.
    """

    def __init__(self, logger: Optional['ProgressLogger'] = None):
        """Store the optional logger used for warnings and errors."""
        self.logger = logger

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Create ``destination`` as a hard link to ``source``.

        Missing parent directories of ``destination`` are created.

        Args:
            source: Existing file to link to.
            destination: Path of the hard link to create; must not exist yet.
            verify: When true, confirm afterwards that both paths refer to
                the same file and remove the link if they do not.

        Returns:
            True if the link was created (and verified when requested),
            False otherwise.
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False
        # The destination directory may not exist yet, so the device check
        # is made against its nearest existing ancestor: that is where the
        # missing directories will be created.
        anchor = self._nearest_existing_ancestor(destination.parent)
        if anchor is None or not self._same_filesystem(source, anchor):
            if self.logger:
                self.logger.warning(f'Cannot hardlink across filesystems: {source} -> {destination}')
            return False
        # Kept separate from the link step so that a FileExistsError raised
        # for a blocked directory path is not reported as an existing
        # destination.
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            if self.logger:
                self.logger.error(f'Hardlink failed: {source} -> {destination}: {e}')
            return False
        try:
            os.link(source, destination)
            if verify and not self._verify_hardlink(source, destination):
                if self.logger:
                    self.logger.error(f'Verification failed: {source} -> {destination}')
                destination.unlink()
                return False
            return True
        except FileExistsError:
            if self.logger:
                self.logger.warning(f'Destination already exists: {destination}')
            return False
        except Exception as e:
            if self.logger:
                self.logger.error(f'Hardlink failed: {source} -> {destination}: {e}')
            return False

    @staticmethod
    def _nearest_existing_ancestor(path: Path) -> Optional[Path]:
        """Return ``path`` itself or its closest existing parent, if any."""
        for candidate in (path, *path.parents):
            if candidate.exists():
                return candidate
        return None

    def _same_filesystem(self, path1: Path, path2: Path) -> bool:
        """Return True if both paths can be stat'ed and share ``st_dev``."""
        try:
            return path1.stat().st_dev == path2.stat().st_dev
        except (OSError, ValueError):
            return False

    def _verify_hardlink(self, source: Path, destination: Path) -> bool:
        """Return True if both paths refer to the same file.

        Device and inode are compared together because inode numbers are
        only unique within a single device.
        """
        try:
            return os.path.samestat(source.stat(), destination.stat())
        except (OSError, ValueError):
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Return True if ``source`` exists and can be hard-linked.

        The device check uses the destination directory, or its nearest
        existing ancestor when the directory has not been created yet.
        """
        if not source.exists():
            return False
        anchor = self._nearest_existing_ancestor(destination.parent)
        return anchor is not None and self._same_filesystem(source, anchor)

    def estimate_time(self, source: Path) -> float:
        """Return a fixed estimate of 0.01, independent of ``source``."""
        return 0.01

    def cleanup(self, source: Path) -> bool:
        """Perform no cleanup and always return True."""
        return True
class SymlinkMigrationStrategy:
    """Migrate files by creating symbolic links that point at the source.

    Failures are reported through boolean return values and, when a logger
    was supplied, through log messages rather than raised exceptions.
    """

    def __init__(self, logger: Optional['ProgressLogger'] = None, absolute_links: bool = True):
        """Store the optional logger and the link style.

        Args:
            logger: Receives warnings and errors; may be None.
            absolute_links: When true, links point at the resolved absolute
                source path; otherwise at a path relative to the link's
                directory.
        """
        self.logger = logger
        self.absolute_links = absolute_links

    def migrate(self, source: Path, destination: Path, verify: bool = True) -> bool:
        """Create ``destination`` as a symbolic link to ``source``.

        Missing parent directories of ``destination`` are created.

        Args:
            source: Existing file the link should point at.
            destination: Path of the symlink to create; must not exist yet.
            verify: When true, confirm afterwards that the link resolves to
                ``source`` and remove it if it does not.

        Returns:
            True if the link was created (and verified when requested),
            False otherwise.
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f'Source file does not exist: {source}')
            return False
        # Kept separate from the link step so that a FileExistsError raised
        # for a blocked directory path is not reported as an existing
        # destination, and so that the failure is returned, not raised.
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            if self.logger:
                self.logger.error(f'Symlink failed: {source} -> {destination}: {e}')
            return False
        try:
            if self.absolute_links:
                target = source.resolve()
            else:
                target = os.path.relpath(source, destination.parent)
            destination.symlink_to(target)
            if verify and not self._verify_symlink(destination, source):
                if self.logger:
                    self.logger.error(f'Verification failed: {source} -> {destination}')
                destination.unlink()
                return False
            return True
        except FileExistsError:
            if self.logger:
                self.logger.warning(f'Destination already exists: {destination}')
            return False
        except Exception as e:
            if self.logger:
                self.logger.error(f'Symlink failed: {source} -> {destination}: {e}')
            return False

    def _verify_symlink(self, symlink: Path, expected_target: Path) -> bool:
        """Return True if ``symlink`` is a symlink resolving to the target."""
        try:
            if not symlink.is_symlink():
                return False
            return symlink.resolve() == expected_target.resolve()
        except (OSError, RuntimeError, ValueError):
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Return True if ``source`` exists and the link looks creatable.

        An existing destination directory must be writable; a directory
        that does not exist yet is not checked and is treated as creatable.
        """
        if not source.exists():
            return False
        dest_dir = destination.parent
        if dest_dir.exists():
            return os.access(dest_dir, os.W_OK)
        return True

    def estimate_time(self, source: Path) -> float:
        """Return a fixed estimate of 0.01, independent of ``source``."""
        return 0.01

    def cleanup(self, source: Path) -> bool:
        """Perform no cleanup and always return True."""
        return True
class DedupHardlinkStrategy(HardlinkMigrationStrategy):
    """Replace duplicate files with hard links to a canonical copy."""

    # Upper bound on temporary names tried before giving up.
    _MAX_TEMP_ATTEMPTS = 100

    def __init__(self, logger: Optional['ProgressLogger'] = None):
        """Store the optional logger used for warnings and errors."""
        super().__init__(logger=logger)

    def deduplicate(self, canonical: Path, duplicate: Path) -> bool:
        """Turn ``duplicate`` into a hard link to ``canonical``.

        The contents of the two files are not compared here. The new link
        is first created under a temporary name in the duplicate's
        directory and then moved over ``duplicate`` with ``os.replace``, so
        no unrelated file is overwritten and the duplicate path is never
        missing while the swap is in progress.

        Args:
            canonical: File that is kept and linked to.
            duplicate: File to be replaced by a hard link.

        Returns:
            True if ``duplicate`` is a hard link to ``canonical`` on return
            (including when it already was one), False otherwise.
        """
        if not canonical.exists():
            if self.logger:
                self.logger.error(f'Canonical file does not exist: {canonical}')
            return False
        if not duplicate.exists():
            if self.logger:
                self.logger.error(f'Duplicate file does not exist: {duplicate}')
            return False
        # Already linked: nothing to do.
        if self._verify_hardlink(canonical, duplicate):
            return True
        if not self._same_filesystem(canonical, duplicate):
            if self.logger:
                self.logger.warning(f'Cannot hardlink across filesystems: {canonical} -> {duplicate}')
            return False
        temp_link = None
        try:
            temp_link = self._link_under_temp_name(canonical, duplicate.parent)
            os.replace(temp_link, duplicate)
            return True
        except Exception as e:
            if self.logger:
                self.logger.error(f'Deduplication failed: {duplicate}: {e}')
            # The duplicate itself was never touched; only a leftover
            # temporary link may need removing.
            if temp_link is not None:
                try:
                    temp_link.unlink()
                except OSError:
                    pass
            return False

    def _link_under_temp_name(self, canonical: Path, directory: Path) -> Path:
        """Hard-link ``canonical`` under an unused temporary name.

        ``os.link`` refuses to overwrite, so a name that is already taken
        is skipped rather than clobbered.

        Raises:
            FileExistsError: If every candidate name is already in use.
            OSError: If the link cannot be created for another reason.
        """
        for attempt in range(self._MAX_TEMP_ATTEMPTS):
            candidate = directory / f'.dedup-{os.getpid()}-{attempt}.tmp'
            try:
                os.link(canonical, candidate)
            except FileExistsError:
                continue
            return candidate
        raise FileExistsError(f'No free temporary name in {directory}')