initial
This commit is contained in:
377
app/migration/hardlink.py
Normal file
377
app/migration/hardlink.py
Normal file
@@ -0,0 +1,377 @@
|
||||
"""Hardlink-based migration strategy"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from ..shared.logger import ProgressLogger
|
||||
|
||||
|
||||
class HardlinkMigrationStrategy:
    """Create hardlinks to files instead of copying

    A hardlink can only be created inside a single filesystem, so the
    strategy compares the device ID of the source with the device ID of the
    destination directory (or of its nearest existing ancestor when the
    directory has not been created yet) before linking.
    """

    def __init__(self, logger: Optional["ProgressLogger"] = None):
        """Initialize hardlink migration strategy

        Args:
            logger: Optional progress logger; nothing is logged when omitted
        """
        self.logger = logger

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate file by creating hardlink

        Missing parent directories of ``destination`` are created. Failures
        are logged (when a logger is set) and reported through the return
        value instead of being raised.

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation

        Returns:
            True if migration successful
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f"Source file does not exist: {source}")
            return False

        # The destination directory may not exist yet. Calling stat() on it
        # would fail and be misreported as a cross-filesystem move, so the
        # device comparison uses the closest ancestor that does exist.
        anchor = self._nearest_existing_ancestor(destination.parent)
        if anchor is None or not self._same_filesystem(source, anchor):
            if self.logger:
                self.logger.warning(
                    f"Cannot hardlink across filesystems: {source} -> {destination}"
                )
            return False

        # Kept in its own try block so that a FileExistsError raised by
        # mkdir (a path component is a regular file) is not reported as
        # "Destination already exists".
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            if self.logger:
                self.logger.error(f"Hardlink failed: {source} -> {destination}: {e}")
            return False

        try:
            os.link(source, destination)

            if verify and not self._verify_hardlink(source, destination):
                if self.logger:
                    self.logger.error(f"Verification failed: {source} -> {destination}")
                # Do not leave an unverified link behind.
                destination.unlink()
                return False

            return True

        except FileExistsError:
            if self.logger:
                self.logger.warning(f"Destination already exists: {destination}")
            return False

        except Exception as e:
            if self.logger:
                self.logger.error(f"Hardlink failed: {source} -> {destination}: {e}")
            return False

    @staticmethod
    def _nearest_existing_ancestor(path: Path) -> Optional[Path]:
        """Return ``path`` or its closest ancestor that exists

        Args:
            path: Path to start from

        Returns:
            The first existing path found while walking towards the root,
            or None when even the top-most ancestor does not exist
        """
        current = path
        while not current.exists():
            if current == current.parent:
                # Reached the top of the path without finding anything.
                return None
            current = current.parent
        return current

    def _same_filesystem(self, path1: Path, path2: Path) -> bool:
        """Check if two paths are on the same filesystem

        Args:
            path1: First path
            path2: Second path

        Returns:
            True if both paths report the same device ID; False when they
            differ or when either path cannot be stat'ed
        """
        try:
            return path1.stat().st_dev == path2.stat().st_dev
        except (OSError, ValueError):
            return False

    def _verify_hardlink(self, source: Path, destination: Path) -> bool:
        """Verify hardlink

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if both paths refer to the same inode on the same device;
            False otherwise or when either path cannot be stat'ed
        """
        try:
            # Inode numbers are only unique per device, so the device ID is
            # compared together with the inode number.
            return os.path.samestat(source.stat(), destination.stat())
        except (OSError, ValueError):
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if the source exists and is on the same filesystem as the
            destination directory or its nearest existing ancestor
        """
        if not source.exists():
            return False

        anchor = self._nearest_existing_ancestor(destination.parent)
        return anchor is not None and self._same_filesystem(source, anchor)

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds

        Args:
            source: Source file path (not inspected; the estimate is constant)

        Returns:
            Estimated time in seconds (hardlinks are nearly instant)
        """
        return 0.01  # Hardlinks are nearly instant

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration

        Note: For hardlinks, the source is not removed; source and
        destination point to the same data.

        Args:
            source: Source file path (left untouched)

        Returns:
            True (no cleanup needed for hardlinks)
        """
        return True
|
||||
|
||||
|
||||
class SymlinkMigrationStrategy:
    """Create symbolic links to files

    Links are either absolute (pointing at the resolved source path) or
    relative to the directory that holds the link, depending on
    ``absolute_links``.
    """

    def __init__(
        self,
        logger: Optional["ProgressLogger"] = None,
        absolute_links: bool = True
    ):
        """Initialize symlink migration strategy

        Args:
            logger: Optional progress logger; nothing is logged when omitted
            absolute_links: Whether to create absolute symlinks
        """
        self.logger = logger
        self.absolute_links = absolute_links

    def migrate(
        self,
        source: Path,
        destination: Path,
        verify: bool = True
    ) -> bool:
        """Migrate file by creating symlink

        Missing parent directories of ``destination`` are created. Failures
        are logged (when a logger is set) and reported through the return
        value instead of being raised.

        Args:
            source: Source file path
            destination: Destination file path
            verify: Whether to verify the operation

        Returns:
            True if migration successful
        """
        if not source.exists():
            if self.logger:
                self.logger.error(f"Source file does not exist: {source}")
            return False

        # Kept in its own try block so that a FileExistsError raised by
        # mkdir (a path component is a regular file) is not reported as
        # "Destination already exists".
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            if self.logger:
                self.logger.error(f"Symlink failed: {source} -> {destination}: {e}")
            return False

        try:
            if self.absolute_links:
                target = source.resolve()
            else:
                # A relative target is interpreted from the directory that
                # physically contains the link. Resolve the destination
                # directory first so the ".." components stay correct when
                # that directory is itself reached through a symlink.
                target = os.path.relpath(source, destination.parent.resolve())

            destination.symlink_to(target)

            if verify and not self._verify_symlink(destination, source):
                if self.logger:
                    self.logger.error(f"Verification failed: {source} -> {destination}")
                # Do not leave an unverified link behind.
                destination.unlink()
                return False

            return True

        except FileExistsError:
            if self.logger:
                self.logger.warning(f"Destination already exists: {destination}")
            return False

        except Exception as e:
            if self.logger:
                self.logger.error(f"Symlink failed: {source} -> {destination}: {e}")
            return False

    def _verify_symlink(self, symlink: Path, expected_target: Path) -> bool:
        """Verify symlink

        Args:
            symlink: Symlink path
            expected_target: Expected target path

        Returns:
            True if ``symlink`` is a symbolic link and resolves to the same
            path as ``expected_target``; False otherwise or on error
        """
        try:
            if not symlink.is_symlink():
                return False
            return symlink.resolve() == expected_target.resolve()
        except (OSError, RuntimeError, ValueError):
            return False

    def can_migrate(self, source: Path, destination: Path) -> bool:
        """Check if migration is possible

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if the source exists and the destination directory, or its
            nearest existing ancestor when it has not been created yet, is a
            directory the process may write to
        """
        if not source.exists():
            return False

        # Walk up to the first path that exists: that is where missing
        # directories (or the link itself) would have to be created.
        anchor = destination.parent
        while not anchor.exists():
            if anchor == anchor.parent:
                return False
            anchor = anchor.parent

        # Creating an entry needs both write and search permission.
        return anchor.is_dir() and os.access(anchor, os.W_OK | os.X_OK)

    def estimate_time(self, source: Path) -> float:
        """Estimate migration time in seconds

        Args:
            source: Source file path (not inspected; the estimate is constant)

        Returns:
            Estimated time in seconds (symlinks are instant)
        """
        return 0.01  # Symlinks are instant

    def cleanup(self, source: Path) -> bool:
        """Cleanup source file after successful migration

        Note: For symlinks, the source is not removed as the symlink
        points to it.

        Args:
            source: Source file path (left untouched)

        Returns:
            True (no cleanup needed for symlinks)
        """
        return True
|
||||
|
||||
|
||||
class DedupHardlinkStrategy(HardlinkMigrationStrategy):
    """Hardlink strategy for deduplication

    Creates hardlinks for duplicate files to save space.
    """

    def __init__(self, logger: Optional["ProgressLogger"] = None):
        """Initialize dedup hardlink strategy

        Args:
            logger: Optional progress logger; nothing is logged when omitted
        """
        super().__init__(logger=logger)

    def deduplicate(
        self,
        canonical: Path,
        duplicate: Path
    ) -> bool:
        """Replace duplicate with hardlink to canonical

        File contents are not compared here; the caller decides that the
        two files are duplicates. The new link is first created under a
        temporary name next to ``duplicate`` and then moved over it with
        ``os.replace``, so the duplicate path never disappears and no other
        file in the directory is overwritten.

        Args:
            canonical: Canonical file path
            duplicate: Duplicate file path

        Returns:
            True if deduplication successful (or the two paths were
            already hardlinked)
        """
        if not canonical.exists():
            if self.logger:
                self.logger.error(f"Canonical file does not exist: {canonical}")
            return False

        if not duplicate.exists():
            if self.logger:
                self.logger.error(f"Duplicate file does not exist: {duplicate}")
            return False

        # Establish that both files share a filesystem before the
        # already-linked shortcut below, so that a match found by that
        # check cannot come from two different devices.
        if not self._same_filesystem(canonical, duplicate):
            if self.logger:
                self.logger.warning(
                    f"Cannot hardlink across filesystems: {canonical} -> {duplicate}"
                )
            return False

        # Check if already hardlinked
        if self._verify_hardlink(canonical, duplicate):
            return True

        # Random, short temporary name in the duplicate's own directory:
        # it cannot clash with user files such as "<name>.bak" and stays on
        # the same filesystem so the final replace is a plain rename.
        staging = duplicate.with_name(f".dedup-{os.urandom(8).hex()}.tmp")

        try:
            os.link(canonical, staging)
            os.replace(staging, duplicate)
            return True

        except Exception as e:
            if self.logger:
                self.logger.error(f"Deduplication failed: {duplicate}: {e}")

            # The original duplicate has not been touched; only the
            # temporary link may need removing.
            try:
                staging.unlink()
            except FileNotFoundError:
                pass  # the temporary link was never created
            except OSError as cleanup_error:
                if self.logger:
                    self.logger.warning(
                        f"Could not remove temporary link {staging}: {cleanup_error}"
                    )

            return False
|
||||
Reference in New Issue
Block a user