This commit is contained in:
mike
2025-12-12 23:04:51 +01:00
parent 56b2db82fc
commit 87550e426a
14 changed files with 132 additions and 122 deletions

View File

@@ -59,25 +59,25 @@ System is now **network-capable**, **auditable**, **scalable**, and offers **rea
2. **Index** 2. **Index**
```bash ```bash
python src/main.py index "D:\\" disk_d python app/main.py index "D:\\" disk_d
``` ```
3. **Plan** 3. **Plan**
```bash ```bash
python src/main.py plan disk_d disk_e python app/main.py plan disk_d disk_e
``` ```
4. **Dry-Run** 4. **Dry-Run**
```bash ```bash
python src/main.py execute plan.json --dry-run python app/main.py execute plan.json --dry-run
``` ```
5. **Execute** 5. **Execute**
```bash ```bash
python src/main.py execute plan.json python app/main.py execute plan.json
``` ```
6. **Report** 6. **Report**

View File

@@ -70,13 +70,13 @@ class ClassificationEngine:
if disk: if disk:
cursor.execute(""" cursor.execute("""
SELECT path, checksum SELECT path, checksum
FROM files_bak FROM files
WHERE disk = %s AND category IS NULL WHERE disk_label = %s AND category IS NULL
""", (disk,)) """, (disk,))
else: else:
cursor.execute(""" cursor.execute("""
SELECT path, checksum SELECT path, checksum
FROM files_bak FROM files
WHERE category IS NULL WHERE category IS NULL
""") """)
@@ -149,7 +149,7 @@ class ClassificationEngine:
from psycopg2.extras import execute_batch from psycopg2.extras import execute_batch
query = """ query = """
UPDATE files_bak UPDATE files
SET category = %s SET category = %s
WHERE path = %s WHERE path = %s
""" """
@@ -188,7 +188,7 @@ class ClassificationEngine:
category, category,
COUNT(*) as file_count, COUNT(*) as file_count,
SUM(size) as total_size SUM(size) as total_size
FROM files_bak FROM files
WHERE category IS NOT NULL WHERE category IS NOT NULL
GROUP BY category GROUP BY category
ORDER BY total_size DESC ORDER BY total_size DESC
@@ -214,7 +214,7 @@ class ClassificationEngine:
conn = self._get_connection() conn = self._get_connection()
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM files_bak WHERE category IS NULL") cursor.execute("SELECT COUNT(*) FROM files WHERE category IS NULL")
count = cursor.fetchone()[0] count = cursor.fetchone()[0]
cursor.close() cursor.close()
@@ -241,7 +241,7 @@ class ClassificationEngine:
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(""" cursor.execute("""
UPDATE files_bak UPDATE files
SET category = %s SET category = %s
WHERE category = %s WHERE category = %s
""", (new_category, old_category)) """, (new_category, old_category))
@@ -278,7 +278,7 @@ class ClassificationEngine:
# Get categorized files # Get categorized files
cursor.execute(""" cursor.execute("""
SELECT path, category SELECT path, category
FROM files_bak FROM files
WHERE category IS NOT NULL WHERE category IS NOT NULL
""") """)
@@ -326,7 +326,7 @@ class ClassificationEngine:
cursor.execute(""" cursor.execute("""
SELECT DISTINCT category SELECT DISTINCT category
FROM files_bak FROM files
WHERE category IS NOT NULL WHERE category IS NOT NULL
ORDER BY category ORDER BY category
""") """)

View File

@@ -241,7 +241,7 @@ def train_from_database(
cursor = db_connection.cursor() cursor = db_connection.cursor()
cursor.execute(""" cursor.execute("""
SELECT path, category SELECT path, category
FROM files_bak FROM files
WHERE category IS NOT NULL WHERE category IS NOT NULL
""") """)

View File

@@ -70,14 +70,14 @@ class DeduplicationEngine:
if disk: if disk:
cursor.execute(""" cursor.execute("""
SELECT path, size SELECT path, size
FROM files_bak FROM files
WHERE disk = %s AND checksum IS NULL WHERE disk_label = %s AND checksum IS NULL
ORDER BY size DESC ORDER BY size DESC
""", (disk,)) """, (disk,))
else: else:
cursor.execute(""" cursor.execute("""
SELECT path, size SELECT path, size
FROM files_bak FROM files
WHERE checksum IS NULL WHERE checksum IS NULL
ORDER BY size DESC ORDER BY size DESC
""") """)
@@ -108,7 +108,7 @@ class DeduplicationEngine:
if checksum: if checksum:
# Update database # Update database
cursor.execute(""" cursor.execute("""
UPDATE files_bak UPDATE files
SET checksum = %s, duplicate_of = %s SET checksum = %s, duplicate_of = %s
WHERE path = %s WHERE path = %s
""", (checksum, duplicate_of, str(path))) """, (checksum, duplicate_of, str(path)))
@@ -225,15 +225,15 @@ class DeduplicationEngine:
if disk: if disk:
cursor.execute(""" cursor.execute("""
SELECT checksum, array_agg(path ORDER BY path) as paths SELECT checksum, array_agg(path ORDER BY path) as paths
FROM files_bak FROM files
WHERE disk = %s AND checksum IS NOT NULL WHERE disk_label = %s AND checksum IS NOT NULL
GROUP BY checksum GROUP BY checksum
HAVING COUNT(*) > 1 HAVING COUNT(*) > 1
""", (disk,)) """, (disk,))
else: else:
cursor.execute(""" cursor.execute("""
SELECT checksum, array_agg(path ORDER BY path) as paths SELECT checksum, array_agg(path ORDER BY path) as paths
FROM files_bak FROM files
WHERE checksum IS NOT NULL WHERE checksum IS NOT NULL
GROUP BY checksum GROUP BY checksum
HAVING COUNT(*) > 1 HAVING COUNT(*) > 1
@@ -262,18 +262,18 @@ class DeduplicationEngine:
stats = {} stats = {}
# Total files # Total files
cursor.execute("SELECT COUNT(*) FROM files_bak WHERE checksum IS NOT NULL") cursor.execute("SELECT COUNT(*) FROM files WHERE checksum IS NOT NULL")
stats['total_files'] = cursor.fetchone()[0] stats['total_files'] = cursor.fetchone()[0]
# Unique files # Unique files
cursor.execute("SELECT COUNT(DISTINCT checksum) FROM files_bak WHERE checksum IS NOT NULL") cursor.execute("SELECT COUNT(DISTINCT checksum) FROM files WHERE checksum IS NOT NULL")
stats['unique_files'] = cursor.fetchone()[0] stats['unique_files'] = cursor.fetchone()[0]
# Duplicate files # Duplicate files
stats['duplicate_files'] = stats['total_files'] - stats['unique_files'] stats['duplicate_files'] = stats['total_files'] - stats['unique_files']
# Total size # Total size
cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak WHERE checksum IS NOT NULL") cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files WHERE checksum IS NOT NULL")
stats['total_size'] = cursor.fetchone()[0] stats['total_size'] = cursor.fetchone()[0]
# Unique size # Unique size
@@ -281,7 +281,7 @@ class DeduplicationEngine:
SELECT COALESCE(SUM(size), 0) SELECT COALESCE(SUM(size), 0)
FROM ( FROM (
SELECT DISTINCT ON (checksum) size SELECT DISTINCT ON (checksum) size
FROM files_bak FROM files
WHERE checksum IS NOT NULL WHERE checksum IS NOT NULL
) AS unique_files ) AS unique_files
""") """)
@@ -321,11 +321,11 @@ class DeduplicationEngine:
cursor.execute(""" cursor.execute("""
WITH canonical AS ( WITH canonical AS (
SELECT DISTINCT ON (checksum) path, checksum SELECT DISTINCT ON (checksum) path, checksum
FROM files_bak FROM files
WHERE checksum IS NOT NULL WHERE checksum IS NOT NULL
ORDER BY checksum, path ORDER BY checksum, path
) )
UPDATE files_bak UPDATE files
SET duplicate_of = NULL SET duplicate_of = NULL
WHERE path IN (SELECT path FROM canonical) WHERE path IN (SELECT path FROM canonical)
""") """)

View File

@@ -227,7 +227,7 @@ class HashStore:
# Get all files with their hashes # Get all files with their hashes
cursor.execute(""" cursor.execute("""
SELECT f.path, f.checksum SELECT f.path, f.checksum
FROM files_bak f FROM files f
WHERE f.checksum IS NOT NULL WHERE f.checksum IS NOT NULL
""") """)

View File

@@ -60,7 +60,7 @@ class DiscoveryEngine:
size BIGINT NOT NULL, size BIGINT NOT NULL,
modified_time DOUBLE PRECISION NOT NULL, modified_time DOUBLE PRECISION NOT NULL,
created_time DOUBLE PRECISION NOT NULL, created_time DOUBLE PRECISION NOT NULL,
disk TEXT NOT NULL, disk_label TEXT NOT NULL,
checksum TEXT, checksum TEXT,
status TEXT DEFAULT 'indexed', status TEXT DEFAULT 'indexed',
category TEXT, category TEXT,
@@ -72,17 +72,17 @@ class DiscoveryEngine:
# Create index on path # Create index on path
cursor.execute(""" cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_files_path ON files_bak(path) CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)
""") """)
# Create index on disk # Create index on disk
cursor.execute(""" cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_files_disk ON files_bak(disk) CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label)
""") """)
# Create index on checksum # Create index on checksum
cursor.execute(""" cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_files_checksum ON files_bak(checksum) CREATE INDEX IF NOT EXISTS idx_files_checksum ON files(checksum)
""") """)
conn.commit() conn.commit()
@@ -136,7 +136,7 @@ class DiscoveryEngine:
size=file_meta.size, size=file_meta.size,
modified_time=file_meta.modified_time, modified_time=file_meta.modified_time,
created_time=file_meta.created_time, created_time=file_meta.created_time,
disk=disk disk_label=disk
) )
batch.append(record) batch.append(record)
@@ -193,7 +193,7 @@ class DiscoveryEngine:
batch: List of FileRecord objects batch: List of FileRecord objects
""" """
query = """ query = """
INSERT INTO files_bak (path, size, modified_time, created_time, disk, checksum, status, category, duplicate_of) INSERT INTO files (path, size, modified_time, created_time, disk_label, checksum, status, category, duplicate_of)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (path) DO UPDATE SET ON CONFLICT (path) DO UPDATE SET
size = EXCLUDED.size, size = EXCLUDED.size,
@@ -207,7 +207,7 @@ class DiscoveryEngine:
record.size, record.size,
record.modified_time, record.modified_time,
record.created_time, record.created_time,
record.disk, record.disk_label,
record.checksum, record.checksum,
record.status, record.status,
record.category, record.category,
@@ -276,9 +276,9 @@ class DiscoveryEngine:
cursor = conn.cursor() cursor = conn.cursor()
if disk: if disk:
cursor.execute("SELECT COUNT(*) FROM files_bak WHERE disk = %s", (disk,)) cursor.execute("SELECT COUNT(*) FROM files WHERE disk_label = %s", (disk,))
else: else:
cursor.execute("SELECT COUNT(*) FROM files_bak") cursor.execute("SELECT COUNT(*) FROM files")
count = cursor.fetchone()[0] count = cursor.fetchone()[0]
cursor.close() cursor.close()
@@ -298,9 +298,9 @@ class DiscoveryEngine:
cursor = conn.cursor() cursor = conn.cursor()
if disk: if disk:
cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak WHERE disk = %s", (disk,)) cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files WHERE disk_label = %s", (disk,))
else: else:
cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak") cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files")
total = cursor.fetchone()[0] total = cursor.fetchone()[0]
cursor.close() cursor.close()

View File

@@ -37,7 +37,7 @@ class FileRecord:
path: str path: str
size: int size: int
modified_time: float modified_time: float
disk: str disk_label: str
checksum: Optional[str] = None checksum: Optional[str] = None
status: str = 'indexed' # indexed, planned, moved, verified status: str = 'indexed' # indexed, planned, moved, verified
@@ -49,11 +49,11 @@ class DiskReorganizer:
""" """
if db_config is None: if db_config is None:
db_config = { db_config = {
'host': '192.168.1.159', 'host': os.getenv('DB_HOST', 'localhost'),
'port': 5432, 'port': int(os.getenv('DB_PORT', 5432)),
'database': 'disk_reorganizer_db', 'database': os.getenv('DB_NAME', 'disk_reorganizer_db'),
'user': 'disk_reorg_user', 'user': os.getenv('DB_USER', 'disk_reorg_user'),
'password': 'heel-goed-wachtwoord' 'password': os.getenv('DB_PASSWORD', 'heel-goed-wachtwoord')
} }
self.db_config = db_config self.db_config = db_config
self.init_database() self.init_database()
@@ -127,12 +127,12 @@ class DiskReorganizer:
# PostgreSQL INSERT ... ON CONFLICT for upsert # PostgreSQL INSERT ... ON CONFLICT for upsert
cursor.execute(""" cursor.execute("""
INSERT INTO files_bak (path, size, modified_time, disk, checksum, status) INSERT INTO files (path, size, modified_time, disk_label, checksum, status)
VALUES (%s, %s, %s, %s, %s, %s) VALUES (%s, %s, %s, %s, %s, %s)
ON CONFLICT (path) DO UPDATE SET ON CONFLICT (path) DO UPDATE SET
size = EXCLUDED.size, size = EXCLUDED.size,
modified_time = EXCLUDED.modified_time, modified_time = EXCLUDED.modified_time,
disk = EXCLUDED.disk, disk_label = EXCLUDED.disk_label,
status = EXCLUDED.status status = EXCLUDED.status
""", (rel_path, size, mtime, disk_name, None, 'indexed')) """, (rel_path, size, mtime, disk_name, None, 'indexed'))
@@ -174,9 +174,9 @@ class DiskReorganizer:
try: try:
cursor.execute(""" cursor.execute("""
SELECT disk, SUM(size) as total_size, COUNT(*) as file_count SELECT disk_label, SUM(size) as total_size, COUNT(*) as file_count
FROM files_bak FROM files
GROUP BY disk GROUP BY disk_label
""") """)
usage = {} usage = {}
@@ -215,7 +215,7 @@ class DiskReorganizer:
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute( cursor.execute(
"SELECT path, size, modified_time FROM files_bak WHERE disk = %s ORDER BY size DESC", "SELECT path, size, modified_time FROM files WHERE disk_label = %s ORDER BY size DESC",
(target_disk,) (target_disk,)
) )
files_to_move = cursor.fetchall() files_to_move = cursor.fetchall()
@@ -265,15 +265,15 @@ class DiskReorganizer:
'source_disk': target_disk, 'source_disk': target_disk,
'source_path': rel_path, 'source_path': rel_path,
'dest_disk': dest_disk, 'dest_disk': dest_disk,
'dest_path': rel_path, # Keep same relative path 'target_path': rel_path, # Keep same relative path
'size': size 'size': size
} }
plan['operations'].append(op) plan['operations'].append(op)
# Store in database # Store in database
cursor.execute( cursor.execute(
"INSERT INTO operations_bak (source_path, dest_path, operation_type) VALUES (%s, %s, %s)", "INSERT INTO operations (source_path, target_path, operation_type, status) VALUES (%s, %s, %s, %s)",
(f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move') (f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move', 'pending')
) )
conn.commit() conn.commit()
@@ -347,10 +347,10 @@ class DiskReorganizer:
source_disk = op['source_disk'] source_disk = op['source_disk']
source_path = op['source_path'] source_path = op['source_path']
dest_disk = op['dest_disk'] dest_disk = op['dest_disk']
dest_path = op['dest_path'] target_path = op['target_path']
source_full = Path(source_disk) / source_path source_full = Path(source_disk) / source_path
dest_full = Path(dest_disk) / dest_path dest_full = Path(dest_disk) / target_path
# Dynamic progress display # Dynamic progress display
elapsed = time.time() - start_time elapsed = time.time() - start_time
@@ -384,7 +384,7 @@ class DiskReorganizer:
if self.verify_operation(source_full, dest_full): if self.verify_operation(source_full, dest_full):
# Update database # Update database
cursor.execute( cursor.execute(
"UPDATE files_bak SET disk = %s, status = 'moved' WHERE path = %s AND disk = %s", "UPDATE files SET disk_label = %s, status = 'moved' WHERE path = %s AND disk_label = %s",
(dest_disk, source_path, source_disk) (dest_disk, source_path, source_disk)
) )
@@ -393,7 +393,7 @@ class DiskReorganizer:
# Log operation as executed # Log operation as executed
cursor.execute( cursor.execute(
"UPDATE operations_bak SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s", "UPDATE operations SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s",
(f"{source_disk}:{source_path}",) (f"{source_disk}:{source_path}",)
) )
@@ -407,7 +407,7 @@ class DiskReorganizer:
except Exception as e: except Exception as e:
logger.error(f"\n Error processing {source_path}: {e}") logger.error(f"\n Error processing {source_path}: {e}")
cursor.execute( cursor.execute(
"UPDATE operations_bak SET error = %s WHERE source_path = %s", "UPDATE operations SET error = %s WHERE source_path = %s",
(str(e), f"{source_disk}:{source_path}") (str(e), f"{source_disk}:{source_path}")
) )
error_count += 1 error_count += 1
@@ -436,7 +436,7 @@ class DiskReorganizer:
try: try:
cursor.execute(""" cursor.execute("""
SELECT status, COUNT(*), SUM(size) FROM files_bak GROUP BY status SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
""") """)
print("\n=== FILE MIGRATION REPORT ===") print("\n=== FILE MIGRATION REPORT ===")
@@ -445,7 +445,7 @@ class DiskReorganizer:
print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}") print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}")
cursor.execute(""" cursor.execute("""
SELECT operation_type, executed, verified, COUNT(*) FROM operations_bak GROUP BY operation_type, executed, verified SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
""") """)
print("\n=== OPERATIONS REPORT ===") print("\n=== OPERATIONS REPORT ===")

View File

@@ -63,7 +63,7 @@ class MigrationEngine:
CREATE TABLE IF NOT EXISTS operations ( CREATE TABLE IF NOT EXISTS operations (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
source_path TEXT NOT NULL, source_path TEXT NOT NULL,
dest_path TEXT NOT NULL, target_path TEXT NOT NULL,
operation_type TEXT NOT NULL, operation_type TEXT NOT NULL,
size BIGINT DEFAULT 0, size BIGINT DEFAULT 0,
status TEXT DEFAULT 'pending', status TEXT DEFAULT 'pending',
@@ -77,7 +77,7 @@ class MigrationEngine:
# Create index on status # Create index on status
cursor.execute(""" cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_operations_status CREATE INDEX IF NOT EXISTS idx_operations_status
ON operations_bak(status) ON operations(status)
""") """)
conn.commit() conn.commit()
@@ -107,7 +107,7 @@ class MigrationEngine:
params = [] params = []
if disk: if disk:
conditions.append("disk = %s") conditions.append("disk_label = %s")
params.append(disk) params.append(disk)
if category: if category:
@@ -116,7 +116,7 @@ class MigrationEngine:
query = f""" query = f"""
SELECT path, size, category, duplicate_of SELECT path, size, category, duplicate_of
FROM files_bak FROM files
WHERE {' AND '.join(conditions)} WHERE {' AND '.join(conditions)}
ORDER BY category, path ORDER BY category, path
""" """
@@ -133,7 +133,7 @@ class MigrationEngine:
source = Path(path_str) source = Path(path_str)
# Determine destination # Determine destination
dest_path = self.target_base / file_category / source.name target_path = self.target_base / file_category / source.name
# Determine operation type # Determine operation type
if duplicate_of: if duplicate_of:
@@ -145,7 +145,7 @@ class MigrationEngine:
operation = OperationRecord( operation = OperationRecord(
source_path=source, source_path=source,
dest_path=dest_path, target_path=target_path,
operation_type=operation_type, operation_type=operation_type,
size=size size=size
) )
@@ -200,7 +200,7 @@ class MigrationEngine:
# In dry run, just log what would happen # In dry run, just log what would happen
self.logger.debug( self.logger.debug(
f"[DRY RUN] Would {operation.operation_type}: " f"[DRY RUN] Would {operation.operation_type}: "
f"{operation.source_path} -> {operation.dest_path}" f"{operation.source_path} -> {operation.target_path}"
) )
stats.files_succeeded += 1 stats.files_succeeded += 1
else: else:
@@ -261,7 +261,7 @@ class MigrationEngine:
# Execute migration # Execute migration
success = strategy.migrate( success = strategy.migrate(
operation.source_path, operation.source_path,
operation.dest_path, operation.target_path,
verify=self.processing_config.verify_operations verify=self.processing_config.verify_operations
) )
@@ -293,14 +293,14 @@ class MigrationEngine:
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(""" cursor.execute("""
INSERT INTO operations_bak ( INSERT INTO operations (
source_path, dest_path, operation_type, size, source_path, target_path, operation_type, bytes_processed,
status, error, executed_at, verified status, error, executed_at, verified
) )
VALUES (%s, %s, %s, %s, %s, %s, %s, %s) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
""", ( """, (
str(operation.source_path), str(operation.source_path),
str(operation.dest_path), str(operation.target_path),
operation.operation_type, operation.operation_type,
operation.size, operation.size,
operation.status, operation.status,
@@ -321,22 +321,22 @@ class MigrationEngine:
Returns: Returns:
True if rollback successful True if rollback successful
""" """
self.logger.warning(f"Rolling back: {operation.dest_path}") self.logger.warning(f"Rolling back: {operation.target_path}")
try: try:
# Remove destination # Remove destination
if operation.dest_path.exists(): if operation.target_path.exists():
operation.dest_path.unlink() operation.target_path.unlink()
# Update database # Update database
conn = self._get_connection() conn = self._get_connection()
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(""" cursor.execute("""
UPDATE operations_bak UPDATE operations
SET status = 'rolled_back' SET status = 'rolled_back'
WHERE source_path = %s AND dest_path = %s WHERE source_path = %s AND target_path = %s
""", (str(operation.source_path), str(operation.dest_path))) """, (str(operation.source_path), str(operation.target_path)))
conn.commit() conn.commit()
cursor.close() cursor.close()
@@ -344,7 +344,7 @@ class MigrationEngine:
return True return True
except Exception as e: except Exception as e:
self.logger.error(f"Rollback failed: {operation.dest_path}: {e}") self.logger.error(f"Rollback failed: {operation.target_path}: {e}")
return False return False
def get_migration_stats(self) -> dict: def get_migration_stats(self) -> dict:
@@ -359,13 +359,13 @@ class MigrationEngine:
stats = {} stats = {}
# Total operations # Total operations
cursor.execute("SELECT COUNT(*) FROM operations_bak") cursor.execute("SELECT COUNT(*) FROM operations")
stats['total_operations'] = cursor.fetchone()[0] stats['total_operations'] = cursor.fetchone()[0]
# Operations by status # Operations by status
cursor.execute(""" cursor.execute("""
SELECT status, COUNT(*) SELECT status, COUNT(*)
FROM operations_bak FROM operations
GROUP BY status GROUP BY status
""") """)
@@ -375,7 +375,7 @@ class MigrationEngine:
# Total size migrated # Total size migrated
cursor.execute(""" cursor.execute("""
SELECT COALESCE(SUM(size), 0) SELECT COALESCE(SUM(size), 0)
FROM operations_bak FROM operations
WHERE status = 'completed' WHERE status = 'completed'
""") """)
stats['total_size_migrated'] = cursor.fetchone()[0] stats['total_size_migrated'] = cursor.fetchone()[0]
@@ -396,8 +396,8 @@ class MigrationEngine:
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(""" cursor.execute("""
SELECT source_path, dest_path, operation_type SELECT source_path, target_path, operation_type
FROM operations_bak FROM operations
WHERE status = 'completed' AND verified = FALSE WHERE status = 'completed' AND verified = FALSE
""") """)

View File

@@ -12,7 +12,7 @@ class FileRecord:
size: int size: int
modified_time: float modified_time: float
created_time: float created_time: float
disk: str disk_label: str
checksum: str | None = None checksum: str | None = None
status: str = 'indexed' # indexed, planned, moved, verified status: str = 'indexed' # indexed, planned, moved, verified
category: str | None = None category: str | None = None
@@ -23,7 +23,7 @@ class FileRecord:
class OperationRecord: class OperationRecord:
"""Record of a migration operation""" """Record of a migration operation"""
source_path: Path source_path: Path
dest_path: Path target_path: Path
operation_type: str # move, copy, hardlink, symlink operation_type: str # move, copy, hardlink, symlink
status: str = 'pending' # pending, in_progress, completed, failed status: str = 'pending' # pending, in_progress, completed, failed
error: str | None = None error: str | None = None

View File

@@ -12,7 +12,7 @@ class FileRecord:
size: int size: int
modified_time: float modified_time: float
created_time: float created_time: float
disk: str disk_label: str
checksum: Optional[str] = None checksum: Optional[str] = None
status: str = 'indexed' # indexed, planned, moved, verified status: str = 'indexed' # indexed, planned, moved, verified
category: Optional[str] = None category: Optional[str] = None
@@ -25,7 +25,7 @@ class FileRecord:
'size': self.size, 'size': self.size,
'modified_time': self.modified_time, 'modified_time': self.modified_time,
'created_time': self.created_time, 'created_time': self.created_time,
'disk': self.disk, 'disk_label': self.disk_label,
'checksum': self.checksum, 'checksum': self.checksum,
'status': self.status, 'status': self.status,
'category': self.category, 'category': self.category,
@@ -37,7 +37,7 @@ class FileRecord:
class OperationRecord: class OperationRecord:
"""Record of a migration operation""" """Record of a migration operation"""
source_path: Path source_path: Path
dest_path: Path target_path: Path
operation_type: str # move, copy, hardlink, symlink operation_type: str # move, copy, hardlink, symlink
size: int = 0 size: int = 0
status: str = 'pending' # pending, in_progress, completed, failed status: str = 'pending' # pending, in_progress, completed, failed
@@ -49,7 +49,7 @@ class OperationRecord:
"""Convert to dictionary for serialization""" """Convert to dictionary for serialization"""
return { return {
'source_path': str(self.source_path), 'source_path': str(self.source_path),
'dest_path': str(self.dest_path), 'target_path': str(self.target_path),
'operation_type': self.operation_type, 'operation_type': self.operation_type,
'size': self.size, 'size': self.size,
'status': self.status, 'status': self.status,

View File

@@ -105,7 +105,7 @@ services:
- ${HOST_SOURCE_PATH:-/mnt/source}:/mnt/source:ro - ${HOST_SOURCE_PATH:-/mnt/source}:/mnt/source:ro
- ./config:/app/config - ./config:/app/config
- ./logs:/app/logs - ./logs:/app/logs
command: ["python", "app/main.py", "index", "/mnt/source", "disk_d"] command: ["python", "app/main.py", "index", "/media/mike/SMT", "SMT"]
profiles: profiles:
- index-only - index-only
networks: networks:
@@ -127,7 +127,7 @@ services:
- ./config:/app/config - ./config:/app/config
- ./plans:/app/plans - ./plans:/app/plans
- ./logs:/app/logs - ./logs:/app/logs
command: ["python", "app/main.py", "plan", "disk_d", "disk_e"] command: ["python", "app/main.py", "plan", "/media/mike/SMT", "SMT"]
profiles: profiles:
- plan-only - plan-only
networks: networks:
@@ -194,7 +194,7 @@ services:
volumes: volumes:
- ./reports:/app/reports - ./reports:/app/reports
- ./logs:/app/logs - ./logs:/app/logs
command: ["python", "app/main.py", "report", "--format", "html"] command: ["python", "app/main.py", "report"]
profiles: profiles:
- report-only - report-only
networks: networks:

View File

@@ -40,11 +40,11 @@ echo "✅ Setup complete!"
echo "" echo ""
echo "📋 Available commands:" echo "📋 Available commands:"
echo " docker compose up -d # Start all services" echo " docker compose up -d # Start all services"
echo " docker compose up --profile index-only index # Run index only" echo " docker compose --profile index-only up index # Run index only"
echo " docker compose up --profile plan-only plan # Generate plan" echo " docker compose --profile plan-only up plan # Generate plan"
echo " docker compose up --profile dry-run-only dry-run # Dry run" echo " docker compose --profile dry-run-only up dry-run # Dry run"
echo " docker compose up --profile execute-only execute # Execute migration" echo " docker compose --profile execute-only up execute # Execute migration"
echo " docker compose up --profile report-only report # Generate report" echo " docker compose --profile report-only up report # Generate report"
echo "" echo ""
echo "🌐 Access monitoring:" echo "🌐 Access monitoring:"
echo " - PostgreSQL Admin: http://localhost:5050" echo " - PostgreSQL Admin: http://localhost:5050"

View File

@@ -13,9 +13,12 @@ CREATE TABLE IF NOT EXISTS files (
modified_time TIMESTAMP WITH TIME ZONE, modified_time TIMESTAMP WITH TIME ZONE,
created_time TIMESTAMP WITH TIME ZONE, created_time TIMESTAMP WITH TIME ZONE,
file_hash VARCHAR(64), -- SHA-256 hash file_hash VARCHAR(64), -- SHA-256 hash
checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility)
category VARCHAR(50), category VARCHAR(50),
disk_label VARCHAR(50), disk_label VARCHAR(50),
last_verified TIMESTAMP WITH TIME ZONE, last_verified TIMESTAMP WITH TIME ZONE,
status VARCHAR(20) DEFAULT 'indexed',
duplicate_of TEXT, -- Path to canonical file if this is a duplicate
-- Metadata -- Metadata
metadata JSONB DEFAULT '{}', metadata JSONB DEFAULT '{}',
@@ -36,8 +39,13 @@ CREATE TABLE IF NOT EXISTS operations (
target_path TEXT, target_path TEXT,
status VARCHAR(20) NOT NULL, status VARCHAR(20) NOT NULL,
-- Legacy compatibility fields
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
-- File reference -- File reference
file_id UUID REFERENCES files_bak(id) ON DELETE SET NULL, file_id UUID REFERENCES files(id) ON DELETE SET NULL,
-- Performance metrics -- Performance metrics
duration_ms INTEGER, duration_ms INTEGER,
@@ -54,6 +62,7 @@ CREATE TABLE IF NOT EXISTS operations (
-- Audit fields -- Audit fields
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE, completed_at TIMESTAMP WITH TIME ZONE,
executed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
); );
@@ -89,14 +98,15 @@ CREATE TABLE IF NOT EXISTS migration_plans (
); );
-- Indexes for performance -- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_files_path ON files_bak(path); CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_files_hash ON files_bak(file_hash); CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash);
CREATE INDEX IF NOT EXISTS idx_files_disk ON files_bak(disk_label); CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
CREATE INDEX IF NOT EXISTS idx_files_category ON files_bak(category); CREATE INDEX IF NOT EXISTS idx_files_category ON files(category);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations_bak(status); CREATE INDEX IF NOT EXISTS idx_operations_status ON operations(status);
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations_bak(created_at); CREATE INDEX IF NOT EXISTS idx_operations_created ON operations(created_at);
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations_bak(file_id); CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations(file_id);
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store(canonical_path); CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store(canonical_path);
@@ -110,7 +120,7 @@ END;
$$ language 'plpgsql'; $$ language 'plpgsql';
-- Triggers for automatic updated_at -- Triggers for automatic updated_at
CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files_bak CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- View for operational dashboard -- View for operational dashboard
@@ -122,7 +132,7 @@ SELECT
AVG(o.duration_ms) as avg_duration_ms, AVG(o.duration_ms) as avg_duration_ms,
MIN(o.started_at) as earliest_operation, MIN(o.started_at) as earliest_operation,
MAX(o.completed_at) as latest_operation MAX(o.completed_at) as latest_operation
FROM operations_bak o FROM operations o
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours' WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
GROUP BY o.status; GROUP BY o.status;
@@ -135,7 +145,7 @@ SELECT
AVG(size) as avg_file_size, AVG(size) as avg_file_size,
MIN(created_time) as oldest_file, MIN(created_time) as oldest_file,
MAX(modified_time) as newest_file MAX(modified_time) as newest_file
FROM files_bak FROM files
GROUP BY disk_label; GROUP BY disk_label;
-- Insert default configuration -- Insert default configuration

View File

@@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY, path TEXT PRIMARY KEY,
size BIGINT NOT NULL, size BIGINT NOT NULL,
modified_time DOUBLE PRECISION NOT NULL, modified_time DOUBLE PRECISION NOT NULL,
disk TEXT NOT NULL, disk_label TEXT NOT NULL,
checksum TEXT, checksum TEXT,
status TEXT DEFAULT 'indexed', status TEXT DEFAULT 'indexed',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
@@ -31,14 +31,14 @@ CREATE TABLE IF NOT EXISTS files (
); );
-- Create index on disk column for faster queries -- Create index on disk column for faster queries
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk); CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status); CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
-- Create operations table -- Create operations table
CREATE TABLE IF NOT EXISTS operations ( CREATE TABLE IF NOT EXISTS operations (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
source_path TEXT NOT NULL, source_path TEXT NOT NULL,
dest_path TEXT NOT NULL, target_path TEXT NOT NULL,
operation_type TEXT NOT NULL, operation_type TEXT NOT NULL,
executed INTEGER DEFAULT 0, executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0, verified INTEGER DEFAULT 0,