From 87550e426a9ffdbe6abcac227371ca5fdf5e8f65 Mon Sep 17 00:00:00 2001
From: mike
Date: Fri, 12 Dec 2025 23:04:51 +0100
Subject: [PATCH] base

---
 README.md                    |  8 +++----
 app/classification/engine.py | 18 +++++++-------
 app/classification/ml.py     |  2 +-
 app/deduplication/engine.py  | 40 +++++++++++++++----------------
 app/deduplication/store.py   |  2 +-
 app/discovery/engine.py      | 28 +++++++++++-----------
 app/main.py                  | 44 +++++++++++++++++-----------------
 app/migration/engine.py      | 46 ++++++++++++++++++------------------
 app/shared/_protocols.py     |  4 ++--
 app/shared/models.py         |  8 +++----
 docker-compose.yml           |  6 ++---
 setup.sh                     | 10 ++++----
 sql/init.sql                 | 32 ++++++++++++++++---------
 sql/setup_database.sql       |  6 ++---
 14 files changed, 132 insertions(+), 122 deletions(-)

diff --git a/README.md b/README.md
index aa90f9d..7fabdae 100644
--- a/README.md
+++ b/README.md
@@ -59,25 +59,25 @@ System is now **network-capable**, **auditable**, **scalable**, and offers **rea
 2. **Index**
 
    ```bash
-   python src/main.py index "D:\\" disk_d
+   python app/main.py index "D:\\" disk_d
    ```
 
 3. **Plan**
 
    ```bash
-   python src/main.py plan disk_d disk_e
+   python app/main.py plan disk_d disk_e
    ```
 
 4. **Dry-Run**
 
    ```bash
-   python src/main.py execute plan.json --dry-run
+   python app/main.py execute plan.json --dry-run
    ```
 
 5. **Execute**
 
    ```bash
-   python src/main.py execute plan.json
+   python app/main.py execute plan.json
    ```
 
 6. **Report**
diff --git a/app/classification/engine.py b/app/classification/engine.py
index 1dd9a69..fbf105d 100644
--- a/app/classification/engine.py
+++ b/app/classification/engine.py
@@ -70,13 +70,13 @@ class ClassificationEngine:
         if disk:
             cursor.execute("""
                 SELECT path, checksum
-                FROM files_bak
-                WHERE disk = %s AND category IS NULL
+                FROM files
+                WHERE disk_label = %s AND category IS NULL
             """, (disk,))
         else:
             cursor.execute("""
                 SELECT path, checksum
-                FROM files_bak
+                FROM files
                 WHERE category IS NULL
             """)
 
@@ -149,7 +149,7 @@ class ClassificationEngine:
         from psycopg2.extras import execute_batch
 
         query = """
-            UPDATE files_bak
+            UPDATE files
             SET category = %s
             WHERE path = %s
         """
@@ -188,7 +188,7 @@ class ClassificationEngine:
                 category,
                 COUNT(*) as file_count,
                 SUM(size) as total_size
-            FROM files_bak
+            FROM files
             WHERE category IS NOT NULL
             GROUP BY category
             ORDER BY total_size DESC
@@ -214,7 +214,7 @@ class ClassificationEngine:
         conn = self._get_connection()
         cursor = conn.cursor()
 
-        cursor.execute("SELECT COUNT(*) FROM files_bak WHERE category IS NULL")
+        cursor.execute("SELECT COUNT(*) FROM files WHERE category IS NULL")
         count = cursor.fetchone()[0]
 
         cursor.close()
@@ -241,7 +241,7 @@ class ClassificationEngine:
         cursor = conn.cursor()
 
         cursor.execute("""
-            UPDATE files_bak
+            UPDATE files
             SET category = %s
             WHERE category = %s
         """, (new_category, old_category))
@@ -278,7 +278,7 @@ class ClassificationEngine:
         # Get categorized files
         cursor.execute("""
             SELECT path, category
-            FROM files_bak
+            FROM files
             WHERE category IS NOT NULL
         """)
 
@@ -326,7 +326,7 @@ class ClassificationEngine:
 
         cursor.execute("""
             SELECT DISTINCT category
-            FROM files_bak
+            FROM files
             WHERE category IS NOT NULL
             ORDER BY category
         """)
diff --git a/app/classification/ml.py b/app/classification/ml.py
index 6084de2..d334d90 100644
--- a/app/classification/ml.py
+++ b/app/classification/ml.py
@@ -241,7 +241,7 @@ def train_from_database(
     cursor = db_connection.cursor()
     cursor.execute("""
         SELECT path, category
-        FROM files_bak
+        FROM files
         WHERE category IS NOT NULL
     """)
 
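The classification hunks above point the batched category writes at the renamed `files` table via psycopg2's `execute_batch`. A minimal sketch of that update pattern, assuming illustrative connection settings and made-up (category, path) rows:

```python
# Minimal sketch of the batched category update targeting the renamed
# `files` table. Connection details and the sample rows are illustrative
# assumptions, not part of the patch.
import psycopg2
from psycopg2.extras import execute_batch

conn = psycopg2.connect(dbname="disk_reorganizer_db", user="disk_reorg_user")
with conn, conn.cursor() as cur:
    rows = [("documents", "docs/report.pdf"), ("media", "videos/clip.mp4")]
    execute_batch(cur, "UPDATE files SET category = %s WHERE path = %s", rows)
```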
diff --git a/app/deduplication/engine.py b/app/deduplication/engine.py
index 8d59a5d..c80d51c 100644
--- a/app/deduplication/engine.py
+++ b/app/deduplication/engine.py
@@ -70,17 +70,17 @@ class DeduplicationEngine:
         if disk:
             cursor.execute("""
                 SELECT path, size
-                FROM files_bak
-                WHERE disk = %s AND checksum IS NULL
+                FROM files
+                WHERE disk_label = %s AND checksum IS NULL
                 ORDER BY size DESC
-            """, (disk,))
+            """, (disk,))
         else:
             cursor.execute("""
                 SELECT path, size
-                FROM files_bak
+                FROM files
                 WHERE checksum IS NULL
                 ORDER BY size DESC
-            """)
+            """)
 
         files_to_process = cursor.fetchall()
         total_files = len(files_to_process)
@@ -108,10 +108,10 @@ class DeduplicationEngine:
             if checksum:
                 # Update database
                 cursor.execute("""
-                    UPDATE files_bak
+                    UPDATE files
                     SET checksum = %s, duplicate_of = %s
                     WHERE path = %s
-                """, (checksum, duplicate_of, str(path)))
+                """, (checksum, duplicate_of, str(path)))
 
                 stats.files_succeeded += 1
                 stats.bytes_processed += size
@@ -225,19 +225,19 @@ class DeduplicationEngine:
         if disk:
             cursor.execute("""
                 SELECT checksum, array_agg(path ORDER BY path) as paths
-                FROM files_bak
-                WHERE disk = %s AND checksum IS NOT NULL
+                FROM files
+                WHERE disk_label = %s AND checksum IS NOT NULL
                 GROUP BY checksum
                 HAVING COUNT(*) > 1
-            """, (disk,))
+            """, (disk,))
         else:
             cursor.execute("""
                 SELECT checksum, array_agg(path ORDER BY path) as paths
-                FROM files_bak
+                FROM files
                 WHERE checksum IS NOT NULL
                 GROUP BY checksum
                 HAVING COUNT(*) > 1
-            """)
+            """)
 
         duplicates = {}
         for checksum, paths in cursor.fetchall():
@@ -262,18 +262,18 @@ class DeduplicationEngine:
         stats = {}
 
         # Total files
-        cursor.execute("SELECT COUNT(*) FROM files_bak WHERE checksum IS NOT NULL")
+        cursor.execute("SELECT COUNT(*) FROM files WHERE checksum IS NOT NULL")
         stats['total_files'] = cursor.fetchone()[0]
 
         # Unique files
-        cursor.execute("SELECT COUNT(DISTINCT checksum) FROM files_bak WHERE checksum IS NOT NULL")
+        cursor.execute("SELECT COUNT(DISTINCT checksum) FROM files WHERE checksum IS NOT NULL")
         stats['unique_files'] = cursor.fetchone()[0]
 
         # Duplicate files
         stats['duplicate_files'] = stats['total_files'] - stats['unique_files']
 
         # Total size
-        cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak WHERE checksum IS NOT NULL")
+        cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files WHERE checksum IS NOT NULL")
         stats['total_size'] = cursor.fetchone()[0]
 
         # Unique size
@@ -281,10 +281,10 @@ class DeduplicationEngine:
             SELECT COALESCE(SUM(size), 0)
             FROM (
                 SELECT DISTINCT ON (checksum) size
-                FROM files_bak
+                FROM files
                 WHERE checksum IS NOT NULL
             ) AS unique_files
-        """)
+        """)
         stats['unique_size'] = cursor.fetchone()[0]
 
         # Wasted space
@@ -321,14 +321,14 @@ class DeduplicationEngine:
         cursor.execute("""
             WITH canonical AS (
                 SELECT DISTINCT ON (checksum) path, checksum
-                FROM files_bak
+                FROM files
                 WHERE checksum IS NOT NULL
                 ORDER BY checksum, path
             )
-            UPDATE files_bak
+            UPDATE files
             SET duplicate_of = NULL
             WHERE path IN (SELECT path FROM canonical)
-        """)
+        """)
 
         count = cursor.rowcount
         conn.commit()
diff --git a/app/deduplication/store.py b/app/deduplication/store.py
index 2d99d00..117a580 100644
--- a/app/deduplication/store.py
+++ b/app/deduplication/store.py
@@ -227,7 +227,7 @@ class HashStore:
         # Get all files with their hashes
         cursor.execute("""
             SELECT f.path, f.checksum
-            FROM files_bak f
+            FROM files f
             WHERE f.checksum IS NOT NULL
         """)
 
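The stats hunk derives wasted space as `total_size - unique_size`, where unique size keeps one copy per checksum via `DISTINCT ON`. The same arithmetic as a standalone sketch (connection settings are illustrative assumptions):

```python
# Standalone sketch of the wasted-space arithmetic from get_stats():
# total size of all hashed files minus one kept copy per checksum.
import psycopg2

conn = psycopg2.connect(dbname="disk_reorganizer_db", user="disk_reorg_user")
with conn.cursor() as cur:
    cur.execute("SELECT COALESCE(SUM(size), 0) FROM files WHERE checksum IS NOT NULL")
    total_size = cur.fetchone()[0]
    cur.execute("""
        SELECT COALESCE(SUM(size), 0)
        FROM (
            SELECT DISTINCT ON (checksum) size
            FROM files
            WHERE checksum IS NOT NULL
        ) AS unique_files
    """)
    unique_size = cur.fetchone()[0]
print(f"wasted: {total_size - unique_size} bytes")
```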
diff --git a/app/discovery/engine.py b/app/discovery/engine.py
index 4e677f6..cd2891a 100644
--- a/app/discovery/engine.py
+++ b/app/discovery/engine.py
@@ -60,7 +60,7 @@ class DiscoveryEngine:
                 size BIGINT NOT NULL,
                 modified_time DOUBLE PRECISION NOT NULL,
                 created_time DOUBLE PRECISION NOT NULL,
-                disk TEXT NOT NULL,
+                disk_label TEXT NOT NULL,
                 checksum TEXT,
                 status TEXT DEFAULT 'indexed',
                 category TEXT,
@@ -72,18 +72,18 @@ class DiscoveryEngine:
 
         # Create index on path
         cursor.execute("""
-            CREATE INDEX IF NOT EXISTS idx_files_path ON files_bak(path)
-        """)
+            CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)
+        """)
 
         # Create index on disk
         cursor.execute("""
-            CREATE INDEX IF NOT EXISTS idx_files_disk ON files_bak(disk)
-        """)
+            CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label)
+        """)
 
         # Create index on checksum
         cursor.execute("""
-            CREATE INDEX IF NOT EXISTS idx_files_checksum ON files_bak(checksum)
-        """)
+            CREATE INDEX IF NOT EXISTS idx_files_checksum ON files(checksum)
+        """)
 
         conn.commit()
         cursor.close()
@@ -136,7 +136,7 @@ class DiscoveryEngine:
                     size=file_meta.size,
                     modified_time=file_meta.modified_time,
                     created_time=file_meta.created_time,
-                    disk=disk
+                    disk_label=disk
                 )
                 batch.append(record)
 
@@ -193,7 +193,7 @@ class DiscoveryEngine:
             batch: List of FileRecord objects
         """
         query = """
-            INSERT INTO files_bak (path, size, modified_time, created_time, disk, checksum, status, category, duplicate_of)
+            INSERT INTO files (path, size, modified_time, created_time, disk_label, checksum, status, category, duplicate_of)
             VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
             ON CONFLICT (path) DO UPDATE SET
                 size = EXCLUDED.size,
@@ -207,7 +207,7 @@ class DiscoveryEngine:
                 record.size,
                 record.modified_time,
                 record.created_time,
-                record.disk,
+                record.disk_label,
                 record.checksum,
                 record.status,
                 record.category,
@@ -276,9 +276,9 @@ class DiscoveryEngine:
         cursor = conn.cursor()
 
         if disk:
-            cursor.execute("SELECT COUNT(*) FROM files_bak WHERE disk = %s", (disk,))
+            cursor.execute("SELECT COUNT(*) FROM files WHERE disk_label = %s", (disk,))
         else:
-            cursor.execute("SELECT COUNT(*) FROM files_bak")
+            cursor.execute("SELECT COUNT(*) FROM files")
 
         count = cursor.fetchone()[0]
         cursor.close()
@@ -298,9 +298,9 @@ class DiscoveryEngine:
         cursor = conn.cursor()
 
         if disk:
-            cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak WHERE disk = %s", (disk,))
+            cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files WHERE disk_label = %s", (disk,))
         else:
-            cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak")
+            cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files")
 
         total = cursor.fetchone()[0]
         cursor.close()
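Discovery writes batches through a path-keyed `INSERT ... ON CONFLICT`, so re-indexing a disk refreshes existing rows instead of inserting duplicates. A hedged sketch of the same upsert against the renamed table and columns (the sample row is a made-up assumption):

```python
# Sketch of the path-keyed upsert the discovery hunks switch to the `files`
# table: re-running an index refreshes size/mtime in place.
import psycopg2
from psycopg2.extras import execute_batch

upsert = """
    INSERT INTO files (path, size, modified_time, created_time, disk_label,
                       checksum, status, category, duplicate_of)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (path) DO UPDATE SET
        size = EXCLUDED.size,
        modified_time = EXCLUDED.modified_time
"""
conn = psycopg2.connect(dbname="disk_reorganizer_db", user="disk_reorg_user")
with conn, conn.cursor() as cur:
    execute_batch(cur, upsert, [("docs/a.txt", 123, 0.0, 0.0, "SMT",
                                 None, "indexed", None, None)])
```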
diff --git a/app/main.py b/app/main.py
index ae51554..4096926 100644
--- a/app/main.py
+++ b/app/main.py
@@ -37,7 +37,7 @@ class FileRecord:
     path: str
     size: int
     modified_time: float
-    disk: str
+    disk_label: str
     checksum: Optional[str] = None
     status: str = 'indexed'  # indexed, planned, moved, verified
 
@@ -49,11 +49,11 @@ class DiskReorganizer:
         """
         if db_config is None:
             db_config = {
-                'host': '192.168.1.159',
-                'port': 5432,
-                'database': 'disk_reorganizer_db',
-                'user': 'disk_reorg_user',
-                'password': 'heel-goed-wachtwoord'
+                'host': os.getenv('DB_HOST', 'localhost'),
+                'port': int(os.getenv('DB_PORT', 5432)),
+                'database': os.getenv('DB_NAME', 'disk_reorganizer_db'),
+                'user': os.getenv('DB_USER', 'disk_reorg_user'),
+                'password': os.getenv('DB_PASSWORD', 'heel-goed-wachtwoord')
             }
         self.db_config = db_config
         self.init_database()
@@ -127,12 +127,12 @@ class DiskReorganizer:
 
                     # PostgreSQL INSERT ... ON CONFLICT for upsert
                     cursor.execute("""
-                        INSERT INTO files_bak (path, size, modified_time, disk, checksum, status)
+                        INSERT INTO files (path, size, modified_time, disk_label, checksum, status)
                         VALUES (%s, %s, %s, %s, %s, %s)
                         ON CONFLICT (path) DO UPDATE SET
                             size = EXCLUDED.size,
                             modified_time = EXCLUDED.modified_time,
-                            disk = EXCLUDED.disk,
+                            disk_label = EXCLUDED.disk_label,
                             status = EXCLUDED.status
                     """, (rel_path, size, mtime, disk_name, None, 'indexed'))
 
@@ -174,9 +174,9 @@ class DiskReorganizer:
 
         try:
             cursor.execute("""
-                SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
-                FROM files_bak
-                GROUP BY disk
+                SELECT disk_label, SUM(size) as total_size, COUNT(*) as file_count
+                FROM files
+                GROUP BY disk_label
             """)
 
             usage = {}
@@ -215,7 +215,7 @@ class DiskReorganizer:
         cursor = conn.cursor()
 
         cursor.execute(
-            "SELECT path, size, modified_time FROM files_bak WHERE disk = %s ORDER BY size DESC",
+            "SELECT path, size, modified_time FROM files WHERE disk_label = %s ORDER BY size DESC",
             (target_disk,)
         )
         files_to_move = cursor.fetchall()
@@ -265,15 +265,15 @@ class DiskReorganizer:
                 'source_disk': target_disk,
                 'source_path': rel_path,
                 'dest_disk': dest_disk,
-                'dest_path': rel_path,  # Keep same relative path
+                'target_path': rel_path,  # Keep same relative path
                 'size': size
             }
             plan['operations'].append(op)
 
             # Store in database
             cursor.execute(
-                "INSERT INTO operations_bak (source_path, dest_path, operation_type) VALUES (%s, %s, %s)",
-                (f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move')
+                "INSERT INTO operations (source_path, target_path, operation_type, status) VALUES (%s, %s, %s, %s)",
+                (f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move', 'pending')
             )
 
         conn.commit()
@@ -347,10 +347,10 @@ class DiskReorganizer:
                 source_disk = op['source_disk']
                 source_path = op['source_path']
                 dest_disk = op['dest_disk']
-                dest_path = op['dest_path']
+                target_path = op['target_path']
 
                 source_full = Path(source_disk) / source_path
-                dest_full = Path(dest_disk) / dest_path
+                dest_full = Path(dest_disk) / target_path
 
                 # Dynamic progress display
                 elapsed = time.time() - start_time
@@ -384,7 +384,7 @@ class DiskReorganizer:
                     if self.verify_operation(source_full, dest_full):
                         # Update database
                         cursor.execute(
-                            "UPDATE files_bak SET disk = %s, status = 'moved' WHERE path = %s AND disk = %s",
+                            "UPDATE files SET disk_label = %s, status = 'moved' WHERE path = %s AND disk_label = %s",
                             (dest_disk, source_path, source_disk)
                         )
 
@@ -393,7 +393,7 @@ class DiskReorganizer:
 
                         # Log operation as executed
                         cursor.execute(
-                            "UPDATE operations_bak SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s",
+                            "UPDATE operations SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s",
                             (f"{source_disk}:{source_path}",)
                         )
 
@@ -407,7 +407,7 @@ class DiskReorganizer:
                 except Exception as e:
                     logger.error(f"\n  Error processing {source_path}: {e}")
                     cursor.execute(
-                        "UPDATE operations_bak SET error = %s WHERE source_path = %s",
+                        "UPDATE operations SET error = %s WHERE source_path = %s",
                         (str(e), f"{source_disk}:{source_path}")
                     )
                     error_count += 1
@@ -436,7 +436,7 @@ class DiskReorganizer:
 
         try:
             cursor.execute("""
-                SELECT status, COUNT(*), SUM(size) FROM files_bak GROUP BY status
+                SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
             """)
 
             print("\n=== FILE MIGRATION REPORT ===")
@@ -445,7 +445,7 @@ class DiskReorganizer:
                 print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}")
 
             cursor.execute("""
-                SELECT operation_type, executed, verified, COUNT(*) FROM operations_bak GROUP BY operation_type, executed, verified
+                SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
             """)
 
             print("\n=== OPERATIONS REPORT ===")
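With the hardcoded host and credentials replaced by `DB_*` environment variables, the same code runs against a local or remote PostgreSQL. A usage sketch, assuming illustrative host and credential values:

```python
# Pointing the tool at a non-default database via the new DB_* variables.
# Host and credentials here are illustrative assumptions, not defaults.
import os
import subprocess

env = dict(os.environ,
           DB_HOST="192.168.1.159",
           DB_PORT="5432",
           DB_NAME="disk_reorganizer_db",
           DB_USER="disk_reorg_user",
           DB_PASSWORD="change-me")
subprocess.run(["python", "app/main.py", "report"], env=env, check=True)
```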
diff --git a/app/migration/engine.py b/app/migration/engine.py
index ce97247..bf742a5 100644
--- a/app/migration/engine.py
+++ b/app/migration/engine.py
@@ -63,7 +63,7 @@ class MigrationEngine:
             CREATE TABLE IF NOT EXISTS operations (
                 id SERIAL PRIMARY KEY,
                 source_path TEXT NOT NULL,
-                dest_path TEXT NOT NULL,
+                target_path TEXT NOT NULL,
                 operation_type TEXT NOT NULL,
                 size BIGINT DEFAULT 0,
                 status TEXT DEFAULT 'pending',
@@ -77,7 +77,7 @@ class MigrationEngine:
         # Create index on status
         cursor.execute("""
             CREATE INDEX IF NOT EXISTS idx_operations_status
-            ON operations_bak(status)
+            ON operations(status)
         """)
 
         conn.commit()
@@ -107,7 +107,7 @@ class MigrationEngine:
         params = []
 
         if disk:
-            conditions.append("disk = %s")
+            conditions.append("disk_label = %s")
             params.append(disk)
 
         if category:
@@ -116,7 +116,7 @@ class MigrationEngine:
 
         query = f"""
             SELECT path, size, category, duplicate_of
-            FROM files_bak
+            FROM files
             WHERE {' AND '.join(conditions)}
             ORDER BY category, path
         """
@@ -133,7 +133,7 @@ class MigrationEngine:
             source = Path(path_str)
 
             # Determine destination
-            dest_path = self.target_base / file_category / source.name
+            target_path = self.target_base / file_category / source.name
 
             # Determine operation type
             if duplicate_of:
@@ -145,7 +145,7 @@ class MigrationEngine:
 
             operation = OperationRecord(
                 source_path=source,
-                dest_path=dest_path,
+                target_path=target_path,
                 operation_type=operation_type,
                 size=size
             )
@@ -200,7 +200,7 @@ class MigrationEngine:
                     # In dry run, just log what would happen
                     self.logger.debug(
                         f"[DRY RUN] Would {operation.operation_type}: "
-                        f"{operation.source_path} -> {operation.dest_path}"
+                        f"{operation.source_path} -> {operation.target_path}"
                     )
                     stats.files_succeeded += 1
                 else:
@@ -261,7 +261,7 @@ class MigrationEngine:
             # Execute migration
             success = strategy.migrate(
                 operation.source_path,
-                operation.dest_path,
+                operation.target_path,
                 verify=self.processing_config.verify_operations
             )
 
@@ -293,14 +293,14 @@ class MigrationEngine:
         cursor = conn.cursor()
 
         cursor.execute("""
-            INSERT INTO operations_bak (
-                source_path, dest_path, operation_type, size,
+            INSERT INTO operations (
+                source_path, target_path, operation_type, bytes_processed,
                 status, error, executed_at, verified
             ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
         """, (
             str(operation.source_path),
-            str(operation.dest_path),
+            str(operation.target_path),
             operation.operation_type,
             operation.size,
             operation.status,
@@ -321,22 +321,22 @@ class MigrationEngine:
         Returns:
             True if rollback successful
         """
-        self.logger.warning(f"Rolling back: {operation.dest_path}")
+        self.logger.warning(f"Rolling back: {operation.target_path}")
 
         try:
             # Remove destination
-            if operation.dest_path.exists():
-                operation.dest_path.unlink()
+            if operation.target_path.exists():
+                operation.target_path.unlink()
 
             # Update database
             conn = self._get_connection()
             cursor = conn.cursor()
 
             cursor.execute("""
-                UPDATE operations_bak
+                UPDATE operations
                 SET status = 'rolled_back'
-                WHERE source_path = %s AND dest_path = %s
-            """, (str(operation.source_path), str(operation.dest_path)))
+                WHERE source_path = %s AND target_path = %s
+            """, (str(operation.source_path), str(operation.target_path)))
 
             conn.commit()
             cursor.close()
@@ -344,7 +344,7 @@ class MigrationEngine:
             return True
 
         except Exception as e:
-            self.logger.error(f"Rollback failed: {operation.dest_path}: {e}")
+            self.logger.error(f"Rollback failed: {operation.target_path}: {e}")
             return False
 
     def get_migration_stats(self) -> dict:
@@ -359,13 +359,13 @@ class MigrationEngine:
         stats = {}
 
         # Total operations
-        cursor.execute("SELECT COUNT(*) FROM operations_bak")
+        cursor.execute("SELECT COUNT(*) FROM operations")
         stats['total_operations'] = cursor.fetchone()[0]
 
         # Operations by status
         cursor.execute("""
             SELECT status, COUNT(*)
-            FROM operations_bak
+            FROM operations
             GROUP BY status
         """)
 
@@ -375,7 +375,7 @@ class MigrationEngine:
         # Total size migrated
         cursor.execute("""
             SELECT COALESCE(SUM(size), 0)
-            FROM operations_bak
+            FROM operations
             WHERE status = 'completed'
         """)
         stats['total_size_migrated'] = cursor.fetchone()[0]
@@ -396,8 +396,8 @@ class MigrationEngine:
         cursor = conn.cursor()
 
         cursor.execute("""
-            SELECT source_path, dest_path, operation_type
-            FROM operations_bak
+            SELECT source_path, target_path, operation_type
+            FROM operations
             WHERE status = 'completed' AND verified = FALSE
         """)
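The last hunk above enables a re-verification pass: completed-but-unverified operations can be pulled back out of the renamed `operations` table. A read-only sketch of that pass, assuming illustrative connection settings; the existence check stands in for real hash verification:

```python
# Read-only sketch of the re-verification pass over the renamed columns.
import psycopg2
from pathlib import Path

conn = psycopg2.connect(dbname="disk_reorganizer_db", user="disk_reorg_user")
with conn.cursor() as cur:
    cur.execute("""
        SELECT source_path, target_path, operation_type
        FROM operations
        WHERE status = 'completed' AND verified = FALSE
    """)
    for source, target, op_type in cur.fetchall():
        # Existence is a stand-in; real verification compares checksums.
        print(op_type, source, "->", target, "exists:", Path(target).exists())
```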
diff --git a/app/shared/_protocols.py b/app/shared/_protocols.py
index 3120edc..93d7a5c 100644
--- a/app/shared/_protocols.py
+++ b/app/shared/_protocols.py
@@ -12,7 +12,7 @@ class FileRecord:
     size: int
     modified_time: float
     created_time: float
-    disk: str
+    disk_label: str
     checksum: str | None = None
     status: str = 'indexed'  # indexed, planned, moved, verified
     category: str | None = None
@@ -23,7 +23,7 @@ class FileRecord:
 class OperationRecord:
     """Record of a migration operation"""
     source_path: Path
-    dest_path: Path
+    target_path: Path
     operation_type: str  # move, copy, hardlink, symlink
     status: str = 'pending'  # pending, in_progress, completed, failed
     error: str | None = None
diff --git a/app/shared/models.py b/app/shared/models.py
index e864b08..49ad778 100644
--- a/app/shared/models.py
+++ b/app/shared/models.py
@@ -12,7 +12,7 @@ class FileRecord:
     size: int
     modified_time: float
     created_time: float
-    disk: str
+    disk_label: str
     checksum: Optional[str] = None
     status: str = 'indexed'  # indexed, planned, moved, verified
     category: Optional[str] = None
@@ -25,7 +25,7 @@ class FileRecord:
             'size': self.size,
             'modified_time': self.modified_time,
             'created_time': self.created_time,
-            'disk': self.disk,
+            'disk_label': self.disk_label,
             'checksum': self.checksum,
             'status': self.status,
             'category': self.category,
@@ -37,7 +37,7 @@ class FileRecord:
 class OperationRecord:
     """Record of a migration operation"""
     source_path: Path
-    dest_path: Path
+    target_path: Path
     operation_type: str  # move, copy, hardlink, symlink
     size: int = 0
     status: str = 'pending'  # pending, in_progress, completed, failed
@@ -49,7 +49,7 @@ class OperationRecord:
         """Convert to dictionary for serialization"""
         return {
             'source_path': str(self.source_path),
-            'dest_path': str(self.dest_path),
+            'target_path': str(self.target_path),
             'operation_type': self.operation_type,
             'size': self.size,
             'status': self.status,
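The shared dataclasses now carry `disk_label` and `target_path` end to end. A small round-trip sketch, assuming the `app` package is importable (e.g. run from the repo root); paths and sizes are made-up examples:

```python
# Round-trip sketch for the renamed dataclass fields.
from pathlib import Path
from app.shared.models import FileRecord, OperationRecord

rec = FileRecord(path="docs/report.pdf", size=1024, modified_time=0.0,
                 created_time=0.0, disk_label="SMT")
op = OperationRecord(source_path=Path("/mnt/source/docs/report.pdf"),
                     target_path=Path("/mnt/target/documents/report.pdf"),
                     operation_type="move")
print(rec.to_dict()["disk_label"], op.to_dict()["target_path"])
```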
"plan", "/media/mike/SMT", "SMT"] profiles: - plan-only networks: @@ -194,7 +194,7 @@ services: volumes: - ./reports:/app/reports - ./logs:/app/logs - command: ["python", "app/main.py", "report", "--format", "html"] + command: ["python", "app/main.py", "report"] profiles: - report-only networks: diff --git a/setup.sh b/setup.sh index aee06ad..62961b4 100644 --- a/setup.sh +++ b/setup.sh @@ -40,11 +40,11 @@ echo "✅ Setup complete!" echo "" echo "📋 Available commands:" echo " docker compose up -d # Start all services" -echo " docker compose up --profile index-only index # Run index only" -echo " docker compose up --profile plan-only plan # Generate plan" -echo " docker compose up --profile dry-run-only dry-run # Dry run" -echo " docker compose up --profile execute-only execute # Execute migration" -echo " docker compose up --profile report-only report # Generate report" +echo " docker compose --profile index-only up index # Run index only" +echo " docker compose --profile plan-only up plan # Generate plan" +echo " docker compose --profile dry-run-only up dry-run # Dry run" +echo " docker compose --profile execute-only up execute # Execute migration" +echo " docker compose --profile report-only up report # Generate report" echo "" echo "🌐 Access monitoring:" echo " - PostgreSQL Admin: http://localhost:5050" diff --git a/sql/init.sql b/sql/init.sql index 3b367c4..956fbc0 100644 --- a/sql/init.sql +++ b/sql/init.sql @@ -13,9 +13,12 @@ CREATE TABLE IF NOT EXISTS files ( modified_time TIMESTAMP WITH TIME ZONE, created_time TIMESTAMP WITH TIME ZONE, file_hash VARCHAR(64), -- SHA-256 hash + checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility) category VARCHAR(50), disk_label VARCHAR(50), last_verified TIMESTAMP WITH TIME ZONE, + status VARCHAR(20) DEFAULT 'indexed', + duplicate_of TEXT, -- Path to canonical file if this is a duplicate -- Metadata metadata JSONB DEFAULT '{}', @@ -36,8 +39,13 @@ CREATE TABLE IF NOT EXISTS operations ( target_path TEXT, status VARCHAR(20) NOT NULL, + -- Legacy compatibility fields + executed INTEGER DEFAULT 0, + verified INTEGER DEFAULT 0, + error TEXT, + -- File reference - file_id UUID REFERENCES files_bak(id) ON DELETE SET NULL, + file_id UUID REFERENCES files(id) ON DELETE SET NULL, -- Performance metrics duration_ms INTEGER, @@ -54,6 +62,7 @@ CREATE TABLE IF NOT EXISTS operations ( -- Audit fields started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, completed_at TIMESTAMP WITH TIME ZONE, + executed_at TIMESTAMP WITH TIME ZONE, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); @@ -89,14 +98,15 @@ CREATE TABLE IF NOT EXISTS migration_plans ( ); -- Indexes for performance -CREATE INDEX IF NOT EXISTS idx_files_path ON files_bak(path); -CREATE INDEX IF NOT EXISTS idx_files_hash ON files_bak(file_hash); -CREATE INDEX IF NOT EXISTS idx_files_disk ON files_bak(disk_label); -CREATE INDEX IF NOT EXISTS idx_files_category ON files_bak(category); +CREATE INDEX IF NOT EXISTS idx_files_path ON files(path); +CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash); +CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label); +CREATE INDEX IF NOT EXISTS idx_files_category ON files(category); +CREATE INDEX IF NOT EXISTS idx_files_status ON files(status); -CREATE INDEX IF NOT EXISTS idx_operations_status ON operations_bak(status); -CREATE INDEX IF NOT EXISTS idx_operations_created ON operations_bak(created_at); -CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations_bak(file_id); +CREATE INDEX IF NOT EXISTS 
diff --git a/sql/init.sql b/sql/init.sql
index 3b367c4..956fbc0 100644
--- a/sql/init.sql
+++ b/sql/init.sql
@@ -13,9 +13,12 @@ CREATE TABLE IF NOT EXISTS files (
     modified_time TIMESTAMP WITH TIME ZONE,
     created_time TIMESTAMP WITH TIME ZONE,
     file_hash VARCHAR(64),  -- SHA-256 hash
+    checksum VARCHAR(64),  -- Alias for file_hash (legacy compatibility)
     category VARCHAR(50),
     disk_label VARCHAR(50),
     last_verified TIMESTAMP WITH TIME ZONE,
+    status VARCHAR(20) DEFAULT 'indexed',
+    duplicate_of TEXT,  -- Path to canonical file if this is a duplicate
 
     -- Metadata
     metadata JSONB DEFAULT '{}',
@@ -36,8 +39,13 @@ CREATE TABLE IF NOT EXISTS operations (
     target_path TEXT,
     status VARCHAR(20) NOT NULL,
 
+    -- Legacy compatibility fields
+    executed INTEGER DEFAULT 0,
+    verified INTEGER DEFAULT 0,
+    error TEXT,
+
     -- File reference
-    file_id UUID REFERENCES files_bak(id) ON DELETE SET NULL,
+    file_id UUID REFERENCES files(id) ON DELETE SET NULL,
 
     -- Performance metrics
     duration_ms INTEGER,
@@ -54,6 +62,7 @@ CREATE TABLE IF NOT EXISTS operations (
     -- Audit fields
     started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
     completed_at TIMESTAMP WITH TIME ZONE,
+    executed_at TIMESTAMP WITH TIME ZONE,
     created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
 );
 
@@ -89,14 +98,15 @@ CREATE TABLE IF NOT EXISTS migration_plans (
 );
 
 -- Indexes for performance
-CREATE INDEX IF NOT EXISTS idx_files_path ON files_bak(path);
-CREATE INDEX IF NOT EXISTS idx_files_hash ON files_bak(file_hash);
-CREATE INDEX IF NOT EXISTS idx_files_disk ON files_bak(disk_label);
-CREATE INDEX IF NOT EXISTS idx_files_category ON files_bak(category);
+CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
+CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash);
+CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
+CREATE INDEX IF NOT EXISTS idx_files_category ON files(category);
+CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
 
-CREATE INDEX IF NOT EXISTS idx_operations_status ON operations_bak(status);
-CREATE INDEX IF NOT EXISTS idx_operations_created ON operations_bak(created_at);
-CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations_bak(file_id);
+CREATE INDEX IF NOT EXISTS idx_operations_status ON operations(status);
+CREATE INDEX IF NOT EXISTS idx_operations_created ON operations(created_at);
+CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations(file_id);
 
 CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store(canonical_path);
 
@@ -110,7 +120,7 @@ END;
 $$ language 'plpgsql';
 
 -- Triggers for automatic updated_at
-CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files_bak
+CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files
     FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
 
 -- View for operational dashboard
@@ -122,7 +132,7 @@ SELECT
     AVG(o.duration_ms) as avg_duration_ms,
     MIN(o.started_at) as earliest_operation,
     MAX(o.completed_at) as latest_operation
-FROM operations_bak o
+FROM operations o
 WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
 GROUP BY o.status;
 
@@ -135,7 +145,7 @@ SELECT
     AVG(size) as avg_file_size,
     MIN(created_time) as oldest_file,
     MAX(modified_time) as newest_file
-FROM files_bak
+FROM files
 GROUP BY disk_label;
 
 -- Insert default configuration
diff --git a/sql/setup_database.sql b/sql/setup_database.sql
index 4f0818e..8eee7b5 100644
--- a/sql/setup_database.sql
+++ b/sql/setup_database.sql
@@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS files (
     path TEXT PRIMARY KEY,
     size BIGINT NOT NULL,
     modified_time DOUBLE PRECISION NOT NULL,
-    disk TEXT NOT NULL,
+    disk_label TEXT NOT NULL,
     checksum TEXT,
     status TEXT DEFAULT 'indexed',
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
@@ -31,14 +31,14 @@ CREATE TABLE IF NOT EXISTS files (
 );
 
 -- Create index on disk column for faster queries
-CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk);
+CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
 CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
 
 -- Create operations table
 CREATE TABLE IF NOT EXISTS operations (
     id SERIAL PRIMARY KEY,
     source_path TEXT NOT NULL,
-    dest_path TEXT NOT NULL,
+    target_path TEXT NOT NULL,
     operation_type TEXT NOT NULL,
     executed INTEGER DEFAULT 0,
     verified INTEGER DEFAULT 0,
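With `disk_label` now present in both schemas, the per-disk rollup that init.sql exposes as a summary view can also be queried directly from `files`. A quick sketch, assuming illustrative connection settings:

```python
# Per-disk rollup straight from the files table after the rename
# (mirrors the summary view defined in init.sql).
import psycopg2

conn = psycopg2.connect(dbname="disk_reorganizer_db", user="disk_reorg_user")
with conn.cursor() as cur:
    cur.execute("""
        SELECT disk_label, COUNT(*) AS file_count,
               COALESCE(SUM(size), 0) AS total_size
        FROM files
        GROUP BY disk_label
        ORDER BY total_size DESC
    """)
    for disk_label, file_count, total_size in cur.fetchall():
        print(f"{disk_label}: {file_count} files, {total_size} bytes")
```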