first-working-version

This commit is contained in:
Tour
2025-12-10 10:33:29 +01:00
parent 0ad3c0063a
commit d401462be8
6 changed files with 1324 additions and 73 deletions

68
.gitignore vendored Normal file

@@ -0,0 +1,68 @@
### PythonVanilla template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

1014
README.md Normal file

File diff suppressed because it is too large

9
defrag.iml Normal file

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

5
requirements.txt Normal file

@@ -0,0 +1,5 @@
# PostgreSQL database adapter for Python
psycopg2-binary>=2.9.9
# Alternative: psycopg2>=2.9.9 (requires PostgreSQL development libraries)
# Use psycopg2-binary for easier installation without compilation

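Both packages expose the same importable `psycopg2` module, so the application code is unaffected by which one is installed. A minimal connectivity check, as a sketch only: it reuses the database and credentials this commit sets up, and the host matches the script's default but may differ in your environment.

import psycopg2  # provided by psycopg2-binary as well as psycopg2

conn = psycopg2.connect(host="192.168.1.159", port=5432,
                        dbname="disk_reorganizer_db",
                        user="disk_reorg_user",
                        password="heel-goed-wachtwoord")
with conn.cursor() as cur:
    cur.execute("SELECT version()")
    print(cur.fetchone()[0])
conn.close()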
79
setup_database.sql Normal file

@@ -0,0 +1,79 @@
-- PostgreSQL Database Setup Script for Disk Reorganizer
-- Database: disk_reorganizer_db
-- User: disk_reorg_user
-- Create the database (run as superuser: auction)
CREATE DATABASE disk_reorganizer_db
WITH
ENCODING = 'UTF8'
LC_COLLATE = 'en_US.UTF-8'
LC_CTYPE = 'en_US.UTF-8'
TEMPLATE = template0;
-- Connect to the new database
\c disk_reorganizer_db
-- Create the user
CREATE USER disk_reorg_user WITH PASSWORD 'heel-goed-wachtwoord';
-- Create files table
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
size BIGINT NOT NULL,
modified_time DOUBLE PRECISION NOT NULL,
disk TEXT NOT NULL,
checksum TEXT,
status TEXT DEFAULT 'indexed',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create index on disk column for faster queries
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
-- Create operations table
CREATE TABLE IF NOT EXISTS operations (
id SERIAL PRIMARY KEY,
source_path TEXT NOT NULL,
dest_path TEXT NOT NULL,
operation_type TEXT NOT NULL,
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP
);
-- Create index on operations for faster lookups
CREATE INDEX IF NOT EXISTS idx_operations_executed ON operations(executed);
CREATE INDEX IF NOT EXISTS idx_operations_source ON operations(source_path);
-- Grant privileges to disk_reorg_user
GRANT CONNECT ON DATABASE disk_reorganizer_db TO disk_reorg_user;
GRANT USAGE ON SCHEMA public TO disk_reorg_user;
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE files TO disk_reorg_user;
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE operations TO disk_reorg_user;
GRANT USAGE, SELECT ON SEQUENCE operations_id_seq TO disk_reorg_user;
-- Create function to update updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create trigger for files table
CREATE TRIGGER update_files_updated_at
BEFORE UPDATE ON files
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Display success message
\echo 'Database setup completed successfully!'
\echo 'Database: disk_reorganizer_db'
\echo 'User: disk_reorg_user'
\echo 'Tables created: files, operations'
\echo 'Indexes and triggers created'

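Once the script has been run through psql as the superuser, the grants, the ON CONFLICT upsert path, and the updated_at trigger can be smoke-tested from Python. A sketch under stated assumptions: the role and password are the ones created above, the host is the one the tool defaults to, and the "__probe__" path is a throwaway value.

# Smoke-test the schema as disk_reorg_user: upsert a probe row, confirm the
# BEFORE UPDATE trigger refreshes updated_at, then clean up.
import time
import psycopg2

conn = psycopg2.connect(host="192.168.1.159", port=5432,
                        dbname="disk_reorganizer_db",
                        user="disk_reorg_user",
                        password="heel-goed-wachtwoord")
conn.autocommit = True  # one transaction per statement, so CURRENT_TIMESTAMP advances
cur = conn.cursor()
# Same ON CONFLICT pattern the indexer relies on.
cur.execute("""
    INSERT INTO files (path, size, modified_time, disk)
    VALUES (%s, %s, %s, %s)
    ON CONFLICT (path) DO UPDATE SET size = EXCLUDED.size
""", ("__probe__", 0, time.time(), "test"))
cur.execute("SELECT updated_at FROM files WHERE path = '__probe__'")
first = cur.fetchone()[0]
time.sleep(1)
cur.execute("UPDATE files SET size = 1 WHERE path = '__probe__'")
cur.execute("SELECT updated_at FROM files WHERE path = '__probe__'")
assert cur.fetchone()[0] > first  # the trigger refreshed the timestamp
cur.execute("DELETE FROM files WHERE path = '__probe__'")
cur.close()
conn.close()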

@@ -6,7 +6,9 @@ Three modes: index, plan, execute
import os
import sys
import sqlite3
import psycopg2
from psycopg2 import sql
from psycopg2.extras import RealDictCursor
import shutil
import hashlib
import argparse
@@ -16,6 +18,7 @@ from dataclasses import dataclass, asdict
from typing import List, Dict, Optional, Tuple
from datetime import datetime
import logging
import time
# Setup logging
logging.basicConfig(
@@ -39,39 +42,53 @@ class FileRecord:
status: str = 'indexed' # indexed, planned, moved, verified
class DiskReorganizer:
def __init__(self, db_path: str = "file_index.db"):
self.db_path = db_path
def __init__(self, db_config: Dict = None):
"""
Initialize DiskReorganizer with PostgreSQL connection
:param db_config: Database configuration dict with host, port, database, user, password
"""
if db_config is None:
db_config = {
'host': '192.168.1.159',
'port': 5432,
'database': 'disk_reorganizer_db',
'user': 'disk_reorg_user',
'password': 'heel-goed-wachtwoord'
}
self.db_config = db_config
self.init_database()
def get_connection(self):
"""Get PostgreSQL database connection"""
return psycopg2.connect(**self.db_config)
def init_database(self):
"""Initialize SQLite database"""
with sqlite3.connect(self.db_path) as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
size INTEGER,
modified_time REAL,
disk TEXT,
checksum TEXT,
status TEXT DEFAULT 'indexed'
)
""")
conn.execute("""
CREATE TABLE IF NOT EXISTS operations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_path TEXT,
dest_path TEXT,
operation_type TEXT,
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT
)
""")
conn.commit()
"""Verify PostgreSQL database connection and tables exist"""
try:
conn = self.get_connection()
cursor = conn.cursor()
# Test connection and verify tables exist
cursor.execute("""
SELECT table_name FROM information_schema.tables
WHERE table_schema = 'public' AND table_name IN ('files', 'operations')
""")
tables = cursor.fetchall()
if len(tables) < 2:
logger.error("Database tables not found! Please run setup_database.sh first.")
raise Exception("Database not properly initialized. Run setup_database.sh")
cursor.close()
conn.close()
logger.info("Database connection verified successfully")
except psycopg2.Error as e:
logger.error(f"Database connection failed: {e}")
raise
def index_disk(self, disk_root: str, disk_name: str):
"""
Index all files on a disk/partition
Index all files on a disk/partition with dynamic progress display
:param disk_root: Root path of disk (e.g., 'D:\\')
:param disk_name: Logical name for the disk
"""
@@ -84,10 +101,12 @@ class DiskReorganizer:
files_count = 0
total_size = 0
start_time = time.time()
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
conn = self.get_connection()
cursor = conn.cursor()
try:
# Walk through all files
for root, dirs, files in os.walk(disk_path):
# Skip system directories
@@ -106,36 +125,62 @@ class DiskReorganizer:
# Calculate relative path for portability
rel_path = str(file_path.relative_to(disk_path))
cursor.execute(
"INSERT OR REPLACE INTO files VALUES (?, ?, ?, ?, ?, ?)",
(rel_path, size, mtime, disk_name, None, 'indexed')
)
# PostgreSQL INSERT ... ON CONFLICT for upsert
cursor.execute("""
INSERT INTO files (path, size, modified_time, disk, checksum, status)
VALUES (%s, %s, %s, %s, %s, %s)
ON CONFLICT (path) DO UPDATE SET
size = EXCLUDED.size,
modified_time = EXCLUDED.modified_time,
disk = EXCLUDED.disk,
status = EXCLUDED.status
""", (rel_path, size, mtime, disk_name, None, 'indexed'))
files_count += 1
total_size += size
# Dynamic progress display - update every 100 files
if files_count % 100 == 0:
elapsed = time.time() - start_time
rate = files_count / elapsed if elapsed > 0 else 0
# Truncate path for display
display_path = str(file_path)
if len(display_path) > 60:
display_path = '...' + display_path[-57:]
# Use \r to overwrite the line
print(f"\rIndexing: {files_count:,} files | {self.format_size(total_size)} | {rate:.0f} files/s | {display_path}", end='', flush=True)
# Commit every 1000 files for performance
if files_count % 1000 == 0:
logger.info(f"Indexed {files_count} files, {self.format_size(total_size)}...")
conn.commit()
except Exception as e:
logger.warning(f"Skipping {file_path}: {e}")
logger.warning(f"\nSkipping {file_path}: {e}")
continue
conn.commit()
print() # New line after progress display
logger.info(f"Completed indexing {disk_name}: {files_count} files, {self.format_size(total_size)}")
logger.info(f"Completed indexing {disk_name}: {files_count} files, {self.format_size(total_size)}")
finally:
cursor.close()
conn.close()
def calculate_disk_usage(self) -> Dict[str, Dict]:
"""Calculate current usage per disk"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
FROM files
GROUP BY disk
""")
conn = self.get_connection()
cursor = conn.cursor()
try:
cursor.execute("""
SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
FROM files
GROUP BY disk
""")
usage = {}
for row in cursor:
for row in cursor.fetchall():
disk = row[0]
size = row[1] or 0
count = row[2]
@@ -146,6 +191,9 @@ class DiskReorganizer:
}
return usage
finally:
cursor.close()
conn.close()
def plan_migration(self, target_disk: str, destination_disks: List[str]) -> Dict:
"""
@@ -163,12 +211,16 @@ class DiskReorganizer:
return {}
# Get files on target disk
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute(
"SELECT path, size, modified_time FROM files WHERE disk = ? ORDER BY size DESC",
(target_disk,)
)
files_to_move = cursor.fetchall()
conn = self.get_connection()
cursor = conn.cursor()
cursor.execute(
"SELECT path, size, modified_time FROM files WHERE disk = %s ORDER BY size DESC",
(target_disk,)
)
files_to_move = cursor.fetchall()
cursor.close()
conn.close()
target_disk_usage = usage[target_disk]['size']
logger.info(f"Need to move {len(files_to_move)} files, {self.format_size(target_disk_usage)}")
@@ -198,9 +250,10 @@ class DiskReorganizer:
'destination_disks': destination_disks
}
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
conn = self.get_connection()
cursor = conn.cursor()
try:
for file_info in files_to_move:
rel_path, size, mtime = file_info
@@ -219,11 +272,14 @@ class DiskReorganizer:
# Store in database
cursor.execute(
"INSERT INTO operations (source_path, dest_path, operation_type) VALUES (?, ?, ?)",
"INSERT INTO operations (source_path, dest_path, operation_type) VALUES (%s, %s, %s)",
(f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move')
)
conn.commit()
finally:
cursor.close()
conn.close()
# Save plan to JSON
plan_file = f"migration_plan_{target_disk}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
@@ -281,10 +337,12 @@ class DiskReorganizer:
success_count = 0
error_count = 0
start_time = time.time()
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
conn = self.get_connection()
cursor = conn.cursor()
try:
for i, op in enumerate(operations, 1):
source_disk = op['source_disk']
source_path = op['source_path']
@@ -294,15 +352,22 @@ class DiskReorganizer:
source_full = Path(source_disk) / source_path
dest_full = Path(dest_disk) / dest_path
logger.info(f"[{i}/{len(operations)}] {source_full} -> {dest_full}")
# Dynamic progress display
elapsed = time.time() - start_time
rate = i / elapsed if elapsed > 0 else 0
eta = (len(operations) - i) / rate if rate > 0 else 0
display_path = str(source_path)
if len(display_path) > 50:
display_path = '...' + display_path[-47:]
print(f"\r[{i}/{len(operations)}] {success_count} OK, {error_count} ERR | {rate:.1f} files/s | ETA: {int(eta)}s | {display_path}", end='', flush=True)
if dry_run:
# Simulate
if source_full.exists():
logger.info(f" Would move {self.format_size(op['size'])}")
success_count += 1
else:
logger.warning(f" Source does not exist!")
logger.warning(f"\n Source does not exist: {source_full}")
error_count += 1
continue
@@ -319,7 +384,7 @@ class DiskReorganizer:
if self.verify_operation(source_full, dest_full):
# Update database
cursor.execute(
"UPDATE files SET disk = ?, status = 'moved' WHERE path = ? AND disk = ?",
"UPDATE files SET disk = %s, status = 'moved' WHERE path = %s AND disk = %s",
(dest_disk, source_path, source_disk)
)
@@ -328,22 +393,21 @@ class DiskReorganizer:
# Log operation as executed
cursor.execute(
"UPDATE operations SET executed = 1 WHERE source_path = ?",
"UPDATE operations SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s",
(f"{source_disk}:{source_path}",)
)
logger.info(f" ✓ Moved and verified")
success_count += 1
else:
raise Exception("Verification failed")
else:
logger.warning(f" Source missing, skipping")
logger.warning(f"\n Source missing: {source_full}")
error_count += 1
except Exception as e:
logger.error(f" Error: {e}")
logger.error(f"\n Error processing {source_path}: {e}")
cursor.execute(
"UPDATE operations SET error = ? WHERE source_path = ?",
"UPDATE operations SET error = %s WHERE source_path = %s",
(str(e), f"{source_disk}:{source_path}")
)
error_count += 1
@@ -353,6 +417,11 @@ class DiskReorganizer:
conn.commit()
conn.commit()
print() # New line after progress display
finally:
cursor.close()
conn.close()
logger.info(f"Migration complete: {success_count} success, {error_count} errors")
@@ -362,28 +431,35 @@ class DiskReorganizer:
def generate_report(self):
"""Generate status report"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.execute("""
SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
""")
conn = self.get_connection()
cursor = conn.cursor()
try:
cursor.execute("""
SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
""")
print("\n=== FILE MIGRATION REPORT ===")
for row in cursor:
for row in cursor.fetchall():
status, count, size = row
print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}")
cursor = conn.execute("""
SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
""")
cursor.execute("""
SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
""")
print("\n=== OPERATIONS REPORT ===")
for row in cursor:
for row in cursor.fetchall():
op_type, executed, verified, count = row
status = "EXECUTED" if executed else "PENDING"
if verified:
status += "+VERIFIED"
print(f"{op_type:10} {status:15}: {count} operations")
finally:
cursor.close()
conn.close()
@staticmethod
def format_size(size: int) -> str:
"""Format bytes to human readable string"""