first-working-version

Tour
2025-12-10 10:33:29 +01:00
parent 0ad3c0063a
commit d401462be8
6 changed files with 1324 additions and 73 deletions

68
.gitignore vendored Normal file
View File

@@ -0,0 +1,68 @@
### PythonVanilla template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

1014
README.md Normal file

File diff suppressed because it is too large.

9
defrag.iml Normal file
View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

5
requirements.txt Normal file
View File

@@ -0,0 +1,5 @@
# PostgreSQL database adapter for Python
psycopg2-binary>=2.9.9
# Alternative: psycopg2>=2.9.9 (requires PostgreSQL development libraries)
# Use psycopg2-binary for easier installation without compilation
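A quick way to confirm the binary wheel installed and the server is reachable is a one-off version query. A minimal sketch, assuming the host and credentials used elsewhere in this commit:

import psycopg2

# Connection details mirror setup_database.sql; adjust for your environment
conn = psycopg2.connect(
    host='192.168.1.159',
    port=5432,
    dbname='disk_reorganizer_db',
    user='disk_reorg_user',
    password='heel-goed-wachtwoord',
)
cur = conn.cursor()
cur.execute('SELECT version()')
print(cur.fetchone()[0])  # e.g. "PostgreSQL 16.x ..."
cur.close()
conn.close()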

79
setup_database.sql Normal file
View File

@@ -0,0 +1,79 @@
-- PostgreSQL Database Setup Script for Disk Reorganizer
-- Database: disk_reorganizer_db
-- User: disk_reorg_user
-- Create the database (run as superuser: auction)
CREATE DATABASE disk_reorganizer_db
WITH
ENCODING = 'UTF8'
LC_COLLATE = 'en_US.UTF-8'
LC_CTYPE = 'en_US.UTF-8'
TEMPLATE = template0;
-- Connect to the new database
\c disk_reorganizer_db
-- Create the user
CREATE USER disk_reorg_user WITH PASSWORD 'heel-goed-wachtwoord';
-- Create files table
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
size BIGINT NOT NULL,
modified_time DOUBLE PRECISION NOT NULL,
disk TEXT NOT NULL,
checksum TEXT,
status TEXT DEFAULT 'indexed',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create index on disk column for faster queries
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
-- Create operations table
CREATE TABLE IF NOT EXISTS operations (
id SERIAL PRIMARY KEY,
source_path TEXT NOT NULL,
dest_path TEXT NOT NULL,
operation_type TEXT NOT NULL,
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP
);
-- Create index on operations for faster lookups
CREATE INDEX IF NOT EXISTS idx_operations_executed ON operations(executed);
CREATE INDEX IF NOT EXISTS idx_operations_source ON operations(source_path);
-- Grant privileges to disk_reorg_user
GRANT CONNECT ON DATABASE disk_reorganizer_db TO disk_reorg_user;
GRANT USAGE ON SCHEMA public TO disk_reorg_user;
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE files TO disk_reorg_user;
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE operations TO disk_reorg_user;
GRANT USAGE, SELECT ON SEQUENCE operations_id_seq TO disk_reorg_user;
-- Create function to update updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create trigger for files table
CREATE TRIGGER update_files_updated_at
BEFORE UPDATE ON files
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Display success message
\echo 'Database setup completed successfully!'
\echo 'Database: disk_reorganizer_db'
\echo 'User: disk_reorg_user'
\echo 'Tables created: files, operations'
\echo 'Indexes and triggers created'
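Since CURRENT_TIMESTAMP is fixed for the duration of a transaction, the trigger's effect is only visible when the INSERT and UPDATE are committed separately. A minimal smoke test from Python, assuming the credentials above:

import psycopg2

conn = psycopg2.connect(host='192.168.1.159', port=5432, dbname='disk_reorganizer_db',
                        user='disk_reorg_user', password='heel-goed-wachtwoord')
cur = conn.cursor()
# First transaction: create a throwaway row
cur.execute("INSERT INTO files (path, size, modified_time, disk) "
            "VALUES ('smoke/test.txt', 1, 0.0, 'demo') ON CONFLICT (path) DO NOTHING")
conn.commit()
# Second transaction: the BEFORE UPDATE trigger should bump updated_at
cur.execute("UPDATE files SET size = 2 WHERE path = 'smoke/test.txt'")
conn.commit()
cur.execute("SELECT updated_at > created_at FROM files WHERE path = 'smoke/test.txt'")
print(cur.fetchone()[0])  # expected: True
cur.execute("DELETE FROM files WHERE path = 'smoke/test.txt'")
conn.commit()
cur.close()
conn.close()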

View File

@@ -6,7 +6,9 @@ Three modes: index, plan, execute
 import os
 import sys
-import sqlite3
+import psycopg2
+from psycopg2 import sql
+from psycopg2.extras import RealDictCursor
 import shutil
 import hashlib
 import argparse
@@ -16,6 +18,7 @@ from dataclasses import dataclass, asdict
 from typing import List, Dict, Optional, Tuple
 from datetime import datetime
 import logging
+import time

 # Setup logging
 logging.basicConfig(
@@ -39,39 +42,53 @@ class FileRecord:
     status: str = 'indexed'  # indexed, planned, moved, verified

 class DiskReorganizer:
-    def __init__(self, db_path: str = "file_index.db"):
-        self.db_path = db_path
+    def __init__(self, db_config: Dict = None):
+        """
+        Initialize DiskReorganizer with PostgreSQL connection
+        :param db_config: Database configuration dict with host, port, database, user, password
+        """
+        if db_config is None:
+            db_config = {
+                'host': '192.168.1.159',
+                'port': 5432,
+                'database': 'disk_reorganizer_db',
+                'user': 'disk_reorg_user',
+                'password': 'heel-goed-wachtwoord'
+            }
+        self.db_config = db_config
         self.init_database()

+    def get_connection(self):
+        """Get PostgreSQL database connection"""
+        return psycopg2.connect(**self.db_config)
+
     def init_database(self):
-        """Initialize SQLite database"""
-        with sqlite3.connect(self.db_path) as conn:
-            conn.execute("""
-                CREATE TABLE IF NOT EXISTS files (
-                    path TEXT PRIMARY KEY,
-                    size INTEGER,
-                    modified_time REAL,
-                    disk TEXT,
-                    checksum TEXT,
-                    status TEXT DEFAULT 'indexed'
-                )
-            """)
-            conn.execute("""
-                CREATE TABLE IF NOT EXISTS operations (
-                    id INTEGER PRIMARY KEY AUTOINCREMENT,
-                    source_path TEXT,
-                    dest_path TEXT,
-                    operation_type TEXT,
-                    executed INTEGER DEFAULT 0,
-                    verified INTEGER DEFAULT 0,
-                    error TEXT
-                )
-            """)
-            conn.commit()
+        """Verify PostgreSQL database connection and tables exist"""
+        try:
+            conn = self.get_connection()
+            cursor = conn.cursor()
+
+            # Test connection and verify tables exist
+            cursor.execute("""
+                SELECT table_name FROM information_schema.tables
+                WHERE table_schema = 'public' AND table_name IN ('files', 'operations')
+            """)
+            tables = cursor.fetchall()
+
+            if len(tables) < 2:
+                logger.error("Database tables not found! Please run setup_database.sh first.")
+                raise Exception("Database not properly initialized. Run setup_database.sh")
+
+            cursor.close()
+            conn.close()
+            logger.info("Database connection verified successfully")
+        except psycopg2.Error as e:
+            logger.error(f"Database connection failed: {e}")
+            raise

     def index_disk(self, disk_root: str, disk_name: str):
         """
-        Index all files on a disk/partition
+        Index all files on a disk/partition with dynamic progress display
         :param disk_root: Root path of disk (e.g., 'D:\\')
         :param disk_name: Logical name for the disk
         """
@@ -84,10 +101,12 @@ class DiskReorganizer:
         files_count = 0
         total_size = 0
+        start_time = time.time()

-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
+        conn = self.get_connection()
+        cursor = conn.cursor()

+        try:
             # Walk through all files
             for root, dirs, files in os.walk(disk_path):
                 # Skip system directories
@@ -106,36 +125,62 @@ class DiskReorganizer:
                     # Calculate relative path for portability
                     rel_path = str(file_path.relative_to(disk_path))

-                    cursor.execute(
-                        "INSERT OR REPLACE INTO files VALUES (?, ?, ?, ?, ?, ?)",
-                        (rel_path, size, mtime, disk_name, None, 'indexed')
-                    )
+                    # PostgreSQL INSERT ... ON CONFLICT for upsert
+                    cursor.execute("""
+                        INSERT INTO files (path, size, modified_time, disk, checksum, status)
+                        VALUES (%s, %s, %s, %s, %s, %s)
+                        ON CONFLICT (path) DO UPDATE SET
+                            size = EXCLUDED.size,
+                            modified_time = EXCLUDED.modified_time,
+                            disk = EXCLUDED.disk,
+                            status = EXCLUDED.status
+                    """, (rel_path, size, mtime, disk_name, None, 'indexed'))

                     files_count += 1
                     total_size += size

+                    # Dynamic progress display - update every 100 files
+                    if files_count % 100 == 0:
+                        elapsed = time.time() - start_time
+                        rate = files_count / elapsed if elapsed > 0 else 0
+                        # Truncate path for display
+                        display_path = str(file_path)
+                        if len(display_path) > 60:
+                            display_path = '...' + display_path[-57:]
+                        # Use \r to overwrite the line
+                        print(f"\rIndexing: {files_count:,} files | {self.format_size(total_size)} | {rate:.0f} files/s | {display_path}", end='', flush=True)
+
+                    # Commit every 1000 files for performance
                     if files_count % 1000 == 0:
-                        logger.info(f"Indexed {files_count} files, {self.format_size(total_size)}...")
+                        conn.commit()

                 except Exception as e:
-                    logger.warning(f"Skipping {file_path}: {e}")
+                    logger.warning(f"\nSkipping {file_path}: {e}")
                     continue

             conn.commit()
+            print()  # New line after progress display
-        logger.info(f"Completed indexing {disk_name}: {files_count} files, {self.format_size(total_size)}")
+            logger.info(f"Completed indexing {disk_name}: {files_count} files, {self.format_size(total_size)}")
+        finally:
+            cursor.close()
+            conn.close()

     def calculate_disk_usage(self) -> Dict[str, Dict]:
         """Calculate current usage per disk"""
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.execute("""
-                SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
-                FROM files
-                GROUP BY disk
-            """)
+        conn = self.get_connection()
+        cursor = conn.cursor()
+
+        try:
+            cursor.execute("""
+                SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
+                FROM files
+                GROUP BY disk
+            """)

             usage = {}
-            for row in cursor:
+            for row in cursor.fetchall():
                 disk = row[0]
                 size = row[1] or 0
                 count = row[2]
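The ON CONFLICT upsert above runs once per file. If indexing throughput ever becomes the bottleneck, the identical statement can be applied in pages via psycopg2.extras.execute_batch; a sketch of that variant, not what this commit does:

from psycopg2.extras import execute_batch

UPSERT = """
    INSERT INTO files (path, size, modified_time, disk, checksum, status)
    VALUES (%s, %s, %s, %s, %s, %s)
    ON CONFLICT (path) DO UPDATE SET
        size = EXCLUDED.size,
        modified_time = EXCLUDED.modified_time,
        disk = EXCLUDED.disk,
        status = EXCLUDED.status
"""

def flush_rows(cursor, rows):
    """Upsert a list of (path, size, mtime, disk, checksum, status) tuples in pages."""
    execute_batch(cursor, UPSERT, rows, page_size=500)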
@@ -146,6 +191,9 @@ class DiskReorganizer:
                 }
             return usage
+        finally:
+            cursor.close()
+            conn.close()

     def plan_migration(self, target_disk: str, destination_disks: List[str]) -> Dict:
         """
@@ -163,12 +211,16 @@ class DiskReorganizer:
             return {}

         # Get files on target disk
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.execute(
-                "SELECT path, size, modified_time FROM files WHERE disk = ? ORDER BY size DESC",
-                (target_disk,)
-            )
-            files_to_move = cursor.fetchall()
+        conn = self.get_connection()
+        cursor = conn.cursor()
+
+        cursor.execute(
+            "SELECT path, size, modified_time FROM files WHERE disk = %s ORDER BY size DESC",
+            (target_disk,)
+        )
+        files_to_move = cursor.fetchall()
+        cursor.close()
+        conn.close()

         target_disk_usage = usage[target_disk]['size']
         logger.info(f"Need to move {len(files_to_move)} files, {self.format_size(target_disk_usage)}")
@@ -198,9 +250,10 @@ class DiskReorganizer:
             'destination_disks': destination_disks
         }

-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
+        conn = self.get_connection()
+        cursor = conn.cursor()

+        try:
             for file_info in files_to_move:
                 rel_path, size, mtime = file_info
@@ -219,11 +272,14 @@ class DiskReorganizer:
                 # Store in database
                 cursor.execute(
-                    "INSERT INTO operations (source_path, dest_path, operation_type) VALUES (?, ?, ?)",
+                    "INSERT INTO operations (source_path, dest_path, operation_type) VALUES (%s, %s, %s)",
                     (f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move')
                 )

             conn.commit()
+        finally:
+            cursor.close()
+            conn.close()

         # Save plan to JSON
         plan_file = f"migration_plan_{target_disk}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
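execute_migration below iterates operation dicts carrying source_disk, source_path, dest_disk, dest_path and size, which suggests the JSON plan stores them under a list. A hedged example of loading a saved plan back (the 'operations' key is inferred, not confirmed by these hunks):

import json

plan_path = 'migration_plan_data_20251210_103329.json'  # use the name plan_migration printed
with open(plan_path) as f:
    plan = json.load(f)
ops = plan.get('operations', [])  # key name inferred from execute_migration's loop
print(f"{len(ops)} planned moves onto {plan['destination_disks']}")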
@@ -281,10 +337,12 @@ class DiskReorganizer:
         success_count = 0
         error_count = 0
+        start_time = time.time()

-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
+        conn = self.get_connection()
+        cursor = conn.cursor()

+        try:
             for i, op in enumerate(operations, 1):
                 source_disk = op['source_disk']
                 source_path = op['source_path']
@@ -294,15 +352,22 @@ class DiskReorganizer:
                 source_full = Path(source_disk) / source_path
                 dest_full = Path(dest_disk) / dest_path

-                logger.info(f"[{i}/{len(operations)}] {source_full} -> {dest_full}")
+                # Dynamic progress display
+                elapsed = time.time() - start_time
+                rate = i / elapsed if elapsed > 0 else 0
+                eta = (len(operations) - i) / rate if rate > 0 else 0
+                display_path = str(source_path)
+                if len(display_path) > 50:
+                    display_path = '...' + display_path[-47:]
+                print(f"\r[{i}/{len(operations)}] {success_count} OK, {error_count} ERR | {rate:.1f} files/s | ETA: {int(eta)}s | {display_path}", end='', flush=True)

                 if dry_run:
                     # Simulate
                     if source_full.exists():
-                        logger.info(f"  Would move {self.format_size(op['size'])}")
                         success_count += 1
                     else:
-                        logger.warning(f"  Source does not exist!")
+                        logger.warning(f"\n  Source does not exist: {source_full}")
                         error_count += 1
                     continue
@@ -319,7 +384,7 @@ class DiskReorganizer:
                     if self.verify_operation(source_full, dest_full):
                         # Update database
                         cursor.execute(
-                            "UPDATE files SET disk = ?, status = 'moved' WHERE path = ? AND disk = ?",
+                            "UPDATE files SET disk = %s, status = 'moved' WHERE path = %s AND disk = %s",
                             (dest_disk, source_path, source_disk)
                         )
@@ -328,22 +393,21 @@ class DiskReorganizer:
                         # Log operation as executed
                         cursor.execute(
-                            "UPDATE operations SET executed = 1 WHERE source_path = ?",
+                            "UPDATE operations SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s",
                             (f"{source_disk}:{source_path}",)
                         )

-                        logger.info(f"  ✓ Moved and verified")
                         success_count += 1
                     else:
                         raise Exception("Verification failed")
                 else:
-                    logger.warning(f"  Source missing, skipping")
+                    logger.warning(f"\n  Source missing: {source_full}")
                     error_count += 1

             except Exception as e:
-                logger.error(f"  Error: {e}")
+                logger.error(f"\n  Error processing {source_path}: {e}")
                 cursor.execute(
-                    "UPDATE operations SET error = ? WHERE source_path = ?",
+                    "UPDATE operations SET error = %s WHERE source_path = %s",
                     (str(e), f"{source_disk}:{source_path}")
                 )
                 error_count += 1
@@ -353,6 +417,11 @@ class DiskReorganizer:
                     conn.commit()

             conn.commit()
+            print()  # New line after progress display
+        finally:
+            cursor.close()
+            conn.close()

         logger.info(f"Migration complete: {success_count} success, {error_count} errors")
@@ -362,28 +431,35 @@ class DiskReorganizer:
     def generate_report(self):
         """Generate status report"""
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.execute("""
-                SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
-            """)
+        conn = self.get_connection()
+        cursor = conn.cursor()
+
+        try:
+            cursor.execute("""
+                SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
+            """)

             print("\n=== FILE MIGRATION REPORT ===")
-            for row in cursor:
+            for row in cursor.fetchall():
                 status, count, size = row
                 print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}")

-            cursor = conn.execute("""
+            cursor.execute("""
                 SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
             """)

             print("\n=== OPERATIONS REPORT ===")
-            for row in cursor:
+            for row in cursor.fetchall():
                 op_type, executed, verified, count = row
                 status = "EXECUTED" if executed else "PENDING"
                 if verified:
                     status += "+VERIFIED"
                 print(f"{op_type:10} {status:15}: {count} operations")
+        finally:
+            cursor.close()
+            conn.close()

     @staticmethod
     def format_size(size: int) -> str:
         """Format bytes to human readable string"""