first-working-version
This commit is contained in:
68
.gitignore
vendored
Normal file
68
.gitignore
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
### PythonVanilla template
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
|
||||
9
defrag.iml
Normal file
9
defrag.iml
Normal file
@@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
# PostgreSQL database adapter for Python
|
||||
psycopg2-binary>=2.9.9
|
||||
|
||||
# Alternative: psycopg2>=2.9.9 (requires PostgreSQL development libraries)
|
||||
# Use psycopg2-binary for easier installation without compilation
|
||||
79
setup_database.sql
Normal file
79
setup_database.sql
Normal file
@@ -0,0 +1,79 @@
|
||||
-- PostgreSQL Database Setup Script for Disk Reorganizer
|
||||
-- Database: disk_reorganizer_db
|
||||
-- User: disk_reorg_user
|
||||
|
||||
-- Create the database (run this script as the PostgreSQL superuser role "auction")
|
||||
CREATE DATABASE disk_reorganizer_db
|
||||
WITH
|
||||
ENCODING = 'UTF8'
|
||||
LC_COLLATE = 'en_US.UTF-8'
|
||||
LC_CTYPE = 'en_US.UTF-8'
|
||||
TEMPLATE = template0;
|
||||
|
||||
-- Connect to the new database
|
||||
\c disk_reorganizer_db
|
||||
|
||||
-- Create the user
|
||||
CREATE USER disk_reorg_user WITH PASSWORD 'heel-goed-wachtwoord';
|
||||
|
||||
-- Create files table
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
path TEXT PRIMARY KEY,
|
||||
size BIGINT NOT NULL,
|
||||
modified_time DOUBLE PRECISION NOT NULL,
|
||||
disk TEXT NOT NULL,
|
||||
checksum TEXT,
|
||||
status TEXT DEFAULT 'indexed',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Create indexes on the disk and status columns for faster queries
|
||||
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk);
|
||||
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
|
||||
|
||||
-- Create operations table
|
||||
CREATE TABLE IF NOT EXISTS operations (
|
||||
id SERIAL PRIMARY KEY,
|
||||
source_path TEXT NOT NULL,
|
||||
dest_path TEXT NOT NULL,
|
||||
operation_type TEXT NOT NULL,
|
||||
executed INTEGER DEFAULT 0,
|
||||
verified INTEGER DEFAULT 0,
|
||||
error TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
executed_at TIMESTAMP
|
||||
);
|
||||
|
||||
-- Create indexes on operations (executed, source_path) for faster lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_operations_executed ON operations(executed);
|
||||
CREATE INDEX IF NOT EXISTS idx_operations_source ON operations(source_path);
|
||||
|
||||
-- Grant privileges to disk_reorg_user
|
||||
GRANT CONNECT ON DATABASE disk_reorganizer_db TO disk_reorg_user;
|
||||
GRANT USAGE ON SCHEMA public TO disk_reorg_user;
|
||||
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE files TO disk_reorg_user;
|
||||
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE operations TO disk_reorg_user;
|
||||
GRANT USAGE, SELECT ON SEQUENCE operations_id_seq TO disk_reorg_user;
|
||||
|
||||
-- Create function to update updated_at timestamp
|
||||
CREATE OR REPLACE FUNCTION update_updated_at_column()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.updated_at = CURRENT_TIMESTAMP;
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create trigger for files table
|
||||
CREATE TRIGGER update_files_updated_at
|
||||
BEFORE UPDATE ON files
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION update_updated_at_column();
|
||||
|
||||
-- Display success message
|
||||
\echo 'Database setup completed successfully!'
|
||||
\echo 'Database: disk_reorganizer_db'
|
||||
\echo 'User: disk_reorg_user'
|
||||
\echo 'Tables created: files, operations'
|
||||
\echo 'Indexes and triggers created'
|
||||
222
src/main.py
222
src/main.py
@@ -6,7 +6,9 @@ Three modes: index, plan, execute
|
||||
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
from psycopg2.extras import RealDictCursor
|
||||
import shutil
|
||||
import hashlib
|
||||
import argparse
|
||||
@@ -16,6 +18,7 @@ from dataclasses import dataclass, asdict
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import time
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
@@ -39,39 +42,53 @@ class FileRecord:
|
||||
status: str = 'indexed' # indexed, planned, moved, verified
|
||||
|
||||
class DiskReorganizer:
|
||||
def __init__(self, db_path: str = "file_index.db"):
|
||||
self.db_path = db_path
|
||||
def __init__(self, db_config: Dict = None):
|
||||
"""
|
||||
Initialize DiskReorganizer with PostgreSQL connection
|
||||
:param db_config: Database configuration dict with host, port, database, user, password
|
||||
"""
|
||||
if db_config is None:
|
||||
db_config = {
|
||||
'host': '192.168.1.159',
|
||||
'port': 5432,
|
||||
'database': 'disk_reorganizer_db',
|
||||
'user': 'disk_reorg_user',
|
||||
'password': 'heel-goed-wachtwoord'
|
||||
}
|
||||
self.db_config = db_config
|
||||
self.init_database()
|
||||
|
||||
def get_connection(self):
|
||||
"""Get PostgreSQL database connection"""
|
||||
return psycopg2.connect(**self.db_config)
|
||||
|
||||
def init_database(self):
|
||||
"""Initialize SQLite database"""
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
path TEXT PRIMARY KEY,
|
||||
size INTEGER,
|
||||
modified_time REAL,
|
||||
disk TEXT,
|
||||
checksum TEXT,
|
||||
status TEXT DEFAULT 'indexed'
|
||||
)
|
||||
""")
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS operations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source_path TEXT,
|
||||
dest_path TEXT,
|
||||
operation_type TEXT,
|
||||
executed INTEGER DEFAULT 0,
|
||||
verified INTEGER DEFAULT 0,
|
||||
error TEXT
|
||||
)
|
||||
""")
|
||||
conn.commit()
|
||||
"""Verify PostgreSQL database connection and tables exist"""
|
||||
try:
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Test connection and verify tables exist
|
||||
cursor.execute("""
|
||||
SELECT table_name FROM information_schema.tables
|
||||
WHERE table_schema = 'public' AND table_name IN ('files', 'operations')
|
||||
""")
|
||||
tables = cursor.fetchall()
|
||||
|
||||
if len(tables) < 2:
|
||||
logger.error("Database tables not found! Please run setup_database.sh first.")
|
||||
raise Exception("Database not properly initialized. Run setup_database.sh")
|
||||
|
||||
cursor.close()
|
||||
conn.close()
|
||||
logger.info("Database connection verified successfully")
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f"Database connection failed: {e}")
|
||||
raise
|
||||
|
||||
def index_disk(self, disk_root: str, disk_name: str):
|
||||
"""
|
||||
Index all files on a disk/partition
|
||||
Index all files on a disk/partition with dynamic progress display
|
||||
:param disk_root: Root path of disk (e.g., 'D:\\')
|
||||
:param disk_name: Logical name for the disk
|
||||
"""
|
||||
@@ -84,10 +101,12 @@ class DiskReorganizer:
|
||||
|
||||
files_count = 0
|
||||
total_size = 0
|
||||
start_time = time.time()
|
||||
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# Walk through all files
|
||||
for root, dirs, files in os.walk(disk_path):
|
||||
# Skip system directories
|
||||
@@ -106,36 +125,62 @@ class DiskReorganizer:
|
||||
# Calculate relative path for portability
|
||||
rel_path = str(file_path.relative_to(disk_path))
|
||||
|
||||
cursor.execute(
|
||||
"INSERT OR REPLACE INTO files VALUES (?, ?, ?, ?, ?, ?)",
|
||||
(rel_path, size, mtime, disk_name, None, 'indexed')
|
||||
)
|
||||
# PostgreSQL upsert via INSERT ... ON CONFLICT; on conflict the existing row's checksum is left unchanged
|
||||
cursor.execute("""
|
||||
INSERT INTO files (path, size, modified_time, disk, checksum, status)
|
||||
VALUES (%s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (path) DO UPDATE SET
|
||||
size = EXCLUDED.size,
|
||||
modified_time = EXCLUDED.modified_time,
|
||||
disk = EXCLUDED.disk,
|
||||
status = EXCLUDED.status
|
||||
""", (rel_path, size, mtime, disk_name, None, 'indexed'))
|
||||
|
||||
files_count += 1
|
||||
total_size += size
|
||||
|
||||
# Dynamic progress display - update every 100 files
|
||||
if files_count % 100 == 0:
|
||||
elapsed = time.time() - start_time
|
||||
rate = files_count / elapsed if elapsed > 0 else 0
|
||||
# Truncate path for display
|
||||
display_path = str(file_path)
|
||||
if len(display_path) > 60:
|
||||
display_path = '...' + display_path[-57:]
|
||||
|
||||
# Use \r to overwrite the line
|
||||
print(f"\rIndexing: {files_count:,} files | {self.format_size(total_size)} | {rate:.0f} files/s | {display_path}", end='', flush=True)
|
||||
|
||||
# Commit every 1000 files for performance
|
||||
if files_count % 1000 == 0:
|
||||
logger.info(f"Indexed {files_count} files, {self.format_size(total_size)}...")
|
||||
conn.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Skipping {file_path}: {e}")
|
||||
logger.warning(f"\nSkipping {file_path}: {e}")
|
||||
continue
|
||||
|
||||
conn.commit()
|
||||
print() # New line after progress display
|
||||
logger.info(f"Completed indexing {disk_name}: {files_count} files, {self.format_size(total_size)}")
|
||||
|
||||
logger.info(f"Completed indexing {disk_name}: {files_count} files, {self.format_size(total_size)}")
|
||||
finally:
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
def calculate_disk_usage(self) -> Dict[str, Dict]:
|
||||
"""Calculate current usage per disk"""
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.execute("""
|
||||
SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
|
||||
FROM files
|
||||
GROUP BY disk
|
||||
""")
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
cursor.execute("""
|
||||
SELECT disk, SUM(size) as total_size, COUNT(*) as file_count
|
||||
FROM files
|
||||
GROUP BY disk
|
||||
""")
|
||||
|
||||
usage = {}
|
||||
for row in cursor:
|
||||
for row in cursor.fetchall():
|
||||
disk = row[0]
|
||||
size = row[1] or 0
|
||||
count = row[2]
|
||||
@@ -146,6 +191,9 @@ class DiskReorganizer:
|
||||
}
|
||||
|
||||
return usage
|
||||
finally:
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
def plan_migration(self, target_disk: str, destination_disks: List[str]) -> Dict:
|
||||
"""
|
||||
@@ -163,12 +211,16 @@ class DiskReorganizer:
|
||||
return {}
|
||||
|
||||
# Get files on target disk
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.execute(
|
||||
"SELECT path, size, modified_time FROM files WHERE disk = ? ORDER BY size DESC",
|
||||
(target_disk,)
|
||||
)
|
||||
files_to_move = cursor.fetchall()
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute(
|
||||
"SELECT path, size, modified_time FROM files WHERE disk = %s ORDER BY size DESC",
|
||||
(target_disk,)
|
||||
)
|
||||
files_to_move = cursor.fetchall()
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
target_disk_usage = usage[target_disk]['size']
|
||||
logger.info(f"Need to move {len(files_to_move)} files, {self.format_size(target_disk_usage)}")
|
||||
@@ -198,9 +250,10 @@ class DiskReorganizer:
|
||||
'destination_disks': destination_disks
|
||||
}
|
||||
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
for file_info in files_to_move:
|
||||
rel_path, size, mtime = file_info
|
||||
|
||||
@@ -219,11 +272,14 @@ class DiskReorganizer:
|
||||
|
||||
# Store in database
|
||||
cursor.execute(
|
||||
"INSERT INTO operations (source_path, dest_path, operation_type) VALUES (?, ?, ?)",
|
||||
"INSERT INTO operations (source_path, dest_path, operation_type) VALUES (%s, %s, %s)",
|
||||
(f"{target_disk}:{rel_path}", f"{dest_disk}:{rel_path}", 'move')
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
finally:
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
# Save plan to JSON
|
||||
plan_file = f"migration_plan_{target_disk}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
|
||||
@@ -281,10 +337,12 @@ class DiskReorganizer:
|
||||
|
||||
success_count = 0
|
||||
error_count = 0
|
||||
start_time = time.time()
|
||||
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
for i, op in enumerate(operations, 1):
|
||||
source_disk = op['source_disk']
|
||||
source_path = op['source_path']
|
||||
@@ -294,15 +352,22 @@ class DiskReorganizer:
|
||||
source_full = Path(source_disk) / source_path
|
||||
dest_full = Path(dest_disk) / dest_path
|
||||
|
||||
logger.info(f"[{i}/{len(operations)}] {source_full} -> {dest_full}")
|
||||
# Dynamic progress display
|
||||
elapsed = time.time() - start_time
|
||||
rate = i / elapsed if elapsed > 0 else 0
|
||||
eta = (len(operations) - i) / rate if rate > 0 else 0
|
||||
display_path = str(source_path)
|
||||
if len(display_path) > 50:
|
||||
display_path = '...' + display_path[-47:]
|
||||
|
||||
print(f"\r[{i}/{len(operations)}] {success_count} OK, {error_count} ERR | {rate:.1f} files/s | ETA: {int(eta)}s | {display_path}", end='', flush=True)
|
||||
|
||||
if dry_run:
|
||||
# Simulate
|
||||
if source_full.exists():
|
||||
logger.info(f" Would move {self.format_size(op['size'])}")
|
||||
success_count += 1
|
||||
else:
|
||||
logger.warning(f" Source does not exist!")
|
||||
logger.warning(f"\n Source does not exist: {source_full}")
|
||||
error_count += 1
|
||||
continue
|
||||
|
||||
@@ -319,7 +384,7 @@ class DiskReorganizer:
|
||||
if self.verify_operation(source_full, dest_full):
|
||||
# Update database
|
||||
cursor.execute(
|
||||
"UPDATE files SET disk = ?, status = 'moved' WHERE path = ? AND disk = ?",
|
||||
"UPDATE files SET disk = %s, status = 'moved' WHERE path = %s AND disk = %s",
|
||||
(dest_disk, source_path, source_disk)
|
||||
)
|
||||
|
||||
@@ -328,22 +393,21 @@ class DiskReorganizer:
|
||||
|
||||
# Log operation as executed
|
||||
cursor.execute(
|
||||
"UPDATE operations SET executed = 1 WHERE source_path = ?",
|
||||
"UPDATE operations SET executed = 1, executed_at = CURRENT_TIMESTAMP WHERE source_path = %s",
|
||||
(f"{source_disk}:{source_path}",)
|
||||
)
|
||||
|
||||
logger.info(f" ✓ Moved and verified")
|
||||
success_count += 1
|
||||
else:
|
||||
raise Exception("Verification failed")
|
||||
else:
|
||||
logger.warning(f" Source missing, skipping")
|
||||
logger.warning(f"\n Source missing: {source_full}")
|
||||
error_count += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f" ✗ Error: {e}")
|
||||
logger.error(f"\n Error processing {source_path}: {e}")
|
||||
cursor.execute(
|
||||
"UPDATE operations SET error = ? WHERE source_path = ?",
|
||||
"UPDATE operations SET error = %s WHERE source_path = %s",
|
||||
(str(e), f"{source_disk}:{source_path}")
|
||||
)
|
||||
error_count += 1
|
||||
@@ -353,6 +417,11 @@ class DiskReorganizer:
|
||||
conn.commit()
|
||||
|
||||
conn.commit()
|
||||
print() # New line after progress display
|
||||
|
||||
finally:
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
logger.info(f"Migration complete: {success_count} success, {error_count} errors")
|
||||
|
||||
@@ -362,28 +431,35 @@ class DiskReorganizer:
|
||||
|
||||
def generate_report(self):
|
||||
"""Generate status report"""
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.execute("""
|
||||
SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
|
||||
""")
|
||||
conn = self.get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
cursor.execute("""
|
||||
SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
|
||||
""")
|
||||
|
||||
print("\n=== FILE MIGRATION REPORT ===")
|
||||
for row in cursor:
|
||||
for row in cursor.fetchall():
|
||||
status, count, size = row
|
||||
print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}")
|
||||
|
||||
cursor = conn.execute("""
|
||||
SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
|
||||
""")
|
||||
cursor.execute("""
|
||||
SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
|
||||
""")
|
||||
|
||||
print("\n=== OPERATIONS REPORT ===")
|
||||
for row in cursor:
|
||||
for row in cursor.fetchall():
|
||||
op_type, executed, verified, count = row
|
||||
status = "EXECUTED" if executed else "PENDING"
|
||||
if verified:
|
||||
status += "+VERIFIED"
|
||||
print(f"{op_type:10} {status:15}: {count} operations")
|
||||
|
||||
finally:
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
@staticmethod
|
||||
def format_size(size: int) -> str:
|
||||
"""Format bytes to human readable string"""
|
||||
|
||||
Reference in New Issue
Block a user