base
This commit is contained in:
18
.aiignore
Normal file
18
.aiignore
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
.DS_Store
|
||||||
|
*.log
|
||||||
|
*.tmp
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
out/
|
||||||
|
.idea
|
||||||
|
node_modules/
|
||||||
|
.vscode/
|
||||||
|
.git
|
||||||
|
.github
|
||||||
|
scripts
|
||||||
|
.pytest_cache/
|
||||||
|
__pycache__
|
||||||
|
.aiignore
|
||||||
|
*.iml
|
||||||
|
.env
|
||||||
|
.bundle.md
|
||||||
12
.idea/dataSources.xml
generated
Normal file
12
.idea/dataSources.xml
generated
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
|
||||||
|
<data-source source="LOCAL" name="disk_reorganizer_db@192.168.1.159" uuid="40177905-314d-45ca-b0d0-ae9d40009a0c">
|
||||||
|
<driver-ref>postgresql</driver-ref>
|
||||||
|
<synchronize>true</synchronize>
|
||||||
|
<jdbc-driver>org.postgresql.Driver</jdbc-driver>
|
||||||
|
<jdbc-url>jdbc:postgresql://192.168.1.159:5432/disk_reorganizer_db</jdbc-url>
|
||||||
|
<working-dir>$ProjectFileDir$</working-dir>
|
||||||
|
</data-source>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
7
.idea/data_source_mapping.xml
generated
Normal file
7
.idea/data_source_mapping.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="DataSourcePerFileMappings">
|
||||||
|
<file url="file://$PROJECT_DIR$/.idea/queries/Query.sql" value="40177905-314d-45ca-b0d0-ae9d40009a0c" />
|
||||||
|
<file url="file://$PROJECT_DIR$/setup_database.sql" value="40177905-314d-45ca-b0d0-ae9d40009a0c" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
3
.idea/defrag.iml
generated
3
.idea/defrag.iml
generated
@@ -3,6 +3,9 @@
|
|||||||
<component name="NewModuleRootManager">
|
<component name="NewModuleRootManager">
|
||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.git" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.idea/dataSources" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.idea/queries" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.13 (.venv)" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="Python 3.13 (.venv)" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
|||||||
7
.idea/sqldialects.xml
generated
Normal file
7
.idea/sqldialects.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="SqlDialectMappings">
|
||||||
|
<file url="file://$PROJECT_DIR$/setup_database.sql" dialect="PostgreSQL" />
|
||||||
|
<file url="PROJECT" dialect="PostgreSQL" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
38
Dockerfile
Normal file
38
Dockerfile
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Dockerfile for Project Defrag with PostgreSQL integration
FROM python:3.11-slim

# Install system dependencies: a C/C++ toolchain and libpq headers for
# building Python packages, plus the PostgreSQL command-line client.
RUN apt-get update && apt-get install -y \
        gcc \
        g++ \
        libpq-dev \
        postgresql-client \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app

# Install Python dependencies first so this layer is reused when only the
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user
RUN useradd -m -u 1000 appuser && \
    chown -R appuser:appuser /app
USER appuser

# Health check: open (and close) a PostgreSQL connection.
# The settings are read inside Python from the DB_* variables that
# docker-compose.yml passes to the application containers; the POSTGRES_*
# names are kept as a fallback. Reading them via os.environ instead of shell
# interpolation keeps passwords containing quotes from breaking the command.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD python -c "import os, psycopg2; e = os.environ.get; psycopg2.connect(dbname=e('DB_NAME') or e('POSTGRES_DB', 'disk_reorganizer_db'), user=e('DB_USER') or e('POSTGRES_USER', 'disk_reorg_user'), password=e('DB_PASSWORD') or e('POSTGRES_PASSWORD', ''), host=e('DB_HOST', 'postgres'), port=e('DB_PORT', '5432'), connect_timeout=5).close()" || exit 1

# Default command (can be overridden in docker-compose)
CMD ["python", "main.py", "--help"]
|
||||||
20
docker-compose.override.yml
Normal file
20
docker-compose.override.yml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Development overrides layered on top of docker-compose.yml.
services:
  app:
    environment:
      - LOG_LEVEL=DEBUG
      - PYTHONPATH=/app
    volumes:
      - .:/app
      # NOTE(review): mounting the Docker socket gives this container control
      # of the host Docker daemon; confirm it is actually required.
      - /var/run/docker.sock:/var/run/docker.sock
    ports:
      - "8000:8000"
    command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload

  postgres:
    # Statement logging is a server setting; the official postgres image has
    # no POSTGRES_LOG_STATEMENT variable, so it is passed with -c instead.
    command: ["postgres", "-c", "log_statement=all"]
    ports:
      # Extra host port for local tools. Compose appends override ports to
      # the ones in the base file, so any port published there stays
      # published as well.
      - "5433:5432"

  redis:
    command: redis-server --appendonly yes --loglevel verbose
|
||||||
245
docker-compose.yml
Normal file
245
docker-compose.yml
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
# Connection settings shared by every container that talks to PostgreSQL.
# The password is taken from POSTGRES_PASSWORD (shell environment or .env
# file) and falls back to the previous hard-coded value so existing local
# setups keep working. Changing it after the postgres_data volume has been
# initialised does not change the password stored in the database.
x-db-env: &db-env
  DB_HOST: postgres
  DB_PORT: "5432"
  DB_NAME: disk_reorganizer_db
  DB_USER: disk_reorg_user
  DB_PASSWORD: "${POSTGRES_PASSWORD:-heel-goed-wachtwoord}"

services:
  # PostgreSQL Database
  postgres:
    image: postgres:15-alpine
    container_name: project_defrag_db
    environment:
      POSTGRES_USER: disk_reorg_user
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-heel-goed-wachtwoord}"
      POSTGRES_DB: disk_reorganizer_db
      POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=C"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./sql/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
      # NOTE(review): the image entrypoint only runs scripts placed directly
      # in /docker-entrypoint-initdb.d; files inside this sub-directory are
      # not executed automatically. Confirm how migrations are applied.
      - ./sql/migrations:/docker-entrypoint-initdb.d/migrations
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U disk_reorg_user -d disk_reorganizer_db"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - defrag-network

  # Redis for deduplication hash store (optional)
  redis:
    image: redis:7-alpine
    container_name: project_defrag_redis
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    ports:
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - defrag-network

  # Application Service
  app:
    build: .
    container_name: project_defrag_app
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      # Database Configuration
      <<: *db-env

      # Redis Configuration
      REDIS_HOST: redis
      REDIS_PORT: "6379"

      # Application Configuration
      LOG_LEVEL: INFO
      MAX_WORKERS: "4"
      CHUNK_SIZE_KB: "64"

      # Mount points (set these when running specific commands)
      SOURCE_MOUNT: /mnt/source
      TARGET_MOUNT: /mnt/target
    volumes:
      # Mount host directories for file operations
      - "${HOST_SOURCE_PATH:-/mnt/source}:/mnt/source:ro"
      - "${HOST_TARGET_PATH:-/mnt/target}:/mnt/target"

      # Mount for configuration and plans
      - ./config:/app/config
      - ./plans:/app/plans
      - ./logs:/app/logs

      # Bind mount for development (optional)
      - .:/app
    networks:
      - defrag-network
    profiles:
      - full-cycle
      - development
    # Uncomment for development with hot reload
    # command: watchmedo auto-restart --pattern="*.py" --recursive -- python main.py

  # Single command services for specific operations
  index:
    build: .
    container_name: defrag_index
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *db-env
    volumes:
      - "${HOST_SOURCE_PATH:-/mnt/source}:/mnt/source:ro"
      - ./config:/app/config
      - ./logs:/app/logs
    command: ["python", "main.py", "index", "/mnt/source", "disk_d"]
    profiles:
      - index-only
    networks:
      - defrag-network

  plan:
    build: .
    container_name: defrag_plan
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *db-env
    volumes:
      - ./config:/app/config
      - ./plans:/app/plans
      - ./logs:/app/logs
    command: ["python", "main.py", "plan", "disk_d", "disk_e"]
    profiles:
      - plan-only
    networks:
      - defrag-network

  execute:
    build: .
    container_name: defrag_execute
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *db-env
    volumes:
      # The source is mounted read-write here, unlike in the index service.
      - "${HOST_SOURCE_PATH:-/mnt/source}:/mnt/source"
      - "${HOST_TARGET_PATH:-/mnt/target}:/mnt/target"
      - ./plans:/app/plans
      - ./config:/app/config
      - ./logs:/app/logs
    command: ["python", "main.py", "execute", "/app/plans/plan.json"]
    profiles:
      - execute-only
    networks:
      - defrag-network

  dry-run:
    build: .
    container_name: defrag_dry_run
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *db-env
    volumes:
      - ./plans:/app/plans
      - ./config:/app/config
      - ./logs:/app/logs
    command: ["python", "main.py", "execute", "/app/plans/plan.json", "--dry-run"]
    profiles:
      - dry-run-only
    networks:
      - defrag-network

  report:
    build: .
    container_name: defrag_report
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *db-env
    volumes:
      - ./reports:/app/reports
      - ./logs:/app/logs
    command: ["python", "main.py", "report", "--format", "html"]
    profiles:
      - report-only
    networks:
      - defrag-network

  # Monitoring and Admin Services
  pgadmin:
    image: dpage/pgadmin4:latest
    container_name: defrag_pgadmin
    environment:
      PGADMIN_DEFAULT_EMAIL: admin@defrag.local
      # Overridable via the environment; the default is the previous value.
      PGADMIN_DEFAULT_PASSWORD: "${PGADMIN_DEFAULT_PASSWORD:-admin123}"
    volumes:
      - pgadmin_data:/var/lib/pgadmin
    ports:
      - "5050:80"
    depends_on:
      - postgres
    profiles:
      - monitoring
    networks:
      - defrag-network

  redis-commander:
    image: rediscommander/redis-commander:latest
    container_name: defrag_redis_commander
    environment:
      REDIS_HOSTS: local:redis:6379
    ports:
      - "8081:8081"
    depends_on:
      - redis
    profiles:
      - monitoring
    networks:
      - defrag-network

networks:
  defrag-network:
    driver: bridge

volumes:
  postgres_data:
    driver: local
  redis_data:
    driver: local
  pgadmin_data:
    driver: local
|
||||||
74
pyproject.toml
Normal file
74
pyproject.toml
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=65.0", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "defrag"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "Intelligent disk reorganization system for 20TB+ data"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.9"
|
||||||
|
license = {text = "MIT"}
|
||||||
|
authors = [
|
||||||
|
{name = "Project Defrag"}
|
||||||
|
]
|
||||||
|
keywords = ["disk", "storage", "deduplication", "classification", "migration"]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 4 - Beta",
|
||||||
|
"Intended Audience :: System Administrators",
|
||||||
|
"Topic :: System :: Filesystems",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.9",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"psycopg2-binary>=2.9.0",
|
||||||
|
"psutil>=5.9.0",
|
||||||
|
"pandas>=1.5.0",
|
||||||
|
"pyarrow>=10.0.0",
|
||||||
|
"python-magic>=0.4.27",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
redis = ["redis>=4.5.0"]
|
||||||
|
ml = ["scikit-learn>=1.2.0", "numpy>=1.24.0"]
|
||||||
|
dev = [
|
||||||
|
"pytest>=7.2.0",
|
||||||
|
"pytest-cov>=4.0.0",
|
||||||
|
"black>=23.0.0",
|
||||||
|
"mypy>=1.0.0",
|
||||||
|
"flake8>=6.0.0",
|
||||||
|
]
|
||||||
|
all = [
|
||||||
|
"redis>=4.5.0",
|
||||||
|
"scikit-learn>=1.2.0",
|
||||||
|
"numpy>=1.24.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
defrag = "main:main"
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 100
|
||||||
|
target-version = ['py39', 'py310', 'py311', 'py312']
|
||||||
|
include = '\.pyi?$'
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.9"
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_configs = true
|
||||||
|
disallow_untyped_defs = false
|
||||||
|
disallow_incomplete_defs = false
|
||||||
|
check_untyped_defs = true
|
||||||
|
no_implicit_optional = true
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
python_files = ["test_*.py"]
|
||||||
|
python_classes = ["Test*"]
|
||||||
|
python_functions = ["test_*"]
|
||||||
|
addopts = "-v --cov=. --cov-report=html --cov-report=term"
|
||||||
@@ -15,3 +15,25 @@ pytest-cov>=4.0.0
|
|||||||
black>=22.0.0
|
black>=22.0.0
|
||||||
mypy>=0.950
|
mypy>=0.950
|
||||||
flake8>=5.0.0
|
flake8>=5.0.0
|
||||||
|
# Core dependencies
|
||||||
|
psycopg2-binary>=2.9.0
|
||||||
|
psutil>=5.9.0
|
||||||
|
|
||||||
|
# Data processing
|
||||||
|
pandas>=1.5.0
|
||||||
|
pyarrow>=10.0.0
|
||||||
|
|
||||||
|
# File type detection
|
||||||
|
python-magic>=0.4.27
|
||||||
|
|
||||||
|
# Optional dependencies
|
||||||
|
redis>=4.5.0 # For RedisHashStore (optional)
|
||||||
|
scikit-learn>=1.2.0 # For MLClassifier (optional)
|
||||||
|
numpy>=1.24.0 # For MLClassifier (optional)
|
||||||
|
|
||||||
|
# Development dependencies
|
||||||
|
pytest>=7.2.0
|
||||||
|
pytest-cov>=4.0.0
|
||||||
|
black>=23.0.0
|
||||||
|
mypy>=1.0.0
|
||||||
|
flake8>=6.0.0
|
||||||
|
|||||||
164
sql/init.sql
Normal file
164
sql/init.sql
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
-- sql/init.sql
|
||||||
|
-- Initialize PostgreSQL database for Project Defrag
|
||||||
|
|
||||||
|
-- Enable useful extensions
|
||||||
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||||
|
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||||
|
|
||||||
|
-- Files table
|
||||||
|
CREATE TABLE IF NOT EXISTS files (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
size BIGINT NOT NULL,
|
||||||
|
modified_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
created_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
file_hash VARCHAR(64), -- SHA-256 hash
|
||||||
|
category VARCHAR(50),
|
||||||
|
disk_label VARCHAR(50),
|
||||||
|
last_verified TIMESTAMP WITH TIME ZONE,
|
||||||
|
|
||||||
|
-- Metadata
|
||||||
|
metadata JSONB DEFAULT '{}',
|
||||||
|
|
||||||
|
-- Audit fields
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
-- Constraints
|
||||||
|
CONSTRAINT unique_file_path UNIQUE(path)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Operations table (audit log)
|
||||||
|
CREATE TABLE IF NOT EXISTS operations (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
operation_type VARCHAR(50) NOT NULL,
|
||||||
|
source_path TEXT,
|
||||||
|
target_path TEXT,
|
||||||
|
status VARCHAR(20) NOT NULL,
|
||||||
|
|
||||||
|
-- File reference
|
||||||
|
file_id UUID REFERENCES files(id) ON DELETE SET NULL,
|
||||||
|
|
||||||
|
-- Performance metrics
|
||||||
|
duration_ms INTEGER,
|
||||||
|
bytes_processed BIGINT,
|
||||||
|
|
||||||
|
-- Error information
|
||||||
|
error_message TEXT,
|
||||||
|
error_details JSONB,
|
||||||
|
|
||||||
|
-- Context
|
||||||
|
session_id VARCHAR(100),
|
||||||
|
user_agent TEXT,
|
||||||
|
|
||||||
|
-- Audit fields
|
||||||
|
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
completed_at TIMESTAMP WITH TIME ZONE,
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Deduplication hash store
|
||||||
|
CREATE TABLE IF NOT EXISTS deduplication_store (
|
||||||
|
hash VARCHAR(64) PRIMARY KEY,
|
||||||
|
canonical_path TEXT NOT NULL,
|
||||||
|
reference_count INTEGER DEFAULT 1,
|
||||||
|
first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Migration plan table
|
||||||
|
CREATE TABLE IF NOT EXISTS migration_plans (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
name VARCHAR(100) NOT NULL,
|
||||||
|
source_disk VARCHAR(50) NOT NULL,
|
||||||
|
target_disk VARCHAR(50) NOT NULL,
|
||||||
|
plan_json JSONB NOT NULL,
|
||||||
|
|
||||||
|
-- Statistics
|
||||||
|
total_files INTEGER DEFAULT 0,
|
||||||
|
total_size BIGINT DEFAULT 0,
|
||||||
|
estimated_duration INTEGER, -- in seconds
|
||||||
|
|
||||||
|
-- Status
|
||||||
|
status VARCHAR(20) DEFAULT 'draft',
|
||||||
|
|
||||||
|
-- Audit
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
executed_at TIMESTAMP WITH TIME ZONE,
|
||||||
|
completed_at TIMESTAMP WITH TIME ZONE
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes for performance
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_files_category ON files(category);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations(status);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations(file_id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store(canonical_path);
|
||||||
|
|
||||||
|
-- Trigger function: sets updated_at on the incoming row to the current
-- timestamp and returns that row.
CREATE OR REPLACE FUNCTION update_updated_at_column()
    RETURNS trigger
    LANGUAGE plpgsql
AS $func$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$func$;
|
||||||
|
|
||||||
|
-- Trigger for automatic updated_at on files.
-- CREATE TRIGGER has no IF NOT EXISTS form, so the trigger is dropped first;
-- otherwise running this script a second time fails here even though every
-- other statement in it is written to be re-runnable.
DROP TRIGGER IF EXISTS update_files_updated_at ON files;
CREATE TRIGGER update_files_updated_at
    BEFORE UPDATE ON files
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
|
||||||
|
|
||||||
|
-- View for operational dashboard
|
||||||
|
CREATE OR REPLACE VIEW operational_dashboard AS
|
||||||
|
SELECT
|
||||||
|
o.status,
|
||||||
|
COUNT(*) as operation_count,
|
||||||
|
SUM(o.bytes_processed) as total_bytes,
|
||||||
|
AVG(o.duration_ms) as avg_duration_ms,
|
||||||
|
MIN(o.started_at) as earliest_operation,
|
||||||
|
MAX(o.completed_at) as latest_operation
|
||||||
|
FROM operations o
|
||||||
|
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
|
||||||
|
GROUP BY o.status;
|
||||||
|
|
||||||
|
-- View for disk usage statistics
|
||||||
|
CREATE OR REPLACE VIEW disk_usage_stats AS
|
||||||
|
SELECT
|
||||||
|
disk_label,
|
||||||
|
COUNT(*) as file_count,
|
||||||
|
SUM(size) as total_size,
|
||||||
|
AVG(size) as avg_file_size,
|
||||||
|
MIN(created_time) as oldest_file,
|
||||||
|
MAX(modified_time) as newest_file
|
||||||
|
FROM files
|
||||||
|
GROUP BY disk_label;
|
||||||
|
|
||||||
|
-- Insert default configuration.
-- migration_plans has no unique constraint other than its generated id, so
-- ON CONFLICT DO NOTHING could never fire and every run would add another
-- copy of this row. Guard on the plan name instead.
INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status)
SELECT
    'Default Migration Plan',
    'disk_d',
    'disk_e',
    '{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb,
    'draft'
WHERE NOT EXISTS (
    SELECT 1
    FROM migration_plans
    WHERE name = 'Default Migration Plan'
);
|
||||||
|
|
||||||
|
-- Create read-only user for monitoring
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN
|
||||||
|
CREATE USER monitor_user WITH PASSWORD 'monitor_password';
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
|
||||||
|
GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user;
|
||||||
|
GRANT USAGE ON SCHEMA public TO monitor_user;
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user;
|
||||||
|
GRANT SELECT ON operational_dashboard TO monitor_user;
|
||||||
|
GRANT SELECT ON disk_usage_stats TO monitor_user;
|
||||||
51
src/setup.py
Normal file
51
src/setup.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python3
"""Setup script for defrag disk reorganizer"""
import re
from pathlib import Path

from setuptools import setup, find_packages

# Directory containing this script. Resolved so that the parent lookup below
# also works when the script is invoked through a relative path.
HERE = Path(__file__).resolve().parent

# A comment starts at '#' when it is at the beginning of the line or preceded
# by whitespace (the same rule pip applies to requirements files).
_COMMENT_RE = re.compile(r'(^|\s+)#.*$')


def _locate(filename):
    """Return the path of *filename* next to this script or one level up.

    When the file exists in neither place, the path next to this script is
    returned so that a missing file is reported at the location this script
    originally looked in.
    """
    for directory in (HERE, HERE.parent):
        candidate = directory / filename
        if candidate.exists():
            return candidate
    return HERE / filename


def _parse_requirements(lines):
    """Convert requirements-file lines into a list of requirement strings.

    Blank lines and comment lines are skipped, and trailing ``# ...`` comments
    are removed so that they do not end up inside ``install_requires``.
    """
    parsed = []
    for raw in lines:
        spec = _COMMENT_RE.sub('', raw).strip()
        if spec:
            parsed.append(spec)
    return parsed


# Read requirements (raises FileNotFoundError if the file is missing).
with open(_locate('requirements.txt'), encoding='utf-8') as f:
    requirements = _parse_requirements(f)

# Read long description from README; an absent README is not an error.
readme_path = _locate('README.md')
long_description = ""
if readme_path.exists():
    with open(readme_path, encoding='utf-8') as f:
        long_description = f.read()

setup(
    name='defrag',
    version='1.0.0',
    description='Intelligent disk reorganization system for 20TB+ data with deduplication and classification',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='Project Defrag',
    author_email='defrag@example.com',
    url='https://github.com/yourusername/defrag',
    packages=find_packages(),
    install_requires=requirements,
    python_requires='>=3.9',
    entry_points={
        'console_scripts': [
            'defrag=main:main',
        ],
    },
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: System Administrators',
        'Topic :: System :: Filesystems',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
    ],
    keywords='disk management storage deduplication classification migration',
)
|
||||||
Reference in New Issue
Block a user