This commit is contained in:
mike
2025-12-12 05:40:33 +01:00
parent 0eba1619cf
commit fb675746ee
12 changed files with 661 additions and 0 deletions

164
sql/init.sql Normal file
View File

@@ -0,0 +1,164 @@
-- sql/init.sql
-- Initialize PostgreSQL database for Project Defrag
-- Enable useful extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- Files table
CREATE TABLE IF NOT EXISTS files (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
path TEXT NOT NULL,
size BIGINT NOT NULL,
modified_time TIMESTAMP WITH TIME ZONE,
created_time TIMESTAMP WITH TIME ZONE,
file_hash VARCHAR(64), -- SHA-256 hash
category VARCHAR(50),
disk_label VARCHAR(50),
last_verified TIMESTAMP WITH TIME ZONE,
-- Metadata
metadata JSONB DEFAULT '{}',
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Constraints
CONSTRAINT unique_file_path UNIQUE(path)
);
-- Operations table (audit log)
CREATE TABLE IF NOT EXISTS operations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
operation_type VARCHAR(50) NOT NULL,
source_path TEXT,
target_path TEXT,
status VARCHAR(20) NOT NULL,
-- File reference
file_id UUID REFERENCES files(id) ON DELETE SET NULL,
-- Performance metrics
duration_ms INTEGER,
bytes_processed BIGINT,
-- Error information
error_message TEXT,
error_details JSONB,
-- Context
session_id VARCHAR(100),
user_agent TEXT,
-- Audit fields
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Deduplication hash store
CREATE TABLE IF NOT EXISTS deduplication_store (
hash VARCHAR(64) PRIMARY KEY,
canonical_path TEXT NOT NULL,
reference_count INTEGER DEFAULT 1,
first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Migration plan table
CREATE TABLE IF NOT EXISTS migration_plans (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(100) NOT NULL,
source_disk VARCHAR(50) NOT NULL,
target_disk VARCHAR(50) NOT NULL,
plan_json JSONB NOT NULL,
-- Statistics
total_files INTEGER DEFAULT 0,
total_size BIGINT DEFAULT 0,
estimated_duration INTEGER, -- in seconds
-- Status
status VARCHAR(20) DEFAULT 'draft',
-- Audit
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash);
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
CREATE INDEX IF NOT EXISTS idx_files_category ON files(category);
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations(status);
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations(created_at);
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations(file_id);
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store(canonical_path);
-- Functions for updating timestamps
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
-- Triggers for automatic updated_at
CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- View for operational dashboard
CREATE OR REPLACE VIEW operational_dashboard AS
SELECT
o.status,
COUNT(*) as operation_count,
SUM(o.bytes_processed) as total_bytes,
AVG(o.duration_ms) as avg_duration_ms,
MIN(o.started_at) as earliest_operation,
MAX(o.completed_at) as latest_operation
FROM operations o
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
GROUP BY o.status;
-- View for disk usage statistics
CREATE OR REPLACE VIEW disk_usage_stats AS
SELECT
disk_label,
COUNT(*) as file_count,
SUM(size) as total_size,
AVG(size) as avg_file_size,
MIN(created_time) as oldest_file,
MAX(modified_time) as newest_file
FROM files
GROUP BY disk_label;
-- Insert default configuration
INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status)
VALUES (
'Default Migration Plan',
'disk_d',
'disk_e',
'{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb,
'draft'
) ON CONFLICT DO NOTHING;
-- Create read-only user for monitoring
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN
CREATE USER monitor_user WITH PASSWORD 'monitor_password';
END IF;
END
$$;
GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user;
GRANT USAGE ON SCHEMA public TO monitor_user;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user;
GRANT SELECT ON operational_dashboard TO monitor_user;
GRANT SELECT ON disk_usage_stats TO monitor_user;