Files
defrag/sql/migration/V001__init.sql
2025-12-13 11:56:06 +01:00

188 lines
6.3 KiB
PL/PgSQL

-- sql/init.sql
-- Initialize PostgreSQL database for Project Defrag
-- Enable useful extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- future tables/sequences created by your owner role (pick the role that creates them)
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
GRANT ALL PRIVILEGES ON TABLES TO disk_reorg_user;
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
GRANT ALL PRIVILEGES ON SEQUENCES TO disk_reorg_user;
ALTER DATABASE disk_reorganizer_db OWNER TO disk_reorg_user;
-- Files table
CREATE TABLE IF NOT EXISTS files
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
path TEXT NOT NULL,
size BIGINT NOT NULL,
modified_time TIMESTAMP WITH TIME ZONE,
created_time TIMESTAMP WITH TIME ZONE,
file_hash VARCHAR(64), -- SHA-256 hash
checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility)
category VARCHAR(50),
disk_label VARCHAR(50),
last_verified TIMESTAMP WITH TIME ZONE,
status VARCHAR(20) DEFAULT 'indexed',
duplicate_of TEXT, -- Path to canonical file if this is a duplicate
-- Metadata
metadata JSONB DEFAULT '{}',
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Constraints
CONSTRAINT unique_file_path UNIQUE (path)
);
-- Operations table (audit log)
CREATE TABLE IF NOT EXISTS operations
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
operation_type VARCHAR(50) NOT NULL,
source_path TEXT,
target_path TEXT,
status VARCHAR(20) NOT NULL,
-- Legacy compatibility fields
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
-- File reference
file_id UUID REFERENCES files (id) ON DELETE SET NULL,
-- Performance metrics
duration_ms INTEGER,
bytes_processed BIGINT,
-- Error information
error_message TEXT,
error_details JSONB,
-- Context
session_id VARCHAR(100),
user_agent TEXT,
-- Audit fields
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE,
executed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Deduplication hash store
CREATE TABLE IF NOT EXISTS deduplication_store
(
hash VARCHAR(64) PRIMARY KEY,
canonical_path TEXT NOT NULL,
reference_count INTEGER DEFAULT 1,
first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Migration plan table
CREATE TABLE IF NOT EXISTS migration_plans
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(100) NOT NULL,
source_disk VARCHAR(50) NOT NULL,
target_disk VARCHAR(50) NOT NULL,
plan_json JSONB NOT NULL,
-- Statistics
total_files INTEGER DEFAULT 0,
total_size BIGINT DEFAULT 0,
estimated_duration INTEGER, -- in seconds
-- Status
status VARCHAR(20) DEFAULT 'draft',
-- Audit
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_files_path ON files (path);
CREATE INDEX IF NOT EXISTS idx_files_hash ON files (file_hash);
CREATE INDEX IF NOT EXISTS idx_files_disk ON files (disk_label);
CREATE INDEX IF NOT EXISTS idx_files_category ON files (category);
CREATE INDEX IF NOT EXISTS idx_files_status ON files (status);
create index on files (checksum);
create index on files (checksum, path);
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations (status);
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations (created_at);
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations (file_id);
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store (canonical_path);
-- Functions for updating timestamps
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS
$$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
-- Triggers for automatic updated_at
CREATE TRIGGER update_files_updated_at
BEFORE UPDATE
ON files
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- View for operational dashboard
CREATE OR REPLACE VIEW operational_dashboard AS
SELECT o.status,
COUNT(*) as operation_count,
SUM(o.bytes_processed) as total_bytes,
AVG(o.duration_ms) as avg_duration_ms,
MIN(o.started_at) as earliest_operation,
MAX(o.completed_at) as latest_operation
FROM operations o
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
GROUP BY o.status;
-- View for disk usage statistics
CREATE OR REPLACE VIEW disk_usage_stats AS
SELECT disk_label,
COUNT(*) as file_count,
SUM(size) as total_size,
AVG(size) as avg_file_size,
MIN(created_time) as oldest_file,
MAX(modified_time) as newest_file
FROM files
GROUP BY disk_label;
-- Insert default configuration
INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status)
VALUES ('Default Migration Plan',
'disk_d',
'disk_e',
'{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb,
'draft')
ON CONFLICT DO NOTHING;
-- Create read-only user for monitoring
DO
$$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN
CREATE USER monitor_user WITH PASSWORD 'monitor_password';
END IF;
END
$$;
GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user;
GRANT USAGE ON SCHEMA public TO monitor_user;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user;
GRANT SELECT ON operational_dashboard TO monitor_user;
GRANT SELECT ON disk_usage_stats TO monitor_user;