-- sql/init.sql -- Initialize PostgreSQL database for Project Defrag -- Enable useful extensions CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS "pgcrypto"; -- future tables/sequences created by your owner role (pick the role that creates them) ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public GRANT ALL PRIVILEGES ON TABLES TO disk_reorg_user; ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public GRANT ALL PRIVILEGES ON SEQUENCES TO disk_reorg_user; ALTER DATABASE disk_reorganizer_db OWNER TO disk_reorg_user; -- Files table CREATE TABLE IF NOT EXISTS files ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), path TEXT NOT NULL, size BIGINT NOT NULL, modified_time TIMESTAMP WITH TIME ZONE, created_time TIMESTAMP WITH TIME ZONE, file_hash VARCHAR(64), -- SHA-256 hash checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility) category VARCHAR(50), disk_label VARCHAR(50), last_verified TIMESTAMP WITH TIME ZONE, status VARCHAR(20) DEFAULT 'indexed', duplicate_of TEXT, -- Path to canonical file if this is a duplicate -- Metadata metadata JSONB DEFAULT '{}', -- Audit fields created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, -- Constraints CONSTRAINT unique_file_path UNIQUE (path) ); -- Operations table (audit log) CREATE TABLE IF NOT EXISTS operations ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), operation_type VARCHAR(50) NOT NULL, source_path TEXT, target_path TEXT, status VARCHAR(20) NOT NULL, -- Legacy compatibility fields executed INTEGER DEFAULT 0, verified INTEGER DEFAULT 0, error TEXT, -- File reference file_id UUID REFERENCES files (id) ON DELETE SET NULL, -- Performance metrics duration_ms INTEGER, bytes_processed BIGINT, -- Error information error_message TEXT, error_details JSONB, -- Context session_id VARCHAR(100), user_agent TEXT, -- Audit fields started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, completed_at TIMESTAMP WITH TIME ZONE, executed_at TIMESTAMP WITH TIME ZONE, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); -- Deduplication hash store CREATE TABLE IF NOT EXISTS deduplication_store ( hash VARCHAR(64) PRIMARY KEY, canonical_path TEXT NOT NULL, reference_count INTEGER DEFAULT 1, first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); -- Migration plan table CREATE TABLE IF NOT EXISTS migration_plans ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name VARCHAR(100) NOT NULL, source_disk VARCHAR(50) NOT NULL, target_disk VARCHAR(50) NOT NULL, plan_json JSONB NOT NULL, -- Statistics total_files INTEGER DEFAULT 0, total_size BIGINT DEFAULT 0, estimated_duration INTEGER, -- in seconds -- Status status VARCHAR(20) DEFAULT 'draft', -- Audit created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, executed_at TIMESTAMP WITH TIME ZONE, completed_at TIMESTAMP WITH TIME ZONE ); -- Indexes for performance CREATE INDEX IF NOT EXISTS idx_files_path ON files (path); CREATE INDEX IF NOT EXISTS idx_files_hash ON files (file_hash); CREATE INDEX IF NOT EXISTS idx_files_disk ON files (disk_label); CREATE INDEX IF NOT EXISTS idx_files_category ON files (category); CREATE INDEX IF NOT EXISTS idx_files_status ON files (status); create index on files (checksum); create index on files (checksum, path); CREATE INDEX IF NOT EXISTS idx_operations_status ON operations (status); CREATE INDEX IF NOT EXISTS idx_operations_created ON operations (created_at); CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations (file_id); CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store (canonical_path); -- Functions for updating timestamps CREATE OR REPLACE FUNCTION update_updated_at_column() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = CURRENT_TIMESTAMP; RETURN NEW; END; $$ language 'plpgsql'; -- Triggers for automatic updated_at CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); -- View for operational dashboard CREATE OR REPLACE VIEW operational_dashboard AS SELECT o.status, COUNT(*) as operation_count, SUM(o.bytes_processed) as total_bytes, AVG(o.duration_ms) as avg_duration_ms, MIN(o.started_at) as earliest_operation, MAX(o.completed_at) as latest_operation FROM operations o WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours' GROUP BY o.status; -- View for disk usage statistics CREATE OR REPLACE VIEW disk_usage_stats AS SELECT disk_label, COUNT(*) as file_count, SUM(size) as total_size, AVG(size) as avg_file_size, MIN(created_time) as oldest_file, MAX(modified_time) as newest_file FROM files GROUP BY disk_label; -- Insert default configuration INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status) VALUES ('Default Migration Plan', 'disk_d', 'disk_e', '{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb, 'draft') ON CONFLICT DO NOTHING; -- Create read-only user for monitoring DO $$ BEGIN IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN CREATE USER monitor_user WITH PASSWORD 'monitor_password'; END IF; END $$; GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user; GRANT USAGE ON SCHEMA public TO monitor_user; GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user; GRANT SELECT ON operational_dashboard TO monitor_user; GRANT SELECT ON disk_usage_stats TO monitor_user;