initial
This commit is contained in:
61
sql/legacy_setup.sql
Normal file
61
sql/legacy_setup.sql
Normal file
@@ -0,0 +1,61 @@
|
||||
-- PostgreSQL Database Setup Script for Disk Reorganizer
|
||||
-- Database: disk_reorganizer_db
|
||||
-- User: disk_reorg_user
|
||||
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||
|
||||
-- Create the database (run as superuser: auction)
|
||||
CREATE DATABASE disk_reorganizer_db
|
||||
WITH
|
||||
ENCODING = 'UTF8'
|
||||
LC_COLLATE = 'en_US.UTF-8'
|
||||
LC_CTYPE = 'en_US.UTF-8'
|
||||
TEMPLATE = template0;
|
||||
|
||||
-- Connect to the new database
|
||||
\c disk_reorganizer_db
|
||||
|
||||
-- Create the user
|
||||
CREATE USER disk_reorg_user WITH PASSWORD 'heel-goed-wachtwoord';
|
||||
|
||||
-- Create files table
|
||||
|
||||
-- Create index on disk column for faster queries
|
||||
|
||||
|
||||
-- Grant privileges to disk_reorg_user
|
||||
GRANT CONNECT ON DATABASE disk_reorganizer_db TO disk_reorg_user;
|
||||
GRANT USAGE ON SCHEMA public TO disk_reorg_user;
|
||||
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO disk_reorg_user;
|
||||
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO disk_reorg_user;
|
||||
|
||||
-- future tables/sequences created by your owner role (pick the role that creates them)
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON TABLES TO disk_reorg_user;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON SEQUENCES TO disk_reorg_user;
|
||||
|
||||
-- Create function to update updated_at timestamp
|
||||
CREATE OR REPLACE FUNCTION update_updated_at_column()
|
||||
RETURNS TRIGGER AS
|
||||
$$
|
||||
BEGIN
|
||||
NEW.updated_at = CURRENT_TIMESTAMP;
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create trigger for files table
|
||||
CREATE TRIGGER update_files_updated_at
|
||||
BEFORE UPDATE
|
||||
ON files
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION update_updated_at_column();
|
||||
|
||||
-- Display success message
|
||||
\echo 'Database setup completed successfully!'
|
||||
\echo 'Database: disk_reorganizer_db'
|
||||
\echo 'User: disk_reorg_user'
|
||||
\echo 'Tables created: files, operations'
|
||||
\echo 'Indexes and triggers created 2)'
|
||||
188
sql/migration/V001__init.sql
Normal file
188
sql/migration/V001__init.sql
Normal file
@@ -0,0 +1,188 @@
|
||||
-- sql/init.sql
|
||||
-- Initialize PostgreSQL database for Project Defrag
|
||||
|
||||
-- Enable useful extensions
|
||||
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||
-- future tables/sequences created by your owner role (pick the role that creates them)
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON TABLES TO disk_reorg_user;
|
||||
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
|
||||
GRANT ALL PRIVILEGES ON SEQUENCES TO disk_reorg_user;
|
||||
ALTER DATABASE disk_reorganizer_db OWNER TO disk_reorg_user;
|
||||
-- Files table
|
||||
CREATE TABLE IF NOT EXISTS files
|
||||
(
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
path TEXT NOT NULL,
|
||||
size BIGINT NOT NULL,
|
||||
modified_time TIMESTAMP WITH TIME ZONE,
|
||||
created_time TIMESTAMP WITH TIME ZONE,
|
||||
file_hash VARCHAR(64), -- SHA-256 hash
|
||||
checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility)
|
||||
category VARCHAR(50),
|
||||
disk_label VARCHAR(50),
|
||||
last_verified TIMESTAMP WITH TIME ZONE,
|
||||
status VARCHAR(20) DEFAULT 'indexed',
|
||||
duplicate_of TEXT, -- Path to canonical file if this is a duplicate
|
||||
|
||||
-- Metadata
|
||||
metadata JSONB DEFAULT '{}',
|
||||
|
||||
-- Audit fields
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
-- Constraints
|
||||
CONSTRAINT unique_file_path UNIQUE (path)
|
||||
);
|
||||
|
||||
-- Operations table (audit log)
|
||||
CREATE TABLE IF NOT EXISTS operations
|
||||
(
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
operation_type VARCHAR(50) NOT NULL,
|
||||
source_path TEXT,
|
||||
target_path TEXT,
|
||||
status VARCHAR(20) NOT NULL,
|
||||
|
||||
-- Legacy compatibility fields
|
||||
executed INTEGER DEFAULT 0,
|
||||
verified INTEGER DEFAULT 0,
|
||||
error TEXT,
|
||||
|
||||
-- File reference
|
||||
file_id UUID REFERENCES files (id) ON DELETE SET NULL,
|
||||
|
||||
-- Performance metrics
|
||||
duration_ms INTEGER,
|
||||
bytes_processed BIGINT,
|
||||
|
||||
-- Error information
|
||||
error_message TEXT,
|
||||
error_details JSONB,
|
||||
|
||||
-- Context
|
||||
session_id VARCHAR(100),
|
||||
user_agent TEXT,
|
||||
|
||||
-- Audit fields
|
||||
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
completed_at TIMESTAMP WITH TIME ZONE,
|
||||
executed_at TIMESTAMP WITH TIME ZONE,
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Deduplication hash store
|
||||
CREATE TABLE IF NOT EXISTS deduplication_store
|
||||
(
|
||||
hash VARCHAR(64) PRIMARY KEY,
|
||||
canonical_path TEXT NOT NULL,
|
||||
reference_count INTEGER DEFAULT 1,
|
||||
first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Migration plan table
|
||||
CREATE TABLE IF NOT EXISTS migration_plans
|
||||
(
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
name VARCHAR(100) NOT NULL,
|
||||
source_disk VARCHAR(50) NOT NULL,
|
||||
target_disk VARCHAR(50) NOT NULL,
|
||||
plan_json JSONB NOT NULL,
|
||||
|
||||
-- Statistics
|
||||
total_files INTEGER DEFAULT 0,
|
||||
total_size BIGINT DEFAULT 0,
|
||||
estimated_duration INTEGER, -- in seconds
|
||||
|
||||
-- Status
|
||||
status VARCHAR(20) DEFAULT 'draft',
|
||||
|
||||
-- Audit
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
executed_at TIMESTAMP WITH TIME ZONE,
|
||||
completed_at TIMESTAMP WITH TIME ZONE
|
||||
);
|
||||
|
||||
-- Indexes for performance
|
||||
CREATE INDEX IF NOT EXISTS idx_files_path ON files (path);
|
||||
CREATE INDEX IF NOT EXISTS idx_files_hash ON files (file_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_files_disk ON files (disk_label);
|
||||
CREATE INDEX IF NOT EXISTS idx_files_category ON files (category);
|
||||
CREATE INDEX IF NOT EXISTS idx_files_status ON files (status);
|
||||
create index on files (checksum);
|
||||
create index on files (checksum, path);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations (status);
|
||||
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations (created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations (file_id);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store (canonical_path);
|
||||
|
||||
-- Functions for updating timestamps
|
||||
CREATE OR REPLACE FUNCTION update_updated_at_column()
|
||||
RETURNS TRIGGER AS
|
||||
$$
|
||||
BEGIN
|
||||
NEW.updated_at = CURRENT_TIMESTAMP;
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ language 'plpgsql';
|
||||
|
||||
-- Triggers for automatic updated_at
|
||||
CREATE TRIGGER update_files_updated_at
|
||||
BEFORE UPDATE
|
||||
ON files
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION update_updated_at_column();
|
||||
|
||||
-- View for operational dashboard
|
||||
CREATE OR REPLACE VIEW operational_dashboard AS
|
||||
SELECT o.status,
|
||||
COUNT(*) as operation_count,
|
||||
SUM(o.bytes_processed) as total_bytes,
|
||||
AVG(o.duration_ms) as avg_duration_ms,
|
||||
MIN(o.started_at) as earliest_operation,
|
||||
MAX(o.completed_at) as latest_operation
|
||||
FROM operations o
|
||||
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
|
||||
GROUP BY o.status;
|
||||
|
||||
-- View for disk usage statistics
|
||||
CREATE OR REPLACE VIEW disk_usage_stats AS
|
||||
SELECT disk_label,
|
||||
COUNT(*) as file_count,
|
||||
SUM(size) as total_size,
|
||||
AVG(size) as avg_file_size,
|
||||
MIN(created_time) as oldest_file,
|
||||
MAX(modified_time) as newest_file
|
||||
FROM files
|
||||
GROUP BY disk_label;
|
||||
|
||||
-- Insert default configuration
|
||||
INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status)
|
||||
VALUES ('Default Migration Plan',
|
||||
'disk_d',
|
||||
'disk_e',
|
||||
'{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb,
|
||||
'draft')
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- Create read-only user for monitoring
|
||||
DO
|
||||
$$
|
||||
BEGIN
|
||||
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN
|
||||
CREATE USER monitor_user WITH PASSWORD 'monitor_password';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user;
|
||||
GRANT USAGE ON SCHEMA public TO monitor_user;
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user;
|
||||
GRANT SELECT ON operational_dashboard TO monitor_user;
|
||||
GRANT SELECT ON disk_usage_stats TO monitor_user;
|
||||
11
sql/migration/V002__add_extracted_text.sql
Normal file
11
sql/migration/V002__add_extracted_text.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- Add extracted text and enrichment columns
|
||||
ALTER TABLE files ADD COLUMN IF NOT EXISTS extracted_text TEXT;
|
||||
ALTER TABLE files ADD COLUMN IF NOT EXISTS text_quality VARCHAR(20);
|
||||
ALTER TABLE files ADD COLUMN IF NOT EXISTS enrichment JSONB;
|
||||
|
||||
-- Add indexes for text search
|
||||
CREATE INDEX IF NOT EXISTS idx_files_extracted_text ON files USING gin(to_tsvector('english', extracted_text));
|
||||
CREATE INDEX IF NOT EXISTS idx_files_enrichment ON files USING gin(enrichment);
|
||||
|
||||
-- Add full text search capability
|
||||
CREATE INDEX IF NOT EXISTS idx_files_fts ON files USING gin(to_tsvector('english', COALESCE(extracted_text, '')));
|
||||
41
sql/migration/V003__add_folder_support.sql
Normal file
41
sql/migration/V003__add_folder_support.sql
Normal file
@@ -0,0 +1,41 @@
|
||||
CREATE TABLE IF NOT EXISTS folders
|
||||
(
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
path TEXT NOT NULL UNIQUE,
|
||||
parent_path TEXT,
|
||||
disk_label VARCHAR(50),
|
||||
|
||||
file_count INT DEFAULT 0,
|
||||
total_size BIGINT DEFAULT 0,
|
||||
|
||||
project_type VARCHAR(50),
|
||||
intent TEXT,
|
||||
summary TEXT,
|
||||
|
||||
has_readme BOOLEAN DEFAULT FALSE,
|
||||
has_git BOOLEAN DEFAULT FALSE,
|
||||
has_manifest BOOLEAN DEFAULT FALSE,
|
||||
manifest_types TEXT[],
|
||||
dominant_file_types JSONB,
|
||||
|
||||
structure JSONB,
|
||||
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_folders_path ON folders (path);
|
||||
CREATE INDEX IF NOT EXISTS idx_folders_parent ON folders (parent_path);
|
||||
CREATE INDEX IF NOT EXISTS idx_folders_disk ON folders (disk_label);
|
||||
CREATE INDEX IF NOT EXISTS idx_folders_project_type ON folders (project_type);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS processing_checkpoints
|
||||
(
|
||||
task_name VARCHAR(100) PRIMARY KEY,
|
||||
last_processed_id TEXT,
|
||||
last_processed_path TEXT,
|
||||
processed_count INT DEFAULT 0,
|
||||
total_count INT,
|
||||
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
Reference in New Issue
Block a user