77 lines
3.0 KiB
SQL
77 lines
3.0 KiB
SQL
CREATE TABLE IF NOT EXISTS content_nodes (
|
|
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
|
node_type VARCHAR(50) NOT NULL,
|
|
path TEXT NOT NULL,
|
|
disk_label VARCHAR(50),
|
|
parent_id UUID REFERENCES content_nodes(id) ON DELETE CASCADE,
|
|
|
|
checksum VARCHAR(64),
|
|
size BIGINT,
|
|
modified_time TIMESTAMP,
|
|
|
|
content_hash VARCHAR(64),
|
|
extracted_at TIMESTAMP,
|
|
extraction_method VARCHAR(100),
|
|
|
|
metadata JSONB,
|
|
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
|
|
UNIQUE(node_type, path, disk_label)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_content_nodes_type ON content_nodes(node_type);
|
|
CREATE INDEX IF NOT EXISTS idx_content_nodes_path ON content_nodes(path);
|
|
CREATE INDEX IF NOT EXISTS idx_content_nodes_parent ON content_nodes(parent_id);
|
|
CREATE INDEX IF NOT EXISTS idx_content_nodes_checksum ON content_nodes(checksum);
|
|
CREATE INDEX IF NOT EXISTS idx_content_nodes_content_hash ON content_nodes(content_hash);
|
|
|
|
CREATE TABLE IF NOT EXISTS content_edges (
|
|
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
|
source_id UUID NOT NULL REFERENCES content_nodes(id) ON DELETE CASCADE,
|
|
target_id UUID NOT NULL REFERENCES content_nodes(id) ON DELETE CASCADE,
|
|
edge_type VARCHAR(50) NOT NULL,
|
|
|
|
metadata JSONB,
|
|
confidence FLOAT,
|
|
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
|
|
UNIQUE(source_id, target_id, edge_type)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_content_edges_source ON content_edges(source_id);
|
|
CREATE INDEX IF NOT EXISTS idx_content_edges_target ON content_edges(target_id);
|
|
CREATE INDEX IF NOT EXISTS idx_content_edges_type ON content_edges(edge_type);
|
|
|
|
CREATE TABLE IF NOT EXISTS extraction_log (
|
|
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
|
node_id UUID REFERENCES content_nodes(id) ON DELETE CASCADE,
|
|
file_path TEXT NOT NULL,
|
|
file_checksum VARCHAR(64),
|
|
|
|
extraction_method VARCHAR(100),
|
|
status VARCHAR(50),
|
|
error_message TEXT,
|
|
|
|
extracted_size BIGINT,
|
|
processing_time_ms INT,
|
|
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_extraction_log_node ON extraction_log(node_id);
|
|
CREATE INDEX IF NOT EXISTS idx_extraction_log_file ON extraction_log(file_path);
|
|
CREATE INDEX IF NOT EXISTS idx_extraction_log_checksum ON extraction_log(file_checksum);
|
|
CREATE INDEX IF NOT EXISTS idx_extraction_log_status ON extraction_log(status);
|
|
CREATE INDEX IF NOT EXISTS idx_extraction_log_created ON extraction_log(created_at DESC);
|
|
|
|
COMMENT ON TABLE content_nodes IS 'Content graph nodes: directories, files, chunks';
|
|
COMMENT ON TABLE content_edges IS 'Content graph edges: contains, derived_from, references, duplicates';
|
|
COMMENT ON TABLE extraction_log IS 'Tracks extraction history for incremental updates';
|
|
|
|
COMMENT ON COLUMN content_nodes.node_type IS 'directory, file, chunk, embedding';
|
|
COMMENT ON COLUMN content_nodes.content_hash IS 'Hash of extracted content (not file bytes)';
|
|
COMMENT ON COLUMN content_edges.edge_type IS 'contains, derived_from, references, duplicates, similar_to';
|