clean up code
This commit is contained in:
76
sql/migrations/003_content_graph.sql
Normal file
76
sql/migrations/003_content_graph.sql
Normal file
@@ -0,0 +1,76 @@
|
||||
-- content_nodes: one row per node of the content graph.
-- Nodes can point at a parent node through parent_id; deleting a parent
-- cascades to every row that references it.
-- NOTE(review): disk_label is nullable and PostgreSQL treats NULLs as distinct
-- in UNIQUE constraints, so rows with a NULL disk_label are never deduplicated
-- by the (node_type, path, disk_label) key -- confirm this is intended.
-- NOTE(review): modified_time / extracted_at carry no time zone while
-- created_at / updated_at do -- confirm the mix is deliberate.
CREATE TABLE IF NOT EXISTS content_nodes (
    -- Identity and position in the graph
    id                UUID DEFAULT gen_random_uuid(),
    node_type         VARCHAR(50) NOT NULL,
    path              TEXT NOT NULL,
    disk_label        VARCHAR(50),
    parent_id         UUID,

    -- Attributes of the underlying item
    checksum          VARCHAR(64),
    size              BIGINT,
    modified_time     TIMESTAMP WITHOUT TIME ZONE,

    -- Extraction results
    content_hash      VARCHAR(64),
    extracted_at      TIMESTAMP WITHOUT TIME ZONE,
    extraction_method VARCHAR(100),

    -- Free-form attributes
    metadata          JSONB,

    -- Row bookkeeping
    created_at        TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    -- Constraints (left unnamed so PostgreSQL assigns its default names)
    PRIMARY KEY (id),
    FOREIGN KEY (parent_id) REFERENCES content_nodes (id) ON DELETE CASCADE,
    UNIQUE (node_type, path, disk_label)
);
|
||||
|
||||
-- Single-column b-tree lookup indexes on content_nodes.
CREATE INDEX IF NOT EXISTS idx_content_nodes_type
    ON content_nodes USING btree (node_type);

CREATE INDEX IF NOT EXISTS idx_content_nodes_path
    ON content_nodes USING btree (path);

CREATE INDEX IF NOT EXISTS idx_content_nodes_parent
    ON content_nodes USING btree (parent_id);

CREATE INDEX IF NOT EXISTS idx_content_nodes_checksum
    ON content_nodes USING btree (checksum);

CREATE INDEX IF NOT EXISTS idx_content_nodes_content_hash
    ON content_nodes USING btree (content_hash);
|
||||
|
||||
-- content_edges: typed, directed links between two content_nodes rows.
-- Both endpoints are mandatory; deleting either endpoint node removes the edge.
-- At most one edge of a given edge_type may exist per (source_id, target_id) pair.
CREATE TABLE IF NOT EXISTS content_edges (
    id         UUID DEFAULT gen_random_uuid(),
    source_id  UUID NOT NULL,
    target_id  UUID NOT NULL,
    edge_type  VARCHAR(50) NOT NULL,

    -- Optional edge attributes
    metadata   JSONB,
    confidence DOUBLE PRECISION,  -- NOTE(review): no range is enforced; confirm expected scale

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    -- Constraints (left unnamed so PostgreSQL assigns its default names)
    PRIMARY KEY (id),
    FOREIGN KEY (source_id) REFERENCES content_nodes (id) ON DELETE CASCADE,
    FOREIGN KEY (target_id) REFERENCES content_nodes (id) ON DELETE CASCADE,
    UNIQUE (source_id, target_id, edge_type)
);
|
||||
|
||||
-- Single-column b-tree lookup indexes on content_edges.
CREATE INDEX IF NOT EXISTS idx_content_edges_source
    ON content_edges USING btree (source_id);

CREATE INDEX IF NOT EXISTS idx_content_edges_target
    ON content_edges USING btree (target_id);

CREATE INDEX IF NOT EXISTS idx_content_edges_type
    ON content_edges USING btree (edge_type);
|
||||
|
||||
-- extraction_log: one row per recorded extraction attempt.
-- node_id is optional; when set, the log row is deleted together with the
-- content_nodes row it references.
CREATE TABLE IF NOT EXISTS extraction_log (
    id                 UUID DEFAULT gen_random_uuid(),
    node_id            UUID,
    file_path          TEXT NOT NULL,
    file_checksum      VARCHAR(64),

    -- Outcome of the attempt
    extraction_method  VARCHAR(100),
    status             VARCHAR(50),
    error_message      TEXT,

    -- Measurements
    extracted_size     BIGINT,
    processing_time_ms INTEGER,

    created_at         TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    -- Constraints (left unnamed so PostgreSQL assigns its default names)
    PRIMARY KEY (id),
    FOREIGN KEY (node_id) REFERENCES content_nodes (id) ON DELETE CASCADE
);
|
||||
|
||||
-- B-tree lookup indexes on extraction_log; the created_at index is stored
-- in descending order.
CREATE INDEX IF NOT EXISTS idx_extraction_log_node
    ON extraction_log USING btree (node_id);

CREATE INDEX IF NOT EXISTS idx_extraction_log_file
    ON extraction_log USING btree (file_path);

CREATE INDEX IF NOT EXISTS idx_extraction_log_checksum
    ON extraction_log USING btree (file_checksum);

CREATE INDEX IF NOT EXISTS idx_extraction_log_status
    ON extraction_log USING btree (status);

CREATE INDEX IF NOT EXISTS idx_extraction_log_created
    ON extraction_log USING btree (created_at DESC);
|
||||
|
||||
-- Catalog documentation for the content graph schema.
-- The table-level summaries list the same values as the matching column
-- comments so the two descriptions cannot disagree.
-- NOTE(review): assumes the column-level value lists are the authoritative
-- ones (they are the supersets) -- confirm against the application code.
COMMENT ON TABLE content_nodes IS 'Content graph nodes: directories, files, chunks, embeddings';
COMMENT ON TABLE content_edges IS 'Content graph edges: contains, derived_from, references, duplicates, similar_to';
COMMENT ON TABLE extraction_log IS 'Tracks extraction history for incremental updates';

COMMENT ON COLUMN content_nodes.node_type IS 'directory, file, chunk, embedding';
COMMENT ON COLUMN content_nodes.content_hash IS 'Hash of extracted content (not file bytes)';
COMMENT ON COLUMN content_edges.edge_type IS 'contains, derived_from, references, duplicates, similar_to';
|
||||
Reference in New Issue
Block a user