CREATE TABLE IF NOT EXISTS content_nodes ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, node_type VARCHAR(50) NOT NULL, path TEXT NOT NULL, disk_label VARCHAR(50), parent_id UUID REFERENCES content_nodes(id) ON DELETE CASCADE, checksum VARCHAR(64), size BIGINT, modified_time TIMESTAMP, content_hash VARCHAR(64), extracted_at TIMESTAMP, extraction_method VARCHAR(100), metadata JSONB, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, UNIQUE(node_type, path, disk_label) ); CREATE INDEX IF NOT EXISTS idx_content_nodes_type ON content_nodes(node_type); CREATE INDEX IF NOT EXISTS idx_content_nodes_path ON content_nodes(path); CREATE INDEX IF NOT EXISTS idx_content_nodes_parent ON content_nodes(parent_id); CREATE INDEX IF NOT EXISTS idx_content_nodes_checksum ON content_nodes(checksum); CREATE INDEX IF NOT EXISTS idx_content_nodes_content_hash ON content_nodes(content_hash); CREATE TABLE IF NOT EXISTS content_edges ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, source_id UUID NOT NULL REFERENCES content_nodes(id) ON DELETE CASCADE, target_id UUID NOT NULL REFERENCES content_nodes(id) ON DELETE CASCADE, edge_type VARCHAR(50) NOT NULL, metadata JSONB, confidence FLOAT, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, UNIQUE(source_id, target_id, edge_type) ); CREATE INDEX IF NOT EXISTS idx_content_edges_source ON content_edges(source_id); CREATE INDEX IF NOT EXISTS idx_content_edges_target ON content_edges(target_id); CREATE INDEX IF NOT EXISTS idx_content_edges_type ON content_edges(edge_type); CREATE TABLE IF NOT EXISTS extraction_log ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, node_id UUID REFERENCES content_nodes(id) ON DELETE CASCADE, file_path TEXT NOT NULL, file_checksum VARCHAR(64), extraction_method VARCHAR(100), status VARCHAR(50), error_message TEXT, extracted_size BIGINT, processing_time_ms INT, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_extraction_log_node ON extraction_log(node_id); CREATE INDEX IF NOT EXISTS idx_extraction_log_file ON extraction_log(file_path); CREATE INDEX IF NOT EXISTS idx_extraction_log_checksum ON extraction_log(file_checksum); CREATE INDEX IF NOT EXISTS idx_extraction_log_status ON extraction_log(status); CREATE INDEX IF NOT EXISTS idx_extraction_log_created ON extraction_log(created_at DESC); COMMENT ON TABLE content_nodes IS 'Content graph nodes: directories, files, chunks'; COMMENT ON TABLE content_edges IS 'Content graph edges: contains, derived_from, references, duplicates'; COMMENT ON TABLE extraction_log IS 'Tracks extraction history for incremental updates'; COMMENT ON COLUMN content_nodes.node_type IS 'directory, file, chunk, embedding'; COMMENT ON COLUMN content_nodes.content_hash IS 'Hash of extracted content (not file bytes)'; COMMENT ON COLUMN content_edges.edge_type IS 'contains, derived_from, references, duplicates, similar_to';