ALTER TABLE files ADD COLUMN IF NOT EXISTS fts_vector tsvector; CREATE INDEX IF NOT EXISTS idx_files_fts ON files USING GIN(fts_vector); CREATE OR REPLACE FUNCTION files_fts_update() RETURNS trigger AS $$ BEGIN NEW.fts_vector := setweight(to_tsvector('english', COALESCE(NEW.path, '')), 'A') || setweight(to_tsvector('english', COALESCE(NEW.extracted_text, '')), 'B'); RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER files_fts_trigger BEFORE INSERT OR UPDATE OF path, extracted_text ON files FOR EACH ROW EXECUTE FUNCTION files_fts_update(); UPDATE files SET fts_vector = setweight(to_tsvector('english', COALESCE(path, '')), 'A') || setweight(to_tsvector('english', COALESCE(extracted_text, '')), 'B') WHERE extracted_text IS NOT NULL AND fts_vector IS NULL; CREATE TABLE IF NOT EXISTS content_chunks ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, node_id UUID REFERENCES content_nodes(id) ON DELETE CASCADE, file_path TEXT NOT NULL, disk_label VARCHAR(50), chunk_id VARCHAR(32) NOT NULL, chunk_index INT NOT NULL, chunk_text TEXT NOT NULL, chunk_size INT, offset_start INT, offset_end INT, section_title TEXT, metadata JSONB, fts_vector tsvector, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_path, chunk_id) ); CREATE INDEX IF NOT EXISTS idx_chunks_node ON content_chunks(node_id); CREATE INDEX IF NOT EXISTS idx_chunks_file ON content_chunks(file_path); CREATE INDEX IF NOT EXISTS idx_chunks_fts ON content_chunks USING GIN(fts_vector); CREATE INDEX IF NOT EXISTS idx_chunks_metadata ON content_chunks USING GIN(metadata); CREATE OR REPLACE FUNCTION chunks_fts_update() RETURNS trigger AS $$ BEGIN NEW.fts_vector := setweight(to_tsvector('english', COALESCE(NEW.section_title, '')), 'A') || setweight(to_tsvector('english', COALESCE(NEW.chunk_text, '')), 'B'); RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER chunks_fts_trigger BEFORE INSERT OR UPDATE OF chunk_text, section_title ON content_chunks FOR EACH ROW EXECUTE FUNCTION chunks_fts_update(); CREATE TABLE IF NOT EXISTS directory_index ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, dir_path TEXT NOT NULL UNIQUE, disk_label VARCHAR(50), file_count INT DEFAULT 0, total_size BIGINT DEFAULT 0, indexed_files INT DEFAULT 0, aggregated_text TEXT, fts_vector tsvector, bm25_stats JSONB, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_dir_index_path ON directory_index(dir_path); CREATE INDEX IF NOT EXISTS idx_dir_index_disk ON directory_index(disk_label); CREATE INDEX IF NOT EXISTS idx_dir_index_fts ON directory_index USING GIN(fts_vector); COMMENT ON TABLE content_chunks IS 'Structured chunks for BM25 and vector search'; COMMENT ON TABLE directory_index IS 'Aggregated directory-level BM25 index'; COMMENT ON COLUMN content_chunks.chunk_id IS 'Stable hash-based chunk identifier'; COMMENT ON COLUMN content_chunks.offset_start IS 'Character offset or paragraph index'; COMMENT ON COLUMN content_chunks.section_title IS 'Heading/function name for structured chunks';