init

2025-12-17 16:30:46 +01:00
parent c0ca907b01
commit 918e96ad21
14 changed files with 358 additions and 1951 deletions
--- a/transcribe_speakers.py
+++ b/transcribe_speakers.py
@@ -15,11 +15,13 @@ import json
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed

-# Choose your Whisper backend here:
-# For faster-whisper (recommended):
+# Whisper transcription (using faster-whisper for optimal performance)
 from faster_whisper import WhisperModel

-# LLM integration
+# Sentence extraction for stitching chunks
+from sentence_extractor import SentenceExtractor, SentenceCleaner
+
+# LLM integration (optional)
 try:
    import ollama
    OLLAMA_AVAILABLE = True
@@ -27,10 +29,6 @@ except ImportError:
    OLLAMA_AVAILABLE = False


-# # For regular whisper (comment out the line above and uncomment these):
-# import whisper
-
-
 class WindowsLoopbackAudioCapture:
    """Capture Windows speaker output using WASAPI loopback"""

@@ -170,12 +168,6 @@ class WhisperStreamTranscriber:
        self.audio_buffer = np.array([], dtype=np.float32)
        self.lock = threading.Lock()

-        # # REGULAR WHISPER:
-        # self.model = whisper.load_model(model_name)
-        # self.language = language
-        # self.audio_buffer = np.array([], dtype=np.float32)
-        # self.lock = threading.Lock()
-
    def add_audio(self, audio_chunk):
        """Add new audio data to buffer"""
        with self.lock:
@@ -222,19 +214,6 @@ class WhisperStreamTranscriber:
            print(f"❌ Transcription error: {e}")
            return None

-        # # REGULAR WHISPER:
-        # try:
-        #     result = self.model.transcribe(
-        #         audio_to_process,
-        #         language=self.language,
-        #         task="transcribe",
-        #         fp16=False
-        #     )
-        #     return result["text"].strip()
-        # except Exception as e:
-        #     print(f"❌ Transcription error: {e}")
-        #     return None
-

 class LocalLLMAnalyzer:
    """Local LLM for fact-checking and question generation using Ollama"""
@@ -536,6 +515,8 @@ Examples:
                        help="Ollama model to use for LLM analysis (default:  gpt-oss:20b)")
    parser.add_argument("--llm-debug", action="store_true",
                        help="Show LLM raw responses for debugging")
+    parser.add_argument("--sentence-mode", action="store_true",
+                        help="Extract complete sentences by stitching chunks together")

    args = parser.parse_args()

@@ -549,6 +530,8 @@ Examples:
        print(f"Output: {args.output}")
    if args.enable_llm:
        print(f"LLM Analysis: Enabled ({args.llm_model})")
+    if args.sentence_mode:
+        print(f"Sentence Mode: Enabled (stitching chunks into complete sentences)")

    # Initialize audio capture
    try:
@@ -591,6 +574,14 @@ Examples:
            print("Continuing without LLM analysis...")
            llm_analyzer = None

+    # Initialize sentence extractor (optional)
+    sentence_extractor = None
+    sentence_cleaner = None
+    if args.sentence_mode:
+        sentence_extractor = SentenceExtractor(max_buffer_words=150)
+        sentence_cleaner = SentenceCleaner()
+        print("✓ Sentence extraction initialized")
+
    # Main processing loop
    print(f"\n✅ Started transcription. Press Ctrl+C to stop.\n{'=' * 50}")
    last_process_time = time.time()
@@ -620,11 +611,45 @@ Examples:
                    segment_count += 1
                    timestamp = datetime.now().strftime("%H:%M:%S")

-                    # Display transcription immediately (don't wait for LLM)
-                    print(f"[{timestamp}] {text}")
+                    # Sentence extraction mode
+                    if sentence_extractor:
+                        # Add chunk to extractor and get complete sentences
+                        sentences = sentence_extractor.add_chunk(text)

-                    # LLM Analysis (run concurrently in background)
-                    if llm_analyzer:
+                        for sentence in sentences:
+                            # Clean the sentence
+                            cleaned = sentence_cleaner.clean(sentence) if sentence_cleaner else sentence
+                            if cleaned:
+                                print(f"[{timestamp}] 📝 {cleaned}")
+
+                                # Save each sentence via save_transcript (only when no LLM analyzer is active)
+                                if args.output and not llm_analyzer:
+                                    save_transcript(cleaned, timestamp, args.output)
+
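+                                # LLM analysis on complete sentences. NOTE(review): futures are keyed by segment_count, so several sentences from one chunk would replace each other in pending_llm_tasks - confirm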
+                                if llm_analyzer:
+                                    context = f"Sentence from segment {segment_count}"
+
+                                    def run_llm_analysis(txt, ctx, ts, seg_num):  # worker: analyze one sentence, return results bundled with its metadata
+                                        fc = llm_analyzer.fact_check(txt, ctx)  # llm_analyzer comes from the enclosing scope
+                                        qs = llm_analyzer.generate_augmenting_questions(txt, ctx)
+                                        return {
+                                            'timestamp': ts,
+                                            'text': txt,
+                                            'segment_count': seg_num,
+                                            'fact_check': fc,
+                                            'questions': qs
+                                        }
+
+                                    future = llm_executor.submit(run_llm_analysis, cleaned, context, timestamp, segment_count)
+                                    pending_llm_tasks[segment_count] = future
+                    else:
+                        # Standard mode: display chunks as-is
+                        # Display transcription immediately (don't wait for LLM)
+                        print(f"[{timestamp}] {text}")
+
+                    # LLM Analysis (run concurrently in background) - only in non-sentence mode
+                    if llm_analyzer and not sentence_extractor:
                        context = f"Segment {segment_count}"

                        # Submit LLM tasks to thread pool
@@ -701,6 +726,34 @@ Examples:
    # Cleanup
    capturer.close()

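+    # Flush sentence buffer if in sentence mode. NOTE(review): this runs before "Process remaining audio" below - confirm the last chunk should not pass through the extractor first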
+    if sentence_extractor:
+        print("\n📝 Flushing sentence buffer...")
+        final_sentences = sentence_extractor.flush()
+        for sentence in final_sentences:
+            cleaned = sentence_cleaner.clean(sentence) if sentence_cleaner else sentence
+            if cleaned:
+                timestamp = datetime.now().strftime("%H:%M:%S")
+                print(f"[{timestamp}] 📝 {cleaned}")
+
+                if args.output and not llm_analyzer:
+                    save_transcript(cleaned, timestamp, args.output)
+
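+                # LLM analysis for flushed sentences (called inline here, not submitted to llm_executor; questions are generated without a context argument)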
+                if llm_analyzer:
+                    fact_check = llm_analyzer.fact_check(cleaned, "Final sentence")
+                    questions = llm_analyzer.generate_augmenting_questions(cleaned)
+                    display_enriched_output(cleaned, timestamp, fact_check, questions)
+
+                    if args.output:
+                        data = {
+                            'timestamp': timestamp,
+                            'text': cleaned,
+                            'fact_check': fact_check,
+                            'questions': questions
+                        }
+                        save_enriched_transcript(data, args.output)
+
    # Process remaining audio
    print("\nProcessing remaining audio...")
    final_text = transcriber.transcribe_chunk(min_duration=0)