init

2025-12-17 16:33:19 +01:00
commit ae818f0b4b
10 changed files with 2206 additions and 0 deletions
--- a/transcribe_speakers.py
+++ b/transcribe_speakers.py
@@ -0,0 +1,792 @@
+#!/usr/bin/env python3
+"""
+Real-time transcription of Windows speaker output using loopback capture.
+Captures system audio and transcribes with Whisper in near real-time.
+"""
+
+import sounddevice as sd
+import numpy as np
+import threading
+import queue
+import time
+import os
+import argparse
+import json
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+# Whisper transcription (using faster-whisper for optimal performance)
+from faster_whisper import WhisperModel
+
+# Sentence extraction for stitching chunks
+from sentence_extractor import SentenceExtractor, SentenceCleaner
+
+# LLM integration (optional)
+try:
+    import ollama
+    OLLAMA_AVAILABLE = True
+except ImportError:
+    OLLAMA_AVAILABLE = False
+
+
+class WindowsLoopbackAudioCapture:
+    """Capture Windows speaker output through a loopback-style input device.
+
+    Opens a mono, 16-bit ``sounddevice.InputStream`` on the selected device and
+    pushes every block delivered by the stream callback onto an internal queue
+    that ``read_chunk`` drains.
+    """
+
+    def __init__(self, device_name=None, sample_rate=16000, chunk_size=2048):
+        """Select a capture device and start streaming from it.
+
+        Args:
+            device_name: Optional case-insensitive substring of the device
+                name. When omitted, or when nothing matches, a device is
+                auto-detected instead.
+            sample_rate: Sample rate requested from the stream, in Hz.
+            chunk_size: Number of frames per callback block.
+
+        Raises:
+            RuntimeError: If no suitable device is found or the stream cannot
+                be opened/started.
+        """
+        self.sample_rate = sample_rate
+        self.chunk_size = chunk_size
+
+        # Find loopback device
+        self.device_info = self._find_loopback_device(device_name)
+        if not self.device_info:
+            raise RuntimeError(
+                "No loopback device found.\n"
+                "1. Ensure your speakers/headphones are connected\n"
+                "2. Enable 'Stereo Mix' in Sound settings\n"
+                "3. Or install VB-Cable virtual audio device"
+            )
+
+        print(f"✓ Using device: {self.device_info['name']} (index {self.device_info['index']})")
+
+        # Queue filled by the stream callback, drained by read_chunk()
+        self.audio_queue = queue.Queue()
+        self.stop_event = threading.Event()
+
+        # Start the stream
+        stream = None
+        try:
+            stream = sd.InputStream(
+                device=self.device_info['index'],
+                channels=1,
+                samplerate=sample_rate,
+                blocksize=chunk_size,
+                dtype='int16',
+                latency='low',
+                callback=self._audio_callback
+            )
+            stream.start()
+        except Exception as e:
+            # Do not leak a stream that was opened but failed to start.
+            if stream is not None:
+                stream.close()
+            # Chain the cause so the underlying audio error stays visible.
+            raise RuntimeError(f"Failed to start audio stream: {e}") from e
+
+        self.stream = stream
+        print("✓ Audio capture stream started")
+
+    def _find_loopback_device(self, device_name):
+        """Pick the device to capture from.
+
+        Only devices reporting at least one input channel are considered.
+        Preference order: substring match on ``device_name`` (case-insensitive),
+        then a name containing 'Speakers', 'Headphones' or 'Output', then a
+        name containing 'Stereo Mix'.
+
+        Returns:
+            The matching device dict from ``sd.query_devices()``, or ``None``.
+        """
+        candidates = [dev for dev in sd.query_devices()
+                      if dev['max_input_channels'] > 0]
+
+        # If a device name was given, look for a substring match first
+        if device_name:
+            wanted = device_name.lower()
+            for dev in candidates:
+                if wanted in dev['name'].lower():
+                    return dev
+            # Tell the user their explicit choice was not honoured instead of
+            # silently capturing from a different device.
+            print(f"⚠ No input device matching '{device_name}'; falling back to auto-detection")
+
+        # Auto-detect: look for speakers/headphones style names
+        for dev in candidates:
+            if any(x in dev['name'] for x in ('Speakers', 'Headphones', 'Output')):
+                return dev
+
+        # Fallback: Stereo Mix or similar (must still be usable as an input)
+        for dev in candidates:
+            if 'Stereo Mix' in dev['name']:
+                return dev
+
+        return None
+
+    def _audio_callback(self, indata, frames, time_info, status):
+        """Stream callback: report status flags and enqueue a copy of the block."""
+        if status:
+            print(f"⚠ Audio status: {status}")
+        # Copy so the queued data does not alias the buffer handed to the callback.
+        self.audio_queue.put(indata.copy())
+
+    def read_chunk(self):
+        """Return the next queued block flattened to 1-D, or ``None`` after 50 ms."""
+        try:
+            return self.audio_queue.get(timeout=0.05).flatten()
+        except queue.Empty:
+            return None
+
+    def close(self):
+        """Stop and close the stream; safe to call more than once."""
+        stop_event = getattr(self, 'stop_event', None)
+        if stop_event is not None:
+            stop_event.set()
+
+        stream = getattr(self, 'stream', None)
+        if stream is not None:
+            # Drop the reference first so a repeated close() is a no-op.
+            self.stream = None
+            stream.stop()
+            stream.close()
+
+
+class WhisperStreamTranscriber:
+    """Buffer incoming audio and transcribe it in batches with faster-whisper."""
+
+    def __init__(self, model_name="base", language="en", force_cpu=False, device_index=0,
+                 sample_rate=16000):
+        """Load the model on GPU when usable, otherwise on CPU.
+
+        Args:
+            model_name: Model size/name passed to ``WhisperModel``.
+            language: Language code passed to every ``transcribe`` call.
+            force_cpu: Skip CUDA detection and run on CPU.
+            device_index: CUDA device index; out-of-range values fall back to 0.
+            sample_rate: Sample rate (Hz) of the audio given to ``add_audio``,
+                used to convert buffer length into seconds.
+        """
+        print(f"Loading Whisper model '{model_name}'...")
+
+        device = "cpu"
+        compute_type = "int8"
+
+        # torch is only used to probe CUDA and to name the GPU; do not make it
+        # a hard requirement.
+        try:
+            import torch
+        except ImportError:
+            torch = None
+
+        if force_cpu or (torch is not None and not torch.cuda.is_available()):
+            print("Using device: cpu")
+        else:
+            try:
+                # Test if CTranslate2 can actually use CUDA
+                import ctranslate2
+                cuda_count = ctranslate2.get_cuda_device_count()
+                if cuda_count > 0:
+                    # Validate device index (negative values are invalid too)
+                    if not 0 <= device_index < cuda_count:
+                        print(f"⚠️  GPU index {device_index} not available. Found {cuda_count} GPU(s). Using GPU 0.")
+                        device_index = 0
+
+                    if torch is not None:
+                        gpu_label = f"cuda:{device_index} ({torch.cuda.get_device_name(device_index)})"
+                    else:
+                        gpu_label = f"cuda:{device_index}"
+
+                    # CTranslate2 uses "cuda" + device_index parameter, not "cuda:N"
+                    device = "cuda"
+                    compute_type = "float16"
+                    print(f"Using device: {gpu_label}")
+                elif torch is not None:
+                    print("CUDA available in PyTorch but not in CTranslate2. Using CPU.")
+                else:
+                    print("Using device: cpu")
+            except Exception as e:
+                print(f"CUDA libraries not found ({e}). Using CPU.")
+                device = "cpu"
+                compute_type = "int8"
+
+        model_kwargs = {
+            "device": device,
+            "compute_type": compute_type
+        }
+
+        if device == "cuda":
+            model_kwargs["device_index"] = device_index
+        else:
+            model_kwargs["cpu_threads"] = 4
+
+        self.model = WhisperModel(model_name, **model_kwargs)
+        self.language = language
+        self.sample_rate = sample_rate
+        self.audio_buffer = np.array([], dtype=np.float32)
+        self.lock = threading.Lock()
+
+    def add_audio(self, audio_chunk):
+        """Append a block of int16 samples to the buffer, scaled to float32 in [-1, 1)."""
+        with self.lock:
+            audio_float = audio_chunk.astype(np.float32) / 32768.0
+            self.audio_buffer = np.concatenate([self.audio_buffer, audio_float])
+
+    def transcribe_chunk(self, min_duration=5.0, fast_mode=False):
+        """Transcribe and clear the buffered audio once enough has accumulated.
+
+        Args:
+            min_duration: Minimum buffered seconds required before transcribing.
+            fast_mode: Use greedy decoding without VAD instead of beam search
+                with VAD filtering.
+
+        Returns:
+            The stripped transcript text, or ``None`` when the buffer is empty
+            or too short, the result is empty, or transcription failed.
+        """
+        with self.lock:
+            buffered = len(self.audio_buffer)
+            # Nothing to do for an empty buffer, even with min_duration=0.
+            if buffered == 0 or buffered / self.sample_rate < min_duration:
+                return None
+
+            # The buffer is rebound (never mutated in place), so no copy is needed.
+            audio_to_process = self.audio_buffer
+            self.audio_buffer = np.array([], dtype=np.float32)
+
+        options = {"language": self.language, "word_timestamps": False}
+        if fast_mode:
+            # Fast mode: greedy decoding, no VAD
+            options.update(beam_size=1, best_of=1, temperature=0.0, vad_filter=False)
+        else:
+            # Balanced mode: moderate beam size with VAD
+            options.update(
+                beam_size=3,
+                vad_filter=True,
+                vad_parameters=dict(min_silence_duration_ms=500),
+            )
+
+        try:
+            segments, _ = self.model.transcribe(audio_to_process, **options)
+            text = " ".join(segment.text for segment in segments).strip()
+            return text if text else None
+        except Exception as e:
+            # Best effort: a failed batch is reported and dropped.
+            print(f"❌ Transcription error: {e}")
+            return None
+
+
+class LocalLLMAnalyzer:
+    """Fact-check text and generate follow-up questions with a local Ollama model."""
+
+    # Used to pad the result when fewer than three questions can be parsed.
+    _DEFAULT_QUESTIONS = (
+        "What are the key points here?",
+        "What evidence supports this?",
+        "What are the implications?",
+    )
+
+    def __init__(self, model="llama3.2", debug=False):
+        """Store the settings and verify that the Ollama service is reachable.
+
+        Args:
+            model: Name of the Ollama model to query.
+            debug: Print raw model responses and errors when true.
+
+        Raises:
+            RuntimeError: If the ``ollama`` package is missing or the service
+                cannot be reached.
+        """
+        if not OLLAMA_AVAILABLE:
+            raise RuntimeError(
+                "Ollama package not installed.\n"
+                "Install with: pip install ollama"
+            )
+
+        self.model = model
+        self.debug = debug
+        self._test_connection()
+
+    def _test_connection(self):
+        """Call ``ollama.list()`` and raise ``RuntimeError`` if it fails."""
+        try:
+            ollama.list()
+            print(f"✓ Ollama connected using model: {self.model}")
+        except Exception as e:
+            raise RuntimeError(
+                f"Cannot connect to Ollama. Ensure it's installed and running.\n"
+                f"Error: {e}\n"
+                f"Install from: https://ollama.ai\n"
+                f"Then run: ollama pull {self.model}"
+            ) from e
+
+    def _extract_json(self, text):
+        """Return the JSON object embedded in ``text``, or ``text`` unchanged.
+
+        A fenced markdown block is preferred; otherwise the span from the
+        first ``{`` to the last ``}`` is returned.
+        """
+        import re
+        json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
+        if json_match:
+            return json_match.group(1)
+
+        # Try to find raw JSON object
+        json_match = re.search(r'\{.*\}', text, re.DOTALL)
+        if json_match:
+            return json_match.group(0)
+
+        return text
+
+    @staticmethod
+    def _parse_fact_check(response_text):
+        """Parse a VERDICT/CONFIDENCE/EXPLANATION reply into a result dict.
+
+        Anything that cannot be parsed keeps its default: verdict "dubious",
+        confidence 0.5, explanation "No explanation provided".
+        """
+        import re
+
+        verdict = "dubious"
+        confidence = 0.5
+        explanation = "No explanation provided"
+
+        # Accept only the three allowed verdicts. Brackets or markdown emphasis
+        # around the value are tolerated, "not factual"/"not-factual" are
+        # normalised, and an echoed template ("factual/dubious/...") is rejected.
+        verdict_match = re.search(
+            r'VERDICT:\W*(not[\s_-]*factual|factual|dubious)(?![\w/])',
+            response_text, re.IGNORECASE)
+        if verdict_match:
+            found = verdict_match.group(1).lower()
+            verdict = "not_factual" if found.startswith("not") else found
+
+        # Accept "0.8", "[0.8]", ".8" and percentages such as "85%".
+        conf_match = re.search(r'CONFIDENCE:[\s\[*]*(\d*\.?\d+)\s*(%)?',
+                               response_text, re.IGNORECASE)
+        if conf_match:
+            value = float(conf_match.group(1))
+            if conf_match.group(2):
+                value /= 100.0
+            confidence = max(0.0, min(1.0, value))  # Clamp to 0-1
+
+        expl_match = re.search(r'EXPLANATION:\s*(.+?)(?:\n\n|\Z)', response_text,
+                               re.IGNORECASE | re.DOTALL)
+        if expl_match:
+            explanation = expl_match.group(1).strip()
+            # The reply may be cut off mid-sentence by the token limit:
+            # trim back to the last complete sentence when one exists.
+            if explanation and explanation[-1] not in '.!?':
+                last_stop = max(explanation.rfind('.'), explanation.rfind('!'), explanation.rfind('?'))
+                if last_stop > 20:  # Keep at least some text
+                    explanation = explanation[:last_stop + 1]
+
+        return {
+            "verdict": verdict,
+            "confidence": confidence,
+            "explanation": explanation[:250] if explanation else "Analysis completed",
+            "sources": [],
+            "corrections": ""
+        }
+
+    @staticmethod
+    def _tidy_question(question):
+        """Make ``question`` end with '?', trimming a truncated tail if present."""
+        question = question.strip()
+        if not question or question.endswith('?'):
+            return question
+        last_q = question.rfind('?')
+        if last_q > 10:
+            return question[:last_q + 1]
+        return question + "?"
+
+    @staticmethod
+    def _parse_questions(response_text):
+        """Extract exactly three questions from a ``Q1:/Q2:/Q3:`` style reply.
+
+        Labelled lines are used first. If fewer than three are found, the
+        remaining unlabelled lines are used in order (headers ending in ':'
+        and duplicates are skipped), then the defaults fill any gap.
+        """
+        import re
+
+        labelled = {}
+        unlabelled = []
+        for raw_line in response_text.splitlines():
+            line = raw_line.strip()
+            if not line:
+                continue
+            # Matching per line keeps a bare "Q1:" from swallowing the next line.
+            marker = re.match(r'^\W*Q(\d+)\s*[:.)]\s*(.*)$', line, re.IGNORECASE)
+            if marker:
+                number, body = int(marker.group(1)), marker.group(2).strip()
+                if body and number not in labelled:
+                    labelled[number] = body
+            else:
+                unlabelled.append(line)
+
+        questions = []
+        for number in (1, 2, 3):
+            if number in labelled:
+                tidy = LocalLLMAnalyzer._tidy_question(labelled[number])
+                if tidy and tidy not in questions:
+                    questions.append(tidy)
+
+        for line in unlabelled:
+            if len(questions) >= 3:
+                break
+            if line.endswith(':'):
+                continue  # preamble such as "Here are three questions:"
+            # Drop bullets and "1." / "2)" style numbering.
+            candidate = re.sub(r'^\W*(?:\d+\s*[.):]\s*)?', '', line)
+            candidate = LocalLLMAnalyzer._tidy_question(candidate)
+            if candidate and candidate not in questions:
+                questions.append(candidate)
+
+        while len(questions) < 3:
+            questions.append(LocalLLMAnalyzer._DEFAULT_QUESTIONS[len(questions)])
+
+        return questions[:3]
+
+    def fact_check(self, text, context=""):
+        """Ask the model to judge the factual accuracy of ``text``.
+
+        Args:
+            text: Statement to analyse.
+            context: Accepted for interface compatibility; not sent to the model.
+
+        Returns:
+            Dict with keys ``verdict`` ("factual", "dubious", "not_factual" or
+            "error"), ``confidence`` (0.0-1.0), ``explanation``, ``sources``
+            and ``corrections``. Failures are reported via verdict "error"
+            instead of raising.
+        """
+        prompt = f"""Analyze this for accuracy. Reply in this exact format:
+
+VERDICT: [factual/dubious/not_factual]
+CONFIDENCE: [0.0-1.0]
+EXPLANATION: [one sentence]
+
+Statement: "{text}"
+"""
+
+        try:
+            response = ollama.generate(
+                model=self.model,
+                prompt=prompt,
+                options={"temperature": 0.1, "num_predict": 250}
+            )
+
+            response_text = response['response'].strip()
+
+            if self.debug:
+                print(f"\n[DEBUG] Fact-check response:\n{response_text}\n")
+
+            return self._parse_fact_check(response_text)
+
+        except Exception as e:
+            if self.debug:
+                print(f"[DEBUG] Fact-check error: {e}")
+            return {
+                "verdict": "error",
+                "confidence": 0.0,
+                "explanation": f"Analysis failed: {str(e)}",
+                "sources": [],
+                "corrections": ""
+            }
+
+    def generate_augmenting_questions(self, text, context=""):
+        """Ask the model for three follow-up questions about ``text``.
+
+        Args:
+            text: Statement to generate questions for.
+            context: Accepted for interface compatibility; not sent to the model.
+
+        Returns:
+            Dict with ``questions`` (always three strings) and ``topics``
+            (always empty). On failure a fixed set of questions is returned.
+        """
+        prompt = f"""Generate 3 questions about this. Reply in this exact format:
+
+Q1: [question]
+Q2: [question]
+Q3: [question]
+
+Statement: "{text}"
+"""
+
+        try:
+            response = ollama.generate(
+                model=self.model,
+                prompt=prompt,
+                options={"temperature": 0.7, "num_predict": 250}
+            )
+
+            response_text = response['response'].strip()
+
+            if self.debug:
+                print(f"\n[DEBUG] Questions response:\n{response_text}\n")
+
+            return {
+                "questions": self._parse_questions(response_text),
+                "topics": []
+            }
+
+        except Exception as e:
+            if self.debug:
+                print(f"[DEBUG] Questions error: {e}")
+            return {
+                "questions": [
+                    "What are the key points?",
+                    "What supports this claim?",
+                    "What are the implications?"
+                ],
+                "topics": []
+            }
+
+
+def list_audio_devices():
+    """List every device that reports at least one input channel.
+
+    For each such device the index, name, input channel count and default
+    sample rate are printed, followed by a blank line.
+    """
+    print("\nAvailable audio capture devices:")
+    for index, info in enumerate(sd.query_devices()):
+        if info['max_input_channels'] <= 0:
+            # Output-only device: nothing to capture from.
+            continue
+        print(f"  [{index}] {info['name']}")
+        print(f"      Channels: {info['max_input_channels']} | Sample Rate: {info['default_samplerate']}")
+    print()
+
+
+def save_transcript(text, timestamp, filename):
+    """Append a ``[timestamp] text`` line to ``filename``.
+
+    The parent directory is created when missing; the file is opened in
+    append mode with UTF-8 encoding.
+    """
+    # A bare file name has no directory component; use the current directory.
+    parent_dir = os.path.dirname(filename) or '.'
+    os.makedirs(parent_dir, exist_ok=True)
+
+    entry = f"[{timestamp}] {text}\n"
+    with open(filename, "a", encoding="utf-8") as out:
+        out.write(entry)
+
+
+def save_enriched_transcript(data, filename):
+    """Append one transcript entry and its LLM analysis to ``filename``.
+
+    ``data`` must provide ``timestamp`` and ``text``. An optional
+    ``fact_check`` dict and an optional ``questions`` dict (holding a
+    ``questions`` list) add their own sections. The parent directory is
+    created when missing.
+    """
+    parent_dir = os.path.dirname(filename) or '.'
+    os.makedirs(parent_dir, exist_ok=True)
+
+    with open(filename, "a", encoding="utf-8") as out:
+        write = out.write
+
+        # Separator rule, then the transcript line itself
+        write(f"\n{'='*70}\n")
+        write(f"[{data['timestamp']}] {data['text']}\n\n")
+
+        if 'fact_check' in data:
+            check = data['fact_check']
+            verdict = check.get('verdict', 'N/A').upper()
+            score = check.get('confidence', 0)
+            write(f"📊 Fact Check: {verdict} (confidence: {score:.2f})\n")
+            write(f"💡 {check.get('explanation', 'N/A')}\n")
+            correction = check.get('corrections')
+            if correction:
+                write(f"✏️  Correction: {correction}\n")
+            write("\n")
+
+        if 'questions' in data and data['questions'].get('questions'):
+            write("❓ Questions:\n")
+            for number, question in enumerate(data['questions']['questions'], 1):
+                write(f"{number}. {question}\n")
+            write("\n")
+
+
+def display_enriched_output(text, timestamp, fact_check=None, questions=None):
+    """Print a transcript line followed by any available LLM analysis.
+
+    Args:
+        text: Transcript text.
+        timestamp: Label printed in brackets before the text.
+        fact_check: Optional dict with ``verdict``, ``confidence``,
+            ``explanation`` and ``corrections``.
+        questions: Optional dict whose ``questions`` entry lists the questions.
+    """
+    print(f"\n[{timestamp}] {text}")
+
+    if fact_check:
+        icons = {
+            'factual': '✅',
+            'dubious': '⚠️',
+            'not_factual': '❌',
+            'error': '⚠️'
+        }
+        # Verdicts missing from the table get a question-mark icon.
+        icon = icons.get(fact_check.get('verdict', 'error'), '❓')
+        verdict_label = fact_check.get('verdict', 'N/A').upper()
+        score = fact_check.get('confidence', 0)
+
+        print(f"\n{icon} Fact Check: {verdict_label} (confidence: {score:.2f})")
+        print(f"💡 {fact_check.get('explanation', 'N/A')}")
+
+        correction = fact_check.get('corrections')
+        if correction:
+            print(f"✏️  Correction: {correction}")
+
+    question_list = questions.get('questions') if questions else None
+    if question_list:
+        print("\n❓ Questions:")
+        for number, question in enumerate(question_list, 1):
+            print(f"  {number}. {question}")
+
+
+def _build_arg_parser():
+    """Build the command-line parser for the transcription tool."""
+    parser = argparse.ArgumentParser(
+        description="Real-time transcription of Windows speaker output",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python transcribe_speakers.py
+  python transcribe_speakers.py --model small --language es --interval 5
+  python transcribe_speakers.py --device "Speakers" --output "meeting.txt"
+  python transcribe_speakers.py --model medium --interval 10 --output transcripts/live.txt
+        """
+    )
+
+    parser.add_argument("--model", default="base",
+                        choices=["tiny", "base", "small", "medium", "large"],
+                        help="Whisper model size (default: base)")
+    parser.add_argument("--language", default="en",
+                        help="Language code (default: en)")
+    parser.add_argument("--device", metavar="NAME",
+                        help="Audio device name (partial match). If not specified, auto-detects")
+    parser.add_argument("--interval", type=float, default=8.0,
+                        help="Processing interval in seconds (default: 8.0)")
+    parser.add_argument("--min-duration", type=float, default=3.0,
+                        help="Minimum audio duration before transcription (default: 3.0)")
+    parser.add_argument("--fast-mode", action="store_true",
+                        help="Enable fast mode (lower accuracy, faster transcription)")
+    parser.add_argument("--output", "-o", metavar="FILE",
+                        help="Save transcript to file (e.g., transcript.txt)")
+    parser.add_argument("--list-devices", action="store_true",
+                        help="List all available audio devices and exit")
+    parser.add_argument("--force-cpu", action="store_true",
+                        help="Force CPU processing (disable GPU acceleration)")
+    parser.add_argument("--gpu-index", type=int, default=0,
+                        help="GPU device index to use (default: 0)")
+    parser.add_argument("--enable-llm", action="store_true",
+                        help="Enable LLM analysis (fact-checking and questions)")
+    parser.add_argument("--llm-model", default="gpt-oss:20b",
+                        help="Ollama model to use for LLM analysis (default:  gpt-oss:20b)")
+    parser.add_argument("--llm-debug", action="store_true",
+                        help="Show LLM raw responses for debugging")
+    parser.add_argument("--sentence-mode", action="store_true",
+                        help="Extract complete sentences by stitching chunks together")
+    return parser
+
+
+def _analyze_with_llm(llm_analyzer, text, context, timestamp, segment_num):
+    """Run fact-checking and question generation for one piece of text.
+
+    Returns the dict shape consumed by ``display_enriched_output`` and
+    ``save_enriched_transcript``.
+    """
+    return {
+        'timestamp': timestamp,
+        'text': text,
+        'segment_count': segment_num,
+        'fact_check': llm_analyzer.fact_check(text, context),
+        'questions': llm_analyzer.generate_augmenting_questions(text, context),
+    }
+
+
+def _emit_enriched(result, output_path):
+    """Print one analysis result and append it to ``output_path`` when set."""
+    display_enriched_output(
+        result['text'],
+        result['timestamp'],
+        result['fact_check'],
+        result['questions']
+    )
+    if output_path:
+        save_enriched_transcript(result, output_path)
+
+
+def _reap_finished_llm_tasks(pending, output_path):
+    """Emit results of finished futures and return the pairs still running."""
+    still_running = []
+    for seg_num, future in pending:
+        if not future.done():
+            still_running.append((seg_num, future))
+            continue
+        try:
+            _emit_enriched(future.result(), output_path)
+        except Exception as e:
+            print(f"⚠️  LLM processing error for segment {seg_num}: {e}")
+    return still_running
+
+
+def main():
+    """Command-line entry point: capture audio, transcribe it, optionally analyse it.
+
+    Audio is pulled from the capture queue continuously and transcribed every
+    ``--interval`` seconds. Each transcript (or, in sentence mode, each
+    completed sentence) is printed, then either saved as plain text or handed
+    to the LLM thread pool, whose results are printed and saved as they finish.
+    On Ctrl+C the stream is closed, queued audio and pending analyses are
+    drained, the remaining audio is transcribed, and the sentence buffer is
+    flushed last so the final audio is included in it.
+    """
+    args = _build_arg_parser().parse_args()
+
+    if args.list_devices:
+        list_audio_devices()
+        return
+
+    print("=== Windows Real-Time Audio Transcription ===")
+    print(f"Model: {args.model} | Language: {args.language} | Interval: {args.interval}s")
+    if args.output:
+        print(f"Output: {args.output}")
+    if args.enable_llm:
+        print(f"LLM Analysis: Enabled ({args.llm_model})")
+    if args.sentence_mode:
+        print("Sentence Mode: Enabled (stitching chunks into complete sentences)")
+
+    # Initialize audio capture
+    try:
+        capturer = WindowsLoopbackAudioCapture(
+            device_name=args.device,
+            sample_rate=16000,
+            chunk_size=2048
+        )
+    except RuntimeError as e:
+        print(f"\n❌ Audio Error: {e}")
+        print("\nTo fix this:")
+        print("1. Right-click speaker icon → Sounds → Recording tab")
+        print("2. Right-click in empty area → Show Disabled Devices")
+        print("3. Enable 'Stereo Mix' → Set as Default Device")
+        print("\nAlternative: Install VB-Cable (free) from vb-audio.com")
+        print("   Then use: --device 'CABLE Output'")
+        list_audio_devices()
+        return
+
+    # Initialize transcriber
+    try:
+        transcriber = WhisperStreamTranscriber(
+            model_name=args.model,
+            language=args.language,
+            force_cpu=args.force_cpu,
+            device_index=args.gpu_index
+        )
+    except Exception as e:
+        print(f"\n❌ Model Error: {e}")
+        print("Make sure you installed Whisper correctly")
+        # The capture stream is already running; do not leave it open.
+        capturer.close()
+        return
+
+    # Initialize LLM analyzer (optional)
+    llm_analyzer = None
+    if args.enable_llm:
+        try:
+            llm_analyzer = LocalLLMAnalyzer(model=args.llm_model, debug=args.llm_debug)
+        except RuntimeError as e:
+            print(f"\n❌ LLM Error: {e}")
+            print("Continuing without LLM analysis...")
+
+    # Initialize sentence extractor (optional)
+    sentence_extractor = None
+    sentence_cleaner = None
+    if args.sentence_mode:
+        sentence_extractor = SentenceExtractor(max_buffer_words=150)
+        sentence_cleaner = SentenceCleaner()
+        print("✓ Sentence extraction initialized")
+
+    # Thread pool for concurrent LLM processing
+    llm_executor = ThreadPoolExecutor(max_workers=2) if llm_analyzer else None
+    # (segment number, future) pairs. A list rather than a dict keyed by
+    # segment number, because one segment can yield several sentences and
+    # each of them needs its own entry.
+    pending_llm_tasks = []
+
+    def publish(text, timestamp, marker, context, segment_num, background):
+        """Print one transcript item, then save it or send it for LLM analysis."""
+        print(f"[{timestamp}] {marker}{text}")
+        if not llm_analyzer:
+            if args.output:
+                save_transcript(text, timestamp, args.output)
+            return
+        if background:
+            future = llm_executor.submit(
+                _analyze_with_llm, llm_analyzer, text, context, timestamp, segment_num)
+            pending_llm_tasks.append((segment_num, future))
+        else:
+            # Shutdown path: the pool is gone, so analyse synchronously.
+            _emit_enriched(
+                _analyze_with_llm(llm_analyzer, text, context, timestamp, segment_num),
+                args.output)
+
+    # Main processing loop
+    print(f"\n✅ Started transcription. Press Ctrl+C to stop.\n{'=' * 50}")
+    # Monotonic clock: interval timing must not jump with wall-clock changes.
+    last_process_time = time.monotonic()
+    total_duration = 0.0
+    segment_count = 0
+
+    try:
+        while True:
+            # Collect audio
+            chunk = capturer.read_chunk()
+            if chunk is not None:
+                transcriber.add_audio(chunk)
+                total_duration += len(chunk) / capturer.sample_rate
+
+            # Process at intervals
+            current_time = time.monotonic()
+            if current_time - last_process_time >= args.interval:
+                text = transcriber.transcribe_chunk(
+                    min_duration=args.min_duration,
+                    fast_mode=args.fast_mode
+                )
+                if text:
+                    segment_count += 1
+                    timestamp = datetime.now().strftime("%H:%M:%S")
+
+                    if sentence_extractor:
+                        # Sentence mode: only completed sentences are emitted;
+                        # the raw chunk is not saved a second time.
+                        for sentence in sentence_extractor.add_chunk(text):
+                            cleaned = sentence_cleaner.clean(sentence)
+                            if cleaned:
+                                publish(cleaned, timestamp, "📝 ",
+                                        f"Sentence from segment {segment_count}",
+                                        segment_count, background=True)
+                    else:
+                        # Standard mode: emit the chunk as-is
+                        publish(text, timestamp, "",
+                                f"Segment {segment_count}",
+                                segment_count, background=True)
+
+                last_process_time = current_time
+
+            # Check for completed LLM tasks (non-blocking)
+            if pending_llm_tasks:
+                pending_llm_tasks[:] = _reap_finished_llm_tasks(pending_llm_tasks, args.output)
+
+    except KeyboardInterrupt:
+        print(f"\n{'=' * 50}\n🛑 Stopping transcription...")
+    finally:
+        # Always release the audio device, even on an unexpected error.
+        capturer.close()
+
+    # Pick up blocks that were captured but not yet pulled from the queue.
+    while True:
+        chunk = capturer.read_chunk()
+        if chunk is None:
+            break
+        transcriber.add_audio(chunk)
+        total_duration += len(chunk) / capturer.sample_rate
+
+    # Wait for pending LLM tasks to complete
+    if pending_llm_tasks:
+        print(f"\n⏳ Waiting for {len(pending_llm_tasks)} pending LLM tasks to complete...")
+        for seg_num, future in pending_llm_tasks:
+            try:
+                _emit_enriched(future.result(timeout=30), args.output)
+            except Exception as e:
+                print(f"⚠️  LLM task {seg_num} failed: {e}")
+
+    # Shutdown executor
+    if llm_executor:
+        llm_executor.shutdown(wait=True)
+
+    # Process remaining audio before flushing, so its text can still complete
+    # sentences held in the extractor.
+    print("\nProcessing remaining audio...")
+    final_text = transcriber.transcribe_chunk(min_duration=0, fast_mode=args.fast_mode)
+    if final_text:
+        segment_count += 1
+
+    if sentence_extractor:
+        closing_sentences = list(sentence_extractor.add_chunk(final_text)) if final_text else []
+        print("\n📝 Flushing sentence buffer...")
+        closing_sentences.extend(sentence_extractor.flush())
+        for sentence in closing_sentences:
+            cleaned = sentence_cleaner.clean(sentence)
+            if cleaned:
+                publish(cleaned, datetime.now().strftime("%H:%M:%S"), "📝 ",
+                        "Final sentence", segment_count, background=False)
+    elif final_text:
+        publish(final_text, datetime.now().strftime("%H:%M:%S"), "",
+                "Final segment", segment_count, background=False)
+
+    # Summary
+    print(f"\n✅ Complete! Processed {total_duration:.1f}s of audio")
+    print(f"   Generated {segment_count} transcript segments")
+    if args.output and os.path.exists(args.output):
+        abs_path = os.path.abspath(args.output)
+        print(f"💾 Transcript saved to: {abs_path}")
+
+
+# Run the command-line tool only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()