# verbatim-dicta/transcribe_speakers.py

#!/usr/bin/env python3
"""
Real-time transcription of Windows speaker output using loopback capture.
Captures system audio and transcribes with Whisper in near real-time.
"""

import sounddevice as sd
import numpy as np
import threading
import queue
import time
import os
import argparse
import json
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

# Whisper transcription (using faster-whisper for optimal performance)
from faster_whisper import WhisperModel

# Sentence extraction for stitching chunks
from sentence_extractor import SentenceExtractor, SentenceCleaner

# LLM integration (optional)
try:
    import ollama
    OLLAMA_AVAILABLE = True
except ImportError:
    OLLAMA_AVAILABLE = False


class WindowsLoopbackAudioCapture:
    """Capture Windows speaker output through a loopback-capable input device.

    Opens a mono, 16-bit ``sounddevice.InputStream`` on the selected device.
    Every block handed to the stream callback is copied onto an internal
    queue, from which callers pull with :meth:`read_chunk`.
    """

    def __init__(self, device_name=None, sample_rate=16000, chunk_size=2048):
        """Select a capture device and start streaming immediately.

        Args:
            device_name: Optional case-insensitive substring of the device
                name. When omitted, or when nothing matches, a device is
                auto-detected.
            sample_rate: Sample rate requested from the stream, in Hz.
            chunk_size: Frames per block delivered to the callback.

        Raises:
            RuntimeError: If no suitable device is found, or the stream
                cannot be opened or started.
        """
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size

        # Find loopback device
        self.device_info = self._find_loopback_device(device_name)
        if not self.device_info:
            raise RuntimeError(
                "No loopback device found.\n"
                "1. Ensure your speakers/headphones are connected\n"
                "2. Enable 'Stereo Mix' in Sound settings\n"
                "3. Or install VB-Cable virtual audio device"
            )

        print(f"✓ Using device: {self.device_info['name']} (index {self.device_info['index']})")

        # Queue for audio data
        self.audio_queue = queue.Queue()
        self.stop_event = threading.Event()

        # Start the stream
        stream = None
        try:
            stream = sd.InputStream(
                device=self.device_info['index'],
                channels=1,
                samplerate=sample_rate,
                blocksize=chunk_size,
                dtype='int16',
                latency='low',
                callback=self._audio_callback
            )
            self.stream = stream
            stream.start()
            print("✓ Audio capture stream started")
        except Exception as e:
            # The constructor is about to fail, so nobody else will ever get
            # a chance to close a stream that opened but did not start.
            if stream is not None:
                stream.close()
            raise RuntimeError(f"Failed to start audio stream: {e}") from e

    def _find_loopback_device(self, device_name):
        """Return the device dict to capture from, or None if none qualifies.

        Search order: a device whose name contains ``device_name``
        (case-insensitive), then a device whose name contains 'Speakers',
        'Headphones' or 'Output', then a 'Stereo Mix' device. Only devices
        reporting at least one input channel are considered, because the
        stream is opened as an input stream.
        """
        candidates = [
            dev for dev in sd.query_devices()
            if dev['max_input_channels'] > 0
        ]

        # If device name specified, look for a substring match first
        if device_name:
            wanted = device_name.lower()
            for dev in candidates:
                if wanted in dev['name'].lower():
                    return dev
            print(f"⚠ No input device matching '{device_name}'; falling back to auto-detection")

        # Auto-detect: look for WASAPI speakers/headphones
        for dev in candidates:
            if any(x in dev['name'] for x in ('Speakers', 'Headphones', 'Output')):
                return dev

        # Fallback: Stereo Mix or similar
        for dev in candidates:
            if 'Stereo Mix' in dev['name']:
                return dev

        return None

    def _audio_callback(self, indata, frames, time_info, status):
        """Stream callback: report status flags and queue a copy of the block."""
        if status:
            print(f"⚠ Audio status: {status}")
        # Copy because the callback's buffer is not ours to keep
        self.audio_queue.put(indata.copy())

    def read_chunk(self):
        """Return the next queued block flattened to 1-D, or None.

        Waits up to 50 ms for a block; None means nothing arrived in time.
        """
        try:
            return self.audio_queue.get(timeout=0.05).flatten()
        except queue.Empty:
            return None

    def close(self):
        """Stop and close the stream; safe to call repeatedly.

        The stream is closed even when stopping it raises; in that case the
        exception from ``stop()`` still propagates to the caller.
        """
        stream = getattr(self, 'stream', None)
        if stream is None:
            return
        try:
            stream.stop()
        finally:
            stream.close()
            self.stream = None


class WhisperStreamTranscriber:
    """Accumulate audio chunks and transcribe them in batches with faster-whisper."""

    # Sample rate (Hz) assumed when converting buffered sample counts to seconds.
    SAMPLE_RATE = 16000

    def __init__(self, model_name="base", language="en", force_cpu=False, device_index=0):
        """Load the Whisper model on the GPU when usable, otherwise on the CPU.

        Args:
            model_name: Model name passed to ``WhisperModel``.
            language: Language code passed to every transcription call.
            force_cpu: When true, skip CUDA detection and use the CPU.
            device_index: CUDA device index; an out-of-range value falls back
                to GPU 0 with a warning.
        """
        print(f"Loading Whisper model '{model_name}'...")

        # Check for CUDA availability
        import torch
        has_cuda = torch.cuda.is_available() and not force_cpu

        # CPU with int8 is the fallback whenever CUDA cannot be used
        device = "cpu"
        compute_type = "int8"

        if has_cuda:
            try:
                # Test if CTranslate2 can actually use CUDA
                import ctranslate2
                cuda_count = ctranslate2.get_cuda_device_count()
                if cuda_count > 0:
                    # Validate device index (negative values are invalid too)
                    if not 0 <= device_index < cuda_count:
                        print(f"⚠️  GPU index {device_index} not available. Found {cuda_count} GPU(s). Using GPU 0.")
                        device_index = 0

                    gpu_name = torch.cuda.get_device_name(device_index)
                    # CTranslate2 uses "cuda" + device_index parameter, not "cuda:N"
                    device = "cuda"
                    compute_type = "float16"
                    print(f"Using device: cuda:{device_index} ({gpu_name})")
                else:
                    print("CUDA available in PyTorch but not in CTranslate2. Using CPU.")
            except Exception as e:
                print(f"CUDA libraries not found ({e}). Using CPU.")
                device = "cpu"
                compute_type = "int8"
        else:
            print("Using device: cpu")

        # FASTER-WHISPER (recommended):
        model_kwargs = {
            "device": device,
            "compute_type": compute_type
        }

        if device == "cuda":
            model_kwargs["device_index"] = device_index
        else:
            model_kwargs["cpu_threads"] = 4

        self.model = WhisperModel(model_name, **model_kwargs)
        self.language = language
        self.audio_buffer = np.array([], dtype=np.float32)
        self.lock = threading.Lock()

    def add_audio(self, audio_chunk):
        """Append a chunk to the buffer, scaled by 1/32768 into float32.

        The divisor matches the int16 sample format; the chunk is converted
        with ``astype(np.float32)`` first.
        """
        with self.lock:
            audio_float = audio_chunk.astype(np.float32) / 32768.0
            self.audio_buffer = np.concatenate([self.audio_buffer, audio_float])

    def transcribe_chunk(self, min_duration=5.0, fast_mode=False):
        """Transcribe the buffered audio once enough of it has accumulated.

        Args:
            min_duration: Minimum buffered duration in seconds. Below it the
                buffer is left untouched and None is returned.
            fast_mode: Use greedy decoding without VAD instead of beam
                search (beam size 3) with VAD filtering.

        Returns:
            The stripped transcript, or None when the buffer is empty or too
            short, the transcript is empty, or transcription fails. Except in
            the empty/too-short case the buffer is cleared, so audio from a
            failed attempt is not retried.
        """
        with self.lock:
            buffered_samples = len(self.audio_buffer)
            # An empty buffer is skipped even with min_duration=0: there is
            # nothing to hand to the model.
            if buffered_samples == 0:
                return None
            if buffered_samples / self.SAMPLE_RATE < min_duration:
                return None

            # add_audio() always rebinds the attribute to a new array, so the
            # old array can be handed over without copying.
            audio_to_process = self.audio_buffer
            self.audio_buffer = np.array([], dtype=np.float32)

        # Optimize parameters for speed vs accuracy
        if fast_mode:
            # Fast mode: greedy decoding (fastest), no VAD
            options = dict(
                beam_size=1,
                best_of=1,
                temperature=0.0,
                vad_filter=False,
                word_timestamps=False,
            )
        else:
            # Balanced mode: moderate beam size with VAD
            options = dict(
                beam_size=3,
                vad_filter=True,
                vad_parameters=dict(min_silence_duration_ms=500),
                word_timestamps=False,
            )

        # Process with FASTER-WHISPER:
        try:
            segments, _ = self.model.transcribe(
                audio_to_process,
                language=self.language,
                **options
            )
            text = " ".join(segment.text for segment in segments).strip()
            return text if text else None
        except Exception as e:
            print(f"❌ Transcription error: {e}")
            return None


class LocalLLMAnalyzer:
    """Local LLM for fact-checking and question generation using Ollama"""

    def __init__(self, model="llama3.2", debug=False):
        """Store the settings and verify that the Ollama service responds.

        Args:
            model: Ollama model name used for every request.
            debug: When true, print raw model responses and errors.

        Raises:
            RuntimeError: If the ollama package is missing or the service
                cannot be reached.
        """
        if not OLLAMA_AVAILABLE:
            raise RuntimeError(
                "Ollama package not installed.\n"
                "Install with: pip install ollama"
            )

        self.model = model
        self.debug = debug
        self._test_connection()

    def _test_connection(self):
        """Test connection to Ollama service"""
        try:
            ollama.list()
            print(f"✓ Ollama connected using model: {self.model}")
        except Exception as e:
            raise RuntimeError(
                f"Cannot connect to Ollama. Ensure it's installed and running.\n"
                f"Error: {e}\n"
                f"Install from: https://ollama.ai\n"
                f"Then run: ollama pull {self.model}"
            ) from e

    def _extract_json(self, text):
        """Extract JSON from text that might contain markdown or other formatting"""
        # Try to find JSON block in markdown code fence
        import re
        json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
        if json_match:
            return json_match.group(1)

        # Try to find raw JSON object
        json_match = re.search(r'\{.*\}', text, re.DOTALL)
        if json_match:
            return json_match.group(0)

        return text

    def _parse_fact_check(self, response_text):
        """Parse a VERDICT / CONFIDENCE / EXPLANATION reply into a result dict.

        Missing fields keep their defaults ("dubious", 0.5, "No explanation
        provided"). Brackets, parentheses and asterisks between a label and
        its value are skipped, so replies that echo the template's brackets
        or use bold markdown still parse.
        """
        import re

        verdict = "dubious"
        confidence = 0.5
        explanation = "No explanation provided"

        # Extract VERDICT; "not factual" / "not-factual" become "not_factual"
        verdict_match = re.search(
            r'VERDICT:[\s\[(*]*(not[\s_-]+factual|\w+)',
            response_text, re.IGNORECASE)
        if verdict_match:
            verdict = re.sub(r'[\s_-]+', '_', verdict_match.group(1).lower())

        # Extract CONFIDENCE; a trailing % marks a percentage
        conf_match = re.search(
            r'CONFIDENCE:[\s\[(*]*([\d.]+)\s*(%?)',
            response_text, re.IGNORECASE)
        if conf_match:
            try:
                value = float(conf_match.group(1))
            except ValueError:
                pass  # e.g. "1.2.3": keep the default
            else:
                if conf_match.group(2):
                    value /= 100.0
                confidence = max(0.0, min(1.0, value))  # Clamp to 0-1

        # Extract EXPLANATION (up to the first blank line)
        expl_match = re.search(r'EXPLANATION:\s*(.+?)(?:\n\n|\Z)', response_text, re.IGNORECASE | re.DOTALL)
        if expl_match:
            explanation = expl_match.group(1).strip()
            # Trim a trailing fragment (likely cut off by num_predict) back to
            # the last complete sentence.
            if explanation and explanation[-1] not in '.!?':
                last_period = max(explanation.rfind('.'), explanation.rfind('!'), explanation.rfind('?'))
                if last_period > 20:  # Keep at least some text
                    explanation = explanation[:last_period + 1]

        return {
            "verdict": verdict,
            "confidence": confidence,
            "explanation": explanation[:250] if explanation else "Analysis completed",
            "sources": [],
            "corrections": ""
        }

    def _parse_questions(self, response_text):
        """Return exactly three questions extracted from a model reply.

        Lines labelled ``Q1:``..``Q3:`` are used first. Remaining slots are
        filled from other non-empty lines (list markers stripped, a '?'
        appended when missing), then from built-in default questions.
        """
        import re

        questions = []
        for i in range(1, 4):
            q_match = re.search(rf'Q{i}:\s*(.+?)(?:\n|$)', response_text, re.IGNORECASE)
            if not q_match:
                continue
            question = q_match.group(1).strip()
            if not question:
                continue
            # Clean up incomplete questions
            if not question.endswith('?'):
                last_q = question.rfind('?')
                if last_q > 10:
                    question = question[:last_q + 1]
                else:
                    question = question + "?"
            questions.append(question)

        # Fill the gaps from unlabelled lines
        if len(questions) < 3:
            for raw_line in response_text.split('\n'):
                if len(questions) >= 3:
                    break
                line = raw_line.strip()
                if not line:
                    continue
                # Skip bare markers and lines carrying a Q<n>: label: labelled
                # lines were handled above, and re-adding them would duplicate
                # a question with its "Q1:" prefix attached.
                if re.match(r'^Q\d+:?\s*$', line) or re.search(r'\bQ\d+:', line, re.IGNORECASE):
                    continue
                # Drop list markers such as "1. ", "2) ", "- "
                line = re.sub(r'^(?:\d+[.)]|[-*•])\s+', '', line)
                if not line:
                    continue
                if not line.endswith('?'):
                    line = line + "?"
                if line not in questions:
                    questions.append(line)

        # Ensure we have exactly 3 questions
        default_questions = [
            "What are the key points here?",
            "What evidence supports this?",
            "What are the implications?"
        ]
        while len(questions) < 3:
            questions.append(default_questions[len(questions)])

        return questions[:3]

    def fact_check(self, text, context=""):
        """Ask the model to rate the factual accuracy of *text*.

        Args:
            text: Statement to analyze.
            context: Accepted for interface compatibility; not included in
                the prompt.

        Returns:
            Dict with keys ``verdict``, ``confidence`` (0.0-1.0),
            ``explanation``, ``sources`` and ``corrections``. On any failure
            the verdict is "error" and the explanation carries the message.
        """
        # Try simple structured format first
        prompt = f"""Analyze this for accuracy. Reply in this exact format:

VERDICT: [factual/dubious/not_factual]
CONFIDENCE: [0.0-1.0]
EXPLANATION: [one sentence]

Statement: "{text}"
"""

        try:
            response = ollama.generate(
                model=self.model,
                prompt=prompt,
                options={"temperature": 0.1, "num_predict": 250}
            )

            response_text = response['response'].strip()

            if self.debug:
                print(f"\n[DEBUG] Fact-check response:\n{response_text}\n")

            return self._parse_fact_check(response_text)

        except Exception as e:
            if self.debug:
                print(f"[DEBUG] Fact-check error: {e}")
            return {
                "verdict": "error",
                "confidence": 0.0,
                "explanation": f"Analysis failed: {str(e)}",
                "sources": [],
                "corrections": ""
            }

    def generate_augmenting_questions(self, text, context=""):
        """Ask the model for three follow-up questions about *text*.

        Args:
            text: Statement to generate questions for.
            context: Accepted for interface compatibility; not included in
                the prompt.

        Returns:
            Dict with ``questions`` (always three strings) and ``topics``
            (always empty). On any failure three generic questions are
            returned.
        """
        prompt = f"""Generate 3 questions about this. Reply in this exact format:

Q1: [question]
Q2: [question]
Q3: [question]

Statement: "{text}"
"""

        try:
            response = ollama.generate(
                model=self.model,
                prompt=prompt,
                options={"temperature": 0.7, "num_predict": 250}
            )

            response_text = response['response'].strip()

            if self.debug:
                print(f"\n[DEBUG] Questions response:\n{response_text}\n")

            return {
                "questions": self._parse_questions(response_text),
                "topics": []
            }

        except Exception as e:
            if self.debug:
                print(f"[DEBUG] Questions error: {e}")
            return {
                "questions": [
                    "What are the key points?",
                    "What supports this claim?",
                    "What are the implications?"
                ],
                "topics": []
            }


def list_audio_devices():
    """List every device that reports at least one input channel.

    For each such device prints its index, name, input channel count and
    default sample rate, followed by a blank line at the end.
    """
    print("\nAvailable audio capture devices:")
    for index, info in enumerate(sd.query_devices()):
        channel_count = info['max_input_channels']
        if not channel_count > 0:
            # Output-only device: cannot be captured from
            continue
        print(f"  [{index}] {info['name']}")
        print(f"      Channels: {channel_count} | Sample Rate: {info['default_samplerate']}")
    print()


def save_transcript(text, timestamp, filename):
    """Append one ``[timestamp] text`` line to *filename* (UTF-8).

    The parent directory is created when it does not exist yet.
    """
    # A bare filename has no directory part; use the current directory then
    parent_dir = os.path.dirname(filename) or '.'
    os.makedirs(parent_dir, exist_ok=True)

    line = f"[{timestamp}] {text}\n"
    with open(filename, "a", encoding="utf-8") as handle:
        handle.write(line)


def save_enriched_transcript(data, filename):
    """Append a transcript entry and its LLM analysis to *filename* (UTF-8).

    *data* must contain ``timestamp`` and ``text``. An optional
    ``fact_check`` dict adds the verdict, confidence, explanation and any
    correction; an optional ``questions`` dict adds a numbered list when its
    ``questions`` entry is non-empty. The parent directory is created when
    missing.
    """
    parent_dir = os.path.dirname(filename)
    os.makedirs(parent_dir if parent_dir else '.', exist_ok=True)

    with open(filename, "a", encoding="utf-8") as out:
        emit = out.write

        # Separator rule, then the transcript line itself
        emit(f"\n{'='*70}\n")
        emit(f"[{data['timestamp']}] {data['text']}\n\n")

        if 'fact_check' in data:
            check = data['fact_check']
            verdict = check.get('verdict', 'N/A').upper()
            confidence = check.get('confidence', 0)
            emit(f"📊 Fact Check: {verdict} (confidence: {confidence:.2f})\n")
            emit(f"💡 {check.get('explanation', 'N/A')}\n")
            correction = check.get('corrections')
            if correction:
                emit(f"✏️  Correction: {correction}\n")
            emit("\n")

        question_list = data['questions'].get('questions') if 'questions' in data else None
        if question_list:
            emit("❓ Questions:\n")
            for number, question in enumerate(question_list, 1):
                emit(f"{number}. {question}\n")
            emit("\n")


def display_enriched_output(text, timestamp, fact_check=None, questions=None):
    """Print a transcript line followed by any LLM analysis.

    Args:
        text: Transcribed text.
        timestamp: Label printed in brackets before the text.
        fact_check: Optional dict with ``verdict``, ``confidence``,
            ``explanation`` and ``corrections``; skipped when falsy.
        questions: Optional dict whose ``questions`` entry is a list of
            strings; skipped when falsy or when the list is empty.
    """
    print(f"\n[{timestamp}] {text}")

    if fact_check:
        # Icon per verdict; unknown verdicts get a question mark
        icons = {
            'factual': '✅',
            'dubious': '⚠️',
            'not_factual': '❌',
            'error': '⚠️'
        }
        icon = icons.get(fact_check.get('verdict', 'error'), '❓')
        verdict_label = fact_check.get('verdict', 'N/A').upper()
        confidence = fact_check.get('confidence', 0)

        print(f"\n{icon} Fact Check: {verdict_label} (confidence: {confidence:.2f})")
        print(f"💡 {fact_check.get('explanation', 'N/A')}")

        correction = fact_check.get('corrections')
        if correction:
            print(f"✏️  Correction: {correction}")

    question_list = questions.get('questions') if questions else None
    if question_list:
        print("\n❓ Questions:")
        for number, question in enumerate(question_list, 1):
            print(f"  {number}. {question}")


def _build_arg_parser():
    """Build the command-line argument parser for the transcription tool."""
    parser = argparse.ArgumentParser(
        description="Real-time transcription of Windows speaker output",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python transcribe_speakers.py
  python transcribe_speakers.py --model small --language es --interval 5
  python transcribe_speakers.py --device "Speakers" --output "meeting.txt"
  python transcribe_speakers.py --model medium --interval 10 --output transcripts/live.txt
        """
    )

    parser.add_argument("--model", default="base",
                        choices=["tiny", "base", "small", "medium", "large"],
                        help="Whisper model size (default: base)")
    parser.add_argument("--language", default="en",
                        help="Language code (default: en)")
    parser.add_argument("--device", metavar="NAME",
                        help="Audio device name (partial match). If not specified, auto-detects")
    parser.add_argument("--interval", type=float, default=8.0,
                        help="Processing interval in seconds (default: 8.0)")
    parser.add_argument("--min-duration", type=float, default=3.0,
                        help="Minimum audio duration before transcription (default: 3.0)")
    parser.add_argument("--fast-mode", action="store_true",
                        help="Enable fast mode (lower accuracy, faster transcription)")
    parser.add_argument("--output", "-o", metavar="FILE",
                        help="Save transcript to file (e.g., transcript.txt)")
    parser.add_argument("--list-devices", action="store_true",
                        help="List all available audio devices and exit")
    parser.add_argument("--force-cpu", action="store_true",
                        help="Force CPU processing (disable GPU acceleration)")
    parser.add_argument("--gpu-index", type=int, default=0,
                        help="GPU device index to use (default: 0)")
    parser.add_argument("--enable-llm", action="store_true",
                        help="Enable LLM analysis (fact-checking and questions)")
    parser.add_argument("--llm-model", default="gpt-oss:20b",
                        help="Ollama model to use for LLM analysis (default:  gpt-oss:20b)")
    parser.add_argument("--llm-debug", action="store_true",
                        help="Show LLM raw responses for debugging")
    parser.add_argument("--sentence-mode", action="store_true",
                        help="Extract complete sentences by stitching chunks together")
    return parser


def _run_llm_analysis(llm_analyzer, text, context, timestamp, segment_num):
    """Fact-check *text* and generate questions for it; return the result dict."""
    fact_check = llm_analyzer.fact_check(text, context)
    questions = llm_analyzer.generate_augmenting_questions(text, context)
    return {
        'timestamp': timestamp,
        'text': text,
        'segment_count': segment_num,
        'fact_check': fact_check,
        'questions': questions
    }


def _report_llm_result(result, output_file):
    """Print an enriched result and append it to *output_file* when one is set."""
    display_enriched_output(
        result['text'],
        result['timestamp'],
        result['fact_check'],
        result['questions']
    )
    if output_file:
        save_enriched_transcript(result, output_file)


def main():
    """Run the command-line transcription session.

    Captures audio, transcribes it every ``--interval`` seconds and prints
    the text. With ``--sentence-mode`` chunks go through the sentence
    extractor and only the resulting sentences are emitted. With
    ``--enable-llm`` each emitted piece of text is analyzed on a background
    thread pool and reported once finished. Plain text is appended to
    ``--output`` when no LLM is active; enriched entries are appended
    otherwise. Ctrl+C stops capture, after which pending LLM tasks, queued
    audio and the sentence buffer are drained before the summary is printed.
    """
    args = _build_arg_parser().parse_args()

    if args.list_devices:
        list_audio_devices()
        return

    print("=== Windows Real-Time Audio Transcription ===")
    print(f"Model: {args.model} | Language: {args.language} | Interval: {args.interval}s")
    if args.output:
        print(f"Output: {args.output}")
    if args.enable_llm:
        print(f"LLM Analysis: Enabled ({args.llm_model})")
    if args.sentence_mode:
        print("Sentence Mode: Enabled (stitching chunks into complete sentences)")

    # Initialize audio capture
    try:
        capturer = WindowsLoopbackAudioCapture(
            device_name=args.device,
            sample_rate=16000,
            chunk_size=2048
        )
    except RuntimeError as e:
        print(f"\n❌ Audio Error: {e}")
        print("\nTo fix this:")
        print("1. Right-click speaker icon → Sounds → Recording tab")
        print("2. Right-click in empty area → Show Disabled Devices")
        print("3. Enable 'Stereo Mix' → Set as Default Device")
        print("\nAlternative: Install VB-Cable (free) from vb-audio.com")
        print("   Then use: --device 'CABLE Output'")
        list_audio_devices()
        return

    # Initialize transcriber
    try:
        transcriber = WhisperStreamTranscriber(
            model_name=args.model,
            language=args.language,
            force_cpu=args.force_cpu,
            device_index=args.gpu_index
        )
    except Exception as e:
        print(f"\n❌ Model Error: {e}")
        print("Make sure you installed Whisper correctly")
        # The capture stream is already running; do not leave it open
        capturer.close()
        return

    # Initialize LLM analyzer (optional)
    llm_analyzer = None
    if args.enable_llm:
        try:
            llm_analyzer = LocalLLMAnalyzer(model=args.llm_model, debug=args.llm_debug)
        except RuntimeError as e:
            print(f"\n❌ LLM Error: {e}")
            print("Continuing without LLM analysis...")
            llm_analyzer = None

    # Initialize sentence extractor (optional)
    sentence_extractor = None
    sentence_cleaner = None
    if args.sentence_mode:
        sentence_extractor = SentenceExtractor(max_buffer_words=150)
        sentence_cleaner = SentenceCleaner()
        print("✓ Sentence extraction initialized")

    # Main processing loop
    print(f"\n✅ Started transcription. Press Ctrl+C to stop.\n{'=' * 50}")
    last_process_time = time.time()
    total_duration = 0
    segment_count = 0

    # Thread pool for concurrent LLM processing
    llm_executor = ThreadPoolExecutor(max_workers=2) if llm_analyzer else None
    # (segment number, future) pairs. A list rather than a dict keyed by
    # segment number, because one segment can yield several sentences and
    # each of them needs its own entry.
    pending_llm_tasks = []

    try:
        while True:
            # Collect audio
            chunk = capturer.read_chunk()
            if chunk is not None:
                transcriber.add_audio(chunk)
                total_duration += len(chunk) / 16000

            # Process at intervals
            current_time = time.time()
            if current_time - last_process_time >= args.interval:
                text = transcriber.transcribe_chunk(
                    min_duration=args.min_duration,
                    fast_mode=args.fast_mode
                )
                if text:
                    segment_count += 1
                    timestamp = datetime.now().strftime("%H:%M:%S")

                    if sentence_extractor:
                        # Sentence mode: emit only the completed sentences
                        context = f"Sentence from segment {segment_count}"
                        pieces = []
                        for sentence in sentence_extractor.add_chunk(text):
                            cleaned = sentence_cleaner.clean(sentence) if sentence_cleaner else sentence
                            if cleaned:
                                print(f"[{timestamp}] 📝 {cleaned}")
                                pieces.append(cleaned)
                    else:
                        # Standard mode: display the chunk immediately
                        # (don't wait for the LLM)
                        context = f"Segment {segment_count}"
                        print(f"[{timestamp}] {text}")
                        pieces = [text]

                    for piece in pieces:
                        if llm_analyzer:
                            # Analyze in the background; the enriched entry
                            # is saved when the task completes
                            future = llm_executor.submit(
                                _run_llm_analysis, llm_analyzer, piece,
                                context, timestamp, segment_count
                            )
                            pending_llm_tasks.append((segment_count, future))
                        elif args.output:
                            # Save transcript immediately without LLM
                            save_transcript(piece, timestamp, args.output)

                last_process_time = current_time

            # Check for completed LLM tasks (non-blocking)
            for task in list(pending_llm_tasks):
                seg_num, future = task
                if not future.done():
                    continue
                # Remove first so an interrupt cannot cause a second report
                pending_llm_tasks.remove(task)
                try:
                    _report_llm_result(future.result(), args.output)
                except Exception as e:
                    print(f"⚠️  LLM processing error for segment {seg_num}: {e}")

    except KeyboardInterrupt:
        print(f"\n{'=' * 50}\n🛑 Stopping transcription...")
    finally:
        # Stop capturing as soon as the loop ends, including on unexpected
        # errors, so the audio stream is never left open.
        capturer.close()

    # Pick up audio that was captured but not yet pulled off the queue
    while True:
        chunk = capturer.read_chunk()
        if chunk is None:
            break
        transcriber.add_audio(chunk)
        total_duration += len(chunk) / 16000

    # Wait for pending LLM tasks to complete
    if pending_llm_tasks:
        print(f"\n⏳ Waiting for {len(pending_llm_tasks)} pending LLM tasks to complete...")
        for seg_num, future in pending_llm_tasks:
            try:
                _report_llm_result(future.result(timeout=30), args.output)
            except Exception as e:
                print(f"⚠️  LLM task {seg_num} failed: {e}")

    # Shutdown executor
    if llm_executor:
        llm_executor.shutdown(wait=True)

    # Process remaining audio
    print("\nProcessing remaining audio...")
    final_text = transcriber.transcribe_chunk(min_duration=0)
    timestamp = datetime.now().strftime("%H:%M:%S")

    if sentence_extractor:
        # Feed the tail through the extractor before flushing, so the last
        # words are stitched into sentences like everything before them.
        final_sentences = list(sentence_extractor.add_chunk(final_text)) if final_text else []
        print("\n📝 Flushing sentence buffer...")
        final_sentences.extend(sentence_extractor.flush())

        context = "Final sentence"
        pieces = []
        for sentence in final_sentences:
            cleaned = sentence_cleaner.clean(sentence) if sentence_cleaner else sentence
            if cleaned:
                print(f"[{timestamp}] 📝 {cleaned}")
                pieces.append(cleaned)
    else:
        context = "Final segment"
        pieces = [final_text] if final_text else []
        for piece in pieces:
            print(f"[{timestamp}] {piece}")

    for piece in pieces:
        if llm_analyzer:
            # Synchronous since we're shutting down
            result = _run_llm_analysis(llm_analyzer, piece, context, timestamp, segment_count)
            _report_llm_result(result, args.output)
        elif args.output:
            save_transcript(piece, timestamp, args.output)

    # Summary
    print(f"\n✅ Complete! Processed {total_duration:.1f}s of audio")
    print(f"   Generated {segment_count} transcript segments")
    if args.output and os.path.exists(args.output):
        abs_path = os.path.abspath(args.output)
        print(f"💾 Transcript saved to: {abs_path}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()