chore: update 6 file(s)

2025-12-17 22:30:41 +01:00
parent a53c0e2902
commit 4343b7a5a2
6 changed files with 1122 additions and 220 deletions
--- a/transcribe_dual_linux.py
+++ b/transcribe_dual_linux.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-Real-time transcription with dual audio capture (microphone + speaker monitor).
-Linux/PipeWire optimized with Ollama LLM fact-checking.
+Real-time audio transcription with dual capture and optional LLM analysis.
+Supports microphone + speaker monitor, file output, and fact-checking.
 """

 import sounddevice as sd
@@ -9,6 +9,7 @@ import numpy as np
 import threading
 import queue
 import time
+import os
 import argparse
 from datetime import datetime
 from faster_whisper import WhisperModel
@@ -197,8 +198,8 @@ class WhisperTranscriber:
            return None


-class LLMFactChecker:
-    """Fast fact-checking with Ollama"""
+class LLMAnalyzer:
+    """LLM analysis with fact-checking and question generation"""

    def __init__(self, model="qwen2.5:3b"):
        if not OLLAMA_AVAILABLE:
@@ -228,34 +229,100 @@ Statement: "{text}" """
            )

            import re
-            text = response['response']
+            response_text = response['response']

-            verdict = re.search(r'VERDICT:\s*(\w+)', text, re.I)
-            confidence = re.search(r'CONFIDENCE:\s*([\d.]+)', text, re.I)
-            reason = re.search(r'REASON:\s*(.+?)(?:\n|$)', text, re.I | re.DOTALL)
+            verdict = re.search(r'VERDICT:\s*(\w+)', response_text, re.I)
+            confidence = re.search(r'CONFIDENCE:\s*([\d.]+)', response_text, re.I)
+            reason = re.search(r'REASON:\s*(.+?)(?:\n|$)', response_text, re.I | re.DOTALL)

            return {
                'verdict': verdict.group(1).lower() if verdict else 'unknown',
                'confidence': float(confidence.group(1)) if confidence else 0.5,
-                'reason': reason.group(1).strip() if reason else text[:150]
+                'reason': reason.group(1).strip() if reason else response_text[:150]
            }
        except Exception as e:
            return {'verdict': 'error', 'confidence': 0.0, 'reason': str(e)}

+    def generate_questions(self, text):
+        """Ask the LLM for three follow-up questions about ``text``.
+
+        The model is prompted to answer with ``Q1:``/``Q2:``/``Q3:`` lines.
+        Each line that can be parsed is stripped and given a trailing '?'
+        when it lacks one. Missing slots are filled from a fixed list of
+        stock questions, and any exception raised along the way yields a
+        separate fixed list instead.
+
+        Returns:
+            A list of exactly three question strings.
+        """
+        prompt = f"""Generate 3 insightful questions about this. Reply ONLY with:
+Q1: [question]
+Q2: [question]
+Q3: [question]
+
+Statement: "{text}" """
+
+        # Used slot-by-slot when the reply contains fewer than three questions.
+        stock_questions = ["What are the implications?", "What evidence supports this?", "What's the context?"]
+        # Returned wholesale when the request or the parsing raises.
+        failure_questions = ["What are the key points?", "What supports this?", "What are the implications?"]
+
+        try:
+            import re
+
+            reply = ollama.generate(
+                model=self.model,
+                prompt=prompt,
+                options={"temperature": 0.7, "num_predict": 120}
+            )
+            response_text = reply['response']
+
+            parsed = []
+            for i in (1, 2, 3):
+                hit = re.search(rf'Q{i}:\s*(.+?)(?:\n|$)', response_text, re.I)
+                if hit is None:
+                    continue
+                candidate = hit.group(1).strip()
+                parsed.append(candidate if candidate.endswith('?') else candidate + '?')
+
+            # Pad by position: slot k is filled with stock question k.
+            parsed.extend(stock_questions[len(parsed):])
+            return parsed[:3]
+        except Exception:
+            return failure_questions
+
+
+def save_transcript(text, source, timestamp, filename):
+    """Append a single transcript line to ``filename``.
+
+    The parent directory of ``filename`` is created if needed (the current
+    directory is used when the path has no directory part). The line has
+    the form ``[timestamp] LABEL: text`` where LABEL is "MIC" when
+    ``source`` equals 'mic' and "SPEAKER" for every other value.
+    """
+    parent_dir = os.path.dirname(filename) or '.'
+    os.makedirs(parent_dir, exist_ok=True)
+
+    source_label = "SPEAKER"
+    if source == 'mic':
+        source_label = "MIC"
+
+    with open(filename, "a", encoding="utf-8") as out:
+        out.write(f"[{timestamp}] {source_label}: {text}\n")
+
+
+def save_enriched_transcript(text, source, timestamp, fact_check, questions, filename):
+    """Append a transcript entry plus its LLM analysis to ``filename``.
+
+    Each entry starts with a blank line and a 70-character '=' rule,
+    followed by the ``[timestamp] LABEL: text`` line (LABEL is "MIC" when
+    ``source`` equals 'mic', otherwise "SPEAKER"). When ``fact_check`` is
+    truthy its 'verdict', 'confidence' and 'reason' entries are written;
+    when ``questions`` is truthy they are written as a numbered list
+    starting at 1. The parent directory is created if it does not exist.
+    """
+    parent_dir = os.path.dirname(filename) or '.'
+    os.makedirs(parent_dir, exist_ok=True)
+
+    source_label = "SPEAKER"
+    if source == 'mic':
+        source_label = "MIC"
+    rule = '=' * 70
+
+    with open(filename, "a", encoding="utf-8") as out:
+        out.write("\n" + rule + "\n")
+        out.write(f"[{timestamp}] {source_label}: {text}\n\n")
+
+        if fact_check:
+            # Written piecewise, in this order, so a missing key leaves the
+            # same partial output as before.
+            out.write(f"📊 Fact Check: {fact_check['verdict'].upper()} ")
+            out.write(f"(confidence: {fact_check['confidence']:.2f})\n")
+            out.write(f"💡 {fact_check['reason']}\n\n")
+
+        if questions:
+            out.write("❓ Questions:\n")
+            out.writelines(
+                f"{number}. {question}\n"
+                for number, question in enumerate(questions, start=1)
+            )
+            out.write("\n")
+

 def main():
-    parser = argparse.ArgumentParser(description="Dual audio transcription with fact-checking")
-    parser.add_argument("--model", default="tiny", choices=["tiny", "base", "small", "medium"],
-                        help="Whisper model (default: tiny for speed)")
-    parser.add_argument("--language", default="en", help="Language code")
+    parser = argparse.ArgumentParser(description="Real-time audio transcription with dual capture")
+    parser.add_argument("--model", default="tiny", choices=["tiny", "base", "small", "medium", "large"],
+                        help="Whisper model (default: tiny)")
+    parser.add_argument("--language", default="en", help="Language code (default: en)")
    parser.add_argument("--mic", help="Microphone device name (partial match)")
    parser.add_argument("--monitor", help="Monitor device name for speaker capture")
-    parser.add_argument("--interval", type=float, default=5.0, help="Processing interval (seconds)")
-    parser.add_argument("--min-duration", type=float, default=2.0, help="Min audio duration")
-    parser.add_argument("--enable-llm", action="store_true", help="Enable fact-checking")
-    parser.add_argument("--llm-model", default="qwen2.5:3b", help="Ollama model")
-    parser.add_argument("--list-devices", action="store_true", help="List audio devices")
-    parser.add_argument("--force-cpu", action="store_true", help="Force CPU")
+    parser.add_argument("--interval", type=float, default=5.0, help="Processing interval in seconds (default: 5.0)")
+    parser.add_argument("--min-duration", type=float, default=2.0, help="Minimum audio duration (default: 2.0)")
+    parser.add_argument("--enable-llm", action="store_true", help="Enable LLM analysis (fact-checking + questions)")
+    parser.add_argument("--llm-model", default="qwen2.5:3b", help="Ollama model (default: qwen2.5:3b)")
+    parser.add_argument("--output", "-o", help="Save transcript to file")
+    parser.add_argument("--list-devices", action="store_true", help="List audio devices and exit")
+    parser.add_argument("--force-cpu", action="store_true", help="Force CPU processing")

    args = parser.parse_args()

@@ -268,8 +335,12 @@ def main():
                print(f"  [{i:2d}] {dev['name']:<50} IN:{in_ch} OUT:{out_ch}")
        return

-    print("=== Dual Audio Transcription with Fact-Checking ===")
+    print("=== Real-Time Audio Transcription ===")
    print(f"Model: {args.model} | Language: {args.language} | Interval: {args.interval}s")
+    if args.output:
+        print(f"Output: {args.output}")
+    if args.enable_llm:
+        print(f"LLM Analysis: Enabled ({args.llm_model})")

    # Initialize capture
    try:
@@ -296,14 +367,14 @@ def main():
        print(f"\n❌ Whisper Error: {e}")
        return

-    # Initialize fact checker
-    fact_checker = None
+    # Initialize LLM analyzer
+    llm_analyzer = None
    if args.enable_llm:
        try:
-            fact_checker = LLMFactChecker(model=args.llm_model)
+            llm_analyzer = LLMAnalyzer(model=args.llm_model)
        except Exception as e:
            print(f"\n⚠ LLM Error: {e}")
-            print("Continuing without fact-checking...")
+            print("Continuing without LLM analysis...")

    # Main loop
    print(f"\n✅ Started. Press Ctrl+C to stop.\n{'='*60}")
@@ -329,10 +400,27 @@ def main():
                            source_emoji = "🎤" if source == 'mic' else "🔊"
                            print(f"\n{source_emoji} [{timestamp}] {text}")

-                            if fact_checker:
-                                fc = fact_checker.fact_check(text)
-                                verdict_emoji = {'factual': '✅', 'dubious': '⚠️', 'false': '❌'}.get(fc['verdict'], '❓')
-                                print(f"   {verdict_emoji} {fc['verdict'].upper()} ({fc['confidence']:.2f}): {fc['reason']}")
+                            # LLM analysis
+                            fact_check = None
+                            questions = None
+                            if llm_analyzer:
+                                fact_check = llm_analyzer.fact_check(text)
+                                questions = llm_analyzer.generate_questions(text)
+
+                                verdict_emoji = {'factual': '✅', 'dubious': '⚠️', 'false': '❌'}.get(
+                                    fact_check['verdict'], '❓')
+                                print(f"   {verdict_emoji} {fact_check['verdict'].upper()} "
+                                      f"({fact_check['confidence']:.2f}): {fact_check['reason']}")
+                                print(f"   ❓ Questions:")
+                                for i, q in enumerate(questions, 1):
+                                    print(f"      {i}. {q}")
+
+                            # Save to file
+                            if args.output:
+                                if llm_analyzer:
+                                    save_enriched_transcript(text, source, timestamp, fact_check, questions, args.output)
+                                else:
+                                    save_transcript(text, source, timestamp, args.output)

                last_process = time.time()

@@ -340,6 +428,8 @@ def main():
        print(f"\n{'='*60}\n🛑 Stopping...")

    capturer.close()
+    if args.output and os.path.exists(args.output):
+        print(f"\n💾 Transcript saved: {os.path.abspath(args.output)}")
    print("\n✅ Done!")