chore: update 6 file(s)

This commit is contained in:
mike
2025-12-17 22:30:41 +01:00
parent a53c0e2902
commit 4343b7a5a2
6 changed files with 1122 additions and 220 deletions

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Real-time transcription with dual audio capture (microphone + speaker monitor).
Linux/PipeWire optimized with Ollama LLM fact-checking.
Real-time audio transcription with dual capture and optional LLM analysis.
Supports microphone + speaker monitor, file output, and fact-checking.
"""
import sounddevice as sd
@@ -9,6 +9,7 @@ import numpy as np
import threading
import queue
import time
import os
import argparse
from datetime import datetime
from faster_whisper import WhisperModel
@@ -197,8 +198,8 @@ class WhisperTranscriber:
return None
class LLMFactChecker:
"""Fast fact-checking with Ollama"""
class LLMAnalyzer:
"""LLM analysis with fact-checking and question generation"""
def __init__(self, model="qwen2.5:3b"):
if not OLLAMA_AVAILABLE:
@@ -228,34 +229,100 @@ Statement: "{text}" """
)
import re
text = response['response']
response_text = response['response']
verdict = re.search(r'VERDICT:\s*(\w+)', text, re.I)
confidence = re.search(r'CONFIDENCE:\s*([\d.]+)', text, re.I)
reason = re.search(r'REASON:\s*(.+?)(?:\n|$)', text, re.I | re.DOTALL)
verdict = re.search(r'VERDICT:\s*(\w+)', response_text, re.I)
confidence = re.search(r'CONFIDENCE:\s*([\d.]+)', response_text, re.I)
reason = re.search(r'REASON:\s*(.+?)(?:\n|$)', response_text, re.I | re.DOTALL)
return {
'verdict': verdict.group(1).lower() if verdict else 'unknown',
'confidence': float(confidence.group(1)) if confidence else 0.5,
'reason': reason.group(1).strip() if reason else text[:150]
'reason': reason.group(1).strip() if reason else response_text[:150]
}
except Exception as e:
return {'verdict': 'error', 'confidence': 0.0, 'reason': str(e)}
def generate_questions(self, text):
    """Ask the Ollama model for three follow-up questions about ``text``.

    The reply is scanned for ``Q1:``, ``Q2:`` and ``Q3:`` entries. Each
    entry found is stripped and given a trailing ``?`` when it lacks one.
    Missing slots are topped up from a fixed list of generic questions,
    and any exception raised along the way yields a fixed fallback list.

    Returns:
        A list of exactly three question strings.
    """
    # Generic questions used to pad a reply that produced fewer than three.
    fillers = ["What are the implications?", "What evidence supports this?", "What's the context?"]

    prompt = f"""Generate 3 insightful questions about this. Reply ONLY with:
Q1: [question]
Q2: [question]
Q3: [question]
Statement: "{text}" """
    try:
        reply = ollama.generate(
            model=self.model,
            prompt=prompt,
            options={"temperature": 0.7, "num_predict": 120}
        )
        import re
        reply_text = reply['response']

        found = []
        for slot in (1, 2, 3):
            hit = re.search(rf'Q{slot}:\s*(.+?)(?:\n|$)', reply_text, re.I)
            if hit is None:
                continue
            candidate = hit.group(1).strip()
            found.append(candidate if candidate.endswith('?') else candidate + '?')

        # Pad from the filler whose index equals the number already found,
        # so the result always has three entries.
        found.extend(fillers[len(found):])
        return found[:3]
    except Exception:
        # Best effort: any failure (LLM call, parsing) falls back to
        # canned questions rather than interrupting transcription.
        return ["What are the key points?", "What supports this?", "What are the implications?"]
def save_transcript(text, source, timestamp, filename):
    """Append a single timestamped transcript line to ``filename``.

    The directory part of ``filename`` is created if it does not exist.
    A ``source`` equal to ``'mic'`` is labelled ``MIC``; any other value
    is labelled ``SPEAKER``. The file is opened in append mode as UTF-8.
    """
    parent_dir = os.path.dirname(filename)
    if not parent_dir:
        # A bare file name has no directory part; fall back to the cwd.
        parent_dir = '.'
    os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "a", encoding="utf-8") as out:
        if source == 'mic':
            label = "MIC"
        else:
            label = "SPEAKER"
        out.write(f"[{timestamp}] {label}: {text}\n")
def save_enriched_transcript(text, source, timestamp, fact_check, questions, filename):
    """Append a transcript entry plus its LLM analysis to ``filename``.

    Each entry starts with a 70-character ``=`` rule and the timestamped,
    source-labelled text. When ``fact_check`` is truthy its ``verdict``,
    ``confidence`` and ``reason`` keys are written; when ``questions`` is
    truthy they are written as a numbered list. The directory part of
    ``filename`` is created if it does not exist.
    """
    os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)

    with open(filename, "a", encoding="utf-8") as out:
        label = "MIC" if source == 'mic' else "SPEAKER"

        # Entry header: separator rule, then the transcript line itself.
        out.write(f"\n{'='*70}\n")
        out.write(f"[{timestamp}] {label}: {text}\n\n")

        if fact_check:
            out.write(f"📊 Fact Check: {fact_check['verdict'].upper()} ")
            out.write(f"(confidence: {fact_check['confidence']:.2f})\n")
            out.write(f"💡 {fact_check['reason']}\n\n")

        if questions:
            out.write("❓ Questions:\n")
            out.writelines(
                f"{number}. {question}\n"
                for number, question in enumerate(questions, 1)
            )
            # Blank line closing the questions section.
            out.write("\n")
def main():
parser = argparse.ArgumentParser(description="Dual audio transcription with fact-checking")
parser.add_argument("--model", default="tiny", choices=["tiny", "base", "small", "medium"],
help="Whisper model (default: tiny for speed)")
parser.add_argument("--language", default="en", help="Language code")
parser = argparse.ArgumentParser(description="Real-time audio transcription with dual capture")
parser.add_argument("--model", default="tiny", choices=["tiny", "base", "small", "medium", "large"],
help="Whisper model (default: tiny)")
parser.add_argument("--language", default="en", help="Language code (default: en)")
parser.add_argument("--mic", help="Microphone device name (partial match)")
parser.add_argument("--monitor", help="Monitor device name for speaker capture")
parser.add_argument("--interval", type=float, default=5.0, help="Processing interval (seconds)")
parser.add_argument("--min-duration", type=float, default=2.0, help="Min audio duration")
parser.add_argument("--enable-llm", action="store_true", help="Enable fact-checking")
parser.add_argument("--llm-model", default="qwen2.5:3b", help="Ollama model")
parser.add_argument("--list-devices", action="store_true", help="List audio devices")
parser.add_argument("--force-cpu", action="store_true", help="Force CPU")
parser.add_argument("--interval", type=float, default=5.0, help="Processing interval in seconds (default: 5.0)")
parser.add_argument("--min-duration", type=float, default=2.0, help="Minimum audio duration (default: 2.0)")
parser.add_argument("--enable-llm", action="store_true", help="Enable LLM analysis (fact-checking + questions)")
parser.add_argument("--llm-model", default="qwen2.5:3b", help="Ollama model (default: qwen2.5:3b)")
parser.add_argument("--output", "-o", help="Save transcript to file")
parser.add_argument("--list-devices", action="store_true", help="List audio devices and exit")
parser.add_argument("--force-cpu", action="store_true", help="Force CPU processing")
args = parser.parse_args()
@@ -268,8 +335,12 @@ def main():
print(f" [{i:2d}] {dev['name']:<50} IN:{in_ch} OUT:{out_ch}")
return
print("=== Dual Audio Transcription with Fact-Checking ===")
print("=== Real-Time Audio Transcription ===")
print(f"Model: {args.model} | Language: {args.language} | Interval: {args.interval}s")
if args.output:
print(f"Output: {args.output}")
if args.enable_llm:
print(f"LLM Analysis: Enabled ({args.llm_model})")
# Initialize capture
try:
@@ -296,14 +367,14 @@ def main():
print(f"\n❌ Whisper Error: {e}")
return
# Initialize fact checker
fact_checker = None
# Initialize LLM analyzer
llm_analyzer = None
if args.enable_llm:
try:
fact_checker = LLMFactChecker(model=args.llm_model)
llm_analyzer = LLMAnalyzer(model=args.llm_model)
except Exception as e:
print(f"\n⚠ LLM Error: {e}")
print("Continuing without fact-checking...")
print("Continuing without LLM analysis...")
# Main loop
print(f"\n✅ Started. Press Ctrl+C to stop.\n{'='*60}")
@@ -329,10 +400,27 @@ def main():
source_emoji = "🎤" if source == 'mic' else "🔊"
print(f"\n{source_emoji} [{timestamp}] {text}")
if fact_checker:
fc = fact_checker.fact_check(text)
verdict_emoji = {'factual': '', 'dubious': '⚠️', 'false': ''}.get(fc['verdict'], '')
print(f" {verdict_emoji} {fc['verdict'].upper()} ({fc['confidence']:.2f}): {fc['reason']}")
# LLM analysis
fact_check = None
questions = None
if llm_analyzer:
fact_check = llm_analyzer.fact_check(text)
questions = llm_analyzer.generate_questions(text)
verdict_emoji = {'factual': '', 'dubious': '⚠️', 'false': ''}.get(
fact_check['verdict'], '')
print(f" {verdict_emoji} {fact_check['verdict'].upper()} "
f"({fact_check['confidence']:.2f}): {fact_check['reason']}")
print(f" ❓ Questions:")
for i, q in enumerate(questions, 1):
print(f" {i}. {q}")
# Save to file
if args.output:
if llm_analyzer:
save_enriched_transcript(text, source, timestamp, fact_check, questions, args.output)
else:
save_transcript(text, source, timestamp, args.output)
last_process = time.time()
@@ -340,6 +428,8 @@ def main():
print(f"\n{'='*60}\n🛑 Stopping...")
capturer.close()
if args.output and os.path.exists(args.output):
print(f"\n💾 Transcript saved: {os.path.abspath(args.output)}")
print("\n✅ Done!")