This commit is contained in:
mike
2025-12-17 16:30:46 +01:00
parent c0ca907b01
commit 918e96ad21
14 changed files with 358 additions and 1951 deletions

View File

@@ -15,11 +15,13 @@ import json
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
# Choose your Whisper backend here:
# For faster-whisper (recommended):
# Whisper transcription (using faster-whisper for optimal performance)
from faster_whisper import WhisperModel
# LLM integration
# Sentence extraction for stitching chunks
from sentence_extractor import SentenceExtractor, SentenceCleaner
# LLM integration (optional)
try:
import ollama
OLLAMA_AVAILABLE = True
@@ -27,10 +29,6 @@ except ImportError:
OLLAMA_AVAILABLE = False
# # For regular whisper (comment out the line above and uncomment these):
# import whisper
class WindowsLoopbackAudioCapture:
"""Capture Windows speaker output using WASAPI loopback"""
@@ -170,12 +168,6 @@ class WhisperStreamTranscriber:
self.audio_buffer = np.array([], dtype=np.float32)
self.lock = threading.Lock()
# # REGULAR WHISPER:
# self.model = whisper.load_model(model_name)
# self.language = language
# self.audio_buffer = np.array([], dtype=np.float32)
# self.lock = threading.Lock()
def add_audio(self, audio_chunk):
"""Add new audio data to buffer"""
with self.lock:
@@ -222,19 +214,6 @@ class WhisperStreamTranscriber:
print(f"❌ Transcription error: {e}")
return None
# # REGULAR WHISPER:
# try:
# result = self.model.transcribe(
# audio_to_process,
# language=self.language,
# task="transcribe",
# fp16=False
# )
# return result["text"].strip()
# except Exception as e:
# print(f"❌ Transcription error: {e}")
# return None
class LocalLLMAnalyzer:
"""Local LLM for fact-checking and question generation using Ollama"""
@@ -536,6 +515,8 @@ Examples:
help="Ollama model to use for LLM analysis (default: gpt-oss:20b)")
parser.add_argument("--llm-debug", action="store_true",
help="Show LLM raw responses for debugging")
parser.add_argument("--sentence-mode", action="store_true",
help="Extract complete sentences by stitching chunks together")
args = parser.parse_args()
@@ -549,6 +530,8 @@ Examples:
print(f"Output: {args.output}")
if args.enable_llm:
print(f"LLM Analysis: Enabled ({args.llm_model})")
if args.sentence_mode:
print(f"Sentence Mode: Enabled (stitching chunks into complete sentences)")
# Initialize audio capture
try:
@@ -591,6 +574,14 @@ Examples:
print("Continuing without LLM analysis...")
llm_analyzer = None
# Initialize sentence extractor (optional)
sentence_extractor = None
sentence_cleaner = None
if args.sentence_mode:
sentence_extractor = SentenceExtractor(max_buffer_words=150)
sentence_cleaner = SentenceCleaner()
print("✓ Sentence extraction initialized")
# Main processing loop
print(f"\n✅ Started transcription. Press Ctrl+C to stop.\n{'=' * 50}")
last_process_time = time.time()
@@ -620,11 +611,45 @@ Examples:
segment_count += 1
timestamp = datetime.now().strftime("%H:%M:%S")
# Display transcription immediately (don't wait for LLM)
print(f"[{timestamp}] {text}")
# Sentence extraction mode
if sentence_extractor:
# Add chunk to extractor and get complete sentences
sentences = sentence_extractor.add_chunk(text)
# LLM Analysis (run concurrently in background)
if llm_analyzer:
for sentence in sentences:
# Clean the sentence
cleaned = sentence_cleaner.clean(sentence) if sentence_cleaner else sentence
if cleaned:
print(f"[{timestamp}] 📝 {cleaned}")
# Save individual sentences
if args.output and not llm_analyzer:
save_transcript(cleaned, timestamp, args.output)
# LLM analysis on complete sentences
if llm_analyzer:
context = f"Sentence from segment {segment_count}"
def run_llm_analysis(txt, ctx, ts, seg_num):
    """Run both LLM passes on one piece of text and bundle the results.

    The fact check is requested first, then the augmenting questions;
    both receive the same text and context string.

    Args:
        txt: Text to analyse.
        ctx: Context string forwarded to both analyzer calls.
        ts: Timestamp stored with the result.
        seg_num: Segment counter value stored with the result.

    Returns:
        A dict with the keys 'timestamp', 'text', 'segment_count',
        'fact_check' and 'questions'.
    """
    # Start with the pass-through fields, then attach each analyzer
    # result in the same order the calls are made.
    analysis = {'timestamp': ts, 'text': txt, 'segment_count': seg_num}
    analysis['fact_check'] = llm_analyzer.fact_check(txt, ctx)
    analysis['questions'] = llm_analyzer.generate_augmenting_questions(txt, ctx)
    return analysis
future = llm_executor.submit(run_llm_analysis, cleaned, context, timestamp, segment_count)
pending_llm_tasks[segment_count] = future
else:
# Standard mode: display chunks as-is
# Display transcription immediately (don't wait for LLM)
print(f"[{timestamp}] {text}")
# LLM Analysis (run concurrently in background) - only in non-sentence mode
if llm_analyzer and not sentence_extractor:
context = f"Segment {segment_count}"
# Submit LLM tasks to thread pool
@@ -701,6 +726,34 @@ Examples:
# Cleanup
capturer.close()
# Flush sentence buffer if in sentence mode
if sentence_extractor:
print("\n📝 Flushing sentence buffer...")
final_sentences = sentence_extractor.flush()
for sentence in final_sentences:
cleaned = sentence_cleaner.clean(sentence) if sentence_cleaner else sentence
if cleaned:
timestamp = datetime.now().strftime("%H:%M:%S")
print(f"[{timestamp}] 📝 {cleaned}")
if args.output and not llm_analyzer:
save_transcript(cleaned, timestamp, args.output)
# LLM analysis for flushed sentences
if llm_analyzer:
fact_check = llm_analyzer.fact_check(cleaned, "Final sentence")
questions = llm_analyzer.generate_augmenting_questions(cleaned)
display_enriched_output(cleaned, timestamp, fact_check, questions)
if args.output:
data = {
'timestamp': timestamp,
'text': cleaned,
'fact_check': fact_check,
'questions': questions
}
save_enriched_transcript(data, args.output)
# Process remaining audio
print("\nProcessing remaining audio...")
final_text = transcriber.transcribe_chunk(min_duration=0)