import logging
import os
import subprocess
import tempfile
from pathlib import Path
from typing import Any, Dict, Optional, Union

logger = logging.getLogger(__name__)


class TranscriptionParser:
    """Transcribe audio/video files to text using the ``whisper`` package.

    Every public method reports failure through the returned dict
    (``{'success': False, 'error': ..., 'text': ''}``) instead of raising.
    """

    # File extensions (lower-case, with leading dot) accepted by ``parse``.
    SUPPORTED_EXTENSIONS = frozenset({
        '.mp3', '.mp4', '.wav', '.m4a', '.flac',
        '.ogg', '.avi', '.mkv', '.webm',
    })

    def __init__(self, model: str = 'base'):
        """Create a parser.

        Args:
            model: Name passed to ``whisper.load_model`` when a file is
                first transcribed.
        """
        self.model = model
        # (model_name, loaded_model) once a model has been loaded; lets
        # repeated ``parse`` calls reuse the weights instead of reloading.
        self._loaded_model: Optional[tuple] = None
        self.whisper_available = self._check_whisper()

    def _check_whisper(self) -> bool:
        """Return True if a usable ``whisper`` module can be imported."""
        try:
            import whisper
        except ImportError:
            logger.warning('Whisper not installed. Install with: pip install openai-whisper')
            return False
        # An unrelated module that happens to be importable as ``whisper``
        # would otherwise only fail later, inside ``parse``.
        if not hasattr(whisper, 'load_model'):
            logger.warning(
                'Imported "whisper" module has no load_model(). '
                'Install with: pip install openai-whisper'
            )
            return False
        return True

    def _get_model(self) -> Any:
        """Load the Whisper model named by ``self.model``, reusing a cached one.

        The cache is keyed by model name, so reassigning ``self.model``
        triggers a fresh load on the next call.
        """
        cached = getattr(self, '_loaded_model', None)
        if cached is not None and cached[0] == self.model:
            return cached[1]

        import whisper

        loaded = whisper.load_model(self.model)
        self._loaded_model = (self.model, loaded)
        return loaded

    def parse(self, file_path: Union[str, Path]) -> Dict[str, Any]:
        """Transcribe ``file_path`` and return the result as a dict.

        Args:
            file_path: Path to the media file (``Path`` or ``str``).

        Returns:
            On success: ``{'success': True, 'text': ..., 'segments': [...],
            'language': ...}`` built from the dict returned by
            ``model.transcribe``. On failure: ``{'success': False,
            'error': <message>, 'text': ''}``.
        """
        if not self.whisper_available:
            return {'success': False, 'error': 'Whisper not available', 'text': ''}

        path = Path(file_path)
        if not self._is_supported(path):
            return {'success': False, 'error': 'Unsupported file type', 'text': ''}
        # Fail fast with a clear message before paying for a model load.
        if not path.is_file():
            return {'success': False, 'error': f'File not found: {path}', 'text': ''}

        try:
            logger.info('Transcribing %s with Whisper model=%s', path, self.model)
            model = self._get_model()
            result = model.transcribe(str(path))
            return {
                'success': True,
                'text': result['text'],
                'segments': result.get('segments', []),
                'language': result.get('language', 'unknown'),
            }
        except Exception as e:
            # Boundary handler: callers expect an error dict, never a raise.
            logger.exception('Transcription failed for %s: %s', path, e)
            return {'success': False, 'error': str(e), 'text': ''}

    def _is_supported(self, file_path: Union[str, Path]) -> bool:
        """Return True if the file's extension (case-insensitive) is supported."""
        return Path(file_path).suffix.lower() in self.SUPPORTED_EXTENSIONS

    @staticmethod
    def _format_segments(segments) -> str:
        """Render segments as ``[start s - end s] text`` lines joined by newlines.

        Missing ``start``/``end`` default to 0 and missing ``text`` to ''.
        """
        return '\n'.join(
            f"[{seg.get('start', 0):.2f}s - {seg.get('end', 0):.2f}s] "
            f"{seg.get('text', '').strip()}"
            for seg in segments
        )

    def parse_with_timestamps(self, file_path: Union[str, Path]) -> Dict[str, Any]:
        """Transcribe ``file_path`` and add a ``'timestamped_text'`` entry.

        Returns:
            The dict from ``parse``; when it succeeded, it additionally
            carries ``'timestamped_text'`` with one line per segment.
            A failed result is returned unchanged.
        """
        result = self.parse(file_path)
        if not result['success']:
            return result

        result['timestamped_text'] = self._format_segments(result.get('segments', []))
        return result