"""Audio parsing helpers: Whisper transcription and mutagen metadata."""

from pathlib import Path
from typing import Any, Dict, Union
import logging

logger = logging.getLogger(__name__)


class AudioParser:
    """Transcribe audio files with Whisper and read basic stream metadata.

    All public methods report failures by returning a dict containing an
    ``'error'`` key instead of raising, so callers can process batches of
    files without wrapping every call in ``try``/``except``.
    """

    def __init__(self, whisper_model: str = 'base'):
        """Create a parser.

        Args:
            whisper_model: Name passed to ``whisper.load_model``.
        """
        self.supported_formats = {
            '.mp3', '.wav', '.flac', '.m4a', '.ogg', '.wma', '.aac',
        }
        self.whisper_model = whisper_model
        # Lazily loaded model plus the name it was loaded under, so that
        # reassigning ``whisper_model`` later triggers a reload.
        self._model = None
        self._model_name = None

    def parse(self, file_path: Union[str, Path]) -> Dict:
        """Transcribe *file_path* and return the result as a dict.

        Args:
            file_path: Path (or path string) of the audio file.

        Returns:
            On success, the dict built by ``_transcribe_with_whisper``.
            On failure, a dict with ``'error'`` (message) and ``'text'``
            (empty string); a ``'needs'`` hint is added when Whisper is
            not installed.
        """
        file_path = Path(file_path)

        if file_path.suffix.lower() not in self.supported_formats:
            return {
                'error': f'Unsupported format: {file_path.suffix}',
                'text': '',
            }

        # Fail fast: loading the model is expensive and pointless when
        # the input file is not there.
        if not file_path.is_file():
            return {'error': f'File not found: {file_path}', 'text': ''}

        try:
            return self._transcribe_with_whisper(file_path)
        except Exception as e:
            # Safety net; the helper normally handles its own errors.
            logger.error('Audio parse failed for %s: %s', file_path, e)
            return {'error': str(e), 'text': ''}

    def _load_model(self) -> Any:
        """Return the Whisper model, loading it only when needed.

        Raises:
            ImportError: If the ``whisper`` package is not installed.
        """
        import whisper

        cached = getattr(self, '_model', None)
        cached_name = getattr(self, '_model_name', None)
        if cached is None or cached_name != self.whisper_model:
            self._model = whisper.load_model(self.whisper_model)
            self._model_name = self.whisper_model
        return self._model

    def _transcribe_with_whisper(self, file_path: Path) -> Dict:
        """Run Whisper on *file_path* and normalise its output.

        Returns:
            A dict with ``text``, ``quality``, ``method``, ``language``,
            ``segments`` (segment count) and ``metadata``; or an error
            dict (``'error'`` and ``'text'`` keys) if Whisper is missing
            or transcription fails. This method does not raise.
        """
        try:
            model = self._load_model()
            result = model.transcribe(str(file_path))
        except ImportError:
            logger.warning('Whisper not installed')
            return {
                'error': 'Whisper not installed',
                'text': '',
                'needs': 'pip install openai-whisper',
            }
        except Exception as e:
            # Log here so failures are not silently swallowed.
            logger.error('Audio parse failed for %s: %s', file_path, e)
            return {'error': str(e), 'text': ''}

        segments = result.get('segments') or []

        # The transcription result is not expected to carry a 'duration'
        # key, so fall back to the end timestamp of the last segment
        # (an approximation: trailing silence is not counted).
        duration = result.get('duration')
        if duration is None and segments:
            duration = segments[-1].get('end')

        return {
            'text': (result.get('text') or '').strip(),
            'quality': 'good',
            'method': f'whisper-{self.whisper_model}',
            'language': result.get('language', 'unknown'),
            'segments': len(segments),
            'metadata': {
                'duration': duration,
                'language': result.get('language'),
            },
        }

    def extract_metadata(self, file_path: Path) -> Dict:
        """Read stream metadata from *file_path* using mutagen.

        Returns:
            A dict with ``duration``, ``bitrate``, ``sample_rate`` and
            ``channels`` (each ``None`` when the stream info lacks it),
            or a dict with an ``'error'`` key when mutagen is missing or
            the file cannot be read.
        """
        try:
            import mutagen
        except ImportError:
            return {
                'error': 'mutagen not installed',
                'needs': 'pip install mutagen',
            }

        try:
            audio = mutagen.File(str(file_path))
            if audio is None:
                return {'error': 'Could not read audio file'}

            info = audio.info
            return {
                'duration': getattr(info, 'length', None),
                'bitrate': getattr(info, 'bitrate', None),
                'sample_rate': getattr(info, 'sample_rate', None),
                'channels': getattr(info, 'channels', None),
            }
        except Exception as e:
            return {'error': str(e)}