# defrag/app/parsers/audio_parser.py

from pathlib import Path
from typing import Dict
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

class AudioParser:
    """Transcribe audio files with Whisper and read basic stream metadata.

    The third-party packages (``whisper`` for transcription, ``mutagen`` for
    metadata) are imported lazily inside the methods that use them, so the
    class can be constructed when they are not installed; the affected method
    then returns an error dict carrying a ``needs`` install hint.

    Transcription and metadata failures are reported through an ``error`` key
    in the returned dict instead of being raised.
    """

    def __init__(self, whisper_model: str = 'base'):
        """Create a parser.

        Args:
            whisper_model: Name of the Whisper model passed to
                ``whisper.load_model`` (for example ``'base'``).
        """
        self.supported_formats = {'.mp3', '.wav', '.flac', '.m4a', '.ogg', '.wma', '.aac'}
        self.whisper_model = whisper_model
        # (model_name, loaded_model) pair; filled on first transcription so the
        # model is not reloaded for every file.
        self._model_cache = None

    def parse(self, file_path: Path) -> Dict:
        """Transcribe ``file_path`` if its extension is supported.

        Args:
            file_path: Location of the audio file (``Path`` or ``str``).

        Returns:
            The transcription dict produced by ``_transcribe_with_whisper``,
            or ``{'error': ..., 'text': ''}`` when the extension is not in
            ``supported_formats`` or transcription raised.
        """
        # Accept plain strings as well as Path objects.
        file_path = Path(file_path)

        if file_path.suffix.lower() not in self.supported_formats:
            # Include an empty 'text' so every error result has the same shape.
            return {'error': f'Unsupported format: {file_path.suffix}', 'text': ''}

        try:
            return self._transcribe_with_whisper(file_path)
        except Exception as e:
            logger.exception('Audio parse failed for %s: %s', file_path, e)
            return {'error': str(e), 'text': ''}

    def _get_model(self, whisper):
        """Return the loaded Whisper model, loading it only when needed.

        The cache is keyed by model name, so changing ``self.whisper_model``
        after a load triggers a fresh load on the next call.
        """
        cached = getattr(self, '_model_cache', None)
        if cached is None or cached[0] != self.whisper_model:
            cached = (self.whisper_model, whisper.load_model(self.whisper_model))
            self._model_cache = cached
        return cached[1]

    @staticmethod
    def _estimate_duration(result: Dict):
        """Return a duration in seconds for a transcription result, or None.

        Uses ``result['duration']`` when present; otherwise falls back to the
        largest ``end`` timestamp among the segments. The fallback marks the
        end of the last transcribed segment, so it is only an approximation
        of the audio length.
        """
        duration = result.get('duration')
        if duration is not None:
            return duration

        ends = [
            segment['end']
            for segment in (result.get('segments') or [])
            if isinstance(segment, dict) and segment.get('end') is not None
        ]
        return max(ends) if ends else None

    def _transcribe_with_whisper(self, file_path: Path) -> Dict:
        """Run Whisper on ``file_path`` and package the result.

        Returns:
            On success, a dict with ``text``, ``quality``, ``method``,
            ``language``, ``segments`` (segment count) and ``metadata``.
            On failure, a dict with ``error`` and an empty ``text``; when the
            ``whisper`` package is missing it also carries a ``needs`` hint.
        """
        # Keep the ImportError handler around the import only, so an
        # ImportError raised while loading or transcribing is not misreported
        # as "Whisper not installed".
        try:
            import whisper
        except ImportError:
            logger.warning('Whisper not installed')
            return {'error': 'Whisper not installed', 'text': '', 'needs': 'pip install openai-whisper'}

        try:
            model = self._get_model(whisper)
            result = model.transcribe(str(file_path))

            return {
                'text': result['text'].strip(),
                'quality': 'good',
                'method': f'whisper-{self.whisper_model}',
                'language': result.get('language', 'unknown'),
                'segments': len(result.get('segments') or []),
                'metadata': {
                    'duration': self._estimate_duration(result),
                    'language': result.get('language')
                }
            }
        except Exception as e:
            # Report the failure instead of swallowing it silently.
            logger.error('Whisper transcription failed for %s: %s', file_path, e)
            return {'error': str(e), 'text': ''}

    def extract_metadata(self, file_path: Path) -> Dict:
        """Read stream properties of ``file_path`` using mutagen.

        Returns:
            A dict with ``duration``, ``bitrate``, ``sample_rate`` and
            ``channels`` (each ``None`` when the stream info lacks that
            attribute), or a dict with an ``error`` key when mutagen is not
            installed, cannot read the file, or raises.
        """
        try:
            import mutagen
        except ImportError:
            return {'error': 'mutagen not installed', 'needs': 'pip install mutagen'}

        try:
            audio = mutagen.File(str(file_path))
            if audio is None:
                return {'error': 'Could not read audio file'}

            info = audio.info
            return {
                'duration': getattr(info, 'length', None),
                'bitrate': getattr(info, 'bitrate', None),
                'sample_rate': getattr(info, 'sample_rate', None),
                'channels': getattr(info, 'channels', None)
            }
        except Exception as e:
            logger.warning('Metadata extraction failed for %s: %s', file_path, e)
            return {'error': str(e)}