clean up code
This commit is contained in:
62
app/parsers/audio_parser.py
Normal file
62
app/parsers/audio_parser.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
class AudioParser:
    """Transcribe audio files with Whisper and read basic audio metadata.

    ``whisper`` and ``mutagen`` are optional dependencies. Each is imported
    lazily inside the method that needs it, and a missing package is reported
    through an ``'error'`` entry in the returned dict rather than an exception.
    """

    def __init__(self, whisper_model: str = 'base'):
        """Remember which Whisper model to use; the model is loaded on demand.

        Args:
            whisper_model: Model name handed to ``whisper.load_model``.
        """
        self.supported_formats = {'.mp3', '.wav', '.flac', '.m4a', '.ogg', '.wma', '.aac'}
        self.whisper_model = whisper_model
        # Lazily-populated cache so the model is not reloaded on every call.
        self._model = None
        self._model_name = None

    def _get_model(self):
        """Return the model for ``self.whisper_model``, loading it at most once.

        The cached model is reused until ``self.whisper_model`` is changed.

        Raises:
            ImportError: If the ``whisper`` package cannot be imported.
        """
        # getattr keeps this usable on instances created without __init__.
        cached = getattr(self, '_model', None)
        if cached is not None and getattr(self, '_model_name', None) == self.whisper_model:
            return cached

        import whisper

        self._model = whisper.load_model(self.whisper_model)
        self._model_name = self.whisper_model
        return self._model

    def parse(self, file_path: Path) -> Dict:
        """Transcribe ``file_path`` if its extension is a supported audio format.

        Args:
            file_path: Path to the audio file (a ``str`` is accepted as well).

        Returns:
            The transcription dict produced by ``_transcribe_with_whisper``,
            or ``{'error': ..., 'text': ''}`` when the format is unsupported
            or transcription fails. Every return value carries a ``'text'`` key.
        """
        file_path = Path(file_path)
        if file_path.suffix.lower() not in self.supported_formats:
            return {'error': f'Unsupported format: {file_path.suffix}', 'text': ''}

        try:
            return self._transcribe_with_whisper(file_path)
        except Exception as e:
            # Last-resort boundary: callers always get a dict back.
            logger.exception('Audio parse failed for %s: %s', file_path, e)
            return {'error': str(e), 'text': ''}

    def _transcribe_with_whisper(self, file_path: Path) -> Dict:
        """Run Whisper on ``file_path`` and summarise the result.

        Args:
            file_path: Path to the audio file to transcribe.

        Returns:
            On success a dict with ``text``, ``quality``, ``method``,
            ``language``, ``segments`` (a count) and ``metadata``. On failure
            a dict with ``error`` and an empty ``text``; when Whisper is not
            installed it also has a ``needs`` entry with the install command.
        """
        try:
            model = self._get_model()
            result = model.transcribe(str(file_path))

            segments = result.get('segments') or []
            duration = result.get('duration')
            if duration is None and segments:
                # Whisper's result has no 'duration' key; approximate it with
                # the end time (seconds) of the last transcribed segment.
                last_segment = segments[-1]
                if isinstance(last_segment, dict):
                    duration = last_segment.get('end')

            return {
                'text': result['text'].strip(),
                'quality': 'good',
                'method': f'whisper-{self.whisper_model}',
                'language': result.get('language', 'unknown'),
                'segments': len(segments),
                'metadata': {
                    'duration': duration,
                    'language': result.get('language'),
                },
            }
        except ImportError:
            logger.warning('Whisper not installed')
            return {'error': 'Whisper not installed', 'text': '', 'needs': 'pip install openai-whisper'}
        except Exception as e:
            # Log here: this handler turns the failure into a return value,
            # so no caller would otherwise see the exception.
            logger.exception('Whisper transcription failed for %s: %s', file_path, e)
            return {'error': str(e), 'text': ''}

    def extract_metadata(self, file_path: Path) -> Dict:
        """Read duration, bitrate, sample rate and channel count via mutagen.

        Args:
            file_path: Path to the audio file.

        Returns:
            A dict with ``duration``, ``bitrate``, ``sample_rate`` and
            ``channels`` (each ``None`` when the stream info lacks that
            attribute), or a dict with an ``error`` entry when mutagen is
            missing, the file cannot be identified, or reading fails.
        """
        try:
            import mutagen
        except ImportError:
            return {'error': 'mutagen not installed', 'needs': 'pip install mutagen'}

        try:
            audio = mutagen.File(str(file_path))
            if audio is None:
                return {'error': 'Could not read audio file'}

            info = audio.info
            return {
                'duration': getattr(info, 'length', None),
                'bitrate': getattr(info, 'bitrate', None),
                'sample_rate': getattr(info, 'sample_rate', None),
                'channels': getattr(info, 'channels', None),
            }
        except Exception as e:
            return {'error': str(e)}
|
||||
Reference in New Issue
Block a user