66 lines
2.2 KiB
Python
66 lines
2.2 KiB
Python
import os
|
|
import subprocess
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Dict, Optional
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class TranscriptionParser:
    """Transcribe audio/video files to text using the ``whisper`` package.

    Results are reported as plain dicts. Every result carries ``success``
    (bool) and ``text`` (str); failed results add ``error`` (str), successful
    ones add ``segments`` (list) and ``language`` (str).
    """

    # Lower-cased file suffixes (including the dot) accepted by parse().
    SUPPORTED_EXTENSIONS = frozenset({
        '.mp3', '.mp4', '.wav', '.m4a', '.flac', '.ogg', '.avi', '.mkv', '.webm',
    })

    # ``(model_name, loaded_model)`` from the most recent load, or None.
    # Kept as a class-level default so instances created without __init__
    # still have the attribute.
    _cached_model = None

    def __init__(self, model: str = 'base'):
        """Store the Whisper model name and probe whether whisper imports.

        Args:
            model: Name passed to ``whisper.load_model`` when transcribing.
        """
        self.model = model
        self.whisper_available = self._check_whisper()

    def _check_whisper(self) -> bool:
        """Return True if ``import whisper`` succeeds, else log and return False."""
        try:
            import whisper  # noqa: F401 -- imported only to test availability
        except ImportError:
            logger.warning('Whisper not installed. Install with: pip install openai-whisper')
            return False
        return True

    def _load_model(self):
        """Return the Whisper model for ``self.model``, loading it at most once.

        The loaded model is remembered together with its name, so changing
        ``self.model`` between calls triggers a fresh load instead of
        silently reusing the previous model.
        """
        import whisper

        cached = self._cached_model
        if cached is None or cached[0] != self.model:
            cached = (self.model, whisper.load_model(self.model))
            self._cached_model = cached
        return cached[1]

    def parse(self, file_path: Path) -> Dict:
        """Transcribe ``file_path`` and return a result dict.

        Args:
            file_path: Path of the media file. A plain string is accepted
                as well and converted to ``Path``.

        Returns:
            On success: ``{'success': True, 'text', 'segments', 'language'}``.
            On failure: ``{'success': False, 'error', 'text': ''}``. Failure
            covers whisper being unavailable, an unsupported suffix, and any
            exception raised while loading the model or transcribing.
        """
        if not self.whisper_available:
            return {'success': False, 'error': 'Whisper not available', 'text': ''}

        # Normalise first so str inputs do not blow up on `.suffix` below.
        file_path = Path(file_path)

        if not self._is_supported(file_path):
            return {'success': False, 'error': 'Unsupported file type', 'text': ''}

        try:
            logger.info('Transcribing %s with Whisper model=%s', file_path, self.model)
            result = self._load_model().transcribe(str(file_path))
            return {
                'success': True,
                'text': result['text'],
                'segments': result.get('segments', []),
                'language': result.get('language', 'unknown'),
            }
        except Exception as e:
            # Deliberate catch-all: callers get an error dict, not an exception.
            logger.error('Transcription failed for %s: %s', file_path, e)
            return {'success': False, 'error': str(e), 'text': ''}

    def _is_supported(self, file_path: Path) -> bool:
        """Return True if the path's suffix (case-insensitive) is supported."""
        return Path(file_path).suffix.lower() in self.SUPPORTED_EXTENSIONS

    @staticmethod
    def _format_segments(segments) -> str:
        """Render segments as ``[START s - END s] text`` lines joined by newlines.

        Missing ``start``/``end`` default to 0 and missing ``text`` to ''.
        """
        return '\n'.join(
            f"[{seg.get('start', 0):.2f}s - {seg.get('end', 0):.2f}s] "
            f"{seg.get('text', '').strip()}"
            for seg in segments
        )

    def parse_with_timestamps(self, file_path: Path) -> Dict:
        """Run parse() and add a ``timestamped_text`` entry on success.

        Args:
            file_path: Path of the media file, forwarded to parse().

        Returns:
            The parse() result. A failed result is returned unchanged; a
            successful one gains ``timestamped_text``, one line per segment.
        """
        result = self.parse(file_path)
        if not result['success']:
            return result

        result['timestamped_text'] = self._format_segments(result.get('segments', []))
        return result
|