Files
defrag/app/parsers/code_parser.py
2025-12-13 11:56:06 +01:00

45 lines
1.7 KiB
Python

from pathlib import Path
from typing import Dict
import re
class CodeParser:
    """Regex-based parser that produces a rough summary of a source file.

    For each supported language a small set of line-anchored regular
    expressions is counted against the file text (imports, classes,
    functions, ...). The patterns only match at the start of a line, so
    indented definitions (e.g. methods inside a class) are not counted.
    """

    # Lower-cased file extension (including the dot) -> language label.
    _EXTENSION_LANGUAGES = {
        '.py': 'python',
        '.js': 'javascript',
        '.ts': 'typescript',
        '.java': 'java',
        '.go': 'go',
    }

    def __init__(self):
        """Build the per-language table of structure-counting patterns."""
        javascript_patterns = {
            'imports': r'^import |^require\(',
            'class': r'^class \w+',
            'function': r'^function \w+|^const \w+ = ',
        }
        self.patterns = {
            'python': {
                'imports': r'^import |^from .+ import',
                'class': r'^class \w+',
                'function': r'^def \w+',
            },
            'javascript': javascript_patterns,
            # '.ts' files are detected as 'typescript'; reuse the JavaScript
            # patterns (as an independent copy) so they get a structure too.
            'typescript': dict(javascript_patterns),
            'java': {
                'package': r'^package ',
                'imports': r'^import ',
                'class': r'^public class \w+',
            },
            'go': {
                'package': r'^package ',
                'imports': r'^import ',
                'function': r'^func \w+',
            },
        }

    def parse(self, file_path: Path) -> Dict:
        """Read a source file and return its text plus a structural summary.

        Args:
            file_path: Location of the file. A ``Path`` is expected; a plain
                string path is accepted as well.

        Returns:
            On success, a dict with the keys ``'text'``, ``'language'``,
            ``'line_count'``, ``'structure'`` and ``'quality'`` (always
            ``'high'``). On any failure, a dict with the single key
            ``'error'`` holding the exception message; this method does not
            raise.
        """
        try:
            # Normalise once so that string paths work for both open() and
            # the suffix lookup in _detect_language.
            path = Path(file_path)
            # Undecodable bytes are dropped rather than treated as an error.
            with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
            language = self._detect_language(path, text)
            return {
                'text': text,
                'language': language,
                'line_count': self._count_lines(text),
                'structure': self._extract_structure(text, language),
                'quality': 'high',
            }
        except Exception as e:
            # Deliberate catch-all: callers receive an error dict instead of
            # an exception.
            return {'error': str(e)}

    @staticmethod
    def _count_lines(text: str) -> int:
        """Return the number of lines in *text*.

        A trailing newline terminates the last line instead of starting a
        new, empty one, so ``"a\\nb\\n"`` has 2 lines and ``""`` has 0.
        """
        terminated = text.count('\n')
        has_unterminated_tail = bool(text) and not text.endswith('\n')
        return terminated + (1 if has_unterminated_tail else 0)

    def _detect_language(self, file_path: Path, text: str) -> str:
        """Map the file extension to a language label.

        Args:
            file_path: Path whose suffix is inspected (case-insensitively).
            text: File contents; currently unused, kept for interface
                compatibility.

        Returns:
            The language label, or ``'unknown'`` for unmapped extensions.
        """
        suffix = Path(file_path).suffix.lower()
        return self._EXTENSION_LANGUAGES.get(suffix, 'unknown')

    def _extract_structure(self, text: str, language: str) -> Dict:
        """Count pattern matches for *language* in *text*.

        Args:
            text: Source code to scan.
            language: Label used to select the pattern set.

        Returns:
            A dict with ``'type'`` (``'code'``), ``'language'`` and one
            integer count per pattern name defined for the language.
            Languages without patterns yield only the first two keys.
        """
        structure = {'type': 'code', 'language': language}
        for key, pattern in self.patterns.get(language, {}).items():
            # MULTILINE makes '^' match at the start of every line.
            structure[key] = len(re.findall(pattern, text, re.MULTILINE))
        return structure