initial
This commit is contained in:
44
app/parsers/code_parser.py
Normal file
44
app/parsers/code_parser.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
import re
|
||||
|
||||
class CodeParser:
    """Summarise a source-code file using per-language regular expressions.

    ``parse`` reads a file as text, picks a language from the file suffix
    and counts how many lines match each of that language's patterns
    (imports, classes, functions, ...). Patterns are anchored at the start
    of a line, so only unindented declarations are counted.
    """

    # Lower-cased file suffix -> language name reported in parse results.
    _EXTENSION_LANGUAGES = {
        '.py': 'python',
        '.js': 'javascript',
        '.ts': 'typescript',
        '.java': 'java',
        '.go': 'go',
    }

    def __init__(self):
        """Build the ``language -> {structure key -> regex}`` pattern table."""
        javascript_patterns = {
            'imports': r'^import |^require\(',
            'class': r'^class \w+',
            'function': r'^function \w+|^const \w+ = ',
        }
        self.patterns = {
            'python': {'imports': r'^import |^from .+ import', 'class': r'^class \w+', 'function': r'^def \w+'},
            'javascript': javascript_patterns,
            # '.ts' files are detected as 'typescript'; without an entry here
            # their structure would carry no counts at all. The JavaScript
            # patterns are reused (as a separate copy so edits to one table
            # do not leak into the other).
            'typescript': dict(javascript_patterns),
            'java': {'package': r'^package ', 'imports': r'^import ', 'class': r'^public class \w+'},
            'go': {'package': r'^package ', 'imports': r'^import ', 'function': r'^func \w+'},
        }

    def parse(self, file_path: Path) -> Dict:
        """Read ``file_path`` and return its text plus a structural summary.

        Args:
            file_path: Location of the file to read. A plain string path is
                accepted as well as a ``Path``.

        Returns:
            On success, a dict with keys ``'text'`` (file contents decoded as
            UTF-8, undecodable bytes dropped), ``'language'``,
            ``'line_count'``, ``'structure'`` (see ``_extract_structure``)
            and ``'quality'`` (always the constant ``'high'``).
            On any failure, ``{'error': <message>}``; this method does not
            raise.
        """
        try:
            # Coerce first so string paths work in _detect_language too.
            path = Path(file_path)
            with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()

            language = self._detect_language(path, text)
            structure = self._extract_structure(text, language)

            return {
                'text': text,
                'language': language,
                'line_count': self._count_lines(text),
                'structure': structure,
                'quality': 'high',
            }
        except Exception as e:
            # Deliberate catch-all: callers receive an error dict instead of
            # an exception, whatever went wrong while reading or analysing.
            return {'error': str(e)}

    @staticmethod
    def _count_lines(text: str) -> int:
        """Return the number of lines in ``text``.

        An empty string has zero lines, and a trailing newline terminates the
        last line rather than starting a new empty one. Only ``'\\n'`` is
        treated as a line break (text-mode reading has already normalised
        other newline conventions).
        """
        newline_count = text.count('\n')
        if text and not text.endswith('\n'):
            # Final line has content but no terminating newline.
            return newline_count + 1
        return newline_count

    def _detect_language(self, file_path: Path, text: str) -> str:
        """Return the language name for ``file_path`` based on its suffix.

        The suffix comparison is case-insensitive. ``text`` is not consulted;
        it is kept in the signature for existing callers. Unrecognised
        suffixes yield ``'unknown'``.
        """
        suffix = Path(file_path).suffix.lower()
        return self._EXTENSION_LANGUAGES.get(suffix, 'unknown')

    def _extract_structure(self, text: str, language: str) -> Dict:
        """Count pattern matches in ``text`` for the given ``language``.

        Returns:
            A dict that always contains ``'type': 'code'`` and ``'language'``,
            plus one integer count per pattern key configured for that
            language in ``self.patterns``. Languages without configured
            patterns get no count keys.
        """
        structure = {'type': 'code', 'language': language}

        language_patterns = self.patterns.get(language, {})
        for key, pattern in language_patterns.items():
            # MULTILINE makes '^' match at the start of every line.
            structure[key] = len(re.findall(pattern, text, re.MULTILINE))

        return structure
|
||||
Reference in New Issue
Block a user