45 lines
1.7 KiB
Python
45 lines
1.7 KiB
Python
from pathlib import Path
|
|
from typing import Dict
|
|
import re
|
|
|
|
class CodeParser:
    """Summarise a source file using lightweight, regex-based heuristics.

    The parser reads a file as text, derives its language from the file
    extension and counts how many lines match a few per-language patterns
    (imports, classes, functions, ...). Nothing is actually parsed: every
    pattern is anchored at the start of a line, so indented definitions
    such as methods or nested functions are not counted.
    """

    def __init__(self):
        """Build the per-language pattern table read by ``_extract_structure``.

        ``self.patterns`` maps a language name to ``{summary_key: regex}``.
        The regexes are applied with ``re.MULTILINE``, so ``^`` matches at
        the start of every line.
        """
        javascript_patterns = {
            'imports': r'^import |^require\(',
            'class': r'^class \w+',
            'function': r'^function \w+|^const \w+ = ',
        }
        self.patterns = {
            'python': {'imports': r'^import |^from .+ import', 'class': r'^class \w+', 'function': r'^def \w+'},
            'javascript': javascript_patterns,
            'java': {'package': r'^package ', 'imports': r'^import ', 'class': r'^public class \w+'},
            'go': {'package': r'^package ', 'imports': r'^import ', 'function': r'^func \w+'},
            # `_detect_language` reports '.ts' files as 'typescript'; without
            # an entry here their structure summary would always be empty.
            # The JavaScript heuristics are reused (as a separate copy, so
            # editing one table does not silently change the other).
            'typescript': dict(javascript_patterns),
        }

    def parse(self, file_path: Path) -> Dict:
        """Read *file_path* and return its text together with a summary.

        Args:
            file_path: Location of the file to analyse. A plain string path
                is accepted as well; it is converted to ``Path`` first.

        Returns:
            On success, a dict with the keys ``'text'`` (file content),
            ``'language'``, ``'line_count'``, ``'structure'`` (see
            ``_extract_structure``) and ``'quality'`` (always ``'high'``).
            If any ``Exception`` is raised while reading or analysing the
            file, ``{'error': <message>}`` is returned instead of raising.
        """
        try:
            path = Path(file_path)
            # errors='ignore' silently drops bytes that are not valid UTF-8,
            # so decoding never fails on binary-ish or mis-encoded input.
            with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()

            language = self._detect_language(path, text)
            structure = self._extract_structure(text, language)

            return {
                'text': text,
                'language': language,
                'line_count': self._count_lines(text),
                'structure': structure,
                'quality': 'high',
            }
        except Exception as e:
            # Deliberate best-effort boundary: callers receive an error dict
            # rather than an exception (missing file, permission error, ...).
            return {'error': str(e)}

    @staticmethod
    def _count_lines(text: str) -> int:
        """Return the number of lines in *text*.

        A trailing newline terminates the last line rather than starting an
        extra empty one, and empty text has zero lines.
        """
        line_total = text.count('\n')
        if text and not text.endswith('\n'):
            # The final line has no terminating newline but still counts.
            line_total += 1
        return line_total

    def _detect_language(self, file_path: Path, text: str) -> str:
        """Map the file extension of *file_path* to a language name.

        Args:
            file_path: Path whose suffix is inspected (case-insensitively).
            text: File content; currently unused, kept for the interface.

        Returns:
            One of ``'python'``, ``'javascript'``, ``'typescript'``,
            ``'java'``, ``'go'``, or ``'unknown'`` for any other suffix.
        """
        lang_map = {'.py': 'python', '.js': 'javascript', '.ts': 'typescript', '.java': 'java', '.go': 'go'}
        return lang_map.get(Path(file_path).suffix.lower(), 'unknown')

    def _extract_structure(self, text: str, language: str) -> Dict:
        """Count pattern matches in *text* for the given *language*.

        Args:
            text: Source text to scan.
            language: Key into ``self.patterns``; a language without an
                entry contributes no counts.

        Returns:
            A dict with ``'type': 'code'``, ``'language'`` and one integer
            match count per pattern key defined for the language.
        """
        structure = {'type': 'code', 'language': language}

        for key, pattern in self.patterns.get(language, {}).items():
            # MULTILINE lets the '^' anchors match at every line start.
            structure[key] = sum(1 for _ in re.finditer(pattern, text, re.MULTILINE))

        return structure