from pathlib import Path
from typing import Dict
import re


class CodeParser:
    """Read a source file and summarise it with simple line-anchored regexes.

    The summary contains the raw text, a language guessed from the file
    extension, a line count and, per language, how many lines match each
    pattern in ``self.patterns`` (imports, classes, functions, ...).
    """

    # File extension (lower-case, with leading dot) -> language name.
    _EXTENSION_LANGUAGES = {
        '.py': 'python',
        '.js': 'javascript',
        '.ts': 'typescript',
        '.java': 'java',
        '.go': 'go',
    }

    def __init__(self):
        """Build the per-language table of regex pattern strings."""
        javascript_patterns = {
            'imports': r'^import |^require\(',
            'class': r'^class \w+',
            'function': r'^function \w+|^const \w+ = ',
        }
        # Every pattern is anchored with ``^`` and applied with re.MULTILINE,
        # so only constructs starting in column 0 are counted (indented
        # definitions such as methods are not).
        self.patterns = {
            'python': {
                'imports': r'^import |^from .+ import',
                'class': r'^class \w+',
                'function': r'^def \w+',
            },
            'javascript': javascript_patterns,
            # '.ts' files are detected as 'typescript'; without an entry here
            # their structure would always come back empty, so reuse the
            # JavaScript patterns (as a separate dict to avoid aliasing).
            'typescript': dict(javascript_patterns),
            'java': {
                'package': r'^package ',
                'imports': r'^import ',
                'class': r'^public class \w+',
            },
            'go': {
                'package': r'^package ',
                'imports': r'^import ',
                'function': r'^func \w+',
            },
        }

    def parse(self, file_path: Path) -> Dict:
        """Parse the file at *file_path* and return a summary dict.

        Args:
            file_path: Location of the source file. A ``pathlib.Path`` is
                expected; a plain string path is accepted as well.

        Returns:
            On success, a dict with the keys ``'text'``, ``'language'``,
            ``'line_count'``, ``'structure'`` and ``'quality'``.
            On any failure (missing file, bad argument, ...), a dict with the
            single key ``'error'`` holding the exception message. This method
            does not raise.
        """
        try:
            path = Path(file_path)
            # errors='ignore' silently drops bytes that are not valid UTF-8
            # instead of failing on them.
            with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()

            language = self._detect_language(path, text)
            structure = self._extract_structure(text, language)

            return {
                'text': text,
                'language': language,
                'line_count': self._count_lines(text),
                'structure': structure,
                'quality': 'high',
            }
        except Exception as e:
            # Best-effort boundary: report the problem to the caller as data.
            return {'error': str(e)}

    @staticmethod
    def _count_lines(text: str) -> int:
        """Return the number of lines in *text*.

        Counts newline-terminated lines plus a final unterminated line, so an
        empty string has 0 lines and ``"a\\nb\\n"`` has 2 (not 3).
        """
        terminated = text.count('\n')
        has_unterminated_tail = bool(text) and not text.endswith('\n')
        return terminated + (1 if has_unterminated_tail else 0)

    def _detect_language(self, file_path: Path, text: str) -> str:
        """Return the language name for *file_path* based on its extension.

        The extension is compared case-insensitively. *text* is accepted for
        interface compatibility but is not inspected.

        Returns:
            One of the names in ``_EXTENSION_LANGUAGES`` or ``'unknown'``.
        """
        return self._EXTENSION_LANGUAGES.get(file_path.suffix.lower(), 'unknown')

    def _extract_structure(self, text: str, language: str) -> Dict:
        """Count the pattern matches for *language* in *text*.

        Returns:
            A dict with ``'type': 'code'``, the ``'language'`` and one integer
            entry per pattern key configured for that language. Languages
            without configured patterns yield only the two fixed keys.
        """
        structure = {'type': 'code', 'language': language}
        for key, pattern in self.patterns.get(language, {}).items():
            structure[key] = len(re.findall(pattern, text, re.MULTILINE))
        return structure