This commit is contained in:
mike
2025-12-13 11:35:33 +01:00
parent 9759001f4c
commit e9eb7ea5d9
16 changed files with 899 additions and 216 deletions

View File

@@ -0,0 +1,110 @@
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Set
class FolderAnalyzer:
    """Heuristically classify a folder from the paths of the files inside it.

    Nothing is read from disk: every signal is derived from the path strings
    handed in by the caller, so the results are best-effort guesses rather
    than authoritative facts about the folder.
    """

    def __init__(self):
        """Set up the lookup tables that drive the heuristics."""
        # Technology -> file names whose presence marks that technology.
        # Insertion order matters: _infer_project_type reports the first
        # detected technology.
        # NOTE(review): entries are compared against file *names* only, so
        # 'helm' matches a file literally called "helm", not a helm/ directory.
        self.manifest_files = {
            'java': ['pom.xml', 'build.gradle', 'build.gradle.kts'],
            'javascript': ['package.json', 'yarn.lock', 'package-lock.json'],
            'python': ['pyproject.toml', 'setup.py', 'requirements.txt', 'Pipfile'],
            'go': ['go.mod', 'go.sum'],
            'rust': ['Cargo.toml', 'Cargo.lock'],
            'docker': ['Dockerfile', 'docker-compose.yml', 'docker-compose.yaml'],
            'k8s': ['helm', 'kustomization.yaml', 'deployment.yaml'],
        }
        # Intent -> keywords that suggest it. Insertion order matters here
        # too: _infer_intent returns the first intent with a matching keyword.
        self.intent_keywords = {
            'infrastructure': ['infra', 'deploy', 'k8s', 'docker', 'terraform', 'ansible'],
            'application': ['app', 'service', 'api', 'server', 'client'],
            'data': ['data', 'dataset', 'models', 'training', 'ml'],
            'documentation': ['docs', 'documentation', 'wiki', 'readme'],
            'testing': ['test', 'tests', 'spec', 'e2e', 'integration'],
            'build': ['build', 'dist', 'target', 'out', 'bin'],
            'config': ['config', 'conf', 'settings', 'env'],
        }

    def analyze_folder(self, folder_path: Path, files: List[Dict]) -> Dict:
        """Build a dictionary of heuristic facts about one folder.

        Args:
            folder_path: Path of the folder being analysed. Only its final
                component and its number of parts are used.
            files: One dict per file; each must provide a ``'path'`` entry.

        Returns:
            A dict with the keys ``has_readme``, ``has_git``, ``has_manifest``,
            ``manifest_types``, ``dominant_file_types``, ``project_type``,
            ``intent`` and ``structure``.

        Raises:
            KeyError: If an entry of ``files`` has no ``'path'`` key.
        """
        paths = [Path(entry['path']) for entry in files]

        has_readme = any('readme' in p.name.lower() for p in paths)
        # Plain substring test, so ".gitignore" or ".github/" also count as
        # evidence of a git checkout.
        has_git = any('.git' in str(p) for p in paths)

        manifest_types = self._detect_manifests(paths)

        # Keep only the ten most frequent extensions; files without an
        # extension are ignored.
        suffix_counts = Counter(p.suffix.lower() for p in paths if p.suffix)
        dominant_types = dict(suffix_counts.most_common(10))

        intent = self._infer_intent(folder_path.name.lower(), paths)
        project_type = self._infer_project_type(manifest_types, dominant_types)

        # Only the first 20 paths are sampled. They are lower-cased so that
        # directories such as "Tests/" or "Docs/" are recognised, matching the
        # case-insensitive checks used elsewhere in this class.
        sample = [str(p).lower() for p in paths[:20]]
        structure = {
            'depth': len(folder_path.parts),
            'has_src': any('src' in s for s in sample),
            'has_tests': any('test' in s for s in sample),
            'has_docs': any('doc' in s for s in sample),
        }

        return {
            'has_readme': has_readme,
            'has_git': has_git,
            'has_manifest': bool(manifest_types),
            'manifest_types': manifest_types,
            'dominant_file_types': dominant_types,
            'project_type': project_type,
            'intent': intent,
            'structure': structure,
        }

    def _detect_manifests(self, files: List[Path]) -> List[str]:
        """Return the technologies whose manifest file names occur in *files*.

        The result follows the insertion order of ``self.manifest_files``.
        Matching is exact and case-sensitive on the final path component.
        """
        file_names = {f.name for f in files}
        return [
            tech
            for tech, manifests in self.manifest_files.items()
            if not file_names.isdisjoint(manifests)
        ]

    @staticmethod
    def _tokenize(text: str) -> Set[str]:
        """Return the set of lower-cased alphanumeric words found in *text*."""
        # Every non-alphanumeric character (path separators, dots, dashes,
        # underscores, ...) acts as a word boundary.
        spaced = ''.join(ch if ch.isalnum() else ' ' for ch in text.lower())
        return set(spaced.split())

    def _infer_intent(self, folder_name: str, files: List[Path]) -> str:
        """Guess what the folder is for from its name and its file paths.

        Keywords must match a whole word of the folder name or of a path;
        a bare substring test would let ``'ml'`` match every ``.html`` or
        ``.xml`` file and ``'app'`` match ``mapping``. The folder name is the
        stronger signal, so it is checked against every intent before the
        first 50 file paths are consulted.

        Returns:
            The first matching key of ``self.intent_keywords``, or
            ``'unknown'`` when nothing matches.
        """
        folder_tokens = self._tokenize(folder_name)
        file_tokens = self._tokenize(' '.join(str(f) for f in files[:50]))

        for tokens in (folder_tokens, file_tokens):
            for intent, keywords in self.intent_keywords.items():
                if not tokens.isdisjoint(keywords):
                    return intent
        return 'unknown'

    def _infer_project_type(self, manifests: List[str], file_types: Dict) -> str:
        """Pick a single project type label.

        Args:
            manifests: Technologies detected from manifest files; when
                non-empty the first entry wins.
            file_types: Mapping of file extension to file count, used as the
                fallback when no manifest was found.

        Returns:
            A technology name, or ``'mixed'`` when no rule applies.
        """
        if manifests:
            return manifests[0]
        # Python needs more than five files before it is reported; the other
        # languages only need a single file with the extension.
        if file_types.get('.py', 0) > 5:
            return 'python'
        if '.js' in file_types or '.ts' in file_types:
            return 'javascript'
        if '.java' in file_types:
            return 'java'
        if '.go' in file_types:
            return 'go'
        return 'mixed'

    def generate_summary(self, folder_analysis: Dict, readme_text: Optional[str] = None) -> str:
        """Render a one-string summary of an ``analyze_folder`` result.

        Args:
            folder_analysis: Dict that may contain ``project_type``,
                ``intent`` and ``manifest_types``; missing or falsy entries
                are skipped.
            readme_text: Optional README contents. Its first paragraph,
                capped at 200 characters, is appended as a description.

        Returns:
            The space-joined summary, or ``'Mixed content folder'`` when
            there is nothing to report.
        """
        parts = []
        if folder_analysis.get('project_type'):
            parts.append(f"{folder_analysis['project_type']} project")
        if folder_analysis.get('intent'):
            parts.append(f"for {folder_analysis['intent']}")
        if folder_analysis.get('manifest_types'):
            parts.append(f"using {', '.join(folder_analysis['manifest_types'])}")
        if readme_text:
            # Normalise Windows line endings and drop leading blank lines so
            # the first *non-empty* paragraph is the one that gets picked.
            text = readme_text.replace('\r\n', '\n').strip()
            first_para = text.split('\n\n')[0][:200].rstrip()
            if first_para:
                parts.append(f"Description: {first_para}")
        return ' '.join(parts) if parts else 'Mixed content folder'