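"""Heuristic folder analysis.

Defines ``FolderAnalyzer``, which inspects the paths of the files in a folder
to detect build manifests, guess the project type and intent, and produce a
short textual summary.
"""
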
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Set


class FolderAnalyzer:
    """Classify a folder using only the paths of the files it contains.

    All checks are name-based heuristics: no file is opened or read. The
    lookup tables are plain instance attributes so callers can extend them
    after construction.
    """

    # The path-based heuristics only look at the first N entries of the file
    # list, so results depend on the order in which the caller supplies files.
    _STRUCTURE_SAMPLE_SIZE = 20
    _INTENT_SAMPLE_SIZE = 50

    def __init__(self):
        """Set up the manifest and intent keyword lookup tables."""
        # Technology -> file names whose presence indicates that technology.
        # Insertion order matters: the first detected technology becomes the
        # project type in ``_infer_project_type``.
        self.manifest_files = {
            'java': ['pom.xml', 'build.gradle', 'build.gradle.kts'],
            'javascript': ['package.json', 'yarn.lock', 'package-lock.json'],
            'python': ['pyproject.toml', 'setup.py', 'requirements.txt', 'Pipfile'],
            'go': ['go.mod', 'go.sum'],
            'rust': ['Cargo.toml', 'Cargo.lock'],
            'docker': ['Dockerfile', 'docker-compose.yml', 'docker-compose.yaml'],
            'k8s': ['helm', 'kustomization.yaml', 'deployment.yaml']
        }

        # Intent -> keywords looked for in folder and file names.
        # Insertion order matters: the first intent with a hit wins.
        self.intent_keywords = {
            'infrastructure': ['infra', 'deploy', 'k8s', 'docker', 'terraform', 'ansible'],
            'application': ['app', 'service', 'api', 'server', 'client'],
            'data': ['data', 'dataset', 'models', 'training', 'ml'],
            'documentation': ['docs', 'documentation', 'wiki', 'readme'],
            'testing': ['test', 'tests', 'spec', 'e2e', 'integration'],
            'build': ['build', 'dist', 'target', 'out', 'bin'],
            'config': ['config', 'conf', 'settings', 'env']
        }

    @staticmethod
    def _tokenize(text: str) -> Set[str]:
        """Return the set of lower-cased alphanumeric tokens found in *text*.

        Every non-alphanumeric character (path separators, dots, dashes,
        underscores, spaces) acts as a delimiter.
        """
        normalized = ''.join(ch if ch.isalnum() else ' ' for ch in text.lower())
        return set(normalized.split())

    def analyze_folder(self, folder_path: Path, files: List[Dict]) -> Dict:
        """Build a heuristic profile of a folder.

        Args:
            folder_path: Path of the folder being analysed. Only its name and
                number of components are used.
            files: One dict per file; each must have a ``'path'`` entry that
                ``Path`` accepts.

        Returns:
            A dict with the keys ``has_readme``, ``has_git``,
            ``has_manifest``, ``manifest_types``, ``dominant_file_types``,
            ``project_type``, ``intent`` and ``structure``.
        """
        files_list = [Path(f['path']) for f in files]

        has_readme = any('readme' in f.name.lower() for f in files_list)
        # Substring match on the whole path, so '.gitignore', '.github/...'
        # and similar entries count as evidence of git as well as '.git/'.
        has_git = any('.git' in str(f) for f in files_list)

        manifest_types = self._detect_manifests(files_list)
        has_manifest = len(manifest_types) > 0

        # Extension histogram (lower-cased), limited to the ten most common.
        file_types = Counter(f.suffix.lower() for f in files_list if f.suffix)
        dominant_types = dict(file_types.most_common(10))

        intent = self._infer_intent(folder_path.name.lower(), files_list)
        project_type = self._infer_project_type(manifest_types, dominant_types)

        # Token-based checks avoid the false positives of raw substring
        # matching ('doc' in 'docker-compose.yml', 'test' in 'latest.json')
        # and make the checks case-insensitive ('Docs/', 'Tests/').
        sample_tokens = self._tokenize(
            ' '.join(str(f) for f in files_list[:self._STRUCTURE_SAMPLE_SIZE])
        )
        structure = {
            'depth': len(folder_path.parts),
            'has_src': any(tok.startswith('src') for tok in sample_tokens),
            'has_tests': any(tok.startswith('test') for tok in sample_tokens),
            'has_docs': any(
                tok in ('doc', 'docs') or tok.startswith('document')
                for tok in sample_tokens
            )
        }

        return {
            'has_readme': has_readme,
            'has_git': has_git,
            'has_manifest': has_manifest,
            'manifest_types': manifest_types,
            'dominant_file_types': dominant_types,
            'project_type': project_type,
            'intent': intent,
            'structure': structure
        }

    def _detect_manifests(self, files: List[Path]) -> List[str]:
        """Return the technologies whose manifest files appear in *files*.

        Only the final path component of each file is compared, and the
        result follows the insertion order of ``self.manifest_files``.
        """
        # NOTE(review): 'helm' is listed as a k8s manifest but is usually a
        # directory name; directory components are not inspected here, so it
        # only matches a file literally named 'helm' - confirm intent.
        file_names = {f.name for f in files}
        return [
            tech
            for tech, manifests in self.manifest_files.items()
            if not file_names.isdisjoint(manifests)
        ]

    def _infer_intent(self, folder_name: str, files: List[Path]) -> str:
        """Guess the purpose of a folder from its name and its file paths.

        A keyword counts as a hit when it starts one of the alphanumeric
        tokens of the folder name or of the first ``_INTENT_SAMPLE_SIZE`` file
        paths. Matching at token starts keeps stems working ('docker' hits
        'Dockerfile', 'deploy' hits 'deployment') while stopping accidental
        mid-word hits such as 'ml' inside '.yml', '.xml' or '.html'.

        Returns:
            The first intent in ``self.intent_keywords`` with a hit, or
            ``'unknown'`` when none matches.
        """
        sampled_paths = ' '.join(str(f) for f in files[:self._INTENT_SAMPLE_SIZE])
        tokens = self._tokenize(folder_name) | self._tokenize(sampled_paths)

        for intent, keywords in self.intent_keywords.items():
            prefixes = tuple(keywords)
            if any(tok.startswith(prefixes) for tok in tokens):
                return intent

        return 'unknown'

    def _infer_project_type(self, manifests: List[str], file_types: Dict) -> str:
        """Pick a project type from detected manifests or file extensions.

        Args:
            manifests: Technologies returned by ``_detect_manifests``.
            file_types: Mapping of lower-cased extension to file count.

        Returns:
            The first manifest technology when there is one; otherwise
            'python' (more than five ``.py`` files), 'javascript', 'java' or
            'go' based on extensions, falling back to 'mixed'.
        """
        if manifests:
            return manifests[0]

        # A handful of .py files is not treated as a Python project.
        if file_types.get('.py', 0) > 5:
            return 'python'
        if '.js' in file_types or '.ts' in file_types:
            return 'javascript'
        if '.java' in file_types:
            return 'java'
        if '.go' in file_types:
            return 'go'

        return 'mixed'

    def generate_summary(self, folder_analysis: Dict, readme_text: Optional[str] = None) -> str:
        """Render a one-line summary of an ``analyze_folder`` result.

        Args:
            folder_analysis: Dict of the shape returned by ``analyze_folder``;
                missing or empty entries are skipped.
            readme_text: Optional README contents. Its first paragraph,
                truncated to 200 characters, is appended as a description.

        Returns:
            The summary fragments joined by single spaces, or
            ``'Mixed content folder'`` when there is nothing to report.
        """
        parts = []

        if folder_analysis.get('project_type'):
            parts.append(f"{folder_analysis['project_type']} project")

        if folder_analysis.get('intent'):
            parts.append(f"for {folder_analysis['intent']}")

        if folder_analysis.get('manifest_types'):
            parts.append(f"using {', '.join(folder_analysis['manifest_types'])}")

        if readme_text:
            # Skip leading blank lines and stop at the first blank line;
            # splitlines() also copes with CRLF line endings.
            paragraph_lines = []
            for line in readme_text.strip().splitlines():
                if not line.strip():
                    break
                paragraph_lines.append(line)
            first_para = '\n'.join(paragraph_lines)[:200]
            if first_para:
                parts.append(f"Description: {first_para}")

        return ' '.join(parts) if parts else 'Mixed content folder'