64 lines
3.8 KiB
Python
64 lines
3.8 KiB
Python
from pathlib import Path
|
|
from typing import Dict, Set, List
|
|
from collections import Counter
|
|
|
|
class FolderAnalyzer:
    """Heuristically classify a folder from the paths of the files in it.

    All checks operate on path strings only; no file is opened or read.
    The two lookup tables built in ``__init__`` drive manifest detection
    and intent inference.
    """

    def __init__(self):
        """Build the manifest-name and intent-keyword lookup tables."""
        # Technology -> file names whose presence marks that technology.
        # Insertion order matters: the first detected technology becomes
        # the project type in ``_infer_project_type``.
        self.manifest_files = {
            'java': ['pom.xml', 'build.gradle', 'build.gradle.kts'],
            'javascript': ['package.json', 'yarn.lock', 'package-lock.json'],
            'python': ['pyproject.toml', 'setup.py', 'requirements.txt', 'Pipfile'],
            'go': ['go.mod', 'go.sum'],
            'rust': ['Cargo.toml', 'Cargo.lock'],
            'docker': ['Dockerfile', 'docker-compose.yml', 'docker-compose.yaml'],
            'k8s': ['helm', 'kustomization.yaml', 'deployment.yaml'],
        }
        # Intent -> keywords searched for in folder and file paths.
        # Insertion order matters: ``_infer_intent`` returns the first
        # intent that has any matching keyword.
        self.intent_keywords = {
            'infrastructure': ['infra', 'deploy', 'k8s', 'docker', 'terraform', 'ansible'],
            'application': ['app', 'service', 'api', 'server', 'client'],
            'data': ['data', 'dataset', 'models', 'training', 'ml'],
            'documentation': ['docs', 'documentation', 'wiki', 'readme'],
            'testing': ['test', 'tests', 'spec', 'e2e', 'integration'],
            'build': ['build', 'dist', 'target', 'out', 'bin'],
            'config': ['config', 'conf', 'settings', 'env'],
        }

    def analyze_folder(self, folder_path: Path, files: List[Dict]) -> Dict:
        """Summarize a folder from its path and the paths of its files.

        Args:
            folder_path: Path of the folder being analyzed. Its last
                component (lowercased) feeds intent inference and its
                number of components is reported as ``structure['depth']``.
            files: File records; each must provide a ``'path'`` key.

        Returns:
            A dict with the keys ``has_readme``, ``has_git``,
            ``has_manifest``, ``manifest_types``, ``dominant_file_types``,
            ``project_type``, ``intent`` and ``structure``.

        Raises:
            KeyError: If a file record has no ``'path'`` key.
        """
        paths = [Path(record['path']) for record in files]

        has_readme = any('readme' in p.name.lower() for p in paths)
        # Plain substring test on the whole path, so '.gitignore' and
        # '.github' also count as a git signal.
        has_git = any('.git' in str(p) for p in paths)

        manifest_types = self._detect_manifests(paths)

        # Up to ten most frequent lowercased suffixes; files without a
        # suffix are ignored.
        suffix_counts = Counter(p.suffix.lower() for p in paths if p.suffix)
        dominant_types = dict(suffix_counts.most_common(10))

        intent = self._infer_intent(folder_path.name.lower(), paths)
        project_type = self._infer_project_type(manifest_types, dominant_types)

        # Only the first 20 paths are sampled. Matching is case-insensitive
        # so 'Src/', 'Tests/' and 'Docs/' are recognized, consistent with
        # the README and intent checks.
        sample = [str(p).lower() for p in paths[:20]]
        structure = {
            'depth': len(folder_path.parts),
            'has_src': any('src' in s for s in sample),
            'has_tests': any('test' in s for s in sample),
            'has_docs': any('doc' in s for s in sample),
        }

        return {
            'has_readme': has_readme,
            'has_git': has_git,
            'has_manifest': bool(manifest_types),
            'manifest_types': manifest_types,
            'dominant_file_types': dominant_types,
            'project_type': project_type,
            'intent': intent,
            'structure': structure,
        }

    def _detect_manifests(self, files: List[Path]) -> List[str]:
        """Return the technologies whose manifest file names occur in *files*.

        Names are compared exactly (case-sensitive) against the final path
        component. The result follows the order of ``self.manifest_files``.
        """
        file_names = {f.name for f in files}
        return [
            tech
            for tech, manifests in self.manifest_files.items()
            if any(name in file_names for name in manifests)
        ]

    def _infer_intent(self, folder_name: str, files: List[Path]) -> str:
        """Return the first intent with a keyword found in the folder or file paths.

        Matching is plain substring containment against *folder_name* (used
        as given) and the lowercased paths of the first 50 files, so short
        keywords also hit inside longer words (e.g. ``'ml'`` matches
        ``'.yml'``). Returns ``'unknown'`` when nothing matches.
        """
        # Lowercase once instead of once per keyword.
        haystack = ' '.join(str(f) for f in files[:50]).lower()

        for intent, keywords in self.intent_keywords.items():
            if any(kw in folder_name or kw in haystack for kw in keywords):
                return intent
        return 'unknown'

    def _infer_project_type(self, manifests: List[str], file_types: Dict) -> str:
        """Pick a project type from detected manifests, else from suffix counts.

        The first detected manifest technology wins. Otherwise the suffix
        table is checked in a fixed order: more than five ``.py`` files,
        any ``.js``/``.ts``, any ``.java``, any ``.go``. ``'mixed'`` is
        the fallback.
        """
        if manifests:
            return manifests[0]
        if file_types.get('.py', 0) > 5:
            return 'python'
        if '.js' in file_types or '.ts' in file_types:
            return 'javascript'
        if '.java' in file_types:
            return 'java'
        if '.go' in file_types:
            return 'go'
        return 'mixed'

    def generate_summary(self, folder_analysis: Dict, readme_text: str = None) -> str:
        """Compose a one-line, human-readable summary of a folder analysis.

        Args:
            folder_analysis: Dict shaped like the result of
                ``analyze_folder``; missing or falsy keys are skipped.
            readme_text: Optional README contents. Its first paragraph
                (at most 200 characters) is appended as a description.

        Returns:
            The space-joined summary, or ``'Mixed content folder'`` when
            there is nothing to report.
        """
        parts = []

        if folder_analysis.get('project_type'):
            parts.append(f"{folder_analysis['project_type']} project")
        if folder_analysis.get('intent'):
            parts.append(f"for {folder_analysis['intent']}")
        if folder_analysis.get('manifest_types'):
            parts.append(f"using {', '.join(folder_analysis['manifest_types'])}")

        if readme_text:
            # Normalize CRLF and drop surrounding whitespace so a README
            # that starts with blank lines still yields its first real
            # paragraph instead of an empty description.
            normalized = readme_text.replace('\r\n', '\n').strip()
            first_para = normalized.split('\n\n', 1)[0][:200].rstrip()
            if first_para:
                parts.append(f'Description: {first_para}')

        return ' '.join(parts) if parts else 'Mixed content folder'
|