initial
This commit is contained in:
63
app/analysis/folder_analyzer.py
Normal file
63
app/analysis/folder_analyzer.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict, Set, List
|
||||
from collections import Counter
|
||||
|
||||
class FolderAnalyzer:
    """Classify a folder heuristically from its name and its file paths.

    Only path strings are inspected (names, suffixes, components); no file
    is opened or read by this class.
    """

    # How many leading file paths are sampled by the structure flags and by
    # the file-path fallback of the intent inference, respectively.
    _STRUCTURE_SAMPLE_SIZE = 20
    _INTENT_SAMPLE_SIZE = 50

    # Characters treated as word separators inside a single path component.
    _TOKEN_SEPARATORS = str.maketrans('.-_', '   ')

    # Whole-word markers behind the ``structure`` flags.
    _SRC_MARKERS = frozenset({'src'})
    _TEST_MARKERS = frozenset({'test', 'tests', 'testing'})
    _DOC_MARKERS = frozenset({'doc', 'docs', 'documentation'})

    def __init__(self):
        # Technology -> file names whose presence identifies it. Insertion
        # order matters: the first detected technology becomes the project
        # type in _infer_project_type.
        self.manifest_files = {
            'java': ['pom.xml', 'build.gradle', 'build.gradle.kts'],
            'javascript': ['package.json', 'yarn.lock', 'package-lock.json'],
            'python': ['pyproject.toml', 'setup.py', 'requirements.txt', 'Pipfile'],
            'go': ['go.mod', 'go.sum'],
            'rust': ['Cargo.toml', 'Cargo.lock'],
            'docker': ['Dockerfile', 'docker-compose.yml', 'docker-compose.yaml'],
            'k8s': ['helm', 'kustomization.yaml', 'deployment.yaml'],
        }
        # Intent -> keywords. Insertion order is the tie-break order used by
        # _infer_intent (earlier intents win).
        self.intent_keywords = {
            'infrastructure': ['infra', 'deploy', 'k8s', 'docker', 'terraform', 'ansible'],
            'application': ['app', 'service', 'api', 'server', 'client'],
            'data': ['data', 'dataset', 'models', 'training', 'ml'],
            'documentation': ['docs', 'documentation', 'wiki', 'readme'],
            'testing': ['test', 'tests', 'spec', 'e2e', 'integration'],
            'build': ['build', 'dist', 'target', 'out', 'bin'],
            'config': ['config', 'conf', 'settings', 'env'],
        }

    def analyze_folder(self, folder_path: Path, files: List[Dict]) -> Dict:
        """Build a summary dict describing a folder.

        Args:
            folder_path: Path of the folder; its last component drives the
                intent inference and its component count is the ``depth``.
            files: One dict per file; each must provide a ``'path'`` key.

        Returns:
            A dict with the keys ``has_readme``, ``has_git``,
            ``has_manifest``, ``manifest_types``, ``dominant_file_types``,
            ``project_type``, ``intent`` and ``structure``.

        Raises:
            KeyError: If an entry of ``files`` has no ``'path'`` key.
        """
        files_list = [Path(entry['path']) for entry in files]

        has_readme = any('readme' in path.name.lower() for path in files_list)
        # Loose on purpose: also true for .gitignore, .github/, foo.git/ ...
        has_git = any('.git' in str(path) for path in files_list)

        manifest_types = self._detect_manifests(files_list)

        # Suffix histogram, capped at the ten most frequent extensions.
        suffix_counts = Counter(
            path.suffix.lower() for path in files_list if path.suffix
        )
        dominant_types = dict(suffix_counts.most_common(10))

        intent = self._infer_intent(folder_path.name.lower(), files_list)
        project_type = self._infer_project_type(manifest_types, dominant_types)

        # Match whole, lower-cased words of path components rather than raw
        # substrings, so 'docker-compose.yml' does not count as docs and
        # 'latest.log' does not count as tests, while 'Docs/' and 'Tests/' do.
        tokens = self._path_tokens(files_list[:self._STRUCTURE_SAMPLE_SIZE])
        structure = {
            'depth': len(folder_path.parts),
            'has_src': not tokens.isdisjoint(self._SRC_MARKERS),
            'has_tests': not tokens.isdisjoint(self._TEST_MARKERS),
            'has_docs': not tokens.isdisjoint(self._DOC_MARKERS),
        }

        return {
            'has_readme': has_readme,
            'has_git': has_git,
            'has_manifest': bool(manifest_types),
            'manifest_types': manifest_types,
            'dominant_file_types': dominant_types,
            'project_type': project_type,
            'intent': intent,
            'structure': structure,
        }

    def _path_tokens(self, paths: List[Path]) -> Set[str]:
        """Return the lower-cased words found in the components of ``paths``.

        Each component is split on ``.``, ``-``, ``_`` and whitespace, e.g.
        ``tests/test_main.py`` yields ``{'tests', 'test', 'main', 'py'}``.
        """
        tokens = set()
        for path in paths:
            for part in path.parts:
                tokens.update(
                    part.lower().translate(self._TOKEN_SEPARATORS).split()
                )
        return tokens

    def _detect_manifests(self, files: List[Path]) -> List[str]:
        """Return the technologies whose manifest file names occur in ``files``.

        Only the final path component of each file is compared, and the
        result follows the insertion order of ``self.manifest_files``.
        """
        # NOTE(review): 'helm' is compared against file basenames only, so a
        # 'helm/' directory is not detected - confirm whether that is intended.
        file_names = {path.name for path in files}
        return [
            tech
            for tech, manifests in self.manifest_files.items()
            if any(name in file_names for name in manifests)
        ]

    def _infer_intent(self, folder_name: str, files: List[Path]) -> str:
        """Guess the purpose of a folder.

        The folder name is checked against every intent first, because it is
        the more deliberate signal; only when it matches nothing are the first
        file paths consulted. Keywords are matched as case-insensitive
        substrings, in the insertion order of ``self.intent_keywords``.

        Returns:
            The matching intent name, or ``'unknown'``.
        """
        folder_name = folder_name.lower()
        for intent, keywords in self.intent_keywords.items():
            if any(keyword in folder_name for keyword in keywords):
                return intent

        # Fallback: scan a bounded sample of file paths, lower-cased once.
        sample = files[:self._INTENT_SAMPLE_SIZE]
        haystack = ' '.join(str(path) for path in sample).lower()
        for intent, keywords in self.intent_keywords.items():
            if any(keyword in haystack for keyword in keywords):
                return intent

        return 'unknown'

    def _infer_project_type(self, manifests: List[str], file_types: Dict) -> str:
        """Pick a project type from detected manifests or file extensions.

        A detected manifest always wins (first one listed). Otherwise the
        extension counts are consulted in a fixed order; Python requires more
        than five ``.py`` files, the other languages a single matching file.

        Returns:
            The technology name, or ``'mixed'`` when nothing matches.
        """
        if manifests:
            return manifests[0]
        if file_types.get('.py', 0) > 5:
            return 'python'
        if '.js' in file_types or '.ts' in file_types:
            return 'javascript'
        if '.java' in file_types:
            return 'java'
        if '.go' in file_types:
            return 'go'
        return 'mixed'

    def generate_summary(self, folder_analysis: Dict, readme_text: str = None) -> str:
        """Render a one-line, human-readable summary of an analysis dict.

        Args:
            folder_analysis: Dict as returned by ``analyze_folder``; missing
                or empty keys are simply skipped.
            readme_text: Optional README contents (may be ``None``). Its first
                non-empty paragraph, cut to 200 characters, is appended.

        Returns:
            The space-joined summary, or ``'Mixed content folder'`` when there
            is nothing to report.
        """
        parts = []

        project_type = folder_analysis.get('project_type')
        if project_type:
            parts.append(f'{project_type} project')

        intent = folder_analysis.get('intent')
        if intent:
            parts.append(f'for {intent}')

        manifest_types = folder_analysis.get('manifest_types')
        if manifest_types:
            parts.append(f"using {', '.join(manifest_types)}")

        if readme_text:
            # Normalise CRLF and drop leading blank lines so the first real
            # paragraph is found; skip the description if nothing remains.
            text = readme_text.replace('\r\n', '\n').strip()
            first_para = text.split('\n\n')[0][:200]
            if first_para:
                parts.append(f'Description: {first_para}')

        return ' '.join(parts) if parts else 'Mixed content folder'
|
||||
Reference in New Issue
Block a user