clean up code

2025-12-13 13:57:13 +01:00
parent f6aa2b7b76
commit 1583df8f57
9 changed files with 622 additions and 138 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -894,137 +894,3 @@ class DiskReorganizer:
            cursor.close()
            conn.close()

-    def review_migration(self, category: Optional[str]=None, show_build: bool=False):
-        from classification.classifier import FileClassifier
-        classifier = FileClassifier()
-        conn = self.get_connection()
-        cursor = conn.cursor()
-        try:
-            query = 'SELECT path, size, category FROM files WHERE 1=1'
-            params = []
-            if category:
-                query += ' AND category = %s'
-                params.append(category)
-            if not show_build:
-                query += " AND (metadata->>'labels' IS NULL OR metadata->>'labels' NOT LIKE '%build-artifact%')"
-            query += ' ORDER BY category, size DESC LIMIT 100'
-            cursor.execute(query, params)
-            files = cursor.fetchall()
-            if not files:
-                print('No files found matching criteria')
-                return
-            print(f'\n=== MIGRATION PREVIEW ===')
-            print(f'Showing {len(files)} files\n')
-            current_category = None
-            for path, size, cat in files:
-                if cat != current_category:
-                    current_category = cat
-                    print(f'\n{cat}:')
-                labels, suggested_cat, is_build = classifier.classify_path(path, int(size))
-                target = classifier.suggest_target_path(path, suggested_cat, labels)
-                print(f'  {path}')
-                print(f'    → {target} ({self.format_size(int(size))})')
-        finally:
-            cursor.close()
-            conn.close()
-
-    @staticmethod
-    def format_size(size: int) -> str:
-        for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
-            if size < 1024:
-                return f'{size:.1f}{unit}'
-            size /= 1024
-        return f'{size:.1f}PB'
-
-def main():
-    parser = argparse.ArgumentParser(description='Disk Reorganizer - Free up a disk for Linux dual-boot')
-    subparsers = parser.add_subparsers(dest='command', required=True)
-    index_parser = subparsers.add_parser('index', help='Index files on a disk')
-    index_parser.add_argument('disk_root', help='Root path of disk (e.g., D:\\\\)')
-    index_parser.add_argument('disk_name', help='Logical name for the disk')
-    plan_parser = subparsers.add_parser('plan', help='Create migration plan')
-    plan_parser.add_argument('target_disk', help='Disk to free up')
-    plan_parser.add_argument('dest_disks', nargs='+', help='Destination disks')
-    exec_parser = subparsers.add_parser('execute', help='Execute migration plan')
-    exec_parser.add_argument('plan_file', help='Path to plan JSON file')
-    exec_parser.add_argument('--dry-run', action='store_true', help='Simulate without actual file operations')
-    dedupe_parser = subparsers.add_parser('dedupe', help='Deduplicate files and compute checksums')
-    dedupe_parser.add_argument('--disk', help='Optional: Only dedupe specific disk')
-    dedupe_parser.add_argument('--no-chunks', action='store_true', help='Disable chunk-level deduplication')
-    merge_parser = subparsers.add_parser('merge', help='Plan multi-disk merge with deduplication')
-    merge_parser.add_argument('--sources', nargs='+', required=True, help='Source disks to merge')
-    merge_parser.add_argument('--target', required=True, help='Target disk')
-    merge_parser.add_argument('--output', default='merge_plan.json', help='Output plan file')
-    merge_parser.add_argument('--filter-system', action='store_true', help='Filter system/build files')
-    merge_parser.add_argument('--network', help='Network target (e.g., user@host:/path)')
-    profile_parser = subparsers.add_parser('profile', help='Create content profiles (inventory + triage)')
-    profile_parser.add_argument('--disk', help='Profile specific disk')
-    profile_parser.add_argument('--update', action='store_true', help='Update database with profiles')
-    profile_parser.add_argument('--limit', type=int, help='Limit number of files')
-    extract_parser = subparsers.add_parser('extract', help='Extract content from files')
-    extract_parser.add_argument('--kind', help='Extract specific kind (pdf, image, audio, video)')
-    extract_parser.add_argument('--limit', type=int, default=10, help='Limit extraction batch')
-
-    parse_parser = subparsers.add_parser('parse', help='Parse files to extract text')
-    parse_parser.add_argument('--kind', help='Parse specific kind (text, code, pdf)')
-    parse_parser.add_argument('--limit', type=int, default=100, help='Limit parse batch')
-    parse_parser.add_argument('--update', action='store_true', help='Save extracted text to database')
-
-    enrich_parser = subparsers.add_parser('enrich', help='Enrich content with LLM analysis')
-    enrich_parser.add_argument('--limit', type=int, default=10, help='Limit enrichment batch')
-    enrich_parser.add_argument('--use-llm', action='store_true', help='Use LLM for summarization')
-    enrich_parser.add_argument('--network', action='store_true', help='Use network LM_STUDIO instead of local OLLAMA')
-
-    classify_parser = subparsers.add_parser('classify', help='Classify files and suggest organization')
-    classify_parser.add_argument('--disk', help='Classify specific disk')
-    classify_parser.add_argument('--update', action='store_true', help='Update database with classifications')
-    classify_parser.add_argument('--no-resume', action='store_true', help='Start from scratch instead of resuming')
-    folders_parser = subparsers.add_parser('analyze-folders', help='Analyze folder structure and infer project intent')
-    folders_parser.add_argument('--disk', help='Analyze specific disk')
-    folders_parser.add_argument('--min-files', type=int, default=3, help='Minimum files per folder')
-    search_parser = subparsers.add_parser('search', help='Search indexed content')
-    search_parser.add_argument('query', help='Search query')
-    search_parser.add_argument('--type', choices=['text', 'enrichment', 'path'], default='enrichment', help='Search type')
-    search_parser.add_argument('--limit', type=int, default=20, help='Max results')
-    review_parser = subparsers.add_parser('review', help='Review proposed migration structure')
-    review_parser.add_argument('--category', help='Review specific category')
-    review_parser.add_argument('--show-build', action='store_true', help='Include build artifacts')
-    report_parser = subparsers.add_parser('report', help='Show current status')
-    report_parser.add_argument('--format', choices=['text', 'json'], default='text', help='Report format')
-    report_parser.add_argument('--show-duplicates', action='store_true', help='Show duplicate files')
-    report_parser.add_argument('--preview-merge', help='Preview merge plan from file')
-    args = parser.parse_args()
-    tool = DiskReorganizer()
-    if args.command == 'index':
-        tool.index_disk(args.disk_root, args.disk_name)
-    elif args.command == 'dedupe':
-        tool.run_deduplication(disk=args.disk, use_chunks=not args.no_chunks)
-    elif args.command == 'merge':
-        tool.plan_merge(sources=args.sources, target=args.target, output_file=args.output, filter_system=args.filter_system, network_target=args.network)
-    elif args.command == 'plan':
-        plan = tool.plan_migration(args.target_disk, args.dest_disks)
-        if plan:
-            print(f"\nPlan generated: {plan['file_count']} files, {tool.format_size(plan['total_size'])}")
-            print(f"Destination disks: {', '.join(plan['destination_disks'])}")
-    elif args.command == 'execute':
-        tool.execute_migration(args.plan_file, dry_run=args.dry_run)
-    elif args.command == 'profile':
-        tool.profile_content(disk=args.disk, update_db=args.update, limit=args.limit)
-    elif args.command == 'extract':
-        tool.extract_content(kind=args.kind, limit=args.limit)
-    elif args.command == 'parse':
-        tool.parse_files(kind=args.kind, limit=args.limit, update_db=args.update)
-    elif args.command == 'enrich':
-        tool.enrich_files(limit=args.limit, use_llm=args.use_llm, use_local=not args.network)
-    elif args.command == 'classify':
-        tool.classify_files(disk=args.disk, update_db=args.update, resume=not args.no_resume)
-    elif args.command == 'analyze-folders':
-        tool.analyze_folders(disk=args.disk, min_files=args.min_files)
-    elif args.command == 'search':
-        tool.search_content(query=args.query, limit=args.limit, search_type=args.type)
-    elif args.command == 'review':
-        tool.review_migration(category=args.category, show_build=args.show_build)
-    elif args.command == 'report':
-        tool.generate_report(format=args.format, show_duplicates=args.show_duplicates, preview_merge=args.preview_merge)
-if __name__ == '__main__':
-    main()