diff --git a/app/filters/__init__.py b/app/filters/__init__.py
new file mode 100644
index 0000000..edaa883
--- /dev/null
+++ b/app/filters/__init__.py
@@ -0,0 +1,3 @@
+from .gitignore import GitignoreFilter, DEFAULT_PATTERNS
+
+__all__ = ['GitignoreFilter', 'DEFAULT_PATTERNS']
diff --git a/app/main.py b/app/main.py
index 81be092..82d937f 100644
--- a/app/main.py
+++ b/app/main.py
@@ -451,35 +451,79 @@ class DiskReorganizer:
             processed = 0
             skipped = 0
 
-            for path_str, size, disk_label in files_to_process:
+            start_time = time.time()
+            batch = []
+
+            print("Phase 1: Computing checksums...")
+
+            for idx, (path_str, size, disk_label) in enumerate(files_to_process, 1):
                 try:
                     mount_point = disk_mount_map.get(disk_label, disk_label)
                     full_path = Path(mount_point) / path_str if not Path(path_str).is_absolute() else Path(path_str)
 
                     if not full_path.exists():
                         skipped += 1
+                        if idx % 100 == 0:
+                            elapsed = time.time() - start_time
+                            rate = (processed + skipped) / elapsed if elapsed > 0 else 0
+                            remaining = (total - idx) / rate if rate > 0 else 0
+                            pct = 100 * idx / total
+                            print(f"\r[{pct:5.1f}%] {processed:,}/{total:,} | {rate:.0f}/s | ETA: {int(remaining/60)}m{int(remaining%60):02d}s | Skip: {skipped:,}", end='', flush=True)
                         continue
 
                     checksum = hash_file_local(full_path)
-
-                    cursor.execute("SELECT path FROM files WHERE checksum = %s AND path != %s LIMIT 1", (checksum, path_str))
-                    dup_row = cursor.fetchone()
-                    duplicate_of = dup_row[0] if dup_row else None
-
-                    cursor.execute("UPDATE files SET checksum = %s, duplicate_of = %s WHERE path = %s", (checksum, duplicate_of, path_str))
+                    batch.append((checksum, path_str))
                     processed += 1
 
-                    if processed % 100 == 0:
-                        conn.commit()
-                        print(f"\rProcessed: {processed}/{total} ({skipped} skipped)", end='', flush=True)
+                    if len(batch) >= 1000:
+                        try:
+                            cursor.executemany("UPDATE files SET checksum = %s WHERE path = %s", batch)
+                            conn.commit()
+                            batch.clear()
+                        except Exception as e:
+                            conn.rollback()
+                            batch.clear()
+                            print(f"\nBatch update failed: {e}")
+
+                    if idx % 100 == 0:
+                        elapsed = time.time() - start_time
+                        rate = (processed + skipped) / elapsed if elapsed > 0 else 0
+                        remaining = (total - idx) / rate if rate > 0 else 0
+                        pct = 100 * idx / total
+                        print(f"\r[{pct:5.1f}%] {processed:,}/{total:,} | {rate:.0f}/s | ETA: {int(remaining/60)}m{int(remaining%60):02d}s | Skip: {skipped:,}", end='', flush=True)
 
                 except Exception as e:
                     skipped += 1
-                    conn.rollback()
+                    if idx <= 5:
+                        print(f"\nDebug: {path_str} - {e}")
+
+            if batch:
+                try:
+                    cursor.executemany("UPDATE files SET checksum = %s WHERE path = %s", batch)
+                    conn.commit()
+                except Exception as e:
+                    conn.rollback()
+                    print(f"\nFinal batch failed: {e}")
 
-            conn.commit()
             print()
-            logger.info(f"Deduplication complete: {processed}/{total} files processed, {skipped} skipped")
+            elapsed = time.time() - start_time
+            logger.info(f"Phase 1 done: {processed:,} files in {int(elapsed/60)}m{int(elapsed%60):02d}s ({skipped:,} skipped)")
+
+            print("Phase 2: Finding duplicates...")
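+            # Set-based pass: point each file at the lexicographically smallest
+            # path sharing its checksum; that canonical row itself stays NULL.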
+            cursor.execute("""
+                UPDATE files f1 SET duplicate_of = (
+                    SELECT MIN(path) FROM files f2
+                    WHERE f2.checksum = f1.checksum AND f2.path < f1.path
+                )
+                WHERE checksum IS NOT NULL
+            """)
+            conn.commit()
+
+            cursor.execute("SELECT COUNT(*) FROM files WHERE duplicate_of IS NOT NULL")
+            dup_count = cursor.fetchone()[0]
+            logger.info(f"Phase 2 done: Found {dup_count:,} duplicates")
 
         finally:
             cursor.close()
@@ -671,67 +715,54 @@ class DiskReorganizer:
         return f"{size:.1f}PB"
 
 def main():
-    parser = argparse.ArgumentParser(description='Disk Reorganizer - Free up a disk for Linux dual-boot')
-    subparsers = parser.add_subparsers(dest='command', required=True)
-
-    # Index command
-    index_parser = subparsers.add_parser('index', help='Index files on a disk')
-    index_parser.add_argument('disk_root', help='Root path of disk (e.g., D:\\\\)')
-    index_parser.add_argument('disk_name', help='Logical name for the disk')
-
-    # Plan command
-    plan_parser = subparsers.add_parser('plan', help='Create migration plan')
-    plan_parser.add_argument('target_disk', help='Disk to free up')
-    plan_parser.add_argument('dest_disks', nargs='+', help='Destination disks')
-
-    # Execute command
-    exec_parser = subparsers.add_parser('execute', help='Execute migration plan')
-    exec_parser.add_argument('plan_file', help='Path to plan JSON file')
-    exec_parser.add_argument('--dry-run', action='store_true', help='Simulate without actual file operations')
-
-    # Dedupe command
-    dedupe_parser = subparsers.add_parser('dedupe', help='Deduplicate files and compute checksums')
-    dedupe_parser.add_argument('--disk', help='Optional: Only dedupe specific disk')
-    dedupe_parser.add_argument('--no-chunks', action='store_true', help='Disable chunk-level deduplication')
-
-    # Merge command
-    merge_parser = subparsers.add_parser('merge', help='Plan multi-disk merge with deduplication')
-    merge_parser.add_argument('--sources', nargs='+', required=True, help='Source disks to merge')
-    merge_parser.add_argument('--target', required=True, help='Target disk')
-    merge_parser.add_argument('--output', default='merge_plan.json', help='Output plan file')
-    merge_parser.add_argument('--filter-system', action='store_true', help='Filter system/build files')
-    merge_parser.add_argument('--network', help='Network target (e.g., user@host:/path)')
-
-    # Report command
-    report_parser = subparsers.add_parser('report', help='Show current status')
-    report_parser.add_argument('--format', choices=['text', 'json'], default='text', help='Report format')
-    report_parser.add_argument('--show-duplicates', action='store_true', help='Show duplicate files')
-    report_parser.add_argument('--preview-merge', help='Preview merge plan from file')
-
-    args = parser.parse_args()
-    tool = DiskReorganizer()
-
-    if args.command == 'index':
-        tool.index_disk(args.disk_root, args.disk_name)
-
-    elif args.command == 'dedupe':
-        tool.run_deduplication(disk=args.disk, use_chunks=not args.no_chunks)
-
-    elif args.command == 'merge':
-        tool.plan_merge(sources=args.sources, target=args.target, output_file=args.output,
-                        filter_system=args.filter_system, network_target=args.network)
-
-    elif args.command == 'plan':
-        plan = tool.plan_migration(args.target_disk, args.dest_disks)
-        if plan:
-            print(f"\nPlan generated: {plan['file_count']} files, {tool.format_size(plan['total_size'])}")
-            print(f"Destination disks: {', '.join(plan['destination_disks'])}")
-
-    elif args.command == 'execute':
-        tool.execute_migration(args.plan_file, dry_run=args.dry_run)
-
-    elif args.command == 'report':
-        tool.generate_report(format=args.format, show_duplicates=args.show_duplicates, preview_merge=args.preview_merge)
+    parser = argparse.ArgumentParser(description='Disk Reorganizer - Free up a disk for Linux dual-boot')
+    subparsers = parser.add_subparsers(dest='cmd', required=True)
+
+    index_parser = subparsers.add_parser('index')
+    index_parser.add_argument('root')
+    index_parser.add_argument('name')
+
+    plan_parser = subparsers.add_parser('plan')
+    plan_parser.add_argument('target')
+    plan_parser.add_argument('dests', nargs='+')
+
+    exec_parser = subparsers.add_parser('execute')
+    exec_parser.add_argument('file')
+    exec_parser.add_argument('--dry', action='store_true')
+
+    dedupe_parser = subparsers.add_parser('dedupe')
+    dedupe_parser.add_argument('--disk')
+    dedupe_parser.add_argument('--no-chunks', action='store_true')
+
+    merge_parser = subparsers.add_parser('merge')
+    merge_parser.add_argument('--sources', nargs='+', required=True)
+    merge_parser.add_argument('--target', required=True)
+    merge_parser.add_argument('--out', default='merge.json')
+    merge_parser.add_argument('--filter', action='store_true')
+    merge_parser.add_argument('--net')
+
+    report_parser = subparsers.add_parser('report')
+    report_parser.add_argument('--fmt', default='text')
+    report_parser.add_argument('--dups', action='store_true')
+    report_parser.add_argument('--preview')
+
+    args = parser.parse_args()
+    tool = DiskReorganizer()
+
+    if args.cmd == 'index':
+        tool.index_disk(args.root, args.name)
+    elif args.cmd == 'dedupe':
+        tool.run_deduplication(disk=args.disk, use_chunks=not args.no_chunks)
+    elif args.cmd == 'merge':
+        tool.plan_merge(sources=args.sources, target=args.target, output_file=args.out, filter_system=args.filter, network_target=args.net)
+    elif args.cmd == 'plan':
+        plan = tool.plan_migration(args.target, args.dests)
+        if plan:
+            print(f"\nPlan generated: {plan['file_count']} files, {tool.format_size(plan['total_size'])}")
+    elif args.cmd == 'execute':
+        tool.execute_migration(args.file, dry_run=args.dry)
+    elif args.cmd == 'report':
+        tool.generate_report(format=args.fmt, show_duplicates=args.dups, preview_merge=args.preview)
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
diff --git a/sql/init.sql b/sql/init.sql
index 956fbc0..58266c7 100644
--- a/sql/init.sql
+++ b/sql/init.sql
@@ -98,11 +98,15 @@ CREATE TABLE IF NOT EXISTS migration_plans (
 );
 
 -- Indexes for performance
 CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
 CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash);
 CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
 CREATE INDEX IF NOT EXISTS idx_files_category ON files(category);
 CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
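+-- The composite (checksum, path) index serves the Phase 2 duplicate scan in
+-- app/main.py, which matches rows on checksum equality and compares paths.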
+CREATE INDEX IF NOT EXISTS idx_files_checksum ON files(checksum);
+CREATE INDEX IF NOT EXISTS idx_files_checksum_path ON files(checksum, path);
 
 CREATE INDEX IF NOT EXISTS idx_operations_status ON operations(status);
 CREATE INDEX IF NOT EXISTS idx_operations_created ON operations(created_at);
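As a quick sanity check after both phases run (a hypothetical query, not part of this diff; it assumes only the files.duplicate_of column the Phase 2 UPDATE writes to), the duplicate groups can be inspected with:

    -- Largest duplicate groups first; canonical rows keep duplicate_of = NULL.
    SELECT duplicate_of AS canonical_path, COUNT(*) AS copies
    FROM files
    WHERE duplicate_of IS NOT NULL
    GROUP BY duplicate_of
    ORDER BY copies DESC
    LIMIT 20;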