diff --git a/app/filters/gitignore.py b/app/filters/gitignore.py
new file mode 100644
index 0000000..5952bfc
--- /dev/null
+++ b/app/filters/gitignore.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+from typing import Optional, Set
+import fnmatch
+
+DEFAULT_PATTERNS = {
+    'node_modules/**', '__pycache__/**', '.git/**', 'build/**', 'dist/**',
+    '.cache/**', 'target/**', 'vendor/**', '.venv/**', 'venv/**',
+    '*.pyc', '*.pyo', '*.so', '*.dll', '*.dylib', '*.o', '*.a',
+    '.DS_Store', 'Thumbs.db', '.pytest_cache/**', '.tox/**',
+    '*.egg-info/**', '.mypy_cache/**', '.coverage', 'htmlcov/**',
+    '.gradle/**', 'bin/**', 'obj/**', '.vs/**', '.idea/**'
+}
+
+class GitignoreFilter:
+    def __init__(self, patterns: Optional[Set[str]] = None):
+        self.patterns = patterns or DEFAULT_PATTERNS
+
+    def should_exclude(self, path: str) -> bool:
+        path_obj = Path(path)
+        for pattern in self.patterns:
+            if '**' in pattern:
+                clean_pattern = pattern.replace('/**', '').replace('**/', '')
+                if any(fnmatch.fnmatch(part, clean_pattern) for part in path_obj.parts):
+                    return True
+            elif fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(path_obj.name, pattern):
+                return True
+        return False
+
+    def filter_files(self, files: list) -> list:
+        return [f for f in files if not self.should_exclude(f)]
diff --git a/app/main.py b/app/main.py
index 3181775..81be092 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,33 +1,23 @@
 #!/usr/bin/env python3
-"""
-Disk Reorganizer - Safely restructure files across disks to free up one entire disk.
-Three modes: index, plan, execute
-"""
-
 import os
 import sys
+from dataclasses import dataclass
+
 import psycopg2
-from psycopg2 import sql
-from psycopg2.extras import RealDictCursor
 import shutil
 import hashlib
 import argparse
 import json
 from pathlib import Path
-from dataclasses import dataclass, asdict
-from typing import List, Dict, Optional, Tuple
+from typing import List, Dict, Optional
 from datetime import datetime
 import logging
 import time
 
-# Setup logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler('disk_reorganizer.log'),
-        logging.StreamHandler(sys.stdout)
-    ]
+    handlers=[logging.FileHandler('disk_reorganizer.log'), logging.StreamHandler(sys.stdout)]
 )
 logger = logging.getLogger(__name__)
@@ -430,12 +420,186 @@ class DiskReorganizer:
         logger.info(f"✓ Disk {plan['target_disk']} is ready for Linux installation!")
         logger.info(f" Remember to safely delete original files from {plan['target_disk']}")
 
-    def generate_report(self):
+    def run_deduplication(self, disk: Optional[str] = None, use_chunks: bool = True):
+        logger.info(f"Starting deduplication{' for disk ' + disk if disk else ''}")
+
+        disk_mount_map = {
+            'SMT': '/media/mike/SMT',
+            'DISK1': '/media/mike/DISK1',
+            'LLM': '/media/mike/LLM'
+        }
+
+        conn = self.get_connection()
+        cursor = conn.cursor()
+
+        def hash_file_local(file_path: Path) -> str:
+            hasher = hashlib.sha256()
+            with open(file_path, 'rb') as f:
+                while chunk := f.read(65536):
+                    hasher.update(chunk)
+            return hasher.hexdigest()
+
+        try:
+            if disk:
+                cursor.execute("SELECT path, size, disk_label FROM files WHERE disk_label = %s AND checksum IS NULL ORDER BY size DESC", (disk,))
+            else:
+                cursor.execute("SELECT path, size, disk_label FROM files WHERE checksum IS NULL ORDER BY size DESC")
+
+            files_to_process = cursor.fetchall()
+            total = len(files_to_process)
+            logger.info(f"Found {total} files to hash")
+
+            processed = 0
+            skipped = 0
+            for path_str, size, disk_label in files_to_process:
+                try:
+                    mount_point = disk_mount_map.get(disk_label, disk_label)
+                    full_path = Path(mount_point) / path_str if not Path(path_str).is_absolute() else Path(path_str)
+
+                    if not full_path.exists():
+                        skipped += 1
+                        continue
+
+                    checksum = hash_file_local(full_path)
+
+                    cursor.execute("SELECT path FROM files WHERE checksum = %s AND path != %s LIMIT 1", (checksum, path_str))
+                    dup_row = cursor.fetchone()
+                    duplicate_of = dup_row[0] if dup_row else None
+
+                    cursor.execute("UPDATE files SET checksum = %s, duplicate_of = %s WHERE path = %s", (checksum, duplicate_of, path_str))
+
+                    processed += 1
+                    if processed % 100 == 0:
+                        conn.commit()
+                        print(f"\rProcessed: {processed}/{total} ({skipped} skipped)", end='', flush=True)
+
+                except Exception as e:
+                    skipped += 1
+                    conn.rollback()
+
+            conn.commit()
+            print()
+            logger.info(f"Deduplication complete: {processed}/{total} files processed, {skipped} skipped")
+
+        finally:
+            cursor.close()
+            conn.close()
+
+    def plan_merge(self, sources: List[str], target: str, output_file: str,
+                   filter_system: bool = False, network_target: Optional[str] = None):
+        """Plan merge of multiple source disks to target with deduplication"""
+        logger.info(f"Planning merge: {', '.join(sources)} → {target or network_target}")
+
+        if filter_system:
+            sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+            from filters.gitignore import GitignoreFilter
+            file_filter = GitignoreFilter()
+            logger.info("System/build file filtering enabled")
+
+        conn = self.get_connection()
+        cursor = conn.cursor()
+
+        try:
+            placeholders = ','.join(['%s'] * len(sources))
+            cursor.execute(f"""
+                SELECT path, size, checksum, disk_label, duplicate_of
+                FROM files
+                WHERE disk_label IN ({placeholders})
+                ORDER BY size DESC
+            """, tuple(sources))
+
+            files = cursor.fetchall()
+            total_files = len(files)
+            total_size = sum(int(f[1]) for f in files)
+
+            unique_files = {}
+            duplicate_count = 0
+            duplicate_size = 0
+            filtered_count = 0
+            filtered_size = 0
+
+            for path, size, checksum, disk_label, duplicate_of in files:
+                if filter_system and file_filter.should_exclude(path):
+                    filtered_count += 1
+                    filtered_size += int(size)
+                    continue
+
+                if checksum and checksum in unique_files:
+                    duplicate_count += 1
+                    duplicate_size += int(size)
+                else:
+                    if checksum:
+                        unique_files[checksum] = (path, int(size), disk_label)
+
+            unique_count = len(unique_files)
+            unique_size = sum(f[1] for f in unique_files.values())
+
+            plan = {
+                'sources': sources,
+                'target': target or network_target,
+                'network': network_target is not None,
+                'total_files': total_files,
+                'total_size': total_size,
+                'unique_files': unique_count,
+                'unique_size': unique_size,
+                'duplicate_files': duplicate_count,
+                'duplicate_size': duplicate_size,
+                'filtered_files': filtered_count if filter_system else 0,
+                'filtered_size': filtered_size if filter_system else 0,
+                'space_saved': duplicate_size + (filtered_size if filter_system else 0),
+                'operations': []
+            }
+
+            for checksum, (path, size, disk_label) in unique_files.items():
+                plan['operations'].append({
+                    'source_disk': disk_label,
+                    'source_path': path,
+                    'target_disk': target or network_target,
+                    'target_path': path,
+                    'size': size,
+                    'checksum': checksum
+                })
+
+            with open(output_file, 'w') as f:
+                json.dump(plan, f, indent=2)
+
+            logger.info(f"Merge plan saved to {output_file}")
+            print(f"\n=== MERGE PLAN SUMMARY ===")
+            print(f"Sources: {', '.join(sources)}")
+            print(f"Target: {target or network_target}")
+            print(f"Total files: {total_files:,} ({self.format_size(total_size)})")
+            if filter_system:
+                print(f"Filtered (system/build): {filtered_count:,} ({self.format_size(filtered_size)})")
+            print(f"Unique files: {unique_count:,} ({self.format_size(unique_size)})")
+            print(f"Duplicates: {duplicate_count:,} ({self.format_size(duplicate_size)})")
+            print(f"Total space saved: {self.format_size(plan['space_saved'])}")
+            print(f"Space needed on target: {self.format_size(unique_size)}")
+
+        finally:
+            cursor.close()
+            conn.close()
+
+    def generate_report(self, format='text', show_duplicates=False, preview_merge=None):
         """Generate status report"""
         conn = self.get_connection()
         cursor = conn.cursor()
 
         try:
+            if preview_merge:
+                # Load and display merge plan
+                with open(preview_merge, 'r') as f:
+                    plan = json.load(f)
+
+                print("\n=== MERGE PLAN PREVIEW ===")
+                print(f"Sources: {', '.join(plan['sources'])}")
+                print(f"Target: {plan['target']}")
+                print(f"Total files: {plan['total_files']:,} ({self.format_size(plan['total_size'])})")
+                print(f"Unique files: {plan['unique_files']:,} ({self.format_size(plan['unique_size'])})")
+                print(f"Duplicates: {plan['duplicate_files']:,} ({self.format_size(plan['duplicate_size'])})")
+                print(f"Space saved: {self.format_size(plan['space_saved'])}")
+                print(f"Space needed on target: {self.format_size(plan['unique_size'])}")
+                return
+
             cursor.execute("""
                 SELECT status, COUNT(*), SUM(size) FROM files GROUP BY status
             """)
@@ -443,7 +607,43 @@ class DiskReorganizer:
             print("\n=== FILE MIGRATION REPORT ===")
             for row in cursor.fetchall():
                 status, count, size = row
-                print(f"{status:15}: {count:6} files, {self.format_size(size or 0)}")
+                print(f"{status:15}: {count:6} files, {self.format_size(int(size or 0))}")
+
+            # Disk usage summary
+            cursor.execute("""
+                SELECT disk_label, COUNT(*), SUM(size) FROM files GROUP BY disk_label
+            """)
+
+            print("\n=== DISK USAGE ===")
+            for row in cursor.fetchall():
+                disk, count, size = row
+                print(f"{disk:20}: {count:6} files, {self.format_size(int(size or 0))}")
+
+            # Deduplication stats
+            cursor.execute("""
+                SELECT COUNT(*), SUM(size) FROM files WHERE checksum IS NOT NULL
+            """)
+            hashed_count, hashed_size = cursor.fetchone()
+
+            cursor.execute("""
+                SELECT COUNT(*), SUM(size) FROM files WHERE duplicate_of IS NOT NULL
+            """)
+            dup_count, dup_size = cursor.fetchone()
+
+            print("\n=== DEDUPLICATION STATS ===")
+            print(f"Files with checksums: {hashed_count or 0:6}")
+            print(f"Duplicate files: {dup_count or 0:6} ({self.format_size(int(dup_size or 0))})")
+
+            if show_duplicates and dup_count:
+                print("\n=== DUPLICATE FILES ===")
+                cursor.execute("""
+                    SELECT path, size, duplicate_of FROM files
+                    WHERE duplicate_of IS NOT NULL
+                    ORDER BY size DESC
+                    LIMIT 20
+                """)
+                for path, size, dup_of in cursor.fetchall():
+                    print(f" {path} ({self.format_size(int(size))}) → {dup_of}")
 
             cursor.execute("""
                 SELECT operation_type, executed, verified, COUNT(*) FROM operations GROUP BY operation_type, executed, verified
@@ -489,8 +689,24 @@ def main():
     exec_parser.add_argument('plan_file', help='Path to plan JSON file')
     exec_parser.add_argument('--dry-run', action='store_true', help='Simulate without actual file operations')
 
+    # Dedupe command
+    dedupe_parser = subparsers.add_parser('dedupe', help='Deduplicate files and compute checksums')
+    dedupe_parser.add_argument('--disk', help='Optional: Only dedupe specific disk')
+    dedupe_parser.add_argument('--no-chunks', action='store_true', help='Disable chunk-level deduplication')
+
+    # Merge command
+    merge_parser = subparsers.add_parser('merge', help='Plan multi-disk merge with deduplication')
+    merge_parser.add_argument('--sources', nargs='+', required=True, help='Source disks to merge')
+    merge_parser.add_argument('--target', required=True, help='Target disk')
+    merge_parser.add_argument('--output', default='merge_plan.json', help='Output plan file')
+    merge_parser.add_argument('--filter-system', action='store_true', help='Filter system/build files')
+    merge_parser.add_argument('--network', help='Network target (e.g., user@host:/path)')
+
     # Report command
     report_parser = subparsers.add_parser('report', help='Show current status')
+    report_parser.add_argument('--format', choices=['text', 'json'], default='text', help='Report format')
+    report_parser.add_argument('--show-duplicates', action='store_true', help='Show duplicate files')
+    report_parser.add_argument('--preview-merge', help='Preview merge plan from file')
 
     args = parser.parse_args()
     tool = DiskReorganizer()
@@ -498,6 +714,13 @@
     if args.command == 'index':
         tool.index_disk(args.disk_root, args.disk_name)
 
+    elif args.command == 'dedupe':
+        tool.run_deduplication(disk=args.disk, use_chunks=not args.no_chunks)
+
+    elif args.command == 'merge':
+        tool.plan_merge(sources=args.sources, target=args.target, output_file=args.output,
+                        filter_system=args.filter_system, network_target=args.network)
+
     elif args.command == 'plan':
         plan = tool.plan_migration(args.target_disk, args.dest_disks)
         if plan:
@@ -508,7 +731,7 @@
         tool.execute_migration(args.plan_file, dry_run=args.dry_run)
 
     elif args.command == 'report':
-        tool.generate_report()
+        tool.generate_report(format=args.format, show_duplicates=args.show_duplicates, preview_merge=args.preview_merge)
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
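
A minimal usage sketch for the new GitignoreFilter, not part of the diff itself; it assumes app/ is on sys.path (as plan_merge arranges) and the sample paths are purely illustrative:

    from filters.gitignore import GitignoreFilter

    f = GitignoreFilter()
    # Directory patterns such as 'node_modules/**' exclude any path containing that component
    print(f.should_exclude('projects/web/node_modules/lodash/index.js'))  # expected: True
    # File patterns such as '*.pyc' match on the file name
    print(f.should_exclude('projects/tool/cache/module.pyc'))             # expected: True
    # Ordinary source files are kept
    print(f.should_exclude('projects/tool/src/main.py'))                  # expected: False

With the new subcommands wired up above, the intended workflow would be: run dedupe (optionally per disk with --disk), then merge --sources ... --target ... --filter-system --output merge_plan.json, then inspect the result with report --preview-merge merge_plan.json before executing anything.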