app/filters/__init__.py (new file, 3 lines added)
@@ -0,0 +1,3 @@
+from .gitignore import GitignoreFilter, DEFAULT_PATTERNS
+
+__all__ = ['GitignoreFilter', 'DEFAULT_PATTERNS']
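The `app/filters/gitignore.py` module that this package re-exports is not part of this commit. A minimal, hypothetical sketch of what it might contain is below; only the names GitignoreFilter and DEFAULT_PATTERNS come from the import above, while the pattern list and the matches() API are illustrative assumptions.

# Hypothetical sketch -- app/filters/gitignore.py is not shown in this diff.
import fnmatch
from pathlib import Path

DEFAULT_PATTERNS = ['.git', '__pycache__', 'node_modules', '*.pyc', '*.tmp']


class GitignoreFilter:
    """Match paths against gitignore-style glob patterns."""

    def __init__(self, patterns=None):
        # Fall back to the module-level defaults when no patterns are given.
        self.patterns = list(patterns) if patterns is not None else list(DEFAULT_PATTERNS)

    def matches(self, path):
        """Return True if any pattern matches any component of the path."""
        return any(
            fnmatch.fnmatch(part, pattern)
            for part in Path(path).parts
            for pattern in self.patterns
        )

Under these assumptions, GitignoreFilter().matches('repo/__pycache__/mod.pyc') would return True.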
app/main.py (143 changed lines)
@@ -451,35 +451,77 @@ class DiskReorganizer:
 
             processed = 0
             skipped = 0
-            for path_str, size, disk_label in files_to_process:
+            start_time = time.time()
+            batch = []
+
+            print(f"Phase 1: Computing checksums...")
+
+            for idx, (path_str, size, disk_label) in enumerate(files_to_process, 1):
                 try:
                     mount_point = disk_mount_map.get(disk_label, disk_label)
                     full_path = Path(mount_point) / path_str if not Path(path_str).is_absolute() else Path(path_str)
 
                     if not full_path.exists():
                         skipped += 1
+                        if idx % 100 == 0:
+                            elapsed = time.time() - start_time
+                            rate = (processed + skipped) / elapsed if elapsed > 0 else 0
+                            remaining = (total - idx) / rate if rate > 0 else 0
+                            pct = 100 * idx / total
+                            print(f"\r[{pct:5.1f}%] {processed:,}/{total:,} | {rate:.0f}/s | ETA: {int(remaining/60)}m{int(remaining%60):02d}s | Skip: {skipped:,}", end='', flush=True)
                         continue
 
                     checksum = hash_file_local(full_path)
 
                     cursor.execute("SELECT path FROM files WHERE checksum = %s AND path != %s LIMIT 1", (checksum, path_str))
                     dup_row = cursor.fetchone()
                     duplicate_of = dup_row[0] if dup_row else None
 
                     cursor.execute("UPDATE files SET checksum = %s, duplicate_of = %s WHERE path = %s", (checksum, duplicate_of, path_str))
+                    batch.append((checksum, path_str))
 
                     processed += 1
-                    if processed % 100 == 0:
-                        conn.commit()
-                        print(f"\rProcessed: {processed}/{total} ({skipped} skipped)", end='', flush=True)
+                    if len(batch) >= 1000:
+                        try:
+                            cursor.executemany("UPDATE files SET checksum = %s WHERE path = %s", batch)
+                            conn.commit()
+                            batch.clear()
+                        except Exception as e:
+                            conn.rollback()
+                            batch.clear()
+                            print(f"\nBatch update failed: {e}")
+
+                    if idx % 100 == 0:
+                        elapsed = time.time() - start_time
+                        rate = (processed + skipped) / elapsed if elapsed > 0 else 0
+                        remaining = (total - idx) / rate if rate > 0 else 0
+                        pct = 100 * idx / total
+                        print(f"\r[{pct:5.1f}%] {processed:,}/{total:,} | {rate:.0f}/s | ETA: {int(remaining/60)}m{int(remaining%60):02d}s | Skip: {skipped:,}", end='', flush=True)
 
                 except Exception as e:
                     skipped += 1
                     conn.rollback()
                     if idx <= 5:
                         print(f"\nDebug: {full_path} - {e}")
 
+            if batch:
+                try:
+                    cursor.executemany("UPDATE files SET checksum = %s WHERE path = %s", batch)
+                    conn.commit()
+                except Exception as e:
+                    conn.rollback()
+                    print(f"\nFinal batch failed: {e}")
 
-            conn.commit()
-            print()
-            logger.info(f"Deduplication complete: {processed}/{total} files processed, {skipped} skipped")
+            elapsed = time.time() - start_time
+            logger.info(f"Phase 1 done: {processed:,} files in {int(elapsed/60)}m{int(elapsed%60):02d}s ({skipped:,} skipped)")
+
+            print("Phase 2: Finding duplicates...")
+            cursor.execute("""
+                UPDATE files f1 SET duplicate_of = (
+                    SELECT MIN(path) FROM files f2
+                    WHERE f2.checksum = f1.checksum AND f2.path < f1.path
+                )
+                WHERE checksum IS NOT NULL
+            """)
+            conn.commit()
+
+            cursor.execute("SELECT COUNT(*) FROM files WHERE duplicate_of IS NOT NULL")
+            dup_count = cursor.fetchone()[0]
+            logger.info(f"Phase 2 done: Found {dup_count:,} duplicates")
 
         finally:
             cursor.close()
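Phase 1 above calls a hash_file_local() helper that is defined elsewhere in app/main.py and does not appear in this hunk. A minimal sketch of such a helper is shown below, assuming chunked SHA-256 hashing; the algorithm, chunk size, and signature are assumptions for illustration, not taken from the diff.

# Hypothetical sketch of the hash_file_local() helper used by Phase 1.
import hashlib
from pathlib import Path

def hash_file_local(path: Path, chunk_size: int = 1024 * 1024) -> str:
    """Return a hex digest for a file, reading it in fixed-size chunks to bound memory use."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()

Streaming the file in chunks matters here because the tool walks entire disks, so individual files can easily exceed available RAM.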
@@ -671,67 +713,20 @@ class DiskReorganizer:
 
         return f"{size:.1f}PB"
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Disk Reorganizer - Free up a disk for Linux dual-boot')
-    subparsers = parser.add_subparsers(dest='command', required=True)
-
-    # Index command
-    index_parser = subparsers.add_parser('index', help='Index files on a disk')
-    index_parser.add_argument('disk_root', help='Root path of disk (e.g., D:\\\\)')
-    index_parser.add_argument('disk_name', help='Logical name for the disk')
-
-    # Plan command
-    plan_parser = subparsers.add_parser('plan', help='Create migration plan')
-    plan_parser.add_argument('target_disk', help='Disk to free up')
-    plan_parser.add_argument('dest_disks', nargs='+', help='Destination disks')
-
-    # Execute command
-    exec_parser = subparsers.add_parser('execute', help='Execute migration plan')
-    exec_parser.add_argument('plan_file', help='Path to plan JSON file')
-    exec_parser.add_argument('--dry-run', action='store_true', help='Simulate without actual file operations')
-
-    # Dedupe command
-    dedupe_parser = subparsers.add_parser('dedupe', help='Deduplicate files and compute checksums')
-    dedupe_parser.add_argument('--disk', help='Optional: Only dedupe specific disk')
-    dedupe_parser.add_argument('--no-chunks', action='store_true', help='Disable chunk-level deduplication')
-
-    # Merge command
-    merge_parser = subparsers.add_parser('merge', help='Plan multi-disk merge with deduplication')
-    merge_parser.add_argument('--sources', nargs='+', required=True, help='Source disks to merge')
-    merge_parser.add_argument('--target', required=True, help='Target disk')
-    merge_parser.add_argument('--output', default='merge_plan.json', help='Output plan file')
-    merge_parser.add_argument('--filter-system', action='store_true', help='Filter system/build files')
-    merge_parser.add_argument('--network', help='Network target (e.g., user@host:/path)')
-
-    # Report command
-    report_parser = subparsers.add_parser('report', help='Show current status')
-    report_parser.add_argument('--format', choices=['text', 'json'], default='text', help='Report format')
-    report_parser.add_argument('--show-duplicates', action='store_true', help='Show duplicate files')
-    report_parser.add_argument('--preview-merge', help='Preview merge plan from file')
-
-    args = parser.parse_args()
-    tool = DiskReorganizer()
-
-    if args.command == 'index':
-        tool.index_disk(args.disk_root, args.disk_name)
-
-    elif args.command == 'dedupe':
-        tool.run_deduplication(disk=args.disk, use_chunks=not args.no_chunks)
-
-    elif args.command == 'merge':
-        tool.plan_merge(sources=args.sources, target=args.target, output_file=args.output,
-                        filter_system=args.filter_system, network_target=args.network)
-
-    elif args.command == 'plan':
-        plan = tool.plan_migration(args.target_disk, args.dest_disks)
-        if plan:
-            print(f"\nPlan generated: {plan['file_count']} files, {tool.format_size(plan['total_size'])}")
-            print(f"Destination disks: {', '.join(plan['destination_disks'])}")
-
-    elif args.command == 'execute':
-        tool.execute_migration(args.plan_file, dry_run=args.dry_run)
-
-    elif args.command == 'report':
-        tool.generate_report(format=args.format, show_duplicates=args.show_duplicates, preview_merge=args.preview_merge)
+    p=argparse.ArgumentParser();s=p.add_subparsers(dest='cmd',required=True)
+    i=s.add_parser('index');i.add_argument('root');i.add_argument('name')
+    pl=s.add_parser('plan');pl.add_argument('target');pl.add_argument('dests',nargs='+')
+    e=s.add_parser('execute');e.add_argument('file');e.add_argument('--dry',action='store_true')
+    d=s.add_parser('dedupe');d.add_argument('--disk');d.add_argument('--no-chunks',action='store_true')
+    m=s.add_parser('merge');m.add_argument('--sources',nargs='+',required=True);m.add_argument('--target',required=True);m.add_argument('--out',default='merge.json');m.add_argument('--filter',action='store_true');m.add_argument('--net')
+    r=s.add_parser('report');r.add_argument('--fmt',default='text');r.add_argument('--dups',action='store_true');r.add_argument('--preview')
+    a=p.parse_args();t=DiskReorganizer()
+    if a.cmd=='index':t.index_disk(a.root,a.name)
+    elif a.cmd=='dedupe':t.run_deduplication(a.disk,not a.no_chunks)
+    elif a.cmd=='merge':t.plan_merge(a.sources,a.target,a.out,a.filter,a.net)
+    elif a.cmd=='plan':plan=t.plan_migration(a.target,a.dests);print(f"\nPlan {plan['file_count']} files {t.format_size(plan['total_size'])}")
+    elif a.cmd=='execute':t.execute_migration(a.file,a.dry)
+    elif a.cmd=='report':t.generate_report(a.fmt,a.dups,a.preview)
 
 
 if __name__ == '__main__':
     main()