base
This commit is contained in:
@@ -70,17 +70,17 @@ class DeduplicationEngine:
|
||||
if disk:
|
||||
cursor.execute("""
|
||||
SELECT path, size
|
||||
FROM files_bak
|
||||
WHERE disk = %s AND checksum IS NULL
|
||||
FROM files
|
||||
WHERE disk_label = %s AND checksum IS NULL
|
||||
ORDER BY size DESC
|
||||
""", (disk,))
|
||||
""", (disk,))
|
||||
else:
|
||||
cursor.execute("""
|
||||
SELECT path, size
|
||||
FROM files_bak
|
||||
FROM files
|
||||
WHERE checksum IS NULL
|
||||
ORDER BY size DESC
|
||||
""")
|
||||
""")
|
||||
|
||||
files_to_process = cursor.fetchall()
|
||||
total_files = len(files_to_process)
|
||||
@@ -108,10 +108,10 @@ class DeduplicationEngine:
|
||||
if checksum:
|
||||
# Update database
|
||||
cursor.execute("""
|
||||
UPDATE files_bak
|
||||
UPDATE files
|
||||
SET checksum = %s, duplicate_of = %s
|
||||
WHERE path = %s
|
||||
""", (checksum, duplicate_of, str(path)))
|
||||
""", (checksum, duplicate_of, str(path)))
|
||||
|
||||
stats.files_succeeded += 1
|
||||
stats.bytes_processed += size
|
||||
@@ -225,19 +225,19 @@ class DeduplicationEngine:
|
||||
if disk:
|
||||
cursor.execute("""
|
||||
SELECT checksum, array_agg(path ORDER BY path) as paths
|
||||
FROM files_bak
|
||||
WHERE disk = %s AND checksum IS NOT NULL
|
||||
FROM files
|
||||
WHERE disk_label = %s AND checksum IS NOT NULL
|
||||
GROUP BY checksum
|
||||
HAVING COUNT(*) > 1
|
||||
""", (disk,))
|
||||
""", (disk,))
|
||||
else:
|
||||
cursor.execute("""
|
||||
SELECT checksum, array_agg(path ORDER BY path) as paths
|
||||
FROM files_bak
|
||||
FROM files
|
||||
WHERE checksum IS NOT NULL
|
||||
GROUP BY checksum
|
||||
HAVING COUNT(*) > 1
|
||||
""")
|
||||
""")
|
||||
|
||||
duplicates = {}
|
||||
for checksum, paths in cursor.fetchall():
|
||||
@@ -262,18 +262,18 @@ class DeduplicationEngine:
|
||||
stats = {}
|
||||
|
||||
# Total files
|
||||
cursor.execute("SELECT COUNT(*) FROM files_bak WHERE checksum IS NOT NULL")
|
||||
cursor.execute("SELECT COUNT(*) FROM files WHERE checksum IS NOT NULL")
|
||||
stats['total_files'] = cursor.fetchone()[0]
|
||||
|
||||
# Unique files
|
||||
cursor.execute("SELECT COUNT(DISTINCT checksum) FROM files_bak WHERE checksum IS NOT NULL")
|
||||
cursor.execute("SELECT COUNT(DISTINCT checksum) FROM files WHERE checksum IS NOT NULL")
|
||||
stats['unique_files'] = cursor.fetchone()[0]
|
||||
|
||||
# Duplicate files
|
||||
stats['duplicate_files'] = stats['total_files'] - stats['unique_files']
|
||||
|
||||
# Total size
|
||||
cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files_bak WHERE checksum IS NOT NULL")
|
||||
cursor.execute("SELECT COALESCE(SUM(size), 0) FROM files WHERE checksum IS NOT NULL")
|
||||
stats['total_size'] = cursor.fetchone()[0]
|
||||
|
||||
# Unique size
|
||||
@@ -281,10 +281,10 @@ class DeduplicationEngine:
|
||||
SELECT COALESCE(SUM(size), 0)
|
||||
FROM (
|
||||
SELECT DISTINCT ON (checksum) size
|
||||
FROM files_bak
|
||||
FROM files
|
||||
WHERE checksum IS NOT NULL
|
||||
) AS unique_files
|
||||
""")
|
||||
""")
|
||||
stats['unique_size'] = cursor.fetchone()[0]
|
||||
|
||||
# Wasted space
|
||||
@@ -321,14 +321,14 @@ class DeduplicationEngine:
|
||||
cursor.execute("""
|
||||
WITH canonical AS (
|
||||
SELECT DISTINCT ON (checksum) path, checksum
|
||||
FROM files_bak
|
||||
FROM files
|
||||
WHERE checksum IS NOT NULL
|
||||
ORDER BY checksum, path
|
||||
)
|
||||
UPDATE files_bak
|
||||
UPDATE files
|
||||
SET duplicate_of = NULL
|
||||
WHERE path IN (SELECT path FROM canonical)
|
||||
""")
|
||||
""")
|
||||
|
||||
count = cursor.rowcount
|
||||
conn.commit()
|
||||
|
||||
@@ -227,7 +227,7 @@ class HashStore:
|
||||
# Get all files with their hashes
|
||||
cursor.execute("""
|
||||
SELECT f.path, f.checksum
|
||||
FROM files_bak f
|
||||
FROM files f
|
||||
WHERE f.checksum IS NOT NULL
|
||||
""")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user