commit e9eb7ea5d9 (parent 9759001f4c)
mike, 2025-12-13 11:35:33 +01:00
16 changed files with 899 additions and 216 deletions


@@ -0,0 +1,110 @@
from pathlib import Path
from typing import Dict, Set, List
from collections import Counter
class FolderAnalyzer:
def __init__(self):
self.manifest_files = {
'java': ['pom.xml', 'build.gradle', 'build.gradle.kts'],
'javascript': ['package.json', 'yarn.lock', 'package-lock.json'],
'python': ['pyproject.toml', 'setup.py', 'requirements.txt', 'Pipfile'],
'go': ['go.mod', 'go.sum'],
'rust': ['Cargo.toml', 'Cargo.lock'],
'docker': ['Dockerfile', 'docker-compose.yml', 'docker-compose.yaml'],
'k8s': ['helm', 'kustomization.yaml', 'deployment.yaml']
}
self.intent_keywords = {
'infrastructure': ['infra', 'deploy', 'k8s', 'docker', 'terraform', 'ansible'],
'application': ['app', 'service', 'api', 'server', 'client'],
'data': ['data', 'dataset', 'models', 'training', 'ml'],
'documentation': ['docs', 'documentation', 'wiki', 'readme'],
'testing': ['test', 'tests', 'spec', 'e2e', 'integration'],
'build': ['build', 'dist', 'target', 'out', 'bin'],
'config': ['config', 'conf', 'settings', 'env']
}
def analyze_folder(self, folder_path: Path, files: List[Dict]) -> Dict:
files_list = [Path(f['path']) for f in files]
has_readme = any('readme' in f.name.lower() for f in files_list)
has_git = any('.git' in str(f) for f in files_list)
manifest_types = self._detect_manifests(files_list)
has_manifest = len(manifest_types) > 0
file_types = Counter(f.suffix.lower() for f in files_list if f.suffix)
dominant_types = dict(file_types.most_common(10))
intent = self._infer_intent(folder_path.name.lower(), files_list)
project_type = self._infer_project_type(manifest_types, dominant_types)
structure = {
'depth': len(folder_path.parts),
'has_src': any('src' in str(f) for f in files_list[:20]),
'has_tests': any('test' in str(f) for f in files_list[:20]),
'has_docs': any('doc' in str(f) for f in files_list[:20])
}
return {
'has_readme': has_readme,
'has_git': has_git,
'has_manifest': has_manifest,
'manifest_types': manifest_types,
'dominant_file_types': dominant_types,
'project_type': project_type,
'intent': intent,
'structure': structure
}
def _detect_manifests(self, files: List[Path]) -> List[str]:
detected = []
file_names = {f.name for f in files}
for tech, manifests in self.manifest_files.items():
if any(m in file_names for m in manifests):
detected.append(tech)
return detected
def _infer_intent(self, folder_name: str, files: List[Path]) -> str:
file_str = ' '.join(str(f) for f in files[:50])
for intent, keywords in self.intent_keywords.items():
if any(kw in folder_name or kw in file_str.lower() for kw in keywords):
return intent
return 'unknown'
def _infer_project_type(self, manifests: List[str], file_types: Dict) -> str:
if manifests:
return manifests[0]
if '.py' in file_types and file_types.get('.py', 0) > 5:
return 'python'
if '.js' in file_types or '.ts' in file_types:
return 'javascript'
if '.java' in file_types:
return 'java'
if '.go' in file_types:
return 'go'
return 'mixed'
def generate_summary(self, folder_analysis: Dict, readme_text: str = None) -> str:
parts = []
if folder_analysis.get('project_type'):
parts.append(f"{folder_analysis['project_type']} project")
if folder_analysis.get('intent'):
parts.append(f"for {folder_analysis['intent']}")
if folder_analysis.get('manifest_types'):
parts.append(f"using {', '.join(folder_analysis['manifest_types'])}")
if readme_text:
first_para = readme_text.split('\n\n')[0][:200]
parts.append(f"Description: {first_para}")
return ' '.join(parts) if parts else 'Mixed content folder'
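
A minimal usage sketch for the analyzer above, assuming it is importable as analysis.folder_analyzer (the same import path the analyze_folders command later in this commit uses); the sample file records are invented:

from pathlib import Path
from analysis.folder_analyzer import FolderAnalyzer  # same import path analyze_folders uses below

analyzer = FolderAnalyzer()
files = [  # hypothetical records in the shape analyze_folder expects
    {'path': 'myapp/pom.xml', 'size': 2048},
    {'path': 'myapp/src/Main.java', 'size': 4096},
    {'path': 'myapp/README.md', 'size': 512},
]
analysis = analyzer.analyze_folder(Path('myapp'), files)
print(analysis['project_type'], analysis['intent'])  # 'java' 'application'
print(analyzer.generate_summary(analysis))           # 'java project for application using java'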


@@ -0,0 +1,59 @@
from typing import Dict
import re
class ContentEnricher:
def __init__(self, llm_client=None):
self.llm_client = llm_client
self.pii_patterns = {
'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
'phone': r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',
'ssn': r'\b\d{3}-\d{2}-\d{4}\b',
'credit_card': r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b'
}
def enrich(self, text: str, use_llm: bool = False) -> Dict:
enrichment = {
'summary': self._basic_summary(text),
'word_count': len(text.split()),
'has_pii': self._detect_pii(text),
'quality': self._assess_quality(text),
'topics': self._extract_basic_topics(text)
}
if use_llm and self.llm_client:
llm_result = self.llm_client.classify_content(text)
if llm_result.get('success'):
enrichment['llm_classification'] = llm_result['text']
return enrichment
def _basic_summary(self, text: str) -> str:
sentences = re.split(r'[.!?]+', text)
return ' '.join(sentences[:3])[:200]
def _detect_pii(self, text: str) -> Dict:
detected = {}
for pii_type, pattern in self.pii_patterns.items():
matches = re.findall(pattern, text)
if matches:
detected[pii_type] = len(matches)
return detected
def _assess_quality(self, text: str) -> str:
if len(text.strip()) < 10:
return 'low'
special_char_ratio = sum(1 for c in text if not c.isalnum() and not c.isspace()) / len(text)
if special_char_ratio > 0.3:
return 'low'
return 'high' if len(text.split()) > 50 else 'medium'
def _extract_basic_topics(self, text: str) -> list:
words = re.findall(r'\b[A-Z][a-z]+\b', text)
word_freq = {}
for word in words:
if len(word) > 3:
word_freq[word] = word_freq.get(word, 0) + 1
return sorted(word_freq, key=word_freq.get, reverse=True)[:10]
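
A quick sketch of the heuristic path (no LLM client), assuming the module is importable as enrichment.enricher, which is how enrich_files imports it later in this commit; the sample text is made up:

from enrichment.enricher import ContentEnricher

enricher = ContentEnricher()  # regex/heuristic enrichment only; pass an LLM client to enable use_llm=True
sample = "Project Kickoff notes. Contact Alice at alice@example.com for access. " * 10
result = enricher.enrich(sample, use_llm=False)
print(result['quality'])      # 'high' (more than 50 words, low special-character ratio)
print(result['has_pii'])      # {'email': 10}
print(result['topics'][:3])   # most frequent capitalized words, e.g. ['Project', 'Kickoff', 'Contact']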


@@ -0,0 +1,54 @@
import requests
import json
from typing import Dict, Optional
class LLMClient:
def __init__(self, endpoint: str = 'http://192.168.1.74:1234', model: str = 'local'):
self.endpoint = endpoint
self.model = model
self.local_ollama = 'http://localhost:11434'
def summarize(self, text: str, max_length: int = 200) -> Dict:
prompt = f"Summarize the following in {max_length} chars or less:\n\n{text[:2000]}"
return self._query(prompt)
def extract_topics(self, text: str) -> Dict:
prompt = f"Extract 5-10 key topics/tags from this text. Return as comma-separated list:\n\n{text[:2000]}"
return self._query(prompt)
def classify_content(self, text: str) -> Dict:
prompt = f"Classify this content. Return: category, topics, has_pii (yes/no), quality (high/medium/low):\n\n{text[:1000]}"
return self._query(prompt)
def _query(self, prompt: str, use_local: bool = False) -> Dict:
try:
endpoint = self.local_ollama if use_local else self.endpoint
if use_local:
response = requests.post(
f'{endpoint}/api/generate',
json={'model': 'llama3.2', 'prompt': prompt, 'stream': False},
timeout=30
)
else:
response = requests.post(
f'{endpoint}/v1/chat/completions',
json={
'model': self.model,
'messages': [{'role': 'user', 'content': prompt}],
'max_tokens': 500
},
timeout=30
)
if response.status_code == 200:
data = response.json()
if use_local:
return {'success': True, 'text': data.get('response', '')}
else:
return {'success': True, 'text': data['choices'][0]['message']['content']}
else:
return {'success': False, 'error': f'HTTP {response.status_code}'}
except Exception as e:
return {'success': False, 'error': str(e)}
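
A hedged example of calling the client; the 192.168.1.74:1234 default comes from the constructor above, the module path is a guess since nothing in this commit imports LLMClient yet, and note that the public helpers always call _query with use_local=False, so the Ollama fallback is currently unreachable:

from enrichment.llm_client import LLMClient  # assumed location; adjust to wherever llm_client.py actually lives

client = LLMClient(endpoint='http://192.168.1.74:1234')
result = client.summarize('Quarterly report text to condense ...', max_length=150)
if result.get('success'):
    print(result['text'])
else:
    print('LLM call failed:', result['error'])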


@@ -27,7 +27,7 @@ class DiskReorganizer:
def __init__(self, db_config: Dict=None):
if db_config is None:
- db_config = {'host': os.getenv('DB_HOST', '192.168.1.159'), 'port': int(os.getenv('DB_PORT', 5432)), 'database': os.getenv('DB_NAME', 'disk_reorganizer_db'), 'user': os.getenv('DB_USER', 'disk_reorg_user'), 'password': os.getenv('DB_PASSWORD', 'heel-goed-wachtwoord')}
+ db_config = {'host': os.getenv('DB_HOST', '192.168.1.159'), 'port': int(os.getenv('DB_PORT', 5432)), 'database': os.getenv('DB_NAME', 'disk_reorganizer_db'), 'user': os.getenv('DB_USER', 'auction'), 'password': os.getenv('DB_PASSWORD', 'heel-goed-wachtwoord')}
self.db_config = db_config
self.init_database()
@@ -522,23 +522,126 @@ class DiskReorganizer:
cursor.close()
conn.close()
- def classify_files(self, disk: Optional[str]=None, update_db: bool=False):
+ def parse_files(self, kind: Optional[str] = None, limit: int = 100, update_db: bool = False):
from parsers.text_parser import TextParser
from parsers.code_parser import CodeParser
from parsers.pdf_parser import PDFParser
parsers = {'text': TextParser(), 'code': CodeParser(), 'pdf': PDFParser()}
disk_mount_map = {'SMT': '/media/mike/SMT', 'DISK1': '/media/mike/DISK1', 'LLM': '/media/mike/LLM'}
conn = self.get_connection()
cursor = conn.cursor()
try:
query = "SELECT path, size, disk_label FROM files WHERE 1=1"
params = []
if kind:
suffix_map = {'text': ('.txt', '.md', '.log', '.json'), 'code': ('.py', '.js', '.java', '.go'), 'pdf': ('.pdf',)}
if kind in suffix_map:
suffixes = suffix_map[kind]
# parameterized suffix filter with explicit parentheses around the OR clause
query += " AND (" + " OR ".join("path LIKE %s" for _ in suffixes) + ")"
params.extend(f"%{s}" for s in suffixes)
query += f" LIMIT {limit}"
cursor.execute(query, params)
files = cursor.fetchall()
print(f"\n=== PARSING FILES ===\nProcessing {len(files)} files\n")
parsed_count = 0
for path, size, disk_label in files:
mount_point = disk_mount_map.get(disk_label, disk_label)
full_path = Path(mount_point) / path if not Path(path).is_absolute() else Path(path)
if not full_path.exists() or int(size) > 10 * 1024 * 1024:
continue
file_kind = 'pdf' if path.endswith('.pdf') else 'code' if any(path.endswith(e) for e in ['.py', '.js', '.java']) else 'text'
parser = parsers.get(file_kind)
if not parser:
continue
result = parser.parse(full_path)
if 'error' not in result:
text = result.get('text', '')
quality = result.get('quality', 'unknown')
print(f"{path[:60]} | {file_kind} | {len(text):,} chars")
if update_db and text:
cursor.execute("UPDATE files SET extracted_text = %s, text_quality = %s WHERE path = %s", (text[:50000], quality, path))
parsed_count += 1
if parsed_count % 10 == 0:
conn.commit()
if update_db:
conn.commit()
print(f"\nParsed {parsed_count} files")
finally:
cursor.close()
conn.close()
def enrich_files(self, limit: int = 10, llm_endpoint: str = None, use_local: bool = False):
from enrichment.enricher import ContentEnricher
enricher = ContentEnricher()
conn = self.get_connection()
cursor = conn.cursor()
try:
cursor.execute(f"SELECT path, extracted_text FROM files WHERE extracted_text IS NOT NULL LIMIT {limit}")
files = cursor.fetchall()
print(f"\n=== ENRICHING CONTENT ===\nProcessing {len(files)} files\n")
for path, text in files:
enrichment = enricher.enrich(text[:5000], use_llm=False)
print(f"{path[:60]}")
print(f" Quality: {enrichment.get('quality')} | Words: {enrichment.get('word_count'):,}")
print(f" PII: {list(enrichment.get('has_pii', {}).keys())}")
print(f" Topics: {', '.join(enrichment.get('topics', [])[:5])}\n")
cursor.execute("UPDATE files SET enrichment = %s::jsonb WHERE path = %s", (json.dumps(enrichment), path))
conn.commit()
print(f"Enriched {len(files)} files")
finally:
cursor.close()
conn.close()
def classify_files(self, disk: Optional[str]=None, update_db: bool=False, resume: bool=True):
from classification.classifier import FileClassifier
classifier = FileClassifier()
conn = self.get_connection()
cursor = conn.cursor()
try:
task_name = f"classify_{disk or 'all'}"
skip_count = 0
if resume and update_db:
cursor.execute('SELECT last_processed_path, processed_count FROM processing_checkpoints WHERE task_name = %s', (task_name,))
checkpoint = cursor.fetchone()
if checkpoint:
last_path, skip_count = checkpoint
logger.info(f'Resuming from checkpoint: {skip_count:,} files already processed')
if disk:
- cursor.execute('SELECT path, size, disk_label FROM files WHERE disk_label = %s', (disk,))
+ cursor.execute('SELECT path, size, disk_label FROM files WHERE disk_label = %s ORDER BY path', (disk,))
else:
- cursor.execute('SELECT path, size, disk_label FROM files')
+ cursor.execute('SELECT path, size, disk_label FROM files ORDER BY path')
files = cursor.fetchall()
total = len(files)
logger.info(f'Classifying {total:,} files...')
categories = {}
build_artifacts = 0
batch = []
processed = 0
for idx, (path, size, disk_label) in enumerate(files, 1):
if idx <= skip_count:
continue
labels, category, is_build = classifier.classify_path(path, int(size))
if is_build:
build_artifacts += 1
@@ -546,18 +649,40 @@ class DiskReorganizer:
categories[category] = {'count': 0, 'size': 0}
categories[category]['count'] += 1
categories[category]['size'] += int(size)
if update_db:
labels_str = ','.join(labels)
batch.append((category, labels_str, path))
if len(batch) >= 1000:
cursor.executemany('UPDATE files SET category = %s WHERE path = %s', [(cat, p) for cat, lbl, p in batch])
cursor.execute('''
INSERT INTO processing_checkpoints (task_name, last_processed_path, processed_count, updated_at)
VALUES (%s, %s, %s, CURRENT_TIMESTAMP)
ON CONFLICT (task_name) DO UPDATE SET
last_processed_path = EXCLUDED.last_processed_path,
processed_count = EXCLUDED.processed_count,
updated_at = CURRENT_TIMESTAMP
''', (task_name, path, idx))
conn.commit()
batch.clear()
processed += 1
if idx % 1000 == 0:
- print(f'\rClassified: {idx:,}/{total:,}', end='', flush=True)
+ print(f'\rClassified: {idx:,}/{total:,} ({100*idx/total:.1f}%)', end='', flush=True)
if update_db and batch:
cursor.executemany('UPDATE files SET category = %s WHERE path = %s', [(cat, p) for cat, lbl, p in batch])
cursor.execute('''
INSERT INTO processing_checkpoints (task_name, last_processed_path, processed_count, updated_at)
VALUES (%s, %s, %s, CURRENT_TIMESTAMP)
ON CONFLICT (task_name) DO UPDATE SET
last_processed_path = EXCLUDED.last_processed_path,
processed_count = EXCLUDED.processed_count,
updated_at = CURRENT_TIMESTAMP
''', (task_name, files[-1][0] if files else '', total))
conn.commit()
print()
print(f'\n=== CLASSIFICATION SUMMARY ===')
print(f'Total files: {total:,}')
@@ -570,6 +695,99 @@ class DiskReorganizer:
cursor.close()
conn.close()
def analyze_folders(self, disk: Optional[str]=None, min_files: int=3):
from analysis.folder_analyzer import FolderAnalyzer
analyzer = FolderAnalyzer()
conn = self.get_connection()
cursor = conn.cursor()
try:
query = '''
SELECT DISTINCT SUBSTRING(path FROM 1 FOR POSITION('/' IN path || '/') - 1) as folder, disk_label
FROM files
WHERE 1=1
'''
params = []
if disk:
query += ' AND disk_label = %s'
params.append(disk)
cursor.execute(query, params)
potential_folders = cursor.fetchall()
logger.info(f'Found {len(potential_folders)} potential folders to analyze')
processed = 0
for folder_name, disk_label in potential_folders:
cursor.execute('''
SELECT path, size FROM files
WHERE disk_label = %s AND path LIKE %s
''', (disk_label, f'{folder_name}%'))
files = cursor.fetchall()
if len(files) < min_files:
continue
files_list = [{'path': f[0], 'size': int(f[1])} for f in files]
folder_path = Path(folder_name)
analysis = analyzer.analyze_folder(folder_path, files_list)
readme_text = None
for file_dict in files_list:
if 'readme' in file_dict['path'].lower():
readme_text = f"Found README at {file_dict['path']}"
break
summary = analyzer.generate_summary(analysis, readme_text)
cursor.execute('''
INSERT INTO folders (path, disk_label, file_count, total_size, project_type, intent, summary,
has_readme, has_git, has_manifest, manifest_types, dominant_file_types, structure)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (path) DO UPDATE SET
file_count = EXCLUDED.file_count,
total_size = EXCLUDED.total_size,
project_type = EXCLUDED.project_type,
intent = EXCLUDED.intent,
summary = EXCLUDED.summary,
has_readme = EXCLUDED.has_readme,
has_git = EXCLUDED.has_git,
has_manifest = EXCLUDED.has_manifest,
manifest_types = EXCLUDED.manifest_types,
dominant_file_types = EXCLUDED.dominant_file_types,
structure = EXCLUDED.structure,
updated_at = CURRENT_TIMESTAMP
''', (
str(folder_path), disk_label, len(files_list), sum(f['size'] for f in files_list),
analysis.get('project_type'), analysis.get('intent'), summary,
analysis.get('has_readme'), analysis.get('has_git'), analysis.get('has_manifest'),
analysis.get('manifest_types'), json.dumps(analysis.get('dominant_file_types', {})),
json.dumps(analysis.get('structure', {}))
))
processed += 1
if processed % 100 == 0:
conn.commit()
print(f'\rAnalyzed: {processed} folders', end='', flush=True)
conn.commit()
print()
logger.info(f'Completed folder analysis: {processed} folders')
cursor.execute('''
SELECT project_type, COUNT(*), SUM(file_count), SUM(total_size)
FROM folders
GROUP BY project_type
''')
print(f'\n=== FOLDER ANALYSIS SUMMARY ===')
for row in cursor.fetchall():
proj_type, count, files, size = row
print(f'{proj_type:20}: {count:6,} folders, {files:8,} files, {self.format_size(int(size or 0))}')
finally:
cursor.close()
conn.close()
def review_migration(self, category: Optional[str]=None, show_build: bool=False):
from classification.classifier import FileClassifier
classifier = FileClassifier()
@@ -640,9 +858,24 @@ def main():
extract_parser = subparsers.add_parser('extract', help='Extract content from files')
extract_parser.add_argument('--kind', help='Extract specific kind (pdf, image, audio, video)')
extract_parser.add_argument('--limit', type=int, default=10, help='Limit extraction batch')
parse_parser = subparsers.add_parser('parse', help='Parse files to extract text')
parse_parser.add_argument('--kind', help='Parse specific kind (text, code, pdf)')
parse_parser.add_argument('--limit', type=int, default=100, help='Limit parse batch')
parse_parser.add_argument('--update', action='store_true', help='Save extracted text to database')
enrich_parser = subparsers.add_parser('enrich', help='Enrich content with LLM analysis')
enrich_parser.add_argument('--limit', type=int, default=10, help='Limit enrichment batch')
enrich_parser.add_argument('--llm-endpoint', default='http://192.168.1.74:1234', help='LLM endpoint')
enrich_parser.add_argument('--local', action='store_true', help='Use local Ollama')
classify_parser = subparsers.add_parser('classify', help='Classify files and suggest organization')
classify_parser.add_argument('--disk', help='Classify specific disk')
classify_parser.add_argument('--update', action='store_true', help='Update database with classifications')
classify_parser.add_argument('--no-resume', action='store_true', help='Start from scratch instead of resuming')
folders_parser = subparsers.add_parser('analyze-folders', help='Analyze folder structure and infer project intent')
folders_parser.add_argument('--disk', help='Analyze specific disk')
folders_parser.add_argument('--min-files', type=int, default=3, help='Minimum files per folder')
review_parser = subparsers.add_parser('review', help='Review proposed migration structure')
review_parser.add_argument('--category', help='Review specific category')
review_parser.add_argument('--show-build', action='store_true', help='Include build artifacts')
@@ -669,8 +902,14 @@ def main():
tool.profile_content(disk=args.disk, update_db=args.update, limit=args.limit)
elif args.command == 'extract':
tool.extract_content(kind=args.kind, limit=args.limit)
elif args.command == 'parse':
tool.parse_files(kind=args.kind, limit=args.limit, update_db=args.update)
elif args.command == 'enrich':
tool.enrich_files(limit=args.limit, llm_endpoint=args.llm_endpoint, use_local=args.local)
elif args.command == 'classify':
- tool.classify_files(disk=args.disk, update_db=args.update)
+ tool.classify_files(disk=args.disk, update_db=args.update, resume=not args.no_resume)
elif args.command == 'analyze-folders':
tool.analyze_folders(disk=args.disk, min_files=args.min_files)
elif args.command == 'review':
tool.review_migration(category=args.category, show_build=args.show_build)
elif args.command == 'report':
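
For reference, the new subcommands map onto the tool methods roughly as follows (a sketch; the script name is assumed from the class name, the SMT disk label comes from the mount map earlier in this diff, and the argument values are examples):

tool = DiskReorganizer()
# python disk_reorganizer.py parse --kind text --limit 100 --update
tool.parse_files(kind='text', limit=100, update_db=True)
# python disk_reorganizer.py enrich --limit 10
tool.enrich_files(limit=10)
# python disk_reorganizer.py classify --disk SMT --update   (checkpoint resume is on unless --no-resume)
tool.classify_files(disk='SMT', update_db=True, resume=True)
# python disk_reorganizer.py analyze-folders --disk SMT --min-files 3
tool.analyze_folders(disk='SMT', min_files=3)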


@@ -0,0 +1,44 @@
from pathlib import Path
from typing import Dict
import re
class CodeParser:
def __init__(self):
self.patterns = {
'python': {'imports': r'^import |^from .+ import', 'class': r'^class \w+', 'function': r'^def \w+'},
'javascript': {'imports': r'^import |^require\(', 'class': r'^class \w+', 'function': r'^function \w+|^const \w+ = '},
'java': {'package': r'^package ', 'imports': r'^import ', 'class': r'^public class \w+'},
'go': {'package': r'^package ', 'imports': r'^import ', 'function': r'^func \w+'}
}
def parse(self, file_path: Path) -> Dict:
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
text = f.read()
language = self._detect_language(file_path, text)
structure = self._extract_structure(text, language)
return {
'text': text,
'language': language,
'line_count': len(text.split('\n')),
'structure': structure,
'quality': 'high'
}
except Exception as e:
return {'error': str(e)}
def _detect_language(self, file_path: Path, text: str) -> str:
lang_map = {'.py': 'python', '.js': 'javascript', '.ts': 'typescript', '.java': 'java', '.go': 'go'}
return lang_map.get(file_path.suffix.lower(), 'unknown')
def _extract_structure(self, text: str, language: str) -> Dict:
patterns = self.patterns.get(language, {})
structure = {'type': 'code', 'language': language}
for key, pattern in patterns.items():
matches = re.findall(pattern, text, re.MULTILINE)
structure[key] = len(matches)
return structure
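
A short usage sketch; parsers.code_parser is the import path parse_files uses earlier in this commit, and any readable source file will do:

from pathlib import Path
from parsers.code_parser import CodeParser

result = CodeParser().parse(Path('app/parsers/code_parser.py'))  # any readable source file
if 'error' not in result:
    print(result['language'])           # 'python'
    print(result['line_count'], 'lines')
    print(result['structure'])          # counts of matched imports/class/function patterns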


@@ -0,0 +1,42 @@
from pathlib import Path
from typing import Dict
class MediaParser:
def parse_audio(self, file_path: Path) -> Dict:
return {
'text': '[Audio transcription pending]',
'needs_transcription': True,
'transcription_service': 'whisper',
'structure': {'type': 'audio'},
'quality': 'pending'
}
def parse_video(self, file_path: Path) -> Dict:
return {
'text': '[Video transcription pending]',
'needs_transcription': True,
'needs_scene_detection': True,
'transcription_service': 'whisper',
'structure': {'type': 'video'},
'quality': 'pending'
}
def parse_image(self, file_path: Path) -> Dict:
try:
from PIL import Image
with Image.open(file_path) as img:
width, height = img.size
mode = img.mode
return {
'text': '[Image caption/OCR pending]',
'needs_ocr': True,
'needs_caption': True,
'dimensions': f'{width}x{height}',
'mode': mode,
'structure': {'type': 'image', 'width': width, 'height': height},
'quality': 'pending'
}
except Exception as e:
return {'error': str(e)}
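
A tiny sketch for the image branch, assuming Pillow is installed and the module is importable as parsers.media_parser (nothing in this commit wires MediaParser up yet); the audio and video methods only return transcription placeholders for now:

from pathlib import Path
from parsers.media_parser import MediaParser

info = MediaParser().parse_image(Path('photo.jpg'))  # hypothetical image file
if 'error' not in info:
    print(info['dimensions'], info['mode'])          # e.g. '4032x3024 RGB'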

app/parsers/pdf_parser.py (new file, 31 lines)

@@ -0,0 +1,31 @@
from pathlib import Path
from typing import Dict, List
class PDFParser:
def parse(self, file_path: Path) -> Dict:
try:
import PyPDF2
pages = []
with open(file_path, 'rb') as f:
pdf = PyPDF2.PdfReader(f)
page_count = len(pdf.pages)
for i, page in enumerate(pdf.pages[:50]):
text = page.extract_text()
pages.append({'page': i + 1, 'text': text, 'char_count': len(text)})
full_text = '\n\n'.join([p['text'] for p in pages])
has_text_layer = sum(p['char_count'] for p in pages) > 100
return {
'text': full_text,
'page_count': page_count,
'pages_extracted': len(pages),
'has_text_layer': has_text_layer,
'needs_ocr': not has_text_layer,
'structure': {'type': 'document', 'pages': pages[:5]},
'quality': 'high' if has_text_layer else 'needs_ocr'
}
except Exception as e:
return {'error': str(e), 'needs_ocr': True}
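
A usage sketch, assuming PyPDF2 is installed (it is imported lazily inside parse and does not appear in the requirements hunk of this commit); the path below is hypothetical:

from pathlib import Path
from parsers.pdf_parser import PDFParser

result = PDFParser().parse(Path('/media/mike/SMT/reports/example.pdf'))  # hypothetical file on the SMT mount
if 'error' not in result:
    status = 'has text layer' if result['has_text_layer'] else 'needs OCR'
    print(result['page_count'], 'pages,', status)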


@@ -0,0 +1,26 @@
from pathlib import Path
from typing import Dict, Optional
import chardet
class TextParser:
def parse(self, file_path: Path) -> Dict:
try:
with open(file_path, 'rb') as f:
raw_data = f.read(1024 * 1024)
encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
text = raw_data.decode(encoding, errors='ignore')
lines = text.split('\n')
return {
'text': text,
'encoding': encoding,
'line_count': len(lines),
'char_count': len(text),
'word_count': len(text.split()),
'structure': {'type': 'plain_text'},
'quality': 'high' if encoding == 'utf-8' else 'medium'
}
except Exception as e:
return {'error': str(e)}
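
A minimal sketch; chardet is the dependency added to requirements in this commit, and only the first 1 MB of the file is read for detection and decoding:

from pathlib import Path
from parsers.text_parser import TextParser

result = TextParser().parse(Path('notes.txt'))  # hypothetical text file
if 'error' not in result:
    print(result['encoding'], result['word_count'], result['quality'])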


@@ -232,6 +232,37 @@ services:
networks:
- defrag-network
flyway:
image: flyway/flyway:latest
container_name: flyway
volumes:
- ./sql/migration:/flyway/sql:ro
environment:
FLYWAY_URL: jdbc:postgresql://192.168.1.159:5432/disk_reorganizer_db
FLYWAY_USER: disk_reorg_user
FLYWAY_PASSWORD: heel-goed-wachtwoord
FLYWAY_SCHEMAS: public
FLYWAY_LOCATIONS: filesystem:./sql
FLYWAY_CONNECT_RETRIES: "60"
command: migrate
restart: "no"
pg_backup:
image: postgres:16
container_name: pg_backup
environment:
PGPASSWORD: heel-goed-wachtwoord
volumes:
- ./:/backup
command:
- bash
- -lc
- >
pg_dump -h 192.168.1.159 -p 5432 -U disk_reorg_user -d disk_reorganizer_db
--format=custom --no-owner --no-privileges
-f /backup/backup_$(date +%F_%H%M)_disk_reorganizer_db.dump
restart: "no"
networks:
defrag-network:
driver: bridge

flyway.conf (new file, 7 lines)

@@ -0,0 +1,7 @@
flyway.url=jdbc:postgresql://192.168.1.159:5432/disk_reorganizer_db
flyway.user=disk_reorg_user
flyway.password=heel-goed-wachtwoord
flyway.locations=filesystem:sql/migration
flyway.schemas=public


@@ -37,3 +37,5 @@ pytest-cov>=4.0.0
black>=23.0.0
mypy>=1.0.0
flake8>=6.0.0
chardet


@@ -1,176 +0,0 @@
-- sql/init.sql
-- Initialize PostgreSQL database for Project Defrag
-- Enable useful extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- Files table
CREATE TABLE IF NOT EXISTS files (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
path TEXT NOT NULL,
size BIGINT NOT NULL,
modified_time TIMESTAMP WITH TIME ZONE,
created_time TIMESTAMP WITH TIME ZONE,
file_hash VARCHAR(64), -- SHA-256 hash
checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility)
category VARCHAR(50),
disk_label VARCHAR(50),
last_verified TIMESTAMP WITH TIME ZONE,
status VARCHAR(20) DEFAULT 'indexed',
duplicate_of TEXT, -- Path to canonical file if this is a duplicate
-- Metadata
metadata JSONB DEFAULT '{}',
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Constraints
CONSTRAINT unique_file_path UNIQUE(path)
);
-- Operations table (audit log)
CREATE TABLE IF NOT EXISTS operations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
operation_type VARCHAR(50) NOT NULL,
source_path TEXT,
target_path TEXT,
status VARCHAR(20) NOT NULL,
-- Legacy compatibility fields
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
-- File reference
file_id UUID REFERENCES files(id) ON DELETE SET NULL,
-- Performance metrics
duration_ms INTEGER,
bytes_processed BIGINT,
-- Error information
error_message TEXT,
error_details JSONB,
-- Context
session_id VARCHAR(100),
user_agent TEXT,
-- Audit fields
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE,
executed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Deduplication hash store
CREATE TABLE IF NOT EXISTS deduplication_store (
hash VARCHAR(64) PRIMARY KEY,
canonical_path TEXT NOT NULL,
reference_count INTEGER DEFAULT 1,
first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Migration plan table
CREATE TABLE IF NOT EXISTS migration_plans (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(100) NOT NULL,
source_disk VARCHAR(50) NOT NULL,
target_disk VARCHAR(50) NOT NULL,
plan_json JSONB NOT NULL,
-- Statistics
total_files INTEGER DEFAULT 0,
total_size BIGINT DEFAULT 0,
estimated_duration INTEGER, -- in seconds
-- Status
status VARCHAR(20) DEFAULT 'draft',
-- Audit
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_files_path ON files (path);
CREATE INDEX IF NOT EXISTS idx_files_hash ON files (file_hash);
CREATE INDEX IF NOT EXISTS idx_files_disk ON files (disk_label);
CREATE INDEX IF NOT EXISTS idx_files_category ON files (category);
CREATE INDEX IF NOT EXISTS idx_files_status ON files (status);
create index on files (checksum);
create index on files (checksum,path);
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations(status);
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations(created_at);
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations(file_id);
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store(canonical_path);
-- Functions for updating timestamps
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
-- Triggers for automatic updated_at
CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- View for operational dashboard
CREATE OR REPLACE VIEW operational_dashboard AS
SELECT
o.status,
COUNT(*) as operation_count,
SUM(o.bytes_processed) as total_bytes,
AVG(o.duration_ms) as avg_duration_ms,
MIN(o.started_at) as earliest_operation,
MAX(o.completed_at) as latest_operation
FROM operations o
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
GROUP BY o.status;
-- View for disk usage statistics
CREATE OR REPLACE VIEW disk_usage_stats AS
SELECT
disk_label,
COUNT(*) as file_count,
SUM(size) as total_size,
AVG(size) as avg_file_size,
MIN(created_time) as oldest_file,
MAX(modified_time) as newest_file
FROM files
GROUP BY disk_label;
-- Insert default configuration
INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status)
VALUES (
'Default Migration Plan',
'disk_d',
'disk_e',
'{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb,
'draft'
) ON CONFLICT DO NOTHING;
-- Create read-only user for monitoring
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN
CREATE USER monitor_user WITH PASSWORD 'monitor_password';
END IF;
END
$$;
GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user;
GRANT USAGE ON SCHEMA public TO monitor_user;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user;
GRANT SELECT ON operational_dashboard TO monitor_user;
GRANT SELECT ON disk_usage_stats TO monitor_user;


@@ -0,0 +1,188 @@
-- sql/init.sql
-- Initialize PostgreSQL database for Project Defrag
-- Enable useful extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- future tables/sequences created by your owner role (pick the role that creates them)
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
GRANT ALL PRIVILEGES ON TABLES TO disk_reorg_user;
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
GRANT ALL PRIVILEGES ON SEQUENCES TO disk_reorg_user;
ALTER DATABASE disk_reorganizer_db OWNER TO disk_reorg_user;
-- Files table
CREATE TABLE IF NOT EXISTS files
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
path TEXT NOT NULL,
size BIGINT NOT NULL,
modified_time TIMESTAMP WITH TIME ZONE,
created_time TIMESTAMP WITH TIME ZONE,
file_hash VARCHAR(64), -- SHA-256 hash
checksum VARCHAR(64), -- Alias for file_hash (legacy compatibility)
category VARCHAR(50),
disk_label VARCHAR(50),
last_verified TIMESTAMP WITH TIME ZONE,
status VARCHAR(20) DEFAULT 'indexed',
duplicate_of TEXT, -- Path to canonical file if this is a duplicate
-- Metadata
metadata JSONB DEFAULT '{}',
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Constraints
CONSTRAINT unique_file_path UNIQUE (path)
);
-- Operations table (audit log)
CREATE TABLE IF NOT EXISTS operations
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
operation_type VARCHAR(50) NOT NULL,
source_path TEXT,
target_path TEXT,
status VARCHAR(20) NOT NULL,
-- Legacy compatibility fields
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
-- File reference
file_id UUID REFERENCES files (id) ON DELETE SET NULL,
-- Performance metrics
duration_ms INTEGER,
bytes_processed BIGINT,
-- Error information
error_message TEXT,
error_details JSONB,
-- Context
session_id VARCHAR(100),
user_agent TEXT,
-- Audit fields
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE,
executed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Deduplication hash store
CREATE TABLE IF NOT EXISTS deduplication_store
(
hash VARCHAR(64) PRIMARY KEY,
canonical_path TEXT NOT NULL,
reference_count INTEGER DEFAULT 1,
first_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_seen TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Migration plan table
CREATE TABLE IF NOT EXISTS migration_plans
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(100) NOT NULL,
source_disk VARCHAR(50) NOT NULL,
target_disk VARCHAR(50) NOT NULL,
plan_json JSONB NOT NULL,
-- Statistics
total_files INTEGER DEFAULT 0,
total_size BIGINT DEFAULT 0,
estimated_duration INTEGER, -- in seconds
-- Status
status VARCHAR(20) DEFAULT 'draft',
-- Audit
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_files_path ON files (path);
CREATE INDEX IF NOT EXISTS idx_files_hash ON files (file_hash);
CREATE INDEX IF NOT EXISTS idx_files_disk ON files (disk_label);
CREATE INDEX IF NOT EXISTS idx_files_category ON files (category);
CREATE INDEX IF NOT EXISTS idx_files_status ON files (status);
create index on files (checksum);
create index on files (checksum, path);
CREATE INDEX IF NOT EXISTS idx_operations_status ON operations (status);
CREATE INDEX IF NOT EXISTS idx_operations_created ON operations (created_at);
CREATE INDEX IF NOT EXISTS idx_operations_file_id ON operations (file_id);
CREATE INDEX IF NOT EXISTS idx_dedup_canonical ON deduplication_store (canonical_path);
-- Functions for updating timestamps
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS
$$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ language 'plpgsql';
-- Triggers for automatic updated_at
CREATE TRIGGER update_files_updated_at
BEFORE UPDATE
ON files
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- View for operational dashboard
CREATE OR REPLACE VIEW operational_dashboard AS
SELECT o.status,
COUNT(*) as operation_count,
SUM(o.bytes_processed) as total_bytes,
AVG(o.duration_ms) as avg_duration_ms,
MIN(o.started_at) as earliest_operation,
MAX(o.completed_at) as latest_operation
FROM operations o
WHERE o.started_at > CURRENT_TIMESTAMP - INTERVAL '24 hours'
GROUP BY o.status;
-- View for disk usage statistics
CREATE OR REPLACE VIEW disk_usage_stats AS
SELECT disk_label,
COUNT(*) as file_count,
SUM(size) as total_size,
AVG(size) as avg_file_size,
MIN(created_time) as oldest_file,
MAX(modified_time) as newest_file
FROM files
GROUP BY disk_label;
-- Insert default configuration
INSERT INTO migration_plans (name, source_disk, target_disk, plan_json, status)
VALUES ('Default Migration Plan',
'disk_d',
'disk_e',
'{"strategy": "hardlink", "verify_copies": true, "preserve_timestamps": true}'::jsonb,
'draft')
ON CONFLICT DO NOTHING;
-- Create read-only user for monitoring
DO
$$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'monitor_user') THEN
CREATE USER monitor_user WITH PASSWORD 'monitor_password';
END IF;
END
$$;
GRANT CONNECT ON DATABASE disk_reorganizer_db TO monitor_user;
GRANT USAGE ON SCHEMA public TO monitor_user;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO monitor_user;
GRANT SELECT ON operational_dashboard TO monitor_user;
GRANT SELECT ON disk_usage_stats TO monitor_user;


@@ -0,0 +1,11 @@
-- Add extracted text and enrichment columns
ALTER TABLE files ADD COLUMN IF NOT EXISTS extracted_text TEXT;
ALTER TABLE files ADD COLUMN IF NOT EXISTS text_quality VARCHAR(20);
ALTER TABLE files ADD COLUMN IF NOT EXISTS enrichment JSONB;
-- Add indexes for text search
CREATE INDEX IF NOT EXISTS idx_files_extracted_text ON files USING gin(to_tsvector('english', extracted_text));
CREATE INDEX IF NOT EXISTS idx_files_enrichment ON files USING gin(enrichment);
-- Add full text search capability
CREATE INDEX IF NOT EXISTS idx_files_fts ON files USING gin(to_tsvector('english', COALESCE(extracted_text, '')));
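
A hedged sketch of querying the new columns from Python with psycopg2 (the driver the %s placeholders elsewhere in the tool suggest); the search term is an example, and the to_tsvector expression mirrors idx_files_fts above so the index can be used:

import psycopg2

conn = psycopg2.connect(host='192.168.1.159', port=5432, dbname='disk_reorganizer_db',
                        user='disk_reorg_user', password='heel-goed-wachtwoord')
cur = conn.cursor()
cur.execute("""
    SELECT path, text_quality
    FROM files
    WHERE to_tsvector('english', COALESCE(extracted_text, '')) @@ plainto_tsquery('english', %s)
    LIMIT 10
""", ('invoice',))
for path, quality in cur.fetchall():
    print(path, quality)
cur.close()
conn.close()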


@@ -0,0 +1,41 @@
CREATE TABLE IF NOT EXISTS folders
(
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
path TEXT NOT NULL UNIQUE,
parent_path TEXT,
disk_label VARCHAR(50),
file_count INT DEFAULT 0,
total_size BIGINT DEFAULT 0,
project_type VARCHAR(50),
intent TEXT,
summary TEXT,
has_readme BOOLEAN DEFAULT FALSE,
has_git BOOLEAN DEFAULT FALSE,
has_manifest BOOLEAN DEFAULT FALSE,
manifest_types TEXT[],
dominant_file_types JSONB,
structure JSONB,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_folders_path ON folders (path);
CREATE INDEX IF NOT EXISTS idx_folders_parent ON folders (parent_path);
CREATE INDEX IF NOT EXISTS idx_folders_disk ON folders (disk_label);
CREATE INDEX IF NOT EXISTS idx_folders_project_type ON folders (project_type);
CREATE TABLE IF NOT EXISTS processing_checkpoints
(
task_name VARCHAR(100) PRIMARY KEY,
last_processed_id TEXT,
last_processed_path TEXT,
processed_count INT DEFAULT 0,
total_count INT,
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);


@@ -19,54 +19,27 @@ CREATE DATABASE disk_reorganizer_db
CREATE USER disk_reorg_user WITH PASSWORD 'heel-goed-wachtwoord';
-- Create files table
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
size BIGINT NOT NULL,
modified_time DOUBLE PRECISION NOT NULL,
disk_label TEXT NOT NULL,
checksum TEXT,
status TEXT DEFAULT 'indexed',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create index on disk column for faster queries
CREATE INDEX IF NOT EXISTS idx_files_disk ON files(disk_label);
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
-- Create operations table
CREATE TABLE IF NOT EXISTS operations (
id SERIAL PRIMARY KEY,
source_path TEXT NOT NULL,
target_path TEXT NOT NULL,
operation_type TEXT NOT NULL,
executed INTEGER DEFAULT 0,
verified INTEGER DEFAULT 0,
error TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
executed_at TIMESTAMP
);
-- Create index on operations for faster lookups
CREATE INDEX IF NOT EXISTS idx_operations_executed ON operations(executed);
CREATE INDEX IF NOT EXISTS idx_operations_source ON operations(source_path);
-- Grant privileges to disk_reorg_user
GRANT CONNECT ON DATABASE disk_reorganizer_db TO disk_reorg_user;
GRANT USAGE ON SCHEMA public TO disk_reorg_user;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO disk_reorg_user;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO disk_reorg_user;
-- future tables/sequences created by your owner role (pick the role that creates them)
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
GRANT ALL PRIVILEGES ON TABLES TO disk_reorg_user;
ALTER DEFAULT PRIVILEGES FOR ROLE auction IN SCHEMA public
GRANT ALL PRIVILEGES ON SEQUENCES TO disk_reorg_user;
-- Create function to update updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
- RETURNS TRIGGER AS $$
+ RETURNS TRIGGER AS
+ $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
@@ -75,9 +48,10 @@ $$ LANGUAGE plpgsql;
-- Create trigger for files table
CREATE TRIGGER update_files_updated_at
- BEFORE UPDATE ON files
+ BEFORE UPDATE
+ ON files
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Display success message
\echo 'Database setup completed successfully!'