#!/usr/bin/env python3
"""
Sync local database updates to server-compatible format
Creates incremental exports with only NEW or UPDATED records
"""
import sqlite3
import json
import csv
from datetime import datetime
from pathlib import Path

DB_PATH = "C:/mnt/okcomputer/output/cache.db"
OUTPUT_DIR = Path("C:/mnt/okcomputer/output")


def fill_missing_auction_fields():
    """Fill in missing fields in the auctions table from scraped data"""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    print("Filling missing auction fields...")

    # Update closing_time from first_lot_closing_time
    cursor.execute("""
        UPDATE auctions
        SET closing_time = first_lot_closing_time
        WHERE closing_time IS NULL AND first_lot_closing_time IS NOT NULL
    """)
    updated = cursor.rowcount
    print(f" ✓ Updated {updated} closing_time fields")

    # Parse location to extract city and country
    cursor.execute("""
        SELECT auction_id, location
        FROM auctions
        WHERE location IS NOT NULL AND (city IS NULL OR country IS NULL)
    """)
    locations = cursor.fetchall()
    city_updates = 0
    for auction_id, location in locations:
        if not location:
            continue
        # Parse "City, COUNTRY" or "City, Region, COUNTRY"
        parts = [p.strip() for p in location.split(',')]
        if len(parts) >= 2:
            city = parts[0]
            country = parts[-1]
            cursor.execute("""
                UPDATE auctions
                SET city = ?, country = ?
                WHERE auction_id = ?
            """, (city, country, auction_id))
            city_updates += 1
    print(f" ✓ Updated {city_updates} city/country fields")

    # Set type to 'online' for all rows (Troostwijk is an online platform)
    cursor.execute("""
        UPDATE auctions
        SET type = 'online'
        WHERE type IS NULL
    """)
    type_updates = cursor.rowcount
    print(f" ✓ Updated {type_updates} type fields")

    conn.commit()
    conn.close()
    print("✓ Auction fields updated\n")


def get_last_sync_timestamp():
    """Get timestamp of last successful sync"""
    sync_file = OUTPUT_DIR / ".last_sync"
    if sync_file.exists():
        return int(sync_file.read_text().strip())
    return 0


def save_sync_timestamp(timestamp: int):
    """Save timestamp of successful sync"""
    sync_file = OUTPUT_DIR / ".last_sync"
    sync_file.write_text(str(timestamp))
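
# The .last_sync file holds a single Unix epoch in seconds, e.g. "1733570801"
# (illustrative value). Deleting the file resets the window, so the next run
# exports every record again.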


def export_incremental():
    """Export only records that are new or updated since the last sync"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    last_sync = get_last_sync_timestamp()
    current_time = int(datetime.now().timestamp())
    print(f"Last sync: {datetime.fromtimestamp(last_sync).strftime('%Y-%m-%d %H:%M:%S') if last_sync else 'Never'}")
    print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")

    # Get new/updated auctions (rows with a NULL discovered_at are always included)
    cursor.execute("""
        SELECT * FROM auctions
        WHERE discovered_at IS NULL OR discovered_at > ?
        ORDER BY auction_id
    """, (last_sync,))
    new_auctions = [dict(row) for row in cursor.fetchall()]

    # Get new/updated lots (same NULL-timestamp rule)
    cursor.execute("""
        SELECT * FROM lots
        WHERE scraped_at_timestamp IS NULL OR scraped_at_timestamp > ?
        ORDER BY lot_id
    """, (last_sync,))
    new_lots = [dict(row) for row in cursor.fetchall()]
    conn.close()

    # Export to timestamped files
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results = {
        'auctions': 0,
        'lots': 0,
        'files': {}
    }

    # Export auctions if any are new
    if new_auctions:
        auctions_csv = OUTPUT_DIR / f'auctions_update_{timestamp}.csv'
        auctions_json = OUTPUT_DIR / f'auctions_update_{timestamp}.json'
        with open(auctions_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_auctions[0].keys())
            writer.writeheader()
            writer.writerows(new_auctions)
        with open(auctions_json, 'w', encoding='utf-8') as f:
            json.dump(new_auctions, f, indent=2, ensure_ascii=False)
        results['auctions'] = len(new_auctions)
        results['files']['auctions_csv'] = str(auctions_csv)
        results['files']['auctions_json'] = str(auctions_json)
        print(f"\n✓ Exported {len(new_auctions)} new/updated auctions")
        print(f" CSV: {auctions_csv}")
        print(f" JSON: {auctions_json}")

    # Export lots if any are new
    if new_lots:
        lots_csv = OUTPUT_DIR / f'lots_update_{timestamp}.csv'
        lots_json = OUTPUT_DIR / f'lots_update_{timestamp}.json'
        with open(lots_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_lots[0].keys())
            writer.writeheader()
            writer.writerows(new_lots)
        with open(lots_json, 'w', encoding='utf-8') as f:
            json.dump(new_lots, f, indent=2, ensure_ascii=False)
        results['lots'] = len(new_lots)
        results['files']['lots_csv'] = str(lots_csv)
        results['files']['lots_json'] = str(lots_json)
        print(f"\n✓ Exported {len(new_lots)} new/updated lots")
        print(f" CSV: {lots_csv}")
        print(f" JSON: {lots_json}")

    if not new_auctions and not new_lots:
        print("\n✓ No new updates since last sync")

    return results
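

# Illustrative sketch only: nothing in this script calls it. One way a
# server-side job could consume the JSON exports, assuming the server also
# runs SQLite with the same `auctions` schema. The function name and the
# server_db path are hypothetical, not part of the real pipeline.
def example_server_import(json_path, server_db="server.db"):
    """Sketch: UPSERT rows from an auctions_update_*.json file."""
    with open(json_path, encoding='utf-8') as f:
        rows = json.load(f)
    if not rows:
        return 0
    cols = list(rows[0].keys())
    sql = (f"INSERT OR REPLACE INTO auctions ({', '.join(cols)}) "
           f"VALUES ({', '.join('?' for _ in cols)})")
    conn = sqlite3.connect(server_db)
    conn.executemany(sql, [tuple(r[c] for c in cols) for r in rows])
    conn.commit()
    conn.close()
    return len(rows)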


def create_upsert_export():
    """Create SQL script for server to UPSERT (update or insert) data"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    last_sync = get_last_sync_timestamp()
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Get new/updated auctions
    cursor.execute("""
        SELECT * FROM auctions
        WHERE discovered_at IS NULL OR discovered_at > ?
    """, (last_sync,))
    new_auctions = [dict(row) for row in cursor.fetchall()]

    if new_auctions:
        sql_file = OUTPUT_DIR / f'upsert_auctions_{timestamp}.sql'
        with open(sql_file, 'w', encoding='utf-8') as f:
            f.write("-- UPSERT script for auctions (updates existing, inserts new)\n\n")
            for auction in new_auctions:
                # Create an INSERT OR REPLACE statement with inlined SQL literals.
                # Only single quotes are escaped, which covers the scraped text
                # fields here but is not a general-purpose SQL quoter.
                columns = list(auction.keys())
                values = []
                for val in auction.values():
                    if val is None:
                        values.append("NULL")
                    elif isinstance(val, (int, float)):
                        values.append(str(val))
                    else:
                        # Escape single quotes
                        escaped = str(val).replace("'", "''")
                        values.append(f"'{escaped}'")
                f.write(f"INSERT OR REPLACE INTO auctions ({', '.join(columns)})\n")
                f.write(f"VALUES ({', '.join(values)});\n\n")
        print(f"\n✓ Created UPSERT SQL script: {sql_file}")
        print(" Server can execute this to avoid constraint errors")
    conn.close()
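
# Shape of the generated statements (values here are illustrative):
#   INSERT OR REPLACE INTO auctions (auction_id, city, country, type)
#   VALUES ('A123', 'Amsterdam', 'NL', 'online');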


def main():
    """Main sync process"""
    print("="*60)
    print("DATABASE SYNC UTILITY")
    print("="*60)
    print(f"Database: {DB_PATH}")
    print(f"Output: {OUTPUT_DIR}")
    print("="*60)

    # Step 1: Fill missing fields
    fill_missing_auction_fields()

    # Step 2: Export incremental updates
    print("Exporting incremental updates...")
    results = export_incremental()

    # Step 3: Create UPSERT SQL (prevents constraint errors on server)
    if results['auctions'] > 0:
        create_upsert_export()

    # Step 4: Save sync timestamp
    current_time = int(datetime.now().timestamp())
    save_sync_timestamp(current_time)

    print("\n" + "="*60)
    print("SYNC COMPLETE")
    print("="*60)
    print(f"New auctions: {results['auctions']}")
    print(f"New lots: {results['lots']}")
    if results['files']:
        print("\nFiles ready for server import:")
        for key, path in results['files'].items():
            print(f" {key}: {path}")
    print("\nNext sync will only export records newer than:")
    print(f" {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()
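
# Typical invocation (illustrative; adjust DB_PATH / OUTPUT_DIR first if needed):
#   python scaev/sync_updates.py
# Each run exports only records newer than the epoch stored in
# OUTPUT_DIR/.last_sync, then advances that timestamp.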