#!/usr/bin/env python3
"""
Sync local database updates to server-compatible format
Creates incremental exports with only NEW or UPDATED records
"""
import sqlite3
import json
import csv
from datetime import datetime
from pathlib import Path

DB_PATH = "C:/mnt/okcomputer/output/cache.db"
OUTPUT_DIR = Path("C:/mnt/okcomputer/output")


def fill_missing_auction_fields():
    """Fill in missing fields in the auctions table from scraped data"""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    print("Filling missing auction fields...")

    # Update closing_time from first_lot_closing_time
    cursor.execute("""
        UPDATE auctions
        SET closing_time = first_lot_closing_time
        WHERE closing_time IS NULL AND first_lot_closing_time IS NOT NULL
    """)
    updated = cursor.rowcount
    print(f" ✓ Updated {updated} closing_time fields")

    # Parse location to extract city and country
    cursor.execute("""
        SELECT auction_id, location
        FROM auctions
        WHERE location IS NOT NULL AND (city IS NULL OR country IS NULL)
    """)
    locations = cursor.fetchall()
    city_updates = 0
    for auction_id, location in locations:
        if not location:
            continue
        # Parse "City, COUNTRY" or "City, Region, COUNTRY"
        parts = [p.strip() for p in location.split(',')]
        if len(parts) >= 2:
            city = parts[0]
            country = parts[-1]
            cursor.execute("""
                UPDATE auctions
                SET city = ?, country = ?
                WHERE auction_id = ?
            """, (city, country, auction_id))
            city_updates += 1
    print(f" ✓ Updated {city_updates} city/country fields")

    # Set type to 'online' for all rows (Troostwijk is an online platform)
    cursor.execute("""
        UPDATE auctions
        SET type = 'online'
        WHERE type IS NULL
    """)
    type_updates = cursor.rowcount
    print(f" ✓ Updated {type_updates} type fields")

    conn.commit()
    conn.close()
    print("✓ Auction fields updated\n")


def get_last_sync_timestamp():
    """Get timestamp of last successful sync"""
    sync_file = OUTPUT_DIR / ".last_sync"
    if sync_file.exists():
        return int(sync_file.read_text().strip())
    return 0


def save_sync_timestamp(timestamp: int):
    """Save timestamp of successful sync"""
    sync_file = OUTPUT_DIR / ".last_sync"
    sync_file.write_text(str(timestamp))
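
# The .last_sync file holds a single Unix epoch in seconds, e.g. "1733570801"
# (illustrative value). Deleting the file resets the window, so the next run
# exports every record again.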


def export_incremental():
    """Export only records that are new or updated since the last sync"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    last_sync = get_last_sync_timestamp()
    current_time = int(datetime.now().timestamp())
    print(f"Last sync: {datetime.fromtimestamp(last_sync).strftime('%Y-%m-%d %H:%M:%S') if last_sync else 'Never'}")
    print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")

    # Get new/updated auctions (rows with a NULL discovered_at are always included)
    cursor.execute("""
        SELECT * FROM auctions
        WHERE discovered_at IS NULL OR discovered_at > ?
        ORDER BY auction_id
    """, (last_sync,))
    new_auctions = [dict(row) for row in cursor.fetchall()]

    # Get new/updated lots (same NULL-timestamp rule)
    cursor.execute("""
        SELECT * FROM lots
        WHERE scraped_at_timestamp IS NULL OR scraped_at_timestamp > ?
        ORDER BY lot_id
    """, (last_sync,))
    new_lots = [dict(row) for row in cursor.fetchall()]
    conn.close()

    # Export to timestamped files
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results = {
        'auctions': 0,
        'lots': 0,
        'files': {}
    }

    # Export auctions if any are new
    if new_auctions:
        auctions_csv = OUTPUT_DIR / f'auctions_update_{timestamp}.csv'
        auctions_json = OUTPUT_DIR / f'auctions_update_{timestamp}.json'
        with open(auctions_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_auctions[0].keys())
            writer.writeheader()
            writer.writerows(new_auctions)
        with open(auctions_json, 'w', encoding='utf-8') as f:
            json.dump(new_auctions, f, indent=2, ensure_ascii=False)
        results['auctions'] = len(new_auctions)
        results['files']['auctions_csv'] = str(auctions_csv)
        results['files']['auctions_json'] = str(auctions_json)
        print(f"\n✓ Exported {len(new_auctions)} new/updated auctions")
        print(f" CSV: {auctions_csv}")
        print(f" JSON: {auctions_json}")

    # Export lots if any are new
    if new_lots:
        lots_csv = OUTPUT_DIR / f'lots_update_{timestamp}.csv'
        lots_json = OUTPUT_DIR / f'lots_update_{timestamp}.json'
        with open(lots_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_lots[0].keys())
            writer.writeheader()
            writer.writerows(new_lots)
        with open(lots_json, 'w', encoding='utf-8') as f:
            json.dump(new_lots, f, indent=2, ensure_ascii=False)
        results['lots'] = len(new_lots)
        results['files']['lots_csv'] = str(lots_csv)
        results['files']['lots_json'] = str(lots_json)
        print(f"\n✓ Exported {len(new_lots)} new/updated lots")
        print(f" CSV: {lots_csv}")
        print(f" JSON: {lots_json}")

    if not new_auctions and not new_lots:
        print("\n✓ No new updates since last sync")

    return results
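

# Illustrative sketch only: nothing in this script calls it. One way a
# server-side job could consume the JSON exports, assuming the server also
# runs SQLite with the same `auctions` schema. The function name and the
# server_db path are hypothetical, not part of the real pipeline.
def example_server_import(json_path, server_db="server.db"):
    """Sketch: UPSERT rows from an auctions_update_*.json file."""
    with open(json_path, encoding='utf-8') as f:
        rows = json.load(f)
    if not rows:
        return 0
    cols = list(rows[0].keys())
    sql = (f"INSERT OR REPLACE INTO auctions ({', '.join(cols)}) "
           f"VALUES ({', '.join('?' for _ in cols)})")
    conn = sqlite3.connect(server_db)
    conn.executemany(sql, [tuple(r[c] for c in cols) for r in rows])
    conn.commit()
    conn.close()
    return len(rows)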


def create_upsert_export():
    """Create SQL script for server to UPSERT (update or insert) data"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    last_sync = get_last_sync_timestamp()
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Get new/updated auctions
    cursor.execute("""
        SELECT * FROM auctions
        WHERE discovered_at IS NULL OR discovered_at > ?
    """, (last_sync,))
    new_auctions = [dict(row) for row in cursor.fetchall()]

    if new_auctions:
        sql_file = OUTPUT_DIR / f'upsert_auctions_{timestamp}.sql'
        with open(sql_file, 'w', encoding='utf-8') as f:
            f.write("-- UPSERT script for auctions (updates existing, inserts new)\n\n")
            for auction in new_auctions:
                # Create an INSERT OR REPLACE statement with inlined SQL literals.
                # Only single quotes are escaped, which covers the scraped text
                # fields here but is not a general-purpose SQL quoter.
                columns = list(auction.keys())
                values = []
                for val in auction.values():
                    if val is None:
                        values.append("NULL")
                    elif isinstance(val, (int, float)):
                        values.append(str(val))
                    else:
                        # Escape single quotes
                        escaped = str(val).replace("'", "''")
                        values.append(f"'{escaped}'")
                f.write(f"INSERT OR REPLACE INTO auctions ({', '.join(columns)})\n")
                f.write(f"VALUES ({', '.join(values)});\n\n")
        print(f"\n✓ Created UPSERT SQL script: {sql_file}")
        print(" Server can execute this to avoid constraint errors")
    conn.close()
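
# Shape of the generated statements (values here are illustrative):
#   INSERT OR REPLACE INTO auctions (auction_id, city, country, type)
#   VALUES ('A123', 'Amsterdam', 'NL', 'online');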


def main():
    """Main sync process"""
    print("="*60)
    print("DATABASE SYNC UTILITY")
    print("="*60)
    print(f"Database: {DB_PATH}")
    print(f"Output: {OUTPUT_DIR}")
    print("="*60)

    # Step 1: Fill missing fields
    fill_missing_auction_fields()

    # Step 2: Export incremental updates
    print("Exporting incremental updates...")
    results = export_incremental()

    # Step 3: Create UPSERT SQL (prevents constraint errors on server)
    if results['auctions'] > 0:
        create_upsert_export()

    # Step 4: Save sync timestamp
    current_time = int(datetime.now().timestamp())
    save_sync_timestamp(current_time)

    print("\n" + "="*60)
    print("SYNC COMPLETE")
    print("="*60)
    print(f"New auctions: {results['auctions']}")
    print(f"New lots: {results['lots']}")
    if results['files']:
        print("\nFiles ready for server import:")
        for key, path in results['files'].items():
            print(f" {key}: {path}")
    print("\nNext sync will only export records newer than:")
    print(f" {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()
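
# Typical invocation (illustrative; adjust DB_PATH / OUTPUT_DIR first if needed):
#   python scaev/sync_updates.py
# Each run exports only records newer than the epoch stored in
# OUTPUT_DIR/.last_sync, then advances that timestamp.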