From 07d58cf59c46a68838292968e581b41e5ffec483 Mon Sep 17 00:00:00 2001
From: Tour
Date: Sun, 7 Dec 2025 12:26:41 +0100
Subject: [PATCH] Add sync_updates.py incremental database sync utility

---
 sync_updates.py | 256 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 256 insertions(+)
 create mode 100644 sync_updates.py

diff --git a/sync_updates.py b/sync_updates.py
new file mode 100644
index 0000000..56a284f
--- /dev/null
+++ b/sync_updates.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+"""
+Sync local database updates to a server-compatible format.
+Creates incremental exports containing only NEW or UPDATED records.
+"""
+
+import sqlite3
+import json
+import csv
+from datetime import datetime
+from pathlib import Path
+
+DB_PATH = "C:/mnt/okcomputer/output/cache.db"
+OUTPUT_DIR = Path("C:/mnt/okcomputer/output")
+
+def fill_missing_auction_fields():
+    """Fill in missing fields in the auctions table from scraped data."""
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+
+    print("Filling missing auction fields...")
+
+    # Backfill closing_time from first_lot_closing_time
+    cursor.execute("""
+        UPDATE auctions
+        SET closing_time = first_lot_closing_time
+        WHERE closing_time IS NULL AND first_lot_closing_time IS NOT NULL
+    """)
+    updated = cursor.rowcount
+    print(f"  ✓ Updated {updated} closing_time fields")
+
+    # Parse location to extract city and country
+    cursor.execute("""
+        SELECT auction_id, location
+        FROM auctions
+        WHERE location IS NOT NULL AND (city IS NULL OR country IS NULL)
+    """)
+    locations = cursor.fetchall()
+
+    city_updates = 0
+    for auction_id, location in locations:
+        if not location:
+            continue
+
+        # Parse "City, COUNTRY" or "City, Region, COUNTRY"
+        parts = [p.strip() for p in location.split(',')]
+        if len(parts) >= 2:
+            city = parts[0]
+            country = parts[-1]
+
+            cursor.execute("""
+                UPDATE auctions
+                SET city = ?, country = ?
+                WHERE auction_id = ?
+            """, (city, country, auction_id))
+            city_updates += 1
+
+    print(f"  ✓ Updated {city_updates} city/country fields")
+
+    # Set type to 'online' for all rows (Troostwijk is an online platform)
+    cursor.execute("""
+        UPDATE auctions
+        SET type = 'online'
+        WHERE type IS NULL
+    """)
+    type_updates = cursor.rowcount
+    print(f"  ✓ Updated {type_updates} type fields")
+
+    conn.commit()
+    conn.close()
+
+    print("✓ Auction fields updated\n")
+
+def get_last_sync_timestamp():
+    """Get the timestamp of the last successful sync (0 if never synced)."""
+    sync_file = OUTPUT_DIR / ".last_sync"
+    if sync_file.exists():
+        return int(sync_file.read_text().strip())
+    return 0
+
+def save_sync_timestamp(timestamp: int):
+    """Save the timestamp of a successful sync."""
+    sync_file = OUTPUT_DIR / ".last_sync"
+    sync_file.write_text(str(timestamp))
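+
+# Illustrative round trip for the two helpers above (not executed by this
+# script; the epoch value is made up for the example). The sync marker is a
+# plain integer in OUTPUT_DIR/.last_sync:
+#
+#     save_sync_timestamp(1733572000)
+#     get_last_sync_timestamp()  # -> 1733572000, or 0 if the file is missing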
+
+def export_incremental():
+    """Export only records that are new or updated since the last sync."""
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+
+    last_sync = get_last_sync_timestamp()
+    current_time = int(datetime.now().timestamp())
+
+    print(f"Last sync: {datetime.fromtimestamp(last_sync).strftime('%Y-%m-%d %H:%M:%S') if last_sync else 'Never'}")
+    print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
+
+    # Get new/updated auctions
+    cursor.execute("""
+        SELECT * FROM auctions
+        WHERE discovered_at IS NULL OR discovered_at > ?
+        ORDER BY auction_id
+    """, (last_sync,))
+    new_auctions = [dict(row) for row in cursor.fetchall()]
+
+    # Get new/updated lots
+    cursor.execute("""
+        SELECT * FROM lots
+        WHERE scraped_at_timestamp IS NULL OR scraped_at_timestamp > ?
+        ORDER BY lot_id
+    """, (last_sync,))
+    new_lots = [dict(row) for row in cursor.fetchall()]
+
+    conn.close()
+
+    # Export to timestamped files
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+
+    results = {
+        'auctions': 0,
+        'lots': 0,
+        'files': {}
+    }
+
+    # Export auctions if any are new
+    if new_auctions:
+        auctions_csv = OUTPUT_DIR / f'auctions_update_{timestamp}.csv'
+        auctions_json = OUTPUT_DIR / f'auctions_update_{timestamp}.json'
+
+        with open(auctions_csv, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=new_auctions[0].keys())
+            writer.writeheader()
+            writer.writerows(new_auctions)
+
+        with open(auctions_json, 'w', encoding='utf-8') as f:
+            json.dump(new_auctions, f, indent=2, ensure_ascii=False)
+
+        results['auctions'] = len(new_auctions)
+        results['files']['auctions_csv'] = str(auctions_csv)
+        results['files']['auctions_json'] = str(auctions_json)
+
+        print(f"\n✓ Exported {len(new_auctions)} new/updated auctions")
+        print(f"  CSV: {auctions_csv}")
+        print(f"  JSON: {auctions_json}")
+
+    # Export lots if any are new
+    if new_lots:
+        lots_csv = OUTPUT_DIR / f'lots_update_{timestamp}.csv'
+        lots_json = OUTPUT_DIR / f'lots_update_{timestamp}.json'
+
+        with open(lots_csv, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=new_lots[0].keys())
+            writer.writeheader()
+            writer.writerows(new_lots)
+
+        with open(lots_json, 'w', encoding='utf-8') as f:
+            json.dump(new_lots, f, indent=2, ensure_ascii=False)
+
+        results['lots'] = len(new_lots)
+        results['files']['lots_csv'] = str(lots_csv)
+        results['files']['lots_json'] = str(lots_json)
+
+        print(f"\n✓ Exported {len(new_lots)} new/updated lots")
+        print(f"  CSV: {lots_csv}")
+        print(f"  JSON: {lots_json}")
+
+    if not new_auctions and not new_lots:
+        print("\n✓ No new updates since last sync")
+
+    return results
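+
+# For reference, each statement emitted by create_upsert_export() below has
+# the following shape (columns and values here are illustrative, not the
+# real schema):
+#
+#     INSERT OR REPLACE INTO auctions (auction_id, city, country)
+#     VALUES ('a-123', 'Amsterdam', 'NL');
+#
+# INSERT OR REPLACE is SQLite syntax; if the server runs a different engine
+# (e.g. PostgreSQL), the export would need INSERT ... ON CONFLICT ... DO
+# UPDATE instead.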
+
+def create_upsert_export():
+    """Create a SQL script for the server to UPSERT (update or insert) data."""
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+
+    last_sync = get_last_sync_timestamp()
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+
+    # Get new/updated auctions
+    cursor.execute("""
+        SELECT * FROM auctions
+        WHERE discovered_at IS NULL OR discovered_at > ?
+    """, (last_sync,))
+    new_auctions = [dict(row) for row in cursor.fetchall()]
+
+    if new_auctions:
+        sql_file = OUTPUT_DIR / f'upsert_auctions_{timestamp}.sql'
+
+        with open(sql_file, 'w', encoding='utf-8') as f:
+            f.write("-- UPSERT script for auctions (updates existing, inserts new)\n\n")
+
+            for auction in new_auctions:
+                # Build an INSERT OR REPLACE statement with literal values
+                columns = list(auction.keys())
+                values = []
+
+                for val in auction.values():
+                    if val is None:
+                        values.append("NULL")
+                    elif isinstance(val, (int, float)):
+                        values.append(str(val))
+                    else:
+                        # Escape single quotes for SQL string literals
+                        escaped = str(val).replace("'", "''")
+                        values.append(f"'{escaped}'")
+
+                f.write(f"INSERT OR REPLACE INTO auctions ({', '.join(columns)})\n")
+                f.write(f"VALUES ({', '.join(values)});\n\n")
+
+        print(f"\n✓ Created UPSERT SQL script: {sql_file}")
+        print("  Server can execute this to avoid constraint errors")
+
+    conn.close()
+
+def main():
+    """Run the full sync process."""
+    print("="*60)
+    print("DATABASE SYNC UTILITY")
+    print("="*60)
+    print(f"Database: {DB_PATH}")
+    print(f"Output: {OUTPUT_DIR}")
+    print("="*60)
+
+    # Step 1: Fill missing fields
+    fill_missing_auction_fields()
+
+    # Step 2: Export incremental updates
+    print("Exporting incremental updates...")
+    results = export_incremental()
+
+    # Step 3: Create UPSERT SQL (prevents constraint errors on the server)
+    if results['auctions'] > 0:
+        create_upsert_export()
+
+    # Step 4: Save the sync timestamp
+    current_time = int(datetime.now().timestamp())
+    save_sync_timestamp(current_time)
+
+    print("\n" + "="*60)
+    print("SYNC COMPLETE")
+    print("="*60)
+    print(f"New auctions: {results['auctions']}")
+    print(f"New lots: {results['lots']}")
+
+    if results['files']:
+        print("\nFiles ready for server import:")
+        for key, path in results['files'].items():
+            print(f"  {key}: {path}")
+
+    print("\nNext sync will only export records newer than:")
+    print(f"  {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
+
+if __name__ == "__main__":
+    main()
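+
+# Typical invocation (the DB_PATH and OUTPUT_DIR defined at the top of this
+# file are assumed to exist):
+#
+#     python sync_updates.py
+#
+# Each run exports only records whose discovered_at / scraped_at_timestamp
+# is newer than the saved .last_sync marker, then advances the marker.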