#!/usr/bin/env python3
"""
Sync local database updates to server-compatible format

Creates incremental exports with only NEW or UPDATED records
"""

import sqlite3
import json
import csv
from datetime import datetime
from pathlib import Path

DB_PATH = "C:/mnt/okcomputer/output/cache.db"
OUTPUT_DIR = Path("C:/mnt/okcomputer/output")


def fill_missing_auction_fields():
    """Fill in missing fields in the auctions table from scraped data."""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    print("Filling missing auction fields...")

    # Backfill closing_time from first_lot_closing_time
    cursor.execute("""
        UPDATE auctions
        SET closing_time = first_lot_closing_time
        WHERE closing_time IS NULL AND first_lot_closing_time IS NOT NULL
    """)
    updated = cursor.rowcount
    print(f"  ✓ Updated {updated} closing_time fields")

    # Parse location to extract city and country
    cursor.execute("""
        SELECT auction_id, location
        FROM auctions
        WHERE location IS NOT NULL AND (city IS NULL OR country IS NULL)
    """)
    locations = cursor.fetchall()

    city_updates = 0
    for auction_id, location in locations:
        if not location:
            continue

        # Parse "City, COUNTRY" or "City, Region, COUNTRY",
        # e.g. "Amsterdam, Noord-Holland, NL" -> city "Amsterdam", country "NL"
        parts = [p.strip() for p in location.split(',')]
        if len(parts) >= 2:
            city = parts[0]
            country = parts[-1]

            cursor.execute("""
                UPDATE auctions
                SET city = ?, country = ?
                WHERE auction_id = ?
            """, (city, country, auction_id))
            city_updates += 1

    print(f"  ✓ Updated {city_updates} city/country fields")

    # Set type to 'online' where missing (Troostwijk is an online platform)
    cursor.execute("""
        UPDATE auctions
        SET type = 'online'
        WHERE type IS NULL
    """)
    type_updates = cursor.rowcount
    print(f"  ✓ Updated {type_updates} type fields")

    conn.commit()
    conn.close()

    print("✓ Auction fields updated\n")


def get_last_sync_timestamp():
    """Get timestamp of last successful sync"""
    sync_file = OUTPUT_DIR / ".last_sync"
    if sync_file.exists():
        return int(sync_file.read_text().strip())
    return 0


def save_sync_timestamp(timestamp: int):
    """Save timestamp of successful sync"""
    sync_file = OUTPUT_DIR / ".last_sync"
    sync_file.write_text(str(timestamp))

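
# The sync marker is plain epoch seconds in a dotfile. Round trip, for
# illustration (the value below is made up):
#   save_sync_timestamp(1717000000)
#   get_last_sync_timestamp()  # -> 1717000000
# On a first run, with no .last_sync file, get_last_sync_timestamp() returns 0,
# so every record in the database qualifies as new.
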
def export_incremental():
    """Export only records that are new or updated since last sync"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    last_sync = get_last_sync_timestamp()
    current_time = int(datetime.now().timestamp())

    print(f"Last sync: {datetime.fromtimestamp(last_sync).strftime('%Y-%m-%d %H:%M:%S') if last_sync else 'Never'}")
    print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")

    # Get new/updated auctions. Rows with a NULL discovered_at have no known
    # age, so they are exported on every sync rather than risk being skipped.
    cursor.execute("""
        SELECT * FROM auctions
        WHERE discovered_at IS NULL OR discovered_at > ?
        ORDER BY auction_id
    """, (last_sync,))
    new_auctions = [dict(row) for row in cursor.fetchall()]

    # Get new/updated lots, with the same NULL-timestamp behavior
    cursor.execute("""
        SELECT * FROM lots
        WHERE scraped_at_timestamp IS NULL OR scraped_at_timestamp > ?
        ORDER BY lot_id
    """, (last_sync,))
    new_lots = [dict(row) for row in cursor.fetchall()]

    conn.close()

    # Export to timestamped files
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    results = {
        'auctions': 0,
        'lots': 0,
        'files': {}
    }

    # Export auctions if there are any new ones
    if new_auctions:
        auctions_csv = OUTPUT_DIR / f'auctions_update_{timestamp}.csv'
        auctions_json = OUTPUT_DIR / f'auctions_update_{timestamp}.json'

        with open(auctions_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_auctions[0].keys())
            writer.writeheader()
            writer.writerows(new_auctions)

        with open(auctions_json, 'w', encoding='utf-8') as f:
            json.dump(new_auctions, f, indent=2, ensure_ascii=False)

        results['auctions'] = len(new_auctions)
        results['files']['auctions_csv'] = str(auctions_csv)
        results['files']['auctions_json'] = str(auctions_json)

        print(f"\n✓ Exported {len(new_auctions)} new/updated auctions")
        print(f"  CSV: {auctions_csv}")
        print(f"  JSON: {auctions_json}")

    # Export lots if there are any new ones
    if new_lots:
        lots_csv = OUTPUT_DIR / f'lots_update_{timestamp}.csv'
        lots_json = OUTPUT_DIR / f'lots_update_{timestamp}.json'

        with open(lots_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=new_lots[0].keys())
            writer.writeheader()
            writer.writerows(new_lots)

        with open(lots_json, 'w', encoding='utf-8') as f:
            json.dump(new_lots, f, indent=2, ensure_ascii=False)

        results['lots'] = len(new_lots)
        results['files']['lots_csv'] = str(lots_csv)
        results['files']['lots_json'] = str(lots_json)

        print(f"\n✓ Exported {len(new_lots)} new/updated lots")
        print(f"  CSV: {lots_csv}")
        print(f"  JSON: {lots_json}")

    if not new_auctions and not new_lots:
        print("\n✓ No new updates since last sync")

    return results

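
# For reference, the shape of the dict export_incremental() returns (values
# below are illustrative, not real output):
#   {'auctions': 12, 'lots': 340,
#    'files': {'auctions_csv': '<OUTPUT_DIR>/auctions_update_<timestamp>.csv', ...}}
# main() relies on the 'auctions' count and the 'files' mapping.
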
def create_upsert_export():
    """Create SQL script for server to UPSERT (update or insert) data"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    last_sync = get_last_sync_timestamp()
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Get new/updated auctions
    cursor.execute("""
        SELECT * FROM auctions
        WHERE discovered_at IS NULL OR discovered_at > ?
    """, (last_sync,))
    new_auctions = [dict(row) for row in cursor.fetchall()]

    if new_auctions:
        sql_file = OUTPUT_DIR / f'upsert_auctions_{timestamp}.sql'

        with open(sql_file, 'w', encoding='utf-8') as f:
            f.write("-- UPSERT script for auctions (updates existing, inserts new)\n\n")

            for auction in new_auctions:
                # Build an INSERT OR REPLACE statement with inlined literals.
                # Only single quotes are escaped, which is adequate for our
                # trusted scraped text but not for arbitrary input.
                columns = list(auction.keys())
                placeholders = []

                for val in auction.values():
                    if val is None:
                        placeholders.append("NULL")
                    elif isinstance(val, (int, float)):
                        placeholders.append(str(val))
                    else:
                        # Escape single quotes
                        escaped = str(val).replace("'", "''")
                        placeholders.append(f"'{escaped}'")

                f.write(f"INSERT OR REPLACE INTO auctions ({', '.join(columns)})\n")
                f.write(f"VALUES ({', '.join(placeholders)});\n\n")

        print(f"\n✓ Created UPSERT SQL script: {sql_file}")
        print("  Server can execute this to avoid constraint errors")

    conn.close()

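
# A minimal sketch of a safer alternative, assuming the server-side importer is
# also Python + SQLite with the same auctions schema: parameterized statements
# sidestep the quote-escaping above entirely. upsert_auctions_parameterized is
# a hypothetical helper for that importer, not part of this sync pipeline.
def upsert_auctions_parameterized(conn, rows):
    """Hypothetical parameterized UPSERT for a Python/SQLite importer."""
    if not rows:
        return
    columns = list(rows[0].keys())
    placeholders = ", ".join("?" for _ in columns)
    sql = (f"INSERT OR REPLACE INTO auctions ({', '.join(columns)}) "
           f"VALUES ({placeholders})")
    # Let the driver bind values; None becomes NULL, no manual escaping needed
    conn.executemany(sql, [tuple(row[c] for c in columns) for row in rows])
    conn.commit()
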
def main():
    """Main sync process"""
    print("="*60)
    print("DATABASE SYNC UTILITY")
    print("="*60)
    print(f"Database: {DB_PATH}")
    print(f"Output: {OUTPUT_DIR}")
    print("="*60)

    # Step 1: Fill missing fields
    fill_missing_auction_fields()

    # Capture the sync cutoff BEFORE exporting, so records scraped while the
    # export runs are not skipped by the next sync (at worst a record is
    # exported twice, which the server-side UPSERT tolerates)
    current_time = int(datetime.now().timestamp())

    # Step 2: Export incremental updates
    print("Exporting incremental updates...")
    results = export_incremental()

    # Step 3: Create UPSERT SQL (prevents constraint errors on server)
    if results['auctions'] > 0:
        create_upsert_export()

    # Step 4: Save sync timestamp
    save_sync_timestamp(current_time)

    print("\n" + "="*60)
    print("SYNC COMPLETE")
    print("="*60)
    print(f"New auctions: {results['auctions']}")
    print(f"New lots: {results['lots']}")

    if results['files']:
        print("\nFiles ready for server import:")
        for key, path in results['files'].items():
            print(f"  {key}: {path}")

    print("\nNext sync will only export records newer than:")
    print(f"  {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()