gogo
This commit is contained in:
256
sync_updates.py
Normal file
256
sync_updates.py
Normal file
@@ -0,0 +1,256 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sync local database updates to server-compatible format
|
||||
Creates incremental exports with only NEW or UPDATED records
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import csv
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = "C:/mnt/okcomputer/output/cache.db"
|
||||
OUTPUT_DIR = Path("C:/mnt/okcomputer/output")
|
||||
|
||||
def fill_missing_auction_fields():
    """Backfill derivable NULL columns in the ``auctions`` table.

    Three passes:
      1. copy ``first_lot_closing_time`` into a NULL ``closing_time``,
      2. parse ``location`` ("City, COUNTRY" or "City, Region, COUNTRY")
         into the ``city`` and ``country`` columns,
      3. default ``type`` to 'online' (Troostwijk is an online platform).

    Commits once at the end; the connection is closed even if a statement
    fails (the original leaked it on error).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        print("Filling missing auction fields...")

        # Pass 1: closing_time defaults to the first lot's closing time.
        cursor.execute("""
            UPDATE auctions
            SET closing_time = first_lot_closing_time
            WHERE closing_time IS NULL AND first_lot_closing_time IS NOT NULL
        """)
        print(f" ✓ Updated {cursor.rowcount} closing_time fields")

        # Pass 2: split "City[, Region], COUNTRY" into city/country.
        cursor.execute("""
            SELECT auction_id, location
            FROM auctions
            WHERE location IS NOT NULL AND (city IS NULL OR country IS NULL)
        """)
        city_updates = 0
        for auction_id, location in cursor.fetchall():
            if not location:
                continue
            parts = [p.strip() for p in location.split(',')]
            if len(parts) >= 2:
                # First segment is the city, last is the country; any
                # middle segments (region) are intentionally ignored.
                cursor.execute(
                    "UPDATE auctions SET city = ?, country = ? WHERE auction_id = ?",
                    (parts[0], parts[-1], auction_id),
                )
                city_updates += 1
        print(f" ✓ Updated {city_updates} city/country fields")

        # Pass 3: every auction on this platform is an online auction.
        cursor.execute("""
            UPDATE auctions
            SET type = 'online'
            WHERE type IS NULL
        """)
        print(f" ✓ Updated {cursor.rowcount} type fields")

        conn.commit()
    finally:
        conn.close()

    print("✓ Auction fields updated\n")
def get_last_sync_timestamp():
    """Return the Unix timestamp of the last successful sync.

    Reads the ``.last_sync`` marker in OUTPUT_DIR. Returns 0 when the file
    is missing OR when its contents are not a valid integer — a corrupt or
    empty marker is treated as "never synced" instead of crashing the sync
    (the original raised ValueError on e.g. an empty file).
    """
    sync_file = OUTPUT_DIR / ".last_sync"
    if sync_file.exists():
        try:
            return int(sync_file.read_text().strip())
        except ValueError:
            # Corrupt/empty marker: fall back to a full export.
            return 0
    return 0
def save_sync_timestamp(timestamp: int):
    """Persist *timestamp* as the last-successful-sync marker.

    Writes the integer (as text) to ``.last_sync`` inside OUTPUT_DIR — the
    same file read back by get_last_sync_timestamp().
    """
    marker = OUTPUT_DIR / ".last_sync"
    marker.write_text(f"{timestamp}")
def _export_dataset(label, rows, timestamp, results):
    """Write *rows* to ``{label}_update_{timestamp}.csv/.json`` in OUTPUT_DIR.

    Records the row count under ``results[label]`` and the file paths under
    ``results['files']``, then prints a short summary. Callers only invoke
    this with a non-empty *rows* list (rows[0] supplies the CSV header).
    """
    csv_path = OUTPUT_DIR / f'{label}_update_{timestamp}.csv'
    json_path = OUTPUT_DIR / f'{label}_update_{timestamp}.json'

    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=rows[0].keys())
        writer.writeheader()
        writer.writerows(rows)

    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(rows, f, indent=2, ensure_ascii=False)

    results[label] = len(rows)
    results['files'][f'{label}_csv'] = str(csv_path)
    results['files'][f'{label}_json'] = str(json_path)

    print(f"\n✓ Exported {len(rows)} new/updated {label}")
    print(f" CSV: {csv_path}")
    print(f" JSON: {json_path}")


def export_incremental():
    """Export auctions/lots that are new or updated since the last sync.

    Selects rows whose discovery/scrape timestamp is NULL or newer than the
    saved sync marker, writes them to timestamped CSV + JSON files, and
    returns a summary dict: ``{'auctions': int, 'lots': int, 'files': {...}}``.
    The connection is closed even if a query fails (original leaked it).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        last_sync = get_last_sync_timestamp()
        current_time = int(datetime.now().timestamp())

        print(f"Last sync: {datetime.fromtimestamp(last_sync).strftime('%Y-%m-%d %H:%M:%S') if last_sync else 'Never'}")
        print(f"Current time: {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")

        # NULL timestamps mean "never recorded" — always export those rows.
        cursor.execute("""
            SELECT * FROM auctions
            WHERE discovered_at IS NULL OR discovered_at > ?
            ORDER BY auction_id
        """, (last_sync,))
        new_auctions = [dict(row) for row in cursor.fetchall()]

        cursor.execute("""
            SELECT * FROM lots
            WHERE scraped_at_timestamp IS NULL OR scraped_at_timestamp > ?
            ORDER BY lot_id
        """, (last_sync,))
        new_lots = [dict(row) for row in cursor.fetchall()]
    finally:
        conn.close()

    # One shared timestamp so the auction and lot files pair up by name.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results = {'auctions': 0, 'lots': 0, 'files': {}}

    if new_auctions:
        _export_dataset('auctions', new_auctions, timestamp, results)
    if new_lots:
        _export_dataset('lots', new_lots, timestamp, results)
    if not new_auctions and not new_lots:
        print("\n✓ No new updates since last sync")

    return results
def _sql_literal(val):
    """Render a Python value as a SQLite literal for the generated script."""
    if val is None:
        return "NULL"
    if isinstance(val, (int, float)):
        return str(val)
    # Double embedded single quotes (SQL escaping) for values like "O'Brien".
    escaped = str(val).replace("'", "''")
    return f"'{escaped}'"


def create_upsert_export():
    """Write an INSERT OR REPLACE script for auctions changed since last sync.

    The server executes the generated ``upsert_auctions_{timestamp}.sql`` to
    update-or-insert rows without hitting primary-key constraint errors.
    Does nothing (beyond the query) when there are no new auctions. The
    connection is closed even if the query fails (original leaked on error).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        last_sync = get_last_sync_timestamp()
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        cursor.execute("""
            SELECT * FROM auctions
            WHERE discovered_at IS NULL OR discovered_at > ?
        """, (last_sync,))
        new_auctions = [dict(row) for row in cursor.fetchall()]
    finally:
        conn.close()

    if not new_auctions:
        return

    sql_file = OUTPUT_DIR / f'upsert_auctions_{timestamp}.sql'
    with open(sql_file, 'w', encoding='utf-8') as f:
        f.write("-- UPSERT script for auctions (updates existing, inserts new)\n\n")
        for auction in new_auctions:
            columns = ', '.join(auction.keys())
            values = ', '.join(_sql_literal(v) for v in auction.values())
            f.write(f"INSERT OR REPLACE INTO auctions ({columns})\n")
            f.write(f"VALUES ({values});\n\n")

    print(f"\n✓ Created UPSERT SQL script: {sql_file}")
    print(" Server can execute this to avoid constraint errors")
def main():
    """Run the full sync pipeline: backfill, export, upsert, mark synced."""
    banner = "=" * 60
    print(banner)
    print("DATABASE SYNC UTILITY")
    print(banner)
    print(f"Database: {DB_PATH}")
    print(f"Output: {OUTPUT_DIR}")
    print(banner)

    # 1) Repair derivable NULL columns before exporting.
    fill_missing_auction_fields()

    # 2) Dump everything newer than the saved sync marker.
    print("Exporting incremental updates...")
    summary = export_incremental()

    # 3) Emit an UPSERT script so the server import cannot hit
    #    primary-key constraint errors.
    if summary['auctions'] > 0:
        create_upsert_export()

    # 4) Record this moment; the next run exports only newer records.
    now_ts = int(datetime.now().timestamp())
    save_sync_timestamp(now_ts)

    print("\n" + banner)
    print("SYNC COMPLETE")
    print(banner)
    print(f"New auctions: {summary['auctions']}")
    print(f"New lots: {summary['lots']}")

    if summary['files']:
        print("\nFiles ready for server import:")
        for label, path in summary['files'].items():
            print(f" {label}: {path}")

    print("\nNext sync will only export records newer than:")
    print(f" {datetime.fromtimestamp(now_ts).strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user