From 17af27ee99ddc7956042852eb6a1117f23881d46 Mon Sep 17 00:00:00 2001
From: Tour
Date: Sun, 7 Dec 2025 12:20:51 +0100
Subject: [PATCH] Add continuous auction monitor

Add a long-running monitor that polls Troostwijk for new auctions and
lots at a configurable interval, reports per-scan statistics, and
exports the database whenever new data is found. monitor.sh is a small
wrapper that launches the monitor with an optional interval in minutes
(default: 30).
---
 monitor.sh     |  11 ++++
 src/monitor.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100644 monitor.sh
 create mode 100644 src/monitor.py

diff --git a/monitor.sh b/monitor.sh
new file mode 100644
index 0000000..3b0fb5d
--- /dev/null
+++ b/monitor.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Start the auction monitor with custom polling interval
+# Usage: ./monitor.sh [interval_in_minutes]
+# Default: 30 minutes
+
+cd "$(dirname "$0")"
+
+INTERVAL=${1:-30}
+
+echo "Starting auction monitor (polling every $INTERVAL minutes)..."
+python3 src/monitor.py "$INTERVAL"
diff --git a/src/monitor.py b/src/monitor.py
new file mode 100644
index 0000000..32bf37a
--- /dev/null
+++ b/src/monitor.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+"""
+Continuous Auction Monitor - Polls for new auctions and updates
+Runs indefinitely to keep database current with latest Troostwijk data
+"""
+
+import asyncio
+import time
+from datetime import datetime
+import sqlite3
+import config
+from cache import CacheManager
+from scraper import TroostwijkScraper
+
+class AuctionMonitor:
+    """Continuously monitors for new auctions and updates"""
+
+    def __init__(self, poll_interval_minutes: int = 30):
+        """
+        Initialize monitor
+
+        Args:
+            poll_interval_minutes: How often to check for new auctions (default: 30 minutes)
+        """
+        self.poll_interval = poll_interval_minutes * 60  # Convert to seconds
+        self.scraper = TroostwijkScraper()
+        self.last_run = None
+        self.run_count = 0
+
+    async def run_scan(self):
+        """Execute a full scan for new/updated auctions"""
+        self.run_count += 1
+
+        print("\n" + "="*60)
+        print(f"SCAN #{self.run_count} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print("="*60)
+
+        # Get counts before scan
+        before_stats = self._get_stats()
+
+        try:
+            # Run the crawler (cache mechanism handles duplicates)
+            results = await self.scraper.crawl_auctions(max_pages=config.MAX_PAGES)
+
+            # Get counts after scan
+            after_stats = self._get_stats()
+
+            # Calculate differences
+            new_auctions = after_stats['auctions'] - before_stats['auctions']
+            new_lots = after_stats['lots'] - before_stats['lots']
+
+            print("\n" + "="*60)
+            print("SCAN RESULTS")
+            print("="*60)
+            print(f"  New auctions discovered: {new_auctions}")
+            print(f"  New lots discovered: {new_lots}")
+            print(f"  Total auctions in DB: {after_stats['auctions']}")
+            print(f"  Total lots in DB: {after_stats['lots']}")
+            print(f"  Pages scanned: {len(results)}")
+
+            # Export if new data found
+            if new_auctions > 0 or new_lots > 0:
+                print("\n  Exporting updated database...")
+                self.scraper.export_to_files()
+                print("  ✓ Export complete")
+
+            self.last_run = datetime.now()
+
+            return {
+                'success': True,
+                'new_auctions': new_auctions,
+                'new_lots': new_lots,
+                'total_auctions': after_stats['auctions'],
+                'total_lots': after_stats['lots']
+            }
+
+        except Exception as e:
+            print(f"\n  ERROR during scan: {e}")
+            import traceback
+            traceback.print_exc()
+            return {'success': False, 'error': str(e)}
+
+    def _get_stats(self) -> dict:
+        """Get current database statistics"""
+        conn = sqlite3.connect(self.scraper.cache.db_path)
+        cursor = conn.cursor()
+
+        cursor.execute("SELECT COUNT(*) FROM auctions")
+        auction_count = cursor.fetchone()[0]
+
+        cursor.execute("SELECT COUNT(*) FROM lots")
+        lot_count = cursor.fetchone()[0]
+
+        conn.close()
+
+        return {
+            'auctions': auction_count,
+            'lots': lot_count
+        }
+
+    async def start(self):
+        """Start continuous monitoring loop"""
+        print("="*60)
+        print("AUCTION MONITOR STARTED")
+        print("="*60)
+        print(f"Poll interval: {self.poll_interval / 60:.0f} minutes")
+        print(f"Cache database: {config.CACHE_DB}")
+        print(f"Rate limit: {config.RATE_LIMIT_SECONDS}s between requests")
+        print("="*60)
+        print("\nPress Ctrl+C to stop\n")
+
+        try:
+            while True:
+                # Run scan
+                await self.run_scan()
+
+                # Calculate next run time
+                next_run = datetime.now().timestamp() + self.poll_interval
+                next_run_str = datetime.fromtimestamp(next_run).strftime('%Y-%m-%d %H:%M:%S')
+
+                print(f"\n⏰ Next scan at: {next_run_str}")
+                print(f"   Sleeping for {self.poll_interval / 60:.0f} minutes...")
+
+                # Sleep until next scan
+                await asyncio.sleep(self.poll_interval)
+
+        except KeyboardInterrupt:
+            print("\n\n" + "="*60)
+            print("MONITOR STOPPED BY USER")
+            print("="*60)
+            print(f"Total scans completed: {self.run_count}")
+            if self.last_run:
+                print(f"Last scan: {self.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
+            print("\nDatabase remains intact with all collected data")
+
+def main():
+    """Main entry point for monitor"""
+    import sys
+
+    # Default: 30 minute polling
+    poll_interval = 30
+
+    # Allow custom interval via command line
+    if len(sys.argv) > 1:
+        try:
+            poll_interval = int(sys.argv[1])
+            print(f"Using custom poll interval: {poll_interval} minutes")
+        except ValueError:
+            print(f"Invalid interval '{sys.argv[1]}', using default 30 minutes")
+
+    monitor = AuctionMonitor(poll_interval_minutes=poll_interval)
+    asyncio.run(monitor.start())
+
+if __name__ == "__main__":
+    main()