#!/usr/bin/env python3
"""
Continuous Auction Monitor - Polls for new auctions and updates.

Runs indefinitely to keep database current with latest Troostwijk data.
"""
import asyncio
import sqlite3
import sys
import time
import traceback
from contextlib import closing
from datetime import datetime

import config
from cache import CacheManager
from scraper import TroostwijkScraper


class AuctionMonitor:
    """Continuously monitors for new auctions and updates."""

    def __init__(self, poll_interval_minutes: int = 30):
        """
        Initialize monitor.

        Args:
            poll_interval_minutes: How often to check for new auctions
                (default: 30 minutes).
        """
        self.poll_interval = poll_interval_minutes * 60  # seconds
        self.scraper = TroostwijkScraper()
        self.last_run = None  # datetime of the most recent completed scan
        self.run_count = 0    # number of scans attempted so far

    async def run_scan(self) -> dict:
        """Execute a full scan for new/updated auctions.

        Returns:
            On success: dict with 'success': True plus new/total
            auction and lot counts. On failure: {'success': False,
            'error': <message>}.
        """
        self.run_count += 1
        print("\n" + "=" * 60)
        print(f"SCAN #{self.run_count} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("=" * 60)

        # Snapshot counts before the scan so we can report deltas.
        before_stats = self._get_stats()

        try:
            # Run the crawler (cache mechanism handles duplicates)
            results = await self.scraper.crawl_auctions(max_pages=config.MAX_PAGES)

            after_stats = self._get_stats()

            # Deltas are derived from DB row counts, not crawler output,
            # so re-scanned (cached) items don't inflate the numbers.
            new_auctions = after_stats['auctions'] - before_stats['auctions']
            new_lots = after_stats['lots'] - before_stats['lots']

            print("\n" + "=" * 60)
            print("SCAN RESULTS")
            print("=" * 60)
            print(f"  New auctions discovered: {new_auctions}")
            print(f"  New lots discovered: {new_lots}")
            print(f"  Total auctions in DB: {after_stats['auctions']}")
            print(f"  Total lots in DB: {after_stats['lots']}")
            print(f"  Pages scanned: {len(results)}")

            # Only re-export when the scan actually found something new.
            if new_auctions > 0 or new_lots > 0:
                print("\n  Exporting updated database...")
                self.scraper.export_to_files()
                print("  ✓ Export complete")

            self.last_run = datetime.now()

            return {
                'success': True,
                'new_auctions': new_auctions,
                'new_lots': new_lots,
                'total_auctions': after_stats['auctions'],
                'total_lots': after_stats['lots']
            }

        except Exception as e:
            # Top-level boundary for a long-running monitor: log the full
            # traceback and report failure instead of killing the loop.
            print(f"\n  ERROR during scan: {e}")
            traceback.print_exc()
            return {'success': False, 'error': str(e)}

    def _get_stats(self) -> dict:
        """Return current database row counts for auctions and lots.

        Returns:
            dict with keys 'auctions' and 'lots'.
        """
        # closing() guarantees the connection is released even if a
        # query raises (the original leaked the handle on error).
        with closing(sqlite3.connect(self.scraper.cache.db_path)) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM auctions")
            auction_count = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM lots")
            lot_count = cursor.fetchone()[0]

        return {
            'auctions': auction_count,
            'lots': lot_count
        }

    async def start(self):
        """Start continuous monitoring loop.

        Runs until interrupted (Ctrl+C / task cancellation), printing a
        shutdown summary on exit.
        """
        print("=" * 60)
        print("AUCTION MONITOR STARTED")
        print("=" * 60)
        print(f"Poll interval: {self.poll_interval / 60:.0f} minutes")
        print(f"Cache database: {config.CACHE_DB}")
        print(f"Rate limit: {config.RATE_LIMIT_SECONDS}s between requests")
        print("=" * 60)
        print("\nPress Ctrl+C to stop\n")

        try:
            while True:
                await self.run_scan()

                # Announce when the next scan will happen.
                next_run = datetime.now().timestamp() + self.poll_interval
                next_run_str = datetime.fromtimestamp(next_run).strftime('%Y-%m-%d %H:%M:%S')
                print(f"\n⏰ Next scan at: {next_run_str}")
                print(f"   Sleeping for {self.poll_interval / 60:.0f} minutes...")

                await asyncio.sleep(self.poll_interval)

        except (KeyboardInterrupt, asyncio.CancelledError):
            # Under asyncio.run(), Ctrl+C is delivered to this coroutine as
            # CancelledError (the runner cancels the task), NOT as
            # KeyboardInterrupt — catching only KeyboardInterrupt here (as
            # the original did) made this shutdown summary unreachable.
            print("\n\n" + "=" * 60)
            print("MONITOR STOPPED BY USER")
            print("=" * 60)
            print(f"Total scans completed: {self.run_count}")
            if self.last_run:
                print(f"Last scan: {self.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
            print("\nDatabase remains intact with all collected data")


def main():
    """Main entry point for monitor.

    Optional argv[1]: poll interval in minutes (default 30).
    """
    poll_interval = 30

    # Allow custom interval via command line
    if len(sys.argv) > 1:
        try:
            poll_interval = int(sys.argv[1])
            print(f"Using custom poll interval: {poll_interval} minutes")
        except ValueError:
            print(f"Invalid interval '{sys.argv[1]}', using default 30 minutes")

    monitor = AuctionMonitor(poll_interval_minutes=poll_interval)
    try:
        asyncio.run(monitor.start())
    except KeyboardInterrupt:
        # asyncio.run() re-raises KeyboardInterrupt after the task has been
        # cancelled and start() has printed its summary; swallow it so the
        # process exits cleanly instead of with a traceback.
        pass


if __name__ == "__main__":
    main()