This commit is contained in:
Tour
2025-12-07 12:20:51 +01:00
parent 450ec33101
commit 17af27ee99
2 changed files with 166 additions and 0 deletions

11
monitor.sh Normal file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Start the auction monitor with custom polling interval
# Usage: ./monitor.sh [interval_in_minutes]
# Default: 30 minutes

# Run from the script's own directory so the relative path to src/monitor.py
# resolves no matter where the script is invoked from. Abort if the cd fails
# rather than launching Python from an unexpected working directory.
cd "$(dirname "$0")" || exit 1

# First positional argument is the polling interval in minutes (default 30).
INTERVAL=${1:-30}
echo "Starting auction monitor (polling every $INTERVAL minutes)..."
python3 src/monitor.py "$INTERVAL"

155
src/monitor.py Normal file
View File

@@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""
Continuous Auction Monitor - Polls for new auctions and updates
Runs indefinitely to keep database current with latest Troostwijk data
"""
import asyncio
import time
from datetime import datetime
import sqlite3
import config
from cache import CacheManager
from scraper import TroostwijkScraper
class AuctionMonitor:
    """Continuously monitors for new auctions and updates.

    Each scan runs the scraper's crawl, compares the row counts of the
    ``auctions`` and ``lots`` tables before and after, and triggers an export
    when either count has grown.
    """

    def __init__(self, poll_interval_minutes: int = 30):
        """
        Initialize monitor

        Args:
            poll_interval_minutes: How often to check for new auctions (default: 30 minutes)
        """
        self.poll_interval = poll_interval_minutes * 60  # Convert to seconds
        self.scraper = TroostwijkScraper()
        self.last_run = None  # datetime of the most recent successful scan
        self.run_count = 0  # number of scans started (successful or not)

    async def run_scan(self):
        """Execute a full scan for new/updated auctions.

        Returns:
            dict: On success, ``success`` is True and the dict carries
            ``new_auctions``, ``new_lots``, ``total_auctions`` and
            ``total_lots``. On any failure (crawl, export or database),
            ``success`` is False and ``error`` holds the exception message.
        """
        self.run_count += 1
        print("\n" + "="*60)
        print(f"SCAN #{self.run_count} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*60)

        try:
            # Get counts before scan. This sits inside the try block so that a
            # database error is reported like any other scan failure instead
            # of escaping and terminating the monitoring loop.
            before_stats = self._get_stats()

            # Run the crawler (cache mechanism handles duplicates)
            results = await self.scraper.crawl_auctions(max_pages=config.MAX_PAGES)

            # Get counts after scan
            after_stats = self._get_stats()

            # Calculate differences
            new_auctions = after_stats['auctions'] - before_stats['auctions']
            new_lots = after_stats['lots'] - before_stats['lots']

            print("\n" + "="*60)
            print("SCAN RESULTS")
            print("="*60)
            print(f" New auctions discovered: {new_auctions}")
            print(f" New lots discovered: {new_lots}")
            print(f" Total auctions in DB: {after_stats['auctions']}")
            print(f" Total lots in DB: {after_stats['lots']}")
            print(f" Pages scanned: {len(results)}")

            # Export if new data found
            if new_auctions > 0 or new_lots > 0:
                print("\n Exporting updated database...")
                self.scraper.export_to_files()
                print(" ✓ Export complete")

            # Only successful scans update last_run.
            self.last_run = datetime.now()
            return {
                'success': True,
                'new_auctions': new_auctions,
                'new_lots': new_lots,
                'total_auctions': after_stats['auctions'],
                'total_lots': after_stats['lots']
            }
        except Exception as e:
            # Top-level boundary for one scan: report the failure and let the
            # caller decide whether to try again on the next poll.
            print(f"\n ERROR during scan: {e}")
            import traceback
            traceback.print_exc()
            return {'success': False, 'error': str(e)}

    def _get_stats(self) -> dict:
        """Get current database statistics.

        Returns:
            dict: ``{'auctions': <row count>, 'lots': <row count>}`` read from
            the SQLite database at ``self.scraper.cache.db_path``.

        Raises:
            sqlite3.Error: If the database cannot be queried (for example a
                table is missing).
        """
        conn = sqlite3.connect(self.scraper.cache.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM auctions")
            auction_count = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM lots")
            lot_count = cursor.fetchone()[0]
        finally:
            # Always release the connection, even when a query fails.
            conn.close()
        return {
            'auctions': auction_count,
            'lots': lot_count
        }

    async def start(self):
        """Start continuous monitoring loop.

        Alternates between ``run_scan()`` and sleeping for ``poll_interval``
        seconds until interrupted, then prints a short summary and returns.
        """
        print("="*60)
        print("AUCTION MONITOR STARTED")
        print("="*60)
        print(f"Poll interval: {self.poll_interval / 60:.0f} minutes")
        print(f"Cache database: {config.CACHE_DB}")
        print(f"Rate limit: {config.RATE_LIMIT_SECONDS}s between requests")
        print("="*60)
        print("\nPress Ctrl+C to stop\n")

        try:
            while True:
                # Run scan
                await self.run_scan()

                # Calculate next run time
                next_run = datetime.now().timestamp() + self.poll_interval
                next_run_str = datetime.fromtimestamp(next_run).strftime('%Y-%m-%d %H:%M:%S')
                print(f"\n⏰ Next scan at: {next_run_str}")
                print(f" Sleeping for {self.poll_interval / 60:.0f} minutes...")

                # Sleep until next scan
                await asyncio.sleep(self.poll_interval)
        except (KeyboardInterrupt, asyncio.CancelledError):
            # Under asyncio.run(), Ctrl+C usually reaches this coroutine as a
            # task cancellation (CancelledError) rather than as a
            # KeyboardInterrupt, so both are treated as a stop request.
            # The cancellation is deliberately not re-raised: this is the
            # top-level coroutine and stopping here is the intended shutdown.
            print("\n\n" + "="*60)
            print("MONITOR STOPPED BY USER")
            print("="*60)
            print(f"Total scans completed: {self.run_count}")
            if self.last_run:
                print(f"Last scan: {self.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
            print("\nDatabase remains intact with all collected data")
def main():
    """Main entry point for monitor.

    Reads an optional polling interval (in minutes) from the first
    command-line argument, falling back to 30 minutes when the argument is
    missing, not an integer, or not positive, then runs the monitor until it
    is interrupted.
    """
    import sys

    # Default: 30 minute polling
    poll_interval = 30

    # Allow custom interval via command line
    if len(sys.argv) > 1:
        raw_interval = sys.argv[1]
        try:
            requested = int(raw_interval)
        except ValueError:
            print(f"Invalid interval '{raw_interval}', using default 30 minutes")
        else:
            if requested > 0:
                poll_interval = requested
                print(f"Using custom poll interval: {poll_interval} minutes")
            else:
                # A zero or negative interval would make the sleep between
                # scans return immediately, turning the monitor into a tight
                # loop of back-to-back scans.
                print(f"Invalid interval '{raw_interval}', using default 30 minutes")

    monitor = AuctionMonitor(poll_interval_minutes=poll_interval)
    try:
        asyncio.run(monitor.start())
    except KeyboardInterrupt:
        # asyncio.run() can re-raise KeyboardInterrupt after cancelling the
        # running task; treat Ctrl+C as a normal shutdown, not a crash.
        print("\nMonitor stopped.")
# Run the monitor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()