156 lines
5.0 KiB
Python
156 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Continuous Auction Monitor - Polls for new auctions and updates
|
|
Runs indefinitely to keep database current with latest Troostwijk data
|
|
"""
|
|
|
|
import asyncio
|
|
import time
|
|
from datetime import datetime
|
|
import sqlite3
|
|
import config
|
|
from cache import CacheManager
|
|
from scraper import TroostwijkScraper
|
|
|
|
class AuctionMonitor:
    """Continuously monitors for new auctions and updates.

    Repeatedly runs the scraper, compares row counts in the cache database
    before and after each run, and triggers an export when new rows appeared.
    """

    def __init__(self, poll_interval_minutes: int = 30):
        """
        Initialize monitor

        Args:
            poll_interval_minutes: How often to check for new auctions (default: 30 minutes)
        """
        # Delay, in seconds, between the end of one scan and the start of the next.
        self.poll_interval = poll_interval_minutes * 60
        self.scraper = TroostwijkScraper()
        # Time the most recent *successful* scan finished; None until one succeeds.
        self.last_run = None
        # Number of scans started (failed scans are counted too).
        self.run_count = 0

    async def run_scan(self):
        """Execute a full scan for new/updated auctions.

        Any ``Exception`` raised while reading statistics, crawling, or
        exporting is reported and converted into a failure result, so one bad
        scan does not terminate the monitoring loop.

        Returns:
            dict: On success, ``{'success': True, 'new_auctions', 'new_lots',
            'total_auctions', 'total_lots'}``. On failure,
            ``{'success': False, 'error': <message>}``.
        """
        self.run_count += 1

        print("\n" + "="*60)
        print(f"SCAN #{self.run_count} - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*60)

        try:
            # Baseline counts are taken inside the try block: a database
            # error here (locked file, missing table) must produce a failure
            # result just like a crawl error, not escape to the caller.
            before_stats = self._get_stats()

            # Run the crawler (cache mechanism handles duplicates)
            results = await self.scraper.crawl_auctions(max_pages=config.MAX_PAGES)

            after_stats = self._get_stats()

            # Row-count deltas are used as the measure of "new" data.
            new_auctions = after_stats['auctions'] - before_stats['auctions']
            new_lots = after_stats['lots'] - before_stats['lots']

            print("\n" + "="*60)
            print("SCAN RESULTS")
            print("="*60)
            print(f" New auctions discovered: {new_auctions}")
            print(f" New lots discovered: {new_lots}")
            print(f" Total auctions in DB: {after_stats['auctions']}")
            print(f" Total lots in DB: {after_stats['lots']}")
            # NOTE(review): presumably crawl_auctions returns one entry per
            # page scanned; confirm against the scraper.
            print(f" Pages scanned: {len(results)}")

            # Export only when the scan actually added rows.
            if new_auctions > 0 or new_lots > 0:
                print("\n Exporting updated database...")
                self.scraper.export_to_files()
                print(" ✓ Export complete")

            self.last_run = datetime.now()

            return {
                'success': True,
                'new_auctions': new_auctions,
                'new_lots': new_lots,
                'total_auctions': after_stats['auctions'],
                'total_lots': after_stats['lots'],
            }

        except Exception as e:
            print(f"\n ERROR during scan: {e}")
            import traceback
            traceback.print_exc()
            return {'success': False, 'error': str(e)}

    def _get_stats(self) -> dict:
        """Get current database statistics.

        Returns:
            dict: ``{'auctions': <row count>, 'lots': <row count>}`` read from
            the scraper's cache database.

        Raises:
            sqlite3.Error: If the database cannot be queried.
        """
        conn = sqlite3.connect(self.scraper.cache.db_path)
        try:
            cursor = conn.cursor()

            cursor.execute("SELECT COUNT(*) FROM auctions")
            auction_count = cursor.fetchone()[0]

            cursor.execute("SELECT COUNT(*) FROM lots")
            lot_count = cursor.fetchone()[0]
        finally:
            # Always release the connection, even when a query fails.
            conn.close()

        return {
            'auctions': auction_count,
            'lots': lot_count,
        }

    def _print_stop_summary(self):
        """Print the shutdown banner and scan totals."""
        print("\n\n" + "="*60)
        print("MONITOR STOPPED BY USER")
        print("="*60)
        print(f"Total scans completed: {self.run_count}")
        if self.last_run:
            print(f"Last scan: {self.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
        print("\nDatabase remains intact with all collected data")

    async def start(self):
        """Start continuous monitoring loop.

        Runs a scan, sleeps for ``poll_interval`` seconds, and repeats until
        interrupted. A shutdown summary is printed when the loop is stopped.

        Raises:
            asyncio.CancelledError: Re-raised after the summary is printed
                when the task running this coroutine is cancelled.
        """
        print("="*60)
        print("AUCTION MONITOR STARTED")
        print("="*60)
        print(f"Poll interval: {self.poll_interval / 60:.0f} minutes")
        print(f"Cache database: {config.CACHE_DB}")
        print(f"Rate limit: {config.RATE_LIMIT_SECONDS}s between requests")
        print("="*60)
        print("\nPress Ctrl+C to stop\n")

        try:
            while True:
                await self.run_scan()

                # Work out when the next scan is due, for display only.
                next_run = datetime.now().timestamp() + self.poll_interval
                next_run_str = datetime.fromtimestamp(next_run).strftime('%Y-%m-%d %H:%M:%S')

                print(f"\n⏰ Next scan at: {next_run_str}")
                print(f" Sleeping for {self.poll_interval / 60:.0f} minutes...")

                await asyncio.sleep(self.poll_interval)

        except KeyboardInterrupt:
            # Reached only if the interrupt is raised while this coroutine's
            # own code is executing.
            self._print_stop_summary()
        except asyncio.CancelledError:
            # Under asyncio.run() (Python 3.11+), Ctrl+C cancels the main task
            # rather than raising KeyboardInterrupt inside it, so this is the
            # usual shutdown path. Re-raise so the cancellation is not
            # swallowed and the runner can finish its own interrupt handling.
            self._print_stop_summary()
            raise


def main():
    """Main entry point for monitor.

    Reads an optional poll interval (whole minutes) from the first
    command-line argument, then runs the monitor until interrupted. A value
    that is not a positive integer is rejected and the 30 minute default is
    used instead.
    """
    import sys

    # Default: 30 minute polling
    poll_interval = 30

    # Allow custom interval via command line
    if len(sys.argv) > 1:
        try:
            requested = int(sys.argv[1])
            # A zero or negative interval would make the sleep between scans
            # return immediately, so scans would run back-to-back.
            if requested <= 0:
                raise ValueError("poll interval must be positive")
        except ValueError:
            print(f"Invalid interval '{sys.argv[1]}', using default 30 minutes")
        else:
            poll_interval = requested
            print(f"Using custom poll interval: {poll_interval} minutes")

    monitor = AuctionMonitor(poll_interval_minutes=poll_interval)
    try:
        asyncio.run(monitor.start())
    except KeyboardInterrupt:
        # asyncio.run() re-raises KeyboardInterrupt once the interrupted main
        # task has been cancelled; Ctrl+C is the expected way to stop, so
        # exit without a traceback.
        pass


# Script entry point: start the monitor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()