Initial clean commit
This commit is contained in:
326
docs/ARCHITECTURE-TROOSTWIJK-SCRAPER.md
Normal file
326
docs/ARCHITECTURE-TROOSTWIJK-SCRAPER.md
Normal file
@@ -0,0 +1,326 @@
|
||||
# Troostwijk Scraper - Architecture & Data Flow
|
||||
|
||||
## System Overview
|
||||
|
||||
The scraper follows a **3-phase hierarchical crawling pattern** to extract auction and lot data from the Troostwijk Auctions website.
|
||||
|
||||
## Architecture Diagram
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ TROOSTWIJK SCRAPER │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PHASE 1: COLLECT AUCTION URLs │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Listing Page │────────▶│ Extract /a/ │ │
|
||||
│ │ /auctions? │ │ auction URLs │ │
|
||||
│ │ page=1..N │ └──────────────┘ │
|
||||
│ └──────────────┘ │ │
|
||||
│ ▼ │
|
||||
│ [ List of Auction URLs ] │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PHASE 2: EXTRACT LOT URLs FROM AUCTIONS │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Auction Page │────────▶│ Parse │ │
|
||||
│ │ /a/... │ │ __NEXT_DATA__│ │
|
||||
│ └──────────────┘ │ JSON │ │
|
||||
│ │ └──────────────┘ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Save Auction │ │ Extract /l/ │ │
|
||||
│ │ Metadata │ │ lot URLs │ │
|
||||
│ │ to DB │ └──────────────┘ │
|
||||
│ └──────────────┘ │ │
|
||||
│ ▼ │
|
||||
│ [ List of Lot URLs ] │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ PHASE 3: SCRAPE LOT DETAILS │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Lot Page │────────▶│ Parse │ │
|
||||
│ │ /l/... │ │ __NEXT_DATA__│ │
|
||||
│ └──────────────┘ │ JSON │ │
|
||||
│ └──────────────┘ │
|
||||
│ │ │
|
||||
│ ┌─────────────────────────┴─────────────────┐ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Save Lot │ │ Save Images │ │
|
||||
│ │ Details │ │ URLs to DB │ │
|
||||
│ │ to DB │ └──────────────┘ │
|
||||
│ └──────────────┘ │ │
|
||||
│ ▼ │
|
||||
│ [Optional Download] │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Database Schema
|
||||
|
||||
```sql
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ CACHE TABLE (HTML Storage with Compression) │
|
||||
├──────────────────────────────────────────────────────────────────┤
|
||||
│ cache │
|
||||
│ ├── url (TEXT, PRIMARY KEY) │
|
||||
│ ├── content (BLOB) -- Compressed HTML (zlib) │
|
||||
│ ├── timestamp (REAL) │
|
||||
│ ├── status_code (INTEGER) │
|
||||
│ └── compressed (INTEGER) -- 1=compressed, 0=plain │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ AUCTIONS TABLE │
|
||||
├──────────────────────────────────────────────────────────────────┤
|
||||
│ auctions │
|
||||
│ ├── auction_id (TEXT, PRIMARY KEY) -- e.g. "A7-39813" │
|
||||
│ ├── url (TEXT, UNIQUE) │
|
||||
│ ├── title (TEXT) │
|
||||
│ ├── location (TEXT) -- e.g. "Cluj-Napoca, RO" │
|
||||
│ ├── lots_count (INTEGER) │
|
||||
│ ├── first_lot_closing_time (TEXT) │
|
||||
│ └── scraped_at (TEXT) │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ LOTS TABLE │
|
||||
├──────────────────────────────────────────────────────────────────┤
|
||||
│ lots │
|
||||
│ ├── lot_id (TEXT, PRIMARY KEY) -- e.g. "A1-28505-5" │
|
||||
│ ├── auction_id (TEXT) -- FK to auctions │
|
||||
│ ├── url (TEXT, UNIQUE) │
|
||||
│ ├── title (TEXT) │
|
||||
│ ├── current_bid (TEXT) -- "€123.45" or "No bids" │
|
||||
│ ├── bid_count (INTEGER) │
|
||||
│ ├── closing_time (TEXT) │
|
||||
│ ├── viewing_time (TEXT) │
|
||||
│ ├── pickup_date (TEXT) │
|
||||
│ ├── location (TEXT) -- e.g. "Dongen, NL" │
|
||||
│ ├── description (TEXT) │
|
||||
│ ├── category (TEXT) │
|
||||
│ └── scraped_at (TEXT) │
|
||||
│ FOREIGN KEY (auction_id) → auctions(auction_id) │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌──────────────────────────────────────────────────────────────────┐
|
||||
│ IMAGES TABLE (Image URLs & Download Status) │
|
||||
├──────────────────────────────────────────────────────────────────┤
|
||||
│ images ◀── THIS TABLE HOLDS IMAGE LINKS│
|
||||
│ ├── id (INTEGER, PRIMARY KEY AUTOINCREMENT) │
|
||||
│ ├── lot_id (TEXT) -- FK to lots │
|
||||
│ ├── url (TEXT) -- Image URL │
|
||||
│ ├── local_path (TEXT) -- Path after download │
|
||||
│ └── downloaded (INTEGER) -- 0=pending, 1=downloaded │
|
||||
│ FOREIGN KEY (lot_id) → lots(lot_id) │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Sequence Diagram
|
||||
|
||||
```
|
||||
User Scraper Playwright Cache DB Data Tables
|
||||
│ │ │ │ │
|
||||
│ Run │ │ │ │
|
||||
├──────────────▶│ │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ Phase 1: Listing Pages │ │
|
||||
│ ├───────────────▶│ │ │
|
||||
│ │ goto() │ │ │
|
||||
│ │◀───────────────┤ │ │
|
||||
│ │ HTML │ │ │
|
||||
│ ├───────────────────────────────▶│ │
|
||||
│ │ compress & cache │ │
|
||||
│ │ │ │ │
|
||||
│ │ Phase 2: Auction Pages │ │
|
||||
│ ├───────────────▶│ │ │
|
||||
│ │◀───────────────┤ │ │
|
||||
│ │ HTML │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ Parse __NEXT_DATA__ JSON │ │
|
||||
│ │────────────────────────────────────────────────▶│
|
||||
│ │ │ │ INSERT auctions
|
||||
│ │ │ │ │
|
||||
│ │ Phase 3: Lot Pages │ │
|
||||
│ ├───────────────▶│ │ │
|
||||
│ │◀───────────────┤ │ │
|
||||
│ │ HTML │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ Parse __NEXT_DATA__ JSON │ │
|
||||
│ │────────────────────────────────────────────────▶│
|
||||
│ │ │ │ INSERT lots │
|
||||
│ │────────────────────────────────────────────────▶│
|
||||
│ │ │ │ INSERT images│
|
||||
│ │ │ │ │
|
||||
│ │ Export to CSV/JSON │ │
|
||||
│ │◀────────────────────────────────────────────────┤
|
||||
│ │ Query all data │ │
|
||||
│◀──────────────┤ │ │ │
|
||||
│ Results │ │ │ │
|
||||
```
|
||||
|
||||
## Data Flow Details
|
||||
|
||||
### 1. **Page Retrieval & Caching**
|
||||
```
|
||||
Request URL
|
||||
│
|
||||
├──▶ Check cache DB (with timestamp validation)
|
||||
│ │
|
||||
│ ├─[HIT]──▶ Decompress (if compressed=1)
|
||||
│ │ └──▶ Return HTML
|
||||
│ │
|
||||
│ └─[MISS]─▶ Fetch via Playwright
|
||||
│ │
|
||||
│ ├──▶ Compress HTML (zlib level 9)
|
||||
│ │ ~70-90% size reduction
|
||||
│ │
|
||||
│ └──▶ Store in cache DB (compressed=1)
|
||||
│
|
||||
└──▶ Return HTML for parsing
|
||||
```
|
||||
|
||||
### 2. **JSON Parsing Strategy**
|
||||
```
|
||||
HTML Content
|
||||
│
|
||||
└──▶ Extract <script id="__NEXT_DATA__">
|
||||
│
|
||||
├──▶ Parse JSON
|
||||
│ │
|
||||
│ ├─[has pageProps.lot]──▶ Individual LOT
|
||||
│ │ └──▶ Extract: title, bid, location, images, etc.
|
||||
│ │
|
||||
│ └─[has pageProps.auction]──▶ AUCTION
|
||||
│ │
|
||||
│ ├─[has lots[] array]──▶ Auction with lots
|
||||
│ │ └──▶ Extract: title, location, lots_count
|
||||
│ │
|
||||
│ └─[no lots[] array]──▶ Old format lot
|
||||
│ └──▶ Parse as lot
|
||||
│
|
||||
└──▶ Fallback to HTML regex parsing (if JSON fails)
|
||||
```
|
||||
|
||||
### 3. **Image Handling**
|
||||
```
|
||||
Lot Page Parsed
|
||||
│
|
||||
├──▶ Extract images[] from JSON
|
||||
│ │
|
||||
│ └──▶ INSERT INTO images (lot_id, url, downloaded=0)
|
||||
│
|
||||
└──▶ [If DOWNLOAD_IMAGES=True]
|
||||
│
|
||||
├──▶ Download each image
|
||||
│ │
|
||||
│ ├──▶ Save to: /images/{lot_id}/001.jpg
|
||||
│ │
|
||||
│ └──▶ UPDATE images SET local_path=?, downloaded=1
|
||||
│
|
||||
└──▶ Rate limit between downloads (0.5s)
|
||||
```
|
||||
|
||||
## Key Configuration
|
||||
|
||||
| Setting | Value | Purpose |
|
||||
|---------|-------|---------|
|
||||
| `CACHE_DB` | `/mnt/okcomputer/output/cache.db` | SQLite database path |
|
||||
| `IMAGES_DIR` | `/mnt/okcomputer/output/images` | Downloaded images storage |
|
||||
| `RATE_LIMIT_SECONDS` | `0.5` | Delay between requests |
|
||||
| `DOWNLOAD_IMAGES` | `False` | Toggle image downloading |
|
||||
| `MAX_PAGES` | `50` | Number of listing pages to crawl |
|
||||
|
||||
## Output Files
|
||||
|
||||
```
|
||||
/mnt/okcomputer/output/
|
||||
├── cache.db # SQLite database (compressed HTML + data)
|
||||
├── auctions_{timestamp}.json # Exported auctions
|
||||
├── auctions_{timestamp}.csv # Exported auctions
|
||||
├── lots_{timestamp}.json # Exported lots
|
||||
├── lots_{timestamp}.csv # Exported lots
|
||||
└── images/ # Downloaded images (if enabled)
|
||||
├── A1-28505-5/
|
||||
│ ├── 001.jpg
|
||||
│ └── 002.jpg
|
||||
└── A1-28505-6/
|
||||
└── 001.jpg
|
||||
```
|
||||
|
||||
## Extension Points for Integration
|
||||
|
||||
### 1. **Downstream Processing Pipeline**
|
||||
```python
|
||||
# Query lots without downloaded images
|
||||
SELECT lot_id, url FROM images WHERE downloaded = 0
|
||||
|
||||
# Process images: OCR, classification, etc.
|
||||
# Update status when complete
|
||||
UPDATE images SET downloaded = 1, local_path = ? WHERE id = ?
|
||||
```
|
||||
|
||||
### 2. **Real-time Monitoring**
|
||||
```python
|
||||
# Check for new lots every N minutes
|
||||
SELECT COUNT(*) FROM lots WHERE scraped_at > datetime('now', '-1 hour')
|
||||
|
||||
# Monitor bid changes
|
||||
SELECT lot_id, current_bid, bid_count FROM lots WHERE bid_count > 0
|
||||
```
|
||||
|
||||
### 3. **Analytics & Reporting**
|
||||
```python
|
||||
# Top locations
|
||||
SELECT location, COUNT(*) as lot_count FROM lots GROUP BY location
|
||||
|
||||
# Auction statistics
|
||||
SELECT
|
||||
a.auction_id,
|
||||
a.title,
|
||||
COUNT(l.lot_id) as actual_lots,
|
||||
SUM(CASE WHEN l.bid_count > 0 THEN 1 ELSE 0 END) as lots_with_bids
|
||||
FROM auctions a
|
||||
LEFT JOIN lots l ON a.auction_id = l.auction_id
|
||||
GROUP BY a.auction_id
|
||||
```
|
||||
|
||||
### 4. **Image Processing Integration**
|
||||
```python
|
||||
# Get all images for a lot
|
||||
SELECT url, local_path FROM images WHERE lot_id = 'A1-28505-5'
|
||||
|
||||
# Batch process unprocessed images
|
||||
SELECT i.id, i.lot_id, i.local_path, l.title, l.category
|
||||
FROM images i
|
||||
JOIN lots l ON i.lot_id = l.lot_id
|
||||
WHERE i.downloaded = 1 AND i.local_path IS NOT NULL
|
||||
```
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
- **Compression**: ~70-90% HTML size reduction (1GB → ~100-300MB)
|
||||
- **Rate Limiting**: Exactly 0.5s between requests (respectful scraping)
|
||||
- **Caching**: 24-hour default cache validity (configurable)
|
||||
- **Throughput**: up to ~7,200 pages/hour (theoretical ceiling of 3,600 s ÷ 0.5 s rate limit; page load and parse time lower the real rate)
|
||||
- **Scalability**: SQLite handles millions of rows efficiently
|
||||
|
||||
## Error Handling
|
||||
|
||||
- **Network failures**: Cached as status_code=500, retry after cache expiry
|
||||
- **Parse failures**: Falls back to HTML regex patterns
|
||||
- **Compression errors**: Auto-detects and handles uncompressed legacy data
|
||||
- **Missing fields**: Defaults to "No bids", empty string, or 0
|
||||
|
||||
## Rate Limiting & Ethics
|
||||
|
||||
- **REQUIRED**: 0.5 second delay between ALL requests
|
||||
- **Respects cache**: Avoids unnecessary re-fetching
|
||||
- **User-Agent**: Identifies as standard browser
|
||||
- **No parallelization**: Single-threaded sequential crawling
|
||||
258
docs/DATABASE_ARCHITECTURE.md
Normal file
258
docs/DATABASE_ARCHITECTURE.md
Normal file
@@ -0,0 +1,258 @@
|
||||
# Database Architecture
|
||||
|
||||
## Overview
|
||||
|
||||
The Auctiora auction monitoring system uses **SQLite** as its database engine, shared between the scraper process and the monitor application for simplicity and performance.
|
||||
|
||||
## Current State (Dec 2025)
|
||||
|
||||
- **Database**: `C:\mnt\okcomputer\output\cache.db`
|
||||
- **Size**: 1.6 GB
|
||||
- **Records**: 16,006 lots, 536,502 images
|
||||
- **Concurrent Processes**: 2 (scraper + monitor)
|
||||
- **Access Pattern**: Scraper writes, Monitor reads + occasional updates
|
||||
|
||||
## Why SQLite?
|
||||
|
||||
### ✅ Advantages for This Use Case
|
||||
|
||||
1. **Embedded Architecture**
|
||||
- No separate database server to manage
|
||||
- Zero network latency (local file access)
|
||||
- Perfect for single-machine scraping + monitoring
|
||||
|
||||
2. **Excellent Read Performance**
|
||||
- Monitor performs mostly SELECT queries
|
||||
- Well-indexed access by `lot_id`, `url`, `auction_id`
|
||||
- Sub-millisecond query times for simple lookups
|
||||
|
||||
3. **Simplicity**
|
||||
- Single file database
|
||||
- Backup is a simple file copy (use `sqlite3 .backup` while the database is in use)
|
||||
- No connection pooling or authentication overhead
|
||||
|
||||
4. **Proven Scalability**
|
||||
- Theoretical maximum database size of 281 TB
|
||||
- 1.6 GB is only 0.0006% of capacity
|
||||
- Handles billions of rows efficiently
|
||||
|
||||
5. **WAL Mode for Concurrency**
|
||||
- Multiple readers don't block each other
|
||||
- Readers don't block writers
|
||||
- Writers don't block readers
|
||||
- Perfect for scraper + monitor workload
|
||||
|
||||
## Configuration
|
||||
|
||||
### Connection String (DatabaseService.java:28)
|
||||
```java
|
||||
jdbc:sqlite:C:\mnt\okcomputer\output\cache.db?journal_mode=WAL&busy_timeout=10000
|
||||
```
|
||||
|
||||
### Key PRAGMAs (DatabaseService.java:38-40)
|
||||
```sql
|
||||
PRAGMA journal_mode=WAL; -- Write-Ahead Logging for concurrency
|
||||
PRAGMA busy_timeout=10000; -- 10s retry on lock contention
|
||||
PRAGMA synchronous=NORMAL; -- Balance safety and performance
|
||||
```
|
||||
|
||||
### What These Settings Do
|
||||
|
||||
| Setting | Purpose | Impact |
|
||||
|---------|---------|--------|
|
||||
| `journal_mode=WAL` | Write-Ahead Logging | Enables concurrent read/write access |
|
||||
| `busy_timeout=10000` | Wait 10s on lock | Prevents immediate `SQLITE_BUSY` errors |
|
||||
| `synchronous=NORMAL` | Balanced sync mode | Faster writes; in WAL mode the database stays consistent after a crash, but the most recent commits can be lost on power failure |
|
||||
|
||||
## Schema Integration
|
||||
|
||||
### Scraper Schema (Read-Only for Monitor)
|
||||
```sql
|
||||
CREATE TABLE lots (
|
||||
lot_id TEXT PRIMARY KEY,
|
||||
auction_id TEXT,
|
||||
url TEXT UNIQUE, -- ⚠️ Enforced by scraper
|
||||
title TEXT,
|
||||
current_bid TEXT,
|
||||
closing_time TEXT,
|
||||
manufacturer TEXT,
|
||||
type TEXT,
|
||||
year INTEGER,
|
||||
currency TEXT DEFAULT 'EUR',
|
||||
closing_notified INTEGER DEFAULT 0,
|
||||
...
|
||||
)
|
||||
```
|
||||
|
||||
### Monitor Schema (Tables Created by Monitor)
|
||||
```sql
|
||||
CREATE TABLE images (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
lot_id INTEGER,
|
||||
url TEXT,
|
||||
local_path TEXT,
|
||||
labels TEXT, -- Object detection results
|
||||
processed_at INTEGER,
|
||||
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
|
||||
)
|
||||
```
|
||||
|
||||
### Handling Schema Conflicts
|
||||
|
||||
**Problem**: Scraper has `UNIQUE` constraint on `lots.url`
|
||||
|
||||
**Solution** (DatabaseService.java:361-424):
|
||||
```java
|
||||
// Try UPDATE first
|
||||
UPDATE lots SET ... WHERE lot_id = ?
|
||||
|
||||
// If no rows updated, INSERT OR IGNORE
|
||||
INSERT OR IGNORE INTO lots (...) VALUES (...)
|
||||
```
|
||||
|
||||
This approach:
|
||||
- ✅ Updates existing lots by `lot_id`
|
||||
- ✅ Skips inserts that violate UNIQUE constraints
|
||||
- ✅ No crashes on re-imports or duplicate URLs
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
### Current Performance
|
||||
- Simple SELECT by ID: <1ms
|
||||
- Full table scan (16K lots): ~50ms
|
||||
- Image INSERT: <5ms
|
||||
- Concurrent operations: No blocking observed
|
||||
|
||||
### Scalability Projections
|
||||
|
||||
| Metric | Current | 1 Year | 3 Years | SQLite Limit |
|
||||
|--------|---------|--------|---------|--------------|
|
||||
| Lots | 16K | 365K | 1M | 1B+ rows |
|
||||
| Images | 536K | 19M | 54M | 1B+ rows |
|
||||
| DB Size | 1.6GB | 36GB | 100GB | 281TB |
|
||||
| Queries | <1ms | <5ms | <20ms | Depends on indexes |
|
||||
|
||||
## When to Migrate to PostgreSQL/MySQL
|
||||
|
||||
### 🚨 Migration Triggers
|
||||
|
||||
Consider migrating if you encounter **any** of these:
|
||||
|
||||
1. **Concurrency Limits**
|
||||
- >5 concurrent writers needed
|
||||
- Frequent `SQLITE_BUSY` errors despite WAL mode
|
||||
- Need for distributed access across multiple servers
|
||||
|
||||
2. **Performance Degradation**
|
||||
- Database >50GB AND queries >1s for simple SELECTs
|
||||
- Complex JOIN queries become bottleneck
|
||||
- Index sizes exceed available RAM
|
||||
|
||||
3. **Operational Requirements**
|
||||
- Need for replication (master/slave)
|
||||
- Geographic distribution required
|
||||
- High availability / failover needed
|
||||
- Remote access from multiple locations
|
||||
|
||||
4. **Advanced Features**
|
||||
- Full-text search on large text fields
|
||||
- Complex analytical queries (window functions, CTEs)
|
||||
- User management and fine-grained permissions
|
||||
- Connection pooling for web applications
|
||||
|
||||
### Migration Path (If Needed)
|
||||
|
||||
1. **Choose Database**: PostgreSQL (recommended) or MySQL
|
||||
2. **Schema Export**: Use SQLite `.schema` command
|
||||
3. **Data Migration**: Use `sqlite3-to-postgres` or custom scripts
|
||||
4. **Update Connection**: Change JDBC URL in `application.properties`
|
||||
5. **Update Queries**: Fix SQL dialect differences
|
||||
6. **Performance Tuning**: Create appropriate indexes
|
||||
|
||||
Example PostgreSQL configuration:
|
||||
```properties
|
||||
# application.properties
|
||||
auction.database.url=jdbc:postgresql://localhost:5432/auctiora
|
||||
auction.database.username=monitor
|
||||
auction.database.password=${DB_PASSWORD}
|
||||
```
|
||||
|
||||
## Current Recommendation: ✅ **Stick with SQLite**
|
||||
|
||||
### Rationale
|
||||
|
||||
1. **Sufficient Capacity**: 1.6GB is 0.0006% of SQLite's limit
|
||||
2. **Excellent Performance**: Sub-millisecond queries
|
||||
3. **Simple Operations**: No complex transactions or analytics
|
||||
4. **Low Concurrency**: Only 2 processes (scraper + monitor)
|
||||
5. **Local Architecture**: No need for network DB access
|
||||
6. **Zero Maintenance**: No DB server to manage or monitor
|
||||
|
||||
### Monitoring Dashboard Metrics
|
||||
|
||||
Track these to know when to reconsider:
|
||||
|
||||
```sql
|
||||
-- Add to praetium.html dashboard
|
||||
SELECT
|
||||
(SELECT COUNT(*) FROM lots) as lot_count,
|
||||
(SELECT COUNT(*) FROM images) as image_count,
|
||||
(SELECT page_count * page_size FROM pragma_page_count(), pragma_page_size()) as db_size_bytes,
|
||||
(SELECT (page_count - freelist_count) * 100.0 / page_count FROM pragma_page_count(), pragma_freelist_count()) as db_utilization
|
||||
```
|
||||
|
||||
**Review decision when**:
|
||||
- Database >20GB
|
||||
- Query times >500ms for simple lookups
|
||||
- More than 3 concurrent processes needed
|
||||
|
||||
## Backup Strategy
|
||||
|
||||
### Recommended Approach
|
||||
|
||||
```bash
|
||||
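# Nightly backup via Windows Task Scheduler. NOTE: $(date +%Y%m%d) is POSIX-shell syntax (e.g. Git Bash); cmd.exe passes it through unexpanded.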
|
||||
sqlite3 C:\mnt\okcomputer\output\cache.db ".backup C:\backups\cache_$(date +%Y%m%d).db"
|
||||
|
||||
# Keep last 30 days
|
||||
forfiles /P C:\backups /M cache_*.db /D -30 /C "cmd /c del @path"
|
||||
```
|
||||
|
||||
### WAL File Management
|
||||
|
||||
SQLite creates additional files in WAL mode:
|
||||
- `cache.db` - Main database
|
||||
- `cache.db-wal` - Write-Ahead Log
|
||||
- `cache.db-shm` - Shared memory
|
||||
|
||||
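**Important**: Prefer the `sqlite3 ... ".backup"` command, which writes a single consistent snapshot even while the database is in use. If you copy the files manually instead, stop all writers first and back up all three files together for consistency.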
|
||||
|
||||
## Integration Points
|
||||
|
||||
### Scraper Process
|
||||
- **Writes**: INSERT new lots, auctions, images
|
||||
- **Schema Owner**: Creates tables, enforces constraints
|
||||
- **Frequency**: Recurring (runs every 30 minutes)
|
||||
|
||||
### Monitor Process (Auctiora)
|
||||
- **Reads**: SELECT lots, auctions for monitoring
|
||||
- **Writes**: UPDATE bid amounts, notification flags; INSERT image processing results
|
||||
- **Schema**: Adds `images` table for object detection
|
||||
- **Frequency**: Every 15 seconds (dashboard refresh)
|
||||
|
||||
### Conflict Resolution
|
||||
|
||||
| Conflict | Strategy | Implementation |
|
||||
|----------|----------|----------------|
|
||||
| Duplicate lot_id | UPDATE instead of INSERT | DatabaseService.upsertLot() |
|
||||
| Duplicate URL | INSERT OR IGNORE | Silent skip |
|
||||
| Oversized IDs (>Long.MAX_VALUE) | Return 0L, skip import | ScraperDataAdapter.extractNumericId() |
|
||||
| Invalid timestamps | Try-catch, log, continue | DatabaseService.getAllAuctions() |
|
||||
| Database locked | 10s busy_timeout + WAL | Connection string |
|
||||
|
||||
## References
|
||||
|
||||
- [SQLite Documentation](https://www.sqlite.org/docs.html)
|
||||
- [WAL Mode](https://www.sqlite.org/wal.html)
|
||||
- [SQLite Limits](https://www.sqlite.org/limits.html)
|
||||
- [When to Use SQLite](https://www.sqlite.org/whentouse.html)
|
||||
109
docs/DATA_SYNC_SETUP.md
Normal file
109
docs/DATA_SYNC_SETUP.md
Normal file
@@ -0,0 +1,109 @@
|
||||
# Production Data Sync Setup
|
||||
|
||||
Quick reference for syncing production data from `athena.lan` to your local development environment.
|
||||
|
||||
## 🚀 One-Command Setup
|
||||
|
||||
### Linux/Mac
|
||||
```bash
|
||||
./scripts/sync-production-data.sh
|
||||
```
|
||||
|
||||
## 📋 Complete Usage
|
||||
|
||||
### Bash (Linux/Mac/Git Bash)
|
||||
```bash
|
||||
# Database only
|
||||
./scripts/sync-production-data.sh --db-only
|
||||
|
||||
# Everything
|
||||
./scripts/sync-production-data.sh --all
|
||||
|
||||
# Images only
|
||||
./scripts/sync-production-data.sh --images-only
|
||||
```
|
||||
|
||||
## 🔧 What It Does
|
||||
|
||||
1. **Connects to athena.lan** via SSH
|
||||
2. **Copies database** from Docker volume to /tmp
|
||||
3. **Downloads to local** machine (c:\mnt\okcomputer\cache.db)
|
||||
4. **Backs up** existing local database automatically
|
||||
5. **Shows statistics** (auction count, lot count, etc.)
|
||||
6. **Cleans up** temporary files on remote server
|
||||
|
||||
### With Images
|
||||
- Also syncs the `/data/images/` directory
|
||||
- Uses rsync for incremental sync (if available)
|
||||
- Can be large (several GB)
|
||||
|
||||
## 📊 What You Get
|
||||
|
||||
### Database (`cache.db`)
|
||||
- **~8.9 GB** of production data
|
||||
- 16,000+ lots
|
||||
- 536,000+ images metadata
|
||||
- Full auction history
|
||||
- HTTP cache from scraper
|
||||
|
||||
### Images (`images/`)
|
||||
- Downloaded lot images
|
||||
- Organized by lot ID
|
||||
- Variable size (can be large)
|
||||
|
||||
## ⚡ Quick Workflow
|
||||
|
||||
### Daily Development
|
||||
```powershell
|
||||
# Morning: Get fresh data
|
||||
.\scripts\Sync-ProductionData.sh -Force
|
||||
|
||||
# Develop & test
|
||||
mvn quarkus:dev
|
||||
|
||||
# View dashboard
|
||||
start http://localhost:8080
|
||||
```
|
||||
|
||||
## 🔒 Safety Features
|
||||
|
||||
- ✅ **Automatic backups** before overwriting
|
||||
- ✅ **Confirmation prompts** (unless `-Force`)
|
||||
- ✅ **Error handling** with clear messages
|
||||
- ✅ **Cleanup** of temporary files
|
||||
- ✅ **Non-destructive** - production data is never modified
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### "Permission denied" or SSH errors
|
||||
```bash
|
||||
# Test SSH connection
|
||||
ssh tour@athena.lan "echo OK"
|
||||
|
||||
# If fails, check your SSH key
|
||||
ssh-add -l
|
||||
```
|
||||
|
||||
### Database already exists
|
||||
- Script automatically backs up existing database
|
||||
- Backup format: `cache.db.backup-YYYYMMDD-HHMMSS`
|
||||
|
||||
### Slow image transfer
|
||||
- Install rsync for 10x faster incremental sync
|
||||
- Or sync database only: `./scripts/sync-production-data.sh --db-only` (database-only is also the default)
|
||||
|
||||
## 📚 Full Documentation
|
||||
|
||||
See [scripts/README.md](../scripts/README.md) for:
|
||||
- Prerequisites
|
||||
- Performance tips
|
||||
- Automation setup
|
||||
- Detailed troubleshooting
|
||||
|
||||
## 🎯 Common Use Cases
|
||||
|
||||
**Quick Links**:
|
||||
- [Main README](../README.md)
|
||||
- [Scripts Documentation](../scripts/README.md)
|
||||
- [Integration Flowchart](INTEGRATION_FLOWCHART.md)
|
||||
- [Intelligence Features](INTELLIGENCE_FEATURES_SUMMARY.md)
|
||||
226
docs/EMAIL_CONFIGURATION.md
Normal file
226
docs/EMAIL_CONFIGURATION.md
Normal file
@@ -0,0 +1,226 @@
|
||||
# Email Notification Configuration Guide
|
||||
|
||||
## Overview
|
||||
The application uses Gmail SMTP to send email notifications for auction alerts and lot updates.
|
||||
|
||||
## Gmail App Password Setup (Required for michael@appmodel.nl)
|
||||
|
||||
### Why App Passwords?
|
||||
Google requires **App Passwords** instead of your regular Gmail password when using SMTP with 2-factor authentication enabled.
|
||||
|
||||
### Steps to Generate Gmail App Password:
|
||||
|
||||
1. **Enable 2-Factor Authentication** (if not already enabled)
|
||||
- Go to https://myaccount.google.com/security
|
||||
- Under "Signing in to Google", enable "2-Step Verification"
|
||||
|
||||
2. **Generate App Password**
|
||||
- Go to https://myaccount.google.com/apppasswords
|
||||
- Or navigate: Google Account → Security → 2-Step Verification → App passwords
|
||||
- Select app: "Mail"
|
||||
- Select device: "Other (Custom name)" → Enter "Auctiora Monitor"
|
||||
- Click "Generate"
|
||||
- Google will display a 16-character password (e.g., `abcd efgh ijkl mnop`)
|
||||
- **Copy this password immediately** (you won't see it again)
|
||||
|
||||
3. **Use the App Password**
|
||||
- Use this 16-character password (without spaces) in your configuration
|
||||
- Format: `abcdefghijklmnop`
|
||||
|
||||
## Configuration
|
||||
|
||||
### Method 1: Environment Variable (Recommended for Production)
|
||||
|
||||
Set the `auction.notification.config` property in your `application.properties` or via environment variable:
|
||||
|
||||
```properties
|
||||
# Format: smtp:username:password:recipient_email
|
||||
auction.notification.config=smtp:michael@appmodel.nl:YOUR_APP_PASSWORD:michael@appmodel.nl
|
||||
```
|
||||
|
||||
**Example with Docker:**
|
||||
```bash
|
||||
docker run -e AUCTION_NOTIFICATION_CONFIG="smtp:michael@appmodel.nl:abcdefghijklmnop:michael@appmodel.nl" ...
|
||||
```
|
||||
|
||||
### Method 2: application.properties (Development)
|
||||
|
||||
Edit `src/main/resources/application.properties`:
|
||||
|
||||
```properties
|
||||
# BEFORE (desktop only):
|
||||
auction.notification.config=desktop
|
||||
|
||||
# AFTER (desktop + email):
|
||||
auction.notification.config=smtp:michael@appmodel.nl:YOUR_APP_PASSWORD_HERE:michael@appmodel.nl
|
||||
```
|
||||
|
||||
### Format Breakdown
|
||||
|
||||
The configuration string format is:
|
||||
```
|
||||
smtp:<SMTP_USERNAME>:<APP_PASSWORD>:<RECIPIENT_EMAIL>
|
||||
```
|
||||
|
||||
Where:
|
||||
- `SMTP_USERNAME`: Your Gmail address (michael@appmodel.nl)
|
||||
- `APP_PASSWORD`: The 16-character app password from Google (no spaces)
|
||||
- `RECIPIENT_EMAIL`: Email address to receive notifications (can be same as sender)
|
||||
|
||||
## Configuration Examples
|
||||
|
||||
### Desktop Notifications Only
|
||||
```properties
|
||||
auction.notification.config=desktop
|
||||
```
|
||||
|
||||
### Email Notifications Only
|
||||
```properties
|
||||
auction.notification.config=smtp:michael@appmodel.nl:abcdefghijklmnop:michael@appmodel.nl
|
||||
```
|
||||
|
||||
### Both Desktop and Email (Recommended)
|
||||
The SMTP configuration automatically enables both:
|
||||
```properties
|
||||
auction.notification.config=smtp:michael@appmodel.nl:abcdefghijklmnop:michael@appmodel.nl
|
||||
```
|
||||
|
||||
### Send to Multiple Recipients
|
||||
To send to multiple recipients, you can modify the code or set up Gmail forwarding rules.
|
||||
|
||||
## SMTP Configuration Details
|
||||
|
||||
The application uses these Gmail SMTP settings (hardcoded):
|
||||
- **Host**: smtp.gmail.com
|
||||
- **Port**: 587
|
||||
- **Security**: STARTTLS
|
||||
- **Authentication**: Required
|
||||
|
||||
## Testing Configuration
|
||||
|
||||
After configuration, restart the application and check logs:
|
||||
|
||||
**Success:**
|
||||
```
|
||||
✓ OpenCV loaded successfully
|
||||
Email notification: Test Alert
|
||||
```
|
||||
|
||||
**Failure (wrong password):**
|
||||
```
|
||||
WARN NotificationService - Email failed: 535-5.7.8 Username and Password not accepted
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Error: "Username and Password not accepted"
|
||||
- **Cause**: Invalid App Password or 2FA not enabled
|
||||
- **Solution**:
|
||||
1. Verify 2-Factor Authentication is enabled
|
||||
2. Generate a new App Password
|
||||
3. Ensure no spaces in the password
|
||||
4. Check for typos in email address
|
||||
|
||||
### Error: "AuthenticationFailedException"
|
||||
- **Cause**: Incorrect credentials format
|
||||
- **Solution**: Verify the format: `smtp:user:pass:recipient`
|
||||
|
||||
### Gmail Blocks Sign-in
|
||||
- **Cause**: "Less secure app access" is disabled (deprecated by Google)
|
||||
- **Solution**: Use App Passwords (as described above)
|
||||
|
||||
### Configuration Not Taking Effect
|
||||
- **Cause**: Application not restarted or environment variable not set
|
||||
- **Solution**:
|
||||
1. Restart the application/container
|
||||
2. Verify with: `docker logs auctiora | grep notification`
|
||||
|
||||
### SMTP Connection Timeout
|
||||
- **Error**: `Couldn't connect to host, port: smtp.gmail.com, 587; timeout -1`
|
||||
- **Causes**:
|
||||
1. **Firewall/Network blocking port 587**
|
||||
2. **Corporate network blocking SMTP**
|
||||
3. **Antivirus/security software blocking connections**
|
||||
4. **No internet access in test/container environment**
|
||||
- **Solutions**:
|
||||
1. **Test connectivity**:
|
||||
```bash
|
||||
# On Linux/Mac
|
||||
telnet smtp.gmail.com 587
|
||||
# On Windows
|
||||
Test-NetConnection -ComputerName smtp.gmail.com -Port 587
|
||||
```
|
||||
2. **Check firewall rules**: Allow outbound connections to port 587
|
||||
3. **Docker network**: Ensure container has internet access
|
||||
```bash
|
||||
docker exec auctiora ping -c 3 smtp.gmail.com
|
||||
```
|
||||
4. **Try alternative port 465** (SSL/TLS):
|
||||
- Requires code change to use `mail.smtp.socketFactory`
|
||||
5. **Corporate networks**: May require VPN or proxy configuration
|
||||
6. **Windows Firewall**: Add Java/application to allowed programs
|
||||
|
||||
### Connection Succeeds but Authentication Fails
|
||||
- **Error**: `Email authentication failed - check Gmail App Password`
|
||||
- **Solution**: Verify App Password is correct and has no spaces
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
1. **Never commit passwords to git**
|
||||
- Use environment variables in production
|
||||
- Add `application-local.properties` to `.gitignore`
|
||||
|
||||
2. **Rotate App Passwords periodically**
|
||||
- Generate new App Password every 90 days
|
||||
- Revoke old passwords at https://myaccount.google.com/apppasswords
|
||||
|
||||
3. **Use separate App Passwords per application**
|
||||
- Create a dedicated "Auctiora Monitor" App Password for this application
|
||||
- Easy to revoke if compromised
|
||||
|
||||
4. **Monitor Gmail Activity**
|
||||
- Check https://myaccount.google.com/notifications
|
||||
- Review "Recent security activity"
|
||||
|
||||
## Example Docker Compose Configuration
|
||||
|
||||
```yaml
|
||||
services:
|
||||
auctiora:
|
||||
image: auctiora:latest
|
||||
environment:
|
||||
- AUCTION_NOTIFICATION_CONFIG=smtp:michael@appmodel.nl:${GMAIL_APP_PASSWORD}:michael@appmodel.nl
|
||||
- AUCTION_DATABASE_PATH=/mnt/okcomputer/output/cache.db
|
||||
volumes:
|
||||
- shared-auction-data:/mnt/okcomputer/output
|
||||
```
|
||||
|
||||
Then set the password in `.env` file (not committed):
|
||||
```bash
|
||||
GMAIL_APP_PASSWORD=abcdefghijklmnop
|
||||
```
|
||||
|
||||
## Notification Types
|
||||
|
||||
The application sends these email notifications:
|
||||
|
||||
1. **Lot Closing Soon** (Priority: High)
|
||||
- Sent when a lot closes within 5 minutes
|
||||
- Subject: `[Troostwijk] Lot nearing closure`
|
||||
|
||||
2. **Bid Updated** (Priority: Normal)
|
||||
- Sent when current bid increases
|
||||
- Subject: `[Troostwijk] Bid update`
|
||||
|
||||
3. **Critical Alerts** (Priority: High)
|
||||
- System errors or important events
|
||||
- Subject: `[Troostwijk] Critical Alert`
|
||||
|
||||
## Alternative: Desktop Notifications Only
|
||||
|
||||
If you don't want email notifications, use:
|
||||
```properties
|
||||
auction.notification.config=desktop
|
||||
```
|
||||
|
||||
This will only show system tray notifications (Linux/Windows/Mac).
|
||||
153
docs/EXPERT_ANALITICS.sql
Normal file
153
docs/EXPERT_ANALITICS.sql
Normal file
@@ -0,0 +1,153 @@
|
||||
-- Extend the 'lots' table with bidding and engagement columns.
-- Each column is added by its own ALTER TABLE statement, in the same order
-- as before, so the resulting column order is unchanged.
-- NOTE(review): amounts are DECIMAL(12, 2); the currency is not stated here — confirm.

-- Pricing
ALTER TABLE lots ADD COLUMN starting_bid  DECIMAL(12, 2);
ALTER TABLE lots ADD COLUMN estimated_min DECIMAL(12, 2);
ALTER TABLE lots ADD COLUMN estimated_max DECIMAL(12, 2);
ALTER TABLE lots ADD COLUMN reserve_price DECIMAL(12, 2);
ALTER TABLE lots ADD COLUMN reserve_met   BOOLEAN DEFAULT FALSE;
ALTER TABLE lots ADD COLUMN bid_increment DECIMAL(12, 2);

-- Engagement counters (both default to 0)
ALTER TABLE lots ADD COLUMN watch_count INTEGER DEFAULT 0;
ALTER TABLE lots ADD COLUMN view_count  INTEGER DEFAULT 0;

-- Bid timing (timestamps are stored as TEXT)
ALTER TABLE lots ADD COLUMN first_bid_time TEXT;
ALTER TABLE lots ADD COLUMN last_bid_time  TEXT;
ALTER TABLE lots ADD COLUMN bid_velocity   DECIMAL(5, 2); -- bids per hour
|
||||
|
||||
-- New table: bid history (CRITICAL)
-- Stores individual bids against a lot; lot_id points at lots (lot_id).
CREATE TABLE bid_history (
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    lot_id     TEXT,
    bid_amount DECIMAL(12, 2) NOT NULL,
    bid_time   TEXT NOT NULL,
    is_winning BOOLEAN DEFAULT FALSE,
    is_autobid BOOLEAN DEFAULT FALSE,
    bidder_id  TEXT,                            -- anonymized
    created_at TEXT DEFAULT CURRENT_TIMESTAMP,  -- filled in at insert time when omitted
    FOREIGN KEY (lot_id) REFERENCES lots (lot_id)
);

-- Composite index so that one lot's bids can be fetched in bid_time order.
CREATE INDEX idx_bid_history_lot_time ON bid_history (lot_id, bid_time);
|
||||
-- Extend the 'lots' table with condition, identification and provenance columns.
--
-- condition_score is declared DECIMAL(4, 2): its documented range is 0.00-10.00,
-- and DECIMAL(3, 2) (three digits in total, two after the point) stops at 9.99.
-- SQLite treats the declared precision as a type-affinity hint only, so this
-- matters mainly for readers and for any engine that enforces precision.
ALTER TABLE lots
    ADD COLUMN condition_score DECIMAL(4, 2); -- 0.00-10.00
ALTER TABLE lots
    ADD COLUMN condition_description TEXT;
ALTER TABLE lots
    ADD COLUMN year_manufactured INTEGER;
ALTER TABLE lots
    ADD COLUMN serial_number TEXT;
-- NOTE(review): DECIMAL(3, 2) only fits a 0.00-1.00 fraction (max 9.99), not a
-- 0-100 percentage — confirm which scale "% original parts" is meant to use.
ALTER TABLE lots
    ADD COLUMN originality_score DECIMAL(3, 2); -- % original parts
ALTER TABLE lots
    ADD COLUMN provenance TEXT;
-- Holds a JSON array serialized as TEXT.
ALTER TABLE lots
    ADD COLUMN comparable_lot_ids TEXT; -- JSON array
|
||||
|
||||
-- New table: comparable sales
-- Pairs a lot with another lot it is compared against.
-- Only lot_id carries a foreign key; comparable_lot_id is plain TEXT.
CREATE TABLE comparable_sales (
    id                       INTEGER PRIMARY KEY AUTOINCREMENT,
    lot_id                   TEXT,
    comparable_lot_id        TEXT,
    similarity_score         DECIMAL(3, 2),  -- 0.00-1.00
    price_difference_percent DECIMAL(5, 2),
    created_at               TEXT DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (lot_id) REFERENCES lots (lot_id)
);
|
||||
|
||||
-- New table: market indices
-- Price and volume figures per category, optionally per manufacturer
-- (category is required, manufacturer is nullable).
CREATE TABLE market_indices (
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    category          TEXT NOT NULL,
    manufacturer      TEXT,
    avg_price         DECIMAL(12, 2),
    median_price      DECIMAL(12, 2),
    price_change_30d  DECIMAL(5, 2),  -- NOTE(review): presumably a percentage over 30 days — confirm
    volume_change_30d DECIMAL(5, 2),  -- NOTE(review): presumably a percentage over 30 days — confirm
    calculated_at     TEXT DEFAULT CURRENT_TIMESTAMP
);
|
||||
-- Extend the 'auctions' table with auction-house, fee and shipping columns.
-- Each column is added by its own ALTER TABLE statement, in the original order.

-- Auction house
ALTER TABLE auctions ADD COLUMN auction_house          TEXT;
ALTER TABLE auctions ADD COLUMN auction_house_rating   DECIMAL(3, 2);
ALTER TABLE auctions ADD COLUMN buyers_premium_percent DECIMAL(5, 2);
ALTER TABLE auctions ADD COLUMN payment_methods        TEXT; -- JSON

-- Shipping cost range
ALTER TABLE auctions ADD COLUMN shipping_cost_min DECIMAL(12, 2);
ALTER TABLE auctions ADD COLUMN shipping_cost_max DECIMAL(12, 2);

-- Seller verification flag (defaults to FALSE)
ALTER TABLE auctions ADD COLUMN seller_verified BOOLEAN DEFAULT FALSE;
|
||||
|
||||
-- New table: auction performance metrics
-- Aggregate figures for an auction; auction_id points at auctions (auction_id).
CREATE TABLE auction_metrics (
    id                     INTEGER PRIMARY KEY AUTOINCREMENT,
    auction_id             TEXT,
    sell_through_rate      DECIMAL(5, 2),
    avg_hammer_vs_estimate DECIMAL(5, 2),
    total_hammer_price     DECIMAL(15, 2),
    total_starting_price   DECIMAL(15, 2),
    calculated_at          TEXT DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (auction_id) REFERENCES auctions (auction_id)
);
|
||||
|
||||
-- New table: seasonal trends
-- Price and volume multipliers per (category, month).
--
-- A table can have only one PRIMARY KEY. `id` is the surrogate primary key,
-- so the (category, month) pair is enforced with UNIQUE instead of a second
-- PRIMARY KEY clause, which would make the CREATE TABLE statement fail.
CREATE TABLE seasonal_trends (
    id                   INTEGER PRIMARY KEY AUTOINCREMENT,
    category             TEXT NOT NULL,
    month                INTEGER NOT NULL,  -- NOTE(review): presumably 1-12; no CHECK enforces it — confirm
    avg_price_multiplier DECIMAL(4, 2),     -- vs annual avg
    volume_multiplier    DECIMAL(4, 2),
    UNIQUE (category, month)
);
|
||||
-- New table: external market data
-- Dealer / retail / wholesale average prices per category, manufacturer and
-- model, together with the source they came from and when they were fetched.
CREATE TABLE external_market_data (
    id                  INTEGER PRIMARY KEY AUTOINCREMENT,
    category            TEXT NOT NULL,
    manufacturer        TEXT,
    model               TEXT,
    dealer_avg_price    DECIMAL(12, 2),
    retail_avg_price    DECIMAL(12, 2),
    wholesale_avg_price DECIMAL(12, 2),
    source              TEXT,
    fetched_at          TEXT DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- New table: image analysis results
-- Analysis output for an image; image_id points at images (id).
CREATE TABLE image_analysis (
    id                   INTEGER PRIMARY KEY AUTOINCREMENT,
    image_id             INTEGER,
    damage_detected      BOOLEAN,
    damage_severity      DECIMAL(3, 2),
    -- wear_level is restricted to the four labels below (NULL is also accepted).
    wear_level           TEXT CHECK (wear_level IN ('EXCELLENT', 'GOOD', 'FAIR', 'POOR')),
    estimated_hours_used INTEGER,
    ai_confidence        DECIMAL(3, 2),
    FOREIGN KEY (image_id) REFERENCES images (id)
);
|
||||
|
||||
-- New table: economic indicators
-- Exchange rate, inflation and volatility figures per date and currency.
-- indicator_date and currency are required; the measurements are nullable.
CREATE TABLE economic_indicators (
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    indicator_date    TEXT NOT NULL,
    currency          TEXT NOT NULL,
    exchange_rate     DECIMAL(10, 4),
    inflation_rate    DECIMAL(5, 2),
    market_volatility DECIMAL(5, 2)
);
|
||||
38
docs/EXPERT_ANALITICS_PRIORITY.md
Normal file
38
docs/EXPERT_ANALITICS_PRIORITY.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```mermaid
|
||||
graph TD
|
||||
A[Add bid_history table] --> B[Add watch_count + estimates]
|
||||
B --> C[Create market_indices]
|
||||
C --> D[Add condition + year fields]
|
||||
D --> E[Build comparable matching]
|
||||
E --> F[Enrich with auction house data]
|
||||
F --> G[Add AI image analysis]
|
||||
```
|
||||
|
||||
| Current Practice | New Requirement | Why |
|
||||
|-----------------------|---------------------------------|---------------------------|
|
||||
| Scrape once per hour | **Scrape every bid update** | Capture velocity & timing |
|
||||
| Save only current bid | **Save full bid history** | Detect patterns & sniping |
|
||||
| Ignore watchers | **Track watch\_count** | Predict competition |
|
||||
| Skip auction metadata | **Capture house estimates** | Anchor valuations |
|
||||
| No historical data | **Store sold prices** | Train prediction models |
|
||||
| Basic text scraping | **Parse condition/serial/year** | Enable comparables |
|
||||
|
||||
|
||||
```text
|
||||
Week 1-2: Foundation
|
||||
Implement bid_history scraping (most critical)
|
||||
Add watch_count, starting_bid, estimated_min/max fields
|
||||
Calculate basic bid_velocity
|
||||
Week 3-4: Valuation
|
||||
Extract year_manufactured, manufacturer, condition_description
|
||||
Create market_indices (manually or via external API)
|
||||
Build comparable lot matching logic
|
||||
Week 5-6: Intelligence Layer
|
||||
Add auction house performance tracking
|
||||
Implement undervaluation detection algorithm
|
||||
Create price alert system
|
||||
Week 7-8: Automation
|
||||
Integrate image analysis API
|
||||
Add economic indicator tracking
|
||||
Refine ML-based price predictions
|
||||
```
|
||||
126
docs/GraphQL.md
Normal file
126
docs/GraphQL.md
Normal file
@@ -0,0 +1,126 @@
|
||||
# GraphQL Auction Schema Explorer
|
||||
|
||||
A Python script for exploring and testing GraphQL queries against the TBAuctions storefront API. This tool helps understand the auction schema by testing different query structures and viewing the responses.
|
||||
|
||||
## Features
|
||||
|
||||
- Three pre-configured GraphQL queries with varying levels of detail
|
||||
- Asynchronous HTTP requests using aiohttp for efficient testing
|
||||
- Error handling and formatted JSON output
|
||||
- Configurable auction ID, locale, and platform parameters
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Python 3.7 or higher
|
||||
- Required packages: `aiohttp`
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone or download this script
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
pip install aiohttp
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Run the script directly:
|
||||
|
||||
```bash
|
||||
python auction_explorer.py
|
||||
```
|
||||
|
||||
Or make it executable and run:
|
||||
|
||||
```bash
|
||||
chmod +x auction_explorer.py
|
||||
./auction_explorer.py
|
||||
```
|
||||
|
||||
## Queries Included
|
||||
|
||||
The script tests three different query structures:
|
||||
|
||||
### 1. `viewingDays_simple`
|
||||
Basic query that retrieves city and country code for viewing days.
|
||||
|
||||
### 2. `viewingDays_with_times`
|
||||
Extended query that includes date ranges (`from` and `to`) along with city information.
|
||||
|
||||
### 3. `full_auction`
|
||||
Comprehensive query that fetches:
|
||||
- Auction ID and display ID
|
||||
- Bidding status
|
||||
- Buyer's premium
|
||||
- Viewing days with location and timing
|
||||
- Collection days with location and timing
|
||||
|
||||
## Configuration
|
||||
|
||||
Modify these variables in the script as needed:
|
||||
|
||||
```python
|
||||
GRAPHQL_ENDPOINT = "https://storefront.tbauctions.com/storefront/graphql"
|
||||
auction_id = "9d5d9d6b-94de-4147-b523-dfa512d85dfa" # Replace with your auction ID
|
||||
variables = {
|
||||
"auctionId": auction_id,
|
||||
"locale": "nl", # Change locale as needed
|
||||
"platform": "TWK" # Change platform as needed
|
||||
}
|
||||
```
|
||||
|
||||
## Output Format
|
||||
|
||||
The script outputs:
|
||||
- Query name and separator
|
||||
- Success status with formatted JSON response
|
||||
- Or error messages if the query fails
|
||||
|
||||
Example output:
|
||||
```
|
||||
============================================================
|
||||
QUERY: viewingDays_simple
|
||||
============================================================
|
||||
SUCCESS:
|
||||
{
|
||||
"data": {
|
||||
"auction": {
|
||||
"viewingDays": [
|
||||
{
|
||||
"city": "Amsterdam",
|
||||
"countryCode": "NL"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Customization
|
||||
|
||||
To add new queries, extend the `QUERIES` dictionary:
|
||||
|
||||
```python
|
||||
QUERIES = {
|
||||
"your_query_name": """
|
||||
query YourQuery($auctionId: TbaUuid!, $locale: String!, $platform: Platform!) {
|
||||
auction(id: $auctionId, locale: $locale, platform: $platform) {
|
||||
# Your fields here
|
||||
}
|
||||
}
|
||||
""",
|
||||
# ... existing queries
|
||||
}
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- The script includes a 500ms delay between queries to avoid rate limiting
|
||||
- Timeout is set to 30 seconds per request
|
||||
- All queries use the same GraphQL endpoint and variables
|
||||
- Error responses are displayed in a readable format
|
||||
|
||||
## License
|
||||
|
||||
This script is provided for educational and exploratory purposes.
|
||||
393
docs/INTEGRATION_FLOWCHART.md
Normal file
393
docs/INTEGRATION_FLOWCHART.md
Normal file
@@ -0,0 +1,393 @@
|
||||
# Auctiora Intelligence Integration Flowchart
|
||||
|
||||
## Complete System Integration Diagram
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ COMPLETE SYSTEM INTEGRATION DIAGRAM │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌──────────────────────────────────────────────────────────────────────────────┐
|
||||
│ PHASE 1: EXTERNAL SCRAPER (Python/Playwright) - ARCHITECTURE-TROOSTWIJK │
|
||||
└──────────────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
┌─────────────────────────────┼─────────────────────────────┐
|
||||
▼ ▼ ▼
|
||||
[Listing Pages] [Auction Pages] [Lot Pages]
|
||||
/auctions?page=N /a/auction-id /l/lot-id
|
||||
│ │ │
|
||||
│ Extract URLs │ Parse __NEXT_DATA__ │ Parse __NEXT_DATA__
|
||||
├────────────────────────────▶│ JSON (GraphQL) │ JSON (GraphQL)
|
||||
│ │ │
|
||||
│ ▼ ▼
|
||||
│ ┌────────────────┐ ┌────────────────┐
|
||||
│ │ INSERT auctions│ │ INSERT lots │
|
||||
│ │ to SQLite │ │ INSERT images │
|
||||
│ └────────────────┘ │ (URLs only) │
|
||||
│ │ └────────────────┘
|
||||
│ │ │
|
||||
└─────────────────────────────┴────────────────────────────┘
|
||||
▼
|
||||
┌──────────────────┐
|
||||
│ SQLITE DATABASE │
|
||||
│ output/cache.db │
|
||||
└──────────────────┘
|
||||
│
|
||||
┌─────────────────┼─────────────────┐
|
||||
▼ ▼ ▼
|
||||
[auctions table] [lots table] [images table]
|
||||
- auction_id - lot_id - id
|
||||
- title - auction_id - lot_id
|
||||
- location - title - url
|
||||
- lots_count - current_bid - local_path
|
||||
- closing_time - bid_count - downloaded=0
|
||||
- closing_time
|
||||
- followersCount ⭐ NEW
|
||||
- estimatedMin ⭐ NEW
|
||||
- estimatedMax ⭐ NEW
|
||||
- nextBidStepInCents ⭐ NEW
|
||||
- condition ⭐ NEW
|
||||
- vat ⭐ NEW
|
||||
- buyerPremiumPercentage ⭐ NEW
|
||||
- quantity ⭐ NEW
|
||||
- biddingStatus ⭐ NEW
|
||||
- remarks ⭐ NEW
|
||||
│
|
||||
┌─────────────────────────────────────┴─────────────────────────────────────┐
|
||||
│ PHASE 2: MONITORING & PROCESSING (Java) - THIS PROJECT │
|
||||
└────────────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
┌─────────────────┼─────────────────┐
|
||||
▼ ▼ ▼
|
||||
[TroostwijkMonitor] [DatabaseService] [ScraperDataAdapter]
|
||||
│ │ │
|
||||
│ Read lots │ Query lots │ Transform data
|
||||
│ every hour │ Import images │ TEXT → INTEGER
|
||||
│ │ │ "€123" → 123.0
|
||||
└─────────────────┴─────────────────┘
|
||||
│
|
||||
┌─────────────────────────┼─────────────────────────┐
|
||||
▼ ▼ ▼
|
||||
[Bid Monitoring] [Image Processing] [Closing Alerts]
|
||||
Check API every 1h Download images Check < 5 min
|
||||
│ │ │
|
||||
│ New bid? │ Process via │ Time critical?
|
||||
├─[YES]──────────┐ │ ObjectDetection ├─[YES]────┐
|
||||
│ │ │ │ │
|
||||
▼ │ ▼ │ │
|
||||
[Update current_bid] │ ┌──────────────────┐ │ │
|
||||
in database │ │ YOLO Detection │ │ │
|
||||
│ │ OpenCV DNN │ │ │
|
||||
│ └──────────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ │ Detect objects │ │
|
||||
│ ├─[vehicle] │ │
|
||||
│ ├─[furniture] │ │
|
||||
│ ├─[machinery] │ │
|
||||
│ │ │ │
|
||||
│ ▼ │ │
|
||||
│ [Save labels to DB] │ │
|
||||
│ [Estimate value] │ │
|
||||
│ │ │ │
|
||||
│ │ │ │
|
||||
└─────────┴───────────────────────┴──────────┘
|
||||
│
|
||||
┌───────────────────────────────────────────────┴────────────────────────────┐
|
||||
│ PHASE 3: INTELLIGENCE LAYER ⭐ NEW - PREDICTIVE ANALYTICS │
|
||||
└────────────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
┌─────────────────┴─────────────────┐
|
||||
▼ ▼
|
||||
[Intelligence Engine] [Analytics Calculations]
|
||||
│ │
|
||||
┌───────────────────┼──────────────┐ │
|
||||
▼ ▼ ▼ │
|
||||
[Sleeper Detection] [Bargain Finder] [Popularity Tracker] │
|
||||
High followers Price < estimate Watch count analysis │
|
||||
Low current bid Opportunity Competition level │
|
||||
│ │ │ │
|
||||
│ │ │ │
|
||||
└───────────────────┴──────────────┴───────────────────┘
|
||||
│
|
||||
┌─────────────────┴─────────────────┐
|
||||
▼ ▼
|
||||
[Total Cost Calculator] [Next Bid Calculator]
|
||||
Current bid × (1 + VAT/100) Current bid + increment
|
||||
× (1 + premium/100) (from API or calculated)
|
||||
│ │
|
||||
└─────────────────┬─────────────────┘
|
||||
│
|
||||
┌───────────────────────────────────────────────┴────────────────────────────┐
|
||||
│ PHASE 4: NOTIFICATION SYSTEM - USER INTERACTION TRIGGERS │
|
||||
└────────────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
┌─────────────────┴─────────────────┐
|
||||
▼ ▼
|
||||
[NotificationService] [User Decision Points]
|
||||
│ │
|
||||
┌───────────────────┼───────────────────┐ │
|
||||
▼ ▼ ▼ │
|
||||
[Desktop Notify] [Email Notify] [Priority Level] │
|
||||
Windows/macOS/ Gmail SMTP 0=Normal │
|
||||
Linux system (FREE) 1=High │
|
||||
tray │
|
||||
│ │ │ │
|
||||
└───────────────────┴───────────────────┘ │
|
||||
│ │
|
||||
▼ ▼
|
||||
┌──────────────────┐ ┌──────────────────┐
|
||||
│ USER INTERACTION │ │ TRIGGER EVENTS: │
|
||||
│ NOTIFICATIONS │ │ │
|
||||
└──────────────────┘ └──────────────────┘
|
||||
│ │
|
||||
┌───────────────────┼───────────────────┐ │
|
||||
▼ ▼ ▼ │
|
||||
┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐ │
|
||||
│ 1. BID CHANGE │ │ 2. OBJECT │ │ 3. CLOSING │ │
|
||||
│ │ │ DETECTED │ │ ALERT │ │
|
||||
│ "Nieuw bod op │ │ │ │ │ │
|
||||
│ kavel 12345: │ │ "Lot contains: │ │ "Kavel 12345 │ │
|
||||
│ €150 (was €125)"│ │ - Vehicle │ │ sluit binnen │ │
|
||||
│ │ │ - Machinery │ │ 5 min." │ │
|
||||
│ Priority: NORMAL │ │ Est: €5000" │ │ Priority: HIGH │ │
|
||||
│ │ │ │ │ │ │
|
||||
│ Action needed: │ │ Action needed: │ │ Action needed: │ │
|
||||
│ ▸ Place bid? │ │ ▸ Review item? │ │ ▸ Place final │ │
|
||||
│ ▸ Monitor? │ │ ▸ Confirm value? │ │ bid? │ │
|
||||
│ ▸ Ignore? │ │ ▸ Add to watch? │ │ ▸ Let expire? │ │
|
||||
└──────────────────┘ └──────────────────┘ └──────────────────┘ │
|
||||
│ │ │ │
|
||||
└───────────────────┴───────────────────┴─────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ INTELLIGENCE NOTIFICATIONS ⭐ NEW │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ 4. SLEEPER LOT ALERT │
|
||||
│ "Lot 12345: 25 watchers, only €50 bid - Opportunity!" │
|
||||
│ Action: ▸ Place strategic bid ▸ Monitor competition ▸ Set alert │
|
||||
│ │
|
||||
│ 5. BARGAIN DETECTED │
|
||||
│ "Lot 67890: Current €200, Estimate €400-€600 - Below estimate!" │
|
||||
│ Action: ▸ Bid now ▸ Research comparable ▸ Add to watchlist │
|
||||
│ │
|
||||
│ 6. HIGH COMPETITION WARNING │
|
||||
│ "Lot 11111: 75 watchers, bid velocity 5/hr - Strong competition" │
|
||||
│ Action: ▸ Review strategy ▸ Set max bid ▸ Find alternatives │
|
||||
│ │
|
||||
│ 7. TOTAL COST NOTIFICATION │
|
||||
│ "True cost: €500 bid + €50 premium (10%) + €115.50 VAT (21%) = €665.50" │
|
||||
│ Action: ▸ Confirm budget ▸ Adjust bid ▸ Calculate logistics │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Intelligence Dashboard Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph P1["PHASE 1: DATA COLLECTION"]
|
||||
A1[GraphQL API] --> A2[Scraper Extracts 15+ New Fields]
|
||||
A2 --> A3[followersCount]
|
||||
A2 --> A4[estimatedMin/Max]
|
||||
A2 --> A5[nextBidStepInCents]
|
||||
A2 --> A6[vat + buyerPremiumPercentage]
|
||||
A2 --> A7[condition + biddingStatus]
|
||||
|
||||
A3 & A4 & A5 & A6 & A7 --> DB[(SQLite Database)]
|
||||
end
|
||||
|
||||
DB --> P2_Entry
|
||||
|
||||
subgraph P2["PHASE 2: INTELLIGENCE PROCESSING"]
|
||||
P2_Entry[Lot.java Model] --> Intelligence[Intelligence Methods]
|
||||
|
||||
Intelligence --> Sleeper[isSleeperLot<br/>High followers, low bid]
|
||||
Intelligence --> Bargain[isBelowEstimate<br/>Price < estimate]
|
||||
Intelligence --> Popular[getPopularityLevel<br/>Watch count tiers]
|
||||
Intelligence --> Cost[calculateTotalCost<br/>Bid + VAT + Premium]
|
||||
Intelligence --> NextBid[calculateNextBid<br/>API increment]
|
||||
end
|
||||
|
||||
P2_Entry --> API_Layer
|
||||
|
||||
subgraph API["PHASE 3: REST API ENDPOINTS"]
|
||||
API_Layer[AuctionMonitorResource] --> E1["/intelligence/sleepers"]
|
||||
API_Layer --> E2["/intelligence/bargains"]
|
||||
API_Layer --> E3["/intelligence/popular"]
|
||||
API_Layer --> E4["/intelligence/price-analysis"]
|
||||
API_Layer --> E5["/lots/:id/intelligence"]
|
||||
API_Layer --> E6["/charts/watch-distribution"]
|
||||
end
|
||||
|
||||
E1 & E2 & E3 & E4 & E5 & E6 --> Dashboard
|
||||
|
||||
subgraph UI["PHASE 4: INTELLIGENCE DASHBOARD"]
|
||||
Dashboard[index.html] --> Widget1[Sleeper Lots Widget<br/>Opportunities]
|
||||
Dashboard --> Widget2[Bargain Lots Widget<br/>Below Estimate]
|
||||
Dashboard --> Widget3[Popular Lots Widget<br/>High Competition]
|
||||
Dashboard --> Table["Enhanced Table<br/>Watchers | Est. Range | Total Cost"]
|
||||
|
||||
Table --> Badges["Smart Badges:<br/>DEAL | Watch Count | Time Left"]
|
||||
end
|
||||
|
||||
Widget1 --> UserAction
|
||||
Widget2 --> UserAction
|
||||
Widget3 --> UserAction
|
||||
Table --> UserAction
|
||||
|
||||
subgraph Actions["PHASE 5: USER ACTIONS"]
|
||||
UserAction[User Decision] --> Bid[Place Strategic Bid]
|
||||
UserAction --> Monitor[Add to Watchlist]
|
||||
UserAction --> Research[Research Comparables]
|
||||
UserAction --> Calculate[Budget Calculator]
|
||||
end
|
||||
```
|
||||
|
||||
## Key Intelligence Features
|
||||
|
||||
### 1. Follower/Watch Count Analytics
|
||||
- **Data Source**: `followersCount` from GraphQL API
|
||||
- **Intelligence Value**:
|
||||
- Predict lot popularity before bidding wars
|
||||
- Calculate interest-to-bid conversion rates
|
||||
- Identify "sleeper" lots (high followers, low bids)
|
||||
- Alert on sudden interest spikes
|
||||
|
||||
### 2. Price vs Estimate Analysis
|
||||
- **Data Source**: `estimatedMin`, `estimatedMax` from GraphQL API
|
||||
- **Intelligence Value**:
|
||||
- Identify bargains: `currentBid < estimatedMin`
|
||||
- Identify overvalued: `currentBid > estimatedMax`
|
||||
- Build pricing models per category
|
||||
- Track auction house estimate accuracy
|
||||
|
||||
### 3. True Cost Calculator
|
||||
- **Data Source**: `vat`, `buyerPremiumPercentage` from GraphQL API
|
||||
- **Intelligence Value**:
|
||||
- Calculate total cost: `bid × (1 + VAT/100) × (1 + premium/100)`
|
||||
- Budget planning with accurate all-in costs
|
||||
- Compare true costs across lots
|
||||
- Prevent bidding surprises
|
||||
|
||||
### 4. Exact Bid Increment
|
||||
- **Data Source**: `nextBidStepInCents` from GraphQL API
|
||||
- **Intelligence Value**:
|
||||
- Show exact next bid amount
|
||||
- No calculation errors
|
||||
- Better UX for bidding recommendations
|
||||
- Strategic bid placement
|
||||
|
||||
### 5. Structured Location & Category
|
||||
- **Data Source**: `cityLocation`, `countryCode`, `categoryPath` from GraphQL API
|
||||
- **Intelligence Value**:
|
||||
- Filter by distance from user
|
||||
- Calculate pickup logistics costs
|
||||
- Category-based analytics
|
||||
- Regional pricing trends
|
||||
|
||||
## Integration Hooks & Timing
|
||||
|
||||
| Event | Frequency | Trigger | Notification Type | User Action Required |
|
||||
|--------------------------------|-------------------|----------------------------|----------------------------|------------------------|
|
||||
| **Sleeper lot detected** | On data refresh | followers > 10, bid < €100 | Desktop + Email | Review opportunity |
|
||||
| **Bargain detected** | On data refresh | bid < estimatedMin | Desktop + Email | Consider bidding |
|
||||
| **High competition** | On data refresh | followers > 50 | Desktop | Review strategy |
|
||||
| **Bid change detected** | Every 1 hour | Monitor detects higher bid | Desktop + Email | Place counter-bid? |
|
||||
| **Closing soon (< 30 min)** | When detected | Time-based check | Desktop + Email | Review lot |
|
||||
| **Closing imminent (< 5 min)** | When detected | Time-based check | Desktop + Email (HIGH) | Final bid decision |
|
||||
| **Object detected** | On image process | YOLO finds objects | Desktop + Email | Confirm identification |
|
||||
| **True cost calculated** | On page load | User views lot | Dashboard display | Budget confirmation |
|
||||
|
||||
## API Endpoints Reference
|
||||
|
||||
### Intelligence Endpoints
|
||||
- `GET /api/monitor/intelligence/sleepers` - Returns high-interest, low-bid lots
|
||||
- `GET /api/monitor/intelligence/bargains` - Returns lots priced below estimate
|
||||
- `GET /api/monitor/intelligence/popular?level={HIGH|MEDIUM|LOW}` - Returns lots by popularity
|
||||
- `GET /api/monitor/intelligence/price-analysis` - Returns price vs estimate statistics
|
||||
- `GET /api/monitor/lots/{lotId}/intelligence` - Returns detailed intelligence for specific lot
|
||||
|
||||
### Chart Endpoints
|
||||
- `GET /api/monitor/charts/watch-distribution` - Returns follower count distribution
|
||||
- `GET /api/monitor/charts/country-distribution` - Returns geographic distribution
|
||||
- `GET /api/monitor/charts/category-distribution` - Returns category distribution
|
||||
- `GET /api/monitor/charts/bidding-trend?hours=24` - Returns time series data
|
||||
|
||||
## Dashboard Intelligence Widgets
|
||||
|
||||
### Sleeper Lots Widget
|
||||
- **Color**: Purple gradient
|
||||
- **Icon**: Eye (fa-eye)
|
||||
- **Metric**: Count of lots with followers > 10 and bid < €100
|
||||
- **Action**: Click to filter table to sleeper lots only
|
||||
|
||||
### Bargain Lots Widget
|
||||
- **Color**: Green gradient
|
||||
- **Icon**: Tag (fa-tag)
|
||||
- **Metric**: Count of lots where current bid < estimated minimum
|
||||
- **Action**: Click to filter table to bargain lots only
|
||||
|
||||
### Popular/Hot Lots Widget
|
||||
- **Color**: Orange gradient
|
||||
- **Icon**: Fire (fa-fire)
|
||||
- **Metric**: Count of lots with followers > 20
|
||||
- **Action**: Click to filter table to popular lots only
|
||||
|
||||
## Enhanced Table Features
|
||||
|
||||
### New Columns
|
||||
1. **Watchers** - Shows follower count with color-coded badges:
|
||||
- 51+ followers: Red (high competition)
|
||||
- 21-50 followers: Orange (medium competition)
|
||||
- 6-20 followers: Blue (some interest)
|
||||
- 0-5 followers: Gray (minimal interest)
|
||||
|
||||
2. **Est. Range** - Shows auction house estimate: `€min-€max`
|
||||
- Displays "DEAL" badge if current bid < estimated minimum
|
||||
|
||||
3. **Total Cost** - Shows true cost including VAT and buyer premium:
|
||||
- Hover tooltip shows breakdown: `Including VAT (21%) + Premium (10%)`
|
||||
|
||||
### Smart Indicators
|
||||
- **DEAL Badge**: Green badge when `currentBid < estimatedMin`
|
||||
- **Watch Count Badge**: Color-coded by competition level
|
||||
- **Urgency Badge**: Time-based coloring (< 10 min = red)
|
||||
|
||||
## Technical Implementation
|
||||
|
||||
### Backend (Java)
|
||||
- **File**: `src/main/java/auctiora/Lot.java`
|
||||
- Added 24 new fields from GraphQL API
|
||||
- Added 9 intelligence calculation methods
|
||||
- Immutable record with Lombok `@With` annotation
|
||||
|
||||
- **File**: `src/main/java/auctiora/AuctionMonitorResource.java`
|
||||
- Added 6 new REST API endpoints
|
||||
- Enhanced insights with sleeper/bargain/popular detection
|
||||
- Added watch distribution chart endpoint
|
||||
|
||||
### Frontend (HTML/JavaScript)
|
||||
- **File**: `src/main/resources/META-INF/resources/index.html`
|
||||
- Added 3 intelligence widgets with click handlers
|
||||
- Enhanced closing soon table with 3 new columns
|
||||
- Added `fetchIntelligenceData()` function
|
||||
- Added smart badges and color coding
|
||||
- Added total cost calculator display
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Bid History Table** - Track bid changes over time
|
||||
2. **Comparative Analytics** - Compare similar lots across auctions
|
||||
3. **Machine Learning** - Predict final hammer price based on patterns
|
||||
4. **Geographic Filtering** - Distance-based sorting and filtering
|
||||
5. **Email Alerts** - Custom alerts for sleepers, bargains, etc.
|
||||
6. **Mobile App** - Push notifications for time-critical events
|
||||
7. **Bid Automation** - Auto-bid up to maximum with increment logic
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: December 2025
|
||||
**Version**: 2.1
|
||||
**Author**: Auctiora Intelligence Team
|
||||
650
docs/QUARKUS_GUIDE.md
Normal file
650
docs/QUARKUS_GUIDE.md
Normal file
@@ -0,0 +1,650 @@
|
||||
# Quarkus Auction Monitor - Complete Guide
|
||||
|
||||
## 🚀 Overview
|
||||
|
||||
The Troostwijk Auction Monitor now runs on **Quarkus**, a Kubernetes-native Java framework optimized for fast startup and low memory footprint.
|
||||
|
||||
### Key Features
|
||||
|
||||
✅ **Quarkus Scheduler** - Built-in cron-based scheduling
|
||||
✅ **REST API** - Control and monitor via HTTP endpoints
|
||||
✅ **Health Checks** - Kubernetes-ready liveness/readiness probes
|
||||
✅ **CDI/Dependency Injection** - Type-safe service management
|
||||
✅ **Fast Startup** - 0.5s startup time
|
||||
✅ **Low Memory** - ~50MB RSS memory footprint
|
||||
✅ **Hot Reload** - Development mode with live coding
|
||||
|
||||
---
|
||||
|
||||
## 📦 Quick Start
|
||||
|
||||
### Option 1: Run with Maven (Development)
|
||||
|
||||
```bash
|
||||
# Start in dev mode with live reload
|
||||
mvn quarkus:dev
|
||||
|
||||
# Access application
|
||||
# API: http://localhost:8081/api/monitor/status
|
||||
# Health: http://localhost:8081/health
|
||||
```
|
||||
|
||||
### Option 2: Build and Run JAR
|
||||
|
||||
```bash
|
||||
# Build
|
||||
mvn clean package
|
||||
|
||||
# Run
|
||||
java -jar target/quarkus-app/quarkus-run.jar
|
||||
|
||||
# Or use fast-jar (recommended for production)
|
||||
mvn clean package -Dquarkus.package.jar.type=fast-jar
|
||||
java -jar target/quarkus-app/quarkus-run.jar
|
||||
```
|
||||
|
||||
### Option 3: Docker
|
||||
|
||||
```bash
|
||||
# Build image
|
||||
docker build -t auction-monitor:latest .
|
||||
|
||||
# Run container
|
||||
docker run -p 8081:8081 \
|
||||
-v $(pwd)/data:/mnt/okcomputer/output \
|
||||
auction-monitor:latest
|
||||
```
|
||||
|
||||
### Option 4: Docker Compose (Recommended)
|
||||
|
||||
```bash
|
||||
# Start services
|
||||
docker-compose up -d
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# Stop services
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Configuration
|
||||
|
||||
### application.properties
|
||||
|
||||
All configuration is in `src/main/resources/application.properties`:
|
||||
|
||||
```properties
|
||||
# Database
|
||||
auction.database.path=C:\\mnt\\okcomputer\\output\\cache.db
|
||||
auction.images.path=C:\\mnt\\okcomputer\\output\\images
|
||||
|
||||
# Notifications
|
||||
auction.notification.config=desktop
|
||||
# Or for email: smtp:your@gmail.com:app_password:recipient@example.com
|
||||
|
||||
# YOLO Models (optional)
|
||||
auction.yolo.config=models/yolov4.cfg
|
||||
auction.yolo.weights=models/yolov4.weights
|
||||
auction.yolo.classes=models/coco.names
|
||||
|
||||
# Workflow Schedules (cron expressions)
|
||||
# Every 30 min
auction.workflow.scraper-import.cron=0 */30 * * * ?
|
||||
# Every 1 hour
auction.workflow.image-processing.cron=0 0 * * * ?
|
||||
# Every 15 min
auction.workflow.bid-monitoring.cron=0 */15 * * * ?
|
||||
# Every 5 min
auction.workflow.closing-alerts.cron=0 */5 * * * ?
|
||||
|
||||
# HTTP Server
|
||||
quarkus.http.port=8081
|
||||
quarkus.http.host=0.0.0.0
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Override configuration with environment variables:
|
||||
|
||||
```bash
|
||||
export AUCTION_DATABASE_PATH=/path/to/cache.db
|
||||
export AUCTION_NOTIFICATION_CONFIG=desktop
|
||||
export QUARKUS_HTTP_PORT=8081
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📅 Scheduled Workflows
|
||||
|
||||
Quarkus automatically runs these workflows based on cron expressions:
|
||||
|
||||
| Workflow | Schedule | Cron Expression | Description |
|
||||
|----------|----------|-----------------|-------------|
|
||||
| **Scraper Import** | Every 30 min | `0 */30 * * * ?` | Import auctions/lots from external scraper |
|
||||
| **Image Processing** | Every 1 hour | `0 0 * * * ?` | Download images & run object detection |
|
||||
| **Bid Monitoring** | Every 15 min | `0 */15 * * * ?` | Check for bid changes |
|
||||
| **Closing Alerts** | Every 5 min | `0 */5 * * * ?` | Send alerts for lots closing soon |
|
||||
|
||||
### Cron Expression Format
|
||||
|
||||
```
|
||||
┌───────────── second (0-59)
|
||||
│ ┌───────────── minute (0-59)
|
||||
│ │ ┌───────────── hour (0-23)
|
||||
│ │ │ ┌───────────── day of month (1-31)
|
||||
│ │ │ │ ┌───────────── month (1-12)
|
||||
│ │ │ │ │ ┌───────────── day of week (1-7 or SUN-SAT, Sunday=1; ? = no specific value)
|
||||
│ │ │ │ │ │
|
||||
0 */30 * * * ? = Every 30 minutes
|
||||
0 0 * * * ? = Every hour at minute 0
|
||||
0 0 0 * * ? = Every day at midnight
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🌐 REST API
|
||||
|
||||
### Base URL
|
||||
```
|
||||
http://localhost:8081/api/monitor
|
||||
```
|
||||
|
||||
### Endpoints
|
||||
|
||||
#### 1. Get Status
|
||||
```bash
|
||||
GET /api/monitor/status
|
||||
|
||||
# Example
|
||||
curl http://localhost:8081/api/monitor/status
|
||||
|
||||
# Response
|
||||
{
|
||||
"running": true,
|
||||
"auctions": 25,
|
||||
"lots": 150,
|
||||
"images": 300,
|
||||
"closingSoon": 5
|
||||
}
|
||||
```
|
||||
|
||||
#### 2. Get Statistics
|
||||
```bash
|
||||
GET /api/monitor/statistics
|
||||
|
||||
# Example
|
||||
curl http://localhost:8081/api/monitor/statistics
|
||||
|
||||
# Response
|
||||
{
|
||||
"totalAuctions": 25,
|
||||
"totalLots": 150,
|
||||
"totalImages": 300,
|
||||
"activeLots": 120,
|
||||
"lotsWithBids": 80,
|
||||
"totalBidValue": "€125,450.00",
|
||||
"averageBid": "€1,568.13"
|
||||
}
|
||||
```
|
||||
|
||||
#### 3. Trigger Workflows Manually
|
||||
|
||||
```bash
|
||||
# Scraper Import
|
||||
POST /api/monitor/trigger/scraper-import
|
||||
curl -X POST http://localhost:8081/api/monitor/trigger/scraper-import
|
||||
|
||||
# Image Processing
|
||||
POST /api/monitor/trigger/image-processing
|
||||
curl -X POST http://localhost:8081/api/monitor/trigger/image-processing
|
||||
|
||||
# Bid Monitoring
|
||||
POST /api/monitor/trigger/bid-monitoring
|
||||
curl -X POST http://localhost:8081/api/monitor/trigger/bid-monitoring
|
||||
|
||||
# Closing Alerts
|
||||
POST /api/monitor/trigger/closing-alerts
|
||||
curl -X POST http://localhost:8081/api/monitor/trigger/closing-alerts
|
||||
```
|
||||
|
||||
#### 4. Get Auctions
|
||||
```bash
|
||||
# All auctions
|
||||
GET /api/monitor/auctions
|
||||
curl http://localhost:8081/api/monitor/auctions
|
||||
|
||||
# Filter by country
|
||||
GET /api/monitor/auctions?country=NL
|
||||
curl "http://localhost:8081/api/monitor/auctions?country=NL"
|
||||
```
|
||||
|
||||
#### 5. Get Lots
|
||||
```bash
|
||||
# Active lots
|
||||
GET /api/monitor/lots
|
||||
curl http://localhost:8081/api/monitor/lots
|
||||
|
||||
# Lots closing soon (within 30 minutes by default)
|
||||
GET /api/monitor/lots/closing-soon
|
||||
curl http://localhost:8081/api/monitor/lots/closing-soon
|
||||
|
||||
# Custom minutes threshold
|
||||
GET /api/monitor/lots/closing-soon?minutes=60
|
||||
curl "http://localhost:8081/api/monitor/lots/closing-soon?minutes=60"
|
||||
```
|
||||
|
||||
#### 6. Get Lot Images
|
||||
```bash
|
||||
GET /api/monitor/lots/{lotId}/images
|
||||
|
||||
# Example
|
||||
curl http://localhost:8081/api/monitor/lots/12345/images
|
||||
```
|
||||
|
||||
#### 7. Test Notification
|
||||
```bash
|
||||
POST /api/monitor/test-notification
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"message": "Test message",
|
||||
"title": "Test Title",
|
||||
"priority": "0"
|
||||
}
|
||||
|
||||
# Example
|
||||
curl -X POST http://localhost:8081/api/monitor/test-notification \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"message":"Test notification","title":"Test","priority":"0"}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🏥 Health Checks
|
||||
|
||||
Quarkus provides built-in health checks for Kubernetes/Docker:
|
||||
|
||||
### Liveness Probe
|
||||
```bash
|
||||
GET /health/live
|
||||
|
||||
# Example
|
||||
curl http://localhost:8081/health/live
|
||||
|
||||
# Response
|
||||
{
|
||||
"status": "UP",
|
||||
"checks": [
|
||||
{
|
||||
"name": "Auction Monitor is alive",
|
||||
"status": "UP"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Readiness Probe
|
||||
```bash
|
||||
GET /health/ready
|
||||
|
||||
# Example
|
||||
curl http://localhost:8081/health/ready
|
||||
|
||||
# Response
|
||||
{
|
||||
"status": "UP",
|
||||
"checks": [
|
||||
{
|
||||
"name": "database",
|
||||
"status": "UP",
|
||||
"data": {
|
||||
"auctions": 25
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Startup Probe
|
||||
```bash
|
||||
GET /health/started
|
||||
|
||||
# Example
|
||||
curl http://localhost:8081/health/started
|
||||
```
|
||||
|
||||
### Combined Health
|
||||
```bash
|
||||
GET /health
|
||||
|
||||
# Returns all health checks
|
||||
curl http://localhost:8081/health
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🐳 Docker Deployment
|
||||
|
||||
### Build Image
|
||||
|
||||
```bash
|
||||
docker build -t auction-monitor:1.0 .
|
||||
```
|
||||
|
||||
### Run Container
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name auction-monitor \
|
||||
-p 8081:8081 \
|
||||
-v $(pwd)/data:/mnt/okcomputer/output \
|
||||
-e AUCTION_NOTIFICATION_CONFIG=desktop \
|
||||
auction-monitor:1.0
|
||||
```
|
||||
|
||||
### Docker Compose
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
services:
|
||||
auction-monitor:
|
||||
image: auction-monitor:1.0
|
||||
ports:
|
||||
- "8081:8081"
|
||||
volumes:
|
||||
- ./data:/mnt/okcomputer/output
|
||||
environment:
|
||||
- AUCTION_DATABASE_PATH=/mnt/okcomputer/output/cache.db
|
||||
- AUCTION_NOTIFICATION_CONFIG=desktop
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--spider", "http://localhost:8081/health/live"]
|
||||
interval: 30s
|
||||
timeout: 3s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ☸️ Kubernetes Deployment
|
||||
|
||||
### deployment.yaml
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: auction-monitor
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: auction-monitor
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: auction-monitor
|
||||
spec:
|
||||
containers:
|
||||
- name: auction-monitor
|
||||
image: auction-monitor:1.0
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
env:
|
||||
- name: AUCTION_DATABASE_PATH
|
||||
value: /mnt/okcomputer/output/cache.db
|
||||
- name: QUARKUS_HTTP_PORT
|
||||
value: "8081"
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /mnt/okcomputer/output
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health/live
|
||||
port: 8081
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health/ready
|
||||
port: 8081
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /health/started
|
||||
port: 8081
|
||||
failureThreshold: 30
|
||||
periodSeconds: 10
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: auction-data-pvc
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: auction-monitor
|
||||
spec:
|
||||
selector:
|
||||
app: auction-monitor
|
||||
ports:
|
||||
- port: 8081
|
||||
targetPort: 8081
|
||||
type: LoadBalancer
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Development Mode
|
||||
|
||||
Quarkus dev mode provides live reload for rapid development:
|
||||
|
||||
```bash
|
||||
# Start dev mode
|
||||
mvn quarkus:dev
|
||||
|
||||
# Features available:
|
||||
# - Live reload (no restart needed)
|
||||
# - Dev UI: http://localhost:8081/q/dev/
|
||||
# - Continuous testing
|
||||
# - Debug on port 5005
|
||||
```
|
||||
|
||||
### Dev UI
|
||||
|
||||
Access at: `http://localhost:8081/q/dev/`
|
||||
|
||||
Features:
|
||||
- Configuration editor
|
||||
- Scheduler dashboard
|
||||
- Health checks
|
||||
- REST endpoints explorer
|
||||
- Continuous testing
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
### Run All Tests
|
||||
```bash
|
||||
mvn test
|
||||
```
|
||||
|
||||
### Run Quarkus Tests
|
||||
```bash
|
||||
mvn test -Dtest=*QuarkusTest
|
||||
```
|
||||
|
||||
### Integration Test with Running Application
|
||||
```bash
|
||||
# Terminal 1: Start application
|
||||
mvn quarkus:dev
|
||||
|
||||
# Terminal 2: Run integration tests
|
||||
curl http://localhost:8081/api/monitor/status
|
||||
curl http://localhost:8081/health/live
|
||||
curl -X POST http://localhost:8081/api/monitor/trigger/scraper-import
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 Monitoring & Logging
|
||||
|
||||
### View Logs
|
||||
|
||||
```bash
|
||||
# Docker
|
||||
docker logs -f auction-monitor
|
||||
|
||||
# Docker Compose
|
||||
docker-compose logs -f
|
||||
|
||||
# Kubernetes
|
||||
kubectl logs -f deployment/auction-monitor
|
||||
```
|
||||
|
||||
### Log Levels
|
||||
|
||||
Configure in `application.properties`:
|
||||
|
||||
```properties
|
||||
# Production
|
||||
quarkus.log.console.level=INFO
|
||||
|
||||
# Development
|
||||
%dev.quarkus.log.console.level=DEBUG
|
||||
|
||||
# Specific logger
|
||||
quarkus.log.category."com.auction".level=DEBUG
|
||||
```
|
||||
|
||||
### Scheduled Job Logs
|
||||
|
||||
```
|
||||
14:30:00 INFO [com.auc.Qua] (executor-thread-1) 📥 [WORKFLOW 1] Importing scraper data...
|
||||
14:30:00 INFO [com.auc.Qua] (executor-thread-1) → Imported 5 auctions
|
||||
14:30:00 INFO [com.auc.Qua] (executor-thread-1) → Imported 25 lots
|
||||
14:30:00 INFO [com.auc.Qua] (executor-thread-1) ✓ Scraper import completed in 1250ms
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Performance
|
||||
|
||||
### Startup Time
|
||||
- **JVM Mode**: ~0.5 seconds
|
||||
- **Native Image**: ~0.014 seconds
|
||||
|
||||
### Memory Footprint
|
||||
- **JVM Mode**: ~50MB RSS
|
||||
- **Native Image**: ~15MB RSS
|
||||
|
||||
### Build Native Image (Optional)
|
||||
|
||||
```bash
|
||||
# Requires GraalVM
|
||||
mvn package -Pnative
|
||||
|
||||
# Run native executable
|
||||
./target/troostwijk-scraper-1.0-SNAPSHOT-runner
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔐 Security
|
||||
|
||||
### Environment Variables for Secrets
|
||||
|
||||
```bash
|
||||
# Don't commit credentials!
|
||||
export AUCTION_NOTIFICATION_CONFIG=smtp:user@gmail.com:SECRET_PASSWORD:recipient@example.com
|
||||
|
||||
# Or use Kubernetes secrets
|
||||
kubectl create secret generic auction-secrets \
|
||||
--from-literal=notification-config='smtp:user@gmail.com:password:recipient@example.com'
|
||||
```
|
||||
|
||||
### Kubernetes Secret
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: auction-secrets
|
||||
type: Opaque
|
||||
stringData:
|
||||
notification-config: smtp:user@gmail.com:app_password:recipient@example.com
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Troubleshooting
|
||||
|
||||
### Issue: Schedulers not running
|
||||
|
||||
**Check scheduler status:**
|
||||
```bash
|
||||
curl http://localhost:8081/health/ready
|
||||
```
|
||||
|
||||
**Enable debug logging:**
|
||||
```properties
|
||||
quarkus.log.category."io.quarkus.scheduler".level=DEBUG
|
||||
```
|
||||
|
||||
### Issue: Database not found
|
||||
|
||||
**Check file permissions:**
|
||||
```bash
|
||||
ls -la C:/mnt/okcomputer/output/cache.db
|
||||
```
|
||||
|
||||
**Create directory:**
|
||||
```bash
|
||||
mkdir -p C:/mnt/okcomputer/output
|
||||
```
|
||||
|
||||
### Issue: Port 8081 already in use
|
||||
|
||||
**Change port:**
|
||||
```bash
|
||||
mvn quarkus:dev -Dquarkus.http.port=8082
|
||||
# Or
|
||||
export QUARKUS_HTTP_PORT=8082
|
||||
```
|
||||
|
||||
### Issue: Health check failing
|
||||
|
||||
**Check application logs:**
|
||||
```bash
|
||||
docker logs auction-monitor
|
||||
```
|
||||
|
||||
**Verify database connection:**
|
||||
```bash
|
||||
curl http://localhost:8081/health/ready
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 Additional Resources
|
||||
|
||||
- [Quarkus Official Guide](https://quarkus.io/guides/)
|
||||
- [Quarkus Scheduler](https://quarkus.io/guides/scheduler)
|
||||
- [Quarkus REST](https://quarkus.io/guides/rest)
|
||||
- [Quarkus Health](https://quarkus.io/guides/smallrye-health)
|
||||
- [Quarkus Docker](https://quarkus.io/guides/container-image)
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
✅ **Quarkus Framework** integrated for modern Java development
|
||||
✅ **CDI/Dependency Injection** for clean architecture
|
||||
✅ **@Scheduled** annotations for cron-based workflows
|
||||
✅ **REST API** for control and monitoring
|
||||
✅ **Health Checks** for Kubernetes/Docker
|
||||
✅ **Fast Startup** and low memory footprint
|
||||
✅ **Docker/Kubernetes** ready
|
||||
✅ **Production** optimized
|
||||
|
||||
**Run and enjoy! 🎉**
|
||||
209
docs/RATE_LIMITING.md
Normal file
209
docs/RATE_LIMITING.md
Normal file
@@ -0,0 +1,209 @@
|
||||
# HTTP Rate Limiting
|
||||
|
||||
## Overview
|
||||
|
||||
The Troostwijk Scraper implements **per-host HTTP rate limiting** to prevent overloading external services (especially Troostwijk APIs) and avoid getting blocked.
|
||||
|
||||
## Features
|
||||
|
||||
- ✅ **Per-host rate limiting** - Different limits for different hosts
|
||||
- ✅ **Token bucket algorithm** - Allows burst traffic while maintaining steady rate
|
||||
- ✅ **Automatic host detection** - Extracts host from URL automatically
|
||||
- ✅ **Request statistics** - Tracks success/failure/rate-limited requests
|
||||
- ✅ **Thread-safe** - Uses semaphores for concurrent request handling
|
||||
- ✅ **Configurable** - Via `application.properties`
|
||||
|
||||
## Configuration
|
||||
|
||||
Edit `src/main/resources/application.properties`:
|
||||
|
||||
```properties
|
||||
# Default rate limit for all hosts (requests per second)
|
||||
auction.http.rate-limit.default-max-rps=2
|
||||
|
||||
# Troostwijk-specific rate limit (requests per second)
|
||||
auction.http.rate-limit.troostwijk-max-rps=1
|
||||
|
||||
# HTTP request timeout (seconds)
|
||||
auction.http.timeout-seconds=30
|
||||
```
|
||||
|
||||
### Recommended Settings
|
||||
|
||||
| Service | Max RPS | Reason |
|
||||
|---------|---------|--------|
|
||||
| `troostwijkauctions.com` | **1 req/s** | Prevent blocking by Troostwijk |
|
||||
| Other image hosts | **2 req/s** | Balance speed and politeness |
|
||||
|
||||
## Usage
|
||||
|
||||
The `RateLimitedHttpClient` is automatically injected into services that make HTTP requests:
|
||||
|
||||
```java
|
||||
@Inject
|
||||
RateLimitedHttpClient httpClient;
|
||||
|
||||
// GET request for text
|
||||
HttpResponse<String> response = httpClient.sendGet(url);
|
||||
|
||||
// GET request for binary data (images)
|
||||
HttpResponse<byte[]> response = httpClient.sendGetBytes(imageUrl);
|
||||
```
|
||||
|
||||
### Integrated Services
|
||||
|
||||
1. **TroostwijkMonitor** - API calls for bid monitoring
|
||||
2. **ImageProcessingService** - Image downloads
|
||||
3. **QuarkusWorkflowScheduler** - Scheduled workflows
|
||||
|
||||
## Monitoring
|
||||
|
||||
### REST API Endpoints
|
||||
|
||||
#### Get All Rate Limit Statistics
|
||||
```bash
|
||||
GET http://localhost:8081/api/monitor/rate-limit/stats
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"hosts": 2,
|
||||
"statistics": {
|
||||
"api.troostwijkauctions.com": {
|
||||
"totalRequests": 150,
|
||||
"successfulRequests": 148,
|
||||
"failedRequests": 1,
|
||||
"rateLimitedRequests": 0,
|
||||
"averageDurationMs": 245
|
||||
},
|
||||
"images.troostwijkauctions.com": {
|
||||
"totalRequests": 320,
|
||||
"successfulRequests": 315,
|
||||
"failedRequests": 5,
|
||||
"rateLimitedRequests": 2,
|
||||
"averageDurationMs": 892
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Get Statistics for Specific Host
|
||||
```bash
|
||||
GET http://localhost:8081/api/monitor/rate-limit/stats/api.troostwijkauctions.com
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"host": "api.troostwijkauctions.com",
|
||||
"totalRequests": 150,
|
||||
"successfulRequests": 148,
|
||||
"failedRequests": 1,
|
||||
"rateLimitedRequests": 0,
|
||||
"averageDurationMs": 245
|
||||
}
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
### Token Bucket Algorithm
|
||||
|
||||
1. **Bucket initialization** - Starts with `maxRequestsPerSecond` tokens
|
||||
2. **Request consumption** - Each request consumes 1 token
|
||||
3. **Token refill** - Bucket refills every second
|
||||
4. **Blocking** - If no tokens available, request waits
|
||||
|
||||
### Per-Host Rate Limiting
|
||||
|
||||
The client automatically:
|
||||
1. Extracts hostname from URL (e.g., `api.troostwijkauctions.com`)
|
||||
2. Creates/retrieves rate limiter for that host
|
||||
3. Applies configured limit (Troostwijk-specific or default)
|
||||
4. Tracks statistics per host
|
||||
|
||||
### Request Flow
|
||||
|
||||
```
|
||||
Request → Extract Host → Get Rate Limiter → Acquire Token → Send Request → Record Stats
|
||||
↓
|
||||
troostwijkauctions.com?
|
||||
↓
|
||||
Yes: 1 req/s | No: 2 req/s
|
||||
```
|
||||
|
||||
## Warning Signs
|
||||
|
||||
Monitor for these indicators of rate limiting issues:
|
||||
|
||||
| Metric | Warning Threshold | Action |
|
||||
|--------|------------------|--------|
|
||||
| `rateLimitedRequests` | > 0 | Server is rate limiting you - reduce `max-rps` |
|
||||
| `failedRequests` | > 5% | Investigate connection issues or increase timeout |
|
||||
| `averageDurationMs` | > 3000ms | Server may be slow - reduce load |
|
||||
|
||||
## Testing
|
||||
|
||||
### Manual Test via cURL
|
||||
|
||||
```bash
|
||||
# Test Troostwijk API rate limiting
|
||||
for i in {1..10}; do
|
||||
echo "Request $i at $(date +%T)"
|
||||
curl -s http://localhost:8081/api/monitor/status > /dev/null
|
||||
sleep 0.5
|
||||
done
|
||||
|
||||
# Check statistics
|
||||
curl http://localhost:8081/api/monitor/rate-limit/stats | jq
|
||||
```
|
||||
|
||||
### Check Logs
|
||||
|
||||
Rate limiting is logged at DEBUG level:
|
||||
|
||||
```
|
||||
03:15:23 DEBUG [RateLimitedHttpClient] HTTP 200 GET api.troostwijkauctions.com (245ms)
|
||||
03:15:24 DEBUG [RateLimitedHttpClient] HTTP 200 GET api.troostwijkauctions.com (251ms)
|
||||
03:15:25 WARN [RateLimitedHttpClient] ⚠️ Rate limited by api.troostwijkauctions.com (HTTP 429)
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Problem: Getting HTTP 429 (Too Many Requests)
|
||||
|
||||
**Solution:** Decrease `max-rps` for that host:
|
||||
```properties
|
||||
auction.http.rate-limit.troostwijk-max-rps=0.5
|
||||
```
|
||||
|
||||
### Problem: Requests too slow
|
||||
|
||||
**Solution:** Increase `max-rps` (be careful not to get blocked):
|
||||
```properties
|
||||
auction.http.rate-limit.default-max-rps=3
|
||||
```
|
||||
|
||||
### Problem: Requests timing out
|
||||
|
||||
**Solution:** Increase timeout:
|
||||
```properties
|
||||
auction.http.timeout-seconds=60
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Start conservative** - Begin with low limits (1 req/s)
|
||||
2. **Monitor statistics** - Watch `rateLimitedRequests` metric
|
||||
3. **Respect robots.txt** - Check host's crawling policy
|
||||
4. **Use off-peak hours** - Run heavy scraping during low-traffic times
|
||||
5. **Implement exponential backoff** - If receiving 429s, wait longer between retries
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
Potential improvements:
|
||||
- [ ] Dynamic rate adjustment based on 429 responses
|
||||
- [ ] Exponential backoff on failures
|
||||
- [ ] Per-endpoint rate limiting (not just per-host)
|
||||
- [ ] Request queue visualization
|
||||
- [ ] Integration with external rate limit APIs (e.g., Redis)
|
||||
304
docs/VALUATION.md
Normal file
304
docs/VALUATION.md
Normal file
@@ -0,0 +1,304 @@
|
||||
# Auction Valuation Mathematics - Technical Reference
|
||||
|
||||
## 1. Fair Market Value (FMV) - Core Valuation Formula
|
||||
|
||||
The baseline valuation is calculated using a **weighted comparable sales approach**:
|
||||
|
||||
$$
|
||||
FMV = \frac{\sum_{i=1}^{n} \left( P_i \cdot \omega_c \cdot \omega_t \cdot \omega_p \cdot \omega_h \right)}{\sum_{i=1}^{n} \left( \omega_c \cdot \omega_t \cdot \omega_p \cdot \omega_h \right)}
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $P_i$ = Final hammer price of comparable lot *i* (€)
|
||||
- $\omega_c$ = **Condition weight**: $\exp(-\lambda_c \cdot |C_{target} - C_i|)$
|
||||
- $\omega_t$ = **Time weight**: $\exp(-\lambda_t \cdot |T_{target} - T_i|)$
|
||||
- $\omega_p$ = **Provenance weight**: $1 + \delta_p \cdot (P^{prov}_{target} - P^{prov}_i)$
|
||||
- $\omega_h$ = **Historical weight**: $\left( \frac{1}{1 + e^{-k_h \cdot (D_i - D_{median})}} \right)$
|
||||
|
||||
**Parameter Definitions:**
|
||||
- $C \in [0, 10]$ = Condition score (10 = perfect)
|
||||
- $T$ = Manufacturing year
|
||||
- $P^{prov} \in \{0,1\}$ = Provenance flag (1 = documented history; not to be confused with the hammer price $P_i$)
|
||||
- $D_i$ = Days since comparable sale
|
||||
- $\lambda_c = 0.693$ = Condition decay constant (50% weight at 1-point difference)
|
||||
- $\lambda_t = 0.048$ = Time decay constant (≈50% weight at 15-year difference)
|
||||
- $\delta_p = 0.15$ = Provenance premium coefficient
|
||||
- $k_h = 0.01$ = Historical relevance coefficient
|
||||
|
||||
---
|
||||
|
||||
## 2. Condition Adjustment Multiplier
|
||||
|
||||
Normalizes prices across condition states:
|
||||
|
||||
$$
|
||||
M_{cond} = \exp\left( \alpha_c \cdot \sqrt{C_{target}} - \beta_c \right)
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $\alpha_c = 0.15$ = Condition sensitivity parameter
|
||||
- $\beta_c = 0.40$ = Baseline condition offset
|
||||
- $C_{target}$ = Target lot condition score
|
||||
|
||||
**Interpretation:**
|
||||
- $C = 10$ (mint): $M_{cond} \approx 1.08$ (≈61% premium over $C = 0$, where $M_{cond} \approx 0.67$)
|
||||
- $C = 5$ (average): $M_{cond} \approx 0.94$
|
||||
|
||||
---
|
||||
|
||||
## 3. Time-Based Depreciation Model
|
||||
|
||||
For equipment/machinery with measurable lifespan:
|
||||
|
||||
$$
|
||||
V_{age} = V_{new} \cdot \left( 1 - \gamma \cdot \ln\left( 1 + \frac{Y_{current} - Y_{manu}}{Y_{expected}} \right) \right)
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $V_{new}$ = Original market value (€)
|
||||
- $\gamma = 0.25$ = Depreciation aggressivity factor
|
||||
- $Y_{current}$ = Current year
|
||||
- $Y_{manu}$ = Manufacturing year
|
||||
- $Y_{expected}$ = Expected useful life span (years)
|
||||
|
||||
**Example:** 10-year-old machinery with 25-year expected life retains ≈92% of value ($1 - 0.25 \cdot \ln(1.4) \approx 0.916$).
|
||||
|
||||
---
|
||||
|
||||
## 4. Provenance Premium Calculation
|
||||
|
||||
$$
|
||||
\Delta_{prov} = V_{base} \cdot \left( \eta_0 + \eta_1 \cdot \ln(1 + N_{docs}) \right)
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $V_{base}$ = Base valuation without provenance (€)
|
||||
- $N_{docs}$ = Number of verifiable provenance documents
|
||||
- $\eta_0 = 0.08$ = Base provenance premium (8%)
|
||||
- $\eta_1 = 0.035$ = Marginal document premium coefficient
|
||||
|
||||
---
|
||||
|
||||
## 5. Undervaluation Detection Score
|
||||
|
||||
Critical for identifying mispriced opportunities:
|
||||
|
||||
$$
|
||||
U_{score} = \frac{FMV - P_{current}}{FMV} \cdot \sigma_{market} \cdot \left( 1 + \frac{B_{velocity}}{B_{threshold}} \right) \cdot \ln\left( 1 + \frac{W_{watch}}{W_{bid}} \right)
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $P_{current}$ = Current bid price (€)
|
||||
- $\sigma_{market} \in [0,1]$ = Market volatility factor (from indices)
|
||||
- $B_{velocity}$ = Bids per hour (bph)
|
||||
- $B_{threshold} = 10$ bph = High-velocity threshold
|
||||
- $W_{watch}$ = Watch count
|
||||
- $W_{bid}$ = Bid count
|
||||
|
||||
**Trigger condition:** $U_{score} > 0.25$ (25% undervaluation) with confidence > 0.70
|
||||
|
||||
---
|
||||
|
||||
## 6. Bid Velocity Indicator (Competition Heat)
|
||||
|
||||
Measures real-time competitive intensity:
|
||||
|
||||
$$
|
||||
\Lambda_b(t) = \frac{dB}{dt} \cdot \exp\left( -\lambda_{cool} \cdot (t - t_{last}) \right)
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $\frac{dB}{dt}$ = Instantaneous bid rate (bids/minute), i.e. the time derivative of the cumulative bid count $B$
|
||||
- $\lambda_{cool} = 0.1$ = Cool-down decay constant
|
||||
- $t_{last}$ = Timestamp of last bid (minutes)
|
||||
|
||||
**Interpretation:**
|
||||
- $\Lambda_b > 5$ = **Hot lot** (bidding war likely)
|
||||
- $\Lambda_b < 0.5$ = **Cold lot** (potential sleeper)
|
||||
|
||||
---
|
||||
|
||||
## 7. Final Price Prediction Model
|
||||
|
||||
Composite machine learning-style formula:
|
||||
|
||||
$$
|
||||
\hat{P}_{final} = FMV \cdot \left( 1 + \epsilon_{bid} + \epsilon_{time} + \epsilon_{comp} \right)
|
||||
$$
|
||||
|
||||
**Error Components:**
|
||||
|
||||
- **Bid momentum error**:
|
||||
$$\epsilon_{bid} = \tanh\left( \phi_1 \cdot \Lambda_b - \phi_2 \cdot \frac{P_{current}}{FMV} \right)$$
|
||||
|
||||
- **Time-to-close error**:
|
||||
$$\epsilon_{time} = \psi \cdot \exp\left( -\frac{t_{close}}{30} \right)$$
|
||||
|
||||
- **Competition error**:
|
||||
$$\epsilon_{comp} = \rho \cdot \ln\left( 1 + \frac{W_{watch}}{50} \right)$$
|
||||
|
||||
**Parameters:**
|
||||
- $\phi_1 = 0.15$, $\phi_2 = 0.10$ = Bid momentum coefficients
|
||||
- $\psi = 0.20$ = Time pressure coefficient
|
||||
- $\rho = 0.08$ = Competition coefficient
|
||||
- $t_{close}$ = Minutes until close
|
||||
|
||||
**Confidence interval**:
|
||||
$$
|
||||
CI_{95\%} = \hat{P}_{final} \pm 1.96 \cdot \sigma_{residual}
|
||||
$$
|
||||
|
||||
---
|
||||
|
||||
## 8. Bidding Strategy Recommendation Engine
|
||||
|
||||
Optimal max bid and timing:
|
||||
|
||||
$$
|
||||
S_{max} =
|
||||
\begin{cases}
|
||||
FMV \cdot (1 - \theta_{agg}) & \text{if } U_{score} > 0.20 \\
|
||||
FMV \cdot (1 + \theta_{cons}) & \text{if } \Lambda_b > 3 \\
|
||||
\hat{P}_{final} - \delta_{margin} & \text{otherwise}
|
||||
\end{cases}
|
||||
$$
|
||||
|
||||
**Variables:**
|
||||
- $\theta_{agg} = 0.10$ = Aggressive buyer discount target (10% below FMV)
|
||||
- $\theta_{cons} = 0.05$ = Conservative buyer overbid tolerance
|
||||
- $\delta_{margin} = €50$ = Minimum margin below predicted final
|
||||
|
||||
**Timing function**:
|
||||
$$
|
||||
t_{optimal} = t_{close} - \begin{cases}
|
||||
5 \text{ min} & \text{if } \Lambda_b < 1 \\
|
||||
30 \text{ sec} & \text{if } \Lambda_b > 5 \\
|
||||
10 \text{ min} & \text{otherwise}
|
||||
\end{cases}
|
||||
$$
|
||||
|
||||
---
|
||||
|
||||
## Variable Reference Table
|
||||
|
||||
| Symbol | Variable | Unit | Data Source |
|
||||
|--------|----------|------|-------------|
|
||||
| $P_i$ | Comparable sale price | € | `bid_history.bid_amount` of the winning bid (`is_winning`) |
|
||||
| $C$ | Condition score | [0,10] | Image analysis + text parsing |
|
||||
| $T$ | Manufacturing year | Year | Lot description extraction |
|
||||
| $W_{watch}$ | Number of watchers | Count | Page metadata |
|
||||
| $\Lambda_b$ | Bid velocity | bids/min | `bid_history.bid_time` diff |
|
||||
| $t_{close}$ | Time until close | Minutes | `lots.closing_time` - NOW() |
|
||||
| $\sigma_{market}$ | Market volatility | [0,1] | `market_indices.price_change_30d` |
|
||||
| $N_{docs}$ | Provenance documents | Count | PDF link analysis |
|
||||
| $B_{velocity}$ | Bid rate | bph | Bids per hour, counted from `bid_history.bid_time` |
|
||||
|
||||
---
|
||||
|
||||
## Backend Implementation (Quarkus Pseudo-Code)
|
||||
|
||||
```java
|
||||
@Inject
|
||||
MLModelService mlModel;
|
||||
|
||||
public Valuation calculateFairMarketValue(Lot lot) {
|
||||
List<Comparable> comparables = db.findComparables(lot, minSimilarity=0.75, limit=20);
|
||||
|
||||
double weightedSum = 0.0;
|
||||
double weightSum = 0.0;
|
||||
|
||||
for (Comparable comp : comparables) {
|
||||
double wc = Math.exp(-0.693 * Math.abs(lot.getConditionScore() - comp.getConditionScore()));
|
||||
double wt = Math.exp(-0.048 * Math.abs(lot.getYear() - comp.getYear()));
|
||||
double wp = 1 + 0.15 * ((lot.hasProvenance() ? 1 : 0) - (comp.hasProvenance() ? 1 : 0));
|
||||
|
||||
double weight = wc * wt * wp;
|
||||
weightedSum += comp.getFinalPrice() * weight;
|
||||
weightSum += weight;
|
||||
}
|
||||
|
||||
double fmv = weightSum > 0 ? weightedSum / weightSum : lot.getEstimatedMin();
|
||||
|
||||
// Apply condition multiplier
|
||||
fmv *= Math.exp(0.15 * Math.sqrt(lot.getConditionScore()) - 0.40);
|
||||
|
||||
return new Valuation(fmv, calculateConfidence(comparables.size()));
|
||||
}
|
||||
|
||||
public BiddingStrategy getBiddingStrategy(String lotId) {
|
||||
var lot = db.getLot(lotId);
|
||||
var bidHistory = db.getBidHistory(lotId);
|
||||
var watchers = lot.getWatchCount();
|
||||
|
||||
// Analyze patterns
|
||||
boolean isSnipeTarget = watchers > 50 && bidHistory.size() < 5;
|
||||
boolean hasReserve = lot.getReservePrice() > 0;
|
||||
double bidVelocity = calculateBidVelocity(bidHistory);
|
||||
|
||||
// Strategy recommendation
|
||||
String strategy = isSnipeTarget ? "SNIPING_DETECTED" :
|
||||
(hasReserve && lot.getCurrentBid() < lot.getReservePrice() * 0.9) ? "RESERVE_AVOID" :
|
||||
bidVelocity > 5.0 ? "AGGRESSIVE_COMPETITION" : "STANDARD";
|
||||
|
||||
return new BiddingStrategy(
|
||||
strategy,
|
||||
calculateRecommendedMax(lot),
|
||||
isSnipeTarget ? "FINAL_30_SECONDS" : "FINAL_10_MINUTES",
|
||||
getCompetitionLevel(watchers, bidHistory.size())
|
||||
);
|
||||
}
|
||||
```
|
||||
```sqlite
|
||||
-- Core bidding intelligence
|
||||
ALTER TABLE lots ADD COLUMN starting_bid DECIMAL(12,2);
|
||||
ALTER TABLE lots ADD COLUMN estimated_min DECIMAL(12,2);
|
||||
ALTER TABLE lots ADD COLUMN estimated_max DECIMAL(12,2);
|
||||
ALTER TABLE lots ADD COLUMN reserve_price DECIMAL(12,2);
|
||||
ALTER TABLE lots ADD COLUMN watch_count INTEGER DEFAULT 0;
|
||||
ALTER TABLE lots ADD COLUMN first_bid_time TEXT;
|
||||
ALTER TABLE lots ADD COLUMN last_bid_time TEXT;
|
||||
ALTER TABLE lots ADD COLUMN bid_velocity DECIMAL(5,2);
|
||||
|
||||
-- Bid history (critical)
|
||||
CREATE TABLE bid_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
lot_id TEXT REFERENCES lots(lot_id),
|
||||
bid_amount DECIMAL(12,2) NOT NULL,
|
||||
bid_time TEXT NOT NULL,
|
||||
is_winning BOOLEAN DEFAULT FALSE,
|
||||
is_autobid BOOLEAN DEFAULT FALSE,
|
||||
bidder_id TEXT,
|
||||
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Valuation support
|
||||
ALTER TABLE lots ADD COLUMN condition_score DECIMAL(3,2);
|
||||
ALTER TABLE lots ADD COLUMN year_manufactured INTEGER;
|
||||
ALTER TABLE lots ADD COLUMN provenance TEXT;
|
||||
|
||||
CREATE TABLE comparable_sales (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
lot_id TEXT REFERENCES lots(lot_id),
|
||||
comparable_lot_id TEXT,
|
||||
similarity_score DECIMAL(3,2),
|
||||
price_difference_percent DECIMAL(5,2)
|
||||
);
|
||||
|
||||
CREATE TABLE market_indices (
|
||||
category TEXT NOT NULL,
|
||||
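manufacturer TEXT NOT NULL DEFAULT '', -- part of the primary key; SQLite would otherwise allow duplicate (category, NULL) rows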
|
||||
avg_price DECIMAL(12,2),
|
||||
price_change_30d DECIMAL(5,2),
|
||||
PRIMARY KEY (category, manufacturer)
|
||||
);
|
||||
|
||||
-- Alert system
|
||||
CREATE TABLE price_alerts (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
lot_id TEXT REFERENCES lots(lot_id),
|
||||
alert_type TEXT CHECK(alert_type IN ('UNDERVALUED', 'ACCELERATING', 'RESERVE_IN_SIGHT')),
|
||||
trigger_price DECIMAL(12,2),
|
||||
is_triggered BOOLEAN DEFAULT FALSE
|
||||
);
|
||||
|
||||
```
|
||||
Reference in New Issue
Block a user