SCRAPER_REFACTOR_GUIDE.md (new file)
@@ -0,0 +1,399 @@
# Scraper Refactor Guide - Image Download Integration

## 🎯 Objective

Refactor the Troostwijk scraper to **download and store images locally**, eliminating the 57M+ duplicate image problem in the monitoring process.

## 📋 Current vs. New Architecture

### **Before** (Current Architecture)
```
┌──────────────┐         ┌──────────────┐         ┌──────────────┐
│   Scraper    │────────▶│   Database   │◀────────│   Monitor    │
│              │         │              │         │              │
│ Stores URLs  │         │ images table │         │ Downloads +  │
│ downloaded=0 │         │              │         │ Detection    │
└──────────────┘         └──────────────┘         └──────────────┘
                                                         │
                                                         ▼
                                                 57M+ duplicates!
```
### **After** (New Architecture)
```
┌──────────────┐         ┌──────────────┐         ┌──────────────┐
│   Scraper    │────────▶│   Database   │◀────────│   Monitor    │
│              │         │              │         │              │
│ Downloads +  │         │ images table │         │  Detection   │
│ Stores path  │         │ local_path ✓ │         │     Only     │
│ downloaded=1 │         │              │         │              │
└──────────────┘         └──────────────┘         └──────────────┘
                                                         │
                                                         ▼
                                                  No duplicates!
```
## 🗄️ Database Schema Changes

### Current Schema (ARCHITECTURE-TROOSTWIJK-SCRAPER.md:113-122)
```sql
CREATE TABLE images (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    lot_id TEXT,
    url TEXT,
    local_path TEXT,       -- Currently NULL
    downloaded INTEGER     -- Currently 0
    -- Missing: processed_at, labels (added by monitor)
);
```
### Required Schema (Already Compatible!)
```sql
CREATE TABLE images (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    lot_id TEXT,
    url TEXT,
    local_path TEXT,        -- ✅ SET by scraper after download
    downloaded INTEGER,     -- ✅ SET to 1 by scraper after download
    labels TEXT,            -- ⚠️ SET by monitor (object detection)
    processed_at INTEGER,   -- ⚠️ SET by monitor (timestamp)
    FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
);
```

**Good News**: The scraper's schema already has `local_path` and `downloaded` columns! You just need to populate them.
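Before flipping the config, you can sanity-check the columns with `PRAGMA table_info` (a minimal sketch; assumes the SQLite database lives at `/mnt/okcomputer/output/cache.db`, as elsewhere in this guide):

```python
import sqlite3

def verify_images_schema(db_path='/mnt/okcomputer/output/cache.db'):
    """Fail fast if the columns the scraper must populate are missing."""
    conn = sqlite3.connect(db_path)
    try:
        # Row layout of PRAGMA table_info: (cid, name, type, notnull, dflt_value, pk)
        columns = {row[1] for row in conn.execute("PRAGMA table_info(images)")}
    finally:
        conn.close()
    missing = {'local_path', 'downloaded'} - columns
    if missing:
        raise RuntimeError(f"images table is missing columns: {sorted(missing)}")
```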
## 🔧 Implementation Steps

### **Step 1: Enable Image Downloading in Configuration**

**File**: Your scraper's config file (e.g., `config.py` or environment variables)

```python
# Current setting
DOWNLOAD_IMAGES = False  # ❌ Change this!

# New setting
DOWNLOAD_IMAGES = True   # ✅ Enable downloads

# Image storage path
IMAGES_DIR = "/mnt/okcomputer/output/images"  # Or your preferred path
```
### **Step 2: Update Image Download Logic**

Based on ARCHITECTURE-TROOSTWIJK-SCRAPER.md:211-228, you already have the structure. Here's what needs to change:

**Current Code** (Conceptual):
```python
# Phase 3: Scrape lot details
def scrape_lot(lot_url):
    lot_data = parse_lot_page(lot_url)

    # Save lot to database
    db.insert_lot(lot_data)

    # Save image URLs to database (NOT DOWNLOADED)
    for img_url in lot_data['images']:
        db.execute("""
            INSERT INTO images (lot_id, url, downloaded)
            VALUES (?, ?, 0)
        """, (lot_data['lot_id'], img_url))
```
**New Code** (Required):
```python
import os
import time
from pathlib import Path
from urllib.parse import urlsplit

import requests

def scrape_lot(lot_url):
    lot_data = parse_lot_page(lot_url)

    # Save lot to database
    db.insert_lot(lot_data)

    # Download and save images
    for idx, img_url in enumerate(lot_data['images'], start=1):
        try:
            # Download image
            local_path = download_image(img_url, lot_data['lot_id'], idx)

            # Insert with local_path and downloaded=1
            db.execute("""
                INSERT INTO images (lot_id, url, local_path, downloaded)
                VALUES (?, ?, ?, 1)
                ON CONFLICT(lot_id, url) DO UPDATE SET
                    local_path = excluded.local_path,
                    downloaded = 1
            """, (lot_data['lot_id'], img_url, local_path))

            # Rate limiting (0.5s between downloads)
            time.sleep(0.5)

        except Exception as e:
            print(f"Failed to download {img_url}: {e}")
            # Still insert a record but mark as not downloaded;
            # DO NOTHING keeps the unique index (Step 3) happy on re-scrapes
            db.execute("""
                INSERT INTO images (lot_id, url, downloaded)
                VALUES (?, ?, 0)
                ON CONFLICT(lot_id, url) DO NOTHING
            """, (lot_data['lot_id'], img_url))

def download_image(image_url, lot_id, index):
    """
    Downloads an image and saves it to an organized directory structure.

    Args:
        image_url: Remote URL of the image
        lot_id: Lot identifier (e.g., "A1-28505-5")
        index: Image sequence number (1, 2, 3, ...)

    Returns:
        Absolute path to the saved file
    """
    # Create directory structure: /images/{lot_id}/
    images_dir = Path(os.getenv('IMAGES_DIR', '/mnt/okcomputer/output/images'))
    lot_dir = images_dir / lot_id
    lot_dir.mkdir(parents=True, exist_ok=True)

    # Determine file extension from the URL path (query string stripped)
    ext = Path(urlsplit(image_url).path).suffix or '.jpg'
    filename = f"{index:03d}{ext}"  # 001.jpg, 002.jpg, etc.
    local_path = lot_dir / filename

    # Download with timeout
    response = requests.get(image_url, timeout=10)
    response.raise_for_status()

    # Save to disk
    with open(local_path, 'wb') as f:
        f.write(response.content)

    return str(local_path.absolute())
```
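One caveat in `download_image` above: deriving the extension from the URL fails when the path carries no suffix. A sketch of a Content-Type fallback (a hypothetical helper, not part of the current scraper; it assumes you fetch the response first and name the file afterwards):

```python
import mimetypes
from pathlib import Path
from urllib.parse import urlsplit

def guess_extension(image_url, response):
    """Prefer the Content-Type header; fall back to the URL path, then .jpg."""
    content_type = response.headers.get('Content-Type', '').split(';')[0].strip()
    ext = mimetypes.guess_extension(content_type) if content_type else None
    if not ext or ext == '.jpe':  # normalize the odd .jpe mapping some platforms use for image/jpeg
        ext = Path(urlsplit(image_url).path).suffix or '.jpg'
    return ext
```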
### **Step 3: Add Unique Constraint to Prevent Duplicates**

**Migration SQL**:
```sql
-- Add unique constraint to prevent duplicate image records
CREATE UNIQUE INDEX IF NOT EXISTS idx_images_unique
ON images(lot_id, url);
```

Add this to your scraper's schema initialization:
```python
import sqlite3

def init_database():
    conn = sqlite3.connect('/mnt/okcomputer/output/cache.db')
    cursor = conn.cursor()

    # Existing table creation...
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS images (...)
    """)

    # Add unique constraint (NEW)
    cursor.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_images_unique
        ON images(lot_id, url)
    """)

    conn.commit()
    conn.close()
```
### **Step 4: Handle Image Download Failures Gracefully**

```python
def download_with_retry(image_url, lot_id, index, max_retries=3):
    """Downloads an image with retry logic, reusing download_image (and the
    requests/time imports) from Step 2."""
    for attempt in range(max_retries):
        try:
            return download_image(image_url, lot_id, index)
        except requests.exceptions.RequestException as e:
            if attempt == max_retries - 1:
                print(f"Failed after {max_retries} attempts: {image_url}")
                return None  # Return None on failure
            print(f"Retry {attempt + 1}/{max_retries} for {image_url}")
            time.sleep(2 ** attempt)  # Exponential backoff
```
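Wiring this into the Step 2 loop: because `download_with_retry` returns `None` instead of raising, the caller branches on the result rather than catching exceptions (a sketch under the same schema assumptions as Step 2):

```python
for idx, img_url in enumerate(lot_data['images'], start=1):
    local_path = download_with_retry(img_url, lot_data['lot_id'], idx)
    if local_path:
        db.execute("""
            INSERT INTO images (lot_id, url, local_path, downloaded)
            VALUES (?, ?, ?, 1)
            ON CONFLICT(lot_id, url) DO UPDATE SET
                local_path = excluded.local_path,
                downloaded = 1
        """, (lot_data['lot_id'], img_url, local_path))
    else:
        # Leave downloaded=0 so a later run can retry this URL
        db.execute("""
            INSERT INTO images (lot_id, url, downloaded)
            VALUES (?, ?, 0)
            ON CONFLICT(lot_id, url) DO NOTHING
        """, (lot_data['lot_id'], img_url))
    time.sleep(0.5)  # rate limiting between downloads
```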
### **Step 5: Update Database Queries**

Make sure your INSERT uses `INSERT ... ON CONFLICT` to handle re-scraping:

```python
# Good: Handles re-scraping without duplicates
db.execute("""
    INSERT INTO images (lot_id, url, local_path, downloaded)
    VALUES (?, ?, ?, 1)
    ON CONFLICT(lot_id, url) DO UPDATE SET
        local_path = excluded.local_path,
        downloaded = 1
""", (lot_id, img_url, local_path))

# Bad: Creates duplicates on re-scrape
db.execute("""
    INSERT INTO images (lot_id, url, local_path, downloaded)
    VALUES (?, ?, ?, 1)
""", (lot_id, img_url, local_path))
```
## 📊 Expected Outcomes

### Before Refactor
```sql
SELECT COUNT(*) FROM images WHERE downloaded = 0;
-- Result: 57,376,293 (57M+ undownloaded!)

SELECT COUNT(*) FROM images WHERE local_path IS NOT NULL;
-- Result: 0 (no files downloaded)
```

### After Refactor
```sql
SELECT COUNT(*) FROM images WHERE downloaded = 1;
-- Result: ~16,807 (one per actual lot image)

SELECT COUNT(*) FROM images WHERE local_path IS NOT NULL;
-- Result: ~16,807 (all downloaded images have paths)

-- SQLite's COUNT(DISTINCT ...) takes a single expression,
-- so count distinct pairs via a subquery
SELECT COUNT(*) FROM (SELECT DISTINCT lot_id, url FROM images);
-- Result: ~16,807 (no duplicates!)
```
## 🚀 Deployment Checklist

### Pre-Deployment
- [ ] Back up current database: `cp cache.db cache.db.backup`
- [ ] Verify disk space: at least 10GB free for images
- [ ] Test the download function on 5 sample lots (see the smoke-test sketch below)
- [ ] Verify the `IMAGES_DIR` path exists and is writable
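For the sample-lot item above, a minimal smoke test (a sketch; assumes `download_image` from Step 2 is importable, and the lot IDs/URLs below are placeholders you replace with real rows from your database):

```python
from pathlib import Path

SAMPLE_IMAGES = [
    # (lot_id, image_url) - placeholders, replace with real values
    ('A1-28505-5', 'https://example.com/img/1.jpg'),
]

def smoke_test():
    for lot_id, url in SAMPLE_IMAGES:
        path = Path(download_image(url, lot_id, index=1))
        assert path.is_file() and path.stat().st_size > 0, f"empty file for {url}"
        print(f"OK {lot_id}: {path}")

if __name__ == '__main__':
    smoke_test()
```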
### Deployment
- [ ] Update configuration: `DOWNLOAD_IMAGES = True`
- [ ] Run the schema migration to add the unique index
- [ ] Deploy the updated scraper code
- [ ] Monitor the first 100 lots for errors
### Post-Deployment Verification
```sql
-- Check download success rate
SELECT
    COUNT(*) as total_images,
    SUM(CASE WHEN downloaded = 1 THEN 1 ELSE 0 END) as downloaded,
    SUM(CASE WHEN downloaded = 0 THEN 1 ELSE 0 END) as failed,
    ROUND(100.0 * SUM(downloaded) / COUNT(*), 2) as success_rate
FROM images;

-- Check for duplicates (should return 0 rows)
SELECT lot_id, url, COUNT(*) as dup_count
FROM images
GROUP BY lot_id, url
HAVING COUNT(*) > 1;

-- Verify file system
SELECT COUNT(*) FROM images
WHERE downloaded = 1
  AND local_path IS NOT NULL
  AND local_path != '';
```
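The last query only checks the `local_path` column; to confirm the files actually exist on disk, a small follow-up script (a sketch, same database-path assumption as earlier):

```python
import sqlite3
from pathlib import Path

def find_missing_files(db_path='/mnt/okcomputer/output/cache.db'):
    """Report rows marked downloaded whose file is absent from disk."""
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            "SELECT id, local_path FROM images "
            "WHERE downloaded = 1 AND local_path IS NOT NULL"
        ).fetchall()
    finally:
        conn.close()
    missing = [(img_id, p) for img_id, p in rows if not Path(p).is_file()]
    print(f"{len(missing)} of {len(rows)} downloaded images are missing on disk")
    return missing
```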
## 🔍 Monitoring Process Impact

The monitoring process (auctiora) will automatically:
- ✅ Stop downloading images (network I/O eliminated)
- ✅ Only run object detection on `local_path` files
- ✅ Query: `WHERE local_path IS NOT NULL AND (labels IS NULL OR labels = '')`
- ✅ Update only the `labels` and `processed_at` columns
**No changes needed in the monitoring process!** It's already updated to work with scraper-downloaded images.
## 🐛 Troubleshooting

### Problem: "No space left on device"
```bash
# Check disk usage
df -h /mnt/okcomputer/output/images

# Estimate needed space: ~100KB per image
# 16,807 images × 100KB = ~1.6GB
```
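A preflight check the scraper could run before downloading anything (a sketch using only the standard library; the 2GB threshold is an assumption derived from the ~1.6GB estimate above):

```python
import shutil

def check_disk_space(images_dir='/mnt/okcomputer/output/images',
                     required_bytes=2 * 1024**3):
    """Abort early if the images volume has less than ~2GB free."""
    free = shutil.disk_usage(images_dir).free
    if free < required_bytes:
        raise RuntimeError(
            f"Only {free / 1024**3:.1f} GB free on {images_dir}; need ~2 GB"
        )
```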
### Problem: "Permission denied" when writing images
|
||||||
|
```bash
|
||||||
|
# Fix permissions
|
||||||
|
chmod 755 /mnt/okcomputer/output/images
|
||||||
|
chown -R scraper_user:scraper_group /mnt/okcomputer/output/images
|
||||||
|
```
|
||||||
|
|
||||||
|
### Problem: Images downloading but not recorded in DB
```python
# Add logging
import logging
import os

logging.basicConfig(level=logging.INFO)

def download_image(image_url, lot_id, index):
    # ... directory and filename setup from Step 2 ...
    logging.info(f"Downloading {image_url} to {local_path}")
    # ... download code ...
    logging.info(f"Saved to {local_path}, size: {os.path.getsize(local_path)} bytes")
    return local_path
```
### Problem: Duplicate images after refactor
```sql
-- Find duplicates
SELECT lot_id, url, COUNT(*)
FROM images
GROUP BY lot_id, url
HAVING COUNT(*) > 1;

-- Clean up duplicates (keep newest)
DELETE FROM images
WHERE id NOT IN (
    SELECT MAX(id)
    FROM images
    GROUP BY lot_id, url
);
```
## 📈 Performance Comparison

| Metric | Before (Monitor Downloads) | After (Scraper Downloads) |
|--------|----------------------------|---------------------------|
| **Image records** | 57,376,293 | ~16,807 |
| **Duplicates** | 57,359,486 (99.97%!) | 0 |
| **Network I/O** | Monitor process | Scraper process |
| **Disk usage** | 0 (URLs only) | ~1.6GB (actual files) |
| **Processing speed** | 500ms/image (download + detect) | 100ms/image (detect only) |
| **Error handling** | Complex (download failures) | Simple (files exist) |
## 🎓 Code Examples by Language

### Python (Most Likely)
See **Step 2** above for the complete implementation.
## 📚 References

- **Current Scraper Architecture**: `wiki/ARCHITECTURE-TROOSTWIJK-SCRAPER.md`
- **Database Schema**: `wiki/DATABASE_ARCHITECTURE.md`
- **Monitor Changes**: See commit history for `ImageProcessingService.java`, `DatabaseService.java`
## ✅ Success Criteria

You'll know the refactor is successful when (a combined check for the database-side criteria follows this list):

1. ✅ The database query `SELECT COUNT(*) FROM images` returns ~16,807 (not 57M+)
2. ✅ All images have `downloaded = 1` and `local_path IS NOT NULL`
3. ✅ No duplicate records: `SELECT lot_id, url, COUNT(*) ... HAVING COUNT(*) > 1` returns 0 rows
4. ✅ Monitor logs show "Found N images needing detection" with reasonable numbers
5. ✅ Files exist at the paths in the `local_path` column
6. ✅ Monitor processing speed increases (100ms vs. 500ms per image)
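A minimal sketch of that combined check (same database-path assumption as earlier):

```python
import sqlite3

def check_success(db_path='/mnt/okcomputer/output/cache.db'):
    conn = sqlite3.connect(db_path)
    try:
        total = conn.execute("SELECT COUNT(*) FROM images").fetchone()[0]
        not_downloaded = conn.execute(
            "SELECT COUNT(*) FROM images WHERE downloaded = 0 OR local_path IS NULL"
        ).fetchone()[0]
        dupes = conn.execute(
            "SELECT COUNT(*) FROM (SELECT 1 FROM images "
            "GROUP BY lot_id, url HAVING COUNT(*) > 1)"
        ).fetchone()[0]
    finally:
        conn.close()
    print(f"total={total}, not downloaded={not_downloaded}, duplicate pairs={dupes}")
```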
---

**Questions?** Check the troubleshooting section or inspect the monitor's updated code in:
- `src/main/java/auctiora/ImageProcessingService.java`
- `src/main/java/auctiora/DatabaseService.java:695-719`
@@ -28,7 +28,7 @@ services:
       - AUCTION_WORKFLOW_CLOSING_ALERTS_CRON=0 */5 * * * ?
 
     volumes:
-      # Mount database and images directory
+      # Mount database and images directory1
       - shared-auction-data:/mnt/okcomputer/output
 
     labels:
@@ -68,10 +68,9 @@ public class AuctionMonitorProducer {
     @Singleton
     public ImageProcessingService produceImageProcessingService(
             DatabaseService db,
-            ObjectDetectionService detector,
-            RateLimitedHttpClient httpClient) {
+            ObjectDetectionService detector) {
 
         LOG.infof("Initializing ImageProcessingService");
-        return new ImageProcessingService(db, detector, httpClient);
+        return new ImageProcessingService(db, detector);
     }
 }
@@ -73,7 +73,8 @@ public class DatabaseService {
                 FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)
                 )""");
 
-        // Images table (populated by this process)
+        // Images table (populated by external scraper with URLs and local_path)
+        // This process only adds labels via object detection
         stmt.execute("""
                 CREATE TABLE IF NOT EXISTS images (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -82,6 +83,7 @@ public class DatabaseService {
                 local_path TEXT,
                 labels TEXT,
                 processed_at INTEGER,
+                downloaded INTEGER DEFAULT 0,
                 FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
                 )""");
 
@@ -258,6 +260,7 @@ public class DatabaseService {
             var hasLabels = false;
             var hasLocalPath = false;
             var hasProcessedAt = false;
+            var hasDownloaded = false;
 
             while (rs.next()) {
                 var colName = rs.getString("name");
@@ -265,6 +268,7 @@ public class DatabaseService {
                     case "labels" -> hasLabels = true;
                     case "local_path" -> hasLocalPath = true;
                     case "processed_at" -> hasProcessedAt = true;
+                    case "downloaded" -> hasDownloaded = true;
                 }
             }
 
@@ -280,6 +284,10 @@ public class DatabaseService {
                 log.info("Migrating schema: Adding 'processed_at' column to images table");
                 stmt.execute("ALTER TABLE images ADD COLUMN processed_at INTEGER");
             }
+            if (!hasDownloaded) {
+                log.info("Migrating schema: Adding 'downloaded' column to images table");
+                stmt.execute("ALTER TABLE images ADD COLUMN downloaded INTEGER DEFAULT 0");
+            }
         } catch (SQLException e) {
             // Table might not exist yet, which is fine
             log.debug("Could not check images table schema: " + e.getMessage());
@@ -462,20 +470,36 @@ public class DatabaseService {
     }
 
     /**
-     * Inserts a new image record with object detection labels
+     * Updates the labels field for an image after object detection
      */
-    synchronized void insertImage(long lotId, String url, String filePath, List<String> labels) throws SQLException {
-        var sql = "INSERT INTO images (lot_id, url, local_path, labels, processed_at) VALUES (?, ?, ?, ?, ?)";
+    synchronized void updateImageLabels(int imageId, List<String> labels) throws SQLException {
+        var sql = "UPDATE images SET labels = ?, processed_at = ? WHERE id = ?";
         try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) {
-            ps.setLong(1, lotId);
-            ps.setString(2, url);
-            ps.setString(3, filePath);
-            ps.setString(4, String.join(",", labels));
-            ps.setLong(5, Instant.now().getEpochSecond());
+            ps.setString(1, String.join(",", labels));
+            ps.setLong(2, Instant.now().getEpochSecond());
+            ps.setInt(3, imageId);
             ps.executeUpdate();
         }
     }
 
+    /**
+     * Gets the labels for a specific image
+     */
+    synchronized List<String> getImageLabels(int imageId) throws SQLException {
+        var sql = "SELECT labels FROM images WHERE id = ?";
+        try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) {
+            ps.setInt(1, imageId);
+            var rs = ps.executeQuery();
+            if (rs.next()) {
+                var labelsStr = rs.getString("labels");
+                if (labelsStr != null && !labelsStr.isEmpty()) {
+                    return List.of(labelsStr.split(","));
+                }
+            }
+        }
+        return List.of();
+    }
+
     /**
      * Retrieves images for a specific lot
      */
@@ -671,44 +695,32 @@ public class DatabaseService {
     }
 
     /**
-     * Imports image URLs from scraper's schema.
-     * The scraper populates the images table with URLs but doesn't download them.
-     * This method retrieves undownloaded images for processing.
+     * Gets images that have been downloaded by the scraper but need object detection.
+     * Only returns images that have local_path set but no labels yet.
      *
-     * @return List of image URLs that need to be downloaded
+     * @return List of images needing object detection
      */
-    synchronized List<ImageImportRecord> getUnprocessedImagesFromScraper() throws SQLException {
-        List<ImageImportRecord> images = new ArrayList<>();
+    synchronized List<ImageDetectionRecord> getImagesNeedingDetection() throws SQLException {
+        List<ImageDetectionRecord> images = new ArrayList<>();
         var sql = """
-                SELECT i.lot_id, i.url, l.auction_id
+                SELECT i.id, i.lot_id, i.local_path
                 FROM images i
-                LEFT JOIN lots l ON i.lot_id = l.lot_id
-                WHERE i.downloaded = 0 OR i.local_path IS NULL
+                WHERE i.local_path IS NOT NULL
+                AND i.local_path != ''
+                AND (i.labels IS NULL OR i.labels = '')
                 """;
 
         try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
            var rs = stmt.executeQuery(sql);
            while (rs.next()) {
-                var lotIdStr = rs.getString("lot_id");
-                var auctionIdStr = rs.getString("auction_id");
-
-                var lotId = ScraperDataAdapter.extractNumericId(lotIdStr);
-                var saleId = ScraperDataAdapter.extractNumericId(auctionIdStr);
-
-                // Skip images with invalid IDs (0 indicates parsing failed)
-                if (lotId == 0L || saleId == 0L) {
-                    log.debug("Skipping image with invalid ID: lot_id={}, sale_id={}", lotId, saleId);
-                    continue;
-                }
-
-                images.add(new ImageImportRecord(
-                    lotId,
-                    saleId,
-                    rs.getString("url")
-                ));
+                images.add(new ImageDetectionRecord(
+                    rs.getInt("id"),
+                    rs.getLong("lot_id"),
+                    rs.getString("local_path")
+                ));
             }
         } catch (SQLException e) {
-            log.info("ℹ️ No unprocessed images found in scraper format");
+            log.info("ℹ️ No images needing detection found");
         }
 
         return images;
@@ -720,7 +732,7 @@ public class DatabaseService {
     record ImageRecord(int id, long lotId, String url, String filePath, String labels) { }
 
     /**
-     * Record for importing images from scraper format
+     * Record for images that need object detection processing
      */
-    record ImageImportRecord(long lotId, long saleId, String url) { }
+    record ImageDetectionRecord(int id, long lotId, String filePath) { }
 }
@@ -12,120 +12,78 @@ import java.util.List;
 
 /**
  * Service responsible for processing images from the IMAGES table.
- * Downloads images, performs object detection, and updates the database.
+ * Performs object detection on already-downloaded images and updates the database.
  *
- * This separates image processing concerns from scraping, allowing this project
- * to focus on enriching data scraped by the external process.
+ * NOTE: Image downloading is handled by the external scraper process.
+ * This service only performs object detection on images that already have local_path set.
  */
 @Slf4j
 class ImageProcessingService {
 
-    private final RateLimitedHttpClient httpClient;
     private final DatabaseService db;
     private final ObjectDetectionService detector;
 
-    ImageProcessingService(DatabaseService db, ObjectDetectionService detector, RateLimitedHttpClient httpClient) {
-        this.httpClient = httpClient;
+    ImageProcessingService(DatabaseService db, ObjectDetectionService detector) {
         this.db = db;
         this.detector = detector;
     }
 
     /**
-     * Downloads an image from the given URL to local storage.
-     * Images are organized by saleId/lotId for easy management.
+     * Processes a single image: runs object detection and updates labels in database.
      *
-     * @param imageUrl remote image URL
-     * @param saleId sale identifier
-     * @param lotId lot identifier
-     * @return absolute path to saved file or null on failure
+     * @param imageId database ID of the image record
+     * @param localPath local file path to the downloaded image
+     * @param lotId lot identifier (for logging)
+     * @return true if processing succeeded
      */
-    String downloadImage(String imageUrl, long saleId, long lotId) {
+    boolean processImage(int imageId, String localPath, long lotId) {
         try {
-            var response = httpClient.sendGetBytes(imageUrl);
+            // Run object detection on the local file
+            var labels = detector.detectObjects(localPath);
 
-            if (response != null && response.statusCode() == 200) {
-                // Use environment variable for cross-platform compatibility
-                var imagesPath = System.getenv().getOrDefault("AUCTION_IMAGES_PATH", "/mnt/okcomputer/output/images");
-                var baseDir = Paths.get(imagesPath);
-                var dir = baseDir.resolve(String.valueOf(saleId)).resolve(String.valueOf(lotId));
-                Files.createDirectories(dir);
+            // Update the database with detected labels
+            db.updateImageLabels(imageId, labels);
 
-                // Extract filename from URL
-                var fileName = imageUrl.substring(imageUrl.lastIndexOf('/') + 1);
-                // Remove query parameters if present
-                int queryIndex = fileName.indexOf('?');
-                if (queryIndex > 0) {
-                    fileName = fileName.substring(0, queryIndex);
-                }
-                var dest = dir.resolve(fileName);
-
-                Files.write(dest, response.body());
-                return dest.toAbsolutePath().toString();
-            }
-        } catch (IOException | InterruptedException e) {
-            System.err.println("Failed to download image " + imageUrl + ": " + e.getMessage());
-            if (e instanceof InterruptedException) {
-                Thread.currentThread().interrupt();
+            if (!labels.isEmpty()) {
+                log.info("   Lot {}: Detected {}", lotId, String.join(", ", labels));
             }
+
+            return true;
         } catch (Exception e) {
-            throw new RuntimeException(e);
+            log.error("   Failed to process image {}: {}", imageId, e.getMessage());
+            return false;
         }
-        return null;
     }
 
-    /**
-     * Processes images for a specific lot: downloads and runs object detection.
-     *
-     * @param lotId lot identifier
-     * @param saleId sale identifier
-     * @param imageUrls list of image URLs to process
-     */
-    void processImagesForLot(long lotId, long saleId, List<String> imageUrls) {
-        log.info("   Processing {} images for lot {}", imageUrls.size(), lotId);
-
-        for (var imgUrl : imageUrls) {
-            var fileName = downloadImage(imgUrl, saleId, lotId);
-
-            if (fileName != null) {
-                // Run object detection
-                var labels = detector.detectObjects(fileName);
-
-                // Save to database
-                try {
-                    db.insertImage(lotId, imgUrl, fileName, labels);
-
-                    if (!labels.isEmpty()) {
-                        log.info("   Detected: {}", String.join(", ", labels));
-                    }
-                } catch (SQLException e) {
-                    System.err.println("   Failed to save image to database: " + e.getMessage());
-                }
-            }
-        }
-    }
-
     /**
      * Batch processes all pending images in the database.
-     * Useful for processing images after the external scraper has populated lot data.
+     * Only processes images that have been downloaded by the scraper but haven't had object detection run yet.
      */
     void processPendingImages() {
         log.info("Processing pending images...");
 
         try {
-            var lots = db.getAllLots();
-            log.info("Found {} lots to check for images", lots.size());
+            var pendingImages = db.getImagesNeedingDetection();
+            log.info("Found {} images needing object detection", pendingImages.size());
 
-            for (var lot : lots) {
-                // Check if images already processed for this lot
-                var existingImages = db.getImagesForLot(lot.lotId());
+            var processed = 0;
+            var detected = 0;
 
-                if (existingImages.isEmpty()) {
-                    log.info("   Lot {} has no images yet - needs external scraper data", lot.lotId());
+            for (var image : pendingImages) {
+                if (processImage(image.id(), image.filePath(), image.lotId())) {
+                    processed++;
+                    // Re-fetch to check if labels were found
+                    var labels = db.getImageLabels(image.id());
+                    if (labels != null && !labels.isEmpty()) {
+                        detected++;
+                    }
                 }
             }
+
+            log.info("Processed {} images, detected objects in {}", processed, detected);
+
         } catch (SQLException e) {
-            System.err.println("Error processing pending images: " + e.getMessage());
+            log.error("Error processing pending images: {}", e.getMessage());
         }
     }
 }
@@ -54,9 +54,9 @@ public class QuarkusWorkflowScheduler {
         var lots = db.importLotsFromScraper();
         LOG.infof("   → Imported %d lots", lots.size());
 
-        // Import image URLs
-        var images = db.getUnprocessedImagesFromScraper();
-        LOG.infof("   → Found %d unprocessed images", images.size());
+        // Check for images needing detection
+        var images = db.getImagesNeedingDetection();
+        LOG.infof("   → Found %d images needing detection", images.size());
 
         var duration = System.currentTimeMillis() - start;
         LOG.infof("   ✓ Scraper import completed in %dms", duration);
@@ -78,7 +78,7 @@ public class QuarkusWorkflowScheduler {
     /**
      * Workflow 2: Process Pending Images
      * Cron: Every 1 hour (0 0 * * * ?)
-     * Purpose: Download images and run object detection
+     * Purpose: Run object detection on images already downloaded by scraper
     */
     @Scheduled(cron = "{auction.workflow.image-processing.cron}", identity = "image-processing")
     void processImages() {
@@ -86,55 +86,45 @@ public class QuarkusWorkflowScheduler {
         LOG.info("🖼️ [WORKFLOW 2] Processing pending images...");
         var start = System.currentTimeMillis();
 
-        // Get unprocessed images
-        var unprocessedImages = db.getUnprocessedImagesFromScraper();
+        // Get images that have been downloaded but need object detection
+        var pendingImages = db.getImagesNeedingDetection();
 
-        if (unprocessedImages.isEmpty()) {
+        if (pendingImages.isEmpty()) {
             LOG.info("   → No pending images to process");
             return;
         }
 
-        LOG.infof("   → Processing %d images", unprocessedImages.size());
+        LOG.infof("   → Processing %d images", pendingImages.size());
 
         var processed = 0;
         var detected = 0;
 
-        for (var imageRecord : unprocessedImages) {
+        for (var image : pendingImages) {
             try {
-                // Download image
-                var filePath = imageProcessor.downloadImage(
-                    imageRecord.url(),
-                    imageRecord.saleId(),
-                    imageRecord.lotId()
-                );
-
-                if (filePath != null) {
-                    // Run object detection
-                    var labels = detector.detectObjects(filePath);
-
-                    // Save to database
-                    db.insertImage(imageRecord.lotId(), imageRecord.url(),
-                        filePath, labels);
-
+                // Run object detection on already-downloaded image
+                if (imageProcessor.processImage(image.id(), image.filePath(), image.lotId())) {
                     processed++;
-                    if (!labels.isEmpty()) {
+                    // Check if objects were detected
+                    var labels = db.getImageLabels(image.id());
+                    if (labels != null && !labels.isEmpty()) {
                         detected++;
 
                         // Send notification for interesting detections
                         if (labels.size() >= 3) {
                             notifier.sendNotification(
                                 String.format("Lot %d: Detected %s",
-                                    imageRecord.lotId(),
+                                    image.lotId(),
                                     String.join(", ", labels)),
                                 "Objects Detected",
                                 0
                             );
                         }
                     }
                 }
 
-                // Rate limiting
-                Thread.sleep(500);
+                // Rate limiting (lighter since no network I/O)
+                Thread.sleep(100);
 
             } catch (Exception e) {
                 LOG.warnf("   ⚠️ Failed to process image: %s", e.getMessage());
@@ -43,7 +43,7 @@ public class TroostwijkMonitor {
         db = new DatabaseService(databasePath);
         notifier = new NotificationService(notificationConfig);
         detector = new ObjectDetectionService(yoloCfgPath, yoloWeightsPath, classNamesPath);
-        imageProcessor = new ImageProcessingService(db, detector, httpClient);
+        imageProcessor = new ImageProcessingService(db, detector);
 
         db.ensureSchema();
     }
@@ -42,8 +42,7 @@ public class WorkflowOrchestrator {
 
         this.notifier = new NotificationService(notificationConfig);
         this.detector = new ObjectDetectionService(yoloCfg, yoloWeights, yoloClasses);
-        var httpClient = new RateLimitedHttpClient();
-        this.imageProcessor = new ImageProcessingService(db, detector, httpClient);
+        this.imageProcessor = new ImageProcessingService(db, detector);
 
         this.monitor = new TroostwijkMonitor(databasePath, notificationConfig,
             yoloCfg, yoloWeights, yoloClasses);
@@ -100,9 +99,9 @@ public class WorkflowOrchestrator {
         var lots = db.importLotsFromScraper();
         log.info("   → Imported {} lots", lots.size());
 
-        // Import image URLs
-        var images = db.getUnprocessedImagesFromScraper();
-        log.info("   → Found {} unprocessed images", images.size());
+        // Check for images needing detection
+        var images = db.getImagesNeedingDetection();
+        log.info("   → Found {} images needing detection", images.size());
 
         var duration = System.currentTimeMillis() - start;
         log.info("   ✓ Scraper import completed in {}ms\n", duration);
@@ -127,7 +126,7 @@ public class WorkflowOrchestrator {
     /**
      * Workflow 2: Process Pending Images
      * Frequency: Every 1 hour
-     * Purpose: Download images and run object detection
+     * Purpose: Run object detection on images already downloaded by scraper
     */
     private void scheduleImageProcessing() {
         scheduler.scheduleAtFixedRate(() -> {
@@ -135,55 +134,45 @@ public class WorkflowOrchestrator {
             log.info("🖼️ [WORKFLOW 2] Processing pending images...");
             var start = System.currentTimeMillis();
 
-            // Get unprocessed images
-            var unprocessedImages = db.getUnprocessedImagesFromScraper();
+            // Get images that have been downloaded but need object detection
+            var pendingImages = db.getImagesNeedingDetection();
 
-            if (unprocessedImages.isEmpty()) {
+            if (pendingImages.isEmpty()) {
                 log.info("   → No pending images to process\n");
                 return;
             }
 
-            log.info("   → Processing {} images", unprocessedImages.size());
+            log.info("   → Processing {} images", pendingImages.size());
 
             var processed = 0;
             var detected = 0;
 
-            for (var imageRecord : unprocessedImages) {
+            for (var image : pendingImages) {
                 try {
-                    // Download image
-                    var filePath = imageProcessor.downloadImage(
-                        imageRecord.url(),
-                        imageRecord.saleId(),
-                        imageRecord.lotId()
-                    );
-
-                    if (filePath != null) {
-                        // Run object detection
-                        var labels = detector.detectObjects(filePath);
-
-                        // Save to database
-                        db.insertImage(imageRecord.lotId(), imageRecord.url(),
-                            filePath, labels);
-
+                    // Run object detection on already-downloaded image
+                    if (imageProcessor.processImage(image.id(), image.filePath(), image.lotId())) {
                        processed++;
-                        if (!labels.isEmpty()) {
+                        // Check if objects were detected
+                        var labels = db.getImageLabels(image.id());
+                        if (labels != null && !labels.isEmpty()) {
                            detected++;
 
                            // Send notification for interesting detections
                            if (labels.size() >= 3) {
                                notifier.sendNotification(
                                    String.format("Lot %d: Detected %s",
-                                        imageRecord.lotId(),
+                                        image.lotId(),
                                        String.join(", ", labels)),
                                    "Objects Detected",
                                    0
                                );
                            }
                        }
                    }
 
-                    // Rate limiting
-                    Thread.sleep(500);
+                    // Rate limiting (lighter since no network I/O)
+                    Thread.sleep(100);
 
                } catch (Exception e) {
                    log.info("   ⚠\uFE0F Failed to process image: {}", e.getMessage());