This commit is contained in:
Tour
2025-12-03 15:32:34 +01:00
parent 815d6a9a4a
commit aef7a3aa30
10 changed files with 533 additions and 1350 deletions

View File

@@ -1,24 +1,19 @@
package com.auction; package com.auction;
import java.time.LocalDateTime; import java.time.LocalDateTime;
/** /**
* Represents auction metadata (veiling informatie) * Represents auction metadata (veiling informatie)
* Data typically populated by the external scraper process
*/ */
public final class AuctionInfo { public record AuctionInfo(
int auctionId, // Unique auction ID (from URL)
public int auctionId; // Unique auction ID (from URL) String title, // Auction title
public String title; // Auction title String location, // Location (e.g., "Amsterdam, NL")
public String location; // Location (e.g., "Amsterdam, NL") String city, // City name
public String city; // City name String country, // Country code (e.g., "NL")
public String country; // Country code (e.g., "NL") String url, // Full auction URL
public String url; // Full auction URL String type, // Auction type (A1 or A7)
public String type; // Auction type (A1 or A7) int lotCount, // Number of lots/kavels
public int lotCount; // Number of lots/kavels LocalDateTime closingTime // Closing time if available
public LocalDateTime closingTime; // Closing time if available ) {}
@Override
public String toString() {
return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}",
auctionId, type, title, location, lotCount, url);
}
}

View File

@@ -1,110 +1,112 @@
package com.auction; package com.auction;
import java.sql.Connection;
import java.sql.DriverManager; import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.Statement;
import java.time.Instant; import java.time.Instant;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
* Service for persisting auctions, lots, images, and object labels into * Service for persisting auctions, lots, and images into a SQLite database.
* a SQLite database. Uses the Xerial JDBC driver which connects to * Data is typically populated by an external scraper process;
* SQLite via a URL of the form "jdbc:sqlite:path_to_file"【329850066306528†L40-L63】. * this service enriches it with image processing and monitoring.
*/ */
public class DatabaseService { public class DatabaseService {
private final String url; private final String url;
DatabaseService(String dbPath) { DatabaseService(String dbPath) {
this.url = "jdbc:sqlite:" + dbPath; this.url = "jdbc:sqlite:" + dbPath;
} }
/** /**
* Creates tables if they do not already exist. The schema includes * Creates tables if they do not already exist.
* tables for auctions, lots, images, and object labels. This method is * Schema supports data from external scraper and adds image processing results.
* idempotent; it can be called multiple times.
*/ */
void ensureSchema() throws SQLException { void ensureSchema() throws SQLException {
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
// Auctions table (veilingen) // Auctions table (populated by external scraper)
stmt.execute("CREATE TABLE IF NOT EXISTS auctions (" stmt.execute("""
+ "auction_id INTEGER PRIMARY KEY," CREATE TABLE IF NOT EXISTS auctions (
+ "title TEXT NOT NULL," auction_id INTEGER PRIMARY KEY,
+ "location TEXT," title TEXT NOT NULL,
+ "city TEXT," location TEXT,
+ "country TEXT," city TEXT,
+ "url TEXT NOT NULL," country TEXT,
+ "type TEXT," url TEXT NOT NULL,
+ "lot_count INTEGER DEFAULT 0," type TEXT,
+ "closing_time TEXT," lot_count INTEGER DEFAULT 0,
+ "discovered_at INTEGER" // Unix timestamp closing_time TEXT,
+ ")"); discovered_at INTEGER
)""");
// Sales table (legacy - keep for compatibility) // Lots table (populated by external scraper)
stmt.execute("CREATE TABLE IF NOT EXISTS sales (" stmt.execute("""
+ "sale_id INTEGER PRIMARY KEY," CREATE TABLE IF NOT EXISTS lots (
+ "title TEXT," lot_id INTEGER PRIMARY KEY,
+ "location TEXT," sale_id INTEGER,
+ "closing_time TEXT" title TEXT,
+ ")"); description TEXT,
manufacturer TEXT,
type TEXT,
year INTEGER,
category TEXT,
current_bid REAL,
currency TEXT,
url TEXT,
closing_time TEXT,
closing_notified INTEGER DEFAULT 0,
FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)
)""");
// Lots table // Images table (populated by this process)
stmt.execute("CREATE TABLE IF NOT EXISTS lots (" stmt.execute("""
+ "lot_id INTEGER PRIMARY KEY," CREATE TABLE IF NOT EXISTS images (
+ "sale_id INTEGER," id INTEGER PRIMARY KEY AUTOINCREMENT,
+ "title TEXT," lot_id INTEGER,
+ "description TEXT," url TEXT,
+ "manufacturer TEXT," file_path TEXT,
+ "type TEXT," labels TEXT,
+ "year INTEGER," processed_at INTEGER,
+ "category TEXT," FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
+ "current_bid REAL," )""");
+ "currency TEXT,"
+ "url TEXT,"
+ "closing_time TEXT,"
+ "closing_notified INTEGER DEFAULT 0,"
+ "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)"
+ ")");
// Images table // Indexes for performance
stmt.execute("CREATE TABLE IF NOT EXISTS images ("
+ "id INTEGER PRIMARY KEY AUTOINCREMENT,"
+ "lot_id INTEGER,"
+ "url TEXT,"
+ "file_path TEXT,"
+ "labels TEXT,"
+ "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)"
+ ")");
// Create indexes for better query performance
stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)"); stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)");
stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)"); stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)");
stmt.execute("CREATE INDEX IF NOT EXISTS idx_images_lot_id ON images(lot_id)");
} }
} }
/** /**
* Inserts or updates an auction record * Inserts or updates an auction record (typically called by external scraper)
*/ */
synchronized void upsertAuction(AuctionInfo auction) throws SQLException { synchronized void upsertAuction(AuctionInfo auction) throws SQLException {
var sql = "INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)" var sql = """
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)
+ " ON CONFLICT(auction_id) DO UPDATE SET " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ "title = excluded.title, location = excluded.location, city = excluded.city, " ON CONFLICT(auction_id) DO UPDATE SET
+ "country = excluded.country, url = excluded.url, type = excluded.type, " title = excluded.title,
+ "lot_count = excluded.lot_count, closing_time = excluded.closing_time"; location = excluded.location,
city = excluded.city,
country = excluded.country,
url = excluded.url,
type = excluded.type,
lot_count = excluded.lot_count,
closing_time = excluded.closing_time
""";
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
ps.setInt(1, auction.auctionId); ps.setInt(1, auction.auctionId());
ps.setString(2, auction.title); ps.setString(2, auction.title());
ps.setString(3, auction.location); ps.setString(3, auction.location());
ps.setString(4, auction.city); ps.setString(4, auction.city());
ps.setString(5, auction.country); ps.setString(5, auction.country());
ps.setString(6, auction.url); ps.setString(6, auction.url());
ps.setString(7, auction.type); ps.setString(7, auction.type());
ps.setInt(8, auction.lotCount); ps.setInt(8, auction.lotCount());
ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null); ps.setString(9, auction.closingTime() != null ? auction.closingTime().toString() : null);
ps.setLong(10, Instant.now().getEpochSecond()); ps.setLong(10, Instant.now().getEpochSecond());
ps.executeUpdate(); ps.executeUpdate();
} }
@@ -120,20 +122,20 @@ public class DatabaseService {
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
var rs = stmt.executeQuery(sql); var rs = stmt.executeQuery(sql);
while (rs.next()) { while (rs.next()) {
var auction = new AuctionInfo(); var closingStr = rs.getString("closing_time");
auction.auctionId = rs.getInt("auction_id"); var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null;
auction.title = rs.getString("title");
auction.location = rs.getString("location"); auctions.add(new AuctionInfo(
auction.city = rs.getString("city"); rs.getInt("auction_id"),
auction.country = rs.getString("country"); rs.getString("title"),
auction.url = rs.getString("url"); rs.getString("location"),
auction.type = rs.getString("type"); rs.getString("city"),
auction.lotCount = rs.getInt("lot_count"); rs.getString("country"),
var closing = rs.getString("closing_time"); rs.getString("url"),
if (closing != null) { rs.getString("type"),
auction.closingTime = LocalDateTime.parse(closing); rs.getInt("lot_count"),
} closing
auctions.add(auction); ));
} }
} }
return auctions; return auctions;
@@ -151,119 +153,145 @@ public class DatabaseService {
ps.setString(1, countryCode); ps.setString(1, countryCode);
var rs = ps.executeQuery(); var rs = ps.executeQuery();
while (rs.next()) { while (rs.next()) {
var auction = new AuctionInfo(); var closingStr = rs.getString("closing_time");
auction.auctionId = rs.getInt("auction_id"); var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null;
auction.title = rs.getString("title");
auction.location = rs.getString("location"); auctions.add(new AuctionInfo(
auction.city = rs.getString("city"); rs.getInt("auction_id"),
auction.country = rs.getString("country"); rs.getString("title"),
auction.url = rs.getString("url"); rs.getString("location"),
auction.type = rs.getString("type"); rs.getString("city"),
auction.lotCount = rs.getInt("lot_count"); rs.getString("country"),
var closing = rs.getString("closing_time"); rs.getString("url"),
if (closing != null) { rs.getString("type"),
auction.closingTime = LocalDateTime.parse(closing); rs.getInt("lot_count"),
} closing
auctions.add(auction); ));
} }
} }
return auctions; return auctions;
} }
/** /**
* Inserts or updates a lot record. Uses INSERT OR REPLACE to * Inserts or updates a lot record (typically called by external scraper)
* implement upsert semantics so that existing rows are replaced.
*/ */
synchronized void upsertLot(Lot lot) throws SQLException { synchronized void upsertLot(Lot lot) throws SQLException {
var sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)" var sql = """
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)
+ " ON CONFLICT(lot_id) DO UPDATE SET " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, " ON CONFLICT(lot_id) DO UPDATE SET
+ "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, " sale_id = excluded.sale_id,
+ "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time"; title = excluded.title,
description = excluded.description,
manufacturer = excluded.manufacturer,
type = excluded.type,
year = excluded.year,
category = excluded.category,
current_bid = excluded.current_bid,
currency = excluded.currency,
url = excluded.url,
closing_time = excluded.closing_time
""";
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
ps.setInt(1, lot.lotId); ps.setInt(1, lot.lotId());
ps.setInt(2, lot.saleId); ps.setInt(2, lot.saleId());
ps.setString(3, lot.title); ps.setString(3, lot.title());
ps.setString(4, lot.description); ps.setString(4, lot.description());
ps.setString(5, lot.manufacturer); ps.setString(5, lot.manufacturer());
ps.setString(6, lot.type); ps.setString(6, lot.type());
ps.setInt(7, lot.year); ps.setInt(7, lot.year());
ps.setString(8, lot.category); ps.setString(8, lot.category());
ps.setDouble(9, lot.currentBid); ps.setDouble(9, lot.currentBid());
ps.setString(10, lot.currency); ps.setString(10, lot.currency());
ps.setString(11, lot.url); ps.setString(11, lot.url());
ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null); ps.setString(12, lot.closingTime() != null ? lot.closingTime().toString() : null);
ps.setInt(13, lot.closingNotified ? 1 : 0); ps.setInt(13, lot.closingNotified() ? 1 : 0);
ps.executeUpdate(); ps.executeUpdate();
} }
} }
/** /**
* Inserts a new image record. Each image is associated with a lot and * Inserts a new image record with object detection labels
* stores both the original URL and the local file path. Detected
* labels are stored as a comma separated string.
*/ */
synchronized void insertImage(int lotId, String url, String filePath, List<String> labels) throws SQLException { synchronized void insertImage(int lotId, String url, String filePath, List<String> labels) throws SQLException {
var sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)"; var sql = "INSERT INTO images (lot_id, url, file_path, labels, processed_at) VALUES (?, ?, ?, ?, ?)";
try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) { try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) {
ps.setInt(1, lotId); ps.setInt(1, lotId);
ps.setString(2, url); ps.setString(2, url);
ps.setString(3, filePath); ps.setString(3, filePath);
ps.setString(4, String.join(",", labels)); ps.setString(4, String.join(",", labels));
ps.setLong(5, Instant.now().getEpochSecond());
ps.executeUpdate(); ps.executeUpdate();
} }
} }
/** /**
* Retrieves all lots that are still active (i.e., have a closing time * Retrieves images for a specific lot
* in the future or unknown). Only these lots need to be monitored. */
synchronized List<ImageRecord> getImagesForLot(int lotId) throws SQLException {
List<ImageRecord> images = new ArrayList<>();
var sql = "SELECT id, lot_id, url, file_path, labels FROM images WHERE lot_id = ?";
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
ps.setInt(1, lotId);
var rs = ps.executeQuery();
while (rs.next()) {
images.add(new ImageRecord(
rs.getInt("id"),
rs.getInt("lot_id"),
rs.getString("url"),
rs.getString("file_path"),
rs.getString("labels")
));
}
}
return images;
}
/**
* Retrieves all lots that are active and need monitoring
*/ */
synchronized List<Lot> getActiveLots() throws SQLException { synchronized List<Lot> getActiveLots() throws SQLException {
List<Lot> list = new ArrayList<>(); List<Lot> list = new ArrayList<>();
var sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots"; var sql = "SELECT lot_id, sale_id, title, description, manufacturer, type, year, category, " +
"current_bid, currency, url, closing_time, closing_notified FROM lots";
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
var rs = stmt.executeQuery(sql); var rs = stmt.executeQuery(sql);
while (rs.next()) { while (rs.next()) {
var lot = new Lot(); var closingStr = rs.getString("closing_time");
lot.lotId = rs.getInt("lot_id"); var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null;
lot.saleId = rs.getInt("sale_id");
lot.currentBid = rs.getDouble("current_bid"); list.add(new Lot(
lot.currency = rs.getString("currency"); rs.getInt("sale_id"),
var closing = rs.getString("closing_time"); rs.getInt("lot_id"),
lot.closingNotified = rs.getInt("closing_notified") != 0; rs.getString("title"),
if (closing != null) { rs.getString("description"),
lot.closingTime = LocalDateTime.parse(closing); rs.getString("manufacturer"),
} rs.getString("type"),
list.add(lot); rs.getInt("year"),
rs.getString("category"),
rs.getDouble("current_bid"),
rs.getString("currency"),
rs.getString("url"),
closing,
rs.getInt("closing_notified") != 0
));
} }
} }
return list; return list;
} }
/** /**
* Retrieves all lots from the database. * Retrieves all lots from the database
*/ */
synchronized List<Lot> getAllLots() throws SQLException { synchronized List<Lot> getAllLots() throws SQLException {
List<Lot> list = new ArrayList<>(); return getActiveLots();
var sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots";
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
var rs = stmt.executeQuery(sql);
while (rs.next()) {
var lot = new Lot();
lot.lotId = rs.getInt("lot_id");
lot.saleId = rs.getInt("sale_id");
lot.title = rs.getString("title");
lot.currentBid = rs.getDouble("current_bid");
lot.currency = rs.getString("currency");
list.add(lot);
}
}
return list;
} }
/** /**
* Gets the total number of images in the database. * Gets the total number of images in the database
*/ */
synchronized int getImageCount() throws SQLException { synchronized int getImageCount() throws SQLException {
var sql = "SELECT COUNT(*) as count FROM images"; var sql = "SELECT COUNT(*) as count FROM images";
@@ -277,27 +305,31 @@ public class DatabaseService {
} }
/** /**
* Updates the current bid of a lot after a bid refresh. * Updates the current bid of a lot (used by monitoring service)
*/ */
synchronized void updateLotCurrentBid(Lot lot) throws SQLException { synchronized void updateLotCurrentBid(Lot lot) throws SQLException {
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement( try (var conn = DriverManager.getConnection(url);
"UPDATE lots SET current_bid = ? WHERE lot_id = ?")) { var ps = conn.prepareStatement("UPDATE lots SET current_bid = ? WHERE lot_id = ?")) {
ps.setDouble(1, lot.currentBid); ps.setDouble(1, lot.currentBid());
ps.setInt(2, lot.lotId); ps.setInt(2, lot.lotId());
ps.executeUpdate(); ps.executeUpdate();
} }
} }
/** /**
* Updates the closingNotified flag of a lot (set to 1 when we have * Updates the closingNotified flag of a lot
* warned the user about its imminent closure).
*/ */
synchronized void updateLotNotificationFlags(Lot lot) throws SQLException { synchronized void updateLotNotificationFlags(Lot lot) throws SQLException {
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement( try (var conn = DriverManager.getConnection(url);
"UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) { var ps = conn.prepareStatement("UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) {
ps.setInt(1, lot.closingNotified ? 1 : 0); ps.setInt(1, lot.closingNotified() ? 1 : 0);
ps.setInt(2, lot.lotId); ps.setInt(2, lot.lotId());
ps.executeUpdate(); ps.executeUpdate();
} }
} }
/**
* Simple record for image data
*/
record ImageRecord(int id, int lotId, String url, String filePath, String labels) {}
} }

View File

@@ -1,29 +1,30 @@
package com.auction; package com.auction;
import java.time.Duration;
import java.time.LocalDateTime; import java.time.LocalDateTime;
/** /**
* Simple POJO representing a lot (kavel) in an auction. It keeps track * Represents a lot (kavel) in an auction.
* of the sale it belongs to, current bid and closing time. The method * Data typically populated by the external scraper process.
* minutesUntilClose computes how many minutes remain until the lot closes. * This project enriches the data with image analysis and monitoring.
*/ */
final class Lot { record Lot(
int saleId,
int saleId; int lotId,
int lotId; String title,
String title; String description,
String description; String manufacturer,
String manufacturer; String type,
String type; int year,
int year; String category,
String category; double currentBid,
double currentBid; String currency,
String currency; String url,
String url; LocalDateTime closingTime,
LocalDateTime closingTime; // null if unknown boolean closingNotified
boolean closingNotified; ) {
long minutesUntilClose() { long minutesUntilClose() {
if (closingTime == null) return Long.MAX_VALUE; if (closingTime == null) return Long.MAX_VALUE;
return java.time.Duration.between(LocalDateTime.now(), closingTime).toMinutes(); return Duration.between(LocalDateTime.now(), closingTime).toMinutes();
} }
} }

View File

@@ -1,82 +1,93 @@
package com.auction; package com.auction;
import org.opencv.core.Core; import org.opencv.core.Core;
import java.util.List;
public class Main {
public static void main2(String[] args) {
// If arguments are passed, this is likely a one-off command via dokku run
// Just exit immediately to allow the command to run
if (args.length > 0) {
IO.println("Command mode - exiting to allow shell commands");
return;
}
IO.println("Starting Troostwijk Auction Scraper..."); /**
IO.println("Container is running and healthy."); * Main entry point for Troostwijk Auction Monitor.
*
// Keep container alive * ARCHITECTURE:
try { * This project focuses on:
Thread.sleep(Long.MAX_VALUE); * 1. Image processing and object detection
} catch (InterruptedException e) { * 2. Bid monitoring and notifications
Thread.currentThread().interrupt(); * 3. Data enrichment
IO.println("Container interrupted, exiting."); *
} * Auction/Lot scraping is handled by the external ARCHITECTURE-TROOSTWIJK-SCRAPER process.
} * That process populates the auctions and lots tables in the shared database.
/** * This process reads from those tables and enriches them with:
* Entry point. Configure database location, notification settings, and * - Downloaded images
* YOLO model paths here before running. Once started the scraper * - Object detection labels
* discovers Dutch auctions, scrapes lots, and begins monitoring. * - Bid monitoring
* - Notifications
*/ */
public class Main {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
IO.println("=== Troostwijk Auction Scraper ===\n"); Console.println("=== Troostwijk Auction Monitor ===\n");
// Configuration parameters (replace with your own values) // Configuration
String databaseFile = "troostwijk.db"; String databaseFile = System.getenv().getOrDefault("DATABASE_FILE", "troostwijk.db");
// Notification configuration - choose one:
// Option 1: Desktop notifications only (free, no setup required)
String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop"); String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop");
// Option 2: Desktop + Email via Gmail (free, requires Gmail app password) // YOLO model paths (optional - monitor works without object detection)
// Format: "smtp:username:appPassword:toEmail"
// Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com"
// Get app password: Google Account > Security > 2-Step Verification > App passwords
// YOLO model paths (optional - scraper works without object detection)
String yoloCfg = "models/yolov4.cfg"; String yoloCfg = "models/yolov4.cfg";
String yoloWeights = "models/yolov4.weights"; String yoloWeights = "models/yolov4.weights";
String yoloClasses = "models/coco.names"; String yoloClasses = "models/coco.names";
// Load native OpenCV library // Load native OpenCV library (only if models exist)
try {
System.loadLibrary(Core.NATIVE_LIBRARY_NAME); System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
Console.println("✓ OpenCV loaded");
} catch (UnsatisfiedLinkError e) {
Console.println("⚠️ OpenCV not available - image detection disabled");
}
IO.println("Initializing scraper..."); Console.println("Initializing monitor...");
TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "", var monitor = new TroostwijkMonitor(databaseFile, notificationConfig,
yoloCfg, yoloWeights, yoloClasses); yoloCfg, yoloWeights, yoloClasses);
// Step 1: Discover auctions in NL // Show current database state
IO.println("\n[1/3] Discovering Dutch auctions..."); Console.println("\n📊 Current Database State:");
List<Integer> auctions = scraper.discoverDutchAuctions(); monitor.printDatabaseStats();
IO.println("✓ Found " + auctions.size() + " auctions: " + auctions);
// Step 2: Fetch lots for each auction // Check for pending image processing
IO.println("\n[2/3] Fetching lot details..."); Console.println("\n[1/2] Processing images...");
int totalAuctions = auctions.size(); monitor.processPendingImages();
int currentAuction = 0;
for (int saleId : auctions) { // Start monitoring service
currentAuction++; Console.println("\n[2/2] Starting bid monitoring...");
IO.println(" [Page " + currentAuction + "] Fetching auctions..."); monitor.scheduleMonitoring();
IO.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "...");
scraper.fetchLotsForSale(saleId); Console.println("\n✓ Monitor is running. Press Ctrl+C to stop.\n");
Console.println("NOTE: This process expects auction/lot data from the external scraper.");
Console.println(" Make sure ARCHITECTURE-TROOSTWIJK-SCRAPER is running and populating the database.\n");
// Keep application alive
try {
Thread.sleep(Long.MAX_VALUE);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
Console.println("Monitor interrupted, exiting.");
}
} }
// Show database summary /**
IO.println("\n📊 Database Summary:"); * Alternative entry point for container environments.
scraper.printDatabaseStats(); * Simply keeps the container alive for manual commands.
*/
public static void main2(String[] args) {
if (args.length > 0) {
Console.println("Command mode - exiting to allow shell commands");
return;
}
// Step 3: Start monitoring bids and closures Console.println("Troostwijk Monitor container is running and healthy.");
IO.println("\n[3/3] Starting monitoring service..."); Console.println("Use 'docker exec' or 'dokku run' to execute commands.");
scraper.scheduleMonitoring();
IO.println("✓ Monitoring active. Press Ctrl+C to stop.\n"); try {
Thread.sleep(Long.MAX_VALUE);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
Console.println("Container interrupted, exiting.");
}
} }
} }

View File

@@ -103,9 +103,9 @@ class NotificationService {
Thread.sleep(2000); Thread.sleep(2000);
tray.remove(trayIcon); tray.remove(trayIcon);
IO.println("Desktop notification sent: " + title); Console.println("Desktop notification sent: " + title);
} else { } else {
IO.println("Desktop notifications not supported, logging: " + title + " - " + message); Console.println("Desktop notifications not supported, logging: " + title + " - " + message);
} }
} catch (Exception e) { } catch (Exception e) {
System.err.println("Desktop notification failed: " + e.getMessage()); System.err.println("Desktop notification failed: " + e.getMessage());
@@ -147,7 +147,7 @@ class NotificationService {
} }
Transport.send(msg); Transport.send(msg);
IO.println("Email notification sent: " + title); Console.println("Email notification sent: " + title);
} catch (Exception e) { } catch (Exception e) {
System.err.println("Email notification failed: " + e.getMessage()); System.err.println("Email notification failed: " + e.getMessage());

View File

@@ -38,12 +38,12 @@ class ObjectDetectionService {
var classNamesFile = Paths.get(classNamesPath); var classNamesFile = Paths.get(classNamesPath);
if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) { if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) {
IO.println("⚠️ Object detection disabled: YOLO model files not found"); Console.println("⚠️ Object detection disabled: YOLO model files not found");
IO.println(" Expected files:"); Console.println(" Expected files:");
IO.println(" - " + cfgPath); Console.println(" - " + cfgPath);
IO.println(" - " + weightsPath); Console.println(" - " + weightsPath);
IO.println(" - " + classNamesPath); Console.println(" - " + classNamesPath);
IO.println(" Scraper will continue without image analysis."); Console.println(" Scraper will continue without image analysis.");
this.enabled = false; this.enabled = false;
this.net = null; this.net = null;
this.classNames = new ArrayList<>(); this.classNames = new ArrayList<>();
@@ -58,7 +58,7 @@ class ObjectDetectionService {
// Load class names (one per line) // Load class names (one per line)
this.classNames = Files.readAllLines(classNamesFile); this.classNames = Files.readAllLines(classNamesFile);
this.enabled = true; this.enabled = true;
IO.println("✓ Object detection enabled with YOLO"); Console.println("✓ Object detection enabled with YOLO");
} catch (Exception e) { } catch (Exception e) {
System.err.println("⚠️ Object detection disabled: " + e.getMessage()); System.err.println("⚠️ Object detection disabled: " + e.getMessage());
throw new IOException("Failed to initialize object detection", e); throw new IOException("Failed to initialize object detection", e);

View File

@@ -1,687 +0,0 @@
package com.auction;
/*
* TroostwijkScraper
*
* This example shows how you could build a Java-based scraper for the Dutch
* auctions on Troostwijk Auctions. The scraper uses a combination of
* HTTP requests and HTML parsing with the jsoup library to discover active
* auctions, calls Troostwijk's internal JSON API to fetch lot (kavel) data
* efficiently, writes the results into a local SQLite database, performs
* object detection on lot images using OpenCV's DNN module, and sends
* desktop/email notifications when bids change or lots are about to expire.
* The implementation uses well known open source libraries for each of these
* concerns. You can adjust the API endpoints and CSS selectors as
* Troostwijk's site evolves. The code is organised into small helper
* classes to make it easier to maintain.
*
* Dependencies (add these to your Maven/Gradle project):
*
* - org.jsoup:jsoup:1.17.2 - HTML parser and HTTP client.
* - com.fasterxml.jackson.core:jackson-databind:2.17.0 - JSON parsing.
* - org.xerial:sqlite-jdbc:3.45.1.0 - SQLite JDBC driver.
* - com.sun.mail:javax.mail:1.6.2 - JavaMail for email notifications (free).
* - org.openpnp:opencv:4.9.0-0 (with native libraries) - OpenCV for image
* processing and object detection.
*
* Before running this program you must ensure that the native OpenCV
* binaries are on your library path (e.g. via -Djava.library.path).
* Desktop notifications work out of the box on Windows, macOS, and Linux.
* For email notifications, you need a Gmail account with an app password
* (free, requires 2FA enabled). See https://support.google.com/accounts/answer/185833
*
* The scraper performs four major tasks:
* 1. Discover all auctions located in the Netherlands.
* 2. For each auction, fetch all lots (kavels) including images and
* bidding information, and persist the data into SQLite tables.
* 3. Monitor bidding and closing times on a schedule and send desktop/email
* notifications when bids change or lots are about to expire.
* 4. Run object detection on downloaded lot images to automatically
* label objects using a YOLO model. The results are stored in the
* database for later search.
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import com.microsoft.playwright.Browser;
import com.microsoft.playwright.BrowserType;
import com.microsoft.playwright.Page;
import com.microsoft.playwright.Playwright;
import com.microsoft.playwright.options.WaitUntilState;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
* Main scraper class. It encapsulates the logic for scraping auctions,
* persisting data, scheduling updates, and performing object detection.
*/
public class TroostwijkScraper {
// Base URLs adjust these if Troostwijk changes their site structure
private static final String AUCTIONS_PAGE = "https://www.troostwijkauctions.com/auctions";
private static final String LOT_API = "https://api.troostwijkauctions.com/lot/7/list";
private static final String CACHE_DB_PATH = "cache/page_cache.db";
private static final long CACHE_EXPIRATION_HOURS = 24;
private static final int RATE_LIMIT_MS = 200;
// HTTP client used for API calls
private final HttpClient httpClient;
private final ObjectMapper objectMapper;
public final DatabaseService db;
private final NotificationService notifier;
private final ObjectDetectionService detector;
private final CacheDatabase cacheDb;
private final boolean useCache;
private Playwright playwright;
private Browser browser;
/**
* Convenience constructor that enables page caching. It delegates to the
* seven-argument constructor with {@code useCache} set to {@code true}.
*
* @param databasePath Path to SQLite database file
* @param notificationConfig "desktop" for desktop only, or "smtp:user:pass:toEmail" for email
* @param unused Kept for compatibility; forwarded unchanged to the seven-argument constructor
* @param yoloCfgPath Path to YOLO configuration file
* @param yoloWeightsPath Path to YOLO weights file
* @param classNamesPath Path to file containing class names
* @throws SQLException propagated from the seven-argument constructor
* @throws IOException propagated from the seven-argument constructor
*/
public TroostwijkScraper(String databasePath, String notificationConfig, String unused,
String yoloCfgPath, String yoloWeightsPath, String classNamesPath) throws SQLException, IOException {
this(databasePath, notificationConfig, unused, yoloCfgPath, yoloWeightsPath, classNamesPath, true);
}
/**
 * Full constructor with explicit cache control. Builds the HTTP client, the
 * JSON mapper and the supporting services, then makes sure the database
 * schema exists. The page cache is only created and initialised when
 * {@code useCache} is {@code true}.
 *
 * @param databasePath Path to SQLite database file
 * @param notificationConfig "desktop" for desktop only, or "smtp:user:pass:toEmail" for email
 * @param unused Kept for compatibility; passed on to {@code NotificationService} as its second argument
 * @param yoloCfgPath Path to YOLO configuration file
 * @param yoloWeightsPath Path to YOLO weights file
 * @param classNamesPath Path to file containing class names
 * @param useCache Enable page caching
 */
public TroostwijkScraper(String databasePath, String notificationConfig, String unused,
        String yoloCfgPath, String yoloWeightsPath, String classNamesPath,
        boolean useCache) throws SQLException, IOException {
    this.useCache = useCache;
    this.httpClient = HttpClient.newHttpClient();
    this.objectMapper = new ObjectMapper();
    this.db = new DatabaseService(databasePath);
    this.notifier = new NotificationService(notificationConfig, unused);
    this.detector = new ObjectDetectionService(yoloCfgPath, yoloWeightsPath, classNamesPath);
    if (useCache) {
        this.cacheDb = new CacheDatabase(CACHE_DB_PATH);
    } else {
        this.cacheDb = null;
    }

    // Prepare persistent storage before the scraper is used.
    db.ensureSchema();
    if (this.cacheDb != null) {
        this.cacheDb.initialize();
    }
}
/**
 * Starts a headless Chromium instance through Playwright so that
 * JavaScript-rendered pages can be fetched. Calling it again while a
 * Playwright instance already exists is a no-op.
 */
public void initializeBrowser() {
    if (playwright != null) {
        return;
    }
    IO.println("Initializing Playwright browser...");
    this.playwright = Playwright.create();
    var launchOptions = new BrowserType.LaunchOptions()
            .setHeadless(true)
            .setArgs(Arrays.asList("--no-sandbox", "--disable-setuid-sandbox"));
    this.browser = playwright.chromium().launch(launchOptions);
    IO.println("✓ Browser ready");
}
/**
 * Closes the browser, the Playwright instance and the page cache.
 *
 * Each resource is released in its own {@code finally} step, so a failure
 * while closing one of them no longer prevents the remaining ones from
 * being closed. The first exception still propagates to the caller. The
 * browser and Playwright references are cleared so a later
 * {@link #initializeBrowser()} starts fresh.
 */
public void close() {
    try {
        if (browser != null) {
            browser.close();
        }
    } finally {
        browser = null;
        try {
            if (playwright != null) {
                playwright.close();
            }
        } finally {
            playwright = null;
            if (cacheDb != null) {
                cacheDb.close();
            }
        }
    }
}
/**
 * Walks the paginated auctions listing and collects the IDs of Dutch auctions.
 *
 * For every page the cached HTML is used when available; otherwise the page
 * is rendered with Playwright, stored in the cache (when caching is enabled)
 * and followed by a short rate-limiting pause. The HTML is handed to
 * {@code parseAuctionsFromHtml}, which records new Dutch auctions in the
 * database and in the shared ID set.
 *
 * Pagination stops when a page cannot be fetched, when the thread is
 * interrupted during the pause, or when a page yields no new Dutch auction.
 *
 * @return the IDs of all Dutch auctions discovered during this run
 */
public List<Integer> discoverDutchAuctions() {
    Set<Integer> discovered = new HashSet<>();
    // Lazily start the browser if the caller did not do so.
    if (browser == null) {
        initializeBrowser();
    }
    IO.println("Starting Dutch auction discovery from " + AUCTIONS_PAGE);
    for (var currentPage = 1; ; currentPage++) {
        IO.println("\n[Page " + currentPage + "] Fetching auctions...");
        var pageHtml = loadFromCache(currentPage);
        if (pageHtml == null) {
            // Cache miss: render the page in the browser.
            pageHtml = fetchPageWithPlaywright(currentPage);
            if (pageHtml == null || pageHtml.isEmpty()) {
                IO.println(" ⚠️ Failed to fetch page, stopping pagination");
                break;
            }
            IO.println(" ✓ Fetched from website");
            if (useCache) {
                saveToCache(currentPage, pageHtml);
            }
            // Pause between live requests; an interrupt ends the crawl.
            try {
                Thread.sleep(RATE_LIMIT_MS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            }
        } else {
            IO.println(" ✓ Loaded from cache");
        }
        var dutchOnPage = parseAuctionsFromHtml(pageHtml, discovered);
        if (dutchOnPage == 0) {
            IO.println(" ⚠️ No Dutch auctions found on page, stopping pagination");
            break;
        }
        IO.println(" ✓ Found " + dutchOnPage + " Dutch auctions");
    }
    IO.println("\n✓ Total Dutch auctions discovered: " + discovered.size());
    return new ArrayList<>(discovered);
}
/**
 * Renders one page of the auction listing with Playwright and returns its HTML.
 *
 * Page 1 is the bare listing URL; later pages append {@code ?page=N}. The
 * browser page is always closed, including when navigation or content
 * extraction fails, so failed fetches no longer leak open pages.
 *
 * @param pageNumber 1-based page index of the listing
 * @return the rendered HTML, or {@code null} when Playwright reports an error
 */
private String fetchPageWithPlaywright(int pageNumber) {
    var url = pageNumber == 1
            ? AUCTIONS_PAGE
            : AUCTIONS_PAGE + "?page=" + pageNumber;
    try {
        var page = browser.newPage();
        try {
            // Set user agent
            page.setExtraHTTPHeaders(Map.of(
                    "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            ));
            // Navigate and wait until network activity settles
            page.navigate(url, new Page.NavigateOptions()
                    .setTimeout(30000)
                    .setWaitUntil(WaitUntilState.NETWORKIDLE));
            // Wait for auction listings to appear; a missing selector is not
            // fatal because the caller can still parse whatever was rendered.
            try {
                page.waitForSelector("a[href^='/a/']", new Page.WaitForSelectorOptions()
                        .setTimeout(10000));
            } catch (Exception e) {
                IO.println(" ⚠️ Auction selector not found");
            }
            return page.content();
        } finally {
            // Release the page on every path, success or failure.
            page.close();
        }
    } catch (Exception e) {
        System.err.println(" ⚠️ Playwright error: " + e.getMessage());
        return null;
    }
}
/**
 * Parses auction links from listing HTML with jsoup and stores Dutch auctions.
 *
 * Every {@code <a>} whose href starts with {@code /a/} and contains an
 * {@code -A1-<id>} or {@code -A7-<id>} suffix is considered. Auctions whose
 * ID is already in {@code saleIds} are skipped. Auctions whose extracted
 * country is {@code "NL"} are added to {@code saleIds}, counted, and
 * upserted into the database; a failed upsert is logged but the auction
 * still counts.
 *
 * A link whose numeric ID does not fit into an {@code int} is skipped
 * individually instead of aborting the whole page.
 *
 * @param html    rendered listing HTML
 * @param saleIds IDs discovered so far; newly found Dutch auction IDs are added
 * @return number of new Dutch auctions found on this page
 */
private int parseAuctionsFromHtml(String html, Set<Integer> saleIds) {
    var foundCount = 0;
    // Compiled once per page instead of once per link.
    var idPattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)");
    try {
        var doc = org.jsoup.Jsoup.parse(html);
        // Find all auction links (format: /a/title-A1-12345 or /a/title-A7-12345)
        var auctionLinks = doc.select("a[href^='/a/']");
        for (var link : auctionLinks) {
            var href = link.attr("href");
            var matcher = idPattern.matcher(href);
            if (!matcher.find()) {
                continue;
            }
            var typeNum = matcher.group(1);
            int auctionId;
            try {
                auctionId = Integer.parseInt(matcher.group(2));
            } catch (NumberFormatException e) {
                // Digits matched but overflow int: ignore just this link.
                System.err.println(" Skipping auction link with invalid id: " + href);
                continue;
            }
            // Skip duplicates
            if (saleIds.contains(auctionId)) {
                continue;
            }
            var auction = extractAuctionInfo(link, href, auctionId, "A" + typeNum);
            // Only keep Dutch auctions
            if (auction != null && "NL".equals(auction.country)) {
                saleIds.add(auctionId);
                foundCount++;
                try {
                    db.upsertAuction(auction);
                    IO.println(" Found Dutch auction: " + auctionId + " - " + auction.title + " (" + auction.location + ")");
                } catch (SQLException e) {
                    System.err.println(" Failed to save auction: " + e.getMessage());
                }
            }
        }
    } catch (Exception e) {
        // Boundary catch: a parsing failure must not stop the crawl.
        System.err.println(" Error parsing HTML: " + e.getMessage());
    }
    return foundCount;
}
/**
* Builds an {@code AuctionInfo} from one auction link element.
*
* The ID, type and absolute URL come straight from the arguments. Title,
* location and lot count are derived heuristically from the href and from
* the markup inside the link:
* - Title: the URL slug between "/a/" and "-A1-"/"-A7-" with hyphens turned
*   into spaces and the first character upper-cased; replaced by the link
*   text when that text is non-empty, contains no digit, and its part before
*   the first comma is longer than 5 characters.
* - Location: the first {@code <p>} whose text ends in two upper-case ASCII
*   letters; otherwise a regex over the link's inner HTML.
* - Lot count: the first element whose own text starts with a number
*   followed by "kavel", "lot" or "item".
*
* @param link the {@code <a>} element of the auction
* @param href the link's href, expected to start with "/a/"
* @param auctionId numeric auction ID already parsed from the href
* @param type auction type string supplied by the caller (e.g. "A1" or "A7")
* @return the populated auction; location and lot-count fields are left
*         untouched when nothing matches
*/
private AuctionInfo extractAuctionInfo(org.jsoup.nodes.Element link, String href, int auctionId, String type) {
var auction = new AuctionInfo();
auction.auctionId = auctionId;
auction.type = type;
auction.url = "https://www.troostwijkauctions.com" + href;
// Extract title from href (convert kebab-case to title)
var titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-");
var titleMatcher = titlePattern.matcher(href);
if (titleMatcher.find()) {
var slug = titleMatcher.group(1);
auction.title = slug.replace("-", " ");
// Capitalize first letter
if (!auction.title.isEmpty()) {
auction.title = auction.title.substring(0, 1).toUpperCase() + auction.title.substring(1);
}
} else {
auction.title = "Unknown Auction";
}
// Try to find title in link text (more accurate)
var linkText = link.text();
if (!linkText.isEmpty() && !linkText.matches(".*\\d+.*")) {
// If link text doesn't contain numbers, it's likely the title.
// Because the text has no digits here, only the comma alternative of the
// split below can ever match.
var parts = linkText.split(",|\\d+");
if (parts.length > 0 && parts[0].trim().length() > 5) {
auction.title = parts[0].trim();
}
}
// Extract location using JSoup selectors
// Look for <p> tags that contain location info
var locationElements = link.select("p");
for (var p : locationElements) {
var text = p.text();
// Pattern: "City, Country" or "City, Region, Country"
// NOTE(review): this accepts any text ending in two capitals, not only a
// ", CC" suffix — confirm that is acceptable for the current markup.
if (text.matches(".*[A-Z]{2}$")) {
// Ends with 2-letter country code
var countryCode = text.substring(text.length() - 2);
var cityPart = text.substring(0, text.length() - 2).trim();
// Remove trailing comma or whitespace
cityPart = cityPart.replaceAll("[,\\s]+$", "");
auction.country = countryCode;
auction.city = cityPart;
auction.location = cityPart + ", " + countryCode;
break;
}
}
// Fallback: check HTML content directly. The regex looks for text that is
// followed by an optional HTML comment, a closing </span>, and then a
// two-letter upper-case code that is not followed by another letter.
if (auction.country == null) {
var html = link.html();
var locPattern = java.util.regex.Pattern.compile(
"([A-Za-z][A-Za-z\\s,\\-']+?)\\s*(?:<!--.*?-->)?\\s*</span>\\s*([A-Z]{2})(?![A-Za-z])");
var locMatcher = locPattern.matcher(html);
if (locMatcher.find()) {
var city = locMatcher.group(1).trim().replaceAll(",$", "");
var country = locMatcher.group(2);
auction.city = city;
auction.country = country;
auction.location = city + ", " + country;
}
}
// Extract lot count if available (kavels/lots)
// Scans the link and all of its descendants; only each element's own text
// (not its children's) is tested, and the first match wins.
var textElements = link.select("*");
for (var elem : textElements) {
var text = elem.ownText();
if (text.matches("\\d+\\s+(?:kavel|lot|item)s?.*")) {
var countPattern = java.util.regex.Pattern.compile("(\\d+)");
var countMatcher = countPattern.matcher(text);
if (countMatcher.find()) {
auction.lotCount = Integer.parseInt(countMatcher.group(1));
break;
}
}
}
return auction;
}
/**
 * Looks up the cached HTML of a listing page.
 *
 * @param pageNumber 1-based page index of the listing
 * @return whatever the cache returns for the page URL, or {@code null}
 *         when caching is disabled
 */
private String loadFromCache(int pageNumber) {
    if (useCache && cacheDb != null) {
        // The cache key is the page URL; page 1 has no query string.
        var cacheKey = AUCTIONS_PAGE;
        if (pageNumber != 1) {
            cacheKey = AUCTIONS_PAGE + "?page=" + pageNumber;
        }
        return cacheDb.get(cacheKey);
    }
    return null;
}
/**
 * Stores the HTML of a listing page in the cache under its URL, using the
 * configured expiration. Does nothing when caching is disabled.
 *
 * @param pageNumber 1-based page index of the listing
 * @param html       rendered HTML to store
 */
private void saveToCache(int pageNumber, String html) {
    if (useCache && cacheDb != null) {
        // The cache key is the page URL; page 1 has no query string.
        var cacheKey = AUCTIONS_PAGE;
        if (pageNumber != 1) {
            cacheKey = AUCTIONS_PAGE + "?page=" + pageNumber;
        }
        cacheDb.put(cacheKey, html, CACHE_EXPIRATION_HOURS);
    }
}
/**
 * Retrieves all lots for a given sale ID using Troostwijk's internal JSON
 * API and persists them.
 *
 * The API is paged with {@code batchSize} and {@code offset}; batches are
 * requested until the API returns a non-200 status or an empty/non-array
 * {@code results} node. Each result is mapped to a {@code Lot}, upserted,
 * and its images are downloaded and run through object detection.
 *
 * Error handling:
 * - A database error affects only the lot being processed; the remaining
 *   lots of the batch are still handled and the offset always advances, so
 *   a persistent database failure can no longer cause the same batch to be
 *   fetched forever.
 * - An I/O error or an interrupt stops pagination; on interrupt the
 *   thread's interrupt flag is restored.
 *
 * @param saleId the sale identifier
 */
public void fetchLotsForSale(int saleId) {
    final var batchSize = 200;
    var offset = 0;
    var totalLots = 0;
    while (true) {
        try {
            var url = LOT_API + "?batchSize=" + batchSize
                    + "&listType=7&offset=" + offset
                    + "&sortOption=0&saleID=" + saleId
                    + "&parentID=0&relationID=0&buildversion=201807311";
            IO.println(" Fetching lots from API (offset=" + offset + ")...");
            var request = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .header("Accept", "application/json")
                    .header("User-Agent", "Mozilla/5.0")
                    .GET()
                    .build();
            var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
            if (response.statusCode() != 200) {
                System.err.println(" ⚠️ API call failed for sale " + saleId);
                System.err.println(" Status: " + response.statusCode());
                System.err.println(" Response: " + response.body().substring(0, Math.min(200, response.body().length())));
                break;
            }
            var root = objectMapper.readTree(response.body());
            var results = root.path("results");
            if (!results.isArray() || results.isEmpty()) {
                // Only an empty first batch is worth reporting; later empty
                // batches simply mark the end of the sale.
                if (offset == 0) {
                    IO.println(" ⚠️ No lots found for sale " + saleId);
                    IO.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length())));
                }
                break;
            }
            var lotsInBatch = results.size();
            IO.println(" Found " + lotsInBatch + " lots in this batch");
            for (var node : results) {
                var lotId = node.path("lotID").asInt();
                try {
                    var lot = new Lot();
                    lot.saleId = saleId;
                    lot.lotId = lotId;
                    lot.title = node.path("t").asText();
                    lot.description = node.path("d").asText();
                    lot.manufacturer = node.path("mf").asText();
                    lot.type = node.path("typ").asText();
                    lot.year = node.path("yb").asInt();
                    lot.category = node.path("lc").asText();
                    // Current bid; field names may differ (e.g. currentBid or cb)
                    lot.currentBid = node.path("cb").asDouble();
                    lot.currency = node.path("cu").asText();
                    lot.url = "https://www.troostwijkauctions.com/nl" + node.path("url").asText();
                    // Save basic lot info into DB
                    db.upsertLot(lot);
                    totalLots++;
                    // Collect image URLs for this lot
                    List<String> imageUrls = new ArrayList<>();
                    var imgs = node.path("imgs");
                    if (imgs.isArray()) {
                        for (var imgNode : imgs) {
                            imageUrls.add(imgNode.asText());
                        }
                    }
                    // Download and analyze images (optional, can be slow)
                    for (var imgUrl : imageUrls) {
                        var fileName = downloadImage(imgUrl, saleId, lot.lotId);
                        if (fileName != null) {
                            // run object detection once per image
                            var labels = detector.detectObjects(fileName);
                            db.insertImage(lot.lotId, imgUrl, fileName, labels);
                        }
                    }
                } catch (SQLException e) {
                    // Skip just this lot and keep going with the batch.
                    System.err.println("Database error for lot " + lotId + ": " + e.getMessage());
                }
            }
            IO.println(" ✓ Processed " + totalLots + " lots so far");
            offset += batchSize;
        } catch (IOException e) {
            System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage());
            break;
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage());
            break;
        }
    }
}
/**
 * Downloads an image from the given URL to a local directory. Images
 * are stored under "images/<saleId>/<lotId>/" to keep them organised.
 *
 * The local file name is taken from the last segment of the URL path (not
 * from the raw URL string, which is not a valid file-system path on every
 * platform). An existing file with the same name is overwritten so that
 * re-running the scraper does not fail on already-downloaded images. The
 * response stream is always closed, and an interrupt restores the thread's
 * interrupt flag.
 *
 * @param imageUrl remote image URL
 * @param saleId sale identifier
 * @param lotId lot identifier
 * @return absolute path to saved file or null on failure
 */
private String downloadImage(String imageUrl, int saleId, int lotId) {
    try {
        var uri = URI.create(imageUrl);
        var request = HttpRequest.newBuilder()
                .uri(uri)
                .GET()
                .build();
        var response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
        // try-with-resources releases the connection on every path.
        try (var body = response.body()) {
            if (response.statusCode() != 200) {
                System.err.println("Failed to download image " + imageUrl + ": HTTP " + response.statusCode());
                return null;
            }
            var dir = Paths.get("images", String.valueOf(saleId), String.valueOf(lotId));
            Files.createDirectories(dir);
            var dest = dir.resolve(localImageName(uri));
            Files.copy(body, dest, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
            return dest.toAbsolutePath().toString();
        }
    } catch (IOException | IllegalArgumentException e) {
        // IllegalArgumentException covers malformed URLs and invalid paths.
        System.err.println("Failed to download image " + imageUrl + ": " + e.getMessage());
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        System.err.println("Failed to download image " + imageUrl + ": " + e.getMessage());
    }
    return null;
}

/**
 * Derives a safe local file name from an image URI: the last path segment
 * with unusual characters replaced, prefixed with a hash of the query
 * string (if any) so URLs differing only in their query do not collide.
 */
private static String localImageName(URI uri) {
    var path = uri.getPath();
    var name = path == null ? "" : path.substring(path.lastIndexOf('/') + 1);
    name = name.replaceAll("[^A-Za-z0-9._-]", "_");
    if (name.isEmpty() || name.equals(".") || name.equals("..")) {
        name = "image-" + Integer.toHexString(uri.toString().hashCode());
    }
    var query = uri.getRawQuery();
    if (query != null && !query.isEmpty()) {
        name = Integer.toHexString(query.hashCode()) + "-" + name;
    }
    return name;
}
/**
 * Schedules periodic monitoring of all active lots on a single-threaded
 * scheduler. The pass runs immediately and then every hour.
 *
 * During each pass every active lot has its bid refreshed. For a lot with
 * fewer than 30 minutes left, one extra bid refresh is scheduled 5 minutes
 * later; if 5 minutes or fewer remain and no closing warning was sent yet,
 * a notification is sent through the configured notifier and the flag is
 * persisted.
 *
 * Unchecked exceptions are caught inside the periodic task: an exception
 * escaping a fixed-rate task would otherwise silently cancel every future
 * run.
 *
 * Note: the scheduler is never shut down here; in production, ensure proper
 * shutdown handling for the scheduler.
 */
public void scheduleMonitoring() {
    var scheduler = Executors.newScheduledThreadPool(1);
    scheduler.scheduleAtFixedRate(() -> {
        try {
            var activeLots = db.getActiveLots();
            for (var lot : activeLots) {
                // refresh the lot's bidding information via API
                refreshLotBid(lot);
                // check closing time to adjust monitoring
                var minutesLeft = lot.minutesUntilClose();
                if (minutesLeft < 30) {
                    // send warning when within 5 minutes
                    if (minutesLeft <= 5 && !lot.closingNotified) {
                        notifier.sendNotification("Kavel " + lot.lotId + " sluit binnen " + minutesLeft + " min.",
                                "Lot nearing closure", 1);
                        lot.closingNotified = true;
                        db.updateLotNotificationFlags(lot);
                    }
                    // schedule additional quick check for this lot
                    scheduler.schedule(() -> refreshLotBid(lot), 5, TimeUnit.MINUTES);
                }
            }
        } catch (SQLException e) {
            System.err.println("Error during scheduled monitoring: " + e.getMessage());
        } catch (RuntimeException e) {
            // Keep the periodic task alive; see the method documentation.
            System.err.println("Unexpected error during scheduled monitoring: " + e);
        }
    }, 0, 1, TimeUnit.HOURS);
}
/**
 * Refreshes the bid for a single lot and sends a notification if it has
 * risen since the last check. The method calls the same API used for
 * initial scraping with a batch size of 1 and reads the current bid
 * ("cb") of the first result.
 *
 * Non-200 responses and empty results are ignored silently. I/O and
 * database errors are logged. When the thread is interrupted, the
 * interrupt flag is restored so the scheduler or caller can react to it.
 *
 * @param lot the lot to refresh; its {@code currentBid} is updated in place
 *            when a higher bid is found
 */
private void refreshLotBid(Lot lot) {
    try {
        // NOTE(review): presumably the lotID query parameter restricts the
        // result to this lot — verify against the API, since the first
        // result is used without checking its ID.
        var url = LOT_API + "?batchSize=1&listType=7&offset=0&sortOption=0&saleID=" + lot.saleId
                + "&parentID=0&relationID=0&buildversion=201807311&lotID=" + lot.lotId;
        var request = HttpRequest.newBuilder().uri(URI.create(url)).GET().build();
        var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
        if (response.statusCode() != 200) {
            return;
        }
        var root = objectMapper.readTree(response.body());
        var results = root.path("results");
        if (results.isArray() && !results.isEmpty()) {
            var node = results.get(0);
            var newBid = node.path("cb").asDouble();
            // Only a strictly higher bid counts as a change.
            if (Double.compare(newBid, lot.currentBid) > 0) {
                var previous = lot.currentBid;
                lot.currentBid = newBid;
                db.updateLotCurrentBid(lot);
                var msg = String.format("Nieuw bod op kavel %d: €%.2f (was €%.2f)", lot.lotId, newBid, previous);
                notifier.sendNotification(msg, "Kavel bieding update", 0);
            }
        }
    } catch (IOException | SQLException e) {
        System.err.println("Failed to refresh bid for lot " + lot.lotId + ": " + e.getMessage());
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        System.err.println("Failed to refresh bid for lot " + lot.lotId + ": " + e.getMessage());
    }
}
/**
 * Prints a short summary of the database contents: the number of lots, the
 * number of stored images and, when at least one lot exists, the sum of
 * all current bids. Database errors are reported on stderr.
 */
public void printDatabaseStats() {
    try {
        var lots = db.getAllLots();
        var images = db.getImageCount();
        IO.println(" Total lots in database: " + lots.size());
        IO.println(" Total images downloaded: " + images);
        if (lots.isEmpty()) {
            return;
        }
        var bidSum = lots.stream()
                .mapToDouble(l -> l.currentBid)
                .sum();
        var formattedSum = String.format("%.2f", bidSum);
        IO.println(" Total current bids: €" + formattedSum);
    } catch (SQLException e) {
        System.err.println(" ⚠️ Could not retrieve database stats: " + e.getMessage());
    }
}
// ----------------------------------------------------------------------
// Domain classes and services
// ----------------------------------------------------------------------
}

View File

@@ -30,171 +30,6 @@ public class AuctionParsingTest {
System.out.println("Loaded test HTML (" + testHtml.length() + " characters)"); System.out.println("Loaded test HTML (" + testHtml.length() + " characters)");
} }
@Test
public void testParseAuctionsFromTestHtml() {
    // Select every auction link from the fixture HTML.
    Elements links = Jsoup.parse(testHtml).select("a[href^='/a/']");
    System.out.println("\n=== Auction Parsing Test ===");
    System.out.println("Found " + links.size() + " auction links");

    java.util.regex.Pattern idPattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)");
    List<AuctionInfo> parsed = new ArrayList<>();
    for (Element link : links) {
        String href = link.attr("href");
        java.util.regex.Matcher idMatcher = idPattern.matcher(href);
        if (idMatcher.find()) {
            String typeDigit = idMatcher.group(1);
            int id = Integer.parseInt(idMatcher.group(2));
            // Text-based extraction of title, location and lot count.
            AuctionInfo info = extractAuctionInfoFromText(link, href, id, "A" + typeDigit);
            parsed.add(info);
            // Dump only the first ten auctions for manual verification.
            if (parsed.size() <= 10) {
                System.out.println("\n--- Auction #" + parsed.size() + " ---");
                System.out.println("ID: " + info.auctionId);
                System.out.println("Type: " + info.type);
                System.out.println("Title: " + info.title);
                System.out.println("Location: " + info.location);
                System.out.println("City: " + info.city);
                System.out.println("Country: " + info.country);
                System.out.println("Lot Count: " + info.lotCount);
                System.out.println("URL: " + info.url);
                System.out.println("\nAll visible text from link:");
                System.out.println("\"" + link.text() + "\"");
            }
        }
    }

    System.out.println("\n=== Summary ===");
    System.out.println("Total auctions parsed: " + parsed.size());
    // Per-country tallies.
    java.util.function.ToLongFunction<String> countFor =
            code -> parsed.stream().filter(a -> code.equals(a.country)).count();
    long dutch = countFor.applyAsLong("NL");
    long bulgarian = countFor.applyAsLong("BG");
    long german = countFor.applyAsLong("DE");
    long belgian = countFor.applyAsLong("BE");
    System.out.println("Dutch (NL) auctions: " + dutch);
    System.out.println("Bulgarian (BG) auctions: " + bulgarian);
    System.out.println("German (DE) auctions: " + german);
    System.out.println("Belgian (BE) auctions: " + belgian);
    long unknown = parsed.stream().filter(a -> a.country == null).count();
    System.out.println("Unknown location: " + unknown);

    // At least one auction must be present, and each must be fully populated.
    assertTrue(!parsed.isEmpty(), "Should find at least one auction");
    for (AuctionInfo info : parsed) {
        assertNotNull(info.title, "Title should not be null for auction " + info.auctionId);
        assertTrue(!info.title.isEmpty(), "Title should not be empty for auction " + info.auctionId);
        assertNotNull(info.url, "URL should not be null for auction " + info.auctionId);
        assertTrue(info.auctionId > 0, "Auction ID should be positive");
        assertNotNull(info.location, "Location should not be null for auction " + info.auctionId);
        assertNotNull(info.country, "Country should not be null for auction " + info.auctionId);
        assertTrue(info.lotCount > 0, "Lot count should be positive for auction " + info.auctionId);
    }
}
/**
 * Extracts auction details from the visible text of a link rather than
 * from its markup.
 *
 * Expected text layout: "[day] om [time] [lot_count] [title] [city], [CC]"
 * Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE"
 *
 * The text is consumed in stages: the leading "day om time" part is cut
 * off, the trailing "city, CC" part is cut off, a leading number becomes
 * the lot count, and whatever remains is the title. When nothing remains
 * the title falls back to the URL slug.
 */
private AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) {
    AuctionInfo result = new AuctionInfo();
    result.auctionId = auctionId;
    result.type = type;
    result.url = "https://www.troostwijkauctions.com" + href;

    // Visible text only; all tags are stripped by jsoup.
    String visibleText = link.text().trim();
    String rest = visibleText;

    // Stage 1: closing time (day + time) is logged and removed.
    java.util.regex.Matcher closing = java.util.regex.Pattern
            .compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})")
            .matcher(visibleText);
    if (closing.find()) {
        String weekday = closing.group(1);
        String clock = closing.group(2);
        System.out.println(" Closing time: " + weekday + " om " + clock);
        rest = visibleText.substring(closing.end()).trim();
    }

    // Stage 2: location sits at the very end as "city, CC".
    java.util.regex.Matcher place = java.util.regex.Pattern
            .compile("([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$")
            .matcher(rest);
    if (place.find()) {
        result.city = place.group(1).trim();
        result.country = place.group(2);
        result.location = result.city + ", " + result.country;
        rest = rest.substring(0, place.start()).trim();
    }

    // Stage 3: a leading number is the lot count.
    java.util.regex.Matcher lots = java.util.regex.Pattern
            .compile("^(\\d+)\\s+")
            .matcher(rest);
    if (lots.find()) {
        result.lotCount = Integer.parseInt(lots.group(1));
        rest = rest.substring(lots.end()).trim();
    }

    // Stage 4: the remainder is the title.
    if (!rest.isEmpty()) {
        result.title = rest;
        return result;
    }

    // Fallback: rebuild a title from the URL slug.
    java.util.regex.Matcher slugMatcher = java.util.regex.Pattern
            .compile("/a/(.+?)-A[17]-")
            .matcher(href);
    if (slugMatcher.find()) {
        String words = slugMatcher.group(1).replace("-", " ").replace("%7C", "|");
        result.title = words.substring(0, 1).toUpperCase() + words.substring(1);
    } else {
        result.title = "Unknown Auction";
    }
    return result;
}
@Test @Test
public void testLocationPatternMatching() { public void testLocationPatternMatching() {
System.out.println("\n=== Location Pattern Tests ==="); System.out.println("\n=== Location Pattern Tests ===");

View File

@@ -1,12 +1,8 @@
package com.auction; package com.auction;
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter; import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
import com.vladsch.flexmark.util.data.DataHolder;
import net.bytebuddy.build.Plugin.Engine.Source.Element;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.Extensions;
public class Parser { public class Parser {
public record AuctionItem( public record AuctionItem(

Binary file not shown.