This commit is contained in:
Tour
2025-12-03 15:09:39 +01:00
parent 7fa3e4a545
commit 853c3cf53e
16 changed files with 1405 additions and 2000 deletions

View File

@@ -0,0 +1,24 @@
package com.auction;
import java.time.LocalDateTime;
/**
* Represents auction metadata (veiling informatie)
*/
public final class AuctionInfo {
public int auctionId; // Unique auction ID (from URL)
public String title; // Auction title
public String location; // Location (e.g., "Amsterdam, NL")
public String city; // City name
public String country; // Country code (e.g., "NL")
public String url; // Full auction URL
public String type; // Auction type (A1 or A7)
public int lotCount; // Number of lots/kavels
public LocalDateTime closingTime; // Closing time if available
@Override
public String toString() {
return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}",
auctionId, type, title, location, lotCount, url);
}
}

View File

@@ -0,0 +1,165 @@
package com.auction;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.Instant;
/**
* SQLite-based caching system for HTML pages with expiration support
*/
class CacheDatabase {
    private final String dbPath;
    private Connection connection;

    /**
     * Creates a cache bound to the given SQLite file. No I/O happens until
     * {@link #initialize()} is called.
     *
     * @param dbPath path of the SQLite database file
     */
    public CacheDatabase(String dbPath) {
        this.dbPath = dbPath;
    }

    /**
     * Initialize database and create schema. Creates the parent directory of
     * the database file when it has one, opens the connection, creates the
     * {@code page_cache} table and its index, and removes expired entries.
     *
     * @throws SQLException if the connection cannot be opened or the schema cannot be created
     * @throws IOException  if the cache directory cannot be created
     */
    public synchronized void initialize() throws SQLException, IOException {
        // Create cache directory if it doesn't exist
        var cacheDir = Paths.get(dbPath).getParent();
        if (cacheDir != null) {
            Files.createDirectories(cacheDir);
        }
        // Release a connection left over from an earlier initialize() call.
        close();
        var conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath);
        // Create cache table with URL as primary key
        var createTable = "CREATE TABLE IF NOT EXISTS page_cache (\n" +
                " url TEXT PRIMARY KEY,\n" +
                " html TEXT NOT NULL,\n" +
                " cached_at INTEGER NOT NULL,\n" +
                " expires_at INTEGER NOT NULL\n" +
                ")\n";
        try (var stmt = conn.createStatement()) {
            stmt.execute(createTable);
            // Create index on expires_at for efficient cleanup
            stmt.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON page_cache(expires_at)");
        } catch (SQLException e) {
            // Do not leak the freshly opened connection when schema setup fails.
            try {
                conn.close();
            } catch (SQLException closeError) {
                e.addSuppressed(closeError);
            }
            throw e;
        }
        connection = conn;
        // Clean up expired entries on initialization
        cleanupExpired();
        System.out.println("✓ Cache database initialized");
    }

    /**
     * Get cached HTML for a URL if it exists and hasn't expired.
     * SQL errors are logged to stderr and reported as a cache miss.
     *
     * @param url The URL to look up
     * @return Cached HTML or null if not found/expired
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public synchronized String get(String url) {
        var conn = requireConnection();
        var sql = "SELECT html FROM page_cache WHERE url = ? AND expires_at > ?";
        try (var ps = conn.prepareStatement(sql)) {
            ps.setString(1, url);
            ps.setLong(2, Instant.now().getEpochSecond());
            try (var rs = ps.executeQuery()) {
                if (rs.next()) {
                    return rs.getString("html");
                }
            }
        } catch (SQLException e) {
            System.err.println("Cache read error: " + e.getMessage());
        }
        return null;
    }

    /**
     * Store HTML in cache with expiration time, replacing any existing entry
     * for the same URL. SQL errors are logged to stderr and otherwise ignored.
     *
     * @param url             The URL to cache
     * @param html            The HTML content
     * @param expirationHours Hours until cache expires
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public synchronized void put(String url, String html, long expirationHours) {
        var conn = requireConnection();
        var sql = "INSERT OR REPLACE INTO page_cache (url, html, cached_at, expires_at)\n" +
                "VALUES (?, ?, ?, ?)\n";
        var now = Instant.now().getEpochSecond();
        var expiresAt = now + (expirationHours * 3600);
        try (var ps = conn.prepareStatement(sql)) {
            ps.setString(1, url);
            ps.setString(2, html);
            ps.setLong(3, now);
            ps.setLong(4, expiresAt);
            ps.executeUpdate();
        } catch (SQLException e) {
            System.err.println("Cache write error: " + e.getMessage());
        }
    }

    /**
     * Remove expired cache entries. SQL errors are logged to stderr.
     *
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public synchronized void cleanupExpired() {
        var conn = requireConnection();
        var sql = "DELETE FROM page_cache WHERE expires_at <= ?";
        try (var ps = conn.prepareStatement(sql)) {
            ps.setLong(1, Instant.now().getEpochSecond());
            var deleted = ps.executeUpdate();
            if (deleted > 0) {
                System.out.println("✓ Cleaned up " + deleted + " expired cache entries");
            }
        } catch (SQLException e) {
            System.err.println("Cache cleanup error: " + e.getMessage());
        }
    }

    /**
     * Print cache statistics (total, valid and expired entry counts plus the
     * summed HTML length in KB) to stdout. SQL errors are logged to stderr.
     *
     * @throws IllegalStateException if {@link #initialize()} has not been called
     */
    public synchronized void printStats() {
        var conn = requireConnection();
        var sql = "SELECT COUNT(*) as total, " +
                "SUM(CASE WHEN expires_at > ? THEN 1 ELSE 0 END) as valid, " +
                "SUM(LENGTH(html)) as total_size " +
                "FROM page_cache";
        try (var ps = conn.prepareStatement(sql)) {
            ps.setLong(1, Instant.now().getEpochSecond());
            try (var rs = ps.executeQuery()) {
                if (rs.next()) {
                    var total = rs.getInt("total");
                    var valid = rs.getInt("valid");
                    var size = rs.getLong("total_size");
                    System.out.println("\n=== Cache Statistics ===");
                    System.out.println("Total entries: " + total);
                    System.out.println("Valid entries: " + valid);
                    System.out.println("Expired entries: " + (total - valid));
                    System.out.println("Total size: " + (size / 1024) + " KB");
                }
            }
        } catch (SQLException e) {
            System.err.println("Cache stats error: " + e.getMessage());
        }
    }

    /**
     * Close database connection. Safe to call repeatedly and before
     * {@link #initialize()}; close errors are logged to stderr.
     */
    public synchronized void close() {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                System.err.println("Error closing cache database: " + e.getMessage());
            }
        }
    }

    /**
     * Returns the open connection, failing with a descriptive error instead of
     * a bare NullPointerException when the cache was never initialized.
     */
    private Connection requireConnection() {
        if (connection == null) {
            throw new IllegalStateException("Cache database not initialized; call initialize() first");
        }
        return connection;
    }
}

View File

@@ -0,0 +1,303 @@
package com.auction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.Instant;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
/**
* Service for persisting auctions, lots, images, and object labels into
* a SQLite database. Uses the Xerial JDBC driver which connects to
* SQLite via a URL of the form "jdbc:sqlite:path_to_file".
*/
public class DatabaseService {
    private final String url;

    /**
     * @param dbPath path of the SQLite database file; prefixed with
     *               {@code jdbc:sqlite:} to form the JDBC URL
     */
    DatabaseService(String dbPath) {
        this.url = "jdbc:sqlite:" + dbPath;
    }

    /**
     * Creates tables if they do not already exist. The schema includes
     * tables for auctions, the legacy sales table, lots, and images (object
     * labels are stored in the images table). This method is idempotent; it
     * can be called multiple times.
     *
     * @throws SQLException if the connection or any DDL statement fails
     */
    void ensureSchema() throws SQLException {
        try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
            // Auctions table (veilingen)
            stmt.execute("CREATE TABLE IF NOT EXISTS auctions ("
                    + "auction_id INTEGER PRIMARY KEY,"
                    + "title TEXT NOT NULL,"
                    + "location TEXT,"
                    + "city TEXT,"
                    + "country TEXT,"
                    + "url TEXT NOT NULL,"
                    + "type TEXT,"
                    + "lot_count INTEGER DEFAULT 0,"
                    + "closing_time TEXT,"
                    + "discovered_at INTEGER" // Unix timestamp
                    + ")");
            // Sales table (legacy - keep for compatibility)
            stmt.execute("CREATE TABLE IF NOT EXISTS sales ("
                    + "sale_id INTEGER PRIMARY KEY,"
                    + "title TEXT,"
                    + "location TEXT,"
                    + "closing_time TEXT"
                    + ")");
            // Lots table
            stmt.execute("CREATE TABLE IF NOT EXISTS lots ("
                    + "lot_id INTEGER PRIMARY KEY,"
                    + "sale_id INTEGER,"
                    + "title TEXT,"
                    + "description TEXT,"
                    + "manufacturer TEXT,"
                    + "type TEXT,"
                    + "year INTEGER,"
                    + "category TEXT,"
                    + "current_bid REAL,"
                    + "currency TEXT,"
                    + "url TEXT,"
                    + "closing_time TEXT,"
                    + "closing_notified INTEGER DEFAULT 0,"
                    + "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)"
                    + ")");
            // Images table
            stmt.execute("CREATE TABLE IF NOT EXISTS images ("
                    + "id INTEGER PRIMARY KEY AUTOINCREMENT,"
                    + "lot_id INTEGER,"
                    + "url TEXT,"
                    + "file_path TEXT,"
                    + "labels TEXT,"
                    + "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)"
                    + ")");
            // Create indexes for better query performance
            stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)");
            stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)");
        }
    }

    /**
     * Inserts or updates an auction record. On conflict every column except
     * {@code discovered_at} is overwritten, so the first-seen timestamp is kept.
     *
     * @param auction the auction to persist
     * @throws SQLException if the statement fails
     */
    synchronized void upsertAuction(AuctionInfo auction) throws SQLException {
        var sql = "INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)"
                + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
                + " ON CONFLICT(auction_id) DO UPDATE SET "
                + "title = excluded.title, location = excluded.location, city = excluded.city, "
                + "country = excluded.country, url = excluded.url, type = excluded.type, "
                + "lot_count = excluded.lot_count, closing_time = excluded.closing_time";
        try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
            ps.setInt(1, auction.auctionId);
            ps.setString(2, auction.title);
            ps.setString(3, auction.location);
            ps.setString(4, auction.city);
            ps.setString(5, auction.country);
            ps.setString(6, auction.url);
            ps.setString(7, auction.type);
            ps.setInt(8, auction.lotCount);
            ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null);
            ps.setLong(10, Instant.now().getEpochSecond());
            ps.executeUpdate();
        }
    }

    /**
     * Retrieves all auctions from the database.
     *
     * @return a mutable list of all stored auctions (possibly empty)
     * @throws SQLException if the query fails
     */
    synchronized List<AuctionInfo> getAllAuctions() throws SQLException {
        List<AuctionInfo> auctions = new ArrayList<>();
        var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time FROM auctions";
        try (var conn = DriverManager.getConnection(url);
             var stmt = conn.createStatement();
             var rs = stmt.executeQuery(sql)) {
            while (rs.next()) {
                auctions.add(mapAuction(rs));
            }
        }
        return auctions;
    }

    /**
     * Retrieves auctions by country code.
     *
     * @param countryCode value matched exactly against the {@code country} column
     * @return a mutable list of matching auctions (possibly empty)
     * @throws SQLException if the query fails
     */
    synchronized List<AuctionInfo> getAuctionsByCountry(String countryCode) throws SQLException {
        List<AuctionInfo> auctions = new ArrayList<>();
        var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time "
                + "FROM auctions WHERE country = ?";
        try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
            ps.setString(1, countryCode);
            try (var rs = ps.executeQuery()) {
                while (rs.next()) {
                    auctions.add(mapAuction(rs));
                }
            }
        }
        return auctions;
    }

    /**
     * Inserts or updates a lot record using {@code ON CONFLICT(lot_id) DO UPDATE}.
     * On conflict every column except {@code closing_notified} is overwritten,
     * so a notification flag that was already stored survives a re-scrape.
     *
     * @param lot the lot to persist
     * @throws SQLException if the statement fails
     */
    synchronized void upsertLot(Lot lot) throws SQLException {
        var sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)"
                + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
                + " ON CONFLICT(lot_id) DO UPDATE SET "
                + "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, "
                + "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, "
                + "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time";
        try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
            ps.setInt(1, lot.lotId);
            ps.setInt(2, lot.saleId);
            ps.setString(3, lot.title);
            ps.setString(4, lot.description);
            ps.setString(5, lot.manufacturer);
            ps.setString(6, lot.type);
            ps.setInt(7, lot.year);
            ps.setString(8, lot.category);
            ps.setDouble(9, lot.currentBid);
            ps.setString(10, lot.currency);
            ps.setString(11, lot.url);
            ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null);
            ps.setInt(13, lot.closingNotified ? 1 : 0);
            ps.executeUpdate();
        }
    }

    /**
     * Inserts a new image record. Each image is associated with a lot and
     * stores both the original URL and the local file path. Detected
     * labels are stored as a comma separated string; a {@code null} label
     * list is stored as an empty string, the same as an empty list.
     *
     * @param lotId    id of the lot the image belongs to
     * @param url      original image URL
     * @param filePath local path of the downloaded file
     * @param labels   detected labels, may be {@code null} or empty
     * @throws SQLException if the statement fails
     */
    synchronized void insertImage(int lotId, String url, String filePath, List<String> labels) throws SQLException {
        var sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)";
        var joinedLabels = labels == null ? "" : String.join(",", labels);
        try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) {
            ps.setInt(1, lotId);
            ps.setString(2, url);
            ps.setString(3, filePath);
            ps.setString(4, joinedLabels);
            ps.executeUpdate();
        }
    }

    /**
     * Retrieves the lots to be monitored, populated with id, sale id, current
     * bid, currency, closing time and notification flag only.
     *
     * NOTE(review): despite the name, the query has no WHERE clause, so lots
     * whose closing time has already passed are returned as well. Callers
     * that need only open lots must filter on {@code closingTime} themselves;
     * confirm whether filtering belongs here.
     *
     * @return a mutable list of lots (possibly empty)
     * @throws SQLException if the query fails
     */
    synchronized List<Lot> getActiveLots() throws SQLException {
        List<Lot> list = new ArrayList<>();
        var sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots";
        try (var conn = DriverManager.getConnection(url);
             var stmt = conn.createStatement();
             var rs = stmt.executeQuery(sql)) {
            while (rs.next()) {
                var lot = new Lot();
                lot.lotId = rs.getInt("lot_id");
                lot.saleId = rs.getInt("sale_id");
                lot.currentBid = rs.getDouble("current_bid");
                lot.currency = rs.getString("currency");
                lot.closingTime = parseTimestamp(rs.getString("closing_time"));
                lot.closingNotified = rs.getInt("closing_notified") != 0;
                list.add(lot);
            }
        }
        return list;
    }

    /**
     * Retrieves all lots from the database, populated with id, sale id,
     * title, current bid and currency only.
     *
     * @return a mutable list of lots (possibly empty)
     * @throws SQLException if the query fails
     */
    synchronized List<Lot> getAllLots() throws SQLException {
        List<Lot> list = new ArrayList<>();
        var sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots";
        try (var conn = DriverManager.getConnection(url);
             var stmt = conn.createStatement();
             var rs = stmt.executeQuery(sql)) {
            while (rs.next()) {
                var lot = new Lot();
                lot.lotId = rs.getInt("lot_id");
                lot.saleId = rs.getInt("sale_id");
                lot.title = rs.getString("title");
                lot.currentBid = rs.getDouble("current_bid");
                lot.currency = rs.getString("currency");
                list.add(lot);
            }
        }
        return list;
    }

    /**
     * Gets the total number of images in the database.
     *
     * @return the row count of the images table
     * @throws SQLException if the query fails
     */
    synchronized int getImageCount() throws SQLException {
        var sql = "SELECT COUNT(*) as count FROM images";
        try (var conn = DriverManager.getConnection(url);
             var stmt = conn.createStatement();
             var rs = stmt.executeQuery(sql)) {
            if (rs.next()) {
                return rs.getInt("count");
            }
        }
        return 0;
    }

    /**
     * Updates the current bid of a lot after a bid refresh.
     *
     * @param lot supplies the lot id and the new bid
     * @throws SQLException if the statement fails
     */
    synchronized void updateLotCurrentBid(Lot lot) throws SQLException {
        try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(
                "UPDATE lots SET current_bid = ? WHERE lot_id = ?")) {
            ps.setDouble(1, lot.currentBid);
            ps.setInt(2, lot.lotId);
            ps.executeUpdate();
        }
    }

    /**
     * Updates the closingNotified flag of a lot (set to 1 when we have
     * warned the user about its imminent closure).
     *
     * @param lot supplies the lot id and the new flag value
     * @throws SQLException if the statement fails
     */
    synchronized void updateLotNotificationFlags(Lot lot) throws SQLException {
        try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(
                "UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) {
            ps.setInt(1, lot.closingNotified ? 1 : 0);
            ps.setInt(2, lot.lotId);
            ps.executeUpdate();
        }
    }

    /** Builds an AuctionInfo from the current row of an auctions query. */
    private static AuctionInfo mapAuction(ResultSet rs) throws SQLException {
        var auction = new AuctionInfo();
        auction.auctionId = rs.getInt("auction_id");
        auction.title = rs.getString("title");
        auction.location = rs.getString("location");
        auction.city = rs.getString("city");
        auction.country = rs.getString("country");
        auction.url = rs.getString("url");
        auction.type = rs.getString("type");
        auction.lotCount = rs.getInt("lot_count");
        auction.closingTime = parseTimestamp(rs.getString("closing_time"));
        return auction;
    }

    /** Parses a stored closing time (written via LocalDateTime.toString()); null stays null. */
    private static LocalDateTime parseTimestamp(String text) {
        return text != null ? LocalDateTime.parse(text) : null;
    }
}

View File

@@ -0,0 +1,29 @@
package com.auction;
import java.time.LocalDateTime;
/**
* Simple POJO representing a lot (kavel) in an auction. It keeps track
* of the sale it belongs to, current bid and closing time. The method
* minutesUntilClose computes how many minutes remain until the lot closes.
*/
final class Lot {
    /** Id of the sale/auction this lot belongs to. */
    int saleId;
    /** Id of the lot itself. */
    int lotId;
    String title;
    String description;
    String manufacturer;
    String type;
    int year;
    String category;
    double currentBid;
    String currency;
    String url;
    /** Closing time of the lot; null if unknown. */
    LocalDateTime closingTime;
    /** Whether the closing notification flag has been set for this lot. */
    boolean closingNotified;

    /**
     * Computes the minutes between the current local time and the closing
     * time. The value is negative once the closing time has passed.
     *
     * @return minutes until close, or {@link Long#MAX_VALUE} when the closing time is unknown
     */
    long minutesUntilClose() {
        return closingTime == null
                ? Long.MAX_VALUE
                : java.time.Duration.between(LocalDateTime.now(), closingTime).toMinutes();
    }
}

View File

@@ -1,23 +1,82 @@
package com.auction;
import org.opencv.core.Core;
import java.util.List;
public class Main {
// NOTE(review): this section is rendered from a diff, so removed and added lines
// appear side by side (e.g. the two signatures below and each System.out/IO.println
// pair). Presumably only the main2/IO.println variants exist in the new revision - confirm.
//
// Keep-alive entry point: returns immediately when any argument is passed,
// otherwise prints two status lines and sleeps until the thread is interrupted.
public static void main(String[] args) {
public static void main2(String[] args) {
// If arguments are passed, this is likely a one-off command via dokku run
// Just exit immediately to allow the command to run
if (args.length > 0) {
System.out.println("Command mode - exiting to allow shell commands");
IO.println("Command mode - exiting to allow shell commands");
return;
}
System.out.println("Starting Troostwijk Auction Scraper...");
System.out.println("Container is running and healthy.");
IO.println("Starting Troostwijk Auction Scraper...");
IO.println("Container is running and healthy.");
// Keep container alive
try {
Thread.sleep(Long.MAX_VALUE);
} catch (InterruptedException e) {
// Restore the interrupt flag so the caller can still observe the interruption.
Thread.currentThread().interrupt();
System.out.println("Container interrupted, exiting.");
IO.println("Container interrupted, exiting.");
}
}
/**
* Entry point. Configure database location, notification settings, and
* YOLO model paths here before running. Once started the scraper
* discovers Dutch auctions, scrapes lots, and begins monitoring.
*/
public static void main(String[] args) throws Exception {
IO.println("=== Troostwijk Auction Scraper ===\n");
// Configuration parameters (replace with your own values)
String databaseFile = "troostwijk.db";
// Notification configuration - choose one:
// Option 1: Desktop notifications only (free, no setup required)
// Read from the NOTIFICATION_CONFIG environment variable; falls back to "desktop".
String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop");
// Option 2: Desktop + Email via Gmail (free, requires Gmail app password)
// Format: "smtp:username:appPassword:toEmail"
// Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com"
// Get app password: Google Account > Security > 2-Step Verification > App passwords
// YOLO model paths (optional - scraper works without object detection)
String yoloCfg = "models/yolov4.cfg";
String yoloWeights = "models/yolov4.weights";
String yoloClasses = "models/coco.names";
// Load native OpenCV library
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
IO.println("Initializing scraper...");
// The third constructor argument is passed as an empty string here.
TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "",
yoloCfg, yoloWeights, yoloClasses);
// Step 1: Discover auctions in NL
IO.println("\n[1/3] Discovering Dutch auctions...");
List<Integer> auctions = scraper.discoverDutchAuctions();
IO.println("✓ Found " + auctions.size() + " auctions: " + auctions);
// Step 2: Fetch lots for each auction
IO.println("\n[2/3] Fetching lot details...");
int totalAuctions = auctions.size();
int currentAuction = 0;
for (int saleId : auctions) {
currentAuction++;
// NOTE(review): this "[Page N] Fetching auctions..." message is printed once per
// sale, right before the progress line below; it looks like a leftover from the
// auction-listing pagination log and may be misleading here - confirm.
IO.println(" [Page " + currentAuction + "] Fetching auctions...");
IO.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "...");
scraper.fetchLotsForSale(saleId);
}
// Show database summary
IO.println("\n📊 Database Summary:");
scraper.printDatabaseStats();
// Step 3: Start monitoring bids and closures
IO.println("\n[3/3] Starting monitoring service...");
scraper.scheduleMonitoring();
IO.println("✓ Monitoring active. Press Ctrl+C to stop.\n");
}
}

View File

@@ -0,0 +1,156 @@
package com.auction;
import javax.mail.Authenticator;
import javax.mail.Message.RecipientType;
import javax.mail.PasswordAuthentication;
import javax.mail.Session;
import javax.mail.Transport;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import java.awt.SystemTray;
import java.awt.Toolkit;
import java.awt.TrayIcon;
import java.awt.TrayIcon.MessageType;
import java.util.Date;
import java.util.Properties;
/**
* Service for sending notifications via desktop notifications and/or email.
* Supports free notification methods:
* 1. Desktop notifications (Windows/Linux/macOS system tray)
* 2. Email via Gmail SMTP (free, requires app password)
*
* Configuration:
* - For email: Set notificationEmail to your Gmail address
* - Enable 2FA in Gmail and create an App Password
* - Use format "smtp:username:appPassword:toEmail" for credentials
* - Or use "desktop" for desktop-only notifications
*/
class NotificationService {
private final boolean useDesktop;
private final boolean useEmail;
private final String smtpUsername;
private final String smtpPassword;
private final String toEmail;
/**
* Creates a notification service.
*
* @param config "desktop" for desktop only, or "smtp:username:password:toEmail" for email
* @param unusedParam Kept for compatibility (can pass empty string)
*/
NotificationService(String config, String unusedParam) {
if ("desktop".equalsIgnoreCase(config)) {
this.useDesktop = true;
this.useEmail = false;
this.smtpUsername = null;
this.smtpPassword = null;
this.toEmail = null;
} else if (config.startsWith("smtp:")) {
var parts = config.split(":", 4);
if (parts.length != 4) {
throw new IllegalArgumentException("Email config must be 'smtp:username:password:toEmail'");
}
this.useDesktop = true; // Always include desktop
this.useEmail = true;
this.smtpUsername = parts[1];
this.smtpPassword = parts[2];
this.toEmail = parts[3];
} else {
throw new IllegalArgumentException("Config must be 'desktop' or 'smtp:username:password:toEmail'");
}
}
/**
* Sends notification via configured channels.
*
* @param message The message body
* @param title Message title
* @param priority Priority level (0=normal, 1=high)
*/
void sendNotification(String message, String title, int priority) {
if (useDesktop) {
sendDesktopNotification(title, message, priority);
}
if (useEmail) {
sendEmailNotification(title, message, priority);
}
}
/**
* Sends a desktop notification using system tray.
* Works on Windows, macOS, and Linux with desktop environments.
*/
private void sendDesktopNotification(String title, String message, int priority) {
try {
if (SystemTray.isSupported()) {
var tray = SystemTray.getSystemTray();
var image = Toolkit.getDefaultToolkit()
.createImage(new byte[0]); // Empty image
var trayIcon = new TrayIcon(image, "Troostwijk Scraper");
trayIcon.setImageAutoSize(true);
var messageType = priority > 0
? MessageType.WARNING
: MessageType.INFO;
tray.add(trayIcon);
trayIcon.displayMessage(title, message, messageType);
// Remove icon after 2 seconds to avoid clutter
Thread.sleep(2000);
tray.remove(trayIcon);
IO.println("Desktop notification sent: " + title);
} else {
IO.println("Desktop notifications not supported, logging: " + title + " - " + message);
}
} catch (Exception e) {
System.err.println("Desktop notification failed: " + e.getMessage());
}
}
/**
* Sends email notification via Gmail SMTP (free).
* Uses Gmail's SMTP server with app password authentication.
*/
private void sendEmailNotification(String title, String message, int priority) {
try {
var props = new Properties();
props.put("mail.smtp.auth", "true");
props.put("mail.smtp.starttls.enable", "true");
props.put("mail.smtp.host", "smtp.gmail.com");
props.put("mail.smtp.port", "587");
props.put("mail.smtp.ssl.trust", "smtp.gmail.com");
var session = Session.getInstance(props,
new Authenticator() {
protected PasswordAuthentication getPasswordAuthentication() {
return new PasswordAuthentication(smtpUsername, smtpPassword);
}
});
var msg = new MimeMessage(session);
msg.setFrom(new InternetAddress(smtpUsername));
msg.setRecipients(RecipientType.TO,
InternetAddress.parse(toEmail));
msg.setSubject("[Troostwijk] " + title);
msg.setText(message);
msg.setSentDate(new Date());
if (priority > 0) {
msg.setHeader("X-Priority", "1");
msg.setHeader("Importance", "High");
}
Transport.send(msg);
IO.println("Email notification sent: " + title);
} catch (Exception e) {
System.err.println("Email notification failed: " + e.getMessage());
}
}
}

View File

@@ -0,0 +1,140 @@
package com.auction;
import org.opencv.core.Mat;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Net;
import org.opencv.imgcodecs.Imgcodecs;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import static org.opencv.dnn.Dnn.DNN_BACKEND_OPENCV;
import static org.opencv.dnn.Dnn.DNN_TARGET_CPU;
/**
* Service for performing object detection on images using OpenCV's DNN
* module. The DNN module can load pretrained models from several
* frameworks (Darknet, TensorFlow, ONNX, etc.). Here
* we load a YOLO model (Darknet) by specifying the configuration and
* weights files. For each image we run a forward pass and return a
* list of detected class labels.
*
* If model files are not found, the service operates in disabled mode
* and returns empty lists.
*/
class ObjectDetectionService {
    private final Net net;
    private final List<String> classNames;
    private final boolean enabled;

    /**
     * Loads the YOLO network and its class names. When any of the three
     * files is missing the service is created in disabled mode.
     *
     * @param cfgPath        path to the Darknet configuration file
     * @param weightsPath    path to the Darknet weights file
     * @param classNamesPath path to a text file with one class name per line
     * @throws IOException if the files exist but the model or class names cannot be loaded
     */
    ObjectDetectionService(String cfgPath, String weightsPath, String classNamesPath) throws IOException {
        // Check if model files exist
        var cfgFile = Paths.get(cfgPath);
        var weightsFile = Paths.get(weightsPath);
        var classNamesFile = Paths.get(classNamesPath);
        if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) {
            IO.println("⚠️ Object detection disabled: YOLO model files not found");
            IO.println(" Expected files:");
            IO.println(" - " + cfgPath);
            IO.println(" - " + weightsPath);
            IO.println(" - " + classNamesPath);
            IO.println(" Scraper will continue without image analysis.");
            this.enabled = false;
            this.net = null;
            this.classNames = new ArrayList<>();
            return;
        }
        try {
            // Load network
            this.net = Dnn.readNetFromDarknet(cfgPath, weightsPath);
            this.net.setPreferableBackend(DNN_BACKEND_OPENCV);
            this.net.setPreferableTarget(DNN_TARGET_CPU);
            // Load class names (one per line)
            this.classNames = Files.readAllLines(classNamesFile);
            this.enabled = true;
            IO.println("✓ Object detection enabled with YOLO");
        } catch (Exception e) {
            System.err.println("⚠️ Object detection disabled: " + e.getMessage());
            throw new IOException("Failed to initialize object detection", e);
        }
    }

    /**
     * Detects objects in the given image file and returns a list of
     * human-readable labels. Only detections above a confidence
     * threshold are returned, each label at most once. For brevity this
     * method omits drawing bounding boxes.
     *
     * @param imagePath absolute path to the image
     * @return list of detected class names (empty if detection is disabled
     *         or the image cannot be read)
     */
    List<String> detectObjects(String imagePath) {
        if (!enabled) {
            return new ArrayList<>();
        }
        List<String> labels = new ArrayList<>();
        var image = Imgcodecs.imread(imagePath);
        if (image.empty()) {
            image.release();
            return labels;
        }
        // Create a 4D blob from the image
        var blob = Dnn.blobFromImage(image, 1.0 / 255.0, new Size(416, 416), new Scalar(0, 0, 0), true, false);
        List<Mat> outs = new ArrayList<>();
        try {
            net.setInput(blob);
            net.forward(outs, getOutputLayerNames(net));
            for (var out : outs) {
                collectLabels(out, labels);
            }
        } finally {
            // Mats wrap native memory that the garbage collector does not reclaim promptly.
            for (var out : outs) {
                out.release();
            }
            blob.release();
            image.release();
        }
        return labels;
    }

    /**
     * Postprocesses one output layer: each row holds the bounding box data in
     * its first 5 values followed by one score per class. The best-scoring
     * class of a row is added to {@code labels} when it exceeds the threshold.
     */
    private void collectLabels(Mat out, List<String> labels) {
        var confThreshold = 0.5f;
        var columns = out.cols();
        // Never read past the row or the known class names.
        var classCount = Math.min(classNames.size(), columns - 5);
        if (classCount <= 0) {
            return;
        }
        var row = new float[columns];
        var scores = new double[classCount];
        for (var i = 0; i < out.rows(); i++) {
            // Read the whole row: get(i, 0) without a buffer yields only the
            // channel values of a single element, not the score vector.
            out.get(i, 0, row);
            for (var c = 0; c < classCount; c++) {
                scores[c] = row[5 + c];
            }
            var classId = argMax(scores);
            var confidence = scores[classId];
            if (confidence > confThreshold) {
                var label = classNames.get(classId);
                if (!labels.contains(label)) {
                    labels.add(label);
                }
            }
        }
    }

    /**
     * Returns the names of the network's unconnected output layers. The
     * layer ids reported by OpenCV are 1-based, hence the {@code i - 1}.
     */
    private List<String> getOutputLayerNames(Net net) {
        List<String> names = new ArrayList<>();
        var outLayers = net.getUnconnectedOutLayers().toList();
        var layersNames = net.getLayerNames();
        for (var i : outLayers) {
            names.add(layersNames.get(i - 1));
        }
        return names;
    }

    /**
     * Returns the index of the maximum value in the array (the first one on
     * ties). The array must not be empty.
     */
    private int argMax(double[] array) {
        var best = 0;
        var max = array[0];
        for (var i = 1; i < array.length; i++) {
            if (array[i] > max) {
                max = array[i];
                best = i;
            }
        }
        return best;
    }
}

View File

@@ -1,645 +0,0 @@
package com.auction;
import com.microsoft.playwright.*;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.microsoft.playwright.options.WaitUntilState;
import java.io.IOException;
import java.nio.file.*;
import java.sql.*;
import java.time.Instant;
import java.util.*;
/**
* TroostwijkAuctionExtractor
*
* Extracts auction listings from https://www.troostwijkauctions.com/auctions
* using Playwright for Java (headless browser automation).
*
* Features:
* - Uses Playwright for Java to load JavaScript-rendered content
* - Iterates through all pages of auction listings
* - Rate limiting: 200ms between each page request
* - Caches visited pages in SQLite database with expiration times
* - Extracts auction metadata: ID, title, location, URL
*
* Dependencies (Maven):
* <dependency>
* <groupId>com.microsoft.playwright</groupId>
* <artifactId>playwright</artifactId>
* <version>1.40.0</version>
* </dependency>
* <dependency>
* <groupId>com.fasterxml.jackson.core</groupId>
* <artifactId>jackson-databind</artifactId>
* <version>2.17.0</version>
* </dependency>
* <dependency>
* <groupId>org.xerial</groupId>
* <artifactId>sqlite-jdbc</artifactId>
* <version>3.45.1.0</version>
* </dependency>
*
* After adding dependency, run: mvn exec:java -e -D exec.mainClass=com.microsoft.playwright.CLI -D exec.args="install"
* This downloads the browser binaries needed by Playwright.
*/
public class TroostwijkAuctionExtractor {
// Listing URL from which auction pages are fetched.
private static final String AUCTIONS_BASE_URL = "https://www.troostwijkauctions.com/auctions";
// Pause in milliseconds after each page fetched from the website.
private static final int RATE_LIMIT_MS = 200;
// Location of the SQLite file backing the page cache.
private static final String CACHE_DB_PATH = "cache/page_cache.db";
private static final long CACHE_EXPIRATION_HOURS = 24; // Cache expires after 24 hours
private final ObjectMapper objectMapper;
// Whether fetched pages are stored in the cache database.
private final boolean useCache;
// Null when useCache is false.
private final CacheDatabase cacheDb;
private final int maxPageVisits; // Maximum number of pages to fetch (0 = unlimited)
private int pageVisitCount; // Counter for actual page fetches (not from cache)
// Both are assigned by initialize() and stay null until then.
private Playwright playwright;
private Browser browser;
/**
* Represents an auction listing
*/
public static class Auction {
    /** Auction id. */
    public int id;
    /** Auction title. */
    public String title;
    /** Auction location. */
    public String location;
    /** Auction URL. */
    public String url;
    /** Auction type, e.g. "A1" or "A7". */
    public String type;

    /** Renders id, type, title, location and URL on a single line. */
    @Override
    public String toString() {
        String template = "Auction{id=%d, type=%s, title='%s', location='%s', url='%s'}";
        return template.formatted(id, type, title, location, url);
    }
}
/**
* Constructor
*
* @param useCache Enable database caching of visited pages
* @param maxPageVisits Maximum number of actual page fetches (0 = unlimited)
*/
public TroostwijkAuctionExtractor(boolean useCache, int maxPageVisits) throws SQLException, IOException {
    this.useCache = useCache;
    this.maxPageVisits = maxPageVisits;
    this.pageVisitCount = 0;
    this.objectMapper = new ObjectMapper();
    if (useCache) {
        // The cache database is only created and opened when caching is requested.
        this.cacheDb = new CacheDatabase(CACHE_DB_PATH);
        this.cacheDb.initialize();
    } else {
        this.cacheDb = null;
    }
}
/**
* Constructor with default unlimited page visits
*
* @param useCache Enable database caching of visited pages
*/
public TroostwijkAuctionExtractor(boolean useCache) throws SQLException, IOException {
// Delegates to the two-argument constructor; a limit of 0 disables the page-visit cap.
this(useCache, 0); // 0 = unlimited
}
/**
* Initializes Playwright and browser instance
* Call this before extracting auctions
*/
public void initialize() {
    System.out.println("Initializing Playwright browser...");
    // Headless Chromium with the sandbox switched off.
    List<String> sandboxFlags = Arrays.asList("--no-sandbox", "--disable-setuid-sandbox");
    BrowserType.LaunchOptions launchOptions = new BrowserType.LaunchOptions();
    launchOptions.setHeadless(true);
    launchOptions.setArgs(sandboxFlags);
    this.playwright = Playwright.create();
    this.browser = this.playwright.chromium().launch(launchOptions);
    System.out.println("✓ Browser ready");
}
/**
* Closes browser and Playwright instance
* Call this when done extracting
*/
public void close() {
    // Chain the shutdown steps with try/finally so that a failure while closing
    // the browser does not leave Playwright or the cache database open.
    try {
        if (browser != null) {
            browser.close();
        }
    } finally {
        try {
            if (playwright != null) {
                playwright.close();
            }
        } finally {
            if (cacheDb != null) {
                cacheDb.close();
            }
        }
    }
    System.out.println("✓ Browser and cache closed");
}
/**
 * Extracts all auctions by walking the paginated listing, starting at page 1.
 *
 * Each page is taken from the cache when available; otherwise it is fetched
 * with the browser, which counts against the page-visit limit. Pagination
 * stops when the visit limit is reached, a fetch fails, a page has no
 * auctions, or a page contains only auctions that were already collected.
 * The last condition prevents looping forever if the site serves repeated
 * content for page numbers past the end.
 *
 * @return list of all discovered auctions, without duplicate auction ids
 * @throws IllegalStateException if {@code initialize()} has not been called
 * @throws InterruptedException  if interrupted during the rate-limit pause
 */
public List<Auction> extractAllAuctions() throws InterruptedException {
    if (browser == null) {
        throw new IllegalStateException("Browser not initialized. Call initialize() first.");
    }

    List<Auction> allAuctions = new ArrayList<>();
    Set<Integer> collectedIds = new HashSet<>();
    int pageNumber = 1;

    System.out.println("Starting auction extraction from " + AUCTIONS_BASE_URL);

    while (true) {
        System.out.println("\n[Page " + pageNumber + "] Fetching auctions...");

        // Check cache first
        String html = loadFromCache(pageNumber);
        if (html != null) {
            System.out.println(" ✓ Loaded from cache");
        } else {
            // Only real fetches count against the limit; cache hits are free.
            if (maxPageVisits > 0 && pageVisitCount >= maxPageVisits) {
                System.out.println(" ⚠️ Reached maximum page visit limit (" + maxPageVisits + "), stopping");
                break;
            }

            html = fetchPageWithPlaywright(pageNumber);
            pageVisitCount++; // Failed fetches count as visits too.

            if (html == null || html.isEmpty()) {
                System.out.println(" ⚠️ Failed to fetch page, stopping pagination");
                break;
            }

            System.out.println(" ✓ Fetched from website (visit " + pageVisitCount +
                    (maxPageVisits > 0 ? "/" + maxPageVisits : "") + ")");

            if (useCache) {
                saveToCache(pageNumber, html);
            }

            // Rate limiting: pause after every real fetch.
            Thread.sleep(RATE_LIMIT_MS);
        }

        // Parse auctions from HTML
        List<Auction> pageAuctions = parseAuctionsFromHtml(html);
        if (pageAuctions.isEmpty()) {
            System.out.println(" ⚠️ No auctions found on page, stopping pagination");
            break;
        }

        // Keep only auctions not seen on an earlier page.
        int newOnThisPage = 0;
        for (Auction auction : pageAuctions) {
            if (collectedIds.add(auction.id)) {
                allAuctions.add(auction);
                newOnThisPage++;
            }
        }

        if (newOnThisPage == 0) {
            System.out.println(" ⚠️ Page contains only auctions already seen, stopping pagination");
            break;
        }

        System.out.println(" ✓ Found " + pageAuctions.size() + " auctions");
        pageNumber++;
    }

    System.out.println("\n✓ Total auctions extracted: " + allAuctions.size());
    return allAuctions;
}
/**
 * Fetches a single listing page using Playwright.
 *
 * A new browser page is opened per call and is always closed again, even
 * when navigation or content retrieval fails.
 *
 * @param pageNumber Page number (1-indexed); page 1 is the base URL itself
 * @return HTML content of the page, or null if any Playwright error occurred
 */
private String fetchPageWithPlaywright(int pageNumber) {
    String url = pageNumber == 1
            ? AUCTIONS_BASE_URL
            : AUCTIONS_BASE_URL + "?page=" + pageNumber;

    try {
        Page page = browser.newPage();
        try {
            // Set user agent
            page.setExtraHTTPHeaders(Map.of(
                    "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            ));

            // Navigate to page
            page.navigate(url, new Page.NavigateOptions()
                    .setTimeout(30000)
                    .setWaitUntil(WaitUntilState.NETWORKIDLE));

            // Wait for auction listings to appear
            try {
                page.waitForSelector("a[href^='/a/']", new Page.WaitForSelectorOptions()
                        .setTimeout(10000));
            } catch (Exception e) {
                // Continue even if selector not found; the caller decides
                // what to do with a page that yields no auctions.
                System.out.println(" ⚠️ Auction selector not found, attempting to parse anyway");
            }

            // Get HTML content
            return page.content();
        } finally {
            // Release the page on every path so failed fetches do not leak it.
            page.close();
        }
    } catch (Exception e) {
        System.err.println(" ⚠️ Playwright error: " + e.getMessage());
        return null;
    }
}
/**
 * Matches auction links such as href="/a/title-A1-12345".
 * Group 1 is the full href, group 2 the type ("A1" or "A7"), group 3 the id.
 */
private static final java.util.regex.Pattern AUCTION_LINK_PATTERN = java.util.regex.Pattern.compile(
        "href=\"(/a/[^\"]+(A[17])-(\\d+)[^\"]*)\"");

/**
 * Parses auction data from HTML content.
 *
 * Every distinct auction id found in an auction link yields one entry; later
 * links with the same id are ignored. The type is taken from the same part
 * of the link that supplies the id, so a slug that merely contains "A1-"
 * cannot mislabel an A7 auction. Links whose id does not fit in an int are
 * skipped with a warning instead of aborting the whole page.
 *
 * @param html HTML content
 * @return List of parsed auctions (possibly empty)
 */
private List<Auction> parseAuctionsFromHtml(String html) {
    List<Auction> auctions = new ArrayList<>();
    Set<Integer> seenIds = new HashSet<>();

    java.util.regex.Matcher linkMatcher = AUCTION_LINK_PATTERN.matcher(html);
    while (linkMatcher.find()) {
        String href = linkMatcher.group(1);
        String type = linkMatcher.group(2);

        int auctionId;
        try {
            auctionId = Integer.parseInt(linkMatcher.group(3));
        } catch (NumberFormatException e) {
            System.err.println(" ⚠️ Skipping link with invalid auction id: " + href);
            continue;
        }

        // Avoid duplicates: add() returns false when the id was already seen.
        if (!seenIds.add(auctionId)) {
            continue;
        }

        Auction auction = new Auction();
        auction.id = auctionId;
        auction.type = type;
        auction.title = extractTitleFromHref(href);
        auction.location = extractLocationNearLink(html, href);
        auction.url = "https://www.troostwijkauctions.com" + href;
        auctions.add(auction);
    }
    return auctions;
}
/** Plain-text location such as "City, NL"; group 1 = city, group 2 = country code. */
private static final java.util.regex.Pattern LOCATION_TEXT_PATTERN = java.util.regex.Pattern.compile(
        "(\\p{L}[\\p{L}\\s\\-']+),\\s*([A-Z]{2})(?!\\p{L})");

/** Location split across markup, e.g. "<span>City,<!-- --> </span>NL". */
private static final java.util.regex.Pattern LOCATION_SPAN_PATTERN = java.util.regex.Pattern.compile(
        "<span[^>]*>(\\p{L}[\\p{L}\\s\\-',]+?)(?:,)?\\s*(?:<!--.*?-->)?\\s*</span>\\s*([A-Z]{2})(?!\\p{L})");

/** Bare two-letter country code directly after a closing tag or comment. */
private static final java.util.regex.Pattern COUNTRY_CODE_PATTERN = java.util.regex.Pattern.compile(
        "(?:-->|</span>|</div>)\\s*([A-Z]{2})(?!\\p{L})");

/**
 * Extracts location text near an auction link.
 *
 * Searches a window from 500 characters before to 1000 characters after the
 * first occurrence of the href, trying three formats in order: plain
 * "City, CC" text, a city inside a span followed by the country code, and
 * finally a bare country code. City names may contain any Unicode letters,
 * so names with diacritics are captured in full.
 *
 * @param html full page HTML
 * @param href href of the auction link to search around
 * @return "City, CC", "Unknown, CC" when only a country code is found, or
 *         "Unknown" when nothing matches or the href is not in the HTML
 */
private String extractLocationNearLink(String html, String href) {
    int hrefPos = html.indexOf(href);
    if (hrefPos == -1) {
        return "Unknown";
    }

    // Window: 500 characters before the href, 1000 characters from its start.
    int startPos = Math.max(hrefPos - 500, 0);
    int endPos = Math.min(hrefPos + 1000, html.length());
    String context = html.substring(startPos, endPos);

    // Pattern 1: Classic format "City, NL"
    java.util.regex.Matcher locMatcher = LOCATION_TEXT_PATTERN.matcher(context);
    if (locMatcher.find()) {
        String location = locMatcher.group(1).trim() + ", " + locMatcher.group(2);
        System.out.println(" Found location: " + location + " for auction " + href);
        return location;
    }

    // Pattern 2: city and country code separated by markup
    java.util.regex.Matcher htmlMatcher = LOCATION_SPAN_PATTERN.matcher(context);
    if (htmlMatcher.find()) {
        String city = htmlMatcher.group(1).trim().replaceAll(",$", ""); // Remove trailing comma
        String country = htmlMatcher.group(2);
        String location = city + ", " + country;
        System.out.println(" Found location (HTML): " + location + " for auction " + href);
        return location;
    }

    // Pattern 3: Fallback - just find country code after HTML tags
    java.util.regex.Matcher countryMatcher = COUNTRY_CODE_PATTERN.matcher(context);
    if (countryMatcher.find()) {
        String country = countryMatcher.group(1);
        System.out.println(" Found country code: " + country + " for auction " + href);
        return "Unknown, " + country;
    }

    System.out.println(" ⚠️ No location found for auction " + href);
    return "Unknown";
}
/**
 * Captures the slug between "/a/" and the final "-A1-<digits>" or
 * "-A7-<digits>" marker. The match is greedy, so a slug that itself contains
 * "-A1-" or "-A7-" is not cut short.
 */
private static final java.util.regex.Pattern TITLE_SLUG_PATTERN = java.util.regex.Pattern.compile(
        "/a/([^/?#]+)-A[17]-\\d");

/**
 * Extracts human-readable title from URL slug.
 * Converts "/a/some-auction-title-A1-12345" to "Some Auction Title".
 *
 * @param href auction link path
 * @return the title in Title Case, or "Untitled Auction" if the href does
 *         not have the expected shape
 */
private String extractTitleFromHref(String href) {
    java.util.regex.Matcher titleMatcher = TITLE_SLUG_PATTERN.matcher(href);
    if (!titleMatcher.find()) {
        return "Untitled Auction";
    }

    // Convert kebab-case to Title Case; empty parts (from "--") are skipped.
    StringBuilder title = new StringBuilder();
    for (String word : titleMatcher.group(1).split("-")) {
        if (word.isEmpty()) {
            continue;
        }
        if (title.length() > 0) {
            title.append(' ');
        }
        title.append(Character.toUpperCase(word.charAt(0))).append(word.substring(1));
    }
    return title.toString();
}
/**
 * Looks up the cached HTML of a listing page.
 *
 * @param pageNumber Page number (page 1 maps to the base URL without a query)
 * @return the cached HTML, or null when caching is off or the cache has no
 *         unexpired entry for the page
 */
private String loadFromCache(int pageNumber) {
    boolean cacheAvailable = useCache && cacheDb != null;
    if (!cacheAvailable) {
        return null;
    }

    String pageUrl = AUCTIONS_BASE_URL;
    if (pageNumber != 1) {
        pageUrl = pageUrl + "?page=" + pageNumber;
    }
    return cacheDb.get(pageUrl);
}
/**
 * Stores the HTML of a listing page in the cache, keyed by the page URL and
 * valid for the configured number of hours. Does nothing when caching is off.
 *
 * @param pageNumber Page number (page 1 maps to the base URL without a query)
 * @param html       HTML content to store
 */
private void saveToCache(int pageNumber, String html) {
    boolean cacheAvailable = useCache && cacheDb != null;
    if (!cacheAvailable) {
        return;
    }

    String pageUrl = AUCTIONS_BASE_URL;
    if (pageNumber != 1) {
        pageUrl = pageUrl + "?page=" + pageNumber;
    }
    cacheDb.put(pageUrl, html, CACHE_EXPIRATION_HOURS);
}
/**
 * Filters auctions by location.
 *
 * An auction is kept when its location text contains the filter as a
 * substring. Null entries and auctions without a location are left out
 * rather than causing a NullPointerException.
 *
 * @param auctions List of auctions
 * @param locationFilter Location string to match (e.g., "NL")
 * @return Filtered, unmodifiable list
 */
public static List<Auction> filterByLocation(List<Auction> auctions, String locationFilter) {
    return auctions.stream()
            .filter(a -> a != null && a.location != null && a.location.contains(locationFilter))
            .toList();
}
/**
 * Entry point for testing.
 *
 * Arguments:
 * --max-visits <number> : Maximum number of page visits (0 = unlimited, default)
 * --no-cache : Disable caching
 * --help : Print usage and exit
 *
 * A missing or non-numeric --max-visits value ends the run with an error
 * message instead of silently crawling without a limit or crashing with a
 * stack trace. Unknown options are reported and ignored.
 */
public static void main(String[] args) throws Exception {
    System.out.println("=== Troostwijk Auction Extractor ===\n");

    // Parse command line arguments
    boolean useCache = true;
    int maxVisits = 0; // 0 = unlimited
    for (int i = 0; i < args.length; i++) {
        switch (args[i]) {
            case "--max-visits":
                if (i + 1 >= args.length) {
                    System.err.println("Error: --max-visits requires a number");
                    return;
                }
                try {
                    maxVisits = Integer.parseInt(args[++i]);
                } catch (NumberFormatException e) {
                    System.err.println("Error: invalid value for --max-visits: " + args[i]);
                    return;
                }
                System.out.println("Max page visits set to: " + maxVisits);
                break;
            case "--no-cache":
                useCache = false;
                System.out.println("Caching disabled");
                break;
            case "--help":
                System.out.println("Usage: java TroostwijkAuctionExtractor [options]");
                System.out.println("Options:");
                System.out.println(" --max-visits <n> : Limit actual page fetches to n (0 = unlimited)");
                System.out.println(" --no-cache : Disable page caching");
                System.out.println(" --help : Show this help message");
                return;
            default:
                System.err.println("Warning: ignoring unknown option: " + args[i]);
                break;
        }
    }

    TroostwijkAuctionExtractor extractor = new TroostwijkAuctionExtractor(useCache, maxVisits);
    try {
        // Initialize browser
        extractor.initialize();

        // Extract all auctions
        List<Auction> allAuctions = extractor.extractAllAuctions();

        // Filter for Dutch auctions only
        List<Auction> dutchAuctions = filterByLocation(allAuctions, "NL");

        System.out.println("\n=== Results ===");
        System.out.println("Total auctions found: " + allAuctions.size());
        System.out.println("Dutch auctions (NL): " + dutchAuctions.size());
        System.out.println("Actual page visits: " + extractor.pageVisitCount);

        // Display first 10 Dutch auctions
        System.out.println("\n=== Sample Dutch Auctions ===");
        dutchAuctions.stream()
                .limit(10)
                .forEach(System.out::println);
    } finally {
        // Always close browser, even when extraction fails
        extractor.close();
    }
}
/**
 * SQLite-based caching system for HTML pages with expiration support.
 *
 * Entries are keyed by URL and carry an expiry timestamp in epoch seconds.
 * All public methods are synchronized on the instance. Read, write, cleanup
 * and statistics calls are best-effort: SQL failures, and calls made while
 * the database is not open, are reported on stderr and never thrown.
 */
static class CacheDatabase {
    private final String dbPath;
    private Connection connection;

    /**
     * @param dbPath file system path of the SQLite database file
     */
    public CacheDatabase(String dbPath) {
        this.dbPath = dbPath;
    }

    /**
     * Initialize database and create schema.
     *
     * Creates the parent directory if needed, opens the connection, creates
     * the table and index when missing, and removes expired entries.
     *
     * @throws SQLException if the database cannot be opened or the schema
     *                      cannot be created; the connection is closed again
     * @throws IOException  if the cache directory cannot be created
     */
    public synchronized void initialize() throws SQLException, IOException {
        // Create cache directory if it doesn't exist
        Path cacheDir = Paths.get(dbPath).getParent();
        if (cacheDir != null) {
            Files.createDirectories(cacheDir);
        }

        Connection opened = DriverManager.getConnection("jdbc:sqlite:" + dbPath);

        // Create cache table with URL as primary key
        String createTable = """
                CREATE TABLE IF NOT EXISTS page_cache (
                    url TEXT PRIMARY KEY,
                    html TEXT NOT NULL,
                    cached_at INTEGER NOT NULL,
                    expires_at INTEGER NOT NULL
                )
                """;
        try (Statement stmt = opened.createStatement()) {
            stmt.execute(createTable);
            // Create index on expires_at for efficient cleanup
            stmt.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON page_cache(expires_at)");
        } catch (SQLException e) {
            // Do not leak the connection when the schema cannot be set up.
            try {
                opened.close();
            } catch (SQLException closeError) {
                e.addSuppressed(closeError);
            }
            throw e;
        }
        connection = opened;

        // Clean up expired entries on initialization
        cleanupExpired();
        System.out.println("✓ Cache database initialized");
    }

    /**
     * Get cached HTML for a URL if it exists and hasn't expired.
     *
     * @param url The URL to look up
     * @return Cached HTML, or null if not found, expired, the database is
     *         not open, or the lookup failed
     */
    public synchronized String get(String url) {
        if (!isOpen("Cache read error")) {
            return null;
        }
        String sql = "SELECT html FROM page_cache WHERE url = ? AND expires_at > ?";
        try (PreparedStatement ps = connection.prepareStatement(sql)) {
            ps.setString(1, url);
            ps.setLong(2, Instant.now().getEpochSecond());
            try (ResultSet rs = ps.executeQuery()) {
                return rs.next() ? rs.getString("html") : null;
            }
        } catch (SQLException e) {
            System.err.println("Cache read error: " + e.getMessage());
            return null;
        }
    }

    /**
     * Store HTML in cache with expiration time, replacing any existing entry
     * for the same URL.
     *
     * @param url The URL to cache
     * @param html The HTML content
     * @param expirationHours Hours until cache expires
     */
    public synchronized void put(String url, String html, long expirationHours) {
        if (!isOpen("Cache write error")) {
            return;
        }
        String sql = """
                INSERT OR REPLACE INTO page_cache (url, html, cached_at, expires_at)
                VALUES (?, ?, ?, ?)
                """;
        long now = Instant.now().getEpochSecond();
        long expiresAt = now + (expirationHours * 3600);
        try (PreparedStatement ps = connection.prepareStatement(sql)) {
            ps.setString(1, url);
            ps.setString(2, html);
            ps.setLong(3, now);
            ps.setLong(4, expiresAt);
            ps.executeUpdate();
        } catch (SQLException e) {
            System.err.println("Cache write error: " + e.getMessage());
        }
    }

    /**
     * Remove expired cache entries and report how many were deleted.
     */
    public synchronized void cleanupExpired() {
        if (!isOpen("Cache cleanup error")) {
            return;
        }
        String sql = "DELETE FROM page_cache WHERE expires_at <= ?";
        try (PreparedStatement ps = connection.prepareStatement(sql)) {
            ps.setLong(1, Instant.now().getEpochSecond());
            int deleted = ps.executeUpdate();
            if (deleted > 0) {
                System.out.println("✓ Cleaned up " + deleted + " expired cache entries");
            }
        } catch (SQLException e) {
            System.err.println("Cache cleanup error: " + e.getMessage());
        }
    }

    /**
     * Print cache statistics: total, valid and expired entry counts, and the
     * summed length of the stored HTML.
     */
    public synchronized void printStats() {
        if (!isOpen("Cache stats error")) {
            return;
        }
        String sql = "SELECT COUNT(*) as total, " +
                "SUM(CASE WHEN expires_at > ? THEN 1 ELSE 0 END) as valid, " +
                "SUM(LENGTH(html)) as total_size " +
                "FROM page_cache";
        try (PreparedStatement ps = connection.prepareStatement(sql)) {
            ps.setLong(1, Instant.now().getEpochSecond());
            try (ResultSet rs = ps.executeQuery()) {
                if (rs.next()) {
                    int total = rs.getInt("total");
                    int valid = rs.getInt("valid");
                    long size = rs.getLong("total_size");
                    System.out.println("\n=== Cache Statistics ===");
                    System.out.println("Total entries: " + total);
                    System.out.println("Valid entries: " + valid);
                    System.out.println("Expired entries: " + (total - valid));
                    System.out.println("Total size: " + (size / 1024) + " KB");
                }
            }
        } catch (SQLException e) {
            System.err.println("Cache stats error: " + e.getMessage());
        }
    }

    /**
     * Close database connection. Safe to call repeatedly or before
     * {@link #initialize()}.
     */
    public synchronized void close() {
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (SQLException e) {
            System.err.println("Error closing cache database: " + e.getMessage());
        } finally {
            connection = null;
        }
    }

    /**
     * Reports on stderr, using the given prefix, when the database is not
     * open. Callers hold the instance lock.
     *
     * @return true when a connection is available
     */
    private boolean isOpen(String errorPrefix) {
        if (connection != null) {
            return true;
        }
        System.err.println(errorPrefix + ": cache database is not open (call initialize() first)");
        return false;
    }
}
}

File diff suppressed because it is too large Load Diff