From aef7a3aa30a7cba7d4d8ffa4d77b49a33c8f724c Mon Sep 17 00:00:00 2001 From: Tour Date: Wed, 3 Dec 2025 15:32:34 +0100 Subject: [PATCH] start --- src/main/java/com/auction/AuctionInfo.java | 31 +- .../java/com/auction/DatabaseService.java | 606 +++++++-------- src/main/java/com/auction/Lot.java | 47 +- src/main/java/com/auction/Main.java | 147 ++-- .../java/com/auction/NotificationService.java | 6 +- .../com/auction/ObjectDetectionService.java | 14 +- .../java/com/auction/TroostwijkScraper.java | 687 ------------------ .../java/com/auction/AuctionParsingTest.java | 341 +++------ src/test/java/com/auction/Parser.java | 4 - troostwijk.db | Bin 32768 -> 0 bytes 10 files changed, 533 insertions(+), 1350 deletions(-) delete mode 100644 src/main/java/com/auction/TroostwijkScraper.java delete mode 100644 troostwijk.db diff --git a/src/main/java/com/auction/AuctionInfo.java b/src/main/java/com/auction/AuctionInfo.java index 874b9fb..cca4fa3 100644 --- a/src/main/java/com/auction/AuctionInfo.java +++ b/src/main/java/com/auction/AuctionInfo.java @@ -1,24 +1,19 @@ package com.auction; import java.time.LocalDateTime; + /** * Represents auction metadata (veiling informatie) + * Data typically populated by the external scraper process */ -public final class AuctionInfo { - - public int auctionId; // Unique auction ID (from URL) - public String title; // Auction title - public String location; // Location (e.g., "Amsterdam, NL") - public String city; // City name - public String country; // Country code (e.g., "NL") - public String url; // Full auction URL - public String type; // Auction type (A1 or A7) - public int lotCount; // Number of lots/kavels - public LocalDateTime closingTime; // Closing time if available - - @Override - public String toString() { - return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}", - auctionId, type, title, location, lotCount, url); - } -} +public record AuctionInfo( + int auctionId, // Unique auction ID (from URL) + String title, // Auction title + String location, // Location (e.g., "Amsterdam, NL") + String city, // City name + String country, // Country code (e.g., "NL") + String url, // Full auction URL + String type, // Auction type (A1 or A7) + int lotCount, // Number of lots/kavels + LocalDateTime closingTime // Closing time if available +) {} diff --git a/src/main/java/com/auction/DatabaseService.java b/src/main/java/com/auction/DatabaseService.java index 9c325b3..56da03a 100644 --- a/src/main/java/com/auction/DatabaseService.java +++ b/src/main/java/com/auction/DatabaseService.java @@ -1,303 +1,335 @@ package com.auction; -import java.sql.Connection; import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; import java.time.Instant; import java.time.LocalDateTime; import java.util.ArrayList; import java.util.List; + /** - * Service for persisting auctions, lots, images, and object labels into - * a SQLite database. Uses the Xerial JDBC driver which connects to - * SQLite via a URL of the form "jdbc:sqlite:path_to_file"【329850066306528†L40-L63】. + * Service for persisting auctions, lots, and images into a SQLite database. + * Data is typically populated by an external scraper process; + * this service enriches it with image processing and monitoring. */ public class DatabaseService { - - private final String url; - DatabaseService(String dbPath) { - this.url = "jdbc:sqlite:" + dbPath; - } - /** - * Creates tables if they do not already exist. The schema includes - * tables for auctions, lots, images, and object labels. This method is - * idempotent; it can be called multiple times. - */ - void ensureSchema() throws SQLException { - try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { - // Auctions table (veilingen) - stmt.execute("CREATE TABLE IF NOT EXISTS auctions (" - + "auction_id INTEGER PRIMARY KEY," - + "title TEXT NOT NULL," - + "location TEXT," - + "city TEXT," - + "country TEXT," - + "url TEXT NOT NULL," - + "type TEXT," - + "lot_count INTEGER DEFAULT 0," - + "closing_time TEXT," - + "discovered_at INTEGER" // Unix timestamp - + ")"); - - // Sales table (legacy - keep for compatibility) - stmt.execute("CREATE TABLE IF NOT EXISTS sales (" - + "sale_id INTEGER PRIMARY KEY," - + "title TEXT," - + "location TEXT," - + "closing_time TEXT" - + ")"); - - // Lots table - stmt.execute("CREATE TABLE IF NOT EXISTS lots (" - + "lot_id INTEGER PRIMARY KEY," - + "sale_id INTEGER," - + "title TEXT," - + "description TEXT," - + "manufacturer TEXT," - + "type TEXT," - + "year INTEGER," - + "category TEXT," - + "current_bid REAL," - + "currency TEXT," - + "url TEXT," - + "closing_time TEXT," - + "closing_notified INTEGER DEFAULT 0," - + "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)" - + ")"); - - // Images table - stmt.execute("CREATE TABLE IF NOT EXISTS images (" - + "id INTEGER PRIMARY KEY AUTOINCREMENT," - + "lot_id INTEGER," - + "url TEXT," - + "file_path TEXT," - + "labels TEXT," - + "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)" - + ")"); - - // Create indexes for better query performance - stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)"); - stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)"); - } - } - - /** - * Inserts or updates an auction record - */ - synchronized void upsertAuction(AuctionInfo auction) throws SQLException { - var sql = "INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)" - + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - + " ON CONFLICT(auction_id) DO UPDATE SET " - + "title = excluded.title, location = excluded.location, city = excluded.city, " - + "country = excluded.country, url = excluded.url, type = excluded.type, " - + "lot_count = excluded.lot_count, closing_time = excluded.closing_time"; - - try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { - ps.setInt(1, auction.auctionId); - ps.setString(2, auction.title); - ps.setString(3, auction.location); - ps.setString(4, auction.city); - ps.setString(5, auction.country); - ps.setString(6, auction.url); - ps.setString(7, auction.type); - ps.setInt(8, auction.lotCount); - ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null); - ps.setLong(10, Instant.now().getEpochSecond()); - ps.executeUpdate(); - } - } - - /** - * Retrieves all auctions from the database - */ - synchronized List getAllAuctions() throws SQLException { - List auctions = new ArrayList<>(); - var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time FROM auctions"; - - try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { - var rs = stmt.executeQuery(sql); - while (rs.next()) { - var auction = new AuctionInfo(); - auction.auctionId = rs.getInt("auction_id"); - auction.title = rs.getString("title"); - auction.location = rs.getString("location"); - auction.city = rs.getString("city"); - auction.country = rs.getString("country"); - auction.url = rs.getString("url"); - auction.type = rs.getString("type"); - auction.lotCount = rs.getInt("lot_count"); - var closing = rs.getString("closing_time"); - if (closing != null) { - auction.closingTime = LocalDateTime.parse(closing); + + private final String url; + + DatabaseService(String dbPath) { + this.url = "jdbc:sqlite:" + dbPath; + } + + /** + * Creates tables if they do not already exist. + * Schema supports data from external scraper and adds image processing results. + */ + void ensureSchema() throws SQLException { + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + // Auctions table (populated by external scraper) + stmt.execute(""" + CREATE TABLE IF NOT EXISTS auctions ( + auction_id INTEGER PRIMARY KEY, + title TEXT NOT NULL, + location TEXT, + city TEXT, + country TEXT, + url TEXT NOT NULL, + type TEXT, + lot_count INTEGER DEFAULT 0, + closing_time TEXT, + discovered_at INTEGER + )"""); + + // Lots table (populated by external scraper) + stmt.execute(""" + CREATE TABLE IF NOT EXISTS lots ( + lot_id INTEGER PRIMARY KEY, + sale_id INTEGER, + title TEXT, + description TEXT, + manufacturer TEXT, + type TEXT, + year INTEGER, + category TEXT, + current_bid REAL, + currency TEXT, + url TEXT, + closing_time TEXT, + closing_notified INTEGER DEFAULT 0, + FOREIGN KEY (sale_id) REFERENCES auctions(auction_id) + )"""); + + // Images table (populated by this process) + stmt.execute(""" + CREATE TABLE IF NOT EXISTS images ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + lot_id INTEGER, + url TEXT, + file_path TEXT, + labels TEXT, + processed_at INTEGER, + FOREIGN KEY (lot_id) REFERENCES lots(lot_id) + )"""); + + // Indexes for performance + stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)"); + stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)"); + stmt.execute("CREATE INDEX IF NOT EXISTS idx_images_lot_id ON images(lot_id)"); + } + } + + /** + * Inserts or updates an auction record (typically called by external scraper) + */ + synchronized void upsertAuction(AuctionInfo auction) throws SQLException { + var sql = """ + INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(auction_id) DO UPDATE SET + title = excluded.title, + location = excluded.location, + city = excluded.city, + country = excluded.country, + url = excluded.url, + type = excluded.type, + lot_count = excluded.lot_count, + closing_time = excluded.closing_time + """; + + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, auction.auctionId()); + ps.setString(2, auction.title()); + ps.setString(3, auction.location()); + ps.setString(4, auction.city()); + ps.setString(5, auction.country()); + ps.setString(6, auction.url()); + ps.setString(7, auction.type()); + ps.setInt(8, auction.lotCount()); + ps.setString(9, auction.closingTime() != null ? auction.closingTime().toString() : null); + ps.setLong(10, Instant.now().getEpochSecond()); + ps.executeUpdate(); + } + } + + /** + * Retrieves all auctions from the database + */ + synchronized List getAllAuctions() throws SQLException { + List auctions = new ArrayList<>(); + var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time FROM auctions"; + + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + while (rs.next()) { + var closingStr = rs.getString("closing_time"); + var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null; + + auctions.add(new AuctionInfo( + rs.getInt("auction_id"), + rs.getString("title"), + rs.getString("location"), + rs.getString("city"), + rs.getString("country"), + rs.getString("url"), + rs.getString("type"), + rs.getInt("lot_count"), + closing + )); } - auctions.add(auction); - } - } - return auctions; - } - - /** - * Retrieves auctions by country code - */ - synchronized List getAuctionsByCountry(String countryCode) throws SQLException { - List auctions = new ArrayList<>(); - var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time " + } + return auctions; + } + + /** + * Retrieves auctions by country code + */ + synchronized List getAuctionsByCountry(String countryCode) throws SQLException { + List auctions = new ArrayList<>(); + var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time " + "FROM auctions WHERE country = ?"; - - try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { - ps.setString(1, countryCode); - var rs = ps.executeQuery(); - while (rs.next()) { - var auction = new AuctionInfo(); - auction.auctionId = rs.getInt("auction_id"); - auction.title = rs.getString("title"); - auction.location = rs.getString("location"); - auction.city = rs.getString("city"); - auction.country = rs.getString("country"); - auction.url = rs.getString("url"); - auction.type = rs.getString("type"); - auction.lotCount = rs.getInt("lot_count"); - var closing = rs.getString("closing_time"); - if (closing != null) { - auction.closingTime = LocalDateTime.parse(closing); + + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setString(1, countryCode); + var rs = ps.executeQuery(); + while (rs.next()) { + var closingStr = rs.getString("closing_time"); + var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null; + + auctions.add(new AuctionInfo( + rs.getInt("auction_id"), + rs.getString("title"), + rs.getString("location"), + rs.getString("city"), + rs.getString("country"), + rs.getString("url"), + rs.getString("type"), + rs.getInt("lot_count"), + closing + )); } - auctions.add(auction); - } - } - return auctions; - } - - /** - * Inserts or updates a lot record. Uses INSERT OR REPLACE to - * implement upsert semantics so that existing rows are replaced. - */ - synchronized void upsertLot(Lot lot) throws SQLException { - var sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)" - + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - + " ON CONFLICT(lot_id) DO UPDATE SET " - + "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, " - + "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, " - + "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time"; - try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { - ps.setInt(1, lot.lotId); - ps.setInt(2, lot.saleId); - ps.setString(3, lot.title); - ps.setString(4, lot.description); - ps.setString(5, lot.manufacturer); - ps.setString(6, lot.type); - ps.setInt(7, lot.year); - ps.setString(8, lot.category); - ps.setDouble(9, lot.currentBid); - ps.setString(10, lot.currency); - ps.setString(11, lot.url); - ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null); - ps.setInt(13, lot.closingNotified ? 1 : 0); - ps.executeUpdate(); - } - } - - /** - * Inserts a new image record. Each image is associated with a lot and - * stores both the original URL and the local file path. Detected - * labels are stored as a comma separated string. - */ - synchronized void insertImage(int lotId, String url, String filePath, List labels) throws SQLException { - var sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)"; - try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) { - ps.setInt(1, lotId); - ps.setString(2, url); - ps.setString(3, filePath); - ps.setString(4, String.join(",", labels)); - ps.executeUpdate(); - } - } - - /** - * Retrieves all lots that are still active (i.e., have a closing time - * in the future or unknown). Only these lots need to be monitored. - */ - synchronized List getActiveLots() throws SQLException { - List list = new ArrayList<>(); - var sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots"; - try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { - var rs = stmt.executeQuery(sql); - while (rs.next()) { - var lot = new Lot(); - lot.lotId = rs.getInt("lot_id"); - lot.saleId = rs.getInt("sale_id"); - lot.currentBid = rs.getDouble("current_bid"); - lot.currency = rs.getString("currency"); - var closing = rs.getString("closing_time"); - lot.closingNotified = rs.getInt("closing_notified") != 0; - if (closing != null) { - lot.closingTime = LocalDateTime.parse(closing); + } + return auctions; + } + + /** + * Inserts or updates a lot record (typically called by external scraper) + */ + synchronized void upsertLot(Lot lot) throws SQLException { + var sql = """ + INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(lot_id) DO UPDATE SET + sale_id = excluded.sale_id, + title = excluded.title, + description = excluded.description, + manufacturer = excluded.manufacturer, + type = excluded.type, + year = excluded.year, + category = excluded.category, + current_bid = excluded.current_bid, + currency = excluded.currency, + url = excluded.url, + closing_time = excluded.closing_time + """; + + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, lot.lotId()); + ps.setInt(2, lot.saleId()); + ps.setString(3, lot.title()); + ps.setString(4, lot.description()); + ps.setString(5, lot.manufacturer()); + ps.setString(6, lot.type()); + ps.setInt(7, lot.year()); + ps.setString(8, lot.category()); + ps.setDouble(9, lot.currentBid()); + ps.setString(10, lot.currency()); + ps.setString(11, lot.url()); + ps.setString(12, lot.closingTime() != null ? lot.closingTime().toString() : null); + ps.setInt(13, lot.closingNotified() ? 1 : 0); + ps.executeUpdate(); + } + } + + /** + * Inserts a new image record with object detection labels + */ + synchronized void insertImage(int lotId, String url, String filePath, List labels) throws SQLException { + var sql = "INSERT INTO images (lot_id, url, file_path, labels, processed_at) VALUES (?, ?, ?, ?, ?)"; + try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, lotId); + ps.setString(2, url); + ps.setString(3, filePath); + ps.setString(4, String.join(",", labels)); + ps.setLong(5, Instant.now().getEpochSecond()); + ps.executeUpdate(); + } + } + + /** + * Retrieves images for a specific lot + */ + synchronized List getImagesForLot(int lotId) throws SQLException { + List images = new ArrayList<>(); + var sql = "SELECT id, lot_id, url, file_path, labels FROM images WHERE lot_id = ?"; + + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, lotId); + var rs = ps.executeQuery(); + while (rs.next()) { + images.add(new ImageRecord( + rs.getInt("id"), + rs.getInt("lot_id"), + rs.getString("url"), + rs.getString("file_path"), + rs.getString("labels") + )); } - list.add(lot); - } - } - return list; - } - - /** - * Retrieves all lots from the database. - */ - synchronized List getAllLots() throws SQLException { - List list = new ArrayList<>(); - var sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots"; - try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { - var rs = stmt.executeQuery(sql); - while (rs.next()) { - var lot = new Lot(); - lot.lotId = rs.getInt("lot_id"); - lot.saleId = rs.getInt("sale_id"); - lot.title = rs.getString("title"); - lot.currentBid = rs.getDouble("current_bid"); - lot.currency = rs.getString("currency"); - list.add(lot); - } - } - return list; - } - - /** - * Gets the total number of images in the database. - */ - synchronized int getImageCount() throws SQLException { - var sql = "SELECT COUNT(*) as count FROM images"; - try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { - var rs = stmt.executeQuery(sql); - if (rs.next()) { - return rs.getInt("count"); - } - } - return 0; - } - - /** - * Updates the current bid of a lot after a bid refresh. - */ - synchronized void updateLotCurrentBid(Lot lot) throws SQLException { - try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement( - "UPDATE lots SET current_bid = ? WHERE lot_id = ?")) { - ps.setDouble(1, lot.currentBid); - ps.setInt(2, lot.lotId); - ps.executeUpdate(); - } - } - - /** - * Updates the closingNotified flag of a lot (set to 1 when we have - * warned the user about its imminent closure). - */ - synchronized void updateLotNotificationFlags(Lot lot) throws SQLException { - try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement( - "UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) { - ps.setInt(1, lot.closingNotified ? 1 : 0); - ps.setInt(2, lot.lotId); - ps.executeUpdate(); - } - } + } + return images; + } + + /** + * Retrieves all lots that are active and need monitoring + */ + synchronized List getActiveLots() throws SQLException { + List list = new ArrayList<>(); + var sql = "SELECT lot_id, sale_id, title, description, manufacturer, type, year, category, " + + "current_bid, currency, url, closing_time, closing_notified FROM lots"; + + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + while (rs.next()) { + var closingStr = rs.getString("closing_time"); + var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null; + + list.add(new Lot( + rs.getInt("sale_id"), + rs.getInt("lot_id"), + rs.getString("title"), + rs.getString("description"), + rs.getString("manufacturer"), + rs.getString("type"), + rs.getInt("year"), + rs.getString("category"), + rs.getDouble("current_bid"), + rs.getString("currency"), + rs.getString("url"), + closing, + rs.getInt("closing_notified") != 0 + )); + } + } + return list; + } + + /** + * Retrieves all lots from the database + */ + synchronized List getAllLots() throws SQLException { + return getActiveLots(); + } + + /** + * Gets the total number of images in the database + */ + synchronized int getImageCount() throws SQLException { + var sql = "SELECT COUNT(*) as count FROM images"; + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + if (rs.next()) { + return rs.getInt("count"); + } + } + return 0; + } + + /** + * Updates the current bid of a lot (used by monitoring service) + */ + synchronized void updateLotCurrentBid(Lot lot) throws SQLException { + try (var conn = DriverManager.getConnection(url); + var ps = conn.prepareStatement("UPDATE lots SET current_bid = ? WHERE lot_id = ?")) { + ps.setDouble(1, lot.currentBid()); + ps.setInt(2, lot.lotId()); + ps.executeUpdate(); + } + } + + /** + * Updates the closingNotified flag of a lot + */ + synchronized void updateLotNotificationFlags(Lot lot) throws SQLException { + try (var conn = DriverManager.getConnection(url); + var ps = conn.prepareStatement("UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) { + ps.setInt(1, lot.closingNotified() ? 1 : 0); + ps.setInt(2, lot.lotId()); + ps.executeUpdate(); + } + } + + /** + * Simple record for image data + */ + record ImageRecord(int id, int lotId, String url, String filePath, String labels) {} } diff --git a/src/main/java/com/auction/Lot.java b/src/main/java/com/auction/Lot.java index 87f7dca..05583f8 100644 --- a/src/main/java/com/auction/Lot.java +++ b/src/main/java/com/auction/Lot.java @@ -1,29 +1,30 @@ package com.auction; +import java.time.Duration; import java.time.LocalDateTime; + /** - * Simple POJO representing a lot (kavel) in an auction. It keeps track - * of the sale it belongs to, current bid and closing time. The method - * minutesUntilClose computes how many minutes remain until the lot closes. + * Represents a lot (kavel) in an auction. + * Data typically populated by the external scraper process. + * This project enriches the data with image analysis and monitoring. */ -final class Lot { - - int saleId; - int lotId; - String title; - String description; - String manufacturer; - String type; - int year; - String category; - double currentBid; - String currency; - String url; - LocalDateTime closingTime; // null if unknown - boolean closingNotified; - - long minutesUntilClose() { - if (closingTime == null) return Long.MAX_VALUE; - return java.time.Duration.between(LocalDateTime.now(), closingTime).toMinutes(); - } +record Lot( + int saleId, + int lotId, + String title, + String description, + String manufacturer, + String type, + int year, + String category, + double currentBid, + String currency, + String url, + LocalDateTime closingTime, + boolean closingNotified +) { + long minutesUntilClose() { + if (closingTime == null) return Long.MAX_VALUE; + return Duration.between(LocalDateTime.now(), closingTime).toMinutes(); + } } diff --git a/src/main/java/com/auction/Main.java b/src/main/java/com/auction/Main.java index a143335..96890a8 100644 --- a/src/main/java/com/auction/Main.java +++ b/src/main/java/com/auction/Main.java @@ -1,82 +1,93 @@ package com.auction; import org.opencv.core.Core; -import java.util.List; + +/** + * Main entry point for Troostwijk Auction Monitor. + * + * ARCHITECTURE: + * This project focuses on: + * 1. Image processing and object detection + * 2. Bid monitoring and notifications + * 3. Data enrichment + * + * Auction/Lot scraping is handled by the external ARCHITECTURE-TROOSTWIJK-SCRAPER process. + * That process populates the auctions and lots tables in the shared database. + * This process reads from those tables and enriches them with: + * - Downloaded images + * - Object detection labels + * - Bid monitoring + * - Notifications + */ public class Main { - public static void main2(String[] args) { - // If arguments are passed, this is likely a one-off command via dokku run - // Just exit immediately to allow the command to run - if (args.length > 0) { - IO.println("Command mode - exiting to allow shell commands"); - return; + + public static void main(String[] args) throws Exception { + Console.println("=== Troostwijk Auction Monitor ===\n"); + + // Configuration + String databaseFile = System.getenv().getOrDefault("DATABASE_FILE", "troostwijk.db"); + String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop"); + + // YOLO model paths (optional - monitor works without object detection) + String yoloCfg = "models/yolov4.cfg"; + String yoloWeights = "models/yolov4.weights"; + String yoloClasses = "models/coco.names"; + + // Load native OpenCV library (only if models exist) + try { + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + Console.println("✓ OpenCV loaded"); + } catch (UnsatisfiedLinkError e) { + Console.println("⚠️ OpenCV not available - image detection disabled"); } - IO.println("Starting Troostwijk Auction Scraper..."); - IO.println("Container is running and healthy."); + Console.println("Initializing monitor..."); + var monitor = new TroostwijkMonitor(databaseFile, notificationConfig, + yoloCfg, yoloWeights, yoloClasses); - // Keep container alive + // Show current database state + Console.println("\n📊 Current Database State:"); + monitor.printDatabaseStats(); + + // Check for pending image processing + Console.println("\n[1/2] Processing images..."); + monitor.processPendingImages(); + + // Start monitoring service + Console.println("\n[2/2] Starting bid monitoring..."); + monitor.scheduleMonitoring(); + + Console.println("\n✓ Monitor is running. Press Ctrl+C to stop.\n"); + Console.println("NOTE: This process expects auction/lot data from the external scraper."); + Console.println(" Make sure ARCHITECTURE-TROOSTWIJK-SCRAPER is running and populating the database.\n"); + + // Keep application alive try { Thread.sleep(Long.MAX_VALUE); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - IO.println("Container interrupted, exiting."); + Console.println("Monitor interrupted, exiting."); + } + } + + /** + * Alternative entry point for container environments. + * Simply keeps the container alive for manual commands. + */ + public static void main2(String[] args) { + if (args.length > 0) { + Console.println("Command mode - exiting to allow shell commands"); + return; + } + + Console.println("Troostwijk Monitor container is running and healthy."); + Console.println("Use 'docker exec' or 'dokku run' to execute commands."); + + try { + Thread.sleep(Long.MAX_VALUE); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + Console.println("Container interrupted, exiting."); } } - /** - * Entry point. Configure database location, notification settings, and - * YOLO model paths here before running. Once started the scraper - * discovers Dutch auctions, scrapes lots, and begins monitoring. - */ - public static void main(String[] args) throws Exception { - IO.println("=== Troostwijk Auction Scraper ===\n"); - - // Configuration parameters (replace with your own values) - String databaseFile = "troostwijk.db"; - - // Notification configuration - choose one: - // Option 1: Desktop notifications only (free, no setup required) - String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop"); - - // Option 2: Desktop + Email via Gmail (free, requires Gmail app password) - // Format: "smtp:username:appPassword:toEmail" - // Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com" - // Get app password: Google Account > Security > 2-Step Verification > App passwords - - // YOLO model paths (optional - scraper works without object detection) - String yoloCfg = "models/yolov4.cfg"; - String yoloWeights = "models/yolov4.weights"; - String yoloClasses = "models/coco.names"; - - // Load native OpenCV library - System.loadLibrary(Core.NATIVE_LIBRARY_NAME); - - IO.println("Initializing scraper..."); - TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "", - yoloCfg, yoloWeights, yoloClasses); - - // Step 1: Discover auctions in NL - IO.println("\n[1/3] Discovering Dutch auctions..."); - List auctions = scraper.discoverDutchAuctions(); - IO.println("✓ Found " + auctions.size() + " auctions: " + auctions); - - // Step 2: Fetch lots for each auction - IO.println("\n[2/3] Fetching lot details..."); - int totalAuctions = auctions.size(); - int currentAuction = 0; - for (int saleId : auctions) { - currentAuction++; - IO.println(" [Page " + currentAuction + "] Fetching auctions..."); - IO.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "..."); - scraper.fetchLotsForSale(saleId); - } - - // Show database summary - IO.println("\n📊 Database Summary:"); - scraper.printDatabaseStats(); - - // Step 3: Start monitoring bids and closures - IO.println("\n[3/3] Starting monitoring service..."); - scraper.scheduleMonitoring(); - IO.println("✓ Monitoring active. Press Ctrl+C to stop.\n"); - } } diff --git a/src/main/java/com/auction/NotificationService.java b/src/main/java/com/auction/NotificationService.java index 9b1c644..bfbd7c2 100644 --- a/src/main/java/com/auction/NotificationService.java +++ b/src/main/java/com/auction/NotificationService.java @@ -103,9 +103,9 @@ class NotificationService { Thread.sleep(2000); tray.remove(trayIcon); - IO.println("Desktop notification sent: " + title); + Console.println("Desktop notification sent: " + title); } else { - IO.println("Desktop notifications not supported, logging: " + title + " - " + message); + Console.println("Desktop notifications not supported, logging: " + title + " - " + message); } } catch (Exception e) { System.err.println("Desktop notification failed: " + e.getMessage()); @@ -147,7 +147,7 @@ class NotificationService { } Transport.send(msg); - IO.println("Email notification sent: " + title); + Console.println("Email notification sent: " + title); } catch (Exception e) { System.err.println("Email notification failed: " + e.getMessage()); diff --git a/src/main/java/com/auction/ObjectDetectionService.java b/src/main/java/com/auction/ObjectDetectionService.java index 2f517fd..5181c7c 100644 --- a/src/main/java/com/auction/ObjectDetectionService.java +++ b/src/main/java/com/auction/ObjectDetectionService.java @@ -38,12 +38,12 @@ class ObjectDetectionService { var classNamesFile = Paths.get(classNamesPath); if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) { - IO.println("⚠️ Object detection disabled: YOLO model files not found"); - IO.println(" Expected files:"); - IO.println(" - " + cfgPath); - IO.println(" - " + weightsPath); - IO.println(" - " + classNamesPath); - IO.println(" Scraper will continue without image analysis."); + Console.println("⚠️ Object detection disabled: YOLO model files not found"); + Console.println(" Expected files:"); + Console.println(" - " + cfgPath); + Console.println(" - " + weightsPath); + Console.println(" - " + classNamesPath); + Console.println(" Scraper will continue without image analysis."); this.enabled = false; this.net = null; this.classNames = new ArrayList<>(); @@ -58,7 +58,7 @@ class ObjectDetectionService { // Load class names (one per line) this.classNames = Files.readAllLines(classNamesFile); this.enabled = true; - IO.println("✓ Object detection enabled with YOLO"); + Console.println("✓ Object detection enabled with YOLO"); } catch (Exception e) { System.err.println("⚠️ Object detection disabled: " + e.getMessage()); throw new IOException("Failed to initialize object detection", e); diff --git a/src/main/java/com/auction/TroostwijkScraper.java b/src/main/java/com/auction/TroostwijkScraper.java deleted file mode 100644 index 0d69385..0000000 --- a/src/main/java/com/auction/TroostwijkScraper.java +++ /dev/null @@ -1,687 +0,0 @@ -package com.auction; - -/* - * TroostwijkScraper - * - * This example shows how you could build a Java‐based scraper for the Dutch - * auctions on Troostwijk Auctions. The scraper uses a combination of - * HTTP requests and HTML parsing with the jsoup library to discover active - * auctions, calls Troostwijk's internal JSON API to fetch lot (kavel) data - * efficiently, writes the results into a local SQLite database, performs - * object detection on lot images using OpenCV's DNN module, and sends - * desktop/email notifications when bids change or lots are about to expire. - * The implementation uses well known open source libraries for each of these - * concerns. You can adjust the API endpoints and CSS selectors as - * Troostwijk's site evolves. The code is organised into small helper - * classes to make it easier to maintain. - * - * Dependencies (add these to your Maven/Gradle project): - * - * - org.jsoup:jsoup:1.17.2 – HTML parser and HTTP client. - * - com.fasterxml.jackson.core:jackson-databind:2.17.0 – JSON parsing. - * - org.xerial:sqlite-jdbc:3.45.1.0 – SQLite JDBC driver. - * - com.sun.mail:javax.mail:1.6.2 – JavaMail for email notifications (free). - * - org.openpnp:opencv:4.9.0-0 (with native libraries) – OpenCV for image - * processing and object detection. - * - * Before running this program you must ensure that the native OpenCV - * binaries are on your library path (e.g. via -Djava.library.path). - * Desktop notifications work out of the box on Windows, macOS, and Linux. - * For email notifications, you need a Gmail account with an app password - * (free, requires 2FA enabled). See https://support.google.com/accounts/answer/185833 - * - * The scraper performs four major tasks: - * 1. Discover all auctions located in the Netherlands. - * 2. For each auction, fetch all lots (kavels) including images and - * bidding information, and persist the data into SQLite tables. - * 3. Monitor bidding and closing times on a schedule and send desktop/email - * notifications when bids change or lots are about to expire. - * 4. Run object detection on downloaded lot images to automatically - * label objects using a YOLO model. The results are stored in the - * database for later search. - */ - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.microsoft.playwright.Browser; -import com.microsoft.playwright.BrowserType; -import com.microsoft.playwright.Page; -import com.microsoft.playwright.Playwright; -import com.microsoft.playwright.options.WaitUntilState; -import java.io.IOException; -import java.net.URI; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -/** - * Main scraper class. It encapsulates the logic for scraping auctions, - * persisting data, scheduling updates, and performing object detection. - */ -public class TroostwijkScraper { - - // Base URLs – adjust these if Troostwijk changes their site structure - private static final String AUCTIONS_PAGE = "https://www.troostwijkauctions.com/auctions"; - private static final String LOT_API = "https://api.troostwijkauctions.com/lot/7/list"; - private static final String CACHE_DB_PATH = "cache/page_cache.db"; - private static final long CACHE_EXPIRATION_HOURS = 24; - private static final int RATE_LIMIT_MS = 200; - - // HTTP client used for API calls - private final HttpClient httpClient; - private final ObjectMapper objectMapper; - public final DatabaseService db; - private final NotificationService notifier; - private final ObjectDetectionService detector; - private final CacheDatabase cacheDb; - private final boolean useCache; - private Playwright playwright; - private Browser browser; - - /** - * Constructor. Creates supporting services and ensures the database - * tables exist. - * - * @param databasePath Path to SQLite database file - * @param notificationConfig "desktop" for desktop only, or "smtp:user:pass:toEmail" for email - * @param unused Unused parameter (kept for compatibility) - * @param yoloCfgPath Path to YOLO configuration file - * @param yoloWeightsPath Path to YOLO weights file - * @param classNamesPath Path to file containing class names - */ - public TroostwijkScraper(String databasePath, String notificationConfig, String unused, - String yoloCfgPath, String yoloWeightsPath, String classNamesPath) throws SQLException, IOException { - this(databasePath, notificationConfig, unused, yoloCfgPath, yoloWeightsPath, classNamesPath, true); - } - - /** - * Constructor with cache control. - * - * @param databasePath Path to SQLite database file - * @param notificationConfig "desktop" for desktop only, or "smtp:user:pass:toEmail" for email - * @param unused Unused parameter (kept for compatibility) - * @param yoloCfgPath Path to YOLO configuration file - * @param yoloWeightsPath Path to YOLO weights file - * @param classNamesPath Path to file containing class names - * @param useCache Enable page caching - */ - public TroostwijkScraper(String databasePath, String notificationConfig, String unused, - String yoloCfgPath, String yoloWeightsPath, String classNamesPath, - boolean useCache) throws SQLException, IOException { - this.httpClient = HttpClient.newHttpClient(); - this.objectMapper = new ObjectMapper(); - this.db = new DatabaseService(databasePath); - this.notifier = new NotificationService(notificationConfig, unused); - this.detector = new ObjectDetectionService(yoloCfgPath, yoloWeightsPath, classNamesPath); - this.useCache = useCache; - this.cacheDb = useCache ? new CacheDatabase(CACHE_DB_PATH) : null; - - // initialize DB - db.ensureSchema(); - if (useCache) { - cacheDb.initialize(); - } - } - - /** - * Initializes Playwright browser for JavaScript-rendered pages. - * Call this before using discoverDutchAuctions(). - */ - public void initializeBrowser() { - if (playwright == null) { - IO.println("Initializing Playwright browser..."); - this.playwright = Playwright.create(); - this.browser = playwright.chromium().launch(new BrowserType.LaunchOptions() - .setHeadless(true) - .setArgs(Arrays.asList("--no-sandbox", "--disable-setuid-sandbox"))); - IO.println("✓ Browser ready"); - } - } - - /** - * Closes browser and cache resources. - */ - public void close() { - if (browser != null) { - browser.close(); - browser = null; - } - if (playwright != null) { - playwright.close(); - playwright = null; - } - if (cacheDb != null) { - cacheDb.close(); - } - } - - /** - * Discovers all active Dutch auctions by crawling the auctions page. - * - * Uses Playwright to render JavaScript-heavy pages and extract auction data. - * Supports caching to avoid unnecessary page fetches. Filters auctions whose - * location contains ", NL" (indicating the Netherlands). Each auction link - * contains a unique sale ID in the format A1-xxxxx or A7-xxxxx. - * - * Auctions are saved to the database and can be retrieved with getDutchAuctions(). - * - * @return a list of sale identifiers for auctions located in NL (legacy compatibility) - */ - public List discoverDutchAuctions() { - Set saleIds = new HashSet<>(); - - // Check if browser is initialized - if (browser == null) { - initializeBrowser(); - } - - var pageNumber = 1; - var hasMorePages = true; - - IO.println("Starting Dutch auction discovery from " + AUCTIONS_PAGE); - - while (hasMorePages) { - IO.println("\n[Page " + pageNumber + "] Fetching auctions..."); - - // Check cache first - var html = loadFromCache(pageNumber); - - if (html != null) { - IO.println(" ✓ Loaded from cache"); - } else { - // Fetch with Playwright - html = fetchPageWithPlaywright(pageNumber); - - if (html == null || html.isEmpty()) { - IO.println(" ⚠️ Failed to fetch page, stopping pagination"); - break; - } - - IO.println(" ✓ Fetched from website"); - - // Save to cache - if (useCache) { - saveToCache(pageNumber, html); - } - - // Rate limiting - try { - Thread.sleep(RATE_LIMIT_MS); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - - // Parse auctions from HTML (saves Dutch auctions to database) - var foundOnPage = parseAuctionsFromHtml(html, saleIds); - - if (foundOnPage == 0) { - IO.println(" ⚠️ No Dutch auctions found on page, stopping pagination"); - hasMorePages = false; - } else { - IO.println(" ✓ Found " + foundOnPage + " Dutch auctions"); - pageNumber++; - } - } - - IO.println("\n✓ Total Dutch auctions discovered: " + saleIds.size()); - return new ArrayList<>(saleIds); - } - - - /** - * Fetches a single page using Playwright - */ - private String fetchPageWithPlaywright(int pageNumber) { - var url = pageNumber == 1 - ? AUCTIONS_PAGE - : AUCTIONS_PAGE + "?page=" + pageNumber; - - try { - var page = browser.newPage(); - - // Set user agent - page.setExtraHTTPHeaders(Map.of( - "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" - )); - - // Navigate to page - page.navigate(url, new Page.NavigateOptions() - .setTimeout(30000) - .setWaitUntil(WaitUntilState.NETWORKIDLE)); - - // Wait for auction listings to appear - try { - page.waitForSelector("a[href^='/a/']", new Page.WaitForSelectorOptions() - .setTimeout(10000)); - } catch (Exception e) { - // Continue even if selector not found - IO.println(" ⚠️ Auction selector not found"); - } - - // Get HTML content - var html = page.content(); - page.close(); - - return html; - - } catch (Exception e) { - System.err.println(" ⚠️ Playwright error: " + e.getMessage()); - return null; - } - } - - /** - * Parses auctions from HTML using JSoup and saves Dutch auctions to database. - * Uses proper HTML parsing instead of regex for more reliable extraction. - * @return number of Dutch auctions found on this page - */ - private int parseAuctionsFromHtml(String html, Set saleIds) { - var foundCount = 0; - - try { - var doc = org.jsoup.Jsoup.parse(html); - - // Find all auction links (format: /a/title-A1-12345 or /a/title-A7-12345) - var auctionLinks = doc.select("a[href^='/a/']"); - - for (var link : auctionLinks) { - var href = link.attr("href"); - - // Extract auction ID from URL - var pattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)"); - var matcher = pattern.matcher(href); - - if (!matcher.find()) { - continue; - } - - var typeNum = matcher.group(1); - var auctionId = Integer.parseInt(matcher.group(2)); - - // Skip duplicates - if (saleIds.contains(auctionId)) { - continue; - } - - // Extract auction info using JSoup - var auction = extractAuctionInfo(link, href, auctionId, "A" + typeNum); - - // Only keep Dutch auctions - if (auction != null && "NL".equals(auction.country)) { - saleIds.add(auctionId); - foundCount++; - - // Save to database - try { - db.upsertAuction(auction); - IO.println(" Found Dutch auction: " + auctionId + " - " + auction.title + " (" + auction.location + ")"); - } catch (SQLException e) { - System.err.println(" Failed to save auction: " + e.getMessage()); - } - } - } - } catch (Exception e) { - System.err.println(" Error parsing HTML: " + e.getMessage()); - } - - return foundCount; - } - - /** - * Extracts auction information from a link element using JSoup - * This method intelligently parses the HTML structure to extract: - * - Title - * - Location (city and country) - * - Lot count (if available) - */ - private AuctionInfo extractAuctionInfo(org.jsoup.nodes.Element link, String href, int auctionId, String type) { - var auction = new AuctionInfo(); - auction.auctionId = auctionId; - auction.type = type; - auction.url = "https://www.troostwijkauctions.com" + href; - - // Extract title from href (convert kebab-case to title) - var titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-"); - var titleMatcher = titlePattern.matcher(href); - if (titleMatcher.find()) { - var slug = titleMatcher.group(1); - auction.title = slug.replace("-", " "); - // Capitalize first letter - if (!auction.title.isEmpty()) { - auction.title = auction.title.substring(0, 1).toUpperCase() + auction.title.substring(1); - } - } else { - auction.title = "Unknown Auction"; - } - - // Try to find title in link text (more accurate) - var linkText = link.text(); - if (!linkText.isEmpty() && !linkText.matches(".*\\d+.*")) { - // If link text doesn't contain numbers, it's likely the title - var parts = linkText.split(",|\\d+"); - if (parts.length > 0 && parts[0].trim().length() > 5) { - auction.title = parts[0].trim(); - } - } - - // Extract location using JSoup selectors - // Look for

tags that contain location info - var locationElements = link.select("p"); - for (var p : locationElements) { - var text = p.text(); - - // Pattern: "City, Country" or "City, Region, Country" - if (text.matches(".*[A-Z]{2}$")) { - // Ends with 2-letter country code - var countryCode = text.substring(text.length() - 2); - var cityPart = text.substring(0, text.length() - 2).trim(); - - // Remove trailing comma or whitespace - cityPart = cityPart.replaceAll("[,\\s]+$", ""); - - auction.country = countryCode; - auction.city = cityPart; - auction.location = cityPart + ", " + countryCode; - break; - } - } - - // Fallback: check HTML content directly - if (auction.country == null) { - var html = link.html(); - var locPattern = java.util.regex.Pattern.compile( - "([A-Za-z][A-Za-z\\s,\\-']+?)\\s*(?:)?\\s*\\s*([A-Z]{2})(?![A-Za-z])"); - var locMatcher = locPattern.matcher(html); - - if (locMatcher.find()) { - var city = locMatcher.group(1).trim().replaceAll(",$", ""); - var country = locMatcher.group(2); - auction.city = city; - auction.country = country; - auction.location = city + ", " + country; - } - } - - // Extract lot count if available (kavels/lots) - var textElements = link.select("*"); - for (var elem : textElements) { - var text = elem.ownText(); - if (text.matches("\\d+\\s+(?:kavel|lot|item)s?.*")) { - var countPattern = java.util.regex.Pattern.compile("(\\d+)"); - var countMatcher = countPattern.matcher(text); - if (countMatcher.find()) { - auction.lotCount = Integer.parseInt(countMatcher.group(1)); - break; - } - } - } - - return auction; - } - - /** - * Loads cached HTML for a page - */ - private String loadFromCache(int pageNumber) { - if (!useCache || cacheDb == null) return null; - - var url = pageNumber == 1 - ? AUCTIONS_PAGE - : AUCTIONS_PAGE + "?page=" + pageNumber; - - return cacheDb.get(url); - } - - /** - * Saves HTML to cache - */ - private void saveToCache(int pageNumber, String html) { - if (!useCache || cacheDb == null) return; - - var url = pageNumber == 1 - ? AUCTIONS_PAGE - : AUCTIONS_PAGE + "?page=" + pageNumber; - - cacheDb.put(url, html, CACHE_EXPIRATION_HOURS); - } - - /** - * Retrieves all lots for a given sale ID using Troostwijk's internal JSON - * API. The API accepts parameters such as batchSize, offset, and saleID. - * A large batchSize returns many lots at once. We loop until no further - * results are returned. Each JSON result is mapped to our Lot domain - * object and persisted to the database. - * - * @param saleId the sale identifier - */ - public void fetchLotsForSale(int saleId) { - var batchSize = 200; - var offset = 0; - var more = true; - var totalLots = 0; - - while (more) { - try { - var url = LOT_API + "?batchSize=" + batchSize - + "&listType=7&offset=" + offset - + "&sortOption=0&saleID=" + saleId - + "&parentID=0&relationID=0&buildversion=201807311"; - - IO.println(" Fetching lots from API (offset=" + offset + ")..."); - - var request = HttpRequest.newBuilder() - .uri(URI.create(url)) - .header("Accept", "application/json") - .header("User-Agent", "Mozilla/5.0") - .GET() - .build(); - - var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - - if (response.statusCode() != 200) { - System.err.println(" ⚠️ API call failed for sale " + saleId); - System.err.println(" Status: " + response.statusCode()); - System.err.println(" Response: " + response.body().substring(0, Math.min(200, response.body().length()))); - break; - } - - var root = objectMapper.readTree(response.body()); - var results = root.path("results"); - - if (!results.isArray() || results.isEmpty()) { - if (offset == 0) { - IO.println(" ⚠️ No lots found for sale " + saleId); - IO.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length()))); - } - more = false; - break; - } - var lotsInBatch = results.size(); - IO.println(" Found " + lotsInBatch + " lots in this batch"); - - for (var node : results) { - var lot = new Lot(); - lot.saleId = saleId; - lot.lotId = node.path("lotID").asInt(); - lot.title = node.path("t").asText(); - lot.description = node.path("d").asText(); - lot.manufacturer = node.path("mf").asText(); - lot.type = node.path("typ").asText(); - lot.year = node.path("yb").asInt(); - lot.category = node.path("lc").asText(); - // Current bid; field names may differ (e.g. currentBid or cb) - lot.currentBid = node.path("cb").asDouble(); - lot.currency = node.path("cu").asText(); - lot.url = "https://www.troostwijkauctions.com/nl" + node.path("url").asText(); - - // Save basic lot info into DB - db.upsertLot(lot); - totalLots++; - - // Download images and perform object detection - List imageUrls = new ArrayList<>(); - var imgs = node.path("imgs"); - if (imgs.isArray()) { - for (var imgNode : imgs) { - var imgUrl = imgNode.asText(); - imageUrls.add(imgUrl); - } - } - - // Download and analyze images (optional, can be slow) - for (var imgUrl : imageUrls) { - var fileName = downloadImage(imgUrl, saleId, lot.lotId); - if (fileName != null) { - // run object detection once per image - var labels = detector.detectObjects(fileName); - db.insertImage(lot.lotId, imgUrl, fileName, labels); - } - } - } - - IO.println(" ✓ Processed " + totalLots + " lots so far"); - offset += batchSize; - } catch (IOException | InterruptedException e) { - System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage()); - more = false; - } catch (SQLException e) { - System.err.println("Database error: " + e.getMessage()); - } - } - } - - /** - * Downloads an image from the given URL to a local directory. Images - * are stored under "images///" to keep them organised. - * - * @param imageUrl remote image URL - * @param saleId sale identifier - * @param lotId lot identifier - * @return absolute path to saved file or null on failure - */ - private String downloadImage(String imageUrl, int saleId, int lotId) { - try { - var request = HttpRequest.newBuilder() - .uri(URI.create(imageUrl)) - .GET() - .build(); - var response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); - if (response.statusCode() == 200) { - var dir = Paths.get("images", String.valueOf(saleId), String.valueOf(lotId)); - Files.createDirectories(dir); - var fileName = Paths.get(imageUrl).getFileName().toString(); - var dest = dir.resolve(fileName); - Files.copy(response.body(), dest); - return dest.toAbsolutePath().toString(); - } - } catch (IOException | InterruptedException e) { - System.err.println("Failed to download image " + imageUrl + ": " + e.getMessage()); - } - return null; - } - - /** - * Schedules periodic monitoring of all lots. The scheduler runs every - * hour to refresh current bids and closing times. For lots that - * are within 30 minutes of closing, it increases the polling frequency - * automatically. When a new bid is detected or a lot is about to - * expire, a Pushover notification is sent to the configured user. - * Note: In production, ensure proper shutdown handling for the scheduler. - */ - public void scheduleMonitoring() { - var scheduler = Executors.newScheduledThreadPool(1); - scheduler.scheduleAtFixedRate(() -> { - try { - var activeLots = db.getActiveLots(); - for (var lot : activeLots) { - // refresh the lot's bidding information via API - refreshLotBid(lot); - // check closing time to adjust monitoring - var minutesLeft = lot.minutesUntilClose(); - if (minutesLeft < 30) { - // send warning when within 5 minutes - if (minutesLeft <= 5 && !lot.closingNotified) { - notifier.sendNotification("Kavel " + lot.lotId + " sluit binnen " + minutesLeft + " min.", - "Lot nearing closure", 1); - lot.closingNotified = true; - db.updateLotNotificationFlags(lot); - } - // schedule additional quick check for this lot - scheduler.schedule(() -> refreshLotBid(lot), 5, TimeUnit.MINUTES); - } - } - } catch (SQLException e) { - System.err.println("Error during scheduled monitoring: " + e.getMessage()); - } - }, 0, 1, TimeUnit.HOURS); - } - - /** - * Refreshes the bid for a single lot and sends notification if it has - * changed since the last check. The method calls the same API used for - * initial scraping but only extracts the current bid for the given lot. - * - * @param lot the lot to refresh - */ - private void refreshLotBid(Lot lot) { - try { - var url = LOT_API + "?batchSize=1&listType=7&offset=0&sortOption=0&saleID=" + lot.saleId - + "&parentID=0&relationID=0&buildversion=201807311&lotID=" + lot.lotId; - var request = HttpRequest.newBuilder().uri(URI.create(url)).GET().build(); - var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - if (response.statusCode() != 200) return; - var root = objectMapper.readTree(response.body()); - var results = root.path("results"); - if (results.isArray() && !results.isEmpty()) { - var node = results.get(0); - var newBid = node.path("cb").asDouble(); - if (Double.compare(newBid, lot.currentBid) > 0) { - var previous = lot.currentBid; - lot.currentBid = newBid; - db.updateLotCurrentBid(lot); - var msg = String.format("Nieuw bod op kavel %d: €%.2f (was €%.2f)", lot.lotId, newBid, previous); - notifier.sendNotification(msg, "Kavel bieding update", 0); - } - } - } catch (IOException | InterruptedException | SQLException e) { - System.err.println("Failed to refresh bid for lot " + lot.lotId + ": " + e.getMessage()); - } - } - - /** - * Prints statistics about the data in the database. - */ - public void printDatabaseStats() { - try { - var allLots = db.getAllLots(); - var imageCount = db.getImageCount(); - - IO.println(" Total lots in database: " + allLots.size()); - IO.println(" Total images downloaded: " + imageCount); - - if (!allLots.isEmpty()) { - var totalBids = allLots.stream().mapToDouble(l -> l.currentBid).sum(); - IO.println(" Total current bids: €" + String.format("%.2f", totalBids)); - } - } catch (SQLException e) { - System.err.println(" ⚠️ Could not retrieve database stats: " + e.getMessage()); - } - } - - // ---------------------------------------------------------------------- - // Domain classes and services - // ---------------------------------------------------------------------- - -} \ No newline at end of file diff --git a/src/test/java/com/auction/AuctionParsingTest.java b/src/test/java/com/auction/AuctionParsingTest.java index 370d06c..7e446ec 100644 --- a/src/test/java/com/auction/AuctionParsingTest.java +++ b/src/test/java/com/auction/AuctionParsingTest.java @@ -20,259 +20,94 @@ import static org.junit.jupiter.api.Assertions.*; * Tests the markup data extraction for each auction found */ public class AuctionParsingTest { - - private static String testHtml; - - @BeforeAll - public static void loadTestHtml() throws IOException { - // Load the test HTML file - testHtml = Files.readString(Paths.get("src/test/resources/test_auctions.html")); - System.out.println("Loaded test HTML (" + testHtml.length() + " characters)"); - } - - @Test - public void testParseAuctionsFromTestHtml() { - // Parse the HTML with JSoup - Document doc = Jsoup.parse(testHtml); - - // Find all auction links - Elements auctionLinks = doc.select("a[href^='/a/']"); - - System.out.println("\n=== Auction Parsing Test ==="); - System.out.println("Found " + auctionLinks.size() + " auction links"); - - List auctions = new ArrayList<>(); - int count = 0; - - for (Element link : auctionLinks) { - String href = link.attr("href"); - - // Extract auction ID from URL - java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)"); - java.util.regex.Matcher matcher = pattern.matcher(href); - - if (!matcher.find()) { - continue; - } - - String typeNum = matcher.group(1); - int auctionId = Integer.parseInt(matcher.group(2)); - - // Extract auction info using IMPROVED text-based method - AuctionInfo auction = extractAuctionInfoFromText(link, href, auctionId, "A" + typeNum); - auctions.add(auction); - - // Print the first 10 auctions for verification - if (count < 10) { - System.out.println("\n--- Auction #" + (count + 1) + " ---"); - System.out.println("ID: " + auction.auctionId); - System.out.println("Type: " + auction.type); - System.out.println("Title: " + auction.title); - System.out.println("Location: " + auction.location); - System.out.println("City: " + auction.city); - System.out.println("Country: " + auction.country); - System.out.println("Lot Count: " + auction.lotCount); - System.out.println("URL: " + auction.url); - - // Print ALL visible text for debugging - System.out.println("\nAll visible text from link:"); - System.out.println("\"" + link.text() + "\""); - } - - count++; - } - - System.out.println("\n=== Summary ==="); - System.out.println("Total auctions parsed: " + auctions.size()); - - // Count by country - long nlCount = auctions.stream().filter(a -> "NL".equals(a.country)).count(); - long bgCount = auctions.stream().filter(a -> "BG".equals(a.country)).count(); - long deCount = auctions.stream().filter(a -> "DE".equals(a.country)).count(); - long beCount = auctions.stream().filter(a -> "BE".equals(a.country)).count(); - - System.out.println("Dutch (NL) auctions: " + nlCount); - System.out.println("Bulgarian (BG) auctions: " + bgCount); - System.out.println("German (DE) auctions: " + deCount); - System.out.println("Belgian (BE) auctions: " + beCount); - System.out.println("Unknown location: " + auctions.stream().filter(a -> a.country == null).count()); - - // Assertions - assertTrue(auctions.size() > 0, "Should find at least one auction"); - - // Verify all auctions have basic info - for (AuctionInfo auction : auctions) { - assertNotNull(auction.title, "Title should not be null for auction " + auction.auctionId); - assertTrue(auction.title.length() > 0, "Title should not be empty for auction " + auction.auctionId); - assertNotNull(auction.url, "URL should not be null for auction " + auction.auctionId); - assertTrue(auction.auctionId > 0, "Auction ID should be positive"); - assertNotNull(auction.location, "Location should not be null for auction " + auction.auctionId); - assertNotNull(auction.country, "Country should not be null for auction " + auction.auctionId); - assertTrue(auction.lotCount > 0, "Lot count should be positive for auction " + auction.auctionId); - } - } - - /** - * IMPROVED: Extract auction info using .text() method - * This parses the human-readable text instead of HTML markup - * - * Expected format: "[day] om [time] [lot_count] [title] [city], [CC]" - * Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE" - */ - private AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) { - AuctionInfo auction = new AuctionInfo(); - auction.auctionId = auctionId; - auction.type = type; - auction.url = "https://www.troostwijkauctions.com" + href; - - // Get ALL visible text from the link (this removes all HTML tags) - String allText = link.text().trim(); - - // Pattern: "[day] om [time] [lot_count] [title] [city], [CC]" - // Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE" - - // Step 1: Extract closing time (day + time) - java.util.regex.Pattern timePattern = java.util.regex.Pattern.compile( - "(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})" - ); - java.util.regex.Matcher timeMatcher = timePattern.matcher(allText); - - String remainingText = allText; - if (timeMatcher.find()) { - String day = timeMatcher.group(1); // e.g., "woensdag" - String time = timeMatcher.group(2); // e.g., "18:00" - - // Store closing time info (could be parsed to LocalDateTime with proper date) - System.out.println(" Closing time: " + day + " om " + time); - - // Remove the time part from text - remainingText = allText.substring(timeMatcher.end()).trim(); - } - - // Step 2: Extract location from the END (always ends with ", CC") - java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile( - "([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$" - ); - java.util.regex.Matcher locMatcher = locPattern.matcher(remainingText); - - if (locMatcher.find()) { - auction.city = locMatcher.group(1).trim(); - auction.country = locMatcher.group(2); - auction.location = auction.city + ", " + auction.country; - - // Remove location from end - remainingText = remainingText.substring(0, locMatcher.start()).trim(); - } - - // Step 3: Extract lot count (first number after time) - java.util.regex.Pattern lotPattern = java.util.regex.Pattern.compile( - "^(\\d+)\\s+" - ); - java.util.regex.Matcher lotMatcher = lotPattern.matcher(remainingText); - - if (lotMatcher.find()) { - auction.lotCount = Integer.parseInt(lotMatcher.group(1)); - - // Remove lot count from beginning - remainingText = remainingText.substring(lotMatcher.end()).trim(); - } - - // Step 4: What remains is the title - if (!remainingText.isEmpty()) { - auction.title = remainingText; - } else { - // Fallback: use URL slug for title - java.util.regex.Pattern titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-"); - java.util.regex.Matcher titleMatcher = titlePattern.matcher(href); - if (titleMatcher.find()) { - String slug = titleMatcher.group(1).replace("-", " ").replace("%7C", "|"); - auction.title = slug.substring(0, 1).toUpperCase() + slug.substring(1); + + private static String testHtml; + + @BeforeAll + public static void loadTestHtml() throws IOException { + // Load the test HTML file + testHtml = Files.readString(Paths.get("src/test/resources/test_auctions.html")); + System.out.println("Loaded test HTML (" + testHtml.length() + " characters)"); + } + + @Test + public void testLocationPatternMatching() { + System.out.println("\n=== Location Pattern Tests ==="); + + // Test different location formats + String[] testCases = { + "

Amsterdam, NL

", + "

Sofia, BG

", + "

Berlin, DE

", + "Brussels,BE" + }; + + for (String testHtml : testCases) { + Document doc = Jsoup.parse(testHtml); + Element elem = doc.select("p, span").first(); + + if (elem != null) { + String text = elem.text(); + System.out.println("\nTest: " + testHtml); + System.out.println("Text: " + text); + + // Test regex pattern + if (text.matches(".*[A-Z]{2}$")) { + String countryCode = text.substring(text.length() - 2); + String cityPart = text.substring(0, text.length() - 2).trim().replaceAll("[,\\s]+$", ""); + System.out.println("→ Extracted: " + cityPart + ", " + countryCode); } else { - auction.title = "Unknown Auction"; + System.out.println("→ No match"); } - } - - return auction; - } - - @Test - public void testLocationPatternMatching() { - System.out.println("\n=== Location Pattern Tests ==="); - - // Test different location formats - String[] testCases = { - "

Amsterdam, NL

", - "

Sofia, BG

", - "

Berlin, DE

", - "Brussels,BE" - }; - - for (String testHtml : testCases) { - Document doc = Jsoup.parse(testHtml); - Element elem = doc.select("p, span").first(); - - if (elem != null) { - String text = elem.text(); - System.out.println("\nTest: " + testHtml); - System.out.println("Text: " + text); - - // Test regex pattern - if (text.matches(".*[A-Z]{2}$")) { - String countryCode = text.substring(text.length() - 2); - String cityPart = text.substring(0, text.length() - 2).trim().replaceAll("[,\\s]+$", ""); - System.out.println("→ Extracted: " + cityPart + ", " + countryCode); - } else { - System.out.println("→ No match"); - } - } - } - } - - @Test - public void testFullTextPatternMatching() { - System.out.println("\n=== Full Text Pattern Tests ==="); - - // Test the complete auction text format - String[] testCases = { - "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE", - "maandag om 14:30 5 Industriële machines Amsterdam, NL", - "vrijdag om 10:00 12 Landbouwmachines Antwerpen, BE" - }; - - for (String testText : testCases) { - System.out.println("\nParsing: \"" + testText + "\""); - - // Simulated extraction - String remaining = testText; - - // Extract time - java.util.regex.Pattern timePattern = java.util.regex.Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})"); - java.util.regex.Matcher timeMatcher = timePattern.matcher(remaining); - if (timeMatcher.find()) { - System.out.println(" Time: " + timeMatcher.group(1) + " om " + timeMatcher.group(2)); - remaining = remaining.substring(timeMatcher.end()).trim(); - } - - // Extract location - java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile( - "([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$" - ); - java.util.regex.Matcher locMatcher = locPattern.matcher(remaining); - if (locMatcher.find()) { - System.out.println(" Location: " + locMatcher.group(1) + ", " + locMatcher.group(2)); - remaining = remaining.substring(0, locMatcher.start()).trim(); - } - - // Extract lot count - java.util.regex.Pattern lotPattern = java.util.regex.Pattern.compile("^(\\d+)\\s+"); - java.util.regex.Matcher lotMatcher = lotPattern.matcher(remaining); - if (lotMatcher.find()) { - System.out.println(" Lot count: " + lotMatcher.group(1)); - remaining = remaining.substring(lotMatcher.end()).trim(); - } - - // What remains is title - System.out.println(" Title: " + remaining); - } - } + } + } + } + + @Test + public void testFullTextPatternMatching() { + System.out.println("\n=== Full Text Pattern Tests ==="); + + // Test the complete auction text format + String[] testCases = { + "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE", + "maandag om 14:30 5 Industriële machines Amsterdam, NL", + "vrijdag om 10:00 12 Landbouwmachines Antwerpen, BE" + }; + + for (String testText : testCases) { + System.out.println("\nParsing: \"" + testText + "\""); + + // Simulated extraction + String remaining = testText; + + // Extract time + java.util.regex.Pattern timePattern = java.util.regex.Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})"); + java.util.regex.Matcher timeMatcher = timePattern.matcher(remaining); + if (timeMatcher.find()) { + System.out.println(" Time: " + timeMatcher.group(1) + " om " + timeMatcher.group(2)); + remaining = remaining.substring(timeMatcher.end()).trim(); + } + + // Extract location + java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile( + "([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$" + ); + java.util.regex.Matcher locMatcher = locPattern.matcher(remaining); + if (locMatcher.find()) { + System.out.println(" Location: " + locMatcher.group(1) + ", " + locMatcher.group(2)); + remaining = remaining.substring(0, locMatcher.start()).trim(); + } + + // Extract lot count + java.util.regex.Pattern lotPattern = java.util.regex.Pattern.compile("^(\\d+)\\s+"); + java.util.regex.Matcher lotMatcher = lotPattern.matcher(remaining); + if (lotMatcher.find()) { + System.out.println(" Lot count: " + lotMatcher.group(1)); + remaining = remaining.substring(lotMatcher.end()).trim(); + } + + // What remains is title + System.out.println(" Title: " + remaining); + } + } } diff --git a/src/test/java/com/auction/Parser.java b/src/test/java/com/auction/Parser.java index 27be68d..32425de 100644 --- a/src/test/java/com/auction/Parser.java +++ b/src/test/java/com/auction/Parser.java @@ -1,12 +1,8 @@ package com.auction; import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter; -import com.vladsch.flexmark.util.data.DataHolder; -import net.bytebuddy.build.Plugin.Engine.Source.Element; import org.jsoup.Jsoup; import org.junit.jupiter.api.Test; - -import org.junit.jupiter.api.extension.Extensions; public class Parser { public record AuctionItem( diff --git a/troostwijk.db b/troostwijk.db deleted file mode 100644 index 286200e7fe2c3fdf116b06443c7e987fc7e339a0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32768 zcmeI)O;6h}7zglVK!e8Ec0mybBp(}*O_R{>>%gFDC1oK@a9I>mmsv^^NE}3uuw5qY zL+l&uBW})%8&bA?fbx%&HhyiMpC2E5>G8(_7mSRPbP@=%qrKF0U3*7}rfD5{ugY6p zMR{2jZbq=eqOEU;8bKxF7%l2tWV=5P$##AOL~? z3phr{e6^u-9*@`!AKmy-A~HV@Z3)9$)x^PwL(yo`uOZOWDYm=cf84 zt)u0RxxKBQmBD6Lkvv-Y8N13^9J0CMN-Ola#%>%;m@P+Iw>wLQxw)y|^~yQOBZyfW6Nqpeu%F_(#S8Hg_xR}`GHD65|Lhc4|OIE6py z7FK*@5tr^$mpZ%jxJs+K-n!d)Vwf8n`nP`h)x zmW3(5Y!-YH#Q8V~MV_*>df{eerz}WklgKP#7fGtJBTrKn3;$ed$c;0w!?Gf*n`bZ& z6F}9(iQr?-nnZ1~rbVW9+bJX9`V@_2k0&j*vWlo3tm10HHTz?J zTg>ONRh-S`p&u6nAOHafKmY;|fB*y_009U<00Ijvfb;(XFD^oZ00bZa0SG_<0uX=z z1Rwwb2-E^N|05M3009U<00Izz00bZa0SG_<0*fzz^Z(+nF(QNj1Rwwb2tWV=5P$## zAOHaf;QWsqfB*y_009U<00Izz00bZa0SGLA00bZa0SG_<0uX=z l1Rwx`M=gN!|D#@h1P=iSKmY;|fB*y_009U<00NIh;1BIqc;^5B