diff --git a/pom.xml b/pom.xml index 3b9d5c1..39dc6b9 100644 --- a/pom.xml +++ b/pom.xml @@ -14,8 +14,8 @@ UTF-8 - 21 - 21 + 25 + 25 2.17.0 4.9.0-0 @@ -93,8 +93,8 @@ maven-compiler-plugin 3.11.0 - 21 - 21 + 25 + 25 diff --git a/src/main/java/com/auction/AuctionInfo.java b/src/main/java/com/auction/AuctionInfo.java new file mode 100644 index 0000000..874b9fb --- /dev/null +++ b/src/main/java/com/auction/AuctionInfo.java @@ -0,0 +1,24 @@ +package com.auction; + +import java.time.LocalDateTime; +/** + * Represents auction metadata (veiling informatie) + */ +public final class AuctionInfo { + + public int auctionId; // Unique auction ID (from URL) + public String title; // Auction title + public String location; // Location (e.g., "Amsterdam, NL") + public String city; // City name + public String country; // Country code (e.g., "NL") + public String url; // Full auction URL + public String type; // Auction type (A1 or A7) + public int lotCount; // Number of lots/kavels + public LocalDateTime closingTime; // Closing time if available + + @Override + public String toString() { + return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}", + auctionId, type, title, location, lotCount, url); + } +} diff --git a/src/main/java/com/auction/CacheDatabase.java b/src/main/java/com/auction/CacheDatabase.java new file mode 100644 index 0000000..f34e407 --- /dev/null +++ b/src/main/java/com/auction/CacheDatabase.java @@ -0,0 +1,165 @@ +package com.auction; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.time.Instant; +/** + * SQLite-based caching system for HTML pages with expiration support + */ +class CacheDatabase { + + private final String dbPath; + private Connection connection; + + 
public CacheDatabase(String dbPath) { + this.dbPath = dbPath; + } + + /** + * Initialize database and create schema + */ + public void initialize() throws SQLException, IOException { + // Create cache directory if it doesn't exist + var cacheDir = Paths.get(dbPath).getParent(); + if (cacheDir != null) { + Files.createDirectories(cacheDir); + } + + connection = DriverManager.getConnection("jdbc:sqlite:" + dbPath); + + // Create cache table with URL as primary key + var createTable = "CREATE TABLE IF NOT EXISTS page_cache (\n" + + " url TEXT PRIMARY KEY,\n" + + " html TEXT NOT NULL,\n" + + " cached_at INTEGER NOT NULL,\n" + + " expires_at INTEGER NOT NULL\n" + + ")\n"; + + try (var stmt = connection.createStatement()) { + stmt.execute(createTable); + // Create index on expires_at for efficient cleanup + stmt.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON page_cache(expires_at)"); + } + + // Clean up expired entries on initialization + cleanupExpired(); + + System.out.println("✓ Cache database initialized"); + } + + /** + * Get cached HTML for a URL if it exists and hasn't expired + * + * @param url The URL to look up + * @return Cached HTML or null if not found/expired + */ + public synchronized String get(String url) { + var sql = "SELECT html FROM page_cache WHERE url = ? 
AND expires_at > ?"; + + try (var ps = connection.prepareStatement(sql)) { + ps.setString(1, url); + ps.setLong(2, Instant.now().getEpochSecond()); + + var rs = ps.executeQuery(); + if (rs.next()) { + return rs.getString("html"); + } + } catch (SQLException e) { + System.err.println("Cache read error: " + e.getMessage()); + } + + return null; + } + + /** + * Store HTML in cache with expiration time + * + * @param url The URL to cache + * @param html The HTML content + * @param expirationHours Hours until cache expires + */ + public synchronized void put(String url, String html, long expirationHours) { + var sql = "INSERT OR REPLACE INTO page_cache (url, html, cached_at, expires_at)\n" + + "VALUES (?, ?, ?, ?)\n"; + + var now = Instant.now().getEpochSecond(); + var expiresAt = now + (expirationHours * 3600); + + try (var ps = connection.prepareStatement(sql)) { + ps.setString(1, url); + ps.setString(2, html); + ps.setLong(3, now); + ps.setLong(4, expiresAt); + ps.executeUpdate(); + } catch (SQLException e) { + System.err.println("Cache write error: " + e.getMessage()); + } + } + + /** + * Remove expired cache entries + */ + public synchronized void cleanupExpired() { + var sql = "DELETE FROM page_cache WHERE expires_at <= ?"; + + try (var ps = connection.prepareStatement(sql)) { + ps.setLong(1, Instant.now().getEpochSecond()); + var deleted = ps.executeUpdate(); + if (deleted > 0) { + System.out.println("✓ Cleaned up " + deleted + " expired cache entries"); + } + } catch (SQLException e) { + System.err.println("Cache cleanup error: " + e.getMessage()); + } + } + + /** + * Get cache statistics + */ + public synchronized void printStats() { + var sql = "SELECT COUNT(*) as total, " + + "SUM(CASE WHEN expires_at > ? 
THEN 1 ELSE 0 END) as valid, " + + "SUM(LENGTH(html)) as total_size " + + "FROM page_cache"; + + try (var ps = connection.prepareStatement(sql)) { + ps.setLong(1, Instant.now().getEpochSecond()); + var rs = ps.executeQuery(); + + if (rs.next()) { + var total = rs.getInt("total"); + var valid = rs.getInt("valid"); + var size = rs.getLong("total_size"); + + System.out.println("\n=== Cache Statistics ==="); + System.out.println("Total entries: " + total); + System.out.println("Valid entries: " + valid); + System.out.println("Expired entries: " + (total - valid)); + System.out.println("Total size: " + (size / 1024) + " KB"); + } + } catch (SQLException e) { + System.err.println("Cache stats error: " + e.getMessage()); + } + } + + /** + * Close database connection + */ + public void close() { + if (connection != null) { + try { + connection.close(); + } catch (SQLException e) { + System.err.println("Error closing cache database: " + e.getMessage()); + } + } + } +} diff --git a/src/main/java/com/auction/DatabaseService.java b/src/main/java/com/auction/DatabaseService.java new file mode 100644 index 0000000..9c325b3 --- /dev/null +++ b/src/main/java/com/auction/DatabaseService.java @@ -0,0 +1,303 @@ +package com.auction; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.time.Instant; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; +/** + * Service for persisting auctions, lots, images, and object labels into + * a SQLite database. Uses the Xerial JDBC driver which connects to + * SQLite via a URL of the form "jdbc:sqlite:path_to_file". + */ +public class DatabaseService { + + private final String url; + DatabaseService(String dbPath) { + this.url = "jdbc:sqlite:" + dbPath; + } + /** + * Creates tables if they do not already exist. 
The schema includes + * tables for auctions, lots, images, and object labels. This method is + * idempotent; it can be called multiple times. + */ + void ensureSchema() throws SQLException { + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + // Auctions table (veilingen) + stmt.execute("CREATE TABLE IF NOT EXISTS auctions (" + + "auction_id INTEGER PRIMARY KEY," + + "title TEXT NOT NULL," + + "location TEXT," + + "city TEXT," + + "country TEXT," + + "url TEXT NOT NULL," + + "type TEXT," + + "lot_count INTEGER DEFAULT 0," + + "closing_time TEXT," + + "discovered_at INTEGER" // Unix timestamp + + ")"); + + // Sales table (legacy - keep for compatibility) + stmt.execute("CREATE TABLE IF NOT EXISTS sales (" + + "sale_id INTEGER PRIMARY KEY," + + "title TEXT," + + "location TEXT," + + "closing_time TEXT" + + ")"); + + // Lots table + stmt.execute("CREATE TABLE IF NOT EXISTS lots (" + + "lot_id INTEGER PRIMARY KEY," + + "sale_id INTEGER," + + "title TEXT," + + "description TEXT," + + "manufacturer TEXT," + + "type TEXT," + + "year INTEGER," + + "category TEXT," + + "current_bid REAL," + + "currency TEXT," + + "url TEXT," + + "closing_time TEXT," + + "closing_notified INTEGER DEFAULT 0," + + "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)" + + ")"); + + // Images table + stmt.execute("CREATE TABLE IF NOT EXISTS images (" + + "id INTEGER PRIMARY KEY AUTOINCREMENT," + + "lot_id INTEGER," + + "url TEXT," + + "file_path TEXT," + + "labels TEXT," + + "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)" + + ")"); + + // Create indexes for better query performance + stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)"); + stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)"); + } + } + + /** + * Inserts or updates an auction record + */ + synchronized void upsertAuction(AuctionInfo auction) throws SQLException { + var sql = "INSERT INTO auctions (auction_id, title, location, city, 
country, url, type, lot_count, closing_time, discovered_at)" + + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + + " ON CONFLICT(auction_id) DO UPDATE SET " + + "title = excluded.title, location = excluded.location, city = excluded.city, " + + "country = excluded.country, url = excluded.url, type = excluded.type, " + + "lot_count = excluded.lot_count, closing_time = excluded.closing_time"; + + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, auction.auctionId); + ps.setString(2, auction.title); + ps.setString(3, auction.location); + ps.setString(4, auction.city); + ps.setString(5, auction.country); + ps.setString(6, auction.url); + ps.setString(7, auction.type); + ps.setInt(8, auction.lotCount); + ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null); + ps.setLong(10, Instant.now().getEpochSecond()); + ps.executeUpdate(); + } + } + + /** + * Retrieves all auctions from the database + */ + synchronized List getAllAuctions() throws SQLException { + List auctions = new ArrayList<>(); + var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time FROM auctions"; + + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + while (rs.next()) { + var auction = new AuctionInfo(); + auction.auctionId = rs.getInt("auction_id"); + auction.title = rs.getString("title"); + auction.location = rs.getString("location"); + auction.city = rs.getString("city"); + auction.country = rs.getString("country"); + auction.url = rs.getString("url"); + auction.type = rs.getString("type"); + auction.lotCount = rs.getInt("lot_count"); + var closing = rs.getString("closing_time"); + if (closing != null) { + auction.closingTime = LocalDateTime.parse(closing); + } + auctions.add(auction); + } + } + return auctions; + } + + /** + * Retrieves auctions by country code + */ + synchronized List getAuctionsByCountry(String 
countryCode) throws SQLException { + List auctions = new ArrayList<>(); + var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time " + + "FROM auctions WHERE country = ?"; + + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setString(1, countryCode); + var rs = ps.executeQuery(); + while (rs.next()) { + var auction = new AuctionInfo(); + auction.auctionId = rs.getInt("auction_id"); + auction.title = rs.getString("title"); + auction.location = rs.getString("location"); + auction.city = rs.getString("city"); + auction.country = rs.getString("country"); + auction.url = rs.getString("url"); + auction.type = rs.getString("type"); + auction.lotCount = rs.getInt("lot_count"); + var closing = rs.getString("closing_time"); + if (closing != null) { + auction.closingTime = LocalDateTime.parse(closing); + } + auctions.add(auction); + } + } + return auctions; + } + + /** + * Inserts or updates a lot record. Uses INSERT ... ON CONFLICT(lot_id) DO UPDATE to + * implement upsert semantics so that existing rows are updated in place. 
+ */ + synchronized void upsertLot(Lot lot) throws SQLException { + var sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)" + + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" + + " ON CONFLICT(lot_id) DO UPDATE SET " + + "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, " + + "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, " + + "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time"; + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, lot.lotId); + ps.setInt(2, lot.saleId); + ps.setString(3, lot.title); + ps.setString(4, lot.description); + ps.setString(5, lot.manufacturer); + ps.setString(6, lot.type); + ps.setInt(7, lot.year); + ps.setString(8, lot.category); + ps.setDouble(9, lot.currentBid); + ps.setString(10, lot.currency); + ps.setString(11, lot.url); + ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null); + ps.setInt(13, lot.closingNotified ? 1 : 0); + ps.executeUpdate(); + } + } + + /** + * Inserts a new image record. Each image is associated with a lot and + * stores both the original URL and the local file path. Detected + * labels are stored as a comma separated string. 
+ */ + synchronized void insertImage(int lotId, String url, String filePath, List labels) throws SQLException { + var sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)"; + try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) { + ps.setInt(1, lotId); + ps.setString(2, url); + ps.setString(3, filePath); + ps.setString(4, String.join(",", labels)); + ps.executeUpdate(); + } + } + + /** + * Retrieves the lots to be monitored. NOTE: the query applies no closing-time + * filter, so every row of the lots table is returned, not only active ones. + */ + synchronized List getActiveLots() throws SQLException { + List list = new ArrayList<>(); + var sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots"; + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + while (rs.next()) { + var lot = new Lot(); + lot.lotId = rs.getInt("lot_id"); + lot.saleId = rs.getInt("sale_id"); + lot.currentBid = rs.getDouble("current_bid"); + lot.currency = rs.getString("currency"); + var closing = rs.getString("closing_time"); + lot.closingNotified = rs.getInt("closing_notified") != 0; + if (closing != null) { + lot.closingTime = LocalDateTime.parse(closing); + } + list.add(lot); + } + } + return list; + } + + /** + * Retrieves all lots from the database. 
+ */ + synchronized List getAllLots() throws SQLException { + List list = new ArrayList<>(); + var sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots"; + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + while (rs.next()) { + var lot = new Lot(); + lot.lotId = rs.getInt("lot_id"); + lot.saleId = rs.getInt("sale_id"); + lot.title = rs.getString("title"); + lot.currentBid = rs.getDouble("current_bid"); + lot.currency = rs.getString("currency"); + list.add(lot); + } + } + return list; + } + + /** + * Gets the total number of images in the database. + */ + synchronized int getImageCount() throws SQLException { + var sql = "SELECT COUNT(*) as count FROM images"; + try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) { + var rs = stmt.executeQuery(sql); + if (rs.next()) { + return rs.getInt("count"); + } + } + return 0; + } + + /** + * Updates the current bid of a lot after a bid refresh. + */ + synchronized void updateLotCurrentBid(Lot lot) throws SQLException { + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement( + "UPDATE lots SET current_bid = ? WHERE lot_id = ?")) { + ps.setDouble(1, lot.currentBid); + ps.setInt(2, lot.lotId); + ps.executeUpdate(); + } + } + + /** + * Updates the closingNotified flag of a lot (set to 1 when we have + * warned the user about its imminent closure). + */ + synchronized void updateLotNotificationFlags(Lot lot) throws SQLException { + try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement( + "UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) { + ps.setInt(1, lot.closingNotified ? 
1 : 0); + ps.setInt(2, lot.lotId); + ps.executeUpdate(); + } + } +} diff --git a/src/main/java/com/auction/Lot.java b/src/main/java/com/auction/Lot.java new file mode 100644 index 0000000..87f7dca --- /dev/null +++ b/src/main/java/com/auction/Lot.java @@ -0,0 +1,29 @@ +package com.auction; + +import java.time.LocalDateTime; +/** + * Simple POJO representing a lot (kavel) in an auction. It keeps track + * of the sale it belongs to, current bid and closing time. The method + * minutesUntilClose computes how many minutes remain until the lot closes. + */ +final class Lot { + + int saleId; + int lotId; + String title; + String description; + String manufacturer; + String type; + int year; + String category; + double currentBid; + String currency; + String url; + LocalDateTime closingTime; // null if unknown + boolean closingNotified; + + long minutesUntilClose() { + if (closingTime == null) return Long.MAX_VALUE; + return java.time.Duration.between(LocalDateTime.now(), closingTime).toMinutes(); + } +} diff --git a/src/main/java/com/auction/Main.java b/src/main/java/com/auction/Main.java index 6a0091f..a143335 100644 --- a/src/main/java/com/auction/Main.java +++ b/src/main/java/com/auction/Main.java @@ -1,23 +1,82 @@ package com.auction; +import org.opencv.core.Core; +import java.util.List; public class Main { - public static void main(String[] args) { + public static void main2(String[] args) { // If arguments are passed, this is likely a one-off command via dokku run // Just exit immediately to allow the command to run if (args.length > 0) { - System.out.println("Command mode - exiting to allow shell commands"); + IO.println("Command mode - exiting to allow shell commands"); return; } - System.out.println("Starting Troostwijk Auction Scraper..."); - System.out.println("Container is running and healthy."); + IO.println("Starting Troostwijk Auction Scraper..."); + IO.println("Container is running and healthy."); // Keep container alive try { 
Thread.sleep(Long.MAX_VALUE); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - System.out.println("Container interrupted, exiting."); + IO.println("Container interrupted, exiting."); } } + /** + * Entry point. Configure database location, notification settings, and + * YOLO model paths here before running. Once started the scraper + * discovers Dutch auctions, scrapes lots, and begins monitoring. + */ + public static void main(String[] args) throws Exception { + IO.println("=== Troostwijk Auction Scraper ===\n"); + + // Configuration parameters (replace with your own values) + String databaseFile = "troostwijk.db"; + + // Notification configuration - choose one: + // Option 1: Desktop notifications only (free, no setup required) + String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop"); + + // Option 2: Desktop + Email via Gmail (free, requires Gmail app password) + // Format: "smtp:username:appPassword:toEmail" + // Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com" + // Get app password: Google Account > Security > 2-Step Verification > App passwords + + // YOLO model paths (optional - scraper works without object detection) + String yoloCfg = "models/yolov4.cfg"; + String yoloWeights = "models/yolov4.weights"; + String yoloClasses = "models/coco.names"; + + // Load native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + IO.println("Initializing scraper..."); + TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "", + yoloCfg, yoloWeights, yoloClasses); + + // Step 1: Discover auctions in NL + IO.println("\n[1/3] Discovering Dutch auctions..."); + List auctions = scraper.discoverDutchAuctions(); + IO.println("✓ Found " + auctions.size() + " auctions: " + auctions); + + // Step 2: Fetch lots for each auction + IO.println("\n[2/3] Fetching lot details..."); + int totalAuctions = auctions.size(); + int currentAuction = 0; + for (int 
saleId : auctions) { + currentAuction++; + IO.println(" [Page " + currentAuction + "] Fetching auctions..."); + IO.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "..."); + scraper.fetchLotsForSale(saleId); + } + + // Show database summary + IO.println("\n📊 Database Summary:"); + scraper.printDatabaseStats(); + + // Step 3: Start monitoring bids and closures + IO.println("\n[3/3] Starting monitoring service..."); + scraper.scheduleMonitoring(); + IO.println("✓ Monitoring active. Press Ctrl+C to stop.\n"); + } } diff --git a/src/main/java/com/auction/NotificationService.java b/src/main/java/com/auction/NotificationService.java new file mode 100644 index 0000000..9b1c644 --- /dev/null +++ b/src/main/java/com/auction/NotificationService.java @@ -0,0 +1,156 @@ +package com.auction; + +import javax.mail.Authenticator; +import javax.mail.Message.RecipientType; +import javax.mail.PasswordAuthentication; +import javax.mail.Session; +import javax.mail.Transport; +import javax.mail.internet.InternetAddress; +import javax.mail.internet.MimeMessage; +import java.awt.SystemTray; +import java.awt.Toolkit; +import java.awt.TrayIcon; +import java.awt.TrayIcon.MessageType; +import java.util.Date; +import java.util.Properties; +/** + * Service for sending notifications via desktop notifications and/or email. + * Supports free notification methods: + * 1. Desktop notifications (Windows/Linux/macOS system tray) + * 2. 
Email via Gmail SMTP (free, requires app password) + * + * Configuration: + * - For email: Set notificationEmail to your Gmail address + * - Enable 2FA in Gmail and create an App Password + * - Use format "smtp:username:appPassword:toEmail" for credentials + * - Or use "desktop" for desktop-only notifications + */ +class NotificationService { + + private final boolean useDesktop; + private final boolean useEmail; + private final String smtpUsername; + private final String smtpPassword; + private final String toEmail; + + /** + * Creates a notification service. + * + * @param config "desktop" for desktop only, or "smtp:username:password:toEmail" for email + * @param unusedParam Kept for compatibility (can pass empty string) + */ + NotificationService(String config, String unusedParam) { + + if ("desktop".equalsIgnoreCase(config)) { + this.useDesktop = true; + this.useEmail = false; + this.smtpUsername = null; + this.smtpPassword = null; + this.toEmail = null; + } else if (config.startsWith("smtp:")) { + var parts = config.split(":", 4); + if (parts.length != 4) { + throw new IllegalArgumentException("Email config must be 'smtp:username:password:toEmail'"); + } + this.useDesktop = true; // Always include desktop + this.useEmail = true; + this.smtpUsername = parts[1]; + this.smtpPassword = parts[2]; + this.toEmail = parts[3]; + } else { + throw new IllegalArgumentException("Config must be 'desktop' or 'smtp:username:password:toEmail'"); + } + } + + /** + * Sends notification via configured channels. + * + * @param message The message body + * @param title Message title + * @param priority Priority level (0=normal, 1=high) + */ + void sendNotification(String message, String title, int priority) { + if (useDesktop) { + sendDesktopNotification(title, message, priority); + } + if (useEmail) { + sendEmailNotification(title, message, priority); + } + } + + /** + * Sends a desktop notification using system tray. 
+ * Works on Windows, macOS, and Linux with desktop environments. + */ + private void sendDesktopNotification(String title, String message, int priority) { + try { + if (SystemTray.isSupported()) { + var tray = SystemTray.getSystemTray(); + var image = Toolkit.getDefaultToolkit() + .createImage(new byte[0]); // Empty image + + var trayIcon = new TrayIcon(image, "Troostwijk Scraper"); + trayIcon.setImageAutoSize(true); + + var messageType = priority > 0 + ? MessageType.WARNING + : MessageType.INFO; + + tray.add(trayIcon); + trayIcon.displayMessage(title, message, messageType); + + // Remove icon after 2 seconds to avoid clutter + Thread.sleep(2000); + tray.remove(trayIcon); + + IO.println("Desktop notification sent: " + title); + } else { + IO.println("Desktop notifications not supported, logging: " + title + " - " + message); + } + } catch (Exception e) { + System.err.println("Desktop notification failed: " + e.getMessage()); + } + } + + /** + * Sends email notification via Gmail SMTP (free). + * Uses Gmail's SMTP server with app password authentication. 
+ */ + private void sendEmailNotification(String title, String message, int priority) { + try { + var props = new Properties(); + props.put("mail.smtp.auth", "true"); + props.put("mail.smtp.starttls.enable", "true"); + props.put("mail.smtp.host", "smtp.gmail.com"); + props.put("mail.smtp.port", "587"); + props.put("mail.smtp.ssl.trust", "smtp.gmail.com"); + + var session = Session.getInstance(props, + new Authenticator() { + + protected PasswordAuthentication getPasswordAuthentication() { + return new PasswordAuthentication(smtpUsername, smtpPassword); + } + }); + + var msg = new MimeMessage(session); + msg.setFrom(new InternetAddress(smtpUsername)); + msg.setRecipients(RecipientType.TO, + InternetAddress.parse(toEmail)); + msg.setSubject("[Troostwijk] " + title); + msg.setText(message); + msg.setSentDate(new Date()); + + if (priority > 0) { + msg.setHeader("X-Priority", "1"); + msg.setHeader("Importance", "High"); + } + + Transport.send(msg); + IO.println("Email notification sent: " + title); + + } catch (Exception e) { + System.err.println("Email notification failed: " + e.getMessage()); + } + } +} diff --git a/src/main/java/com/auction/ObjectDetectionService.java b/src/main/java/com/auction/ObjectDetectionService.java new file mode 100644 index 0000000..2f517fd --- /dev/null +++ b/src/main/java/com/auction/ObjectDetectionService.java @@ -0,0 +1,140 @@ +package com.auction; + +import org.opencv.core.Mat; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.dnn.Dnn; +import org.opencv.dnn.Net; +import org.opencv.imgcodecs.Imgcodecs; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import static org.opencv.dnn.Dnn.DNN_BACKEND_OPENCV; +import static org.opencv.dnn.Dnn.DNN_TARGET_CPU; +/** + * Service for performing object detection on images using OpenCV's DNN + * module. 
The DNN module can load pre‑trained models from several + * frameworks (Darknet, TensorFlow, ONNX, etc.). Here + * we load a YOLO model (Darknet) by specifying the configuration and + * weights files. For each image we run a forward pass and return a + * list of detected class labels. + * + * If model files are not found, the service operates in disabled mode + * and returns empty lists. + */ +class ObjectDetectionService { + + private final Net net; + private final List classNames; + private final boolean enabled; + + ObjectDetectionService(String cfgPath, String weightsPath, String classNamesPath) throws IOException { + // Check if model files exist + var cfgFile = Paths.get(cfgPath); + var weightsFile = Paths.get(weightsPath); + var classNamesFile = Paths.get(classNamesPath); + + if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) { + IO.println("⚠️ Object detection disabled: YOLO model files not found"); + IO.println(" Expected files:"); + IO.println(" - " + cfgPath); + IO.println(" - " + weightsPath); + IO.println(" - " + classNamesPath); + IO.println(" Scraper will continue without image analysis."); + this.enabled = false; + this.net = null; + this.classNames = new ArrayList<>(); + return; + } + + try { + // Load network + this.net = Dnn.readNetFromDarknet(cfgPath, weightsPath); + this.net.setPreferableBackend(DNN_BACKEND_OPENCV); + this.net.setPreferableTarget(DNN_TARGET_CPU); + // Load class names (one per line) + this.classNames = Files.readAllLines(classNamesFile); + this.enabled = true; + IO.println("✓ Object detection enabled with YOLO"); + } catch (Exception e) { + System.err.println("⚠️ Object detection disabled: " + e.getMessage()); + throw new IOException("Failed to initialize object detection", e); + } + } + /** + * Detects objects in the given image file and returns a list of + * human‑readable labels. Only detections above a confidence + * threshold are returned. 
For brevity this method omits drawing + * bounding boxes. See the OpenCV DNN documentation for details on + * post‑processing. + * + * @param imagePath absolute path to the image + * @return list of detected class names (empty if detection disabled) + */ + List detectObjects(String imagePath) { + if (!enabled) { + return new ArrayList<>(); + } + + List labels = new ArrayList<>(); + var image = Imgcodecs.imread(imagePath); + if (image.empty()) return labels; + // Create a 4D blob from the image + var blob = Dnn.blobFromImage(image, 1.0 / 255.0, new Size(416, 416), new Scalar(0, 0, 0), true, false); + net.setInput(blob); + List outs = new ArrayList<>(); + var outNames = getOutputLayerNames(net); + net.forward(outs, outNames); + // Post‑process: for each detection compute score and choose class + var confThreshold = 0.5f; + for (var out : outs) { + for (var i = 0; i < out.rows(); i++) { + var data = out.get(i, 0); + if (data == null) continue; + // The first 5 numbers are bounding box, then class scores + var scores = new double[classNames.size()]; + System.arraycopy(data, 5, scores, 0, scores.length); + var classId = argMax(scores); + var confidence = scores[classId]; + if (confidence > confThreshold) { + var label = classNames.get(classId); + if (!labels.contains(label)) { + labels.add(label); + } + } + } + } + return labels; + } + /** + * Returns the names of the output layers in the network. YOLO + * automatically discovers its output layers; other models may require + * manually specifying them. + */ + private List getOutputLayerNames(Net net) { + List names = new ArrayList<>(); + var outLayers = net.getUnconnectedOutLayers().toList(); + var layersNames = net.getLayerNames(); + for (var i : outLayers) { + names.add(layersNames.get(i - 1)); + } + return names; + } + /** + * Returns the index of the maximum value in the array. 
+ */ + private int argMax(double[] array) { + var best = 0; + var max = array[0]; + for (var i = 1; i < array.length; i++) { + if (array[i] > max) { + max = array[i]; + best = i; + } + } + return best; + } +} diff --git a/src/main/java/com/auction/TroostwijkAuctionExtractor.java b/src/main/java/com/auction/TroostwijkAuctionExtractor.java deleted file mode 100644 index 46be1c2..0000000 --- a/src/main/java/com/auction/TroostwijkAuctionExtractor.java +++ /dev/null @@ -1,645 +0,0 @@ -package com.auction; - -import com.microsoft.playwright.*; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.microsoft.playwright.options.WaitUntilState; - -import java.io.IOException; -import java.nio.file.*; -import java.sql.*; -import java.time.Instant; -import java.util.*; - -/** - * TroostwijkAuctionExtractor - * - * Extracts auction listings from https://www.troostwijkauctions.com/auctions - * using Playwright for Java (headless browser automation). - * - * Features: - * - Uses Playwright for Java to load JavaScript-rendered content - * - Iterates through all pages of auction listings - * - Rate limiting: 200ms between each page request - * - Caches visited pages in SQLite database with expiration times - * - Extracts auction metadata: ID, title, location, URL - * - * Dependencies (Maven): - * - * com.microsoft.playwright - * playwright - * 1.40.0 - * - * - * com.fasterxml.jackson.core - * jackson-databind - * 2.17.0 - * - * - * org.xerial - * sqlite-jdbc - * 3.45.1.0 - * - * - * After adding dependency, run: mvn exec:java -e -D exec.mainClass=com.microsoft.playwright.CLI -D exec.args="install" - * This downloads the browser binaries needed by Playwright. 
- */ -public class TroostwijkAuctionExtractor { - - private static final String AUCTIONS_BASE_URL = "https://www.troostwijkauctions.com/auctions"; - private static final int RATE_LIMIT_MS = 200; - private static final String CACHE_DB_PATH = "cache/page_cache.db"; - private static final long CACHE_EXPIRATION_HOURS = 24; // Cache expires after 24 hours - - private final ObjectMapper objectMapper; - private final boolean useCache; - private final CacheDatabase cacheDb; - private final int maxPageVisits; // Maximum number of pages to fetch (0 = unlimited) - private int pageVisitCount; // Counter for actual page fetches (not from cache) - private Playwright playwright; - private Browser browser; - - /** - * Represents an auction listing - */ - public static class Auction { - public int id; - public String title; - public String location; - public String url; - public String type; // e.g. "A1" or "A7" - - @Override - public String toString() { - return String.format("Auction{id=%d, type=%s, title='%s', location='%s', url='%s'}", - id, type, title, location, url); - } - } - - /** - * Constructor - * - * @param useCache Enable database caching of visited pages - * @param maxPageVisits Maximum number of actual page fetches (0 = unlimited) - */ - public TroostwijkAuctionExtractor(boolean useCache, int maxPageVisits) throws SQLException, IOException { - this.objectMapper = new ObjectMapper(); - this.useCache = useCache; - this.maxPageVisits = maxPageVisits; - this.pageVisitCount = 0; - this.cacheDb = useCache ? 
new CacheDatabase(CACHE_DB_PATH) : null; - - if (useCache) { - cacheDb.initialize(); - } - } - - /** - * Constructor with default unlimited page visits - * - * @param useCache Enable database caching of visited pages - */ - public TroostwijkAuctionExtractor(boolean useCache) throws SQLException, IOException { - this(useCache, 0); // 0 = unlimited - } - - /** - * Initializes Playwright and browser instance - * Call this before extracting auctions - */ - public void initialize() { - System.out.println("Initializing Playwright browser..."); - this.playwright = Playwright.create(); - this.browser = playwright.chromium().launch(new BrowserType.LaunchOptions() - .setHeadless(true) - .setArgs(Arrays.asList("--no-sandbox", "--disable-setuid-sandbox"))); - System.out.println("✓ Browser ready"); - } - - /** - * Closes browser and Playwright instance - * Call this when done extracting - */ - public void close() { - if (browser != null) { - browser.close(); - } - if (playwright != null) { - playwright.close(); - } - if (cacheDb != null) { - cacheDb.close(); - } - System.out.println("✓ Browser and cache closed"); - } - - /** - * Extracts all auctions from all pages - * - * @return List of all discovered auctions - */ - public List extractAllAuctions() throws InterruptedException { - if (browser == null) { - throw new IllegalStateException("Browser not initialized. 
Call initialize() first."); - } - - List allAuctions = new ArrayList<>(); - int pageNumber = 1; - boolean hasMorePages = true; - - System.out.println("Starting auction extraction from " + AUCTIONS_BASE_URL); - - while (hasMorePages) { - System.out.println("\n[Page " + pageNumber + "] Fetching auctions..."); - - // Check cache first - String cachedHtml = loadFromCache(pageNumber); - String html; - - if (cachedHtml != null) { - System.out.println(" ✓ Loaded from cache"); - html = cachedHtml; - } else { - // Check if we've reached the maximum page visit limit - if (maxPageVisits > 0 && pageVisitCount >= maxPageVisits) { - System.out.println(" ⚠️ Reached maximum page visit limit (" + maxPageVisits + "), stopping"); - break; - } - - // Fetch with Playwright - html = fetchPageWithPlaywright(pageNumber); - pageVisitCount++; // Increment actual page fetch counter - - if (html == null || html.isEmpty()) { - System.out.println(" ⚠️ Failed to fetch page, stopping pagination"); - break; - } - - System.out.println(" ✓ Fetched from website (visit " + pageVisitCount + - (maxPageVisits > 0 ? "/" + maxPageVisits : "") + ")"); - - // Save to cache - if (useCache) { - saveToCache(pageNumber, html); - } - - // Rate limiting - wait 200ms before next request - Thread.sleep(RATE_LIMIT_MS); - } - - // Parse auctions from HTML - List pageAuctions = parseAuctionsFromHtml(html); - - if (pageAuctions.isEmpty()) { - System.out.println(" ⚠️ No auctions found on page, stopping pagination"); - hasMorePages = false; - } else { - System.out.println(" ✓ Found " + pageAuctions.size() + " auctions"); - allAuctions.addAll(pageAuctions); - pageNumber++; - } - } - - System.out.println("\n✓ Total auctions extracted: " + allAuctions.size()); - return allAuctions; - } - - /** - * Fetches a single page using Playwright - * - * @param pageNumber Page number (1-indexed) - * @return HTML content of the page - */ - private String fetchPageWithPlaywright(int pageNumber) { - String url = pageNumber == 1 - ? 
AUCTIONS_BASE_URL - : AUCTIONS_BASE_URL + "?page=" + pageNumber; - - try { - Page page = browser.newPage(); - - // Set user agent - page.setExtraHTTPHeaders(Map.of( - "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" - )); - - // Navigate to page - page.navigate(url, new Page.NavigateOptions() - .setTimeout(30000) - .setWaitUntil(WaitUntilState.NETWORKIDLE)); - - // Wait for auction listings to appear - try { - page.waitForSelector("a[href^='/a/']", new Page.WaitForSelectorOptions() - .setTimeout(10000)); - } catch (Exception e) { - // Continue even if selector not found - System.out.println(" ⚠️ Auction selector not found, attempting to parse anyway"); - } - - // Get HTML content - String html = page.content(); - page.close(); - - return html; - - } catch (Exception e) { - System.err.println(" ⚠️ Playwright error: " + e.getMessage()); - return null; - } - } - - /** - * Parses auction data from HTML content - * - * @param html HTML content - * @return List of parsed auctions - */ - private List parseAuctionsFromHtml(String html) { - List auctions = new ArrayList<>(); - - // Simple regex-based parsing for auction links - // Format: seenIds = new HashSet<>(); - - while (linkMatcher.find()) { - String href = linkMatcher.group(1); - int auctionId = Integer.parseInt(linkMatcher.group(2)); - - // Avoid duplicates - if (seenIds.contains(auctionId)) { - continue; - } - - // Extract auction type (A1 or A7) - String type = href.contains("A1-") ? 
"A1" : "A7"; - - // Try to find location and title near this link - String location = extractLocationNearLink(html, href); - String title = extractTitleFromHref(href); - - Auction auction = new Auction(); - auction.id = auctionId; - auction.type = type; - auction.title = title; - auction.location = location; - auction.url = "https://www.troostwijkauctions.com" + href; - - auctions.add(auction); - seenIds.add(auctionId); - } - - return auctions; - } - - /** - * Extracts location text near an auction link - * Looks for ", NL" or other country codes - */ - private String extractLocationNearLink(String html, String href) { - int hrefPos = html.indexOf(href); - if (hrefPos == -1) return "Unknown"; - - // Look at 1000 characters before AND after the href for location info - int startPos = Math.max(hrefPos - 500, 0); - int endPos = Math.min(hrefPos + 1000, html.length()); - String context = html.substring(startPos, endPos); - - // Pattern 1: Classic format "City, NL" - java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile( - "([A-Za-z][A-Za-z\\s\\-']+),\\s*([A-Z]{2})(?![A-Za-z])"); - java.util.regex.Matcher locMatcher = locPattern.matcher(context); - - if (locMatcher.find()) { - String location = locMatcher.group(1).trim() + ", " + locMatcher.group(2); - System.out.println(" Found location: " + location + " for auction " + href); - return location; - } - - // Pattern 2: HTML format like "City, NL" - // Extract city and country code separately - java.util.regex.Pattern htmlPattern = java.util.regex.Pattern.compile( - "]*>([A-Za-z][A-Za-z\\s\\-',]+?)(?:,)?\\s*(?:)?\\s*\\s*([A-Z]{2})(?![A-Za-z])"); - java.util.regex.Matcher htmlMatcher = htmlPattern.matcher(context); - - if (htmlMatcher.find()) { - String city = htmlMatcher.group(1).trim().replaceAll(",$", ""); // Remove trailing comma - String country = htmlMatcher.group(2); - String location = city + ", " + country; - System.out.println(" Found location (HTML): " + location + " for auction " + href); - 
return location; - } - - // Pattern 3: Fallback - just find country code after HTML tags - java.util.regex.Pattern countryPattern = java.util.regex.Pattern.compile( - "(?:-->||)\\s*([A-Z]{2})(?![A-Za-z])"); - java.util.regex.Matcher countryMatcher = countryPattern.matcher(context); - - if (countryMatcher.find()) { - String country = countryMatcher.group(1); - System.out.println(" Found country code: " + country + " for auction " + href); - return "Unknown, " + country; - } - - System.out.println(" ⚠️ No location found for auction " + href); - return "Unknown"; - } - - /** - * Extracts human-readable title from URL slug - * Converts "some-auction-title-A1-12345" to "Some Auction Title" - */ - private String extractTitleFromHref(String href) { - // Extract everything between "/a/" and "-A1-" or "-A7-" - java.util.regex.Pattern titlePattern = java.util.regex.Pattern.compile( - "/a/(.+?)-A[17]-"); - java.util.regex.Matcher titleMatcher = titlePattern.matcher(href); - - if (titleMatcher.find()) { - String slug = titleMatcher.group(1); - // Convert kebab-case to Title Case - String[] words = slug.split("-"); - StringBuilder title = new StringBuilder(); - for (String word : words) { - if (!word.isEmpty()) { - title.append(Character.toUpperCase(word.charAt(0))) - .append(word.substring(1)) - .append(" "); - } - } - return title.toString().trim(); - } - - return "Untitled Auction"; - } - - /** - * Loads cached HTML for a page from SQLite database - * Returns null if not cached or cache has expired - * - * @param pageNumber Page number - * @return Cached HTML or null if not found/expired - */ - private String loadFromCache(int pageNumber) { - if (!useCache || cacheDb == null) return null; - - String url = pageNumber == 1 - ? 
AUCTIONS_BASE_URL - : AUCTIONS_BASE_URL + "?page=" + pageNumber; - - return cacheDb.get(url); - } - - /** - * Saves HTML to SQLite cache database with expiration time - * - * @param pageNumber Page number - * @param html HTML content - */ - private void saveToCache(int pageNumber, String html) { - if (!useCache || cacheDb == null) return; - - String url = pageNumber == 1 - ? AUCTIONS_BASE_URL - : AUCTIONS_BASE_URL + "?page=" + pageNumber; - - cacheDb.put(url, html, CACHE_EXPIRATION_HOURS); - } - - /** - * Filters auctions by location - * - * @param auctions List of auctions - * @param locationFilter Location string to match (e.g., "NL") - * @return Filtered list - */ - public static List filterByLocation(List auctions, String locationFilter) { - return auctions.stream() - .filter(a -> a.location.contains(locationFilter)) - .toList(); - } - - /** - * Entry point for testing - * - * Arguments: - * --max-visits : Maximum number of page visits (0 = unlimited, default) - * --no-cache : Disable caching - */ - public static void main(String[] args) throws Exception { - System.out.println("=== Troostwijk Auction Extractor ===\n"); - - // Parse command line arguments - boolean useCache = true; - int maxVisits = 0; // 0 = unlimited - - for (int i = 0; i < args.length; i++) { - switch (args[i]) { - case "--max-visits": - if (i + 1 < args.length) { - maxVisits = Integer.parseInt(args[++i]); - System.out.println("Max page visits set to: " + maxVisits); - } - break; - case "--no-cache": - useCache = false; - System.out.println("Caching disabled"); - break; - case "--help": - System.out.println("Usage: java TroostwijkAuctionExtractor [options]"); - System.out.println("Options:"); - System.out.println(" --max-visits : Limit actual page fetches to n (0 = unlimited)"); - System.out.println(" --no-cache : Disable page caching"); - System.out.println(" --help : Show this help message"); - return; - } - } - - TroostwijkAuctionExtractor extractor = new 
TroostwijkAuctionExtractor(useCache, maxVisits); - - try { - // Initialize browser - extractor.initialize(); - - // Extract all auctions - List allAuctions = extractor.extractAllAuctions(); - - // Filter for Dutch auctions only - List dutchAuctions = filterByLocation(allAuctions, "NL"); - - System.out.println("\n=== Results ==="); - System.out.println("Total auctions found: " + allAuctions.size()); - System.out.println("Dutch auctions (NL): " + dutchAuctions.size()); - System.out.println("Actual page visits: " + extractor.pageVisitCount); - - // Display first 10 Dutch auctions - System.out.println("\n=== Sample Dutch Auctions ==="); - dutchAuctions.stream() - .limit(10) - .forEach(System.out::println); - - } finally { - // Always close browser - extractor.close(); - } - } - - /** - * SQLite-based caching system for HTML pages with expiration support - */ - static class CacheDatabase { - private final String dbPath; - private Connection connection; - - public CacheDatabase(String dbPath) { - this.dbPath = dbPath; - } - - /** - * Initialize database and create schema - */ - public void initialize() throws SQLException, IOException { - // Create cache directory if it doesn't exist - Path cacheDir = Paths.get(dbPath).getParent(); - if (cacheDir != null) { - Files.createDirectories(cacheDir); - } - - connection = DriverManager.getConnection("jdbc:sqlite:" + dbPath); - - // Create cache table with URL as primary key - String createTable = """ - CREATE TABLE IF NOT EXISTS page_cache ( - url TEXT PRIMARY KEY, - html TEXT NOT NULL, - cached_at INTEGER NOT NULL, - expires_at INTEGER NOT NULL - ) - """; - - try (Statement stmt = connection.createStatement()) { - stmt.execute(createTable); - // Create index on expires_at for efficient cleanup - stmt.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON page_cache(expires_at)"); - } - - // Clean up expired entries on initialization - cleanupExpired(); - - System.out.println("✓ Cache database initialized"); - } - - /** - * Get 
cached HTML for a URL if it exists and hasn't expired - * - * @param url The URL to look up - * @return Cached HTML or null if not found/expired - */ - public synchronized String get(String url) { - String sql = "SELECT html FROM page_cache WHERE url = ? AND expires_at > ?"; - - try (PreparedStatement ps = connection.prepareStatement(sql)) { - ps.setString(1, url); - ps.setLong(2, Instant.now().getEpochSecond()); - - ResultSet rs = ps.executeQuery(); - if (rs.next()) { - return rs.getString("html"); - } - } catch (SQLException e) { - System.err.println("Cache read error: " + e.getMessage()); - } - - return null; - } - - /** - * Store HTML in cache with expiration time - * - * @param url The URL to cache - * @param html The HTML content - * @param expirationHours Hours until cache expires - */ - public synchronized void put(String url, String html, long expirationHours) { - String sql = """ - INSERT OR REPLACE INTO page_cache (url, html, cached_at, expires_at) - VALUES (?, ?, ?, ?) - """; - - long now = Instant.now().getEpochSecond(); - long expiresAt = now + (expirationHours * 3600); - - try (PreparedStatement ps = connection.prepareStatement(sql)) { - ps.setString(1, url); - ps.setString(2, html); - ps.setLong(3, now); - ps.setLong(4, expiresAt); - ps.executeUpdate(); - } catch (SQLException e) { - System.err.println("Cache write error: " + e.getMessage()); - } - } - - /** - * Remove expired cache entries - */ - public synchronized void cleanupExpired() { - String sql = "DELETE FROM page_cache WHERE expires_at <= ?"; - - try (PreparedStatement ps = connection.prepareStatement(sql)) { - ps.setLong(1, Instant.now().getEpochSecond()); - int deleted = ps.executeUpdate(); - if (deleted > 0) { - System.out.println("✓ Cleaned up " + deleted + " expired cache entries"); - } - } catch (SQLException e) { - System.err.println("Cache cleanup error: " + e.getMessage()); - } - } - - /** - * Get cache statistics - */ - public synchronized void printStats() { - String sql = 
"SELECT COUNT(*) as total, " + - "SUM(CASE WHEN expires_at > ? THEN 1 ELSE 0 END) as valid, " + - "SUM(LENGTH(html)) as total_size " + - "FROM page_cache"; - - try (PreparedStatement ps = connection.prepareStatement(sql)) { - ps.setLong(1, Instant.now().getEpochSecond()); - ResultSet rs = ps.executeQuery(); - - if (rs.next()) { - int total = rs.getInt("total"); - int valid = rs.getInt("valid"); - long size = rs.getLong("total_size"); - - System.out.println("\n=== Cache Statistics ==="); - System.out.println("Total entries: " + total); - System.out.println("Valid entries: " + valid); - System.out.println("Expired entries: " + (total - valid)); - System.out.println("Total size: " + (size / 1024) + " KB"); - } - } catch (SQLException e) { - System.err.println("Cache stats error: " + e.getMessage()); - } - } - - /** - * Close database connection - */ - public void close() { - if (connection != null) { - try { - connection.close(); - } catch (SQLException e) { - System.err.println("Error closing cache database: " + e.getMessage()); - } - } - } - } -} diff --git a/src/main/java/com/auction/TroostwijkScraper.java b/src/main/java/com/auction/TroostwijkScraper.java index 33ca429..0d69385 100644 --- a/src/main/java/com/auction/TroostwijkScraper.java +++ b/src/main/java/com/auction/TroostwijkScraper.java @@ -41,23 +41,20 @@ package com.auction; * database for later search. 
*/ +import com.fasterxml.jackson.databind.ObjectMapper; +import com.microsoft.playwright.Browser; +import com.microsoft.playwright.BrowserType; +import com.microsoft.playwright.Page; +import com.microsoft.playwright.Playwright; +import com.microsoft.playwright.options.WaitUntilState; import java.io.IOException; -import java.io.InputStream; import java.net.URI; import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; import java.nio.file.Files; -import java.nio.file.Path; import java.nio.file.Paths; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; -import java.time.Instant; -import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -65,30 +62,8 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import com.auction.TroostwijkAuctionExtractor.CacheDatabase; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.microsoft.playwright.Browser; -import com.microsoft.playwright.BrowserType; -import com.microsoft.playwright.Page; -import com.microsoft.playwright.Playwright; -import com.microsoft.playwright.options.WaitUntilState; -import net.bytebuddy.build.Plugin.Engine.Source.Element; -import org.jsoup.Jsoup; -import org.opencv.core.Core; -import org.opencv.core.Mat; -import org.opencv.core.Scalar; -import org.opencv.core.Size; -import org.opencv.dnn.Dnn; -import org.opencv.dnn.Net; -import org.opencv.imgcodecs.Imgcodecs; -import org.w3c.dom.Document; -import static org.opencv.dnn.Dnn.DNN_BACKEND_OPENCV; -import static org.opencv.dnn.Dnn.DNN_TARGET_CPU; - /** * Main scraper class. 
It encapsulates the logic for scraping auctions, * persisting data, scheduling updates, and performing object detection. @@ -149,7 +124,7 @@ public class TroostwijkScraper { this.notifier = new NotificationService(notificationConfig, unused); this.detector = new ObjectDetectionService(yoloCfgPath, yoloWeightsPath, classNamesPath); this.useCache = useCache; - this.cacheDb = useCache ? new TroostwijkAuctionExtractor.CacheDatabase(CACHE_DB_PATH) : null; + this.cacheDb = useCache ? new CacheDatabase(CACHE_DB_PATH) : null; // initialize DB db.ensureSchema(); @@ -164,12 +139,12 @@ public class TroostwijkScraper { */ public void initializeBrowser() { if (playwright == null) { - System.out.println("Initializing Playwright browser..."); + IO.println("Initializing Playwright browser..."); this.playwright = Playwright.create(); this.browser = playwright.chromium().launch(new BrowserType.LaunchOptions() .setHeadless(true) .setArgs(Arrays.asList("--no-sandbox", "--disable-setuid-sandbox"))); - System.out.println("✓ Browser ready"); + IO.println("✓ Browser ready"); } } @@ -209,30 +184,30 @@ public class TroostwijkScraper { if (browser == null) { initializeBrowser(); } + + var pageNumber = 1; + var hasMorePages = true; - int pageNumber = 1; - boolean hasMorePages = true; - - System.out.println("Starting Dutch auction discovery from " + AUCTIONS_PAGE); + IO.println("Starting Dutch auction discovery from " + AUCTIONS_PAGE); while (hasMorePages) { - System.out.println("\n[Page " + pageNumber + "] Fetching auctions..."); + IO.println("\n[Page " + pageNumber + "] Fetching auctions..."); // Check cache first - String html = loadFromCache(pageNumber); + var html = loadFromCache(pageNumber); if (html != null) { - System.out.println(" ✓ Loaded from cache"); + IO.println(" ✓ Loaded from cache"); } else { // Fetch with Playwright html = fetchPageWithPlaywright(pageNumber); if (html == null || html.isEmpty()) { - System.out.println(" ⚠️ Failed to fetch page, stopping pagination"); + 
IO.println(" ⚠️ Failed to fetch page, stopping pagination"); break; } - System.out.println(" ✓ Fetched from website"); + IO.println(" ✓ Fetched from website"); // Save to cache if (useCache) { @@ -249,46 +224,32 @@ public class TroostwijkScraper { } // Parse auctions from HTML (saves Dutch auctions to database) - int foundOnPage = parseAuctionsFromHtml(html, saleIds); + var foundOnPage = parseAuctionsFromHtml(html, saleIds); if (foundOnPage == 0) { - System.out.println(" ⚠️ No Dutch auctions found on page, stopping pagination"); + IO.println(" ⚠️ No Dutch auctions found on page, stopping pagination"); hasMorePages = false; } else { - System.out.println(" ✓ Found " + foundOnPage + " Dutch auctions"); + IO.println(" ✓ Found " + foundOnPage + " Dutch auctions"); pageNumber++; } } - System.out.println("\n✓ Total Dutch auctions discovered: " + saleIds.size()); + IO.println("\n✓ Total Dutch auctions discovered: " + saleIds.size()); return new ArrayList<>(saleIds); } - /** - * Gets all Dutch auctions from the database. - * Call discoverDutchAuctions() first to populate the database. - * - * @return List of Dutch auctions with full metadata - */ - public List getDutchAuctions() { - try { - return db.getAuctionsByCountry("NL"); - } catch (SQLException e) { - System.err.println("Failed to retrieve Dutch auctions from database: " + e.getMessage()); - return new ArrayList<>(); - } - } /** * Fetches a single page using Playwright */ private String fetchPageWithPlaywright(int pageNumber) { - String url = pageNumber == 1 + var url = pageNumber == 1 ? 
AUCTIONS_PAGE : AUCTIONS_PAGE + "?page=" + pageNumber; try { - Page page = browser.newPage(); + var page = browser.newPage(); // Set user agent page.setExtraHTTPHeaders(Map.of( @@ -306,11 +267,11 @@ public class TroostwijkScraper { .setTimeout(10000)); } catch (Exception e) { // Continue even if selector not found - System.out.println(" ⚠️ Auction selector not found"); + IO.println(" ⚠️ Auction selector not found"); } // Get HTML content - String html = page.content(); + var html = page.content(); page.close(); return html; @@ -327,27 +288,27 @@ public class TroostwijkScraper { * @return number of Dutch auctions found on this page */ private int parseAuctionsFromHtml(String html, Set saleIds) { - int foundCount = 0; + var foundCount = 0; try { - org.jsoup.nodes.Document doc = org.jsoup.Jsoup.parse(html); + var doc = org.jsoup.Jsoup.parse(html); // Find all auction links (format: /a/title-A1-12345 or /a/title-A7-12345) - org.jsoup.select.Elements auctionLinks = doc.select("a[href^='/a/']"); + var auctionLinks = doc.select("a[href^='/a/']"); - for (org.jsoup.nodes.Element link : auctionLinks) { - String href = link.attr("href"); + for (var link : auctionLinks) { + var href = link.attr("href"); // Extract auction ID from URL - java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)"); - java.util.regex.Matcher matcher = pattern.matcher(href); + var pattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)"); + var matcher = pattern.matcher(href); if (!matcher.find()) { continue; } - - String typeNum = matcher.group(1); - int auctionId = Integer.parseInt(matcher.group(2)); + + var typeNum = matcher.group(1); + var auctionId = Integer.parseInt(matcher.group(2)); // Skip duplicates if (saleIds.contains(auctionId)) { @@ -355,7 +316,7 @@ public class TroostwijkScraper { } // Extract auction info using JSoup - AuctionInfo auction = extractAuctionInfo(link, href, auctionId, "A" + typeNum); + var auction = extractAuctionInfo(link, href, 
auctionId, "A" + typeNum); // Only keep Dutch auctions if (auction != null && "NL".equals(auction.country)) { @@ -365,7 +326,7 @@ public class TroostwijkScraper { // Save to database try { db.upsertAuction(auction); - System.out.println(" Found Dutch auction: " + auctionId + " - " + auction.title + " (" + auction.location + ")"); + IO.println(" Found Dutch auction: " + auctionId + " - " + auction.title + " (" + auction.location + ")"); } catch (SQLException e) { System.err.println(" Failed to save auction: " + e.getMessage()); } @@ -386,16 +347,16 @@ public class TroostwijkScraper { * - Lot count (if available) */ private AuctionInfo extractAuctionInfo(org.jsoup.nodes.Element link, String href, int auctionId, String type) { - AuctionInfo auction = new AuctionInfo(); + var auction = new AuctionInfo(); auction.auctionId = auctionId; auction.type = type; auction.url = "https://www.troostwijkauctions.com" + href; // Extract title from href (convert kebab-case to title) - java.util.regex.Pattern titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-"); - java.util.regex.Matcher titleMatcher = titlePattern.matcher(href); + var titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-"); + var titleMatcher = titlePattern.matcher(href); if (titleMatcher.find()) { - String slug = titleMatcher.group(1); + var slug = titleMatcher.group(1); auction.title = slug.replace("-", " "); // Capitalize first letter if (!auction.title.isEmpty()) { @@ -406,10 +367,10 @@ public class TroostwijkScraper { } // Try to find title in link text (more accurate) - String linkText = link.text(); + var linkText = link.text(); if (!linkText.isEmpty() && !linkText.matches(".*\\d+.*")) { // If link text doesn't contain numbers, it's likely the title - String[] parts = linkText.split(",|\\d+"); + var parts = linkText.split(",|\\d+"); if (parts.length > 0 && parts[0].trim().length() > 5) { auction.title = parts[0].trim(); } @@ -417,15 +378,15 @@ public class TroostwijkScraper { // Extract 
location using JSoup selectors // Look for

tags that contain location info - org.jsoup.select.Elements locationElements = link.select("p"); - for (org.jsoup.nodes.Element p : locationElements) { - String text = p.text(); + var locationElements = link.select("p"); + for (var p : locationElements) { + var text = p.text(); // Pattern: "City, Country" or "City, Region, Country" if (text.matches(".*[A-Z]{2}$")) { // Ends with 2-letter country code - String countryCode = text.substring(text.length() - 2); - String cityPart = text.substring(0, text.length() - 2).trim(); + var countryCode = text.substring(text.length() - 2); + var cityPart = text.substring(0, text.length() - 2).trim(); // Remove trailing comma or whitespace cityPart = cityPart.replaceAll("[,\\s]+$", ""); @@ -439,14 +400,14 @@ public class TroostwijkScraper { // Fallback: check HTML content directly if (auction.country == null) { - String html = link.html(); - java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile( + var html = link.html(); + var locPattern = java.util.regex.Pattern.compile( "([A-Za-z][A-Za-z\\s,\\-']+?)\\s*(?:)?\\s*\\s*([A-Z]{2})(?![A-Za-z])"); - java.util.regex.Matcher locMatcher = locPattern.matcher(html); + var locMatcher = locPattern.matcher(html); if (locMatcher.find()) { - String city = locMatcher.group(1).trim().replaceAll(",$", ""); - String country = locMatcher.group(2); + var city = locMatcher.group(1).trim().replaceAll(",$", ""); + var country = locMatcher.group(2); auction.city = city; auction.country = country; auction.location = city + ", " + country; @@ -454,12 +415,12 @@ public class TroostwijkScraper { } // Extract lot count if available (kavels/lots) - org.jsoup.select.Elements textElements = link.select("*"); - for (org.jsoup.nodes.Element elem : textElements) { - String text = elem.ownText(); + var textElements = link.select("*"); + for (var elem : textElements) { + var text = elem.ownText(); if (text.matches("\\d+\\s+(?:kavel|lot|item)s?.*")) { - java.util.regex.Pattern countPattern = 
java.util.regex.Pattern.compile("(\\d+)"); - java.util.regex.Matcher countMatcher = countPattern.matcher(text); + var countPattern = java.util.regex.Pattern.compile("(\\d+)"); + var countMatcher = countPattern.matcher(text); if (countMatcher.find()) { auction.lotCount = Integer.parseInt(countMatcher.group(1)); break; @@ -475,8 +436,8 @@ public class TroostwijkScraper { */ private String loadFromCache(int pageNumber) { if (!useCache || cacheDb == null) return null; - - String url = pageNumber == 1 + + var url = pageNumber == 1 ? AUCTIONS_PAGE : AUCTIONS_PAGE + "?page=" + pageNumber; @@ -488,8 +449,8 @@ public class TroostwijkScraper { */ private void saveToCache(int pageNumber, String html) { if (!useCache || cacheDb == null) return; - - String url = pageNumber == 1 + + var url = pageNumber == 1 ? AUCTIONS_PAGE : AUCTIONS_PAGE + "?page=" + pageNumber; @@ -506,28 +467,28 @@ public class TroostwijkScraper { * @param saleId the sale identifier */ public void fetchLotsForSale(int saleId) { - int batchSize = 200; - int offset = 0; - boolean more = true; - int totalLots = 0; + var batchSize = 200; + var offset = 0; + var more = true; + var totalLots = 0; while (more) { try { - String url = LOT_API + "?batchSize=" + batchSize - + "&listType=7&offset=" + offset - + "&sortOption=0&saleID=" + saleId - + "&parentID=0&relationID=0&buildversion=201807311"; + var url = LOT_API + "?batchSize=" + batchSize + + "&listType=7&offset=" + offset + + "&sortOption=0&saleID=" + saleId + + "&parentID=0&relationID=0&buildversion=201807311"; - System.out.println(" Fetching lots from API (offset=" + offset + ")..."); + IO.println(" Fetching lots from API (offset=" + offset + ")..."); - HttpRequest request = HttpRequest.newBuilder() - .uri(URI.create(url)) - .header("Accept", "application/json") - .header("User-Agent", "Mozilla/5.0") - .GET() - .build(); + var request = HttpRequest.newBuilder() + .uri(URI.create(url)) + .header("Accept", "application/json") + .header("User-Agent", 
"Mozilla/5.0") + .GET() + .build(); - HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); if (response.statusCode() != 200) { System.err.println(" ⚠️ API call failed for sale " + saleId); @@ -536,22 +497,22 @@ public class TroostwijkScraper { break; } - JsonNode root = objectMapper.readTree(response.body()); - JsonNode results = root.path("results"); + var root = objectMapper.readTree(response.body()); + var results = root.path("results"); if (!results.isArray() || results.isEmpty()) { if (offset == 0) { - System.out.println(" ⚠️ No lots found for sale " + saleId); - System.out.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length()))); + IO.println(" ⚠️ No lots found for sale " + saleId); + IO.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length()))); } more = false; break; } - int lotsInBatch = results.size(); - System.out.println(" Found " + lotsInBatch + " lots in this batch"); + var lotsInBatch = results.size(); + IO.println(" Found " + lotsInBatch + " lots in this batch"); - for (JsonNode node : results) { - Lot lot = new Lot(); + for (var node : results) { + var lot = new Lot(); lot.saleId = saleId; lot.lotId = node.path("lotID").asInt(); lot.title = node.path("t").asText(); @@ -571,26 +532,26 @@ public class TroostwijkScraper { // Download images and perform object detection List imageUrls = new ArrayList<>(); - JsonNode imgs = node.path("imgs"); + var imgs = node.path("imgs"); if (imgs.isArray()) { - for (JsonNode imgNode : imgs) { - String imgUrl = imgNode.asText(); + for (var imgNode : imgs) { + var imgUrl = imgNode.asText(); imageUrls.add(imgUrl); } } // Download and analyze images (optional, can be slow) - for (String imgUrl : imageUrls) { - String fileName = downloadImage(imgUrl, saleId, lot.lotId); + for (var imgUrl : imageUrls) { + var fileName = 
downloadImage(imgUrl, saleId, lot.lotId); if (fileName != null) { // run object detection once per image - List labels = detector.detectObjects(fileName); + var labels = detector.detectObjects(fileName); db.insertImage(lot.lotId, imgUrl, fileName, labels); } } } - System.out.println(" ✓ Processed " + totalLots + " lots so far"); + IO.println(" ✓ Processed " + totalLots + " lots so far"); offset += batchSize; } catch (IOException | InterruptedException e) { System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage()); @@ -612,16 +573,16 @@ public class TroostwijkScraper { */ private String downloadImage(String imageUrl, int saleId, int lotId) { try { - HttpRequest request = HttpRequest.newBuilder() - .uri(URI.create(imageUrl)) - .GET() - .build(); - HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + var request = HttpRequest.newBuilder() + .uri(URI.create(imageUrl)) + .GET() + .build(); + var response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); if (response.statusCode() == 200) { - Path dir = Paths.get("images", String.valueOf(saleId), String.valueOf(lotId)); + var dir = Paths.get("images", String.valueOf(saleId), String.valueOf(lotId)); Files.createDirectories(dir); - String fileName = Paths.get(imageUrl).getFileName().toString(); - Path dest = dir.resolve(fileName); + var fileName = Paths.get(imageUrl).getFileName().toString(); + var dest = dir.resolve(fileName); Files.copy(response.body(), dest); return dest.toAbsolutePath().toString(); } @@ -639,16 +600,16 @@ public class TroostwijkScraper { * expire, a Pushover notification is sent to the configured user. * Note: In production, ensure proper shutdown handling for the scheduler. 
*/ - public ScheduledExecutorService scheduleMonitoring() { - ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1); + public void scheduleMonitoring() { + var scheduler = Executors.newScheduledThreadPool(1); scheduler.scheduleAtFixedRate(() -> { try { - List activeLots = db.getActiveLots(); - for (Lot lot : activeLots) { + var activeLots = db.getActiveLots(); + for (var lot : activeLots) { // refresh the lot's bidding information via API refreshLotBid(lot); // check closing time to adjust monitoring - long minutesLeft = lot.minutesUntilClose(); + var minutesLeft = lot.minutesUntilClose(); if (minutesLeft < 30) { // send warning when within 5 minutes if (minutesLeft <= 5 && !lot.closingNotified) { @@ -665,7 +626,6 @@ public class TroostwijkScraper { System.err.println("Error during scheduled monitoring: " + e.getMessage()); } }, 0, 1, TimeUnit.HOURS); - return scheduler; } /** @@ -677,21 +637,21 @@ public class TroostwijkScraper { */ private void refreshLotBid(Lot lot) { try { - String url = LOT_API + "?batchSize=1&listType=7&offset=0&sortOption=0&saleID=" + lot.saleId - + "&parentID=0&relationID=0&buildversion=201807311&lotID=" + lot.lotId; - HttpRequest request = HttpRequest.newBuilder().uri(URI.create(url)).GET().build(); - HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + var url = LOT_API + "?batchSize=1&listType=7&offset=0&sortOption=0&saleID=" + lot.saleId + + "&parentID=0&relationID=0&buildversion=201807311&lotID=" + lot.lotId; + var request = HttpRequest.newBuilder().uri(URI.create(url)).GET().build(); + var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); if (response.statusCode() != 200) return; - JsonNode root = objectMapper.readTree(response.body()); - JsonNode results = root.path("results"); + var root = objectMapper.readTree(response.body()); + var results = root.path("results"); if (results.isArray() && !results.isEmpty()) { - JsonNode node = results.get(0); - 
double newBid = node.path("cb").asDouble(); + var node = results.get(0); + var newBid = node.path("cb").asDouble(); if (Double.compare(newBid, lot.currentBid) > 0) { - double previous = lot.currentBid; + var previous = lot.currentBid; lot.currentBid = newBid; db.updateLotCurrentBid(lot); - String msg = String.format("Nieuw bod op kavel %d: €%.2f (was €%.2f)", lot.lotId, newBid, previous); + var msg = String.format("Nieuw bod op kavel %d: €%.2f (was €%.2f)", lot.lotId, newBid, previous); notifier.sendNotification(msg, "Kavel bieding update", 0); } } @@ -700,78 +660,20 @@ public class TroostwijkScraper { } } - /** - * Entry point. Configure database location, notification settings, and - * YOLO model paths here before running. Once started the scraper - * discovers Dutch auctions, scrapes lots, and begins monitoring. - */ - public static void main(String[] args) throws Exception { - System.out.println("=== Troostwijk Auction Scraper ===\n"); - - // Configuration parameters (replace with your own values) - String databaseFile = "troostwijk.db"; - - // Notification configuration - choose one: - // Option 1: Desktop notifications only (free, no setup required) - String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop"); - - // Option 2: Desktop + Email via Gmail (free, requires Gmail app password) - // Format: "smtp:username:appPassword:toEmail" - // Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com" - // Get app password: Google Account > Security > 2-Step Verification > App passwords - - // YOLO model paths (optional - scraper works without object detection) - String yoloCfg = "models/yolov4.cfg"; - String yoloWeights = "models/yolov4.weights"; - String yoloClasses = "models/coco.names"; - - // Load native OpenCV library - System.loadLibrary(Core.NATIVE_LIBRARY_NAME); - - System.out.println("Initializing scraper..."); - TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "", - 
yoloCfg, yoloWeights, yoloClasses); - - // Step 1: Discover auctions in NL - System.out.println("\n[1/3] Discovering Dutch auctions..."); - List auctions = scraper.discoverDutchAuctions(); - System.out.println("✓ Found " + auctions.size() + " auctions: " + auctions); - - // Step 2: Fetch lots for each auction - System.out.println("\n[2/3] Fetching lot details..."); - int totalAuctions = auctions.size(); - int currentAuction = 0; - for (int saleId : auctions) { - currentAuction++; - System.out.println(" [Page " + currentAuction + "] Fetching auctions..."); - System.out.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "..."); - scraper.fetchLotsForSale(saleId); - } - - // Show database summary - System.out.println("\n📊 Database Summary:"); - scraper.printDatabaseStats(); - - // Step 3: Start monitoring bids and closures - System.out.println("\n[3/3] Starting monitoring service..."); - scraper.scheduleMonitoring(); - System.out.println("✓ Monitoring active. Press Ctrl+C to stop.\n"); - } - /** * Prints statistics about the data in the database. 
*/ - private void printDatabaseStats() { + public void printDatabaseStats() { try { - List allLots = db.getAllLots(); - int imageCount = db.getImageCount(); + var allLots = db.getAllLots(); + var imageCount = db.getImageCount(); - System.out.println(" Total lots in database: " + allLots.size()); - System.out.println(" Total images downloaded: " + imageCount); + IO.println(" Total lots in database: " + allLots.size()); + IO.println(" Total images downloaded: " + imageCount); if (!allLots.isEmpty()) { - double totalBids = allLots.stream().mapToDouble(l -> l.currentBid).sum(); - System.out.println(" Total current bids: €" + String.format("%.2f", totalBids)); + var totalBids = allLots.stream().mapToDouble(l -> l.currentBid).sum(); + IO.println(" Total current bids: €" + String.format("%.2f", totalBids)); } } catch (SQLException e) { System.err.println(" ⚠️ Could not retrieve database stats: " + e.getMessage()); @@ -782,610 +684,4 @@ public class TroostwijkScraper { // Domain classes and services // ---------------------------------------------------------------------- - /** - * Represents auction metadata (veiling informatie) - */ - public static class AuctionInfo { - public int auctionId; // Unique auction ID (from URL) - public String title; // Auction title - public String location; // Location (e.g., "Amsterdam, NL") - public String city; // City name - public String country; // Country code (e.g., "NL") - public String url; // Full auction URL - public String type; // Auction type (A1 or A7) - public int lotCount; // Number of lots/kavels - public LocalDateTime closingTime; // Closing time if available - - @Override - public String toString() { - return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}", - auctionId, type, title, location, lotCount, url); - } - } - - /** - * Simple POJO representing a lot (kavel) in an auction. It keeps track - * of the sale it belongs to, current bid and closing time. 
The method - * minutesUntilClose computes how many minutes remain until the lot closes. - */ - static class Lot { - - int saleId; - int lotId; - String title; - String description; - String manufacturer; - String type; - int year; - String category; - double currentBid; - String currency; - String url; - LocalDateTime closingTime; // null if unknown - boolean closingNotified; - - long minutesUntilClose() { - if (closingTime == null) return Long.MAX_VALUE; - return java.time.Duration.between(LocalDateTime.now(), closingTime).toMinutes(); - } - } - - /** - * Service for persisting auctions, lots, images, and object labels into - * a SQLite database. Uses the Xerial JDBC driver which connects to - * SQLite via a URL of the form "jdbc:sqlite:path_to_file"【329850066306528†L40-L63】. - */ - static public class DatabaseService { - - private final String url; - DatabaseService(String dbPath) { - this.url = "jdbc:sqlite:" + dbPath; - } - /** - * Creates tables if they do not already exist. The schema includes - * tables for auctions, lots, images, and object labels. This method is - * idempotent; it can be called multiple times. 
- */ - void ensureSchema() throws SQLException { - try (Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement()) { - // Auctions table (veilingen) - stmt.execute("CREATE TABLE IF NOT EXISTS auctions (" - + "auction_id INTEGER PRIMARY KEY," - + "title TEXT NOT NULL," - + "location TEXT," - + "city TEXT," - + "country TEXT," - + "url TEXT NOT NULL," - + "type TEXT," - + "lot_count INTEGER DEFAULT 0," - + "closing_time TEXT," - + "discovered_at INTEGER" // Unix timestamp - + ")"); - - // Sales table (legacy - keep for compatibility) - stmt.execute("CREATE TABLE IF NOT EXISTS sales (" - + "sale_id INTEGER PRIMARY KEY," - + "title TEXT," - + "location TEXT," - + "closing_time TEXT" - + ")"); - - // Lots table - stmt.execute("CREATE TABLE IF NOT EXISTS lots (" - + "lot_id INTEGER PRIMARY KEY," - + "sale_id INTEGER," - + "title TEXT," - + "description TEXT," - + "manufacturer TEXT," - + "type TEXT," - + "year INTEGER," - + "category TEXT," - + "current_bid REAL," - + "currency TEXT," - + "url TEXT," - + "closing_time TEXT," - + "closing_notified INTEGER DEFAULT 0," - + "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)" - + ")"); - - // Images table - stmt.execute("CREATE TABLE IF NOT EXISTS images (" - + "id INTEGER PRIMARY KEY AUTOINCREMENT," - + "lot_id INTEGER," - + "url TEXT," - + "file_path TEXT," - + "labels TEXT," - + "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)" - + ")"); - - // Create indexes for better query performance - stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)"); - stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)"); - } - } - - /** - * Inserts or updates an auction record - */ - synchronized void upsertAuction(AuctionInfo auction) throws SQLException { - String sql = "INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)" - + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - + " ON 
CONFLICT(auction_id) DO UPDATE SET " - + "title = excluded.title, location = excluded.location, city = excluded.city, " - + "country = excluded.country, url = excluded.url, type = excluded.type, " - + "lot_count = excluded.lot_count, closing_time = excluded.closing_time"; - - try (Connection conn = DriverManager.getConnection(url); PreparedStatement ps = conn.prepareStatement(sql)) { - ps.setInt(1, auction.auctionId); - ps.setString(2, auction.title); - ps.setString(3, auction.location); - ps.setString(4, auction.city); - ps.setString(5, auction.country); - ps.setString(6, auction.url); - ps.setString(7, auction.type); - ps.setInt(8, auction.lotCount); - ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null); - ps.setLong(10, Instant.now().getEpochSecond()); - ps.executeUpdate(); - } - } - - /** - * Retrieves all auctions from the database - */ - synchronized List getAllAuctions() throws SQLException { - List auctions = new ArrayList<>(); - String sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time FROM auctions"; - - try (Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement()) { - ResultSet rs = stmt.executeQuery(sql); - while (rs.next()) { - AuctionInfo auction = new AuctionInfo(); - auction.auctionId = rs.getInt("auction_id"); - auction.title = rs.getString("title"); - auction.location = rs.getString("location"); - auction.city = rs.getString("city"); - auction.country = rs.getString("country"); - auction.url = rs.getString("url"); - auction.type = rs.getString("type"); - auction.lotCount = rs.getInt("lot_count"); - String closing = rs.getString("closing_time"); - if (closing != null) { - auction.closingTime = LocalDateTime.parse(closing); - } - auctions.add(auction); - } - } - return auctions; - } - - /** - * Retrieves auctions by country code - */ - synchronized List getAuctionsByCountry(String countryCode) throws SQLException { - List auctions = new 
ArrayList<>(); - String sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time " - + "FROM auctions WHERE country = ?"; - - try (Connection conn = DriverManager.getConnection(url); PreparedStatement ps = conn.prepareStatement(sql)) { - ps.setString(1, countryCode); - ResultSet rs = ps.executeQuery(); - while (rs.next()) { - AuctionInfo auction = new AuctionInfo(); - auction.auctionId = rs.getInt("auction_id"); - auction.title = rs.getString("title"); - auction.location = rs.getString("location"); - auction.city = rs.getString("city"); - auction.country = rs.getString("country"); - auction.url = rs.getString("url"); - auction.type = rs.getString("type"); - auction.lotCount = rs.getInt("lot_count"); - String closing = rs.getString("closing_time"); - if (closing != null) { - auction.closingTime = LocalDateTime.parse(closing); - } - auctions.add(auction); - } - } - return auctions; - } - - /** - * Inserts or updates a lot record. Uses INSERT OR REPLACE to - * implement upsert semantics so that existing rows are replaced. 
- */ - synchronized void upsertLot(Lot lot) throws SQLException { - String sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)" - + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" - + " ON CONFLICT(lot_id) DO UPDATE SET " - + "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, " - + "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, " - + "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time"; - try (Connection conn = DriverManager.getConnection(url); PreparedStatement ps = conn.prepareStatement(sql)) { - ps.setInt(1, lot.lotId); - ps.setInt(2, lot.saleId); - ps.setString(3, lot.title); - ps.setString(4, lot.description); - ps.setString(5, lot.manufacturer); - ps.setString(6, lot.type); - ps.setInt(7, lot.year); - ps.setString(8, lot.category); - ps.setDouble(9, lot.currentBid); - ps.setString(10, lot.currency); - ps.setString(11, lot.url); - ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null); - ps.setInt(13, lot.closingNotified ? 1 : 0); - ps.executeUpdate(); - } - } - - /** - * Inserts a new image record. Each image is associated with a lot and - * stores both the original URL and the local file path. Detected - * labels are stored as a comma separated string. 
- */ - synchronized void insertImage(int lotId, String url, String filePath, List labels) throws SQLException { - String sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)"; - try (Connection conn = DriverManager.getConnection(this.url); PreparedStatement ps = conn.prepareStatement(sql)) { - ps.setInt(1, lotId); - ps.setString(2, url); - ps.setString(3, filePath); - ps.setString(4, String.join(",", labels)); - ps.executeUpdate(); - } - } - - /** - * Retrieves all lots that are still active (i.e., have a closing time - * in the future or unknown). Only these lots need to be monitored. - */ - synchronized List getActiveLots() throws SQLException { - List list = new ArrayList<>(); - String sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots"; - try (Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement()) { - ResultSet rs = stmt.executeQuery(sql); - while (rs.next()) { - Lot lot = new Lot(); - lot.lotId = rs.getInt("lot_id"); - lot.saleId = rs.getInt("sale_id"); - lot.currentBid = rs.getDouble("current_bid"); - lot.currency = rs.getString("currency"); - String closing = rs.getString("closing_time"); - lot.closingNotified = rs.getInt("closing_notified") != 0; - if (closing != null) { - lot.closingTime = LocalDateTime.parse(closing); - } - list.add(lot); - } - } - return list; - } - - /** - * Retrieves all lots from the database. 
- */ - synchronized List getAllLots() throws SQLException { - List list = new ArrayList<>(); - String sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots"; - try (Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement()) { - ResultSet rs = stmt.executeQuery(sql); - while (rs.next()) { - Lot lot = new Lot(); - lot.lotId = rs.getInt("lot_id"); - lot.saleId = rs.getInt("sale_id"); - lot.title = rs.getString("title"); - lot.currentBid = rs.getDouble("current_bid"); - lot.currency = rs.getString("currency"); - list.add(lot); - } - } - return list; - } - - /** - * Gets the total number of images in the database. - */ - synchronized int getImageCount() throws SQLException { - String sql = "SELECT COUNT(*) as count FROM images"; - try (Connection conn = DriverManager.getConnection(url); Statement stmt = conn.createStatement()) { - ResultSet rs = stmt.executeQuery(sql); - if (rs.next()) { - return rs.getInt("count"); - } - } - return 0; - } - - /** - * Updates the current bid of a lot after a bid refresh. - */ - synchronized void updateLotCurrentBid(Lot lot) throws SQLException { - try (Connection conn = DriverManager.getConnection(url); PreparedStatement ps = conn.prepareStatement( - "UPDATE lots SET current_bid = ? WHERE lot_id = ?")) { - ps.setDouble(1, lot.currentBid); - ps.setInt(2, lot.lotId); - ps.executeUpdate(); - } - } - - /** - * Updates the closingNotified flag of a lot (set to 1 when we have - * warned the user about its imminent closure). - */ - synchronized void updateLotNotificationFlags(Lot lot) throws SQLException { - try (Connection conn = DriverManager.getConnection(url); PreparedStatement ps = conn.prepareStatement( - "UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) { - ps.setInt(1, lot.closingNotified ? 1 : 0); - ps.setInt(2, lot.lotId); - ps.executeUpdate(); - } - } - } - - /** - * Service for sending notifications via desktop notifications and/or email. 
- * Supports free notification methods: - * 1. Desktop notifications (Windows/Linux/macOS system tray) - * 2. Email via Gmail SMTP (free, requires app password) - * - * Configuration: - * - For email: Set notificationEmail to your Gmail address - * - Enable 2FA in Gmail and create an App Password - * - Use format "smtp:username:appPassword:toEmail" for credentials - * - Or use "desktop" for desktop-only notifications - */ - static class NotificationService { - - private final boolean useDesktop; - private final boolean useEmail; - private final String smtpUsername; - private final String smtpPassword; - private final String toEmail; - - /** - * Creates a notification service. - * - * @param config "desktop" for desktop only, or "smtp:username:password:toEmail" for email - * @param unusedParam Kept for compatibility (can pass empty string) - */ - NotificationService(String config, String unusedParam) { - - if ("desktop".equalsIgnoreCase(config)) { - this.useDesktop = true; - this.useEmail = false; - this.smtpUsername = null; - this.smtpPassword = null; - this.toEmail = null; - } else if (config.startsWith("smtp:")) { - String[] parts = config.split(":", 4); - if (parts.length != 4) { - throw new IllegalArgumentException("Email config must be 'smtp:username:password:toEmail'"); - } - this.useDesktop = true; // Always include desktop - this.useEmail = true; - this.smtpUsername = parts[1]; - this.smtpPassword = parts[2]; - this.toEmail = parts[3]; - } else { - throw new IllegalArgumentException("Config must be 'desktop' or 'smtp:username:password:toEmail'"); - } - } - - /** - * Sends notification via configured channels. 
- * - * @param message The message body - * @param title Message title - * @param priority Priority level (0=normal, 1=high) - */ - void sendNotification(String message, String title, int priority) { - if (useDesktop) { - sendDesktopNotification(title, message, priority); - } - if (useEmail) { - sendEmailNotification(title, message, priority); - } - } - - /** - * Sends a desktop notification using system tray. - * Works on Windows, macOS, and Linux with desktop environments. - */ - private void sendDesktopNotification(String title, String message, int priority) { - try { - if (java.awt.SystemTray.isSupported()) { - java.awt.SystemTray tray = java.awt.SystemTray.getSystemTray(); - java.awt.Image image = java.awt.Toolkit.getDefaultToolkit() - .createImage(new byte[0]); // Empty image - - java.awt.TrayIcon trayIcon = new java.awt.TrayIcon(image, "Troostwijk Scraper"); - trayIcon.setImageAutoSize(true); - - java.awt.TrayIcon.MessageType messageType = priority > 0 - ? java.awt.TrayIcon.MessageType.WARNING - : java.awt.TrayIcon.MessageType.INFO; - - tray.add(trayIcon); - trayIcon.displayMessage(title, message, messageType); - - // Remove icon after 2 seconds to avoid clutter - Thread.sleep(2000); - tray.remove(trayIcon); - - System.out.println("Desktop notification sent: " + title); - } else { - System.out.println("Desktop notifications not supported, logging: " + title + " - " + message); - } - } catch (Exception e) { - System.err.println("Desktop notification failed: " + e.getMessage()); - } - } - - /** - * Sends email notification via Gmail SMTP (free). - * Uses Gmail's SMTP server with app password authentication. 
- */ - private void sendEmailNotification(String title, String message, int priority) { - try { - java.util.Properties props = new java.util.Properties(); - props.put("mail.smtp.auth", "true"); - props.put("mail.smtp.starttls.enable", "true"); - props.put("mail.smtp.host", "smtp.gmail.com"); - props.put("mail.smtp.port", "587"); - props.put("mail.smtp.ssl.trust", "smtp.gmail.com"); - - javax.mail.Session session = javax.mail.Session.getInstance(props, - new javax.mail.Authenticator() { - - protected javax.mail.PasswordAuthentication getPasswordAuthentication() { - return new javax.mail.PasswordAuthentication(smtpUsername, smtpPassword); - } - }); - - javax.mail.Message msg = new javax.mail.internet.MimeMessage(session); - msg.setFrom(new javax.mail.internet.InternetAddress(smtpUsername)); - msg.setRecipients(javax.mail.Message.RecipientType.TO, - javax.mail.internet.InternetAddress.parse(toEmail)); - msg.setSubject("[Troostwijk] " + title); - msg.setText(message); - msg.setSentDate(new java.util.Date()); - - if (priority > 0) { - msg.setHeader("X-Priority", "1"); - msg.setHeader("Importance", "High"); - } - - javax.mail.Transport.send(msg); - System.out.println("Email notification sent: " + title); - - } catch (Exception e) { - System.err.println("Email notification failed: " + e.getMessage()); - } - } - } - - /** - * Service for performing object detection on images using OpenCV's DNN - * module. The DNN module can load pre‑trained models from several - * frameworks (Darknet, TensorFlow, ONNX, etc.)【784097309529506†L209-L233】. Here - * we load a YOLO model (Darknet) by specifying the configuration and - * weights files. For each image we run a forward pass and return a - * list of detected class labels. - * - * If model files are not found, the service operates in disabled mode - * and returns empty lists. 
- */ - static class ObjectDetectionService { - - private final Net net; - private final List classNames; - private final boolean enabled; - - ObjectDetectionService(String cfgPath, String weightsPath, String classNamesPath) throws IOException { - // Check if model files exist - Path cfgFile = Paths.get(cfgPath); - Path weightsFile = Paths.get(weightsPath); - Path classNamesFile = Paths.get(classNamesPath); - - if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) { - System.out.println("⚠️ Object detection disabled: YOLO model files not found"); - System.out.println(" Expected files:"); - System.out.println(" - " + cfgPath); - System.out.println(" - " + weightsPath); - System.out.println(" - " + classNamesPath); - System.out.println(" Scraper will continue without image analysis."); - this.enabled = false; - this.net = null; - this.classNames = new ArrayList<>(); - return; - } - - try { - // Load network - this.net = Dnn.readNetFromDarknet(cfgPath, weightsPath); - this.net.setPreferableBackend(DNN_BACKEND_OPENCV); - this.net.setPreferableTarget(DNN_TARGET_CPU); - // Load class names (one per line) - this.classNames = Files.readAllLines(classNamesFile); - this.enabled = true; - System.out.println("✓ Object detection enabled with YOLO"); - } catch (Exception e) { - System.err.println("⚠️ Object detection disabled: " + e.getMessage()); - throw new IOException("Failed to initialize object detection", e); - } - } - /** - * Detects objects in the given image file and returns a list of - * human‑readable labels. Only detections above a confidence - * threshold are returned. For brevity this method omits drawing - * bounding boxes. See the OpenCV DNN documentation for details on - * post‑processing【784097309529506†L324-L344】. 
- * - * @param imagePath absolute path to the image - * @return list of detected class names (empty if detection disabled) - */ - List detectObjects(String imagePath) { - if (!enabled) { - return new ArrayList<>(); - } - - List labels = new ArrayList<>(); - Mat image = Imgcodecs.imread(imagePath); - if (image.empty()) return labels; - // Create a 4D blob from the image - Mat blob = Dnn.blobFromImage(image, 1.0 / 255.0, new Size(416, 416), new Scalar(0, 0, 0), true, false); - net.setInput(blob); - List outs = new ArrayList<>(); - List outNames = getOutputLayerNames(net); - net.forward(outs, outNames); - // Post‑process: for each detection compute score and choose class - float confThreshold = 0.5f; - for (Mat out : outs) { - for (int i = 0; i < out.rows(); i++) { - double[] data = out.get(i, 0); - if (data == null) continue; - // The first 5 numbers are bounding box, then class scores - double[] scores = new double[classNames.size()]; - System.arraycopy(data, 5, scores, 0, scores.length); - int classId = argMax(scores); - double confidence = scores[classId]; - if (confidence > confThreshold) { - String label = classNames.get(classId); - if (!labels.contains(label)) { - labels.add(label); - } - } - } - } - return labels; - } - /** - * Returns the indexes of the output layers in the network. YOLO - * automatically discovers its output layers; other models may require - * manually specifying them【784097309529506†L356-L365】. - */ - private List getOutputLayerNames(Net net) { - List names = new ArrayList<>(); - List outLayers = net.getUnconnectedOutLayers().toList(); - List layersNames = net.getLayerNames(); - for (Integer i : outLayers) { - names.add(layersNames.get(i - 1)); - } - return names; - } - /** - * Returns the index of the maximum value in the array. 
- */ - private int argMax(double[] array) { - int best = 0; - double max = array[0]; - for (int i = 1; i < array.length; i++) { - if (array[i] > max) { - max = array[i]; - best = i; - } - } - return best; - } - } } \ No newline at end of file diff --git a/src/test/java/com/auction/AuctionParsingTest.java b/src/test/java/com/auction/AuctionParsingTest.java index 2464f38..370d06c 100644 --- a/src/test/java/com/auction/AuctionParsingTest.java +++ b/src/test/java/com/auction/AuctionParsingTest.java @@ -41,8 +41,8 @@ public class AuctionParsingTest { System.out.println("\n=== Auction Parsing Test ==="); System.out.println("Found " + auctionLinks.size() + " auction links"); - List auctions = new ArrayList<>(); - int count = 0; + List auctions = new ArrayList<>(); + int count = 0; for (Element link : auctionLinks) { String href = link.attr("href"); @@ -59,7 +59,7 @@ public class AuctionParsingTest { int auctionId = Integer.parseInt(matcher.group(2)); // Extract auction info using IMPROVED text-based method - TroostwijkScraper.AuctionInfo auction = extractAuctionInfoFromText(link, href, auctionId, "A" + typeNum); + AuctionInfo auction = extractAuctionInfoFromText(link, href, auctionId, "A" + typeNum); auctions.add(auction); // Print the first 10 auctions for verification @@ -101,7 +101,7 @@ public class AuctionParsingTest { assertTrue(auctions.size() > 0, "Should find at least one auction"); // Verify all auctions have basic info - for (TroostwijkScraper.AuctionInfo auction : auctions) { + for (AuctionInfo auction : auctions) { assertNotNull(auction.title, "Title should not be null for auction " + auction.auctionId); assertTrue(auction.title.length() > 0, "Title should not be empty for auction " + auction.auctionId); assertNotNull(auction.url, "URL should not be null for auction " + auction.auctionId); @@ -119,8 +119,8 @@ public class AuctionParsingTest { * Expected format: "[day] om [time] [lot_count] [title] [city], [CC]" * Example: "woensdag om 18:00 1 Vrachtwagens voor 
bedrijfsvoertuigen Loßburg, DE" */ - private TroostwijkScraper.AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) { - TroostwijkScraper.AuctionInfo auction = new TroostwijkScraper.AuctionInfo(); + private AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) { + AuctionInfo auction = new AuctionInfo(); auction.auctionId = auctionId; auction.type = type; auction.url = "https://www.troostwijkauctions.com" + href; diff --git a/src/test/java/com/auction/TroostwijkScraperTest.java b/src/test/java/com/auction/TroostwijkScraperTest.java index 0b41454..1f4d226 100644 --- a/src/test/java/com/auction/TroostwijkScraperTest.java +++ b/src/test/java/com/auction/TroostwijkScraperTest.java @@ -68,71 +68,18 @@ public class TroostwijkScraperTest { } - @Test - public void testFetchAndPersistAuctionData() throws SQLException { - // First, discover auctions - List auctions = scraper.discoverDutchAuctions(); - assertFalse(auctions.isEmpty(), "Need at least one auction to test"); - - // Take the first auction and fetch its lots - Integer firstSaleId = auctions.getFirst(); - System.out.println("Testing with sale ID: " + firstSaleId); - - scraper.fetchLotsForSale(firstSaleId); - - // Verify data was persisted to database - List lotsInDb = scraper.db.getAllLots(); - - assertNotNull(lotsInDb, "Lots list should not be null"); - assertFalse(lotsInDb.isEmpty(), "Should have persisted at least one lot"); - - // Verify lot properties - for (TroostwijkScraper.Lot lot : lotsInDb) { - assertEquals(firstSaleId.intValue(), lot.saleId, "Lot should belong to the correct sale"); - assertTrue(lot.lotId > 0, "Lot ID should be positive"); - assertNotNull(lot.title, "Lot title should not be null"); - assertFalse(lot.title.isEmpty(), "Lot title should not be empty"); - assertNotNull(lot.url, "Lot URL should not be null"); - assertTrue(lot.url.startsWith("https://"), "Lot URL should be valid"); - assertTrue(lot.currentBid >= 0, 
"Current bid should be non-negative"); - } - - System.out.println("✓ Successfully persisted " + lotsInDb.size() + " lots to database"); - System.out.println("✓ All lot properties are valid"); - } - @Test public void testDatabaseSchema() throws SQLException { // Verify that the database schema was created correctly - List lots = scraper.db.getAllLots(); + List lots = scraper.db.getAllLots(); assertNotNull(lots, "Should be able to query lots table"); int imageCount = scraper.db.getImageCount(); assertTrue(imageCount >= 0, "Image count should be non-negative"); - List activeLots = scraper.db.getActiveLots(); + List activeLots = scraper.db.getActiveLots(); assertNotNull(activeLots, "Should be able to query active lots"); System.out.println("✓ Database schema is valid and queryable"); } - - @Test - public void testAuctionProperties() { - List auctions = scraper.discoverDutchAuctions(); - assertFalse(auctions.isEmpty(), "Should find auctions"); - - // Test that we can fetch data for multiple auctions - int auctionsToTest = Math.min(2, auctions.size()); - - for (int i = 0; i < auctionsToTest; i++) { - Integer saleId = auctions.get(i); - System.out.println("Testing auction " + (i + 1) + ": " + saleId); - - // This should not throw an exception - assertDoesNotThrow(() -> scraper.fetchLotsForSale(saleId), - "Should be able to fetch lots for sale " + saleId); - } - - System.out.println("✓ Successfully tested " + auctionsToTest + " auctions"); - } } diff --git a/src/test/resources/test.md b/src/test/resources/test.md index b0cee91..c6b0773 100644 --- a/src/test/resources/test.md +++ b/src/test/resources/test.md @@ -1,456 +1,61 @@ -## Woensdag 3 dec 25 - -* [ - - woensdag om 16:00 - - ![Industry & machinery](https://media.tbauctions.com/image-media/37f8e30d-7f4e-4af4-bb8a-029975b089cf/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/da3276a5-eb99-4a5d-ac3b-cc546b0a5f39/file?imageSize=1024x768 1024w) - - 
![](https://media.tbauctions.com/image-media/4d273787-cc80-4ac5-b89d-20525871426a/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/0b0b946d-26a6-486f-8c73-e9d0394e4e70/file?imageSize=1024x768 1024w) - - 145 - - Industrie & machines - - Meerdere locaties (45) - - - - - - ](/a/industrie-machines-A3-37358) -* [ - - woensdag om 16:00 - - ![D | Racing car transporters, crane polyp grabs and containers from inventory adjustment](https://media.tbauctions.com/image-media/b020da96-bb92-4e22-8a7b-2dd205dd5f7f/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/d3153444-7d59-45ac-8160-3cdaa7e1074e/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/be9f0e03-f585-47d0-abf0-3e802e21c8ad/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/658c88d7-0430-4454-b7fb-f48eeadeb401/file?imageSize=1024x768 1024w) - - 38 - - D | Raceautotransporters, kraan-polypengrepen en containers uit voorraadaanpassing - - Nieheim, DE - - - - - - ](/a/d-%7C-raceautotransporters-kraan-polypengrepen-en-containers-uit-voorraadaanpassing-A1-39772) -* [ - - woensdag om 16:00 - - ![Food Processing Equipment and Packaging Machinery](https://media.tbauctions.com/image-media/dd6b698d-79f5-40a4-ab74-2beca7a5341e/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/198b179c-0921-420f-9b14-ab430a957fb0/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/842f0a7d-cd05-4040-bb00-8691def7e9bc/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/b7855c86-f9e8-4346-9d0b-565d7cd126cc/file?imageSize=1024x768 1024w) - - 61 - - Voedselverwerkende apparatuur en verpakkingsmachines - - CHOMERAC, FR - - - - - - ](/a/voedselverwerkende-apparatuur-en-verpakkingsmachines-A1-39319) -* [ - - woensdag om 16:00 - - ![Agricultural & earthmoving 
machines](https://media.tbauctions.com/image-media/a11f32a9-5197-4486-8074-803b4da25227/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/dcf35ea2-0e1b-4d30-8cd7-1b534ecf7b0e/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/4857e24d-25b2-46f6-bde4-c0a5383508dd/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/7210f5e7-e124-40ab-b2db-c54e02af416a/file?imageSize=1024x768 1024w) - - 117 - - Landbouw- & grondverzetmachines - - Meerdere locaties (49) - - - - - - ](/a/landbouw-grondverzetmachines-A3-37375) -* [ - - woensdag om 17:00 - - ![Tools & equipment](https://media.tbauctions.com/image-media/24f212ce-e1f0-45a3-b095-c3944fd35340/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/1cc0cb65-68f6-4108-b51a-11cb1652f366/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/92d4629a-d259-43ad-806d-de99da15ea32/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/2653c057-5e28-46dc-aacc-08920f0500b4/file?imageSize=1024x768 1024w) - - 261 - - Gereedschappen & uitrusting - - Meerdere locaties (36), BE - - - - - - ](/a/gereedschappen-uitrusting-A3-37367) -* [ - - woensdag om 18:00 - - ![](https://media.tbauctions.com/image-media/13b74047-5372-493f-81dc-2d075c3bada1/file?imageSize=1024x768 1024w) - - 1 - - Vrachtwagens voor bedrijfsvoertuigen - - Loßburg, DE - - - - - - ](/a/vrachtwagens-voor-bedrijfsvoertuigen-A7-39531) -* [ - - woensdag om 19:00 - - ![White goods and accessories](https://media.tbauctions.com/image-media/1abf2d9b-7596-45e3-93b3-b503397eba0e/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/8cb4c2f1-f298-4e20-b221-ec828cca717c/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/d588cf94-3f82-4785-ace4-a8014a1859fd/file?imageSize=1024x768 1024w) - - 
![](https://media.tbauctions.com/image-media/9036f5bf-7fc7-43a3-ae8a-41c1376f60ec/file?imageSize=1024x768 1024w) - - 61 - - Witgoed en accessoires - - Etten-Leur, NL - - - - - - ](/a/witgoed-en-accessoires-A1-27241) -* [ - - Opent 28 nov 17:00 - - ![](https://media.tbauctions.com/image-media/4f41caa7-865a-4fe7-9cd5-305bd2e455f6/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/1cdf88f5-67d4-4932-96a4-449ad17ba51d/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/c47fee25-0326-40e1-b1db-3190a19ecb1a/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/a3e7c080-2dcf-45aa-99ab-2072697b4f54/file?imageSize=1024x768 1024w) - - 54 - - Collectie Rolex en Cartier horloges - - Dordrecht, NL - - - - - - ](/a/collectie-rolex-en-cartier-horloges-A1-39398) -* [ - - woensdag om 19:00 - - ![Kitchens and sanitary facilities](https://media.tbauctions.com/image-media/6379bf2c-aed2-4fbe-8fce-367c9d200141/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/16e97117-3cc2-4ced-bfe1-4f24e0b8d784/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/0002b9da-1ba5-429c-b377-eaddc3714e37/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/d3186016-0d7b-433a-b3e8-903fd85ec929/file?imageSize=1024x768 1024w) - - 254 - - SHOWROOMKEUKENS en INBOUWAPPARATUUR - - Tilburg, NL - - - - - - ](/a/showroomkeukens-en-inbouwapparatuur-A1-39480) -* [ - - woensdag om 19:00 - - ![](https://media.tbauctions.com/image-media/e6b76a75-9994-46b0-a2fa-c9a321f62980/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/ebef8850-5265-446a-ab32-5f2cd0d2bf88/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/f82e2b71-f908-472e-8019-9b2b15b0cb17/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/d92ba732-28b0-462f-bab5-ce82540d1c81/file?imageSize=1024x768 1024w) - - 499 - 
- Machines, retourgoederen en restpartijen - - Harlingen, NL - - - - - - ](/a/machines-retourgoederen-en-restpartijen-A1-39642) -* [ - - woensdag om 19:00 - - ![Lots of tools, office inventory, retail goods, decoration and olive trees](https://media.tbauctions.com/image-media/f949084a-50d5-4182-acfb-5d21ac54e471/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/96ef91fa-3927-49d4-af9e-ef709581ac51/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/2917a150-c99a-439f-8767-68ece5214800/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/328e4999-4a75-4f29-bd95-66b1de4e46e8/file?imageSize=1024x768 1024w) - - 120 - - Partijen gereedschap, kantoorinventaris, detailhandelgoederen, decoratie en olijfbomen - - Meerdere locaties (3), NL - - - - - - ](/a/partijen-gereedschap-kantoorinventaris-detailhandelgoederen-decoratie-en-olijfbomen-A1-27016) -* [ - - woensdag om 19:00 - - ![Bankruptcy vehicles](https://media.tbauctions.com/image-media/82880ed5-30f0-4055-99bd-a2cf07fad2ef/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/64312888-1b04-4f75-a0f1-386752232172/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/f4268e70-82fb-4ea1-ab92-c23dff650909/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/0c52cb25-fe99-4848-961b-a8a85c7a66d4/file?imageSize=1024x768 1024w) - - 16 - - Faillissementsvoertuigen - - Meerdere locaties (3), NL - - - - - - ](/a/faillissementsvoertuigen-A1-38368) -* [ - - woensdag om 19:00 - - ![](https://media.tbauctions.com/image-media/41066c8c-7806-43ee-beef-918c43e18cc7/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/a2543ed7-409e-449f-aa42-c68721432fdf/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/17dee71a-201f-40c5-9e8d-f71f807b27a9/file?imageSize=1024x768 1024w) - - 
![](https://media.tbauctions.com/image-media/0c8f8bb7-60fc-4c3f-af17-1080f489a8c2/file?imageSize=1024x768 1024w) - - 78 - - Personenauto’s, oldtimers, campers en brommobielen - - Buitenpost, NL - - - - - - ](/a/personenauto%E2%80%99s-oldtimers-campers-en-brommobielen-A1-39508) -* [ - - woensdag om 19:00 - - ![](https://media.tbauctions.com/image-media/278c35b9-3e5a-42eb-a09b-1bfdf48c60b2/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/2ced3007-a441-4752-84fc-eb3e2ceba7a2/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/36ed78ee-32f2-4824-b8eb-fd38f3d87e27/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/caa71786-c172-491f-bd78-787946e35480/file?imageSize=1024x768 1024w) - - 391 - - Bezorgveiling Faillissement Dvize B.V. – Hyundai Power Products gereedschappen - - Meerdere locaties (2) - - - - - - ](/a/bezorgveiling-faillissement-dvize-b-v-%E2%80%93-hyundai-power-products-gereedschappen-A1-39409) -* [ - - woensdag om 19:00 - - ![](https://media.tbauctions.com/image-media/37df514f-357a-43aa-9a5a-8fedefb7068f/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/37df514f-357a-43aa-9a5a-8fedefb7068f/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/37df514f-357a-43aa-9a5a-8fedefb7068f/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/4b8b6f24-4ca1-4141-a3e3-254caba98284/file?imageSize=1024x768 1024w) - - 208 - - Kunstplanten en bomen, composiet gevel- en vloerbekleding en akoestische materialen - - De Lier, NL - - - - - - ](/a/kunstplanten-en-bomen-composiet-gevel-en-vloerbekleding-en-akoestische-materialen-A1-28707) -* [ - - woensdag om 19:00 - - ![Metalworking machines, tools and stock in connection with company relocation](https://media.tbauctions.com/image-media/d5232c84-b313-48dc-973d-b82e762f000e/file?imageSize=1024x768 1024w) - - 
![](https://media.tbauctions.com/image-media/997e6803-7667-49ce-8a2c-b62939ee0aa7/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/40cec190-c669-4d80-b9fa-99039e6804ea/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/77371ea9-2869-4df6-97b4-145d76a34615/file?imageSize=1024x768 1024w) - - 181 - - Metaalbewerkingsmachines, gereedschap en voorraad in verband met bedrijfsverhuizing - - Cuijk, NL - - - - - - ](/a/metaalbewerkingsmachines-gereedschap-en-voorraad-in-verband-met-bedrijfsverhuizing-A1-39360) -* [ - - woensdag om 19:00 - - ![](https://media.tbauctions.com/image-media/2faa2b06-09c0-49ec-ba31-b8d24e15263a/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/485930ea-fbd4-4fc0-adeb-53f9e4f9b89e/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/fdb0a385-b27e-4aa2-b0c9-aca59d7c5f96/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/a268ff31-f1f1-421c-80e9-97cfd0fcfa7b/file?imageSize=1024x768 1024w) - - 238 - - Overstock en magazijnopruiming - - Heesch, NL - - - - - - ](/a/overstock-en-magazijnopruiming-A1-39538) -* [ - - woensdag om 19:00 - - ![Collector's Auction Scooters & Motorcycles](https://media.tbauctions.com/image-media/8464e9fc-b60a-4081-b898-d98328c8d1dd/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/a953420a-4d1d-4e80-9c53-be7bb85106c5/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/7f0198da-b213-46c0-8a0b-6a382ca1b029/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/7b11eee5-efe7-4fbd-8529-b1d50bd4db2e/file?imageSize=1024x768 1024w) - - 47 - - Verzamelveiling Scooters en Motoren - - Meerdere locaties (2), NL - - - - - - ](/a/verzamelveiling-scooters-en-motoren-A1-28428) -* [ - - woensdag om 19:00 - - ![Cars & 
transport](https://media.tbauctions.com/image-media/28be6ce7-6987-48ed-8758-622ab308ca2a/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/5be09c82-6f9a-41b8-b0dd-2d5a43327cb4/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/8fe5b954-b16c-4e72-a4b9-be6b345d5a82/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/183e08ae-4769-4a9d-a7db-2d07ab487781/file?imageSize=1024x768 1024w) - - 338 - - Auto's & transport - - Meerdere locaties (109) - - - - - - ](/a/auto%27s-transport-A3-37349) -* [ - - woensdag om 19:30 - - ![](https://media.tbauctions.com/image-media/f1401ff5-4e5d-41e5-b4b2-6771fd7aad83/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/d788438c-5a47-4eeb-aced-4777c5bb4701/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/f888c72a-8756-4b83-994d-a4bb6a08eb05/file?imageSize=1024x768 1024w) - - ![](https://media.tbauctions.com/image-media/9566453d-2c19-431c-b5c6-521cfdc01594/file?imageSize=1024x768 1024w) - - 74 - - Gouden juwelen en diamanten - - Meerdere locaties (28) - - - - - - ](/a/gouden-juwelen-en-diamanten-A1-29562) \ No newline at end of file +Configure your devices to use the Pi-hole as their DNS server │ +│ using: │ +│ │ +│ IPv4: 192.168.1.159 │ +│ IPv6: fdc5:59a6:9ac1:f11f:2c86:25d3:6282:37ef │ +│ If you have not done so already, the above IP should be set to │ +│ static. 
│ +│ View the web interface at http://pi.hole:80/admin or │ +│ http://192.168.1.159:80/admin │ +│ │ +│ Your Admin Webpage login password is gYj7Enh- │ +│ │ +│ │ +│ To allow your user to use all CLI functions without │ +│ authentication, │ +│ refer to https://docs.pi-hole.net/main/post-install/ │ +├───────────────────────────────────────────────────────────── + + +127.0.0.1 +192.168.1.159 +::1 +fdc5:59a6:9ac1:f11f:2c86:25d3:6282:37ef +fdc5:59a6:9ac1:f11f:bd8c:6e87:65f0:243c +fe80::a05b:bbc6:d47f:3002%enp9s0 +2IXD-XJN9-C337-1K4Y-BBEO-HV1F-3BVI + +https://ollama.lan:9443/#!/wizard - heel-goed-wachtwoord + +[ +{ +"domain": "ollama.lan", +"answer": "192.168.1.159", +"enabled": true +}, +{ +"domain": "hephaestus.lan", +"answer": "192.168.1.159", +"enabled": true +}, +{ +"domain": "hermes.lan", +"answer": "192.168.137.239", +"enabled": true +}, +{ +"domain": "atlas.lan", +"answer": "192.168.1.100", +"enabled": true +}, +{ +"domain": "hub.lan", +"answer": "192.168.1.1", +"enabled": true +}, +{ +"domain": "ha.lan", +"answer": "192.168.1.193", +"enabled": true +} +] diff --git a/wiki/ARCHITECTURE-TROOSTWIJK-SCRAPER.md b/wiki/ARCHITECTURE-TROOSTWIJK-SCRAPER.md new file mode 100644 index 0000000..98a9682 --- /dev/null +++ b/wiki/ARCHITECTURE-TROOSTWIJK-SCRAPER.md @@ -0,0 +1,326 @@ +# Troostwijk Scraper - Architecture & Data Flow + +## System Overview + +The scraper follows a **3-phase hierarchical crawling pattern** to extract auction and lot data from Troostwijk Auctions website. + +## Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ TROOSTWIJK SCRAPER │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 1: COLLECT AUCTION URLs │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Listing Page │────────▶│ Extract /a/ │ │ +│ │ /auctions? 
│ │ auction URLs │ │ +│ │ page=1..N │ └──────────────┘ │ +│ └──────────────┘ │ │ +│ ▼ │ +│ [ List of Auction URLs ] │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 2: EXTRACT LOT URLs FROM AUCTIONS │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Auction Page │────────▶│ Parse │ │ +│ │ /a/... │ │ __NEXT_DATA__│ │ +│ └──────────────┘ │ JSON │ │ +│ │ └──────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Save Auction │ │ Extract /l/ │ │ +│ │ Metadata │ │ lot URLs │ │ +│ │ to DB │ └──────────────┘ │ +│ └──────────────┘ │ │ +│ ▼ │ +│ [ List of Lot URLs ] │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 3: SCRAPE LOT DETAILS │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Lot Page │────────▶│ Parse │ │ +│ │ /l/... │ │ __NEXT_DATA__│ │ +│ └──────────────┘ │ JSON │ │ +│ └──────────────┘ │ +│ │ │ +│ ┌─────────────────────────┴─────────────────┐ │ +│ ▼ ▼ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Save Lot │ │ Save Images │ │ +│ │ Details │ │ URLs to DB │ │ +│ │ to DB │ └──────────────┘ │ +│ └──────────────┘ │ │ +│ ▼ │ +│ [Optional Download] │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Database Schema + +```sql +┌──────────────────────────────────────────────────────────────────┐ +│ CACHE TABLE (HTML Storage with Compression) │ +├──────────────────────────────────────────────────────────────────┤ +│ cache │ +│ ├── url (TEXT, PRIMARY KEY) │ +│ ├── content (BLOB) -- Compressed HTML (zlib) │ +│ ├── timestamp (REAL) │ +│ ├── status_code (INTEGER) │ +│ └── compressed (INTEGER) -- 1=compressed, 0=plain │ +└──────────────────────────────────────────────────────────────────┘ + +┌──────────────────────────────────────────────────────────────────┐ +│ AUCTIONS TABLE │ 
+├──────────────────────────────────────────────────────────────────┤ +│ auctions │ +│ ├── auction_id (TEXT, PRIMARY KEY) -- e.g. "A7-39813" │ +│ ├── url (TEXT, UNIQUE) │ +│ ├── title (TEXT) │ +│ ├── location (TEXT) -- e.g. "Cluj-Napoca, RO" │ +│ ├── lots_count (INTEGER) │ +│ ├── first_lot_closing_time (TEXT) │ +│ └── scraped_at (TEXT) │ +└──────────────────────────────────────────────────────────────────┘ + +┌──────────────────────────────────────────────────────────────────┐ +│ LOTS TABLE │ +├──────────────────────────────────────────────────────────────────┤ +│ lots │ +│ ├── lot_id (TEXT, PRIMARY KEY) -- e.g. "A1-28505-5" │ +│ ├── auction_id (TEXT) -- FK to auctions │ +│ ├── url (TEXT, UNIQUE) │ +│ ├── title (TEXT) │ +│ ├── current_bid (TEXT) -- "€123.45" or "No bids" │ +│ ├── bid_count (INTEGER) │ +│ ├── closing_time (TEXT) │ +│ ├── viewing_time (TEXT) │ +│ ├── pickup_date (TEXT) │ +│ ├── location (TEXT) -- e.g. "Dongen, NL" │ +│ ├── description (TEXT) │ +│ ├── category (TEXT) │ +│ └── scraped_at (TEXT) │ +│ FOREIGN KEY (auction_id) → auctions(auction_id) │ +└──────────────────────────────────────────────────────────────────┘ + +┌──────────────────────────────────────────────────────────────────┐ +│ IMAGES TABLE (Image URLs & Download Status) │ +├──────────────────────────────────────────────────────────────────┤ +│ images ◀── THIS TABLE HOLDS IMAGE LINKS│ +│ ├── id (INTEGER, PRIMARY KEY AUTOINCREMENT) │ +│ ├── lot_id (TEXT) -- FK to lots │ +│ ├── url (TEXT) -- Image URL │ +│ ├── local_path (TEXT) -- Path after download │ +│ └── downloaded (INTEGER) -- 0=pending, 1=downloaded │ +│ FOREIGN KEY (lot_id) → lots(lot_id) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## Sequence Diagram + +``` +User Scraper Playwright Cache DB Data Tables + │ │ │ │ │ + │ Run │ │ │ │ + ├──────────────▶│ │ │ │ + │ │ │ │ │ + │ │ Phase 1: Listing Pages │ │ + │ ├───────────────▶│ │ │ + │ │ goto() │ │ │ + │ │◀───────────────┤ │ │ + │ │ HTML │ │ │ + │ 
├───────────────────────────────▶│ │ + │ │ compress & cache │ │ + │ │ │ │ │ + │ │ Phase 2: Auction Pages │ │ + │ ├───────────────▶│ │ │ + │ │◀───────────────┤ │ │ + │ │ HTML │ │ │ + │ │ │ │ │ + │ │ Parse __NEXT_DATA__ JSON │ │ + │ │────────────────────────────────────────────────▶│ + │ │ │ │ INSERT auctions + │ │ │ │ │ + │ │ Phase 3: Lot Pages │ │ + │ ├───────────────▶│ │ │ + │ │◀───────────────┤ │ │ + │ │ HTML │ │ │ + │ │ │ │ │ + │ │ Parse __NEXT_DATA__ JSON │ │ + │ │────────────────────────────────────────────────▶│ + │ │ │ │ INSERT lots │ + │ │────────────────────────────────────────────────▶│ + │ │ │ │ INSERT images│ + │ │ │ │ │ + │ │ Export to CSV/JSON │ │ + │ │◀────────────────────────────────────────────────┤ + │ │ Query all data │ │ + │◀──────────────┤ │ │ │ + │ Results │ │ │ │ +``` + +## Data Flow Details + +### 1. **Page Retrieval & Caching** +``` +Request URL + │ + ├──▶ Check cache DB (with timestamp validation) + │ │ + │ ├─[HIT]──▶ Decompress (if compressed=1) + │ │ └──▶ Return HTML + │ │ + │ └─[MISS]─▶ Fetch via Playwright + │ │ + │ ├──▶ Compress HTML (zlib level 9) + │ │ ~70-90% size reduction + │ │ + │ └──▶ Store in cache DB (compressed=1) + │ + └──▶ Return HTML for parsing +``` + +### 2. **JSON Parsing Strategy** +``` +HTML Content + │ + └──▶ Extract