start
This commit is contained in:
@@ -1,24 +1,19 @@
|
||||
package com.auction;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* Represents auction metadata (veiling informatie)
|
||||
* Data typically populated by the external scraper process
|
||||
*/
|
||||
public final class AuctionInfo {
|
||||
|
||||
public int auctionId; // Unique auction ID (from URL)
|
||||
public String title; // Auction title
|
||||
public String location; // Location (e.g., "Amsterdam, NL")
|
||||
public String city; // City name
|
||||
public String country; // Country code (e.g., "NL")
|
||||
public String url; // Full auction URL
|
||||
public String type; // Auction type (A1 or A7)
|
||||
public int lotCount; // Number of lots/kavels
|
||||
public LocalDateTime closingTime; // Closing time if available
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}",
|
||||
auctionId, type, title, location, lotCount, url);
|
||||
}
|
||||
}
|
||||
public record AuctionInfo(
|
||||
int auctionId, // Unique auction ID (from URL)
|
||||
String title, // Auction title
|
||||
String location, // Location (e.g., "Amsterdam, NL")
|
||||
String city, // City name
|
||||
String country, // Country code (e.g., "NL")
|
||||
String url, // Full auction URL
|
||||
String type, // Auction type (A1 or A7)
|
||||
int lotCount, // Number of lots/kavels
|
||||
LocalDateTime closingTime // Closing time if available
|
||||
) {}
|
||||
|
||||
@@ -1,110 +1,112 @@
|
||||
package com.auction;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.time.Instant;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Service for persisting auctions, lots, images, and object labels into
|
||||
* a SQLite database. Uses the Xerial JDBC driver which connects to
|
||||
* SQLite via a URL of the form "jdbc:sqlite:path_to_file".
|
||||
* Service for persisting auctions, lots, and images into a SQLite database.
|
||||
* Data is typically populated by an external scraper process;
|
||||
* this service enriches it with image processing and monitoring.
|
||||
*/
|
||||
public class DatabaseService {
|
||||
|
||||
private final String url;
|
||||
|
||||
DatabaseService(String dbPath) {
|
||||
this.url = "jdbc:sqlite:" + dbPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates tables if they do not already exist. The schema includes
|
||||
* tables for auctions, lots, images, and object labels. This method is
|
||||
* idempotent; it can be called multiple times.
|
||||
* Creates tables if they do not already exist.
|
||||
* Schema supports data from external scraper and adds image processing results.
|
||||
*/
|
||||
void ensureSchema() throws SQLException {
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
// Auctions table (veilingen)
|
||||
stmt.execute("CREATE TABLE IF NOT EXISTS auctions ("
|
||||
+ "auction_id INTEGER PRIMARY KEY,"
|
||||
+ "title TEXT NOT NULL,"
|
||||
+ "location TEXT,"
|
||||
+ "city TEXT,"
|
||||
+ "country TEXT,"
|
||||
+ "url TEXT NOT NULL,"
|
||||
+ "type TEXT,"
|
||||
+ "lot_count INTEGER DEFAULT 0,"
|
||||
+ "closing_time TEXT,"
|
||||
+ "discovered_at INTEGER" // Unix timestamp
|
||||
+ ")");
|
||||
// Auctions table (populated by external scraper)
|
||||
stmt.execute("""
|
||||
CREATE TABLE IF NOT EXISTS auctions (
|
||||
auction_id INTEGER PRIMARY KEY,
|
||||
title TEXT NOT NULL,
|
||||
location TEXT,
|
||||
city TEXT,
|
||||
country TEXT,
|
||||
url TEXT NOT NULL,
|
||||
type TEXT,
|
||||
lot_count INTEGER DEFAULT 0,
|
||||
closing_time TEXT,
|
||||
discovered_at INTEGER
|
||||
)""");
|
||||
|
||||
// Sales table (legacy - keep for compatibility)
|
||||
stmt.execute("CREATE TABLE IF NOT EXISTS sales ("
|
||||
+ "sale_id INTEGER PRIMARY KEY,"
|
||||
+ "title TEXT,"
|
||||
+ "location TEXT,"
|
||||
+ "closing_time TEXT"
|
||||
+ ")");
|
||||
// Lots table (populated by external scraper)
|
||||
stmt.execute("""
|
||||
CREATE TABLE IF NOT EXISTS lots (
|
||||
lot_id INTEGER PRIMARY KEY,
|
||||
sale_id INTEGER,
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
manufacturer TEXT,
|
||||
type TEXT,
|
||||
year INTEGER,
|
||||
category TEXT,
|
||||
current_bid REAL,
|
||||
currency TEXT,
|
||||
url TEXT,
|
||||
closing_time TEXT,
|
||||
closing_notified INTEGER DEFAULT 0,
|
||||
FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)
|
||||
)""");
|
||||
|
||||
// Lots table
|
||||
stmt.execute("CREATE TABLE IF NOT EXISTS lots ("
|
||||
+ "lot_id INTEGER PRIMARY KEY,"
|
||||
+ "sale_id INTEGER,"
|
||||
+ "title TEXT,"
|
||||
+ "description TEXT,"
|
||||
+ "manufacturer TEXT,"
|
||||
+ "type TEXT,"
|
||||
+ "year INTEGER,"
|
||||
+ "category TEXT,"
|
||||
+ "current_bid REAL,"
|
||||
+ "currency TEXT,"
|
||||
+ "url TEXT,"
|
||||
+ "closing_time TEXT,"
|
||||
+ "closing_notified INTEGER DEFAULT 0,"
|
||||
+ "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)"
|
||||
+ ")");
|
||||
// Images table (populated by this process)
|
||||
stmt.execute("""
|
||||
CREATE TABLE IF NOT EXISTS images (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
lot_id INTEGER,
|
||||
url TEXT,
|
||||
file_path TEXT,
|
||||
labels TEXT,
|
||||
processed_at INTEGER,
|
||||
FOREIGN KEY (lot_id) REFERENCES lots(lot_id)
|
||||
)""");
|
||||
|
||||
// Images table
|
||||
stmt.execute("CREATE TABLE IF NOT EXISTS images ("
|
||||
+ "id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||
+ "lot_id INTEGER,"
|
||||
+ "url TEXT,"
|
||||
+ "file_path TEXT,"
|
||||
+ "labels TEXT,"
|
||||
+ "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)"
|
||||
+ ")");
|
||||
|
||||
// Create indexes for better query performance
|
||||
// Indexes for performance
|
||||
stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)");
|
||||
stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)");
|
||||
stmt.execute("CREATE INDEX IF NOT EXISTS idx_images_lot_id ON images(lot_id)");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts or updates an auction record
|
||||
* Inserts or updates an auction record (typically called by external scraper)
|
||||
*/
|
||||
synchronized void upsertAuction(AuctionInfo auction) throws SQLException {
|
||||
var sql = "INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)"
|
||||
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
||||
+ " ON CONFLICT(auction_id) DO UPDATE SET "
|
||||
+ "title = excluded.title, location = excluded.location, city = excluded.city, "
|
||||
+ "country = excluded.country, url = excluded.url, type = excluded.type, "
|
||||
+ "lot_count = excluded.lot_count, closing_time = excluded.closing_time";
|
||||
var sql = """
|
||||
INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(auction_id) DO UPDATE SET
|
||||
title = excluded.title,
|
||||
location = excluded.location,
|
||||
city = excluded.city,
|
||||
country = excluded.country,
|
||||
url = excluded.url,
|
||||
type = excluded.type,
|
||||
lot_count = excluded.lot_count,
|
||||
closing_time = excluded.closing_time
|
||||
""";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
|
||||
ps.setInt(1, auction.auctionId);
|
||||
ps.setString(2, auction.title);
|
||||
ps.setString(3, auction.location);
|
||||
ps.setString(4, auction.city);
|
||||
ps.setString(5, auction.country);
|
||||
ps.setString(6, auction.url);
|
||||
ps.setString(7, auction.type);
|
||||
ps.setInt(8, auction.lotCount);
|
||||
ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null);
|
||||
ps.setInt(1, auction.auctionId());
|
||||
ps.setString(2, auction.title());
|
||||
ps.setString(3, auction.location());
|
||||
ps.setString(4, auction.city());
|
||||
ps.setString(5, auction.country());
|
||||
ps.setString(6, auction.url());
|
||||
ps.setString(7, auction.type());
|
||||
ps.setInt(8, auction.lotCount());
|
||||
ps.setString(9, auction.closingTime() != null ? auction.closingTime().toString() : null);
|
||||
ps.setLong(10, Instant.now().getEpochSecond());
|
||||
ps.executeUpdate();
|
||||
}
|
||||
@@ -120,20 +122,20 @@ public class DatabaseService {
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
var rs = stmt.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
var auction = new AuctionInfo();
|
||||
auction.auctionId = rs.getInt("auction_id");
|
||||
auction.title = rs.getString("title");
|
||||
auction.location = rs.getString("location");
|
||||
auction.city = rs.getString("city");
|
||||
auction.country = rs.getString("country");
|
||||
auction.url = rs.getString("url");
|
||||
auction.type = rs.getString("type");
|
||||
auction.lotCount = rs.getInt("lot_count");
|
||||
var closing = rs.getString("closing_time");
|
||||
if (closing != null) {
|
||||
auction.closingTime = LocalDateTime.parse(closing);
|
||||
}
|
||||
auctions.add(auction);
|
||||
var closingStr = rs.getString("closing_time");
|
||||
var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null;
|
||||
|
||||
auctions.add(new AuctionInfo(
|
||||
rs.getInt("auction_id"),
|
||||
rs.getString("title"),
|
||||
rs.getString("location"),
|
||||
rs.getString("city"),
|
||||
rs.getString("country"),
|
||||
rs.getString("url"),
|
||||
rs.getString("type"),
|
||||
rs.getInt("lot_count"),
|
||||
closing
|
||||
));
|
||||
}
|
||||
}
|
||||
return auctions;
|
||||
@@ -151,119 +153,145 @@ public class DatabaseService {
|
||||
ps.setString(1, countryCode);
|
||||
var rs = ps.executeQuery();
|
||||
while (rs.next()) {
|
||||
var auction = new AuctionInfo();
|
||||
auction.auctionId = rs.getInt("auction_id");
|
||||
auction.title = rs.getString("title");
|
||||
auction.location = rs.getString("location");
|
||||
auction.city = rs.getString("city");
|
||||
auction.country = rs.getString("country");
|
||||
auction.url = rs.getString("url");
|
||||
auction.type = rs.getString("type");
|
||||
auction.lotCount = rs.getInt("lot_count");
|
||||
var closing = rs.getString("closing_time");
|
||||
if (closing != null) {
|
||||
auction.closingTime = LocalDateTime.parse(closing);
|
||||
}
|
||||
auctions.add(auction);
|
||||
var closingStr = rs.getString("closing_time");
|
||||
var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null;
|
||||
|
||||
auctions.add(new AuctionInfo(
|
||||
rs.getInt("auction_id"),
|
||||
rs.getString("title"),
|
||||
rs.getString("location"),
|
||||
rs.getString("city"),
|
||||
rs.getString("country"),
|
||||
rs.getString("url"),
|
||||
rs.getString("type"),
|
||||
rs.getInt("lot_count"),
|
||||
closing
|
||||
));
|
||||
}
|
||||
}
|
||||
return auctions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts or updates a lot record. Uses INSERT OR REPLACE to
|
||||
* implement upsert semantics so that existing rows are replaced.
|
||||
* Inserts or updates a lot record (typically called by external scraper)
|
||||
*/
|
||||
synchronized void upsertLot(Lot lot) throws SQLException {
|
||||
var sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)"
|
||||
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
||||
+ " ON CONFLICT(lot_id) DO UPDATE SET "
|
||||
+ "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, "
|
||||
+ "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, "
|
||||
+ "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time";
|
||||
var sql = """
|
||||
INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(lot_id) DO UPDATE SET
|
||||
sale_id = excluded.sale_id,
|
||||
title = excluded.title,
|
||||
description = excluded.description,
|
||||
manufacturer = excluded.manufacturer,
|
||||
type = excluded.type,
|
||||
year = excluded.year,
|
||||
category = excluded.category,
|
||||
current_bid = excluded.current_bid,
|
||||
currency = excluded.currency,
|
||||
url = excluded.url,
|
||||
closing_time = excluded.closing_time
|
||||
""";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
|
||||
ps.setInt(1, lot.lotId);
|
||||
ps.setInt(2, lot.saleId);
|
||||
ps.setString(3, lot.title);
|
||||
ps.setString(4, lot.description);
|
||||
ps.setString(5, lot.manufacturer);
|
||||
ps.setString(6, lot.type);
|
||||
ps.setInt(7, lot.year);
|
||||
ps.setString(8, lot.category);
|
||||
ps.setDouble(9, lot.currentBid);
|
||||
ps.setString(10, lot.currency);
|
||||
ps.setString(11, lot.url);
|
||||
ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null);
|
||||
ps.setInt(13, lot.closingNotified ? 1 : 0);
|
||||
ps.setInt(1, lot.lotId());
|
||||
ps.setInt(2, lot.saleId());
|
||||
ps.setString(3, lot.title());
|
||||
ps.setString(4, lot.description());
|
||||
ps.setString(5, lot.manufacturer());
|
||||
ps.setString(6, lot.type());
|
||||
ps.setInt(7, lot.year());
|
||||
ps.setString(8, lot.category());
|
||||
ps.setDouble(9, lot.currentBid());
|
||||
ps.setString(10, lot.currency());
|
||||
ps.setString(11, lot.url());
|
||||
ps.setString(12, lot.closingTime() != null ? lot.closingTime().toString() : null);
|
||||
ps.setInt(13, lot.closingNotified() ? 1 : 0);
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a new image record. Each image is associated with a lot and
|
||||
* stores both the original URL and the local file path. Detected
|
||||
* labels are stored as a comma separated string.
|
||||
* Inserts a new image record with object detection labels
|
||||
*/
|
||||
synchronized void insertImage(int lotId, String url, String filePath, List<String> labels) throws SQLException {
|
||||
var sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)";
|
||||
var sql = "INSERT INTO images (lot_id, url, file_path, labels, processed_at) VALUES (?, ?, ?, ?, ?)";
|
||||
try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) {
|
||||
ps.setInt(1, lotId);
|
||||
ps.setString(2, url);
|
||||
ps.setString(3, filePath);
|
||||
ps.setString(4, String.join(",", labels));
|
||||
ps.setLong(5, Instant.now().getEpochSecond());
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all lots that are still active (i.e., have a closing time
|
||||
* in the future or unknown). Only these lots need to be monitored.
|
||||
* Retrieves images for a specific lot
|
||||
*/
|
||||
synchronized List<ImageRecord> getImagesForLot(int lotId) throws SQLException {
|
||||
List<ImageRecord> images = new ArrayList<>();
|
||||
var sql = "SELECT id, lot_id, url, file_path, labels FROM images WHERE lot_id = ?";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
|
||||
ps.setInt(1, lotId);
|
||||
var rs = ps.executeQuery();
|
||||
while (rs.next()) {
|
||||
images.add(new ImageRecord(
|
||||
rs.getInt("id"),
|
||||
rs.getInt("lot_id"),
|
||||
rs.getString("url"),
|
||||
rs.getString("file_path"),
|
||||
rs.getString("labels")
|
||||
));
|
||||
}
|
||||
}
|
||||
return images;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves lots for monitoring; the query has no WHERE clause, so every row of the lots table is returned
|
||||
*/
|
||||
synchronized List<Lot> getActiveLots() throws SQLException {
|
||||
List<Lot> list = new ArrayList<>();
|
||||
var sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots";
|
||||
var sql = "SELECT lot_id, sale_id, title, description, manufacturer, type, year, category, " +
|
||||
"current_bid, currency, url, closing_time, closing_notified FROM lots";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
var rs = stmt.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
var lot = new Lot();
|
||||
lot.lotId = rs.getInt("lot_id");
|
||||
lot.saleId = rs.getInt("sale_id");
|
||||
lot.currentBid = rs.getDouble("current_bid");
|
||||
lot.currency = rs.getString("currency");
|
||||
var closing = rs.getString("closing_time");
|
||||
lot.closingNotified = rs.getInt("closing_notified") != 0;
|
||||
if (closing != null) {
|
||||
lot.closingTime = LocalDateTime.parse(closing);
|
||||
}
|
||||
list.add(lot);
|
||||
var closingStr = rs.getString("closing_time");
|
||||
var closing = closingStr != null ? LocalDateTime.parse(closingStr) : null;
|
||||
|
||||
list.add(new Lot(
|
||||
rs.getInt("sale_id"),
|
||||
rs.getInt("lot_id"),
|
||||
rs.getString("title"),
|
||||
rs.getString("description"),
|
||||
rs.getString("manufacturer"),
|
||||
rs.getString("type"),
|
||||
rs.getInt("year"),
|
||||
rs.getString("category"),
|
||||
rs.getDouble("current_bid"),
|
||||
rs.getString("currency"),
|
||||
rs.getString("url"),
|
||||
closing,
|
||||
rs.getInt("closing_notified") != 0
|
||||
));
|
||||
}
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all lots from the database.
|
||||
* Retrieves all lots from the database
|
||||
*/
|
||||
synchronized List<Lot> getAllLots() throws SQLException {
|
||||
List<Lot> list = new ArrayList<>();
|
||||
var sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots";
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
var rs = stmt.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
var lot = new Lot();
|
||||
lot.lotId = rs.getInt("lot_id");
|
||||
lot.saleId = rs.getInt("sale_id");
|
||||
lot.title = rs.getString("title");
|
||||
lot.currentBid = rs.getDouble("current_bid");
|
||||
lot.currency = rs.getString("currency");
|
||||
list.add(lot);
|
||||
}
|
||||
}
|
||||
return list;
|
||||
return getActiveLots();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the total number of images in the database.
|
||||
* Gets the total number of images in the database
|
||||
*/
|
||||
synchronized int getImageCount() throws SQLException {
|
||||
var sql = "SELECT COUNT(*) as count FROM images";
|
||||
@@ -277,27 +305,31 @@ public class DatabaseService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the current bid of a lot after a bid refresh.
|
||||
* Updates the current bid of a lot (used by monitoring service)
|
||||
*/
|
||||
synchronized void updateLotCurrentBid(Lot lot) throws SQLException {
|
||||
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(
|
||||
"UPDATE lots SET current_bid = ? WHERE lot_id = ?")) {
|
||||
ps.setDouble(1, lot.currentBid);
|
||||
ps.setInt(2, lot.lotId);
|
||||
try (var conn = DriverManager.getConnection(url);
|
||||
var ps = conn.prepareStatement("UPDATE lots SET current_bid = ? WHERE lot_id = ?")) {
|
||||
ps.setDouble(1, lot.currentBid());
|
||||
ps.setInt(2, lot.lotId());
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the closingNotified flag of a lot (set to 1 when we have
|
||||
* warned the user about its imminent closure).
|
||||
* Updates the closingNotified flag of a lot
|
||||
*/
|
||||
synchronized void updateLotNotificationFlags(Lot lot) throws SQLException {
|
||||
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(
|
||||
"UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) {
|
||||
ps.setInt(1, lot.closingNotified ? 1 : 0);
|
||||
ps.setInt(2, lot.lotId);
|
||||
try (var conn = DriverManager.getConnection(url);
|
||||
var ps = conn.prepareStatement("UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) {
|
||||
ps.setInt(1, lot.closingNotified() ? 1 : 0);
|
||||
ps.setInt(2, lot.lotId());
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Simple record for one row of the {@code images} table.
 *
 * @param id       value of the {@code id} column
 * @param lotId    value of the {@code lot_id} column
 * @param url      value of the {@code url} column
 * @param filePath value of the {@code file_path} column
 * @param labels   value of the {@code labels} column, kept as the raw stored string
 */
record ImageRecord(int id, int lotId, String url, String filePath, String labels) {}
|
||||
}
|
||||
|
||||
@@ -1,29 +1,30 @@
|
||||
package com.auction;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* Simple POJO representing a lot (kavel) in an auction. It keeps track
|
||||
* of the sale it belongs to, current bid and closing time. The method
|
||||
* minutesUntilClose computes how many minutes remain until the lot closes.
|
||||
* Represents a lot (kavel) in an auction.
|
||||
* Data typically populated by the external scraper process.
|
||||
* This project enriches the data with image analysis and monitoring.
|
||||
*/
|
||||
final class Lot {
|
||||
|
||||
int saleId;
|
||||
int lotId;
|
||||
String title;
|
||||
String description;
|
||||
String manufacturer;
|
||||
String type;
|
||||
int year;
|
||||
String category;
|
||||
double currentBid;
|
||||
String currency;
|
||||
String url;
|
||||
LocalDateTime closingTime; // null if unknown
|
||||
boolean closingNotified;
|
||||
|
||||
record Lot(
|
||||
int saleId,
|
||||
int lotId,
|
||||
String title,
|
||||
String description,
|
||||
String manufacturer,
|
||||
String type,
|
||||
int year,
|
||||
String category,
|
||||
double currentBid,
|
||||
String currency,
|
||||
String url,
|
||||
LocalDateTime closingTime,
|
||||
boolean closingNotified
|
||||
) {
|
||||
long minutesUntilClose() {
|
||||
if (closingTime == null) return Long.MAX_VALUE;
|
||||
return java.time.Duration.between(LocalDateTime.now(), closingTime).toMinutes();
|
||||
return Duration.between(LocalDateTime.now(), closingTime).toMinutes();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,82 +1,93 @@
|
||||
package com.auction;
|
||||
|
||||
import org.opencv.core.Core;
|
||||
import java.util.List;
|
||||
public class Main {
|
||||
public static void main2(String[] args) {
|
||||
// If arguments are passed, this is likely a one-off command via dokku run
|
||||
// Just exit immediately to allow the command to run
|
||||
if (args.length > 0) {
|
||||
IO.println("Command mode - exiting to allow shell commands");
|
||||
return;
|
||||
}
|
||||
|
||||
IO.println("Starting Troostwijk Auction Scraper...");
|
||||
IO.println("Container is running and healthy.");
|
||||
|
||||
// Keep container alive
|
||||
try {
|
||||
Thread.sleep(Long.MAX_VALUE);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
IO.println("Container interrupted, exiting.");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Entry point. Configure database location, notification settings, and
|
||||
* YOLO model paths here before running. Once started the scraper
|
||||
* discovers Dutch auctions, scrapes lots, and begins monitoring.
|
||||
/**
|
||||
* Main entry point for Troostwijk Auction Monitor.
|
||||
*
|
||||
* ARCHITECTURE:
|
||||
* This project focuses on:
|
||||
* 1. Image processing and object detection
|
||||
* 2. Bid monitoring and notifications
|
||||
* 3. Data enrichment
|
||||
*
|
||||
* Auction/Lot scraping is handled by the external ARCHITECTURE-TROOSTWIJK-SCRAPER process.
|
||||
* That process populates the auctions and lots tables in the shared database.
|
||||
* This process reads from those tables and enriches them with:
|
||||
* - Downloaded images
|
||||
* - Object detection labels
|
||||
* - Bid monitoring
|
||||
* - Notifications
|
||||
*/
|
||||
public class Main {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
IO.println("=== Troostwijk Auction Scraper ===\n");
|
||||
Console.println("=== Troostwijk Auction Monitor ===\n");
|
||||
|
||||
// Configuration parameters (replace with your own values)
|
||||
String databaseFile = "troostwijk.db";
|
||||
|
||||
// Notification configuration - choose one:
|
||||
// Option 1: Desktop notifications only (free, no setup required)
|
||||
// Configuration
|
||||
String databaseFile = System.getenv().getOrDefault("DATABASE_FILE", "troostwijk.db");
|
||||
String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop");
|
||||
|
||||
// Option 2: Desktop + Email via Gmail (free, requires Gmail app password)
|
||||
// Format: "smtp:username:appPassword:toEmail"
|
||||
// Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com"
|
||||
// Get app password: Google Account > Security > 2-Step Verification > App passwords
|
||||
|
||||
// YOLO model paths (optional - scraper works without object detection)
|
||||
// YOLO model paths (optional - monitor works without object detection)
|
||||
String yoloCfg = "models/yolov4.cfg";
|
||||
String yoloWeights = "models/yolov4.weights";
|
||||
String yoloClasses = "models/coco.names";
|
||||
|
||||
// Load native OpenCV library
|
||||
// Load native OpenCV library; if it is unavailable, image detection is disabled
|
||||
try {
|
||||
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
|
||||
Console.println("✓ OpenCV loaded");
|
||||
} catch (UnsatisfiedLinkError e) {
|
||||
Console.println("⚠️ OpenCV not available - image detection disabled");
|
||||
}
|
||||
|
||||
IO.println("Initializing scraper...");
|
||||
TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "",
|
||||
Console.println("Initializing monitor...");
|
||||
var monitor = new TroostwijkMonitor(databaseFile, notificationConfig,
|
||||
yoloCfg, yoloWeights, yoloClasses);
|
||||
|
||||
// Step 1: Discover auctions in NL
|
||||
IO.println("\n[1/3] Discovering Dutch auctions...");
|
||||
List<Integer> auctions = scraper.discoverDutchAuctions();
|
||||
IO.println("✓ Found " + auctions.size() + " auctions: " + auctions);
|
||||
// Show current database state
|
||||
Console.println("\n📊 Current Database State:");
|
||||
monitor.printDatabaseStats();
|
||||
|
||||
// Step 2: Fetch lots for each auction
|
||||
IO.println("\n[2/3] Fetching lot details...");
|
||||
int totalAuctions = auctions.size();
|
||||
int currentAuction = 0;
|
||||
for (int saleId : auctions) {
|
||||
currentAuction++;
|
||||
IO.println(" [Page " + currentAuction + "] Fetching auctions...");
|
||||
IO.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "...");
|
||||
scraper.fetchLotsForSale(saleId);
|
||||
// Check for pending image processing
|
||||
Console.println("\n[1/2] Processing images...");
|
||||
monitor.processPendingImages();
|
||||
|
||||
// Start monitoring service
|
||||
Console.println("\n[2/2] Starting bid monitoring...");
|
||||
monitor.scheduleMonitoring();
|
||||
|
||||
Console.println("\n✓ Monitor is running. Press Ctrl+C to stop.\n");
|
||||
Console.println("NOTE: This process expects auction/lot data from the external scraper.");
|
||||
Console.println(" Make sure ARCHITECTURE-TROOSTWIJK-SCRAPER is running and populating the database.\n");
|
||||
|
||||
// Keep application alive
|
||||
try {
|
||||
Thread.sleep(Long.MAX_VALUE);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
Console.println("Monitor interrupted, exiting.");
|
||||
}
|
||||
}
|
||||
|
||||
// Show database summary
|
||||
IO.println("\n📊 Database Summary:");
|
||||
scraper.printDatabaseStats();
|
||||
/**
|
||||
* Alternative entry point for container environments.
|
||||
* Simply keeps the container alive for manual commands.
|
||||
*/
|
||||
public static void main2(String[] args) {
|
||||
if (args.length > 0) {
|
||||
Console.println("Command mode - exiting to allow shell commands");
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 3: Start monitoring bids and closures
|
||||
IO.println("\n[3/3] Starting monitoring service...");
|
||||
scraper.scheduleMonitoring();
|
||||
IO.println("✓ Monitoring active. Press Ctrl+C to stop.\n");
|
||||
Console.println("Troostwijk Monitor container is running and healthy.");
|
||||
Console.println("Use 'docker exec' or 'dokku run' to execute commands.");
|
||||
|
||||
try {
|
||||
Thread.sleep(Long.MAX_VALUE);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
Console.println("Container interrupted, exiting.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,9 +103,9 @@ class NotificationService {
|
||||
Thread.sleep(2000);
|
||||
tray.remove(trayIcon);
|
||||
|
||||
IO.println("Desktop notification sent: " + title);
|
||||
Console.println("Desktop notification sent: " + title);
|
||||
} else {
|
||||
IO.println("Desktop notifications not supported, logging: " + title + " - " + message);
|
||||
Console.println("Desktop notifications not supported, logging: " + title + " - " + message);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("Desktop notification failed: " + e.getMessage());
|
||||
@@ -147,7 +147,7 @@ class NotificationService {
|
||||
}
|
||||
|
||||
Transport.send(msg);
|
||||
IO.println("Email notification sent: " + title);
|
||||
Console.println("Email notification sent: " + title);
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("Email notification failed: " + e.getMessage());
|
||||
|
||||
@@ -38,12 +38,12 @@ class ObjectDetectionService {
|
||||
var classNamesFile = Paths.get(classNamesPath);
|
||||
|
||||
if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) {
|
||||
IO.println("⚠️ Object detection disabled: YOLO model files not found");
|
||||
IO.println(" Expected files:");
|
||||
IO.println(" - " + cfgPath);
|
||||
IO.println(" - " + weightsPath);
|
||||
IO.println(" - " + classNamesPath);
|
||||
IO.println(" Scraper will continue without image analysis.");
|
||||
Console.println("⚠️ Object detection disabled: YOLO model files not found");
|
||||
Console.println(" Expected files:");
|
||||
Console.println(" - " + cfgPath);
|
||||
Console.println(" - " + weightsPath);
|
||||
Console.println(" - " + classNamesPath);
|
||||
Console.println(" Scraper will continue without image analysis.");
|
||||
this.enabled = false;
|
||||
this.net = null;
|
||||
this.classNames = new ArrayList<>();
|
||||
@@ -58,7 +58,7 @@ class ObjectDetectionService {
|
||||
// Load class names (one per line)
|
||||
this.classNames = Files.readAllLines(classNamesFile);
|
||||
this.enabled = true;
|
||||
IO.println("✓ Object detection enabled with YOLO");
|
||||
Console.println("✓ Object detection enabled with YOLO");
|
||||
} catch (Exception e) {
|
||||
System.err.println("⚠️ Object detection disabled: " + e.getMessage());
|
||||
throw new IOException("Failed to initialize object detection", e);
|
||||
|
||||
@@ -1,687 +0,0 @@
|
||||
package com.auction;
|
||||
|
||||
/*
|
||||
* TroostwijkScraper
|
||||
*
|
||||
* This example shows how you could build a Java‐based scraper for the Dutch
|
||||
* auctions on Troostwijk Auctions. The scraper uses a combination of
|
||||
* HTTP requests and HTML parsing with the jsoup library to discover active
|
||||
* auctions, calls Troostwijk's internal JSON API to fetch lot (kavel) data
|
||||
* efficiently, writes the results into a local SQLite database, performs
|
||||
* object detection on lot images using OpenCV's DNN module, and sends
|
||||
* desktop/email notifications when bids change or lots are about to expire.
|
||||
* The implementation uses well known open source libraries for each of these
|
||||
* concerns. You can adjust the API endpoints and CSS selectors as
|
||||
* Troostwijk's site evolves. The code is organised into small helper
|
||||
* classes to make it easier to maintain.
|
||||
*
|
||||
* Dependencies (add these to your Maven/Gradle project):
|
||||
*
|
||||
* - org.jsoup:jsoup:1.17.2 – HTML parser and HTTP client.
|
||||
* - com.fasterxml.jackson.core:jackson-databind:2.17.0 – JSON parsing.
|
||||
* - org.xerial:sqlite-jdbc:3.45.1.0 – SQLite JDBC driver.
|
||||
* - com.sun.mail:javax.mail:1.6.2 – JavaMail for email notifications (free).
|
||||
* - org.openpnp:opencv:4.9.0-0 (with native libraries) – OpenCV for image
|
||||
* processing and object detection.
|
||||
*
|
||||
* Before running this program you must ensure that the native OpenCV
|
||||
* binaries are on your library path (e.g. via -Djava.library.path).
|
||||
* Desktop notifications work out of the box on Windows, macOS, and Linux.
|
||||
* For email notifications, you need a Gmail account with an app password
|
||||
* (free, requires 2FA enabled). See https://support.google.com/accounts/answer/185833
|
||||
*
|
||||
* The scraper performs four major tasks:
|
||||
* 1. Discover all auctions located in the Netherlands.
|
||||
* 2. For each auction, fetch all lots (kavels) including images and
|
||||
* bidding information, and persist the data into SQLite tables.
|
||||
* 3. Monitor bidding and closing times on a schedule and send desktop/email
|
||||
* notifications when bids change or lots are about to expire.
|
||||
* 4. Run object detection on downloaded lot images to automatically
|
||||
* label objects using a YOLO model. The results are stored in the
|
||||
* database for later search.
|
||||
*/
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.microsoft.playwright.Browser;
|
||||
import com.microsoft.playwright.BrowserType;
|
||||
import com.microsoft.playwright.Page;
|
||||
import com.microsoft.playwright.Playwright;
|
||||
import com.microsoft.playwright.options.WaitUntilState;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Main scraper class. It encapsulates the logic for scraping auctions,
|
||||
* persisting data, scheduling updates, and performing object detection.
|
||||
*/
|
||||
public class TroostwijkScraper {
|
||||
|
||||
// Base URLs – adjust these if Troostwijk changes their site structure
|
||||
private static final String AUCTIONS_PAGE = "https://www.troostwijkauctions.com/auctions";
|
||||
private static final String LOT_API = "https://api.troostwijkauctions.com/lot/7/list";
|
||||
private static final String CACHE_DB_PATH = "cache/page_cache.db";
|
||||
private static final long CACHE_EXPIRATION_HOURS = 24;
|
||||
private static final int RATE_LIMIT_MS = 200;
|
||||
|
||||
// HTTP client used for API calls
|
||||
private final HttpClient httpClient;
|
||||
private final ObjectMapper objectMapper;
|
||||
public final DatabaseService db;
|
||||
private final NotificationService notifier;
|
||||
private final ObjectDetectionService detector;
|
||||
private final CacheDatabase cacheDb;
|
||||
private final boolean useCache;
|
||||
private Playwright playwright;
|
||||
private Browser browser;
|
||||
|
||||
/**
 * Creates a scraper with page caching switched on.
 *
 * <p>Simply forwards every argument to the seven-argument constructor and
 * passes {@code true} for {@code useCache}.
 *
 * @param databasePath       location of the SQLite database file
 * @param notificationConfig "desktop" for desktop only, or "smtp:user:pass:toEmail" for email
 * @param unused             forwarded unchanged; retained for compatibility
 * @param yoloCfgPath        location of the YOLO configuration file
 * @param yoloWeightsPath    location of the YOLO weights file
 * @param classNamesPath     location of the file listing the class names
 * @throws SQLException declared by the delegated constructor
 * @throws IOException  declared by the delegated constructor
 */
public TroostwijkScraper(
        String databasePath,
        String notificationConfig,
        String unused,
        String yoloCfgPath,
        String yoloWeightsPath,
        String classNamesPath) throws SQLException, IOException {
    this(databasePath, notificationConfig, unused, yoloCfgPath, yoloWeightsPath, classNamesPath, true);
}
|
||||
|
||||
/**
 * Creates a scraper and wires up its collaborating services.
 *
 * <p>The HTTP client, JSON mapper, database service, notification service and
 * object-detection service are always created. The page cache is created only
 * when {@code useCache} is {@code true}. Once everything is constructed the
 * database schema is ensured and, if present, the cache is initialised.
 *
 * @param databasePath       location of the SQLite database file
 * @param notificationConfig "desktop" for desktop only, or "smtp:user:pass:toEmail" for email
 * @param unused             handed to the notification service as-is; retained for compatibility
 * @param yoloCfgPath        location of the YOLO configuration file
 * @param yoloWeightsPath    location of the YOLO weights file
 * @param classNamesPath     location of the file listing the class names
 * @param useCache           whether fetched pages are cached
 * @throws SQLException declared for the database set-up performed here
 * @throws IOException  declared for the service set-up performed here
 */
public TroostwijkScraper(
        String databasePath,
        String notificationConfig,
        String unused,
        String yoloCfgPath,
        String yoloWeightsPath,
        String classNamesPath,
        boolean useCache) throws SQLException, IOException {
    this.useCache = useCache;
    this.httpClient = HttpClient.newHttpClient();
    this.objectMapper = new ObjectMapper();
    this.db = new DatabaseService(databasePath);
    this.notifier = new NotificationService(notificationConfig, unused);
    this.detector = new ObjectDetectionService(yoloCfgPath, yoloWeightsPath, classNamesPath);
    if (useCache) {
        this.cacheDb = new CacheDatabase(CACHE_DB_PATH);
    } else {
        this.cacheDb = null;
    }

    // Schema first, then the cache (which exists exactly when caching is enabled).
    db.ensureSchema();
    if (cacheDb != null) {
        cacheDb.initialize();
    }
}
|
||||
|
||||
/**
|
||||
* Initializes Playwright browser for JavaScript-rendered pages.
|
||||
* Call this before using discoverDutchAuctions().
|
||||
*/
|
||||
public void initializeBrowser() {
|
||||
if (playwright == null) {
|
||||
IO.println("Initializing Playwright browser...");
|
||||
this.playwright = Playwright.create();
|
||||
this.browser = playwright.chromium().launch(new BrowserType.LaunchOptions()
|
||||
.setHeadless(true)
|
||||
.setArgs(Arrays.asList("--no-sandbox", "--disable-setuid-sandbox")));
|
||||
IO.println("✓ Browser ready");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes browser and cache resources.
|
||||
*/
|
||||
public void close() {
|
||||
if (browser != null) {
|
||||
browser.close();
|
||||
browser = null;
|
||||
}
|
||||
if (playwright != null) {
|
||||
playwright.close();
|
||||
playwright = null;
|
||||
}
|
||||
if (cacheDb != null) {
|
||||
cacheDb.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Discovers all active Dutch auctions by crawling the auctions page.
|
||||
*
|
||||
* Uses Playwright to render JavaScript-heavy pages and extract auction data.
|
||||
* Supports caching to avoid unnecessary page fetches. Filters auctions whose
|
||||
* location contains ", NL" (indicating the Netherlands). Each auction link
|
||||
* contains a unique sale ID in the format A1-xxxxx or A7-xxxxx.
|
||||
*
|
||||
* Auctions are saved to the database and can be retrieved with getDutchAuctions().
|
||||
*
|
||||
* @return a list of sale identifiers for auctions located in NL (legacy compatibility)
|
||||
*/
|
||||
public List<Integer> discoverDutchAuctions() {
|
||||
Set<Integer> saleIds = new HashSet<>();
|
||||
|
||||
// Check if browser is initialized
|
||||
if (browser == null) {
|
||||
initializeBrowser();
|
||||
}
|
||||
|
||||
var pageNumber = 1;
|
||||
var hasMorePages = true;
|
||||
|
||||
IO.println("Starting Dutch auction discovery from " + AUCTIONS_PAGE);
|
||||
|
||||
while (hasMorePages) {
|
||||
IO.println("\n[Page " + pageNumber + "] Fetching auctions...");
|
||||
|
||||
// Check cache first
|
||||
var html = loadFromCache(pageNumber);
|
||||
|
||||
if (html != null) {
|
||||
IO.println(" ✓ Loaded from cache");
|
||||
} else {
|
||||
// Fetch with Playwright
|
||||
html = fetchPageWithPlaywright(pageNumber);
|
||||
|
||||
if (html == null || html.isEmpty()) {
|
||||
IO.println(" ⚠️ Failed to fetch page, stopping pagination");
|
||||
break;
|
||||
}
|
||||
|
||||
IO.println(" ✓ Fetched from website");
|
||||
|
||||
// Save to cache
|
||||
if (useCache) {
|
||||
saveToCache(pageNumber, html);
|
||||
}
|
||||
|
||||
// Rate limiting
|
||||
try {
|
||||
Thread.sleep(RATE_LIMIT_MS);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse auctions from HTML (saves Dutch auctions to database)
|
||||
var foundOnPage = parseAuctionsFromHtml(html, saleIds);
|
||||
|
||||
if (foundOnPage == 0) {
|
||||
IO.println(" ⚠️ No Dutch auctions found on page, stopping pagination");
|
||||
hasMorePages = false;
|
||||
} else {
|
||||
IO.println(" ✓ Found " + foundOnPage + " Dutch auctions");
|
||||
pageNumber++;
|
||||
}
|
||||
}
|
||||
|
||||
IO.println("\n✓ Total Dutch auctions discovered: " + saleIds.size());
|
||||
return new ArrayList<>(saleIds);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetches a single page using Playwright
|
||||
*/
|
||||
private String fetchPageWithPlaywright(int pageNumber) {
|
||||
var url = pageNumber == 1
|
||||
? AUCTIONS_PAGE
|
||||
: AUCTIONS_PAGE + "?page=" + pageNumber;
|
||||
|
||||
try {
|
||||
var page = browser.newPage();
|
||||
|
||||
// Set user agent
|
||||
page.setExtraHTTPHeaders(Map.of(
|
||||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
));
|
||||
|
||||
// Navigate to page
|
||||
page.navigate(url, new Page.NavigateOptions()
|
||||
.setTimeout(30000)
|
||||
.setWaitUntil(WaitUntilState.NETWORKIDLE));
|
||||
|
||||
// Wait for auction listings to appear
|
||||
try {
|
||||
page.waitForSelector("a[href^='/a/']", new Page.WaitForSelectorOptions()
|
||||
.setTimeout(10000));
|
||||
} catch (Exception e) {
|
||||
// Continue even if selector not found
|
||||
IO.println(" ⚠️ Auction selector not found");
|
||||
}
|
||||
|
||||
// Get HTML content
|
||||
var html = page.content();
|
||||
page.close();
|
||||
|
||||
return html;
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println(" ⚠️ Playwright error: " + e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses auctions from HTML using JSoup and saves Dutch auctions to database.
|
||||
* Uses proper HTML parsing instead of regex for more reliable extraction.
|
||||
* @return number of Dutch auctions found on this page
|
||||
*/
|
||||
private int parseAuctionsFromHtml(String html, Set<Integer> saleIds) {
|
||||
var foundCount = 0;
|
||||
|
||||
try {
|
||||
var doc = org.jsoup.Jsoup.parse(html);
|
||||
|
||||
// Find all auction links (format: /a/title-A1-12345 or /a/title-A7-12345)
|
||||
var auctionLinks = doc.select("a[href^='/a/']");
|
||||
|
||||
for (var link : auctionLinks) {
|
||||
var href = link.attr("href");
|
||||
|
||||
// Extract auction ID from URL
|
||||
var pattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)");
|
||||
var matcher = pattern.matcher(href);
|
||||
|
||||
if (!matcher.find()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var typeNum = matcher.group(1);
|
||||
var auctionId = Integer.parseInt(matcher.group(2));
|
||||
|
||||
// Skip duplicates
|
||||
if (saleIds.contains(auctionId)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract auction info using JSoup
|
||||
var auction = extractAuctionInfo(link, href, auctionId, "A" + typeNum);
|
||||
|
||||
// Only keep Dutch auctions
|
||||
if (auction != null && "NL".equals(auction.country)) {
|
||||
saleIds.add(auctionId);
|
||||
foundCount++;
|
||||
|
||||
// Save to database
|
||||
try {
|
||||
db.upsertAuction(auction);
|
||||
IO.println(" Found Dutch auction: " + auctionId + " - " + auction.title + " (" + auction.location + ")");
|
||||
} catch (SQLException e) {
|
||||
System.err.println(" Failed to save auction: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println(" Error parsing HTML: " + e.getMessage());
|
||||
}
|
||||
|
||||
return foundCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts auction information from a link element using JSoup
|
||||
* This method intelligently parses the HTML structure to extract:
|
||||
* - Title
|
||||
* - Location (city and country)
|
||||
* - Lot count (if available)
|
||||
*/
|
||||
private AuctionInfo extractAuctionInfo(org.jsoup.nodes.Element link, String href, int auctionId, String type) {
|
||||
var auction = new AuctionInfo();
|
||||
auction.auctionId = auctionId;
|
||||
auction.type = type;
|
||||
auction.url = "https://www.troostwijkauctions.com" + href;
|
||||
|
||||
// Extract title from href (convert kebab-case to title)
|
||||
var titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-");
|
||||
var titleMatcher = titlePattern.matcher(href);
|
||||
if (titleMatcher.find()) {
|
||||
var slug = titleMatcher.group(1);
|
||||
auction.title = slug.replace("-", " ");
|
||||
// Capitalize first letter
|
||||
if (!auction.title.isEmpty()) {
|
||||
auction.title = auction.title.substring(0, 1).toUpperCase() + auction.title.substring(1);
|
||||
}
|
||||
} else {
|
||||
auction.title = "Unknown Auction";
|
||||
}
|
||||
|
||||
// Try to find title in link text (more accurate)
|
||||
var linkText = link.text();
|
||||
if (!linkText.isEmpty() && !linkText.matches(".*\\d+.*")) {
|
||||
// If link text doesn't contain numbers, it's likely the title
|
||||
var parts = linkText.split(",|\\d+");
|
||||
if (parts.length > 0 && parts[0].trim().length() > 5) {
|
||||
auction.title = parts[0].trim();
|
||||
}
|
||||
}
|
||||
|
||||
// Extract location using JSoup selectors
|
||||
// Look for <p> tags that contain location info
|
||||
var locationElements = link.select("p");
|
||||
for (var p : locationElements) {
|
||||
var text = p.text();
|
||||
|
||||
// Pattern: "City, Country" or "City, Region, Country"
|
||||
if (text.matches(".*[A-Z]{2}$")) {
|
||||
// Ends with 2-letter country code
|
||||
var countryCode = text.substring(text.length() - 2);
|
||||
var cityPart = text.substring(0, text.length() - 2).trim();
|
||||
|
||||
// Remove trailing comma or whitespace
|
||||
cityPart = cityPart.replaceAll("[,\\s]+$", "");
|
||||
|
||||
auction.country = countryCode;
|
||||
auction.city = cityPart;
|
||||
auction.location = cityPart + ", " + countryCode;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: check HTML content directly
|
||||
if (auction.country == null) {
|
||||
var html = link.html();
|
||||
var locPattern = java.util.regex.Pattern.compile(
|
||||
"([A-Za-z][A-Za-z\\s,\\-']+?)\\s*(?:<!--.*?-->)?\\s*</span>\\s*([A-Z]{2})(?![A-Za-z])");
|
||||
var locMatcher = locPattern.matcher(html);
|
||||
|
||||
if (locMatcher.find()) {
|
||||
var city = locMatcher.group(1).trim().replaceAll(",$", "");
|
||||
var country = locMatcher.group(2);
|
||||
auction.city = city;
|
||||
auction.country = country;
|
||||
auction.location = city + ", " + country;
|
||||
}
|
||||
}
|
||||
|
||||
// Extract lot count if available (kavels/lots)
|
||||
var textElements = link.select("*");
|
||||
for (var elem : textElements) {
|
||||
var text = elem.ownText();
|
||||
if (text.matches("\\d+\\s+(?:kavel|lot|item)s?.*")) {
|
||||
var countPattern = java.util.regex.Pattern.compile("(\\d+)");
|
||||
var countMatcher = countPattern.matcher(text);
|
||||
if (countMatcher.find()) {
|
||||
auction.lotCount = Integer.parseInt(countMatcher.group(1));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return auction;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads cached HTML for a page
|
||||
*/
|
||||
private String loadFromCache(int pageNumber) {
|
||||
if (!useCache || cacheDb == null) return null;
|
||||
|
||||
var url = pageNumber == 1
|
||||
? AUCTIONS_PAGE
|
||||
: AUCTIONS_PAGE + "?page=" + pageNumber;
|
||||
|
||||
return cacheDb.get(url);
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves HTML to cache
|
||||
*/
|
||||
private void saveToCache(int pageNumber, String html) {
|
||||
if (!useCache || cacheDb == null) return;
|
||||
|
||||
var url = pageNumber == 1
|
||||
? AUCTIONS_PAGE
|
||||
: AUCTIONS_PAGE + "?page=" + pageNumber;
|
||||
|
||||
cacheDb.put(url, html, CACHE_EXPIRATION_HOURS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all lots for a given sale ID using Troostwijk's internal JSON
|
||||
* API. The API accepts parameters such as batchSize, offset, and saleID.
|
||||
* A large batchSize returns many lots at once. We loop until no further
|
||||
* results are returned. Each JSON result is mapped to our Lot domain
|
||||
* object and persisted to the database.
|
||||
*
|
||||
* @param saleId the sale identifier
|
||||
*/
|
||||
public void fetchLotsForSale(int saleId) {
|
||||
var batchSize = 200;
|
||||
var offset = 0;
|
||||
var more = true;
|
||||
var totalLots = 0;
|
||||
|
||||
while (more) {
|
||||
try {
|
||||
var url = LOT_API + "?batchSize=" + batchSize
|
||||
+ "&listType=7&offset=" + offset
|
||||
+ "&sortOption=0&saleID=" + saleId
|
||||
+ "&parentID=0&relationID=0&buildversion=201807311";
|
||||
|
||||
IO.println(" Fetching lots from API (offset=" + offset + ")...");
|
||||
|
||||
var request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(url))
|
||||
.header("Accept", "application/json")
|
||||
.header("User-Agent", "Mozilla/5.0")
|
||||
.GET()
|
||||
.build();
|
||||
|
||||
var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
|
||||
if (response.statusCode() != 200) {
|
||||
System.err.println(" ⚠️ API call failed for sale " + saleId);
|
||||
System.err.println(" Status: " + response.statusCode());
|
||||
System.err.println(" Response: " + response.body().substring(0, Math.min(200, response.body().length())));
|
||||
break;
|
||||
}
|
||||
|
||||
var root = objectMapper.readTree(response.body());
|
||||
var results = root.path("results");
|
||||
|
||||
if (!results.isArray() || results.isEmpty()) {
|
||||
if (offset == 0) {
|
||||
IO.println(" ⚠️ No lots found for sale " + saleId);
|
||||
IO.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length())));
|
||||
}
|
||||
more = false;
|
||||
break;
|
||||
}
|
||||
var lotsInBatch = results.size();
|
||||
IO.println(" Found " + lotsInBatch + " lots in this batch");
|
||||
|
||||
for (var node : results) {
|
||||
var lot = new Lot();
|
||||
lot.saleId = saleId;
|
||||
lot.lotId = node.path("lotID").asInt();
|
||||
lot.title = node.path("t").asText();
|
||||
lot.description = node.path("d").asText();
|
||||
lot.manufacturer = node.path("mf").asText();
|
||||
lot.type = node.path("typ").asText();
|
||||
lot.year = node.path("yb").asInt();
|
||||
lot.category = node.path("lc").asText();
|
||||
// Current bid; field names may differ (e.g. currentBid or cb)
|
||||
lot.currentBid = node.path("cb").asDouble();
|
||||
lot.currency = node.path("cu").asText();
|
||||
lot.url = "https://www.troostwijkauctions.com/nl" + node.path("url").asText();
|
||||
|
||||
// Save basic lot info into DB
|
||||
db.upsertLot(lot);
|
||||
totalLots++;
|
||||
|
||||
// Download images and perform object detection
|
||||
List<String> imageUrls = new ArrayList<>();
|
||||
var imgs = node.path("imgs");
|
||||
if (imgs.isArray()) {
|
||||
for (var imgNode : imgs) {
|
||||
var imgUrl = imgNode.asText();
|
||||
imageUrls.add(imgUrl);
|
||||
}
|
||||
}
|
||||
|
||||
// Download and analyze images (optional, can be slow)
|
||||
for (var imgUrl : imageUrls) {
|
||||
var fileName = downloadImage(imgUrl, saleId, lot.lotId);
|
||||
if (fileName != null) {
|
||||
// run object detection once per image
|
||||
var labels = detector.detectObjects(fileName);
|
||||
db.insertImage(lot.lotId, imgUrl, fileName, labels);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
IO.println(" ✓ Processed " + totalLots + " lots so far");
|
||||
offset += batchSize;
|
||||
} catch (IOException | InterruptedException e) {
|
||||
System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage());
|
||||
more = false;
|
||||
} catch (SQLException e) {
|
||||
System.err.println("Database error: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads an image from the given URL to a local directory. Images
|
||||
* are stored under "images/<saleId>/<lotId>/" to keep them organised.
|
||||
*
|
||||
* @param imageUrl remote image URL
|
||||
* @param saleId sale identifier
|
||||
* @param lotId lot identifier
|
||||
* @return absolute path to saved file or null on failure
|
||||
*/
|
||||
private String downloadImage(String imageUrl, int saleId, int lotId) {
|
||||
try {
|
||||
var request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(imageUrl))
|
||||
.GET()
|
||||
.build();
|
||||
var response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
|
||||
if (response.statusCode() == 200) {
|
||||
var dir = Paths.get("images", String.valueOf(saleId), String.valueOf(lotId));
|
||||
Files.createDirectories(dir);
|
||||
var fileName = Paths.get(imageUrl).getFileName().toString();
|
||||
var dest = dir.resolve(fileName);
|
||||
Files.copy(response.body(), dest);
|
||||
return dest.toAbsolutePath().toString();
|
||||
}
|
||||
} catch (IOException | InterruptedException e) {
|
||||
System.err.println("Failed to download image " + imageUrl + ": " + e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules periodic monitoring of all lots. The scheduler runs every
|
||||
* hour to refresh current bids and closing times. For lots that
|
||||
* are within 30 minutes of closing, it increases the polling frequency
|
||||
* automatically. When a new bid is detected or a lot is about to
|
||||
* expire, a Pushover notification is sent to the configured user.
|
||||
* Note: In production, ensure proper shutdown handling for the scheduler.
|
||||
*/
|
||||
public void scheduleMonitoring() {
|
||||
var scheduler = Executors.newScheduledThreadPool(1);
|
||||
scheduler.scheduleAtFixedRate(() -> {
|
||||
try {
|
||||
var activeLots = db.getActiveLots();
|
||||
for (var lot : activeLots) {
|
||||
// refresh the lot's bidding information via API
|
||||
refreshLotBid(lot);
|
||||
// check closing time to adjust monitoring
|
||||
var minutesLeft = lot.minutesUntilClose();
|
||||
if (minutesLeft < 30) {
|
||||
// send warning when within 5 minutes
|
||||
if (minutesLeft <= 5 && !lot.closingNotified) {
|
||||
notifier.sendNotification("Kavel " + lot.lotId + " sluit binnen " + minutesLeft + " min.",
|
||||
"Lot nearing closure", 1);
|
||||
lot.closingNotified = true;
|
||||
db.updateLotNotificationFlags(lot);
|
||||
}
|
||||
// schedule additional quick check for this lot
|
||||
scheduler.schedule(() -> refreshLotBid(lot), 5, TimeUnit.MINUTES);
|
||||
}
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
System.err.println("Error during scheduled monitoring: " + e.getMessage());
|
||||
}
|
||||
}, 0, 1, TimeUnit.HOURS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Refreshes the bid for a single lot and sends notification if it has
|
||||
* changed since the last check. The method calls the same API used for
|
||||
* initial scraping but only extracts the current bid for the given lot.
|
||||
*
|
||||
* @param lot the lot to refresh
|
||||
*/
|
||||
private void refreshLotBid(Lot lot) {
|
||||
try {
|
||||
var url = LOT_API + "?batchSize=1&listType=7&offset=0&sortOption=0&saleID=" + lot.saleId
|
||||
+ "&parentID=0&relationID=0&buildversion=201807311&lotID=" + lot.lotId;
|
||||
var request = HttpRequest.newBuilder().uri(URI.create(url)).GET().build();
|
||||
var response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
if (response.statusCode() != 200) return;
|
||||
var root = objectMapper.readTree(response.body());
|
||||
var results = root.path("results");
|
||||
if (results.isArray() && !results.isEmpty()) {
|
||||
var node = results.get(0);
|
||||
var newBid = node.path("cb").asDouble();
|
||||
if (Double.compare(newBid, lot.currentBid) > 0) {
|
||||
var previous = lot.currentBid;
|
||||
lot.currentBid = newBid;
|
||||
db.updateLotCurrentBid(lot);
|
||||
var msg = String.format("Nieuw bod op kavel %d: €%.2f (was €%.2f)", lot.lotId, newBid, previous);
|
||||
notifier.sendNotification(msg, "Kavel bieding update", 0);
|
||||
}
|
||||
}
|
||||
} catch (IOException | InterruptedException | SQLException e) {
|
||||
System.err.println("Failed to refresh bid for lot " + lot.lotId + ": " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints statistics about the data in the database.
|
||||
*/
|
||||
public void printDatabaseStats() {
|
||||
try {
|
||||
var allLots = db.getAllLots();
|
||||
var imageCount = db.getImageCount();
|
||||
|
||||
IO.println(" Total lots in database: " + allLots.size());
|
||||
IO.println(" Total images downloaded: " + imageCount);
|
||||
|
||||
if (!allLots.isEmpty()) {
|
||||
var totalBids = allLots.stream().mapToDouble(l -> l.currentBid).sum();
|
||||
IO.println(" Total current bids: €" + String.format("%.2f", totalBids));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
System.err.println(" ⚠️ Could not retrieve database stats: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Domain classes and services
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
}
|
||||
@@ -30,171 +30,6 @@ public class AuctionParsingTest {
|
||||
System.out.println("Loaded test HTML (" + testHtml.length() + " characters)");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseAuctionsFromTestHtml() {
|
||||
// Parse the HTML with JSoup
|
||||
Document doc = Jsoup.parse(testHtml);
|
||||
|
||||
// Find all auction links
|
||||
Elements auctionLinks = doc.select("a[href^='/a/']");
|
||||
|
||||
System.out.println("\n=== Auction Parsing Test ===");
|
||||
System.out.println("Found " + auctionLinks.size() + " auction links");
|
||||
|
||||
List<AuctionInfo> auctions = new ArrayList<>();
|
||||
int count = 0;
|
||||
|
||||
for (Element link : auctionLinks) {
|
||||
String href = link.attr("href");
|
||||
|
||||
// Extract auction ID from URL
|
||||
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("/a/.*?-A([17])-(\\d+)");
|
||||
java.util.regex.Matcher matcher = pattern.matcher(href);
|
||||
|
||||
if (!matcher.find()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String typeNum = matcher.group(1);
|
||||
int auctionId = Integer.parseInt(matcher.group(2));
|
||||
|
||||
// Extract auction info using IMPROVED text-based method
|
||||
AuctionInfo auction = extractAuctionInfoFromText(link, href, auctionId, "A" + typeNum);
|
||||
auctions.add(auction);
|
||||
|
||||
// Print the first 10 auctions for verification
|
||||
if (count < 10) {
|
||||
System.out.println("\n--- Auction #" + (count + 1) + " ---");
|
||||
System.out.println("ID: " + auction.auctionId);
|
||||
System.out.println("Type: " + auction.type);
|
||||
System.out.println("Title: " + auction.title);
|
||||
System.out.println("Location: " + auction.location);
|
||||
System.out.println("City: " + auction.city);
|
||||
System.out.println("Country: " + auction.country);
|
||||
System.out.println("Lot Count: " + auction.lotCount);
|
||||
System.out.println("URL: " + auction.url);
|
||||
|
||||
// Print ALL visible text for debugging
|
||||
System.out.println("\nAll visible text from link:");
|
||||
System.out.println("\"" + link.text() + "\"");
|
||||
}
|
||||
|
||||
count++;
|
||||
}
|
||||
|
||||
System.out.println("\n=== Summary ===");
|
||||
System.out.println("Total auctions parsed: " + auctions.size());
|
||||
|
||||
// Count by country
|
||||
long nlCount = auctions.stream().filter(a -> "NL".equals(a.country)).count();
|
||||
long bgCount = auctions.stream().filter(a -> "BG".equals(a.country)).count();
|
||||
long deCount = auctions.stream().filter(a -> "DE".equals(a.country)).count();
|
||||
long beCount = auctions.stream().filter(a -> "BE".equals(a.country)).count();
|
||||
|
||||
System.out.println("Dutch (NL) auctions: " + nlCount);
|
||||
System.out.println("Bulgarian (BG) auctions: " + bgCount);
|
||||
System.out.println("German (DE) auctions: " + deCount);
|
||||
System.out.println("Belgian (BE) auctions: " + beCount);
|
||||
System.out.println("Unknown location: " + auctions.stream().filter(a -> a.country == null).count());
|
||||
|
||||
// Assertions
|
||||
assertTrue(auctions.size() > 0, "Should find at least one auction");
|
||||
|
||||
// Verify all auctions have basic info
|
||||
for (AuctionInfo auction : auctions) {
|
||||
assertNotNull(auction.title, "Title should not be null for auction " + auction.auctionId);
|
||||
assertTrue(auction.title.length() > 0, "Title should not be empty for auction " + auction.auctionId);
|
||||
assertNotNull(auction.url, "URL should not be null for auction " + auction.auctionId);
|
||||
assertTrue(auction.auctionId > 0, "Auction ID should be positive");
|
||||
assertNotNull(auction.location, "Location should not be null for auction " + auction.auctionId);
|
||||
assertNotNull(auction.country, "Country should not be null for auction " + auction.auctionId);
|
||||
assertTrue(auction.lotCount > 0, "Lot count should be positive for auction " + auction.auctionId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* IMPROVED: Extract auction info using .text() method
|
||||
* This parses the human-readable text instead of HTML markup
|
||||
*
|
||||
* Expected format: "[day] om [time] [lot_count] [title] [city], [CC]"
|
||||
* Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE"
|
||||
*/
|
||||
private AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) {
|
||||
AuctionInfo auction = new AuctionInfo();
|
||||
auction.auctionId = auctionId;
|
||||
auction.type = type;
|
||||
auction.url = "https://www.troostwijkauctions.com" + href;
|
||||
|
||||
// Get ALL visible text from the link (this removes all HTML tags)
|
||||
String allText = link.text().trim();
|
||||
|
||||
// Pattern: "[day] om [time] [lot_count] [title] [city], [CC]"
|
||||
// Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE"
|
||||
|
||||
// Step 1: Extract closing time (day + time)
|
||||
java.util.regex.Pattern timePattern = java.util.regex.Pattern.compile(
|
||||
"(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})"
|
||||
);
|
||||
java.util.regex.Matcher timeMatcher = timePattern.matcher(allText);
|
||||
|
||||
String remainingText = allText;
|
||||
if (timeMatcher.find()) {
|
||||
String day = timeMatcher.group(1); // e.g., "woensdag"
|
||||
String time = timeMatcher.group(2); // e.g., "18:00"
|
||||
|
||||
// Store closing time info (could be parsed to LocalDateTime with proper date)
|
||||
System.out.println(" Closing time: " + day + " om " + time);
|
||||
|
||||
// Remove the time part from text
|
||||
remainingText = allText.substring(timeMatcher.end()).trim();
|
||||
}
|
||||
|
||||
// Step 2: Extract location from the END (always ends with ", CC")
|
||||
java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile(
|
||||
"([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$"
|
||||
);
|
||||
java.util.regex.Matcher locMatcher = locPattern.matcher(remainingText);
|
||||
|
||||
if (locMatcher.find()) {
|
||||
auction.city = locMatcher.group(1).trim();
|
||||
auction.country = locMatcher.group(2);
|
||||
auction.location = auction.city + ", " + auction.country;
|
||||
|
||||
// Remove location from end
|
||||
remainingText = remainingText.substring(0, locMatcher.start()).trim();
|
||||
}
|
||||
|
||||
// Step 3: Extract lot count (first number after time)
|
||||
java.util.regex.Pattern lotPattern = java.util.regex.Pattern.compile(
|
||||
"^(\\d+)\\s+"
|
||||
);
|
||||
java.util.regex.Matcher lotMatcher = lotPattern.matcher(remainingText);
|
||||
|
||||
if (lotMatcher.find()) {
|
||||
auction.lotCount = Integer.parseInt(lotMatcher.group(1));
|
||||
|
||||
// Remove lot count from beginning
|
||||
remainingText = remainingText.substring(lotMatcher.end()).trim();
|
||||
}
|
||||
|
||||
// Step 4: What remains is the title
|
||||
if (!remainingText.isEmpty()) {
|
||||
auction.title = remainingText;
|
||||
} else {
|
||||
// Fallback: use URL slug for title
|
||||
java.util.regex.Pattern titlePattern = java.util.regex.Pattern.compile("/a/(.+?)-A[17]-");
|
||||
java.util.regex.Matcher titleMatcher = titlePattern.matcher(href);
|
||||
if (titleMatcher.find()) {
|
||||
String slug = titleMatcher.group(1).replace("-", " ").replace("%7C", "|");
|
||||
auction.title = slug.substring(0, 1).toUpperCase() + slug.substring(1);
|
||||
} else {
|
||||
auction.title = "Unknown Auction";
|
||||
}
|
||||
}
|
||||
|
||||
return auction;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLocationPatternMatching() {
|
||||
System.out.println("\n=== Location Pattern Tests ===");
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
package com.auction;
|
||||
|
||||
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
|
||||
import com.vladsch.flexmark.util.data.DataHolder;
|
||||
import net.bytebuddy.build.Plugin.Engine.Source.Element;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import org.junit.jupiter.api.extension.Extensions;
|
||||
public class Parser {
|
||||
|
||||
public record AuctionItem(
|
||||
|
||||
BIN
troostwijk.db
BIN
troostwijk.db
Binary file not shown.
Reference in New Issue
Block a user