start
This commit is contained in:
8
pom.xml
8
pom.xml
@@ -14,8 +14,8 @@
|
|||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<maven.compiler.source>21</maven.compiler.source>
|
<maven.compiler.source>25</maven.compiler.source>
|
||||||
<maven.compiler.target>21</maven.compiler.target>
|
<maven.compiler.target>25</maven.compiler.target>
|
||||||
<jackson.version>2.17.0</jackson.version>
|
<jackson.version>2.17.0</jackson.version>
|
||||||
<opencv.version>4.9.0-0</opencv.version>
|
<opencv.version>4.9.0-0</opencv.version>
|
||||||
</properties>
|
</properties>
|
||||||
@@ -93,8 +93,8 @@
|
|||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<version>3.11.0</version>
|
<version>3.11.0</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
<source>21</source>
|
<source>25</source>
|
||||||
<target>21</target>
|
<target>25</target>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
|
||||||
|
|||||||
24
src/main/java/com/auction/AuctionInfo.java
Normal file
24
src/main/java/com/auction/AuctionInfo.java
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
package com.auction;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
/**
|
||||||
|
* Represents auction metadata (veiling informatie)
|
||||||
|
*/
|
||||||
|
public final class AuctionInfo {
|
||||||
|
|
||||||
|
public int auctionId; // Unique auction ID (from URL)
|
||||||
|
public String title; // Auction title
|
||||||
|
public String location; // Location (e.g., "Amsterdam, NL")
|
||||||
|
public String city; // City name
|
||||||
|
public String country; // Country code (e.g., "NL")
|
||||||
|
public String url; // Full auction URL
|
||||||
|
public String type; // Auction type (A1 or A7)
|
||||||
|
public int lotCount; // Number of lots/kavels
|
||||||
|
public LocalDateTime closingTime; // Closing time if available
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("Auction{id=%d, type=%s, title='%s', location='%s', lots=%d, url='%s'}",
|
||||||
|
auctionId, type, title, location, lotCount, url);
|
||||||
|
}
|
||||||
|
}
|
||||||
165
src/main/java/com/auction/CacheDatabase.java
Normal file
165
src/main/java/com/auction/CacheDatabase.java
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
package com.auction;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.time.Instant;
|
||||||
|
/**
 * SQLite-based caching system for HTML pages with expiration support.
 *
 * <p>Entries live in a single {@code page_cache} table keyed by URL. Timestamps are
 * stored as Unix epoch seconds. One JDBC connection is opened by {@link #initialize()}
 * and reused until {@link #close()}.
 *
 * <p>Read/write operations are best-effort: SQL failures, and calls made while no
 * connection is open, are reported on {@code System.err} instead of being thrown.
 * All public methods are synchronized on this instance.
 */
class CacheDatabase implements AutoCloseable {

    private final String dbPath;
    private Connection connection;

    /**
     * @param dbPath path of the SQLite database file; nothing is opened until
     *               {@link #initialize()} is called
     */
    public CacheDatabase(String dbPath) {
        this.dbPath = dbPath;
    }

    /**
     * Opens the database, creates the schema if needed and purges expired entries.
     * Calling it again closes the previously opened connection first.
     *
     * @throws SQLException if the connection cannot be opened or the schema cannot be created
     * @throws IOException  if the parent directory of the database file cannot be created
     */
    public synchronized void initialize() throws SQLException, IOException {
        // Create cache directory if it doesn't exist
        var cacheDir = Paths.get(dbPath).getParent();
        if (cacheDir != null) {
            Files.createDirectories(cacheDir);
        }

        // Do not leak an earlier connection when initialize() is called twice
        if (connection != null) {
            close();
        }
        connection = DriverManager.getConnection("jdbc:sqlite:" + dbPath);

        // Cache table with URL as primary key
        var createTable = """
                CREATE TABLE IF NOT EXISTS page_cache (
                    url TEXT PRIMARY KEY,
                    html TEXT NOT NULL,
                    cached_at INTEGER NOT NULL,
                    expires_at INTEGER NOT NULL
                )
                """;

        try (var stmt = connection.createStatement()) {
            stmt.execute(createTable);
            // Index on expires_at for efficient cleanup
            stmt.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON page_cache(expires_at)");
        }

        // Clean up expired entries on initialization
        cleanupExpired();

        System.out.println("✓ Cache database initialized");
    }

    /**
     * Get cached HTML for a URL if it exists and hasn't expired.
     *
     * @param url The URL to look up
     * @return Cached HTML, or null if not found, expired, the cache is not
     *         initialized, or the lookup failed
     */
    public synchronized String get(String url) {
        if (isUnavailable("Cache read error")) {
            return null;
        }

        var sql = "SELECT html FROM page_cache WHERE url = ? AND expires_at > ?";

        try (var ps = connection.prepareStatement(sql)) {
            ps.setString(1, url);
            ps.setLong(2, Instant.now().getEpochSecond());

            try (var rs = ps.executeQuery()) {
                if (rs.next()) {
                    return rs.getString("html");
                }
            }
        } catch (SQLException e) {
            System.err.println("Cache read error: " + e.getMessage());
        }

        return null;
    }

    /**
     * Store HTML in cache with expiration time, replacing any existing entry for the URL.
     *
     * @param url             The URL to cache
     * @param html            The HTML content
     * @param expirationHours Hours until cache expires
     */
    public synchronized void put(String url, String html, long expirationHours) {
        if (isUnavailable("Cache write error")) {
            return;
        }

        var sql = """
                INSERT OR REPLACE INTO page_cache (url, html, cached_at, expires_at)
                VALUES (?, ?, ?, ?)
                """;

        var now = Instant.now().getEpochSecond();
        var expiresAt = now + (expirationHours * 3600);

        try (var ps = connection.prepareStatement(sql)) {
            ps.setString(1, url);
            ps.setString(2, html);
            ps.setLong(3, now);
            ps.setLong(4, expiresAt);
            ps.executeUpdate();
        } catch (SQLException e) {
            System.err.println("Cache write error: " + e.getMessage());
        }
    }

    /**
     * Remove expired cache entries (those whose expiry is at or before the current time).
     */
    public synchronized void cleanupExpired() {
        if (isUnavailable("Cache cleanup error")) {
            return;
        }

        var sql = "DELETE FROM page_cache WHERE expires_at <= ?";

        try (var ps = connection.prepareStatement(sql)) {
            ps.setLong(1, Instant.now().getEpochSecond());
            var deleted = ps.executeUpdate();
            if (deleted > 0) {
                System.out.println("✓ Cleaned up " + deleted + " expired cache entries");
            }
        } catch (SQLException e) {
            System.err.println("Cache cleanup error: " + e.getMessage());
        }
    }

    /**
     * Print cache statistics (total, valid and expired entry counts plus total HTML
     * size in KB) to standard output.
     */
    public synchronized void printStats() {
        if (isUnavailable("Cache stats error")) {
            return;
        }

        var sql = "SELECT COUNT(*) as total, " +
                "SUM(CASE WHEN expires_at > ? THEN 1 ELSE 0 END) as valid, " +
                "SUM(LENGTH(html)) as total_size " +
                "FROM page_cache";

        try (var ps = connection.prepareStatement(sql)) {
            ps.setLong(1, Instant.now().getEpochSecond());

            try (var rs = ps.executeQuery()) {
                if (rs.next()) {
                    var total = rs.getInt("total");
                    var valid = rs.getInt("valid");
                    var size = rs.getLong("total_size");

                    System.out.println("\n=== Cache Statistics ===");
                    System.out.println("Total entries: " + total);
                    System.out.println("Valid entries: " + valid);
                    System.out.println("Expired entries: " + (total - valid));
                    System.out.println("Total size: " + (size / 1024) + " KB");
                }
            }
        } catch (SQLException e) {
            System.err.println("Cache stats error: " + e.getMessage());
        }
    }

    /**
     * Close database connection. Safe to call when nothing is open and safe to call
     * repeatedly; a failure to close is logged, not thrown.
     */
    @Override
    public synchronized void close() {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                System.err.println("Error closing cache database: " + e.getMessage());
            } finally {
                // Forget the handle so later calls take the "not initialized" path
                connection = null;
            }
        }
    }

    /**
     * Reports on stderr, using the caller's error prefix, when no connection is open.
     *
     * @return true if the calling operation must be skipped
     */
    private boolean isUnavailable(String errorPrefix) {
        if (connection != null) {
            return false;
        }
        System.err.println(errorPrefix + ": cache database is not initialized");
        return true;
    }
}
|
||||||
303
src/main/java/com/auction/DatabaseService.java
Normal file
303
src/main/java/com/auction/DatabaseService.java
Normal file
@@ -0,0 +1,303 @@
|
|||||||
|
package com.auction;
|
||||||
|
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
/**
|
||||||
|
* Service for persisting auctions, lots, images, and object labels into
|
||||||
|
* a SQLite database. Uses the Xerial JDBC driver which connects to
|
||||||
|
* SQLite via a URL of the form "jdbc:sqlite:path_to_file"【329850066306528†L40-L63】.
|
||||||
|
*/
|
||||||
|
public class DatabaseService {
|
||||||
|
|
||||||
|
private final String url;
|
||||||
|
DatabaseService(String dbPath) {
|
||||||
|
this.url = "jdbc:sqlite:" + dbPath;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Creates tables if they do not already exist. The schema includes
|
||||||
|
* tables for auctions, lots, images, and object labels. This method is
|
||||||
|
* idempotent; it can be called multiple times.
|
||||||
|
*/
|
||||||
|
void ensureSchema() throws SQLException {
|
||||||
|
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||||
|
// Auctions table (veilingen)
|
||||||
|
stmt.execute("CREATE TABLE IF NOT EXISTS auctions ("
|
||||||
|
+ "auction_id INTEGER PRIMARY KEY,"
|
||||||
|
+ "title TEXT NOT NULL,"
|
||||||
|
+ "location TEXT,"
|
||||||
|
+ "city TEXT,"
|
||||||
|
+ "country TEXT,"
|
||||||
|
+ "url TEXT NOT NULL,"
|
||||||
|
+ "type TEXT,"
|
||||||
|
+ "lot_count INTEGER DEFAULT 0,"
|
||||||
|
+ "closing_time TEXT,"
|
||||||
|
+ "discovered_at INTEGER" // Unix timestamp
|
||||||
|
+ ")");
|
||||||
|
|
||||||
|
// Sales table (legacy - keep for compatibility)
|
||||||
|
stmt.execute("CREATE TABLE IF NOT EXISTS sales ("
|
||||||
|
+ "sale_id INTEGER PRIMARY KEY,"
|
||||||
|
+ "title TEXT,"
|
||||||
|
+ "location TEXT,"
|
||||||
|
+ "closing_time TEXT"
|
||||||
|
+ ")");
|
||||||
|
|
||||||
|
// Lots table
|
||||||
|
stmt.execute("CREATE TABLE IF NOT EXISTS lots ("
|
||||||
|
+ "lot_id INTEGER PRIMARY KEY,"
|
||||||
|
+ "sale_id INTEGER,"
|
||||||
|
+ "title TEXT,"
|
||||||
|
+ "description TEXT,"
|
||||||
|
+ "manufacturer TEXT,"
|
||||||
|
+ "type TEXT,"
|
||||||
|
+ "year INTEGER,"
|
||||||
|
+ "category TEXT,"
|
||||||
|
+ "current_bid REAL,"
|
||||||
|
+ "currency TEXT,"
|
||||||
|
+ "url TEXT,"
|
||||||
|
+ "closing_time TEXT,"
|
||||||
|
+ "closing_notified INTEGER DEFAULT 0,"
|
||||||
|
+ "FOREIGN KEY (sale_id) REFERENCES auctions(auction_id)"
|
||||||
|
+ ")");
|
||||||
|
|
||||||
|
// Images table
|
||||||
|
stmt.execute("CREATE TABLE IF NOT EXISTS images ("
|
||||||
|
+ "id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||||
|
+ "lot_id INTEGER,"
|
||||||
|
+ "url TEXT,"
|
||||||
|
+ "file_path TEXT,"
|
||||||
|
+ "labels TEXT,"
|
||||||
|
+ "FOREIGN KEY (lot_id) REFERENCES lots(lot_id)"
|
||||||
|
+ ")");
|
||||||
|
|
||||||
|
// Create indexes for better query performance
|
||||||
|
stmt.execute("CREATE INDEX IF NOT EXISTS idx_auctions_country ON auctions(country)");
|
||||||
|
stmt.execute("CREATE INDEX IF NOT EXISTS idx_lots_sale_id ON lots(sale_id)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts or updates an auction record
|
||||||
|
*/
|
||||||
|
synchronized void upsertAuction(AuctionInfo auction) throws SQLException {
|
||||||
|
var sql = "INSERT INTO auctions (auction_id, title, location, city, country, url, type, lot_count, closing_time, discovered_at)"
|
||||||
|
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
||||||
|
+ " ON CONFLICT(auction_id) DO UPDATE SET "
|
||||||
|
+ "title = excluded.title, location = excluded.location, city = excluded.city, "
|
||||||
|
+ "country = excluded.country, url = excluded.url, type = excluded.type, "
|
||||||
|
+ "lot_count = excluded.lot_count, closing_time = excluded.closing_time";
|
||||||
|
|
||||||
|
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
|
||||||
|
ps.setInt(1, auction.auctionId);
|
||||||
|
ps.setString(2, auction.title);
|
||||||
|
ps.setString(3, auction.location);
|
||||||
|
ps.setString(4, auction.city);
|
||||||
|
ps.setString(5, auction.country);
|
||||||
|
ps.setString(6, auction.url);
|
||||||
|
ps.setString(7, auction.type);
|
||||||
|
ps.setInt(8, auction.lotCount);
|
||||||
|
ps.setString(9, auction.closingTime != null ? auction.closingTime.toString() : null);
|
||||||
|
ps.setLong(10, Instant.now().getEpochSecond());
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves all auctions from the database
|
||||||
|
*/
|
||||||
|
synchronized List<AuctionInfo> getAllAuctions() throws SQLException {
|
||||||
|
List<AuctionInfo> auctions = new ArrayList<>();
|
||||||
|
var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time FROM auctions";
|
||||||
|
|
||||||
|
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||||
|
var rs = stmt.executeQuery(sql);
|
||||||
|
while (rs.next()) {
|
||||||
|
var auction = new AuctionInfo();
|
||||||
|
auction.auctionId = rs.getInt("auction_id");
|
||||||
|
auction.title = rs.getString("title");
|
||||||
|
auction.location = rs.getString("location");
|
||||||
|
auction.city = rs.getString("city");
|
||||||
|
auction.country = rs.getString("country");
|
||||||
|
auction.url = rs.getString("url");
|
||||||
|
auction.type = rs.getString("type");
|
||||||
|
auction.lotCount = rs.getInt("lot_count");
|
||||||
|
var closing = rs.getString("closing_time");
|
||||||
|
if (closing != null) {
|
||||||
|
auction.closingTime = LocalDateTime.parse(closing);
|
||||||
|
}
|
||||||
|
auctions.add(auction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return auctions;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves auctions by country code
|
||||||
|
*/
|
||||||
|
synchronized List<AuctionInfo> getAuctionsByCountry(String countryCode) throws SQLException {
|
||||||
|
List<AuctionInfo> auctions = new ArrayList<>();
|
||||||
|
var sql = "SELECT auction_id, title, location, city, country, url, type, lot_count, closing_time "
|
||||||
|
+ "FROM auctions WHERE country = ?";
|
||||||
|
|
||||||
|
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
|
||||||
|
ps.setString(1, countryCode);
|
||||||
|
var rs = ps.executeQuery();
|
||||||
|
while (rs.next()) {
|
||||||
|
var auction = new AuctionInfo();
|
||||||
|
auction.auctionId = rs.getInt("auction_id");
|
||||||
|
auction.title = rs.getString("title");
|
||||||
|
auction.location = rs.getString("location");
|
||||||
|
auction.city = rs.getString("city");
|
||||||
|
auction.country = rs.getString("country");
|
||||||
|
auction.url = rs.getString("url");
|
||||||
|
auction.type = rs.getString("type");
|
||||||
|
auction.lotCount = rs.getInt("lot_count");
|
||||||
|
var closing = rs.getString("closing_time");
|
||||||
|
if (closing != null) {
|
||||||
|
auction.closingTime = LocalDateTime.parse(closing);
|
||||||
|
}
|
||||||
|
auctions.add(auction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return auctions;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts or updates a lot record. Uses INSERT OR REPLACE to
|
||||||
|
* implement upsert semantics so that existing rows are replaced.
|
||||||
|
*/
|
||||||
|
synchronized void upsertLot(Lot lot) throws SQLException {
|
||||||
|
var sql = "INSERT INTO lots (lot_id, sale_id, title, description, manufacturer, type, year, category, current_bid, currency, url, closing_time, closing_notified)"
|
||||||
|
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
||||||
|
+ " ON CONFLICT(lot_id) DO UPDATE SET "
|
||||||
|
+ "sale_id = excluded.sale_id, title = excluded.title, description = excluded.description, "
|
||||||
|
+ "manufacturer = excluded.manufacturer, type = excluded.type, year = excluded.year, category = excluded.category, "
|
||||||
|
+ "current_bid = excluded.current_bid, currency = excluded.currency, url = excluded.url, closing_time = excluded.closing_time";
|
||||||
|
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(sql)) {
|
||||||
|
ps.setInt(1, lot.lotId);
|
||||||
|
ps.setInt(2, lot.saleId);
|
||||||
|
ps.setString(3, lot.title);
|
||||||
|
ps.setString(4, lot.description);
|
||||||
|
ps.setString(5, lot.manufacturer);
|
||||||
|
ps.setString(6, lot.type);
|
||||||
|
ps.setInt(7, lot.year);
|
||||||
|
ps.setString(8, lot.category);
|
||||||
|
ps.setDouble(9, lot.currentBid);
|
||||||
|
ps.setString(10, lot.currency);
|
||||||
|
ps.setString(11, lot.url);
|
||||||
|
ps.setString(12, lot.closingTime != null ? lot.closingTime.toString() : null);
|
||||||
|
ps.setInt(13, lot.closingNotified ? 1 : 0);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts a new image record. Each image is associated with a lot and
|
||||||
|
* stores both the original URL and the local file path. Detected
|
||||||
|
* labels are stored as a comma separated string.
|
||||||
|
*/
|
||||||
|
synchronized void insertImage(int lotId, String url, String filePath, List<String> labels) throws SQLException {
|
||||||
|
var sql = "INSERT INTO images (lot_id, url, file_path, labels) VALUES (?, ?, ?, ?)";
|
||||||
|
try (var conn = DriverManager.getConnection(this.url); var ps = conn.prepareStatement(sql)) {
|
||||||
|
ps.setInt(1, lotId);
|
||||||
|
ps.setString(2, url);
|
||||||
|
ps.setString(3, filePath);
|
||||||
|
ps.setString(4, String.join(",", labels));
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves all lots that are still active (i.e., have a closing time
|
||||||
|
* in the future or unknown). Only these lots need to be monitored.
|
||||||
|
*/
|
||||||
|
synchronized List<Lot> getActiveLots() throws SQLException {
|
||||||
|
List<Lot> list = new ArrayList<>();
|
||||||
|
var sql = "SELECT lot_id, sale_id, current_bid, currency, closing_time, closing_notified FROM lots";
|
||||||
|
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||||
|
var rs = stmt.executeQuery(sql);
|
||||||
|
while (rs.next()) {
|
||||||
|
var lot = new Lot();
|
||||||
|
lot.lotId = rs.getInt("lot_id");
|
||||||
|
lot.saleId = rs.getInt("sale_id");
|
||||||
|
lot.currentBid = rs.getDouble("current_bid");
|
||||||
|
lot.currency = rs.getString("currency");
|
||||||
|
var closing = rs.getString("closing_time");
|
||||||
|
lot.closingNotified = rs.getInt("closing_notified") != 0;
|
||||||
|
if (closing != null) {
|
||||||
|
lot.closingTime = LocalDateTime.parse(closing);
|
||||||
|
}
|
||||||
|
list.add(lot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves all lots from the database.
|
||||||
|
*/
|
||||||
|
synchronized List<Lot> getAllLots() throws SQLException {
|
||||||
|
List<Lot> list = new ArrayList<>();
|
||||||
|
var sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots";
|
||||||
|
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||||
|
var rs = stmt.executeQuery(sql);
|
||||||
|
while (rs.next()) {
|
||||||
|
var lot = new Lot();
|
||||||
|
lot.lotId = rs.getInt("lot_id");
|
||||||
|
lot.saleId = rs.getInt("sale_id");
|
||||||
|
lot.title = rs.getString("title");
|
||||||
|
lot.currentBid = rs.getDouble("current_bid");
|
||||||
|
lot.currency = rs.getString("currency");
|
||||||
|
list.add(lot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the total number of images in the database.
|
||||||
|
*/
|
||||||
|
synchronized int getImageCount() throws SQLException {
|
||||||
|
var sql = "SELECT COUNT(*) as count FROM images";
|
||||||
|
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||||
|
var rs = stmt.executeQuery(sql);
|
||||||
|
if (rs.next()) {
|
||||||
|
return rs.getInt("count");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the current bid of a lot after a bid refresh.
|
||||||
|
*/
|
||||||
|
synchronized void updateLotCurrentBid(Lot lot) throws SQLException {
|
||||||
|
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(
|
||||||
|
"UPDATE lots SET current_bid = ? WHERE lot_id = ?")) {
|
||||||
|
ps.setDouble(1, lot.currentBid);
|
||||||
|
ps.setInt(2, lot.lotId);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the closingNotified flag of a lot (set to 1 when we have
|
||||||
|
* warned the user about its imminent closure).
|
||||||
|
*/
|
||||||
|
synchronized void updateLotNotificationFlags(Lot lot) throws SQLException {
|
||||||
|
try (var conn = DriverManager.getConnection(url); var ps = conn.prepareStatement(
|
||||||
|
"UPDATE lots SET closing_notified = ? WHERE lot_id = ?")) {
|
||||||
|
ps.setInt(1, lot.closingNotified ? 1 : 0);
|
||||||
|
ps.setInt(2, lot.lotId);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
29
src/main/java/com/auction/Lot.java
Normal file
29
src/main/java/com/auction/Lot.java
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
package com.auction;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
/**
 * Mutable data holder for a single lot (Dutch: "kavel") of an auction: the sale it
 * belongs to, descriptive fields, the current bid and the closing time.
 * {@link #minutesUntilClose()} reports how many minutes remain until the lot closes.
 */
final class Lot {

    int saleId;
    int lotId;
    String title;
    String description;
    String manufacturer;
    String type;
    int year;
    String category;
    double currentBid;
    String currency;
    String url;
    /** Closing time; null if unknown. */
    LocalDateTime closingTime;
    boolean closingNotified;

    /**
     * Whole minutes between the current local time and {@code closingTime}.
     *
     * @return the remaining minutes (negative once the closing time has passed), or
     *         {@link Long#MAX_VALUE} when the closing time is unknown
     */
    long minutesUntilClose() {
        if (closingTime != null) {
            var remaining = java.time.Duration.between(LocalDateTime.now(), closingTime);
            return remaining.toMinutes();
        }
        return Long.MAX_VALUE;
    }
}
|
||||||
@@ -1,23 +1,82 @@
|
|||||||
package com.auction;
|
package com.auction;
|
||||||
|
|
||||||
|
import org.opencv.core.Core;
|
||||||
|
import java.util.List;
|
||||||
public class Main {
|
public class Main {
|
||||||
public static void main(String[] args) {
|
public static void main2(String[] args) {
|
||||||
// If arguments are passed, this is likely a one-off command via dokku run
|
// If arguments are passed, this is likely a one-off command via dokku run
|
||||||
// Just exit immediately to allow the command to run
|
// Just exit immediately to allow the command to run
|
||||||
if (args.length > 0) {
|
if (args.length > 0) {
|
||||||
System.out.println("Command mode - exiting to allow shell commands");
|
IO.println("Command mode - exiting to allow shell commands");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println("Starting Troostwijk Auction Scraper...");
|
IO.println("Starting Troostwijk Auction Scraper...");
|
||||||
System.out.println("Container is running and healthy.");
|
IO.println("Container is running and healthy.");
|
||||||
|
|
||||||
// Keep container alive
|
// Keep container alive
|
||||||
try {
|
try {
|
||||||
Thread.sleep(Long.MAX_VALUE);
|
Thread.sleep(Long.MAX_VALUE);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
System.out.println("Container interrupted, exiting.");
|
IO.println("Container interrupted, exiting.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Entry point. Configure database location, notification settings, and
|
||||||
|
* YOLO model paths here before running. Once started the scraper
|
||||||
|
* discovers Dutch auctions, scrapes lots, and begins monitoring.
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
IO.println("=== Troostwijk Auction Scraper ===\n");
|
||||||
|
|
||||||
|
// Configuration parameters (replace with your own values)
|
||||||
|
String databaseFile = "troostwijk.db";
|
||||||
|
|
||||||
|
// Notification configuration - choose one:
|
||||||
|
// Option 1: Desktop notifications only (free, no setup required)
|
||||||
|
String notificationConfig = System.getenv().getOrDefault("NOTIFICATION_CONFIG", "desktop");
|
||||||
|
|
||||||
|
// Option 2: Desktop + Email via Gmail (free, requires Gmail app password)
|
||||||
|
// Format: "smtp:username:appPassword:toEmail"
|
||||||
|
// Example: "smtp:your.email@gmail.com:abcd1234efgh5678:recipient@example.com"
|
||||||
|
// Get app password: Google Account > Security > 2-Step Verification > App passwords
|
||||||
|
|
||||||
|
// YOLO model paths (optional - scraper works without object detection)
|
||||||
|
String yoloCfg = "models/yolov4.cfg";
|
||||||
|
String yoloWeights = "models/yolov4.weights";
|
||||||
|
String yoloClasses = "models/coco.names";
|
||||||
|
|
||||||
|
// Load native OpenCV library
|
||||||
|
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
|
||||||
|
|
||||||
|
IO.println("Initializing scraper...");
|
||||||
|
TroostwijkScraper scraper = new TroostwijkScraper(databaseFile, notificationConfig, "",
|
||||||
|
yoloCfg, yoloWeights, yoloClasses);
|
||||||
|
|
||||||
|
// Step 1: Discover auctions in NL
|
||||||
|
IO.println("\n[1/3] Discovering Dutch auctions...");
|
||||||
|
List<Integer> auctions = scraper.discoverDutchAuctions();
|
||||||
|
IO.println("✓ Found " + auctions.size() + " auctions: " + auctions);
|
||||||
|
|
||||||
|
// Step 2: Fetch lots for each auction
|
||||||
|
IO.println("\n[2/3] Fetching lot details...");
|
||||||
|
int totalAuctions = auctions.size();
|
||||||
|
int currentAuction = 0;
|
||||||
|
for (int saleId : auctions) {
|
||||||
|
currentAuction++;
|
||||||
|
IO.println(" [Page " + currentAuction + "] Fetching auctions...");
|
||||||
|
IO.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "...");
|
||||||
|
scraper.fetchLotsForSale(saleId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show database summary
|
||||||
|
IO.println("\n📊 Database Summary:");
|
||||||
|
scraper.printDatabaseStats();
|
||||||
|
|
||||||
|
// Step 3: Start monitoring bids and closures
|
||||||
|
IO.println("\n[3/3] Starting monitoring service...");
|
||||||
|
scraper.scheduleMonitoring();
|
||||||
|
IO.println("✓ Monitoring active. Press Ctrl+C to stop.\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
156
src/main/java/com/auction/NotificationService.java
Normal file
156
src/main/java/com/auction/NotificationService.java
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
package com.auction;
|
||||||
|
|
||||||
|
import javax.mail.Authenticator;
|
||||||
|
import javax.mail.Message.RecipientType;
|
||||||
|
import javax.mail.PasswordAuthentication;
|
||||||
|
import javax.mail.Session;
|
||||||
|
import javax.mail.Transport;
|
||||||
|
import javax.mail.internet.InternetAddress;
|
||||||
|
import javax.mail.internet.MimeMessage;
|
||||||
|
import java.awt.SystemTray;
|
||||||
|
import java.awt.Toolkit;
|
||||||
|
import java.awt.TrayIcon;
|
||||||
|
import java.awt.TrayIcon.MessageType;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Properties;
|
||||||
|
/**
|
||||||
|
* Service for sending notifications via desktop notifications and/or email.
|
||||||
|
* Supports free notification methods:
|
||||||
|
* 1. Desktop notifications (Windows/Linux/macOS system tray)
|
||||||
|
* 2. Email via Gmail SMTP (free, requires app password)
|
||||||
|
*
|
||||||
|
* Configuration:
|
||||||
|
* - For email: Set notificationEmail to your Gmail address
|
||||||
|
* - Enable 2FA in Gmail and create an App Password
|
||||||
|
* - Use format "smtp:username:appPassword:toEmail" for credentials
|
||||||
|
* - Or use "desktop" for desktop-only notifications
|
||||||
|
*/
|
||||||
|
class NotificationService {
|
||||||
|
|
||||||
|
private final boolean useDesktop;
|
||||||
|
private final boolean useEmail;
|
||||||
|
private final String smtpUsername;
|
||||||
|
private final String smtpPassword;
|
||||||
|
private final String toEmail;
|
||||||
|
|
||||||
|
/**
 * Creates a notification service.
 *
 * <p>{@code "desktop"} (case-insensitive) enables desktop notifications only. A value
 * starting with {@code "smtp:"} is split on {@code ':'} into at most four parts and
 * enables email in addition to desktop notifications.
 *
 * @param config "desktop" for desktop only, or "smtp:username:password:toEmail" for email
 * @param unusedParam Kept for compatibility (can pass empty string)
 * @throws IllegalArgumentException if config is null, is neither form, or the smtp
 *                                  form does not have exactly four parts
 */
NotificationService(String config, String unusedParam) {

    // Reject null up front with the same error as any other unrecognised value,
    // instead of failing with a NullPointerException on startsWith below
    if (config == null) {
        throw new IllegalArgumentException("Config must be 'desktop' or 'smtp:username:password:toEmail'");
    }

    if ("desktop".equalsIgnoreCase(config)) {
        this.useDesktop = true;
        this.useEmail = false;
        this.smtpUsername = null;
        this.smtpPassword = null;
        this.toEmail = null;
    } else if (config.startsWith("smtp:")) {
        // Limit 4: everything after the third ':' ends up in the recipient part
        var parts = config.split(":", 4);
        if (parts.length != 4) {
            throw new IllegalArgumentException("Email config must be 'smtp:username:password:toEmail'");
        }
        this.useDesktop = true; // Always include desktop
        this.useEmail = true;
        this.smtpUsername = parts[1];
        this.smtpPassword = parts[2];
        this.toEmail = parts[3];
    } else {
        throw new IllegalArgumentException("Config must be 'desktop' or 'smtp:username:password:toEmail'");
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends notification via configured channels.
|
||||||
|
*
|
||||||
|
* @param message The message body
|
||||||
|
* @param title Message title
|
||||||
|
* @param priority Priority level (0=normal, 1=high)
|
||||||
|
*/
|
||||||
|
void sendNotification(String message, String title, int priority) {
|
||||||
|
if (useDesktop) {
|
||||||
|
sendDesktopNotification(title, message, priority);
|
||||||
|
}
|
||||||
|
if (useEmail) {
|
||||||
|
sendEmailNotification(title, message, priority);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends a desktop notification using system tray.
|
||||||
|
* Works on Windows, macOS, and Linux with desktop environments.
|
||||||
|
*/
|
||||||
|
private void sendDesktopNotification(String title, String message, int priority) {
|
||||||
|
try {
|
||||||
|
if (SystemTray.isSupported()) {
|
||||||
|
var tray = SystemTray.getSystemTray();
|
||||||
|
var image = Toolkit.getDefaultToolkit()
|
||||||
|
.createImage(new byte[0]); // Empty image
|
||||||
|
|
||||||
|
var trayIcon = new TrayIcon(image, "Troostwijk Scraper");
|
||||||
|
trayIcon.setImageAutoSize(true);
|
||||||
|
|
||||||
|
var messageType = priority > 0
|
||||||
|
? MessageType.WARNING
|
||||||
|
: MessageType.INFO;
|
||||||
|
|
||||||
|
tray.add(trayIcon);
|
||||||
|
trayIcon.displayMessage(title, message, messageType);
|
||||||
|
|
||||||
|
// Remove icon after 2 seconds to avoid clutter
|
||||||
|
Thread.sleep(2000);
|
||||||
|
tray.remove(trayIcon);
|
||||||
|
|
||||||
|
IO.println("Desktop notification sent: " + title);
|
||||||
|
} else {
|
||||||
|
IO.println("Desktop notifications not supported, logging: " + title + " - " + message);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("Desktop notification failed: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends email notification via Gmail SMTP (free).
|
||||||
|
* Uses Gmail's SMTP server with app password authentication.
|
||||||
|
*/
|
||||||
|
private void sendEmailNotification(String title, String message, int priority) {
|
||||||
|
try {
|
||||||
|
var props = new Properties();
|
||||||
|
props.put("mail.smtp.auth", "true");
|
||||||
|
props.put("mail.smtp.starttls.enable", "true");
|
||||||
|
props.put("mail.smtp.host", "smtp.gmail.com");
|
||||||
|
props.put("mail.smtp.port", "587");
|
||||||
|
props.put("mail.smtp.ssl.trust", "smtp.gmail.com");
|
||||||
|
|
||||||
|
var session = Session.getInstance(props,
|
||||||
|
new Authenticator() {
|
||||||
|
|
||||||
|
protected PasswordAuthentication getPasswordAuthentication() {
|
||||||
|
return new PasswordAuthentication(smtpUsername, smtpPassword);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
var msg = new MimeMessage(session);
|
||||||
|
msg.setFrom(new InternetAddress(smtpUsername));
|
||||||
|
msg.setRecipients(RecipientType.TO,
|
||||||
|
InternetAddress.parse(toEmail));
|
||||||
|
msg.setSubject("[Troostwijk] " + title);
|
||||||
|
msg.setText(message);
|
||||||
|
msg.setSentDate(new Date());
|
||||||
|
|
||||||
|
if (priority > 0) {
|
||||||
|
msg.setHeader("X-Priority", "1");
|
||||||
|
msg.setHeader("Importance", "High");
|
||||||
|
}
|
||||||
|
|
||||||
|
Transport.send(msg);
|
||||||
|
IO.println("Email notification sent: " + title);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("Email notification failed: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
140
src/main/java/com/auction/ObjectDetectionService.java
Normal file
140
src/main/java/com/auction/ObjectDetectionService.java
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
package com.auction;
|
||||||
|
|
||||||
|
import org.opencv.core.Mat;
|
||||||
|
import org.opencv.core.Scalar;
|
||||||
|
import org.opencv.core.Size;
|
||||||
|
import org.opencv.dnn.Dnn;
|
||||||
|
import org.opencv.dnn.Net;
|
||||||
|
import org.opencv.imgcodecs.Imgcodecs;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import static org.opencv.dnn.Dnn.DNN_BACKEND_OPENCV;
|
||||||
|
import static org.opencv.dnn.Dnn.DNN_TARGET_CPU;
|
||||||
|
/**
|
||||||
|
* Service for performing object detection on images using OpenCV's DNN
|
||||||
|
* module. The DNN module can load pre‑trained models from several
|
||||||
|
* frameworks (Darknet, TensorFlow, ONNX, etc.)【784097309529506†L209-L233】. Here
|
||||||
|
* we load a YOLO model (Darknet) by specifying the configuration and
|
||||||
|
* weights files. For each image we run a forward pass and return a
|
||||||
|
* list of detected class labels.
|
||||||
|
*
|
||||||
|
* If model files are not found, the service operates in disabled mode
|
||||||
|
* and returns empty lists.
|
||||||
|
*/
|
||||||
|
class ObjectDetectionService {
|
||||||
|
|
||||||
|
private final Net net;
|
||||||
|
private final List<String> classNames;
|
||||||
|
private final boolean enabled;
|
||||||
|
|
||||||
|
ObjectDetectionService(String cfgPath, String weightsPath, String classNamesPath) throws IOException {
|
||||||
|
// Check if model files exist
|
||||||
|
var cfgFile = Paths.get(cfgPath);
|
||||||
|
var weightsFile = Paths.get(weightsPath);
|
||||||
|
var classNamesFile = Paths.get(classNamesPath);
|
||||||
|
|
||||||
|
if (!Files.exists(cfgFile) || !Files.exists(weightsFile) || !Files.exists(classNamesFile)) {
|
||||||
|
IO.println("⚠️ Object detection disabled: YOLO model files not found");
|
||||||
|
IO.println(" Expected files:");
|
||||||
|
IO.println(" - " + cfgPath);
|
||||||
|
IO.println(" - " + weightsPath);
|
||||||
|
IO.println(" - " + classNamesPath);
|
||||||
|
IO.println(" Scraper will continue without image analysis.");
|
||||||
|
this.enabled = false;
|
||||||
|
this.net = null;
|
||||||
|
this.classNames = new ArrayList<>();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Load network
|
||||||
|
this.net = Dnn.readNetFromDarknet(cfgPath, weightsPath);
|
||||||
|
this.net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
||||||
|
this.net.setPreferableTarget(DNN_TARGET_CPU);
|
||||||
|
// Load class names (one per line)
|
||||||
|
this.classNames = Files.readAllLines(classNamesFile);
|
||||||
|
this.enabled = true;
|
||||||
|
IO.println("✓ Object detection enabled with YOLO");
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("⚠️ Object detection disabled: " + e.getMessage());
|
||||||
|
throw new IOException("Failed to initialize object detection", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Detects objects in the given image file and returns a list of
|
||||||
|
* human‑readable labels. Only detections above a confidence
|
||||||
|
* threshold are returned. For brevity this method omits drawing
|
||||||
|
* bounding boxes. See the OpenCV DNN documentation for details on
|
||||||
|
* post‑processing【784097309529506†L324-L344】.
|
||||||
|
*
|
||||||
|
* @param imagePath absolute path to the image
|
||||||
|
* @return list of detected class names (empty if detection disabled)
|
||||||
|
*/
|
||||||
|
List<String> detectObjects(String imagePath) {
|
||||||
|
if (!enabled) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> labels = new ArrayList<>();
|
||||||
|
var image = Imgcodecs.imread(imagePath);
|
||||||
|
if (image.empty()) return labels;
|
||||||
|
// Create a 4D blob from the image
|
||||||
|
var blob = Dnn.blobFromImage(image, 1.0 / 255.0, new Size(416, 416), new Scalar(0, 0, 0), true, false);
|
||||||
|
net.setInput(blob);
|
||||||
|
List<Mat> outs = new ArrayList<>();
|
||||||
|
var outNames = getOutputLayerNames(net);
|
||||||
|
net.forward(outs, outNames);
|
||||||
|
// Post‑process: for each detection compute score and choose class
|
||||||
|
var confThreshold = 0.5f;
|
||||||
|
for (var out : outs) {
|
||||||
|
for (var i = 0; i < out.rows(); i++) {
|
||||||
|
var data = out.get(i, 0);
|
||||||
|
if (data == null) continue;
|
||||||
|
// The first 5 numbers are bounding box, then class scores
|
||||||
|
var scores = new double[classNames.size()];
|
||||||
|
System.arraycopy(data, 5, scores, 0, scores.length);
|
||||||
|
var classId = argMax(scores);
|
||||||
|
var confidence = scores[classId];
|
||||||
|
if (confidence > confThreshold) {
|
||||||
|
var label = classNames.get(classId);
|
||||||
|
if (!labels.contains(label)) {
|
||||||
|
labels.add(label);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return labels;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns the indexes of the output layers in the network. YOLO
|
||||||
|
* automatically discovers its output layers; other models may require
|
||||||
|
* manually specifying them【784097309529506†L356-L365】.
|
||||||
|
*/
|
||||||
|
private List<String> getOutputLayerNames(Net net) {
|
||||||
|
List<String> names = new ArrayList<>();
|
||||||
|
var outLayers = net.getUnconnectedOutLayers().toList();
|
||||||
|
var layersNames = net.getLayerNames();
|
||||||
|
for (var i : outLayers) {
|
||||||
|
names.add(layersNames.get(i - 1));
|
||||||
|
}
|
||||||
|
return names;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns the index of the maximum value in the array.
|
||||||
|
*/
|
||||||
|
private int argMax(double[] array) {
|
||||||
|
var best = 0;
|
||||||
|
var max = array[0];
|
||||||
|
for (var i = 1; i < array.length; i++) {
|
||||||
|
if (array[i] > max) {
|
||||||
|
max = array[i];
|
||||||
|
best = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return best;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,645 +0,0 @@
|
|||||||
package com.auction;
|
|
||||||
|
|
||||||
import com.microsoft.playwright.*;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import com.microsoft.playwright.options.WaitUntilState;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.*;
|
|
||||||
import java.sql.*;
|
|
||||||
import java.time.Instant;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* TroostwijkAuctionExtractor
|
|
||||||
*
|
|
||||||
* Extracts auction listings from https://www.troostwijkauctions.com/auctions
|
|
||||||
* using Playwright for Java (headless browser automation).
|
|
||||||
*
|
|
||||||
* Features:
|
|
||||||
* - Uses Playwright for Java to load JavaScript-rendered content
|
|
||||||
* - Iterates through all pages of auction listings
|
|
||||||
* - Rate limiting: 200ms between each page request
|
|
||||||
* - Caches visited pages in SQLite database with expiration times
|
|
||||||
* - Extracts auction metadata: ID, title, location, URL
|
|
||||||
*
|
|
||||||
* Dependencies (Maven):
|
|
||||||
* <dependency>
|
|
||||||
* <groupId>com.microsoft.playwright</groupId>
|
|
||||||
* <artifactId>playwright</artifactId>
|
|
||||||
* <version>1.40.0</version>
|
|
||||||
* </dependency>
|
|
||||||
* <dependency>
|
|
||||||
* <groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
* <artifactId>jackson-databind</artifactId>
|
|
||||||
* <version>2.17.0</version>
|
|
||||||
* </dependency>
|
|
||||||
* <dependency>
|
|
||||||
* <groupId>org.xerial</groupId>
|
|
||||||
* <artifactId>sqlite-jdbc</artifactId>
|
|
||||||
* <version>3.45.1.0</version>
|
|
||||||
* </dependency>
|
|
||||||
*
|
|
||||||
* After adding dependency, run: mvn exec:java -e -D exec.mainClass=com.microsoft.playwright.CLI -D exec.args="install"
|
|
||||||
* This downloads the browser binaries needed by Playwright.
|
|
||||||
*/
|
|
||||||
public class TroostwijkAuctionExtractor {
|
|
||||||
|
|
||||||
private static final String AUCTIONS_BASE_URL = "https://www.troostwijkauctions.com/auctions";
|
|
||||||
private static final int RATE_LIMIT_MS = 200;
|
|
||||||
private static final String CACHE_DB_PATH = "cache/page_cache.db";
|
|
||||||
private static final long CACHE_EXPIRATION_HOURS = 24; // Cache expires after 24 hours
|
|
||||||
|
|
||||||
private final ObjectMapper objectMapper;
|
|
||||||
private final boolean useCache;
|
|
||||||
private final CacheDatabase cacheDb;
|
|
||||||
private final int maxPageVisits; // Maximum number of pages to fetch (0 = unlimited)
|
|
||||||
private int pageVisitCount; // Counter for actual page fetches (not from cache)
|
|
||||||
private Playwright playwright;
|
|
||||||
private Browser browser;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents an auction listing
|
|
||||||
*/
|
|
||||||
public static class Auction {
|
|
||||||
public int id;
|
|
||||||
public String title;
|
|
||||||
public String location;
|
|
||||||
public String url;
|
|
||||||
public String type; // e.g. "A1" or "A7"
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return String.format("Auction{id=%d, type=%s, title='%s', location='%s', url='%s'}",
|
|
||||||
id, type, title, location, url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Creates an extractor with an explicit limit on live page fetches.
 * <p>
 * When caching is enabled the cache database is opened and its schema is
 * created right away; otherwise no cache database is created at all.
 *
 * @param useCache      enable database caching of visited pages
 * @param maxPageVisits maximum number of actual page fetches (0 = unlimited)
 * @throws SQLException if the cache database cannot be opened or prepared
 * @throws IOException  if the cache directory cannot be created
 */
public TroostwijkAuctionExtractor(boolean useCache, int maxPageVisits) throws SQLException, IOException {
    this.objectMapper = new ObjectMapper();
    this.useCache = useCache;
    this.maxPageVisits = maxPageVisits;
    this.pageVisitCount = 0;

    if (useCache) {
        var database = new CacheDatabase(CACHE_DB_PATH);
        this.cacheDb = database;
        database.initialize();
    } else {
        this.cacheDb = null;
    }
}

/**
 * Creates an extractor without a limit on live page fetches.
 *
 * @param useCache enable database caching of visited pages
 * @throws SQLException if the cache database cannot be opened or prepared
 * @throws IOException  if the cache directory cannot be created
 */
public TroostwijkAuctionExtractor(boolean useCache) throws SQLException, IOException {
    this(useCache, 0); // 0 = unlimited
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initializes Playwright and browser instance
|
|
||||||
* Call this before extracting auctions
|
|
||||||
*/
|
|
||||||
public void initialize() {
|
|
||||||
System.out.println("Initializing Playwright browser...");
|
|
||||||
this.playwright = Playwright.create();
|
|
||||||
this.browser = playwright.chromium().launch(new BrowserType.LaunchOptions()
|
|
||||||
.setHeadless(true)
|
|
||||||
.setArgs(Arrays.asList("--no-sandbox", "--disable-setuid-sandbox")));
|
|
||||||
System.out.println("✓ Browser ready");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes browser and Playwright instance
|
|
||||||
* Call this when done extracting
|
|
||||||
*/
|
|
||||||
public void close() {
|
|
||||||
if (browser != null) {
|
|
||||||
browser.close();
|
|
||||||
}
|
|
||||||
if (playwright != null) {
|
|
||||||
playwright.close();
|
|
||||||
}
|
|
||||||
if (cacheDb != null) {
|
|
||||||
cacheDb.close();
|
|
||||||
}
|
|
||||||
System.out.println("✓ Browser and cache closed");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts all auctions from all pages
|
|
||||||
*
|
|
||||||
* @return List of all discovered auctions
|
|
||||||
*/
|
|
||||||
public List<Auction> extractAllAuctions() throws InterruptedException {
|
|
||||||
if (browser == null) {
|
|
||||||
throw new IllegalStateException("Browser not initialized. Call initialize() first.");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<Auction> allAuctions = new ArrayList<>();
|
|
||||||
int pageNumber = 1;
|
|
||||||
boolean hasMorePages = true;
|
|
||||||
|
|
||||||
System.out.println("Starting auction extraction from " + AUCTIONS_BASE_URL);
|
|
||||||
|
|
||||||
while (hasMorePages) {
|
|
||||||
System.out.println("\n[Page " + pageNumber + "] Fetching auctions...");
|
|
||||||
|
|
||||||
// Check cache first
|
|
||||||
String cachedHtml = loadFromCache(pageNumber);
|
|
||||||
String html;
|
|
||||||
|
|
||||||
if (cachedHtml != null) {
|
|
||||||
System.out.println(" ✓ Loaded from cache");
|
|
||||||
html = cachedHtml;
|
|
||||||
} else {
|
|
||||||
// Check if we've reached the maximum page visit limit
|
|
||||||
if (maxPageVisits > 0 && pageVisitCount >= maxPageVisits) {
|
|
||||||
System.out.println(" ⚠️ Reached maximum page visit limit (" + maxPageVisits + "), stopping");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch with Playwright
|
|
||||||
html = fetchPageWithPlaywright(pageNumber);
|
|
||||||
pageVisitCount++; // Increment actual page fetch counter
|
|
||||||
|
|
||||||
if (html == null || html.isEmpty()) {
|
|
||||||
System.out.println(" ⚠️ Failed to fetch page, stopping pagination");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println(" ✓ Fetched from website (visit " + pageVisitCount +
|
|
||||||
(maxPageVisits > 0 ? "/" + maxPageVisits : "") + ")");
|
|
||||||
|
|
||||||
// Save to cache
|
|
||||||
if (useCache) {
|
|
||||||
saveToCache(pageNumber, html);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rate limiting - wait 200ms before next request
|
|
||||||
Thread.sleep(RATE_LIMIT_MS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse auctions from HTML
|
|
||||||
List<Auction> pageAuctions = parseAuctionsFromHtml(html);
|
|
||||||
|
|
||||||
if (pageAuctions.isEmpty()) {
|
|
||||||
System.out.println(" ⚠️ No auctions found on page, stopping pagination");
|
|
||||||
hasMorePages = false;
|
|
||||||
} else {
|
|
||||||
System.out.println(" ✓ Found " + pageAuctions.size() + " auctions");
|
|
||||||
allAuctions.addAll(pageAuctions);
|
|
||||||
pageNumber++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println("\n✓ Total auctions extracted: " + allAuctions.size());
|
|
||||||
return allAuctions;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fetches a single page using Playwright
|
|
||||||
*
|
|
||||||
* @param pageNumber Page number (1-indexed)
|
|
||||||
* @return HTML content of the page
|
|
||||||
*/
|
|
||||||
private String fetchPageWithPlaywright(int pageNumber) {
|
|
||||||
String url = pageNumber == 1
|
|
||||||
? AUCTIONS_BASE_URL
|
|
||||||
: AUCTIONS_BASE_URL + "?page=" + pageNumber;
|
|
||||||
|
|
||||||
try {
|
|
||||||
Page page = browser.newPage();
|
|
||||||
|
|
||||||
// Set user agent
|
|
||||||
page.setExtraHTTPHeaders(Map.of(
|
|
||||||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
|
||||||
));
|
|
||||||
|
|
||||||
// Navigate to page
|
|
||||||
page.navigate(url, new Page.NavigateOptions()
|
|
||||||
.setTimeout(30000)
|
|
||||||
.setWaitUntil(WaitUntilState.NETWORKIDLE));
|
|
||||||
|
|
||||||
// Wait for auction listings to appear
|
|
||||||
try {
|
|
||||||
page.waitForSelector("a[href^='/a/']", new Page.WaitForSelectorOptions()
|
|
||||||
.setTimeout(10000));
|
|
||||||
} catch (Exception e) {
|
|
||||||
// Continue even if selector not found
|
|
||||||
System.out.println(" ⚠️ Auction selector not found, attempting to parse anyway");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get HTML content
|
|
||||||
String html = page.content();
|
|
||||||
page.close();
|
|
||||||
|
|
||||||
return html;
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
System.err.println(" ⚠️ Playwright error: " + e.getMessage());
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses auction data from HTML content
|
|
||||||
*
|
|
||||||
* @param html HTML content
|
|
||||||
* @return List of parsed auctions
|
|
||||||
*/
|
|
||||||
private List<Auction> parseAuctionsFromHtml(String html) {
|
|
||||||
List<Auction> auctions = new ArrayList<>();
|
|
||||||
|
|
||||||
// Simple regex-based parsing for auction links
|
|
||||||
// Format: <a href="/a/title-A1-12345" or "/a/title-A7-12345"
|
|
||||||
java.util.regex.Pattern linkPattern = java.util.regex.Pattern.compile(
|
|
||||||
"href=\"(/a/[^\"]+A[17]-(\\d+)[^\"]*)\"");
|
|
||||||
java.util.regex.Matcher linkMatcher = linkPattern.matcher(html);
|
|
||||||
|
|
||||||
Set<Integer> seenIds = new HashSet<>();
|
|
||||||
|
|
||||||
while (linkMatcher.find()) {
|
|
||||||
String href = linkMatcher.group(1);
|
|
||||||
int auctionId = Integer.parseInt(linkMatcher.group(2));
|
|
||||||
|
|
||||||
// Avoid duplicates
|
|
||||||
if (seenIds.contains(auctionId)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract auction type (A1 or A7)
|
|
||||||
String type = href.contains("A1-") ? "A1" : "A7";
|
|
||||||
|
|
||||||
// Try to find location and title near this link
|
|
||||||
String location = extractLocationNearLink(html, href);
|
|
||||||
String title = extractTitleFromHref(href);
|
|
||||||
|
|
||||||
Auction auction = new Auction();
|
|
||||||
auction.id = auctionId;
|
|
||||||
auction.type = type;
|
|
||||||
auction.title = title;
|
|
||||||
auction.location = location;
|
|
||||||
auction.url = "https://www.troostwijkauctions.com" + href;
|
|
||||||
|
|
||||||
auctions.add(auction);
|
|
||||||
seenIds.add(auctionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
return auctions;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts location text near an auction link
|
|
||||||
* Looks for ", NL" or other country codes
|
|
||||||
*/
|
|
||||||
private String extractLocationNearLink(String html, String href) {
|
|
||||||
int hrefPos = html.indexOf(href);
|
|
||||||
if (hrefPos == -1) return "Unknown";
|
|
||||||
|
|
||||||
// Look at 1000 characters before AND after the href for location info
|
|
||||||
int startPos = Math.max(hrefPos - 500, 0);
|
|
||||||
int endPos = Math.min(hrefPos + 1000, html.length());
|
|
||||||
String context = html.substring(startPos, endPos);
|
|
||||||
|
|
||||||
// Pattern 1: Classic format "City, NL"
|
|
||||||
java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile(
|
|
||||||
"([A-Za-z][A-Za-z\\s\\-']+),\\s*([A-Z]{2})(?![A-Za-z])");
|
|
||||||
java.util.regex.Matcher locMatcher = locPattern.matcher(context);
|
|
||||||
|
|
||||||
if (locMatcher.find()) {
|
|
||||||
String location = locMatcher.group(1).trim() + ", " + locMatcher.group(2);
|
|
||||||
System.out.println(" Found location: " + location + " for auction " + href);
|
|
||||||
return location;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pattern 2: HTML format like "<span>City,<!-- --> </span>NL"
|
|
||||||
// Extract city and country code separately
|
|
||||||
java.util.regex.Pattern htmlPattern = java.util.regex.Pattern.compile(
|
|
||||||
"<span[^>]*>([A-Za-z][A-Za-z\\s\\-',]+?)(?:,)?\\s*(?:<!--.*?-->)?\\s*</span>\\s*([A-Z]{2})(?![A-Za-z])");
|
|
||||||
java.util.regex.Matcher htmlMatcher = htmlPattern.matcher(context);
|
|
||||||
|
|
||||||
if (htmlMatcher.find()) {
|
|
||||||
String city = htmlMatcher.group(1).trim().replaceAll(",$", ""); // Remove trailing comma
|
|
||||||
String country = htmlMatcher.group(2);
|
|
||||||
String location = city + ", " + country;
|
|
||||||
System.out.println(" Found location (HTML): " + location + " for auction " + href);
|
|
||||||
return location;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pattern 3: Fallback - just find country code after HTML tags
|
|
||||||
java.util.regex.Pattern countryPattern = java.util.regex.Pattern.compile(
|
|
||||||
"(?:-->|</span>|</div>)\\s*([A-Z]{2})(?![A-Za-z])");
|
|
||||||
java.util.regex.Matcher countryMatcher = countryPattern.matcher(context);
|
|
||||||
|
|
||||||
if (countryMatcher.find()) {
|
|
||||||
String country = countryMatcher.group(1);
|
|
||||||
System.out.println(" Found country code: " + country + " for auction " + href);
|
|
||||||
return "Unknown, " + country;
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println(" ⚠️ No location found for auction " + href);
|
|
||||||
return "Unknown";
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts human-readable title from URL slug
|
|
||||||
* Converts "some-auction-title-A1-12345" to "Some Auction Title"
|
|
||||||
*/
|
|
||||||
private String extractTitleFromHref(String href) {
|
|
||||||
// Extract everything between "/a/" and "-A1-" or "-A7-"
|
|
||||||
java.util.regex.Pattern titlePattern = java.util.regex.Pattern.compile(
|
|
||||||
"/a/(.+?)-A[17]-");
|
|
||||||
java.util.regex.Matcher titleMatcher = titlePattern.matcher(href);
|
|
||||||
|
|
||||||
if (titleMatcher.find()) {
|
|
||||||
String slug = titleMatcher.group(1);
|
|
||||||
// Convert kebab-case to Title Case
|
|
||||||
String[] words = slug.split("-");
|
|
||||||
StringBuilder title = new StringBuilder();
|
|
||||||
for (String word : words) {
|
|
||||||
if (!word.isEmpty()) {
|
|
||||||
title.append(Character.toUpperCase(word.charAt(0)))
|
|
||||||
.append(word.substring(1))
|
|
||||||
.append(" ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return title.toString().trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
return "Untitled Auction";
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Loads cached HTML for a page from SQLite database
|
|
||||||
* Returns null if not cached or cache has expired
|
|
||||||
*
|
|
||||||
* @param pageNumber Page number
|
|
||||||
* @return Cached HTML or null if not found/expired
|
|
||||||
*/
|
|
||||||
private String loadFromCache(int pageNumber) {
|
|
||||||
if (!useCache || cacheDb == null) return null;
|
|
||||||
|
|
||||||
String url = pageNumber == 1
|
|
||||||
? AUCTIONS_BASE_URL
|
|
||||||
: AUCTIONS_BASE_URL + "?page=" + pageNumber;
|
|
||||||
|
|
||||||
return cacheDb.get(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Saves HTML to SQLite cache database with expiration time
|
|
||||||
*
|
|
||||||
* @param pageNumber Page number
|
|
||||||
* @param html HTML content
|
|
||||||
*/
|
|
||||||
private void saveToCache(int pageNumber, String html) {
|
|
||||||
if (!useCache || cacheDb == null) return;
|
|
||||||
|
|
||||||
String url = pageNumber == 1
|
|
||||||
? AUCTIONS_BASE_URL
|
|
||||||
: AUCTIONS_BASE_URL + "?page=" + pageNumber;
|
|
||||||
|
|
||||||
cacheDb.put(url, html, CACHE_EXPIRATION_HOURS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Filters auctions by location
|
|
||||||
*
|
|
||||||
* @param auctions List of auctions
|
|
||||||
* @param locationFilter Location string to match (e.g., "NL")
|
|
||||||
* @return Filtered list
|
|
||||||
*/
|
|
||||||
public static List<Auction> filterByLocation(List<Auction> auctions, String locationFilter) {
|
|
||||||
return auctions.stream()
|
|
||||||
.filter(a -> a.location.contains(locationFilter))
|
|
||||||
.toList();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Entry point for testing
|
|
||||||
*
|
|
||||||
* Arguments:
|
|
||||||
* --max-visits <number> : Maximum number of page visits (0 = unlimited, default)
|
|
||||||
* --no-cache : Disable caching
|
|
||||||
*/
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
System.out.println("=== Troostwijk Auction Extractor ===\n");
|
|
||||||
|
|
||||||
// Parse command line arguments
|
|
||||||
boolean useCache = true;
|
|
||||||
int maxVisits = 0; // 0 = unlimited
|
|
||||||
|
|
||||||
for (int i = 0; i < args.length; i++) {
|
|
||||||
switch (args[i]) {
|
|
||||||
case "--max-visits":
|
|
||||||
if (i + 1 < args.length) {
|
|
||||||
maxVisits = Integer.parseInt(args[++i]);
|
|
||||||
System.out.println("Max page visits set to: " + maxVisits);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case "--no-cache":
|
|
||||||
useCache = false;
|
|
||||||
System.out.println("Caching disabled");
|
|
||||||
break;
|
|
||||||
case "--help":
|
|
||||||
System.out.println("Usage: java TroostwijkAuctionExtractor [options]");
|
|
||||||
System.out.println("Options:");
|
|
||||||
System.out.println(" --max-visits <n> : Limit actual page fetches to n (0 = unlimited)");
|
|
||||||
System.out.println(" --no-cache : Disable page caching");
|
|
||||||
System.out.println(" --help : Show this help message");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
TroostwijkAuctionExtractor extractor = new TroostwijkAuctionExtractor(useCache, maxVisits);
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Initialize browser
|
|
||||||
extractor.initialize();
|
|
||||||
|
|
||||||
// Extract all auctions
|
|
||||||
List<Auction> allAuctions = extractor.extractAllAuctions();
|
|
||||||
|
|
||||||
// Filter for Dutch auctions only
|
|
||||||
List<Auction> dutchAuctions = filterByLocation(allAuctions, "NL");
|
|
||||||
|
|
||||||
System.out.println("\n=== Results ===");
|
|
||||||
System.out.println("Total auctions found: " + allAuctions.size());
|
|
||||||
System.out.println("Dutch auctions (NL): " + dutchAuctions.size());
|
|
||||||
System.out.println("Actual page visits: " + extractor.pageVisitCount);
|
|
||||||
|
|
||||||
// Display first 10 Dutch auctions
|
|
||||||
System.out.println("\n=== Sample Dutch Auctions ===");
|
|
||||||
dutchAuctions.stream()
|
|
||||||
.limit(10)
|
|
||||||
.forEach(System.out::println);
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
// Always close browser
|
|
||||||
extractor.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SQLite-based caching system for HTML pages with expiration support
|
|
||||||
*/
|
|
||||||
static class CacheDatabase {
|
|
||||||
private final String dbPath;
|
|
||||||
private Connection connection;
|
|
||||||
|
|
||||||
public CacheDatabase(String dbPath) {
|
|
||||||
this.dbPath = dbPath;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize database and create schema
|
|
||||||
*/
|
|
||||||
public void initialize() throws SQLException, IOException {
|
|
||||||
// Create cache directory if it doesn't exist
|
|
||||||
Path cacheDir = Paths.get(dbPath).getParent();
|
|
||||||
if (cacheDir != null) {
|
|
||||||
Files.createDirectories(cacheDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
connection = DriverManager.getConnection("jdbc:sqlite:" + dbPath);
|
|
||||||
|
|
||||||
// Create cache table with URL as primary key
|
|
||||||
String createTable = """
|
|
||||||
CREATE TABLE IF NOT EXISTS page_cache (
|
|
||||||
url TEXT PRIMARY KEY,
|
|
||||||
html TEXT NOT NULL,
|
|
||||||
cached_at INTEGER NOT NULL,
|
|
||||||
expires_at INTEGER NOT NULL
|
|
||||||
)
|
|
||||||
""";
|
|
||||||
|
|
||||||
try (Statement stmt = connection.createStatement()) {
|
|
||||||
stmt.execute(createTable);
|
|
||||||
// Create index on expires_at for efficient cleanup
|
|
||||||
stmt.execute("CREATE INDEX IF NOT EXISTS idx_expires_at ON page_cache(expires_at)");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up expired entries on initialization
|
|
||||||
cleanupExpired();
|
|
||||||
|
|
||||||
System.out.println("✓ Cache database initialized");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get cached HTML for a URL if it exists and hasn't expired
|
|
||||||
*
|
|
||||||
* @param url The URL to look up
|
|
||||||
* @return Cached HTML or null if not found/expired
|
|
||||||
*/
|
|
||||||
public synchronized String get(String url) {
|
|
||||||
String sql = "SELECT html FROM page_cache WHERE url = ? AND expires_at > ?";
|
|
||||||
|
|
||||||
try (PreparedStatement ps = connection.prepareStatement(sql)) {
|
|
||||||
ps.setString(1, url);
|
|
||||||
ps.setLong(2, Instant.now().getEpochSecond());
|
|
||||||
|
|
||||||
ResultSet rs = ps.executeQuery();
|
|
||||||
if (rs.next()) {
|
|
||||||
return rs.getString("html");
|
|
||||||
}
|
|
||||||
} catch (SQLException e) {
|
|
||||||
System.err.println("Cache read error: " + e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Store HTML in cache with expiration time
|
|
||||||
*
|
|
||||||
* @param url The URL to cache
|
|
||||||
* @param html The HTML content
|
|
||||||
* @param expirationHours Hours until cache expires
|
|
||||||
*/
|
|
||||||
public synchronized void put(String url, String html, long expirationHours) {
|
|
||||||
String sql = """
|
|
||||||
INSERT OR REPLACE INTO page_cache (url, html, cached_at, expires_at)
|
|
||||||
VALUES (?, ?, ?, ?)
|
|
||||||
""";
|
|
||||||
|
|
||||||
long now = Instant.now().getEpochSecond();
|
|
||||||
long expiresAt = now + (expirationHours * 3600);
|
|
||||||
|
|
||||||
try (PreparedStatement ps = connection.prepareStatement(sql)) {
|
|
||||||
ps.setString(1, url);
|
|
||||||
ps.setString(2, html);
|
|
||||||
ps.setLong(3, now);
|
|
||||||
ps.setLong(4, expiresAt);
|
|
||||||
ps.executeUpdate();
|
|
||||||
} catch (SQLException e) {
|
|
||||||
System.err.println("Cache write error: " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Remove expired cache entries
|
|
||||||
*/
|
|
||||||
public synchronized void cleanupExpired() {
|
|
||||||
String sql = "DELETE FROM page_cache WHERE expires_at <= ?";
|
|
||||||
|
|
||||||
try (PreparedStatement ps = connection.prepareStatement(sql)) {
|
|
||||||
ps.setLong(1, Instant.now().getEpochSecond());
|
|
||||||
int deleted = ps.executeUpdate();
|
|
||||||
if (deleted > 0) {
|
|
||||||
System.out.println("✓ Cleaned up " + deleted + " expired cache entries");
|
|
||||||
}
|
|
||||||
} catch (SQLException e) {
|
|
||||||
System.err.println("Cache cleanup error: " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get cache statistics
|
|
||||||
*/
|
|
||||||
public synchronized void printStats() {
|
|
||||||
String sql = "SELECT COUNT(*) as total, " +
|
|
||||||
"SUM(CASE WHEN expires_at > ? THEN 1 ELSE 0 END) as valid, " +
|
|
||||||
"SUM(LENGTH(html)) as total_size " +
|
|
||||||
"FROM page_cache";
|
|
||||||
|
|
||||||
try (PreparedStatement ps = connection.prepareStatement(sql)) {
|
|
||||||
ps.setLong(1, Instant.now().getEpochSecond());
|
|
||||||
ResultSet rs = ps.executeQuery();
|
|
||||||
|
|
||||||
if (rs.next()) {
|
|
||||||
int total = rs.getInt("total");
|
|
||||||
int valid = rs.getInt("valid");
|
|
||||||
long size = rs.getLong("total_size");
|
|
||||||
|
|
||||||
System.out.println("\n=== Cache Statistics ===");
|
|
||||||
System.out.println("Total entries: " + total);
|
|
||||||
System.out.println("Valid entries: " + valid);
|
|
||||||
System.out.println("Expired entries: " + (total - valid));
|
|
||||||
System.out.println("Total size: " + (size / 1024) + " KB");
|
|
||||||
}
|
|
||||||
} catch (SQLException e) {
|
|
||||||
System.err.println("Cache stats error: " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Close database connection
|
|
||||||
*/
|
|
||||||
public void close() {
|
|
||||||
if (connection != null) {
|
|
||||||
try {
|
|
||||||
connection.close();
|
|
||||||
} catch (SQLException e) {
|
|
||||||
System.err.println("Error closing cache database: " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -41,7 +41,7 @@ public class AuctionParsingTest {
|
|||||||
System.out.println("\n=== Auction Parsing Test ===");
|
System.out.println("\n=== Auction Parsing Test ===");
|
||||||
System.out.println("Found " + auctionLinks.size() + " auction links");
|
System.out.println("Found " + auctionLinks.size() + " auction links");
|
||||||
|
|
||||||
List<TroostwijkScraper.AuctionInfo> auctions = new ArrayList<>();
|
List<AuctionInfo> auctions = new ArrayList<>();
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
for (Element link : auctionLinks) {
|
for (Element link : auctionLinks) {
|
||||||
@@ -59,7 +59,7 @@ public class AuctionParsingTest {
|
|||||||
int auctionId = Integer.parseInt(matcher.group(2));
|
int auctionId = Integer.parseInt(matcher.group(2));
|
||||||
|
|
||||||
// Extract auction info using IMPROVED text-based method
|
// Extract auction info using IMPROVED text-based method
|
||||||
TroostwijkScraper.AuctionInfo auction = extractAuctionInfoFromText(link, href, auctionId, "A" + typeNum);
|
AuctionInfo auction = extractAuctionInfoFromText(link, href, auctionId, "A" + typeNum);
|
||||||
auctions.add(auction);
|
auctions.add(auction);
|
||||||
|
|
||||||
// Print the first 10 auctions for verification
|
// Print the first 10 auctions for verification
|
||||||
@@ -101,7 +101,7 @@ public class AuctionParsingTest {
|
|||||||
assertTrue(auctions.size() > 0, "Should find at least one auction");
|
assertTrue(auctions.size() > 0, "Should find at least one auction");
|
||||||
|
|
||||||
// Verify all auctions have basic info
|
// Verify all auctions have basic info
|
||||||
for (TroostwijkScraper.AuctionInfo auction : auctions) {
|
for (AuctionInfo auction : auctions) {
|
||||||
assertNotNull(auction.title, "Title should not be null for auction " + auction.auctionId);
|
assertNotNull(auction.title, "Title should not be null for auction " + auction.auctionId);
|
||||||
assertTrue(auction.title.length() > 0, "Title should not be empty for auction " + auction.auctionId);
|
assertTrue(auction.title.length() > 0, "Title should not be empty for auction " + auction.auctionId);
|
||||||
assertNotNull(auction.url, "URL should not be null for auction " + auction.auctionId);
|
assertNotNull(auction.url, "URL should not be null for auction " + auction.auctionId);
|
||||||
@@ -119,8 +119,8 @@ public class AuctionParsingTest {
|
|||||||
* Expected format: "[day] om [time] [lot_count] [title] [city], [CC]"
|
* Expected format: "[day] om [time] [lot_count] [title] [city], [CC]"
|
||||||
* Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE"
|
* Example: "woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE"
|
||||||
*/
|
*/
|
||||||
private TroostwijkScraper.AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) {
|
private AuctionInfo extractAuctionInfoFromText(Element link, String href, int auctionId, String type) {
|
||||||
TroostwijkScraper.AuctionInfo auction = new TroostwijkScraper.AuctionInfo();
|
AuctionInfo auction = new AuctionInfo();
|
||||||
auction.auctionId = auctionId;
|
auction.auctionId = auctionId;
|
||||||
auction.type = type;
|
auction.type = type;
|
||||||
auction.url = "https://www.troostwijkauctions.com" + href;
|
auction.url = "https://www.troostwijkauctions.com" + href;
|
||||||
|
|||||||
@@ -68,71 +68,18 @@ public class TroostwijkScraperTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFetchAndPersistAuctionData() throws SQLException {
|
|
||||||
// First, discover auctions
|
|
||||||
List<Integer> auctions = scraper.discoverDutchAuctions();
|
|
||||||
assertFalse(auctions.isEmpty(), "Need at least one auction to test");
|
|
||||||
|
|
||||||
// Take the first auction and fetch its lots
|
|
||||||
Integer firstSaleId = auctions.getFirst();
|
|
||||||
System.out.println("Testing with sale ID: " + firstSaleId);
|
|
||||||
|
|
||||||
scraper.fetchLotsForSale(firstSaleId);
|
|
||||||
|
|
||||||
// Verify data was persisted to database
|
|
||||||
List<TroostwijkScraper.Lot> lotsInDb = scraper.db.getAllLots();
|
|
||||||
|
|
||||||
assertNotNull(lotsInDb, "Lots list should not be null");
|
|
||||||
assertFalse(lotsInDb.isEmpty(), "Should have persisted at least one lot");
|
|
||||||
|
|
||||||
// Verify lot properties
|
|
||||||
for (TroostwijkScraper.Lot lot : lotsInDb) {
|
|
||||||
assertEquals(firstSaleId.intValue(), lot.saleId, "Lot should belong to the correct sale");
|
|
||||||
assertTrue(lot.lotId > 0, "Lot ID should be positive");
|
|
||||||
assertNotNull(lot.title, "Lot title should not be null");
|
|
||||||
assertFalse(lot.title.isEmpty(), "Lot title should not be empty");
|
|
||||||
assertNotNull(lot.url, "Lot URL should not be null");
|
|
||||||
assertTrue(lot.url.startsWith("https://"), "Lot URL should be valid");
|
|
||||||
assertTrue(lot.currentBid >= 0, "Current bid should be non-negative");
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println("✓ Successfully persisted " + lotsInDb.size() + " lots to database");
|
|
||||||
System.out.println("✓ All lot properties are valid");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDatabaseSchema() throws SQLException {
|
public void testDatabaseSchema() throws SQLException {
|
||||||
// Verify that the database schema was created correctly
|
// Verify that the database schema was created correctly
|
||||||
List<TroostwijkScraper.Lot> lots = scraper.db.getAllLots();
|
List<Lot> lots = scraper.db.getAllLots();
|
||||||
assertNotNull(lots, "Should be able to query lots table");
|
assertNotNull(lots, "Should be able to query lots table");
|
||||||
|
|
||||||
int imageCount = scraper.db.getImageCount();
|
int imageCount = scraper.db.getImageCount();
|
||||||
assertTrue(imageCount >= 0, "Image count should be non-negative");
|
assertTrue(imageCount >= 0, "Image count should be non-negative");
|
||||||
|
|
||||||
List<TroostwijkScraper.Lot> activeLots = scraper.db.getActiveLots();
|
List<Lot> activeLots = scraper.db.getActiveLots();
|
||||||
assertNotNull(activeLots, "Should be able to query active lots");
|
assertNotNull(activeLots, "Should be able to query active lots");
|
||||||
|
|
||||||
System.out.println("✓ Database schema is valid and queryable");
|
System.out.println("✓ Database schema is valid and queryable");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAuctionProperties() {
|
|
||||||
List<Integer> auctions = scraper.discoverDutchAuctions();
|
|
||||||
assertFalse(auctions.isEmpty(), "Should find auctions");
|
|
||||||
|
|
||||||
// Test that we can fetch data for multiple auctions
|
|
||||||
int auctionsToTest = Math.min(2, auctions.size());
|
|
||||||
|
|
||||||
for (int i = 0; i < auctionsToTest; i++) {
|
|
||||||
Integer saleId = auctions.get(i);
|
|
||||||
System.out.println("Testing auction " + (i + 1) + ": " + saleId);
|
|
||||||
|
|
||||||
// This should not throw an exception
|
|
||||||
assertDoesNotThrow(() -> scraper.fetchLotsForSale(saleId),
|
|
||||||
"Should be able to fetch lots for sale " + saleId);
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println("✓ Successfully tested " + auctionsToTest + " auctions");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,456 +1,61 @@
|
|||||||
## Woensdag 3 dec 25
|
Configure your devices to use the Pi-hole as their DNS server │
|
||||||
|
│ using: │
|
||||||
* [
|
│ │
|
||||||
|
│ IPv4: 192.168.1.159 │
|
||||||
woensdag om 16:00
|
│ IPv6: fdc5:59a6:9ac1:f11f:2c86:25d3:6282:37ef │
|
||||||
|
│ If you have not done so already, the above IP should be set to │
|
||||||

|
│ static. │
|
||||||
|
│ View the web interface at http://pi.hole:80/admin or │
|
||||||

|
│ http://192.168.1.159:80/admin │
|
||||||
|
│ │
|
||||||

|
│ Your Admin Webpage login password is gYj7Enh- │
|
||||||
|
│ │
|
||||||

|
│ │
|
||||||
|
│ To allow your user to use all CLI functions without │
|
||||||
145
|
│ authentication, │
|
||||||
|
│ refer to https://docs.pi-hole.net/main/post-install/ │
|
||||||
Industrie & machines
|
├─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
Meerdere locaties (45)
|
|
||||||
|
127.0.0.1
|
||||||
|
192.168.1.159
|
||||||
|
::1
|
||||||
|
fdc5:59a6:9ac1:f11f:2c86:25d3:6282:37ef
|
||||||
|
fdc5:59a6:9ac1:f11f:bd8c:6e87:65f0:243c
|
||||||
](/a/industrie-machines-A3-37358)
|
fe80::a05b:bbc6:d47f:3002%enp9s0
|
||||||
* [
|
2IXD-XJN9-C337-1K4Y-BBEO-HV1F-3BVI
|
||||||
|
|
||||||
woensdag om 16:00
|
https://ollama.lan:9443/#!/wizard - heel-goed-wachtwoord
|
||||||
|
|
||||||

|
[
|
||||||
|
{
|
||||||

|
"domain": "ollama.lan",
|
||||||
|
"answer": "192.168.1.159",
|
||||||

|
"enabled": true
|
||||||
|
},
|
||||||

|
{
|
||||||
|
"domain": "hephaestus.lan",
|
||||||
38
|
"answer": "192.168.1.159",
|
||||||
|
"enabled": true
|
||||||
D | Raceautotransporters, kraan-polypengrepen en containers uit voorraadaanpassing
|
},
|
||||||
|
{
|
||||||
Nieheim, DE
|
"domain": "hermes.lan",
|
||||||
|
"answer": "192.168.137.239",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"domain": "atlas.lan",
|
||||||
](/a/d-%7C-raceautotransporters-kraan-polypengrepen-en-containers-uit-voorraadaanpassing-A1-39772)
|
"answer": "192.168.1.100",
|
||||||
* [
|
"enabled": true
|
||||||
|
},
|
||||||
woensdag om 16:00
|
{
|
||||||
|
"domain": "hub.lan",
|
||||||

|
"answer": "192.168.1.1",
|
||||||
|
"enabled": true
|
||||||

|
},
|
||||||
|
{
|
||||||

|
"domain": "ha.lan",
|
||||||
|
"answer": "192.168.1.193",
|
||||||

|
"enabled": true
|
||||||
|
}
|
||||||
61
|
]
|
||||||
|
|
||||||
Voedselverwerkende apparatuur en verpakkingsmachines
|
|
||||||
|
|
||||||
CHOMERAC, FR
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/voedselverwerkende-apparatuur-en-verpakkingsmachines-A1-39319)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 16:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
117
|
|
||||||
|
|
||||||
Landbouw- & grondverzetmachines
|
|
||||||
|
|
||||||
Meerdere locaties (49)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/landbouw-grondverzetmachines-A3-37375)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 17:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
261
|
|
||||||
|
|
||||||
Gereedschappen & uitrusting
|
|
||||||
|
|
||||||
Meerdere locaties (36), BE
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/gereedschappen-uitrusting-A3-37367)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 18:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
1
|
|
||||||
|
|
||||||
Vrachtwagens voor bedrijfsvoertuigen
|
|
||||||
|
|
||||||
Loßburg, DE
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/vrachtwagens-voor-bedrijfsvoertuigen-A7-39531)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
61
|
|
||||||
|
|
||||||
Witgoed en accessoires
|
|
||||||
|
|
||||||
Etten-Leur, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/witgoed-en-accessoires-A1-27241)
|
|
||||||
* [
|
|
||||||
|
|
||||||
Opent 28 nov 17:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
54
|
|
||||||
|
|
||||||
Collectie Rolex en Cartier horloges
|
|
||||||
|
|
||||||
Dordrecht, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/collectie-rolex-en-cartier-horloges-A1-39398)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
254
|
|
||||||
|
|
||||||
SHOWROOMKEUKENS en INBOUWAPPARATUUR
|
|
||||||
|
|
||||||
Tilburg, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/showroomkeukens-en-inbouwapparatuur-A1-39480)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
499
|
|
||||||
|
|
||||||
Machines, retourgoederen en restpartijen
|
|
||||||
|
|
||||||
Harlingen, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/machines-retourgoederen-en-restpartijen-A1-39642)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
120
|
|
||||||
|
|
||||||
Partijen gereedschap, kantoorinventaris, detailhandelgoederen, decoratie en olijfbomen
|
|
||||||
|
|
||||||
Meerdere locaties (3), NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/partijen-gereedschap-kantoorinventaris-detailhandelgoederen-decoratie-en-olijfbomen-A1-27016)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
16
|
|
||||||
|
|
||||||
Faillissementsvoertuigen
|
|
||||||
|
|
||||||
Meerdere locaties (3), NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/faillissementsvoertuigen-A1-38368)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
78
|
|
||||||
|
|
||||||
Personenauto’s, oldtimers, campers en brommobielen
|
|
||||||
|
|
||||||
Buitenpost, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/personenauto%E2%80%99s-oldtimers-campers-en-brommobielen-A1-39508)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
391
|
|
||||||
|
|
||||||
Bezorgveiling Faillissement Dvize B.V. – Hyundai Power Products gereedschappen
|
|
||||||
|
|
||||||
Meerdere locaties (2)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/bezorgveiling-faillissement-dvize-b-v-%E2%80%93-hyundai-power-products-gereedschappen-A1-39409)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
208
|
|
||||||
|
|
||||||
Kunstplanten en bomen, composiet gevel- en vloerbekleding en akoestische materialen
|
|
||||||
|
|
||||||
De Lier, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/kunstplanten-en-bomen-composiet-gevel-en-vloerbekleding-en-akoestische-materialen-A1-28707)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
181
|
|
||||||
|
|
||||||
Metaalbewerkingsmachines, gereedschap en voorraad in verband met bedrijfsverhuizing
|
|
||||||
|
|
||||||
Cuijk, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/metaalbewerkingsmachines-gereedschap-en-voorraad-in-verband-met-bedrijfsverhuizing-A1-39360)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
238
|
|
||||||
|
|
||||||
Overstock en magazijnopruiming
|
|
||||||
|
|
||||||
Heesch, NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/overstock-en-magazijnopruiming-A1-39538)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
47
|
|
||||||
|
|
||||||
Verzamelveiling Scooters en Motoren
|
|
||||||
|
|
||||||
Meerdere locaties (2), NL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/verzamelveiling-scooters-en-motoren-A1-28428)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:00
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
338
|
|
||||||
|
|
||||||
Auto's & transport
|
|
||||||
|
|
||||||
Meerdere locaties (109)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/auto%27s-transport-A3-37349)
|
|
||||||
* [
|
|
||||||
|
|
||||||
woensdag om 19:30
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
74
|
|
||||||
|
|
||||||
Gouden juwelen en diamanten
|
|
||||||
|
|
||||||
Meerdere locaties (28)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
](/a/gouden-juwelen-en-diamanten-A1-29562)
|
|
||||||
|
|||||||
326
wiki/ARCHITECTURE-TROOSTWIJK-SCRAPER.md
Normal file
326
wiki/ARCHITECTURE-TROOSTWIJK-SCRAPER.md
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
# Troostwijk Scraper - Architecture & Data Flow
|
||||||
|
|
||||||
|
## System Overview
|
||||||
|
|
||||||
|
The scraper follows a **3-phase hierarchical crawling pattern** to extract auction and lot data from the Troostwijk Auctions website.
|
||||||
|
|
||||||
|
## Architecture Diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ TROOSTWIJK SCRAPER │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ PHASE 1: COLLECT AUCTION URLs │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ │
|
||||||
|
│ │ Listing Page │────────▶│ Extract /a/ │ │
|
||||||
|
│ │ /auctions? │ │ auction URLs │ │
|
||||||
|
│ │ page=1..N │ └──────────────┘ │
|
||||||
|
│ └──────────────┘ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ [ List of Auction URLs ] │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ PHASE 2: EXTRACT LOT URLs FROM AUCTIONS │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ │
|
||||||
|
│ │ Auction Page │────────▶│ Parse │ │
|
||||||
|
│ │ /a/... │ │ __NEXT_DATA__│ │
|
||||||
|
│ └──────────────┘ │ JSON │ │
|
||||||
|
│ │ └──────────────┘ │
|
||||||
|
│ │ │ │
|
||||||
|
│ ▼ ▼ │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ │
|
||||||
|
│ │ Save Auction │ │ Extract /l/ │ │
|
||||||
|
│ │ Metadata │ │ lot URLs │ │
|
||||||
|
│ │ to DB │ └──────────────┘ │
|
||||||
|
│ └──────────────┘ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ [ List of Lot URLs ] │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ PHASE 3: SCRAPE LOT DETAILS │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ │
|
||||||
|
│ │ Lot Page │────────▶│ Parse │ │
|
||||||
|
│ │ /l/... │ │ __NEXT_DATA__│ │
|
||||||
|
│ └──────────────┘ │ JSON │ │
|
||||||
|
│ └──────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ┌─────────────────────────┴─────────────────┐ │
|
||||||
|
│ ▼ ▼ │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ │
|
||||||
|
│ │ Save Lot │ │ Save Images │ │
|
||||||
|
│ │ Details │ │ URLs to DB │ │
|
||||||
|
│ │ to DB │ └──────────────┘ │
|
||||||
|
│ └──────────────┘ │ │
|
||||||
|
│ ▼ │
|
||||||
|
│ [Optional Download] │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
```text
|
||||||
|
┌──────────────────────────────────────────────────────────────────┐
|
||||||
|
│ CACHE TABLE (HTML Storage with Compression) │
|
||||||
|
├──────────────────────────────────────────────────────────────────┤
|
||||||
|
│ cache │
|
||||||
|
│ ├── url (TEXT, PRIMARY KEY) │
|
||||||
|
│ ├── content (BLOB) -- Compressed HTML (zlib) │
|
||||||
|
│ ├── timestamp (REAL) │
|
||||||
|
│ ├── status_code (INTEGER) │
|
||||||
|
│ └── compressed (INTEGER) -- 1=compressed, 0=plain │
|
||||||
|
└──────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
┌──────────────────────────────────────────────────────────────────┐
|
||||||
|
│ AUCTIONS TABLE │
|
||||||
|
├──────────────────────────────────────────────────────────────────┤
|
||||||
|
│ auctions │
|
||||||
|
│ ├── auction_id (TEXT, PRIMARY KEY) -- e.g. "A7-39813" │
|
||||||
|
│ ├── url (TEXT, UNIQUE) │
|
||||||
|
│ ├── title (TEXT) │
|
||||||
|
│ ├── location (TEXT) -- e.g. "Cluj-Napoca, RO" │
|
||||||
|
│ ├── lots_count (INTEGER) │
|
||||||
|
│ ├── first_lot_closing_time (TEXT) │
|
||||||
|
│ └── scraped_at (TEXT) │
|
||||||
|
└──────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
┌──────────────────────────────────────────────────────────────────┐
|
||||||
|
│ LOTS TABLE │
|
||||||
|
├──────────────────────────────────────────────────────────────────┤
|
||||||
|
│ lots │
|
||||||
|
│ ├── lot_id (TEXT, PRIMARY KEY) -- e.g. "A1-28505-5" │
|
||||||
|
│ ├── auction_id (TEXT) -- FK to auctions │
|
||||||
|
│ ├── url (TEXT, UNIQUE) │
|
||||||
|
│ ├── title (TEXT) │
|
||||||
|
│ ├── current_bid (TEXT) -- "€123.45" or "No bids" │
|
||||||
|
│ ├── bid_count (INTEGER) │
|
||||||
|
│ ├── closing_time (TEXT) │
|
||||||
|
│ ├── viewing_time (TEXT) │
|
||||||
|
│ ├── pickup_date (TEXT) │
|
||||||
|
│ ├── location (TEXT) -- e.g. "Dongen, NL" │
|
||||||
|
│ ├── description (TEXT) │
|
||||||
|
│ ├── category (TEXT) │
|
||||||
|
│ └── scraped_at (TEXT) │
|
||||||
|
│ FOREIGN KEY (auction_id) → auctions(auction_id) │
|
||||||
|
└──────────────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
┌──────────────────────────────────────────────────────────────────┐
|
||||||
|
│ IMAGES TABLE (Image URLs & Download Status) │
|
||||||
|
├──────────────────────────────────────────────────────────────────┤
|
||||||
|
│ images ◀── THIS TABLE HOLDS IMAGE LINKS│
|
||||||
|
│ ├── id (INTEGER, PRIMARY KEY AUTOINCREMENT) │
|
||||||
|
│ ├── lot_id (TEXT) -- FK to lots │
|
||||||
|
│ ├── url (TEXT) -- Image URL │
|
||||||
|
│ ├── local_path (TEXT) -- Path after download │
|
||||||
|
│ └── downloaded (INTEGER) -- 0=pending, 1=downloaded │
|
||||||
|
│ FOREIGN KEY (lot_id) → lots(lot_id) │
|
||||||
|
└──────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Sequence Diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
User Scraper Playwright Cache DB Data Tables
|
||||||
|
│ │ │ │ │
|
||||||
|
│ Run │ │ │ │
|
||||||
|
├──────────────▶│ │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ Phase 1: Listing Pages │ │
|
||||||
|
│ ├───────────────▶│ │ │
|
||||||
|
│ │ goto() │ │ │
|
||||||
|
│ │◀───────────────┤ │ │
|
||||||
|
│ │ HTML │ │ │
|
||||||
|
│ ├───────────────────────────────▶│ │
|
||||||
|
│ │ compress & cache │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ Phase 2: Auction Pages │ │
|
||||||
|
│ ├───────────────▶│ │ │
|
||||||
|
│ │◀───────────────┤ │ │
|
||||||
|
│ │ HTML │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ Parse __NEXT_DATA__ JSON │ │
|
||||||
|
│ │────────────────────────────────────────────────▶│
|
||||||
|
│ │ │ │ INSERT auctions
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ Phase 3: Lot Pages │ │
|
||||||
|
│ ├───────────────▶│ │ │
|
||||||
|
│ │◀───────────────┤ │ │
|
||||||
|
│ │ HTML │ │ │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ Parse __NEXT_DATA__ JSON │ │
|
||||||
|
│ │────────────────────────────────────────────────▶│
|
||||||
|
│ │ │ │ INSERT lots │
|
||||||
|
│ │────────────────────────────────────────────────▶│
|
||||||
|
│ │ │ │ INSERT images│
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ Export to CSV/JSON │ │
|
||||||
|
│ │◀────────────────────────────────────────────────┤
|
||||||
|
│ │ Query all data │ │
|
||||||
|
│◀──────────────┤ │ │ │
|
||||||
|
│ Results │ │ │ │
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Flow Details
|
||||||
|
|
||||||
|
### 1. **Page Retrieval & Caching**
|
||||||
|
```
|
||||||
|
Request URL
|
||||||
|
│
|
||||||
|
├──▶ Check cache DB (with timestamp validation)
|
||||||
|
│ │
|
||||||
|
│ ├─[HIT]──▶ Decompress (if compressed=1)
|
||||||
|
│ │ └──▶ Return HTML
|
||||||
|
│ │
|
||||||
|
│ └─[MISS]─▶ Fetch via Playwright
|
||||||
|
│ │
|
||||||
|
│ ├──▶ Compress HTML (zlib level 9)
|
||||||
|
│ │ ~70-90% size reduction
|
||||||
|
│ │
|
||||||
|
│ └──▶ Store in cache DB (compressed=1)
|
||||||
|
│
|
||||||
|
└──▶ Return HTML for parsing
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. **JSON Parsing Strategy**
|
||||||
|
```
|
||||||
|
HTML Content
|
||||||
|
│
|
||||||
|
└──▶ Extract <script id="__NEXT_DATA__">
|
||||||
|
│
|
||||||
|
├──▶ Parse JSON
|
||||||
|
│ │
|
||||||
|
│ ├─[has pageProps.lot]──▶ Individual LOT
|
||||||
|
│ │ └──▶ Extract: title, bid, location, images, etc.
|
||||||
|
│ │
|
||||||
|
│ └─[has pageProps.auction]──▶ AUCTION
|
||||||
|
│ │
|
||||||
|
│ ├─[has lots[] array]──▶ Auction with lots
|
||||||
|
│ │ └──▶ Extract: title, location, lots_count
|
||||||
|
│ │
|
||||||
|
│ └─[no lots[] array]──▶ Old format lot
|
||||||
|
│ └──▶ Parse as lot
|
||||||
|
│
|
||||||
|
└──▶ Fallback to HTML regex parsing (if JSON fails)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. **Image Handling**
|
||||||
|
```
|
||||||
|
Lot Page Parsed
|
||||||
|
│
|
||||||
|
├──▶ Extract images[] from JSON
|
||||||
|
│ │
|
||||||
|
│ └──▶ INSERT INTO images (lot_id, url, downloaded=0)
|
||||||
|
│
|
||||||
|
└──▶ [If DOWNLOAD_IMAGES=True]
|
||||||
|
│
|
||||||
|
├──▶ Download each image
|
||||||
|
│ │
|
||||||
|
│ ├──▶ Save to: /images/{lot_id}/001.jpg
|
||||||
|
│ │
|
||||||
|
│ └──▶ UPDATE images SET local_path=?, downloaded=1
|
||||||
|
│
|
||||||
|
└──▶ Rate limit between downloads (0.5s)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Key Configuration
|
||||||
|
|
||||||
|
| Setting | Value | Purpose |
|
||||||
|
|---------|-------|---------|
|
||||||
|
| `CACHE_DB` | `/mnt/okcomputer/output/cache.db` | SQLite database path |
|
||||||
|
| `IMAGES_DIR` | `/mnt/okcomputer/output/images` | Downloaded images storage |
|
||||||
|
| `RATE_LIMIT_SECONDS` | `0.5` | Delay between requests |
|
||||||
|
| `DOWNLOAD_IMAGES` | `False` | Toggle image downloading |
|
||||||
|
| `MAX_PAGES` | `50` | Number of listing pages to crawl |
|
||||||
|
|
||||||
|
## Output Files
|
||||||
|
|
||||||
|
```
|
||||||
|
/mnt/okcomputer/output/
|
||||||
|
├── cache.db # SQLite database (compressed HTML + data)
|
||||||
|
├── auctions_{timestamp}.json # Exported auctions
|
||||||
|
├── auctions_{timestamp}.csv # Exported auctions
|
||||||
|
├── lots_{timestamp}.json # Exported lots
|
||||||
|
├── lots_{timestamp}.csv # Exported lots
|
||||||
|
└── images/ # Downloaded images (if enabled)
|
||||||
|
├── A1-28505-5/
|
||||||
|
│ ├── 001.jpg
|
||||||
|
│ └── 002.jpg
|
||||||
|
└── A1-28505-6/
|
||||||
|
└── 001.jpg
|
||||||
|
```
|
||||||
|
|
||||||
|
## Extension Points for Integration
|
||||||
|
|
||||||
|
### 1. **Downstream Processing Pipeline**
|
||||||
|
```python
|
||||||
|
# Query images that have not been downloaded yet
|
||||||
|
SELECT lot_id, url FROM images WHERE downloaded = 0
|
||||||
|
|
||||||
|
# Process images: OCR, classification, etc.
|
||||||
|
# Update status when complete
|
||||||
|
UPDATE images SET downloaded = 1, local_path = ? WHERE id = ?
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. **Real-time Monitoring**
|
||||||
|
```python
|
||||||
|
# Check for new lots every N minutes
|
||||||
|
SELECT COUNT(*) FROM lots WHERE scraped_at > datetime('now', '-1 hour')
|
||||||
|
|
||||||
|
# Monitor bid changes
|
||||||
|
SELECT lot_id, current_bid, bid_count FROM lots WHERE bid_count > 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. **Analytics & Reporting**
|
||||||
|
```python
|
||||||
|
# Top locations
|
||||||
|
SELECT location, COUNT(*) as lot_count FROM lots GROUP BY location
|
||||||
|
|
||||||
|
# Auction statistics
|
||||||
|
SELECT
|
||||||
|
a.auction_id,
|
||||||
|
a.title,
|
||||||
|
COUNT(l.lot_id) as actual_lots,
|
||||||
|
SUM(CASE WHEN l.bid_count > 0 THEN 1 ELSE 0 END) as lots_with_bids
|
||||||
|
FROM auctions a
|
||||||
|
LEFT JOIN lots l ON a.auction_id = l.auction_id
|
||||||
|
GROUP BY a.auction_id
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. **Image Processing Integration**
|
||||||
|
```python
|
||||||
|
# Get all images for a lot
|
||||||
|
SELECT url, local_path FROM images WHERE lot_id = 'A1-28505-5'
|
||||||
|
|
||||||
|
# Batch process unprocessed images
|
||||||
|
SELECT i.id, i.lot_id, i.local_path, l.title, l.category
|
||||||
|
FROM images i
|
||||||
|
JOIN lots l ON i.lot_id = l.lot_id
|
||||||
|
WHERE i.downloaded = 1 AND i.local_path IS NOT NULL
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Characteristics
|
||||||
|
|
||||||
|
- **Compression**: ~70-90% HTML size reduction (1GB → ~100-300MB)
|
||||||
|
- **Rate Limiting**: Exactly 0.5s between requests (respectful scraping)
|
||||||
|
- **Caching**: 24-hour default cache validity (configurable)
|
||||||
|
- **Throughput**: up to ~7,200 uncached pages/hour (upper bound implied by the 0.5s rate limit)
|
||||||
|
- **Scalability**: SQLite handles millions of rows efficiently
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
- **Network failures**: Cached with status_code=500 and retried after cache expiry
|
||||||
|
- **Parse failures**: Falls back to HTML regex patterns
|
||||||
|
- **Compression errors**: Auto-detects and handles uncompressed legacy data
|
||||||
|
- **Missing fields**: Defaults to "No bids", empty string, or 0
|
||||||
|
|
||||||
|
## Rate Limiting & Ethics
|
||||||
|
|
||||||
|
- **REQUIRED**: 0.5 second delay between ALL requests
|
||||||
|
- **Respects cache**: Avoids unnecessary re-fetching
|
||||||
|
- **User-Agent**: Identifies as a standard browser
|
||||||
|
- **No parallelization**: Single-threaded sequential crawling
|
||||||
@@ -183,7 +183,7 @@ The scraper works fine despite these warnings.
|
|||||||
|
|
||||||
## Full Documentation
|
## Full Documentation
|
||||||
|
|
||||||
See [README.md](README.md) for complete documentation including:
|
See [README.md](../README.md) for complete documentation including:
|
||||||
- Email setup details
|
- Email setup details
|
||||||
- YOLO installation guide
|
- YOLO installation guide
|
||||||
- Configuration options
|
- Configuration options
|
||||||
Reference in New Issue
Block a user