This commit is contained in:
2025-11-27 08:15:26 +01:00
parent 47854d8b39
commit afa52cb11c
12 changed files with 968 additions and 741 deletions

42
.github/workflows/deploy.yml vendored Normal file
View File

@@ -0,0 +1,42 @@
# Builds the Maven project on every push to main, uploads the resulting jar
# to Artifactory, then triggers the update script on the target server.
name: Build and Deploy

on:
  push:
    branches: ["main"]

# Least privilege for GITHUB_TOKEN: the jobs only need to read the repository.
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '21'

      - name: Build
        run: mvn -B clean package

      - name: Upload to JFrog
        # Secrets are handed over as environment variables instead of being
        # interpolated into the script text, so special characters in the
        # values cannot break (or inject into) the shell command.
        env:
          JFROG_USER: ${{ secrets.JFROG_USER }}
          JFROG_PASS: ${{ secrets.JFROG_PASS }}
        # --fail makes curl exit non-zero on HTTP 4xx/5xx; without it a
        # rejected upload would still let the deploy job run.
        # https keeps the basic-auth credentials off the wire in clear text.
        # NOTE(review): assumes target/ contains exactly one *.jar and that
        # JFROG-SERVER serves TLS - confirm both.
        run: |
          curl --fail -u "${JFROG_USER}:${JFROG_PASS}" \
            -T target/*.jar \
            "https://JFROG-SERVER/artifactory/myrepo/app-latest.jar"

  deploy:
    # Only runs when the build job (including the upload) succeeded.
    needs: build
    runs-on: ubuntu-latest
    steps:
      - name: Trigger remote deploy script
        uses: appleboy/ssh-action@v0.1.7
        with:
          host: ${{ secrets.SERVER_IP }}
          username: ${{ secrets.SERVER_USER }}
          key: ${{ secrets.SERVER_SSH_KEY }}
          script: |
            /opt/myapp/update.sh

2
.gitignore vendored
View File

@@ -28,3 +28,5 @@ bin/
### Mac OS ### ### Mac OS ###
.DS_Store .DS_Store
NUL

1
.idea/.name generated Normal file
View File

@@ -0,0 +1 @@
scrappy

65
.idea/dataSources.xml generated Normal file
View File

@@ -0,0 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
<data-source source="LOCAL" name="troostwijk" uuid="9cb4e997-fbca-4426-9093-d308871c5d5e">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:$PROJECT_DIR$/troostwijk.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
<libraries>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/license.txt</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.pom</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.pom.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/_remote.repositories</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-parent/1.7.36/slf4j-parent-1.7.36.pom</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-parent/1.7.36/slf4j-parent-1.7.36.pom.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-parent/1.7.36/_remote.repositories</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.pom</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.pom.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/_remote.repositories</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.43.0/org/xerial/sqlite-jdbc/3.43.0.0/sqlite-jdbc-3.43.0.0.jar</url>
</library>
</libraries>
</data-source>
</component>
</project>

View File

@@ -3,8 +3,16 @@
<component name="MaterialThemeProjectNewConfig"> <component name="MaterialThemeProjectNewConfig">
<option name="metadata"> <option name="metadata">
<MTProjectMetadataState> <MTProjectMetadataState>
<option name="userId" value="-3d6055cc:19abfe94c99:-7ff7" /> <option name="migrated" value="true" />
<option name="pristineConfig" value="false" />
<option name="userId" value="50e4e899:187ededa0cf:-8000" />
<option name="version" value="8.6.3" />
</MTProjectMetadataState> </MTProjectMetadataState>
</option> </option>
<option name="titleBarState">
<MTProjectTitleBarConfigState>
<option name="overrideColor" value="false" />
</MTProjectTitleBarConfigState>
</option>
</component> </component>
</project> </project>

View File

@@ -123,10 +123,9 @@ public class TroostwijkScraper {
* Discovers all active Dutch auctions by crawling the auctions page. * Discovers all active Dutch auctions by crawling the auctions page.
* *
* Troostwijk lists auctions for many countries on one page. We parse * Troostwijk lists auctions for many countries on one page. We parse
* the page with jsoup (an HTML parser that fetches and parses realworld * the page with jsoup and filter auctions whose location contains ", NL"
* HTML easily【438902460386021†L14-L24】) and filter auctions whose location * (indicating the Netherlands). Each auction link contains a unique sale ID
* contains ", NL" (indicating the Netherlands). Each auction link * in the format A1-xxxxx or A7-xxxxx which we extract from the URL.
* contains a unique sale ID which we extract from its URL.
* *
* @return a list of sale identifiers for auctions located in NL * @return a list of sale identifiers for auctions located in NL
*/ */
@@ -135,47 +134,64 @@ public class TroostwijkScraper {
try { try {
// Fetch the auctions overview page // Fetch the auctions overview page
Document doc = Jsoup.connect(AUCTIONS_PAGE).get(); Document doc = Jsoup.connect(AUCTIONS_PAGE).get();
// Select all anchor elements that represent an auction listing.
// The exact selector may change; inspect the page with your browsers // Select all anchor elements that link to auction pages
// developer tools and update accordingly. // The URL pattern is: /a/auction-title-A1-xxxxx or /a/auction-title-A7-xxxxx
Elements auctionLinks = doc.select("a[href][data-id]"); Elements auctionLinks = doc.select("a[href^='/a/']");
System.out.println("Found " + auctionLinks.size() + " potential auction links");
for (Element link : auctionLinks) { for (Element link : auctionLinks) {
Element locationElement = link.selectFirst(".auction-location"); // Get the href to extract the auction ID
String location = locationElement != null ? locationElement.text() : "";
if (location.contains(", NL")) {
// Extract saleID from the data-id attribute or href
String saleIdStr = link.attr("data-id");
if (saleIdStr.isEmpty()) {
// Fallback: parse from URL path, e.g. /nl/sale/27213/machines
String href = link.attr("href"); String href = link.attr("href");
String[] parts = href.split("/");
for (String p : parts) { // Check if this link contains location text with ", NL"
if (p.matches("\\d+")) { String linkText = link.text();
saleIdStr = p;
// Look for location in any div inside the link
Elements divs = link.select("div");
boolean isDutch = false;
for (Element div : divs) {
String text = div.text();
if (text.contains(", NL")) {
isDutch = true;
break; break;
} }
} }
}
if (isDutch) {
// Extract auction ID from URL
// Format: /a/title-A1-38375 or /a/title-A7-12345
// We want the number after A1- or A7-
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("A[17]-(\\d+)");
java.util.regex.Matcher matcher = pattern.matcher(href);
if (matcher.find()) {
try { try {
int saleId = Integer.parseInt(saleIdStr); int saleId = Integer.parseInt(matcher.group(1));
if (!saleIds.contains(saleId)) {
saleIds.add(saleId); saleIds.add(saleId);
} catch (NumberFormatException ignored) { System.out.println(" Found Dutch auction: " + saleId + " - " + href);
// not a sale ID }
} catch (NumberFormatException e) {
// Skip invalid IDs
}
} }
} }
} }
} catch (IOException e) { } catch (IOException e) {
System.err.println("Failed to discover auctions: " + e.getMessage()); System.err.println("Failed to discover auctions: " + e.getMessage());
e.printStackTrace();
} }
return saleIds; return saleIds;
} }
/** /**
* Retrieves all lots for a given sale ID using Troostwijks internal JSON * Retrieves all lots for a given sale ID using Troostwijk's internal JSON
* API. The API accepts parameters such as batchSize, offset, and saleID. * API. The API accepts parameters such as batchSize, offset, and saleID.
* A large batchSize returns many lots at once【610752406306016†L124-L134】. We loop * A large batchSize returns many lots at once. We loop until no further
* until no further results are returned. Each JSON result is mapped to * results are returned. Each JSON result is mapped to our Lot domain
* our Lot domain object and persisted to the database. * object and persisted to the database.
* *
* @param saleId the sale identifier * @param saleId the sale identifier
*/ */
@@ -183,28 +199,47 @@ public class TroostwijkScraper {
int batchSize = 200; int batchSize = 200;
int offset = 0; int offset = 0;
boolean more = true; boolean more = true;
int totalLots = 0;
while (more) { while (more) {
try { try {
String url = LOT_API + "?batchSize=" + batchSize String url = LOT_API + "?batchSize=" + batchSize
+ "&listType=7&offset=" + offset + "&listType=7&offset=" + offset
+ "&sortOption=0&saleID=" + saleId + "&sortOption=0&saleID=" + saleId
+ "&parentID=0&relationID=0&buildversion=201807311"; + "&parentID=0&relationID=0&buildversion=201807311";
System.out.println(" Fetching lots from API (offset=" + offset + ")...");
HttpRequest request = HttpRequest.newBuilder() HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(url)) .uri(URI.create(url))
.header("Accept", "application/json") .header("Accept", "application/json")
.header("User-Agent", "Mozilla/5.0")
.GET() .GET()
.build(); .build();
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
if (response.statusCode() != 200) { if (response.statusCode() != 200) {
System.err.println("API call failed for sale " + saleId + " with status " + response.statusCode()); System.err.println(" ⚠️ API call failed for sale " + saleId);
System.err.println(" Status: " + response.statusCode());
System.err.println(" Response: " + response.body().substring(0, Math.min(200, response.body().length())));
break; break;
} }
JsonNode root = objectMapper.readTree(response.body()); JsonNode root = objectMapper.readTree(response.body());
JsonNode results = root.path("results"); JsonNode results = root.path("results");
if (!results.isArray() || results.isEmpty()) { if (!results.isArray() || results.isEmpty()) {
if (offset == 0) {
System.out.println(" ⚠️ No lots found for sale " + saleId);
System.out.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length())));
}
more = false; more = false;
break; break;
} }
int lotsInBatch = results.size();
System.out.println(" Found " + lotsInBatch + " lots in this batch");
for (JsonNode node : results) { for (JsonNode node : results) {
Lot lot = new Lot(); Lot lot = new Lot();
lot.saleId = saleId; lot.saleId = saleId;
@@ -219,8 +254,11 @@ public class TroostwijkScraper {
lot.currentBid = node.path("cb").asDouble(); lot.currentBid = node.path("cb").asDouble();
lot.currency = node.path("cu").asText(); lot.currency = node.path("cu").asText();
lot.url = "https://www.troostwijkauctions.com/nl" + node.path("url").asText(); lot.url = "https://www.troostwijkauctions.com/nl" + node.path("url").asText();
// Save basic lot info into DB // Save basic lot info into DB
db.upsertLot(lot); db.upsertLot(lot);
totalLots++;
// Download images and perform object detection // Download images and perform object detection
List<String> imageUrls = new ArrayList<>(); List<String> imageUrls = new ArrayList<>();
JsonNode imgs = node.path("imgs"); JsonNode imgs = node.path("imgs");
@@ -230,6 +268,8 @@ public class TroostwijkScraper {
imageUrls.add(imgUrl); imageUrls.add(imgUrl);
} }
} }
// Download and analyze images (optional, can be slow)
for (String imgUrl : imageUrls) { for (String imgUrl : imageUrls) {
String fileName = downloadImage(imgUrl, saleId, lot.lotId); String fileName = downloadImage(imgUrl, saleId, lot.lotId);
if (fileName != null) { if (fileName != null) {
@@ -239,6 +279,8 @@ public class TroostwijkScraper {
} }
} }
} }
System.out.println(" ✓ Processed " + totalLots + " lots so far");
offset += batchSize; offset += batchSize;
} catch (IOException | InterruptedException e) { } catch (IOException | InterruptedException e) {
System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage()); System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage());
@@ -387,17 +429,44 @@ public class TroostwijkScraper {
// Step 2: Fetch lots for each auction // Step 2: Fetch lots for each auction
System.out.println("\n[2/3] Fetching lot details..."); System.out.println("\n[2/3] Fetching lot details...");
int totalAuctions = auctions.size();
int currentAuction = 0;
for (int saleId : auctions) { for (int saleId : auctions) {
System.out.println(" Processing sale " + saleId + "..."); currentAuction++;
System.out.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "...");
scraper.fetchLotsForSale(saleId); scraper.fetchLotsForSale(saleId);
} }
// Show database summary
System.out.println("\n📊 Database Summary:");
scraper.printDatabaseStats();
// Step 3: Start monitoring bids and closures // Step 3: Start monitoring bids and closures
System.out.println("\n[3/3] Starting monitoring service..."); System.out.println("\n[3/3] Starting monitoring service...");
scraper.scheduleMonitoring(); scraper.scheduleMonitoring();
System.out.println("✓ Monitoring active. Press Ctrl+C to stop.\n"); System.out.println("✓ Monitoring active. Press Ctrl+C to stop.\n");
} }
/**
 * Writes a short summary of the database contents to stdout: the number of
 * stored lots, the number of stored images and, when at least one lot
 * exists, the sum of the current bids over all lots.
 *
 * A {@link SQLException} raised while querying is reported on stderr and
 * not propagated; in that case nothing is written to stdout.
 */
private void printDatabaseStats() {
    List<Lot> lots;
    int images;
    try {
        // Both queries run before anything is printed, so a failure in
        // either one suppresses the whole summary.
        lots = db.getAllLots();
        images = db.getImageCount();
    } catch (SQLException ex) {
        System.err.println(" ⚠️ Could not retrieve database stats: " + ex.getMessage());
        return;
    }

    System.out.println(" Total lots in database: " + lots.size());
    System.out.println(" Total images downloaded: " + images);

    if (lots.isEmpty()) {
        return;
    }
    // The sum is taken over the raw currentBid values of every lot,
    // regardless of each lot's currency field.
    double bidSum = lots.stream()
            .mapToDouble(lot -> lot.currentBid)
            .sum();
    String formattedSum = String.format("%.2f", bidSum);
    System.out.println(" Total current bids: €" + formattedSum);
}
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
// Domain classes and services // Domain classes and services
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
@@ -408,6 +477,7 @@ public class TroostwijkScraper {
* minutesUntilClose computes how many minutes remain until the lot closes. * minutesUntilClose computes how many minutes remain until the lot closes.
*/ */
static class Lot { static class Lot {
int saleId; int saleId;
int lotId; int lotId;
String title; String title;
@@ -434,6 +504,7 @@ public class TroostwijkScraper {
* SQLite via a URL of the form "jdbc:sqlite:path_to_file". * SQLite via a URL of the form "jdbc:sqlite:path_to_file".
*/ */
static class DatabaseService { static class DatabaseService {
private final String url; private final String url;
DatabaseService(String dbPath) { DatabaseService(String dbPath) {
this.url = "jdbc:sqlite:" + dbPath; this.url = "jdbc:sqlite:" + dbPath;
@@ -552,6 +623,41 @@ public class TroostwijkScraper {
return list; return list;
} }
/**
 * Retrieves all lots from the database.
 *
 * Only the columns lot_id, sale_id, title, current_bid and currency are
 * selected; every other field of the returned {@code Lot} objects keeps
 * its default value. The method is synchronized on this service instance
 * and opens a fresh connection for the query.
 *
 * @return one {@code Lot} per row of the lots table; empty if there are none
 * @throws SQLException if the connection cannot be opened or the query fails
 */
synchronized List<Lot> getAllLots() throws SQLException {
    List<Lot> list = new ArrayList<>();
    String sql = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots";
    // The ResultSet is managed by try-with-resources together with the
    // connection and statement, so it is closed explicitly on every path.
    try (Connection conn = DriverManager.getConnection(url);
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(sql)) {
        while (rs.next()) {
            Lot lot = new Lot();
            lot.lotId = rs.getInt("lot_id");
            lot.saleId = rs.getInt("sale_id");
            lot.title = rs.getString("title");
            lot.currentBid = rs.getDouble("current_bid");
            lot.currency = rs.getString("currency");
            list.add(lot);
        }
    }
    return list;
}
/**
 * Gets the total number of rows in the images table.
 *
 * The method is synchronized on this service instance and opens a fresh
 * connection for the query.
 *
 * @return the row count reported by {@code COUNT(*)}, or 0 if the query
 *         yields no row
 * @throws SQLException if the connection cannot be opened or the query fails
 */
synchronized int getImageCount() throws SQLException {
    String sql = "SELECT COUNT(*) as count FROM images";
    // The ResultSet is managed by try-with-resources together with the
    // connection and statement, so it is closed explicitly on every path.
    try (Connection conn = DriverManager.getConnection(url);
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(sql)) {
        if (rs.next()) {
            return rs.getInt("count");
        }
    }
    return 0;
}
/** /**
* Updates the current bid of a lot after a bid refresh. * Updates the current bid of a lot after a bid refresh.
*/ */
@@ -591,6 +697,7 @@ public class TroostwijkScraper {
* - Or use "desktop" for desktop-only notifications * - Or use "desktop" for desktop-only notifications
*/ */
static class NotificationService { static class NotificationService {
private final boolean useDesktop; private final boolean useDesktop;
private final boolean useEmail; private final boolean useEmail;
private final String smtpUsername; private final String smtpUsername;
@@ -691,6 +798,7 @@ public class TroostwijkScraper {
javax.mail.Session session = javax.mail.Session.getInstance(props, javax.mail.Session session = javax.mail.Session.getInstance(props,
new javax.mail.Authenticator() { new javax.mail.Authenticator() {
protected javax.mail.PasswordAuthentication getPasswordAuthentication() { protected javax.mail.PasswordAuthentication getPasswordAuthentication() {
return new javax.mail.PasswordAuthentication(smtpUsername, smtpPassword); return new javax.mail.PasswordAuthentication(smtpUsername, smtpPassword);
} }
@@ -730,6 +838,7 @@ public class TroostwijkScraper {
* and returns empty lists. * and returns empty lists.
*/ */
static class ObjectDetectionService { static class ObjectDetectionService {
private final Net net; private final Net net;
private final List<String> classNames; private final List<String> classNames;
private final boolean enabled; private final boolean enabled;