This commit is contained in:
2025-11-27 08:15:26 +01:00
parent 47854d8b39
commit afa52cb11c
12 changed files with 968 additions and 741 deletions

42
.github/workflows/deploy.yml vendored Normal file
View File

@@ -0,0 +1,42 @@
# CI/CD pipeline: on every push to main, build the Maven project, upload the
# resulting jar to Artifactory, then run the update script on the server.
name: Build and Deploy

on:
  push:
    branches: ["main"]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '21'

      - name: Build
        run: mvn -B clean package

      - name: Upload to JFrog
        # Secrets are handed to the shell as environment variables instead of
        # being interpolated into the script text, so quotes, `$` or backticks
        # inside a credential cannot break or alter the command.
        env:
          JFROG_USER: ${{ secrets.JFROG_USER }}
          JFROG_PASS: ${{ secrets.JFROG_PASS }}
        # --fail makes curl exit non-zero on HTTP 4xx/5xx; without it a
        # rejected upload leaves this step green and the deploy job still runs.
        # NOTE(review): the URL is plain http, so the basic-auth credentials
        # travel unencrypted - switch to https once the server supports it.
        # NOTE(review): assumes the build leaves exactly one jar in target/;
        # with several matches curl treats the extra paths as URLs - confirm.
        run: |
          curl --fail -u "${JFROG_USER}:${JFROG_PASS}" \
            -T target/*.jar \
            "http://JFROG-SERVER/artifactory/myrepo/app-latest.jar"

  deploy:
    # Runs only after the build job (and therefore the upload) has succeeded.
    needs: build
    runs-on: ubuntu-latest
    steps:
      - name: Trigger remote deploy script
        uses: appleboy/ssh-action@v0.1.7
        with:
          host: ${{ secrets.SERVER_IP }}
          username: ${{ secrets.SERVER_USER }}
          key: ${{ secrets.SERVER_SSH_KEY }}
          script: |
            /opt/myapp/update.sh

2
.gitignore vendored
View File

@@ -28,3 +28,5 @@ bin/
### Mac OS ###
.DS_Store
NUL

1
.idea/.name generated Normal file
View File

@@ -0,0 +1 @@
scrappy

65
.idea/dataSources.xml generated Normal file
View File

@@ -0,0 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
<data-source source="LOCAL" name="troostwijk" uuid="9cb4e997-fbca-4426-9093-d308871c5d5e">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:$PROJECT_DIR$/troostwijk.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
<libraries>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/license.txt</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.pom</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.pom.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/_remote.repositories</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-parent/1.7.36/slf4j-parent-1.7.36.pom</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-parent/1.7.36/slf4j-parent-1.7.36.pom.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-parent/1.7.36/_remote.repositories</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.pom</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.pom.sha1</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/_remote.repositories</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar</url>
</library>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.43.0/org/xerial/sqlite-jdbc/3.43.0.0/sqlite-jdbc-3.43.0.0.jar</url>
</library>
</libraries>
</data-source>
</component>
</project>

View File

@@ -3,8 +3,16 @@
<component name="MaterialThemeProjectNewConfig">
<option name="metadata">
<MTProjectMetadataState>
<option name="userId" value="-3d6055cc:19abfe94c99:-7ff7" />
<option name="migrated" value="true" />
<option name="pristineConfig" value="false" />
<option name="userId" value="50e4e899:187ededa0cf:-8000" />
<option name="version" value="8.6.3" />
</MTProjectMetadataState>
</option>
<option name="titleBarState">
<MTProjectTitleBarConfigState>
<option name="overrideColor" value="false" />
</MTProjectTitleBarConfigState>
</option>
</component>
</project>

View File

@@ -123,10 +123,9 @@ public class TroostwijkScraper {
* Discovers all active Dutch auctions by crawling the auctions page.
*
* Troostwijk lists auctions for many countries on one page. We parse
* the page with jsoup (an HTML parser that fetches and parses realworld
* HTML easily【438902460386021†L14-L24】) and filter auctions whose location
* contains ", NL" (indicating the Netherlands). Each auction link
* contains a unique sale ID which we extract from its URL.
* the page with jsoup and filter auctions whose location contains ", NL"
* (indicating the Netherlands). Each auction link contains a unique sale ID
* in the format A1-xxxxx or A7-xxxxx which we extract from the URL.
*
* @return a list of sale identifiers for auctions located in NL
*/
@@ -135,47 +134,64 @@ public class TroostwijkScraper {
try {
// Fetch the auctions overview page
Document doc = Jsoup.connect(AUCTIONS_PAGE).get();
// Select all anchor elements that represent an auction listing.
// The exact selector may change; inspect the page with your browsers
// developer tools and update accordingly.
Elements auctionLinks = doc.select("a[href][data-id]");
// Select all anchor elements that link to auction pages
// The URL pattern is: /a/auction-title-A1-xxxxx or /a/auction-title-A7-xxxxx
Elements auctionLinks = doc.select("a[href^='/a/']");
System.out.println("Found " + auctionLinks.size() + " potential auction links");
for (Element link : auctionLinks) {
Element locationElement = link.selectFirst(".auction-location");
String location = locationElement != null ? locationElement.text() : "";
if (location.contains(", NL")) {
// Extract saleID from the data-id attribute or href
String saleIdStr = link.attr("data-id");
if (saleIdStr.isEmpty()) {
// Fallback: parse from URL path, e.g. /nl/sale/27213/machines
// Get the href to extract the auction ID
String href = link.attr("href");
String[] parts = href.split("/");
for (String p : parts) {
if (p.matches("\\d+")) {
saleIdStr = p;
// Check if this link contains location text with ", NL"
String linkText = link.text();
// Look for location in any div inside the link
Elements divs = link.select("div");
boolean isDutch = false;
for (Element div : divs) {
String text = div.text();
if (text.contains(", NL")) {
isDutch = true;
break;
}
}
}
if (isDutch) {
// Extract auction ID from URL
// Format: /a/title-A1-38375 or /a/title-A7-12345
// We want the number after A1- or A7-
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("A[17]-(\\d+)");
java.util.regex.Matcher matcher = pattern.matcher(href);
if (matcher.find()) {
try {
int saleId = Integer.parseInt(saleIdStr);
int saleId = Integer.parseInt(matcher.group(1));
if (!saleIds.contains(saleId)) {
saleIds.add(saleId);
} catch (NumberFormatException ignored) {
// not a sale ID
System.out.println(" Found Dutch auction: " + saleId + " - " + href);
}
} catch (NumberFormatException e) {
// Skip invalid IDs
}
}
}
}
} catch (IOException e) {
System.err.println("Failed to discover auctions: " + e.getMessage());
e.printStackTrace();
}
return saleIds;
}
/**
* Retrieves all lots for a given sale ID using Troostwijks internal JSON
* Retrieves all lots for a given sale ID using Troostwijk's internal JSON
* API. The API accepts parameters such as batchSize, offset, and saleID.
* A large batchSize returns many lots at once【610752406306016†L124-L134】. We loop
* until no further results are returned. Each JSON result is mapped to
* our Lot domain object and persisted to the database.
* A large batchSize returns many lots at once. We loop until no further
* results are returned. Each JSON result is mapped to our Lot domain
* object and persisted to the database.
*
* @param saleId the sale identifier
*/
@@ -183,28 +199,47 @@ public class TroostwijkScraper {
int batchSize = 200;
int offset = 0;
boolean more = true;
int totalLots = 0;
while (more) {
try {
String url = LOT_API + "?batchSize=" + batchSize
+ "&listType=7&offset=" + offset
+ "&sortOption=0&saleID=" + saleId
+ "&parentID=0&relationID=0&buildversion=201807311";
System.out.println(" Fetching lots from API (offset=" + offset + ")...");
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(url))
.header("Accept", "application/json")
.header("User-Agent", "Mozilla/5.0")
.GET()
.build();
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
if (response.statusCode() != 200) {
System.err.println("API call failed for sale " + saleId + " with status " + response.statusCode());
System.err.println(" ⚠️ API call failed for sale " + saleId);
System.err.println(" Status: " + response.statusCode());
System.err.println(" Response: " + response.body().substring(0, Math.min(200, response.body().length())));
break;
}
JsonNode root = objectMapper.readTree(response.body());
JsonNode results = root.path("results");
if (!results.isArray() || results.isEmpty()) {
if (offset == 0) {
System.out.println(" ⚠️ No lots found for sale " + saleId);
System.out.println(" API Response: " + response.body().substring(0, Math.min(500, response.body().length())));
}
more = false;
break;
}
int lotsInBatch = results.size();
System.out.println(" Found " + lotsInBatch + " lots in this batch");
for (JsonNode node : results) {
Lot lot = new Lot();
lot.saleId = saleId;
@@ -219,8 +254,11 @@ public class TroostwijkScraper {
lot.currentBid = node.path("cb").asDouble();
lot.currency = node.path("cu").asText();
lot.url = "https://www.troostwijkauctions.com/nl" + node.path("url").asText();
// Save basic lot info into DB
db.upsertLot(lot);
totalLots++;
// Download images and perform object detection
List<String> imageUrls = new ArrayList<>();
JsonNode imgs = node.path("imgs");
@@ -230,6 +268,8 @@ public class TroostwijkScraper {
imageUrls.add(imgUrl);
}
}
// Download and analyze images (optional, can be slow)
for (String imgUrl : imageUrls) {
String fileName = downloadImage(imgUrl, saleId, lot.lotId);
if (fileName != null) {
@@ -239,6 +279,8 @@ public class TroostwijkScraper {
}
}
}
System.out.println(" ✓ Processed " + totalLots + " lots so far");
offset += batchSize;
} catch (IOException | InterruptedException e) {
System.err.println("Error fetching lots for sale " + saleId + ": " + e.getMessage());
@@ -387,17 +429,44 @@ public class TroostwijkScraper {
// Step 2: Fetch lots for each auction
System.out.println("\n[2/3] Fetching lot details...");
int totalAuctions = auctions.size();
int currentAuction = 0;
for (int saleId : auctions) {
System.out.println(" Processing sale " + saleId + "...");
currentAuction++;
System.out.println(" [" + currentAuction + "/" + totalAuctions + "] Processing sale " + saleId + "...");
scraper.fetchLotsForSale(saleId);
}
// Show database summary
System.out.println("\n📊 Database Summary:");
scraper.printDatabaseStats();
// Step 3: Start monitoring bids and closures
System.out.println("\n[3/3] Starting monitoring service...");
scraper.scheduleMonitoring();
System.out.println("✓ Monitoring active. Press Ctrl+C to stop.\n");
}
/**
 * Writes a short summary of the stored data to standard output: the number
 * of lots, the number of images and, when at least one lot exists, the sum
 * of the current bids over all lots. A SQLException raised while reading
 * is reported on standard error instead of being propagated.
 */
private void printDatabaseStats() {
    try {
        final List<Lot> storedLots = db.getAllLots();
        final int storedImages = db.getImageCount();

        System.out.println(" Total lots in database: " + storedLots.size());
        System.out.println(" Total images downloaded: " + storedImages);

        // Nothing to add up when the table is empty, so the bid line is omitted.
        if (storedLots.isEmpty()) {
            return;
        }

        final double bidSum = storedLots.stream()
                .mapToDouble(lot -> lot.currentBid)
                .sum();
        final String formattedSum = String.format("%.2f", bidSum);
        System.out.println(" Total current bids: €" + formattedSum);
    } catch (SQLException ex) {
        System.err.println(" ⚠️ Could not retrieve database stats: " + ex.getMessage());
    }
}
// ----------------------------------------------------------------------
// Domain classes and services
// ----------------------------------------------------------------------
@@ -408,6 +477,7 @@ public class TroostwijkScraper {
* minutesUntilClose computes how many minutes remain until the lot closes.
*/
static class Lot {
int saleId;
int lotId;
String title;
@@ -434,6 +504,7 @@ public class TroostwijkScraper {
* SQLite via a URL of the form "jdbc:sqlite:path_to_file".
*/
static class DatabaseService {
private final String url;
DatabaseService(String dbPath) {
this.url = "jdbc:sqlite:" + dbPath;
@@ -552,6 +623,41 @@ public class TroostwijkScraper {
return list;
}
/**
 * Reads every row of the lots table and maps it to a Lot object.
 *
 * Only lot_id, sale_id, title, current_bid and currency are selected, so
 * those are the only fields set on the returned objects. A new connection
 * is opened for the call and closed again before returning.
 *
 * @return one Lot per row; an empty list when the table has no rows
 * @throws SQLException if the connection or the query fails
 */
synchronized List<Lot> getAllLots() throws SQLException {
    final String query = "SELECT lot_id, sale_id, title, current_bid, currency FROM lots";
    final List<Lot> lots = new ArrayList<>();
    try (Connection connection = DriverManager.getConnection(url);
         Statement statement = connection.createStatement()) {
        final ResultSet rows = statement.executeQuery(query);
        for (boolean hasRow = rows.next(); hasRow; hasRow = rows.next()) {
            final Lot current = new Lot();
            current.lotId = rows.getInt("lot_id");
            current.saleId = rows.getInt("sale_id");
            current.title = rows.getString("title");
            current.currentBid = rows.getDouble("current_bid");
            current.currency = rows.getString("currency");
            lots.add(current);
        }
    }
    return lots;
}
/**
 * Counts the rows of the images table.
 *
 * @return the row count reported by the query, or 0 if the query yields
 *         no result row
 * @throws SQLException if the connection or the query fails
 */
synchronized int getImageCount() throws SQLException {
    final String query = "SELECT COUNT(*) as count FROM images";
    try (Connection connection = DriverManager.getConnection(url);
         Statement statement = connection.createStatement()) {
        final ResultSet result = statement.executeQuery(query);
        // The aggregate normally yields a single row; fall back to 0 otherwise.
        return result.next() ? result.getInt("count") : 0;
    }
}
/**
* Updates the current bid of a lot after a bid refresh.
*/
@@ -591,6 +697,7 @@ public class TroostwijkScraper {
* - Or use "desktop" for desktop-only notifications
*/
static class NotificationService {
private final boolean useDesktop;
private final boolean useEmail;
private final String smtpUsername;
@@ -691,6 +798,7 @@ public class TroostwijkScraper {
javax.mail.Session session = javax.mail.Session.getInstance(props,
new javax.mail.Authenticator() {
protected javax.mail.PasswordAuthentication getPasswordAuthentication() {
return new javax.mail.PasswordAuthentication(smtpUsername, smtpPassword);
}
@@ -730,6 +838,7 @@ public class TroostwijkScraper {
* and returns empty lists.
*/
static class ObjectDetectionService {
private final Net net;
private final List<String> classNames;
private final boolean enabled;