From b560240c17f90f987c6ca1934fa7e534e4ee94c7 Mon Sep 17 00:00:00 2001 From: michael1986 Date: Fri, 28 Nov 2025 05:16:51 +0100 Subject: [PATCH] start --- .idea/compiler.xml | 5 + .idea/dataSources.xml | 24 +++ RUN_INSTRUCTIONS.md | 164 ++++++++++++++++++ pom.xml | 38 ++++ .../auction/TroostwijkAuctionExtractor.java | 59 ++++++- src/main/resources/simplelogger.properties | 20 +++ 6 files changed, 307 insertions(+), 3 deletions(-) create mode 100644 RUN_INSTRUCTIONS.md create mode 100644 src/main/resources/simplelogger.properties diff --git a/.idea/compiler.xml b/.idea/compiler.xml index f202af8..b57acd2 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -10,4 +10,9 @@ + + + \ No newline at end of file diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml index 53c09db..4cc6ef4 100644 --- a/.idea/dataSources.xml +++ b/.idea/dataSources.xml @@ -61,5 +61,29 @@ + + sqlite.xerial + true + org.sqlite.JDBC + jdbc:sqlite:$PROJECT_DIR$/cache/page_cache.db + $ProjectFileDir$ + + + file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar + + + file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar + + + file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/xerial/sqlite-jdbc/3.45.1.0/sqlite-jdbc-3.45.1.0.jar + + + file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.45.1/org/slf4j/slf4j-api/1.7.36/slf4j-api-1.7.36.jar + + + file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.43.0/org/xerial/sqlite-jdbc/3.43.0.0/sqlite-jdbc-3.43.0.0.jar + + + \ No newline at end of file diff --git a/RUN_INSTRUCTIONS.md b/RUN_INSTRUCTIONS.md new file mode 100644 index 0000000..3c90def --- /dev/null +++ b/RUN_INSTRUCTIONS.md @@ -0,0 +1,164 @@ +# Troostwijk Auction Extractor - Run Instructions + +## Fixed Warnings + +All warnings have been resolved: +- ✅ SLF4J logging configured (slf4j-simple) +- ✅ Native 
access enabled for SQLite JDBC +- ✅ Logging output controlled via simplelogger.properties + +## Prerequisites + +1. **Java 21** installed +2. **Maven** installed +3. **IntelliJ IDEA** (recommended) or command line + +## Setup (First Time Only) + +### 1. Install Dependencies + +In IntelliJ Terminal or PowerShell: + +```bash +# Reload Maven dependencies +mvn clean install + +# Install Playwright browser binaries (first time only) +mvn exec:java -e -Dexec.mainClass=com.microsoft.playwright.CLI -Dexec.args="install" +``` + +## Running the Application + +### Option A: Using IntelliJ IDEA (Easiest) + +1. **Add VM Options for native access:** + - Run → Edit Configurations + - Select or create configuration for `TroostwijkAuctionExtractor` + - In the "VM options" field, add: + ``` + --enable-native-access=ALL-UNNAMED + ``` + +2. **Add Program Arguments (optional):** + - In the "Program arguments" field, add: + ``` + --max-visits 3 + ``` + +3. **Run the application:** + - Click the green Run button + +### Option B: Using Maven (Command Line) + +```bash +# Run with the default 3-page limit (configured in pom.xml) +mvn exec:java + +# Run with custom arguments (override pom.xml defaults) +mvn exec:java -Dexec.args="--max-visits 5" + +# Run without cache +mvn exec:java -Dexec.args="--no-cache --max-visits 2" + +# Run with unlimited visits +mvn exec:java -Dexec.args="" +``` + +### Option C: Using Java Directly + +```bash +# Compile first +mvn clean compile + +# Run with native access enabled +java --enable-native-access=ALL-UNNAMED \ + -cp target/classes:$(mvn dependency:build-classpath -Dmdep.outputFile=/dev/stdout -q) \ + com.auction.TroostwijkAuctionExtractor --max-visits 3 +``` + +## Command Line Arguments + +``` +--max-visits <n> Limit actual page fetches to n (0 = unlimited, default) +--no-cache Disable page caching +--help Show help message +``` + +## Examples + +### Test with 3 page visits (cached pages don't count): +```bash +mvn exec:java -Dexec.args="--max-visits 3" +``` + +### Fresh extraction without cache: 
+```bash +mvn exec:java -Dexec.args="--no-cache --max-visits 5" +``` + +### Full extraction (all pages, unlimited): +```bash +mvn exec:java -Dexec.args="" +``` + +## Expected Output (No Warnings) + +``` +=== Troostwijk Auction Extractor === +Max page visits set to: 3 + +Initializing Playwright browser... +✓ Browser ready +✓ Cache database initialized + +Starting auction extraction from https://www.troostwijkauctions.com/auctions + +[Page 1] Fetching auctions... + ✓ Fetched from website (visit 1/3) + ✓ Found 20 auctions + +[Page 2] Fetching auctions... + ✓ Loaded from cache + ✓ Found 20 auctions + +[Page 3] Fetching auctions... + ✓ Fetched from website (visit 2/3) + ✓ Found 20 auctions + +✓ Total auctions extracted: 60 + +=== Results === +Total auctions found: 60 +Dutch auctions (NL): 45 +Actual page visits: 2 + +✓ Browser and cache closed +``` + +## Cache Management + +- Cache is stored in: `cache/page_cache.db` +- Cache expires after: 24 hours (configurable in code) +- To clear cache: Delete `cache/page_cache.db` file + +## Troubleshooting + +### If you still see warnings: + +1. **Reload Maven project in IntelliJ:** + - Right-click `pom.xml` → Maven → Reload project + +2. **Verify VM options:** + - Ensure `--enable-native-access=ALL-UNNAMED` is in VM options + +3. 
**Clean and rebuild:** + ```bash + mvn clean install + ``` + +### If Playwright fails: + +```bash +# Reinstall browser binaries +mvn exec:java -e -Dexec.mainClass=com.microsoft.playwright.CLI -Dexec.args="install chromium" +``` diff --git a/pom.xml b/pom.xml index 51e2be6..765dcd5 100644 --- a/pom.xml +++ b/pom.xml @@ -60,6 +60,18 @@ playwright 1.40.0 + + + + org.slf4j + slf4j-api + 2.0.9 + + + org.slf4j + slf4j-simple + 2.0.9 + @@ -72,6 +84,32 @@ 21 21 + + --enable-native-access=ALL-UNNAMED + + + + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + com.auction.TroostwijkAuctionExtractor + + --max-visits + 3 + + + + java.util.logging.SimpleFormatter.format + %1$tF %1$tT %4$s %2$s %5$s%6$s%n + + + + --enable-native-access=ALL-UNNAMED +