start
This commit is contained in:
@@ -53,6 +53,8 @@ public class TroostwijkAuctionExtractor {
|
||||
private final ObjectMapper objectMapper;
|
||||
private final boolean useCache;
|
||||
private final CacheDatabase cacheDb;
|
||||
private final int maxPageVisits; // Maximum number of pages to fetch (0 = unlimited)
|
||||
private int pageVisitCount; // Counter for actual page fetches (not from cache)
|
||||
private Playwright playwright;
|
||||
private Browser browser;
|
||||
|
||||
@@ -77,10 +79,13 @@ public class TroostwijkAuctionExtractor {
|
||||
* Creates an extractor with optional page caching and an optional cap on live page fetches.
|
||||
*
|
||||
* @param useCache Enable database caching of visited pages
|
||||
* @param maxPageVisits Maximum number of live page fetches; pages served from cache are not counted (0 or negative = unlimited)
|
||||
*/
|
||||
public TroostwijkAuctionExtractor(boolean useCache) throws SQLException, IOException {
|
||||
public TroostwijkAuctionExtractor(boolean useCache, int maxPageVisits) throws SQLException, IOException {
|
||||
this.objectMapper = new ObjectMapper();
|
||||
this.useCache = useCache;
|
||||
this.maxPageVisits = maxPageVisits;
|
||||
this.pageVisitCount = 0;
|
||||
this.cacheDb = useCache ? new CacheDatabase(CACHE_DB_PATH) : null;
|
||||
|
||||
if (useCache) {
|
||||
@@ -88,6 +93,15 @@ public class TroostwijkAuctionExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that places no limit on page visits (delegates with maxPageVisits = 0)
|
||||
*
|
||||
* @param useCache Enable database caching of visited pages
|
||||
*/
|
||||
public TroostwijkAuctionExtractor(boolean useCache) throws SQLException, IOException {
|
||||
this(useCache, 0); // 0 = unlimited
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes Playwright and browser instance
|
||||
* Call this before extracting auctions
|
||||
@@ -145,14 +159,24 @@ public class TroostwijkAuctionExtractor {
|
||||
System.out.println(" ✓ Loaded from cache");
|
||||
html = cachedHtml;
|
||||
} else {
|
||||
// Check if we've reached the maximum page visit limit
|
||||
if (maxPageVisits > 0 && pageVisitCount >= maxPageVisits) {
|
||||
System.out.println(" ⚠️ Reached maximum page visit limit (" + maxPageVisits + "), stopping");
|
||||
break;
|
||||
}
|
||||
|
||||
// Fetch with Playwright
|
||||
html = fetchPageWithPlaywright(pageNumber);
|
||||
pageVisitCount++; // Increment actual page fetch counter
|
||||
|
||||
if (html == null || html.isEmpty()) {
|
||||
System.out.println(" ⚠️ Failed to fetch page, stopping pagination");
|
||||
break;
|
||||
}
|
||||
|
||||
System.out.println(" ✓ Fetched from website (visit " + pageVisitCount +
|
||||
(maxPageVisits > 0 ? "/" + maxPageVisits : "") + ")");
|
||||
|
||||
// Save to cache
|
||||
if (useCache) {
|
||||
saveToCache(pageNumber, html);
|
||||
@@ -371,13 +395,41 @@ public class TroostwijkAuctionExtractor {
|
||||
|
||||
/**
|
||||
* Entry point for testing
|
||||
*
|
||||
* Arguments:
|
||||
* --max-visits <number> : Maximum number of live (non-cached) page fetches (0 = unlimited, the default)
|
||||
* --no-cache : Disable caching
* --help : Print usage information and exit
|
||||
*/
|
||||
public static void main(String[] args) throws Exception {
|
||||
System.out.println("=== Troostwijk Auction Extractor ===\n");
|
||||
|
||||
// Enable caching by default
|
||||
// Parse command line arguments
|
||||
boolean useCache = true;
|
||||
TroostwijkAuctionExtractor extractor = new TroostwijkAuctionExtractor(useCache);
|
||||
int maxVisits = 0; // 0 = unlimited
|
||||
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
switch (args[i]) {
|
||||
case "--max-visits":
|
||||
if (i + 1 < args.length) {
|
||||
maxVisits = Integer.parseInt(args[++i]);
|
||||
System.out.println("Max page visits set to: " + maxVisits);
|
||||
}
|
||||
break;
|
||||
case "--no-cache":
|
||||
useCache = false;
|
||||
System.out.println("Caching disabled");
|
||||
break;
|
||||
case "--help":
|
||||
System.out.println("Usage: java TroostwijkAuctionExtractor [options]");
|
||||
System.out.println("Options:");
|
||||
System.out.println(" --max-visits <n> : Limit actual page fetches to n (0 = unlimited)");
|
||||
System.out.println(" --no-cache : Disable page caching");
|
||||
System.out.println(" --help : Show this help message");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
TroostwijkAuctionExtractor extractor = new TroostwijkAuctionExtractor(useCache, maxVisits);
|
||||
|
||||
try {
|
||||
// Initialize browser
|
||||
@@ -392,6 +444,7 @@ public class TroostwijkAuctionExtractor {
|
||||
System.out.println("\n=== Results ===");
|
||||
System.out.println("Total auctions found: " + allAuctions.size());
|
||||
System.out.println("Dutch auctions (NL): " + dutchAuctions.size());
|
||||
System.out.println("Actual page visits: " + extractor.pageVisitCount);
|
||||
|
||||
// Display first 10 Dutch auctions
|
||||
System.out.println("\n=== Sample Dutch Auctions ===");
|
||||
|
||||
Reference in New Issue
Block a user