This commit is contained in:
2025-11-28 05:54:30 +01:00
parent f5ee240283
commit 0f5800441a
4 changed files with 268 additions and 65 deletions

View File

@@ -303,19 +303,24 @@ public class TroostwijkAuctionExtractor {
int hrefPos = html.indexOf(href);
if (hrefPos == -1) return "Unknown";
// Look at 500 characters after the href for location info
int endPos = Math.min(hrefPos + 500, html.length());
String context = html.substring(hrefPos, endPos);
// Look at up to 500 characters before and up to 1000 characters after the href for location info
int startPos = Math.max(hrefPos - 500, 0);
int endPos = Math.min(hrefPos + 1000, html.length());
String context = html.substring(startPos, endPos);
// Try to find location pattern like "City, NL" or "City, Country"
// More flexible pattern to catch various location formats
java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile(
"([A-Za-z\\s]+),\\s*([A-Z]{2})");
"([A-Za-z][A-Za-z\\s\\-']+),\\s*([A-Z]{2})(?![A-Za-z])");
java.util.regex.Matcher locMatcher = locPattern.matcher(context);
if (locMatcher.find()) {
return locMatcher.group(1).trim() + ", " + locMatcher.group(2);
String location = locMatcher.group(1).trim() + ", " + locMatcher.group(2);
System.out.println(" Found location: " + location + " for auction " + href);
return location;
}
System.out.println(" ⚠️ No location found for auction " + href);
return "Unknown";
}