start
This commit is contained in:
@@ -303,19 +303,24 @@ public class TroostwijkAuctionExtractor {
|
||||
int hrefPos = html.indexOf(href);
|
||||
if (hrefPos == -1) return "Unknown";
|
||||
|
||||
// Look at 500 characters after the href for location info
|
||||
int endPos = Math.min(hrefPos + 500, html.length());
|
||||
String context = html.substring(hrefPos, endPos);
|
||||
// Look at 500 characters before and 1000 characters after the href for location info
|
||||
int startPos = Math.max(hrefPos - 500, 0);
|
||||
int endPos = Math.min(hrefPos + 1000, html.length());
|
||||
String context = html.substring(startPos, endPos);
|
||||
|
||||
// Try to find location pattern like "City, NL" or "City, Country"
|
||||
// More flexible pattern to catch various location formats
|
||||
java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile(
|
||||
"([A-Za-z\\s]+),\\s*([A-Z]{2})");
|
||||
"([A-Za-z][A-Za-z\\s\\-']+),\\s*([A-Z]{2})(?![A-Za-z])");
|
||||
java.util.regex.Matcher locMatcher = locPattern.matcher(context);
|
||||
|
||||
if (locMatcher.find()) {
|
||||
return locMatcher.group(1).trim() + ", " + locMatcher.group(2);
|
||||
String location = locMatcher.group(1).trim() + ", " + locMatcher.group(2);
|
||||
System.out.println(" Found location: " + location + " for auction " + href);
|
||||
return location;
|
||||
}
|
||||
|
||||
System.out.println(" ⚠️ No location found for auction " + href);
|
||||
return "Unknown";
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user