114 lines
4.2 KiB
Java
114 lines
4.2 KiB
Java
package com.auction;
|
|
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
import org.junit.jupiter.api.BeforeAll;
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
import java.io.IOException;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Paths;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
|
|
import static org.junit.jupiter.api.Assertions.*;
|
|
|
|
/**
|
|
* Test auction parsing logic using saved HTML from test.html
|
|
* Tests the markup data extraction for each auction found
|
|
*/
|
|
public class AuctionParsingTest {
|
|
|
|
private static String testHtml;
|
|
|
|
@BeforeAll
|
|
public static void loadTestHtml() throws IOException {
|
|
// Load the test HTML file
|
|
testHtml = Files.readString(Paths.get("src/test/resources/test_auctions.html"));
|
|
System.out.println("Loaded test HTML (" + testHtml.length() + " characters)");
|
|
}
|
|
|
|
@Test
|
|
public void testLocationPatternMatching() {
|
|
System.out.println("\n=== Location Pattern Tests ===");
|
|
|
|
// Test different location formats
|
|
String[] testCases = {
|
|
"<p>Amsterdam, NL</p>",
|
|
"<p class=\"flex truncate\"><span class=\"w-full truncate\">Sofia,<!-- --> </span>BG</p>",
|
|
"<p>Berlin, DE</p>",
|
|
"<span>Brussels,</span>BE"
|
|
};
|
|
|
|
for (String testHtml : testCases) {
|
|
Document doc = Jsoup.parse(testHtml);
|
|
Element elem = doc.select("p, span").first();
|
|
|
|
if (elem != null) {
|
|
String text = elem.text();
|
|
System.out.println("\nTest: " + testHtml);
|
|
System.out.println("Text: " + text);
|
|
|
|
// Test regex pattern
|
|
if (text.matches(".*[A-Z]{2}$")) {
|
|
String countryCode = text.substring(text.length() - 2);
|
|
String cityPart = text.substring(0, text.length() - 2).trim().replaceAll("[,\\s]+$", "");
|
|
System.out.println("→ Extracted: " + cityPart + ", " + countryCode);
|
|
} else {
|
|
System.out.println("→ No match");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
@Test
|
|
public void testFullTextPatternMatching() {
|
|
System.out.println("\n=== Full Text Pattern Tests ===");
|
|
|
|
// Test the complete auction text format
|
|
String[] testCases = {
|
|
"woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE",
|
|
"maandag om 14:30 5 Industriële machines Amsterdam, NL",
|
|
"vrijdag om 10:00 12 Landbouwmachines Antwerpen, BE"
|
|
};
|
|
|
|
for (String testText : testCases) {
|
|
System.out.println("\nParsing: \"" + testText + "\"");
|
|
|
|
// Simulated extraction
|
|
String remaining = testText;
|
|
|
|
// Extract time
|
|
java.util.regex.Pattern timePattern = java.util.regex.Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})");
|
|
java.util.regex.Matcher timeMatcher = timePattern.matcher(remaining);
|
|
if (timeMatcher.find()) {
|
|
System.out.println(" Time: " + timeMatcher.group(1) + " om " + timeMatcher.group(2));
|
|
remaining = remaining.substring(timeMatcher.end()).trim();
|
|
}
|
|
|
|
// Extract location
|
|
java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile(
|
|
"([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$"
|
|
);
|
|
java.util.regex.Matcher locMatcher = locPattern.matcher(remaining);
|
|
if (locMatcher.find()) {
|
|
System.out.println(" Location: " + locMatcher.group(1) + ", " + locMatcher.group(2));
|
|
remaining = remaining.substring(0, locMatcher.start()).trim();
|
|
}
|
|
|
|
// Extract lot count
|
|
java.util.regex.Pattern lotPattern = java.util.regex.Pattern.compile("^(\\d+)\\s+");
|
|
java.util.regex.Matcher lotMatcher = lotPattern.matcher(remaining);
|
|
if (lotMatcher.find()) {
|
|
System.out.println(" Lot count: " + lotMatcher.group(1));
|
|
remaining = remaining.substring(lotMatcher.end()).trim();
|
|
}
|
|
|
|
// What remains is title
|
|
System.out.println(" Title: " + remaining);
|
|
}
|
|
}
|
|
}
|