Files
auctiora/src/test/java/com/auction/AuctionParsingTest.java
2025-12-03 15:32:34 +01:00

114 lines
4.2 KiB
Java

package com.auction;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for the text-level parsing rules applied to auction listings.
 *
 * <p>The saved page {@code src/test/resources/test_auctions.html} is loaded once before
 * all tests. The tests themselves run against small inline samples and verify, with
 * real assertions, that the location, start time, lot count and title are extracted
 * as expected.
 */
public class AuctionParsingTest {

    /** Text that ends in a two-letter upper-case country code, e.g. {@code "Amsterdam, NL"}. */
    private static final Pattern COUNTRY_SUFFIX_PATTERN = Pattern.compile(".*[A-Z]{2}$");

    /** Trailing commas/whitespace left over after the country code has been cut off. */
    private static final Pattern TRAILING_SEPARATOR_PATTERN = Pattern.compile("[,\\s]+$");

    /** Weekday followed by the start time, e.g. {@code "woensdag om 18:00"}. */
    private static final Pattern TIME_PATTERN =
            Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})");

    /**
     * City and country code at the very end of the text, e.g. {@code "Loßburg, DE"}.
     *
     * <p>The city group deliberately excludes whitespace. The title and the city are only
     * separated by a space, so a city group that accepts whitespace makes {@code find()}
     * start at the first letter of the title and swallow it completely. The trade-off is
     * that for a multi-word city only its last word is captured.
     */
    private static final Pattern LOCATION_PATTERN =
            Pattern.compile("([A-Za-zÀ-ÿ][A-Za-zÀ-ÿ\\-']+?),\\s*([A-Z]{2})\\s*$");

    /** Leading lot count followed by whitespace, e.g. {@code "12 "}. */
    private static final Pattern LOT_COUNT_PATTERN = Pattern.compile("^(\\d+)\\s+");

    /** Raw content of the saved auction page; populated once by {@link #loadTestHtml()}. */
    private static String testHtml;

    /**
     * Reads the saved auction page into memory and logs its size.
     *
     * @throws IOException if the resource file cannot be read
     */
    @BeforeAll
    public static void loadTestHtml() throws IOException {
        // The path is relative, so it is resolved against the JVM's working directory.
        testHtml = Files.readString(Paths.get("src/test/resources/test_auctions.html"));
        System.out.println("Loaded test HTML (" + testHtml.length() + " characters)");
    }

    /**
     * Verifies city/country extraction from the text of the first {@code <p>} or
     * {@code <span>} element of several markup variants.
     */
    @Test
    public void testLocationPatternMatching() {
        System.out.println("\n=== Location Pattern Tests ===");

        // Each row: { html, expected city, expected country }.
        // A null city/country means no location can be extracted from the selected element.
        String[][] testCases = {
            {"<p>Amsterdam, NL</p>", "Amsterdam", "NL"},
            {"<p class=\"flex truncate\"><span class=\"w-full truncate\">Sofia,<!-- --> </span>BG</p>",
                "Sofia", "BG"},
            {"<p>Berlin, DE</p>", "Berlin", "DE"},
            // The country code sits outside the <span>, so the selected element's text
            // is only "Brussels," and does not end in a country code.
            {"<span>Brussels,</span>BE", null, null}
        };

        for (String[] testCase : testCases) {
            String html = testCase[0];
            String expectedCity = testCase[1];
            String expectedCountry = testCase[2];

            Document doc = Jsoup.parse(html);
            Element elem = doc.select("p, span").first();
            assertNotNull(elem, "No <p> or <span> element found in: " + html);

            String text = elem.text();
            System.out.println("\nTest: " + html);
            System.out.println("Text: " + text);

            String[] location = splitLocation(text);
            if (location == null) {
                System.out.println("→ No match");
            } else {
                System.out.println("→ Extracted: " + location[0] + ", " + location[1]);
            }

            if (expectedCity == null) {
                assertNull(location, "Expected no location for: " + html);
            } else {
                assertArrayEquals(new String[] {expectedCity, expectedCountry}, location,
                        "Wrong location for: " + html);
            }
        }
    }

    /**
     * Verifies that a complete listing text of the form
     * {@code "<weekday> om <HH:mm> <lot count> <title> <city>, <CC>"} is split into its parts.
     */
    @Test
    public void testFullTextPatternMatching() {
        System.out.println("\n=== Full Text Pattern Tests ===");

        // Each row: { input, weekday, time, lot count, title, city, country }.
        String[][] testCases = {
            {"woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE",
                "woensdag", "18:00", "1", "Vrachtwagens voor bedrijfsvoertuigen", "Loßburg", "DE"},
            {"maandag om 14:30 5 Industriële machines Amsterdam, NL",
                "maandag", "14:30", "5", "Industriële machines", "Amsterdam", "NL"},
            {"vrijdag om 10:00 12 Landbouwmachines Antwerpen, BE",
                "vrijdag", "10:00", "12", "Landbouwmachines", "Antwerpen", "BE"}
        };

        for (String[] testCase : testCases) {
            String input = testCase[0];
            System.out.println("\nParsing: \"" + input + "\"");

            // The text is consumed piece by piece; whatever is left at the end is the title.
            String remaining = input;

            // Extract time (front of the text).
            Matcher timeMatcher = TIME_PATTERN.matcher(remaining);
            assertTrue(timeMatcher.find(), "No time found in: " + input);
            System.out.println(" Time: " + timeMatcher.group(1) + " om " + timeMatcher.group(2));
            assertEquals(testCase[1], timeMatcher.group(1), "Wrong weekday for: " + input);
            assertEquals(testCase[2], timeMatcher.group(2), "Wrong time for: " + input);
            remaining = remaining.substring(timeMatcher.end()).trim();

            // Extract location (end of the text).
            Matcher locMatcher = LOCATION_PATTERN.matcher(remaining);
            assertTrue(locMatcher.find(), "No location found in: " + input);
            System.out.println(" Location: " + locMatcher.group(1) + ", " + locMatcher.group(2));
            assertEquals(testCase[5], locMatcher.group(1), "Wrong city for: " + input);
            assertEquals(testCase[6], locMatcher.group(2), "Wrong country for: " + input);
            remaining = remaining.substring(0, locMatcher.start()).trim();

            // Extract lot count (front of what is left).
            Matcher lotMatcher = LOT_COUNT_PATTERN.matcher(remaining);
            assertTrue(lotMatcher.find(), "No lot count found in: " + input);
            System.out.println(" Lot count: " + lotMatcher.group(1));
            assertEquals(testCase[3], lotMatcher.group(1), "Wrong lot count for: " + input);
            remaining = remaining.substring(lotMatcher.end()).trim();

            // What remains is the title.
            System.out.println(" Title: " + remaining);
            assertEquals(testCase[4], remaining, "Wrong title for: " + input);
        }
    }

    /**
     * Splits text such as {@code "Amsterdam, NL"} into city and country code.
     *
     * @param text element text to inspect
     * @return {@code {city, countryCode}}, or {@code null} when the text does not end in a
     *         two-letter upper-case country code
     */
    private static String[] splitLocation(String text) {
        if (!COUNTRY_SUFFIX_PATTERN.matcher(text).matches()) {
            return null;
        }
        int codeStart = text.length() - 2;
        String countryCode = text.substring(codeStart);
        // Drop the separator (comma and/or whitespace) between city and country code.
        String cityPart = TRAILING_SEPARATOR_PATTERN
                .matcher(text.substring(0, codeStart).trim())
                .replaceAll("");
        return new String[] {cityPart, countryCode};
    }
}