package com.auction;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for the auction text-extraction patterns.
 *
 * <p>{@link #loadTestHtml()} reads {@code src/test/resources/test_auctions.html} once
 * before all tests. The two tests in this class work on inline fixtures: one checks
 * extraction of a "City, CC" location from a small HTML fragment, the other checks
 * splitting a flat auction line into time, lot count, title and location.
 */
public class AuctionParsingTest {

    /** Text that ends in a two-letter upper-case country code. */
    private static final Pattern COUNTRY_SUFFIX_PATTERN = Pattern.compile(".*[A-Z]{2}$");

    /** Day name followed by "om" and an H:mm / HH:mm time, e.g. "woensdag om 18:00". */
    private static final Pattern TIME_PATTERN =
            Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})");

    /**
     * Trailing "City, CC" location.
     *
     * <p>The city is deliberately limited to a single token (letters, hyphens, apostrophes).
     * Allowing whitespace inside the city makes the leftmost match start at the first word
     * of the title, so the whole title is consumed as part of the location.
     * NOTE(review): flat text cannot distinguish a multi-word city ("Den Haag, NL") from
     * the end of the title; only the last word is taken as the city in that case.
     */
    private static final Pattern LOCATION_PATTERN =
            Pattern.compile("(\\p{L}[\\p{L}\\-']*),\\s*([A-Z]{2})\\s*$");

    /** Leading lot count followed by whitespace. */
    private static final Pattern LOT_COUNT_PATTERN = Pattern.compile("^(\\d+)\\s+");

    /** Contents of the saved auction page; not read by the tests currently in this class. */
    private static String testHtml;

    /**
     * Loads the saved auction HTML into {@link #testHtml} and prints its length.
     *
     * @throws IOException if the resource file cannot be read
     */
    @BeforeAll
    public static void loadTestHtml() throws IOException {
        // Load the test HTML file
        testHtml = Files.readString(Paths.get("src/test/resources/test_auctions.html"));
        System.out.println("Loaded test HTML (" + testHtml.length() + " characters)");
    }

    /**
     * Verifies that a trailing two-letter country code and the preceding city are
     * extracted from the text of the first {@code <p>} or {@code <span>} element.
     */
    @Test
    public void testLocationPatternMatching() {
        System.out.println("\n=== Location Pattern Tests ===");

        // Each row: {HTML fragment, expected city, expected country code}.
        // Fragments must contain a <p> or <span>, otherwise the selector below finds nothing.
        String[][] testCases = {
            {"<p>Amsterdam, NL</p>", "Amsterdam", "NL"},
            {"<p>Sofia, BG</p>", "Sofia", "BG"},
            {"<p>Berlin, DE</p>", "Berlin", "DE"},
            {"<span>Brussels,BE</span>", "Brussels", "BE"}
        };

        for (String[] testCase : testCases) {
            String fragment = testCase[0];
            String expectedCity = testCase[1];
            String expectedCountry = testCase[2];

            Document doc = Jsoup.parse(fragment);
            Element elem = doc.select("p, span").first();
            // Fail loudly instead of silently skipping a fixture that has no matching element.
            assertNotNull(elem, "No <p>/<span> element found in: " + fragment);

            String text = elem.text();
            System.out.println("\nTest: " + fragment);
            System.out.println("Text: " + text);

            assertTrue(COUNTRY_SUFFIX_PATTERN.matcher(text).matches(),
                    "Expected a trailing country code in: " + text);

            // The last two characters are the country code; strip the separator
            // (comma and/or whitespace) from what is left to obtain the city.
            String countryCode = text.substring(text.length() - 2);
            String cityPart = text.substring(0, text.length() - 2)
                    .trim()
                    .replaceAll("[,\\s]+$", "");
            System.out.println("→ Extracted: " + cityPart + ", " + countryCode);

            assertEquals(expectedCity, cityPart, "city for: " + fragment);
            assertEquals(expectedCountry, countryCode, "country for: " + fragment);
        }
    }

    /**
     * Verifies that a flat auction line of the form
     * {@code "<day> om <time> <lots> <title> <city>, <CC>"} is split into its parts.
     */
    @Test
    public void testFullTextPatternMatching() {
        System.out.println("\n=== Full Text Pattern Tests ===");

        // Each row: {input, day, time, lot count, title, city, country code}.
        String[][] testCases = {
            {"woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE",
                "woensdag", "18:00", "1", "Vrachtwagens voor bedrijfsvoertuigen", "Loßburg", "DE"},
            {"maandag om 14:30 5 Industriële machines Amsterdam, NL",
                "maandag", "14:30", "5", "Industriële machines", "Amsterdam", "NL"},
            {"vrijdag om 10:00 12 Landbouwmachines Antwerpen, BE",
                "vrijdag", "10:00", "12", "Landbouwmachines", "Antwerpen", "BE"}
        };

        for (String[] testCase : testCases) {
            String testText = testCase[0];
            System.out.println("\nParsing: \"" + testText + "\"");

            ParsedAuction parsed = parseAuctionText(testText);

            System.out.println("  Time: " + parsed.day + " om " + parsed.time);
            System.out.println("  Location: " + parsed.city + ", " + parsed.country);
            System.out.println("  Lot count: " + parsed.lotCount);
            System.out.println("  Title: " + parsed.title);

            assertEquals(testCase[1], parsed.day, "day for: " + testText);
            assertEquals(testCase[2], parsed.time, "time for: " + testText);
            assertEquals(testCase[3], parsed.lotCount, "lot count for: " + testText);
            assertEquals(testCase[4], parsed.title, "title for: " + testText);
            assertEquals(testCase[5], parsed.city, "city for: " + testText);
            assertEquals(testCase[6], parsed.country, "country for: " + testText);
        }
    }

    /**
     * Splits a flat auction line into its parts: time first, then the trailing location,
     * then the leading lot count; whatever is left is the title.
     *
     * @param text the flat auction text
     * @return the extracted parts; a part that was not found is {@code null}
     *         (the title is never {@code null}, it is the unmatched remainder)
     */
    private static ParsedAuction parseAuctionText(String text) {
        String remaining = text;
        String day = null;
        String time = null;
        String city = null;
        String country = null;
        String lotCount = null;

        // Time: everything up to and including the time is dropped from the remainder.
        Matcher timeMatcher = TIME_PATTERN.matcher(remaining);
        if (timeMatcher.find()) {
            day = timeMatcher.group(1);
            time = timeMatcher.group(2);
            remaining = remaining.substring(timeMatcher.end()).trim();
        }

        // Location: anchored at the end, cut off from the remainder.
        Matcher locationMatcher = LOCATION_PATTERN.matcher(remaining);
        if (locationMatcher.find()) {
            city = locationMatcher.group(1);
            country = locationMatcher.group(2);
            remaining = remaining.substring(0, locationMatcher.start()).trim();
        }

        // Lot count: anchored at the start of what is left.
        Matcher lotMatcher = LOT_COUNT_PATTERN.matcher(remaining);
        if (lotMatcher.find()) {
            lotCount = lotMatcher.group(1);
            remaining = remaining.substring(lotMatcher.end()).trim();
        }

        // What remains is the title.
        return new ParsedAuction(day, time, lotCount, remaining, city, country);
    }

    /** Immutable holder for the parts extracted from one flat auction line. */
    private static final class ParsedAuction {
        final String day;
        final String time;
        final String lotCount;
        final String title;
        final String city;
        final String country;

        ParsedAuction(String day, String time, String lotCount,
                      String title, String city, String country) {
            this.day = day;
            this.time = time;
            this.lotCount = lotCount;
            this.title = title;
            this.city = city;
            this.country = country;
        }
    }
}