Files
auctiora/src/test/java/com/auction/Parser.java
2025-12-03 15:32:34 +01:00

60 lines
7.8 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package com.auction;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
import org.jsoup.Jsoup;
import org.junit.jupiter.api.Test;
public class Parser {
public record AuctionItem(
String title,
String link,
int lotCount,
String location,
String closingTime
) { }
public static AuctionItem parseItem(String html, String baseUrl) {
var doc = Jsoup.parse(html, baseUrl);
org.jsoup.nodes.Element li = doc.selectFirst("li.grid");
if (li == null) return null;
var linkEl = li.selectFirst("a[data-cy=item-link]");
String link = linkEl != null ? linkEl.absUrl("href") : null;
String title = text(li, "div.heading-6");
String closingTime = text(li, "[data-cy=end-time-text]");
String lotCountStr = text(li, "[data-cy=lot-count-text]").trim();
int lotCount = lotCountStr.isEmpty() ? 0 : Integer.parseInt(lotCountStr);
// Tweede span in de location grid
String location = li.select("[data-cy=location-text] span").size() >= 2
? li.select("[data-cy=location-text] span").get(1).text()
: null;
return new AuctionItem(title, link, lotCount, location, closingTime);
}
private static String text(org.jsoup.nodes.Element root, String css) {
var el = root.selectFirst(css);
return el != null ? el.text() : "";
}
@Test
void testbla() {
String html = "<li class=\"grid\"><a class=\"group/card flex w-full cursor-pointer flex-col overflow-hidden rounded no-underline shadow\" data-cy=\"item-link\" href=\"/a/sluiting-van-een-metaalbewerkingsfabriek-%E2%80%93-cnc-bewerkingscentra-draadvonkmachine-gereedschapsmachines-en-meer-A7-38384\"><div class=\"relative grid-cols-[11fr_3fr] gap-px grid\"><div class=\"absolute bottom-1 left-1 z-[1] rounded bg-c-background-neutral-x-subtle-default py-0.5 pl-0.5 pr-1 shadow md:bottom-2 md:left-2\"><div class=\"relative flex items-center gap-1 body-xs-regular !leading-none text-c-text-danger-default\"><svg class=\"inline align-text-top size-4\"><use href=\"/assets/svg/icon-sprite-sheet.4ff9a809c0f1221d214235ed55a7e524.svg#clock\"></use></svg><span data-cy=\"end-time-text\"> 03:17:00</span></div></div><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><div class=\"transition-transform duration-700 md:group-hover/card:scale-105\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=256x192&amp;imageFormat=webp 256w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=384x288&amp;imageFormat=webp 384w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=512x384&amp;imageFormat=webp 512w\" sizes=\"(min-width: 1024px) 30vw, 50vw\"><img alt=\"\" loading=\"lazy\" class=\"h-full w-full object-cover object-center\" sizes=\"(min-width: 1024px) 30vw, 50vw\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=256x192 256w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=384x288 384w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=512x384 512w\" 
src=\"https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=1024x768 1024w\"></picture></div></div></div><div class=\"flex flex-col\" style=\"gap: 1px;\"><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=90x68&amp;imageFormat=webp 90w,https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=256x192&amp;imageFormat=webp 256w\" sizes=\"(min-width: 1024px) 10vw, 100px\"><img alt=\"\" loading=\"lazy\" class=\"\" sizes=\"(min-width: 1024px) 10vw, 100px\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=90x68 90w,https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=256x192 256w\" src=\"https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=1024x768 1024w\"></picture></div></div><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=90x68&amp;imageFormat=webp 90w,https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=256x192&amp;imageFormat=webp 256w\" sizes=\"(min-width: 1024px) 10vw, 100px\"><img alt=\"\" loading=\"lazy\" class=\"\" sizes=\"(min-width: 1024px) 10vw, 100px\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=90x68 90w,https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=256x192 256w\" src=\"https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=1024x768 
1024w\"></picture></div></div><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=90x68&amp;imageFormat=webp 90w,https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=256x192&amp;imageFormat=webp 256w\" sizes=\"(min-width: 1024px) 10vw, 100px\"><img alt=\"\" loading=\"lazy\" class=\"\" sizes=\"(min-width: 1024px) 10vw, 100px\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=90x68 90w,https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=256x192 256w\" src=\"https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=1024x768 1024w\"></picture></div></div><div class=\"flex h-full min-w-min items-center justify-center gap-1 bg-c-background-brand-primary-obvious-default text-c-text-inverse-default\"><svg class=\"inline align-text-top size-4\"><use href=\"/assets/svg/icon-sprite-sheet.4ff9a809c0f1221d214235ed55a7e524.svg#stack-outline\"></use></svg><span class=\"text-xs min-w-fit\" data-cy=\"lot-count-text\"> 115</span></div></div></div><div class=\"flex h-full flex-col justify-between overflow-hidden p-2 pb-1\"><div class=\"heading-6 mb-2 line-clamp-2 break-words text-c-text-neutral-obvious-default\">Sluiting van een metaalbewerkingsfabriek CNC-bewerkingscentra, draadvonkmachine, gereedschapsmachines en meer</div><div><div class=\"grid grid-cols-[min-content_auto] items-center gap-1.5\" data-cy=\"location-text\"><span class=\"flex items-center justify-start size-5\"><span class=\"IconFlag_fflag__jK2TE IconFlag_fflag-DE__hMciG\"></span></span><span class=\"body-xs-regular text-nowrap capitalize text-c-text-neutral-obvious-default text-xs !text-c-text-neutral-obvious-default\">Vahingen, 
DE</span></div></div></div></a></li>";
var doc = Jsoup.parse(html, "https://www.troostwijkauctions.com");
String markdown = FlexmarkHtmlConverter.builder().build().convert(html);
System.out.println(doc.body());
AuctionItem item = Parser.parseItem(html, "https://www.troostwijkauctions.com");
System.out.println(item.title());
System.out.println(item.link());
System.out.println(item.lotCount());
System.out.println(item.location());
System.out.println(item.closingTime());
}
}