60 lines
7.8 KiB
Java
60 lines
7.8 KiB
Java
package com.auction;
|
||
|
||
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
|
||
import org.jsoup.Jsoup;
|
||
import org.junit.jupiter.api.Test;
|
||
public class Parser {
|
||
|
||
public record AuctionItem(
|
||
String title,
|
||
String link,
|
||
int lotCount,
|
||
String location,
|
||
String closingTime
|
||
) { }
|
||
|
||
public static AuctionItem parseItem(String html, String baseUrl) {
|
||
var doc = Jsoup.parse(html, baseUrl);
|
||
|
||
org.jsoup.nodes.Element li = doc.selectFirst("li.grid");
|
||
if (li == null) return null;
|
||
|
||
var linkEl = li.selectFirst("a[data-cy=item-link]");
|
||
String link = linkEl != null ? linkEl.absUrl("href") : null;
|
||
|
||
String title = text(li, "div.heading-6");
|
||
|
||
String closingTime = text(li, "[data-cy=end-time-text]");
|
||
|
||
String lotCountStr = text(li, "[data-cy=lot-count-text]").trim();
|
||
int lotCount = lotCountStr.isEmpty() ? 0 : Integer.parseInt(lotCountStr);
|
||
|
||
// Tweede span in de location grid
|
||
String location = li.select("[data-cy=location-text] span").size() >= 2
|
||
? li.select("[data-cy=location-text] span").get(1).text()
|
||
: null;
|
||
|
||
return new AuctionItem(title, link, lotCount, location, closingTime);
|
||
}
|
||
|
||
private static String text(org.jsoup.nodes.Element root, String css) {
|
||
var el = root.selectFirst(css);
|
||
return el != null ? el.text() : "";
|
||
}
|
||
|
||
@Test
|
||
void testbla() {
|
||
String html = "<li class=\"grid\"><a class=\"group/card flex w-full cursor-pointer flex-col overflow-hidden rounded no-underline shadow\" data-cy=\"item-link\" href=\"/a/sluiting-van-een-metaalbewerkingsfabriek-%E2%80%93-cnc-bewerkingscentra-draadvonkmachine-gereedschapsmachines-en-meer-A7-38384\"><div class=\"relative grid-cols-[11fr_3fr] gap-px grid\"><div class=\"absolute bottom-1 left-1 z-[1] rounded bg-c-background-neutral-x-subtle-default py-0.5 pl-0.5 pr-1 shadow md:bottom-2 md:left-2\"><div class=\"relative flex items-center gap-1 body-xs-regular !leading-none text-c-text-danger-default\"><svg class=\"inline align-text-top size-4\"><use href=\"/assets/svg/icon-sprite-sheet.4ff9a809c0f1221d214235ed55a7e524.svg#clock\"></use></svg><span data-cy=\"end-time-text\"> 03:17:00</span></div></div><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><div class=\"transition-transform duration-700 md:group-hover/card:scale-105\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=256x192&imageFormat=webp 256w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=384x288&imageFormat=webp 384w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=512x384&imageFormat=webp 512w\" sizes=\"(min-width: 1024px) 30vw, 50vw\"><img alt=\"\" loading=\"lazy\" class=\"h-full w-full object-cover object-center\" sizes=\"(min-width: 1024px) 30vw, 50vw\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=256x192 256w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=384x288 384w,https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=512x384 512w\" src=\"https://media.tbauctions.com/image-media/23102708-b2f6-4727-9197-0fa1705c28a1/file?imageSize=1024x768 1024w\"></picture></div></div></div><div class=\"flex flex-col\" style=\"gap: 1px;\"><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=90x68&imageFormat=webp 90w,https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=256x192&imageFormat=webp 256w\" sizes=\"(min-width: 1024px) 10vw, 100px\"><img alt=\"\" loading=\"lazy\" class=\"\" sizes=\"(min-width: 1024px) 10vw, 100px\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=90x68 90w,https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=256x192 256w\" src=\"https://media.tbauctions.com/image-media/a4430584-1a9a-4c45-a667-9c8ab47bbd36/file?imageSize=1024x768 1024w\"></picture></div></div><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=90x68&imageFormat=webp 90w,https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=256x192&imageFormat=webp 256w\" sizes=\"(min-width: 1024px) 10vw, 100px\"><img alt=\"\" loading=\"lazy\" class=\"\" sizes=\"(min-width: 1024px) 10vw, 100px\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=90x68 90w,https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=256x192 256w\" src=\"https://media.tbauctions.com/image-media/b0f5051d-c3ad-4701-b611-2a153ef930ef/file?imageSize=1024x768 1024w\"></picture></div></div><div class=\"FixedRatio_wrapper__FSa0h\" style=\"padding-bottom: 75%;\"><div class=\"FixedRatio_content__Gr5Uo\"><picture><source type=\"image/webp\" srcset=\"https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=90x68&imageFormat=webp 90w,https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=256x192&imageFormat=webp 256w\" sizes=\"(min-width: 1024px) 10vw, 100px\"><img alt=\"\" loading=\"lazy\" class=\"\" sizes=\"(min-width: 1024px) 10vw, 100px\" data-cy=\"image\" srcset=\"https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=90x68 90w,https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=256x192 256w\" src=\"https://media.tbauctions.com/image-media/8b4bbcf2-e30b-48e0-8513-cc8e2593c8aa/file?imageSize=1024x768 1024w\"></picture></div></div><div class=\"flex h-full min-w-min items-center justify-center gap-1 bg-c-background-brand-primary-obvious-default text-c-text-inverse-default\"><svg class=\"inline align-text-top size-4\"><use href=\"/assets/svg/icon-sprite-sheet.4ff9a809c0f1221d214235ed55a7e524.svg#stack-outline\"></use></svg><span class=\"text-xs min-w-fit\" data-cy=\"lot-count-text\"> 115</span></div></div></div><div class=\"flex h-full flex-col justify-between overflow-hidden p-2 pb-1\"><div class=\"heading-6 mb-2 line-clamp-2 break-words text-c-text-neutral-obvious-default\">Sluiting van een metaalbewerkingsfabriek – CNC-bewerkingscentra, draadvonkmachine, gereedschapsmachines en meer</div><div><div class=\"grid grid-cols-[min-content_auto] items-center gap-1.5\" data-cy=\"location-text\"><span class=\"flex items-center justify-start size-5\"><span class=\"IconFlag_fflag__jK2TE IconFlag_fflag-DE__hMciG\"></span></span><span class=\"body-xs-regular text-nowrap capitalize text-c-text-neutral-obvious-default text-xs !text-c-text-neutral-obvious-default\">Vahingen, DE</span></div></div></div></a></li>";
|
||
var doc = Jsoup.parse(html, "https://www.troostwijkauctions.com");
|
||
String markdown = FlexmarkHtmlConverter.builder().build().convert(html);
|
||
|
||
System.out.println(doc.body());
|
||
AuctionItem item = Parser.parseItem(html, "https://www.troostwijkauctions.com");
|
||
|
||
System.out.println(item.title());
|
||
System.out.println(item.link());
|
||
System.out.println(item.lotCount());
|
||
System.out.println(item.location());
|
||
System.out.println(item.closingTime());
|
||
}
|
||
} |