start
This commit is contained in:
@@ -329,7 +329,111 @@ public class DatabaseService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple record for image data
|
||||
* Imports auctions from scraper's schema format.
|
||||
* Reads from scraper's tables and converts to monitor format using adapter.
|
||||
*
|
||||
* @return List of imported auctions
|
||||
*/
|
||||
synchronized List<AuctionInfo> importAuctionsFromScraper() throws SQLException {
|
||||
List<AuctionInfo> imported = new ArrayList<>();
|
||||
var sql = "SELECT auction_id, title, location, url, lots_count, first_lot_closing_time, scraped_at " +
|
||||
"FROM auctions WHERE location LIKE '%NL%'";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
var rs = stmt.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
try {
|
||||
var auction = ScraperDataAdapter.fromScraperAuction(rs);
|
||||
upsertAuction(auction);
|
||||
imported.add(auction);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to import auction: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
// Table might not exist in scraper format - that's ok
|
||||
Console.println("ℹ️ Scraper auction table not found or incompatible schema");
|
||||
}
|
||||
|
||||
return imported;
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports lots from scraper's schema format.
|
||||
* Reads from scraper's tables and converts to monitor format using adapter.
|
||||
*
|
||||
* @return List of imported lots
|
||||
*/
|
||||
synchronized List<Lot> importLotsFromScraper() throws SQLException {
|
||||
List<Lot> imported = new ArrayList<>();
|
||||
var sql = "SELECT lot_id, auction_id, title, description, category, " +
|
||||
"current_bid, closing_time, url " +
|
||||
"FROM lots";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
var rs = stmt.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
try {
|
||||
var lot = ScraperDataAdapter.fromScraperLot(rs);
|
||||
upsertLot(lot);
|
||||
imported.add(lot);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to import lot: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
// Table might not exist in scraper format - that's ok
|
||||
Console.println("ℹ️ Scraper lots table not found or incompatible schema");
|
||||
}
|
||||
|
||||
return imported;
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports image URLs from scraper's schema.
|
||||
* The scraper populates the images table with URLs but doesn't download them.
|
||||
* This method retrieves undownloaded images for processing.
|
||||
*
|
||||
* @return List of image URLs that need to be downloaded
|
||||
*/
|
||||
synchronized List<ImageImportRecord> getUnprocessedImagesFromScraper() throws SQLException {
|
||||
List<ImageImportRecord> images = new ArrayList<>();
|
||||
var sql = """
|
||||
SELECT i.lot_id, i.url, l.auction_id
|
||||
FROM images i
|
||||
LEFT JOIN lots l ON i.lot_id = l.lot_id
|
||||
WHERE i.downloaded = 0 OR i.local_path IS NULL
|
||||
""";
|
||||
|
||||
try (var conn = DriverManager.getConnection(url); var stmt = conn.createStatement()) {
|
||||
var rs = stmt.executeQuery(sql);
|
||||
while (rs.next()) {
|
||||
String lotIdStr = rs.getString("lot_id");
|
||||
String auctionIdStr = rs.getString("auction_id");
|
||||
|
||||
int lotId = ScraperDataAdapter.extractNumericId(lotIdStr);
|
||||
int saleId = ScraperDataAdapter.extractNumericId(auctionIdStr);
|
||||
|
||||
images.add(new ImageImportRecord(
|
||||
lotId,
|
||||
saleId,
|
||||
rs.getString("url")
|
||||
));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
Console.println("ℹ️ No unprocessed images found in scraper format");
|
||||
}
|
||||
|
||||
return images;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple record for image data from database
|
||||
*/
|
||||
record ImageRecord(
        int id,          // presumably the image row's own id — TODO confirm against the images table
        int lotId,       // id of the lot the image belongs to
        String url,      // image URL
        String filePath, // presumably the local path of the downloaded file — TODO confirm
        String labels    // presumably detected object labels — TODO confirm format
) {
}
|
||||
|
||||
/**
|
||||
* Record for importing images from scraper format
|
||||
*/
|
||||
record ImageImportRecord(
        int lotId,  // numeric lot id extracted from the scraper's textual lot_id
        int saleId, // numeric auction id extracted from the scraper's textual auction_id
        String url  // image URL as stored by the scraper
) {
}
|
||||
}
|
||||
|
||||
246
src/main/java/com/auction/ScraperDataAdapter.java
Normal file
246
src/main/java/com/auction/ScraperDataAdapter.java
Normal file
@@ -0,0 +1,246 @@
|
||||
package com.auction;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
|
||||
/**
|
||||
* Adapter to convert data from the Python scraper's schema to the Monitor's schema.
|
||||
*
|
||||
* SCRAPER SCHEMA DIFFERENCES:
|
||||
* - auction_id: TEXT ("A7-39813") vs INTEGER (39813)
|
||||
* - lot_id: TEXT ("A1-28505-5") vs INTEGER (285055)
|
||||
* - current_bid: TEXT ("€123.45") vs REAL (123.45)
|
||||
* - Field names: lots_count vs lot_count, auction_id vs sale_id, etc.
|
||||
*
|
||||
* This adapter handles the translation between the two schemas.
|
||||
*/
|
||||
class ScraperDataAdapter {
|
||||
|
||||
private static final DateTimeFormatter[] TIMESTAMP_FORMATS = {
|
||||
DateTimeFormatter.ISO_LOCAL_DATE_TIME,
|
||||
DateTimeFormatter.ISO_DATE_TIME,
|
||||
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
};
|
||||
|
||||
/**
|
||||
* Converts scraper's auction format to monitor's AuctionInfo record.
|
||||
*
|
||||
* Scraper format:
|
||||
* - auction_id: "A7-39813" (TEXT)
|
||||
* - location: "Cluj-Napoca, RO" (combined)
|
||||
* - lots_count: INTEGER
|
||||
* - first_lot_closing_time: TEXT
|
||||
* - scraped_at: TEXT
|
||||
*/
|
||||
static AuctionInfo fromScraperAuction(ResultSet rs) throws SQLException {
|
||||
// Parse "A7-39813" → auctionId=39813, type="A7"
|
||||
String auctionIdStr = rs.getString("auction_id");
|
||||
int auctionId = extractNumericId(auctionIdStr);
|
||||
String type = extractTypePrefix(auctionIdStr);
|
||||
|
||||
// Split "Cluj-Napoca, RO" → city="Cluj-Napoca", country="RO"
|
||||
String location = rs.getString("location");
|
||||
String[] locationParts = parseLocation(location);
|
||||
String city = locationParts[0];
|
||||
String country = locationParts[1];
|
||||
|
||||
// Map field names
|
||||
int lotCount = getIntOrDefault(rs, "lots_count", 0);
|
||||
LocalDateTime closingTime = parseTimestamp(getStringOrNull(rs, "first_lot_closing_time"));
|
||||
|
||||
return new AuctionInfo(
|
||||
auctionId,
|
||||
rs.getString("title"),
|
||||
location,
|
||||
city,
|
||||
country,
|
||||
rs.getString("url"),
|
||||
type,
|
||||
lotCount,
|
||||
closingTime
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts scraper's lot format to monitor's Lot record.
|
||||
*
|
||||
* Scraper format:
|
||||
* - lot_id: "A1-28505-5" (TEXT)
|
||||
* - auction_id: "A7-39813" (TEXT)
|
||||
* - current_bid: "€123.45" or "No bids" (TEXT)
|
||||
* - bid_count: INTEGER
|
||||
* - closing_time: TEXT
|
||||
*/
|
||||
static Lot fromScraperLot(ResultSet rs) throws SQLException {
|
||||
// Parse "A1-28505-5" → lotId=285055
|
||||
String lotIdStr = rs.getString("lot_id");
|
||||
int lotId = extractNumericId(lotIdStr);
|
||||
|
||||
// Parse "A7-39813" → saleId=39813
|
||||
String auctionIdStr = rs.getString("auction_id");
|
||||
int saleId = extractNumericId(auctionIdStr);
|
||||
|
||||
// Parse "€123.45" → currentBid=123.45, currency="EUR"
|
||||
String currentBidStr = getStringOrNull(rs, "current_bid");
|
||||
double currentBid = parseBidAmount(currentBidStr);
|
||||
String currency = parseBidCurrency(currentBidStr);
|
||||
|
||||
// Parse timestamp
|
||||
LocalDateTime closingTime = parseTimestamp(getStringOrNull(rs, "closing_time"));
|
||||
|
||||
return new Lot(
|
||||
saleId,
|
||||
lotId,
|
||||
rs.getString("title"),
|
||||
getStringOrDefault(rs, "description", ""),
|
||||
"", // manufacturer - not in scraper schema
|
||||
"", // type - not in scraper schema
|
||||
0, // year - not in scraper schema
|
||||
getStringOrDefault(rs, "category", ""),
|
||||
currentBid,
|
||||
currency,
|
||||
rs.getString("url"),
|
||||
closingTime,
|
||||
false // closing_notified - not yet notified
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts numeric ID from scraper's text format.
|
||||
* Examples:
|
||||
* - "A7-39813" → 39813
|
||||
* - "A1-28505-5" → 285055 (concatenates all digits)
|
||||
*/
|
||||
static int extractNumericId(String id) {
|
||||
if (id == null || id.isEmpty()) {
|
||||
return 0;
|
||||
}
|
||||
String digits = id.replaceAll("[^0-9]", "");
|
||||
return digits.isEmpty() ? 0 : Integer.parseInt(digits);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts type prefix from scraper's auction/lot ID.
|
||||
* Examples:
|
||||
* - "A7-39813" → "A7"
|
||||
* - "A1-28505-5" → "A1"
|
||||
*/
|
||||
private static String extractTypePrefix(String id) {
|
||||
if (id == null || id.isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
int dashIndex = id.indexOf('-');
|
||||
return dashIndex > 0 ? id.substring(0, dashIndex) : "";
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses location string into [city, country] array.
|
||||
* Examples:
|
||||
* - "Cluj-Napoca, RO" → ["Cluj-Napoca", "RO"]
|
||||
* - "Amsterdam" → ["Amsterdam", ""]
|
||||
*/
|
||||
private static String[] parseLocation(String location) {
|
||||
if (location == null || location.isEmpty()) {
|
||||
return new String[]{"", ""};
|
||||
}
|
||||
|
||||
String[] parts = location.split(",\\s*");
|
||||
String city = parts.length > 0 ? parts[0].trim() : "";
|
||||
String country = parts.length > 1 ? parts[parts.length - 1].trim() : "";
|
||||
|
||||
return new String[]{city, country};
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses bid amount from scraper's text format.
|
||||
* Examples:
|
||||
* - "€123.45" → 123.45
|
||||
* - "$50.00" → 50.0
|
||||
* - "No bids" → 0.0
|
||||
* - "123.45" → 123.45
|
||||
*/
|
||||
private static double parseBidAmount(String bid) {
|
||||
if (bid == null || bid.isEmpty() || bid.toLowerCase().contains("no")) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
try {
|
||||
// Remove all non-numeric characters except decimal point
|
||||
String cleanBid = bid.replaceAll("[^0-9.]", "");
|
||||
return cleanBid.isEmpty() ? 0.0 : Double.parseDouble(cleanBid);
|
||||
} catch (NumberFormatException e) {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts currency from bid string.
|
||||
* Examples:
|
||||
* - "€123.45" → "EUR"
|
||||
* - "$50.00" → "USD"
|
||||
* - "123.45" → "EUR" (default)
|
||||
*/
|
||||
private static String parseBidCurrency(String bid) {
|
||||
if (bid == null || bid.isEmpty()) {
|
||||
return "EUR";
|
||||
}
|
||||
|
||||
if (bid.contains("€")) return "EUR";
|
||||
if (bid.contains("$")) return "USD";
|
||||
if (bid.contains("£")) return "GBP";
|
||||
|
||||
return "EUR"; // Default
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses timestamp from various formats used by the scraper.
|
||||
* Tries multiple formats in order.
|
||||
*/
|
||||
private static LocalDateTime parseTimestamp(String timestamp) {
|
||||
if (timestamp == null || timestamp.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
for (DateTimeFormatter formatter : TIMESTAMP_FORMATS) {
|
||||
try {
|
||||
return LocalDateTime.parse(timestamp, formatter);
|
||||
} catch (DateTimeParseException e) {
|
||||
// Try next format
|
||||
}
|
||||
}
|
||||
|
||||
// Couldn't parse - return null
|
||||
Console.println("⚠️ Could not parse timestamp: " + timestamp);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Helper methods for safe ResultSet access
|
||||
|
||||
private static String getStringOrNull(ResultSet rs, String column) throws SQLException {
|
||||
try {
|
||||
return rs.getString(column);
|
||||
} catch (SQLException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String getStringOrDefault(ResultSet rs, String column, String defaultValue) throws SQLException {
|
||||
try {
|
||||
String value = rs.getString(column);
|
||||
return value != null ? value : defaultValue;
|
||||
} catch (SQLException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
private static int getIntOrDefault(ResultSet rs, String column, int defaultValue) throws SQLException {
|
||||
try {
|
||||
return rs.getInt(column);
|
||||
} catch (SQLException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -35,26 +35,26 @@ public class AuctionParsingTest {
|
||||
System.out.println("\n=== Location Pattern Tests ===");
|
||||
|
||||
// Test different location formats
|
||||
String[] testCases = {
|
||||
var testCases = new String[]{
|
||||
"<p>Amsterdam, NL</p>",
|
||||
"<p class=\"flex truncate\"><span class=\"w-full truncate\">Sofia,<!-- --> </span>BG</p>",
|
||||
"<p>Berlin, DE</p>",
|
||||
"<span>Brussels,</span>BE"
|
||||
};
|
||||
|
||||
for (String testHtml : testCases) {
|
||||
Document doc = Jsoup.parse(testHtml);
|
||||
Element elem = doc.select("p, span").first();
|
||||
for (var testHtml : testCases) {
|
||||
var doc = Jsoup.parse(testHtml);
|
||||
var elem = doc.select("p, span").first();
|
||||
|
||||
if (elem != null) {
|
||||
String text = elem.text();
|
||||
var text = elem.text();
|
||||
System.out.println("\nTest: " + testHtml);
|
||||
System.out.println("Text: " + text);
|
||||
|
||||
// Test regex pattern
|
||||
if (text.matches(".*[A-Z]{2}$")) {
|
||||
String countryCode = text.substring(text.length() - 2);
|
||||
String cityPart = text.substring(0, text.length() - 2).trim().replaceAll("[,\\s]+$", "");
|
||||
var countryCode = text.substring(text.length() - 2);
|
||||
var cityPart = text.substring(0, text.length() - 2).trim().replaceAll("[,\\s]+$", "");
|
||||
System.out.println("→ Extracted: " + cityPart + ", " + countryCode);
|
||||
} else {
|
||||
System.out.println("→ No match");
|
||||
@@ -68,39 +68,39 @@ public class AuctionParsingTest {
|
||||
System.out.println("\n=== Full Text Pattern Tests ===");
|
||||
|
||||
// Test the complete auction text format
|
||||
String[] testCases = {
|
||||
var testCases = new String[]{
|
||||
"woensdag om 18:00 1 Vrachtwagens voor bedrijfsvoertuigen Loßburg, DE",
|
||||
"maandag om 14:30 5 Industriële machines Amsterdam, NL",
|
||||
"vrijdag om 10:00 12 Landbouwmachines Antwerpen, BE"
|
||||
};
|
||||
|
||||
for (String testText : testCases) {
|
||||
for (var testText : testCases) {
|
||||
System.out.println("\nParsing: \"" + testText + "\"");
|
||||
|
||||
// Simulated extraction
|
||||
String remaining = testText;
|
||||
var remaining = testText;
|
||||
|
||||
// Extract time
|
||||
java.util.regex.Pattern timePattern = java.util.regex.Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})");
|
||||
java.util.regex.Matcher timeMatcher = timePattern.matcher(remaining);
|
||||
var timePattern = java.util.regex.Pattern.compile("(\\w+)\\s+om\\s+(\\d{1,2}:\\d{2})");
|
||||
var timeMatcher = timePattern.matcher(remaining);
|
||||
if (timeMatcher.find()) {
|
||||
System.out.println(" Time: " + timeMatcher.group(1) + " om " + timeMatcher.group(2));
|
||||
remaining = remaining.substring(timeMatcher.end()).trim();
|
||||
}
|
||||
|
||||
// Extract location
|
||||
java.util.regex.Pattern locPattern = java.util.regex.Pattern.compile(
|
||||
var locPattern = java.util.regex.Pattern.compile(
|
||||
"([A-ZÀ-ÿa-z][A-ZÀ-ÿa-z\\s\\-'öäüßàèéêëïôùûç]+?),\\s*([A-Z]{2})\\s*$"
|
||||
);
|
||||
java.util.regex.Matcher locMatcher = locPattern.matcher(remaining);
|
||||
);
|
||||
var locMatcher = locPattern.matcher(remaining);
|
||||
if (locMatcher.find()) {
|
||||
System.out.println(" Location: " + locMatcher.group(1) + ", " + locMatcher.group(2));
|
||||
remaining = remaining.substring(0, locMatcher.start()).trim();
|
||||
}
|
||||
|
||||
// Extract lot count
|
||||
java.util.regex.Pattern lotPattern = java.util.regex.Pattern.compile("^(\\d+)\\s+");
|
||||
java.util.regex.Matcher lotMatcher = lotPattern.matcher(remaining);
|
||||
var lotPattern = java.util.regex.Pattern.compile("^(\\d+)\\s+");
|
||||
var lotMatcher = lotPattern.matcher(remaining);
|
||||
if (lotMatcher.find()) {
|
||||
System.out.println(" Lot count: " + lotMatcher.group(1));
|
||||
remaining = remaining.substring(lotMatcher.end()).trim();
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -29,7 +29,7 @@ public class TroostwijkScraperTest {
|
||||
// Load native OpenCV library before any tests run
|
||||
try {
|
||||
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
|
||||
System.out.println("✓ OpenCV native library loaded successfully");
|
||||
IO.println("✓ OpenCV native library loaded successfully");
|
||||
} catch (UnsatisfiedLinkError e) {
|
||||
System.err.println("⚠️ Warning: Could not load OpenCV native library");
|
||||
System.err.println(" Tests will run without object detection support");
|
||||
@@ -61,25 +61,10 @@ public class TroostwijkScraperTest {
|
||||
}
|
||||
|
||||
// Clean up test database
|
||||
File dbFile = new File(testDatabasePath);
|
||||
var dbFile = new File(testDatabasePath);
|
||||
if (dbFile.exists()) {
|
||||
dbFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDatabaseSchema() throws SQLException {
|
||||
// Verify that the database schema was created correctly
|
||||
List<Lot> lots = scraper.db.getAllLots();
|
||||
assertNotNull(lots, "Should be able to query lots table");
|
||||
|
||||
int imageCount = scraper.db.getImageCount();
|
||||
assertTrue(imageCount >= 0, "Image count should be non-negative");
|
||||
|
||||
List<Lot> activeLots = scraper.db.getActiveLots();
|
||||
assertNotNull(activeLots, "Should be able to query active lots");
|
||||
|
||||
System.out.println("✓ Database schema is valid and queryable");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user