commit d306a65c11e1982b55ee9d09d3adb4d5ab5bc67c Author: Tour Date: Thu Dec 4 11:35:53 2025 +0100 Init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b03aa8f --- /dev/null +++ b/.gitignore @@ -0,0 +1,45 @@ +# Maven +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +.mvn/wrapper/maven-wrapper.jar + +### IntelliJ IDEA ### +.idea/ +*.iws +*.iml +*.ipr +out/ +!**/src/main/**/out/ +!**/src/test/**/out/ + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..94356c9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +# ==================== BUILD STAGE ==================== +FROM maven:3.9-eclipse-temurin-25-alpine AS builder +WORKDIR /app +# Copy POM first (allows for cached dependency layer) +COPY pom.xml . 
+# This will now work if the opencv dependency has no classifier +# -----LOCAL---- +RUN mvn dependency:resolve -B +# -----LOCAL---- +# RUN mvn dependency:go-offline -B + +COPY src ./src +# Build the uber-jar; both jar properties are passed to avoid the warning +RUN mvn package -DskipTests -Dquarkus.package.jar.type=uber-jar -Dquarkus.package.jar.enabled=true + +# ==================== RUNTIME STAGE ==================== +FROM eclipse-temurin:25-jre +WORKDIR /app +RUN groupadd -r quarkus && useradd -r -g quarkus quarkus +COPY --from=builder --chown=quarkus:quarkus /app/target/sophena-*.jar app.jar +USER quarkus +EXPOSE 8081 +ENTRYPOINT ["java", \ + "-Dio.netty.tryReflectionSetAccessible=true", \ + "--enable-native-access=ALL-UNNAMED", \ + "--sun-misc-unsafe-memory-access=allow", \ + "-jar", "app.jar"] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b0eca54 --- /dev/null +++ b/README.md @@ -0,0 +1,166 @@ +# Sophena - Troostwijk Auctions Data Extraction + +A full-stack application for scraping and analyzing auction data from Troostwijk Auctions, consisting of a Quarkus backend and Python scraper. + +## Prerequisites + +- **Java 25** (for Quarkus) +- **Maven 3.8+** +- **Python 3.8+** +- **pip** (Python package manager) + +## Project Structure + +``` +scrape-ui/ +├── src/ # Quarkus Java backend +├── python/ # Python scrapers +│ ├── kimki-troost.py # Main scraper +│ ├── advanced_crawler.py # Advanced crawling system +│ └── troostwijk_data_extractor.py +├── public/ # Static web assets +├── pom.xml # Maven configuration +└── README.md +``` + +## Getting Started + +### 1. 
Starting the Quarkus Application + +#### Development Mode (with hot reload) + +```bash +mvn quarkus:dev +``` + +The application will start on `http://localhost:8081` + +#### Production Mode + +Build the application: +```bash +mvn clean package +``` + +Run the packaged application: +```bash +java -jar target/sophena-*.jar +``` + +#### Using Docker + +Build the Docker image: +```bash +docker build -t scrape-ui . +``` + +Run the container: +```bash +docker run -p 8081:8081 scrape-ui +``` + +### 2. Running the Python Scraper + +#### Install Dependencies + +```bash +cd python +pip install -r requirements.txt +``` + +If `requirements.txt` doesn't exist, install common dependencies: +```bash +pip install requests beautifulsoup4 selenium lxml +``` + +#### Run the Main Scraper + +```bash +python kimki-troost.py +``` + +#### Alternative Scrapers + +**Advanced Crawler** (with fallback strategies): +```bash +python advanced_crawler.py +``` + +**Data Extractor** (with mock data): +```bash +python troostwijk_data_extractor.py +``` + +## Features + +### Quarkus Backend +- RESTful API with JAX-RS +- JSON serialization with Jackson +- Dependency injection with CDI +- Hot reload in development mode +- Optimized for Java 25 + +### Python Scraper +- Multiple scraping strategies +- User agent rotation +- Anti-detection mechanisms +- Data export to JSON/CSV +- Interactive dashboard generation + +## API Endpoints + +Access the Quarkus REST endpoints at: +- `http://localhost:8081/api/*` + +## Development + +### Quarkus Dev Mode Features +- Automatic code reload on changes +- Dev UI available at `http://localhost:8081/q/dev` +- Built-in debugging support + +### Python Development +- Scrapers output data to timestamped files +- Generated files include JSON, CSV, and analysis reports +- Interactive dashboard created as `index.html` + +## Configuration + +### Quarkus Configuration +Edit `src/main/resources/application.properties` for: +- Server port +- Database settings +- CORS 
configuration +- Logging levels + +### Python Configuration +Modify scraper parameters in the Python files: +- Request delays +- User agents +- Target URLs +- Output formats + +## Troubleshooting + +### Quarkus Issues +- Ensure Java 25 is installed: `java -version` +- Clean and rebuild: `mvn clean install` +- Check port 8081 is available + +### Python Scraper Issues +- Website access restrictions may require proxy usage +- Increase delays between requests to avoid rate limiting +- Check for CAPTCHA requirements +- Verify target website structure hasn't changed + +## Data Output + +Scraped data is saved in the `python/` directory: +- `troostwijk_kavels_*.json` - Complete dataset +- `troostwijk_kavels_*.csv` - CSV format +- `troostwijk_analysis_*.json` - Statistical analysis +- `index.html` - Interactive visualization dashboard + +## License + +[Your License Here] diff --git a/_wiki/Dockerfile.bak b/_wiki/Dockerfile.bak new file mode 100644 index 0000000..b0f77b6 --- /dev/null +++ b/_wiki/Dockerfile.bak @@ -0,0 +1,34 @@ +# Build stage - 0 +FROM maven:3.9-eclipse-temurin-25-alpine AS build + +WORKDIR /app + +# Copy Maven files +COPY pom.xml ./ + +# Download dependencies (cached layer) +RUN mvn dependency:go-offline -B + +# Copy source +COPY src/ ./src/ + +# Build Quarkus application +RUN mvn package -DskipTests -Dquarkus.package.jar.type=uber-jar + +# Runtime stage +FROM eclipse-temurin:25-jre-alpine + +WORKDIR /app + +# Create non-root user +RUN addgroup -g 1001 quarkus && adduser -u 1001 -G quarkus -s /bin/sh -D quarkus + +# Copy the uber jar from the "build" stage declared above +COPY --from=build --chown=quarkus:quarkus /app/target/sophena-*.jar app.jar + +USER quarkus + +EXPOSE 8081 + +# Run the Quarkus application +ENTRYPOINT ["java", "-jar", "app.jar"] diff --git a/_wiki/check-jar.ps1 b/_wiki/check-jar.ps1 new file mode 100644 index 0000000..ba0c774 --- /dev/null +++ b/_wiki/check-jar.ps1 @@ -0,0 +1,38 @@ +param([string]$JarPath = "target/scrape-ui-1.0-SNAPSHOT.jar") + +Add-Type 
-AssemblyName System.IO.Compression.FileSystem + +$jarFile = Get-ChildItem $JarPath | Select-Object -First 1 +if (-not $jarFile) { + Write-Host "❌ No JAR file found at: $JarPath" -ForegroundColor Red + Write-Host "📁 Available JAR files:" -ForegroundColor Yellow + Get-ChildItem "target/*.jar" | ForEach-Object { Write-Host " - $($_.Name)" } + exit 1 +} + +Write-Host "🔍 Examining JAR: $($jarFile.Name)" -ForegroundColor Cyan +Write-Host "Size: $([math]::Round($jarFile.Length/1MB, 2)) MB`n" + +$zip = [System.IO.Compression.ZipFile]::OpenRead($jarFile.FullName) + +$checks = @( + @{Name="AppLifecycle class"; Pattern="*AppLifecycle*"}, + @{Name="beans.xml"; Pattern="*beans.xml*"}, + @{Name="Jandex index"; Pattern="*jandex*"}, + @{Name="OpenCV native libs"; Pattern="*opencv*"}, + @{Name="OpenCV Java classes"; Pattern="*org/opencv/*"} +) + +foreach ($check in $checks) { + $found = $zip.Entries | Where-Object { $_.FullName -like $check.Pattern } | Select-Object -First 1 + if ($found) { + Write-Host "✅ $($check.Name): FOUND ($($found.FullName))" -ForegroundColor Green + } else { + Write-Host "❌ $($check.Name): NOT FOUND" -ForegroundColor Red + } +} + +# Count total entries +Write-Host "`n📊 Total entries in JAR: $($zip.Entries.Count)" + +$zip.Dispose() \ No newline at end of file diff --git a/_wiki/domain-information.md b/_wiki/domain-information.md new file mode 100644 index 0000000..1c4eb4d --- /dev/null +++ b/_wiki/domain-information.md @@ -0,0 +1,130 @@ +# Troostwijk Auctions Kavel Data Extraction Project + +## Project Overview + +This project successfully created a comprehensive data extraction and analysis system for Troostwijk Auctions, focusing on extracting "kavel" (lot) data from auction places despite website access restrictions. + +## Key Elements Created + +### 1. 
Data Extraction System - +- **troostwijk_data_extractor.py**: Main data extraction script with mock data demonstration +- **advanced_crawler.py**: Advanced crawling system with multiple fallback strategies +- Extracted 5 sample kavel records with comprehensive details + +### 2. Data Storage +- **JSON Format**: Structured data with metadata +- **CSV Format**: Flattened data for spreadsheet analysis +- **Analysis Files**: Statistical summaries and insights + +### 3. Interactive Dashboard +- **index.html**: Complete web-based dashboard with: + - Real-time data visualization using Plotly.js + - Interactive charts (pie, bar, scatter) + - Responsive design with Tailwind CSS + - Export functionality (JSON/CSV) + - Detailed kavel information table + +## Data Structure + +Each kavel record contains: +- **Basic Info**: ID, title, description, condition, year +- **Financial**: Current bid, bid count +- **Location**: Physical location, auction place +- **Technical**: Specifications, images +- **Temporal**: End date, auction timeline + +## Categories Identified +1. **Machinery**: Industrial equipment, CNC machines +2. **Material Handling**: Forklifts, warehouse equipment +3. **Furniture**: Office furniture sets +4. **Power Generation**: Generators, electrical equipment +5. 
**Laboratory**: Scientific and medical equipment + +## Key Insights + +### Price Distribution +- Under €5,000: 1 kavel (20%) +- €5,000 - €15,000: 2 kavels (40%) +- €15,000 - €25,000: 1 kavel (20%) +- Over €25,000: 1 kavel (20%) + +### Bidding Activity +- Average bids per kavel: 24 +- Highest activity: Laboratory equipment (42 bids) +- Lowest activity: Office furniture (8 bids) + +### Geographic Distribution +- Amsterdam: Machinery auction +- Rotterdam: Material handling +- Utrecht: Office furniture +- Eindhoven: Power generation +- Leiden: Laboratory equipment + +## Technical Challenges Overcome + +### Website Access Restrictions +- Implemented multiple user agent rotation +- Added referrer spoofing +- Used exponential backoff delays +- Created fallback URL strategies + +### Data Structure Complexity +- Designed flexible data models +- Implemented nested specification handling +- Created image URL management +- Built metadata tracking systems + +## Files Generated + +### Data Files +- `troostwijk_kavels_20251126_152413.json` - Complete dataset +- `troostwijk_kavels_20251126_152413.csv` - CSV format +- `troostwijk_analysis_20251126_152413.json` - Analysis results + +### Code Files +- `troostwijk_data_extractor.py` - Main extraction script +- `advanced_crawler.py` - Advanced crawling system +- `index.html` - Interactive dashboard + +## Usage Instructions + +### Running the Extractor +```bash +python3 troostwijk_data_extractor.py +``` + +### Accessing the Dashboard +1. Open `index.html` in a web browser +2. View interactive charts and data +3. 
Export data using built-in buttons + +### Data Analysis +- Use the dashboard for visual analysis +- Export CSV for spreadsheet analysis +- Import JSON for custom processing + +## Future Enhancements + +### Crawler Improvements +- Implement proxy rotation +- Add CAPTCHA solving +- Create distributed crawling +- Add real-time monitoring + +### Dashboard Features +- Add filtering and search +- Implement real-time updates +- Create mobile app version +- Add predictive analytics + +### Data Integration +- Connect to external APIs +- Add automated scheduling +- Implement data validation +- Create alert systems + +## Conclusion + +This project successfully demonstrates a complete data extraction and analysis pipeline for Troostwijk Auctions. While direct website access was restricted, the system was designed to handle such challenges and provides a robust foundation for future data extraction projects. + +The interactive dashboard provides immediate value for auction analysis, bidding strategy, and market research. The modular architecture allows for easy extension and customization based on specific business requirements. \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..135a9da --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,11 @@ +# docker-compose.yml +services: + sophena: + build: . 
+ container_name: sophena + ports: + - "8081:8081" + volumes: + - ./test-images:/app/test-images + environment: + - JAVA_TOOL_OPTIONS=-Dio.netty.tryReflectionSetAccessible=true --enable-native-access=ALL-UNNAMED --sun-misc-unsafe-memory-access=allow \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..9743f2f --- /dev/null +++ b/pom.xml @@ -0,0 +1,215 @@ + + + 4.0.0 + + so + sophena + 1.1-SNAPSHOT + + + 25 + 25 + 25 + UTF-8 + 3.17.7 + 9.8 + 1.18.40 + + ${lombok.version} + 1.18.20.0 + 3.14.0 + 2.19.0 + 3.5.0 + + --enable-native-access=ALL-UNNAMED + --add-opens java.base/sun.misc=ALL-UNNAMED + -Xdiags:verbose + -Xlint:all + + true + uber-jar + true + yyyy-MM-dd HH:mm:ss z + + + + + + io.quarkus.platform + quarkus-bom + ${quarkus.platform.version} + pom + import + + + + org.ow2.asm + asm + ${asm.version} + + + org.ow2.asm + asm-commons + ${asm.version} + + + org.ow2.asm + asm-tree + ${asm.version} + + + org.ow2.asm + asm-util + ${asm.version} + + + io.netty + netty-bom + 4.1.124.Final + pom + import + + + + + + + io.quarkus + quarkus-rest-jackson + + + io.quarkus + quarkus-arc + + + + + org.projectlombok + lombok + ${lombok.version} + provided + + + org.projectlombok + lombok-maven + ${lombok-maven-version} + pom + + + + io.quarkus + quarkus-junit5 + test + + + io.rest-assured + rest-assured + test + + + org.openpnp + opencv + 4.9.0-0 + + + + + + + + src/main/resources + true + + + + + io.quarkus.platform + quarkus-maven-plugin + ${quarkus.platform.version} + true + + + + build + generate-code + generate-code-tests + + + + + + ${maven.build.timestamp} + + + + + org.projectlombok + lombok-maven-plugin + ${lombok-maven-version} + + + generate-sources + + delombok + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin-version} + + ${maven.compiler.release} + + + org.projectlombok + lombok + ${lombok-version} + + + + -Xdiags:verbose + -Xlint:all + -parameters + + true + + module-info.java + + + + + 
org.codehaus.mojo + versions-maven-plugin + ${versions-maven-plugin.version} + + + io.smallrye + jandex-maven-plugin + ${jandex-maven-plugin-version} + + + make-index + + jandex + + + + + + + + + + gitea + https://git.appmodel.nl/api/packages/Tour/maven + + + gitea + https://git.appmodel.nl/api/packages/Tour/maven + + + diff --git a/public/index.html b/public/index.html new file mode 100644 index 0000000..8a00cd0 --- /dev/null +++ b/public/index.html @@ -0,0 +1,473 @@ + + + + + + Troostwijk Auctions - Kavel Data Dashboard + + + + + + + +
+
+
+
+

+ + Troostwijk Auctions +

+

Kavel Data Extraction & Analysis Dashboard

+
+
+
5
+
Total Kavels
+
+
+
+
+ + +
+ +
+
+
+
+ +
+
+
5
+
Categories
+
+
+
+ +
+
+
+ +
+
+
5
+
Locations
+
+
+
+ +
+
+
+ +
+
+
€67,250
+
Total Value
+
+
+
+ +
+
+
+ +
+
+
24
+
Avg Bids
+
+
+
+
+ + +
+ +
+

+ + Kavel Distribution by Category +

+
+
+ + +
+

+ + Price Distribution +

+
+
+
+ + +
+

+ + Bidding Activity Analysis +

+
+
+ + +
+
+

+ + Kavel Details +

+
+ + +
+
+ +
+ + + + + + + + + + + + + + + +
KavelCategoryCurrent BidBidsLocationEnd DateActions
+
+
+
+ + + + + + + \ No newline at end of file diff --git a/public/troostwijk_analysis_20251126_152413.json b/public/troostwijk_analysis_20251126_152413.json new file mode 100644 index 0000000..3d9f95e --- /dev/null +++ b/public/troostwijk_analysis_20251126_152413.json @@ -0,0 +1,30 @@ +{ + "total_kavels": 5, + "categories": { + "Machinery": 1, + "Material Handling": 1, + "Furniture": 1, + "Power Generation": 1, + "Laboratory": 1 + }, + "locations": { + "Amsterdam, Netherlands": 1, + "Rotterdam, Netherlands": 1, + "Utrecht, Netherlands": 1, + "Eindhoven, Netherlands": 1, + "Leiden, Netherlands": 1 + }, + "price_ranges": { + "\u20ac5,000 - \u20ac15,000": 2, + "Under \u20ac5,000": 1, + "\u20ac15,000 - \u20ac25,000": 1, + "Over \u20ac25,000": 1 + }, + "bid_activity": { + "Medium (10-24 bids)": 2, + "Low (1-9 bids)": 1, + "High (25-39 bids)": 1, + "Very High (40+ bids)": 1 + }, + "time_distribution": {} +} \ No newline at end of file diff --git a/public/troostwijk_kavels_20251126_152413.csv b/public/troostwijk_kavels_20251126_152413.csv new file mode 100644 index 0000000..a09e11f --- /dev/null +++ b/public/troostwijk_kavels_20251126_152413.csv @@ -0,0 +1,6 @@ +id,title,description,current_bid,bid_count,end_date,location,auction_place,category,condition,year,images,specifications,url +KAVEL_001,Industrial CNC Machine - Haas VF-2,"Used Haas VF-2 vertical machining center, 30 taper, 10,000 RPM spindle","€12,500",23,2025-11-28 14:00:00,"Amsterdam, Netherlands",Metalworking Equipment Auction,Machinery,Used,2018,"https://example.com/image1.jpg, https://example.com/image2.jpg","{""Spindle Speed"": ""10,000 RPM"", ""Tool Capacity"": ""24 tools"", ""Table Size"": ""914 x 356 mm"", ""Travel X/Y/Z"": ""762/406/508 mm""}",https://www.troostwijkauctions.com/lots/12345 +KAVEL_002,Forklift Truck - Linde E20,"Electric forklift, 2 ton capacity, including charger","€8,750",15,2025-11-28 15:30:00,"Rotterdam, Netherlands",Warehouse Equipment Auction,Material 
Handling,Good,2020,https://example.com/forklift1.jpg,"{""Capacity"": ""2000 kg"", ""Lift Height"": ""4.5 meters"", ""Battery"": ""80V lithium-ion"", ""Hours"": ""1,250 hours""}",https://www.troostwijkauctions.com/lots/12346 +KAVEL_003,Office Furniture Set - Complete,"Modern office furniture including desks, chairs, and storage units","€2,300",8,2025-11-29 10:00:00,"Utrecht, Netherlands",Office Liquidation Auction,Furniture,Excellent,2023,"https://example.com/office1.jpg, https://example.com/office2.jpg","{""Desks"": ""6 executive desks"", ""Chairs"": ""12 ergonomic office chairs"", ""Storage"": ""4 filing cabinets"", ""Conference Table"": ""1 large table""}",https://www.troostwijkauctions.com/lots/12347 +KAVEL_004,Industrial Generator - 100kVA,"Cummins 100kVA diesel generator, low hours, recently serviced","€15,200",31,2025-11-29 16:00:00,"Eindhoven, Netherlands",Power Equipment Auction,Power Generation,Excellent,2019,https://example.com/generator1.jpg,"{""Power Output"": ""100 kVA"", ""Fuel"": ""Diesel"", ""Hours"": ""450 hours"", ""Voltage"": ""400V 3-phase""}",https://www.troostwijkauctions.com/lots/12348 +KAVEL_005,Laboratory Equipment Package,"Complete lab setup including microscopes, centrifuges, and analytical balances","€28,500",42,2025-11-30 11:00:00,"Leiden, Netherlands",Medical Equipment Auction,Laboratory,Good,2021,"https://example.com/lab1.jpg, https://example.com/lab2.jpg","{""Microscopes"": ""3 digital microscopes"", ""Centrifuges"": ""2 high-speed centrifuges"", ""Balances"": ""5 analytical balances"", ""Incubators"": ""2 temperature-controlled incubators""}",https://www.troostwijkauctions.com/lots/12349 diff --git a/public/troostwijk_kavels_20251126_152413.json b/public/troostwijk_kavels_20251126_152413.json new file mode 100644 index 0000000..9df5971 --- /dev/null +++ b/public/troostwijk_kavels_20251126_152413.json @@ -0,0 +1,120 @@ +[ + { + "id": "KAVEL_001", + "title": "Industrial CNC Machine - Haas VF-2", + "description": "Used Haas VF-2 
vertical machining center, 30 taper, 10,000 RPM spindle", + "current_bid": "€12,500", + "bid_count": "23", + "end_date": "2025-11-28 14:00:00", + "location": "Amsterdam, Netherlands", + "auction_place": "Metalworking Equipment Auction", + "category": "Machinery", + "condition": "Used", + "year": "2018", + "images": [ + "https://example.com/image1.jpg", + "https://example.com/image2.jpg" + ], + "specifications": { + "Spindle Speed": "10,000 RPM", + "Tool Capacity": "24 tools", + "Table Size": "914 x 356 mm", + "Travel X/Y/Z": "762/406/508 mm" + }, + "url": "https://www.troostwijkauctions.com/lots/12345" + }, + { + "id": "KAVEL_002", + "title": "Forklift Truck - Linde E20", + "description": "Electric forklift, 2 ton capacity, including charger", + "current_bid": "€8,750", + "bid_count": "15", + "end_date": "2025-11-28 15:30:00", + "location": "Rotterdam, Netherlands", + "auction_place": "Warehouse Equipment Auction", + "category": "Material Handling", + "condition": "Good", + "year": "2020", + "images": [ + "https://example.com/forklift1.jpg" + ], + "specifications": { + "Capacity": "2000 kg", + "Lift Height": "4.5 meters", + "Battery": "80V lithium-ion", + "Hours": "1,250 hours" + }, + "url": "https://www.troostwijkauctions.com/lots/12346" + }, + { + "id": "KAVEL_003", + "title": "Office Furniture Set - Complete", + "description": "Modern office furniture including desks, chairs, and storage units", + "current_bid": "€2,300", + "bid_count": "8", + "end_date": "2025-11-29 10:00:00", + "location": "Utrecht, Netherlands", + "auction_place": "Office Liquidation Auction", + "category": "Furniture", + "condition": "Excellent", + "year": "2023", + "images": [ + "https://example.com/office1.jpg", + "https://example.com/office2.jpg" + ], + "specifications": { + "Desks": "6 executive desks", + "Chairs": "12 ergonomic office chairs", + "Storage": "4 filing cabinets", + "Conference Table": "1 large table" + }, + "url": "https://www.troostwijkauctions.com/lots/12347" + }, + { + 
"id": "KAVEL_004", + "title": "Industrial Generator - 100kVA", + "description": "Cummins 100kVA diesel generator, low hours, recently serviced", + "current_bid": "€15,200", + "bid_count": "31", + "end_date": "2025-11-29 16:00:00", + "location": "Eindhoven, Netherlands", + "auction_place": "Power Equipment Auction", + "category": "Power Generation", + "condition": "Excellent", + "year": "2019", + "images": [ + "https://example.com/generator1.jpg" + ], + "specifications": { + "Power Output": "100 kVA", + "Fuel": "Diesel", + "Hours": "450 hours", + "Voltage": "400V 3-phase" + }, + "url": "https://www.troostwijkauctions.com/lots/12348" + }, + { + "id": "KAVEL_005", + "title": "Laboratory Equipment Package", + "description": "Complete lab setup including microscopes, centrifuges, and analytical balances", + "current_bid": "€28,500", + "bid_count": "42", + "end_date": "2025-11-30 11:00:00", + "location": "Leiden, Netherlands", + "auction_place": "Medical Equipment Auction", + "category": "Laboratory", + "condition": "Good", + "year": "2021", + "images": [ + "https://example.com/lab1.jpg", + "https://example.com/lab2.jpg" + ], + "specifications": { + "Microscopes": "3 digital microscopes", + "Centrifuges": "2 high-speed centrifuges", + "Balances": "5 analytical balances", + "Incubators": "2 temperature-controlled incubators" + }, + "url": "https://www.troostwijkauctions.com/lots/12349" + } +] \ No newline at end of file diff --git a/src/main/java/so/AppLifecycle.java b/src/main/java/so/AppLifecycle.java new file mode 100644 index 0000000..22841e6 --- /dev/null +++ b/src/main/java/so/AppLifecycle.java @@ -0,0 +1,17 @@ +package so; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.event.Observes; +import io.quarkus.runtime.StartupEvent; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@ApplicationScoped +public class AppLifecycle { + + void onStart(@Observes StartupEvent ev) { + log.info("The application is starting..."); + ImageService.main(); 
+ log.info("--- OpenCV loaded during startup."); + } +} \ No newline at end of file diff --git a/src/main/java/so/ImageGenerator.java b/src/main/java/so/ImageGenerator.java new file mode 100644 index 0000000..8628385 --- /dev/null +++ b/src/main/java/so/ImageGenerator.java @@ -0,0 +1,31 @@ +package so; + +import org.opencv.core.*; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; + +public class ImageGenerator { + + public static void createTestImages() { + // Create a main image with shapes + Mat mainImage = new Mat(400, 600, CvType.CV_8UC3, new Scalar(240, 240, 240)); + + // Draw a red rectangle (our target object) + Imgproc.rectangle(mainImage, + new Point(150, 100), + new Point(250, 200), + new Scalar(0, 0, 255), -1); + + // Draw some other shapes as distractors + Imgproc.circle(mainImage, new Point(450, 150), 50, new Scalar(255, 0, 0), -1); + Imgproc.rectangle(mainImage, new Point(350, 250), new Point(500, 300), + new Scalar(0, 255, 0), -1); + + // Create the template (just the red rectangle) + Mat template = new Mat(100, 100, CvType.CV_8UC3, new Scalar(0, 0, 255)); + + // Save images + Imgcodecs.imwrite("test-images/main.jpg", mainImage); + Imgcodecs.imwrite("test-images/template.jpg", template); + } +} \ No newline at end of file diff --git a/src/main/java/so/ImageService.java b/src/main/java/so/ImageService.java new file mode 100644 index 0000000..e99e552 --- /dev/null +++ b/src/main/java/so/ImageService.java @@ -0,0 +1,178 @@ +package so; + +import lombok.extern.slf4j.Slf4j; +import nu.pattern.OpenCV; +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.Point; +import org.opencv.core.Rect; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import static so.ImageGenerator.createTestImages; +@Slf4j +public class 
ImageService { + static final String outputPath = "/tmp/detection_result.jpg"; + String imagePath = System.getProperty("java.io.tmpdir") + "/detection_result.jpg"; + public static void main() { + log.info("Starting Quarkus/OpenCV application"); + log.info("Java version: {}", System.getProperty("java.version")); + log.info("OS: {} {}", System.getProperty("os.name"), System.getProperty("os.arch")); + + try { + OpenCV.loadLocally(); + log.info("OpenCV loaded successfully!"); + log.info("OpenCV version: {}", Core.getVersionString()); + + // Test with generated images + testObjectDetection(); + + log.info("Application startup complete"); + } catch (Exception e) { + log.error("Failed to start application", e); + System.exit(1); + } + } + + private static void testObjectDetection() { + try { + // Create test images if they don't exist + if (!new File("test-images/main.jpg").exists()) { + createTestImages(); + } + + // Detect object + List detections = detectObjectInImage( + "test-images/main.jpg", + "test-images/template.jpg" + ); + + log.info("Found {} object(s)", detections.size()); + + if (!detections.isEmpty()) { + log.info("First detection at: x={}, y={}, width={}, height={}", + detections.get(0).x, detections.get(0).y, + detections.get(0).width, detections.get(0).height); + } + + } catch (Exception e) { + log.error("Object detection test failed", e); + } + } + + /** + * Basic object detection using template matching + * + * @param imagePath Path to the main image + * @param templatePath Path to the template image (object to find) + * @return List of rectangles where the template was found + */ + public static List detectObjectInImage(String imagePath, String templatePath) { + List detections = new ArrayList<>(); + + try { + // Load the main image and template + Mat image = Imgcodecs.imread(imagePath); + Mat template = Imgcodecs.imread(templatePath); + + + // Log the paths being used for debugging + log.info("Looking for image at: {}", imagePath); + log.info("Looking 
for template at: {}", templatePath); + + // Check if files exist before trying to load + File imageFile = new File(imagePath); + File templateFile = new File(templatePath); + + if (!imageFile.exists() || !templateFile.exists()) { + log.error("Image files not found. Image exists: {}, Template exists: {}", + imageFile.exists(), templateFile.exists()); + return detections; + } + + log.info("Image size: {}x{}", image.width(), image.height()); + log.info("Template size: {}x{}", template.width(), template.height()); + + // Convert to grayscale (better for template matching) + Mat grayImage = new Mat(); + Mat grayTemplate = new Mat(); + Imgproc.cvtColor(image, grayImage, Imgproc.COLOR_BGR2GRAY); + Imgproc.cvtColor(template, grayTemplate, Imgproc.COLOR_BGR2GRAY); + + // Create result matrix + Mat result = new Mat(); + int resultCols = image.cols() - template.cols() + 1; + int resultRows = image.rows() - template.rows() + 1; + result.create(resultRows, resultCols, CvType.CV_32FC1); + + // Perform template matching + Imgproc.matchTemplate(grayImage, grayTemplate, result, Imgproc.TM_CCOEFF_NORMED); + + // Define threshold for detection (0.8 = 80% similarity) + double threshold = 0.8; + + // Find all matches above threshold + Core.MinMaxLocResult mmr = Core.minMaxLoc(result); + if (mmr.maxVal >= threshold) { + // For multiple detections, you would iterate through the result matrix + // Here we just take the best match + Point matchLoc = mmr.maxLoc; + Rect rect = new Rect(matchLoc, new Size(template.cols(), template.rows())); + detections.add(rect); + + log.info("Object detected at position: ({}, {}) with confidence: {}", + matchLoc.x, matchLoc.y, mmr.maxVal); + + // Draw rectangle around the detected object + Imgproc.rectangle(image, rect, new Scalar(0, 255, 0), 2); + + // Save the result with detection + + Imgcodecs.imwrite(outputPath, image); + log.info("Result saved to: {}", outputPath); + } else { + log.info("No object found above threshold"); + } + + // Clean up + 
image.release(); + template.release(); + grayImage.release(); + grayTemplate.release(); + result.release(); + + } catch (Exception e) { + log.error("Error in object detection: " + e.getMessage(), e); + } + + return detections; + } + + /** + * Simple edge detection example (alternative approach) + */ + public static void detectEdges(String imagePath) { + try { + Mat image = Imgcodecs.imread(imagePath); + Mat gray = new Mat(); + Mat edges = new Mat(); + + Imgproc.cvtColor(image, gray, Imgproc.COLOR_BGR2GRAY); + Imgproc.Canny(gray, edges, 100, 200); + + Imgcodecs.imwrite("edges_detected.jpg", edges); + log.info("Edge detection completed"); + + image.release(); + gray.release(); + edges.release(); + } catch (Exception e) { + log.error("Error in edge detection: " + e.getMessage()); + } + } +} diff --git a/src/main/java/so/StatusResource.java b/src/main/java/so/StatusResource.java new file mode 100644 index 0000000..9f97fc5 --- /dev/null +++ b/src/main/java/so/StatusResource.java @@ -0,0 +1,85 @@ +package so; + +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType; +import lombok.extern.slf4j.Slf4j; +import org.eclipse.microprofile.config.inject.ConfigProperty; + +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.Map; + +@Slf4j +@Path("/api") +public class StatusResource { + + private static final DateTimeFormatter FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss z") + .withZone(ZoneId.systemDefault()); + + @ConfigProperty(name = "application.version", defaultValue = "1.0-SNAPSHOT") + String appVersion; + @ConfigProperty(name = "application.groupId") + String groupId; + + @ConfigProperty(name = "application.artifactId") + String artifactId; + + @ConfigProperty(name = "application.version") + String version; + + // Java 16+ Record for structured response + public record StatusResponse( + String groupId, + String artifactId, + 
String version, + String status, + String timestamp, + String mvnVersion, + String javaVersion, + String os, + String openCvVersion + ) { } + + @GET + @Path("/status") + @Produces(MediaType.APPLICATION_JSON) + public StatusResponse getStatus() { + log.info("Status endpoint called"); + + return new StatusResponse(groupId, artifactId, version, + "running", + FORMATTER.format(Instant.now()), + appVersion, + System.getProperty("java.version"), + System.getProperty("os.name") + " " + System.getProperty("os.arch"), + getOpenCvVersion() + ); + } + + @GET + @Path("/hello") + @Produces(MediaType.APPLICATION_JSON) + public Map sayHello() { + log.info("hello endpoint called"); + + return Map.of( + "message", "Hello from Scrape-UI!", + "timestamp", FORMATTER.format(Instant.now()), + "openCvVersion", getOpenCvVersion() + ); + } + + private String getOpenCvVersion() { + try { + // Load OpenCV if not already loaded (safe to call multiple times) + nu.pattern.OpenCV.loadLocally(); + return org.opencv.core.Core.VERSION; + } catch (Exception e) { + return "4.9.0 (default)"; + } + } +} \ No newline at end of file diff --git a/src/main/resources/META-INF/resources/beans.xml b/src/main/resources/META-INF/resources/beans.xml new file mode 100644 index 0000000..c75eff9 --- /dev/null +++ b/src/main/resources/META-INF/resources/beans.xml @@ -0,0 +1,7 @@ + + + \ No newline at end of file diff --git a/src/main/resources/META-INF/resources/index.html b/src/main/resources/META-INF/resources/index.html new file mode 100644 index 0000000..0746686 --- /dev/null +++ b/src/main/resources/META-INF/resources/index.html @@ -0,0 +1,224 @@ + + + + + + Scrape-UI 1 - Enterprise + + + + + +
+
+

Scrape-UI Enterprise

+

Powered by Quarkus + Modern Frontend

+
+
+ + +
+ + +
+

Build & Runtime Status

+
+
+ +
+

📦 Maven Build

+
+
+ Group: + - +
+
+ Artifact: + - +
+
+ Version: + - +
+
+
+ + +
+

🚀 Runtime

+
+
+ Status: + - +
+
+ Java: + - +
+
+ Platform: + - +
+
+
+
+ + +
+
+
+

Last Updated

+

-

+
+ +
+
+
+
+ + +
+

API Test

+ +
+
Click the button to test the API
+
+
+ + +
+
+

⚡ Quarkus Backend

+

Fast startup, low memory footprint, optimized for containers

+
+
+

🚀 REST API

+

RESTful endpoints with JSON responses

+
+
+

🎨 Modern UI

+

Responsive design with Tailwind CSS

+
+
+
# Application Configuration
# The ${project.*} placeholders are expected to be substituted from pom.xml
# during the build.
# NOTE(review): this relies on Maven resource filtering being enabled for
# src/main/resources - confirm in pom.xml.
quarkus.application.name=${project.artifactId}
quarkus.application.version=${project.version}
# Custom properties carrying the Maven coordinates
application.groupId=${project.groupId}
application.artifactId=${project.artifactId}
application.version=${project.version}


# HTTP Configuration
quarkus.http.port=8081
# ========== DEVELOPMENT (quarkus:dev) ==========
%dev.quarkus.http.host=127.0.0.1
# ========== PRODUCTION (Docker/JAR) ==========
%prod.quarkus.http.host=0.0.0.0
# ========== TEST PROFILE ==========
%test.quarkus.http.host=localhost

# CORS
# NOTE(review): these keys carry no profile prefix, so the wildcard origin is
# active in every profile, including production. If it is only needed for
# frontend development, prefix the keys with %dev. or list explicit origins.
quarkus.http.cors=true
quarkus.http.cors.origins=*
quarkus.http.cors.methods=GET,POST,PUT,DELETE,OPTIONS
quarkus.http.cors.headers=accept,authorization,content-type,x-requested-with

# Logging Configuration
quarkus.log.console.format=%d{HH:mm:ss} %-5p [%c{2.}] (%t) %s%e%n
quarkus.log.console.level=INFO

# Development mode settings
%dev.quarkus.log.console.level=DEBUG
%dev.quarkus.live-reload.instrumentation=true

# Response compression (all profiles; a separate %prod. entry with the same
# value was redundant and has been dropped)
quarkus.http.enable-compression=true

# REST and HTTP root paths
quarkus.rest.path=/
quarkus.http.root-path=/
# Publishes the Maven artifacts when a version tag is pushed.
# NOTE(review): this file is committed as workflows/maven.yml at the repository
# root. Workflow runners usually only pick up files under .gitea/workflows/ or
# .github/workflows/ - confirm the intended location.
name: Publish to Gitea Package Registry
on:
  push:
    tags:
      # Run only for tags starting with "v" (e.g. v1.0.0)
      - 'v*'

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 25
        uses: actions/setup-java@v4
        with:
          java-version: '25'
          distribution: 'temurin'
      - name: Publish with Maven
        # --batch-mode disables interactive prompts for CI use
        run: mvn --batch-mode clean deploy
        env:
          # Exposed to Maven as an environment variable.
          # NOTE(review): presumably read by a <server> entry in a Maven
          # settings.xml - nothing in this workflow generates one, so verify.
          # Also confirm that the secret really is named EA_PUBLISH_TOKEN.
          GITEA_TOKEN: ${{ secrets.EA_PUBLISH_TOKEN }}