This commit is contained in:
Tour
2025-12-04 14:49:58 +01:00
commit 79e14be37a
22 changed files with 2765 additions and 0 deletions

50
Dockerfile Normal file
View File

@@ -0,0 +1,50 @@
# Use Python 3.10+ base image
FROM python:3.11-slim
# Install system dependencies required for Playwright
RUN apt-get update && apt-get install -y \
wget \
gnupg \
ca-certificates \
fonts-liberation \
libnss3 \
libnspr4 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libcups2 \
libdrm2 \
libxkbcommon0 \
libxcomposite1 \
libxdamage1 \
libxfixes3 \
libxrandr2 \
libgbm1 \
libasound2 \
libpango-1.0-0 \
libcairo2 \
&& rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Install Playwright browsers
RUN playwright install chromium
RUN playwright install-deps chromium
# Copy the rest of the application
COPY . .
# Create output directory
RUN mkdir -p output
# Set Python path to include both project root and src directory
ENV PYTHONPATH=/app:/app/src
# Run the scraper
CMD ["python", "src/main.py"]