# Image definition for the scraper: Python 3.11 + Playwright (Chromium only).
# Layers are ordered from least to most frequently changing so that editing
# application code does not invalidate the dependency/browser layers.

# Use Python 3.10+ base image
FROM python:3.11-slim

# Install system dependencies required for Playwright.
# --no-install-recommends keeps optional packages out of the image, and the
# apt lists are removed in the same layer so they never get committed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        wget \
        gnupg \
        ca-certificates \
        fonts-liberation \
        libnss3 \
        libnspr4 \
        libatk1.0-0 \
        libatk-bridge2.0-0 \
        libcups2 \
        libdrm2 \
        libxkbcommon0 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxrandr2 \
        libgbm1 \
        libasound2 \
        libpango-1.0-0 \
        libcairo2 \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first for better caching: this layer (and the pip install
# below) is only rebuilt when requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies (the `playwright` CLI used below is expected to
# come from requirements.txt).
RUN pip install --no-cache-dir -r requirements.txt

# Install the Chromium browser together with any OS packages it still needs.
# --with-deps performs the `install-deps` step as part of the same command, so
# the package index it downloads can be removed within this single layer.
RUN playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Copy the rest of the application
COPY . .

# Create output directory (relative to WORKDIR, i.e. /app/output)
RUN mkdir -p output

# Set Python path to include both project root and src directory
ENV PYTHONPATH=/app:/app/src

# Run the scraper
CMD ["python", "src/main.py"]