51 lines
1.0 KiB
Docker
51 lines
1.0 KiB
Docker
# Base image: Python 3.11 on the slim Debian variant (the project targets
# Python 3.10 or newer).
# NOTE(review): this tag follows whichever Debian release the official image
# currently tracks; consider pinning the release (e.g. a "-bookworm" suffix)
# if the apt package names below ever stop resolving.
FROM python:3.11-slim

# Shared libraries and fonts that headless Chromium (driven by Playwright)
# needs at runtime, plus wget/gnupg/ca-certificates for HTTPS downloads.
# --no-install-recommends keeps optional packages out of the image, and the
# apt index is deleted in the same layer so it never lands in the image.
# Packages are listed alphabetically to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        fonts-liberation \
        gnupg \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libcairo2 \
        libcups2 \
        libdrm2 \
        libgbm1 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# All following relative paths (COPY targets, mkdir, CMD) resolve under /app.
WORKDIR /app

# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies; --no-cache-dir keeps pip's download cache out
# of the layer. The `playwright` CLI used below is expected to come from
# requirements.txt.
RUN pip install --no-cache-dir -r requirements.txt

# Download the Chromium build managed by Playwright and let Playwright install
# any remaining OS packages it needs. install-deps invokes apt itself, so the
# apt index is removed again in this same layer to keep it out of the image.
RUN playwright install chromium \
    && playwright install-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Copy the rest of the application. Done after dependency installation so that
# source edits do not invalidate the layers above.
# NOTE(review): confirm a .dockerignore excludes .git, virtualenvs, local
# output and .env files from the build context.
COPY . .

# Create the output directory (/app/output) ahead of time.
RUN mkdir -p output

# Make modules importable from both the project root and the src directory.
ENV PYTHONPATH=/app:/app/src

# Run the scraper. Exec (JSON-array) form makes python PID 1 so it receives
# the stop signal directly.
# NOTE(review): the container runs as root. Moving to a non-root USER would
# also require relocating the Playwright browser cache and checking write
# access to /app/output; not changed here.
CMD ["python", "src/main.py"]