diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..d0ca5813
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,77 @@
+# Git files
+.git
+.gitignore
+.github
+
+# Python cache
+# (.dockerignore patterns are anchored at the build-context root, so the
+# leading **/ is needed to match cache files inside package directories)
+**/__pycache__
+**/*.pyc
+**/*.pyo
+*.pyd
+.Python
+*.so
+*.egg
+*.egg-info
+dist
+build
+eggs
+.eggs
+lib
+lib64
+parts
+sdist
+var
+wheels
+pip-wheel-metadata
+*.manifest
+*.spec
+
+# Testing
+.coverage
+.pytest_cache
+.tox
+htmlcov
+.hypothesis
+
+# Virtual environments
+venv
+env
+ENV
+.venv
+
+# IDE
+.vscode
+.idea
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Documentation
+docs
+*.md
+!README.md
+
+# CI/CD
+.travis.yml
+.gitlab-ci.yml
+azure-pipelines.yml
+
+# Docker
+Dockerfile*
+docker-compose*
+.dockerignore
+
+# Logs
+*.log
+
+# Environment files
+.env
+.env.*
+
+# Temporary files
+tmp
+temp
+*.tmp
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 00000000..086b797c
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,107 @@
+name: Build and publish Docker image
+
+on:
+  push:
+    branches: [main]
+    tags:
+      - 'v*'
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      security-events: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Emulation is required to build the linux/arm64 variant on an amd64 runner.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # github.repository keeps the owner's original casing, but image
+      # references must be lowercase; the scan and SBOM steps use this value.
+      - name: Compute lowercase image name
+        id: image
+        run: echo "name=${REGISTRY}/${IMAGE_NAME,,}" >> "$GITHUB_OUTPUT"
+
+      # type=sha keeps its default "sha-" prefix: a {{branch}} prefix renders
+      # empty on tag and pull_request events, which would produce an invalid
+      # tag starting with "-".
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          provenance: true
+          sbom: true
+
+      # NOTE(review): @master is a moving ref; pin to a release tag or commit SHA.
+      - name: Run Trivy vulnerability scanner
+        if: github.event_name != 'pull_request'
+        uses: aquasecurity/trivy-action@master
+        with:
+          image-ref: ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
+          format: 'sarif'
+          output: 'trivy-results.sarif'
+          severity: 'CRITICAL,HIGH'
+
+      - name: Upload Trivy results to GitHub Security tab
+        if: github.event_name != 'pull_request'
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: 'trivy-results.sarif'
+
+      - name: Generate SBOM
+        if: github.event_name != 'pull_request'
+        uses: anchore/sbom-action@v0
+        with:
+          image: ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
+          format: spdx-json
+          output-file: sbom.spdx.json
+
+      - name: Upload SBOM as artifact
+        if: github.event_name != 'pull_request'
+        uses: actions/upload-artifact@v4
+        with:
+          name: sbom
+          path: sbom.spdx.json
diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml
new file mode 100644
index 00000000..95f71f1e
--- /dev/null
+++ b/.github/workflows/security-scan.yml
@@ -0,0 +1,55 @@
+name: Security scanning
+
+on:
+  schedule:
+    - cron: '0 2 * * MON'  # Weekly scan on Mondays at 02:00 UTC
+  workflow_dispatch:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  scan:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: read
+      security-events: write
+
+    steps:
+      # Authenticate so the scanners can pull the image even when the
+      # package is not public (uses the packages: read permission above).
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # github.repository keeps the owner's original casing, but image
+      # references must be lowercase.
+      - name: Compute lowercase image reference
+        id: image
+        run: echo "ref=${REGISTRY}/${IMAGE_NAME,,}:latest" >> "$GITHUB_OUTPUT"
+
+      # NOTE(review): @master is a moving ref; pin to a release tag or commit SHA.
+      - name: Run Trivy vulnerability scanner on latest image
+        uses: aquasecurity/trivy-action@master
+        with:
+          image-ref: ${{ steps.image.outputs.ref }}
+          format: 'sarif'
+          output: 'trivy-results.sarif'
+          severity: 'CRITICAL,HIGH,MEDIUM'
+
+      - name: Upload Trivy results to GitHub Security tab
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: 'trivy-results.sarif'
+
+      - name: Run Grype vulnerability scanner
+        uses: anchore/scan-action@v3
+        with:
+          image: ${{ steps.image.outputs.ref }}
+          fail-build: false
+          severity-cutoff: high
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..45ea9703
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,74 @@
+# Multi-stage build for security and minimal size
+FROM python:3.13-slim AS builder
+
+# Set working directory
+WORKDIR /build
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    g++ \
+    make \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first for better caching
+COPY setup.py .
+COPY policyengine_household_api/__init__.py policyengine_household_api/
+RUN pip install --no-cache-dir --user --upgrade pip setuptools wheel
+
+# Install dependencies
+COPY . .
+RUN pip install --no-cache-dir --user -e .
+
+# Final stage - hardened runtime image
+FROM python:3.13-slim
+
+# Security: Create non-root user
+RUN groupadd -r appuser && useradd -r -g appuser -u 1000 appuser
+
+# Install runtime dependencies only
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+
+# Set working directory
+WORKDIR /app
+
+# Copy Python packages from builder
+COPY --from=builder --chown=appuser:appuser /root/.local /home/appuser/.local
+
+# Copy application code
+COPY --chown=appuser:appuser . /app
+
+# Update PATH for user-installed packages
+ENV PATH=/home/appuser/.local/bin:$PATH
+# Set PYTHONPATH outright: nothing in this Dockerfile defines it earlier, so
+# appending $PYTHONPATH would only leave a trailing empty path entry.
+ENV PYTHONPATH=/app
+
+# Security: Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+
+# Security: Linux capabilities are a container runtime setting; drop them
+# when starting the container, e.g. `docker run --cap-drop=ALL ...`.
+
+# Default listening port; override with `docker run -e PORT=...`
+ENV PORT=8080
+
+# Health check (probes the port the server is configured to listen on)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD python -c "import os, urllib.request; urllib.request.urlopen('http://localhost:' + (os.environ.get('PORT') or '8080') + '/')" || exit 1
+
+# Switch to non-root user
+USER appuser
+
+# Expose port (informational)
+EXPOSE 8080
+
+# Run the application with gunicorn for production. The shell wrapper expands
+# $PORT at container start; exec keeps gunicorn as PID 1 so it receives signals.
+CMD ["sh", "-c", "exec gunicorn -b :${PORT:-8080} policyengine_household_api.api --timeout 300 --workers 2"]
diff --git a/docker-readme.md b/docker-readme.md
new file mode 100644
index 00000000..ea0f9dbd
--- /dev/null
+++ b/docker-readme.md
@@ -0,0 +1,61 @@
+# Docker setup for PolicyEngine Household API
+
+This repository publishes a hardened Docker image to GitHub Container Registry.
+
+## Features
+
+### Security hardening
+- Multi-stage build to minimize image size
+- Non-root user execution (uid 1000)
+- Read-only root filesystem compatible
+- No unnecessary capabilities
+- Health checks included
+- Regular vulnerability scanning with Trivy and Grype
+
+### CI/CD pipeline
+The GitHub Actions workflow provides:
+- Builds on pushes to main and on version tags
+- Multi-platform support (linux/amd64, linux/arm64)
+- Automatic versioning from git tags
+- SBOM (Software Bill of Materials) generation
+- Vulnerability scanning and reporting
+- Results uploaded to GitHub Security tab
+
+## Using the image
+
+Pull the latest image:
+```bash
+docker pull ghcr.io/policyengine/policyengine-household-api:latest
+```
+
+Run the container:
+```bash
+docker run -p 8080:8080 \
+  -e AUTH0_ADDRESS_NO_DOMAIN=your_auth0_address \
+  -e AUTH0_AUDIENCE_NO_DOMAIN=your_auth0_audience \
+  ghcr.io/policyengine/policyengine-household-api:latest
+```
+
+## Building locally
+
+```bash
+docker build -t policyengine-household-api .
+```
+
+## Environment variables
+
+Required environment variables:
+- `AUTH0_ADDRESS_NO_DOMAIN`: Auth0 domain address
+- `AUTH0_AUDIENCE_NO_DOMAIN`: Auth0 API audience
+
+Optional environment variables (the `USER_ANALYTICS_DB_*` settings configure database connectivity):
+- `PORT`: Server port (defaults to 8080)
+- `USER_ANALYTICS_DB_USERNAME`
+- `USER_ANALYTICS_DB_PASSWORD`
+- `USER_ANALYTICS_DB_CONNECTION_NAME`
+- `ANTHROPIC_API_KEY`
+
+## Workflows
+
+- **docker-publish.yml**: Builds on pull requests to main; builds and publishes images on pushes to main and on `v*` tags
+- **security-scan.yml**: Weekly vulnerability scans of the latest image
\ No newline at end of file