-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathDockerfile.cloudrun
More file actions
94 lines (75 loc) · 3.32 KB
/
Dockerfile.cloudrun
File metadata and controls
94 lines (75 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# ============================================================
# PharmaGraphRAG — Cloud Run Dockerfile
# ============================================================
# Packages FastAPI + ChromaDB data into a single stateless
# container intended for Google Cloud Run.
#
# Requirements before building:
# - data/chroma/ must be populated (run scripts/setup_demo.py
# locally or use the full pipeline first).
#
# Build:
# docker build -f docker/Dockerfile.cloudrun -t pharmagraphrag-api .
#
# Run locally (test):
# docker run -p 8000:8080 \
# -e NEO4J_URI=bolt://<aura-host>:7687 \
# -e NEO4J_USER=neo4j \
# -e NEO4J_PASSWORD=<password> \
# -e GEMINI_API_KEY=<key> \
# pharmagraphrag-api
#
# Deploy to Cloud Run:
# gcloud run deploy pharmagraphrag-api \
# --image gcr.io/<project>/pharmagraphrag-api \
# --platform managed --region us-central1 \
# --allow-unauthenticated \
# --set-env-vars NEO4J_URI=...,NEO4J_USER=...,NEO4J_PASSWORD=...,GEMINI_API_KEY=...
# ============================================================
# ---- Stage 1: build ----
# Builds the project's virtualenv at /app/.venv with uv, swaps the CUDA build
# of PyTorch for the CPU-only wheels, and pre-downloads the embedding model.
FROM python:3.12-slim AS builder
WORKDIR /app

# uv/uvx binaries are taken from the upstream uv image.
# NOTE(review): the ":latest" tag is unpinned, so builds are not reproducible;
# consider pinning a specific uv version or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Dependency manifests are copied first so the dependency layer stays cached
# until pyproject.toml / uv.lock change.
COPY pyproject.toml uv.lock README.md ./
RUN uv sync --frozen --no-dev --no-install-project

# Project source is copied afterwards; the second sync installs the project.
COPY src/ src/
RUN uv sync --frozen --no-dev

# Replace PyTorch CUDA with CPU-only version (saves ~10 GB).
# The reinstall must succeed for the build to continue. Only the uninstall
# step is best-effort (some of the listed packages may not be installed), so
# its "|| true" is scoped to the subshell instead of breaking the && chain.
RUN uv pip install --reinstall --no-cache \
        torch torchvision --index-url https://download.pytorch.org/whl/cpu \
    && ( uv pip uninstall \
            nvidia-cublas-cu12 nvidia-cuda-cupti-cu12 nvidia-cuda-nvrtc-cu12 \
            nvidia-cuda-runtime-cu12 nvidia-cudnn-cu12 nvidia-cufft-cu12 \
            nvidia-curand-cu12 nvidia-cusolver-cu12 nvidia-cusparse-cu12 \
            nvidia-nccl-cu12 nvidia-nvjitlink-cu12 nvidia-nvtx-cu12 \
            nvidia-cufile-cu12 nvidia-cusparselt-cu12 nvidia-nvshmem-cu12 \
            cuda-bindings cuda-pathfinder \
            triton 2>/dev/null || true ) \
    && rm -rf /app/.venv/lib/python*/site-packages/nvidia* \
        /app/.venv/lib/python*/site-packages/cuda* \
        /app/.venv/lib/python*/site-packages/triton* \
        /app/.venv/lib/python*/site-packages/_cuda_*.pth \
        /app/.venv/lib/python*/site-packages/_cuda_*.py

# Pre-cache the embedding model to avoid HuggingFace downloads on cold start
RUN /app/.venv/bin/python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
# ---- Stage 2: runtime ----
# Final image: the virtualenv and source from the builder, the pre-downloaded
# HuggingFace cache and the ChromaDB data, served by uvicorn as a non-root user.
FROM python:3.12-slim AS runtime
LABEL maintainer="Jose María Ponce Bernabé <jmponcebe@gmail.com>"

# Unprivileged account (UID/GID 1000). The data and cache parent directories
# are created here, already owned by appuser, so the COPY --chown steps below
# need no recursive chown layer (which would duplicate the copied files).
RUN groupadd --gid 1000 appuser \
    && useradd --uid 1000 --gid appuser --create-home appuser \
    && install -d -o appuser -g appuser /app/data /home/appuser/.cache

WORKDIR /app

# Installed Python environment
COPY --from=builder /app /app

# HuggingFace model cache (pre-downloaded in builder), owned by appuser
COPY --from=builder --chown=appuser:appuser /root/.cache/huggingface /home/appuser/.cache/huggingface

# Pre-built ChromaDB embeddings (required), owned by appuser
COPY --chown=appuser:appuser data/chroma/ /app/data/chroma/

USER appuser

# Cloud Run injects PORT (default 8080); allow override
ENV PORT=8080 \
    CHROMA_PERSIST_DIR=/app/data/chroma
EXPOSE ${PORT}

# Exec form with an explicit shell so $PORT is expanded at runtime; "exec"
# replaces the shell so uvicorn runs as PID 1 and receives SIGTERM directly.
CMD ["/bin/sh", "-c", "exec /app/.venv/bin/uvicorn pharmagraphrag.api.main:app --host 0.0.0.0 --port \"$PORT\""]