diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 373bc9d..a71b502 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -24,7 +24,8 @@ All three development phases are finished. The system is fully operational end-t | Agent Mode | Complete | `src/pharmagraphrag/agent/` (LangGraph ReAct + multi-agent supervisor) | | Observability | Complete | `src/pharmagraphrag/observability.py` (Langfuse tracing) | | Docker Compose | Complete | `docker-compose.yml` + `docker/` | -| CI/CD | Complete | `.github/workflows/ci.yml` + `deploy.yml` | +| Kubernetes / Helm | Complete | `k8s/` (raw manifests) + `helm/pharmagraphrag/` (chart) | +| CI/CD | Complete | `.github/workflows/ci.yml` + `deploy.yml` + `deploy-gke.yml` | | Evaluation | Complete | `src/pharmagraphrag/evaluation/` (RAGAS metrics, agent eval, curated testset) | | Tests | 263 passing | `tests/` | | Cloud Deployment | Live | Streamlit Cloud + Cloud Run + Neo4j Aura | @@ -102,6 +103,7 @@ FDA FAERS (CSV) + DailyMed (API) - **API**: FastAPI >= 0.115 with Pydantic v2 - **UI**: Streamlit 1.54+ with streamlit-agraph, pyvis, plotly - **Containers**: Docker Compose (Neo4j + API + UI + optional Ollama) +- **Kubernetes**: Helm 3 chart (`helm/pharmagraphrag/`) + raw manifests (`k8s/`). HPA on CPU/memory, startup probes tuned for ~50s embedding-model cold start, LoadBalancer for UI, ClusterIP for API, optional GKE Ingress + managed cert. Designed for on-demand GKE Autopilot (destroy cluster after demos). 
- **CI/CD**: GitHub Actions (ci.yml: lint+test on push; deploy.yml: CD on v* tags via Cloud Build)
 - **Evaluation**: RAGAS 0.4.3 (Faithfulness, Relevancy, Precision, Recall, Correctness) + custom agent tool accuracy
 - **Testing**: pytest (261 tests passing)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index aaa7056..0707e53 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -63,6 +63,7 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       docker: ${{ steps.filter.outputs.docker }}
+      helm: ${{ steps.filter.outputs.helm }}
     steps:
       - uses: actions/checkout@v4
       - uses: dorny/paths-filter@v3
@@ -75,6 +76,48 @@ jobs:
               - 'uv.lock'
               - 'requirements.txt'
               - '.github/workflows/ci.yml'
+            helm:
+              - 'helm/**'
+              - 'k8s/**'
+              - '.github/workflows/ci.yml'
+
+  # ---------------------------------------------------------------
+  # Job 3 — Validate Helm chart and K8s manifests
+  # ---------------------------------------------------------------
+  helm-validate:
+    runs-on: ubuntu-latest
+    needs: changes
+    if: needs.changes.outputs.helm == 'true'
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: v3.16.2
+
+      - name: Helm lint
+        run: helm lint helm/pharmagraphrag
+
+      - name: Helm template (default values)
+        run: helm template demo helm/pharmagraphrag --namespace pharmagraphrag > /tmp/rendered.yaml
+
+      - name: Install kubeconform
+        run: |
+          curl -fsSL -o kubeconform.tar.gz https://github.com/yannh/kubeconform/releases/download/v0.6.7/kubeconform-linux-amd64.tar.gz
+          tar xf kubeconform.tar.gz
+          sudo mv kubeconform /usr/local/bin/
+
+      - name: Validate rendered manifests against K8s schema
+        run: kubeconform -strict -ignore-missing-schemas -schema-location default /tmp/rendered.yaml
+
+      - name: Validate raw k8s/ manifests
+        run: |
+          # Skip the example secret file
+          for f in k8s/*.yaml; do
+            [[ "$f" == *secret.example.yaml ]] && continue
+            kubeconform -strict -ignore-missing-schemas -schema-location default "$f"
+          done

   # ---------------------------------------------------------------
   # Job 2 — Build Docker images (only when Docker-relevant files change)
diff --git a/.github/workflows/deploy-gke.yml b/.github/workflows/deploy-gke.yml
new file mode 100644
index 0000000..62e67be
--- /dev/null
+++ b/.github/workflows/deploy-gke.yml
@@ -0,0 +1,162 @@
+# ============================================================
+# GitHub Actions: Deploy PharmaGraphRAG to GKE
+# ============================================================
+# Triggered by tags matching v*-k8s (e.g. v1.5.0-k8s).
+# Reuses CI workflow first, then builds API+UI images and
+# deploys via Helm to a GKE Autopilot cluster.
+#
+# Required GitHub secrets:
+#   - GCP_SA_KEY: Service account JSON key (roles: container.developer,
+#     artifactregistry.writer, storage.admin)
+#   - GKE_CLUSTER_NAME: e.g. pharmagraphrag-autopilot
+#   - GKE_CLUSTER_LOCATION: e.g. us-central1
+#   - GKE_PROJECT_ID: e.g. pharmagraphrag
+#   - PGRAG_NEO4J_URI
+#   - PGRAG_NEO4J_PASSWORD
+#   - PGRAG_GEMINI_API_KEY
+# ============================================================
+name: Deploy to GKE
+
+on:
+  push:
+    tags:
+      - "v*-k8s"
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Image tag to deploy (e.g. v1.5.0-k8s)"
+        required: true
+        default: "latest-k8s"
+
+env:
+  REGISTRY: gcr.io
+  PROJECT_ID: ${{ secrets.GKE_PROJECT_ID }}
+  API_IMAGE: pharmagraphrag-api
+  UI_IMAGE: pharmagraphrag-ui
+  CLUSTER_NAME: ${{ secrets.GKE_CLUSTER_NAME }}
+  CLUSTER_LOCATION: ${{ secrets.GKE_CLUSTER_LOCATION }}
+  NAMESPACE: pharmagraphrag
+
+jobs:
+  ci:
+    uses: ./.github/workflows/ci.yml
+
+  build-and-deploy:
+    needs: ci
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Resolve image tag
+        id: tag
+        env:
+          # User-controlled input: hand it over via env, never interpolate it into the script.
+          INPUT_TAG: ${{ github.event.inputs.tag }}
+        run: |
+          if [[ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
+            tag="$INPUT_TAG"
+          else
+            tag="$GITHUB_REF_NAME"
+          fi
+          # Enforce the Docker tag grammar so later steps can interpolate the value safely.
+          tag_re='^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$'
+          if [[ ! "$tag" =~ $tag_re ]]; then
+            echo "::error::Resolved image tag is not a valid Docker tag"
+            exit 1
+          fi
+          echo "value=$tag" >> "$GITHUB_OUTPUT"
+
+      - name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+
+      - name: Set up gcloud CLI
+        uses: google-github-actions/setup-gcloud@v2
+        with:
+          project_id: ${{ env.PROJECT_ID }}
+
+      - name: Configure Docker for GCR
+        run: gcloud auth configure-docker gcr.io --quiet
+
+      - name: Download ChromaDB snapshot from GCS
+        run: |
+          mkdir -p data/chroma
+          gcloud storage cp -r gs://pharmagraphrag-data/chroma/chroma/* data/chroma/
+
+      - name: Build API image
+        run: |
+          cp docker/Dockerfile.cloudrun.dockerignore .dockerignore
+          docker build -f docker/Dockerfile.cloudrun \
+            -t ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.API_IMAGE }}:${{ steps.tag.outputs.value }} \
+            -t ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.API_IMAGE }}:latest-k8s \
+            .
+
+      - name: Build UI image
+        run: |
+          docker build -f docker/Dockerfile.ui \
+            -t ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.UI_IMAGE }}:${{ steps.tag.outputs.value }} \
+            -t ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.UI_IMAGE }}:latest-k8s \
+            .
+
+      - name: Push images
+        run: |
+          docker push ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.API_IMAGE }}:${{ steps.tag.outputs.value }}
+          docker push ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.API_IMAGE }}:latest-k8s
+          docker push ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.UI_IMAGE }}:${{ steps.tag.outputs.value }}
+          docker push ${{ env.REGISTRY }}/${{ env.PROJECT_ID }}/${{ env.UI_IMAGE }}:latest-k8s
+
+      - name: Install gke-gcloud-auth-plugin
+        run: gcloud components install gke-gcloud-auth-plugin --quiet
+
+      - name: Get GKE credentials
+        run: |
+          gcloud container clusters get-credentials ${{ env.CLUSTER_NAME }} \
+            --region ${{ env.CLUSTER_LOCATION }} \
+            --project ${{ env.PROJECT_ID }}
+
+      - name: Create namespace if missing
+        run: |
+          kubectl get namespace ${{ env.NAMESPACE }} 2>/dev/null \
+            || kubectl create namespace ${{ env.NAMESPACE }}
+
+      - name: Apply secrets
+        env:
+          # Secrets travel as env vars so quotes or shell metacharacters in a value cannot break the command.
+          GEMINI_API_KEY: ${{ secrets.PGRAG_GEMINI_API_KEY }}
+          NEO4J_URI: ${{ secrets.PGRAG_NEO4J_URI }}
+          NEO4J_PASSWORD: ${{ secrets.PGRAG_NEO4J_PASSWORD }}
+        run: |
+          kubectl -n ${{ env.NAMESPACE }} create secret generic pharmagraphrag-secrets \
+            --from-literal=GEMINI_API_KEY="$GEMINI_API_KEY" \
+            --from-literal=NEO4J_URI="$NEO4J_URI" \
+            --from-literal=NEO4J_PASSWORD="$NEO4J_PASSWORD" \
+            --dry-run=client -o yaml | kubectl apply -f -
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+        with:
+          # Same Helm version as the helm-validate job in ci.yml, so the chart is deployed with the tool that linted it.
+          version: v3.16.2
+
+      - name: Deploy via Helm
+        run: |
+          helm upgrade --install pharmagraphrag helm/pharmagraphrag \
+            --namespace ${{ env.NAMESPACE }} \
+            --set image.registry=${{ env.REGISTRY }}/${{ env.PROJECT_ID }} \
+            --set image.tag=${{ steps.tag.outputs.value }} \
+            --set secrets.create=false \
+            --set secrets.existingSecret=pharmagraphrag-secrets \
+            --wait --timeout 10m
+
+      - name: Show deployment status
+        run: |
+          kubectl -n ${{ env.NAMESPACE }} get pods,svc,hpa
+          echo "---"
+          echo "UI external IP (may take 1-2 min):"
+          kubectl -n ${{ env.NAMESPACE }} get svc pharmagraphrag-ui -o wide
diff --git a/README.md b/README.md
index 7db9de1..dc306a7 100644
--- a/README.md
+++ b/README.md
@@ -360,6 +360,17 @@ The project is **deployed and live** on a distributed cloud architecture:
 | API + Vector Store | [Google Cloud Run](https://cloud.google.com/run) | [pharmagraphrag-api-...run.app](https://pharmagraphrag-api-893694384146.us-central1.run.app/health) |
 | Knowledge Graph | [Neo4j Aura](https://neo4j.com/cloud/aura-free/) | Managed instance (11.9K nodes, 381K rels) |

+### Deployment Options
+
+PharmaGraphRAG ships with two cloud deployment paths that you can pick from:
+
+| Path | Best for | Cost (idle) | Files |
+| --- | --- | --- | --- |
+| **Cloud Run + Streamlit Cloud** (default) | Low-traffic demos, scale-to-zero | $0 | [`cloudbuild.yaml`](cloudbuild.yaml), [`docker/Dockerfile.cloudrun`](docker/Dockerfile.cloudrun) |
+| **GKE Autopilot + Helm** | Production-grade orchestration, HPA, rolling updates | ~$1-2/h while running | [`k8s/`](k8s/), [`helm/pharmagraphrag/`](helm/pharmagraphrag/), [`.github/workflows/deploy-gke.yml`](.github/workflows/deploy-gke.yml) |
+
+The Kubernetes path was added to the portfolio to demonstrate production patterns: parameterized Helm chart, HorizontalPodAutoscalers, ConfigMap/Secret separation, probes tuned for the embedding-model cold start, and an automated GitHub Actions workflow that builds images, pushes to GCR and rolls out via `helm upgrade`. The cluster is **provisioned on-demand**: manifests live permanently in the repo, and the cluster is destroyed after each demo to avoid GKE costs. See [`k8s/README.md`](k8s/README.md) for the deploy/destroy commands.
+
📋 Reproducing the deployment diff --git a/helm/pharmagraphrag/.helmignore b/helm/pharmagraphrag/.helmignore new file mode 100644 index 0000000..86bdce1 --- /dev/null +++ b/helm/pharmagraphrag/.helmignore @@ -0,0 +1,17 @@ +# Patterns to ignore when building Helm packages. +.DS_Store +.git/ +.gitignore +.bzr/ +.hg/ +.svn/ +*.swp +*.tmp +*.bak +*.orig +*~ +.project +.idea/ +*.tmproj +.vscode/ +README.md diff --git a/helm/pharmagraphrag/Chart.yaml b/helm/pharmagraphrag/Chart.yaml new file mode 100644 index 0000000..aa2acc0 --- /dev/null +++ b/helm/pharmagraphrag/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: pharmagraphrag +description: GraphRAG system for drug interactions & adverse events (FDA data + Neo4j + ChromaDB + Gemini/Ollama) +type: application +version: 0.1.0 +appVersion: "0.1.0" +keywords: + - graphrag + - rag + - neo4j + - chromadb + - llm + - pharmacovigilance +home: https://github.com/jmponcebe/PharmaGraphRAG +sources: + - https://github.com/jmponcebe/PharmaGraphRAG +maintainers: + - name: Jose María Ponce Bernabé + email: jmponcebe@gmail.com +icon: https://raw.githubusercontent.com/jmponcebe/PharmaGraphRAG/main/assets/screenshots/social-preview.png diff --git a/helm/pharmagraphrag/README.md b/helm/pharmagraphrag/README.md new file mode 100644 index 0000000..6c1b26e --- /dev/null +++ b/helm/pharmagraphrag/README.md @@ -0,0 +1,63 @@ +# PharmaGraphRAG Helm chart + +A Helm 3 chart that packages PharmaGraphRAG (API + UI) for Kubernetes. 
+ +## TL;DR + +```bash +# Install / upgrade, letting the chart create the Secret (dev/demo only; by default the chart references an existing Secret) +helm upgrade --install pharmagraphrag ./helm/pharmagraphrag \ + --namespace pharmagraphrag --create-namespace \ + --set secrets.create=true \ + --set secrets.values.GEMINI_API_KEY="$GEMINI_API_KEY" \ + --set secrets.values.NEO4J_URI="$NEO4J_URI" \ + --set secrets.values.NEO4J_PASSWORD="$NEO4J_PASSWORD" + +# Uninstall (frees up GKE costs) +helm uninstall pharmagraphrag -n pharmagraphrag +``` + +## What it deploys + +- `Deployment` + `Service` + `HPA` for the FastAPI **API**. +- `Deployment` + `Service` (LoadBalancer by default) + `HPA` for the Streamlit **UI**. +- `ConfigMap` with non-secret config (LLM model, Neo4j user, Chroma path, inter-service URL). +- `Secret` with credentials (created by the chart in dev, or referenced from an existing one in prod). +- Optional `Ingress` + GKE `ManagedCertificate` for custom-domain HTTPS. +- Production-grade probes: startup probe sized for ~50s embedding-model cold start, plus liveness + readiness. 
+
+## Key values
+
+| Key | Default | Notes |
+|---|---|---|
+| `image.registry` | `gcr.io/pharmagraphrag` | Change to your registry |
+| `image.tag` | `latest` | Falls back to `Chart.appVersion` if empty |
+| `api.replicaCount` / `ui.replicaCount` | `1` | Ignored when `autoscaling.enabled` |
+| `api.autoscaling.{min,max}Replicas` | `1` / `3` | CPU 70%, memory 80% targets |
+| `ui.autoscaling.{min,max}Replicas` | `1` / `2` | CPU 75% target |
+| `ui.service.type` | `LoadBalancer` | Use `ClusterIP` if exposing via Ingress |
+| `secrets.create` | `false` | `true` to let the chart create the Secret (dev only) |
+| `secrets.existingSecret` | `pharmagraphrag-secrets` | Name when `create=false` |
+| `ingress.enabled` | `false` | Set `true` + provide `ingress.host` to use GKE Ingress |
+
+## Render without installing
+
+```bash
+helm template demo ./helm/pharmagraphrag --namespace pharmagraphrag
+```
+
+## Lint
+
+```bash
+helm lint ./helm/pharmagraphrag
+```
+
+## Why a chart and not just raw manifests?
+
+Both are checked in:
+
+- [`k8s/`](../../k8s) — raw manifests, useful for understanding what gets created.
+- [`helm/pharmagraphrag/`](.) — Helm chart, recommended for real deploys: parameterization,
+  upgrade/rollback, environment-specific values files, NOTES output, secret indirection.
+
+CI/CD ([`deploy-gke.yml`](../../.github/workflows/deploy-gke.yml)) uses the Helm chart.
diff --git a/helm/pharmagraphrag/templates/NOTES.txt b/helm/pharmagraphrag/templates/NOTES.txt
new file mode 100644
index 0000000..acd0216
--- /dev/null
+++ b/helm/pharmagraphrag/templates/NOTES.txt
@@ -0,0 +1,24 @@
+PharmaGraphRAG release "{{ .Release.Name }}" deployed to namespace "{{ .Release.Namespace }}".
+
+To check status:
+  kubectl -n {{ .Release.Namespace }} get pods,svc,hpa,ingress
+
+{{- if eq .Values.ui.service.type "LoadBalancer" }}
+
+The UI is exposed as a LoadBalancer Service. Wait ~1-2 min for the external IP to be assigned, then:
+  kubectl -n {{ .Release.Namespace }} get svc {{ .Release.Name }}-ui
+
+Once the EXTERNAL-IP shows a value (not <pending>), open: http://<EXTERNAL-IP>
+{{- end }}
+
+{{- if .Values.ingress.enabled }}
+
+Ingress is enabled at host: {{ .Values.ingress.host }}
+Point a DNS A record to the ingress IP, then access: https://{{ .Values.ingress.host }}
+{{- end }}
+
+API is reachable inside the cluster at:
+  http://{{ .Release.Name }}-api.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.api.service.port }}
+
+To uninstall (and free up GKE costs):
+  helm uninstall {{ .Release.Name }} -n {{ .Release.Namespace }}
diff --git a/helm/pharmagraphrag/templates/_helpers.tpl b/helm/pharmagraphrag/templates/_helpers.tpl
new file mode 100644
index 0000000..e2ae4c8
--- /dev/null
+++ b/helm/pharmagraphrag/templates/_helpers.tpl
@@ -0,0 +1,42 @@
+{{/*
+Common labels applied to every resource.
+*/}}
+{{- define "pharmagraphrag.labels" -}}
+app.kubernetes.io/name: {{ .Chart.Name }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" }}
+{{- end }}
+
+{{/*
+Selector labels for a given component.
+Usage: {{ include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "api") }}
+*/}}
+{{- define "pharmagraphrag.selectorLabels" -}}
+app.kubernetes.io/name: {{ .Chart.Name }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/component: {{ .component }}
+{{- end }}
+
+{{/*
+Component image string (registry/repository:tag).
+Falls back to Chart.appVersion if no tag is set.
+*/}}
+{{- define "pharmagraphrag.image" -}}
+{{- $componentTag := .componentImage.tag | default "" -}}
+{{- $globalTag := .globalImage.tag | default "" -}}
+{{- $tag := $componentTag | default $globalTag | default .chartAppVersion -}}
+{{ .globalImage.registry }}/{{ .componentImage.repository }}:{{ $tag }}
+{{- end }}
+
+{{/*
+Resolve the secret name (existing or generated).
+*/}}
+{{- define "pharmagraphrag.secretName" -}}
+{{- if .Values.secrets.create -}}
+{{ .Release.Name }}-secrets
+{{- else -}}
+{{ .Values.secrets.existingSecret }}
+{{- end -}}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/api-deployment.yaml b/helm/pharmagraphrag/templates/api-deployment.yaml
new file mode 100644
index 0000000..6aafd0e
--- /dev/null
+++ b/helm/pharmagraphrag/templates/api-deployment.yaml
@@ -0,0 +1,72 @@
+{{- if .Values.api.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}-api
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+    app.kubernetes.io/component: api
+spec:
+  {{- if not .Values.api.autoscaling.enabled }}
+  replicas: {{ .Values.api.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "api") | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        # Roll the pods whenever the rendered ConfigMap changes; envFrom values are only read at container start.
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
+      labels:
+        {{- include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "api") | nindent 8 }}
+    spec:
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: api
+          image: {{ include "pharmagraphrag.image" (dict "globalImage" .Values.image "componentImage" .Values.api.image "chartAppVersion" .Chart.AppVersion) }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          ports:
+            - name: http
+              containerPort: 8000
+          envFrom:
+            - configMapRef:
+                name: {{ .Release.Name }}-config
+            - secretRef:
+                name: {{ include "pharmagraphrag.secretName" . }}
+          env:
+            - name: PORT
+              value: "8000"
+          resources:
+            {{- toYaml .Values.api.resources | nindent 12 }}
+          startupProbe:
+            httpGet:
+              path: /health
+              port: http
+            {{- toYaml .Values.api.probes.startup | nindent 12 }}
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            {{- toYaml .Values.api.probes.readiness | nindent 12 }}
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: http
+            {{- toYaml .Values.api.probes.liveness | nindent 12 }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/api-hpa.yaml b/helm/pharmagraphrag/templates/api-hpa.yaml
new file mode 100644
index 0000000..e0dab43
--- /dev/null
+++ b/helm/pharmagraphrag/templates/api-hpa.yaml
@@ -0,0 +1,31 @@
+{{- if and .Values.api.enabled .Values.api.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ .Release.Name }}-api
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+    app.kubernetes.io/component: api
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ .Release.Name }}-api
+  minReplicas: {{ .Values.api.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.api.autoscaling.maxReplicas }}
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}
+    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/api-service.yaml b/helm/pharmagraphrag/templates/api-service.yaml
new file mode 100644
index 0000000..d1f6e7a
--- /dev/null
+++ b/helm/pharmagraphrag/templates/api-service.yaml
@@ -0,0 +1,18 @@
+{{- if .Values.api.enabled }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-api
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+    app.kubernetes.io/component: api
+spec:
+  type: {{ .Values.api.service.type }}
+  ports:
+    - port: {{ .Values.api.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "api") | nindent 4 }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/configmap.yaml b/helm/pharmagraphrag/templates/configmap.yaml
new file mode 100644
index 0000000..f47f640
--- /dev/null
+++ b/helm/pharmagraphrag/templates/configmap.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-config
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+data:
+  {{- range $key, $value := .Values.config }}
+  {{ $key }}: {{ $value | quote }}
+  {{- end }}
+  {{- if not (hasKey .Values.config "API_URL") }}
+  # Inter-service DNS for the UI to reach the API (auto-injected when not overridden)
+  API_URL: "http://{{ .Release.Name }}-api:{{ .Values.api.service.port }}"
+  {{- end }}
diff --git a/helm/pharmagraphrag/templates/ingress.yaml b/helm/pharmagraphrag/templates/ingress.yaml
new file mode 100644
index 0000000..6830909
--- /dev/null
+++ b/helm/pharmagraphrag/templates/ingress.yaml
@@ -0,0 +1,48 @@
+{{- if .Values.ingress.enabled }}
+{{- $host := required "ingress.host is required when ingress.enabled=true" .Values.ingress.host -}}
+{{- if .Values.ingress.managedCertificate }}
+apiVersion: networking.gke.io/v1
+kind: ManagedCertificate
+metadata:
+  name: {{ .Release.Name }}-cert
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+spec:
+  domains:
+    - {{ $host | quote }}
+---
+{{- end }}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ .Release.Name }}
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+  annotations:
+    kubernetes.io/ingress.class: {{ .Values.ingress.className | quote }}
+    {{- if .Values.ingress.managedCertificate }}
+    networking.gke.io/managed-certificates: {{ .Release.Name }}-cert
+    {{- end }}
+    {{- if .Values.ingress.staticIPName }}
+    kubernetes.io/ingress.global-static-ip-name: {{ .Values.ingress.staticIPName | quote }}
+    {{- end }}
+spec:
+  rules:
+    - host: {{ $host | quote }}
+      http:
+        paths:
+          - path: /api
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ .Release.Name }}-api
+                port:
+                  number: {{ .Values.api.service.port }}
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ .Release.Name }}-ui
+                port:
+                  number: {{ .Values.ui.service.port }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/secret.yaml b/helm/pharmagraphrag/templates/secret.yaml
new file mode 100644
index 0000000..94df98a
--- /dev/null
+++ b/helm/pharmagraphrag/templates/secret.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.secrets.create }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ .Release.Name }}-secrets
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+type: Opaque
+stringData:
+  {{- range $key, $value := .Values.secrets.values }}
+  {{ $key }}: {{ $value | quote }}
+  {{- end }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/ui-deployment.yaml b/helm/pharmagraphrag/templates/ui-deployment.yaml
new file mode 100644
index 0000000..81e8446
--- /dev/null
+++ b/helm/pharmagraphrag/templates/ui-deployment.yaml
@@ -0,0 +1,62 @@
+{{- if .Values.ui.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}-ui
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+    app.kubernetes.io/component: ui
+spec:
+  {{- if not .Values.ui.autoscaling.enabled }}
+  replicas: {{ .Values.ui.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "ui") | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        # Roll the pods whenever the rendered ConfigMap changes; envFrom values are only read at container start.
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
+      labels:
+        {{- include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "ui") | nindent 8 }}
+    spec:
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: ui
+          image: {{ include "pharmagraphrag.image" (dict "globalImage" .Values.image "componentImage" .Values.ui.image "chartAppVersion" .Chart.AppVersion) }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.ui.service.targetPort }}
+          envFrom:
+            - configMapRef:
+                name: {{ .Release.Name }}-config
+          resources:
+            {{- toYaml .Values.ui.resources | nindent 12 }}
+          readinessProbe:
+            httpGet:
+              path: /_stcore/health
+              port: http
+            {{- toYaml .Values.ui.probes.readiness | nindent 12 }}
+          livenessProbe:
+            httpGet:
+              path: /_stcore/health
+              port: http
+            {{- toYaml .Values.ui.probes.liveness | nindent 12 }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/ui-hpa.yaml b/helm/pharmagraphrag/templates/ui-hpa.yaml
new file mode 100644
index 0000000..a8e21a1
--- /dev/null
+++ b/helm/pharmagraphrag/templates/ui-hpa.yaml
@@ -0,0 +1,23 @@
+{{- if and .Values.ui.enabled .Values.ui.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ .Release.Name }}-ui
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+    app.kubernetes.io/component: ui
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ .Release.Name }}-ui
+  minReplicas: {{ .Values.ui.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.ui.autoscaling.maxReplicas }}
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.ui.autoscaling.targetCPUUtilizationPercentage }}
+{{- end }}
diff --git a/helm/pharmagraphrag/templates/ui-service.yaml b/helm/pharmagraphrag/templates/ui-service.yaml
new file mode 100644
index 0000000..41bff96
--- /dev/null
+++ b/helm/pharmagraphrag/templates/ui-service.yaml
@@ -0,0 +1,22 @@
+{{- if .Values.ui.enabled }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-ui
+  labels:
+    {{- include "pharmagraphrag.labels" . | nindent 4 }}
+    app.kubernetes.io/component: ui
+  {{- if eq .Values.ui.service.type "LoadBalancer" }}
+  annotations:
+    cloud.google.com/load-balancer-type: "External"
+  {{- end }}
+spec:
+  type: {{ .Values.ui.service.type }}
+  ports:
+    - port: {{ .Values.ui.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "pharmagraphrag.selectorLabels" (dict "Chart" .Chart "Release" .Release "component" "ui") | nindent 4 }}
+{{- end }}
diff --git a/helm/pharmagraphrag/values.yaml b/helm/pharmagraphrag/values.yaml
new file mode 100644
index 0000000..e1c7602
--- /dev/null
+++ b/helm/pharmagraphrag/values.yaml
@@ -0,0 +1,127 @@
+# Default values for pharmagraphrag.
+# This is a YAML-formatted file.
+
+# -- Global image registry shared by API + UI (override per component below if needed)
+image:
+  registry: gcr.io/pharmagraphrag
+  pullPolicy: IfNotPresent  # NOTE(review): with a mutable tag such as `latest`, a node that already cached the image will not pull a newer build
+  tag: latest  # deploy-gke.yml overrides this with the release tag via --set image.tag
+
+# -- API component (FastAPI + ChromaDB embedded)
+api:
+  enabled: true
+  image:
+    repository: pharmagraphrag-api
+    tag: ""  # falls back to .Values.image.tag and Chart.appVersion
+  replicaCount: 1  # only rendered when autoscaling.enabled is false
+  service:
+    type: ClusterIP
+    port: 8000
+  resources:
+    requests:
+      cpu: 500m
+      memory: 1Gi
+    limits:
+      cpu: 1000m
+      memory: 2Gi
+  autoscaling:
+    enabled: true
+    minReplicas: 1
+    maxReplicas: 3
+    targetCPUUtilizationPercentage: 70
+    targetMemoryUtilizationPercentage: 80  # optional: the memory metric is omitted from the HPA when this is unset
+  probes:
+    startup:
+      initialDelaySeconds: 10
+      periodSeconds: 5
+      failureThreshold: 24  # 10s delay + 24 x 5s: roughly two minutes allowed for start-up
+    readiness:
+      initialDelaySeconds: 30
+      periodSeconds: 10
+    liveness:
+      initialDelaySeconds: 60
+      periodSeconds: 30
+
+# -- UI component (Streamlit)
+ui:
+  enabled: true
+  image:
+    repository: pharmagraphrag-ui
+    tag: ""  # same fallback chain as api.image.tag
+  replicaCount: 1  # only rendered when autoscaling.enabled is false
+  service:
+    # LoadBalancer = direct external IP (recommended for demos without a domain)
+    # ClusterIP = internal-only (use Ingress for external access)
+    type: LoadBalancer
+    port: 80
+    targetPort: 8501  # used as the UI container's containerPort
+  resources:
+    requests:
+      cpu: 250m
+      memory: 512Mi
+    limits:
+      cpu: 500m
+      memory: 1Gi
+  autoscaling:
+    enabled: true
+    minReplicas: 1
+    maxReplicas: 2
+    targetCPUUtilizationPercentage: 75
+  probes:
+    readiness:
+      initialDelaySeconds: 15
+      periodSeconds: 10
+    liveness:
+      initialDelaySeconds: 30
+      periodSeconds: 30
+
+# -- Non-secret application configuration (mounted as env vars via ConfigMap).
+# Keys must match attributes in src/pharmagraphrag/config.py:Settings.
+config: + LLM_PROVIDER: "gemini" + NEO4J_USER: "neo4j" + CHROMA_PERSIST_DIR: "/app/data/chroma" + LANGFUSE_ENABLED: "false" + LANGFUSE_BASE_URL: "https://cloud.langfuse.com" + +# -- Secret values (NEVER commit real values here; override via --set or external secret manager) +secrets: + # Set to true to let Helm create the Secret (dev/demo only) + create: false + # If create=false, an existing Secret with this name must be provided out-of-band + existingSecret: pharmagraphrag-secrets + # Used only when create=true + values: + GEMINI_API_KEY: "" + NEO4J_URI: "" + NEO4J_PASSWORD: "" + LANGFUSE_PUBLIC_KEY: "" + LANGFUSE_SECRET_KEY: "" + +# -- Optional Ingress (GKE managed cert) +ingress: + enabled: false + className: "gce" + host: "" # e.g. pharmagraphrag.example.com + managedCertificate: false + staticIPName: "" # e.g. pharmagraphrag-ip (reserved beforehand with gcloud) + +# -- Pod security context (non-root) +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + +# -- Container security context +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false # ChromaDB needs writes to /tmp internally + +# -- Node selection +nodeSelector: {} +tolerations: [] +affinity: {} diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000..12ab703 --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,96 @@ +# Kubernetes manifests + +Production-grade Kubernetes manifests for PharmaGraphRAG. Two ways to deploy: + +1. **Raw manifests** (this folder) — apply with `kubectl apply -k k8s/` after creating the Secret (see below). +2. **Helm chart** ([../helm/pharmagraphrag/](../helm/pharmagraphrag/)) — recommended for real environments. 
+ +## What's deployed + +| Resource | Purpose | +|---|---| +| `namespace.yaml` | Isolates all resources in `pharmagraphrag` namespace | +| `configmap.yaml` | Non-secret config (LLM model, Neo4j user, Chroma path, internal API URL) | +| `secret.example.yaml` | Template only — real Secret created via `kubectl create secret` | +| `api-deployment.yaml` | FastAPI Deployment (1 replica baseline, ChromaDB baked-in image) | +| `api-service.yaml` | ClusterIP exposing API on port 8000 | +| `api-hpa.yaml` | HPA: scale 1→3 on CPU 70% / memory 80% | +| `ui-deployment.yaml` | Streamlit Deployment | +| `ui-service.yaml` | LoadBalancer Service (external IP) for UI | +| `ui-hpa.yaml` | HPA: scale 1→2 on CPU 75% | +| `ingress.yaml` | Optional: GKE Ingress + managed cert (requires domain) | +| `kustomization.yaml` | Apply everything at once via Kustomize | + +## Quick deploy (GKE Autopilot) + +Assumes `gcloud`, `kubectl` and a GCP project with billing are set up. + +```bash +# 1. Create cluster (~5 min) +gcloud container clusters create-auto pharmagraphrag-autopilot \ + --region=us-central1 --project=pharmagraphrag + +# 2. Get credentials +gcloud container clusters get-credentials pharmagraphrag-autopilot \ + --region=us-central1 --project=pharmagraphrag + +# 3. Create namespace +kubectl apply -f k8s/namespace.yaml + +# 4. Create the real Secret (DO NOT use secret.example.yaml directly) +kubectl create secret generic pharmagraphrag-secrets \ + --namespace=pharmagraphrag \ + --from-literal=GEMINI_API_KEY="$GEMINI_API_KEY" \ + --from-literal=NEO4J_URI="$NEO4J_URI" \ + --from-literal=NEO4J_PASSWORD="$NEO4J_PASSWORD" + +# 5. Apply everything else +kubectl apply -k k8s/ + +# 6. Watch pods come up +kubectl -n pharmagraphrag get pods -w + +# 7. 
Get UI external IP (~1-2 min for LB provisioning) +kubectl -n pharmagraphrag get svc pharmagraphrag-ui +``` + +## Local validation with kind + +```bash +kind create cluster --name pgrag +kubectl apply -k k8s/ +# Port-forward instead of LoadBalancer (the API pod will not start until the pharmagraphrag-secrets Secret exists): +kubectl -n pharmagraphrag port-forward svc/pharmagraphrag-ui 8501:80 +``` + +## Cost & cleanup + +GKE Autopilot bills per pod CPU/memory plus a small cluster management fee. +For a portfolio demo: + +- Run cluster ~2 hours → ~$1-2 total +- **Always destroy after screenshots** to avoid surprise bills: + +```bash +helm uninstall pharmagraphrag -n pharmagraphrag # if installed via Helm +kubectl delete -k k8s/ # if applied raw +gcloud container clusters delete pharmagraphrag-autopilot \ + --region=us-central1 --project=pharmagraphrag --quiet +``` + +The cluster is **on-demand by design**: manifests live in this repo, so you can `helm install` +in 5 minutes whenever you need the live demo (e.g. before an interview screen-share). + +## Why this exists + +PharmaGraphRAG's primary cloud deployment is **Cloud Run** (lower cost for low-traffic +demos, scales to zero). 
The Kubernetes path was added as part of the +[portfolio upgrade roadmap](../README.md#deployment-options) to demonstrate +production-grade orchestration patterns: + +- Stateless API + UI Deployments with resource requests/limits +- Liveness, readiness and startup probes tuned for the embedding-model cold start +- HorizontalPodAutoscalers on CPU and memory +- ConfigMap + Secret separation +- Helm packaging with parameterized values +- CI/CD via GitHub Actions to GKE diff --git a/k8s/api-deployment.yaml b/k8s/api-deployment.yaml new file mode 100644 index 0000000..c9de744 --- /dev/null +++ b/k8s/api-deployment.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pharmagraphrag-api + namespace: pharmagraphrag + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: api +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: api + template: + metadata: + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: api + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + containers: + - name: api + image: gcr.io/pharmagraphrag/pharmagraphrag-api:latest + imagePullPolicy: Always # ":latest" is a mutable tag; IfNotPresent would keep running a stale cached image + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + ports: + - name: http + containerPort: 8000 + envFrom: + - configMapRef: + name: pharmagraphrag-config + - secretRef: + name: pharmagraphrag-secrets + env: + - name: PORT + value: "8000" + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 60 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 3 + startupProbe: + 
httpGet: + path: /health + port: http + # Embedding model + ChromaDB load can take ~50s on cold start; budget is a 10s delay, then 24 probes x 5s (~130s) + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 24 diff --git a/k8s/api-hpa.yaml b/k8s/api-hpa.yaml new file mode 100644 index 0000000..40bd948 --- /dev/null +++ b/k8s/api-hpa.yaml @@ -0,0 +1,38 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: pharmagraphrag-api + namespace: pharmagraphrag +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: pharmagraphrag-api + minReplicas: 1 + maxReplicas: 3 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 50 + periodSeconds: 60 + scaleUp: + stabilizationWindowSeconds: 60 + policies: + - type: Pods + value: 1 + periodSeconds: 60 diff --git a/k8s/api-service.yaml b/k8s/api-service.yaml new file mode 100644 index 0000000..4f89fea --- /dev/null +++ b/k8s/api-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: pharmagraphrag-api + namespace: pharmagraphrag + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: api +spec: + type: ClusterIP + ports: + - port: 8000 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: api diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml new file mode 100644 index 0000000..3962b89 --- /dev/null +++ b/k8s/configmap.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: pharmagraphrag-config + namespace: pharmagraphrag +data: + # Non-secret application configuration (keys must match Settings in src/pharmagraphrag/config.py) + LLM_PROVIDER: "gemini" + NEO4J_USER: "neo4j" + CHROMA_PERSIST_DIR: "/app/data/chroma" 
+ # UI uses the internal service DNS to reach the API + API_URL: "http://pharmagraphrag-api.pharmagraphrag.svc.cluster.local:8000" + # Langfuse (opt-in) + LANGFUSE_ENABLED: "false" + LANGFUSE_BASE_URL: "https://cloud.langfuse.com" diff --git a/k8s/ingress.yaml b/k8s/ingress.yaml new file mode 100644 index 0000000..9ef6d3f --- /dev/null +++ b/k8s/ingress.yaml @@ -0,0 +1,51 @@ +# Optional: Ingress with GKE managed certificate (HTTPS) +# Requires: +# 1. A domain whose A record points to the Ingress IP (the static IP named pharmagraphrag-ip below). +# 2. ManagedCertificate resource (GKE-specific). +# +# For quick demos without a domain, the LoadBalancer Service in +# ui-service.yaml already exposes the UI directly (no Ingress needed). +# +# Apply only if you have a domain; replace both REPLACE_WITH_YOUR_DOMAIN.example.com placeholders first. +apiVersion: networking.gke.io/v1 +kind: ManagedCertificate +metadata: + name: pharmagraphrag-cert + namespace: pharmagraphrag +spec: + domains: + - REPLACE_WITH_YOUR_DOMAIN.example.com +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: pharmagraphrag + namespace: pharmagraphrag + annotations: + kubernetes.io/ingress.class: "gce" + networking.gke.io/managed-certificates: pharmagraphrag-cert + kubernetes.io/ingress.global-static-ip-name: pharmagraphrag-ip +spec: + defaultBackend: + service: + name: pharmagraphrag-ui + port: + number: 80 + rules: + - host: REPLACE_WITH_YOUR_DOMAIN.example.com + http: + paths: + - path: /api + pathType: Prefix + backend: + service: + name: pharmagraphrag-api + port: + number: 8000 + - path: / + pathType: Prefix + backend: + service: + name: pharmagraphrag-ui + port: + number: 80 diff --git a/k8s/kustomization.yaml b/k8s/kustomization.yaml new file mode 100644 index 0000000..88d3225 --- /dev/null +++ b/k8s/kustomization.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: pharmagraphrag +resources: + - namespace.yaml + - configmap.yaml + - api-deployment.yaml + - api-service.yaml + - api-hpa.yaml + - ui-deployment.yaml + - ui-service.yaml + - 
ui-hpa.yaml + # secret.example.yaml is intentionally excluded — create the real + # secret out-of-band with `kubectl create secret` (see secret.example.yaml). + # ingress.yaml is opt-in — add it to `resources` if you have a domain. diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml new file mode 100644 index 0000000..c0cacf0 --- /dev/null +++ b/k8s/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: pharmagraphrag + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/managed-by: kubectl diff --git a/k8s/secret.example.yaml b/k8s/secret.example.yaml new file mode 100644 index 0000000..7e0c38d --- /dev/null +++ b/k8s/secret.example.yaml @@ -0,0 +1,24 @@ +# Example Secret manifest. DO NOT commit real values. +# +# Apply with real values via: +# kubectl create secret generic pharmagraphrag-secrets \ +# --namespace=pharmagraphrag \ +# --from-literal=GEMINI_API_KEY=$GEMINI_API_KEY \ +# --from-literal=NEO4J_URI=$NEO4J_URI \ +# --from-literal=NEO4J_PASSWORD=$NEO4J_PASSWORD \ +# --from-literal=LANGFUSE_PUBLIC_KEY=$LANGFUSE_PUBLIC_KEY \ +# --from-literal=LANGFUSE_SECRET_KEY=$LANGFUSE_SECRET_KEY +# +# Or use external-secrets-operator / GCP Secret Manager in production. 
+apiVersion: v1 +kind: Secret +metadata: + name: pharmagraphrag-secrets + namespace: pharmagraphrag +type: Opaque +stringData: + GEMINI_API_KEY: "REPLACE_ME" + NEO4J_URI: "neo4j+s://REPLACE_ME.databases.neo4j.io" + NEO4J_PASSWORD: "REPLACE_ME" + LANGFUSE_PUBLIC_KEY: "" + LANGFUSE_SECRET_KEY: "" diff --git a/k8s/ui-deployment.yaml b/k8s/ui-deployment.yaml new file mode 100644 index 0000000..07e2730 --- /dev/null +++ b/k8s/ui-deployment.yaml @@ -0,0 +1,64 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pharmagraphrag-ui + namespace: pharmagraphrag + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: ui +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: ui + template: + metadata: + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: ui + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + containers: + - name: ui + image: gcr.io/pharmagraphrag/pharmagraphrag-ui:latest + imagePullPolicy: Always # ":latest" is a mutable tag; IfNotPresent would keep running a stale cached image + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + ports: + - name: http + containerPort: 8501 + envFrom: + - configMapRef: + name: pharmagraphrag-config + resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "500m" + memory: "1Gi" + readinessProbe: + httpGet: + path: /_stcore/health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /_stcore/health + port: http + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 3 diff --git a/k8s/ui-hpa.yaml b/k8s/ui-hpa.yaml new file mode 100644 index 0000000..cc25119 --- /dev/null +++ b/k8s/ui-hpa.yaml @@ -0,0 +1,19 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: pharmagraphrag-ui + namespace: pharmagraphrag +spec: 
+ scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: pharmagraphrag-ui + minReplicas: 1 + maxReplicas: 2 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 diff --git a/k8s/ui-service.yaml b/k8s/ui-service.yaml new file mode 100644 index 0000000..b162126 --- /dev/null +++ b/k8s/ui-service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: pharmagraphrag-ui + namespace: pharmagraphrag + labels: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: ui + annotations: + # GKE: provision a Google Cloud Load Balancer for external access. NOTE(review): confirm GKE honors the "External" value; type LoadBalancer may already be external by default + cloud.google.com/load-balancer-type: "External" +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/name: pharmagraphrag + app.kubernetes.io/component: ui