Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 47 additions & 24 deletions .github/workflows/google-cloudrun-docker.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# This workflow builds and pushes Docker containers to Google Artifact Registry
# and deploys both backend and frontend on Cloud Run when a commit is pushed to the "production"
# branch.
# Build and deploy QueryPal to Cloud Run.
# Runs on pushes to the production branch.
#
# Infrastructure changes (VPC connector, Secret Manager, IAM) are managed by
# Terraform in the terraform/ directory and must be applied before first deploy.

name: 'Build and Deploy QueryPal to Cloud Run'

Expand All @@ -16,6 +18,11 @@ env:
BACKEND_SERVICE: 'querypal-backend'
FRONTEND_SERVICE: 'querypal-frontend'
WORKLOAD_IDENTITY_PROVIDER: 'projects/874216619692/locations/global/workloadIdentityPools/github/providers/querypal'
# Short names of the Cloud Run service account and the VPC connector created by Terraform.
# The full SA email is constructed inline in flags using ${{ env.PROJECT_ID }}
# because GitHub Actions does not interpolate env vars inside the env: block.
CLOUD_RUN_SA_NAME: 'querypal-cloudrun-sa'
VPC_CONNECTOR: 'querypal-vpc-connector'

jobs:
deploy:
Expand All @@ -29,80 +36,96 @@ jobs:
- name: 'Checkout'
uses: 'actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332' # actions/checkout@v4

# Configure Workload Identity Federation and generate an access token.
- id: 'auth'
name: 'Authenticate to Google Cloud'
uses: 'google-github-actions/auth@f112390a2df9932162083945e46d439060d66ec2' # google-github-actions/auth@v2
with:
workload_identity_provider: '${{ env.WORKLOAD_IDENTITY_PROVIDER }}'
service_account: 'github-actions@gen-lang-client-0698668474.iam.gserviceaccount.com'
service_account: 'github-actions@${{ env.PROJECT_ID }}.iam.gserviceaccount.com'

# Set up Cloud SDK
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200' # google-github-actions/setup-gcloud@v2

# Configure Docker to use gcloud as a credential helper
- name: 'Configure Docker for GCR'
run: |-
gcloud auth configure-docker --quiet
run: gcloud auth configure-docker --quiet

# ── Backend ──────────────────────────────────────────────────────────────

# Build and Push Backend Container
- name: 'Build and Push Backend Container'
run: |-
cd backend
DOCKER_TAG="gcr.io/${{ env.PROJECT_ID }}/${{ env.BACKEND_SERVICE }}:${{ github.sha }}"
docker build --tag "${DOCKER_TAG}" --platform linux/amd64 .
docker push "${DOCKER_TAG}"

# Deploy Backend to Cloud Run
- id: 'deploy-backend'
name: 'Deploy Backend to Cloud Run'
uses: 'google-github-actions/deploy-cloudrun@33553064113a37d688aa6937bacbdc481580be17' # google-github-actions/deploy-cloudrun@v2
with:
service: '${{ env.BACKEND_SERVICE }}'
region: '${{ env.REGION }}'
image: 'gcr.io/${{ env.PROJECT_ID }}/${{ env.BACKEND_SERVICE }}:${{ github.sha }}'
# Non-secret runtime configuration only.
env_vars: |
ENVIRONMENT=production
AZURE_TENANT_ID=${{ secrets.AZURE_TENANT_ID }}
AZURE_CLIENT_ID=${{ secrets.AZURE_CLIENT_ID }}
AZURE_CLIENT_SECRET=${{ secrets.AZURE_CLIENT_SECRET }}
ARM_SCOPE=https://management.azure.com/.default
GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}
DB_USER=${{ secrets.DB_USER }}
DB_PASS=${{ secrets.DB_PASS }}
DB_NAME=querypal
DB_UNIX_SOCKET=/cloudsql/gen-lang-client-0698668474:europe-west1:querypal-db
DB_UNIX_SOCKET=/cloudsql/${{ env.PROJECT_ID }}:${{ env.REGION }}:querypal-db
# Sensitive values are read directly from Secret Manager at runtime.
# Every secret listed below must exist before the first deploy (created by terraform/secrets.tf).
secrets: |
AZURE_TENANT_ID=querypal-azure-tenant-id:latest
AZURE_CLIENT_ID=querypal-azure-client-id:latest
AZURE_CLIENT_SECRET=querypal-azure-client-secret:latest
GEMINI_API_KEY=querypal-gemini-api-key:latest
DB_USER=querypal-db-user:latest
DB_PASS=querypal-db-pass:latest
flags: |
--port=8000
--add-cloudsql-instances=gen-lang-client-0698668474:europe-west1:querypal-db
--service-account=${{ env.CLOUD_RUN_SA_NAME }}@${{ env.PROJECT_ID }}.iam.gserviceaccount.com
--add-cloudsql-instances=${{ env.PROJECT_ID }}:${{ env.REGION }}:querypal-db
--vpc-connector=${{ env.VPC_CONNECTOR }}
--vpc-egress=private-ranges-only
--ingress=internal
--allow-unauthenticated

# Build and Push Frontend Container
# ── Frontend ─────────────────────────────────────────────────────────────

- name: 'Build and Push Frontend Container'
run: |-
cd frontend
DOCKER_TAG="gcr.io/${{ env.PROJECT_ID }}/${{ env.FRONTEND_SERVICE }}:${{ github.sha }}"
# VITE_API_BASE_URL=/api tells the React app to send all API calls to
# the /api/* path on its own origin instead of a full backend URL.
# Nginx then proxies those requests to the internal backend service.
docker build --tag "${DOCKER_TAG}" --platform linux/amd64 \
--build-arg VITE_API_BASE_URL=${{ steps.deploy-backend.outputs.url }} \
--build-arg VITE_API_BASE_URL=/api \
--build-arg VITE_AZURE_REDIRECT_URI=https://querypal.virtonomy.io \
.
docker push "${DOCKER_TAG}"

# Deploy Frontend to Cloud Run
- id: 'deploy-frontend'
name: 'Deploy Frontend to Cloud Run'
uses: 'google-github-actions/deploy-cloudrun@33553064113a37d688aa6937bacbdc481580be17' # google-github-actions/deploy-cloudrun@v2
with:
service: '${{ env.FRONTEND_SERVICE }}'
region: '${{ env.REGION }}'
image: 'gcr.io/${{ env.PROJECT_ID }}/${{ env.FRONTEND_SERVICE }}:${{ github.sha }}'
# BACKEND_URL is the internal Cloud Run URL; nginx uses it at runtime to
# proxy /api/* requests to the backend (which is not publicly reachable).
env_vars: |
BACKEND_URL=${{ steps.deploy-backend.outputs.url }}
flags: |
--port=4000
--service-account=${{ env.CLOUD_RUN_SA_NAME }}@${{ env.PROJECT_ID }}.iam.gserviceaccount.com
--vpc-connector=${{ env.VPC_CONNECTOR }}
--vpc-egress=all-traffic
--ingress=all
--allow-unauthenticated

# Show output URLs
# ── Summary ───────────────────────────────────────────────────────────────

- name: 'Show deployment URLs'
run: |-
echo "Backend URL: ${{ steps.deploy-backend.outputs.url }}"
echo "Frontend URL: ${{ steps.deploy-frontend.outputs.url }}"
echo "Backend URL: ${{ steps.deploy-backend.outputs.url }} (internal only)"
150 changes: 114 additions & 36 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ Azure Cosmos DB's portal interface can be limiting for real-world data explorati
| **Backend API** | FastAPI (Python 3.12), Uvicorn, Pydantic V2 |
| **Database** | Azure Cosmos DB (MongoDB API), PostgreSQL (User Data) |
| **Cloud Platform** | Google Cloud Run, Azure Resource Manager (ARM) |
| **Infrastructure** | Terraform, GCP Secret Manager, Serverless VPC Access, Cloud SQL |
| **DevOps & CI/CD** | GitHub Actions, Docker, Google Container Registry |
| **Testing** | Vitest, React Testing Library, Pytest, Coverage.py |
| **Code Quality** | ESLint, Black, Flake8, MyPy, TypeScript Strict Mode |
Expand Down Expand Up @@ -286,50 +287,127 @@ npm run test:ui

---

## ☁️ Cloud Deployment
## ☁️ Infrastructure & Deployment

### Google Cloud Run (Production)
### Production Architecture

QueryPal is designed for Google Cloud Run with automatic CI/CD:
QueryPal runs on Google Cloud Run with a private backend topology. The frontend nginx container is the only public entry point — the backend service is network-isolated and unreachable from the internet.

#### Automatic Deployment
1. **Push to Production**: Commits to `production` branch trigger automatic deployment
2. **GitHub Actions**: Builds and deploys both frontend and backend containers
3. **Environment Variables**: Securely managed through GitHub Secrets
```mermaid
graph TB
Browser(["👤 Browser"])

subgraph gcp["☁️ Google Cloud Platform — europe-west1"]
subgraph cloudrun["Cloud Run"]
direction TB
Frontend["<b>querypal-frontend</b><br/>──────────────<br/>ingress: public<br/>nginx · serves SPA<br/>proxies /api/* → backend"]
Backend["<b>querypal-backend</b><br/>──────────────<br/>ingress: internal only<br/>FastAPI · Uvicorn<br/>❌ not reachable from internet"]
end

subgraph vpc["VPC Network"]
Connector["Serverless VPC<br/>Access Connector<br/><i>10.8.0.0/28</i>"]
end

SM[("🔑 Secret Manager<br/>6 secrets")]
SQL[("🗄️ Cloud SQL<br/>PostgreSQL")]
GCR["📦 Container Registry"]
SA["🪪 Cloud Run SA<br/><i>least-privilege</i>"]
end

subgraph azure["☁️ Microsoft Azure"]
Entra["🔐 Entra ID<br/><i>MSAL · OBO flow</i>"]
Cosmos[("🌍 Cosmos DB<br/>MongoDB API")]
end

Gemini["🤖 Google Gemini Pro"]

Browser -- "HTTPS" --> Frontend
Frontend -. "vpc-egress: all-traffic" .-> Connector
Connector -- "internal ingress\n✅ VPC source allowed" --> Backend
Backend -- "Cloud SQL Proxy\nunix socket" --> SQL
Backend -- "HTTPS" --> Entra
Backend -- "HTTPS" --> Cosmos
Backend -- "HTTPS" --> Gemini
SM -- "mounted at startup\nvia --set-secrets" --> Backend
SA -. "identity" .-> Frontend
SA -. "identity" .-> Backend
GCR -- "image" --> Frontend
GCR -- "image" --> Backend
```

### Network Security Model

| | Frontend | Backend |
|---|---|---|
| **Cloud Run ingress** | `all` (public) | `internal` (VPC only) |
| **VPC egress** | `all-traffic` (proxy to backend) | `private-ranges-only` |
| **Internet accessible** | ✅ Yes | ❌ No — 403 from GFE |
| **Who can call it** | Anyone | Frontend nginx via VPC connector |

All API calls from the browser go to `/api/*` on the frontend's own origin. Nginx strips the `/api` prefix and proxies the request to the backend's internal Cloud Run URL through the VPC connector. The backend URL is never exposed to the browser.

### Secret Management

All sensitive configuration is stored in **GCP Secret Manager** and exposed to the backend container as environment variables, resolved when an instance starts, via Cloud Run's native `--set-secrets` integration. Secret values are never written into the service configuration as plain-text environment variables, so they never appear in deployment logs or `gcloud run describe` output — only the secret references do.

| Secret | Description |
|---|---|
| `querypal-azure-tenant-id` | Microsoft Entra ID tenant |
| `querypal-azure-client-id` | Backend app registration client ID |
| `querypal-azure-client-secret` | Backend app registration client secret |
| `querypal-gemini-api-key` | Google Gemini API key |
| `querypal-db-user` | Cloud SQL PostgreSQL username |
| `querypal-db-pass` | Cloud SQL PostgreSQL password |

### Infrastructure as Code

Cloud infrastructure is managed by **Terraform** in the `terraform/` directory. The CI pipeline owns image builds and Cloud Run deployments; Terraform owns everything underneath.

| Resource | Managed by |
|---|---|
| VPC connector | Terraform |
| Secret Manager secrets | Terraform |
| Cloud Run service account + IAM | Terraform |
| Cloud SQL instance & database | Terraform (import existing) |
| Cloud Run services | CI pipeline (GitHub Actions) |
| Docker images | CI pipeline (GitHub Actions) |

#### Manual Deployment
```bash
# Authenticate with Google Cloud
gcloud auth login
gcloud config set project YOUR_PROJECT_ID
cd terraform
cp terraform.tfvars.example terraform.tfvars
terraform init
./import.sh # import existing Cloud SQL — no data migration needed
terraform apply
```

# Deploy backend
cd backend
docker build -t gcr.io/YOUR_PROJECT_ID/querypal-backend .
docker push gcr.io/YOUR_PROJECT_ID/querypal-backend
gcloud run deploy querypal-backend \
--image gcr.io/YOUR_PROJECT_ID/querypal-backend \
--region europe-west1 \
--port 8000 \
--add-cloudsql-instances YOUR_CLOUDSQL_INSTANCE \
--set-env-vars AZURE_TENANT_ID=xxx,GEMINI_API_KEY=xxx \
--allow-unauthenticated

# Deploy frontend
cd ../frontend
docker build -t gcr.io/YOUR_PROJECT_ID/querypal-frontend \
--build-arg VITE_API_BASE_URL=https://your-backend-url \
--build-arg VITE_AZURE_REDIRECT_URI=https://your-frontend-url .
docker push gcr.io/YOUR_PROJECT_ID/querypal-frontend
gcloud run deploy querypal-frontend \
--image gcr.io/YOUR_PROJECT_ID/querypal-frontend \
--region europe-west1 \
--port 4000 \
--allow-unauthenticated
> See the PR migration guide for the full step-by-step checklist, including how to populate Secret Manager values and what to verify before the first production deploy.

### CI/CD Pipeline

Pushes to the `production` branch trigger the deploy workflow (`.github/workflows/google-cloudrun-docker.yml`).

```mermaid
flowchart LR
Push(["push to\nproduction"]) --> Auth

subgraph gha["GitHub Actions"]
Auth["Authenticate\nWorkload Identity\nFederation"]
Auth --> BuildBE["Build &amp; push\nbackend image"]
Auth --> BuildFE["Build &amp; push\nfrontend image"]
BuildBE --> DeployBE["Deploy backend\n--ingress=internal\n--set-secrets\n--vpc-connector"]
BuildFE --> DeployFE
DeployBE -- "backend URL" --> DeployFE["Deploy frontend\nBACKEND_URL=internal URL\n--vpc-connector"]
end

DeployBE --> SM
DeployFE --> Done(["✅ Live"])

subgraph gcp["GCP"]
SM[("Secret Manager\nfetch at startup")]
end
```

### Azure Web App (Alternative)
QueryPal also supports deployment to Azure Web Apps using the included publish profiles.
Workload Identity Federation is used for keyless authentication — no long-lived service account keys are stored in GitHub. The dedicated Cloud Run service account (`querypal-cloudrun-sa`) holds only the permissions it needs: `secretmanager.secretAccessor`, `cloudsql.client`, and `vpcaccess.user`.

---

Expand Down
10 changes: 2 additions & 8 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,7 @@ COPY --from=build /app/dist .
EXPOSE 4000
RUN rm -rf /etc/nginx/conf.d/default.conf
COPY nginx.conf /etc/nginx/conf.d/default.conf.template

# Create script to substitute environment variables in nginx config
RUN echo '#!/bin/sh' > /docker-entrypoint.sh && \
echo '# Set PORT default if not provided' >> /docker-entrypoint.sh && \
echo 'export PORT=${PORT:-4000}' >> /docker-entrypoint.sh && \
echo 'envsubst "\$PORT" < /etc/nginx/conf.d/default.conf.template > /etc/nginx/conf.d/default.conf' >> /docker-entrypoint.sh && \
echo 'exec nginx -g "daemon off;"' >> /docker-entrypoint.sh && \
chmod +x /docker-entrypoint.sh
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh

CMD ["/docker-entrypoint.sh"]
15 changes: 15 additions & 0 deletions frontend/docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh
# Container entrypoint: render the nginx config from its template, then run
# nginx in the foreground as the container's main process.
set -e

# Port nginx listens on; falls back to 4000 when PORT is unset or empty.
export PORT="${PORT:-4000}"

# BACKEND_URL is the internal Cloud Run URL of the backend service.
# In production this is set as a Cloud Run environment variable.
# Locally, point directly at the backend container.
BACKEND_URL="${BACKEND_URL:-http://localhost:8000}"

# The nginx template writes "proxy_pass $BACKEND_URL/;", i.e. it appends its own
# slash. Drop one trailing slash here so a value such as "https://host/" does
# not render as "https://host//" and forward requests with a doubled slash.
export BACKEND_URL="${BACKEND_URL%/}"

# Substitute only $PORT and $BACKEND_URL; leave nginx's own $variables untouched.
# The single quotes are deliberate: envsubst needs the literal variable names.
envsubst '$PORT $BACKEND_URL' \
    < /etc/nginx/conf.d/default.conf.template \
    > /etc/nginx/conf.d/default.conf

# exec replaces the shell so nginx receives container signals directly.
exec nginx -g "daemon off;"
24 changes: 16 additions & 8 deletions frontend/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,26 @@ server {
listen $PORT;
server_name localhost;

# Serve the React SPA static files.
location / {
root /usr/share/nginx/html;
index index.html index.htm;
try_files $uri $uri/ /index.html;
}

# Optionally, proxy API requests to backend if needed
# location /api/ {
# proxy_pass http://backend:8000;
# proxy_set_header Host $host;
# proxy_set_header X-Real-IP $remote_addr;
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# proxy_set_header X-Forwarded-Proto $scheme;
# }
# Proxy all /api/ requests to the internal backend Cloud Run service.
# The trailing slash on proxy_pass strips the /api prefix before forwarding,
# so /api/query/execute becomes /query/execute on the backend.
# BACKEND_URL is injected at container startup via docker-entrypoint.sh.
location /api/ {
    # $BACKEND_URL is replaced with a literal URL by envsubst before nginx starts.
    # The trailing slash makes nginx replace the matched /api/ prefix with /.
    proxy_pass $BACKEND_URL/;
    proxy_http_version 1.1;

    # nginx does not send TLS SNI to proxied HTTPS servers unless told to.
    # The backend is addressed by its https:// Cloud Run URL in production, so
    # pass the upstream host name in the handshake. This has no effect when
    # BACKEND_URL is plain http (e.g. the local default).
    proxy_ssl_server_name on;

    # Use the upstream host from proxy_pass as the Host header, not the
    # client-facing host name of the frontend.
    proxy_set_header Host $proxy_host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    # Pass through the scheme reported by whatever sits in front of this
    # container rather than the scheme of the connection nginx itself sees.
    proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;

    # Allow up to 300s for slow backend responses; fail fast (10s) on connect.
    proxy_read_timeout 300s;
    proxy_connect_timeout 10s;
    proxy_send_timeout 300s;
}
}
Loading
Loading