From 27bb9b55cf7a69009338c0613ab71f50e0ed5c64 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 15 May 2026 07:50:06 +0000 Subject: [PATCH 1/4] feat: harden Cloud Run security with Secret Manager, VPC connector, and private backend - Secret Manager: move all sensitive env vars (Azure credentials, Gemini key, DB credentials) out of GitHub Secrets and into GCP Secret Manager; Cloud Run reads them at runtime via --set-secrets, so secrets are never exposed in workflow logs or build args. - VPC Connector: add Serverless VPC Access connector (terraform/network.tf) so Cloud Run services can reach Cloud SQL and each other over the private VPC network. - Private backend: set backend Cloud Run ingress to 'internal', blocking all public internet access. Frontend nginx now proxies /api/* to the backend's internal URL (with BACKEND_URL injected as a runtime env var), so the browser never needs a direct connection to the backend. - Terraform IaC: terraform/ directory manages the VPC connector, Secret Manager secrets, Cloud Run service account, and Cloud SQL (importable via import.sh). CI continues to own image builds and Cloud Run deployments. - Data migration script: scripts/migrate_db.sh migrates PostgreSQL data between Cloud SQL instances via Cloud SQL Auth Proxy if the database ever needs to be rebuilt. 
https://claude.ai/code/session_01SRRzCWrpwgMpdYFurMVn7m --- .github/workflows/google-cloudrun-docker.yml | 67 ++++++---- frontend/Dockerfile | 10 +- frontend/docker-entrypoint.sh | 15 +++ frontend/nginx.conf | 24 ++-- scripts/migrate_db.sh | 130 +++++++++++++++++++ terraform/.gitignore | 8 ++ terraform/database.tf | 38 ++++++ terraform/iam.tf | 44 +++++++ terraform/import.sh | 34 +++++ terraform/main.tf | 22 ++++ terraform/network.tf | 22 ++++ terraform/outputs.tf | 24 ++++ terraform/secrets.tf | 37 ++++++ terraform/terraform.tfvars.example | 12 ++ terraform/variables.tf | 53 ++++++++ 15 files changed, 500 insertions(+), 40 deletions(-) create mode 100644 frontend/docker-entrypoint.sh create mode 100755 scripts/migrate_db.sh create mode 100644 terraform/.gitignore create mode 100644 terraform/database.tf create mode 100644 terraform/iam.tf create mode 100755 terraform/import.sh create mode 100644 terraform/main.tf create mode 100644 terraform/network.tf create mode 100644 terraform/outputs.tf create mode 100644 terraform/secrets.tf create mode 100644 terraform/terraform.tfvars.example create mode 100644 terraform/variables.tf diff --git a/.github/workflows/google-cloudrun-docker.yml b/.github/workflows/google-cloudrun-docker.yml index a683d12..3053b97 100644 --- a/.github/workflows/google-cloudrun-docker.yml +++ b/.github/workflows/google-cloudrun-docker.yml @@ -1,6 +1,8 @@ -# This workflow builds and pushes Docker containers to Google Artifact Registry -# and deploys both backend and frontend on Cloud Run when a commit is pushed to the "production" -# branch. +# Build and deploy QueryPal to Cloud Run. +# Runs on pushes to the production branch. +# +# Infrastructure changes (VPC connector, Secret Manager, IAM) are managed by +# Terraform in the terraform/ directory and must be applied before first deploy. 
name: 'Build and Deploy QueryPal to Cloud Run' @@ -16,6 +18,10 @@ env: BACKEND_SERVICE: 'querypal-backend' FRONTEND_SERVICE: 'querypal-frontend' WORKLOAD_IDENTITY_PROVIDER: 'projects/874216619692/locations/global/workloadIdentityPools/github/providers/querypal' + # Cloud Run service account created by Terraform (terraform/iam.tf). + CLOUD_RUN_SA: 'querypal-cloudrun-sa@gen-lang-client-0698668474.iam.gserviceaccount.com' + # VPC connector created by Terraform (terraform/network.tf). + VPC_CONNECTOR: 'querypal-vpc-connector' jobs: deploy: @@ -29,24 +35,21 @@ jobs: - name: 'Checkout' uses: 'actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332' # actions/checkout@v4 - # Configure Workload Identity Federation and generate an access token. - id: 'auth' name: 'Authenticate to Google Cloud' uses: 'google-github-actions/auth@f112390a2df9932162083945e46d439060d66ec2' # google-github-actions/auth@v2 with: workload_identity_provider: '${{ env.WORKLOAD_IDENTITY_PROVIDER }}' - service_account: 'github-actions@gen-lang-client-0698668474.iam.gserviceaccount.com' + service_account: 'github-actions@${{ env.PROJECT_ID }}.iam.gserviceaccount.com' - # Set up Cloud SDK - name: 'Set up Cloud SDK' uses: 'google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200' # google-github-actions/setup-gcloud@v2 - # Configure Docker to use gcloud as a credential helper - name: 'Configure Docker for GCR' - run: |- - gcloud auth configure-docker --quiet + run: gcloud auth configure-docker --quiet + + # ── Backend ────────────────────────────────────────────────────────────── - # Build and Push Backend Container - name: 'Build and Push Backend Container' run: |- cd backend @@ -54,7 +57,6 @@ jobs: docker build --tag "${DOCKER_TAG}" --platform linux/amd64 . 
docker push "${DOCKER_TAG}" - # Deploy Backend to Cloud Run - id: 'deploy-backend' name: 'Deploy Backend to Cloud Run' uses: 'google-github-actions/deploy-cloudrun@33553064113a37d688aa6937bacbdc481580be17' # google-github-actions/deploy-cloudrun@v2 @@ -62,34 +64,42 @@ jobs: service: '${{ env.BACKEND_SERVICE }}' region: '${{ env.REGION }}' image: 'gcr.io/${{ env.PROJECT_ID }}/${{ env.BACKEND_SERVICE }}:${{ github.sha }}' + # Non-secret runtime configuration only. env_vars: | ENVIRONMENT=production - AZURE_TENANT_ID=${{ secrets.AZURE_TENANT_ID }} - AZURE_CLIENT_ID=${{ secrets.AZURE_CLIENT_ID }} - AZURE_CLIENT_SECRET=${{ secrets.AZURE_CLIENT_SECRET }} ARM_SCOPE=https://management.azure.com/.default - GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} - DB_USER=${{ secrets.DB_USER }} - DB_PASS=${{ secrets.DB_PASS }} DB_NAME=querypal - DB_UNIX_SOCKET=/cloudsql/gen-lang-client-0698668474:europe-west1:querypal-db + DB_UNIX_SOCKET=/cloudsql/${{ env.PROJECT_ID }}:${{ env.REGION }}:querypal-db + # Sensitive values are read directly from Secret Manager at runtime. + # Each secret must exist and hold at least one version before the first deploy (the secrets are created by terraform/secrets.tf; their values are added separately). 
+ secrets: | + AZURE_TENANT_ID=querypal-azure-tenant-id:latest + AZURE_CLIENT_ID=querypal-azure-client-id:latest + AZURE_CLIENT_SECRET=querypal-azure-client-secret:latest + GEMINI_API_KEY=querypal-gemini-api-key:latest + DB_USER=querypal-db-user:latest + DB_PASS=querypal-db-pass:latest flags: | --port=8000 - --add-cloudsql-instances=gen-lang-client-0698668474:europe-west1:querypal-db + --service-account=${{ env.CLOUD_RUN_SA }} + --add-cloudsql-instances=${{ env.PROJECT_ID }}:${{ env.REGION }}:querypal-db + --vpc-connector=${{ env.VPC_CONNECTOR }} + --vpc-egress=private-ranges-only + --ingress=internal --allow-unauthenticated - # Build and Push Frontend Container + # ── Frontend ───────────────────────────────────────────────────────────── + - name: 'Build and Push Frontend Container' run: |- cd frontend DOCKER_TAG="gcr.io/${{ env.PROJECT_ID }}/${{ env.FRONTEND_SERVICE }}:${{ github.sha }}" docker build --tag "${DOCKER_TAG}" --platform linux/amd64 \ - --build-arg VITE_API_BASE_URL=${{ steps.deploy-backend.outputs.url }} \ + --build-arg VITE_API_BASE_URL=/api \ --build-arg VITE_AZURE_REDIRECT_URI=https://querypal.virtonomy.io \ . docker push "${DOCKER_TAG}" - # Deploy Frontend to Cloud Run - id: 'deploy-frontend' name: 'Deploy Frontend to Cloud Run' uses: 'google-github-actions/deploy-cloudrun@33553064113a37d688aa6937bacbdc481580be17' # google-github-actions/deploy-cloudrun@v2 @@ -97,12 +107,21 @@ jobs: service: '${{ env.FRONTEND_SERVICE }}' region: '${{ env.REGION }}' image: 'gcr.io/${{ env.PROJECT_ID }}/${{ env.FRONTEND_SERVICE }}:${{ github.sha }}' + # BACKEND_URL is the internal Cloud Run URL; nginx uses it at runtime to + # proxy /api/* requests to the backend (which is not publicly reachable). 
+ env_vars: | + BACKEND_URL=${{ steps.deploy-backend.outputs.url }} flags: | --port=4000 + --service-account=${{ env.CLOUD_RUN_SA }} + --vpc-connector=${{ env.VPC_CONNECTOR }} + --vpc-egress=all-traffic + --ingress=all --allow-unauthenticated - # Show output URLs + # ── Summary ─────────────────────────────────────────────────────────────── + - name: 'Show deployment URLs' run: |- - echo "Backend URL: ${{ steps.deploy-backend.outputs.url }}" echo "Frontend URL: ${{ steps.deploy-frontend.outputs.url }}" + echo "Backend URL: ${{ steps.deploy-backend.outputs.url }} (internal only)" diff --git a/frontend/Dockerfile b/frontend/Dockerfile index cf1f943..8ceeb36 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -21,13 +21,7 @@ COPY --from=build /app/dist . EXPOSE 4000 RUN rm -rf /etc/nginx/conf.d/default.conf COPY nginx.conf /etc/nginx/conf.d/default.conf.template - -# Create script to substitute environment variables in nginx config -RUN echo '#!/bin/sh' > /docker-entrypoint.sh && \ - echo '# Set PORT default if not provided' >> /docker-entrypoint.sh && \ - echo 'export PORT=${PORT:-4000}' >> /docker-entrypoint.sh && \ - echo 'envsubst "\$PORT" < /etc/nginx/conf.d/default.conf.template > /etc/nginx/conf.d/default.conf' >> /docker-entrypoint.sh && \ - echo 'exec nginx -g "daemon off;"' >> /docker-entrypoint.sh && \ - chmod +x /docker-entrypoint.sh +COPY docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh CMD ["/docker-entrypoint.sh"] diff --git a/frontend/docker-entrypoint.sh b/frontend/docker-entrypoint.sh new file mode 100644 index 0000000..da8ccd1 --- /dev/null +++ b/frontend/docker-entrypoint.sh @@ -0,0 +1,15 @@ +#!/bin/sh +set -e + +export PORT=${PORT:-4000} +# BACKEND_URL is the internal Cloud Run URL of the backend service. +# In production this is set as a Cloud Run environment variable. +# Locally, point directly at the backend container. 
+export BACKEND_URL=${BACKEND_URL:-http://localhost:8000} + +# Substitute only $PORT and $BACKEND_URL; leave nginx's own $variables untouched. +envsubst '$PORT $BACKEND_URL' \ + < /etc/nginx/conf.d/default.conf.template \ + > /etc/nginx/conf.d/default.conf + +exec nginx -g "daemon off;" diff --git a/frontend/nginx.conf b/frontend/nginx.conf index 07d5083..b789005 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -2,18 +2,26 @@ server { listen $PORT; server_name localhost; + # Serve the React SPA static files. location / { root /usr/share/nginx/html; index index.html index.htm; try_files $uri $uri/ /index.html; } - # Optionally, proxy API requests to backend if needed - # location /api/ { - # proxy_pass http://backend:8000; - # proxy_set_header Host $host; - # proxy_set_header X-Real-IP $remote_addr; - # proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - # proxy_set_header X-Forwarded-Proto $scheme; - # } + # Proxy all /api/ requests to the internal backend Cloud Run service. + # The trailing slash on proxy_pass strips the /api prefix before forwarding, + # so /api/query/execute becomes /query/execute on the backend. + # BACKEND_URL is injected at container startup via docker-entrypoint.sh. + location /api/ { + proxy_pass $BACKEND_URL/; + proxy_http_version 1.1; + proxy_set_header Host $proxy_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto; + proxy_read_timeout 300s; + proxy_connect_timeout 10s; + proxy_send_timeout 300s; + } } \ No newline at end of file diff --git a/scripts/migrate_db.sh b/scripts/migrate_db.sh new file mode 100755 index 0000000..91e3845 --- /dev/null +++ b/scripts/migrate_db.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# Migrate PostgreSQL data between Cloud SQL instances using Cloud SQL Auth Proxy. 
+# +# Use this when you need to move data to a new Cloud SQL instance +# (e.g., after recreating the instance via Terraform). +# +# Prerequisites: +# - gcloud CLI authenticated with Cloud SQL Admin permissions +# - cloud-sql-proxy binary in PATH (https://cloud.google.com/sql/docs/postgres/sql-proxy) +# - pg_dump / psql installed locally +# +# Usage: +# ./scripts/migrate_db.sh \ +# --source-instance gen-lang-client-0698668474:europe-west1:querypal-db \ +# --target-instance gen-lang-client-0698668474:europe-west1:querypal-db-new \ +# --db-name querypal \ +# --db-user postgres +# +# The script will prompt for the database password interactively. + +set -euo pipefail + +# ── Argument parsing ───────────────────────────────────────────────────────── + +SOURCE_INSTANCE="" +TARGET_INSTANCE="" +DB_NAME="querypal" +DB_USER="postgres" + +while [[ $# -gt 0 ]]; do + case "$1" in + --source-instance) SOURCE_INSTANCE="$2"; shift 2 ;; + --target-instance) TARGET_INSTANCE="$2"; shift 2 ;; + --db-name) DB_NAME="$2"; shift 2 ;; + --db-user) DB_USER="$2"; shift 2 ;; + *) echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +if [[ -z "$SOURCE_INSTANCE" || -z "$TARGET_INSTANCE" ]]; then + echo "Usage: $0 --source-instance CONN_NAME --target-instance CONN_NAME [--db-name NAME] [--db-user USER]" >&2 + exit 1 +fi + +# ── Setup ──────────────────────────────────────────────────────────────────── + +SOURCE_PORT=5432 +TARGET_PORT=5433 +DUMP_FILE="$(mktemp /tmp/querypal_dump_XXXXXX.sql)" +SOURCE_PROXY_PID="" +TARGET_PROXY_PID="" + +cleanup() { + echo "==> Cleaning up..." 
+ [[ -n "$SOURCE_PROXY_PID" ]] && kill "$SOURCE_PROXY_PID" 2>/dev/null || true + [[ -n "$TARGET_PROXY_PID" ]] && kill "$TARGET_PROXY_PID" 2>/dev/null || true + rm -f "$DUMP_FILE" +} +trap cleanup EXIT + +echo "==> Migration plan:" +echo " Source: ${SOURCE_INSTANCE} (port ${SOURCE_PORT})" +echo " Target: ${TARGET_INSTANCE} (port ${TARGET_PORT})" +echo " Database: ${DB_NAME}" +echo "" +read -rsp "Enter database password for '${DB_USER}': " DB_PASS +echo "" +export PGPASSWORD="$DB_PASS" + +# ── Start Cloud SQL Auth Proxy ─────────────────────────────────────────────── + +echo "==> Starting Cloud SQL Auth Proxy for source instance..." +cloud-sql-proxy \ + "${SOURCE_INSTANCE}?port=${SOURCE_PORT}" \ + --quiet & +SOURCE_PROXY_PID=$! + +echo "==> Starting Cloud SQL Auth Proxy for target instance..." +cloud-sql-proxy \ + "${TARGET_INSTANCE}?port=${TARGET_PORT}" \ + --quiet & +TARGET_PROXY_PID=$! + +# Wait for proxies to be ready. +sleep 3 + +# ── Dump source ────────────────────────────────────────────────────────────── + +echo "==> Dumping source database to ${DUMP_FILE}..." +pg_dump \ + --host=127.0.0.1 \ + --port="${SOURCE_PORT}" \ + --username="${DB_USER}" \ + --dbname="${DB_NAME}" \ + --format=plain \ + --no-owner \ + --no-acl \ + --file="${DUMP_FILE}" + +DUMP_SIZE=$(du -sh "$DUMP_FILE" | cut -f1) +echo " Dump complete: ${DUMP_SIZE}" + +# ── Restore to target ──────────────────────────────────────────────────────── + +echo "==> Restoring to target database..." +# Drop and recreate schema to ensure a clean slate. 
+psql \ + --host=127.0.0.1 \ + --port="${TARGET_PORT}" \ + --username="${DB_USER}" \ + --dbname=postgres \ + --command="DROP DATABASE IF EXISTS ${DB_NAME};" + +psql \ + --host=127.0.0.1 \ + --port="${TARGET_PORT}" \ + --username="${DB_USER}" \ + --dbname=postgres \ + --command="CREATE DATABASE ${DB_NAME};" + +psql \ + --host=127.0.0.1 \ + --port="${TARGET_PORT}" \ + --username="${DB_USER}" \ + --dbname="${DB_NAME}" \ + --file="${DUMP_FILE}" + +echo "" +echo "==> Migration complete." +echo " Verify the target database before updating DB_UNIX_SOCKET in Cloud Run." diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000..2f13459 --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,8 @@ +.terraform/ +.terraform.lock.hcl +terraform.tfstate +terraform.tfstate.backup +*.tfplan +tfplan +terraform.tfvars +*.auto.tfvars diff --git a/terraform/database.tf b/terraform/database.tf new file mode 100644 index 0000000..8d20751 --- /dev/null +++ b/terraform/database.tf @@ -0,0 +1,38 @@ +# Existing Cloud SQL instance brought under Terraform management. +# Run ./import.sh once to import the existing instance into Terraform state +# before applying this configuration. +resource "google_sql_database_instance" "querypal_db" { + name = var.cloud_sql_instance_name + database_version = "POSTGRES_15" + region = var.region + + settings { + tier = "db-f1-micro" + + backup_configuration { + enabled = true + start_time = "02:00" + point_in_time_recovery_enabled = true + transaction_log_retention_days = 7 + } + + ip_configuration { + ipv4_enabled = true + # Require SSL for all connections. + ssl_mode = "ENCRYPTED_ONLY" + } + + database_flags { + name = "log_connections" + value = "on" + } + } + + # Prevent accidental destruction of the production database. 
+ deletion_protection = true +} + +resource "google_sql_database" "querypal" { + name = var.db_name + instance = google_sql_database_instance.querypal_db.name +} diff --git a/terraform/iam.tf b/terraform/iam.tf new file mode 100644 index 0000000..b2b7114 --- /dev/null +++ b/terraform/iam.tf @@ -0,0 +1,44 @@ +# Dedicated service account for the Cloud Run backend and frontend services. +# Using a least-privilege SA instead of the default Compute SA reduces blast radius. +resource "google_service_account" "cloudrun_sa" { + account_id = var.cloud_run_sa_name + display_name = "QueryPal Cloud Run Service Account" + description = "Used by querypal-backend and querypal-frontend Cloud Run services" +} + +# Allow Cloud Run SA to read Secret Manager secrets at runtime. +resource "google_project_iam_member" "cloudrun_secret_accessor" { + project = var.project_id + role = "roles/secretmanager.secretAccessor" + member = "serviceAccount:${google_service_account.cloudrun_sa.email}" +} + +# Allow Cloud Run SA to connect to Cloud SQL instances. +resource "google_project_iam_member" "cloudrun_cloudsql_client" { + project = var.project_id + role = "roles/cloudsql.client" + member = "serviceAccount:${google_service_account.cloudrun_sa.email}" +} + +# Allow Cloud Run SA to use the VPC connector. +resource "google_project_iam_member" "cloudrun_vpc_user" { + project = var.project_id + role = "roles/vpcaccess.user" + member = "serviceAccount:${google_service_account.cloudrun_sa.email}" +} + +# Allow the github-actions SA to read secrets so it can populate them during +# first-time bootstrap (optional — remove if you populate secrets manually). +resource "google_project_iam_member" "github_actions_secret_accessor" { + project = var.project_id + role = "roles/secretmanager.secretAccessor" + member = "serviceAccount:${var.github_actions_sa_email}" +} + +# Allow GitHub Actions SA to act as the Cloud Run SA when deploying services +# (required to set --service-account on Cloud Run deployments). 
+resource "google_service_account_iam_member" "github_actions_act_as_cloudrun_sa" { + service_account_id = google_service_account.cloudrun_sa.name + role = "roles/iam.serviceAccountUser" + member = "serviceAccount:${var.github_actions_sa_email}" +} diff --git a/terraform/import.sh b/terraform/import.sh new file mode 100755 index 0000000..1f4b574 --- /dev/null +++ b/terraform/import.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Import existing GCP resources into Terraform state. +# Run this ONCE when migrating existing infrastructure to Terraform management. +# After the import, subsequent changes should be made via Terraform only. +# +# Prerequisites: +# - terraform init has been run +# - gcloud is authenticated with sufficient permissions +# - terraform.tfvars exists (copy from terraform.tfvars.example) +set -euo pipefail + +PROJECT_ID="${PROJECT_ID:-gen-lang-client-0698668474}" +INSTANCE_NAME="${INSTANCE_NAME:-querypal-db}" +DB_NAME="${DB_NAME:-querypal}" + +echo "==> Initializing Terraform..." +terraform init + +echo "==> Importing Cloud SQL instance '${INSTANCE_NAME}'..." +terraform import \ + google_sql_database_instance.querypal_db \ + "${PROJECT_ID}/${INSTANCE_NAME}" || echo " Already imported or not found — skipping." + +echo "==> Importing Cloud SQL database '${DB_NAME}'..." +terraform import \ + google_sql_database.querypal \ + "${PROJECT_ID}/${INSTANCE_NAME}/${DB_NAME}" || echo " Already imported or not found — skipping." + +echo "==> Import complete. Running 'terraform plan' to review state..." +terraform plan -out=tfplan + +echo "" +echo "Review the plan above. 
If it looks correct, apply with:" +echo " terraform apply tfplan" diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000..ecc2533 --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,22 @@ +terraform { + required_version = ">= 1.5" + + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + } + + # Uncomment to enable remote state (recommended for teams). + # Create the bucket first: gsutil mb -p <PROJECT_ID> gs://<PROJECT_ID>-tfstate + # backend "gcs" { + # bucket = "gen-lang-client-0698668474-tfstate" + # prefix = "querypal/state" + # } +} + +provider "google" { + project = var.project_id + region = var.region +} diff --git a/terraform/network.tf b/terraform/network.tf new file mode 100644 index 0000000..06c2de9 --- /dev/null +++ b/terraform/network.tf @@ -0,0 +1,22 @@ +# Enable the Serverless VPC Access API (required before creating a connector). +resource "google_project_service" "vpcaccess" { + service = "vpcaccess.googleapis.com" + disable_on_destroy = false +} + +# Serverless VPC Access connector — lets Cloud Run services reach the VPC +# (and therefore Cloud SQL private IP and internal Cloud Run services). +resource "google_vpc_access_connector" "querypal" { + name = var.vpc_connector_name + region = var.region + network = var.vpc_network + + # Reserve a /28 block that does not overlap any existing subnets. 
+ ip_cidr_range = var.vpc_connector_cidr + + min_instances = 2 + max_instances = 3 + machine_type = "e2-micro" + + depends_on = [google_project_service.vpcaccess] +} diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 0000000..f9c0073 --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,24 @@ +output "vpc_connector_id" { + description = "Full resource ID of the Serverless VPC Access connector" + value = google_vpc_access_connector.querypal.id +} + +output "vpc_connector_name" { + description = "Short name of the VPC connector (for use in gcloud / CI flags)" + value = google_vpc_access_connector.querypal.name +} + +output "cloud_run_sa_email" { + description = "Email of the Cloud Run service account" + value = google_service_account.cloudrun_sa.email +} + +output "cloud_sql_connection_name" { + description = "Cloud SQL connection name (for --add-cloudsql-instances flag)" + value = google_sql_database_instance.querypal_db.connection_name +} + +output "secret_ids" { + description = "Secret Manager secret IDs managed by Terraform" + value = [for s in google_secret_manager_secret.querypal : s.secret_id] +} diff --git a/terraform/secrets.tf b/terraform/secrets.tf new file mode 100644 index 0000000..742587f --- /dev/null +++ b/terraform/secrets.tf @@ -0,0 +1,37 @@ +# Enable Secret Manager API. +resource "google_project_service" "secretmanager" { + service = "secretmanager.googleapis.com" + disable_on_destroy = false +} + +locals { + # Map of secret IDs to a human-readable description. 
+ secrets = { + "querypal-azure-tenant-id" = "Microsoft Entra ID tenant ID" + "querypal-azure-client-id" = "Microsoft Entra ID app client ID" + "querypal-azure-client-secret" = "Microsoft Entra ID app client secret" + "querypal-gemini-api-key" = "Google Gemini API key" + "querypal-db-user" = "Cloud SQL PostgreSQL username" + "querypal-db-pass" = "Cloud SQL PostgreSQL password" + } +} + +resource "google_secret_manager_secret" "querypal" { + for_each = local.secrets + secret_id = each.key + + labels = { + managed-by = "terraform" + app = "querypal" + } + + replication { + auto {} + } + + depends_on = [google_project_service.secretmanager] +} + +# After 'terraform apply', populate each secret value with: +# echo -n "VALUE" | gcloud secrets versions add SECRET_ID --data-file=- +# Or use the GCP console / a one-time bootstrap script. diff --git a/terraform/terraform.tfvars.example b/terraform/terraform.tfvars.example new file mode 100644 index 0000000..8146397 --- /dev/null +++ b/terraform/terraform.tfvars.example @@ -0,0 +1,12 @@ +# Copy this file to terraform.tfvars and adjust values for your environment. +# terraform.tfvars is gitignored — never commit secret values. 
+ +project_id = "gen-lang-client-0698668474" +region = "europe-west1" +vpc_network = "default" +vpc_connector_name = "querypal-vpc-connector" +vpc_connector_cidr = "10.8.0.0/28" +cloud_sql_instance_name = "querypal-db" +db_name = "querypal" +cloud_run_sa_name = "querypal-cloudrun-sa" +github_actions_sa_email = "github-actions@gen-lang-client-0698668474.iam.gserviceaccount.com" diff --git a/terraform/variables.tf b/terraform/variables.tf new file mode 100644 index 0000000..718ec24 --- /dev/null +++ b/terraform/variables.tf @@ -0,0 +1,53 @@ +variable "project_id" { + description = "GCP project ID" + type = string + default = "gen-lang-client-0698668474" +} + +variable "region" { + description = "GCP region for all resources" + type = string + default = "europe-west1" +} + +variable "vpc_network" { + description = "VPC network name to attach the connector to" + type = string + default = "default" +} + +variable "vpc_connector_name" { + description = "Serverless VPC Access connector name" + type = string + default = "querypal-vpc-connector" +} + +variable "vpc_connector_cidr" { + description = "Unused /28 CIDR block for the VPC connector (must not overlap existing subnets)" + type = string + default = "10.8.0.0/28" +} + +variable "cloud_sql_instance_name" { + description = "Cloud SQL instance name" + type = string + default = "querypal-db" +} + +variable "db_name" { + description = "PostgreSQL database name" + type = string + default = "querypal" +} + +variable "cloud_run_sa_name" { + description = "Service account ID for Cloud Run services" + type = string + default = "querypal-cloudrun-sa" +} + +variable "github_actions_sa_email" { + description = "Email of the service account used by GitHub Actions" + type = string + default = "github-actions@gen-lang-client-0698668474.iam.gserviceaccount.com" +} From cdb3ddb982ce09746ad1bd302f23f142b624778e Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 16 May 2026 20:14:58 +0000 Subject: [PATCH 2/4] fix: construct Cloud Run SA 
email inline to use PROJECT_ID, clarify VITE_API_BASE_URL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitHub Actions does not interpolate ${{ env.X }} inside the top-level env: block, so the full SA email could not reference PROJECT_ID there. Replaced CLOUD_RUN_SA with CLOUD_RUN_SA_NAME and build the email inline in the flags blocks where expression context is available. Added a comment explaining VITE_API_BASE_URL=/api — it is the nginx location prefix, not a full URL, because the browser calls the frontend's own origin and nginx proxies /api/* to the internal backend. https://claude.ai/code/session_01SRRzCWrpwgMpdYFurMVn7m --- .github/workflows/google-cloudrun-docker.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/google-cloudrun-docker.yml b/.github/workflows/google-cloudrun-docker.yml index 3053b97..6f73cf7 100644 --- a/.github/workflows/google-cloudrun-docker.yml +++ b/.github/workflows/google-cloudrun-docker.yml @@ -18,9 +18,10 @@ env: BACKEND_SERVICE: 'querypal-backend' FRONTEND_SERVICE: 'querypal-frontend' WORKLOAD_IDENTITY_PROVIDER: 'projects/874216619692/locations/global/workloadIdentityPools/github/providers/querypal' - # Cloud Run service account created by Terraform (terraform/iam.tf). - CLOUD_RUN_SA: 'querypal-cloudrun-sa@gen-lang-client-0698668474.iam.gserviceaccount.com' - # VPC connector created by Terraform (terraform/network.tf). + # Short name of the Cloud Run SA and VPC connector created by Terraform. + # The full SA email is constructed inline in flags using ${{ env.PROJECT_ID }} + # because GitHub Actions does not interpolate env vars inside the env: block. 
+ CLOUD_RUN_SA_NAME: 'querypal-cloudrun-sa' VPC_CONNECTOR: 'querypal-vpc-connector' jobs: @@ -81,7 +82,7 @@ jobs: DB_PASS=querypal-db-pass:latest flags: | --port=8000 - --service-account=${{ env.CLOUD_RUN_SA }} + --service-account=${{ env.CLOUD_RUN_SA_NAME }}@${{ env.PROJECT_ID }}.iam.gserviceaccount.com --add-cloudsql-instances=${{ env.PROJECT_ID }}:${{ env.REGION }}:querypal-db --vpc-connector=${{ env.VPC_CONNECTOR }} --vpc-egress=private-ranges-only @@ -94,6 +95,9 @@ jobs: run: |- cd frontend DOCKER_TAG="gcr.io/${{ env.PROJECT_ID }}/${{ env.FRONTEND_SERVICE }}:${{ github.sha }}" + # VITE_API_BASE_URL=/api tells the React app to send all API calls to + # the /api/* path on its own origin instead of a full backend URL. + # Nginx then proxies those requests to the internal backend service. docker build --tag "${DOCKER_TAG}" --platform linux/amd64 \ --build-arg VITE_API_BASE_URL=/api \ --build-arg VITE_AZURE_REDIRECT_URI=https://querypal.virtonomy.io \ @@ -113,7 +117,7 @@ jobs: BACKEND_URL=${{ steps.deploy-backend.outputs.url }} flags: | --port=4000 - --service-account=${{ env.CLOUD_RUN_SA }} + --service-account=${{ env.CLOUD_RUN_SA_NAME }}@${{ env.PROJECT_ID }}.iam.gserviceaccount.com --vpc-connector=${{ env.VPC_CONNECTOR }} --vpc-egress=all-traffic --ingress=all From 2ca3e1101e7eb2b2ed4760ca1ae9df2e4c98a9a4 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 16 May 2026 20:34:56 +0000 Subject: [PATCH 3/4] docs: add infrastructure documentation with architecture diagrams to README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add production architecture Mermaid diagram showing Cloud Run services, VPC connector, Secret Manager, Cloud SQL, and external dependencies - Add network security model table (frontend public / backend internal) - Add secret management table listing all Secret Manager secrets - Add IaC ownership table (Terraform vs CI pipeline) - Add CI/CD pipeline Mermaid flowchart showing Workload Identity auth, 
image build/push, and deploy steps with secret injection - Add Terraform to technology stack table Also remove overly broad github_actions_secret_accessor IAM binding from terraform/iam.tf — the GitHub Actions SA never reads secret values directly; Cloud Run reads them at startup using the Cloud Run SA identity. https://claude.ai/code/session_01SRRzCWrpwgMpdYFurMVn7m --- README.md | 150 +++++++++++++++++++++++++++++++++++------------ terraform/iam.tf | 8 --- 2 files changed, 114 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index e96653f..3765426 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ Azure Cosmos DB's portal interface can be limiting for real-world data explorati | **Backend API** | FastAPI (Python 3.12), Uvicorn, Pydantic V2 | | **Database** | Azure Cosmos DB (MongoDB API), PostgreSQL (User Data) | | **Cloud Platform** | Google Cloud Run, Azure Resource Manager (ARM) | +| **Infrastructure** | Terraform, GCP Secret Manager, Serverless VPC Access, Cloud SQL | | **DevOps & CI/CD** | GitHub Actions, Docker, Google Container Registry | | **Testing** | Vitest, React Testing Library, Pytest, Coverage.py | | **Code Quality** | ESLint, Black, Flake8, MyPy, TypeScript Strict Mode | @@ -286,50 +287,127 @@ npm run test:ui --- -## ☁️ Cloud Deployment +## ☁️ Infrastructure & Deployment -### Google Cloud Run (Production) +### Production Architecture -QueryPal is designed for Google Cloud Run with automatic CI/CD: +QueryPal runs on Google Cloud Run with a private backend topology. The frontend nginx container is the only public entry point — the backend service is network-isolated and unreachable from the internet. -#### Automatic Deployment -1. **Push to Production**: Commits to `production` branch trigger automatic deployment -2. **GitHub Actions**: Builds and deploys both frontend and backend containers -3. 
**Environment Variables**: Securely managed through GitHub Secrets +```mermaid +graph TB + Browser(["👤 Browser"]) + + subgraph gcp["☁️ Google Cloud Platform — europe-west1"] + subgraph cloudrun["Cloud Run"] + direction TB + Frontend["querypal-frontend
──────────────
ingress: public
nginx · serves SPA
proxies /api/* → backend"] + Backend["querypal-backend
──────────────
ingress: internal only
FastAPI · Uvicorn
❌ not reachable from internet"] + end + + subgraph vpc["VPC Network"] + Connector["Serverless VPC
Access Connector
10.8.0.0/28"] + end + + SM[("🔑 Secret Manager
6 secrets")] + SQL[("🗄️ Cloud SQL
PostgreSQL")] + GCR["📦 Container Registry"] + SA["🪪 Cloud Run SA
least-privilege"] + end + + subgraph azure["☁️ Microsoft Azure"] + Entra["🔐 Entra ID
MSAL · OBO flow"] + Cosmos[("🌍 Cosmos DB
MongoDB API")] + end + + Gemini["🤖 Google Gemini Pro"] + + Browser -- "HTTPS" --> Frontend + Frontend -. "vpc-egress: all-traffic" .-> Connector + Connector -- "internal ingress\n✅ VPC source allowed" --> Backend + Backend -- "Cloud SQL Proxy\nunix socket" --> SQL + Backend -- "HTTPS" --> Entra + Backend -- "HTTPS" --> Cosmos + Backend -- "HTTPS" --> Gemini + SM -- "mounted at startup\nvia --set-secrets" --> Backend + SA -. "identity" .-> Frontend + SA -. "identity" .-> Backend + GCR -- "image" --> Frontend + GCR -- "image" --> Backend +``` + +### Network Security Model + +| | Frontend | Backend | +|---|---|---| +| **Cloud Run ingress** | `all` (public) | `internal` (VPC only) | +| **VPC egress** | `all-traffic` (proxy to backend) | `private-ranges-only` | +| **Internet accessible** | ✅ Yes | ❌ No — 403 from GFE | +| **Who can call it** | Anyone | Frontend nginx via VPC connector | + +All API calls from the browser go to `/api/*` on the frontend's own origin. Nginx strips the `/api` prefix and proxies the request to the backend's internal Cloud Run URL through the VPC connector. The backend URL is never exposed to the browser. + +### Secret Management + +All sensitive configuration is stored in **GCP Secret Manager** and mounted into the backend container at startup via Cloud Run's native `--set-secrets` integration. Secrets are never passed as plain environment variables and never appear in deployment logs or `gcloud run describe` output. + +| Secret | Description | +|---|---| +| `querypal-azure-tenant-id` | Microsoft Entra ID tenant | +| `querypal-azure-client-id` | Backend app registration client ID | +| `querypal-azure-client-secret` | Backend app registration client secret | +| `querypal-gemini-api-key` | Google Gemini API key | +| `querypal-db-user` | Cloud SQL PostgreSQL username | +| `querypal-db-pass` | Cloud SQL PostgreSQL password | + +### Infrastructure as Code + +Cloud infrastructure is managed by **Terraform** in the `terraform/` directory. 
The CI pipeline owns image builds and Cloud Run deployments; Terraform owns everything underneath. + +| Resource | Managed by | +|---|---| +| VPC connector | Terraform | +| Secret Manager secrets | Terraform | +| Cloud Run service account + IAM | Terraform | +| Cloud SQL instance & database | Terraform (import existing) | +| Cloud Run services | CI pipeline (GitHub Actions) | +| Docker images | CI pipeline (GitHub Actions) | -#### Manual Deployment ```bash -# Authenticate with Google Cloud -gcloud auth login -gcloud config set project YOUR_PROJECT_ID +cd terraform +cp terraform.tfvars.example terraform.tfvars +terraform init +./import.sh # import existing Cloud SQL — no data migration needed +terraform apply +``` -# Deploy backend -cd backend -docker build -t gcr.io/YOUR_PROJECT_ID/querypal-backend . -docker push gcr.io/YOUR_PROJECT_ID/querypal-backend -gcloud run deploy querypal-backend \ - --image gcr.io/YOUR_PROJECT_ID/querypal-backend \ - --region europe-west1 \ - --port 8000 \ - --add-cloudsql-instances YOUR_CLOUDSQL_INSTANCE \ - --set-env-vars AZURE_TENANT_ID=xxx,GEMINI_API_KEY=xxx \ - --allow-unauthenticated - -# Deploy frontend -cd ../frontend -docker build -t gcr.io/YOUR_PROJECT_ID/querypal-frontend \ - --build-arg VITE_API_BASE_URL=https://your-backend-url \ - --build-arg VITE_AZURE_REDIRECT_URI=https://your-frontend-url . -docker push gcr.io/YOUR_PROJECT_ID/querypal-frontend -gcloud run deploy querypal-frontend \ - --image gcr.io/YOUR_PROJECT_ID/querypal-frontend \ - --region europe-west1 \ - --port 4000 \ - --allow-unauthenticated +> See the PR migration guide for the full step-by-step checklist, including how to populate Secret Manager values and what to verify before the first production deploy. + +### CI/CD Pipeline + +Pushes to the `production` branch trigger the deploy workflow (`.github/workflows/google-cloudrun-docker.yml`). 
+ +```mermaid +flowchart LR + Push(["push to\nproduction"]) --> Auth + + subgraph gha["GitHub Actions"] + Auth["Authenticate\nWorkload Identity\nFederation"] + Auth --> BuildBE["Build & push\nbackend image"] + Auth --> BuildFE["Build & push\nfrontend image"] + BuildBE --> DeployBE["Deploy backend\n--ingress=internal\n--set-secrets\n--vpc-connector"] + BuildFE --> DeployFE + DeployBE -- "backend URL" --> DeployFE["Deploy frontend\nBACKEND_URL=internal URL\n--vpc-connector"] + end + + DeployBE --> SM + DeployFE --> Done(["✅ Live"]) + + subgraph gcp["GCP"] + SM[("Secret Manager\nfetch at startup")] + end ``` -### Azure Web App (Alternative) -QueryPal also supports deployment to Azure Web Apps using the included publish profiles. +Workload Identity Federation is used for keyless authentication — no long-lived service account keys are stored in GitHub. The dedicated Cloud Run service account (`querypal-cloudrun-sa`) holds only the permissions it needs: `secretmanager.secretAccessor`, `cloudsql.client`, and `vpcaccess.user`. --- diff --git a/terraform/iam.tf b/terraform/iam.tf index b2b7114..2396b1e 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -27,14 +27,6 @@ resource "google_project_iam_member" "cloudrun_vpc_user" { member = "serviceAccount:${google_service_account.cloudrun_sa.email}" } -# Allow the github-actions SA to read secrets so it can populate them during -# first-time bootstrap (optional — remove if you populate secrets manually). -resource "google_project_iam_member" "github_actions_secret_accessor" { - project = var.project_id - role = "roles/secretmanager.secretAccessor" - member = "serviceAccount:${var.github_actions_sa_email}" -} - # Allow GitHub Actions SA to act as the Cloud Run SA when deploying services # (required to set --service-account on Cloud Run deployments). 
resource "google_service_account_iam_member" "github_actions_act_as_cloudrun_sa" { From 8cc3d18fed60ce62c15f84a62bf0c831743fa45e Mon Sep 17 00:00:00 2001 From: CE Lin <50169422+ChingEnLin@users.noreply.github.com> Date: Sun, 17 May 2026 12:17:47 +0200 Subject: [PATCH 4/4] fix: align Terraform DB config and reorganize docs (#33) * fix: align database.tf backup config with actual Cloud SQL instance state Backup was disabled on the real instance; syncing so terraform plan is clean with no spurious diff on next apply. Co-Authored-By: Claude Sonnet 4.6 * docs: reorganize README into focused docs/ files README trimmed to a lean landing page. Detailed content moved to: - docs/ARCHITECTURE.md (BFF pattern, ReAct agent, security model) - docs/INFRASTRUCTURE.md (Cloud topology, Terraform, Secret Manager, CI/CD) - docs/AZURE_SETUP.md (Entra ID registration, Cosmos DB, frontend config) - docs/DEVELOPMENT.md (local setup, testing, code style) Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- README.md | 536 +++-------------------------------------- docs/ARCHITECTURE.md | 91 +++++++ docs/AZURE_SETUP.md | 71 ++++++ docs/DEVELOPMENT.md | 96 ++++++++ docs/INFRASTRUCTURE.md | 133 ++++++++++ terraform/database.tf | 6 +- 6 files changed, 429 insertions(+), 504 deletions(-) create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/AZURE_SETUP.md create mode 100644 docs/DEVELOPMENT.md create mode 100644 docs/INFRASTRUCTURE.md diff --git a/README.md b/README.md index 3765426..30213ab 100644 --- a/README.md +++ b/README.md @@ -1,532 +1,66 @@ # QueryPal ### AI-Powered Database Assistant for Azure Cosmos DB -QueryPal is a highly scalable, intelligent database exploration and management platform designed for developers, analysts, and data professionals working with **Azure Cosmos DB (MongoDB API)**. It combines the power of **Google Gemini AI** with a secure, user-friendly interface to transform how you interact with your NoSQL databases. 
+QueryPal lets you query, explore, and manage **Azure Cosmos DB (MongoDB API)** using natural language. Type a question, get an optimized MongoDB query and AI-generated analysis back. -**Key Capabilities:** -- 🧠 **Natural Language Queries**: Convert plain English to MongoDB queries using AI -- 📊 **AI-Powered Data Analysis**: Automatic insights and visualizations from query results -- 🔍 **Smart Data Explorer**: Paginated browsing with advanced filtering and search -- 💾 **Query Management**: Save, share, and collaborate on queries with team members -- 🔒 **Enterprise Security**: Microsoft Entra ID authentication with On-Behalf-Of (OBO) flow -- 📝 **Document Management**: Full CRUD operations with audit trails and history -- 🎯 **Schema Discovery**: Intelligent schema inference and documentation +**Key capabilities:** +- Natural language → MongoDB query via Google Gemini + LangGraph ReAct agent +- Paginated data explorer with filtering, multi-select, and document editing +- Saved queries, audit trails, and schema relationship graph +- Enterprise auth: Microsoft Entra ID with On-Behalf-Of (OBO) flow +- Private backend: frontend nginx proxies all API calls internally — backend is unreachable from the internet --- -## 🚀 Why QueryPal? - -Azure Cosmos DB's portal interface can be limiting for real-world data exploration and analysis. 
QueryPal addresses these pain points by providing: - -- **🎯 Intuitive Data Discovery**: Browse collections, analyze schemas, and understand your data structure without complex queries -- **🧠 AI-Powered Query Generation**: Ask questions in natural language and get optimized MongoDB queries instantly -- **📊 Intelligent Analytics**: Automatic data analysis with AI-generated insights and Chart.js visualizations -- **👥 Team Collaboration**: Share queries, insights, and findings with your team through built-in collaboration features -- **🛡️ Enterprise-Grade Security**: Zero-trust architecture with Microsoft Entra ID and secure token management -- **📋 Data Management**: Complete document lifecycle management with audit trails and version history -- **🔍 Advanced Search**: Powerful filtering and search capabilities across collections and documents - ---- - -## ✨ Features -## 🎯 Key Features Deep Dive - -### 🧠 AI-Powered Natural Language Queries -- **Smart Query Generation**: Convert plain English to optimized MongoDB queries -- **Context Awareness**: Uses database schema and collection metadata for better results -- **Query Optimization**: AI suggests performance improvements and best practices -- **Multi-step Queries**: Handle complex queries requiring multiple steps - -### 📊 Intelligent Data Analysis -- **Automatic Insights**: AI analyzes query results and provides meaningful insights -- **Dynamic Visualizations**: Chart.js integration with 8+ chart types -- **Theme-Aware Charts**: Automatic dark/light mode adaptation -- **Export Capabilities**: Save query output for external analysis - -### 💾 Team Collaboration & Query Management -- **Save & Share Queries**: Build a knowledge base of useful queries -- **Team Collaboration**: Share queries with specific team members -- **Version History**: Track query modifications and usage -- **Quick Access**: Organize and categorize saved queries - -### 🔍 Advanced Data Explorer -- **Paginated Browsing**: Handle large collections 
efficiently -- **Smart Filtering**: Filter by any field with intelligent search -- **Document Linking**: Automatic cross-reference detection and navigation - -### 📝 Document Management -- **Full CRUD Operations**: Create, read, update, delete documents -- **Audit Trails**: Complete history of document changes -- **Field-Level Editing**: Modify specific fields without affecting the whole document -- **Data Validation**: Ensure data integrity with schema validation - -### 🎓 User Experience -- **Interactive Tutorial**: Guided onboarding for new users -- **Contextual Help**: In-app assistance and tooltips -- **Responsive Design**: Works seamlessly on desktop and mobile -- **Accessibility**: WCAG 2.1 compliant interface - ---- - -## 🧱 Technology Stack - -| Component | Technology | -|--------------------|-----------------------------------------------------------------------------| -| **Frontend** | React 18, TypeScript, Vite, Tailwind CSS, Material-UI | -| **AI & Analytics** | Google Gemini Pro, Chart.js, React Chart.js 2 | -| **Authentication** | Microsoft Entra ID, MSAL (Browser & Python), On-Behalf-Of Flow | -| **Backend API** | FastAPI (Python 3.12), Uvicorn, Pydantic V2 | -| **Database** | Azure Cosmos DB (MongoDB API), PostgreSQL (User Data) | -| **Cloud Platform** | Google Cloud Run, Azure Resource Manager (ARM) | -| **Infrastructure** | Terraform, GCP Secret Manager, Serverless VPC Access, Cloud SQL | -| **DevOps & CI/CD** | GitHub Actions, Docker, Google Container Registry | -| **Testing** | Vitest, React Testing Library, Pytest, Coverage.py | -| **Code Quality** | ESLint, Black, Flake8, MyPy, TypeScript Strict Mode | -| **Monitoring** | Application Insights, Cloud SQL Proxy, Logging | - ---- - -## 🏗️ Architecture Overview - -QueryPal follows a secure **Backend-for-Frontend (BFF)** pattern with enterprise-grade security: - -``` -┌─────────────────────┐ Auth ┌─────────────────────┐ -│ React Frontend ├───────────────►│ Microsoft Entra │ -│ (SPA + MSAL.js) 
│◄───────────────┤ Identity Platform │ -└─────────────────────┘ Access Token └─────────────────────┘ - │ - ▼ Bearer Token -┌─────────────────────┐ -│ FastAPI Backend │ -│ • Token Validation │ -│ • OBO Exchange │◄──────────┐ -│ • Query Processing │ │ -│ • AI Integration │ │ -│ • Document CRUD │ │ -└─────────────────────┘ │ - │ │ - ▼ │ -┌─────────────────────┐ │ -│ Google Gemini API │ │ -│ • NL2Query │ │ -│ • Data Analysis │ │ -│ • Insights Gen │ │ -└─────────────────────┘ │ - │ -┌─────────────────────┐ │ -│ PostgreSQL DB │ │ -│ • User Queries │ │ -│ • Audit Logs │ │ -│ • Query History │ │ -└─────────────────────┘ │ - │ -┌─────────────────────┐ │ -│ Azure Cosmos DB │◄──────────┘ -│ • Document Storage │ -│ • MongoDB API │ -│ • ARM Management │ -└─────────────────────┘ -``` - -### Autonomous Query Generation with ReAct Agent - -QueryPal employs a powerful LangGraph-based ReAct (Reasoning and Acting) agent to autonomously generate, sandbox-test, and evaluate MongoDB queries. This ensures unparalleled accuracy while maintaining strict safety boundaries for write operations. 
- -```mermaid -graph TD - A[User Intent] -->|Natural Language| B(Generate Query) - B --> C{Write Operation?} - C -- Yes --> D[Return Query to User] - D --> E((Manual Review & Run)) - E --> F[Execute Write] - F --> G[Log to Audit Database] - F --> H[Evaluate with AI] - - C -- No (Read Only) --> I(Sandbox Execution) - I --> J{Evaluate Result} - J -- Success --> K[Return Final Query & Data] - J -- Failure/Error --> B - - subgraph LangGraph Agent Loop - B - C - I - J - end -``` - -**Security Features:** -- ✅ **Zero-Trust Architecture**: No secrets stored in frontend -- 🔐 **Token-Based Authentication**: MSAL with automatic token refresh -- 🛡️ **On-Behalf-Of Flow**: Secure Azure resource access -- 🛡️ **Input Validation**: Comprehensive request/response validation -- 📝 **Audit Logging**: Complete audit trail for all operations - ---- +## Tech Stack +| Layer | Technology | +|---|---| +| Frontend | React 18, TypeScript, Vite | +| Backend | FastAPI (Python 3.12), Pydantic V2 | +| AI | Google Gemini (`gemini-2.5-flash`), LangGraph | +| Auth | Microsoft Entra ID, MSAL, OBO flow | +| Databases | Azure Cosmos DB (MongoDB API), PostgreSQL (Cloud SQL) | +| Infrastructure | Google Cloud Run, Terraform, GCP Secret Manager, Serverless VPC Access | +| CI/CD | GitHub Actions, Docker, Google Container Registry | --- ## Quick Start -### Option 1: Docker Compose (Recommended) - -The fastest way to get QueryPal running locally: - ```bash -# Clone the repository -git clone https://github.com/ChingEnLin/QueryPal -cd QueryPal - -# Configure environment variables cp backend/.env.example backend/.env -# Edit backend/.env with your API keys and Azure credentials - -# Start both frontend and backend +# Fill in backend/.env — see docs/DEVELOPMENT.md for all variables docker-compose up --build - -# Access the application -# Frontend: http://localhost:5173 -# Backend API: http://localhost:8000 -# API Documentation: http://localhost:8000/docs ``` -### Option 2: Development Setup - -For development 
with hot reload: - -```bash -# Backend setup -cd backend -python -m venv venv -source venv/bin/activate # or venv\Scripts\activate on Windows -pip install -r requirements.txt -cp .env.example .env # Configure your environment variables -uvicorn main:app --reload - -# Frontend setup (new terminal) -cd frontend -npm install -npm run dev -``` - -### Environment Configuration - -Create a `backend/.env` file with: +- Frontend: http://localhost:5173 +- Backend API: http://localhost:8000 +- API docs: http://localhost:8000/docs -```env -# Google Gemini API -GEMINI_API_KEY=your_gemini_api_key_here - -# Azure Entra ID Configuration -AZURE_TENANT_ID=your_tenant_id -AZURE_CLIENT_ID=your_backend_app_id -AZURE_CLIENT_SECRET=your_client_secret -ARM_SCOPE=https://management.azure.com/.default - -# PostgreSQL Database (for user data) -DB_USER=querypal_user -DB_PASS=your_db_password -DB_NAME=querypal -DB_HOST=localhost -DB_PORT=5432 - -# Optional: For production -DB_UNIX_SOCKET=/cloudsql/project:region:instance -``` +For dev without Azure, set `USE_MSAL_AUTH = false` in `frontend/app.config.ts` to use mock data. --- -## 🧪 Testing & Quality Assurance - -QueryPal maintains high code quality with comprehensive testing: - -### Backend Testing -```bash -cd backend - -# Run all tests with coverage -./run_tests.sh - -# Individual commands -pytest --cov=. --cov-report=html # Tests with coverage -flake8 . --statistics # Code linting -black --check . # Code formatting -mypy . 
# Type checking -``` - -### Frontend Testing -```bash -cd frontend - -# Run all tests -npm test - -# Run with coverage -npm run test:coverage - -# Run tests once -npm run test:run - -# Interactive UI testing -npm run test:ui -``` - -### Test Coverage -- **Backend**: 85%+ code coverage with pytest -- **Frontend**: 80%+ code coverage with Vitest -- **Integration Tests**: E2E testing of critical user flows -- **Static Analysis**: Type checking, linting, and formatting - -### CI/CD Pipeline -- ✅ **Automated Testing**: All PRs trigger comprehensive test suites -- 🚀 **Deployment**: Automatic deployment to Google Cloud Run on production branch -- 📊 **Code Coverage**: Coverage reports uploaded to Codecov -- 🔍 **Code Quality**: ESLint, Black, MyPy, and TypeScript strict mode - ---- - -## ☁️ Infrastructure & Deployment - -### Production Architecture - -QueryPal runs on Google Cloud Run with a private backend topology. The frontend nginx container is the only public entry point — the backend service is network-isolated and unreachable from the internet. +## Documentation -```mermaid -graph TB - Browser(["👤 Browser"]) - - subgraph gcp["☁️ Google Cloud Platform — europe-west1"] - subgraph cloudrun["Cloud Run"] - direction TB - Frontend["querypal-frontend
──────────────
ingress: public
nginx · serves SPA
proxies /api/* → backend"] - Backend["querypal-backend
──────────────
ingress: internal only
FastAPI · Uvicorn
❌ not reachable from internet"] - end - - subgraph vpc["VPC Network"] - Connector["Serverless VPC
Access Connector
10.8.0.0/28"] - end - - SM[("🔑 Secret Manager
6 secrets")] - SQL[("🗄️ Cloud SQL
PostgreSQL")] - GCR["📦 Container Registry"] - SA["🪪 Cloud Run SA
least-privilege"] - end - - subgraph azure["☁️ Microsoft Azure"] - Entra["🔐 Entra ID
MSAL · OBO flow"] - Cosmos[("🌍 Cosmos DB
MongoDB API")] - end - - Gemini["🤖 Google Gemini Pro"] - - Browser -- "HTTPS" --> Frontend - Frontend -. "vpc-egress: all-traffic" .-> Connector - Connector -- "internal ingress\n✅ VPC source allowed" --> Backend - Backend -- "Cloud SQL Proxy\nunix socket" --> SQL - Backend -- "HTTPS" --> Entra - Backend -- "HTTPS" --> Cosmos - Backend -- "HTTPS" --> Gemini - SM -- "mounted at startup\nvia --set-secrets" --> Backend - SA -. "identity" .-> Frontend - SA -. "identity" .-> Backend - GCR -- "image" --> Frontend - GCR -- "image" --> Backend -``` - -### Network Security Model - -| | Frontend | Backend | -|---|---|---| -| **Cloud Run ingress** | `all` (public) | `internal` (VPC only) | -| **VPC egress** | `all-traffic` (proxy to backend) | `private-ranges-only` | -| **Internet accessible** | ✅ Yes | ❌ No — 403 from GFE | -| **Who can call it** | Anyone | Frontend nginx via VPC connector | - -All API calls from the browser go to `/api/*` on the frontend's own origin. Nginx strips the `/api` prefix and proxies the request to the backend's internal Cloud Run URL through the VPC connector. The backend URL is never exposed to the browser. - -### Secret Management - -All sensitive configuration is stored in **GCP Secret Manager** and mounted into the backend container at startup via Cloud Run's native `--set-secrets` integration. Secrets are never passed as plain environment variables and never appear in deployment logs or `gcloud run describe` output. - -| Secret | Description | -|---|---| -| `querypal-azure-tenant-id` | Microsoft Entra ID tenant | -| `querypal-azure-client-id` | Backend app registration client ID | -| `querypal-azure-client-secret` | Backend app registration client secret | -| `querypal-gemini-api-key` | Google Gemini API key | -| `querypal-db-user` | Cloud SQL PostgreSQL username | -| `querypal-db-pass` | Cloud SQL PostgreSQL password | - -### Infrastructure as Code - -Cloud infrastructure is managed by **Terraform** in the `terraform/` directory. 
The CI pipeline owns image builds and Cloud Run deployments; Terraform owns everything underneath. - -| Resource | Managed by | +| Doc | Contents | |---|---| -| VPC connector | Terraform | -| Secret Manager secrets | Terraform | -| Cloud Run service account + IAM | Terraform | -| Cloud SQL instance & database | Terraform (import existing) | -| Cloud Run services | CI pipeline (GitHub Actions) | -| Docker images | CI pipeline (GitHub Actions) | - -```bash -cd terraform -cp terraform.tfvars.example terraform.tfvars -terraform init -./import.sh # import existing Cloud SQL — no data migration needed -terraform apply -``` - -> See the PR migration guide for the full step-by-step checklist, including how to populate Secret Manager values and what to verify before the first production deploy. - -### CI/CD Pipeline - -Pushes to the `production` branch trigger the deploy workflow (`.github/workflows/google-cloudrun-docker.yml`). - -```mermaid -flowchart LR - Push(["push to\nproduction"]) --> Auth - - subgraph gha["GitHub Actions"] - Auth["Authenticate\nWorkload Identity\nFederation"] - Auth --> BuildBE["Build & push\nbackend image"] - Auth --> BuildFE["Build & push\nfrontend image"] - BuildBE --> DeployBE["Deploy backend\n--ingress=internal\n--set-secrets\n--vpc-connector"] - BuildFE --> DeployFE - DeployBE -- "backend URL" --> DeployFE["Deploy frontend\nBACKEND_URL=internal URL\n--vpc-connector"] - end - - DeployBE --> SM - DeployFE --> Done(["✅ Live"]) - - subgraph gcp["GCP"] - SM[("Secret Manager\nfetch at startup")] - end -``` - -Workload Identity Federation is used for keyless authentication — no long-lived service account keys are stored in GitHub. The dedicated Cloud Run service account (`querypal-cloudrun-sa`) holds only the permissions it needs: `secretmanager.secretAccessor`, `cloudsql.client`, and `vpcaccess.user`. 
+| [Architecture](docs/ARCHITECTURE.md) | BFF pattern, auth flow, ReAct agent loop, security model | +| [Infrastructure](docs/INFRASTRUCTURE.md) | Cloud topology, network security, Secret Manager, Terraform setup, CI/CD pipeline | +| [Azure Setup](docs/AZURE_SETUP.md) | Entra ID app registrations, Cosmos DB permissions, frontend auth config | +| [Development](docs/DEVELOPMENT.md) | Local setup, testing commands, code style | +| [Design Handbook](DESIGN_HANDBOOK.md) | CSS tokens, utility classes, component conventions | +| [Versioning](docs/SEMANTIC_VERSIONING.md) | Semantic versioning and conventional commits | --- -## 🔧 Development Setup +## Links -### Prerequisites -- **Node.js** 20+ and npm -- **Python** 3.12+ -- **Docker** and Docker Compose -- **Google Cloud SDK** (for deployment) -- **Azure CLI** (optional, for Azure resources) - -### IDE Recommendations -- **VS Code** with extensions: - - Python - - TypeScript - - Pylance - - Prettier - - ESLint - - Docker +- **Live**: https://querypal.virtonomy.io +- **Issues**: https://github.com/ChingEnLin/QueryPal/issues +- **License**: [MIT](LICENSE) --- -## ⚙️ Azure Setup & Configuration - -### 1. Microsoft Entra ID Application Registration - -**Frontend Application (SPA):** -1. Go to [Azure Portal → App Registrations](https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps) -2. Create new registration: - - **Name**: `QueryPal Frontend` - - **Platform**: Single-page application (SPA) - - **Redirect URI**: `http://localhost:5173` (development) / your production URL -3. Note the **Application (client) ID** and **Directory (tenant) ID** - -**Backend Application (Confidential Client):** -1. Create another registration: - - **Name**: `QueryPal Backend` - - **Client type**: Confidential client -2. Add a **client secret** (Certificates & secrets) -3. 
**Expose an API**: - - Add scope: `api://[backend-client-id]/access_as_user` - - Add the frontend app as an authorized client - -**API Permissions:** -- Add permissions for both apps: - - `Microsoft Graph` → `User.Read` - - `Azure Service Management` → `user_impersonation` -- **Grant admin consent** for your organization - -### 2. Azure Cosmos DB Permissions - -Grant the backend application appropriate access: -1. Go to your **Cosmos DB account** → **Access control (IAM)** -2. Add role assignment: - - **Role**: `Cosmos DB Account Reader Role` - - **Assign access to**: Service principal - - **Select**: Your backend application - -### 3. Frontend Configuration - -Update `frontend/authConfig.ts`: - -```typescript -export const msalConfig = { - auth: { - clientId: "your-frontend-client-id", - authority: "https://login.microsoftonline.com/your-tenant-id", - redirectUri: "http://localhost:5173" // or your production URL - }, -}; - -export const loginRequest = { - scopes: ["User.Read", "api://your-backend-client-id/access_as_user"] -}; -``` - ---- - -## 🏷️ Versioning - -This project uses [Semantic Versioning](https://semver.org/) with automated releases based on [Conventional Commits](https://www.conventionalcommits.org/). - -- **Version format**: `vMAJOR.MINOR.PATCH` (e.g., `v2.1.0`) -- **Automated releases**: Triggered when pushing to the `production` branch -- **Release notes**: Auto-generated and published to GitHub Releases and project wiki - -For detailed information about our versioning process and commit message conventions, see [docs/SEMANTIC_VERSIONING.md](docs/SEMANTIC_VERSIONING.md). - ---- - -## 📚 API Documentation - -QueryPal provides comprehensive REST APIs. When running locally, access: -- **Interactive Docs**: http://localhost:8000/docs -- **OpenAPI Spec**: http://localhost:8000/openapi.json - ---- - -## 📄 License - -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
- ---- - -## 👨‍💻 Author & Acknowledgments - -**Built by [Ching-En Lin](https://github.com/ChingEnLin)** - -**Powered by:** -- 🤖 Google Gemini Pro AI -- ☁️ Microsoft Azure & Google Cloud -- ⚡ Modern web technologies - ---- - -## 🔗 Links - -- **Live Demo**: [QueryPal Production](https://querypal.virtonomy.io) -- **GitHub Repository**: [QueryPal Source](https://github.com/ChingEnLin/QueryPal) -- **Issues & Feedback**: [GitHub Issues](https://github.com/ChingEnLin/QueryPal/issues) - +Built by [Ching-En Lin](https://github.com/ChingEnLin) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..e3f147e --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,91 @@ +# Architecture + +## Backend-for-Frontend (BFF) Pattern + +QueryPal uses a BFF architecture where the FastAPI backend is the sole actor that touches Azure and database credentials. The browser only ever holds an MSAL access token. + +``` +┌─────────────────────┐ Auth ┌─────────────────────┐ +│ React Frontend ├───────────────►│ Microsoft Entra │ +│ (SPA + MSAL.js) │◄───────────────┤ Identity Platform │ +└─────────────────────┘ Access Token └─────────────────────┘ + │ + ▼ Bearer Token +┌─────────────────────┐ +│ FastAPI Backend │ +│ • Token Validation │ +│ • OBO Exchange │ +│ • Query Processing │ +│ • AI Integration │ +│ • Document CRUD │ +└─────────────────────┘ + │ + ├──────────────────────────────────────┐ + ▼ ▼ +┌─────────────────────┐ ┌─────────────────────┐ +│ Google Gemini API │ │ Azure Cosmos DB │ +│ • NL2Query │ │ • Document Storage │ +│ • Data Analysis │ │ • MongoDB API │ +└─────────────────────┘ └─────────────────────┘ + │ + ▼ +┌─────────────────────┐ +│ PostgreSQL DB │ +│ • User Queries │ +│ • Audit Logs │ +└─────────────────────┘ +``` + +### Authentication Flow + +1. Frontend acquires an access token from Microsoft Entra ID via MSAL (`api://<backend-client-id>/access_as_user` scope). +2. Frontend sends `Authorization: Bearer <token>` to the backend. +3.
Backend performs an On-Behalf-Of (OBO) exchange to get an ARM-scoped token. +4. Backend uses the ARM token to fetch Cosmos DB accounts and connection strings from Azure Resource Manager. + +--- + +## ReAct Agent Loop + +QueryPal uses a LangGraph-based ReAct agent to autonomously generate, sandbox-test, and evaluate MongoDB queries. Write operations are detected via AST (not regex) and returned to the user for manual review rather than executed automatically. + +```mermaid +graph TD + A[User Intent] -->|Natural Language| B(Generate Query) + B --> C{Write Operation?} + C -- Yes --> D[Return Query to User] + D --> E((Manual Review & Run)) + E --> F[Execute Write] + F --> G[Log to Audit Database] + + C -- No (Read Only) --> I(Sandbox Execution) + I --> J{Evaluate Result} + J -- Success --> K[Return Final Query & Data] + J -- Failure/Error --> B + + subgraph LangGraph Agent Loop + B + C + I + J + end +``` + +**Agent nodes:** `generate_query → execute_test → evaluate_result → (loop or end)` + +- Max iterations enforced server-side via `max_iterations` in `QueryPrompt` (default 3, max 10). +- Sandbox scope is locked to `{"db": db, "ObjectId": ObjectId}` — no arbitrary code execution. +- Write operations (`insert_one`, `update_*`, `delete_*`, etc.) are AST-detected and never executed in the sandbox. 
+ +--- + +## Security Model + +| Layer | Mechanism | +|---|---| +| Browser → Backend | MSAL Bearer token, validated on every request | +| Backend → Azure | OBO token exchange, never stored | +| Backend → Cosmos DB | ARM-fetched connection string, scoped per request | +| Secrets at rest | GCP Secret Manager, mounted at container startup | +| Backend network | `--ingress=internal` — unreachable from public internet | +| Frontend → Backend | nginx proxy over VPC connector — backend URL never exposed to browser | diff --git a/docs/AZURE_SETUP.md b/docs/AZURE_SETUP.md new file mode 100644 index 0000000..9a6004d --- /dev/null +++ b/docs/AZURE_SETUP.md @@ -0,0 +1,71 @@ +# Azure Setup + +## 1. Entra ID App Registrations + +You need two app registrations: one for the frontend SPA and one for the backend confidential client. + +### Frontend (SPA) + +1. Go to Azure Portal → App Registrations → New registration +2. Name: `QueryPal Frontend`, Platform: Single-page application +3. Redirect URI: `http://localhost:5173` (dev) / your production URL +4. Note the **Application (client) ID** and **Directory (tenant) ID** + +### Backend (Confidential Client) + +1. New registration — Name: `QueryPal Backend`, Client type: Confidential client +2. Add a **client secret** under Certificates & secrets +3. Expose an API: + - Application ID URI: `api://<backend-client-id>` + - Add scope: `access_as_user` + - Add the frontend app as an authorized client application + +### API Permissions (both apps) + +- `Microsoft Graph` → `User.Read` +- `Azure Service Management` → `user_impersonation` +- Grant admin consent for your organization + +--- + +## 2. Cosmos DB Access + +Grant the backend application read access to your Cosmos DB account: + +1. Go to your Cosmos DB account → Access control (IAM) +2. Add role assignment: + - Role: `Cosmos DB Account Reader Role` + - Assign to: your backend app registration (service principal) + +--- + +## 3. 
Frontend Configuration + +Update `frontend/authConfig.ts`: + +```typescript +export const msalConfig = { + auth: { + clientId: "your-frontend-client-id", + authority: "https://login.microsoftonline.com/your-tenant-id", + redirectUri: "http://localhost:5173", + }, +}; + +export const loginRequest = { + scopes: ["User.Read", "api://your-backend-client-id/access_as_user"], +}; +``` + +--- + +## 4. Backend Environment Variables + +```env +AZURE_TENANT_ID=your_tenant_id +AZURE_CLIENT_ID=your_backend_client_id +AZURE_CLIENT_SECRET=your_client_secret +ARM_SCOPE=https://management.azure.com/.default +``` + +In production these are sourced from GCP Secret Manager — see [INFRASTRUCTURE.md](INFRASTRUCTURE.md#secret-management). diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md new file mode 100644 index 0000000..c2b3c19 --- /dev/null +++ b/docs/DEVELOPMENT.md @@ -0,0 +1,96 @@ +# Development Guide + +## Prerequisites + +- Node.js 20+ +- Python 3.12+ +- Docker & Docker Compose +- Google Cloud SDK (for deployment) + +--- + +## Running Locally + +### Docker Compose (recommended) + +```bash +cp backend/.env.example backend/.env +# Fill in backend/.env with your credentials +docker-compose up --build +# Frontend: http://localhost:5173 +# Backend: http://localhost:8000 +# API docs: http://localhost:8000/docs +``` + +### Without Azure (mock mode) + +Set `USE_MSAL_AUTH = false` in `frontend/app.config.ts`. All `dbService.ts` calls return mock data without hitting the backend or Azure. 
+ +### Manual (hot reload) + +```bash +# Backend +cd backend +python -m venv venv && source venv/bin/activate +pip install -r requirements.txt +cp .env.example .env +uvicorn main:app --reload + +# Frontend (separate terminal) +cd frontend +npm install +npm run dev +``` + +### Backend `.env` + +```env +GEMINI_API_KEY= +AZURE_TENANT_ID= +AZURE_CLIENT_ID= +AZURE_CLIENT_SECRET= +ARM_SCOPE=https://management.azure.com/.default +DB_USER= +DB_PASS= +DB_NAME=querypal +DB_HOST=localhost +DB_PORT=5432 +# DB_UNIX_SOCKET=/cloudsql/project:region:instance # Cloud SQL only +``` + +--- + +## Testing + +### Backend + +```bash +cd backend +pytest --cov=. --cov-report=term-missing # with coverage +PYTHONPATH=. pytest tests/test_query_routes.py -v # single file +make lint # flake8 +make format # black --check +make format-fix # black (auto-fix) +make typecheck # mypy +make all # install + lint + format + test +``` + +### Frontend + +```bash +cd frontend +npm test # watch mode +npm run test:run # CI mode (run once) +npm run test:coverage # with coverage report +npm run test:ui # interactive Vitest UI +``` + +--- + +## Code Style + +- **Python**: Black (formatting), Flake8 (linting), MyPy (permissive type checking) +- **TypeScript**: ESLint, TypeScript strict mode +- **CSS**: No Tailwind — use CSS tokens from `frontend/src/design-tokens.css` and inline `style` props. See [DESIGN_HANDBOOK.md](../DESIGN_HANDBOOK.md). +- **Icons**: Inline SVGs only (`stroke="currentColor"`), no icon libraries. +- **Comments**: Only when the *why* is non-obvious. No docstrings or block comments explaining what the code does. diff --git a/docs/INFRASTRUCTURE.md b/docs/INFRASTRUCTURE.md new file mode 100644 index 0000000..09b74f3 --- /dev/null +++ b/docs/INFRASTRUCTURE.md @@ -0,0 +1,133 @@ +# Infrastructure & Deployment + +## Production Topology + +The frontend nginx container is the only public entry point. 
The backend is network-isolated and unreachable from the internet — all browser traffic goes to `/api/*` on the frontend's origin, which nginx proxies internally through the VPC connector. + +```mermaid +graph TB + Browser(["👤 Browser"]) + + subgraph gcp["☁️ Google Cloud Platform — europe-west1"] + subgraph cloudrun["Cloud Run"] + direction TB + Frontend["querypal-frontend\ningress: public\nnginx · serves SPA\nproxies /api/* → backend"] + Backend["querypal-backend\ningress: internal only\nFastAPI · Uvicorn\n❌ not reachable from internet"] + end + + subgraph vpc["VPC Network"] + Connector["Serverless VPC\nAccess Connector\n10.8.0.0/28"] + end + + SM[("Secret Manager\n6 secrets")] + SQL[("Cloud SQL\nPostgreSQL")] + GCR["Container Registry"] + SA["Cloud Run SA\nleast-privilege"] + end + + subgraph azure["☁️ Microsoft Azure"] + Entra["Entra ID\nMSAL · OBO flow"] + Cosmos[("Cosmos DB\nMongoDB API")] + end + + Gemini["Google Gemini"] + + Browser -- "HTTPS" --> Frontend + Frontend -. "vpc-egress: all-traffic" .-> Connector + Connector -- "internal ingress" --> Backend + Backend -- "Cloud SQL Proxy / unix socket" --> SQL + Backend -- "HTTPS" --> Entra + Backend -- "HTTPS" --> Cosmos + Backend -- "HTTPS" --> Gemini + SM -- "mounted at startup via --set-secrets" --> Backend + SA -. "identity" .-> Frontend + SA -. "identity" .-> Backend +``` + +## Network Security + +| | Frontend | Backend | +|---|---|---| +| Cloud Run ingress | `all` (public) | `internal` (VPC only) | +| VPC egress | `all-traffic` | `private-ranges-only` | +| Internet accessible | Yes | No — 403 from GFE | +| Who can call it | Anyone | Frontend nginx via VPC connector | + +--- + +## Secret Management + +All sensitive configuration lives in **GCP Secret Manager** and is mounted into the backend container at startup via `--set-secrets`. Secrets are never passed as plain environment variables and never appear in `gcloud run services describe` output. 
+ +| Secret ID | Description | +|---|---| +| `querypal-azure-tenant-id` | Microsoft Entra ID tenant | +| `querypal-azure-client-id` | Backend app registration client ID | +| `querypal-azure-client-secret` | Backend app registration client secret | +| `querypal-gemini-api-key` | Google Gemini API key | +| `querypal-db-user` | Cloud SQL PostgreSQL username | +| `querypal-db-pass` | Cloud SQL PostgreSQL password | + +--- + +## Terraform + +Cloud infrastructure is managed by Terraform in `terraform/`. The CI pipeline owns image builds and Cloud Run deployments; Terraform owns everything underneath. + +| Resource | Owner | +|---|---| +| VPC connector | Terraform | +| Secret Manager secrets | Terraform | +| Cloud Run service account + IAM | Terraform | +| Cloud SQL instance & database | Terraform (imported existing) | +| Cloud Run services | CI pipeline | +| Docker images | CI pipeline | + +### First-time setup + +```bash +cd terraform +cp terraform.tfvars.example terraform.tfvars +terraform init +./import.sh # import existing Cloud SQL — no data migration needed +terraform apply +``` + +After apply, populate Secret Manager before triggering any deployment: + +```bash +for SECRET_ID in querypal-azure-tenant-id querypal-azure-client-id querypal-azure-client-secret querypal-gemini-api-key querypal-db-user querypal-db-pass; do + echo -n "Enter value for ${SECRET_ID}: " + read -rs VALUE && echo + echo -n "${VALUE}" | gcloud secrets versions add "${SECRET_ID}" --data-file=- +done +``` + +--- + +## CI/CD Pipeline + +Pushes to the `production` branch (or manual `workflow_dispatch`) trigger `.github/workflows/google-cloudrun-docker.yml`. 
+ +```mermaid +flowchart LR + Push(["push to production"]) --> Auth + + subgraph gha["GitHub Actions"] + Auth["Authenticate\nWorkload Identity Federation"] + Auth --> BuildBE["Build & push\nbackend image"] + Auth --> BuildFE["Build & push\nfrontend image"] + BuildBE --> DeployBE["Deploy backend\n--ingress=internal\n--set-secrets\n--vpc-connector"] + BuildFE --> DeployFE + DeployBE -- "backend URL" --> DeployFE["Deploy frontend\nBACKEND_URL=internal URL\n--vpc-connector"] + end + + subgraph gcp["GCP"] + SM[("Secret Manager\nfetch at startup")] + end + + DeployBE --> SM + DeployFE --> Done(["✅ Live"]) +``` + +Authentication uses Workload Identity Federation — no long-lived service account keys are stored in GitHub. The `querypal-cloudrun-sa` service account holds only the permissions it needs: `secretmanager.secretAccessor`, `cloudsql.client`, and `vpcaccess.user`. diff --git a/terraform/database.tf b/terraform/database.tf index 8d20751..028dde8 100644 --- a/terraform/database.tf +++ b/terraform/database.tf @@ -10,9 +10,9 @@ resource "google_sql_database_instance" "querypal_db" { tier = "db-f1-micro" backup_configuration { - enabled = true - start_time = "02:00" - point_in_time_recovery_enabled = true + enabled = false + start_time = "03:00" + point_in_time_recovery_enabled = false transaction_log_retention_days = 7 }