From ed4b527b48050213198585935e69164745eb7559 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:15:37 -0400 Subject: [PATCH 1/9] chore(deps): bump dompurify from 3.3.3 to 3.4.0 in /src/ui (#285) Bumps [dompurify](https://github.com/cure53/DOMPurify) from 3.3.3 to 3.4.0. - [Release notes](https://github.com/cure53/DOMPurify/releases) - [Commits](https://github.com/cure53/DOMPurify/compare/3.3.3...3.4.0) --- updated-dependencies: - dependency-name: dompurify dependency-version: 3.4.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- src/ui/package-lock.json | 8 ++++---- src/ui/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ui/package-lock.json b/src/ui/package-lock.json index b6c5d9a2e..595e7681e 100644 --- a/src/ui/package-lock.json +++ b/src/ui/package-lock.json @@ -37,7 +37,7 @@ "axios": "^1.12.1", "brace-expansion": "^2.0.3", "chart.js": "^4.5.0", - "dompurify": "^3.3.2", + "dompurify": "^3.4.0", "form-data": "^4.0.4", "immer": "^10.1.3", "js-yaml": "^4.1.1", @@ -12108,9 +12108,9 @@ } }, "node_modules/dompurify": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.3.tgz", - "integrity": "sha512-Oj6pzI2+RqBfFG+qOaOLbFXLQ90ARpcGG6UePL82bJLtdsa6CYJD7nmiU8MW9nQNOtCHV3lZ/Bzq1X0QYbBZCA==", + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.0.tgz", + "integrity": "sha512-nolgK9JcaUXMSmW+j1yaSvaEaoXYHwWyGJlkoCTghc97KgGDDSnpoU/PlEnw63Ah+TGKFOyY+X5LnxaWbCSfXg==", "license": "(MPL-2.0 OR Apache-2.0)", "optionalDependencies": { "@types/trusted-types": "^2.0.7" diff --git a/src/ui/package.json b/src/ui/package.json index 6c2a577eb..9e107ce50 100644 --- a/src/ui/package.json +++ b/src/ui/package.json @@ -59,7 +59,7 @@ "uuid": "^11.1.0", "vuera": "^0.2.7", "xlsx": 
"https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz", - "dompurify": "^3.3.2" + "dompurify": "^3.4.0" }, "scripts": { "dev": "vite", From 198a49021c5dff02570ae7ed76f1b61348ae607e Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Thu, 16 Apr 2026 13:16:22 -0400 Subject: [PATCH 2/9] Revert "chore(deps): bump dompurify from 3.3.3 to 3.4.0 in /src/ui (#285)" (#287) This reverts commit ed4b527b48050213198585935e69164745eb7559. --- src/ui/package-lock.json | 8 ++++---- src/ui/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ui/package-lock.json b/src/ui/package-lock.json index 595e7681e..b6c5d9a2e 100644 --- a/src/ui/package-lock.json +++ b/src/ui/package-lock.json @@ -37,7 +37,7 @@ "axios": "^1.12.1", "brace-expansion": "^2.0.3", "chart.js": "^4.5.0", - "dompurify": "^3.4.0", + "dompurify": "^3.3.2", "form-data": "^4.0.4", "immer": "^10.1.3", "js-yaml": "^4.1.1", @@ -12108,9 +12108,9 @@ } }, "node_modules/dompurify": { - "version": "3.4.0", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.0.tgz", - "integrity": "sha512-nolgK9JcaUXMSmW+j1yaSvaEaoXYHwWyGJlkoCTghc97KgGDDSnpoU/PlEnw63Ah+TGKFOyY+X5LnxaWbCSfXg==", + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.3.tgz", + "integrity": "sha512-Oj6pzI2+RqBfFG+qOaOLbFXLQ90ARpcGG6UePL82bJLtdsa6CYJD7nmiU8MW9nQNOtCHV3lZ/Bzq1X0QYbBZCA==", "license": "(MPL-2.0 OR Apache-2.0)", "optionalDependencies": { "@types/trusted-types": "^2.0.7" diff --git a/src/ui/package.json b/src/ui/package.json index 9e107ce50..6c2a577eb 100644 --- a/src/ui/package.json +++ b/src/ui/package.json @@ -59,7 +59,7 @@ "uuid": "^11.1.0", "vuera": "^0.2.7", "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz", - "dompurify": "^3.4.0" + "dompurify": "^3.3.2" }, "scripts": { "dev": "vite", From 4adcecb7c1f493b6c1d2978289c38eec85075718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Mon, 20 Apr 2026 14:19:31 -0400 
Subject: [PATCH 3/9] feat: add private network / air-gapped CodeBuild support for internal registries - Dockerfile.optimized: parameterize FROM sources via ARG UV_IMAGE, ARG LAMBDA_BASE_IMAGE; add ARG/ENV UV_INDEX_URL for internal PyPI mirror - patterns/unified/buildspec.yml: add air-gapped pre_build logic for UV_IMAGE/LAMBDA_BASE_IMAGE fallback, Artifactory Docker login via Secrets Manager (credentials never hardcoded), UV_INDEX_URL passthrough to docker buildx --build-arg - patterns/unified/template.yaml: add 5 new CFN parameters (UvImage, LambdaBaseImage, UvIndexUrl, ArtifactoryDockerUrl, ArtifactoryCredentialsSecretArn), corresponding Conditions, CodeBuild EnvironmentVariables, and conditional IAM Secrets Manager grant on DockerBuildRole (scoped to exact secret ARN) - template.yaml: add matching 5 top-level CFN parameters and pass them through to PATTERNSTACK nested stack (fixes parameter passthrough gap); add Artifactory npm registry support in UICodeBuildProject BuildSpec with dual auth format (token + basic auth) via Secrets Manager - scripts/setup-airgapped-codebuild.sh: new helper script to pull public images, re-tag, and push to customer ECR; outputs ready-to-paste idp-cli deploy --parameters string - docs/artifactory-dependency-workaround.md: comprehensive guide for Artifactory/internal registry dependency resolution (Options 1-4) Fixes critical gap: air-gapped params were not passed from parent template.yaml to PATTERNSTACK, making idp-cli deploy --parameters UvImage=... silently ignored. 
Closes: private network CodeBuild support feature --- Dockerfile.optimized | 24 +- docs/artifactory-dependency-workaround.md | 587 ++++++++++++++++++++++ patterns/unified/buildspec.yml | 58 +++ patterns/unified/template.yaml | 81 +++ scripts/setup-airgapped-codebuild.sh | 236 +++++++++ template.yaml | 56 +++ 6 files changed, 1039 insertions(+), 3 deletions(-) create mode 100644 docs/artifactory-dependency-workaround.md create mode 100755 scripts/setup-airgapped-codebuild.sh diff --git a/Dockerfile.optimized b/Dockerfile.optimized index 06667b3fc..c64e2c6a7 100644 --- a/Dockerfile.optimized +++ b/Dockerfile.optimized @@ -4,11 +4,22 @@ # checkov:skip=CKV_DOCKER_3: "The Dockerfile uses the official AWS Lambda Python base image (public.ecr.aws/lambda/python:3.12-arm64), which already configures the appropriate non-root user for Lambda execution" # checkov:skip=CKV_DOCKER_2: "The Dockerfile.optimized is specifically designed for AWS Lambda container images, which don't use Docker HEALTHCHECK instructions." +# ── Air-gapped / internal registry support ──────────────────────────────────── +# Override these ARGs to use images from an internal registry instead of public ones. +# Example (air-gapped): +# docker build \ +# --build-arg UV_IMAGE=123456789.dkr.ecr.us-east-1.amazonaws.com/idp-base:uv-0.9.6 \ +# --build-arg LAMBDA_BASE_IMAGE=123456789.dkr.ecr.us-east-1.amazonaws.com/idp-base:lambda-python-3.12-arm64 \ +# -f Dockerfile.optimized . 
+# ───────────────────────────────────────────────────────────────────────────── +ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.9.6 +ARG LAMBDA_BASE_IMAGE=public.ecr.aws/lambda/python:3.12-arm64 + # Use specific version to avoid network issues -FROM ghcr.io/astral-sh/uv:0.9.6 AS uv +FROM ${UV_IMAGE} AS uv # Builder stage - bundle dependencies into Lambda task root -FROM public.ecr.aws/lambda/python:3.12-arm64 AS builder +FROM ${LAMBDA_BASE_IMAGE} AS builder # Enable bytecode compilation to improve cold-start performance ENV UV_COMPILE_BYTECODE=1 @@ -19,6 +30,11 @@ ENV UV_NO_INSTALLER_METADATA=1 # Enable copy mode to support bind mount caching ENV UV_LINK_MODE=copy +# Air-gapped PyPI mirror support: pass --build-arg UV_INDEX_URL=https://your-artifactory/pypi/simple/ +# When set, uv will use this instead of pypi.org to install Lambda requirements.txt packages. +ARG UV_INDEX_URL="" +ENV UV_INDEX_URL=${UV_INDEX_URL} + # Build argument for function path ARG FUNCTION_PATH ARG INSTALL_IDP_COMMON=true @@ -43,7 +59,9 @@ RUN --mount=from=uv,source=/uv,target=/bin/uv \ rm -rf /tmp/idp_common_pkg # Final stage - minimal runtime -FROM public.ecr.aws/lambda/python:3.12-arm64 +# NOTE: FROM resolves ${LAMBDA_BASE_IMAGE} from the global ARG declared before the first FROM; the default is repeated here only for readability (keep both in sync) +ARG LAMBDA_BASE_IMAGE=public.ecr.aws/lambda/python:3.12-arm64 +FROM ${LAMBDA_BASE_IMAGE} # Conditionally install git (required for mlflow/gitpython) ARG INSTALL_GIT=false diff --git a/docs/artifactory-dependency-workaround.md b/docs/artifactory-dependency-workaround.md new file mode 100644 index 000000000..c2aa1ef1c --- /dev/null +++ b/docs/artifactory-dependency-workaround.md @@ -0,0 +1,587 @@ +--- +title: "Artifactory Dependency Workaround Guide" +--- + +# Artifactory Dependency Workaround Guide + +**Document Purpose:** This guide describes how to resolve dependency download failures when Artifactory is used as the only package registry and public registries (PyPI, npm, Docker Hub, GitHub Container Registry) are blocked.
+ +--- + +## Table of Contents + +1. [Overview & Root Cause](#1-overview--root-cause) +2. [Complete Dependency Inventory](#2-complete-dependency-inventory) +3. [Option 1 — Configure Artifactory as a Remote Proxy (Recommended)](#3-option-1--configure-artifactory-as-a-remote-proxy-recommended) +4. [Option 2 — Bridge Machine: Download & Upload Missing Packages](#4-option-2--bridge-machine-download--upload-missing-packages) +5. [Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped)](#5-option-3--vendor-dependencies-into-the-repository-fully-air-gapped) +6. [Option 4 — Point Build Tools to Artifactory Explicitly](#6-option-4--point-build-tools-to-artifactory-explicitly) +7. [Decision Guide](#7-decision-guide) +8. [Quick Reference: Copy-Paste Commands](#8-quick-reference-copy-paste-commands) + +--- + +## 1. Overview & Root Cause + +### What Is Happening + +This project downloads dependencies from public registries during builds: + +| Tool | Registry | Used for | +|------|----------|----------| +| `pip` / `uv` | `https://pypi.org/simple/` | Python packages | +| `npm` | `https://registry.npmjs.org/` | JavaScript / UI packages | +| `docker pull` | `ghcr.io` (GitHub Container Registry) | `uv` build tool image | +| `docker pull` | `public.ecr.aws` | AWS Lambda Python base image | + +When **Artifactory is the only allowed registry** and packages are not in its cache, installations fail with errors such as: + +``` +ERROR: Could not find a version that satisfies the requirement strands-agents==1.14.0 +ERROR: No matching distribution found for bedrock-agentcore>=0.1.1 +npm ERR! code E404 - Not Found +``` + +### Why Packages Are Missing from Artifactory + +Artifactory caches packages **on first request**. Packages that have never been requested, or were added after the last cache refresh, will be missing. 
The solution is one of: + +- Enable Artifactory to proxy public registries (preferred) +- Manually upload the missing packages to Artifactory +- Vendor the packages directly in the repository + +--- + +## 2. Complete Dependency Inventory + +### 2.1 Python Dependencies + +#### Core Library (`lib/idp_common_pkg/pyproject.toml`) + +``` +boto3==1.42.0 +jsonschema>=4.25.1 +pydantic>=2.12.0 +deepdiff>=6.0.0 +PyYAML>=6.0.0 +Pillow==12.1.1 +pypdfium2>=5.5.0 +amazon-textract-textractor[pandas]==1.9.2 +numpy==1.26.4 +pandas==2.2.3 +openpyxl==3.1.5 +python-docx==1.2.0 +strands-agents==1.14.0 +strands-agents-tools==0.2.22 +bedrock-agentcore>=0.1.1 +stickler-eval==0.1.4 +genson==1.3.0 +munkres>=1.1.4 +requests==2.33.0 +pyarrow==20.0.0 +aws-lambda-powertools>=3.2.0 +jsonpatch==1.33 +email-validator>=2.3.0 +tabulate>=0.9.0 +datamodel-code-generator>=0.25.0 +mypy-boto3-bedrock-runtime>=1.39.0 +ruamel-yaml>=0.17.0,<0.19.0 +aws-xray-sdk>=2.14.0 +genson==1.3.0 +``` + +#### Lambda Function Dependencies (`src/lambda/*/requirements.txt`) + +``` +huggingface-hub==0.20.0 +cfnresponse +crhelper~=2.0.10 +aws-requests-auth==0.4.3 +bedrock_agentcore_starter_toolkit +urllib3>=1.26.0 +pypdf>=4.0.0 +``` + +#### Development / Test Dependencies + +``` +pytest>=7.4.0 +pytest-cov>=4.1.0 +pytest-xdist>=3.3.1 +pytest-asyncio>=1.1.0 +pytest-mock>=3.11.1 +moto[s3]==5.1.8 +ruff>=0.14.0 +typer>=0.19.2 +rich>=13.0.0 +cfn-lint +basedpyright +build==1.3.0 +python-dotenv>=1.1.0 +``` + +### 2.2 Node.js (npm) Dependencies + +Located in `src/ui/package.json` and `docs-site/package.json`. 
+ +**Key packages include:** +- `react`, `react-dom` +- `@aws-amplify/ui-react` +- `@aws-appsync/gql` +- AWS AppSync codegen libraries +- `astro` (docs site) + +To get the full list: +```bash +cat src/ui/package.json | jq '.dependencies, .devDependencies' +cat docs-site/package.json | jq '.dependencies, .devDependencies' +``` + +### 2.3 Docker Base Images + +| Image | Registry | Purpose | +|-------|----------|---------| +| `ghcr.io/astral-sh/uv:0.9.6` | GitHub Container Registry | `uv` Python package installer (multi-stage build) | +| `public.ecr.aws/lambda/python:3.12-arm64` | AWS Public ECR | Lambda function runtime base image | + +--- + +## 3. Option 1 — Configure Artifactory as a Remote Proxy *(Recommended)* + +**Best for:** Long-term fix. All future installs work transparently. No code changes required. + +**Who performs this:** Your Artifactory administrator. + +### Steps for Artifactory Admin + +#### A. Add PyPI Remote Repository + +1. Log into Artifactory → **Administration** → **Repositories** → **Remote** +2. Click **New Remote Repository** +3. Set: + - **Package Type:** `PyPI` + - **Repository Key:** `pypi-remote` (or any name) + - **URL:** `https://pypi.org/` +4. Save + +#### B. Add npm Remote Repository + +1. Click **New Remote Repository** +2. Set: + - **Package Type:** `npm` + - **Repository Key:** `npm-remote` + - **URL:** `https://registry.npmjs.org` +3. Save + +#### C. Add Docker Remote Repositories + +For `ghcr.io` (GitHub Container Registry): +1. Click **New Remote Repository** +2. Set: + - **Package Type:** `Docker` + - **Repository Key:** `ghcr-remote` + - **URL:** `https://ghcr.io` +3. Save + +For AWS Public ECR (`public.ecr.aws`): +1. Click **New Remote Repository** +2. Set: + - **Package Type:** `Docker` + - **Repository Key:** `ecr-public-remote` + - **URL:** `https://public.ecr.aws` +3. Save + +#### D. 
Create Virtual Repositories (Aggregate local + remote) + +Create virtual repositories that front your local + remote repos for seamless access: +- `pypi-virtual` → includes `pypi-local` + `pypi-remote` +- `npm-virtual` → includes `npm-local` + `npm-remote` +- `docker-virtual` → includes `docker-local` + `ghcr-remote` + `ecr-public-remote` + +### Developer Configuration (after admin sets up proxy) + +```bash +# Set pip to use Artifactory +export PIP_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ +export PIP_TRUSTED_HOST=your-artifactory.company.com + +# Set uv to use Artifactory +export UV_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ + +# Set npm to use Artifactory +npm config set registry https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/ + +# Then run setup as normal +make setup-venv +``` + +--- + +## 4. Option 2 — Bridge Machine: Download & Upload Missing Packages + +**Best for:** When you cannot change Artifactory config but have a machine that can reach the internet AND Artifactory. + +```mermaid +flowchart LR + Internet[Public Internet\nPyPI / npm / Docker Hub] + Bridge[Bridge Machine\ninternet + Artifactory access] + AF[Artifactory\ninternal only] + Dev[Developer Machine\nArtifactory only] + + Internet -->|1. Download packages| Bridge + Bridge -->|2. Upload to Artifactory| AF + AF -->|3. 
Install packages| Dev +``` + +### Python Packages + +**On the bridge machine (internet access):** + +```bash +# Create a directory for wheels +mkdir -p ./wheel-cache + +# Download all Python dependencies as wheel files +# For Linux ARM64 (used by Lambda container images) +pip download \ + --platform manylinux2014_aarch64 \ + --python-version 312 \ + --only-binary=:all: \ + -d ./wheel-cache \ + "boto3==1.42.0" \ + "strands-agents==1.14.0" \ + "strands-agents-tools==0.2.22" \ + "bedrock-agentcore>=0.1.1" \ + "stickler-eval==0.1.4" \ + "Pillow==12.1.1" \ + "pypdfium2>=5.5.0" \ + "pyarrow==20.0.0" \ + "numpy==1.26.4" \ + "huggingface-hub==0.20.0" \ + "cfnresponse" \ + "crhelper~=2.0.10" \ + "aws-requests-auth==0.4.3" \ + "bedrock_agentcore_starter_toolkit" + +# Download remaining packages for local dev (your OS/arch) +pip download \ + -d ./wheel-cache-local \ + -e "lib/idp_common_pkg[all,dev,test]" \ + -e lib/idp_cli_pkg \ + -e lib/idp_sdk \ + -e lib/idp_mcp_connector_pkg +``` + +**Upload to Artifactory via REST API:** + +```bash +ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" +REPO="pypi-local" +AF_USER="your-username" +AF_PASSWORD="your-password-or-api-key" + +for whl in ./wheel-cache/*.whl ./wheel-cache/*.tar.gz; do + filename=$(basename "$whl") + echo "Uploading $filename ..." + curl -u "${AF_USER}:${AF_PASSWORD}" \ + -T "$whl" \ + "${ARTIFACTORY_URL}/${REPO}/${filename}" +done +``` + +**Or upload via Artifactory Web UI:** +1. Navigate to **Artifactory** → **Artifacts** +2. Select your `pypi-local` repository +3. 
Click **Deploy** → Upload `.whl` files from `./wheel-cache/` + +### Docker Images + +```bash +# Pull from public registries +docker pull ghcr.io/astral-sh/uv:0.9.6 +docker pull public.ecr.aws/lambda/python:3.12-arm64 + +# Re-tag for your Artifactory Docker registry +docker tag ghcr.io/astral-sh/uv:0.9.6 \ + your-artifactory.company.com/docker-local/astral-sh/uv:0.9.6 + +docker tag public.ecr.aws/lambda/python:3.12-arm64 \ + your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 + +# Push to Artifactory +docker login your-artifactory.company.com +docker push your-artifactory.company.com/docker-local/astral-sh/uv:0.9.6 +docker push your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 +``` + +Then point the build at the re-tagged images with the `UV_IMAGE` and `LAMBDA_BASE_IMAGE` build arguments (no edits to `Dockerfile.optimized` are required): + +```bash +docker build \ + --build-arg UV_IMAGE=your-artifactory.company.com/docker-local/astral-sh/uv:0.9.6 \ + --build-arg LAMBDA_BASE_IMAGE=your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 \ + -f Dockerfile.optimized . +``` + +--- + +## 5. Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped) + +**Best for:** Completely air-gapped environments with no internet access whatsoever. + +This involves downloading all packages **once** on an internet-connected machine and committing them to the repository, so no registry is needed at install time.
+ +### Setup (on a machine with internet access) + +```bash +# Create vendor directories +mkdir -p vendor/python vendor/npm + +# Download all Python wheels for local development +pip download \ + -d vendor/python \ + "boto3==1.42.0" \ + "jsonschema>=4.25.1" \ + "pydantic>=2.12.0" \ + "deepdiff>=6.0.0" \ + "PyYAML>=6.0.0" \ + "Pillow==12.1.1" \ + "pypdfium2>=5.5.0" \ + "strands-agents==1.14.0" \ + "strands-agents-tools==0.2.22" \ + "bedrock-agentcore>=0.1.1" \ + "stickler-eval==0.1.4" \ + "numpy==1.26.4" \ + "pandas==2.2.3" \ + "pyarrow==20.0.0" \ + "requests==2.33.0" \ + "huggingface-hub==0.20.0" \ + "cfnresponse" \ + "crhelper~=2.0.10" \ + "aws-requests-auth==0.4.3" \ + "pytest>=7.4.0" \ + "moto[s3]==5.1.8" \ + "ruff>=0.14.0" \ + "typer>=0.19.2" \ + "rich>=13.0.0" + +# Pack npm dependencies +cd src/ui && npm pack --pack-destination ../../vendor/npm +cd ../../docs-site && npm pack --pack-destination ../vendor/npm +cd .. +``` + +### Install from Vendor Directory (no network needed) + +```bash +# Python +pip install --no-index --find-links vendor/python \ + -e "lib/idp_common_pkg[all,dev,test]" \ + -e lib/idp_cli_pkg \ + -e lib/idp_sdk \ + -e lib/idp_mcp_connector_pkg + +# npm (configure local registry) +cd src/ui && npm install --prefer-offline --cache ../../vendor/npm +``` + +### Add a Makefile Target for Vendored Install + +Add this to `Makefile`: + +```makefile +setup-vendored: ## Install from local vendor/ directory (no network required) + @echo "Installing from vendor directory (no-index mode)..." 
+ $(PIP) install --no-index --find-links vendor/python \ + -e "lib/idp_common_pkg[all,dev,test]" \ + -e lib/idp_cli_pkg \ + -e lib/idp_sdk \ + -e lib/idp_mcp_connector_pkg + @echo -e "$(GREEN)✅ Vendored install complete!$(NC)" +``` + +### Add `vendor/` to `.gitignore` or commit it + +If committing to git (fully self-contained): +```bash +# Remove vendor/ from .gitignore if present +grep -v "^vendor/" .gitignore > .gitignore.tmp && mv .gitignore.tmp .gitignore + +# Commit +git add vendor/ +git commit -m "Add vendored dependencies for air-gapped deployment" +``` + +--- + +## 6. Option 4 — Point Build Tools to Artifactory Explicitly + +**Best for:** When Artifactory *does* have the packages but the build tools are not configured to use it (wrong index URL). + +### Configure pip + +Create or update `~/.pip/pip.conf` (macOS/Linux) or `%APPDATA%\pip\pip.ini` (Windows): + +```ini +[global] +index-url = https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ +trusted-host = your-artifactory.company.com +``` + +Or use environment variables (temporary, no file changes): + +```bash +export PIP_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ +export PIP_TRUSTED_HOST=your-artifactory.company.com +``` + +### Configure uv + +`uv` (used in `Dockerfile.optimized` and optionally in CI) reads: + +```bash +export UV_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ +``` + +Or create `~/.config/uv/uv.toml`: + +```toml +[pip] +index-url = "https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/" +``` + +### Configure npm + +```bash +# Set globally +npm config set registry https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/ + +# OR create a project-level .npmrc file +echo "registry=https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ + > src/ui/.npmrc +echo 
"registry=https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ + > docs-site/.npmrc +``` + +### Configure Docker + +```bash +# Configure Docker to use Artifactory as a mirror +# Edit /etc/docker/daemon.json (Linux) or Docker Desktop settings: +{ + "registry-mirrors": [ + "https://your-artifactory.company.com/artifactory/docker-virtual" + ] +} +``` + +--- + +## 7. Decision Guide + +```mermaid +flowchart TD + A[Artifactory: packages failing] --> B{Can Artifactory admin\nadd remote proxy repos?} + + B -->|Yes| C[✅ Option 1\nConfigure Artifactory\nas remote proxy\nBest long-term fix] + + B -->|No| D{Is there a bridge machine\nwith internet AND\nArtifactory access?} + + D -->|Yes| E[✅ Option 2\nDownload packages\non bridge machine\nUpload to Artifactory] + + D -->|No| F{Are packages in Artifactory\nbut URL is wrong?} + + F -->|Yes| G[✅ Option 4\nPoint pip/npm/uv\nexplicitly to Artifactory URL] + + F -->|No / Fully air-gapped| H[✅ Option 3\nVendor dependencies\ninto git repository] + + style C fill:#90EE90 + style E fill:#90EE90 + style G fill:#90EE90 + style H fill:#90EE90 +``` + +--- + +## 8. 
Quick Reference: Copy-Paste Commands + +### Identify Missing Packages (run this first) + +```bash +# Capture all errors during setup to identify exactly which packages are failing +make setup-venv 2>&1 | tee /tmp/setup-errors.txt +grep -E "ERROR|Could not find|No matching|WARN" /tmp/setup-errors.txt +``` + +### Option 1 — Temporary environment variables to use Artifactory + +```bash +# Replace with your actual Artifactory URL +ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" + +export PIP_INDEX_URL="${ARTIFACTORY_URL}/api/pypi/pypi-virtual/simple/" +export PIP_TRUSTED_HOST="your-artifactory.company.com" +export UV_INDEX_URL="${ARTIFACTORY_URL}/api/pypi/pypi-virtual/simple/" +npm config set registry "${ARTIFACTORY_URL}/api/npm/npm-virtual/" + +make setup-venv +``` + +### Option 2 — Download + Upload specific missing package + +```bash +# Replace package name and version as needed +PACKAGE="strands-agents==1.14.0" +ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" +REPO="pypi-local" +AF_CREDS="username:api-key" + +# Download +pip download -d /tmp/pkg "$PACKAGE" + +# Upload +for f in /tmp/pkg/*.whl /tmp/pkg/*.tar.gz; do + curl -u "$AF_CREDS" -T "$f" "${ARTIFACTORY_URL}/${REPO}/$(basename $f)" +done +``` + +### Option 3 — Install from vendor directory + +```bash +pip install --no-index --find-links ./vendor/python \ + -e "lib/idp_common_pkg[all,dev,test]" \ + -e lib/idp_cli_pkg \ + -e lib/idp_sdk \ + -e lib/idp_mcp_connector_pkg +``` + +### Option 4 — Set pip.conf to use Artifactory + +```bash +# Create pip config (macOS/Linux) +mkdir -p ~/.pip +cat > ~/.pip/pip.conf << 'EOF' +[global] +index-url = https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ +trusted-host = your-artifactory.company.com +EOF +``` + +--- + +## Need Help? 
+ +If you are unsure which packages are failing or need help generating a specific package list for your Artifactory admin to upload, run: + +```bash +# Generate full resolved dependency list +cd lib/idp_common_pkg +pip-compile pyproject.toml --all-extras --output-file /tmp/full-requirements.txt 2>/dev/null +cat /tmp/full-requirements.txt +``` + +This produces a flat list of every package (and their exact versions) that can be handed to your Artifactory admin for bulk upload. + +--- + +*Generated for the GenAI IDP Accelerator project — [GitHub Repository](https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws)* diff --git a/patterns/unified/buildspec.yml b/patterns/unified/buildspec.yml index 8a74c4e3d..351bd265a 100644 --- a/patterns/unified/buildspec.yml +++ b/patterns/unified/buildspec.yml @@ -9,11 +9,61 @@ phases: - echo "Setting up Docker buildx for cross-platform builds..." - docker buildx create --use --name multiarch-builder --driver docker-container || docker buildx use multiarch-builder - docker buildx inspect --bootstrap + # ── Air-gapped: log into internal base image registry if provided ────── + # UV_IMAGE and LAMBDA_BASE_IMAGE env vars may point to an internal ECR + # repo instead of ghcr.io / public.ecr.aws. If they are in an ECR + # registry in the same account, the ECR login above already covers them. + # If they are in a different account, add an extra login here. 
+ - | + if [ -n "$UV_IMAGE" ]; then + echo "Air-gapped mode: using custom UV image: $UV_IMAGE" + else + UV_IMAGE="ghcr.io/astral-sh/uv:0.9.6" + echo "Standard mode: using default UV image: $UV_IMAGE" + fi + - | + if [ -n "$LAMBDA_BASE_IMAGE" ]; then + echo "Air-gapped mode: using custom Lambda base image: $LAMBDA_BASE_IMAGE" + else + LAMBDA_BASE_IMAGE="public.ecr.aws/lambda/python:3.12-arm64" + echo "Standard mode: using default Lambda base image: $LAMBDA_BASE_IMAGE" + fi + - | + if [ -n "$UV_INDEX_URL" ]; then + echo "Air-gapped mode: uv will use custom PyPI index: $UV_INDEX_URL" + else + echo "Standard mode: uv will use default PyPI index (pypi.org)" + fi + # ── Air-gapped: Artifactory Docker registry login ────────────────────── + # When ARTIFACTORY_DOCKER_URL + ARTIFACTORY_CREDENTIALS_SECRET_ARN are + # set, CodeBuild fetches credentials from AWS Secrets Manager at runtime + # (credentials are NEVER hardcoded) and logs in so that docker buildx + # build can pull UV_IMAGE and LAMBDA_BASE_IMAGE from Artifactory. 
+ # Customer admin creates the secret once: + # aws secretsmanager create-secret --name "idp/artifactory-docker-creds" + # --secret-string '{"username":"svc-build","password":""}' + - | + if [ -n "$ARTIFACTORY_DOCKER_URL" ] && [ -n "$ARTIFACTORY_CREDENTIALS_SECRET_ARN" ]; then + echo "Artifactory mode: logging into $ARTIFACTORY_DOCKER_URL via Secrets Manager" + SECRET=$(aws secretsmanager get-secret-value \ + --secret-id "$ARTIFACTORY_CREDENTIALS_SECRET_ARN" \ + --query SecretString --output text) + AF_USER=$(echo "$SECRET" | python3 -c "import sys,json; print(json.load(sys.stdin)['username'])") + AF_PASS=$(echo "$SECRET" | python3 -c "import sys,json; print(json.load(sys.stdin)['password'])") + echo "$AF_PASS" | docker login "$ARTIFACTORY_DOCKER_URL" \ + --username "$AF_USER" --password-stdin + echo "Artifactory Docker login successful" + unset AF_USER AF_PASS SECRET + else + echo "Standard mode: no Artifactory Docker registry configured" + fi build: commands: - echo "Building Unified Pattern Docker images (BDA + Pipeline)..." 
- echo "Using IMAGE_VERSION from environment (content-based hash)" - echo "Image tag will be $IMAGE_VERSION" + - echo "UV_IMAGE=${UV_IMAGE:-ghcr.io/astral-sh/uv:0.9.6}" + - echo "LAMBDA_BASE_IMAGE=${LAMBDA_BASE_IMAGE:-public.ecr.aws/lambda/python:3.12-arm64}" # BDA functions - export FUNCTION_bda_invoke="patterns/unified/src/bda_invoke_function" - export FUNCTION_bda_completion="patterns/unified/src/bda_completion_function" @@ -39,10 +89,18 @@ phases: if [ "$func_var" = "FUNCTION_mlflow_logger" ]; then EXTRA_ARGS="--build-arg INSTALL_GIT=true" fi + # Build UV_INDEX_URL arg string only when set (air-gapped PyPI mirror) + UV_INDEX_ARG="" + if [ -n "$UV_INDEX_URL" ]; then + UV_INDEX_ARG="--build-arg UV_INDEX_URL=${UV_INDEX_URL}" + fi docker buildx build \ --platform linux/arm64 \ -f Dockerfile.optimized \ --build-arg FUNCTION_PATH="${func_path}" \ + --build-arg UV_IMAGE="${UV_IMAGE}" \ + --build-arg LAMBDA_BASE_IMAGE="${LAMBDA_BASE_IMAGE}" \ + ${UV_INDEX_ARG} \ ${EXTRA_ARGS} \ --tag "${ECR_URI}:${func_name}-${IMAGE_VERSION}" \ --provenance=false \ diff --git a/patterns/unified/template.yaml b/patterns/unified/template.yaml index f33ef7927..1e6d65c1c 100644 --- a/patterns/unified/template.yaml +++ b/patterns/unified/template.yaml @@ -196,6 +196,53 @@ Parameters: Default: "" Description: SageMaker MLflow tracking server ARN + # ── Air-gapped / private network parameters ─────────────────────────────── + # Leave these empty for standard internet-connected deployments. + # Set them when CodeBuild cannot reach public container registries or PyPI. + UvImage: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal ECR URI for the uv build tool image. + Replaces ghcr.io/astral-sh/uv:0.9.6 in Dockerfile.optimized. + Example: 123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-base:uv-0.9.6 + + LambdaBaseImage: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal ECR URI for the Lambda Python base image. 
+ Replaces public.ecr.aws/lambda/python:3.12-arm64 in Dockerfile.optimized. + Example: 123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-base:lambda-python-3.12-arm64 + + UvIndexUrl: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal PyPI index URL for uv to install Lambda requirements.txt packages. + Replaces pypi.org when CodeBuild cannot reach the public internet. + Example: https://artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ + + ArtifactoryDockerUrl: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Artifactory Docker registry hostname that CodeBuild logs into before + pulling UV_IMAGE and LAMBDA_BASE_IMAGE. Required when images are stored in Artifactory instead + of ECR. Must be used together with ArtifactoryCredentialsSecretArn. + Example: artifactory.company.com + + ArtifactoryCredentialsSecretArn: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) ARN of an AWS Secrets Manager secret containing Artifactory Docker + registry credentials. The secret must have JSON format: {"username":"...","password":"..."}. + Required when ArtifactoryDockerUrl is set. 
Create the secret once with: + aws secretsmanager create-secret --name idp/artifactory-docker-creds + --secret-string '{"username":"svc-build","password":""}' + AllowedPattern: "^(|arn:aws[a-z-]*:secretsmanager:[a-z0-9-]+:[0-9]{12}:secret:.+)$" + Conditions: HasGuardrailConfig: !And [ @@ -212,6 +259,12 @@ Conditions: IsPrivateAppSync: !Equals [!Ref UsePrivateAppSync, "true"] IsMLflowEnabled: !Equals [!Ref EnableMLflow, "true"] HasArtifactsBucketKmsKey: !Not [!Equals [!Ref ArtifactsBucketKmsKeyArn, ""]] + # Air-gapped support conditions + HasUvImage: !Not [!Equals [!Ref UvImage, ""]] + HasLambdaBaseImage: !Not [!Equals [!Ref LambdaBaseImage, ""]] + HasUvIndexUrl: !Not [!Equals [!Ref UvIndexUrl, ""]] + HasArtifactoryDockerUrl: !Not [!Equals [!Ref ArtifactoryDockerUrl, ""]] + HasArtifactoryCredentials: !Not [!Equals [!Ref ArtifactoryCredentialsSecretArn, ""]] Resources: @@ -2158,6 +2211,15 @@ Resources: - kms:DescribeKey Resource: !Ref ArtifactsBucketKmsKeyArn - !Ref AWS::NoValue + # Allow CodeBuild to read Artifactory credentials from Secrets Manager (air-gapped mode) + # Only applies when ArtifactoryCredentialsSecretArn is provided + - !If + - HasArtifactoryCredentials + - Effect: Allow + Action: + - secretsmanager:GetSecretValue + Resource: !Ref ArtifactoryCredentialsSecretArn + - !Ref AWS::NoValue DockerBuildProject: Type: AWS::CodeBuild::Project @@ -2186,6 +2248,25 @@ Resources: Value: !Ref ImageVersion - Name: AWS_REGION Value: !Ref AWS::Region + # ── Air-gapped support ───────────────────────────────────────────── + # Set these parameters when CodeBuild cannot reach public registries. + # UV_IMAGE: internal ECR URI for ghcr.io/astral-sh/uv:0.9.6 + # LAMBDA_BASE_IMAGE: internal ECR URI for public.ecr.aws/lambda/python:3.12-arm64 + # UV_INDEX_URL: Artifactory/internal PyPI URL for Lambda pip installs + # Leave empty (default) for standard internet-connected deployments. 
+ - Name: UV_IMAGE + Value: !If [HasUvImage, !Ref UvImage, ""] + - Name: LAMBDA_BASE_IMAGE + Value: !If [HasLambdaBaseImage, !Ref LambdaBaseImage, ""] + - Name: UV_INDEX_URL + Value: !If [HasUvIndexUrl, !Ref UvIndexUrl, ""] + # ── Artifactory Docker registry (air-gapped) ─────────────────────── + # When set, buildspec.yml fetches credentials from Secrets Manager + # and logs into Artifactory before pulling UV_IMAGE/LAMBDA_BASE_IMAGE. + - Name: ARTIFACTORY_DOCKER_URL + Value: !If [HasArtifactoryDockerUrl, !Ref ArtifactoryDockerUrl, ""] + - Name: ARTIFACTORY_CREDENTIALS_SECRET_ARN + Value: !If [HasArtifactoryCredentials, !Ref ArtifactoryCredentialsSecretArn, ""] TimeoutInMinutes: 90 CodeBuildExecutionRole: diff --git a/scripts/setup-airgapped-codebuild.sh b/scripts/setup-airgapped-codebuild.sh new file mode 100755 index 000000000..038f4c3c2 --- /dev/null +++ b/scripts/setup-airgapped-codebuild.sh @@ -0,0 +1,236 @@ +#!/usr/bin/env bash +# ============================================================================= +# setup-airgapped-codebuild.sh +# +# Prepares an air-gapped AWS environment so that CodeBuild can build Lambda +# container images WITHOUT any outbound internet access. +# +# What this script does: +# 1. Creates an ECR repository for base images in the customer account +# 2. Pulls ghcr.io/astral-sh/uv:0.9.6 and re-tags + pushes it to ECR +# 3. Pulls public.ecr.aws/lambda/python:3.12-arm64 and re-tags + pushes to ECR +# 4. Prints the --parameters strings to pass to idp-cli deploy +# +# Run this on a machine that HAS internet access AND AWS credentials for the +# customer account. 
+# +# Usage: +# bash scripts/setup-airgapped-codebuild.sh \ +# --region \ +# --account \ +# [--repo-name ] # default: idp-base-images +# [--pypi-url ] # optional: your internal PyPI URL +# +# Requirements on this machine: +# - docker (running) +# - aws CLI v2 (configured with customer account credentials) +# ============================================================================= + +set -euo pipefail + +# ── Colours ────────────────────────────────────────────────────────────────── +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m' +CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m' + +info() { echo -e "${CYAN}[INFO]${NC} $*"; } +success() { echo -e "${GREEN}[OK]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +die() { echo -e "${RED}[ERROR]${NC} $*" >&2; exit 1; } + +# ── Source image references (what we pull from public registries) ───────────── +UV_SOURCE_IMAGE="ghcr.io/astral-sh/uv:0.9.6" +LAMBDA_SOURCE_IMAGE="public.ecr.aws/lambda/python:3.12-arm64" + +# ── Defaults ───────────────────────────────────────────────────────────────── +REGION="" +ACCOUNT_ID="" +REPO_NAME="idp-base-images" +PYPI_URL="" + +# ── Argument parsing ────────────────────────────────────────────────────────── +while [[ $# -gt 0 ]]; do + case "$1" in + --region) REGION="$2"; shift 2 ;; + --account) ACCOUNT_ID="$2"; shift 2 ;; + --repo-name) REPO_NAME="$2"; shift 2 ;; + --pypi-url) PYPI_URL="$2"; shift 2 ;; + -h|--help) + echo "Usage: $0 --region --account [--repo-name ] [--pypi-url ]" + echo "" + echo "Options:" + echo " --region AWS region (e.g. us-east-1)" + echo " --account AWS account ID (12-digit)" + echo " --repo-name ECR repo name for base images (default: idp-base-images)" + echo " --pypi-url Internal PyPI/Artifactory URL for Lambda pip installs (optional)" + exit 0 ;; + *) die "Unknown argument: $1" ;; + esac +done + +# ── Validate required args ──────────────────────────────────────────────────── +[[ -z "$REGION" ]] && die "--region is required. 
Example: --region us-east-1" +[[ -z "$ACCOUNT_ID" ]] && die "--account is required. Example: --account 123456789012" + +ECR_REGISTRY="${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com" +ECR_REPO="${ECR_REGISTRY}/${REPO_NAME}" + +UV_TARGET_TAG="${ECR_REPO}:uv-0.9.6" +LAMBDA_TARGET_TAG="${ECR_REPO}:lambda-python-3.12-arm64" + +echo "" +echo -e "${BOLD}============================================================" +echo -e " GenAI IDP Accelerator — Air-Gapped CodeBuild Setup" +echo -e "============================================================${NC}" +echo -e " AWS Region: ${REGION}" +echo -e " AWS Account: ${ACCOUNT_ID}" +echo -e " ECR Registry: ${ECR_REGISTRY}" +echo -e " ECR Repo: ${REPO_NAME}" +if [[ -n "$PYPI_URL" ]]; then + echo -e " PyPI Mirror: ${PYPI_URL}" +fi +echo "" + +# ── Step 1: Check prerequisites ─────────────────────────────────────────────── +info "Checking prerequisites..." +command -v docker &>/dev/null || die "Docker is not installed or not running" +command -v aws &>/dev/null || die "AWS CLI is not installed" +success "docker: $(docker --version | head -1)" +success "aws: $(aws --version)" + +# ── Step 2: Verify AWS credentials ─────────────────────────────────────────── +info "Verifying AWS credentials..." +CALLER=$(aws sts get-caller-identity --region "$REGION" --output json 2>/dev/null) \ + || die "AWS credentials are not configured. Run 'aws configure' first." +CALLER_ACCOUNT=$(echo "$CALLER" | python3 -c "import sys,json; print(json.load(sys.stdin)['Account'])") +CALLER_ARN=$(echo "$CALLER" | python3 -c "import sys,json; print(json.load(sys.stdin)['Arn'])") +if [[ "$CALLER_ACCOUNT" != "$ACCOUNT_ID" ]]; then + warn "AWS credentials are for account ${CALLER_ACCOUNT}, but --account is ${ACCOUNT_ID}" + warn "Make sure you're using the correct AWS profile for the customer account." 
+fi +success "Authenticated as: ${CALLER_ARN}" + +# ── Step 3: Create ECR repository ──────────────────────────────────────────── +info "Creating ECR repository: ${REPO_NAME}..." +aws ecr describe-repositories --repository-names "$REPO_NAME" --region "$REGION" &>/dev/null \ + && success "ECR repository '${REPO_NAME}' already exists" \ + || { + aws ecr create-repository \ + --repository-name "$REPO_NAME" \ + --region "$REGION" \ + --image-scanning-configuration scanOnPush=true \ + --output json > /dev/null + success "Created ECR repository: ${REPO_NAME}" + } + +# ── Step 4: Login to ECR ────────────────────────────────────────────────────── +info "Logging into ECR (${ECR_REGISTRY})..." +aws ecr get-login-password --region "$REGION" \ + | docker login --username AWS --password-stdin "$ECR_REGISTRY" 2>/dev/null +success "Logged into ECR" + +# ── Step 5: Login to AWS Public ECR (for Lambda base image) ────────────────── +info "Logging into AWS Public ECR (public.ecr.aws)..." +aws ecr-public get-login-password --region us-east-1 \ + | docker login --username AWS --password-stdin public.ecr.aws 2>/dev/null \ + && success "Logged into AWS Public ECR" \ + || warn "Could not log into public ECR — proceeding (may fail for Lambda image pull)" + +# ── Step 6: Pull, re-tag, and push uv image ────────────────────────────────── +echo "" +echo -e "${BOLD}── Processing uv image ────────────────────────────────────────${NC}" +info "Pulling ${UV_SOURCE_IMAGE}..." +docker pull "${UV_SOURCE_IMAGE}" +success "Pulled ${UV_SOURCE_IMAGE}" + +info "Re-tagging as ${UV_TARGET_TAG}..." +docker tag "${UV_SOURCE_IMAGE}" "${UV_TARGET_TAG}" + +info "Pushing ${UV_TARGET_TAG} to ECR..." +docker push "${UV_TARGET_TAG}" +success "Pushed ${UV_TARGET_TAG}" + +# ── Step 7: Pull, re-tag, and push Lambda base image ───────────────────────── +echo "" +echo -e "${BOLD}── Processing Lambda base image ───────────────────────────────${NC}" +info "Pulling ${LAMBDA_SOURCE_IMAGE}..." 
+docker pull "${LAMBDA_SOURCE_IMAGE}" +success "Pulled ${LAMBDA_SOURCE_IMAGE}" + +info "Re-tagging as ${LAMBDA_TARGET_TAG}..." +docker tag "${LAMBDA_SOURCE_IMAGE}" "${LAMBDA_TARGET_TAG}" + +info "Pushing ${LAMBDA_TARGET_TAG} to ECR..." +docker push "${LAMBDA_TARGET_TAG}" +success "Pushed ${LAMBDA_TARGET_TAG}" + +# ── Step 8: Clean up local images (optional) ───────────────────────────────── +info "Cleaning up local re-tagged images..." +docker rmi "${UV_TARGET_TAG}" "${LAMBDA_TARGET_TAG}" 2>/dev/null || true +success "Cleaned up" + +# ── Step 9: Print deployment parameters ────────────────────────────────────── +echo "" +echo -e "${BOLD}${GREEN}════════════════════════════════════════════════════════════════${NC}" +echo -e "${BOLD}${GREEN} ✅ Air-gapped base images pushed successfully!${NC}" +echo -e "${BOLD}${GREEN}════════════════════════════════════════════════════════════════${NC}" +echo "" +echo -e "${BOLD}Images pushed to ECR:${NC}" +echo -e " UV image: ${UV_TARGET_TAG}" +echo -e " Lambda base image: ${LAMBDA_TARGET_TAG}" +echo "" + +# Build the parameters string +EXTRA_PARAMS="UvImage=${UV_TARGET_TAG},LambdaBaseImage=${LAMBDA_TARGET_TAG}" +if [[ -n "$PYPI_URL" ]]; then + EXTRA_PARAMS="${EXTRA_PARAMS},UvIndexUrl=${PYPI_URL}" +fi + +echo -e "${BOLD}Use these parameters in your idp-cli deploy command:${NC}" +echo "" +echo -e " ${CYAN}idp-cli deploy \\${NC}" +echo -e " ${CYAN}--stack-name IDP-PRIVATE \\${NC}" +echo -e " ${CYAN}--template-url \\${NC}" +echo -e " ${CYAN}--admin-email admin@example.com \\${NC}" +echo -e " ${CYAN}--region ${REGION} --wait \\${NC}" +echo -e " ${CYAN}--parameters \"WebUIHosting=ALB,ALBVpcId=,ALBSubnetIds=,,ALBCertificateArn=,ALBScheme=internal,AppSyncVisibility=PRIVATE,LambdaSubnetIds=,,EnableMCP=false,DocumentKnowledgeBase=DISABLED,${EXTRA_PARAMS}\"${NC}" +echo "" + +if [[ -n "$PYPI_URL" ]]; then + echo -e "${BOLD}Air-gapped parameters breakdown:${NC}" + echo -e " ${YELLOW}UvImage${NC} = ${UV_TARGET_TAG}" + echo -e " 
${YELLOW}LambdaBaseImage${NC} = ${LAMBDA_TARGET_TAG}" + echo -e " ${YELLOW}UvIndexUrl${NC} = ${PYPI_URL}" +else + echo -e "${BOLD}Air-gapped parameters breakdown:${NC}" + echo -e " ${YELLOW}UvImage${NC} = ${UV_TARGET_TAG}" + echo -e " ${YELLOW}LambdaBaseImage${NC} = ${LAMBDA_TARGET_TAG}" + echo "" + echo -e "${YELLOW}TIP:${NC} If Lambda requirements.txt packages also fail (uv pip install),${NC}" + echo -e " add ${YELLOW}--pypi-url ${NC} to this script and re-run." + echo -e " Then add ${YELLOW}UvIndexUrl=${NC} to the --parameters string above." +fi + +echo "" +echo -e "${BOLD}Also make sure the DockerBuildRole IAM role has ECR pull permissions${NC}" +echo -e "${BOLD}for ${ECR_REGISTRY}/${REPO_NAME} (already granted via AmazonEC2ContainerRegistryPowerUser).${NC}" +echo "" + +# ── Step 10: Save parameters to a file for convenience ─────────────────────── +PARAMS_FILE="airgapped-params-${REGION}.env" +cat > "${PARAMS_FILE}" << EOF +# Auto-generated by scripts/setup-airgapped-codebuild.sh +# Use these values in your idp-cli deploy --parameters string + +UV_IMAGE=${UV_TARGET_TAG} +LAMBDA_BASE_IMAGE=${LAMBDA_TARGET_TAG} +UV_INDEX_URL=${PYPI_URL} +REGION=${REGION} +ACCOUNT_ID=${ACCOUNT_ID} + +# Full parameter string for idp-cli deploy: +IDP_AIRGAPPED_PARAMS=UvImage=${UV_TARGET_TAG},LambdaBaseImage=${LAMBDA_TARGET_TAG}$([ -n "$PYPI_URL" ] && echo ",UvIndexUrl=${PYPI_URL}" || echo "") +EOF + +success "Parameters saved to: ${PARAMS_FILE}" +echo -e " Source this file to use the values: ${CYAN}source ${PARAMS_FILE}${NC}" +echo "" diff --git a/template.yaml b/template.yaml index 4b2df9ea2..2edf0eb03 100644 --- a/template.yaml +++ b/template.yaml @@ -557,6 +557,53 @@ Parameters: Default: "" Description: SageMaker MLflow tracking server ARN + # ── Air-gapped / private network parameters ─────────────────────────────── + # Leave these empty for standard internet-connected deployments. + # Set them when CodeBuild cannot reach public container registries or PyPI. 
+ UvImage: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal ECR URI for the uv build tool image. + Replaces ghcr.io/astral-sh/uv:0.9.6 in Dockerfile.optimized. + Example: 123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-base:uv-0.9.6 + + LambdaBaseImage: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal ECR URI for the Lambda Python base image. + Replaces public.ecr.aws/lambda/python:3.12-arm64 in Dockerfile.optimized. + Example: 123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-base:lambda-python-3.12-arm64 + + UvIndexUrl: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal PyPI index URL for uv to install Lambda requirements.txt packages. + Replaces pypi.org when CodeBuild cannot reach the public internet. + Example: https://artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ + + ArtifactoryDockerUrl: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Artifactory Docker registry hostname that CodeBuild logs into before + pulling UV_IMAGE and LAMBDA_BASE_IMAGE. Required when images are stored in Artifactory instead + of ECR. Must be used together with ArtifactoryCredentialsSecretArn. + Example: artifactory.company.com + + ArtifactoryCredentialsSecretArn: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) ARN of an AWS Secrets Manager secret containing Artifactory Docker + registry credentials. The secret must have JSON format: {"username":"...","password":"..."}. + Required when ArtifactoryDockerUrl is set. 
Create the secret once with: + aws secretsmanager create-secret --name idp/artifactory-docker-creds + --secret-string '{"username":"svc-build","password":""}' + AllowedPattern: "^(|arn:aws[a-z-]*:secretsmanager:[a-z0-9-]+:[0-9]{12}:secret:.+)$" + Rules: MLflowTrackingURIRequired: RuleCondition: !Equals [ !Ref EnableMLflow, "true" ] @@ -1527,6 +1574,15 @@ Resources: EnableMLflow: !Ref EnableMLflow MlflowTrackingURI: !Ref MlflowTrackingURI + # ── Air-gapped / private network parameters ──────────────────────── + # Passed through to the nested unified pattern stack so CodeBuild + # can use internal registries and PyPI mirrors when internet is blocked. + UvImage: !Ref UvImage + LambdaBaseImage: !Ref LambdaBaseImage + UvIndexUrl: !Ref UvIndexUrl + ArtifactoryDockerUrl: !Ref ArtifactoryDockerUrl + ArtifactoryCredentialsSecretArn: !Ref ArtifactoryCredentialsSecretArn + ########################################################################## # Encryption key ########################################################################## From 48beeeda4d5b9afd931494b9587e648f49b365b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Mon, 20 Apr 2026 14:27:45 -0400 Subject: [PATCH 4/9] docs: update deployment-private-network.md with air-gapped CodeBuild section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 'Air-gapped CodeBuild — Internal Container Registries' subsection in Step 1 explaining the two public images CodeBuild needs (uv, Lambda base) - Document setup-airgapped-codebuild.sh helper script usage with example output - Document UvImage, LambdaBaseImage, UvIndexUrl, ArtifactoryDockerUrl, and ArtifactoryCredentialsSecretArn deploy parameters - Add troubleshooting row for 'pull access denied / name unknown' CodeBuild error - Cross-link to docs/artifactory-dependency-workaround.md --- docs/deployment-private-network.md | 67 ++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) 
diff --git a/docs/deployment-private-network.md b/docs/deployment-private-network.md index 68f260f81..8cb1d84d7 100644 --- a/docs/deployment-private-network.md +++ b/docs/deployment-private-network.md @@ -129,6 +129,72 @@ idp-cli publish --source-dir . --bucket-basename my-idp-artifacts --prefix idp - --- +### Air-gapped CodeBuild — Internal Container Registries (optional) + +During stack deployment, CodeBuild builds the Lambda container images inside your account. In a **fully air-gapped VPC** where CodeBuild has no internet access, it cannot reach the two public registries used by the build: + +| Image | Public source | Used for | +|-------|--------------|----------| +| `ghcr.io/astral-sh/uv:0.9.6` | GitHub Container Registry | Python dependency installer inside Dockerfile | +| `public.ecr.aws/lambda/python:3.12-arm64` | Amazon Public ECR | Lambda base image | + +**Solution**: mirror both images to your internal ECR (or Artifactory), then pass their URIs as CloudFormation parameters. + +#### Step A: Mirror images to internal ECR + +Run the provided helper script — it pulls both images, re-tags them into the deployment account's ECR, and prints the ready-to-paste parameter string: + +```bash +./scripts/setup-airgapped-codebuild.sh \ + --region \ + --account-id +``` + +Example output: + +``` +✅ UV image pushed: 123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-build-tools:uv-0.9.6 +✅ Lambda base image pushed: 123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-build-tools:lambda-python-3.12-arm64 + +Add these parameters to your idp-cli deploy command: + UvImage=123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-build-tools:uv-0.9.6,LambdaBaseImage=123456789012.dkr.ecr.us-east-1.amazonaws.com/idp-build-tools:lambda-python-3.12-arm64 +``` + +#### Step B: Add image parameters to the deploy command + +Append the `UvImage` and `LambdaBaseImage` parameters (from the script output) to your `idp-cli deploy --parameters` string (see Step 2 below). 
+ +#### Internal PyPI mirror (optional) + +If CodeBuild also cannot reach PyPI (`pypi.org`) to install Lambda Python dependencies, pass `UvIndexUrl` pointing to your internal PyPI mirror (e.g. Artifactory): + +``` +UvIndexUrl=https://artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ +``` + +#### Artifactory Docker registry (optional) + +If the images are stored in Artifactory rather than ECR, you need to provide credentials so CodeBuild can log in before pulling: + +1. Create a Secrets Manager secret with your Artifactory credentials (only needed once): + ```bash + aws secretsmanager create-secret \ + --name idp/artifactory-docker-creds \ + --secret-string '{"username":"svc-build","password":"<password>"}' \ + --region <region> + ``` + +2. Add these parameters to your deploy command: + ``` + ArtifactoryDockerUrl=artifactory.company.com,ArtifactoryCredentialsSecretArn=arn:aws:secretsmanager:<region>:<account-id>:secret:idp/artifactory-docker-creds-<suffix> + ``` + +> **Security note**: credentials are fetched at build time via the IAM role — they are never stored in CloudFormation parameters or environment variable logs. + +> **See also**: [Artifactory Dependency Workaround](./artifactory-dependency-workaround.md) for a comprehensive guide covering all four dependency resolution options. + +--- + ## Step 2: Deploy the IDP Stack Replace the placeholder values and run: @@ -379,6 +445,7 @@ When `WebUIHosting=ALB` and `AppSyncVisibility=PRIVATE`, the following are handl | **`npm error engine Unsupported engine`** | Node.js 22.12+ required. `brew install node@22 && export PATH="/opt/homebrew/opt/node@22/bin:$PATH"` | | **Stack fails with `conflicting DNS domain`** | A VPC endpoint already exists for that service. Re-run `check-vpc-endpoints.sh` — it will detect this and set the right `Create*=false` flags. | | **UI loads but shows "network error"** | AppSync API is PRIVATE. From outside the VPC you need an SSM tunnel + `/etc/hosts` entry. From inside VPN/VPC it works automatically. 
| +| **CodeBuild fails: `pull access denied` / `name unknown` on image pull** | CodeBuild cannot reach public container registries (`ghcr.io`, `public.ecr.aws`). Run `scripts/setup-airgapped-codebuild.sh` to mirror images to ECR, then pass `UvImage=<uv-image-uri>` and `LambdaBaseImage=<lambda-base-image-uri>` to the deploy command. See [Artifactory Dependency Workaround](./artifactory-dependency-workaround.md). | | **CodeBuild fails: `AccessDenied: kms:Decrypt`** | The artifact bucket is KMS-encrypted but `ArtifactsBucketKmsKeyArn` was not passed to the deploy command. Redeploy with `--parameters "...ArtifactsBucketKmsKeyArn=<kms-key-arn>"`. | | **`UpdateDefaultConfig` custom resource fails with `NoSuchKey`** | Same root cause as above — `ConfigurationCopyFunction` silently skipped copying config files due to missing `kms:Decrypt`. Pass `ArtifactsBucketKmsKeyArn` and redeploy. | | **OCR Lambda times out / Textract calls hang** | Missing `textract` VPC endpoint. Lambda can't reach `com.amazonaws.<region>.textract` — security group only allows port 443 outbound to VPC endpoints. Run `deploy-vpc-endpoints.py` to add the missing endpoint. | From cd21c945560ceabee84aa8d28bff6229bbd3b7ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Mon, 20 Apr 2026 14:33:08 -0400 Subject: [PATCH 5/9] docs: add air-gapped parameters to Step 2 parameters table Add UvImage, LambdaBaseImage, UvIndexUrl, ArtifactoryDockerUrl, and ArtifactoryCredentialsSecretArn to the key parameters table in Step 2 so users can see all available parameters in one place. 
--- docs/deployment-private-network.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/deployment-private-network.md b/docs/deployment-private-network.md index 8cb1d84d7..8cea308bf 100644 --- a/docs/deployment-private-network.md +++ b/docs/deployment-private-network.md @@ -221,7 +221,11 @@ idp-cli deploy \ | `LambdaSubnetIds` | subnet IDs | Subnets where Lambda functions run (can match ALBSubnetIds) | | `EnableMCP` | `false` | Disable Bedrock AgentCore Gateway (requires public endpoint) | | `DocumentKnowledgeBase` | `DISABLED` | Disable Knowledge Base (avoids extra VPC endpoints) | - +| `UvImage` | ECR URI | *(Air-gapped only)* Internal ECR URI for the `uv` build tool image. Replaces `ghcr.io/astral-sh/uv:0.9.6`. | +| `LambdaBaseImage` | ECR URI | *(Air-gapped only)* Internal ECR URI for the Lambda Python base image. Replaces `public.ecr.aws/lambda/python:3.12-arm64`. | +| `UvIndexUrl` | HTTPS URL | *(Air-gapped only)* Internal PyPI index URL (e.g. Artifactory). Replaces `pypi.org` for Lambda dependency installs. | +| `ArtifactoryDockerUrl` | hostname | *(Air-gapped only)* Artifactory Docker registry hostname. Required when images are stored in Artifactory instead of ECR. | +| `ArtifactoryCredentialsSecretArn` | secret ARN | *(Air-gapped only)* Secrets Manager secret ARN with Artifactory credentials. Required with `ArtifactoryDockerUrl`. | > **`AppSyncVisibility` is immutable** — it cannot be changed after the stack is created. To switch between GLOBAL and PRIVATE, delete and recreate the stack. 
### Enterprise: deploying with a KMS-encrypted artifact bucket From 1d1c6136517aedd2f0aaa8df67df95225156db81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Mon, 20 Apr 2026 14:58:43 -0400 Subject: [PATCH 6/9] docs: update artifactory-dependency-workaround.md with Option 5 automated manifest generation --- docs/artifactory-dependency-workaround.md | 194 ++++++++++++++++++++-- 1 file changed, 176 insertions(+), 18 deletions(-) diff --git a/docs/artifactory-dependency-workaround.md b/docs/artifactory-dependency-workaround.md index c2aa1ef1c..2a16c3a7a 100644 --- a/docs/artifactory-dependency-workaround.md +++ b/docs/artifactory-dependency-workaround.md @@ -14,10 +14,11 @@ title: "Artifactory Dependency Workaround Guide" 2. [Complete Dependency Inventory](#2-complete-dependency-inventory) 3. [Option 1 — Configure Artifactory as a Remote Proxy (Recommended)](#3-option-1--configure-artifactory-as-a-remote-proxy-recommended) 4. [Option 2 — Bridge Machine: Download & Upload Missing Packages](#4-option-2--bridge-machine-download--upload-missing-packages) -5. [Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped)](#5-option-3--vendor-dependencies-into-the-repository-fully-air-gapped) -6. [Option 4 — Point Build Tools to Artifactory Explicitly](#6-option-4--point-build-tools-to-artifactory-explicitly) -7. [Decision Guide](#7-decision-guide) -8. [Quick Reference: Copy-Paste Commands](#8-quick-reference-copy-paste-commands) +5. [Option 5 — Automated Manifest Generation (Recommended Pre-Step for Option 2)](#5-option-5--automated-manifest-generation-recommended-pre-step-for-option-2) +6. [Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped)](#6-option-3--vendor-dependencies-into-the-repository-fully-air-gapped) +7. [Option 4 — Point Build Tools to Artifactory Explicitly](#7-option-4--point-build-tools-to-artifactory-explicitly) +8. [Decision Guide](#8-decision-guide) +9. 
[Quick Reference: Copy-Paste Commands](#9-quick-reference-copy-paste-commands) --- @@ -233,15 +234,20 @@ flowchart LR AF -->|3. Install packages| Dev ``` +> **Tip:** Use [Option 5 — Automated Manifest Generation](#5-option-5--automated-manifest-generation-recommended-pre-step-for-option-2) first to get a complete, fully-resolved list of all packages (including transitive dependencies) before running the bridge machine download steps below. This avoids the common mistake of only downloading direct dependencies and missing transitive ones. + ### Python Packages **On the bridge machine (internet access):** ```bash -# Create a directory for wheels +# If you have run Option 5, use the generated manifest directly: mkdir -p ./wheel-cache +while IFS= read -r pkg; do + pip download -d ./wheel-cache "$pkg" 2>/dev/null || echo "WARN: $pkg not downloaded" +done < deps/python/master/manifest.txt -# Download all Python dependencies as wheel files +# Alternatively, download specific packages manually: # For Linux ARM64 (used by Lambda container images) pip download \ --platform manylinux2014_aarch64 \ @@ -325,7 +331,120 @@ FROM your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 AS build --- -## 5. Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped) +## 5. Option 5 — Automated Manifest Generation *(Recommended Pre-Step for Option 2)* + +**Best for:** When you need to seed Artifactory with a complete, verified list of every resolved dependency — including all transitive dependencies — before executing the bridge machine workflow. Run this once on any internet-connected machine to produce authoritative manifests. + +**Why this matters:** The manual approach in Option 2 lists only *direct* top-level dependencies. In practice, `boto3` alone pulls in dozens of sub-packages, and multi-platform build tools (esbuild, sharp, rollup) each have 20+ platform-specific optional binaries. Manually enumerating these is error-prone and almost always incomplete. 
This script uses `uv lock` and `pnpm install --lockfile-only` to perform full dependency-graph resolution and capture every package at its locked version. + +```mermaid +flowchart LR + A[Internet-connected machine] -->|Run generate_lockfiles.py| B[Per-component lockfiles\ndeps/python/ and deps/node/] + B -->|Merge| C[Master manifests\ndeps/python/master/manifest.txt\ndeps/node/master/manifest.txt] + C -->|Input to| D[Option 2: Bridge Machine\nDownload & Upload workflow] + D --> E[Artifactory seeded\nBuilds work] +``` + +### Prerequisites + +```bash +# Install uv (Python resolver) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install pnpm (Node resolver) +npm install -g pnpm +``` + +### Generate the Manifests + +```bash +# From the repo root on any internet-connected machine +python3 scripts/generate_lockfiles.py + +# Python only (skip Node resolution) +python3 scripts/generate_lockfiles.py --python-only + +# Node only (skip Python resolution) +python3 scripts/generate_lockfiles.py --node-only +``` + +This scans all `requirements.txt`, `pyproject.toml` (in `lib/`), and `package.json` files in the repo. It generates: + +| Output | Contents | +|--------|----------| +| `deps/python/master/manifest.txt` | Flat list of `name==version` for every resolved Python package (~295 packages) | +| `deps/node/master/manifest.txt` | Flat list of `name@version` for every resolved Node package (~1,524 packages) | +| `deps/python//uv.lock` | Per-component Python lockfiles for traceability | +| `deps/node//pnpm-lock.yaml` | Per-component Node lockfiles for traceability | + +### Verify Packages Are Accessible from Artifactory + +Before running a real build, use the verification scripts to confirm packages are reachable. 
Set the registry URL to your Artifactory endpoint: + +```bash +# Verify Python packages +UV_INDEX_URL="https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/" \ + python3 scripts/verify_python_packages.py + +# Results written to: +# deps/python/master/verify-passed.txt +# deps/python/master/verify-failed.txt + +# Verify Node packages +NODE_REGISTRY_URL="https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ + python3 scripts/verify_node_packages.py + +# Results written to: +# deps/node/master/verify-passed.txt +# deps/node/master/verify-failed.txt +``` + +### Use the Manifests with Option 2 (Bridge Machine) + +Instead of manually listing packages, use the manifests as input to the bridge machine download step: + +```bash +# On the bridge machine — download all Python packages from the manifest +mkdir -p ./wheel-cache +while IFS= read -r pkg; do + pip download -d ./wheel-cache "$pkg" 2>/dev/null || echo "WARN: $pkg not downloaded" +done < deps/python/master/manifest.txt + +# Upload all to Artifactory +ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" +REPO="pypi-local" +AF_CREDS="username:api-key" +for f in ./wheel-cache/*.whl ./wheel-cache/*.tar.gz; do + curl -u "$AF_CREDS" -T "$f" "${ARTIFACTORY_URL}/${REPO}/$(basename $f)" +done +``` + +For Node packages, provide `deps/node/master/manifest.txt` to your Artifactory admin for bulk npm import via the Artifactory web UI or REST API. + +### Known Caveats + +| Issue | Details | +|-------|---------| +| **Windows-only packages** | `pywin32` and `pywinpty` appear in the Python manifest because they are pulled in transitively by Jupyter. These are Windows-only and will fail `pip install` on Linux — skip them if your build agents are Linux-only. They are listed in `deps/python/master/verify-failed.txt`. 
| +| **Docker images not included** | The two Docker base images (`ghcr.io/astral-sh/uv:0.9.6` and `public.ecr.aws/lambda/python:3.12-arm64`) are **not** in the manifests. Handle these separately using the Docker section of Option 2. | +| **Multiple versions of same package** | The master manifest intentionally keeps all versions (e.g., `boto3==1.42.0` and `boto3==1.42.80`) because different Lambda functions pin to different versions. Import all of them. | +| **Re-run on dependency changes** | When any `requirements.txt` or `pyproject.toml` version is bumped, re-run the script to regenerate the manifests before the next Artifactory seeding operation. | + +### Keep Manifests Up to Date + +Add this to your workflow when updating dependencies: + +```bash +# After bumping any dependency version, regenerate manifests +python3 scripts/generate_lockfiles.py + +# Review what changed +git diff deps/python/master/manifest.txt deps/node/master/manifest.txt +``` + +--- + +## 6. Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped) **Best for:** Completely air-gapped environments with no internet access whatsoever. @@ -414,7 +533,7 @@ git commit -m "Add vendored dependencies for air-gapped deployment" --- -## 6. Option 4 — Point Build Tools to Artifactory Explicitly +## 7. Option 4 — Point Build Tools to Artifactory Explicitly **Best for:** When Artifactory *does* have the packages but the build tools are not configured to use it (wrong index URL). @@ -477,7 +596,7 @@ echo "registry=https://your-artifactory.company.com/artifactory/api/npm/npm-virt --- -## 7. Decision Guide +## 8. 
Decision Guide ```mermaid flowchart TD @@ -487,7 +606,8 @@ flowchart TD B -->|No| D{Is there a bridge machine\nwith internet AND\nArtifactory access?} - D -->|Yes| E[✅ Option 2\nDownload packages\non bridge machine\nUpload to Artifactory] + D -->|Yes| E[Run Option 5 first\ngenerate_lockfiles.py\nGet complete manifest] + E --> E2[✅ Option 2\nUse manifest to download\n& upload to Artifactory] D -->|No| F{Are packages in Artifactory\nbut URL is wrong?} @@ -496,14 +616,26 @@ flowchart TD F -->|No / Fully air-gapped| H[✅ Option 3\nVendor dependencies\ninto git repository] style C fill:#90EE90 - style E fill:#90EE90 + style E fill:#87CEEB + style E2 fill:#90EE90 style G fill:#90EE90 style H fill:#90EE90 ``` --- -## 8. Quick Reference: Copy-Paste Commands +## 9. Quick Reference: Copy-Paste Commands + +### Option 5 — Generate Complete Dependency Manifests (run this before Option 2) + +```bash +# Requires uv and pnpm installed +python3 scripts/generate_lockfiles.py + +# Outputs: +# deps/python/master/manifest.txt (~295 Python packages) +# deps/node/master/manifest.txt (~1,524 Node packages) +``` ### Identify Missing Packages (run this first) @@ -527,6 +659,24 @@ npm config set registry "${ARTIFACTORY_URL}/api/npm/npm-virtual/" make setup-venv ``` +### Option 2 — Download + Upload using generated manifest + +```bash +# Download all packages listed in the manifest +mkdir -p ./wheel-cache +while IFS= read -r pkg; do + pip download -d ./wheel-cache "$pkg" 2>/dev/null || echo "WARN: $pkg" +done < deps/python/master/manifest.txt + +# Upload to Artifactory +ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" +REPO="pypi-local" +AF_CREDS="username:api-key" +for f in ./wheel-cache/*.whl ./wheel-cache/*.tar.gz; do + curl -u "$AF_CREDS" -T "$f" "${ARTIFACTORY_URL}/${REPO}/$(basename $f)" +done +``` + ### Option 2 — Download + Upload specific missing package ```bash @@ -571,16 +721,24 @@ EOF ## Need Help? 
-If you are unsure which packages are failing or need help generating a specific package list for your Artifactory admin to upload, run: +If you are unsure which packages are failing or need to generate a complete package list for your Artifactory admin, run: ```bash -# Generate full resolved dependency list -cd lib/idp_common_pkg -pip-compile pyproject.toml --all-extras --output-file /tmp/full-requirements.txt 2>/dev/null -cat /tmp/full-requirements.txt +# Generate complete resolved manifests for ALL components (recommended) +python3 scripts/generate_lockfiles.py + +# Outputs a flat list of every package at every version: +# deps/python/master/manifest.txt — hand this to your Artifactory admin +# deps/node/master/manifest.txt — hand this to your Artifactory admin + +# Then verify packages are accessible from Artifactory +UV_INDEX_URL="https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/" \ + python3 scripts/verify_python_packages.py +NODE_REGISTRY_URL="https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ + python3 scripts/verify_node_packages.py ``` -This produces a flat list of every package (and their exact versions) that can be handed to your Artifactory admin for bulk upload. +This produces a flat, fully resolved list of every package (including transitive dependencies) across all project components that can be handed to your Artifactory admin for bulk upload. 
--- From 86db5aaba3b7ce5272edc602e1239d922465eb4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Mon, 20 Apr 2026 17:53:53 -0400 Subject: [PATCH 7/9] fix: address maintainer feedback on air-gapped CodeBuild PR - Dockerfile.optimized: explicit --index-url flag in uv pip install to prevent stale cache bypassing UV_INDEX_URL in air-gapped builds - patterns/unified/template.yaml: add NpmRegistryUrl, CodeBuildVpcId, CodeBuildSubnetIds, CodeBuildSecurityGroupId params; UseCodeBuildVpc condition; VpcConfig on DockerBuildProject; ec2 ENI perms on DockerBuildRole for VPC placement - template.yaml: same 4 new params with NoEcho on UvIndexUrl; pass all new params through to nested PATTERNSTACK; UICodeBuildProject gets NpmRegistryUrl env var + npm registry injection in buildspec - scripts/setup-airgapped-codebuild.sh: fix comment typo --account-id -> --account (match usage message) - scripts/deploy-vpc-endpoints.py: add ecr.api, ecr.dkr, codebuild endpoints for CodeBuild VPC placement; add --codebuild-endpoints flag and --security-group-id override arg - scripts/vpc-endpoints.yaml: add CreateEcrApiEndpoint, CreateEcrDkrEndpoint, CreateCodeBuildEndpoint parameters, conditions, and Interface endpoint resources (default false) - docs/deployment-private-network.md: fix --account-id -> --account; add NpmRegistryUrl + VPC params to table; add CodeBuild VPC placement section with endpoint instructions --- Dockerfile.optimized | 11 ++- docs/deployment-private-network.md | 32 +++++++-- patterns/unified/template.yaml | 58 +++++++++++++++ scripts/deploy-vpc-endpoints.py | 67 ++++++++++++++---- scripts/setup-airgapped-codebuild.sh | 2 +- scripts/vpc-endpoints.yaml | 102 +++++++++++++++++++++++++++ template.yaml | 39 ++++++++++ 7 files changed, 291 insertions(+), 20 deletions(-) diff --git a/Dockerfile.optimized b/Dockerfile.optimized index c64e2c6a7..9d86edaaf 100644 --- a/Dockerfile.optimized +++ b/Dockerfile.optimized @@ -49,12 +49,17 @@ COPY 
${FUNCTION_PATH}/requirements.txt* /build/ # Install all dependencies including idp_common_pkg in one step # Using mount from uv stage instead of COPY to avoid layer bloat +# When UV_INDEX_URL is set (air-gapped), pass --index-url explicitly rather than relying +# on the env var alone — this ensures the flag is always evaluated even when the uv +# cache layer was previously built against pypi.org. RUN --mount=from=uv,source=/uv,target=/bin/uv \ --mount=type=cache,target=/root/.cache/uv \ if [ -f /build/requirements.txt ]; then \ - sed 's|^\.\./\.\.\(/\.\.\)\?/lib/idp_common_pkg|/tmp/idp_common_pkg|' /build/requirements.txt > /tmp/requirements.txt && \ - uv pip install --python python3.12 --target "${LAMBDA_TASK_ROOT}" -r /tmp/requirements.txt && \ - rm /tmp/requirements.txt; \ + sed 's|^\.\./\.\.\(/\.\.\)\?/lib/idp_common_pkg|/tmp/idp_common_pkg|' /build/requirements.txt > /tmp/requirements.txt && \ + INDEX_ARG="" && \ + if [ -n "$UV_INDEX_URL" ]; then INDEX_ARG="--index-url $UV_INDEX_URL"; fi && \ + uv pip install --python python3.12 --target "${LAMBDA_TASK_ROOT}" $INDEX_ARG -r /tmp/requirements.txt && \ + rm /tmp/requirements.txt; \ fi && \ rm -rf /tmp/idp_common_pkg diff --git a/docs/deployment-private-network.md b/docs/deployment-private-network.md index 8cea308bf..43f829407 100644 --- a/docs/deployment-private-network.md +++ b/docs/deployment-private-network.md @@ -147,7 +147,7 @@ Run the provided helper script — it pulls both images, re-tags them into the d ```bash ./scripts/setup-airgapped-codebuild.sh \ --region \ - --account-id + --account ``` Example output: @@ -191,7 +191,27 @@ If the images are stored in Artifactory rather than ECR, you need to provide cre > **Security note**: credentials are fetched at build time via the IAM role — they are never stored in CloudFormation parameters or environment variable logs. 
-> **See also**: [Artifactory Dependency Workaround](./artifactory-dependency-workaround.md) for a comprehensive guide covering all four dependency resolution options. +#### CodeBuild VPC Placement (optional, for fully air-gapped subnets) + +If CodeBuild must run in subnets that have no internet access at all (no NAT Gateway), you can place CodeBuild inside your VPC using the `CodeBuildVpcId`, `CodeBuildSubnetIds`, and `CodeBuildSecurityGroupId` parameters. When these are set: + +- CodeBuild runs entirely within your VPC with no public internet access +- The subnets need VPC Interface Endpoints for: `ecr.api`, `ecr.dkr`, `codebuild`, `logs`, `secretsmanager` — plus a free S3 **Gateway** endpoint (pass `--route-table-ids` to the script below to create it) +- Run the VPC endpoint deployment script with the `--codebuild-endpoints` flag to create them: + +```bash +python scripts/deploy-vpc-endpoints.py \ + --vpc-id <vpc-id> \ + --stack-name IDP-PRIVATE \ + --security-group-id <security-group-id> \ + --subnet-ids <subnet-id-1>,<subnet-id-2> \ + --codebuild-endpoints \ + --region <region> +``` + +> **Note:** `CodeBuildVpcId` can be the same VPC as `ALBVpcId`, but `CodeBuildSubnetIds` should be private subnets that have the required VPC endpoints. These can be different from `LambdaSubnetIds`. + +> **See also**: [Artifactory Dependency Workaround](./artifactory-dependency-workaround.md) for a comprehensive guide covering all dependency resolution options. --- @@ -223,9 +243,13 @@ idp-cli deploy \ | `DocumentKnowledgeBase` | `DISABLED` | Disable Knowledge Base (avoids extra VPC endpoints) | | `UvImage` | ECR URI | *(Air-gapped only)* Internal ECR URI for the `uv` build tool image. Replaces `ghcr.io/astral-sh/uv:0.9.6`. | | `LambdaBaseImage` | ECR URI | *(Air-gapped only)* Internal ECR URI for the Lambda Python base image. Replaces `public.ecr.aws/lambda/python:3.12-arm64`. | -| `UvIndexUrl` | HTTPS URL | *(Air-gapped only)* Internal PyPI index URL (e.g. 
Artifactory). Replaces `pypi.org` for Lambda dependency installs. May contain auth credentials — stored with `NoEcho`. | +| `NpmRegistryUrl` | HTTPS URL | *(Air-gapped only)* Internal npm registry URL (e.g. Artifactory). Replaces `registry.npmjs.org` for the Web UI build. | | `ArtifactoryDockerUrl` | hostname | *(Air-gapped only)* Artifactory Docker registry hostname. Required when images are stored in Artifactory instead of ECR. | | `ArtifactoryCredentialsSecretArn` | secret ARN | *(Air-gapped only)* Secrets Manager secret ARN with Artifactory credentials. Required with `ArtifactoryDockerUrl`. | +| `CodeBuildVpcId` | VPC ID | *(Air-gapped only)* VPC ID to place CodeBuild in for network isolation. When set, both `DockerBuildProject` and `UICodeBuildProject` run inside this VPC. Requires `CodeBuildSubnetIds` and `CodeBuildSecurityGroupId`. | +| `CodeBuildSubnetIds` | subnet IDs | *(Air-gapped only)* Comma-separated private subnet IDs for CodeBuild VPC placement. Required with `CodeBuildVpcId`. | +| `CodeBuildSecurityGroupId` | SG ID | *(Air-gapped only)* Security group ID for CodeBuild VPC placement. Must allow outbound HTTPS (443) to VPC endpoints. Required with `CodeBuildVpcId`. | > **`AppSyncVisibility` is immutable** — it cannot be changed after the stack is created. To switch between GLOBAL and PRIVATE, delete and recreate the stack. ### Enterprise: deploying with a KMS-encrypted artifact bucket @@ -449,7 +473,7 @@ When `WebUIHosting=ALB` and `AppSyncVisibility=PRIVATE`, the following are handl | **`npm error engine Unsupported engine`** | Node.js 22.12+ required. `brew install node@22 && export PATH="/opt/homebrew/opt/node@22/bin:$PATH"` | | **Stack fails with `conflicting DNS domain`** | A VPC endpoint already exists for that service. Re-run `check-vpc-endpoints.sh` — it will detect this and set the right `Create*=false` flags. | | **UI loads but shows "network error"** | AppSync API is PRIVATE. 
From outside the VPC you need an SSM tunnel + `/etc/hosts` entry. From inside VPN/VPC it works automatically. | -| **CodeBuild fails: `pull access denied` / `name unknown` on image pull** | CodeBuild cannot reach public container registries (`ghcr.io`, `public.ecr.aws`). Run `scripts/setup-airgapped-codebuild.sh` to mirror images to ECR, then pass `UvImage=` and `LambdaBaseImage=` to the deploy command. See [Artifactory Dependency Workaround](./artifactory-dependency-workaround.md). | +| **CodeBuild fails: `pull access denied` / `name unknown` on image pull** | CodeBuild cannot reach public container registries (`ghcr.io`, `public.ecr.aws`). Run `scripts/setup-airgapped-codebuild.sh --region --account ` to mirror images to ECR, then pass `UvImage=` and `LambdaBaseImage=` to the deploy command. See [Artifactory Dependency Workaround](./artifactory-dependency-workaround.md). | | **CodeBuild fails: `AccessDenied: kms:Decrypt`** | The artifact bucket is KMS-encrypted but `ArtifactsBucketKmsKeyArn` was not passed to the deploy command. Redeploy with `--parameters "...ArtifactsBucketKmsKeyArn="`. | | **`UpdateDefaultConfig` custom resource fails with `NoSuchKey`** | Same root cause as above — `ConfigurationCopyFunction` silently skipped copying config files due to missing `kms:Decrypt`. Pass `ArtifactsBucketKmsKeyArn` and redeploy. | | **OCR Lambda times out / Textract calls hang** | Missing `textract` VPC endpoint. Lambda can't reach `com.amazonaws..textract` — security group only allows port 443 outbound to VPC endpoints. Run `deploy-vpc-endpoints.py` to add the missing endpoint. 
| diff --git a/patterns/unified/template.yaml b/patterns/unified/template.yaml index 1e6d65c1c..91db9201f 100644 --- a/patterns/unified/template.yaml +++ b/patterns/unified/template.yaml @@ -243,6 +243,39 @@ Parameters: --secret-string '{"username":"svc-build","password":""}' AllowedPattern: "^(|arn:aws[a-z-]*:secretsmanager:[a-z0-9-]+:[0-9]{12}:secret:.+)$" + NpmRegistryUrl: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal npm registry URL for any npm install steps in CodeBuild. + Replaces registry.npmjs.org when CodeBuild cannot reach the public internet. + Example: https://artifactory.company.com/artifactory/api/npm/npm-virtual/ + + CodeBuildVpcId: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) VPC ID to place CodeBuild in for network isolation. + When set, CodeBuild runs inside this VPC and cannot reach the public internet directly. + Requires CodeBuildSubnetIds and CodeBuildSecurityGroupId. + The subnets must have access to ECR, S3, Secrets Manager, Logs, and CodeBuild via + VPC endpoints — run scripts/deploy-vpc-endpoints.py --codebuild-endpoints to create them. + + CodeBuildSubnetIds: + Type: CommaDelimitedList + Default: "" + Description: >- + (Optional — air-gapped) Comma-separated subnet IDs for CodeBuild VPC placement. + Required when CodeBuildVpcId is set. Use private subnets that have VPC endpoints + for ECR (ecr.api, ecr.dkr), S3 (Gateway), Secrets Manager, Logs, and CodeBuild. + + CodeBuildSecurityGroupId: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Security group ID for CodeBuild VPC placement. + Required when CodeBuildVpcId is set. Must allow outbound HTTPS (443) to VPC endpoints. 
+ Conditions: HasGuardrailConfig: !And [ @@ -265,6 +298,7 @@ Conditions: HasUvIndexUrl: !Not [!Equals [!Ref UvIndexUrl, ""]] HasArtifactoryDockerUrl: !Not [!Equals [!Ref ArtifactoryDockerUrl, ""]] HasArtifactoryCredentials: !Not [!Equals [!Ref ArtifactoryCredentialsSecretArn, ""]] + UseCodeBuildVpc: !Not [!Equals [!Ref CodeBuildVpcId, ""]] Resources: @@ -2220,6 +2254,21 @@ Resources: - secretsmanager:GetSecretValue Resource: !Ref ArtifactoryCredentialsSecretArn - !Ref AWS::NoValue + # Required by CodeBuild when placed in a VPC (CodeBuildVpcId is set). + # CodeBuild needs to create/describe/delete ENIs to attach to the VPC. + - !If + - UseCodeBuildVpc + - Effect: Allow + Action: + - ec2:CreateNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DeleteNetworkInterface + - ec2:DescribeSubnets + - ec2:DescribeSecurityGroups + - ec2:DescribeVpcs + - ec2:CreateNetworkInterfacePermission + Resource: "*" + - !Ref AWS::NoValue DockerBuildProject: Type: AWS::CodeBuild::Project @@ -2267,6 +2316,15 @@ Resources: Value: !If [HasArtifactoryDockerUrl, !Ref ArtifactoryDockerUrl, ""] - Name: ARTIFACTORY_CREDENTIALS_SECRET_ARN Value: !If [HasArtifactoryCredentials, !Ref ArtifactoryCredentialsSecretArn, ""] + - Name: NPM_REGISTRY_URL + Value: !Ref NpmRegistryUrl + VpcConfig: !If + - UseCodeBuildVpc + - VpcId: !Ref CodeBuildVpcId + Subnets: !Ref CodeBuildSubnetIds + SecurityGroupIds: + - !Ref CodeBuildSecurityGroupId + - !Ref AWS::NoValue TimeoutInMinutes: 90 CodeBuildExecutionRole: diff --git a/scripts/deploy-vpc-endpoints.py b/scripts/deploy-vpc-endpoints.py index 638e1f41b..864cbc717 100644 --- a/scripts/deploy-vpc-endpoints.py +++ b/scripts/deploy-vpc-endpoints.py @@ -31,7 +31,7 @@ import boto3 from botocore.exceptions import ClientError, NoCredentialsError -# The 14 Interface endpoint services IDP requires +# The core Interface endpoint services IDP requires (Lambda + AppSync deployment) # Maps CFN parameter name → AWS service suffix REQUIRED_ENDPOINTS = { 
"CreateAppSyncApiEndpoint": "appsync-api", @@ -54,6 +54,16 @@ "CreateStsEndpoint": "sts", } +# Additional endpoints required when CodeBuild is placed in a VPC (CodeBuildVpcId is set). +# These default to "false" in vpc-endpoints.yaml and must be explicitly enabled. +CODEBUILD_ENDPOINTS = { + "CreateEcrApiEndpoint": "ecr.api", # docker pull metadata / image manifests + "CreateEcrDkrEndpoint": "ecr.dkr", # docker image layer downloads from ECR + "CreateCodeBuildEndpoint": "codebuild", # CodeBuild agent ↔ service communication + # NOTE: S3 access for ECR layers and build artifacts uses a Gateway endpoint (free). + # Set --route-table-ids when running this script to auto-create the S3 Gateway endpoint. +} + def parse_args(): parser = argparse.ArgumentParser( @@ -71,6 +81,23 @@ def parse_args(): "--subnet-ids", help="Comma-separated subnet IDs (auto-read from IDP stack if omitted)", ) + parser.add_argument( + "--security-group-id", + help=( + "Security group ID to attach to VPC endpoints (auto-read from IDP stack output " + "'LambdaVpcSecurityGroupId' if omitted). Required when AppSyncVisibility=GLOBAL " + "or when using CodeBuild VPC endpoints without private AppSync." + ), + ) + parser.add_argument( + "--codebuild-endpoints", + action="store_true", + help=( + "Also create CodeBuild VPC endpoints (ecr.api, ecr.dkr, codebuild). " + "Required when CodeBuildVpcId is set and CodeBuild cannot reach the internet. " + "Ensure --route-table-ids is also set to create the required S3 Gateway endpoint." + ), + ) parser.add_argument("--region", default=None, help="AWS region (default: from AWS config)") parser.add_argument("--profile", default=None, help="AWS CLI profile name") parser.add_argument( @@ -166,20 +193,24 @@ def main(): print("❌ No AWS credentials found. 
Configure with 'aws configure' or set environment variables.") sys.exit(1) - # ── Read Lambda SG from IDP stack ──────────────────────────────────── + # ── Read Lambda SG from IDP stack (or use --security-group-id override) ─ print(f"🔍 Reading IDP stack outputs from: {args.stack_name}") - try: - lambda_sg = get_stack_output(cf, args.stack_name, "LambdaVpcSecurityGroupId") - except ClientError as e: - print(f"❌ Could not read stack '{args.stack_name}': {e}") - print(" Make sure the stack is CREATE_COMPLETE and AppSyncVisibility=PRIVATE.") - sys.exit(1) + lambda_sg = args.security_group_id + if not lambda_sg: + try: + lambda_sg = get_stack_output(cf, args.stack_name, "LambdaVpcSecurityGroupId") + except ClientError as e: + print(f"❌ Could not read stack '{args.stack_name}': {e}") + print(" Make sure the stack is CREATE_COMPLETE and the stack name is correct.") + sys.exit(1) if not lambda_sg: - print(f"❌ Output 'LambdaVpcSecurityGroupId' not found in stack '{args.stack_name}'.") - print(" Make sure AppSyncVisibility=PRIVATE was set when the stack was created.") + print(f"❌ Could not determine security group ID.") + print(" Either:") + print(" • Set AppSyncVisibility=PRIVATE when deploying the IDP stack, OR") + print(" • Pass --security-group-id explicitly") sys.exit(1) - print(f" Lambda SG: {lambda_sg}") + print(f" Security Group: {lambda_sg}") # ── Read subnet IDs ─────────────────────────────────────────────────── subnet_ids = args.subnet_ids @@ -191,6 +222,12 @@ def main(): sys.exit(1) print(f" Subnets: {subnet_ids}") + # ── Build endpoint map (core + optional CodeBuild endpoints) ───────── + endpoints_to_check = dict(REQUIRED_ENDPOINTS) + if args.codebuild_endpoints: + print(" CodeBuild VPC endpoints enabled (--codebuild-endpoints flag set)") + endpoints_to_check.update(CODEBUILD_ENDPOINTS) + # ── Check each endpoint ─────────────────────────────────────────────── print(f"\n🔍 Checking existing VPC endpoints in {args.vpc_id} (region: {region})...\n") @@ -198,7 
+235,7 @@ def main(): create_list = [] # service suffixes to create skip_list = [] # service suffixes to skip - for param, service in sorted(REQUIRED_ENDPOINTS.items()): + for param, service in sorted(endpoints_to_check.items()): full = f"com.amazonaws.{region}.{service}" if endpoint_exists(ec2, args.vpc_id, region, service): print(f" ✅ {full:<50} already exists — will skip") @@ -208,6 +245,12 @@ def main(): print(f" ➕ {full:<50} missing — will create") create_list.append(service) + # For CodeBuild endpoints not in the check list, explicitly set them to "false" + # so the CFN template skips creating them (they default to "false" but be explicit) + if not args.codebuild_endpoints: + for param in CODEBUILD_ENDPOINTS: + skip_params[param] = "false" + print(f"\n📊 Summary: {len(create_list)} to create, {len(skip_list)} already exist") if not create_list: diff --git a/scripts/setup-airgapped-codebuild.sh b/scripts/setup-airgapped-codebuild.sh index 038f4c3c2..6c167d004 100755 --- a/scripts/setup-airgapped-codebuild.sh +++ b/scripts/setup-airgapped-codebuild.sh @@ -17,7 +17,7 @@ # Usage: # bash scripts/setup-airgapped-codebuild.sh \ # --region \ -# --account \ +# --account \ # [--repo-name ] # default: idp-base-images # [--pypi-url ] # optional: your internal PyPI URL # diff --git a/scripts/vpc-endpoints.yaml b/scripts/vpc-endpoints.yaml index 7898688e7..72d70b49e 100644 --- a/scripts/vpc-endpoints.yaml +++ b/scripts/vpc-endpoints.yaml @@ -57,6 +57,13 @@ Parameters: # (If you are accessing the UI via VPN/Direct Connect in production, # you do NOT need ssmmessages or ec2messages.) 
# + # REQUIRED only when CodeBuild is placed in a VPC (CodeBuildVpcId is set): + # ecr.api — ECR API endpoint (docker pull metadata / image manifest) + # ecr.dkr — ECR DKR endpoint (docker layer downloads) + # codebuild — CodeBuild service endpoint (build agent ↔ service communication) + # NOTE: S3 access for ECR layer data and build artifacts requires a free S3 Gateway + # endpoint — set RouteTableIds to automatically create it. + # # Set any flag to "false" if the endpoint already exists in the VPC. # Use scripts/check-vpc-endpoints.sh to auto-detect and generate the correct deploy command. # ────────────────────────────────────────────────────────── @@ -162,6 +169,37 @@ Parameters: that call STS AssumeRole (bda/bda_service.py, bda/blueprint_optimizer.py). Set to "false" if already exists. + # ── CodeBuild VPC endpoints (only needed when CodeBuildVpcId is set) ────── + CreateEcrApiEndpoint: + Type: String + Default: "false" + AllowedValues: ["true", "false"] + Description: >- + Create the ecr.api Interface endpoint. Required when CodeBuild is placed in a + VPC (CodeBuildVpcId is set) and needs to pull base images from ECR. + Handles docker pull metadata and image manifests. + Set to "false" if already exists or CodeBuild is not in a VPC. + + CreateEcrDkrEndpoint: + Type: String + Default: "false" + AllowedValues: ["true", "false"] + Description: >- + Create the ecr.dkr Interface endpoint. Required when CodeBuild is placed in a + VPC (CodeBuildVpcId is set) and needs to pull base images from ECR. + Handles docker image layer downloads. Must be used together with ecr.api endpoint. + Set to "false" if already exists or CodeBuild is not in a VPC. + + CreateCodeBuildEndpoint: + Type: String + Default: "false" + AllowedValues: ["true", "false"] + Description: >- + Create the codebuild Interface endpoint. Required when CodeBuild is placed in a + VPC (CodeBuildVpcId is set). Allows the CodeBuild agent to communicate with the + CodeBuild service without internet access. 
+ Set to "false" if already exists or CodeBuild is not in a VPC. + Conditions: CreateGatewayEndpoints: !Not - !Equals [ !Join [ "", !Ref RouteTableIds ], "" ] @@ -181,6 +219,9 @@ Conditions: ShouldCreateAthenaEndpoint: !Equals [ !Ref CreateAthenaEndpoint, "true" ] ShouldCreateTextractEndpoint: !Equals [ !Ref CreateTextractEndpoint, "true" ] ShouldCreateStsEndpoint: !Equals [ !Ref CreateStsEndpoint, "true" ] + ShouldCreateEcrApiEndpoint: !Equals [ !Ref CreateEcrApiEndpoint, "true" ] + ShouldCreateEcrDkrEndpoint: !Equals [ !Ref CreateEcrDkrEndpoint, "true" ] + ShouldCreateCodeBuildEndpoint: !Equals [ !Ref CreateCodeBuildEndpoint, "true" ] Resources: @@ -522,6 +563,67 @@ Resources: - Key: Environment Value: !Ref EnvironmentTag + ########################################################################## + # CodeBuild VPC Endpoints (only needed when CodeBuildVpcId is set) + ########################################################################## + + EcrApiVpcEndpoint: + Type: AWS::EC2::VPCEndpoint + Condition: ShouldCreateEcrApiEndpoint + Properties: + VpcId: !Ref VpcId + ServiceName: !Sub "com.amazonaws.${AWS::Region}.ecr.api" + VpcEndpointType: Interface + SubnetIds: !Ref SubnetIds + SecurityGroupIds: + - !Ref VpcEndpointSecurityGroup + PrivateDnsEnabled: true + Tags: + - Key: Name + Value: !Sub "${IDPStackName}-ecr-api" + - Key: IDPStack + Value: !Ref IDPStackName + - Key: Environment + Value: !Ref EnvironmentTag + + EcrDkrVpcEndpoint: + Type: AWS::EC2::VPCEndpoint + Condition: ShouldCreateEcrDkrEndpoint + Properties: + VpcId: !Ref VpcId + ServiceName: !Sub "com.amazonaws.${AWS::Region}.ecr.dkr" + VpcEndpointType: Interface + SubnetIds: !Ref SubnetIds + SecurityGroupIds: + - !Ref VpcEndpointSecurityGroup + PrivateDnsEnabled: true + Tags: + - Key: Name + Value: !Sub "${IDPStackName}-ecr-dkr" + - Key: IDPStack + Value: !Ref IDPStackName + - Key: Environment + Value: !Ref EnvironmentTag + + CodeBuildVpcEndpoint: + Type: AWS::EC2::VPCEndpoint + Condition: 
ShouldCreateCodeBuildEndpoint + Properties: + VpcId: !Ref VpcId + ServiceName: !Sub "com.amazonaws.${AWS::Region}.codebuild" + VpcEndpointType: Interface + SubnetIds: !Ref SubnetIds + SecurityGroupIds: + - !Ref VpcEndpointSecurityGroup + PrivateDnsEnabled: true + Tags: + - Key: Name + Value: !Sub "${IDPStackName}-codebuild" + - Key: IDPStack + Value: !Ref IDPStackName + - Key: Environment + Value: !Ref EnvironmentTag + ########################################################################## # Gateway Endpoints (free — only created when RouteTableIds are provided) ########################################################################## diff --git a/template.yaml b/template.yaml index 2edf0eb03..fc8390ff1 100644 --- a/template.yaml +++ b/template.yaml @@ -579,9 +579,11 @@ Parameters: UvIndexUrl: Type: String Default: "" + NoEcho: true Description: >- (Optional — air-gapped) Internal PyPI index URL for uv to install Lambda requirements.txt packages. Replaces pypi.org when CodeBuild cannot reach the public internet. + May contain authentication credentials in the URL — stored with NoEcho. Example: https://artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ ArtifactoryDockerUrl: @@ -604,6 +606,39 @@ Parameters: --secret-string '{"username":"svc-build","password":""}' AllowedPattern: "^(|arn:aws[a-z-]*:secretsmanager:[a-z0-9-]+:[0-9]{12}:secret:.+)$" + NpmRegistryUrl: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Internal npm registry URL for the Web UI CodeBuild build. + Replaces registry.npmjs.org when CodeBuild cannot reach the public internet. + Example: https://artifactory.company.com/artifactory/api/npm/npm-virtual/ + + CodeBuildVpcId: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) VPC ID to place CodeBuild in for network isolation. + When set, both DockerBuildProject and UICodeBuildProject run inside this VPC. + Requires CodeBuildSubnetIds and CodeBuildSecurityGroupId. 
+ The subnets must have VPC endpoints for ECR (ecr.api, ecr.dkr), S3 (Gateway), + Secrets Manager, Logs, and CodeBuild — run scripts/deploy-vpc-endpoints.py + --codebuild-endpoints to create them. + + CodeBuildSubnetIds: + Type: CommaDelimitedList + Default: "" + Description: >- + (Optional — air-gapped) Comma-separated subnet IDs for CodeBuild VPC placement. + Required when CodeBuildVpcId is set. + + CodeBuildSecurityGroupId: + Type: String + Default: "" + Description: >- + (Optional — air-gapped) Security group ID for CodeBuild VPC placement. + Required when CodeBuildVpcId is set. Must allow outbound HTTPS (443) to VPC endpoints. + Rules: MLflowTrackingURIRequired: RuleCondition: !Equals [ !Ref EnableMLflow, "true" ] @@ -1582,6 +1617,10 @@ Resources: UvIndexUrl: !Ref UvIndexUrl ArtifactoryDockerUrl: !Ref ArtifactoryDockerUrl ArtifactoryCredentialsSecretArn: !Ref ArtifactoryCredentialsSecretArn + NpmRegistryUrl: !Ref NpmRegistryUrl + CodeBuildVpcId: !Ref CodeBuildVpcId + CodeBuildSubnetIds: !Join [",", !Ref CodeBuildSubnetIds] # CommaDelimitedList must be joined: nested-stack parameter values are strings + CodeBuildSecurityGroupId: !Ref CodeBuildSecurityGroupId ########################################################################## # Encryption key From 3ef67882dba6e71bba1a508961b36f4cb4b19948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Mon, 20 Apr 2026 17:55:07 -0400 Subject: [PATCH 8/9] docs: remove artifactory-dependency-workaround.md from this branch Moving to a separate PR to keep this PR focused on the air-gapped CodeBuild infrastructure changes. The workaround guide is valuable but better reviewed independently. 
--- docs/artifactory-dependency-workaround.md | 745 ---------------------- 1 file changed, 745 deletions(-) delete mode 100644 docs/artifactory-dependency-workaround.md diff --git a/docs/artifactory-dependency-workaround.md b/docs/artifactory-dependency-workaround.md deleted file mode 100644 index 2a16c3a7a..000000000 --- a/docs/artifactory-dependency-workaround.md +++ /dev/null @@ -1,745 +0,0 @@ ---- -title: "Artifactory Dependency Workaround Guide" ---- - -# Artifactory Dependency Workaround Guide - -**Document Purpose:** This guide describes how to resolve dependency download failures when Artifactory is used as the only package registry and public registries (PyPI, npm, Docker Hub, GitHub Container Registry) are blocked. - ---- - -## Table of Contents - -1. [Overview & Root Cause](#1-overview--root-cause) -2. [Complete Dependency Inventory](#2-complete-dependency-inventory) -3. [Option 1 — Configure Artifactory as a Remote Proxy (Recommended)](#3-option-1--configure-artifactory-as-a-remote-proxy-recommended) -4. [Option 2 — Bridge Machine: Download & Upload Missing Packages](#4-option-2--bridge-machine-download--upload-missing-packages) -5. [Option 5 — Automated Manifest Generation (Recommended Pre-Step for Option 2)](#5-option-5--automated-manifest-generation-recommended-pre-step-for-option-2) -6. [Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped)](#6-option-3--vendor-dependencies-into-the-repository-fully-air-gapped) -7. [Option 4 — Point Build Tools to Artifactory Explicitly](#7-option-4--point-build-tools-to-artifactory-explicitly) -8. [Decision Guide](#8-decision-guide) -9. [Quick Reference: Copy-Paste Commands](#9-quick-reference-copy-paste-commands) - ---- - -## 1. 
Overview & Root Cause - -### What Is Happening - -This project downloads dependencies from public registries during builds: - -| Tool | Registry | Used for | -|------|----------|----------| -| `pip` / `uv` | `https://pypi.org/simple/` | Python packages | -| `npm` | `https://registry.npmjs.org/` | JavaScript / UI packages | -| `docker pull` | `ghcr.io` (GitHub Container Registry) | `uv` build tool image | -| `docker pull` | `public.ecr.aws` | AWS Lambda Python base image | - -When **Artifactory is the only allowed registry** and packages are not in its cache, installations fail with errors such as: - -``` -ERROR: Could not find a version that satisfies the requirement strands-agents==1.14.0 -ERROR: No matching distribution found for bedrock-agentcore>=0.1.1 -npm ERR! code E404 - Not Found -``` - -### Why Packages Are Missing from Artifactory - -Artifactory caches packages **on first request**. Packages that have never been requested, or were added after the last cache refresh, will be missing. The solution is one of: - -- Enable Artifactory to proxy public registries (preferred) -- Manually upload the missing packages to Artifactory -- Vendor the packages directly in the repository - ---- - -## 2. 
Complete Dependency Inventory - -### 2.1 Python Dependencies - -#### Core Library (`lib/idp_common_pkg/pyproject.toml`) - -``` -boto3==1.42.0 -jsonschema>=4.25.1 -pydantic>=2.12.0 -deepdiff>=6.0.0 -PyYAML>=6.0.0 -Pillow==12.1.1 -pypdfium2>=5.5.0 -amazon-textract-textractor[pandas]==1.9.2 -numpy==1.26.4 -pandas==2.2.3 -openpyxl==3.1.5 -python-docx==1.2.0 -strands-agents==1.14.0 -strands-agents-tools==0.2.22 -bedrock-agentcore>=0.1.1 -stickler-eval==0.1.4 -genson==1.3.0 -munkres>=1.1.4 -requests==2.33.0 -pyarrow==20.0.0 -aws-lambda-powertools>=3.2.0 -jsonpatch==1.33 -email-validator>=2.3.0 -tabulate>=0.9.0 -datamodel-code-generator>=0.25.0 -mypy-boto3-bedrock-runtime>=1.39.0 -ruamel-yaml>=0.17.0,<0.19.0 -aws-xray-sdk>=2.14.0 -genson==1.3.0 -``` - -#### Lambda Function Dependencies (`src/lambda/*/requirements.txt`) - -``` -huggingface-hub==0.20.0 -cfnresponse -crhelper~=2.0.10 -aws-requests-auth==0.4.3 -bedrock_agentcore_starter_toolkit -urllib3>=1.26.0 -pypdf>=4.0.0 -``` - -#### Development / Test Dependencies - -``` -pytest>=7.4.0 -pytest-cov>=4.1.0 -pytest-xdist>=3.3.1 -pytest-asyncio>=1.1.0 -pytest-mock>=3.11.1 -moto[s3]==5.1.8 -ruff>=0.14.0 -typer>=0.19.2 -rich>=13.0.0 -cfn-lint -basedpyright -build==1.3.0 -python-dotenv>=1.1.0 -``` - -### 2.2 Node.js (npm) Dependencies - -Located in `src/ui/package.json` and `docs-site/package.json`. 
- -**Key packages include:** -- `react`, `react-dom` -- `@aws-amplify/ui-react` -- `@aws-appsync/gql` -- AWS AppSync codegen libraries -- `astro` (docs site) - -To get the full list: -```bash -cat src/ui/package.json | jq '.dependencies, .devDependencies' -cat docs-site/package.json | jq '.dependencies, .devDependencies' -``` - -### 2.3 Docker Base Images - -| Image | Registry | Purpose | -|-------|----------|---------| -| `ghcr.io/astral-sh/uv:0.9.6` | GitHub Container Registry | `uv` Python package installer (multi-stage build) | -| `public.ecr.aws/lambda/python:3.12-arm64` | AWS Public ECR | Lambda function runtime base image | - ---- - -## 3. Option 1 — Configure Artifactory as a Remote Proxy *(Recommended)* - -**Best for:** Long-term fix. All future installs work transparently. No code changes required. - -**Who performs this:** Your Artifactory administrator. - -### Steps for Artifactory Admin - -#### A. Add PyPI Remote Repository - -1. Log into Artifactory → **Administration** → **Repositories** → **Remote** -2. Click **New Remote Repository** -3. Set: - - **Package Type:** `PyPI` - - **Repository Key:** `pypi-remote` (or any name) - - **URL:** `https://pypi.org/` -4. Save - -#### B. Add npm Remote Repository - -1. Click **New Remote Repository** -2. Set: - - **Package Type:** `npm` - - **Repository Key:** `npm-remote` - - **URL:** `https://registry.npmjs.org` -3. Save - -#### C. Add Docker Remote Repositories - -For `ghcr.io` (GitHub Container Registry): -1. Click **New Remote Repository** -2. Set: - - **Package Type:** `Docker` - - **Repository Key:** `ghcr-remote` - - **URL:** `https://ghcr.io` -3. Save - -For AWS Public ECR (`public.ecr.aws`): -1. Click **New Remote Repository** -2. Set: - - **Package Type:** `Docker` - - **Repository Key:** `ecr-public-remote` - - **URL:** `https://public.ecr.aws` -3. Save - -#### D. 
Create Virtual Repositories (Aggregate local + remote) - -Create virtual repositories that front your local + remote repos for seamless access: -- `pypi-virtual` → includes `pypi-local` + `pypi-remote` -- `npm-virtual` → includes `npm-local` + `npm-remote` -- `docker-virtual` → includes `docker-local` + `ghcr-remote` + `ecr-public-remote` - -### Developer Configuration (after admin sets up proxy) - -```bash -# Set pip to use Artifactory -export PIP_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ -export PIP_TRUSTED_HOST=your-artifactory.company.com - -# Set uv to use Artifactory -export UV_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ - -# Set npm to use Artifactory -npm config set registry https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/ - -# Then run setup as normal -make setup-venv -``` - ---- - -## 4. Option 2 — Bridge Machine: Download & Upload Missing Packages - -**Best for:** When you cannot change Artifactory config but have a machine that can reach the internet AND Artifactory. - -```mermaid -flowchart LR - Internet[Public Internet\nPyPI / npm / ghcr.io / public.ecr.aws] - Bridge[Bridge Machine\ninternet + Artifactory access] - AF[Artifactory\ninternal only] - Dev[Developer Machine\nArtifactory only] - - Internet -->|1. Download packages| Bridge - Bridge -->|2. Upload to Artifactory| AF - AF -->|3. Install packages| Dev -``` - -> **Tip:** Use [Option 5 — Automated Manifest Generation](#5-option-5--automated-manifest-generation-recommended-pre-step-for-option-2) first to get a complete, fully-resolved list of all packages (including transitive dependencies) before running the bridge machine download steps below. This avoids the common mistake of only downloading direct dependencies and missing transitive ones.
- -### Python Packages - -**On the bridge machine (internet access):** - -```bash -# If you have run Option 5, use the generated manifest directly: -mkdir -p ./wheel-cache -while IFS= read -r pkg; do - pip download -d ./wheel-cache "$pkg" 2>/dev/null || echo "WARN: $pkg not downloaded" -done < deps/python/master/manifest.txt - -# Alternatively, download specific packages manually: -# For Linux ARM64 (used by Lambda container images) -pip download \ - --platform manylinux2014_aarch64 \ - --python-version 312 \ - --only-binary=:all: \ - -d ./wheel-cache \ - "boto3==1.42.0" \ - "strands-agents==1.14.0" \ - "strands-agents-tools==0.2.22" \ - "bedrock-agentcore>=0.1.1" \ - "stickler-eval==0.1.4" \ - "Pillow==12.1.1" \ - "pypdfium2>=5.5.0" \ - "pyarrow==20.0.0" \ - "numpy==1.26.4" \ - "huggingface-hub==0.20.0" \ - "cfnresponse" \ - "crhelper~=2.0.10" \ - "aws-requests-auth==0.4.3" \ - "bedrock_agentcore_starter_toolkit" - -# Download remaining packages for local dev (your OS/arch) -pip download \ - -d ./wheel-cache-local \ - -e "lib/idp_common_pkg[all,dev,test]" \ - -e lib/idp_cli_pkg \ - -e lib/idp_sdk \ - -e lib/idp_mcp_connector_pkg -``` - -**Upload to Artifactory via REST API:** - -```bash -ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" -REPO="pypi-local" -AF_USER="your-username" -AF_PASSWORD="your-password-or-api-key" - -for whl in ./wheel-cache/*.whl ./wheel-cache/*.tar.gz; do - filename=$(basename "$whl") - echo "Uploading $filename ..." - curl -u "${AF_USER}:${AF_PASSWORD}" \ - -T "$whl" \ - "${ARTIFACTORY_URL}/${REPO}/${filename}" -done -``` - -**Or upload via Artifactory Web UI:** -1. Navigate to **Artifactory** → **Artifacts** -2. Select your `pypi-local` repository -3. 
Click **Deploy** → Upload the `.whl` and `.tar.gz` files from `./wheel-cache/` - -### Docker Images - -```bash -# Pull from public registries -docker pull ghcr.io/astral-sh/uv:0.9.6 -docker pull public.ecr.aws/lambda/python:3.12-arm64 - -# Re-tag for your Artifactory Docker registry -docker tag ghcr.io/astral-sh/uv:0.9.6 \ - your-artifactory.company.com/docker-local/astral-sh/uv:0.9.6 - -docker tag public.ecr.aws/lambda/python:3.12-arm64 \ - your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 - -# Push to Artifactory -docker login your-artifactory.company.com -docker push your-artifactory.company.com/docker-local/astral-sh/uv:0.9.6 -docker push your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 -``` - -Then update `Dockerfile.optimized` lines 1 and 6: -```dockerfile -# Line 1 - change FROM -FROM your-artifactory.company.com/docker-local/astral-sh/uv:0.9.6 AS uv - -# Line 6 - change FROM -FROM your-artifactory.company.com/docker-local/lambda/python:3.12-arm64 AS builder -``` - ---- - -## 5. Option 5 — Automated Manifest Generation *(Recommended Pre-Step for Option 2)* - -**Best for:** When you need to seed Artifactory with a complete, verified list of every resolved dependency — including all transitive dependencies — before executing the bridge machine workflow. Run this once on any internet-connected machine to produce authoritative manifests. - -**Why this matters:** The manual approach in Option 2 lists only *direct* top-level dependencies. In practice, `boto3` alone pulls in dozens of sub-packages, and multi-platform build tools (esbuild, sharp, rollup) each have 20+ platform-specific optional binaries. Manually enumerating these is error-prone and almost always incomplete. The `scripts/generate_lockfiles.py` script uses `uv lock` and `pnpm install --lockfile-only` to perform full dependency-graph resolution and capture every package at its locked version.
- -```mermaid -flowchart LR - A[Internet-connected machine] -->|Run generate_lockfiles.py| B[Per-component lockfiles\ndeps/python/ and deps/node/] - B -->|Merge| C[Master manifests\ndeps/python/master/manifest.txt\ndeps/node/master/manifest.txt] - C -->|Input to| D[Option 2: Bridge Machine\nDownload & Upload workflow] - D --> E[Artifactory seeded\nBuilds work] -``` - -### Prerequisites - -```bash -# Install uv (Python resolver) -curl -LsSf https://astral.sh/uv/install.sh | sh - -# Install pnpm (Node resolver) -npm install -g pnpm -``` - -### Generate the Manifests - -```bash -# From the repo root on any internet-connected machine -python3 scripts/generate_lockfiles.py - -# Python only (skip Node resolution) -python3 scripts/generate_lockfiles.py --python-only - -# Node only (skip Python resolution) -python3 scripts/generate_lockfiles.py --node-only -``` - -This scans all `requirements.txt`, `pyproject.toml` (in `lib/`), and `package.json` files in the repo. It generates: - -| Output | Contents | -|--------|----------| -| `deps/python/master/manifest.txt` | Flat list of `name==version` for every resolved Python package (~295 packages) | -| `deps/node/master/manifest.txt` | Flat list of `name@version` for every resolved Node package (~1,524 packages) | -| `deps/python/<component>/uv.lock` | Per-component Python lockfiles for traceability | -| `deps/node/<component>/pnpm-lock.yaml` | Per-component Node lockfiles for traceability | - -### Verify Packages Are Accessible from Artifactory - -Before running a real build, use the verification scripts to confirm packages are reachable.
Set the registry URL to your Artifactory endpoint: - -```bash -# Verify Python packages -UV_INDEX_URL="https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/" \ - python3 scripts/verify_python_packages.py - -# Results written to: -# deps/python/master/verify-passed.txt -# deps/python/master/verify-failed.txt - -# Verify Node packages -NODE_REGISTRY_URL="https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ - python3 scripts/verify_node_packages.py - -# Results written to: -# deps/node/master/verify-passed.txt -# deps/node/master/verify-failed.txt -``` - -### Use the Manifests with Option 2 (Bridge Machine) - -Instead of manually listing packages, use the manifests as input to the bridge machine download step: - -```bash -# On the bridge machine — download all Python packages from the manifest -mkdir -p ./wheel-cache -while IFS= read -r pkg; do - pip download -d ./wheel-cache "$pkg" 2>/dev/null || echo "WARN: $pkg not downloaded" -done < deps/python/master/manifest.txt - -# Upload all to Artifactory -ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" -REPO="pypi-local" -AF_CREDS="username:api-key" -for f in ./wheel-cache/*.whl ./wheel-cache/*.tar.gz; do - curl -u "$AF_CREDS" -T "$f" "${ARTIFACTORY_URL}/${REPO}/$(basename $f)" -done -``` - -For Node packages, provide `deps/node/master/manifest.txt` to your Artifactory admin for bulk npm import via the Artifactory web UI or REST API. - -### Known Caveats - -| Issue | Details | -|-------|---------| -| **Windows-only packages** | `pywin32` and `pywinpty` appear in the Python manifest because they are pulled in transitively by Jupyter. These are Windows-only and will fail `pip install` on Linux — skip them if your build agents are Linux-only. They are listed in `deps/python/master/verify-failed.txt`. 
| -| **Docker images not included** | The two Docker base images (`ghcr.io/astral-sh/uv:0.9.6` and `public.ecr.aws/lambda/python:3.12-arm64`) are **not** in the manifests. Handle these separately using the Docker section of Option 2. | -| **Multiple versions of same package** | The master manifest intentionally keeps all versions (e.g., `boto3==1.42.0` and `boto3==1.42.80`) because different Lambda functions pin to different versions. Import all of them. | -| **Re-run on dependency changes** | When any `requirements.txt`, `pyproject.toml`, or `package.json` version is bumped, re-run `scripts/generate_lockfiles.py` to regenerate the manifests before the next Artifactory seeding operation. | - -### Keep Manifests Up to Date - -Add this to your workflow when updating dependencies: - -```bash -# After bumping any dependency version, regenerate manifests -python3 scripts/generate_lockfiles.py - -# Review what changed -git diff deps/python/master/manifest.txt deps/node/master/manifest.txt -``` - ---- - -## 6. Option 3 — Vendor Dependencies into the Repository (Fully Air-Gapped) - -**Best for:** Completely air-gapped environments with no internet access whatsoever. - -This involves downloading all packages **once** on an internet-connected machine and committing them to the repository, so no registry is needed at install time.
- -### Setup (on a machine with internet access) - -```bash -# Create vendor directories -mkdir -p vendor/python vendor/npm - -# Download all Python wheels for local development -pip download \ - -d vendor/python \ - "boto3==1.42.0" \ - "jsonschema>=4.25.1" \ - "pydantic>=2.12.0" \ - "deepdiff>=6.0.0" \ - "PyYAML>=6.0.0" \ - "Pillow==12.1.1" \ - "pypdfium2>=5.5.0" \ - "strands-agents==1.14.0" \ - "strands-agents-tools==0.2.22" \ - "bedrock-agentcore>=0.1.1" \ - "stickler-eval==0.1.4" \ - "numpy==1.26.4" \ - "pandas==2.2.3" \ - "pyarrow==20.0.0" \ - "requests==2.33.0" \ - "huggingface-hub==0.20.0" \ - "cfnresponse" \ - "crhelper~=2.0.10" \ - "aws-requests-auth==0.4.3" \ - "pytest>=7.4.0" \ - "moto[s3]==5.1.8" \ - "ruff>=0.14.0" \ - "typer>=0.19.2" \ - "rich>=13.0.0" - -# Pack npm dependencies -cd src/ui && npm pack --pack-destination ../../vendor/npm -cd ../../docs-site && npm pack --pack-destination ../vendor/npm -cd .. -``` - -### Install from Vendor Directory (no network needed) - -```bash -# Python -pip install --no-index --find-links vendor/python \ - -e "lib/idp_common_pkg[all,dev,test]" \ - -e lib/idp_cli_pkg \ - -e lib/idp_sdk \ - -e lib/idp_mcp_connector_pkg - -# npm (configure local registry) -cd src/ui && npm install --prefer-offline --cache ../../vendor/npm -``` - -### Add a Makefile Target for Vendored Install - -Add this to `Makefile`: - -```makefile -setup-vendored: ## Install from local vendor/ directory (no network required) - @echo "Installing from vendor directory (no-index mode)..." 
- $(PIP) install --no-index --find-links vendor/python \ - -e "lib/idp_common_pkg[all,dev,test]" \ - -e lib/idp_cli_pkg \ - -e lib/idp_sdk \ - -e lib/idp_mcp_connector_pkg - @echo -e "$(GREEN)✅ Vendored install complete!$(NC)" -``` - -### Add `vendor/` to `.gitignore` or commit it - -If committing to git (fully self-contained): -```bash -# Remove vendor/ from .gitignore if present -grep -v "^vendor/" .gitignore > .gitignore.tmp && mv .gitignore.tmp .gitignore - -# Commit -git add vendor/ -git commit -m "Add vendored dependencies for air-gapped deployment" -``` - ---- - -## 7. Option 4 — Point Build Tools to Artifactory Explicitly - -**Best for:** When Artifactory *does* have the packages but the build tools are not configured to use it (wrong index URL). - -### Configure pip - -Create or update `~/.pip/pip.conf` (macOS/Linux) or `%APPDATA%\pip\pip.ini` (Windows): - -```ini -[global] -index-url = https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ -trusted-host = your-artifactory.company.com -``` - -Or use environment variables (temporary, no file changes): - -```bash -export PIP_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ -export PIP_TRUSTED_HOST=your-artifactory.company.com -``` - -### Configure uv - -`uv` (used in `Dockerfile.optimized` and optionally in CI) reads: - -```bash -export UV_INDEX_URL=https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ -``` - -Or create `~/.config/uv/uv.toml`: - -```toml -[pip] -index-url = "https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/" -``` - -### Configure npm - -```bash -# Set globally -npm config set registry https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/ - -# OR create a project-level .npmrc file -echo "registry=https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ - > src/ui/.npmrc -echo 
"registry=https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ - > docs-site/.npmrc -``` - -### Configure Docker - -```bash -# Configure Docker to use Artifactory as a mirror -# Edit /etc/docker/daemon.json (Linux) or Docker Desktop settings: -{ - "registry-mirrors": [ - "https://your-artifactory.company.com/artifactory/docker-virtual" - ] -} -``` - ---- - -## 8. Decision Guide - -```mermaid -flowchart TD - A[Artifactory: packages failing] --> B{Can Artifactory admin\nadd remote proxy repos?} - - B -->|Yes| C[✅ Option 1\nConfigure Artifactory\nas remote proxy\nBest long-term fix] - - B -->|No| D{Is there a bridge machine\nwith internet AND\nArtifactory access?} - - D -->|Yes| E[Run Option 5 first\ngenerate_lockfiles.py\nGet complete manifest] - E --> E2[✅ Option 2\nUse manifest to download\n& upload to Artifactory] - - D -->|No| F{Are packages in Artifactory\nbut URL is wrong?} - - F -->|Yes| G[✅ Option 4\nPoint pip/npm/uv\nexplicitly to Artifactory URL] - - F -->|No / Fully air-gapped| H[✅ Option 3\nVendor dependencies\ninto git repository] - - style C fill:#90EE90 - style E fill:#87CEEB - style E2 fill:#90EE90 - style G fill:#90EE90 - style H fill:#90EE90 -``` - ---- - -## 9. 
Quick Reference: Copy-Paste Commands - -### Option 5 — Generate Complete Dependency Manifests (run this before Option 2) - -```bash -# Requires uv and pnpm installed -python3 scripts/generate_lockfiles.py - -# Outputs: -# deps/python/master/manifest.txt (~295 Python packages) -# deps/node/master/manifest.txt (~1,524 Node packages) -``` - -### Identify Missing Packages (run this first) - -```bash -# Capture all errors during setup to identify exactly which packages are failing -make setup-venv 2>&1 | tee /tmp/setup-errors.txt -grep -E "ERROR|Could not find|No matching|WARN" /tmp/setup-errors.txt -``` - -### Option 1 — Temporary environment variables to use Artifactory - -```bash -# Replace with your actual Artifactory URL -ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" - -export PIP_INDEX_URL="${ARTIFACTORY_URL}/api/pypi/pypi-virtual/simple/" -export PIP_TRUSTED_HOST="your-artifactory.company.com" -export UV_INDEX_URL="${ARTIFACTORY_URL}/api/pypi/pypi-virtual/simple/" -npm config set registry "${ARTIFACTORY_URL}/api/npm/npm-virtual/" - -make setup-venv -``` - -### Option 2 — Download + Upload using generated manifest - -```bash -# Download all packages listed in the manifest -mkdir -p ./wheel-cache -while IFS= read -r pkg; do - pip download -d ./wheel-cache "$pkg" 2>/dev/null || echo "WARN: $pkg" -done < deps/python/master/manifest.txt - -# Upload to Artifactory -ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" -REPO="pypi-local" -AF_CREDS="username:api-key" -for f in ./wheel-cache/*.whl ./wheel-cache/*.tar.gz; do - curl -u "$AF_CREDS" -T "$f" "${ARTIFACTORY_URL}/${REPO}/$(basename $f)" -done -``` - -### Option 2 — Download + Upload specific missing package - -```bash -# Replace package name and version as needed -PACKAGE="strands-agents==1.14.0" -ARTIFACTORY_URL="https://your-artifactory.company.com/artifactory" -REPO="pypi-local" -AF_CREDS="username:api-key" - -# Download -pip download -d /tmp/pkg "$PACKAGE" - -# Upload -for f 
in /tmp/pkg/*.whl /tmp/pkg/*.tar.gz; do - curl -u "$AF_CREDS" -T "$f" "${ARTIFACTORY_URL}/${REPO}/$(basename $f)" -done -``` - -### Option 3 — Install from vendor directory - -```bash -pip install --no-index --find-links ./vendor/python \ - -e "lib/idp_common_pkg[all,dev,test]" \ - -e lib/idp_cli_pkg \ - -e lib/idp_sdk \ - -e lib/idp_mcp_connector_pkg -``` - -### Option 4 — Set pip.conf to use Artifactory - -```bash -# Create pip config (macOS/Linux) -mkdir -p ~/.pip -cat > ~/.pip/pip.conf << 'EOF' -[global] -index-url = https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/ -trusted-host = your-artifactory.company.com -EOF -``` - ---- - -## Need Help? - -If you are unsure which packages are failing or need to generate a complete package list for your Artifactory admin: - -```bash -# Generate complete resolved manifests for ALL components (recommended) -python3 scripts/generate_lockfiles.py - -# Outputs a flat list of every package at every version: -# deps/python/master/manifest.txt — hand this to your Artifactory admin -# deps/node/master/manifest.txt — hand this to your Artifactory admin - -# Then verify packages are accessible from Artifactory -UV_INDEX_URL="https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/" \ - python3 scripts/verify_python_packages.py -NODE_REGISTRY_URL="https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/" \ - python3 scripts/verify_node_packages.py -``` - -This produces a flat, fully-resolved list of every package (including transitive dependencies) across all project components that can be handed to your Artifactory admin for bulk upload. 
- ---- - -*Generated for the GenAI IDP Accelerator project — [GitHub Repository](https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws)* From f07fd1ee43d1647de3413d23465d534081cd235c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CSirirat?= <“siriratk@amazon.com”> Date: Tue, 21 Apr 2026 10:45:31 -0400 Subject: [PATCH 9/9] fix: pass CodeBuildSubnetIds as joined string to PATTERNSTACK nested stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CommaDelimitedList parameters resolve to a list when referenced with !Ref, which is invalid for nested stack Parameters (must be plain strings). Fix: !Ref CodeBuildSubnetIds → !Join [",", !Ref CodeBuildSubnetIds] Matches the existing pattern used for LambdaSubnetIds and ALBSubnetIds. Verified: IDP-ALB stack deployed successfully with ECR images: - uv-0.9.6 pulled from 549366490058.dkr.ecr.us-east-1.amazonaws.com - lambda-python-3.12-arm64 pulled from 549366490058.dkr.ecr.us-east-1.amazonaws.com DockerBuild SUCCEEDED, stack reached UPDATE_COMPLETE. --- template.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/template.yaml b/template.yaml index fc8390ff1..15838b5f6 100644 --- a/template.yaml +++ b/template.yaml @@ -1619,7 +1619,7 @@ Resources: ArtifactoryCredentialsSecretArn: !Ref ArtifactoryCredentialsSecretArn NpmRegistryUrl: !Ref NpmRegistryUrl CodeBuildVpcId: !Ref CodeBuildVpcId - CodeBuildSubnetIds: !Ref CodeBuildSubnetIds + CodeBuildSubnetIds: !Join [",", !Ref CodeBuildSubnetIds] CodeBuildSecurityGroupId: !Ref CodeBuildSecurityGroupId ##########################################################################