Skip to content

Commit c746af2

Browse files
authored
vercel ai webshop example (#440)
1 parent 49bf9cd commit c746af2

39 files changed

Lines changed: 166341 additions & 0 deletions

contrib/recipes/webshop/.gitignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
node_modules/
2+
.venv/
3+
.next/
4+
dist/
5+
*.tsbuildinfo
6+
server/webshop/
7+
8+
# Log files from make train
9+
logs/
10+
std_log.txt
11+
12+
# Auto-generated by Next.js
13+
next-env.d.ts

contrib/recipes/webshop/Dockerfile

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# Unified WebShop Training Image
2+
#
3+
# This Dockerfile creates a single image containing all components needed for
4+
# the WebShop training pipeline:
5+
# - WebShop Flask server (Python + Java for pyserini)
6+
# - Agent Lightning coordinator (Python + optional VERL for GPU)
7+
# - Headless runner (Node.js + pnpm)
8+
#
9+
# Build context must be the repository root:
10+
# docker build -f contrib/recipes/webshop/Dockerfile -t webshop-agl .
11+
#
12+
# For GPU training:
13+
# docker build -f contrib/recipes/webshop/Dockerfile --build-arg INSTALL_GPU=true -t webshop-agl-gpu .
14+
#
15+
# Run modes:
16+
# - Full stack: docker run webshop-agl scripts/run_stack.sh qwen
17+
# - WebShop only: docker run webshop-agl python server/webshop_server.py
18+
# - Runner only: docker run webshop-agl pnpm headless
19+
20+
# Base image with CUDA support for GPU training
21+
FROM mcr.microsoft.com/azureml/openmpi5.0-cuda12.4-ubuntu22.04:latest
22+
23+
# Build argument for GPU support
24+
ARG INSTALL_GPU=false
25+
26+
# Environment variables
27+
ENV PYTHONUNBUFFERED=1 \
28+
DEBIAN_FRONTEND=noninteractive \
29+
JAVA_HOME=/usr/lib/jvm/temurin-21-jdk-amd64 \
30+
PATH="/usr/lib/jvm/temurin-21-jdk-amd64/bin:${PATH}"
31+
32+
WORKDIR /app
33+
34+
# ==============================================================================
35+
# System Dependencies
36+
# ==============================================================================
37+
38+
# Install system packages:
39+
# - Java 21 (Temurin) for pyserini search engine
40+
# - Node.js 20 for headless runner
41+
# - Git, curl, wget for general utilities
42+
RUN apt-get update && apt-get install -y --no-install-recommends \
        git curl wget gnupg ca-certificates procps \
    # Add Adoptium (Temurin) repository for Java 21.
    # The suite name is read from /etc/os-release so it matches the base image
    # (Ubuntu 22.04) instead of a hard-coded Debian codename.
    && mkdir -p /etc/apt/keyrings \
    && wget -qO- https://packages.adoptium.net/artifactory/api/gpg/key/public | gpg --dearmor -o /etc/apt/keyrings/adoptium.gpg \
    && echo "deb [signed-by=/etc/apt/keyrings/adoptium.gpg] https://packages.adoptium.net/artifactory/deb $(. /etc/os-release && echo "$VERSION_CODENAME") main" > /etc/apt/sources.list.d/adoptium.list \
    # Add NodeSource repository for Node.js 20
    && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        temurin-21-jdk \
        nodejs \
    # Install pnpm globally
    && npm install -g pnpm \
    # Clean up the apt caches to keep the layer small
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
59+
60+
# ==============================================================================
61+
# Python Dependencies
62+
# ==============================================================================
63+
64+
# Copy Agent Lightning package files from repo root
65+
COPY pyproject.toml uv.lock README.md ./
66+
COPY agentlightning/ ./agentlightning/
67+
68+
# Install Agent Lightning (with optional GPU extras)
69+
# Upgrade the packaging tools, then install Agent Lightning in editable mode.
# With INSTALL_GPU=true the [verl] extra is attempted first; if that fails the
# build still falls back to the base install, but now says so on stderr instead
# of silently producing a "GPU" image without VERL.
RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \
    if [ "$INSTALL_GPU" = "true" ]; then \
        pip install --no-cache-dir -e ".[verl]" || { \
            echo "WARNING: installing the [verl] extra failed; falling back to the base install without VERL" >&2; \
            pip install --no-cache-dir -e .; \
        }; \
    else \
        pip install --no-cache-dir -e .; \
    fi
75+
76+
# Copy and install WebShop server requirements
77+
# Source paths are relative to the repository-root build context.
# NOTE(review): assumes the recipe under contrib/recipes/webshop/ keeps the
# server/ and agl/ requirements files at these locations — confirm.
COPY contrib/recipes/webshop/server/requirements.txt ./server-requirements.txt
RUN pip install --no-cache-dir -r server-requirements.txt && \
    python -m spacy download en_core_web_sm

# Copy and install AGL coordinator requirements
COPY contrib/recipes/webshop/agl/requirements.txt ./agl-requirements.txt
RUN pip install --no-cache-dir -r agl-requirements.txt
84+
85+
# ==============================================================================
86+
# WebShop Setup
87+
# ==============================================================================
88+
89+
# Clone WebShop repository
90+
RUN git clone --depth 1 https://github.com/princeton-nlp/WebShop.git /app/webshop
91+
92+
# Install WebShop-specific dependencies
93+
RUN cd /app/webshop && \
94+
pip install --no-cache-dir -r requirements.txt || \
95+
pip install --no-cache-dir flask gym beautifulsoup4 rank_bm25 thefuzz numpy pandas tqdm
96+
97+
# Put the WebShop checkout first on PYTHONPATH. The previous value is appended
# only when it is non-empty, so an unset PYTHONPATH does not leave a trailing
# ":" (an empty path entry) behind.
ENV PYTHONPATH="/app/webshop${PYTHONPATH:+:${PYTHONPATH}}"
98+
99+
# ==============================================================================
100+
# Node.js Dependencies
101+
# ==============================================================================
102+
103+
# Copy package files and install dependencies
104+
# NOTE(review): assumes package.json (and the optional pnpm-lock.yaml) live in
# contrib/recipes/webshop/ relative to the repository-root context — confirm.
# The trailing "*" keeps the COPY working when no lockfile is present; in that
# case the frozen install fails and the plain install runs instead.
COPY contrib/recipes/webshop/package.json contrib/recipes/webshop/pnpm-lock.yaml* ./
RUN pnpm install --frozen-lockfile || pnpm install
106+
107+
# ==============================================================================
108+
# Application Code
109+
# ==============================================================================
110+
111+
# Copy example source code
112+
# NOTE(review): source paths assume the recipe directory
# contrib/recipes/webshop/ (relative to the repository-root build context)
# contains these files and folders — confirm.
COPY contrib/recipes/webshop/tsconfig.json ./
COPY contrib/recipes/webshop/src/ ./src/
COPY contrib/recipes/webshop/scripts/ ./scripts/
COPY contrib/recipes/webshop/agl/ ./agl/
COPY contrib/recipes/webshop/server/webshop_server.py ./server/webshop_server.py
COPY contrib/recipes/webshop/server/docker-entrypoint.sh ./server/docker-entrypoint.sh
118+
119+
# Make scripts executable
120+
RUN chmod +x scripts/*.sh server/docker-entrypoint.sh
121+
122+
# ==============================================================================
123+
# Runtime Configuration
124+
# ==============================================================================
125+
126+
# Default environment variables
127+
ENV AGENT_LIGHTNING_STORE_HOST=0.0.0.0 \
128+
AGENT_LIGHTNING_STORE_PORT=4747 \
129+
WEBSHOP_URL=http://127.0.0.1:3000 \
130+
N_RUNNERS=1
131+
132+
# Expose ports
133+
# - 3000: WebShop server
134+
# - 4747: Agent Lightning Store
135+
EXPOSE 3000 4747
136+
137+
# Volume for WebShop dataset persistence
138+
VOLUME /app/webshop/data
139+
140+
# Health check for the Store server
141+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
142+
CMD curl -f http://localhost:4747/v1/agl/health || exit 1
143+
144+
# Default: run full stack with qwen config
145+
CMD ["bash", "scripts/run_stack.sh", "qwen"]

contrib/recipes/webshop/Makefile

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# WebShop Training Workflow
2+
#
3+
# Usage:
4+
# make setup # Initialize .env
5+
# make train # Run training stack (GPU)
6+
# make stop # Stop all services
7+
#
8+
# Azure ML:
9+
# make aml-setup # One-time AML setup (compute + environment)
10+
# make aml-train # Submit training job to Azure ML
11+
# make aml-logs # Stream logs from running AML job
12+
13+
.PHONY: help setup build build-gpu train scale stop clean status \
14+
aml-setup aml-compute aml-train aml-train-qwen aml-logs aml-status
15+
16+
N ?= 1
17+
18+
# Azure ML defaults (override with environment variables)
19+
AML_RG ?= <your-resource-group>
20+
AML_WS ?= <your-workspace>
21+
22+
23+
.DEFAULT_GOAL := help
24+
25+
#==============================================================================
26+
# Help
27+
#==============================================================================
28+
29+
# Print the command overview. Each argument to printf becomes one output line;
# an empty argument produces a blank separator line.
help: ## Show this help message
	@printf '%s\n' \
		"WebShop Agent - Commands" \
		"------------------------" \
		"" \
		"Setup:" \
		" make setup Create .env configuration" \
		" make build-gpu Build GPU Docker image" \
		"" \
		"Run:" \
		" make train Start Training Stack (single container, GPU)" \
		" make scale N=3 Set number of runners (default: 1)" \
		"" \
		"Manage:" \
		" make status Show container status" \
		" make stop Stop all services" \
		" make clean Stop and remove volumes" \
		"" \
		"Azure ML:" \
		" make aml-train Submit Qwen training job" \
		" make aml-compute Create AML compute cluster" \
		""
50+
#==============================================================================
51+
# Setup
52+
#==============================================================================
53+
54+
# Create .env from .env.example unless one already exists.
# The copy and the follow-up messages are chained with && so that a failed cp
# (for example a missing .env.example) fails the target instead of printing
# "Created .env" and letting dependants such as `train` continue without it.
setup: ## Create .env configuration
	@if [ -f .env ]; then \
		echo ".env already exists"; \
	else \
		cp .env.example .env && \
		echo "Created .env from .env.example" && \
		echo "Please edit .env and add your OPENAI_API_KEY"; \
	fi
62+
63+
build-gpu: ## Build GPU Docker image
64+
@echo "Building WebShop GPU image..."
65+
docker build -f Dockerfile --build-arg INSTALL_GPU=true -t webshop-agl-gpu ../..
66+
@echo "Build complete."
67+
68+
#==============================================================================
69+
# Run
70+
#==============================================================================
71+
72+
# Bring up the GPU compose profile in the background, rebuilding images first.
# Depends on `setup` so a .env exists; N (default 1) is handed to compose as
# N_RUNNERS.
train: setup ## Start Training Stack (GPU)
	@printf '%s\n' \
		"Starting Training Stack (GPU)..." \
		" - WebShop: http://localhost:3000" \
		" - Coordinator: http://localhost:4747" \
		""
	N_RUNNERS=$(N) docker compose --profile gpu up --build -d
78+
79+
# Informational only: prints how to restart the stack with a different runner
# count. It does not change any running container.
scale: ## Set number of runners (e.g., make scale N=3)
	@echo "To change runner count, stop and restart with N=$(N):"
	@echo " make stop && N=$(N) make train"
	@echo ""
	@echo "Or set N_RUNNERS=$(N) in your .env file"

#==============================================================================
# Manage
#==============================================================================

# These targets are advertised by `help`, declared in .PHONY and recommended by
# `scale`, but previously had no rule. They act on the same compose profile
# that `train` starts.

status: ## Show container status
	docker compose --profile gpu ps

stop: ## Stop all services
	docker compose --profile gpu down

clean: ## Stop and remove volumes
	docker compose --profile gpu down --volumes
84+
85+
#==============================================================================
86+
# Azure ML
87+
#==============================================================================
88+
89+
# Create the Azure ML compute cluster described by aml/compute.yml.
# AML_RG and AML_WS default to "<...>" placeholders. Expanded unquoted, the
# shell would read those as redirections and the failure would then be hidden
# by the "may already exist" message, so the target stops early while a
# placeholder is still in place and single-quotes the values it passes on.
# A failing `az` call is still tolerated, on the assumption that the cluster
# already exists.
aml-compute: ## Create Azure ML compute cluster
	$(if $(filter <%>,$(AML_RG) $(AML_WS)),$(error Set AML_RG and AML_WS first: make aml-compute AML_RG=my-rg AML_WS=my-ws))
	@echo "Creating Azure ML compute cluster..."
	az ml compute create -f aml/compute.yml -g '$(AML_RG)' -w '$(AML_WS)' || \
		echo "Compute cluster may already exist (this is OK)"
93+
94+
95+
# Submit aml/jobs/webshop-qwen.yml to Azure ML and stream its logs.
# HF_TOKEN and WANDB_API_KEY are read from the caller's environment ($$ defers
# expansion to the shell) and forwarded as job environment variables.
# .amlignore is copied three levels up for the duration of the submission; the
# EXIT trap removes that copy again whether or not the submission succeeds.
# AML_RG and AML_WS default to "<...>" placeholders that the shell would parse
# as redirections, so the target stops early while they are unset and
# single-quotes the values it passes on.
aml-train: ## Submit Qwen training job
	$(if $(filter <%>,$(AML_RG) $(AML_WS)),$(error Set AML_RG and AML_WS first: make aml-train AML_RG=my-rg AML_WS=my-ws))
	@echo "Submitting Qwen training job to Azure ML..."
	@echo "Note: Requires HF_TOKEN and WANDB_API_KEY environment variables"
	@cp .amlignore ../../../.amlignore && \
	trap 'rm -f ../../../.amlignore' EXIT && \
	az ml job create -f aml/jobs/webshop-qwen.yml --stream \
		--set environment_variables.HF_TOKEN="$$HF_TOKEN" \
		--set environment_variables.WANDB_API_KEY="$$WANDB_API_KEY" \
		-g '$(AML_RG)' -w '$(AML_WS)'

0 commit comments

Comments
 (0)