docs: update readme

JingBh · JingBh · commit 1aaf72b8cb26 · 2025-10-23T13:10:25.000+08:00
build: update training base image
diff --git a/.github/workflows/build-training-base-image.yml b/.github/workflows/build-training-base-image.yml
@@ -2,14 +2,23 @@ name: Training Base Image
 
 on:
   workflow_dispatch:
+    inputs:
+      tag:
+        description: Tag of the image to build
+        required: true
+        type: choice
+        options:
+          - 25.09-cu130-torch290-sglang053
+          - 25.06-cu129-torch280-sglang053
+          - 25.03-cu128-torch271-sglang048
 
 permissions:
   contents: read
   packages: write
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: false
+  group: ${{ github.workflow }}-${{ inputs.tag }}
+  cancel-in-progress: true
 
 jobs:
   build-and-push:
@@ -43,10 +52,10 @@ jobs:
         uses: docker/build-push-action@v6
         with:
           context: .
-          file: extra/docker/training-base.Dockerfile
+          file: extra/docker/training-base/${{ inputs.tag }}.Dockerfile
           push: true
-          tags: ${{ github.repository == 'THUDM/AgentRL' && format('{0}/agentrl-training-base:25.03-cu128-torch271-sglang048', secrets.DOCKER_USER) || format('ghcr.io/{0}/training-base:25.03-cu128-torch271-sglang048', steps.repo_slug.outputs.repo_lower) }}
+          tags: ${{ github.repository == 'THUDM/AgentRL' && format('{0}/agentrl-training-base:{1}', secrets.DOCKER_USER, inputs.tag) || format('ghcr.io/{0}/training-base:{1}', steps.repo_slug.outputs.repo_lower, inputs.tag) }}
           cache-from: |
-            type=registry,ref=${{ github.repository == 'THUDM/AgentRL' && format('{0}/agentrl-training-base:buildcache', secrets.DOCKER_USER) || format('ghcr.io/{0}/training-base:buildcache', steps.repo_slug.outputs.repo_lower) }}
+            type=registry,ref=${{ github.repository == 'THUDM/AgentRL' && format('{0}/agentrl-training-base:cache-{1}', secrets.DOCKER_USER, inputs.tag) || format('ghcr.io/{0}/training-base:cache-{1}', steps.repo_slug.outputs.repo_lower, inputs.tag) }}
           cache-to: |
-            type=registry,ref=${{ github.repository == 'THUDM/AgentRL' && format('{0}/agentrl-training-base:buildcache', secrets.DOCKER_USER) || format('ghcr.io/{0}/training-base:buildcache', steps.repo_slug.outputs.repo_lower) }},mode=max
+            type=registry,ref=${{ github.repository == 'THUDM/AgentRL' && format('{0}/agentrl-training-base:cache-{1}', secrets.DOCKER_USER, inputs.tag) || format('ghcr.io/{0}/training-base:cache-{1}', steps.repo_slug.outputs.repo_lower, inputs.tag) }},mode=max
diff --git a/README.md b/README.md
@@ -11,6 +11,8 @@ Scaling Agentic Reinforcement Learning with a Multi-Turn, Multi-Task Framework
 - [Quickstart](#quickstart)
 - [Architectural Overview](#architectural-overview)
 - [Training Overview](#training-overview)
+  - [Installation](#installation)
+  - [Getting Started](#getting-started)
   - [Placement Group](#placement-group)
   - [Workers](#workers)
   - [Data](#data)
@@ -24,25 +26,39 @@ Scaling Agentic Reinforcement Learning with a Multi-Turn, Multi-Task Framework
 
 ## Quickstart
 
-For a minimal example of how to use the environment framework,
+- For a minimal example of how to use the environment framework,
 refer to [`examples/simple-calculator`](examples/simple-calculator).
 
+- For the environment and training data used in our paper,
+see [AgentBench FC](https://github.com/THUDM/AgentBench).
+
+- For reproducing the training results in our paper,
+refer to [`examples/training/agentrl_trainer.py`](examples/training/agentrl_trainer.py).
+
 ## Architectural Overview
 
 ![architecture](docs/assets/deployment-framework.png)
 
 This project mainly consists of two parts: the training framework and the environment deployment framework.
 
-For the training framework, see [Training Overview](#training-overview).
+- For the training framework, see [Training Overview](#training-overview).
 The code is available in the [`trainer`](trainer) directory.
 
-For the environment deployment framework, see [Environment Overview](#environment-overview).
+- For the environment deployment framework, see [Environment Overview](#environment-overview).
 The code of the controller and the task worker is available in [`controller`](controller) and [`worker`](worker) respectively.
 
 ## Training Overview
 
 The AgentRL training package provides basic workers and components for composing a training routine.
 
+### Installation
+
+```shell
+pip install -e ./trainer
+```
+
+### Getting Started
+
 We take [`async_trainer.py`](examples/training/async_trainer.py) as an example to demonstrate how to compose a fully asynchronous GRPO agentic training pipeline.
 
 `async_trainer` trains LLM agents by utilizing three specialised worker pools over a Ray cluster:
@@ -187,7 +203,7 @@ There's also sample configs for the example trainer available in [`examples/trai
 
 ## Environment Overview
 
-Building upon [AgentBench](https://github.com/THUDM/AgentBench),
+Building upon [AgentBench](https://github.com/THUDM/AgentBench/tree/v0.2),
 this part mainly consists of the following components:
 
 ### Controller
diff --git a/docs/tasks.md b/docs/tasks.md
@@ -15,10 +15,10 @@ We provide first-party integration for the following tasks into the environment
 
 ### AgentBench FC
 
-We have refactored the original [AgentBench](https://github.com/THUDM/AgentBench),
+We have refactored the original [AgentBench](https://github.com/THUDM/AgentBench/tree/v0.2),
 supporting a function-calling style prompt and containerized deployment.
 
-Available in the [agentbench_fc](https://github.com/THUDM/AgentBench/tree/agentbench_fc) branch of the original repository.
+Available in the [AgentBench](https://github.com/THUDM/AgentBench) repository.
 
 ### MobileRL (Android)
 
diff --git a/extra/docker/training-base/25.03-cu128-torch271-sglang048.Dockerfile b/extra/docker/training-base/25.03-cu128-torch271-sglang048.Dockerfile
@@ -26,16 +26,19 @@ RUN apt-get update && \
 RUN curl -fsSL https://astral.sh/uv/install.sh | sh
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system --upgrade setuptools packaging pybind11
+    uv pip install --system --upgrade setuptools packaging psutil ninja pybind11
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system \
-      --extra-index-url https://download.pytorch.org/whl/cu128  \
+      --extra-index-url https://download.pytorch.org/whl/cu128 \
       torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1
 RUN --mount=type=cache,target=/root/.cache/uv \
     echo "flashinfer-python==0.2.11.post3" > /tmp/overrides.txt && \
     uv pip install --system --override /tmp/overrides.txt \
-      sglang[all]==0.4.8.post1 megatron-core transformer-engine[pytorch] \
-      flash-attn accelerate binpacking wandb ray[rllib] tensordict nvitop py-spy && \
+      sglang[all]==0.4.8.post1 \
+      megatron-core transformer-engine[pytorch] "flash-attn<=2.8.1" \
+      accelerate aiohttp binpacking filelock numpy Pillow \
+      PyYAML ray[rllib] requests tensordict transformers \
+      wandb nvitop py-spy && \
     rm -f /tmp/overrides.txt
 
 ### 3. configure utils
@@ -45,8 +48,15 @@ RUN echo 'set -g default-terminal "tmux-256color"' > /root/.tmux.conf && \
     echo 'set-environment -g LC_ALL "C.UTF-8"' >> /root/.tmux.conf && \
     echo 'set-option -g history-limit 50000' >> /root/.tmux.conf && \
     echo 'set-option -g mouse on' >> /root/.tmux.conf && \
+    echo 'alias pip="uv pip"' >> /root/.bashrc && \
     echo 'alias tt="tmux attach -t"' >> /root/.bashrc && \
     echo 'alias tn="tmux new -s"' >> /root/.bashrc && \
     echo 'alias dp="ls -A | parallel du -sh 2>/dev/null | sort -h"' >> /root/.bashrc && \
     echo 'alias ds="du -sh .[!.]* * 2>/dev/null | sort -h"' >> /root/.bashrc && \
     echo 'alias pd="py-spy dump --pid"' >> /root/.bashrc
+
+### 4. install current agentrl trainer
+COPY . /workspace/agentrl
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --no-deps \
+      -e /workspace/agentrl/trainer[megatron]
diff --git a/extra/docker/training-base/25.06-cu129-torch280-sglang053.Dockerfile b/extra/docker/training-base/25.06-cu129-torch280-sglang053.Dockerfile
@@ -0,0 +1,62 @@
+### Common dependencies for the training environment
+# May not be up to date, double-check before using
+
+FROM nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+ENV PYTHONUNBUFFERED=1
+ENV UV_BREAK_SYSTEM_PACKAGES=1
+ENV UV_LINK_MODE=copy
+ENV UV_NO_BUILD_ISOLATION=1
+ENV PATH="/root/.local/bin:${PATH}"
+
+WORKDIR /workspace
+
+### 1. install python and base tooling
+RUN apt-get update && \
+    apt-get install -y \
+      python-is-python3 python3 python3-dev \
+      curl ca-certificates git htop ncurses-term parallel tmux && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+### 2. install uv and python dependencies
+RUN curl -fsSL https://astral.sh/uv/install.sh | sh
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --upgrade setuptools packaging psutil ninja pybind11
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system \
+      --extra-index-url https://download.pytorch.org/whl/cu129 \
+      torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0
+RUN --mount=type=cache,target=/root/.cache/uv \
+    echo "torch-memory-saver==0.0.9rc2" > /tmp/overrides.txt && \
+    uv pip install --system --override /tmp/overrides.txt \
+      sglang[all]==0.5.3.post2 \
+      megatron-core transformer-engine[pytorch] flash-attn==2.7.3 \
+      accelerate aiohttp binpacking filelock numpy Pillow \
+      PyYAML ray[rllib] requests tensordict transformers \
+      wandb nvitop py-spy && \
+    rm -f /tmp/overrides.txt
+
+### 3. configure utils
+RUN echo 'set -g default-terminal "tmux-256color"' > /root/.tmux.conf && \
+    echo "set -ga terminal-overrides ',*:Tc'" >> /root/.tmux.conf && \
+    echo 'set-environment -g LANG "C.UTF-8"' >> /root/.tmux.conf && \
+    echo 'set-environment -g LC_ALL "C.UTF-8"' >> /root/.tmux.conf && \
+    echo 'set-option -g history-limit 50000' >> /root/.tmux.conf && \
+    echo 'set-option -g mouse on' >> /root/.tmux.conf && \
+    echo 'alias pip="uv pip"' >> /root/.bashrc && \
+    echo 'alias tt="tmux attach -t"' >> /root/.bashrc && \
+    echo 'alias tn="tmux new -s"' >> /root/.bashrc && \
+    echo 'alias dp="ls -A | parallel du -sh 2>/dev/null | sort -h"' >> /root/.bashrc && \
+    echo 'alias ds="du -sh .[!.]* * 2>/dev/null | sort -h"' >> /root/.bashrc && \
+    echo 'alias pd="py-spy dump --pid"' >> /root/.bashrc
+
+### 4. install current agentrl trainer
+COPY . /workspace/agentrl
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --no-deps \
+      -e /workspace/agentrl/trainer[megatron]
diff --git a/extra/docker/training-base/25.09-cu130-torch290-sglang053.Dockerfile b/extra/docker/training-base/25.09-cu130-torch290-sglang053.Dockerfile
@@ -0,0 +1,61 @@
+### Common dependencies for the training environment
+# May not be up to date, double-check before using
+
+FROM nvcr.io/nvidia/cuda-dl-base:25.09-cuda13.0-devel-ubuntu24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+ENV PYTHONUNBUFFERED=1
+ENV UV_BREAK_SYSTEM_PACKAGES=1
+ENV UV_LINK_MODE=copy
+ENV UV_NO_BUILD_ISOLATION=1
+ENV PATH="/root/.local/bin:${PATH}"
+
+WORKDIR /workspace
+
+### 1. install python and base tooling
+RUN apt-get update && \
+    apt-get install -y \
+      python-is-python3 python3 python3-dev \
+      curl ca-certificates git htop ncurses-term parallel tmux && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+### 2. install uv and python dependencies
+RUN curl -fsSL https://astral.sh/uv/install.sh | sh
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --upgrade setuptools packaging psutil ninja pybind11
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system \
+      --extra-index-url https://download.pytorch.org/whl/cu130 \
+      torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0
+RUN --mount=type=cache,target=/root/.cache/uv \
+    echo "torch-memory-saver==0.0.9rc2" > /tmp/overrides.txt && \
+    uv pip install --system --override /tmp/overrides.txt \
+      sglang[cu130_all]==0.5.3.post2 \
+      accelerate aiohttp binpacking filelock numpy Pillow \
+      PyYAML ray[rllib] requests tensordict transformers \
+      wandb nvitop py-spy && \
+    rm -f /tmp/overrides.txt
+
+### 3. configure utils
+RUN echo 'set -g default-terminal "tmux-256color"' > /root/.tmux.conf && \
+    echo "set -ga terminal-overrides ',*:Tc'" >> /root/.tmux.conf && \
+    echo 'set-environment -g LANG "C.UTF-8"' >> /root/.tmux.conf && \
+    echo 'set-environment -g LC_ALL "C.UTF-8"' >> /root/.tmux.conf && \
+    echo 'set-option -g history-limit 50000' >> /root/.tmux.conf && \
+    echo 'set-option -g mouse on' >> /root/.tmux.conf && \
+    echo 'alias pip="uv pip"' >> /root/.bashrc && \
+    echo 'alias tt="tmux attach -t"' >> /root/.bashrc && \
+    echo 'alias tn="tmux new -s"' >> /root/.bashrc && \
+    echo 'alias dp="ls -A | parallel du -sh 2>/dev/null | sort -h"' >> /root/.bashrc && \
+    echo 'alias ds="du -sh .[!.]* * 2>/dev/null | sort -h"' >> /root/.bashrc && \
+    echo 'alias pd="py-spy dump --pid"' >> /root/.bashrc
+
+### 4. install current agentrl trainer
+COPY . /workspace/agentrl
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system --no-deps \
+      -e /workspace/agentrl/trainer
diff --git a/trainer/pyproject.toml b/trainer/pyproject.toml
@@ -23,7 +23,7 @@ dependencies = [
   "aiohttp",
   "binpacking",
   "filelock",
-  "flash-attn",
+  "flash-attn>=2.4.3",
   "numpy",
   "Pillow",
   "PyYAML",
@@ -39,7 +39,8 @@ dependencies = [
 [project.optional-dependencies]
 megatron = [
   "megatron-core",
-  "transformer-engine[pytorch]"
+  "transformer-engine[pytorch]",
+  "flash-attn<=2.8.1"
 ]
 
 [project.readme]