Skip to content

Commit 9306c14

Browse files
committed
Merge remote-tracking branch 'upstream/master' into doc-879-ray-doc-claude-md
2 parents d513345 + d1de01b commit 9306c14

460 files changed

Lines changed: 27653 additions & 56461 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.buildkite/_images.rayci.yml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,6 @@ steps:
115115
- "3.12"
116116
cuda:
117117
- "13.0.0-cudnn"
118-
adjustments:
119-
- with:
120-
python: "3.11"
121-
cuda: "12.8.1-cudnn"
122118
env:
123119
PYTHON_VERSION: "{{array.python}}"
124120
CUDA_VERSION: "{{array.cuda}}"
@@ -131,10 +127,6 @@ steps:
131127
- "3.12"
132128
cuda:
133129
- "13.0.0-cudnn"
134-
adjustments:
135-
- with:
136-
python: "3.11"
137-
cuda: "12.8.1-cudnn"
138130
env:
139131
PYTHON_VERSION: "{{array.python}}"
140132
CUDA_VERSION: "{{array.cuda}}"

.buildkite/build.rayci.yml

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,6 @@ steps:
271271
- "3.12"
272272
cuda:
273273
- "13.0.0-cudnn"
274-
adjustments:
275-
- with:
276-
python: "3.11"
277-
cuda: "12.8.1-cudnn"
278274
env_file: rayci.env
279275
env:
280276
PYTHON_VERSION: "{{array.python}}"
@@ -296,10 +292,6 @@ steps:
296292
- "3.12"
297293
cuda:
298294
- "13.0.0-cudnn"
299-
adjustments:
300-
- with:
301-
python: "3.11"
302-
cuda: "12.8.1-cudnn"
303295
env_file: rayci.env
304296
env:
305297
PYTHON_VERSION: "{{array.python}}"
@@ -362,10 +354,6 @@ steps:
362354
- "3.12"
363355
platform:
364356
- cu13.0.0-cudnn
365-
adjustments:
366-
- with:
367-
python: "3.11"
368-
platform: "cu12.8.1-cudnn"
369357
depends_on:
370358
- ray-llm-image-cuda-build($)
371359
tags:
@@ -389,10 +377,6 @@ steps:
389377
- "3.12"
390378
platform:
391379
- cu13.0.0-cudnn
392-
adjustments:
393-
- with:
394-
python: "3.11"
395-
platform: "cu12.8.1-cudnn"
396380
depends_on:
397381
- ray-llm-extra-image-cuda-build($)
398382
tags:

.buildkite/data.rayci.yml

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,6 @@ steps:
4040
PYTHON: "3.10"
4141
tags: cibase
4242

43-
- name: datatfxbslbuild-multipy
44-
label: "wanda: datatfxbslbuild-py3.11"
45-
wanda: ci/docker/datatfxbsl.build.wanda.yaml
46-
env:
47-
PYTHON: "3.11"
48-
tags: cibase
49-
5043
- name: datatfdsbuild-multipy
5144
label: "wanda: datatfdsbuild-py3.12"
5245
wanda: ci/docker/datatfds.build.wanda.yaml
@@ -239,16 +232,6 @@ steps:
239232
--only-tags dask
240233
depends_on: datalbuild-multipy(python=3.12)
241234

242-
- label: ":database: data: TFRecords (tfx-bsl) tests"
243-
tags:
244-
- data
245-
instance_type: medium
246-
commands:
247-
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... data
248-
--build-name datatfxbslbuild-py3.11 --python-version 3.11
249-
--only-tags tfxbsl
250-
depends_on: datatfxbslbuild-multipy
251-
252235
- label: ":database: data: doc tests"
253236
tags:
254237
- data

.buildkite/llm.rayci.yml

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,46 +5,24 @@ depends_on:
55
- ray-dashboard-build
66
steps:
77
- name: llmbuild
8-
wanda: ci/docker/llm.build.wanda.yaml
9-
depends_on:
10-
- oss-ci-base_build-multipy(python=3.11)
11-
env:
12-
PYTHON: "3.11"
13-
BASE_TYPE: "build"
14-
BUILD_VARIANT: "build"
15-
RAY_CUDA_CODE: "cpu"
16-
tags: cibase
17-
18-
- name: llmbuild-py312
198
wanda: ci/docker/llm.build.wanda.yaml
209
depends_on:
2110
- oss-ci-base_build-multipy(python=3.12)
2211
env:
2312
PYTHON: "3.12"
2413
BASE_TYPE: "build"
25-
BUILD_VARIANT: "build-py312"
14+
BUILD_VARIANT: "build"
2615
RAY_CUDA_CODE: "cpu"
2716
tags: cibase
2817

2918
- name: llmgpubuild
30-
wanda: ci/docker/llm.build.wanda.yaml
31-
depends_on:
32-
- oss-ci-base_cu128-multipy
33-
env:
34-
PYTHON: "3.11"
35-
BASE_TYPE: "cu128"
36-
BUILD_VARIANT: "gpubuild"
37-
RAY_CUDA_CODE: "cu128"
38-
tags: cibase
39-
40-
- name: llmgpubuild-py312
4119
wanda: ci/docker/llm.build.wanda.yaml
4220
depends_on:
4321
- oss-ci-base_cu130-multipy(python=3.12)
4422
env:
4523
PYTHON: "3.12"
4624
BASE_TYPE: "cu130"
47-
BUILD_VARIANT: "gpubuild-py312"
25+
BUILD_VARIANT: "gpubuild"
4826
RAY_CUDA_CODE: "cu130"
4927
tags: cibase
5028

@@ -57,9 +35,9 @@ steps:
5735
instance_type: medium
5836
commands:
5937
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/llm/... //doc/... llm
60-
--python-version 3.12 --build-name llmbuild-py312
38+
--python-version 3.12 --build-name llmbuild
6139
--except-tags gpu
62-
depends_on: llmbuild-py312
40+
depends_on: llmbuild
6341

6442
- label: "llm gpu tests"
6543
key: "llm-gpu-tests"
@@ -69,9 +47,9 @@ steps:
6947
instance_type: g6-large
7048
commands:
7149
- RAYCI_DISABLE_TEST_DB=1 bazel run //ci/ray_ci:test_in_docker -- //python/ray/llm/... //doc/... llm
72-
--python-version 3.12 --build-name llmgpubuild-py312 --only-tags gpu
50+
--python-version 3.12 --build-name llmgpubuild --only-tags gpu
7351
--except-tags multi_gpu_4
74-
depends_on: llmgpubuild-py312
52+
depends_on: llmgpubuild
7553

7654
- label: "llm gpu tests (4 GPUs)"
7755
key: "llm-gpu-tests-4gpu"
@@ -81,7 +59,7 @@ steps:
8159
instance_type: gpu-large
8260
commands:
8361
- RAYCI_DISABLE_TEST_DB=1 bazel run //ci/ray_ci:test_in_docker -- //doc/... llm
84-
--python-version 3.12 --build-name llmgpubuild-py312
62+
--python-version 3.12 --build-name llmgpubuild
8563
--only-tags multi_gpu_4
8664
--gpus 4
87-
depends_on: llmgpubuild-py312
65+
depends_on: llmgpubuild

.buildkite/release/build.rayci.yml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,6 @@ steps:
4545
- "3.12"
4646
cuda:
4747
- "13.0.0-cudnn"
48-
adjustments:
49-
- with:
50-
python: "3.11"
51-
cuda: "12.8.1-cudnn"
5248
env:
5349
PYTHON_VERSION: "{{array.python}}"
5450
CUDA_VERSION: "{{array.cuda}}"
@@ -179,10 +175,6 @@ steps:
179175
- "cu13.0.0-cudnn"
180176
python:
181177
- "3.12"
182-
adjustments:
183-
- with:
184-
python: "3.11"
185-
gpu: "cu12.8.1-cudnn"
186178
env:
187179
PYTHON_VERSION: "{{array.python}}"
188180
GPU: "{{array.gpu}}"
@@ -213,10 +205,6 @@ steps:
213205
- cu13.0.0-cudnn
214206
python:
215207
- "3.12"
216-
adjustments:
217-
- with:
218-
python: "3.11"
219-
gpu: "cu12.8.1-cudnn"
220208

221209
- name: ray-ml-anyscale-cuda-build
222210
label: "wanda: ray-ml-anyscale py{{array.python}} {{array.gpu}}"

.buildkite/serve.rayci.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,14 @@ steps:
224224
- python
225225
instance_type: large
226226
commands:
227+
# `|| exit 42` remaps any failure to exit code 42, which skips the whole-step retry (per @kouroshHakha); per-test retry is enough.
227228
- bazel run //ci/ray_ci:test_in_docker -- $(cat ci/ray_ci/serve_hap_test_names.txt) serve
228229
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
229230
--build-name servebuild-py3.10 --python-version 3.10
230231
--test-env=RAY_SERVE_ENABLE_HA_PROXY=1
231232
--test-env=RAY_SERVE_ENABLE_DIRECT_INGRESS=0
232233
--test-env=RAY_SERVE_DIRECT_INGRESS_MIN_DRAINING_PERIOD_S=0.01
233-
--test-env=SERVE_SOCKET_REUSE_PORT_ENABLED=1
234+
--test-env=SERVE_SOCKET_REUSE_PORT_ENABLED=1 || exit 42
234235
depends_on: servebuild-multipy(python=3.10)
235236

236237
- label: ":ray-serve: serve: HAProxy tests (pip)"
@@ -240,14 +241,15 @@ steps:
240241
- python
241242
instance_type: large
242243
commands:
244+
# `|| exit 42` remaps any failure to exit code 42, which skips the whole-step retry (per @kouroshHakha); per-test retry is enough.
243245
- bazel run //ci/ray_ci:test_in_docker -- $(cat ci/ray_ci/serve_hap_test_names.txt) serve
244246
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
245247
--build-name servebuild-py3.10 --python-version 3.10
246248
--test-env=RAY_SERVE_ENABLE_HA_PROXY=1
247249
--test-env=RAY_SERVE_EXPERIMENTAL_PIP_HAPROXY=1
248250
--test-env=RAY_SERVE_ENABLE_DIRECT_INGRESS=0
249251
--test-env=RAY_SERVE_DIRECT_INGRESS_MIN_DRAINING_PERIOD_S=0.01
250-
--test-env=SERVE_SOCKET_REUSE_PORT_ENABLED=1
252+
--test-env=SERVE_SOCKET_REUSE_PORT_ENABLED=1 || exit 42
251253
depends_on: servebuild-multipy(python=3.10)
252254

253255
- label: ":ray-serve: serve: direct ingress tests"

.buildkite/test.rules.test.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ docker/Dockerfile.ray: docker linux_wheels
8383

8484
doc/code.py: doc
8585
doc/example.ipynb: doc
86-
doc/tutorial.rst: doc
8786
.readthedocs.yaml: doc
8887
.vale.ini: doc
8988
.vale/styles/config/vocabularies/Core/accept.txt: doc

.buildkite/test.rules.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@
2020
! linux_wheels macos_wheels docker doc python_dependencies min_build tools
2121
! release_tests spark_on_ray
2222

23+
# NOTE: dstrodtman is leading an effort to rework content in RST files and asked that
24+
# premerge no longer block on these changes. We can remove this pass block once the effort concludes,
25+
# or earlier if regressions become an issue.
26+
doc/*.rst
27+
# pass
28+
;
29+
2330
python/ray/air/
2431
@ ml train train_gpu tune data linux_wheels
2532
;

.buildkite/windows.rayci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ steps:
106106
--test-env=CI="1"
107107
--test-env=RAY_CI_POST_WHEEL_TESTS="1"
108108
--test-env=USERPROFILE
109-
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
109+
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 1
110110
depends_on:
111111
- windowsbuild
112112
- block-windows-tests

.claude/skills/fetch-buildkite-logs/SKILL.md

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,49 @@ description: Fetch and analyze Buildkite CI build logs for failures
66
# Fetch Buildkite Logs
77

88
## Prerequisites
9-
- BUILDKITE_API_TOKEN must be set in the environment (typically ~/.bashrc)
10-
- If not configured, direct user to doc/source/ray-contribute/agent-development.md for setup
9+
- `BUILDKITE_API_TOKEN` must be set in the environment (typically `~/.bashrc`)
10+
- If not configured, direct user to `doc/source/ray-contribute/agent-development.md` for setup
11+
12+
## Parsing the Buildkite URL
13+
14+
A Buildkite URL has the form:
15+
16+
```
17+
https://buildkite.com/ray-project/<PIPELINE>/builds/<BUILD_NUM>#<JOB_ID>
18+
```
19+
20+
Always extract `<PIPELINE>` (e.g. `premerge`, `postmerge`) and `<BUILD_NUM>` from the URL the user provides. **Do not hardcode `premerge`** — the same skill is used for all pipelines. If a `#<JOB_ID>` fragment is present, it identifies a specific real job (not a group/wait job) and can be queried directly.
1121

1222
## Steps
23+
1324
1. Verify token: `echo $BUILDKITE_API_TOKEN | head -c4`
1425
2. If token missing, stop and show setup instructions from the dev docs
15-
3. Fetch build:
26+
3. Fetch build (use the pipeline from the URL):
1627
```bash
1728
curl -s -H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
18-
"https://api.buildkite.com/v2/organizations/ray-project/pipelines/premerge/builds/<BUILD_NUM>"
29+
"https://api.buildkite.com/v2/organizations/ray-project/pipelines/<PIPELINE>/builds/<BUILD_NUM>"
1930
```
20-
4. List failed jobs:
31+
4. If a job ID is present in the URL fragment, look it up directly:
2132
```bash
2233
curl -s -H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
23-
"https://api.buildkite.com/v2/organizations/ray-project/pipelines/premerge/builds/<BUILD_NUM>" \
34+
"https://api.buildkite.com/v2/organizations/ray-project/pipelines/<PIPELINE>/builds/<BUILD_NUM>" \
35+
| python3 -c "import sys,json; jobs=json.load(sys.stdin)['jobs']; [print(f\"{j['id']} {j.get('name')} -> {j.get('state')}\") for j in jobs if j['id']=='<JOB_ID>']"
36+
```
37+
5. Otherwise list failed/broken jobs:
38+
```bash
39+
curl -s -H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
40+
"https://api.buildkite.com/v2/organizations/ray-project/pipelines/<PIPELINE>/builds/<BUILD_NUM>" \
2441
| python3 -c "import sys,json; jobs=json.load(sys.stdin)['jobs']; [print(f\"{j['id']} {j['name']} -> {j['state']}\") for j in jobs if j.get('state') in ('failed','broken')]"
2542
```
26-
5. Fetch individual job logs and analyze root causes
27-
6. Summarize failures and suggest fixes
43+
6. Fetch individual job log:
44+
```bash
45+
curl -s -H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
46+
"https://api.buildkite.com/v2/organizations/ray-project/pipelines/<PIPELINE>/builds/<BUILD_NUM>/jobs/<JOB_ID>/log" \
47+
> /tmp/log_<JOB_ID>.json
48+
```
49+
Logs come back as JSON with a `content` field containing ANSI escape codes — strip them with `re.sub(r'\x1b\[[0-9;]*m', '', content)` before grepping.
50+
7. Summarize failures and suggest fixes.
51+
52+
## Authentication note
53+
54+
If `curl` returns `{"message":"No organization found"}`, the configured token does not have access to the `ray-project` organization. The user may have a separate org-scoped token — ask them which environment variable holds it and use that one instead.

0 commit comments

Comments
 (0)