Skip to content

Commit b8c556b

Browse files
authored
split test_be into multiple jobs, one per OS (#9290)
Today's test_python job (in the test_be workflow) uses a single matrix across Ubuntu, macOS, and Windows with GitHub's default fail-fast: true. A single test failure on any runner cancels every other matrix entry, so a flaky Windows test wipes out in-flight Ubuntu and macOS runs that have nothing to do with the failure, forcing a full re-run. This commit splits test_python into test_python_ubuntu, test_python_macos, and test_python_windows, each with its own matrix. fail-fast is now scoped per OS: sibling entries on the same OS still cancel (preserving the cost-saving behavior), but other OSes continue independently. The per-entry steps (setup-uv, assets, flag-computation, pytest invocations) are factored into a new composite action at .github/actions/test-python to avoid duplicating ~100 lines of YAML across the three jobs. This matches the existing composite-action pattern used by install, build-frontend, and pr-comment-on-failure. The workflow-level concurrency block is unchanged and continues to cancel stale runs on fresh pushes as before.
1 parent 506b7aa commit b8c556b

2 files changed

Lines changed: 180 additions & 126 deletions

File tree

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
name: Test Python
2+
description: Run the marimo Python test suite for a given Python version and dependency set. Assumes the repo has already been checked out.
3+
4+
inputs:
5+
os:
6+
description: Runner OS (ubuntu-latest, macos-latest, or windows-latest). Used to disable pytest-xdist on Windows.
7+
required: true
8+
python-version:
9+
description: Python version to test against.
10+
required: true
11+
dependencies:
12+
description: Dependency group — "core", "core,optional", or "minimal".
13+
required: true
14+
15+
runs:
16+
using: composite
17+
steps:
18+
# Use uv's managed Python rather than the hostedtoolcache build from
19+
# actions/setup-python: the hostedtoolcache interpreter has ABI quirks
20+
# that caused numpy/matplotlib lazy submodule imports to fail on CI.
21+
- name: 🐍 Setup uv
22+
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
23+
with:
24+
python-version: ${{ inputs.python-version }}
25+
enable-cache: true
26+
27+
# This step is needed since some of our tests rely on the index.html file
28+
- name: Create assets directory, copy over index.html
29+
shell: bash
30+
run: |
31+
mkdir -p marimo/_static/assets
32+
cp frontend/index.html marimo/_static/index.html
33+
cp frontend/public/favicon.ico marimo/_static/favicon.ico
34+
35+
# Setup test flags based on conditions
36+
- name: Setup test flags
37+
id: setup-flags
38+
shell: bash
39+
run: |
40+
# Set CHANGED_FROM: on PRs (github.base_ref is set), compare against the base branch; otherwise (e.g. pushes to main), compare against HEAD~1
41+
CHANGED_FROM="${{ github.base_ref && format('origin/{0}', github.base_ref) || 'HEAD~1' }}"
42+
echo "changed_from=$CHANGED_FROM"
43+
echo "changed_from=$CHANGED_FROM" >> $GITHUB_OUTPUT
44+
45+
# Set INCLUDE_UNCHANGED: true if test-all label, python 3.13 + optional deps, or main branch
46+
if [[ "${{ contains(github.event.pull_request.labels.*.name, 'test-all') }}" == "true" ]] || \
47+
[[ "${{ inputs.python-version }}" == "3.13" && "${{ inputs.dependencies }}" == "core,optional" ]] || \
48+
[[ "${{ github.ref }}" == "refs/heads/main" ]]; then
49+
echo "include_unchanged=true"
50+
echo "include_unchanged=true" >> $GITHUB_OUTPUT
51+
else
52+
echo "include_unchanged=false"
53+
echo "include_unchanged=false" >> $GITHUB_OUTPUT
54+
fi
55+
56+
# Test with base dependencies
57+
# Exit code 5 = no tests collected (e.g. only CLI test files changed);
58+
# treat that as success since CLI tests run in a separate workflow.
59+
#
60+
# TODO: xdist is disabled on Windows because several tests
61+
# crash workers. Fix and re-enable. Failing tests:
62+
# - tests/_islands/test_island_generator.py::test_build
63+
# - tests/_islands/test_island_generator.py::test_render
64+
# - tests/_islands/test_island_generator.py::test_render_multiline_markdown
65+
# - tests/_messaging/test_streams.py::test_import_multiprocessing
66+
# - tests/_server/api/endpoints/test_ai.py::TestOpenAiEndpoints::test_completion_without_token
67+
# - tests/_server/test_session_manager.py::test_create_session_new
68+
# - tests/_server/test_session_manager.py::test_create_session_absolute_url
69+
# - tests/_server/test_session_manager.py::test_create_session_with_script_config_overrides
70+
# - tests/_server/test_session_manager.py::test_recents_touch_called_on_session_create
71+
- name: Test changed with base dependencies
72+
if: ${{ inputs.dependencies == 'core' }}
73+
shell: bash
74+
run: |
75+
uv run --python ${{ inputs.python-version }} --group test pytest tests/ \
76+
-v \
77+
${{ inputs.os != 'windows-latest' && '-n auto' || '-p no:xdist' }} \
78+
-k "not test_cli" \
79+
--durations=10 \
80+
-p packages.pytest_changed \
81+
--changed-from=${{ steps.setup-flags.outputs.changed_from }} \
82+
--include-unchanged=${{ steps.setup-flags.outputs.include_unchanged }} \
83+
--picked=first \
84+
--inline-snapshot=disable \
85+
|| { ec=$?; [ $ec -eq 5 ] && exit 0 || exit $ec; }
86+
87+
# Test with optional dependencies
88+
- name: Test changed with optional dependencies
89+
if: ${{ inputs.dependencies == 'core,optional' }}
90+
shell: bash
91+
run: |
92+
uv run --python ${{ inputs.python-version }} --group test-optional pytest tests/ \
93+
-v \
94+
${{ inputs.os != 'windows-latest' && '-n auto' || '-p no:xdist' }} \
95+
-k "not test_cli" \
96+
--durations=10 \
97+
-p packages.pytest_changed \
98+
--changed-from=${{ steps.setup-flags.outputs.changed_from }} \
99+
--include-unchanged=${{ steps.setup-flags.outputs.include_unchanged }} \
100+
--picked=first \
101+
--inline-snapshot=disable \
102+
|| { ec=$?; [ $ec -eq 5 ] && exit 0 || exit $ec; }
103+
104+
# Test with minimal dependencies using lowest resolution
105+
# https://docs.astral.sh/uv/concepts/resolution/#lowest-resolution
106+
# https://docs.astral.sh/uv/reference/environment/#uv_resolution
107+
- name: Test with minimal dependencies (lowest resolution)
108+
if: ${{ inputs.dependencies == 'minimal' }}
109+
shell: bash
110+
env:
111+
UV_RESOLUTION: lowest-direct
112+
run: |
113+
uv run --python ${{ inputs.python-version }} --group test pytest tests/ \
114+
-v \
115+
${{ inputs.os != 'windows-latest' && '-n auto' || '-p no:xdist' }} \
116+
-k "not test_cli" \
117+
--durations=10 \
118+
-p packages.pytest_changed \
119+
--changed-from=${{ steps.setup-flags.outputs.changed_from }} \
120+
--include-unchanged=${{ steps.setup-flags.outputs.include_unchanged }} \
121+
--picked=first \
122+
--inline-snapshot=disable \
123+
|| { ec=$?; [ $ec -eq 5 ] && exit 0 || exit $ec; }

.github/workflows/test_be.yaml

Lines changed: 57 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -100,156 +100,87 @@ jobs:
100100

101101
# For PRs, we only run the tests for the changed files.
102102
# If there is a `test-all` label, we run the tests across unchanged files as well.
103-
test_python:
103+
#
104+
# Split by OS so that a failure on one runner does not cancel sibling jobs on
105+
# other runners (each job has its own matrix and therefore its own fail-fast
106+
# scope). Within a single OS, the default fail-fast behavior is preserved.
107+
test_python_ubuntu:
104108
needs: changes
105109
if: ${{ needs.changes.outputs.backend == 'true' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'test-all')) }}
106-
name: ${{ matrix.os }} / Py ${{ matrix.python-version }} / ${{ matrix.dependencies }} deps
107-
runs-on: ${{ matrix.os }}
110+
name: ubuntu-latest / Py ${{ matrix.python-version }} / ${{ matrix.dependencies }} deps
111+
runs-on: ubuntu-latest
108112
timeout-minutes: 25
109-
defaults:
110-
run:
111-
shell: bash
112-
113113
strategy:
114114
matrix:
115-
os: [ubuntu-latest, macos-latest, windows-latest]
116-
dependencies: ["core", "core,optional"]
117115
python-version: ["3.10"]
116+
dependencies: ["core", "core,optional"]
118117
include:
119-
- os: ubuntu-latest
120-
python-version: "3.10"
118+
- python-version: "3.10"
121119
dependencies: "minimal"
122-
- os: ubuntu-latest
123-
python-version: "3.11"
120+
- python-version: "3.11"
124121
dependencies: "core"
125-
- os: ubuntu-latest
126-
python-version: "3.12"
122+
- python-version: "3.12"
127123
dependencies: "core"
128-
- os: ubuntu-latest
129-
python-version: "3.13"
124+
- python-version: "3.13"
130125
dependencies: "core"
131-
- os: ubuntu-latest
132-
python-version: "3.14"
126+
- python-version: "3.14"
133127
dependencies: "core"
134-
- os: ubuntu-latest
135-
python-version: "3.10"
128+
- python-version: "3.11"
136129
dependencies: "core,optional"
137-
- os: ubuntu-latest
138-
python-version: "3.11"
130+
- python-version: "3.12"
139131
dependencies: "core,optional"
140-
- os: ubuntu-latest
141-
python-version: "3.12"
142-
dependencies: "core,optional"
143-
- os: ubuntu-latest
144-
python-version: "3.13"
132+
- python-version: "3.13"
145133
dependencies: "core,optional"
146134
# TODO: Add in 3.14 optional once there is broader wheel support
147135
steps:
148136
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
149137
with:
150138
fetch-depth: 0 # Fetch all history for git diff
151-
152-
# Use uv's managed Python rather than the hostedtoolcache build from
153-
# actions/setup-python: the hostedtoolcache interpreter has ABI quirks
154-
# that caused numpy/matplotlib lazy submodule imports to fail on CI.
155-
- name: 🐍 Setup uv
156-
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
139+
- uses: ./.github/actions/test-python
157140
with:
141+
os: ubuntu-latest
158142
python-version: ${{ matrix.python-version }}
159-
enable-cache: true
160-
161-
# This step is needed since some of our tests rely on the index.html file
162-
- name: Create assets directory, copy over index.html
163-
run: |
164-
mkdir -p marimo/_static/assets
165-
cp frontend/index.html marimo/_static/index.html
166-
cp frontend/public/favicon.ico marimo/_static/favicon.ico
167-
168-
# Setup test flags based on conditions
169-
- name: Setup test flags
170-
id: setup-flags
171-
run: |
172-
# Set CHANGED_FROM: On PRs, compare against base branch; on main, compare against HEAD~1
173-
CHANGED_FROM="${{ github.base_ref && format('origin/{0}', github.base_ref) || 'HEAD~1' }}"
174-
echo "changed_from=$CHANGED_FROM"
175-
echo "changed_from=$CHANGED_FROM" >> $GITHUB_OUTPUT
176-
177-
# Set INCLUDE_UNCHANGED: true if test-all label, python 3.13 + optional deps, or main branch
178-
if [[ "${{ contains(github.event.pull_request.labels.*.name, 'test-all') }}" == "true" ]] || \
179-
[[ "${{ matrix.python-version }}" == "3.13" && "${{ matrix.dependencies }}" == "core,optional" ]] || \
180-
[[ "${{ github.ref }}" == "refs/heads/main" ]]; then
181-
echo "include_unchanged=true"
182-
echo "include_unchanged=true" >> $GITHUB_OUTPUT
183-
else
184-
echo "include_unchanged=false"
185-
echo "include_unchanged=false" >> $GITHUB_OUTPUT
186-
fi
187-
188-
# Test with base dependencies
189-
# Exit code 5 = no tests collected (e.g. only CLI test files changed);
190-
# treat that as success since CLI tests run in a separate workflow.
191-
#
192-
# TODO: xdist is disabled on Windows because several tests
193-
# crash workers. Fix and re-enable. Failing tests:
194-
# - tests/_islands/test_island_generator.py::test_build
195-
# - tests/_islands/test_island_generator.py::test_render
196-
# - tests/_islands/test_island_generator.py::test_render_multiline_markdown
197-
# - tests/_messaging/test_streams.py::test_import_multiprocessing
198-
# - tests/_server/api/endpoints/test_ai.py::TestOpenAiEndpoints::test_completion_without_token
199-
# - tests/_server/test_session_manager.py::test_create_session_new
200-
# - tests/_server/test_session_manager.py::test_create_session_absolute_url
201-
# - tests/_server/test_session_manager.py::test_create_session_with_script_config_overrides
202-
# - tests/_server/test_session_manager.py::test_recents_touch_called_on_session_create
203-
- name: Test changed with base dependencies
204-
if: ${{ matrix.dependencies == 'core' }}
205-
run: |
206-
uv run --python ${{ matrix.python-version }} --group test pytest tests/ \
207-
-v \
208-
${{ matrix.os != 'windows-latest' && '-n auto' || '-p no:xdist' }} \
209-
-k "not test_cli" \
210-
--durations=10 \
211-
-p packages.pytest_changed \
212-
--changed-from=${{ steps.setup-flags.outputs.changed_from }} \
213-
--include-unchanged=${{ steps.setup-flags.outputs.include_unchanged }} \
214-
--picked=first \
215-
--inline-snapshot=disable \
216-
|| { ec=$?; [ $ec -eq 5 ] && exit 0 || exit $ec; }
143+
dependencies: ${{ matrix.dependencies }}
217144

218-
# Test with optional dependencies
219-
- name: Test changed with optional dependencies
220-
if: ${{ matrix.dependencies == 'core,optional' }}
221-
run: |
222-
uv run --python ${{ matrix.python-version }} --group test-optional pytest tests/ \
223-
-v \
224-
${{ matrix.os != 'windows-latest' && '-n auto' || '-p no:xdist' }} \
225-
-k "not test_cli" \
226-
--durations=10 \
227-
-p packages.pytest_changed \
228-
--changed-from=${{ steps.setup-flags.outputs.changed_from }} \
229-
--include-unchanged=${{ steps.setup-flags.outputs.include_unchanged }} \
230-
--picked=first \
231-
--inline-snapshot=disable \
232-
|| { ec=$?; [ $ec -eq 5 ] && exit 0 || exit $ec; }
145+
test_python_macos:
146+
needs: changes
147+
if: ${{ needs.changes.outputs.backend == 'true' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'test-all')) }}
148+
name: macos-latest / Py ${{ matrix.python-version }} / ${{ matrix.dependencies }} deps
149+
runs-on: macos-latest
150+
timeout-minutes: 25
151+
strategy:
152+
matrix:
153+
python-version: ["3.10"]
154+
dependencies: ["core", "core,optional"]
155+
steps:
156+
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
157+
with:
158+
fetch-depth: 0 # Fetch all history for git diff
159+
- uses: ./.github/actions/test-python
160+
with:
161+
os: macos-latest
162+
python-version: ${{ matrix.python-version }}
163+
dependencies: ${{ matrix.dependencies }}
233164

234-
# Test with minimal dependencies using lowest resolution
235-
# https://docs.astral.sh/uv/concepts/resolution/#lowest-resolution
236-
# https://docs.astral.sh/uv/reference/environment/#uv_resolution
237-
- name: Test with minimal dependencies (lowest resolution)
238-
if: ${{ matrix.dependencies == 'minimal' }}
239-
run: |
240-
uv run --python ${{ matrix.python-version }} --group test pytest tests/ \
241-
-v \
242-
${{ matrix.os != 'windows-latest' && '-n auto' || '-p no:xdist' }} \
243-
-k "not test_cli" \
244-
--durations=10 \
245-
-p packages.pytest_changed \
246-
--changed-from=${{ steps.setup-flags.outputs.changed_from }} \
247-
--include-unchanged=${{ steps.setup-flags.outputs.include_unchanged }} \
248-
--picked=first \
249-
--inline-snapshot=disable \
250-
|| { ec=$?; [ $ec -eq 5 ] && exit 0 || exit $ec; }
251-
env:
252-
UV_RESOLUTION: lowest-direct
165+
test_python_windows:
166+
needs: changes
167+
if: ${{ needs.changes.outputs.backend == 'true' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'test-all')) }}
168+
name: windows-latest / Py ${{ matrix.python-version }} / ${{ matrix.dependencies }} deps
169+
runs-on: windows-latest
170+
timeout-minutes: 25
171+
strategy:
172+
matrix:
173+
python-version: ["3.10"]
174+
dependencies: ["core", "core,optional"]
175+
steps:
176+
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
177+
with:
178+
fetch-depth: 0 # Fetch all history for git diff
179+
- uses: ./.github/actions/test-python
180+
with:
181+
os: windows-latest
182+
python-version: ${{ matrix.python-version }}
183+
dependencies: ${{ matrix.dependencies }}
253184

254185
# Only run coverage on `main` so it is not blocking
255186
test_coverage:

0 commit comments

Comments
 (0)