Evals: Nightly #7
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Nightly evaluation workflow. It runs on a daily schedule and can also be
# started manually, optionally restricted to a test-name or file pattern.
name: 'Evals: Nightly'

on:
  schedule:
    # GitHub evaluates cron expressions in UTC, so this fires at 01:00 UTC.
    - cron: '0 1 * * *' # Runs at 1 AM every day
  workflow_dispatch:
    inputs:
      # Boolean toggle; it defaults to true when the workflow is dispatched
      # manually without changing the field.
      run_all:
        description: 'Run all evaluations (including usually passing)'
        type: 'boolean'
        default: true
      # Optional filter. An empty value means "no filter".
      test_name_pattern:
        description: 'Test name pattern or file name'
        required: false
        type: 'string'
# Scopes granted to the automatically provided GITHUB_TOKEN for every job in
# this workflow. Any scope not listed here is set to 'none'.
permissions:
  contents: 'read'
  checks: 'write'
  actions: 'read'
jobs:
  # Runs the eval suite once per (model, run_attempt) combination:
  # 6 models x 3 attempts = 18 matrix jobs, each uploading its own log artifact.
  # NOTE(review): no timeout-minutes is set, so GitHub's default job timeout
  # applies - consider an explicit limit.
  evals:
    name: 'Evals (USUALLY_PASSING) nightly run'
    runs-on: 'gemini-cli-ubuntu-16-core'
    # Restrict the job to the upstream repository so forks do not run it.
    if: "github.repository == 'google-gemini/gemini-cli'"
    strategy:
      # Let every matrix combination finish even if another one fails.
      fail-fast: false
      matrix:
        model:
          - 'gemini-3.1-pro-preview-customtools'
          - 'gemini-3-pro-preview'
          - 'gemini-3-flash-preview'
          - 'gemini-2.5-pro'
          - 'gemini-2.5-flash'
          - 'gemini-2.5-flash-lite'
        # Repeats each model three times; the value is only used to make the
        # uploaded artifact name unique.
        run_attempt: [1, 2, 3]
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

      - name: 'Set up Node.js'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
        with:
          node-version-file: '.nvmrc'
          cache: 'npm'

      - name: 'Install dependencies'
        run: 'npm ci'

      - name: 'Build project'
        run: 'npm run build'

      # Make sure the upload path exists even if the eval run writes nothing.
      - name: 'Create logs directory'
        run: 'mkdir -p evals/logs'

      - name: 'Run Evals'
        # A non-zero exit from the eval command does not fail the job.
        # NOTE(review): presumably so results are judged by the aggregate
        # summary rather than by job status - confirm.
        continue-on-error: true
        env:
          GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
          GEMINI_MODEL: '${{ matrix.model }}'
          # 'true' on scheduled runs (no inputs are present) and on manual
          # runs unless run_all was explicitly set to false.
          RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}"
          TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
          # Disable Vitest internal retries to avoid double-retrying;
          # custom retry logic is handled in evals/test-helper.ts
          VITEST_RETRY: 0
        run: |
          # Keep the command in an array so it expands to exact words without
          # relying on unquoted word splitting.
          cmd=(npm run test:all_evals)
          pattern="${TEST_NAME_PATTERN}"
          if [[ -z "$pattern" ]]; then
            # No filter supplied: run the whole suite.
            "${cmd[@]}"
          elif [[ "$pattern" == *.ts || "$pattern" == *.js || "$pattern" == */* ]]; then
            # Looks like a file name or path: pass it as a positional argument.
            "${cmd[@]}" -- "$pattern"
          else
            # Otherwise treat it as a test-name filter.
            "${cmd[@]}" -- -t "$pattern"
          fi

      - name: 'Upload Logs'
        # Upload logs regardless of how the earlier steps ended.
        if: 'always()'
        uses: 'actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02' # ratchet:actions/upload-artifact@v4
        with:
          name: 'eval-logs-${{ matrix.model }}-${{ matrix.run_attempt }}'
          path: 'evals/logs'
          retention-days: 7

  # Collects the log artifacts from all matrix jobs and writes a combined
  # report to the run's step summary.
  aggregate-results:
    name: 'Aggregate Results'
    needs: ['evals']
    # always() lets this job run even when some matrix jobs failed.
    if: "github.repository == 'google-gemini/gemini-cli' && always()"
    runs-on: 'gemini-cli-ubuntu-16-core'
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5

      # With no artifact name given, every artifact of this run is downloaded
      # into its own sub-directory under 'artifacts'.
      - name: 'Download Logs'
        uses: 'actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806' # ratchet:actions/download-artifact@v4
        with:
          path: 'artifacts'

      - name: 'Generate Summary'
        env:
          # NOTE(review): presumably used by the script for GitHub API or gh
          # CLI calls - confirm against scripts/aggregate_evals.js.
          GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
        run: 'node scripts/aggregate_evals.js artifacts >> "$GITHUB_STEP_SUMMARY"'