-
Notifications
You must be signed in to change notification settings - Fork 17
118 lines (107 loc) · 4.53 KB
/
Copy pathstreaming_compliance.yml
File metadata and controls
118 lines (107 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Workflow: runs the streaming-compliance benchmark and publishes its JSON
# summary as a build artifact.
name: Streaming Compliance Benchmark

# Triggers:
#   - push: no branch/path filter is configured here.
#   - workflow_dispatch: manual run with the tunables below.
on:
  push:
  workflow_dispatch:
    # All inputs are passed around as strings. An input left empty adds no
    # flag to the pytest command line in the benchmark step.
    inputs:
      # The only required input; ships with a default model id.
      model:
        description: 'Model id'
        required: true
        default: 'fireworks_ai/accounts/fireworks/models/glm-4p6'

      max_tokens:
        description: 'Override max_tokens (integer)'
        required: false
        default: ''

      reasoning_effort:
        description: 'Reasoning effort (low|medium|high|none)'
        required: false
        default: ''

      max_rows:
        # Double-quoted because the text itself contains single quotes.
        description: "Max rows for smoke vs full run (integer or 'all')"
        required: false
        default: ''

      temperature:
        description: 'Temperature (float)'
        required: false
        default: ''

      # Quoted on purpose: the value is forwarded as the string "true",
      # not as a YAML boolean.
      stream:
        description: 'Enable streaming (true or empty)'
        required: false
        default: 'true'

      max_concurrency:
        description: 'Max concurrency (integer)'
        required: false
        default: ''

      num_runs:
        description: 'Number of runs (integer)'
        required: false
        default: ''

      max_retry:
        description: 'Max retry (integer)'
        required: false
        default: ''

      success_threshold:
        description: 'Minimum test score needed to pass (float)'
        required: false
        default: ''

jobs:
  # Single job: set up Python + uv, install this package, run the benchmark
  # through pytest, then upload whatever JSON summaries were produced.
  streaming-compliance:
    runs-on: ubuntu-latest
    timeout-minutes: 180
    # Least privilege: the job only needs to read the repository for checkout.
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string rather than a YAML float.
          python-version: "3.11"

      - name: Setup uv and .venv
        run: |
          python -m pip install --upgrade pip
          pip install uv
          uv venv
          . .venv/bin/activate
          uv pip install --upgrade pip

      - name: Install python-sdk package
        run: |
          . .venv/bin/activate
          uv pip install .

      - name: Run streaming compliance benchmark (pytest)
        # Pinned explicitly: the script below relies on bash arrays.
        shell: bash
        env:
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          FIREWORKS_ACCOUNT_ID: ${{ vars.FIREWORKS_ACCOUNT_ID }}
          # Dispatch inputs are handed to the script through the environment
          # instead of being expanded into the script text, so a value
          # containing quotes or $(...) cannot be executed as shell code.
          INPUT_MODEL: ${{ github.event.inputs.model }}
          INPUT_MAX_TOKENS: ${{ github.event.inputs.max_tokens }}
          INPUT_REASONING_EFFORT: ${{ github.event.inputs.reasoning_effort }}
          INPUT_MAX_ROWS: ${{ github.event.inputs.max_rows }}
          INPUT_TEMPERATURE: ${{ github.event.inputs.temperature }}
          INPUT_STREAM: ${{ github.event.inputs.stream }}
          INPUT_NUM_RUNS: ${{ github.event.inputs.num_runs }}
          INPUT_MAX_CONCURRENCY: ${{ github.event.inputs.max_concurrency }}
          INPUT_MAX_RETRY: ${{ github.event.inputs.max_retry }}
          INPUT_SUCCESS_THRESHOLD: ${{ github.event.inputs.success_threshold }}
        run: |
          . .venv/bin/activate
          mkdir -p artifacts

          # Copy into plain (non-exported) shell variables so the short names
          # are not visible in the pytest process environment.
          MODEL="$INPUT_MODEL"
          MAX_TOKENS="$INPUT_MAX_TOKENS"
          REASONING="$INPUT_REASONING_EFFORT"
          MAX_ROWS="$INPUT_MAX_ROWS"
          TEMPERATURE="$INPUT_TEMPERATURE"
          STREAM="$INPUT_STREAM"
          NUM_RUNS="$INPUT_NUM_RUNS"
          MAX_CONC="$INPUT_MAX_CONCURRENCY"
          MAX_RETRY="$INPUT_MAX_RETRY"
          SUCCESS_THRESHOLD="$INPUT_SUCCESS_THRESHOLD"

          echo "Running streaming compliance with reasoning_effort=${REASONING:-<default>} max_rows=${MAX_ROWS:-<default>} model=${MODEL:-<default>} max_tokens=${MAX_TOKENS:-<default>} temperature=${TEMPERATURE:-<default>} stream=${STREAM:-<default>} num_runs=${NUM_RUNS:-<default>} max_concurrency=${MAX_CONC:-<default>} max_retry=${MAX_RETRY:-<default>} success_threshold=${SUCCESS_THRESHOLD:-<default>}"

          PYTEST_TARGET=eval_protocol.benchmarks.test_glm_streaming_compliance

          # Build the argument list as an array so every value reaches pytest
          # as exactly one argument, with no word splitting or globbing.
          PYTEST_ARGS=(
            --pyargs "$PYTEST_TARGET"
            -q -s
            --ep-print-summary
            --ep-summary-json artifacts/streaming_compliance.json
          )

          # Only add a flag when its value is non-empty.
          if [ -n "$MAX_ROWS" ]; then PYTEST_ARGS+=("--ep-max-rows=$MAX_ROWS"); fi
          if [ -n "$REASONING" ]; then PYTEST_ARGS+=("--ep-reasoning-effort=$REASONING"); fi
          if [ -n "$MODEL" ]; then PYTEST_ARGS+=(--ep-input-param "model=$MODEL"); fi
          if [ -n "$MAX_TOKENS" ]; then PYTEST_ARGS+=(--ep-input-param "max_tokens=$MAX_TOKENS"); fi
          if [ -n "$TEMPERATURE" ]; then PYTEST_ARGS+=(--ep-input-param "temperature=$TEMPERATURE"); fi
          if [ -n "$STREAM" ]; then PYTEST_ARGS+=(--ep-input-param "stream=$STREAM"); fi
          if [ -n "$NUM_RUNS" ]; then PYTEST_ARGS+=("--ep-num-runs=$NUM_RUNS"); fi
          if [ -n "$MAX_CONC" ]; then PYTEST_ARGS+=("--ep-max-concurrent-rollouts=$MAX_CONC"); fi
          if [ -n "$MAX_RETRY" ]; then PYTEST_ARGS+=("--ep-max-retry=$MAX_RETRY"); fi
          if [ -n "$SUCCESS_THRESHOLD" ]; then PYTEST_ARGS+=("--ep-success-threshold=$SUCCESS_THRESHOLD"); fi

          echo "Running: pytest ${PYTEST_ARGS[*]}"
          pytest "${PYTEST_ARGS[@]}"

      - name: Upload JSON artifact(s)
        # Runs even when the benchmark step fails, so partial results are kept.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: streaming_compliance_json
          path: artifacts/*.json
          if-no-files-found: warn
          retention-days: 14