Skip to content

Commit f7755d5

Browse files
committed
test
1 parent 269f0ae commit f7755d5

10 files changed

Lines changed: 398 additions & 387 deletions

.github/workflows/rollout.yml

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,34 @@
11
name: Eval Protocol Rollout
22

3-
run-name: rollout:${{ inputs.rollout_id }}
3+
run-name: rollout:${{ fromJSON(inputs.metadata).rollout_id }}
44

55
on:
66
workflow_dispatch:
77
inputs:
88
model:
9-
description: 'Model to use for the rollout'
9+
description: 'Model to use'
1010
required: true
1111
type: string
12-
rollout_id:
13-
description: 'Rollout ID for tracking'
12+
metadata:
13+
description: 'JSON serialized metadata object'
1414
required: true
1515
type: string
16-
prompt:
17-
description: 'User prompt for the rollout'
16+
messages:
17+
description: 'JSON serialized messages array'
18+
required: true
19+
type: string
20+
tools:
21+
description: 'JSON serialized tools array'
22+
required: false
23+
type: string
24+
model_base_url:
25+
description: 'Base URL for the model API'
1826
required: true
1927
type: string
2028

2129
jobs:
2230
rollout:
2331
runs-on: ubuntu-latest
24-
name: rollout-${{ inputs.rollout_id }}
2532

2633
steps:
2734
- name: Checkout code
@@ -42,14 +49,8 @@ jobs:
4249
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
4350
run: |
4451
python tests/github_actions/rollout_worker.py \
45-
--rollout-id "${{ inputs.rollout_id }}" \
4652
--model "${{ inputs.model }}" \
47-
--prompt "${{ inputs.prompt }}"
48-
49-
- name: Upload rollout trace
50-
uses: actions/upload-artifact@v4
51-
if: always() # Upload even if the rollout failed
52-
with:
53-
name: rollout-trace-${{ inputs.rollout_id }}
54-
path: rollout_trace_${{ inputs.rollout_id }}.json
55-
retention-days: 7
53+
--metadata '${{ inputs.metadata }}' \
54+
--messages '${{ inputs.messages }}' \
55+
--tools '${{ inputs.tools }}' \
56+
--model-base-url "${{ inputs.model_base_url }}"

eval_protocol/pytest/evaluation_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
)
4848

4949

50-
from eval_protocol.pytest.utils import (
50+
from eval_protocol.pytest.evaluation_test_utils import (
5151
AggregationMethod,
5252
add_cost_metrics,
5353
log_eval_status_and_rows,

eval_protocol/pytest/evaluation_test_postprocess.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,12 @@
1010
from eval_protocol.models import CompletionParams, EvaluationRow, EvaluationThreshold, Status
1111
from eval_protocol.pytest.handle_persist_flow import handle_persist_flow
1212
from eval_protocol.pytest.types import EvaluationTestMode
13-
from eval_protocol.pytest.utils import AggregationMethod, aggregate, extract_effort_tag, sanitize_filename
13+
from eval_protocol.pytest.evaluation_test_utils import (
14+
AggregationMethod,
15+
aggregate,
16+
extract_effort_tag,
17+
sanitize_filename,
18+
)
1419
from eval_protocol.stats.confidence_intervals import compute_fixed_set_mu_ci
1520

1621

File renamed without changes.

eval_protocol/pytest/generate_parameter_combinations.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from eval_protocol.data_loader.models import EvaluationDataLoader
33
from eval_protocol.models import CompletionParams, EvaluationRow
44
from eval_protocol.pytest.types import Dataset, DatasetPathParam, EvaluationInputParam, InputMessagesParam
5-
from eval_protocol.pytest.utils import parse_ep_max_rows
5+
from eval_protocol.pytest.evaluation_test_utils import parse_ep_max_rows
66
from collections.abc import Sequence
77

88

0 commit comments

Comments
 (0)