Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
6740580
WIP: difficulty mapper + persist sample level rewards
undfined May 1, 2026
0b99fa1
File perms
undfined May 1, 2026
cb30daa
Script fixes
undfined May 1, 2026
35c6964
Explicit tokenizer
undfined May 1, 2026
6735515
Fix save destination
undfined May 1, 2026
2bbd3f2
Tweaks for results output location
undfined May 1, 2026
2ab2d63
Adjust to support image builder script
undfined May 3, 2026
ae30b59
Add source row id explicitly
undfined May 3, 2026
866319f
Drop sample limit
undfined May 3, 2026
13c307b
Persist source / id and experiment id in the map
undfined May 3, 2026
6af2b45
Small changes for sample size smaller than batch size
undfined May 3, 2026
28b1bb0
Run everything and data parallel setup
undfined May 3, 2026
df7c586
Duplicate argument
undfined May 3, 2026
1d3c168
Ugh
undfined May 3, 2026
a9b1ffe
Need 16 samples
undfined May 3, 2026
2caecca
Configurable output format
undfined May 3, 2026
d45c661
Only save scores
undfined May 3, 2026
2f41541
Support HF datasets as well
undfined May 4, 2026
fb8599e
WIP: First pass at difficulty sampling loader
undfined May 4, 2026
3a7b3a4
Launcher tweaks
undfined May 4, 2026
c96506e
Some cleanup, renames
undfined May 6, 2026
bc4b9cc
More cleanup
undfined May 6, 2026
484a455
Move some stuff around
undfined May 6, 2026
caf89c7
Shorten filename
undfined May 6, 2026
efc2860
Rename + revert some unnecessary changes
undfined May 6, 2026
1eebbbc
More cleanup
undfined May 6, 2026
8c1e55e
More cruft
undfined May 6, 2026
3c4bfaa
Drop noop in favor of conditional
undfined May 6, 2026
3f77115
Merge main
undfined May 6, 2026
8b04761
Revert
undfined May 6, 2026
cefb1b9
Some verification methods
undfined May 7, 2026
d5414f8
Add CHANGELOG update
undfined May 7, 2026
b606db0
Merge main
undfined May 11, 2026
cbe96f3
Cache some stuff in the init
undfined May 11, 2026
bb65e55
Adds some variants
undfined May 11, 2026
0318c40
Perms
undfined May 11, 2026
335b6c6
Budget
undfined May 11, 2026
b4a7e56
Outdated argument
undfined May 11, 2026
20461bf
Merge branch 'main' of github.com:allenai/open-instruct into undfined…
undfined May 11, 2026
ccbf686
Better layout
undfined May 11, 2026
4de09bf
Readme, stronger adaptive, cleanup
undfined May 12, 2026
45463cd
Adds adaptive + hardest 50th variants
undfined May 12, 2026
ae2354c
Char length in wandb
undfined May 12, 2026
725fd83
Merge branch 'main' of github.com:allenai/open-instruct into undfined…
undfined May 12, 2026
153bcd4
Merge main
undfined May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.


### Changed
- Add optional difficulty-curriculum prompt sampling to `grpo_fast.py`, `DifficultyCurriculumHFDataLoader`, and a `scripts/data/difficulty_sampling/create_difficulty_map.py` builder, with matching docs/tests and a Qwen launch script (https://github.com/allenai/open-instruct/pull/1661).
- Add parameterized `combine_dataset` tests in `open_instruct/test_utils.py` against local jsonl fixtures (no network), covering varied fractional/sample-count weight combinations and split-count mismatch (would have caught the bug fixed in #1674). Extract the interleaved-list→dict parsing into a shared `utils.parse_dataset_mixer_list` helper (with its own parameterized unit tests) and tighten `combine_dataset` / `get_datasets` to accept dict-only `dataset_mixer`; the one external list-form caller (`rejection_sampling/generation.py`) now converts at the call site.
- Make `mason.py` `--output_dir` / `--checkpoint_state_dir` overrides idempotent via `replace_or_append_flag`, add `open_instruct/grpo.py` to `OPEN_INSTRUCT_COMMANDS` / `OPEN_INSTRUCT_RESUMABLES`, and wire OLMo-core checkpoint save/resume into `grpo.py` (`CheckpointerCallback` + `DataPreparationActorCheckpointCallback` + `LoadStrategy.if_available`) so resumable Beaker jobs actually resume (https://github.com/allenai/open-instruct/pull/1666).
- Make `--budget` optional in `mason.py` (falls back to the workspace's default budget) and drop the explicit `--budget` flag from launch scripts where it already matched the workspace default (https://github.com/allenai/open-instruct/pull/1673).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"difficulty_generation": {
"beta_prior_requested": "empirical-bayes",
"beta_prior_used": {
"alpha": 0.4383101221875231,
"beta": 2.1002075643639166,
"source": "empirical_bayes"
},
"binary_instance_count": 12643,
"bucket_count_effective": 5,
"bucket_count_field": "difficulty.bucket_count",
"bucket_count_requested": 5,
"bucket_field": "difficulty.bucket_index",
"bucket_ranking_field": "difficulty.expected_quantile",
"difficulty_value_definition": "1 - difficulty.posterior_lower_bound",
"difficulty_value_field": "difficulty.value",
"method": "beta_binomial_posterior_quantiles",
"nonbinary_instance_count": 0,
"posterior_lower_quantile": 0.1,
"tag": "bbq-eb-q10-k5"
},
"model_name": "Qwen/Qwen3-4B-Base",
"row_count": 12643,
"score_processing": {
"normalization": "identity_binary",
"output_field": "attempt_scores",
"positive_reward_value": 1.0,
"source_field": "pass_count,num_samples,pass_rate",
"supports_binary_difficulty": true
},
"source_format": {
"attempt_count_field": "num_samples",
"config_name": null,
"dataset_repo_id": "mnoukhov/dapo-math-17k-processed-filtered-qwen3-4b-base-32samples",
"instance_id_definition": "dataset_repo_id::row_id_field when a stable row id is available; otherwise dataset_repo_id::row_index",
"kind": "hugging_face_dataset_passrate_rows",
"model_field": "generator_model",
"pass_count_field": "pass_count",
"pass_rate_field": "pass_rate",
"row_id_field": "extra_info.index",
"split": "train",
"task_field": "dataset"
},
"task_name": "math"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"ability": {
"_type": "Value",
"dtype": "string"
},
"completions": {
"_type": "List",
"feature": {
"_type": "Value",
"dtype": "string"
}
},
"data_source": {
"_type": "Value",
"dtype": "string"
},
"dataset": {
"_type": "Value",
"dtype": "string"
},
"difficulty": {
"bucket_count": {
"_type": "Value",
"dtype": "int64"
},
"bucket_index": {
"_type": "Value",
"dtype": "int64"
},
"expected_quantile": {
"_type": "Value",
"dtype": "float64"
},
"posterior_lower_bound": {
"_type": "Value",
"dtype": "float64"
},
"posterior_mean": {
"_type": "Value",
"dtype": "float64"
},
"value": {
"_type": "Value",
"dtype": "float64"
}
},
"extra_info": {
"index": {
"_type": "Value",
"dtype": "string"
}
},
"generator_chat_template": {
"_type": "Value",
"dtype": "string"
},
"generator_max_tokens": {
"_type": "Value",
"dtype": "int64"
},
"generator_model": {
"_type": "Value",
"dtype": "string"
},
"generator_temperature": {
"_type": "Value",
"dtype": "float64"
},
"generator_top_p": {
"_type": "Value",
"dtype": "float64"
},
"ground_truth": {
"_type": "Value",
"dtype": "string"
},
"messages": {
"_type": "List",
"feature": {
"content": {
"_type": "Value",
"dtype": "string"
},
"role": {
"_type": "Value",
"dtype": "string"
}
}
},
"num_samples": {
"_type": "Value",
"dtype": "int64"
},
"pass_count": {
"_type": "Value",
"dtype": "int64"
},
"pass_rate": {
"_type": "Value",
"dtype": "string"
},
"prompt": {
"_type": "Value",
"dtype": "string"
},
"reward_model": {
"ground_truth": {
"_type": "Value",
"dtype": "string"
},
"style": {
"_type": "Value",
"dtype": "string"
}
},
"solution": {
"_type": "Value",
"dtype": "string"
},
"source_prompt": {
"_type": "List",
"feature": {
"content": {
"_type": "Value",
"dtype": "string"
},
"role": {
"_type": "Value",
"dtype": "string"
}
}
},
"source_split": {
"_type": "Value",
"dtype": "string"
}
}
Loading
Loading