Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
c32dbb5
Add difficulty curriculum sampler
undfined May 12, 2026
bc7191c
Add difficulty map builder
undfined May 12, 2026
8cd1c04
Merge branch 'main' into split-create-difficulty-map
undfined May 12, 2026
d127ba4
Merge branch 'main' into split-difficulty-curriculum-core
undfined May 12, 2026
fc1943d
Document adaptive curriculum scoring rationale
undfined May 13, 2026
fd53ea7
Merge branch 'split-difficulty-curriculum-core' into split-create-dif…
undfined May 13, 2026
49f8f03
Merge remote-tracking branch 'origin/split-difficulty-curriculum-core…
undfined May 13, 2026
9b3e13b
Merge remote-tracking branch 'origin/split-create-difficulty-map' int…
undfined May 13, 2026
3efb3f4
Merge branch 'split-difficulty-curriculum-core' into split-create-dif…
undfined May 13, 2026
62369cd
Add difficulty curriculum changelog entry
undfined May 13, 2026
ead1ab7
Merge branch 'split-difficulty-curriculum-core' into split-create-dif…
undfined May 13, 2026
ba0ff62
Add difficulty map changelog entry
undfined May 13, 2026
d8920e9
Merge remote-tracking branch 'origin/main' into split-difficulty-curr…
undfined May 13, 2026
44f8a4d
Merge branch 'split-difficulty-curriculum-core' into split-create-dif…
undfined May 13, 2026
286c994
Clarify adaptive curriculum learning signal comment
undfined May 13, 2026
f291320
Merge branch 'split-difficulty-curriculum-core' into split-create-dif…
undfined May 13, 2026
07c3e18
Clarify hard bucket scheduling comment
undfined May 13, 2026
25b4f35
Merge branch 'split-difficulty-curriculum-core' into split-create-dif…
undfined May 13, 2026
784103b
Handle NumPy values in difficulty map JSON output
undfined May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file.


### Changed
- Add a difficulty map builder for pass-rate Hugging Face datasets, including schema/metadata outputs and tests (https://github.com/allenai/open-instruct/pull/1693).
- Add difficulty curriculum sampler configs, scheduling, adaptive bucket scoring, and unit tests (https://github.com/allenai/open-instruct/pull/1692).
- Rename `time/trainer_idle_waiting_for_inference` to `time/trainer_waiting_for_data` and `time/generation_idle_waiting_for_trainer` to `time/generation_waiting_for_trainer`, and emit per-Group generation timing (`time/group_generation_{mean,max,min}` plus `batch/per_group_generation_times` histogram) so latency vs. throughput in the inference pipeline is legible from wandb (https://github.com/allenai/open-instruct/pull/1690).
- Add parameterized `combine_dataset` tests in `open_instruct/test_utils.py` against local jsonl fixtures (no network), covering varied fractional/sample-count weight combinations and split-count mismatch (would have caught the bug fixed in #1674). Extract the interleaved-list→dict parsing into a shared `utils.parse_dataset_mixer_list` helper (with its own parameterized unit tests) and tighten `combine_dataset` / `get_datasets` to accept dict-only `dataset_mixer`; the one external list-form caller (`rejection_sampling/generation.py`) now converts at the call site.
- Make `mason.py` `--output_dir` / `--checkpoint_state_dir` overrides idempotent via `replace_or_append_flag`, add `open_instruct/grpo.py` to `OPEN_INSTRUCT_COMMANDS` / `OPEN_INSTRUCT_RESUMABLES`, and wire OLMo-core checkpoint save/resume into `grpo.py` (`CheckpointerCallback` + `DataPreparationActorCheckpointCallback` + `LoadStrategy.if_available`) so resumable Beaker jobs actually resume (https://github.com/allenai/open-instruct/pull/1666).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"difficulty_generation": {
"beta_prior_requested": "empirical-bayes",
"beta_prior_used": {
"alpha": 0.4383101221875231,
"beta": 2.1002075643639166,
"source": "empirical_bayes"
},
"binary_instance_count": 12643,
"bucket_count_effective": 5,
"bucket_count_field": "difficulty.bucket_count",
"bucket_count_requested": 5,
"bucket_field": "difficulty.bucket_index",
"bucket_ranking_field": "difficulty.expected_quantile",
"difficulty_value_definition": "1 - difficulty.posterior_lower_bound",
"difficulty_value_field": "difficulty.value",
"method": "beta_binomial_posterior_quantiles",
"nonbinary_instance_count": 0,
"posterior_lower_quantile": 0.1,
"tag": "bbq-eb-q10-k5"
},
"model_name": "Qwen/Qwen3-4B-Base",
"row_count": 12643,
"score_processing": {
"normalization": "identity_binary",
"output_field": "attempt_scores",
"positive_reward_value": 1.0,
"source_field": "pass_count,num_samples,pass_rate",
"supports_binary_difficulty": true
},
"source_format": {
"attempt_count_field": "num_samples",
"config_name": null,
"dataset_repo_id": "mnoukhov/dapo-math-17k-processed-filtered-qwen3-4b-base-32samples",
"instance_id_definition": "dataset_repo_id::row_id_field when a stable row id is available; otherwise dataset_repo_id::row_index",
"kind": "hugging_face_dataset_passrate_rows",
"model_field": "generator_model",
"pass_count_field": "pass_count",
"pass_rate_field": "pass_rate",
"row_id_field": "extra_info.index",
"split": "train",
"task_field": "dataset"
},
"task_name": "math"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{
"ability": {
"_type": "Value",
"dtype": "string"
},
"completions": {
"_type": "List",
"feature": {
"_type": "Value",
"dtype": "string"
}
},
"data_source": {
"_type": "Value",
"dtype": "string"
},
"dataset": {
"_type": "Value",
"dtype": "string"
},
"difficulty": {
"bucket_count": {
"_type": "Value",
"dtype": "int64"
},
"bucket_index": {
"_type": "Value",
"dtype": "int64"
},
"expected_quantile": {
"_type": "Value",
"dtype": "float64"
},
"posterior_lower_bound": {
"_type": "Value",
"dtype": "float64"
},
"posterior_mean": {
"_type": "Value",
"dtype": "float64"
},
"value": {
"_type": "Value",
"dtype": "float64"
}
},
"extra_info": {
"index": {
"_type": "Value",
"dtype": "string"
}
},
"generator_chat_template": {
"_type": "Value",
"dtype": "string"
},
"generator_max_tokens": {
"_type": "Value",
"dtype": "int64"
},
"generator_model": {
"_type": "Value",
"dtype": "string"
},
"generator_temperature": {
"_type": "Value",
"dtype": "float64"
},
"generator_top_p": {
"_type": "Value",
"dtype": "float64"
},
"ground_truth": {
"_type": "Value",
"dtype": "string"
},
"messages": {
"_type": "List",
"feature": {
"content": {
"_type": "Value",
"dtype": "string"
},
"role": {
"_type": "Value",
"dtype": "string"
}
}
},
"num_samples": {
"_type": "Value",
"dtype": "int64"
},
"pass_count": {
"_type": "Value",
"dtype": "int64"
},
"pass_rate": {
"_type": "Value",
"dtype": "string"
},
"prompt": {
"_type": "Value",
"dtype": "string"
},
"reward_model": {
"ground_truth": {
"_type": "Value",
"dtype": "string"
},
"style": {
"_type": "Value",
"dtype": "string"
}
},
"solution": {
"_type": "Value",
"dtype": "string"
},
"source_prompt": {
"_type": "List",
"feature": {
"content": {
"_type": "Value",
"dtype": "string"
},
"role": {
"_type": "Value",
"dtype": "string"
}
}
},
"source_split": {
"_type": "Value",
"dtype": "string"
}
}
Loading