allenai · undefined · May 1, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
 
 
 ### Changed
+- Add optional difficulty-curriculum prompt sampling to `grpo_fast.py`, along with the `DifficultyCurriculumHFDataLoader` and a `scripts/data/difficulty_sampling/create_difficulty_map.py` builder script, plus matching docs/tests and a Qwen launch script (https://github.com/allenai/open-instruct/pull/1661).
 - Add parameterized `combine_dataset` tests in `open_instruct/test_utils.py` against local jsonl fixtures (no network), covering varied fractional/sample-count weight combinations and split-count mismatch (would have caught the bug fixed in #1674). Extract the interleaved-list→dict parsing into a shared `utils.parse_dataset_mixer_list` helper (with its own parameterized unit tests) and tighten `combine_dataset` / `get_datasets` to accept dict-only `dataset_mixer`; the one external list-form caller (`rejection_sampling/generation.py`) now converts at the call site.
 - Make `mason.py` `--output_dir` / `--checkpoint_state_dir` overrides idempotent via `replace_or_append_flag`, add `open_instruct/grpo.py` to `OPEN_INSTRUCT_COMMANDS` / `OPEN_INSTRUCT_RESUMABLES`, and wire OLMo-core checkpoint save/resume into `grpo.py` (`CheckpointerCallback` + `DataPreparationActorCheckpointCallback` + `LoadStrategy.if_available`) so resumable Beaker jobs actually resume (https://github.com/allenai/open-instruct/pull/1666).
 - Make `--budget` optional in `mason.py` (falls back to the workspace's default budget) and drop the explicit `--budget` flag from launch scripts where it already matched the workspace default (https://github.com/allenai/open-instruct/pull/1673).

diff --git a/configs/curriculum/Qwen_Qwen3-4B-Base/math__Qwen_Qwen3-4B-Base__bbq-eb-q10-k5.metadata.json b/configs/curriculum/Qwen_Qwen3-4B-Base/math__Qwen_Qwen3-4B-Base__bbq-eb-q10-k5.metadata.json
@@ -0,0 +1,45 @@
+{
+  "difficulty_generation": {
+    "beta_prior_requested": "empirical-bayes",
+    "beta_prior_used": {
+      "alpha": 0.4383101221875231,
+      "beta": 2.1002075643639166,
+      "source": "empirical_bayes"
+    },
+    "binary_instance_count": 12643,
+    "bucket_count_effective": 5,
+    "bucket_count_field": "difficulty.bucket_count",
+    "bucket_count_requested": 5,
+    "bucket_field": "difficulty.bucket_index",
+    "bucket_ranking_field": "difficulty.expected_quantile",
+    "difficulty_value_definition": "1 - difficulty.posterior_lower_bound",
+    "difficulty_value_field": "difficulty.value",
+    "method": "beta_binomial_posterior_quantiles",
+    "nonbinary_instance_count": 0,
+    "posterior_lower_quantile": 0.1,
+    "tag": "bbq-eb-q10-k5"
+  },
+  "model_name": "Qwen/Qwen3-4B-Base",
+  "row_count": 12643,
+  "score_processing": {
+    "normalization": "identity_binary",
+    "output_field": "attempt_scores",
+    "positive_reward_value": 1.0,
+    "source_field": "pass_count,num_samples,pass_rate",
+    "supports_binary_difficulty": true
+  },
+  "source_format": {
+    "attempt_count_field": "num_samples",
+    "config_name": null,
+    "dataset_repo_id": "mnoukhov/dapo-math-17k-processed-filtered-qwen3-4b-base-32samples",
+    "instance_id_definition": "dataset_repo_id::row_id_field when a stable row id is available; otherwise dataset_repo_id::row_index",
+    "kind": "hugging_face_dataset_passrate_rows",
+    "model_field": "generator_model",
+    "pass_count_field": "pass_count",
+    "pass_rate_field": "pass_rate",
+    "row_id_field": "extra_info.index",
+    "split": "train",
+    "task_field": "dataset"
+  },
+  "task_name": "math"
+}
diff --git a/configs/curriculum/Qwen_Qwen3-4B-Base/math__Qwen_Qwen3-4B-Base__bbq-eb-q10-k5.schema.json b/configs/curriculum/Qwen_Qwen3-4B-Base/math__Qwen_Qwen3-4B-Base__bbq-eb-q10-k5.schema.json
@@ -0,0 +1,137 @@
+{
+  "ability": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "completions": {
+    "_type": "List",
+    "feature": {
+      "_type": "Value",
+      "dtype": "string"
+    }
+  },
+  "data_source": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "dataset": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "difficulty": {
+    "bucket_count": {
+      "_type": "Value",
+      "dtype": "int64"
+    },
+    "bucket_index": {
+      "_type": "Value",
+      "dtype": "int64"
+    },
+    "expected_quantile": {
+      "_type": "Value",
+      "dtype": "float64"
+    },
+    "posterior_lower_bound": {
+      "_type": "Value",
+      "dtype": "float64"
+    },
+    "posterior_mean": {
+      "_type": "Value",
+      "dtype": "float64"
+    },
+    "value": {
+      "_type": "Value",
+      "dtype": "float64"
+    }
+  },
+  "extra_info": {
+    "index": {
+      "_type": "Value",
+      "dtype": "string"
+    }
+  },
+  "generator_chat_template": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "generator_max_tokens": {
+    "_type": "Value",
+    "dtype": "int64"
+  },
+  "generator_model": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "generator_temperature": {
+    "_type": "Value",
+    "dtype": "float64"
+  },
+  "generator_top_p": {
+    "_type": "Value",
+    "dtype": "float64"
+  },
+  "ground_truth": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "messages": {
+    "_type": "List",
+    "feature": {
+      "content": {
+        "_type": "Value",
+        "dtype": "string"
+      },
+      "role": {
+        "_type": "Value",
+        "dtype": "string"
+      }
+    }
+  },
+  "num_samples": {
+    "_type": "Value",
+    "dtype": "int64"
+  },
+  "pass_count": {
+    "_type": "Value",
+    "dtype": "int64"
+  },
+  "pass_rate": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "prompt": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "reward_model": {
+    "ground_truth": {
+      "_type": "Value",
+      "dtype": "string"
+    },
+    "style": {
+      "_type": "Value",
+      "dtype": "string"
+    }
+  },
+  "solution": {
+    "_type": "Value",
+    "dtype": "string"
+  },
+  "source_prompt": {
+    "_type": "List",
+    "feature": {
+      "content": {
+        "_type": "Value",
+        "dtype": "string"
+      },
+      "role": {
+        "_type": "Value",
+        "dtype": "string"
+      }
+    }
+  },
+  "source_split": {
+    "_type": "Value",
+    "dtype": "string"
+  }
+}