diff --git a/configs/07/ifeval-eval.toml b/configs/07/ifeval-eval.toml index 0ed6bd3..47bbb84 100644 --- a/configs/07/ifeval-eval.toml +++ b/configs/07/ifeval-eval.toml @@ -2,7 +2,7 @@ model = "openai/gpt-4.1-mini" save_results = true [[eval]] -env_id = "prime/ifeval" +env_id = "primeintellect/ifeval" num_examples = 10 rollouts_per_example = 1 sampling_args = { max_tokens = 1024 } diff --git a/configs/09/math-python-eval.toml b/configs/09/math-python-eval.toml index d9fbdb0..3544ef4 100644 --- a/configs/09/math-python-eval.toml +++ b/configs/09/math-python-eval.toml @@ -2,7 +2,7 @@ model = "openai/gpt-5.4-nano" save_results = true [[eval]] -env_id = "prime/math-python" +env_id = "primeintellect/math-python" num_examples = 5 rollouts_per_example = 2 sampling_args = { max_tokens = 1024 } diff --git a/configs/09/opencode-harbor.toml b/configs/09/opencode-harbor.toml index 3d79f1c..991c8c0 100644 --- a/configs/09/opencode-harbor.toml +++ b/configs/09/opencode-harbor.toml @@ -2,7 +2,7 @@ model = "openai/gpt-5.4-mini" save_results = true [[eval]] -env_id = "prime/opencode-harbor" +env_id = "primeintellect/opencode-harbor" taskset = { task_names = ["regex-log"] } [eval.harness] diff --git a/configs/12/deep-agents-eval.toml b/configs/12/deep-agents-eval.toml index c5ef7ba..e34921d 100644 --- a/configs/12/deep-agents-eval.toml +++ b/configs/12/deep-agents-eval.toml @@ -2,7 +2,7 @@ model = "openai/gpt-5.4-nano" save_results = true [[eval]] -env_id = "prime/langchain-deep-agents-env" +env_id = "prime/langchain-deep-agents-math" num_examples = 5 rollouts_per_example = 1 sampling_args = { max_tokens = 2048 } diff --git a/configs/README.md b/configs/README.md index ae1eba0..bf02080 100644 --- a/configs/README.md +++ b/configs/README.md @@ -29,7 +29,7 @@ Training configs use nested tables under the relevant `[[env]]` block: ```toml [[env]] -id = "prime/opencode-harbor" +id = "primeintellect/opencode-harbor" [env.taskset] task_names = ["regex-log"] diff --git a/configs/eval/gpt-oss.toml b/configs/eval/gpt-oss.toml index c44dbdb..39295ed 100644 --- a/configs/eval/gpt-oss.toml +++ b/configs/eval/gpt-oss.toml @@ -16,7 +16,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[eval]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[eval]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/eval/llama-3.toml b/configs/eval/llama-3.toml index 02ebaa7..bc1c3e8 100644 --- a/configs/eval/llama-3.toml +++ b/configs/eval/llama-3.toml @@ -16,7 +16,7 @@ env_id = "primeintellect/reverse-text" # env_id = "primeintellect/wiki-search" # [[eval]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[eval]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/eval/nemotron-3.toml b/configs/eval/nemotron-3.toml index 51d3e30..2cd87a4 100644 --- a/configs/eval/nemotron-3.toml +++ b/configs/eval/nemotron-3.toml @@ -16,7 +16,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[eval]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[eval]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/eval/qwen-3-5-moe.toml b/configs/eval/qwen-3-5-moe.toml index b7bf740..fd9ebab 100644 --- a/configs/eval/qwen-3-5-moe.toml +++ b/configs/eval/qwen-3-5-moe.toml @@ -17,7 +17,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[eval]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[eval]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/eval/qwen-3-5.toml b/configs/eval/qwen-3-5.toml index 4793861..ae51180 100644 --- a/configs/eval/qwen-3-5.toml +++ b/configs/eval/qwen-3-5.toml @@ -18,7 +18,7 @@ env_id = "primeintellect/reverse-text" # env_id = "primeintellect/wiki-search" # [[eval]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[eval]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/gepa/gpt-oss.toml b/configs/gepa/gpt-oss.toml index eb7f847..54f2afe 100644 --- a/configs/gepa/gpt-oss.toml +++ b/configs/gepa/gpt-oss.toml @@ -14,7 +14,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[env]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[env]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/gepa/llama-3.toml b/configs/gepa/llama-3.toml index 024b726..598d6d0 100644 --- a/configs/gepa/llama-3.toml +++ b/configs/gepa/llama-3.toml @@ -14,7 +14,7 @@ env_id = "primeintellect/reverse-text" # env_id = "primeintellect/wiki-search" # [[env]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[env]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/gepa/nemotron-3.toml b/configs/gepa/nemotron-3.toml index 8bc49f4..2c93ece 100644 --- a/configs/gepa/nemotron-3.toml +++ b/configs/gepa/nemotron-3.toml @@ -14,7 +14,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[env]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[env]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/gepa/qwen-3-5-moe.toml b/configs/gepa/qwen-3-5-moe.toml index 9bb3d31..3c12ebc 100644 --- a/configs/gepa/qwen-3-5-moe.toml +++ b/configs/gepa/qwen-3-5-moe.toml @@ -16,7 +16,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[env]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[env]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/gepa/qwen-3-5.toml b/configs/gepa/qwen-3-5.toml index cce5985..37e2bb4 100644 --- a/configs/gepa/qwen-3-5.toml +++ b/configs/gepa/qwen-3-5.toml @@ -18,7 +18,7 @@ env_id = "primeintellect/wiki-search" # env_id = "primeintellect/wordle" # [[env]] -# env_id = "primeintellect/dspy-flights" +# env_id = "prime/dspy-rlm" # [[env]] # env_id = "primeintellect/opencode-harbor" diff --git a/configs/rl/gpt-oss.toml b/configs/rl/gpt-oss.toml index 58d3426..04fec3c 100644 --- a/configs/rl/gpt-oss.toml +++ b/configs/rl/gpt-oss.toml @@ -21,7 +21,7 @@ id = "primeintellect/wiki-search" # id = "primeintellect/wordle" # [[env]] -# id = "primeintellect/dspy-flights" +# id = "prime/dspy-rlm" # [[env]] # id = "primeintellect/opencode-harbor" diff --git a/configs/rl/llama.toml b/configs/rl/llama.toml index 2be8fdd..f56fbbe 100644 --- a/configs/rl/llama.toml +++ b/configs/rl/llama.toml @@ -20,7 +20,7 @@ id = "primeintellect/reverse-text" # id = "primeintellect/wiki-search" # [[env]] -# id = "primeintellect/dspy-flights" +# id = "prime/dspy-rlm" # [[env]] # id = "primeintellect/opencode-harbor" diff --git a/configs/rl/nemotron.toml b/configs/rl/nemotron.toml index 545f7ae..9a8278a 100644 --- a/configs/rl/nemotron.toml +++ b/configs/rl/nemotron.toml @@ -21,7 +21,7 @@ id = "primeintellect/reverse-text" # id = "primeintellect/wiki-search" # [[env]] -# id = "primeintellect/dspy-flights" +# id = "prime/dspy-rlm" # [[env]] # id = "primeintellect/opencode-harbor" diff --git a/configs/rl/qwen-moe.toml b/configs/rl/qwen-moe.toml index 9890455..129feb6 100644 --- a/configs/rl/qwen-moe.toml +++ b/configs/rl/qwen-moe.toml @@ -23,7 +23,7 @@ id = "primeintellect/reverse-text" # id = "primeintellect/wiki-search" # [[env]] -# id = "primeintellect/dspy-flights" +# id = "prime/dspy-rlm" # [[env]] # id = "primeintellect/opencode-harbor" diff --git a/configs/rl/qwen.toml b/configs/rl/qwen.toml index 152dec8..d46044b 100644 --- a/configs/rl/qwen.toml +++ b/configs/rl/qwen.toml @@ -23,7 +23,7 @@ id = "primeintellect/reverse-text" # id = "primeintellect/wiki-search" # [[env]] -# id = "primeintellect/dspy-flights" +# id = "prime/dspy-rlm" # [[env]] # id = "primeintellect/opencode-harbor" diff --git a/guides/07-judges-and-instruction-following/README.md b/guides/07-judges-and-instruction-following/README.md index 11ebecc..5ec9f69 100644 --- a/guides/07-judges-and-instruction-following/README.md +++ b/guides/07-judges-and-instruction-following/README.md @@ -3,7 +3,7 @@ Three environments, in order: 1. [simple-judge](../../environments/simple_judge/simple_judge.py) — local Taskset, one yes/no judge criterion per task -2. [prime/ifeval](https://app.primeintellect.ai/dashboard/environments/primeintellect/ifeval) — [google/IFEval](https://huggingface.co/datasets/google/IFEval), programmatic constraint checks +2. [primeintellect/ifeval](https://app.primeintellect.ai/dashboard/environments/primeintellect/ifeval) — [google/IFEval](https://huggingface.co/datasets/google/IFEval), programmatic constraint checks 3. [will/advanced-if](https://app.primeintellect.ai/dashboard/environments/will/advanced-if) — [facebook/AdvancedIF](https://huggingface.co/datasets/facebook/AdvancedIF), multiple rubric bullets per task ## Part 1: simple-judge @@ -60,7 +60,7 @@ Implementation: [environments/simple_judge/simple_judge.py](../../environments/s ## Part 2: IFEval ```bash -prime eval run prime/ifeval -m openai/gpt-4.1-mini -n 10 -r 1 -t 1024 +prime eval run primeintellect/ifeval -m openai/gpt-4.1-mini -n 10 -r 1 -t 1024 ``` ```toml @@ -69,7 +69,7 @@ model = "openai/gpt-4.1-mini" save_results = true [[eval]] -env_id = "prime/ifeval" +env_id = "primeintellect/ifeval" num_examples = 10 rollouts_per_example = 1 sampling_args = { max_tokens = 1024 } diff --git a/guides/10-coding-agents-and-sandboxes/README.md b/guides/10-coding-agents-and-sandboxes/README.md index b5a674c..86d0b5d 100644 --- a/guides/10-coding-agents-and-sandboxes/README.md +++ b/guides/10-coding-agents-and-sandboxes/README.md @@ -13,7 +13,7 @@ This guide starts with [primeintellect/math-python](https://app.primeintellect.a Run a small eval: ```bash -prime eval run prime/math-python \ +prime eval run primeintellect/math-python \ -m openai/gpt-5.4-nano \ -n 5 \ -r 2 \ @@ -28,7 +28,7 @@ model = "openai/gpt-5.4-nano" save_results = true [[eval]] -env_id = "prime/math-python" +env_id = "primeintellect/math-python" num_examples = 5 rollouts_per_example = 2 sampling_args = { max_tokens = 1024 } @@ -47,7 +47,7 @@ This is the smallest useful sandbox pattern: one task, one Python tool, one isol Run a small eval: ```bash -prime eval run prime/opencode-harbor -m openai/gpt-5.4-mini +prime eval run primeintellect/opencode-harbor -m openai/gpt-5.4-mini ``` Or run with a config file: @@ -58,7 +58,7 @@ model = "openai/gpt-5.4-mini" save_results = true [[eval]] -env_id = "prime/opencode-harbor" +env_id = "primeintellect/opencode-harbor" taskset = { task_names = ["regex-log"] } [eval.harness] @@ -81,7 +81,7 @@ The reward comes from the task tests, not from judging the final message. That m Use the same override split from the CLI when iterating locally: ```bash -prime eval run prime/opencode-harbor \ +prime eval run primeintellect/opencode-harbor \ -m openai/gpt-5.4-mini \ -a '{"taskset": {"task_names": ["regex-log"]}, "harness": {"max_turns": 4, "program": {"disabled_tools": ["webfetch", "question"]}}}' ``` diff --git a/guides/12-custom-harnesses/README.md b/guides/12-custom-harnesses/README.md index 87dfc76..33aec67 100644 --- a/guides/12-custom-harnesses/README.md +++ b/guides/12-custom-harnesses/README.md @@ -60,7 +60,7 @@ agent gets; the nested program config decides how OpenCode itself is launched. ```toml [[eval]] -env_id = "prime/opencode-harbor" +env_id = "primeintellect/opencode-harbor" taskset = { task_names = ["regex-log"] } [eval.harness] @@ -73,7 +73,7 @@ disabled_tools = ["webfetch", "question"] The same shape works from the CLI: ```bash -prime eval run prime/opencode-harbor \ +prime eval run primeintellect/opencode-harbor \ -m openai/gpt-5.4-mini \ -a '{"taskset": {"task_names": ["regex-log"]}, "harness": {"max_turns": 4, "program": {"disabled_tools": ["webfetch", "question"]}}}' ``` @@ -88,12 +88,12 @@ Use this split as the default rule: ## Deep Agents -[primeintellect/langchain-deep-agents-env](https://app.primeintellect.ai/dashboard/environments/primeintellect/langchain-deep-agents-env) is a Hub example. The Taskset loads GSM8K tasks and scores boxed answers. The Harness runs a LangChain Deep Agents program. +[prime/langchain-deep-agents-math](https://app.primeintellect.ai/dashboard/environments/prime/langchain-deep-agents-math) is a Hub example. The Taskset loads INTELLECT-3-RL math tasks and scores boxed answers. The Harness runs a LangChain Deep Agents program. Run a small eval: ```bash -prime eval run primeintellect/langchain-deep-agents-env \ +prime eval run prime/langchain-deep-agents-math \ -m openai/gpt-5-nano \ -n 5 \ -r 1 \ @@ -108,7 +108,7 @@ model = "openai/gpt-5.4-nano" save_results = true [[eval]] -env_id = "prime/langchain-deep-agents-env" +env_id = "prime/langchain-deep-agents-math" num_examples = 5 rollouts_per_example = 1 sampling_args = { max_tokens = 2048 } @@ -120,12 +120,12 @@ prime eval run configs/12/deep-agents-eval.toml ## DSPy -[primeintellect/dspy-rlm](https://app.primeintellect.ai/dashboard/environments/primeintellect/dspy-rlm) shows the same split with DSPy. +[prime/dspy-rlm](https://app.primeintellect.ai/dashboard/environments/prime/dspy-rlm) shows the same split with DSPy. Run a small eval: ```bash -prime eval run primeintellect/dspy-rlm \ +prime eval run prime/dspy-rlm \ -m openai/gpt-5-nano \ -n 5 \ -r 1 \ @@ -150,7 +150,7 @@ sampling_args = { max_tokens = 2048 } prime eval run configs/12/dspy-rlm-eval.toml ``` -For a domain-specific DSPy example, use `dspy-flights`. +For more DSPy examples, run `prime env list --search dspy`. ## When to Use One