Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion configs/07/ifeval-eval.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ model = "openai/gpt-4.1-mini"
save_results = true

[[eval]]
env_id = "prime/ifeval"
env_id = "primeintellect/ifeval"
num_examples = 10
rollouts_per_example = 1
sampling_args = { max_tokens = 1024 }
Expand Down
2 changes: 1 addition & 1 deletion configs/09/math-python-eval.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ model = "openai/gpt-5.4-nano"
save_results = true

[[eval]]
env_id = "prime/math-python"
env_id = "primeintellect/math-python"
num_examples = 5
rollouts_per_example = 2
sampling_args = { max_tokens = 1024 }
2 changes: 1 addition & 1 deletion configs/09/opencode-harbor.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ model = "openai/gpt-5.4-mini"
save_results = true

[[eval]]
env_id = "prime/opencode-harbor"
env_id = "primeintellect/opencode-harbor"
taskset = { task_names = ["regex-log"] }

[eval.harness]
Expand Down
2 changes: 1 addition & 1 deletion configs/12/deep-agents-eval.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ model = "openai/gpt-5.4-nano"
save_results = true

[[eval]]
env_id = "prime/langchain-deep-agents-env"
env_id = "prime/langchain-deep-agents-math"
num_examples = 5
rollouts_per_example = 1
sampling_args = { max_tokens = 2048 }
2 changes: 1 addition & 1 deletion configs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Training configs use nested tables under the relevant `[[env]]` block:

```toml
[[env]]
id = "prime/opencode-harbor"
id = "primeintellect/opencode-harbor"

[env.taskset]
task_names = ["regex-log"]
Expand Down
2 changes: 1 addition & 1 deletion configs/eval/gpt-oss.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[eval]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[eval]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/eval/llama-3.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env_id = "primeintellect/reverse-text"
# env_id = "primeintellect/wiki-search"

# [[eval]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[eval]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/eval/nemotron-3.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[eval]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[eval]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/eval/qwen-3-5-moe.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[eval]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[eval]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/eval/qwen-3-5.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ env_id = "primeintellect/reverse-text"
# env_id = "primeintellect/wiki-search"

# [[eval]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[eval]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/gepa/gpt-oss.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[env]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[env]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/gepa/llama-3.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ env_id = "primeintellect/reverse-text"
# env_id = "primeintellect/wiki-search"

# [[env]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[env]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/gepa/nemotron-3.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[env]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[env]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/gepa/qwen-3-5-moe.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[env]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[env]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/gepa/qwen-3-5.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ env_id = "primeintellect/wiki-search"
# env_id = "primeintellect/wordle"

# [[env]]
# env_id = "primeintellect/dspy-flights"
# env_id = "prime/dspy-rlm"

# [[env]]
# env_id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/rl/gpt-oss.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ id = "primeintellect/wiki-search"
# id = "primeintellect/wordle"

# [[env]]
# id = "primeintellect/dspy-flights"
# id = "prime/dspy-rlm"

# [[env]]
# id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/rl/llama.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ id = "primeintellect/reverse-text"
# id = "primeintellect/wiki-search"

# [[env]]
# id = "primeintellect/dspy-flights"
# id = "prime/dspy-rlm"

# [[env]]
# id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/rl/nemotron.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ id = "primeintellect/reverse-text"
# id = "primeintellect/wiki-search"

# [[env]]
# id = "primeintellect/dspy-flights"
# id = "prime/dspy-rlm"

# [[env]]
# id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/rl/qwen-moe.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ id = "primeintellect/reverse-text"
# id = "primeintellect/wiki-search"

# [[env]]
# id = "primeintellect/dspy-flights"
# id = "prime/dspy-rlm"

# [[env]]
# id = "primeintellect/opencode-harbor"
Expand Down
2 changes: 1 addition & 1 deletion configs/rl/qwen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ id = "primeintellect/reverse-text"
# id = "primeintellect/wiki-search"

# [[env]]
# id = "primeintellect/dspy-flights"
# id = "prime/dspy-rlm"

# [[env]]
# id = "primeintellect/opencode-harbor"
Expand Down
6 changes: 3 additions & 3 deletions guides/07-judges-and-instruction-following/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Three environments, in order:

1. [simple-judge](../../environments/simple_judge/simple_judge.py) — local Taskset, one yes/no judge criterion per task
2. [prime/ifeval](https://app.primeintellect.ai/dashboard/environments/primeintellect/ifeval) — [google/IFEval](https://huggingface.co/datasets/google/IFEval), programmatic constraint checks
2. [primeintellect/ifeval](https://app.primeintellect.ai/dashboard/environments/primeintellect/ifeval) — [google/IFEval](https://huggingface.co/datasets/google/IFEval), programmatic constraint checks
3. [will/advanced-if](https://app.primeintellect.ai/dashboard/environments/will/advanced-if) — [facebook/AdvancedIF](https://huggingface.co/datasets/facebook/AdvancedIF), multiple rubric bullets per task

## Part 1: simple-judge
Expand Down Expand Up @@ -60,7 +60,7 @@ Implementation: [environments/simple_judge/simple_judge.py](../../environments/s
## Part 2: IFEval

```bash
prime eval run prime/ifeval -m openai/gpt-4.1-mini -n 10 -r 1 -t 1024
prime eval run primeintellect/ifeval -m openai/gpt-4.1-mini -n 10 -r 1 -t 1024
```

```toml
Expand All @@ -69,7 +69,7 @@ model = "openai/gpt-4.1-mini"
save_results = true

[[eval]]
env_id = "prime/ifeval"
env_id = "primeintellect/ifeval"
num_examples = 10
rollouts_per_example = 1
sampling_args = { max_tokens = 1024 }
Expand Down
10 changes: 5 additions & 5 deletions guides/10-coding-agents-and-sandboxes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ This guide starts with [primeintellect/math-python](https://app.primeintellect.a
Run a small eval:

```bash
prime eval run prime/math-python \
prime eval run primeintellect/math-python \
-m openai/gpt-5.4-nano \
-n 5 \
-r 2 \
Expand All @@ -28,7 +28,7 @@ model = "openai/gpt-5.4-nano"
save_results = true

[[eval]]
env_id = "prime/math-python"
env_id = "primeintellect/math-python"
num_examples = 5
rollouts_per_example = 2
sampling_args = { max_tokens = 1024 }
Expand All @@ -47,7 +47,7 @@ This is the smallest useful sandbox pattern: one task, one Python tool, one isol
Run a small eval:

```bash
prime eval run prime/opencode-harbor -m openai/gpt-5.4-mini
prime eval run primeintellect/opencode-harbor -m openai/gpt-5.4-mini
```

Or run with a config file:
Expand All @@ -58,7 +58,7 @@ model = "openai/gpt-5.4-mini"
save_results = true

[[eval]]
env_id = "prime/opencode-harbor"
env_id = "primeintellect/opencode-harbor"
taskset = { task_names = ["regex-log"] }

[eval.harness]
Expand All @@ -81,7 +81,7 @@ The reward comes from the task tests, not from judging the final message. That m
Use the same override split from the CLI when iterating locally:

```bash
prime eval run prime/opencode-harbor \
prime eval run primeintellect/opencode-harbor \
-m openai/gpt-5.4-mini \
-a '{"taskset": {"task_names": ["regex-log"]}, "harness": {"max_turns": 4, "program": {"disabled_tools": ["webfetch", "question"]}}}'
```
Expand Down
16 changes: 8 additions & 8 deletions guides/12-custom-harnesses/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ agent gets; the nested program config decides how OpenCode itself is launched.

```toml
[[eval]]
env_id = "prime/opencode-harbor"
env_id = "primeintellect/opencode-harbor"
taskset = { task_names = ["regex-log"] }

[eval.harness]
Expand All @@ -73,7 +73,7 @@ disabled_tools = ["webfetch", "question"]
The same shape works from the CLI:

```bash
prime eval run prime/opencode-harbor \
prime eval run primeintellect/opencode-harbor \
-m openai/gpt-5.4-mini \
-a '{"taskset": {"task_names": ["regex-log"]}, "harness": {"max_turns": 4, "program": {"disabled_tools": ["webfetch", "question"]}}}'
```
Expand All @@ -88,12 +88,12 @@ Use this split as the default rule:

## Deep Agents

[primeintellect/langchain-deep-agents-env](https://app.primeintellect.ai/dashboard/environments/primeintellect/langchain-deep-agents-env) is a Hub example. The Taskset loads GSM8K tasks and scores boxed answers. The Harness runs a LangChain Deep Agents program.
[prime/langchain-deep-agents-math](https://app.primeintellect.ai/dashboard/environments/prime/langchain-deep-agents-math) is a Hub example. The Taskset loads INTELLECT-3-RL math tasks and scores boxed answers. The Harness runs a LangChain Deep Agents program.

Run a small eval:

```bash
prime eval run primeintellect/langchain-deep-agents-env \
prime eval run prime/langchain-deep-agents-math \
-m openai/gpt-5.4-nano \
-n 5 \
-r 1 \
Expand All @@ -108,7 +108,7 @@ model = "openai/gpt-5.4-nano"
save_results = true

[[eval]]
env_id = "prime/langchain-deep-agents-env"
env_id = "prime/langchain-deep-agents-math"
num_examples = 5
rollouts_per_example = 1
sampling_args = { max_tokens = 2048 }
Expand All @@ -120,12 +120,12 @@ prime eval run configs/12/deep-agents-eval.toml

## DSPy

[primeintellect/dspy-rlm](https://app.primeintellect.ai/dashboard/environments/primeintellect/dspy-rlm) shows the same split with DSPy.
[prime/dspy-rlm](https://app.primeintellect.ai/dashboard/environments/prime/dspy-rlm) shows the same split with DSPy.

Run a small eval:

```bash
prime eval run primeintellect/dspy-rlm \
prime eval run prime/dspy-rlm \
-m openai/gpt-5-nano \
-n 5 \
-r 1 \
Expand All @@ -150,7 +150,7 @@ sampling_args = { max_tokens = 2048 }
prime eval run configs/12/dspy-rlm-eval.toml
```

For a domain-specific DSPy example, use `dspy-flights`.
For more DSPy examples, run `prime env list --search dspy`.

## When to Use One

Expand Down