# Swarm-Agents-Lab/models.yaml (callstackincubator/Swarm-Agents-Lab, branch main)
# Model registry consumed by the `sweep` subcommand.
# Each entry under `models` is benchmarked at every swarm size listed in
# `sweep.workers`.

# Shared settings.
# NOTE(review): presumably applied to every `models` entry that does not set
# the same key itself — confirm against the loader.
defaults:
  backend: llama_cpp
  # Text-generation parameters.
  generation:
    # NOTE(review): looks like a cap on tokens generated per response — confirm.
    max_tokens: 128
    # Sampling settings; presumably forwarded to the backend unchanged — verify.
    temperature: 0.2
    top_p: 0.9

# Benchmark candidates, one list item per model.
# Every active entry carries: name, backend, model_path (a .gguf file under
# /models) and a free-text notes field.
# NOTE(review): each entry repeats the value of `defaults.backend`; this may be
# redundant if the loader merges defaults — confirm before removing.
models:
  # Gemma 3, 270M, Q4_K_M file.
  - name: gemma-3-270m
    backend: llama_cpp
    model_path: '/models/gemma-3-270m-Q4_K_M.gguf'
    notes: 'Smallest Gemma 3 — best swarm density candidate.'

  # Gemma 3, 1B instruction-tuned, Q4_K_M file.
  - name: gemma-3-1b
    backend: llama_cpp
    model_path: '/models/gemma-3-1b-it-Q4_K_M.gguf'
    notes: 'Instruction-tuned 1B — useful baseline for quality.'

  # Gemma 2, 2B instruction-tuned, Q4_K_M file.
  - name: gemma-2-2b
    backend: llama_cpp
    model_path: '/models/gemma-2-2b-it-Q4_K_M.gguf'
    notes: 'Larger reference; expect lower swarm density.'

  # Ollama example — uncomment if Ollama is your preferred backend.
  # Note that it identifies the model with `model_name`, not `model_path`.
  # - name: gemma3-270m-ollama
  #   backend: ollama
  #   model_name: 'gemma3:270m'

# Sweep parameters.
sweep:
  # Swarm sizes to benchmark, in ascending order from 1 to 32 workers.
  workers: [1, 2, 4, 8, 16, 24, 32]
  # NOTE(review): presumably the number of prompts each worker sends at every
  # swarm size — confirm against the sweep implementation.
  prompts_per_worker: 5
  # NOTE(review): looks like the in-flight request limit per worker — confirm.
  concurrent_requests_per_worker: 1
  # Accepted values are listed in the inline comment below.
  # NOTE(review): presumably `none` disables early stopping and the other two
  # name the threshold that aborts the sweep — verify. Write `none` literally;
  # `null` or `~` would be parsed as a YAML null, not as the string "none".
  stop_on_threshold_violation: usable   # interactive | usable | none