-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.yaml
More file actions
36 lines (30 loc) · 1.01 KB
/
models.yaml
File metadata and controls
36 lines (30 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Model registry consumed by the `sweep` subcommand.
# Each entry is benchmarked at every swarm size in `workers`.
# Shared settings. Presumably applied to any model entry that does not
# override them — confirm against the loader used by `sweep`.
defaults:
  backend: llama_cpp
  # Sampling parameters passed to the backend for each generation request.
  generation:
    max_tokens: 128
    temperature: 0.2
    top_p: 0.9
# Models to benchmark. Each entry is one mapping with:
#   name       - identifier for the entry
#   backend    - backend used to run the model
#   model_path - path to the GGUF weights file (llama_cpp entries)
#   notes      - free-form human-readable remark
models:
  - name: gemma-3-270m
    backend: llama_cpp
    model_path: "/models/gemma-3-270m-Q4_K_M.gguf"
    notes: "Smallest Gemma 3 — best swarm density candidate."
  - name: gemma-3-1b
    backend: llama_cpp
    model_path: "/models/gemma-3-1b-it-Q4_K_M.gguf"
    notes: "Instruction-tuned 1B — useful baseline for quality."
  - name: gemma-2-2b
    backend: llama_cpp
    model_path: "/models/gemma-2-2b-it-Q4_K_M.gguf"
    notes: "Larger reference; expect lower swarm density."
  # Ollama example — uncomment if Ollama is your preferred backend:
  # - name: gemma3-270m-ollama
  #   backend: ollama
  #   model_name: "gemma3:270m"
# Sweep settings: every entry under `models` is benchmarked once per value
# in `workers`.
sweep:
  # Swarm sizes to benchmark.
  workers: [1, 2, 4, 8, 16, 24, 32]
  prompts_per_worker: 5
  concurrent_requests_per_worker: 1
  # Allowed values: interactive | usable | none
  stop_on_threshold_violation: usable  # interactive | usable | none