-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Expand file tree
/
Copy pathclients.baml
More file actions
118 lines (105 loc) · 2.51 KB
/
clients.baml
File metadata and controls
118 lines (105 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI client pinned to the "gpt-4o" model. No retry_policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    // Credential is read from the OPENAI_API_KEY environment variable.
    api_key env.OPENAI_API_KEY
    model "gpt-4o"
  }
}
// OpenAI client pinned to the "gpt-4o-mini" model.
// Uses the `Exponential` retry_policy declared in this file.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    // Credential is read from the OPENAI_API_KEY environment variable.
    api_key env.OPENAI_API_KEY
    model "gpt-4o-mini"
  }
}
// Anthropic client pinned to the "claude-3-5-sonnet-20241022" model.
// No retry_policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    // Credential is read from the ANTHROPIC_API_KEY environment variable.
    api_key env.ANTHROPIC_API_KEY
    model "claude-3-5-sonnet-20241022"
  }
}
// Anthropic client pinned to the "claude-3-haiku-20240307" model.
// Uses the `Constant` retry_policy declared in this file.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    // Credential is read from the ANTHROPIC_API_KEY environment variable.
    api_key env.ANTHROPIC_API_KEY
    model "claude-3-haiku-20240307"
  }
}
// MiniMax M2.7 - latest high-capability model (recommended)
// Uses MiniMax's OpenAI-compatible API (served from api.minimax.io): https://www.minimax.io/
client<llm> MiniMaxM27 {
  // Declared with the openai provider; base_url below redirects requests
  // to the MiniMax endpoint instead of OpenAI's default.
  provider openai
  options {
    base_url "https://api.minimax.io/v1"
    // Credential is read from the MINIMAX_API_KEY environment variable.
    api_key env.MINIMAX_API_KEY
    model "MiniMax-M2.7"
  }
}
// MiniMax M2.7-highspeed - faster variant optimized for low-latency tasks
client<llm> MiniMaxM27Highspeed {
  // Declared with the openai provider; base_url below redirects requests
  // to the MiniMax endpoint instead of OpenAI's default.
  provider openai
  // Uses the `Exponential` retry_policy declared in this file.
  retry_policy Exponential
  options {
    base_url "https://api.minimax.io/v1"
    // Credential is read from the MINIMAX_API_KEY environment variable.
    api_key env.MINIMAX_API_KEY
    model "MiniMax-M2.7-highspeed"
  }
}
// MiniMax M2.5 - previous generation model with 204K context window
client<llm> MiniMaxM25 {
  // Declared with the openai provider; base_url below redirects requests
  // to the MiniMax endpoint instead of OpenAI's default.
  provider openai
  options {
    base_url "https://api.minimax.io/v1"
    // Credential is read from the MINIMAX_API_KEY environment variable.
    api_key env.MINIMAX_API_KEY
    model "MiniMax-M2.5"
  }
}
// MiniMax M2.5-highspeed - previous generation faster variant
client<llm> MiniMaxM25Highspeed {
  // Declared with the openai provider; base_url below redirects requests
  // to the MiniMax endpoint instead of OpenAI's default.
  provider openai
  // Uses the `Exponential` retry_policy declared in this file.
  retry_policy Exponential
  options {
    base_url "https://api.minimax.io/v1"
    // Credential is read from the MINIMAX_API_KEY environment variable.
    api_key env.MINIMAX_API_KEY
    model "MiniMax-M2.5-highspeed"
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // Requests rotate across the three clients listed here, each of which
    // is declared elsewhere in this file.
    strategy [CustomGPT4oMini, CustomHaiku, MiniMaxM27Highspeed]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // Clients are tried in the listed order until one succeeds.
    // CustomGPT4oMini appears once: it already retries through its own
    // retry_policy, so listing it twice would only re-send the same request
    // to the same client before reaching a real alternative. MiniMaxM27 is
    // the cross-provider fallback.
    strategy [CustomGPT4oMini, MiniMaxM27]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Fixed-delay retry policy, referenced by clients via `retry_policy Constant`.
retry_policy Constant {
  // Number of retries allowed after the initial attempt fails.
  max_retries 3
  // The strategy block is optional.
  strategy {
    type constant_delay
    // Wait applied between attempts, in milliseconds.
    delay_ms 200
  }
}
// Backoff retry policy, referenced by clients via `retry_policy Exponential`.
retry_policy Exponential {
  // Number of retries allowed after the initial attempt fails.
  max_retries 2
  // The strategy block is optional.
  strategy {
    type exponential_backoff
    // Initial wait in milliseconds; scaled by `multiplier` on each
    // subsequent retry and capped at `max_delay_ms`.
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}