You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
description: "The type of the key for the context KV cache tensors"
185
+
default: "F16"asconst,
186
+
description: "Experimental. The type of the key for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the value for the context KV cache tensors"
195
+
default: "F16"asconst,
196
+
description: "Experimental. The type of the value for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the key for the context KV cache tensors"
142
+
default: "F16"asconst,
143
+
description: "Experimental. The type of the key for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the value for the context KV cache tensors"
152
+
default: "F16"asconst,
153
+
description: "Experimental. The type of the value for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
152
154
})
153
155
.option("swaFullCache",{
154
156
alias: "noSwa",
@@ -427,8 +429,8 @@ async function RunCompletion({
description: "The type of the key for the context KV cache tensors"
152
+
default: "F16"asconst,
153
+
description: "Experimental. The type of the key for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the value for the context KV cache tensors"
162
+
default: "F16"asconst,
163
+
description: "Experimental. The type of the value for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the key for the context KV cache tensors"
130
+
default: "F16"asconst,
131
+
description: "Experimental. The type of the key for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the value for the context KV cache tensors"
140
+
default: "F16"asconst,
141
+
description: "Experimental. The type of the value for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the key for the context KV cache tensors"
122
+
default: "F16"asconst,
123
+
description: "Experimental. The type of the key for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
description: "The type of the value for the context KV cache tensors"
132
+
default: "F16"asconst,
133
+
description: "Experimental. The type of the value for the context KV cache tensors. Use `currentQuant` to use the same type as the current quantization of the model weights tensors"
132
134
})
133
135
.option("swaFullCache",{
134
136
alias: "noSwa",
@@ -833,8 +835,8 @@ async function runTestWorkerLogic() {
0 commit comments