Skip to content

Commit 48f109a

Browse files
committed
bug fix: remove flash attention parameter from the model params
1 parent 20bcf74 commit 48f109a

8 files changed

Lines changed: 7 additions & 20 deletions

File tree

LLama.KernelMemory/BuilderExtensions.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ public static IKernelMemoryBuilder WithLLamaSharpDefaults(this IKernelMemoryBuil
7777
SplitMode = config.SplitMode,
7878
BatchSize = 512,
7979
UBatchSize = 512,
80-
FlashAttention = true,
8180
UseMemorymap = true
8281
};
8382

LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
4040
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.Layer,
4141
BatchSize = 512,
4242
UBatchSize = 512,
43-
FlashAttention = true,
4443
UseMemorymap = true,
4544
PoolingType = LLamaPoolingType.Mean,
4645
};
@@ -68,7 +67,6 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we
6867
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.Layer,
6968
BatchSize = 512,
7069
UBatchSize = 512,
71-
FlashAttention = true,
7270
UseMemorymap = true,
7371
PoolingType = LLamaPoolingType.Mean,
7472
};

LLama.KernelMemory/LlamaSharpTextGenerator.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ public LlamaSharpTextGenerator(LLamaSharpConfig config)
3838
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.Layer,
3939
BatchSize = 512,
4040
UBatchSize = 512,
41-
FlashAttention = true,
4241
UseMemorymap = true
4342
};
4443
_weights = LLamaWeights.LoadFromFile(@params);
@@ -66,7 +65,6 @@ public LlamaSharpTextGenerator(LLamaWeights weights, LLamaSharpConfig config, St
6665
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.Layer,
6766
BatchSize = 512,
6867
UBatchSize = 512,
69-
FlashAttention = true,
7068
UseMemorymap = true
7169
};
7270
_executor = executor ?? new StatelessExecutor(_weights, @params);

LLama.Unittest/SamplingTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public void BatchedSampling()
104104
}
105105
}
106106

107-
// Add " repeat" and test whether next tokens will be "this phrase forever.".
107+
// Add " repeat" and test whether next tokens will be "this phrase forever."
108108
for (int i = 0; i < 4; i++)
109109
{
110110
for (int b = 0; b < batch_count; b++)

LLama/Abstractions/IContextParams.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,6 @@ public interface IContextParams
103103
/// </summary>
104104
bool NoKqvOffload { get; }
105105

106-
/// <summary>
107-
/// Whether to use flash attention
108-
/// </summary>
109-
bool FlashAttention { get; }
110-
111106
/// <summary>
112107
/// defragment the KV cache if holes/size &gt; defrag_threshold, Set to &lt;= 0 to disable (default)
113108
/// </summary>

LLama/Common/ModelParams.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using System;
12
using LLama.Abstractions;
23
using System.Text;
34
using System.Text.Json.Serialization;
@@ -97,10 +98,7 @@ public record ModelParams
9798
public bool NoKqvOffload { get; set; }
9899

99100
/// <inheritdoc />
100-
101-
public bool FlashAttention { get; set; }
102-
103-
/// <inheritdoc />
101+
[Obsolete]
104102
public float? DefragThreshold { get; set; }
105103

106104
/// <inheritdoc />

LLama/Extensions/IContextParamsExtensions.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ public static void ToLlamaContextParams(this IContextParams @params, out LLamaCo
4949
result.type_k = @params.TypeK ?? GGMLType.GGML_TYPE_F16;
5050
result.type_v = @params.TypeV ?? GGMLType.GGML_TYPE_F16;
5151
result.offload_kqv = !@params.NoKqvOffload;
52-
result.flash_attention = @params.FlashAttention;
5352
result.llama_pooling_type = @params.PoolingType;
5453
result.attention_type = @params.AttentionType;
5554

LLama/Native/SafeLLamaContextHandle.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -348,15 +348,15 @@ static SafeLLamaContextHandle()
348348
/// <param name="key"></param>
349349
/// <param name="buf"></param>
350350
/// <param name="buf_size"></param>
351-
/// <returns></returns>
351+
/// <returns>The length of the value string on success, -1 otherwise</returns>
352352
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
353353
private static extern int llama_adapter_meta_val_str(IntPtr adapter, string key, StringBuilder buf, UIntPtr buf_size);
354354

355355
/// <summary>
356356
/// Get the number of metadata key value pairs
357357
/// </summary>
358358
/// <param name="adapter"></param>
359-
/// <returns></returns>
359+
/// <returns>The count of meta key value pairs</returns>
360360
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
361361
private static extern int llama_adapter_meta_count(IntPtr adapter);
362362

@@ -367,7 +367,7 @@ static SafeLLamaContextHandle()
367367
/// <param name="i"></param>
368368
/// <param name="buf"></param>
369369
/// <param name="buf_size"></param>
370-
/// <returns></returns>
370+
/// <returns>The length of the string (i.e. the meta key) on success, -1 otherwise</returns>
371371
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
372372
private static extern int llama_adapter_meta_key_by_index(IntPtr adapter, int i, StringBuilder buf, UIntPtr buf_size);
373373

@@ -378,7 +378,7 @@ static SafeLLamaContextHandle()
378378
/// <param name="i"></param>
379379
/// <param name="buf"></param>
380380
/// <param name="buf_size"></param>
381-
/// <returns></returns>
381+
/// <returns>The length of the value string on success, -1 otherwise</returns>
382382
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
383383
private static extern int llama_adapter_meta_val_by_index(IntPtr adapter, int i, StringBuilder buf, UIntPtr buf_size);
384384

0 commit comments

Comments
 (0)