@@ -209,7 +209,7 @@ private void AddMessagesToHistory(ChatSession session, List<Message> messages)
209209 var model = KnownModels . GetModel ( path , chat ! . Model ) ;
210210 var modelKey = model . FileName ;
211211
212- var kernelMemory = CreateMemory ( modelKey , path ) ;
212+ var kernelMemory = CreateMemory ( modelKey , path , out var generator ) ;
213213
214214 if ( textData != null )
215215 {
@@ -251,13 +251,15 @@ private void AddMessagesToHistory(ChatSession session, List<Message> messages)
251251 Role = AuthorRole . Assistant . ToString ( )
252252 }
253253 } ;
254+
255+ generator . Dispose ( ) ;
254256
255257 return chatResult ;
256258 }
257259
258260
259261 [ Experimental ( "KMEXP01" ) ]
260- private static IKernelMemory CreateMemory ( string modelName , string path )
262+ private static IKernelMemory CreateMemory ( string modelName , string path , out KernelMemFix . LlamaSharpTextGenerator generator )
261263 {
262264 InferenceParams infParams = new ( ) { AntiPrompts = [ "INFO" , "<|im_end|>" , "Question:" ] } ;
263265
@@ -280,7 +282,7 @@ private static IKernelMemory CreateMemory(string modelName, string path)
280282
281283 return new KernelMemoryBuilder ( )
282284 //.WithLLamaSharpDefaults2(lsConfig)
283- . WithLLamaSharpMaINTemp ( lsConfig , Path . Combine ( path , modelName ) )
285+ . WithLLamaSharpMaINTemp ( lsConfig , Path . Combine ( path , modelName ) , out generator )
284286 . WithSearchClientConfig ( searchClientConfig )
285287 . WithCustomImageOcr ( new OcrWrapper ( ) )
286288 . With ( parseOptions )
@@ -325,10 +327,10 @@ public Task CleanSessionCache(string id)
325327 }
326328}
327329
328- file static class KernelMemFix
330+ internal static class KernelMemFix
329331{
330332 [ Experimental ( "KMEXP00" ) ]
331- public sealed class LlamaSharpTextGenerator2 : ITextGenerator , ITextTokenizer , IDisposable
333+ public sealed class LlamaSharpTextGenerator : ITextGenerator , ITextTokenizer , IDisposable
332334 {
333335 private readonly StatelessExecutor _executor ;
334336 private readonly LLamaWeights _weights ;
@@ -339,22 +341,8 @@ public sealed class LlamaSharpTextGenerator2 : ITextGenerator, ITextTokenizer, I
339341
340342 public int MaxTokenTotal { get ; }
341343
342- public LlamaSharpTextGenerator2 ( LLamaSharpConfig config )
343- {
344- ModelParams @params = new ModelParams ( config . ModelPath )
345- {
346- ContextSize = new uint ? ( config . ContextSize . GetValueOrDefault ( 2048U ) ) ,
347- GpuLayerCount = config . GpuLayerCount . GetValueOrDefault ( 20 )
348- } ;
349- this . _weights = LLamaWeights . LoadFromFile ( ( IModelParams ) @params ) ;
350- this . _context = this . _weights . CreateContext ( ( IContextParams ) @params ) ;
351- this . _executor = new StatelessExecutor ( this . _weights , ( IContextParams ) @params ) ;
352- this . _defaultInferenceParams = config . DefaultInferenceParams ;
353- this . _ownsWeights = this . _ownsContext = true ;
354- this . MaxTokenTotal = ( int ) @params . ContextSize . Value ;
355- }
356344
357- public LlamaSharpTextGenerator2 (
345+ public LlamaSharpTextGenerator (
358346 LLamaWeights weights ,
359347 LLamaContext context ,
360348 StatelessExecutor ? executor = null ,
@@ -431,45 +419,19 @@ public IReadOnlyList<string> GetTokens(string text)
431419 }
432420
433421 [ Experimental ( "KMEXP00" ) ]
434- public static IKernelMemoryBuilder WithLLamaSharpTextGeneration2 (
422+ public static IKernelMemoryBuilder WithLLamaSharpTextGeneration (
435423 this IKernelMemoryBuilder builder ,
436- LlamaSharpTextGenerator2 textGenerator )
424+ LlamaSharpTextGenerator textGenerator )
437425 {
438426 builder . AddSingleton ( ( ITextGenerator ) textGenerator ) ;
439427 return builder ;
440428 }
441429
442- [ Experimental ( "KMEXP00" ) ]
443- public static IKernelMemoryBuilder WithLLamaSharpDefaults2 (
444- this IKernelMemoryBuilder builder ,
445- LLamaSharpConfig config ,
446- LLamaWeights ? weights = null ,
447- LLamaContext ? context = null )
448- {
449- ModelParams @params = new ModelParams ( config . ModelPath )
450- {
451- ContextSize = new uint ? ( config . ContextSize . GetValueOrDefault ( 2048U ) ) ,
452- GpuLayerCount = config . GpuLayerCount . GetValueOrDefault ( 20 ) ,
453- MainGpu = config . MainGpu ,
454- SplitMode = new GPUSplitMode ? ( config . SplitMode )
455- } ;
456- if ( weights == null || context == null )
457- {
458- weights = LLamaWeights . LoadFromFile ( ( IModelParams ) @params ) ;
459- context = weights . CreateContext ( ( IContextParams ) @params ) ;
460- }
461- StatelessExecutor executor = new StatelessExecutor ( weights , ( IContextParams ) @params ) ;
462- builder . WithLLamaSharpTextEmbeddingGeneration ( new LLamaSharpTextEmbeddingGenerator ( config , weights ) ) ;
463- builder . WithLLamaSharpTextGeneration2 ( new LlamaSharpTextGenerator2 ( weights , context , executor ,
464- config . DefaultInferenceParams ) ) ;
465- return builder ;
466- }
467-
468430 private static readonly ConcurrentDictionary < string , LLamaWeights > ModelCache = new ( ) ;
469431
470432 [ Experimental ( "KMEXP01" ) ]
471433 public static IKernelMemoryBuilder WithLLamaSharpMaINTemp ( this IKernelMemoryBuilder builder ,
472- LLamaSharpConfig config , string modelPath )
434+ LLamaSharpConfig config , string modelPath , out LlamaSharpTextGenerator generator )
473435 {
474436 // Create ModelParams for the first model.
475437 var parameters1 = new ModelParams ( modelPath )
@@ -495,26 +457,19 @@ public static IKernelMemoryBuilder WithLLamaSharpMaINTemp(this IKernelMemoryBuil
495457 var weights = GetOrLoadModel ( parameters2 ) ;
496458
497459 var context = model . CreateContext ( parameters2 ) ;
498-
499460 StatelessExecutor executor = new StatelessExecutor ( model , parameters2 ) ;
461+
462+ generator = new LlamaSharpTextGenerator ( model , context , executor ,
463+ config . DefaultInferenceParams ) ;
464+
500465 builder . WithLLamaSharpTextEmbeddingGeneration ( new LLamaSharpTextEmbeddingGenerator ( config , weights ) ) ;
501- builder . WithLLamaSharpTextGeneration2 ( new LlamaSharpTextGenerator2 ( model , context , executor ,
502- config . DefaultInferenceParams ) ) ;
466+ builder . WithLLamaSharpTextGeneration ( generator ) ;
503467 return builder ;
504468 }
505469
506470 private static LLamaWeights GetOrLoadModel ( ModelParams modelParams )
507471 {
508- // Use a unique key based on the serialized ModelParams object.
509- string cacheKey = GenerateCacheKey ( modelParams ) ;
510-
511- // Retrieve from cache or load if not already cached.
512- return ModelCache . GetOrAdd ( cacheKey , _ => LLamaWeights . LoadFromFile ( modelParams ) ) ;
472+ return LLamaWeights . LoadFromFile ( modelParams ) ;
513473 }
514474
515- private static string GenerateCacheKey ( ModelParams modelParams )
516- {
517- // Create a unique key by combining important properties of ModelParams.
518- return $ "{ modelParams . ModelPath } :{ modelParams . ContextSize } :{ modelParams . GpuLayerCount } :{ modelParams . MainGpu } :{ modelParams . SplitMode } ";
519- }
520475}
0 commit comments