Skip to content

Commit 2119cd7

Browse files
authored
Support Request Context in Ollama and Anthropic generators (#844)
Allow overriding the model name during a request, using request context arguments. The feature is available only for Anthropic and Ollama. Updated example 212 showing how to override Ollama settings at runtime. The feature is not supported when using OpenAI/Azure OpenAI, because the underlying connectors hard-code the model name in client instances, and supporting it would require a considerable amount of refactoring plus memory overhead. See microsoft/semantic-kernel#9337
1 parent 265c4a2 commit 2119cd7

15 files changed

+161
-8
lines changed

examples/210-KM-without-builder/Program.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using Microsoft.KernelMemory.AI.AzureOpenAI;
77
using Microsoft.KernelMemory.AI.OpenAI;
88
using Microsoft.KernelMemory.Configuration;
9+
using Microsoft.KernelMemory.Context;
910
using Microsoft.KernelMemory.DataFormats;
1011
using Microsoft.KernelMemory.DataFormats.AzureAIDocIntel;
1112
using Microsoft.KernelMemory.DataFormats.Image;
@@ -73,6 +74,7 @@ public static async Task Main()
7374
LoggerFactory? loggerFactory = null; // Alternative: app.Services.GetService<ILoggerFactory>();
7475

7576
// Generic dependencies
77+
var requestContextProvider = new RequestContextProvider();
7678
var mimeTypeDetection = new MimeTypesDetection();
7779
var promptProvider = new EmbeddedPromptProvider();
7880

@@ -121,7 +123,7 @@ public static async Task Main()
121123

122124
// Create memory instance
123125
var searchClient = new SearchClient(memoryDb, textGenerator, searchClientConfig, promptProvider, contentModeration, loggerFactory);
124-
var memory = new MemoryServerless(orchestrator, searchClient, kernelMemoryConfig);
126+
var memory = new MemoryServerless(orchestrator, searchClient, requestContextProvider, kernelMemoryConfig);
125127

126128
// End-to-end test
127129
await memory.ImportTextAsync("I'm waiting for Godot", documentId: "tg01");

examples/212-dotnet-ollama/Program.cs

+28
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using Microsoft.KernelMemory;
44
using Microsoft.KernelMemory.AI.Ollama;
55
using Microsoft.KernelMemory.AI.OpenAI;
6+
using Microsoft.KernelMemory.Context;
67
using Microsoft.KernelMemory.Diagnostics;
78

89
/* This example shows how to use KM with Ollama
@@ -49,19 +50,46 @@ public static async Task Main()
4950

5051
// Generate an answer - This uses OpenAI for embeddings and finding relevant data, and LM Studio to generate an answer
5152
var answer = await memory.AskAsync("What's the current date (don't check for validity)?");
53+
Console.WriteLine("-------------------");
5254
Console.WriteLine(answer.Question);
5355
Console.WriteLine(answer.Result);
56+
Console.WriteLine("-------------------");
5457

5558
/*
5659
5760
-- Output using phi3:medium-128k:
5861
5962
What's the current date (don't check for validity)?
63+
6064
The given fact states that "Today is October 32nd, 2476." However, it appears to be an incorrect statement as
6165
there are never more than 31 days in any month. If we consider this date without checking its validity and accept
6266
the stated day of October as being 32, then the current date would be "October 32nd, 2476." However, it is important
6367
to note that this date does not align with our calendar system.
6468
6569
*/
70+
71+
// How to override config with Request Context
72+
var context = new RequestContext();
73+
context.SetArg("custom_text_generation_model_name", "llama2:70b");
74+
// context.SetArg("custom_embedding_generation_model_name", "...");
75+
76+
answer = await memory.AskAsync("What's the current date (don't check for validity)?", context: context);
77+
Console.WriteLine("-------------------");
78+
Console.WriteLine(answer.Question);
79+
Console.WriteLine(answer.Result);
80+
Console.WriteLine("-------------------");
81+
82+
/*
83+
84+
-- Output using llama2:70b:
85+
86+
What's the current date (don't check for validity)?
87+
88+
The provided facts state that "Today is October 32nd, 2476." However, considering the Gregorian calendar system
89+
commonly used today, this information appears to be incorrect as there are no such dates. This could
90+
potentially refer to a different calendar or timekeeping system in use in your fictional world, but based on our
91+
current understanding of calendars and dates, an "October 32nd" does not exist. Therefore, the answer is
92+
'INFO NOT FOUND'.
93+
*/
6694
}
6795
}

extensions/Anthropic/AnthropicTextGeneration.cs

+9-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using Microsoft.Extensions.Logging;
99
using Microsoft.KernelMemory.AI.Anthropic.Client;
1010
using Microsoft.KernelMemory.AI.OpenAI;
11+
using Microsoft.KernelMemory.Context;
1112
using Microsoft.KernelMemory.Diagnostics;
1213

1314
namespace Microsoft.KernelMemory.AI.Anthropic;
@@ -23,6 +24,7 @@ public sealed class AnthropicTextGeneration : ITextGenerator, IDisposable
2324

2425
private readonly RawAnthropicClient _client;
2526
private readonly ITextTokenizer _textTokenizer;
27+
private readonly IContextProvider _contextProvider;
2628
private readonly HttpClient _httpClient;
2729
private readonly ILogger<AnthropicTextGeneration> _log;
2830
private readonly string _modelName;
@@ -34,11 +36,13 @@ public sealed class AnthropicTextGeneration : ITextGenerator, IDisposable
3436
/// <param name="config">Client configuration, including credentials and model details</param>
3537
/// <param name="textTokenizer">Tokenizer used to count tokens</param>
3638
/// <param name="httpClientFactory">Optional factory used to inject a pre-configured HTTP client for requests to Anthropic API</param>
39+
/// <param name="contextProvider">Request context provider with runtime configuration overrides</param>
3740
/// <param name="loggerFactory">Optional factory used to inject configured loggers</param>
3841
public AnthropicTextGeneration(
3942
AnthropicConfig config,
4043
ITextTokenizer? textTokenizer = null,
4144
IHttpClientFactory? httpClientFactory = null,
45+
IContextProvider? contextProvider = null,
4246
ILoggerFactory? loggerFactory = null)
4347
{
4448
this._modelName = config.TextModelName;
@@ -48,6 +52,7 @@ public AnthropicTextGeneration(
4852
this.MaxTokenTotal = config.MaxTokenOut;
4953

5054
this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger<AnthropicTextGeneration>();
55+
this._contextProvider = contextProvider ?? new RequestContextProvider();
5156

5257
if (httpClientFactory == null)
5358
{
@@ -96,9 +101,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
96101
TextGenerationOptions options,
97102
[EnumeratorCancellation] CancellationToken cancellationToken = default)
98103
{
99-
this._log.LogTrace("Sending text generation request, model '{0}'", this._modelName);
104+
string modelName = this._contextProvider.GetContext().GetCustomTextGenerationModelNameOrDefault(this._modelName);
100105

101-
CallClaudeStreamingParams parameters = new(this._modelName, prompt)
106+
this._log.LogTrace("Sending text generation request, model '{0}'", modelName);
107+
108+
CallClaudeStreamingParams parameters = new(modelName, prompt)
102109
{
103110
System = this._defaultSystemPrompt,
104111
Temperature = options.Temperature,

extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs

+6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717

1818
namespace Microsoft.KernelMemory.AI.AzureOpenAI;
1919

20+
/// <summary>
21+
/// Azure OpenAI connector
22+
///
23+
/// Note: does not support model name override via request context
24+
/// see https://github.com/microsoft/semantic-kernel/issues/9337
25+
/// </summary>
2026
[Experimental("KMEXP01")]
2127
public sealed class AzureOpenAITextEmbeddingGenerator : ITextEmbeddingGenerator, ITextEmbeddingBatchGenerator
2228
{

extensions/AzureOpenAI/AzureOpenAITextGenerator.cs

+6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515

1616
namespace Microsoft.KernelMemory.AI.AzureOpenAI;
1717

18+
/// <summary>
19+
/// Azure OpenAI connector
20+
///
21+
/// Note: does not support model name override via request context
22+
/// see https://github.com/microsoft/semantic-kernel/issues/9337
23+
/// </summary>
1824
[Experimental("KMEXP01")]
1925
public sealed class AzureOpenAITextGenerator : ITextGenerator
2026
{

extensions/ONNX/Onnx/OnnxTextGenerator.cs

+2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ namespace Microsoft.KernelMemory.AI.Onnx;
1919
/// <summary>
2020
/// Text generator based on ONNX models, via OnnxRuntimeGenAi
2121
/// See https://github.com/microsoft/onnxruntime-genai
22+
///
23+
/// Note: does not support model name override via request context
2224
/// </summary>
2325
[Experimental("KMEXP01")]
2426
public sealed class OnnxTextGenerator : ITextGenerator, IDisposable

extensions/Ollama/Ollama/DependencyInjection.cs

+5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using Microsoft.Extensions.Logging;
66
using Microsoft.KernelMemory.AI;
77
using Microsoft.KernelMemory.AI.Ollama;
8+
using Microsoft.KernelMemory.Context;
89
using OllamaSharp;
910

1011
#pragma warning disable IDE0130 // reduce number of "using" statements
@@ -72,6 +73,7 @@ public static IServiceCollection AddOllamaTextGeneration(
7273
new OllamaApiClient(new Uri(endpoint), modelName),
7374
new OllamaModelConfig { ModelName = modelName },
7475
textTokenizer,
76+
serviceProvider.GetService<IContextProvider>(),
7577
serviceProvider.GetService<ILoggerFactory>()));
7678
}
7779

@@ -86,6 +88,7 @@ public static IServiceCollection AddOllamaTextGeneration(
8688
new OllamaApiClient(new Uri(config.Endpoint), config.TextModel.ModelName),
8789
config.TextModel,
8890
textTokenizer,
91+
serviceProvider.GetService<IContextProvider>(),
8992
serviceProvider.GetService<ILoggerFactory>()));
9093
}
9194

@@ -101,6 +104,7 @@ public static IServiceCollection AddOllamaTextEmbeddingGeneration(
101104
new OllamaApiClient(new Uri(endpoint), modelName),
102105
new OllamaModelConfig { ModelName = modelName },
103106
textTokenizer,
107+
serviceProvider.GetService<IContextProvider>(),
104108
serviceProvider.GetService<ILoggerFactory>()));
105109
}
106110

@@ -115,6 +119,7 @@ public static IServiceCollection AddOllamaTextEmbeddingGeneration(
115119
new OllamaApiClient(new Uri(config.Endpoint), config.EmbeddingModel.ModelName),
116120
config.EmbeddingModel,
117121
textTokenizer,
122+
serviceProvider.GetService<IContextProvider>(),
118123
serviceProvider.GetService<ILoggerFactory>()));
119124
}
120125
}

extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs

+13-3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Threading.Tasks;
99
using Microsoft.Extensions.Logging;
1010
using Microsoft.KernelMemory.AI.OpenAI;
11+
using Microsoft.KernelMemory.Context;
1112
using Microsoft.KernelMemory.Diagnostics;
1213
using OllamaSharp;
1314
using OllamaSharp.Models;
@@ -20,8 +21,9 @@ public class OllamaTextEmbeddingGenerator : ITextEmbeddingGenerator, ITextEmbedd
2021

2122
private readonly IOllamaApiClient _client;
2223
private readonly OllamaModelConfig _modelConfig;
23-
private readonly ILogger<OllamaTextEmbeddingGenerator> _log;
2424
private readonly ITextTokenizer _textTokenizer;
25+
private readonly IContextProvider _contextProvider;
26+
private readonly ILogger<OllamaTextEmbeddingGenerator> _log;
2527

2628
public int MaxTokens { get; }
2729

@@ -31,6 +33,7 @@ public OllamaTextEmbeddingGenerator(
3133
IOllamaApiClient ollamaClient,
3234
OllamaModelConfig modelConfig,
3335
ITextTokenizer? textTokenizer = null,
36+
IContextProvider? contextProvider = null,
3437
ILoggerFactory? loggerFactory = null)
3538
{
3639
this._client = ollamaClient;
@@ -47,18 +50,21 @@ public OllamaTextEmbeddingGenerator(
4750
}
4851

4952
this._textTokenizer = textTokenizer;
53+
this._contextProvider = contextProvider ?? new RequestContextProvider();
5054

5155
this.MaxTokens = modelConfig.MaxTokenTotal ?? MaxTokensIfUndefined;
5256
}
5357

5458
public OllamaTextEmbeddingGenerator(
5559
OllamaConfig config,
5660
ITextTokenizer? textTokenizer = null,
61+
IContextProvider? contextProvider = null,
5762
ILoggerFactory? loggerFactory = null)
5863
: this(
5964
new OllamaApiClient(new Uri(config.Endpoint), config.EmbeddingModel.ModelName),
6065
config.EmbeddingModel,
6166
textTokenizer,
67+
contextProvider,
6268
loggerFactory)
6369
{
6470
}
@@ -67,11 +73,13 @@ public OllamaTextEmbeddingGenerator(
6773
HttpClient httpClient,
6874
OllamaConfig config,
6975
ITextTokenizer? textTokenizer = null,
76+
IContextProvider? contextProvider = null,
7077
ILoggerFactory? loggerFactory = null)
7178
: this(
7279
new OllamaApiClient(httpClient, config.EmbeddingModel.ModelName),
7380
config.EmbeddingModel,
7481
textTokenizer,
82+
contextProvider,
7583
loggerFactory)
7684
{
7785
}
@@ -104,11 +112,13 @@ public async Task<Embedding[]> GenerateEmbeddingBatchAsync(
104112
CancellationToken cancellationToken = default)
105113
{
106114
var list = textList.ToList();
107-
this._log.LogTrace("Generating embeddings batch, size {0} texts", list.Count);
115+
116+
string modelName = this._contextProvider.GetContext().GetCustomEmbeddingGenerationModelNameOrDefault(this._client.SelectedModel);
117+
this._log.LogTrace("Generating embeddings batch, size {0} texts, with model {1}", list.Count, modelName);
108118

109119
var request = new EmbedRequest
110120
{
111-
Model = this._client.SelectedModel,
121+
Model = modelName,
112122
Input = list,
113123
Options = new RequestOptions
114124
{

extensions/Ollama/Ollama/OllamaTextGenerator.cs

+13-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Threading;
88
using Microsoft.Extensions.Logging;
99
using Microsoft.KernelMemory.AI.OpenAI;
10+
using Microsoft.KernelMemory.Context;
1011
using Microsoft.KernelMemory.Diagnostics;
1112
using OllamaSharp;
1213
using OllamaSharp.Models;
@@ -19,15 +20,17 @@ public class OllamaTextGenerator : ITextGenerator
1920

2021
private readonly IOllamaApiClient _client;
2122
private readonly OllamaModelConfig _modelConfig;
22-
private readonly ILogger<OllamaTextGenerator> _log;
2323
private readonly ITextTokenizer _textTokenizer;
24+
private readonly IContextProvider _contextProvider;
25+
private readonly ILogger<OllamaTextGenerator> _log;
2426

2527
public int MaxTokenTotal { get; }
2628

2729
public OllamaTextGenerator(
2830
IOllamaApiClient ollamaClient,
2931
OllamaModelConfig modelConfig,
3032
ITextTokenizer? textTokenizer = null,
33+
IContextProvider? contextProvider = null,
3134
ILoggerFactory? loggerFactory = null)
3235
{
3336
this._client = ollamaClient;
@@ -43,18 +46,21 @@ public OllamaTextGenerator(
4346
}
4447

4548
this._textTokenizer = textTokenizer;
49+
this._contextProvider = contextProvider ?? new RequestContextProvider();
4650

4751
this.MaxTokenTotal = modelConfig.MaxTokenTotal ?? MaxTokensIfUndefined;
4852
}
4953

5054
public OllamaTextGenerator(
5155
OllamaConfig config,
5256
ITextTokenizer? textTokenizer = null,
57+
IContextProvider? contextProvider = null,
5358
ILoggerFactory? loggerFactory = null)
5459
: this(
5560
new OllamaApiClient(new Uri(config.Endpoint), config.TextModel.ModelName),
5661
config.TextModel,
5762
textTokenizer,
63+
contextProvider,
5864
loggerFactory)
5965
{
6066
}
@@ -63,11 +69,13 @@ public OllamaTextGenerator(
6369
HttpClient httpClient,
6470
OllamaConfig config,
6571
ITextTokenizer? textTokenizer = null,
72+
IContextProvider? contextProvider = null,
6673
ILoggerFactory? loggerFactory = null)
6774
: this(
6875
new OllamaApiClient(httpClient, config.TextModel.ModelName),
6976
config.TextModel,
7077
textTokenizer,
78+
contextProvider,
7179
loggerFactory)
7280
{
7381
}
@@ -87,9 +95,12 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
8795
TextGenerationOptions options,
8896
[EnumeratorCancellation] CancellationToken cancellationToken = default)
8997
{
98+
string modelName = this._contextProvider.GetContext().GetCustomTextGenerationModelNameOrDefault(this._client.SelectedModel);
99+
this._log.LogTrace("Generating text with model {0}", modelName);
100+
90101
var request = new GenerateRequest
91102
{
92-
Model = this._client.SelectedModel,
103+
Model = modelName,
93104
Prompt = prompt,
94105
Stream = true,
95106
Options = new RequestOptions

extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs

+3
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ namespace Microsoft.KernelMemory.AI.OpenAI;
1818
/// <summary>
1919
/// Text embedding generator. The class can be used with any service
2020
/// supporting OpenAI HTTP schema.
21+
///
22+
/// Note: does not support model name override via request context
23+
/// see https://github.com/microsoft/semantic-kernel/issues/9337
2124
/// </summary>
2225
[Experimental("KMEXP01")]
2326
public sealed class OpenAITextEmbeddingGenerator : ITextEmbeddingGenerator, ITextEmbeddingBatchGenerator

extensions/OpenAI/OpenAI/OpenAITextGenerator.cs

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ namespace Microsoft.KernelMemory.AI.OpenAI;
1717
/// <summary>
1818
/// Text generator, supporting OpenAI text and chat completion. The class can be used with any service
1919
/// supporting OpenAI HTTP schema, such as LM Studio HTTP API.
20+
///
21+
/// Note: does not support model name override via request context
22+
/// see https://github.com/microsoft/semantic-kernel/issues/9337
2023
/// </summary>
2124
[Experimental("KMEXP01")]
2225
public sealed class OpenAITextGenerator : ITextGenerator

0 commit comments

Comments
 (0)