Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Semantic caching #2117

Merged
merged 9 commits into from
Jan 13, 2025
44 changes: 44 additions & 0 deletions docs/release-notes/breaking-changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,50 @@
> [!NOTE]
> This section is for changes that are not yet released but will affect future releases.

## Starting with 0.9.1-rc117

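### Agent configuration changes

The agent configuration supports the following new sections for user prompt rewriting (`text_rewrite_settings`) and semantic caching (`cache_settings`):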

```json
"text_rewrite_settings": {
    "user_prompt_rewrite_enabled": true,
    "user_prompt_rewrite_settings": {
        "user_prompt_rewrite_ai_model_object_id": "/instances/73fad442-f614-4510-811f-414cb3a3d34b/providers/FoundationaLLM.AIModel/aiModels/GPT4oCompletionAIModel",
        "user_prompt_rewrite_prompt_object_id": "/instances/73fad442-f614-4510-811f-414cb3a3d34b/providers/FoundationaLLM.Prompt/prompts/FoundationaLLM-v2-Rewrite",
        "user_prompts_window_size": 1
    }
},
"cache_settings": {
    "semantic_cache_enabled": true,
    "semantic_cache_settings": {
        "embedding_ai_model_object_id": "/instances/73fad442-f614-4510-811f-414cb3a3d34b/providers/FoundationaLLM.AIModel/aiModels/DefaultEmbeddingAIModel",
        "embedding_dimensions": 2048,
        "minimum_similarity_threshold": 0.975
    }
},
```

### Semantic cache

Enable vector search on the Cosmos DB account using the following Azure CLI command:

```cli
az cosmosdb update --resource-group <resource-group-name> --name <account-name> --capabilities EnableNoSQLVectorSearch
```

Create the `CompletionsCache` container in the Cosmos DB database with the following properties:

- **Container id**: `CompletionsCache`
- **Partition key**: `/operationId`
- **Container Vector Policy**: a policy with the following properties:
    - **Path**: `/userPromptEmbedding`
    - **Data type**: `float32`
    - **Distance function**: `Cosine`
    - **Dimensions**: 2048
    - **Index type**: `diskANN` (leave the default values)

After the container is created, set the `Time to Live` property on the container to 300 seconds.

## Starting with 0.9.1-rc105

### Configuration changes
Expand Down
2 changes: 1 addition & 1 deletion src/dotnet/Common/Common.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
<PackageReference Include="Azure.Storage.Files.DataLake" Version="12.17.1" />
<PackageReference Include="FluentValidation" Version="11.9.0" />
<PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer" Version="8.0.4" />
<PackageReference Include="Microsoft.Azure.Cosmos" Version="3.39.0" />
<PackageReference Include="Microsoft.Azure.Cosmos" Version="3.46.1" />
<PackageReference Include="Microsoft.DeepDev.TokenizerLib" Version="1.3.3" />
<PackageReference Include="Microsoft.Extensions.Azure" Version="1.7.2" />
<PackageReference Include="Microsoft.Graph" Version="5.48.0" />
Expand Down
5 changes: 5 additions & 0 deletions src/dotnet/Common/Constants/AzureCosmosDBContainers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,10 @@ public static class AzureCosmosDBContainers
/// Stores information about external resources (e.g., Azure OpenAI assistants threads and files).
/// </summary>
public const string ExternalResources = "ExternalResources";

/// <summary>
/// The vector store for cached completions (used by the semantic cache service).
/// </summary>
public const string CompletionsCache = "CompletionsCache";
}
}
28 changes: 0 additions & 28 deletions src/dotnet/Common/Constants/Chat/MessageContentTypes.cs

This file was deleted.

32 changes: 32 additions & 0 deletions src/dotnet/Common/Exceptions/SemanticCacheException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
namespace FoundationaLLM.Common.Exceptions
{
    /// <summary>
    /// The exception type used to report errors raised by the semantic cache.
    /// </summary>
    public class SemanticCacheException : Exception
    {
        /// <summary>
        /// Creates a <see cref="SemanticCacheException"/> that carries the default exception message.
        /// </summary>
        public SemanticCacheException()
            : base()
        { }

        /// <summary>
        /// Creates a <see cref="SemanticCacheException"/> that carries the specified error message.
        /// </summary>
        /// <param name="message">The text describing the error.</param>
        public SemanticCacheException(string? message)
            : base(message)
        { }

        /// <summary>
        /// Creates a <see cref="SemanticCacheException"/> that carries the specified error message
        /// and a reference to the exception that caused it.
        /// </summary>
        /// <param name="message">The text describing the error.</param>
        /// <param name="innerException">The exception that caused this exception, if any.</param>
        public SemanticCacheException(string? message, Exception? innerException)
            : base(message, innerException)
        { }
    }
}
32 changes: 32 additions & 0 deletions src/dotnet/Common/Exceptions/UserPromptRewriteException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
namespace FoundationaLLM.Common.Exceptions
{
    /// <summary>
    /// The exception type used to report errors raised by the user prompt rewrite service.
    /// </summary>
    public class UserPromptRewriteException : Exception
    {
        /// <summary>
        /// Creates a <see cref="UserPromptRewriteException"/> that carries the default exception message.
        /// </summary>
        public UserPromptRewriteException()
            : base()
        { }

        /// <summary>
        /// Creates a <see cref="UserPromptRewriteException"/> that carries the specified error message.
        /// </summary>
        /// <param name="message">The text describing the error.</param>
        public UserPromptRewriteException(string? message)
            : base(message)
        { }

        /// <summary>
        /// Creates a <see cref="UserPromptRewriteException"/> that carries the specified error message
        /// and a reference to the exception that caused it.
        /// </summary>
        /// <param name="message">The text describing the error.</param>
        /// <param name="innerException">The exception that caused this exception, if any.</param>
        public UserPromptRewriteException(string? message, Exception? innerException)
            : base(message, innerException)
        { }
    }
}
29 changes: 29 additions & 0 deletions src/dotnet/Common/Interfaces/IAzureCosmosDBService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using FoundationaLLM.Common.Models.Configuration.Users;
using FoundationaLLM.Common.Models.Conversation;
using FoundationaLLM.Common.Models.Orchestration;
using FoundationaLLM.Common.Models.Orchestration.Response;
using FoundationaLLM.Common.Models.ResourceProviders;
using FoundationaLLM.Common.Models.ResourceProviders.Attachment;

Expand Down Expand Up @@ -258,4 +259,32 @@ Task<Dictionary<string, object>> PatchMultipleSessionsItemsInTransactionAsync(
/// <param name="cancellationToken">Cancellation token for async calls.</param>
/// <returns></returns>
Task DeleteAttachment(AttachmentReference attachment, CancellationToken cancellationToken = default);

/// <summary>
/// Creates a new container for vector search.
/// </summary>
/// <param name="containerName">The name of the container to create.</param>
/// <param name="partitionKeyPath">The property path that contains the partition key.</param>
/// <param name="vectorProperyPath">The property path that contains the vectors.</param>
/// <param name="vectorDimensions">The length of each vector (the number of dimensions used for embedding).</param>
/// <param name="cancellationToken">The cancellation token to signal the need to cancel the operation.</param>
/// <returns>A <see cref="Task"/> that represents the asynchronous operation.</returns>
Task CreateVectorSearchContainerAsync(
string containerName,
string partitionKeyPath,
string vectorProperyPath,
int vectorDimensions,
CancellationToken cancellationToken = default);

/// <summary>
/// Gets the completion response for a given user prompt embedding using vector search and a minimum threshold for similarity.
/// </summary>
/// <param name="containerName">The name of the container holding the vector index.</param>
/// <param name="userPromptEmbedding">The reference embedding used for the vector search.</param>
/// <param name="minimumSimilarityScore">The threshold used for the similarity score.</param>
/// <returns>A <see cref="CompletionResponse"/> that matches the search criteria. If no item in the vector index matches the criteria, returns <see langword="null"/>.</returns>
Task<CompletionResponse?> GetCompletionResponseAsync(
string containerName,
ReadOnlyMemory<float> userPromptEmbedding,
decimal minimumSimilarityScore);
}
5 changes: 5 additions & 0 deletions src/dotnet/Common/Models/Conversation/Message.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
/// <summary>
/// The unique identifier.
/// </summary>
public string Id { get; set; }

Check warning on line 15 in src/dotnet/Common/Models/Conversation/Message.cs

View workflow job for this annotation

GitHub Actions / build (OrchestrationAPI)

Non-nullable property 'Id' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

/// <summary>
/// The type of the message.
Expand Down Expand Up @@ -58,6 +58,11 @@
/// </summary>
public string Text { get; set; }

/// <summary>
/// The optional rewrite of the text content of the message.
/// </summary>
public string? TextRewrite { get; set; }

/// <summary>
/// The rating associated with the message, if any.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,37 @@ namespace FoundationaLLM.Common.Models.Orchestration.Request
public class CompletionRequestBase
{
/// <summary>
/// The Operation ID identifying the completion request.
/// Gets or sets the operation identifier of the completion request.
/// </summary>
[JsonPropertyName("operation_id")]
public string? OperationId { get; set; }

/// <summary>
/// Indicates whether this is a long-running operation.
/// Gets or sets a flag that indicates whether this is a long-running operation.
/// </summary>
[JsonPropertyName("long_running_operation")]
public bool LongRunningOperation { get; set; }

/// <summary>
/// The session ID.
/// Gets or sets the conversation identifier.
/// </summary>
[JsonPropertyName("session_id")]
public string? SessionId { get; set; }

/// <summary>
/// Represent the input or user prompt.
/// Gets or sets the user prompt.
/// </summary>
[JsonPropertyName("user_prompt")]
public required string UserPrompt { get; set; }

/// <summary>
/// The message history associated with the completion request.
/// Gets or sets the rewrite of the user prompt.
/// </summary>
[JsonPropertyName("user_prompt_rewrite")]
public string? UserPromptRewrite { get; set; }

/// <summary>
/// Gets or sets the message history associated with the completion request.
/// </summary>
[JsonPropertyName("message_history")]
public List<MessageHistoryItem>? MessageHistory { get; set; } = [];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
/// The Operation ID identifying the completion request.
/// </summary>
[JsonPropertyName("operation_id")]
public string OperationId { get; set; }

Check warning on line 14 in src/dotnet/Common/Models/Orchestration/Response/CompletionResponseBase.cs

View workflow job for this annotation

GitHub Actions / build (CoreWorker)

Non-nullable property 'OperationId' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

Check warning on line 14 in src/dotnet/Common/Models/Orchestration/Response/CompletionResponseBase.cs

View workflow job for this annotation

GitHub Actions / build (SemanticKernelAPI)

Non-nullable property 'OperationId' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

Check warning on line 14 in src/dotnet/Common/Models/Orchestration/Response/CompletionResponseBase.cs

View workflow job for this annotation

GitHub Actions / build (OrchestrationAPI)

Non-nullable property 'OperationId' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

/// <summary>
/// The completion response from the language model.
Expand Down Expand Up @@ -41,8 +41,14 @@
/// The user prompt the language model responded to.
/// </summary>
[JsonPropertyName("user_prompt")]
public string UserPrompt { get; set; }

Check warning on line 44 in src/dotnet/Common/Models/Orchestration/Response/CompletionResponseBase.cs

View workflow job for this annotation

GitHub Actions / build (CoreWorker)

Non-nullable property 'UserPrompt' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

Check warning on line 44 in src/dotnet/Common/Models/Orchestration/Response/CompletionResponseBase.cs

View workflow job for this annotation

GitHub Actions / build (SemanticKernelAPI)

Non-nullable property 'UserPrompt' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

Check warning on line 44 in src/dotnet/Common/Models/Orchestration/Response/CompletionResponseBase.cs

View workflow job for this annotation

GitHub Actions / build (OrchestrationAPI)

Non-nullable property 'UserPrompt' must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring the property as nullable.

/// <summary>
/// The user prompt rewrite.
/// </summary>
[JsonPropertyName("user_prompt_rewrite")]
public string? UserPromptRewrite { get; set; }

/// <summary>
/// The full prompt composed by the LLM.
/// </summary>
Expand Down
56 changes: 16 additions & 40 deletions src/dotnet/Common/Models/ResourceProviders/Agent/AgentBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,30 @@ public class AgentBase : ResourceBase
/// </summary>
[JsonPropertyName("sessions_enabled")]
public bool SessionsEnabled { get; set; }

/// <summary>
/// Gets or sets the agent's text rewrite settings.
/// </summary>
[JsonPropertyName("text_rewrite_settings")]
public AgentTextRewriteSettings? TextRewriteSettings { get; set; }

/// <summary>
/// Gets or sets the agent's caching settings.
/// </summary>
[JsonPropertyName("cache_settings")]
public AgentCacheSettings? CacheSettings { get; set; }

/// <summary>
/// The agent's conversation history configuration.
/// </summary>
[JsonPropertyName("conversation_history_settings")]
public ConversationHistorySettings? ConversationHistorySettings { get; set; }
public AgentConversationHistorySettings? ConversationHistorySettings { get; set; }

/// <summary>
/// The agent's Gatekeeper configuration.
/// </summary>
[JsonPropertyName("gatekeeper_settings")]
public GatekeeperSettings? GatekeeperSettings { get; set; }

public AgentGatekeeperSettings? GatekeeperSettings { get; set; }

/// <summary>
/// Settings for the orchestration service.
Expand Down Expand Up @@ -103,41 +116,4 @@ public class AgentBase : ResourceBase
public bool HasCapability(string capabilityName) =>
Capabilities?.Contains(capabilityName) ?? false;
}

/// <summary>
/// Agent conversation history settings.
/// </summary>
public class ConversationHistorySettings
{
/// <summary>
/// Indicates whether the conversation history is enabled.
/// </summary>
[JsonPropertyName("enabled")]
public bool Enabled { get; set; }

/// <summary>
/// The maximum number of turns to store in the conversation history.
/// </summary>
[JsonPropertyName("max_history")]
public int MaxHistory { get; set; }
}

/// <summary>
/// Agent Gatekeeper settings.
/// </summary>
public class GatekeeperSettings
{
/// <summary>
/// Indicates whether to abide by or override the system settings for the Gatekeeper.
/// </summary>
[JsonPropertyName("use_system_setting")]
public bool UseSystemSetting { get; set; }

/// <summary>
/// If <see cref="UseSystemSetting"/> is false, provides Gatekeeper feature selection.
/// </summary>
[JsonPropertyName("options")]
public string[]? Options { get; set; }
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using System.Text.Json.Serialization;

namespace FoundationaLLM.Common.Models.ResourceProviders.Agent
{
    /// <summary>
    /// Groups the cache-related settings of an agent.
    /// </summary>
    public class AgentCacheSettings
    {
        /// <summary>
        /// Gets or sets the flag that turns the agent's semantic cache on or off.
        /// The flag is <see langword="false"/> unless it is explicitly set.
        /// </summary>
        /// <remarks>
        /// When the flag is <see langword="true"/>, the semantic cache is configured
        /// through <see cref="SemanticCacheSettings"/>.
        /// </remarks>
        [JsonPropertyName("semantic_cache_enabled")]
        public bool SemanticCacheEnabled { get; set; }

        /// <summary>
        /// Gets or sets the configuration of the agent's semantic cache.
        /// </summary>
        /// <remarks>
        /// These values are meaningful only while <see cref="SemanticCacheEnabled"/>
        /// is <see langword="true"/>.
        /// </remarks>
        [JsonPropertyName("semantic_cache_settings")]
        public AgentSemanticCacheSettings? SemanticCacheSettings { get; set; }
    }
}
Loading
Loading