Skip to content

Commit

Permalink
Merge pull request #500 from solliancenet/cj-vectorization-indexing
Browse files Browse the repository at this point in the history
Vectorization indexing improvements
  • Loading branch information
joelhulen authored Jan 27, 2024
2 parents 2199bb6 + 5517894 commit a5fd7cd
Show file tree
Hide file tree
Showing 81 changed files with 1,420 additions and 249 deletions.
9 changes: 8 additions & 1 deletion src/FoundationaLLM.sln
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Core", "dotnet\Core\Core.csproj", "{5AA7F0B6-30E6-451A-B1BE-F003BD3EC203}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticKernel", "dotnet\SemanticKernel\SemanticKernel.csproj", "{503CE23D-63D7-4A26-8475-AA71A45D519B}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticKernel-obsolete", "dotnet\SemanticKernel-obsolete\SemanticKernel-obsolete.csproj", "{503CE23D-63D7-4A26-8475-AA71A45D519B}"
EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "LangChainAPI", "python\LangChainAPI\LangChainAPI.pyproj", "{DF3AF954-1999-4244-A783-BCE96EE17816}"
EndProject
Expand Down Expand Up @@ -81,6 +81,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Management", "dotnet\Manage
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManagementAPI", "dotnet\ManagementAPI\ManagementAPI.csproj", "{2D54392A-8D86-4F54-9993-FB3B6C4C090E}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SemanticKernel", "dotnet\SemanticKernel\SemanticKernel.csproj", "{CDB843FE-108B-435A-BF17-68052C64F500}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -194,6 +196,10 @@ Global
{2D54392A-8D86-4F54-9993-FB3B6C4C090E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2D54392A-8D86-4F54-9993-FB3B6C4C090E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2D54392A-8D86-4F54-9993-FB3B6C4C090E}.Release|Any CPU.Build.0 = Release|Any CPU
{CDB843FE-108B-435A-BF17-68052C64F500}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CDB843FE-108B-435A-BF17-68052C64F500}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CDB843FE-108B-435A-BF17-68052C64F500}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CDB843FE-108B-435A-BF17-68052C64F500}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -232,6 +238,7 @@ Global
{6330DD34-9B05-4BD9-98E7-507134751CCA} = {23275624-C0DA-4E93-9291-081D75E8CCD2}
{46FB5F1B-57C6-4CA3-B626-887DF6D806DD} = {B6DC1190-2873-44A3-85B3-63D7BDE99231}
{2D54392A-8D86-4F54-9993-FB3B6C4C090E} = {B6DC1190-2873-44A3-85B3-63D7BDE99231}
{CDB843FE-108B-435A-BF17-68052C64F500} = {B6DC1190-2873-44A3-85B3-63D7BDE99231}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FF5DE858-4B85-4EE8-8A6D-46E8E4FBA078}
Expand Down
9 changes: 9 additions & 0 deletions src/dotnet/Common/Constants/AppConfigurationKeys.cs
Original file line number Diff line number Diff line change
Expand Up @@ -824,5 +824,14 @@ public static class AppConfigurationKeySections
/// The key section for the FoundationaLLM:Vectorization:ContentSources app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_ContentSources = "FoundationaLLM:Vectorization:ContentSources";
/// <summary>
/// The key section for the FoundationaLLM:Vectorization:SemanticKernelTextEmbeddingService app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService = "FoundationaLLM:Vectorization:SemanticKernelTextEmbeddingService";

/// <summary>
/// The key section for the FoundationaLLM:Vectorization:AzureAISearchIndexingService app configuration settings.
/// </summary>
public const string FoundationaLLM_Vectorization_AzureAISearchIndexingService = "FoundationaLLM:Vectorization:AzureAISearchIndexingService";
}
}
10 changes: 10 additions & 0 deletions src/dotnet/Common/Constants/DependencyInjectionKeys.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,15 @@ public static class DependencyInjectionKeys
/// The dependency injection key for the content source service factory.
/// </summary>
public const string FoundationaLLM_Vectorization_ContentSourceServiceFactory = "FoundationaLLM:Vectorization:ContentSourceServiceFactory";

/// <summary>
/// The dependency injection key for the Semantic Kernel text embedding service.
/// </summary>
public const string FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService = "FoundationaLLM:Vectorization:SemanticKernelTextEmbeddingService";

/// <summary>
/// The dependency injection key for the Azure AI Search indexing service.
/// </summary>
public const string FoundationaLLM_Vectorization_AzureAISearchIndexingService = "FoundationaLLM:Vectorization:AzureAISearchIndexingService";
}
}
8 changes: 1 addition & 7 deletions src/dotnet/Common/Exceptions/ConfigurationValueException.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FoundationaLLM.Common.Exceptions
namespace FoundationaLLM.Common.Exceptions
{
/// <summary>
/// Represents an error with a configuration value.
Expand Down
18 changes: 18 additions & 0 deletions src/dotnet/Common/Interfaces/IIndexingService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using FoundationaLLM.Common.Models.TextEmbedding;

namespace FoundationaLLM.Common.Interfaces;

/// <summary>
/// Defines the contract for services that add embedding vectors to an index.
/// </summary>
public interface IIndexingService
{
    /// <summary>
    /// Indexes the embeddings carried by a piece of embedded content into the named index.
    /// </summary>
    /// <param name="embeddedContent">The <see cref="EmbeddedContent"/> containing the embeddings to index.</param>
    /// <param name="indexName">The name of the target index.</param>
    /// <returns>
    /// A task whose result is a list of strings.
    /// NOTE(review): presumably the identifiers of the indexed entries; confirm against the implementing services.
    /// </returns>
    Task<List<string>> IndexEmbeddingsAsync(EmbeddedContent embeddedContent, string indexName);
}
40 changes: 36 additions & 4 deletions src/dotnet/Common/Interfaces/IResourceProviderService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@ public interface IResourceProviderService
/// <summary>
/// Gets a resource based on its logical path.
/// </summary>
/// <typeparam name="T">The type of the requested resource.</typeparam>
/// <param name="resourcePath">The logical path of the requested resource.</param>
/// <typeparam name="T">The type of the resource.</typeparam>
/// <param name="resourcePath">The logical path of the resource.</param>
/// <returns>The instance of the resource corresponding to the specified logical path.</returns>
Task<T> GetResourceAsync<T>(string resourcePath) where T: class;

/// <summary>
/// Gets a resource based on its logical path.
/// </summary>
/// <typeparam name="T">The type of the requested resource.</typeparam>
/// <param name="resourcePath">The logical path of the requested resource.</param>
/// <typeparam name="T">The type of the resource.</typeparam>
/// <param name="resourcePath">The logical path of the resource.</param>
/// <returns>The instance of the resource corresponding to the specified logical path.</returns>
T GetResource<T>(string resourcePath) where T : class;

Expand All @@ -45,5 +45,37 @@ public interface IResourceProviderService
/// <param name="actionPath">The logical path of the action to be executed.</param>
/// <returns>The <see cref="ResourceProviderActionResult"/> that contains details about the result of the execution.</returns>
Task<ResourceProviderActionResult> ExecuteAction(string actionPath);

/// <summary>
/// Creates or updates a resource based on its logical path.
/// </summary>
/// <typeparam name="T">The type of the resource.</typeparam>
/// <param name="resourcePath">The logical path of the resource.</param>
/// <param name="resource">The instance of the resource being created or updated.</param>
/// <returns></returns>
Task UpsertResourceAsync<T>(string resourcePath, T resource) where T : class;

/// <summary>
/// Creates or updates a resource based on its logical path.
/// </summary>
/// <typeparam name="T">The type of the resource.</typeparam>
/// <param name="resourcePath">The logical path of the resource.</param>
/// <param name="resource">The instance of the resource being created or updated.</param>
void UpsertResource<T>(string resourcePath, T resource) where T : class;

/// <summary>
/// Deletes a resource based on its logical path.
/// </summary>
/// <typeparam name="T">The type of the resource.</typeparam>
/// <param name="resourcePath">The logical path of the resource.</param>
/// <returns></returns>
Task DeleteResourceAsync<T>(string resourcePath) where T : class;

/// <summary>
/// Deletes a resource based on its logical path.
/// </summary>
/// <typeparam name="T">The type of the resource.</typeparam>
/// <param name="resourcePath">The logical path of the resource.</param>
void DeleteResource<T>(string resourcePath) where T : class;
}
}
16 changes: 12 additions & 4 deletions src/dotnet/Common/Interfaces/IServiceFactory`1.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using FoundationaLLM.Common.Models.Vectorization;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
Expand All @@ -9,14 +10,21 @@ namespace FoundationaLLM.Common.Interfaces
/// <summary>
/// Creates typed service instances.
/// </summary>
public interface IServiceFactory<T>
public interface IVectorizationServiceFactory<T>

{
/// <summary>
/// Creates a service instance of type T specified by name.
/// Retrieves a service instance of type T specified by name.
/// </summary>
/// <param name="serviceName">The name of the service instance to create.</param>
/// <returns>The service instance created by name.</returns>
T CreateService(string serviceName);
T GetService(string serviceName);

/// <summary>
/// Retrieves a service instance of type T specified by name and its associated vectorization profile.
/// </summary>
/// <param name="serviceName">The name of the service instance to create.</param>
/// <returns>The service instance and its associated vectorization profile.</returns>
(T Service, VectorizationProfileBase VectorizationProfile) GetServiceWithProfile(string serviceName);
}
}
24 changes: 24 additions & 0 deletions src/dotnet/Common/Interfaces/ITextEmbeddingService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using FoundationaLLM.Common.Models.TextEmbedding;

namespace FoundationaLLM.Common.Interfaces;

/// <summary>
/// Defines the contract for services that turn text into vector embeddings.
/// </summary>
public interface ITextEmbeddingService
{
    /// <summary>
    /// Produces the vector embedding of a single text.
    /// </summary>
    /// <param name="text">The text to embed.</param>
    /// <returns>
    /// A task whose result pairs the resulting <see cref="Embedding"/> with the number of tokens used.
    /// </returns>
    Task<(Embedding Embedding, int TokenCount)> GetEmbeddingAsync(string text);

    /// <summary>
    /// Produces the vector embeddings of a list of texts.
    /// </summary>
    /// <param name="texts">The texts to embed.</param>
    /// <returns>
    /// A task whose result pairs the list of resulting embeddings with the number of tokens used.
    /// </returns>
    Task<(IList<Embedding> Embeddings, int TokenCount)> GetEmbeddingsAsync(IList<string> texts);
}
31 changes: 31 additions & 0 deletions src/dotnet/Common/Models/TextEmbedding/ContentIdentifier.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using System.Text.Json.Serialization;

namespace FoundationaLLM.Common.Models.TextEmbedding;

/// <summary>
/// Identifies the content (e.g., a document) that a vectorization request refers to.
/// </summary>
public class ContentIdentifier
{
    /// <summary>
    /// The ordered parts that together form the unique identifier of the content being vectorized.
    /// Serialized to JSON as <c>multipart_id</c>.
    /// </summary>
    [JsonPropertyOrder(1), JsonPropertyName("multipart_id")]
    public required List<string> MultipartId { get; set; }

    /// <summary>
    /// The unique identifier of the content being vectorized.
    /// It is computed on every read by joining the parts of <see cref="MultipartId"/> with "/",
    /// and it is excluded from JSON serialization.
    /// </summary>
    [JsonIgnore]
    public string UniqueId
    {
        get { return string.Join("/", MultipartId); }
    }

    /// <summary>
    /// The canonical identifier of the content being vectorized.
    /// Vectorization state services use it to derive where the state lives in the underlying storage.
    /// Serialized to JSON as <c>canonical_id</c>.
    /// </summary>
    [JsonPropertyOrder(2), JsonPropertyName("canonical_id")]
    public required string CanonicalId { get; set; }
}
29 changes: 29 additions & 0 deletions src/dotnet/Common/Models/TextEmbedding/EmbeddedContent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FoundationaLLM.Common.Models.TextEmbedding;

/// <summary>
/// Describes a piece of content together with the embeddings computed for its parts.
/// </summary>
public class EmbeddedContent
{
    /// <summary>
    /// The identifier of the content.
    /// </summary>
    public required ContentIdentifier ContentId { get; set; }

    /// <summary>
    /// The name of the content source profile used to retrieve the content.
    /// </summary>
    public required string ContentSourceProfileName { get; set; }

    /// <summary>
    /// The list of content parts, each an <see cref="EmbeddedContentPart"/>.
    /// </summary>
    public required List<EmbeddedContentPart> ContentParts { get; set; } = new();
}
24 changes: 24 additions & 0 deletions src/dotnet/Common/Models/TextEmbedding/EmbeddedContentPart.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FoundationaLLM.Common.Models.TextEmbedding;

/// <summary>
/// Describes a single part of embedded content: a text and its vector embedding.
/// </summary>
public class EmbeddedContentPart
{
    /// <summary>
    /// The text that was embedded.
    /// </summary>
    public required string Content { get; set; }

    /// <summary>
    /// The vector embedding associated with <see cref="Content"/>.
    /// </summary>
    public required Embedding Embedding { get; set; }
}
Loading

0 comments on commit a5fd7cd

Please sign in to comment.