diff --git a/src/FoundationaLLM.sln b/src/FoundationaLLM.sln
index b340e7fc95..0406170311 100644
--- a/src/FoundationaLLM.sln
+++ b/src/FoundationaLLM.sln
@@ -11,7 +11,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Core", "dotnet\Core\Core.csproj", "{5AA7F0B6-30E6-451A-B1BE-F003BD3EC203}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticKernel", "dotnet\SemanticKernel\SemanticKernel.csproj", "{503CE23D-63D7-4A26-8475-AA71A45D519B}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticKernel-obsolete", "dotnet\SemanticKernel-obsolete\SemanticKernel-obsolete.csproj", "{503CE23D-63D7-4A26-8475-AA71A45D519B}"
EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "LangChainAPI", "python\LangChainAPI\LangChainAPI.pyproj", "{DF3AF954-1999-4244-A783-BCE96EE17816}"
EndProject
@@ -81,6 +81,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Management", "dotnet\Manage
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManagementAPI", "dotnet\ManagementAPI\ManagementAPI.csproj", "{2D54392A-8D86-4F54-9993-FB3B6C4C090E}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SemanticKernel", "dotnet\SemanticKernel\SemanticKernel.csproj", "{CDB843FE-108B-435A-BF17-68052C64F500}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -194,6 +196,10 @@ Global
{2D54392A-8D86-4F54-9993-FB3B6C4C090E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2D54392A-8D86-4F54-9993-FB3B6C4C090E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2D54392A-8D86-4F54-9993-FB3B6C4C090E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {CDB843FE-108B-435A-BF17-68052C64F500}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {CDB843FE-108B-435A-BF17-68052C64F500}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {CDB843FE-108B-435A-BF17-68052C64F500}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {CDB843FE-108B-435A-BF17-68052C64F500}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -232,6 +238,7 @@ Global
{6330DD34-9B05-4BD9-98E7-507134751CCA} = {23275624-C0DA-4E93-9291-081D75E8CCD2}
{46FB5F1B-57C6-4CA3-B626-887DF6D806DD} = {B6DC1190-2873-44A3-85B3-63D7BDE99231}
{2D54392A-8D86-4F54-9993-FB3B6C4C090E} = {B6DC1190-2873-44A3-85B3-63D7BDE99231}
+ {CDB843FE-108B-435A-BF17-68052C64F500} = {B6DC1190-2873-44A3-85B3-63D7BDE99231}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FF5DE858-4B85-4EE8-8A6D-46E8E4FBA078}
diff --git a/src/dotnet/Common/Constants/AppConfigurationKeys.cs b/src/dotnet/Common/Constants/AppConfigurationKeys.cs
index 51035ba53f..31f306f876 100644
--- a/src/dotnet/Common/Constants/AppConfigurationKeys.cs
+++ b/src/dotnet/Common/Constants/AppConfigurationKeys.cs
@@ -824,5 +824,14 @@ public static class AppConfigurationKeySections
/// The key section for the FoundationaLLM:Vectorization:ContentSources app configuration settings.
///
public const string FoundationaLLM_Vectorization_ContentSources = "FoundationaLLM:Vectorization:ContentSources";
+ ///
+ /// The key section for the FoundationaLLM:Vectorization:SemanticKernelTextEmbeddingService app configuration settings.
+ ///
+ public const string FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService = "FoundationaLLM:Vectorization:SemanticKernelTextEmbeddingService";
+
+ ///
+ /// The key section for the FoundationaLLM:Vectorization:AzureAISearchIndexingService app configuration settings.
+ ///
+ public const string FoundationaLLM_Vectorization_AzureAISearchIndexingService = "FoundationaLLM:Vectorization:AzureAISearchIndexingService";
}
}
diff --git a/src/dotnet/Common/Constants/DependencyInjectionKeys.cs b/src/dotnet/Common/Constants/DependencyInjectionKeys.cs
index 8ab08cc1dd..d29ff08bcf 100644
--- a/src/dotnet/Common/Constants/DependencyInjectionKeys.cs
+++ b/src/dotnet/Common/Constants/DependencyInjectionKeys.cs
@@ -30,5 +30,15 @@ public static class DependencyInjectionKeys
/// The dependency injection key for the content source service factory.
///
public const string FoundationaLLM_Vectorization_ContentSourceServiceFactory = "FoundationaLLM:Vectorization:ContentSourceServiceFactory";
+
+ ///
+ /// The dependency injection key for the Semantic Kernel text embedding service.
+ ///
+ public const string FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService = "FoundationaLLM:Vectorization:SemanticKernelTextEmbeddingService";
+
+ ///
+ /// The dependency injection key for the Azure AI Search indexing service.
+ ///
+ public const string FoundationaLLM_Vectorization_AzureAISearchIndexingService = "FoundationaLLM:Vectorization:AzureAISearchIndexingService";
}
}
diff --git a/src/dotnet/Common/Exceptions/ConfigurationValueException.cs b/src/dotnet/Common/Exceptions/ConfigurationValueException.cs
index d09f219bc4..a0c9bc5141 100644
--- a/src/dotnet/Common/Exceptions/ConfigurationValueException.cs
+++ b/src/dotnet/Common/Exceptions/ConfigurationValueException.cs
@@ -1,10 +1,4 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace FoundationaLLM.Common.Exceptions
+namespace FoundationaLLM.Common.Exceptions
{
///
/// Represents an error with a configuration value.
diff --git a/src/dotnet/Common/Interfaces/IIndexingService.cs b/src/dotnet/Common/Interfaces/IIndexingService.cs
new file mode 100644
index 0000000000..135735b498
--- /dev/null
+++ b/src/dotnet/Common/Interfaces/IIndexingService.cs
@@ -0,0 +1,18 @@
+using FoundationaLLM.Common.Models.TextEmbedding;
+
+namespace FoundationaLLM.Common.Interfaces
+{
+ ///
+ /// Provides indexing capabilities for embedding vectors.
+ ///
+ public interface IIndexingService
+ {
+ ///
+ /// Adds to a specified index the list of embeddings associated with a content.
+ ///
+ /// The EmbeddedContent containing the embeddings to index.
+ /// The name of the index.
+ ///
+ Task> IndexEmbeddingsAsync(EmbeddedContent embeddedContent, string indexName);
+ }
+}
diff --git a/src/dotnet/Common/Interfaces/IResourceProviderService.cs b/src/dotnet/Common/Interfaces/IResourceProviderService.cs
index 03d19e4676..970cb1a103 100644
--- a/src/dotnet/Common/Interfaces/IResourceProviderService.cs
+++ b/src/dotnet/Common/Interfaces/IResourceProviderService.cs
@@ -26,16 +26,16 @@ public interface IResourceProviderService
///
/// Gets a resource based on its logical path.
///
- /// The type of the requested resource.
- /// The logical path of the requested resource.
+ /// The type of the resource.
+ /// The logical path of the resource.
/// The instance of the resource corresponding to the specified logical path.
Task GetResourceAsync(string resourcePath) where T: class;
///
/// Gets a resource based on its logical path.
///
- /// The type of the requested resource.
- /// The logical path of the requested resource.
+ /// The type of the resource.
+ /// The logical path of the resource.
/// The instance of the resource corresponding to the specified logical path.
T GetResource(string resourcePath) where T : class;
@@ -45,5 +45,37 @@ public interface IResourceProviderService
/// The logical path of the action to be executed.
/// The that contains details about the result of the execution.
Task ExecuteAction(string actionPath);
+
+ ///
+ /// Creates or updates a resource based on its logical path.
+ ///
+ /// The type of the resource.
+ /// The logical path of the resource.
+ /// The instance of the resource being created or updated.
+ ///
+ Task UpsertResourceAsync(string resourcePath, T resource) where T : class;
+
+ ///
+ /// Creates or updates a resource based on its logical path.
+ ///
+ /// The type of the resource.
+ /// The logical path of the resource.
+ /// The instance of the resource being created or updated.
+ void UpsertResource(string resourcePath, T resource) where T : class;
+
+ ///
+ /// Deletes a resource based on its logical path.
+ ///
+ /// The type of the resource.
+ /// The logical path of the resource.
+ ///
+ Task DeleteResourceAsync(string resourcePath) where T : class;
+
+ ///
+ /// Deletes a resource based on its logical path.
+ ///
+ /// The type of the resource.
+ /// The logical path of the resource.
+ void DeleteResource(string resourcePath) where T : class;
}
}
diff --git a/src/dotnet/Common/Interfaces/IServiceFactory`1.cs b/src/dotnet/Common/Interfaces/IServiceFactory`1.cs
index 4fa3fd646e..7602ba7b57 100644
--- a/src/dotnet/Common/Interfaces/IServiceFactory`1.cs
+++ b/src/dotnet/Common/Interfaces/IServiceFactory`1.cs
@@ -1,4 +1,5 @@
-using System;
+using FoundationaLLM.Common.Models.Vectorization;
+using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
@@ -9,14 +10,21 @@ namespace FoundationaLLM.Common.Interfaces
///
/// Creates typed service instances.
///
- public interface IServiceFactory
+ public interface IVectorizationServiceFactory
{
///
- /// Creates a service instance of type T specified by name.
+ /// Retrieves a service instance of type T specified by name.
///
/// The name of the service instance to create.
/// The service instance created by name.
- T CreateService(string serviceName);
+ T GetService(string serviceName);
+
+ ///
+ /// Retrieves a service instance of type T specified by name and its associated vectorization profile.
+ ///
+ /// The name of the service instance to retrieve.
+ /// The service instance and its associated vectorization profile.
+ (T Service, VectorizationProfileBase VectorizationProfile) GetServiceWithProfile(string serviceName);
}
}
diff --git a/src/dotnet/Common/Interfaces/ITextEmbeddingService.cs b/src/dotnet/Common/Interfaces/ITextEmbeddingService.cs
new file mode 100644
index 0000000000..0aaed708bc
--- /dev/null
+++ b/src/dotnet/Common/Interfaces/ITextEmbeddingService.cs
@@ -0,0 +1,24 @@
+using FoundationaLLM.Common.Models.TextEmbedding;
+
+namespace FoundationaLLM.Common.Interfaces
+{
+ ///
+ /// Provides text embedding capabilities.
+ ///
+ public interface ITextEmbeddingService
+ {
+ ///
+ /// Creates the vector embedding for a specified text.
+ ///
+ /// The text which needs to be embedded.
+ /// Response containing the vector embedding and the amount of tokens used.
+ Task<(Embedding Embedding, int TokenCount)> GetEmbeddingAsync(string text);
+
+ ///
+ /// Creates the vector embeddings for a specified list of texts.
+ ///
+ /// The list of texts which need to be embedded.
+ /// Response containing the list of vector embeddings and the amount of tokens used.
+ Task<(IList Embeddings, int TokenCount)> GetEmbeddingsAsync(IList texts);
+ }
+}
diff --git a/src/dotnet/Common/Models/TextEmbedding/ContentIdentifier.cs b/src/dotnet/Common/Models/TextEmbedding/ContentIdentifier.cs
new file mode 100644
index 0000000000..0adaadd7f0
--- /dev/null
+++ b/src/dotnet/Common/Models/TextEmbedding/ContentIdentifier.cs
@@ -0,0 +1,31 @@
+using System.Text.Json.Serialization;
+
+namespace FoundationaLLM.Common.Models.TextEmbedding;
+
+///
+/// Represents the content associated with a vectorization request.
+///
+public class ContentIdentifier
+{
+ ///
+ /// The multipart unique identifier of the content (i.e., document) being vectorized.
+ ///
+ [JsonPropertyOrder(1)]
+ [JsonPropertyName("multipart_id")]
+ public required List MultipartId { get; set; }
+
+ ///
+ /// The unique identifier of the content (i.e., document) being vectorized.
+ /// The identifier is determined by concatenating the parts from .
+ ///
+ [JsonIgnore]
+ public string UniqueId => string.Join("/", MultipartId);
+
+ ///
+ /// The canonical identifier of the content being vectorized.
+ /// Vectorization state services use it to derive the location of the state in the underlying storage.
+ ///
+ [JsonPropertyOrder(2)]
+ [JsonPropertyName("canonical_id")]
+ public required string CanonicalId { get; set; }
+}
diff --git a/src/dotnet/Common/Models/TextEmbedding/EmbeddedContent.cs b/src/dotnet/Common/Models/TextEmbedding/EmbeddedContent.cs
new file mode 100644
index 0000000000..a7c20231a6
--- /dev/null
+++ b/src/dotnet/Common/Models/TextEmbedding/EmbeddedContent.cs
@@ -0,0 +1,29 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace FoundationaLLM.Common.Models.TextEmbedding
+{
+ ///
+ /// Provides information about embedded content.
+ ///
+ public class EmbeddedContent
+ {
+ ///
+ /// The canonical identifier of the content.
+ ///
+ public required ContentIdentifier ContentId { get; set; }
+
+ ///
+ /// The name of the content source profile used to retrieve content.
+ ///
+ public required string ContentSourceProfileName { get; set; }
+
+ ///
+ /// The list of embedded content parts.
+ ///
+ public required List ContentParts { get; set; } = [];
+ }
+}
diff --git a/src/dotnet/Common/Models/TextEmbedding/EmbeddedContentPart.cs b/src/dotnet/Common/Models/TextEmbedding/EmbeddedContentPart.cs
new file mode 100644
index 0000000000..2d7e7054c8
--- /dev/null
+++ b/src/dotnet/Common/Models/TextEmbedding/EmbeddedContentPart.cs
@@ -0,0 +1,24 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace FoundationaLLM.Common.Models.TextEmbedding
+{
+ ///
+ /// Provides information about an embedded content part.
+ ///
+ public class EmbeddedContentPart
+ {
+ ///
+ /// The text content that was embedded.
+ ///
+ public required string Content { get; set; }
+
+ ///
+ /// The vector embedding associated with the content.
+ ///
+ public required Embedding Embedding { get; set; }
+ }
+}
diff --git a/src/dotnet/Common/Models/TextEmbedding/Embedding.cs b/src/dotnet/Common/Models/TextEmbedding/Embedding.cs
new file mode 100644
index 0000000000..81cca4b2af
--- /dev/null
+++ b/src/dotnet/Common/Models/TextEmbedding/Embedding.cs
@@ -0,0 +1,102 @@
+using System.Runtime.InteropServices;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace FoundationaLLM.Common.Models.TextEmbedding
+{
+ ///
+ /// Stores a vector embedding.
+ /// This type should be serialized using Embedding.JsonConverter.
+ ///
+ public struct Embedding : IEquatable
+ {
+ ///
+ /// The vector that represents the embedding.
+ /// This property is only serialized when Embedding.JsonConverter is used.
+ ///
+ [JsonIgnore]
+ public ReadOnlyMemory Vector { get; set; } = new();
+
+ ///
+ /// Length of the vector representing the embedding.
+ /// This property is only serialized when Embedding.JsonConverter is used.
+ ///
+ [JsonIgnore]
+ public readonly int Length => this.Vector.Length;
+
+ ///
+ /// Creates an embedding from a vector represented as an array of real numbers.
+ ///
+ /// The array containing the vector values.
+ public Embedding(float[] vector) => this.Vector = vector;
+
+ ///
+ /// Creates an embedding from a vector represented as a ReadOnlyMemory object.
+ ///
+ ///
+ public Embedding(ReadOnlyMemory vector) => this.Vector = vector;
+
+ ///
+ /// Creates an embedding with a zero-initialized vector of a specified size.
+ ///
+ /// The size of the vector representing the embedding.
+ public Embedding(int size) => this.Vector = new ReadOnlyMemory(new float[size]);
+
+ ///
+ public readonly bool Equals(Embedding other) => this.Vector.Equals(other.Vector);
+
+ ///
+ /// Indicates whether the current object is equal to another object.
+ ///
+ /// An object to compare with this object.
+ /// True if the object is equal to the obj param and False otherwise.
+ public override readonly bool Equals(object? obj) => (obj is Embedding other && this.Equals(other));
+
+ ///
+ /// Checks if two values are equal.
+ ///
+ /// The first value to be checked.
+ /// The second value to be checked.
+ /// True if the two values are equal, False otherwise.
+ public static bool operator ==(Embedding v1, Embedding v2) => v1.Equals(v2);
+
+ ///
+ /// Checks if two values are different.
+ ///
+ /// The first value to be checked.
+ /// The second value to be checked.
+ /// True if the two values are different, False otherwise.
+ public static bool operator !=(Embedding v1, Embedding v2) => !(v1 == v2);
+
+ ///
+ /// Calculates the hash code for this Embedding.
+ ///
+ /// The hash value represented by an integer.
+ public override readonly int GetHashCode() => this.Vector.GetHashCode();
+
+ ///
+ /// Serializes the content of an Embedding value.
+ /// Note: use Embedding.JsonConverter to serialize objects using
+ /// the Embedding type, for example:
+ /// [JsonPropertyName("vector")]
+ /// [JsonConverter(typeof(Embedding.JsonConverter))]
+ /// public Embedding Vector { get; set; }
+ ///
+ public sealed class JsonConverter : JsonConverter
+ {
+ /// An instance of a converter for float[] that all operations delegate to
+ private static readonly JsonConverter Converter =
+ (JsonConverter)new JsonSerializerOptions().GetConverter(typeof(float[]));
+
+ ///
+ public override Embedding Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) =>
+ new(Converter.Read(ref reader, typeof(float[]), options) ?? []);
+
+ ///
+ public override void Write(Utf8JsonWriter writer, Embedding value, JsonSerializerOptions options) =>
+ Converter.Write(writer, MemoryMarshal.TryGetArray(value.Vector, out ArraySegment array) && array.Count == array.Array!.Length // reuse the backing array only when the vector spans all of it
+ ? array.Array!
+ : value.Vector.ToArray(), options); // otherwise copy exactly the vector's own elements (e.g. a slice)
+ }
+ }
+}
diff --git a/src/dotnet/Common/Models/Vectorization/VectorizationProfileBase.cs b/src/dotnet/Common/Models/Vectorization/VectorizationProfileBase.cs
new file mode 100644
index 0000000000..defb48ad7f
--- /dev/null
+++ b/src/dotnet/Common/Models/Vectorization/VectorizationProfileBase.cs
@@ -0,0 +1,18 @@
+namespace FoundationaLLM.Common.Models.Vectorization
+{
+ ///
+ /// Basic properties for vectorization profiles.
+ ///
+ public class VectorizationProfileBase
+ {
+ ///
+ /// The name of the vectorization profile.
+ ///
+ public required string Name { get; set; }
+
+ ///
+ /// The configuration associated with the vectorization profile.
+ ///
+ public Dictionary? Settings { get; set; } = [];
+ }
+}
diff --git a/src/dotnet/Common/Services/ResourceProviders/ResourceProviderServiceBase.cs b/src/dotnet/Common/Services/ResourceProviders/ResourceProviderServiceBase.cs
index ca45c18b3f..bd86589318 100644
--- a/src/dotnet/Common/Services/ResourceProviders/ResourceProviderServiceBase.cs
+++ b/src/dotnet/Common/Services/ResourceProviders/ResourceProviderServiceBase.cs
@@ -94,6 +94,42 @@ public async Task GetResourceAsync(string resourcePath) where T : class
return await GetResourceAsyncInternal(instances);
}
+ ///
+ public async Task UpsertResourceAsync(string resourcePath, T resource) where T : class
+ {
+ if (!_isInitialized)
+ throw new ResourceProviderException($"The resource provider {_name} is not initialized.");
+ var instances = GetResourceInstancesFromPath(resourcePath);
+ await UpsertResourceAsync(instances, resource); // pass the parsed instances so the protected overload runs; passing resourcePath re-enters this method without end
+ }
+
+ ///
+ public void UpsertResource(string resourcePath, T resource) where T : class
+ {
+ if (!_isInitialized)
+ throw new ResourceProviderException($"The resource provider {_name} is not initialized.");
+ var instances = GetResourceInstancesFromPath(resourcePath);
+ UpsertResource(instances, resource);
+ }
+
+ ///
+ public async Task DeleteResourceAsync(string resourcePath) where T : class
+ {
+ if (!_isInitialized)
+ throw new ResourceProviderException($"The resource provider {_name} is not initialized.");
+ var instances = GetResourceInstancesFromPath(resourcePath);
+ await DeleteResourceAsync(instances);
+ }
+
+ ///
+ public void DeleteResource(string resourcePath) where T : class
+ {
+ if (!_isInitialized)
+ throw new ResourceProviderException($"The resource provider {_name} is not initialized.");
+ var instances = GetResourceInstancesFromPath(resourcePath);
+ DeleteResource(instances);
+ }
+
///
/// The internal implementation of Initialize. Must be overridden in derived classes.
///
@@ -134,6 +170,46 @@ protected virtual async Task GetResourceAsyncInternal(List
+ /// The internal implementation of UpsertResource. Must be overridden in derived classes.
+ ///
+ /// The list of objects parsed from the resource path.
+ /// The instance of the resource being created or updated.
+ ///
+ protected virtual void UpsertResource(List instances, T resource) =>
+ throw new NotImplementedException();
+
+ ///
+ /// The internal implementation of UpsertResourceAsync. Must be overridden in derived classes.
+ ///
+ /// The list of objects parsed from the resource path.
+ /// The instance of the resource being created or updated.
+ ///
+ protected virtual async Task UpsertResourceAsync(List instances, T resource)
+ {
+ await Task.CompletedTask;
+ throw new NotImplementedException();
+ }
+
+ ///
+ /// The internal implementation of DeleteResource. Must be overridden in derived classes.
+ ///
+ /// The list of objects parsed from the resource path.
+ ///
+ protected virtual void DeleteResource(List instances) =>
+ throw new NotImplementedException();
+
+ ///
+ /// The internal implementation of DeleteResourceAsync. Must be overridden in derived classes.
+ ///
+ /// The list of objects parsed from the resource path.
+ ///
+ protected virtual async Task DeleteResourceAsync(List instances)
+ {
+ await Task.CompletedTask;
+ throw new NotImplementedException();
+ }
+
private List GetResourceInstancesFromPath(string resourcePath)
{
if (string.IsNullOrWhiteSpace(resourcePath))
diff --git a/src/dotnet/Common/Services/TextSplitters/TokenTextSplitterService.cs b/src/dotnet/Common/Services/TextSplitters/TokenTextSplitterService.cs
index 48e2b84fd8..7a8d5d0e55 100644
--- a/src/dotnet/Common/Services/TextSplitters/TokenTextSplitterService.cs
+++ b/src/dotnet/Common/Services/TextSplitters/TokenTextSplitterService.cs
@@ -5,74 +5,77 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
+namespace FoundationaLLM.Common.Services.TextSplitters
+{
-///
-/// Splits text based on number of tokens.
-///
-/// The used to tokenize the input text.
-/// The providing the settings for the service.
-/// The logger used for logging.
-public class TokenTextSplitterService(
+ ///
+ /// Splits text based on number of tokens.
+ ///
+ /// The used to tokenize the input text.
+ /// The providing the settings for the service.
+ /// The logger used for logging.
+ public class TokenTextSplitterService(
ITokenizerService tokenizerService,
IOptions options,
ILogger logger) : ITextSplitterService
-{
- private readonly ITokenizerService _tokenizerService = tokenizerService;
- private readonly TokenTextSplitterServiceSettings _settings = options.Value;
- private readonly ILogger _logger = logger;
-
- ///
- public (List TextChunks, string Message) SplitPlainText(string text)
{
- var tokens = _tokenizerService.Encode(text, _settings.TokenizerEncoder);
+ private readonly ITokenizerService _tokenizerService = tokenizerService;
+ private readonly TokenTextSplitterServiceSettings _settings = options.Value;
+ private readonly ILogger _logger = logger;
- if (tokens != null)
+ ///
+ public (List TextChunks, string Message) SplitPlainText(string text)
{
- _logger.LogInformation("The tokenizer identified {TokensCount} tokens.", tokens.Count);
+ var tokens = _tokenizerService.Encode(text, _settings.TokenizerEncoder);
- var chunksCount = (int) Math.Ceiling((1f * tokens!.Count - _settings.OverlapSizeTokens) / (_settings.ChunkSizeTokens - _settings.OverlapSizeTokens));
+ if (tokens != null)
+ {
+ _logger.LogInformation("The tokenizer identified {TokensCount} tokens.", tokens.Count);
- var chunks = Enumerable.Range(0, chunksCount - 1)
- .Select(i => tokens.Skip(i * (_settings.ChunkSizeTokens - _settings.OverlapSizeTokens)).Take(_settings.ChunkSizeTokens).ToArray())
- .Select(t => _tokenizerService.Decode(t, _settings.TokenizerEncoder))
- .ToList();
+ var chunksCount = (int)Math.Ceiling((1f * tokens!.Count - _settings.OverlapSizeTokens) / (_settings.ChunkSizeTokens - _settings.OverlapSizeTokens));
- var lastChunkStart = (chunksCount - 1) * _settings.ChunkSizeTokens;
- var lastChunkSize = tokens.Count - lastChunkStart + 1;
- var resultMessage = string.Empty;
+ var chunks = Enumerable.Range(0, chunksCount - 1)
+ .Select(i => tokens.Skip(i * (_settings.ChunkSizeTokens - _settings.OverlapSizeTokens)).Take(_settings.ChunkSizeTokens).ToArray())
+ .Select(t => _tokenizerService.Decode(t, _settings.TokenizerEncoder))
+ .ToList();
- if (lastChunkSize < 2 * _settings.OverlapSizeTokens)
- {
- // The last chunk is to small, will just incorporate it into the second to last.
- var secondToLastChunkStart = (chunksCount - 2) * _settings.ChunkSizeTokens;
- var newLastChunkSize = tokens.Count - secondToLastChunkStart + 1;
- var newLastChunk = _tokenizerService.Decode(
- tokens
- .Skip(secondToLastChunkStart)
- .Take(newLastChunkSize)
- .ToArray(),
- _settings.TokenizerEncoder);
- chunks.RemoveAt(chunks.Count - 1);
- chunks.Add(newLastChunk);
+ var lastChunkStart = (chunksCount - 1) * _settings.ChunkSizeTokens;
+ var lastChunkSize = tokens.Count - lastChunkStart + 1;
+ var resultMessage = string.Empty;
- resultMessage = $"The number of text chunks is {chunks.Count}. The size of the last chunk is {newLastChunkSize} tokens.";
- }
- else
- {
- var lastChunk = _tokenizerService.Decode(
- tokens
- .Skip(lastChunkStart)
- .Take(lastChunkSize)
- .ToArray(),
- _settings.TokenizerEncoder);
- chunks.Add(lastChunk);
+ if (lastChunkSize < 2 * _settings.OverlapSizeTokens)
+ {
+ // The last chunk is too small, will just incorporate it into the second to last.
+ var secondToLastChunkStart = (chunksCount - 2) * _settings.ChunkSizeTokens;
+ var newLastChunkSize = tokens.Count - secondToLastChunkStart + 1;
+ var newLastChunk = _tokenizerService.Decode(
+ tokens
+ .Skip(secondToLastChunkStart)
+ .Take(newLastChunkSize)
+ .ToArray(),
+ _settings.TokenizerEncoder);
+ chunks.RemoveAt(chunks.Count - 1);
+ chunks.Add(newLastChunk);
- resultMessage = $"The number of text chunks is {chunks.Count}. The size of the last chunk is {lastChunkSize} tokens.";
- }
+ resultMessage = $"The number of text chunks is {chunks.Count}. The size of the last chunk is {newLastChunkSize} tokens.";
+ }
+ else
+ {
+ var lastChunk = _tokenizerService.Decode(
+ tokens
+ .Skip(lastChunkStart)
+ .Take(lastChunkSize)
+ .ToArray(),
+ _settings.TokenizerEncoder);
+ chunks.Add(lastChunk);
- return new (chunks, resultMessage);
+ resultMessage = $"The number of text chunks is {chunks.Count}. The size of the last chunk is {lastChunkSize} tokens.";
+ }
+
+ return new(chunks, resultMessage);
+ }
+ else
+ throw new TextProcessingException("The tokenizer service failed to split the text into tokens.");
}
- else
- throw new TextProcessingException("The tokenizer service failed to split the text into tokens.");
}
}
diff --git a/src/dotnet/Common/Settings/AzureAISearchAuthenticationTypes.cs b/src/dotnet/Common/Settings/AzureAISearchAuthenticationTypes.cs
new file mode 100644
index 0000000000..e65ec4d55e
--- /dev/null
+++ b/src/dotnet/Common/Settings/AzureAISearchAuthenticationTypes.cs
@@ -0,0 +1,29 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace FoundationaLLM.Common.Settings
+{
+ ///
+ /// Types of authentication for Azure AI Search.
+ ///
+ public enum AzureAISearchAuthenticationTypes
+ {
+ ///
+ /// Unknown authentication type.
+ ///
+ Unknown = -1,
+
+ ///
+ /// Azure managed identity authentication type.
+ ///
+ AzureIdentity,
+
+ ///
+ /// API key authentication type.
+ ///
+ APIKey
+ }
+}
diff --git a/src/dotnet/Common/Settings/AzureOpenAIAuthenticationTypes.cs b/src/dotnet/Common/Settings/AzureOpenAIAuthenticationTypes.cs
new file mode 100644
index 0000000000..a9fbe88e5b
--- /dev/null
+++ b/src/dotnet/Common/Settings/AzureOpenAIAuthenticationTypes.cs
@@ -0,0 +1,29 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace FoundationaLLM.Common.Settings
+{
+ ///
+ /// Types of authentication for Azure Open AI.
+ ///
+ public enum AzureOpenAIAuthenticationTypes
+ {
+ ///
+ /// Unknown authentication type.
+ ///
+ Unknown = -1,
+
+ ///
+ /// Azure managed identity authentication type.
+ ///
+ AzureIdentity,
+
+ ///
+ /// API key authentication type.
+ ///
+ APIKey
+ }
+}
diff --git a/src/dotnet/SemanticKernel/Chat/ChatBuilder.cs b/src/dotnet/SemanticKernel-obsolete/Chat/ChatBuilder.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Chat/ChatBuilder.cs
rename to src/dotnet/SemanticKernel-obsolete/Chat/ChatBuilder.cs
diff --git a/src/dotnet/SemanticKernel/Chat/PromptOptimizationSettings.cs b/src/dotnet/SemanticKernel-obsolete/Chat/PromptOptimizationSettings.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Chat/PromptOptimizationSettings.cs
rename to src/dotnet/SemanticKernel-obsolete/Chat/PromptOptimizationSettings.cs
diff --git a/src/dotnet/SemanticKernel/Interfaces/IMemorySource.cs b/src/dotnet/SemanticKernel-obsolete/Interfaces/IMemorySource.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Interfaces/IMemorySource.cs
rename to src/dotnet/SemanticKernel-obsolete/Interfaces/IMemorySource.cs
diff --git a/src/dotnet/SemanticKernel/Interfaces/ISemanticKernelService.cs b/src/dotnet/SemanticKernel-obsolete/Interfaces/ISemanticKernelService.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Interfaces/ISemanticKernelService.cs
rename to src/dotnet/SemanticKernel-obsolete/Interfaces/ISemanticKernelService.cs
diff --git a/src/dotnet/SemanticKernel/Interfaces/ISystemPromptService.cs b/src/dotnet/SemanticKernel-obsolete/Interfaces/ISystemPromptService.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Interfaces/ISystemPromptService.cs
rename to src/dotnet/SemanticKernel-obsolete/Interfaces/ISystemPromptService.cs
diff --git a/src/dotnet/SemanticKernel/Interfaces/ITokenizer.cs b/src/dotnet/SemanticKernel-obsolete/Interfaces/ITokenizer.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Interfaces/ITokenizer.cs
rename to src/dotnet/SemanticKernel-obsolete/Interfaces/ITokenizer.cs
diff --git a/src/dotnet/SemanticKernel/MemorySource/AzureCognitiveSearchMemorySource.cs b/src/dotnet/SemanticKernel-obsolete/MemorySource/AzureCognitiveSearchMemorySource.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/MemorySource/AzureCognitiveSearchMemorySource.cs
rename to src/dotnet/SemanticKernel-obsolete/MemorySource/AzureCognitiveSearchMemorySource.cs
diff --git a/src/dotnet/SemanticKernel/MemorySource/AzureCognitiveSearchMemorySourceConfig.cs b/src/dotnet/SemanticKernel-obsolete/MemorySource/AzureCognitiveSearchMemorySourceConfig.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/MemorySource/AzureCognitiveSearchMemorySourceConfig.cs
rename to src/dotnet/SemanticKernel-obsolete/MemorySource/AzureCognitiveSearchMemorySourceConfig.cs
diff --git a/src/dotnet/SemanticKernel/MemorySource/BlobStorageMemorySource.cs b/src/dotnet/SemanticKernel-obsolete/MemorySource/BlobStorageMemorySource.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/MemorySource/BlobStorageMemorySource.cs
rename to src/dotnet/SemanticKernel-obsolete/MemorySource/BlobStorageMemorySource.cs
diff --git a/src/dotnet/SemanticKernel/MemorySource/BlobStorageMemorySourceConfig.cs b/src/dotnet/SemanticKernel-obsolete/MemorySource/BlobStorageMemorySourceConfig.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/MemorySource/BlobStorageMemorySourceConfig.cs
rename to src/dotnet/SemanticKernel-obsolete/MemorySource/BlobStorageMemorySourceConfig.cs
diff --git a/src/dotnet/SemanticKernel/Models/ConfigurationOptions/AzureCognitiveSearchMemorySourceSettings.cs b/src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/AzureCognitiveSearchMemorySourceSettings.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Models/ConfigurationOptions/AzureCognitiveSearchMemorySourceSettings.cs
rename to src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/AzureCognitiveSearchMemorySourceSettings.cs
diff --git a/src/dotnet/SemanticKernel/Models/ConfigurationOptions/BlobStorageMemorySourceSettings.cs b/src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/BlobStorageMemorySourceSettings.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Models/ConfigurationOptions/BlobStorageMemorySourceSettings.cs
rename to src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/BlobStorageMemorySourceSettings.cs
diff --git a/src/dotnet/SemanticKernel/Models/ConfigurationOptions/DurableSystemPromptServiceSettings.cs b/src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/DurableSystemPromptServiceSettings.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Models/ConfigurationOptions/DurableSystemPromptServiceSettings.cs
rename to src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/DurableSystemPromptServiceSettings.cs
diff --git a/src/dotnet/SemanticKernel/Models/ConfigurationOptions/SemanticKernelServiceSettings.cs b/src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/SemanticKernelServiceSettings.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Models/ConfigurationOptions/SemanticKernelServiceSettings.cs
rename to src/dotnet/SemanticKernel-obsolete/Models/ConfigurationOptions/SemanticKernelServiceSettings.cs
diff --git a/src/dotnet/SemanticKernel/Plugins/Core/TextEmbeddingObjectMemoryPlugin.cs b/src/dotnet/SemanticKernel-obsolete/Plugins/Core/TextEmbeddingObjectMemoryPlugin.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Plugins/Core/TextEmbeddingObjectMemoryPlugin.cs
rename to src/dotnet/SemanticKernel-obsolete/Plugins/Core/TextEmbeddingObjectMemoryPlugin.cs
diff --git a/src/dotnet/SemanticKernel/Plugins/Core/TextSummaryPlugin.cs b/src/dotnet/SemanticKernel-obsolete/Plugins/Core/TextSummaryPlugin.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Plugins/Core/TextSummaryPlugin.cs
rename to src/dotnet/SemanticKernel-obsolete/Plugins/Core/TextSummaryPlugin.cs
diff --git a/src/dotnet/SemanticKernel/Plugins/Memory/VectorMemoryStore.cs b/src/dotnet/SemanticKernel-obsolete/Plugins/Memory/VectorMemoryStore.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Plugins/Memory/VectorMemoryStore.cs
rename to src/dotnet/SemanticKernel-obsolete/Plugins/Memory/VectorMemoryStore.cs
diff --git a/src/dotnet/SemanticKernel-obsolete/SemanticKernel-obsolete.csproj b/src/dotnet/SemanticKernel-obsolete/SemanticKernel-obsolete.csproj
new file mode 100644
index 0000000000..7279918af0
--- /dev/null
+++ b/src/dotnet/SemanticKernel-obsolete/SemanticKernel-obsolete.csproj
@@ -0,0 +1,29 @@
+
+
+
+ net8.0
+ enable
+ enable
+ FoundationaLLM.SemanticKernel.Core
+ FoundationaLLM.SemanticKernel.Core
+ True
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/dotnet/SemanticKernel/Services/DurableSystemPromptService.cs b/src/dotnet/SemanticKernel-obsolete/Services/DurableSystemPromptService.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Services/DurableSystemPromptService.cs
rename to src/dotnet/SemanticKernel-obsolete/Services/DurableSystemPromptService.cs
diff --git a/src/dotnet/SemanticKernel/Services/InMemorySystemPromptService.cs b/src/dotnet/SemanticKernel-obsolete/Services/InMemorySystemPromptService.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Services/InMemorySystemPromptService.cs
rename to src/dotnet/SemanticKernel-obsolete/Services/InMemorySystemPromptService.cs
diff --git a/src/dotnet/SemanticKernel/Services/SemanticKernelService.cs b/src/dotnet/SemanticKernel-obsolete/Services/SemanticKernelService.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Services/SemanticKernelService.cs
rename to src/dotnet/SemanticKernel-obsolete/Services/SemanticKernelService.cs
diff --git a/src/dotnet/SemanticKernel/Services/SemanticKernelTokenizer.cs b/src/dotnet/SemanticKernel-obsolete/Services/SemanticKernelTokenizer.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Services/SemanticKernelTokenizer.cs
rename to src/dotnet/SemanticKernel-obsolete/Services/SemanticKernelTokenizer.cs
diff --git a/src/dotnet/SemanticKernel/Text/StringExtensions.cs b/src/dotnet/SemanticKernel-obsolete/Text/StringExtensions.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/Text/StringExtensions.cs
rename to src/dotnet/SemanticKernel-obsolete/Text/StringExtensions.cs
diff --git a/src/dotnet/SemanticKernel/TextEmbedding/EmbeddingUtility.cs b/src/dotnet/SemanticKernel-obsolete/TextEmbedding/EmbeddingUtility.cs
similarity index 100%
rename from src/dotnet/SemanticKernel/TextEmbedding/EmbeddingUtility.cs
rename to src/dotnet/SemanticKernel-obsolete/TextEmbedding/EmbeddingUtility.cs
diff --git a/src/dotnet/SemanticKernel/nuget.config b/src/dotnet/SemanticKernel-obsolete/nuget.config
similarity index 100%
rename from src/dotnet/SemanticKernel/nuget.config
rename to src/dotnet/SemanticKernel-obsolete/nuget.config
diff --git a/src/dotnet/SemanticKernel/Models/Configuration/AzureAISearchIndexingServiceSettings.cs b/src/dotnet/SemanticKernel/Models/Configuration/AzureAISearchIndexingServiceSettings.cs
new file mode 100644
index 0000000000..bb65090224
--- /dev/null
+++ b/src/dotnet/SemanticKernel/Models/Configuration/AzureAISearchIndexingServiceSettings.cs
@@ -0,0 +1,32 @@
+using FoundationaLLM.Common.Settings;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.Json.Serialization;
+using System.Threading.Tasks;
+
+namespace FoundationaLLM.SemanticKernel.Core.Models.Configuration
+{
+ /// <summary>
+ /// Provides configuration settings for the Azure AI Search indexing service.
+ /// </summary>
+ public record AzureAISearchIndexingServiceSettings
+ {
+ /// <summary>
+ /// The endpoint of the Azure AI Search deployment.
+ /// </summary>
+ public required string Endpoint { get; set; }
+
+ /// <summary>
+ /// The API key used to connect to the Azure AI Search endpoint. Valid only if AuthenticationType is APIKey.
+ /// </summary>
+ public string? APIKey { get; set; }
+
+ /// <summary>
+ /// The <see cref="AzureAISearchAuthenticationTypes"/> value indicating which authentication mechanism to use.
+ /// </summary>
+ [JsonConverter(typeof(JsonStringEnumConverter))]
+ public required AzureAISearchAuthenticationTypes AuthenticationType { get; set; }
+ }
+}
diff --git a/src/dotnet/SemanticKernel/Models/Configuration/SemanticKernelTextEmbeddingServiceSettings.cs b/src/dotnet/SemanticKernel/Models/Configuration/SemanticKernelTextEmbeddingServiceSettings.cs
new file mode 100644
index 0000000000..538a54da34
--- /dev/null
+++ b/src/dotnet/SemanticKernel/Models/Configuration/SemanticKernelTextEmbeddingServiceSettings.cs
@@ -0,0 +1,32 @@
+using FoundationaLLM.Common.Settings;
+using System.Text.Json.Serialization;
+
+namespace FoundationaLLM.SemanticKernel.Core.Models.Configuration
+{
+ /// <summary>
+ /// Provides configuration settings for the Semantic Kernel text embedding service.
+ /// </summary>
+ public record SemanticKernelTextEmbeddingServiceSettings
+ {
+ /// <summary>
+ /// The name of the Azure Open AI deployment.
+ /// </summary>
+ public required string DeploymentName { get; set; }
+
+ /// <summary>
+ /// The endpoint of the Azure Open AI deployment.
+ /// </summary>
+ public required string Endpoint { get; set; }
+
+ /// <summary>
+ /// The API key used to connect to the Azure Open AI endpoint. Valid only if AuthenticationType is APIKey.
+ /// </summary>
+ public string? APIKey { get; set; }
+
+ /// <summary>
+ /// The <see cref="AzureOpenAIAuthenticationTypes"/> value indicating which authentication mechanism to use.
+ /// </summary>
+ [JsonConverter(typeof(JsonStringEnumConverter))]
+ public required AzureOpenAIAuthenticationTypes AuthenticationType { get; set; }
+ }
+}
diff --git a/src/dotnet/SemanticKernel/SemanticKernel.csproj b/src/dotnet/SemanticKernel/SemanticKernel.csproj
index 7279918af0..3b2f01cb40 100644
--- a/src/dotnet/SemanticKernel/SemanticKernel.csproj
+++ b/src/dotnet/SemanticKernel/SemanticKernel.csproj
@@ -6,24 +6,16 @@
enable
FoundationaLLM.SemanticKernel.Core
FoundationaLLM.SemanticKernel.Core
- True
+ True
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
-
+
diff --git a/src/dotnet/SemanticKernel/Services/AzureAISearchIndexingService.cs b/src/dotnet/SemanticKernel/Services/AzureAISearchIndexingService.cs
new file mode 100644
index 0000000000..3411b15b8f
--- /dev/null
+++ b/src/dotnet/SemanticKernel/Services/AzureAISearchIndexingService.cs
@@ -0,0 +1,144 @@
+using Azure.Identity;
+using FoundationaLLM.Common.Exceptions;
+using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.TextEmbedding;
+using FoundationaLLM.Common.Settings;
+using FoundationaLLM.SemanticKernel.Core.Models.Configuration;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Connectors.AzureAISearch;
+using Microsoft.SemanticKernel.Embeddings;
+using Microsoft.SemanticKernel.Memory;
+using System.ComponentModel;
+
+#pragma warning disable SKEXP0003, SKEXP0021
+
+namespace FoundationaLLM.SemanticKernel.Core.Services
+{
+    /// <summary>
+    /// Provides vector embedding indexing based on Azure AI Search.
+    /// </summary>
+    public class AzureAISearchIndexingService : IIndexingService
+    {
+        private readonly AzureAISearchIndexingServiceSettings _settings;
+        private readonly ILogger _logger;
+        private readonly AzureAISearchMemoryStore _memoryStore;
+
+        /// <summary>
+        /// Creates a new <see cref="AzureAISearchIndexingService"/> instance.
+        /// </summary>
+        /// <param name="options">The <see cref="IOptions{TOptions}"/> providing configuration settings.</param>
+        /// <param name="logger">The <see cref="ILogger"/> used for logging.</param>
+        /// <exception cref="ConfigurationValueException">The configured endpoint or API key is missing.</exception>
+        /// <exception cref="InvalidEnumArgumentException">The configured authentication type is not supported.</exception>
+        public AzureAISearchIndexingService(
+            IOptions<AzureAISearchIndexingServiceSettings> options,
+            ILogger<AzureAISearchIndexingService> logger)
+        {
+            _settings = options.Value;
+            _logger = logger;
+            _memoryStore = CreateMemoryStore();
+        }
+
+        /// <inheritdoc/>
+        public async Task<List<string>> IndexEmbeddingsAsync(EmbeddedContent embeddedContent, string indexName)
+        {
+            var contentId = embeddedContent.ContentId.UniqueId;
+
+            // Every content part needs its own record id. Reusing the bare content id for all parts
+            // makes each upsert target the same record, so the parts of a document replace each other.
+            var memoryRecords = embeddedContent.ContentParts.Select((cp, index) =>
+            {
+                var recordId = $"{contentId}#{index + 1}";
+                return new MemoryRecord(
+                    new MemoryRecordMetadata(
+                        true,
+                        recordId,
+                        cp.Content,
+                        "Generated by FoundationaLLM.",
+                        embeddedContent.ContentSourceProfileName,
+                        string.Empty),
+                    cp.Embedding.Vector,
+                    recordId,
+                    DateTimeOffset.UtcNow);
+            });
+
+            var indexIds = new List<string>();
+
+            await foreach (var id in _memoryStore.UpsertBatchAsync(indexName, memoryRecords))
+            {
+                indexIds.Add(id);
+            }
+
+            return indexIds;
+        }
+
+        /// <summary>
+        /// Creates an <see cref="AzureAISearchMemoryStore"/> instance using the endpoint and the API key.
+        /// </summary>
+        /// <param name="endpoint">The endpoint of the Azure AI Search deployment.</param>
+        /// <param name="apiKey">The API key used to connect to the Azure AI Search deployment.</param>
+        /// <returns>The <see cref="AzureAISearchMemoryStore"/> instance.</returns>
+        private AzureAISearchMemoryStore CreateMemoryStoreFromAPIKey(string endpoint, string apiKey) =>
+            new AzureAISearchMemoryStore(endpoint, apiKey);
+
+        /// <summary>
+        /// Creates an <see cref="AzureAISearchMemoryStore"/> instance using the endpoint and the Azure identity.
+        /// </summary>
+        /// <param name="endpoint">The endpoint of the Azure AI Search deployment.</param>
+        /// <returns>The <see cref="AzureAISearchMemoryStore"/> instance.</returns>
+        private AzureAISearchMemoryStore CreateMemoryStoreFromIdentity(string endpoint) =>
+            new AzureAISearchMemoryStore(endpoint, new DefaultAzureCredential());
+
+        /// <summary>
+        /// Ensures the endpoint is neither null nor whitespace; logs a critical message and throws otherwise.
+        /// </summary>
+        /// <param name="value">The endpoint value to validate.</param>
+        /// <exception cref="ConfigurationValueException">The value is null, empty, or whitespace.</exception>
+        private void ValidateEndpoint(string? value)
+        {
+            if (string.IsNullOrWhiteSpace(value))
+            {
+                _logger.LogCritical("The Azure AI Search endpoint is invalid.");
+                throw new ConfigurationValueException("The Azure AI Search endpoint is invalid.");
+            }
+        }
+
+        /// <summary>
+        /// Ensures the API key is neither null nor whitespace; logs a critical message and throws otherwise.
+        /// </summary>
+        /// <param name="value">The API key value to validate.</param>
+        /// <exception cref="ConfigurationValueException">The value is null, empty, or whitespace.</exception>
+        private void ValidateAPIKey(string? value)
+        {
+            if (string.IsNullOrWhiteSpace(value))
+            {
+                _logger.LogCritical("The Azure AI Search API key is invalid.");
+                throw new ConfigurationValueException("The Azure AI Search API key is invalid.");
+            }
+        }
+
+        /// <summary>
+        /// Validates the settings required by the configured authentication type and creates the matching memory store.
+        /// </summary>
+        /// <returns>The <see cref="AzureAISearchMemoryStore"/> instance.</returns>
+        /// <exception cref="ConfigurationValueException">A required setting is missing.</exception>
+        /// <exception cref="InvalidEnumArgumentException">The authentication type is not supported.</exception>
+        private AzureAISearchMemoryStore CreateMemoryStore()
+        {
+            switch (_settings.AuthenticationType)
+            {
+                case AzureAISearchAuthenticationTypes.APIKey:
+                    ValidateEndpoint(_settings.Endpoint);
+                    ValidateAPIKey(_settings.APIKey);
+                    return CreateMemoryStoreFromAPIKey(_settings.Endpoint, _settings.APIKey!);
+                case AzureAISearchAuthenticationTypes.AzureIdentity:
+                    ValidateEndpoint(_settings.Endpoint);
+                    return CreateMemoryStoreFromIdentity(_settings.Endpoint);
+                default:
+                    throw new InvalidEnumArgumentException($"The authentication type {_settings.AuthenticationType} is not supported.");
+            }
+        }
+    }
+}
diff --git a/src/dotnet/SemanticKernel/Services/SemanticKernelTextEmbeddingService.cs b/src/dotnet/SemanticKernel/Services/SemanticKernelTextEmbeddingService.cs
new file mode 100644
index 0000000000..1ab4983d68
--- /dev/null
+++ b/src/dotnet/SemanticKernel/Services/SemanticKernelTextEmbeddingService.cs
@@ -0,0 +1,127 @@
+using Azure.Identity;
+using FoundationaLLM.Common.Exceptions;
+using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.TextEmbedding;
+using FoundationaLLM.Common.Settings;
+using FoundationaLLM.SemanticKernel.Core.Models.Configuration;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Embeddings;
+using System.ComponentModel;
+
+#pragma warning disable SKEXP0001, SKEXP0011
+
+namespace FoundationaLLM.SemanticKernel.Core.Services
+{
+ /// <summary>
+ /// Generates text embeddings using the Semantic Kernel orchestrator.
+ /// </summary>
+ public class SemanticKernelTextEmbeddingService : ITextEmbeddingService
+ {
+ private readonly SemanticKernelTextEmbeddingServiceSettings _settings;
+ private readonly ILogger _logger;
+ private readonly Kernel _kernel;
+ private readonly ITextEmbeddingGenerationService _textEmbeddingService;
+
+ /// <summary>
+ /// Creates a new <see cref="SemanticKernelTextEmbeddingService"/> instance.
+ /// </summary>
+ /// <param name="options">The options object providing configuration settings.</param>
+ /// <param name="logger">The logger used for logging.</param>
+ public SemanticKernelTextEmbeddingService(
+ IOptions options,
+ ILogger logger)
+ {
+ _settings = options.Value;
+ _logger = logger;
+ _kernel = CreateKernel(); // validates the settings and throws if a required value is missing
+ _textEmbeddingService = _kernel.GetRequiredService();
+ }
+
+ /// <inheritdoc/>
+ public async Task<(Embedding Embedding, int TokenCount)> GetEmbeddingAsync(string text)
+ {
+ var embedding = await _textEmbeddingService.GenerateEmbeddingAsync(text);
+ return new(new(embedding), 0); // the token count is not computed here; 0 is always returned
+ }
+
+ /// <inheritdoc/>
+ public async Task<(IList Embeddings, int TokenCount)> GetEmbeddingsAsync(IList texts)
+ {
+ var embeddings = await _textEmbeddingService.GenerateEmbeddingsAsync(texts);
+ return new(embeddings.Select(e => new Embedding(e)).ToList(), 0); // the token count is not computed here; 0 is always returned
+ }
+
+ /// <summary>
+ /// Creates a <see cref="Kernel"/> instance using the deployment name, endpoint, and API key.
+ /// </summary>
+ /// <param name="deploymentName">The name of the Azure Open AI deployment.</param>
+ /// <param name="endpoint">The endpoint of the Azure Open AI deployment.</param>
+ /// <param name="apiKey">The API key used to connect to the Azure Open AI deployment.</param>
+ /// <returns>The <see cref="Kernel"/> instance.</returns>
+ private Kernel CreateKernelFromAPIKey(string deploymentName, string endpoint, string apiKey)
+ {
+ var builder = Kernel.CreateBuilder();
+ builder.AddAzureOpenAITextEmbeddingGeneration(deploymentName, endpoint, apiKey);
+ return builder.Build();
+ }
+
+ /// <summary>
+ /// Creates a <see cref="Kernel"/> instance using the deployment name, endpoint, and the Azure identity.
+ /// </summary>
+ /// <param name="deploymentName">The name of the Azure Open AI deployment.</param>
+ /// <param name="endpoint">The endpoint of the Azure Open AI deployment.</param>
+ /// <returns>The <see cref="Kernel"/> instance.</returns>
+ private Kernel CreateKernelFromIdentity(string deploymentName, string endpoint)
+ {
+ var builder = Kernel.CreateBuilder();
+ builder.AddAzureOpenAITextEmbeddingGeneration(deploymentName, endpoint, new DefaultAzureCredential());
+ return builder.Build();
+ }
+
+ private void ValidateDeploymentName(string? value) // logs a critical message and throws when the value is null or whitespace
+ {
+ if (string.IsNullOrWhiteSpace(value))
+ {
+ _logger.LogCritical("The Azure Open AI deployment name is invalid.");
+ throw new ConfigurationValueException("The Azure Open AI deployment name is invalid.");
+ }
+ }
+
+ private void ValidateEndpoint(string? value) // logs a critical message and throws when the value is null or whitespace
+ {
+ if (string.IsNullOrWhiteSpace(value))
+ {
+ _logger.LogCritical("The Azure Open AI endpoint is invalid.");
+ throw new ConfigurationValueException("The Azure Open AI endpoint is invalid.");
+ }
+ }
+ private void ValidateAPIKey(string? value) // logs a critical message and throws when the value is null or whitespace
+ {
+ if (string.IsNullOrWhiteSpace(value))
+ {
+ _logger.LogCritical("The Azure Open AI API key is invalid.");
+ throw new ConfigurationValueException("The Azure Open AI API key is invalid.");
+ }
+ }
+
+ private Kernel CreateKernel() // validates the settings needed by the configured authentication type, then builds the kernel
+ {
+ switch (_settings.AuthenticationType)
+ {
+ case AzureOpenAIAuthenticationTypes.APIKey:
+ ValidateDeploymentName(_settings.DeploymentName);
+ ValidateEndpoint(_settings.Endpoint);
+ ValidateAPIKey(_settings.APIKey);
+ return CreateKernelFromAPIKey(_settings.DeploymentName, _settings.Endpoint, _settings.APIKey!);
+ case AzureOpenAIAuthenticationTypes.AzureIdentity:
+ ValidateDeploymentName(_settings.DeploymentName);
+ ValidateEndpoint(_settings.Endpoint);
+ return CreateKernelFromIdentity(_settings.DeploymentName, _settings.Endpoint);
+ default:
+ throw new InvalidEnumArgumentException($"The authentication type {_settings.AuthenticationType} is not supported.");
+ }
+ }
+ }
+}
diff --git a/src/dotnet/SemanticKernelAPI/SemanticKernelAPI.csproj b/src/dotnet/SemanticKernelAPI/SemanticKernelAPI.csproj
index 389c777f82..b123838bae 100644
--- a/src/dotnet/SemanticKernelAPI/SemanticKernelAPI.csproj
+++ b/src/dotnet/SemanticKernelAPI/SemanticKernelAPI.csproj
@@ -32,7 +32,7 @@
-
+
diff --git a/src/dotnet/Vectorization/Handlers/EmbeddingHandler.cs b/src/dotnet/Vectorization/Handlers/EmbeddingHandler.cs
index d81dc09343..a51aac61dc 100644
--- a/src/dotnet/Vectorization/Handlers/EmbeddingHandler.cs
+++ b/src/dotnet/Vectorization/Handlers/EmbeddingHandler.cs
@@ -1,8 +1,13 @@
using FoundationaLLM.Common.Constants;
+using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.TextEmbedding;
+using FoundationaLLM.Vectorization.Exceptions;
using FoundationaLLM.Vectorization.Interfaces;
using FoundationaLLM.Vectorization.Models;
using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
+using System.Text.Json;
namespace FoundationaLLM.Vectorization.Handlers
{
@@ -35,6 +40,42 @@ protected override async Task ProcessRequest(
VectorizationRequest request,
VectorizationState state,
IConfigurationSection? stepConfiguration,
- CancellationToken cancellationToken) => await Task.Delay(TimeSpan.FromSeconds(10));
+ CancellationToken cancellationToken)
+ {
+ await _stateService.LoadArtifacts(state, VectorizationArtifactType.TextPartition);
+
+ var textPartitioningArtifacts = state.Artifacts.Where(a => a.Type == VectorizationArtifactType.TextPartition).ToList();
+
+ if (textPartitioningArtifacts == null // ToList() never returns null, so only the emptiness check can trigger
+ || textPartitioningArtifacts.Count == 0)
+ {
+ state.Log(this, request.Id, _messageId, "The text partition artifacts were not found.");
+ return; // nothing to embed: the step ends after logging, without throwing
+ }
+
+ var serviceFactory = _serviceProvider.GetService>()
+ ?? throw new VectorizationException($"Could not retrieve the text embedding service factory instance.");
+ var textEmbedding = serviceFactory.GetService(_parameters["text_embedding_profile_name"]);
+
+ var embeddingResult = await textEmbedding.GetEmbeddingsAsync( // all partitions are embedded in a single call
+ textPartitioningArtifacts.Select(tpa => tpa.Content!).ToList());
+
+ var position = 0; // pre-incremented below, so artifact positions start at 1
+ var serializerOptions = new JsonSerializerOptions
+ {
+ Converters =
+ {
+ new Embedding.JsonConverter()
+ }
+ };
+
+ foreach (var embedding in embeddingResult.Embeddings) // one TextEmbeddingVector artifact per returned embedding, stored as JSON
+ state.AddOrReplaceArtifact(new VectorizationArtifact
+ {
+ Type = VectorizationArtifactType.TextEmbeddingVector,
+ Position = ++position,
+ Content = JsonSerializer.Serialize(embedding, serializerOptions)
+ });
+ }
}
}
diff --git a/src/dotnet/Vectorization/Handlers/ExtractionHandler.cs b/src/dotnet/Vectorization/Handlers/ExtractionHandler.cs
index c87d197873..c3ffe5c605 100644
--- a/src/dotnet/Vectorization/Handlers/ExtractionHandler.cs
+++ b/src/dotnet/Vectorization/Handlers/ExtractionHandler.cs
@@ -41,9 +41,9 @@ protected override async Task ProcessRequest(
IConfigurationSection? stepConfiguration,
CancellationToken cancellationToken)
{
- var serviceFactory = _serviceProvider.GetService>()
+ var serviceFactory = _serviceProvider.GetService>()
?? throw new VectorizationException($"Could not retrieve the content source service factory instance.");
- var contentSource = serviceFactory.CreateService(_parameters["content_source_name"]);
+ var contentSource = serviceFactory.GetService(_parameters["content_source_profile_name"]);
var textContent = await contentSource.ExtractTextFromFileAsync(request.ContentIdentifier.MultipartId, cancellationToken);
@@ -53,6 +53,7 @@ protected override async Task ProcessRequest(
Position = 1,
Content = textContent
});
+ state.ContentSourceProfileName = _parameters["content_source_profile_name"];
}
}
}
diff --git a/src/dotnet/Vectorization/Handlers/IndexingHandler.cs b/src/dotnet/Vectorization/Handlers/IndexingHandler.cs
index 096f13d7dc..c63eb6c94b 100644
--- a/src/dotnet/Vectorization/Handlers/IndexingHandler.cs
+++ b/src/dotnet/Vectorization/Handlers/IndexingHandler.cs
@@ -1,8 +1,13 @@
using FoundationaLLM.Common.Constants;
+using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.TextEmbedding;
+using FoundationaLLM.Vectorization.Exceptions;
using FoundationaLLM.Vectorization.Interfaces;
using FoundationaLLM.Vectorization.Models;
using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
+using System.Text.Json;
namespace FoundationaLLM.Vectorization.Handlers
{
@@ -35,6 +40,55 @@ protected override async Task ProcessRequest(
VectorizationRequest request,
VectorizationState state,
IConfigurationSection? stepConfiguration,
- CancellationToken cancellationToken) => await Task.Delay(TimeSpan.FromSeconds(10));
+            CancellationToken cancellationToken)
+        {
+            await _stateService.LoadArtifacts(state, VectorizationArtifactType.TextEmbeddingVector);
+
+            var textEmbeddingArtifacts = state.Artifacts.Where(a => a.Type == VectorizationArtifactType.TextEmbeddingVector).ToList();
+
+            if (textEmbeddingArtifacts.Count == 0) // ToList() never returns null, so no null check is needed
+            {
+                state.Log(this, request.Id, _messageId, "The text embedding artifacts were not found.");
+                return;
+            }
+
+            // NOTE(review): only TextEmbeddingVector artifacts are loaded above; confirm the TextPartition content is already in the state.
+            var textPartitioningArtifacts = state.Artifacts.Where(a => a.Type == VectorizationArtifactType.TextPartition).ToList();
+
+            if (textPartitioningArtifacts.Count == 0)
+            {
+                state.Log(this, request.Id, _messageId, "The text partition artifacts were not found.");
+                return;
+            }
+
+            // Partitions and embeddings are paired by list index below. Differing counts would either throw an
+            // opaque ArgumentOutOfRangeException or silently drop content, so fail explicitly instead.
+            if (textPartitioningArtifacts.Count != textEmbeddingArtifacts.Count)
+            {
+                throw new VectorizationException($"Found {textPartitioningArtifacts.Count} text partition artifacts but {textEmbeddingArtifacts.Count} text embedding artifacts.");
+            }
+
+            var serializerOptions = new JsonSerializerOptions { Converters = { new Embedding.JsonConverter() } };
+
+            var embeddedContent = new EmbeddedContent
+            {
+                ContentId = request.ContentIdentifier,
+                ContentSourceProfileName = state.ContentSourceProfileName!,
+                ContentParts = Enumerable.Range(0, textEmbeddingArtifacts.Count)
+                    .Select(i => new EmbeddedContentPart
+                    {
+                        Content = textPartitioningArtifacts[i].Content!,
+                        Embedding = JsonSerializer.Deserialize<Embedding>(textEmbeddingArtifacts[i].Content!, serializerOptions)
+                    }).ToList()
+            };
+
+            var serviceFactory = _serviceProvider.GetService<IVectorizationServiceFactory<IIndexingService>>()
+                ?? throw new VectorizationException($"Could not retrieve the indexing service factory instance.");
+            var indexing = serviceFactory.GetServiceWithProfile(_parameters["indexing_profile_name"]);
+
+            await indexing.Service.IndexEmbeddingsAsync(
+                embeddedContent,
+                indexing.VectorizationProfile.Settings!["IndexName"]);
+        }
}
}
diff --git a/src/dotnet/Vectorization/Handlers/PartitionHandler.cs b/src/dotnet/Vectorization/Handlers/PartitionHandler.cs
index ab7300bded..59c9563688 100644
--- a/src/dotnet/Vectorization/Handlers/PartitionHandler.cs
+++ b/src/dotnet/Vectorization/Handlers/PartitionHandler.cs
@@ -53,9 +53,9 @@ protected override async Task ProcessRequest(
return;
}
- var serviceFactory = _serviceProvider.GetService>()
+ var serviceFactory = _serviceProvider.GetService>()
?? throw new VectorizationException($"Could not retrieve the text splitter service factory instance.");
- var textSplitter = serviceFactory.CreateService(_parameters["text_partition_profile_name"]);
+ var textSplitter = serviceFactory.GetService(_parameters["text_partition_profile_name"]);
var splitResult = textSplitter.SplitPlainText(extractedTextArtifact.Content!);
diff --git a/src/dotnet/Vectorization/Handlers/VectorizationStepHandlerBase.cs b/src/dotnet/Vectorization/Handlers/VectorizationStepHandlerBase.cs
index 8e46e6a21b..4031cac567 100644
--- a/src/dotnet/Vectorization/Handlers/VectorizationStepHandlerBase.cs
+++ b/src/dotnet/Vectorization/Handlers/VectorizationStepHandlerBase.cs
@@ -59,8 +59,10 @@ public class VectorizationStepHandlerBase(
public string StepId => _stepId;
///
- public async Task Invoke(VectorizationRequest request, VectorizationState state, CancellationToken cancellationToken)
+ public async Task Invoke(VectorizationRequest request, VectorizationState state, CancellationToken cancellationToken)
{
+ var success = true;
+
try
{
state.LogHandlerStart(this, request.Id, _messageId);
@@ -93,9 +95,12 @@ public async Task Invoke(VectorizationRequest request, VectorizationState state,
}
catch (Exception ex)
{
+ success = false;
state.LogHandlerError(this, request.Id, _messageId, ex);
_logger.LogError(ex, "Error in executing [{HandlerId}] step handler for request {VectorizationRequestId} (message id {MessageId}).", _stepId, request.Id, _messageId);
}
+
+ return success;
}
private void ValidateRequest(VectorizationRequest request)
diff --git a/src/dotnet/Vectorization/Interfaces/IVectorizationStepHandler.cs b/src/dotnet/Vectorization/Interfaces/IVectorizationStepHandler.cs
index d8f6f39682..b80a1f4f7e 100644
--- a/src/dotnet/Vectorization/Interfaces/IVectorizationStepHandler.cs
+++ b/src/dotnet/Vectorization/Interfaces/IVectorizationStepHandler.cs
@@ -19,7 +19,7 @@ public interface IVectorizationStepHandler
/// The for which the step should be handled.
/// The holding the state associated with the vectorization request.
/// The to monitor for cancellation requests.
- ///
- Task Invoke(VectorizationRequest request, VectorizationState state, CancellationToken cancellationToken);
+ /// <returns>True if the vectorization step request was handled successfully.</returns>
+ Task Invoke(VectorizationRequest request, VectorizationState state, CancellationToken cancellationToken);
}
}
diff --git a/src/dotnet/Vectorization/Models/Resources/ContentSource.cs b/src/dotnet/Vectorization/Models/Resources/ContentSourceProfile.cs
similarity index 63%
rename from src/dotnet/Vectorization/Models/Resources/ContentSource.cs
rename to src/dotnet/Vectorization/Models/Resources/ContentSourceProfile.cs
index cc3813cbd4..149e92e975 100644
--- a/src/dotnet/Vectorization/Models/Resources/ContentSource.cs
+++ b/src/dotnet/Vectorization/Models/Resources/ContentSourceProfile.cs
@@ -1,17 +1,13 @@
-using System.Text.Json.Serialization;
+using FoundationaLLM.Common.Models.Vectorization;
+using System.Text.Json.Serialization;
namespace FoundationaLLM.Vectorization.Models.Resources
{
///
/// Provides detials about a content source.
///
- public class ContentSource
+ public class ContentSourceProfile : VectorizationProfileBase
{
- ///
- /// The name of the content source.
- ///
- public required string Name { get; set; }
-
///
/// The type of the content source.
///
diff --git a/src/dotnet/Vectorization/Models/Resources/ContentSourceStore.cs b/src/dotnet/Vectorization/Models/Resources/ContentSourceStore.cs
index fab5b72937..1f4d9b9e4a 100644
--- a/src/dotnet/Vectorization/Models/Resources/ContentSourceStore.cs
+++ b/src/dotnet/Vectorization/Models/Resources/ContentSourceStore.cs
@@ -11,6 +11,6 @@ public class ContentSourceStore
///
/// The list of all content sources that are registered for use by the vectorization pipelines.
///
- public required List ContentSources { get; set; }
+ public required List ContentSourceProfiles { get; set; }
}
}
diff --git a/src/dotnet/Vectorization/Models/Resources/IndexerType.cs b/src/dotnet/Vectorization/Models/Resources/IndexerType.cs
new file mode 100644
index 0000000000..de45c3624b
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/IndexerType.cs
@@ -0,0 +1,13 @@
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Types of vector indexes available to store embeddings.
+ /// </summary>
+ public enum IndexerType
+ {
+ /// <summary>
+ /// Indexer using Azure AI Search vector indexes.
+ /// </summary>
+ AzureAISearchIndexer
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/IndexingProfile.cs b/src/dotnet/Vectorization/Models/Resources/IndexingProfile.cs
new file mode 100644
index 0000000000..add495b8a7
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/IndexingProfile.cs
@@ -0,0 +1,17 @@
+using FoundationaLLM.Common.Models.Vectorization;
+using System.Text.Json.Serialization;
+
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Provides details about an indexing profile.
+ /// </summary>
+ public class IndexingProfile : VectorizationProfileBase
+ {
+ /// <summary>
+ /// The type of the indexer.
+ /// </summary>
+ [JsonConverter(typeof(JsonStringEnumConverter))]
+ public required IndexerType Indexer { get; set; }
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/IndexingProfileStore.cs b/src/dotnet/Vectorization/Models/Resources/IndexingProfileStore.cs
new file mode 100644
index 0000000000..583fa0ad6b
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/IndexingProfileStore.cs
@@ -0,0 +1,13 @@
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Models the content of the indexing profiles store managed by the FoundationaLLM.Vectorization resource provider.
+ /// </summary>
+ public class IndexingProfileStore
+ {
+ /// <summary>
+ /// The list of all indexing profiles that are registered for use by the vectorization pipelines.
+ /// </summary>
+ public required List IndexingProfiles { get; set; }
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextEmbeddingProfile.cs b/src/dotnet/Vectorization/Models/Resources/TextEmbeddingProfile.cs
new file mode 100644
index 0000000000..68b4e1db85
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/TextEmbeddingProfile.cs
@@ -0,0 +1,17 @@
+using FoundationaLLM.Common.Models.Vectorization;
+using System.Text.Json.Serialization;
+
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Provides details about a text embedding profile.
+ /// </summary>
+ public class TextEmbeddingProfile : VectorizationProfileBase
+ {
+ /// <summary>
+ /// The type of the text embedding.
+ /// </summary>
+ [JsonConverter(typeof(JsonStringEnumConverter))]
+ public required TextEmbeddingType TextEmbedding { get; set; }
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextEmbeddingProfileStore.cs b/src/dotnet/Vectorization/Models/Resources/TextEmbeddingProfileStore.cs
new file mode 100644
index 0000000000..3d293cb8cb
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/TextEmbeddingProfileStore.cs
@@ -0,0 +1,13 @@
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Models the content of the text embedding profiles store managed by the FoundationaLLM.Vectorization resource provider.
+ /// </summary>
+ public class TextEmbeddingProfileStore
+ {
+ /// <summary>
+ /// The list of all embedding profiles that are registered for use by the vectorization pipelines.
+ /// </summary>
+ public required List TextEmbeddingProfiles { get; set; }
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextEmbeddingType.cs b/src/dotnet/Vectorization/Models/Resources/TextEmbeddingType.cs
new file mode 100644
index 0000000000..116a1b1ad2
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/TextEmbeddingType.cs
@@ -0,0 +1,19 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Types of text embedding implementations available for embedding text.
+ /// </summary>
+ public enum TextEmbeddingType
+ {
+ /// <summary>
+ /// Text embedding that uses Semantic Kernel to embed text.
+ /// </summary>
+ SemanticKernelTextEmbedding
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextPartitionProfile.cs b/src/dotnet/Vectorization/Models/Resources/TextPartitionProfile.cs
deleted file mode 100644
index 9932654ab2..0000000000
--- a/src/dotnet/Vectorization/Models/Resources/TextPartitionProfile.cs
+++ /dev/null
@@ -1,26 +0,0 @@
-using System.Text.Json.Serialization;
-
-namespace FoundationaLLM.Vectorization.Models.Resources
-{
- ///
- /// Provides details about a text partitioning profile.
- ///
- public class TextPartitionProfile
- {
- ///
- /// The name of the text partitioning profile.
- ///
- public required string Name { get; set; }
-
- ///
- /// The type of the text splitter.
- ///
- [JsonConverter(typeof(JsonStringEnumConverter))]
- public required TextSplitterType TextSplitter { get; set; }
-
- ///
- /// The settings used to configure the text splitter.
- ///
- public Dictionary? TextSplitterSettings { get; set; }
- }
-}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextPartitioningProfile.cs b/src/dotnet/Vectorization/Models/Resources/TextPartitioningProfile.cs
new file mode 100644
index 0000000000..18cbcd3914
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Resources/TextPartitioningProfile.cs
@@ -0,0 +1,17 @@
+using FoundationaLLM.Common.Models.Vectorization;
+using System.Text.Json.Serialization;
+
+namespace FoundationaLLM.Vectorization.Models.Resources
+{
+ /// <summary>
+ /// Provides details about a text partitioning profile.
+ /// </summary>
+ public class TextPartitioningProfile : VectorizationProfileBase
+ {
+ /// <summary>
+ /// The type of the text splitter.
+ /// </summary>
+ [JsonConverter(typeof(JsonStringEnumConverter))]
+ public required TextSplitterType TextSplitter { get; set; }
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextPartitionProfileStore.cs b/src/dotnet/Vectorization/Models/Resources/TextPartitioningProfileStore.cs
similarity index 73%
rename from src/dotnet/Vectorization/Models/Resources/TextPartitionProfileStore.cs
rename to src/dotnet/Vectorization/Models/Resources/TextPartitioningProfileStore.cs
index e91e28d46a..1997d8d412 100644
--- a/src/dotnet/Vectorization/Models/Resources/TextPartitionProfileStore.cs
+++ b/src/dotnet/Vectorization/Models/Resources/TextPartitioningProfileStore.cs
@@ -3,11 +3,11 @@
///
/// Models the content of the text partition profiles store managed by the FoundationaLLM.Vectorization resource provider.
///
- public class TextPartitionProfileStore
+ public class TextPartitioningProfileStore
{
///
/// The list of all partition profiles that are registered for use by the vectorization pipelines.
///
- public required List TextPartitioningProfiles { get; set; }
+ public required List TextPartitioningProfiles { get; set; }
}
}
diff --git a/src/dotnet/Vectorization/Models/Resources/TextSplitterType.cs b/src/dotnet/Vectorization/Models/Resources/TextSplitterType.cs
index c4d6d57a8b..23f2d9eb21 100644
--- a/src/dotnet/Vectorization/Models/Resources/TextSplitterType.cs
+++ b/src/dotnet/Vectorization/Models/Resources/TextSplitterType.cs
@@ -1,10 +1,4 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace FoundationaLLM.Vectorization.Models.Resources
+namespace FoundationaLLM.Vectorization.Models.Resources
{
///
/// Types of text splitters available for text partitioning.
diff --git a/src/dotnet/Vectorization/Models/VectorizationContentIdentifier.cs b/src/dotnet/Vectorization/Models/VectorizationContentIdentifier.cs
deleted file mode 100644
index f5b54bdbc2..0000000000
--- a/src/dotnet/Vectorization/Models/VectorizationContentIdentifier.cs
+++ /dev/null
@@ -1,37 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Text.Json.Serialization;
-using System.Threading.Tasks;
-
-namespace FoundationaLLM.Vectorization.Models
-{
- ///
- /// Represents the content associated with a vectorization request.
- ///
- public class VectorizationContentIdentifier
- {
- ///
- /// The multipart unique identifier of the the content (i.e. document) being vectorized.
- ///
- [JsonPropertyOrder(1)]
- [JsonPropertyName("multipart_id")]
- public required List MultipartId { get; set; }
-
- ///
- /// The unique identifier of the content (i.e., document) being vectorized.
- /// The identifier is determined by concatenating the parts from .
- ///
- [JsonIgnore]
- public string UniqueId => string.Join("/", MultipartId);
-
- ///
- /// The canonical identifier of the content being vectorized.
- /// Vectorization state services use it to derive the location of the state in the underlying storage.
- ///
- [JsonPropertyOrder(2)]
- [JsonPropertyName("canonical_id")]
- public required string CanonicalId { get; set; }
- }
-}
diff --git a/src/dotnet/Vectorization/Models/VectorizationRequest.cs b/src/dotnet/Vectorization/Models/VectorizationRequest.cs
index 8493999edf..a7fa477784 100644
--- a/src/dotnet/Vectorization/Models/VectorizationRequest.cs
+++ b/src/dotnet/Vectorization/Models/VectorizationRequest.cs
@@ -1,4 +1,5 @@
-using FoundationaLLM.Vectorization.Exceptions;
+using FoundationaLLM.Common.Models.TextEmbedding;
+using FoundationaLLM.Vectorization.Exceptions;
using System.Text.Json.Serialization;
namespace FoundationaLLM.Vectorization.Models
@@ -17,11 +18,11 @@ public class VectorizationRequest
public required string Id { get; set; }
///
- /// The object identifying the content being vectorized.
+ /// The object identifying the content being vectorized.
///
[JsonPropertyOrder(1)]
[JsonPropertyName("content_identifier")]
- public required VectorizationContentIdentifier ContentIdentifier { get; set; }
+ public required ContentIdentifier ContentIdentifier { get; set; }
///
/// The list of vectorization steps requested by the vectorization request.
diff --git a/src/dotnet/Vectorization/Models/VectorizationState.cs b/src/dotnet/Vectorization/Models/VectorizationState.cs
index fabc2da0ae..172c14b9f3 100644
--- a/src/dotnet/Vectorization/Models/VectorizationState.cs
+++ b/src/dotnet/Vectorization/Models/VectorizationState.cs
@@ -1,11 +1,6 @@
-using FoundationaLLM.Vectorization.Interfaces;
-using Microsoft.Extensions.Hosting;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
+using FoundationaLLM.Common.Models.TextEmbedding;
+using FoundationaLLM.Vectorization.Interfaces;
using System.Text.Json.Serialization;
-using System.Threading.Tasks;
namespace FoundationaLLM.Vectorization.Models
{
@@ -24,11 +19,11 @@ public class VectorizationState
public required string CurrentRequestId { get; set; }
///
- /// The object identifying the content being vectorized.
+ /// The object identifying the content being vectorized.
///
[JsonPropertyOrder(1)]
[JsonPropertyName("content_identifier")]
- public required VectorizationContentIdentifier ContentIdentifier { get; set; }
+ public required ContentIdentifier ContentIdentifier { get; set; }
///
/// The vectorization artifacts associated with the vectorization state.
@@ -37,6 +32,13 @@ public class VectorizationState
[JsonPropertyName("artifacts")]
public List Artifacts { get; set; } = [];
+ ///
+ /// The name of the content source profile.
+ ///
+ [JsonPropertyOrder(3)]
+ [JsonPropertyName("content_source_profile_name")]
+ public string? ContentSourceProfileName { get; set; }
+
///
/// The list of log entries associated with actions executed by the vectorization pipeline.
///
diff --git a/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceProviderService.cs b/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceProviderService.cs
index c3de1bf364..a9c47bbb0f 100644
--- a/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceProviderService.cs
+++ b/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceProviderService.cs
@@ -21,11 +21,15 @@ public class VectorizationResourceProviderService(
storageService,
logger)
{
- private Dictionary _contentSources = [];
- private Dictionary _textPartitionProfiles = [];
+ private Dictionary _contentSourceProfiles = [];
+ private Dictionary _textPartitioningProfiles = [];
+ private Dictionary _textEmbeddingProfiles = [];
+ private Dictionary _indexingProfiles = [];
- private const string CONTENT_SOURCES_FILE_NAME = "vectorization-content-sources.json";
- private const string TEXT_PARTITION_PROFILES_FILE_NAME = "vectorization-text-partition-profiles.json";
+ private const string CONTENT_SOURCE_PROFILES_FILE_NAME = "vectorization-content-source-profiles.json";
+ private const string TEXT_PARTITION_PROFILES_FILE_NAME = "vectorization-text-partitioning-profiles.json";
+ private const string TEXT_EMBEDDING_PROFILES_FILE_NAME = "vectorization-text-embedding-profiles.json";
+ private const string INDEXING_PROFILES_FILE_NAME = "vectorization-indexing-profiles.json";
///
protected override string _name => ResourceProviderNames.FoundationaLLM_Vectorization;
@@ -34,12 +38,20 @@ public class VectorizationResourceProviderService(
protected override Dictionary _resourceTypes => new Dictionary
{
{
- VectorizationResourceTypeNames.ContentSources,
- new ResourceTypeDescriptor(VectorizationResourceTypeNames.ContentSources)
+ VectorizationResourceTypeNames.ContentSourceProfiles,
+ new ResourceTypeDescriptor(VectorizationResourceTypeNames.ContentSourceProfiles)
},
{
- VectorizationResourceTypeNames.TextPartitionProfiles,
- new ResourceTypeDescriptor(VectorizationResourceTypeNames.TextPartitionProfiles)
+ VectorizationResourceTypeNames.TextPartitioningProfiles,
+ new ResourceTypeDescriptor(VectorizationResourceTypeNames.TextPartitioningProfiles)
+ },
+ {
+ VectorizationResourceTypeNames.TextEmbeddingProfiles,
+ new ResourceTypeDescriptor(VectorizationResourceTypeNames.TextEmbeddingProfiles)
+ },
+ {
+ VectorizationResourceTypeNames.IndexingProfiles,
+ new ResourceTypeDescriptor(VectorizationResourceTypeNames.IndexingProfiles)
}
};
@@ -48,25 +60,45 @@ protected override async Task InitializeInternal()
{
_logger.LogInformation("Starting to initialize the {ResourceProvider} resource provider...", _name);
- var contentSourcesFilePath = $"/{_name}/{CONTENT_SOURCES_FILE_NAME}";
+ var contentSourceProfilesFilePath = $"/{_name}/{CONTENT_SOURCE_PROFILES_FILE_NAME}";
var partitionProfilesFilePath = $"/{_name}/{TEXT_PARTITION_PROFILES_FILE_NAME}";
+ var embeddingProfilesPath = $"/{_name}/{TEXT_EMBEDDING_PROFILES_FILE_NAME}";
+ var indexingProfilesPath = $"/{_name}/{INDEXING_PROFILES_FILE_NAME}";
- if (await _storageService.FileExistsAsync(_storageContainerName, contentSourcesFilePath, default))
+ if (await _storageService.FileExistsAsync(_storageContainerName, contentSourceProfilesFilePath, default))
{
- var fileContent = await _storageService.ReadFileAsync(_storageContainerName, contentSourcesFilePath, default);
- var contentSourcesStore = JsonConvert.DeserializeObject(
+ var fileContent = await _storageService.ReadFileAsync(_storageContainerName, contentSourceProfilesFilePath, default);
+ var contentSourceProfilesStore = JsonConvert.DeserializeObject(
Encoding.UTF8.GetString(fileContent.ToArray()));
- _contentSources = contentSourcesStore!.ContentSources.ToDictionary(cs => cs.Name);
+ _contentSourceProfiles = contentSourceProfilesStore!.ContentSourceProfiles.ToDictionary(cs => cs.Name);
}
if (await _storageService.FileExistsAsync(_storageContainerName, partitionProfilesFilePath, default))
{
var fileContent = await _storageService.ReadFileAsync(_storageContainerName, partitionProfilesFilePath, default);
- var textPartitionProfileStore = JsonConvert.DeserializeObject(
+ var textPartitionProfileStore = JsonConvert.DeserializeObject(
+ Encoding.UTF8.GetString(fileContent.ToArray()));
+
+ _textPartitioningProfiles = textPartitionProfileStore!.TextPartitioningProfiles.ToDictionary(tpp => tpp.Name);
+ }
+
+ if (await _storageService.FileExistsAsync(_storageContainerName, embeddingProfilesPath, default))
+ {
+ var fileContent = await _storageService.ReadFileAsync(_storageContainerName, embeddingProfilesPath, default);
+ var textEmbeddingProfileStore = JsonConvert.DeserializeObject(
+ Encoding.UTF8.GetString(fileContent.ToArray()));
+
+ _textEmbeddingProfiles = textEmbeddingProfileStore!.TextEmbeddingProfiles.ToDictionary(tep => tep.Name);
+ }
+
+ if (await _storageService.FileExistsAsync(_storageContainerName, indexingProfilesPath, default))
+ {
+ var fileContent = await _storageService.ReadFileAsync(_storageContainerName, indexingProfilesPath, default);
+ var indexingProfileStore = JsonConvert.DeserializeObject(
Encoding.UTF8.GetString(fileContent.ToArray()));
- _textPartitionProfiles = textPartitionProfileStore!.TextPartitioningProfiles.ToDictionary(cs => cs.Name);
+ _indexingProfiles = indexingProfileStore!.IndexingProfiles.ToDictionary(ip => ip.Name);
}
_logger.LogInformation("The {ResourceProvider} resource provider was successfully initialized.", _name);
@@ -76,34 +108,62 @@ protected override async Task InitializeInternal()
protected override T GetResourceInternal(List instances) where T: class =>
instances[0].ResourceType switch
{
- VectorizationResourceTypeNames.ContentSources => GetContentSource(instances),
- VectorizationResourceTypeNames.TextPartitionProfiles => GetPartitionProfile(instances),
+ VectorizationResourceTypeNames.ContentSourceProfiles => GetContentSourceProfiles(instances),
+ VectorizationResourceTypeNames.TextPartitioningProfiles => GetTextPartitioningProfile(instances),
+ VectorizationResourceTypeNames.TextEmbeddingProfiles => GetTextEmbeddingProfile(instances),
+ VectorizationResourceTypeNames.IndexingProfiles => GetIndexingProfile(instances),
_ => throw new ResourceProviderException($"The resource type {instances[0].ResourceType} is not supported by the {_name} resource manager.")
};
- private T GetContentSource(List instances) where T: class
+ private T GetContentSourceProfiles(List instances) where T: class
{
if (instances.Count != 1)
throw new ResourceProviderException($"Invalid resource path");
- if (typeof(T) != typeof(ContentSource))
+ if (typeof(T) != typeof(ContentSourceProfile))
throw new ResourceProviderException($"The type of requested resource ({typeof(T)}) does not match the resource type specified in the path ({instances[0].ResourceType}).");
- _contentSources.TryGetValue(instances[0].ResourceId!, out var contentSource);
+ _contentSourceProfiles.TryGetValue(instances[0].ResourceId!, out var contentSource);
return contentSource as T
?? throw new ResourceProviderException($"The resource {instances[0].ResourceId!} of type {instances[0].ResourceType} was not found.");
}
- private T GetPartitionProfile(List instances) where T: class
+        // Looks up one text partitioning profile by resource id; throws when the path, the requested type, or the id does not match.
+        private T GetTextPartitioningProfile(List instances) where T: class
+        {
+            var requested = instances.Count == 1 ? instances[0] : throw new ResourceProviderException($"Invalid resource path");
+
+            if (typeof(T) != typeof(TextPartitioningProfile))
+                throw new ResourceProviderException($"The type of requested resource ({typeof(T)}) does not match the resource type specified in the path ({requested.ResourceType}).");
+
+            return _textPartitioningProfiles.TryGetValue(requested.ResourceId!, out var profile) && profile is T typed
+                ? typed
+                : throw new ResourceProviderException($"The resource {requested.ResourceId!} of type {requested.ResourceType} was not found.");
+        }
+
+        // Looks up one text embedding profile by resource id; throws when the path, the requested type, or the id does not match.
+        private T GetTextEmbeddingProfile(List instances) where T : class
+        {
+            var requested = instances.Count == 1 ? instances[0] : throw new ResourceProviderException($"Invalid resource path");
+
+            if (typeof(T) != typeof(TextEmbeddingProfile))
+                throw new ResourceProviderException($"The type of requested resource ({typeof(T)}) does not match the resource type specified in the path ({requested.ResourceType}).");
+
+            return _textEmbeddingProfiles.TryGetValue(requested.ResourceId!, out var profile) && profile is T typed
+                ? typed
+                : throw new ResourceProviderException($"The resource {requested.ResourceId!} of type {requested.ResourceType} was not found.");
+        }
+
+ private T GetIndexingProfile(List instances) where T : class
{
if (instances.Count != 1)
throw new ResourceProviderException($"Invalid resource path");
- if (typeof(T) != typeof(TextPartitionProfile))
+ if (typeof(T) != typeof(IndexingProfile))
throw new ResourceProviderException($"The type of requested resource ({typeof(T)}) does not match the resource type specified in the path ({instances[0].ResourceType}).");
- _textPartitionProfiles.TryGetValue(instances[0].ResourceId!, out var partitionProfile);
- return partitionProfile as T
+ _indexingProfiles.TryGetValue(instances[0].ResourceId!, out var indexingProfile);
+ return indexingProfile as T
?? throw new ResourceProviderException($"The resource {instances[0].ResourceId!} of type {instances[0].ResourceType} was not found.");
}
}
diff --git a/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceTypeNames.cs b/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceTypeNames.cs
index 6ea6fc1fd4..68293111ea 100644
--- a/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceTypeNames.cs
+++ b/src/dotnet/Vectorization/ResourceProviders/VectorizationResourceTypeNames.cs
@@ -14,11 +14,21 @@ public static class VectorizationResourceTypeNames
///
/// Vectorization content sources.
///
- public const string ContentSources = "contentSources";
+ public const string ContentSourceProfiles = "contentsourceprofiles";
///
/// Text partitioning profiles.
///
- public const string TextPartitionProfiles = "textPartitionProfiles";
+ public const string TextPartitioningProfiles = "textpartitionprofiles";
+
+ ///
+ /// Text embedding profiles.
+ ///
+ public const string TextEmbeddingProfiles = "textembeddingprofiles";
+
+ ///
+ /// Indexing profiles.
+ ///
+ public const string IndexingProfiles = "indexingprofiles";
}
}
diff --git a/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs b/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs
index ddcdf8a39a..bc1781f1dc 100644
--- a/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs
+++ b/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs
@@ -1,5 +1,6 @@
using FoundationaLLM.Common.Constants;
using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.Vectorization;
using FoundationaLLM.Common.Settings;
using FoundationaLLM.Vectorization.Exceptions;
using FoundationaLLM.Vectorization.Interfaces;
@@ -27,25 +28,39 @@ namespace FoundationaLLM.Vectorization.Services.ContentSources
public class ContentSourceServiceFactory(
[FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService)] IResourceProviderService vectorizationResourceProviderService,
IConfiguration configuration,
- ILoggerFactory loggerFactory) : IServiceFactory
+ ILoggerFactory loggerFactory) : IVectorizationServiceFactory
{
private readonly IResourceProviderService _vectorizationResourceProviderService = vectorizationResourceProviderService;
private readonly IConfiguration _configuration = configuration;
private readonly ILoggerFactory _loggerFactory = loggerFactory;
///
- public IContentSourceService CreateService(string serviceName)
+ public IContentSourceService GetService(string serviceName)
{
- var contentSource = _vectorizationResourceProviderService.GetResource(
- $"/{VectorizationResourceTypeNames.ContentSources}/{serviceName}");
+ var contentSourceProfile = _vectorizationResourceProviderService.GetResource(
+ $"/{VectorizationResourceTypeNames.ContentSourceProfiles}/{serviceName}");
- return contentSource.Type switch
+ return contentSourceProfile.Type switch
{
ContentSourceType.AzureDataLake => CreateAzureDataLakeContentSourceService(serviceName),
- _ => throw new VectorizationException($"The content source type {contentSource.Type} is not supported."),
+ _ => throw new VectorizationException($"The content source type {contentSourceProfile.Type} is not supported."),
};
}
+        /// <summary>Gets the content source service together with the content source profile named <paramref name="serviceName"/>.</summary>
+        /// <exception cref="VectorizationException">The profile's content source type is not supported.</exception>
+        public (IContentSourceService Service, VectorizationProfileBase VectorizationProfile) GetServiceWithProfile(string serviceName)
+        {
+            var contentSourceProfile = _vectorizationResourceProviderService.GetResource<ContentSourceProfile>(
+                $"/{VectorizationResourceTypeNames.ContentSourceProfiles}/{serviceName}");
+            return contentSourceProfile.Type switch
+            {
+                ContentSourceType.AzureDataLake => (CreateAzureDataLakeContentSourceService(serviceName), contentSourceProfile),
+                _ => throw new VectorizationException($"The content source type {contentSourceProfile.Type} is not supported."),
+            };
+        }
+
+
private DataLakeContentSourceService CreateAzureDataLakeContentSourceService(string serviceName)
{
var blobStorageServiceSettings = new BlobStorageServiceSettings { AuthenticationType = BlobStorageAuthenticationTypes.Unknown };
diff --git a/src/dotnet/Vectorization/Services/RequestManagerService.cs b/src/dotnet/Vectorization/Services/RequestManagerService.cs
index 55e92f1f4f..52f3a181d3 100644
--- a/src/dotnet/Vectorization/Services/RequestManagerService.cs
+++ b/src/dotnet/Vectorization/Services/RequestManagerService.cs
@@ -108,10 +108,12 @@ private async Task ProcessRequest(VectorizationRequest request, string messageId
{
try
{
- await HandleRequest(request, messageId).ConfigureAwait(false);
-
- await _incomingRequestSourceService.DeleteRequest(messageId, popReceipt).ConfigureAwait(false);
- await AdvanceRequest(request).ConfigureAwait(false);
+ if (await HandleRequest(request, messageId).ConfigureAwait(false))
+ {
+ // If the request was handled successfully, remove it from the current source and advance it to the next step.
+ await _incomingRequestSourceService.DeleteRequest(messageId, popReceipt).ConfigureAwait(false);
+ await AdvanceRequest(request).ConfigureAwait(false);
+ }
}
catch (Exception ex)
{
@@ -119,7 +121,7 @@ private async Task ProcessRequest(VectorizationRequest request, string messageId
}
}
- private async Task HandleRequest(VectorizationRequest request, string messageId)
+ private async Task HandleRequest(VectorizationRequest request, string messageId)
{
var state = await _vectorizationStateService.HasState(request).ConfigureAwait(false)
? await _vectorizationStateService.ReadState(request).ConfigureAwait(false)
@@ -133,9 +135,11 @@ private async Task HandleRequest(VectorizationRequest request, string messageId)
_vectorizationStateService,
_serviceProvider,
_loggerFactory);
- await stepHandler.Invoke(request, state, _cancellationToken).ConfigureAwait(false);
+ var handlerSuccess = await stepHandler.Invoke(request, state, _cancellationToken).ConfigureAwait(false);
await _vectorizationStateService.SaveState(state).ConfigureAwait(false);
+
+ return handlerSuccess;
}
private async Task AdvanceRequest(VectorizationRequest request)
diff --git a/src/dotnet/Vectorization/Services/Text/IndexingServiceFactory.cs b/src/dotnet/Vectorization/Services/Text/IndexingServiceFactory.cs
new file mode 100644
index 0000000000..36f27c712c
--- /dev/null
+++ b/src/dotnet/Vectorization/Services/Text/IndexingServiceFactory.cs
@@ -0,0 +1,68 @@
+using FoundationaLLM.Common.Constants;
+using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.Vectorization;
+using FoundationaLLM.Vectorization.Exceptions;
+using FoundationaLLM.Vectorization.Models.Resources;
+using FoundationaLLM.Vectorization.ResourceProviders;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+
+namespace FoundationaLLM.Vectorization.Services.Text
+{
+    /// <summary>
+    /// Creates indexing service instances.
+    /// </summary>
+    /// <param name="vectorizationResourceProviderService">The vectorization resource provider service.</param>
+    /// <param name="configuration">The global configuration provider.</param>
+    /// <param name="serviceProvider">The <see cref="IServiceProvider"/> providing dependency injection services.</param>
+    /// <param name="loggerFactory">The logger factory used to create loggers.</param>
+    public class IndexingServiceFactory(
+        [FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService)] IResourceProviderService vectorizationResourceProviderService,
+        IConfiguration configuration,
+        IServiceProvider serviceProvider,
+        ILoggerFactory loggerFactory) : IVectorizationServiceFactory<IIndexingService>
+    {
+        private readonly IResourceProviderService _vectorizationResourceProviderService = vectorizationResourceProviderService;
+        private readonly IConfiguration _configuration = configuration;
+        private readonly IServiceProvider _serviceProvider = serviceProvider;
+        private readonly ILoggerFactory _loggerFactory = loggerFactory;
+
+        /// <summary>
+        /// Gets the indexing service selected by the indexing profile named <paramref name="serviceName"/>.
+        /// </summary>
+        /// <param name="serviceName">The name of the indexing profile to resolve.</param>
+        /// <returns>The indexing service matching the profile's indexer type.</returns>
+        /// <exception cref="VectorizationException">The indexer type is not supported or the indexing service cannot be retrieved.</exception>
+        public IIndexingService GetService(string serviceName) =>
+            GetServiceWithProfile(serviceName).Service;
+
+        /// <summary>
+        /// Gets the indexing service together with the indexing profile named <paramref name="serviceName"/>.
+        /// </summary>
+        /// <param name="serviceName">The name of the indexing profile to resolve.</param>
+        /// <returns>The indexing service and the indexing profile it was selected from.</returns>
+        /// <exception cref="VectorizationException">The indexer type is not supported or the indexing service cannot be retrieved.</exception>
+        public (IIndexingService Service, VectorizationProfileBase VectorizationProfile) GetServiceWithProfile(string serviceName)
+        {
+            var indexingProfile = _vectorizationResourceProviderService.GetResource<IndexingProfile>(
+                $"/{VectorizationResourceTypeNames.IndexingProfiles}/{serviceName}");
+
+            return indexingProfile.Indexer switch
+            {
+                IndexerType.AzureAISearchIndexer => (CreateAzureAISearchIndexingService(
+                    indexingProfile.Settings!["IndexName"]), indexingProfile),
+                _ => throw new VectorizationException($"The indexer type {indexingProfile.Indexer} is not supported."),
+            };
+        }
+
+        /// <summary>
+        /// Retrieves the keyed Azure AI Search indexing service from the service provider.
+        /// </summary>
+        /// <param name="indexName">The "IndexName" value from the profile settings; not used by this method (NOTE(review): confirm intent).</param>
+        private IIndexingService CreateAzureAISearchIndexingService(string indexName) =>
+            _serviceProvider.GetKeyedService<IIndexingService>(
+                DependencyInjectionKeys.FoundationaLLM_Vectorization_AzureAISearchIndexingService)
+            ?? throw new VectorizationException($"Could not retrieve the Azure AI Search indexing service instance.");
+    }
+}
diff --git a/src/dotnet/Vectorization/Services/Text/TextEmbeddingServiceFactory.cs b/src/dotnet/Vectorization/Services/Text/TextEmbeddingServiceFactory.cs
new file mode 100644
index 0000000000..8550a5d2d8
--- /dev/null
+++ b/src/dotnet/Vectorization/Services/Text/TextEmbeddingServiceFactory.cs
@@ -0,0 +1,70 @@
+using FoundationaLLM.Common.Constants;
+using FoundationaLLM.Common.Interfaces;
+using FoundationaLLM.Common.Models.Configuration.Text;
+using FoundationaLLM.Common.Models.Vectorization;
+using FoundationaLLM.Common.Services.TextSplitters;
+using FoundationaLLM.SemanticKernel.Core.Services;
+using FoundationaLLM.Vectorization.Exceptions;
+using FoundationaLLM.Vectorization.Models.Resources;
+using FoundationaLLM.Vectorization.ResourceProviders;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+
+namespace FoundationaLLM.Vectorization.Services.Text
+{
+    /// <summary>
+    /// Creates text embedding service instances.
+    /// </summary>
+    /// <param name="vectorizationResourceProviderService">The vectorization resource provider service.</param>
+    /// <param name="configuration">The global configuration provider.</param>
+    /// <param name="serviceProvider">The <see cref="IServiceProvider"/> providing dependency injection services.</param>
+    /// <param name="loggerFactory">The logger factory used to create loggers.</param>
+    public class TextEmbeddingServiceFactory(
+        [FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService)] IResourceProviderService vectorizationResourceProviderService,
+        IConfiguration configuration,
+        IServiceProvider serviceProvider,
+        ILoggerFactory loggerFactory) : IVectorizationServiceFactory<ITextEmbeddingService>
+    {
+        private readonly IResourceProviderService _vectorizationResourceProviderService = vectorizationResourceProviderService;
+        private readonly IConfiguration _configuration = configuration;
+        private readonly IServiceProvider _serviceProvider = serviceProvider;
+        private readonly ILoggerFactory _loggerFactory = loggerFactory;
+
+        /// <summary>
+        /// Gets the text embedding service selected by the text embedding profile named <paramref name="serviceName"/>.
+        /// </summary>
+        /// <param name="serviceName">The name of the text embedding profile to resolve.</param>
+        /// <returns>The text embedding service matching the profile's text embedding type.</returns>
+        /// <exception cref="VectorizationException">The text embedding type is not supported or the service cannot be retrieved.</exception>
+        public ITextEmbeddingService GetService(string serviceName) =>
+            GetServiceWithProfile(serviceName).Service;
+
+        /// <summary>
+        /// Gets the text embedding service together with the text embedding profile named <paramref name="serviceName"/>.
+        /// </summary>
+        /// <param name="serviceName">The name of the text embedding profile to resolve.</param>
+        /// <returns>The text embedding service and the text embedding profile it was selected from.</returns>
+        /// <exception cref="VectorizationException">The text embedding type is not supported or the service cannot be retrieved.</exception>
+        public (ITextEmbeddingService Service, VectorizationProfileBase VectorizationProfile) GetServiceWithProfile(string serviceName)
+        {
+            var textEmbeddingProfile = _vectorizationResourceProviderService.GetResource<TextEmbeddingProfile>(
+                $"/{VectorizationResourceTypeNames.TextEmbeddingProfiles}/{serviceName}");
+
+            return textEmbeddingProfile.TextEmbedding switch
+            {
+                TextEmbeddingType.SemanticKernelTextEmbedding => (CreateSemanticKernelTextEmbeddingService(), textEmbeddingProfile),
+                _ => throw new VectorizationException($"The text embedding type {textEmbeddingProfile.TextEmbedding} is not supported."),
+            };
+        }
+
+        /// <summary>
+        /// Retrieves the keyed Semantic Kernel text embedding service from the service provider.
+        /// </summary>
+        private ITextEmbeddingService CreateSemanticKernelTextEmbeddingService() =>
+            _serviceProvider.GetKeyedService<ITextEmbeddingService>(
+                DependencyInjectionKeys.FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService)
+            ?? throw new VectorizationException($"Could not retrieve the Semantic Kernel text embedding service instance.");
+    }
+}
diff --git a/src/dotnet/Vectorization/Services/Text/TextSplitterServiceFactory.cs b/src/dotnet/Vectorization/Services/Text/TextSplitterServiceFactory.cs
index 979bc558dd..c0ce2c6983 100644
--- a/src/dotnet/Vectorization/Services/Text/TextSplitterServiceFactory.cs
+++ b/src/dotnet/Vectorization/Services/Text/TextSplitterServiceFactory.cs
@@ -1,7 +1,8 @@
-using Azure.Core;
-using FoundationaLLM.Common.Constants;
+using FoundationaLLM.Common.Constants;
using FoundationaLLM.Common.Interfaces;
using FoundationaLLM.Common.Models.Configuration.Text;
+using FoundationaLLM.Common.Models.Vectorization;
+using FoundationaLLM.Common.Services.TextSplitters;
using FoundationaLLM.Vectorization.Exceptions;
using FoundationaLLM.Vectorization.Models.Resources;
using FoundationaLLM.Vectorization.ResourceProviders;
@@ -9,11 +10,6 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
namespace FoundationaLLM.Vectorization.Services.Text
{
@@ -28,7 +24,7 @@ public class TextSplitterServiceFactory(
[FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService)] IResourceProviderService vectorizationResourceProviderService,
IConfiguration configuration,
IServiceProvider serviceProvider,
- ILoggerFactory loggerFactory) : IServiceFactory
+ ILoggerFactory loggerFactory) : IVectorizationServiceFactory
{
private readonly IResourceProviderService _vectorizationResourceProviderService = vectorizationResourceProviderService;
private readonly IConfiguration _configuration = configuration;
@@ -36,15 +32,29 @@ public class TextSplitterServiceFactory(
private readonly ILoggerFactory _loggerFactory = loggerFactory;
///
- public ITextSplitterService CreateService(string serviceName)
+ public ITextSplitterService GetService(string serviceName)
{
- var textPartitionProfile = _vectorizationResourceProviderService.GetResource(
- $"/{VectorizationResourceTypeNames.TextPartitionProfiles}/{serviceName}");
+ var textPartitionProfile = _vectorizationResourceProviderService.GetResource(
+ $"/{VectorizationResourceTypeNames.TextPartitioningProfiles}/{serviceName}");
return textPartitionProfile.TextSplitter switch
{
TextSplitterType.TokenTextSplitter => CreateTokenTextSplitterService(
- TokenTextSplitterServiceSettings.FromDictionary(textPartitionProfile.TextSplitterSettings!)),
+ TokenTextSplitterServiceSettings.FromDictionary(textPartitionProfile.Settings!)),
+ _ => throw new VectorizationException($"The text splitter type {textPartitionProfile.TextSplitter} is not supported."),
+ };
+ }
+
+ ///
+ public (ITextSplitterService Service, VectorizationProfileBase VectorizationProfile) GetServiceWithProfile(string serviceName)
+ {
+ var textPartitionProfile = _vectorizationResourceProviderService.GetResource(
+ $"/{VectorizationResourceTypeNames.TextPartitioningProfiles}/{serviceName}");
+
+ return textPartitionProfile.TextSplitter switch
+ {
+ TextSplitterType.TokenTextSplitter => (CreateTokenTextSplitterService(
+ TokenTextSplitterServiceSettings.FromDictionary(textPartitionProfile.Settings!)), textPartitionProfile),
_ => throw new VectorizationException($"The text splitter type {textPartitionProfile.TextSplitter} is not supported."),
};
}
diff --git a/src/dotnet/Vectorization/Services/VectorizationStates/VectorizationStateServiceBase.cs b/src/dotnet/Vectorization/Services/VectorizationStates/VectorizationStateServiceBase.cs
index b10df6398e..9da26d43ed 100644
--- a/src/dotnet/Vectorization/Services/VectorizationStates/VectorizationStateServiceBase.cs
+++ b/src/dotnet/Vectorization/Services/VectorizationStates/VectorizationStateServiceBase.cs
@@ -1,10 +1,6 @@
-using FoundationaLLM.Vectorization.Models;
-using System;
-using System.Collections.Generic;
-using System.Linq;
+using FoundationaLLM.Common.Models.TextEmbedding;
using System.Security.Cryptography;
using System.Text;
-using System.Threading.Tasks;
namespace FoundationaLLM.Vectorization.Services.VectorizationStates
{
@@ -16,17 +12,17 @@ public abstract class VectorizationStateServiceBase
///
/// Gets the location of the vectorization state based on the content identifier.
///
- /// The holding the content identification information.
+ /// <param name="contentIdentifier">The <see cref="ContentIdentifier"/> holding the content identification information.</param>
///
- protected string GetPersistenceIdentifier(VectorizationContentIdentifier contentIdentifier) =>
+ protected string GetPersistenceIdentifier(ContentIdentifier contentIdentifier) =>
$"{contentIdentifier.CanonicalId}_state_{HashContentIdentifier(contentIdentifier)}";
///
/// Computes the MD5 hash of the content identifier.
///
- /// The holding the content identification information.
+ /// <param name="contentIdentifier">The <see cref="ContentIdentifier"/> holding the content identification information.</param>
///
- protected static string HashContentIdentifier(VectorizationContentIdentifier contentIdentifier)
+ protected static string HashContentIdentifier(ContentIdentifier contentIdentifier)
{
var byteHash = MD5.HashData(
Encoding.UTF8.GetBytes(
diff --git a/src/dotnet/Vectorization/Vectorization.csproj b/src/dotnet/Vectorization/Vectorization.csproj
index b142c34fa9..dfc7d1ba8c 100644
--- a/src/dotnet/Vectorization/Vectorization.csproj
+++ b/src/dotnet/Vectorization/Vectorization.csproj
@@ -20,6 +20,7 @@
+
diff --git a/src/dotnet/VectorizationWorker/Program.cs b/src/dotnet/VectorizationWorker/Program.cs
index 392f6d4cd0..e7540c0c47 100644
--- a/src/dotnet/VectorizationWorker/Program.cs
+++ b/src/dotnet/VectorizationWorker/Program.cs
@@ -7,6 +7,8 @@
using FoundationaLLM.Common.Services;
using FoundationaLLM.Common.Services.Tokenizers;
using FoundationaLLM.Common.Settings;
+using FoundationaLLM.SemanticKernel.Core.Models.Configuration;
+using FoundationaLLM.SemanticKernel.Core.Services;
using FoundationaLLM.Vectorization.Interfaces;
using FoundationaLLM.Vectorization.Models.Configuration;
using FoundationaLLM.Vectorization.ResourceProviders;
@@ -64,6 +66,12 @@
DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService)
.Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_ResourceProviderService_Storage));
+builder.Services.AddOptions()
+ .Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService));
+
+builder.Services.AddOptions()
+ .Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_AzureAISearchIndexingService));
+
builder.Services.AddSingleton(
typeof(IEnumerable),
new IConfigurationSection[] {
@@ -96,17 +104,33 @@
logger);
});
+// Vectorization state
builder.Services.AddSingleton();
+
+// Vectorization resource provider
builder.Services.AddKeyedSingleton(
DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService);
builder.Services.ActivateKeyedSingleton(
DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService);
-builder.Services.AddSingleton, ContentSourceServiceFactory>();
-builder.Services.AddSingleton, TextSplitterServiceFactory>();
+// Service factories
+builder.Services.AddSingleton, ContentSourceServiceFactory>();
+builder.Services.AddSingleton, TextSplitterServiceFactory>();
+builder.Services.AddSingleton, TextEmbeddingServiceFactory>();
+builder.Services.AddSingleton, IndexingServiceFactory>();
+
+// Tokenizer
builder.Services.AddKeyedSingleton(TokenizerServiceNames.MICROSOFT_BPE_TOKENIZER);
builder.Services.ActivateKeyedSingleton(TokenizerServiceNames.MICROSOFT_BPE_TOKENIZER);
+// Text embedding
+builder.Services.AddKeyedSingleton(
+ DependencyInjectionKeys.FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService);
+
+// Indexing
+builder.Services.AddKeyedSingleton(
+ DependencyInjectionKeys.FoundationaLLM_Vectorization_AzureAISearchIndexingService);
+
builder.Services.AddTransient();
builder.Services.AddHostedService();
diff --git a/src/python/LangChainAPI/LangChainAPI.pyproj b/src/python/LangChainAPI/LangChainAPI.pyproj
index 697867af2a..2c67eac325 100644
--- a/src/python/LangChainAPI/LangChainAPI.pyproj
+++ b/src/python/LangChainAPI/LangChainAPI.pyproj
@@ -24,6 +24,7 @@
.
+ True
true
diff --git a/tests/dotnet/SemanticKernel.Tests/SemanticKernel.Tests.csproj b/tests/dotnet/SemanticKernel.Tests/SemanticKernel.Tests.csproj
index 9dc5a4ca68..617d193baf 100644
--- a/tests/dotnet/SemanticKernel.Tests/SemanticKernel.Tests.csproj
+++ b/tests/dotnet/SemanticKernel.Tests/SemanticKernel.Tests.csproj
@@ -26,7 +26,7 @@
-
+