diff --git a/src/dotnet/Common/Constants/DependencyInjectionKeys.cs b/src/dotnet/Common/Constants/DependencyInjectionKeys.cs index d29ff08bcf..f79f769430 100644 --- a/src/dotnet/Common/Constants/DependencyInjectionKeys.cs +++ b/src/dotnet/Common/Constants/DependencyInjectionKeys.cs @@ -40,5 +40,15 @@ public static class DependencyInjectionKeys /// The dependency injection key for the Azure AI Search indexing service. /// public const string FoundationaLLM_Vectorization_AzureAISearchIndexingService = "FoundationaLLM:Vectorization:AzureAISearchIndexingService"; + + /// + /// The dependency injection key for the vectorization queues configuration section. + /// + public const string FoundationaLLM_Vectorization_Queues = "FoundationaLLM:Vectorization:Queues"; + + /// + /// The dependency injection key for the vectorization steps configuration section. + /// + public const string FoundationaLLM_Vectorization_Steps = "FoundationaLLM:Vectorization:Steps"; } } diff --git a/src/dotnet/Vectorization/Interfaces/IVectorizationService.cs b/src/dotnet/Vectorization/Interfaces/IVectorizationService.cs index e0bb540c70..a86594df16 100644 --- a/src/dotnet/Vectorization/Interfaces/IVectorizationService.cs +++ b/src/dotnet/Vectorization/Interfaces/IVectorizationService.cs @@ -17,6 +17,6 @@ public interface IVectorizationService /// /// The object containing the details of the vectorization request. 
/// - Task ProcessRequest(VectorizationRequest vectorizationRequest); + Task ProcessRequest(VectorizationRequest vectorizationRequest); } } diff --git a/src/dotnet/Vectorization/Models/VectorizationProcessingResult.cs b/src/dotnet/Vectorization/Models/VectorizationProcessingResult.cs new file mode 100644 index 0000000000..49975b5f70 --- /dev/null +++ b/src/dotnet/Vectorization/Models/VectorizationProcessingResult.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FoundationaLLM.Vectorization.Models +{ + /// + /// Represents the result of processing a vectorization request. + /// + /// Indicates whether the processing was completed successfully. + /// The identifier of the vectorization operation. Can be used to request the status of the operation. + /// When IsSuccess is false, contains an error message with details. + public record VectorizationProcessingResult( + bool IsSuccess, + Guid? OperationId, + string? ErrorMessage) + { + } +} diff --git a/src/dotnet/Vectorization/Models/VectorizationProcessingType.cs b/src/dotnet/Vectorization/Models/VectorizationProcessingType.cs new file mode 100644 index 0000000000..aa64ae25b6 --- /dev/null +++ b/src/dotnet/Vectorization/Models/VectorizationProcessingType.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FoundationaLLM.Vectorization.Models +{ + /// + /// The type of vectorization request processing. + /// + public enum VectorizationProcessingType + { + /// + /// Asynchronous processing using vectorization workers. + /// + Asynchronous, + + /// + /// Synchronous processing using the vectorization API. 
+ /// + Synchronous + } +} diff --git a/src/dotnet/Vectorization/Models/VectorizationRequest.cs b/src/dotnet/Vectorization/Models/VectorizationRequest.cs index a7fa477784..434a76e6c6 100644 --- a/src/dotnet/Vectorization/Models/VectorizationRequest.cs +++ b/src/dotnet/Vectorization/Models/VectorizationRequest.cs @@ -24,6 +24,14 @@ public class VectorizationRequest [JsonPropertyName("content_identifier")] public required ContentIdentifier ContentIdentifier { get; set; } + /// + /// The processing type indicating how the request should be processed. + /// + [JsonPropertyOrder(2)] + [JsonPropertyName("processing_type")] + [JsonConverter(typeof(JsonStringEnumConverter))] + public required VectorizationProcessingType ProcessingType { get; set; } + /// /// The list of vectorization steps requested by the vectorization request. /// Vectorization steps are identified by unique names like "extract", "partition", "embed", "index", etc. diff --git a/src/dotnet/Vectorization/Services/RequestSources/RequestSourcesCache.cs b/src/dotnet/Vectorization/Services/RequestSources/RequestSourcesCache.cs index 6a838fe416..a02c78a249 100644 --- a/src/dotnet/Vectorization/Services/RequestSources/RequestSourcesCache.cs +++ b/src/dotnet/Vectorization/Services/RequestSources/RequestSourcesCache.cs @@ -1,6 +1,8 @@ -using FoundationaLLM.Vectorization.Interfaces; +using FoundationaLLM.Common.Constants; +using FoundationaLLM.Vectorization.Interfaces; using FoundationaLLM.Vectorization.Models.Configuration; using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; @@ -17,7 +19,7 @@ namespace FoundationaLLM.Vectorization.Services.RequestSources /// The used to create new loggers for child objects. 
public class RequestSourcesCache( IOptions vectorizationWorkerOptions, - IConfigurationSection queuesConfiguration, + [FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_Queues)] IConfigurationSection queuesConfiguration, ILoggerFactory loggerFactory) : IRequestSourcesCache { private readonly Dictionary _requestSources = (new RequestSourcesBuilder()) diff --git a/src/dotnet/Vectorization/Services/VectorizationService.cs b/src/dotnet/Vectorization/Services/VectorizationService.cs index dd09eb13a1..681fe812c2 100644 --- a/src/dotnet/Vectorization/Services/VectorizationService.cs +++ b/src/dotnet/Vectorization/Services/VectorizationService.cs @@ -1,7 +1,14 @@ -using FoundationaLLM.Vectorization.Exceptions; +using FoundationaLLM.Common.Constants; +using FoundationaLLM.Common.Models.Chat; +using FoundationaLLM.Vectorization.Exceptions; +using FoundationaLLM.Vectorization.Handlers; using FoundationaLLM.Vectorization.Interfaces; using FoundationaLLM.Vectorization.Models; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using System.Runtime; +using System.Threading; namespace FoundationaLLM.Vectorization.Services { @@ -12,21 +19,47 @@ namespace FoundationaLLM.Vectorization.Services /// Creates a new instance of the service. /// /// The cache of request sources. - /// The logger instance used for logging. + /// The service providing vectorization state management. + /// The configuration section providing access to the vectorization steps settings. + /// The service provider implemented by the dependency injection container. + /// The logger factory used to create loggers. 
public class VectorizationService( IRequestSourcesCache requestSourcesCache, - ILogger logger) : IVectorizationService + IVectorizationStateService vectorizationStateService, + [FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_Steps)] IConfigurationSection stepsConfiguration, + IServiceProvider serviceProvider, + ILoggerFactory loggerFactory) : IVectorizationService { private readonly Dictionary _requestSources = requestSourcesCache.RequestSources; - private readonly ILogger _logger = logger; + private readonly IVectorizationStateService _vectorizationStateService = vectorizationStateService; + private readonly IConfigurationSection? _stepsConfiguration = stepsConfiguration; + private readonly IServiceProvider _serviceProvider = serviceProvider; + private readonly ILoggerFactory _loggerFactory = loggerFactory; + private readonly ILogger _logger = loggerFactory.CreateLogger(); /// - public async Task ProcessRequest(VectorizationRequest vectorizationRequest) + public async Task ProcessRequest(VectorizationRequest vectorizationRequest) { - ValidateRequest(vectorizationRequest); + try + { + ValidateRequest(vectorizationRequest); - var firstRequestSource = _requestSources[vectorizationRequest.Steps.First().Id]; - await firstRequestSource.SubmitRequest(vectorizationRequest); + switch (vectorizationRequest.ProcessingType) + { + case VectorizationProcessingType.Asynchronous: + var firstRequestSource = _requestSources[vectorizationRequest.Steps.First().Id]; + await firstRequestSource.SubmitRequest(vectorizationRequest); + return new VectorizationProcessingResult(true, null, null); + case VectorizationProcessingType.Synchronous: + return await ProcessRequestInternal(vectorizationRequest); + default: + throw new VectorizationException($"The vectorization processing type {vectorizationRequest.ProcessingType} is not supported."); + } + } + catch (Exception ex) + { + // Failures are reported to the caller through the result object instead of being rethrown. + // Log the full exception here: only its message is returned, so the stack trace and exception type would otherwise be lost. + _logger.LogError(ex, "Error while processing vectorization request {RequestId}.", vectorizationRequest?.Id); + return new VectorizationProcessingResult(false, null, ex.Message); + } } private void 
ValidateRequest(VectorizationRequest vectorizationRequest) @@ -60,5 +93,51 @@ private void HandleValidationError(string validationError) _logger.LogError(validationError); throw new VectorizationException(validationError); } + + /// Runs the steps of the request in order within the current call, stopping at the first step whose handler reports failure. + /// Returns a successful result when the request is complete after the loop; otherwise a failed result naming the step at which execution stopped. + private async Task ProcessRequestInternal(VectorizationRequest request) + { + _logger.LogInformation("Starting synchronous processing for request {RequestId}.", request.Id); + + var state = VectorizationState.FromRequest(request); + + foreach (var step in request.Steps) + { + _logger.LogInformation("Starting step [{Step}] for request {RequestId}.", step.Id, request.Id); + + // NOTE(review): the "N/A" argument is presumably an identifier that only applies to queue-based processing - confirm against VectorizationStepHandlerFactory.Create. + var stepHandler = VectorizationStepHandlerFactory.Create( + step.Id, + "N/A", + step.Parameters, + _stepsConfiguration, + _vectorizationStateService, + _serviceProvider, + _loggerFactory); + var handlerSuccess = await stepHandler.Invoke(request, state, default).ConfigureAwait(false); + if (!handlerSuccess) + break; + + var steps = request.MoveToNextStep(); + + if (!string.IsNullOrEmpty(steps.CurrentStep)) + _logger.LogInformation("The pipeline for request id {RequestId} was advanced from step [{PreviousStepName}] to step [{CurrentStepName}].", + request.Id, steps.PreviousStep, steps.CurrentStep); + else + _logger.LogInformation("The pipeline for request id {RequestId} was advanced from step [{PreviousStepName}] to finalized state.", + request.Id, steps.PreviousStep); + } + + if (request.Complete) + { + _logger.LogInformation("Finished synchronous processing for request {RequestId}. All steps were processed successfully.", request.Id); + return new VectorizationProcessingResult(true, null, null); + } + else + { + var errorMessage = + $"Execution stopped at step [{request.CurrentStep}] due to an error."; + // A step failure is an error condition; log it at error level, as HandleValidationError does for validation failures. + _logger.LogError("Finished synchronous processing for request {RequestId}. 
{ErrorMessage}", request.Id, errorMessage); + return new VectorizationProcessingResult(false, null, errorMessage); + } + } } } diff --git a/src/dotnet/Vectorization/Services/VectorizationStates/MemoryVectorizationStateService.cs b/src/dotnet/Vectorization/Services/VectorizationStates/MemoryVectorizationStateService.cs index be20d32fa3..57b372fd53 100644 --- a/src/dotnet/Vectorization/Services/VectorizationStates/MemoryVectorizationStateService.cs +++ b/src/dotnet/Vectorization/Services/VectorizationStates/MemoryVectorizationStateService.cs @@ -33,11 +33,8 @@ public async Task ReadState(VectorizationRequest request) } /// - public async Task LoadArtifacts(VectorizationState state, VectorizationArtifactType artifactType) - { + public async Task LoadArtifacts(VectorizationState state, VectorizationArtifactType artifactType) => await Task.CompletedTask; - throw new NotImplementedException(); - } /// public async Task SaveState(VectorizationState state) diff --git a/src/dotnet/VectorizationAPI/Controllers/VectorizationRequestController.cs b/src/dotnet/VectorizationAPI/Controllers/VectorizationRequestController.cs index 82d618f162..d0e7fd1aab 100644 --- a/src/dotnet/VectorizationAPI/Controllers/VectorizationRequestController.cs +++ b/src/dotnet/VectorizationAPI/Controllers/VectorizationRequestController.cs @@ -28,7 +28,7 @@ public class VectorizationRequestController( /// /// [HttpPost] - public async Task ProcessRequest([FromBody] VectorizationRequest vectorizationRequest) => - await _vectorizationService.ProcessRequest(vectorizationRequest); + public async Task ProcessRequest([FromBody] VectorizationRequest vectorizationRequest) => + new OkObjectResult(await _vectorizationService.ProcessRequest(vectorizationRequest)); } } diff --git a/src/dotnet/VectorizationAPI/Program.cs b/src/dotnet/VectorizationAPI/Program.cs index f41c8a1330..1463c377c6 100644 --- a/src/dotnet/VectorizationAPI/Program.cs +++ b/src/dotnet/VectorizationAPI/Program.cs @@ -4,11 +4,21 @@ using 
FoundationaLLM.Common.Constants; using FoundationaLLM.Common.Interfaces; using FoundationaLLM.Common.OpenAPI; +using FoundationaLLM.Common.Services.Tokenizers; +using FoundationaLLM.Common.Services; +using FoundationaLLM.Common.Settings; +using FoundationaLLM.SemanticKernel.Core.Models.Configuration; +using FoundationaLLM.SemanticKernel.Core.Services; using FoundationaLLM.Vectorization.Interfaces; using FoundationaLLM.Vectorization.Models.Configuration; +using FoundationaLLM.Vectorization.ResourceProviders; using FoundationaLLM.Vectorization.Services; +using FoundationaLLM.Vectorization.Services.ContentSources; using FoundationaLLM.Vectorization.Services.RequestSources; +using FoundationaLLM.Vectorization.Services.Text; +using FoundationaLLM.Vectorization.Services.VectorizationStates; using Microsoft.ApplicationInsights.AspNetCore.Extensions; +using Microsoft.Extensions.Options; using Microsoft.Extensions.Primitives; var builder = WebApplication.CreateBuilder(args); @@ -52,20 +62,75 @@ builder.Services.AddOptions() .Bind(builder.Configuration.GetSection(AppConfigurationKeys.FoundationaLLM_Vectorization_VectorizationWorker)); -builder.Services.AddSingleton( +builder.Services.AddOptions( + DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService) + .Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_ResourceProviderService_Storage)); + +builder.Services.AddOptions() + .Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService)); + +builder.Services.AddOptions() + .Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_AzureAISearchIndexingService)); + +builder.Services.AddKeyedSingleton( typeof(IConfigurationSection), + DependencyInjectionKeys.FoundationaLLM_Vectorization_Queues, builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_Queues)); 
+builder.Services.AddKeyedSingleton( + typeof(IConfigurationSection), + DependencyInjectionKeys.FoundationaLLM_Vectorization_Steps, + builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_Steps)); + // Add services to the container. -builder.Services.AddTransient(); -builder.Services.AddScoped(); -builder.Services.AddSingleton(); +builder.Services.AddKeyedSingleton( + DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService, (sp, obj) => + { + var settings = sp.GetRequiredService>() + .Get(DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService); + var logger = sp.GetRequiredService>(); + + return new BlobStorageService( + Options.Create(settings), + logger); + }); + +// Vectorization state +builder.Services.AddSingleton(); + +// Vectorization resource provider +builder.Services.AddKeyedSingleton( + DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService); +builder.Services.ActivateKeyedSingleton( + DependencyInjectionKeys.FoundationaLLM_Vectorization_ResourceProviderService); + +// Service factories +builder.Services.AddSingleton, ContentSourceServiceFactory>(); +builder.Services.AddSingleton, TextSplitterServiceFactory>(); +builder.Services.AddSingleton, TextEmbeddingServiceFactory>(); +builder.Services.AddSingleton, IndexingServiceFactory>(); + +// Tokenizer +builder.Services.AddKeyedSingleton(TokenizerServiceNames.MICROSOFT_BPE_TOKENIZER); +builder.Services.ActivateKeyedSingleton(TokenizerServiceNames.MICROSOFT_BPE_TOKENIZER); -// Activate singleton services +// Text embedding +builder.Services.AddKeyedSingleton( + DependencyInjectionKeys.FoundationaLLM_Vectorization_SemanticKernelTextEmbeddingService); +// Indexing +builder.Services.AddKeyedSingleton( + DependencyInjectionKeys.FoundationaLLM_Vectorization_AzureAISearchIndexingService); + +// Request sources cache +builder.Services.AddSingleton(); builder.Services.ActivateSingleton(); +// Vectorization 
+builder.Services.AddScoped(); + +builder.Services.AddTransient(); builder.Services.AddControllers(); // Add API Key Authorization diff --git a/src/dotnet/VectorizationWorker/Program.cs b/src/dotnet/VectorizationWorker/Program.cs index e7540c0c47..ad61f014c6 100644 --- a/src/dotnet/VectorizationWorker/Program.cs +++ b/src/dotnet/VectorizationWorker/Program.cs @@ -72,12 +72,15 @@ builder.Services.AddOptions() .Bind(builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_AzureAISearchIndexingService)); -builder.Services.AddSingleton( - typeof(IEnumerable), - new IConfigurationSection[] { - builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_Queues), - builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_Steps) - }); +builder.Services.AddKeyedSingleton( + typeof(IConfigurationSection), + DependencyInjectionKeys.FoundationaLLM_Vectorization_Queues, + builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_Queues)); + +builder.Services.AddKeyedSingleton( + typeof(IConfigurationSection), + DependencyInjectionKeys.FoundationaLLM_Vectorization_Steps, + builder.Configuration.GetSection(AppConfigurationKeySections.FoundationaLLM_Vectorization_Steps)); // Add services to the container. diff --git a/src/dotnet/VectorizationWorker/Worker.cs b/src/dotnet/VectorizationWorker/Worker.cs index dd322c6cf0..8e25f45205 100644 --- a/src/dotnet/VectorizationWorker/Worker.cs +++ b/src/dotnet/VectorizationWorker/Worker.cs @@ -14,19 +14,22 @@ namespace FoundationaLLM.Vectorization.Worker /// /// The used to manage the vectorization state. /// The options holding the vectorization worker settings. - /// The list of configuration sections required by the vectorization worker builder. + /// The configuration section containing settings for the queues. + /// The configuration section containing settings for the vectorization steps. /// The implemented by the dependency injection container. 
/// The used to create loggers in child objects. public class Worker( IVectorizationStateService stateService, IOptions settings, - IEnumerable configurationSections, + [FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_Queues)] IConfigurationSection queuesConfigurationSection, + [FromKeyedServices(DependencyInjectionKeys.FoundationaLLM_Vectorization_Steps)] IConfigurationSection stepsConfigurationSection, IServiceProvider serviceProvider, ILoggerFactory loggerFactory) : BackgroundService { private readonly IVectorizationStateService _stateService = stateService; private readonly VectorizationWorkerSettings _settings = settings.Value; - private readonly IEnumerable _configurationSections = configurationSections; + private readonly IConfigurationSection _queuesConfigurationSection = queuesConfigurationSection; + private readonly IConfigurationSection _stepsConfigurationSection = stepsConfigurationSection; private readonly IServiceProvider _serviceProvider = serviceProvider; private readonly ILoggerFactory _loggerFactory = loggerFactory; @@ -36,8 +39,8 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken) var vectorizationWorker = new VectorizationWorkerBuilder() .WithStateService(_stateService) .WithSettings(_settings) - .WithQueuesConfiguration(_configurationSections.Single(cs => cs.Path == AppConfigurationKeySections.FoundationaLLM_Vectorization_Queues)) - .WithStepsConfiguration(_configurationSections.Single(cs => cs.Path == AppConfigurationKeySections.FoundationaLLM_Vectorization_Steps)) + .WithQueuesConfiguration(_queuesConfigurationSection) + .WithStepsConfiguration(_stepsConfigurationSection) .WithServiceProvider(_serviceProvider) .WithLoggerFactory(_loggerFactory) .WithCancellationToken(stoppingToken)