diff --git a/src/dotnet/Vectorization/Models/Configuration/AzureSQLDatabaseContentSourceServiceSettings.cs b/src/dotnet/Vectorization/Models/Configuration/AzureSQLDatabaseContentSourceServiceSettings.cs
new file mode 100644
index 0000000000..dfe4447331
--- /dev/null
+++ b/src/dotnet/Vectorization/Models/Configuration/AzureSQLDatabaseContentSourceServiceSettings.cs
@@ -0,0 +1,13 @@
+namespace FoundationaLLM.Vectorization.Models.Configuration
+{
+ /// <summary>
+ /// Provides configuration settings to initialize an Azure SQL Database content source service.
+ /// </summary>
+ public class AzureSQLDatabaseContentSourceServiceSettings
+ {
+ /// <summary>
+ /// The connection string used to connect to, and authenticate against, the database. May be null when not configured.
+ /// </summary>
+ public string? ConnectionString { get; set; }
+ }
+}
diff --git a/src/dotnet/Vectorization/Models/Resources/ContentSourceType.cs b/src/dotnet/Vectorization/Models/Resources/ContentSourceType.cs
index 89266d3eb5..88fb4cc654 100644
--- a/src/dotnet/Vectorization/Models/Resources/ContentSourceType.cs
+++ b/src/dotnet/Vectorization/Models/Resources/ContentSourceType.cs
@@ -19,6 +19,11 @@ public enum ContentSourceType
///
/// SharePoint Online document library.
///
- SharePointOnline
+ SharePointOnline,
+
+ ///
+ /// Azure SQL Database.
+ ///
+ AzureSQLDatabase
}
}
diff --git a/src/dotnet/Vectorization/Services/ContentSources/AzureSQLDatabaseContentSourceService.cs b/src/dotnet/Vectorization/Services/ContentSources/AzureSQLDatabaseContentSourceService.cs
new file mode 100644
index 0000000000..4dcf1e24a0
--- /dev/null
+++ b/src/dotnet/Vectorization/Services/ContentSources/AzureSQLDatabaseContentSourceService.cs
@@ -0,0 +1,87 @@
+using FoundationaLLM.Common.Constants;
+using FoundationaLLM.Vectorization.DataFormats.PDF;
+using FoundationaLLM.Vectorization.Exceptions;
+using FoundationaLLM.Vectorization.Interfaces;
+using FoundationaLLM.Vectorization.Models.Configuration;
+using Microsoft.Data.SqlClient;
+using Microsoft.Extensions.Logging;
+
+namespace FoundationaLLM.Vectorization.Services.ContentSources
+{
+    /// <summary>
+    /// Implements a vectorization content source for content residing in Azure SQL.
+    /// </summary>
+    public class AzureSQLDatabaseContentSourceService : ContentSourceServiceBase, IContentSourceService
+    {
+        private readonly ILogger _logger;
+        private readonly AzureSQLDatabaseContentSourceServiceSettings _settings;
+
+        /// <summary>
+        /// Creates a new instance of the vectorization content source.
+        /// </summary>
+        /// <param name="settings">The settings providing the database connection string.</param>
+        /// <param name="loggerFactory">The factory used to create the logger for this service.</param>
+        public AzureSQLDatabaseContentSourceService(
+            AzureSQLDatabaseContentSourceServiceSettings settings,
+            ILoggerFactory loggerFactory)
+        {
+            _settings = settings;
+            _logger = loggerFactory.CreateLogger<AzureSQLDatabaseContentSourceService>();
+        }
+
+        /// <inheritdoc/>
+        public async Task<string> ExtractTextFromFileAsync(List<string> multipartId, CancellationToken cancellationToken)
+        {
+            ValidateMultipartId(multipartId, 5);
+
+            // Parts, in order: schema, table, content column, identifier column, identifier value.
+            var binaryContent = await GetBinaryContent(
+                multipartId[0], multipartId[1], multipartId[2], multipartId[3], multipartId[4],
+                cancellationToken);
+
+            return await ExtractTextFromFileAsync(multipartId[4], binaryContent);
+        }
+
+        /// <summary>
+        /// Retrieves the binary content of a single database record.
+        /// </summary>
+        /// <param name="schema">The database schema containing the target table.</param>
+        /// <param name="tableName">The name of the table from which to retrieve binary content.</param>
+        /// <param name="contentColumnName">The name of the column containing binary data.</param>
+        /// <param name="identifierColumnName">The name of the column used for identifying the specific record.</param>
+        /// <param name="identifierValue">The value identifying the specific record in the identifier column.</param>
+        /// <param name="cancellationToken">The cancellation token that signals that operations should be cancelled.</param>
+        /// <returns>An object representing the binary contents.</returns>
+        /// <exception cref="VectorizationException">The record is missing, holds no binary data, or the query fails.</exception>
+        private async Task<BinaryData> GetBinaryContent(string schema, string tableName, string contentColumnName, string identifierColumnName, string identifierValue, CancellationToken cancellationToken)
+        {
+            try
+            {
+                using var connection = new SqlConnection(_settings.ConnectionString);
+                await connection.OpenAsync(cancellationToken);
+
+                // Identifiers cannot be SQL parameters: bracket-delimit each one and double any embedded ']'.
+                static string Quote(string identifier) => "[" + identifier.Replace("]", "]]") + "]";
+
+                using var command = new SqlCommand(
+                    $"SELECT TOP 1 {Quote(contentColumnName)} FROM {Quote(schema)}.{Quote(tableName)} WHERE {Quote(identifierColumnName)} = @identifierValue",
+                    connection);
+                command.Parameters.Add(new SqlParameter("@identifierValue", identifierValue));
+
+                using var reader = await command.ExecuteReaderAsync(cancellationToken);
+                if (!await reader.ReadAsync(cancellationToken))
+                    throw new VectorizationException($"The file {identifierValue} was not found in the database.");
+
+                // A value typed as object would select BinaryData's JSON-serializing constructor,
+                // so it is narrowed to byte[] to keep the raw bytes (this also rejects DBNull).
+                if (reader.GetValue(0) is not byte[] content)
+                    throw new VectorizationException($"The file {identifierValue} has no binary content in the database.");
+                return new BinaryData(content);
+            }
+            catch (Exception ex) when (ex is not VectorizationException)
+            {
+                throw new VectorizationException($"Error when extracting content from file identified by {identifierValue} in Azure SQL Database.", ex);
+            }
+        }
+    }
+}
diff --git a/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs b/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs
index 924f78bc5d..8d39028bee 100644
--- a/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs
+++ b/src/dotnet/Vectorization/Services/ContentSources/ContentSourceServiceFactory.cs
@@ -41,6 +41,7 @@ public IContentSourceService GetService(string serviceName)
{
ContentSourceType.AzureDataLake => CreateAzureDataLakeContentSourceService(serviceName),
ContentSourceType.SharePointOnline => CreateSharePointOnlineContentSourceService(serviceName),
+ ContentSourceType.AzureSQLDatabase => CreateAzureSQLDatabaseContentSourceService(serviceName),
_ => throw new VectorizationException($"The content source type {contentSourceProfile.Type} is not supported."),
};
}
@@ -55,6 +56,7 @@ public IContentSourceService GetService(string serviceName)
{
ContentSourceType.AzureDataLake => (CreateAzureDataLakeContentSourceService(serviceName), contentSourceProfile),
ContentSourceType.SharePointOnline => (CreateSharePointOnlineContentSourceService(serviceName), contentSourceProfile),
+ ContentSourceType.AzureSQLDatabase => (CreateAzureSQLDatabaseContentSourceService(serviceName), contentSourceProfile),
_ => throw new VectorizationException($"The content source type {contentSourceProfile.Type} is not supported."),
};
}
@@ -83,5 +85,17 @@ private SharePointOnlineContentSourceService CreateSharePointOnlineContentSource
sharePointOnlineContentSourceServiceSettings,
_loggerFactory);
}
+
+        private AzureSQLDatabaseContentSourceService CreateAzureSQLDatabaseContentSourceService(string serviceName)
+        {
+            // Each content source keeps its settings in a configuration section named after the service.
+            var settingsSection =
+                $"{AppConfigurationKeySections.FoundationaLLM_Vectorization_ContentSources}:{serviceName}";
+
+            var settings = new AzureSQLDatabaseContentSourceServiceSettings();
+            _configuration.Bind(settingsSection, settings);
+
+            return new AzureSQLDatabaseContentSourceService(settings, _loggerFactory);
+        }
}
}
diff --git a/src/dotnet/Vectorization/Vectorization.csproj b/src/dotnet/Vectorization/Vectorization.csproj
index 5c0decc269..0a663fc22c 100644
--- a/src/dotnet/Vectorization/Vectorization.csproj
+++ b/src/dotnet/Vectorization/Vectorization.csproj
@@ -15,6 +15,7 @@
+
diff --git a/src/dotnet/VectorizationAPI/VectorizationAPI.csproj b/src/dotnet/VectorizationAPI/VectorizationAPI.csproj
index fb544ce400..0d9a773b7c 100644
--- a/src/dotnet/VectorizationAPI/VectorizationAPI.csproj
+++ b/src/dotnet/VectorizationAPI/VectorizationAPI.csproj
@@ -4,7 +4,7 @@
net8.0
enable
enable
- true
+ false
35b5c460-a49c-4185-a169-676d90673146
FoundationaLLM.Vectorization.API
FoundationaLLM.Vectorization.API