diff --git a/tests/dotnet/Core.Examples/Catalogs/DataSourceCatalog.cs b/tests/dotnet/Core.Examples/Catalogs/DataSourceCatalog.cs index 3bb9ccadb1..d2d2ac7c45 100644 --- a/tests/dotnet/Core.Examples/Catalogs/DataSourceCatalog.cs +++ b/tests/dotnet/Core.Examples/Catalogs/DataSourceCatalog.cs @@ -6,8 +6,8 @@ public static class DataSourceCatalog { public static readonly List Items = [ - new AzureDataLakeDataSource { Name = "datalake_vectorization_input", DisplayName = "datalake_vectorization_input", ConfigurationReferences = new Dictionary { { "AuthenticationType", "FoundationaLLM:DataSources:datalake_vectorization_input:AuthenticationType" }, { "AccountName", "FoundationaLLM:DataSources:datalake_vectorization_input:AccountName" } }, Folders = new List { "vectorization-input" } } - + new AzureDataLakeDataSource { Name = "datalake_vectorization_input", DisplayName = "datalake_vectorization_input", ConfigurationReferences = new Dictionary { { "AuthenticationType", "FoundationaLLM:DataSources:datalake_vectorization_input:AuthenticationType" }, { "AccountName", "FoundationaLLM:DataSources:datalake_vectorization_input:AccountName" } }, Folders = new List { "vectorization-input" } }, + new SharePointOnlineSiteDataSource { Name = "sharepoint_fllm", DisplayName="sharepoint_fllm", SiteUrl="https://fllm.sharepoint.com/sites/FoundationaLLM", DocumentLibraries=["/documents02"], ConfigurationReferences = new Dictionary{ {"ClientId", "FoundationaLLM:DataSources:sharepoint_fllm:ClientId" },{"TenantId", "FoundationaLLM:DataSources:sharepoint_fllm:TenantId" },{"CertificateName", "FoundationaLLM:DataSources:sharepoint_fllm:CertificateName" },{ "KeyVaultURL", "FoundationaLLM:DataSources:sharepoint_fllm:KeyVaultURL" } } } ]; public static List GetDataSources() diff --git a/tests/dotnet/Core.Examples/Catalogs/IndexingProfilesCatalog.cs b/tests/dotnet/Core.Examples/Catalogs/IndexingProfilesCatalog.cs index 1d02076425..991bc4a2b6 100644 --- 
a/tests/dotnet/Core.Examples/Catalogs/IndexingProfilesCatalog.cs +++ b/tests/dotnet/Core.Examples/Catalogs/IndexingProfilesCatalog.cs @@ -7,7 +7,7 @@ public static class IndexingProfilesCatalog public static readonly List Items = [ new IndexingProfile { Name = "indexing_profile_really_big", Indexer = IndexerType.AzureAISearchIndexer, Settings = new Dictionary{ { "IndexName", "reallybig" }, { "TopN", "3" }, { "Filters", "" }, { "EmbeddingFieldName", "Embedding" }, { "TextFieldName", "Text" } }, ConfigurationReferences = new Dictionary{ { "AuthenticationType", "FoundationaLLM:Vectorization:AzureAISearchIndexingService:AuthenticationType" }, { "Endpoint", "FoundationaLLM:Vectorization:AzureAISearchIndexingService:Endpoint" } } }, - new IndexingProfile { Name = "indexing_profile_pdf_datalake", Indexer = IndexerType.AzureAISearchIndexer, Settings = new Dictionary{ { "IndexName", "pdfdatalake" }, { "TopN", "3" }, { "Filters", "" }, { "EmbeddingFieldName", "Embedding" }, { "TextFieldName", "Text" } }, ConfigurationReferences = new Dictionary{ { "AuthenticationType", "FoundationaLLM:Vectorization:AzureAISearchIndexingService:AuthenticationType" }, { "Endpoint", "FoundationaLLM:Vectorization:AzureAISearchIndexingService:Endpoint" } } } + new IndexingProfile { Name = "indexing_profile_pdf", Indexer = IndexerType.AzureAISearchIndexer, Settings = new Dictionary{ { "IndexName", "pdf" }, { "TopN", "3" }, { "Filters", "" }, { "EmbeddingFieldName", "Embedding" }, { "TextFieldName", "Text" } }, ConfigurationReferences = new Dictionary{ { "AuthenticationType", "FoundationaLLM:Vectorization:AzureAISearchIndexingService:AuthenticationType" }, { "Endpoint", "FoundationaLLM:Vectorization:AzureAISearchIndexingService:Endpoint" } } } ]; public static List GetIndexingProfiles() diff --git a/tests/dotnet/Core.Examples/Example0004_SynchronousVectorizationOfPDFFromDataLake.cs b/tests/dotnet/Core.Examples/Example0004_SynchronousVectorizationOfPDFFromDataLake.cs index 835bf42358..9e46a411e9 
100644 --- a/tests/dotnet/Core.Examples/Example0004_SynchronousVectorizationOfPDFFromDataLake.cs +++ b/tests/dotnet/Core.Examples/Example0004_SynchronousVectorizationOfPDFFromDataLake.cs @@ -20,18 +20,20 @@ namespace FoundationaLLM.Core.Examples /// FoundationaLLM:DataSources:datalake_vectorization_input:AccountName /// Expects the following document in the storage account: /// /vectorization-input/SDZWA-Journal-January-2024.pdf + /// References: + /// PDF public source: https://sandiegozoowildlifealliance.org/Journal/january-2024 /// public class Example0004_SynchronousVectorizationOfPDFFromDataLake: BaseTest, IClassFixture { private readonly IVectorizationTestService _vectorizationTestService; private InstanceSettings _instanceSettings; - private string containerName = "vectorization-input"; + private string containerName = "vectorization-input"; private string blobName = "SDZWA-Journal-January-2024.pdf"; private string dataSourceName = "datalake_vectorization_input"; private string dataSourceObjectId = String.Empty; private string textPartitioningProfileName = "text_partition_profile"; private string textEmbeddingProfileName = "text_embedding_profile_generic"; - private string indexingProfileName = "indexing_profile_pdf_datalake"; + private string indexingProfileName = "indexing_profile_pdf"; private string searchString = "Kurt and Ollie"; private string id = String.Empty; private BlobStorageServiceSettings? 
_settings; @@ -60,6 +62,8 @@ private async Task RunExampleAsync() WriteLine($"Create the data source: {dataSourceName} via the Management API"); await _vectorizationTestService.CreateDataSource(dataSourceName); + Thread.Sleep(5000); // processing too quickly, pause after the creation of the data source + WriteLine($"Create the vectorization text partitioning profile: {textPartitioningProfileName} via the Management API"); await _vectorizationTestService.CreateTextPartitioningProfile(textPartitioningProfileName); @@ -141,7 +145,7 @@ private async Task RunExampleAsync() WriteLine($"Delete the vectorization text embedding profile: {textEmbeddingProfileName} via the Management API"); await _vectorizationTestService.DeleteTextEmbeddingProfile(textEmbeddingProfileName); - WriteLine($"Delete the vectorization indexing profile: {indexingProfileName} via the Management API"); + WriteLine($"Delete the vectorization indexing profile: {indexingProfileName} via the Management API and delete the created index"); await _vectorizationTestService.DeleteIndexingProfile(indexingProfileName, true); } } diff --git a/tests/dotnet/Core.Examples/Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs b/tests/dotnet/Core.Examples/Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs index 4d9532c741..984bb74ff6 100644 --- a/tests/dotnet/Core.Examples/Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs +++ b/tests/dotnet/Core.Examples/Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs @@ -20,18 +20,20 @@ namespace FoundationaLLM.Core.Examples /// FoundationaLLM:DataSources:datalake_vectorization_input:AccountName /// Expects the following document in the storage account: /// /vectorization-input/SDZWA-Journal-January-2024.pdf + /// References: + /// PDF public source: https://sandiegozoowildlifealliance.org/Journal/january-2024 /// public class Example0005_AsynchronousVectorizationOfPDFFromDataLake: BaseTest, IClassFixture { private readonly IVectorizationTestService 
_vectorizationTestService; private InstanceSettings _instanceSettings; - private string containerName = "vectorization-input"; + private string containerName = "vectorization-input"; private string blobName = "SDZWA-Journal-January-2024.pdf"; private string dataSourceName = "datalake_vectorization_input"; private string dataSourceObjectId = String.Empty; private string textPartitioningProfileName = "text_partition_profile"; private string textEmbeddingProfileName = "text_embedding_profile_generic"; - private string indexingProfileName = "indexing_profile_pdf_datalake"; + private string indexingProfileName = "indexing_profile_pdf"; private string searchString = "Kurt and Ollie"; private string id = String.Empty; private BlobStorageServiceSettings? _settings; @@ -60,6 +62,8 @@ private async Task RunExampleAsync() WriteLine($"Create the data source: {dataSourceName} via the Management API"); await _vectorizationTestService.CreateDataSource(dataSourceName); + Thread.Sleep(5000); // processing too quickly, pause after the creation of the data source + WriteLine($"Create the vectorization text partitioning profile: {textPartitioningProfileName} via the Management API"); await _vectorizationTestService.CreateTextPartitioningProfile(textPartitioningProfileName); @@ -163,7 +167,7 @@ private async Task RunExampleAsync() WriteLine($"Delete the vectorization text embedding profile: {textEmbeddingProfileName} via the Management API"); await _vectorizationTestService.DeleteTextEmbeddingProfile(textEmbeddingProfileName); - WriteLine($"Delete the vectorization indexing profile: {indexingProfileName} via the Management API"); + WriteLine($"Delete the vectorization indexing profile: {indexingProfileName} via the Management API along with the index"); await _vectorizationTestService.DeleteIndexingProfile(indexingProfileName, true); } } diff --git a/tests/dotnet/Core.Examples/Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs 
b/tests/dotnet/Core.Examples/Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs new file mode 100644 index 0000000000..748c6f6396 --- /dev/null +++ b/tests/dotnet/Core.Examples/Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs @@ -0,0 +1,147 @@ +using FoundationaLLM.Common.Constants.ResourceProviders; +using FoundationaLLM.Common.Models.Configuration.Instance; +using FoundationaLLM.Common.Models.ResourceProviders.Vectorization; +using FoundationaLLM.Common.Models.Vectorization; +using FoundationaLLM.Core.Examples.Interfaces; +using FoundationaLLM.Core.Examples.Models; +using FoundationaLLM.Core.Examples.Setup; +using Xunit.Abstractions; + +namespace FoundationaLLM.Core.Examples +{ + /// + /// Example class for running synchronous vectorization over a PDF file in a SharePoint Online Site document library. + /// Expects the following configuration values: + /// FoundationaLLM:DataSources:sharepoint_fllm:ClientId + /// FoundationaLLM:DataSources:sharepoint_fllm:TenantId + /// FoundationaLLM:DataSources:sharepoint_fllm:CertificateName + /// FoundationaLLM:DataSources:sharepoint_fllm:KeyVaultURL + /// References: + /// PDF public source: https://sandiegozoowildlifealliance.org/Journal/january-2024 + /// + public class Example0008_SynchronousVectorizationOfPDFFromSharePoint: BaseTest, IClassFixture + { + private readonly IVectorizationTestService _vectorizationTestService; + private InstanceSettings _instanceSettings; + private string dataSourceName = "sharepoint_fllm"; + private string dataSourceObjectId = String.Empty; + private string textPartitioningProfileName = "text_partition_profile"; + private string textEmbeddingProfileName = "text_embedding_profile_generic"; + private string indexingProfileName = "indexing_profile_pdf"; + private string searchString = "Kurt and Ollie"; + private string id = String.Empty; + private SharePointVectorizationConfiguration _sharePointVectorizationConfiguration; + + public 
Example0008_SynchronousVectorizationOfPDFFromSharePoint(ITestOutputHelper output, TestFixture fixture) + : base(output, fixture.ServiceProvider) + { + _vectorizationTestService = GetService(); + _instanceSettings = _vectorizationTestService.InstanceSettings; + dataSourceObjectId = $"/instances/{_instanceSettings.Id}/providers/FoundationaLLM.DataSource/dataSources/{dataSourceName}"; + id = Guid.NewGuid().ToString(); + _sharePointVectorizationConfiguration = TestConfiguration.SharePointVectorizationConfiguration; + } + + [Fact] + public async Task RunAsync() + { + WriteLine("============ Synchronous Vectorization of a PDF from SharePoint Online ============"); + await RunExampleAsync(); + } + + private async Task RunExampleAsync() + { + WriteLine($"Create the data source: {dataSourceName} via the Management API"); + await _vectorizationTestService.CreateDataSource(dataSourceName); + + Thread.Sleep(5000); // processing too quickly, pause after the creation of the data source + + WriteLine($"Create the vectorization text partitioning profile: {textPartitioningProfileName} via the Management API"); + await _vectorizationTestService.CreateTextPartitioningProfile(textPartitioningProfileName); + + WriteLine($"Create the vectorization text embedding profile: {textEmbeddingProfileName} via the Management API"); + await _vectorizationTestService.CreateTextEmbeddingProfile(textEmbeddingProfileName); + + WriteLine($"Create the vectorization indexing profile: {indexingProfileName} via the Management API"); + await _vectorizationTestService.CreateIndexingProfile(indexingProfileName); + + // used for defining the canonical id, optionally remove the file extension + var fileNameWithoutExtension = _sharePointVectorizationConfiguration.FileName.Substring(0, _sharePointVectorizationConfiguration.FileName.LastIndexOf('.')); + + ContentIdentifier ci = new ContentIdentifier + { + DataSourceObjectId = dataSourceObjectId, + MultipartId = new List + { + 
_sharePointVectorizationConfiguration.HostName, + _sharePointVectorizationConfiguration.SitePath, + _sharePointVectorizationConfiguration.FolderPath, + _sharePointVectorizationConfiguration.FileName + }, + CanonicalId = $"{_sharePointVectorizationConfiguration.FolderPath}/{fileNameWithoutExtension}" + }; + + WriteLine($"Create the vectorization request: {id} via the Management API"); + List steps = + [ + new VectorizationStep { Id = "extract", Parameters = new Dictionary() }, + new VectorizationStep { Id = "partition", Parameters = new Dictionary() { { "text_partitioning_profile_name", textPartitioningProfileName } } }, + new VectorizationStep { Id = "embed", Parameters = new Dictionary() { { "text_embedding_profile_name", textEmbeddingProfileName } } }, + new VectorizationStep { Id = "index", Parameters = new Dictionary() { { "indexing_profile_name", indexingProfileName } } }, + ]; + var request = new VectorizationRequest + { + RemainingSteps = new List { "extract", "partition", "embed", "index" }, + CompletedSteps = new List(), + ProcessingType = VectorizationProcessingType.Synchronous, + ContentIdentifier = ci, + Id = id, + Steps = steps, + ObjectId = $"{VectorizationResourceTypeNames.VectorizationRequests}/{id}" + }; + //Create the vectorization request, re-assign the fully qualified object id if desired. + request.ObjectId = await _vectorizationTestService.CreateVectorizationRequest(request); + + WriteLine($"Verify the vectorization request {id} was created by retrieving it from the Management API"); + var resource = await _vectorizationTestService.GetVectorizationRequest(request); + if (resource == null) + throw new Exception("Vectorization request failed creation. 
Invalid result was returned."); + + WriteLine($"Issue the process action on the vectorization request: {id} via the Management API"); + var vectorizationResult = await _vectorizationTestService.ProcessVectorizationRequest(request); + + // Ensure the vectorization request was successful + if (vectorizationResult == null) + throw new Exception("Vectorization request failed to complete successfully. Invalid result was returned."); + + if (vectorizationResult.IsSuccess == false) + { + //retrieve more verbose error logging from resource.... + resource = await _vectorizationTestService.GetVectorizationRequest(request); + throw new Exception($"Vectorization request failed to complete successfully. Message(s):\n{string.Join("\n", resource.ErrorMessages)}"); + } + + WriteLine($"Vectorization request: {id} completed successfully."); + + WriteLine($"Verify a search yields 27 documents."); + TestSearchResult result = await _vectorizationTestService.QueryIndex(indexingProfileName, textEmbeddingProfileName, searchString); + if(result.QueryResult.TotalCount!=27) + throw new Exception($"Query did not return the expected number of query results. Expected: 27, Retrieved: {result.QueryResult.TotalCount}"); + if(result.VectorResults.TotalCount!=27) + throw new Exception($"Query did not return the expected number of vector results. 
Expected: 27, Retrieved: {result.VectorResults.TotalCount}"); + + WriteLine($"Delete the data source: {dataSourceName} via the Management API"); + await _vectorizationTestService.DeleteDataSource(dataSourceName); + + WriteLine($"Delete the vectorization text partitioning profile: {textPartitioningProfileName} via the Management API"); + await _vectorizationTestService.DeleteTextPartitioningProfile(textPartitioningProfileName); + + WriteLine($"Delete the vectorization text embedding profile: {textEmbeddingProfileName} via the Management API"); + await _vectorizationTestService.DeleteTextEmbeddingProfile(textEmbeddingProfileName); + + WriteLine($"Delete the vectorization indexing profile: {indexingProfileName} via the Management API along with the index"); + await _vectorizationTestService.DeleteIndexingProfile(indexingProfileName, true); + + } + } +} diff --git a/tests/dotnet/Core.Examples/Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs b/tests/dotnet/Core.Examples/Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs new file mode 100644 index 0000000000..07ea679a07 --- /dev/null +++ b/tests/dotnet/Core.Examples/Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs @@ -0,0 +1,168 @@ +using FoundationaLLM.Common.Constants.ResourceProviders; +using FoundationaLLM.Common.Models.Configuration.Instance; +using FoundationaLLM.Common.Models.ResourceProviders.Vectorization; +using FoundationaLLM.Common.Models.Vectorization; +using FoundationaLLM.Core.Examples.Interfaces; +using FoundationaLLM.Core.Examples.Models; +using FoundationaLLM.Core.Examples.Setup; +using Xunit.Abstractions; + +namespace FoundationaLLM.Core.Examples +{ + /// + /// Example class for running asynchronous vectorization over a PDF file in a SharePoint Online Site document library. 
+ /// Expects the following configuration values: + /// FoundationaLLM:DataSources:sharepoint_fllm:ClientId + /// FoundationaLLM:DataSources:sharepoint_fllm:TenantId + /// FoundationaLLM:DataSources:sharepoint_fllm:CertificateName + /// FoundationaLLM:DataSources:sharepoint_fllm:KeyVaultURL + /// References: + /// PDF public source: https://sandiegozoowildlifealliance.org/Journal/january-2024 + /// + public class Example0009_AsynchronousVectorizationOfPDFFromSharePoint: BaseTest, IClassFixture + { + private readonly IVectorizationTestService _vectorizationTestService; + private InstanceSettings _instanceSettings; + private string dataSourceName = "sharepoint_fllm"; + private string dataSourceObjectId = String.Empty; + private string textPartitioningProfileName = "text_partition_profile"; + private string textEmbeddingProfileName = "text_embedding_profile_generic"; + private string indexingProfileName = "indexing_profile_pdf"; + private string searchString = "Kurt and Ollie"; + private string id = String.Empty; + private SharePointVectorizationConfiguration _sharePointVectorizationConfiguration; + + public Example0009_AsynchronousVectorizationOfPDFFromSharePoint(ITestOutputHelper output, TestFixture fixture) + : base(output, fixture.ServiceProvider) + { + _vectorizationTestService = GetService(); + _instanceSettings = _vectorizationTestService.InstanceSettings; + dataSourceObjectId = $"/instances/{_instanceSettings.Id}/providers/FoundationaLLM.DataSource/dataSources/{dataSourceName}"; + id = Guid.NewGuid().ToString(); + _sharePointVectorizationConfiguration = TestConfiguration.SharePointVectorizationConfiguration; + } + + [Fact] + public async Task RunAsync() + { + WriteLine("============ Asynchronous Vectorization of a PDF from SharePoint Online ============"); + await RunExampleAsync(); + } + + private async Task RunExampleAsync() + { + WriteLine($"Create the data source: {dataSourceName} via the Management API"); + await 
_vectorizationTestService.CreateDataSource(dataSourceName); + + Thread.Sleep(5000); // processing too quickly, pause after the creation of the data source + + WriteLine($"Create the vectorization text partitioning profile: {textPartitioningProfileName} via the Management API"); + await _vectorizationTestService.CreateTextPartitioningProfile(textPartitioningProfileName); + + WriteLine($"Create the vectorization text embedding profile: {textEmbeddingProfileName} via the Management API"); + await _vectorizationTestService.CreateTextEmbeddingProfile(textEmbeddingProfileName); + + WriteLine($"Create the vectorization indexing profile: {indexingProfileName} via the Management API"); + await _vectorizationTestService.CreateIndexingProfile(indexingProfileName); + + // used for defining the canonical id, optionally remove the file extension + var fileNameWithoutExtension = _sharePointVectorizationConfiguration.FileName.Substring(0, _sharePointVectorizationConfiguration.FileName.LastIndexOf('.')); + + ContentIdentifier ci = new ContentIdentifier + { + DataSourceObjectId = dataSourceObjectId, + MultipartId = new List + { + _sharePointVectorizationConfiguration.HostName, + _sharePointVectorizationConfiguration.SitePath, + _sharePointVectorizationConfiguration.FolderPath, + _sharePointVectorizationConfiguration.FileName + }, + CanonicalId = $"{_sharePointVectorizationConfiguration.FolderPath}/{fileNameWithoutExtension}" + }; + + WriteLine($"Create the vectorization request: {id} via the Management API"); + List steps = + [ + new VectorizationStep { Id = "extract", Parameters = new Dictionary() }, + new VectorizationStep { Id = "partition", Parameters = new Dictionary() { { "text_partitioning_profile_name", textPartitioningProfileName } } }, + new VectorizationStep { Id = "embed", Parameters = new Dictionary() { { "text_embedding_profile_name", textEmbeddingProfileName } } }, + new VectorizationStep { Id = "index", Parameters = new Dictionary() { { "indexing_profile_name", 
indexingProfileName } } }, + ]; + var request = new VectorizationRequest + { + RemainingSteps = new List { "extract", "partition", "embed", "index" }, + CompletedSteps = new List(), + ProcessingType = VectorizationProcessingType.Asynchronous, + ContentIdentifier = ci, + Id = id, + Steps = steps, + ObjectId = $"{VectorizationResourceTypeNames.VectorizationRequests}/{id}" + }; + //Create the vectorization request, re-assign the fully qualified object id if desired. + request.ObjectId = await _vectorizationTestService.CreateVectorizationRequest(request); + + WriteLine($"Verify the vectorization request {id} was created by retrieving it from the Management API"); + var resource = await _vectorizationTestService.GetVectorizationRequest(request); + if (resource == null) + throw new Exception("Vectorization request failed creation. Invalid result was returned."); + + WriteLine($"Issue the process action on the vectorization request: {id} via the Management API"); + var vectorizationResult = await _vectorizationTestService.ProcessVectorizationRequest(request); + + // Ensure the vectorization request was successful + if (vectorizationResult == null) + throw new Exception("Vectorization request failed to complete successfully. Invalid result was returned."); + + if (vectorizationResult.IsSuccess == false) + { + //retrieve more verbose error logging from resource.... + resource = await _vectorizationTestService.GetVectorizationRequest(request); + throw new Exception($"Vectorization request failed to complete successfully. Message(s):\n{string.Join("\n", resource.ErrorMessages)}"); + } + + WriteLine($"Get the initial processing state for the vectorization request: {id} via the Management API"); + //As this is an asynchronous request, poll the status of the vectorization request until it has completed (or failed). Retrieve initial state. 
+ resource = await _vectorizationTestService.GetVectorizationRequest(request); + + // The finalized state of the vectorization request is either "Completed" or "Failed" + // Give it a max of 10 minutes to complete, then exit loop and fail the test. + WriteLine($"Polling the processing state of the async vectorization request: {id} by retrieving the request from the Management API"); + int timeRemainingMilliseconds = 600000; + var pollDurationMilliseconds = 30000; //poll duration of 30 seconds + while (resource.ProcessingState != VectorizationProcessingState.Completed && resource.ProcessingState != VectorizationProcessingState.Failed && timeRemainingMilliseconds > 0) + { + Thread.Sleep(pollDurationMilliseconds); + timeRemainingMilliseconds -= pollDurationMilliseconds; + resource = await _vectorizationTestService.GetVectorizationRequest(request); + } + + if (resource.ProcessingState == VectorizationProcessingState.Failed) + throw new Exception($"Vectorization request failed to complete successfully. Error Messages:\n{string.Join("\n", resource.ErrorMessages)}"); + + if (timeRemainingMilliseconds <= 0) + throw new Exception("Vectorization request failed to complete successfully. Timeout exceeded."); + + WriteLine($"Vectorization request: {id} completed successfully."); + + WriteLine($"Verify a search yields 27 documents."); + TestSearchResult result = await _vectorizationTestService.QueryIndex(indexingProfileName, textEmbeddingProfileName, searchString); + if (result.QueryResult.TotalCount != 27) + throw new Exception($"Query did not return the expected number of query results. Expected: 27, Retrieved: {result.QueryResult.TotalCount}"); + if (result.VectorResults.TotalCount != 27) + throw new Exception($"Query did not return the expected number of vector results. 
Expected: 27, Retrieved: {result.VectorResults.TotalCount}"); + + WriteLine($"Delete the data source: {dataSourceName} via the Management API"); + await _vectorizationTestService.DeleteDataSource(dataSourceName); + + WriteLine($"Delete the vectorization text partitioning profile: {textPartitioningProfileName} via the Management API"); + await _vectorizationTestService.DeleteTextPartitioningProfile(textPartitioningProfileName); + + WriteLine($"Delete the vectorization text embedding profile: {textEmbeddingProfileName} via the Management API"); + await _vectorizationTestService.DeleteTextEmbeddingProfile(textEmbeddingProfileName); + + WriteLine($"Delete the vectorization indexing profile: {indexingProfileName} via the Management API and delete the created index"); + await _vectorizationTestService.DeleteIndexingProfile(indexingProfileName, true); + } + } +} diff --git a/tests/dotnet/Core.Examples/Models/SharePointVectorizationConfiguration.cs b/tests/dotnet/Core.Examples/Models/SharePointVectorizationConfiguration.cs new file mode 100644 index 0000000000..834edfd1a8 --- /dev/null +++ b/tests/dotnet/Core.Examples/Models/SharePointVectorizationConfiguration.cs @@ -0,0 +1,28 @@ +namespace FoundationaLLM.Core.Examples.Models +{ + /// + /// SharePoint Vectorization testing configuration. + /// + public class SharePointVectorizationConfiguration + { + /// + /// Host name of the SharePoint site without the protocol, ex: fllm.sharepoint.com. + /// + public string HostName { get; set; } = string.Empty; + + /// + /// Relative path of the site/subsite, ex: /sites/FoundationaLLM. + /// + public string SitePath { get; set; } = string.Empty; + + /// + /// The folder path, starting with the document library. + /// + public string FolderPath { get; set; } = string.Empty; + + /// + /// The file name of the document to vectorize. 
+ /// + public string FileName { get; set; } = string.Empty; + } +} diff --git a/tests/dotnet/Core.Examples/README.md b/tests/dotnet/Core.Examples/README.md index e4aa99dadc..2e2fb36735 100644 --- a/tests/dotnet/Core.Examples/README.md +++ b/tests/dotnet/Core.Examples/README.md @@ -31,6 +31,234 @@ You will see an output similar to the following after the test is completed: ![The completed test is displayed.](media/example-1-completed-test.png) + +### Example 4: Synchronous vectorization of a file located in Azure Data Lake Storage Gen2 + +**Purpose**: Run synchronous vectorization of a file located in Azure Data Lake Storage Gen2. + +**File**: [Example0004_SynchronousVectorizationOfPDFFromDataLake.cs](Example0004_SynchronousVectorizationOfPDFFromDataLake.cs) + +This example demonstrates a synchronous vectorization request for a file located in a storage account. + +#### Setup + +This example expects the following file named [`SDZWA-Journal-January-2024.pdf`](https://sandiegozoowildlifealliance.org/Journal/january-2024) to be located in the `vectorization-input` container in the data lake storage account created with the FoundationaLLM deployment. + +##### App Config settings +| Key | Value | Description | +| --- | --- | --- | +| `FoundationaLLM:DataSources:datalake_vectorization_input:AuthenticationType` | `AzureIdentity` | The authentication method for the vectorization api and vectorization job managed identities. | +| `FoundationaLLM:DataSources:datalake_vectorization_input:AccountName` | N/A | Account name of the storage account. | + +#### Running the example + +Run the example by running a test on the `Example0004_SynchronousVectorizationOfPDFFromDataLake.cs` file. You can run the test using the Visual Studio Test Explorer, the command line, or by simply right-clicking anywhere on the `Example0004_SynchronousVectorizationOfPDFFromDataLake.cs` file and selecting **Run Tests**. 
+ +You will see an output similar to the following after the test is completed: + +```text +============ Synchronous Vectorization of a PDF from Data Lake ============ +Create the data source: datalake_vectorization_input via the Management API +Create the vectorization text partitioning profile: text_partition_profile via the Management API +Create the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Create the vectorization indexing profile: indexing_profile_pdf via the Management API +Create the vectorization request: ab74c501-6e49-41ac-95bf-7284174564c8 via the Management API +Verify the vectorization request ab74c501-6e49-41ac-95bf-7284174564c8 was created by retrieving it from the Management API +Issue the process action on the vectorization request: ab74c501-6e49-41ac-95bf-7284174564c8 via the Management API +Vectorization request: ab74c501-6e49-41ac-95bf-7284174564c8 completed successfully. +Verify a search yields 27 documents. +Delete the data source: datalake_vectorization_input via the Management API +Delete the vectorization text partitioning profile: text_partition_profile via the Management API +Delete the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Delete the vectorization indexing profile: indexing_profile_pdf via the Management API and delete the created index +``` + +### Example 5: Asynchronous vectorization of a file located in Azure Data Lake Storage Gen2 + +**Purpose**: Run asynchronous vectorization of a file located in Azure Data Lake Storage Gen2. + +**File**: [Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs](Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs) + +This example demonstrates an asynchronous vectorization request for a file located in a storage account. 
+ +#### Setup + +This example expects the following file named [`SDZWA-Journal-January-2024.pdf`](https://sandiegozoowildlifealliance.org/Journal/january-2024) to be located in the `vectorization-input` container in the data lake storage account created with the FoundationaLLM deployment. + +##### App Config settings +| Key | Value | Description | +| --- | --- | --- | +| `FoundationaLLM:DataSources:datalake_vectorization_input:AuthenticationType` | `AzureIdentity` | The authentication method for the vectorization api and vectorization job managed identities. | +| `FoundationaLLM:DataSources:datalake_vectorization_input:AccountName` | N/A | Account name of the storage account. | + +#### Running the example + +Run the example by running a test on the `Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs` file. You can run the test using the Visual Studio Test Explorer, the command line, or by simply right-clicking anywhere on the `Example0005_AsynchronousVectorizationOfPDFFromDataLake.cs` file and selecting **Run Tests**. 
+ +You will see an output similar to the following after the test is completed: + +```text +============ Asynchronous Vectorization of a PDF from Data Lake ============ +Create the data source: datalake_vectorization_input via the Management API +Create the vectorization text partitioning profile: text_partition_profile via the Management API +Create the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Create the vectorization indexing profile: indexing_profile_pdf via the Management API +Create the vectorization request: 912e93ba-f5cb-4398-9ab6-c13f986269a2 via the Management API +Verify the vectorization request 912e93ba-f5cb-4398-9ab6-c13f986269a2 was created by retrieving it from the Management API +Issue the process action on the vectorization request: 912e93ba-f5cb-4398-9ab6-c13f986269a2 via the Management API +Get the initial processing state for the vectorization request: 912e93ba-f5cb-4398-9ab6-c13f986269a2 via the Management API +Polling the processing state of the async vectorization request: 912e93ba-f5cb-4398-9ab6-c13f986269a2 by retrieving the request from the Management API +Vectorization request: 912e93ba-f5cb-4398-9ab6-c13f986269a2 completed successfully. +Verify a search yields 27 documents. +Delete the data source: datalake_vectorization_input via the Management API +Delete the vectorization text partitioning profile: text_partition_profile via the Management API +Delete the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Delete the vectorization indexing profile: indexing_profile_pdf via the Management API along with the index +``` + +### Example 8: Synchronous vectorization of a file located in SharePoint Online + +**Purpose**: Run synchronous vectorization of a file located in SharePoint Online. 
+ +**File**: [Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs](Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs) + +This example demonstrates a synchronous vectorization request for a file located in SharePoint Online. + +#### Setup + +This example expects a service principal to be created using the following guidance: [Create a service principal with access to SharePoint Online](https://learn.microsoft.com/en-us/azure/data-factory/connector-sharepoint-online-list?tabs=data-factory#prerequisites) REFER TO THE **Prerequisites** SECTION ONLY. + +The certificate used to authenticate the service principal needs to be uploaded to the Azure Key Vault. + +This example expects the following file named [`SDZWA-Journal-January-2024.pdf`](https://sandiegozoowildlifealliance.org/Journal/january-2024) to be located in a SharePoint Online site. + +##### App Config settings +| Key | Value | Description | +| --- | --- | --- | +| `FoundationaLLM:DataSources:sharepoint_fllm:ClientId` | N/A | The ClientId of the service principal accessing SharePoint Online. | +| `FoundationaLLM:DataSources:sharepoint_fllm:TenantId` | N/A | The TenantId of the service principal accessing SharePoint Online. | +| `FoundationaLLM:DataSources:sharepoint_fllm:CertificateName` | N/A | The name of the certificate in the Azure Key Vault used to authenticate the service principal. | +| `FoundationaLLM:DataSources:sharepoint_fllm:KeyVaultURL` | N/A | The URL of the Azure Key Vault where the certificate is stored. | + + +##### `testsettings.json` settings + +The test settings file provides information to the vectorization service about the location and file name of the document to vectorize. 
The following is an example of the `testsettings.json` file for this example: + +```json +{ + "SharePointVectorizationConfiguration": { + "HostName": "fllm.sharepoint.com", + "SitePath": "sites/FoundationaLLM", + "FolderPath": "SDZWA/Journals", + "FileName": "SDZWA-Journal-January-2024.pdf" + } +} +``` + +Property definitions: + +- `HostName`: Host name of the SharePoint site without the protocol, ex: `fllm.sharepoint.com`. +- `SitePath`: The relative path of the site/subsite, ex: `sites/FoundationaLLM`. +- `FolderPath`: The folder path, starting with the document library, ex: `SDZWA/Journals`. +- `FileName`: The file name of the document to vectorize. + +#### Running the example + +Run the example by running a test on the `Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs` file. You can run the test using the Visual Studio Test Explorer, the command line, or by simply right-clicking anywhere on the `Example0008_SynchronousVectorizationOfPDFFromSharePoint.cs` file and selecting **Run Tests**. + +You will see an output similar to the following after the test is completed: + +```text +============ Synchronous Vectorization of a PDF from SharePoint Online ============ +Create the data source: sharepoint_fllm via the Management API +Create the vectorization text partitioning profile: text_partition_profile via the Management API +Create the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Create the vectorization indexing profile: indexing_profile_pdf via the Management API +Create the vectorization request: 01f507af-7780-4c5e-b8e8-198e7ea6fcb0 via the Management API +Verify the vectorization request 01f507af-7780-4c5e-b8e8-198e7ea6fcb0 was created by retrieving it from the Management API +Issue the process action on the vectorization request: 01f507af-7780-4c5e-b8e8-198e7ea6fcb0 via the Management API +Vectorization request: 01f507af-7780-4c5e-b8e8-198e7ea6fcb0 completed successfully. 
+Verify a search yields 27 documents. +Delete the data source: sharepoint_fllm via the Management API +Delete the vectorization text partitioning profile: text_partition_profile via the Management API +Delete the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Delete the vectorization indexing profile: indexing_profile_pdf via the Management API along with the index +``` + +### Example 9: Asynchronous vectorization of a file located in SharePoint Online + +**Purpose**: Run asynchronous vectorization of a file located in SharePoint Online. + +**File**: [Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs](Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs) + +This example demonstrates an asynchronous vectorization request for a file located in SharePoint Online. + +#### Setup + +This example expects a service principal to be created using the following guidance: [Create a service principal with access to SharePoint Online](https://learn.microsoft.com/en-us/azure/data-factory/connector-sharepoint-online-list?tabs=data-factory#prerequisites) REFER TO THE **Prerequisites** SECTION ONLY. + +The certificate used to authenticate the service principal needs to be uploaded to the Azure Key Vault. + +This example expects the following file named [`SDZWA-Journal-January-2024.pdf`](https://sandiegozoowildlifealliance.org/Journal/january-2024) to be located in a SharePoint Online site. + +##### App Config settings +| Key | Value | Description | +| --- | --- | --- | +| `FoundationaLLM:DataSources:sharepoint_fllm:ClientId` | N/A | The ClientId of the service principal accessing SharePoint Online. | +| `FoundationaLLM:DataSources:sharepoint_fllm:TenantId` | N/A | The TenantId of the service principal accessing SharePoint Online. | +| `FoundationaLLM:DataSources:sharepoint_fllm:CertificateName` | N/A | The name of the certificate in the Azure Key Vault used to authenticate the service principal. 
| +| `FoundationaLLM:DataSources:sharepoint_fllm:KeyVaultURL` | N/A | The URL of the Azure Key Vault where the certificate is stored. | + + +##### `testsettings.json` settings + +The test settings file provides information to the vectorization service about the location and file name of the document to vectorize. The following is an example of the `testsettings.json` file for this example: + +```json +{ + "SharePointVectorizationConfiguration": { + "HostName": "fllm.sharepoint.com", + "SitePath": "sites/FoundationaLLM", + "FolderPath": "SDZWA/Journals", + "FileName": "SDZWA-Journal-January-2024.pdf" + } +} +``` + +Property definitions: + +- `HostName`: Host name of the SharePoint site without the protocol, ex: `fllm.sharepoint.com`. +- `SitePath`: The relative path of the site/subsite, ex: `sites/FoundationaLLM`. +- `FolderPath`: The folder path, starting with the document library, ex: `SDZWA/Journals`. +- `FileName`: The file name of the document to vectorize. + +#### Running the example + +Run the example by running a test on the `Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs` file. You can run the test using the Visual Studio Test Explorer, the command line, or by simply right-clicking anywhere on the `Example0009_AsynchronousVectorizationOfPDFFromSharePoint.cs` file and selecting **Run Tests**. 
+ +You will see an output similar to the following after the test is completed: + +```text +============ Asynchronous Vectorization of a PDF from SharePoint Online ============ +Create the data source: sharepoint_fllm via the Management API +Create the vectorization text partitioning profile: text_partition_profile via the Management API +Create the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Create the vectorization indexing profile: indexing_profile_pdf via the Management API +Create the vectorization request: 2b88e6d2-a51c-4e38-b5d7-e1a7f72cb694 via the Management API +Verify the vectorization request 2b88e6d2-a51c-4e38-b5d7-e1a7f72cb694 was created by retrieving it from the Management API +Issue the process action on the vectorization request: 2b88e6d2-a51c-4e38-b5d7-e1a7f72cb694 via the Management API +Get the initial processing state for the vectorization request: 2b88e6d2-a51c-4e38-b5d7-e1a7f72cb694 via the Management API +Polling the processing state of the async vectorization request: 2b88e6d2-a51c-4e38-b5d7-e1a7f72cb694 by retrieving the request from the Management API +Vectorization request: 2b88e6d2-a51c-4e38-b5d7-e1a7f72cb694 completed successfully. +Verify a search yields 27 documents. +Delete the data source: sharepoint_fllm via the Management API +Delete the vectorization text partitioning profile: text_partition_profile via the Management API +Delete the vectorization text embedding profile: text_embedding_profile_generic via the Management API +Delete the vectorization indexing profile: indexing_profile_pdf via the Management API and delete the created index +``` + + ### Example 16: Completion quality measurements with Azure AI Studio **Purpose**: Verify that the completion quality measurements can be completed successfully with Azure AI Studio. 
diff --git a/tests/dotnet/Core.Examples/Services/ManagementAPITestManager.cs b/tests/dotnet/Core.Examples/Services/ManagementAPITestManager.cs index d1491898c4..c04dd7e290 100644 --- a/tests/dotnet/Core.Examples/Services/ManagementAPITestManager.cs +++ b/tests/dotnet/Core.Examples/Services/ManagementAPITestManager.cs @@ -135,7 +135,7 @@ public async Task ProcessVectorizationRequestAsync(Vectoriz var processResult = JsonSerializer.Deserialize(responseContent, _jsonSerializerOptions); return processResult!; } - throw new FoundationaLLMException($"Failed to upsert resource. Status code: {response.StatusCode}. Reason: {response.ReasonPhrase}"); + throw new FoundationaLLMException($"Failed to process vectorization request. Status code: {response.StatusCode}. Reason: {response.ReasonPhrase}"); } public async Task DeleteVectorizationRequest(VectorizationRequest vectorizationRequest) diff --git a/tests/dotnet/Core.Examples/Setup/TestConfiguration.cs b/tests/dotnet/Core.Examples/Setup/TestConfiguration.cs index ce27ab38e5..5357a50e1d 100644 --- a/tests/dotnet/Core.Examples/Setup/TestConfiguration.cs +++ b/tests/dotnet/Core.Examples/Setup/TestConfiguration.cs @@ -21,6 +21,7 @@ public sealed class TestConfiguration private static ConfigurationClient? _client; private readonly ChainedTokenCredential _tokenCredential; public static CompletionQualityMeasurementConfiguration CompletionQualityMeasurementConfiguration => LoadSection(); + public static SharePointVectorizationConfiguration SharePointVectorizationConfiguration => LoadSection(); private TestConfiguration(IConfigurationRoot configRoot) {