-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #500 from solliancenet/cj-vectorization-indexing
Vectorization indexing improvements
- Loading branch information
Showing
81 changed files
with
1,420 additions
and
249 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
using FoundationaLLM.Common.Models.TextEmbedding; | ||
|
||
namespace FoundationaLLM.Common.Interfaces | ||
{ | ||
/// <summary> | ||
/// Provides indexing capabilities for embedding vectors. | ||
/// </summary> | ||
public interface IIndexingService | ||
{ | ||
/// <summary> | ||
/// Adds the embeddings associated with a piece of content to the specified index. | ||
/// </summary> | ||
/// <param name="embeddedContent">The <see cref="EmbeddedContent"/> containing the embeddings to index.</param> | ||
/// <param name="indexName">The name of the index.</param> | ||
/// <returns>A list of strings produced by the indexing operation (presumably the identifiers of the indexed entries; confirm against the implementations).</returns> | ||
Task<List<string>> IndexEmbeddingsAsync(EmbeddedContent embeddedContent, string indexName); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
using FoundationaLLM.Common.Models.TextEmbedding; | ||
|
||
namespace FoundationaLLM.Common.Interfaces | ||
{ | ||
/// <summary> | ||
/// Provides text embedding capabilities. | ||
/// </summary> | ||
public interface ITextEmbeddingService | ||
{ | ||
/// <summary> | ||
/// Creates the vector embedding for a specified text. | ||
/// </summary> | ||
/// <param name="text">The text which needs to be embedded.</param> | ||
/// <returns>A tuple containing the vector embedding (<c>Embedding</c>) and the number of tokens used (<c>TokenCount</c>).</returns> | ||
Task<(Embedding Embedding, int TokenCount)> GetEmbeddingAsync(string text); | ||
|
||
/// <summary> | ||
/// Creates the vector embeddings for a specified list of texts. | ||
/// </summary> | ||
/// <param name="texts">The list of texts which need to be embedded.</param> | ||
/// <returns>A tuple containing the list of vector embeddings (<c>Embeddings</c>) and the number of tokens used (<c>TokenCount</c>).</returns> | ||
Task<(IList<Embedding> Embeddings, int TokenCount)> GetEmbeddingsAsync(IList<string> texts); | ||
} | ||
} |
31 changes: 31 additions & 0 deletions
31
src/dotnet/Common/Models/TextEmbedding/ContentIdentifier.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
using System.Text.Json.Serialization; | ||
|
||
namespace FoundationaLLM.Common.Models.TextEmbedding; | ||
|
||
/// <summary> | ||
/// Represents the content associated with a vectorization request. | ||
/// </summary> | ||
public class ContentIdentifier | ||
{ | ||
/// <summary> | ||
/// The multipart unique identifier of the content (i.e., document) being vectorized. | ||
/// </summary> | ||
[JsonPropertyOrder(1)] | ||
[JsonPropertyName("multipart_id")] | ||
public required List<string> MultipartId { get; set; } | ||
|
||
/// <summary> | ||
/// The unique identifier of the content (i.e., document) being vectorized. | ||
/// The identifier is determined by joining the parts from <see cref="MultipartId"/> with a "/" separator. | ||
/// This property is computed and is excluded from JSON serialization. | ||
/// </summary> | ||
[JsonIgnore] | ||
public string UniqueId => string.Join("/", MultipartId); | ||
|
||
/// <summary> | ||
/// The canonical identifier of the content being vectorized. | ||
/// Vectorization state services use it to derive the location of the state in the underlying storage. | ||
/// </summary> | ||
[JsonPropertyOrder(2)] | ||
[JsonPropertyName("canonical_id")] | ||
public required string CanonicalId { get; set; } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace FoundationaLLM.Common.Models.TextEmbedding | ||
{ | ||
/// <summary> | ||
/// Provides information about embedded content. | ||
/// </summary> | ||
public class EmbeddedContent | ||
{ | ||
/// <summary> | ||
/// The <see cref="ContentIdentifier"/> that identifies the content. | ||
/// </summary> | ||
public required ContentIdentifier ContentId { get; set; } | ||
|
||
/// <summary> | ||
/// The name of the content source profile used to retrieve content. | ||
/// </summary> | ||
public required string ContentSourceProfileName { get; set; } | ||
|
||
/// <summary> | ||
/// The list of content parts (<see cref="EmbeddedContentPart"/>) that make up the embedded content. | ||
/// </summary> | ||
public required List<EmbeddedContentPart> ContentParts { get; set; } = []; | ||
} | ||
} |
24 changes: 24 additions & 0 deletions
24
src/dotnet/Common/Models/TextEmbedding/EmbeddedContentPart.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace FoundationaLLM.Common.Models.TextEmbedding | ||
{ | ||
/// <summary> | ||
/// Provides information about an embedded content part: a piece of text and its vector embedding. | ||
/// </summary> | ||
public class EmbeddedContentPart | ||
{ | ||
/// <summary> | ||
/// The text content that was embedded. | ||
/// </summary> | ||
public required string Content { get; set; } | ||
|
||
/// <summary> | ||
/// The vector embedding associated with <see cref="Content"/>. | ||
/// </summary> | ||
public required Embedding Embedding { get; set; } | ||
} | ||
} |
Oops, something went wrong.