Skip to content

Commit

Permalink
CBL-5500 : Implement Vector Search API (#527)
Browse files Browse the repository at this point in the history
* Implemented Vector Search API.

* Added a sanity test but disabled (Need to implement structure for automated testing).
  • Loading branch information
pasin authored Mar 22, 2024
1 parent 2a0584e commit 2def36d
Show file tree
Hide file tree
Showing 25 changed files with 722 additions and 9 deletions.
32 changes: 30 additions & 2 deletions CBL_C.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@
4022546E29355577000FBAC8 /* assets in Resources */ = {isa = PBXBuildFile; fileRef = 4022546D29355576000FBAC8 /* assets */; };
4022546F29355577000FBAC8 /* assets in Resources */ = {isa = PBXBuildFile; fileRef = 4022546D29355576000FBAC8 /* assets */; };
4022547029355577000FBAC8 /* assets in Resources */ = {isa = PBXBuildFile; fileRef = 4022546D29355576000FBAC8 /* assets */; };
406E46E42BACC4BF0088198C /* VectorSearchTest.cc in Sources */ = {isa = PBXBuildFile; fileRef = 406E46D22BACAEFF0088198C /* VectorSearchTest.cc */; };
4083FCAF2BA3B8B00061509D /* CBLVectorIndexConfig_CAPI.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4083FCAE2BA3B8B00061509D /* CBLVectorIndexConfig_CAPI.cc */; };
4083FCB02BA3B8C50061509D /* CBLVectorIndexConfig.hh in Headers */ = {isa = PBXBuildFile; fileRef = 4083FC9F2BA3A4390061509D /* CBLVectorIndexConfig.hh */; };
4083FCBC2BA8DE200061509D /* CBLPrediction_CAPI.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4083FCBB2BA8DE200061509D /* CBLPrediction_CAPI.cc */; };
4083FCC02BAA235F0061509D /* CBLPrediction_Internal.hh in Headers */ = {isa = PBXBuildFile; fileRef = 4083FCBF2BAA203A0061509D /* CBLPrediction_Internal.hh */; };
4083FCC22BAA38F30061509D /* CBLPrediction.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4083FCC12BAA38F30061509D /* CBLPrediction.cc */; };
40D1862529B6D1A50061AA85 /* Collection.hh in Headers */ = {isa = PBXBuildFile; fileRef = FCC064BD287CBD95000C5BD7 /* Collection.hh */; };
40F902D82B9F7B12002EA0A0 /* PrivacyInfo.xcprivacy in Resources */ = {isa = PBXBuildFile; fileRef = 40F902C82B9F7AF6002EA0A0 /* PrivacyInfo.xcprivacy */; };
42D1B69D2978AD31003B9871 /* CBLUserAgent.mm in Sources */ = {isa = PBXBuildFile; fileRef = 42D1B68F2978AD31003B9871 /* CBLUserAgent.mm */; };
Expand Down Expand Up @@ -464,6 +470,14 @@
27DBD096246C99AF002FD7A7 /* mergeIntoStaticLib.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = mergeIntoStaticLib.sh; sourceTree = "<group>"; };
27DBD097246C9DE7002FD7A7 /* CBLDatabase+Apple.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = "CBLDatabase+Apple.mm"; sourceTree = "<group>"; };
4022546D29355576000FBAC8 /* assets */ = {isa = PBXFileReference; lastKnownFileType = folder; path = assets; sourceTree = "<group>"; };
406E46D22BACAEFF0088198C /* VectorSearchTest.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = VectorSearchTest.cc; sourceTree = "<group>"; };
406E46E72BAD3BB30088198C /* cmake */ = {isa = PBXFileReference; lastKnownFileType = folder; path = cmake; sourceTree = "<group>"; };
4083FC9F2BA3A4390061509D /* CBLVectorIndexConfig.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = CBLVectorIndexConfig.hh; sourceTree = "<group>"; };
4083FCAE2BA3B8B00061509D /* CBLVectorIndexConfig_CAPI.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CBLVectorIndexConfig_CAPI.cc; sourceTree = "<group>"; };
4083FCB62BA3F6930061509D /* CBLPrediction.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CBLPrediction.h; sourceTree = "<group>"; };
4083FCBB2BA8DE200061509D /* CBLPrediction_CAPI.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CBLPrediction_CAPI.cc; sourceTree = "<group>"; };
4083FCBF2BAA203A0061509D /* CBLPrediction_Internal.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = CBLPrediction_Internal.hh; sourceTree = "<group>"; };
4083FCC12BAA38F30061509D /* CBLPrediction.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CBLPrediction.cc; sourceTree = "<group>"; };
40F902C82B9F7AF6002EA0A0 /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = PrivacyInfo.xcprivacy; sourceTree = "<group>"; };
42D1B68F2978AD31003B9871 /* CBLUserAgent.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CBLUserAgent.mm; sourceTree = "<group>"; };
932062DA26BC6B43006917A5 /* CBLQuery.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CBLQuery.cc; sourceTree = "<group>"; };
Expand Down Expand Up @@ -633,6 +647,7 @@
93C70CD226C4D3F20093E927 /* CBLEncryptable.h */,
277B77C6245B44BE00B222D3 /* CBLLog.h */,
9320630126BDB340006917A5 /* CBLPlatform.h */,
4083FCB62BA3F6930061509D /* CBLPrediction.h */,
271C2A3021CAC98F0045856E /* CBLQuery.h */,
271C2A2C21CAC98F0045856E /* CBLReplicator.h */,
FCD829B12835ECE0004AA814 /* CBLScope.h */,
Expand All @@ -654,12 +669,15 @@
271C2A7721CC750E0045856E /* CBLDocument.cc */,
277FEE7A21ED6C0000B60E3C /* CBLDocument_Internal.hh */,
93EC365D26C498AB00182B02 /* CBLEncryptable_Internal.hh */,
277FEE7621ED62AA00B60E3C /* CBLReplicatorConfig.hh */,
934AD381270E797D0038D62E /* CBLLog_Internal.hh */,
277B77D4245B44E900B222D3 /* CBLLog.cc */,
4083FCBF2BAA203A0061509D /* CBLPrediction_Internal.hh */,
4083FCC12BAA38F30061509D /* CBLPrediction.cc */,
932062DA26BC6B43006917A5 /* CBLQuery.cc */,
27DBCF2E246B4352002FD7A7 /* CBLQuery_Internal.hh */,
4083FC9F2BA3A4390061509D /* CBLVectorIndexConfig.hh */,
27D11BFF235140E300C58A70 /* CBLReplicator_Internal.hh */,
277FEE7621ED62AA00B60E3C /* CBLReplicatorConfig.hh */,
FCC063C828588DA6000C5BD7 /* CBLScope.cc */,
FCD8299C2835AC20004AA814 /* CBLScope_Internal.hh */,
AE591AB729473A4400E4BDE8 /* CBLUserAgent.hh */,
Expand Down Expand Up @@ -729,9 +747,11 @@
276634182605338300B9BD36 /* CBLDocument_CAPI.cc */,
93C70CE226C5B4BC0093E927 /* CBLEncryptable_CAPI.cc */,
9320631126BDB5CA006917A5 /* CBLPlatform_CAPI+Android.cc */,
4083FCBB2BA8DE200061509D /* CBLPrediction_CAPI.cc */,
27B61D5521D5ABA60027CCDB /* CBLQuery_CAPI.cc */,
277FEE7421ED3C4900B60E3C /* CBLReplicator_CAPI.cc */,
FCD829B22835EE39004AA814 /* CBLScope_CAPI.cc */,
4083FCAE2BA3B8B00061509D /* CBLVectorIndexConfig_CAPI.cc */,
);
name = "C Glue";
sourceTree = "<group>";
Expand All @@ -758,6 +778,7 @@
FC645E1F29085CE5007D5536 /* supports */,
27B61D6821D6B60D0027CCDB /* CBLTest.hh */,
27B61D6921D6B60D0027CCDB /* CBLTest.cc */,
27C9B5F221F7EE670040BC45 /* CBLTest.c */,
27A63F252633408000634F7B /* CBLTest_Cpp.hh */,
275B3593234BDB2700FE9CF0 /* CBLTestsMain.cpp */,
93C70D3326D01B5A0093E927 /* BlobTest.cc */,
Expand All @@ -778,11 +799,12 @@
FC82CE0428C6E2BD001FA083 /* ReplicatorCollectionTest_Cpp.cc */,
2736A633242E5A74002B9D65 /* ReplicatorEETest.cc */,
93C70D1626CB334D0093E927 /* ReplicatorPropEncTest.cc */,
406E46D22BACAEFF0088198C /* VectorSearchTest.cc */,
27D30F9123A2D30500392107 /* PerfTest.cc */,
27C9B5F221F7EE670040BC45 /* CBLTest.c */,
275B3597234C158400FE9CF0 /* CouchbaseLiteTests.mm */,
27DBCF41246B81EE002FD7A7 /* LibC++Debug.cc */,
27B61DD421DEE5DC0027CCDB /* CMakeLists.txt */,
406E46E72BAD3BB30088198C /* cmake */,
4022546D29355576000FBAC8 /* assets */,
);
path = test;
Expand Down Expand Up @@ -884,12 +906,14 @@
files = (
277FEE7821ED62AA00B60E3C /* CBLReplicatorConfig.hh in Headers */,
9320630F26BDB408006917A5 /* CBLPlatform.h in Headers */,
4083FCB02BA3B8C50061509D /* CBLVectorIndexConfig.hh in Headers */,
27DBCF2F246B4352002FD7A7 /* CBLQuery_Internal.hh in Headers */,
27D11BEF2351043B00C58A70 /* ConflictResolver.hh in Headers */,
FC5FBBA62821B3450066157F /* CBLCollection_Internal.hh in Headers */,
93C70CE026C4D3F80093E927 /* CBLEncryptable.h in Headers */,
271C2A3321CAC98F0045856E /* CBLDocument.h in Headers */,
FCD988BB2821B10300512BBD /* CBLCollection.h in Headers */,
4083FCC02BAA235F0061509D /* CBLPrediction_Internal.hh in Headers */,
FCD8299D2835AC3F004AA814 /* CBLScope_Internal.hh in Headers */,
277B77D3245B44BE00B222D3 /* CBLLog.h in Headers */,
FC5FBBB72821E4970066157F /* CBLDatabase_Internal.hh in Headers */,
Expand Down Expand Up @@ -1506,14 +1530,17 @@
27DBD098246C9DE7002FD7A7 /* CBLDatabase+Apple.mm in Sources */,
FC5FBBB52821CC2E0066157F /* CBLCollection_CAPI.cc in Sources */,
275BC4DE2201323700DBE7D2 /* CBLBlob_CAPI.cc in Sources */,
4083FCAF2BA3B8B00061509D /* CBLVectorIndexConfig_CAPI.cc in Sources */,
271C2A7821CC750E0045856E /* CBLDocument.cc in Sources */,
4083FCBC2BA8DE200061509D /* CBLPrediction_CAPI.cc in Sources */,
276634192605338300B9BD36 /* CBLDocument_CAPI.cc in Sources */,
27B61D5621D5ABA60027CCDB /* CBLQuery_CAPI.cc in Sources */,
277FEE7521ED3C4900B60E3C /* CBLReplicator_CAPI.cc in Sources */,
FCD829B32835EE39004AA814 /* CBLScope_CAPI.cc in Sources */,
27D11BF02351043B00C58A70 /* ConflictResolver.cc in Sources */,
27886C8E21F64C1400069BEA /* Listener.cc in Sources */,
FCB96E8329007D37001C4DED /* CBLDefaults_CAPI.cc in Sources */,
4083FCC22BAA38F30061509D /* CBLPrediction.cc in Sources */,
271C2A7621CC4BD60045856E /* Internal.cc in Sources */,
FCC063C928588DA6000C5BD7 /* CBLScope.cc in Sources */,
FC5FBBA52821B3450066157F /* CBLCollection.cc in Sources */,
Expand Down Expand Up @@ -1591,6 +1618,7 @@
FC09078A28A5F40200201B07 /* CollectionTest_Cpp.cc in Sources */,
27D30F9F23A2D30500392107 /* PerfTest.cc in Sources */,
FC82CE1328C6E2FD001FA083 /* ReplicatorCollectionTest_Cpp.cc in Sources */,
406E46E42BACC4BF0088198C /* VectorSearchTest.cc in Sources */,
27DBCF42246B81EE002FD7A7 /* LibC++Debug.cc in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,14 @@ set(
src/CBLDocument_CAPI.cc
src/CBLEncryptable_CAPI.cc
src/CBLLog.cc
src/CBLPrediction.cc
src/CBLPrediction_CAPI.cc
src/CBLQuery.cc
src/CBLQuery_CAPI.cc
src/CBLReplicator_CAPI.cc
src/CBLScope.cc
src/CBLScope_CAPI.cc
src/CBLVectorIndexConfig_CAPI.cc
src/ConflictResolver.cc
src/Internal.cc
src/Listener.cc
Expand Down
14 changes: 14 additions & 0 deletions include/cbl/CBLCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,20 @@ bool CBLCollection_CreateFullTextIndex(CBLCollection *collection,
CBLFullTextIndexConfiguration config,
CBLError* _cbl_nullable outError) CBLAPI;

#ifdef COUCHBASE_ENTERPRISE
/** ENTERPRISE EDITION ONLY
Creates a vector index in the collection.
If an identical index with that name already exists, nothing happens (and no error is returned.)
If a non-identical index with that name already exists, it is deleted and re-created.
@param collection The collection.
@param name The name of the index.
@param config The vector index configuration.
@param outError Optional (may be NULL) location for error information.
@return True on success. NOTE(review): presumably false on failure, with the error
stored in outError -- confirm against the implementation. */
bool CBLCollection_CreateVectorIndex(CBLCollection *collection,
FLString name,
CBLVectorIndexConfiguration config,
CBLError* _cbl_nullable outError) CBLAPI;

#endif

/** Deletes an index in the collection by name.
@param collection The collection.
@param name The name of the index.
Expand Down
19 changes: 19 additions & 0 deletions include/cbl/CBLDatabase.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,25 @@ CBL_CAPI_BEGIN
A \ref CBLDatabase is both a filesystem object and a container for documents.
*/

#ifdef COUCHBASE_ENTERPRISE

#ifdef __APPLE__
#pragma mark - Database Extension
#endif

/** \name Database Extension
@{ */

/** ENTERPRISE EDITION ONLY
Registers a directory path to load extension libraries from, such as Vector Search.
Must be called before opening a database that will use an extension.
@param path The directory path to load extension libraries from. */
void CBL_SetExtensionPath(FLString path) CBLAPI;

/** @} */

#endif

#ifdef __APPLE__
#pragma mark - CONFIGURATION
#endif
Expand Down
52 changes: 52 additions & 0 deletions include/cbl/CBLPrediction.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//
// CBLPrediction.h
//
// Copyright (c) 2024 Couchbase, Inc All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#pragma once
#include "CBLBase.h"

#ifdef COUCHBASE_ENTERPRISE

CBL_CAPI_BEGIN

/** Predictive Model: a context pointer together with the callbacks used to run a
prediction and to be notified when the model is unregistered. */
typedef struct {
/** A pointer to any external data needed by the `prediction` callback, which will receive this as its first parameter. */
void* _cbl_nullable context;

/** Called from within a query (or document indexing) to run the prediction.
@param context The value of the CBLPredictiveModel's `context` field.
@param input The input dictionary from the query.
@return The output dictionary of the prediction function, or NULL if there is no output.
@note The output dictionary will be released automatically after it has been consumed.
@warning This function must be "pure": given the same input parameters it must always
produce the same output (otherwise indexes or queries may be messed up).
It MUST NOT alter the database or any documents, nor run a query: either of
those are very likely to cause a crash. */
FLDict _cbl_nullable (* _cbl_nonnull prediction)(void* _cbl_nullable context, FLDict input);

/** Called if the model is unregistered, so it can release resources.
This callback is optional and may be NULL.
@param context NOTE(review): presumably the value of the CBLPredictiveModel's
`context` field, as for `prediction` -- confirm. */
void (*_cbl_nullable unregistered)(void* context);
} CBLPredictiveModel;

/** ENTERPRISE EDITION ONLY
Registers a predictive model under the given name.
@param name The name to register the model under.
@param model The predictive model (its context pointer and callbacks) to register. */
void CBL_RegisterPredictiveModel(FLString name, CBLPredictiveModel model) CBLAPI;

/** ENTERPRISE EDITION ONLY
Unregisters the predictive model registered under the given name.
@param name The name of the model to unregister. */
void CBL_UnregisterPredictiveModel(FLString name) CBLAPI;

CBL_CAPI_END

#endif
85 changes: 80 additions & 5 deletions include/cbl/CBLQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,19 @@ CBLResultSet* _cbl_nullable CBLQuery_CopyCurrentResults(const CBLQuery* query,
You may find SQLite's documentation particularly helpful since Couchbase Lite's querying is
based on SQLite.
Two types of indexes are currently supported:
Three types of indexes are currently supported:
* Value indexes speed up queries by making it possible to look up property (or expression)
values without scanning every document. They're just like regular indexes in SQL or N1QL.
Multiple expressions are supported; the first is the primary key, second is secondary.
Expressions must evaluate to scalar types (boolean, number, string).
* Full-Text Search (FTS) indexes enable fast search of natural-language words or phrases
by using the `MATCH` operator in a query. A FTS index is **required** for full-text
search: a query with a `MATCH` operator will fail to compile unless there is already a
FTS index for the property/expression being matched. Only a single expression is
currently allowed, and it must evaluate to a string. */
by using the `MATCH()` function in a query. A FTS index is **required** for full-text
search: a query with a `MATCH()` function will fail to compile unless there is already a
FTS index for the property/expression being matched.
* (Enterprise Edition Only) Vector indexes allow efficient search of ML vectors by using
the `VECTOR_MATCH()` function in a query. The `CouchbaseLiteVectorSearch`
extension library is **required** to use the functionality. Use the \ref CBL_SetExtensionPath
function to set the directory path containing the extension library. */

/** Value Index Configuration. */
typedef struct {
Expand Down Expand Up @@ -310,6 +313,78 @@ typedef struct {
FLString language;
} CBLFullTextIndexConfiguration;

#ifdef COUCHBASE_ENTERPRISE

/** An opaque object representing vector encoding config to use in CBLVectorIndexConfiguration. */
typedef struct CBLVectorEncoding CBLVectorEncoding;

/** Creates a no-encoding config to use in CBLVectorIndexConfiguration; 4 bytes per dimension, no data loss. */
_cbl_warn_unused
CBLVectorEncoding* CBLVectorEncoding_CreateNone(void) CBLAPI;

/** Scalar Quantizer encoding type, selecting the number of bits used per vector dimension. */
typedef CBL_ENUM(uint32_t, CBLScalarQuantizerType) {
kCBLSQ4 = 0, ///< 4 bits per dimension
kCBLSQ6, ///< 6 bits per dimension
kCBLSQ8 ///< 8 bits per dimension
};

/** Creates a Scalar Quantizer encoding config to use in CBLVectorIndexConfiguration. */
_cbl_warn_unused
CBLVectorEncoding* CBLVectorEncoding_CreateScalarQuantizer(CBLScalarQuantizerType type) CBLAPI;

/** Creates a Product Quantizer encoding config to use in CBLVectorIndexConfiguration.
@param subquantizers The number of subquantizers.
NOTE(review): any constraints on this value are not stated here -- confirm.
@param bits The number of bits.
NOTE(review): presumably the number of bits per subquantizer -- confirm.
@return The encoding object, which can be freed with \ref CBLVectorEncoding_Free
after the index is created. */
_cbl_warn_unused
CBLVectorEncoding* CBLVectorEncoding_CreateProductQuantizer(unsigned subquantizers, unsigned bits) CBLAPI;

/** Frees a CBLVectorEncoding object. The encoding object can be freed after the index is created. */
void CBLVectorEncoding_Free(CBLVectorEncoding* _cbl_nullable) CBLAPI;

/** Distance metric to use in CBLVectorIndexConfiguration. */
typedef CBL_ENUM(uint32_t, CBLDistanceMetric) {
kCBLDistanceMetricEuclidean = 0, ///< Euclidean distance
kCBLDistanceMetricCosine, ///< Cosine distance (1.0 - Cosine Similarity)
};

/** ENTERPRISE EDITION ONLY
Vector Index Configuration. */
typedef struct {
/** The language used in the expressions (Required). */
CBLQueryLanguage expressionLanguage;

/** An expression returning a vector, which is an array of numbers. The expression can be specified
as a JSON Array or in N1QL syntax, depending on the expressionLanguage. (Required) */
FLString expression;

/** The number of vector dimensions. (Required) */
unsigned dimensions;

/** The number of centroids, which is the number of buckets used to partition the vectors in the index. (Required) */
unsigned centroids;

/** Vector encoding type. The default value is 8-bit Scalar Quantizer. */
CBLVectorEncoding* encoding;

/** Distance Metric type. The default value is euclidean distance. */
CBLDistanceMetric metric;

/** The minimum number of vectors for training the index. Training is an initial process that
prepares the index based on the characteristics of the vectors to be indexed.
Prior to training, a full table scan will be performed when the vector_match() function is used in the query.
The default value is 25 times the number of centroids. The number must be more than zero and not greater than maxTrainingSize.
An invalid argument error will be thrown when creating the index if an invalid value is used. */
unsigned minTrainingSize;

/** The maximum number of vectors used when training the index. The default
value is 256 times the number of centroids. The number must be more than zero
and not less than minTrainingSize. An invalid argument error will be thrown
when creating the index if an invalid value is used. */
unsigned maxTrainingSize;
} CBLVectorIndexConfiguration;

#endif

/** Creates a full-text index.
Indexes are persistent.
If an identical index with that name already exists, nothing happens (and no error is returned.)
Expand Down
15 changes: 15 additions & 0 deletions src/CBLCollection_CAPI.cc
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,21 @@ bool CBLCollection_CreateFullTextIndex(CBLCollection *collection,
} catchAndBridge(outError)
}

#ifdef COUCHBASE_ENTERPRISE

// C API entry point (Enterprise Edition only) for creating a vector index.
// Forwards to CBLCollection::createVectorIndex() and returns true when that
// call completes without throwing.
bool CBLCollection_CreateVectorIndex(CBLCollection *collection,
FLString name,
CBLVectorIndexConfiguration config,
CBLError *outError) noexcept
{
try {
collection->createVectorIndex(name, config);
return true;
// NOTE(review): catchAndBridge is defined elsewhere; presumably it catches any
// exception, stores it in outError and returns false -- confirm.
} catchAndBridge(outError)
}

#endif

bool CBLCollection_DeleteIndex(CBLCollection *collection,
FLString name,
CBLError *outError) noexcept
Expand Down
Loading

0 comments on commit 2def36d

Please sign in to comment.