Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Annoy knn search #2354

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ddl.l
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ EXPONENT [eE](\+|-)?{DIGIT}+
"HNSW_EF_CONSTRUCTION" { YYSTOREBOUNDS; return TOK_HNSW_EF_CONSTRUCTION; }
"HNSW_M" { YYSTOREBOUNDS; return TOK_HNSW_M; }
"HNSW_SIMILARITY" { YYSTOREBOUNDS; return TOK_HNSW_SIMILARITY; }
"ANNOY_METRIC" { YYSTOREBOUNDS; return TOK_ANNOY_METRIC; }
"ANNOY_N_TREES" { YYSTOREBOUNDS; return TOK_ANNOY_N_TREES; }
"IMPORT" { YYSTOREBOUNDS; return TOK_IMPORT; }
"INDEXED" { YYSTOREBOUNDS; return TOK_INDEXED; }
"INTEGER" { YYSTOREBOUNDS; return TOK_INTEGER; }
Expand Down Expand Up @@ -146,4 +148,4 @@ static void yy3lex_unhold ( yyscan_t yyscanner )

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#endif
18 changes: 18 additions & 0 deletions src/ddl.y
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
%token TOK_HNSW_EF_CONSTRUCTION
%token TOK_HNSW_M
%token TOK_HNSW_SIMILARITY
%token TOK_ANNOY_METRIC
%token TOK_ANNOY_N_TREES
%token TOK_IF
%token TOK_IMPORT
%token TOK_INDEXED
Expand Down Expand Up @@ -336,6 +338,22 @@ item_option:
YYERROR;
}
}
| TOK_ANNOY_METRIC '=' TOK_QUOTED_STRING
{
if ( !pParser->AddItemOptionAnnoyMetric ( $3 ) )
{
yyerror ( pParser, pParser->GetLastError() );
YYERROR;
}
}
| TOK_ANNOY_N_TREES '=' TOK_QUOTED_STRING
{
if ( !pParser->AddItemOptionAnnoyNTrees ( $3 ) )
{
yyerror ( pParser, pParser->GetLastError() );
YYERROR;
}
}
;

item_option_list:
Expand Down
72 changes: 67 additions & 5 deletions src/knnmisc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,36 @@ static knn::HNSWSimilarity_e Str2HNSWSimilarity ( const CSphString & sSimilarity
return knn::HNSWSimilarity_e::L2;
}

/// Maps an Annoy metric enum value to the upper-case name used in
/// serialized KNN settings ("ANGULAR", "EUCLIDEAN", "MANHATTAN", "DOT").
/// Returns nullptr for any value that has no name listed here.
static const char* AnnoyMetric2Str ( knn::AnnoyMetric_e eMetric )
{
	using Metric_e = knn::AnnoyMetric_e;

	if ( eMetric==Metric_e::ANGULAR )
		return "ANGULAR";

	if ( eMetric==Metric_e::EUCLIDEAN )
		return "EUCLIDEAN";

	if ( eMetric==Metric_e::MANHATTAN )
		return "MANHATTAN";

	if ( eMetric==Metric_e::DOT )
		return "DOT";

	// no textual representation for this value
	return nullptr;
}

/// Converts a textual Annoy metric name into knn::AnnoyMetric_e.
/// The comparison is case-insensitive: a copy of the input is upper-cased first.
/// Recognized names: ANGULAR, EUCLIDEAN, MANHATTAN, DOT.
/// Any other input trips the assertion in debug builds; when assertions are
/// compiled out the function falls back to EUCLIDEAN.
static knn::AnnoyMetric_e Str2AnnoyMetric ( const CSphString& sMetric )
{
	// work on a copy so the caller's string is left untouched
	CSphString sMet = sMetric;
	sMet.ToUpper();

	if ( sMet == "ANGULAR" )
		return knn::AnnoyMetric_e::ANGULAR;
	if ( sMet == "EUCLIDEAN" )
		return knn::AnnoyMetric_e::EUCLIDEAN;
	if ( sMet == "MANHATTAN" )
		return knn::AnnoyMetric_e::MANHATTAN;
	if ( sMet == "DOT" )
		return knn::AnnoyMetric_e::DOT;

	// NOTE(review): ParseKNNConfigStr lets "HAMMING" through its validation, but
	// there is no mapping for it here, so it lands on this fallback path - confirm
	// whether HAMMING should be supported or rejected by the validator.
	assert ( 0 && "Unknown annoy metric" );
	return knn::AnnoyMetric_e::EUCLIDEAN;
}


void AddKNNSettings ( StringBuilder_c & sRes, const CSphColumnInfo & tAttr )
{
Expand All @@ -195,26 +225,33 @@ void AddKNNSettings ( StringBuilder_c & sRes, const CSphColumnInfo & tAttr )

const auto & tKNN = tAttr.m_tKNN;

sRes << " knn_type='hnsw'";
sRes << " knn_type='" << (tKNN.m_eKnnType == knn::KNNType_e::HNSW ? "hnsw" : "annoy") << "'";
sRes << " knn_dims='" << tKNN.m_iDims << "'";
sRes << " hnsw_similarity='" << HNSWSimilarity2Str ( tKNN.m_eHNSWSimilarity ) << "'";
sRes << " annoy_metric='" << AnnoyMetric2Str ( tKNN.m_eAnnoyMetric ) << "'";

knn::IndexSettings_t tDefault;
if ( tKNN.m_iHNSWM!=tDefault.m_iHNSWM )
sRes << " hnsw_m='" << tKNN.m_iHNSWM << "'";

if ( tKNN.m_iHNSWEFConstruction!=tDefault.m_iHNSWEFConstruction )
sRes << " hnsw_ef_construction='" << tKNN.m_iHNSWEFConstruction << "'";

if ( tKNN.m_iAnnoyNTrees != tDefault.m_iAnnoyNTrees )
sRes << " annoy_n_trees='" << tKNN.m_iAnnoyNTrees << "'";
}


/// Rebuilds KNN index settings from their serialized (bson) form.
/// "knn_type", "knn_dims" and "hnsw_similarity" are read unconditionally;
/// "hnsw_m", "hnsw_ef_construction", "annoy_n_trees" and "annoy_metric" are
/// optional and keep the defaults of knn::IndexSettings_t when missing.
/// Any "knn_type" other than "hnsw" is treated as Annoy.
knn::IndexSettings_t ReadKNNJson ( bson::Bson_c tRoot )
{
	knn::IndexSettings_t tRes;
	tRes.m_eKnnType = bson::String ( tRoot.ChildByName ( "knn_type" ) ) == "hnsw" ? knn::KNNType_e::HNSW : knn::KNNType_e::ANNOY;
	tRes.m_iDims = (int) bson::Int ( tRoot.ChildByName ( "knn_dims" ) );
	tRes.m_eHNSWSimilarity = Str2HNSWSimilarity ( bson::String ( tRoot.ChildByName ( "hnsw_similarity" ) ) );
	tRes.m_iHNSWM = (int) bson::Int ( tRoot.ChildByName ( "hnsw_m" ), tRes.m_iHNSWM );
	tRes.m_iHNSWEFConstruction = (int) bson::Int ( tRoot.ChildByName ( "hnsw_ef_construction" ), tRes.m_iHNSWEFConstruction );
	tRes.m_iAnnoyNTrees = (int) bson::Int ( tRoot.ChildByName ( "annoy_n_trees" ), tRes.m_iAnnoyNTrees );

	// "annoy_metric" may be missing (e.g. settings serialized without it).
	// Str2AnnoyMetric asserts on unknown input, so only convert when a value is
	// actually present and otherwise keep the default-constructed metric.
	CSphString sAnnoyMetric = bson::String ( tRoot.ChildByName ( "annoy_metric" ) );
	if ( !sAnnoyMetric.IsEmpty() )
		tRes.m_eAnnoyMetric = Str2AnnoyMetric ( sAnnoyMetric );

	return tRes;
}
Expand All @@ -226,11 +263,13 @@ void operator << ( JsonEscapedBuilder & tOut, const knn::IndexSettings_t & tSett

knn::IndexSettings_t tDefault;

tOut.NamedString ( "knn_type", "hnsw" );
tOut.NamedString ( "knn_type", tSettings.m_eKnnType == knn::KNNType_e::HNSW ? "hnsw" : "annoy" );
tOut.NamedVal ( "knn_dims", tSettings.m_iDims );
tOut.NamedString ( "hnsw_similarity", HNSWSimilarity2Str ( tSettings.m_eHNSWSimilarity ) );
tOut.NamedValNonDefault ( "hnsw_m", tSettings.m_iHNSWM, tDefault.m_iHNSWM );
tOut.NamedValNonDefault ( "hnsw_ef_construction", tSettings.m_iHNSWEFConstruction, tDefault.m_iHNSWEFConstruction );
tOut.NamedValNonDefault ( "annoy_n_trees", tSettings.m_iAnnoyNTrees, tDefault.m_iAnnoyNTrees );
tOut.NamedString ( "annoy_metric", AnnoyMetric2Str ( tSettings.m_eAnnoyMetric ) );
}


Expand All @@ -243,11 +282,14 @@ CSphString FormatKNNConfigStr ( const CSphVector<NamedKNNSettings_t> & dAttrs )
{
JsonObj_c tObj;
tObj.AddStr ( "name", i.m_sName );
tObj.AddStr ( "type", "hnsw" );
tObj.AddStr ( "type", i.m_eKnnType == knn::KNNType_e::HNSW ? "hnsw" : "annoy" );
tObj.AddInt ( "dims", i.m_iDims );
tObj.AddStr ( "hnsw_similarity", HNSWSimilarity2Str ( i.m_eHNSWSimilarity ) );
tObj.AddInt ( "hnsw_m", i.m_iHNSWM );
tObj.AddInt ( "hnsw_ef_construction", i.m_iHNSWEFConstruction );
tObj.AddInt ( "annoy_n_trees", i.m_iAnnoyNTrees );
tObj.AddStr ( "annoy_metric", AnnoyMetric2Str ( i.m_eAnnoyMetric ) );

tArray.AddItem(tObj);
}

Expand Down Expand Up @@ -280,7 +322,15 @@ bool ParseKNNConfigStr ( const CSphString & sStr, CSphVector<NamedKNNSettings_t>
return false;

sType.ToUpper();
if ( sType!="HNSW" )
if ( sType == "HNSW" )
{
tParsed.m_eKnnType = knn::KNNType_e::HNSW;
}
else if ( sType == "ANNOY" )
{
tParsed.m_eKnnType = knn::KNNType_e::ANNOY;
}
else
{
sError.SetSprintf ( "Unknown knn type '%s'", sType.cstr() );
return false;
Expand All @@ -290,16 +340,28 @@ bool ParseKNNConfigStr ( const CSphString & sStr, CSphVector<NamedKNNSettings_t>
if ( !i.FetchIntItem ( tParsed.m_iDims, "dims", sError ) ) return false;
if ( !i.FetchIntItem ( tParsed.m_iHNSWM, "hnsw_m", sError, true ) ) return false;
if ( !i.FetchIntItem ( tParsed.m_iHNSWEFConstruction, "hnsw_ef_construction", sError, true ) ) return false;
if ( !i.FetchIntItem ( tParsed.m_iAnnoyNTrees, "annoy_n_trees", sError, true ) ) return false;
if ( !i.FetchStrItem ( sSimilarity, "hnsw_similarity", sError) ) return false;

sSimilarity.ToUpper();
if ( sSimilarity!="L2" && sSimilarity!="IP" && sSimilarity!="COSINE" )
{
sError.SetSprintf ( "Unknown knn similarity '%s'", sSimilarity.cstr() );
sError.SetSprintf ( "Unknown hnsw similarity '%s'", sSimilarity.cstr() );
return false;
}

tParsed.m_eHNSWSimilarity = Str2HNSWSimilarity ( sSimilarity.cstr() );

if ( !i.FetchStrItem ( sSimilarity, "annoy_metric", sError ) ) return false;

sSimilarity.ToUpper();
if ( sSimilarity != "ANGULAR" && sSimilarity != "EUCLIDEAN" && sSimilarity != "MANHATTAN" && sSimilarity != "HAMMING" && sSimilarity != "DOT" )
{
sError.SetSprintf ( "Unknown annoy metric '%s'", sSimilarity.cstr() );
return false;
}

tParsed.m_eAnnoyMetric = Str2AnnoyMetric ( sSimilarity.cstr() );
}

return true;
Expand Down
53 changes: 50 additions & 3 deletions src/searchdddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

#include "searchdddl.h"

#include <optional>

class DdlParser_c : public SqlParserTraits_c
{
public:
Expand All @@ -37,10 +39,13 @@ class DdlParser_c : public SqlParserTraits_c
int m_iKNNDims = 0;
int m_iHNSWM = 16;
int m_iHNSWEFConstruction = 200;
int m_iAnnoyNTrees = 10;
knn::HNSWSimilarity_e m_eHNSWSimilarity = knn::HNSWSimilarity_e::L2;
bool m_bKNNDimsSpecified = false;
bool m_bHNSWSimilaritySpecified = false;

std::optional<knn::AnnoyMetric_e> m_eAnnoyMetric;

void Reset() { *this = ItemOptions_t(); }
DWORD ToFlags() const;
knn::IndexSettings_t ToKNN() const;
Expand All @@ -65,6 +70,8 @@ class DdlParser_c : public SqlParserTraits_c
bool AddItemOptionHNSWSimilarity ( const SqlNode_t & tOption );
bool AddItemOptionHNSWM ( const SqlNode_t & tOption );
bool AddItemOptionHNSWEfConstruction ( const SqlNode_t & tOption );
bool AddItemOptionAnnoyMetric ( const SqlNode_t& tOption );
bool AddItemOptionAnnoyNTrees ( const SqlNode_t& tOption );

void AddCreateTableOption ( const SqlNode_t & tName, const SqlNode_t & tValue );
bool SetupAlterTable ( const SqlNode_t & tIndex, const SqlNode_t & tAttr, const SqlNode_t & tType, bool bModify = false );
Expand Down Expand Up @@ -141,6 +148,13 @@ knn::IndexSettings_t DdlParser_c::ItemOptions_t::ToKNN() const
tKNN.m_eHNSWSimilarity = m_eHNSWSimilarity;
tKNN.m_iHNSWM = m_iHNSWM;
tKNN.m_iHNSWEFConstruction = m_iHNSWEFConstruction;
tKNN.m_iAnnoyNTrees = m_iAnnoyNTrees;
tKNN.m_eKnnType = m_sKNNType == "HNSW" ? knn::KNNType_e::HNSW : knn::KNNType_e::ANNOY;
if ( m_eAnnoyMetric.has_value() )
{
tKNN.m_eAnnoyMetric = m_eAnnoyMetric.value();
}


return tKNN;
}
Expand Down Expand Up @@ -206,7 +220,7 @@ bool DdlParser_c::CheckFieldFlags ( ESphAttr eAttrType, int iFlags, const CSphSt
{
if ( eAttrType!=SPH_ATTR_FLOAT_VECTOR && !tOpts.m_sKNNType.IsEmpty() )
{
sError = "knn_type='hnsw' can only be used with float_vector attributes";
sError = "knn_type='hnsw' or knn_type='annoy' can only be used with float_vector attributes";
return false;
}

Expand All @@ -220,11 +234,17 @@ bool DdlParser_c::CheckFieldFlags ( ESphAttr eAttrType, int iFlags, const CSphSt
}
else if ( eAttrType==SPH_ATTR_FLOAT_VECTOR )
{
if ( !tOpts.m_sKNNType.IsEmpty() && ( !tOpts.m_bKNNDimsSpecified || !tOpts.m_bHNSWSimilaritySpecified ) )
if ( tOpts.m_sKNNType == "HNSW" && ( !tOpts.m_bKNNDimsSpecified || !tOpts.m_bHNSWSimilaritySpecified ) )
{
sError = "knn_dims and hnsw_similarity are required if knn_type='hnsw'";
return false;
}

if ( tOpts.m_sKNNType == "ANNOY" && ( !tOpts.m_bKNNDimsSpecified || !tOpts.m_eAnnoyMetric.has_value() ) )
{
sError = "knn_dims and annoy_metric are required if knn_type='annoy'";
return false;
}
}
else
{
Expand Down Expand Up @@ -400,7 +420,7 @@ bool DdlParser_c::AddItemOptionIndexed ( const SqlNode_t & tOption )
bool DdlParser_c::AddItemOptionKNNType ( const SqlNode_t & tOption )
{
m_tItemOptions.m_sKNNType = ToStringUnescape(tOption).ToUpper();
if ( m_tItemOptions.m_sKNNType!="HNSW" )
if ( m_tItemOptions.m_sKNNType != "HNSW" && m_tItemOptions.m_sKNNType != "ANNOY" )
{
m_sError.SetSprintf ( "Unknown KNN type '%s'", m_tItemOptions.m_sKNNType.cstr() );
return false;
Expand Down Expand Up @@ -454,6 +474,33 @@ bool DdlParser_c::AddItemOptionHNSWEfConstruction ( const SqlNode_t & tOption )
return true;
}

bool DdlParser_c::AddItemOptionAnnoyMetric ( const SqlNode_t& tOption )
{
CSphString sValue = ToStringUnescape ( tOption ).ToUpper();
if ( sValue == "ANGULAR" )
m_tItemOptions.m_eAnnoyMetric = knn::AnnoyMetric_e::ANGULAR;
else if ( sValue == "EUCLIDEAN" )
m_tItemOptions.m_eAnnoyMetric = knn::AnnoyMetric_e::EUCLIDEAN;
else if ( sValue == "MANHATTAN" )
m_tItemOptions.m_eAnnoyMetric = knn::AnnoyMetric_e::MANHATTAN;
else if ( sValue == "DOT" )
m_tItemOptions.m_eAnnoyMetric = knn::AnnoyMetric_e::DOT;
else
{
m_sError.SetSprintf ( "Unknown Annoy metric '%s'", sValue.cstr() );
return false;
}

return true;
}

bool DdlParser_c::AddItemOptionAnnoyNTrees ( const SqlNode_t& tOption )
{
CSphString sValue = ToStringUnescape ( tOption );
m_tItemOptions.m_iAnnoyNTrees = strtoull ( sValue.cstr(), NULL, 10 );
return true;
}


bool DdlParser_c::ConvertToAttrEngine ( const SqlNode_t & tEngine, AttrEngine_e & eEngine )
{
Expand Down
Loading