diff --git a/cli/config.go b/cli/config.go index f61aec83..14b9b88e 100644 --- a/cli/config.go +++ b/cli/config.go @@ -103,6 +103,9 @@ type Config struct { // Client Client client.Config `mapstructure:"client"` + + // Column search excluded keyword list + ColSearchExclusionKeywords string `yaml:"col_search_excluded_keywords" mapstructure:"col_search_excluded_keywords"` } func LoadConfig() (*Config, error) { diff --git a/cli/server.go b/cli/server.go index c17b5b14..b7c0a650 100644 --- a/cli/server.go +++ b/cli/server.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "strings" "github.com/MakeNowJust/heredoc" "github.com/goto/compass/core/asset" @@ -131,7 +132,7 @@ func runServer(ctx context.Context, cfg *Config) error { if err != nil { return fmt.Errorf("create new asset repository: %w", err) } - discoveryRepository := esStore.NewDiscoveryRepository(esClient, logger) + discoveryRepository := esStore.NewDiscoveryRepository(esClient, logger, strings.Split(cfg.ColSearchExclusionKeywords, ",")) lineageRepository, err := postgres.NewLineageRepository(pgClient) if err != nil { return fmt.Errorf("create new lineage repository: %w", err) diff --git a/cli/worker.go b/cli/worker.go index 6a5bef15..27161b19 100644 --- a/cli/worker.go +++ b/cli/worker.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strings" "github.com/MakeNowJust/heredoc" "github.com/goto/compass/internal/store/elasticsearch" @@ -68,7 +69,7 @@ func runWorker(ctx context.Context, cfg *Config) error { mgr, err := workermanager.New(ctx, workermanager.Deps{ Config: cfg.Worker, - DiscoveryRepo: elasticsearch.NewDiscoveryRepository(esClient, logger), + DiscoveryRepo: elasticsearch.NewDiscoveryRepository(esClient, logger, strings.Split(cfg.ColSearchExclusionKeywords, ",")), Logger: logger, }) if err != nil { diff --git a/core/asset/discovery.go b/core/asset/discovery.go index ba2fd2c0..e60abf13 100644 --- a/core/asset/discovery.go +++ b/core/asset/discovery.go @@ -43,6 +43,8 @@ type SearchFlags struct { // 
DisableFuzzy disables fuzziness on search DisableFuzzy bool + + IsColumnSearch bool } // SearchConfig represents a search query along diff --git a/internal/server/v1beta1/search.go b/internal/server/v1beta1/search.go index 35b672b8..4717b502 100644 --- a/internal/server/v1beta1/search.go +++ b/internal/server/v1beta1/search.go @@ -140,5 +140,6 @@ func getSearchFlagsFromFlags(inputFlags *compassv1beta1.SearchFlags) asset.Searc return asset.SearchFlags{ EnableHighlight: inputFlags.GetEnableHighlight(), DisableFuzzy: inputFlags.GetDisableFuzzy(), + IsColumnSearch: inputFlags.GetIsColumnSearch(), } } diff --git a/internal/store/elasticsearch/discovery_repository.go b/internal/store/elasticsearch/discovery_repository.go index 2c66dfc4..1ab274ba 100644 --- a/internal/store/elasticsearch/discovery_repository.go +++ b/internal/store/elasticsearch/discovery_repository.go @@ -18,14 +18,16 @@ import ( // DiscoveryRepository implements discovery.Repository // with elasticsearch as the backing store. 
type DiscoveryRepository struct { - cli *Client - logger log.Logger + cli *Client + logger log.Logger + ColumnSearchExclusionList []string } -func NewDiscoveryRepository(cli *Client, logger log.Logger) *DiscoveryRepository { +func NewDiscoveryRepository(cli *Client, logger log.Logger, colSearchExclusionList []string) *DiscoveryRepository { return &DiscoveryRepository{ - cli: cli, - logger: logger, + cli: cli, + logger: logger, + ColumnSearchExclusionList: colSearchExclusionList, } } diff --git a/internal/store/elasticsearch/discovery_repository_test.go b/internal/store/elasticsearch/discovery_repository_test.go index 43fd81e8..e2680872 100644 --- a/internal/store/elasticsearch/discovery_repository_test.go +++ b/internal/store/elasticsearch/discovery_repository_test.go @@ -31,7 +31,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.Upsert(ctx, asset.Asset{ ID: "", Type: asset.TypeTable, @@ -50,7 +50,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.Upsert(ctx, asset.Asset{ ID: "sample-id", Type: asset.Type("unknown-type"), @@ -69,7 +69,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) // upsert with create_time as a object err = repo.Upsert(ctx, asset.Asset{ @@ -129,7 +129,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", 
"id"}) err = repo.Upsert(ctx, ast) assert.NoError(t, err) @@ -178,7 +178,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.Upsert(ctx, existingAsset) assert.NoError(t, err) @@ -219,7 +219,7 @@ func TestDiscoveryRepositoryDeleteByID(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.DeleteByID(ctx, "") assert.ErrorIs(t, err, asset.ErrEmptyID) }) @@ -241,7 +241,7 @@ func TestDiscoveryRepositoryDeleteByID(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.Upsert(ctx, ast) require.NoError(t, err) @@ -288,7 +288,7 @@ func TestDiscoveryRepositoryDeleteByID(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.Upsert(ctx, ast1) require.NoError(t, err) @@ -319,7 +319,7 @@ func TestDiscoveryRepositoryDeleteByURN(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) t.Run("should return error if the given urn is empty", func(t *testing.T) { err = repo.DeleteByURN(ctx, "") @@ -378,7 +378,7 @@ func TestDiscoveryRepositoryDeleteByURN(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) err = repo.Upsert(ctx, ast1) require.NoError(t, err) diff --git 
a/internal/store/elasticsearch/discovery_search_repository.go b/internal/store/elasticsearch/discovery_search_repository.go
index 28fc6649..ec027bbd 100644
--- a/internal/store/elasticsearch/discovery_search_repository.go
+++ b/internal/store/elasticsearch/discovery_search_repository.go
@@ -53,7 +53,7 @@ func (repo *DiscoveryRepository) Search(ctx context.Context, cfg asset.SearchCon
 		})
 	}(time.Now())
 
-	query, err := buildQuery(cfg)
+	query, err := repo.buildQuery(cfg)
 	if err != nil {
 		return nil, asset.DiscoveryError{Op: "Search", Err: fmt.Errorf("build query: %w", err)}
 	}
@@ -195,17 +195,109 @@ func (repo *DiscoveryRepository) Suggest(ctx context.Context, config asset.Searc
 	return results, nil
 }
 
-func buildQuery(cfg asset.SearchConfig) (io.Reader, error) {
+func (repo *DiscoveryRepository) buildColumnQuery(query *elastic.BoolQuery, cfg asset.SearchConfig, field string) *elastic.Highlight {
+	matchString := cfg.Text // text for the two non-phrase multi-match clauses, with exclusion keywords stripped below
+	for _, exclusionStr := range repo.ColumnSearchExclusionList {
+		exclusionStr = strings.TrimSpace(exclusionStr) // callers build the list with strings.Split, so entries can be blank
+		if exclusionStr != "" && strings.Contains(matchString, exclusionStr) { // a blank keyword would strip every "_", " " and "-"
+			matchString = strings.ReplaceAll(matchString, fmt.Sprintf("_%s", exclusionStr), "")
+			matchString = strings.ReplaceAll(matchString, fmt.Sprintf(" %s", exclusionStr), "")
+			matchString = strings.ReplaceAll(matchString, fmt.Sprintf("-%s", exclusionStr), "")
+		}
+	}
+
+	if matchString == "" { // everything was stripped; fall back to the raw text
+		matchString = cfg.Text
+	}
+
+	queries := make([]elastic.Query, 0)
+	termQuery := elastic.NewTermQuery(
+		fmt.Sprintf("%s.keyword", field),
+		cfg.Text,
+	).Boost(20)
+
+	descriptionTermQuery := elastic.NewTermQuery(
+		fmt.Sprintf("%s.keyword", "data.columns.description"),
+		cfg.Text,
+	)
+	phraseQuery := elastic.NewMultiMatchQuery(
+		cfg.Text,
+		[]string{
+			"data.columns.name^10",
+			"data.columns.description",
+		}...,
+	).Type("phrase")
+
+	matchQuery := elastic.NewMultiMatchQuery(
+		matchString,
+		[]string{
+			"data.columns.name^5",
+			"data.columns.description",
+		}...,
+	)
+
+	andMatchQuery := elastic.NewMultiMatchQuery(
+		matchString,
+		[]string{
+			"data.columns.name^5",
+			"data.columns.description",
+		}...,
+	).Operator("and")
+
+	multiMatchQueries := []*elastic.MultiMatchQuery{phraseQuery, matchQuery, andMatchQuery}
+	queries = append(queries, termQuery, descriptionTermQuery)
+	query.Should(queries...)
+	highlightQuery := make([]elastic.Query, 0)
+	highlightQuery = append(highlightQuery, queries...)
+	for _, q := range multiMatchQueries {
+		if !cfg.Flags.DisableFuzzy { // only the fuzzy variants are fed to the highlight query
+			updatedQuery := q.Fuzziness("AUTO")
+			highlightQuery = append(highlightQuery, updatedQuery)
+		}
+		query.Should(q)
+	}
+
+	if cfg.Flags.EnableHighlight {
+		return elastic.NewHighlight().
+			Order("score").
+			Field("data.columns.name").
+			Field("data.columns.description").
+			HighlightQuery(
+				elastic.NewBoolQuery().
+					Should(highlightQuery...),
+			)
+	}
+
+	return nil // highlighting not requested
+}
+
+func (repo *DiscoveryRepository) buildQuery(cfg asset.SearchConfig) (io.Reader, error) {
 	boolQuery := elastic.NewBoolQuery()
-	buildTextQuery(boolQuery, cfg)
+	var highlightQuery *elastic.Highlight
+	field := ""
+
+	// if the search text is empty, do a match all query and return results
+	if strings.TrimSpace(cfg.Text) == "" {
+		boolQuery.Should(elastic.NewMatchAllQuery())
+		highlightQuery = buildHighlightQuery(cfg)
+	} else {
+		if cfg.Flags.IsColumnSearch {
+			field = "data.columns.name"
+			highlightQuery = repo.buildColumnQuery(boolQuery, cfg, field)
+		} else {
+			field = "name"
+			buildTextQuery(boolQuery, cfg)
+			highlightQuery = buildHighlightQuery(cfg)
+		}
+	}
+
 	buildFilterTermQueries(boolQuery, cfg.Filters)
 	buildMustMatchQueries(boolQuery, cfg)
-	query := buildFunctionScoreQuery(boolQuery, cfg.RankBy, cfg.Text)
-	highlight := buildHighlightQuery(cfg)
+	query := buildFunctionScoreQuery(boolQuery, cfg.RankBy, cfg.Text, field)
 
 	body, err := elastic.NewSearchRequest().
 		Query(query).
-		Highlight(highlight).
+		Highlight(highlightQuery).
 		MinScore(defaultMinScore).
Body() if err != nil { @@ -238,10 +330,6 @@ func buildSuggestQuery(cfg asset.SearchConfig) (io.Reader, error) { } func buildTextQuery(q *elastic.BoolQuery, cfg asset.SearchConfig) { - if strings.TrimSpace(cfg.Text) == "" { - q.Should(elastic.NewMatchAllQuery()) - } - boostedFields := []string{"urn^10", "name^5"} q.Should( // Phrase query cannot have `FUZZINESS` @@ -312,12 +400,12 @@ func buildFilterExistsQueries(q *elastic.BoolQuery, fields []string) { } } -func buildFunctionScoreQuery(query elastic.Query, rankBy, text string) elastic.Query { +func buildFunctionScoreQuery(query elastic.Query, rankBy, text, field string) elastic.Query { // Added exact match term query here so that exact match gets higher priority. fsQuery := elastic.NewFunctionScoreQuery() if text != "" { fsQuery.Add( - elastic.NewTermQuery("name.keyword", text), + elastic.NewTermQuery(fmt.Sprintf("%s.keyword", field), text), elastic.NewWeightFactorFunction(2), ) } diff --git a/internal/store/elasticsearch/discovery_search_repository_test.go b/internal/store/elasticsearch/discovery_search_repository_test.go index fd05b7cf..fec5926e 100644 --- a/internal/store/elasticsearch/discovery_search_repository_test.go +++ b/internal/store/elasticsearch/discovery_search_repository_test.go @@ -35,7 +35,7 @@ func TestSearcherSearch(t *testing.T) { err = loadTestFixture(cli, esClient, "./testdata/search-test-fixture.json") require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) type expectedRow struct { Type string @@ -354,7 +354,320 @@ func TestSearcherSearch(t *testing.T) { }, }, } - for _, test := range tests { + + columnTests := []searchTest{ + { + Description: "should fetch assets with fields mentioned in included fields for column search", + Config: asset.SearchConfig{ + Text: "username", + IncludeFields: []string{"id", "data.company", "type"}, + Flags: asset.SearchFlags{ + 
IsColumnSearch: true, + }, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "au2-microsoft-invoice", Data: map[string]interface{}{"company": "microsoft"}}, + }, + }, + { + Description: "should fetch assets with default fields if included fields is empty", + Config: asset.SearchConfig{ + Text: "username", + Flags: asset.SearchFlags{ + IsColumnSearch: true, + }, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "au2-microsoft-invoice", Service: "postgres", Data: map[string]interface{}{ + "company": "microsoft", "country": "us", "description": "Transaction records for every microsoft purchase", "environment": "integration"}}, + }, + }, + { + Description: "should fetch assets with empty text", + Config: asset.SearchConfig{ + Text: "", + IncludeFields: []string{"id", "type"}, + Filters: map[string][]string{"service": {"bigquery"}}, + Flags: asset.SearchFlags{ + IsColumnSearch: true, + }, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"}, + }, + }, + { + Description: "should fetch assets with empty text and rank by", + Config: asset.SearchConfig{ + Text: "", + RankBy: "data.profile.usage_count", + IncludeFields: []string{"id", "type"}, + Filters: map[string][]string{"service": {"bigquery"}}, + Flags: asset.SearchFlags{ + IsColumnSearch: true, + }, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"}, + {Type: "table", AssetID: 
"bigquery::gcpproject/dataset/tablename-1"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"}, + }, + }, + { + Description: "should fetch assets which has text in either column name or description", + Config: asset.SearchConfig{ + Text: "records", + IncludeFields: []string{"type", "id"}, + Flags: asset.SearchFlags{ + IsColumnSearch: true, + }, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "au2-microsoft-invoice"}, + {Type: "table", AssetID: "us1-apple-invoice"}, + }, + }, + { + Description: "should enable fuzzy search", + Config: asset.SearchConfig{ + Text: "sernm", + IncludeFields: []string{"type", "id"}, + Flags: asset.SearchFlags{IsColumnSearch: true}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "au2-microsoft-invoice"}, + }, + }, + { + Description: "should disable fuzzy search", + Config: asset.SearchConfig{ + Text: "sernm", + Flags: asset.SearchFlags{DisableFuzzy: true, IsColumnSearch: true}, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{}, + }, + /* + { + Description: "should put more weight on column name field", + Config: asset.SearchConfig{ + Text: "invoice", + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "us1-apple-invoice"}, + {Type: "table", AssetID: "au2-microsoft-invoice"}, + {Type: "topic", AssetID: "transaction"}, + }, + }, + { + Description: "should filter by service if given", + Config: asset.SearchConfig{ + Text: "invoice", + Filters: map[string][]string{ + "service": {"rabbitmq", "postgres"}, + }, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "au2-microsoft-invoice"}, + {Type: "topic", AssetID: "transaction"}, + }, + }, + { + Description: "should match documents based on filter criteria", + Config: asset.SearchConfig{ + Text: "topic", + Filters: map[string][]string{ + 
"data.company": {"gotocompany"}, + }, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic"}, + {Type: "topic", AssetID: "order-topic"}, + {Type: "topic", AssetID: "consumer-mq-2"}, + {Type: "topic", AssetID: "transaction"}, + }, + }, + { + Description: "should not return assets without fields specified in filters", + Config: asset.SearchConfig{ + Text: "invoice topic", + Filters: map[string][]string{ + "data.country": {"id"}, + "data.environment": {"production"}, + "data.company": {"gotocompany"}, + }, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic"}, + {Type: "topic", AssetID: "consumer-mq-2"}, + }, + }, + { + Description: "should return 'consumer-topic' if filter owner email with 'john.doe@email.com'", + Config: asset.SearchConfig{ + Text: "topic", + Filters: map[string][]string{ + "owners.email": {"john.doe@email.com"}, + }, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic"}, + }, + }, + { + Description: "should return a descendingly sorted based on usage count in search results if rank by usage in the config", + Config: asset.SearchConfig{ + Text: "bigquery", + RankBy: "data.profile.usage_count", + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, + }, + }, + { + Description: "should return consumer-topic if search by query description field with text 'rabbitmq' and owners name 'johndoe'", + Config: 
asset.SearchConfig{ + Text: "consumer", + Queries: map[string]string{ + "description": "rabbitmq", + "owners.email": "john.doe", + }, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic"}, + }, + }, + { + Description: "should return 5 records with offset of 0", + Config: asset.SearchConfig{ + Text: "topic", + Offset: 0, + MaxResults: 5, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic"}, + {Type: "topic", AssetID: "order-topic"}, + {Type: "topic", AssetID: "purchase-topic"}, + {Type: "topic", AssetID: "consumer-mq-2"}, + {Type: "topic", AssetID: "transaction"}, + }, + }, + { + Description: "should return 4 records with offset of 1", + Config: asset.SearchConfig{ + Text: "topic", + Offset: 1, + MaxResults: 5, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + // {Type: "topic", AssetID: "consumer-topic"}, + {Type: "topic", AssetID: "order-topic"}, + {Type: "topic", AssetID: "purchase-topic"}, + {Type: "topic", AssetID: "consumer-mq-2"}, + {Type: "topic", AssetID: "transaction"}, + }, + }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource on top if search by query table column name field with text 'tablename-common-column1'", + Config: asset.SearchConfig{ + Text: "tablename", + Queries: map[string]string{ + "data.schema.columns.name": "common", + }, + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, + }, + }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if searched for text 'tablename-abc-common-test'", + Config: asset.SearchConfig{ + Text: "tablename-abc-common-test", + RankBy: "data.profile.usage_count", + IncludeFields: 
[]string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"}, + }, + }, + { + Description: "should return highlighted text in resource if searched highlight text is enabled.", + Config: asset.SearchConfig{ + Text: "order", + RankBy: "data.profile.usage_count", + Flags: asset.SearchFlags{ + EnableHighlight: true, + }, + IncludeFields: []string{"type", "id"}, + }, + + Expected: []expectedRow{ + { + Type: "topic", + AssetID: "order-topic", + Data: map[string]interface{}{ + "_highlight": map[string]interface{}{ + "urn": []interface{}{"order-topic"}, + "data.topic_name": []interface{}{"order-topic"}, + "name": []interface{}{"order-topic"}, + "description": []interface{}{"Topic for each submitted order"}, + "id": []interface{}{"order-topic"}, + "data.description": []interface{}{"Topic for each submitted order"}, + }, + }, + }, + }, + }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if " + + "searched for text 'abc-test' as it has both the keywords searched", + Config: asset.SearchConfig{ + Text: "abc-test", + RankBy: "data.profile.usage_count", + IncludeFields: []string{"type", "id"}, + }, + Expected: []expectedRow{ + {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"}, + {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"}, + }, + }, + */ + } + + allTests := make([]searchTest, 0) + allTests = append(tests, columnTests...) 
+ + for _, test := range allTests { t.Run(test.Description, func(t *testing.T) { results, err := repo.Search(ctx, test.Config) require.NoError(t, err) @@ -387,7 +700,7 @@ func TestSearcherSuggest(t *testing.T) { err = loadTestFixture(cli, esClient, "./testdata/suggest-test-fixture.json") require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) t.Run("fixtures", func(t *testing.T) { testCases := []struct { @@ -424,7 +737,7 @@ func loadTestFixture(cli *elasticsearch.Client, esClient *store.Client, filePath ctx := context.TODO() for _, testdata := range data { - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) for _, ast := range testdata.Assets { if err := repo.Upsert(ctx, ast); err != nil { return err @@ -452,7 +765,7 @@ func TestGroupAssets(t *testing.T) { ) require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) _, err = repo.GroupAssets(ctx, asset.GroupConfig{ GroupBy: []string{""}, }) @@ -473,7 +786,7 @@ func TestGroupAssets(t *testing.T) { err = loadTestFixture(cli, esClient, "./testdata/search-test-fixture.json") require.NoError(t, err) - repo := store.NewDiscoveryRepository(esClient, log.NewNoop()) + repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), []string{"number", "id"}) type groupTest struct { Description string diff --git a/internal/store/elasticsearch/schema.go b/internal/store/elasticsearch/schema.go index 18b7d398..ccb3ea7f 100644 --- a/internal/store/elasticsearch/schema.go +++ b/internal/store/elasticsearch/schema.go @@ -9,6 +9,12 @@ var indexSettingsTemplate = `{ %q: {} }, "settings": { + "similarity": { + "my_bm25_without_length_normalization": { + "type": "BM25", + "b": "0" + } + }, 
"index.mapping.ignore_malformed": true, "analysis": { "analyzer": {