diff --git a/cli/config.go b/cli/config.go
index f61aec83..14b9b88e 100644
--- a/cli/config.go
+++ b/cli/config.go
@@ -103,6 +103,9 @@ type Config struct {
// Client
Client client.Config `mapstructure:"client"`
+
+ // Column search excluded keyword list
+ ColSearchExclusionKeywords string `yaml:"col_search_excluded_keywords" mapstructure:"col_search_excluded_keywords"`
}
func LoadConfig() (*Config, error) {
diff --git a/cli/server.go b/cli/server.go
index 63890a75..9a7b5a9b 100644
--- a/cli/server.go
+++ b/cli/server.go
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"os"
+ "strings"
"github.com/MakeNowJust/heredoc"
"github.com/goto/compass/core/asset"
@@ -131,7 +132,7 @@ func runServer(ctx context.Context, cfg *Config) error {
if err != nil {
return fmt.Errorf("create new asset repository: %w", err)
}
- discoveryRepository := esStore.NewDiscoveryRepository(esClient, logger, cfg.Elasticsearch.RequestTimeout)
+ discoveryRepository := esStore.NewDiscoveryRepository(esClient, logger, cfg.Elasticsearch.RequestTimeout, strings.Split(cfg.ColSearchExclusionKeywords, ","))
lineageRepository, err := postgres.NewLineageRepository(pgClient)
if err != nil {
return fmt.Errorf("create new lineage repository: %w", err)
diff --git a/cli/worker.go b/cli/worker.go
index c9b75aad..e7d372a1 100644
--- a/cli/worker.go
+++ b/cli/worker.go
@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
+ "strings"
"github.com/MakeNowJust/heredoc"
"github.com/goto/compass/internal/store/elasticsearch"
@@ -67,9 +68,10 @@ func runWorker(ctx context.Context, cfg *Config) error {
}
mgr, err := workermanager.New(ctx, workermanager.Deps{
- Config: cfg.Worker,
- DiscoveryRepo: elasticsearch.NewDiscoveryRepository(esClient, logger, cfg.Elasticsearch.RequestTimeout),
- Logger: logger,
+ Config: cfg.Worker,
+ DiscoveryRepo: elasticsearch.NewDiscoveryRepository(esClient, logger, cfg.Elasticsearch.RequestTimeout,
+ strings.Split(cfg.ColSearchExclusionKeywords, ",")),
+ Logger: logger,
})
if err != nil {
return err
diff --git a/core/asset/discovery.go b/core/asset/discovery.go
index ba2fd2c0..e60abf13 100644
--- a/core/asset/discovery.go
+++ b/core/asset/discovery.go
@@ -43,6 +43,8 @@ type SearchFlags struct {
// DisableFuzzy disables fuzziness on search
DisableFuzzy bool
+
+ IsColumnSearch bool
}
// SearchConfig represents a search query along
diff --git a/internal/server/v1beta1/search.go b/internal/server/v1beta1/search.go
index 35b672b8..4717b502 100644
--- a/internal/server/v1beta1/search.go
+++ b/internal/server/v1beta1/search.go
@@ -140,5 +140,6 @@ func getSearchFlagsFromFlags(inputFlags *compassv1beta1.SearchFlags) asset.Searc
return asset.SearchFlags{
EnableHighlight: inputFlags.GetEnableHighlight(),
DisableFuzzy: inputFlags.GetDisableFuzzy(),
+ IsColumnSearch: inputFlags.GetIsColumnSearch(),
}
}
diff --git a/internal/store/elasticsearch/discovery_repository.go b/internal/store/elasticsearch/discovery_repository.go
index c193237b..d9c21780 100644
--- a/internal/store/elasticsearch/discovery_repository.go
+++ b/internal/store/elasticsearch/discovery_repository.go
@@ -18,16 +18,18 @@ import (
// DiscoveryRepository implements discovery.Repository
// with elasticsearch as the backing store.
type DiscoveryRepository struct {
- cli *Client
- logger log.Logger
- requestTimeout time.Duration
+ cli *Client
+ logger log.Logger
+ requestTimeout time.Duration
+ columnSearchExclusionList []string
}
-func NewDiscoveryRepository(cli *Client, logger log.Logger, requestTimeout time.Duration) *DiscoveryRepository {
+func NewDiscoveryRepository(cli *Client, logger log.Logger, requestTimeout time.Duration, colSearchExclusionList []string) *DiscoveryRepository {
return &DiscoveryRepository{
- cli: cli,
- logger: logger,
- requestTimeout: requestTimeout,
+ cli: cli,
+ logger: logger,
+ requestTimeout: requestTimeout,
+ columnSearchExclusionList: colSearchExclusionList,
}
}
diff --git a/internal/store/elasticsearch/discovery_repository_test.go b/internal/store/elasticsearch/discovery_repository_test.go
index a3c36136..cb230a2d 100644
--- a/internal/store/elasticsearch/discovery_repository_test.go
+++ b/internal/store/elasticsearch/discovery_repository_test.go
@@ -30,8 +30,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) {
store.WithClient(cli),
)
require.NoError(t, err)
-
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, asset.Asset{
ID: "",
Type: asset.TypeTable,
@@ -50,7 +49,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, asset.Asset{
ID: "sample-id",
Type: asset.Type("unknown-type"),
@@ -68,8 +67,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) {
store.WithClient(cli),
)
require.NoError(t, err)
-
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
// upsert with create_time as a object
err = repo.Upsert(ctx, asset.Asset{
@@ -129,7 +127,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, ast)
assert.NoError(t, err)
@@ -177,8 +175,7 @@ func TestDiscoveryRepositoryUpsert(t *testing.T) {
store.WithClient(cli),
)
require.NoError(t, err)
-
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, existingAsset)
assert.NoError(t, err)
@@ -219,7 +216,7 @@ func TestDiscoveryRepositoryDeleteByID(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.DeleteByID(ctx, "")
assert.ErrorIs(t, err, asset.ErrEmptyID)
})
@@ -241,7 +238,7 @@ func TestDiscoveryRepositoryDeleteByID(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, ast)
require.NoError(t, err)
@@ -288,7 +285,7 @@ func TestDiscoveryRepositoryDeleteByID(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, ast1)
require.NoError(t, err)
@@ -319,7 +316,7 @@ func TestDiscoveryRepositoryDeleteByURN(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
t.Run("should return error if the given urn is empty", func(t *testing.T) {
err = repo.DeleteByURN(ctx, "")
@@ -378,7 +375,7 @@ func TestDiscoveryRepositoryDeleteByURN(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
err = repo.Upsert(ctx, ast1)
require.NoError(t, err)
diff --git a/internal/store/elasticsearch/discovery_search_repository.go b/internal/store/elasticsearch/discovery_search_repository.go
index 0ce7e189..e50d8422 100644
--- a/internal/store/elasticsearch/discovery_search_repository.go
+++ b/internal/store/elasticsearch/discovery_search_repository.go
@@ -53,7 +53,7 @@ func (repo *DiscoveryRepository) Search(ctx context.Context, cfg asset.SearchCon
})
}(time.Now())
- query, err := buildQuery(cfg)
+ query, err := repo.buildQuery(cfg)
if err != nil {
return nil, asset.DiscoveryError{Op: "Search", Err: fmt.Errorf("build query: %w", err)}
}
@@ -197,17 +197,109 @@ func (repo *DiscoveryRepository) Suggest(ctx context.Context, config asset.Searc
return results, nil
}
-func buildQuery(cfg asset.SearchConfig) (io.Reader, error) {
+func (repo *DiscoveryRepository) buildColumnQuery(query *elastic.BoolQuery, cfg asset.SearchConfig, field string) *elastic.Highlight {
+ matchString := cfg.Text
+ for _, exclusionStr := range repo.columnSearchExclusionList {
+ exclusionStr = strings.TrimSpace(exclusionStr)
+ if strings.Contains(matchString, exclusionStr) {
+ matchString = strings.ReplaceAll(matchString, fmt.Sprintf("_%s", exclusionStr), "")
+ matchString = strings.ReplaceAll(matchString, fmt.Sprintf(" %s", exclusionStr), "")
+ matchString = strings.ReplaceAll(matchString, fmt.Sprintf("-%s", exclusionStr), "")
+ }
+ }
+
+ if matchString == "" {
+ matchString = cfg.Text
+ }
+
+ queries := make([]elastic.Query, 0)
+ termQuery := elastic.NewTermQuery(
+ fmt.Sprintf("%s.keyword", field),
+ cfg.Text,
+ ).Boost(20)
+
+ descriptionTermQuery := elastic.NewTermQuery(
+ fmt.Sprintf("%s.keyword", "data.columns.description"),
+ cfg.Text,
+ )
+ phraseQuery := elastic.NewMultiMatchQuery(
+ cfg.Text,
+ []string{
+ "data.columns.name^10",
+ "data.columns.description",
+ }...,
+ ).Type("phrase")
+
+ matchQuery := elastic.NewMultiMatchQuery(
+ matchString,
+ []string{
+ "data.columns.name^5",
+ "data.columns.description",
+ }...,
+ )
+
+ andMatchQuery := elastic.NewMultiMatchQuery(
+ matchString,
+ []string{
+ "data.columns.name^5",
+ "data.columns.description",
+ }...,
+ ).Operator("and")
+
+ multiMatchQueries := []*elastic.MultiMatchQuery{matchQuery, andMatchQuery}
+ queries = append(queries, termQuery, descriptionTermQuery, phraseQuery)
+ query.Should(queries...)
+ highlightQuery := make([]elastic.Query, 0)
+ highlightQuery = append(highlightQuery, queries...)
+ for _, q := range multiMatchQueries {
+ if !cfg.Flags.DisableFuzzy {
+ updatedQuery := q.Fuzziness("AUTO")
+ highlightQuery = append(highlightQuery, updatedQuery)
+ }
+ query.Should(q)
+ }
+
+ if cfg.Flags.EnableHighlight {
+ return elastic.NewHighlight().
+ Order("score").
+ Field("data.columns.name").
+ Field("data.columns.description").
+ HighlightQuery(
+ elastic.NewBoolQuery().
+ Should(highlightQuery...),
+ )
+ }
+
+ return nil
+}
+
+func (repo *DiscoveryRepository) buildQuery(cfg asset.SearchConfig) (io.Reader, error) {
boolQuery := elastic.NewBoolQuery()
- buildTextQuery(boolQuery, cfg)
+ var highlightQuery *elastic.Highlight
+ field := ""
+
+ // if the search text is empty, do a match all query and return results
+ if strings.TrimSpace(cfg.Text) == "" {
+ boolQuery.Should(elastic.NewMatchAllQuery())
+ highlightQuery = buildHighlightQuery(cfg)
+ } else {
+ if cfg.Flags.IsColumnSearch {
+ field = "data.columns.name"
+ highlightQuery = repo.buildColumnQuery(boolQuery, cfg, field)
+ } else {
+ field = "name"
+ buildTextQuery(boolQuery, cfg)
+ highlightQuery = buildHighlightQuery(cfg)
+ }
+ }
+
buildFilterTermQueries(boolQuery, cfg.Filters)
buildMustMatchQueries(boolQuery, cfg)
- query := buildFunctionScoreQuery(boolQuery, cfg.RankBy, cfg.Text)
- highlight := buildHighlightQuery(cfg)
+ query := buildFunctionScoreQuery(boolQuery, cfg.RankBy, cfg.Text, field)
body, err := elastic.NewSearchRequest().
Query(query).
- Highlight(highlight).
+ Highlight(highlightQuery).
MinScore(defaultMinScore).
Body()
if err != nil {
@@ -240,10 +332,6 @@ func buildSuggestQuery(cfg asset.SearchConfig) (io.Reader, error) {
}
func buildTextQuery(q *elastic.BoolQuery, cfg asset.SearchConfig) {
- if strings.TrimSpace(cfg.Text) == "" {
- q.Should(elastic.NewMatchAllQuery())
- }
-
boostedFields := []string{"urn^10", "name^5"}
q.Should(
// Phrase query cannot have `FUZZINESS`
@@ -314,12 +402,12 @@ func buildFilterExistsQueries(q *elastic.BoolQuery, fields []string) {
}
}
-func buildFunctionScoreQuery(query elastic.Query, rankBy, text string) elastic.Query {
+func buildFunctionScoreQuery(query elastic.Query, rankBy, text, field string) elastic.Query {
// Added exact match term query here so that exact match gets higher priority.
fsQuery := elastic.NewFunctionScoreQuery()
if text != "" {
fsQuery.Add(
- elastic.NewTermQuery("name.keyword", text),
+ elastic.NewTermQuery(fmt.Sprintf("%s.keyword", field), text),
elastic.NewWeightFactorFunction(2),
)
}
diff --git a/internal/store/elasticsearch/discovery_search_repository_test.go b/internal/store/elasticsearch/discovery_search_repository_test.go
index de0a4896..f96ce06a 100644
--- a/internal/store/elasticsearch/discovery_search_repository_test.go
+++ b/internal/store/elasticsearch/discovery_search_repository_test.go
@@ -35,9 +35,7 @@ func TestSearcherSearch(t *testing.T) {
err = loadTestFixture(cli, esClient, "./testdata/search-test-fixture.json")
require.NoError(t, err)
-
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
-
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
type expectedRow struct {
Type string
AssetID string
@@ -287,7 +285,7 @@ func TestSearcherSearch(t *testing.T) {
Config: asset.SearchConfig{
Text: "tablename",
Queries: map[string]string{
- "data.schema.columns.name": "common",
+ "data.columns.name": "common",
},
IncludeFields: []string{"type", "id"},
},
@@ -355,7 +353,276 @@ func TestSearcherSearch(t *testing.T) {
},
},
}
- for _, test := range tests {
+
+ columnTests := []searchTest{
+ {
+ Description: "should fetch assets with fields mentioned in included fields for column search",
+ Config: asset.SearchConfig{
+ Text: "username",
+ IncludeFields: []string{"id", "data.company", "type"},
+ Flags: asset.SearchFlags{
+ IsColumnSearch: true,
+ },
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "au2-microsoft-invoice", Data: map[string]interface{}{"company": "microsoft"}},
+ },
+ },
+ {
+ Description: "should fetch assets with default fields if included fields is empty",
+ Config: asset.SearchConfig{
+ Text: "username",
+ Flags: asset.SearchFlags{
+ IsColumnSearch: true,
+ },
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "au2-microsoft-invoice", Service: "postgres", Data: map[string]interface{}{
+ "columns": []interface{}{map[string]interface{}{"name": "id"}, map[string]interface{}{"description": "purchaser username", "name": "username"}, map[string]interface{}{"description": "item identifications", "name": "item_id"}}, "company": "microsoft", "country": "us", "description": "Transaction records for every microsoft purchase", "environment": "integration", "table_id": "au2-microsoft-invoice", "table_name": "microsoft-invoice", "total_rows": float64(100),
+ }},
+ },
+ },
+ {
+ Description: "should fetch assets with empty text",
+ Config: asset.SearchConfig{
+ Text: "",
+ IncludeFields: []string{"id", "type"},
+ Filters: map[string][]string{"service": {"bigquery"}},
+ Flags: asset.SearchFlags{
+ IsColumnSearch: true,
+ },
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ },
+ },
+ {
+ Description: "should fetch assets with empty text and rank by",
+ Config: asset.SearchConfig{
+ Text: "",
+ RankBy: "data.profile.usage_count",
+ IncludeFields: []string{"id", "type"},
+ Filters: map[string][]string{"service": {"bigquery"}},
+ Flags: asset.SearchFlags{
+ IsColumnSearch: true,
+ },
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"},
+ },
+ },
+ {
+ Description: "should fetch assets which has text in either column name or description",
+ Config: asset.SearchConfig{
+ Text: "purchaser",
+ IncludeFields: []string{"type", "id"},
+ Flags: asset.SearchFlags{
+ IsColumnSearch: true,
+ },
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "us1-apple-invoice"},
+ {Type: "table", AssetID: "au2-microsoft-invoice"},
+ },
+ },
+ {
+ Description: "should enable fuzzy search",
+ Config: asset.SearchConfig{
+ Text: "uername",
+ IncludeFields: []string{"type", "id"},
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "au2-microsoft-invoice"},
+ },
+ },
+ {
+ Description: "should disable fuzzy search",
+ Config: asset.SearchConfig{
+ Text: "uername",
+ Flags: asset.SearchFlags{DisableFuzzy: true, IsColumnSearch: true},
+ IncludeFields: []string{"type", "id"},
+ },
+ Expected: []expectedRow{},
+ },
+ /*
+ {
+ Description: "should put more weight on column name field",
+ Config: asset.SearchConfig{
+ Text: "invoice",
+ IncludeFields: []string{"type", "id"},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "us1-apple-invoice"},
+ {Type: "table", AssetID: "au2-microsoft-invoice"},
+ {Type: "topic", AssetID: "transaction"},
+ },
+ },*/
+ {
+ Description: "should filter by service if given",
+ Config: asset.SearchConfig{
+ Text: "purchaser",
+ Filters: map[string][]string{
+ "service": {"postgres"},
+ },
+ IncludeFields: []string{"type", "id"},
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "au2-microsoft-invoice"},
+ },
+ },
+ {
+ Description: "should match documents based on filter criteria",
+ Config: asset.SearchConfig{
+ Text: "purchaser",
+ Filters: map[string][]string{
+ "data.company": {"apple"},
+ },
+ IncludeFields: []string{"type", "id"},
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "us1-apple-invoice"},
+ },
+ },
+ {
+ Description: "should return a descendingly sorted based on usage count in search results if rank by usage in the config",
+ Config: asset.SearchConfig{
+ Text: "column3",
+ RankBy: "data.profile.usage_count",
+ IncludeFields: []string{"type", "id"},
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"},
+ },
+ },
+ {
+ Description: "should return 5 records with offset of 0",
+ Config: asset.SearchConfig{
+ Text: "column3",
+ Offset: 0,
+ MaxResults: 5,
+ IncludeFields: []string{"type", "id"},
+ RankBy: "data.profile.usage_count",
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ },
+ },
+ {
+ Description: "should return 4 records with offset of 1",
+ Config: asset.SearchConfig{
+ Text: "topic",
+ Offset: 1,
+ MaxResults: 5,
+ IncludeFields: []string{"type", "id"},
+ RankBy: "data.profile.usage_count",
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ },
+ },
+ {
+ Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource on top if search by column name 'tablename-common-column2",
+ Config: asset.SearchConfig{
+ Text: "tablename-common-column2",
+ IncludeFields: []string{"type", "id"},
+ Flags: asset.SearchFlags{IsColumnSearch: true},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
+ },
+ }, /*
+ {
+ Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if searched for text 'tablename-abc-common-test'",
+ Config: asset.SearchConfig{
+ Text: "tablename-abc-common-test",
+ RankBy: "data.profile.usage_count",
+ IncludeFields: []string{"type", "id"},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-mid"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-1"},
+ },
+ },
+ {
+ Description: "should return highlighted text in resource if searched highlight text is enabled.",
+ Config: asset.SearchConfig{
+ Text: "order",
+ RankBy: "data.profile.usage_count",
+ Flags: asset.SearchFlags{
+ EnableHighlight: true,
+ },
+ IncludeFields: []string{"type", "id"},
+ },
+
+ Expected: []expectedRow{
+ {
+ Type: "topic",
+ AssetID: "order-topic",
+ Data: map[string]interface{}{
+ "_highlight": map[string]interface{}{
+ "urn": []interface{}{"order-topic"},
+ "data.topic_name": []interface{}{"order-topic"},
+ "name": []interface{}{"order-topic"},
+ "description": []interface{}{"Topic for each submitted order"},
+ "id": []interface{}{"order-topic"},
+ "data.description": []interface{}{"Topic for each submitted order"},
+ },
+ },
+ },
+ },
+ },
+ {
+ Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if " +
+ "searched for text 'abc-test' as it has both the keywords searched",
+ Config: asset.SearchConfig{
+ Text: "abc-test",
+ RankBy: "data.profile.usage_count",
+ IncludeFields: []string{"type", "id"},
+ },
+ Expected: []expectedRow{
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/test"},
+ {Type: "table", AssetID: "bigquery::gcpproject/dataset/abc-tablename-mid"},
+ },
+ },
+ */
+ }
+
+ allTests := append(tests, columnTests...)
+
+ for _, test := range allTests {
t.Run(test.Description, func(t *testing.T) {
results, err := repo.Search(ctx, test.Config)
require.NoError(t, err)
@@ -388,7 +655,7 @@ func TestSearcherSuggest(t *testing.T) {
err = loadTestFixture(cli, esClient, "./testdata/suggest-test-fixture.json")
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
t.Run("fixtures", func(t *testing.T) {
testCases := []struct {
@@ -425,7 +692,7 @@ func loadTestFixture(cli *elasticsearch.Client, esClient *store.Client, filePath
ctx := context.TODO()
for _, testdata := range data {
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
for _, ast := range testdata.Assets {
if err := repo.Upsert(ctx, ast); err != nil {
return err
@@ -453,7 +720,7 @@ func TestGroupAssets(t *testing.T) {
)
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
_, err = repo.GroupAssets(ctx, asset.GroupConfig{
GroupBy: []string{""},
})
@@ -474,7 +741,7 @@ func TestGroupAssets(t *testing.T) {
err = loadTestFixture(cli, esClient, "./testdata/search-test-fixture.json")
require.NoError(t, err)
- repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10)
+ repo := store.NewDiscoveryRepository(esClient, log.NewNoop(), time.Second*10, []string{"number", "id"})
type groupTest struct {
Description string
diff --git a/internal/store/elasticsearch/schema.go b/internal/store/elasticsearch/schema.go
index 18b7d398..ccb3ea7f 100644
--- a/internal/store/elasticsearch/schema.go
+++ b/internal/store/elasticsearch/schema.go
@@ -9,6 +9,12 @@ var indexSettingsTemplate = `{
%q: {}
},
"settings": {
+ "similarity": {
+ "my_bm25_without_length_normalization": {
+ "type": "BM25",
+ "b": "0"
+ }
+ },
"index.mapping.ignore_malformed": true,
"analysis": {
"analyzer": {
diff --git a/internal/store/elasticsearch/testdata/search-test-fixture.json b/internal/store/elasticsearch/testdata/search-test-fixture.json
index b990e928..c7fc662c 100644
--- a/internal/store/elasticsearch/testdata/search-test-fixture.json
+++ b/internal/store/elasticsearch/testdata/search-test-fixture.json
@@ -114,7 +114,7 @@
"country": "us",
"description": "Transaction records for every microsoft purchase",
"total_rows": 100,
- "schema": [
+ "columns": [
{
"name": "id"
},
@@ -144,7 +144,7 @@
"country": "id",
"description": "Transaction records for every Apple purchase",
"total_rows": 100,
- "schema": [
+ "columns": [
{
"name": "id"
},
@@ -195,8 +195,7 @@
"owner": "user_1"
}
},
- "schema": {
- "columns": [
+ "columns": [
{
"data_type": "STRING",
"is_nullable": true,
@@ -227,8 +226,7 @@
}
}
}
- ]
- },
+ ],
"resource": {
"name": "tablename-1",
"service": "bigquery",
@@ -280,8 +278,7 @@
"owner": "user_1"
}
},
- "schema": {
- "columns": [
+ "columns": [
{
"data_type": "STRING",
"is_nullable": true,
@@ -312,8 +309,7 @@
}
}
}
- ]
- },
+ ],
"resource": {
"name": "tablename-common",
"service": "bigquery",
@@ -371,8 +367,7 @@
"owner": "user_1"
}
},
- "schema": {
- "columns": [
+ "columns": [
{
"data_type": "STRING",
"is_nullable": true,
@@ -403,8 +398,8 @@
}
}
}
- ]
- },
+ ],
+
"resource": {
"name": "tablename-common-test",
"service": "bigquery",
@@ -469,8 +464,7 @@
"owner": "user_1"
}
},
- "schema": {
- "columns": [
+ "columns": [
{
"data_type": "STRING",
"is_nullable": true,
@@ -501,8 +495,7 @@
}
}
}
- ]
- },
+ ],
"resource": {
"name": "tablename-mid",
"service": "bigquery",
@@ -560,8 +553,7 @@
"owner": "user_1"
}
},
- "schema": {
- "columns": [
+ "columns": [
{
"data_type": "STRING",
"is_nullable": true,
@@ -592,8 +584,7 @@
}
}
}
- ]
- },
+ ],
"resource": {
"name": "abc-mid-test",
"service": "bigquery",
@@ -658,8 +649,7 @@
"owner": "user_1"
}
},
- "schema": {
- "columns": [
+ "columns": [
{
"data_type": "STRING",
"is_nullable": true,
@@ -690,8 +680,7 @@
}
}
}
- ]
- },
+ ],
"resource": {
"name": "tablename-mid",
"service": "bigquery",