Skip to content

Commit

Permalink
Popularity wired through
Browse files (browse the repository at this point in the history)
Signed-off-by: AbstractionFactory <[email protected]>
  • Loading branch information
abstractionfactory committed Dec 2, 2024
1 parent e7abbd0 commit 530ec95
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 48 deletions.
4 changes: 2 additions & 2 deletions backend/internal/providerindex/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,8 @@ func (d *documentationGenerator) extractRepoInfo(ctx context.Context, addr provi
providerData.Description = repoInfo.Description
providerData.Popularity = repoInfo.Popularity
providerData.ForkCount = repoInfo.ForkCount
providerData.UpstreamPopularity = upstreamRepoInfo.Popularity
providerData.UpstreamForkCount = upstreamRepoInfo.ForkCount

forkRepo := repoInfo.ForkOf
if forkRepo == nil {
Expand Down Expand Up @@ -378,8 +380,6 @@ func (d *documentationGenerator) extractRepoInfo(ctx context.Context, addr provi
d.log.Warn(ctx, "Cannot fetch upstream repository info for %s (%v)", forkRepo.String(), err)
return
}
providerData.UpstreamPopularity = upstreamRepoInfo.Popularity
providerData.UpstreamForkCount = upstreamRepoInfo.ForkCount
}

func (d *documentationGenerator) scrapeVersion(ctx context.Context, addr providertypes.ProviderAddr, canonicalAddr provider.Addr, providerDetails *providertypes.Provider, version provider.Version, blocked bool, blockedReason string) (providertypes.ProviderVersion, error) {
Expand Down
4 changes: 3 additions & 1 deletion backend/internal/providerindex/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ func (p providerSearch) indexProviderVersion(ctx context.Context, providerAddr p
"version": string(version),
"id": string(docItem.Name),
},
ParentID: providerItem.ID,
ParentID: providerItem.ID,
Popularity: popularity,
Warnings: len(providerDetails.Warnings),
}); err != nil {
return fmt.Errorf("failed to add resource %s to search index (%w)", docItem.Name, err)
}
Expand Down
77 changes: 32 additions & 45 deletions search/worker/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,51 +7,38 @@ export const query = async (client: Client, queryParam: string): Promise<Entity[
};

const searchQuery = `
WITH search_terms AS (
SELECT unnest(regexp_split_to_array($1, '[ /]+')) AS term
),
term_matches AS (
SELECT e.*
FROM entities e
INNER JOIN search_terms st
ON e.addr ILIKE '%' || st.term || '%'
OR e.description ILIKE '%' || st.term || '%'
GROUP BY e.id
),
ranked_entities AS (
SELECT *,
/* TODO: remove hard-coded hashicorp/opentofu preferential treatment */
CASE WHEN link_variables->>'namespace' = 'hashicorp' THEN 1 WHEN link_variables->>'namespace' = 'opentofu' THEN 0 ELSE 0.5 END AS popularity_fudge,
CASE WHEN type = 'provider' THEN 1 ELSE 0 END AS provider_rank_fudge,
similarity(tm.addr, $1) AS title_sim,
similarity(tm.description, $1) AS description_sim,
similarity(link_variables->>'name', $1) AS name_sim
FROM term_matches tm
),
providers AS (
WITH search_terms AS (SELECT unnest(regexp_split_to_array($1, '[ /]+')) AS term),
term_matches AS (SELECT e.*
FROM entities e
INNER JOIN search_terms st
ON e.addr ILIKE '%' || st.term || '%'
OR e.description ILIKE '%' || st.term || '%'
GROUP BY id, last_updated, type, addr, version, title, description, link_variables, document, popularity, warnings),
ranked_entities AS (SELECT *,
/* The provider rank fudge ranks providers higher than their resources */
CASE WHEN type = 'provider' THEN 1 ELSE 0 END AS provider_rank_fudge,
/* When warnings are present, rank the provider lower because it's likely deprecated. */
CASE WHEN warnings > 0 THEN -1 ELSE 0 END AS warnings_rank_fudge,
/* Give a slight boost to providers with a higher star rating. */
tm.popularity / (SELECT max(popularity) FROM tm) AS popularity_rank,
/* Text similarity rankings, each taking a value from 0 to 1. */
similarity(tm.addr, $1) AS title_sim,
similarity(tm.description, $1) AS description_sim,
similarity(link_variables ->> 'name', $1) AS name_sim,
FROM term_matches tm),
providers AS (SELECT *
FROM ranked_entities
WHERE type LIKE 'provider%'
ORDER BY (provider_rank_fudge + warnings_rank_fudge + 1) *(popularity_rank + title_sim + name_sim + description_sim/0.5) DESC
LIMIT 5),
modules AS (SELECT *
FROM ranked_entities
WHERE type LIKE 'module%'
ORDER BY (warnings_rank_fudge + 1) * (popularity_rank + title_sim + name_sim + description_sim/0.5) DESC
LIMIT 5)
SELECT *
FROM ranked_entities
WHERE type LIKE 'provider%'
ORDER BY
provider_rank_fudge DESC,
popularity_fudge DESC,
title_sim DESC,
name_sim DESC,
description_sim DESC
LIMIT 5
),
modules AS (
SELECT *
FROM ranked_entities
WHERE type LIKE 'module%'
ORDER BY
popularity_fudge DESC,
title_sim DESC,
name_sim DESC,
description_sim DESC
LIMIT 5
)
SELECT * FROM providers
FROM providers
UNION ALL
SELECT * FROM modules;
SELECT *
FROM modules;
`;

0 comments on commit 530ec95

Please sign in to comment.