diff options
| author | Julie Qiu <julie@golang.org> | 2021-07-02 11:30:29 -0400 |
|---|---|---|
| committer | Julie Qiu <julie@golang.org> | 2021-07-13 16:22:21 +0000 |
| commit | ebfc9de34b8595b294b23ed9dad6d627ee7cd6f5 (patch) | |
| tree | e0cb9dc7f60f4846d1c2c0f4a220175b8beb4282 /internal/postgres | |
| parent | f35d711d9d7082ea80a81b666e5c53a3a5ef392e (diff) | |
| download | go-x-pkgsite-ebfc9de34b8595b294b23ed9dad6d627ee7cd6f5.tar.xz | |
internal/postgres: change symbolsearch to index underscores
The symbol search insert and update queries are changed to index
underscores as slashes. For example, "A_B" is indexed as "A/B".
Underscores are treated as "blanks" by the postgres parser. However, we
want "A_B" to be ranked lower than just "A" in a search for "A", not
equally.
For golang/go#44142
Change-Id: I4399c046adae33f69cb806012a68127ab21cb937
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/332413
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Diffstat (limited to 'internal/postgres')
| -rw-r--r-- | internal/postgres/symbolsearch.go | 33 |
1 files changed, 27 insertions, 6 deletions
diff --git a/internal/postgres/symbolsearch.go b/internal/postgres/symbolsearch.go index 0e32b343..24448837 100644 --- a/internal/postgres/symbolsearch.go +++ b/internal/postgres/symbolsearch.go @@ -41,14 +41,16 @@ func upsertSymbolSearchDocuments(ctx context.Context, tx *database.DB, s.id, u.id,` + // Index <package>.<identifier> (i.e. "sql.DB.Begin") - `SETWEIGHT( TO_TSVECTOR('simple', concat(s.name, ' ', concat(u.name, '.', s.name))), 'A') ||` + + symbolSetWeight("concat(s.name, ' ', concat(u.name, '.', s.name))", "A") + " || " + // Index <identifier>, including the parent name (i.e. DB.Begin). - `SETWEIGHT( TO_TSVECTOR('simple', s.name), 'A') ||` + + symbolSetWeight("s.name", "A") + " || " + // Index <identifier> without parent name (i.e. "Begin"). // // This is weighted less, so that if other symbols are just named // "Begin" they will rank higher in a search for "Begin". - `SETWEIGHT( TO_TSVECTOR('simple', split_part(s.name, '.', 2)), 'B') AS tokens` + + symbolSetWeight("split_part(s.name, '.', 2)", "C") + + // TODO(https://golang.org/issue/44142): allow searching for "A_B" when + // querying for either "A" or "B", but at a lower rank. ` FROM symbol_names s INNER JOIN package_symbols ps ON s.id = ps.symbol_name_id @@ -107,8 +109,8 @@ func (db *DB) symbolSearch(ctx context.Context, q string, limit, offset, maxResu INNER JOIN documentation_symbols ds ON ds.documentation_id = d.id INNER JOIN package_symbols ps ON ps.id = ds.package_symbol_id WHERE - ssd.tsv_symbol_tokens @@ to_tsquery('simple', $1) - ORDER BY + ssd.tsv_symbol_tokens @@ `+symbolToTSQuery+ + `ORDER BY symbol_name, CASE WHEN goos = 'all' THEN 0 WHEN goos = 'linux' THEN 1 @@ -166,8 +168,27 @@ func (db *DB) symbolSearch(ctx context.Context, q string, limit, offset, maxResu } } +// symbolTextSearchConfiguration is the search configuration that is used for +// indexing and searching for symbols. 
+const symbolTextSearchConfiguration = "simple" + +// processSymbol converts a symbol with underscores to slashes (for example, +// "A_B" -> "A/B"). This is because the postgres parser treats underscores as +// blanks, but we want a search for "A" to rank "A_B" lower than just "A". We +// also want to be able to search specifically for "A_B". +func processSymbol(s string) string { + return fmt.Sprintf("replace(%s, '_', '/')", s) +} + +var symbolToTSQuery = fmt.Sprintf("to_tsquery('%s', %s)", symbolTextSearchConfiguration, processSymbol("$1")) + +func symbolSetWeight(s, w string) string { + return fmt.Sprintf("SETWEIGHT(TO_TSVECTOR('%s', %s), '%s')", + symbolTextSearchConfiguration, processSymbol(s), w) +} + var symbolScoreExpr = fmt.Sprintf(` - ts_rank('{0.1, 0.2, 1.0, 1.0}', ssd.tsv_symbol_tokens, to_tsquery('simple', $1)) * + ts_rank('{0.1, 0.2, 1.0, 1.0}', ssd.tsv_symbol_tokens, `+symbolToTSQuery+`) * ln(exp(1)+imported_by_count) * CASE WHEN u.redistributable THEN 1 ELSE %f END * CASE WHEN COALESCE(has_go_mod, true) THEN 1 ELSE %f END |
