blob: 3057304aa6b3d3c263dc1b03f0da979e97249ae0 [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen_query.go
// Package symbolsearch provides helper functions for constructing queries for
// symbol search, which are used in internal/postgres.
//
// The exported queries are generated using gen_query.go. query.gen.go should
// never be edited directly. It should always be recreated by running
// `go generate -run gen_query.go`.
package symbolsearch
import (
"fmt"
"strings"
)
// SymbolTextSearchConfiguration is the name of the text search configuration
// that toTSQuery passes as the first argument of every to_tsquery call it
// builds.
const SymbolTextSearchConfiguration = "symbols"
// Raw query strings, each built by constructQuery from symbolSearchBaseQuery
// and one of the filters defined in this file.
// NOTE(review): presumably these are the inputs gen_query.go turns into
// query.gen.go — confirm against the generator.
var (
// rawLegacyQuerySymbol is the query that filters with filterSymbol.
rawLegacyQuerySymbol = constructQuery(filterSymbol)
// rawLegacyQueryPackageDotSymbol is the query that filters with
// filterPackageDotSymbol.
rawLegacyQueryPackageDotSymbol = constructQuery(filterPackageDotSymbol)
// rawLegacyQueryMultiWord is the query that filters with filterMultiWord; it
// is the only one whose score uses scoreMultiWord (see constructQuery).
rawLegacyQueryMultiWord = constructQuery(filterMultiWord)
)
// constructQuery is used to construct a symbol search query.
func constructQuery(where string) string {
// When there is only one word in the query, popularity is the only score
// that matters.
score := popularityMultiplier
if where == filterMultiWord {
score = formatScore(scoreMultiWord)
}
return fmt.Sprintf(symbolSearchBaseQuery, score, where)
}
// WHERE-clause fragments for the symbol search query. In all of them, $1 is
// the bind parameter holding the search input.
var (
// filterSymbol is used when $1 is the full symbol name, either
// <symbol> or <type>.<methodOrField>.
filterSymbol = fmt.Sprintf(`s.tsv_name_tokens @@ %s`, toTSQuery("$1"))
// filterSymbolOR is used when $1 contains the full symbol name, either
// <symbol> or <type>.<methodOrField>, and has multiple words. The words are
// joined with " | " by splitOR before being passed to to_tsquery.
filterSymbolOR = fmt.Sprintf(`s.tsv_name_tokens @@ %s`, toTSQuery(splitOR))
// filterPackageDotSymbol is used when $1 is either <package>.<symbol> OR
// <package>.<type>.<methodOrField>. The text before the first dot must match
// filterPackageNameOrPath, and the text after the first dot is matched
// against s.tsv_name_tokens.
filterPackageDotSymbol = fmt.Sprintf("%s AND %s",
filterPackageNameOrPath,
// formatFilter leaves the %s verb of its argument intact, so its result
// is used here as the format string that receives the to_tsquery call.
fmt.Sprintf(formatFilter("s.tsv_name_tokens @@ %s"),
toTSQuery("substring($1 from E'[^.]*\\.(.+)$')")))
// filterPackageNameOrPath matches rows whose package name or package path
// equals the part of $1 before the first dot (splitFirstDot). The %[1]s verb
// reuses the single splitFirstDot argument for both comparisons.
filterPackageNameOrPath = fmt.Sprintf(
"(sd.name=%s OR sd.package_path=%[1]s)", splitFirstDot)
// filterPackage is used to filter matching elements from
// sd.tsv_path_tokens.
filterPackage = fmt.Sprintf(`sd.tsv_path_tokens @@ %s`, toTSQuery(splitOR))
// filterMultiWord is used when $1 contains multiple words, separated by
// spaces. One element of the query must match a symbol name, and one (could
// be the same element) must match the package path tokens.
filterMultiWord = fmt.Sprintf("%s AND %s", formatFilter(filterSymbolOR),
formatFilter(filterPackage))
)
// Score expressions substituted into the "AS score" column of the query.
var (
// scoreMultiWord is the score when $1 contains multiple words: the
// rankPathTokens expression followed, on a new line, by
// "* <popularityMultiplier>".
scoreMultiWord = fmt.Sprintf("%s%s", rankPathTokens, formatMultiplier(popularityMultiplier))
// rankPathTokens is a ts_rank call over sd.tsv_path_tokens, using the weights
// array '{0.1, 0.2, 1.0, 1.0}' and the words of $1 joined by splitOR as the
// query. Each argument and the closing parenthesis are placed on their own
// indented line via indent.
rankPathTokens = fmt.Sprintf(
"ts_rank(%s,%s,%s"+indent(")", 3),
indent("'{0.1, 0.2, 1.0, 1.0}'", 4),
indent("sd.tsv_path_tokens", 4),
indent(toTSQuery(splitOR), 4))
// Popularity multiplier to increase ranking of popular packages. It is the
// natural log of e plus the imported-by count, so it evaluates to 1 for a
// package that nothing imports.
popularityMultiplier = `ln(exp(1)+sd.imported_by_count)`
)
// formatScore wraps the score expression s in parentheses. s is placed on its
// own line behind four tabs, and the closing parenthesis on the following line
// behind three tabs. The whitespace only affects the layout of the resulting
// text.
func formatScore(s string) string {
	const layout = "(\n\t\t\t\t%s\n\t\t\t)"
	return fmt.Sprintf(layout, s)
}
// formatFilter wraps the filter expression s in parentheses. s is placed on
// its own line behind three tabs, and the closing parenthesis on the following
// line behind two tabs. Any formatting verbs inside s are passed through
// untouched, so the result can itself be used as a format string.
func formatFilter(s string) string {
	const layout = "(\n\t\t\t%s\n\t\t)"
	return fmt.Sprintf(layout, s)
}
// formatMultiplier renders s as a multiplication term, "* <s>", and puts it on
// a new indented line by calling indent with a depth of 3.
func formatMultiplier(s string) string {
	term := fmt.Sprintf("* %s", s)
	return indent(term, 3)
}
// indent returns s moved onto a new line: a newline, then n+1 tab characters,
// then s. Note the extra tab — indent(s, 0) still yields one tab. When n is
// negative no tabs are added and the result is just "\n" followed by s.
func indent(s string, n int) string {
	prefix := "\n"
	for depth := n; depth >= 0; depth-- {
		prefix += "\t"
	}
	return prefix + s
}
// SQL expressions that pre-process the search input $1.
const (
// splitOR replaces every space in $1 with " | ", so that the words of a
// multi-word input become alternatives when passed to to_tsquery.
splitOR = "replace($1, ' ', ' | ')"
// splitFirstDot splits everything preceding the first dot in $1.
// This is used to parse the package name or path.
splitFirstDot = "split_part($1, '.', 1)"
)
func toTSQuery(arg string) string {
return fmt.Sprintf("to_tsquery('%s', %s)", SymbolTextSearchConfiguration, processArg(arg))
}
// processArg rewrites the SQL expression arg so that underscores in the bind
// parameter are converted to hyphens at query time (for example, a search for
// "A_B" becomes "A-B"). It does this by replacing every occurrence of the
// placeholder in arg with replace(<placeholder>, '_', '-').
//
// The placeholder is "$1", unless arg itself is exactly a two-character string
// starting with "$" (such as "$2"), in which case arg is the placeholder.
//
// We want a search for "A" to rank "A_B" lower than just "A", and we also want
// to be able to search specifically for "A_B".
// NOTE(review): the previous comment attributed this to how the postgres
// parser tokenizes underscores — confirm the exact parser behavior.
func processArg(arg string) string {
	placeholder := "$1"
	if isParam := len(arg) == 2 && arg[0] == '$'; isParam {
		// arg is a bare $N parameter, so substitute that one instead.
		placeholder = arg
	}
	wrapped := fmt.Sprintf("replace(%s, '_', '-')", placeholder)
	return strings.ReplaceAll(arg, placeholder, wrapped)
}
// symbolSearchBaseQuery is the fmt format string from which constructQuery
// builds every symbol search query. It has two %s verbs, in order: the score
// expression (selected "AS score") and the WHERE clause of the inner query.
//
// The inner "results" query joins symbol_search_documents with
// search_documents and symbol_names; the outer query joins package_symbols,
// keeps rows whose score is greater than 0.1, orders them by score, commit
// time, symbol name and package path, and limits the row count to the $2 bind
// parameter.
const symbolSearchBaseQuery = `
WITH results AS (
SELECT
s.name AS symbol_name,
sd.package_path,
sd.module_path,
sd.version,
sd.name AS package_name,
sd.synopsis,
sd.license_types,
sd.commit_time,
sd.imported_by_count,
ssd.package_symbol_id,
ssd.goos,
ssd.goarch,
%s AS score
FROM symbol_search_documents ssd
INNER JOIN search_documents sd ON sd.unit_id = ssd.unit_id
INNER JOIN symbol_names s ON s.id = ssd.symbol_name_id
WHERE %s
)
SELECT
r.symbol_name,
r.package_path,
r.module_path,
r.version,
r.package_name,
r.synopsis,
r.license_types,
r.commit_time,
r.imported_by_count,
r.goos,
r.goarch,
ps.type AS symbol_type,
ps.synopsis AS symbol_synopsis
FROM results r
INNER JOIN package_symbols ps ON r.package_symbol_id = ps.id
WHERE r.score > 0.1
ORDER BY
score DESC,
commit_time DESC,
symbol_name,
package_path
LIMIT $2;`