blob: 70b9f64acd14577729759e0745adbd698a18011f [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen_query.go
package symbolsearch
import (
"fmt"
"regexp"
)
// SymbolTextSearchConfiguration is the name of the custom postgres text search
// configuration used for symbol search. toTSQuery passes it as the
// configuration argument of to_tsquery.
const SymbolTextSearchConfiguration = "symbols"
// Query returns the SQL text of the symbol search query for st, to be used in
// internal/postgres. The empty string is returned when st is not one of the
// search types handled below.
//
// Each query that is returned accepts the following args:
//
//	$1 = query; compared case-insensitively against symbol_name
//	$2 = limit
//	$3 = referenced by multi-word-exact (matched against the path tokens)
//	     and by package-dot-symbol (the text before its first '.' selects
//	     the package); the plain symbol query does not reference it
func Query(st SearchType) string {
	// Pick the CTE body first; every search type shares the same outer query.
	var cte string
	switch st {
	case SearchTypeMultiWordExact:
		cte = multiwordCTE
	case SearchTypePackageDotSymbol:
		// The search is either <package>.<symbol> or
		// <package>.<type>.<methodOrField>: only match on the exact symbol
		// name.
		cte = fmt.Sprintf(symbolCTE, filterPackageDotSymbol)
	case SearchTypeSymbol:
		// The search is the full symbol name, either <symbol> or
		// <type>.<methodOrField>: match on just the identifier name.
		//
		// Matching on just <field> and <method> is too slow at the moment
		// (it can take several seconds to return results), but support for
		// that might be added later. For example, searching for "Begin"
		// should return "DB.Begin".
		cte = fmt.Sprintf(symbolCTE, filterSymbol)
	default:
		return ""
	}
	return fmt.Sprintf(baseQuery, cte)
}
// symbolCTE is the template for the body of the ssd common table expression
// used by the SearchTypeSymbol and SearchTypePackageDotSymbol queries. Its
// single %s verb is replaced by Query with a WHERE condition (filterSymbol or
// filterPackageDotSymbol).
//
// The selected rows use ln_imported_by_count as their score, are ordered by
// score (descending), then package_path, then symbol_name_id, and are capped
// at $2 rows.
const symbolCTE = `
SELECT
ssd.unit_id,
ssd.package_symbol_id,
ssd.symbol_name_id,
ssd.goos,
ssd.goarch,
ssd.ln_imported_by_count AS score
FROM symbol_search_documents ssd
WHERE %s
ORDER BY
score DESC,
package_path,
symbol_name_id
LIMIT $2
`
// filterSymbol is the WHERE condition substituted into symbolCTE for
// SearchTypeSymbol. A row matches when its symbol_name equals $1, ignoring
// case.
const filterSymbol = `
lower(symbol_name) = lower($1)`
// filterPackageDotSymbol is the WHERE condition substituted into symbolCTE for
// SearchTypePackageDotSymbol. A row matches when its symbol_name equals $1,
// ignoring case, and its uuid_package_name or uuid_package_path equals the
// version 5 UUID (generated in the nil namespace) of the text that precedes
// the first '.' in $3.
//
// TODO(golang/go#44142): Filtering on package path currently only works for
// standard library packages, since non-standard library packages will have a
// dot.
var filterPackageDotSymbol = fmt.Sprintf(`
lower(symbol_name) = lower($1)
AND (
ssd.uuid_package_name=%[1]s OR
ssd.uuid_package_path=%[1]s
)`,
"uuid_generate_v5(uuid_nil(), split_part($3, '.', 1))")
// multiwordCTE is the body of the ssd common table expression used by the
// SearchTypeMultiWordExact query.
//
// Both %[1]s verbs are replaced with toTSQuery("$3"), i.e. a to_tsquery call
// over $3 with underscores replaced by dashes. A row matches when its
// symbol_name equals $1, ignoring case, and the tsv_path_tokens of the joined
// search_documents row match that tsquery.
//
// The score is the ts_rank of tsv_path_tokens against the tsquery multiplied
// by ln_imported_by_count. Per the PostgreSQL documentation, the weights array
// passed to ts_rank is ordered {D, C, B, A}. Results are ordered by score
// (descending) and capped at $2 rows.
var multiwordCTE = fmt.Sprintf(`
SELECT
ssd.unit_id,
ssd.package_symbol_id,
ssd.symbol_name_id,
ssd.goos,
ssd.goarch,
(
ts_rank(
'{0.1, 0.2, 1.0, 1.0}',
sd.tsv_path_tokens,
%[1]s
) * ssd.ln_imported_by_count
) AS score
FROM symbol_search_documents ssd
INNER JOIN search_documents sd ON sd.package_path_id = ssd.package_path_id
WHERE
lower(symbol_name) = lower($1)
AND sd.tsv_path_tokens @@ %[1]s
ORDER BY score DESC
LIMIT $2
`, toTSQuery("$3"))
// baseQuery is the outer query template shared by every search type. Its
// single %s verb is replaced by Query with a CTE body (multiwordCTE or a
// filled-in symbolCTE), which becomes the common table expression named ssd.
//
// Each ssd row is joined to symbol_names (by symbol_name_id), search_documents
// (by unit_id) and package_symbols (by package_symbol_id) to produce the
// result columns, and the final rows are ordered by score, descending.
const baseQuery = `
WITH ssd AS (%s)
SELECT
s.name AS symbol_name,
sd.package_path,
sd.module_path,
sd.version,
sd.name,
sd.synopsis,
sd.license_types,
sd.commit_time,
sd.imported_by_count,
ssd.goos,
ssd.goarch,
ps.type AS symbol_kind,
ps.synopsis AS symbol_synopsis
FROM ssd
INNER JOIN symbol_names s ON s.id=ssd.symbol_name_id
INNER JOIN search_documents sd ON sd.unit_id = ssd.unit_id
INNER JOIN package_symbols ps ON ps.id=ssd.package_symbol_id
ORDER BY score DESC;`
func toTSQuery(arg string) string {
return fmt.Sprintf("to_tsquery('%s', quote_literal(%s))", SymbolTextSearchConfiguration, processArg(arg))
}
// regexpPostgresArg matches each positional $N arg (a dollar sign followed by
// one or more digits) in a postgres expression.
var regexpPostgresArg = regexp.MustCompile(`\$[0-9]+`)

// processArg rewrites the postgres expression arg so that the value of every
// $N placeholder has its underscores converted to dashes. It does this by
// wrapping each placeholder in a call to the postgres replace function;
// everything else in arg is left untouched.
//
// arg is expected to be a postgres expression containing a $N.
//
// For example, given to_tsquery($1), processArg returns
// to_tsquery(replace($1, '_', '-')). If $1 then has the value "A_B",
// to_tsquery actually runs on "A-B".
//
// This preprocessing step is necessary because the postgres parser treats
// underscores as whitespace, but if a user searches for "A_B", we don't want
// results for "A" or "B" to be returned with the same weight as "A_B".
func processArg(arg string) string {
	wrap := func(placeholder string) string {
		return "replace(" + placeholder + ", '_', '-')"
	}
	return regexpPostgresArg.ReplaceAllStringFunc(arg, wrap)
}