blob: 4424083d3373ad6dfb74b06477752c7c333db753 [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package postgres
import (
"context"
"database/sql"
"fmt"
"sort"
"github.com/Masterminds/squirrel"
"github.com/lib/pq"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/database"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/version"
)
// insertSymbols stores the symbols found in pathToDocs for modulePath at
// version v.
//
// Nothing is written unless v is a release version or isLatest is true. In
// that case the symbol names, package symbols and documentation symbols are
// upserted through the helpers called below. Symbol history is updated only
// for release versions, and stale symbol search documents are deleted only
// when isLatest is true.
//
// pathToID must contain a non-zero ID for modulePath; otherwise an error is
// returned.
func insertSymbols(ctx context.Context, tx *database.DB, modulePath, v string,
	isLatest bool,
	pathToID map[string]int,
	pathToUnitID map[string]int,
	pathToDocs map[string][]*internal.Documentation) (err error) {
	defer derrors.WrapStack(&err, "insertSymbols(ctx, db, %q, %q, pathToID, pathToDocs)", modulePath, v)

	versionType, err := version.ParseType(v)
	if err != nil {
		return err
	}
	isRelease := versionType == version.TypeRelease
	// Versions that are neither a release nor the latest have nothing to
	// record.
	if !isRelease && !isLatest {
		return nil
	}

	modulePathID := pathToID[modulePath]
	if modulePathID == 0 {
		return fmt.Errorf("modulePathID cannot be 0: %q", modulePath)
	}

	docsByPath, err := getDocIDsForPath(ctx, tx, pathToUnitID, pathToDocs)
	if err != nil {
		return err
	}
	symbolNameIDs, err := upsertSymbolNamesReturningIDs(ctx, tx, docsByPath)
	if err != nil {
		return err
	}
	pkgsymIDsByPath, err := upsertPackageSymbolsReturningIDs(ctx, tx, modulePathID, pathToID, symbolNameIDs, docsByPath)
	if err != nil {
		return err
	}
	err = upsertDocumentationSymbols(ctx, tx, pkgsymIDsByPath, docsByPath)
	if err != nil {
		return err
	}

	// Symbol history is only tracked for release versions.
	if isRelease {
		err = upsertSymbolHistory(ctx, tx, modulePath, v, symbolNameIDs,
			pathToID, pkgsymIDsByPath, docsByPath)
		if err != nil {
			return err
		}
	}

	if !isLatest {
		return nil
	}
	return deleteOldSymbolSearchDocuments(ctx, tx, modulePathID, pathToID, docsByPath, pkgsymIDsByPath)
}
// packageSymbol identifies a symbol within a package by its name, synopsis
// and parent name. It is comparable and is used in this file as the key of
// maps from a package symbol to its package_symbols row ID.
type packageSymbol struct {
	// name is the symbol's name (SymbolMeta.Name).
	name string
	// synopsis is the symbol's synopsis (SymbolMeta.Synopsis).
	synopsis string
	// parentName is a unique key in packageSymbol because the section can change
	// with the name and synopsis remaining the same. For example:
	// https://pkg.go.dev/go/types@go1.8#Universe is in the Variables section,
	// with parentName Universe.
	// https://pkg.go.dev/go/types@go1.16#Universe is in the Types section,
	// with parentName Scope.
	//
	// https://pkg.go.dev/github.com/89z/page@v1.2.1#Help is in the Types
	// section under type InputMode.
	// https://pkg.go.dev/github.com/89z/page@v1.1.3#Help is in the Types
	// section under ScreenMode.
	parentName string
}
// upsertDocumentationSymbols makes sure documentation_symbols has a row for
// every (documentation_id, package_symbol_id) pair implied by
// pathToDocIDToDoc.
//
// pathToPkgsymID maps a package path to the package_symbols IDs of its
// symbols, keyed by packageSymbol. pathToDocIDToDoc maps a package path to
// its documentation, keyed by documentation ID.
//
// An error is returned if a path, or a symbol found in a documentation's API,
// has no entry in pathToPkgsymID, or if a database operation fails.
func upsertDocumentationSymbols(ctx context.Context, db *database.DB,
	pathToPkgsymID map[string]map[packageSymbol]int,
	pathToDocIDToDoc map[string]map[int]*internal.Documentation) (err error) {
	defer derrors.WrapStack(&err, "upsertDocumentationSymbols(ctx, db, pathToPkgsymID, pathToDocIDToDoc)")

	// Create a map of documentation_id TO package_symbol_id set.
	// This is the set of rows that must exist once this function returns.
	docIDToPkgsymIDs := map[int]map[int]bool{}
	for path, docIDToDoc := range pathToDocIDToDoc {
		for docID, doc := range docIDToDoc {
			if err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error {
				pkgsymToID, ok := pathToPkgsymID[path]
				if !ok {
					return fmt.Errorf("path could not be found: %q", path)
				}
				ps := packageSymbol{synopsis: sm.Synopsis, name: sm.Name, parentName: sm.ParentName}
				pkgsymID, ok := pkgsymToID[ps]
				if !ok {
					return fmt.Errorf("package symbol could not be found: %v", ps)
				}
				if _, ok := docIDToPkgsymIDs[docID]; !ok {
					docIDToPkgsymIDs[docID] = map[int]bool{}
				}
				docIDToPkgsymIDs[docID][pkgsymID] = true
				return nil
			}); err != nil {
				return err
			}
		}
	}

	// Collect the documentation IDs once, in sorted order. The sorted slice
	// drives the insert order below; documentationIDs is the same list in the
	// form passed to the query.
	var docIDs []int
	for docID := range docIDToPkgsymIDs {
		docIDs = append(docIDs, docID)
	}
	sort.Ints(docIDs)
	var documentationIDs []any
	for _, docID := range docIDs {
		documentationIDs = append(documentationIDs, docID)
	}

	// Fetch all existing rows in documentation_symbols for these
	// documentation IDs, and record in gotDocIDToPkgsymIDs the ones that we
	// also want.
	gotDocIDToPkgsymIDs := map[int]map[int]bool{}
	collect := func(rows *sql.Rows) error {
		var id, docID, pkgsymID int
		if err := rows.Scan(&id, &docID, &pkgsymID); err != nil {
			return fmt.Errorf("row.Scan(): %v", err)
		}
		if !docIDToPkgsymIDs[docID][pkgsymID] {
			// The package_symbol_id in the documentation_symbols table does
			// not match the one we want to insert. This can happen if we
			// change the package_symbol_id. In that case, do not add this to
			// the map, so that we can upsert below.
			//
			// See https://go-review.googlesource.com/c/pkgsite/+/315309
			// and https://go-review.googlesource.com/c/pkgsite/+/315310
			// where the package_symbol_id was potentially changed.
			return nil
		}
		if _, ok := gotDocIDToPkgsymIDs[docID]; !ok {
			gotDocIDToPkgsymIDs[docID] = map[int]bool{}
		}
		gotDocIDToPkgsymIDs[docID][pkgsymID] = true
		return nil
	}
	if err := db.RunQuery(ctx, `
SELECT
ds.id,
ds.documentation_id,
ds.package_symbol_id
FROM documentation_symbols ds
WHERE documentation_id = ANY($1);`, collect, pq.Array(documentationIDs)); err != nil {
		return err
	}

	// Get the difference between the documentation_symbols for this package,
	// and the ones that already exist in the documentation_symbols table. Only
	// insert rows that do not already exist.
	//
	// Rows are emitted in ascending (documentation_id, package_symbol_id)
	// order to prevent deadlocks. Map iteration order is random, so the
	// package symbol IDs must be sorted as well as the documentation IDs.
	var values []any
	for _, docID := range docIDs {
		gotSet := gotDocIDToPkgsymIDs[docID]
		var missing []int
		for pkgsymID := range docIDToPkgsymIDs[docID] {
			if !gotSet[pkgsymID] {
				missing = append(missing, pkgsymID)
			}
		}
		sort.Ints(missing)
		for _, pkgsymID := range missing {
			values = append(values, docID, pkgsymID)
		}
	}

	// Upsert the rows. On conflict the key columns are rewritten with the
	// same values, so a row that already exists is left effectively
	// unchanged.
	docsymcols := []string{"documentation_id", "package_symbol_id"}
	return db.BulkInsert(ctx, "documentation_symbols", docsymcols,
		values, `
ON CONFLICT (documentation_id, package_symbol_id)
DO UPDATE SET
documentation_id=excluded.documentation_id,
package_symbol_id=excluded.package_symbol_id`)
}
// upsertPackageSymbolsReturningIDs makes sure package_symbols has a row for
// every symbol in pathToDocIDToDoc and returns, for each package path, a map
// from packageSymbol to the ID of its package_symbols row.
//
// modulePathID is the ID of the module path. pathToID maps package paths to
// their IDs and nameToID maps symbol names to their symbol_names IDs; both
// must contain a non-zero ID for everything referenced by the documentation,
// otherwise an error is returned.
//
// Side effects on the input: each doc.API slice is sorted by name in place,
// and a SymbolMeta with an empty ParentName has it set to its own Name.
func upsertPackageSymbolsReturningIDs(ctx context.Context, db *database.DB,
	modulePathID int,
	pathToID map[string]int,
	nameToID map[string]int,
	pathToDocIDToDoc map[string]map[int]*internal.Documentation) (_ map[string]map[packageSymbol]int, err error) {
	defer derrors.WrapStack(&err, "upsertPackageSymbolsReturningIDs(ctx, db, %d, pathToID, pathToDocIDToDoc)", modulePathID)

	// Reverse lookups used to translate the IDs in scanned rows back to
	// paths and symbol names.
	idToPath := map[int]string{}
	for path, id := range pathToID {
		idToPath[id] = path
	}
	var names []string
	idToSymbolName := map[int]string{}
	for name, id := range nameToID {
		idToSymbolName[id] = name
		names = append(names, name)
	}

	// collect records one package_symbols row in pathTopkgsymToID. It is used
	// both for the rows that already exist and for the rows returned by the
	// insert below.
	pathTopkgsymToID := map[string]map[packageSymbol]int{}
	collect := func(rows *sql.Rows) error {
		var (
			id, pathID, symbolID, parentSymbolID int
			synopsis                             string
		)
		if err := rows.Scan(&id, &pathID, &symbolID, &parentSymbolID, &synopsis); err != nil {
			return fmt.Errorf("row.Scan(): %v", err)
		}
		path := idToPath[pathID]
		if _, ok := pathTopkgsymToID[path]; !ok {
			pathTopkgsymToID[path] = map[packageSymbol]int{}
		}
		sym := idToSymbolName[symbolID]
		if sym == "" {
			return fmt.Errorf("symbol name cannot be empty: %d", symbolID)
		}
		parentSym, ok := idToSymbolName[parentSymbolID]
		if !ok {
			// A different variable of this symbol was previously inserted.
			// Don't add this to pathTopkgsymToID, since it's not the package
			// symbol that we want.
			// For example:
			// https://dev-pkg.go.dev/github.com/fastly/kingpin@v1.2.6#TokenShort
			// and
			// https://pkg.go.dev/github.com/fastly/kingpin@v1.3.7#TokenShort
			// have the same synopsis, but different parents and sections.
			return nil
		}
		pathTopkgsymToID[path][packageSymbol{
			synopsis:   synopsis,
			name:       sym,
			parentName: parentSym,
		}] = id
		return nil
	}
	// This query fetches more than just the package symbols that we want.
	// The relevant package symbols are filtered above.
	if err := db.RunQuery(ctx, `
SELECT
ps.id,
ps.package_path_id,
ps.symbol_name_id,
ps.parent_symbol_name_id,
ps.synopsis
FROM package_symbols ps
INNER JOIN symbol_names s ON ps.symbol_name_id = s.id
WHERE module_path_id = $1 AND s.name = ANY($2);`, collect, modulePathID, pq.Array(names)); err != nil {
		return nil, err
	}

	// Sort to prevent deadlocks.
	var paths []string
	for path := range pathToDocIDToDoc {
		paths = append(paths, path)
	}
	sort.Strings(paths)

	var packageSymbols []any
	for _, path := range paths {
		docs := pathToDocIDToDoc[path]
		pathID := pathToID[path]
		if pathID == 0 {
			return nil, fmt.Errorf("pathID cannot be 0: %q", path)
		}
		for _, doc := range docs {
			// Sort to prevent deadlocks.
			sort.Slice(doc.API, func(i, j int) bool {
				return doc.API[i].Name < doc.API[j].Name
			})
			if err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error {
				symID := nameToID[sm.Name]
				if symID == 0 {
					return fmt.Errorf("symID cannot be 0: %q", sm.Name)
				}
				// A symbol without a parent is stored as its own parent.
				// Normalize before building the lookup key: rows read back by
				// collect always carry the resolved parent name, so a key
				// with an empty parentName would never match an existing row.
				if sm.ParentName == "" {
					sm.ParentName = sm.Name
				}
				parentID := nameToID[sm.ParentName]
				if parentID == 0 {
					return fmt.Errorf("parentSymID cannot be 0: %q", sm.ParentName)
				}
				ps := packageSymbol{synopsis: sm.Synopsis, name: sm.Name, parentName: sm.ParentName}
				if _, ok := pathTopkgsymToID[path][ps]; !ok {
					packageSymbols = append(packageSymbols, pathID,
						modulePathID, symID, parentID, sm.Section, sm.Kind,
						sm.Synopsis)
				}
				return nil
			}); err != nil {
				return nil, err
			}
		}
	}

	// The order of pkgsymcols must match that of the SELECT query passed to
	// RunQuery above, because the same collect function scans both.
	pkgsymcols := []string{"id", "package_path_id", "symbol_name_id", "parent_symbol_name_id", "synopsis"}
	if err := db.BulkInsertReturning(ctx, "package_symbols",
		[]string{
			"package_path_id",
			"module_path_id",
			"symbol_name_id",
			"parent_symbol_name_id",
			"section",
			"type",
			"synopsis",
		}, packageSymbols, database.OnConflictDoNothing, pkgsymcols, collect); err != nil {
		return nil, err
	}
	return pathTopkgsymToID, nil
}
// upsertSymbolNamesReturningIDs makes sure symbol_names has a row for every
// symbol name in pathToDocIDToDocs and returns a map from symbol name to the
// ID of its row.
//
// Existing rows are looked up first; only names that are still missing are
// inserted. An error is returned if a row with ID 0 is read back or if a
// database operation fails.
func upsertSymbolNamesReturningIDs(ctx context.Context, db *database.DB,
	pathToDocIDToDocs map[string]map[int]*internal.Documentation) (_ map[string]int, err error) {
	defer derrors.WrapStack(&err, "upsertSymbolNamesReturningIDs")

	var allNames []string
	for _, docIDToDocs := range pathToDocIDToDocs {
		for _, doc := range docIDToDocs {
			if err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error {
				allNames = append(allNames, sm.Name)
				return nil
			}); err != nil {
				return nil, err
			}
		}
	}
	sort.Strings(allNames)

	// The same name can be collected more than once across documentations.
	// Drop the repeats so that each name is sent to the database at most once
	// per statement; the list is sorted, so duplicates are adjacent.
	var names []string
	for _, name := range allNames {
		if n := len(names); n == 0 || names[n-1] != name {
			names = append(names, name)
		}
	}

	// collect records one symbol_names row in nameToID. It is used both for
	// the rows that already exist and for the rows returned by the insert.
	nameToID := map[string]int{}
	collect := func(rows *sql.Rows) error {
		var (
			id   int
			name string
		)
		if err := rows.Scan(&id, &name); err != nil {
			return fmt.Errorf("row.Scan(): %v", err)
		}
		if id == 0 {
			return fmt.Errorf("id can't be 0: %q", name)
		}
		nameToID[name] = id
		return nil
	}
	query := `
SELECT id, name
FROM symbol_names
WHERE name = ANY($1);`
	if err := db.RunQuery(ctx, query, collect, pq.Array(names)); err != nil {
		return nil, err
	}

	// Insert only the names that were not found above.
	var values []any
	for _, name := range names {
		if _, ok := nameToID[name]; !ok {
			values = append(values, name)
		}
	}
	if err := db.BulkInsertReturning(ctx, "symbol_names", []string{"name"},
		values, database.OnConflictDoNothing, []string{"id", "name"}, collect); err != nil {
		return nil, err
	}
	return nameToID, nil
}
// updateSymbols walks symbols in order and calls updateFunc on each symbol's
// SymbolMeta, immediately followed by each of that symbol's children. The
// walk stops at, and returns, the first non-nil error from updateFunc.
func updateSymbols(symbols []*internal.Symbol, updateFunc func(sm *internal.SymbolMeta) error) error {
	for i := range symbols {
		parent := symbols[i]
		if err := updateFunc(&parent.SymbolMeta); err != nil {
			return err
		}
		// Read the children once, after the parent has been visited.
		children := parent.Children
		for j := range children {
			if err := updateFunc(children[j]); err != nil {
				return err
			}
		}
	}
	return nil
}
// deleteOldSymbolSearchDocuments deletes rows from symbol_search_documents
// whose package symbol is not part of the module version being inserted.
//
// It selects the package_symbol_ids currently stored for the package paths in
// pathToID and deletes every one that does not appear in
// latestPathToPkgsymToID for the symbols in pathToDocIDToDoc.
//
// modulePathID is used only to annotate a returned error. An error is
// returned if pathToID contains a zero ID, if a path or symbol from the
// documentation is missing from latestPathToPkgsymToID, or if a database
// operation fails.
func deleteOldSymbolSearchDocuments(ctx context.Context, db *database.DB,
	modulePathID int,
	pathToID map[string]int,
	pathToDocIDToDoc map[string]map[int]*internal.Documentation,
	latestPathToPkgsymToID map[string]map[packageSymbol]int) (err error) {
	// modulePathID is an int, so it is formatted with %d; %q would print it as
	// a quoted character literal.
	defer derrors.WrapStack(&err, "deleteOldSymbolSearchDocuments(ctx, db, %d, pathToID, pathToDocIDToDoc)", modulePathID)

	// Get all package_symbol_ids for the latest module (the current one we are
	// trying to insert), and collect the package path IDs along the way.
	latestPkgsymIDs := map[int]bool{}
	var pathIDs []int
	for path, pathID := range pathToID {
		if pathID == 0 {
			return fmt.Errorf("pathID cannot be 0: %q", path)
		}
		pathIDs = append(pathIDs, pathID)
		for _, doc := range pathToDocIDToDoc[path] {
			if err := updateSymbols(doc.API, func(sm *internal.SymbolMeta) error {
				pkgsymToID, ok := latestPathToPkgsymToID[path]
				if !ok {
					return fmt.Errorf("path could not be found: %q", path)
				}
				ps := packageSymbol{synopsis: sm.Synopsis, name: sm.Name, parentName: sm.ParentName}
				pkgsymID, ok := pkgsymToID[ps]
				if !ok {
					return fmt.Errorf("package symbol could not be found: %v", ps)
				}
				latestPkgsymIDs[pkgsymID] = true
				return nil
			}); err != nil {
				return err
			}
		}
	}

	// Fetch package_symbol_id currently in symbol_search_documents.
	dbPkgSymIDs, err := database.Collect1[int](ctx, db, `
SELECT package_symbol_id
FROM symbol_search_documents
WHERE package_path_id = ANY($1);`,
		pq.Array(pathIDs))
	if err != nil {
		return err
	}

	// Anything stored that is not in the latest set is stale.
	var toDelete []int
	for _, id := range dbPkgSymIDs {
		if !latestPkgsymIDs[id] {
			toDelete = append(toDelete, id)
		}
	}

	// Delete stale rows.
	q, args, err := squirrel.Delete("symbol_search_documents").
		Where("package_symbol_id = ANY(?)", pq.Array(toDelete)).
		PlaceholderFormat(squirrel.Dollar).ToSql()
	if err != nil {
		return err
	}
	n, err := db.Exec(ctx, q, args...)
	if err != nil {
		return err
	}
	log.Infof(ctx, "deleted %d rows from symbol_search_documents", n)
	return nil
}