internal/postgres: on insert, delete only old packages from search
Instead of deleting the entire module from search_documents on insert,
just delete packages that are not in the current version. That will
avoid resetting imported-by counts to zero when we reprocess the
latest version of a module.
For golang/go#47555
Change-Id: Ie2aedc920f0e840c20f4487ed10bf7801b43a3f8
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/341269
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/postgres/insert_module.go b/internal/postgres/insert_module.go
index ea58cd5..ef71c46 100644
--- a/internal/postgres/insert_module.go
+++ b/internal/postgres/insert_module.go
@@ -149,10 +149,7 @@
return err
}
- // Delete this module from search_documents completely. Below we'll
- // insert the packages from this module. This will effectively remove
- // packages from older module versions that are not in the latest one.
- if err := deleteModuleFromSearchDocuments(ctx, tx, m.ModulePath, nil); err != nil {
+ if err := deleteOtherModulePackagesFromSearchDocuments(ctx, tx, m); err != nil {
return err
}
diff --git a/internal/postgres/search.go b/internal/postgres/search.go
index c3a9e22..56376bb 100644
--- a/internal/postgres/search.go
+++ b/internal/postgres/search.go
@@ -1092,6 +1092,28 @@
})
}
+// deleteOtherModulePackagesFromSearchDocuments deletes all packages from search
+// documents with the given module that are not in m.
+func deleteOtherModulePackagesFromSearchDocuments(ctx context.Context, tx *database.DB, m *internal.Module) error {
+ dbPkgs, err := tx.CollectStrings(ctx, `
+ SELECT package_path FROM search_documents WHERE module_path = $1
+ `, m.ModulePath)
+ if err != nil {
+ return err
+ }
+ pkgInModule := map[string]bool{}
+ for _, u := range m.Packages() {
+ pkgInModule[u.Path] = true
+ }
+ var otherPkgs []string
+ for _, p := range dbPkgs {
+ if !pkgInModule[p] {
+ otherPkgs = append(otherPkgs, p)
+ }
+ }
+ return deleteModuleFromSearchDocuments(ctx, tx, m.ModulePath, otherPkgs)
+}
+
// deleteModuleFromSearchDocuments deletes module_path from search_documents.
// If packages is non-empty, it only deletes those packages.
func deleteModuleFromSearchDocuments(ctx context.Context, tx *database.DB, modulePath string, packages []string) error {
diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go
index 838ce4e..5383ff1 100644
--- a/internal/postgres/search_test.go
+++ b/internal/postgres/search_test.go
@@ -1341,10 +1341,10 @@
const modulePath = "deleteme.com"
initial := []searchDocumentRow{
- {modulePath + "/p1", modulePath, "v0.0.9"}, // oldest version of same module
- {modulePath + "/p2", modulePath, "v1.1.0"}, // older version of same module
- {modulePath + "/p4", modulePath, "v1.9.0"}, // newer version of same module
- {"other.org/p2", "other.org", "v1.1.0"}, // older version of a different module
+ {modulePath + "/p1", modulePath, "v0.0.9", 0}, // oldest version of same module
+ {modulePath + "/p2", modulePath, "v1.1.0", 0}, // older version of same module
+ {modulePath + "/p4", modulePath, "v1.9.0", 0}, // newer version of same module
+ {"other.org/p2", "other.org", "v1.1.0", 0}, // older version of a different module
}
insertInitial := func(db *DB) {
@@ -1367,8 +1367,8 @@
}
checkSearchDocuments(ctx, t, testDB, []searchDocumentRow{
- {modulePath + "/p4", modulePath, "v1.9.0"}, // newer version not deleted
- {"other.org/p2", "other.org", "v1.1.0"}, // other module not deleted
+ {modulePath + "/p4", modulePath, "v1.9.0", 0}, // newer version not deleted
+ {"other.org/p2", "other.org", "v1.1.0", 0}, // other module not deleted
})
})
t.Run("deleteModuleFromSearchDocuments", func(t *testing.T) {
@@ -1381,18 +1381,43 @@
t.Fatal(err)
}
checkSearchDocuments(ctx, t, testDB, []searchDocumentRow{
- {"other.org/p2", "other.org", "v1.1.0"}, // other module not deleted
+ {"other.org/p2", "other.org", "v1.1.0", 0}, // other module not deleted
})
})
+ t.Run("deleteOtherModulePackagesFromSearchDocuments", func(t *testing.T) {
+ testDB, release := acquire(t)
+ defer release()
+
+ // Insert a module with two packages.
+ m0 := sample.Module(modulePath, "v1.0.0", "p1", "p2")
+ MustInsertModule(ctx, t, testDB, m0)
+ // Set the imported-by count to a non-zero value so we can tell which
+ // rows were deleted.
+ if _, err := testDB.db.Exec(ctx, `UPDATE search_documents SET imported_by_count = 1`); err != nil {
+ t.Fatal(err)
+ }
+ // Later version of module does not have p1.
+ m1 := sample.Module(modulePath, "v1.1.0", "p2", "p3")
+ MustInsertModule(ctx, t, testDB, m1)
+
+ // p1 should be gone, p2 should be there with the same
+ // imported_by_count, and p3 should be there with a zero count.
+ want := []searchDocumentRow{
+ {modulePath + "/p2", modulePath, "v1.1.0", 1},
+ {modulePath + "/p3", modulePath, "v1.1.0", 0},
+ }
+ checkSearchDocuments(ctx, t, testDB, want)
+ })
}
type searchDocumentRow struct {
PackagePath, ModulePath, Version string
+ ImportedByCount int
}
func readSearchDocuments(ctx context.Context, db *DB) ([]searchDocumentRow, error) {
var rows []searchDocumentRow
- err := db.db.CollectStructs(ctx, &rows, `SELECT package_path, module_path, version FROM search_documents`)
+ err := db.db.CollectStructs(ctx, &rows, `SELECT package_path, module_path, version, imported_by_count FROM search_documents`)
if err != nil {
return nil, err
}