internal/postgres: on insert, delete only old packages from search

Instead of deleting the entire module from search_documents on insert,
just delete packages that are not in the current version. That will
avoid resetting imported-by counts to zero when we reprocess the
latest version of a module.

For golang/go#47555

Change-Id: Ie2aedc920f0e840c20f4487ed10bf7801b43a3f8
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/341269
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/postgres/insert_module.go b/internal/postgres/insert_module.go
index ea58cd5..ef71c46 100644
--- a/internal/postgres/insert_module.go
+++ b/internal/postgres/insert_module.go
@@ -149,10 +149,7 @@
 			return err
 		}
 
-		// Delete this module from search_documents completely. Below we'll
-		// insert the packages from this module. This will effectively remove
-		// packages from older module versions that are not in the latest one.
-		if err := deleteModuleFromSearchDocuments(ctx, tx, m.ModulePath, nil); err != nil {
+		if err := deleteOtherModulePackagesFromSearchDocuments(ctx, tx, m); err != nil {
 			return err
 		}
 
diff --git a/internal/postgres/search.go b/internal/postgres/search.go
index c3a9e22..56376bb 100644
--- a/internal/postgres/search.go
+++ b/internal/postgres/search.go
@@ -1092,6 +1092,28 @@
 	})
 }
 
+// deleteOtherModulePackagesFromSearchDocuments deletes all packages from search
+// documents with the given module that are not in m.
+func deleteOtherModulePackagesFromSearchDocuments(ctx context.Context, tx *database.DB, m *internal.Module) error {
+	dbPkgs, err := tx.CollectStrings(ctx, `
+		SELECT package_path FROM search_documents WHERE module_path = $1
+	`, m.ModulePath)
+	if err != nil {
+		return err
+	}
+	pkgInModule := map[string]bool{}
+	for _, u := range m.Packages() {
+		pkgInModule[u.Path] = true
+	}
+	var otherPkgs []string
+	for _, p := range dbPkgs {
+		if !pkgInModule[p] {
+			otherPkgs = append(otherPkgs, p)
+		}
+	}
+	return deleteModuleFromSearchDocuments(ctx, tx, m.ModulePath, otherPkgs)
+}
+
 // deleteModuleFromSearchDocuments deletes module_path from search_documents.
 // If packages is non-empty, it only deletes those packages.
 func deleteModuleFromSearchDocuments(ctx context.Context, tx *database.DB, modulePath string, packages []string) error {
diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go
index 838ce4e..5383ff1 100644
--- a/internal/postgres/search_test.go
+++ b/internal/postgres/search_test.go
@@ -1341,10 +1341,10 @@
 	const modulePath = "deleteme.com"
 
 	initial := []searchDocumentRow{
-		{modulePath + "/p1", modulePath, "v0.0.9"}, // oldest version of same module
-		{modulePath + "/p2", modulePath, "v1.1.0"}, // older version of same module
-		{modulePath + "/p4", modulePath, "v1.9.0"}, // newer version of same module
-		{"other.org/p2", "other.org", "v1.1.0"},    // older version of a different module
+		{modulePath + "/p1", modulePath, "v0.0.9", 0}, // oldest version of same module
+		{modulePath + "/p2", modulePath, "v1.1.0", 0}, // older version of same module
+		{modulePath + "/p4", modulePath, "v1.9.0", 0}, // newer version of same module
+		{"other.org/p2", "other.org", "v1.1.0", 0},    // older version of a different module
 	}
 
 	insertInitial := func(db *DB) {
@@ -1367,8 +1367,8 @@
 		}
 
 		checkSearchDocuments(ctx, t, testDB, []searchDocumentRow{
-			{modulePath + "/p4", modulePath, "v1.9.0"}, // newer version not deleted
-			{"other.org/p2", "other.org", "v1.1.0"},    // other module not deleted
+			{modulePath + "/p4", modulePath, "v1.9.0", 0}, // newer version not deleted
+			{"other.org/p2", "other.org", "v1.1.0", 0},    // other module not deleted
 		})
 	})
 	t.Run("deleteModuleFromSearchDocuments", func(t *testing.T) {
@@ -1381,18 +1381,43 @@
 			t.Fatal(err)
 		}
 		checkSearchDocuments(ctx, t, testDB, []searchDocumentRow{
-			{"other.org/p2", "other.org", "v1.1.0"}, // other module not deleted
+			{"other.org/p2", "other.org", "v1.1.0", 0}, // other module not deleted
 		})
 	})
+	t.Run("deleteOtherModulePackagesFromSearchDocuments", func(t *testing.T) {
+		testDB, release := acquire(t)
+		defer release()
+
+		// Insert a module with two packages.
+		m0 := sample.Module(modulePath, "v1.0.0", "p1", "p2")
+		MustInsertModule(ctx, t, testDB, m0)
+		// Set the imported-by count to a non-zero value so we can tell which
+		// rows were deleted.
+		if _, err := testDB.db.Exec(ctx, `UPDATE search_documents SET imported_by_count = 1`); err != nil {
+			t.Fatal(err)
+		}
+		// Later version of module does not have p1.
+		m1 := sample.Module(modulePath, "v1.1.0", "p2", "p3")
+		MustInsertModule(ctx, t, testDB, m1)
+
+		// p1 should be gone, p2 should be there with the same
+		// imported_by_count, and p3 should be there with a zero count.
+		want := []searchDocumentRow{
+			{modulePath + "/p2", modulePath, "v1.1.0", 1},
+			{modulePath + "/p3", modulePath, "v1.1.0", 0},
+		}
+		checkSearchDocuments(ctx, t, testDB, want)
+	})
 }
 
 type searchDocumentRow struct {
 	PackagePath, ModulePath, Version string
+	ImportedByCount                  int
 }
 
 func readSearchDocuments(ctx context.Context, db *DB) ([]searchDocumentRow, error) {
 	var rows []searchDocumentRow
-	err := db.db.CollectStructs(ctx, &rows, `SELECT package_path, module_path, version FROM search_documents`)
+	err := db.db.CollectStructs(ctx, &rows, `SELECT package_path, module_path, version, imported_by_count FROM search_documents`)
 	if err != nil {
 		return nil, err
 	}