internal/postgres: keep only one module major version in search grouping

When grouping search results by module, keep only the highest-scoring
package from the highest tagged major version.

Previously, we kept all major versions, and only ranked the higher one
first, so as not to lose information (like import counts) for
higher-scoring packages in a lower major version. But the results are
unexpected.

Fixes golang/go#47839

Change-Id: I66dedec470dd1588f02dcfed5604a6e63f020d62
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/345611
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/postgres/search.go b/internal/postgres/search.go
index 7a08708..670792c 100644
--- a/internal/postgres/search.go
+++ b/internal/postgres/search.go
@@ -27,6 +27,7 @@
 	"golang.org/x/pkgsite/internal/log"
 	"golang.org/x/pkgsite/internal/postgres/symbolsearch"
 	"golang.org/x/pkgsite/internal/stdlib"
+	"golang.org/x/pkgsite/internal/version"
 )
 
 var (
@@ -537,51 +538,48 @@
 // The second and later packages from a module are grouped under the first package,
 // and removed from the top-level list.
 //
-// Higher major versions of a module are put before lower ones.
-//
-// Packages from lower major versions of the module are grouped under the first
-// package of the highest major version. But they are not removed from the
-// top-level list.
+// Higher tagged major versions of a module replace lower ones.
 func groupSearchResults(rs []*SearchResult) []*SearchResult {
-	modules := map[string]*SearchResult{} // module path to first result
-	series := map[string]*SearchResult{}  // series path to result with max major version
-	var results []*SearchResult
+	bestInSeries := map[string]*SearchResult{} // series path to result with max major version
+	// Since rs is sorted by score, the first package we see for a series is the
+	// highest-ranked one. However, we may prefer to show a lower-ranked one from a
+	// module in the same series with a higher major version.
 	for _, r := range rs {
-		f := modules[r.ModulePath]
-		if f == nil {
-			// First result (package) with this module path; remember it and
-			// keep it.
-			modules[r.ModulePath] = r
-			results = append(results, r)
-		} else {
-			// Record this result under the first result.
-			f.SameModule = append(f.SameModule, r)
-		}
-
-		seriesPath, vr := internal.SeriesPathAndMajorVersion(r.ModulePath)
-		f = series[seriesPath]
-		if f == nil {
-			// First time we've seen anything from this series: remember it.
+		seriesPath, rMajor := internal.SeriesPathAndMajorVersion(r.ModulePath)
+		b := bestInSeries[seriesPath]
+		if b == nil {
+			// First result (package) with this series path; remember it.
+			bestInSeries[seriesPath] = r
 			r.OtherMajor = map[string]bool{}
-			series[seriesPath] = r
-		} else if r.ModulePath != f.ModulePath {
-			// Result is from a different major version.
-			// Record the larger one, and give it a higher score.
-			_, vf := internal.SeriesPathAndMajorVersion(f.ModulePath)
-			if vr > vf {
-				series[seriesPath] = r
-				r.OtherMajor = f.OtherMajor
-				f.OtherMajor = nil
-				r.OtherMajor[f.ModulePath] = true
-				if f.Score > r.Score {
-					r.Score = f.Score + 1e-5
-				}
-			} else {
-				f.OtherMajor[r.ModulePath] = true
+		} else {
+			_, bMajor := internal.SeriesPathAndMajorVersion(b.ModulePath)
+			switch {
+			case !version.IsPseudo(r.Version) && (rMajor > bMajor || version.IsPseudo(b.Version)):
+				// r is tagged, and is either in a higher major version, or the current best
+				// is not tagged. Either way, prefer r to b.
+				bestInSeries[seriesPath] = r
+				r.OtherMajor = b.OtherMajor
+				r.OtherMajor[b.ModulePath] = true
+				r.Score = b.Score // inherit the lower major version's higher score
+			case rMajor == bMajor:
+				// r is another package from the module of b; remember it there.
+				b.SameModule = append(b.SameModule, r)
+
+			default:
+				// A package from a different major version (either lower, or
+				// higher untagged). Remember the major version.
+				b.OtherMajor[r.ModulePath] = true
 			}
 		}
 	}
-	// Re-sort by score, since we may have changed some.
+	// Collect new results and re-sort by score.
+	var results []*SearchResult
+	for _, r := range bestInSeries {
+		if len(r.OtherMajor) == 0 {
+			r.OtherMajor = nil
+		}
+		results = append(results, r)
+	}
 	sort.Slice(results, func(i, j int) bool {
 		return results[i].Score > results[j].Score
 	})
diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go
index 785de58..e5edca6 100644
--- a/internal/postgres/search_test.go
+++ b/internal/postgres/search_test.go
@@ -1353,44 +1353,97 @@
 }
 
 func TestGroupSearchResults(t *testing.T) {
-	rs := []*SearchResult{
-		{PackagePath: "m.com/p", ModulePath: "m.com", Score: 10},
-		{PackagePath: "m.com/p2", ModulePath: "m.com", Score: 8},
-		{PackagePath: "a.com/p", ModulePath: "a.com", Score: 7},
-		{PackagePath: "m.com/v2/p", ModulePath: "m.com/v2", Score: 6},
-		{PackagePath: "m.com/v2/p2", ModulePath: "m.com/v2", Score: 4},
+	set := func(els ...string) map[string]bool {
+		s := map[string]bool{}
+		for _, e := range els {
+			s[e] = true
+		}
+		return s
 	}
-	got := groupSearchResults(rs)
-	sp2 := &SearchResult{
-		PackagePath: "m.com/p2",
-		ModulePath:  "m.com",
-		Score:       8,
-	}
-	sp := &SearchResult{
-		PackagePath: "m.com/p",
-		ModulePath:  "m.com",
-		Score:       10,
-		SameModule:  []*SearchResult{sp2},
-	}
-	want := []*SearchResult{
+
+	for _, test := range []struct {
+		name     string
+		in, want []*SearchResult
+	}{
 		{
-			PackagePath: "m.com/v2/p",
-			ModulePath:  "m.com/v2",
-			Score:       10.00001,
-			SameModule: []*SearchResult{
-				{PackagePath: "m.com/v2/p2", ModulePath: "m.com/v2", Score: 4},
+			name: "simple",
+			in: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Score: 10},
+				{Name: "m2", ModulePath: "m2", Score: 9},
 			},
-			OtherMajor: map[string]bool{"m.com": true},
+			want: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Score: 10},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+			},
 		},
-		sp,
 		{
-			PackagePath: "a.com/p",
-			ModulePath:  "a.com",
-			Score:       7,
-			OtherMajor:  map[string]bool{},
+			name: "higher major first",
+			in: []*SearchResult{
+				{Name: "m1", ModulePath: "m1/v2", Score: 10},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+				{Name: "m", ModulePath: "m1", Score: 8},
+			},
+			want: []*SearchResult{
+				{Name: "m1", ModulePath: "m1/v2", Score: 10, OtherMajor: set("m1")},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+			},
 		},
-	}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("mismatch (-want, +got)\n%s", diff)
+		{
+			name: "lower major first",
+			in: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Score: 10},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+				{Name: "m12", ModulePath: "m1/v2", Score: 8},
+			},
+			want: []*SearchResult{
+				{Name: "m12", ModulePath: "m1/v2", Score: 10, OtherMajor: set("m1")},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+			},
+		},
+		{
+			name: "multiple majors",
+			in: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Score: 10},
+				{Name: "m23", ModulePath: "m2/v3", Score: 9},
+				{Name: "m12", ModulePath: "m1/v2", Score: 8},
+				{Name: "m22", ModulePath: "m2/v2", Score: 7},
+				{Name: "m13", ModulePath: "m1/v3", Score: 6},
+				{Name: "m2", ModulePath: "m2", Score: 5},
+			},
+			want: []*SearchResult{
+				{Name: "m13", ModulePath: "m1/v3", Score: 10, OtherMajor: set("m1", "m1/v2")},
+				{Name: "m23", ModulePath: "m2/v3", Score: 9, OtherMajor: set("m2", "m2/v2")},
+			},
+		},
+		{
+			name: "higher untagged",
+			in: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Score: 10},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+				{Name: "m12", ModulePath: "m1/v2", Version: "v0.0.0-12345678901234-A", Score: 8},
+			},
+			want: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Score: 10, OtherMajor: set("m1/v2")},
+				{Name: "m2", ModulePath: "m2", Score: 9},
+			},
+		},
+		{
+			name: "lowest tagged",
+			in: []*SearchResult{
+				{Name: "m13", ModulePath: "m1/v3", Version: "v0.0.0-12345678901234-A", Score: 10},
+				{Name: "m12", ModulePath: "m1/v2", Version: "v0.0.0-12345678901234-B", Score: 9},
+				{Name: "m1", ModulePath: "m1", Version: "v0.0.0", Score: 8},
+			},
+			want: []*SearchResult{
+				{Name: "m1", ModulePath: "m1", Version: "v0.0.0", Score: 10, OtherMajor: set("m1/v2", "m1/v3")},
+			},
+		},
+	} {
+		t.Run(test.name, func(t *testing.T) {
+			got := groupSearchResults(test.in)
+			if diff := cmp.Diff(test.want, got); diff != "" {
+				t.Errorf("mismatch (-want, +got)\n%s", diff)
+			}
+		})
 	}
 }