godev/cmd/worker: lift bucket normalization to the caller of partition

Address a TODO by simplifying the factoring of normalizeCounterName,
letting the caller pass in the bucket normalization func.

Change-Id: I1f053cb291fec1a3becda9bc916c138f25df28ef
Reviewed-on: https://go-review.googlesource.com/c/telemetry/+/613077
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/godev/cmd/worker/main.go b/godev/cmd/worker/main.go
index aebc41e..473ecd6 100644
--- a/godev/cmd/worker/main.go
+++ b/godev/cmd/worker/main.go
@@ -401,12 +401,21 @@
 		var charts []*chart
 		program := programName(p.Name)
 		if !telemetry.IsToolchainProgram(p.Name) {
-			charts = append(charts, d.partition(program, "Version", toSliceOf[bucketName](p.Versions), compareSemver))
+			charts = append(charts, d.partition(program, "Version", toSliceOf[bucketName](p.Versions), func(b bucketName) bucketName {
+				if b == "devel" {
+					return b
+				}
+				// map v1.2.3 -> v1.2
+				return bucketName(semver.MajorMinor(string(b)))
+			}, compareSemver))
 		}
 		charts = append(charts,
-			d.partition(program, "GOOS", toSliceOf[bucketName](cfg.GOOS), nil),
-			d.partition(program, "GOARCH", toSliceOf[bucketName](cfg.GOARCH), nil),
-			d.partition(program, "GoVersion", toSliceOf[bucketName](cfg.GoVersion), version.Compare))
+			d.partition(program, "GOOS", toSliceOf[bucketName](cfg.GOOS), nil, nil),
+			d.partition(program, "GOARCH", toSliceOf[bucketName](cfg.GOARCH), nil, nil),
+			d.partition(program, "GoVersion", toSliceOf[bucketName](cfg.GoVersion), func(b bucketName) bucketName {
+				// map go1.2.3 -> go1.2
+				return bucketName(goMajorMinor(string(b)))
+			}, version.Compare))
 		for _, c := range p.Counters {
 			// TODO: add support for histogram counters by getting the counter type
 			// from the chart config.
@@ -416,7 +425,7 @@
 				_, bucket := splitCounterName(counter)
 				buckets = append(buckets, bucket)
 			}
-			charts = append(charts, d.partition(program, chart, buckets, nil))
+			charts = append(charts, d.partition(program, chart, buckets, nil, nil))
 		}
 		for _, p := range charts {
 			if p != nil {
@@ -460,10 +469,13 @@
 // partition builds a chart for the program and the counter. It can return nil
 // if there is no data for the counter in d.
 //
-// if compareBuckets is provided, it is used to sort the buckets, where
+// If normalizeBuckets is provided, it is used to map bucket names to new
+// values. Buckets that map to the same value will be merged.
+//
+// If compareBuckets is provided, it is used to sort the buckets, where
 // compareBuckets returns -1, 0, or +1 if x < y, x == y, or x > y.
 // Otherwise, buckets are sorted lexically.
-func (d data) partition(program programName, chartName graphName, buckets []bucketName, compareBuckets func(x, y string) int) *chart {
+func (d data) partition(program programName, chartName graphName, buckets []bucketName, normalizeBucket func(bucketName) bucketName, compareBuckets func(x, y string) int) *chart {
 	chart := &chart{
 		ID:   fmt.Sprintf("charts:%s:%s", program, chartName),
 		Name: string(chartName),
@@ -488,14 +500,16 @@
 				continue
 			}
 			seen[bucket] = true
-			// TODO(hyangah): let caller normalize names in counters.
-			normal := normalizeCounterName(chartName, bucket)
-			if _, ok := merged[normal]; !ok {
-				merged[normal] = make(map[reportID]struct{})
+			key := bucket
+			if normalizeBucket != nil {
+				key = normalizeBucket(bucket)
+			}
+			if _, ok := merged[key]; !ok {
+				merged[key] = make(map[reportID]struct{})
 			}
 			for id := range d[wk][pk][chartName][bucket] {
 				empty = false
-				merged[normal][id] = struct{}{}
+				merged[key][id] = struct{}{}
 			}
 		}
 	}
@@ -611,30 +625,6 @@
 	d[week][program][chart][bucket][id] = value
 }
 
-// normalizeCounterName normalizes the counter name.
-// More specifically, program version, goos, goarch, and goVersion
-// are not a real counter, but information from the metadata in the report.
-// This function constructs pseudo counter names to handle them
-// like other normal counters in aggregation and chart drawing.
-// To limit the cardinality of version and goVersion, this function
-// uses only major and minor version numbers in the pseudo-counter names.
-// If the counter is a normal counter name, it is returned as is.
-func normalizeCounterName(chart graphName, bucket bucketName) bucketName {
-	switch chart {
-	case versionCounter:
-		if bucket == "devel" {
-			return bucket
-		}
-		if strings.HasPrefix(string(bucket), "go") {
-			return bucketName(goMajorMinor(string(bucket)))
-		}
-		return bucketName(semver.MajorMinor(string(bucket)))
-	case goversionCounter:
-		return bucketName(goMajorMinor(string(bucket)))
-	}
-	return bucket
-}
-
 // splitCounterName gets splits the prefix and bucket splitCounterName of a counter name
 // or a bucket name. For an input with no bucket part prefix and bucket
 // are the same.
diff --git a/godev/cmd/worker/main_test.go b/godev/cmd/worker/main_test.go
index 4ee1bde..64b0687 100644
--- a/godev/cmd/worker/main_test.go
+++ b/godev/cmd/worker/main_test.go
@@ -10,6 +10,7 @@
 	"time"
 
 	"github.com/google/go-cmp/cmp"
+	"golang.org/x/mod/semver"
 	"golang.org/x/telemetry/internal/config"
 	"golang.org/x/telemetry/internal/telemetry"
 )
@@ -221,6 +222,12 @@
 }
 
 func TestPartition(t *testing.T) {
+	normalVersion := func(b bucketName) bucketName {
+		return bucketName(semver.MajorMinor(string(b)))
+	}
+	normalGoVersion := func(b bucketName) bucketName {
+		return bucketName(goMajorMinor(string(b)))
+	}
 	exampleData := group(exampleReports)
 	type args struct {
 		program programName
@@ -228,10 +235,11 @@
 		buckets []bucketName
 	}
 	tests := []struct {
-		name string
-		data data
-		args args
-		want *chart
+		name      string
+		data      data
+		args      args
+		normalize func(bucketName) bucketName
+		want      *chart
 	}{
 		{
 			name: "major.minor.patch version counter",
@@ -241,6 +249,7 @@
 				name:    "Version",
 				buckets: []bucketName{"v1.2.3", "v2.3.4"},
 			},
+			normalize: normalVersion,
 			want: &chart{
 				ID:   "charts:example.com/mod/pkg:Version",
 				Name: "Version",
@@ -254,7 +263,7 @@
 					{
 						Week:  "2999-01-01",
 						Key:   "v2.3",
-						Value: 2, // TODO(rfindley): why isn't this '2'? There are two reports in the data.
+						Value: 2,
 					},
 				},
 			},
@@ -267,6 +276,7 @@
 				name:    "Version",
 				buckets: []bucketName{"v1.2.3", "v2.3.4"},
 			},
+			normalize: normalVersion,
 			want: &chart{
 				ID:   "charts:example.com/mod/pkg:Version",
 				Name: "Version",
@@ -293,6 +303,7 @@
 				name:    "Version",
 				buckets: []bucketName{"v1.2.3", "v2.3.4", "v1.2.3"},
 			},
+			normalize: normalVersion,
 			want: &chart{
 				ID:   "charts:example.com/mod/pkg:Version",
 				Name: "Version",
@@ -338,6 +349,33 @@
 			},
 		},
 		{
+			name: "GoVersion counter",
+			data: exampleData,
+			args: args{
+				program: "example.com/mod/pkg",
+				name:    "GoVersion",
+				buckets: []bucketName{"go1.2.3", "go2.3.4"},
+			},
+			normalize: normalGoVersion,
+			want: &chart{
+				ID:   "charts:example.com/mod/pkg:GoVersion",
+				Name: "GoVersion",
+				Type: "partition",
+				Data: []*datum{
+					{
+						Week:  "2999-01-01",
+						Key:   "go1.2",
+						Value: 3,
+					},
+					{
+						Week:  "2999-01-01",
+						Key:   "go2.3",
+						Value: 0,
+					},
+				},
+			},
+		},
+		{
 			name: "three days, multiple versions",
 			data: data{
 				"2999-01-01": {"example.com/mod/pkg": {"Version": {
@@ -359,6 +397,7 @@
 				name:    "Version",
 				buckets: []bucketName{"v1.2.3", "v2.3.4"},
 			},
+			normalize: normalVersion,
 			want: &chart{
 				ID:   "charts:example.com/mod/pkg:Version",
 				Name: "Version",
@@ -478,12 +517,13 @@
 				name:    "Version",
 				buckets: []bucketName{"v1.2.3", "v2.3.4"},
 			},
-			want: nil,
+			normalize: normalVersion,
+			want:      nil,
 		},
 	}
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
-			got := tc.data.partition(tc.args.program, tc.args.name, tc.args.buckets, nil)
+			got := tc.data.partition(tc.args.program, tc.args.name, tc.args.buckets, tc.normalize, nil)
 			if diff := cmp.Diff(tc.want, got); diff != "" {
 				t.Errorf("partition() mismatch (-want +got):\n%s", diff)
 			}
@@ -628,61 +668,6 @@
 	}
 }
 
-func TestNormalizeCounterName(t *testing.T) {
-	testcases := []struct {
-		name   string
-		chart  graphName
-		bucket bucketName
-		want   bucketName
-	}{
-		{
-			name:   "strip patch version for Version",
-			chart:  "Version",
-			bucket: "v0.15.3",
-			want:   "v0.15",
-		},
-		{
-			name:   "strip patch go version for Version",
-			chart:  "Version",
-			bucket: "go1.12.3",
-			want:   "go1.12",
-		},
-		{
-			name:   "concatenate devel for Version",
-			chart:  "Version",
-			bucket: "devel",
-			want:   "devel",
-		},
-		{
-			name:   "concatenate for GOOS",
-			chart:  "GOOS",
-			bucket: "darwin",
-			want:   "darwin",
-		},
-		{
-			name:   "concatenate for GOARCH",
-			chart:  "GOARCH",
-			bucket: "amd64",
-			want:   "amd64",
-		},
-		{
-			name:   "strip patch version for GoVersion",
-			chart:  "GoVersion",
-			bucket: "go1.12.3",
-			want:   "go1.12",
-		},
-	}
-
-	for _, tc := range testcases {
-		t.Run(tc.name, func(t *testing.T) {
-			got := normalizeCounterName(tc.chart, tc.bucket)
-			if tc.want != got {
-				t.Errorf("normalizeCounterName(%q, %q) = %q, want %q", tc.chart, tc.bucket, got, tc.want)
-			}
-		})
-	}
-}
-
 func TestWriteCount(t *testing.T) {
 	type keyValue struct {
 		week    weekName