blob: 9163f41006a4eff03a55ffbfea12aaedd9a64654 [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.jsonv2
package json
import (
"bytes"
"fmt"
"io"
"testing"
"encoding/json/internal/jsontest"
"encoding/json/jsontext"
)
// TestIntern verifies that makeString returns a string equal to the bytes
// it was given while one stringCache is reused across many inputs.
func TestIntern(t *testing.T) {
	var cache stringCache

	// check runs input through the shared cache and aborts the test
	// as soon as the returned string differs from input.
	check := func(input string) {
		got := makeString(&cache, []byte(input))
		if got != input {
			t.Fatalf("make = %v, want %v", got, input)
		}
	}

	// Every suffix of the alphabet, from the full 26 letters
	// down to the empty string.
	const alphabet = "abcdefghijklmnopqrstuvwxyz"
	for start := 0; start <= len(alphabet); start++ {
		check(alphabet[start:])
	}

	// One thousand distinct strings: "test" followed by the
	// binary representation of the counter.
	for n := 0; n < 1000; n++ {
		check(fmt.Sprintf("test%b", n))
	}
}
// sink is a package-level destination for the strings produced inside the
// BenchmarkIntern loops; presumably it exists so that the compiler cannot
// discard the benchmarked work as dead code.
var sink string
// BenchmarkIntern compares three ways of turning []byte values into strings
// over several workloads: a plain conversion ("Alloc"), interning through
// stringCache ("Cache"), and interning through a Go map ("GoMap").
// Each workload is run as a sub-benchmark containing those three variants.
func BenchmarkIntern(b *testing.B) {
	// datasetStrings returns every JSON string token found in the jsontest
	// dataset called name, skipping tokens in an object-name position.
	// It fails the benchmark if no dataset has that name.
	datasetStrings := func(name string) (out [][]byte) {
		var data []byte
		found := false
		for _, ts := range jsontest.Data {
			if ts.Name == name {
				data = ts.Data()
				found = true
			}
		}
		if !found {
			// Without this check an unknown name would leave data nil,
			// the decoder would report io.EOF immediately, and the
			// sub-benchmarks would silently time an empty loop.
			b.Fatalf("dataset %q not found in jsontest.Data", name)
		}

		dec := jsontext.NewDecoder(bytes.NewReader(data))
		for {
			// Inspect the decoder stack before reading, so that the state
			// describes the token that is about to be read. An even index
			// inside an object is treated as a name position.
			k, n := dec.StackIndex(dec.StackDepth())
			isObjectName := k == '{' && n%2 == 0

			tok, err := dec.ReadToken()
			if err != nil {
				if err == io.EOF {
					break
				}
				b.Fatalf("ReadToken error: %v", err)
			}
			if tok.Kind() == '"' && !isObjectName {
				out = append(out, []byte(tok.String()))
			}
		}
		return out
	}

	tests := []struct {
		label string
		data  [][]byte
	}{
		// Best is the best case scenario where every string is the same.
		{"Best", func() (out [][]byte) {
			for range 1000 {
				out = append(out, []byte("hello, world!"))
			}
			return out
		}()},

		// Repeat is a sequence of the same set of names repeated.
		// This commonly occurs when unmarshaling a JSON array of JSON objects,
		// where the set of all names is usually small.
		{"Repeat", func() (out [][]byte) {
			for range 100 {
				for _, s := range []string{"first_name", "last_name", "age", "address", "street_address", "city", "state", "postal_code", "phone_numbers", "gender"} {
					out = append(out, []byte(s))
				}
			}
			return out
		}()},

		// Synthea is all string values encountered in the Synthea FHIR dataset.
		{"Synthea", datasetStrings("SyntheaFhir")},

		// Twitter is all string values encountered in the Twitter dataset.
		{"Twitter", datasetStrings("TwitterStatus")},

		// Worst is the worst case scenario where every string is different
		// resulting in wasted time looking up a string that will never match.
		{"Worst", func() (out [][]byte) {
			for i := range 1000 {
				out = append(out, []byte(fmt.Sprintf("%016x", i)))
			}
			return out
		}()},
	}

	for _, tt := range tests {
		b.Run(tt.label, func(b *testing.B) {
			// Alloc simply heap allocates each string.
			// This provides an upper bound on the number of allocations.
			b.Run("Alloc", func(b *testing.B) {
				b.ReportAllocs()
				for range b.N {
					for _, raw := range tt.data {
						sink = string(raw)
					}
				}
			})

			// Cache interns strings using stringCache.
			// We want to optimize for having a faster runtime than Alloc,
			// and also keeping the number of allocations closer to GoMap.
			b.Run("Cache", func(b *testing.B) {
				b.ReportAllocs()
				for range b.N {
					// A fresh cache per iteration: every pass over the
					// workload starts from the zero value.
					var sc stringCache
					for _, raw := range tt.data {
						sink = makeString(&sc, raw)
					}
				}
			})

			// GoMap interns all strings in a simple Go map.
			// This provides a lower bound on the number of allocations.
			b.Run("GoMap", func(b *testing.B) {
				b.ReportAllocs()
				for range b.N {
					m := make(map[string]string)
					for _, raw := range tt.data {
						// Only a miss converts raw to a stored string;
						// a hit reuses the string already in the map.
						s, ok := m[string(raw)]
						if !ok {
							s = string(raw)
							m[s] = s
						}
						sink = s
					}
				}
			})
		})
	}
}