Source file src/encoding/json/v2/intern_test.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package json
     8  
     9  import (
    10  	"bytes"
    11  	"fmt"
    12  	"io"
    13  	"testing"
    14  
    15  	"encoding/json/internal/jsontest"
    16  	"encoding/json/jsontext"
    17  )
    18  
    19  func TestIntern(t *testing.T) {
    20  	var sc stringCache
    21  	const alphabet = "abcdefghijklmnopqrstuvwxyz"
    22  	for i := range len(alphabet) + 1 {
    23  		want := alphabet[i:]
    24  		if got := makeString(&sc, []byte(want)); got != want {
    25  			t.Fatalf("make = %v, want %v", got, want)
    26  		}
    27  	}
    28  	for i := range 1000 {
    29  		want := fmt.Sprintf("test%b", i)
    30  		if got := makeString(&sc, []byte(want)); got != want {
    31  			t.Fatalf("make = %v, want %v", got, want)
    32  		}
    33  	}
    34  }
    35  
    36  var sink string
    37  
    38  func BenchmarkIntern(b *testing.B) {
    39  	datasetStrings := func(name string) (out [][]byte) {
    40  		var data []byte
    41  		for _, ts := range jsontest.Data {
    42  			if ts.Name == name {
    43  				data = ts.Data()
    44  			}
    45  		}
    46  		dec := jsontext.NewDecoder(bytes.NewReader(data))
    47  		for {
    48  			k, n := dec.StackIndex(dec.StackDepth())
    49  			isObjectName := k == '{' && n%2 == 0
    50  			tok, err := dec.ReadToken()
    51  			if err != nil {
    52  				if err == io.EOF {
    53  					break
    54  				}
    55  				b.Fatalf("ReadToken error: %v", err)
    56  			}
    57  			if tok.Kind() == '"' && !isObjectName {
    58  				out = append(out, []byte(tok.String()))
    59  			}
    60  		}
    61  		return out
    62  	}
    63  
    64  	tests := []struct {
    65  		label string
    66  		data  [][]byte
    67  	}{
    68  		// Best is the best case scenario where every string is the same.
    69  		{"Best", func() (out [][]byte) {
    70  			for range 1000 {
    71  				out = append(out, []byte("hello, world!"))
    72  			}
    73  			return out
    74  		}()},
    75  
    76  		// Repeat is a sequence of the same set of names repeated.
    77  		// This commonly occurs when unmarshaling a JSON array of JSON objects,
    78  		// where the set of all names is usually small.
    79  		{"Repeat", func() (out [][]byte) {
    80  			for range 100 {
    81  				for _, s := range []string{"first_name", "last_name", "age", "address", "street_address", "city", "state", "postal_code", "phone_numbers", "gender"} {
    82  					out = append(out, []byte(s))
    83  				}
    84  			}
    85  			return out
    86  		}()},
    87  
    88  		// Synthea is all string values encountered in the Synthea FHIR dataset.
    89  		{"Synthea", datasetStrings("SyntheaFhir")},
    90  
    91  		// Twitter is all string values encountered in the Twitter dataset.
    92  		{"Twitter", datasetStrings("TwitterStatus")},
    93  
    94  		// Worst is the worst case scenario where every string is different
    95  		// resulting in wasted time looking up a string that will never match.
    96  		{"Worst", func() (out [][]byte) {
    97  			for i := range 1000 {
    98  				out = append(out, []byte(fmt.Sprintf("%016x", i)))
    99  			}
   100  			return out
   101  		}()},
   102  	}
   103  
   104  	for _, tt := range tests {
   105  		b.Run(tt.label, func(b *testing.B) {
   106  			// Alloc simply heap allocates each string.
   107  			// This provides an upper bound on the number of allocations.
   108  			b.Run("Alloc", func(b *testing.B) {
   109  				b.ReportAllocs()
   110  				for range b.N {
   111  					for _, b := range tt.data {
   112  						sink = string(b)
   113  					}
   114  				}
   115  			})
   116  			// Cache interns strings using stringCache.
   117  			// We want to optimize for having a faster runtime than Alloc,
   118  			// and also keeping the number of allocations closer to GoMap.
   119  			b.Run("Cache", func(b *testing.B) {
   120  				b.ReportAllocs()
   121  				for range b.N {
   122  					var sc stringCache
   123  					for _, b := range tt.data {
   124  						sink = makeString(&sc, b)
   125  					}
   126  				}
   127  			})
   128  			// GoMap interns all strings in a simple Go map.
   129  			// This provides a lower bound on the number of allocations.
   130  			b.Run("GoMap", func(b *testing.B) {
   131  				b.ReportAllocs()
   132  				for range b.N {
   133  					m := make(map[string]string)
   134  					for _, b := range tt.data {
   135  						s, ok := m[string(b)]
   136  						if !ok {
   137  							s = string(b)
   138  							m[s] = s
   139  						}
   140  						sink = s
   141  					}
   142  				}
   143  			})
   144  		})
   145  	}
   146  }
   147  

View as plain text