Source file src/encoding/json/jsontext/state_test.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.jsonv2
     6  
     7  package jsontext
     8  
     9  import (
    10  	"fmt"
    11  	"slices"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  func TestPointer(t *testing.T) {
    18  	tests := []struct {
    19  		in         Pointer
    20  		wantParent Pointer
    21  		wantLast   string
    22  		wantTokens []string
    23  		wantValid  bool
    24  	}{
    25  		{"", "", "", nil, true},
    26  		{"a", "", "a", []string{"a"}, false},
    27  		{"~", "", "~", []string{"~"}, false},
    28  		{"/a", "", "a", []string{"a"}, true},
    29  		{"/foo/bar", "/foo", "bar", []string{"foo", "bar"}, true},
    30  		{"///", "//", "", []string{"", "", ""}, true},
    31  		{"/~0~1", "", "~/", []string{"~/"}, true},
    32  		{"/\xde\xad\xbe\xef", "", "\xde\xad\xbe\xef", []string{"\xde\xad\xbe\xef"}, false},
    33  	}
    34  	for _, tt := range tests {
    35  		if got := tt.in.Parent(); got != tt.wantParent {
    36  			t.Errorf("Pointer(%q).Parent = %q, want %q", tt.in, got, tt.wantParent)
    37  		}
    38  		if got := tt.in.LastToken(); got != tt.wantLast {
    39  			t.Errorf("Pointer(%q).Last = %q, want %q", tt.in, got, tt.wantLast)
    40  		}
    41  		if strings.HasPrefix(string(tt.in), "/") {
    42  			wantRoundtrip := tt.in
    43  			if !utf8.ValidString(string(wantRoundtrip)) {
    44  				// Replace bytes of invalid UTF-8 with Unicode replacement character.
    45  				wantRoundtrip = Pointer([]rune(wantRoundtrip))
    46  			}
    47  			if got := tt.in.Parent().AppendToken(tt.in.LastToken()); got != wantRoundtrip {
    48  				t.Errorf("Pointer(%q).Parent().AppendToken(LastToken()) = %q, want %q", tt.in, got, tt.in)
    49  			}
    50  			in := tt.in
    51  			for {
    52  				if (in + "x").Contains(tt.in) {
    53  					t.Errorf("Pointer(%q).Contains(%q) = true, want false", in+"x", tt.in)
    54  				}
    55  				if !in.Contains(tt.in) {
    56  					t.Errorf("Pointer(%q).Contains(%q) = false, want true", in, tt.in)
    57  				}
    58  				if in == in.Parent() {
    59  					break
    60  				}
    61  				in = in.Parent()
    62  			}
    63  		}
    64  		if got := slices.Collect(tt.in.Tokens()); !slices.Equal(got, tt.wantTokens) {
    65  			t.Errorf("Pointer(%q).Tokens = %q, want %q", tt.in, got, tt.wantTokens)
    66  		}
    67  		if got := tt.in.IsValid(); got != tt.wantValid {
    68  			t.Errorf("Pointer(%q).IsValid = %v, want %v", tt.in, got, tt.wantValid)
    69  		}
    70  	}
    71  }
    72  
    73  func TestStateMachine(t *testing.T) {
    74  	// To test a state machine, we pass an ordered sequence of operations and
    75  	// check whether the current state is as expected.
    76  	// The operation type is a union type of various possible operations,
    77  	// which either call mutating methods on the state machine or
    78  	// call accessor methods on state machine and verify the results.
    79  	type operation any
    80  	type (
    81  		// stackLengths checks the results of stateEntry.length accessors.
    82  		stackLengths []int64
    83  
    84  		// appendTokens is sequence of token kinds to append where
    85  		// none of them are expected to fail.
    86  		//
    87  		// For example: `[nft]` is equivalent to the following sequence:
    88  		//
    89  		//	pushArray()
    90  		//	appendLiteral()
    91  		//	appendString()
    92  		//	appendNumber()
    93  		//	popArray()
    94  		//
    95  		appendTokens string
    96  
    97  		// appendToken is a single token kind to append with the expected error.
    98  		appendToken struct {
    99  			kind Kind
   100  			want error
   101  		}
   102  
   103  		// needDelim checks the result of the needDelim accessor.
   104  		needDelim struct {
   105  			next Kind
   106  			want byte
   107  		}
   108  	)
   109  
   110  	// Each entry is a sequence of tokens to pass to the state machine.
   111  	tests := []struct {
   112  		label string
   113  		ops   []operation
   114  	}{{
   115  		"TopLevelValues",
   116  		[]operation{
   117  			stackLengths{0},
   118  			needDelim{'n', 0},
   119  			appendTokens(`nft`),
   120  			stackLengths{3},
   121  			needDelim{'"', 0},
   122  			appendTokens(`"0[]{}`),
   123  			stackLengths{7},
   124  		},
   125  	}, {
   126  		"ArrayValues",
   127  		[]operation{
   128  			stackLengths{0},
   129  			needDelim{'[', 0},
   130  			appendTokens(`[`),
   131  			stackLengths{1, 0},
   132  			needDelim{'n', 0},
   133  			appendTokens(`nft`),
   134  			stackLengths{1, 3},
   135  			needDelim{'"', ','},
   136  			appendTokens(`"0[]{}`),
   137  			stackLengths{1, 7},
   138  			needDelim{']', 0},
   139  			appendTokens(`]`),
   140  			stackLengths{1},
   141  		},
   142  	}, {
   143  		"ObjectValues",
   144  		[]operation{
   145  			stackLengths{0},
   146  			needDelim{'{', 0},
   147  			appendTokens(`{`),
   148  			stackLengths{1, 0},
   149  			needDelim{'"', 0},
   150  			appendTokens(`"`),
   151  			stackLengths{1, 1},
   152  			needDelim{'n', ':'},
   153  			appendTokens(`n`),
   154  			stackLengths{1, 2},
   155  			needDelim{'"', ','},
   156  			appendTokens(`"f"t`),
   157  			stackLengths{1, 6},
   158  			appendTokens(`"""0"[]"{}`),
   159  			stackLengths{1, 14},
   160  			needDelim{'}', 0},
   161  			appendTokens(`}`),
   162  			stackLengths{1},
   163  		},
   164  	}, {
   165  		"ObjectCardinality",
   166  		[]operation{
   167  			appendTokens(`{`),
   168  
   169  			// Appending any kind other than string for object name is an error.
   170  			appendToken{'n', ErrNonStringName},
   171  			appendToken{'f', ErrNonStringName},
   172  			appendToken{'t', ErrNonStringName},
   173  			appendToken{'0', ErrNonStringName},
   174  			appendToken{'{', ErrNonStringName},
   175  			appendToken{'[', ErrNonStringName},
   176  			appendTokens(`"`),
   177  
   178  			// Appending '}' without first appending any value is an error.
   179  			appendToken{'}', errMissingValue},
   180  			appendTokens(`"`),
   181  
   182  			appendTokens(`}`),
   183  		},
   184  	}, {
   185  		"MismatchingDelims",
   186  		[]operation{
   187  			appendToken{'}', errMismatchDelim}, // appending '}' without preceding '{'
   188  			appendTokens(`[[{`),
   189  			appendToken{']', errMismatchDelim}, // appending ']' that mismatches preceding '{'
   190  			appendTokens(`}]`),
   191  			appendToken{'}', errMismatchDelim}, // appending '}' that mismatches preceding '['
   192  			appendTokens(`]`),
   193  			appendToken{']', errMismatchDelim}, // appending ']' without preceding '['
   194  		},
   195  	}}
   196  
   197  	for _, tt := range tests {
   198  		t.Run(tt.label, func(t *testing.T) {
   199  			// Flatten appendTokens to sequence of appendToken entries.
   200  			var ops []operation
   201  			for _, op := range tt.ops {
   202  				if toks, ok := op.(appendTokens); ok {
   203  					for _, k := range []byte(toks) {
   204  						ops = append(ops, appendToken{Kind(k), nil})
   205  					}
   206  					continue
   207  				}
   208  				ops = append(ops, op)
   209  			}
   210  
   211  			// Append each token to the state machine and check the output.
   212  			var state stateMachine
   213  			state.reset()
   214  			var sequence []Kind
   215  			for _, op := range ops {
   216  				switch op := op.(type) {
   217  				case stackLengths:
   218  					var got []int64
   219  					for i := range state.Depth() {
   220  						e := state.index(i)
   221  						got = append(got, e.Length())
   222  					}
   223  					want := []int64(op)
   224  					if !slices.Equal(got, want) {
   225  						t.Fatalf("%s: stack lengths mismatch:\ngot  %v\nwant %v", sequence, got, want)
   226  					}
   227  				case appendToken:
   228  					got := state.append(op.kind)
   229  					if !equalError(got, op.want) {
   230  						t.Fatalf("%s: append('%c') = %v, want %v", sequence, op.kind, got, op.want)
   231  					}
   232  					if got == nil {
   233  						sequence = append(sequence, op.kind)
   234  					}
   235  				case needDelim:
   236  					if got := state.needDelim(op.next); got != op.want {
   237  						t.Fatalf("%s: needDelim('%c') = '%c', want '%c'", sequence, op.next, got, op.want)
   238  					}
   239  				default:
   240  					panic(fmt.Sprintf("unknown operation: %T", op))
   241  				}
   242  			}
   243  		})
   244  	}
   245  }
   246  
   247  // append is a thin wrapper over the other append, pop, or push methods
   248  // based on the token kind.
   249  func (s *stateMachine) append(k Kind) error {
   250  	switch k {
   251  	case 'n', 'f', 't':
   252  		return s.appendLiteral()
   253  	case '"':
   254  		return s.appendString()
   255  	case '0':
   256  		return s.appendNumber()
   257  	case '{':
   258  		return s.pushObject()
   259  	case '}':
   260  		return s.popObject()
   261  	case '[':
   262  		return s.pushArray()
   263  	case ']':
   264  		return s.popArray()
   265  	default:
   266  		panic(fmt.Sprintf("invalid token kind: '%c'", k))
   267  	}
   268  }
   269  
   270  func TestObjectNamespace(t *testing.T) {
   271  	type operation any
   272  	type (
   273  		insert struct {
   274  			name         string
   275  			wantInserted bool
   276  		}
   277  		removeLast struct{}
   278  	)
   279  
   280  	// Sequence of insert operations to perform (order matters).
   281  	ops := []operation{
   282  		insert{`""`, true},
   283  		removeLast{},
   284  		insert{`""`, true},
   285  		insert{`""`, false},
   286  
   287  		// Test insertion of the same name with different formatting.
   288  		insert{`"alpha"`, true},
   289  		insert{`"ALPHA"`, true}, // case-sensitive matching
   290  		insert{`"alpha"`, false},
   291  		insert{`"\u0061\u006c\u0070\u0068\u0061"`, false}, // unescapes to "alpha"
   292  		removeLast{},                                      // removes "ALPHA"
   293  		insert{`"alpha"`, false},
   294  		removeLast{}, // removes "alpha"
   295  		insert{`"alpha"`, true},
   296  		removeLast{},
   297  
   298  		// Bulk insert simple names.
   299  		insert{`"alpha"`, true},
   300  		insert{`"bravo"`, true},
   301  		insert{`"charlie"`, true},
   302  		insert{`"delta"`, true},
   303  		insert{`"echo"`, true},
   304  		insert{`"foxtrot"`, true},
   305  		insert{`"golf"`, true},
   306  		insert{`"hotel"`, true},
   307  		insert{`"india"`, true},
   308  		insert{`"juliet"`, true},
   309  		insert{`"kilo"`, true},
   310  		insert{`"lima"`, true},
   311  		insert{`"mike"`, true},
   312  		insert{`"november"`, true},
   313  		insert{`"oscar"`, true},
   314  		insert{`"papa"`, true},
   315  		insert{`"quebec"`, true},
   316  		insert{`"romeo"`, true},
   317  		insert{`"sierra"`, true},
   318  		insert{`"tango"`, true},
   319  		insert{`"uniform"`, true},
   320  		insert{`"victor"`, true},
   321  		insert{`"whiskey"`, true},
   322  		insert{`"xray"`, true},
   323  		insert{`"yankee"`, true},
   324  		insert{`"zulu"`, true},
   325  
   326  		// Test insertion of invalid UTF-8.
   327  		insert{`"` + "\ufffd" + `"`, true},
   328  		insert{`"` + "\ufffd" + `"`, false},
   329  		insert{`"\ufffd"`, false},         // unescapes to Unicode replacement character
   330  		insert{`"\uFFFD"`, false},         // unescapes to Unicode replacement character
   331  		insert{`"` + "\xff" + `"`, false}, // mangles as Unicode replacement character
   332  		removeLast{},
   333  		insert{`"` + "\ufffd" + `"`, true},
   334  
   335  		// Test insertion of unicode characters.
   336  		insert{`"☺☻☹"`, true},
   337  		insert{`"☺☻☹"`, false},
   338  		removeLast{},
   339  		insert{`"☺☻☹"`, true},
   340  	}
   341  
   342  	// Execute the sequence of operations twice:
   343  	// 1) on a fresh namespace and 2) on a namespace that has been reset.
   344  	var ns objectNamespace
   345  	wantNames := []string{}
   346  	for _, reset := range []bool{false, true} {
   347  		if reset {
   348  			ns.reset()
   349  			wantNames = nil
   350  		}
   351  
   352  		// Execute the operations and ensure the state is consistent.
   353  		for i, op := range ops {
   354  			switch op := op.(type) {
   355  			case insert:
   356  				gotInserted := ns.insertQuoted([]byte(op.name), false)
   357  				if gotInserted != op.wantInserted {
   358  					t.Fatalf("%d: objectNamespace{%v}.insert(%v) = %v, want %v", i, strings.Join(wantNames, " "), op.name, gotInserted, op.wantInserted)
   359  				}
   360  				if gotInserted {
   361  					b, _ := AppendUnquote(nil, []byte(op.name))
   362  					wantNames = append(wantNames, string(b))
   363  				}
   364  			case removeLast:
   365  				ns.removeLast()
   366  				wantNames = wantNames[:len(wantNames)-1]
   367  			default:
   368  				panic(fmt.Sprintf("unknown operation: %T", op))
   369  			}
   370  
   371  			// Check that the namespace is consistent.
   372  			gotNames := []string{}
   373  			for i := range ns.length() {
   374  				gotNames = append(gotNames, string(ns.getUnquoted(i)))
   375  			}
   376  			if !slices.Equal(gotNames, wantNames) {
   377  				t.Fatalf("%d: objectNamespace = {%v}, want {%v}", i, strings.Join(gotNames, " "), strings.Join(wantNames, " "))
   378  			}
   379  		}
   380  
   381  		// Verify that we have not switched to using a Go map.
   382  		if ns.mapNames != nil {
   383  			t.Errorf("objectNamespace.mapNames = non-nil, want nil")
   384  		}
   385  
   386  		// Insert a large number of names.
   387  		for i := range 64 {
   388  			ns.InsertUnquoted([]byte(fmt.Sprintf(`name%d`, i)))
   389  		}
   390  
   391  		// Verify that we did switch to using a Go map.
   392  		if ns.mapNames == nil {
   393  			t.Errorf("objectNamespace.mapNames = nil, want non-nil")
   394  		}
   395  	}
   396  }
   397  

View as plain text