Source file src/regexp/all_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"bytes"
     9  	"reflect"
    10  	"regexp/syntax"
    11  	"slices"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  var goodRe = []string{
    18  	``,
    19  	`.`,
    20  	`^.$`,
    21  	`a`,
    22  	`a*`,
    23  	`a+`,
    24  	`a?`,
    25  	`a|b`,
    26  	`a*|b*`,
    27  	`(a*|b)(c*|d)`,
    28  	`[a-z]`,
    29  	`[a-abc-c\-\]\[]`,
    30  	`[a-z]+`,
    31  	`[abc]`,
    32  	`[^1234]`,
    33  	`[^\n]`,
    34  	`\!\\`,
    35  }
    36  
    37  type stringError struct {
    38  	re  string
    39  	err string
    40  }
    41  
    42  var badRe = []stringError{
    43  	{`*`, "missing argument to repetition operator: `*`"},
    44  	{`+`, "missing argument to repetition operator: `+`"},
    45  	{`?`, "missing argument to repetition operator: `?`"},
    46  	{`(abc`, "missing closing ): `(abc`"},
    47  	{`abc)`, "unexpected ): `abc)`"},
    48  	{`x[a-z`, "missing closing ]: `[a-z`"},
    49  	{`[z-a]`, "invalid character class range: `z-a`"},
    50  	{`abc\`, "trailing backslash at end of expression"},
    51  	{`a**`, "invalid nested repetition operator: `**`"},
    52  	{`a*+`, "invalid nested repetition operator: `*+`"},
    53  	{`\x`, "invalid escape sequence: `\\x`"},
    54  	{strings.Repeat(`\pL`, 27000), "expression too large"},
    55  }
    56  
    57  func compileTest(t *testing.T, expr string, error string) *Regexp {
    58  	re, err := Compile(expr)
    59  	if error == "" && err != nil {
    60  		t.Error("compiling `", expr, "`; unexpected error: ", err.Error())
    61  	}
    62  	if error != "" && err == nil {
    63  		t.Error("compiling `", expr, "`; missing error")
    64  	} else if error != "" && !strings.Contains(err.Error(), error) {
    65  		t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error)
    66  	}
    67  	return re
    68  }
    69  
    70  func TestGoodCompile(t *testing.T) {
    71  	for i := 0; i < len(goodRe); i++ {
    72  		compileTest(t, goodRe[i], "")
    73  	}
    74  }
    75  
    76  func TestBadCompile(t *testing.T) {
    77  	for i := 0; i < len(badRe); i++ {
    78  		compileTest(t, badRe[i].re, badRe[i].err)
    79  	}
    80  }
    81  
    82  func matchTest(t *testing.T, test *FindTest) {
    83  	re := compileTest(t, test.pat, "")
    84  	if re == nil {
    85  		return
    86  	}
    87  	m := re.MatchString(test.text)
    88  	if m != (len(test.matches) > 0) {
    89  		t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0)
    90  	}
    91  	// now try bytes
    92  	m = re.Match([]byte(test.text))
    93  	if m != (len(test.matches) > 0) {
    94  		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
    95  	}
    96  }
    97  
    98  func TestMatch(t *testing.T) {
    99  	for _, test := range findTests {
   100  		matchTest(t, &test)
   101  	}
   102  }
   103  
   104  func matchFunctionTest(t *testing.T, test *FindTest) {
   105  	m, err := MatchString(test.pat, test.text)
   106  	if err == nil {
   107  		return
   108  	}
   109  	if m != (len(test.matches) > 0) {
   110  		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
   111  	}
   112  }
   113  
   114  func TestMatchFunction(t *testing.T) {
   115  	for _, test := range findTests {
   116  		matchFunctionTest(t, &test)
   117  	}
   118  }
   119  
   120  func copyMatchTest(t *testing.T, test *FindTest) {
   121  	re := compileTest(t, test.pat, "")
   122  	if re == nil {
   123  		return
   124  	}
   125  	m1 := re.MatchString(test.text)
   126  	m2 := re.Copy().MatchString(test.text)
   127  	if m1 != m2 {
   128  		t.Errorf("Copied Regexp match failure on %s: original gave %t; copy gave %t; should be %t",
   129  			test, m1, m2, len(test.matches) > 0)
   130  	}
   131  }
   132  
   133  func TestCopyMatch(t *testing.T) {
   134  	for _, test := range findTests {
   135  		copyMatchTest(t, &test)
   136  	}
   137  }
   138  
   139  type ReplaceTest struct {
   140  	pattern, replacement, input, output string
   141  }
   142  
   143  var replaceTests = []ReplaceTest{
   144  	// Test empty input and/or replacement, with pattern that matches the empty string.
   145  	{"", "", "", ""},
   146  	{"", "x", "", "x"},
   147  	{"", "", "abc", "abc"},
   148  	{"", "x", "abc", "xaxbxcx"},
   149  
   150  	// Test empty input and/or replacement, with pattern that does not match the empty string.
   151  	{"b", "", "", ""},
   152  	{"b", "x", "", ""},
   153  	{"b", "", "abc", "ac"},
   154  	{"b", "x", "abc", "axc"},
   155  	{"y", "", "", ""},
   156  	{"y", "x", "", ""},
   157  	{"y", "", "abc", "abc"},
   158  	{"y", "x", "abc", "abc"},
   159  
   160  	// Multibyte characters -- verify that we don't try to match in the middle
   161  	// of a character.
   162  	{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
   163  	{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
   164  
   165  	// Start and end of a string.
   166  	{"^[a-c]*", "x", "abcdabc", "xdabc"},
   167  	{"[a-c]*$", "x", "abcdabc", "abcdx"},
   168  	{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
   169  	{"^[a-c]*", "x", "abc", "x"},
   170  	{"[a-c]*$", "x", "abc", "x"},
   171  	{"^[a-c]*$", "x", "abc", "x"},
   172  	{"^[a-c]*", "x", "dabce", "xdabce"},
   173  	{"[a-c]*$", "x", "dabce", "dabcex"},
   174  	{"^[a-c]*$", "x", "dabce", "dabce"},
   175  	{"^[a-c]*", "x", "", "x"},
   176  	{"[a-c]*$", "x", "", "x"},
   177  	{"^[a-c]*$", "x", "", "x"},
   178  
   179  	{"^[a-c]+", "x", "abcdabc", "xdabc"},
   180  	{"[a-c]+$", "x", "abcdabc", "abcdx"},
   181  	{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
   182  	{"^[a-c]+", "x", "abc", "x"},
   183  	{"[a-c]+$", "x", "abc", "x"},
   184  	{"^[a-c]+$", "x", "abc", "x"},
   185  	{"^[a-c]+", "x", "dabce", "dabce"},
   186  	{"[a-c]+$", "x", "dabce", "dabce"},
   187  	{"^[a-c]+$", "x", "dabce", "dabce"},
   188  	{"^[a-c]+", "x", "", ""},
   189  	{"[a-c]+$", "x", "", ""},
   190  	{"^[a-c]+$", "x", "", ""},
   191  
   192  	// Other cases.
   193  	{"abc", "def", "abcdefg", "defdefg"},
   194  	{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
   195  	{"abc", "", "abcdabc", "d"},
   196  	{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
   197  	{"abc", "d", "", ""},
   198  	{"abc", "d", "abc", "d"},
   199  	{".+", "x", "abc", "x"},
   200  	{"[a-c]*", "x", "def", "xdxexfx"},
   201  	{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
   202  	{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
   203  
   204  	// Substitutions
   205  	{"a+", "($0)", "banana", "b(a)n(a)n(a)"},
   206  	{"a+", "(${0})", "banana", "b(a)n(a)n(a)"},
   207  	{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
   208  	{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
   209  	{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"},
   210  	{"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "},
   211  	{"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"},
   212  	{"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"},
   213  	{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"},
   214  	{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"},
   215  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"},
   216  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"},
   217  	{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""},
   218  	{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"},
   219  	{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"},
   220  	{"a+", "${oops", "aaa", "${oops"},
   221  	{"a+", "$$", "aaa", "$"},
   222  	{"a+", "$", "aaa", "$"},
   223  
   224  	// Substitution when subexpression isn't found
   225  	{"(x)?", "$1", "123", "123"},
   226  	{"abc", "$1", "123", "123"},
   227  
   228  	// Substitutions involving a (x){0}
   229  	{"(a)(b){0}(c)", ".$1|$3.", "xacxacx", "x.a|c.x.a|c.x"},
   230  	{"(a)(((b))){0}c", ".$1.", "xacxacx", "x.a.x.a.x"},
   231  	{"((a(b){0}){3}){5}(h)", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
   232  	{"((a(b){0}){3}){5}h", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
   233  }
   234  
   235  var replaceLiteralTests = []ReplaceTest{
   236  	// Substitutions
   237  	{"a+", "($0)", "banana", "b($0)n($0)n($0)"},
   238  	{"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"},
   239  	{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
   240  	{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
   241  	{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"},
   242  	{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"},
   243  	{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"},
   244  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"},
   245  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"},
   246  	{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"},
   247  	{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"},
   248  	{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"},
   249  	{"a+", "${oops", "aaa", "${oops"},
   250  	{"a+", "$$", "aaa", "$$"},
   251  	{"a+", "$", "aaa", "$"},
   252  }
   253  
   254  type ReplaceFuncTest struct {
   255  	pattern       string
   256  	replacement   func(string) string
   257  	input, output string
   258  }
   259  
   260  var replaceFuncTests = []ReplaceFuncTest{
   261  	{"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
   262  	{"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
   263  	{"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
   264  }
   265  
   266  func TestReplaceAll(t *testing.T) {
   267  	for _, tc := range replaceTests {
   268  		re, err := Compile(tc.pattern)
   269  		if err != nil {
   270  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   271  			continue
   272  		}
   273  		actual := re.ReplaceAllString(tc.input, tc.replacement)
   274  		if actual != tc.output {
   275  			t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q",
   276  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   277  		}
   278  		// now try bytes
   279  		actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement)))
   280  		if actual != tc.output {
   281  			t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q",
   282  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   283  		}
   284  	}
   285  }
   286  
   287  func TestReplaceAllLiteral(t *testing.T) {
   288  	// Run ReplaceAll tests that do not have $ expansions.
   289  	for _, tc := range replaceTests {
   290  		if strings.Contains(tc.replacement, "$") {
   291  			continue
   292  		}
   293  		re, err := Compile(tc.pattern)
   294  		if err != nil {
   295  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   296  			continue
   297  		}
   298  		actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
   299  		if actual != tc.output {
   300  			t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
   301  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   302  		}
   303  		// now try bytes
   304  		actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
   305  		if actual != tc.output {
   306  			t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
   307  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   308  		}
   309  	}
   310  
   311  	// Run literal-specific tests.
   312  	for _, tc := range replaceLiteralTests {
   313  		re, err := Compile(tc.pattern)
   314  		if err != nil {
   315  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   316  			continue
   317  		}
   318  		actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
   319  		if actual != tc.output {
   320  			t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
   321  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   322  		}
   323  		// now try bytes
   324  		actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
   325  		if actual != tc.output {
   326  			t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
   327  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   328  		}
   329  	}
   330  }
   331  
   332  func TestReplaceAllFunc(t *testing.T) {
   333  	for _, tc := range replaceFuncTests {
   334  		re, err := Compile(tc.pattern)
   335  		if err != nil {
   336  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   337  			continue
   338  		}
   339  		actual := re.ReplaceAllStringFunc(tc.input, tc.replacement)
   340  		if actual != tc.output {
   341  			t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
   342  				tc.pattern, tc.input, actual, tc.output)
   343  		}
   344  		// now try bytes
   345  		actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) }))
   346  		if actual != tc.output {
   347  			t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
   348  				tc.pattern, tc.input, actual, tc.output)
   349  		}
   350  	}
   351  }
   352  
   353  type MetaTest struct {
   354  	pattern, output, literal string
   355  	isLiteral                bool
   356  }
   357  
   358  var metaTests = []MetaTest{
   359  	{``, ``, ``, true},
   360  	{`foo`, `foo`, `foo`, true},
   361  	{`日本語+`, `日本語\+`, `日本語`, false},
   362  	{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
   363  	{`foo.\$`, `foo\.\\\$`, `foo`, false},     // has escaped operators and real operators
   364  	{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
   365  }
   366  
   367  var literalPrefixTests = []MetaTest{
   368  	// See golang.org/issue/11175.
   369  	// output is unused.
   370  	{`^0^0$`, ``, `0`, false},
   371  	{`^0^`, ``, ``, false},
   372  	{`^0$`, ``, `0`, true},
   373  	{`$0^`, ``, ``, false},
   374  	{`$0$`, ``, ``, false},
   375  	{`^^0$$`, ``, ``, false},
   376  	{`^$^$`, ``, ``, false},
   377  	{`$$0^^`, ``, ``, false},
   378  	{`a\x{fffd}b`, ``, `a`, false},
   379  	{`\x{fffd}b`, ``, ``, false},
   380  	{"\ufffd", ``, ``, false},
   381  }
   382  
   383  func TestQuoteMeta(t *testing.T) {
   384  	for _, tc := range metaTests {
   385  		// Verify that QuoteMeta returns the expected string.
   386  		quoted := QuoteMeta(tc.pattern)
   387  		if quoted != tc.output {
   388  			t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`",
   389  				tc.pattern, quoted, tc.output)
   390  			continue
   391  		}
   392  
   393  		// Verify that the quoted string is in fact treated as expected
   394  		// by Compile -- i.e. that it matches the original, unquoted string.
   395  		if tc.pattern != "" {
   396  			re, err := Compile(quoted)
   397  			if err != nil {
   398  				t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err)
   399  				continue
   400  			}
   401  			src := "abc" + tc.pattern + "def"
   402  			repl := "xyz"
   403  			replaced := re.ReplaceAllString(src, repl)
   404  			expected := "abcxyzdef"
   405  			if replaced != expected {
   406  				t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`",
   407  					tc.pattern, src, repl, replaced, expected)
   408  			}
   409  		}
   410  	}
   411  }
   412  
   413  func TestLiteralPrefix(t *testing.T) {
   414  	for _, tc := range append(metaTests, literalPrefixTests...) {
   415  		// Literal method needs to scan the pattern.
   416  		re := MustCompile(tc.pattern)
   417  		str, complete := re.LiteralPrefix()
   418  		if complete != tc.isLiteral {
   419  			t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
   420  		}
   421  		if str != tc.literal {
   422  			t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
   423  		}
   424  	}
   425  }
   426  
   427  type subexpIndex struct {
   428  	name  string
   429  	index int
   430  }
   431  
   432  type subexpCase struct {
   433  	input   string
   434  	num     int
   435  	names   []string
   436  	indices []subexpIndex
   437  }
   438  
   439  var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}
   440  
   441  var subexpCases = []subexpCase{
   442  	{``, 0, nil, emptySubexpIndices},
   443  	{`.*`, 0, nil, emptySubexpIndices},
   444  	{`abba`, 0, nil, emptySubexpIndices},
   445  	{`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
   446  	{`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
   447  	{`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
   448  	{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
   449  	{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
   450  	{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
   451  	{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
   452  	{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
   453  }
   454  
   455  func TestSubexp(t *testing.T) {
   456  	for _, c := range subexpCases {
   457  		re := MustCompile(c.input)
   458  		n := re.NumSubexp()
   459  		if n != c.num {
   460  			t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
   461  			continue
   462  		}
   463  		names := re.SubexpNames()
   464  		if len(names) != 1+n {
   465  			t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
   466  			continue
   467  		}
   468  		if c.names != nil {
   469  			for i := 0; i < 1+n; i++ {
   470  				if names[i] != c.names[i] {
   471  					t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
   472  				}
   473  			}
   474  		}
   475  		for _, subexp := range c.indices {
   476  			index := re.SubexpIndex(subexp.name)
   477  			if index != subexp.index {
   478  				t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
   479  			}
   480  		}
   481  	}
   482  }
   483  
   484  var splitTests = []struct {
   485  	s   string
   486  	r   string
   487  	n   int
   488  	out []string
   489  }{
   490  	{"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}},
   491  	{"foo:and:bar", ":", 1, []string{"foo:and:bar"}},
   492  	{"foo:and:bar", ":", 2, []string{"foo", "and:bar"}},
   493  	{"foo:and:bar", "foo", -1, []string{"", ":and:bar"}},
   494  	{"foo:and:bar", "bar", -1, []string{"foo:and:", ""}},
   495  	{"foo:and:bar", "baz", -1, []string{"foo:and:bar"}},
   496  	{"baabaab", "a", -1, []string{"b", "", "b", "", "b"}},
   497  	{"baabaab", "a*", -1, []string{"b", "b", "b"}},
   498  	{"baabaab", "ba*", -1, []string{"", "", "", ""}},
   499  	{"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}},
   500  	{"foobar", "f+.*b+", -1, []string{"", "ar"}},
   501  	{"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}},
   502  	{"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}},
   503  	{"a,b,c,d,e,f", ",", 0, nil},
   504  	{",", ",", -1, []string{"", ""}},
   505  	{",,,", ",", -1, []string{"", "", "", ""}},
   506  	{"", ",", -1, []string{""}},
   507  	{"", ".*", -1, []string{""}},
   508  	{"", ".+", -1, []string{""}},
   509  	{"", "", -1, []string{}},
   510  	{"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}},
   511  	{"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}},
   512  	{":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}},
   513  }
   514  
   515  func TestSplit(t *testing.T) {
   516  	for i, test := range splitTests {
   517  		re, err := Compile(test.r)
   518  		if err != nil {
   519  			t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error())
   520  			continue
   521  		}
   522  
   523  		split := re.Split(test.s, test.n)
   524  		if !slices.Equal(split, test.out) {
   525  			t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out)
   526  		}
   527  
   528  		if QuoteMeta(test.r) == test.r {
   529  			strsplit := strings.SplitN(test.s, test.r, test.n)
   530  			if !slices.Equal(split, strsplit) {
   531  				t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit)
   532  			}
   533  		}
   534  	}
   535  }
   536  
   537  // The following sequence of Match calls used to panic. See issue #12980.
   538  func TestParseAndCompile(t *testing.T) {
   539  	expr := "a$"
   540  	s := "a\nb"
   541  
   542  	for i, tc := range []struct {
   543  		reFlags  syntax.Flags
   544  		expMatch bool
   545  	}{
   546  		{syntax.Perl | syntax.OneLine, false},
   547  		{syntax.Perl &^ syntax.OneLine, true},
   548  	} {
   549  		parsed, err := syntax.Parse(expr, tc.reFlags)
   550  		if err != nil {
   551  			t.Fatalf("%d: parse: %v", i, err)
   552  		}
   553  		re, err := Compile(parsed.String())
   554  		if err != nil {
   555  			t.Fatalf("%d: compile: %v", i, err)
   556  		}
   557  		if match := re.MatchString(s); match != tc.expMatch {
   558  			t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch)
   559  		}
   560  	}
   561  }
   562  
   563  // Check that one-pass cutoff does trigger.
   564  func TestOnePassCutoff(t *testing.T) {
   565  	re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
   566  	if err != nil {
   567  		t.Fatalf("parse: %v", err)
   568  	}
   569  	p, err := syntax.Compile(re.Simplify())
   570  	if err != nil {
   571  		t.Fatalf("compile: %v", err)
   572  	}
   573  	if compileOnePass(p) != nil {
   574  		t.Fatalf("makeOnePass succeeded; wanted nil")
   575  	}
   576  }
   577  
   578  // Check that the same machine can be used with the standard matcher
   579  // and then the backtracker when there are no captures.
   580  func TestSwitchBacktrack(t *testing.T) {
   581  	re := MustCompile(`a|b`)
   582  	long := make([]byte, maxBacktrackVector+1)
   583  
   584  	// The following sequence of Match calls used to panic. See issue #10319.
   585  	re.Match(long)     // triggers standard matcher
   586  	re.Match(long[:1]) // triggers backtracker
   587  }
   588  
   589  func BenchmarkFind(b *testing.B) {
   590  	b.StopTimer()
   591  	re := MustCompile("a+b+")
   592  	wantSubs := "aaabb"
   593  	s := []byte("acbb" + wantSubs + "dd")
   594  	b.StartTimer()
   595  	b.ReportAllocs()
   596  	for i := 0; i < b.N; i++ {
   597  		subs := re.Find(s)
   598  		if string(subs) != wantSubs {
   599  			b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
   600  		}
   601  	}
   602  }
   603  
   604  func BenchmarkFindAllNoMatches(b *testing.B) {
   605  	re := MustCompile("a+b+")
   606  	s := []byte("acddee")
   607  	b.ReportAllocs()
   608  	b.ResetTimer()
   609  	for i := 0; i < b.N; i++ {
   610  		all := re.FindAll(s, -1)
   611  		if all != nil {
   612  			b.Fatalf("FindAll(%q) = %q; want nil", s, all)
   613  		}
   614  	}
   615  }
   616  
   617  func BenchmarkFindAllTenMatches(b *testing.B) {
   618  	re := MustCompile("a+b+")
   619  	s := bytes.Repeat([]byte("acddeeabbax"), 10)
   620  	b.ReportAllocs()
   621  	b.ResetTimer()
   622  	for i := 0; i < b.N; i++ {
   623  		all := re.FindAll(s, -1)
   624  		if len(all) != 10 {
   625  			b.Fatalf("FindAll(%q) = %q; want 10 matches", s, all)
   626  		}
   627  	}
   628  }
   629  
   630  func BenchmarkFindString(b *testing.B) {
   631  	b.StopTimer()
   632  	re := MustCompile("a+b+")
   633  	wantSubs := "aaabb"
   634  	s := "acbb" + wantSubs + "dd"
   635  	b.StartTimer()
   636  	b.ReportAllocs()
   637  	for i := 0; i < b.N; i++ {
   638  		subs := re.FindString(s)
   639  		if subs != wantSubs {
   640  			b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
   641  		}
   642  	}
   643  }
   644  
   645  func BenchmarkFindSubmatch(b *testing.B) {
   646  	b.StopTimer()
   647  	re := MustCompile("a(a+b+)b")
   648  	wantSubs := "aaabb"
   649  	s := []byte("acbb" + wantSubs + "dd")
   650  	b.StartTimer()
   651  	b.ReportAllocs()
   652  	for i := 0; i < b.N; i++ {
   653  		subs := re.FindSubmatch(s)
   654  		if string(subs[0]) != wantSubs {
   655  			b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
   656  		}
   657  		if string(subs[1]) != "aab" {
   658  			b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
   659  		}
   660  	}
   661  }
   662  
   663  func BenchmarkFindStringSubmatch(b *testing.B) {
   664  	b.StopTimer()
   665  	re := MustCompile("a(a+b+)b")
   666  	wantSubs := "aaabb"
   667  	s := "acbb" + wantSubs + "dd"
   668  	b.StartTimer()
   669  	b.ReportAllocs()
   670  	for i := 0; i < b.N; i++ {
   671  		subs := re.FindStringSubmatch(s)
   672  		if subs[0] != wantSubs {
   673  			b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
   674  		}
   675  		if subs[1] != "aab" {
   676  			b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
   677  		}
   678  	}
   679  }
   680  
   681  func BenchmarkLiteral(b *testing.B) {
   682  	x := strings.Repeat("x", 50) + "y"
   683  	b.StopTimer()
   684  	re := MustCompile("y")
   685  	b.StartTimer()
   686  	for i := 0; i < b.N; i++ {
   687  		if !re.MatchString(x) {
   688  			b.Fatalf("no match!")
   689  		}
   690  	}
   691  }
   692  
   693  func BenchmarkNotLiteral(b *testing.B) {
   694  	x := strings.Repeat("x", 50) + "y"
   695  	b.StopTimer()
   696  	re := MustCompile(".y")
   697  	b.StartTimer()
   698  	for i := 0; i < b.N; i++ {
   699  		if !re.MatchString(x) {
   700  			b.Fatalf("no match!")
   701  		}
   702  	}
   703  }
   704  
   705  func BenchmarkMatchClass(b *testing.B) {
   706  	b.StopTimer()
   707  	x := strings.Repeat("xxxx", 20) + "w"
   708  	re := MustCompile("[abcdw]")
   709  	b.StartTimer()
   710  	for i := 0; i < b.N; i++ {
   711  		if !re.MatchString(x) {
   712  			b.Fatalf("no match!")
   713  		}
   714  	}
   715  }
   716  
   717  func BenchmarkMatchClass_InRange(b *testing.B) {
   718  	b.StopTimer()
   719  	// 'b' is between 'a' and 'c', so the charclass
   720  	// range checking is no help here.
   721  	x := strings.Repeat("bbbb", 20) + "c"
   722  	re := MustCompile("[ac]")
   723  	b.StartTimer()
   724  	for i := 0; i < b.N; i++ {
   725  		if !re.MatchString(x) {
   726  			b.Fatalf("no match!")
   727  		}
   728  	}
   729  }
   730  
   731  func BenchmarkReplaceAll(b *testing.B) {
   732  	x := "abcdefghijklmnopqrstuvwxyz"
   733  	b.StopTimer()
   734  	re := MustCompile("[cjrw]")
   735  	b.StartTimer()
   736  	for i := 0; i < b.N; i++ {
   737  		re.ReplaceAllString(x, "")
   738  	}
   739  }
   740  
   741  func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
   742  	b.StopTimer()
   743  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   744  	re := MustCompile("^zbc(d|e)")
   745  	b.StartTimer()
   746  	for i := 0; i < b.N; i++ {
   747  		re.Match(x)
   748  	}
   749  }
   750  
   751  func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
   752  	b.StopTimer()
   753  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   754  	for i := 0; i < 15; i++ {
   755  		x = append(x, x...)
   756  	}
   757  	re := MustCompile("^zbc(d|e)")
   758  	b.StartTimer()
   759  	for i := 0; i < b.N; i++ {
   760  		re.Match(x)
   761  	}
   762  }
   763  
   764  func BenchmarkAnchoredShortMatch(b *testing.B) {
   765  	b.StopTimer()
   766  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   767  	re := MustCompile("^.bc(d|e)")
   768  	b.StartTimer()
   769  	for i := 0; i < b.N; i++ {
   770  		re.Match(x)
   771  	}
   772  }
   773  
   774  func BenchmarkAnchoredLongMatch(b *testing.B) {
   775  	b.StopTimer()
   776  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   777  	for i := 0; i < 15; i++ {
   778  		x = append(x, x...)
   779  	}
   780  	re := MustCompile("^.bc(d|e)")
   781  	b.StartTimer()
   782  	for i := 0; i < b.N; i++ {
   783  		re.Match(x)
   784  	}
   785  }
   786  
   787  func BenchmarkOnePassShortA(b *testing.B) {
   788  	b.StopTimer()
   789  	x := []byte("abcddddddeeeededd")
   790  	re := MustCompile("^.bc(d|e)*$")
   791  	b.StartTimer()
   792  	for i := 0; i < b.N; i++ {
   793  		re.Match(x)
   794  	}
   795  }
   796  
   797  func BenchmarkNotOnePassShortA(b *testing.B) {
   798  	b.StopTimer()
   799  	x := []byte("abcddddddeeeededd")
   800  	re := MustCompile(".bc(d|e)*$")
   801  	b.StartTimer()
   802  	for i := 0; i < b.N; i++ {
   803  		re.Match(x)
   804  	}
   805  }
   806  
   807  func BenchmarkOnePassShortB(b *testing.B) {
   808  	b.StopTimer()
   809  	x := []byte("abcddddddeeeededd")
   810  	re := MustCompile("^.bc(?:d|e)*$")
   811  	b.StartTimer()
   812  	for i := 0; i < b.N; i++ {
   813  		re.Match(x)
   814  	}
   815  }
   816  
   817  func BenchmarkNotOnePassShortB(b *testing.B) {
   818  	b.StopTimer()
   819  	x := []byte("abcddddddeeeededd")
   820  	re := MustCompile(".bc(?:d|e)*$")
   821  	b.StartTimer()
   822  	for i := 0; i < b.N; i++ {
   823  		re.Match(x)
   824  	}
   825  }
   826  
   827  func BenchmarkOnePassLongPrefix(b *testing.B) {
   828  	b.StopTimer()
   829  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   830  	re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$")
   831  	b.StartTimer()
   832  	for i := 0; i < b.N; i++ {
   833  		re.Match(x)
   834  	}
   835  }
   836  
   837  func BenchmarkOnePassLongNotPrefix(b *testing.B) {
   838  	b.StopTimer()
   839  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   840  	re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$")
   841  	b.StartTimer()
   842  	for i := 0; i < b.N; i++ {
   843  		re.Match(x)
   844  	}
   845  }
   846  
   847  func BenchmarkMatchParallelShared(b *testing.B) {
   848  	x := []byte("this is a long line that contains foo bar baz")
   849  	re := MustCompile("foo (ba+r)? baz")
   850  	b.ResetTimer()
   851  	b.RunParallel(func(pb *testing.PB) {
   852  		for pb.Next() {
   853  			re.Match(x)
   854  		}
   855  	})
   856  }
   857  
   858  func BenchmarkMatchParallelCopied(b *testing.B) {
   859  	x := []byte("this is a long line that contains foo bar baz")
   860  	re := MustCompile("foo (ba+r)? baz")
   861  	b.ResetTimer()
   862  	b.RunParallel(func(pb *testing.PB) {
   863  		re := re.Copy()
   864  		for pb.Next() {
   865  			re.Match(x)
   866  		}
   867  	})
   868  }
   869  
   870  var sink string
   871  
   872  func BenchmarkQuoteMetaAll(b *testing.B) {
   873  	specials := make([]byte, 0)
   874  	for i := byte(0); i < utf8.RuneSelf; i++ {
   875  		if special(i) {
   876  			specials = append(specials, i)
   877  		}
   878  	}
   879  	s := string(specials)
   880  	b.SetBytes(int64(len(s)))
   881  	b.ResetTimer()
   882  	for i := 0; i < b.N; i++ {
   883  		sink = QuoteMeta(s)
   884  	}
   885  }
   886  
   887  func BenchmarkQuoteMetaNone(b *testing.B) {
   888  	s := "abcdefghijklmnopqrstuvwxyz"
   889  	b.SetBytes(int64(len(s)))
   890  	b.ResetTimer()
   891  	for i := 0; i < b.N; i++ {
   892  		sink = QuoteMeta(s)
   893  	}
   894  }
   895  
   896  var compileBenchData = []struct{ name, re string }{
   897  	{"Onepass", `^a.[l-nA-Cg-j]?e$`},
   898  	{"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`},
   899  	{"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)},
   900  }
   901  
   902  func BenchmarkCompile(b *testing.B) {
   903  	for _, data := range compileBenchData {
   904  		b.Run(data.name, func(b *testing.B) {
   905  			b.ReportAllocs()
   906  			for i := 0; i < b.N; i++ {
   907  				if _, err := Compile(data.re); err != nil {
   908  					b.Fatal(err)
   909  				}
   910  			}
   911  		})
   912  	}
   913  }
   914  
   915  func TestDeepEqual(t *testing.T) {
   916  	re1 := MustCompile("a.*b.*c.*d")
   917  	re2 := MustCompile("a.*b.*c.*d")
   918  	if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1.
   919  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   920  	}
   921  
   922  	re1.MatchString("abcdefghijklmn")
   923  	if !reflect.DeepEqual(re1, re2) {
   924  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   925  	}
   926  
   927  	re2.MatchString("abcdefghijklmn")
   928  	if !reflect.DeepEqual(re1, re2) {
   929  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   930  	}
   931  
   932  	re2.MatchString(strings.Repeat("abcdefghijklmn", 100))
   933  	if !reflect.DeepEqual(re1, re2) {
   934  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   935  	}
   936  }
   937  
   938  var minInputLenTests = []struct {
   939  	Regexp string
   940  	min    int
   941  }{
   942  	{``, 0},
   943  	{`a`, 1},
   944  	{`aa`, 2},
   945  	{`(aa)a`, 3},
   946  	{`(?:aa)a`, 3},
   947  	{`a?a`, 1},
   948  	{`(aaa)|(aa)`, 2},
   949  	{`(aa)+a`, 3},
   950  	{`(aa)*a`, 1},
   951  	{`(aa){3,5}`, 6},
   952  	{`[a-z]`, 1},
   953  	{`日`, 3},
   954  }
   955  
   956  func TestMinInputLen(t *testing.T) {
   957  	for _, tt := range minInputLenTests {
   958  		re, _ := syntax.Parse(tt.Regexp, syntax.Perl)
   959  		m := minInputLen(re)
   960  		if m != tt.min {
   961  			t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min)
   962  		}
   963  	}
   964  }
   965  
   966  func TestUnmarshalText(t *testing.T) {
   967  	unmarshaled := new(Regexp)
   968  	for i := range goodRe {
   969  		re := compileTest(t, goodRe[i], "")
   970  		marshaled, err := re.MarshalText()
   971  		if err != nil {
   972  			t.Errorf("regexp %#q failed to marshal: %s", re, err)
   973  			continue
   974  		}
   975  		if err := unmarshaled.UnmarshalText(marshaled); err != nil {
   976  			t.Errorf("regexp %#q failed to unmarshal: %s", re, err)
   977  			continue
   978  		}
   979  		if unmarshaled.String() != goodRe[i] {
   980  			t.Errorf("UnmarshalText returned unexpected value: %s", unmarshaled.String())
   981  		}
   982  
   983  		buf := make([]byte, 4, 32)
   984  		marshalAppend, err := re.AppendText(buf)
   985  		if err != nil {
   986  			t.Errorf("regexp %#q failed to marshal: %s", re, err)
   987  			continue
   988  		}
   989  		marshalAppend = marshalAppend[4:]
   990  		if err := unmarshaled.UnmarshalText(marshalAppend); err != nil {
   991  			t.Errorf("regexp %#q failed to unmarshal: %s", re, err)
   992  			continue
   993  		}
   994  		if unmarshaled.String() != goodRe[i] {
   995  			t.Errorf("UnmarshalText returned unexpected value: %s", unmarshaled.String())
   996  		}
   997  	}
   998  	t.Run("invalid pattern", func(t *testing.T) {
   999  		re := new(Regexp)
  1000  		err := re.UnmarshalText([]byte(`\`))
  1001  		if err == nil {
  1002  			t.Error("unexpected success")
  1003  		}
  1004  	})
  1005  }
  1006  

View as plain text