Source file src/regexp/all_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"reflect"
     9  	"regexp/syntax"
    10  	"slices"
    11  	"strings"
    12  	"testing"
    13  	"unicode/utf8"
    14  )
    15  
// goodRe lists patterns that Compile is expected to accept.
// TestGoodCompile compiles each entry and TestUnmarshalText round-trips
// each entry through MarshalText and UnmarshalText.
var goodRe = []string{
	``,
	`.`,
	`^.$`,
	`a`,
	`a*`,
	`a+`,
	`a?`,
	`a|b`,
	`a*|b*`,
	`(a*|b)(c*|d)`,
	`[a-z]`,
	`[a-abc-c\-\]\[]`,
	`[a-z]+`,
	`[abc]`,
	`[^1234]`,
	`[^\n]`,
	`\!\\`,
}
    35  
    36  type stringError struct {
    37  	re  string
    38  	err string
    39  }
    40  
// badRe lists patterns that Compile is expected to reject. The second
// element of each entry is text that TestBadCompile, through compileTest,
// requires to appear somewhere in the returned error message.
var badRe = []stringError{
	{`*`, "missing argument to repetition operator: `*`"},
	{`+`, "missing argument to repetition operator: `+`"},
	{`?`, "missing argument to repetition operator: `?`"},
	{`(abc`, "missing closing ): `(abc`"},
	{`abc)`, "unexpected ): `abc)`"},
	{`x[a-z`, "missing closing ]: `[a-z`"},
	{`[z-a]`, "invalid character class range: `z-a`"},
	{`abc\`, "trailing backslash at end of expression"},
	{`a**`, "invalid nested repetition operator: `**`"},
	{`a*+`, "invalid nested repetition operator: `*+`"},
	{`\x`, "invalid escape sequence: `\\x`"},
	// 27000 copies of \pL must be rejected as too large.
	{strings.Repeat(`\pL`, 27000), "expression too large"},
}
    55  
    56  func compileTest(t *testing.T, expr string, error string) *Regexp {
    57  	re, err := Compile(expr)
    58  	if error == "" && err != nil {
    59  		t.Error("compiling `", expr, "`; unexpected error: ", err.Error())
    60  	}
    61  	if error != "" && err == nil {
    62  		t.Error("compiling `", expr, "`; missing error")
    63  	} else if error != "" && !strings.Contains(err.Error(), error) {
    64  		t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error)
    65  	}
    66  	return re
    67  }
    68  
    69  func TestGoodCompile(t *testing.T) {
    70  	for i := 0; i < len(goodRe); i++ {
    71  		compileTest(t, goodRe[i], "")
    72  	}
    73  }
    74  
    75  func TestBadCompile(t *testing.T) {
    76  	for i := 0; i < len(badRe); i++ {
    77  		compileTest(t, badRe[i].re, badRe[i].err)
    78  	}
    79  }
    80  
    81  func matchTest(t *testing.T, test *FindTest) {
    82  	re := compileTest(t, test.pat, "")
    83  	if re == nil {
    84  		return
    85  	}
    86  	m := re.MatchString(test.text)
    87  	if m != (len(test.matches) > 0) {
    88  		t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0)
    89  	}
    90  	// now try bytes
    91  	m = re.Match([]byte(test.text))
    92  	if m != (len(test.matches) > 0) {
    93  		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
    94  	}
    95  }
    96  
    97  func TestMatch(t *testing.T) {
    98  	for _, test := range findTests {
    99  		matchTest(t, &test)
   100  	}
   101  }
   102  
   103  func matchFunctionTest(t *testing.T, test *FindTest) {
   104  	m, err := MatchString(test.pat, test.text)
   105  	if err == nil {
   106  		return
   107  	}
   108  	if m != (len(test.matches) > 0) {
   109  		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
   110  	}
   111  }
   112  
   113  func TestMatchFunction(t *testing.T) {
   114  	for _, test := range findTests {
   115  		matchFunctionTest(t, &test)
   116  	}
   117  }
   118  
   119  func copyMatchTest(t *testing.T, test *FindTest) {
   120  	re := compileTest(t, test.pat, "")
   121  	if re == nil {
   122  		return
   123  	}
   124  	m1 := re.MatchString(test.text)
   125  	m2 := re.Copy().MatchString(test.text)
   126  	if m1 != m2 {
   127  		t.Errorf("Copied Regexp match failure on %s: original gave %t; copy gave %t; should be %t",
   128  			test, m1, m2, len(test.matches) > 0)
   129  	}
   130  }
   131  
   132  func TestCopyMatch(t *testing.T) {
   133  	for _, test := range findTests {
   134  		copyMatchTest(t, &test)
   135  	}
   136  }
   137  
   138  type ReplaceTest struct {
   139  	pattern, replacement, input, output string
   140  }
   141  
   142  var replaceTests = []ReplaceTest{
   143  	// Test empty input and/or replacement, with pattern that matches the empty string.
   144  	{"", "", "", ""},
   145  	{"", "x", "", "x"},
   146  	{"", "", "abc", "abc"},
   147  	{"", "x", "abc", "xaxbxcx"},
   148  
   149  	// Test empty input and/or replacement, with pattern that does not match the empty string.
   150  	{"b", "", "", ""},
   151  	{"b", "x", "", ""},
   152  	{"b", "", "abc", "ac"},
   153  	{"b", "x", "abc", "axc"},
   154  	{"y", "", "", ""},
   155  	{"y", "x", "", ""},
   156  	{"y", "", "abc", "abc"},
   157  	{"y", "x", "abc", "abc"},
   158  
   159  	// Multibyte characters -- verify that we don't try to match in the middle
   160  	// of a character.
   161  	{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
   162  	{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
   163  
   164  	// Start and end of a string.
   165  	{"^[a-c]*", "x", "abcdabc", "xdabc"},
   166  	{"[a-c]*$", "x", "abcdabc", "abcdx"},
   167  	{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
   168  	{"^[a-c]*", "x", "abc", "x"},
   169  	{"[a-c]*$", "x", "abc", "x"},
   170  	{"^[a-c]*$", "x", "abc", "x"},
   171  	{"^[a-c]*", "x", "dabce", "xdabce"},
   172  	{"[a-c]*$", "x", "dabce", "dabcex"},
   173  	{"^[a-c]*$", "x", "dabce", "dabce"},
   174  	{"^[a-c]*", "x", "", "x"},
   175  	{"[a-c]*$", "x", "", "x"},
   176  	{"^[a-c]*$", "x", "", "x"},
   177  
   178  	{"^[a-c]+", "x", "abcdabc", "xdabc"},
   179  	{"[a-c]+$", "x", "abcdabc", "abcdx"},
   180  	{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
   181  	{"^[a-c]+", "x", "abc", "x"},
   182  	{"[a-c]+$", "x", "abc", "x"},
   183  	{"^[a-c]+$", "x", "abc", "x"},
   184  	{"^[a-c]+", "x", "dabce", "dabce"},
   185  	{"[a-c]+$", "x", "dabce", "dabce"},
   186  	{"^[a-c]+$", "x", "dabce", "dabce"},
   187  	{"^[a-c]+", "x", "", ""},
   188  	{"[a-c]+$", "x", "", ""},
   189  	{"^[a-c]+$", "x", "", ""},
   190  
   191  	// Other cases.
   192  	{"abc", "def", "abcdefg", "defdefg"},
   193  	{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
   194  	{"abc", "", "abcdabc", "d"},
   195  	{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
   196  	{"abc", "d", "", ""},
   197  	{"abc", "d", "abc", "d"},
   198  	{".+", "x", "abc", "x"},
   199  	{"[a-c]*", "x", "def", "xdxexfx"},
   200  	{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
   201  	{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
   202  
   203  	// Substitutions
   204  	{"a+", "($0)", "banana", "b(a)n(a)n(a)"},
   205  	{"a+", "(${0})", "banana", "b(a)n(a)n(a)"},
   206  	{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
   207  	{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
   208  	{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"},
   209  	{"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "},
   210  	{"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"},
   211  	{"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"},
   212  	{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"},
   213  	{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"},
   214  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"},
   215  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"},
   216  	{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""},
   217  	{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"},
   218  	{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"},
   219  	{"a+", "${oops", "aaa", "${oops"},
   220  	{"a+", "$$", "aaa", "$"},
   221  	{"a+", "$", "aaa", "$"},
   222  
   223  	// Substitution when subexpression isn't found
   224  	{"(x)?", "$1", "123", "123"},
   225  	{"abc", "$1", "123", "123"},
   226  
   227  	// Substitutions involving a (x){0}
   228  	{"(a)(b){0}(c)", ".$1|$3.", "xacxacx", "x.a|c.x.a|c.x"},
   229  	{"(a)(((b))){0}c", ".$1.", "xacxacx", "x.a.x.a.x"},
   230  	{"((a(b){0}){3}){5}(h)", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
   231  	{"((a(b){0}){3}){5}h", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
   232  }
   233  
   234  var replaceLiteralTests = []ReplaceTest{
   235  	// Substitutions
   236  	{"a+", "($0)", "banana", "b($0)n($0)n($0)"},
   237  	{"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"},
   238  	{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
   239  	{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
   240  	{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"},
   241  	{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"},
   242  	{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"},
   243  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"},
   244  	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"},
   245  	{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"},
   246  	{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"},
   247  	{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"},
   248  	{"a+", "${oops", "aaa", "${oops"},
   249  	{"a+", "$$", "aaa", "$$"},
   250  	{"a+", "$", "aaa", "$"},
   251  }
   252  
   253  type ReplaceFuncTest struct {
   254  	pattern       string
   255  	replacement   func(string) string
   256  	input, output string
   257  }
   258  
   259  var replaceFuncTests = []ReplaceFuncTest{
   260  	{"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
   261  	{"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
   262  	{"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
   263  }
   264  
   265  func TestReplaceAll(t *testing.T) {
   266  	for _, tc := range replaceTests {
   267  		re, err := Compile(tc.pattern)
   268  		if err != nil {
   269  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   270  			continue
   271  		}
   272  		actual := re.ReplaceAllString(tc.input, tc.replacement)
   273  		if actual != tc.output {
   274  			t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q",
   275  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   276  		}
   277  		// now try bytes
   278  		actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement)))
   279  		if actual != tc.output {
   280  			t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q",
   281  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   282  		}
   283  	}
   284  }
   285  
   286  func TestReplaceAllLiteral(t *testing.T) {
   287  	// Run ReplaceAll tests that do not have $ expansions.
   288  	for _, tc := range replaceTests {
   289  		if strings.Contains(tc.replacement, "$") {
   290  			continue
   291  		}
   292  		re, err := Compile(tc.pattern)
   293  		if err != nil {
   294  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   295  			continue
   296  		}
   297  		actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
   298  		if actual != tc.output {
   299  			t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
   300  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   301  		}
   302  		// now try bytes
   303  		actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
   304  		if actual != tc.output {
   305  			t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
   306  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   307  		}
   308  	}
   309  
   310  	// Run literal-specific tests.
   311  	for _, tc := range replaceLiteralTests {
   312  		re, err := Compile(tc.pattern)
   313  		if err != nil {
   314  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   315  			continue
   316  		}
   317  		actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
   318  		if actual != tc.output {
   319  			t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
   320  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   321  		}
   322  		// now try bytes
   323  		actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
   324  		if actual != tc.output {
   325  			t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
   326  				tc.pattern, tc.input, tc.replacement, actual, tc.output)
   327  		}
   328  	}
   329  }
   330  
   331  func TestReplaceAllFunc(t *testing.T) {
   332  	for _, tc := range replaceFuncTests {
   333  		re, err := Compile(tc.pattern)
   334  		if err != nil {
   335  			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
   336  			continue
   337  		}
   338  		actual := re.ReplaceAllStringFunc(tc.input, tc.replacement)
   339  		if actual != tc.output {
   340  			t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
   341  				tc.pattern, tc.input, actual, tc.output)
   342  		}
   343  		// now try bytes
   344  		actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) }))
   345  		if actual != tc.output {
   346  			t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
   347  				tc.pattern, tc.input, actual, tc.output)
   348  		}
   349  	}
   350  }
   351  
   352  type MetaTest struct {
   353  	pattern, output, literal string
   354  	isLiteral                bool
   355  }
   356  
// metaTests holds the cases run by TestQuoteMeta. TestLiteralPrefix runs
// them as well, together with literalPrefixTests.
var metaTests = []MetaTest{
	{``, ``, ``, true},
	{`foo`, `foo`, `foo`, true},
	{`日本語+`, `日本語\+`, `日本語`, false},
	{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
	{`foo.\$`, `foo\.\\\$`, `foo`, false},     // has escaped operators and real operators
	{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
}
   365  
// literalPrefixTests holds extra cases that only TestLiteralPrefix runs,
// in addition to metaTests.
var literalPrefixTests = []MetaTest{
	// See golang.org/issue/11175.
	// output is unused.
	{`^0^0$`, ``, `0`, false},
	{`^0^`, ``, ``, false},
	{`^0$`, ``, `0`, true},
	{`$0^`, ``, ``, false},
	{`$0$`, ``, ``, false},
	{`^^0$$`, ``, ``, false},
	{`^$^$`, ``, ``, false},
	{`$$0^^`, ``, ``, false},
	{`a\x{fffd}b`, ``, `a`, false},
	{`\x{fffd}b`, ``, ``, false},
	{"\ufffd", ``, ``, false},
}
   381  
   382  func TestQuoteMeta(t *testing.T) {
   383  	for _, tc := range metaTests {
   384  		// Verify that QuoteMeta returns the expected string.
   385  		quoted := QuoteMeta(tc.pattern)
   386  		if quoted != tc.output {
   387  			t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`",
   388  				tc.pattern, quoted, tc.output)
   389  			continue
   390  		}
   391  
   392  		// Verify that the quoted string is in fact treated as expected
   393  		// by Compile -- i.e. that it matches the original, unquoted string.
   394  		if tc.pattern != "" {
   395  			re, err := Compile(quoted)
   396  			if err != nil {
   397  				t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err)
   398  				continue
   399  			}
   400  			src := "abc" + tc.pattern + "def"
   401  			repl := "xyz"
   402  			replaced := re.ReplaceAllString(src, repl)
   403  			expected := "abcxyzdef"
   404  			if replaced != expected {
   405  				t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`",
   406  					tc.pattern, src, repl, replaced, expected)
   407  			}
   408  		}
   409  	}
   410  }
   411  
   412  func TestLiteralPrefix(t *testing.T) {
   413  	for _, tc := range append(metaTests, literalPrefixTests...) {
   414  		// Literal method needs to scan the pattern.
   415  		re := MustCompile(tc.pattern)
   416  		str, complete := re.LiteralPrefix()
   417  		if complete != tc.isLiteral {
   418  			t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
   419  		}
   420  		if str != tc.literal {
   421  			t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
   422  		}
   423  	}
   424  }
   425  
// subexpIndex records the result TestSubexp expects from
// Regexp.SubexpIndex for one subexpression name.
type subexpIndex struct {
	name  string // name passed to SubexpIndex
	index int    // expected return value
}
   430  
// subexpCase describes one TestSubexp case.
type subexpCase struct {
	input   string        // pattern to compile
	num     int           // expected NumSubexp result
	names   []string      // expected SubexpNames result; nil skips the per-name comparison
	indices []subexpIndex // SubexpIndex lookups to verify
}
   437  
   438  var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}
   439  
// subexpCases holds the cases run by TestSubexp. The final entry uses
// named groups, with the name "foo" given to two groups.
var subexpCases = []subexpCase{
	{``, 0, nil, emptySubexpIndices},
	{`.*`, 0, nil, emptySubexpIndices},
	{`abba`, 0, nil, emptySubexpIndices},
	{`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
	{`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
	{`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
	{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
	{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
	{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
	{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
	{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
}
   453  
   454  func TestSubexp(t *testing.T) {
   455  	for _, c := range subexpCases {
   456  		re := MustCompile(c.input)
   457  		n := re.NumSubexp()
   458  		if n != c.num {
   459  			t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
   460  			continue
   461  		}
   462  		names := re.SubexpNames()
   463  		if len(names) != 1+n {
   464  			t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
   465  			continue
   466  		}
   467  		if c.names != nil {
   468  			for i := 0; i < 1+n; i++ {
   469  				if names[i] != c.names[i] {
   470  					t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
   471  				}
   472  			}
   473  		}
   474  		for _, subexp := range c.indices {
   475  			index := re.SubexpIndex(subexp.name)
   476  			if index != subexp.index {
   477  				t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
   478  			}
   479  		}
   480  	}
   481  }
   482  
// splitTests holds the cases run by TestSplit.
var splitTests = []struct {
	s   string   // input text passed to Split
	r   string   // regular expression used as the separator
	n   int      // count argument passed to Split
	out []string // expected result
}{
	{"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}},
	{"foo:and:bar", ":", 1, []string{"foo:and:bar"}},
	{"foo:and:bar", ":", 2, []string{"foo", "and:bar"}},
	{"foo:and:bar", "foo", -1, []string{"", ":and:bar"}},
	{"foo:and:bar", "bar", -1, []string{"foo:and:", ""}},
	{"foo:and:bar", "baz", -1, []string{"foo:and:bar"}},
	{"baabaab", "a", -1, []string{"b", "", "b", "", "b"}},
	{"baabaab", "a*", -1, []string{"b", "b", "b"}},
	{"baabaab", "ba*", -1, []string{"", "", "", ""}},
	{"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}},
	{"foobar", "f+.*b+", -1, []string{"", "ar"}},
	{"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}},
	{"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}},
	{"a,b,c,d,e,f", ",", 0, nil},
	{",", ",", -1, []string{"", ""}},
	{",,,", ",", -1, []string{"", "", "", ""}},
	{"", ",", -1, []string{""}},
	{"", ".*", -1, []string{""}},
	{"", ".+", -1, []string{""}},
	{"", "", -1, []string{}},
	{"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}},
	{"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}},
	{":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}},
}
   513  
   514  func TestSplit(t *testing.T) {
   515  	for i, test := range splitTests {
   516  		re, err := Compile(test.r)
   517  		if err != nil {
   518  			t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error())
   519  			continue
   520  		}
   521  
   522  		split := re.Split(test.s, test.n)
   523  		if !slices.Equal(split, test.out) {
   524  			t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out)
   525  		}
   526  
   527  		if QuoteMeta(test.r) == test.r {
   528  			strsplit := strings.SplitN(test.s, test.r, test.n)
   529  			if !slices.Equal(split, strsplit) {
   530  				t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit)
   531  			}
   532  		}
   533  	}
   534  }
   535  
   536  // The following sequence of Match calls used to panic. See issue #12980.
   537  func TestParseAndCompile(t *testing.T) {
   538  	expr := "a$"
   539  	s := "a\nb"
   540  
   541  	for i, tc := range []struct {
   542  		reFlags  syntax.Flags
   543  		expMatch bool
   544  	}{
   545  		{syntax.Perl | syntax.OneLine, false},
   546  		{syntax.Perl &^ syntax.OneLine, true},
   547  	} {
   548  		parsed, err := syntax.Parse(expr, tc.reFlags)
   549  		if err != nil {
   550  			t.Fatalf("%d: parse: %v", i, err)
   551  		}
   552  		re, err := Compile(parsed.String())
   553  		if err != nil {
   554  			t.Fatalf("%d: compile: %v", i, err)
   555  		}
   556  		if match := re.MatchString(s); match != tc.expMatch {
   557  			t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch)
   558  		}
   559  	}
   560  }
   561  
   562  // Check that one-pass cutoff does trigger.
   563  func TestOnePassCutoff(t *testing.T) {
   564  	re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
   565  	if err != nil {
   566  		t.Fatalf("parse: %v", err)
   567  	}
   568  	p, err := syntax.Compile(re.Simplify())
   569  	if err != nil {
   570  		t.Fatalf("compile: %v", err)
   571  	}
   572  	if compileOnePass(p) != nil {
   573  		t.Fatalf("makeOnePass succeeded; wanted nil")
   574  	}
   575  }
   576  
// TestSwitchBacktrack checks that the same machine can be used with the
// standard matcher and then the backtracker when there are no captures.
// The test passes as long as neither call panics; the match results
// themselves are not inspected.
func TestSwitchBacktrack(t *testing.T) {
	re := MustCompile(`a|b`)
	// One byte longer than maxBacktrackVector.
	long := make([]byte, maxBacktrackVector+1)

	// The following sequence of Match calls used to panic. See issue #10319.
	// The order of the two calls is the point of the test.
	re.Match(long)     // triggers standard matcher
	re.Match(long[:1]) // triggers backtracker
}
   587  
   588  func BenchmarkFind(b *testing.B) {
   589  	b.StopTimer()
   590  	re := MustCompile("a+b+")
   591  	wantSubs := "aaabb"
   592  	s := []byte("acbb" + wantSubs + "dd")
   593  	b.StartTimer()
   594  	b.ReportAllocs()
   595  	for i := 0; i < b.N; i++ {
   596  		subs := re.Find(s)
   597  		if string(subs) != wantSubs {
   598  			b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
   599  		}
   600  	}
   601  }
   602  
   603  func BenchmarkFindAllNoMatches(b *testing.B) {
   604  	re := MustCompile("a+b+")
   605  	s := []byte("acddee")
   606  	b.ReportAllocs()
   607  	b.ResetTimer()
   608  	for i := 0; i < b.N; i++ {
   609  		all := re.FindAll(s, -1)
   610  		if all != nil {
   611  			b.Fatalf("FindAll(%q) = %q; want nil", s, all)
   612  		}
   613  	}
   614  }
   615  
   616  func BenchmarkFindString(b *testing.B) {
   617  	b.StopTimer()
   618  	re := MustCompile("a+b+")
   619  	wantSubs := "aaabb"
   620  	s := "acbb" + wantSubs + "dd"
   621  	b.StartTimer()
   622  	b.ReportAllocs()
   623  	for i := 0; i < b.N; i++ {
   624  		subs := re.FindString(s)
   625  		if subs != wantSubs {
   626  			b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
   627  		}
   628  	}
   629  }
   630  
   631  func BenchmarkFindSubmatch(b *testing.B) {
   632  	b.StopTimer()
   633  	re := MustCompile("a(a+b+)b")
   634  	wantSubs := "aaabb"
   635  	s := []byte("acbb" + wantSubs + "dd")
   636  	b.StartTimer()
   637  	b.ReportAllocs()
   638  	for i := 0; i < b.N; i++ {
   639  		subs := re.FindSubmatch(s)
   640  		if string(subs[0]) != wantSubs {
   641  			b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
   642  		}
   643  		if string(subs[1]) != "aab" {
   644  			b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
   645  		}
   646  	}
   647  }
   648  
   649  func BenchmarkFindStringSubmatch(b *testing.B) {
   650  	b.StopTimer()
   651  	re := MustCompile("a(a+b+)b")
   652  	wantSubs := "aaabb"
   653  	s := "acbb" + wantSubs + "dd"
   654  	b.StartTimer()
   655  	b.ReportAllocs()
   656  	for i := 0; i < b.N; i++ {
   657  		subs := re.FindStringSubmatch(s)
   658  		if subs[0] != wantSubs {
   659  			b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
   660  		}
   661  		if subs[1] != "aab" {
   662  			b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
   663  		}
   664  	}
   665  }
   666  
   667  func BenchmarkLiteral(b *testing.B) {
   668  	x := strings.Repeat("x", 50) + "y"
   669  	b.StopTimer()
   670  	re := MustCompile("y")
   671  	b.StartTimer()
   672  	for i := 0; i < b.N; i++ {
   673  		if !re.MatchString(x) {
   674  			b.Fatalf("no match!")
   675  		}
   676  	}
   677  }
   678  
   679  func BenchmarkNotLiteral(b *testing.B) {
   680  	x := strings.Repeat("x", 50) + "y"
   681  	b.StopTimer()
   682  	re := MustCompile(".y")
   683  	b.StartTimer()
   684  	for i := 0; i < b.N; i++ {
   685  		if !re.MatchString(x) {
   686  			b.Fatalf("no match!")
   687  		}
   688  	}
   689  }
   690  
   691  func BenchmarkMatchClass(b *testing.B) {
   692  	b.StopTimer()
   693  	x := strings.Repeat("xxxx", 20) + "w"
   694  	re := MustCompile("[abcdw]")
   695  	b.StartTimer()
   696  	for i := 0; i < b.N; i++ {
   697  		if !re.MatchString(x) {
   698  			b.Fatalf("no match!")
   699  		}
   700  	}
   701  }
   702  
   703  func BenchmarkMatchClass_InRange(b *testing.B) {
   704  	b.StopTimer()
   705  	// 'b' is between 'a' and 'c', so the charclass
   706  	// range checking is no help here.
   707  	x := strings.Repeat("bbbb", 20) + "c"
   708  	re := MustCompile("[ac]")
   709  	b.StartTimer()
   710  	for i := 0; i < b.N; i++ {
   711  		if !re.MatchString(x) {
   712  			b.Fatalf("no match!")
   713  		}
   714  	}
   715  }
   716  
   717  func BenchmarkReplaceAll(b *testing.B) {
   718  	x := "abcdefghijklmnopqrstuvwxyz"
   719  	b.StopTimer()
   720  	re := MustCompile("[cjrw]")
   721  	b.StartTimer()
   722  	for i := 0; i < b.N; i++ {
   723  		re.ReplaceAllString(x, "")
   724  	}
   725  }
   726  
   727  func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
   728  	b.StopTimer()
   729  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   730  	re := MustCompile("^zbc(d|e)")
   731  	b.StartTimer()
   732  	for i := 0; i < b.N; i++ {
   733  		re.Match(x)
   734  	}
   735  }
   736  
   737  func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
   738  	b.StopTimer()
   739  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   740  	for i := 0; i < 15; i++ {
   741  		x = append(x, x...)
   742  	}
   743  	re := MustCompile("^zbc(d|e)")
   744  	b.StartTimer()
   745  	for i := 0; i < b.N; i++ {
   746  		re.Match(x)
   747  	}
   748  }
   749  
   750  func BenchmarkAnchoredShortMatch(b *testing.B) {
   751  	b.StopTimer()
   752  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   753  	re := MustCompile("^.bc(d|e)")
   754  	b.StartTimer()
   755  	for i := 0; i < b.N; i++ {
   756  		re.Match(x)
   757  	}
   758  }
   759  
   760  func BenchmarkAnchoredLongMatch(b *testing.B) {
   761  	b.StopTimer()
   762  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   763  	for i := 0; i < 15; i++ {
   764  		x = append(x, x...)
   765  	}
   766  	re := MustCompile("^.bc(d|e)")
   767  	b.StartTimer()
   768  	for i := 0; i < b.N; i++ {
   769  		re.Match(x)
   770  	}
   771  }
   772  
   773  func BenchmarkOnePassShortA(b *testing.B) {
   774  	b.StopTimer()
   775  	x := []byte("abcddddddeeeededd")
   776  	re := MustCompile("^.bc(d|e)*$")
   777  	b.StartTimer()
   778  	for i := 0; i < b.N; i++ {
   779  		re.Match(x)
   780  	}
   781  }
   782  
   783  func BenchmarkNotOnePassShortA(b *testing.B) {
   784  	b.StopTimer()
   785  	x := []byte("abcddddddeeeededd")
   786  	re := MustCompile(".bc(d|e)*$")
   787  	b.StartTimer()
   788  	for i := 0; i < b.N; i++ {
   789  		re.Match(x)
   790  	}
   791  }
   792  
   793  func BenchmarkOnePassShortB(b *testing.B) {
   794  	b.StopTimer()
   795  	x := []byte("abcddddddeeeededd")
   796  	re := MustCompile("^.bc(?:d|e)*$")
   797  	b.StartTimer()
   798  	for i := 0; i < b.N; i++ {
   799  		re.Match(x)
   800  	}
   801  }
   802  
   803  func BenchmarkNotOnePassShortB(b *testing.B) {
   804  	b.StopTimer()
   805  	x := []byte("abcddddddeeeededd")
   806  	re := MustCompile(".bc(?:d|e)*$")
   807  	b.StartTimer()
   808  	for i := 0; i < b.N; i++ {
   809  		re.Match(x)
   810  	}
   811  }
   812  
   813  func BenchmarkOnePassLongPrefix(b *testing.B) {
   814  	b.StopTimer()
   815  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   816  	re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$")
   817  	b.StartTimer()
   818  	for i := 0; i < b.N; i++ {
   819  		re.Match(x)
   820  	}
   821  }
   822  
   823  func BenchmarkOnePassLongNotPrefix(b *testing.B) {
   824  	b.StopTimer()
   825  	x := []byte("abcdefghijklmnopqrstuvwxyz")
   826  	re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$")
   827  	b.StartTimer()
   828  	for i := 0; i < b.N; i++ {
   829  		re.Match(x)
   830  	}
   831  }
   832  
   833  func BenchmarkMatchParallelShared(b *testing.B) {
   834  	x := []byte("this is a long line that contains foo bar baz")
   835  	re := MustCompile("foo (ba+r)? baz")
   836  	b.ResetTimer()
   837  	b.RunParallel(func(pb *testing.PB) {
   838  		for pb.Next() {
   839  			re.Match(x)
   840  		}
   841  	})
   842  }
   843  
   844  func BenchmarkMatchParallelCopied(b *testing.B) {
   845  	x := []byte("this is a long line that contains foo bar baz")
   846  	re := MustCompile("foo (ba+r)? baz")
   847  	b.ResetTimer()
   848  	b.RunParallel(func(pb *testing.PB) {
   849  		re := re.Copy()
   850  		for pb.Next() {
   851  			re.Match(x)
   852  		}
   853  	})
   854  }
   855  
// sink receives the QuoteMeta results in BenchmarkQuoteMetaAll and
// BenchmarkQuoteMetaNone.
// NOTE(review): presumably this keeps the compiler from discarding the
// benchmarked call; confirm.
var sink string
   857  
   858  func BenchmarkQuoteMetaAll(b *testing.B) {
   859  	specials := make([]byte, 0)
   860  	for i := byte(0); i < utf8.RuneSelf; i++ {
   861  		if special(i) {
   862  			specials = append(specials, i)
   863  		}
   864  	}
   865  	s := string(specials)
   866  	b.SetBytes(int64(len(s)))
   867  	b.ResetTimer()
   868  	for i := 0; i < b.N; i++ {
   869  		sink = QuoteMeta(s)
   870  	}
   871  }
   872  
   873  func BenchmarkQuoteMetaNone(b *testing.B) {
   874  	s := "abcdefghijklmnopqrstuvwxyz"
   875  	b.SetBytes(int64(len(s)))
   876  	b.ResetTimer()
   877  	for i := 0; i < b.N; i++ {
   878  		sink = QuoteMeta(s)
   879  	}
   880  }
   881  
   882  var compileBenchData = []struct{ name, re string }{
   883  	{"Onepass", `^a.[l-nA-Cg-j]?e$`},
   884  	{"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`},
   885  	{"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)},
   886  }
   887  
   888  func BenchmarkCompile(b *testing.B) {
   889  	for _, data := range compileBenchData {
   890  		b.Run(data.name, func(b *testing.B) {
   891  			b.ReportAllocs()
   892  			for i := 0; i < b.N; i++ {
   893  				if _, err := Compile(data.re); err != nil {
   894  					b.Fatal(err)
   895  				}
   896  			}
   897  		})
   898  	}
   899  }
   900  
   901  func TestDeepEqual(t *testing.T) {
   902  	re1 := MustCompile("a.*b.*c.*d")
   903  	re2 := MustCompile("a.*b.*c.*d")
   904  	if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1.
   905  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   906  	}
   907  
   908  	re1.MatchString("abcdefghijklmn")
   909  	if !reflect.DeepEqual(re1, re2) {
   910  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   911  	}
   912  
   913  	re2.MatchString("abcdefghijklmn")
   914  	if !reflect.DeepEqual(re1, re2) {
   915  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   916  	}
   917  
   918  	re2.MatchString(strings.Repeat("abcdefghijklmn", 100))
   919  	if !reflect.DeepEqual(re1, re2) {
   920  		t.Errorf("DeepEqual(re1, re2) = false, want true")
   921  	}
   922  }
   923  
// minInputLenTests holds the cases run by TestMinInputLen.
var minInputLenTests = []struct {
	Regexp string // pattern parsed with syntax.Perl
	min    int    // expected minInputLen result
}{
	{``, 0},
	{`a`, 1},
	{`aa`, 2},
	{`(aa)a`, 3},
	{`(?:aa)a`, 3},
	{`a?a`, 1},
	{`(aaa)|(aa)`, 2},
	{`(aa)+a`, 3},
	{`(aa)*a`, 1},
	{`(aa){3,5}`, 6},
	{`[a-z]`, 1},
	{`日`, 3},
}
   941  
   942  func TestMinInputLen(t *testing.T) {
   943  	for _, tt := range minInputLenTests {
   944  		re, _ := syntax.Parse(tt.Regexp, syntax.Perl)
   945  		m := minInputLen(re)
   946  		if m != tt.min {
   947  			t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min)
   948  		}
   949  	}
   950  }
   951  
   952  func TestUnmarshalText(t *testing.T) {
   953  	unmarshaled := new(Regexp)
   954  	for i := range goodRe {
   955  		re := compileTest(t, goodRe[i], "")
   956  		marshaled, err := re.MarshalText()
   957  		if err != nil {
   958  			t.Errorf("regexp %#q failed to marshal: %s", re, err)
   959  			continue
   960  		}
   961  		if err := unmarshaled.UnmarshalText(marshaled); err != nil {
   962  			t.Errorf("regexp %#q failed to unmarshal: %s", re, err)
   963  			continue
   964  		}
   965  		if unmarshaled.String() != goodRe[i] {
   966  			t.Errorf("UnmarshalText returned unexpected value: %s", unmarshaled.String())
   967  		}
   968  	}
   969  	t.Run("invalid pattern", func(t *testing.T) {
   970  		re := new(Regexp)
   971  		err := re.UnmarshalText([]byte(`\`))
   972  		if err == nil {
   973  			t.Error("unexpected success")
   974  		}
   975  	})
   976  }
   977  

View as plain text