Source file src/regexp/find_test.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"testing"
    11  )
    12  
    13  // For each pattern/text pair, what is the expected output of each function?
    14  // We can derive the textual results from the indexed results, the non-submatch
    15  // results from the submatched results, the single results from the 'all' results,
    16  // and the byte results from the string results. Therefore the table includes
    17  // only the FindAllStringSubmatchIndex result.
    18  type FindTest struct {
    19  	pat     string
    20  	text    string
    21  	matches [][]int
    22  }
    23  
    24  func (t FindTest) String() string {
    25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
    26  }
    27  
    28  var findTests = []FindTest{
    29  	{``, ``, build(1, 0, 0)},
    30  	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
    31  	{`a+`, "baaab", build(1, 1, 4)},
    32  	{"abcd..", "abcdef", build(1, 0, 6)},
    33  	{`a`, "a", build(1, 0, 1)},
    34  	{`x`, "y", nil},
    35  	{`b`, "abc", build(1, 1, 2)},
    36  	{`.`, "a", build(1, 0, 1)},
    37  	{`.*`, "abcdef", build(1, 0, 6)},
    38  	{`^`, "abcde", build(1, 0, 0)},
    39  	{`$`, "abcde", build(1, 5, 5)},
    40  	{`^abcd$`, "abcd", build(1, 0, 4)},
    41  	{`^bcd'`, "abcdef", nil},
    42  	{`^abcd$`, "abcde", nil},
    43  	{`a+`, "baaab", build(1, 1, 4)},
    44  	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
    45  	{`[a-z]+`, "abcd", build(1, 0, 4)},
    46  	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
    47  	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
    48  	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
    49  	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
    50  	{`日本語+`, "日本語", build(1, 0, 9)},
    51  	{`日本語+`, "日本語語語語", build(1, 0, 18)},
    52  	{`()`, "", build(1, 0, 0, 0, 0)},
    53  	{`(a)`, "a", build(1, 0, 1, 0, 1)},
    54  	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
    55  	{`(.*)`, "", build(1, 0, 0, 0, 0)},
    56  	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
    57  	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
    58  	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
    59  	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
    60  	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
    61  	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
    62  	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
    63  
    64  	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
    65  	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
    66  	{`[.]`, ".", build(1, 0, 1)},
    67  	{`/$`, "/abc/", build(1, 4, 5)},
    68  	{`/$`, "/abc", nil},
    69  
    70  	// multiple matches
    71  	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
    72  	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
    73  	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
    74  	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
    75  	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
    76  
    77  	// fixed bugs
    78  	{`ab$`, "cab", build(1, 1, 3)},
    79  	{`axxb$`, "axxcb", nil},
    80  	{`data`, "daXY data", build(1, 5, 9)},
    81  	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
    82  	{`zx+`, "zzx", build(1, 1, 3)},
    83  	{`ab$`, "abcab", build(1, 3, 5)},
    84  	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
    85  	{`(?:.|(?:.a))`, "", nil},
    86  	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
    87  	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
    88  	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
    89  	{`(?-s)(?:(?:^).)`, "\n", nil},
    90  	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
    91  	{`(?:(?:^).)`, "\n", nil},
    92  	{`\b`, "x", build(2, 0, 0, 1, 1)},
    93  	{`\b`, "xx", build(2, 0, 0, 2, 2)},
    94  	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
    95  	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
    96  	{`\B`, "x", nil},
    97  	{`\B`, "xx", build(1, 1, 1)},
    98  	{`\B`, "x y", nil},
    99  	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
   100  	{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},
   101  
   102  	// RE2 tests
   103  	{`[^\S\s]`, "abcd", nil},
   104  	{`[^\S[:space:]]`, "abcd", nil},
   105  	{`[^\D\d]`, "abcd", nil},
   106  	{`[^\D[:digit:]]`, "abcd", nil},
   107  	{`(?i)\W`, "x", nil},
   108  	{`(?i)\W`, "k", nil},
   109  	{`(?i)\W`, "s", nil},
   110  
   111  	// can backslash-escape any punctuation
   112  	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
   113  		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
   114  	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
   115  		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
   116  	{"\\`", "`", build(1, 0, 1)},
   117  	{"[\\`]+", "`", build(1, 0, 1)},
   118  
   119  	{"\ufffd", "\xff", build(1, 0, 1)},
   120  	{"\ufffd", "hello\xffworld", build(1, 5, 6)},
   121  	{`.*`, "hello\xffworld", build(1, 0, 11)},
   122  	{`\x{fffd}`, "\xc2\x00", build(1, 0, 1)},
   123  	{"[\ufffd]", "\xff", build(1, 0, 1)},
   124  	{`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)},
   125  
   126  	// long set of matches (longer than startSize)
   127  	{
   128  		".",
   129  		"qwertyuiopasdfghjklzxcvbnm1234567890",
   130  		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
   131  			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
   132  			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
   133  			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
   134  	},
   135  }
   136  
   137  // build is a helper to construct a [][]int by extracting n sequences from x.
   138  // This represents n matches with len(x)/n submatches each.
   139  func build(n int, x ...int) [][]int {
   140  	ret := make([][]int, n)
   141  	runLength := len(x) / n
   142  	j := 0
   143  	for i := range ret {
   144  		ret[i] = make([]int, runLength)
   145  		copy(ret[i], x[j:])
   146  		j += runLength
   147  		if j > len(x) {
   148  			panic("invalid build entry")
   149  		}
   150  	}
   151  	return ret
   152  }
   153  
   154  // First the simple cases.
   155  
   156  func TestFind(t *testing.T) {
   157  	for _, test := range findTests {
   158  		re := MustCompile(test.pat)
   159  		if re.String() != test.pat {
   160  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
   161  		}
   162  		result := re.Find([]byte(test.text))
   163  		switch {
   164  		case len(test.matches) == 0 && len(result) == 0:
   165  			// ok
   166  		case test.matches == nil && result != nil:
   167  			t.Errorf("expected no match; got one: %s", test)
   168  		case test.matches != nil && result == nil:
   169  			t.Errorf("expected match; got none: %s", test)
   170  		case test.matches != nil && result != nil:
   171  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   172  			if len(result) != cap(result) {
   173  				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
   174  			}
   175  			if expect != string(result) {
   176  				t.Errorf("expected %q got %q: %s", expect, result, test)
   177  			}
   178  		}
   179  	}
   180  }
   181  
   182  func TestFindString(t *testing.T) {
   183  	for _, test := range findTests {
   184  		result := MustCompile(test.pat).FindString(test.text)
   185  		switch {
   186  		case len(test.matches) == 0 && len(result) == 0:
   187  			// ok
   188  		case test.matches == nil && result != "":
   189  			t.Errorf("expected no match; got one: %s", test)
   190  		case test.matches != nil && result == "":
   191  			// Tricky because an empty result has two meanings: no match or empty match.
   192  			if test.matches[0][0] != test.matches[0][1] {
   193  				t.Errorf("expected match; got none: %s", test)
   194  			}
   195  		case test.matches != nil && result != "":
   196  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   197  			if expect != result {
   198  				t.Errorf("expected %q got %q: %s", expect, result, test)
   199  			}
   200  		}
   201  	}
   202  }
   203  
   204  func testFindIndex(test *FindTest, result []int, t *testing.T) {
   205  	switch {
   206  	case len(test.matches) == 0 && len(result) == 0:
   207  		// ok
   208  	case test.matches == nil && result != nil:
   209  		t.Errorf("expected no match; got one: %s", test)
   210  	case test.matches != nil && result == nil:
   211  		t.Errorf("expected match; got none: %s", test)
   212  	case test.matches != nil && result != nil:
   213  		expect := test.matches[0]
   214  		if expect[0] != result[0] || expect[1] != result[1] {
   215  			t.Errorf("expected %v got %v: %s", expect, result, test)
   216  		}
   217  	}
   218  }
   219  
   220  func TestFindIndex(t *testing.T) {
   221  	for _, test := range findTests {
   222  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
   223  	}
   224  }
   225  
   226  func TestFindStringIndex(t *testing.T) {
   227  	for _, test := range findTests {
   228  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
   229  	}
   230  }
   231  
   232  func TestFindReaderIndex(t *testing.T) {
   233  	for _, test := range findTests {
   234  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
   235  	}
   236  }
   237  
   238  // Now come the simple All cases.
   239  
   240  func TestFindAll(t *testing.T) {
   241  	for _, test := range findTests {
   242  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
   243  		switch {
   244  		case test.matches == nil && result == nil:
   245  			// ok
   246  		case test.matches == nil && result != nil:
   247  			t.Errorf("expected no match; got one: %s", test)
   248  		case test.matches != nil && result == nil:
   249  			t.Fatalf("expected match; got none: %s", test)
   250  		case test.matches != nil && result != nil:
   251  			if len(test.matches) != len(result) {
   252  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   253  				continue
   254  			}
   255  			for k, e := range test.matches {
   256  				got := result[k]
   257  				if len(got) != cap(got) {
   258  					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
   259  				}
   260  				expect := test.text[e[0]:e[1]]
   261  				if expect != string(got) {
   262  					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
   263  				}
   264  			}
   265  		}
   266  	}
   267  }
   268  
   269  func TestFindAllString(t *testing.T) {
   270  	for _, test := range findTests {
   271  		result := MustCompile(test.pat).FindAllString(test.text, -1)
   272  		switch {
   273  		case test.matches == nil && result == nil:
   274  			// ok
   275  		case test.matches == nil && result != nil:
   276  			t.Errorf("expected no match; got one: %s", test)
   277  		case test.matches != nil && result == nil:
   278  			t.Errorf("expected match; got none: %s", test)
   279  		case test.matches != nil && result != nil:
   280  			if len(test.matches) != len(result) {
   281  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   282  				continue
   283  			}
   284  			for k, e := range test.matches {
   285  				expect := test.text[e[0]:e[1]]
   286  				if expect != result[k] {
   287  					t.Errorf("expected %q got %q: %s", expect, result, test)
   288  				}
   289  			}
   290  		}
   291  	}
   292  }
   293  
   294  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
   295  	switch {
   296  	case test.matches == nil && result == nil:
   297  		// ok
   298  	case test.matches == nil && result != nil:
   299  		t.Errorf("expected no match; got one: %s", test)
   300  	case test.matches != nil && result == nil:
   301  		t.Errorf("expected match; got none: %s", test)
   302  	case test.matches != nil && result != nil:
   303  		if len(test.matches) != len(result) {
   304  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   305  			return
   306  		}
   307  		for k, e := range test.matches {
   308  			if e[0] != result[k][0] || e[1] != result[k][1] {
   309  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
   310  			}
   311  		}
   312  	}
   313  }
   314  
   315  func TestFindAllIndex(t *testing.T) {
   316  	for _, test := range findTests {
   317  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
   318  	}
   319  }
   320  
   321  func TestFindAllStringIndex(t *testing.T) {
   322  	for _, test := range findTests {
   323  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
   324  	}
   325  }
   326  
   327  // Now come the Submatch cases.
   328  
   329  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
   330  	if len(submatches) != len(result)*2 {
   331  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   332  		return
   333  	}
   334  	for k := 0; k < len(submatches); k += 2 {
   335  		if submatches[k] == -1 {
   336  			if result[k/2] != nil {
   337  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   338  			}
   339  			continue
   340  		}
   341  		got := result[k/2]
   342  		if len(got) != cap(got) {
   343  			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
   344  			return
   345  		}
   346  		expect := test.text[submatches[k]:submatches[k+1]]
   347  		if expect != string(got) {
   348  			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
   349  			return
   350  		}
   351  	}
   352  }
   353  
   354  func TestFindSubmatch(t *testing.T) {
   355  	for _, test := range findTests {
   356  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
   357  		switch {
   358  		case test.matches == nil && result == nil:
   359  			// ok
   360  		case test.matches == nil && result != nil:
   361  			t.Errorf("expected no match; got one: %s", test)
   362  		case test.matches != nil && result == nil:
   363  			t.Errorf("expected match; got none: %s", test)
   364  		case test.matches != nil && result != nil:
   365  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
   366  		}
   367  	}
   368  }
   369  
   370  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
   371  	if len(submatches) != len(result)*2 {
   372  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   373  		return
   374  	}
   375  	for k := 0; k < len(submatches); k += 2 {
   376  		if submatches[k] == -1 {
   377  			if result[k/2] != "" {
   378  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   379  			}
   380  			continue
   381  		}
   382  		expect := test.text[submatches[k]:submatches[k+1]]
   383  		if expect != result[k/2] {
   384  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   385  			return
   386  		}
   387  	}
   388  }
   389  
   390  func TestFindStringSubmatch(t *testing.T) {
   391  	for _, test := range findTests {
   392  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
   393  		switch {
   394  		case test.matches == nil && result == nil:
   395  			// ok
   396  		case test.matches == nil && result != nil:
   397  			t.Errorf("expected no match; got one: %s", test)
   398  		case test.matches != nil && result == nil:
   399  			t.Errorf("expected match; got none: %s", test)
   400  		case test.matches != nil && result != nil:
   401  			testSubmatchString(&test, 0, test.matches[0], result, t)
   402  		}
   403  	}
   404  }
   405  
   406  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
   407  	if len(expect) != len(result) {
   408  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
   409  		return
   410  	}
   411  	for k, e := range expect {
   412  		if e != result[k] {
   413  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
   414  		}
   415  	}
   416  }
   417  
   418  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
   419  	switch {
   420  	case test.matches == nil && result == nil:
   421  		// ok
   422  	case test.matches == nil && result != nil:
   423  		t.Errorf("expected no match; got one: %s", test)
   424  	case test.matches != nil && result == nil:
   425  		t.Errorf("expected match; got none: %s", test)
   426  	case test.matches != nil && result != nil:
   427  		testSubmatchIndices(test, 0, test.matches[0], result, t)
   428  	}
   429  }
   430  
   431  func TestFindSubmatchIndex(t *testing.T) {
   432  	for _, test := range findTests {
   433  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
   434  	}
   435  }
   436  
   437  func TestFindStringSubmatchIndex(t *testing.T) {
   438  	for _, test := range findTests {
   439  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
   440  	}
   441  }
   442  
   443  func TestFindReaderSubmatchIndex(t *testing.T) {
   444  	for _, test := range findTests {
   445  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
   446  	}
   447  }
   448  
   449  // Now come the monster AllSubmatch cases.
   450  
   451  func TestFindAllSubmatch(t *testing.T) {
   452  	for _, test := range findTests {
   453  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
   454  		switch {
   455  		case test.matches == nil && result == nil:
   456  			// ok
   457  		case test.matches == nil && result != nil:
   458  			t.Errorf("expected no match; got one: %s", test)
   459  		case test.matches != nil && result == nil:
   460  			t.Errorf("expected match; got none: %s", test)
   461  		case len(test.matches) != len(result):
   462  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   463  		case test.matches != nil && result != nil:
   464  			for k, match := range test.matches {
   465  				testSubmatchBytes(&test, k, match, result[k], t)
   466  			}
   467  		}
   468  	}
   469  }
   470  
   471  func TestFindAllStringSubmatch(t *testing.T) {
   472  	for _, test := range findTests {
   473  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
   474  		switch {
   475  		case test.matches == nil && result == nil:
   476  			// ok
   477  		case test.matches == nil && result != nil:
   478  			t.Errorf("expected no match; got one: %s", test)
   479  		case test.matches != nil && result == nil:
   480  			t.Errorf("expected match; got none: %s", test)
   481  		case len(test.matches) != len(result):
   482  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   483  		case test.matches != nil && result != nil:
   484  			for k, match := range test.matches {
   485  				testSubmatchString(&test, k, match, result[k], t)
   486  			}
   487  		}
   488  	}
   489  }
   490  
   491  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
   492  	switch {
   493  	case test.matches == nil && result == nil:
   494  		// ok
   495  	case test.matches == nil && result != nil:
   496  		t.Errorf("expected no match; got one: %s", test)
   497  	case test.matches != nil && result == nil:
   498  		t.Errorf("expected match; got none: %s", test)
   499  	case len(test.matches) != len(result):
   500  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   501  	case test.matches != nil && result != nil:
   502  		for k, match := range test.matches {
   503  			testSubmatchIndices(test, k, match, result[k], t)
   504  		}
   505  	}
   506  }
   507  
   508  func TestFindAllSubmatchIndex(t *testing.T) {
   509  	for _, test := range findTests {
   510  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
   511  	}
   512  }
   513  
   514  func TestFindAllStringSubmatchIndex(t *testing.T) {
   515  	for _, test := range findTests {
   516  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
   517  	}
   518  }
   519  

View as plain text