Source file 
src/regexp/exec_test.go
     1  
     2  
     3  
     4  
     5  package regexp
     6  
     7  import (
     8  	"bufio"
     9  	"compress/bzip2"
    10  	"fmt"
    11  	"internal/testenv"
    12  	"io"
    13  	"os"
    14  	"path/filepath"
    15  	"regexp/syntax"
    16  	"slices"
    17  	"strconv"
    18  	"strings"
    19  	"testing"
    20  	"unicode/utf8"
    21  )
    22  
    23  
    24  
    25  
    26  
    27  
    28  
    29  
    30  
    31  
    32  
    33  
    34  
    35  
    36  
    37  
    38  
    39  
    40  
    41  
    42  
    43  
    44  
    45  
    46  
    47  
    48  
    49  
    50  
    51  
    52  
    53  
    54  
    55  
    56  
    57  
    58  
    59  
    60  
    61  
    62  
    63  
    64  
    65  
    66  func TestRE2Search(t *testing.T) {
    67  	testRE2(t, "testdata/re2-search.txt")
    68  }
    69  
    70  func testRE2(t *testing.T, file string) {
    71  	f, err := os.Open(file)
    72  	if err != nil {
    73  		t.Fatal(err)
    74  	}
    75  	defer f.Close()
    76  	var txt io.Reader
    77  	if strings.HasSuffix(file, ".bz2") {
    78  		z := bzip2.NewReader(f)
    79  		txt = z
    80  		file = file[:len(file)-len(".bz2")] 
    81  	} else {
    82  		txt = f
    83  	}
    84  	lineno := 0
    85  	scanner := bufio.NewScanner(txt)
    86  	var (
    87  		str       []string
    88  		input     []string
    89  		inStrings bool
    90  		re        *Regexp
    91  		refull    *Regexp
    92  		nfail     int
    93  		ncase     int
    94  	)
    95  	for lineno := 1; scanner.Scan(); lineno++ {
    96  		line := scanner.Text()
    97  		switch {
    98  		case line == "":
    99  			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
   100  		case line[0] == '#':
   101  			continue
   102  		case 'A' <= line[0] && line[0] <= 'Z':
   103  			
   104  			t.Logf("%s\n", line)
   105  			continue
   106  		case line == "strings":
   107  			str = str[:0]
   108  			inStrings = true
   109  		case line == "regexps":
   110  			inStrings = false
   111  		case line[0] == '"':
   112  			q, err := strconv.Unquote(line)
   113  			if err != nil {
   114  				
   115  				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
   116  			}
   117  			if inStrings {
   118  				str = append(str, q)
   119  				continue
   120  			}
   121  			
   122  			if len(input) != 0 {
   123  				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
   124  			}
   125  			re, err = tryCompile(q)
   126  			if err != nil {
   127  				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
   128  					
   129  					continue
   130  				}
   131  				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
   132  				if nfail++; nfail >= 100 {
   133  					t.Fatalf("stopping after %d errors", nfail)
   134  				}
   135  				continue
   136  			}
   137  			full := `\A(?:` + q + `)\z`
   138  			refull, err = tryCompile(full)
   139  			if err != nil {
   140  				
   141  				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
   142  			}
   143  			input = str
   144  		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
   145  			
   146  			ncase++
   147  			if re == nil {
   148  				
   149  				continue
   150  			}
   151  			if len(input) == 0 {
   152  				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
   153  			}
   154  			var text string
   155  			text, input = input[0], input[1:]
   156  			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
   157  				
   158  				
   159  				
   160  				
   161  				
   162  				continue
   163  			}
   164  			res := strings.Split(line, ";")
   165  			if len(res) != len(run) {
   166  				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
   167  			}
   168  			for i := range res {
   169  				have, suffix := run[i](re, refull, text)
   170  				want := parseResult(t, file, lineno, res[i])
   171  				if !slices.Equal(have, want) {
   172  					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
   173  					if nfail++; nfail >= 100 {
   174  						t.Fatalf("stopping after %d errors", nfail)
   175  					}
   176  					continue
   177  				}
   178  				b, suffix := match[i](re, refull, text)
   179  				if b != (want != nil) {
   180  					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
   181  					if nfail++; nfail >= 100 {
   182  						t.Fatalf("stopping after %d errors", nfail)
   183  					}
   184  					continue
   185  				}
   186  			}
   187  
   188  		default:
   189  			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
   190  		}
   191  	}
   192  	if err := scanner.Err(); err != nil {
   193  		t.Fatalf("%s:%d: %v", file, lineno, err)
   194  	}
   195  	if len(input) != 0 {
   196  		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
   197  	}
   198  	t.Logf("%d cases tested", ncase)
   199  }
   200  
   201  var run = []func(*Regexp, *Regexp, string) ([]int, string){
   202  	runFull,
   203  	runPartial,
   204  	runFullLongest,
   205  	runPartialLongest,
   206  }
   207  
   208  func runFull(re, refull *Regexp, text string) ([]int, string) {
   209  	refull.longest = false
   210  	return refull.FindStringSubmatchIndex(text), "[full]"
   211  }
   212  
   213  func runPartial(re, refull *Regexp, text string) ([]int, string) {
   214  	re.longest = false
   215  	return re.FindStringSubmatchIndex(text), ""
   216  }
   217  
   218  func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
   219  	refull.longest = true
   220  	return refull.FindStringSubmatchIndex(text), "[full,longest]"
   221  }
   222  
   223  func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
   224  	re.longest = true
   225  	return re.FindStringSubmatchIndex(text), "[longest]"
   226  }
   227  
   228  var match = []func(*Regexp, *Regexp, string) (bool, string){
   229  	matchFull,
   230  	matchPartial,
   231  	matchFullLongest,
   232  	matchPartialLongest,
   233  }
   234  
   235  func matchFull(re, refull *Regexp, text string) (bool, string) {
   236  	refull.longest = false
   237  	return refull.MatchString(text), "[full]"
   238  }
   239  
   240  func matchPartial(re, refull *Regexp, text string) (bool, string) {
   241  	re.longest = false
   242  	return re.MatchString(text), ""
   243  }
   244  
   245  func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
   246  	refull.longest = true
   247  	return refull.MatchString(text), "[full,longest]"
   248  }
   249  
   250  func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
   251  	re.longest = true
   252  	return re.MatchString(text), "[longest]"
   253  }
   254  
   255  func isSingleBytes(s string) bool {
   256  	for _, c := range s {
   257  		if c >= utf8.RuneSelf {
   258  			return false
   259  		}
   260  	}
   261  	return true
   262  }
   263  
   264  func tryCompile(s string) (re *Regexp, err error) {
   265  	
   266  	defer func() {
   267  		if r := recover(); r != nil {
   268  			err = fmt.Errorf("panic: %v", r)
   269  		}
   270  	}()
   271  	return Compile(s)
   272  }
   273  
   274  func parseResult(t *testing.T, file string, lineno int, res string) []int {
   275  	
   276  	if res == "-" {
   277  		return nil
   278  	}
   279  	
   280  	n := 1
   281  	for j := 0; j < len(res); j++ {
   282  		if res[j] == ' ' {
   283  			n++
   284  		}
   285  	}
   286  	out := make([]int, 2*n)
   287  	i := 0
   288  	n = 0
   289  	for j := 0; j <= len(res); j++ {
   290  		if j == len(res) || res[j] == ' ' {
   291  			
   292  			pair := res[i:j]
   293  			if pair == "-" {
   294  				out[n] = -1
   295  				out[n+1] = -1
   296  			} else {
   297  				loStr, hiStr, _ := strings.Cut(pair, "-")
   298  				lo, err1 := strconv.Atoi(loStr)
   299  				hi, err2 := strconv.Atoi(hiStr)
   300  				if err1 != nil || err2 != nil || lo > hi {
   301  					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
   302  				}
   303  				out[n] = lo
   304  				out[n+1] = hi
   305  			}
   306  			n += 2
   307  			i = j + 1
   308  		}
   309  	}
   310  	return out
   311  }
   312  
   313  
   314  
   315  
   316  func TestFowler(t *testing.T) {
   317  	files, err := filepath.Glob("testdata/*.dat")
   318  	if err != nil {
   319  		t.Fatal(err)
   320  	}
   321  	for _, file := range files {
   322  		t.Log(file)
   323  		testFowler(t, file)
   324  	}
   325  }
   326  
   327  var notab = MustCompilePOSIX(`[^\t]+`)
   328  
   329  func testFowler(t *testing.T, file string) {
   330  	f, err := os.Open(file)
   331  	if err != nil {
   332  		t.Error(err)
   333  		return
   334  	}
   335  	defer f.Close()
   336  	b := bufio.NewReader(f)
   337  	lineno := 0
   338  	lastRegexp := ""
   339  Reading:
   340  	for {
   341  		lineno++
   342  		line, err := b.ReadString('\n')
   343  		if err != nil {
   344  			if err != io.EOF {
   345  				t.Errorf("%s:%d: %v", file, lineno, err)
   346  			}
   347  			break Reading
   348  		}
   349  
   350  		
   351  		
   352  		
   353  		
   354  		
   355  		
   356  		
   357  		if line[0] == '#' || line[0] == '\n' {
   358  			continue Reading
   359  		}
   360  		line = line[:len(line)-1]
   361  		field := notab.FindAllString(line, -1)
   362  		for i, f := range field {
   363  			if f == "NULL" {
   364  				field[i] = ""
   365  			}
   366  			if f == "NIL" {
   367  				t.Logf("%s:%d: skip: %s", file, lineno, line)
   368  				continue Reading
   369  			}
   370  		}
   371  		if len(field) == 0 {
   372  			continue Reading
   373  		}
   374  
   375  		
   376  		
   377  		
   378  		
   379  		
   380  		
   381  		
   382  		
   383  		
   384  		
   385  		
   386  		
   387  		
   388  		
   389  		
   390  		
   391  		
   392  		
   393  		
   394  		
   395  		
   396  		
   397  		
   398  		
   399  		
   400  		
   401  		
   402  		
   403  		
   404  		
   405  		
   406  		
   407  		
   408  		
   409  		
   410  		
   411  		
   412  		
   413  		
   414  		
   415  		
   416  		
   417  		
   418  		
   419  		
   420  		
   421  		
   422  		
   423  		
   424  		
   425  		
   426  		
   427  		
   428  		
   429  		
   430  		
   431  		
   432  		
   433  		
   434  		
   435  		flag := field[0]
   436  		switch flag[0] {
   437  		case '?', '&', '|', ';', '{', '}':
   438  			
   439  			
   440  			flag = flag[1:]
   441  			if flag == "" {
   442  				continue Reading
   443  			}
   444  		case ':':
   445  			var ok bool
   446  			if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
   447  				t.Logf("skip: %s", line)
   448  				continue Reading
   449  			}
   450  		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   451  			t.Logf("skip: %s", line)
   452  			continue Reading
   453  		}
   454  
   455  		
   456  		if len(field) < 4 {
   457  			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
   458  			continue Reading
   459  		}
   460  
   461  		
   462  		if strings.Contains(flag, "$") {
   463  			f := `"` + field[1] + `"`
   464  			if field[1], err = strconv.Unquote(f); err != nil {
   465  				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
   466  			}
   467  			f = `"` + field[2] + `"`
   468  			if field[2], err = strconv.Unquote(f); err != nil {
   469  				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
   470  			}
   471  		}
   472  
   473  		
   474  		
   475  		
   476  		if field[1] == "SAME" {
   477  			field[1] = lastRegexp
   478  		}
   479  		lastRegexp = field[1]
   480  
   481  		
   482  		text := field[2]
   483  
   484  		
   485  		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
   486  		if !ok {
   487  			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
   488  			continue Reading
   489  		}
   490  
   491  		
   492  
   493  	Testing:
   494  		
   495  		for _, c := range flag {
   496  			pattern := field[1]
   497  			syn := syntax.POSIX | syntax.ClassNL
   498  			switch c {
   499  			default:
   500  				continue Testing
   501  			case 'E':
   502  				
   503  			case 'L':
   504  				
   505  				pattern = QuoteMeta(pattern)
   506  			}
   507  
   508  			for _, c := range flag {
   509  				switch c {
   510  				case 'i':
   511  					syn |= syntax.FoldCase
   512  				}
   513  			}
   514  
   515  			re, err := compile(pattern, syn, true)
   516  			if err != nil {
   517  				if shouldCompile {
   518  					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
   519  				}
   520  				continue Testing
   521  			}
   522  			if !shouldCompile {
   523  				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
   524  				continue Testing
   525  			}
   526  			match := re.MatchString(text)
   527  			if match != shouldMatch {
   528  				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
   529  				continue Testing
   530  			}
   531  			have := re.FindStringSubmatchIndex(text)
   532  			if (len(have) > 0) != match {
   533  				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
   534  				continue Testing
   535  			}
   536  			if len(have) > len(pos) {
   537  				have = have[:len(pos)]
   538  			}
   539  			if !slices.Equal(have, pos) {
   540  				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
   541  			}
   542  		}
   543  	}
   544  }
   545  
   546  func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
   547  	
   548  	
   549  	
   550  	
   551  	
   552  	
   553  	
   554  	
   555  	
   556  	
   557  	
   558  	
   559  	
   560  	
   561  	switch {
   562  	case s == "":
   563  		
   564  		ok = true
   565  		compiled = true
   566  		matched = true
   567  		return
   568  	case s == "NOMATCH":
   569  		
   570  		ok = true
   571  		compiled = true
   572  		matched = false
   573  		return
   574  	case 'A' <= s[0] && s[0] <= 'Z':
   575  		
   576  		ok = true
   577  		compiled = false
   578  		return
   579  	}
   580  	compiled = true
   581  
   582  	var x []int
   583  	for s != "" {
   584  		var end byte = ')'
   585  		if len(x)%2 == 0 {
   586  			if s[0] != '(' {
   587  				ok = false
   588  				return
   589  			}
   590  			s = s[1:]
   591  			end = ','
   592  		}
   593  		i := 0
   594  		for i < len(s) && s[i] != end {
   595  			i++
   596  		}
   597  		if i == 0 || i == len(s) {
   598  			ok = false
   599  			return
   600  		}
   601  		var v = -1
   602  		var err error
   603  		if s[:i] != "?" {
   604  			v, err = strconv.Atoi(s[:i])
   605  			if err != nil {
   606  				ok = false
   607  				return
   608  			}
   609  		}
   610  		x = append(x, v)
   611  		s = s[i+1:]
   612  	}
   613  	if len(x)%2 != 0 {
   614  		ok = false
   615  		return
   616  	}
   617  	ok = true
   618  	matched = true
   619  	pos = x
   620  	return
   621  }
   622  
   623  var text []byte
   624  
   625  func makeText(n int) []byte {
   626  	if len(text) >= n {
   627  		return text[:n]
   628  	}
   629  	text = make([]byte, n)
   630  	x := ^uint32(0)
   631  	for i := range text {
   632  		x += x
   633  		x ^= 1
   634  		if int32(x) < 0 {
   635  			x ^= 0x88888eef
   636  		}
   637  		if x%31 == 0 {
   638  			text[i] = '\n'
   639  		} else {
   640  			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
   641  		}
   642  	}
   643  	return text
   644  }
   645  
   646  func BenchmarkMatch(b *testing.B) {
   647  	isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
   648  
   649  	for _, data := range benchData {
   650  		r := MustCompile(data.re)
   651  		for _, size := range benchSizes {
   652  			if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
   653  				continue
   654  			}
   655  			t := makeText(size.n)
   656  			b.Run(data.name+"/"+size.name, func(b *testing.B) {
   657  				b.SetBytes(int64(size.n))
   658  				for i := 0; i < b.N; i++ {
   659  					if r.Match(t) {
   660  						b.Fatal("match!")
   661  					}
   662  				}
   663  			})
   664  		}
   665  	}
   666  }
   667  
   668  func BenchmarkMatch_onepass_regex(b *testing.B) {
   669  	isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
   670  	r := MustCompile(`(?s)\A.*\z`)
   671  	if r.onepass == nil {
   672  		b.Fatalf("want onepass regex, but %q is not onepass", r)
   673  	}
   674  	for _, size := range benchSizes {
   675  		if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
   676  			continue
   677  		}
   678  		t := makeText(size.n)
   679  		b.Run(size.name, func(b *testing.B) {
   680  			b.SetBytes(int64(size.n))
   681  			b.ReportAllocs()
   682  			for i := 0; i < b.N; i++ {
   683  				if !r.Match(t) {
   684  					b.Fatal("not match!")
   685  				}
   686  			}
   687  		})
   688  	}
   689  }
   690  
   691  var benchData = []struct{ name, re string }{
   692  	{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
   693  	{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
   694  	{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
   695  	{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
   696  	{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
   697  	{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
   698  }
   699  
   700  var benchSizes = []struct {
   701  	name string
   702  	n    int
   703  }{
   704  	{"16", 16},
   705  	{"32", 32},
   706  	{"1K", 1 << 10},
   707  	{"32K", 32 << 10},
   708  	{"1M", 1 << 20},
   709  	{"32M", 32 << 20},
   710  }
   711  
   712  func TestLongest(t *testing.T) {
   713  	re, err := Compile(`a(|b)`)
   714  	if err != nil {
   715  		t.Fatal(err)
   716  	}
   717  	if g, w := re.FindString("ab"), "a"; g != w {
   718  		t.Errorf("first match was %q, want %q", g, w)
   719  	}
   720  	re.Longest()
   721  	if g, w := re.FindString("ab"), "ab"; g != w {
   722  		t.Errorf("longest match was %q, want %q", g, w)
   723  	}
   724  }
   725  
   726  
   727  
   728  func TestProgramTooLongForBacktrack(t *testing.T) {
   729  	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
   730  	if !longRegex.MatchString("two") {
   731  		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
   732  	}
   733  	if longRegex.MatchString("xxx") {
   734  		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
   735  	}
   736  }
   737  
View as plain text