Source file src/net/http/pattern.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Patterns for ServeMux routing.
     6  
     7  package http
     8  
     9  import (
    10  	"errors"
    11  	"fmt"
    12  	"net/url"
    13  	"strings"
    14  	"unicode"
    15  )
    16  
    17  // A pattern is something that can be matched against an HTTP request.
    18  // It has an optional method, an optional host, and a path.
    19  type pattern struct {
    20  	str    string // original string
    21  	method string
    22  	host   string
    23  	// The representation of a path differs from the surface syntax, which
    24  	// simplifies most algorithms.
    25  	//
    26  	// Paths ending in '/' are represented with an anonymous "..." wildcard.
    27  	// For example, the path "a/" is represented as a literal segment "a" followed
    28  	// by a segment with multi==true.
    29  	//
    30  	// Paths ending in "{$}" are represented with the literal segment "/".
    31  	// For example, the path "a/{$}" is represented as a literal segment "a" followed
    32  	// by a literal segment "/".
    33  	segments []segment
    34  	loc      string // source location of registering call, for helpful messages
    35  }
    36  
    37  func (p *pattern) String() string { return p.str }
    38  
    39  func (p *pattern) lastSegment() segment {
    40  	return p.segments[len(p.segments)-1]
    41  }
    42  
    43  // A segment is a pattern piece that matches one or more path segments, or
    44  // a trailing slash.
    45  //
    46  // If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
    47  // Examples:
    48  //
    49  //	"a" => segment{s: "a"}
    50  //	"/{$}" => segment{s: "/"}
    51  //
    52  // If wild is true and multi is false, it matches a single path segment.
    53  // Example:
    54  //
    55  //	"{x}" => segment{s: "x", wild: true}
    56  //
    57  // If both wild and multi are true, it matches all remaining path segments.
    58  // Example:
    59  //
    60  //	"{rest...}" => segment{s: "rest", wild: true, multi: true}
    61  type segment struct {
    62  	s     string // literal or wildcard name or "/" for "/{$}".
    63  	wild  bool
    64  	multi bool // "..." wildcard
    65  }
    66  
    67  // parsePattern parses a string into a Pattern.
    68  // The string's syntax is
    69  //
    70  //	[METHOD] [HOST]/[PATH]
    71  //
    72  // where:
    73  //   - METHOD is an HTTP method
    74  //   - HOST is a hostname
    75  //   - PATH consists of slash-separated segments, where each segment is either
    76  //     a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
    77  //
    78  // METHOD, HOST and PATH are all optional; that is, the string can be "/".
    79  // If METHOD is present, it must be followed by at least one space or tab.
    80  // Wildcard names must be valid Go identifiers.
    81  // The "{$}" and "{name...}" wildcard must occur at the end of PATH.
    82  // PATH may end with a '/'.
    83  // Wildcard names in a path must be distinct.
    84  func parsePattern(s string) (_ *pattern, err error) {
    85  	if len(s) == 0 {
    86  		return nil, errors.New("empty pattern")
    87  	}
    88  	off := 0 // offset into string
    89  	defer func() {
    90  		if err != nil {
    91  			err = fmt.Errorf("at offset %d: %w", off, err)
    92  		}
    93  	}()
    94  
    95  	method, rest, found := s, "", false
    96  	if i := strings.IndexAny(s, " \t"); i >= 0 {
    97  		method, rest, found = s[:i], strings.TrimLeft(s[i+1:], " \t"), true
    98  	}
    99  	if !found {
   100  		rest = method
   101  		method = ""
   102  	}
   103  	if method != "" && !validMethod(method) {
   104  		return nil, fmt.Errorf("invalid method %q", method)
   105  	}
   106  	p := &pattern{str: s, method: method}
   107  
   108  	if found {
   109  		off = len(method) + 1
   110  	}
   111  	i := strings.IndexByte(rest, '/')
   112  	if i < 0 {
   113  		return nil, errors.New("host/path missing /")
   114  	}
   115  	p.host = rest[:i]
   116  	rest = rest[i:]
   117  	if j := strings.IndexByte(p.host, '{'); j >= 0 {
   118  		off += j
   119  		return nil, errors.New("host contains '{' (missing initial '/'?)")
   120  	}
   121  	// At this point, rest is the path.
   122  	off += i
   123  
   124  	// An unclean path with a method that is not CONNECT can never match,
   125  	// because paths are cleaned before matching.
   126  	if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
   127  		return nil, errors.New("non-CONNECT pattern with unclean path can never match")
   128  	}
   129  
   130  	seenNames := map[string]bool{} // remember wildcard names to catch dups
   131  	for len(rest) > 0 {
   132  		// Invariant: rest[0] == '/'.
   133  		rest = rest[1:]
   134  		off = len(s) - len(rest)
   135  		if len(rest) == 0 {
   136  			// Trailing slash.
   137  			p.segments = append(p.segments, segment{wild: true, multi: true})
   138  			break
   139  		}
   140  		i := strings.IndexByte(rest, '/')
   141  		if i < 0 {
   142  			i = len(rest)
   143  		}
   144  		var seg string
   145  		seg, rest = rest[:i], rest[i:]
   146  		if i := strings.IndexByte(seg, '{'); i < 0 {
   147  			// Literal.
   148  			seg = pathUnescape(seg)
   149  			p.segments = append(p.segments, segment{s: seg})
   150  		} else {
   151  			// Wildcard.
   152  			if i != 0 {
   153  				return nil, errors.New("bad wildcard segment (must start with '{')")
   154  			}
   155  			if seg[len(seg)-1] != '}' {
   156  				return nil, errors.New("bad wildcard segment (must end with '}')")
   157  			}
   158  			name := seg[1 : len(seg)-1]
   159  			if name == "$" {
   160  				if len(rest) != 0 {
   161  					return nil, errors.New("{$} not at end")
   162  				}
   163  				p.segments = append(p.segments, segment{s: "/"})
   164  				break
   165  			}
   166  			name, multi := strings.CutSuffix(name, "...")
   167  			if multi && len(rest) != 0 {
   168  				return nil, errors.New("{...} wildcard not at end")
   169  			}
   170  			if name == "" {
   171  				return nil, errors.New("empty wildcard")
   172  			}
   173  			if !isValidWildcardName(name) {
   174  				return nil, fmt.Errorf("bad wildcard name %q", name)
   175  			}
   176  			if seenNames[name] {
   177  				return nil, fmt.Errorf("duplicate wildcard name %q", name)
   178  			}
   179  			seenNames[name] = true
   180  			p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
   181  		}
   182  	}
   183  	return p, nil
   184  }
   185  
   186  func isValidWildcardName(s string) bool {
   187  	if s == "" {
   188  		return false
   189  	}
   190  	// Valid Go identifier.
   191  	for i, c := range s {
   192  		if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
   193  			return false
   194  		}
   195  	}
   196  	return true
   197  }
   198  
   199  func pathUnescape(path string) string {
   200  	u, err := url.PathUnescape(path)
   201  	if err != nil {
   202  		// Invalidly escaped path; use the original
   203  		return path
   204  	}
   205  	return u
   206  }
   207  
   208  // relationship is a relationship between two patterns, p1 and p2.
   209  type relationship string
   210  
   211  const (
   212  	equivalent   relationship = "equivalent"   // both match the same requests
   213  	moreGeneral  relationship = "moreGeneral"  // p1 matches everything p2 does & more
   214  	moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more
   215  	disjoint     relationship = "disjoint"     // there is no request that both match
   216  	overlaps     relationship = "overlaps"     // there is a request that both match, but neither is more specific
   217  )
   218  
   219  // conflictsWith reports whether p1 conflicts with p2, that is, whether
   220  // there is a request that both match but where neither is higher precedence
   221  // than the other.
   222  //
   223  //	Precedence is defined by two rules:
   224  //	1. Patterns with a host win over patterns without a host.
   225  //	2. Patterns whose method and path is more specific win. One pattern is more
   226  //	   specific than another if the second matches all the (method, path) pairs
   227  //	   of the first and more.
   228  //
   229  // If rule 1 doesn't apply, then two patterns conflict if their relationship
   230  // is either equivalence (they match the same set of requests) or overlap
   231  // (they both match some requests, but neither is more specific than the other).
   232  func (p1 *pattern) conflictsWith(p2 *pattern) bool {
   233  	if p1.host != p2.host {
   234  		// Either one host is empty and the other isn't, in which case the
   235  		// one with the host wins by rule 1, or neither host is empty
   236  		// and they differ, so they won't match the same paths.
   237  		return false
   238  	}
   239  	rel := p1.comparePathsAndMethods(p2)
   240  	return rel == equivalent || rel == overlaps
   241  }
   242  
   243  func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship {
   244  	mrel := p1.compareMethods(p2)
   245  	// Optimization: avoid a call to comparePaths.
   246  	if mrel == disjoint {
   247  		return disjoint
   248  	}
   249  	prel := p1.comparePaths(p2)
   250  	return combineRelationships(mrel, prel)
   251  }
   252  
   253  // compareMethods determines the relationship between the method
   254  // part of patterns p1 and p2.
   255  //
   256  // A method can either be empty, "GET", or something else.
   257  // The empty string matches any method, so it is the most general.
   258  // "GET" matches both GET and HEAD.
   259  // Anything else matches only itself.
   260  func (p1 *pattern) compareMethods(p2 *pattern) relationship {
   261  	if p1.method == p2.method {
   262  		return equivalent
   263  	}
   264  	if p1.method == "" {
   265  		// p1 matches any method, but p2 does not, so p1 is more general.
   266  		return moreGeneral
   267  	}
   268  	if p2.method == "" {
   269  		return moreSpecific
   270  	}
   271  	if p1.method == "GET" && p2.method == "HEAD" {
   272  		// p1 matches GET and HEAD; p2 matches only HEAD.
   273  		return moreGeneral
   274  	}
   275  	if p2.method == "GET" && p1.method == "HEAD" {
   276  		return moreSpecific
   277  	}
   278  	return disjoint
   279  }
   280  
   281  // comparePaths determines the relationship between the path
   282  // part of two patterns.
   283  func (p1 *pattern) comparePaths(p2 *pattern) relationship {
   284  	// Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it
   285  	// can only match paths with the same number of segments.
   286  	if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi {
   287  		return disjoint
   288  	}
   289  
   290  	// Consider corresponding segments in the two path patterns.
   291  	var segs1, segs2 []segment
   292  	rel := equivalent
   293  	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
   294  		rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0]))
   295  		if rel == disjoint {
   296  			return rel
   297  		}
   298  	}
   299  	// We've reached the end of the corresponding segments of the patterns.
   300  	// If they have the same number of segments, then we've already determined
   301  	// their relationship.
   302  	if len(segs1) == 0 && len(segs2) == 0 {
   303  		return rel
   304  	}
   305  	// Otherwise, the only way they could fail to be disjoint is if the shorter
   306  	// pattern ends in a multi. In that case, that multi is more general
   307  	// than the remainder of the longer pattern, so combine those two relationships.
   308  	if len(segs1) < len(segs2) && p1.lastSegment().multi {
   309  		return combineRelationships(rel, moreGeneral)
   310  	}
   311  	if len(segs2) < len(segs1) && p2.lastSegment().multi {
   312  		return combineRelationships(rel, moreSpecific)
   313  	}
   314  	return disjoint
   315  }
   316  
   317  // compareSegments determines the relationship between two segments.
   318  func compareSegments(s1, s2 segment) relationship {
   319  	if s1.multi && s2.multi {
   320  		return equivalent
   321  	}
   322  	if s1.multi {
   323  		return moreGeneral
   324  	}
   325  	if s2.multi {
   326  		return moreSpecific
   327  	}
   328  	if s1.wild && s2.wild {
   329  		return equivalent
   330  	}
   331  	if s1.wild {
   332  		if s2.s == "/" {
   333  			// A single wildcard doesn't match a trailing slash.
   334  			return disjoint
   335  		}
   336  		return moreGeneral
   337  	}
   338  	if s2.wild {
   339  		if s1.s == "/" {
   340  			return disjoint
   341  		}
   342  		return moreSpecific
   343  	}
   344  	// Both literals.
   345  	if s1.s == s2.s {
   346  		return equivalent
   347  	}
   348  	return disjoint
   349  }
   350  
   351  // combineRelationships determines the overall relationship of two patterns
   352  // given the relationships of a partition of the patterns into two parts.
   353  //
   354  // For example, if p1 is more general than p2 in one way but equivalent
   355  // in the other, then it is more general overall.
   356  //
   357  // Or if p1 is more general in one way and more specific in the other, then
   358  // they overlap.
   359  func combineRelationships(r1, r2 relationship) relationship {
   360  	switch r1 {
   361  	case equivalent:
   362  		return r2
   363  	case disjoint:
   364  		return disjoint
   365  	case overlaps:
   366  		if r2 == disjoint {
   367  			return disjoint
   368  		}
   369  		return overlaps
   370  	case moreGeneral, moreSpecific:
   371  		switch r2 {
   372  		case equivalent:
   373  			return r1
   374  		case inverseRelationship(r1):
   375  			return overlaps
   376  		default:
   377  			return r2
   378  		}
   379  	default:
   380  		panic(fmt.Sprintf("unknown relationship %q", r1))
   381  	}
   382  }
   383  
   384  // If p1 has relationship `r` to p2, then
   385  // p2 has inverseRelationship(r) to p1.
   386  func inverseRelationship(r relationship) relationship {
   387  	switch r {
   388  	case moreSpecific:
   389  		return moreGeneral
   390  	case moreGeneral:
   391  		return moreSpecific
   392  	default:
   393  		return r
   394  	}
   395  }
   396  
   397  // isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard.
   398  func isLitOrSingle(seg segment) bool {
   399  	if seg.wild {
   400  		return !seg.multi
   401  	}
   402  	return seg.s != "/"
   403  }
   404  
   405  // describeConflict returns an explanation of why two patterns conflict.
   406  func describeConflict(p1, p2 *pattern) string {
   407  	mrel := p1.compareMethods(p2)
   408  	prel := p1.comparePaths(p2)
   409  	rel := combineRelationships(mrel, prel)
   410  	if rel == equivalent {
   411  		return fmt.Sprintf("%s matches the same requests as %s", p1, p2)
   412  	}
   413  	if rel != overlaps {
   414  		panic("describeConflict called with non-conflicting patterns")
   415  	}
   416  	if prel == overlaps {
   417  		return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q.
   418  But neither is more specific than the other.
   419  %[1]s matches %[4]q, but %[2]s doesn't.
   420  %[2]s matches %[5]q, but %[1]s doesn't.`,
   421  			p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1))
   422  	}
   423  	if mrel == moreGeneral && prel == moreSpecific {
   424  		return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2)
   425  	}
   426  	if mrel == moreSpecific && prel == moreGeneral {
   427  		return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2)
   428  	}
   429  	return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel)
   430  }
   431  
   432  // writeMatchingPath writes to b a path that matches the segments.
   433  func writeMatchingPath(b *strings.Builder, segs []segment) {
   434  	for _, s := range segs {
   435  		writeSegment(b, s)
   436  	}
   437  }
   438  
   439  func writeSegment(b *strings.Builder, s segment) {
   440  	b.WriteByte('/')
   441  	if !s.multi && s.s != "/" {
   442  		b.WriteString(s.s)
   443  	}
   444  }
   445  
   446  // commonPath returns a path that both p1 and p2 match.
   447  // It assumes there is such a path.
   448  func commonPath(p1, p2 *pattern) string {
   449  	var b strings.Builder
   450  	var segs1, segs2 []segment
   451  	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
   452  		if s1 := segs1[0]; s1.wild {
   453  			writeSegment(&b, segs2[0])
   454  		} else {
   455  			writeSegment(&b, s1)
   456  		}
   457  	}
   458  	if len(segs1) > 0 {
   459  		writeMatchingPath(&b, segs1)
   460  	} else if len(segs2) > 0 {
   461  		writeMatchingPath(&b, segs2)
   462  	}
   463  	return b.String()
   464  }
   465  
   466  // differencePath returns a path that p1 matches and p2 doesn't.
   467  // It assumes there is such a path.
   468  func differencePath(p1, p2 *pattern) string {
   469  	var b strings.Builder
   470  
   471  	var segs1, segs2 []segment
   472  	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
   473  		s1 := segs1[0]
   474  		s2 := segs2[0]
   475  		if s1.multi && s2.multi {
   476  			// From here the patterns match the same paths, so we must have found a difference earlier.
   477  			b.WriteByte('/')
   478  			return b.String()
   479  
   480  		}
   481  		if s1.multi && !s2.multi {
   482  			// s1 ends in a "..." wildcard but s2 does not.
   483  			// A trailing slash will distinguish them, unless s2 ends in "{$}",
   484  			// in which case any segment will do; prefer the wildcard name if
   485  			// it has one.
   486  			b.WriteByte('/')
   487  			if s2.s == "/" {
   488  				if s1.s != "" {
   489  					b.WriteString(s1.s)
   490  				} else {
   491  					b.WriteString("x")
   492  				}
   493  			}
   494  			return b.String()
   495  		}
   496  		if !s1.multi && s2.multi {
   497  			writeSegment(&b, s1)
   498  		} else if s1.wild && s2.wild {
   499  			// Both patterns will match whatever we put here; use
   500  			// the first wildcard name.
   501  			writeSegment(&b, s1)
   502  		} else if s1.wild && !s2.wild {
   503  			// s1 is a wildcard, s2 is a literal.
   504  			// Any segment other than s2.s will work.
   505  			// Prefer the wildcard name, but if it's the same as the literal,
   506  			// tweak the literal.
   507  			if s1.s != s2.s {
   508  				writeSegment(&b, s1)
   509  			} else {
   510  				b.WriteByte('/')
   511  				b.WriteString(s2.s + "x")
   512  			}
   513  		} else if !s1.wild && s2.wild {
   514  			writeSegment(&b, s1)
   515  		} else {
   516  			// Both are literals. A precondition of this function is that the
   517  			// patterns overlap, so they must be the same literal. Use it.
   518  			if s1.s != s2.s {
   519  				panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s))
   520  			}
   521  			writeSegment(&b, s1)
   522  		}
   523  	}
   524  	if len(segs1) > 0 {
   525  		// p1 is longer than p2, and p2 does not end in a multi.
   526  		// Anything that matches the rest of p1 will do.
   527  		writeMatchingPath(&b, segs1)
   528  	} else if len(segs2) > 0 {
   529  		writeMatchingPath(&b, segs2)
   530  	}
   531  	return b.String()
   532  }
   533  

View as plain text