position.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package token
     6  
     7  import (
     8  	"cmp"
     9  	"fmt"
    10  	"slices"
    11  	"strconv"
    12  	"sync"
    13  	"sync/atomic"
    14  )
    15  
    16  // If debug is set, invalid offset and position values cause a panic
    17  // (go.dev/issue/57490).
    18  const debug = false
    19  
    20  // -----------------------------------------------------------------------------
    21  // Positions
    22  
    23  // Position describes an arbitrary source position
    24  // including the file, line, and column location.
    25  // A Position is valid if the line number is > 0.
    26  type Position struct {
    27  	Filename string // filename, if any
    28  	Offset   int    // offset, starting at 0
    29  	Line     int    // line number, starting at 1
    30  	Column   int    // column number, starting at 1 (byte count)
    31  }
    32  
    33  // IsValid reports whether the position is valid.
    34  func (pos *Position) IsValid() bool { return pos.Line > 0 }
    35  
    36  // String returns a string in one of several forms:
    37  //
    38  //	file:line:column    valid position with file name
    39  //	file:line           valid position with file name but no column (column == 0)
    40  //	line:column         valid position without file name
    41  //	line                valid position without file name and no column (column == 0)
    42  //	file                invalid position with file name
    43  //	-                   invalid position without file name
    44  func (pos Position) String() string {
    45  	s := pos.Filename
    46  	if pos.IsValid() {
    47  		if s != "" {
    48  			s += ":"
    49  		}
    50  		s += strconv.Itoa(pos.Line)
    51  		if pos.Column != 0 {
    52  			s += fmt.Sprintf(":%d", pos.Column)
    53  		}
    54  	}
    55  	if s == "" {
    56  		s = "-"
    57  	}
    58  	return s
    59  }
    60  
    61  // Pos is a compact encoding of a source position within a file set.
    62  // It can be converted into a [Position] for a more convenient, but much
    63  // larger, representation.
    64  //
    65  // The Pos value for a given file is a number in the range [base, base+size],
    66  // where base and size are specified when a file is added to the file set.
    67  // The difference between a Pos value and the corresponding file base
    68  // corresponds to the byte offset of that position (represented by the Pos value)
    69  // from the beginning of the file. Thus, the file base offset is the Pos value
    70  // representing the first byte in the file.
    71  //
    72  // To create the Pos value for a specific source offset (measured in bytes),
    73  // first add the respective file to the current file set using [FileSet.AddFile]
    74  // and then call [File.Pos](offset) for that file. Given a Pos value p
    75  // for a specific file set fset, the corresponding [Position] value is
    76  // obtained by calling fset.Position(p).
    77  //
    78  // Pos values can be compared directly with the usual comparison operators:
    79  // If two Pos values p and q are in the same file, comparing p and q is
    80  // equivalent to comparing the respective source file offsets. If p and q
    81  // are in different files, p < q is true if the file implied by p was added
    82  // to the respective file set before the file implied by q.
    83  type Pos int
    84  
    85  // The zero value for [Pos] is NoPos; there is no file and line information
    86  // associated with it, and NoPos.IsValid() is false. NoPos is always
    87  // smaller than any other [Pos] value. The corresponding [Position] value
    88  // for NoPos is the zero value for [Position].
    89  const NoPos Pos = 0
    90  
    91  // IsValid reports whether the position is valid.
    92  func (p Pos) IsValid() bool {
    93  	return p != NoPos
    94  }
    95  
    96  // -----------------------------------------------------------------------------
    97  // File
    98  
    99  // A File is a handle for a file belonging to a [FileSet].
   100  // A File has a name, size, and line offset table.
   101  //
   102  // Use [FileSet.AddFile] to create a File.
   103  // A File may belong to more than one FileSet; see [FileSet.AddExistingFiles].
   104  type File struct {
   105  	name string // file name as provided to AddFile
   106  	base int    // Pos value range for this file is [base...base+size]
   107  	size int    // file size as provided to AddFile
   108  
   109  	// lines and infos are protected by mutex
   110  	mutex sync.Mutex
   111  	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
   112  	infos []lineInfo
   113  }
   114  
   115  // Name returns the file name of file f as registered with AddFile.
   116  func (f *File) Name() string {
   117  	return f.name
   118  }
   119  
   120  // Base returns the base offset of file f as registered with AddFile.
   121  func (f *File) Base() int {
   122  	return f.base
   123  }
   124  
   125  // Size returns the size of file f as registered with AddFile.
   126  func (f *File) Size() int {
   127  	return f.size
   128  }
   129  
   130  // LineCount returns the number of lines in file f.
   131  func (f *File) LineCount() int {
   132  	f.mutex.Lock()
   133  	n := len(f.lines)
   134  	f.mutex.Unlock()
   135  	return n
   136  }
   137  
   138  // AddLine adds the line offset for a new line.
   139  // The line offset must be larger than the offset for the previous line
   140  // and smaller than the file size; otherwise the line offset is ignored.
   141  func (f *File) AddLine(offset int) {
   142  	f.mutex.Lock()
   143  	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
   144  		f.lines = append(f.lines, offset)
   145  	}
   146  	f.mutex.Unlock()
   147  }
   148  
   149  // MergeLine merges a line with the following line. It is akin to replacing
   150  // the newline character at the end of the line with a space (to not change the
   151  // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
   152  // MergeLine will panic if given an invalid line number.
   153  func (f *File) MergeLine(line int) {
   154  	if line < 1 {
   155  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   156  	}
   157  	f.mutex.Lock()
   158  	defer f.mutex.Unlock()
   159  	if line >= len(f.lines) {
   160  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   161  	}
   162  	// To merge the line numbered <line> with the line numbered <line+1>,
   163  	// we need to remove the entry in lines corresponding to the line
   164  	// numbered <line+1>. The entry in lines corresponding to the line
   165  	// numbered <line+1> is located at index <line>, since indices in lines
   166  	// are 0-based and line numbers are 1-based.
   167  	copy(f.lines[line:], f.lines[line+1:])
   168  	f.lines = f.lines[:len(f.lines)-1]
   169  }
   170  
   171  // Lines returns the effective line offset table of the form described by [File.SetLines].
   172  // Callers must not mutate the result.
   173  func (f *File) Lines() []int {
   174  	f.mutex.Lock()
   175  	lines := f.lines
   176  	f.mutex.Unlock()
   177  	return lines
   178  }
   179  
   180  // SetLines sets the line offsets for a file and reports whether it succeeded.
   181  // The line offsets are the offsets of the first character of each line;
   182  // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
   183  // An empty file has an empty line offset table.
   184  // Each line offset must be larger than the offset for the previous line
   185  // and smaller than the file size; otherwise SetLines fails and returns
   186  // false.
   187  // Callers must not mutate the provided slice after SetLines returns.
   188  func (f *File) SetLines(lines []int) bool {
   189  	// verify validity of lines table
   190  	size := f.size
   191  	for i, offset := range lines {
   192  		if i > 0 && offset <= lines[i-1] || size <= offset {
   193  			return false
   194  		}
   195  	}
   196  
   197  	// set lines table
   198  	f.mutex.Lock()
   199  	f.lines = lines
   200  	f.mutex.Unlock()
   201  	return true
   202  }
   203  
   204  // SetLinesForContent sets the line offsets for the given file content.
   205  // It ignores position-altering //line comments.
   206  func (f *File) SetLinesForContent(content []byte) {
   207  	var lines []int
   208  	line := 0
   209  	for offset, b := range content {
   210  		if line >= 0 {
   211  			lines = append(lines, line)
   212  		}
   213  		line = -1
   214  		if b == '\n' {
   215  			line = offset + 1
   216  		}
   217  	}
   218  
   219  	// set lines table
   220  	f.mutex.Lock()
   221  	f.lines = lines
   222  	f.mutex.Unlock()
   223  }
   224  
   225  // LineStart returns the [Pos] value of the start of the specified line.
   226  // It ignores any alternative positions set using [File.AddLineColumnInfo].
   227  // LineStart panics if the 1-based line number is invalid.
   228  func (f *File) LineStart(line int) Pos {
   229  	if line < 1 {
   230  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   231  	}
   232  	f.mutex.Lock()
   233  	defer f.mutex.Unlock()
   234  	if line > len(f.lines) {
   235  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   236  	}
   237  	return Pos(f.base + f.lines[line-1])
   238  }
   239  
   240  // A lineInfo object describes alternative file, line, and column
   241  // number information (such as provided via a //line directive)
   242  // for a given file offset.
   243  type lineInfo struct {
   244  	// fields are exported to make them accessible to gob
   245  	Offset       int
   246  	Filename     string
   247  	Line, Column int
   248  }
   249  
   250  // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
   251  // It is here for backward-compatibility for code prior to Go 1.11.
   252  func (f *File) AddLineInfo(offset int, filename string, line int) {
   253  	f.AddLineColumnInfo(offset, filename, line, 1)
   254  }
   255  
   256  // AddLineColumnInfo adds alternative file, line, and column number
   257  // information for a given file offset. The offset must be larger
   258  // than the offset for the previously added alternative line info
   259  // and smaller than the file size; otherwise the information is
   260  // ignored.
   261  //
   262  // AddLineColumnInfo is typically used to register alternative position
   263  // information for line directives such as //line filename:line:column.
   264  func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
   265  	f.mutex.Lock()
   266  	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
   267  		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
   268  	}
   269  	f.mutex.Unlock()
   270  }
   271  
   272  // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
   273  func (f *File) fixOffset(offset int) int {
   274  	switch {
   275  	case offset < 0:
   276  		if !debug {
   277  			return 0
   278  		}
   279  	case offset > f.size:
   280  		if !debug {
   281  			return f.size
   282  		}
   283  	default:
   284  		return offset
   285  	}
   286  
   287  	// only generate this code if needed
   288  	if debug {
   289  		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
   290  			0 /* for symmetry */, offset, f.size,
   291  			f.base+offset, f.base, f.base+f.size))
   292  	}
   293  	return 0
   294  }
   295  
   296  // Pos returns the Pos value for the given file offset.
   297  //
   298  // If offset is negative, the result is the file's start
   299  // position; if the offset is too large, the result is
   300  // the file's end position (see also go.dev/issue/57490).
   301  //
   302  // The following invariant, though not true for Pos values
   303  // in general, holds for the result p:
   304  // f.Pos(f.Offset(p)) == p.
   305  func (f *File) Pos(offset int) Pos {
   306  	return Pos(f.base + f.fixOffset(offset))
   307  }
   308  
   309  // Offset returns the offset for the given file position p.
   310  //
   311  // If p is before the file's start position (or if p is NoPos),
   312  // the result is 0; if p is past the file's end position,
   313  // the result is the file size (see also go.dev/issue/57490).
   314  //
   315  // The following invariant, though not true for offset values
   316  // in general, holds for the result offset:
   317  // f.Offset(f.Pos(offset)) == offset
   318  func (f *File) Offset(p Pos) int {
   319  	return f.fixOffset(int(p) - f.base)
   320  }
   321  
   322  // Line returns the line number for the given file position p;
   323  // p must be a [Pos] value in that file or [NoPos].
   324  func (f *File) Line(p Pos) int {
   325  	return f.Position(p).Line
   326  }
   327  
   328  func searchLineInfos(a []lineInfo, x int) int {
   329  	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
   330  		return cmp.Compare(a.Offset, x)
   331  	})
   332  	if !found {
   333  		// We want the lineInfo containing x, but if we didn't
   334  		// find x then i is the next one.
   335  		i--
   336  	}
   337  	return i
   338  }
   339  
   340  // unpack returns the filename and line and column number for a file offset.
   341  // If adjusted is set, unpack will return the filename and line information
   342  // possibly adjusted by //line comments; otherwise those comments are ignored.
   343  func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
   344  	f.mutex.Lock()
   345  	filename = f.name
   346  	if i := searchInts(f.lines, offset); i >= 0 {
   347  		line, column = i+1, offset-f.lines[i]+1
   348  	}
   349  	if adjusted && len(f.infos) > 0 {
   350  		// few files have extra line infos
   351  		if i := searchLineInfos(f.infos, offset); i >= 0 {
   352  			alt := &f.infos[i]
   353  			filename = alt.Filename
   354  			if i := searchInts(f.lines, alt.Offset); i >= 0 {
   355  				// i+1 is the line at which the alternative position was recorded
   356  				d := line - (i + 1) // line distance from alternative position base
   357  				line = alt.Line + d
   358  				if alt.Column == 0 {
   359  					// alternative column is unknown => relative column is unknown
   360  					// (the current specification for line directives requires
   361  					// this to apply until the next PosBase/line directive,
   362  					// not just until the new newline)
   363  					column = 0
   364  				} else if d == 0 {
   365  					// the alternative position base is on the current line
   366  					// => column is relative to alternative column
   367  					column = alt.Column + (offset - alt.Offset)
   368  				}
   369  			}
   370  		}
   371  	}
   372  	// TODO(mvdan): move Unlock back under Lock with a defer statement once
   373  	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
   374  	f.mutex.Unlock()
   375  	return
   376  }
   377  
   378  func (f *File) position(p Pos, adjusted bool) (pos Position) {
   379  	offset := f.fixOffset(int(p) - f.base)
   380  	pos.Offset = offset
   381  	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
   382  	return
   383  }
   384  
   385  // PositionFor returns the Position value for the given file position p.
   386  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   387  // If adjusted is set, the position may be adjusted by position-altering
   388  // //line comments; otherwise those comments are ignored.
   389  // p must be a Pos value in f or NoPos.
   390  func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
   391  	if p != NoPos {
   392  		pos = f.position(p, adjusted)
   393  	}
   394  	return
   395  }
   396  
   397  // Position returns the Position value for the given file position p.
   398  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   399  // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
   400  func (f *File) Position(p Pos) (pos Position) {
   401  	return f.PositionFor(p, true)
   402  }
   403  
   404  // -----------------------------------------------------------------------------
   405  // FileSet
   406  
   407  // A FileSet represents a set of source files.
   408  // Methods of file sets are synchronized; multiple goroutines
   409  // may invoke them concurrently.
   410  //
   411  // The byte offsets for each file in a file set are mapped into
   412  // distinct (integer) intervals, one interval [base, base+size]
   413  // per file. [FileSet.Base] represents the first byte in the file, and size
   414  // is the corresponding file size. A [Pos] value is a value in such
   415  // an interval. By determining the interval a [Pos] value belongs
   416  // to, the file, its file base, and thus the byte offset (position)
   417  // the [Pos] value is representing can be computed.
   418  //
   419  // When adding a new file, a file base must be provided. That can
   420  // be any integer value that is past the end of any interval of any
   421  // file already in the file set. For convenience, [FileSet.Base] provides
   422  // such a value, which is simply the end of the Pos interval of the most
   423  // recently added file, plus one. Unless there is a need to extend an
   424  // interval later, using the [FileSet.Base] should be used as argument
   425  // for [FileSet.AddFile].
   426  //
   427  // A [File] may be removed from a FileSet when it is no longer needed.
   428  // This may reduce memory usage in a long-running application.
   429  type FileSet struct {
   430  	mutex sync.RWMutex         // protects the file set
   431  	base  int                  // base offset for the next file
   432  	tree  tree                 // tree of files in ascending base order
   433  	last  atomic.Pointer[File] // cache of last file looked up
   434  }
   435  
   436  // NewFileSet creates a new file set.
   437  func NewFileSet() *FileSet {
   438  	return &FileSet{
   439  		base: 1, // 0 == NoPos
   440  	}
   441  }
   442  
   443  // Base returns the minimum base offset that must be provided to
   444  // [FileSet.AddFile] when adding the next file.
   445  func (s *FileSet) Base() int {
   446  	s.mutex.RLock()
   447  	b := s.base
   448  	s.mutex.RUnlock()
   449  	return b
   450  }
   451  
   452  // AddFile adds a new file with a given filename, base offset, and file size
   453  // to the file set s and returns the file. Multiple files may have the same
   454  // name. The base offset must not be smaller than the [FileSet.Base], and
   455  // size must not be negative. As a special case, if a negative base is provided,
   456  // the current value of the [FileSet.Base] is used instead.
   457  //
   458  // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
   459  // as the minimum base value for the next file. The following relationship
   460  // exists between a [Pos] value p for a given file offset offs:
   461  //
   462  //	int(p) = base + offs
   463  //
   464  // with offs in the range [0, size] and thus p in the range [base, base+size].
   465  // For convenience, [File.Pos] may be used to create file-specific position
   466  // values from a file offset.
   467  func (s *FileSet) AddFile(filename string, base, size int) *File {
   468  	// Allocate f outside the critical section.
   469  	f := &File{name: filename, size: size, lines: []int{0}}
   470  
   471  	s.mutex.Lock()
   472  	defer s.mutex.Unlock()
   473  	if base < 0 {
   474  		base = s.base
   475  	}
   476  	if base < s.base {
   477  		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
   478  	}
   479  	f.base = base
   480  	if size < 0 {
   481  		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
   482  	}
   483  	// base >= s.base && size >= 0
   484  	base += size + 1 // +1 because EOF also has a position
   485  	if base < 0 {
   486  		panic("token.Pos offset overflow (> 2G of source code in file set)")
   487  	}
   488  	// add the file to the file set
   489  	s.base = base
   490  	s.tree.add(f)
   491  	s.last.Store(f)
   492  	return f
   493  }
   494  
   495  // AddExistingFiles adds the specified files to the
   496  // FileSet if they are not already present.
   497  // The caller must ensure that no pair of Files that
   498  // would appear in the resulting FileSet overlap.
   499  func (s *FileSet) AddExistingFiles(files ...*File) {
   500  	// This function cannot be implemented as:
   501  	//
   502  	//	for _, file := range files {
   503  	//		if prev := fset.File(token.Pos(file.Base())); prev != nil {
   504  	//			if prev != file {
   505  	//				panic("FileSet contains a different file at the same base")
   506  	//			}
   507  	//			continue
   508  	//		}
   509  	//		file2 := fset.AddFile(file.Name(), file.Base(), file.Size())
   510  	//		file2.SetLines(file.Lines())
   511  	//	}
   512  	//
   513  	// because all calls to AddFile must be in increasing order.
   514  	// AddExistingFilesFiles lets us augment an existing FileSet
   515  	// sequentially, so long as all sets of files have disjoint ranges.
   516  	// This approach also does not preserve line directives.
   517  
   518  	s.mutex.Lock()
   519  	defer s.mutex.Unlock()
   520  
   521  	for _, f := range files {
   522  		s.tree.add(f)
   523  		s.base = max(s.base, f.Base()+f.Size()+1)
   524  	}
   525  }
   526  
   527  // RemoveFile removes a file from the [FileSet] so that subsequent
   528  // queries for its [Pos] interval yield a negative result.
   529  // This reduces the memory usage of a long-lived [FileSet] that
   530  // encounters an unbounded stream of files.
   531  //
   532  // Removing a file that does not belong to the set has no effect.
   533  func (s *FileSet) RemoveFile(file *File) {
   534  	s.last.CompareAndSwap(file, nil) // clear last file cache
   535  
   536  	s.mutex.Lock()
   537  	defer s.mutex.Unlock()
   538  
   539  	pn, _ := s.tree.locate(file.key())
   540  	if *pn != nil && (*pn).file == file {
   541  		s.tree.delete(pn)
   542  	}
   543  }
   544  
   545  // Iterate calls yield for the files in the file set in ascending Base
   546  // order until yield returns false.
   547  func (s *FileSet) Iterate(yield func(*File) bool) {
   548  	s.mutex.RLock()
   549  	defer s.mutex.RUnlock()
   550  
   551  	// Unlock around user code.
   552  	// The iterator is robust to modification by yield.
   553  	// Avoid range here, so we can use defer.
   554  	s.tree.all()(func(f *File) bool {
   555  		s.mutex.RUnlock()
   556  		defer s.mutex.RLock()
   557  		return yield(f)
   558  	})
   559  }
   560  
   561  func (s *FileSet) file(p Pos) *File {
   562  	// common case: p is in last file.
   563  	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
   564  		return f
   565  	}
   566  
   567  	s.mutex.RLock()
   568  	defer s.mutex.RUnlock()
   569  
   570  	pn, _ := s.tree.locate(key{int(p), int(p)})
   571  	if n := *pn; n != nil {
   572  		// Update cache of last file. A race is ok,
   573  		// but an exclusive lock causes heavy contention.
   574  		s.last.Store(n.file)
   575  		return n.file
   576  	}
   577  	return nil
   578  }
   579  
   580  // File returns the file that contains the position p.
   581  // If no such file is found (for instance for p == [NoPos]),
   582  // the result is nil.
   583  func (s *FileSet) File(p Pos) (f *File) {
   584  	if p != NoPos {
   585  		f = s.file(p)
   586  	}
   587  	return
   588  }
   589  
   590  // PositionFor converts a [Pos] p in the fileset into a [Position] value.
   591  // If adjusted is set, the position may be adjusted by position-altering
   592  // //line comments; otherwise those comments are ignored.
   593  // p must be a [Pos] value in s or [NoPos].
   594  func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
   595  	if p != NoPos {
   596  		if f := s.file(p); f != nil {
   597  			return f.position(p, adjusted)
   598  		}
   599  	}
   600  	return
   601  }
   602  
   603  // Position converts a [Pos] p in the fileset into a Position value.
   604  // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
   605  func (s *FileSet) Position(p Pos) (pos Position) {
   606  	return s.PositionFor(p, true)
   607  }
   608  
   609  // -----------------------------------------------------------------------------
   610  // Helper functions
   611  
   612  func searchInts(a []int, x int) int {
   613  	// This function body is a manually inlined version of:
   614  	//
   615  	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
   616  	//
   617  	// With better compiler optimizations, this may not be needed in the
   618  	// future, but at the moment this change improves the go/printer
   619  	// benchmark performance by ~30%. This has a direct impact on the
   620  	// speed of gofmt and thus seems worthwhile (2011-04-29).
   621  	// TODO(gri): Remove this when compilers have caught up.
   622  	i, j := 0, len(a)
   623  	for i < j {
   624  		h := int(uint(i+j) >> 1) // avoid overflow when computing h
   625  		// i ≤ h < j
   626  		if a[h] <= x {
   627  			i = h + 1
   628  		} else {
   629  			j = h
   630  		}
   631  	}
   632  	return i - 1
   633  }
   634
View as plain text