Source file src/go/token/position.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package token
     6  
     7  import (
     8  	"cmp"
     9  	"fmt"
    10  	"slices"
    11  	"strconv"
    12  	"sync"
    13  	"sync/atomic"
    14  )
    15  
    16  // If debug is set, invalid offset and position values cause a panic
    17  // (go.dev/issue/57490).
    18  const debug = false
    19  
    20  // -----------------------------------------------------------------------------
    21  // Positions
    22  
    23  // Position describes an arbitrary source position
    24  // including the file, line, and column location.
    25  // A Position is valid if the line number is > 0.
    26  type Position struct {
    27  	Filename string // filename, if any
    28  	Offset   int    // offset, starting at 0
    29  	Line     int    // line number, starting at 1
    30  	Column   int    // column number, starting at 1 (byte count)
    31  }
    32  
    33  // IsValid reports whether the position is valid.
    34  func (pos *Position) IsValid() bool { return pos.Line > 0 }
    35  
    36  // String returns a string in one of several forms:
    37  //
    38  //	file:line:column    valid position with file name
    39  //	file:line           valid position with file name but no column (column == 0)
    40  //	line:column         valid position without file name
    41  //	line                valid position without file name and no column (column == 0)
    42  //	file                invalid position with file name
    43  //	-                   invalid position without file name
    44  func (pos Position) String() string {
    45  	s := pos.Filename
    46  	if pos.IsValid() {
    47  		if s != "" {
    48  			s += ":"
    49  		}
    50  		s += strconv.Itoa(pos.Line)
    51  		if pos.Column != 0 {
    52  			s += fmt.Sprintf(":%d", pos.Column)
    53  		}
    54  	}
    55  	if s == "" {
    56  		s = "-"
    57  	}
    58  	return s
    59  }
    60  
    61  // Pos is a compact encoding of a source position within a file set.
    62  // It can be converted into a [Position] for a more convenient, but much
    63  // larger, representation.
    64  //
    65  // The Pos value for a given file is a number in the range [base, base+size],
    66  // where base and size are specified when a file is added to the file set.
    67  // The difference between a Pos value and the corresponding file base
    68  // corresponds to the byte offset of that position (represented by the Pos value)
    69  // from the beginning of the file. Thus, the file base offset is the Pos value
    70  // representing the first byte in the file.
    71  //
    72  // To create the Pos value for a specific source offset (measured in bytes),
    73  // first add the respective file to the current file set using [FileSet.AddFile]
    74  // and then call [File.Pos](offset) for that file. Given a Pos value p
    75  // for a specific file set fset, the corresponding [Position] value is
    76  // obtained by calling fset.Position(p).
    77  //
    78  // Pos values can be compared directly with the usual comparison operators:
    79  // If two Pos values p and q are in the same file, comparing p and q is
    80  // equivalent to comparing the respective source file offsets. If p and q
    81  // are in different files, p < q is true if the file implied by p was added
    82  // to the respective file set before the file implied by q.
    83  type Pos int
    84  
    85  // The zero value for [Pos] is NoPos; there is no file and line information
    86  // associated with it, and NoPos.IsValid() is false. NoPos is always
    87  // smaller than any other [Pos] value. The corresponding [Position] value
    88  // for NoPos is the zero value for [Position].
    89  const NoPos Pos = 0
    90  
    91  // IsValid reports whether the position is valid.
    92  func (p Pos) IsValid() bool {
    93  	return p != NoPos
    94  }
    95  
    96  // -----------------------------------------------------------------------------
    97  // File
    98  
    99  // A File is a handle for a file belonging to a [FileSet].
   100  // A File has a name, size, and line offset table.
   101  //
   102  // Use [FileSet.AddFile] to create a File.
   103  // A File may belong to more than one FileSet; see [FileSet.AddExistingFiles].
   104  type File struct {
   105  	name string // file name as provided to AddFile
   106  	base int    // Pos value range for this file is [base...base+size]
   107  	size int    // file size as provided to AddFile
   108  
   109  	// lines and infos are protected by mutex
   110  	mutex sync.Mutex
   111  	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
   112  	infos []lineInfo
   113  }
   114  
   115  // String returns a brief description of the File.
   116  func (f *File) String() string {
   117  	return fmt.Sprintf("%s(%d-%d)", f.Name(), f.Base(), f.End())
   118  }
   119  
   120  // Name returns the file name of file f as registered with AddFile.
   121  func (f *File) Name() string {
   122  	return f.name
   123  }
   124  
   125  // Base returns the base offset of file f as registered with AddFile.
   126  func (f *File) Base() int {
   127  	return f.base
   128  }
   129  
   130  // Size returns the size of file f as registered with AddFile.
   131  func (f *File) Size() int {
   132  	return f.size
   133  }
   134  
   135  // End returns the end position of file f as registered with AddFile.
   136  func (f *File) End() Pos {
   137  	return Pos(f.base + f.size)
   138  }
   139  
   140  // LineCount returns the number of lines in file f.
   141  func (f *File) LineCount() int {
   142  	f.mutex.Lock()
   143  	n := len(f.lines)
   144  	f.mutex.Unlock()
   145  	return n
   146  }
   147  
   148  // AddLine adds the line offset for a new line.
   149  // The line offset must be larger than the offset for the previous line
   150  // and smaller than the file size; otherwise the line offset is ignored.
   151  func (f *File) AddLine(offset int) {
   152  	f.mutex.Lock()
   153  	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
   154  		f.lines = append(f.lines, offset)
   155  	}
   156  	f.mutex.Unlock()
   157  }
   158  
   159  // MergeLine merges a line with the following line. It is akin to replacing
   160  // the newline character at the end of the line with a space (to not change the
   161  // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
   162  // MergeLine will panic if given an invalid line number.
   163  func (f *File) MergeLine(line int) {
   164  	if line < 1 {
   165  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   166  	}
   167  	f.mutex.Lock()
   168  	defer f.mutex.Unlock()
   169  	if line >= len(f.lines) {
   170  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   171  	}
   172  	// To merge the line numbered <line> with the line numbered <line+1>,
   173  	// we need to remove the entry in lines corresponding to the line
   174  	// numbered <line+1>. The entry in lines corresponding to the line
   175  	// numbered <line+1> is located at index <line>, since indices in lines
   176  	// are 0-based and line numbers are 1-based.
   177  	copy(f.lines[line:], f.lines[line+1:])
   178  	f.lines = f.lines[:len(f.lines)-1]
   179  }
   180  
   181  // Lines returns the effective line offset table of the form described by [File.SetLines].
   182  // Callers must not mutate the result.
   183  func (f *File) Lines() []int {
   184  	f.mutex.Lock()
   185  	lines := f.lines
   186  	f.mutex.Unlock()
   187  	return lines
   188  }
   189  
   190  // SetLines sets the line offsets for a file and reports whether it succeeded.
   191  // The line offsets are the offsets of the first character of each line;
   192  // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
   193  // An empty file has an empty line offset table.
   194  // Each line offset must be larger than the offset for the previous line
   195  // and smaller than the file size; otherwise SetLines fails and returns
   196  // false.
   197  // Callers must not mutate the provided slice after SetLines returns.
   198  func (f *File) SetLines(lines []int) bool {
   199  	// verify validity of lines table
   200  	size := f.size
   201  	for i, offset := range lines {
   202  		if i > 0 && offset <= lines[i-1] || size <= offset {
   203  			return false
   204  		}
   205  	}
   206  
   207  	// set lines table
   208  	f.mutex.Lock()
   209  	f.lines = lines
   210  	f.mutex.Unlock()
   211  	return true
   212  }
   213  
   214  // SetLinesForContent sets the line offsets for the given file content.
   215  // It ignores position-altering //line comments.
   216  func (f *File) SetLinesForContent(content []byte) {
   217  	var lines []int
   218  	line := 0
   219  	for offset, b := range content {
   220  		if line >= 0 {
   221  			lines = append(lines, line)
   222  		}
   223  		line = -1
   224  		if b == '\n' {
   225  			line = offset + 1
   226  		}
   227  	}
   228  
   229  	// set lines table
   230  	f.mutex.Lock()
   231  	f.lines = lines
   232  	f.mutex.Unlock()
   233  }
   234  
   235  // LineStart returns the [Pos] value of the start of the specified line.
   236  // It ignores any alternative positions set using [File.AddLineColumnInfo].
   237  // LineStart panics if the 1-based line number is invalid.
   238  func (f *File) LineStart(line int) Pos {
   239  	if line < 1 {
   240  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   241  	}
   242  	f.mutex.Lock()
   243  	defer f.mutex.Unlock()
   244  	if line > len(f.lines) {
   245  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   246  	}
   247  	return Pos(f.base + f.lines[line-1])
   248  }
   249  
   250  // A lineInfo object describes alternative file, line, and column
   251  // number information (such as provided via a //line directive)
   252  // for a given file offset.
   253  type lineInfo struct {
   254  	// fields are exported to make them accessible to gob
   255  	Offset       int
   256  	Filename     string
   257  	Line, Column int
   258  }
   259  
   260  // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
   261  // It is here for backward-compatibility for code prior to Go 1.11.
   262  func (f *File) AddLineInfo(offset int, filename string, line int) {
   263  	f.AddLineColumnInfo(offset, filename, line, 1)
   264  }
   265  
   266  // AddLineColumnInfo adds alternative file, line, and column number
   267  // information for a given file offset. The offset must be larger
   268  // than the offset for the previously added alternative line info
   269  // and smaller than the file size; otherwise the information is
   270  // ignored.
   271  //
   272  // AddLineColumnInfo is typically used to register alternative position
   273  // information for line directives such as //line filename:line:column.
   274  func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
   275  	f.mutex.Lock()
   276  	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
   277  		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
   278  	}
   279  	f.mutex.Unlock()
   280  }
   281  
   282  // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
   283  func (f *File) fixOffset(offset int) int {
   284  	if debug && !(0 <= offset && offset <= f.size) {
   285  		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
   286  			0 /* for symmetry */, offset, f.size,
   287  			f.base+offset, f.base, f.base+f.size))
   288  	}
   289  	return max(min(f.size, offset), 0)
   290  }
   291  
   292  // Pos returns the Pos value for the given file offset.
   293  //
   294  // If offset is negative, the result is the file's start
   295  // position; if the offset is too large, the result is
   296  // the file's end position (see also go.dev/issue/57490).
   297  //
   298  // The following invariant, though not true for Pos values
   299  // in general, holds for the result p:
   300  // f.Pos(f.Offset(p)) == p.
   301  func (f *File) Pos(offset int) Pos {
   302  	return Pos(f.base + f.fixOffset(offset))
   303  }
   304  
   305  // Offset returns the offset for the given file position p.
   306  //
   307  // If p is before the file's start position (or if p is NoPos),
   308  // the result is 0; if p is past the file's end position,
   309  // the result is the file size (see also go.dev/issue/57490).
   310  //
   311  // The following invariant, though not true for offset values
   312  // in general, holds for the result offset:
   313  // f.Offset(f.Pos(offset)) == offset
   314  func (f *File) Offset(p Pos) int {
   315  	return f.fixOffset(int(p) - f.base)
   316  }
   317  
   318  // Line returns the line number for the given file position p;
   319  // p must be a [Pos] value in that file or [NoPos].
   320  func (f *File) Line(p Pos) int {
   321  	return f.Position(p).Line
   322  }
   323  
   324  func searchLineInfos(a []lineInfo, x int) int {
   325  	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
   326  		return cmp.Compare(a.Offset, x)
   327  	})
   328  	if !found {
   329  		// We want the lineInfo containing x, but if we didn't
   330  		// find x then i is the next one.
   331  		i--
   332  	}
   333  	return i
   334  }
   335  
   336  // unpack returns the filename and line and column number for a file offset.
   337  // If adjusted is set, unpack will return the filename and line information
   338  // possibly adjusted by //line comments; otherwise those comments are ignored.
   339  func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
   340  	f.mutex.Lock()
   341  	filename = f.name
   342  	if i := searchInts(f.lines, offset); i >= 0 {
   343  		line, column = i+1, offset-f.lines[i]+1
   344  	}
   345  	if adjusted && len(f.infos) > 0 {
   346  		// few files have extra line infos
   347  		if i := searchLineInfos(f.infos, offset); i >= 0 {
   348  			alt := &f.infos[i]
   349  			filename = alt.Filename
   350  			if i := searchInts(f.lines, alt.Offset); i >= 0 {
   351  				// i+1 is the line at which the alternative position was recorded
   352  				d := line - (i + 1) // line distance from alternative position base
   353  				line = alt.Line + d
   354  				if alt.Column == 0 {
   355  					// alternative column is unknown => relative column is unknown
   356  					// (the current specification for line directives requires
   357  					// this to apply until the next PosBase/line directive,
   358  					// not just until the new newline)
   359  					column = 0
   360  				} else if d == 0 {
   361  					// the alternative position base is on the current line
   362  					// => column is relative to alternative column
   363  					column = alt.Column + (offset - alt.Offset)
   364  				}
   365  			}
   366  		}
   367  	}
   368  	// TODO(mvdan): move Unlock back under Lock with a defer statement once
   369  	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
   370  	f.mutex.Unlock()
   371  	return
   372  }
   373  
   374  func (f *File) position(p Pos, adjusted bool) (pos Position) {
   375  	offset := f.fixOffset(int(p) - f.base)
   376  	pos.Offset = offset
   377  	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
   378  	return
   379  }
   380  
   381  // PositionFor returns the Position value for the given file position p.
   382  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   383  // If adjusted is set, the position may be adjusted by position-altering
   384  // //line comments; otherwise those comments are ignored.
   385  // p must be a Pos value in f or NoPos.
   386  func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
   387  	if p != NoPos {
   388  		pos = f.position(p, adjusted)
   389  	}
   390  	return
   391  }
   392  
   393  // Position returns the Position value for the given file position p.
   394  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   395  // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
   396  func (f *File) Position(p Pos) (pos Position) {
   397  	return f.PositionFor(p, true)
   398  }
   399  
   400  // -----------------------------------------------------------------------------
   401  // FileSet
   402  
   403  // A FileSet represents a set of source files.
   404  // Methods of file sets are synchronized; multiple goroutines
   405  // may invoke them concurrently.
   406  //
   407  // The byte offsets for each file in a file set are mapped into
   408  // distinct (integer) intervals, one interval [base, base+size]
   409  // per file. [FileSet.Base] represents the first byte in the file, and size
   410  // is the corresponding file size. A [Pos] value is a value in such
   411  // an interval. By determining the interval a [Pos] value belongs
   412  // to, the file, its file base, and thus the byte offset (position)
   413  // the [Pos] value is representing can be computed.
   414  //
   415  // When adding a new file, a file base must be provided. That can
   416  // be any integer value that is past the end of any interval of any
   417  // file already in the file set. For convenience, [FileSet.Base] provides
   418  // such a value, which is simply the end of the Pos interval of the most
   419  // recently added file, plus one. Unless there is a need to extend an
   420  // interval later, using the [FileSet.Base] should be used as argument
   421  // for [FileSet.AddFile].
   422  //
   423  // A [File] may be removed from a FileSet when it is no longer needed.
   424  // This may reduce memory usage in a long-running application.
   425  type FileSet struct {
   426  	mutex sync.RWMutex         // protects the file set
   427  	base  int                  // base offset for the next file
   428  	tree  tree                 // tree of files in ascending base order
   429  	last  atomic.Pointer[File] // cache of last file looked up
   430  }
   431  
   432  // NewFileSet creates a new file set.
   433  func NewFileSet() *FileSet {
   434  	return &FileSet{
   435  		base: 1, // 0 == NoPos
   436  	}
   437  }
   438  
   439  // Base returns the minimum base offset that must be provided to
   440  // [FileSet.AddFile] when adding the next file.
   441  func (s *FileSet) Base() int {
   442  	s.mutex.RLock()
   443  	b := s.base
   444  	s.mutex.RUnlock()
   445  	return b
   446  }
   447  
   448  // AddFile adds a new file with a given filename, base offset, and file size
   449  // to the file set s and returns the file. Multiple files may have the same
   450  // name. The base offset must not be smaller than the [FileSet.Base], and
   451  // size must not be negative. As a special case, if a negative base is provided,
   452  // the current value of the [FileSet.Base] is used instead.
   453  //
   454  // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
   455  // as the minimum base value for the next file. The following relationship
   456  // exists between a [Pos] value p for a given file offset offs:
   457  //
   458  //	int(p) = base + offs
   459  //
   460  // with offs in the range [0, size] and thus p in the range [base, base+size].
   461  // For convenience, [File.Pos] may be used to create file-specific position
   462  // values from a file offset.
   463  func (s *FileSet) AddFile(filename string, base, size int) *File {
   464  	// Allocate f outside the critical section.
   465  	f := &File{name: filename, size: size, lines: []int{0}}
   466  
   467  	s.mutex.Lock()
   468  	defer s.mutex.Unlock()
   469  	if base < 0 {
   470  		base = s.base
   471  	}
   472  	if base < s.base {
   473  		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
   474  	}
   475  	f.base = base
   476  	if size < 0 {
   477  		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
   478  	}
   479  	// base >= s.base && size >= 0
   480  	base += size + 1 // +1 because EOF also has a position
   481  	if base < 0 {
   482  		panic("token.Pos offset overflow (> 2G of source code in file set)")
   483  	}
   484  	// add the file to the file set
   485  	s.base = base
   486  	s.tree.add(f)
   487  	s.last.Store(f)
   488  	return f
   489  }
   490  
   491  // AddExistingFiles adds the specified files to the
   492  // FileSet if they are not already present.
   493  // The caller must ensure that no pair of Files that
   494  // would appear in the resulting FileSet overlap.
   495  func (s *FileSet) AddExistingFiles(files ...*File) {
   496  	// This function cannot be implemented as:
   497  	//
   498  	//	for _, file := range files {
   499  	//		if prev := fset.File(token.Pos(file.Base())); prev != nil {
   500  	//			if prev != file {
   501  	//				panic("FileSet contains a different file at the same base")
   502  	//			}
   503  	//			continue
   504  	//		}
   505  	//		file2 := fset.AddFile(file.Name(), file.Base(), file.Size())
   506  	//		file2.SetLines(file.Lines())
   507  	//	}
   508  	//
   509  	// because all calls to AddFile must be in increasing order.
   510  	// AddExistingFiles lets us augment an existing FileSet
   511  	// sequentially, so long as all sets of files have disjoint ranges.
   512  	// This approach also does not preserve line directives.
   513  
   514  	s.mutex.Lock()
   515  	defer s.mutex.Unlock()
   516  
   517  	for _, f := range files {
   518  		s.tree.add(f)
   519  		s.base = max(s.base, f.Base()+f.Size()+1)
   520  	}
   521  }
   522  
   523  // RemoveFile removes a file from the [FileSet] so that subsequent
   524  // queries for its [Pos] interval yield a negative result.
   525  // This reduces the memory usage of a long-lived [FileSet] that
   526  // encounters an unbounded stream of files.
   527  //
   528  // Removing a file that does not belong to the set has no effect.
   529  func (s *FileSet) RemoveFile(file *File) {
   530  	s.mutex.Lock()
   531  	defer s.mutex.Unlock()
   532  
   533  	s.last.CompareAndSwap(file, nil) // clear last file cache
   534  
   535  	pn, _ := s.tree.locate(file.key())
   536  	if *pn != nil && (*pn).file == file {
   537  		s.tree.delete(pn)
   538  	}
   539  }
   540  
   541  // Iterate calls yield for the files in the file set in ascending Base
   542  // order until yield returns false.
   543  func (s *FileSet) Iterate(yield func(*File) bool) {
   544  	s.mutex.RLock()
   545  	defer s.mutex.RUnlock()
   546  
   547  	// Unlock around user code.
   548  	// The iterator is robust to modification by yield.
   549  	// Avoid range here, so we can use defer.
   550  	s.tree.all()(func(f *File) bool {
   551  		s.mutex.RUnlock()
   552  		defer s.mutex.RLock()
   553  		return yield(f)
   554  	})
   555  }
   556  
   557  func (s *FileSet) file(p Pos) *File {
   558  	// common case: p is in last file.
   559  	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
   560  		return f
   561  	}
   562  
   563  	s.mutex.RLock()
   564  	defer s.mutex.RUnlock()
   565  
   566  	pn, _ := s.tree.locate(key{int(p), int(p)})
   567  	if n := *pn; n != nil {
   568  		// Update cache of last file. A race is ok,
   569  		// but an exclusive lock causes heavy contention.
   570  		s.last.Store(n.file)
   571  		return n.file
   572  	}
   573  	return nil
   574  }
   575  
   576  // File returns the file that contains the position p.
   577  // If no such file is found (for instance for p == [NoPos]),
   578  // the result is nil.
   579  func (s *FileSet) File(p Pos) (f *File) {
   580  	if p != NoPos {
   581  		f = s.file(p)
   582  	}
   583  	return
   584  }
   585  
   586  // PositionFor converts a [Pos] p in the fileset into a [Position] value.
   587  // If adjusted is set, the position may be adjusted by position-altering
   588  // //line comments; otherwise those comments are ignored.
   589  // p must be a [Pos] value in s or [NoPos].
   590  func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
   591  	if p != NoPos {
   592  		if f := s.file(p); f != nil {
   593  			return f.position(p, adjusted)
   594  		}
   595  	}
   596  	return
   597  }
   598  
   599  // Position converts a [Pos] p in the fileset into a Position value.
   600  // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
   601  func (s *FileSet) Position(p Pos) (pos Position) {
   602  	return s.PositionFor(p, true)
   603  }
   604  
   605  // -----------------------------------------------------------------------------
   606  // Helper functions
   607  
   608  func searchInts(a []int, x int) int {
   609  	// This function body is a manually inlined version of:
   610  	//
   611  	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
   612  	//
   613  	// With better compiler optimizations, this may not be needed in the
   614  	// future, but at the moment this change improves the go/printer
   615  	// benchmark performance by ~30%. This has a direct impact on the
   616  	// speed of gofmt and thus seems worthwhile (2011-04-29).
   617  	// TODO(gri): Remove this when compilers have caught up.
   618  	i, j := 0, len(a)
   619  	for i < j {
   620  		h := int(uint(i+j) >> 1) // avoid overflow when computing h
   621  		// i ≤ h < j
   622  		if a[h] <= x {
   623  			i = h + 1
   624  		} else {
   625  			j = h
   626  		}
   627  	}
   628  	return i - 1
   629  }
   630  

View as plain text