Source file src/go/token/position.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package token
     6  
     7  import (
     8  	"cmp"
     9  	"fmt"
    10  	"slices"
    11  	"strconv"
    12  	"sync"
    13  	"sync/atomic"
    14  )
    15  
    16  // If debug is set, invalid offset and position values cause a panic
    17  // (go.dev/issue/57490).
    18  const debug = false
    19  
    20  // -----------------------------------------------------------------------------
    21  // Positions
    22  
    23  // Position describes an arbitrary source position
    24  // including the file, line, and column location.
    25  // A Position is valid if the line number is > 0.
    26  type Position struct {
    27  	Filename string // filename, if any
    28  	Offset   int    // offset, starting at 0
    29  	Line     int    // line number, starting at 1
    30  	Column   int    // column number, starting at 1 (byte count)
    31  }
    32  
    33  // IsValid reports whether the position is valid.
    34  func (pos *Position) IsValid() bool { return pos.Line > 0 }
    35  
    36  // String returns a string in one of several forms:
    37  //
    38  //	file:line:column    valid position with file name
    39  //	file:line           valid position with file name but no column (column == 0)
    40  //	line:column         valid position without file name
    41  //	line                valid position without file name and no column (column == 0)
    42  //	file                invalid position with file name
    43  //	-                   invalid position without file name
    44  func (pos Position) String() string {
    45  	s := pos.Filename
    46  	if pos.IsValid() {
    47  		if s != "" {
    48  			s += ":"
    49  		}
    50  		s += strconv.Itoa(pos.Line)
    51  		if pos.Column != 0 {
    52  			s += fmt.Sprintf(":%d", pos.Column)
    53  		}
    54  	}
    55  	if s == "" {
    56  		s = "-"
    57  	}
    58  	return s
    59  }
    60  
    61  // Pos is a compact encoding of a source position within a file set.
    62  // It can be converted into a [Position] for a more convenient, but much
    63  // larger, representation.
    64  //
    65  // The Pos value for a given file is a number in the range [base, base+size],
    66  // where base and size are specified when a file is added to the file set.
    67  // The difference between a Pos value and the corresponding file base
    68  // corresponds to the byte offset of that position (represented by the Pos value)
    69  // from the beginning of the file. Thus, the file base offset is the Pos value
    70  // representing the first byte in the file.
    71  //
    72  // To create the Pos value for a specific source offset (measured in bytes),
    73  // first add the respective file to the current file set using [FileSet.AddFile]
    74  // and then call [File.Pos](offset) for that file. Given a Pos value p
    75  // for a specific file set fset, the corresponding [Position] value is
    76  // obtained by calling fset.Position(p).
    77  //
    78  // Pos values can be compared directly with the usual comparison operators:
    79  // If two Pos values p and q are in the same file, comparing p and q is
    80  // equivalent to comparing the respective source file offsets. If p and q
    81  // are in different files, p < q is true if the file implied by p was added
    82  // to the respective file set before the file implied by q.
    83  type Pos int
    84  
    85  // The zero value for [Pos] is NoPos; there is no file and line information
    86  // associated with it, and NoPos.IsValid() is false. NoPos is always
    87  // smaller than any other [Pos] value. The corresponding [Position] value
    88  // for NoPos is the zero value for [Position].
    89  const NoPos Pos = 0
    90  
    91  // IsValid reports whether the position is valid.
    92  func (p Pos) IsValid() bool {
    93  	return p != NoPos
    94  }
    95  
    96  // -----------------------------------------------------------------------------
    97  // File
    98  
    99  // A File is a handle for a file belonging to a [FileSet].
   100  // A File has a name, size, and line offset table.
   101  //
   102  // Use [FileSet.AddFile] to create a File.
   103  // A File may belong to more than one FileSet; see [FileSet.AddExistingFiles].
   104  type File struct {
   105  	name string // file name as provided to AddFile
   106  	base int    // Pos value range for this file is [base...base+size]
   107  	size int    // file size as provided to AddFile
   108  
   109  	// lines and infos are protected by mutex
   110  	mutex sync.Mutex
   111  	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
   112  	infos []lineInfo
   113  }
   114  
   115  // Name returns the file name of file f as registered with AddFile.
   116  func (f *File) Name() string {
   117  	return f.name
   118  }
   119  
   120  // Base returns the base offset of file f as registered with AddFile.
   121  func (f *File) Base() int {
   122  	return f.base
   123  }
   124  
   125  // Size returns the size of file f as registered with AddFile.
   126  func (f *File) Size() int {
   127  	return f.size
   128  }
   129  
   130  // End returns the end position of file f as registered with AddFile.
   131  func (f *File) End() Pos {
   132  	return Pos(f.base + f.size)
   133  }
   134  
   135  // LineCount returns the number of lines in file f.
   136  func (f *File) LineCount() int {
   137  	f.mutex.Lock()
   138  	n := len(f.lines)
   139  	f.mutex.Unlock()
   140  	return n
   141  }
   142  
   143  // AddLine adds the line offset for a new line.
   144  // The line offset must be larger than the offset for the previous line
   145  // and smaller than the file size; otherwise the line offset is ignored.
   146  func (f *File) AddLine(offset int) {
   147  	f.mutex.Lock()
   148  	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
   149  		f.lines = append(f.lines, offset)
   150  	}
   151  	f.mutex.Unlock()
   152  }
   153  
   154  // MergeLine merges a line with the following line. It is akin to replacing
   155  // the newline character at the end of the line with a space (to not change the
   156  // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
   157  // MergeLine will panic if given an invalid line number.
   158  func (f *File) MergeLine(line int) {
   159  	if line < 1 {
   160  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   161  	}
   162  	f.mutex.Lock()
   163  	defer f.mutex.Unlock()
   164  	if line >= len(f.lines) {
   165  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   166  	}
   167  	// To merge the line numbered <line> with the line numbered <line+1>,
   168  	// we need to remove the entry in lines corresponding to the line
   169  	// numbered <line+1>. The entry in lines corresponding to the line
   170  	// numbered <line+1> is located at index <line>, since indices in lines
   171  	// are 0-based and line numbers are 1-based.
   172  	copy(f.lines[line:], f.lines[line+1:])
   173  	f.lines = f.lines[:len(f.lines)-1]
   174  }
   175  
   176  // Lines returns the effective line offset table of the form described by [File.SetLines].
   177  // Callers must not mutate the result.
   178  func (f *File) Lines() []int {
   179  	f.mutex.Lock()
   180  	lines := f.lines
   181  	f.mutex.Unlock()
   182  	return lines
   183  }
   184  
   185  // SetLines sets the line offsets for a file and reports whether it succeeded.
   186  // The line offsets are the offsets of the first character of each line;
   187  // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
   188  // An empty file has an empty line offset table.
   189  // Each line offset must be larger than the offset for the previous line
   190  // and smaller than the file size; otherwise SetLines fails and returns
   191  // false.
   192  // Callers must not mutate the provided slice after SetLines returns.
   193  func (f *File) SetLines(lines []int) bool {
   194  	// verify validity of lines table
   195  	size := f.size
   196  	for i, offset := range lines {
   197  		if i > 0 && offset <= lines[i-1] || size <= offset {
   198  			return false
   199  		}
   200  	}
   201  
   202  	// set lines table
   203  	f.mutex.Lock()
   204  	f.lines = lines
   205  	f.mutex.Unlock()
   206  	return true
   207  }
   208  
   209  // SetLinesForContent sets the line offsets for the given file content.
   210  // It ignores position-altering //line comments.
   211  func (f *File) SetLinesForContent(content []byte) {
   212  	var lines []int
   213  	line := 0
   214  	for offset, b := range content {
   215  		if line >= 0 {
   216  			lines = append(lines, line)
   217  		}
   218  		line = -1
   219  		if b == '\n' {
   220  			line = offset + 1
   221  		}
   222  	}
   223  
   224  	// set lines table
   225  	f.mutex.Lock()
   226  	f.lines = lines
   227  	f.mutex.Unlock()
   228  }
   229  
   230  // LineStart returns the [Pos] value of the start of the specified line.
   231  // It ignores any alternative positions set using [File.AddLineColumnInfo].
   232  // LineStart panics if the 1-based line number is invalid.
   233  func (f *File) LineStart(line int) Pos {
   234  	if line < 1 {
   235  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   236  	}
   237  	f.mutex.Lock()
   238  	defer f.mutex.Unlock()
   239  	if line > len(f.lines) {
   240  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   241  	}
   242  	return Pos(f.base + f.lines[line-1])
   243  }
   244  
   245  // A lineInfo object describes alternative file, line, and column
   246  // number information (such as provided via a //line directive)
   247  // for a given file offset.
   248  type lineInfo struct {
   249  	// fields are exported to make them accessible to gob
   250  	Offset       int
   251  	Filename     string
   252  	Line, Column int
   253  }
   254  
   255  // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
   256  // It is here for backward-compatibility for code prior to Go 1.11.
   257  func (f *File) AddLineInfo(offset int, filename string, line int) {
   258  	f.AddLineColumnInfo(offset, filename, line, 1)
   259  }
   260  
   261  // AddLineColumnInfo adds alternative file, line, and column number
   262  // information for a given file offset. The offset must be larger
   263  // than the offset for the previously added alternative line info
   264  // and smaller than the file size; otherwise the information is
   265  // ignored.
   266  //
   267  // AddLineColumnInfo is typically used to register alternative position
   268  // information for line directives such as //line filename:line:column.
   269  func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
   270  	f.mutex.Lock()
   271  	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
   272  		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
   273  	}
   274  	f.mutex.Unlock()
   275  }
   276  
   277  // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
   278  func (f *File) fixOffset(offset int) int {
   279  	switch {
   280  	case offset < 0:
   281  		if !debug {
   282  			return 0
   283  		}
   284  	case offset > f.size:
   285  		if !debug {
   286  			return f.size
   287  		}
   288  	default:
   289  		return offset
   290  	}
   291  
   292  	// only generate this code if needed
   293  	if debug {
   294  		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
   295  			0 /* for symmetry */, offset, f.size,
   296  			f.base+offset, f.base, f.base+f.size))
   297  	}
   298  	return 0
   299  }
   300  
   301  // Pos returns the Pos value for the given file offset.
   302  //
   303  // If offset is negative, the result is the file's start
   304  // position; if the offset is too large, the result is
   305  // the file's end position (see also go.dev/issue/57490).
   306  //
   307  // The following invariant, though not true for Pos values
   308  // in general, holds for the result p:
   309  // f.Pos(f.Offset(p)) == p.
   310  func (f *File) Pos(offset int) Pos {
   311  	return Pos(f.base + f.fixOffset(offset))
   312  }
   313  
   314  // Offset returns the offset for the given file position p.
   315  //
   316  // If p is before the file's start position (or if p is NoPos),
   317  // the result is 0; if p is past the file's end position,
   318  // the result is the file size (see also go.dev/issue/57490).
   319  //
   320  // The following invariant, though not true for offset values
   321  // in general, holds for the result offset:
   322  // f.Offset(f.Pos(offset)) == offset
   323  func (f *File) Offset(p Pos) int {
   324  	return f.fixOffset(int(p) - f.base)
   325  }
   326  
   327  // Line returns the line number for the given file position p;
   328  // p must be a [Pos] value in that file or [NoPos].
   329  func (f *File) Line(p Pos) int {
   330  	return f.Position(p).Line
   331  }
   332  
   333  func searchLineInfos(a []lineInfo, x int) int {
   334  	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
   335  		return cmp.Compare(a.Offset, x)
   336  	})
   337  	if !found {
   338  		// We want the lineInfo containing x, but if we didn't
   339  		// find x then i is the next one.
   340  		i--
   341  	}
   342  	return i
   343  }
   344  
   345  // unpack returns the filename and line and column number for a file offset.
   346  // If adjusted is set, unpack will return the filename and line information
   347  // possibly adjusted by //line comments; otherwise those comments are ignored.
   348  func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
   349  	f.mutex.Lock()
   350  	filename = f.name
   351  	if i := searchInts(f.lines, offset); i >= 0 {
   352  		line, column = i+1, offset-f.lines[i]+1
   353  	}
   354  	if adjusted && len(f.infos) > 0 {
   355  		// few files have extra line infos
   356  		if i := searchLineInfos(f.infos, offset); i >= 0 {
   357  			alt := &f.infos[i]
   358  			filename = alt.Filename
   359  			if i := searchInts(f.lines, alt.Offset); i >= 0 {
   360  				// i+1 is the line at which the alternative position was recorded
   361  				d := line - (i + 1) // line distance from alternative position base
   362  				line = alt.Line + d
   363  				if alt.Column == 0 {
   364  					// alternative column is unknown => relative column is unknown
   365  					// (the current specification for line directives requires
   366  					// this to apply until the next PosBase/line directive,
   367  					// not just until the new newline)
   368  					column = 0
   369  				} else if d == 0 {
   370  					// the alternative position base is on the current line
   371  					// => column is relative to alternative column
   372  					column = alt.Column + (offset - alt.Offset)
   373  				}
   374  			}
   375  		}
   376  	}
   377  	// TODO(mvdan): move Unlock back under Lock with a defer statement once
   378  	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
   379  	f.mutex.Unlock()
   380  	return
   381  }
   382  
   383  func (f *File) position(p Pos, adjusted bool) (pos Position) {
   384  	offset := f.fixOffset(int(p) - f.base)
   385  	pos.Offset = offset
   386  	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
   387  	return
   388  }
   389  
   390  // PositionFor returns the Position value for the given file position p.
   391  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   392  // If adjusted is set, the position may be adjusted by position-altering
   393  // //line comments; otherwise those comments are ignored.
   394  // p must be a Pos value in f or NoPos.
   395  func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
   396  	if p != NoPos {
   397  		pos = f.position(p, adjusted)
   398  	}
   399  	return
   400  }
   401  
   402  // Position returns the Position value for the given file position p.
   403  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   404  // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
   405  func (f *File) Position(p Pos) (pos Position) {
   406  	return f.PositionFor(p, true)
   407  }
   408  
   409  // -----------------------------------------------------------------------------
   410  // FileSet
   411  
   412  // A FileSet represents a set of source files.
   413  // Methods of file sets are synchronized; multiple goroutines
   414  // may invoke them concurrently.
   415  //
   416  // The byte offsets for each file in a file set are mapped into
   417  // distinct (integer) intervals, one interval [base, base+size]
   418  // per file. [FileSet.Base] represents the first byte in the file, and size
   419  // is the corresponding file size. A [Pos] value is a value in such
   420  // an interval. By determining the interval a [Pos] value belongs
   421  // to, the file, its file base, and thus the byte offset (position)
   422  // the [Pos] value is representing can be computed.
   423  //
   424  // When adding a new file, a file base must be provided. That can
   425  // be any integer value that is past the end of any interval of any
   426  // file already in the file set. For convenience, [FileSet.Base] provides
   427  // such a value, which is simply the end of the Pos interval of the most
   428  // recently added file, plus one. Unless there is a need to extend an
   429  // interval later, using the [FileSet.Base] should be used as argument
   430  // for [FileSet.AddFile].
   431  //
   432  // A [File] may be removed from a FileSet when it is no longer needed.
   433  // This may reduce memory usage in a long-running application.
   434  type FileSet struct {
   435  	mutex sync.RWMutex         // protects the file set
   436  	base  int                  // base offset for the next file
   437  	tree  tree                 // tree of files in ascending base order
   438  	last  atomic.Pointer[File] // cache of last file looked up
   439  }
   440  
   441  // NewFileSet creates a new file set.
   442  func NewFileSet() *FileSet {
   443  	return &FileSet{
   444  		base: 1, // 0 == NoPos
   445  	}
   446  }
   447  
   448  // Base returns the minimum base offset that must be provided to
   449  // [FileSet.AddFile] when adding the next file.
   450  func (s *FileSet) Base() int {
   451  	s.mutex.RLock()
   452  	b := s.base
   453  	s.mutex.RUnlock()
   454  	return b
   455  }
   456  
   457  // AddFile adds a new file with a given filename, base offset, and file size
   458  // to the file set s and returns the file. Multiple files may have the same
   459  // name. The base offset must not be smaller than the [FileSet.Base], and
   460  // size must not be negative. As a special case, if a negative base is provided,
   461  // the current value of the [FileSet.Base] is used instead.
   462  //
   463  // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
   464  // as the minimum base value for the next file. The following relationship
   465  // exists between a [Pos] value p for a given file offset offs:
   466  //
   467  //	int(p) = base + offs
   468  //
   469  // with offs in the range [0, size] and thus p in the range [base, base+size].
   470  // For convenience, [File.Pos] may be used to create file-specific position
   471  // values from a file offset.
   472  func (s *FileSet) AddFile(filename string, base, size int) *File {
   473  	// Allocate f outside the critical section.
   474  	f := &File{name: filename, size: size, lines: []int{0}}
   475  
   476  	s.mutex.Lock()
   477  	defer s.mutex.Unlock()
   478  	if base < 0 {
   479  		base = s.base
   480  	}
   481  	if base < s.base {
   482  		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
   483  	}
   484  	f.base = base
   485  	if size < 0 {
   486  		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
   487  	}
   488  	// base >= s.base && size >= 0
   489  	base += size + 1 // +1 because EOF also has a position
   490  	if base < 0 {
   491  		panic("token.Pos offset overflow (> 2G of source code in file set)")
   492  	}
   493  	// add the file to the file set
   494  	s.base = base
   495  	s.tree.add(f)
   496  	s.last.Store(f)
   497  	return f
   498  }
   499  
   500  // AddExistingFiles adds the specified files to the
   501  // FileSet if they are not already present.
   502  // The caller must ensure that no pair of Files that
   503  // would appear in the resulting FileSet overlap.
   504  func (s *FileSet) AddExistingFiles(files ...*File) {
   505  	// This function cannot be implemented as:
   506  	//
   507  	//	for _, file := range files {
   508  	//		if prev := fset.File(token.Pos(file.Base())); prev != nil {
   509  	//			if prev != file {
   510  	//				panic("FileSet contains a different file at the same base")
   511  	//			}
   512  	//			continue
   513  	//		}
   514  	//		file2 := fset.AddFile(file.Name(), file.Base(), file.Size())
   515  	//		file2.SetLines(file.Lines())
   516  	//	}
   517  	//
   518  	// because all calls to AddFile must be in increasing order.
   519  	// AddExistingFiles lets us augment an existing FileSet
   520  	// sequentially, so long as all sets of files have disjoint ranges.
   521  	// This approach also does not preserve line directives.
   522  
   523  	s.mutex.Lock()
   524  	defer s.mutex.Unlock()
   525  
   526  	for _, f := range files {
   527  		s.tree.add(f)
   528  		s.base = max(s.base, f.Base()+f.Size()+1)
   529  	}
   530  }
   531  
   532  // RemoveFile removes a file from the [FileSet] so that subsequent
   533  // queries for its [Pos] interval yield a negative result.
   534  // This reduces the memory usage of a long-lived [FileSet] that
   535  // encounters an unbounded stream of files.
   536  //
   537  // Removing a file that does not belong to the set has no effect.
   538  func (s *FileSet) RemoveFile(file *File) {
   539  	s.mutex.Lock()
   540  	defer s.mutex.Unlock()
   541  
   542  	s.last.CompareAndSwap(file, nil) // clear last file cache
   543  
   544  	pn, _ := s.tree.locate(file.key())
   545  	if *pn != nil && (*pn).file == file {
   546  		s.tree.delete(pn)
   547  	}
   548  }
   549  
   550  // Iterate calls yield for the files in the file set in ascending Base
   551  // order until yield returns false.
   552  func (s *FileSet) Iterate(yield func(*File) bool) {
   553  	s.mutex.RLock()
   554  	defer s.mutex.RUnlock()
   555  
   556  	// Unlock around user code.
   557  	// The iterator is robust to modification by yield.
   558  	// Avoid range here, so we can use defer.
   559  	s.tree.all()(func(f *File) bool {
   560  		s.mutex.RUnlock()
   561  		defer s.mutex.RLock()
   562  		return yield(f)
   563  	})
   564  }
   565  
   566  func (s *FileSet) file(p Pos) *File {
   567  	// common case: p is in last file.
   568  	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
   569  		return f
   570  	}
   571  
   572  	s.mutex.RLock()
   573  	defer s.mutex.RUnlock()
   574  
   575  	pn, _ := s.tree.locate(key{int(p), int(p)})
   576  	if n := *pn; n != nil {
   577  		// Update cache of last file. A race is ok,
   578  		// but an exclusive lock causes heavy contention.
   579  		s.last.Store(n.file)
   580  		return n.file
   581  	}
   582  	return nil
   583  }
   584  
   585  // File returns the file that contains the position p.
   586  // If no such file is found (for instance for p == [NoPos]),
   587  // the result is nil.
   588  func (s *FileSet) File(p Pos) (f *File) {
   589  	if p != NoPos {
   590  		f = s.file(p)
   591  	}
   592  	return
   593  }
   594  
   595  // PositionFor converts a [Pos] p in the fileset into a [Position] value.
   596  // If adjusted is set, the position may be adjusted by position-altering
   597  // //line comments; otherwise those comments are ignored.
   598  // p must be a [Pos] value in s or [NoPos].
   599  func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
   600  	if p != NoPos {
   601  		if f := s.file(p); f != nil {
   602  			return f.position(p, adjusted)
   603  		}
   604  	}
   605  	return
   606  }
   607  
   608  // Position converts a [Pos] p in the fileset into a Position value.
   609  // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
   610  func (s *FileSet) Position(p Pos) (pos Position) {
   611  	return s.PositionFor(p, true)
   612  }
   613  
   614  // -----------------------------------------------------------------------------
   615  // Helper functions
   616  
   617  func searchInts(a []int, x int) int {
   618  	// This function body is a manually inlined version of:
   619  	//
   620  	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
   621  	//
   622  	// With better compiler optimizations, this may not be needed in the
   623  	// future, but at the moment this change improves the go/printer
   624  	// benchmark performance by ~30%. This has a direct impact on the
   625  	// speed of gofmt and thus seems worthwhile (2011-04-29).
   626  	// TODO(gri): Remove this when compilers have caught up.
   627  	i, j := 0, len(a)
   628  	for i < j {
   629  		h := int(uint(i+j) >> 1) // avoid overflow when computing h
   630  		// i ≤ h < j
   631  		if a[h] <= x {
   632  			i = h + 1
   633  		} else {
   634  			j = h
   635  		}
   636  	}
   637  	return i - 1
   638  }
   639  

View as plain text