Source file src/go/token/position.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package token
     6  
     7  import (
     8  	"fmt"
     9  	"sort"
    10  	"strconv"
    11  	"sync"
    12  	"sync/atomic"
    13  )
    14  
// If debug is set, invalid offset and position values cause a panic
// (go.dev/issue/57490).
//
// With debug false, File.fixOffset clamps an out-of-range offset to 0 or
// to the file size instead of panicking.
const debug = false
    18  
    19  // -----------------------------------------------------------------------------
    20  // Positions
    21  
    22  // Position describes an arbitrary source position
    23  // including the file, line, and column location.
    24  // A Position is valid if the line number is > 0.
    25  type Position struct {
    26  	Filename string // filename, if any
    27  	Offset   int    // offset, starting at 0
    28  	Line     int    // line number, starting at 1
    29  	Column   int    // column number, starting at 1 (byte count)
    30  }
    31  
    32  // IsValid reports whether the position is valid.
    33  func (pos *Position) IsValid() bool { return pos.Line > 0 }
    34  
    35  // String returns a string in one of several forms:
    36  //
    37  //	file:line:column    valid position with file name
    38  //	file:line           valid position with file name but no column (column == 0)
    39  //	line:column         valid position without file name
    40  //	line                valid position without file name and no column (column == 0)
    41  //	file                invalid position with file name
    42  //	-                   invalid position without file name
    43  func (pos Position) String() string {
    44  	s := pos.Filename
    45  	if pos.IsValid() {
    46  		if s != "" {
    47  			s += ":"
    48  		}
    49  		s += strconv.Itoa(pos.Line)
    50  		if pos.Column != 0 {
    51  			s += fmt.Sprintf(":%d", pos.Column)
    52  		}
    53  	}
    54  	if s == "" {
    55  		s = "-"
    56  	}
    57  	return s
    58  }
    59  
// Pos is a compact encoding of a source position within a file set.
// It can be converted into a [Position] for a more convenient, but much
// larger, representation.
//
// The Pos value for a given file is a number in the range [base, base+size],
// where base and size are specified when a file is added to the file set.
// The difference between a Pos value and the corresponding file base
// is the byte offset of that position (represented by the Pos value)
// from the beginning of the file. Thus, the file base offset is the Pos value
// representing the first byte in the file.
//
// To create the Pos value for a specific source offset (measured in bytes),
// first add the respective file to the current file set using [FileSet.AddFile]
// and then call [File.Pos](offset) for that file. Given a Pos value p
// for a specific file set fset, the corresponding [Position] value is
// obtained by calling fset.Position(p).
//
// Pos values can be compared directly with the usual comparison operators:
// If two Pos values p and q are in the same file, comparing p and q is
// equivalent to comparing the respective source file offsets. If p and q
// are in different files, p < q is true if the file implied by p was added
// to the respective file set before the file implied by q.
type Pos int

// The zero value for [Pos] is NoPos; there is no file and line information
// associated with it, and NoPos.IsValid() is false. NoPos is always
// smaller than any other [Pos] value. The corresponding [Position] value
// for NoPos is the zero value for [Position].
const NoPos Pos = 0

// IsValid reports whether the position is valid, that is, whether p
// differs from [NoPos].
func (p Pos) IsValid() bool {
	return p != NoPos
}
    94  
    95  // -----------------------------------------------------------------------------
    96  // File
    97  
// A File is a handle for a file belonging to a [FileSet].
// A File has a name, size, and line offset table.
//
// Only lines and infos are guarded by mutex; the accessors for name,
// base, and size read those fields without locking.
type File struct {
	name string // file name as provided to AddFile
	base int    // Pos value range for this file is [base...base+size]
	size int    // file size as provided to AddFile

	// lines and infos are protected by mutex
	mutex sync.Mutex
	lines []int      // lines contains the offset of the first character for each line (the first entry is always 0)
	infos []lineInfo // alternative position records appended by AddLineColumnInfo, in strictly increasing Offset order
}
   110  
// Name returns the file name of file f as registered with AddFile.
// The name is returned as stored; the field is read without locking.
func (f *File) Name() string {
	return f.name
}
   115  
// Base returns the base offset of file f as registered with AddFile.
// It is the smallest Pos value (as an int) belonging to f; the field is
// read without locking.
func (f *File) Base() int {
	return f.base
}
   120  
// Size returns the size of file f as registered with AddFile.
// Valid offsets into f lie in the range [0, Size()]; the field is read
// without locking.
func (f *File) Size() int {
	return f.size
}
   125  
// LineCount returns the number of lines in file f, which is the number
// of entries currently in the line offset table.
func (f *File) LineCount() int {
	// The table may be modified concurrently, so read its length under
	// the mutex.
	f.mutex.Lock()
	n := len(f.lines)
	f.mutex.Unlock()
	return n
}
   133  
   134  // AddLine adds the line offset for a new line.
   135  // The line offset must be larger than the offset for the previous line
   136  // and smaller than the file size; otherwise the line offset is ignored.
   137  func (f *File) AddLine(offset int) {
   138  	f.mutex.Lock()
   139  	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
   140  		f.lines = append(f.lines, offset)
   141  	}
   142  	f.mutex.Unlock()
   143  }
   144  
   145  // MergeLine merges a line with the following line. It is akin to replacing
   146  // the newline character at the end of the line with a space (to not change the
   147  // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
   148  // MergeLine will panic if given an invalid line number.
   149  func (f *File) MergeLine(line int) {
   150  	if line < 1 {
   151  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   152  	}
   153  	f.mutex.Lock()
   154  	defer f.mutex.Unlock()
   155  	if line >= len(f.lines) {
   156  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   157  	}
   158  	// To merge the line numbered <line> with the line numbered <line+1>,
   159  	// we need to remove the entry in lines corresponding to the line
   160  	// numbered <line+1>. The entry in lines corresponding to the line
   161  	// numbered <line+1> is located at index <line>, since indices in lines
   162  	// are 0-based and line numbers are 1-based.
   163  	copy(f.lines[line:], f.lines[line+1:])
   164  	f.lines = f.lines[:len(f.lines)-1]
   165  }
   166  
// Lines returns the effective line offset table of the form described by [File.SetLines].
// Callers must not mutate the result.
//
// The returned slice is the table itself, not a copy.
func (f *File) Lines() []int {
	// Only the slice header is read under the lock; no elements are copied.
	f.mutex.Lock()
	lines := f.lines
	f.mutex.Unlock()
	return lines
}
   175  
   176  // SetLines sets the line offsets for a file and reports whether it succeeded.
   177  // The line offsets are the offsets of the first character of each line;
   178  // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
   179  // An empty file has an empty line offset table.
   180  // Each line offset must be larger than the offset for the previous line
   181  // and smaller than the file size; otherwise SetLines fails and returns
   182  // false.
   183  // Callers must not mutate the provided slice after SetLines returns.
   184  func (f *File) SetLines(lines []int) bool {
   185  	// verify validity of lines table
   186  	size := f.size
   187  	for i, offset := range lines {
   188  		if i > 0 && offset <= lines[i-1] || size <= offset {
   189  			return false
   190  		}
   191  	}
   192  
   193  	// set lines table
   194  	f.mutex.Lock()
   195  	f.lines = lines
   196  	f.mutex.Unlock()
   197  	return true
   198  }
   199  
   200  // SetLinesForContent sets the line offsets for the given file content.
   201  // It ignores position-altering //line comments.
   202  func (f *File) SetLinesForContent(content []byte) {
   203  	var lines []int
   204  	line := 0
   205  	for offset, b := range content {
   206  		if line >= 0 {
   207  			lines = append(lines, line)
   208  		}
   209  		line = -1
   210  		if b == '\n' {
   211  			line = offset + 1
   212  		}
   213  	}
   214  
   215  	// set lines table
   216  	f.mutex.Lock()
   217  	f.lines = lines
   218  	f.mutex.Unlock()
   219  }
   220  
   221  // LineStart returns the [Pos] value of the start of the specified line.
   222  // It ignores any alternative positions set using [File.AddLineColumnInfo].
   223  // LineStart panics if the 1-based line number is invalid.
   224  func (f *File) LineStart(line int) Pos {
   225  	if line < 1 {
   226  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   227  	}
   228  	f.mutex.Lock()
   229  	defer f.mutex.Unlock()
   230  	if line > len(f.lines) {
   231  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   232  	}
   233  	return Pos(f.base + f.lines[line-1])
   234  }
   235  
// A lineInfo object describes alternative file, line, and column
// number information (such as provided via a //line directive)
// for a given file offset.
type lineInfo struct {
	// fields are exported to make them accessible to gob
	Offset       int    // file offset at which the alternative position takes effect
	Filename     string // alternative file name reported from Offset onward
	Line, Column int    // alternative line and column at Offset; a Column of 0 is treated as unknown by File.unpack
}
   245  
// AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
// It is here for backward-compatibility for code prior to Go 1.11.
func (f *File) AddLineInfo(offset int, filename string, line int) {
	// Column 1 means the alternative position starts at the first column.
	f.AddLineColumnInfo(offset, filename, line, 1)
}
   251  
   252  // AddLineColumnInfo adds alternative file, line, and column number
   253  // information for a given file offset. The offset must be larger
   254  // than the offset for the previously added alternative line info
   255  // and smaller than the file size; otherwise the information is
   256  // ignored.
   257  //
   258  // AddLineColumnInfo is typically used to register alternative position
   259  // information for line directives such as //line filename:line:column.
   260  func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
   261  	f.mutex.Lock()
   262  	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
   263  		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
   264  	}
   265  	f.mutex.Unlock()
   266  }
   267  
   268  // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
   269  func (f *File) fixOffset(offset int) int {
   270  	switch {
   271  	case offset < 0:
   272  		if !debug {
   273  			return 0
   274  		}
   275  	case offset > f.size:
   276  		if !debug {
   277  			return f.size
   278  		}
   279  	default:
   280  		return offset
   281  	}
   282  
   283  	// only generate this code if needed
   284  	if debug {
   285  		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
   286  			0 /* for symmetry */, offset, f.size,
   287  			f.base+offset, f.base, f.base+f.size))
   288  	}
   289  	return 0
   290  }
   291  
   292  // Pos returns the Pos value for the given file offset.
   293  //
   294  // If offset is negative, the result is the file's start
   295  // position; if the offset is too large, the result is
   296  // the file's end position (see also go.dev/issue/57490).
   297  //
   298  // The following invariant, though not true for Pos values
   299  // in general, holds for the result p:
   300  // f.Pos(f.Offset(p)) == p.
   301  func (f *File) Pos(offset int) Pos {
   302  	return Pos(f.base + f.fixOffset(offset))
   303  }
   304  
   305  // Offset returns the offset for the given file position p.
   306  //
   307  // If p is before the file's start position (or if p is NoPos),
   308  // the result is 0; if p is past the file's end position, the
   309  // the result is the file size (see also go.dev/issue/57490).
   310  //
   311  // The following invariant, though not true for offset values
   312  // in general, holds for the result offset:
   313  // f.Offset(f.Pos(offset)) == offset
   314  func (f *File) Offset(p Pos) int {
   315  	return f.fixOffset(int(p) - f.base)
   316  }
   317  
// Line returns the line number for the given file position p;
// p must be a [Pos] value in that file or [NoPos].
//
// The result is the Line field of f.Position(p), so it reflects any
// alternative line information registered for the file.
func (f *File) Line(p Pos) int {
	return f.Position(p).Line
}
   323  
   324  func searchLineInfos(a []lineInfo, x int) int {
   325  	return sort.Search(len(a), func(i int) bool { return a[i].Offset > x }) - 1
   326  }
   327  
// unpack returns the filename and line and column number for a file offset.
// If adjusted is set, unpack will return the filename and line information
// possibly adjusted by //line comments; otherwise those comments are ignored.
//
// If no entry of the line table starts at or before offset, line and
// column are left at zero. f.mutex is held for the whole lookup.
func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
	f.mutex.Lock()
	filename = f.name
	// Find the last line starting at or before offset; line and column
	// are both 1-based.
	if i := searchInts(f.lines, offset); i >= 0 {
		line, column = i+1, offset-f.lines[i]+1
	}
	if adjusted && len(f.infos) > 0 {
		// few files have extra line infos
		// Find the latest alternative position record at or before offset.
		if i := searchLineInfos(f.infos, offset); i >= 0 {
			alt := &f.infos[i]
			// The alternative filename applies even if the line lookup
			// below finds nothing.
			filename = alt.Filename
			if i := searchInts(f.lines, alt.Offset); i >= 0 {
				// i+1 is the line at which the alternative position was recorded
				d := line - (i + 1) // line distance from alternative position base
				line = alt.Line + d
				if alt.Column == 0 {
					// alternative column is unknown => relative column is unknown
					// (the current specification for line directives requires
					// this to apply until the next PosBase/line directive,
					// not just until the new newline)
					column = 0
				} else if d == 0 {
					// the alternative position base is on the current line
					// => column is relative to alternative column
					column = alt.Column + (offset - alt.Offset)
				}
				// Otherwise (known column, later line) the column computed
				// from the line table is kept as is.
			}
		}
	}
	// TODO(mvdan): move Unlock back under Lock with a defer statement once
	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
	f.mutex.Unlock()
	return
}
   365  
   366  func (f *File) position(p Pos, adjusted bool) (pos Position) {
   367  	offset := f.fixOffset(int(p) - f.base)
   368  	pos.Offset = offset
   369  	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
   370  	return
   371  }
   372  
   373  // PositionFor returns the Position value for the given file position p.
   374  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   375  // If adjusted is set, the position may be adjusted by position-altering
   376  // //line comments; otherwise those comments are ignored.
   377  // p must be a Pos value in f or NoPos.
   378  func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
   379  	if p != NoPos {
   380  		pos = f.position(p, adjusted)
   381  	}
   382  	return
   383  }
   384  
// Position returns the Position value for the given file position p.
// If p is out of bounds, it is adjusted to match the File.Offset behavior.
// Calling f.Position(p) is equivalent to calling f.PositionFor(p, true),
// so the result may be adjusted by //line comments.
func (f *File) Position(p Pos) (pos Position) {
	return f.PositionFor(p, true)
}
   391  
   392  // -----------------------------------------------------------------------------
   393  // FileSet
   394  
// A FileSet represents a set of source files.
// Methods of file sets are synchronized; multiple goroutines
// may invoke them concurrently.
//
// The byte offsets for each file in a file set are mapped into
// distinct (integer) intervals, one interval [base, base+size]
// per file. Here base represents the first byte in the file, and size
// is the corresponding file size. A [Pos] value is a value in such
// an interval. By determining the interval a [Pos] value belongs
// to, the file, its file base, and thus the byte offset (position)
// the [Pos] value is representing can be computed.
//
// When adding a new file, a file base must be provided. That can
// be any integer value that is past the end of any interval of any
// file already in the file set. For convenience, [FileSet.Base] provides
// such a value, which is simply the end of the Pos interval of the most
// recently added file, plus one. Unless there is a need to extend an
// interval later, the value of [FileSet.Base] should be used as the
// base argument for [FileSet.AddFile].
//
// A [File] may be removed from a FileSet when it is no longer needed.
// This may reduce memory usage in a long-running application.
type FileSet struct {
	mutex sync.RWMutex         // protects the file set
	base  int                  // base offset for the next file
	files []*File              // list of files in the order added to the set
	last  atomic.Pointer[File] // cache of last file looked up
}
   423  
   424  // NewFileSet creates a new file set.
   425  func NewFileSet() *FileSet {
   426  	return &FileSet{
   427  		base: 1, // 0 == NoPos
   428  	}
   429  }
   430  
// Base returns the minimum base offset that must be provided to
// [FileSet.AddFile] when adding the next file.
//
// The value is read under the file set's read lock.
func (s *FileSet) Base() int {
	s.mutex.RLock()
	b := s.base
	s.mutex.RUnlock()
	return b
}
   439  
   440  // AddFile adds a new file with a given filename, base offset, and file size
   441  // to the file set s and returns the file. Multiple files may have the same
   442  // name. The base offset must not be smaller than the [FileSet.Base], and
   443  // size must not be negative. As a special case, if a negative base is provided,
   444  // the current value of the [FileSet.Base] is used instead.
   445  //
   446  // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
   447  // as the minimum base value for the next file. The following relationship
   448  // exists between a [Pos] value p for a given file offset offs:
   449  //
   450  //	int(p) = base + offs
   451  //
   452  // with offs in the range [0, size] and thus p in the range [base, base+size].
   453  // For convenience, [File.Pos] may be used to create file-specific position
   454  // values from a file offset.
   455  func (s *FileSet) AddFile(filename string, base, size int) *File {
   456  	// Allocate f outside the critical section.
   457  	f := &File{name: filename, size: size, lines: []int{0}}
   458  
   459  	s.mutex.Lock()
   460  	defer s.mutex.Unlock()
   461  	if base < 0 {
   462  		base = s.base
   463  	}
   464  	if base < s.base {
   465  		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
   466  	}
   467  	f.base = base
   468  	if size < 0 {
   469  		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
   470  	}
   471  	// base >= s.base && size >= 0
   472  	base += size + 1 // +1 because EOF also has a position
   473  	if base < 0 {
   474  		panic("token.Pos offset overflow (> 2G of source code in file set)")
   475  	}
   476  	// add the file to the file set
   477  	s.base = base
   478  	s.files = append(s.files, f)
   479  	s.last.Store(f)
   480  	return f
   481  }
   482  
   483  // RemoveFile removes a file from the [FileSet] so that subsequent
   484  // queries for its [Pos] interval yield a negative result.
   485  // This reduces the memory usage of a long-lived [FileSet] that
   486  // encounters an unbounded stream of files.
   487  //
   488  // Removing a file that does not belong to the set has no effect.
   489  func (s *FileSet) RemoveFile(file *File) {
   490  	s.last.CompareAndSwap(file, nil) // clear last file cache
   491  
   492  	s.mutex.Lock()
   493  	defer s.mutex.Unlock()
   494  
   495  	if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file {
   496  		last := &s.files[len(s.files)-1]
   497  		s.files = append(s.files[:i], s.files[i+1:]...)
   498  		*last = nil // don't prolong lifetime when popping last element
   499  	}
   500  }
   501  
   502  // Iterate calls f for the files in the file set in the order they were added
   503  // until f returns false.
   504  func (s *FileSet) Iterate(f func(*File) bool) {
   505  	for i := 0; ; i++ {
   506  		var file *File
   507  		s.mutex.RLock()
   508  		if i < len(s.files) {
   509  			file = s.files[i]
   510  		}
   511  		s.mutex.RUnlock()
   512  		if file == nil || !f(file) {
   513  			break
   514  		}
   515  	}
   516  }
   517  
   518  func searchFiles(a []*File, x int) int {
   519  	return sort.Search(len(a), func(i int) bool { return a[i].base > x }) - 1
   520  }
   521  
   522  func (s *FileSet) file(p Pos) *File {
   523  	// common case: p is in last file.
   524  	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
   525  		return f
   526  	}
   527  
   528  	s.mutex.RLock()
   529  	defer s.mutex.RUnlock()
   530  
   531  	// p is not in last file - search all files
   532  	if i := searchFiles(s.files, int(p)); i >= 0 {
   533  		f := s.files[i]
   534  		// f.base <= int(p) by definition of searchFiles
   535  		if int(p) <= f.base+f.size {
   536  			// Update cache of last file. A race is ok,
   537  			// but an exclusive lock causes heavy contention.
   538  			s.last.Store(f)
   539  			return f
   540  		}
   541  	}
   542  	return nil
   543  }
   544  
   545  // File returns the file that contains the position p.
   546  // If no such file is found (for instance for p == [NoPos]),
   547  // the result is nil.
   548  func (s *FileSet) File(p Pos) (f *File) {
   549  	if p != NoPos {
   550  		f = s.file(p)
   551  	}
   552  	return
   553  }
   554  
   555  // PositionFor converts a [Pos] p in the fileset into a [Position] value.
   556  // If adjusted is set, the position may be adjusted by position-altering
   557  // //line comments; otherwise those comments are ignored.
   558  // p must be a [Pos] value in s or [NoPos].
   559  func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
   560  	if p != NoPos {
   561  		if f := s.file(p); f != nil {
   562  			return f.position(p, adjusted)
   563  		}
   564  	}
   565  	return
   566  }
   567  
// Position converts a [Pos] p in the fileset into a Position value.
// Calling s.Position(p) is equivalent to calling s.PositionFor(p, true),
// so the result may be adjusted by //line comments.
func (s *FileSet) Position(p Pos) (pos Position) {
	return s.PositionFor(p, true)
}
   573  
   574  // -----------------------------------------------------------------------------
   575  // Helper functions
   576  
   577  func searchInts(a []int, x int) int {
   578  	// This function body is a manually inlined version of:
   579  	//
   580  	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
   581  	//
   582  	// With better compiler optimizations, this may not be needed in the
   583  	// future, but at the moment this change improves the go/printer
   584  	// benchmark performance by ~30%. This has a direct impact on the
   585  	// speed of gofmt and thus seems worthwhile (2011-04-29).
   586  	// TODO(gri): Remove this when compilers have caught up.
   587  	i, j := 0, len(a)
   588  	for i < j {
   589  		h := int(uint(i+j) >> 1) // avoid overflow when computing h
   590  		// i ≤ h < j
   591  		if a[h] <= x {
   592  			i = h + 1
   593  		} else {
   594  			j = h
   595  		}
   596  	}
   597  	return i - 1
   598  }
   599  

View as plain text