Source file src/go/token/position.go
1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package token 6 7 import ( 8 "cmp" 9 "fmt" 10 "slices" 11 "strconv" 12 "sync" 13 "sync/atomic" 14 ) 15 16 // If debug is set, invalid offset and position values cause a panic 17 // (go.dev/issue/57490). 18 const debug = false 19 20 // ----------------------------------------------------------------------------- 21 // Positions 22 23 // Position describes an arbitrary source position 24 // including the file, line, and column location. 25 // A Position is valid if the line number is > 0. 26 type Position struct { 27 Filename string // filename, if any 28 Offset int // offset, starting at 0 29 Line int // line number, starting at 1 30 Column int // column number, starting at 1 (byte count) 31 } 32 33 // IsValid reports whether the position is valid. 34 func (pos *Position) IsValid() bool { return pos.Line > 0 } 35 36 // String returns a string in one of several forms: 37 // 38 // file:line:column valid position with file name 39 // file:line valid position with file name but no column (column == 0) 40 // line:column valid position without file name 41 // line valid position without file name and no column (column == 0) 42 // file invalid position with file name 43 // - invalid position without file name 44 func (pos Position) String() string { 45 s := pos.Filename 46 if pos.IsValid() { 47 if s != "" { 48 s += ":" 49 } 50 s += strconv.Itoa(pos.Line) 51 if pos.Column != 0 { 52 s += fmt.Sprintf(":%d", pos.Column) 53 } 54 } 55 if s == "" { 56 s = "-" 57 } 58 return s 59 } 60 61 // Pos is a compact encoding of a source position within a file set. 62 // It can be converted into a [Position] for a more convenient, but much 63 // larger, representation. 64 // 65 // The Pos value for a given file is a number in the range [base, base+size], 66 // where base and size are specified when a file is added to the file set. 67 // The difference between a Pos value and the corresponding file base 68 // corresponds to the byte offset of that position (represented by the Pos value) 69 // from the beginning of the file. Thus, the file base offset is the Pos value 70 // representing the first byte in the file. 71 // 72 // To create the Pos value for a specific source offset (measured in bytes), 73 // first add the respective file to the current file set using [FileSet.AddFile] 74 // and then call [File.Pos](offset) for that file. Given a Pos value p 75 // for a specific file set fset, the corresponding [Position] value is 76 // obtained by calling fset.Position(p). 77 // 78 // Pos values can be compared directly with the usual comparison operators: 79 // If two Pos values p and q are in the same file, comparing p and q is 80 // equivalent to comparing the respective source file offsets. If p and q 81 // are in different files, p < q is true if the file implied by p was added 82 // to the respective file set before the file implied by q. 83 type Pos int 84 85 // The zero value for [Pos] is NoPos; there is no file and line information 86 // associated with it, and NoPos.IsValid() is false. NoPos is always 87 // smaller than any other [Pos] value. The corresponding [Position] value 88 // for NoPos is the zero value for [Position]. 89 const NoPos Pos = 0 90 91 // IsValid reports whether the position is valid. 92 func (p Pos) IsValid() bool { 93 return p != NoPos 94 } 95 96 // ----------------------------------------------------------------------------- 97 // File 98 99 // A File is a handle for a file belonging to a [FileSet]. 100 // A File has a name, size, and line offset table. 101 // 102 // Use [FileSet.AddFile] to create a File. 103 // A File may belong to more than one FileSet; see [FileSet.AddExistingFiles]. 104 type File struct { 105 name string // file name as provided to AddFile 106 base int // Pos value range for this file is [base...base+size] 107 size int // file size as provided to AddFile 108 109 // lines and infos are protected by mutex 110 mutex sync.Mutex 111 lines []int // lines contains the offset of the first character for each line (the first entry is always 0) 112 infos []lineInfo 113 } 114 115 // Name returns the file name of file f as registered with AddFile. 116 func (f *File) Name() string { 117 return f.name 118 } 119 120 // Base returns the base offset of file f as registered with AddFile. 121 func (f *File) Base() int { 122 return f.base 123 } 124 125 // Size returns the size of file f as registered with AddFile. 126 func (f *File) Size() int { 127 return f.size 128 } 129 130 // End returns the end position of file f as registered with AddFile. 131 func (f *File) End() Pos { 132 return Pos(f.base + f.size) 133 } 134 135 // LineCount returns the number of lines in file f. 136 func (f *File) LineCount() int { 137 f.mutex.Lock() 138 n := len(f.lines) 139 f.mutex.Unlock() 140 return n 141 } 142 143 // AddLine adds the line offset for a new line. 144 // The line offset must be larger than the offset for the previous line 145 // and smaller than the file size; otherwise the line offset is ignored. 146 func (f *File) AddLine(offset int) { 147 f.mutex.Lock() 148 if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size { 149 f.lines = append(f.lines, offset) 150 } 151 f.mutex.Unlock() 152 } 153 154 // MergeLine merges a line with the following line. It is akin to replacing 155 // the newline character at the end of the line with a space (to not change the 156 // remaining offsets). To obtain the line number, consult e.g. [Position.Line]. 157 // MergeLine will panic if given an invalid line number. 158 func (f *File) MergeLine(line int) { 159 if line < 1 { 160 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 161 } 162 f.mutex.Lock() 163 defer f.mutex.Unlock() 164 if line >= len(f.lines) { 165 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 166 } 167 // To merge the line numbered <line> with the line numbered <line+1>, 168 // we need to remove the entry in lines corresponding to the line 169 // numbered <line+1>. The entry in lines corresponding to the line 170 // numbered <line+1> is located at index <line>, since indices in lines 171 // are 0-based and line numbers are 1-based. 172 copy(f.lines[line:], f.lines[line+1:]) 173 f.lines = f.lines[:len(f.lines)-1] 174 } 175 176 // Lines returns the effective line offset table of the form described by [File.SetLines]. 177 // Callers must not mutate the result. 178 func (f *File) Lines() []int { 179 f.mutex.Lock() 180 lines := f.lines 181 f.mutex.Unlock() 182 return lines 183 } 184 185 // SetLines sets the line offsets for a file and reports whether it succeeded. 186 // The line offsets are the offsets of the first character of each line; 187 // for instance for the content "ab\nc\n" the line offsets are {0, 3}. 188 // An empty file has an empty line offset table. 189 // Each line offset must be larger than the offset for the previous line 190 // and smaller than the file size; otherwise SetLines fails and returns 191 // false. 192 // Callers must not mutate the provided slice after SetLines returns. 193 func (f *File) SetLines(lines []int) bool { 194 // verify validity of lines table 195 size := f.size 196 for i, offset := range lines { 197 if i > 0 && offset <= lines[i-1] || size <= offset { 198 return false 199 } 200 } 201 202 // set lines table 203 f.mutex.Lock() 204 f.lines = lines 205 f.mutex.Unlock() 206 return true 207 } 208 209 // SetLinesForContent sets the line offsets for the given file content. 210 // It ignores position-altering //line comments. 211 func (f *File) SetLinesForContent(content []byte) { 212 var lines []int 213 line := 0 214 for offset, b := range content { 215 if line >= 0 { 216 lines = append(lines, line) 217 } 218 line = -1 219 if b == '\n' { 220 line = offset + 1 221 } 222 } 223 224 // set lines table 225 f.mutex.Lock() 226 f.lines = lines 227 f.mutex.Unlock() 228 } 229 230 // LineStart returns the [Pos] value of the start of the specified line. 231 // It ignores any alternative positions set using [File.AddLineColumnInfo]. 232 // LineStart panics if the 1-based line number is invalid. 233 func (f *File) LineStart(line int) Pos { 234 if line < 1 { 235 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 236 } 237 f.mutex.Lock() 238 defer f.mutex.Unlock() 239 if line > len(f.lines) { 240 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 241 } 242 return Pos(f.base + f.lines[line-1]) 243 } 244 245 // A lineInfo object describes alternative file, line, and column 246 // number information (such as provided via a //line directive) 247 // for a given file offset. 248 type lineInfo struct { 249 // fields are exported to make them accessible to gob 250 Offset int 251 Filename string 252 Line, Column int 253 } 254 255 // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument. 256 // It is here for backward-compatibility for code prior to Go 1.11. 257 func (f *File) AddLineInfo(offset int, filename string, line int) { 258 f.AddLineColumnInfo(offset, filename, line, 1) 259 } 260 261 // AddLineColumnInfo adds alternative file, line, and column number 262 // information for a given file offset. The offset must be larger 263 // than the offset for the previously added alternative line info 264 // and smaller than the file size; otherwise the information is 265 // ignored. 266 // 267 // AddLineColumnInfo is typically used to register alternative position 268 // information for line directives such as //line filename:line:column. 269 func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) { 270 f.mutex.Lock() 271 if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size { 272 f.infos = append(f.infos, lineInfo{offset, filename, line, column}) 273 } 274 f.mutex.Unlock() 275 } 276 277 // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size. 278 func (f *File) fixOffset(offset int) int { 279 switch { 280 case offset < 0: 281 if !debug { 282 return 0 283 } 284 case offset > f.size: 285 if !debug { 286 return f.size 287 } 288 default: 289 return offset 290 } 291 292 // only generate this code if needed 293 if debug { 294 panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])", 295 0 /* for symmetry */, offset, f.size, 296 f.base+offset, f.base, f.base+f.size)) 297 } 298 return 0 299 } 300 301 // Pos returns the Pos value for the given file offset. 302 // 303 // If offset is negative, the result is the file's start 304 // position; if the offset is too large, the result is 305 // the file's end position (see also go.dev/issue/57490). 306 // 307 // The following invariant, though not true for Pos values 308 // in general, holds for the result p: 309 // f.Pos(f.Offset(p)) == p. 310 func (f *File) Pos(offset int) Pos { 311 return Pos(f.base + f.fixOffset(offset)) 312 } 313 314 // Offset returns the offset for the given file position p. 315 // 316 // If p is before the file's start position (or if p is NoPos), 317 // the result is 0; if p is past the file's end position, 318 // the result is the file size (see also go.dev/issue/57490). 319 // 320 // The following invariant, though not true for offset values 321 // in general, holds for the result offset: 322 // f.Offset(f.Pos(offset)) == offset 323 func (f *File) Offset(p Pos) int { 324 return f.fixOffset(int(p) - f.base) 325 } 326 327 // Line returns the line number for the given file position p; 328 // p must be a [Pos] value in that file or [NoPos]. 329 func (f *File) Line(p Pos) int { 330 return f.Position(p).Line 331 } 332 333 func searchLineInfos(a []lineInfo, x int) int { 334 i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int { 335 return cmp.Compare(a.Offset, x) 336 }) 337 if !found { 338 // We want the lineInfo containing x, but if we didn't 339 // find x then i is the next one. 340 i-- 341 } 342 return i 343 } 344 345 // unpack returns the filename and line and column number for a file offset. 346 // If adjusted is set, unpack will return the filename and line information 347 // possibly adjusted by //line comments; otherwise those comments are ignored. 348 func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) { 349 f.mutex.Lock() 350 filename = f.name 351 if i := searchInts(f.lines, offset); i >= 0 { 352 line, column = i+1, offset-f.lines[i]+1 353 } 354 if adjusted && len(f.infos) > 0 { 355 // few files have extra line infos 356 if i := searchLineInfos(f.infos, offset); i >= 0 { 357 alt := &f.infos[i] 358 filename = alt.Filename 359 if i := searchInts(f.lines, alt.Offset); i >= 0 { 360 // i+1 is the line at which the alternative position was recorded 361 d := line - (i + 1) // line distance from alternative position base 362 line = alt.Line + d 363 if alt.Column == 0 { 364 // alternative column is unknown => relative column is unknown 365 // (the current specification for line directives requires 366 // this to apply until the next PosBase/line directive, 367 // not just until the new newline) 368 column = 0 369 } else if d == 0 { 370 // the alternative position base is on the current line 371 // => column is relative to alternative column 372 column = alt.Column + (offset - alt.Offset) 373 } 374 } 375 } 376 } 377 // TODO(mvdan): move Unlock back under Lock with a defer statement once 378 // https://go.dev/issue/38471 is fixed to remove the performance penalty. 379 f.mutex.Unlock() 380 return 381 } 382 383 func (f *File) position(p Pos, adjusted bool) (pos Position) { 384 offset := f.fixOffset(int(p) - f.base) 385 pos.Offset = offset 386 pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted) 387 return 388 } 389 390 // PositionFor returns the Position value for the given file position p. 391 // If p is out of bounds, it is adjusted to match the File.Offset behavior. 392 // If adjusted is set, the position may be adjusted by position-altering 393 // //line comments; otherwise those comments are ignored. 394 // p must be a Pos value in f or NoPos. 395 func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) { 396 if p != NoPos { 397 pos = f.position(p, adjusted) 398 } 399 return 400 } 401 402 // Position returns the Position value for the given file position p. 403 // If p is out of bounds, it is adjusted to match the File.Offset behavior. 404 // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true). 405 func (f *File) Position(p Pos) (pos Position) { 406 return f.PositionFor(p, true) 407 } 408 409 // ----------------------------------------------------------------------------- 410 // FileSet 411 412 // A FileSet represents a set of source files. 413 // Methods of file sets are synchronized; multiple goroutines 414 // may invoke them concurrently. 415 // 416 // The byte offsets for each file in a file set are mapped into 417 // distinct (integer) intervals, one interval [base, base+size] 418 // per file. [FileSet.Base] represents the first byte in the file, and size 419 // is the corresponding file size. A [Pos] value is a value in such 420 // an interval. By determining the interval a [Pos] value belongs 421 // to, the file, its file base, and thus the byte offset (position) 422 // the [Pos] value is representing can be computed. 423 // 424 // When adding a new file, a file base must be provided. That can 425 // be any integer value that is past the end of any interval of any 426 // file already in the file set. For convenience, [FileSet.Base] provides 427 // such a value, which is simply the end of the Pos interval of the most 428 // recently added file, plus one. Unless there is a need to extend an 429 // interval later, using the [FileSet.Base] should be used as argument 430 // for [FileSet.AddFile]. 431 // 432 // A [File] may be removed from a FileSet when it is no longer needed. 433 // This may reduce memory usage in a long-running application. 434 type FileSet struct { 435 mutex sync.RWMutex // protects the file set 436 base int // base offset for the next file 437 tree tree // tree of files in ascending base order 438 last atomic.Pointer[File] // cache of last file looked up 439 } 440 441 // NewFileSet creates a new file set. 442 func NewFileSet() *FileSet { 443 return &FileSet{ 444 base: 1, // 0 == NoPos 445 } 446 } 447 448 // Base returns the minimum base offset that must be provided to 449 // [FileSet.AddFile] when adding the next file. 450 func (s *FileSet) Base() int { 451 s.mutex.RLock() 452 b := s.base 453 s.mutex.RUnlock() 454 return b 455 } 456 457 // AddFile adds a new file with a given filename, base offset, and file size 458 // to the file set s and returns the file. Multiple files may have the same 459 // name. The base offset must not be smaller than the [FileSet.Base], and 460 // size must not be negative. As a special case, if a negative base is provided, 461 // the current value of the [FileSet.Base] is used instead. 462 // 463 // Adding the file will set the file set's [FileSet.Base] value to base + size + 1 464 // as the minimum base value for the next file. The following relationship 465 // exists between a [Pos] value p for a given file offset offs: 466 // 467 // int(p) = base + offs 468 // 469 // with offs in the range [0, size] and thus p in the range [base, base+size]. 470 // For convenience, [File.Pos] may be used to create file-specific position 471 // values from a file offset. 472 func (s *FileSet) AddFile(filename string, base, size int) *File { 473 // Allocate f outside the critical section. 474 f := &File{name: filename, size: size, lines: []int{0}} 475 476 s.mutex.Lock() 477 defer s.mutex.Unlock() 478 if base < 0 { 479 base = s.base 480 } 481 if base < s.base { 482 panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base)) 483 } 484 f.base = base 485 if size < 0 { 486 panic(fmt.Sprintf("invalid size %d (should be >= 0)", size)) 487 } 488 // base >= s.base && size >= 0 489 base += size + 1 // +1 because EOF also has a position 490 if base < 0 { 491 panic("token.Pos offset overflow (> 2G of source code in file set)") 492 } 493 // add the file to the file set 494 s.base = base 495 s.tree.add(f) 496 s.last.Store(f) 497 return f 498 } 499 500 // AddExistingFiles adds the specified files to the 501 // FileSet if they are not already present. 502 // The caller must ensure that no pair of Files that 503 // would appear in the resulting FileSet overlap. 504 func (s *FileSet) AddExistingFiles(files ...*File) { 505 // This function cannot be implemented as: 506 // 507 // for _, file := range files { 508 // if prev := fset.File(token.Pos(file.Base())); prev != nil { 509 // if prev != file { 510 // panic("FileSet contains a different file at the same base") 511 // } 512 // continue 513 // } 514 // file2 := fset.AddFile(file.Name(), file.Base(), file.Size()) 515 // file2.SetLines(file.Lines()) 516 // } 517 // 518 // because all calls to AddFile must be in increasing order. 519 // AddExistingFiles lets us augment an existing FileSet 520 // sequentially, so long as all sets of files have disjoint ranges. 521 // This approach also does not preserve line directives. 522 523 s.mutex.Lock() 524 defer s.mutex.Unlock() 525 526 for _, f := range files { 527 s.tree.add(f) 528 s.base = max(s.base, f.Base()+f.Size()+1) 529 } 530 } 531 532 // RemoveFile removes a file from the [FileSet] so that subsequent 533 // queries for its [Pos] interval yield a negative result. 534 // This reduces the memory usage of a long-lived [FileSet] that 535 // encounters an unbounded stream of files. 536 // 537 // Removing a file that does not belong to the set has no effect. 538 func (s *FileSet) RemoveFile(file *File) { 539 s.mutex.Lock() 540 defer s.mutex.Unlock() 541 542 s.last.CompareAndSwap(file, nil) // clear last file cache 543 544 pn, _ := s.tree.locate(file.key()) 545 if *pn != nil && (*pn).file == file { 546 s.tree.delete(pn) 547 } 548 } 549 550 // Iterate calls yield for the files in the file set in ascending Base 551 // order until yield returns false. 552 func (s *FileSet) Iterate(yield func(*File) bool) { 553 s.mutex.RLock() 554 defer s.mutex.RUnlock() 555 556 // Unlock around user code. 557 // The iterator is robust to modification by yield. 558 // Avoid range here, so we can use defer. 559 s.tree.all()(func(f *File) bool { 560 s.mutex.RUnlock() 561 defer s.mutex.RLock() 562 return yield(f) 563 }) 564 } 565 566 func (s *FileSet) file(p Pos) *File { 567 // common case: p is in last file. 568 if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size { 569 return f 570 } 571 572 s.mutex.RLock() 573 defer s.mutex.RUnlock() 574 575 pn, _ := s.tree.locate(key{int(p), int(p)}) 576 if n := *pn; n != nil { 577 // Update cache of last file. A race is ok, 578 // but an exclusive lock causes heavy contention. 579 s.last.Store(n.file) 580 return n.file 581 } 582 return nil 583 } 584 585 // File returns the file that contains the position p. 586 // If no such file is found (for instance for p == [NoPos]), 587 // the result is nil. 588 func (s *FileSet) File(p Pos) (f *File) { 589 if p != NoPos { 590 f = s.file(p) 591 } 592 return 593 } 594 595 // PositionFor converts a [Pos] p in the fileset into a [Position] value. 596 // If adjusted is set, the position may be adjusted by position-altering 597 // //line comments; otherwise those comments are ignored. 598 // p must be a [Pos] value in s or [NoPos]. 599 func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) { 600 if p != NoPos { 601 if f := s.file(p); f != nil { 602 return f.position(p, adjusted) 603 } 604 } 605 return 606 } 607 608 // Position converts a [Pos] p in the fileset into a Position value. 609 // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true). 610 func (s *FileSet) Position(p Pos) (pos Position) { 611 return s.PositionFor(p, true) 612 } 613 614 // ----------------------------------------------------------------------------- 615 // Helper functions 616 617 func searchInts(a []int, x int) int { 618 // This function body is a manually inlined version of: 619 // 620 // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 621 // 622 // With better compiler optimizations, this may not be needed in the 623 // future, but at the moment this change improves the go/printer 624 // benchmark performance by ~30%. This has a direct impact on the 625 // speed of gofmt and thus seems worthwhile (2011-04-29). 626 // TODO(gri): Remove this when compilers have caught up. 627 i, j := 0, len(a) 628 for i < j { 629 h := int(uint(i+j) >> 1) // avoid overflow when computing h 630 // i ≤ h < j 631 if a[h] <= x { 632 i = h + 1 633 } else { 634 j = h 635 } 636 } 637 return i - 1 638 } 639