Source file src/archive/tar/common.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package tar implements access to tar archives.
     6  //
     7  // Tape archives (tar) are a file format for storing a sequence of files that
     8  // can be read and written in a streaming manner.
     9  // This package aims to cover most variations of the format,
    10  // including those produced by GNU and BSD tar tools.
    11  package tar
    12  
    13  import (
    14  	"errors"
    15  	"fmt"
    16  	"internal/godebug"
    17  	"io/fs"
    18  	"math"
    19  	"path"
    20  	"reflect"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  )
    25  
    26  // BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
    27  // architectures. If a large value is encountered when decoding, the result
    28  // stored in Header will be the truncated version.
    29  
    30  var tarinsecurepath = godebug.New("tarinsecurepath")
    31  
    32  var (
    33  	ErrHeader          = errors.New("archive/tar: invalid tar header")
    34  	ErrWriteTooLong    = errors.New("archive/tar: write too long")
    35  	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
    36  	ErrWriteAfterClose = errors.New("archive/tar: write after close")
    37  	ErrInsecurePath    = errors.New("archive/tar: insecure file path")
    38  	errMissData        = errors.New("archive/tar: sparse file references non-existent data")
    39  	errUnrefData       = errors.New("archive/tar: sparse file contains unreferenced data")
    40  	errWriteHole       = errors.New("archive/tar: write non-NUL byte in sparse hole")
    41  )
    42  
    43  type headerError []string
    44  
    45  func (he headerError) Error() string {
    46  	const prefix = "archive/tar: cannot encode header"
    47  	var ss []string
    48  	for _, s := range he {
    49  		if s != "" {
    50  			ss = append(ss, s)
    51  		}
    52  	}
    53  	if len(ss) == 0 {
    54  		return prefix
    55  	}
    56  	return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
    57  }
    58  
    59  // Type flags for Header.Typeflag.
    60  const (
    61  	// Type '0' indicates a regular file.
    62  	TypeReg = '0'
    63  
    64  	// Deprecated: Use TypeReg instead.
    65  	TypeRegA = '\x00'
    66  
    67  	// Type '1' to '6' are header-only flags and may not have a data body.
    68  	TypeLink    = '1' // Hard link
    69  	TypeSymlink = '2' // Symbolic link
    70  	TypeChar    = '3' // Character device node
    71  	TypeBlock   = '4' // Block device node
    72  	TypeDir     = '5' // Directory
    73  	TypeFifo    = '6' // FIFO node
    74  
    75  	// Type '7' is reserved.
    76  	TypeCont = '7'
    77  
    78  	// Type 'x' is used by the PAX format to store key-value records that
    79  	// are only relevant to the next file.
    80  	// This package transparently handles these types.
    81  	TypeXHeader = 'x'
    82  
    83  	// Type 'g' is used by the PAX format to store key-value records that
    84  	// are relevant to all subsequent files.
    85  	// This package only supports parsing and composing such headers,
    86  	// but does not currently support persisting the global state across files.
    87  	TypeXGlobalHeader = 'g'
    88  
    89  	// Type 'S' indicates a sparse file in the GNU format.
    90  	TypeGNUSparse = 'S'
    91  
    92  	// Types 'L' and 'K' are used by the GNU format for a meta file
    93  	// used to store the path or link name for the next file.
    94  	// This package transparently handles these types.
    95  	TypeGNULongName = 'L'
    96  	TypeGNULongLink = 'K'
    97  )
    98  
    99  // Keywords for PAX extended header records.
   100  const (
   101  	paxNone     = "" // Indicates that no PAX key is suitable
   102  	paxPath     = "path"
   103  	paxLinkpath = "linkpath"
   104  	paxSize     = "size"
   105  	paxUid      = "uid"
   106  	paxGid      = "gid"
   107  	paxUname    = "uname"
   108  	paxGname    = "gname"
   109  	paxMtime    = "mtime"
   110  	paxAtime    = "atime"
   111  	paxCtime    = "ctime"   // Removed from later revision of PAX spec, but was valid
   112  	paxCharset  = "charset" // Currently unused
   113  	paxComment  = "comment" // Currently unused
   114  
   115  	paxSchilyXattr = "SCHILY.xattr."
   116  
   117  	// Keywords for GNU sparse files in a PAX extended header.
   118  	paxGNUSparse          = "GNU.sparse."
   119  	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
   120  	paxGNUSparseOffset    = "GNU.sparse.offset"
   121  	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
   122  	paxGNUSparseMap       = "GNU.sparse.map"
   123  	paxGNUSparseName      = "GNU.sparse.name"
   124  	paxGNUSparseMajor     = "GNU.sparse.major"
   125  	paxGNUSparseMinor     = "GNU.sparse.minor"
   126  	paxGNUSparseSize      = "GNU.sparse.size"
   127  	paxGNUSparseRealSize  = "GNU.sparse.realsize"
   128  )
   129  
   130  // basicKeys is a set of the PAX keys for which we have built-in support.
   131  // This does not contain "charset" or "comment", which are both PAX-specific,
   132  // so adding them as first-class features of Header is unlikely.
   133  // Users can use the PAXRecords field to set it themselves.
   134  var basicKeys = map[string]bool{
   135  	paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true,
   136  	paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true,
   137  }
   138  
   139  // A Header represents a single header in a tar archive.
   140  // Some fields may not be populated.
   141  //
   142  // For forward compatibility, users that retrieve a Header from Reader.Next,
   143  // mutate it in some ways, and then pass it back to Writer.WriteHeader
   144  // should do so by creating a new Header and copying the fields
   145  // that they are interested in preserving.
   146  type Header struct {
   147  	// Typeflag is the type of header entry.
   148  	// The zero value is automatically promoted to either TypeReg or TypeDir
   149  	// depending on the presence of a trailing slash in Name.
   150  	Typeflag byte
   151  
   152  	Name     string // Name of file entry
   153  	Linkname string // Target name of link (valid for TypeLink or TypeSymlink)
   154  
   155  	Size  int64  // Logical file size in bytes
   156  	Mode  int64  // Permission and mode bits
   157  	Uid   int    // User ID of owner
   158  	Gid   int    // Group ID of owner
   159  	Uname string // User name of owner
   160  	Gname string // Group name of owner
   161  
   162  	// If the Format is unspecified, then Writer.WriteHeader rounds ModTime
   163  	// to the nearest second and ignores the AccessTime and ChangeTime fields.
   164  	//
   165  	// To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
   166  	// To use sub-second resolution, specify the Format as PAX.
   167  	ModTime    time.Time // Modification time
   168  	AccessTime time.Time // Access time (requires either PAX or GNU support)
   169  	ChangeTime time.Time // Change time (requires either PAX or GNU support)
   170  
   171  	Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
   172  	Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)
   173  
   174  	// Xattrs stores extended attributes as PAX records under the
   175  	// "SCHILY.xattr." namespace.
   176  	//
   177  	// The following are semantically equivalent:
   178  	//  h.Xattrs[key] = value
   179  	//  h.PAXRecords["SCHILY.xattr."+key] = value
   180  	//
   181  	// When Writer.WriteHeader is called, the contents of Xattrs will take
   182  	// precedence over those in PAXRecords.
   183  	//
   184  	// Deprecated: Use PAXRecords instead.
   185  	Xattrs map[string]string
   186  
   187  	// PAXRecords is a map of PAX extended header records.
   188  	//
   189  	// User-defined records should have keys of the following form:
   190  	//	VENDOR.keyword
   191  	// Where VENDOR is some namespace in all uppercase, and keyword may
   192  	// not contain the '=' character (e.g., "GOLANG.pkg.version").
   193  	// The key and value should be non-empty UTF-8 strings.
   194  	//
   195  	// When Writer.WriteHeader is called, PAX records derived from the
   196  	// other fields in Header take precedence over PAXRecords.
   197  	PAXRecords map[string]string
   198  
   199  	// Format specifies the format of the tar header.
   200  	//
   201  	// This is set by Reader.Next as a best-effort guess at the format.
   202  	// Since the Reader liberally reads some non-compliant files,
   203  	// it is possible for this to be FormatUnknown.
   204  	//
   205  	// If the format is unspecified when Writer.WriteHeader is called,
   206  	// then it uses the first format (in the order of USTAR, PAX, GNU)
   207  	// capable of encoding this Header (see Format).
   208  	Format Format
   209  }
   210  
   211  // sparseEntry represents a Length-sized fragment at Offset in the file.
   212  type sparseEntry struct{ Offset, Length int64 }
   213  
   214  func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length }
   215  
   216  // A sparse file can be represented as either a sparseDatas or a sparseHoles.
   217  // As long as the total size is known, they are equivalent and one can be
   218  // converted to the other form and back. The various tar formats with sparse
   219  // file support represent sparse files in the sparseDatas form. That is, they
   220  // specify the fragments in the file that has data, and treat everything else as
   221  // having zero bytes. As such, the encoding and decoding logic in this package
   222  // deals with sparseDatas.
   223  //
   224  // However, the external API uses sparseHoles instead of sparseDatas because the
   225  // zero value of sparseHoles logically represents a normal file (i.e., there are
   226  // no holes in it). On the other hand, the zero value of sparseDatas implies
   227  // that the file has no data in it, which is rather odd.
   228  //
   229  // As an example, if the underlying raw file contains the 8-byte data:
   230  //
   231  //	var compactFile = "abcdefgh"
   232  //
   233  // And the sparse map has the following entries:
   234  //
   235  //	var spd sparseDatas = []sparseEntry{
   236  //		{Offset: 2,  Length: 5},  // Data fragment for 2..6
   237  //		{Offset: 18, Length: 3},  // Data fragment for 18..20
   238  //	}
   239  //	var sph sparseHoles = []sparseEntry{
   240  //		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
   241  //		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
   242  //		{Offset: 21, Length: 4},  // Hole fragment for 21..24
   243  //	}
   244  //
   245  // Then the content of the resulting sparse file with a Header.Size of 25 is:
   246  //
   247  //	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
   248  type (
   249  	sparseDatas []sparseEntry
   250  	sparseHoles []sparseEntry
   251  )
   252  
   253  // validateSparseEntries reports whether sp is a valid sparse map.
   254  // It does not matter whether sp represents data fragments or hole fragments.
   255  func validateSparseEntries(sp []sparseEntry, size int64) bool {
   256  	// Validate all sparse entries. These are the same checks as performed by
   257  	// the BSD tar utility.
   258  	if size < 0 {
   259  		return false
   260  	}
   261  	var pre sparseEntry
   262  	for _, cur := range sp {
   263  		switch {
   264  		case cur.Offset < 0 || cur.Length < 0:
   265  			return false // Negative values are never okay
   266  		case cur.Offset > math.MaxInt64-cur.Length:
   267  			return false // Integer overflow with large length
   268  		case cur.endOffset() > size:
   269  			return false // Region extends beyond the actual size
   270  		case pre.endOffset() > cur.Offset:
   271  			return false // Regions cannot overlap and must be in order
   272  		}
   273  		pre = cur
   274  	}
   275  	return true
   276  }
   277  
   278  // alignSparseEntries mutates src and returns dst where each fragment's
   279  // starting offset is aligned up to the nearest block edge, and each
   280  // ending offset is aligned down to the nearest block edge.
   281  //
   282  // Even though the Go tar Reader and the BSD tar utility can handle entries
   283  // with arbitrary offsets and lengths, the GNU tar utility can only handle
   284  // offsets and lengths that are multiples of blockSize.
   285  func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry {
   286  	dst := src[:0]
   287  	for _, s := range src {
   288  		pos, end := s.Offset, s.endOffset()
   289  		pos += blockPadding(+pos) // Round-up to nearest blockSize
   290  		if end != size {
   291  			end -= blockPadding(-end) // Round-down to nearest blockSize
   292  		}
   293  		if pos < end {
   294  			dst = append(dst, sparseEntry{Offset: pos, Length: end - pos})
   295  		}
   296  	}
   297  	return dst
   298  }
   299  
   300  // invertSparseEntries converts a sparse map from one form to the other.
   301  // If the input is sparseHoles, then it will output sparseDatas and vice-versa.
   302  // The input must have been already validated.
   303  //
   304  // This function mutates src and returns a normalized map where:
   305  //   - adjacent fragments are coalesced together
   306  //   - only the last fragment may be empty
   307  //   - the endOffset of the last fragment is the total size
   308  func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry {
   309  	dst := src[:0]
   310  	var pre sparseEntry
   311  	for _, cur := range src {
   312  		if cur.Length == 0 {
   313  			continue // Skip empty fragments
   314  		}
   315  		pre.Length = cur.Offset - pre.Offset
   316  		if pre.Length > 0 {
   317  			dst = append(dst, pre) // Only add non-empty fragments
   318  		}
   319  		pre.Offset = cur.endOffset()
   320  	}
   321  	pre.Length = size - pre.Offset // Possibly the only empty fragment
   322  	return append(dst, pre)
   323  }
   324  
   325  // fileState tracks the number of logical (includes sparse holes) and physical
   326  // (actual in tar archive) bytes remaining for the current file.
   327  //
   328  // Invariant: logicalRemaining >= physicalRemaining
   329  type fileState interface {
   330  	logicalRemaining() int64
   331  	physicalRemaining() int64
   332  }
   333  
   334  // allowedFormats determines which formats can be used.
   335  // The value returned is the logical OR of multiple possible formats.
   336  // If the value is FormatUnknown, then the input Header cannot be encoded
   337  // and an error is returned explaining why.
   338  //
   339  // As a by-product of checking the fields, this function returns paxHdrs, which
   340  // contain all fields that could not be directly encoded.
   341  // A value receiver ensures that this method does not mutate the source Header.
   342  func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) {
   343  	format = FormatUSTAR | FormatPAX | FormatGNU
   344  	paxHdrs = make(map[string]string)
   345  
   346  	var whyNoUSTAR, whyNoPAX, whyNoGNU string
   347  	var preferPAX bool // Prefer PAX over USTAR
   348  	verifyString := func(s string, size int, name, paxKey string) {
   349  		// NUL-terminator is optional for path and linkpath.
   350  		// Technically, it is required for uname and gname,
   351  		// but neither GNU nor BSD tar checks for it.
   352  		tooLong := len(s) > size
   353  		allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath
   354  		if hasNUL(s) || (tooLong && !allowLongGNU) {
   355  			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s)
   356  			format.mustNotBe(FormatGNU)
   357  		}
   358  		if !isASCII(s) || tooLong {
   359  			canSplitUSTAR := paxKey == paxPath
   360  			if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok {
   361  				whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s)
   362  				format.mustNotBe(FormatUSTAR)
   363  			}
   364  			if paxKey == paxNone {
   365  				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s)
   366  				format.mustNotBe(FormatPAX)
   367  			} else {
   368  				paxHdrs[paxKey] = s
   369  			}
   370  		}
   371  		if v, ok := h.PAXRecords[paxKey]; ok && v == s {
   372  			paxHdrs[paxKey] = v
   373  		}
   374  	}
   375  	verifyNumeric := func(n int64, size int, name, paxKey string) {
   376  		if !fitsInBase256(size, n) {
   377  			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n)
   378  			format.mustNotBe(FormatGNU)
   379  		}
   380  		if !fitsInOctal(size, n) {
   381  			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n)
   382  			format.mustNotBe(FormatUSTAR)
   383  			if paxKey == paxNone {
   384  				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n)
   385  				format.mustNotBe(FormatPAX)
   386  			} else {
   387  				paxHdrs[paxKey] = strconv.FormatInt(n, 10)
   388  			}
   389  		}
   390  		if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) {
   391  			paxHdrs[paxKey] = v
   392  		}
   393  	}
   394  	verifyTime := func(ts time.Time, size int, name, paxKey string) {
   395  		if ts.IsZero() {
   396  			return // Always okay
   397  		}
   398  		if !fitsInBase256(size, ts.Unix()) {
   399  			whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
   400  			format.mustNotBe(FormatGNU)
   401  		}
   402  		isMtime := paxKey == paxMtime
   403  		fitsOctal := fitsInOctal(size, ts.Unix())
   404  		if (isMtime && !fitsOctal) || !isMtime {
   405  			whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
   406  			format.mustNotBe(FormatUSTAR)
   407  		}
   408  		needsNano := ts.Nanosecond() != 0
   409  		if !isMtime || !fitsOctal || needsNano {
   410  			preferPAX = true // USTAR may truncate sub-second measurements
   411  			if paxKey == paxNone {
   412  				whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
   413  				format.mustNotBe(FormatPAX)
   414  			} else {
   415  				paxHdrs[paxKey] = formatPAXTime(ts)
   416  			}
   417  		}
   418  		if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) {
   419  			paxHdrs[paxKey] = v
   420  		}
   421  	}
   422  
   423  	// Check basic fields.
   424  	var blk block
   425  	v7 := blk.toV7()
   426  	ustar := blk.toUSTAR()
   427  	gnu := blk.toGNU()
   428  	verifyString(h.Name, len(v7.name()), "Name", paxPath)
   429  	verifyString(h.Linkname, len(v7.linkName()), "Linkname", paxLinkpath)
   430  	verifyString(h.Uname, len(ustar.userName()), "Uname", paxUname)
   431  	verifyString(h.Gname, len(ustar.groupName()), "Gname", paxGname)
   432  	verifyNumeric(h.Mode, len(v7.mode()), "Mode", paxNone)
   433  	verifyNumeric(int64(h.Uid), len(v7.uid()), "Uid", paxUid)
   434  	verifyNumeric(int64(h.Gid), len(v7.gid()), "Gid", paxGid)
   435  	verifyNumeric(h.Size, len(v7.size()), "Size", paxSize)
   436  	verifyNumeric(h.Devmajor, len(ustar.devMajor()), "Devmajor", paxNone)
   437  	verifyNumeric(h.Devminor, len(ustar.devMinor()), "Devminor", paxNone)
   438  	verifyTime(h.ModTime, len(v7.modTime()), "ModTime", paxMtime)
   439  	verifyTime(h.AccessTime, len(gnu.accessTime()), "AccessTime", paxAtime)
   440  	verifyTime(h.ChangeTime, len(gnu.changeTime()), "ChangeTime", paxCtime)
   441  
   442  	// Check for header-only types.
   443  	var whyOnlyPAX, whyOnlyGNU string
   444  	switch h.Typeflag {
   445  	case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse:
   446  		// Exclude TypeLink and TypeSymlink, since they may reference directories.
   447  		if strings.HasSuffix(h.Name, "/") {
   448  			return FormatUnknown, nil, headerError{"filename may not have trailing slash"}
   449  		}
   450  	case TypeXHeader, TypeGNULongName, TypeGNULongLink:
   451  		return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"}
   452  	case TypeXGlobalHeader:
   453  		h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}
   454  		if !reflect.DeepEqual(h, h2) {
   455  			return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"}
   456  		}
   457  		whyOnlyPAX = "only PAX supports TypeXGlobalHeader"
   458  		format.mayOnlyBe(FormatPAX)
   459  	}
   460  	if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
   461  		return FormatUnknown, nil, headerError{"negative size on header-only type"}
   462  	}
   463  
   464  	// Check PAX records.
   465  	if len(h.Xattrs) > 0 {
   466  		for k, v := range h.Xattrs {
   467  			paxHdrs[paxSchilyXattr+k] = v
   468  		}
   469  		whyOnlyPAX = "only PAX supports Xattrs"
   470  		format.mayOnlyBe(FormatPAX)
   471  	}
   472  	if len(h.PAXRecords) > 0 {
   473  		for k, v := range h.PAXRecords {
   474  			switch _, exists := paxHdrs[k]; {
   475  			case exists:
   476  				continue // Do not overwrite existing records
   477  			case h.Typeflag == TypeXGlobalHeader:
   478  				paxHdrs[k] = v // Copy all records
   479  			case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse):
   480  				paxHdrs[k] = v // Ignore local records that may conflict
   481  			}
   482  		}
   483  		whyOnlyPAX = "only PAX supports PAXRecords"
   484  		format.mayOnlyBe(FormatPAX)
   485  	}
   486  	for k, v := range paxHdrs {
   487  		if !validPAXRecord(k, v) {
   488  			return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)}
   489  		}
   490  	}
   491  
   492  	// TODO(dsnet): Re-enable this when adding sparse support.
   493  	// See https://golang.org/issue/22735
   494  	/*
   495  		// Check sparse files.
   496  		if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
   497  			if isHeaderOnlyType(h.Typeflag) {
   498  				return FormatUnknown, nil, headerError{"header-only type cannot be sparse"}
   499  			}
   500  			if !validateSparseEntries(h.SparseHoles, h.Size) {
   501  				return FormatUnknown, nil, headerError{"invalid sparse holes"}
   502  			}
   503  			if h.Typeflag == TypeGNUSparse {
   504  				whyOnlyGNU = "only GNU supports TypeGNUSparse"
   505  				format.mayOnlyBe(FormatGNU)
   506  			} else {
   507  				whyNoGNU = "GNU supports sparse files only with TypeGNUSparse"
   508  				format.mustNotBe(FormatGNU)
   509  			}
   510  			whyNoUSTAR = "USTAR does not support sparse files"
   511  			format.mustNotBe(FormatUSTAR)
   512  		}
   513  	*/
   514  
   515  	// Check desired format.
   516  	if wantFormat := h.Format; wantFormat != FormatUnknown {
   517  		if wantFormat.has(FormatPAX) && !preferPAX {
   518  			wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
   519  		}
   520  		format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted
   521  	}
   522  	if format == FormatUnknown {
   523  		switch h.Format {
   524  		case FormatUSTAR:
   525  			err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU}
   526  		case FormatPAX:
   527  			err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU}
   528  		case FormatGNU:
   529  			err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX}
   530  		default:
   531  			err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU}
   532  		}
   533  	}
   534  	return format, paxHdrs, err
   535  }
   536  
   537  // FileInfo returns an fs.FileInfo for the Header.
   538  func (h *Header) FileInfo() fs.FileInfo {
   539  	return headerFileInfo{h}
   540  }
   541  
   542  // headerFileInfo implements fs.FileInfo.
   543  type headerFileInfo struct {
   544  	h *Header
   545  }
   546  
   547  func (fi headerFileInfo) Size() int64        { return fi.h.Size }
   548  func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
   549  func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
   550  func (fi headerFileInfo) Sys() any           { return fi.h }
   551  
   552  // Name returns the base name of the file.
   553  func (fi headerFileInfo) Name() string {
   554  	if fi.IsDir() {
   555  		return path.Base(path.Clean(fi.h.Name))
   556  	}
   557  	return path.Base(fi.h.Name)
   558  }
   559  
   560  // Mode returns the permission and mode bits for the headerFileInfo.
   561  func (fi headerFileInfo) Mode() (mode fs.FileMode) {
   562  	// Set file permission bits.
   563  	mode = fs.FileMode(fi.h.Mode).Perm()
   564  
   565  	// Set setuid, setgid and sticky bits.
   566  	if fi.h.Mode&c_ISUID != 0 {
   567  		mode |= fs.ModeSetuid
   568  	}
   569  	if fi.h.Mode&c_ISGID != 0 {
   570  		mode |= fs.ModeSetgid
   571  	}
   572  	if fi.h.Mode&c_ISVTX != 0 {
   573  		mode |= fs.ModeSticky
   574  	}
   575  
   576  	// Set file mode bits; clear perm, setuid, setgid, and sticky bits.
   577  	switch m := fs.FileMode(fi.h.Mode) &^ 07777; m {
   578  	case c_ISDIR:
   579  		mode |= fs.ModeDir
   580  	case c_ISFIFO:
   581  		mode |= fs.ModeNamedPipe
   582  	case c_ISLNK:
   583  		mode |= fs.ModeSymlink
   584  	case c_ISBLK:
   585  		mode |= fs.ModeDevice
   586  	case c_ISCHR:
   587  		mode |= fs.ModeDevice
   588  		mode |= fs.ModeCharDevice
   589  	case c_ISSOCK:
   590  		mode |= fs.ModeSocket
   591  	}
   592  
   593  	switch fi.h.Typeflag {
   594  	case TypeSymlink:
   595  		mode |= fs.ModeSymlink
   596  	case TypeChar:
   597  		mode |= fs.ModeDevice
   598  		mode |= fs.ModeCharDevice
   599  	case TypeBlock:
   600  		mode |= fs.ModeDevice
   601  	case TypeDir:
   602  		mode |= fs.ModeDir
   603  	case TypeFifo:
   604  		mode |= fs.ModeNamedPipe
   605  	}
   606  
   607  	return mode
   608  }
   609  
   610  func (fi headerFileInfo) String() string {
   611  	return fs.FormatFileInfo(fi)
   612  }
   613  
   614  // sysStat, if non-nil, populates h from system-dependent fields of fi.
   615  var sysStat func(fi fs.FileInfo, h *Header, doNameLookups bool) error
   616  
   617  const (
   618  	// Mode constants from the USTAR spec:
   619  	// See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
   620  	c_ISUID = 04000 // Set uid
   621  	c_ISGID = 02000 // Set gid
   622  	c_ISVTX = 01000 // Save text (sticky bit)
   623  
   624  	// Common Unix mode constants; these are not defined in any common tar standard.
   625  	// Header.FileInfo understands these, but FileInfoHeader will never produce these.
   626  	c_ISDIR  = 040000  // Directory
   627  	c_ISFIFO = 010000  // FIFO
   628  	c_ISREG  = 0100000 // Regular file
   629  	c_ISLNK  = 0120000 // Symbolic link
   630  	c_ISBLK  = 060000  // Block special file
   631  	c_ISCHR  = 020000  // Character special file
   632  	c_ISSOCK = 0140000 // Socket
   633  )
   634  
   635  // FileInfoHeader creates a partially-populated [Header] from fi.
   636  // If fi describes a symlink, FileInfoHeader records link as the link target.
   637  // If fi describes a directory, a slash is appended to the name.
   638  //
   639  // Since fs.FileInfo's Name method only returns the base name of
   640  // the file it describes, it may be necessary to modify Header.Name
   641  // to provide the full path name of the file.
   642  //
   643  // If fi implements [FileInfoNames]
   644  // Header.Gname and Header.Uname
   645  // are provided by the methods of the interface.
   646  func FileInfoHeader(fi fs.FileInfo, link string) (*Header, error) {
   647  	if fi == nil {
   648  		return nil, errors.New("archive/tar: FileInfo is nil")
   649  	}
   650  	fm := fi.Mode()
   651  	h := &Header{
   652  		Name:    fi.Name(),
   653  		ModTime: fi.ModTime(),
   654  		Mode:    int64(fm.Perm()), // or'd with c_IS* constants later
   655  	}
   656  	switch {
   657  	case fm.IsRegular():
   658  		h.Typeflag = TypeReg
   659  		h.Size = fi.Size()
   660  	case fi.IsDir():
   661  		h.Typeflag = TypeDir
   662  		h.Name += "/"
   663  	case fm&fs.ModeSymlink != 0:
   664  		h.Typeflag = TypeSymlink
   665  		h.Linkname = link
   666  	case fm&fs.ModeDevice != 0:
   667  		if fm&fs.ModeCharDevice != 0 {
   668  			h.Typeflag = TypeChar
   669  		} else {
   670  			h.Typeflag = TypeBlock
   671  		}
   672  	case fm&fs.ModeNamedPipe != 0:
   673  		h.Typeflag = TypeFifo
   674  	case fm&fs.ModeSocket != 0:
   675  		return nil, fmt.Errorf("archive/tar: sockets not supported")
   676  	default:
   677  		return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
   678  	}
   679  	if fm&fs.ModeSetuid != 0 {
   680  		h.Mode |= c_ISUID
   681  	}
   682  	if fm&fs.ModeSetgid != 0 {
   683  		h.Mode |= c_ISGID
   684  	}
   685  	if fm&fs.ModeSticky != 0 {
   686  		h.Mode |= c_ISVTX
   687  	}
   688  	// If possible, populate additional fields from OS-specific
   689  	// FileInfo fields.
   690  	if sys, ok := fi.Sys().(*Header); ok {
   691  		// This FileInfo came from a Header (not the OS). Use the
   692  		// original Header to populate all remaining fields.
   693  		h.Uid = sys.Uid
   694  		h.Gid = sys.Gid
   695  		h.Uname = sys.Uname
   696  		h.Gname = sys.Gname
   697  		h.AccessTime = sys.AccessTime
   698  		h.ChangeTime = sys.ChangeTime
   699  		if sys.Xattrs != nil {
   700  			h.Xattrs = make(map[string]string)
   701  			for k, v := range sys.Xattrs {
   702  				h.Xattrs[k] = v
   703  			}
   704  		}
   705  		if sys.Typeflag == TypeLink {
   706  			// hard link
   707  			h.Typeflag = TypeLink
   708  			h.Size = 0
   709  			h.Linkname = sys.Linkname
   710  		}
   711  		if sys.PAXRecords != nil {
   712  			h.PAXRecords = make(map[string]string)
   713  			for k, v := range sys.PAXRecords {
   714  				h.PAXRecords[k] = v
   715  			}
   716  		}
   717  	}
   718  	var doNameLookups = true
   719  	if iface, ok := fi.(FileInfoNames); ok {
   720  		doNameLookups = false
   721  		var err error
   722  		h.Gname, err = iface.Gname()
   723  		if err != nil {
   724  			return nil, err
   725  		}
   726  		h.Uname, err = iface.Uname()
   727  		if err != nil {
   728  			return nil, err
   729  		}
   730  	}
   731  	if sysStat != nil {
   732  		return h, sysStat(fi, h, doNameLookups)
   733  	}
   734  	return h, nil
   735  }
   736  
   737  // FileInfoNames extends [fs.FileInfo].
   738  // Passing an instance of this to [FileInfoHeader] permits the caller
   739  // to avoid a system-dependent name lookup by specifying the Uname and Gname directly.
   740  type FileInfoNames interface {
   741  	fs.FileInfo
   742  	// Uname should give a user name.
   743  	Uname() (string, error)
   744  	// Gname should give a group name.
   745  	Gname() (string, error)
   746  }
   747  
   748  // isHeaderOnlyType checks if the given type flag is of the type that has no
   749  // data section even if a size is specified.
   750  func isHeaderOnlyType(flag byte) bool {
   751  	switch flag {
   752  	case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
   753  		return true
   754  	default:
   755  		return false
   756  	}
   757  }
   758  

View as plain text