Source file src/cmd/go/internal/modfetch/codehost/codehost.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package codehost defines the interface implemented by a code hosting source,
     6  // along with support code for use by implementations.
     7  package codehost
     8  
     9  import (
    10  	"bytes"
    11  	"context"
    12  	"crypto/sha256"
    13  	"fmt"
    14  	"io"
    15  	"io/fs"
    16  	"os"
    17  	"os/exec"
    18  	"path/filepath"
    19  	"strings"
    20  	"sync"
    21  	"time"
    22  
    23  	"cmd/go/internal/cfg"
    24  	"cmd/go/internal/lockedfile"
    25  	"cmd/go/internal/str"
    26  
    27  	"golang.org/x/mod/module"
    28  	"golang.org/x/mod/semver"
    29  )
    30  
// Downloaded size limits.
//
// NOTE(review): these are presumably byte counts intended to be passed as the
// maxSize argument of Repo.ReadFile and Repo.ReadZip — confirm against callers.
const (
	MaxGoMod   = 16 << 20  // maximum size of go.mod file
	MaxLICENSE = 16 << 20  // maximum size of LICENSE file
	MaxZipFile = 500 << 20 // maximum size of downloaded zip file
)
    37  
// A Repo represents a code hosting source.
// Typical implementations include local version control repositories,
// remote version control servers, and code hosting sites.
//
// A Repo must be safe for simultaneous use by multiple goroutines,
// and callers must not modify returned values, which may be cached and shared.
type Repo interface {
	// CheckReuse checks whether the old origin information
	// remains up to date. If so, whatever cached object it was
	// taken from can be reused.
	// The subdir gives the subdirectory name where the module root is expected
	// to be found, "" for the root or "sub/dir" for a subdirectory (no trailing slash).
	CheckReuse(ctx context.Context, old *Origin, subdir string) error

	// Tags lists all tags with the given prefix.
	Tags(ctx context.Context, prefix string) (*Tags, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(ctx context.Context, rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying implementation.
	Latest(ctx context.Context) (*RevInfo, error)

	// ReadFile reads the given file in the file tree corresponding to revision rev.
	// It should refuse to read more than maxSize bytes.
	//
	// If the requested file does not exist it should return an error for which
	// os.IsNotExist(err) returns true.
	ReadFile(ctx context.Context, rev, file string, maxSize int64) (data []byte, err error)

	// ReadZip downloads a zip file for the subdir subdirectory
	// of the given revision.
	// It should refuse to read more than maxSize bytes.
	// It returns a ReadCloser for a streamed copy of the zip file.
	// All files in the zip file are expected to be
	// nested in a single top-level directory, whose name is not specified.
	//
	// NOTE(review): this comment previously said the zip is written "to a new
	// file in a given temporary directory", but the signature takes no
	// directory argument — confirm where implementations stage the download.
	ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error)

	// RecentTag returns the most recent tag on rev or one of its predecessors
	// with the given prefix. allowed may be used to filter out unwanted versions.
	RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error)

	// DescendsFrom reports whether rev or any of its ancestors has the given tag.
	//
	// DescendsFrom must return true for any tag returned by RecentTag for the
	// same revision.
	DescendsFrom(ctx context.Context, rev, tag string) (bool, error)
}
    89  
// An Origin describes the provenance of a given repo method result.
// It can be passed to CheckReuse (usually in a different go command invocation)
// to see whether the result remains up-to-date.
type Origin struct {
	VCS    string `json:",omitempty"` // "git" etc
	URL    string `json:",omitempty"` // URL of repository
	Subdir string `json:",omitempty"` // subdirectory in repo

	Hash string `json:",omitempty"` // commit hash or ID

	// If TagSum is non-empty, then the resolution of this module version
	// depends on the set of tags present in the repo, specifically the tags
	// of the form TagPrefix + a valid semver version.
	// If the matching repo tags and their commit hashes still hash to TagSum,
	// the Origin is still valid (at least as far as the tags are concerned).
	// The exact checksum is up to the Repo implementation; see (*gitRepo).Tags.
	TagPrefix string `json:",omitempty"`
	TagSum    string `json:",omitempty"`

	// If Ref is non-empty, then the resolution of this module version
	// depends on Ref resolving to the revision identified by Hash.
	// If Ref still resolves to Hash, the Origin is still valid (at least as far as Ref is concerned).
	// For Git, the Ref is a full ref like "refs/heads/main" or "refs/tags/v1.2.3",
	// and the Hash is the Git object hash the ref maps to.
	// Other VCS might choose differently, but the idea is that Ref is the name
	// with a mutable meaning while Hash is a name with an immutable meaning.
	Ref string `json:",omitempty"`

	// If RepoSum is non-empty, then the resolution of this module version
	// depends on the entire state of the repo, which RepoSum summarizes.
	// For Git, this is a hash of all the refs and their hashes, and the RepoSum
	// is only needed for module versions that don't exist.
	// For Mercurial, this is a hash of all the branches and their heads' hashes,
	// since the set of available tags is derived from .hgtags files in those branches,
	// and the RepoSum is used for all module versions, available and not.
	RepoSum string `json:",omitempty"`
}
   127  
// A Tags describes the available tags in a code repository.
type Tags struct {
	// Origin describes the provenance of List; see the Origin type.
	Origin *Origin
	// List holds the tags themselves.
	List   []Tag
}
   133  
// A Tag describes a single tag in a code repository.
type Tag struct {
	// Name is the tag's name.
	Name string
	Hash string // content hash identifying tag's content, if available
}
   139  
   140  // isOriginTag reports whether tag should be preserved
   141  // in the Tags method's Origin calculation.
   142  // We can safely ignore tags that are not look like pseudo-versions,
   143  // because ../coderepo.go's (*codeRepo).Versions ignores them too.
   144  // We can also ignore non-semver tags, but we have to include semver
   145  // tags with extra suffixes, because the pseudo-version base finder uses them.
   146  func isOriginTag(tag string) bool {
   147  	// modfetch.(*codeRepo).Versions uses Canonical == tag,
   148  	// but pseudo-version calculation has a weaker condition that
   149  	// the canonical is a prefix of the tag.
   150  	// Include those too, so that if any new one appears, we'll invalidate the cache entry.
   151  	// This will lead to spurious invalidation of version list results,
   152  	// but tags of this form being created should be fairly rare
   153  	// (and invalidate pseudo-version results anyway).
   154  	c := semver.Canonical(tag)
   155  	return c != "" && strings.HasPrefix(tag, c) && !module.IsPseudoVersion(tag)
   156  }
   157  
// A RevInfo describes a single revision in a source code repository.
type RevInfo struct {
	// Origin describes the provenance of this result; see the Origin type.
	Origin  *Origin
	Name    string    // complete ID in underlying repository
	Short   string    // shortened ID, for use in pseudo-version
	Version string    // version used in lookup
	Time    time.Time // commit time
	Tags    []string  // known tags for commit
}
   167  
   168  // UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a
   169  // revision rather than a file.
   170  type UnknownRevisionError struct {
   171  	Rev string
   172  }
   173  
   174  func (e *UnknownRevisionError) Error() string {
   175  	return "unknown revision " + e.Rev
   176  }
   177  func (UnknownRevisionError) Is(err error) bool {
   178  	return err == fs.ErrNotExist
   179  }
   180  
// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given
// repository or module contains no commits.
var ErrNoCommits error = noCommitsError{}

// noCommitsError is the concrete type of ErrNoCommits.
type noCommitsError struct{}

// Error returns the fixed message "no commits".
func (noCommitsError) Error() string {
	return "no commits"
}
// Is reports whether err is fs.ErrNotExist, which is what makes
// ErrNoCommits match fs.ErrNotExist under errors.Is.
func (noCommitsError) Is(err error) bool {
	return err == fs.ErrNotExist
}
   193  
   194  // AllHex reports whether the revision rev is entirely lower-case hexadecimal digits.
   195  func AllHex(rev string) bool {
   196  	for i := 0; i < len(rev); i++ {
   197  		c := rev[i]
   198  		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' {
   199  			continue
   200  		}
   201  		return false
   202  	}
   203  	return true
   204  }
   205  
   206  // ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length
   207  // used in pseudo-versions (12 hex digits).
   208  func ShortenSHA1(rev string) string {
   209  	if AllHex(rev) && len(rev) == 40 {
   210  		return rev[:12]
   211  	}
   212  	return rev
   213  }
   214  
   215  // WorkDir returns the name of the cached work directory to use for the
   216  // given repository type and name.
   217  func WorkDir(ctx context.Context, typ, name string) (dir, lockfile string, err error) {
   218  	if cfg.GOMODCACHE == "" {
   219  		return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set")
   220  	}
   221  
   222  	// We name the work directory for the SHA256 hash of the type and name.
   223  	// We intentionally avoid the actual name both because of possible
   224  	// conflicts with valid file system paths and because we want to ensure
   225  	// that one checkout is never nested inside another. That nesting has
   226  	// led to security problems in the past.
   227  	if strings.Contains(typ, ":") {
   228  		return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon")
   229  	}
   230  	key := typ + ":" + name
   231  	dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key))))
   232  
   233  	xLog, buildX := cfg.BuildXWriter(ctx)
   234  	if buildX {
   235  		fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name)
   236  	}
   237  	if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil {
   238  		return "", "", err
   239  	}
   240  
   241  	lockfile = dir + ".lock"
   242  	if buildX {
   243  		fmt.Fprintf(xLog, "# lock %s\n", lockfile)
   244  	}
   245  
   246  	unlock, err := lockedfile.MutexAt(lockfile).Lock()
   247  	if err != nil {
   248  		return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err)
   249  	}
   250  	defer unlock()
   251  
   252  	data, err := os.ReadFile(dir + ".info")
   253  	info, err2 := os.Stat(dir)
   254  	if err == nil && err2 == nil && info.IsDir() {
   255  		// Info file and directory both already exist: reuse.
   256  		have := strings.TrimSuffix(string(data), "\n")
   257  		if have != key {
   258  			return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key)
   259  		}
   260  		if buildX {
   261  			fmt.Fprintf(xLog, "# %s for %s %s\n", dir, typ, name)
   262  		}
   263  		return dir, lockfile, nil
   264  	}
   265  
   266  	// Info file or directory missing. Start from scratch.
   267  	if xLog != nil {
   268  		fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", dir, typ, name)
   269  	}
   270  	os.RemoveAll(dir)
   271  	if err := os.MkdirAll(dir, 0777); err != nil {
   272  		return "", "", err
   273  	}
   274  	if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil {
   275  		os.RemoveAll(dir)
   276  		return "", "", err
   277  	}
   278  	return dir, lockfile, nil
   279  }
   280  
   281  type RunError struct {
   282  	Cmd      string
   283  	Err      error
   284  	Stderr   []byte
   285  	HelpText string
   286  }
   287  
   288  func (e *RunError) Error() string {
   289  	text := e.Cmd + ": " + e.Err.Error()
   290  	stderr := bytes.TrimRight(e.Stderr, "\n")
   291  	if len(stderr) > 0 {
   292  		text += ":\n\t" + strings.ReplaceAll(string(stderr), "\n", "\n\t")
   293  	}
   294  	if len(e.HelpText) > 0 {
   295  		text += "\n" + e.HelpText
   296  	}
   297  	return text
   298  }
   299  
// dirLock maps a directory path to the *sync.Mutex that run holds
// while it executes a command in that directory.
var dirLock sync.Map
   301  
// RunArgs holds the arguments accepted by RunWithArgs.
type RunArgs struct {
	cmdline []any    // the command to run
	dir     string   // the directory to run the command in
	local   bool     // true if the VCS information is local
	env     []string // environment variables for the command
	// stdin is assigned to the command's standard input.
	stdin   io.Reader
}
   309  
// Run runs the command line in the given directory
// (an empty dir means the current directory).
// It returns the standard output and, for a non-zero exit,
// a *RunError indicating the command, exit status, and standard error.
// Standard error is unavailable for commands that exit successfully.
//
// Commands run in the same non-empty dir are serialized by a per-directory lock.
func Run(ctx context.Context, dir string, cmdline ...any) ([]byte, error) {
	return run(ctx, RunArgs{cmdline: cmdline, dir: dir})
}
   318  
// RunWithArgs is the same as Run but it also accepts additional arguments:
// besides the command line and directory, args carries extra environment
// entries, a standard input reader, and the local flag.
func RunWithArgs(ctx context.Context, args RunArgs) ([]byte, error) {
	return run(ctx, args)
}
   323  
// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell.
// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html.
// The escaped characters are the double quote, dollar sign, backquote, and backslash.
// run uses it when logging an argument that contains a single quote.
var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`)
   327  
// run executes the command described by args and returns its standard output.
// If the command fails, the returned error is a *RunError carrying the command
// line, the directory, the underlying error, and the captured standard error;
// whatever standard output was captured is returned alongside it.
//
// When args.dir is non-empty, run holds a per-directory mutex for the whole
// call, so commands in the same directory never run concurrently.
func run(ctx context.Context, args RunArgs) ([]byte, error) {
	if args.dir != "" {
		// Try a plain Load first so that the common case does not allocate
		// a new mutex just to have LoadOrStore discard it.
		muIface, ok := dirLock.Load(args.dir)
		if !ok {
			muIface, _ = dirLock.LoadOrStore(args.dir, new(sync.Mutex))
		}
		mu := muIface.(*sync.Mutex)
		mu.Lock()
		defer mu.Unlock()
	}

	cmd := str.StringList(args.cmdline...)
	// Test hook: with TESTGOVCSREMOTE=panic, any command not marked local panics.
	if os.Getenv("TESTGOVCSREMOTE") == "panic" && !args.local {
		panic(fmt.Sprintf("use of remote vcs: %v", cmd))
	}
	if xLog, ok := cfg.BuildXWriter(ctx); ok {
		// Log a shell-like rendering of the command before running it,
		// and the same text again with the elapsed time once it finishes.
		text := new(strings.Builder)
		if args.dir != "" {
			text.WriteString("cd ")
			text.WriteString(args.dir)
			text.WriteString("; ")
		}
		for i, arg := range cmd {
			if i > 0 {
				text.WriteByte(' ')
			}
			switch {
			case strings.ContainsAny(arg, "'"):
				// Quote args that could be mistaken for quoted args.
				text.WriteByte('"')
				text.WriteString(bashQuoter.Replace(arg))
				text.WriteByte('"')
			case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"):
				// Quote args that contain special characters, glob patterns, or spaces.
				text.WriteByte('\'')
				text.WriteString(arg)
				text.WriteByte('\'')
			default:
				text.WriteString(arg)
			}
		}
		fmt.Fprintf(xLog, "%s\n", text)
		start := time.Now()
		defer func() {
			fmt.Fprintf(xLog, "%.3fs # %s\n", time.Since(start).Seconds(), text)
		}()
	}
	// TODO: Impose limits on command output size.
	// TODO: Set environment to get English error messages.
	var stderr bytes.Buffer
	var stdout bytes.Buffer
	c := exec.CommandContext(ctx, cmd[0], cmd[1:]...)
	// When ctx is done, send the process an interrupt signal.
	c.Cancel = func() error { return c.Process.Signal(os.Interrupt) }
	c.Dir = args.dir
	c.Stdin = args.stdin
	c.Stderr = &stderr
	c.Stdout = &stdout
	// Append the caller's entries to the environment the command would otherwise get.
	c.Env = append(c.Environ(), args.env...)
	err := c.Run()
	if err != nil {
		err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + args.dir, Stderr: stderr.Bytes(), Err: err}
	}
	return stdout.Bytes(), err
}
   392  

View as plain text