Source file src/os/pidfd_linux.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Support for pidfd was added during the course of a few Linux releases:
     6  //  v5.1: pidfd_send_signal syscall;
     7  //  v5.2: CLONE_PIDFD flag for clone syscall;
     8  //  v5.3: pidfd_open syscall, clone3 syscall;
     9  //  v5.4: P_PIDFD idtype support for waitid syscall;
    10  //  v5.6: pidfd_getfd syscall.
    11  //
    12  // N.B. Alternative Linux implementations may not follow this ordering. e.g.,
    13  // QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added until
    14  // 8.0.
    15  
    16  package os
    17  
    18  import (
    19  	"internal/syscall/unix"
    20  	"runtime"
    21  	"sync"
    22  	"syscall"
    23  	_ "unsafe" // for linkname
    24  )
    25  
    26  // ensurePidfd initializes the PidFD field in sysAttr if it is not already set.
    27  // It returns the original or modified SysProcAttr struct and a flag indicating
    28  // whether the PidFD should be duplicated before using.
    29  func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) {
    30  	if !pidfdWorks() {
    31  		return sysAttr, false
    32  	}
    33  
    34  	var pidfd int
    35  
    36  	if sysAttr == nil {
    37  		return &syscall.SysProcAttr{
    38  			PidFD: &pidfd,
    39  		}, false
    40  	}
    41  	if sysAttr.PidFD == nil {
    42  		newSys := *sysAttr // copy
    43  		newSys.PidFD = &pidfd
    44  		return &newSys, false
    45  	}
    46  
    47  	return sysAttr, true
    48  }
    49  
    50  // getPidfd returns the value of sysAttr.PidFD (or its duplicate if needDup is
    51  // set) and a flag indicating whether the value can be used.
    52  func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) {
    53  	if !pidfdWorks() {
    54  		return 0, false
    55  	}
    56  
    57  	h := *sysAttr.PidFD
    58  	if needDup {
    59  		dupH, e := unix.Fcntl(h, syscall.F_DUPFD_CLOEXEC, 0)
    60  		if e != nil {
    61  			return 0, false
    62  		}
    63  		h = dupH
    64  	}
    65  	return uintptr(h), true
    66  }
    67  
    68  // pidfdFind returns the process handle for pid.
    69  func pidfdFind(pid int) (uintptr, error) {
    70  	if !pidfdWorks() {
    71  		return 0, syscall.ENOSYS
    72  	}
    73  
    74  	h, err := unix.PidFDOpen(pid, 0)
    75  	if err != nil {
    76  		return 0, convertESRCH(err)
    77  	}
    78  	return h, nil
    79  }
    80  
    81  // pidfdWait waits for the process to complete,
    82  // and updates the process status to done.
    83  func (p *Process) pidfdWait() (*ProcessState, error) {
    84  	// When pidfd is used, there is no wait/kill race (described in CL 23967)
    85  	// because the PID recycle issue doesn't exist (IOW, pidfd, unlike PID,
    86  	// is guaranteed to refer to one particular process). Thus, there is no
    87  	// need for the workaround (blockUntilWaitable + sigMu) from pidWait.
    88  	//
    89  	// We _do_ need to be careful about reuse of the pidfd FD number when
    90  	// closing the pidfd. See handle for more details.
    91  	handle, status := p.handleTransientAcquire()
    92  	switch status {
    93  	case statusDone:
    94  		// Process already completed Wait, or was not found by
    95  		// pidfdFind. Return ECHILD for consistency with what the wait
    96  		// syscall would return.
    97  		return nil, NewSyscallError("wait", syscall.ECHILD)
    98  	case statusReleased:
    99  		return nil, syscall.EINVAL
   100  	}
   101  	defer p.handleTransientRelease()
   102  
   103  	var (
   104  		info   unix.SiginfoChild
   105  		rusage syscall.Rusage
   106  	)
   107  	err := ignoringEINTR(func() error {
   108  		return unix.Waitid(unix.P_PIDFD, int(handle), &info, syscall.WEXITED, &rusage)
   109  	})
   110  	if err != nil {
   111  		return nil, NewSyscallError("waitid", err)
   112  	}
   113  
   114  	// Update the Process status to statusDone.
   115  	// This also releases a reference to the handle.
   116  	p.doRelease(statusDone)
   117  
   118  	return &ProcessState{
   119  		pid:    int(info.Pid),
   120  		status: info.WaitStatus(),
   121  		rusage: &rusage,
   122  	}, nil
   123  }
   124  
   125  // pidfdSendSignal sends a signal to the process.
   126  func (p *Process) pidfdSendSignal(s syscall.Signal) error {
   127  	handle, status := p.handleTransientAcquire()
   128  	switch status {
   129  	case statusDone:
   130  		return ErrProcessDone
   131  	case statusReleased:
   132  		return errProcessReleased
   133  	}
   134  	defer p.handleTransientRelease()
   135  
   136  	return convertESRCH(unix.PidFDSendSignal(handle, s))
   137  }
   138  
   139  // pidfdWorks returns whether we can use pidfd on this system.
   140  func pidfdWorks() bool {
   141  	return checkPidfdOnce() == nil
   142  }
   143  
   144  // checkPidfdOnce is used to only check whether pidfd works once.
   145  var checkPidfdOnce = sync.OnceValue(checkPidfd)
   146  
   147  // checkPidfd checks whether all required pidfd-related syscalls work. This
   148  // consists of pidfd_open and pidfd_send_signal syscalls, waitid syscall with
   149  // idtype of P_PIDFD, and clone(CLONE_PIDFD).
   150  //
   151  // Reasons for non-working pidfd syscalls include an older kernel and an
   152  // execution environment in which the above system calls are restricted by
   153  // seccomp or a similar technology.
   154  func checkPidfd() error {
   155  	// In Android version < 12, pidfd-related system calls are not allowed
   156  	// by seccomp and trigger the SIGSYS signal. See issue #69065.
   157  	if runtime.GOOS == "android" {
   158  		ignoreSIGSYS()
   159  		defer restoreSIGSYS()
   160  	}
   161  
   162  	// Get a pidfd of the current process (opening of "/proc/self" won't
   163  	// work for waitid).
   164  	fd, err := unix.PidFDOpen(syscall.Getpid(), 0)
   165  	if err != nil {
   166  		return NewSyscallError("pidfd_open", err)
   167  	}
   168  	defer syscall.Close(int(fd))
   169  
   170  	// Check waitid(P_PIDFD) works.
   171  	err = ignoringEINTR(func() error {
   172  		var info unix.SiginfoChild
   173  		// We don't actually care about the info, but passing a nil pointer
   174  		// makes valgrind complain because 0x0 is unaddressable.
   175  		return unix.Waitid(unix.P_PIDFD, int(fd), &info, syscall.WEXITED, nil)
   176  	})
   177  	// Expect ECHILD from waitid since we're not our own parent.
   178  	if err != syscall.ECHILD {
   179  		return NewSyscallError("pidfd_wait", err)
   180  	}
   181  
   182  	// Check pidfd_send_signal works (should be able to send 0 to itself).
   183  	if err := unix.PidFDSendSignal(fd, 0); err != nil {
   184  		return NewSyscallError("pidfd_send_signal", err)
   185  	}
   186  
   187  	// Verify that clone(CLONE_PIDFD) works.
   188  	//
   189  	// This shouldn't be necessary since pidfd_open was added in Linux 5.3,
   190  	// after CLONE_PIDFD in Linux 5.2, but some alternative Linux
   191  	// implementations may not adhere to this ordering.
   192  	if err := checkClonePidfd(); err != nil {
   193  		return err
   194  	}
   195  
   196  	return nil
   197  }
   198  
   199  // checkClonePidfd is provided by syscall. checkPidfd calls it to verify that clone(CLONE_PIDFD) works.
   200  //
   201  //go:linkname checkClonePidfd
   202  func checkClonePidfd() error
   203  
   204  // ignoreSIGSYS and restoreSIGSYS (declared next) are provided by runtime. checkPidfd calls them as a pair on Android around its pidfd probes.
   205  //
   206  //go:linkname ignoreSIGSYS
   207  func ignoreSIGSYS()
   208  
   209  //go:linkname restoreSIGSYS
   210  func restoreSIGSYS()
   211  

View as plain text