Source file src/os/zero_copy_linux.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package os
     6  
     7  import (
     8  	"internal/poll"
     9  	"io"
    10  	"syscall"
    11  )
    12  
    13  var (
    14  	pollCopyFileRange = poll.CopyFileRange
    15  	pollSplice        = poll.Splice
    16  )
    17  
    18  // wrapSyscallError takes an error and a syscall name. If the error is
    19  // a syscall.Errno, it wraps it in an os.SyscallError using the syscall name.
    20  func wrapSyscallError(name string, err error) error {
    21  	if _, ok := err.(syscall.Errno); ok {
    22  		err = NewSyscallError(name, err)
    23  	}
    24  	return err
    25  }
    26  
    27  func (f *File) writeTo(w io.Writer) (written int64, handled bool, err error) {
    28  	pfd, network := getPollFDAndNetwork(w)
    29  	// TODO(panjf2000): same as File.spliceToFile.
    30  	if pfd == nil || !pfd.IsStream || !isUnixOrTCP(string(network)) {
    31  		return
    32  	}
    33  
    34  	sc, err := f.SyscallConn()
    35  	if err != nil {
    36  		return
    37  	}
    38  
    39  	rerr := sc.Read(func(fd uintptr) (done bool) {
    40  		written, err, handled = poll.SendFile(pfd, int(fd), 1<<63-1)
    41  		return true
    42  	})
    43  
    44  	if err == nil {
    45  		err = rerr
    46  	}
    47  
    48  	return written, handled, wrapSyscallError("sendfile", err)
    49  }
    50  
    51  func (f *File) readFrom(r io.Reader) (written int64, handled bool, err error) {
    52  	// Neither copy_file_range(2) nor splice(2) supports destinations opened with
    53  	// O_APPEND, so don't bother to try zero-copy with these system calls.
    54  	//
    55  	// Visit https://man7.org/linux/man-pages/man2/copy_file_range.2.html#ERRORS and
    56  	// https://man7.org/linux/man-pages/man2/splice.2.html#ERRORS for details.
    57  	if f.appendMode {
    58  		return 0, false, nil
    59  	}
    60  
    61  	written, handled, err = f.copyFileRange(r)
    62  	if handled {
    63  		return
    64  	}
    65  	return f.spliceToFile(r)
    66  }
    67  
    68  func (f *File) spliceToFile(r io.Reader) (written int64, handled bool, err error) {
    69  	var (
    70  		remain int64
    71  		lr     *io.LimitedReader
    72  	)
    73  	if lr, r, remain = tryLimitedReader(r); remain <= 0 {
    74  		return 0, true, nil
    75  	}
    76  
    77  	pfd, _ := getPollFDAndNetwork(r)
    78  	// TODO(panjf2000): run some tests to see if we should unlock the non-streams for splice.
    79  	// Streams benefit the most from the splice(2), non-streams are not even supported in old kernels
    80  	// where splice(2) will just return EINVAL; newer kernels support non-streams like UDP, but I really
    81  	// doubt that splice(2) could help non-streams, cuz they usually send small frames respectively
    82  	// and one splice call would result in one frame.
    83  	// splice(2) is suitable for large data but the generation of fragments defeats its edge here.
    84  	// Therefore, don't bother to try splice if the r is not a streaming descriptor.
    85  	if pfd == nil || !pfd.IsStream {
    86  		return
    87  	}
    88  
    89  	written, handled, err = pollSplice(&f.pfd, pfd, remain)
    90  
    91  	if lr != nil {
    92  		lr.N = remain - written
    93  	}
    94  
    95  	return written, handled, wrapSyscallError("splice", err)
    96  }
    97  
    98  func (f *File) copyFileRange(r io.Reader) (written int64, handled bool, err error) {
    99  	var (
   100  		remain int64
   101  		lr     *io.LimitedReader
   102  	)
   103  	if lr, r, remain = tryLimitedReader(r); remain <= 0 {
   104  		return 0, true, nil
   105  	}
   106  
   107  	var src *File
   108  	switch v := r.(type) {
   109  	case *File:
   110  		src = v
   111  	case fileWithoutWriteTo:
   112  		src = v.File
   113  	default:
   114  		return 0, false, nil
   115  	}
   116  
   117  	if src.checkValid("ReadFrom") != nil {
   118  		// Avoid returning the error as we report handled as false,
   119  		// leave further error handling as the responsibility of the caller.
   120  		return 0, false, nil
   121  	}
   122  
   123  	written, handled, err = pollCopyFileRange(&f.pfd, &src.pfd, remain)
   124  	if lr != nil {
   125  		lr.N -= written
   126  	}
   127  	return written, handled, wrapSyscallError("copy_file_range", err)
   128  }
   129  
   130  // getPollFDAndNetwork tries to get the poll.FD and network type from the given interface
   131  // by expecting the underlying type of i to be the implementation of syscall.Conn
   132  // that contains a *net.rawConn.
   133  func getPollFDAndNetwork(i any) (*poll.FD, poll.String) {
   134  	sc, ok := i.(syscall.Conn)
   135  	if !ok {
   136  		return nil, ""
   137  	}
   138  	rc, err := sc.SyscallConn()
   139  	if err != nil {
   140  		return nil, ""
   141  	}
   142  	irc, ok := rc.(interface {
   143  		PollFD() *poll.FD
   144  		Network() poll.String
   145  	})
   146  	if !ok {
   147  		return nil, ""
   148  	}
   149  	return irc.PollFD(), irc.Network()
   150  }
   151  
   152  // tryLimitedReader tries to assert the io.Reader to io.LimitedReader, it returns the io.LimitedReader,
   153  // the underlying io.Reader and the remaining amount of bytes if the assertion succeeds,
   154  // otherwise it just returns the original io.Reader and the theoretical unlimited remaining amount of bytes.
   155  func tryLimitedReader(r io.Reader) (*io.LimitedReader, io.Reader, int64) {
   156  	var remain int64 = 1<<63 - 1 // by default, copy until EOF
   157  
   158  	lr, ok := r.(*io.LimitedReader)
   159  	if !ok {
   160  		return nil, r, remain
   161  	}
   162  
   163  	remain = lr.N
   164  	return lr, lr.R, remain
   165  }
   166  
   167  func isUnixOrTCP(network string) bool {
   168  	switch network {
   169  	case "tcp", "tcp4", "tcp6", "unix":
   170  		return true
   171  	default:
   172  		return false
   173  	}
   174  }
   175  

View as plain text