conn, device, tun: implement vectorized I/O plumbing

Accept packet vectors for reading and writing in the tun.Device and
conn.Bind interfaces, so that the internal plumbing between these
interfaces now passes a vector of packets. Vectors move untouched
between these interfaces, i.e. if 128 packets are received from
conn.Bind.Read(), 128 packets are passed to tun.Device.Write(). There is
no internal buffering.

Currently, existing implementations are only adjusted to have vectors
of length one. Subsequent patches will improve that.

Also, as a related fixup, use the unix and windows packages rather than
the syscall package when possible.

Co-authored-by: James Tucker <james@tailscale.com>
Signed-off-by: James Tucker <james@tailscale.com>
Signed-off-by: Jordan Whited <jordan@tailscale.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
This commit is contained in:
Jordan Whited
2023-03-02 14:48:02 -08:00
committed by Jason A. Donenfeld
parent 21636207a6
commit 3bb8fec7e4
25 changed files with 1046 additions and 514 deletions

View File

@@ -8,6 +8,7 @@ package tun
import (
"errors"
"fmt"
"io"
"net"
"os"
"sync"
@@ -15,7 +16,6 @@ import (
"time"
"unsafe"
"golang.org/x/net/ipv6"
"golang.org/x/sys/unix"
)
@@ -33,7 +33,7 @@ type NativeTun struct {
func retryInterfaceByIndex(index int) (iface *net.Interface, err error) {
for i := 0; i < 20; i++ {
iface, err = net.InterfaceByIndex(index)
if err != nil && errors.Is(err, syscall.ENOMEM) {
if err != nil && errors.Is(err, unix.ENOMEM) {
time.Sleep(time.Duration(i) * time.Second / 3)
continue
}
@@ -55,7 +55,7 @@ func (tun *NativeTun) routineRouteListener(tunIfindex int) {
retry:
n, err := unix.Read(tun.routeSocket, data)
if err != nil {
if errno, ok := err.(syscall.Errno); ok && errno == syscall.EINTR {
if errno, ok := err.(unix.Errno); ok && errno == unix.EINTR {
goto retry
}
tun.errors <- err
@@ -217,45 +217,46 @@ func (tun *NativeTun) Events() <-chan Event {
return tun.events
}
// Read reads a single packet from the tun file into the caller's buffer at
// offset, after first reporting any error already queued on tun.errors.
func (tun *NativeTun) Read(buff []byte, offset int) (int, error) {
func (tun *NativeTun) Read(buffs [][]byte, sizes []int, offset int) (int, error) {
// TODO: the BSDs look very similar in Read() and Write(). They should be
// collapsed, with platform-specific files containing the varying parts of
// their implementations.
// A queued error (routineRouteListener sends on tun.errors) takes priority
// over reading; the default case keeps this check non-blocking.
select {
case err := <-tun.errors:
return 0, err
default:
// Start 4 bytes before offset: the first 4 bytes read are not counted as
// packet data (see the n - 4 below), so the packet itself lands at offset.
// NOTE(review): unlike Write, nothing here checks offset >= 4; a smaller
// offset makes this slice expression panic.
buff := buff[offset-4:]
buff := buffs[0][offset-4:]
n, err := tun.tunFile.Read(buff[:])
// Fewer than 4 bytes means no packet data was read; report nothing and
// pass along whatever error Read returned, which may be nil.
if n < 4 {
return 0, err
}
return n - 4, err
// Only buffs[0] is ever filled, so exactly one packet is reported and its
// length excludes the 4 leading bytes.
sizes[0] = n - 4
return 1, err
}
}
// Write prefixes every packet it is given with a 4-byte address-family
// header, built in place in the bytes just before offset, and writes header
// plus packet to the tun file.
func (tun *NativeTun) Write(buff []byte, offset int) (int, error) {
// reserve space for header
buff = buff[offset-4:]
// add packet information header
buff[0] = 0x00
buff[1] = 0x00
buff[2] = 0x00
if buff[4]>>4 == ipv6.Version {
buff[3] = unix.AF_INET6
} else {
buff[3] = unix.AF_INET
func (tun *NativeTun) Write(buffs [][]byte, offset int) (int, error) {
// The header occupies the 4 bytes before offset, so a smaller offset leaves
// no room for it.
if offset < 4 {
return 0, io.ErrShortBuffer
}
// write
return tun.tunFile.Write(buff)
}
func (tun *NativeTun) Flush() error {
// TODO: can flushing be implemented by buffering and using sendmmsg?
return nil
for i, buf := range buffs {
// Step back 4 bytes so header and packet form one contiguous slice that
// goes out in a single Write call.
buf = buf[offset-4:]
buf[0] = 0x00
buf[1] = 0x00
buf[2] = 0x00
// buf[4] is the first byte of the packet; its high nibble (4 or 6) picks
// the address family stored in the last header byte.
switch buf[4] >> 4 {
case 4:
buf[3] = unix.AF_INET
case 6:
buf[3] = unix.AF_INET6
default:
// i is the number of packets written before this one was rejected.
return i, unix.EAFNOSUPPORT
}
if _, err := tun.tunFile.Write(buf); err != nil {
// Stop at the first failed write; i packets went out before it.
return i, err
}
}
return len(buffs), nil
}
func (tun *NativeTun) Close() error {
@@ -318,6 +319,10 @@ func (tun *NativeTun) MTU() (int, error) {
return int(ifr.MTU), nil
}
// BatchSize always returns 1, matching Read in this file, which fills only
// buffs[0] and reports a single packet per call.
// NOTE(review): presumably this is the number of packets callers should size
// their Read/Write vectors for — confirm against the tun.Device interface.
func (tun *NativeTun) BatchSize() int {
return 1
}
func socketCloexec(family, sotype, proto int) (fd int, err error) {
// See go/src/net/sys_cloexec.go for background.
syscall.ForkLock.RLock()