Compare commits

...

14 Commits

Author SHA1 Message Date
Val
0a6ddae0de tmp: introduce wire/user/safe mtu 2023-08-07 23:05:24 +02:00
Val
7d18398d7f tmp: more callmemaybe debugging 2023-08-07 21:04:10 +02:00
Val
01ed896b4d tmp: add both kinds of mtu to printout of probe 2023-08-04 11:02:07 +02:00
Val
c3fac3f6a5 tmp: cut down on probe mtus for debugging 2023-08-03 18:03:26 +02:00
Val
bebb0dc684 Debugging printfs for PTB and MTU
Signed-off-by: Val <valerie@tailscale.com>
2023-08-02 15:34:19 +02:00
salman
aec143eb97 WIP wgengine: inject ICMP PTB for oversize packets
Now with IPv4/6 header math.
2023-08-02 15:34:00 +02:00
Val
1f7de20fe1 wgengine/magicsock: probe likely mtus when connecting to peers
Updates #311

Signed-off-by: Val <valerie@tailscale.com>
2023-08-02 13:49:35 +02:00
Val
4bfeb8b483 wgengine/magicsock: add metric to record highest MTU
Record the highest MTU we probe on any link at any time.

Updates #311

Signed-off-by: Val <valerie@tailscale.com>
2023-08-01 21:29:55 +02:00
Val
c989d08ac7 wgengine/magicsock: record best mtu to an endpoint
Record any mtu information we get from CLI ping, but do not use it
yet.

Updates #311

Signed-off-by: Val <valerie@tailscale.com>
2023-08-01 21:29:47 +02:00
Val
b43c20872e tmp: reminder to update max udp header when we start using bigger mtus 2023-07-31 20:58:36 +02:00
Val
9438ed7438 For merge: don't pad the pong reply
Signed-off-by: Val <valerie@tailscale.com>
2023-07-29 21:58:45 +02:00
Val
f113eec45b For merge: add payload size to ping log message 2023-07-29 21:17:21 +02:00
salman
7e9ed47026 wgengine,ipn,cmd/tailscale: add size option to ping
This adds the capability to pad disco ping message payloads to reach a
specified size. It also plumbs it through to the tailscale ping -size
flag.

Disco pings used for actual endpoint discovery do not use this yet.

Updates #311.

Co-authored-by: Val <valerie@tailscale.com>
Signed-off-by: salman <salman@tailscale.com>
2023-07-29 21:17:21 +02:00
Val
c661d61e24 magicsock: set the don't fragment sockopt
This sets the Don't Fragment flag, for now behind the TS_DEBUG_PMTUD
envknob.

Updates #311.

Co-authored-by: salman <salman@tailscale.com>
Signed-off-by: Val <valerie@tailscale.com>
2023-07-29 21:17:21 +02:00
28 changed files with 524 additions and 70 deletions

View File

@@ -807,11 +807,16 @@ func (lc *LocalClient) ExpandSNIName(ctx context.Context, name string) (fqdn str
return "", false
}
// Ping sends a ping of the provided type to the provided IP and waits
// Ping sends a ping of the provided type and size to the provided IP and waits
// for its response.
func (lc *LocalClient) Ping(ctx context.Context, ip netip.Addr, pingtype tailcfg.PingType) (*ipnstate.PingResult, error) {
//
// For disco pings, the size argument specifies the length of the packet's payload, that
// is, including the disco headers and message, but not including the IP and UDP headers.
// If size is smaller than the minimum message size it's ignored.
func (lc *LocalClient) Ping(ctx context.Context, ip netip.Addr, pingtype tailcfg.PingType, size int) (*ipnstate.PingResult, error) {
v := url.Values{}
v.Set("ip", ip.String())
v.Set("size", strconv.Itoa(size))
v.Set("type", string(pingtype))
body, err := lc.send(ctx, "POST", "/localapi/v0/ping?"+v.Encode(), 200, nil)
if err != nil {

View File

@@ -53,12 +53,14 @@ relay node.
fs.BoolVar(&pingArgs.peerAPI, "peerapi", false, "try hitting the peer's peerapi HTTP server")
fs.IntVar(&pingArgs.num, "c", 10, "max number of pings to send. 0 for infinity.")
fs.DurationVar(&pingArgs.timeout, "timeout", 5*time.Second, "timeout before giving up on a ping")
fs.IntVar(&pingArgs.size, "size", 0, "send a packet with this many bytes in the payload (disco pings only). 0 for minimum size.")
return fs
})(),
}
var pingArgs struct {
num int
size int
untilDirect bool
verbose bool
tsmp bool
@@ -115,7 +117,7 @@ func runPing(ctx context.Context, args []string) error {
for {
n++
ctx, cancel := context.WithTimeout(ctx, pingArgs.timeout)
pr, err := localClient.Ping(ctx, netip.MustParseAddr(ip), pingType())
pr, err := localClient.Ping(ctx, netip.MustParseAddr(ip), pingType(), pingArgs.size)
cancel()
if err != nil {
if errors.Is(err, context.DeadlineExceeded) {
@@ -156,6 +158,9 @@ func runPing(ctx context.Context, args []string) error {
if pr.PeerAPIPort != 0 {
extra = fmt.Sprintf(", %d", pr.PeerAPIPort)
}
if pr.Size != 0 {
extra = fmt.Sprintf(", %d bytes", pr.Size)
}
printf("pong from %s (%s%s) via %v in %v\n", pr.NodeName, pr.NodeIP, extra, via, latency)
if pingArgs.tsmp || pingArgs.icmp {
return nil

View File

@@ -170,7 +170,7 @@ type ControlDialPlanner interface {
// Pinger is the LocalBackend.Ping method.
type Pinger interface {
// Ping is a request to do a ping with the peer handling the given IP.
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType) (*ipnstate.PingResult, error)
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, size int) (*ipnstate.PingResult, error)
}
type Decompressor interface {
@@ -1670,7 +1670,7 @@ func doPingerPing(logf logger.Logf, c *http.Client, pr *tailcfg.PingRequest, pin
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
res, err := pinger.Ping(ctx, pr.IP, pingType)
res, err := pinger.Ping(ctx, pr.IP, pingType, 0)
if err != nil {
d := time.Since(start).Round(time.Millisecond)
logf("doPingerPing: ping error of type %q to %v after %v: %v", pingType, pr.IP, d, err)

View File

@@ -94,6 +94,9 @@ type Message interface {
AppendMarshal([]byte) []byte
}
// MessageHeaderLen is the length of a message header, 2 bytes for type and version.
const MessageHeaderLen = 2
// appendMsgHeader appends two bytes (for t and ver) and then also
// dataLen bytes to b, returning the appended slice in all. The
// returned data slice is a subslice of all with just dataLen bytes of
@@ -117,15 +120,24 @@ type Ping struct {
// netmap data to reduce the discokey:nodekey relation from 1:N to
// 1:1.
NodeKey key.NodePublic
// Padding is the number of 0 bytes at the end of the
// message. (It's used to probe path MTU.)
Padding int
}
// PingLen is the length of a marshalled ping message, without the message
// header or padding.
const PingLen = 12 + key.NodePublicRawLen
func (m *Ping) AppendMarshal(b []byte) []byte {
dataLen := 12
hasKey := !m.NodeKey.IsZero()
if hasKey {
dataLen += key.NodePublicRawLen
}
ret, d := appendMsgHeader(b, TypePing, v0, dataLen)
ret, d := appendMsgHeader(b, TypePing, v0, dataLen+m.Padding)
n := copy(d, m.TxID[:])
if hasKey {
m.NodeKey.AppendTo(d[:n])
@@ -138,11 +150,14 @@ func parsePing(ver uint8, p []byte) (m *Ping, err error) {
return nil, errShort
}
m = new(Ping)
m.Padding = len(p)
p = p[copy(m.TxID[:], p):]
m.Padding -= 12
// Deliberately lax on longer-than-expected messages, for future
// compatibility.
if len(p) >= key.NodePublicRawLen {
m.NodeKey = key.NodePublicFromRaw32(mem.B(p[:key.NodePublicRawLen]))
m.Padding -= key.NodePublicRawLen
}
return m, nil
}
@@ -214,6 +229,8 @@ type Pong struct {
Src netip.AddrPort // 18 bytes (16+2) on the wire; v4-mapped ipv6 for IPv4
}
// pongLen is the length of a marshalled pong message, without the message
// header.
const pongLen = 12 + 16 + 2
func (m *Pong) AppendMarshal(b []byte) []byte {

View File

@@ -35,6 +35,23 @@ func TestMarshalAndParse(t *testing.T) {
},
want: "01 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 01 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 1e 1f",
},
{
name: "ping_with_padding",
m: &Ping{
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
Padding: 3,
},
want: "01 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 00 00",
},
{
name: "ping_with_padding_and_nodekey_src",
m: &Ping{
TxID: [12]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
NodeKey: key.NodePublicFromRaw32(mem.B([]byte{1: 1, 2: 2, 30: 30, 31: 31})),
Padding: 3,
},
want: "01 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 00 01 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 1e 1f 00 00 00",
},
{
name: "pong",
m: &Pong{

View File

@@ -2400,7 +2400,7 @@ func (b *LocalBackend) StartLoginInteractive() {
}
}
func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType) (*ipnstate.PingResult, error) {
func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, size int) (*ipnstate.PingResult, error) {
if pingType == tailcfg.PingPeerAPI {
t0 := b.clock.Now()
node, base, err := b.pingPeerAPI(ctx, ip)
@@ -2423,7 +2423,7 @@ func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg
return pr, nil
}
ch := make(chan *ipnstate.PingResult, 1)
b.e.Ping(ip, pingType, func(pr *ipnstate.PingResult) {
b.e.Ping(ip, pingType, size, func(pr *ipnstate.PingResult) {
select {
case ch <- pr:
default:

View File

@@ -639,6 +639,9 @@ type PingResult struct {
// a ping to the local node.
IsLocalIP bool `json:",omitempty"`
// Size is the size of the ping message.
Size int `json:",omitempty"`
// TODO(bradfitz): details like whether port mapping was used on either side? (Once supported)
}
@@ -655,6 +658,7 @@ func (pr *PingResult) ToPingResponse(pingType tailcfg.PingType) *tailcfg.PingRes
DERPRegionCode: pr.DERPRegionCode,
PeerAPIPort: pr.PeerAPIPort,
IsLocalIP: pr.IsLocalIP,
Size: pr.Size,
}
}

View File

@@ -1337,7 +1337,16 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) {
http.Error(w, "missing 'type' parameter", 400)
return
}
res, err := h.b.Ping(ctx, ip, tailcfg.PingType(pingTypeStr))
size := 0
sizeStr := r.FormValue("size")
if sizeStr != "" {
size, err = strconv.Atoi(sizeStr)
if err != nil {
http.Error(w, "invalid 'size' parameter", 400)
return
}
}
res, err := h.b.Ping(ctx, ip, tailcfg.PingType(pingTypeStr), size)
if err != nil {
writeErrorJSON(w, err)
return

View File

@@ -45,7 +45,8 @@ func (t ICMP4Type) String() string {
type ICMP4Code uint8
const (
ICMP4NoCode ICMP4Code = 0
ICMP4NoCode ICMP4Code = 0x00
ICMP4FragmentationNeeded = 0x04
)
// ICMP4Header is an IPv4+ICMPv4 header.

View File

@@ -20,6 +20,7 @@ type ICMP6Type uint8
const (
ICMP6Unreachable ICMP6Type = 1
ICMP6PacketTooBig ICMP6Type = 2
ICMP6TimeExceeded ICMP6Type = 3
ICMP6EchoRequest ICMP6Type = 128
ICMP6EchoReply ICMP6Type = 129

View File

@@ -4,15 +4,66 @@ package tstun
import "tailscale.com/envknob"
// There are several kinds of MTU.
//
// On-the-wire MTU: This is what the network device advertises as the
// maximum packet size available above the physical link layer. This
// includes IP headers and everything at a higher level. For Ethernet,
// this is typically 1500 bytes but can be larger or smaller.
//
// Tailscale interface MTU: This is what we advertise to userspace as
// the largest possible packet it can send through the tailscale
// interface. This is 80 bytes lower than the largest interface we
// have available to send things on, which is the size of the headers
// Wireguard adds (80 for IPv6, 60 for IPv4, but we don't know which
// it will be so we always subtract 80). E.g. if the largest interface
// MTU is 1500, we set the tailscale interface MTU to 1420.
//
// Peer MTU: The MTU that we have probed for the path to a specific
// peer's various endpoints. If this is smaller than the advertised
// tailscale interface, and the packet is larger than the peer MTU,
// then we generate ICMP Packet Too Big (IPv6) or Fragmentation Needed
// (IPv4) packets inside tailscale and drop the packet.
//
// Historically, we set the tailscale interface MTU to 1280. This
// means we treated the "on the wire" MTU as 1360. This is now the
// "Safe" value we use when we do not know what the path MTU is.
//
// Internally, we store the peer MTU as the MTU advertised to the user.
//
// We have to call these by different names or it is way way too confusing.
//
// Wire MTU
// User MTU
// Peer MTU
//
// What should happen when we set TS_DEBUG_MTU? It should set the
// interface to that, but we should not assume that the path MTU is
// this. So distinguish between what we set the interface MTU to and
// what we assume the path MTU is in the absence of probe information.
const (
maxMTU uint32 = 65536
defaultMTU uint32 = 1280
maxMTU uint32 = 65536
wireguardOverhead = 80
DefaultUserMTU uint32 = 1280
DefaultWireMTU uint32 = 1280 + wireguardOverhead
)
// DefaultMTU returns either the constant default MTU of 1280, or the value set
// in TS_DEBUG_MTU clamped to a maximum of 65536.
func DefaultMTU() uint32 {
// DefaultMTU is the Tailscale default MTU for now.
// userMTUToWireMTU converts an MTU as seen on the tailscale interface
// into the matching on-the-wire MTU by adding wireguardOverhead.
func userMTUToWireMTU(userMTU uint32) uint32 {
	wireMTU := userMTU
	wireMTU += wireguardOverhead
	return wireMTU
}
// wireMTUToUserMTU converts an on-the-wire MTU into the MTU usable on
// the tailscale interface by subtracting wireguardOverhead. A wire MTU
// smaller than the overhead yields 0 rather than wrapping around.
func wireMTUToUserMTU(wireMTU uint32) uint32 {
	if wireMTU >= wireguardOverhead {
		return wireMTU - wireguardOverhead
	}
	// Not even the overhead fits; no room is left for a payload.
	return 0
}
// TunMTU returns either the constant default user MTU of 1280, or the
// value set in TS_DEBUG_MTU clamped to a maximum of 65536.
func TunMTU() uint32 {
// TunMTU is the Tailscale default MTU for now.
//
// wireguard-go defaults to 1420 bytes, which only works if the
// "outer" MTU is 1500 bytes. This breaks on DSL connections
@@ -21,7 +72,7 @@ func DefaultMTU() uint32 {
// 1280 is the smallest MTU allowed for IPv6, which is a sensible
// "probably works everywhere" setting until we develop proper PMTU
// discovery.
tunMTU := defaultMTU
tunMTU := DefaultUserMTU
if mtu, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok {
mtu := uint32(mtu)
if mtu > maxMTU {

View File

@@ -7,22 +7,22 @@ import (
"testing"
)
func TestDefaultMTU(t *testing.T) {
func TestTunMTU(t *testing.T) {
orig := os.Getenv("TS_DEBUG_MTU")
defer os.Setenv("TS_DEBUG_MTU", orig)
os.Setenv("TS_DEBUG_MTU", "")
if DefaultMTU() != 1280 {
t.Errorf("DefaultMTU() = %d, want 1280", DefaultMTU())
if TunMTU() != 1280 {
t.Errorf("TunMTU() = %d, want 1280", TunMTU())
}
os.Setenv("TS_DEBUG_MTU", "9000")
if DefaultMTU() != 9000 {
t.Errorf("DefaultMTU() = %d, want 9000", DefaultMTU())
if TunMTU() != 9000 {
t.Errorf("TunMTU() = %d, want 9000", TunMTU())
}
os.Setenv("TS_DEBUG_MTU", "123456789")
if DefaultMTU() != maxMTU {
t.Errorf("DefaultMTU() = %d, want %d", DefaultMTU(), maxMTU)
if TunMTU() != maxMTU {
t.Errorf("TunMTU() = %d, want %d", TunMTU(), maxMTU)
}
}

View File

@@ -44,7 +44,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
}
dev, err = createTAP(tapName, bridgeName)
} else {
dev, err = tun.CreateTUN(tunName, int(DefaultMTU()))
dev, err = tun.CreateTUN(tunName, int(TunMTU()))
}
if err != nil {
return nil, "", err

View File

@@ -1484,6 +1484,9 @@ type PingResponse struct {
// IsLocalIP is whether the ping request error is due to it being
// a ping to the local node.
IsLocalIP bool `json:",omitempty"`
// Size is the size of the ping message.
Size int `json:",omitempty"`
}
type MapResponse struct {

View File

@@ -239,7 +239,7 @@ func TestConn(t *testing.T) {
}
// ping to make sure the connection is up.
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP, 0)
if err != nil {
t.Fatal(err)
}

View File

@@ -47,6 +47,8 @@ var (
// debugRingBufferMaxSizeBytes overrides the default size of the endpoint
// history ringbuffer.
debugRingBufferMaxSizeBytes = envknob.RegisterInt("TS_DEBUG_MAGICSOCK_RING_BUFFER_MAX_SIZE_BYTES")
// debugPMTUD enables path MTU discovery. Currently only sets the Don't Fragment sockopt.
debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
// Hey you! Adding a new debugknob? Make sure to stub it out in the debugknob_stubs.go
// file too.
)

View File

@@ -20,6 +20,7 @@ func debugAlwaysDERP() bool { return false }
func debugUseDERPHTTP() bool { return false }
func debugEnableSilentDisco() bool { return false }
func debugSendCallMeUnknownPeer() bool { return false }
func debugPMTUD() bool { return false }
func debugUseDERPAddr() string { return "" }
func debugUseDerpRouteEnv() string { return "" }
func debugUseDerpRoute() opt.Bool { return "" }

View File

@@ -0,0 +1,34 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net"
"syscall"
"tailscale.com/types/nettype"
)
const (
	// From https://opensource.apple.com/source/xnu/xnu-6153.141.1/bsd/netinet6/in6.h.auto.html
	socketOptionIPDontFrag   = 28
	socketOptionIPv6DontFrag = 62
)

// setDontFragment asks the kernel not to fragment datagrams sent on
// pconn. network selects the address family: "udp4" sets the IPv4
// don't-fragment option, "udp6" sets the IPv6 one; any other value
// leaves the socket untouched.
//
// If pconn is not a *net.UDPConn nothing is done and nil is returned.
// Otherwise the first failure among obtaining the raw connection,
// running the control function, and the setsockopt call is returned.
func setDontFragment(pconn nettype.PacketConn, network string) (err error) {
	c, ok := pconn.(*net.UDPConn)
	if !ok {
		return nil
	}
	rc, err := c.SyscallConn()
	if err != nil {
		return err
	}
	// sockErr carries the setsockopt result out of the closure; it is a
	// separate variable so it cannot be confused with Control's own error.
	var sockErr error
	ctrlErr := rc.Control(func(fd uintptr) {
		switch network {
		case "udp4":
			sockErr = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, socketOptionIPDontFrag, 1)
		case "udp6":
			// The IPv6 level needs the IPv6 option number, not the IPv4 one.
			sockErr = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, socketOptionIPv6DontFrag, 1)
		}
	})
	if ctrlErr != nil {
		return ctrlErr
	}
	return sockErr
}

View File

@@ -0,0 +1,16 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux && !darwin
package magicsock
import (
"errors"
"tailscale.com/types/nettype"
)
// setDontFragment is the stub used where setting the don't-fragment
// bit is not implemented. It ignores both arguments and always
// returns an error.
func setDontFragment(pconn nettype.PacketConn, network string) (err error) {
	err = errors.New("setting don't fragment bit not supported on this OS")
	return err
}

View File

@@ -0,0 +1,28 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"net"
"syscall"
"tailscale.com/types/nettype"
)
// setDontFragment enables strict path MTU discovery on pconn so that
// the kernel sets the Don't Fragment flag and never fragments outgoing
// datagrams locally. network selects the address family: "udp4" sets
// IP_MTU_DISCOVER, "udp6" sets IPV6_MTU_DISCOVER; any other value
// leaves the socket untouched.
//
// If pconn is not a *net.UDPConn nothing is done and nil is returned.
// Otherwise the first failure among obtaining the raw connection,
// running the control function, and the setsockopt call is returned.
func setDontFragment(pconn nettype.PacketConn, network string) (err error) {
	c, ok := pconn.(*net.UDPConn)
	if !ok {
		return nil
	}
	rc, err := c.SyscallConn()
	if err != nil {
		return err
	}
	// sockErr carries the setsockopt result out of the closure; it is a
	// separate variable so it cannot be confused with Control's own error.
	var sockErr error
	ctrlErr := rc.Control(func(fd uintptr) {
		switch network {
		case "udp4":
			sockErr = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, syscall.IP_MTU_DISCOVER, syscall.IP_PMTUDISC_DO)
		case "udp6":
			// The IPv6 level has its own option numbers; the IPv4
			// constants mean something else at IPPROTO_IPV6.
			sockErr = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, syscall.IPV6_MTU_DISCOVER, syscall.IPV6_PMTUDISC_DO)
		}
	})
	if ctrlErr != nil {
		return ctrlErr
	}
	return sockErr
}

View File

@@ -15,14 +15,19 @@ import (
"net/netip"
"reflect"
"runtime"
"strconv"
"sync"
"sync/atomic"
"time"
"golang.org/x/crypto/poly1305"
"golang.org/x/exp/maps"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"tailscale.com/disco"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/stun"
"tailscale.com/net/tstun"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
@@ -62,7 +67,7 @@ type endpoint struct {
lastFullPing mono.Time // last time we pinged all disco endpoints
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
bestAddr addrLatency // best non-DERP path; zero if none
bestAddr addrQuality // best non-DERP path; zero if none
bestAddrAt mono.Time // time best address re-confirmed
trustBestAddrUntil mono.Time // time when bestAddr expires
sentPing map[stun.TxID]sentPing
@@ -97,6 +102,7 @@ type sentPing struct {
at mono.Time
timer *time.Timer // timeout timer
purpose discoPingPurpose
size int
}
// endpointState is some state and history for a specific endpoint of
@@ -140,6 +146,20 @@ type pongReply struct {
pongSrc netip.AddrPort // what they reported they heard
}
// mtusToProbe are likely on-the-wire MTUs we might see in the
// wild. They are used by the peer MTU probing code.
//
// Set this array to a single zero to disable path MTU probing.
var mtusToProbe = [...]int{
//576, // Smallest MTU for IPv4, probably useless?
//1124, // An observed max mtu in the wild, maybe 1100 instead?
//1280, // Smallest MTU for IPv6, current default
1480, // A little less, for tunnels or such
1500, // Most common real world MTU
//8000, // Some jumbo frames are this size
9000, // Most jumbo frames are this size
}
// EndpointChange is a structure containing information about changes made to a
// particular endpoint. This is not a stable interface and could change at any
// time.
@@ -201,7 +221,7 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
What: "deleteEndpointLocked-bestAddr-" + why,
From: de.bestAddr,
})
de.bestAddr = addrLatency{}
de.bestAddr = addrQuality{}
}
}
@@ -361,7 +381,7 @@ func (de *endpoint) heartbeat() {
udpAddr, _, _ := de.addrForSendLocked(now)
if udpAddr.IsValid() {
// We have a preferred path. Ping that every 2 seconds.
de.startDiscoPingLocked(udpAddr, now, pingHeartbeat)
de.startDiscoPingLocked(udpAddr, now, pingHeartbeat, 0)
}
if de.wantFullPingLocked(now) {
@@ -403,7 +423,7 @@ func (de *endpoint) noteActiveLocked() {
// cliPing starts a ping for the "tailscale ping" command. res is value to call cb with,
// already partially filled.
func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) {
de.mu.Lock()
defer de.mu.Unlock()
@@ -418,17 +438,17 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResu
now := mono.Now()
udpAddr, derpAddr, _ := de.addrForSendLocked(now)
if derpAddr.IsValid() {
de.startDiscoPingLocked(derpAddr, now, pingCLI)
de.startDiscoPingLocked(derpAddr, now, pingCLI, size)
}
if udpAddr.IsValid() && now.Before(de.trustBestAddrUntil) {
// Already have an active session, so just ping the address we're using.
// Otherwise "tailscale ping" results to a node on the local network
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
// IPv6 address, both 1ms away, and it's random who replies first.
de.startDiscoPingLocked(udpAddr, now, pingCLI)
de.startDiscoPingLocked(udpAddr, now, pingCLI, size)
} else {
for ep := range de.endpointState {
de.startDiscoPingLocked(ep, now, pingCLI)
de.startDiscoPingLocked(ep, now, pingCLI, size)
}
}
de.noteActiveLocked()
@@ -522,17 +542,29 @@ func (de *endpoint) removeSentDiscoPingLocked(txid stun.TxID, sp sentPing) {
delete(de.sentPing, txid)
}
// sendDiscoPing sends a ping with the provided txid to ep using de's discoKey.
const (
// discoPingSize is the size of a complete disco ping packet, without any padding.
discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen +
poly1305.TagSize + disco.MessageHeaderLen + disco.PingLen
)
// sendDiscoPing sends a ping with the provided txid to ep using de's discoKey. size
// is the desired packet size; a size no larger than discoPingSize sends an unpadded ping.
//
// The caller (startPingLocked) should've already recorded the ping in
// sentPing and set up the timer.
//
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel) {
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
padding := 0
if size-discoPingSize > 0 {
padding = size - discoPingSize
}
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
TxID: [12]byte(txid),
NodeKey: de.c.publicKeyAtomic.Load(),
Padding: padding,
}, logLevel)
if !sent {
de.forgetDiscoPing(txid)
@@ -557,7 +589,8 @@ const (
pingCLI
)
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose) {
// startDiscoPingLocked sends a disco ping to ep in a separate goroutine.
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, size int) {
if runtime.GOOS == "js" {
return
}
@@ -576,20 +609,32 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
st.lastPing = now
}
if purpose != pingDiscovery {
de.recordAndSendDiscoPingLocked(ep, now, purpose, epDisco.key, size)
} else {
for _, mtu := range mtusToProbe {
de.c.logf("probing mtu %v with disco message size %v", mtu, wireMTUToPingSize(ep, mtu))
de.recordAndSendDiscoPingLocked(ep, now, purpose, epDisco.key, wireMTUToPingSize(ep, mtu))
}
}
}
// recordAndSendDiscoPingLocked registers a new ping to ep in de.sentPing
// under a freshly generated transaction ID, arms a timer that calls
// de.discoPingTimeout after pingTimeoutDuration, and then sends the ping
// from a new goroutine.
//
// size is stored in the sentPing record and handed to sendDiscoPing.
// Heartbeat pings are logged at discoVerboseLog; all others at discoLog.
//
// NOTE(review): per the Locked suffix this presumably requires de.mu to
// be held by the caller — confirm.
func (de *endpoint) recordAndSendDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, key key.DiscoPublic, size int) {
txid := stun.NewTxID()
// Record the ping before sending it.
de.sentPing[txid] = sentPing{
to: ep,
at: now,
timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
purpose: purpose,
size: size,
}
logLevel := discoLog
if purpose == pingHeartbeat {
logLevel = discoVerboseLog
}
go de.sendDiscoPing(ep, epDisco.key, txid, logLevel)
go de.sendDiscoPing(ep, key, txid, size, logLevel)
}
// sendDiscoPingsLocked starts pinging all of ep's endpoints.
func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
de.lastFullPing = now
var sentAny bool
@@ -612,7 +657,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
}
de.startDiscoPingLocked(ep, now, pingDiscovery)
de.startDiscoPingLocked(ep, now, pingDiscovery, 0)
}
derpAddr := de.derpAddr
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
@@ -621,6 +666,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
// message to our peer via DERP informing them that we've
// sent so our firewall ports are probably open and now
// would be a good time for them to connect.
de.c.logf("\n\n\nSENDING DERP CALLMEMAYBE")
go de.c.enqueueCallMeMaybe(derpAddr, de)
}
}
@@ -850,6 +896,124 @@ func (de *endpoint) noteConnectivityChange() {
de.trustBestAddrUntil = 0
}
// mtuToPingSize takes a desired on-the-wire MTU and calculates the
// disco ping message size that would produce a packet that is exactly
// MTU bytes in length including all the headers above the link layer
// (IP and UDP).
//
// Zero return value means don't pad the ping packet at all. An mtu
// argument of zero or less than the necessary header length results
// in a zero return value.
func wireMTUToPingSize(ep netip.AddrPort, mtu int) int {
if mtu == 0 {
return 0
}
headerLen := ipv4.HeaderLen
if ep.Addr().Is6() {
headerLen = ipv6.HeaderLen
}
headerLen += 8 // UDP header length
if mtu < headerLen {
return 0
}
return (mtu - headerLen)
}
// pingSizeToWireMTU returns the smallest wire MTU that lets the disco
// ping described by sp through. sp.size does not count the IP and UDP
// headers in front of the disco message, so they are added back here,
// with the IP header length chosen from the address family of sp.to.
//
// A zero sp.size means the ping was not padded and so no MTU was
// actually tested; in that case the largest safe on-the-wire MTU,
// tstun.DefaultWireMTU, is returned.
func pingSizeToWireMTU(sp sentPing) int {
	if sp.size == 0 {
		return int(tstun.DefaultWireMTU)
	}
	overhead := 8 // UDP header length
	if sp.to.Addr().Is6() {
		overhead += ipv6.HeaderLen
	} else {
		overhead += ipv4.HeaderLen
	}
	return sp.size + overhead
}
// wgHeaderLen is the Wireguard overhead that sits inside the UDP
// payload:
//
//   - 4-byte type
//   - 4-byte key index
//   - 8-byte nonce
//   - 16-byte authentication tag
const wgHeaderLen = 4 + 4 + 8 + 16

// pingSizeToUserMTU calculates the minimum MTU on the tailscale
// interface that would permit this ping to reach this endpoint. That
// is the on-the-wire MTU minus the full Wireguard overhead:
//
//   - 20-byte IPv4 header or 40 byte IPv6 header
//   - 8-byte UDP header
//   - the wgHeaderLen bytes of Wireguard framing
//
// sp.size already excludes the IP and UDP headers, so only wgHeaderLen
// is subtracted here. A zero sp.size yields tstun.DefaultUserMTU, and
// a size smaller than wgHeaderLen yields 0.
//
// NOTE(review): the earlier comment said "We have to assume IPv6
// because we give the same number to everyone when we set the external
// interface MTU"; this function itself applies no fixed IP header
// size — confirm which behavior is intended.
func pingSizeToUserMTU(sp sentPing) int {
	switch size := sp.size; {
	case size == 0:
		return int(tstun.DefaultUserMTU)
	case size < wgHeaderLen:
		return 0
	default:
		return size - wgHeaderLen
	}
}
// updateMTUMetricsLocked raises metricHighestPeerMTU to the user MTU
// implied by sp when that is larger than the current value, and logs
// the new maximum via logf. Pings with a zero size are ignored.
// Should be called with Conn.mu held.
func updateMTUMetricsLocked(sp sentPing, logf logger.Logf) {
	if sp.size != 0 {
		mtu := pingSizeToUserMTU(sp)
		if int64(mtu) > metricHighestPeerMTU.Value() {
			metricHighestPeerMTU.Set(int64(mtu))
			logf("\n\n\nhighest MTU %v\n\n\n", mtu)
		}
	}
}
// PathMTU returns the path MTU to the peer at dst (tailscale address).
//
// It falls back to tstun.TunMTU() when the TS_DEBUG_PMTUD knob is off,
// when dst does not map to a known peer or endpoint, when the Conn is
// closed, or when the endpoint has no currently trusted best address.
// Otherwise it returns the MTU recorded with the endpoint's best
// address.
//
// NOTE(review): the fallbacks return the tun (user) MTU, while
// bestAddr.mtu is filled from pingSizeToWireMTU elsewhere in this
// file; confirm which unit callers expect.
func (c *Conn) PathMTU(dst netip.Addr) int {
	// TODO(s): this method is pretty expensive. Reduce lookups before
	// removing the envknob guard.
	if !debugPMTUD() {
		return int(tstun.TunMTU())
	}
	// NOTE(review): c.netMap is read before c.mu is taken; confirm that
	// this field does not need the lock.
	peer, ok := c.netMap.PeerByTailscaleIP(dst)
	if !ok {
		return int(tstun.TunMTU())
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.closed {
		return int(tstun.TunMTU())
	}
	ep, ok := c.peerMap.endpointForNodeKey(peer.Key)
	if !ok {
		return int(tstun.TunMTU())
	}

	// bestAddr and trustBestAddrUntil are endpoint state that other
	// endpoint methods only touch with the endpoint mutex held. Taking
	// it after c.mu follows the order used by Conn.Ping -> cliPing.
	ep.mu.Lock()
	defer ep.mu.Unlock()

	now := mono.Now()
	if !ep.bestAddr.AddrPort.IsValid() || now.After(ep.trustBestAddrUntil) {
		// We have not done the disco pings yet. ep.send() will kick that off
		// down the line.
		return int(tstun.TunMTU())
	}
	return ep.bestAddr.mtu
}
// handlePongConnLocked handles a Pong message (a reply to an earlier ping).
// It should be called with the Conn.mu held.
//
@@ -889,7 +1053,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
}
if sp.purpose != pingHeartbeat {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v mtu=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToWireMTU(sp), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
if sp.to != src {
fmt.Fprintf(bw, " ping.to=%v", sp.to)
}
@@ -897,7 +1061,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
}
for _, pp := range de.pendingCLIPings {
de.c.populateCLIPingResponseLocked(pp.res, latency, sp.to)
de.c.populateCLIPingResponseLocked(pp.res, latency, sp.to, sp.size)
go pp.cb(pp.res)
}
de.pendingCLIPings = nil
@@ -905,9 +1069,10 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
// Promote this pong response to our current best address if it's lower latency.
// TODO(bradfitz): decide how latency vs. preference order affects decision
if !isDerp {
thisPong := addrLatency{sp.to, latency}
thisPong := addrQuality{sp.to, latency, pingSizeToWireMTU(sp)}
if betterAddr(thisPong, de.bestAddr) {
de.c.logf("magicsock: disco: node %v %v now using %v", de.publicKey.ShortString(), de.discoShort(), sp.to)
de.c.logf("\n\n\nSETTING BEST MTU %v\n\n\n", thisPong.mtu)
de.c.logf("magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.mtu)
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePingLocked-bestAddr-update",
@@ -915,6 +1080,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
To: thisPong,
})
de.bestAddr = thisPong
updateMTUMetricsLocked(sp, de.c.logf)
}
if de.bestAddr.AddrPort == thisPong.AddrPort {
de.debugUpdates.Add(EndpointChange{
@@ -931,19 +1097,23 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
return
}
// addrLatency is an IPPort with an associated latency.
type addrLatency struct {
// addrQuality is an IPPort with an associated latency and path mtu.
type addrQuality struct {
netip.AddrPort
latency time.Duration
mtu int
}
func (a addrLatency) String() string {
return a.AddrPort.String() + "@" + a.latency.String()
func (a addrQuality) String() string {
return a.AddrPort.String() + "@" + a.latency.String() + "+" + strconv.Itoa(a.mtu)
}
// betterAddr reports whether a is a better addr to use than b.
func betterAddr(a, b addrLatency) bool {
func betterAddr(a, b addrQuality) bool {
if a.AddrPort == b.AddrPort {
if a.mtu > b.mtu {
return true
}
return false
}
if !b.IsValid() {
@@ -1015,6 +1185,7 @@ func (de *endpoint) handleCallMeMaybe(m *disco.CallMeMaybe) {
// Nothing to do on js/wasm if we can't send UDP packets anyway.
return
}
de.c.logf("\n\n\nDERP CALLMEMAYBE\n\n\n")
de.mu.Lock()
defer de.mu.Unlock()
@@ -1061,6 +1232,7 @@ func (de *endpoint) handleCallMeMaybe(m *disco.CallMeMaybe) {
// in this message.
for ep, want := range de.isCallMeMaybeEP {
if !want {
de.c.logf("\n\n\nCLEARING ENDPOINT\n\n\n")
delete(de.isCallMeMaybeEP, ep)
de.deleteEndpointLocked("handleCallMeMaybe", ep)
}
@@ -1124,7 +1296,7 @@ func (de *endpoint) stopAndReset() {
func (de *endpoint) resetLocked() {
de.lastSend = 0
de.lastFullPing = 0
de.bestAddr = addrLatency{}
de.bestAddr = addrQuality{}
de.bestAddrAt = 0
de.trustBestAddrUntil = 0
for _, es := range de.endpointState {

View File

@@ -721,7 +721,7 @@ func (c *Conn) LastRecvActivityOfNodeKey(nk key.NodePublic) string {
}
// Ping handles a "tailscale ping" CLI query.
func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) {
c.mu.Lock()
defer c.mu.Unlock()
if c.privateKey.IsZero() {
@@ -745,12 +745,13 @@ func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnst
cb(res)
return
}
ep.cliPing(res, cb)
ep.cliPing(res, size, cb)
}
// c.mu must be held
func (c *Conn) populateCLIPingResponseLocked(res *ipnstate.PingResult, latency time.Duration, ep netip.AddrPort) {
func (c *Conn) populateCLIPingResponseLocked(res *ipnstate.PingResult, latency time.Duration, ep netip.AddrPort, size int) {
res.LatencySeconds = latency.Seconds()
res.Size = size
if ep.Addr() != tailcfg.DerpMagicIPAddr {
res.Endpoint = ep.String()
return
@@ -1257,7 +1258,6 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
} else {
metricSendDiscoUDP.Add(1)
}
box := di.sharedKey.Seal(m.AppendMarshal(nil))
pkt = append(pkt, box...)
sent, err = c.sendAddr(dst, dstKey, pkt)
@@ -1267,7 +1267,7 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
if !dstKey.IsZero() {
node = dstKey.ShortString()
}
c.dlogf("[v1] magicsock: disco: %v->%v (%v, %v) sent %v", c.discoShort, dstDisco.ShortString(), node, derpStr(dst.String()), disco.MessageSummary(m))
c.dlogf("[v1] magicsock: disco: %v->%v (%v, %v) sent %v len %v\n", c.discoShort, dstDisco.ShortString(), node, derpStr(dst.String()), disco.MessageSummary(m), len(pkt))
}
if isDERP {
metricSentDiscoDERP.Add(1)
@@ -1335,7 +1335,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
return
}
if debugDisco() {
c.logf("magicsock: disco: got disco-looking frame from %v via %s", sender.ShortString(), via)
c.logf("magicsock: disco: got disco-looking frame from %v via %s len %v", sender.ShortString(), via, len(msg))
}
if c.privateKey.IsZero() {
// Ignore disco messages when we're stopped.
@@ -2238,6 +2238,14 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur
continue
}
trySetSocketBuffer(pconn, c.logf)
if debugPMTUD() {
err = setDontFragment(pconn, network)
if err != nil {
c.logf("magicsock: unable to do path mtu discovery: %v", err)
}
}
// Success.
if debugBindSocket() {
c.logf("magicsock: bindSocket: successfully listened %v port %d", network, port)
@@ -2763,8 +2771,9 @@ func (et *endpointTracker) removeExpiredLocked(now time.Time) {
}
var (
metricNumPeers = clientmetric.NewGauge("magicsock_netmap_num_peers")
metricNumDERPConns = clientmetric.NewGauge("magicsock_num_derp_conns")
metricNumPeers = clientmetric.NewGauge("magicsock_netmap_num_peers")
metricNumDERPConns = clientmetric.NewGauge("magicsock_num_derp_conns")
metricHighestPeerMTU = clientmetric.NewGauge("magicsock_highest_peer_mtu")
metricRebindCalls = clientmetric.NewCounter("magicsock_rebind_calls")
metricReSTUNCalls = clientmetric.NewCounter("magicsock_restun_calls")

View File

@@ -1627,10 +1627,13 @@ func TestEndpointSetsEqual(t *testing.T) {
func TestBetterAddr(t *testing.T) {
const ms = time.Millisecond
al := func(ipps string, d time.Duration) addrLatency {
return addrLatency{netip.MustParseAddrPort(ipps), d}
al := func(ipps string, d time.Duration) addrQuality {
return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d}
}
zero := addrLatency{}
almtu := func(ipps string, d time.Duration, mtu int) addrQuality {
return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d, mtu: mtu}
}
zero := addrQuality{}
const (
publicV4 = "1.2.3.4:555"
@@ -1641,7 +1644,7 @@ func TestBetterAddr(t *testing.T) {
)
tests := []struct {
a, b addrLatency
a, b addrQuality
want bool // whether a is better than b
}{
{a: zero, b: zero, want: false},
@@ -1703,7 +1706,12 @@ func TestBetterAddr(t *testing.T) {
b: al(publicV6, 100*ms),
want: true,
},
// If addresses are equal, prefer larger MTU
{
a: almtu(publicV4, 30*ms, 1500),
b: almtu(publicV4, 30*ms, 0),
want: true,
},
// Private IPs are preferred over public IPs even if the public
// IP is IPv6.
{

View File

@@ -150,6 +150,7 @@ const nicID = 1
// maxUDPPacketSize is the maximum size of a UDP packet we copy in startPacketCopy
// when relaying UDP packets. We don't use the 'mtu' const in anticipation of
// one day making the MTU more dynamic.
//
// TODO: raise this limit before MTUs larger than 1500 are put into use.
// NOTE(review): presumably a relayed datagram larger than this value would
// not be copied in full; confirm in startPacketCopy.
const maxUDPPacketSize = 1500
// Create creates and populates a new Impl.
@@ -178,7 +179,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
if tcpipErr != nil {
return nil, fmt.Errorf("could not enable TCP SACK: %v", tcpipErr)
}
linkEP := channel.New(512, tstun.DefaultMTU(), "")
linkEP := channel.New(512, tstun.TunMTU(), "")
if tcpipProblem := ipstack.CreateNIC(nicID, linkEP); tcpipProblem != nil {
return nil, fmt.Errorf("could not create netstack NIC: %v", tcpipProblem)
}
@@ -1043,7 +1044,7 @@ func (ns *Impl) acceptUDP(r *udp.ForwarderRequest) {
func (ns *Impl) handleMagicDNSUDP(srcAddr netip.AddrPort, c *gonet.UDPConn) {
// In practice, implementations are advised not to exceed 512 bytes
// due to fragmenting. Just to be sure, we bump all the way to the MTU.
var maxUDPReqSize = tstun.DefaultMTU()
var maxUDPReqSize = tstun.TunMTU()
// Packets are being generated by the local host, so there should be
// very, very little latency. 150ms was chosen as something of an upper
// bound on resource usage, while hopefully still being long enough for

View File

@@ -241,7 +241,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I
var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
var mtu = tstun.DefaultMTU()
var mtu = tstun.TunMTU()
luid := winipcfg.LUID(tun.LUID())
iface, err := interfaceFromLUID(luid,
// Issue 474: on early boot, when the network is still

View File

@@ -7,6 +7,7 @@ import (
"bufio"
"context"
crand "crypto/rand"
"encoding/binary"
"errors"
"fmt"
"io"
@@ -455,6 +456,60 @@ func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
return filter.Accept
}
// debugPMTUD is the boolean envknob registered under the name
// TS_DEBUG_PMTUD. It is called as a function; handleLocalPackets uses the
// result to decide whether to run its path-MTU size check on outgoing
// packets.
var debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
// injectICMPPTB injects an ICMP "packet too big" error for p into the local
// TUN device via e.tundev.InjectInboundCopy, advertising mtu as the size the
// sender should shrink to.
//
// For an IPv4 source the error is an ICMPv4 Destination Unreachable with
// code Fragmentation Needed; otherwise it is an ICMPv6 Packet Too Big. In
// both cases the error is addressed from p's destination to p's source, so
// that it looks to the local stack like it came from the peer.
//
// The body handed to packet.Generate is the 4-byte MTU word followed by a
// quote of the start of the offending packet. The quote is clamped to the
// bytes actually present in p.Buffer(), so a short packet can no longer
// cause a slice-out-of-range panic.
//
// NOTE(review): for ICMPv4 the MTU field is only the low 16 bits of the
// 32-bit word; an mtu above 65535 would spill into the "unused" half.
func (e *userspaceEngine) injectICMPPTB(p *packet.Parsed, mtu int) {
	var icmph packet.Header
	// quoteLen is how many leading bytes of the offending packet we want
	// to quote after the MTU word.
	var quoteLen int
	orig := p.Buffer()

	if p.Src.Addr().Is4() {
		// From https://www.ietf.org/rfc/rfc1191.html#section-4
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		// |     Type      |     Code      |          Checksum             |
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		// |           unused = 0          |             MTU               |
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		// |      Internet Header + 64 bits of Original Datagram Data      |
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		icmph = packet.ICMP4Header{
			IP4Header: packet.IP4Header{
				IPProto: ipproto.ICMPv4,
				Src:     p.Dst.Addr(),
				Dst:     p.Src.Addr(),
			},
			Type: packet.ICMP4Unreachable,
			Code: packet.ICMP4FragmentationNeeded,
		}
		// The quoted "Internet Header" includes any IPv4 options, so take
		// its length from the IHL field (low nibble of the first byte, in
		// 32-bit words) instead of assuming the 20-byte minimum. Otherwise
		// the 64 bits of transport header the receiver needs to match the
		// error to a socket would be cut off.
		ihl := 20
		if len(orig) > 0 {
			if n := int(orig[0]&0x0f) * 4; n > ihl {
				ihl = n
			}
		}
		quoteLen = ihl + 8
	} else {
		// https://www.ietf.org/rfc/rfc4443.html#section-3.2
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		// |     Type      |     Code      |          Checksum             |
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		// |                             MTU                               |
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		// |                    As much of invoking packet                 |
		// +               as possible without the ICMPv6 packet           +
		// |               exceeding the minimum IPv6 MTU [IPv6]           |
		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		icmph = packet.ICMP6Header{
			IP6Header: packet.IP6Header{
				IPProto: ipproto.ICMPv6,
				Src:     p.Dst.Addr(),
				Dst:     p.Src.Addr(),
			},
			Type: packet.ICMP6PacketTooBig,
			Code: packet.ICMP6NoCode,
		}
		// RFC says add as much of invoking packet, but headers should be
		// enough: the fixed 40-byte IPv6 header plus 8 bytes that follow.
		quoteLen = 40 + 8
	}

	payload := make([]byte, 4+quoteLen)
	binary.BigEndian.PutUint32(payload, uint32(mtu))
	// copy stops at the shorter of the two slices, so a packet shorter
	// than quoteLen is quoted in full rather than panicking; trim the
	// payload so no zero padding is presented as packet data.
	n := copy(payload[4:], orig)
	payload = payload[:4+n]

	e.tundev.InjectInboundCopy(packet.Generate(icmph, payload))
}
// handleLocalPackets inspects packets coming from the local network
// stack, and intercepts any packets that should be handled by
// tailscaled directly. Other packets are allowed to proceed into the
@@ -476,6 +531,21 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper)
}
}
if debugPMTUD() {
var tailscaleOverhead = 20 + 8 + 32 // IP + UDP + WireGuard
if p.IPVersion == 6 {
tailscaleOverhead += 20
}
// TODO what if it is not an IP packet? That is, p.IPVersion == 0
// TODO consts to avoid numbers.
pmtu := e.magicConn.PathMTU(p.Dst.Addr())
if len(p.Buffer())+tailscaleOverhead > pmtu {
e.logf("\n\n\n PACKET TOO BIG should be %v\n\n\n", pmtu)
e.injectICMPPTB(p, pmtu)
return filter.Drop
}
}
return filter.Accept
}
@@ -1211,7 +1281,7 @@ func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
e.magicConn.UpdateStatus(sb)
}
func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) {
func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func(*ipnstate.PingResult)) {
res := &ipnstate.PingResult{IP: ip.String()}
pip, ok := e.PeerForIP(ip)
if !ok {
@@ -1228,10 +1298,10 @@ func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func
}
peer := pip.Node
e.logf("ping(%v): sending %v ping to %v %v ...", ip, pingType, peer.Key.ShortString(), peer.ComputedName)
e.logf("ping(%v): sending %v ping payload size %v to %v %v ...", ip, pingType, size, peer.Key.ShortString(), peer.ComputedName)
switch pingType {
case "disco":
e.magicConn.Ping(peer, res, cb)
e.magicConn.Ping(peer, res, size, cb)
case "TSMP":
e.sendTSMPPing(ip, peer, res, cb)
case "ICMP":

View File

@@ -158,8 +158,8 @@ func (e *watchdogEngine) DiscoPublicKey() (k key.DiscoPublic) {
e.watchdog("DiscoPublicKey", func() { k = e.wrap.DiscoPublicKey() })
return k
}
func (e *watchdogEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) {
e.watchdog("Ping", func() { e.wrap.Ping(ip, pingType, cb) })
// Ping forwards the ping request, including the requested payload size, to
// the wrapped engine, running the call through e.watchdog under the name
// "Ping".
func (e *watchdogEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func(*ipnstate.PingResult)) {
	forward := func() {
		e.wrap.Ping(ip, pingType, size, cb)
	}
	e.watchdog("Ping", forward)
}
func (e *watchdogEngine) RegisterIPPortIdentity(ipp netip.AddrPort, tsIP netip.Addr) {
e.watchdog("RegisterIPPortIdentity", func() { e.wrap.RegisterIPPortIdentity(ipp, tsIP) })

View File

@@ -152,7 +152,7 @@ type Engine interface {
// Ping is a request to start a ping with the peer handling the given IP and
// then call cb with its ping latency & method.
Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult))
Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func(*ipnstate.PingResult))
// RegisterIPPortIdentity registers a given node (identified by its
// Tailscale IP) as temporarily having the given IP:port for whois lookups.