Compare commits
4 Commits
awly/cli-j
...
s/pmtud
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
737a21f078 | ||
|
|
49f24034ea | ||
|
|
0cdb85dff0 | ||
|
|
69bdf2f2c5 |
@@ -809,10 +809,12 @@ func (lc *LocalClient) ExpandSNIName(ctx context.Context, name string) (fqdn str
|
||||
|
||||
// Ping sends a ping of the provided type to the provided IP and waits
|
||||
// for its response.
|
||||
func (lc *LocalClient) Ping(ctx context.Context, ip netip.Addr, pingtype tailcfg.PingType) (*ipnstate.PingResult, error) {
|
||||
func (lc *LocalClient) Ping(ctx context.Context, ip netip.Addr, pingtype tailcfg.PingType, mtu int) (*ipnstate.PingResult, error) {
|
||||
v := url.Values{}
|
||||
v.Set("ip", ip.String())
|
||||
v.Set("mtu", strconv.Itoa(mtu))
|
||||
v.Set("type", string(pingtype))
|
||||
// XXX new api or whatnot
|
||||
body, err := lc.send(ctx, "POST", "/localapi/v0/ping?"+v.Encode(), 200, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error %w: %s", err, body)
|
||||
|
||||
@@ -53,12 +53,14 @@ relay node.
|
||||
fs.BoolVar(&pingArgs.peerAPI, "peerapi", false, "try hitting the peer's peerapi HTTP server")
|
||||
fs.IntVar(&pingArgs.num, "c", 10, "max number of pings to send. 0 for infinity.")
|
||||
fs.DurationVar(&pingArgs.timeout, "timeout", 5*time.Second, "timeout before giving up on a ping")
|
||||
fs.IntVar(&pingArgs.mtu, "mtu", 0, "send a packet with this many bytes total")
|
||||
return fs
|
||||
})(),
|
||||
}
|
||||
|
||||
var pingArgs struct {
|
||||
num int
|
||||
mtu int
|
||||
untilDirect bool
|
||||
verbose bool
|
||||
tsmp bool
|
||||
@@ -115,7 +117,7 @@ func runPing(ctx context.Context, args []string) error {
|
||||
for {
|
||||
n++
|
||||
ctx, cancel := context.WithTimeout(ctx, pingArgs.timeout)
|
||||
pr, err := localClient.Ping(ctx, netip.MustParseAddr(ip), pingType())
|
||||
pr, err := localClient.Ping(ctx, netip.MustParseAddr(ip), pingType(), pingArgs.mtu)
|
||||
cancel()
|
||||
if err != nil {
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
|
||||
@@ -170,7 +170,7 @@ type ControlDialPlanner interface {
|
||||
// Pinger is the LocalBackend.Ping method.
|
||||
type Pinger interface {
|
||||
// Ping is a request to do a ping with the peer handling the given IP.
|
||||
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType) (*ipnstate.PingResult, error)
|
||||
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, mtu int) (*ipnstate.PingResult, error)
|
||||
}
|
||||
|
||||
type Decompressor interface {
|
||||
@@ -1670,7 +1670,7 @@ func doPingerPing(logf logger.Logf, c *http.Client, pr *tailcfg.PingRequest, pin
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
res, err := pinger.Ping(ctx, pr.IP, pingType)
|
||||
res, err := pinger.Ping(ctx, pr.IP, pingType, 0)
|
||||
if err != nil {
|
||||
d := time.Since(start).Round(time.Millisecond)
|
||||
logf("doPingerPing: ping error of type %q to %v after %v: %v", pingType, pr.IP, d, err)
|
||||
|
||||
@@ -119,6 +119,8 @@ type Ping struct {
|
||||
NodeKey key.NodePublic
|
||||
}
|
||||
|
||||
const PingLen = 12 + key.NodePublicRawLen
|
||||
|
||||
func (m *Ping) AppendMarshal(b []byte) []byte {
|
||||
dataLen := 12
|
||||
hasKey := !m.NodeKey.IsZero()
|
||||
@@ -214,10 +216,10 @@ type Pong struct {
|
||||
Src netip.AddrPort // 18 bytes (16+2) on the wire; v4-mapped ipv6 for IPv4
|
||||
}
|
||||
|
||||
const pongLen = 12 + 16 + 2
|
||||
const PongLen = 12 + 16 + 2
|
||||
|
||||
func (m *Pong) AppendMarshal(b []byte) []byte {
|
||||
ret, d := appendMsgHeader(b, TypePong, v0, pongLen)
|
||||
ret, d := appendMsgHeader(b, TypePong, v0, PongLen)
|
||||
d = d[copy(d, m.TxID[:]):]
|
||||
ip16 := m.Src.Addr().As16()
|
||||
d = d[copy(d, ip16[:]):]
|
||||
@@ -226,7 +228,7 @@ func (m *Pong) AppendMarshal(b []byte) []byte {
|
||||
}
|
||||
|
||||
func parsePong(ver uint8, p []byte) (m *Pong, err error) {
|
||||
if len(p) < pongLen {
|
||||
if len(p) < PongLen {
|
||||
return nil, errShort
|
||||
}
|
||||
m = new(Pong)
|
||||
|
||||
@@ -2395,7 +2395,7 @@ func (b *LocalBackend) StartLoginInteractive() {
|
||||
}
|
||||
}
|
||||
|
||||
func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType) (*ipnstate.PingResult, error) {
|
||||
func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, mtu int) (*ipnstate.PingResult, error) {
|
||||
if pingType == tailcfg.PingPeerAPI {
|
||||
t0 := time.Now()
|
||||
node, base, err := b.pingPeerAPI(ctx, ip)
|
||||
@@ -2423,7 +2423,7 @@ func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg
|
||||
case ch <- pr:
|
||||
default:
|
||||
}
|
||||
})
|
||||
}, mtu)
|
||||
select {
|
||||
case pr := <-ch:
|
||||
return pr, nil
|
||||
|
||||
@@ -1335,7 +1335,17 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "missing 'type' parameter", 400)
|
||||
return
|
||||
}
|
||||
res, err := h.b.Ping(ctx, ip, tailcfg.PingType(pingTypeStr))
|
||||
mtuStr := r.FormValue("mtu")
|
||||
if mtuStr == "" {
|
||||
// XXX old api didn't include this arg
|
||||
mtuStr = "0"
|
||||
}
|
||||
mtu, err := strconv.Atoi(mtuStr)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid 'mtu' parameter", 400)
|
||||
return
|
||||
}
|
||||
res, err := h.b.Ping(ctx, ip, tailcfg.PingType(pingTypeStr), mtu)
|
||||
if err != nil {
|
||||
writeErrorJSON(w, err)
|
||||
return
|
||||
|
||||
@@ -289,6 +289,9 @@ type State struct {
|
||||
|
||||
// PAC is the URL to the Proxy Autoconfig URL, if applicable.
|
||||
PAC string
|
||||
|
||||
// MaxMTU is the largest MTU of the available usable interfaces
|
||||
MaxMTU int
|
||||
}
|
||||
|
||||
func (s *State) String() string {
|
||||
@@ -524,6 +527,9 @@ func GetState() (*State, error) {
|
||||
}
|
||||
s.HaveV6 = s.HaveV6 || isUsableV6(pfx.Addr())
|
||||
s.HaveV4 = s.HaveV4 || isUsableV4(pfx.Addr())
|
||||
if ni.MTU > s.MaxMTU {
|
||||
s.MaxMTU = ni.MTU
|
||||
}
|
||||
}
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -45,7 +45,8 @@ func (t ICMP4Type) String() string {
|
||||
type ICMP4Code uint8
|
||||
|
||||
const (
|
||||
ICMP4NoCode ICMP4Code = 0
|
||||
ICMP4NoCode ICMP4Code = 0x00
|
||||
ICMP4FragmentationNeeded = 0x04
|
||||
)
|
||||
|
||||
// ICMP4Header is an IPv4+ICMPv4 header.
|
||||
|
||||
@@ -20,6 +20,7 @@ type ICMP6Type uint8
|
||||
|
||||
const (
|
||||
ICMP6Unreachable ICMP6Type = 1
|
||||
ICMP6PacketTooBig ICMP6Type = 2
|
||||
ICMP6TimeExceeded ICMP6Type = 3
|
||||
ICMP6EchoRequest ICMP6Type = 128
|
||||
ICMP6EchoReply ICMP6Type = 129
|
||||
|
||||
@@ -239,7 +239,7 @@ func TestConn(t *testing.T) {
|
||||
}
|
||||
|
||||
// ping to make sure the connection is up.
|
||||
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
|
||||
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -44,6 +44,8 @@ var (
|
||||
// debugSendCallMeUnknownPeer sends a CallMeMaybe to a non-existent destination every
|
||||
// time we send a real CallMeMaybe to test the PeerGoneNotHere logic.
|
||||
debugSendCallMeUnknownPeer = envknob.RegisterBool("TS_DEBUG_SEND_CALLME_UNKNOWN_PEER")
|
||||
// debugPMTUD enables path MTU discovery. Currently only sets the Don't Fragment sockopt.
|
||||
debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
|
||||
// Hey you! Adding a new debugknob? Make sure to stub it out in the debugknob_stubs.go
|
||||
// file too.
|
||||
)
|
||||
|
||||
@@ -19,6 +19,7 @@ func debugAlwaysDERP() bool { return false }
|
||||
func debugUseDERPHTTP() bool { return false }
|
||||
func debugEnableSilentDisco() bool { return false }
|
||||
func debugSendCallMeUnknownPeer() bool { return false }
|
||||
func debugPMTUD() bool { return false }
|
||||
func debugUseDERPAddr() string { return "" }
|
||||
func debugUseDerpRouteEnv() string { return "" }
|
||||
func debugUseDerpRoute() opt.Bool { return "" }
|
||||
|
||||
34
wgengine/magicsock/dontfrag_darwin.go
Normal file
34
wgengine/magicsock/dontfrag_darwin.go
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package magicsock
|
||||
|
||||
import (
|
||||
"net"
|
||||
"syscall"
|
||||
|
||||
"tailscale.com/types/nettype"
|
||||
)
|
||||
|
||||
const (
|
||||
// From https://opensource.apple.com/source/xnu/xnu-6153.141.1/bsd/netinet6/in6.h.auto.html
|
||||
socketOptionIPDontFrag = 28
|
||||
socketOptionIPv6DontFrag = 62
|
||||
)
|
||||
|
||||
func setDontFragment(pconn nettype.PacketConn, network string) (err error) {
|
||||
if c, ok := pconn.(*net.UDPConn); ok {
|
||||
rc, err := c.SyscallConn()
|
||||
if err == nil {
|
||||
rc.Control(func(fd uintptr) {
|
||||
if network == "udp4" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, socketOptionIPDontFrag, 1)
|
||||
}
|
||||
if network == "udp6" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, socketOptionIPDontFrag, 1)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
16
wgengine/magicsock/dontfrag_default.go
Normal file
16
wgengine/magicsock/dontfrag_default.go
Normal file
@@ -0,0 +1,16 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !linux && !darwin
|
||||
|
||||
package magicsock
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"tailscale.com/types/nettype"
|
||||
)
|
||||
|
||||
// setDontFragment is the stub for platforms excluded by this file's build
// constraint (anything that is not linux or darwin). It ignores pconn and
// network and always returns an error saying the option is unsupported.
func setDontFragment(pconn nettype.PacketConn, network string) (err error) {
|
||||
return errors.New("setting don't fragment bit not supported on this OS")
|
||||
}
|
||||
28
wgengine/magicsock/dontfrag_linux.go
Normal file
28
wgengine/magicsock/dontfrag_linux.go
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package magicsock
|
||||
|
||||
import (
|
||||
"net"
|
||||
"syscall"
|
||||
|
||||
"tailscale.com/types/nettype"
|
||||
)
|
||||
|
||||
func setDontFragment(pconn nettype.PacketConn, network string) (err error) {
|
||||
if c, ok := pconn.(*net.UDPConn); ok {
|
||||
rc, err := c.SyscallConn()
|
||||
if err == nil {
|
||||
rc.Control(func(fd uintptr) {
|
||||
if network == "udp4" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, syscall.IP_MTU_DISCOVER, syscall.IP_PMTUDISC_DO)
|
||||
}
|
||||
if network == "udp6" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, syscall.IP_MTU_DISCOVER, syscall.IP_PMTUDISC_DO)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -56,6 +56,7 @@ import (
|
||||
"tailscale.com/net/sockstats"
|
||||
"tailscale.com/net/stun"
|
||||
"tailscale.com/net/tsaddr"
|
||||
"tailscale.com/net/tstun"
|
||||
"tailscale.com/syncs"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstime"
|
||||
@@ -88,8 +89,26 @@ const (
|
||||
// is the max supported by a default configuration of macOS. Some platforms
|
||||
// will silently clamp the value.
|
||||
socketBufferSize = 7 << 20
|
||||
|
||||
// Various things needed to calculate the effective MTU
|
||||
udpHeaderLen = 8 // Not defined anywhere unlike IP header length
|
||||
// Disco header
|
||||
discoHeaderLen = len(disco.Magic) + key.DiscoPublicRawLen
|
||||
boxHeaderLen = 40 // Header added by encrypting the naclbox
|
||||
)
|
||||
|
||||
// Pad the content of the message to get the requested on-the-wire
|
||||
// packet size. We have to include the padding as part of the
|
||||
// encrypted message or it gets truncated. So calculate the total
|
||||
// message size and subtract all the various headers.
|
||||
//
|
||||
// Parts of a packet:
|
||||
// IP header
|
||||
// UDP header
|
||||
// disco header
|
||||
// encryption overhead
|
||||
// message contents
|
||||
|
||||
// useDerpRoute reports whether magicsock should enable the DERP
|
||||
// return path optimization (Issue 150).
|
||||
func useDerpRoute() bool {
|
||||
@@ -1007,7 +1026,7 @@ func (c *Conn) LastRecvActivityOfNodeKey(nk key.NodePublic) string {
|
||||
}
|
||||
|
||||
// Ping handles a "tailscale ping" CLI query.
|
||||
func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
|
||||
func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult), mtu int) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.privateKey.IsZero() {
|
||||
@@ -1031,7 +1050,7 @@ func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnst
|
||||
cb(res)
|
||||
return
|
||||
}
|
||||
ep.cliPing(res, cb)
|
||||
ep.cliPing(res, cb, mtu)
|
||||
}
|
||||
|
||||
// c.mu must be held
|
||||
@@ -2056,13 +2075,39 @@ const (
|
||||
// speeds.
|
||||
var debugIPv4DiscoPingPenalty = envknob.RegisterDuration("TS_DISCO_PONG_IPV4_DELAY")
|
||||
|
||||
// usefulMtus are the set of likely on-the-wire MTUs (including all the
|
||||
// layers of protocol headers above link layer)
|
||||
//
|
||||
// Each ping is kicked off with a separate go routine and they seem to
|
||||
// be run in approximately LIFO order, so if we want the biggest ping
|
||||
// to arrive first put it last?
|
||||
//
|
||||
// Debugging tips:
|
||||
//
|
||||
// To get exactly ONE ping of the desired MTU, do the following:
|
||||
//
|
||||
// 1. Set the below array to a single 0.
|
||||
// 2. Disable disco heartbeat with TS_DEBUG_ENABLE_SILENT_DISCO=1
|
||||
// 3. Send a single ping with ./tool/go run ./cmd/tailscale ping --mtu=1000 <host>
|
||||
|
||||
var usefulMtus = [...]int{
|
||||
//0,
|
||||
576, // Smallest MTU for IPv4, probably useless?
|
||||
1124, // An observed max mtu in the wild, maybe 1100 instead?
|
||||
1280, // Smallest MTU for IPv6, current default
|
||||
1400, // A little less, for tunnels or such
|
||||
1500, // Most common real world MTU
|
||||
8000, // Some jumbo frames are this size
|
||||
9000, // Most jumbo frames are this size or slightly larger
|
||||
}
|
||||
|
||||
// sendDiscoMessage sends discovery message m to dstDisco at dst.
|
||||
//
|
||||
// If dst is a DERP IP:port, then dstKey must be non-zero.
|
||||
//
|
||||
// The dstKey should only be non-zero if the dstDisco key
|
||||
// unambiguously maps to exactly one peer.
|
||||
func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDisco key.DiscoPublic, m disco.Message, logLevel discoLogLevel) (sent bool, err error) {
|
||||
func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDisco key.DiscoPublic, m disco.Message, logLevel discoLogLevel, mtu int) (sent bool, err error) {
|
||||
isDERP := dst.Addr() == derpMagicIPAddr
|
||||
if _, isPong := m.(*disco.Pong); isPong && !isDERP && dst.Addr().Is4() {
|
||||
time.Sleep(debugIPv4DiscoPingPenalty())
|
||||
@@ -2077,9 +2122,15 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
|
||||
if _, err := crand.Read(nonce[:]); err != nil {
|
||||
panic(err) // worth dying for
|
||||
}
|
||||
pkt := make([]byte, 0, 512) // TODO: size it correctly? pool? if it matters.
|
||||
|
||||
bufSize := 512
|
||||
if mtu != 0 {
|
||||
bufSize = mtu
|
||||
}
|
||||
pkt := make([]byte, 0, bufSize)
|
||||
pkt = append(pkt, disco.Magic...)
|
||||
pkt = c.discoPublic.AppendTo(pkt)
|
||||
c.logf("disco header %v", len(pkt))
|
||||
di := c.discoInfoLocked(dstDisco)
|
||||
c.mu.Unlock()
|
||||
|
||||
@@ -2089,8 +2140,41 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
|
||||
metricSendDiscoUDP.Add(1)
|
||||
}
|
||||
|
||||
box := di.sharedKey.Seal(m.AppendMarshal(nil))
|
||||
// pm needs to be padded out to hit our mtu goal
|
||||
//
|
||||
// IP adds 20/40 bytes to the packet
|
||||
// UDP adds 8 bytes
|
||||
// Disco header is len(pkt) = 38 (disco.Magic plus the sender's disco public key)
|
||||
// Disco ping part of the message is len(pm)
|
||||
//
|
||||
// Seal adds 40 bytes to the input
|
||||
|
||||
pm := m.AppendMarshal(nil)
|
||||
c.logf("marshaled message %v", len(pm))
|
||||
|
||||
if mtu != 0 {
|
||||
ipHeaderLen := ipv4.HeaderLen
|
||||
if dst.Addr().Is4() {
|
||||
ipHeaderLen = ipv6.HeaderLen
|
||||
}
|
||||
headerLen := ipHeaderLen + udpHeaderLen + boxHeaderLen
|
||||
c.logf("IP/UDP/box headers %v", headerLen)
|
||||
pad := mtu - headerLen - len(pkt) - len(pm)
|
||||
c.logf("pad %v", pad)
|
||||
// XXX This is where we add 20 bytes to hit our MTU
|
||||
// goal. This means that we left out 20 bytes of
|
||||
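// header or message in our calculation above.
// NOTE(review): the Is4() branch above selects ipv6.HeaderLen, so
// IPv4 destinations subtract the larger IPv6 header and IPv6
// destinations subtract the smaller IPv4 one. That inversion looks
// like the source of the missing 20 bytes; fixing the condition
// should make this correction unnecessary. Confirm before removing.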
|
||||
pad += 20
|
||||
c.logf("corrected pad %v", pad)
|
||||
// Extend our buffer to the padded size
|
||||
pm = append(pm, make([]byte, pad, pad)...)
|
||||
c.logf("padded message %v", len(pm))
|
||||
}
|
||||
// Seal adds 40 bytes to the message (accounted for above)
|
||||
box := di.sharedKey.Seal(pm)
|
||||
c.logf("box %v (should be 40 more than %v)", len(box), len(pm))
|
||||
pkt = append(pkt, box...)
|
||||
c.logf("pkt %v", len(pkt))
|
||||
sent, err = c.sendAddr(dst, dstKey, pkt)
|
||||
if sent {
|
||||
if logLevel == discoLog || (logLevel == discoVerboseLog && debugDisco()) {
|
||||
@@ -2098,7 +2182,7 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
|
||||
if !dstKey.IsZero() {
|
||||
node = dstKey.ShortString()
|
||||
}
|
||||
c.dlogf("[v1] magicsock: disco: %v->%v (%v, %v) sent %v", c.discoShort, dstDisco.ShortString(), node, derpStr(dst.String()), disco.MessageSummary(m))
|
||||
c.logf("magicsock: disco: %v->%v (%v, %v) sent %v len %v\n", c.discoShort, dstDisco.ShortString(), node, derpStr(dst.String()), disco.MessageSummary(m), len(pkt))
|
||||
}
|
||||
if isDERP {
|
||||
metricSentDiscoDERP.Add(1)
|
||||
@@ -2168,24 +2252,24 @@ const (
|
||||
// - senderDiscoPubKey [32]byte
|
||||
// - nonce [24]byte
|
||||
// - naclbox of payload (see tailscale.com/disco package for inner payload format)
|
||||
// - optionally, zero padding to test out the MTU
|
||||
//
|
||||
// For messages received over DERP, the src.Addr() will be derpMagicIP (with
|
||||
// src.Port() being the region ID) and the derpNodeSrc will be the node key
|
||||
// it was received from at the DERP layer. derpNodeSrc is zero when received
|
||||
// over UDP.
|
||||
func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc key.NodePublic, via discoRXPath) (isDiscoMsg bool) {
|
||||
const headerLen = len(disco.Magic) + key.DiscoPublicRawLen
|
||||
if len(msg) < headerLen || string(msg[:len(disco.Magic)]) != disco.Magic {
|
||||
msgLen := len(msg)
|
||||
if msgLen < discoHeaderLen || string(msg[:len(disco.Magic)]) != disco.Magic {
|
||||
return false
|
||||
}
|
||||
|
||||
// If the first four parts are the prefix of disco.Magic
|
||||
// (0x5453f09f) then it's definitely not a valid WireGuard
|
||||
// packet (which starts with little-endian uint32 1, 2, 3, 4).
|
||||
// Use naked returns for all following paths.
|
||||
isDiscoMsg = true
|
||||
|
||||
sender := key.DiscoPublicFromRaw32(mem.B(msg[len(disco.Magic):headerLen]))
|
||||
sender := key.DiscoPublicFromRaw32(mem.B(msg[len(disco.Magic):discoHeaderLen]))
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
@@ -2194,7 +2278,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
|
||||
return
|
||||
}
|
||||
if debugDisco() {
|
||||
c.logf("magicsock: disco: got disco-looking frame from %v via %s", sender.ShortString(), via)
|
||||
c.logf("magicsock: disco: got disco-looking frame from %v via %s len %v", sender.ShortString(), via, msgLen)
|
||||
}
|
||||
if c.privateKey.IsZero() {
|
||||
// Ignore disco messages when we're stopped.
|
||||
@@ -2217,7 +2301,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
|
||||
|
||||
di := c.discoInfoLocked(sender)
|
||||
|
||||
sealedBox := msg[headerLen:]
|
||||
sealedBox := msg[discoHeaderLen:]
|
||||
payload, ok := di.sharedKey.Open(sealedBox)
|
||||
if !ok {
|
||||
// This might have been intended for a previous
|
||||
@@ -2267,14 +2351,14 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
|
||||
switch dm := dm.(type) {
|
||||
case *disco.Ping:
|
||||
metricRecvDiscoPing.Add(1)
|
||||
c.handlePingLocked(dm, src, di, derpNodeSrc)
|
||||
c.handlePingLocked(dm, src, di, derpNodeSrc, msgLen)
|
||||
case *disco.Pong:
|
||||
metricRecvDiscoPong.Add(1)
|
||||
// There might be multiple nodes for the sender's DiscoKey.
|
||||
// Ask each to handle it, stopping once one reports that
|
||||
// the Pong's TxID was theirs.
|
||||
c.peerMap.forEachEndpointWithDiscoKey(sender, func(ep *endpoint) (keepGoing bool) {
|
||||
if ep.handlePongConnLocked(dm, di, src) {
|
||||
if ep.handlePongConnLocked(dm, di, src, msgLen) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
@@ -2351,7 +2435,8 @@ func (c *Conn) unambiguousNodeKeyOfPingLocked(dm *disco.Ping, dk key.DiscoPublic
|
||||
|
||||
// di is the discoInfo of the source of the ping.
|
||||
// derpNodeSrc is non-zero if the ping arrived via DERP.
|
||||
func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInfo, derpNodeSrc key.NodePublic) {
|
||||
// msgLen is the length of the disco message
|
||||
func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInfo, derpNodeSrc key.NodePublic, msgLen int) {
|
||||
likelyHeartBeat := src == di.lastPingFrom && time.Since(di.lastPingTime) < 5*time.Second
|
||||
di.lastPingFrom = src
|
||||
di.lastPingTime = time.Now()
|
||||
@@ -2380,16 +2465,36 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf
|
||||
// Remember this route if not present.
|
||||
var numNodes int
|
||||
var dup bool
|
||||
// Find the on-the-wire MTU. In this case, msgLen is the
|
||||
// length of the entire disco packet, so everything except the
|
||||
// IP and UDP headers.
|
||||
mtu := 0
|
||||
// msgLen for an ordinary ping is 124
|
||||
// IPv4: 20
|
||||
// UDP: 8
|
||||
// Disco header: 38
|
||||
// boxHeaderLen: 40
|
||||
// PingLen: 44
|
||||
// ???: 2
|
||||
if msgLen > discoHeaderLen+boxHeaderLen+disco.PingLen+2 {
|
||||
pad := ipv4.HeaderLen
|
||||
if src.Addr().Is6() {
|
||||
pad = ipv6.HeaderLen
|
||||
}
|
||||
pad += udpHeaderLen
|
||||
mtu = msgLen + pad
|
||||
c.logf("adding %v bytes to msgLen %v for mtu %v", pad, msgLen, mtu)
|
||||
}
|
||||
if isDerp {
|
||||
if ep, ok := c.peerMap.endpointForNodeKey(derpNodeSrc); ok {
|
||||
if ep.addCandidateEndpoint(src, dm.TxID) {
|
||||
if ep.addCandidateEndpoint(src, dm.TxID, 0) {
|
||||
return
|
||||
}
|
||||
numNodes = 1
|
||||
}
|
||||
} else {
|
||||
c.peerMap.forEachEndpointWithDiscoKey(di.discoKey, func(ep *endpoint) (keepGoing bool) {
|
||||
if ep.addCandidateEndpoint(src, dm.TxID) {
|
||||
if ep.addCandidateEndpoint(src, dm.TxID, mtu) {
|
||||
dup = true
|
||||
return false
|
||||
}
|
||||
@@ -2427,7 +2532,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf
|
||||
go c.sendDiscoMessage(ipDst, dstKey, discoDest, &disco.Pong{
|
||||
TxID: dm.TxID,
|
||||
Src: src,
|
||||
}, discoVerboseLog)
|
||||
}, discoVerboseLog, mtu)
|
||||
}
|
||||
|
||||
// enqueueCallMeMaybe schedules a send of disco.CallMeMaybe to de via derpAddr
|
||||
@@ -2469,12 +2574,12 @@ func (c *Conn) enqueueCallMeMaybe(derpAddr netip.AddrPort, de *endpoint) {
|
||||
for _, ep := range c.lastEndpoints {
|
||||
eps = append(eps, ep.Addr)
|
||||
}
|
||||
go de.c.sendDiscoMessage(derpAddr, de.publicKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
|
||||
go de.c.sendDiscoMessage(derpAddr, de.publicKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog, 0)
|
||||
if debugSendCallMeUnknownPeer() {
|
||||
// Send a callMeMaybe packet to a non-existent peer
|
||||
unknownKey := key.NewNode().Public()
|
||||
c.logf("magicsock: sending CallMeMaybe to unknown peer per TS_DEBUG_SEND_CALLME_UNKNOWN_PEER")
|
||||
go de.c.sendDiscoMessage(derpAddr, unknownKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
|
||||
go de.c.sendDiscoMessage(derpAddr, unknownKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog, 0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3217,6 +3322,39 @@ func (c *Conn) shouldDoPeriodicReSTUNLocked() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// PathMTU returns the path MTU to the peer at dst (tailscale address)
|
||||
func (c *Conn) PathMTU(dst netip.Addr) int {
|
||||
// TODO(s): this is method is pretty expensive. Reduce lookups before
|
||||
// removing the envknob guard.
|
||||
if !debugPMTUD() {
|
||||
return int(tstun.DefaultMTU())
|
||||
}
|
||||
|
||||
peer, ok := c.netMap.PeerByTailscaleIP(dst)
|
||||
if !ok {
|
||||
return int(tstun.DefaultMTU())
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.closed {
|
||||
return int(tstun.DefaultMTU())
|
||||
}
|
||||
ep, ok := c.peerMap.endpointForNodeKey(peer.Key)
|
||||
if !ok {
|
||||
return int(tstun.DefaultMTU())
|
||||
}
|
||||
|
||||
now := mono.Now()
|
||||
if !ep.bestAddr.AddrPort.IsValid() || now.After(ep.trustBestAddrUntil) {
|
||||
// We have not done the disco pings yet. ep.send() will kick that off
|
||||
// down the line.
|
||||
return int(tstun.DefaultMTU())
|
||||
}
|
||||
|
||||
return ep.bestAddr.mtu
|
||||
}
|
||||
|
||||
func (c *Conn) onPortMapChanged() { c.ReSTUN("portmap-changed") }
|
||||
|
||||
// ReSTUN triggers an address discovery.
|
||||
@@ -3335,6 +3473,14 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur
|
||||
continue
|
||||
}
|
||||
trySetSocketBuffer(pconn, c.logf)
|
||||
|
||||
if debugPMTUD() {
|
||||
err = setDontFragment(pconn, network)
|
||||
if err != nil {
|
||||
c.logf("magicsock: unable to do path mtu discovery: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Success.
|
||||
if debugBindSocket() {
|
||||
c.logf("magicsock: bindSocket: successfully listened %v port %d", network, port)
|
||||
@@ -4133,7 +4279,7 @@ type endpoint struct {
|
||||
lastFullPing mono.Time // last time we pinged all disco endpoints
|
||||
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
|
||||
|
||||
bestAddr addrLatency // best non-DERP path; zero if none
|
||||
bestAddr addrQuality // best non-DERP path; zero if none
|
||||
bestAddrAt mono.Time // time best address re-confirmed
|
||||
trustBestAddrUntil mono.Time // time when bestAddr expires
|
||||
sentPing map[stun.TxID]sentPing
|
||||
@@ -4241,6 +4387,7 @@ type endpointState struct {
|
||||
recentPong uint16 // index into recentPongs of most recent; older before, wrapped
|
||||
|
||||
index int16 // index in nodecfg.Node.Endpoints; meaningless if lastGotPing non-zero
|
||||
mtu int // mtu of connection to this endpoint
|
||||
}
|
||||
|
||||
// indexSentinelDeleted is the temporary value that endpointState.index takes while
|
||||
@@ -4283,11 +4430,13 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
|
||||
What: "deleteEndpointLocked-bestAddr-" + why,
|
||||
From: de.bestAddr,
|
||||
})
|
||||
de.bestAddr = addrLatency{}
|
||||
de.bestAddr = addrQuality{}
|
||||
}
|
||||
}
|
||||
|
||||
// pongHistoryCount is how many pongReply values we keep per endpointState
|
||||
// XXX should maybe make this bigger for mtu stuff
|
||||
// const pongHistoryCount = 64 * len(usefulMtus)
|
||||
const pongHistoryCount = 64
|
||||
|
||||
type pongReply struct {
|
||||
@@ -4295,6 +4444,7 @@ type pongReply struct {
|
||||
pongAt mono.Time // when we received the pong
|
||||
from netip.AddrPort // the pong's src (usually same as endpoint map key)
|
||||
pongSrc netip.AddrPort // what they reported they heard
|
||||
mtu int // total packet size including IP header
|
||||
}
|
||||
|
||||
type sentPing struct {
|
||||
@@ -4302,6 +4452,7 @@ type sentPing struct {
|
||||
at mono.Time
|
||||
timer *time.Timer // timeout timer
|
||||
purpose discoPingPurpose
|
||||
mtu int // total packet size including IP header
|
||||
}
|
||||
|
||||
// initFakeUDPAddr populates fakeWGAddr with a globally unique fake UDPAddr.
|
||||
@@ -4502,7 +4653,7 @@ func (de *endpoint) noteActiveLocked() {
|
||||
|
||||
// cliPing starts a ping for the "tailscale ping" command. res is value to call cb with,
|
||||
// already partially filled.
|
||||
func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
|
||||
func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResult), mtu int) {
|
||||
de.mu.Lock()
|
||||
defer de.mu.Unlock()
|
||||
|
||||
@@ -4524,10 +4675,16 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResu
|
||||
// Otherwise "tailscale ping" results to a node on the local network
|
||||
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
|
||||
// IPv6 address, both 1ms away, and it's random who replies first.
|
||||
de.startDiscoPingLocked(udpAddr, now, pingCLI)
|
||||
de.startDiscoPingLockedMTU(udpAddr, now, pingCLI, mtu)
|
||||
} else {
|
||||
for ep := range de.endpointState {
|
||||
de.startDiscoPingLocked(ep, now, pingCLI)
|
||||
if mtu == 0 {
|
||||
for _, testMtu := range usefulMtus {
|
||||
de.startDiscoPingLockedMTU(ep, now, pingDiscovery, testMtu)
|
||||
}
|
||||
} else {
|
||||
de.startDiscoPingLockedMTU(ep, now, pingCLI, mtu)
|
||||
}
|
||||
}
|
||||
}
|
||||
de.noteActiveLocked()
|
||||
@@ -4639,11 +4796,11 @@ func (de *endpoint) removeSentDiscoPingLocked(txid stun.TxID, sp sentPing) {
|
||||
//
|
||||
// The caller should use de.discoKey as the discoKey argument.
|
||||
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
|
||||
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel) {
|
||||
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel, mtu int) {
|
||||
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
|
||||
TxID: [12]byte(txid),
|
||||
NodeKey: de.c.publicKeyAtomic.Load(),
|
||||
}, logLevel)
|
||||
}, logLevel, mtu)
|
||||
if !sent {
|
||||
de.forgetDiscoPing(txid)
|
||||
}
|
||||
@@ -4668,6 +4825,10 @@ const (
|
||||
)
|
||||
|
||||
// startDiscoPingLocked starts a disco ping to ep for the given purpose
// without requesting any particular packet size: it delegates to
// startDiscoPingLockedMTU with an mtu of 0.
// NOTE(review): the "Locked" suffix presumably means de.mu must be held by
// the caller; confirm against the endpoint locking rules.
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose) {
|
||||
de.startDiscoPingLockedMTU(ep, now, purpose, 0)
|
||||
}
|
||||
|
||||
func (de *endpoint) startDiscoPingLockedMTU(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, mtu int) {
|
||||
if runtime.GOOS == "js" {
|
||||
return
|
||||
}
|
||||
@@ -4692,12 +4853,13 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
|
||||
at: now,
|
||||
timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
|
||||
purpose: purpose,
|
||||
mtu: mtu,
|
||||
}
|
||||
logLevel := discoLog
|
||||
if purpose == pingHeartbeat {
|
||||
logLevel = discoVerboseLog
|
||||
}
|
||||
go de.sendDiscoPing(ep, epDisco.key, txid, logLevel)
|
||||
go de.sendDiscoPing(ep, epDisco.key, txid, logLevel, mtu)
|
||||
}
|
||||
|
||||
func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
|
||||
@@ -4721,8 +4883,20 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
|
||||
if firstPing && sendCallMeMaybe {
|
||||
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
|
||||
}
|
||||
|
||||
de.startDiscoPingLocked(ep, now, pingDiscovery)
|
||||
// Send a bouquet of pings in different sizes to probe
|
||||
// peer mtu, but only if this is not a derp addr.
|
||||
if de.derpAddr.IsValid() {
|
||||
de.startDiscoPingLocked(ep, now, pingDiscovery)
|
||||
} else {
|
||||
for _, mtu := range usefulMtus {
|
||||
// XXX only send pings less than the mtu of the interface used to reach this endpoint
|
||||
de.startDiscoPingLockedMTU(ep, now, pingDiscovery, mtu)
|
||||
// XXX Would be nice to pause a bit to
|
||||
// give the largest mtu an advantage
|
||||
// in returning first, but we're
|
||||
// holding a lock here.
|
||||
}
|
||||
}
|
||||
}
|
||||
derpAddr := de.derpAddr
|
||||
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
|
||||
@@ -4924,15 +5098,23 @@ func (de *endpoint) updateFromNode(n *tailcfg.Node, heartbeatDisabled bool) {
|
||||
//
|
||||
// This is called once we've already verified that we got a valid
|
||||
// discovery message from de via ep.
|
||||
func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.TxID) (duplicatePing bool) {
|
||||
func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.TxID, mtu int) (duplicatePing bool) {
|
||||
de.mu.Lock()
|
||||
defer de.mu.Unlock()
|
||||
|
||||
// lookup the current end point state
|
||||
if st, ok := de.endpointState[ep]; ok {
|
||||
// oh, we already have this endpoint in our list!
|
||||
// check to see if this is a resend of the same ping as the one we got most recently
|
||||
duplicatePing = forRxPingTxID == st.lastGotPingTxID
|
||||
if !duplicatePing {
|
||||
st.lastGotPingTxID = forRxPingTxID
|
||||
}
|
||||
// If this ping has a higher mtu, update it in the endpoint
|
||||
if mtu > st.mtu {
|
||||
de.c.logf("UPDATING mtu to %v", mtu)
|
||||
st.mtu = mtu
|
||||
}
|
||||
if st.lastGotPing.IsZero() {
|
||||
// Already-known endpoint from the network map.
|
||||
return duplicatePing
|
||||
@@ -4942,10 +5124,12 @@ func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.T
|
||||
}
|
||||
|
||||
// Newly discovered endpoint. Exciting!
|
||||
de.c.dlogf("[v1] magicsock: disco: adding %v as candidate endpoint for %v (%s)", ep, de.discoShort(), de.publicKey.ShortString())
|
||||
de.c.dlogf("[v1] magicsock: disco: adding %v as candidate endpoint for %v (%s) mtu %v", ep, de.discoShort(), de.publicKey.ShortString(), mtu)
|
||||
// Currently this adds a separate endpoint for each test MTU, would be better to update in place
|
||||
de.endpointState[ep] = &endpointState{
|
||||
lastGotPing: time.Now(),
|
||||
lastGotPingTxID: forRxPingTxID,
|
||||
mtu: mtu,
|
||||
}
|
||||
|
||||
// If for some reason this gets very large, do some cleanup.
|
||||
@@ -4975,7 +5159,7 @@ func (de *endpoint) noteConnectivityChange() {
|
||||
// It should be called with the Conn.mu held.
|
||||
//
|
||||
// It reports whether m.TxID corresponds to a ping that this endpoint sent.
|
||||
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort) (knownTxID bool) {
|
||||
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort, msgLen int) (knownTxID bool) {
|
||||
de.mu.Lock()
|
||||
defer de.mu.Unlock()
|
||||
|
||||
@@ -4987,6 +5171,30 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
|
||||
return false
|
||||
}
|
||||
knownTxID = true // for naked returns below
|
||||
|
||||
// Find the on-the-wire MTU. In this case, msgLen is the
|
||||
// length of the entire disco packet, so everything except the
|
||||
// IP and UDP headers.
|
||||
mtu := 0
|
||||
// XXX Why is the msgLen 2 bytes longer than the headers plus pong?
|
||||
if msgLen > discoHeaderLen+boxHeaderLen+disco.PongLen+2 {
|
||||
pad := ipv4.HeaderLen
|
||||
if src.Addr().Is6() {
|
||||
pad = ipv6.HeaderLen
|
||||
}
|
||||
pad += udpHeaderLen
|
||||
// XXX Why do we have to add an additional 20 bytes as
|
||||
// well as the IP header and UDP header length to get
|
||||
// the on-the-wire MTU? What is this 20 bytes if it is
|
||||
// not IP header, UDP header, or contents of the disco
|
||||
// message?
|
||||
mtu = msgLen + pad
|
||||
de.c.logf("adding %v bytes to msgLen %v for mtu %v", pad, msgLen, mtu)
|
||||
}
|
||||
// The mtu of the returned pong should be the same as the sent ping
|
||||
if sp.mtu != mtu {
|
||||
de.c.logf("error! pong mtu %v does not match sent ping mtu %v", mtu, sp.mtu)
|
||||
}
|
||||
de.removeSentDiscoPingLocked(m.TxID, sp)
|
||||
|
||||
now := mono.Now()
|
||||
@@ -5006,15 +5214,16 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
|
||||
pongAt: now,
|
||||
from: src,
|
||||
pongSrc: m.Src,
|
||||
mtu: mtu,
|
||||
})
|
||||
}
|
||||
|
||||
if sp.purpose != pingHeartbeat {
|
||||
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
|
||||
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v mtu %v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
|
||||
if sp.to != src {
|
||||
fmt.Fprintf(bw, " ping.to=%v", sp.to)
|
||||
}
|
||||
}))
|
||||
}), mtu)
|
||||
}
|
||||
|
||||
for _, pp := range de.pendingCLIPings {
|
||||
@@ -5026,9 +5235,9 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
|
||||
// Promote this pong response to our current best address if it's lower latency.
|
||||
// TODO(bradfitz): decide how latency vs. preference order affects decision
|
||||
if !isDerp {
|
||||
thisPong := addrLatency{sp.to, latency}
|
||||
thisPong := addrQuality{sp.to, latency, mtu}
|
||||
if betterAddr(thisPong, de.bestAddr) {
|
||||
de.c.logf("magicsock: disco: node %v %v now using %v", de.publicKey.ShortString(), de.discoShort(), sp.to)
|
||||
de.c.logf("UPDATING MTU magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, mtu)
|
||||
de.debugUpdates.Add(EndpointChange{
|
||||
When: time.Now(),
|
||||
What: "handlePingLocked-bestAddr-update",
|
||||
@@ -5066,19 +5275,23 @@ func portableTrySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
|
||||
}
|
||||
}
|
||||
|
||||
// addrLatency is an IPPort with an associated latency.
|
||||
type addrLatency struct {
|
||||
// addrQuality is an IPPort with an associated latency and MTU.
|
||||
type addrQuality struct {
|
||||
netip.AddrPort
|
||||
latency time.Duration
|
||||
mtu int
|
||||
}
|
||||
|
||||
func (a addrLatency) String() string {
|
||||
return a.AddrPort.String() + "@" + a.latency.String()
|
||||
func (a addrQuality) String() string {
|
||||
return a.AddrPort.String() + "@" + a.latency.String() + "+" + strconv.Itoa(a.mtu)
|
||||
}
|
||||
|
||||
// betterAddr reports whether a is a better addr to use than b.
|
||||
func betterAddr(a, b addrLatency) bool {
|
||||
func betterAddr(a, b addrQuality) bool {
|
||||
if a.AddrPort == b.AddrPort {
|
||||
if a.mtu > b.mtu {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
if !b.IsValid() {
|
||||
@@ -5274,7 +5487,7 @@ func (de *endpoint) stopAndReset() {
|
||||
func (de *endpoint) resetLocked() {
|
||||
de.lastSend = 0
|
||||
de.lastFullPing = 0
|
||||
de.bestAddr = addrLatency{}
|
||||
de.bestAddr = addrQuality{}
|
||||
de.bestAddrAt = 0
|
||||
de.trustBestAddrUntil = 0
|
||||
for _, es := range de.endpointState {
|
||||
|
||||
65
wgengine/magicsock/magicsock_darwin.go
Normal file
65
wgengine/magicsock/magicsock_darwin.go
Normal file
@@ -0,0 +1,65 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package magicsock
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"net"
|
||||
"syscall"
|
||||
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/types/nettype"
|
||||
)
|
||||
|
||||
// Socket option numbers used by trySetDontFragment.
//
// From https://opensource.apple.com/source/xnu/xnu-6153.141.1/bsd/netinet6/in6.h.auto.html
// See also https://github.com/rust-lang/libc/pull/2613/commits/757b5dd7c7cb4d913e582100c2cd8a5667b9e204
const (
	// ipDontFrag is the option set at the IPPROTO_IP level for "udp4" sockets.
	ipDontFrag = 28

	// ipv6DontFrag is the option set at the IPPROTO_IPV6 level for "udp6"
	// sockets.
	ipv6DontFrag = 62
)
|
||||
|
||||
func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
|
||||
return nil, errors.New("raw disco listening not supported on this OS")
|
||||
}
|
||||
|
||||
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
|
||||
portableTrySetSocketBuffer(pconn, logf)
|
||||
}
|
||||
|
||||
func trySetDontFragment(pconn nettype.PacketConn, network string) (err error) {
|
||||
if c, ok := pconn.(*net.UDPConn); ok {
|
||||
rc, err := c.SyscallConn()
|
||||
if err == nil {
|
||||
rc.Control(func(fd uintptr) {
|
||||
if network == "udp4" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, ipDontFrag, 1)
|
||||
}
|
||||
if network == "udp6" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, ipv6DontFrag, 1)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
|
||||
return false, false
|
||||
}
|
||||
|
||||
// getGSOSizeFromControl always returns a size of zero and a nil error. The
// control bytes are not examined in this implementation.
func getGSOSizeFromControl(control []byte) (int, error) {
	const noGSO = 0
	return noGSO, nil
}
|
||||
|
||||
// setGSOSizeInControl is a no-op in this implementation: control is left
// untouched and gso is ignored.
func setGSOSizeInControl(control *[]byte, gso uint16) {
	// Intentionally empty.
}
|
||||
|
||||
// errShouldDisableOffload always reports false, whatever err is.
func errShouldDisableOffload(err error) bool {
	const disable = false
	return disable
}
|
||||
|
||||
// controlMessageSize is zero in this file, alongside UDP offload helpers
// that are all no-ops.
const controlMessageSize = 0
|
||||
@@ -1,7 +1,7 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !linux
|
||||
//go:build !linux && !darwin
|
||||
|
||||
package magicsock
|
||||
|
||||
@@ -21,6 +21,10 @@ func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
|
||||
portableTrySetSocketBuffer(pconn, logf)
|
||||
}
|
||||
|
||||
func trySetDontFragment(pconn nettype.PacketConn, network string) (err error) {
|
||||
return errors.New("Setting don't fragment bit not supported on this OS")
|
||||
}
|
||||
|
||||
func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
|
||||
return false, false
|
||||
}
|
||||
|
||||
@@ -318,6 +318,23 @@ func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
|
||||
}
|
||||
}
|
||||
|
||||
func trySetDontFragment(pconn nettype.PacketConn, network string) (err error) {
|
||||
if c, ok := pconn.(*net.UDPConn); ok {
|
||||
rc, err := c.SyscallConn()
|
||||
if err == nil {
|
||||
rc.Control(func(fd uintptr) {
|
||||
if network == "udp4" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, syscall.IP_MTU_DISCOVER, syscall.IP_PMTUDISC_DO)
|
||||
}
|
||||
if network == "udp6" {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, syscall.IP_MTU_DISCOVER, syscall.IP_PMTUDISC_DO)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
const (
|
||||
// TODO(jwhited): upstream to unix?
|
||||
socketOptionLevelUDP = 17
|
||||
|
||||
@@ -1627,10 +1627,13 @@ func TestEndpointSetsEqual(t *testing.T) {
|
||||
|
||||
func TestBetterAddr(t *testing.T) {
|
||||
const ms = time.Millisecond
|
||||
al := func(ipps string, d time.Duration) addrLatency {
|
||||
return addrLatency{netip.MustParseAddrPort(ipps), d}
|
||||
al := func(ipps string, d time.Duration) addrQuality {
|
||||
return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d, mtu: 0}
|
||||
}
|
||||
zero := addrLatency{}
|
||||
almtu := func(ipps string, d time.Duration, mtu int) addrQuality {
|
||||
return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d, mtu: mtu}
|
||||
}
|
||||
zero := addrQuality{}
|
||||
|
||||
const (
|
||||
publicV4 = "1.2.3.4:555"
|
||||
@@ -1641,7 +1644,7 @@ func TestBetterAddr(t *testing.T) {
|
||||
)
|
||||
|
||||
tests := []struct {
|
||||
a, b addrLatency
|
||||
a, b addrQuality
|
||||
want bool // whether a is better than b
|
||||
}{
|
||||
{a: zero, b: zero, want: false},
|
||||
@@ -1703,7 +1706,12 @@ func TestBetterAddr(t *testing.T) {
|
||||
b: al(publicV6, 100*ms),
|
||||
want: true,
|
||||
},
|
||||
|
||||
// If addresses are equal, prefer larger MTU
|
||||
{
|
||||
a: almtu(publicV4, 30*ms, 1500),
|
||||
b: almtu(publicV4, 30*ms, 0),
|
||||
want: true,
|
||||
},
|
||||
// Private IPs are preferred over public IPs even if the public
|
||||
// IP is IPv6.
|
||||
{
|
||||
|
||||
@@ -150,6 +150,7 @@ const nicID = 1
|
||||
// maxUDPPacketSize is the maximum size of a UDP packet we copy in startPacketCopy
|
||||
// when relaying UDP packets. We don't use the 'mtu' const in anticipation of
|
||||
// one day making the MTU more dynamic.
|
||||
// TODO: make this bigger
|
||||
const maxUDPPacketSize = 1500
|
||||
|
||||
// Create creates and populates a new Impl.
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"bufio"
|
||||
"context"
|
||||
crand "crypto/rand"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -455,6 +456,60 @@ func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
|
||||
return filter.Accept
|
||||
}
|
||||
|
||||
// debugPMTUD is the TS_DEBUG_PMTUD envknob. When it reports true,
// handleLocalPackets compares each outgoing packet against the path MTU for
// its destination; oversized packets are dropped and answered with an
// injected ICMP "packet too big" error.
var debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
|
||||
|
||||
func (e *userspaceEngine) injectICMPPTB(p *packet.Parsed, mtu int) {
|
||||
var icmph packet.Header
|
||||
var payload []byte
|
||||
if p.Src.Addr().Is4() {
|
||||
// From https://www.ietf.org/rfc/rfc1191.html#section-4
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | Type | Code | Checksum |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | unused = 0 | MTU |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | Internet Header + 64 bits of Original Datagram Data |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
icmph = packet.ICMP4Header{
|
||||
IP4Header: packet.IP4Header{
|
||||
IPProto: ipproto.ICMPv4,
|
||||
Src: p.Dst.Addr(),
|
||||
Dst: p.Src.Addr(),
|
||||
},
|
||||
Type: packet.ICMP4Unreachable,
|
||||
Code: packet.ICMP4FragmentationNeeded,
|
||||
}
|
||||
payload = make([]byte, 4+20+8)
|
||||
binary.BigEndian.PutUint32(payload, uint32(mtu))
|
||||
copy(payload[4:], p.Buffer()[:len(payload)-4])
|
||||
} else {
|
||||
// https://www.ietf.org/rfc/rfc4443.html#section-3.2
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | Type | Code | Checksum |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | MTU |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | As much of invoking packet |
|
||||
// + as possible without the ICMPv6 packet +
|
||||
// | exceeding the minimum IPv6 MTU [IPv6] |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
icmph = packet.ICMP6Header{
|
||||
IP6Header: packet.IP6Header{
|
||||
IPProto: ipproto.ICMPv6,
|
||||
Src: p.Dst.Addr(),
|
||||
Dst: p.Src.Addr(),
|
||||
},
|
||||
Type: packet.ICMP6PacketTooBig,
|
||||
Code: packet.ICMP6NoCode,
|
||||
}
|
||||
// RFC says add as much of invoking packet, but headers should be enough.
|
||||
payload = make([]byte, 4+40+8)
|
||||
binary.BigEndian.PutUint32(payload, uint32(mtu))
|
||||
copy(payload[4:], p.Buffer()[:len(payload)-4])
|
||||
}
|
||||
e.tundev.InjectInboundCopy(packet.Generate(icmph, payload))
|
||||
}
|
||||
|
||||
// handleLocalPackets inspects packets coming from the local network
|
||||
// stack, and intercepts any packets that should be handled by
|
||||
// tailscaled directly. Other packets are allowed to proceed into the
|
||||
@@ -476,6 +531,18 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper)
|
||||
}
|
||||
}
|
||||
|
||||
if debugPMTUD() {
|
||||
const tailscaleOverhead = 40 + 8 + 32 // IP + UDP + WireGuard
|
||||
// TODO IPv4 is 20 bytes but IPv6 is 40 - move this into magicsock where we know
|
||||
// which we're using.
|
||||
// TODO consts to avoid numbers.
|
||||
pmtu := e.magicConn.PathMTU(p.Dst.Addr())
|
||||
if len(p.Buffer())+tailscaleOverhead > pmtu {
|
||||
e.injectICMPPTB(p, pmtu)
|
||||
return filter.Drop
|
||||
}
|
||||
}
|
||||
|
||||
return filter.Accept
|
||||
}
|
||||
|
||||
@@ -1101,6 +1168,7 @@ func (e *userspaceEngine) LinkChange(_ bool) {
|
||||
e.netMon.InjectEvent()
|
||||
}
|
||||
|
||||
// Add MTU monitoring and update here
|
||||
func (e *userspaceEngine) linkChange(changed bool, cur *interfaces.State) {
|
||||
up := cur.AnyInterfaceUp()
|
||||
if !up {
|
||||
@@ -1211,7 +1279,7 @@ func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
|
||||
e.magicConn.UpdateStatus(sb)
|
||||
}
|
||||
|
||||
func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) {
|
||||
func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult), mtu int) {
|
||||
res := &ipnstate.PingResult{IP: ip.String()}
|
||||
pip, ok := e.PeerForIP(ip)
|
||||
if !ok {
|
||||
@@ -1227,11 +1295,10 @@ func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func
|
||||
return
|
||||
}
|
||||
peer := pip.Node
|
||||
|
||||
e.logf("ping(%v): sending %v ping to %v %v ...", ip, pingType, peer.Key.ShortString(), peer.ComputedName)
|
||||
e.logf("ping(%v): sending %v ping mtu %v to %v %v...", ip, pingType, mtu, peer.Key.ShortString(), peer.ComputedName)
|
||||
switch pingType {
|
||||
case "disco":
|
||||
e.magicConn.Ping(peer, res, cb)
|
||||
e.magicConn.Ping(peer, res, cb, mtu)
|
||||
case "TSMP":
|
||||
e.sendTSMPPing(ip, peer, res, cb)
|
||||
case "ICMP":
|
||||
|
||||
@@ -158,8 +158,8 @@ func (e *watchdogEngine) DiscoPublicKey() (k key.DiscoPublic) {
|
||||
e.watchdog("DiscoPublicKey", func() { k = e.wrap.DiscoPublicKey() })
|
||||
return k
|
||||
}
|
||||
func (e *watchdogEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) {
|
||||
e.watchdog("Ping", func() { e.wrap.Ping(ip, pingType, cb) })
|
||||
func (e *watchdogEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult), mtu int) {
|
||||
e.watchdog("Ping", func() { e.wrap.Ping(ip, pingType, cb, mtu) })
|
||||
}
|
||||
func (e *watchdogEngine) RegisterIPPortIdentity(ipp netip.AddrPort, tsIP netip.Addr) {
|
||||
e.watchdog("RegisterIPPortIdentity", func() { e.wrap.RegisterIPPortIdentity(ipp, tsIP) })
|
||||
|
||||
@@ -152,7 +152,7 @@ type Engine interface {
|
||||
|
||||
// Ping is a request to start a ping with the peer handling the given IP and
|
||||
// then call cb with its ping latency & method.
|
||||
Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult))
|
||||
Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult), mtu int)
|
||||
|
||||
// RegisterIPPortIdentity registers a given node (identified by its
|
||||
// Tailscale IP) as temporarily having the given IP:port for whois lookups.
|
||||
|
||||
Reference in New Issue
Block a user