Compare commits

...

2 Commits

Author SHA1 Message Date
Andrew Dunham
089d15d8c2 net/dns/resolver: wrap errors with more context
To aid in debugging why we're seeing DNS resolution errors.

Updates #TODO

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I9b3f438d5d675f757e9043dbbdc413fd722fb81a
2024-04-17 10:35:49 -04:00
Andrew Dunham
91692495d8 net/dns/resolver: use SystemDial in DoH forwarder
This ensures that we close the underlying connection(s) when a major
link change happens. If we don't do this, on mobile platforms switching
between WiFi and cellular can result in leftover connections in the
http.Client's connection pool which are bound to the "wrong" interface.

Updates #10821
Updates tailscale/corp#19124

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: Ibd51ce2efcaf4bd68e14f6fdeded61d4e99f9a01
2024-04-17 10:35:40 -04:00
2 changed files with 26 additions and 10 deletions

View File

@@ -28,7 +28,6 @@ import (
"tailscale.com/net/dnscache"
"tailscale.com/net/neterror"
"tailscale.com/net/netmon"
"tailscale.com/net/netns"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsdial"
"tailscale.com/types/dnstype"
@@ -394,8 +393,20 @@ func (f *forwarder) getKnownDoHClientForProvider(urlBase string) (c *http.Client
if err != nil {
return nil, false
}
nsDialer := netns.NewDialer(f.logf, f.netMon)
dialer := dnscache.Dialer(nsDialer.DialContext, &dnscache.Resolver{
// NOTE: use f.dialer.SystemDial so we close connections on a link
// change; on mobile devices when switching between WiFi and cellular,
// we need to ensure we don't retain a connection on the old interface
// or we can block DNS resolution.
//
// NOTE: if we ever support arbitrary user-defined DoH providers, this
// isn't sufficient; we'd need a dialer that dial a DoH server on the
// internet, without going through Tailscale (as SystemDial does), but
// also can dial a node on the tailnet (e.g. a PiHole).
//
// As of the time of writing (2024-02-11), this isn't a problem because
// we only support a restricted set of public DoH providers that aren't
// on a user's tailnet.
dialer := dnscache.Dialer(f.dialer.SystemDial, &dnscache.Resolver{
SingleHost: dohURL.Hostname(),
SingleHostStaticResult: allIPs,
Logf: f.logf,
@@ -829,7 +840,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
domain, err := nameFromQuery(query.bs)
if err != nil {
metricDNSFwdErrorName.Add(1)
return err
return fmt.Errorf("getting name from DNS query: %w", err)
}
// Guarantee that the ctx we use below is done when this function returns.
@@ -877,7 +888,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
}
select {
case <-ctx.Done():
return ctx.Err()
return fmt.Errorf("sending SERVFAIL due to no resolvers: %w", ctx.Err())
case responseChan <- res:
return nil
}
@@ -907,6 +918,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
}
resb, err := f.send(ctx, fq, *rr)
if err != nil {
err = fmt.Errorf("querying resolver %q: %w", rr.name.Addr, err)
select {
case errc <- err:
case <-ctx.Done():
@@ -928,7 +940,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
select {
case <-ctx.Done():
metricDNSFwdErrorContext.Add(1)
return ctx.Err()
return fmt.Errorf("sending response: %w", ctx.Err())
case responseChan <- packet{v, query.family, query.addr}:
metricDNSFwdSuccess.Add(1)
return nil
@@ -943,7 +955,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
res, err := servfailResponse(query)
if err != nil {
f.logf("building servfail response: %v", err)
return firstErr
return fmt.Errorf("building SERVFAIL: %w", firstErr)
}
select {
@@ -959,9 +971,10 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
metricDNSFwdErrorContext.Add(1)
if firstErr != nil {
metricDNSFwdErrorContextGotError.Add(1)
return firstErr
} else {
firstErr = ctx.Err()
}
return ctx.Err()
return fmt.Errorf("waiting for response: %w", firstErr)
}
}
}

View File

@@ -288,9 +288,12 @@ func (r *Resolver) Query(ctx context.Context, bs []byte, family string, from net
// This is present in some errors paths, such as when all upstream
// DNS servers replied with an error.
case resp := <-responses:
if err != nil {
err = fmt.Errorf("forwarder response error: %w", err)
}
return resp.bs, err
default:
return nil, err
return nil, fmt.Errorf("forwarder error: %w", err)
}
}
return (<-responses).bs, nil