Compare commits
1 Commits
main
...
irbekrm/ex
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d37f2f5085 |
@@ -18,7 +18,9 @@
|
||||
// previously advertised routes. To accept routes, use TS_EXTRA_ARGS to pass
|
||||
// in --accept-routes.
|
||||
// - TS_DEST_IP: proxy all incoming Tailscale traffic to the given
|
||||
// destination.
|
||||
// destination defined by an IP address.
|
||||
// - TS_DEST_DNS_NAME: proxy all incoming Tailscale traffic to the given
|
||||
// destination defined by a DNS name. The DNS name will be periodically resolved and firewall rules updated accordingly.
|
||||
// - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given
|
||||
// destination defined by an IP.
|
||||
// - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given
|
||||
@@ -82,12 +84,14 @@ import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"net"
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -122,7 +126,8 @@ func main() {
|
||||
Hostname: defaultEnv("TS_HOSTNAME", ""),
|
||||
Routes: defaultEnvStringPointer("TS_ROUTES"),
|
||||
ServeConfigPath: defaultEnv("TS_SERVE_CONFIG", ""),
|
||||
ProxyTo: defaultEnv("TS_DEST_IP", ""),
|
||||
ProxyTargetIP: defaultEnv("TS_DEST_IP", ""),
|
||||
ProxyTargetDNSName: defaultEnv("TS_DEST_DNS_NAME", ""),
|
||||
TailnetTargetIP: defaultEnv("TS_TAILNET_TARGET_IP", ""),
|
||||
TailnetTargetFQDN: defaultEnv("TS_TAILNET_TARGET_FQDN", ""),
|
||||
DaemonExtraArgs: defaultEnv("TS_TAILSCALED_EXTRA_ARGS", ""),
|
||||
@@ -150,8 +155,8 @@ func main() {
|
||||
if err := ensureTunFile(cfg.Root); err != nil {
|
||||
log.Fatalf("Unable to create tuntap device file: %v", err)
|
||||
}
|
||||
if cfg.ProxyTo != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
|
||||
if err := ensureIPForwarding(cfg.Root, cfg.ProxyTo, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
|
||||
if cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
|
||||
if err := ensureIPForwarding(cfg.Root, cfg.ProxyTargetIP, cfg.ProxyTargetDNSName, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
|
||||
log.Printf("Failed to enable IP forwarding: %v", err)
|
||||
log.Printf("To run tailscale as a proxy or router container, IP forwarding must be enabled.")
|
||||
if cfg.InKubernetes {
|
||||
@@ -341,7 +346,7 @@ authLoop:
|
||||
}
|
||||
|
||||
var (
|
||||
wantProxy = cfg.ProxyTo != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress
|
||||
wantProxy = cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress
|
||||
wantDeviceInfo = cfg.InKubernetes && cfg.KubeSecret != "" && cfg.KubernetesCanPatch
|
||||
startupTasksDone = false
|
||||
currentIPs deephash.Sum // tailscale IPs assigned to device
|
||||
@@ -349,6 +354,9 @@ authLoop:
|
||||
|
||||
currentEgressIPs deephash.Sum
|
||||
|
||||
addrs []netip.Prefix
|
||||
backendAddrs []net.IP
|
||||
|
||||
certDomain = new(atomic.Pointer[string])
|
||||
certDomainChanged = make(chan bool, 1)
|
||||
)
|
||||
@@ -362,6 +370,16 @@ authLoop:
|
||||
log.Fatalf("error creating new netfilter runner: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// If we are proxying to a target specified by a DNS name, periodically
|
||||
// resolve the DNS name and update firewall rules if the backend IPs
|
||||
// have changed.
|
||||
const proxyTargetIPsResolvePeriod = time.Minute * 10
|
||||
var ts time.Ticker
|
||||
if cfg.ProxyTargetDNSName != "" {
|
||||
ts = *time.NewTicker(proxyTargetIPsResolvePeriod)
|
||||
}
|
||||
|
||||
notifyChan := make(chan ipn.Notify)
|
||||
errChan := make(chan error)
|
||||
go func() {
|
||||
@@ -399,7 +417,7 @@ runLoop:
|
||||
log.Fatalf("tailscaled left running state (now in state %q), exiting", *n.State)
|
||||
}
|
||||
if n.NetMap != nil {
|
||||
addrs := n.NetMap.SelfNode.Addresses().AsSlice()
|
||||
addrs = n.NetMap.SelfNode.Addresses().AsSlice()
|
||||
newCurrentIPs := deephash.Hash(&addrs)
|
||||
ipsHaveChanged := newCurrentIPs != currentIPs
|
||||
|
||||
@@ -441,12 +459,32 @@ runLoop:
|
||||
}
|
||||
currentEgressIPs = newCurentEgressIPs
|
||||
}
|
||||
if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged {
|
||||
if cfg.ProxyTargetIP != "" && len(addrs) > 0 && ipsHaveChanged {
|
||||
log.Printf("Installing proxy rules")
|
||||
if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil {
|
||||
if err := installIngressForwardingRule(ctx, cfg.ProxyTargetIP, addrs, nfr); err != nil {
|
||||
log.Fatalf("installing ingress proxy rules: %v", err)
|
||||
}
|
||||
}
|
||||
if cfg.ProxyTargetDNSName != "" {
|
||||
newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
|
||||
if err != nil {
|
||||
log.Printf("unable to resolve DNS name %s: %v, retrying in %s", cfg.ProxyTargetDNSName, err, proxyTargetIPsResolvePeriod)
|
||||
continue
|
||||
}
|
||||
backendsHaveChanged := slices.CompareFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) int {
|
||||
if ip1.Equal(ip2) {
|
||||
return 0
|
||||
}
|
||||
return -1
|
||||
})
|
||||
if len(addrs) > 0 && (backendsHaveChanged != 0 || ipsHaveChanged) && len(newBackendAddrs) > 0 {
|
||||
log.Printf("installing ingresss proxy rules for backends %v", newBackendAddrs)
|
||||
if err := installIngressForwardingRuleExternalNameService(ctx, newBackendAddrs, addrs, nfr); err != nil {
|
||||
log.Fatalf("error installing ingress proxy rules: %v", err)
|
||||
}
|
||||
}
|
||||
backendAddrs = newBackendAddrs
|
||||
}
|
||||
if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) > 0 {
|
||||
cd := n.NetMap.DNS.CertDomains[0]
|
||||
prev := certDomain.Swap(ptr.To(cd))
|
||||
@@ -511,12 +549,31 @@ runLoop:
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
wg.Add(1)
|
||||
go reaper()
|
||||
}
|
||||
}
|
||||
case <-ts.C:
|
||||
newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
|
||||
if err != nil {
|
||||
log.Printf("unable to resolve DNS name %s: %v, retrying in %s", cfg.ProxyTargetDNSName, err, proxyTargetIPsResolvePeriod.String())
|
||||
continue
|
||||
}
|
||||
backendsHaveChanged := slices.CompareFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) int {
|
||||
if ip1.Equal(ip2) {
|
||||
return 0
|
||||
}
|
||||
return -1
|
||||
})
|
||||
if backendsHaveChanged != 0 && len(newBackendAddrs) != 0 && len(addrs) != 0 {
|
||||
log.Printf("Backend address change detected, installing proxy rules for backends %v", newBackendAddrs)
|
||||
if err := installIngressForwardingRuleExternalNameService(ctx, newBackendAddrs, addrs, nfr); err != nil {
|
||||
log.Fatalf("installing ingress proxy rules for DNS target %s: %v", cfg.ProxyTargetDNSName, err)
|
||||
}
|
||||
}
|
||||
backendAddrs = newBackendAddrs
|
||||
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
@@ -757,12 +814,12 @@ func ensureTunFile(root string) error {
|
||||
}
|
||||
|
||||
// ensureIPForwarding enables IPv4/IPv6 forwarding for the container.
|
||||
func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
|
||||
func ensureIPForwarding(root, clusterProxyTargetIP, clusterProxyTargetDNSName, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
|
||||
var (
|
||||
v4Forwarding, v6Forwarding bool
|
||||
)
|
||||
if clusterProxyTarget != "" {
|
||||
proxyIP, err := netip.ParseAddr(clusterProxyTarget)
|
||||
if clusterProxyTargetIP != "" {
|
||||
proxyIP, err := netip.ParseAddr(clusterProxyTargetIP)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid cluster destination IP: %v", err)
|
||||
}
|
||||
@@ -772,6 +829,26 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTarget
|
||||
v6Forwarding = true
|
||||
}
|
||||
}
|
||||
if clusterProxyTargetDNSName != "" {
|
||||
ips, err := resolveDNS(context.Background(), clusterProxyTargetDNSName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error resolving DNS name %s: %w", clusterProxyTargetDNSName, err)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
if ip.To4() != nil {
|
||||
v4Forwarding = true
|
||||
if v6Forwarding {
|
||||
break
|
||||
}
|
||||
}
|
||||
if ip.To16() != nil {
|
||||
v6Forwarding = true
|
||||
if v4Forwarding {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if tailnetTargetiP != "" {
|
||||
proxyIP, err := netip.ParseAddr(tailnetTargetiP)
|
||||
if err != nil {
|
||||
@@ -918,15 +995,77 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
|
||||
return nil
|
||||
}
|
||||
|
||||
func installIngressForwardingRuleExternalNameService(ctx context.Context, backendAddrs []net.IP, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
|
||||
var (
|
||||
tsv4 netip.Addr
|
||||
tsv6 netip.Addr
|
||||
v4Backends []netip.Addr
|
||||
v6Backends []netip.Addr
|
||||
)
|
||||
for _, pfx := range tsIPs {
|
||||
if pfx.IsSingleIP() && pfx.Addr().Is4() {
|
||||
tsv4 = pfx.Addr()
|
||||
continue
|
||||
}
|
||||
if pfx.IsSingleIP() && pfx.Addr().Is6() {
|
||||
tsv6 = pfx.Addr()
|
||||
continue
|
||||
}
|
||||
}
|
||||
for _, ip := range backendAddrs {
|
||||
if ip.To4() != nil {
|
||||
v4Backends = append(v4Backends, netip.AddrFrom4([4]byte(ip.To4())))
|
||||
}
|
||||
if ip.To16() != nil {
|
||||
v6Backends = append(v6Backends, netip.AddrFrom16([16]byte(ip.To16())))
|
||||
}
|
||||
}
|
||||
|
||||
updateFirewall := func(dst netip.Addr, backendTargets []netip.Addr) error {
|
||||
if err := nfr.DNATWithLoadBalancer(dst, backendTargets); err != nil {
|
||||
return fmt.Errorf("installing DNAT rules for ingress backends %+#v: %w", backendTargets, err)
|
||||
}
|
||||
// The backend might advertize MSS higher than that of the
|
||||
// tailscale interfaces. Clamp MSS of packets going out via
|
||||
// tailscale0 interface to its MTU to prevent broken connections
|
||||
// in environments where path MTU discovery is not working.
|
||||
if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
|
||||
return fmt.Errorf("adding rule to clamp traffic via tailscale0: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(v4Backends) != 0 {
|
||||
if !tsv4.IsValid() {
|
||||
log.Printf("backend targets %v contain at least one IPv4 address, but this node's Tailscale IPs do not contain a valid IPv4 address: %v", backendAddrs, tsIPs)
|
||||
} else if err := updateFirewall(tsv4, v4Backends); err != nil {
|
||||
return fmt.Errorf("Installing IPv4 firewall rules: %w", err)
|
||||
}
|
||||
}
|
||||
if len(v6Backends) != 0 && !tsv6.IsValid() {
|
||||
if !tsv6.IsValid() {
|
||||
log.Printf("backend targets %v contain at least one IPv6 address, but this node's Tailscale IPs do not contain a valid IPv6 address: %v", backendAddrs, tsIPs)
|
||||
} else if !nfr.HasIPV6NAT() {
|
||||
log.Printf("backend targets %v contain at least one IPv6 address, but the chosen firewall mode does not support IPv6 NAT", backendAddrs)
|
||||
} else if err := updateFirewall(tsv6, v6Backends); err != nil {
|
||||
return fmt.Errorf("Installing IPv6 firewall rules: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// settings is all the configuration for containerboot.
|
||||
type settings struct {
|
||||
AuthKey string
|
||||
Hostname string
|
||||
Routes *string
|
||||
// ProxyTo is the destination IP to which all incoming
|
||||
// ProxyTargetIP is the destination IP to which all incoming
|
||||
// Tailscale traffic should be proxied. If empty, no proxying
|
||||
// is done. This is typically a locally reachable IP.
|
||||
ProxyTo string
|
||||
ProxyTargetIP string
|
||||
// ProxyTargetDNSName is a DNS name whose backing IP addresses all
|
||||
// incoming Tailscale traffic should be proxied to.
|
||||
ProxyTargetDNSName string
|
||||
// TailnetTargetIP is the destination IP to which all incoming
|
||||
// non-Tailscale traffic should be proxied. This is typically a
|
||||
// Tailscale IP.
|
||||
@@ -966,9 +1105,15 @@ func (s *settings) validate() error {
|
||||
return fmt.Errorf("error validating tailscaled configfile contents: %w", err)
|
||||
}
|
||||
}
|
||||
if s.ProxyTo != "" && s.UserspaceMode {
|
||||
if s.ProxyTargetIP != "" && s.UserspaceMode {
|
||||
return errors.New("TS_DEST_IP is not supported with TS_USERSPACE")
|
||||
}
|
||||
if s.ProxyTargetDNSName != "" && s.UserspaceMode {
|
||||
return errors.New("TS_DEST_DNS_NAME is not supported with TS_USERSPACE")
|
||||
}
|
||||
if s.ProxyTargetDNSName != "" && s.ProxyTargetIP != "" {
|
||||
return errors.New("TS_DEST_DNS_NAME and TS_DEST_IP cannot both be set")
|
||||
}
|
||||
if s.TailnetTargetIP != "" && s.UserspaceMode {
|
||||
return errors.New("TS_TAILNET_TARGET_IP is not supported with TS_USERSPACE")
|
||||
}
|
||||
@@ -993,6 +1138,16 @@ func (s *settings) validate() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveDNS looks up the IP addresses (IPv4 and IPv6) that name resolves to
// using the default resolver and logs the result.
//
// The lookup honours ctx, so callers can cancel it or bound it with a
// deadline. On failure it returns a nil slice and an error wrapping the
// resolver error.
func resolveDNS(ctx context.Context, name string) ([]net.IP, error) {
	ips, err := net.DefaultResolver.LookupIP(ctx, "ip", name)
	if err != nil {
		return nil, fmt.Errorf("error looking up IPs for DNS name %s: %w", name, err)
	}
	log.Printf("%s resolved to %v", name, ips)

	return ips, nil
}
|
||||
|
||||
// defaultEnv returns the value of the given envvar name, or defVal if
|
||||
// unset.
|
||||
func defaultEnv(name, defVal string) string {
|
||||
|
||||
@@ -109,8 +109,9 @@ type tailscaleSTSConfig struct {
|
||||
ParentResourceUID string
|
||||
ChildResourceLabels map[string]string
|
||||
|
||||
ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
|
||||
ClusterTargetIP string // ingress target
|
||||
ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
|
||||
ClusterTargetIP string // ingress target IP
|
||||
ClusterTargetDNSName string // ingress target DNS name
|
||||
// If set to true, operator should configure containerboot to forward
|
||||
// cluster traffic via the proxy set up for Kubernetes Ingress.
|
||||
ForwardClusterTrafficViaL7IngressProxy bool
|
||||
@@ -536,6 +537,12 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
|
||||
Value: sts.ClusterTargetIP,
|
||||
})
|
||||
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
|
||||
} else if sts.ClusterTargetDNSName != "" {
|
||||
container.Env = append(container.Env, corev1.EnvVar{
|
||||
Name: "TS_DEST_DNS_NAME",
|
||||
Value: sts.ClusterTargetDNSName,
|
||||
})
|
||||
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
|
||||
} else if sts.TailnetTargetIP != "" {
|
||||
container.Env = append(container.Env, corev1.EnvVar{
|
||||
Name: "TS_TAILNET_TARGET_IP",
|
||||
|
||||
@@ -200,10 +200,14 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
|
||||
}
|
||||
|
||||
a.mu.Lock()
|
||||
if a.shouldExpose(svc) {
|
||||
if a.shouldExposeClusterIP(svc) {
|
||||
sts.ClusterTargetIP = svc.Spec.ClusterIP
|
||||
a.managedIngressProxies.Add(svc.UID)
|
||||
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
|
||||
} else if a.shouldExposeDNSName(svc) {
|
||||
sts.ClusterTargetDNSName = svc.Spec.ExternalName
|
||||
a.managedIngressProxies.Add(svc.UID)
|
||||
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
|
||||
} else if ip := a.tailnetTargetAnnotation(svc); ip != "" {
|
||||
sts.TailnetTargetIP = ip
|
||||
a.managedEgressProxies.Add(svc.UID)
|
||||
@@ -297,15 +301,22 @@ func validateService(svc *corev1.Service) []string {
|
||||
}
|
||||
|
||||
func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool {
|
||||
return a.shouldExposeClusterIP(svc) || a.shouldExposeDNSName(svc)
|
||||
}
|
||||
|
||||
func (a *ServiceReconciler) shouldExposeClusterIP(svc *corev1.Service) bool {
|
||||
// Headless services can't be exposed, since there is no ClusterIP to
|
||||
// forward to.
|
||||
if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None" {
|
||||
return false
|
||||
}
|
||||
|
||||
return a.hasLoadBalancerClass(svc) || a.hasExposeAnnotation(svc)
|
||||
}
|
||||
|
||||
func (a *ServiceReconciler) shouldExposeDNSName(svc *corev1.Service) bool {
|
||||
return a.hasExposeAnnotation(svc) && svc.Spec.Type == corev1.ServiceTypeExternalName && svc.Spec.ExternalName != ""
|
||||
}
|
||||
|
||||
func (a *ServiceReconciler) hasLoadBalancerClass(svc *corev1.Service) bool {
|
||||
return svc != nil &&
|
||||
svc.Spec.Type == corev1.ServiceTypeLoadBalancer &&
|
||||
|
||||
@@ -373,6 +373,30 @@ func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) err
|
||||
return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String())
|
||||
}
|
||||
|
||||
// DNATWithLoadBalancer adds DNAT rules to load balance all incoming traffic NOT
|
||||
// destined to tailscale0 interface to provided destinations using round robin.
|
||||
// NB: this function clears the nat PREROUTING chain on start, so it is only
|
||||
// safe to use on systems where Tailscale is the only process that uses this
|
||||
// chain (i.e containers).
|
||||
func (i *iptablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
|
||||
table := i.getIPTByAddr(dsts[0])
|
||||
if err := table.ClearChain("nat", "PREROUTING"); err != nil && !isErrChainNotExist(err) {
|
||||
// If clearing the PREROUTING chain fails, fail the whole operation. This
|
||||
// rule is currently only used in Kubernetes containers where a
|
||||
// failed container gets restarted which should hopefully fix things.
|
||||
return fmt.Errorf("error clearing nat PREROUTING chain: %w", err)
|
||||
}
|
||||
// If dsts contain more than one address, for n := n in range(len(dsts)..2) route packets for every nth connection to dsts[n].
|
||||
for i := len(dsts); i >= 2; i-- {
|
||||
dst := dsts[i-1] // the order in which rules for addrs are installed does not matter
|
||||
if err := table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-m", "statistic", "--mode", "nth", "--every", fmt.Sprint(i), "--packet", "0", "-j", "DNAT", "--to-destination", dst.String()); err != nil {
|
||||
return fmt.Errorf("error adding DNAT rule for %s: %w", dst.String(), err)
|
||||
}
|
||||
}
|
||||
// If the packet falls through to this rule, we route to the first destination in the list unconditionally.
|
||||
return table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dsts[0].String())
|
||||
}
|
||||
|
||||
func (i *iptablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
|
||||
table := i.getIPTByAddr(addr)
|
||||
return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/google/nftables"
|
||||
"github.com/google/nftables/binaryutil"
|
||||
"github.com/google/nftables/expr"
|
||||
"golang.org/x/sys/unix"
|
||||
"tailscale.com/net/tsaddr"
|
||||
@@ -114,7 +115,6 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
|
||||
dadderLen = 16
|
||||
fam = unix.NFPROTO_IPV6
|
||||
}
|
||||
|
||||
dnatRule := &nftables.Rule{
|
||||
Table: nat,
|
||||
Chain: preroutingCh,
|
||||
@@ -145,6 +145,91 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
|
||||
return n.conn.Flush()
|
||||
}
|
||||
|
||||
// This function does set up nftables rules to load balance traffic to the
|
||||
// backend targets as expected. However, if the same client makes frequent
|
||||
// connections, the connections are frequently dropped. TODO (irbekrm):
|
||||
// investigate why the connections are dropped.
|
||||
func (n *nftablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
|
||||
nat, preroutingCh, err := n.ensurePreroutingChain(dsts[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("error ensuring PREROUTING chain in nat table: %w", err)
|
||||
}
|
||||
|
||||
// Figure out if we are dealing with IPv4 or IPv6 addresses and set
|
||||
// parameters accordingly.
|
||||
var (
|
||||
dstsMapValType = nftables.TypeIPAddr
|
||||
origDstIPHeaderOffset uint32 = 16
|
||||
origDstIPHeaderLen uint32 = 4
|
||||
fam = nftables.TableFamilyIPv4
|
||||
)
|
||||
if dsts[0].Is6() {
|
||||
dstsMapValType = nftables.TypeIP6Addr
|
||||
origDstIPHeaderOffset = 24
|
||||
origDstIPHeaderLen = 16
|
||||
fam = nftables.TableFamilyIPv6
|
||||
}
|
||||
|
||||
mapElements := make([]nftables.SetElement, len(dsts))
|
||||
for i, addr := range dsts {
|
||||
mapElements[i] = nftables.SetElement{
|
||||
Key: binaryutil.BigEndian.PutUint32(uint32(i)),
|
||||
Val: addr.AsSlice(),
|
||||
}
|
||||
}
|
||||
dstsMap := &nftables.Set{
|
||||
Table: nat,
|
||||
KeyByteOrder: binaryutil.NativeEndian,
|
||||
KeyType: nftables.TypeInteger,
|
||||
DataType: dstsMapValType,
|
||||
IsMap: true,
|
||||
Anonymous: true,
|
||||
Constant: true, // Anonymous sets must be constant (unmodifiable)
|
||||
|
||||
}
|
||||
if err := n.conn.AddSet(dstsMap, mapElements); err != nil {
|
||||
return fmt.Errorf("error creating a new map: %w", err)
|
||||
}
|
||||
|
||||
dnatRule := &nftables.Rule{
|
||||
Table: nat,
|
||||
Chain: preroutingCh,
|
||||
Exprs: []expr.Any{
|
||||
&expr.Payload{
|
||||
DestRegister: 1,
|
||||
Base: expr.PayloadBaseNetworkHeader,
|
||||
Offset: origDstIPHeaderOffset,
|
||||
Len: origDstIPHeaderLen,
|
||||
},
|
||||
&expr.Cmp{
|
||||
Op: expr.CmpOpEq,
|
||||
Register: 1,
|
||||
Data: origDst.AsSlice(),
|
||||
},
|
||||
&expr.Numgen{
|
||||
Register: 1,
|
||||
Type: unix.NFT_NG_INCREMENTAL,
|
||||
Modulus: uint32(len(dsts)),
|
||||
Offset: 0,
|
||||
},
|
||||
&expr.Lookup{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 2,
|
||||
SetName: dstsMap.Name,
|
||||
SetID: dstsMap.ID,
|
||||
IsDestRegSet: true,
|
||||
},
|
||||
&expr.NAT{
|
||||
Type: expr.NATTypeDestNAT,
|
||||
Family: uint32(fam),
|
||||
RegAddrMin: 2,
|
||||
},
|
||||
},
|
||||
}
|
||||
n.conn.InsertRule(dnatRule)
|
||||
return n.conn.Flush()
|
||||
}
|
||||
|
||||
func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error {
|
||||
nat, preroutingCh, err := n.ensurePreroutingChain(dst)
|
||||
if err != nil {
|
||||
@@ -524,6 +609,14 @@ type NetfilterRunner interface {
|
||||
// to the provided destination, as used in the Kubernetes ingress proxies.
|
||||
AddDNATRule(origDst, dst netip.Addr) error
|
||||
|
||||
// DNATWithLoadBalancer adds a rule to the nat/PREROUTING chain to DNAT
|
||||
// traffic destined for the given original destination to the given new
|
||||
// destination(s) using round robin to load balance if more than one
|
||||
// destination is provided. This is used to forward all traffic destined
|
||||
// for the Tailscale interface to the provided destination(s), as used
|
||||
// in the Kubernetes ingress proxies.
|
||||
DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error
|
||||
|
||||
// AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT
|
||||
// traffic destined for dst to src.
|
||||
// This is used to forward traffic destined for the local machine over
|
||||
@@ -533,7 +626,7 @@ type NetfilterRunner interface {
|
||||
// DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT
|
||||
// all traffic inbound from any interface except exemptInterface to dst.
|
||||
// This is used to forward traffic destined for the local machine over
|
||||
// the Tailscale interface, as used in the Kubernetes egress proxies.//
|
||||
// the Tailscale interface, as used in the Kubernetes egress proxies.
|
||||
DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error
|
||||
|
||||
// ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for
|
||||
|
||||
Reference in New Issue
Block a user