kube-proxy细节分析

xiaoxiao2021-02-28  16

其实kube-proxy的代码本身并不复杂,只是有个细节容易被大家忽略。大家可能都知道它有轮询的负载均衡策略,是通过iptables实现的,那它是怎样控制平均转发的呢?iptables的statistic模块支持random(随机)模式,那怎样控制权重呢?看代码,一步一步分析。

{ tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT} for _, table := range tablesNeedServicesChain { if _, err := proxier.iptables.EnsureChain(table, kubeServicesChain); err != nil { glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, kubeServicesChain, err) return } } tableChainsNeedJumpServices := []struct { table utiliptables.Table chain utiliptables.Chain }{ {utiliptables.TableFilter, utiliptables.ChainInput}, {utiliptables.TableFilter, utiliptables.ChainOutput}, {utiliptables.TableNAT, utiliptables.ChainOutput}, {utiliptables.TableNAT, utiliptables.ChainPrerouting}, } comment := "kubernetes service portals" args := []string{"-m", "comment", "--comment", comment, "-j", string(kubeServicesChain)} for _, tc := range tableChainsNeedJumpServices { if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil { glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, kubeServicesChain, err) return } } }

首先是在filter表的INPUT/OUTPUT链和nat表的OUTPUT/PREROUTING链上建立规则,全部跳转到KUBE-SERVICES链,效果如下:

-A OUTPUT -m comment --comment "kubernetes service portals" -j KUBE-SERVICES -A PREROUTING -m comment --comment "kubernetes service portals" -j KUBE-SERVICES -A OUTPUT -m comment --comment "kubernetes service portals" -j KUBE-SERVICES

这样本机发出的流量(OUTPUT)和进入本机的流量(PREROUTING)都会被KUBE-SERVICES链截获。

当然,有些流量需要通过SNAT出去,所以还要在nat表的POSTROUTING链上建立跳转到KUBE-POSTROUTING链的规则:

{ if _, err := proxier.iptables.EnsureChain(utiliptables.TableNAT, kubePostroutingChain); err != nil { glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, kubePostroutingChain, err) return } comment := "kubernetes postrouting rules" args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)} if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil { glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err) return } }

效果如下:

-A POSTROUTING -m comment --comment "kubernetes postrouting rules" -j KUBE-POSTROUTING -A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -m mark --mark 0x4000/0x4000 -j MASQUERADE

现在开始建立kubernetes proxy的各个链

writeLine(proxier.filterChains, "*filter") writeLine(proxier.natChains, "*nat") // Make sure we keep stats for the top-level chains, if they existed // (which most should have because we created them above). if chain, ok := existingFilterChains[kubeServicesChain]; ok { writeLine(proxier.filterChains, chain) } else { writeLine(proxier.filterChains, utiliptables.MakeChainLine(kubeServicesChain)) } if chain, ok := existingNATChains[kubeServicesChain]; ok { writeLine(proxier.natChains, chain) } else { writeLine(proxier.natChains, utiliptables.MakeChainLine(kubeServicesChain)) } if chain, ok := existingNATChains[kubeNodePortsChain]; ok { writeLine(proxier.natChains, chain) } else { writeLine(proxier.natChains, utiliptables.MakeChainLine(kubeNodePortsChain)) } if chain, ok := existingNATChains[kubePostroutingChain]; ok { writeLine(proxier.natChains, chain) } else { writeLine(proxier.natChains, utiliptables.MakeChainLine(kubePostroutingChain)) } if chain, ok := existingNATChains[KubeMarkMasqChain]; ok { writeLine(proxier.natChains, chain) } else { writeLine(proxier.natChains, utiliptables.MakeChainLine(KubeMarkMasqChain)) }

这段代码声明了KUBE-SERVICES、KUBE-NODEPORTS、KUBE-POSTROUTING、KUBE-MARK-MASQ这几条链(如果链已存在则保留原有的计数)。

通过kubernetes创建的service会分配一个clusterIP,这些clusterIP是在iptables上面实现的

args := []string{ "-A", string(kubeServicesChain), "-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString), "-m", protocol, "-p", protocol, "-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()), "--dport", fmt.Sprintf("%d", svcInfo.port), } if proxier.masqueradeAll { writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...) } if len(proxier.clusterCIDR) > 0 { writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...) } writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)

上面就是截获发往clusterIP的流量并跳转到该service对应的链,最终做DNAT。这里需要补充的是:如果一个服务后面有多个endpoint,就要在这些endpoint之间分配流量,代码如下:

for i, endpointChain := range endpointChains { // Balancing rules in the per-service chain. args := []string{ "-A", string(svcChain), "-m", "comment", "--comment", svcNameString, } if i < (n - 1) { // Each rule is a probabilistic match. args = append(args, "-m", "statistic", "--mode", "random", "--probability", fmt.Sprintf("%0.5f", 1.0/float64(n-i))) } // The final (or only if n == 1) rule is a guaranteed match. args = append(args, "-j", string(endpointChain)) writeLine(proxier.natRules, args...) // Rules in the per-endpoint chain. args = []string{ "-A", string(endpointChain), "-m", "comment", "--comment", svcNameString, } // Handle traffic that loops back to the originator with SNAT. writeLine(proxier.natRules, append(args, "-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i].endpoint, ":")[0]), "-j", string(KubeMarkMasqChain))...) // Update client-affinity lists. if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP { args = append(args, "-m", "recent", "--name", string(endpointChain), "--set") } // DNAT to final destination. args = append(args, "-m", protocol, "-p", protocol, "-j", "DNAT", "--to-destination", endpoints[i].endpoint) writeLine(proxier.natRules, args...) }

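上面通过循环的方式创建到后端endpoint的转发规则,每条规则的匹配概率由--probability后面的1.0/float64(n-i)计算得出,最后一条规则不带概率,必定匹配。譬如有两个endpoint的场景,概率依次是0.5和1,也就是第一条规则50%概率匹配,第二条100%匹配;三个endpoint时类似,依次是33%、50%、100%。由于iptables规则按顺序匹配,后一条规则只作用于前面规则没有命中的流量,所以三个endpoint时各自最终分到的流量是1/3、2/3×1/2=1/3、2/3×1/2×1=1/3,也就实现了平均转发。下面是10个endpoint的例子。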

kubectl get svc --all-namespaces NAMESPACE NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE admin docker2048 10.13.52.135 11.11.1.1 80/TCP 1d [root@master-62 ~]# [root@master-62 ~]# iptables-save |grep 10.13.52.135 -A KUBE-SERVICES -d 10.13.52.135/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 cluster IP" -m tcp --dport 80 -j KUBE-SVC-MHWEDWK6NM5OGU2T [root@master-62 ~]# [root@master-62 ~]# [root@master-62 ~]# iptables-save |grep KUBE-SVC-MHWEDWK6NM5OGU2T :KUBE-SVC-MHWEDWK6NM5OGU2T - [0:0] -A KUBE-SERVICES -d 10.13.52.135/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 cluster IP" -m tcp --dport 80 -j KUBE-SVC-MHWEDWK6NM5OGU2T -A KUBE-SERVICES -d 11.11.1.1/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 external IP" -m tcp --dport 80 -m physdev ! --physdev-is-in -m addrtype ! --src-type LOCAL -j KUBE-SVC-MHWEDWK6NM5OGU2T -A KUBE-SERVICES -d 11.11.1.1/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 external IP" -m tcp --dport 80 -m addrtype --dst-type LOCAL -j KUBE-SVC-MHWEDWK6NM5OGU2T -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.10000000009 -j KUBE-SEP-VC767CJYOTCBCN3B -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.11110999994 -j KUBE-SEP-HQELSIUR5HSCB2VN -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.12500000000 -j KUBE-SEP-X2UDSU7Q4UA4IKY7 -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.14286000002 -j KUBE-SEP-DQ3TZIZIDTXU77P7 -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.16667000018 -j KUBE-SEP-A3JWOZYQIIDDEKNM -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random 
--probability 0.20000000019 -j KUBE-SEP-6EZ2MUBOPU2WH44E -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.25000000000 -j KUBE-SEP-4KG3GD3BQ5TCAUPR -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.33332999982 -j KUBE-SEP-6EXLETYC4LYB5NLM -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.50000000000 -j KUBE-SEP-VLQQMEFA6Y5RZLE7 -A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -j KUBE-SEP-CXDZACZ7ESWWLYJM
转载请注明原文地址: https://www.6miu.com/read-800090.html

最新回复(0)