京东云开发者|IoT运维 - 如何部署一套高可用K8S集群( 四 )

安装calico
# kubectl apply -f calico-etcd.yaml

验证集群
[root@deploy ~]# kubectl get pod -n kube-system
NAME                                      READY   STATUS    RESTARTS   AGE
calico-kube-controllers-9767fc4b9-tk9fb   1/1     Running   0          6m56s
calico-node-5mc9h                         1/1     Running   0          6m56s
calico-node-dswmp                         1/1     Running   0          6m56s
calico-node-qht2s                         1/1     Running   0          6m56s
calico-node-sdrcg                         1/1     Running   0          6m56s
calico-node-x58lj                         1/1     Running   0          6m56s
coredns-7f6cbbb7b8-fc8rd                  1/1     Running   0          61m
coredns-7f6cbbb7b8-qvw2m                  1/1     Running   0          61m
kube-apiserver-master1                    1/1     Running   2          94m
kube-apiserver-master2                    1/1     Running   0          66m
kube-apiserver-master3                    1/1     Running   0          64m
kube-controller-manager-master1           1/1     Running   2          94m
kube-controller-manager-master2           1/1     Running   0          66m
kube-controller-manager-master3           1/1     Running   0          64m
kube-proxy-bscfn                          1/1     Running   0          62m
kube-proxy-f2fpb                          1/1     Running   0          64m
kube-proxy-kt7nl                          1/1     Running   0          66m
kube-proxy-lzww8                          1/1     Running   0          62m
kube-proxy-zn6gj                          1/1     Running   2          94m
kube-scheduler-master1                    1/1     Running   2          94m
kube-scheduler-master2                    1/1     Running   0          66m
kube-scheduler-master3                    1/1     Running   0          64m

问题与解决

1、kubelet 日志报错 failed to get cgroup stats for "/system.slice/kubelet.service"
11月 18 09:00:42 master1 kubelet[2424]: E1118 09:00:42.948672    2424 summary_sys_containers.go:47] "Failed to get system container stats" err="failed to get cgroup stats for \"/system.slice/kubelet.service\": failed to get container info for \"/system.slice/kubelet.service\": unknown container \"/system.slice/kubelet.service\"" containerName="/system.slice/kubelet.service"
11月 18 09:00:52 master1 kubelet[2424]: E1118 09:00:52.956142    2424 summary_sys_containers.go:47] "Failed to get system container stats" err="failed to get cgroup stats for \"/system.slice/kubelet.service\": failed to get container info for \"/system.slice/kubelet.service\": unknown container \"/system.slice/kubelet.service\"" containerName="/system.slice/kubelet.service"
11月 18 09:01:02 master1 kubelet[2424]: E1118 09:01:02.961022    2424 summary_sys_containers.go:47] "Failed to get system container stats" err="failed to get cgroup stats for \"/system.slice/kubelet.service\": failed to get container info for \"/system.slice/kubelet.service\": unknown container \"/system.slice/kubelet.service\"" containerName="/system.slice/kubelet.service"
11月 18 09:01:12 master1 kubelet[2424]: E1118 09:01:12.966033    2424 summary_sys_containers.go:47] "Failed to get system container stats" err="failed to get cgroup stats for \"/system.slice/kubelet.service\": failed to get container info for \"/system.slice/kubelet.service\": unknown container \"/system.slice/kubelet.service\"" containerName="/system.slice/kubelet.service"
11月 18 09:01:22 master1 kubelet[2424]: E1118 09:01:22.970644    2424 summary_sys_containers.go:47] "Failed to get system container stats" err="failed to get cgroup stats for \"/system.slice/kubelet.service\": failed to get container info for \"/system.slice/kubelet.service\": unknown container \"/system.slice/kubelet.service\"" containerName="/system.slice/kubelet.service"

解决方案
配置文件中写入 CPUAccounting=true 与 MemoryAccounting=true
[root@master2 ~]# cat /lib/systemd/system/kubelet.service.d/10-kubeadm.conf
# Note: This dropin only works with kubeadm and kubelet v1.11+
[Service]
CPUAccounting=true
MemoryAccounting=true
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
EnvironmentFile=-/etc/sysconfig/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS

2、kubectl get cs 提示 dial tcp 127.0.0.1:10251: connect: connection refused
[root@deploy ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS      MESSAGE                                                                                       ERROR
scheduler            Unhealthy   Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
etcd-1               Healthy     {"health":"true","reason":""}
controller-manager   Healthy     ok
etcd-0               Healthy     {"health":"true","reason":""}
etcd-2               Healthy     {"health":"true","reason":""}

解决方案
注释掉 kube-scheduler.yaml 中的 --port=0 参数
[root@master1 ~]# cat /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
    #- --port=0
    image: registry.aliyuncs.com/google_containers/kube-scheduler:v1.22.3
    imagePullPolicy: IfNotPresent

推荐阅读