Deploying a 3-Node K8S Cluster

0x00 Preface

The topic of this series is container cluster networking, and the feasibility of accelerating it in hardware.

I have no intention of turning this series into a "from beginner to expert" style tutorial. Unless I feel it is necessary, I will not explain the underlying basics involved (the command line, container fundamentals, VxLAN networking, and so on). Instead, this is a journal of my study and research process, kept so I can review and recall it later.

This article is the first installment of the series: a step-by-step deployment log describing how to deploy a 3-node K8S cluster, with flannel as the network plugin. All operations here continue from the previous article, so be sure to read it before starting the deployment. Also note that sections 0x01 through 0x05 only show the key steps on node 1, but those steps must be performed on all three nodes!

0x01 Upgrade the System Kernel

# Check the current kernel version
[root@node-01 ~]# uname -r
3.10.0-1160.42.2.el7.x86_64

# Configure the ELRepo repository
[root@node-01 ~]# rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
[root@node-01 ~]# yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm

# Browse the repository; the long-term (lt) kernel is 5.4, so upgrade to the 5.4 kernel
[root@node-01 ~]# yum --enablerepo=elrepo-kernel list kernel-lt*

# Installing the new kernel reports package conflicts; remove the old (3.10.0) packages as prompted
[root@node-01 ~]# yum remove kernel-tools kernel-tools-libs

# Retry the installation; everything goes smoothly
[root@node-01 ~]# yum --enablerepo=elrepo-kernel install kernel-lt*

# Set the newly installed kernel as the default boot kernel
[root@node-01 ~]# cat /boot/grub2/grub.cfg | grep menuentry
[root@node-01 ~]# grub2-set-default 'Red Hat Enterprise Linux Server (5.4.152-1.el7.elrepo.x86_64) 7.9 (Maipo)'
[root@node-01 ~]# grub2-editenv list
saved_entry=Red Hat Enterprise Linux Server (5.4.152-1.el7.elrepo.x86_64) 7.9 (Maipo)

# Reboot to run the new kernel
[root@node-01 ~]# reboot
[root@node-01 ~]# uname -r
5.4.152-1.el7.elrepo.x86_64
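
As an aside, grub2-set-default also accepts a numeric index instead of the full menuentry string. A minimal sketch, assuming the newly installed 5.4 kernel is the first entry; check the listing first and adjust the index to match your own output:

# List boot entries with their 0-based index
awk -F\' '/^menuentry /{print i++ " : " $2}' /boot/grub2/grub.cfg
# Assumption: the 5.4.152 elrepo kernel is entry 0
grub2-set-default 0
grub2-editenv list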

0x02 Enable IPVS Support

# Prepare a script that enables IPVS
[root@node-01 ~]# cat /etc/sysconfig/modules/ipvs.modules 
#!/bin/bash
ipvs_modules="ip_vs ip_vs_lc ip_vs_wlc ip_vs_rr ip_vs_wrr ip_vs_lblc ip_vs_lblcr ip_vs_dh ip_vs_sh ip_vs_fo ip_vs_nq ip_vs_sed ip_vs_ftp nf_conntrack"
for kernel_module in ${ipvs_modules}; do
  /sbin/modinfo -F filename ${kernel_module} > /dev/null 2>&1
  if [ $? -eq 0 ]; then
    /sbin/modprobe ${kernel_module}
  fi
done

# Run it and check the result
[root@node-01 ~]# chmod +x /etc/sysconfig/modules/ipvs.modules 
[root@node-01 ~]# bash /etc/sysconfig/modules/ipvs.modules 
[root@node-01 ~]# lsmod | grep ip_vs
ip_vs_ftp              16384  0 
nf_nat                 40960  1 ip_vs_ftp
ip_vs_sed              16384  0 
ip_vs_nq               16384  0 
ip_vs_fo               16384  0 
ip_vs_sh               16384  0 
ip_vs_dh               16384  0 
ip_vs_lblcr            16384  0 
ip_vs_lblc             16384  0 
ip_vs_wrr              16384  0 
ip_vs_rr               16384  0 
ip_vs_wlc              16384  0 
ip_vs_lc               16384  0 
ip_vs                 155648  24 ip_vs_wlc,ip_vs_rr,ip_vs_dh,ip_vs_lblcr,ip_vs_sh,ip_vs_fo,ip_vs_nq,ip_vs_lblc,ip_vs_wrr,ip_vs_lc,ip_vs_sed,ip_vs_ftp
nf_conntrack          147456  2 nf_nat,ip_vs
nf_defrag_ipv6         24576  2 nf_conntrack,ip_vs
libcrc32c              16384  4 nf_conntrack,nf_nat,xfs,ip_vs
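
The script above loads the modules for the current boot; whether it runs again automatically after a reboot depends on the init scripts honoring /etc/sysconfig/modules/. A more explicit alternative on systemd systems is /etc/modules-load.d/, which systemd-modules-load reads at every boot. A minimal sketch, listing a subset of the modules (add the rest from ipvs.modules as needed):

cat <<EOF > /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
systemctl restart systemd-modules-load.service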

0x03 Disable Swap, Enable Kernel Forwarding

# Disable the swap partition
[root@node-01 ~]# swapoff -a
[root@node-01 ~]# vim /etc/fstab 
[root@node-01 ~]# cat /etc/fstab 
#
# /etc/fstab
# Created by anaconda on Tue Oct 12 15:34:19 2021
#
# Accessible filesystems, by reference, are maintained under '/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/rhel-root   /                       xfs     defaults        0 0
UUID=6c16024f-882a-43f3-9144-0ef8cdfc1f80 /boot                   xfs     defaults        0 0
/dev/mapper/rhel-home   /home                   xfs     defaults        0 0
#/dev/mapper/rhel-swap   swap                    swap    defaults        0 0
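
Editing /etc/fstab by hand works fine; if you prefer to script the change on all three nodes, a sed one-liner can comment out the swap entry. A rough sketch (back up the file first):

cp /etc/fstab /etc/fstab.bak
# Comment out any uncommented line that mounts a swap device
sed -ri 's@^([^#].*[[:space:]]swap[[:space:]]+swap[[:space:]].*)@#\1@' /etc/fstab
grep swap /etc/fstab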

# Enable kernel forwarding
[root@node-01 ~]# vim /etc/sysctl.conf 
[root@node-01 ~]# cat /etc/sysctl.conf 
# sysctl settings are defined through files in
# /usr/lib/sysctl.d/, /run/sysctl.d/, and /etc/sysctl.d/.
#
# Vendors settings live in /usr/lib/sysctl.d/.
# To override a whole file, create a new file with the same in
# /etc/sysctl.d/ and put new settings there. To override
# only specific settings, add a file with a lexically later
# name in /etc/sysctl.d/ and put new settings there.
#
# For more information, see sysctl.conf(5) and sysctl.d(5).

vm.swappiness = 0
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
[root@node-01 ~]# sysctl -a | grep vm.swappiness
sysctl: reading key "net.ipv6.conf.all.stable_secret"
sysctl: reading key "net.ipv6.conf.default.stable_secret"
sysctl: reading key "net.ipv6.conf.ens192.stable_secret"
sysctl: reading key "net.ipv6.conf.lo.stable_secret"
vm.swappiness = 30

# Apply the settings and verify
[root@node-01 ~]# sysctl -p
vm.swappiness = 0
sysctl: cannot stat /proc/sys/net/bridge/bridge-nf-call-iptables: No such file or directory
net.ipv4.ip_forward = 1
sysctl: cannot stat /proc/sys/net/bridge/bridge-nf-call-ip6tables: No such file or directory
[root@node-01 ~]# sysctl -a | grep vm.swappiness
sysctl: reading key "net.ipv6.conf.all.stable_secret"
sysctl: reading key "net.ipv6.conf.default.stable_secret"
sysctl: reading key "net.ipv6.conf.ens192.stable_secret"
sysctl: reading key "net.ipv6.conf.lo.stable_secret"
vm.swappiness = 0
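
The two "cannot stat /proc/sys/net/bridge/..." errors above mean the br_netfilter module is not loaded yet, so the two net.bridge.* settings cannot be applied at this point; they only take effect once the module is present. Loading the module now, and persisting it across reboots, avoids the problem; for example:

# Load the bridge netfilter module now
modprobe br_netfilter
# Have systemd load it again after every reboot
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
# Re-apply; the two net.bridge.* keys should now be accepted
sysctl -p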

0x04 Install and Configure the Docker Container Runtime

# Install Docker Community Edition
[root@node-01 ~]# yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@node-01 ~]# sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
[root@node-01 ~]# sed -i 's/\$releasever/7/g' /etc/yum.repos.d/docker-ce.repo 
[root@node-01 ~]# sed -i 's/\$basearch/x86_64/g' /etc/yum.repos.d/docker-ce.repo 
[root@node-01 ~]# yum makecache fast
[root@node-01 ~]# yum -y install docker-ce
[root@node-01 ~]# systemctl enable docker
Created symlink from /etc/systemd/system/multi-user.target.wants/docker.service to /usr/lib/systemd/system/docker.service.
[root@node-01 ~]# systemctl restart docker
[root@node-01 ~]# docker version
Client: Docker Engine - Community
 Version:           20.10.9
 API version:       1.41
 Go version:        go1.16.8
 Git commit:        c2ea9bc
 Built:             Mon Oct  4 16:08:14 2021
 OS/Arch:           linux/amd64
 Context:           default
 Experimental:      true

Server: Docker Engine - Community
 Engine:
  Version:          20.10.9
  API version:      1.41 (minimum version 1.12)
  Go version:       go1.16.8
  Git commit:       79ea9d3
  Built:            Mon Oct  4 16:06:37 2021
  OS/Arch:          linux/amd64
  Experimental:     false
 containerd:
  Version:          1.4.11
  GitCommit:        5b46e404f6b9f661a205e28d59c982d3634148f8
 runc:
  Version:          1.0.2
  GitCommit:        v1.0.2-0-g52b36a2
 docker-init:
  Version:          0.19.0
  GitCommit:        de40ad0

# Use the Aliyun registry mirror accelerator
# Set Docker's cgroup driver to systemd; otherwise the mismatch with kubelet makes the k8s initialization fail and the kubelet service error out on startup
[root@node-01 ~]# mkdir -p /etc/docker
[root@node-01 ~]# tee /etc/docker/daemon.json <<-'EOF'
{
  "registry-mirrors": ["https://******.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
[root@node-01 ~]# systemctl daemon-reload
[root@node-01 ~]# systemctl restart docker
[root@node-01 ~]# docker info
Client:
 Context:    default
 Debug Mode: false
 Plugins:
  app: Docker App (Docker Inc., v0.9.1-beta3)
  buildx: Build with BuildKit (Docker Inc., v0.6.3-docker)
  scan: Docker Scan (Docker Inc., v0.8.0)

Server:
 Containers: 17
  Running: 16
  Paused: 0
  Stopped: 1
 Images: 12
 Server Version: 20.10.9
 Storage Driver: overlay2
  Backing Filesystem: xfs
  Supports d_type: true
  Native Overlay Diff: true
  userxattr: false
 Logging Driver: json-file
 Cgroup Driver: systemd
 Cgroup Version: 1
 Plugins:
  Volume: local
  Network: bridge host ipvlan macvlan null overlay
  Log: awslogs fluentd gcplogs gelf journald json-file local logentries splunk syslog
 Swarm: inactive
 Runtimes: runc io.containerd.runc.v2 io.containerd.runtime.v1.linux
 Default Runtime: runc
 Init Binary: docker-init
 containerd version: 5b46e404f6b9f661a205e28d59c982d3634148f8
 runc version: v1.0.2-0-g52b36a2
 init version: de40ad0
 Security Options:
  seccomp
   Profile: default
 Kernel Version: 5.4.152-1.el7.elrepo.x86_64
 Operating System: Red Hat Enterprise Linux Server 7.9 (Maipo)
 OSType: linux
 Architecture: x86_64
 CPUs: 8
 Total Memory: 7.776GiB
 Name: node-01.open-source.cc
 ID: OVI7:CEM3:66VC:TIYE:LCJU:TILS:UTEA:OTDW:C3CA:22VS:FLK5:OOKJ
 Docker Root Dir: /var/lib/docker
 Debug Mode: false
 Registry: https://index.docker.io/v1/
 Labels:
 Experimental: false
 Insecure Registries:
  127.0.0.0/8
 Registry Mirrors:
  https://******.mirror.aliyuncs.com/
 Live Restore Enabled: false
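
A quick way to confirm that the cgroup driver really switched to systemd (so it matches the kubelet) is to query it directly, for example:

# Should print "systemd" after the daemon.json change and the restart
docker info --format '{{.CgroupDriver}}'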

0x05 Install the K8S Packages

[root@node-01 ~]# cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
[root@node-01 ~]# yum install -y kubelet kubeadm kubectl
[root@node-01 ~]# systemctl enable kubelet 
[root@node-01 ~]# systemctl restart kubelet
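
Note that yum install kubelet kubeadm kubectl pulls whatever is newest in the repository (v1.22.2 at the time of writing). If you want all three nodes, or nodes added later, to stay on exactly the same version, the packages can be pinned explicitly; a sketch (the version number is just an example):

yum install -y kubelet-1.22.2 kubeadm-1.22.2 kubectl-1.22.2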

0x06 Initialize and Configure the K8S Cluster

The previous five sections listed the steps on only one of the nodes, even though they must be executed on all three nodes in the cluster. This section lists the complete set of operations on all three nodes.

# Export the default init configuration and customize the kubeadm init file
[root@node-01 ~]# kubeadm config print init-defaults
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 1.2.3.4
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  imagePullPolicy: IfNotPresent
  name: node
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
kubernetesVersion: 1.22.0
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
scheduler: {}
[root@node-01 ~]# kubeadm config print init-defaults > kubeadm.yaml
# Contents of the modified configuration file
[root@node-01 ~]# cat kubeadm.yaml 
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: sresre.0123456789012345
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.4.101
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  imagePullPolicy: IfNotPresent
  name: node-01.open-source.cc
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
kubernetesVersion: 1.22.0
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16
scheduler: {}
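
One thing worth pointing out: loading the IPVS modules in 0x02 does not by itself make kube-proxy use IPVS; it still defaults to iptables mode. If IPVS proxying is the goal, a KubeProxyConfiguration section can be appended to the same file before running kubeadm init. A minimal sketch:

cat <<EOF >> kubeadm.yaml
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
EOF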

# Initialize the cluster's master node with kubeadm and the prepared init configuration file
[root@node-01 ~]# kubeadm init --config=kubeadm.yaml
[init] Using Kubernetes version: v1.22.0
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'

......
# output omitted
......

[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.4.101:6443 --token sresre.0123456789012345 \
    --discovery-token-ca-cert-hash sha256:6a747449922a0132067b66989debf321a3a49e0a00a705891e34dc06eb1636e2 
[root@node-01 ~]# mkdir -p $HOME/.kube
[root@node-01 ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@node-01 ~]# chown $(id -u):$(id -g) $HOME/.kube/config
[root@node-01 ~]# ll -h .kube/
total 8.0K
-rw------- 1 root root 5.6K Oct 14 09:38 config
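
The bootstrap token in the join command printed above expires after 24 hours (the ttl set in kubeadm.yaml). If a node needs to join later than that, a fresh join command can be generated on the control-plane node, for example:

# Prints a new "kubeadm join ..." line with a new token and the CA cert hash
kubeadm token create --print-join-command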

# Deploy the flannel network plugin
[root@node-01 ~]# wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
[root@node-01 ~]# kubectl apply -f kube-flannel.yml
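
Before joining the other two nodes, it is worth waiting for the flannel and CoreDNS pods to come up and for the master node to report Ready, for example:

kubectl get pods --all-namespaces -o wide
kubectl get nodes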

# Run the join command on node 2
[root@node-02 ~]# kubeadm join 192.168.4.101:6443 --token sresre.0123456789012345 \
> --discovery-token-ca-cert-hash sha256:6a747449922a0132067b66989debf321a3a49e0a00a705891e34dc06eb1636e2
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

[root@node-02 ~]# mkdir -p $HOME/.kube
[root@node-02 ~]# scp root@node-01:/etc/kubernetes/admin.conf $HOME/.kube/config
[root@node-02 ~]# chown $(id -u):$(id -g) $HOME/.kube/config

# Run the join command on node 3
[root@node-03 ~]# kubeadm join 192.168.4.101:6443 --token sresre.0123456789012345 \
> --discovery-token-ca-cert-hash sha256:6a747449922a0132067b66989debf321a3a49e0a00a705891e34dc06eb1636e2
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

[root@node-03 ~]# mkdir -p $HOME/.kube
[root@node-03 ~]# scp root@node-01:/etc/kubernetes/admin.conf $HOME/.kube/config
[root@node-03 ~]# chown $(id -u):$(id -g) $HOME/.kube/config

# Check the cluster status from node 1; everything looks normal, deployment complete
[root@node-01 ~]# kubectl get nodes
NAME                     STATUS   ROLES                  AGE     VERSION
node-01.open-source.cc   Ready    control-plane,master   10m     v1.22.2
node-02.open-source.cc   Ready    <none>                 9m21s   v1.22.2
node-03.open-source.cc   Ready    <none>                 8m53s   v1.22.2

# Configure command-line completion on all three cluster nodes
[root@node-01 ~]# echo "source <(kubectl completion bash)" >> ~/.bashrc
[root@node-02 ~]# echo "source <(kubectl completion bash)" >> ~/.bashrc
[root@node-03 ~]# echo "source <(kubectl completion bash)" >> ~/.bashrc
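
The completion script generated by kubectl completion bash relies on the bash-completion package; if completion does not work after logging back in, install it first (shown once here, same on the other nodes):

yum install -y bash-completion
source ~/.bashrc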
