# Deploying a highly available Kubernetes cluster with RKE
Rancher Kubernetes Engine (RKE) is a very simple, fast Kubernetes installer that runs on a wide range of platforms.
GitHub repository, release v1.2.20
Download kubectl v1.22.9: curl -LO https://dl.k8s.io/release/v1.22.9/bin/linux/amd64/kubectl
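A minimal sketch of installing the downloaded binary (standard kubectl installation steps; target path is the usual convention):
chmod +x kubectl
mv kubectl /usr/local/bin/kubectl
kubectl version --client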
# Preparing the base environment
Three hosts: 192.168.108.101, 192.168.108.103 and 192.168.108.105. Set the hostname on each:
hostnamectl set-hostname XXX
Add /etc/hosts entries for all nodes and configure time synchronization with ntpdate (a sketch of both steps follows the sysctl block below), then apply the following sysctl tuning:
fs.file-max=1000000
net.ipv4.tcp_max_tw_buckets = 6000
net.ipv4.tcp_sack = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 16384 4194304
net.ipv4.tcp_max_syn_backlog = 16384
net.core.netdev_max_backlog = 32768
net.core.somaxconn = 32768
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_fin_timeout = 20
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_syncookies = 1
#net.ipv4.tcp_tw_len = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.ip_local_port_range = 1024 65000
net.nf_conntrack_max = 6553500
net.netfilter.nf_conntrack_max = 6553500
net.netfilter.nf_conntrack_tcp_timeout_close_wait = 60
net.netfilter.nf_conntrack_tcp_timeout_fin_wait = 120
net.netfilter.nf_conntrack_tcp_timeout_time_wait = 120
net.netfilter.nf_conntrack_tcp_timeout_established = 3600
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
sysctl -p
Full sysctl.conf used on the nodes
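A sketch of the hosts-file and time-sync steps mentioned above (the hostnames match the hostname_override values used in cluster.yml later; ntp.aliyun.com is only an example NTP server, substitute your own):
# append the three nodes to /etc/hosts on every machine
cat >> /etc/hosts <<EOF
192.168.108.101 manage-01
192.168.108.103 agent-03
192.168.108.105 agent-05
EOF
# one-off time sync plus an hourly cron entry
yum install -y ntpdate
ntpdate ntp.aliyun.com
echo '0 * * * * /usr/sbin/ntpdate ntp.aliyun.com >/dev/null 2>&1' >> /var/spool/cron/root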
- Install ipvsadm; RKE uses IPVS for routing rules (kube-proxy is set to ipvs mode in cluster.yml below)
yum install -y ipvsadm
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
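# note: on kernels 4.19 and newer this module was merged into nf_conntrack; use 'modprobe -- nf_conntrack' there instead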
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules
echo 'bash /etc/sysconfig/modules/ipvs.modules' >> /etc/rc.local # load the modules automatically at boot
# verify
bash /etc/sysconfig/modules/ipvs.modules >/dev/null 2>&1
lsmod | egrep "nf_conntrack_ipv4|ip_vs"
# reboot the server and check again
reboot
lsmod | egrep "nf_conntrack_ipv4|ip_vs" # run after the reboot to confirm the modules load at boot
- Install Docker and configure daemon.json (an install sketch follows the config below)
{
"oom-score-adjust": -1000,
"log-driver": "json-file",
"log-opts": {
"max-size": "100m",
"max-file": "3"
},
"max-concurrent-downloads": 10,
"max-concurrent-uploads": 10,
"registry-mirrors": ["https://registry.docker-cn.com"],
"storage-driver": "overlay2",
"insecure-registries": ["idocker.io"],
"bip": "172.40.55.0/24",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
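A sketch of installing Docker and applying the daemon.json above (assumes CentOS with the upstream docker-ce repository; adjust for your distribution):
yum install -y yum-utils
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum install -y docker-ce
mkdir -p /etc/docker
# save the JSON above as /etc/docker/daemon.json, then start Docker
systemctl enable --now docker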
- Create the rke user and set up passwordless SSH
Note: it is recommended not to set a password for this user. Generate a key pair with ssh-keygen and copy the public key to the rke user on every machine manually, since a user without a password cannot use password-based copying (a sketch follows the commands below).
useradd rke -G docker
su - rke -c "docker ps"
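A sketch of the key setup (default paths throughout; generate the key as rke on the machine you will run rke from, then install the public key for the rke user on every node):
# as the rke user on the machine that will run rke
mkdir -p ~/.ssh && chmod 700 ~/.ssh
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
# as root on every node, install that public key for the rke user
mkdir -p /home/rke/.ssh
cat id_rsa.pub >> /home/rke/.ssh/authorized_keys
chown -R rke:rke /home/rke/.ssh
chmod 700 /home/rke/.ssh
chmod 600 /home/rke/.ssh/authorized_keys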
# Bringing up the k8s cluster with RKE
su - rke
mkdir -p rke-workspace && cd rke-workspace
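If the rke binary is not installed yet, a sketch of fetching the v1.2.20 release linked at the top (run as root; the asset name follows the project's release naming):
curl -LO https://github.com/rancher/rke/releases/download/v1.2.20/rke_linux-amd64
chmod +x rke_linux-amd64
mv rke_linux-amd64 /usr/local/bin/rke
rke --version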
# Creating cluster.yml
Official cluster.yml example
Rancher official image version notes
Image versions corresponding to Rancher v2.5
Pull the required images in advance (a pull sketch follows below).
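A sketch of pre-pulling the heaviest system images on every node (names and tags are taken from the system_images section below):
for img in rancher/hyperkube:v1.19.8-rancher1 \
           rancher/rke-tools:v0.1.72 \
           rancher/coreos-etcd:v3.4.14-rancher1 \
           rancher/calico-node:v3.17.2 \
           rancher/coredns-coredns:1.8.0 \
           rancher/pause:3.2; do
  docker pull "$img"
done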
vi cluster.yml
# If you intend to deploy Kubernetes in an air-gapped environment,
# please consult the documentation on how to configure custom RKE images.
nodes:
- address: 192.168.108.101
port: "22"
internal_address: ""
role:
- controlplane
- worker
- etcd
hostname_override: manage-01
user: rke
docker_socket: /var/run/docker.sock
ssh_key: ""
ssh_key_path: ~/.ssh/id_rsa
ssh_cert: ""
ssh_cert_path: ""
labels: {}
taints: []
- address: 192.168.108.103
port: "22"
internal_address: ""
role:
- controlplane
- worker
- etcd
hostname_override: agent-03
user: rke
docker_socket: /var/run/docker.sock
ssh_key: ""
ssh_key_path: ~/.ssh/id_rsa
ssh_cert: ""
ssh_cert_path: ""
labels: {}
taints: []
- address: 192.168.108.105
port: "22"
internal_address: ""
role:
- controlplane
- worker
- etcd
hostname_override: agent-05
user: rke
docker_socket: /var/run/docker.sock
ssh_key: ""
ssh_key_path: ~/.ssh/id_rsa
ssh_cert: ""
ssh_cert_path: ""
labels: {}
taints: []
system_images:
etcd: rancher/coreos-etcd:v3.4.14-rancher1
alpine: rancher/rke-tools:v0.1.72
nginx_proxy: rancher/rke-tools:v0.1.72
cert_downloader: rancher/rke-tools:v0.1.72
kubernetes_services_sidecar: rancher/rke-tools:v0.1.72
kubernetes: rancher/hyperkube:v1.19.8-rancher1
calico_node: rancher/calico-node:v3.17.2
calico_cni: rancher/calico-cni:v3.17.2
calico_controllers: rancher/calico-kube-controllers:v3.17.2
calico_flexvol: rancher/calico-pod2daemon-flexvol:v3.17.2
canal_node: rancher/calico-node:v3.17.2
canal_cni: rancher/calico-cni:v3.17.2
canal_controllers: rancher/calico-kube-controllers:v3.17.2
canal_flannel: rancher/coreos-flannel:v0.13.0-rancher1
canal_flexvol: rancher/calico-pod2daemon-flexvol:v3.17.2
coredns: rancher/coredns-coredns:1.8.0
coredns_autoscaler: rancher/cluster-proportional-autoscaler:1.8.1
pod_infra_container: rancher/pause:3.2
nodelocal: rancher/k8s-dns-node-cache:1.15.13
ingress: rancher/nginx-ingress-controller:nginx-0.35.0-rancher2
ingress_backend: rancher/nginx-ingress-controller-defaultbackend:1.5-rancher1
services:
etcd:
image: ""
backup_config:
enabled: true
interval_hours: 12
retention: 6
extra_args:
quota-backend-bytes: 5368709120
auto-compaction-retention: 240
extra_binds: []
extra_env: []
win_extra_args: {}
win_extra_binds: []
win_extra_env: []
external_urls: []
ca_cert: ""
cert: ""
key: ""
path: ""
uid: 0
gid: 0
snapshot: null
retention: ""
creation: ""
kube-api:
image: ""
extra_args:
watch-cache: true
default-watch-cache-size: 1500
event-ttl: 1h0m0s
max-requests-inflight: 800
max-mutating-requests-inflight: 400
kubelet-timeout: 10s
extra_binds: []
extra_env: []
win_extra_args: {}
win_extra_binds: []
win_extra_env: []
service_cluster_ip_range: 10.43.0.0/16
service_node_port_range: "10000-65535"
pod_security_policy: false
always_pull_images: false
secrets_encryption_config: null
audit_log: null
admission_configuration: null
event_rate_limit: null
kube-controller:
image: ""
extra_args:
node-monitor-period: "5s"
node-cidr-mask-size: "24"
node-monitor-grace-period: "20s"
node-startup-grace-period: "30s"
pod-eviction-timeout: "1m"
concurrent-deployment-syncs: 5
concurrent-endpoint-syncs: 5
concurrent-gc-syncs: 20
concurrent-namespace-syncs: 10
concurrent-replicaset-syncs: 5
concurrent-service-syncs: 1
concurrent-serviceaccount-token-syncs: 5
deployment-controller-sync-period: 30s
pvclaimbinder-sync-period: 15s
extra_binds: []
extra_env: []
win_extra_args: {}
win_extra_binds: []
win_extra_env: []
cluster_cidr: 10.42.0.0/16
service_cluster_ip_range: 10.43.0.0/16
scheduler:
image: ""
extra_args: {}
extra_binds: []
extra_env: []
win_extra_args: {}
win_extra_binds: []
win_extra_env: []
kubelet:
image: ""
extra_args:
network-plugin-mtu: "1460"
max-pods: "250"
sync-frequency: "3s"
max-open-files: "2000000"
kube-api-burst: "30"
kube-api-qps: "15"
serialize-image-pulls: "false"
registry-burst: "10"
registry-qps: "0"
cgroups-per-qos: "true"
cgroup-driver: "cgroupfs"
enforce-node-allocatable: "pods"
system-reserved: "cpu=0.25,memory=200Mi"
kube-reserved: "cpu=0.25,memory=1500Mi"
eviction-hard: "memory.available<300Mi,nodefs.available<10%,imagefs.available<15%,nodefs.inodesFree<5%"
eviction-soft: "memory.available<500Mi,nodefs.available<80%,imagefs.available<80%,nodefs.inodesFree<10%"
eviction-soft-grace-period: "memory.available=1m30s,nodefs.available=1m30s,imagefs.available=1m30s,nodefs.inodesFree=1m30s"
eviction-max-pod-grace-period: "30"
eviction-pressure-transition-period: "30s"
node-status-update-frequency: 10s
global-housekeeping-interval: 1m0s
housekeeping-interval: 10s
runtime-request-timeout: 2m0s
volume-stats-agg-period: 1m0s
extra_binds:
- "/usr/libexec/kubernetes/kubelet-plugins:/usr/libexec/kubernetes/kubelet-plugins"
cluster_domain: cluster.local
cluster_dns_server: 10.43.0.10
fail_swap_on: false
generate_serving_certificate: false
kubeproxy:
image: ""
extra_args:
proxy-mode: ipvs
kube-api-burst: 20
kube-api-qps: 10
extra_binds: []
extra_env: []
win_extra_args: {}
win_extra_binds: []
win_extra_env: []
network:
plugin: canal
options: {}
mtu: 0
node_selector: {}
update_strategy: null
tolerations: []
authentication:
strategy: x509
sans: []
webhook: null
ssh_key_path: ~/.ssh/id_rsa
authorization:
mode: rbac
options: {}
kubernetes_version: "v1.19.8-rancher1-1"
cluster_name: "yfk-k8s"
cloud_provider:
name: ""
prefix_path: "/opt/rancher/rke"
win_prefix_path: ""
addon_job_timeout: 60
restore:
restore: false
snapshot_name: ""
dns:
provider: coredns
# nodelocal:
# ip_address: ""
update_strategy:
strategy: RollingUpdate
rollingUpdate:
maxUnavailable: 20%
maxSurge: 15%
linear_autoscaler_params:
cores_per_replica: 0.34
nodes_per_replica: 4
prevent_single_point_failure: true
min: 2
max: 3
monitoring:
provider: metrics-server
update_strategy: # Available in v2.4
strategy: RollingUpdate
rollingUpdate:
maxUnavailable: 8
#private_registries:
# - url: registry.yfklife.cn
# user: admin
# password: Yfklife123456
# is_default: true
# Starting the cluster
The servers must not be under-provisioned: no less than 4 GB of RAM each.
#bring the cluster up
cd /home/rke/rke-workspace
rke up
#to remove the machines / tear the cluster down: rke remove
#switch back to the root user
exit
mkdir /root/.kube
ln -s /home/rke/rke-workspace/kube_config_cluster.yml /root/.kube/config
#verify
kubectl get nodes
kubectl get pod -A
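Since kube-proxy was configured with proxy-mode: ipvs, a quick sanity check on any node is to confirm that IPVS virtual servers exist (the kubernetes Service VIP 10.43.0.1 should be listed):
ipvsadm -Ln | head -20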
# Configuring the DNS server address
By default the cluster uses the host's resolv.conf. If you run your own DNS service, changing this is strongly recommended; otherwise names that only your internal DNS can resolve will fail inside the cluster.
kubectl edit cm -n kube-system coredns
Change this line: forward . "/etc/resolv.conf", replacing "/etc/resolv.conf" with the IP of your own DNS server.
Set replicas to 0, wait until all coredns pods have been deleted, then restore the replica count and wait for the new coredns pods to come up so the configuration above takes effect:
kubectl edit -n kube-system deployments.apps coredns
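For reference, a sketch of the relevant Corefile lines after the edit (192.168.108.2 stands in for your internal DNS server; the rest of the Corefile is unchanged):
    prometheus :9153
    forward . 192.168.108.2    # was: forward . "/etc/resolv.conf"
    cache 30
Equivalently, kubectl -n kube-system scale deployment coredns --replicas=0 followed by scaling back up forces the restart described above.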
# Using Traefik for routing
- Remove the default ingress controller
kubectl get -n ingress-nginx daemonsets.apps nginx-ingress-controller -oyaml > nginx-ingress-controller.yaml
kubectl delete -n ingress-nginx daemonsets.apps nginx-ingress-controller
Download the prebuilt Traefik v2.4.6 bundle and adjust the DaemonSet-traefik-hostport.yaml configuration.
#download the prebuilt traefik bundle; image version: traefik:v2.4.6
kubectl create ns traefik
kubectl apply -f rabc-traefik.yaml
kubectl apply -f DaemonSet-traefik-hostport.yaml
kubectl apply -f dashboard.yaml #change the domain name first
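A quick check that the Traefik controller is running and answering on the nodes (a 404 from Traefik is the expected response when no router matches the request):
kubectl -n traefik get pods -o wide
curl -s -o /dev/null -w '%{http_code}\n' http://192.168.108.101/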
- Proxying Traefik with nginx
If you change "--entrypoints.web.Address=:80" in the Traefik arguments, remember to change the port 80 below to match.
upstream backend_traefik {
server 192.168.108.101:80 max_fails=3 fail_timeout=10s;
server 192.168.108.103:80 max_fails=3 fail_timeout=10s;
}
server {
server_name *.yfklife.cn;
listen 80;
listen 443 ssl;
ssl_certificate ssl/yfklife.cn.cer; #wildcard certificate
ssl_certificate_key ssl/yfklife.cn.key;
ssl_session_timeout 10m;
ssl_protocols TLSv1.2;
ssl_prefer_server_ciphers on;
ssl_session_cache shared:SSL:10m;
ssl_ciphers '!aNULL:!MD5:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-WITH-AES128-GCM-SHA256';
location / {
proxy_pass http://backend_traefik;
proxy_set_header Host $http_host;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Server $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Graylog-Server-URL http://$server_name/;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_read_timeout 600;
proxy_send_timeout 600;
}
}
- Configure DNS resolution for the domain, then open:
https://traefik.yfklife.cn
# Proxying ws/wss through Traefik
Apipost testing tool download
- Dockerfile
FROM idocker.io/base/openjdk:8u252
#JDK 1.8 base image
WORKDIR /java-work
COPY websocketdemo-0.0.1-SNAPSHOT.jar /java-work
EXPOSE 8086
CMD ["java","-jar","/java-work/websocketdemo-0.0.1-SNAPSHOT.jar"]
The wss protocol relies on the SSL certificate configured on nginx, which terminates TLS and forwards the connection to Traefik as plain ws (a connectivity-check sketch follows the manifests below).
apiVersion: v1
kind: Pod
metadata:
generateName: websocket-demo
labels:
app: websocket-demo
name: websocket-demo
namespace: web-socket
spec:
containers:
  - image: dev.idocker.io/icpm/websocketdemo:0.0.1 #jar built by the development team
imagePullPolicy: Always
name: websocket-demo
ports:
- containerPort: 8086
name: websocket-demo
protocol: TCP
imagePullSecrets:
- name: regcred-dev
restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
annotations:
labels:
app: websocket-demo
name: websocket-demo
namespace: web-socket
spec:
ports:
- name: http
port: 8086
protocol: TCP
targetPort: 8086
selector:
app: websocket-demo
type: ClusterIP
---
apiVersion: traefik.containo.us/v1alpha1
kind: IngressRoute
metadata:
name: websocket-demo
namespace: web-socket
spec:
entryPoints:
- web
routes:
- kind: Rule
match: Host(`websocket.yfklife.cn`) && PathPrefix(`/socket`)
middlewares:
- name: compress
- name: stripprefix
services:
- name: websocket-demo
      passHostHeader: true #compared with a regular service deployment, this is the only parameter added here
port: 8086
---
apiVersion: traefik.containo.us/v1alpha1
kind: Middleware
metadata:
name: stripprefix
namespace: web-socket
spec:
stripPrefix:
forceSlash: false
prefixes:
- /socket
Downloads: technical support package, websocketdemo-0.0.1-SNAPSHOT.jar
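A hedged connectivity check with wscat (the host and /socket prefix come from the IngressRoute above; the path after the prefix depends on the demo application's own WebSocket endpoint and is a placeholder):
npm install -g wscat
wscat -c "ws://websocket.yfklife.cn/socket/<endpoint>"     # plain ws straight through Traefik's web entrypoint
wscat -c "wss://websocket.yfklife.cn/socket/<endpoint>"    # wss terminated by the nginx wildcard certificate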
# RKE extras and notes
- Manual etcd backup
rke etcd snapshot-save --config cluster.yml --name snapshot-name
- Restore the cluster from an etcd snapshot
rke etcd snapshot-restore --config cluster.yml --name snapshot-name
- Add or remove worker nodes
Add nodes by editing cluster.yml, adding their entries and specifying their roles in the Kubernetes cluster, or remove nodes by deleting their entries from the node list in cluster.yml (see the example after the command below). Then apply the change with:
rke up --update-only
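For example, to add a worker-only node, append an entry like the following to the nodes list in cluster.yml (the address and hostname below are placeholders) and rerun the command above:
- address: 192.168.108.107
  port: "22"
  role:
    - worker
  hostname_override: agent-07
  user: rke
  ssh_key_path: ~/.ssh/id_rsa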