健康检查

2021-04-07  本文已影响0人  慕知

1 ⦁ 存活性探测(LivenessProbe)

 ---检查pod中的容器是否仍在正常运行
pod中所有容器的status=Running时,Pod的状态才会是Running状态;
当存活性检查检测失败的时候,kubelet会杀掉(kill)该容器,重新启动一个新的容器,继续检查。

如果一个容器不包含LivenessProbe探针,则Kubelet认为容器的LivenessProbe探针的返回值永远成功。




存活性探测支持的方法有三种:ExecAction,TCPSocketAction,HTTPGetAction。

1) ExecAction

[root@\ k8s-m-01~]# kubectl explain deployment.spec.template.spec.containers
... ...
livenessProbe   <Object>
     Periodic probe of container liveness. Container will be restarted if the
     probe fails. Cannot be updated. More info:
     https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes

... ...

可进入官网查看详细介绍
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes




[root@\ k8s-m-01~]# vim exec.yaml
kind: Service
apiVersion: v1
metadata:
  name: test-exec
spec:
  ports:
    - name: http
      port: 80
      targetPort: 80
  selector:
    app: test
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: test-exec
spec:
  selector:
    matchLabels:
      app: test
  template:
    metadata:
      labels:
        app: test
    spec:
      containers:
        - name: django
          image: alvinos/django:v1



[root@\ k8s-m-01~]# kubectl apply -f exec.yaml 
service/test-exec created
deployment.apps/test-exec created

# 查看pod
[root@\ k8s-m-01~]# kubectl get pods
NAME                         READY   STATUS    RESTARTS   AGE
test-exec-586884cc4d-h4xgt   1/1     Running   0          12s


#查看service
[root@\ k8s-m-01~]# kubectl get svc
NAME               TYPE           CLUSTER-IP      EXTERNAL-IP         
test-exec          ClusterIP      10.96.211.253   <none>        
   
# 弹性扩容
[root@\ k8s-m-01~]# kubectl edit deployments test-exec 
deployment.apps/test-exec edited






# 检测(会发现在扩容的过程中,会提示失败)
[root@\ k8s_master~]# while true;do curl 10.96.211.253/index;echo;sleep 1;done
主机名:test-exec-586884cc4d-h4xgt,版本:v1
主机名:test-exec-586884cc4d-h4xgt,版本:v1
curl: (7) Failed connect to 10.96.211.253:80; Connection refused

curl: (7) Failed connect to 10.96.211.253:80; Connection refused

主机名:test-exec-586884cc4d-vgch2,版本:v1



测试:

#配置清单增加以下
... ...
    spec:
      containers:
        - name: django
          image: alvinos/django:v1
          livenessProbe:
            exec:
              command:
                - cat
                - /xxx


# 重新部署
[root@\ k8s-m-01~]# kubectl delete -f exec.yaml 
service "test-exec" deleted

[root@\ k8s-m-01~]# kubectl apply -f exec.yaml 
service/test-exec unchanged
deployment.apps/test-exec created

[root@\ k8s-m-01~]# kubectl get pods
NAME                         READY   STATUS             RESTARTS   AGE
test-exec-dd64bbd47-s9zvb    1/1     Running            0          38s



# 查看pod生成详细清单
[root@\ k8s-m-01~]# kubectl describe pod test-exec-dd64bbd47-s9zvb 
Name:         test-exec-dd64bbd47-s9zvb
Namespace:    default
Priority:     0
Node:         k8s-n-01/192.168.15.32
Start Time:   Wed, 07 Apr 2021 16:13:03 +0800
Labels:       app=test
              pod-template-hash=dd64bbd47
Annotations:  <none>
Status:       Running
IP:           10.244.1.46
IPs:
  IP:           10.244.1.46
Controlled By:  ReplicaSet/test-exec-dd64bbd47
Containers:
  django:
    Container ID:   docker://fb1b5caee0218924268ad63a72b4f4581d618a444995adf6f6ad78ebefb59849
    Image:          alvinos/django:v1
    Image ID:       docker-pullable://alvinos/django@sha256:fc5ffecb7d5038940006d5d7e9ea6414fc4ec0f2509501aebdc3280f1a559723
    Port:           <none>
    Host Port:      <none>
    State:          Running
      Started:      Wed, 07 Apr 2021 16:13:55 +0800
    Last State:     Terminated
      Reason:       Error
      Exit Code:    137
      Started:      Wed, 07 Apr 2021 16:13:04 +0800
      Finished:     Wed, 07 Apr 2021 16:13:55 +0800
    Ready:          True
    Restart Count:  1
    Liveness:       exec [cat /xxx] delay=0s timeout=1s period=10s #success=1 #failure=3
    Environment:    <none>
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-8zqvl (ro)
Conditions:
  Type              Status
  Initialized       True 
  Ready             True 
  ContainersReady   True 
  PodScheduled      True 
Volumes:
  default-token-8zqvl:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-8zqvl
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                 node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type     Reason     Age                From               Message
  ----     ------     ----               ----               -------
  Normal   Scheduled  87s                default-scheduler  Successfully assigned default/test-exec-dd64bbd47-s9zvb to k8s-n-01
  Normal   Pulled     35s (x2 over 86s)  kubelet            Container image "alvinos/django:v1" already present on machine
  Normal   Created    35s (x2 over 86s)  kubelet            Created container django
  Normal   Started    35s (x2 over 86s)  kubelet            Started container django
  Warning  Unhealthy  5s (x6 over 85s)   kubelet            Liveness probe failed: cat: /xxx: No such file or directory
  Normal   Killing    5s (x2 over 65s)   kubelet            Container django failed liveness probe, will be restarted


可以看到最后两行,找不到/xxx文件,并且kill掉容器再生成

#再次执行以上命令,会发现容器id不一样
[root@\ k8s-m-01~]# kubectl describe pod test-exec-dd64bbd47-s9zvb 
... ...
Container ID:   docker://dcd7334d92b8b00a42510a4e33d046ed9dfbb41dfd25e6e89a5a28c634b759e6
... ...




测试二:
# 进入容器里,当前文件夹有manage.py这个文件
[root@test-exec-586884cc4d-cz78h test]# pwd
/root/test

[root@test-exec-586884cc4d-cz78h test]# ll
total 4
-rw-r--r--. 1 root root   0 Jan 19  2020 db.sqlite3
drwxr-xr-x. 1 root root  25 Jan 19  2020 docker
drwxr-xr-x. 1 root root  25 Jan 19  2020 file
-rw-r--r--. 1 root root 538 Jan 19  2020 manage.py


# 修改配置清单
... ...
    spec:
      containers:
        - name: django
          image: alvinos/django:v1
          livenessProbe:
            exec:
              command:
                - cat
                - /root/test/manage.py                                 


[root@\ k8s-m-01~]# kubectl delete -f exec.yaml
[root@\ k8s-m-01~]# kubectl apply -f exec.yaml 

# 查看正常启动
[root@\ k8s-m-01~]# kubectl describe pod test-exec-6444b54fbf-mf7jr
... ...
Events:
  Type    Reason     Age   From               Message
  ----    ------     ----  ----               -------
  Normal  Scheduled  102s  default-scheduler  Successfully assigned default/test-exec-6444b54fbf-mf7jr to k8s-n-01
  Normal  Pulled     102s  kubelet            Container image "alvinos/django:v1" already present on machine
  Normal  Created    102s  kubelet            Created container django
  Normal  Started    102s  kubelet            Started container django



# 改掉文件名
[root@\ k8s-m-01~]# kubectl exec -it test-exec-6444b54fbf-mf7jr -- bash
[root@test-exec-6444b54fbf-mf7jr test]# ll
total 4
-rw-r--r--. 1 root root   0 Jan 19  2020 db.sqlite3
drwxr-xr-x. 1 root root  25 Jan 19  2020 docker
drwxr-xr-x. 1 root root  25 Jan 19  2020 file
-rw-r--r--. 1 root root 538 Jan 19  2020 manage.py
[root@test-exec-6444b54fbf-mf7jr test]# mv manage.py manage.py.bak
[root@test-exec-6444b54fbf-mf7jr test]# exit
exit




#再次生成,可以看到文件名又恢复过来,是因为删除了原来的容器,重新生成了一个
[root@\ k8s-m-01~]# kubectl  get pods
NAME                         READY   STATUS             RESTARTS   AGE
test-exec-6444b54fbf-fht99   1/1     Running            0          71s

[root@\ k8s-m-01~]# kubectl exec -it test-exec-6444b54fbf-fht99 -- bash
[root@test-exec-6444b54fbf-fht99 test]# ls
db.sqlite3  docker  file  manage.py








# 监控查看pod变化
[root@\ k8s_master~]# kubectl get pod -w
test-exec-6444b54fbf-mf7jr   1/1     Terminating        0          3m44s
test-exec-6444b54fbf-fht99   0/1     Pending            0          0s
test-exec-6444b54fbf-fht99   0/1     Pending            0          0s
test-exec-6444b54fbf-fht99   0/1     ContainerCreating   0          0s
test-exec-6444b54fbf-fht99   1/1     Running             0          2s



2) HTTPGetAction

# 查看用法 (required是必填项)
[root@\ k8s-m-01~]# kubectl explain deployment.spec.template.spec.containers.livenessProbe.httpGet
KIND:     Deployment
VERSION:  apps/v1

RESOURCE: httpGet <Object>

DESCRIPTION:
     HTTPGet specifies the http request to perform.

     HTTPGetAction describes an action based on HTTP Get requests.

FIELDS:
   host <string>
     Host name to connect to, defaults to the pod IP. You probably want to set
     "Host" in httpHeaders instead.

   httpHeaders  <[]Object>
     Custom headers to set in the request. HTTP allows repeated headers.

   path <string>
     Path to access on the HTTP server.

   port <string> -required-
     Name or number of the port to access on the container. Number must be in
     the range 1 to 65535. Name must be an IANA_SVC_NAME.

   scheme   <string>
     Scheme to use for connecting to the host. Defaults to HTTP.



# 修改配置清单
... ...
      containers:
        - name: django
          image: alvinos/django:v1
          livenessProbe:
            httpGet:
              port: 80
              path: /index



[root@\ k8s-m-01~]# kubectl get pod
NAME                         READY   STATUS    RESTARTS   AGE
test-exec-6ffb559d96-qb8nr   1/1     Running   0          4m13s



[root@\ k8s-m-01~]# kubectl edit svc test-exec
  type: NodePort


[root@\ k8s-m-01~]# kubectl get svc
NAME               TYPE           CLUSTER-IP       EXTERNAL-IP   PORT(S)        AGE
test-exec          NodePort       10.109.227.238   <none>        80:31128/TCP   7m3s


[root@\ k8s-m-01~]# kubectl describe pod test-exec-6ffb559d96-qb8nr 
Name:         test-exec-6ffb559d96-qb8nr
Namespace:    default
Priority:     0
Node:         k8s-n-01/192.168.15.32
Start Time:   Wed, 07 Apr 2021 16:49:17 +0800
Labels:       app=test
              pod-template-hash=6ffb559d96
Annotations:  <none>
Status:       Running
IP:           10.244.1.50
IPs:
  IP:           10.244.1.50
Controlled By:  ReplicaSet/test-exec-6ffb559d96
Containers:
  django:
    Container ID:   docker://128b6e47027956fe6cdbaedee020142e88de6575a77f9631a08389b32c6ab17e
    Image:          alvinos/django:v1
    Image ID:       docker-pullable://alvinos/django@sha256:fc5ffecb7d5038940006d5d7e9ea6414fc4ec0f2509501aebdc3280f1a559723
    Port:           <none>
    Host Port:      <none>
    State:          Running
      Started:      Wed, 07 Apr 2021 16:49:18 +0800
    Ready:          True
    Restart Count:  0
    Liveness:       http-get http://:80/index delay=0s timeout=1s period=10s #success=1 #failure=3
    Environment:    <none>
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-8zqvl (ro)
Conditions:
  Type              Status
  Initialized       True 
  Ready             True 
  ContainersReady   True 
  PodScheduled      True 
Volumes:
  default-token-8zqvl:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-8zqvl
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                 node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type    Reason     Age    From               Message
  ----    ------     ----   ----               -------
  Normal  Scheduled  4m34s  default-scheduler  Successfully assigned default/test-exec-6ffb559d96-qb8nr to k8s-n-01
  Normal  Pulled     4m34s  kubelet            Container image "alvinos/django:v1" already present on machine
  Normal  Created    4m34s  kubelet            Created container django
  Normal  Started    4m33s  kubelet            Started container django

# 最后几行可以看到是成功访问到,如下图

image.png

3) TCPSocketAction

通过尝试与容器的指定端口建立TCP连接来探测(类似 telnet IP 端口,而不是ICMP的ping),能建立连接即认为探测成功

 # 修改配置
    spec:
      containers:
        - name: django
          image: alvinos/django:v1
          livenessProbe:
            tcpSocket:
              port: 80



[root@\ k8s-m-01~]# kubectl apply -f exec.yaml 
service/test-exec created
deployment.apps/test-exec created

[root@\ k8s-m-01~]# kubectl get pods
NAME                         READY   STATUS        RESTARTS   AGE
test-exec-bcb499746-mj4cq    1/1     Running       0          6s


[root@\ k8s-m-01~]# kubectl describe pod test-exec-bcb499746-mj4cq 
Name:         test-exec-bcb499746-mj4cq
Namespace:    default
... ...
Events:
  Type    Reason     Age   From               Message
  ----    ------     ----  ----               -------
  Normal  Scheduled  39s   default-scheduler  Successfully assigned default/test-exec-bcb499746-mj4cq to k8s-n-01
  Normal  Pulled     38s   kubelet            Container image "alvinos/django:v1" already present on machine
  Normal  Created    38s   kubelet            Created container django
  Normal  Started    38s   kubelet            Started container django


2 ⦁ 就绪性探测

就绪性探测的特点是:探测失败时,立即将该Pod移出负载均衡(在endpoints中由 Addresses 移入 NotReadyAddresses)

1_2_3)

[root@\ k8s-m-01~]# kubectl describe endpoints test-exec 
Name:         test-exec
Namespace:    default
Labels:       <none>
Annotations:  endpoints.kubernetes.io/last-change-trigger-time: 2021-04-07T09:29:45Z
Subsets:
  Addresses:          10.244.1.52
  NotReadyAddresses:  <none>
  Ports:
    Name  Port  Protocol
    ----  ----  --------
    http  80    TCP

Events:  <none>





[root@\ k8s-m-01~]# vim readness.yaml
kind: Service
apiVersion: v1
metadata:
  name: readness
spec:
  ports:
    - name: http
      port: 80
      targetPort: 80
  selector:
    app: readness
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: readness
spec:
  selector:
    matchLabels:
      app: readness
  template:
    metadata:
      labels:
        app: readness
    spec:
      containers:
        - name: django
          image: alvinos/django:v1
          readinessProbe:
          #  tcpSocket:
          #    port: 80
          #  failureThreshold: 1
          #  httpGet:
          #    port: 80
          #    path: /index
            exec:
              command:
                - cat
                - /root/test/manage.py



[root@\ k8s-m-01~]# kubectl exec -it readness-5dd5489c66-c9qq9  -- bash
[root@readness-5dd5489c66-c9qq9 test]# ll
total 4
-rw-r--r--. 1 root root   0 Jan 19  2020 db.sqlite3
drwxr-xr-x. 1 root root  25 Jan 19  2020 docker
drwxr-xr-x. 1 root root  25 Jan 19  2020 file
-rw-r--r--. 1 root root 538 Jan 19  2020 manage.py

[root@readness-5dd5489c66-c9qq9 test]# mv manage.py manage.py.bak
[root@readness-5dd5489c66-c9qq9 test]# exit



[root@\ k8s-m-01~]# kubectl describe endpoints readness 
Name:         readness
Namespace:    default
Labels:       <none>
Annotations:  endpoints.kubernetes.io/last-change-trigger-time: 2021-04-07T09:41:17Z
Subsets:
  Addresses:          <none>
  NotReadyAddresses:  10.244.1.53
  Ports:
    Name  Port  Protocol
    ----  ----  --------
    http  80    TCP


# 查看  NotReadyAddresses:  10.244.1.53
这时把文件改回来,上面的地址会恢复到集群中



其他两个探测方式(tcpSocket、httpGet)的用法和存活性探测一样

3 ⦁ 健康检查参数


delay=10s   :首次探测前的延时时间,对应 initialDelaySeconds
timeout=1s  :探测的超时时间,对应 timeoutSeconds
period=10s  :探测的频率(间隔),对应 periodSeconds
success=1   :连续成功多少次才算成功,对应 successThreshold
failure=1   :连续失败多少次才算失败,对应 failureThreshold



failureThreshold:连续探测失败达到该次数后,才认定为探测失败(fail)。默认为3,最小值为1。
initialDelaySeconds:容器启动之后开始进行存活性探测的秒数。不填立即进行
periodSeconds:执行探测的频率(秒)。默认为10秒。最小值为1。
successThreshold:探测失败后,最少连续探测成功多少次才被认定为成功,满足该次数则认为success。(但是如果是liveness则必须是 1。最小值是 1。)
timeoutSeconds:每次执行探测的超时时间,默认1秒,最小1秒。




    spec:
      containers:
        - name: django
          image: alvinos/django:v1
          livenessProbe:
            tcpSocket:
              port: 80
            failureThreshold: 1

使用时,将以上参数写在 livenessProbe: 下面(与 tcpSocket 等探测方式同级),如上例所示

上一篇 下一篇

猜你喜欢

热点阅读