从源码看pod驱逐机制

2024-01-12  本文已影响0人  wwq2020

背景

pod驱逐有2种场景
1 controller-manager基于taint的驱逐(node not ready)
2 kubelet的驱逐

源码

controller-manager

taint

pkg/controller/nodelifecycle/node_lifecycle_controller.go中

创建NodeLifecycle控制器
func NewNodeLifecycleController(
    ctx context.Context,
    leaseInformer coordinformers.LeaseInformer,
    podInformer coreinformers.PodInformer,
    nodeInformer coreinformers.NodeInformer,
    daemonSetInformer appsv1informers.DaemonSetInformer,
    kubeClient clientset.Interface,
    nodeMonitorPeriod time.Duration,
    nodeStartupGracePeriod time.Duration,
    nodeMonitorGracePeriod time.Duration,
    evictionLimiterQPS float32,
    secondaryEvictionLimiterQPS float32,
    largeClusterThreshold int32,
    unhealthyZoneThreshold float32,
) (*Controller, error) {
...
}


运行控制器
func (nc *Controller) Run(ctx context.Context) {
...
驱逐pod
        go wait.UntilWithContext(ctx, nc.doPodProcessingWorker, time.Second)

...

添加taint
    go wait.UntilWithContext(ctx, nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod)
...
监控节点健康状况
    go wait.UntilWithContext(ctx, func(ctx context.Context) {
        if err := nc.monitorNodeHealth(ctx); err != nil {
            logger.Error(err, "Error monitoring node health")
        }
    }, nc.nodeMonitorPeriod)
...
}

func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
...
        nc.processPod(ctx, podItem)

...
}



添加taint
func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
...
    var zoneNoExecuteTainterKeys []string
    func() {
        nc.evictorLock.Lock()
        defer nc.evictorLock.Unlock()

        zoneNoExecuteTainterKeys = make([]string, 0, len(nc.zoneNoExecuteTainter))
        for k := range nc.zoneNoExecuteTainter {
            zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
        }
    }()
...

    for _, k := range zoneNoExecuteTainterKeys {
...
根据ready condition添加污点
taintToAdd := v1.Taint{}
            oppositeTaint := v1.Taint{}
            switch condition.Status {
            case v1.ConditionFalse:
                taintToAdd = *NotReadyTaintTemplate
                oppositeTaint = *UnreachableTaintTemplate
            case v1.ConditionUnknown:
                taintToAdd = *UnreachableTaintTemplate
                oppositeTaint = *NotReadyTaintTemplate
            default:
...
                return true, 0
            }
            result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
            
...

}
...
}

监控节点健康状况
func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
...
处理基于污点的驱逐
            nc.processTaintBaseEviction(ctx, node, &observedReadyCondition)
...
}
...
}

func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
...
将unreachable污点替换为not ready污点
        if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
            taintToAdd := *NotReadyTaintTemplate
            if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
...
            }
标记节点准备添加污点
        } else if nc.markNodeForTainting(node, v1.ConditionFalse) {
...
        }
...
}


标记节点准备添加污点

func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStatus) bool {
...
        if !taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
            nc.zoneNoExecuteTainter[nodetopology.GetZoneKey(node)].Remove(node.Name)
        }
...
}

至此node异常时已添加上not ready taints

evict

pkg/controller/tainteviction/taint_eviction.go中

evict控制器
func New(ctx context.Context, c clientset.Interface, podInformer corev1informers.PodInformer, nodeInformer corev1informers.NodeInformer, controllerName string) (*Controller, error) {
...
}

运行控制器
func (tc *Controller) Run(ctx context.Context) {
...
启动工作goroutine
        go tc.worker(ctx, i, wg.Done, ctx.Done())
...
}


func (tc *Controller) worker(ctx context.Context, worker int, done func(), stopCh <-chan struct{}) {
...
处理节点更新
        case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
            tc.handleNodeUpdate(ctx, nodeUpdate)
...
}


func (tc *Controller) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
...
    pods, err := tc.getPodsAssignedToNode(node.Name)
...
处理节点上的pod

        tc.processPodOnNode(ctx, podNamespacedName, node.Name, pod.Spec.Tolerations, taints, now)

...
}

处理节点上的pod
func (tc *Controller) processPodOnNode(
    ctx context.Context,
    podNamespacedName types.NamespacedName,
    nodeName string,
    tolerations []v1.Toleration,
    taints []v1.Taint,
    now time.Time,
) {
...
添加到工作队列,到达触发时间(triggerTime)后执行
    tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)

}

至此已完成pod evict

kubelet

pkg/kubelet/eviction/eviction_manager.go中

func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
...
定期检查驱逐
            evictedPods, err := m.synchronize(diskInfoProvider, podFunc)
...
}


检查是否满足条件进行驱逐
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) ([]*v1.Pod, error) {
...
没有满足条件
    if len(thresholds) == 0 {
        klog.V(3).InfoS("Eviction manager: no resources are starved")
        return nil, nil
    }
...
驱逐pod
        if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
...
}

驱逐pod
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string, condition *v1.PodCondition) bool {
...
执行驱逐pod
    err := m.killPodFunc(pod, true, &gracePeriodOverride, func(status *v1.PodStatus) {
        status.Phase = v1.PodFailed
        status.Reason = Reason
        status.Message = evictMsg
        if condition != nil {
            podutil.UpdatePodCondition(status, condition)
        }
    })
...
}
上一篇 下一篇

猜你喜欢

热点阅读