Kubernetes ContainerGC分析
为了回收系统上的资源kubelet有ImageGC和ContainerGC等功能对image和container进行回收,下面就根据kubelet代码对ContainerGC部分进行分析。
相关的参数主要有:
- minimum-container-ttl-duration
- maximum-dead-containers-per-container
- minimum-container-ttl-duration
相对应的代码是:
//pkg/kubelet/container_gc.go
type ContainerGCPolicy struct {
// 已经死掉的容器在机器上存留的时间
MinAge time.Duration
// 每个pod可以保留的死掉的容器
MaxPerPodContainer int
// 机器上最大的可以保留的死亡容器数量
MaxContainers int
}
...
func NewContainerGC(runtime Runtime, policy ContainerGCPolicy, sourcesReadyProvider SourcesReadyProvider) (ContainerGC, error) {
if policy.MinAge < 0 {
return nil, fmt.Errorf("invalid minimum garbage collection age: %v", policy.MinAge)
}
return &realContainerGC{
runtime: runtime,
policy: policy,
sourcesReadyProvider: sourcesReadyProvider,
}, nil
}
func (cgc *realContainerGC) GarbageCollect() error {
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), false)
}
func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}
GarbageCollect方法里面的调用就是ContainerGC真正的逻辑所在, GarbageCollect函数是在pkg/kubelet/kubelet.go
里面调用的,每隔一分钟会执行一次。GarbageCollect里面所调用的runtime的GarbageCollect函数是在pkg/kubelet/kuberuntime/kuberuntime_gc.go
里面。
//pkg/kubelet/kuberuntime/kuberuntime_gc.go
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
// Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
return err
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
return err
}
// Remove pod sandbox log directory
return cgc.evictPodLogsDirectories(allSourcesReady)
}
第一步是驱逐容器
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
// Separate containers by evict units.
evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
if err != nil {
return err
}
// Remove deleted pod containers if all sources are ready.
if allSourcesReady {
for key, unit := range evictUnits {
if cgc.isPodDeleted(key.uid) || evictNonDeletedPods {
cgc.removeOldestN(unit, len(unit)) // Remove all.
delete(evictUnits, key)
}
}
}
// Enforce max containers per evict unit.
if gcPolicy.MaxPerPodContainer >= 0 {
cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer)
}
// Enforce max total number of containers.
if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers {
// Leave an equal number of containers per evict unit (min: 1).
numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits()
if numContainersPerEvictUnit < 1 {
numContainersPerEvictUnit = 1
}
cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit)
// If we still need to evict, evict oldest first.
numContainers := evictUnits.NumContainers()
if numContainers > gcPolicy.MaxContainers {
flattened := make([]containerGCInfo, 0, numContainers)
for key := range evictUnits {
flattened = append(flattened, evictUnits[key]...)
}
sort.Sort(byCreated(flattened))
cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
}
}
return nil
}
1.首先获取已经死掉的并且创建时间大于minage的容器
2.如果pod已经delete那么把属于这个pod的容器全部删除
3.如果设置了MaxPerPodContainer那么把MaxPerPodContainer之外数量的容器删除,这个值默认是1
4.如果设置MaxContainers那么再次对容器进行清理,这个值默认是-1也就是不清理的。首先会拿所有容器的数量除以pod的数量,这样会得到一个平均值,然后按照这个值对3进行再次处理。这个时候如果机器上的死亡容器的数量还大于MaxContainer那么直接按照时间对容器进行排序然后删除大于MaxContainer数量之外的容器。
下一步是清理机器上的sandbox
func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
}
sandboxes, err := cgc.manager.getKubeletSandboxes(true)
if err != nil {
return err
}
sandboxesByPod := make(sandboxesByPodUID)
for _, sandbox := range sandboxes {
podUID := types.UID(sandbox.Metadata.Uid)
sandboxInfo := sandboxGCInfo{
id: sandbox.Id,
createTime: time.Unix(0, sandbox.CreatedAt),
}
// Set ready sandboxes to be active.
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
sandboxInfo.active = true
}
// Set sandboxes that still have containers to be active.
hasContainers := false
sandboxID := sandbox.Id
for _, container := range containers {
if container.PodSandboxId == sandboxID {
hasContainers = true
break
}
}
if hasContainers {
sandboxInfo.active = true
}
sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
}
// Sort the sandboxes by age.
for uid := range sandboxesByPod {
sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
} else {
// Keep latest one if the pod still exists.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
}
}
return nil
}
先获取机器上所有的容器和sandbox,如果pod的状态是0则致为active状态,如果此sandbox还有运行的container也认为是active状态,接着对sandbox进行排序,如果sandbox所属的pod的已经被删除那么删除所有的sandbox,如果pod还存在那么就留下最新的一个sandbox其他的都删除.
最后一步是清除container和pod日志
// evictPodLogsDirectories evicts all evictable pod logs directories. Pod logs directories
// are evictable if there are no corresponding pods.
func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
osInterface := cgc.manager.osInterface
if allSourcesReady {
// Only remove pod logs directories when all sources are ready.
dirs, err := osInterface.ReadDir(podLogsRootDirectory)
if err != nil {
return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
}
for _, dir := range dirs {
name := dir.Name()
podUID := types.UID(name)
if !cgc.isPodDeleted(podUID) {
continue
}
err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
if err != nil {
glog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
}
}
}
// Remove dead container log symlinks.
// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
for _, logSymlink := range logSymlinks {
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
err := osInterface.Remove(logSymlink)
if err != nil {
glog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
}
}
}
return nil
}
首先会读取/var/log/pods
目录下面的子目录,下面的目录名称都是pod的uid,如果pod已经删除那么直接把pod所属的目录删除,然后删除/var/log/containers
目录下的软连接。
至此单次ContainerGC的流程结束。