pod log previous日志原理

2023-05-29  本文已影响0人  wwq2020

背景

当pod crash后,我们往往需要查看pod的previous log(即上一次退出的容器的日志),来定位容器启动失败或异常退出的原因

原理

所有pod的日志其实放在/var/log/pods,如


image.png

/var/log/pods/{namespace}_{podname}_{poduid}目录下有该pod中各个容器的目录,如


image.png

/var/log/pods/{namespace}_{podname}_{poduid}/{containerName}目录下会有{restartcount}.log,如


image.png

可以发现上面的log实际是一个符号链接,它指向的是docker中对应容器的log文件

相关代码

生成log链接

pkg/kubelet/kuberuntime/kuberuntime_container.go中

// startContainer (excerpt) generates the container config, runs the internal
// pre-create hook, and asks the runtime service to create the container.
// On failure, each step shown here records a warning event and returns the
// gRPC status message together with a step-specific sentinel error.
func (m *kubeGenericRuntimeManager) startContainer(ctx context.Context, podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, spec *startSpec, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string, podIPs []string) (string, error) {
...
    // restartCount is passed into the config generation; the generated config
    // is what is later handed to CreateContainer.
    containerConfig, cleanupAction, err := m.generateContainerConfig(ctx, container, pod, restartCount, podIP, imageRef, podIPs, target)
    // The cleanup is registered before the error check, so it also runs when
    // config generation failed.
    if cleanupAction != nil {
        defer cleanupAction()
    }
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
        return s.Message(), ErrCreateContainerConfig
    }

    // Internal lifecycle hook invoked with the generated config before the
    // container is created.
    err = m.internalLifecycle.PreCreateContainer(pod, container, containerConfig)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Internal PreCreateContainer hook failed: %v", s.Message())
        return s.Message(), ErrPreCreateHook
    }

    // The container config (including its LogPath) is passed to the runtime
    // service together with the sandbox ID and sandbox config.
    containerID, err := m.runtimeService.CreateContainer(ctx, podSandboxID, containerConfig, podSandboxConfig)
    if err != nil {
        s, _ := grpcstatus.FromError(err)
        m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
        return s.Message(), ErrCreateContainer
    }
...

pkg/kubelet/kuberuntime/kuberuntime_container.go

// generateContainerConfig (excerpt) builds the runtimeapi.ContainerConfig for
// a container. The part shown here creates the container's log directory and
// sets the config's LogPath from the container name and restartCount.
func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context, container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string, podIPs []string, nsTarget *kubecontainer.ContainerID) (*runtimeapi.ContainerConfig, func(), error) {
...
// Per-container log directory, derived from the pod's namespace, name and UID
// plus the container name; created below with mode 0755 if missing.
logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
    err = m.osInterface.MkdirAll(logDir, 0755)
    if err != nil {
        return nil, cleanupAction, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err)
    }
    // The log path is built from the container name and restartCount, so each
    // restart gets its own path.
    containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
    restartCountUint32 := uint32(restartCount)
    config := &runtimeapi.ContainerConfig{
        Metadata: &runtimeapi.ContainerMetadata{
            Name:    container.Name,
            Attempt: restartCountUint32,
        },
        Image:       &runtimeapi.ImageSpec{Image: imageRef},
        Command:     command,
        Args:        args,
        WorkingDir:  container.WorkingDir,
        Labels:      newContainerLabels(container, pod),
        Annotations: newContainerAnnotations(container, pod, restartCount, opts),
        Devices:     makeDevices(opts),
        CDIDevices:  makeCDIDevices(opts),
        Mounts:      m.makeMounts(opts, container),
        // Handed to the container runtime as part of the config.
        LogPath:     containerLogsPath,
        Stdin:       container.Stdin,
        StdinOnce:   container.StdinOnce,
        Tty:         container.TTY,
    }
...
}

pkg/kubelet/kuberuntime/helpers.go

// BuildContainerLogsDirectory returns the log directory of a single container:
// the pod's log directory (as returned by BuildPodLogsDirectory for the given
// namespace, name and UID) joined with containerName.
func BuildContainerLogsDirectory(podNamespace, podName string, podUID types.UID, containerName string) string {
    podLogsDir := BuildPodLogsDirectory(podNamespace, podName, podUID)
    return filepath.Join(podLogsDir, containerName)
}

查看previous log

pkg/kubelet/kubelet_pods.go中

// GetKubeletContainerLogs (excerpt) resolves which container's logs to read
// and then delegates the actual read to the container runtime.
func (kl *Kubelet) GetKubeletContainerLogs(ctx context.Context, podFullName, containerName string, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) error {
...
// logOptions.Previous selects whether the previously terminated container's
// ID is resolved instead of the current one.
containerID, err := kl.validateContainerLogStatus(pod.Name, &podStatus, containerName, logOptions.Previous)
    if err != nil {
        return err
    }
...
    // The resolved container ID is what the runtime uses to locate the logs.
    return kl.containerRuntime.GetContainerLogs(ctx, pod, containerID, logOptions, stdout, stderr)

...
}

pkg/kubelet/kubelet_pods.go中

// validateContainerLogStatus (excerpt) picks the container ID whose logs
// should be read. Only the previous == true branch is shown here.
func (kl *Kubelet) validateContainerLogStatus(podName string, podStatus *v1.PodStatus, containerName string, previous bool) (containerID kubecontainer.ContainerID, err error) {
...
    switch {
    case previous:
        // Previous logs require a recorded terminated state with a non-empty
        // container ID (lastState is assigned in the elided code above).
        if lastState.Terminated == nil || lastState.Terminated.ContainerID == "" {
            return kubecontainer.ContainerID{}, fmt.Errorf("previous terminated container %q in pod %q not found", containerName, podName)
        }
        // Use the ID of the last terminated container.
        cID = lastState.Terminated.ContainerID
...
}

pkg/kubelet/kuberuntime/kuberuntime_container.go

// GetContainerLogs looks up the status of the container identified by
// containerID through the runtime service and streams the log file found at
// the status's log path to stdout/stderr via ReadLogs.
//
// If the status lookup fails, the underlying error is only logged at
// verbosity 4 and a generic error naming the container is returned. A nil
// status yields remote.ErrContainerStatusNil.
func (m *kubeGenericRuntimeManager) GetContainerLogs(ctx context.Context, pod *v1.Pod, containerID kubecontainer.ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) (err error) {
    id := containerID.ID
    statusResp, err := m.runtimeService.ContainerStatus(ctx, id, false)
    if err != nil {
        klog.V(4).InfoS("Failed to get container status", "containerID", containerID.String(), "err", err)
        return fmt.Errorf("unable to retrieve container logs for %v", containerID.String())
    }
    if containerStatus := statusResp.GetStatus(); containerStatus != nil {
        // The log path comes from the runtime's status, not from a locally built path.
        logPath := containerStatus.GetLogPath()
        return m.ReadLogs(ctx, logPath, id, logOptions, stdout, stderr)
    }
    return remote.ErrContainerStatusNil
}

总结

创建pod中的容器时,kubelet会根据restartcount生成日志路径({restartcount}.log),并作为LogPath参数传给容器运行时
查询pod的previous log时,kubelet会先取到上一次退出的容器的id,然后通过容器运行时拿到该容器的logpath,继而读取日志

上一篇下一篇

猜你喜欢

热点阅读