源码分析 shutdown 与 close 的区别
我们知道,在 linux 中一切皆文件,socket
建立后需要关闭,可以使用通用的 close
函数关闭,也可以使用 socket
专有的 shutdown
. 具体有什么差异呢?
SYNOPSIS
#include <sys/socket.h>
int
shutdown(int socket, int how);
DESCRIPTION
The shutdown() call causes all or part of a full-duplex connection on the socket associated with socket to be shut down. If how
is SHUT_RD, further receives will be disallowed. If how is SHUT_WR, further sends will be disallowed. If how is SHUT_RDWR, fur-
ther sends and receives will be disallowed.
SYNOPSIS
#include <unistd.h>
int
close(int fildes);
DESCRIPTION
The close() call deletes a descriptor from the per-process object reference table. If this is the last reference to the underly-
ing object, the object will be deactivated. For example, on the last close of a file the current seek pointer associated with the
file is lost; on the last close of a socket(2) associated naming information and queued data are discarded; on the last close of a
file holding an advisory lock the lock is released (see further flock(2)).
......
-
shutdown
粒度比较细,可以控制读或者写,而close
只是关闭文件 -
close
涉及到文件的引用计数,如果计数为 0 才是真正的关闭。一个进程fork
时子进程默认继承父进程所有打开文件,此时计数加一,或是close_on_exec
关闭掉,那么这里的就是close
- 从影响上来看,如果一个
socket
文件被很多进程打开,那么shutdown
影响所有进程。而close
只是计数减一,并不影响其它进程 -
close
属于更高层的抽象,属于 fs 文件系统级别的,shutdown
更底一些
从 man 手册可以看到如上区别,那么具体实现如何呢?去看内核源码好了。
close 实现
fs 接口层实现
close
属于文件系统层面的操作,通过系统调用看具体实现。
/*
 * close(2) system call entry point.
 *
 * Removes @fd from the calling process's file table via __close_fd().
 * Any restart-style error codes are mapped to -EINTR: the fd slot has
 * already been cleared, so the syscall cannot safely be restarted.
 */
SYSCALL_DEFINE1(close, unsigned int, fd)
{
	int retval = __close_fd(current->files, fd);

	/* can't restart close syscall because file table entry was cleared */
	if (unlikely(retval == -ERESTARTSYS ||
		     retval == -ERESTARTNOINTR ||
		     retval == -ERESTARTNOHAND ||
		     retval == -ERESTART_RESTARTBLOCK))
		retval = -EINTR;

	return retval;
}
__close_fd
关闭 fd 入口,current->files
表示当前进程打开的文件
/*
 * Close one file descriptor belonging to a process.
 * @files: the process's open-file table (current->files for close(2)).
 * @fd:    the descriptor to close.
 *
 * Detaches the struct file from the fd table under file_lock, releases
 * the fd number for reuse, then hands the file to filp_close().
 * Returns -EBADF if @fd is out of range or not currently open.
 */
int __close_fd(struct files_struct *files, unsigned fd)
{
	struct file *file;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);	/* protects the fd table */
	fdt = files_fdtable(files);
	if (fd >= fdt->max_fds)
		goto out_unlock;
	file = fdt->fd[fd];
	if (!file)
		goto out_unlock;
	/* remove the file from this process's descriptor table */
	rcu_assign_pointer(fdt->fd[fd], NULL);
	__put_unused_fd(files, fd);	/* release the fd number for reuse */
	spin_unlock(&files->file_lock);
	/* flush and drop a reference on the now-detached file */
	return filp_close(file, files);

out_unlock:
	spin_unlock(&files->file_lock);
	return -EBADF;
}
__close_fd
重要的工作是从文件表里找到 fd 对应的 file,然后从列表里删除,顺便 __put_unused_fd
把 fd 释放出来。再调用 filp_close
关闭文件
/*
 * Close a file after it has been removed from the fd table.
 *
 * Flushes pending data if the filesystem provides ->flush, removes
 * dnotify watches and POSIX locks owned by @id, then drops one
 * reference via fput(). The file is only truly destroyed when the
 * last reference goes away.
 */
int filp_close(struct file *filp, fl_owner_t id)
{
	int retval = 0;

	if (!file_count(filp)) {
		/* refcount already zero: double close or table corruption */
		printk(KERN_ERR "VFS: Close: file count is 0\n");
		return 0;
	}

	if (filp->f_op->flush)
		retval = filp->f_op->flush(filp, id);	/* flush data first if supported */

	if (likely(!(filp->f_mode & FMODE_PATH))) {
		dnotify_flush(filp, id);
		locks_remove_posix(filp, id);
	}
	fput(filp);	/* drop one reference; real teardown is deferred */
	return retval;
}
此时 file 已经不在进程打开文件列表里了,调用 fput
来异步关闭文件
/* work item used to close files from contexts that cannot run task work */
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);

/*
 * Drop @refs references on @file.
 *
 * Only when the reference count reaches zero is the file actually torn
 * down (____fput -> __fput); otherwise this does nothing. Teardown is
 * deferred: normally it is queued as task work to run when the current
 * task returns to userspace; interrupt context, kernel threads, or a
 * task that has already run exit_task_work() fall back to the global
 * delayed workqueue so *file is never leaked.
 */
void fput_many(struct file *file, unsigned int refs)
{
	if (atomic_long_sub_and_test(refs, &file->f_count)) {
		struct task_struct *task = current;

		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
			init_task_work(&file->f_u.fu_rcuhead, ____fput);
			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
				return;
			/*
			 * After this task has run exit_task_work(),
			 * task_work_add() will fail. Fall through to delayed
			 * fput to avoid leaking *file.
			 */
		}

		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
			schedule_delayed_work(&delayed_fput_work, 1);
	}
}
首先 atomic_long_sub_and_test
将引用计数减一,如果为 0 了,那么走后面关闭的逻辑,也就是说引用计数大于 0 ,本次操作什么也不做的。delayed_fput_work
是一个异步的 workqueue 队列,专职用于关闭文件,对应调用函数是 delayed_fput
. workqueue 原理暂时不看了,反正就是异步队列
/*
 * Final teardown of a struct file, run once its refcount hits zero
 * (invoked via task work or the delayed-fput workqueue). This is the
 * generic VFS path for any file type; for a socket, ->release points
 * at sock_close, which is where the network stack takes over.
 */
static void __fput(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct vfsmount *mnt = file->f_path.mnt;
	struct inode *inode = file->f_inode;
	fmode_t mode = file->f_mode;

	if (unlikely(!(file->f_mode & FMODE_OPENED)))
		goto out;

	might_sleep();

	fsnotify_close(file);	/* tell fsnotify listeners the file closed */
	/*
	 * The function eventpoll_release() should be the first called
	 * in the file cleanup chain.
	 */
	eventpoll_release(file);	/* unhook from any epoll instances */
	locks_remove_file(file);
	ima_file_free(file);
	if (unlikely(file->f_flags & FASYNC)) {
		if (file->f_op->fasync)
			file->f_op->fasync(-1, file, 0);	/* tear down async notification */
	}
	/* type-specific close callback; sock_close for sockets */
	if (file->f_op->release)
		file->f_op->release(inode, file);
	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
		     !(mode & FMODE_PATH))) {
		cdev_put(inode->i_cdev);	/* char device: drop cdev reference */
	}
	fops_put(file->f_op);
	put_pid(file->f_owner.pid);
	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_dec(inode);
	if (mode & FMODE_WRITER) {
		put_write_access(inode);
		__mnt_drop_write(mnt);
	}
	dput(dentry);	/* drop the dentry reference */
	if (unlikely(mode & FMODE_NEED_UNMOUNT))
		dissolve_on_fput(mnt);
	mntput(mnt);	/* drop the mount reference */
out:
	file_free(file);
}
__fput
才是真正关闭 fd 代码,首先他是一个泛型的操作,本身 linux 一切皆文件嘛,也好理解。fsnotify
消息通知,特殊处理 epoll file 等等,通用的代码就是回调 ops 函数指针 release
来关闭文件。由之前的源码分析我们知道, 创建 socket
时 sock_alloc_file
会将 socket_file_ops
关联到 ops 函数指针,那么最终 release
实际指向 sock_close
/*
 * VFS file operations for sockets, installed by sock_alloc_file().
 * The key entry for this article is .release = sock_close: this is how
 * a generic close(2)/__fput() on a socket fd ends up in the network
 * stack.
 */
static const struct file_operations socket_file_ops = {
	.owner = THIS_MODULE,
	.llseek = no_llseek,	/* sockets are not seekable */
	.read_iter = sock_read_iter,
	.write_iter = sock_write_iter,
	.poll = sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.mmap = sock_mmap,
	.release = sock_close,	/* called by __fput() on last reference */
	.fasync = sock_fasync,
	.sendpage = sock_sendpage,
	.splice_write = generic_splice_sendpage,
	.splice_read = sock_splice_read,
};
inet 接口层实现
内核的设计是分层的,file system
下面就直接就 sock
层,那么 __sock_release
具体调用哪些具体方法来关闭 sock
,也要看具体类型,所以也是函数指针
/*
 * Tear down a socket that is no longer referenced by any file.
 *
 * Dispatches to the protocol family's ->release() (inet_release for
 * TCP/IPv4), drops the module reference that pins the family, and
 * releases the backing inode when no struct file owns the socket.
 */
static void __sock_release(struct socket *sock, struct inode *inode)
{
	if (sock->ops) {
		struct module *owner = sock->ops->owner;

		if (inode)
			inode_lock(inode);
		sock->ops->release(sock);	/* family-specific release, e.g. inet_release */
		sock->sk = NULL;
		if (inode)
			inode_unlock(inode);
		sock->ops = NULL;
		module_put(owner);	/* allow the protocol module to unload again */
	}

	if (sock->wq->fasync_list)
		pr_err("%s: fasync list not empty!\n", __func__);

	if (!sock->file) {
		iput(SOCK_INODE(sock));	/* no struct file owner: drop the inode here */
		return;
	}
	sock->file = NULL;
}
由前面的分析知道,对于 tcp 来说 sock->ops
实际上是 inet_stream_ops
, 对应 release
指针为 inet_release
/*
 * AF_INET socket release (the ->release hook of inet_stream_ops).
 *
 * Drops any remaining multicast memberships, computes the SO_LINGER
 * timeout, then calls the transport protocol's close (tcp_close for
 * TCP), which performs the actual connection teardown.
 */
int inet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk) {
		long timeout;

		/* Applications forget to leave groups before exiting */
		ip_mc_drop_socket(sk);

		/* If linger is set, we don't return until the close
		 * is complete. Otherwise we return immediately. The
		 * actually closing is done the same either way.
		 *
		 * If the close is due to the process exiting, we never
		 * linger..
		 */
		timeout = 0;
		if (sock_flag(sk, SOCK_LINGER) &&
		    !(current->flags & PF_EXITING))
			timeout = sk->sk_lingertime;
		sk->sk_prot->close(sk, timeout);	/* tcp_close for TCP */
		sock->sk = NULL;
	}
	return 0;
}
inet_stream
这块也做了抽象,因为具体有 ipv4, ipv6 等等实现,所以 sk->sk_prot
还是函数指针,对应 tcp_prot
结构体, close
调用 tcp_close
,走真正的 tcp 四次挥手逻辑,这里还细分主动与被动关闭,下一篇再详细分析
shutdown 实现
首先进入内核态的,肯定都是系统调用,shutdown
也是调用的 sock->ops->shutdown
/* shutdown(2) system call entry point: forwards straight to __sys_shutdown(). */
SYSCALL_DEFINE2(shutdown, int, fd, int, how)
{
	return __sys_shutdown(fd, how);
}
/*
 * Shut down part of a full-duplex socket connection.
 *
 * Looks up the socket by @fd (taking only a light reference), runs the
 * LSM security hook, then dispatches to the family's ->shutdown()
 * (inet_shutdown for AF_INET). Unlike close(2), this does not remove
 * the fd or decrement the file's reference count, which is why it
 * affects every process sharing the socket.
 */
int __sys_shutdown(int fd, int how)
{
	int err, fput_needed;
	struct socket *sock;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock != NULL) {
		err = security_socket_shutdown(sock, how);
		if (!err)
			err = sock->ops->shutdown(sock, how);
		fput_light(sock->file, fput_needed);	/* undo the light lookup reference */
	}
	return err;
}
调用 inet_stream_ops.shutdown
函数指针,指向 inet_shutdown
/*
 * Generic AF_INET shutdown (the ->shutdown hook of inet_stream_ops).
 *
 * Validates @how, reconciles the socket-layer state, records the
 * shutdown direction(s) in sk->sk_shutdown, and delegates to the
 * transport's ->shutdown() (tcp_shutdown for TCP). LISTEN and
 * SYN_SENT sockets are disconnected instead of shut down.
 */
int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* This should really check to make sure
	 * the socket is a TCP socket. (WHY AC...)
	 */
	how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
		  1->2 bit 2 snds.
		  2->3 */
	if ((how & ~SHUTDOWN_MASK) || !how)	/* MAXINT->0 */
		return -EINVAL;

	lock_sock(sk);
	if (sock->state == SS_CONNECTING) {
		if ((1 << sk->sk_state) &
		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
			sock->state = SS_DISCONNECTING;
		else
			sock->state = SS_CONNECTED;
	}

	switch (sk->sk_state) {
	case TCP_CLOSE:
		err = -ENOTCONN;
		/* Hack to wake up other listeners, who can poll for
		   EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
		/* fall through */
	default:
		/* record the shutdown direction(s); checked by recv/send paths */
		sk->sk_shutdown |= how;
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);	/* tcp_shutdown for TCP */
		break;

	/* Remaining two branches are temporary solution for missing
	 * close() in multithreaded environment. It is _not_ a good idea,
	 * but we have no choice until close() is repaired at VFS level.
	 */
	case TCP_LISTEN:
		if (!(how & RCV_SHUTDOWN))
			break;
		/* fall through */
	case TCP_SYN_SENT:
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	}

	/* Wake up anyone sleeping in poll. */
	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}
其实到这里,就看出 shutdown
与 close
的区别了
- 首先判断 how 行为是否合法,SHUT_RD, SHUT_WR, SHUT_RDWR
- 设置 sock->state 状态,这个是
sock
层的,不是 tcp/udp 层 - 判断 tcp/udp state 是否是
TCP_CLOSE
, 如果是的话已经关闭了返回即可,否则默认走shutdown
逻辑,并且设置sk->sk_shutdown
状态 -
release_sock
释放资源?这里有个问题,后续有读写怎么办呢???
/*
 * TCP-specific shutdown: only the send side needs protocol work here.
 * SHUT_RD is implemented purely by the sk_shutdown flag that
 * inet_shutdown() already set (checked later by the recv path). If the
 * write side is being shut down and the connection is in a state that
 * still owes a FIN, transition state and send it, starting the
 * four-way connection teardown.
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/* We need to grab some memory, and put together a FIN,
	 * and then put it into the queue to be sent.
	 * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))	/* SHUT_RD only: nothing to send, flag already set */
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets. FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
- 如果 how 不包含写方向(即只有
SHUT_RD
),那么直接返回,什么也不做 - 如果是
SHUT_WR 或 SHUT_RDWR
, 那么tcp_send_fin
发送 FIN 包走四次挥手逻辑
那么,如果 how 是 SHUT_RD, SHUT_WR 如何生效呢?在哪里起作用呢?其实如果只是关闭读或写,那么 shutdown
只是做个标记而已,具体 tcp_recvmsg
和 tcp_sendmsg
时会判断然后报错。
小结
分析的还是比较浅显,下一篇再看详细的 tcp 四次挥手逻辑