源码分析 shutdown 与 close 的区别
我们知道,在 linux 中一切皆文件,socket
建立后需要关闭,可以使用通用的 close
函数关闭,也可以使用 socket
专有的 shutdown
. 具体有什么差异呢?
SYNOPSIS
#include <sys/socket.h>
int
shutdown(int socket, int how);
DESCRIPTION
The shutdown() call causes all or part of a full-duplex connection on the socket associated with socket to be shut down. If how
is SHUT_RD, further receives will be disallowed. If how is SHUT_WR, further sends will be disallowed. If how is SHUT_RDWR, fur-
ther sends and receives will be disallowed.
SYNOPSIS
#include <unistd.h>
int
close(int fildes);
DESCRIPTION
The close() call deletes a descriptor from the per-process object reference table. If this is the last reference to the underly-
ing object, the object will be deactivated. For example, on the last close of a file the current seek pointer associated with the
file is lost; on the last close of a socket(2) associated naming information and queued data are discarded; on the last close of a
file holding an advisory lock the lock is released (see further flock(2)).
......
-
shutdown
粒度比较细,可以控制读或者写,而close
只是关闭文件 -
close
涉及到文件的引用计数,如果计数为 0 才是真正的关闭。一个进程fork
时子进程默认继承父进程所有打开文件,此时计数加一,或是close_on_exec
关闭掉,那么这里的就是close
- 从影响上来看,如果一个
socket
文件被很多进程打开,那么shutdown
影响所有进程。而close
只是计数减一,并不影响其它进程 -
close
属于更高层的抽象,属于 fs 文件系统级别的,shutdown
更底一些
从 man 手册可以看到如上区别,那么具体实现如何呢?去看内核源码好了。
close 实现
fs 接口层实现
close
属于文件系统层面的操作,通过系统调用看具体实现。
/*
 * close(2) system call entry point.
 *
 * Removes @fd from the calling process's file table via __close_fd().
 * Any restart-style error codes are mapped to -EINTR: the fd slot has
 * already been cleared, so the syscall cannot safely be restarted.
 */
SYSCALL_DEFINE1(close, unsigned int, fd)
{
	int retval = __close_fd(current->files, fd);

	/* can't restart close syscall because file table entry was cleared */
	if (unlikely(retval == -ERESTARTSYS ||
		     retval == -ERESTARTNOINTR ||
		     retval == -ERESTARTNOHAND ||
		     retval == -ERESTART_RESTARTBLOCK))
		retval = -EINTR;

	return retval;
}
__close_fd
关闭 fd 入口,current->files
表示当前进程打开的文件
/*
 * Close one file descriptor belonging to a process.
 * @files: the process's open-file table (current->files for close(2)).
 * @fd:    the descriptor to close.
 *
 * Detaches the struct file from the fd table under file_lock, releases
 * the fd number for reuse, then hands the file to filp_close().
 * Returns -EBADF if @fd is out of range or not currently open.
 */
int __close_fd(struct files_struct *files, unsigned fd)
{
	struct file *file;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);	/* protects the fd table */
	fdt = files_fdtable(files);
	if (fd >= fdt->max_fds)
		goto out_unlock;
	file = fdt->fd[fd];
	if (!file)
		goto out_unlock;
	/* remove the file from this process's descriptor table */
	rcu_assign_pointer(fdt->fd[fd], NULL);
	__put_unused_fd(files, fd);	/* release the fd number for reuse */
	spin_unlock(&files->file_lock);
	/* flush and drop a reference on the now-detached file */
	return filp_close(file, files);

out_unlock:
	spin_unlock(&files->file_lock);
	return -EBADF;
}
__close_fd
重要的工作是从文件表里找到 fd 对应的 file,然后从列表里删除,顺便 __put_unused_fd
把 fd 释放出来。再调用 filp_close
关闭文件
/*
 * Close a file after it has been removed from the fd table.
 *
 * Flushes pending data if the filesystem provides ->flush, removes
 * dnotify watches and POSIX locks owned by @id, then drops one
 * reference via fput(). The file is only truly destroyed when the
 * last reference goes away.
 */
int filp_close(struct file *filp, fl_owner_t id)
{
	int retval = 0;

	if (!file_count(filp)) {
		/* refcount already zero: double close or table corruption */
		printk(KERN_ERR "VFS: Close: file count is 0\n");
		return 0;
	}

	if (filp->f_op->flush)
		retval = filp->f_op->flush(filp, id);	/* flush data first if supported */

	if (likely(!(filp->f_mode & FMODE_PATH))) {
		dnotify_flush(filp, id);
		locks_remove_posix(filp, id);
	}
	fput(filp);	/* drop one reference; real teardown is deferred */
	return retval;
}
此时 file 已经不在进程打开文件列表里了,调用 fput
来异步关闭文件
/* work item used to close files from contexts that cannot run task work */
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);

/*
 * Drop @refs references on @file.
 *
 * Only when the reference count reaches zero is the file actually torn
 * down (____fput -> __fput); otherwise this does nothing. Teardown is
 * deferred: normally it is queued as task work to run when the current
 * task returns to userspace; interrupt context, kernel threads, or a
 * task that has already run exit_task_work() fall back to the global
 * delayed workqueue so *file is never leaked.
 */
void fput_many(struct file *file, unsigned int refs)
{
	if (atomic_long_sub_and_test(refs, &file->f_count)) {
		struct task_struct *task = current;

		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
			init_task_work(&file->f_u.fu_rcuhead, ____fput);
			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
				return;
			/*
			 * After this task has run exit_task_work(),
			 * task_work_add() will fail. Fall through to delayed
			 * fput to avoid leaking *file.
			 */
		}

		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
			schedule_delayed_work(&delayed_fput_work, 1);
	}
}
首先 atomic_long_sub_and_test
将引用计数减一,如果为 0 了,那么走后面关闭的逻辑,也就是说引用计数大于 0 ,本次操作什么也不做的。delayed_fput_work
是一个异步的 workqueue 队列,专职用于关闭文件,对应调用函数是 delayed_fput
. workqueue 原理暂时不看了,反正就是异步队列
/*
 * Final teardown of a struct file, run once its refcount hits zero
 * (invoked via task work or the delayed-fput workqueue). This is the
 * generic VFS path for any file type; for a socket, ->release points
 * at sock_close, which is where the network stack takes over.
 */
static void __fput(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct vfsmount *mnt = file->f_path.mnt;
	struct inode *inode = file->f_inode;
	fmode_t mode = file->f_mode;

	if (unlikely(!(file->f_mode & FMODE_OPENED)))
		goto out;

	might_sleep();

	fsnotify_close(file);	/* tell fsnotify listeners the file closed */
	/*
	 * The function eventpoll_release() should be the first called
	 * in the file cleanup chain.
	 */
	eventpoll_release(file);	/* unhook from any epoll instances */
	locks_remove_file(file);
	ima_file_free(file);
	if (unlikely(file->f_flags & FASYNC)) {
		if (file->f_op->fasync)
			file->f_op->fasync(-1, file, 0);	/* tear down async notification */
	}
	/* type-specific close callback; sock_close for sockets */
	if (file->f_op->release)
		file->f_op->release(inode, file);
	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
		     !(mode & FMODE_PATH))) {
		cdev_put(inode->i_cdev);	/* char device: drop cdev reference */
	}
	fops_put(file->f_op);
	put_pid(file->f_owner.pid);
	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_dec(inode);
	if (mode & FMODE_WRITER) {
		put_write_access(inode);
		__mnt_drop_write(mnt);
	}
	dput(dentry);	/* drop the dentry reference */
	if (unlikely(mode & FMODE_NEED_UNMOUNT))
		dissolve_on_fput(mnt);
	mntput(mnt);	/* drop the mount reference */
out:
	file_free(file);
}
__fput
才是真正关闭 fd 代码,首先他是一个泛型的操作,本身 linux 一切皆文件嘛,也好理解。fsnotify
消息通知,特殊处理 epoll file 等等,通用的代码就是回调 ops 函数指针 release
来关闭文件。由之前的源码分析我们知道, 创建 socket
时 sock_alloc_file
会将 socket_file_ops
关联到 ops 函数指针,那么最终 release
实际指向 sock_close
/*
 * VFS file operations for sockets, installed by sock_alloc_file().
 * The key entry for this article is .release = sock_close: this is how
 * a generic close(2)/__fput() on a socket fd ends up in the network
 * stack.
 */
static const struct file_operations socket_file_ops = {
	.owner = THIS_MODULE,
	.llseek = no_llseek,	/* sockets are not seekable */
	.read_iter = sock_read_iter,
	.write_iter = sock_write_iter,
	.poll = sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.mmap = sock_mmap,
	.release = sock_close,	/* called by __fput() on last reference */
	.fasync = sock_fasync,
	.sendpage = sock_sendpage,
	.splice_write = generic_splice_sendpage,
	.splice_read = sock_splice_read,
};
inet 接口层实现
内核的设计是分层的,file system
下面就直接就 sock
层,那么 __sock_release
具体调用哪些具体方法来关闭 sock
,也要看具体类型,所以也是函数指针
/*
 * Tear down a socket that is no longer referenced by any file.
 *
 * Dispatches to the protocol family's ->release() (inet_release for
 * TCP/IPv4), drops the module reference that pins the family, and
 * releases the backing inode when no struct file owns the socket.
 */
static void __sock_release(struct socket *sock, struct inode *inode)
{
	if (sock->ops) {
		struct module *owner = sock->ops->owner;

		if (inode)
			inode_lock(inode);
		sock->ops->release(sock);	/* family-specific release, e.g. inet_release */
		sock->sk = NULL;
		if (inode)
			inode_unlock(inode);
		sock->ops = NULL;
		module_put(owner);	/* allow the protocol module to unload again */
	}

	if (sock->wq->fasync_list)
		pr_err("%s: fasync list not empty!\n", __func__);

	if (!sock->file) {
		iput(SOCK_INODE(sock));	/* no struct file owner: drop the inode here */
		return;
	}
	sock->file = NULL;
}
由前面的分析知道,对于 tcp 来说 sock->ops
实际上是 inet_stream_ops
, 对应 release
指针为 inet_release
/*
 * AF_INET socket release (the ->release hook of inet_stream_ops).
 *
 * Drops any remaining multicast memberships, computes the SO_LINGER
 * timeout, then calls the transport protocol's close (tcp_close for
 * TCP), which performs the actual connection teardown.
 */
int inet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk) {
		long timeout;

		/* Applications forget to leave groups before exiting */
		ip_mc_drop_socket(sk);

		/* If linger is set, we don't return until the close
		 * is complete. Otherwise we return immediately. The
		 * actually closing is done the same either way.
		 *
		 * If the close is due to the process exiting, we never
		 * linger..
		 */
		timeout = 0;
		if (sock_flag(sk, SOCK_LINGER) &&
		    !(current->flags & PF_EXITING))
			timeout = sk->sk_lingertime;
		sk->sk_prot->close(sk, timeout);	/* tcp_close for TCP */
		sock->sk = NULL;
	}
	return 0;
}
inet_stream
这块也做了抽象,因为具体有 ipv4, ipv6 等等实现,所以 sk->sk_prot
还是函数指针,对应 tcp_prot
结构体, close
调用 tcp_close
,走真正的 tcp 四次挥手逻辑,这里还细分主动与被动关闭,下一篇再详细分析
shutdown 实现
首先进入内核态的,肯定都是系统调用,shutdown
也是调用的 sock->ops->shutdown
/* shutdown(2) system call entry point: forwards straight to __sys_shutdown(). */
SYSCALL_DEFINE2(shutdown, int, fd, int, how)
{
	return __sys_shutdown(fd, how);
}
/*
 * Shut down part of a full-duplex socket connection.
 *
 * Looks up the socket by @fd (taking only a light reference), runs the
 * LSM security hook, then dispatches to the family's ->shutdown()
 * (inet_shutdown for AF_INET). Unlike close(2), this does not remove
 * the fd or decrement the file's reference count, which is why it
 * affects every process sharing the socket.
 */
int __sys_shutdown(int fd, int how)
{
	int err, fput_needed;
	struct socket *sock;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock != NULL) {
		err = security_socket_shutdown(sock, how);
		if (!err)
			err = sock->ops->shutdown(sock, how);
		fput_light(sock->file, fput_needed);	/* undo the light lookup reference */
	}
	return err;
}
调用 inet_stream_ops.shutdown
函数指针,指向 inet_shutdown
/*
 * Generic AF_INET shutdown (the ->shutdown hook of inet_stream_ops).
 *
 * Validates @how, reconciles the socket-layer state, records the
 * shutdown direction(s) in sk->sk_shutdown, and delegates to the
 * transport's ->shutdown() (tcp_shutdown for TCP). LISTEN and
 * SYN_SENT sockets are disconnected instead of shut down.
 */
int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* This should really check to make sure
	 * the socket is a TCP socket. (WHY AC...)
	 */
	how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
		  1->2 bit 2 snds.
		  2->3 */
	if ((how & ~SHUTDOWN_MASK) || !how)	/* MAXINT->0 */
		return -EINVAL;

	lock_sock(sk);
	if (sock->state == SS_CONNECTING) {
		if ((1 << sk->sk_state) &
		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
			sock->state = SS_DISCONNECTING;
		else
			sock->state = SS_CONNECTED;
	}

	switch (sk->sk_state) {
	case TCP_CLOSE:
		err = -ENOTCONN;
		/* Hack to wake up other listeners, who can poll for
		   EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
		/* fall through */
	default:
		/* record the shutdown direction(s); checked by recv/send paths */
		sk->sk_shutdown |= how;
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);	/* tcp_shutdown for TCP */
		break;

	/* Remaining two branches are temporary solution for missing
	 * close() in multithreaded environment. It is _not_ a good idea,
	 * but we have no choice until close() is repaired at VFS level.
	 */
	case TCP_LISTEN:
		if (!(how & RCV_SHUTDOWN))
			break;
		/* fall through */
	case TCP_SYN_SENT:
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	}

	/* Wake up anyone sleeping in poll. */
	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}
其实到这里,就看出 shutdown
与 close
的区别了
- 首先判断 how 行为是否合法,SHUT_RD, SHUT_WR, SHUT_RDWR
- 设置 sock->state 状态,这个是
sock
层的,不是 tcp/udp 层 - 判断 tcp/udp state 是否是
TCP_CLOSE
, 如果是的话已经关闭了返回即可,否则默认走shutdown
逻辑,并且设置sk->sk_shutdown
状态 -
release_sock
释放资源?这里有个问题,后续有读写怎么办呢???
/*
 * TCP-specific shutdown: only the send side needs protocol work here.
 * SHUT_RD is implemented purely by the sk_shutdown flag that
 * inet_shutdown() already set (checked later by the recv path). If the
 * write side is being shut down and the connection is in a state that
 * still owes a FIN, transition state and send it, starting the
 * four-way connection teardown.
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/* We need to grab some memory, and put together a FIN,
	 * and then put it into the queue to be sent.
	 * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))	/* SHUT_RD only: nothing to send, flag already set */
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets. FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
- 如果 how 不包含写方向(即只有
SHUT_RD
),那么直接返回,什么也不做 - 如果是
SHUT_WR 或 SHUT_RDWR
, 那么tcp_send_fin
发送 FIN 包走四次挥手逻辑
那么,如果 how 是 SHUT_RD, SHUT_WR 如何生效呢?在哪里起作用呢?其实如果只是关闭读或写,那么 shutdown
只是做个标记而已,具体 tcp_recvmsg
和 tcp_sendmsg
时会判断然后报错。
小结
分析的还是比较浅显,下一篇再看详细的 tcp 四次挥手逻辑