Linux I/O总结 -- 基础概念篇

2019-06-01  本文已影响0人  山中散人的博客

文件描述符

0, 1, 2 文件描述符(fd)
fd与文件
进程文件表
fd重定向

新创建的文件

文件的用户访问权限
设置文件访问权限,读写和执行

写文件的过程

文件推迟写
内核回写文件
内核读缓存
内核写顺序
I/O错误
内核回写规则

读文件的过程

发出读指令
读系统调用

append写

消除潜在的竞争,自动设置文件位置

同步写操作

直接I/O

多路复用I/O

  1. issue multiplexed I/O: ask to be told when any of the fds is ready for I/O
  2. sleep until one or more fds are ready
  3. wake up the process
  4. handle all fds that are ready
  5. go back to sleep

I/O的内核内幕

相关的API(系统调用)

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
// Maps the file given by the pathname `name` to a file descriptor (fd). The file position
// starts at the beginning of the file; the access mode is specified through `flags`.
/*
flags are combined with bitwise OR
O_RDONLY, O_WRONLY, or O_RDWR: the access mode
O_APPEND: the file position is moved to the end of the file before each write, so it always matches the real end of the file, even after writes from another process
O_ASYNC: a signal (e.g. SIGIO) is generated when the file becomes readable or writable; only for FIFOs, pipes, sockets and terminals
O_CLOEXEC: automatically close the file upon executing a new process, which eliminates a race condition
O_CREAT: create the file if it does not exist; no effect otherwise
O_DIRECTORY: the target is a directory; exists to support opendir()
O_EXCL: causes O_CREAT to fail if the file already exists; used to prevent race conditions on file creation
O_LARGEFILE: the target file may be larger than 2 GB; the file is opened with 64-bit offsets for the file position
O_NONBLOCK: open the file in nonblocking mode; no operation will cause the process to block on I/O; this behavior is defined for FIFOs
O_SYNC: open the file for synchronous I/O; a write does not complete until the data has been physically written to disk; no effect on reads, since they are already synchronous
O_TRUNC: for a regular file, the file is truncated to zero length when opened; do not use O_TRUNC | O_RDONLY, the result is undefined
*/
int open (const char *name, int flags);
/*
    mode: permission bits, one row per class of user
    owner  - S_IRWXU (read/write/execute)  S_IRUSR (read only)  S_IWUSR (write only)  S_IXUSR (execute only)
    group  - S_IRWXG                       S_IRGRP              S_IWGRP               S_IXGRP
    others - S_IRWXO                       S_IROTH              S_IWOTH               S_IXOTH
*/
int open (const char *name, int flags, mode_t mode);
// Shortcut for open() with the flags O_WRONLY | O_CREAT | O_TRUNC.
// Returns the fd on success; on error, returns -1 and sets errno.
int creat (const char *name, mode_t mode);
#include <unistd.h>
/* Reads up to len bytes from the current file position into the memory pointed to by buf.
   Returns the number of bytes read; the file position is advanced by the same amount.
  - the returned size may be less than len: fewer than len bytes may be available, the system call may have been interrupted by a signal, or the pipe may be broken
  - the returned size may be 0 to indicate EOF (end of file); in blocking mode, read() instead waits when more data may still arrive (socket or device file) */
ssize_t read (int fd, void *buf, size_t len);
// Read all len bytes, handling errors and partial reads.
/*
   Possible errno values: EBADF, EINVAL (bad fd), EFAULT (bad read buffer), EIO
*/
ssize_t ret; // a single read() cannot report more than SSIZE_MAX bytes
if (len > SSIZE_MAX)
    len = SSIZE_MAX;

// Keep reading until everything requested has arrived or EOF (read() == 0) is reached.
while (len != 0 && (ret = read(fd, buf, len)) != 0)
{
    if (ret == -1)
    {
        if (errno == EINTR) // interrupted by a signal: reissue the call
            continue;
        if (errno == EAGAIN) // nonblocking read with no data available right now:
            break;           // not an error, resubmit the read later
        perror("read");
        break;
    }
    len -= ret; // partial read: only ret bytes arrived, keep asking for the rest
    buf += ret; // NOTE(review): needs buf to be a byte pointer (e.g. char *), not void *
}
/* Writes up to count bytes starting at buf to the current file position. For files that do not support seeking (character devices), writes always start at the 'head'. Returns the number of bytes written. */
/*
For regular files, write() is guaranteed not to perform partial writes; for other file types, e.g. sockets, partial writes may happen.
*/
ssize_t write (int fd, const void *buf, size_t count);
// Write all len bytes, handling errors and partial writes.
ssize_t ret;
while (len != 0 && (ret = write(fd, buf, len)) != 0)
{
    if (ret == -1)
    {
        if (errno == EINTR) // interrupted by a signal: reissue the call
            continue;
        perror("write");
        break;
    }
    len -= ret; // partial write: only ret bytes were accepted, keep writing the rest
    buf += ret; // NOTE(review): needs buf to be a byte pointer (e.g. const char *), not void *
}
/* Ensures that all dirty data and metadata (creation timestamp, attributes) associated with the file are written back to disk; the call does not return until the disk reports that the data has arrived. */
// Writes back the data and updates the inode's data, which may involve an expensive seek operation.
int fsync (int fd);
/* Flushes only the data and the metadata essential for accessing the file (e.g. the file size), which is potentially faster; nonessential metadata (e.g. the modification timestamp) is not guaranteed to be synchronized. */
int fdatasync (int fd);
// All buffers, both data and metadata, are guaranteed to reside on disk.
void sync (void);
//setting sync flag
//O_DSYNC: sync only normal data  O_RSYNC
fd = open (file, O_WRONLY | O_SYNC);
// Unmaps the fd: the fd becomes invalid and is returned to the OS. Closing a file does not
//   imply that the file has been flushed to disk; use a sync call to ensure that the file
//   is committed to disk.
// When the last open fd referring to a file is closed, the data structure representing the
//   file inside the kernel is freed, which unpins the in-memory copy of the file's inode;
//   if nothing else is pinning the inode, it may be freed from memory as well.
// If a file was unlinked while it was still open, it is not physically removed until it is
//   closed and its inode is removed from memory. Therefore, close() can result in an
//   unlinked file finally being physically removed from disk.
int close (int fd);
// Sets the file position of an fd to a given value, which allows jumping around in a file.
/*
    origin - the reference point the new position is computed from
    SEEK_CUR: current file position + pos
    SEEK_END: end-of-file position + pos
    SEEK_SET: pos (absolute position)
*/
// Returns the new file position.
off_t lseek (int fd, off_t pos, int origin);

//get current file pos
int pos;
pos = lseek (fd, 0, SEEK_CUR);

// Set the file position to the end of the file.
off_t ret;
ret = lseek (fd, 0, SEEK_END);
if (ret == (off_t) -1) // lseek() signals failure with (off_t) -1
    return -1;

// Set the file position to an absolute offset (byte 1825).
off_t ret;
ret = lseek(fd, (off_t)1825, SEEK_SET);
if (ret == (off_t) -1) // lseek() signals failure with (off_t) -1
    return -1;
//seeking past the end of the file
//  -a read request to the newly created file pos will ret EOF
//  -write request to this pos, new space will be created between old length and new length, 
//      and padded with zeros
//  -zero padding makes a hole, a hole on Unix-style filesystem do not occupy any physical
//      disk space (which implies the total size of files can add up to more than the physical
//      disk size)
//  -files with holes called sparse files, sparse file can enhance performance since manipulating
//      holes doesn't initiate physical IO
//

int ret;
//size of off_t is often C long type, which is generally the word size (the size of
//  machine's general-propose registers), 32-bit machine may encounter EOVERFLOW errors
ret = lseek (fd, (off_t) 1688, SEEK_END);
if (ret == (off_t) −1)
    return -1;
// Unlike read() and write(), pread() and pwrite() do not update the file position
//   upon completion; they completely ignore the current file position.
// They enable the caller to do tricky operations such as moving through the file backward
//   or randomly, and they avoid the potential races that can occur when using lseek().
ssize_t pread (int fd, void *buf, size_t count, off_t pos);
ssize_t pwrite (int fd, const void *buf, size_t count, off_t pos);
// Two system calls for changing the length of a file to len (smaller or larger than the
//   current length); the file must be writable.
int ftruncate (int fd, off_t len);
int truncate (const char *path, off_t len);
#include <sys/select.h>
#include <sys/time.h>
// Time value made of seconds plus microseconds; select() takes a pointer to one as its timeout.
// NOTE(review): shown here for reference; <sys/time.h> presumably already provides this
//   structure, so do not compile this definition together with that header.
struct timeval
{
    long tv_sec;  /* seconds */
    long tv_usec; /* microseconds */
};
// A call to select() blocks until the given fds are ready for I/O, or until a timeout expires.
/*
    n: the value of the highest-valued fd in any set, plus one
    timeout: the time after which select() returns even if no fd is ready
*/
/*
    three sets of fds to watch (any set may be NULL)
        readfds: watched to see whether data is available for reading
        writefds: watched to see whether writing is possible
        exceptfds: watched to see whether an exception occurred, or whether out-of-band data is available (only for sockets)
*/
int select(int n,
           fd_set *readfds,
           fd_set *writefds,
           fd_set *exceptfds,
           struct timeval *timeout);
// Helpers for managing the fd sets passed to select()
FD_CLR(int fd, fd_set *set);// remove an fd from the set
FD_ISSET(int fd, fd_set *set);// test whether an fd is part of the set
FD_SET(int fd, fd_set *set);// add an fd to the set
FD_ZERO(fd_set *set); // remove all fds from the set
// Use select() purely as a sleep with subsecond resolution: no fd sets are
// watched, so only the timeout can end the call.
struct timeval tv = { .tv_sec = 0, .tv_usec = 500 };
select(0, NULL, NULL, NULL, &tv); /* sleep for 500 microseconds */
// poll(): System V's multiplexed I/O solution
#include <poll.h>
// Describes one fd to watch: the caller fills in fd and events, and the witnessed events
// are returned in revents.
// NOTE(review): shown here for reference; <poll.h> presumably already provides this structure.
struct pollfd
{
    int fd;        /* file descriptor */
    short events;  /* requested events to watch */
    short revents; /* returned events witnessed */
};
/*
  fds: array of pollfd structures, each of which specifies a single fd to watch
  nfds: number of pollfd structures in fds
  timeout: how long to wait; presumably in milliseconds (the example below passes seconds * 1000)
  events (field of each pollfd): POLLIN, POLLOUT, ...
*/
int poll (struct pollfd *fds, nfds_t nfds, int timeout);

// Example: watch stdin for readability and stdout for writability with a single poll() call.
// NOTE(review): TIMEOUT is not defined in this snippet; it is presumably a number of
//   seconds, converted to milliseconds below.
struct pollfd fds[2];
int ret;
/* watch stdin for input */
fds[0].fd = STDIN_FILENO;
fds[0].events = POLLIN;
/* watch stdout for ability to write (almost always true) */
fds[1].fd = STDOUT_FILENO;
fds[1].events = POLLOUT;
/* All set, block! */
ret = poll(fds, 2, TIMEOUT * 1000);
if (ret == -1)
{
    perror("poll");
    return 1;
}
上一篇下一篇

猜你喜欢

热点阅读