io file 相关源码及利用方式学习笔记

2019-11-01  本文已影响0人  pu1p

前言

简书的markdown 怎么生成目录........., vscode里面生成的复制过来没法用..... 很烦.
还是 Notion 好用!

一些关于 io file 结构体的笔记, 仅供参考. 欢迎交流 :P

0. 基本数据结构和宏

_IO_FILE && _IO_FILE_plus

/* Per-stream state behind a FILE, as quoted by the article.  The pointer
   groups below describe the get area, the put area and the reserve
   (backing) buffer; the remaining members are bookkeeping.  */
struct _IO_FILE {
  int _flags;       /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

  /* The following pointers correspond to the C++ streambuf protocol. */
  /* Note:  Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
  char* _IO_read_ptr;   /* Current read pointer */
  char* _IO_read_end;   /* End of get area. */
  char* _IO_read_base;  /* Start of putback+get area. */
  char* _IO_write_base; /* Start of put area. */
  char* _IO_write_ptr;  /* Current put pointer. */
  char* _IO_write_end;  /* End of put area. */
  char* _IO_buf_base;   /* Start of reserve area. */
  char* _IO_buf_end;    /* End of reserve area. */
  /* The following fields are used to support backing up and undo. */
  char *_IO_save_base; /* Pointer to start of non-current get area. */
  char *_IO_backup_base;  /* Pointer to first valid character of backup area */
  char *_IO_save_end; /* Pointer to end of non-current get area. */

  struct _IO_marker *_markers;

  /* Link to another stream; the _IO_LINKED flag says whether it is in use. */
  struct _IO_FILE *_chain;

  /* Descriptor passed to close() on cleanup unless _IO_DELETE_DONT_CLOSE
     is set.  */
  int _fileno;
#if 0
  int _blksize;
#else
  int _flags2;
#endif
  _IO_off_t _old_offset; /* This used to be _offset but it's too small.  */

#define __HAVE_COLUMN /* temporary */
  /* 1+column number of pbase(); 0 is unknown. */
  unsigned short _cur_column;
  signed char _vtable_offset;
  /* One-byte buffer; _IO_default_setbuf points the reserve area here when
     no usable buffer is supplied.  */
  char _shortbuf[1];

  /*  char* _save_gptr;  char* _save_egptr; */

  _IO_lock_t *_lock;
  /* NOTE(review): the quoted excerpt stops inside this #ifdef -- no matching
     #endif is shown.  Presumably more members follow in the full header
     (later snippets use fp->_offset and fp->_mode, which are not declared
     above); confirm against libio.h.  */
#ifdef _IO_USE_OLD_IO_FILE
};

/* We always allocate an extra word following an _IO_FILE.
   This contains a pointer to the function jump table used.
   This is for compatibility with C++ streambuf; the word can
   be used to smash to a pointer to a virtual function table. */

/* An _IO_FILE immediately followed by the pointer to its jump table.  */
struct _IO_FILE_plus
{
  _IO_FILE file;                    /* The stream state itself. */
  const struct _IO_jump_t *vtable;  /* Jump table, e.g. &_IO_file_jumps. */
};

io file结构体中 _flags 相关宏

/* Values stored in _IO_FILE::_flags: a magic number in the high-order word
   and one bit per stream property in the low-order word.  */
#define _IO_MAGIC 0xFBAD0000 /* Magic number */
#define _OLD_STDIO_MAGIC 0xFABC0000 /* Emulate old stdio. */
#define _IO_MAGIC_MASK 0xFFFF0000 /* Selects the high-order (magic) word. */
#define _IO_USER_BUF 1 /* User owns buffer; don't delete it on close. */
#define _IO_UNBUFFERED 2 /* Set by setvbuf for _IONBF. */
#define _IO_NO_READS 4 /* Reading not allowed */
#define _IO_NO_WRITES 8 /* Writing not allowed */
#define _IO_EOF_SEEN 0x10
#define _IO_ERR_SEEN 0x20 /* Set when a read/write is refused or fails. */
#define _IO_DELETE_DONT_CLOSE 0x40 /* Don't call close(_fileno) on cleanup. */
#define _IO_LINKED 0x80 /* Set if linked (using _chain) to streambuf::_list_all.*/
#define _IO_IN_BACKUP 0x100 /* Tested by _IO_in_backup(). */
#define _IO_LINE_BUF 0x200 /* Set by setvbuf for _IOLBF. */
#define _IO_TIED_PUT_GET 0x400 /* Set if put and get pointer logically tied. */
#define _IO_CURRENTLY_PUTTING 0x800 /* Stream is in put mode (_IO_in_put_mode). */
#define _IO_IS_APPENDING 0x1000
#define _IO_IS_FILEBUF 0x2000
#define _IO_BAD_SEEN 0x4000
#define _IO_USER_LOCK 0x8000

_IO_jump_t (vtable)

就是 cpp 中的虚表

const struct _IO_jump_t _IO_file_jumps =
{
  **JUMP_INIT_DUMMY,
  JUMP_INIT(finish, _IO_file_finish),
  JUMP_INIT(overflow, _IO_file_overflow),
  JUMP_INIT(underflow, _IO_file_underflow),
  JUMP_INIT(uflow, _IO_default_uflow),
  JUMP_INIT(pbackfail, _IO_default_pbackfail),
  JUMP_INIT(xsputn, _IO_file_xsputn),
  JUMP_INIT(xsgetn, _IO_file_xsgetn),
  JUMP_INIT(seekoff, _IO_new_file_seekoff),
  JUMP_INIT(seekpos, _IO_default_seekpos),
  JUMP_INIT(setbuf, _IO_new_file_setbuf),
  JUMP_INIT(sync, _IO_new_file_sync),
  JUMP_INIT(doallocate, _IO_file_doallocate),
  JUMP_INIT(read, _IO_file_read),
  JUMP_INIT(write, _IO_new_file_write),
  JUMP_INIT(seek, _IO_file_seek),
  JUMP_INIT(close, _IO_file_close),
  JUMP_INIT(stat, _IO_file_stat),
  JUMP_INIT(showmanyc, _IO_default_showmanyc),
  JUMP_INIT(imbue, _IO_default_imbue)
};

1. setvbuf

1.1 文档

-> man setvbuf
...
The  setvbuf()  function may be used on any open stream to change its buffer.  The mode argument must be
       one of the following three macros:

              _IONBF unbuffered

              _IOLBF line buffered

              _IOFBF fully buffered

       Except for unbuffered files, the buf argument should point to a buffer at least size  bytes  long;  this
       buffer  will  be  used  instead  of  the  current buffer.  If the argument buf is NULL, only the mode is
       affected; a new buffer will be allocated on the next read or write operation.   The  setvbuf()  function
       may be used only after opening a stream and before any other operations have been performed on it.

1.2 实例

// iosetvbuf.c
/* Values accepted as the `mode` argument of setvbuf().  */
#define _IOFBF 0 /* Fully buffered. */
#define _IOLBF 1 /* Line buffered. */
#define _IONBF 2 /* No buffering. */

// test.c
#include <stdio.h>

/* Echo whitespace-delimited words from stdin so that the effect of each
   setvbuf() mode on stdin's buffer can be inspected in a debugger.
   Returns 0 after input ends, 1 if setvbuf() rejects the request.  */
int main(void){
    /* Third argument is the mode: try _IOFBF (0), _IOLBF (1), _IONBF (2). */
    if (setvbuf(stdin, NULL, _IONBF, 0x10) != 0)
        return 1;
    for(;;){
        char buf[0x100];
        /* Limit the conversion to the buffer (0xff characters plus the NUL)
           and stop at EOF or on a read error instead of echoing stale data
           forever.  */
        if (scanf("%255s", buf) != 1)
            break;
        puts(buf);
    }
    return 0;
}

分别设置 mode 为 0, 1, 2.

mode 为 0(Fully Buffered) :

经过调试发现 mode 为 0 时, 在setvbuf中会 malloc(0x400), 然后 stdin→_IO_buf_base 和 stdin→_IO_buf_end 分别指向chunk的开头和末尾

pwndbg> io_file 0x7ffff7dd18e0
$5 = {
  file = {
        ...
    _IO_buf_base = 0x602010,
    _IO_buf_end = 0x602410,
        ...
  },
  vtable = 0x7ffff7dd06e0
}

pwndbg> heap
add heap to history
0x602000 PREV_INUSE $6 = {
  prev_size = 0x0,
  size = 0x411,
  fd = 0x0,
  bk = 0x0,
  fd_nextsize = 0x0,
  bk_nextsize = 0x0
}
0x602410 PREV_INUSE $7 = {
...

manpage 里面说如果 setvbuf 的 buf 是 NULL 的话 a new buffer will be allocated on the next read or write operation, 貌似实现和文档描述并不一致...

mode 为 1 (Line buffered) :

setvbuf 中没有调用malloc, 而是在第一次调用 scanf的时候调用了 malloc(0x400), stdin→_IO_buf_base 和 stdin→_IO_buf_end 的值和之前一致.

这次实现和文档倒是一致了.

mode 为2 (No Buffer) :

都不会malloc, stdin→_IO_buf_base 和 stdin→_IO_buf_end 指向 stdin 结构体内部的 _shortbuf (0x7ffff7dd1963 - 0x7ffff7dd18e0 = 0x83), 即一块长度为 1 个字节的内存

pwndbg> io_file 0x7ffff7dd18e0
add io_file 0x7ffff7dd18e0 to history
$1 = {
  file = {
        ...
    _IO_buf_base = 0x7ffff7dd1963,
    _IO_buf_end = 0x7ffff7dd1964,
        ...

1.3 源码

调用链

_IO_setvbuf (iosetvbuf.c) → _IO_new_file_setbuf / _IO_file_setbuf (fileops.c) → _IO_default_setbuf (genops.c)

源码

先贴一下用到的宏

/* Values accepted as the `mode` argument of setvbuf().  */
#define _IOFBF 0 /* Fully buffered. */
#define _IOLBF 1 /* Line buffered. */
#define _IONBF 2 /* No buffering. */

/* _flags bits that _IO_setvbuf sets or clears to record the chosen mode.  */
#define _IO_LINE_BUF 0x200
#define _IO_UNBUFFERED 2

先看 _IO_setvbuf (省略部分无关代码)

/* Select the buffering mode of FP (abridged by the article).

   mode  is one of _IOFBF, _IOLBF or _IONBF; any other value yields EOF.
   buf   is the caller's buffer, or NULL.
   size  is the length of buf.

   With buf == NULL, _IOFBF allocates a buffer immediately if the stream has
   none and _IOLBF only flips the flags; both then return 0 without calling
   _IO_SETBUF.  _IONBF always discards buf/size and goes through _IO_SETBUF.
   Returns 0 on success, EOF on failure.

   NOTE(review): the matching lock acquisition is not part of this excerpt;
   only the release at unlock_return is shown.  */
int _IO_setvbuf (_IO_FILE *fp, char *buf, int mode, _IO_size_t size) {
  int result;
  switch (mode) {
    case _IOFBF:
        fp->_IO_file_flags &= ~(_IO_LINE_BUF|_IO_UNBUFFERED);
        if (buf == NULL){
            if (fp->_IO_buf_base == NULL){
                /* There is no flag to distinguish between "fully buffered
                mode has been explicitly set" as opposed to "line
                buffering has not been explicitly set".  In both
                cases, _IO_LINE_BUF is off.  If this is a tty, and
                _IO_filedoalloc later gets called, it cannot know if
                it should set the _IO_LINE_BUF flag (because that is
                the default), or not (because we have explicitly asked
                for fully buffered mode).  So we make sure a buffer
                gets allocated now, and explicitly turn off line
                buffering.

                A possibly cleaner alternative would be to add an
                extra flag, but then flags are a finite resource.  */
                _IO_DOALLOCATE (fp);
                fp->_IO_file_flags &= ~_IO_LINE_BUF;
            }
            result = 0;
            goto unlock_return;
        }
      break;
    case _IOLBF:
        fp->_IO_file_flags &= ~_IO_UNBUFFERED;
        fp->_IO_file_flags |= _IO_LINE_BUF;
        if (buf == NULL){
            /* Nothing to install: the buffer is left untouched. */
            result = 0;
            goto unlock_return;
        }
        break;
    case _IONBF:
        fp->_IO_file_flags &= ~_IO_LINE_BUF;
        fp->_IO_file_flags |= _IO_UNBUFFERED;
        /* Force the "no buffer" path of _IO_SETBUF regardless of arguments. */
        buf = NULL;
        size = 0;
        break;
    default:
        result = EOF;
        goto unlock_return;
    }
    // Dispatches through the jump table; for file streams: _IO_new_file_setbuf
    result = _IO_SETBUF (fp, buf, size) == NULL ? EOF : 0;

unlock_return:
    _IO_release_lock (fp);
    return result;
}

通过源码以及注释可以明白 mode 和 buf 都为 0 的时候实现为何与文档不一致:

io file 结构体关于 缓冲有三种状态, fully buffered, line buffered, no buffer, 但是 io file 的 _flags 属性只使用了 2 位来表示这三种状态, 为了避免之后分配buffer ( 按照文档里说法应该是第一次读的时候再分配 buffer) 时函数不清楚该如何设置 _flags 所以才提前分配. 画个表格表示一下 _flags 的两位 和 三种状态之间的关系吧

| mode | _IO_LINE_BUF (0x200) | _IO_UNBUFFERED (2) |
| --- | --- | --- |
| _IOFBF (0, Fully buffered) | 0 | 0 |
| _IOLBF (1, Line buffered) | 1 | 0 |
| _IONBF (2, No buffering) | 0 | 1 |

通过源码可以看到仅当 (mode 为 _IOFBF 或 _IOLBF, 且 buf ≠ NULL) 或 mode 为 _IONBF 的时候, 函数才会继续调用 _IO_SETBUF(FP, BUFFER, LENGTH) (即 _IO_new_file_setbuf), 其余情况就会立即返回.

pwn 题中最常见的就是 _IONBF , 我们继续往下看 _IO_new_file_setbuf 的源码:

/* setbuf implementation for file streams.  Lets _IO_default_setbuf install
   the reserve buffer, then collapses every read and write pointer onto
   _IO_buf_base so that both the get area and the put area start out empty.
   Returns fp, or NULL if _IO_default_setbuf reports failure.  */
_IO_FILE *_IO_new_file_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len) {
  if (_IO_default_setbuf (fp, p, len) == NULL)
    return NULL;

  fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
    = fp->_IO_buf_base;
  fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end
    = fp->_IO_buf_base;
  return fp;
}
/* Associates the two names _IO_new_file_setbuf and _IO_file_setbuf.  */
libc_hidden_ver (_IO_new_file_setbuf, _IO_file_setbuf)


/* Generic setbuf (as abridged by the article).  Installs [p, p+len) as the
   reserve area, or falls back to the one-byte _shortbuf embedded in the
   stream when p is NULL or len is 0, and updates _IO_UNBUFFERED to match.
   All read and write pointers are reset to 0.  Returns fp.  */
_IO_FILE *_IO_default_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len) {
    if (p == NULL || len == 0) {
        fp->_flags |= _IO_UNBUFFERED;
        // No usable buffer: point the reserve area at fp->_shortbuf (1 byte).
        _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
    }
    else {
        fp->_flags &= ~_IO_UNBUFFERED;
        _IO_setb (fp, p, p+len, 0);
    }
    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;
    return fp;
}

/* Record [b, eb) as the reserve area of f.  A non-zero `a` clears
   _IO_USER_BUF in f->_flags; a zero `a` sets it.  */
void _IO_setb (FILE *f, char *b, char *eb, int a){
    const int flags_without_user_buf = f->_flags & ~_IO_USER_BUF;

    f->_IO_buf_end = eb;
    f->_IO_buf_base = b;
    f->_flags = a ? flags_without_user_buf
                  : (flags_without_user_buf | _IO_USER_BUF);
}

通过源码可以看到如果 buf=NULL 或者 size = 0 的话, f->_IO_buf_base 和 f->_IO_buf_end 会被指向 fp->_shortbuf 和 fp->_shortbuf+1, 虽然不知道为什么要这么实现, 但是这样就解释了为什么 可以通过 覆盖 _IO_buf_base 的低字节来leak 地址: 覆盖了低字节之后 _IO_buf_base 就会指向 io file 结构体 内部, 而io file 结构体内部就有很多指向 _shortbuf 的指针.

2. fread && scanf

调用链:

  1. fread:

    fread → _IO_file_xsgetn (fileops.c) (如果buffer够的话就直接返回) → __underflow (genops.c) → _IO_new_file_underflow (fileops.c) → _IO_SYSREAD

  2. scanf:

    scanf → _IO_vfscanf_internal (vfscanf.c) → __uflow (genops.c) → _IO_default_uflow (genops.c) → _IO_file_underflow (fileops.c) → _IO_SYSREAD

注:

_IO_new_file_underflow 和 _IO_file_underflow 就是同一个函数
神奇的是 用两个 函数名在 gdb 里面都可以断下来

────────────────────────────────────────────────────────────[ DISASM ]────────────────────────────────────────────────────────────
 ► 0x7ffff7a874a0 <_IO_file_underflow>       mov    eax, dword ptr [rdi] <0x7ffff7dd18e0>
   0x7ffff7a874a2 <_IO_file_underflow+2>     test   al, 4
   0x7ffff7a874a4 <_IO_file_underflow+4>     jne    _IO_file_underflow+544 <0x7ffff7a876c0>

   0x7ffff7a874aa <_IO_file_underflow+10>    mov    rdx, qword ptr [rdi + 8]
   0x7ffff7a874ae <_IO_file_underflow+14>    cmp    rdx, qword ptr [rdi + 0x10]
   0x7ffff7a874b2 <_IO_file_underflow+18>    jb     _IO_file_underflow+384 <0x7ffff7a87620>

   0x7ffff7a874b8 <_IO_file_underflow+24>    push   rbp
   0x7ffff7a874b9 <_IO_file_underflow+25>    push   rbx
   0x7ffff7a874ba <_IO_file_underflow+26>    mov    rbx, rdi
   0x7ffff7a874bd <_IO_file_underflow+29>    sub    rsp, 8
   0x7ffff7a874c1 <_IO_file_underflow+33>    cmp    qword ptr [rdi + 0x38], 0
────────────────────────────────────────────────────────[ SOURCE (CODE) ]─────────────────────────────────────────────────────────
In file: /home/pu1p/glibcs/glibc-2.23/libio/fileops.c
   526   return count;
   527 }
   528
   529 int
   530 _IO_new_file_underflow (_IO_FILE *fp)
 ► 531 {
   532   _IO_ssize_t count;
   533 #if 0
   534   /* SysV does not make this test; take it out for compatibility */
   535   if (fp->_flags & _IO_EOF_SEEN)
   536     return (EOF);

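源码显示 _IO_new_file_underflow , 汇编显示 _IO_file_underflow, 神奇. 其实这是因为 glibc 用符号别名 (versioned_symbol / libc_hidden_ver, 参见上面 _IO_new_file_setbuf 后面的 libc_hidden_ver) 把 _IO_file_underflow 定义成了 _IO_new_file_underflow 的别名, 两个名字对应同一个地址.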

源码:

fread

简化了一下

/* Simplified sketch of the buffered read path (fread -> _IO_file_xsgetn).

   Copies up to size*nmemb bytes from fp into data: bytes already sitting
   between _IO_read_ptr and _IO_read_end are consumed first, and the buffer
   is refilled through __underflow() whenever it runs dry.

   Returns the number of complete items copied, which is less than nmemb
   when EOF or an error cuts the read short.  */
size_t fread(void *data, size_t size, size_t nmemb, FILE *fp){
    _IO_size_t n = size*nmemb;
    _IO_size_t want, have;
    char *dst = data;   /* byte cursor: arithmetic on void * is not standard C */

    if (n == 0)
        return 0;       /* nothing requested; also keeps the division below safe */
    want = n;
    if(fp->_IO_buf_base == NULL)
        // malloc buf and make _IO_buf_base and _IO_buf_end point to it
        _IO_doallocbuf (fp);
    while (want > 0){
        have = fp->_IO_read_end - fp->_IO_read_ptr;
        if (want <= have){
            /* Enough buffered data: satisfy the rest of the request. */
            memcpy(dst, fp->_IO_read_ptr, want);
            fp->_IO_read_ptr += want;
            want = 0;
        } else{
            if (have > 0){
                /* Drain whatever is left in the get area first. */
                memcpy (dst, fp->_IO_read_ptr, have);
                dst += have;
                want -= have;
                fp->_IO_read_ptr += have;
            }
            if (fp->_IO_buf_base
                && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)) {
                /* The remainder fits in the buffer: refill it and retry. */
                if (__underflow (fp) == EOF)
                    break;
                continue;
            }
            /* The remainder is at least as large as the buffer.  glibc's
               _IO_file_xsgetn reads such requests straight into the caller's
               memory with _IO_SYSREAD; that path is left out of this sketch,
               so stop here instead of looping forever.  */
            break;
        }
    }
    return (n - want) / size;
}

/* Make at least one character available in fp's get area and return it
   without consuming it, or return EOF.

   Order of attempts: switch the stream out of put mode if necessary, use
   data still pending in the get area, use the main get area if a backup
   area was active, and only then refill from the file through
   _IO_new_file_underflow().  */
int __underflow (_IO_FILE *fp) {
  /* Refuse if the stream cannot be put into byte orientation. */
  if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
    return EOF;

  if (fp->_mode == 0)
    _IO_fwide (fp, -1);
  if (_IO_in_put_mode (fp)) // _flags & _IO_CURRENTLY_PUTTING
    if (_IO_switch_to_get_mode (fp) == EOF)
      return EOF;
  /* Unread data is still buffered: hand back the next byte. */
  if (fp->_IO_read_ptr < fp->_IO_read_end)
    return *(unsigned char *) fp->_IO_read_ptr;
  if (_IO_in_backup (fp)) {
      // _flags & _IO_IN_BACKUP 
      _IO_switch_to_main_get_area (fp);
      if (fp->_IO_read_ptr < fp->_IO_read_end)
    return *(unsigned char *) fp->_IO_read_ptr;
    }
  if (_IO_have_markers (fp)) {
      if (save_for_backup (fp, fp->_IO_read_end))
    return EOF;
    }
  else if (_IO_have_backup (fp)) // _IO_save_base != NULL
    _IO_free_backup_area (fp);
  /* Nothing buffered anywhere: refill from the underlying file. */
  return _IO_new_file_underflow (fp);
}


/* Refill fp's get area from the underlying file (abridged sketch).

   Fails with EBADF when the stream is not readable.  Otherwise every read
   and write pointer is reset to _IO_buf_base and _IO_SYSREAD is asked for
   up to (_IO_buf_end - _IO_buf_base) bytes, stored at _IO_buf_base.

   Returns the first byte read (not consumed), or EOF at end of file or on
   error.  */
int _IO_new_file_underflow (_IO_FILE *fp) {
    _IO_ssize_t count;  /* result of the read below */

    if (fp->_flags & _IO_NO_READS) {
        fp->_flags |= _IO_ERR_SEEN;
        __set_errno (EBADF);
        return EOF;
    }
    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
    fp->_IO_read_end = fp->_IO_buf_base;
    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
        = fp->_IO_buf_base;
    /* This is the write primitive an attacker gets by controlling
       _IO_buf_base/_IO_buf_end: the data lands at _IO_buf_base.  */
    count = _IO_SYSREAD (fp, fp->_IO_buf_base,
                fp->_IO_buf_end - fp->_IO_buf_base);
    if (count <= 0) {
        /* 0 means end of file, a negative value means the read failed;
           either way there is no byte to hand back.  */
        fp->_flags |= (count == 0) ? _IO_EOF_SEEN : _IO_ERR_SEEN;
        return EOF;
    }

    fp->_IO_read_end += count;
    return *(unsigned char *) fp->_IO_read_ptr;
}

fread的大致逻辑如下:

  1. 如果 _IO_buf_base 为空先malloc一个buf
  2. 先使用 fp->_IO_read_ptr 和 fp->_IO_read_end 之间的数据
  3. 如果不够的话再调用 sys_read 读取新的数据到_IO_buf_base中.

所以如果可以控制 io_file 结构体, 就可以通过修改 _IO_buf_base 实现任意地址写的功能, 不过需要绕过以下限制.

  1. _IO_buf_base 和 _IO_buf_end 分别指向要写的地址的头和尾, 这个不必多说
  2. (fp->_flags & _IO_NO_READS) == 0 // #define _IO_NO_READS 4 /* Reading not allowed */
  3. fp->_IO_read_end ≥ fp->_IO_read_ptr, 满足这个条件就可以通过多次读把这段空间的数据消耗完, 之后的数据就会读进 _IO_buf_base
  4. (_IO_buf_end - _IO_buf_base) > want, 满足这个条件才会调用 __underflow
  5. 还有一些其它限制(_IO_have_backup (fp) 之类的)具体调试的时候具体情况具体对待就好

scanf

对 io file 的操作差不多, 利用方式也一样, 不再赘述

3. fwrite, puts, printf

调用链

  1. fwrite:

    _IO_fwrite (iofwrite.c) → _IO_new_file_xsputn (fileops.c) → _IO_new_file_overflow (fileops.c) → _IO_do_write → new_do_write (fileops.c) → _IO_SYSWRITE/_IO_new_file_write (fileops.c)

  2. puts:

    puts → _IO_new_file_xsputn (fileops.c) → ...

  3. printf

    // todo

源码

fwrite

省略了一些无关代码

/* Heavily abridged fwrite: only the final flush through the overflow slot
   is kept.  NOTE(review): `n` and `to_do` are declared in the omitted code;
   presumably n is the requested byte count and to_do the bytes still
   unwritten -- confirm against _IO_new_file_xsputn.  */
size_t fwrite(void *data, size_t size, size_t nmemb, FILE *f){
    /* Calling overflow with EOF flushes the pending put area. */
    if (_IO_OVERFLOW (f, EOF) == EOF) // _IO_new_file_overflow
        return to_do == 0 ? EOF : n - to_do;
    return n - to_do;
}


/* overflow slot for file streams (abridged).  When ch is EOF the pending
   put area [_IO_write_base, _IO_write_ptr) is flushed through
   _IO_do_write() and its result returned; otherwise ch is returned.
   Returns EOF with errno EBADF if the stream is not writable.

   The "MUST AVOID" markers are the article's notes for an exploit that
   forges the stream: those branches must not be taken.  */
int _IO_new_file_overflow (_IO_FILE *f, int ch)
{
    if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
    {
        // MUST AVOID !!! 1 -- the stream must not carry _IO_NO_WRITES
        f->_flags |= _IO_ERR_SEEN;
        __set_errno (EBADF);
        return EOF;
    }
    /* If currently reading or no buffer allocated. */
    if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL){
        // reset buf, MUST AVOID !! 2 -- body omitted by the article
    }
    if (ch == EOF)
        return _IO_do_write (f, f->_IO_write_base,
            f->_IO_write_ptr - f->_IO_write_base); // new_do_write
    return (unsigned char) ch;
}


/* Write to_do bytes starting at data to fp's file with _IO_SYSWRITE
   (abridged).  If _IO_read_end differs from _IO_write_base, the file
   position is first moved by (_IO_write_base - _IO_read_end) using whence
   value 1; a failed seek makes the function return 0 without writing.
   Returns the value produced by _IO_SYSWRITE otherwise.  */
static _IO_size_t new_do_write (_IO_FILE *fp, const char *data, _IO_size_t to_do)
{
    _IO_size_t count;
    if (fp->_IO_read_end != fp->_IO_write_base){
        // MUST AVOID !!! 3 -- keep _IO_read_end equal to _IO_write_base
        _IO_off64_t new_pos
            = _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1); 
        // Author's observation: fseek(stdout, x, 1) always returns
        // _IO_pos_BAD (-1), so this branch ends in the early return below.
        if (new_pos == _IO_pos_BAD)
            return 0;
        fp->_offset = new_pos;
    }
    count = _IO_SYSWRITE (fp, data, to_do); // TARGET !!! (the leak happens here)
    // omitted
    return count;
}

与 fread 类似, fwrite 也是优先将缓冲区中的数据写入文件. 写入文件时的缓冲区是由 _IO_write_base 和 _IO_write_ptr 表示的. 所以我们可以通过控制这两个缓冲区指针来实现任意地址读的目的. 同样需要满足以下条件

  1. _IO_write_base 和 _IO_write_ptr 指向想要泄露地址的地方
  2. (f->_flags & _IO_NO_WRITES) == 0 // #define _IO_NO_WRITES 8
  3. ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL) == 0 // #define _IO_CURRENTLY_PUTTING 0x800
  4. fp->_IO_read_end == fp->_IO_write_base // 否则会先进入 _IO_SYSSEEK 分支, 而 fseek(stdout, x, 1) (我试了好几次) 都会返回 _IO_pos_BAD (-1), 导致 new_do_write 直接返回 0 而不会执行 _IO_SYSWRITE.

puts

流程和 fwrite 类似 (同样经过 _IO_new_file_xsputn), 利用方式也差不多.

Appendix A : 参考

blog.angelboy.tw

wiki about buffer underflow

Appendix B: 赛题

TokyoWesterns CTF 2017 parrot (修改 io file 达到任意地址写的目的)

ByteCTF 2019 note_five (修改 stdout->_IO_buf_base 来 leak 地址)

上一篇下一篇

猜你喜欢

热点阅读