安全攻防iOS 性能编译原理

13. Hook原理介绍

2017-08-25  本文已影响88人  泰克2008

13.1 Objective-C消息传递(Messaging)

对于C/C++这类静态语言,调用一个方法其实就是跳到内存中的某一点并开始执行一段代码。没有任何动态的特性,因为这在编译时就决定好了。

而在 Objective-C 中,[object foo] 语法并不会立即执行 foo 这个方法的代码。它是在运行时给 object 发送一条叫 foo 的消息。这个消息,也许会由 object 来处理,也许会被转发给另一个对象,或者不予理睬假装没收到这个消息。多条不同的消息也可以对应同一个方法实现。这些都是在程序运行的时候动态决定的。

事实上,在编译时你写的 Objective-C 函数调用的语法都会被翻译成一个 C 的函数调用 objc_msgSend() 。比如,下面两行代码就是等价的:

[people TailName:@"Luz" Age:18];

objc_msgSend(people, @selector(TailName:Age:), @"Luz", 18);

在 Objective-C 中,类、对象和方法都是一个C的结构体,从 objc/objc.h 和 objc/runtime.h 头文件中,我们可以找到他们的定义:

  /* Class is a pointer to an objc_class structure. */
  typedef struct objc_class *Class;
  /* objc_object holds a single member, isa, of type Class. */
  struct objc_object {
      Class isa  OBJC_ISA_AVAILABILITY;
  };
  /* id is a pointer to an objc_object structure. */
  typedef struct objc_object *id;
  
//Class 是一个 objc_class 结构类型的指针, id是一个 objc_object 结构类型的指针.

/*
 * Layout of a class. Only `isa` is always present; the remaining members are
 * compiled in solely when __OBJC2__ is not defined.
 */
struct objc_class {
  Class isa  OBJC_ISA_AVAILABILITY;

  #if !__OBJC2__
      Class super_class                                        OBJC2_UNAVAILABLE;  /* parent class */
      const char *name                                         OBJC2_UNAVAILABLE;  /* class name */
      long version                                             OBJC2_UNAVAILABLE;
      long info                                                OBJC2_UNAVAILABLE;
      long instance_size                                       OBJC2_UNAVAILABLE;
      struct objc_ivar_list *ivars                             OBJC2_UNAVAILABLE;  /* instance variable list */
      struct objc_method_list **methodLists                    OBJC2_UNAVAILABLE;  /* method lists */
      struct objc_cache *cache                                 OBJC2_UNAVAILABLE;  /* recently used methods */
      struct objc_protocol_list *protocols                     OBJC2_UNAVAILABLE;  /* protocol list */
  #endif
} OBJC2_UNAVAILABLE;
objective-c.png
struct objc_ivar_list {
    int ivar_count                                          
    /* variable length structure */
    struct objc_ivar ivar_list[1]                           
}       

};


- objc_cache
指向最近使用的方法.用于方法调用的优化  

/* Cache of recently used methods, used to speed up method calls. */
struct objc_cache {
/* Per the inline note, the total bucket count is mask + 1. */
unsigned int mask /* total = mask + 1 */;
/* presumably the number of buckets currently in use -- TODO confirm */
unsigned int occupied;
/* Declared with length 1; NOTE(review): likely a variable-length trailing array. */
Method buckets[1];
};


- protocols
    协议的链表

/* Linked list of protocol lists. */
struct objc_protocol_list {
/* Next node of the linked list. */
struct objc_protocol_list *next;
/* presumably the number of entries in `list` -- TODO confirm */
long count;
/* Declared with length 1; NOTE(review): likely a variable-length trailing array. */
Protocol *list[1];
};


objc_method_list 本质是一个有 objc_method 元素的可变长度的数组。一个 objc_method 结构体中:
- 函数名,也就是SEL
- 表示函数原型的字符串 (见 Type Encoding) 
- 函数的实现IMP

#### 13.2 Method Swizzling示例
  由上面可知,方法的名字(SEL)跟方法的实现(IMP,指向 C 函数的指针)一一对应。Swizzle 一个方法其实就是在程序运行时对 objc_method_list 做点改动,让这个方法的名字(SEL)对应到另一个 IMP。 
  
   Method Swizzling(方法调配技术),仅针对Objective-C方法有效。Method Swizzling 利用 Runtime 特性把一个方法的实现与另一个方法的实现进行替换。
  //涉及到的主要方法
  class_addMethod
  class_replaceMethod
  method_exchangeImplementations
- 为什么在load里面调用?
   一般情况下,类别里的方法会重写掉主类里相同命名的方法。如果有两个类别实现了相同命名的方法,只有一个方法会被调用。
但 +load 是个特例,当一个类被读到内存的时候, runtime 会给这个类及它的每一个类别都发送一个 +load 消息。(多个类别时需要防止多次执行)

- object_getClass(obj)与[obj class]的区别?
参考资料:http://www.jianshu.com/p/ae5c32708bc6

13.3 Fishhook

fishhook是苹果系统下的一种C函数的hook方案,是facebook提供的一个动态修改链接Mach-O符号表的开源工具。Mach-O为Mach Object文件格式的缩写,也是用于iOS可执行文件,目标代码,动态库,内核转储的文件格式。Mach-O有自己的dylib规范(/usr/include/mach-o/loader.h文件里面)。

官网:https://github.com/facebook/fishhook

#import <Foundation/Foundation.h>
#import <dlfcn.h>
#import "fishhook.h"

/*
 * Slots for the original implementations. Their addresses are handed to
 * rebind_symbols through the `replaced` field, which stores the previous
 * binding there when a symbol is rebound.
 */
static int (*orig_close)(int);
static int (*orig_open)(const char *, int, ...);

/*
 * Replacement for close(): logs the descriptor, then forwards to the
 * original implementation and returns its result unchanged.
 */
int my_close(int fd) {
    printf("Calling real close(%d)\n", fd);
    int status = orig_close(fd);
    return status;
}

/*
 * Replacement for open(): logs the call and forwards it to the original
 * implementation. The optional third argument (mode) is read only when
 * O_CREAT is set; otherwise a mode of 0 is forwarded.
 */
int my_open(const char *path, int oflag, ...) {
    mode_t mode = 0;

    if ((oflag & O_CREAT) == 0) {
        printf("Calling real open('%s', %d)\n", path, oflag);
        return orig_open(path, oflag, mode);
    }

    // mode only applies to O_CREAT; it arrives promoted to int.
    va_list args;
    va_start(args, oflag);
    mode = va_arg(args, int);
    va_end(args);

    printf("Calling real open('%s', %d, %d)\n", path, oflag, mode);
    return orig_open(path, oflag, mode);
}


/*
 * Demo entry point: rebinds close() and open() to my_close/my_open, then
 * opens this executable and prints its first 4 bytes (the Mach-O magic).
 *
 * Returns 0 on success and 1 when rebinding, opening or reading fails.
 */
int main(int argc, const char * argv[]) {
    int exit_status = 0;
    @autoreleasepool {
        struct rebinding rbd[2];
        rbd[0].name = "close";
        rbd[0].replacement = (void *)my_close;
        rbd[0].replaced = (void **)&orig_close;

        rbd[1].name = "open";
        rbd[1].replacement = (void *)my_open;
        rbd[1].replaced = (void **)&orig_open;

        // rebind_symbols reports failure with a negative return value.
        if (rebind_symbols(rbd, 2) < 0) {
            fprintf(stderr, "rebind_symbols failed\n");
            return 1;
        }

        // Open our own binary and print out first 4 bytes (which is the same
        // for all Mach-O binaries on a given architecture)
        int fd = open(argv[0], O_RDONLY);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        uint32_t magic_number = 0;
        ssize_t bytes_read = read(fd, &magic_number, sizeof(magic_number));
        if (bytes_read == (ssize_t)sizeof(magic_number)) {
            printf("Mach-O Magic Number: 0x%x \n", magic_number);
        } else {
            // Do not print a magic number that was never (fully) read.
            fprintf(stderr, "Could not read the Mach-O magic number\n");
            exit_status = 1;
        }
        close(fd);
    }
    return exit_status;
}

# Theos build description for the WeChatReProject tweak.
# NOTE(review): THEOS_DEVICE_IP is presumably the device that receives the
# package on install -- adjust it to your own device.
THEOS_DEVICE_IP = 192.168.1.113
DEBUG = 1
ARCHS = armv7 arm64
TARGET = iphone:latest:8.0
include $(THEOS)/makefiles/common.mk

TWEAK_NAME = WeChatReProject
WeChatReProject_FILES = Tweak.xm fishhook.c
WeChatReProject_FRAMEWORKS = UIKit Foundation CoreLocation
WeChatReProject_CFLAGS = -fobjc-arc
include $(THEOS_MAKE_PATH)/tweak.mk

# Recipe lines must start with a TAB character, not spaces.
after-install::
	install.exec "killall -9 WeChat"

clean::
	rm -rf ./packages/*

Tweak.xm文件

#import<UIKit/UIKit.h>
#import<CoreLocation/CoreLocation.h>
#import<CoreLocation/CLLocation.h>
#import "fishhook.h"

// Minimal compile-time declaration of the class hooked in this file.
// NSObject is a stand-in base class so the declaration is not a root class;
// NOTE(review): the real superclass is not visible here -- confirm if it matters.
@interface SeePeopleNearByLogicController : NSObject
- (void)onRetrieveLocationOK:(id)arg1;
@end

// Slots that receive the original close/open implementations; their
// addresses are passed as the `replaced` field of each rebinding.
static int (*orig_close)(int);
static int (*orig_open)(const char *, int, ...);

// Replacement for close(): logs the descriptor, then forwards the call to the
// original implementation and returns its result unchanged.
int my_close(int fd) {
    printf("Calling real close(%d)\n", fd);
    int result = orig_close(fd);
    return result;
}

// Replacement for open(): logs the call with NSLog and forwards it to the
// original implementation. The optional mode argument is read only when
// O_CREAT is set; otherwise a mode of 0 is forwarded.
int my_open(const char *path, int oflag, ...) {
    mode_t mode = 0;

    if ((oflag & O_CREAT) == 0) {
        NSLog(@"Calling real open('%s', %d)", path, oflag);
        return orig_open(path, oflag, mode);
    }

    // mode only applies to O_CREAT; it arrives promoted to int.
    va_list args;
    va_start(args, oflag);
    mode = va_arg(args, int);
    va_end(args);

    NSLog(@"Calling real open('%s', %d, %d)", path, oflag, mode);
    return orig_open(path, oflag, mode);
}

%hook MicroMessengerAppDelegate
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions 
{
    // Describe the close() hook: replacement function plus the slot that
    // receives the original implementation.
    struct rebinding closeHook;
    closeHook.name = "close";
    closeHook.replacement = (void *)my_close;
    closeHook.replaced = (void **)&orig_close;

    // Same for open().
    struct rebinding openHook;
    openHook.name = "open";
    openHook.replacement = (void *)my_open;
    openHook.replaced = (void **)&orig_open;

    // Install both hooks, then continue with the original launch handling.
    struct rebinding hooks[2] = { closeHook, openHook };
    rebind_symbols(hooks, 2);
    NSLog(@"begin hook");
    return %orig;
}
%end

%hook SeePeopleNearByLogicController
- (void)onRetrieveLocationOK:(id)arg1
{
    // Hand a fixed coordinate to the original implementation in place of arg1.
    CLLocationDegrees latitude = 31.154352;
    CLLocationDegrees longitude = 121.42562;
    CLLocation *location = [[CLLocation alloc] initWithLatitude:latitude longitude:longitude];
    %orig(location);

    // Show the substituted location in an alert.
    NSString *detail = [[NSString alloc] initWithFormat:@"location is %@", location];
    NSString *title = [@"onRetrieveLocationOK" stringByAppendingString:detail];
    UIAlertView *alertView = [[UIAlertView alloc] initWithTitle:title
                                                        message:nil
                                                       delegate:self
                                              cancelButtonTitle:@"ok"
                                              otherButtonTitles:nil];
    [alertView show];
}
%end
 

13.4 Mach-o文件结构

Mach-o包含三个基本区域:

mach-o.png
13.4.1 Mach-o的header
➜ otool -hv WeChat.decrypted
WeChat.decrypted (architecture armv7):
Mach header
      magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
   MH_MAGIC     ARM         V7  0x00     EXECUTE    76       7416   NOUNDEFS DYLDLINK TWOLEVEL WEAK_DEFINES BINDS_TO_WEAK PIE
WeChat.decrypted (architecture arm64):
Mach header
      magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
MH_MAGIC_64   ARM64        ALL  0x00     EXECUTE    76       8168   NOUNDEFS DYLDLINK TWOLEVEL WEAK_DEFINES BINDS_TO_WEAK PIE
➜ hexdump -C WeChatReProject.dylib | more
00000000  ce fa ed fe 0c 00 00 00  09 00 00 00 06 00 00 00  |................|
00000010  15 00 00 00 fc 07 00 00  85 00 10 00 01 00 00 00  |................|
00000020  d0 01 00 00 5f 5f 54 45  58 54 00 00 00 00 00 00  |....__TEXT......|
00000030  00 00 00 00 00 00 00 00  00 80 00 00 00 00 00 00  |................|
00000040  00 80 00 00 05 00 00 00  05 00 00 00 06 00 00 00  |................|

头部的结构如下:

  /* Header found at the very start of a 32-bit Mach-O file. */
  struct mach_header {
  uint32_t    magic;      /* magic number (shown as MH_MAGIC by otool) */
  cpu_type_t  cputype;    /* CPU type (e.g. ARM) */
  cpu_subtype_t   cpusubtype; /* CPU subtype (e.g. V7) */
  uint32_t    filetype;   /* type of file (e.g. EXECUTE) */
  uint32_t    ncmds;      /* number of load commands */
  uint32_t    sizeofcmds; /* total size of all load commands */
  uint32_t    flags;      /* flags (e.g. NOUNDEFS, DYLDLINK, PIE) */
  };

  /* Header found at the very start of a 64-bit Mach-O file. */
  struct mach_header_64 {
      uint32_t    magic;             /* magic number (shown as MH_MAGIC_64 by otool) */
      cpu_type_t  cputype;           /* CPU type (e.g. ARM64) */
      cpu_subtype_t   cpusubtype;    /* CPU subtype (e.g. ALL) */
      uint32_t    filetype;          /* type of file (e.g. EXECUTE) */
      uint32_t    ncmds;             /* number of load commands */
      uint32_t    sizeofcmds;       /* total size of all load commands */
      uint32_t    flags;            /* flags (e.g. NOUNDEFS, DYLDLINK, PIE) */
      uint32_t    reserved;          /* reserved; the only field not in mach_header */
  };
13.4.2 Load Commands - 加载命令

Mach-O文件包含非常详细的加载指令,这些指令非常清晰地指示加载器如何设置并且加载二进制数据。Load Commands信息紧紧跟着二进制文件头后面。加载命令的数目以及总的大小在header中已经给出。

/*
 * Common prefix of every load command. The commands follow the Mach-O header
 * back to back; cmdsize is what a reader adds to reach the next command.
 */
struct load_command {
    uint32_t cmd;        /* type of load command */
    uint32_t cmdsize;    /* total size of command in bytes */
};
MachDemo otool -l a.out
a.out:
Load command 0
      cmd LC_SEGMENT_64
  cmdsize 72
  segname __PAGEZERO
   vmaddr 0x0000000000000000
   vmsize 0x0000000100000000
  fileoff 0
 filesize 0
 ...

segment数据结构:

/*
 * LC_SEGMENT load command: describes one segment to map. cmdsize also covers
 * the nsects section structs that belong to the segment.
 */
struct segment_command { /* for 32-bit architectures */
    uint32_t    cmd;        /* LC_SEGMENT */
    uint32_t    cmdsize;    /* includes sizeof section structs */
    char        segname[16];    /* segment name */
    uint32_t    vmaddr;     /* memory address of this segment */
    uint32_t    vmsize;     /* memory size of this segment */
    uint32_t    fileoff;    /* file offset of this segment */
    uint32_t    filesize;   /* amount to map from the file */
    vm_prot_t   maxprot;    /* maximum VM protection */
    vm_prot_t   initprot;   /* initial VM protection */
    uint32_t    nsects;     /* number of sections in segment */
    uint32_t    flags;      /* flags */
};

/*
 * LC_SEGMENT_64 load command: same fields as segment_command, but vmaddr,
 * vmsize, fileoff and filesize are 64 bits wide.
 */
struct segment_command_64 { /* for 64-bit architectures */
    uint32_t    cmd;        /* LC_SEGMENT_64 */
    uint32_t    cmdsize;    /* includes sizeof section_64 structs */
    char        segname[16];    /* segment name */
    uint64_t    vmaddr;     /* memory address of this segment */
    uint64_t    vmsize;     /* memory size of this segment */
    uint64_t    fileoff;    /* file offset of this segment */
    uint64_t    filesize;   /* amount to map from the file */
    vm_prot_t   maxprot;    /* maximum VM protection */
    vm_prot_t   initprot;   /* initial VM protection */
    uint32_t    nsects;     /* number of sections in segment */
    uint32_t    flags;      /* flags */
};

- cmd
  就是Load commands的类型,这里LC_SEGMENT_64代表将文件中64位的段映射到进程的地址空间。LC_SEGMENT_64和LC_SEGMENT的结构差别不大。

- cmdsize
  代表load command的大小

- segname 
  16字节的段名字

- vmaddr 
  段的虚拟内存起始地址

- vmsize 
  段的虚拟内存大小

- fileoff 
  段在文件中的偏移量

- filesize 
  段在文件中的大小

- maxprot 
   段页面所需要的最高内存保护(1=r,2=w,4=x)

- initprot 
  段页面初始的内存保护

- nsects 
  段中包含section的数量

- flags 
  其他杂项标志位
13.4.3 段(segment)和节(section)

"__TEXT"代表的是Segment,小写的"__text"代表Section

    /* Describes one section of a segment (32-bit layout). */
    struct section { /* for 32-bit architectures */
        char        sectname[16];   /* name of this section */
        char        segname[16];    /* segment this section goes in */
        uint32_t    addr;       /* memory address of this section */
        uint32_t    size;       /* size in bytes of this section */
        uint32_t    offset;     /* file offset of this section */
        uint32_t    align;      /* section alignment (power of 2) */
        uint32_t    reloff;     /* file offset of relocation entries */
        uint32_t    nreloc;     /* number of relocation entries */
        uint32_t    flags;      /* flags (section type and attributes)*/
        uint32_t    reserved1;  /* reserved (for offset or index) */
        uint32_t    reserved2;  /* reserved (for count or sizeof) */
    };

    /*
     * Describes one section of a segment (64-bit layout): addr and size are
     * 64 bits wide and a third reserved field is appended.
     */
    struct section_64 { /* for 64-bit architectures */
        char        sectname[16];   /* name of this section */
        char        segname[16];    /* segment this section goes in */
        uint64_t    addr;       /* memory address of this section */
        uint64_t    size;       /* size in bytes of this section */
        uint32_t    offset;     /* file offset of this section */
        uint32_t    align;      /* section alignment (power of 2) */
        uint32_t    reloff;     /* file offset of relocation entries */
        uint32_t    nreloc;     /* number of relocation entries */
        uint32_t    flags;      /* flags (section type and attributes)*/
        uint32_t    reserved1;  /* reserved (for offset or index) */
        uint32_t    reserved2;  /* reserved (for count or sizeof) */
        uint32_t    reserved3;  /* reserved */
    };
    
    
    sectname:比如__text、__stubs

    segname :该section所属的segment,比如__TEXT
    
    addr : 该section在内存的起始位置
    
    size: 该section的大小
    
    offset: 该section的文件偏移
    
    align : 字节大小对齐(以多少字节对齐,一般是2的乘幂)
    
    reloff :重定位入口的文件偏移
    
    nreloc: 需要重定位的入口数量
    
    flags:包含section的type和attributes
   
    reserved: 预留的字段
13.4.4 动态库链接信息
13.4.5 动态库链接器运行方式
VA  : 虚拟地址,也就是程序被加载到内存空间中的地址
RVA : 相对虚拟地址(Relative Virtual Address),即在"虚拟地址"前面加上"相对的":它仍按虚拟地址来换算,只不过不是从0开始,而是相对于某个基地址的偏移。
RAW :一般称文件偏移,你把一个文件看成一个连续的字节流,OFFSET就是这个字节流中的位置。
callq   0x100000f84 ## symbol stub for: _printf

  汇编代码中0x100000f84,这个地址是 __TEXT段的section __stubs区的地址。即JMP到__stubs(桩区)
13.4.6 可执行文件的加载过程

参考资料:
https://opensource.apple.com/tarballs/dyld/dyld-360.18.tar.gz

https://opensource.apple.com/source/xnu/xnu-2422.1.72/bsd/kern/kern_exec.c

https://developer.apple.com/library/content/documentation/DeveloperTools/Conceptual/MachOTopics/1-Articles/executing_files.html

总结:

 通过对Mach-O文件的分析,我们知道代码段(__TEXT)都是只读区,包含了程序逻辑处理。但是对于动态库的函数调用借助了数据段(__DATA)的__la_symbol_ptr区和__nl_symbol_ptr区。用户可以去修改这两个区的数据,因此我们可以利用这个特性去替换相关函数的调用(如:fishhook)。

注:通过DYLD_INSERT_LIBRARIES进行代码注入是dyld提供的功能。
13.4.7 fishhook工作原理
/*
 * Registers `rebindings_nel` rebindings and applies them.
 *
 * The new rebindings are prepended to the global hook list. When the list
 * then holds a single entry (first call), a dyld add-image callback is
 * registered; otherwise every currently loaded image is processed directly.
 *
 * Returns the result of prepend_rebindings; a negative value means failure
 * and nothing else is done.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next) {
    // Not the first call: walk the images that are loaded right now.
    uint32_t image_count = _dyld_image_count();
    uint32_t index = 0;
    while (index < image_count) {
      _rebind_symbols_for_image(_dyld_get_image_header(index),
                                _dyld_get_image_vmaddr_slide(index));
      index++;
    }
    return status;
  }

  // First call: let dyld invoke the callback for images.
  _dyld_register_func_for_add_image(_rebind_symbols_for_image);
  return status;
}
// Image callback (arg 1: address of the image's mach_header, arg 2: slide).
// Because of ASLR, the addresses an image actually occupies in virtual memory
// differ from the addresses recorded in its Mach-O structures by an offset,
// the slide, which is chosen randomly when the program is loaded.
// Forwards to rebind_symbols_for_image using the global hook list.
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide) {                            
    rebind_symbols_for_image(_rebindings_head, header, slide);
}
// Applies the hook list `rebindings` to one loaded image.
//
// rebindings: head of the hook list to apply.
// header:     address of the image's Mach-O header in memory.
// slide:      offset between the image's recorded and actual addresses.
//
// Pass 1 walks the load commands to find the __LINKEDIT segment, LC_SYMTAB
// and LC_DYSYMTAB. Pass 2 walks them again and rebinds every lazy / non-lazy
// symbol pointer section of the __DATA and __DATA_CONST segments.
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  // Give up if dladdr cannot resolve the header address.
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;

  // The load commands start right after the mach_header.
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  
  // Pass 1: walk the load commands, advancing by each command's cmdsize.
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    // Segment load command
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {  
      // __LINKEDIT segment
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) { // symbol table command
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {// dynamic symbol table command
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  // Nothing to do without all three commands or without indirect symbols.
  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

  // Base that turns __LINKEDIT file offsets into in-memory addresses:
  // slide + (segment vmaddr - segment file offset).
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  
  // Location of the symbol table
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  
  // Location of the string table
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  // Location of the indirect symbol table (its offset comes from LC_DYSYMTAB)
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  // Pass 2: rewind to the first load command (right after the mach_header).
  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {// walk the commands
    cur_seg_cmd = (segment_command_t *)cur;
    // Segment load command
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      // Only the data segments (__DATA and __DATA_CONST) are of interest.
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      // The section structs follow the segment command directly.
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
          // Lazy symbol pointers (__la_symbol_ptr)
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        // Non-lazy symbol pointers (__nl_symbol_ptr)
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}
// Rebinds the symbol pointers of one (lazy or non-lazy) pointer section.
//
// rebindings:      head of the hook list to apply.
// section:         the symbol pointer section to patch.
// slide:           offset added to section->addr to get the in-memory address.
// symtab, strtab:  symbol table and string table of the image.
// indirect_symtab: indirect symbol table of the image.
//
// NOTE(review): the pointers are stored straight into the section. If its
// pages are mapped read-only (possibly the case for __DATA_CONST, which the
// caller also scans) the store would fault -- confirm on the target OS.
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // reserved1 is this section's starting index in the indirect symbol table.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  
  // In-memory address of the section's pointer slots
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  
  // Visit every pointer-sized slot of the section.
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    uint32_t symtab_index = indirect_symbol_indices[i];
    // Skip absolute / local entries.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    // Offset of the symbol's name in the string table
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    // The symbol's name
    char *symbol_name = strtab + strtab_offset;
    // Names shorter than 2 characters are skipped (the first character is
    // dropped in the comparison below).
    if (strnlen(symbol_name, 2) < 2) {
      continue;
    }
    struct rebindings_entry *cur = rebindings;
    while (cur) {
     // Walk the hook list: install the replacement, remember the old binding.
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        // Compare from the second character on -- presumably to skip the
        // leading '_' of C symbol names.
        if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          // Save the current binding, unless the slot already holds the
          // replacement.
          if (cur->rebindings[j].replaced != NULL &&
              indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
          }
          indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          // First match wins; move on to the next slot.
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }
}
fishhook.png
上一篇下一篇

猜你喜欢

热点阅读