底层原理：Category

2022-01-15 本文已影响0人飘摇的水草

实例对象的isa指向类对象，当调用对象方法时，通过实例对象的isa找到类对象，最后找到对象方法的实现进行调用，分类的对象方法同样是放在类对象中的。
类对象的isa指向元类，当调用类方法时，通过类对象的isa找到元类，最后找到类方法的实现进行调用，分类的类方法也是放在元类中的。
分类是通过runtime机制动态地将分类的方法合并到类对象、元类对象中。

Category 作用

把类的不同实现方法分开到不同的文件里
声明私有方法
模拟多继承
将 framework 私有方法公开化

Category底层结构

首先，我们先创建一个类：

#import <Foundation/Foundation.h>
/// Base class of the category demo. It declares a single instance method;
/// the Speak and Eat categories shown later add further methods to it.
@interface People : NSObject

/// Logs a fixed message prefixed with the method name.
- (void)talk;
@end

#import "People.h"

@implementation People

/// Logs a fixed message prefixed with the name of this method.
- (void)talk {
    // __func__ expands to the enclosing method's name.
    NSLog(@"%s:can I speak?", __func__);
}

@end

#import "People.h"

/// Speak category on People: declares one additional instance method.
@interface People (Speak)

/// Logs a fixed message prefixed with the method name.
-(void)speak;

@end

#import "People+Speak.h"

@implementation People (Speak)

/// Logs a fixed message prefixed with the name of this method.
- (void)speak {
    // __func__ expands to the enclosing method's name.
    NSLog(@"%s: I can speak", __func__);
}

@end

#import "People.h"

/// Eat category on People: declares one additional instance method.
@interface People (Eat)

/// Logs a fixed message prefixed with the method name.
-(void)eat;

@end

#import "People+Eat.h"

@implementation People (Eat)

/// Logs a fixed message prefixed with the name of this method.
- (void)eat {
    // __func__ expands to the enclosing method's name.
    NSLog(@"%s: I can eat food", __func__);
}

@end

#import <Foundation/Foundation.h>
#import "People.h"
#import "People+Speak.h"
#import "People+Eat.h"
extern void _objc_autoreleasePoolPrint(void);
// Demo entry point: creates one People instance and sends it the method
// declared on the class itself, then the two methods declared by its categories.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        People *person = [[People alloc] init];

        [person talk];   // declared in People.h
        [person speak];  // declared in People+Speak.h
        [person eat];    // declared in People+Eat.h
    }

    return 0;
}

然后我们通过 xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc People+Speak.m 命令来看下 People+Speak.m 的底层结构

#ifndef _REWRITER_typedef_People
#define _REWRITER_typedef_People
typedef struct objc_object People;
typedef struct {} _objc_exc_People;
#endif

// Rewriter-generated layout struct for People. Its only member is the
// embedded NSObject_IMPL, so no additional instance variables appear here.
struct People_IMPL {
    struct NSObject_IMPL NSObject_IVARS;
};

// C function the rewriter generated for the category method `speak`.
// The receiver and selector arrive as the explicit parameters self and _cmd.
// Its address is stored as the _imp of the "speak" entry in
// _OBJC_$_CATEGORY_INSTANCE_METHODS_People_$_Speak further down.
static void _I_People_Speak_speak(People * self, SEL _cmd) {

    NSLog((NSString *)&__NSConstantStringImpl__var_folders_rn_r6_l2xln77j0bv69j2c_5rg00000gp_T_People_Speak_8c87eb_mi_0,__func__);
}

// @end

// Property descriptor: a name string plus an attributes string.
struct _prop_t {
    const char *name;
    const char *attributes;
};

struct _protocol_t;

// One method entry: selector, type string, and implementation pointer.
// The category's method list below fills these with
// "speak", "v16@0:8" and _I_People_Speak_speak respectively.
struct _objc_method {
    struct objc_selector * _cmd;
    const char *method_type;
    void  *_imp;
};

// Protocol descriptor as declared by the rewriter: name, inherited protocols,
// four method lists (instance/class, each with an optional variant),
// properties, and bookkeeping fields.
struct _protocol_t {
    void * isa;  // NULL
    const char *protocol_name;
    const struct _protocol_list_t * protocol_list; // super protocols
    const struct method_list_t *instance_methods;
    const struct method_list_t *class_methods;
    const struct method_list_t *optionalInstanceMethods;
    const struct method_list_t *optionalClassMethods;
    const struct _prop_list_t * properties;
    const unsigned int size;  // sizeof(struct _protocol_t)
    const unsigned int flags;  // = 0
    const char ** extendedMethodTypes;
};

// Instance-variable descriptor: where its offset is stored, its name and
// type strings, and its alignment and size.
struct _ivar_t {
    unsigned long int *offset;  // pointer to ivar offset location
    const char *name;
    const char *type;
    unsigned int alignment;
    unsigned int  size;
};

// Compile-time class data as declared by the rewriter: flags, instance
// start/size, the class name, and the base method / protocol / ivar /
// property lists. Note that an ivar list exists here, whereas _category_t
// below has no such member.
struct _class_ro_t {
    unsigned int flags;
    unsigned int instanceStart;
    unsigned int instanceSize;
    const unsigned char *ivarLayout;
    const char *name;
    const struct _method_list_t *baseMethods;
    const struct _objc_protocol_list *baseProtocols;
    const struct _ivar_list_t *ivars;
    const unsigned char *weakIvarLayout;
    const struct _prop_list_t *properties;
};

// Class object layout as declared by the rewriter: isa and superclass
// pointers, two opaque pointers (cache, vtable), and a pointer to the
// class's _class_ro_t data.
struct _class_t {
    struct _class_t *isa;
    struct _class_t *superclass;
    void *cache;
    void *vtable;
    struct _class_ro_t *ro;
};

// Descriptor emitted once per category. It carries method, protocol and
// property lists but has no ivar-list member.
struct _category_t {
    const char *name;                               // set to "People" by the initializer in this file
    struct _class_t *cls;                           // class being extended; filled in by the setup function
    const struct _method_list_t *instance_methods;  // instance methods added by the category
    const struct _method_list_t *class_methods;     // class methods added by the category
    const struct _protocol_list_t *protocols;       // protocols adopted by the category
    const struct _prop_list_t *properties;          // properties declared by the category
};
extern "C" __declspec(dllimport) struct objc_cache _objc_empty_cache;
#pragma warning(disable:4273)

// Instance-method list of the Speak category, placed in the
// __DATA,__objc_const section. It holds exactly one entry: selector "speak",
// type string "v16@0:8", implementation _I_People_Speak_speak.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[1];
} _OBJC_$_CATEGORY_INSTANCE_METHODS_People_$_Speak __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    {{(struct objc_selector *)"speak", "v16@0:8", (void *)_I_People_Speak_speak}}
};

extern "C" __declspec(dllimport) struct _class_t OBJC_CLASS_$_People;
// The _category_t instance for People (Speak), populated from the category's
// definition. Only the instance-method list is non-zero: the category
// declares no class methods, protocols or properties. The cls member starts
// as 0 and is assigned by OBJC_CATEGORY_SETUP_$_People_$_Speak.
static struct _category_t _OBJC_$_CATEGORY_People_$_Speak __attribute__ ((used, section ("__DATA,__objc_const"))) =
{
    "People",
    0, // &OBJC_CLASS_$_People,
    (const struct _method_list_t *)&_OBJC_$_CATEGORY_INSTANCE_METHODS_People_$_Speak,
    0,
    0,
    0,
};
// Setup hook: points the category descriptor's cls member at the People
// class object, which the static initializer left as 0.
static void OBJC_CATEGORY_SETUP_$_People_$_Speak(void ) {
    _OBJC_$_CATEGORY_People_$_Speak.cls = &OBJC_CLASS_$_People;
}
#pragma section(".objc_inithooks$B", long, read, write)
// Table of setup hooks allocated in the ".objc_inithooks$B" section; its
// single entry is the address of the category setup function above.
// NOTE(review): presumably these hooks are run at startup — confirm.
__declspec(allocate(".objc_inithooks$B")) static void *OBJC_CATEGORY_SETUP[] = {
    (void *)&OBJC_CATEGORY_SETUP_$_People_$_Speak,
};
// One-element array of category pointers emitted into the __objc_catlist
// section; its single entry is this file's category descriptor.
// NOTE(review): presumably this is the list the runtime reads via
// _getObjc2CategoryList when the image is loaded — confirm.
static struct _category_t *L_OBJC_LABEL_CATEGORY_$ [1] __attribute__((used, section ("__DATA, __objc_catlist,regular,no_dead_strip")))= {
    &_OBJC_$_CATEGORY_People_$_Speak,
};
// Image-info record for this translation unit: version 0, flag 2.
static struct IMAGE_INFO { unsigned version; unsigned flag; } _OBJC_IMAGE_INFO = { 0, 2 };

分类在编译完成后被转化成了下面这个结构体

// Structure every category is compiled into. It has method, protocol and
// property lists but no member for instance variables.
struct category_t {
    const char *name;  // class name
    struct _class_t *cls;  // the class this category is attached to
    const struct _method_list_t *instance_methods; // instance method list
    const struct _method_list_t *class_methods;// class method list
    const struct _protocol_list_t *protocols; // protocol list
    const struct _prop_list_t *properties; // property list
};

从上面的数据结构可以看出：

分类可以扩展一个类的（对象方法，类方法，协议和属性）但不能增加成员变量。同时，添加的属性不会生成get和set方法的实现，如果需要，则要用运行时动态添加

分类的方法在被编译完后存放到了这个结构体里，然后等程序运行过程中被合并到了类对象里，假如给 People 创建了5个分类，就会产生5个 category_t 对象，这个文件产生的变量名为 _OBJC_$_CATEGORY_People_$_Speak

分类的实现原理

上面几个步骤说到的只是分类的结构，是程序编译的时候就已经生成的。但这个分类跟对应的类关联在一起是在运行时，这个就用到oc的runtime机制了，runtime在程序运行的过程中，会把所有的分类，合并到对应的类或者元类里面去，如果有同名方法，会优先调用分类里面的方法（利用这个功能，我们可以对系统的类做方法交换）
分类的实现的具体步骤：

所有分类的方法会存放在一个二维数组里面，二维数组的每一个数组就是其中一个分类的所有方法，在运行时，会遍历这个数组，然后把所有的方法添加到对应的类里面去，具体添加的步骤如下

根据要添加的方法数组大小加上原来数组的大小，重新分配数组空间，
先把原来类的数组的存储地址向后面移动n个单元，n取决于二维数组的大小，然后把新传进来的数组从大到小的顺序进行遍历，一个一个插入到新分配的数组空间里面去，因为从大到小的顺序进行遍历，也就是数组后面的会排在第一个。二维数组的顺序是编译的先后顺序决定的，所以同一个方法名，后编译的分类的方法比先编译的方法优先执行

把分类里面的方法合并到对应的类中的核心源码：

/**
      * Prepends addedLists to the lists this container already stores.
      * addedLists: array of list pointers to attach (attachCategories passes
      *             the method / property / protocol lists collected from categories)
      * addedCount: number of entries in addedLists
      */
    void attachLists(List* const * addedLists, uint32_t addedCount) {
        if (addedCount == 0) return;
 
        if (hasArray()) {
            // many lists -> many lists
            // Number of lists currently stored
            uint32_t oldCount = array()->count;
            // Number of lists after the new ones are added
            uint32_t newCount = oldCount + addedCount;
            // Grow the existing array so it can hold newCount entries
            setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
            array()->count = newCount;
           // Shift the existing lists back by addedCount slots
           // (memmove, because source and destination overlap)
            memmove(array()->lists + addedCount, array()->lists, 
                    oldCount * sizeof(array()->lists[0]));
            // Copy the new lists into the slots freed at the front
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
        else if (!list  &&  addedCount == 1) {
            // 0 lists -> 1 list
            list = addedLists[0];
        } 
        else {
            // 1 list -> many lists
            List* oldList = list;
            uint32_t oldCount = oldList ? 1 : 0;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)malloc(array_t::byteSize(newCount)));
            array()->count = newCount;
            // The previous single list, if any, goes after the new ones
            if (oldList) array()->lists[addedCount] = oldList;
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
    }

由上面的源码可以看出为什么分类的方法和原来类的方法同名的情况下，会优先调用分类方法：因为分类的方法列表被放在数组的前面，查找时最先被找到。

Category的加载处理过程

通过Runtime加载某个类的所有Category数据
把所有Category的方法、属性、协议数据，合并到一个大数组中，后面参与编译的Category数据，会在数组的前面
将合并后的分类数据（方法、属性、协议），插入到类原来数据的前面。

源码解读顺序

下面我们通过源码，来看它每一步都是如何实现的。

objc-os.mm
1. _objc_init
2. map_images
3. map_images_nolock
objc-runtime-new.mm
1. _read_images
2. remethodizeClass
3. attachCategories
4. attachLists
5. realloc、memmove、memcpy

首先我们从runtime初始化函数开始看

步骤1

// Runtime bootstrap. A static flag makes every call after the first return
// immediately. The first call runs the init helpers in order and then hands
// map_images, load_images and unmap_image to _dyld_objc_notify_register.
void _objc_init(void)
{
    static bool initialized = false;
    if (initialized) return;
    initialized = true;

    // fixme defer initialization until an objc-using image is found?
    environ_init();
    tls_init();
    static_init();
    lock_init();
    exception_init();

    // map_images is the path that leads to category attachment.
    _dyld_objc_notify_register(&map_images, load_images, unmap_image);
}

步骤2

接着我们来到 &map_images函数（images这里代表镜像），来到map_images_nolock函数中找到_read_images函数，在_read_images函数中我们找到分类相关代码

// Locking wrapper: constructs a rwlock_writer_t on runtimeLock and forwards
// all arguments to map_images_nolock.
// NOTE(review): presumably the writer lock is held until this function
// returns — confirm against rwlock_writer_t.
void
map_images(unsigned count, const char * const paths[],
           const struct mach_header * const mhdrs[])
{
    rwlock_writer_t lock(runtimeLock);
    return map_images_nolock(count, paths, mhdrs);
}

void 
map_images_nolock(unsigned mhCount, const char * const mhPaths[],
                  const struct mach_header * const mhdrs[])
{
    ******  以上代码省略
    if (hCount > 0) {
        _read_images(hList, hCount, totalClasses, unoptimizedTotalClasses);
    }

    firstTime = NO;
}

// Excerpt of _read_images: only the category-discovery pass is shown; the
// code before it (including the declarations of hi, count and i) is omitted.
//
// For every header, the category list is fetched and each category is:
//   - dropped if its target class cannot be resolved;
//   - registered as "unattached" on the class when it carries instance
//     methods, protocols or instance properties;
//   - registered as "unattached" on the metaclass (cls->ISA()) when it
//     carries class methods, protocols or class properties.
// If the class or metaclass is already realized, remethodizeClass is called
// right away on it.
void _read_images(header_info **hList, uint32_t hCount, int totalClasses, int unoptimizedTotalClasses)
{

   // ... (earlier part of the function omitted in this excerpt) ...
   // Discover categories. 
    for (EACH_HEADER) {
        category_t **catlist = 
            _getObjc2CategoryList(hi, &count);
        bool hasClassProperties = hi->info()->hasCategoryClassProperties();

        for (i = 0; i < count; i++) {
            category_t *cat = catlist[i];
            Class cls = remapClass(cat->cls);

            if (!cls) {
                // Category's target class is missing (probably weak-linked).
                // Disavow any knowledge of this category.
                catlist[i] = nil;
                if (PrintConnecting) {
                    _objc_inform("CLASS: IGNORING category \?\?\?(%s) %p with "
                                 "missing weak-linked target class", 
                                 cat->name, cat);
                }
                continue;
            }

            // Process this category. 
            // First, register the category with its target class. 
            // Then, rebuild the class's method lists (etc) if 
            // the class is realized. 
            bool classExists = NO;
            if (cat->instanceMethods ||  cat->protocols  
                ||  cat->instanceProperties) 
            {
                addUnattachedCategoryForClass(cat, cls, hi);
                if (cls->isRealized()) {
                    remethodizeClass(cls);
                    classExists = YES;
                }
                if (PrintConnecting) {
                    _objc_inform("CLASS: found category -%s(%s) %s", 
                                 cls->nameForLogging(), cat->name, 
                                 classExists ? "on existing class" : "");
                }
            }

            // Same registration for the metaclass side of the category.
            if (cat->classMethods  ||  cat->protocols  
                ||  (hasClassProperties && cat->_classProperties)) 
            {
                addUnattachedCategoryForClass(cat, cls->ISA(), hi);
                if (cls->ISA()->isRealized()) {
                    remethodizeClass(cls->ISA());
                }
                if (PrintConnecting) {
                    _objc_inform("CLASS: found category +%s(%s)", 
                                 cls->nameForLogging(), cat->name);
                }
            }
        }
    }
}

步骤3

attachCategories函数接收了类对象cls和分类数组cats，如我们一开始写的代码所示，一个类可以有多个分类。之前我们说到分类信息存储在category_t结构体中，那么多个分类则保存在category_list中

// Attaches any still-unattached categories to cls.
// Asserts that runtimeLock is held for writing, fetches the pending category
// list for cls and, if there is one, passes it to attachCategories with
// cache flushing enabled, then frees the list.
static void remethodizeClass(Class cls)
{
    category_list *cats;
    bool isMeta;

    runtimeLock.assertWriting();

    // Only used for the log message below.
    isMeta = cls->isMetaClass();

    // Re-methodizing: check for more categories
    if ((cats = unattachedCategoriesForClass(cls, false/*not realizing*/))) {
        if (PrintConnecting) {
            _objc_inform("CLASS: attaching categories to class '%s' %s", 
                         cls->nameForLogging(), isMeta ? "(meta)" : "");
        }

        attachCategories(cls, cats, true /*flush caches*/);        
        free(cats);
    }
}

步骤4

首先根据方法列表，属性列表，协议列表，malloc分配内存，根据多少个分类以及每一块方法需要多少内存来分配相应的内存地址。
然后遍历分类数组，把每个分类的方法、属性以及协议分别放入对应的 mlists、proplists、protolists 数组中，这三个数组放着所有分类的方法、属性和协议。
之后通过类对象的data()方法，拿到类对象的class_rw_t结构体rw，在class结构中我们介绍过，class_rw_t中存放着类对象的方法，属性和协议等数据，rw结构体通过类对象的data方法获取，所以rw里面存放着类对象里面的数据。
最后分别通过rw调用方法列表、属性列表、协议列表的attachLists函数，将所有的分类的方法、属性、协议列表数组传进去，我们可以猜测在attachLists方法内部将分类和本类相应的对象方法，属性，和协议进行了合并

// Attaches the methods, properties and protocols of every category in cats
// to cls.
//
// cls           class (or metaclass) receiving the category data
// cats          categories waiting to be attached; nothing is done when NULL
// flush_caches  when true and at least one method list was attached,
//               flushCaches(cls) is called
//
// Three temporary arrays with one slot per category are allocated, filled by
// walking cats from its last entry to its first, handed to the matching
// attachLists call on the class's rw data, and then freed.
static void 
attachCategories(Class cls, category_list *cats, bool flush_caches)
{
    if (!cats) return;
    if (PrintReplacedMethods) printReplacements(cls, cats);

    bool isMeta = cls->isMetaClass();
    // Allocate one slot per category for each kind of list
    // (methods, properties, protocols).
    // fixme rearrange to remove these intermediate allocations
    method_list_t **mlists = (method_list_t **)
        malloc(cats->count * sizeof(*mlists));
    property_list_t **proplists = (property_list_t **)
        malloc(cats->count * sizeof(*proplists));
    protocol_list_t **protolists = (protocol_list_t **)
        malloc(cats->count * sizeof(*protolists));

    // Count backwards through cats to get newest categories first
    int mcount = 0;
    int propcount = 0;
    int protocount = 0;
    int i = cats->count;
    bool fromBundle = NO;
    while (i--) {
        auto& entry = cats->list[i];  // current category, walking from last to first

        method_list_t *mlist = entry.cat->methodsForMeta(isMeta);
        if (mlist) {
            // Collect each category's method list, so mlists ends up as
            // [ [method_t, method_t], [method_t, method_t], ... ]
            mlists[mcount++] = mlist;
            fromBundle |= entry.hi->isBundle();
        }

        // Collect the category's property list
        property_list_t *proplist = 
            entry.cat->propertiesForMeta(isMeta, entry.hi);
        if (proplist) {
            proplists[propcount++] = proplist;
        }
        // Collect the category's protocol list
        protocol_list_t *protolist = entry.cat->protocols;
        if (protolist) {
            protolists[protocount++] = protolist;
        }
    }
    // Fetch the class's rw data, which holds its method / property / protocol lists
    auto rw = cls->data();
    prepareMethodLists(cls, mlists, mcount, NO, fromBundle);
    // Attach the method lists of all categories to the class's method lists
    rw->methods.attachLists(mlists, mcount);
    free(mlists);
    if (flush_caches  &&  mcount > 0) flushCaches(cls);

    rw->properties.attachLists(proplists, propcount);
    free(proplists);

    rw->protocols.attachLists(protolists, protocount);
    free(protolists);
}

步骤5

方法合并

// Prepends addedCount list pointers from addedLists to the lists already
// stored. Three cases: an array already exists (grow it, shift, copy), nothing
// is stored and one list arrives (store it directly), or at most one list is
// stored (allocate an array, old list goes last).
void attachLists(List* const * addedLists, uint32_t addedCount) {
        if (addedCount == 0) return;

        if (hasArray()) {
            // many lists -> many lists
            uint32_t oldCount = array()->count;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
            array()->count = newCount;
            memmove(array()->lists + addedCount, array()->lists, 
                    oldCount * sizeof(array()->lists[0])); // shift the existing entries back
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0])); // copy the new lists into the freed front slots
        }
        else if (!list  &&  addedCount == 1) {
            // 0 lists -> 1 list
            list = addedLists[0];
        } 
        else {
            // 1 list -> many lists
            List* oldList = list;
            uint32_t oldCount = oldList ? 1 : 0;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)malloc(array_t::byteSize(newCount)));
            array()->count = newCount;
            if (oldList) array()->lists[addedCount] = oldList;
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
    }

我们可以看到分类的方法属性协议会追加到原来类的方法属性协议列表的前面，这也就是说如果一个类和它的分类有相同的方法，它的分类的方法会先被调用。

到此我们总结下category整个流程：我们每创建一个分类，在编译时都会生成category_t这样一个结构体并将分类的方法列表等信息存入_category_t这个结构体。在编译阶段分类的相关信息和本类的相关信息是分开的。等到运行阶段，会通过runtime加载某个类的所有Category数据，把所有Category的方法、属性、协议数据分别合并到一个数组中，然后再将分类合并后的数据插入到本类的数据的前面。

Category 的加载方式

我们知道，Objective-C 的运行是依赖 Objective-C 的 Runtime 的，而 Objective-C 的 runtime 和其他系统库一样，是 OS X 和 iOS 通过 dyld 动态加载的。

dyld 加载大致流程

dyld(the dynamic link editor) 的相关代码可在苹果开源网站上进行下载：dyld 苹果开源代码
关于 dyld 的详解可移步到 dyld详解

dyld 大致加载步骤：

配置环境变量；
加载共享缓存；
实例化主程序；
加载插入的动态库；
链接主程序；
链接插入的动态库；
初始化主程序：OC, C++ 全局变量初始化；
返回主程序入口函数。

Category 的加载过程

Runtime 是在第七步开始初始化的，所以 Category 也在这一步做加载。
我们先看一下主程序初始化时的调用栈：

dyldbootstrap::start ---> dyld::_main ---> initializeMainExecutable ---> runInitializers ---> recursiveInitialization ---> doInitialization ---> doModInitFunctions ---> _objc_init

最后调用的 _objc_init 是 libobjc 库中的方法，是 Runtime 的初始化过程，也是 Objective-C 的入口。

在 _objc_init 这一步中：Runtime 向 dyld 绑定了回调，当 image 加载到内存后，dyld 会通知 Runtime 进行处理，Runtime 接手后调用 map_images 做解析和处理，调用 _read_images 方法把 Category 的对象方法、协议、属性添加到类上，把 Category（分类）的类方法、协议添加到类的 MetaClass 上；接下来 load_images 中调用 call_load_methods 方法，遍历所有加载进来的 Class，按继承层级和编译顺序依次调用 Class 的 load 方法和其 Category 的 load 方法。

加载 Category 的调用栈：

_objc_init ---> map_images ---> map_images_nolock ---> _read_images（加载分类） ---> load_images

控制编译的顺序在Targets -> Compile Sources里控制，编译顺序依次是从前向后，放在最下面的文件最后编译，然后放在Category的大数组的最前面，当前类和分类中有同名方法的时候，会从大数组中查找，如果在最前面已经找到则不再继续查找，因此最后编译的文件的方法得到执行，可以在Compile Sources里拖拽文件调整编译顺序。
编写的分类在程序编译完毕的时候，被编译成了_category_t的结构体，方法数据都会存放在这个结构体里面，并且每个分类对应的结构体类型都是_category_t，结构也一样，只是里面的变量不一样，然后在运行时将每个分类的对象方法合并到类对象中，将类方法合并到元类对象中。
类别和扩展的区别：扩展是编译的时候就合并到类里去了，而类别是程序运行过程中运用运行时机制才合并到类里了。

常见面试题

Category的使用场景是什么？
答：将类分解成不同的模块
Category的实现原理？
答：Category的底层结构是struct category_t，里面存储着分类的对象方法、类方法、属性、协议信息，在程序运行的时候，runtime会将Category的数据，合并到类信息中（类对象、元类对象中）
答：通过runtime在程序运行中动态地将分类的方法合并到类对象、元类对象中，如果分类重写了类的方法，最终调用的是分类的方法，因为分类的方法放前面
Category和Extension的区别是什么？
答：Extension是在编译的时候，它的数据就已经包含在类信息中，Extension 一般用来隐藏类的私有信息，你必须有一个类的源码才能为一个类添加 Extension ，所以你无法为系统的类比如 NSString 添加 Extension，而Category是在运行时，才会将数据合并到类信息中。
Category中有load方法吗？load方法是什么时候调用的？load方法能继承吗？
答：有的。
- +load方法会在 runtime加载类、分类时调用。
- load方法可以继承，但是一般情况下不会主动去调用load方法，都是让系统自动调用。
- 每个类、分类的+load方法只在程序运行过程中调用一次。
- 调用顺序
  1. 先调用类的+load
    - 按照编译先后顺序调用（先编译、先调用）
    - 调用子类的+load之前会先调用父类的+load
  2. 等所有类的load方法调用完了，再调用分类的+load，这个顺序无法通过设置xcode里的文件编译顺序改变，没有继承关系的类之间和一个类的多个分类可以通过拖拽改变顺序
    - 按照编译先后顺序调用（先编译、先调用）
  3. 即使类没有用到，也会加载类的load方法，类和分类的方法都被合并到了类信息中，即使方法名一样
load、initialize方法的区别是什么？它们在category中的调用的顺序？以及出现继承时它们之间的调用过程？

调用方式
- load是根据函数地址直接调用
- initialize是通过objc_msgSend调用
调用时刻
- load是runtime加载类,加载分类的时候调用(只会调用一次)
- initialize是类第一次接收到消息的时候调用,每一个类只会initialize一次(父类的initialize方法可能会被调用多次.取决于子类是否有重写initialize,如果没有就会通过superclass去父类里面找)
load和initialize的调用顺序
- load
  - 先调用类的load
  - 先编译的类优先调用load
  - 调用子类的load之前,会先调用父类的load
  - 再调用分类的load
  - 先编译的分类优先调用load
- initialize
  - 先初始化父类
  - 再初始化子类(可能最终调用的是父类的initialize方法)

category能否添加成员变量？如果可以，如何给Category添加成员变量？
能否向编译后得到的类中增加实例变量？
答：不能
分析：因为编译后的类已经注册在 runtime 中，类结构体中的 objc_ivar_list 实例变量的链表和 instance_size 实例变量的内存大小已经确定，同时 runtime 会调用 class_setIvarLayout 或 class_setWeakIvarLayout 来处理 strong ，weak 引用，所以不能向存在的类中增加实例变量。
能否向运行时创建的类中增加实例变量？
可以
分析：运行时创建的类是可以添加实例变量，调用 class_addIvar 函数，但是得在调用 objc_allocateClassPair 之后，objc_registerClassPair 之前，原因如上。