iOS 消息发送、查询、缓存实现原理

2021-02-18 | 本文已影响0人 | 野码道人

本文会阐述下面几个问题
1、SEL 和 IMP是什么
2、方法调用过程概述
3、消息发送的底层实现
4、方法快速查询与慢速查询
5、慢速方法调用过程详解
6、方法缓存

查看源码（源码版本objc4-781.2）

源码地址：https://opensource.apple.com/source/objc4/

SEL 和 IMP

SEL.jpg

如下，OBJC_OLD_DISPATCH_PROTOTYPES这个宏的值是0，可见IMP是一个函数指针，SEL是一个objc_selector类型的结构体指针，apple给的注释，一个不透明的类型代表方法选择器，是的~源码中并没有相关定义

/// Opaque handle that identifies a method selector.
typedef struct objc_selector *SEL;

/// Function pointer to a method's implementation.
/// The legacy prototype (receiver, selector, variadic arguments) is selected
/// only when OBJC_OLD_DISPATCH_PROTOTYPES is non-zero.
#if OBJC_OLD_DISPATCH_PROTOTYPES
typedef id _Nullable (*IMP)(id _Nonnull, SEL _Nonnull, ...);
#else
typedef void (*IMP)(void /* id, SEL, ... */ );
#endif

方法调用过程概述

对象调用方法，编译器会将其转成消息发送objc_msgSend，流程如下：

1、首先，通过对象的isa指针找到class对象
2、在class的cache中查找方法，如果没有，在method list中查找
3、如果class中没有找到，在继承链中循环查找
4、一旦找到函数imp，就去将其返回并执行

函数调用的底层实现是objc_msgSend，苹果称之为消息快速查找，定义如下：

快速查找

/// Message-send entry point. Takes the receiver (may be nil), the selector
/// (must not be nil) and a variadic argument list; the returned id may be nil.
OBJC_EXPORT id _Nullable
objc_msgSend(id _Nullable self, SEL _Nonnull op, ...)

在源码中搜索objc_msgSend

objc-msg.jpg

你会发现不同架构的不同实现文件，嗯~汇编实现的，以arm64为例

    // arm64 entry of objc_msgSend. Flow visible in this excerpt: test the
    // receiver, load its isa, derive the class into p16, then probe the
    // method cache.
    ENTRY _objc_msgSend
    UNWIND _objc_msgSend, NoFrame

    cmp p0, #0          // nil check and tagged pointer check
#if SUPPORT_TAGGED_POINTERS
    // signed "<= 0" covers both nil (== 0) and values that look negative
    b.le    LNilOrTagged        //  (MSB tagged pointer looks negative)
#else
    // without tagged pointers only a nil receiver takes the early exit
    b.eq    LReturnZero
#endif
    // load the word stored at the receiver's address
    ldr p13, [x0]       // p13 = isa
    GetClassFromIsa_p16 p13     // p16 = class
LGetIsaDone:
    // calls imp or objc_msgSend_uncached
    CacheLookup NORMAL, _objc_msgSend

如上：

1、nil与tagged pointer 判定
2、从对象首地址（x0）读取isa的值，存入p13
3、GetClassFromIsa_p16从p13取出class
4、CacheLookup NORMAL, _objc_msgSend从缓存中找到imp否则走objc_msgSend_uncached（慢速路径）

慢速查找

慢速查找路径，objc_msgSend_uncached-> MethodTableLookup-> _lookUpImpOrForward

// MethodTableLookup: saves the parameter registers, calls
// _lookUpImpOrForward(receiver, selector, class from x16, 3), moves the
// returned IMP into x17 and restores the parameter registers.
.macro MethodTableLookup
    
    // push frame
    SignLR
    stp fp, lr, [sp, #-16]!
    mov fp, sp

    // save parameter registers: x0..x8, q0..q7
    // Layout: q0..q7 in the first 8*16 bytes, x0..x8 right after them.
    // NOTE(review): 10*8 bytes are reserved for nine x registers, presumably
    // to keep sp 16-byte aligned - confirm.
    sub sp, sp, #(10*8 + 8*16)
    stp q0, q1, [sp, #(0*16)]
    stp q2, q3, [sp, #(2*16)]
    stp q4, q5, [sp, #(4*16)]
    stp q6, q7, [sp, #(6*16)]
    stp x0, x1, [sp, #(8*16+0*8)]
    stp x2, x3, [sp, #(8*16+2*8)]
    stp x4, x5, [sp, #(8*16+4*8)]
    stp x6, x7, [sp, #(8*16+6*8)]
    str x8,     [sp, #(8*16+8*8)]

    // lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
    // receiver and selector already in x0 and x1
    // third argument: the class, taken from x16
    mov x2, x16
    // fourth argument: the behavior flags named in the comment above
    mov x3, #3
    bl  _lookUpImpOrForward

    // IMP in x0
    // keep it in x17 because x0 is about to be restored to the receiver
    mov x17, x0
    
    // restore registers and return
    ldp q0, q1, [sp, #(0*16)]
    ldp q2, q3, [sp, #(2*16)]
    ldp q4, q5, [sp, #(4*16)]
    ldp q6, q7, [sp, #(6*16)]
    ldp x0, x1, [sp, #(8*16+0*8)]
    ldp x2, x3, [sp, #(8*16+2*8)]
    ldp x4, x5, [sp, #(8*16+4*8)]
    ldp x6, x7, [sp, #(8*16+6*8)]
    ldr x8,     [sp, #(8*16+8*8)]

    // drop the register save area, then pop the frame pushed above
    mov sp, fp
    ldp fp, lr, [sp], #16
    AuthenticateLR

.endmacro

    // Slow-path continuation of objc_msgSend: runs MethodTableLookup and then
    // tail-calls the IMP that the macro left in x17.
    STATIC_ENTRY __objc_msgSend_uncached
    UNWIND __objc_msgSend_uncached, FrameWithNoSaves

    // THIS IS NOT A CALLABLE C FUNCTION
    // Out-of-band p16 is the class to search
    
    MethodTableLookup
    TailCallFunctionPointer x17

    END_ENTRY __objc_msgSend_uncached


    STATIC_ENTRY __objc_msgLookup_uncached

如上慢速查找会调用到_lookUpImpOrForward，在源码中查看

慢速方法调用过程详解

定义在objc-runtime-new.mm中，源码有些长，不过还是建议读者耐心看完这个函数，这是方法慢速查找过程的核心函数，如下在关键节点添加了注释，我们来逐行地阅读一下

// Slow-path method lookup: finds the IMP for `sel`, starting at class `cls`
// and walking up the superclass chain.
//
// inst     - the receiver; passed on to initialization, resolution and the
//            cache fill.
// sel      - the selector being looked up.
// cls      - the class to start from; its cache is the one that gets filled.
// behavior - bit flags; this function tests LOOKUP_CACHE, LOOKUP_INITIALIZE,
//            LOOKUP_RESOLVER and LOOKUP_NIL.
//
// Returns the IMP that was found. When nothing is found it returns the result
// of resolveMethod_locked() if LOOKUP_RESOLVER is set; otherwise the forwarding
// IMP, or nil instead of the forwarding IMP if LOOKUP_NIL is set.
// runtimeLock must not be held on entry (asserted below).
IMP lookUpImpOrForward(id inst, SEL sel, Class cls, int behavior)
{
    const IMP forward_imp = (IMP)_objc_msgForward_impcache;
    IMP imp = nil;
    Class curClass;

    runtimeLock.assertUnlocked();

    // Optimistic lookup in cls's cache, done before taking the lock and
    // only when LOOKUP_CACHE is requested.
    if (fastpath(behavior & LOOKUP_CACHE)) {
        imp = cache_getImp(cls, sel);
        if (imp) goto done_nolock;
    }

    // Hold runtimeLock for the search and the cache fill, so that a method
    // added concurrently by another thread is not overwritten in the cache
    // by a stale (older) address.
    runtimeLock.lock();

    // To make CFI attacks harder, the call below checks that the class is
    // either built into the binary or legitimately registered through
    // objc_duplicateClass, objc_initializeClassPair or objc_allocateClassPair.
    // TODO: this check is quite costly during process startup.
    // (Article author's note: Apple left a TODO here; some performance is
    // traded for safety and it will probably be optimized later.)
    checkIsKnownClass(cls);

    // Realize the class first if that has not happened yet; cls is replaced
    // by whatever the helper returns.
    if (slowpath(!cls->isRealized())) {
        cls = realizeClassMaybeSwiftAndLeaveLocked(cls, runtimeLock);
    }

    // LOOKUP_INITIALIZE was requested and the class is not initialized yet.
    if (slowpath((behavior & LOOKUP_INITIALIZE) && !cls->isInitialized())) {
        cls = initializeAndLeaveLocked(cls, inst, runtimeLock);
        // Per the article, +initialize gets invoked as part of this call.
    }

    runtimeLock.assertLocked();
    curClass = cls;

    // attempts bounds the walk up the chain; see the cycle check below.
    for (unsigned attempts = unreasonableClassCount();;) {
        // Search curClass's own method list.
        Method meth = getMethodNoSuper_nolock(curClass, sel);
        if (meth) {
            imp = meth->imp;
            // Found: `done` fills the cache of cls (the class the lookup
            // started from), not the cache of curClass.
            goto done;
        }

        if (slowpath((curClass = curClass->superclass) == nil)) {
            // No implementation found, and the method resolver didn't help.
            // Use forwarding.
            imp = forward_imp;
            break;
        }

        // Halt if there is a cycle in the superclass chain: the attempts
        // budget has been used up.
        if (slowpath(--attempts == 0)) {
            _objc_fatal("Memory corruption in class list.");
        }

        // Look in the cache of the superclass that curClass now refers to.
        imp = cache_getImp(curClass, sel);
        if (slowpath(imp == forward_imp)) {
            // Found a forwarding entry in a superclass cache. Stop searching,
            // but don't cache yet; method resolution for this class goes first.
            break;
        }
        if (fastpath(imp)) {
            // Found the method in a superclass cache: `done` caches it in cls.
            goto done;
        }
    }

    // No implementation found: try dynamic method resolution once. The flag
    // is cleared in the behavior value handed on, so it is not requested again.
    // NOTE(review): runtimeLock is still held at this return; presumably
    // resolveMethod_locked releases it - confirm.
    if (slowpath(behavior & LOOKUP_RESOLVER)) {
        behavior ^= LOOKUP_RESOLVER;
        return resolveMethod_locked(inst, sel, cls, behavior);
    }

 done:
    // Fill cls's cache (curClass is passed as the implementer) and unlock.
    log_and_fill_cache(cls, imp, sel, inst, curClass);
    runtimeLock.unlock();
 done_nolock:
    // Callers that asked for LOOKUP_NIL get nil instead of the forwarding IMP.
    if (slowpath((behavior & LOOKUP_NIL) && imp == forward_imp)) {
        return nil;
    }
    return imp;
}

好吧~如果你没有读下来，如上流程大概是这样

1、从当前类的缓存中查询，查到则返回imp
2、检查当前类的合法性checkIsKnownClass，如不合法则抛出异常
3、如果当前操作是初始化行为并且当前类没有被初始化，调用initializeAndLeaveLocked，此时类的+initialize方法会被调用
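4、执行一个循环沿继承链查找：先在curClass的方法列表中查找（getMethodNoSuper_nolock），找到则跳到done，把imp缓存到查找起点cls（消息接收者所属的类）的cache中并返回；没找到则把curClass指向它的父类，如果父类为nil，说明整条继承链都没有实现，imp被置为forward_imp并跳出循环；否则先检测继承链中是否存在环（尝试次数耗尽则_objc_fatal终止），再到该父类的cache里面查找：查到的如果是forward_imp，则停止搜索且不缓存，跳出循环；查到其它imp，同样跳到done，缓存到cls的cache并返回
5、跳出循环后，如果behavior带有LOOKUP_RESOLVER，则清除该标志并执行一次动态方法解析resolveMethod_locked；否则把forward_imp填入cls的cache并返回（若带有LOOKUP_NIL则返回nil），随后进入消息转发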

详细探究下：fastpath ，slowpath，log_and_fill_cache

fastpath 和 slowpath

快速路径，慢速路径，定义如下

// Branch hints built on the compiler builtin __builtin_expect.
// fastpath(x): x is expected to be true; slowpath(x): x is expected to be false.
// Both yield the boolean value of x, so they can wrap any condition.
#define fastpath(x) (__builtin_expect(bool(x), 1))
#define slowpath(x) (__builtin_expect(bool(x), 0))

__builtin_expect是GCC/Clang编译器提供的内建函数（并不是unix系统提供的宏），用来告诉编译器某个表达式最可能的取值，编译器据此调整生成代码的分支布局，让大概率执行的路径尽量减少跳转；即fastpath(x) 中x很可能为真，slowpath(x)中x很可能为假

方法缓存

log_and_fill_cache方法缓存，源码如下

// Optionally logs the message send, then stores imp for sel in cls's cache.
// When logging is compiled in and enabled, and an implementer class is given,
// logMessageSend() decides whether the entry is cached at all.
static void
log_and_fill_cache(Class cls, IMP imp, SEL sel, id receiver, Class implementer)
{
#if SUPPORT_MESSAGE_LOGGING
    if (slowpath(objcMsgLogEnabled && implementer)) {
        // A false result from the logger means "do not cache this send".
        if (!logMessageSend(implementer->isMetaClass(),
                            cls->nameForLogging(),
                            implementer->nameForLogging(),
                            sel)) {
            return;
        }
    }
#endif
    cache_fill(cls, sel, imp, receiver);
}

// Message logging is compiled in only when TARGET_OS_OSX is non-zero.
#if TARGET_OS_OSX
#   define SUPPORT_MESSAGE_LOGGING 1
#else
#   define SUPPORT_MESSAGE_LOGGING 0
#endif

如上SUPPORT_MESSAGE_LOGGING这个宏在非macOS平台（即!TARGET_OS_OSX，例如iOS）上为0，所以简化下代码

// Simplified form of log_and_fill_cache for builds where
// SUPPORT_MESSAGE_LOGGING is 0: with the logging branch compiled out, all
// that remains is the cache fill for cls. implementer is unused in this form.
static void
log_and_fill_cache(Class cls, IMP imp, SEL sel, id receiver, Class implementer)
{
    cache_fill(cls, sel, imp, receiver);
}

// Stores imp for sel in the method cache of cls.
// The caller must hold runtimeLock. Classes that have not finished
// +initialize are skipped, so nothing is cached for them.
void cache_fill(Class cls, SEL sel, IMP imp, id receiver)
{
    runtimeLock.assertLocked();

    // Never cache before +initialize is done
    if (!cls->isInitialized()) {
        return;
    }

    getCache(cls)->insert(cls, sel, imp, receiver);
}

// Inserts the (sel, imp) pair into this cache on behalf of class cls.
//
// Preconditions (asserted below): the caller holds cacheUpdateLock when
// CONFIG_USE_CACHE_LOCK is set, otherwise runtimeLock; sel is non-zero and
// cls is already initialized.
//
// The table is (re)allocated first if it is the constant empty cache, or if
// adding one more entry would take it past 3/4 full. The entry is then put in
// the first unused slot of the probe sequence that starts at cache_hash(sel).
// receiver is used only for the bad_cache() error report.
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    ASSERT(sel != 0 && cls->isInitialized());

    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = occupied() + 1;
    // From here on the local `capacity` shadows the member function.
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        // Grow: double the capacity (or start at INIT_CACHE_SIZE), capped at
        // MAX_CACHE_SIZE. The old buckets are freed (freeOld == true).
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    // NOTE(review): capacity - 1 is passed on as a mask, which presumes
    // capacity is a power of two - confirm.
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {
        if (fastpath(b[i].sel() == 0)) {
            // Empty slot: claim it and store the entry.
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    // Reached only if the scan came back to `begin` without finding an empty
    // slot or an existing entry for sel.
    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

1、对象在调用方法的时候，即会调用objc_msgSend方法，寻找方法的imp.
2、首次查找到方法后会调用log_and_fill_cache
3、然后会调用到cache_fill，它先判断类是否已经完成初始化（cls->isInitialized()），只有已初始化的类才会继续调用cache_t::insert方法写入缓存
4、在cache_t::insert方法中，如果没有缓存方法，缓存这个方法，首先判断缓存空间是否已经开辟，如果没有开辟，调用reallocate方法进行开辟并设置初始值。如果缓存方法的时候，缓存空间已经开辟了，继续判断算上当前这个方法的总缓存数量是否超过了总缓存空间容量的3/4，如果没超过，只执行缓存方法，如果超过了，需要再次调用reallocate方法扩容。扩容方法里面则会重新开辟一个大小为原空间2倍的新缓存空间，并且释放掉原缓存空间。最后进行当前方法的缓存，找到对应缓存位置的bucket进行赋值。
5、正常情况下，找到空位写入（或发现该sel已被其他线程缓存）后insert就直接返回了；只有把整张表探测一圈仍然既没有空位、也没有相同的sel时（按注释本应保证有空位，走到这里说明缓存状态异常），才会调用最后的cache_t::bad_cache报告错误

最后

由于篇幅原因，消息动态解析与转发放在下一篇讨论~