iOS runtime 部分二

2020-07-13 本文已影响0人飞不越疯人院

主要探究方法的底层结构以及方法缓存

文中使用的 objc4源码是objc-781版本;

1. 方法(Method)的底层结构

方法的底层封装为 method_t;通过 objc-781源码可以得知method_t的底层结构如下:

struct method_t {
    SEL name;
    const char *types;
    MethodListIMP imp;
    struct SortBySELAddress :
        public std::binary_function<const method_t&,
                                    const method_t&, bool>
    {
        bool operator() (const method_t& lhs,
                         const method_t& rhs)
        { return lhs.name < rhs.name; }
    };
};


==>
///MethodListIMP是什么
#if !__arm64__
#error ptrauth other than arm64e is unimplemented
#endif
// Method lists use process-independent signature for compatibility.
using MethodListIMP = IMP __ptrauth_objc_method_list_imp;
#else
/// MethodListIMP就是 IMP, 是 IMP 的别称;
using MethodListIMP = IMP;
#endif
// _OBJC_PTRAUTH_H_
#endif

SEL name : 方法/函数名字, 也叫选择器;
1. 可以通过@selector()或者sel_registerName()获得(方法中的隐藏参数_cmd也是指向SEL);
2. 可以通过sel_getName()或者NSStringFromSelector()获取字符串;
const char *types : 编码(返回值类型, 参数类型)
MethodListIMP imp : 指向函数的指针(函数的实现)

/// A pointer to the function of a method implementation. 
#if !OBJC_OLD_DISPATCH_PROTOTYPES
typedef void (*IMP)(void /* id, SEL, ... */ ); 
#else
typedef id _Nullable (*IMP)(id _Nonnull, SEL _Nonnull, ...); 
#endif

2. 方法的缓存

我们知道任何方法的调用,底层都会编译成objc_msgSend()的方式, 例如[child HumanTest];方法调用底层的实现为((void (*)(id, SEL))(void *)objc_msgSend)((id)child, sel_registerName("HumanTest"));调用方法的代码执行完下一步就是查找方法进行调用,通过这篇文章可以得知方法的查找流程; 但是为什么需要有方法缓存的设计呢, 如果最终查找到的是基类的方法进行调用, 而self 所在类到基类隔着好多层次的继承关系, 那么每次都进行遍历查找就显得效率低下;

HumanTest是Human中的方法, 继承关系为NSObject->Human->Man->Boy->Child;

因此在类对象和元类对象中都会有方法缓存的操作;

下面以调用实例方法在类对象中的缓存为例讲解方法缓存的存取过程, 类方法在元类对象中的缓存类似;

struct objc_class : objc_object {
    // Class ISA;
    Class superclass;
    ///方法的缓存
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // cla
...
}
===>
struct cache_t {
...
public:
    static bucket_t *emptyBuckets();
    ///散列表存储
    struct bucket_t *buckets();
   ///散列表存储长度-1(因为下标从0开始索引, 所以&mask后最大值也只能是[长度-1])
    mask_t mask();
    ///已经缓存的方法数量
    mask_t occupied();
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    unsigned capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();
...
}
===>
///每一个方法都被封装成一个 bucket_t, 然后通过散列放入散列表bucket_t s 中
struct bucket_t {
    ...
    ///将 SEL和 IMP 进行一系列的操作封装到 bucket_t 中;
    void set(SEL newSel, IMP newImp, Class cls);
};

查看标准的IMP查找方法中实现


/***********************************************************************
* lookUpImpOrForward.
* The standard IMP lookup. ///标准的 IMP 查找方法
* Without LOOKUP_INITIALIZE: tries to avoid +initialize (but sometimes fails)
....
**********************************************************************/
IMP lookUpImpOrForward(id inst, SEL sel, Class cls, int behavior)
{
    const IMP forward_imp = (IMP)_objc_msgForward_impcache;
    IMP imp = nil;
    Class curClass;

    runtimeLock.assertUnlocked();

    // Optimistic cache lookup
    if (fastpath(behavior & LOOKUP_CACHE)) {
      ///查找方法缓存
        imp = cache_getImp(cls, sel);
        if (imp) goto done_nolock;
    }
...
...
     }
        if (fastpath(imp)) {
            // Found the method in a superclass. Cache it in this class.
            //从父类中找到这个方法, 也会缓存到当前类中
            goto done;
        }
    }

    // No implementation found. Try method resolver once.
    if (slowpath(behavior & LOOKUP_RESOLVER)) {
        behavior ^= LOOKUP_RESOLVER;
        return resolveMethod_locked(inst, sel, cls, behavior);
    }

 done:
    ///调用方法缓存
    log_and_fill_cache(cls, imp, sel, inst, curClass);
    runtimeLock.unlock();
 done_nolock:
    if (slowpath((behavior & LOOKUP_NIL) && imp == forward_imp)) {
        return nil;
    }
    return imp;
}
===>
static void
log_and_fill_cache(Class cls, IMP imp, SEL sel, id receiver, Class implementer)
{
#if SUPPORT_MESSAGE_LOGGING
    if (slowpath(objcMsgLogEnabled && implementer)) {
        bool cacheIt = logMessageSend(implementer->isMetaClass(), 
                                      cls->nameForLogging(),
                                      implementer->nameForLogging(), 
                                      sel);
        if (!cacheIt) return;
    }
#endif
  ///将查找到的方法进行缓存
    cache_fill(cls, sel, imp, receiver);
}

另一个方法查找的实现逻辑

/***********************************************************************
* lookupMethodInClassAndLoadCache.
* Like lookUpImpOrForward, but does not search superclasses.
* Caches and returns objc_msgForward if the method is not found in the class.
**********************************************************************/
IMP lookupMethodInClassAndLoadCache(Class cls, SEL sel)
{
    Method meth;
    IMP imp;

    // fixme this is incomplete - no resolver, +initialize - 
    // but it's only used for .cxx_construct/destruct so we don't care
    ASSERT(sel == SEL_cxx_construct  ||  sel == SEL_cxx_destruct);

    // Search cache first.
    ///搜索方法缓存. 如果方法缓存中有则直接返回
    imp = cache_getImp(cls, sel);
    if (imp) return imp;

    // Cache miss. Search method list.
    ///方法缓存中找不到, 才会开始遍历方法列表
    mutex_locker_t lock(runtimeLock);
    meth = getMethodNoSuper_nolock(cls, sel);
    ///走到这里说明方法缓存中没有, 则要添加缓存
    if (meth) {
        // Hit in method list. Cache it.
         ///添加方法缓存
        cache_fill(cls, sel, meth->imp, nil);
        return meth->imp;
    } else {
        // Miss in method list. Cache objc_msgForward.
         ///添加方法缓存
        cache_fill(cls, sel, _objc_msgForward_impcache, nil);
        return _objc_msgForward_impcache;
    }
}

可以得知不论哪个IMP查找方法都会调用方法缓存, 并且会缓存没有缓存的方法(通过cache_fill()函数);

2.1下面看下缓存的过程

///首先入口是cache_fill()函数;
void cache_fill(Class cls, SEL sel, IMP imp, id receiver)
{
    runtimeLock.assertLocked();
#if !DEBUG_TASK_THREADS
    // Never cache before +initialize is done
    ///确保在调用缓存时候+initialize已经完成
    if (cls->isInitialized()) {
        cache_t *cache = getCache(cls);
#if CONFIG_USE_CACHE_LOCK
        mutex_locker_t lock(cacheUpdateLock);
#endif
        ///调用cache->insert()
        cache->insert(cls, sel, imp, receiver);
    }
#else
    _collecting_in_critical();
#endif
}
===>
///初始化buckets 的长度, 一定是2的幂
/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
    INIT_CACHE_SIZE_LOG2 = 2, 
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2), ///最多缓存的方法2的16次方个;
};

///cache->insert()方法的实现
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif
    ///首先确保 class 已经调用isInitialized()方法
    ASSERT(sel != 0 && cls->isInitialized());

    // Use the cache as-is if it is less than 3/4 full--如果缓存使用空间不到3/4就一直使用, 3/4是一个临界点
    /*
      需要新的空间(散列表的长度) =  已经占用的长度+1, 
      例如原先的散列表长度是4, occupied() = 2表示已经占用两个,
      insert()新的方法后newOccupied = 3;
    */
    mask_t newOccupied = occupied() + 1;
    ///老的散列表占用空间值
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
  ///如果还没有方法缓存, 则创建
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        ///默认初始化缓存的长度为4个
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    ///散列表的空间占用以3/4为界限, 到达临界点需要重新分配空间进行散列缓存
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        ///散列表的空间占用以到达临界点3/4, 则重新分配空间长度*2
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
      //如果需要缓存的长度>=最大值MAX_CACHE_SIZE(65536), 则取值MAX_CACHE_SIZE(65536);
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
      /*
       重新分配空间, 注意最后的入参为 true, 因此可以得知, 每次缓存到达临界点时会重新分配
      空间(长度*2); 然后将之前的缓存全部清空;
      */
        reallocate(oldCapacity, capacity, true);
    }
    ///散列表
    bucket_t *b = buckets();
    ///掩码mask = 散列表长度-1
    mask_t m = capacity - 1;
    /*
      通过SEL&掩码得到一个值, 这个值就是这个bucket在散列表中存储的地址;
    */
    mask_t begin = cache_hash(sel, m);
    ///以这个地址开始查询, 是否可以放在这个位置
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {  
         ///从这个散列表中取出这个位置的数据, 如果没有数据则将这个bucket插入这个位置
        if (fastpath(b[i].sel() == 0)) {
            ///同时将记录已占用的Occupied()变量+1;
            incrementOccupied();
             ///以sel为key, 将imp封装在bucket中然后放入散列表
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        ///如果这个位置已经被其他线程将此bucket存储在这里了. 则跳过;
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    /*
    如果if(fastpath(b[i].sel() == 0))条件不成立, 则重新生成i, 就是cache_hash(sel, m)直接减一, 
    如果仍然不满足继续减, 遍历到零则取掩码mask(散列表长度-1); 因为每个散列表的空间最多使用3/4;
    所以一定可以找到可用空间;
    */
    } while (fastpath((i = cache_next(i, m)) != begin));
    ///容错处理, 不合法的缓存处理;
    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

===============================================================
///还没有方法缓存的判断条件
bool cache_t::isConstantEmptyCache()
{
    return 
        occupied() == 0  &&  
        buckets() == emptyBucketsForCapacity(capacity(), false);
}

///重新分配散列表内存空间的方法
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this
    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
    ///创建新的散列表和掩码mask
    setBucketsAndMask(newBuckets, newCapacity - 1);
   ///将老的散列表空间释放
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}
// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.
///通过sel&掩码mask得到存放在散列表中的位置;
static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    return (mask_t)(uintptr_t)sel & mask;
}
===============================================================

方法缓存过程总结:

创建一个长度为4的散列表buckets, 掩码mask值为3;(因为从0开始索引, 所以3就是最后一个位置)

通过i = sel&mask得到一个值;以sel为key将IMP封装一个bucket;然后将这个bucket存放在i这个位置; (因为mask值为3, 所以i的值不会超过3)

如果散列表中i已经被占用则i - 1查询到0处后查询最后一个位置, 直到可以存放;(方案是散列表的开放寻址法)

如果容量到达3/4临界点, 则重新创建散列表空间是原来2倍, 重新获取掩码mask, 将之前散列表内容清空, 开始计算然后存放;

在arm64构架iPhone上散列表的最大空间是1<<16(65536)个, 所以理论上一个类的方法缓存的上限是(65536 * 3/4) - 1 个, 超过这个值则会清空重新缓存;

散列表中永远都会有很多空余的空间, 这个操作是空间换时间的典型案例;

2.2方法缓存的获取过程

有上面可以得知获取方法缓存的方法是cache_getImp(), 不过在objc4-781版本中这个方法实现已经变成了汇编, 能力有限无力读懂整个流程,不过从官方的注释仍然可以看出提取过程做的 SEL&MASK操作;
objc-msg-arm64.s中没有cache_getImp实现过程, 但是objc-msg-arm.s中有类似相关代码

/********************************************************************
 * IMP cache_getImp(Class cls, SEL sel)
 *
 * On entry:    r0 = class whose cache is to be searched
 *              r1 = selector to search for
 *
 * If found, returns method implementation.
 * If not found, returns NULL.
  如果缓存中找到 IMP 则返回, 否则返回 NULL
 ********************************************************************/
    STATIC_ENTRY _cache_getImp
    mov r9, r0
    CacheLookup NORMAL, _cache_getImp
    // cache hit, IMP in r12
    mov r0, r12
    bx  lr          // return imp
    CacheLookup2 GETIMP, _cache_getImp
    // cache miss, return nil
    mov r0, #0
    bx  lr
    END_ENTRY _cache_getImp

===>
///缓存查找
.macro CacheLookup
LLookupStart$1:

    ldrh    r12, [r9, #CACHE_MASK]  // r12 = mask
    ldr r9, [r9, #CACHE]    // r9 = buckets
.if $0 == STRET
    ///能力有限, 无法读懂汇编的过程, 但是从SEL & mask可以大致看到有做类似的操作来取 index;
    and r12, r12, r2        // r12 = index = SEL & mask
.else
    and r12, r12, r1        // r12 = index = SEL & mask
.endif
    add r9, r9, r12, LSL #3 // r9 = bucket = buckets+index*8
    ldr r12, [r9, #CACHED_SEL]  // r12 = bucket->sel
6:
.if $0 == STRET
    teq r12, r2
.else
    teq r12, r1
.endif
    bne 8f
    ldr r12, [r9, #CACHED_IMP]  // r12 = bucket->imp
.if $0 == STRET
    tst r12, r12        // set ne for stret forwarding
.else
    // eq already set for nonstret forwarding by `teq` above
.endif
.endmacro

参考文章和下载链接
Apple 一些源码的下载地址
 方法的查找顺序
 什么是散列表
 LP64 结构数据占据多少位
 LP64什么意思