objc_class cache分析

2020-12-29  本文已影响0人  丸疯

objc_class结构

// Layout of a class object as quoted from the runtime source. Only the data
// members are shown; the methods of the original are elided in this excerpt.
struct objc_class : objc_object {
    // Class ISA;              // commented out in the quoted source; presumably supplied by the objc_object base -- confirm
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
    // (methods omitted in this excerpt)
};

笔者已有两篇blog分别介绍ISA和bits,有兴趣的同学可以看一下。

cache

// Per-class method cache as quoted from the runtime source.
// How the bucket pointer and the mask are stored depends on
// CACHE_MASK_STORAGE; the mask constants and all methods are elided here.
struct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED // macOS, simulator -- the split is mainly per architecture
    // explicit_atomic makes the atomicity explicit; per the author's note the
    // goal is thread safety when entries are added, removed, changed or read.
    // Equivalent to: struct bucket_t * _buckets;
    // _buckets holds the sel/imp pairs.
    // It is read through the accessor of the same name, buckets().
    explicit_atomic<struct bucket_t *> _buckets;
    explicit_atomic<mask_t> _mask;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 // 64-bit devices
    explicit_atomic<uintptr_t> _maskAndBuckets; // mask and buckets share one word, as an optimization
    mask_t _mask_unused;

    // Mask constants follow here -- comparable to the isa masks (bit fields).
    // (mask constants omitted in this excerpt)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4 // non-64-bit devices
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    // Mask constants follow here -- comparable to the isa masks (bit fields).
    // (mask constants omitted in this excerpt)
#else
#error Unknown cache mask storage type.
#endif

#if __LP64__
    uint16_t _flags;      // only present on LP64 targets
#endif
    uint16_t _occupied;

    // (methods omitted in this excerpt)
};

通过源码及以上分析,cache_t的主要结构:_buckets、_mask、_occupied(64位下还有_flags)

// One cache entry: a selector stored together with its implementation.
// The two fields swap order depending on the target architecture.
struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif
    // (methods omitted in this excerpt)
};

通过源码我们发现,bucket_t是结构体类型,里面有_imp和_sel两个成员;不同的是arm64真机上_imp在前、_sel在后,其他架构则是_sel在前、_imp在后。也就是说_imp和_sel是成对出现的。

总结:
cache其实就是缓存_sel和_imp

  1. p/x Class: 获取类的首地址 $0
  2. p (cache_t *)($0+16): 首地址偏移16字节(64位下isa占8字节 + superclass占8字节)获取cache: $1
  3. p $1.buckets()[0]:获取缓存的方法列表的第一个位置的bucket: $2
  4. p $2.sel(): 获取当前bucket的方法名
  5. p $2.imp(Class): 获取当前bucket方法的Imp指针

下面介绍一种脱离源码环境来探索_buckets、_mask、_occupied、_flags的方法

// Hand-written mirrors of the runtime's class layout, used to read a class's
// method cache without the runtime's private headers.
//
// The structs are declared dependency-first: a struct that is embedded by
// value must be a complete type at the point of use, so lg_bucket_t,
// lg_cache_t and lg_class_data_bits_t all precede lg_objc_class.

// NOTE(review): 32-bit mask assumed; it matches the "%u" format used when
// _mask is printed -- confirm for the target being inspected.
typedef uint32_t mask_t;

// One cache slot: selector first, then implementation.
struct lg_bucket_t {
    SEL _sel;
    IMP _imp;
};

// Cache header with the bucket pointer and the mask stored as separate fields.
struct lg_cache_t {
    struct lg_bucket_t * _buckets;
    mask_t _mask;
    uint16_t _flags;
    uint16_t _occupied;
};

struct lg_class_data_bits_t {
    uintptr_t bits;
};

// Mirror of the class object itself; field order follows objc_class.
struct lg_objc_class {
    Class ISA;
    Class superclass;
    struct lg_cache_t cache;             // formerly cache pointer and vtable
    struct lg_class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};
// Logs the cache header of `class`, then one line per bucket.
// The header line shows _occupied and _mask. The bucket lines cover indices
// 0 .. _mask-1: the loop bound is _mask itself, so the slot at index _mask
// is never visited.
void printFun(struct lg_objc_class * class){
    const struct lg_cache_t *cache = &class->cache;
    Log(@"%hu - %u",cache->_occupied,cache->_mask);

    mask_t slot = 0;
    while (slot < cache->_mask) {
        // Take a copy of the bucket, then print its selector name and IMP.
        struct lg_bucket_t entry = cache->_buckets[slot];
        Log(@"%@ - %p",NSStringFromSelector(entry._sel),entry._imp);
        slot++;
    }
}
// Demo driver: sends say1 ... say8 to one LGPerson instance and dumps the
// class's method cache through printFun after every single send, so each
// dump reflects exactly one additional message. The order of the statements
// is the experiment itself.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        LGPerson *p  = [LGPerson alloc];
        Class pClass = [LGPerson class];  // the runtime's class object (objc_class)
        /// Reinterpret the system Class as our own lg_objc_class mirror
        struct lg_objc_class *lg_pClass = (__bridge struct lg_objc_class *)(pClass);
        // One send followed by one dump, eight times over.
        [p say1];
        printFun(lg_pClass);
        [p say2];
        printFun(lg_pClass);
        [p say3];
        printFun(lg_pClass);
        [p say4];
        printFun(lg_pClass);
        [p say5];
        printFun(lg_pClass);
        [p say6];
        printFun(lg_pClass);
        [p say7];
        printFun(lg_pClass);
        [p say8];
        printFun(lg_pClass);
        
        NSLog(@"Hello, World!");
    }
    return 0;
}
2020-12-29 14:23:49.186839+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say1]
1 - 3
(null) - 0x0
say1 - 0xb968
(null) - 0x0
2020-12-29 14:23:49.188130+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say2]
2 - 3
(null) - 0x0
say1 - 0xb968
say2 - 0xb938
2020-12-29 14:23:49.188286+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say3]
1 - 7
say3 - 0xb9c8
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
2020-12-29 14:23:49.188654+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say4]
2 - 7
say3 - 0xb9c8
(null) - 0x0
(null) - 0x0
(null) - 0x0
say4 - 0xb998
(null) - 0x0
(null) - 0x0
2020-12-29 14:23:49.188910+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say5]
3 - 7
say3 - 0xb9c8
say5 - 0xb9a8
(null) - 0x0
(null) - 0x0
say4 - 0xb998
(null) - 0x0
(null) - 0x0
2020-12-29 14:23:49.189177+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say6]
4 - 7
say3 - 0xb9c8
say5 - 0xb9a8
(null) - 0x0
(null) - 0x0
say4 - 0xb998
(null) - 0x0
say6 - 0xb878
2020-12-29 14:23:49.236042+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say7]
5 - 7
say3 - 0xb9c8
say5 - 0xb9a8
(null) - 0x0
say7 - 0xb808
say4 - 0xb998
(null) - 0x0
say6 - 0xb878
2020-12-29 14:23:49.236633+0800 003-cache_t脱离源码环境分析[57596:603045] LGPerson say : -[LGPerson say8]
1 - 15
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
say8 - 0xb8d8
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(null) - 0x0
(lldb) 

看到输出结果,不禁有几个疑问:

  1. _occupied是否有个临界值?
  2. 为什么_mask发生变化的时候,_occupied的值就变回1?
  3. 为什么有的方法,通过_mask的值遍历不出来?
  4. 为什么每次_mask发生变化之后,就只剩下一个bucket_t?
  5. 没有打印出来的bucket_t,是否还存在cache里面?

带着这些疑问我们进入源码探索环节

cache_t源码探索

    // Member function declarations quoted from cache_t; most of the
    // definitions are not part of this excerpt.
    static bucket_t *emptyBuckets();
    
    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();
    // Called by cache_t::insert right before it stores a new entry.
    void incrementOccupied();
    // Stores the new bucket array and mask and resets _occupied to 0.
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    // Per the walkthrough: returns mask() ? mask()+1 : 0.
    unsigned capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();

通过查找incrementOccupied()的调用位置,我们定位到了cache_t::insert方法

// Adds the (sel, imp) pair to this cache on behalf of class `cls`.
//
// Steps, in order:
//   1. Assert that the appropriate lock is held and that `sel` is non-zero
//      and `cls` is initialized.
//   2. Pick the capacity: replace a constant empty cache with a fresh
//      allocation, keep the current buckets while the 3/4 fill check passes,
//      otherwise double the capacity (capped at MAX_CACHE_SIZE) and
//      reallocate with freeOld set.
//   3. Probe from cache_hash(sel, m), stepping with cache_next, and store the
//      entry in the first slot whose sel is 0.
//
// Returns without storing if `sel` is already present. If the probe returns
// to its starting index without finding a slot, bad_cache is called with
// `receiver`, `sel` and `cls`.
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    ASSERT(sel != 0 && cls->isInitialized());

    // Use the cache as-is if it is less than 3/4 full
    // newOccupied is a local: the count the cache would have after this insert.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) { // e.g. capacity 4 gives a threshold of 3 for the new entry plus the end marker
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;  // double the capacity, or start at INIT_CACHE_SIZE
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);  // expansion done; the old buckets are handed over to be freed
    }

    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {
        if (fastpath(b[i].sel() == 0)) {
            // Empty slot: bump the occupied count, then write the entry.
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    // Reached only when the probe wrapped around to `begin` without returning.
    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

根据注释,依次往下读

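  1. 进入这个函数,mask_t newOccupied = occupied() + 1; 这里的newOccupied只是一个局部变量,表示本次插入之后的占用数;真正的_occupied是在后面存入bucket时通过incrementOccupied()加1的。由于我们每次打印都发生在一次方法调用(也就是一次insert)之后,所以在上面的打印中没有出现occupied为0的情况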
  2. unsigned oldCapacity = capacity(), capacity = oldCapacity;
    • 这里我们跟进代码发现capacity()的内部实现为: return mask() ? mask()+1 : 0;,也就是说首先会去获取旧值oldCapacity:mask不为0时oldCapacity的值为mask+1,否则为0。
    • 然后oldCapacity赋给capacity,也就是旧值赋给新值
  3. 如果当前类没有缓存
    • 如果当前capacity不存在,就给capacity赋值为4:if (!capacity) capacity = INIT_CACHE_SIZE
    • 通过旧值和新值来开辟缓存空间,不对旧值进行释放: reallocate(oldCapacity, capacity, /* freeOld */false);
    • 内部实现为
    // Installs a freshly allocated bucket array of newCapacity slots and,
    // when freeOld is set, passes the previous array (with oldCapacity) to
    // cache_collect_free.
    void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld){
        // Grab the current array first; setBucketsAndMask below replaces it.
        bucket_t *previous = buckets();
        bucket_t *fresh = allocateBuckets(newCapacity);

        // Entries of the old array are deliberately not carried over.
        // This is thought to save cache memory at the cost of extra cache fills.
        // fixme re-measure this

        ASSERT(newCapacity > 0);
        ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

        // The mask is always capacity - 1.
        const mask_t newMask = newCapacity - 1;
        setBucketsAndMask(fresh, newMask);

        if (!freeOld) return;
        cache_collect_free(previous, oldCapacity);
    }
    
    • 获取旧的buckets:bucket_t *oldBuckets = buckets();。第一次进来时是只读的空缓存,调用时freeOld传的是false,所以此处不释放。
    • 根据新需要的空间大小来开辟缓存空间: bucket_t *newBuckets = allocateBuckets(newCapacity);
    • 将occupied置为0:
    // Publishes a new bucket array and mask, then resets _occupied to 0.
    // The two branches differ only in how the stores are ordered: relaxed
    // stores fenced by mega_barrier() under __arm__, release stores under
    // __x86_64__ / i386. Any other architecture is a compile-time error.
    void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask){
    
    
    #ifdef __arm__
    // ensure other threads see buckets contents before buckets pointer
    mega_barrier();
    
    _buckets.store(newBuckets, memory_order::memory_order_relaxed);
    
    // ensure other threads see new buckets before new mask
    mega_barrier();
    
    _mask.store(newMask, memory_order::memory_order_relaxed);
    // A freshly installed array starts with no counted entries.
    _occupied = 0;
    #elif __x86_64__ || i386
    // ensure other threads see buckets contents before buckets pointer
    _buckets.store(newBuckets, memory_order::memory_order_release);
    
    // ensure other threads see new buckets before new mask
    _mask.store(newMask, memory_order::memory_order_release);
    // A freshly installed array starts with no counted entries.
    _occupied = 0;
    #else
    #error Don't know how to do setBucketsAndMask on this architecture.
    #endif
    }
    
    • 利用新创建的空间来存储bucket和mask: setBucketsAndMask(newBuckets, newCapacity - 1);
    • 如果需要释放旧值,就直接抹去旧的缓存空间:
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
    
  4. 如果newOccupied + CACHE_END_MARKER不大于容量的3/4,就直接使用现有的缓存空间,不做扩容
```
fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)
```
  5. 如果newOccupied + CACHE_END_MARKER大于容量的3/4,就把容量扩大到原来的2倍(不超过MAX_CACHE_SIZE),并释放之前的缓存空间。扩容方法和上面的开辟方法是同一个,只是多了释放操作,不做赘述。注意旧缓存的内容不会拷贝到新空间,所以扩容之后只剩下刚插入的这一个bucket
```
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;  // double the capacity, or start at INIT_CACHE_SIZE
    if (capacity > MAX_CACHE_SIZE) {
        capacity = MAX_CACHE_SIZE;
    }
    reallocate(oldCapacity, capacity, true);  // expansion done; freeOld is true, so the old buckets are released
```
* 获取当前的bucket: `bucket_t *b = buckets();`
* 获取当前的mask: `mask_t m = capacity - 1;`
* 通过hash算法`cache_hash`来获取存储下标
```
// Maps a selector to a bucket index: the selector's address, truncated to
// mask_t, ANDed with the cache mask.
static inline mask_t cache_hash(SEL sel, mask_t mask) {
    const uintptr_t selBits = (uintptr_t)sel;
    const mask_t truncated = (mask_t)selBits;
    return truncated & mask;
}
```
  6. 通过下标来存储当前的bucket
```
    do {
    if (fastpath(b[i].sel() == 0)) {
        incrementOccupied();
        b[i].set<Atomic, Encoded>(sel, imp, cls);
        return;
   }
    if (b[i].sel() == sel) {
        // The entry was added to the cache by some other thread
        // before we grabbed the cacheUpdateLock.
        return;
    }
} while (fastpath((i = cache_next(i, m)) != begin));
```

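注意:并不是算出下标就能直接存入。缓存中的存储是无序的,下标由sel的地址与mask按位与得到,不同的sel可能得到相同的下标,即存在冲突。insert通过以下三步来解决冲突:

  1. 如果当前下标的位置为空(sel为0),则_occupied加1,并把sel、imp存入该位置;
  2. 如果当前位置已经存有相同的sel,说明其他线程已经缓存过该方法,直接返回;
  3. 否则通过cache_next(i, m)取下一个下标继续尝试,直到回到起始下标begin为止;若仍未找到可用位置,则调用bad_cache。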

总结

上一篇下一篇

猜你喜欢

热点阅读