深入探究cache_t

2023-02-07  本文已影响0人  星星杨

之前有分析过OC类的底层结构,今天来重点分析一个重要的结构体cache_t;

cache_t结构体分析

首先通过lldb断点打印我们的cache_t内容,之前在分析OC类的底层结构的时候,有分析过通过内存偏移的方式,取出结构体里的变量,这里不再赘述了,直接看结果:


image.png

打印出来里面有不少东西,那具体哪一个值得我们详细探究呢?这里主要是要探究cache_t的结构,与其说是探究它的结构,不如说cache_t有什么用,通过它的作用,反推我们想要探究的结构;

cache的作用?
答:cache顾名思义,缓存,在一个类里面,缓存能缓存什么呢?属性是存在对象里面,协议对应protocol,所以这里是缓存我们调用过的方法;

方法最重要的是消息的主体,即sel和imp,即我们要重点探索sel跟imp被保存在哪里;mask、occupied、flags这些都是面具符跟标志位,所以可能在_bucketsAndMaybeMask或者_originalPreoptCache中;

struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    union {
        // Note: _flags on ARM64 needs to line up with the unused bits of
        // _originalPreoptCache because we access some flags (specifically
        // FAST_CACHE_HAS_DEFAULT_CORE and FAST_CACHE_HAS_DEFAULT_AWZ) on
        // unrealized classes with the assumption that they will start out
        // as 0.
        struct {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED && !__LP64__
            // Outlined cache mask storage, 32-bit, we have mask and occupied.
            explicit_atomic<mask_t>    _mask;
            uint16_t                   _occupied;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED && __LP64__
            // Outlined cache mask storage, 64-bit, we have mask, occupied, flags.
            explicit_atomic<mask_t>    _mask;
            uint16_t                   _occupied;
            uint16_t                   _flags;
#   define CACHE_T_HAS_FLAGS 1
#elif __LP64__
            // Inline cache mask storage, 64-bit, we have occupied, flags, and
            // empty space to line up flags with originalPreoptCache.
            //
            // Note: the assembly code for objc_release_xN knows about the
            // location of _flags and the
            // FAST_CACHE_HAS_CUSTOM_DEALLOC_INITIATION flag within. Any changes
            // must be applied there as well.
            uint32_t                   _unused;
            uint16_t                   _occupied;
            uint16_t                   _flags;
#   define CACHE_T_HAS_FLAGS 1
#else
            // Inline cache mask storage, 32-bit, we have occupied, flags.
            uint16_t                   _occupied;
            uint16_t                   _flags;
#   define CACHE_T_HAS_FLAGS 1
#endif

        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };

    // Simple constructor for testing purposes only.
    cache_t() : _bucketsAndMaybeMask(0) {}

#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
    // _bucketsAndMaybeMask is a buckets_t pointer

    static constexpr uintptr_t bucketsMask = ~0ul;
    static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    static constexpr uintptr_t maskShift = 48;
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << maskShift) - 1;

    static_assert(bucketsMask >= OBJC_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#if CONFIG_USE_PREOPT_CACHES
    static constexpr uintptr_t preoptBucketsMarker = 1ul;
    static constexpr uintptr_t preoptBucketsMask = bucketsMask & ~preoptBucketsMarker;
#endif
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits

    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;

    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;

    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;

    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= OBJC_VM_MAX_ADDRESS,
            "Bucket field doesn't have enough bits for arbitrary pointers.");

#if CONFIG_USE_PREOPT_CACHES
    static constexpr uintptr_t preoptBucketsMarker = 1ul;
#if __has_feature(ptrauth_calls)
    // 63..60: hash_mask_shift
    // 59..55: hash_shift
    // 54.. 1: buckets ptr + auth
    //      0: always 1
    static constexpr uintptr_t preoptBucketsMask = 0x007ffffffffffffe;
    static inline uintptr_t preoptBucketsHashParams(const preopt_cache_t *cache) {
        uintptr_t value = (uintptr_t)cache->shift << 55;
        // masks have 11 bits but can be 0, so we compute
        // the right shift for 0x7fff rather than 0xffff
        return value | ((objc::mask16ShiftBits(cache->mask) - 1) << 60);
    }
#else
    // 63..53: hash_mask
    // 52..48: hash_shift
    // 47.. 1: buckets ptr
    //      0: always 1
    static constexpr uintptr_t preoptBucketsMask = 0x0000fffffffffffe;
    static inline uintptr_t preoptBucketsHashParams(const preopt_cache_t *cache) {
        return (uintptr_t)cache->hash_params << 48;
    }
#endif
#endif // CONFIG_USE_PREOPT_CACHES
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    // _bucketsAndMaybeMask is a buckets_t pointer in the top 28 bits

    static constexpr uintptr_t maskBits = 4;
    static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
    static constexpr uintptr_t bucketsMask = ~maskMask;
    static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#else
#error Unknown cache mask storage type.
#endif

    bool isConstantEmptyCache() const;
    bool canBeFreed() const;
    mask_t mask() const;

#if CONFIG_USE_PREOPT_CACHES
    void initializeToPreoptCacheInDisguise(const preopt_cache_t *cache);
    const preopt_cache_t *disguised_preopt_cache() const;
#endif

    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);

    void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);
    void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);

    static bucket_t *emptyBuckets();
    static bucket_t *allocateBuckets(mask_t newCapacity);
    static bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true);
    static struct bucket_t * endMarker(struct bucket_t *b, uint32_t cap);
    void bad_cache(id receiver, SEL sel) __attribute__((noreturn, cold));

public:
    // The following four fields are public for objcdt's use only.
    // objcdt reaches into fields while the process is suspended
    // hence doesn't care for locks and pesky little details like this
    // and can safely use these.
    unsigned capacity() const;
    struct bucket_t *buckets() const;
    Class cls() const;

#if CONFIG_USE_PREOPT_CACHES
    const preopt_cache_t *preopt_cache(bool authenticated = true) const;
#endif

    mask_t occupied() const;
    void initializeToEmpty();

#if CONFIG_USE_PREOPT_CACHES
    bool isConstantOptimizedCache(bool strict = false, uintptr_t empty_addr = (uintptr_t)&_objc_empty_cache) const;
    bool shouldFlush(SEL sel, IMP imp) const;
    bool isConstantOptimizedCacheWithInlinedSels() const;
    Class preoptFallbackClass() const;
    void maybeConvertToPreoptimized();
    void initializeToEmptyOrPreoptimizedInDisguise();
#else
    inline bool isConstantOptimizedCache(bool strict = false, uintptr_t empty_addr = 0) const { return false; }
    inline bool shouldFlush(SEL sel, IMP imp) const {
        return cache_getImp(cls(), sel) == imp;
    }
    inline bool isConstantOptimizedCacheWithInlinedSels() const { return false; }
    inline void initializeToEmptyOrPreoptimizedInDisguise() { initializeToEmpty(); }
#endif

    void insert(SEL sel, IMP imp, id receiver);
    void copyCacheNolock(objc_imp_cache_entry *buffer, int len);
    void destroy();
    void eraseNolock(const char *func);

    static void init();
    static void collectNolock(bool collectALot);
    static size_t bytesForCapacity(uint32_t cap);

#if CACHE_T_HAS_FLAGS
#   if __arm64__
// class or superclass has .cxx_construct/.cxx_destruct implementation
//   FAST_CACHE_HAS_CXX_DTOR is the first bit so that setting it in
//   isa_t::has_cxx_dtor is a single bfi
#       define FAST_CACHE_HAS_CXX_DTOR       (1<<0)
#       define FAST_CACHE_HAS_CXX_CTOR       (1<<1)
// Denormalized RO_META to avoid an indirection
#       define FAST_CACHE_META               (1<<2)
#   else
// Denormalized RO_META to avoid an indirection
#       define FAST_CACHE_META               (1<<0)
// class or superclass has .cxx_construct/.cxx_destruct implementation
//   FAST_CACHE_HAS_CXX_DTOR is chosen to alias with isa_t::has_cxx_dtor
#       define FAST_CACHE_HAS_CXX_CTOR       (1<<1)
#       define FAST_CACHE_HAS_CXX_DTOR       (1<<2)
#endif

// Fast Alloc fields:
//   This stores the word-aligned size of instances + "ALLOC_DELTA16",
//   or 0 if the instance size doesn't fit.
//
//   These bits occupy the same bits than in the instance size, so that
//   the size can be extracted with a simple mask operation.
//
//   FAST_CACHE_ALLOC_MASK16 allows to extract the instance size rounded
//   rounded up to the next 16 byte boundary, which is a fastpath for
//   _objc_rootAllocWithZone()

// The code in fastInstanceSize/setFastInstanceSize is not quite right for
// 32-bit, so we currently only enable this for 64-bit.
#   if __LP64__
#       define FAST_CACHE_ALLOC_MASK         0x0ff8
#       define FAST_CACHE_ALLOC_MASK16       0x0ff0
#       define FAST_CACHE_ALLOC_DELTA16      0x0008
#   endif

#   define FAST_CACHE_HAS_CUSTOM_DEALLOC_INITIATION (1<<12)
// class's instances requires raw isa
#   define FAST_CACHE_REQUIRES_RAW_ISA   (1<<13)
// class or superclass has default alloc/allocWithZone: implementation
// Note this is is stored in the metaclass.
#   define FAST_CACHE_HAS_DEFAULT_AWZ    (1<<14)
// class or superclass has default new/self/class/respondsToSelector/isKindOfClass
#   define FAST_CACHE_HAS_DEFAULT_CORE   (1<<15)

    bool getBit(uint16_t flags) const {
        return _flags & flags;
    }
    void setBit(uint16_t set) {
        __c11_atomic_fetch_or((_Atomic(uint16_t) *)&_flags, set, __ATOMIC_RELAXED);
    }
    void clearBit(uint16_t clear) {
        __c11_atomic_fetch_and((_Atomic(uint16_t) *)&_flags, ~clear, __ATOMIC_RELAXED);
    }
#endif

#if FAST_CACHE_ALLOC_MASK
    bool hasFastInstanceSize(size_t extra) const
    {
        if (__builtin_constant_p(extra) && extra == 0) {
            return _flags & FAST_CACHE_ALLOC_MASK16;
        }
        return _flags & FAST_CACHE_ALLOC_MASK;
    }

    size_t fastInstanceSize(size_t extra) const
    {
        ASSERT(hasFastInstanceSize(extra));

        if (__builtin_constant_p(extra) && extra == 0) {
            return _flags & FAST_CACHE_ALLOC_MASK16;
        } else {
            size_t size = _flags & FAST_CACHE_ALLOC_MASK;
            // remove the FAST_CACHE_ALLOC_DELTA16 that was added
            // by setFastInstanceSize
            return align16(size + extra - FAST_CACHE_ALLOC_DELTA16);
        }
    }

    void setFastInstanceSize(size_t newSize)
    {
        // Set during realization or construction only. No locking needed.
        uint16_t newBits = _flags & ~FAST_CACHE_ALLOC_MASK;
        uint16_t sizeBits;

        // Adding FAST_CACHE_ALLOC_DELTA16 allows for FAST_CACHE_ALLOC_MASK16
        // to yield the proper 16byte aligned allocation size with a single mask
        sizeBits = word_align(newSize) + FAST_CACHE_ALLOC_DELTA16;
        sizeBits &= FAST_CACHE_ALLOC_MASK;
        if (newSize <= sizeBits) {
            newBits |= sizeBits;
        }
        _flags = newBits;
    }
#else
    bool hasFastInstanceSize(size_t extra) const {
        return false;
    }
    size_t fastInstanceSize(size_t extra) const {
        abort();
    }
    void setFastInstanceSize(size_t extra) {
        // nothing
    }
#endif
}

直接贴源码,从源码中查看;发现很多关于bucket的东西,类似setBucketsAndMask、emptyBuckets、allocateBuckets、emptyBucketsForCapacity,整个篇章就围绕bucket进行操作,所以刚才说的要分析重点也比较清晰了,就是_bucketsAndMaybeMask;

bucket_t

上面提到了,cache_t的重点是bucket_t,所以点开bucket_t查看其结构体内容;
不难发现,这里面就是我们一直想要寻找的sel、imp

bucket_t结构体部分截图
经过以上分析,就可以得到一个比较清晰链路图了;
cache_t结构

尝试找出bucket_t,通过lldb直接取值,发现没办法打印Value没办法直接打印出来;


尝试用lldb找出bucket_t

这个时候只能继续查看源码了,看看有没有源码能够直接获取bucket_t的,在cache_t源码中发现这么一个API,直接调用看看结果;

struct bucket_t *buckets() const;

确实是打印出bucket_t对象了,但是没有清晰明了的显示我们想要sel跟imp;


image.png

同样的,查看bucket_t结构体,看看有没有获取直接获取sel跟imp的,功夫不负有心人了,成功在bucket_t中找到了对应的接口;

inline SEL sel() const { return _sel.load(memory_order_relaxed); }

inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
        uintptr_t imp = _imp.load(memory_order_relaxed);
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(base, sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }

通过我们找到的sel、imp相关API可以成功获取到我们之前调用的sayHello方法;


API获取sel、imp

通过index获取全部的内容,这里可以看出bucket_t的存储是不连续的,然后它又可以通过下标进行索引,所以这个存储方式是哈希结构的;


打印buckets

接下来就要探索一下bucket_t底层实现了;

脱离源码环境分析cache_t

将objc_class、class_data_bits_t、cache_t、bucket_t的成员变量都拷贝出来,新建新的结构体,利用强制类型转换的方式,打印出对应的标志位及cache._buckets;

#import <objc/runtime.h>
// https://developer.apple.com/videos/play/wwdc2020/10163/
typedef uint32_t mask_t;

struct yx_bucket_t {
    SEL _sel;
    IMP _imp;
};

// 项目运行 调试
struct yx_cache_t {
    struct yx_bucket_t *_buckets;
    mask_t    _mask;
    uint16_t  _occupied;
    uint16_t  _flags;
};

struct yx_class_data_bits_t {
    uintptr_t bits;
};

// cache_t objc_class
struct yx_objc_class  {
    Class ISA;
    Class superclass;
    struct yx_cache_t cache;             // formerly cache pointer and vtable 16
    struct yx_class_data_bits_t bits;
};



int main(int argc, const char * argv[]) {
    @autoreleasepool {
        LGPerson *p  = [LGPerson alloc];
        Class pClass = p.class;
        [p say1];
//        [p say2];
//        [p say3];
        // 增加缓存 -> cache_t 是否存在插入缓存的方法
        
        struct yx_objc_class *yx_class = (__bridge struct yx_objc_class *)(pClass);
        NSLog(@"%u-%u-%hu",yx_class->cache._mask,yx_class->cache._occupied,yx_class->cache._flags);
        for (int i = 0 ; i < yx_class->cache._mask ; i++){
            struct yx_bucket_t bucket = yx_class->cache._buckets[i];
            NSLog(@"%@-%p - %d",NSStringFromSelector(bucket._sel),bucket._imp,i);
        }
        
        // 插入方法 -> 写 -> 读(cache 读取)

        NSLog(@"Hello, World!");
    }
    return 0;
}

_mask是3,_occupied是1;


say1方法

_mask是3,_occupied变成了2;


调用say1、say2方法
_mask是7,_occupied是1;
调用say1、say2、say3方法

这个_mask跟_occupied是从哪里来的?

cache_t::insert

从cache_t插入方法进行分析;
第一步是occupied() + 1,占用方法加一,这也可以解释调用say2的时候,_occupied从1变成2的原因了;
第二步是判断有没有能够插入的对象,没有的话就使用reallocate进行开辟能够插入的对象空间;有的话就判断空间是否足够,是否需要扩容;

传入的capacity = INIT_CACHE_SIZE;INIT_CACHE_SIZE = 1 << 2 = 4;
所以第一次传入newCapacity = 4,在reallocate中有个方法setBucketsAndMask,newCapacity - 1 = 3;这里也可以解释为什么我们第一次调用say1方法的时候,_mask会是3了;

第三步就是对sel跟imp进行存储;

1、调用buckets()创建空桶子,调用cache_hash对sel跟mask进行哈希得到要存储 位置,对sel的值进行平移操作,再与上mask,所以3的空间就是012;
2、使用do-while循环判断buckets桶子里面对应的位置是否有值,没有值的话就调用set方法进行插入,同时调用incrementOccupied让_occupied++,这里才是真正的_occupied增加的原因,上面那个第一步occupied() + 1只是为了判断当前的空间是否达到需要扩容的临界点而已;如果有值的话,就再次hash,寻找下一个位置,循环往复;

问题:mask什么时候变成的7?为什么调用say3方法的时候,say1、say2都不见了?

至于苹果为什么要把原来的内容干掉,而不是往后直接增加呢?或者把原来的内容拷贝到现在新空间呢?

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    lockdebug::assert_locked(&runtimeLock);

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        // 4 * 2 = 8 - 1 = 7
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }
    
    bucket_t *b = buckets(); // 空桶子
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1);
    
    if (freeOld) {
        collect_free(oldBuckets, oldCapacity);
    }
}

void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    uintptr_t buckets = (uintptr_t)newBuckets;
    uintptr_t mask = (uintptr_t)newMask;

    ASSERT(buckets <= bucketsMask);
    ASSERT(mask <= maxMask);

    _bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_release);
    _occupied = 0;
}

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    // 0110 1111 : 11 3 01: 1  10: 2 00: 0
    // 0000 0011 - mask = 3
    return (mask_t)(value & mask);
}
Cache_t原理分析图.png
上一篇下一篇

猜你喜欢

热点阅读