深入探究cache_t
之前有分析过OC类的底层结构,今天来重点分析一个重要的结构体cache_t;
cache_t结构体分析
首先通过lldb断点打印我们的cache_t内容,之前在分析OC类的底层结构的时候,有分析过通过内存偏移的方式,取出结构体里的变量,这里不再赘述了,直接看结果:
image.png
打印出来里面有不少东西,那具体哪一个值得我们详细探究呢?这里主要是要探究cache_t的结构,与其说是探究它的结构,不如说cache_t有什么用,通过它的作用,反推我们想要探究的结构;
cache的作用?
答:cache顾名思义,缓存,在一个类里面,缓存能缓存什么呢?属性是存在对象里面,协议对应protocol,所以这里是缓存我们调用过的方法;
方法最重要的是消息的主体,即sel和imp,因此我们要重点探索sel跟imp被保存在哪里;mask、occupied、flags这些都是掩码跟标志位,所以sel跟imp可能保存在_bucketsAndMaybeMask或者_originalPreoptCache中;
// cache_t: the per-class method cache. (sel, imp) pairs live in a hash
// table of bucket_t entries reachable through _bucketsAndMaybeMask; the
// mask/occupied/flags bookkeeping lives either alongside the pointer or
// packed into it, depending on CACHE_MASK_STORAGE.
// NOTE(review): the quoted snippet was missing the trailing ';' after the
// closing brace, which makes the declaration ill-formed; restored below.
struct cache_t {
private:
    // Buckets pointer, possibly with the mask packed into the high bits
    // (see the CACHE_MASK_STORAGE-dependent constants below).
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    union {
        // Note: _flags on ARM64 needs to line up with the unused bits of
        // _originalPreoptCache because we access some flags (specifically
        // FAST_CACHE_HAS_DEFAULT_CORE and FAST_CACHE_HAS_DEFAULT_AWZ) on
        // unrealized classes with the assumption that they will start out
        // as 0.
        struct {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED && !__LP64__
            // Outlined cache mask storage, 32-bit, we have mask and occupied.
            explicit_atomic<mask_t> _mask;
            uint16_t _occupied;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED && __LP64__
            // Outlined cache mask storage, 64-bit, we have mask, occupied, flags.
            explicit_atomic<mask_t> _mask;
            uint16_t _occupied;
            uint16_t _flags;
#   define CACHE_T_HAS_FLAGS 1
#elif __LP64__
            // Inline cache mask storage, 64-bit, we have occupied, flags, and
            // empty space to line up flags with originalPreoptCache.
            //
            // Note: the assembly code for objc_release_xN knows about the
            // location of _flags and the
            // FAST_CACHE_HAS_CUSTOM_DEALLOC_INITIATION flag within. Any changes
            // must be applied there as well.
            uint32_t _unused;
            uint16_t _occupied;
            uint16_t _flags;
#   define CACHE_T_HAS_FLAGS 1
#else
            // Inline cache mask storage, 32-bit, we have occupied, flags.
            uint16_t _occupied;
            uint16_t _flags;
#   define CACHE_T_HAS_FLAGS 1
#endif
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };

    // Simple constructor for testing purposes only.
    cache_t() : _bucketsAndMaybeMask(0) {}

#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
    // _bucketsAndMaybeMask is a buckets_t pointer
    static constexpr uintptr_t bucketsMask = ~0ul;
    static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    static constexpr uintptr_t maskShift = 48;
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << maskShift) - 1;
    static_assert(bucketsMask >= OBJC_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#if CONFIG_USE_PREOPT_CACHES
    static constexpr uintptr_t preoptBucketsMarker = 1ul;
    static constexpr uintptr_t preoptBucketsMask = bucketsMask & ~preoptBucketsMarker;
#endif
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits

    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;

    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;

    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;

    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= OBJC_VM_MAX_ADDRESS,
                  "Bucket field doesn't have enough bits for arbitrary pointers.");

#if CONFIG_USE_PREOPT_CACHES
    static constexpr uintptr_t preoptBucketsMarker = 1ul;
#if __has_feature(ptrauth_calls)
    // 63..60: hash_mask_shift
    // 59..55: hash_shift
    // 54.. 1: buckets ptr + auth
    //      0: always 1
    static constexpr uintptr_t preoptBucketsMask = 0x007ffffffffffffe;
    static inline uintptr_t preoptBucketsHashParams(const preopt_cache_t *cache) {
        uintptr_t value = (uintptr_t)cache->shift << 55;
        // masks have 11 bits but can be 0, so we compute
        // the right shift for 0x7fff rather than 0xffff
        return value | ((objc::mask16ShiftBits(cache->mask) - 1) << 60);
    }
#else
    // 63..53: hash_mask
    // 52..48: hash_shift
    // 47.. 1: buckets ptr
    //      0: always 1
    static constexpr uintptr_t preoptBucketsMask = 0x0000fffffffffffe;
    static inline uintptr_t preoptBucketsHashParams(const preopt_cache_t *cache) {
        return (uintptr_t)cache->hash_params << 48;
    }
#endif
#endif // CONFIG_USE_PREOPT_CACHES
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    // _bucketsAndMaybeMask is a buckets_t pointer in the top 28 bits
    static constexpr uintptr_t maskBits = 4;
    static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
    static constexpr uintptr_t bucketsMask = ~maskMask;
    static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#else
#error Unknown cache mask storage type.
#endif

    bool isConstantEmptyCache() const;
    bool canBeFreed() const;
    mask_t mask() const;

#if CONFIG_USE_PREOPT_CACHES
    void initializeToPreoptCacheInDisguise(const preopt_cache_t *cache);
    const preopt_cache_t *disguised_preopt_cache() const;
#endif

    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);

    void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);
    void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);

    static bucket_t *emptyBuckets();
    static bucket_t *allocateBuckets(mask_t newCapacity);
    static bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true);
    static struct bucket_t * endMarker(struct bucket_t *b, uint32_t cap);
    void bad_cache(id receiver, SEL sel) __attribute__((noreturn, cold));

public:
    // The following four fields are public for objcdt's use only.
    // objcdt reaches into fields while the process is suspended
    // hence doesn't care for locks and pesky little details like this
    // and can safely use these.
    unsigned capacity() const;
    struct bucket_t *buckets() const;
    Class cls() const;

#if CONFIG_USE_PREOPT_CACHES
    const preopt_cache_t *preopt_cache(bool authenticated = true) const;
#endif

    mask_t occupied() const;
    void initializeToEmpty();

#if CONFIG_USE_PREOPT_CACHES
    bool isConstantOptimizedCache(bool strict = false, uintptr_t empty_addr = (uintptr_t)&_objc_empty_cache) const;
    bool shouldFlush(SEL sel, IMP imp) const;
    bool isConstantOptimizedCacheWithInlinedSels() const;
    Class preoptFallbackClass() const;
    void maybeConvertToPreoptimized();
    void initializeToEmptyOrPreoptimizedInDisguise();
#else
    inline bool isConstantOptimizedCache(bool strict = false, uintptr_t empty_addr = 0) const { return false; }
    inline bool shouldFlush(SEL sel, IMP imp) const {
        return cache_getImp(cls(), sel) == imp;
    }
    inline bool isConstantOptimizedCacheWithInlinedSels() const { return false; }
    inline void initializeToEmptyOrPreoptimizedInDisguise() { initializeToEmpty(); }
#endif

    void insert(SEL sel, IMP imp, id receiver);
    void copyCacheNolock(objc_imp_cache_entry *buffer, int len);
    void destroy();
    void eraseNolock(const char *func);

    static void init();
    static void collectNolock(bool collectALot);
    static size_t bytesForCapacity(uint32_t cap);

#if CACHE_T_HAS_FLAGS
#   if __arm64__
    // class or superclass has .cxx_construct/.cxx_destruct implementation
    //   FAST_CACHE_HAS_CXX_DTOR is the first bit so that setting it in
    //   isa_t::has_cxx_dtor is a single bfi
#       define FAST_CACHE_HAS_CXX_DTOR       (1<<0)
#       define FAST_CACHE_HAS_CXX_CTOR       (1<<1)
    // Denormalized RO_META to avoid an indirection
#       define FAST_CACHE_META               (1<<2)
#   else
    // Denormalized RO_META to avoid an indirection
#       define FAST_CACHE_META               (1<<0)
    // class or superclass has .cxx_construct/.cxx_destruct implementation
    //   FAST_CACHE_HAS_CXX_DTOR is chosen to alias with isa_t::has_cxx_dtor
#       define FAST_CACHE_HAS_CXX_CTOR       (1<<1)
#       define FAST_CACHE_HAS_CXX_DTOR       (1<<2)
#   endif

    // Fast Alloc fields:
    //   This stores the word-aligned size of instances + "ALLOC_DELTA16",
    //   or 0 if the instance size doesn't fit.
    //
    //   These bits occupy the same bits than in the instance size, so that
    //   the size can be extracted with a simple mask operation.
    //
    //   FAST_CACHE_ALLOC_MASK16 allows to extract the instance size rounded
    //   rounded up to the next 16 byte boundary, which is a fastpath for
    //   _objc_rootAllocWithZone()
    // The code in fastInstanceSize/setFastInstanceSize is not quite right for
    // 32-bit, so we currently only enable this for 64-bit.
#   if __LP64__
#       define FAST_CACHE_ALLOC_MASK         0x0ff8
#       define FAST_CACHE_ALLOC_MASK16       0x0ff0
#       define FAST_CACHE_ALLOC_DELTA16      0x0008
#   endif

#   define FAST_CACHE_HAS_CUSTOM_DEALLOC_INITIATION (1<<12)
    // class's instances requires raw isa
#   define FAST_CACHE_REQUIRES_RAW_ISA       (1<<13)
    // class or superclass has default alloc/allocWithZone: implementation
    // Note this is is stored in the metaclass.
#   define FAST_CACHE_HAS_DEFAULT_AWZ        (1<<14)
    // class or superclass has default new/self/class/respondsToSelector/isKindOfClass
#   define FAST_CACHE_HAS_DEFAULT_CORE       (1<<15)

    bool getBit(uint16_t flags) const {
        return _flags & flags;
    }
    void setBit(uint16_t set) {
        __c11_atomic_fetch_or((_Atomic(uint16_t) *)&_flags, set, __ATOMIC_RELAXED);
    }
    void clearBit(uint16_t clear) {
        __c11_atomic_fetch_and((_Atomic(uint16_t) *)&_flags, ~clear, __ATOMIC_RELAXED);
    }
#endif

#if FAST_CACHE_ALLOC_MASK
    bool hasFastInstanceSize(size_t extra) const
    {
        if (__builtin_constant_p(extra) && extra == 0) {
            return _flags & FAST_CACHE_ALLOC_MASK16;
        }
        return _flags & FAST_CACHE_ALLOC_MASK;
    }

    size_t fastInstanceSize(size_t extra) const
    {
        ASSERT(hasFastInstanceSize(extra));

        if (__builtin_constant_p(extra) && extra == 0) {
            return _flags & FAST_CACHE_ALLOC_MASK16;
        } else {
            size_t size = _flags & FAST_CACHE_ALLOC_MASK;
            // remove the FAST_CACHE_ALLOC_DELTA16 that was added
            // by setFastInstanceSize
            return align16(size + extra - FAST_CACHE_ALLOC_DELTA16);
        }
    }

    void setFastInstanceSize(size_t newSize)
    {
        // Set during realization or construction only. No locking needed.
        uint16_t newBits = _flags & ~FAST_CACHE_ALLOC_MASK;
        uint16_t sizeBits;

        // Adding FAST_CACHE_ALLOC_DELTA16 allows for FAST_CACHE_ALLOC_MASK16
        // to yield the proper 16byte aligned allocation size with a single mask
        sizeBits = word_align(newSize) + FAST_CACHE_ALLOC_DELTA16;
        sizeBits &= FAST_CACHE_ALLOC_MASK;
        if (newSize <= sizeBits) {
            newBits |= sizeBits;
        }
        _flags = newBits;
    }
#else
    bool hasFastInstanceSize(size_t extra) const {
        return false;
    }
    size_t fastInstanceSize(size_t extra) const {
        abort();
    }
    void setFastInstanceSize(size_t extra) {
        // nothing
    }
#endif
};
直接贴源码,从源码中查看;发现很多关于bucket的东西,类似setBucketsAndMask、emptyBuckets、allocateBuckets、emptyBucketsForCapacity,整个篇章就围绕bucket进行操作,所以刚才说的要分析重点也比较清晰了,就是_bucketsAndMaybeMask;
bucket_t
上面提到了,cache_t的重点是bucket_t,所以点开bucket_t查看其结构体内容;
不难发现,这里面就是我们一直想要寻找的sel、imp
;
经过以上分析,就可以得到一个比较清晰链路图了;
cache_t结构
尝试找出bucket_t,通过lldb直接取值,发现其Value没办法直接打印出来;
尝试用lldb找出bucket_t
这个时候只能继续查看源码了,看看有没有源码能够直接获取bucket_t的,在cache_t源码中发现这么一个API,直接调用看看结果;
struct bucket_t *buckets() const;
确实是打印出bucket_t对象了,但是没有清晰明了的显示我们想要sel跟imp;
image.png
同样的,查看bucket_t结构体,看看有没有能直接获取sel跟imp的方法,功夫不负有心人,成功在bucket_t中找到了对应的接口;
// Relaxed atomic load of this bucket's cached selector; 0 marks an empty slot.
inline SEL sel() const { return _sel.load(memory_order_relaxed); }
// Decode and return this bucket's cached IMP, or nil if the slot is empty.
// `base` is the start of the buckets array (used in the ptrauth modifier)
// and `cls` is the owning class (used by the XOR encoding).
inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
    uintptr_t imp = _imp.load(memory_order_relaxed);
    if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
    // The IMP was stored signed with a (base, sel, cls)-dependent modifier;
    // authenticate it and re-sign it as an ordinary function pointer.
    SEL sel = _sel.load(memory_order_relaxed);
    return (IMP)
        ptrauth_auth_and_resign((const void *)imp,
                                ptrauth_key_process_dependent_code,
                                modifierForSEL(base, sel, cls),
                                ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
    // The IMP was stored XORed with the class pointer; undo the XOR.
    return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
    // Stored as a plain function pointer.
    return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
}
通过我们找到的sel、imp相关API可以成功获取到我们之前调用的sayHello方法;
API获取sel、imp
通过index获取全部的内容,这里可以看出bucket_t的存储是不连续的,然后它又可以通过下标进行索引,所以这个存储方式是哈希结构的;
打印buckets
接下来就要探索一下bucket_t底层实现了;
脱离源码环境分析cache_t
将objc_class、class_data_bits_t、cache_t、bucket_t的成员变量都拷贝出来,新建新的结构体,利用强制类型转换的方式,打印出对应的标志位及cache._buckets;
#import <objc/runtime.h>
// https://developer.apple.com/videos/play/wwdc2020/10163/
typedef uint32_t mask_t;
// Mirror of the runtime's bucket_t: one cache slot holding a selector
// and its implementation pointer. Field order must match the runtime's
// layout so a reinterpreted pointer reads the right words.
struct yx_bucket_t {
    SEL _sel;   // cached selector (0 when the slot is empty)
    IMP _imp;   // cached implementation (encoding ignored by this mirror)
};
// 项目运行 调试
// Mirror of cache_t used for out-of-source debugging.
// NOTE(review): this matches the CACHE_MASK_STORAGE_OUTLINED 64-bit layout
// (separate buckets pointer + mask); on arm64 the real cache_t packs the
// mask into _bucketsAndMaybeMask instead — confirm the build target.
struct yx_cache_t {
    struct yx_bucket_t *_buckets;   // hash table of (sel, imp) slots
    mask_t _mask;                   // capacity - 1, used to mask the hash
    uint16_t _occupied;             // number of filled slots
    uint16_t _flags;                // FAST_CACHE_* flag bits
};
// Mirror of class_data_bits_t: a single word packing the class_rw_t
// pointer and flag bits.
struct yx_class_data_bits_t {
    uintptr_t bits;
};
// cache_t objc_class
// Mirror of objc_class: isa, superclass, then the inline method cache,
// then the data bits. Field order must match the runtime so that casting
// a Class pointer to this struct lines the members up.
struct yx_objc_class {
    Class ISA;
    Class superclass;
    struct yx_cache_t cache;             // formerly cache pointer and vtable 16
    struct yx_class_data_bits_t bits;
};
/// Entry point: reinterpret an LGPerson class as yx_objc_class so the
/// method cache (mask / occupied / flags and every bucket's sel+imp)
/// can be dumped without lldb, after calling one or more methods.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        LGPerson *p = [LGPerson alloc];
        Class pClass = p.class;
        [p say1];
        // [p say2];
        // [p say3];
        // Fill the cache, then look for cache_t's insertion behavior.
        struct yx_objc_class *yx_class = (__bridge struct yx_objc_class *)(pClass);
        NSLog(@"%u-%u-%hu",yx_class->cache._mask,yx_class->cache._occupied,yx_class->cache._flags);
        // The buckets array has _mask + 1 slots (capacity = mask + 1) and
        // cache_hash can return any index in 0.._mask, so iterate with
        // `i <= _mask`; the original `i < _mask` skipped the last slot.
        for (int i = 0 ; i <= yx_class->cache._mask ; i++){
            struct yx_bucket_t bucket = yx_class->cache._buckets[i];
            NSLog(@"%@-%p - %d",NSStringFromSelector(bucket._sel),bucket._imp,i);
        }
        // Insert path: write into the cache, then read it back here.
        NSLog(@"Hello, World!");
    }
    return 0;
}
_mask是3,_occupied是1;
say1方法
_mask是3,_occupied变成了2;
调用say1、say2方法
_mask是7,_occupied是1;
调用say1、say2、say3方法
这个_mask跟_occupied是从哪里来的?
cache_t::insert
从cache_t插入方法进行分析;
第一步是occupied() + 1,占用方法加一,这也可以解释调用say2的时候,_occupied从1变成2的原因了;
第二步是判断有没有能够插入的对象,没有的话就使用reallocate进行开辟能够插入的对象空间;有的话就判断空间是否足够,是否需要扩容;
传入的capacity = INIT_CACHE_SIZE;INIT_CACHE_SIZE = 1 << 2 = 4;
所以第一次传入newCapacity = 4,在reallocate中有个方法setBucketsAndMask,newCapacity - 1 = 3;这里也可以解释为什么我们第一次调用say1方法的时候,_mask会是3了;
第三步就是对sel跟imp进行存储;
1、调用buckets()获取桶子数组,调用cache_hash对sel跟mask进行哈希得到要存储的位置:对sel的值进行移位操作,再与上mask,所以mask为3时哈希得到的下标范围就是0~3;
2、使用do-while循环判断buckets桶子里面对应的位置是否有值,没有值的话就调用set方法进行插入,同时调用incrementOccupied让_occupied++,这里才是真正的_occupied增加的原因,上面那个第一步occupied() + 1只是为了判断当前的空间是否达到需要扩容的临界点而已;如果有值的话,就再次hash,寻找下一个位置,循环往复;
问题:mask什么时候变成的7?为什么调用say3方法的时候,say1、say2都不见了?
- 看一下这个else方法里面的reallocate传入的 capacity * 2,做了两倍扩容,所以当我们内容一直增加到需要扩容的时候,第二次_mask就变成了 4 * 2 - 1 = 7;
- 因为setBucketsAndMask中有对_occupied = 0进行重置操作,所以一旦有扩容,_occupied就从0开始计算,所以调用say3的时候,_occupied就变成了1;
- 当扩容的时候,传入的第三个参数freeOld = true,
if (freeOld) {
collect_free(oldBuckets, oldCapacity);
}
这个时候,就会释放oldBuckets,所以调用say3的时候,之前存储的say1、say2就会不见了;
至于苹果为什么要把原来的内容干掉,而不是往后直接增加呢?或者把原来的内容拷贝到现在新空间呢?
- 第一个就是内存初始化的时候空间大小已经定好了,继续往后增加,数据有可能不连续;因为这个cache_t在类里面,而类的结构在堆内存,后面直接追加的内存不一定连续;
- 第二个就是平时我们方法调用是很频繁的操作,当我们需要扩容的时候,每次都要把之前的缓存挨个平移、hash等一系列操作进行拷贝,这个时候就比较费时间了,所以苹果这里就是利用空间换时间的处理方式,直接把前面的抹掉;
/// Insert (sel -> imp) into this class's method cache.
/// Runs with runtimeLock held. If the cache would exceed its fill ratio
/// it is grown (doubled) and the OLD buckets are freed, not copied —
/// which is why previously-cached selectors disappear after a resize.
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    lockdebug::assert_locked(&runtimeLock);

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
            cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is until we exceed our expected fill ratio.
    // newOccupied here only *predicts* the fill; the real _occupied bump
    // happens in incrementOccupied() below once the slot is written.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        // First insertion: allocate INIT_CACHE_SIZE (4) buckets, so the
        // stored mask becomes 4 - 1 = 3.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        // Grow by doubling, e.g. 4 * 2 = 8, giving a new mask of 8 - 1 = 7.
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        // freeOld == true: the old buckets are discarded, not migrated.
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets(); // buckets array (empty slots have sel == 0)
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            // Empty slot: this is where _occupied actually increments.
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}
/// Allocate a fresh buckets array of newCapacity slots and install it
/// (storing mask = newCapacity - 1). The old buckets are optionally
/// handed to the garbage-collection list; their contents are dropped.
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated.
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    // Also resets _occupied to 0 — see setBucketsAndMask.
    setBucketsAndMask(newBuckets, newCapacity - 1);

    if (freeOld) {
        // Queue the old buckets for freeing; cached entries are lost.
        collect_free(oldBuckets, oldCapacity);
    }
}
/// Pack the new buckets pointer and mask into _bucketsAndMaybeMask
/// (mask in the high bits above maskShift, pointer in the low bits)
/// and reset _occupied to 0 — every reallocation starts counting anew.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    // Locals exist only to feed the range asserts below.
    uintptr_t buckets = (uintptr_t)newBuckets;
    uintptr_t mask = (uintptr_t)newMask;

    ASSERT(buckets <= bucketsMask);
    ASSERT(mask <= maxMask);

    // release store so readers see fully-initialized buckets.
    _bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_release);
    _occupied = 0;
}
/// Hash a selector into a bucket index in [0, mask].
/// The selector pointer is (optionally) mixed with a shifted copy of
/// itself, then AND-ed with the mask, so e.g. mask = 3 (0b0011) yields
/// indices 0..3.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    // Mix in higher bits so nearby selector addresses spread out.
    value ^= value >> 7;
#endif
    // Keep only the low bits selected by mask (capacity - 1).
    return (mask_t)(value & mask);
}
Cache_t原理分析图.png