深入分析 weak
写在前面
weak 的主要作用:用 weak 修饰的引用不会使所引用对象的引用计数加一,并且在所引用的对象被释放时会自动被置为 nil,从而方便地避免野指针;它也经常用于解决循环引用问题。
本篇文章主要是研究 weak
内部具体是怎么实现的。
编译过程
先从一个简单的例子开始
// Minimal example used to inspect what the compiler emits for a __weak local.
+ (void)test
{
// Strong local that receives the newly created object.
id obj = [NSObject new];
// Weak local initialized from the strong one.
__weak id weakObj = obj;
// Reads the weak variable; the body is intentionally empty.
if (weakObj)
{
}
}
通过 clang
命令,查看 IR 中间代码 clang -S -fobjc-arc -emit-llvm ARCObject.m -o ARCObject.txt
; IR emitted for +[ARCObject test]; the two i8* parameters are presumably self and _cmd.
define internal void @"\01+[ARCObject test]"(i8*, i8*) #0 {
; %3/%4 receive the two incoming arguments; %5 is the slot for obj, %6 the slot for weakObj.
%3 = alloca i8*, align 8
%4 = alloca i8*, align 8
%5 = alloca i8*, align 8
%6 = alloca i8*, align 8
store i8* %0, i8** %3, align 8
store i8* %1, i8** %4, align 8
; id obj = [NSObject new]  ->  objc_opt_new on the class reference, result stored in %5.
%7 = load %struct._class_t*, %struct._class_t** @"OBJC_CLASSLIST_REFERENCES_$_", align 8
%8 = bitcast %struct._class_t* %7 to i8*
%9 = call i8* @objc_opt_new(i8* %8)
%10 = bitcast i8* %9 to %0*
%11 = bitcast %0* %10 to i8*
store i8* %11, i8** %5, align 8
; __weak id weakObj = obj  ->  objc_initWeak(&weakObj, obj).
%12 = load i8*, i8** %5, align 8
%13 = call i8* @llvm.objc.initWeak(i8** %6, i8* %12) #1
; if (weakObj): load the weak reference retained, release it, then compare the pointer with null.
%14 = call i8* @llvm.objc.loadWeakRetained(i8** %6) #1
call void @llvm.objc.release(i8* %14) #1, !clang.imprecise_release !9
%15 = icmp ne i8* %14, null
br i1 %15, label %16, label %17
16: ; preds = %2
br label %17
17: ; preds = %16, %2
; Scope exit: destroy the weak slot, then store null into the strong slot.
call void @llvm.objc.destroyWeak(i8** %6) #1
call void @llvm.objc.storeStrong(i8** %5, i8* null) #1
ret void
}
整理之后
// Hand-written equivalent of the IR generated for +[ARCObject test].
+ (void)test
{
    // id obj = [NSObject new];
    id obj = objc_msgSend(NSObject, @selector(new));

    // __weak id weakObj = obj;
    id weakObj;
    objc_initWeak(&weakObj, obj);

    // Reading a weak variable goes through objc_loadWeakRetained, which hands
    // back a temporary that has to be released again.
    id tmp = objc_loadWeakRetained(&weakObj);
    if (tmp)
    {
    }
    objc_release(tmp);

    // Scope exit: unregister the weak variable, then drop the strong one.
    objc_destroyWeak(&weakObj);
    // objc_storeStrong takes the ADDRESS of the strong variable (i8** in the IR).
    objc_storeStrong(&obj, nil);
}
底层实现
根据上面的例子,可以依次查看实现
// Entry point for initializing a weak variable.
// Stores nil directly when there is no object; otherwise delegates to storeWeak
// with "no old value / has new value / crash if deallocating".
id objc_initWeak(id *location, id newObj)
{
    if (newObj) {
        return storeWeak<DontHaveOld, DoHaveNew, DoCrashIfDeallocating>(
            location, (objc_object*)newObj);
    }

    // Nothing to register: the weak variable simply starts out as nil.
    *location = nil;
    return nil;
}
// Explicitly tears down a weak variable by running storeWeak with
// "has old value / no new value"; the returned value is discarded.
void objc_destroyWeak(id *location)
{
    static_cast<void>(
        storeWeak<DoHaveOld, DontHaveNew, DontCrashIfDeallocating>(location, nil));
}
// Core weak-store routine.
// When haveOld is set, unregisters `location` from the weak table of the side
// table selected by the old object's address. When haveNew is set, registers
// `location` in the weak table of the side table selected by newObj's address
// and writes the result to *location.
// Returns newObj, or nil if weak_register_no_lock rejected the store.
template <HaveOld haveOld, HaveNew haveNew, CrashIfDeallocating crashIfDeallocating>
static id storeWeak(id *location, objc_object *newObj)
{
Class previouslyInitializedClass = nil;
id oldObj;
SideTable *oldTable;
SideTable *newTable;
// Acquire locks for old and new values.
// Order by lock address to prevent lock ordering problems.
// Retry if the old value changes underneath us.
retry:
if (haveOld) {
oldObj = *location;
oldTable = &SideTables()[oldObj];
} else {
oldTable = nil;
}
if (haveNew) {
newTable = &SideTables()[newObj];
} else {
newTable = nil;
}
// Take the side-table locks to guard against concurrent weak stores.
SideTable::lockTwo<haveOld, haveNew>(oldTable, newTable);
// *location changed between the read above and taking the locks: start over.
if (haveOld && *location != oldObj) {
SideTable::unlockTwo<haveOld, haveNew>(oldTable, newTable);
goto retry;
}
// Prevent a deadlock between the weak reference machinery
// and the +initialize machinery by ensuring that no
// weakly-referenced object has an un-+initialized isa.
// I.e. make sure +initialize has run for newObj's class before registering it.
if (haveNew && newObj) {
Class cls = newObj->getIsa();
if (cls != previouslyInitializedClass &&
!((objc_class *)cls)->isInitialized())
{
// Drop the locks before running +initialize, then retry from the top.
SideTable::unlockTwo<haveOld, haveNew>(oldTable, newTable);
_class_initialize(_class_getNonMetaClass(cls, (id)newObj));
// If this class is finished with +initialize then we're good.
// If this class is still running +initialize on this thread
// (i.e. +initialize called storeWeak on an instance of itself)
// then we may proceed but it will appear initializing and
// not yet initialized to the check above.
// Instead set previouslyInitializedClass to recognize it on retry.
previouslyInitializedClass = cls;
goto retry;
}
}
// Clean up old value, if any.
// On an overwriting store, unregister this pointer from the old object's weak table.
if (haveOld) {
weak_unregister_no_lock(&oldTable->weak_table, oldObj, location);
}
// Assign new value, if any.
// Register this pointer in the weak table that serves newObj.
if (haveNew) {
newObj = (objc_object *)
weak_register_no_lock(&newTable->weak_table, (id)newObj, location,
crashIfDeallocating);
// weak_register_no_lock returns nil if weak store should be rejected
// Set is-weakly-referenced bit in refcount table.
if (newObj && !newObj->isTaggedPointer()) {
newObj->setWeaklyReferenced_nolock();
}
// Do not set *location anywhere else. That would introduce a race.
*location = (id)newObj;
}
else {
// No new value. The storage is not changed.
}
SideTable::unlockTwo<haveOld, haveNew>(oldTable, newTable);
return (id)newObj;
}
// Loads the object a weak variable refers to and tries to retain it.
// Returns nil when the variable holds nil or when the retain is refused;
// a tagged pointer is returned as-is without touching any side table.
id objc_loadWeakRetained(id *location)
{
id obj;
id result;
Class cls;
SideTable *table;
retry:
// fixme std::atomic this load
obj = *location;
if (!obj) return nil;
if (obj->isTaggedPointer()) return obj;
table = &SideTables()[obj];
table->lock();
// The variable changed before the lock was taken: unlock and start over.
if (*location != obj) {
table->unlock();
goto retry;
}
result = obj;
cls = obj->ISA();
if (! cls->hasCustomRR()) {
// Fast case. We know +initialize is complete because
// default-RR can never be set before then.
assert(cls->isInitialized());
if (! obj->rootTryRetain()) {
result = nil;
}
}
else {
// Slow case. We must check for +initialize and call it outside
// the lock if necessary in order to avoid deadlocks.
if (cls->isInitialized() || _thisThreadIsInitializingClass(cls)) {
// Look up the class's retainWeakReference implementation and call it directly.
BOOL (*tryRetain)(id, SEL) = (BOOL(*)(id, SEL))
class_getMethodImplementation(cls, SEL_retainWeakReference);
// Lookup resolved to the forwarding stub: treat the load as failed.
if ((IMP)tryRetain == _objc_msgForward) {
result = nil;
}
else if (! (*tryRetain)(obj, SEL_retainWeakReference)) {
result = nil;
}
}
else {
// Run +initialize with the lock released, then retry from the top.
table->unlock();
_class_initialize(cls);
goto retry;
}
}
table->unlock();
return result;
}

在不看具体实现细节的情况下,还是很好理解的:给一个 weak 指针赋值,其实就是把这个 weak 指针的地址登记到该对象所对应的 SideTable 的 weak_table 中去。
实现细节
关于 SideTable
// Per-stripe bookkeeping record: a lock, a reference-count map and a weak table.
struct SideTable {
// Spin lock guarding this table.
spinlock_t slock;
// Reference-count map.
RefcountMap refcnts;
// Weak reference table.
weak_table_t weak_table;
// Constructor: zero-fills the weak table.
SideTable() {
memset(&weak_table, 0, sizeof(weak_table));
}
// Destructor: a SideTable must never be destroyed; doing so is reported via _objc_fatal.
~SideTable() {
_objc_fatal("Do not delete SideTable.");
}
// Thin forwards to the spin lock.
void lock() { slock.lock(); }
void unlock() { slock.unlock(); }
void forceReset() { slock.forceReset(); }
// Address-ordered lock discipline for a pair of side tables.
// Function templates parameterized on HaveOld / HaveNew; only declared here.
template<HaveOld, HaveNew>
static void lockTwo(SideTable *lock1, SideTable *lock2);
template<HaveOld, HaveNew>
static void unlockTwo(SideTable *lock1, SideTable *lock2);
};
// Wrapper around an array of weak_entry_t used as an open-addressed hash table.
struct weak_table_t {
// Entry array; slots are addressed with `hash & mask`.
weak_entry_t *weak_entries;
// Number of entries currently stored.
size_t num_entries;
// Bit mask applied to a hash to obtain a slot index.
uintptr_t mask;
// Largest probe distance recorded by an insertion; lookups give up beyond it.
uintptr_t max_hash_displacement;
};
// A weak referrer is the address of a variable that holds the weak reference.
typedef objc_object ** weak_referrer_t;
// All weak referrers registered for one referent. Referrers live either in a
// small inline array or in a separately allocated hash set; both layouts share
// the same storage through the union below.
struct weak_entry_t {
DisguisedPtr<objc_object> referent;
union {
// Out-of-line layout.
struct {
weak_referrer_t *referrers;
// 2-bit unsigned field, so it can hold values 0..3.
uintptr_t out_of_line_ness : 2;
// Remaining bits of the word (62 per the original note, presumably on 64-bit targets).
uintptr_t num_refs : PTR_MINUS_2;
uintptr_t mask;
uintptr_t max_hash_displacement;
};
// Inline layout.
struct {
// out_of_line_ness field is low bits of inline_referrers[1]
weak_referrer_t inline_referrers[WEAK_INLINE_COUNT];
};
};
// True once the entry has switched to the out-of-line layout.
bool out_of_line() {
return (out_of_line_ness == REFERRERS_OUT_OF_LINE);
}
// Assignment is a raw byte copy of the whole entry.
weak_entry_t& operator=(const weak_entry_t& other) {
memcpy(this, &other, sizeof(other));
return *this;
}
// Builds an inline entry holding exactly one referrer; the other inline slots are nil.
weak_entry_t(objc_object *newReferent, objc_object **newReferrer)
: referent(newReferent)
{
inline_referrers[0] = newReferrer;
for (int i = 1; i < WEAK_INLINE_COUNT; i++) {
inline_referrers[i] = nil;
}
}
};
添加函数 weak_register_no_lock
// Registers the weak variable at `referrer_id` as a referrer of `referent_id`
// in `weak_table`. The caller is expected to hold the table's lock.
//
// Returns referent_id on success (also for nil and tagged pointers, which are
// never tracked), or nil when the store must be rejected because the object
// refuses weak references or is deallocating.
// If the object is deallocating and crashIfDeallocating is true, the process
// is terminated via _objc_fatal instead of returning nil.
id weak_register_no_lock(weak_table_t *weak_table, id referent_id,
                         id *referrer_id, bool crashIfDeallocating)
{
    objc_object *referent = (objc_object *)referent_id;
    objc_object **referrer = (objc_object **)referrer_id;

    if (!referent || referent->isTaggedPointer()) return referent_id;

    // Make sure the object may still be weakly referenced.
    bool deallocating;
    if (!referent->ISA()->hasCustomRR()) {
        deallocating = referent->rootIsDeallocating();
    }
    else {
        // Custom retain/release: ask the object through allowsWeakReference.
        BOOL (*allowsWeakReference)(objc_object *, SEL) =
            (BOOL(*)(objc_object *, SEL))
            object_getMethodImplementation((id)referent,
                                           SEL_allowsWeakReference);
        if ((IMP)allowsWeakReference == _objc_msgForward) {
            return nil;
        }
        deallocating =
            !(*allowsWeakReference)(referent, SEL_allowsWeakReference);
    }

    if (deallocating) {
        // Honor the caller's policy instead of silently ignoring the flag.
        if (crashIfDeallocating) {
            _objc_fatal("Cannot form weak reference to instance (%p) of "
                        "class %s. It is possible that this object was "
                        "over-released, or is in the process of deallocation.",
                        (void*)referent, object_getClassName((id)referent));
        } else {
            return nil;
        }
    }

    // Record the referrer.
    weak_entry_t *entry;
    if ((entry = weak_entry_for_referent(weak_table, referent))) {
        // The object already has an entry: append one more referrer.
        append_referrer(entry, referrer);
    }
    else {
        // First weak reference to this object: build a fresh entry,
        // grow the table if it is getting full, then insert the entry.
        weak_entry_t new_entry(referent, referrer);
        weak_grow_maybe(weak_table);
        weak_entry_insert(weak_table, &new_entry);
    }

    // Do not set *referrer. objc_storeWeak() requires that the
    // value not change.
    return referent_id;
}
// Finds the entry that tracks `referent` in `weak_table`, or nil if none.
// Linear probing starts at hash(referent) & mask and stops once the probe
// distance exceeds the table's recorded max_hash_displacement.
static weak_entry_t *weak_entry_for_referent(weak_table_t *weak_table, objc_object *referent)
{
    weak_entry_t *entries = weak_table->weak_entries;
    if (!entries) return nil;

    const size_t start = hash_pointer(referent) & weak_table->mask;
    size_t slot = start;
    for (size_t probes = 1; entries[slot].referent != referent; probes++) {
        slot = (slot + 1) & weak_table->mask;
        // Wrapped all the way around: the table is corrupt.
        if (slot == start) bad_weak_table(entries);
        if (probes > weak_table->max_hash_displacement) {
            return nil;
        }
    }
    return &entries[slot];
}
// Adds a new weak referrer to an existing entry.
// Strategy:
// 1: while a free inline slot exists (up to WEAK_INLINE_COUNT referrers, 4 per
//    the original note), the referrer is stored in inline_referrers.
// 2: once all inline slots are taken, the referrers are moved to the separately
//    allocated `referrers` hash set and every later referrer is added there.
static void append_referrer(weak_entry_t *entry, objc_object **new_referrer)
{
if (! entry->out_of_line()) {
// Try to place the referrer in a free inline slot.
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
if (entry->inline_referrers[i] == nil) {
entry->inline_referrers[i] = new_referrer;
return;
}
}
// Inline storage is full: copy the inline referrers into a freshly allocated array
// before the out-of-line fields (which share storage with them) are written.
weak_referrer_t *new_referrers = (weak_referrer_t *)
calloc(WEAK_INLINE_COUNT, sizeof(weak_referrer_t));
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
new_referrers[i] = entry->inline_referrers[i];
}
entry->referrers = new_referrers;
entry->num_refs = WEAK_INLINE_COUNT;
entry->out_of_line_ness = REFERRERS_OUT_OF_LINE; // from now on entry->out_of_line() is true
entry->mask = WEAK_INLINE_COUNT-1;
entry->max_hash_displacement = 0;
}
// Out-of-line insertion: grow first when the set is at least 3/4 full.
if (entry->num_refs >= TABLE_SIZE(entry) * 3/4) {
return grow_refs_and_insert(entry, new_referrer);
}
// Linear probing for the first empty slot, starting at hash & mask.
size_t begin = w_hash_pointer(new_referrer) & (entry->mask);
size_t index = begin;
size_t hash_displacement = 0;
while (entry->referrers[index] != nil) {
hash_displacement++;
index = (index+1) & entry->mask;
if (index == begin) bad_weak_table(entry);
}
// Remember the longest probe distance so lookups know when to give up.
if (hash_displacement > entry->max_hash_displacement) {
entry->max_hash_displacement = hash_displacement;
}
weak_referrer_t &ref = entry->referrers[index];
ref = new_referrer;
entry->num_refs++;
}
// Copies `new_entry` into the first free slot of `weak_table`, probing linearly
// from hash(referent) & mask, and records the probe distance if it is a new maximum.
static void weak_entry_insert(weak_table_t *weak_table, weak_entry_t *new_entry)
{
    weak_entry_t *entries = weak_table->weak_entries;

    const size_t start = hash_pointer(new_entry->referent) & (weak_table->mask);
    size_t slot = start;
    size_t displacement = 0;
    for (; entries[slot].referent != nil; displacement++) {
        slot = (slot + 1) & weak_table->mask;
        // Wrapped all the way around without finding a free slot.
        if (slot == start) bad_weak_table(entries);
    }

    entries[slot] = *new_entry;
    weak_table->num_entries++;

    if (weak_table->max_hash_displacement < displacement) {
        weak_table->max_hash_displacement = displacement;
    }
}
移除函数 weak_unregister_no_lock
// Removes the weak variable at `referrer_id` from the referrers recorded for
// `referent_id` in `weak_table`; drops the whole entry once no referrer is left.
void weak_unregister_no_lock(weak_table_t *weak_table, id referent_id, id *referrer_id)
{
    objc_object *referent = (objc_object *)referent_id;
    objc_object **referrer = (objc_object **)referrer_id;

    if (!referent) return;

    weak_entry_t *entry = weak_entry_for_referent(weak_table, referent);
    if (entry) {
        remove_referrer(entry, referrer);

        // An out-of-line entry with a non-zero count is still in use;
        // otherwise scan the inline slots for any remaining referrer.
        bool stillReferenced = entry->out_of_line() && entry->num_refs != 0;
        if (!stillReferenced) {
            for (size_t slot = 0; slot < WEAK_INLINE_COUNT; slot++) {
                if (entry->inline_referrers[slot]) {
                    stillReferenced = true;
                    break;
                }
            }
        }

        // Every weak pointer to the object is gone: remove the entry as well.
        if (!stillReferenced) {
            weak_entry_remove(weak_table, entry);
        }
    }

    // Do not set *referrer = nil. objc_storeWeak() requires that the
    // value not change.
}
// Removes `old_referrer` from `entry`. Reports objc_weak_error() when the
// referrer is not found in the entry.
static void remove_referrer(weak_entry_t *entry, objc_object **old_referrer)
{
    if (!entry->out_of_line()) {
        // Inline layout: clear the matching slot, if any.
        for (size_t slot = 0; slot < WEAK_INLINE_COUNT; slot++) {
            if (entry->inline_referrers[slot] != old_referrer) continue;
            entry->inline_referrers[slot] = nil;
            return;
        }
        objc_weak_error();
        return;
    }

    // Out-of-line layout: probe linearly from hash & mask.
    const size_t start = w_hash_pointer(old_referrer) & (entry->mask);
    size_t slot = start;
    for (size_t probes = 1; entry->referrers[slot] != old_referrer; probes++) {
        slot = (slot + 1) & entry->mask;
        if (slot == start) bad_weak_table(entry);
        // Probed further than any insertion ever did: the referrer is not here.
        if (probes > entry->max_hash_displacement) {
            objc_weak_error();
            return;
        }
    }

    entry->referrers[slot] = nil;
    entry->num_refs--;
}
// Removes `entry` from `weak_table`: frees out-of-line referrer storage,
// zeroes the slot, updates the count and lets the table shrink if possible.
static void weak_entry_remove(weak_table_t *weak_table, weak_entry_t *entry)
{
    // Release the separately allocated referrer array before wiping the slot.
    if (entry->out_of_line()) {
        free(entry->referrers);
    }
    bzero(entry, sizeof(weak_entry_t));

    weak_table->num_entries -= 1;

    // Shrink check, per the original note:
    // TABLE_SIZE(weak_table) >= 1024 && TABLE_SIZE(weak_table) / 16 >= weak_table->num_entries
    weak_compact_maybe(weak_table);
}
对象销毁,weak指针设置为nil的过程
可以通过下面代码来分析
// Demo class used to observe what happens to a weak variable when its object is deallocated.
@implementation ARCObject
// Logs when an instance is deallocated.
- (void)dealloc
{
NSLog(@"done");
}
// Assigns a scoped strong object to a static __weak variable, then logs the
// weak variable after the strong local's scope has ended.
+ (void)test
{
static __weak id weakObj;
{
// The only strong reference lives inside this inner scope.
id obj = [ARCObject new];
weakObj = obj;
}
NSLog(@"%@", weakObj);
}
@end
根据上面的分析,很容易可以想到会调用 weak_entry_remove
函数,通过断点来观察堆栈信息

在 dealloc
函数之后,开始清除对象的 weak 指针。接下来继续看源码
// Forwards to the object's rootDealloc().
void _objc_rootDealloc(id obj)
{
obj->rootDealloc();
}
// Frees the object directly when its isa flags show there is nothing to clean
// up; otherwise goes through object_dispose().
inline void objc_object::rootDealloc()
{
    if (isTaggedPointer()) return;  // fixme necessary?

    // Direct free is only allowed for a non-pointer isa with no weak references,
    // no associated objects, no C++ destructor and no side-table refcount.
    const bool nothingToCleanUp = isa.nonpointer &&
                                  !isa.weakly_referenced &&
                                  !isa.has_assoc &&
                                  !isa.has_cxx_dtor &&
                                  !isa.has_sidetable_rc;

    if (fastpath(nothingToCleanUp)) {
        free(this);
        return;
    }
    object_dispose((id)this);
}
// Destructs the instance and frees its memory. Always returns nil;
// a nil argument is a no-op.
id object_dispose(id obj)
{
    if (obj) {
        objc_destructInstance(obj);
        free(obj);
    }
    return nil;
}
// Runs the pre-free cleanup for an object: C++ destructors, associated objects,
// then weak-reference / side-table cleanup. Returns the object it was given.
void *objc_destructInstance(id obj)
{
    if (!obj) return obj;

    // Read all of the flags at once for performance.
    const bool hasCxxDtor = obj->hasCxxDtor();
    const bool hasAssociations = obj->hasAssociatedObjects();

    // This order is important.
    if (hasCxxDtor) {
        object_cxxDestruct(obj);
    }
    // Remove associated objects.
    if (hasAssociations) {
        _object_remove_assocations(obj);
    }
    // Clear weak pointers to the object.
    obj->clearDeallocating();

    return obj;
}
// Picks the cleanup path for a deallocating object based on its isa flags.
inline void objc_object::clearDeallocating()
{
    // isa.nonpointer tells whether the isa word is a packed bitfield (1) or a
    // raw class pointer (0). Per the original note, on arm64 the isa is 64 bits
    // wide and only 33 of them hold the class address; the rest carry flags.
    if (slowpath(!isa.nonpointer)) {
        // Slow path for raw pointer isa.
        sidetable_clearDeallocating();
        return;
    }

    if (slowpath(isa.weakly_referenced || isa.has_sidetable_rc)) {
        // Slow path for non-pointer isa with weak refs and/or side table data.
        clearDeallocating_slow();
    }
}
// Slow-path cleanup: under the side table's lock, clears the weak references to
// this object and erases its side-table refcount entry, each only if the
// corresponding isa flag is set.
NEVER_INLINE void objc_object::clearDeallocating_slow()
{
SideTable& table = SideTables()[this];
table.lock();
if (isa.weakly_referenced) {
// Sets every registered weak variable to nil and removes the entry.
weak_clear_no_lock(&table.weak_table, (id)this);
}
if (isa.has_sidetable_rc) {
table.refcnts.erase(this);
}
table.unlock();
}
通过上面的中转,移除的关键函数如下
// Called while `referent_id` is deallocating: sets every weak variable that is
// registered for the object to nil, then removes the object's entry from the table.
void weak_clear_no_lock(weak_table_t *weak_table, id referent_id)
{
    objc_object *referent = (objc_object *)referent_id;

    weak_entry_t *entry = weak_entry_for_referent(weak_table, referent);
    if (entry == nil) {
        /// XXX shouldn't happen, but does with mismatched CF/objc
        //printf("XXX no entry for clear deallocating %p\n", referent);
        return;
    }

    // Pick the referrer storage that matches the entry's current layout.
    const bool outOfLine = entry->out_of_line();
    weak_referrer_t *slots = outOfLine ? entry->referrers
                                       : entry->inline_referrers;
    size_t slotCount = outOfLine ? TABLE_SIZE(entry) : WEAK_INLINE_COUNT;

    // Zero out every weak variable that still points at the referent.
    for (size_t i = 0; i < slotCount; ++i) {
        objc_object **referrer = slots[i];
        if (!referrer) continue;

        if (*referrer == referent) {
            *referrer = nil;
        }
        else if (*referrer) {
            // Registered variable points at some other object.
            objc_weak_error();
        }
    }

    weak_entry_remove(weak_table, entry);
}
思考
Q:SideTable 是通过对象地址获取的,也就是说,每个对象都对应一个 SideTable,那为什么 weak_table_t 里面又要重复的根据对象地址获取 weak_entry_t 列表?
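A:目的是解决 hash 冲突。SideTable 的数量是固定的(StripeCount 个),多个对象会共用同一个 SideTable,所以在 weak_table_t 内部还需要再根据对象地址做一次 hash 查找,才能定位到该对象自己的 weak_entry_t。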
先看 SideTable
的获取方式
SideTable *newTable = &SideTables()[newObj];
// Returns the global StripedMap of SideTables by reinterpreting the
// SideTableBuf storage as a StripedMap<SideTable>.
// reinterpret_cast is the C++ cast operator, written reinterpret_cast<type-id>(expression).
// It can convert a pointer to an integer and an integer to a pointer; converting
// a pointer to an integer and back to the original pointer type yields the original value.
static StripedMap<SideTable>& SideTables() {
return *reinterpret_cast<StripedMap<SideTable>*>(SideTableBuf);
}
enum { CacheLineSize = 64 };

// Fixed-size map from pointer to T. A pointer is reduced to one of StripeCount
// slots, so many different pointers share the same T. Each T is aligned to
// CacheLineSize.
template<typename T>
class StripedMap {
    enum { StripeCount = 64 };

    struct PaddedT {
        T value alignas(CacheLineSize);
    };

    PaddedT array[StripeCount];

    // Maps an address to a slot index in [0, StripeCount).
    static unsigned int indexForPointer(const void *p) {
        uintptr_t addr = reinterpret_cast<uintptr_t>(p);
        return ((addr >> 4) ^ (addr >> 9)) % StripeCount;
    }

 public:
    // Returns the T that serves pointer p.
    T& operator[] (const void *p) {
        return array[indexForPointer(p)].value;
    }
    // Const overload. Indexes the array directly: the previous
    // const_cast<StripedMap<T>>(this)[p] cast a pointer to a class type and
    // failed to compile as soon as this overload was instantiated.
    const T& operator[] (const void *p) const {
        return array[indexForPointer(p)].value;
    }
    //...
};
通过 indexForPointer
这个函数可以知道,每个对象的地址 addr
,都会映射为一个小于 StripeCount 值的索引值,这里就会出现一个问题,有可能多个对象的 addr
映射成同一个索引值。
也就是说 SideTable *newTable = &SideTables()[newObj];
不是一一对应的,会出现多个 newObj
对应一个 newTable
。
有了上面的基础,接下来,继续看根据对象地址在 weak_table_t
中寻找 weak_entry_t
,重点看 weak_entry_insert
和 weak_entry_for_referent
函数
// Copies `new_entry` into the first free slot of `weak_table`.
static void weak_entry_insert(weak_table_t *weak_table, weak_entry_t *new_entry)
{
    weak_entry_t *entries = weak_table->weak_entries;

    // Hash the object's address to get the starting slot. Different objects can
    // land on the same start, so occupied slots are skipped one by one;
    // `displacement` counts how many slots were skipped past the start.
    const size_t start = hash_pointer(new_entry->referent) & (weak_table->mask);
    size_t slot = start;
    size_t displacement = 0;
    for (; entries[slot].referent != nil; displacement++) {
        slot = (slot + 1) & weak_table->mask;
        // Wrapped all the way around without finding a free slot.
        if (slot == start) bad_weak_table(entries);
    }

    entries[slot] = *new_entry;
    weak_table->num_entries++;

    // Keep the largest displacement seen so lookups know when to stop probing.
    if (weak_table->max_hash_displacement < displacement) {
        weak_table->max_hash_displacement = displacement;
    }
}
// Finds the entry that tracks `referent` in `weak_table`, or nil if none.
static weak_entry_t *weak_entry_for_referent(weak_table_t *weak_table, objc_object *referent)
{
    weak_entry_t *entries = weak_table->weak_entries;
    if (!entries) return nil;

    // Lookup uses the same hashing and probing scheme as insertion.
    const size_t start = hash_pointer(referent) & weak_table->mask;
    size_t slot = start;
    for (size_t probes = 1; entries[slot].referent != referent; probes++) {
        slot = (slot + 1) & weak_table->mask;
        // Wrapped all the way around: the table is corrupt.
        if (slot == start) bad_weak_table(entries);
        // No insertion ever probed this far, so the referent is not stored.
        if (probes > weak_table->max_hash_displacement) {
            return nil;
        }
    }
    return &entries[slot];
}
// Hashes an object's address to an integer by passing it to ptr_hash.
static inline uintptr_t hash_pointer(objc_object *key) {
return ptr_hash((uintptr_t)key);
}
// Mixes a 64-bit key into a 32-bit hash: fold in the key shifted right by 4,
// multiply by a fixed constant, XOR with the byte-swapped product, truncate.
static inline uint32_t ptr_hash(uint64_t key) {
    const uint64_t folded = key ^ (key >> 4);
    const uint64_t mixed = folded * 0x8a970be7488fda55;
    return (uint32_t)(mixed ^ __builtin_bswap64(mixed));
}
小结
- 通过对象地址 hash 得到的
SideTable
,可能出现多个对象对应同一个SideTable
- 在 SideTable 的 weak_table.weak_entries 中寻找 weak_entry_t 时,也是通过对象地址 hash 得到在 weak_entries 列表中的起始索引值;因为不同的对象可能 hash 到同一个起始索引,所以采用线性探测来解决冲突,max_hash_displacement 记录的是插入时出现过的最大探测偏移量,查找时一旦超过这个偏移量即可判定不存在。