CMSCollector 之 二
在上一篇中讲解了由VMThread执行的前台GC和由CMSThread执行的后台GC的整体逻辑,从本篇开始就逐步讲解每个GC步骤的实现细节。
1、checkpointRootsInitial
checkpointRootsInitial方法用于处理CMS GC的第一个步骤,InitialMarking,其并行执行的实现都封装在CMSParInitialMarkTask中,此方法只是做必要的检查和准备工作,其实现如下:
// Runs the InitialMarking step of a CMS cycle. This function only performs
// checks and bookkeeping around checkpointRootsInitialWork(), then enables
// reference discovery and moves the collector state to Marking.
// asynch: true selects the branch that acquires bitMapLock here (described by
// the article as the background GC); false assumes the locks are already held.
void CMSCollector::checkpointRootsInitial(bool asynch) {
assert(_collectorState == InitialMarking, "Wrong collector state");
// Verify that the current thread is allowed to run this step.
check_correct_thread_executing();
TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
// Save heap and metaspace usage (presumably into the _last_*_summary fields
// that report_heap_summary reads).
save_heap_summary();
// Report the saved summaries through _gc_tracer_cm.
report_heap_summary(GCWhen::BeforeGC);
ReferenceProcessor* rp = ref_processor();
SpecializationStats::clear();
assert(_restart_addr == NULL, "Control point invariant");
if (asynch) {
// Asynchronous case: hold bitMapLock for the rest of this scope.
MutexLockerEx x(bitMapLock(),
Mutex::_no_safepoint_check_flag);
checkpointRootsInitialWork(asynch);
// Enable Reference discovery; both arguments ask the ReferenceProcessor to
// verify its own state first.
rp->enable_discovery(true /*verify_disabled*/, true /*check_no_refs*/);
_collectorState = Marking;
} else {
// Check that the ReferenceProcessor is configured as expected.
assert(!rp->discovery_is_atomic(),
"incorrect setting of discovery predicate");
assert(!rp->discovery_enabled(), "genCollectedHeap shouldn't control "
"ref discovery for this generation kind");
// already have locks
checkpointRootsInitialWork(asynch);
// Enable Reference discovery (without the "no refs" check in this branch).
rp->enable_discovery(true /*verify_disabled*/, false /*verify_no_refs*/);
_collectorState = Marking;
}
SpecializationStats::print();
}
#ifndef PRODUCT
// Debug-only check (compiled out when PRODUCT is defined) that the calling
// thread is one that may legitimately execute a CMS phase.
void CMSCollector::check_correct_thread_executing() {
Thread* t = Thread::current();
// Only the VM thread or a concurrent GC (CMS) thread may get here.
assert(t->is_ConcurrentGC_thread() || t->is_VM_thread(),
"Unexpected thread type");
// While _foregroundGCShouldWait is set, only the CMS thread may be running.
if (_foregroundGCShouldWait) {
// We cannot be the VM thread
assert(t->is_ConcurrentGC_thread(),
"Should be CMS thread");
} else {
if (t->is_ConcurrentGC_thread()) {
// _foregroundGCShouldWait is false and we are the CMS thread: the collector
// must be in one of the two stop-the-world phases.
assert(_collectorState == InitialMarking ||
_collectorState == FinalMarking,
"Should be a stop-world phase");
// The CMS thread should be holding the CMS_token.
assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
"Potential interference with concurrently "
"executing VM thread");
}
}
}
#endif
// Forwards the last recorded heap summary and metaspace summary to the
// tracer held in _gc_tracer_cm, tagged with the reporting point `when`.
void CMSCollector::report_heap_summary(GCWhen::Type when)
{
  // Heap summary first, then metaspace summary.
  _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary);
  _gc_tracer_cm->report_metaspace_summary(when, _last_metaspace_summary);
}
void CMSCollector::checkpointRootsInitialWork(bool asynch) {
//校验当前JVM处于安全点
assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
assert(_collectorState == InitialMarking, "just checking");
//校验已经获取了bitMapLock锁
assert_lock_strong(bitMapLock());
//校验markBitMap已清空
assert(_markBitMap.isAllClear(), "was reset at end of previous cycle");
//设置verifying属性和root_scanning_option属性
setup_cms_unloading_and_verification_state();
if (UseAdaptiveSizePolicy) {
//通知InitialWork开始
size_policy()->checkpoint_roots_initial_begin();
}
//CMSPLABRecordAlways默认为true,表示总是记录survivor区PLAB的边界
if (_survivor_plab_array != NULL && !CMSPLABRecordAlways) {
reset_survivor_plab_arrays();
}
ResourceMark rm;
HandleMark hm;
MarkRefsIntoClosure notOlder(_span, &_markBitMap);
GenCollectedHeap* gch = GenCollectedHeap::heap();
//校验markStack和_overflow_list是空的
verify_work_stacks_empty();
//校验_preserved_mark_stack和_preserved_oop_stack是空的
verify_overflow_empty();
//遍历所有JavaThread,让其TLAB不能被用于分配对象
gch->ensure_parsability(false); // fill TLABs, but no need to retire them
//更新各代包含的Space的save_mark_word
gch->save_marks();
//设置enqueuing_is_done属性为false,为true表示Reference实例已经查找并处理完毕
ref_processor()->set_enqueuing_is_done(false);
//另外保存所有新创建的ClassLoaderData
ClassLoaderDataGraph::remember_new_clds(true);
//清除所有ClassLoaderData的claimed标识,该标识表示该ClassLoaderData已经遍历过了
ClassLoaderDataGraph::clear_claimed_marks();
if (CMSPrintEdenSurvivorChunks) {
print_eden_and_survivor_chunk_arrays();
}
{
COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
//CMSParallelInitialMarkEnabled默认为true,表示是否允许并行的InitialMark
if (CMSParallelInitialMarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
// The parallel version.
FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
int n_workers = workers->active_workers();
CMSParInitialMarkTask tsk(this, n_workers);
gch->set_par_threads(n_workers);
//计算年轻代三个区并行遍历所需的任务数
initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
if (n_workers > 1) {
GenCollectedHeap::StrongRootsScope srs(gch);
//并发执行CMSParInitialMarkTask任务
workers->run_task(&tsk);
} else {
GenCollectedHeap::StrongRootsScope srs(gch);
tsk.work(0);
}
gch->set_par_threads(0);
} else {
//串行执行逻辑
CLDToOopClosure cld_closure(¬Older, true);
gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
gch->gen_process_roots(_cmsGen->level(),
true, // younger gens are roots
true, // activate StrongRootsScope
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
¬Older,
NULL,
&cld_closure);
}
}
assert(_modUnionTable.isAllClear(),
"Was cleared in most recent final checkpoint phase"
" or no bits are set in the gc_prologue before the start of the next "
"subsequent marking phase.");
assert(_ct->klass_rem_set()->mod_union_is_clear(), "Must be");
//保存cmsSpace的_sweep_limit属性
save_sweep_limits();
if (UseAdaptiveSizePolicy) {
size_policy()->checkpoint_roots_initial_end(gch->gc_cause());
}
verify_overflow_empty();
}
// Sets the verifying flag and adjusts the root scanning options for the
// upcoming CMS cycle, depending on whether classes will be unloaded and on
// whether any of the Verify* flags is enabled.
void CMSCollector::setup_cms_unloading_and_verification_state() {
// Per the article, all of these verify flags are false by default.
const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
|| VerifyBeforeExit;
const int rso = GenCollectedHeap::SO_AllCodeCache;
// We set the proper root for this CMS cycle here.
if (should_unload_classes()) { // Should unload classes this cycle
// Unloading classes: drop SO_AllCodeCache from the root scanning options.
remove_root_scanning_option(rso); // Shrink the root set appropriately
set_verifying(should_verify); // Set verification state for this cycle
return; // Nothing else needs to be done at this time
}
assert(!should_unload_classes(), "Inconsitency!");
// Not unloading classes: add SO_AllCodeCache to the root scanning options.
add_root_scanning_option(rso);
if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) {
set_verifying(true);
} else if (verifying() && !should_verify) {
// Verification was on but the flags are now off: turn it off and remove
// SO_AllCodeCache again.
set_verifying(false);
remove_root_scanning_option(rso);
}
}
// Whether classes are to be unloaded (value of _should_unload_classes).
bool should_unload_classes() const { return _should_unload_classes; }

// Clears the bits of `o` in the root scanning option mask.
void remove_root_scanning_option(int o) {
  _roots_scanning_options &= ~o;
}

// Records whether verification is active.
void set_verifying(bool v) {
  _verifying = v;
}

// Sets the bits of `o` in the root scanning option mask.
void add_root_scanning_option(int o) {
  _roots_scanning_options |= o;
}

// Whether verification is currently active.
bool verifying() const {
  return _verifying;
}
// Returns true when _overflow_list is NULL. In debug builds it also checks
// that _num_par_pushes is non-negative, and zero whenever the list is empty.
bool CMSCollector::overflow_list_is_empty() const {
  assert(_num_par_pushes >= 0, "Inconsistency");
  if (_overflow_list != NULL) {
    return false;
  }
  // Empty list: no parallel pushes may be outstanding.
  assert(_num_par_pushes == 0, "Inconsistency");
  return true;
}
// Debug check: both the marking stack and the overflow list must be empty.
void CMSCollector::verify_work_stacks_empty() const
{
  // Marking stack first, then the overflow list.
  assert(_markStack.isEmpty(), "Marking stack should be empty");
  assert(overflow_list_is_empty(), "Overflow list should be empty");
}
// Debug check: the overflow list must be empty and no marks may be preserved.
void CMSCollector::verify_overflow_empty() const
{
  // Overflow list first, then the preserved mark/oop stacks.
  assert(overflow_list_is_empty(), "Overflow list should be empty");
  assert(no_preserved_marks(), "No preserved marks");
}
// True only if both the preserved-mark stack and the preserved-oop stack
// are empty.
bool CMSCollector::no_preserved_marks() const {
  if (!_preserved_mark_stack.is_empty()) {
    return false;
  }
  return _preserved_oop_stack.is_empty();
}
// Initializes the par_seq_tasks (SequentialSubTasksDone) of the young
// generation's eden, to and from spaces with the thread count and the number
// of sub-tasks each space is split into.
// n_threads: number of threads that will take part; must be > 0.
void
CMSCollector::
initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
assert(n_threads > 0, "Unexpected n_threads argument");
DefNewGeneration* dng = (DefNewGeneration*)_young_gen;
// Eden space
if (!dng->eden()->is_empty()) {
SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
// The _eden_chunk_index recorded entries of _eden_chunk_array are boundaries,
// which split eden into _eden_chunk_index + 1 tasks.
size_t n_tasks = _eden_chunk_index + 1;
assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
// Number of participating threads.
pst->set_n_threads(n_threads);
// Number of tasks.
pst->set_n_tasks((int)n_tasks);
}
// Merge the per-thread survivor PLAB arrays into _survivor_chunk_array.
if (_survivor_plab_array != NULL) {
merge_survivor_plab_arrays(dng->from(), n_threads);
} else {
assert(_survivor_chunk_index == 0, "Error");
}
// To space
{
SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
pst->set_n_threads(n_threads);
pst->set_n_tasks(1);
assert(pst->valid(), "Error");
}
// From space
{
SequentialSubTasksDone* pst = dng->from()->par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
// The _survivor_chunk_index recorded entries of _survivor_chunk_array are
// boundaries, which split the from space into _survivor_chunk_index + 1 tasks.
size_t n_tasks = _survivor_chunk_index + 1;
assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
pst->set_n_threads(n_threads);
pst->set_n_tasks((int)n_tasks);
assert(pst->valid(), "Error");
}
}
// Performs a k-way merge of the addresses held in the per-thread ChunkArrays
// of _survivor_plab_array: on every round it picks the smallest address among
// the entries currently pointed at by _cursor[], stores it into
// _survivor_chunk_array, and advances that thread's cursor. The result is the
// recorded addresses in ascending order (assuming each ChunkArray is itself
// ascending, as the article states), with their count in _survivor_chunk_index.
// surv: the survivor space whose top bounds the recorded addresses.
// no_of_gc_threads: number of ChunkArrays / cursors to consider.
void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
int no_of_gc_threads) {
assert(_survivor_plab_array != NULL, "Error");
assert(_survivor_chunk_array != NULL, "Error");
assert(_collectorState == FinalMarking ||
(CMSParallelInitialMarkEnabled && _collectorState == InitialMarking), "Error");
// Reset every cursor to the first entry of its ChunkArray.
for (int j = 0; j < no_of_gc_threads; j++) {
_cursor[j] = 0;
}
HeapWord* top = surv->top();
size_t i;
for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries
HeapWord* min_val = top; // Higher than any PLAB address
uint min_tid = 0; // position of min_val this round
// Scan all ChunkArrays for the smallest not-yet-consumed address.
for (int j = 0; j < no_of_gc_threads; j++) {
// Each ChunkArray is itself an array of addresses (per the article, stored
// from low to high).
ChunkArray* cur_sca = &_survivor_plab_array[j];
if (_cursor[j] == cur_sca->end()) {
// This ChunkArray has been fully consumed; skip it.
continue;
}
assert(_cursor[j] < cur_sca->end(), "ctl pt invariant");
// cur_val is the next unconsumed address of thread j; min_val/min_tid track
// the smallest such address seen this round and the thread that owns it.
HeapWord* cur_val = cur_sca->nth(_cursor[j]);
assert(surv->used_region().contains(cur_val), "Out of bounds value");
if (cur_val < min_val) {
min_tid = j;
min_val = cur_val;
} else {
assert(cur_val < top, "All recorded addresses should be less");
}
}
// min_val still equal to top means no ChunkArray had anything left.
if (min_val == top) {
break;
}
// Record this round's minimum as the i-th merged entry.
_survivor_chunk_array[i] = min_val;
// Advance only the winning thread's cursor, so the next round compares its
// following entry against the unchanged heads of the other ChunkArrays.
_cursor[min_tid]++;
}
// We are all done; record the size of the _survivor_chunk_array
_survivor_chunk_index = i; // exclusive: [0, i)
if (PrintCMSStatistics > 0) {
gclog_or_tty->print(" (Survivor:" SIZE_FORMAT "chunks) ", i);
}
}
// Asks the CMS generation to record its sweep limit.
inline void CMSCollector::save_sweep_limits()
{
  _cmsGen->save_sweep_limit();
}
上述代码中的StrongRootsScope的定义如下,第二个参数active默认为true,用于调整_strong_roots_parity属性,其实现如下:
image.png
2、CMSParMarkTask
CMSParMarkTask是CMSParInitialMarkTask的父类,其类继承关系如下:
其中CMSParRemarkTask用来处理CMS GC的FinalMarking步骤的,其定义如下:
image.png
重点关注work_on_young_gen_roots的实现,如下:
// Scans the three young-generation spaces with closure `cl`, one space after
// another (to, then from, then eden). The from and eden spaces are split into
// sub-tasks using the chunk arrays recorded in the collector.
// worker_id: id of the calling worker, forwarded to do_young_space_rescan.
void CMSParMarkTask::work_on_young_gen_roots(uint worker_id, OopsInGenClosure* cl) {
  DefNewGeneration* young = _collector->_young_gen->as_DefNewGeneration();
  EdenSpace*       eden = young->eden();
  ContiguousSpace* from = young->from();
  ContiguousSpace* to   = young->to();

  // Sampled chunk boundaries and their counts for eden and the from space.
  HeapWord** eden_chunks     = _collector->_eden_chunk_array;
  size_t     n_eden_chunks   = _collector->_eden_chunk_index;
  HeapWord** surv_chunks     = _collector->_survivor_chunk_array;
  size_t     n_surv_chunks   = _collector->_survivor_chunk_index;

  assert(n_eden_chunks <= _collector->_eden_chunk_capacity, "out of bounds");
  assert(n_surv_chunks <= _collector->_survivor_chunk_capacity, "out of bounds");

  // The spaces are processed sequentially, not concurrently with each other.
  do_young_space_rescan(worker_id, cl, to, NULL, 0);
  do_young_space_rescan(worker_id, cl, from, surv_chunks, n_surv_chunks);
  do_young_space_rescan(worker_id, cl, eden, eden_chunks, n_eden_chunks);
}
// Claims sub-tasks of `space` one at a time and applies `cl` to the objects
// in each claimed sub-region, until no sub-task is left.
// worker_id: id of the calling worker (not read in this body).
// chunk_array / chunk_top: recorded boundary addresses and their count; the
// space is split at these addresses. chunk_top == 0 means a single task
// covering [bottom, top).
void CMSParMarkTask::do_young_space_rescan(uint worker_id,
OopsInGenClosure* cl, ContiguousSpace* space,
HeapWord** chunk_array, size_t chunk_top) {
ResourceMark rm;
HandleMark hm;
SequentialSubTasksDone* pst = space->par_seq_tasks();
uint nth_task = 0;
uint n_tasks = pst->n_tasks();
if (n_tasks > 0) {
assert(pst->valid(), "Uninitialized use?");
HeapWord *start, *end;
// is_task_claimed takes nth_task by reference and stores the claimed task
// index into it; the loop ends once every task has been claimed.
while (!pst->is_task_claimed(/* reference */ nth_task)) {
// We claimed task # nth_task; compute its boundaries.
if (chunk_top == 0) { // no samples: scan the whole used part of the space
assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task");
start = space->bottom();
end = space->top();
} else if (nth_task == 0) {
// First task: from bottom up to the first recorded boundary.
start = space->bottom();
end = chunk_array[nth_task];
} else if (nth_task < (uint)chunk_top) {
assert(nth_task >= 1, "Control point invariant");
// Middle task: between two adjacent recorded boundaries.
start = chunk_array[nth_task - 1];
end = chunk_array[nth_task];
} else {
assert(nth_task == (uint)chunk_top, "Control point invariant");
// Last task: from the last recorded boundary up to top.
start = chunk_array[chunk_top - 1];
end = space->top();
}
MemRegion mr(start, end);
// The region must lie within the used part of the space.
assert(mr.is_empty() || space->used_region().contains(mr),
"Should be in space");
// The region must start at an object.
assert(mr.is_empty() || oop(mr.start())->is_oop(),
"Should be an oop");
// Apply cl to the references held by the objects in this region.
space->par_oop_iterate(mr, cl);
}
// Bump the completed count: this thread is done with this space.
pst->all_tasks_completed();
}
}
// Tries to claim the next unclaimed task by atomically incrementing
// _n_claimed. On success, stores the claimed task index in `t` and returns
// false ("was not already claimed"). Returns true once _n_claimed has reached
// _n_tasks, i.e. nothing is left to claim.
bool SequentialSubTasksDone::is_task_claimed(uint& t) {
  uint* claimed_addr = &_n_claimed;
  // Re-read the counter after every failed compare-and-swap.
  for (t = *claimed_addr; t < _n_tasks; t = *claimed_addr) {
    jint witnessed = Atomic::cmpxchg(t+1, claimed_addr, t);
    if (witnessed == (jint)t) {
      // The CAS saw the expected old value: task t is now ours.
      return false;
    }
  }
  return true;
}
bool SequentialSubTasksDone::all_tasks_completed() {
uint* n_completed_ptr = &_n_completed;
uint complete = *n_completed_ptr;
while (true) {
//不断循环将complete原子加1,如果成功则返回
uint res = Atomic::cmpxchg(complete+1, n_completed_ptr, complete);
if (res == complete) {
break;
}
complete = res;
}
if (complete+1 == _n_threads) {
//所有任务完成了,则将各属性置为0
clear();
return true;
}
return false;
}
// Resets the task, claimed, thread and completed counters to zero.
void SequentialSubTasksDone::clear() {
  _n_claimed = 0;
  _n_tasks = 0;
  _n_completed = 0;
  _n_threads = 0;
}
该方法会根据_eden_chunk_array和_survivor_chunk_array中保存的对象分配时采集的当时的top地址,将eden区和from区分成若干个子区域,每个子区域对应一个遍历任务,遍历这个区域所有对象所引用的其他对象,这些对象被引用了所以肯定是存活的。所有并行GC线程会通过is_task_claimed方法抢占某个遍历任务,直到所有的遍历任务执行完成才会退出,标记当前线程执行完成。所有并行GC线程执行完成就会唤醒在run_task方法等待的CMSThread线程,由线程继续执行剩余的GC逻辑。
3、CMSParInitialMarkTask
CMSParInitialMarkTask的定义如下:
重点关注其work方法的实现,如下:
// Work done by one worker of the parallel initial mark: first scans the young
// generation, then the remaining roots, marking references that fall inside
// the collector's span in the mark bitmap. Each part is timed and optionally
// logged.
// worker_id: id of the worker executing this call.
void CMSParInitialMarkTask::work(uint worker_id) {
elapsedTimer _timer;
ResourceMark rm;
HandleMark hm;
// ---------- scan from roots --------------
// Start timing.
_timer.start();
GenCollectedHeap* gch = GenCollectedHeap::heap();
// The span passed in is the collector's _span (the old generation's span,
// per the article).
Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
// ---------- young gen roots --------------
{
// Visit the reference fields of young-generation objects; referents inside
// the span get marked in the bitmap by the closure.
work_on_young_gen_roots(worker_id, &par_mri_cl);
// Stop timing.
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr(
"Finished young gen initial mark scan work in %dth thread: %3.3f sec",
worker_id, _timer.seconds());
}
}
// ---------- remaining roots --------------
// Restart the timer for the second part.
_timer.reset();
_timer.start();
CLDToOopClosure cld_closure(&par_mri_cl, true);
// Process the roots with the same marking closure.
gch->gen_process_roots(_collector->_cmsGen->level(),
false, // the young generation was already handled above
false, // the caller has already set up the StrongRootsScope
GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
_collector->should_unload_classes(),
&par_mri_cl,
NULL,
&cld_closure);
assert(_collector->should_unload_classes()
|| (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
"if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr(
"Finished remaining root initial mark scan work in %dth thread: %3.3f sec",
worker_id, _timer.seconds());
}
}
注意上述方法是并行执行的,JVM中的线程池跟Java中的线程池的实现完全不同,Java中的线程池是提交任务到一个任务队列中,由线程池的线程不断从任务队列中获取待执行的任务并执行,如果没有待执行的任务则阻塞等待;JVM中的线程池是通过锁的唤醒让所有线程同时执行某个任务,这个任务执行完成就休眠了,等待下一次唤醒,参考WorkGang::run_task和GangWorker::loop方法的实现,WorkGang相当于线程池,负责各线程间的同步和状态管理,GangWorker就是负责执行具体任务的单个线程,loop方法就是其run方法的实现。因此JVM的并发执行任务在设计时需要自行判断某个任务是否已经执行,即实现任务抢占的功能,如上一节中的is_task_claimed方法,修改共享属性时需要原子修改。
4、Par_MarkRefsIntoClosure
Par_MarkRefsIntoClosure用于将包含在指定span中的oop在bitMap中打标,其定义如下:
DO_OOP_WORK_DEFN宏用于将do_oop_work的调用转化成对do_oop的调用,其定义如下:
image.png
其实现如下:
image.png
5、CLDToOopClosure
CLDToOopClosure用于遍历ClassLoaderData的,其定义如下:
do_cld的实现如下:
// Applies this closure's oop closure and klass closure to `cld`, passing
// along whether the ClassLoaderData must be claimed first.
void CLDToOopClosure::do_cld(ClassLoaderData* cld)
{
  cld->oops_do(_oop_closure, &_klass_closure, _must_claim_cld);
}
// Applies `f` to the oops held by this ClassLoaderData (the class loader, the
// dependencies and the handles) and, if `klass_closure` is non-NULL, applies
// it to the classes as well.
// must_claim: when true, the work is skipped unless claim() succeeds, so that
// a ClassLoaderData is not processed twice.
void ClassLoaderData::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
  if (must_claim) {
    // claim() returning false means it was already claimed.
    if (!claim()) {
      return;
    }
  }

  // Oops first.
  f->do_oop(&_class_loader);
  _dependencies.oops_do(f);
  _handles.oops_do(f);

  // Then the klasses, if requested.
  if (klass_closure == NULL) {
    return;
  }
  classes_do(klass_closure);
}
// Attempts to claim this ClassLoaderData by atomically switching _claimed
// from 0 to 1. Returns true only for the caller whose swap succeeded.
bool ClassLoaderData::claim() {
  // Fast path: already claimed.
  if (_claimed == 1) {
    return false;
  }
  // The CAS returns the previous value; 0 means we made the 0 -> 1 change.
  // Any other value means another thread got there first.
  const int previous = (int) Atomic::cmpxchg(1, &_claimed, 0);
  return previous == 0;
}
其中负责klass遍历的KlassToOopClosure定义如下:
image.png
do_klass的实现如下:
// Applies the wrapped oop closure to the oops of klass `k`.
void KlassToOopClosure::do_klass(Klass* k)
{
  assert(_oop_closure != NULL, "Not initialized?");
  k->oops_do(_oop_closure);
}
// Applies `cl` to the _java_mirror field (per the article, the
// java.lang.Class instance that holds the class's static fields).
void Klass::oops_do(OopClosure* cl)
{
  cl->do_oop(&_java_mirror);
}
6、InitialMarking 总结
InitialMarking就是第一遍的打标,要求JVM处于安全点,即STW的状态,年轻代和根节点的打标是分开处理的,前者用于找到年轻代对象持有的老年代对象的引用,后者主要用于找到线程执行栈帧中包含的老年代对象的引用。年轻代是基于当前使用内存区域, 根据_eden_chunk_array和_survivor_chunk_array中保存的在分配对象内存时采集的当时的top地址,将eden区和from区分成若干个子区域,每个子区域对应一个遍历任务,遍历这个区域所有对象所引用的其他对象,这些对象被引用了所以肯定是存活的,如果这些对象是老年代的则将这些对象的地址在bitMap中打标即可。根节点的对象遍历逻辑封装在gen_process_roots方法中,其对遍历到的对象的处理和年轻代是一样的,除此之外还多了一步对klass的遍历,实际就是遍历klass对应的类Class实例所引用的对象,因为类的静态属性就是由类Class实例维护的。
7、markFromRoots
markFromRoots用于处理CMS GC的第二个步骤,Marking,其并行执行的核心逻辑封装在CMSConcMarkingTask中,其实现如下:
// Runs the Marking step and advances the collector state.
// asynch == true: takes the CMS token and bitMapLock, runs the marking, and
// moves to Precleaning on success; on failure the state is left unchanged.
// asynch == false: must be at a safepoint; runs the marking and moves to
// FinalMarking.
// Returns the result of markFromRootsWork().
bool CMSCollector::markFromRoots(bool asynch) {
// Check the collector state.
assert(_collectorState == Marking, "inconsistent state?");
// Check that the calling thread is appropriate.
check_correct_thread_executing();
// The overflow list and preserved stacks must be empty.
verify_overflow_empty();
bool res;
if (asynch) {
// Asynchronous GC (run by the CMS thread, per the article).
if (UseAdaptiveSizePolicy) {
// Tell the size policy that concurrent marking is starting.
size_policy()->concurrent_marking_begin();
}
// Acquire the CMS token and bitMapLock for this scope.
CMSTokenSyncWithLocks ts(true, bitMapLock());
// TraceCPUTime and CMSPhaseAccounting are used for GC logging.
TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
CMSPhaseAccounting pa(this, "mark", _gc_tracer_cm->gc_id(), !PrintGCDetails);
res = markFromRootsWork(asynch);
if (res) {
// Success: advance to Precleaning.
_collectorState = Precleaning;
} else {
// The foreground GC wants to take over, so marking returned false and the
// state is left unchanged.
assert(_foregroundGCIsActive, "internal state inconsistency");
assert(_restart_addr == NULL, "foreground will restart from scratch");
if (PrintGCDetails) {
gclog_or_tty->print_cr("bailing out to foreground collection");
}
}
if (UseAdaptiveSizePolicy) {
// Tell the size policy that concurrent marking has ended.
size_policy()->concurrent_marking_end();
}
} else {
// The synchronous case must run at a safepoint.
assert(SafepointSynchronize::is_at_safepoint(),
"inconsistent with asynch == false");
if (UseAdaptiveSizePolicy) {
size_policy()->ms_collection_marking_begin();
}
// The foreground GC already holds all required locks here.
res = markFromRootsWork(asynch);
// Advance to FinalMarking (done regardless of res).
_collectorState = FinalMarking;
if (UseAdaptiveSizePolicy) {
GenCollectedHeap* gch = GenCollectedHeap::heap();
size_policy()->ms_collection_marking_end(gch->gc_cause());
}
}
verify_overflow_empty();
return res;
}
// Dispatches the marking work to the multi-threaded or the single-threaded
// implementation. Requires bitMapLock to be held and all work stacks to be
// empty on entry. Returns whatever the chosen implementation returns.
bool CMSCollector::markFromRootsWork(bool asynch) {
  assert_lock_strong(bitMapLock());
  verify_work_stacks_empty();
  verify_overflow_empty();

  // Per the article, CMSConcurrentMTEnabled is true by default.
  if (CMSConcurrentMTEnabled && ConcGCThreads > 0) {
    // Multi-threaded marking.
    return do_marking_mt(asynch);
  }
  // Single-threaded marking.
  return do_marking_st(asynch);
}
// Multi-threaded marking: runs a CMSConcMarkingTask on the concurrent worker
// gang, cooperating with yield requests, and restarts the task from
// _restart_addr for as long as that address is non-NULL.
// Returns false if a restart was pending while the foreground GC is active
// and asynch is true; returns true once the task has completed.
bool CMSCollector::do_marking_mt(bool asynch) {
assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
// Compute how many concurrent workers to use.
int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
conc_workers()->total_workers(),
conc_workers()->active_workers(),
Threads::number_of_non_daemon_threads());
conc_workers()->set_active_workers(num_workers);
CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
CMSConcMarkingTask tsk(this,
cms_space,
asynch,
conc_workers(),
task_queues());
// Set up the sub-task bookkeeping of the CMS space for this marking pass.
cms_space ->initialize_sequential_subtasks_for_marking(num_workers);
// Check the ReferenceProcessor configuration.
assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
assert(ref_processor()->discovery_is_mt(), "Discovery should be MT");
// Start the task on the workers. Per the article, the calling CMS thread
// blocks here until the task completes or yields.
conc_workers()->start_task(&tsk);
while (tsk.yielded()) {
// The task stopped because of a yield request: let the coordinator yield too.
tsk.coordinator_yield();
// Yield is over: resume the task until it completes or yields again.
conc_workers()->continue_task(&tsk);
}
// Either the task completed, or a restart address was recorded (per the
// article, on a marking stack overflow).
assert(tsk.completed() || _restart_addr != NULL, "Inconsistency");
// Keep restarting from _restart_addr until none is pending.
while (_restart_addr != NULL) {
// Per the article, CMSAbortSemantics (abort-on-overflow) defaults to false.
assert(!CMSAbortSemantics || tsk.aborted(), "Inconsistency");
if (_foregroundGCIsActive && asynch) {
// The foreground GC is active and we are the background collector: drop the
// restart address and give up; the foreground GC will redo the work.
_restart_addr = NULL;
return false;
}
// Hand the restart address to the task (this sets the task's own
// _restart_addr).
tsk.reset(_restart_addr);
// Re-initialize the sub-tasks, starting from the restart address.
cms_space ->initialize_sequential_subtasks_for_marking(num_workers,
_restart_addr);
_restart_addr = NULL;
// Run the task again.
conc_workers()->start_task(&tsk);
while (tsk.yielded()) {
tsk.coordinator_yield();
conc_workers()->continue_task(&tsk);
}
}
// The task must have completed successfully at this point.
assert(tsk.completed(), "Inconsistency");
assert(tsk.result() == true, "Inconsistency");
return true;
}
// Single-threaded marking: iterates over the mark bitmap with a
// MarkFromRootsClosure, re-iterating from _restart_addr for as long as that
// address is non-NULL.
// Returns false if a restart was pending while the foreground GC is active
// and asynch is true; returns true otherwise.
bool CMSCollector::do_marking_st(bool asynch) {
ResourceMark rm;
HandleMark hm;
// Temporarily set the reference processor's MT-discovery flag to false.
ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap,
&_markStack, CMSYield && asynch);
// Iterate over the whole bitmap.
_markBitMap.iterate(&markFromRootsClosure);
// If _restart_addr is non-NULL, a marking stack overflow
// occurred; we need to do a fresh iteration from the
// indicated restart address.
// (A non-NULL _restart_addr is what signals the overflow, not a NULL one.)
while (_restart_addr != NULL) {
if (_foregroundGCIsActive && asynch) {
// The foreground GC is active and we are the background collector: drop the
// restart address and give up; the foreground GC will redo the work.
_restart_addr = NULL;
return false; // indicating failure to complete marking
}
HeapWord* ra = _restart_addr;
// Reset the closure and iterate again, from ra to the end of the span.
markFromRootsClosure.reset(ra);
_restart_addr = NULL;
_markBitMap.iterate(&markFromRootsClosure, ra, _span.end());
}
return true;
}
// The set of per-worker oop task queues.
OopTaskQueueSet* task_queues() {
  return _task_queues;
}

// The yielding work gang used for concurrent phases.
YieldingFlexibleWorkGang* conc_workers() {
  return _conc_workers;
}
// Computes how many marking sub-tasks of marking_task_size() words are needed
// to cover the generation's reserved region (optionally starting from `low`)
// and stores that count, together with n_threads, in conc_par_seq_tasks().
// n_threads: number of participating threads; must be > 0.
// low: optional lower bound for the region to cover (per the article, the
// declaration defaults it to NULL).
void CompactibleFreeListSpace::
initialize_sequential_subtasks_for_marking(int n_threads,
HeapWord* low) {
assert(n_threads > 0, "Unexpected n_threads argument");
// The task size must exceed card_size_in_words and be a multiple of it.
const size_t task_size = marking_task_size();
assert(task_size > CardTableModRefBS::card_size_in_words &&
(task_size % CardTableModRefBS::card_size_in_words == 0),
"Otherwise arithmetic below would be incorrect");
MemRegion span = _gen->reserved();
if (low != NULL) {
if (span.contains(low)) {
// Align low down to a card boundary.
HeapWord* aligned_low = (HeapWord*)align_size_down((uintptr_t)low,
CardTableModRefBS::card_size);
// Intersect with [aligned_low, span.end()): since low lies inside span, the
// result is the part of span starting at aligned_low.
span = span.intersection(MemRegion(aligned_low, span.end()));
} else if (low > span.end()) {
// low lies beyond the end of span: nothing to cover.
span = MemRegion(low, low); // Null region
} // else use entire span
}
assert(span.is_empty() ||
((uintptr_t)span.start() % CardTableModRefBS::card_size == 0),
"span should start at a card boundary");
// Number of tasks = ceil(span size / task size).
size_t n_tasks = (span.word_size() + task_size - 1)/task_size;
// Sanity-check the computed task count.
assert((n_tasks == 0) == span.is_empty(), "Inconsistency");
assert(n_tasks == 0 ||
((span.start() + (n_tasks - 1)*task_size < span.end()) &&
(span.start() + n_tasks*task_size >= span.end())),
"n_tasks calculation incorrect");
SequentialSubTasksDone* pst = conc_par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
// Publish the thread and task counts.
pst->set_n_threads(n_threads);
pst->set_n_tasks((int)n_tasks);
}
// Size of one marking sub-task (value of _marking_task_size).
const size_t marking_task_size() const {
  return _marking_task_size;
}

// Sub-task bookkeeping object used for concurrent parallel marking.
SequentialSubTasksDone* conc_par_seq_tasks() {
  return &_conc_par_seq_tasks;
}
注意markFromRoots方法不要求在安全点上执行,且其遍历的区域只限于老年代。注意CMSConcMarkingTask在执行过程中,如果是异步GC,即CMSThread执行的GC,且其他线程要求CMS Thread执行yield,且同步GC,即VMThread执行的GC没有激活的时候,CMSConcMarkingTask会执行yield动作,在yield结束后再重新开始执行;另外,如果遍历过程中临时保存待遍历的oop的CMSMarkStack满了,则restart_addr不会为NULL,CMSConcMarkingTask会不断的从restart_addr处开始遍历,直到restart_addr为NULL为止。
7、CMSConcMarkingTask
CMSConcMarkingTask继承自YieldingFlexibleGangTask,新增的属性如下:
- CMSCollector* _collector; //关联的CMSCollector
- int _n_workers; // requested/desired # workers
- bool _asynch; //是否异步GC
- bool _result;
- CompactibleFreeListSpace* _cms_space; //关联的CMS Space
- char _pad_front[64]; //_pad_front和_pad_back都是为了保证_global_finger不跟其他属性在同一个缓存行中
- HeapWord* _global_finger; //_global_finger是多线程下用来表示全局的正在遍历的内存区域的终止地址
- char _pad_back[64];
- HeapWord* _restart_addr; //任务重新开始执行的起始地址,注意跟CMSCollector中的_restart_addr不一样
- Mutex* const _bit_map_lock; // BitMap的锁
- OopTaskQueueSet* _task_queues; //OopTaskQueue集合
- CMSConcMarkingTerminator _term; //_term和_term_term用于判断是否需要暂停当前线程的执行
- CMSConcMarkingTerminatorTerminator _term_term;
重点关注以下方法的实现
7.1、work
该方法是CMSConcMarkingTask执行的具体任务,其实现如下:
public:
// Builds a concurrent marking task for `cms_space`.
// collector: the owning CMSCollector; its bitMapLock() is cached in
// _bit_map_lock.
// asynch: stored in _asynch (later consulted by should_yield()).
// workers: the worker gang (not read in this constructor body).
// task_queues: per-worker queues, also handed to the terminator _term.
// Both _restart_addr and _global_finger start at the bottom of cms_space.
CMSConcMarkingTask(CMSCollector* collector,
CompactibleFreeListSpace* cms_space,
bool asynch,
YieldingFlexibleWorkGang* workers,
OopTaskQueueSet* task_queues):
YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
_collector(collector),
_cms_space(cms_space),
_asynch(asynch), _n_workers(0), _result(true),
_task_queues(task_queues),
_term(_n_workers, task_queues, _collector),
_bit_map_lock(collector->bitMapLock())
{
_requested_size = _n_workers;
// Let both terminator helpers refer back to this task.
_term.set_task(this);
_term_term.set_task(this);
_restart_addr = _global_finger = _cms_space->bottom();
}
// Work done by one concurrent marking worker: first scans and marks its share
// of the CMS space (do_scan_and_mark), then helps drain remaining work
// (do_work_steal). Each part is timed and optionally logged.
// worker_id: id of the worker executing this call; selects its work queue.
void CMSConcMarkingTask::work(uint worker_id) {
elapsedTimer _timer;
ResourceMark rm;
HandleMark hm;
// This worker's queue must be empty on entry.
assert(work_queue(worker_id)->size() == 0, "Expected to be empty");
// Scan the bitmap covering _cms_space, tracing through grey objects.
// Start timing.
_timer.start();
do_scan_and_mark(worker_id, _cms_space);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec",
worker_id, _timer.seconds());
}
// Restart the timer for the stealing part.
_timer.reset();
_timer.start();
do_work_steal(worker_id);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec",
worker_id, _timer.seconds());
}
assert(_collector->_markStack.isEmpty(), "Should have been emptied");
assert(work_queue(worker_id)->size() == 0, "Should have been emptied");
assert(_global_finger >= _cms_space->end(),
"All tasks have been completed");
DEBUG_ONLY(_collector->verify_overflow_empty();)
}
// The set of per-worker oop task queues used by this task.
OopTaskQueueSet* task_queues() {
  return _task_queues;
}

// The task queue belonging to worker `i`.
OopTaskQueue* work_queue(int i) {
  return task_queues()->queue(i);
}
// Claims chunk-sized sub-tasks of space `sp` one at a time and, for each
// claimed chunk, iterates over the marked bits of the mark bitmap within that
// chunk using a Par_MarkFromRootsClosure.
// i: index of the calling worker; selects its work queue.
// sp: the space to scan.
void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
int n_tasks = pst->n_tasks();
// The sub-task bookkeeping must have been initialized (or there are no tasks).
assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
uint nth_task = 0;
HeapWord* aligned_start = sp->bottom();
// _restart_addr starts out as bottom; after a stack overflow, reset() sets it
// to the restart address, and scanning then begins from there.
if (sp->used_region().contains(_restart_addr)) {
// Align _restart_addr down to a card boundary.
aligned_start =
(HeapWord*)align_size_down((uintptr_t)_restart_addr,
CardTableModRefBS::card_size);
}
size_t chunk_size = sp->marking_task_size();
// is_task_claimed stores the index of the claimed sub-task in nth_task; the
// loop ends when no unclaimed sub-task is left.
while (!pst->is_task_claimed(/* reference */ nth_task)) {
// Region of sub-task nth_task: chunk_size words starting at
// aligned_start + nth_task*chunk_size. aligned_start is card-aligned when it
// comes from _restart_addr, and chunk_size is asserted elsewhere to be a
// multiple of the card size in words.
MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
aligned_start + (nth_task+1)*chunk_size);
// Advance the global finger to the end of this chunk.
HeapWord* finger = span.end();
bump_global_finger(finger); // atomically
// Clip the chunk to the used part of the space.
span = span.intersection(sp->used_region());
if (!span.is_empty()) { // something to scan in this chunk
HeapWord* prev_obj;
assert(!span.contains(_restart_addr) || nth_task == 0,
"Inconsistency");
if (nth_task == 0) {
// First sub-task: start at _restart_addr if it lies in this chunk,
// otherwise at the start of the chunk.
if (span.contains(_restart_addr)) {
// In the case of a restart because of stack overflow,
// we might additionally skip a chunk prefix.
prev_obj = _restart_addr;
} else {
prev_obj = span.start();
}
} else {
// Find the start of the block that contains the chunk's start address.
prev_obj = sp->block_start_careful(span.start());
// Below we use a variant of block_size that uses the
// Printezis bits to avoid waiting for allocated
// objects to become initialized/parsable.
while (prev_obj < span.start()) {
// The chunk starts inside a block: step over that block using its size.
size_t sz = sp->block_size_no_stall(prev_obj, _collector);
if (sz > 0) {
prev_obj += sz;
} else {
// Size unknown (0): stop advancing. The article says this is not expected
// to happen.
break;
}
}
}
if (prev_obj < span.end()) {
// Iterate over the bitmap from prev_obj to the end of the chunk.
MemRegion my_span = MemRegion(prev_obj, span.end());
Par_MarkFromRootsClosure cl(this, _collector, my_span,
&_collector->_markBitMap,
work_queue(i),
&_collector->_markStack,
_asynch);
_collector->_markBitMap.iterate(&cl, my_span.start(), my_span.end());
} // else nothing to do for this task
} // else nothing to do for this task
}
// This thread has finished its share of the sub-tasks.
pst->all_tasks_completed();
}
// Raises _global_finger to `f` with a compare-and-swap loop. If the finger is
// already at or beyond `f`, it is left unchanged.
void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
HeapWord* read = _global_finger;
HeapWord* cur = read;
while (f > read) {
cur = read;
// Try to install f, expecting the value we last read.
read = (HeapWord*) Atomic::cmpxchg_ptr(f, &_global_finger, cur);
if (cur == read) {
// The swap succeeded, so the finger is now at least f.
assert(_global_finger >= f, "protocol consistency");
break;
}
}
}
// Drains this worker's queue, then keeps looking for more work: first in the
// shared overflow stack, then by stealing from other workers' queues. Exits
// when the terminator agrees that all workers are done; yields when asked to.
// i: index of the calling worker.
void CMSConcMarkingTask::do_work_steal(int i) {
OopTaskQueue* work_q = work_queue(i);
oop obj_to_scan;
CMSBitMap* bm = &(_collector->_markBitMap);
CMSMarkStack* ovflw = &(_collector->_markStack);
int* seed = _collector->hash_seed(i);
Par_ConcMarkingClosure cl(_collector, this, work_q, bm, ovflw);
while (true) {
// Process everything in this worker's own queue.
cl.trim_queue(0);
// The queue must now be empty.
assert(work_q->size() == 0, "Should have been emptied above");
if (get_work_from_overflow_stack(ovflw, work_q)) {
// Got some entries from the overflow stack into work_q; loop to drain them.
continue;
} else if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
// Stole one object from another worker's queue: scan it.
assert(obj_to_scan->is_oop(), "Should be an oop");
assert(bm->isMarked((HeapWord*)obj_to_scan), "Grey object");
obj_to_scan->oop_iterate(&cl);
} else if (terminator()->offer_termination(&_term_term)) {
// Termination was agreed: stop.
assert(work_q->size() == 0, "Impossible!");
break;
} else if (yielding() || should_yield()) {
// A yield is in progress or requested: yield this task.
yield();
}
}
}
// Moves a batch of entries from the overflow stack into `work_q`.
// The batch size is the smallest of: a quarter of the free room in work_q,
// ParGCDesiredObjsFromOverflowList (20 by default, per the article), and the
// current length of the overflow stack.
// Returns true if at least one entry was moved.
bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
                                                      OopTaskQueue* work_q) {
  // Nothing to take from an empty overflow stack.
  if (ovflw_stk->length() == 0) {
    return false;
  }
  // The destination queue is expected to be empty.
  assert(work_q->size() == 0, "Shouldn't steal");
  // Hold the stack's par_lock while popping.
  MutexLockerEx ml(ovflw_stk->par_lock(),
                   Mutex::_no_safepoint_check_flag);

  size_t batch = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                      (size_t)ParGCDesiredObjsFromOverflowList);
  batch = MIN2(batch, ovflw_stk->length());

  int remaining = (int) batch;
  while (remaining > 0) {
    oop cur = ovflw_stk->pop();
    assert(cur != NULL, "Counted wrong?");
    work_q->push(cur);
    remaining--;
  }
  return batch > 0;
}
// True while the task's status is YIELDING (per the article, the status set
// when the task's yield method is called).
bool yielding() const {
  return _status == YIELDING;
}

// Decides whether this task should yield now. All three must hold: the CMS
// thread has been asked to yield, the foreground GC is not active, and the
// task runs asynchronously. A synchronous task therefore never yields.
bool should_yield() {
  if (!ConcurrentMarkSweepThread::should_yield()) {
    return false;
  }
  if (_collector->foregroundGCIsActive()) {
    return false;
  }
  return _asynch;
}
上述代码中_restart_addr的修改是在Closure遍历的时候修改的,参考其调用链,如下:
image.png
注意work方法是多个GC线程并行执行的,对单个GC线程而言整体上分为两步,第一步do_scan_and_mark,该方法会从cmsSpace的bottom(第一次执行时)或者restart_addr(出现stack overflow的情形)处开始,按照cmsSpace的marking_task_size 将cmsSpace分割成若干个子区域,每个子区域对应一个遍历任务,遍历这个子区域在bitMap内对应的所有打标的bit位,即遍历在第一遍打标时被标记成存活的对象。第二步do_work_steal,当前线程do_scan_and_mark已经执行完成了,其他的GC线程有可能没有,则在do_work_steal方法中会从共享的_markStack或者其他GC线程对应的work_queue中偷出来还未处理完的oop来处理,即帮助其他的GC线程完成任务,从而加速整体上标记任务的完成。
7.2 coordinator_yield / reset
coordinator_yield用于让出CMS Thread线程的执行权限,reset用于重置_restart_addr等属性,让CMSConcMarkingTask可以重新执行,其实现如下:
// Lets the coordinating CMS thread yield: releases the bitmap lock and the
// CMS token, stops the collector's timer, sleeps in short steps while a yield
// is still requested, then re-acquires the token and the lock and restarts
// the timer.
void CMSConcMarkingTask::coordinator_yield() {
assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
"CMS thread should hold CMS token");
// Release _bit_map_lock (which must currently be held).
assert_lock_strong(_bit_map_lock);
_bit_map_lock->unlock();
// Release the CMS token.
ConcurrentMarkSweepThread::desynchronize(true);
// Acknowledge the yield request (per the article, this decrements a counter).
ConcurrentMarkSweepThread::acknowledge_yield_request();
// Stop the collector's timer.
_collector->stopTimer();
if (PrintCMSStatistics != 0) {
_collector->incrementYields();
}
// Per the article: in incremental (ICMS) mode this stops the CMS thread.
_collector->icms_wait();
for (unsigned i = 0; i < CMSCoordinatorYieldSleepCount &&
ConcurrentMarkSweepThread::should_yield() &&
!CMSCollector::foregroundGCIsActive(); ++i) {
// Give up the CPU by sleeping briefly, up to CMSCoordinatorYieldSleepCount
// times, while a yield is still requested and no foreground GC is active.
os::sleep(Thread::current(), 1, false);
ConcurrentMarkSweepThread::acknowledge_yield_request();
}
// Re-acquire the CMS token and the bitmap lock, then restart the timer.
ConcurrentMarkSweepThread::synchronize(true);
_bit_map_lock->lock_without_safepoint_check();
_collector->startTimer();
}
// Prepares the task for another run starting at `ra`: both the global finger
// and the task's restart address are set to `ra`, and the terminator is reset
// for reuse. Expects the previous run to have pushed the global finger to the
// end of the space.
void reset(HeapWord* ra) {
  assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)");
  _global_finger = ra;
  _restart_addr = ra;
  _term.reset_for_reuse();
}