systrace实现原理
systrace是通过atrace和ftrace一起实现。
抓取systrace的adb命令如下:
adb shell atrace -t 8 -z gfx view wm am sched freq input > atrace
python命令:
python systrace.py -b 10240 -t 10 wm am input ss power view freq workq sched idle sync gfx view hal dalvik disk -a com.tencent.mm -o PD1982_weixin.html
systrace抓取实质是通过atrace实现,下面以ATRACE_CALL()为例说明systrace实现
void SurfaceFlinger::handleMessageRefresh() {
ATRACE_CALL();
...
}
1、Native层实现
system/core/libutils/include/utils/Trace.h
#define _PASTE(x, y) x ## y
#define PASTE(x, y) _PASTE(x,y)
#define ATRACE_NAME(name) android::ScopedTrace PASTE(___tracer, __LINE__) (ATRACE_TAG, name)
// ATRACE_CALL is an ATRACE_NAME that uses the current function name.
#define ATRACE_CALL() ATRACE_NAME(__FUNCTION__)
namespace android {
class ScopedTrace {
public:
inline ScopedTrace(uint64_t tag, const char* name) : mTag(tag) {
atrace_begin(mTag, name);
}
inline ~ScopedTrace() {
atrace_end(mTag);
}
private:
uint64_t mTag;
};
system/core/libcutils/include/cutils/trace.h
static inline void atrace_begin(uint64_t tag, const char* name)
{
if (CC_UNLIKELY(atrace_is_tag_enabled(tag))) {
void atrace_begin_body(const char*);
atrace_begin_body(name);
}
}
static inline void atrace_end(uint64_t tag)
{
if (CC_UNLIKELY(atrace_is_tag_enabled(tag))) {
void atrace_end_body();
atrace_end_body();
}
}
system/core/libcutils/trace-dev.cpp
void atrace_begin_body(const char* name)
{
WRITE_MSG("B|%d|", "%s", name, "");
}
void atrace_end_body()
{
WRITE_MSG("E|%d", "%s", "", "");
}
system/core/libcutils/trace-dev.inc
#define WRITE_MSG(format_begin, format_end, name, value) { \
char buf[ATRACE_MESSAGE_LENGTH]; \
int pid = getpid(); \
int len = snprintf(buf, sizeof(buf), format_begin "%s" format_end, pid, \
name, value); \
if (len >= (int) sizeof(buf)) { \
/* Given the sizeof(buf), and all of the current format buffers, \
* it is impossible for name_len to be < 0 if len >= sizeof(buf). */ \
int name_len = strlen(name) - (len - sizeof(buf)) - 1; \
/* Truncate the name to make the message fit. */ \
ALOGW("Truncated name in %s: %s\n", __FUNCTION__, name); \
len = snprintf(buf, sizeof(buf), format_begin "%.*s" format_end, pid, \
name_len, name, value); \
} \
write(atrace_marker_fd, buf, len); \
}
到这可以看出:
ATRACE_CALL其实就是往atrace_marker_fd写入函数名和进程pid等信息,其中atrace_marker_fd对应“/sys/kernel/debug/tracing/trace_marker”文件
2、内核层实现
systrace在内核层实质是通过ftrace来实现,systrace的内容是写入内核分配的ringbuffer里面的,开关的实质是disable/enable ringbuffer,ftrace的总开关/sys/kernel/debug/tracing/tracing_on
开关ftrace命令如下:
adb shell echo 1 > /sys/kernel/debug/tracing/tracing_on
adb shell echo 0 > /sys/kernel/debug/tracing/tracing_on
通过tracing_mark_write函数把内容写入ringbuffer
msm-4.19/kernel/trace/trace.c
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
...
buffer = tr->trace_buffer.buffer;
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
irq_flags, preempt_count());
// 如果没有打开systrace的开关,这个地方就会返回,不会写入ringbuffer
if (unlikely(!event))
/* Ring buffer disabled, return as if not open for write */
return -EBADF;
entry = ring_buffer_event_data(event);
trace_entry = (struct trace_entry *)entry;
entry->ip = trace_entry->pid;
len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
if (len) {
memcpy(&entry->buf, faulted, FAULTED_SIZE);
cnt = FAULTED_SIZE;
written = -EFAULT;
} else
written = cnt;
len = cnt;
if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
/* do not add \n before testing triggers, but add \0 */
entry->buf[cnt] = '\0';
tt = event_triggers_call(tr->trace_marker_file, entry, event);
}
if (entry->buf[cnt - 1] != '\n') {
entry->buf[cnt] = '\n';
entry->buf[cnt + 1] = '\0';
stm_log(OST_ENTITY_TRACE_MARKER, entry, sizeof(*entry)+cnt + 2);
} else {
entry->buf[cnt] = '\0';
stm_log(OST_ENTITY_TRACE_MARKER, entry, sizeof(*entry)+cnt + 1);
}
entry->ip = _THIS_IP_;
__buffer_unlock_commit(buffer, event);
if (tt)
event_triggers_post_call(tr->trace_marker_file, tt);
...
}
3、ftrace实现内核函数追踪研究
大致原理参考下图:
ftrace就是function trace的缩写,每个函数的追踪都有一个对应的tracepoint结构来表示,这个结构存放在特殊的section内存中。
msm-4.19/include/linux/tracepoint-defs.h
struct tracepoint {
const char *name; /* Tracepoint name */
struct static_key key;
int (*regfunc)(void);
void (*unregfunc)(void);
struct tracepoint_func __rcu *funcs;
};
3.1 tracepoint的probe函数注册
以高通gpu驱动ftrace为例,通过以下命令可以触发tracepoint的probe函数注册
adb shell echo 1 > /sys/kernel/debug/tracing/events/kgsl/enable
msm-4.19/kernel/tracepoint.c
/*
* Add the probe function to a tracepoint.
*/
static int tracepoint_add_func(struct tracepoint *tp,
struct tracepoint_func *func, int prio)
{
...
/*
* rcu_assign_pointer has as smp_store_release() which makes sure
* that the new probe callbacks array is consistent before setting
* a pointer to it. This array is referenced by __DO_TRACE from
* include/linux/tracepoint.h using rcu_dereference_sched().
*/
rcu_assign_pointer(tp->funcs, tp_funcs);
if (!static_key_enabled(&tp->key))
static_key_slow_inc(&tp->key);
release_probes(old);
return 0;
}
tracepoint的probe函数实质是在下面这个宏里面定义,通过这个函数ftrace往对应的ringbuffer里面写入函数追踪数据
msm-4.19/include/trace/trace_events.h
static notrace void \
trace_event_raw_event_##call(void *__data, proto) \
{ \
struct trace_event_file *trace_file = __data; \
struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
struct trace_event_buffer fbuffer; \
struct trace_event_raw_##call *entry; \
int __data_size; \
\
if (trace_trigger_soft_disabled(trace_file)) \
return; \
\
__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
\
entry = trace_event_buffer_reserve(&fbuffer, trace_file, \
sizeof(*entry) + __data_size); \
\
if (!entry) \
return; \
\
tstruct \
\
{ assign; } \
\
trace_event_buffer_commit(&fbuffer, \
sizeof(*entry) + __data_size); \
}
3.2 probe函数的调用
以高通gpu驱动入列一条绘制命令_queue_drawobj函数为例
msm-4.19/drivers/gpu/msm/adreno_dispatch.c
static void _queue_drawobj(struct adreno_context *drawctxt,
struct kgsl_drawobj *drawobj)
{
/* Put the command into the queue */
drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj;
drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) %
ADRENO_CONTEXT_DRAWQUEUE_SIZE;
drawctxt->queued++;
trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued);
}
通过trace_adreno_cmdbatch_queued来追踪_queue_drawobj函数,调用tracepoint的probe函数
接下来看下trace_adreno_cmdbatch_queued的实现
msm-4.19/drivers/gpu/msm/adreno_trace.h
TRACE_EVENT(adreno_cmdbatch_queued,
TP_PROTO(struct kgsl_drawobj *drawobj, unsigned int queued),
TP_ARGS(drawobj, queued),
TP_STRUCT__entry(
__field(unsigned int, id)
__field(unsigned int, timestamp)
__field(unsigned int, queued)
__field(unsigned int, flags)
__field(unsigned int, prio)
),
TP_fast_assign(
__entry->id = drawobj->context->id;
__entry->timestamp = drawobj->timestamp;
__entry->queued = queued;
__entry->flags = drawobj->flags;
__entry->prio = drawobj->context->priority;
),
TP_printk(
"ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s",
__entry->id, __entry->prio,
__entry->timestamp, __entry->queued,
__entry->flags ? __print_flags(__entry->flags, "|",
KGSL_DRAWOBJ_FLAGS) : "none"
)
);
trace_adreno_cmdbatch_queued真正定义在下面这个宏里面
msm-4.19/include/linux/tracepoint.h
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
extern struct tracepoint __tracepoint_##name; \
// 这里就是trace_adreno_cmdbatch_queued定义
static inline void trace_##name(proto) \
{ \
if (static_key_false(&__tracepoint_##name.key)) \
__DO_TRACE(&__tracepoint_##name, \
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
TP_CONDITION(cond), 0); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
rcu_read_lock_sched_notrace(); \
rcu_dereference_sched(__tracepoint_##name.funcs);\
rcu_read_unlock_sched_notrace(); \
} \
} \
最终调用到tracepoint注册的probe函数
msm-4.19/include/linux/tracepoint.h
#define __DO_TRACE(tp, proto, args, cond, rcuidle) \
do { \
struct tracepoint_func *it_func_ptr; \
void *it_func; \
void *__data; \
int __maybe_unused __idx = 0; \
\
if (!(cond)) \
return; \
\
/* srcu can't be used from NMI */ \
WARN_ON_ONCE(rcuidle && in_nmi()); \
\
/* keep srcu and sched-rcu usage consistent */ \
preempt_disable_notrace(); \
\
/* \
* For rcuidle callers, use srcu since sched-rcu \
* doesn't work from the idle path. \
*/ \
if (rcuidle) { \
__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
rcu_irq_enter_irqson(); \
} \
\
it_func_ptr = rcu_dereference_raw((tp)->funcs); \
\
if (it_func_ptr) { \
do { \
it_func = (it_func_ptr)->func; \
__data = (it_func_ptr)->data; \
((void(*)(proto))(it_func))(args); \
} while ((++it_func_ptr)->func); \
} \
\
if (rcuidle) { \
rcu_irq_exit_irqson(); \
srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
} \
\
preempt_enable_notrace(); \
} while (0)
总结下:Native层调用内核的tracing_mark_write来往ftrace的ringbuffer里面写入数据,而内核函数调用其对应probe函数往ftrace ringbuffer写入数据。