systrace实现原理

2019-12-27  本文已影响0人  牛逼人物888

systrace是通过atrace和ftrace一起实现。

抓取systrace的adb命令如下:
adb shell atrace -t 8 -z gfx view wm am sched freq input > atrace

python命令:
python systrace.py -b 10240 -t 10 wm am input ss power view freq workq sched idle sync gfx view hal dalvik disk -a com.tencent.mm -o PD1982_weixin.html

systrace抓取实质是通过atrace实现,下面以ATRACE_CALL()为例说明systrace实现

// Example call site: a function instrumented with ATRACE_CALL().
void SurfaceFlinger::handleMessageRefresh() {
    // Expands to a ScopedTrace local named after this function: the trace
    // section begins here and ends when the local goes out of scope.
    ATRACE_CALL();
...
}
1、Native层实现

system/core/libutils/include/utils/Trace.h

// Two-level token pasting: PASTE lets its arguments (e.g. __LINE__) be
// macro-expanded first, then _PASTE glues the results together with ##.
#define _PASTE(x, y) x ## y
#define PASTE(x, y) _PASTE(x,y)
// ATRACE_NAME declares a ScopedTrace local whose identifier is ___tracer
// followed by the current line number, constructed with ATRACE_TAG and name.
#define ATRACE_NAME(name) android::ScopedTrace PASTE(___tracer, __LINE__) (ATRACE_TAG, name)

// ATRACE_CALL is an ATRACE_NAME that uses the current function name.
#define ATRACE_CALL() ATRACE_NAME(__FUNCTION__)

namespace android {

// Scope-bound trace section: the constructor calls atrace_begin() and the
// destructor calls atrace_end() with the same tag.
class ScopedTrace {
public:
    // Remembers the tag and opens a trace section labelled `name`.
    inline ScopedTrace(uint64_t tag, const char* name) : mTag(tag) {
        atrace_begin(mTag, name);
    }

    // Closes the section for the tag stored at construction time.
    inline ~ScopedTrace() {
        atrace_end(mTag);
    }

private:
    // Tag passed to both atrace_begin() and atrace_end().
    uint64_t mTag;
};

system/core/libcutils/include/cutils/trace.h

/*
 * Open a trace section labelled `name`, but only if `tag` is currently
 * enabled. The enabled case is hinted as unlikely, so the common path is a
 * single check followed by an immediate return.
 */
static inline void atrace_begin(uint64_t tag, const char* name)
{
    /* Out-of-line writer; declared at block scope, defined elsewhere. */
    void atrace_begin_body(const char*);

    if (!CC_UNLIKELY(atrace_is_tag_enabled(tag))) {
        return;
    }

    atrace_begin_body(name);
}

/*
 * Close the most recently opened trace section, but only if `tag` is
 * currently enabled. The enabled case is hinted as unlikely.
 */
static inline void atrace_end(uint64_t tag)
{
    /* Out-of-line writer; declared at block scope, defined elsewhere. */
    void atrace_end_body();

    if (!CC_UNLIKELY(atrace_is_tag_enabled(tag))) {
        return;
    }

    atrace_end_body();
}

system/core/libcutils/trace-dev.cpp

/*
 * Emit a "begin" record. With these arguments WRITE_MSG formats the message
 * as "B|<pid>|<name>" (the trailing value is the empty string).
 */
void atrace_begin_body(const char* name)
{
    WRITE_MSG("B|%d|", "%s", name, "");
}

/*
 * Emit an "end" record. With these arguments WRITE_MSG formats the message
 * as "E|<pid>" (both name and value are empty strings).
 */
void atrace_end_body()
{
    WRITE_MSG("E|%d", "%s", "", "");
}

system/core/libcutils/trace-dev.inc

/*
 * WRITE_MSG - format one trace message and write it to atrace_marker_fd.
 *
 * format_begin: printf format that consumes the pid (e.g. "B|%d|").
 * format_end:   printf format that consumes `value`.
 * name:         string placed between the two formats via "%s".
 * value:        argument consumed by format_end.
 *
 * The message is built in a stack buffer of ATRACE_MESSAGE_LENGTH bytes. If
 * the first snprintf() reports that the output did not fit, a warning is
 * logged and the message is formatted again with `name` cut down ("%.*s")
 * so that the whole message fits. The return value of write() is not
 * checked. The macro expands to a bare { ... } block and evaluates `name`
 * more than once.
 */
#define WRITE_MSG(format_begin, format_end, name, value) { \
    char buf[ATRACE_MESSAGE_LENGTH]; \
    int pid = getpid(); \
    int len = snprintf(buf, sizeof(buf), format_begin "%s" format_end, pid, \
        name, value); \
    if (len >= (int) sizeof(buf)) { \
        /* Given the sizeof(buf), and all of the current format buffers, \
         * it is impossible for name_len to be < 0 if len >= sizeof(buf). */ \
        int name_len = strlen(name) - (len - sizeof(buf)) - 1; \
        /* Truncate the name to make the message fit. */ \
        ALOGW("Truncated name in %s: %s\n", __FUNCTION__, name); \
        len = snprintf(buf, sizeof(buf), format_begin "%.*s" format_end, pid, \
            name_len, name, value); \
    } \
    write(atrace_marker_fd, buf, len); \
}

到这可以看出:
ATRACE_CALL其实就是往atrace_marker_fd写入函数名和进程pid等信息,其中atrace_marker_fd对应“/sys/kernel/debug/tracing/trace_marker”文件

2、内核层实现

systrace在内核层实质是通过ftrace来实现,systrace的内容是写入内核分配的ringbuffer里面的,开关的实质是disable/enable ringbuffer,ftrace的总开关/sys/kernel/debug/tracing/tracing_on
开关ftrace命令如下:
adb shell "echo 1 > /sys/kernel/debug/tracing/tracing_on"
adb shell "echo 0 > /sys/kernel/debug/tracing/tracing_on"

通过tracing_mark_write函数把内容写入ringbuffer
msm-4.19/kernel/trace/trace.c

/*
 * Write handler shown for the trace_marker file: copies the user-supplied
 * text into a TRACE_PRINT event reserved in the trace array's ring buffer.
 * Parts of the function are elided ("...") in this excerpt.
 *
 * In the visible code, `written` is set to cnt on success or -EFAULT when
 * the user copy faults, and -EBADF is returned when no event could be
 * reserved.
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
                    size_t cnt, loff_t *fpos)
{
...
    buffer = tr->trace_buffer.buffer;
    event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
                        irq_flags, preempt_count());
    /* If tracing has not been switched on, we return here and nothing is written to the ring buffer. */
    if (unlikely(!event))
        /* Ring buffer disabled, return as if not open for write */
        return -EBADF;
    entry = ring_buffer_event_data(event);
    trace_entry = (struct trace_entry *)entry;
    entry->ip = trace_entry->pid;

    /* Copy the payload from user space; a non-zero result means the copy faulted. */
    len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
    if (len) {
        /* Store the `faulted` placeholder instead and report -EFAULT. */
        memcpy(&entry->buf, faulted, FAULTED_SIZE);
        cnt = FAULTED_SIZE;
        written = -EFAULT;
    } else
        written = cnt;
    len = cnt;

    if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
        /* do not add \n before testing triggers, but add \0 */
        entry->buf[cnt] = '\0';
        tt = event_triggers_call(tr->trace_marker_file, entry, event);
    }

    /* Make sure the record ends with a newline followed by a NUL terminator. */
    if (entry->buf[cnt - 1] != '\n') {
        entry->buf[cnt] = '\n';
        entry->buf[cnt + 1] = '\0';
        stm_log(OST_ENTITY_TRACE_MARKER, entry, sizeof(*entry)+cnt + 2);
    } else {
        entry->buf[cnt] = '\0';
        stm_log(OST_ENTITY_TRACE_MARKER, entry, sizeof(*entry)+cnt + 1);
    }
    /* ip was temporarily set to the pid above; restore it before committing. */
    entry->ip = _THIS_IP_;
    __buffer_unlock_commit(buffer, event);

    if (tt)
        event_triggers_post_call(tr->trace_marker_file, tt);
...
}
3、ftrace实现内核函数追踪研究

大致原理参考下图:


ftrace是function trace的缩写。内核中每个静态追踪点(trace event)都由一个对应的tracepoint结构来表示,这个结构存放在特殊的section中。
msm-4.19/include/linux/tracepoint-defs.h

/* Descriptor of one tracepoint. */
struct tracepoint {
    const char *name;       /* Tracepoint name */
    struct static_key key;  /* Tested by trace_<name>() before any probe is run */
    int (*regfunc)(void);   /* NOTE(review): presumably a registration hook; not used in the code shown */
    void (*unregfunc)(void); /* NOTE(review): presumably the matching unregistration hook */
    struct tracepoint_func __rcu *funcs; /* RCU-published array of probe callbacks */
};

3.1 tracepoint的probe函数注册
以高通gpu驱动ftrace为例,通过以下命令可以触发tracepoint的probe函数注册
adb shell "echo 1 > /sys/kernel/debug/tracing/events/kgsl/enable"

msm-4.19/kernel/tracepoint.c

/*
 * Add the probe function to a tracepoint.
 */
/*
 * Add the probe function to a tracepoint.
 *
 * The visible tail of the function publishes the new probe array, turns on
 * the tracepoint's static key if it is not yet enabled, releases the old
 * array and returns 0. The code that builds tp_funcs/old is elided ("...").
 */
static int tracepoint_add_func(struct tracepoint *tp,
                   struct tracepoint_func *func, int prio)
{
...
    /*
     * rcu_assign_pointer has an smp_store_release() which makes sure
     * that the new probe callbacks array is consistent before setting
     * a pointer to it.  This array is referenced by __DO_TRACE from
     * include/linux/tracepoint.h using rcu_dereference_sched().
     */
    rcu_assign_pointer(tp->funcs, tp_funcs);
    /* Enable the static key so that trace_<name>() starts calling probes. */
    if (!static_key_enabled(&tp->key))
        static_key_slow_inc(&tp->key);
    release_probes(old);
    return 0;
}

tracepoint的probe函数实质是在下面这个宏里面定义,通过这个函数ftrace往对应的ringbuffer里面写入函数追踪数据
msm-4.19/include/trace/trace_events.h

/*
 * Macro-body fragment (the enclosing #define is not part of this excerpt)
 * that generates the probe function trace_event_raw_event_<call>().
 *
 * The generated probe returns early if trace_trigger_soft_disabled() says
 * so, computes the size of the event's dynamic data, reserves
 * sizeof(*entry) + __data_size bytes through trace_event_buffer_reserve(),
 * returns if the reservation fails, runs the event's `assign` code to fill
 * in the entry, and finally commits the buffer.
 */
static notrace void                         \
trace_event_raw_event_##call(void *__data, proto)           \
{                                   \
    struct trace_event_file *trace_file = __data;           \
    struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
    struct trace_event_buffer fbuffer;              \
    struct trace_event_raw_##call *entry;               \
    int __data_size;                        \
                                    \
    if (trace_trigger_soft_disabled(trace_file))            \
        return;                         \
                                    \
    __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
                                    \
    entry = trace_event_buffer_reserve(&fbuffer, trace_file,    \
                 sizeof(*entry) + __data_size);     \
                                    \
    if (!entry)                         \
        return;                         \
                                    \
    tstruct                             \
                                    \
    { assign; }                         \
                                    \
    trace_event_buffer_commit(&fbuffer,             \
                  sizeof(*entry) + __data_size);    \
}

3.2 probe函数的调用
以高通gpu驱动入列一条绘制命令_queue_drawobj函数为例
msm-4.19/drivers/gpu/msm/adreno_dispatch.c

/*
 * Append drawobj at the tail of the context's draw queue, advance the tail
 * index modulo ADRENO_CONTEXT_DRAWQUEUE_SIZE, bump the queued counter and
 * fire the adreno_cmdbatch_queued tracepoint with the updated count.
 */
static void _queue_drawobj(struct adreno_context *drawctxt,
    struct kgsl_drawobj *drawobj)
{
    /* Put the command into the queue */
    drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj;
    drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) %
            ADRENO_CONTEXT_DRAWQUEUE_SIZE;
    drawctxt->queued++;
    /* Tracepoint call: reports the object and the post-increment queue depth. */
    trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued);
}

通过trace_adreno_cmdbatch_queued来追踪_queue_drawobj函数,调用tracepoint的probe函数

接下来看下trace_adreno_cmdbatch_queued的实现
msm-4.19/drivers/gpu/msm/adreno_trace.h

/*
 * Definition of the adreno_cmdbatch_queued trace event.
 *
 * Callers pass the draw object and the current queue depth. The recorded
 * entry stores the context id, the object's timestamp, the queue depth, the
 * object's flags and the context priority; TP_printk prints them as
 * "ctx=... ctx_prio=... ts=... queued=... flags=...", where flags is decoded
 * with KGSL_DRAWOBJ_FLAGS or shown as "none" when zero.
 */
TRACE_EVENT(adreno_cmdbatch_queued,
    TP_PROTO(struct kgsl_drawobj *drawobj, unsigned int queued),
    TP_ARGS(drawobj, queued),
    TP_STRUCT__entry(
        __field(unsigned int, id)
        __field(unsigned int, timestamp)
        __field(unsigned int, queued)
        __field(unsigned int, flags)
        __field(unsigned int, prio)
    ),
    TP_fast_assign(
        __entry->id = drawobj->context->id;
        __entry->timestamp = drawobj->timestamp;
        __entry->queued = queued;
        __entry->flags = drawobj->flags;
        __entry->prio = drawobj->context->priority;
    ),
    TP_printk(
        "ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s",
            __entry->id, __entry->prio,
            __entry->timestamp, __entry->queued,
            __entry->flags ? __print_flags(__entry->flags, "|",
                        KGSL_DRAWOBJ_FLAGS) : "none"
    )
);

trace_adreno_cmdbatch_queued真正定义在下面这个宏里面
msm-4.19/include/linux/tracepoint.h

/*
 * __DECLARE_TRACE - declare a tracepoint and define its trace_<name>() hook.
 *
 * name:       tracepoint name; the hook is called trace_<name>.
 * proto:      parameter list of trace_<name>().
 * args:       not used by the part of the macro shown in this excerpt.
 * cond:       condition that must hold for the probes to be called.
 * data_proto: probe prototype handed to __DO_TRACE.
 * data_args:  probe arguments handed to __DO_TRACE.
 *
 * trace_<name>() tests the tracepoint's static key and only then runs
 * __DO_TRACE. When CONFIG_LOCKDEP is enabled and cond holds, it also
 * performs an rcu_dereference_sched() of the probe array inside an
 * rcu_read_lock_sched_notrace() section, whose result is discarded
 * (NOTE(review): presumably so RCU usage is checked even while the
 * tracepoint is disabled - confirm against the kernel sources).
 *
 * Annotations inside the macro body must be block comments followed by a
 * continuation backslash; a // comment would swallow the line end and cut
 * the macro short.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
    extern struct tracepoint __tracepoint_##name;           \
    /* With name == adreno_cmdbatch_queued this defines trace_adreno_cmdbatch_queued(). */ \
    static inline void trace_##name(proto)              \
    {                               \
        if (static_key_false(&__tracepoint_##name.key))     \
            __DO_TRACE(&__tracepoint_##name,        \
                TP_PROTO(data_proto),           \
                TP_ARGS(data_args),         \
                TP_CONDITION(cond), 0);         \
        if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {     \
            rcu_read_lock_sched_notrace();          \
            rcu_dereference_sched(__tracepoint_##name.funcs);\
            rcu_read_unlock_sched_notrace();        \
        }                           \
    }

最终调用到tracepoint注册的probe函数
msm-4.19/include/linux/tracepoint.h

/*
 * __DO_TRACE - call every probe currently attached to tracepoint tp.
 *
 * tp:      pointer to the struct tracepoint.
 * proto:   probe prototype, used to cast each stored function pointer.
 * args:    argument list passed to each probe.
 * cond:    if false, the macro executes `return` in the enclosing function.
 * rcuidle: non-zero for callers on the rcuidle path; adds an SRCU read-side
 *          section and rcu_irq_enter_irqson()/rcu_irq_exit_irqson().
 *
 * Probes run with preemption disabled. The probe array is walked until an
 * element whose func is NULL is reached. __data is loaded for every element
 * (NOTE(review): presumably referenced through `args` - confirm at the
 * expansion site).
 */
#define __DO_TRACE(tp, proto, args, cond, rcuidle)          \
    do {                                \
        struct tracepoint_func *it_func_ptr;            \
        void *it_func;                      \
        void *__data;                       \
        int __maybe_unused __idx = 0;               \
                                    \
        if (!(cond))                        \
            return;                     \
                                    \
        /* srcu can't be used from NMI */           \
        WARN_ON_ONCE(rcuidle && in_nmi());          \
                                    \
        /* keep srcu and sched-rcu usage consistent */      \
        preempt_disable_notrace();              \
                                    \
        /*                          \
         * For rcuidle callers, use srcu since sched-rcu    \
         * doesn't work from the idle path.         \
         */                         \
        if (rcuidle) {                      \
            __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
            rcu_irq_enter_irqson();             \
        }                           \
                                    \
        it_func_ptr = rcu_dereference_raw((tp)->funcs);     \
                                    \
        if (it_func_ptr) {                  \
            do {                        \
                it_func = (it_func_ptr)->func;      \
                __data = (it_func_ptr)->data;       \
                ((void(*)(proto))(it_func))(args);  \
            } while ((++it_func_ptr)->func);        \
        }                           \
                                    \
        if (rcuidle) {                      \
            rcu_irq_exit_irqson();              \
            srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
        }                           \
                                    \
        preempt_enable_notrace();               \
    } while (0)

总结下:Native层通过写trace_marker文件,触发内核的tracing_mark_write往ftrace的ringbuffer里面写入数据;而内核函数则通过调用其tracepoint上注册的probe函数往ftrace ringbuffer写入数据。

上一篇下一篇

猜你喜欢

热点阅读