function graph tracer原理

概述

Function graph相对function trace的不同点是,在函数入口会trace,在函数出口也会trace。

ksys_read
->vfs_read
->ftrace_caller
->prepare_ftrace_return
->function_graph_enter
->ftrace_push_return_trace(ret, func, frame_pointer, retp)
->trace_graph_entry    = funcgraph_ops.entryfunc
->__trace_graph_entry
    ->trace_buffer_unlock_commit_nostack(buffer, event)

   Xxxxxxx 函数体内容

   ->return_to_handler ----修改了vfs_read的ra寄存器,让其执行ret返回时跳转到这个函数。
   ->ftrace_return_to_handler
     ->ftrace_pop_return_trace(&trace, &ret, frame_pointer)
     ->ftrace_graph_return(&trace)
     ->trace_graph_return   =funcgraph_ops.retfunc
   ->__trace_graph_return(tr, trace, trace_ctx)
     ->trace_buffer_unlock_commit_nostack(buffer, event);
    ret

\"\"

注册

与前面章节的function tracer一样,当执行echo function_graph > current_tracer时,就会调用到graph_trace_init函数。
与function tracer一样,当echo function_graph > current_tracer后,函数入口的nop指令会被替换为对ftrace_caller的调用。

\"\"

当还没有写入function_graph时,ftrace_caller的实现如下,其中第一处ftrace_stub是用于function tracer替换的,而第二处则是给function_graph替换的。

\"\"

当写入function_graph到current_tracer后,第二处的ftrace_stub被替换为prepare_ftrace_return,如下:

\"\"

/*
 * Callbacks of the function_graph tracer: entryfunc is invoked when a
 * traced function is entered, retfunc when it returns. The structure is
 * handed to register_ftrace_graph() by graph_trace_init().
 */
static struct fgraph_ops funcgraph_ops = {
    .entryfunc = &trace_graph_entry,
    .retfunc = &trace_graph_return,
};


int graph_trace_init(struct trace_array *tr)
ret = register_ftrace_graph(&funcgraph_ops);
tracing_start_cmdline_record();
tracing_start_sched_switch(RECORD_CMDLINE); 

跟踪入口函数

下面我们来实际跟踪一下ftrace_caller之后的实现。

/*
 * ftrace_caller: trampoline reached from the patched entry of a traced
 * function. The patched call links through t0 instead of ra, so on entry
 * t0 holds the address to resume at inside the traced function, while ra
 * still holds the traced function's own return address.
 */
ENTRY(ftrace_caller)
    SAVE_ABI                /* push a0-a7, t0 and ra onto a new stack frame */

    addi    a0, t0, -FENTRY_RA_OFFSET   /* a0 = entry address of the traced function */
    la  a1, function_trace_op
    REG_L   a2, 0(a1)       /* a2 = value of the global function_trace_op */
    mv  a1, ra              /* a1 = return address of the traced function */
    mv  a3, sp              /* a3 = stack pointer (base of the saved registers) */

/* Patch site for the function tracer; remains ftrace_stub when only function_graph is enabled. */
ftrace_call:
    .global ftrace_call
    call    ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
addi    a0, sp, ABI_RA      /* a0 = address of the stack slot holding the saved ra (parent) */
    REG_L   a1, ABI_T0(sp)  /* reload the saved t0 */
    addi    a1, a1, -FENTRY_RA_OFFSET   /* a1 = entry address of the traced function */
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
    mv  a2, s0              /* a2 = frame pointer */
#endif
/* Patch site for function_graph; replaced by a call to prepare_ftrace_return when the tracer is enabled. */
ftrace_graph_call:
    .global ftrace_graph_call
    call    ftrace_stub
#endif
    RESTORE_ABI             /* reload the saved registers; ra may have been rewritten in its stack slot */
    jr t0                   /* continue with the body of the traced function */
ENDPROC(ftrace_caller)

上面代码经过宏展开后,变成下面的代码,我们从调用vfs_read的函数ksys_read开始分析。
ksys_read->vfs_read

...
0xffffffff80384e6e <ksys_read+122>:  auipc   ra,0xfffff
0xffffffff80384e72 <ksys_read+126>:  jalr    594(ra) # 0xffffffff803840c0 <vfs_read>
① ra=PC+4,即0xffffffff80384e76 ,PC=0xffffffff803840c0,这里保存了调用vfs_read的返回地址。
0xffffffff80384e76 <ksys_read+130>:  mv      s2,a0
0xffffffff80384e78 <ksys_read+132>:  bltz    a0,0xffffffff80384ef4 <ksys_read+256>

vfs_read->ftrace_caller

0xffffffff803840c0 <vfs_read+0>:     auipc   t0,0xffc88
0xffffffff803840c4 <vfs_read+4>:     jalr    t0,1404(t0) # 0xffffffff8000c63c <ftrace_caller>
② t0=PC+4即0xffffffff803840c8,PC=0xffffffff8000c63c,这里没有使用ra,而是使用了t0,因此ra得以传递到ftrace_caller。
0xffffffff803840c8 <vfs_read+8>:     addi    sp,sp,-176
0xffffffff803840ca <vfs_read+10>:    sd      s0,160(sp)

ftrace_caller->prepare_ftrace_return

Dump of assembler code for function ftrace_caller:
   0xffffffff8000c63c <+0>:     addi    sp,sp,-80
=> 0xffffffff8000c63e <+2>:     sd      a0,0(sp)
   0xffffffff8000c640 <+4>:     sd      a1,8(sp)
   0xffffffff8000c642 <+6>:     sd      a2,16(sp)
   0xffffffff8000c644 <+8>:     sd      a3,24(sp)
   0xffffffff8000c646 <+10>:    sd      a4,32(sp)
   0xffffffff8000c648 <+12>:    sd      a5,40(sp)
   0xffffffff8000c64a <+14>:    sd      a6,48(sp)
   0xffffffff8000c64c <+16>:    sd      a7,56(sp)
   0xffffffff8000c64e <+18>:    sd      t0,64(sp)  存储了ftrace_caller的返回地址。
   0xffffffff8000c650 <+20>:    sd      ra,72(sp)  存储了vfs_read的返回地址。
③开辟一个栈空间,将寄存器入栈
   0xffffffff8000c652 <+22>:    addi    a0,t0,-8
   0xffffffff8000c656 <+26>:    auipc   a1,0x251c
   0xffffffff8000c65a <+30>:    addi    a1,a1,578 # 0xffffffff82528898 <function_trace_op>
   0xffffffff8000c65e <+34>:    ld      a2,0(a1)
④获取全局变量function_trace_op,这是struct ftrace_ops实例,function_trace_op.func存储了跟踪函数。
   0xffffffff8000c660 <+36>:    mv      a1,ra
   0xffffffff8000c662 <+38>:    mv      a3,sp
⑤a1参数记录了vfs_read的返回地址,a3记录了栈
   0xffffffff8000c664 <+40>:    auipc   ra,0x0
   0xffffffff8000c668 <+44>:    jalr    -1604(ra) # 0xffffffff8000c020 <ftrace_stub>
⑥因为使能的是function_graph,所以ftrace_stub没有被替换
   0xffffffff8000c66c <+48>:    addi    a0,sp,72
⑦sp+72存储的是vfs_read的ra,因此a0的值即为保存vfs_read返回地址的栈位置的地址,*a0即返回地址。a0是第一个函数参数,对应prepare_ftrace_return的parent参数。
   0xffffffff8000c66e <+50>:    ld      a1,64(sp)
   0xffffffff8000c670 <+52>:    addi    a1,a1,-8
⑧ sp+64存储的是ftrace_caller的返回地址(即进入ftrace_caller时的t0),该返回地址减8就是vfs_read的入口地址,所以a1代表的是vfs_read的入口地址。
   0xffffffff8000c672 <+54>:    mv      a2,s0
⑨ a2为当前栈帧
   0xffffffff8000c674 <+56>:    auipc   ra,0x0
   0xffffffff8000c678 <+60>:    jalr    -1338(ra) # 0xffffffff8000c13a <prepare_ftrace_return>
⑩ 更新ra=0xffffffff8000c67c,跳转到prepare_ftrace_return
   0xffffffff8000c67c <+64>:    ld      a0,0(sp)
   0xffffffff8000c67e <+66>:    ld      a1,8(sp)
   0xffffffff8000c680 <+68>:    ld      a2,16(sp)
   0xffffffff8000c682 <+70>:    ld      a3,24(sp)
   0xffffffff8000c684 <+72>:    ld      a4,32(sp)
   0xffffffff8000c686 <+74>:    ld      a5,40(sp)
   0xffffffff8000c688 <+76>:    ld      a6,48(sp)
   0xffffffff8000c68a <+78>:    ld      a7,56(sp)
   0xffffffff8000c68c <+80>:    ld      t0,64(sp)
   0xffffffff8000c68e <+82>:    ld      ra,72(sp)
   0xffffffff8000c690 <+84>:    addi    sp,sp,80
   0xffffffff8000c692 <+86>:    jr      t0
End of assembler dump.

从上面可知,进入ftrace_caller后,会调用prepare_ftrace_return。
接下来看看prepare_ftrace_return

void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
               unsigned long frame_pointer)
{
    unsigned long return_hooker = (unsigned long)&return_to_handler;
    unsigned long old;

    if (unlikely(atomic_read(¤t->tracing_graph_pause)))
        return;

    /*
     * We don\'t suffer access faults, so no extra fault-recovery assembly
     * is needed here.
     */
    old = *parent;

    if (!function_graph_enter(old, self_addr, frame_pointer, parent))
        *parent = return_hooker;
    调用function_graph_enter,同时将vfs_read的返回地址修改为return_to_handler,也就是说当vfs_read函数执行返回时,会跳转到return_to_handler运行,这样就相当于在vfs_read的出口函数也插桩了。
}

function_graph_enter

/*
 * function_graph_enter - record the entry of a traced function.
 * @ret:           original return address of the traced function
 * @func:          entry address of the traced function
 * @frame_pointer: frame pointer value supplied by the caller
 * @retp:          address of the slot that holds @ret
 *
 * Returns 0 when the entry was recorded, -EBUSY otherwise. On failure the
 * per-task depth (and, if already pushed, the return-stack index) is
 * rolled back.
 */
int function_graph_enter(unsigned long ret, unsigned long func,
             unsigned long frame_pointer, unsigned long *retp)
{
    struct ftrace_graph_ent trace;

#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
    /*
     * Skip graph tracing if the return location is served by direct trampoline,
     * since call sequence and return addresses are unpredictable anyway.
     * Ex: BPF trampoline may call original function and may skip frame
     * depending on type of BPF programs attached.
     */
    if (ftrace_direct_func_count &&
        ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE))
        return -EBUSY;
#endif
    trace.func = func;
    trace.depth = ++current->curr_ret_depth;

    /*
     * (1) Store the traced function's return address, its address and
     * the entry timestamp in the current task's ret_stack.
     */
    if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
        goto out;

    /*
     * (2) Only trace if the calling function expects to. For the
     * function_graph tracer the entry callback is
     * funcgraph_ops.entryfunc = trace_graph_entry, which writes the
     * entry event into the ring buffer.
     */
    if (!ftrace_graph_entry(&trace))
        goto out_ret;

    return 0;
 out_ret:
    current->curr_ret_stack--;
 out:
    current->curr_ret_depth--;
    return -EBUSY;
}

ftrace_push_return_trace,将要跟踪函数vfs_read的返回地址、函数地址、进入时间存储到当前任务task_struct的ret_stack中。

/*
 * ftrace_push_return_trace - push one entry onto the current task's
 * function-graph return stack.
 * @ret:           original return address of the traced function
 * @func:          entry address of the traced function
 * @frame_pointer: frame pointer value, stored only when
 *                 HAVE_FUNCTION_GRAPH_FP_TEST is defined
 * @retp:          address of the slot holding @ret, stored only when
 *                 HAVE_FUNCTION_GRAPH_RET_ADDR_PTR is defined
 *
 * Returns 0 on success, or -EBUSY when graph tracing is dead, the task has
 * no ret_stack, or the return stack is full (trace_overrun is bumped in
 * the last case).
 */
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
             unsigned long frame_pointer, unsigned long *retp)
{
    unsigned long long calltime;
    int index;

    if (unlikely(ftrace_graph_is_dead()))
        return -EBUSY;

    if (!current->ret_stack)
        return -EBUSY;

    /*
     * We must make sure the ret_stack is tested before we read
     * anything else.
     */
    smp_rmb();

    /* The return trace stack is full */
    if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
        atomic_inc(&current->trace_overrun);
        return -EBUSY;
    }

    calltime = trace_clock_local();

    index = ++current->curr_ret_stack;
    barrier();
    /*
     * Record the return address, the function address and the entry
     * time in the current task's ret_stack; they are used again when
     * the traced function exits. "current" is the running task's
     * struct task_struct, which carries this ret_stack area
     * specifically for function graph tracing.
     */
    current->ret_stack[index].ret = ret;
    current->ret_stack[index].func = func;
    current->ret_stack[index].calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
    current->ret_stack[index].fp = frame_pointer;
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
    current->ret_stack[index].retp = retp;
#endif
    return 0;
}

跟踪出口函数

当被跟踪的函数要退出时,由于修改了其ra寄存器,因此会跳转到return_to_handler,本文的实例是vfs_read,因此当vfs_read函数执行完时,本应该执行ret就退出,但是在前面将ra的内容改了,继而跳转执行return_to_handler。

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * return_to_handler: reached when a traced function returns after
 * prepare_ftrace_return() replaced its return address. It calls
 * ftrace_return_to_handler() and then jumps to the address that call
 * returns.
 */
ENTRY(return_to_handler)
/*
 * On implementing the frame point test, the ideal way is to compare the
 * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return.
 * However, the psABI of variable-length-argument functions does not allow this.
 *
 * So alternatively we check the *old* frame pointer position, that is, the
 * value stored in -16(s0) on entry, and the s0 on return.
 */
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
    mv  t6, s0              /* keep s0 in t6 across SAVE_RET_ABI_STATE */
#endif
    /* NOTE(review): macro is defined elsewhere; presumably it saves the traced function's return-value registers - confirm */
    SAVE_RET_ABI_STATE
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
    mv  a0, t6              /* a0 = frame_pointer argument of ftrace_return_to_handler */
#endif
    call    ftrace_return_to_handler
    mv  a2, a0              /* a2 = address returned by ftrace_return_to_handler */
    RESTORE_RET_ABI_STATE
    jalr    a2              /* jump to that address */
ENDPROC(return_to_handler)
#endif

ftrace_return_to_handler

/*
 * ftrace_return_to_handler - record the exit of a traced function.
 * @frame_pointer: frame pointer value passed in by return_to_handler
 *
 * Returns the address that return_to_handler should jump to. If no return
 * address was recovered, graph tracing is stopped, a warning is raised and
 * the address of panic() is returned instead.
 */
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
{
    struct ftrace_graph_ret trace;
    unsigned long ret;

    /*
     * (1) Counterpart of ftrace_push_return_trace(): fetch the recorded
     * trace information, such as the traced function's original return
     * address.
     */
    ftrace_pop_return_trace(&trace, &ret, frame_pointer);
    trace.rettime = trace_clock_local();

    /*
     * (2) Invoke the graph return callback
     * (trace_graph_return -> __trace_graph_return), which writes the
     * exit event into the ring buffer.
     */
    ftrace_graph_return(&trace);
    /*
     * The ftrace_graph_return() may still access the current
     * ret_stack structure, we need to make sure the update of
     * curr_ret_stack is after that.
     */
    barrier();
    current->curr_ret_stack--;

    if (unlikely(!ret)) {
        ftrace_graph_stop();
        WARN_ON(1);
        /* Might as well panic. What else to do? */
        ret = (unsigned long)panic;
    }

    return ret;
}