从汇编层面分析函数调用
2019-04-02 本文已影响0人
DevHuangjb
这篇文章将探讨:
- 函数调用如何返回
- 函数返回值底层实现
- 函数参数传递底层实现
- 函数调用堆栈平衡
你需要知道:
- pc:程序计数器,记录当前执行的代码地址,对其赋值会跳转到对应的地址执行
- lr (r30):链接寄存器(link register),保存函数的返回地址;当执行ret指令时,lr寄存器的值会赋给pc寄存器
- fp (r29):frame pointer寄存器,用来保存栈底指针
- sp:stack pointer寄存器,用来保存栈顶指针
- 通用寄存器r0-r7(在ARM64汇编中写作x0-x7,其低32位写作w0-w7)用于函数调用的参数传递,如果参数多于8个就用栈空间来辅助
- 通用寄存器r0通常作为函数返回值的中转站
函数调用如何返回?
写一个最简单的没有传递参数,没有返回值的函数
#import <Foundation/Foundation.h>
// Empty function: takes no arguments, returns nothing, and has no body.
void test(){
}
// Entry point: calls test() once and then returns 0.
int main(int argc, char * argv[]) {
test();
return 0;
}
接下来看看这个简单调用在汇编层面的代码:
TestStack`main:
0x10000e920 <+0>: sub sp, sp, #0x20 ; =0x20
0x10000e924 <+4>: stp x29, x30, [sp, #0x10]
0x10000e928 <+8>: add x29, sp, #0x10 ; =0x10
0x10000e92c <+12>: stur wzr, [x29, #-0x4]
0x10000e930 <+16>: str w0, [sp, #0x8]
0x10000e934 <+20>: str x1, [sp]
-> 0x10000e938 <+24>: bl 0x10000e91c ; test at main.m:12
0x10000e93c <+28>: mov w0, #0x0
0x10000e940 <+32>: ldp x29, x30, [sp, #0x10]
0x10000e944 <+36>: add sp, sp, #0x20 ; =0x20
0x10000e948 <+40>: ret
TestStack`test:
-> 0x10008691c <+0>: ret
********************分析***********************
把断点打在‘bl 0x10000e91c’
(lldb) register read
lr = 0x00000001993968b8 libdyld.dylib`start + 4
pc = 0x00000001000f6938 TestStack`main + 24 at main.m:16
(lldb) si
(lldb) register read
General Purpose Registers:
lr = 0x00000001000f693c TestStack`main + 28 at main.m:16
pc = 0x00000001000f691c TestStack`test at main.m:12
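可以看出执行‘bl 0x10000e91c’后,pc寄存器被赋值为0x00000001000f691c,程序跳转到test函数执行;
lr寄存器被赋值为0x00000001000f693c,而这个地址正好是main函数里面bl指令的下一条指令(main + 28)的地址。
(注:上面的反汇编和lldb输出中的绝对地址并不一致,应该是取自不同的运行、程序每次加载的基址不同(ASLR)所致——0x10000e91c、0x10008691c、0x1000f691c 指的都是test函数的入口,可以通过“符号 + 偏移”(如 main + 24、main + 28)来对应。)
所以在test函数里面执行ret指令后,lr寄存器的值赋给pc寄存器,实现函数调用的返回。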
函数如何把返回值返回给调用者?
改一下test函数,让它简单的返回一个值,比如:
// Returns the constant 10.
int test(){
return 10;
}
// Entry point: stores the value returned by test() in the local `result`, then returns 0.
int main(int argc, char * argv[]) {
int result = test();
return 0;
}
接下来看看在汇编层面的代码:
TestStack`main:
0x100076918 <+0>: sub sp, sp, #0x30 ; =0x30
0x10007691c <+4>: stp x29, x30, [sp, #0x20]
0x100076920 <+8>: add x29, sp, #0x20 ; =0x20
0x100076924 <+12>: stur wzr, [x29, #-0x4]
0x100076928 <+16>: stur w0, [x29, #-0x8]
0x10007692c <+20>: str x1, [sp, #0x10]
-> 0x100076930 <+24>: bl 0x100076910 ; test at main.m:11
0x100076934 <+28>: mov w8, #0x0
0x100076938 <+32>: str w0, [sp, #0xc]
0x10007693c <+36>: mov x0, x8
0x100076940 <+40>: ldp x29, x30, [sp, #0x20]
0x100076944 <+44>: add sp, sp, #0x30 ; =0x30
0x100076948 <+48>: ret
TestStack`test:
-> 0x100076910 <+0>: mov w0, #0xa
0x100076914 <+4>: ret
********************分析***********************
我们在test函数里面简单的返回了10
在汇编层面,‘mov w0, #0xa’把0xa赋值给x0寄存器的低32位
在子程序执行返回后,调用函数通过读取x0寄存器的值拿到子程序的返回值,比如main函数里面‘str w0, [sp, #0xc]’就是把w0寄存器的值赋给局部变量
函数如何实现参数的传递?
再一次修改test函数如下:
// Takes nine int parameters and accumulates them into the local `a`; nothing is returned.
void test(int para1, int para2, int para3, int para4, int para5, int para6, int para7, int para8, int para9){
int a = 8;
// Adds all nine parameters to a; the result is only stored back into the local.
a = a + para1 + para2 + para3 + para4 + para5 + para6 + para7 + para8 + para9;
}
// Entry point: calls test() with the constants 1 through 9, then returns 0.
int main(int argc, char * argv[]) {
test(1,2,3,4,5,6,7,8,9);
return 0;
}
接下来看看在汇编层面的代码:
TestStack`main:
0x1000be8ec <+0>: sub sp, sp, #0x30 ; =0x30
0x1000be8f0 <+4>: stp x29, x30, [sp, #0x20]
0x1000be8f4 <+8>: add x29, sp, #0x20 ; =0x20
0x1000be8f8 <+12>: orr w8, wzr, #0x1 ;1赋给r8
0x1000be8fc <+16>: orr w9, wzr, #0x2 ;2赋给r9
0x1000be900 <+20>: orr w2, wzr, #0x3 ;3赋给r2
0x1000be904 <+24>: orr w3, wzr, #0x4 ;4赋给r3
0x1000be908 <+28>: mov w4, #0x5 ;5赋给r4
0x1000be90c <+32>: orr w5, wzr, #0x6 ;6赋给r5
0x1000be910 <+36>: orr w6, wzr, #0x7 ;7赋给r6
0x1000be914 <+40>: orr w7, wzr, #0x8 ;8赋给r7
0x1000be918 <+44>: mov w10, #0x9 ;9赋给r10
0x1000be91c <+48>: stur wzr, [x29, #-0x4]
0x1000be920 <+52>: stur w0, [x29, #-0x8]
0x1000be924 <+56>: str x1, [sp, #0x10]
0x1000be928 <+60>: mov x0, x8 ;r8(值为1)赋给r0
0x1000be92c <+64>: mov x1, x9 ;r9(值为2)赋给r1
0x1000be930 <+68>: str w10, [sp] ;把r10(值为9)的值入栈,第9个参数通过栈传递
;在跳转到test函数前:r0-r7相应地被赋值为1-8
0x1000be934 <+72>: bl 0x1000be860 ; test at main.m:11
0x1000be938 <+76>: mov w8, #0x0
0x1000be93c <+80>: mov x0, x8
0x1000be940 <+84>: ldp x29, x30, [sp, #0x20]
0x1000be944 <+88>: add sp, sp, #0x30 ; =0x30
0x1000be948 <+92>: ret
TestStack`test:
0x1000be860 <+0>: sub sp, sp, #0x30 ; =0x30
0x1000be864 <+4>: ldr w8, [sp, #0x30] ;main函数把para9入栈,这里把para9的值从栈空间读出来赋给r8
-> 0x1000be868 <+8>: orr w9, wzr, #0x8 ;局部变量‘a=8’,r9=8
0x1000be86c <+12>: str w0, [sp, #0x2c] ;r0(para1)的值入栈
0x1000be870 <+16>: str w1, [sp, #0x28] ;r1(para2)的值入栈
0x1000be874 <+20>: str w2, [sp, #0x24] ;r2(para3)的值入栈
0x1000be878 <+24>: str w3, [sp, #0x20] ;r3(para4)的值入栈
0x1000be87c <+28>: str w4, [sp, #0x1c] ;r4(para5)的值入栈
0x1000be880 <+32>: str w5, [sp, #0x18] ;r5(para6)的值入栈
0x1000be884 <+36>: str w6, [sp, #0x14] ;r6(para7)的值入栈
0x1000be888 <+40>: str w7, [sp, #0x10] ;r7(para8)的值入栈
0x1000be88c <+44>: str w9, [sp, #0xc] ;局部变量a入栈
0x1000be890 <+48>: ldr w9, [sp, #0xc] ;从栈空间取出局部变量的值
0x1000be894 <+52>: ldr w0, [sp, #0x2c] ;从栈空间取出para1的值
0x1000be898 <+56>: add w9, w9, w0 ;加法运算:局部变量a = a + para1
0x1000be89c <+60>: ldr w0, [sp, #0x28] ;从栈空间取出para2的值
0x1000be8a0 <+64>: add w9, w9, w0 ;加法运算:局部变量a = a + para2
0x1000be8a4 <+68>: ldr w0, [sp, #0x24] ;......以下操作一样
0x1000be8a8 <+72>: add w9, w9, w0
0x1000be8ac <+76>: ldr w0, [sp, #0x20]
0x1000be8b0 <+80>: add w9, w9, w0
0x1000be8b4 <+84>: ldr w0, [sp, #0x1c]
0x1000be8b8 <+88>: add w9, w9, w0
0x1000be8bc <+92>: ldr w0, [sp, #0x18]
0x1000be8c0 <+96>: add w9, w9, w0
0x1000be8c4 <+100>: ldr w0, [sp, #0x14]
0x1000be8c8 <+104>: add w9, w9, w0
0x1000be8cc <+108>: ldr w0, [sp, #0x10]
0x1000be8d0 <+112>: add w9, w9, w0
0x1000be8d4 <+116>: ldr w0, [sp, #0x30]
0x1000be8d8 <+120>: add w9, w9, w0
0x1000be8dc <+124>: str w9, [sp, #0xc]
0x1000be8e0 <+128>: str w8, [sp, #0x8]
0x1000be8e4 <+132>: add sp, sp, #0x30 ; =0x30
0x1000be8e8 <+136>: ret
********************分析***********************
从上面的分析过程可以看出,函数参数首先会使用通用寄存器r0-r7传递;当传递的参数超过8个的时候,多出来的参数由调用者(这里是main函数,见‘str w10, [sp]’)写入自己的栈空间,被调用函数再从栈上把它读出来(见test函数中的‘ldr w8, [sp, #0x30]’),以此辅助实现参数的传递
函数调用的堆栈平衡?
我们知道,函数调用的时候,系统会为该函数分配一个栈空间,在函数执行结束后栈空间会被系统回收。那么,系统底层是如何实现这个机制的呢?
函数根据是否调用子函数分为:
- 叶子函数(没有调用其他子函数)
- 非叶子函数(有调用其他子函数)
先来分析下叶子函数的堆栈平衡
// Returns a + b. Calls no other function.
int sum (int a, int b) {
return a+b;
}
// Entry point: stores sum(4, 5) in the local `result`; there is no explicit return statement.
int main(int argc, char * argv[]) {
int result = sum(4, 5);
}
汇编:
TestStack`main:
0x100062910 <+0>: sub sp, sp, #0x30 ; =0x30
0x100062914 <+4>: stp x29, x30, [sp, #0x20]
0x100062918 <+8>: add x29, sp, #0x20 ; =0x20
0x10006291c <+12>: orr w8, wzr, #0x4
0x100062920 <+16>: mov w9, #0x5
0x100062924 <+20>: stur w0, [x29, #-0x4]
0x100062928 <+24>: str x1, [sp, #0x10]
0x10006292c <+28>: mov x0, x8
0x100062930 <+32>: mov x1, x9
0x100062934 <+36>: bl 0x1000628f0 ; sum at main.m:11
0x100062938 <+40>: mov w8, #0x0
0x10006293c <+44>: str w0, [sp, #0xc]
0x100062940 <+48>: mov x0, x8
0x100062944 <+52>: ldp x29, x30, [sp, #0x20]
0x100062948 <+56>: add sp, sp, #0x30 ; =0x30
0x10006294c <+60>: ret
TestStack`sum:
;sp = sp - 0x10,栈顶指针下移16个内存单元,也就是系统为sum函数开辟了16个字节的栈空间大小
-> 0x1000628f0 <+0>: sub sp, sp, #0x10
;w0存的是函数参数‘int a’,下面把参数a 入栈
0x1000628f4 <+4>: str w0, [sp, #0xc]
;w1存的是函数参数‘int b’,下面把参数b 入栈
0x1000628f8 <+8>: str w1, [sp, #0x8]
;从栈空间中读出参数a的值,赋给w0
0x1000628fc <+12>: ldr w0, [sp, #0xc]
;从栈空间中读出参数b的值,赋给w1
0x100062900 <+16>: ldr w1, [sp, #0x8]
;w0 = a + b,函数返回,外部函数通过w0拿到计算值
0x100062904 <+20>: add w0, w0, w1
;栈顶指针上移16个内存单元,栈空间实现平衡。刚进入sum函数开辟的16个字节的栈空间被回收(看上去像是被回收,实际只是上移了栈顶指针,内存内容依旧存在,等到后面其他函数的栈空间分配时才会对这段栈空间内存进行设置)
0x100062908 <+24>: add sp, sp, #0x10 ; =0x10
0x10006290c <+28>: ret
再来看看非叶子函数如何实现堆栈平衡
// Returns a + b. Calls no other function.
int sum (int a, int b) {
return a+b;
}
// Calls sum(a, b) and stores the result in the local `c`; nothing is returned.
void test (int a, int b) {
int c = sum(a, b);
}
// Entry point: calls test(4, 6); there is no explicit return statement.
int main(int argc, char * argv[]) {
test(4, 6);
}
汇编:
TestStack`main:
;register read读取main函数进来后:
;fp = 0x000000016fdf79e0
;sp = 0x000000016fdf79d0
0x100076914 <+0>: sub sp, sp, #0x20 ; =0x20
0x100076918 <+4>: stp x29, x30, [sp, #0x10]
0x10007691c <+8>: add x29, sp, #0x10 ; =0x10
0x100076920 <+12>: orr w8, wzr, #0x4
0x100076924 <+16>: orr w9, wzr, #0x6
0x100076928 <+20>: stur w0, [x29, #-0x4]
0x10007692c <+24>: str x1, [sp]
0x100076930 <+28>: mov x0, x8
0x100076934 <+32>: mov x1, x9
0x100076938 <+36>: bl 0x1000768e4 ; test at main.m:15
0x10007693c <+40>: mov w8, #0x0
0x100076940 <+44>: mov x0, x8
0x100076944 <+48>: ldp x29, x30, [sp, #0x10]
0x100076948 <+52>: add sp, sp, #0x20 ; =0x20
0x10007694c <+56>: ret
TestStack`test:
;fp = 0x000000016fdf79e0
;sp = sp - 0x20 = 0x000000016fdf79b0
0x1000768e4 <+0>: sub sp, sp, #0x20 ; =0x20
;现场保护,把进入test函数时的fp(x29),lr(x30)入栈保存:此时fp是main函数的栈底指针,lr是test函数执行完后返回main函数的地址;后面的‘bl’跳转到sum函数时会改写lr,所以必须先保存,很重要
-> 0x1000768e8 <+4>: stp x29, x30, [sp, #0x10]
;设置自己的fp(栈底指针)
0x1000768ec <+8>: add x29, sp, #0x10 ; =0x10
0x1000768f0 <+12>: stur w0, [x29, #-0x4]
0x1000768f4 <+16>: str w1, [sp, #0x8]
0x1000768f8 <+20>: ldur w0, [x29, #-0x4]
0x1000768fc <+24>: ldr w1, [sp, #0x8]
;跳转到sum函数
0x100076900 <+28>: bl 0x1000768c4 ; sum at main.m:11
;sum函数把返回值放在r0寄存器,这边把返回值赋给局部变量‘int c’
0x100076904 <+32>: str w0, [sp, #0x4]
;恢复现场,把入栈保存的fp(main函数的栈底指针)和lr(test函数返回main函数的地址)出栈恢复
0x100076908 <+36>: ldp x29, x30, [sp, #0x10]
;栈顶指针上移32个内存单元,栈空间实现平衡。
0x10007690c <+40>: add sp, sp, #0x20 ; =0x20
0x100076910 <+44>: ret
TestStack`sum:
-> 0x1000768c4 <+0>: sub sp, sp, #0x10 ; =0x10
0x1000768c8 <+4>: str w0, [sp, #0xc]
0x1000768cc <+8>: str w1, [sp, #0x8]
0x1000768d0 <+12>: ldr w0, [sp, #0xc]
0x1000768d4 <+16>: ldr w1, [sp, #0x8]
0x1000768d8 <+20>: add w0, w0, w1
0x1000768dc <+24>: add sp, sp, #0x10 ; =0x10
0x1000768e0 <+28>: ret