dex文件
2018-01-04  本文已影响0人 
dumphex
Overview
source
- art/runtime/dex_file.h
- art/runtime/dex_file-inl.h
- art/runtime/dex_file_types.h
- art/runtime/dex_file.cc
数据结构
- class DexFile
- struct Header
(gdb) p /d sizeof('art::DexFile::Header') $404 = 112
- struct ClassDef
(gdb) p /d sizeof('art::DexFile::ClassDef') $398 = 32
- struct TypeId
(gdb) p /d sizeof('art::DexFile::TypeId') $399 = 4
- struct StringId
(gdb) p /d sizeof('art::DexFile::StringId') $400 = 4
- struct MethodId
(gdb) p /d sizeof('art::DexFile::MethodId') $402 = 8
- struct FieldId
(gdb) p /d sizeof('art::DexFile::FieldId') $403 = 8
- struct ProtoId
(gdb) p /d sizeof('art::DexFile::ProtoId') $401 = 12
- ClassDataItemIterator
- struct ClassDataHeader
- struct ClassDataField
- struct ClassDataMethod
- struct CodeItem
Dex文件布局
+------------+
|   Header   |
+------------+
| StringId[] |
+------------+
|  TypeId[]  |
+------------+
| ProtoId[]  |
+------------+
| FieldId[]  |
+------------+
| MethodId[] |
+------------+
| ClassDef[] |
+------------+
|    Data    |
+------------+
实现
这里会以core-oj.jar中的classes.dex为例进行解析.
Header
- Header定义
// Raw header_item. struct Header { uint8_t magic_[8]; uint32_t checksum_; // See also location_checksum_ uint8_t signature_[kSha1DigestSize]; uint32_t file_size_; // size of entire file uint32_t header_size_; // offset to start of next section uint32_t endian_tag_; uint32_t link_size_; // unused uint32_t link_off_; // unused uint32_t map_off_; // unused uint32_t string_ids_size_; // number of StringIds uint32_t string_ids_off_; // file offset of StringIds array uint32_t type_ids_size_; // number of TypeIds, we don't support more than 65535 uint32_t type_ids_off_; // file offset of TypeIds array uint32_t proto_ids_size_; // number of ProtoIds, we don't support more than 65535 uint32_t proto_ids_off_; // file offset of ProtoIds array uint32_t field_ids_size_; // number of FieldIds uint32_t field_ids_off_; // file offset of FieldIds array uint32_t method_ids_size_; // number of MethodIds uint32_t method_ids_off_; // file offset of MethodIds array uint32_t class_defs_size_; // number of ClassDefs uint32_t class_defs_off_; // file offset of ClassDef array uint32_t data_size_; // size of data section uint32_t data_off_; // file offset of data section // Decode the dex magic version uint32_t GetVersion() const; private: DISALLOW_COPY_AND_ASSIGN(Header); };
- 查看Header相关数据
$ hexdump -C -s 0 -n 112 classes.dex 00000000 64 65 78 0a 30 33 37 00 8e 5e a1 39 cb 37 4f f8 |dex.037..^.9.7O.| 00000010 a4 14 07 28 8c 0a db b6 cd 30 a8 fc 76 3e 11 65 |...(.....0..v>.e| 00000020 fc 55 4d 00 70 00 00 00 78 56 34 12 00 00 00 00 |.UM.p...xV4.....| 00000030 00 00 00 00 2c 55 4d 00 5d 86 00 00 70 00 00 00 |....,UM.]...p...| 00000040 4d 0f 00 00 e4 19 02 00 e7 1c 00 00 18 57 02 00 |M............W..| 00000050 d5 31 00 00 ec b1 03 00 b6 8a 00 00 94 40 05 00 |.1...........@..| 00000060 cd 0d 00 00 44 96 09 00 18 06 42 00 e4 4f 0b 00 |....D.....B..O..| 00000070
- dex Header使用dexdump2解析如下
DEX file header: magic : 'dex\n037\0' checksum : 39a15e8e signature : cb37...1165 file_size : 5068284 header_size : 112 link_size : 0 link_off : 0 (0x000000) string_ids_size : 34397 string_ids_off : 112 (0x000070) type_ids_size : 3917 type_ids_off : 137700 (0x0219e4) proto_ids_size : 7399 proto_ids_off : 153368 (0x025718) field_ids_size : 12757 field_ids_off : 242156 (0x03b1ec) method_ids_size : 35510 method_ids_off : 344212 (0x054094) class_defs_size : 3533 class_defs_off : 628292 (0x099644) data_size : 4326936 data_off : 741348 (0x0b4fe4)
Demo
下面会以BootClassLoader这个类举例说明上面重要的几个字段的含义.
... ...
Class #200            -
  Class descriptor  : 'Ljava/lang/BootClassLoader;'
  Access flags      : 0x0000 ()
  Superclass        : 'Ljava/lang/ClassLoader;'
... ...
即现在已知BootClassLoader类的ClassDef index是200
获取ClassDef
ClassDef的offset为: 0x099644 + 200 * 32 = 0x9af44
$ hexdump -C -s 0x9af44 -n 32 classes.dex 
0009af44  e9 00 00 00 00 00 00 00  fc 00 00 00 00 00 00 00  |................|
0009af54  d9 0d 00 00 a4 07 2f 00  d8 e0 49 00 00 00 00 00  |....../...I.....|
0009af64
根据struct ClassDef的定义
- class_idx_ = 0xe9
- access_flags_ = 0x00
- superclass_idx_ = 0xfc
- source_file_idx_ = 0x0dd9
- class_data_off_ = 0x49e0d8
查看类名
- 
class index class_idx_ 表示TypeId[]中的索引为0xe9 
- 
查看type id 0x0219e4 + 0xe9 * 4 = 0x21d88
$ hexdump -C -s 0x21d88 -n 4 classes.dex
00021d88  18 1b 00 00                                       |....|
00021d8c
TypeId[]的元素值表示描述符在StringId[]中的索引, 即descriptor_idx_ = 0x1b18
- 
查看string id 0x000070 + 0x1b18 * 4 = 0x6cd0
$ hexdump -C -s 0x6cd0 -n 4 classes.dex
00006cd0  57 ae 33 00                                       |W.3.|
00006cd4
StringId[]的元素值表示字符串相对文件起始的偏移, 即string_data_off_ = 0x33ae57
- 
查看字符串
$ hexdump -C -s 0x33ae57 -n 100 classes.dex
0033ae57  1b 4c 6a 61 76 61 2f 6c  61 6e 67 2f 42 6f 6f 74  |.Ljava/lang/Boot|
0033ae67  43 6c 61 73 73 4c 6f 61  64 65 72 3b 00 20 4c 6a  |ClassLoader;. Lj|
0033ae77  61 76 61 2f 6c 61 6e 67  2f 42 6f 6f 74 73 74 72  |ava/lang/Bootstr|
0033ae87  61 70 4d 65 74 68 6f 64  45 72 72 6f 72 3b 00 1a  |apMethodError;..|
0033ae97  4c 6a 61 76 61 2f 6c 61  6e 67 2f 42 79 74 65 24  |Ljava/lang/Byte$|
0033aea7  42 79 74 65 43 61 63 68  65 3b 00 10 4c 6a 61 76  |ByteCache;..Ljav|
0033aeb7  61 2f 6c 61                                       |a/la|
0033aebb
字符串数据的第一个字节0x1b是用ULEB128编码的字符串长度(27个字符), 其后是以'\0'结尾的字符串内容.
则Class index = 0xe9对应类名为"Ljava/lang/BootClassLoader;" 同理,superclass_idx_ = 0xfc对应的类名为"Ljava/lang/ClassLoader;"
查看源文件
source_file_idx_ = 0x0dd9
0x70 + 0x0dd9 * 4 = 0x37d4
$ hexdump -C -s 0x37d4 -n 4 classes.dex
000037d4  38 a3 32 00                                       |8.2.|
000037d8
$ hexdump -C -s 0x32a338 -n 40 classes.dex
0032a338  10 43 6c 61 73 73 4c 6f  61 64 65 72 2e 6a 61 76  |.ClassLoader.jav|
0032a348  61 00 1b 43 6c 61 73 73  4e 6f 74 46 6f 75 6e 64  |a..ClassNotFound|
0032a358  45 78 63 65 70 74 69 6f                           |Exceptio|
0032a360
表示当前类位于ClassLoader.java
查看类的定义
class_data_item
- class_data_item是用无符号LEB128进行编码
- 解码相关可参考DecodeUnsignedLeb128()
$ hexdump -C -s 0x49e0d8 -n 100 classes.dex 
0049e0d8  01 00 02 07 a2 06 0a d7  0f 81 80 04 f4 91 39 06  |..............9.|
0049e0e8  89 80 08 90 92 39 d9 0f  04 d8 92 39 02 04 f8 92  |.....9.....9....|
0049e0f8  39 01 04 94 93 39 02 04  b8 93 39 01 01 b8 94 39  |9....9....9....9|
0049e108  01 01 d4 94 39 01 04 f0  94 39 01 00 04 00 a3 06  |....9....9......|
0049e118  1a e2 0f 81 80 04 98 95  39 01 81 80 04 b0 95 39  |........9......9|
0049e128  01 81 80 04 c8 95 39 01  81 80 04 e0 95 39 01 00  |......9......9..|
0049e138  02 00 a4 06                                       |....|
ClassDataHeader
- 参考ClassDataItemIterator::ReadClassDataHeader()的实现
- ClassDataHeader的几个成员值如下:
static_fields_size_ = 0x01 instance_fields_size_ = 0x00 direct_methods_size_ = 0x02 virtual_methods_size_ = 0x07
ClassDataField
- ClassDataItemIterator::ReadClassDataField()
- field_idx_delta_
field_idx_delta_ = (0xa2 & 0x7f)|(0x06 << 7) = 0x322
这是static fields列表中的第一个field, 所以field_idx_delta_即为它在FieldId[]中的索引; 后续field的索引需在前一个索引上累加delta.
0x03b1ec + 0x322 * 8 = 0x3cafc
$ hexdump -C -s 0x3cafc -n 8 classes.dex
0003cafc  e9 00 e9 00 39 60 00 00                           |....9`..|
0003cb04
- 
class_idx_ = 0xe9 表示当前field属于BootClassLoader类 
- 
type_idx_ = 0xe9 表示当前field是BootClassLoader类型 
- 
name_idx_ = 0x6039 0x70 + 0x6039 * 4 = 0x18154 $ hexdump -C -s 0x18154 -n 8 classes.dex 00018154 58 67 3a 00 62 67 3a 00 |Xg:.bg:.| 0001815c $ hexdump -C -s 0x3a6758 -n 40 classes.dex 003a6758 08 69 6e 73 74 61 6e 63 65 00 17 69 6e 73 74 61 |.instance..insta| 003a6768 6e 63 65 46 6f 6c 6c 6f 77 52 65 64 69 72 65 63 |nceFollowRedirec| 003a6778 74 73 00 0b 69 6e 73 74 |ts..inst| 003a6780 当前field名字为"instance"
 
- 
- field_.access_flags_
access_flags_ = 0x0a, 表示private static
ClassDataMethod
- ClassDataItemIterator::ReadClassDataMethod()
- method_idx_delta_
method_.method_idx_delta_ = (0xd7&0x7f)|(0x0f<<7) = 0x7d7
由于这是direct methods列表中的第一个method, 这里的method_idx_delta_就是常见到的dex_method_idx (0x7d7 = 2007); 后续method的dex_method_idx需在前一个索引上累加delta.
0x054094 + 0x7d7 * 8 = 0x57f4c
$ hexdump -C -s 0x57f4c -n 8 classes.dex
00057f4c  e9 00 57 10 e8 07 00 00                           |..W.....|
00057f54
- class_idx_ = 0xe9
0xe9表示当前method属于BootClassLoader类
- proto_idx_ = 0x1057
0x025718 + 0x1057 * 12 = 0x31b2c $ hexdump -C -s 0x31b2c -n 12 classes.dex 00031b2c 18 3e 00 00 58 0e 00 00 00 00 00 00 |.>..X.......| 00031b38- shorty_idx_ = 0x3e18
0x70 + 0x3e18 * 4 = 0xf8d0 $ hexdump -C -s 0xf8d0 -n 4 classes.dex 0000f8d0 fe 52 38 00 |.R8.| 0000f8d4 $ hexdump -C -s 0x3852fe -n 40 classes.dex 003852fe 01 56 00 02 56 31 00 02 56 32 00 02 56 33 00 02 |.V..V1..V2..V3..| 0038530e 56 3a 00 03 56 3a 3a 00 03 56 41 49 00 04 56 41 |V:..V::..VAI..VA| 0038531e 49 49 00 08 56 41 4c 49 |II..VALI| 00385326 shorty descriptor是"V"
- return_type_idx_ = 0x0e58
0x0219e4 + 0x0e58 * 4 = 0x25344 $ hexdump -C -s 0x25344 -n 4 classes.dex 00025344 18 3e 00 00 |.>..| 00025348 0x70 + 0x3e18 * 4 = 0xf8d0 返回类型也是"V"
- pad_ = 0x0000
- parameters_off_ = 0x00
 
- shorty_idx_ = 0x3e18
- name_idx_ = 0x07e8
0x000070 + 0x07e8 * 4 = 0x2010 $ hexdump -C -s 0x2010 -n 8 classes.dex 00002010 d2 24 32 00 da 24 32 00 |.$2..$2.| 00002018 $ hexdump -C -s 0x3224d2 -n 40 classes.dex 003224d2 06 3c 69 6e 69 74 3e 00 07 3c 69 6e 69 74 3e 20 |.<init>..<init> | 003224e2 00 19 3c 69 6e 69 74 69 61 6c 69 7a 65 64 20 66 |..<initialized f| 003224f2 6f 72 20 73 69 67 6e 69 |or signi| 003224fa 表示当前direct method是"<init>"
 
- class_idx_ = 0xe9
- access_flags_
method_.access_flags_ = (0x04<<14)|((0x80&0x7f)<<7)|(0x81&0x7f) = 0x10001 (即ACC_PUBLIC 0x1 | ACC_CONSTRUCTOR 0x10000) 表示此method是public constructor
- code_off_
- 查看CodeItem
method_.code_off_ = (0xf4&0x7f)|((0x91&0x7f) <<7)|((0x39&0x7f)<<14) = 0xe48f4 $ hexdump -C -s 0xe48f4 -n 40 classes.dex 000e48f4 02 00 01 00 02 00 00 00 ee 58 3d 00 05 00 00 00 |.........X=.....| 000e4904 12 00 70 20 14 09 01 00 0e 00 00 00 02 00 00 00 |..p ............| 000e4914 01 00 01 00 f5 58 3d 00 |.....X=.| 000e491c- registers_size_ = 0x02
- ins_size_ = 0x01
- outs_size_ = 0x02
- tries_size_ = 0x00
- debug_info_off_ = 0x3d58ee
- debug_info信息解析参考DexFile::DecodeDebugPositionInfo()
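$ hexdump -C -s 0x3d58ee -n 7 classes.dex
003d58ee  c6 0a 00 07 0e 4b 00                              |.....K.|
003d58f5
line_start = (0xc6&0x7f)|(0x0a<<7) = 1350, parameters_size = 0x00
0x07是DBG_SET_PROLOGUE_END; 0x0e和0x4b是special opcode, 解码方式为 adjusted = opcode - 0x0a, line += -4 + (adjusted % 15), address += adjusted / 15
0x0e: adjusted = 4, line += 0, address += 0, 即0x0000处dex指令对应的line为1350
0x4b: adjusted = 65, line += 1, address += 4, 即0x0004处dex指令对应的line为 1350 + 1 = 1351
0x00是DBG_END_SEQUENCE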
 
- debug_info信息解析参考DexFile::DecodeDebugPositionInfo()
- insns_size_in_code_units_ = 0x05
- 每个code unit是2个byte
- 每个dex指令可由多个code unit组成
- oat dump
200: Ljava/lang/BootClassLoader; (offset=0x0000ea9c) (type_idx=233) (StatusInitialized) (OatClassAllCompiled) 0: void java.lang.BootClassLoader.<init>() (dex_method_idx=2007) DEX CODE: 0x0000: 1200 | const/4 v0, #+0 0x0001: 7020 1409 0100 | invoke-direct {v1, v0}, void java.lang.ClassLoader.<init>(java.lang.ClassLoader) // method@2324 0x0004: 0e00 | return-void OatMethodOffsets (offset=0x0000eaa0)
 
 
 
- 查看CodeItem
DexFile
- 
DexFile对象 这里仍以core-oj.jar为例 $3 = (art::DexFile) { _vptr$DexFile = 0x74da3dd1f0 <vtable for art::DexFile+16>, static kDefaultMethodsVersion = 0x25, static kClassDefinitionOrderEnforcedVersion = 0x25, static kDexMagic = 0x74da36f654 <art::DexFile::kDexMagic>, static kNumDexVersions = 0x3, static kDexVersionLen = 0x4, static kDexMagicVersions = {{0x30, 0x33, 0x35, 0x0}, {0x30, 0x33, 0x37, 0x0}, {0x30, 0x33, 0x38, 0x0}}, static kSha1DigestSize = 0x14, static kDexEndianConstant = 0x12345678, static kClassesDex = 0x74da33b0e2, static kDexNoIndex = 0xffffffff, static kDexNoIndex16 = 0xffff, static kMultiDexSeparator = 0x3a, begin_ = 0x74d991a01c, size_ = 0x4d55fc, location_ = { <std::__1::__basic_string_common<true>> = {<No data fields>}, members of std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >: __r_ = { <std::__1::__libcpp_compressed_pair_imp<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep, std::__1::allocator<char>, 2>> = { <std::__1::allocator<char>> = {<No data fields>}, members of std::__1::__libcpp_compressed_pair_imp<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep, std::__1::allocator<char>, 2>: __first_ = { { __l = { __cap_ = 0x21, __size_ = 0x1d, __data_ = 0x74da41c340 }, __s = { { __size_ = 0x21, __lx = 0x21 }, __data_ = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1d, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40, 0xc3, 0x41, 0xda, 0x74, 0x0, 0x0, 0x0} }, __r = { __words = {0x21, 0x1d, 0x74da41c340} } } } }, <No data fields>}, static npos = 0xffffffffffffffff }, location_checksum_ = 0xce236663, mem_map_ = { __ptr_ = { <std::__1::__libcpp_compressed_pair_imp<art::MemMap*, std::__1::default_delete<art::MemMap>, 2>> = { <std::__1::default_delete<art::MemMap>> = {<No data fields>}, members of std::__1::__libcpp_compressed_pair_imp<art::MemMap*, std::__1::default_delete<art::MemMap>, 2>: __first_ = 0x0 }, <No data fields>} }, header_ = 0x74d991a01c, 
string_ids_ = 0x74d991a08c, type_ids_ = 0x74d993ba00, field_ids_ = 0x74d9955208, method_ids_ = 0x74d996e0b0, proto_ids_ = 0x74d993f734, class_defs_ = 0x74d99b3660, method_handles_ = 0x0, num_method_handles_ = 0x0, call_site_ids_ = 0x0, num_call_site_ids_ = 0x0, oat_dex_file_ = 0x74da438060 }
- 
解析 - vdex文件的maps
74d991a000-74d9e28000 r--s 00000000 103:0b 1194 /system/framework/arm64/boot.vdex 74b70ea000-74b75f8000 r--p 00000000 103:0b 1194 /system/framework/arm64/boot.vdex
- vdex mmap起始地址是0x74d991a000
- header_ = 0x74d991a000 + 28 = 0x74d991a01c
- string_ids_ = 0x74d991a01c + 0x70 = 0x74d991a08c
- type_ids_ = 0x74d991a01c + 0x0219e4 = 0x74d993ba00
- 其它依次类推
 
- vdex文件的maps


