art 是怎么把 dalvik 指令编译成 native code 第二篇(走马观花)
2016-06-20 17:00
549 查看
QuickCompiler 分析 1
起了个开头,还未分析完……分析完了会直接贴上来。第二篇进行到 QuickCompiler 的分析,先说 QuickCompiler 是因为它是 Android ART 的御用(默认)选项。
在讲 QuickCompiler 之前,我们先来用一个实例表现一下过程:
// Demo class for the walkthrough: a tiny arithmetic method plus a main() that calls it.
class test{
    // Returns (a + b) * (a - b) / (a % b) using int arithmetic.
    // Integer division by zero (b == 0, or a % b == 0) raises ArithmeticException.
    public int foo(int a,int b) {
        return (a + b) * (a-b) / (a%b);
    }

    // Entry point: creates a test instance and prints the result of foo(100, 3).
    public static void main(String[] argc){
        test t = new test();
        System.out.println(t.foo(100,3));
    }
}
我们把上面这段 Java 代码编译成 dex,然后放到手机上运行,提取出生成的 oat 文件,对比一下 dex 代码和 ARM 代码。
# dalvikvm -cp test.jar test //然后会在 /data/dalvik-cache/arm/ 目录生成 data@local@tmp@test.jar@classes.dex,拷贝一份出来用 oatdump 查看
# oatdump --oat-file=ttttoat.dex oatdump --oat-file=ttttoat.dex MAGIC: oat 064 CHECKSUM: 0xdb3b1f18 INSTRUCTION SET: Thumb2 ...... OatDexFile: location: /data/local/tmp/test.jar checksum: 0x69b0fde4 0: Ltest; (offset=0x00000550) (type_idx=4) (StatusInitialized) (OatClassAllCompiled) 0: void test.<init>() (dex_method_idx=2) DEX CODE: 0x0000: 7010 0100 0000 | invoke-direct {v0}, void java.lang.Object.<init>() // method@1 0x0003: 0e00 | return-void OatMethodOffsets (offset=0x00000554) code_offset: 0x0000101d gc_map: (offset=0x00000000) OatQuickMethodHeader (offset=0x00001000) mapping_table: (offset=0x00000000) vmap_table: (offset=0x00000560) Optimized CodeInfo (size=18, number_of_dex_registers=1, number_of_stack_maps=0, has_inline_info=0, number_of_bytes_for_inline_info=0, number DexRegisterLocationCatalog (number_of_entries=0, size_in_bytes=0) QuickMethodFrameInfo frame_size_in_bytes: 0 core_spill_mask: 0x00008020 (r5, r15) fp_spill_mask: 0x00000000 vr_stack_locations: ins: v0[sp + #4] method*: v1[sp + #0] outs: v0[sp + #4] CODE: (code_offset=0x0000101d size_offset=0x00001018 size=2)... 
0x0000101c: 4770 bx lr 1: void test.main(java.lang.String[]) (dex_method_idx=4) DEX CODE: 0x0000: 2200 0400 | new-instance v0, test // type@4 0x0002: 7010 0200 0000 | invoke-direct {v0}, void test.<init>() // method@2 0x0005: 6201 0000 | sget-object v1, Ljava/io/PrintStream; java.lang.System.out // field@0 0x0007: 1302 6400 | const/16 v2, #+100 0x0009: 1233 | const/4 v3, #+3 0x000a: 6e30 0300 2003 | invoke-virtual {v0, v2, v3}, int test.foo(int, int) // method@3 0x000d: 0a00 | move-result v0 0x000e: 6e20 0000 0100 | invoke-virtual {v1, v0}, void java.io.PrintStream.println(int) // method@0 0x0011: 0e00 | return-void OatMethodOffsets (offset=0x00000558) code_offset: 0x0000103d gc_map: (offset=0x00000000) OatQuickMethodHeader (offset=0x00001020) mapping_table: (offset=0x00000000) vmap_table: (offset=0x00000572) Optimized CodeInfo (size=72, number_of_dex_registers=5, number_of_stack_maps=7, has_inline_info=0, number_of_bytes_for_inline_info=0, number DexRegisterLocationCatalog (number_of_entries=5, size_in_bytes=5) entry 0: in register (5) entry 1: in register (7) entry 2: in register (6) entry 3: in register (0) entry 4: in stack (16) StackMap 0 (dex_pc=0x0, native_pc_offset=0x8, dex_register_map_offset=0xffffffff, inline_info_offset=0xffffffff, register_mask=0x0, stack_ StackMap 1 (dex_pc=0x0, native_pc_offset=0x26, dex_register_map_offset=0x0, inline_info_offset=0xffffffff, register_mask=0x20, stack_mask= v4: in register (5) [entry 0] StackMap 2 (dex_pc=0xa, native_pc_offset=0x5a, dex_register_map_offset=0x2, inline_info_offset=0xffffffff, register_mask=0xe0, stack_mask= v0: in register (7) [entry 1] v1: in register (6) [entry 2] v4: in register (5) [entry 0] StackMap 3 (dex_pc=0xe, native_pc_offset=0x60, dex_register_map_offset=0x5, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0 v0: in register (0) [entry 3] v1: in register (6) [entry 2] v4: in register (5) [entry 0] StackMap 4 (dex_pc=0xe, native_pc_offset=0x6a, dex_register_map_offset=0x8, 
inline_info_offset=0xffffffff, register_mask=0x60, stack_mask= v1: in register (6) [entry 2] v4: in register (5) [entry 0] StackMap 5 (dex_pc=0x0, native_pc_offset=0x76, dex_register_map_offset=0xa, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0 v4: in stack (16) [entry 4] StackMap 6 (dex_pc=0x5, native_pc_offset=0x86, dex_register_map_offset=0xc, inline_info_offset=0xffffffff, register_mask=0x20, stack_mask= v0: in stack (16) [entry 4] v4: in register (5) [entry 0] QuickMethodFrameInfo frame_size_in_bytes: 48 core_spill_mask: 0x000080e0 (r5, r6, r7, r15) fp_spill_mask: 0x00000000 vr_stack_locations: locals: v0[sp + #12] v1[sp + #16] v2[sp + #20] v3[sp + #24] ins: v4[sp + #52] method*: v5[sp + #0] outs: v0[sp + #4] v1[sp + #8] v2[sp + #12] CODE: (code_offset=0x0000103d size_offset=0x00001038 size=142)... 0x0000103c: f5ad5c00 sub r12, sp, #8192 0x00001040: f8dcc000 ldr.w r12, [r12, #0] 0x00001044: b5e0 push {r5, r6, r7, lr} 0x00001046: b088 sub sp, sp, #32 0x00001048: 9000 str r0, [sp, #0] 0x0000104a: f8b9c000 ldrh.w r12, [r9, #0] ; state_and_flags 0x0000104e: f1bc0f00 cmp.w r12, #0 0x00001052: f040802a bne.w +84 (0x000010aa) 0x00001056: 000d lsls r5, r1, #0 0x00001058: 9900 ldr r1, [sp, #0] 0x0000105a: 2004 movs r0, #4 0x0000105c: f8d9e114 ldr.w lr, [r9, #276] ; pAllocObject 0x00001060: 47f0 blx lr 0x00001062: 9900 ldr r1, [sp, #0] 0x00001064: 6889 ldr r1, [r1, #8] 0x00001066: 6989 ldr r1, [r1, #24] 0x00001068: 2900 cmp r1, #0 0x0000106a: f0008025 beq.w +74 (0x000010b8) 0x0000106e: f8d1c084 ldr.w r12, [r1, #132] 0x00001072: f1bc0f0a cmp.w r12, #10 0x00001076: f2c0801f blt.w +62 (0x000010b8) 0x0000107a: f3bf8f5b dmb ish 0x0000107e: f8d161d0 ldr.w r6, [r1, #464] 0x00001082: 0001 lsls r1, r0, #0 0x00001084: 0007 lsls r7, r0, #0 0x00001086: 2264 movs r2, #100 0x00001088: 2303 movs r3, #3 0x0000108a: 9800 ldr r0, [sp, #0] 0x0000108c: 6840 ldr r0, [r0, #4] 0x0000108e: 6980 ldr r0, [r0, #24] 0x00001090: f8d0e024 ldr.w lr, [r0, #36] 0x00001094: 47f0 blx lr 
0x00001096: 0031 lsls r1, r6, #0 0x00001098: 0002 lsls r2, r0, #0 0x0000109a: 6808 ldr r0, [r1, #0] 0x0000109c: f8d00230 ldr.w r0, [r0, #560] 0x000010a0: f8d0e024 ldr.w lr, [r0, #36] 0x000010a4: 47f0 blx lr 0x000010a6: b008 add sp, sp, #32 0x000010a8: bde0 pop {r5, r6, r7, pc} 0x000010aa: 9104 str r1, [sp, #16] 0x000010ac: f8d9e25c ldr.w lr, [r9, #604] ; pTestSuspend 0x000010b0: 47f0 blx lr 0x000010b2: 9904 ldr r1, [sp, #16] 0x000010b4: f7ffbfcf b.w -98 (0x00001056) 0x000010b8: 9004 str r0, [sp, #16] 0x000010ba: 2003 movs r0, #3 0x000010bc: f8d9e140 ldr.w lr, [r9, #320] ; pInitializeStaticStorage 0x000010c0: 47f0 blx lr 0x000010c2: 0001 lsls r1, r0, #0 0x000010c4: 9804 ldr r0, [sp, #16] 0x000010c6: f7ffbfda b.w -76 (0x0000107e) 2: int test.foo(int, int) (dex_method_idx=3) //foo 函数的 dex 代码 DEX CODE: 0x0000: 9000 0304 | add-int v0, v3, v4 0x0002: 9101 0304 | sub-int v1, v3, v4 0x0004: b210 | mul-int/2addr v0, v1 0x0005: 9401 0304 | rem-int v1, v3, v4 0x0007: b310 | div-int/2addr v0, v1 0x0008: 0f00 | return v0 OatMethodOffsets (offset=0x0000055c) code_offset: 0x000010ed gc_map: (offset=0x00000000) OatQuickMethodHeader (offset=0x000010d0) mapping_table: (offset=0x00000000) vmap_table: (offset=0x000005ba) Optimized CodeInfo (size=32, number_of_dex_registers=5, number_of_stack_maps=2, has_inline_info=0, number_of_bytes_for_inline_info=0, number DexRegisterLocationCatalog (number_of_entries=4, size_in_bytes=4) entry 0: in register (0) entry 1: in register (1) entry 2: in register (2) entry 3: in register (3) StackMap 0 (dex_pc=0x5, native_pc_offset=0x32, dex_register_map_offset=0x0, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0 v0: in register (0) [entry 0] v2: in register (1) [entry 1] v3: in register (2) [entry 2] v4: in register (3) [entry 3] StackMap 1 (dex_pc=0x7, native_pc_offset=0x38, dex_register_map_offset=0x2, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0 v0: in register (0) [entry 0] v1: in register (2) [entry 2] v2: in 
register (1) [entry 1] QuickMethodFrameInfo frame_size_in_bytes: 16 core_spill_mask: 0x00008020 (r5, r15) fp_spill_mask: 0x00000000 vr_stack_locations: locals: v0[sp + #4294967292] v1[sp + #0] ins: v2[sp + #20] v3[sp + #24] v4[sp + #28] method*: v5[sp + #0] CODE: (code_offset=0x000010ed size_offset=0x000010e8 size=56)... //这里是oat 为我们生成出来的int foo(int a,int b) 方法的arm code 0x000010ec: b520 push {r5, lr} 0x000010ee: b082 sub sp, sp, #8 0x000010f0: 9000 str r0, [sp, #0] 0x000010f2: 18d0 adds r0, r2, r3 0x000010f4: 1ad4 subs r4, r2, r3 0x000010f6: fb00f004 mul r0, r0, r4 0x000010fa: 2b00 cmp r3, #0 0x000010fc: f000800c beq.w +24 (0x00001118) 0x00001100: fb92f4f3 sdiv r4, r2, r3 0x00001104: fb04f403 mul r4, r4, r3 0x00001108: 1b12 subs r2, r2, r4 0x0000110a: 2a00 cmp r2, #0 0x0000110c: f0008007 beq.w +14 (0x0000111e) 0x00001110: fb90f0f2 sdiv r0, r0, r2 0x00001114: b002 add sp, sp, #8 0x00001116: bd20 pop {r5, pc} 0x00001118: f8d9e268 ldr.w lr, [r9, #616] ; pThrowDivZero 0x0000111c: 47f0 blx lr 0x0000111e: f8d9e268 ldr.w lr, [r9, #616] ; pThrowDivZero 0x00001122: 47f0 blx lr
//下面开始分析代码
其实 Android 的代码感觉一直都很乱。我看的这份是 android-5.1.0_r3 的 ART 代码,可以看到 QuickCompiler 放在一个名为 compilers.h 的头文件里面,其父类 Compiler 则放在 compiler.h 里面。
// Compiler subclass for the Quick backend. Every member function except the
// constructor is a const override of the Compiler interface.
class QuickCompiler : public Compiler {
 public:
  // Passes the driver on to the Compiler base together with the constant 100.
  // NOTE(review): the meaning of 100 is defined by Compiler's constructor, which is not shown here.
  explicit QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}

  void Init() const OVERRIDE;

  void UnInit() const OVERRIDE;

  // Compiles the dex method described by code_item and returns the resulting CompiledMethod.
  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                          uint32_t access_flags,
                          InvokeType invoke_type,
                          uint16_t class_def_idx,
                          uint32_t method_idx,
                          jobject class_loader,
                          const DexFile& dex_file) const OVERRIDE;

  // JNI counterpart of Compile: takes no code item, only the method's flags, index and dex file.
  CompiledMethod* JniCompile(uint32_t access_flags,
                             uint32_t method_idx,
                             const DexFile& dex_file) const OVERRIDE;

  // Annotated as requiring Locks::mutator_lock_ to be held (shared).
  uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

  // Annotated as requiring Locks::mutator_lock_ to be held (shared).
  bool WriteElf(art::File* file,
                OatWriter* oat_writer,
                const std::vector<const art::DexFile*>& dex_files,
                const std::string& android_root,
                bool is_host) const
    OVERRIDE
    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

  // Returns the Backend (code generator) to use for the given compilation unit.
  Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const OVERRIDE;

  // Intentionally empty: no extra per-unit initialization is done here.
  void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE {}

  /*
   * @brief Generate and return Dwarf CFI initialization, if supported by the
   * backend.
   * @param driver CompilerDriver for this compile.
   * @returns nullptr if not supported by backend or a vector of bytes for CFI DWARF
   * information.
   * @note This is used for backtrace information in generated code.
   */
  std::vector<uint8_t>* GetCallFrameInformationInitialization(const CompilerDriver& driver) const
      OVERRIDE;

 private:
  DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
};
QuickCompiler 继承自 Compiler ,我们先分析 完成dalvik 指令编译的 Compile 函数。
// Compiles a single dex method.
//
// The method is first offered to TryCompileWithSeaIR; if that returns a
// non-null CompiledMethod, it is returned unchanged. Otherwise the same
// arguments, plus this compiler's driver, are handed to ArtQuickCompileMethod
// and its result is returned.
CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
                                       uint32_t access_flags,
                                       InvokeType invoke_type,
                                       uint16_t class_def_idx,
                                       uint32_t method_idx,
                                       jobject class_loader,
                                       const DexFile& dex_file) const {
  // Give the SeaIR path the first chance at this method.
  CompiledMethod* method = TryCompileWithSeaIR(code_item,
                                               access_flags,
                                               invoke_type,
                                               class_def_idx,
                                               method_idx,
                                               class_loader,
                                               dex_file);
  if (method != nullptr) {
    return method;
  }

  // Fall back to the Quick compilation entry point.
  return ArtQuickCompileMethod(GetCompilerDriver(),
                               code_item,
                               access_flags,
                               invoke_type,
                               class_def_idx,
                               method_idx,
                               class_loader,
                               dex_file);
}
QuickCompiler::Compile 调用 ArtQuickCompileMethod,然后进入到 frontend.cc,里面就是一个大杂烩啊。
ArtQuickCompileMethod -> CompileOneMethod -> CompileMethod
在 CompileMethod 里面,const DexFile::CodeItem* code_item 参数就是dex 文件格式的 CodeItem (写完这篇会把 dex 格式分析一遍,以前没做笔记,只自己实现了一份 dexdump ,借这个机会再熟悉一次),先判断 code_item->insns_size_in_code_units_ , code_item->registers_size_ 是否超出编译界限。
CompilationUnit cu(driver.GetArenaPool());
创建一个编译体,存放所有编译相关的东西。
cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
创建MIR Graph对象
cu.cg.reset(compiler->GetCodeGenerator(&cu, llvm_compilation_unit));
根据 cu.instruction_set 创建一个对应的 Mir2Lir(即 GetCodeGenerator 返回的 Backend)
cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file);
负责 Dex Method 到 MIRGraph 的转换,这个过程就是我们比较关心的。函数里面解析 Method 里面所有的指令,通过 cur_block->AppendMIR(insn); 把每条指令存放到一个 BasicBlock 中。也就是说,每个 BasicBlock 中存放着多个 MIR *insn。
那么具体多少条 MIR 才组成一个 BasicBlock 呢?下面我们具体说一下。在二进制动态翻译的时候,比如 VMware(没有硬件 VT 虚拟化加持的时候),还有 QEMU 模拟器里面,都会有代码基本块的划分,所用的基本块划分技术也大致相同。程序是由很多数据操作指令和流程处理指令衔接在一起的,基本都是按照分支来划分基本块。这样有一个好处,就是翻译的代码能得到有效地控制,也方便管理每一个基本块。具体代码可以看 InlineMethod 中的 while (code_ptr < code_end) 循环:
{ MIR *insn = NewMIR(); insn->offset = current_offset_; insn->m_unit_index = current_method_; int width = ParseInsn(code_ptr, &insn->dalvikInsn); Instruction::Code opcode = insn->dalvikInsn.opcode; if (opcode_count_ != NULL) { opcode_count_[static_cast<int>(opcode)]++; } int flags = Instruction::FlagsOf(insn->dalvikInsn.opcode); int verify_flags = Instruction::VerifyFlagsOf(insn->dalvikInsn.opcode); uint64_t df_flags = GetDataFlowAttributes(insn); merged_df_flags |= df_flags; if (df_flags & DF_HAS_DEFS) { def_count_ += (df_flags & DF_A_WIDE) ? 2 : 1; } if (df_flags & DF_LVN) { cur_block->use_lvn = true; // Run local value numbering on this basic block. } // Check for inline data block signatures. if (opcode == Instruction::NOP) { // A simple NOP will have a width of 1 at this point, embedded data NOP > 1. if ((width == 1) && ((current_offset_ & 0x1) == 0x1) && ((code_end - code_ptr) > 1)) { // Could be an aligning nop. If an embedded data NOP follows, treat pair as single unit. uint16_t following_raw_instruction = code_ptr[1]; if ((following_raw_instruction == Instruction::kSparseSwitchSignature) || (following_raw_instruction == Instruction::kPackedSwitchSignature) || (following_raw_instruction == Instruction::kArrayDataSignature)) { width += Instruction::At(code_ptr + 1)->SizeInCodeUnits(); } } if (width == 1) { // It is a simple nop - treat normally. cur_block->AppendMIR(insn); } else { DCHECK(cur_block->fall_through == NullBasicBlockId); DCHECK(cur_block->taken == NullBasicBlockId); // Unreachable instruction, mark for no continuation and end basic block. flags &= ~Instruction::kContinue; FindBlock(current_offset_ + width, /* split */ false, /* create */ true, /* immed_pred_block_p */ NULL); } } else { cur_block->AppendMIR(insn); } // Associate the starting dex_pc for this opcode with its containing basic block. 
dex_pc_to_block_map_.Put(insn->offset, cur_block->id); code_ptr += width; if (flags & Instruction::kBranch) { cur_block = ProcessCanBranch(cur_block, insn, current_offset_, width, flags, code_ptr, code_end); } else if (flags & Instruction::kReturn) { cur_block->terminated_by_return = true; cur_block->fall_through = exit_block_->id; exit_block_->predecessors->Insert(cur_block->id); /* * Terminate the current block if there are instructions * afterwards. */ if (code_ptr < code_end) { /* * Create a fallthrough block for real instructions * (incl. NOP). */ FindBlock(current_offset_ + width, /* split */ false, /* create */ true, /* immed_pred_block_p */ NULL); } } else if (flags & Instruction::kThrow) { cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_, code_ptr, code_end); } else if (flags & Instruction::kSwitch) { cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags); } if (verify_flags & Instruction::kVerifyVarArgRange || verify_flags & Instruction::kVerifyVarArgRangeNonZero) { /* * The Quick backend's runtime model includes a gap between a method's * argument ("in") vregs and the rest of its vregs. Handling a range instruction * which spans the gap is somewhat complicated, and should not happen * in normal usage of dx. Punt to the interpreter. */ int first_reg_in_range = insn->dalvikInsn.vC; int last_reg_in_range = first_reg_in_range + insn->dalvikInsn.vA - 1; if (IsInVReg(first_reg_in_range) != IsInVReg(last_reg_in_range)) { punt_to_interpreter_ = true; } } current_offset_ += width; BasicBlock* next_block = FindBlock(current_offset_, /* split */ false, /* create */ false, /* immed_pred_block_p */ NULL); if (next_block) { /* * The next instruction could be the target of a previously parsed * forward branch so a block is already created. If the current * instruction is not an unconditional branch, connect them through * the fall-through link. 
*/ DCHECK(cur_block->fall_through == NullBasicBlockId || GetBasicBlock(cur_block->fall_through) == next_block || GetBasicBlock(cur_block->fall_through) == exit_block_); if ((cur_block->fall_through == NullBasicBlockId) && (flags & Instruction::kContinue)) { cur_block->fall_through = next_block->id; next_block->predecessors->Insert(cur_block->id); } cur_block = next_block; } }
CanCompileMethod
遍历所有 BasicBlock 中的 MIR,判断其中的指令是否都被支持(即该方法能否被编译)
相关文章推荐
- 使用C++实现JNI接口需要注意的事项
- Android IPC进程间通讯机制
- Android Manifest 用法
- [转载]Activity中ConfigChanges属性的用法
- Android之获取手机上的图片和视频缩略图thumbnails
- Android之使用Http协议实现文件上传功能
- Android学习笔记(二九):嵌入浏览器
- android string.xml文件中的整型和string型代替
- i-jetty环境搭配与编译
- android之定时器AlarmManager
- android wifi 无线调试
- Android Native 绘图方法
- Android java 与 javascript互访(相互调用)的方法例子
- android 代码实现控件之间的间距
- android FragmentPagerAdapter的“标准”配置
- Android"解决"onTouch和onClick的冲突问题
- android:installLocation简析
- android searchView的关闭事件
- SourceProvider.getJniDirectories