您的位置:首页 > 移动开发 > Android开发

art 是怎么把 dalvik 指令编译成 native code 第二篇(走马观花)

2016-06-20 17:00 549 查看

QuickCompiler 分析 1

起了个开头,还未分析完…分析完了直接贴上来。

第二篇进行到 QuickCompiler 的分析,先说QuickCompiler是因为这是android art 御用选项。

在讲 QuickCompiler 之前,我们先来用一个实例表现一下过程:

/**
 * Minimal sample class; its compiled dex code and generated native code are
 * the subject of the oatdump listing that follows in this article.
 */
class test{

/**
 * Returns (a + b) * (a - b) / (a % b), evaluated with int arithmetic.
 * Integer division or remainder by zero throws ArithmeticException, so this
 * throws when b == 0 or when a % b == 0.
 */
public
int foo(int a,int b)
{
return (a + b) * (a-b) / (a%b);
}

/** Entry point: prints foo(100, 3), i.e. 103 * 97 / 1 = 9991. */
public static void main(String[] argc){

test t = new test();

System.out.println(t.foo(100,3));

}
}


我们把上面这段 Java 代码编译成 dex,然后放到手机上运行,提取出生成的 oat 文件,比对一下 dex 指令和 ARM 代码。

# dalvikvm -cp test.jar test
//然后会在 /data/dalvik-cache/arm/ 目录生成 data@local@tmp@test.jar@classes.dex,拷贝一份出来用 oatdump 查看


# oatdump --oat-file=ttttoat.dex
oatdump --oat-file=ttttoat.dex
MAGIC:
oat
064

CHECKSUM:
0xdb3b1f18

INSTRUCTION SET:
Thumb2

......

OatDexFile:
location: /data/local/tmp/test.jar
checksum: 0x69b0fde4
0: Ltest; (offset=0x00000550) (type_idx=4) (StatusInitialized) (OatClassAllCompiled)
0: void test.<init>() (dex_method_idx=2)
DEX CODE:
0x0000: 7010 0100 0000            | invoke-direct {v0}, void java.lang.Object.<init>() // method@1
0x0003: 0e00                      | return-void
OatMethodOffsets (offset=0x00000554)
code_offset: 0x0000101d
gc_map: (offset=0x00000000)
OatQuickMethodHeader (offset=0x00001000)
mapping_table: (offset=0x00000000)
vmap_table: (offset=0x00000560)
Optimized CodeInfo (size=18, number_of_dex_registers=1, number_of_stack_maps=0, has_inline_info=0, number_of_bytes_for_inline_info=0, number
DexRegisterLocationCatalog (number_of_entries=0, size_in_bytes=0)
QuickMethodFrameInfo
frame_size_in_bytes: 0
core_spill_mask: 0x00008020 (r5, r15)
fp_spill_mask: 0x00000000
vr_stack_locations:
ins: v0[sp + #4]
method*: v1[sp + #0]
outs: v0[sp + #4]
CODE: (code_offset=0x0000101d size_offset=0x00001018 size=2)...
0x0000101c: 4770          bx      lr
1: void test.main(java.lang.String[]) (dex_method_idx=4)
DEX CODE:
0x0000: 2200 0400                 | new-instance v0, test // type@4
0x0002: 7010 0200 0000            | invoke-direct {v0}, void test.<init>() // method@2
0x0005: 6201 0000                 | sget-object  v1, Ljava/io/PrintStream; java.lang.System.out // field@0
0x0007: 1302 6400                 | const/16 v2, #+100
0x0009: 1233                      | const/4 v3, #+3
0x000a: 6e30 0300 2003            | invoke-virtual {v0, v2, v3}, int test.foo(int, int) // method@3
0x000d: 0a00                      | move-result v0
0x000e: 6e20 0000 0100            | invoke-virtual {v1, v0}, void java.io.PrintStream.println(int) // method@0
0x0011: 0e00                      | return-void
OatMethodOffsets (offset=0x00000558)
code_offset: 0x0000103d
gc_map: (offset=0x00000000)
OatQuickMethodHeader (offset=0x00001020)
mapping_table: (offset=0x00000000)
vmap_table: (offset=0x00000572)
Optimized CodeInfo (size=72, number_of_dex_registers=5, number_of_stack_maps=7, has_inline_info=0, number_of_bytes_for_inline_info=0, number
DexRegisterLocationCatalog (number_of_entries=5, size_in_bytes=5)
entry 0: in register (5)
entry 1: in register (7)
entry 2: in register (6)
entry 3: in register (0)
entry 4: in stack (16)
StackMap 0 (dex_pc=0x0, native_pc_offset=0x8, dex_register_map_offset=0xffffffff, inline_info_offset=0xffffffff, register_mask=0x0, stack_
StackMap 1 (dex_pc=0x0, native_pc_offset=0x26, dex_register_map_offset=0x0, inline_info_offset=0xffffffff, register_mask=0x20, stack_mask=
v4: in register (5) [entry 0]
StackMap 2 (dex_pc=0xa, native_pc_offset=0x5a, dex_register_map_offset=0x2, inline_info_offset=0xffffffff, register_mask=0xe0, stack_mask=
v0: in register (7) [entry 1]
v1: in register (6) [entry 2]
v4: in register (5) [entry 0]
StackMap 3 (dex_pc=0xe, native_pc_offset=0x60, dex_register_map_offset=0x5, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0
v0: in register (0) [entry 3]
v1: in register (6) [entry 2]
v4: in register (5) [entry 0]
StackMap 4 (dex_pc=0xe, native_pc_offset=0x6a, dex_register_map_offset=0x8, inline_info_offset=0xffffffff, register_mask=0x60, stack_mask=
v1: in register (6) [entry 2]
v4: in register (5) [entry 0]
StackMap 5 (dex_pc=0x0, native_pc_offset=0x76, dex_register_map_offset=0xa, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0
v4: in stack (16)   [entry 4]
StackMap 6 (dex_pc=0x5, native_pc_offset=0x86, dex_register_map_offset=0xc, inline_info_offset=0xffffffff, register_mask=0x20, stack_mask=
v0: in stack (16)   [entry 4]
v4: in register (5) [entry 0]
QuickMethodFrameInfo
frame_size_in_bytes: 48
core_spill_mask: 0x000080e0 (r5, r6, r7, r15)
fp_spill_mask: 0x00000000
vr_stack_locations:
locals: v0[sp + #12] v1[sp + #16] v2[sp + #20] v3[sp + #24]
ins: v4[sp + #52]
method*: v5[sp + #0]
outs: v0[sp + #4] v1[sp + #8] v2[sp + #12]
CODE: (code_offset=0x0000103d size_offset=0x00001038 size=142)...
0x0000103c: f5ad5c00      sub     r12, sp, #8192
0x00001040: f8dcc000      ldr.w   r12, [r12, #0]
0x00001044: b5e0          push    {r5, r6, r7, lr}
0x00001046: b088          sub     sp, sp, #32
0x00001048: 9000          str     r0, [sp, #0]
0x0000104a: f8b9c000      ldrh.w  r12, [r9, #0]  ; state_and_flags
0x0000104e: f1bc0f00      cmp.w   r12, #0
0x00001052: f040802a      bne.w   +84 (0x000010aa)
0x00001056: 000d          lsls    r5, r1, #0
0x00001058: 9900          ldr     r1, [sp, #0]
0x0000105a: 2004          movs    r0, #4
0x0000105c: f8d9e114      ldr.w   lr, [r9, #276]  ; pAllocObject
0x00001060: 47f0          blx     lr
0x00001062: 9900          ldr     r1, [sp, #0]
0x00001064: 6889          ldr     r1, [r1, #8]
0x00001066: 6989          ldr     r1, [r1, #24]
0x00001068: 2900          cmp     r1, #0
0x0000106a: f0008025      beq.w   +74 (0x000010b8)
0x0000106e: f8d1c084      ldr.w   r12, [r1, #132]
0x00001072: f1bc0f0a      cmp.w   r12, #10
0x00001076: f2c0801f      blt.w   +62 (0x000010b8)
0x0000107a: f3bf8f5b      dmb     ish
0x0000107e: f8d161d0      ldr.w   r6, [r1, #464]
0x00001082: 0001          lsls    r1, r0, #0
0x00001084: 0007          lsls    r7, r0, #0
0x00001086: 2264          movs    r2, #100
0x00001088: 2303          movs    r3, #3
0x0000108a: 9800          ldr     r0, [sp, #0]
0x0000108c: 6840          ldr     r0, [r0, #4]
0x0000108e: 6980          ldr     r0, [r0, #24]
0x00001090: f8d0e024      ldr.w   lr, [r0, #36]
0x00001094: 47f0          blx     lr
0x00001096: 0031          lsls    r1, r6, #0
0x00001098: 0002          lsls    r2, r0, #0
0x0000109a: 6808          ldr     r0, [r1, #0]
0x0000109c: f8d00230      ldr.w   r0, [r0, #560]
0x000010a0: f8d0e024      ldr.w   lr, [r0, #36]
0x000010a4: 47f0          blx     lr
0x000010a6: b008          add     sp, sp, #32
0x000010a8: bde0          pop     {r5, r6, r7, pc}
0x000010aa: 9104          str     r1, [sp, #16]
0x000010ac: f8d9e25c      ldr.w   lr, [r9, #604]  ; pTestSuspend
0x000010b0: 47f0          blx     lr
0x000010b2: 9904          ldr     r1, [sp, #16]
0x000010b4: f7ffbfcf      b.w     -98 (0x00001056)
0x000010b8: 9004          str     r0, [sp, #16]
0x000010ba: 2003          movs    r0, #3
0x000010bc: f8d9e140      ldr.w   lr, [r9, #320]  ; pInitializeStaticStorage
0x000010c0: 47f0          blx     lr
0x000010c2: 0001          lsls    r1, r0, #0
0x000010c4: 9804          ldr     r0, [sp, #16]
0x000010c6: f7ffbfda      b.w     -76 (0x0000107e)
2: int test.foo(int, int) (dex_method_idx=3)
//foo 函数的 dex 代码
DEX CODE:
0x0000: 9000 0304                 | add-int v0, v3, v4
0x0002: 9101 0304                 | sub-int v1, v3, v4
0x0004: b210                      | mul-int/2addr v0, v1
0x0005: 9401 0304                 | rem-int v1, v3, v4
0x0007: b310                      | div-int/2addr v0, v1
0x0008: 0f00                      | return v0
OatMethodOffsets (offset=0x0000055c)
code_offset: 0x000010ed
gc_map: (offset=0x00000000)
OatQuickMethodHeader (offset=0x000010d0)
mapping_table: (offset=0x00000000)
vmap_table: (offset=0x000005ba)
Optimized CodeInfo (size=32, number_of_dex_registers=5, number_of_stack_maps=2, has_inline_info=0, number_of_bytes_for_inline_info=0, number
DexRegisterLocationCatalog (number_of_entries=4, size_in_bytes=4)
entry 0: in register (0)
entry 1: in register (1)
entry 2: in register (2)
entry 3: in register (3)
StackMap 0 (dex_pc=0x5, native_pc_offset=0x32, dex_register_map_offset=0x0, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0
v0: in register (0) [entry 0]
v2: in register (1) [entry 1]
v3: in register (2) [entry 2]
v4: in register (3) [entry 3]
StackMap 1 (dex_pc=0x7, native_pc_offset=0x38, dex_register_map_offset=0x2, inline_info_offset=0xffffffff, register_mask=0x0, stack_mask=0
v0: in register (0) [entry 0]
v1: in register (2) [entry 2]
v2: in register (1) [entry 1]
QuickMethodFrameInfo
frame_size_in_bytes: 16
core_spill_mask: 0x00008020 (r5, r15)
fp_spill_mask: 0x00000000
vr_stack_locations:
locals: v0[sp + #4294967292] v1[sp + #0]
ins: v2[sp + #20] v3[sp + #24] v4[sp + #28]
method*: v5[sp + #0]
CODE: (code_offset=0x000010ed size_offset=0x000010e8 size=56)...
//这里是oat 为我们生成出来的int foo(int a,int b) 方法的arm code
0x000010ec: b520          push    {r5, lr}
0x000010ee: b082          sub     sp, sp, #8
0x000010f0: 9000          str     r0, [sp, #0]
0x000010f2: 18d0          adds    r0, r2, r3
0x000010f4: 1ad4          subs    r4, r2, r3
0x000010f6: fb00f004      mul     r0, r0, r4
0x000010fa: 2b00          cmp     r3, #0
0x000010fc: f000800c      beq.w   +24 (0x00001118)
0x00001100: fb92f4f3      sdiv    r4, r2, r3
0x00001104: fb04f403      mul     r4, r4, r3
0x00001108: 1b12          subs    r2, r2, r4
0x0000110a: 2a00          cmp     r2, #0
0x0000110c: f0008007      beq.w   +14 (0x0000111e)
0x00001110: fb90f0f2      sdiv    r0, r0, r2
0x00001114: b002          add     sp, sp, #8
0x00001116: bd20          pop     {r5, pc}
0x00001118: f8d9e268      ldr.w   lr, [r9, #616]  ; pThrowDivZero
0x0000111c: 47f0          blx     lr
0x0000111e: f8d9e268      ldr.w   lr, [r9, #616]  ; pThrowDivZero
0x00001122: 47f0          blx     lr


//下面开始分析代码

其实 android 的代码感觉一直都很乱,我这份看的是 android-5.1.0_r3 的 art 代码,我们可以看到 QuickCompiler 放在了一个名为 compilers.h 的文件里面,父类 Compiler 放在 compiler.h 里面。

// Compiler subclass for the "Quick" backend. Every member function declared
// here is tagged OVERRIDE, i.e. it is meant to override a member of Compiler.
// Only InitCompilationUnit and the constructor are defined inline; the rest
// are defined out of line.
class QuickCompiler : public Compiler {
public:
// Forwards `driver` to the Compiler base together with the constant 100.
// NOTE(review): the meaning of 100 is defined by Compiler's constructor,
// which is not visible here - confirm before changing it.
explicit QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}

// Set-up hook; const, takes no arguments.
void Init() const OVERRIDE;

// Tear-down counterpart of Init(); const, takes no arguments.
void UnInit() const OVERRIDE;

// Compiles the method described by `code_item` / `method_idx` in `dex_file`
// and returns the resulting CompiledMethod.
// NOTE(review): ownership of the returned pointer is not visible here.
CompiledMethod* Compile(const DexFile::CodeItem* code_item,
uint32_t access_flags,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file) const OVERRIDE;

// Variant of Compile() that takes no CodeItem; presumably used for native
// (JNI) methods - TODO confirm against the definition.
CompiledMethod* JniCompile(uint32_t access_flags,
uint32_t method_idx,
const DexFile& dex_file) const OVERRIDE;

// Returns an entry point address for `method` as a uintptr_t.
// Annotated as requiring Locks::mutator_lock_ to be held (shared).
uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

// Writes ELF output to `file` using `oat_writer` and `dex_files`; returns a
// bool (presumably success - confirm against the definition).
// Annotated as requiring Locks::mutator_lock_ to be held (shared).
bool WriteElf(art::File* file,
OatWriter* oat_writer,
const std::vector<const art::DexFile*>& dex_files,
const std::string& android_root,
bool is_host) const
OVERRIDE
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

// Returns the code generator (Backend) to use for compilation unit `cu`.
// `compilation_unit` is an untyped pointer whose meaning is not visible here.
Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const OVERRIDE;

// Empty body: this compiler performs no work on `cu` here.
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE {}

/*
* @brief Generate and return Dwarf CFI initialization, if supported by the
* backend.
* @param driver CompilerDriver for this compile.
* @returns nullptr if not supported by backend or a vector of bytes for CFI DWARF
* information.
* @note This is used for backtrace information in generated code.
*/
std::vector<uint8_t>* GetCallFrameInformationInitialization(const CompilerDriver& driver) const
OVERRIDE;

private:
// Presumably disables copy construction and copy assignment (project macro).
DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
};


QuickCompiler 继承自 Compiler ,我们先分析 完成dalvik 指令编译的 Compile 函数。

// Compiles one dex method.
//
// TryCompileWithSeaIR is consulted first; whenever it yields a non-null
// CompiledMethod that result is returned unchanged. Otherwise the method is
// handed to ArtQuickCompileMethod together with this compiler's driver.
CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
                                       uint32_t access_flags,
                                       InvokeType invoke_type,
                                       uint16_t class_def_idx,
                                       uint32_t method_idx,
                                       jobject class_loader,
                                       const DexFile& dex_file) const {
  CompiledMethod* const sea_ir_result = TryCompileWithSeaIR(
      code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file);

  // The conditional operator evaluates only the selected operand, so
  // ArtQuickCompileMethod runs only when the SeaIR attempt produced nothing.
  return (sea_ir_result != nullptr)
             ? sea_ir_result
             : ArtQuickCompileMethod(GetCompilerDriver(), code_item, access_flags, invoke_type,
                                     class_def_idx, method_idx, class_loader, dex_file);
}


QuickCompiler::Compile 调用 ArtQuickCompileMethod 然后进入到 Frontend.cc。里面就是一个大杂烩啊。

ArtQuickCompileMethod -> CompileOneMethod -> CompileMethod


在 CompileMethod 里面,const DexFile::CodeItem* code_item 参数就是dex 文件格式的 CodeItem (写完这篇会把 dex 格式分析一遍,以前没做笔记,只自己实现了一份 dexdump ,借这个机会再熟悉一次),先判断 code_item->insns_size_in_code_units_ , code_item->registers_size_ 是否超出编译界限。

CompilationUnit cu(driver.GetArenaPool());

创建一个编译体,存放所有编译相关的东西。

cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));

创建MIR Graph对象

cu.cg.reset(compiler->GetCodeGenerator(&cu, llvm_compilation_unit));

根据 cu->instruction_set 创建一个 Mir2Lir

cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file);

负责 Dex Method 到 MIRGraph 的转换,这个过程就是我们比较关心的。函数里面解析 Method 里面所有的指令,通过 cur_block->AppendMIR(insn); 存放到一个 BasicBlock 中。也就是说每个 BasicBlock 中存放着多个 MIR *insn。

那么具体多少条 MIR 组成一个 BasicBlock 呢?下面我们具体说一下。在二进制动态翻译的时候,比如 VMware(没有硬件 VT 虚拟化加持的时候),还有 QEMU 模拟器里面,都会有代码基本块的划分,所用的基本块划分技术也大致相同。程序是由很多数据操作指令和流程处理指令衔接在一起的,基本都是按照分支来划分基本块,这样有一个好处,就是翻译的代码能有效地控制,也方便管理每一个基本块。具体代码可以看 InlineMethod 中的 while (code_ptr < code_end) 循环:

// Body of one iteration of the instruction-parsing loop. Each pass decodes a
// single dalvik instruction into a fresh MIR, appends it to cur_block (except
// for embedded-data NOPs), and then, depending on the opcode's flags, ends or
// links basic blocks. cur_block, code_ptr, code_end and merged_df_flags are
// declared outside this excerpt.
{
MIR *insn = NewMIR();
insn->offset = current_offset_;
insn->m_unit_index = current_method_;
// ParseInsn fills insn->dalvikInsn; the returned width is what code_ptr and
// current_offset_ are advanced by further down.
int width = ParseInsn(code_ptr, &insn->dalvikInsn);
Instruction::Code opcode = insn->dalvikInsn.opcode;
// Per-opcode counter; skipped when opcode_count_ is NULL.
if (opcode_count_ != NULL) {
opcode_count_[static_cast<int>(opcode)]++;
}

int flags = Instruction::FlagsOf(insn->dalvikInsn.opcode);
int verify_flags = Instruction::VerifyFlagsOf(insn->dalvikInsn.opcode);

// Accumulate the data-flow attribute bits of every parsed instruction.
uint64_t df_flags = GetDataFlowAttributes(insn);
merged_df_flags |= df_flags;

// An instruction with DF_A_WIDE set counts as two definitions, otherwise one.
if (df_flags & DF_HAS_DEFS) {
def_count_ += (df_flags & DF_A_WIDE) ? 2 : 1;
}

if (df_flags & DF_LVN) {
cur_block->use_lvn = true;  // Run local value numbering on this basic block.
}

// Check for inline data block signatures.
if (opcode == Instruction::NOP) {
// A simple NOP will have a width of 1 at this point, embedded data NOP > 1.
if ((width == 1) && ((current_offset_ & 0x1) == 0x1) && ((code_end - code_ptr) > 1)) {
// Could be an aligning nop.  If an embedded data NOP follows, treat pair as single unit.
uint16_t following_raw_instruction = code_ptr[1];
if ((following_raw_instruction == Instruction::kSparseSwitchSignature) ||
(following_raw_instruction == Instruction::kPackedSwitchSignature) ||
(following_raw_instruction == Instruction::kArrayDataSignature)) {
width += Instruction::At(code_ptr + 1)->SizeInCodeUnits();
}
}
if (width == 1) {
// It is a simple nop - treat normally.
cur_block->AppendMIR(insn);
} else {
// Embedded data: the MIR is not appended to any block in this branch.
DCHECK(cur_block->fall_through == NullBasicBlockId);
DCHECK(cur_block->taken == NullBasicBlockId);
// Unreachable instruction, mark for no continuation and end basic block.
flags &= ~Instruction::kContinue;
FindBlock(current_offset_ + width, /* split */ false, /* create */ true,
/* immed_pred_block_p */ NULL);
}
} else {
cur_block->AppendMIR(insn);
}

// Associate the starting dex_pc for this opcode with its containing basic block.
dex_pc_to_block_map_.Put(insn->offset, cur_block->id);

// code_ptr now points past this instruction, while current_offset_ is only
// advanced after the flag handling below, so the Process* helpers still
// receive this instruction's own offset.
code_ptr += width;

// Only the first matching flag in this chain is handled: branch, then
// return, then throw, then switch.
if (flags & Instruction::kBranch) {
cur_block = ProcessCanBranch(cur_block, insn, current_offset_,
width, flags, code_ptr, code_end);
} else if (flags & Instruction::kReturn) {
// A return links the current block to the exit block via fall_through and
// registers the current block as a predecessor of the exit block.
cur_block->terminated_by_return = true;
cur_block->fall_through = exit_block_->id;
exit_block_->predecessors->Insert(cur_block->id);
/*
* Terminate the current block if there are instructions
* afterwards.
*/
if (code_ptr < code_end) {
/*
* Create a fallthrough block for real instructions
* (incl. NOP).
*/
FindBlock(current_offset_ + width, /* split */ false, /* create */ true,
/* immed_pred_block_p */ NULL);
}
} else if (flags & Instruction::kThrow) {
cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
code_ptr, code_end);
} else if (flags & Instruction::kSwitch) {
cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
}
if (verify_flags & Instruction::kVerifyVarArgRange ||
verify_flags & Instruction::kVerifyVarArgRangeNonZero) {
/*
* The Quick backend's runtime model includes a gap between a method's
* argument ("in") vregs and the rest of its vregs.  Handling a range instruction
* which spans the gap is somewhat complicated, and should not happen
* in normal usage of dx.  Punt to the interpreter.
*/
// The range covers vA registers starting at vC.
int first_reg_in_range = insn->dalvikInsn.vC;
int last_reg_in_range = first_reg_in_range + insn->dalvikInsn.vA - 1;
if (IsInVReg(first_reg_in_range) != IsInVReg(last_reg_in_range)) {
punt_to_interpreter_ = true;
}
}
current_offset_ += width;
// Look up (without creating) a block that already starts at the next offset.
BasicBlock* next_block = FindBlock(current_offset_, /* split */ false, /* create */
false, /* immed_pred_block_p */ NULL);
if (next_block) {
/*
* The next instruction could be the target of a previously parsed
* forward branch so a block is already created. If the current
* instruction is not an unconditional branch, connect them through
* the fall-through link.
*/
DCHECK(cur_block->fall_through == NullBasicBlockId ||
GetBasicBlock(cur_block->fall_through) == next_block ||
GetBasicBlock(cur_block->fall_through) == exit_block_);

if ((cur_block->fall_through == NullBasicBlockId) && (flags & Instruction::kContinue)) {
cur_block->fall_through = next_block->id;
next_block->predecessors->Insert(cur_block->id);
}
// Subsequent instructions are appended to the block found at the new offset.
cur_block = next_block;
}
}


CanCompileMethod

遍历所有 BasicBlock 中的 MIR,判断其中的指令是否都受支持
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  android