From 3cec834849ef7d1f4d761409a4c50c8196838799 Mon Sep 17 00:00:00 2001 From: Jason Hiser <jdhiser@gmail.com> Date: Mon, 31 Dec 2018 12:36:07 -0500 Subject: [PATCH] support for short-imm instructions getting trampolined out-of-band --- unpin.cpp | 246 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 217 insertions(+), 29 deletions(-) diff --git a/unpin.cpp b/unpin.cpp index 6a03fb8..d327a22 100644 --- a/unpin.cpp +++ b/unpin.cpp @@ -337,7 +337,10 @@ void Unpin_t::DoUpdateForInstructions() const auto mnemonic =disasm.getMnemonic(); const auto is_adr_type =mnemonic=="adr"; const auto is_adrp_type=mnemonic=="adrp"; - const auto is_ldr_type =mnemonic=="ldr"; + const auto is_ldr_type = mnemonic=="ldr"; + const auto is_ldr_int_type = is_ldr_type && disasm.getOperand(0).isGeneralPurposeRegister(); + const auto is_ldr_fp_type = is_ldr_type && disasm.getOperand(0).isFpuRegister(); + const auto mask1 =(1<< 1)-1; const auto mask2 =(1<< 2)-1; const auto mask5 =(1<< 5)-1; const auto mask12=(1<<12)-1; @@ -372,51 +375,236 @@ void Unpin_t::DoUpdateForInstructions() (int64_t)new_insn_pageno + (int64_t)reloc->GetAddend()+(int64_t)to_addr; // make sure no overflow. 
- assert( ((new_imm21_ext << 43) >> 43) == new_imm21_ext); - const auto new_immhi19 = new_imm21_ext >> 2; - const auto new_immlo2 = new_imm21_ext & mask2; - const auto clean_new_insn= full_insn & ~(mask2<<29) & ~ (mask19 << 5); - const auto new_insn = clean_new_insn | ((new_immlo2&mask2) << 29) | ((new_immhi19&mask19)<<5); - // put the new instruction in the output - ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len); - if (m_verbose) + if(((new_imm21_ext << 43) >> 43) == new_imm21_ext) { - cout << "Relocating a adr(p) pcrel relocation with orig_pageno=" << hex - << (orig_insn_pageno << 12) << " offset=(page-pc+" << imm21_ext << ")" << endl; - cout << "Based on: " << disasm.getDisassembly() << hex << " originally at " << orig_insn_addr - << " now located at : 0x" << hex << from_insn_location << " with offset=(page-pc + " - << new_imm21_ext << ")" << endl; + const auto new_immhi19 = new_imm21_ext >> 2; + const auto new_immlo2 = new_imm21_ext & mask2; + const auto clean_new_insn= full_insn & ~(mask2<<29) & ~ (mask19 << 5); + const auto new_insn = clean_new_insn | ((new_immlo2&mask2) << 29) | ((new_immhi19&mask19)<<5); + // put the new instruction in the output + ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len); + if (m_verbose) + { + cout << "Relocating a adr(p) pcrel relocation with orig_pageno=" << hex + << (orig_insn_pageno << 12) << " offset=(page-pc+" << imm21_ext << ")" << endl; + cout << "Based on: " << disasm.getDisassembly() << hex << " originally at " << orig_insn_addr + << " now located at : 0x" << hex << from_insn_location << " with offset=(page-pc + " + << new_imm21_ext << ")" << endl; + } + } + else + { + assert(is_adr_type); // don't even know what to do if the PAGE is too far away! + // imm21->64 bit address didn't work. Split it up into two parts. + + /* the plan : + * FA: b L0 + * FT: + * .. 
+ * L0 adrp dest_reg, <addr-page number> + * L1 add dest_reg, dest_reg, (addr-page offset) + * L2: b ft + */ + const auto tramp_size=3*4; // 3 insns, 4 bytes each + const auto address_to_generate=imm21_ext+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr; + const auto destreg=full_insn&mask5; + const auto tramp_range=ms.GetFreeRange(tramp_size); + const auto tramp_start=tramp_range.GetStart(); + // don't be too fancy, just reserve 12 bytes. + ms.SplitFreeRange({tramp_start,tramp_start+12}); + + + const auto FA=from_insn_location; + const auto FT=from_insn_location+4; + const auto L0=tramp_start; + const auto L1=tramp_start+4; + const auto L2=tramp_start+8; + const auto branch_bytes=string("\x00\x00\x00\x14",4); + // const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr + const auto relocd_insn_pageno = L1>>12; + const auto address_to_generate_pageno = address_to_generate >> 12; + const auto address_to_generate_page_offset = address_to_generate & mask12; + const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno; + const auto relocd_immhi19 = relocd_imm21_ext >> 2; + const auto relocd_immlo2 = relocd_imm21_ext & mask2; + + // this should be +/- 4gb, so we shouldn't fail now! + assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext); + + // put an uncond branch at where the adr was. 
+ // and make it point at L0 + ms.PlopBytes(FA,branch_bytes.c_str(),4); + zo->ApplyPatch(FA,L0); + + // adrp: 1 imm2lo 1 0000 immhi19 Rd + auto adrp_bytes=string("\x00\x00\x00\x90",4); + auto adrp_word =*(int*)adrp_bytes.c_str(); + adrp_word|=destreg<<0; + adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5); + ms.PlopBytes(L0,(char*)&adrp_word,4); + + // add64 imm12 = 1001 0001 00 imm12 Rn Rd + auto add_bytes=string("\x00\x00\x00\x91",4); + auto add_word =*(int*)add_bytes.c_str(); + add_word|=destreg<<0; + add_word|=destreg<<5; + add_word|=address_to_generate_page_offset << 10 ; + ms.PlopBytes(L1,(char*)&add_word,4); + + // put an uncond branch the end of the trampoline + // and make it jump at FT + ms.PlopBytes(L2,branch_bytes.c_str(),4); + zo->ApplyPatch(L2,FT); + + // should be few enough of these to always print + cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to " + << hex << L0 << "-" << L0+tramp_size << endl; } } else if(is_ldr_type) { - // ldr: 0 x1 0110 0 0 imm19 Rt5 + // ldr w/x reg : 0 x1 0110 0 0 imm19 Rt5, x1 indicate size (0,1 -> w/x) + // ldr s/d/q reg : opc2 0111 0 0 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q) const auto imm19 = ((int64_t)full_insn >> 5 ) & mask19; const auto imm19_ext= (imm19 << 45) >> 45; const auto referenced_addr=(imm19_ext<<2)+from_insn->GetAddress()->GetVirtualOffset()+4; const auto new_imm19_ext =((int64_t)referenced_addr-(int64_t)from_insn_location-4+(int64_t)reloc->GetAddend()+(int64_t)to_addr)>>2; - assert( ((new_imm19_ext << 45) >> 45) == new_imm19_ext); - const auto clean_new_insn = full_insn & ~(mask19 << 5); - const auto new_insn = clean_new_insn | ((new_imm19_ext & mask19)<<5); - // put the new instruction in the output - ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len); - if (m_verbose) + if( ((new_imm19_ext << 45) >> 45) == new_imm19_ext) + { + const auto clean_new_insn = full_insn & ~(mask19 << 5); + const auto new_insn = clean_new_insn | 
((new_imm19_ext & mask19)<<5); + // put the new instruction in the output + ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len); + if (m_verbose) + { + cout << "Relocating a ldr pcrel relocation with orig_addr=" << hex + << (referenced_addr) << " offset=(pc+" << imm19_ext << ")" << endl; + cout << "Based on: " << disasm.getDisassembly() + << " now located at : 0x" << hex << from_insn_location << " with offset=(pc + " + << new_imm19_ext << ")" << endl; + } + } + else { - cout << "Relocating a ldr pcrel relocation with orig_addr=" << hex - << (referenced_addr) << " offset=(pc+" << imm19_ext << ")" << endl; - cout << "Based on: " << disasm.getDisassembly() - << " now located at : 0x" << hex << from_insn_location << " with offset=(pc + " - << new_imm19_ext << ")" << endl; + // imm19->64 bit address didn't work. Split it up into two parts. + + /* the plan : + * FA: b L0 + * FT: + * .. + * L0 adrp dest_reg, <addr-page number> + * L1 ldr dest_reg, [dst_reg, #addr-page offset] + * L2: b ft + */ + const auto tramp_size=3*4; // 3 insns, 4 bytes each + const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr; + const auto destreg=full_insn&mask5; + const auto tramp_range=ms.GetFreeRange(tramp_size); + const auto tramp_start=tramp_range.GetStart(); + // don't be too fancy, just reserve 12 bytes. 
+ ms.SplitFreeRange({tramp_start,tramp_start+12}); + + + const auto FA=from_insn_location; + const auto FT=from_insn_location+4; + const auto L0=tramp_start; + const auto L1=tramp_start+4; + const auto L2=tramp_start+8; + const auto branch_bytes=string("\x00\x00\x00\x14",4); + // const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr + const auto relocd_insn_pageno = L1>>12; + const auto address_to_generate_pageno = address_to_generate >> 12; + const auto address_to_generate_page_offset = address_to_generate & mask12; + const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno; + const auto relocd_immhi19 = relocd_imm21_ext >> 2; + const auto relocd_immlo2 = relocd_imm21_ext & mask2; + + // this should be +/- 4gb, so we shouldn't fail now! + assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext); + + // put an uncond branch at where the adr was. + // and make it point at L0 + ms.PlopBytes(FA,branch_bytes.c_str(),4); + zo->ApplyPatch(FA,L0); + + // adrp: 1 imm2lo 1 0000 immhi19 Rd + auto adrp_bytes=string("\x00\x00\x00\x90",4); + auto adrp_word =*(int*)adrp_bytes.c_str(); + adrp_word|=destreg<<0; + adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5); + ms.PlopBytes(L0,(char*)&adrp_word,4); + + if(is_ldr_int_type) + { + // convert: ldr w/x reg : 0 x1 011 0 00 ---imm19---- Rt5 x1 indicate size (0,1 -> w/x) + // to : ldr x/w reg : 1 x1 111 0 01 01 imm12 Rn5 Rt5 x1 indciates szie (0,1 -> w/x) + auto new_ldr_bytes=string("\x00\x00\x40\xb9",4); + auto new_ldr_word =*(int*)new_ldr_bytes.c_str(); + const auto orig_ldr_size_bit=(full_insn>>30)&mask1; + const auto scale=0x2|orig_ldr_size_bit; + const auto scaled_page_offset=(address_to_generate_page_offset>>scale) ; + new_ldr_word|=destreg<<0; // Rt + new_ldr_word|=destreg<<5; // Rn + new_ldr_word|=scaled_page_offset << 10 ; // imm12 + new_ldr_word|=orig_ldr_size_bit << 30; // x1 + 
ms.PlopBytes(L1,(char*)&new_ldr_word,4); + } + else if(is_ldr_fp_type) + { + // convert: ldr s/d/q reg: opc2 01 11 00 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q) + // to: ldr b/s/d/q reg: size2 11 11 01 opc2 imm12 Rn Rt + auto new_ldr_bytes=string("\x00\x00\x00\x3d",4); + auto new_ldr_word =*(int*)new_ldr_bytes.c_str(); + const auto orig_ldr_opc_bits=(full_insn>>30)&mask2; + + // decode size out of old ldr + const auto ldr_size= + orig_ldr_opc_bits == 0x0 ? 4u : + orig_ldr_opc_bits == 0x1 ? 8u : + orig_ldr_opc_bits == 0x2 ? 16u : + throw invalid_argument("cannot decode ldr floating-point access size"); + + // encode size field for new ldr. + const auto new_ldr_size_bits= + ldr_size == 4 ? 0x2u : + ldr_size == 8 ? 0x3u : + ldr_size == 16 ? 0x0u : + throw invalid_argument("cannot decode ldr floating-point access size"); + + // encode opc2 + const auto new_ldr_opc2_bits= + ldr_size == 4 ? 0x1u : + ldr_size == 8 ? 0x1u : + ldr_size == 16 ? 0x3u : + throw invalid_argument("cannot decode ldr floating-point access size"); + + // add variable fields to new insn + new_ldr_word|=destreg<<0; // Rt + new_ldr_word|=destreg<<5; // Rn + new_ldr_word|=((address_to_generate_page_offset/ldr_size) << 10); // imm12 + new_ldr_word|=(new_ldr_size_bits<<30); // size2 + new_ldr_word|=(new_ldr_opc2_bits<<22); // opc2 + + ms.PlopBytes(L1,(char*)&new_ldr_word,4); + } + else + assert(0); + + // put an uncond branch the end of the trampoline + // and make it jump at FT + ms.PlopBytes(L2,branch_bytes.c_str(),4); + zo->ApplyPatch(L2,FT); + + // should be few enough of these to always print + cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to " + << hex << L0 << "-" << L0+tramp_size-1 << endl; } } else assert(0); - - } - } // instruction has a absolute memory operand that needs it's displacement updated. else if(reloc->GetType()==string("absoluteptr_to_scoop")) -- GitLab