diff --git a/unpin.cpp b/unpin.cpp index 689c1de5272af098b45a0057e52ea746d826c81d..bf68f9806288228c1a8f953ac789b9504d31e69a 100644 --- a/unpin.cpp +++ b/unpin.cpp @@ -334,12 +334,13 @@ void Unpin_t::DoUpdateForInstructions() { assert(bo_wrt==nullptr); // not yet imp'd WRT offsetting. assert(to_addr==0); // not yet imp'd WRT offsetting. - const auto mnemonic =disasm.getMnemonic(); - const auto is_adr_type =mnemonic=="adr"; - const auto is_adrp_type=mnemonic=="adrp"; + const auto mnemonic = disasm.getMnemonic(); + const auto is_adr_type = mnemonic=="adr"; + const auto is_adrp_type = mnemonic=="adrp"; const auto is_ldr_type = mnemonic=="ldr"; const auto is_ldr_int_type = is_ldr_type && disasm.getOperand(0).isGeneralPurposeRegister(); const auto is_ldr_fp_type = is_ldr_type && disasm.getOperand(0).isFpuRegister(); + const auto is_ldrsw_type = mnemonic=="ldrsw"; const auto mask1 =(1<< 1)-1; const auto mask2 =(1<< 2)-1; const auto mask5 =(1<< 5)-1; @@ -486,56 +487,59 @@ void Unpin_t::DoUpdateForInstructions() } else { - // imm19->64 bit address didn't work. Split it up into two parts. - - /* the plan : - * FA: b L0 - * FT: - * .. - * L0 adrp dest_reg, <addr-page number> - * L1 ldr dest_reg, [dst_reg, #addr-page offset] - * L2: b ft - */ - const auto tramp_size=3*4; // 3 insns, 4 bytes each const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr; const auto destreg=full_insn&mask5; - const auto tramp_range=ms.GetFreeRange(tramp_size); - const auto tramp_start=tramp_range.GetStart(); - // don't be too fancy, just reserve 12 bytes. - ms.SplitFreeRange({tramp_start,tramp_start+12}); - - const auto FA=from_insn_location; const auto FT=from_insn_location+4; - const auto L0=tramp_start; - const auto L1=tramp_start+4; - const auto L2=tramp_start+8; const auto branch_bytes=string("\x00\x00\x00\x14",4); - // const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr - const auto relocd_insn_pageno = L1>>12; const auto address_to_generate_pageno = address_to_generate >> 12; const auto address_to_generate_page_offset = address_to_generate & mask12; - const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno; - const auto relocd_immhi19 = relocd_imm21_ext >> 2; - const auto relocd_immlo2 = relocd_imm21_ext & mask2; - - // this should be +/- 4gb, so we shouldn't fail now! - assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext); - - // put an uncond branch at where the adr was. - // and make it point at L0 - ms.PlopBytes(FA,branch_bytes.c_str(),4); - zo->ApplyPatch(FA,L0); - - // adrp: 1 imm2lo 1 0000 immhi19 Rd - auto adrp_bytes=string("\x00\x00\x00\x90",4); - auto adrp_word =*(int*)adrp_bytes.c_str(); - adrp_word|=destreg<<0; - adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5); - ms.PlopBytes(L0,(char*)&adrp_word,4); if(is_ldr_int_type) { + // imm19->64 bit address didn't work. Split it up into two parts. + + /* the plan : + * FA: b L0 + * FT: + * .. + * L0 adrp dest_reg, <addr-page number> + * L1 ldr dest_reg, [dst_reg, #addr-page offset] + * L2: b ft + */ + const auto tramp_size=3*4; // 3 insns, 4 bytes each + const auto tramp_range=ms.GetFreeRange(tramp_size); + const auto tramp_start=tramp_range.GetStart(); + // don't be too fancy, just reserve 12 bytes. + ms.SplitFreeRange({tramp_start,tramp_start+12}); + + + // and give the bytes some names + const auto L0=tramp_start; + const auto L1=tramp_start+4; + const auto L2=tramp_start+8; + + // calculate the immediates for the new adr and ldr instruction. + const auto relocd_insn_pageno = L1>>12; + const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno; + const auto relocd_immhi19 = relocd_imm21_ext >> 2; + const auto relocd_immlo2 = relocd_imm21_ext & mask2; + // this should be +/- 4gb, so we shouldn't fail now! + assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext); + + + // put an uncond branch at where the adr was. + // and make it point at L0 + ms.PlopBytes(FA,branch_bytes.c_str(),4); + zo->ApplyPatch(FA,L0); + + // adrp: 1 imm2lo 1 0000 immhi19 Rd + auto adrp_bytes=string("\x00\x00\x00\x90",4); + auto adrp_word =*(int*)adrp_bytes.c_str(); + adrp_word|=destreg<<0; + adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5); + ms.PlopBytes(L0,(char*)&adrp_word,4); + // convert: ldr w/x reg : 0 x1 011 0 00 ---imm19---- Rt5 x1 indicates size (0,1 -> w/x) // to : ldr x/w reg : 1 x1 111 0 01 01 imm12 Rn5 Rt5 x1 indciates size (0,1 -> w/x) auto new_ldr_bytes=string("\x00\x00\x40\xb9",4); @@ -548,9 +552,75 @@ void Unpin_t::DoUpdateForInstructions() new_ldr_word|=scaled_page_offset << 10 ; // imm12 new_ldr_word|=orig_ldr_size_bit << 30; // x1 ms.PlopBytes(L1,(char*)&new_ldr_word,4); + + // put an uncond branch the end of the trampoline + // and make it jump at FT + ms.PlopBytes(L2,branch_bytes.c_str(),4); + zo->ApplyPatch(L2,FT); + + // should be few enough of these to always print + cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to " + << hex << L0 << "-" << L0+tramp_size-1 << endl; + } else if(is_ldr_fp_type) { + /* the scheme for int-type operations doesn't work + * for fp-type because there is no free register. + * use this plan: + * FA: b L0 + * FT: + * .. + * L0 str x0, [sp+128] ; 128 for red zoning + * L1 adrp x0, <addr-page number> + * L2 ldr dest_reg, [x0, #addr-page offset] + * L3 ldr x0, [sp+128] ; 128 for red zoning + * L4 b FT + */ + + + + // allocate and reserve space for the code. + const auto tramp_size=5*4; // 3 insns, 4 bytes each + const auto tramp_range=ms.GetFreeRange(tramp_size); + const auto tramp_start=tramp_range.GetStart(); + // don't be too fancy, just reserve 12 bytes. + ms.SplitFreeRange({tramp_start,tramp_start+12}); + + + // give the bytes some names + const auto L0=tramp_start ; + const auto L1=tramp_start+4 ; + const auto L2=tramp_start+8 ; + const auto L3=tramp_start+12; + const auto L4=tramp_start+16; + + // calculate the immediates for the new adr and ldr instruction. + const auto relocd_insn_pageno = L1>>12; + const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno; + const auto relocd_immhi19 = relocd_imm21_ext >> 2; + const auto relocd_immlo2 = relocd_imm21_ext & mask2; + // this should be +/- 4gb, so we shouldn't fail now! + assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext); + + // put an uncond branch at where the adr was. + // and make it point at L0 + ms.PlopBytes(FA,branch_bytes.c_str(),4); + zo->ApplyPatch(FA,L0); + + // put save of x0 in place. + // diassembly: f81803e0 stur x0, [sp, #-128] + const auto strx0_bytes=string("\xe0\x03\x18\xf8",4); + ms.PlopBytes(L0,strx0_bytes.c_str(),4); + + // adrp: 1 imm2lo 1 0000 immhi19 Rd + auto adrp_bytes=string("\x00\x00\x00\x90",4); + auto adrp_word =*(int*)adrp_bytes.c_str(); + // adrp_word|=destreg<<0; ; destreg for this insn is x0. + adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5); + ms.PlopBytes(L1,(char*)&adrp_word,4); + + // convert: ldr s/d/q reg: opc2 01 11 00 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q) // to: ldr b/s/d/q reg: size2 11 11 01 opc2 imm12 Rn Rt auto new_ldr_bytes=string("\x00\x00\x00\x3d",4); @@ -578,17 +648,116 @@ void Unpin_t::DoUpdateForInstructions() ldr_size == 16 ? 0x3u : throw invalid_argument("cannot decode ldr floating-point access size"); - // add variable fields to new insn - new_ldr_word|=destreg<<0; // Rt - new_ldr_word|=destreg<<5; // Rn + // add variable fields to new insn and drop it in the mem space. + new_ldr_word|=destreg<<0; // Rt -- should be actual dest reg, not x0 + // new_ldr_word|=destreg<<5; // Rn -- should be x0 new_ldr_word|=((address_to_generate_page_offset/ldr_size) << 10); // imm12 new_ldr_word|=(new_ldr_size_bits<<30); // size2 new_ldr_word|=(new_ldr_opc2_bits<<22); // opc2 + ms.PlopBytes(L2,(char*)&new_ldr_word,4); + + + // drop in the restore of x0 + // disassembly: f85803e0 ldur x0, [sp, #-128] + const auto ldrx0_bytes=string("\xe0\x03\x58\xf8",4); + ms.PlopBytes(L3,ldrx0_bytes.c_str(),4); + + // put an uncond branch the end of the trampoline + // and make it jump at FT + ms.PlopBytes(L4,branch_bytes.c_str(),4); + zo->ApplyPatch(L4,FT); + + // should be few enough of these to always print + cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to " + << hex << L0 << "-" << L0+tramp_size-1 << endl; - ms.PlopBytes(L1,(char*)&new_ldr_word,4); } else assert(0); + } + + } + else if(is_ldrsw_type) + { + // ldrsw x reg : 1001 1000 imm19 Rt + const auto imm19 = ((int64_t)full_insn >> 5 ) & mask19; + const auto imm19_ext= (imm19 << 45) >> 45; + const auto referenced_addr=(imm19_ext<<2)+from_insn->GetAddress()->GetVirtualOffset()+4; + const auto new_imm19_ext =((int64_t)referenced_addr-(int64_t)from_insn_location-4+(int64_t)reloc->GetAddend()+(int64_t)to_addr)>>2; + if( ((new_imm19_ext << 45) >> 45) == new_imm19_ext) + { + const auto clean_new_insn = full_insn & ~(mask19 << 5); + const auto new_insn = clean_new_insn | ((new_imm19_ext & mask19)<<5); + // put the new instruction in the output + ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len); + if (m_verbose) + { + cout << "Relocating a ldrsw pcrel relocation with orig_addr=" << hex + << (referenced_addr) << " offset=(pc+" << imm19_ext << ")" << endl; + cout << "Based on: " << disasm.getDisassembly() + << " now located at : 0x" << hex << from_insn_location << " with offset=(pc + " + << new_imm19_ext << ")" << endl; + } + } + else + { + // imm19->64 bit address didn't work. Split it up into two parts. + + /* the plan : + * FA: b L0 + * FT: + * .. + * L0 adrp dest_reg, <addr-page number> + * L1 ldr dest_reg, [dst_reg, #addr-page offset] + * L2: b ft + */ + const auto tramp_size=3*4; // 3 insns, 4 bytes each + const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr; + const auto destreg=full_insn&mask5; + const auto tramp_range=ms.GetFreeRange(tramp_size); + const auto tramp_start=tramp_range.GetStart(); + // don't be too fancy, just reserve 12 bytes. + ms.SplitFreeRange({tramp_start,tramp_start+12}); + + + const auto FA=from_insn_location; + const auto FT=from_insn_location+4; + const auto L0=tramp_start; + const auto L1=tramp_start+4; + const auto L2=tramp_start+8; + const auto branch_bytes=string("\x00\x00\x00\x14",4); + const auto relocd_insn_pageno = L1>>12; + const auto address_to_generate_pageno = address_to_generate >> 12; + const auto address_to_generate_page_offset = address_to_generate & mask12; + const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno; + const auto relocd_immhi19 = relocd_imm21_ext >> 2; + const auto relocd_immlo2 = relocd_imm21_ext & mask2; + + // this should be +/- 4gb, so we shouldn't fail now! + assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext); + + // put an uncond branch at where the adr was. + // and make it point at L0 + ms.PlopBytes(FA,branch_bytes.c_str(),4); + zo->ApplyPatch(FA,L0); + + // adrp: 1 imm2lo 1 0000 immhi19 Rd + auto adrp_bytes=string("\x00\x00\x00\x90",4); + auto adrp_word =*(int*)adrp_bytes.c_str(); + adrp_word|=destreg<<0; + adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5); + ms.PlopBytes(L0,(char*)&adrp_word,4); + + // convert: ldrsw x reg : 1001 1000 ---imm19--- Rt + // to : ldrsw x reg : 1011 1001 10 imm12 Rn Rt + auto new_ldr_bytes=string("\x00\x00\x80\xb9",4); + auto new_ldr_word =*(int*)new_ldr_bytes.c_str(); + const auto scale=0x2; + const auto scaled_page_offset=(address_to_generate_page_offset>>scale) ; + new_ldr_word|=destreg<<0; // Rt + new_ldr_word|=destreg<<5; // Rn + new_ldr_word|=scaled_page_offset << 10 ; // imm12 + ms.PlopBytes(L1,(char*)&new_ldr_word,4); // put an uncond branch the end of the trampoline // and make it jump at FT