/*************************************************************************** * Copyright (c) 2014 Zephyr Software LLC. All rights reserved. * * This software is furnished under a license and/or other restrictive * terms and may be used and copied only in accordance with such terms * and the inclusion of the above copyright notice. This software or * any other copies thereof may not be provided or otherwise made * available to any other person without the express written consent * of an authorized representative of Zephyr Software LCC. Title to, * ownership of, and all rights in the software is retained by * Zephyr Software LCC. * * Zephyr Software LLC. Proprietary Information * * Unless otherwise specified, the information contained in this * directory, following this legend, and/or referenced herein is * Zephyr Software LLC. (Zephyr) Proprietary Information. * * CONTACT * * For technical assistance, contact Zephyr Software LCC. at: * * * Zephyr Software, LLC * 2040 Tremont Rd * Charlottesville, VA 22911 * * E-mail: jwd@zephyr-software.com **************************************************************************/ #include <string> #include <algorithm> #include "unpin.h" #include <memory> #include <inttypes.h> #include <stdint.h> #include <limits.h> #include <irdb-util> using namespace IRDB_SDK; using namespace std; using namespace Zipr_SDK; static inline uint32_t rotr32 (uint32_t n, unsigned int c) { const unsigned int mask = (CHAR_BIT*sizeof(n) - 1); // assert ( (c<=mask) &&"rotate by type width or more"); c &= mask; return (n>>c) | (n<<( (-c)&mask )); } #define ALLOF(a) begin(a),end(a) // per machine stuff void UnpinArm32_t::HandleRetAddrReloc(Instruction_t* from_insn, Relocation_t* reloc) { assert(0); } void UnpinArm32_t::HandlePcrelReloc(Instruction_t* from_insn, Relocation_t* reloc) { // decode the instruction and find the pcrel operand const auto disasm = DecodedInstruction_t::factory(from_insn); const auto mnemonic = disasm->getMnemonic(); const auto orig_insn_addr = from_insn->getAddress()->getVirtualOffset(); // original location const auto insn_bytes_len = 4; // arm is always 4. const auto bo_wrt = reloc->getWRT(); const auto scoop_wrt = dynamic_cast<DataScoop_t* >(reloc->getWRT()); const auto insn_wrt = dynamic_cast<Instruction_t*>(reloc->getWRT()); const auto branch_bytes = string("\x00\x00\x00\xea",4); uint8_t insn_bytes[insn_bytes_len]; // compiler disallows init on some platforms. // but memcpy should init it sufficiently. memcpy(insn_bytes, from_insn->getDataBits().c_str(), insn_bytes_len); const auto full_insn=*(uint32_t*)insn_bytes; const auto mask1 = (1<<1 )-1; const auto mask4 = (1<<4 )-1; const auto mask8 = (1<<8 )-1; const auto mask12 = (1<<12)-1; const auto allocate_reg = [](const set<uint32_t>& used) -> uint32_t { for(auto i=0u; i<15; i++) { if(used.find(i) == end(used)) return i; } assert(0); }; // get the new insn addr const auto from_insn_location = VirtualOffset_t(locMap[from_insn]); // get WRT info const auto to_addr = (scoop_wrt != nullptr) ? scoop_wrt->getStart()->getVirtualOffset() : // is scoop (insn_wrt != nullptr) ? locMap[insn_wrt] : // is instruction (bo_wrt == nullptr) ? VirtualOffset_t(0u) : // no WRT obj throw invalid_argument("Cannot map pcrel relocation WRT object to address"); const auto addend = reloc -> getAddend(); const auto reloc_offset = to_addr + addend; const auto to_object_id = (scoop_wrt != nullptr) ? scoop_wrt->getName() +"@"+to_hex_string(scoop_wrt->getStart() ->getVirtualOffset()) : // scoop (insn_wrt != nullptr) ? insn_wrt ->getDisassembly()+"@"+to_hex_string(insn_wrt ->getAddress()->getVirtualOffset()) : // instruction (bo_wrt == nullptr) ? string("No-object") : throw invalid_argument("Cannot map pcrel relocation WRT object to address"); // so far, only handling ldr and ldrb const auto is_ldr_type = mnemonic.substr(0,3)=="ldr"; // ldr, ldrb, ldreq, ldrbeq, etc. const auto is_vldr_type = mnemonic.substr(0,4)=="vldr"; // vldr <double>, vldr <single> const auto is_add_type = mnemonic.substr(0,3)=="add"; // add, addne, etc. const auto is_addne_type = mnemonic == "addne"; // exactly addne const auto is_addls_type = mnemonic == "addls"; // exactly addls const auto is_ldrls_type = mnemonic == "ldrls"; // exactly ldrls const auto I_bit_set = 0b1 == ( (full_insn >> 25) & mask1); // 25th bit indicates if op2 is shifted immediate. const auto S_bit_set = 0b1 == ( (full_insn >> 20) & mask1); // 20th bit indicates if the flags are updated. not valid for all insns. const auto Rd = uint32_t(full_insn >> 12) & mask4; const auto Rm = uint32_t(full_insn >> 0) & mask4; const auto Rn = uint32_t(full_insn >> 16) & mask4; const auto Rs = uint32_t(full_insn >> 8) & mask4; const auto is_rn_pc = Rn == 0b1111; // rn reg may not be valid for all instructions const auto is_rd_pc = Rd == 0b1111; // destination register, not valid for all insns // find a temp_reg if we need one const auto tmp_reg = allocate_reg({Rd,Rm,Rn,Rs, 13, 15}); assert(tmp_reg < 15); // sanity check 4 bits if(is_vldr_type) { /* * We need to patch an vldr[b][cond] fp_reg, [pc + constant] * to be at a new location. * * The plan : * FA: b<cond> L0 * FT: * .. * L0 str rd, [sp,#-fc] * L1 ldr rd, [L6] * L2 add rd, pc, rd * L3 vldr rd, [rd, <imm>] # orig insn * L4 ldr rd, [sp,#-fc] * L5: b FT * L6: .word <constant> */ const auto tramp_size = 6*4 + 4 ; // 6 insns, 4 bytes each, plus one word of read-only data const auto tramp_range = ms.getFreeRange(tramp_size); const auto tramp_start = tramp_range.getStart(); // don't be too fancy, just reserve 16 bytes. ms.splitFreeRange({tramp_start,tramp_start+tramp_size}); // and give the bytes some names const auto FA=from_insn_location; const auto FT=from_insn_location+4; const auto L0 = tramp_start; const auto L1 = L0 + 4; const auto L2 = L1 + 4; const auto L3 = L2 + 4; const auto L4 = L3 + 4; const auto L5 = L4 + 4; const auto L6 = L5 + 4; // Create a branch to put over the original ldr // and set the conditional bits equal to // the original instruction conditional bits auto my_branch_bytes = branch_bytes; my_branch_bytes[3] &= 0x0f; // clear always condition bits my_branch_bytes[3] |= (insn_bytes[3] & 0xf0); // add the vldr cond bits. ms.plopBytes(FA,my_branch_bytes.c_str(),4); // and make it point at L0 zo->applyPatch(FA,L0); // spill tmp_reg at L0, e50d00fc auto spill_insn = string("\xfc\x00\x0d\xe5",4); spill_insn[1] |= (tmp_reg<<4); ms.plopBytes(L0,spill_insn.c_str(),4); // ldr dest_reg, [pc+k] (where pc+8+k == L6) auto ldr_imm_insn = string("\x0c\x00\x9f\xe5",4); ldr_imm_insn[1] |= (tmp_reg << 4); // set this instruction's dest reg to the ldr's dest reg. ms.plopBytes(L1,ldr_imm_insn.c_str(),4); // put down add tmp_reg,pc, temp_reg auto new_add_word = string("\x00\x00\x8f\xe0",4); // e08f0000 add r0, pc, r0 new_add_word[1] |= (tmp_reg<<4); new_add_word[0] |= (tmp_reg<<0); ms.plopBytes(L2,new_add_word.c_str(),4); // put down orig vldr insn, with 1) cond field removed, and 2) Rn set to -> tmp_reg auto vldr_insn_bytes = from_insn->getDataBits(); vldr_insn_bytes[2] &= 0xf0; // remove Rn bits. vldr_insn_bytes[2] |= (tmp_reg << 0); // set Rn to tmp-reg ms.plopBytes(L3,vldr_insn_bytes.c_str(),4); // put down L5, restore of scratch reg r0 auto restore_insn = string("\xfc\x00\x1d\xe5",4); restore_insn[1] |= (tmp_reg<<4); // set Rd field ms.plopBytes(L4,restore_insn.c_str(),4); // put an uncond branch the end of the trampoline // and make it jump at FT ms.plopBytes(L5,branch_bytes.c_str(),4); zo->applyPatch(L5,FT); // put the calculated pc-rel offset at L3 const auto new_offset = int32_t(orig_insn_addr - L2 + reloc_offset); ms.plopBytes(L6,reinterpret_cast<const char*>(&new_offset),4); // endianness of host must match target // should be few enough of these to always print cout<< "Had to trampoline " << disasm->getDisassembly() << " @"<<FA<<" to " << hex << L0 << "-" << L0+tramp_size-1 << " WRT=" << to_object_id << endl; } else if( is_ldr_type && !is_rd_pc && !I_bit_set) /* ldr <not pc>, [pc, imm] */ { /* * We need to patch an ldr[b][cond] reg, [pc + constant] * to be at a new location. * * The plan : * FA: b<cond> L0 * FT: * .. * L0 ldr rd, [L3] * L1 ldr(b) rd, [pc, rd] * L2: b FT * L3: .word <constant to add> */ const auto tramp_size = 4*4; // 3 insns, 4 bytes each, plus one word of read-only data const auto tramp_range = ms.getFreeRange(tramp_size); const auto tramp_start = tramp_range.getStart(); // don't be too fancy, just reserve 16 bytes. ms.splitFreeRange({tramp_start,tramp_start+tramp_size}); // and give the bytes some names const auto FA=from_insn_location; const auto FT=from_insn_location+4; const auto L0 = tramp_start; const auto L1 = tramp_start + 4; const auto L2 = tramp_start + 8; const auto L3 = tramp_start + 12; // extract some fields from the orig full_insn const auto is_pos_imm = (bool)((full_insn >> 23) & mask1); const auto is_byte_load = (bool)((full_insn >> 22) & mask1); assert(Rd!=0xf); // not the program counter. // Create a branch to put over the original ldr // and set the conditional bits equal to // the original instruction conditional bits auto my_branch_bytes = branch_bytes; my_branch_bytes[3] &= 0x0f; // clear always condition bits my_branch_bytes[3] |= (insn_bytes[3] & 0xf0); ms.plopBytes(FA,my_branch_bytes.c_str(),4); // and make it point at L0 zo->applyPatch(FA,L0); // ldr dest_reg, [pc+k] (where pc+k == L3) auto ldr_imm_insn = string("\x04\x00\x9f\xe5",4); ldr_imm_insn[1] |= (Rd << 4); // set this instruction's dest reg to the ldr's dest reg. ms.plopBytes(L0,ldr_imm_insn.c_str(),4); // create the modified ldr(b) from the original ldr instruction auto new_ldr_word = string("\x00\x00\x9f\xe7",4); new_ldr_word[1] |= (Rd << 4); // set this instruction's dest reg to the ldr's dest reg. new_ldr_word[0] |= Rd; // set this instruction's 2nd src reg to the orig ldr's dest reg. new_ldr_word[3] |= is_byte_load << 6; // set this instruction's B flag to match orig insn's ms.plopBytes(L1,new_ldr_word.c_str(),4); // put an uncond branch the end of the trampoline // and make it jump at FT ms.plopBytes(L2,branch_bytes.c_str(),4); zo->applyPatch(L2,FT); // put the calculated pc-rel offset at L3 const auto ldr_imm_field = int32_t(full_insn & mask12); const auto ldr_imm = is_pos_imm ? ldr_imm_field : - ldr_imm_field; const auto new_addend = bo_wrt == nullptr ? 8 + ldr_imm : reloc_offset; const auto new_offset = int32_t(orig_insn_addr - L3 + new_addend); ms.plopBytes(L3,reinterpret_cast<const char*>(&new_offset),4); // endianness of host must match target // should be few enough of these to always print cout<< "Had to trampoline " << disasm->getDisassembly() << " @"<<FA<<" to " << hex << L0 << "-" << L0+tramp_size-1 << " ldr_imm = " << ldr_imm << " WRT=" << to_object_id << endl; } else if( is_ldr_type && !is_rd_pc && I_bit_set) /* ldr <not pc>, [pc, reg/shift] */ { /* * We need to patch a ldr Rd [pc, Rm <shift type> <shift amt>]@FA * to be at a new location. * * The plan: * FA: bne L0 * FT: * .. * L0: str Rt, [sp, # - fc] # spill tmp reg (Rt), use tmp_reg instead of r0 * L1: ldr Rt, [pc, #k] # where L1+8+k == L6 or k = L6-L1-8 * L2: add Rt, pc, Rt # add in pc * L3: ldr Rd, [Rt, Rm <shift type> <shift smt> ] * # copy of orig insn with pc (Rn field) replaced with Rt. * L4: ldr Rt, [sp, # - fc] # spill tmp reg (Rt), use tmp_reg instead of r0 * L5: b FT * L6: .word L2 - orig_insn_addr */ const auto tramp_size = 6*4 + 4 ; // 6 insns, 4 bytes each, plus one word of read-only data const auto tramp_range = ms.getFreeRange(tramp_size); const auto tramp_start = tramp_range.getStart(); // don't be too fancy, just reserve tramp_size bytes. ms.splitFreeRange({tramp_start,tramp_start+tramp_size}); // and give the bytes some names const auto FA = from_insn_location; const auto FT = FA + 4; const auto L0 = tramp_start; const auto L1 = L0 + 4; const auto L2 = L1 + 4; const auto L3 = L2 + 4; const auto L4 = L3 + 4; const auto L5 = L4 + 4; const auto L6 = L5 + 4; // Create a branch to put over the original ldr // and set the conditional bits equal to // the original instruction conditional bits auto my_branch_bytes = branch_bytes; my_branch_bytes[3] &= 0x0f; // clear always condition bits my_branch_bytes[3] |= (insn_bytes[3] & 0xf0); ms.plopBytes(FA,my_branch_bytes.c_str(),4); // and make it point at L0 zo->applyPatch(FA,L0); // spill tmp_reg at L0, e50d00fc auto spill_insn = string("\xfc\x00\x0d\xe5",4); spill_insn[1] |= (tmp_reg<<4); ms.plopBytes(L0,spill_insn.c_str(),4); // ldr dest_reg, [pc+k] (where pc+8+k == L6) auto ldr_imm_insn = string("\x0c\x00\x9f\xe5",4); ldr_imm_insn[1] |= (tmp_reg<<4); ms.plopBytes(L1,ldr_imm_insn.c_str(),4); // put down L2 auto new_add_word = string("\x00\x00\x8f\xe0",4); // e08f0000 add r0, pc, r0 new_add_word[1] |= (tmp_reg<<4); new_add_word[0] |= (tmp_reg<<0); ms.plopBytes(L2,new_add_word.c_str(),4); // put down L3 (orig insn with pc fields set to r0) auto orig_ldr = from_insn->getDataBits(); orig_ldr[3] &= 0b00001111; // clear the cond bits. orig_ldr[3] |= 0b11100000; // set the cond bits to "always". orig_ldr[2] &= ~(mask4 << 0); // clear this instruction's Rn field (i.e., set to r0) orig_ldr[2] |= (tmp_reg<<0); // set Rn fields o tmp_reg ms.plopBytes(L3,orig_ldr.c_str(),4); // put down L4, restore of scratch reg r0 auto restore_insn = string("\xfc\x00\x1d\xe5",4); restore_insn[1] |= (tmp_reg<<4); // set Rd field ms.plopBytes(L4,restore_insn.c_str(),4); // put an uncond branch the end of the trampoline // and make it jump at FT ms.plopBytes(L5,branch_bytes.c_str(),4); zo->applyPatch(L5,FT); // put the calculated pc-rel offset at L6 const auto new_offset = int32_t(orig_insn_addr - L2 + reloc_offset); ms.plopBytes(L6,reinterpret_cast<const char*>(&new_offset),4); // endianness of host must match target // should be few enough of these to always print cout << "Had to trampoline " << disasm->getDisassembly() << " @" << hex << FA << " to " << L0 << "-" << L0+tramp_size-1 << " WRT=" << to_object_id << endl; } else if((is_ldrls_type || is_addne_type || is_addls_type) && is_rd_pc && is_rn_pc) { /* * We need to patch an addne pc, pc, reg lsl #2 * to be at a new location. * * The plan : * FA: bne L0 * FT: * .. * L0: str r0, [sp, # - fc] # spill tmp reg, use tmp_reg instead of r0 * L1: ldr r0, [pc, #k] # where L1+8+k == L7 or k = L7-L1-8 * L2: add r0, pc, r0 # add in pc * L3: op r0, r0, ( r2 lsl #2 ) # copy of orig insn with pc removed from op0 and op1, and replaced with tmp_reg. * L4: str r0, [sp, # - f8] # store calculated pc * L5: ldr r0, [sp, # - fc] # restore reg * L6: ldr pc, [sp, # - f8] # jmp dispatch * L7: .word L2 - orig_insn_addr */ const auto tramp_size = 7*4 + 4 ; // 7 insns, 4 bytes each, plus one word of read-only data const auto tramp_range = ms.getFreeRange(tramp_size); const auto tramp_start = tramp_range.getStart(); // don't be too fancy, just reserve tramp_size bytes. ms.splitFreeRange({tramp_start,tramp_start+tramp_size}); // and give the bytes some names const auto FA=from_insn_location; const auto L0 = tramp_start; const auto L1 = L0 + 4; const auto L2 = L1 + 4; const auto L3 = L2 + 4; const auto L4 = L3 + 4; const auto L5 = L4 + 4; const auto L6 = L5 + 4; const auto L7 = L6 + 4; // Create a branch to put over the original ldr // and set the conditional bits equal to // the original instruction conditional bits auto my_branch_bytes = branch_bytes; my_branch_bytes[3] &= 0x0f; // clear always condition bits my_branch_bytes[3] |= (insn_bytes[3] & 0xf0); ms.plopBytes(FA,my_branch_bytes.c_str(),4); // and make it point at L0 zo->applyPatch(FA,L0); // spill tmp_reg at L0, e50d00fc auto spill_insn = string("\xfc\x00\x0d\xe5",4); spill_insn[1] |= (tmp_reg<<4); ms.plopBytes(L0,spill_insn.c_str(),4); // ldr dest_reg, [pc+k] (where pc+8+k == L7) auto ldr_imm_insn = string("\x10\x00\x9f\xe5",4); ldr_imm_insn[1] |= (tmp_reg<<4); ms.plopBytes(L1,ldr_imm_insn.c_str(),4); // put down L2 auto new_add_word = string("\x00\x00\x8f\xe0",4); // e08f0000 add r0, pc, r0 new_add_word[1] |= (tmp_reg<<4); new_add_word[0] |= (tmp_reg<<0); ms.plopBytes(L2,new_add_word.c_str(),4); // put down L3 (orig insn with pc fields set to r0) auto orig_add = from_insn->getDataBits(); orig_add[3] &= 0b00001111; // clear the cond bits. orig_add[3] |= 0b11100000; // set the cond bits to "always". orig_add[1] &= ~(mask4 << 4); // clear this instruction's Rd field (i.e., set to r0) orig_add[2] &= ~(mask4 << 0); // clear this instruction's Rn field (i.e., set to r0) orig_add[1] |= (tmp_reg<<4); // set Rd and Rn fields to tmp_reg orig_add[2] |= (tmp_reg<<0); ms.plopBytes(L3,orig_add.c_str(),4); // put down L4, store of calc'd pc. auto spill_targ_insn = string("\xf8\x00\x0d\xe5",4); spill_targ_insn[1] |= (tmp_reg<<4); // set Rd field ms.plopBytes(L4,spill_targ_insn.c_str(),4); // put down L5, restore of scratch reg r0 auto restore_insn = string("\xfc\x00\x1d\xe5",4); restore_insn[1] |= (tmp_reg<<4); // set Rd field ms.plopBytes(L5,restore_insn.c_str(),4); // put down L6, the actual control transfer using the saved value on the stack auto xfer_insn = string("\xf8\x00\x1d\xe5",4); xfer_insn[1] |= (0b1111 << 4); // set this instruction's Rd reg to PC ms.plopBytes(L6,xfer_insn.c_str(),4); // put the calculated pc-rel offset at L7 const auto new_offset = int32_t(orig_insn_addr - L2 + reloc_offset); ms.plopBytes(L7,reinterpret_cast<const char*>(&new_offset),4); // endianness of host must match target // should be few enough of these to always print cout<< "Had to trampoline " << disasm->getDisassembly() << " @"<<FA<<" to " << hex << L0 << "-" << L0+tramp_size-1 << " WRT=" << to_object_id << endl; } else if(is_add_type && I_bit_set) { /* * * here we've found a add<s><cond> Rd, Rn, #constant<<(#shift * 2) * e.g.: add ip, pc, #0, #12 # IP=PC+8+(12<<(0*2)) * * the plan : * FA: b<cond> L0 * FT: * .. * L0 ldr dest_reg, [L3] * L1 add dest_reg, pc, dest_reg * L2: b FT * L3: .word <constant to add> */ assert(Rd!=0xf); // not the program counter. needs to be handled as IB const auto tramp_size = 4*4; // 3 insns, 4 bytes each, plus one word of read-only data const auto tramp_range = ms.getFreeRange(tramp_size); const auto tramp_start = tramp_range.getStart(); // don't be too fancy, just reserve 16 bytes. ms.splitFreeRange({tramp_start,tramp_start+tramp_size}); // and give the bytes some names const auto FA=from_insn_location; const auto FT=from_insn_location+4; const auto L0 = tramp_start; const auto L1 = tramp_start + 4; const auto L2 = tramp_start + 8; const auto L3 = tramp_start + 12; // Create a branch to put over the original insn // and set the conditional bits equal to // the original instruction conditional bits auto my_branch_bytes = branch_bytes; my_branch_bytes[3] &= 0x0f; // clear always condition bits my_branch_bytes[3] |= (insn_bytes[3] & 0xf0); ms.plopBytes(FA,my_branch_bytes.c_str(),4); // and make it point at L0 zo->applyPatch(FA,L0); // ldr dest_reg, [pc+k] (where pc+k == L3) auto ldr_imm_insn = string("\x04\x00\x9f\xe5",4); ldr_imm_insn[1] |= (Rd << 4); // set this instruction's dest reg to the ldr's dest reg. ms.plopBytes(L0,ldr_imm_insn.c_str(),4); // create the modified add from the original ldr instruction auto new_add_word = string("\x00\x00\x8f\xe0",4); // e08f0000 add r0, pc, r0 new_add_word[1] |= (Rd << 4); // set this instruction's dest reg to the origin insn's dest reg. new_add_word[0] |= Rd; // set this instruction's 2nd src reg to the orig insn's dest reg. new_add_word[3] |= S_bit_set << 4; // set this instruction's S flag to match the orig insn ms.plopBytes(L1,new_add_word.c_str(),4); // put an uncond branch the end of the trampoline // and make it jump at FT ms.plopBytes(L2,branch_bytes.c_str(),4); zo->applyPatch(L2,FT); // put the calculated pc-rel offset at L3 const auto add_imm_field = int32_t(full_insn & mask8); const auto add_ror_field = int32_t((full_insn>>8) & mask4); const auto add_imm = rotr32(add_imm_field,add_ror_field*2); const auto orig_target = orig_insn_addr + 8 + add_imm; const auto new_offset = int32_t(orig_target - L3 + reloc_offset); ms.plopBytes(L3,reinterpret_cast<const char*>(&new_offset),4); // endianness of host must match target // should be few enough of these to always print cout<< "Had to trampoline " << disasm->getDisassembly() << " @"<<FA<<" to " << hex << L0 << "-" << L0+tramp_size-1 << " add_imm = " << add_imm << " WRT=" << to_object_id << endl; } else if(is_add_type && !I_bit_set) { /* * here we've found a add<s><cond> Rd, Rn, Rm <shift_oper> ( shift * 2 ) * e.g. add Rd, pc, Rn # Rd = pc + Rn * e.g2. add Rd, Rm, pc # Rd = Rm + pc * e.g3. add Rd, pc, Rn ror #2 # Rd = pc + (Rn ror #2) * e.g4. add Rd, Rm, pc lsl #2 # Rd = Rm + pc << 2 * e.g5. add Rd, pc, Rn lsl Rs # Rd = pc + (Rn << Rs) * e.g6. add Rd, Rm, pc lsl Rs # Rd = Rm + (pc << Rs) * * Note: instruction may have a <cond> or an <s> appended * Note: Rd, Rn, and Rs may all be the same register. We could optimize if they aren't, TBD. * * Assume: no shifting of pc (as in example 4 and 6.) * Assume: no pc in Rn position (as in example 2, 4, and 6) * Assume: no pc in Rs position (as in example 6) * * Assume: tmp_reg is a (live) register that's not Rd, Rn, Rs, or pc. * * * * FA: b<cond> L0 # deals with all <cond> properly. * FT: * .. * L0 str tmp_reg -> [sp - 0xfc] # save reg * L1 add Rd, ... # orig add instruction, without <cond> or <s> * L2 ldr tmp_reg <- [L3] # load constant offset from cur pc to other pc * L3 add<s> Rd, tmp_reg, Rd # add in constant, and set S flag. * L4 ldr tmp_reg <- [sp - 0xfc] # restore reg * L5: b FT * L6: .word <orig_pc - new_pc> # Note: all other fields factor out! */ assert(Rd!=0xf); // not the program counter. needs to be handled as IB const auto tramp_size = 6*4 + 4 ; // 6 insns, 4 bytes each, plus one word of read-only data const auto tramp_range = ms.getFreeRange(tramp_size); const auto tramp_start = tramp_range.getStart(); // don't be too fancy, just reserve the bytes. ms.splitFreeRange({tramp_start,tramp_start+tramp_size}); // and give the bytes some names const auto FA=from_insn_location; const auto FT=from_insn_location+4; const auto L0 = tramp_start; const auto L1 = L0 + 4; const auto L2 = L1 + 4; const auto L3 = L2 + 4; const auto L4 = L3 + 4; const auto L5 = L4 + 4; const auto L6 = L5 + 4; // Create a branch to put over the original insn // and set the conditional bits equal to // the original instruction conditional bits auto my_branch_bytes = branch_bytes; my_branch_bytes[3] &= 0x0f; // clear always condition bits my_branch_bytes[3] |= (insn_bytes[3] & 0xf0); // set cond bits ms.plopBytes(FA,my_branch_bytes.c_str(),4); // and make it point at L0 zo->applyPatch(FA,L0); // put down L0 auto spill_insn = string("\xfc\x00\x0d\xe5",4); spill_insn[1] |= (tmp_reg << 4); // set this instruction's Rd reg to the tmp reg ms.plopBytes(L0,spill_insn.c_str(),4); // put down L1 auto orig_add = from_insn->getDataBits(); orig_add[2] &= ~(1 << 4); // clear the S bit. orig_add[3] &= 0b00001111; // clear the cond bits. orig_add[3] |= 0b11100000; // set the cond bits to "always". ms.plopBytes(L1,orig_add.c_str(),4); // Put down L2 (ldr dest_reg, [pc+k] where pc+k == L6) auto ldr_imm_insn = string("\x08\x00\x9f\xe5",4); ldr_imm_insn[1] |= (tmp_reg << 4); // set this instruction's Rd to tmp_reg ms.plopBytes(L2,ldr_imm_insn.c_str(),4); // put down L3, an add Rd, tmp_reg, Rd (e0800000) where Rd comes from the origin insn. auto L3_insn = string("\x00\x00\x80\xe0",4); L3_insn[1] |= (Rd << 4); // set this instruction's Rd field to orig insn's Rd L3_insn[2] |= (tmp_reg << 0); // set this instruction's Rn field to the tmp reg L3_insn[0] |= (Rd << 0); // set this instruction's Rm field to orig insn's Rd ms.plopBytes(L3,L3_insn.c_str(),4); // put down L4, e59d00fc auto restore_insn = string("\xfc\x00\x1d\xe5",4); restore_insn[1] |= (tmp_reg << 4); // set this instruction's Rd reg to the tmp reg ms.plopBytes(L4,restore_insn.c_str(),4); // put down L5, an uncond branch the end of the trampoline // and make it jump at FT ms.plopBytes(L5,branch_bytes.c_str(),4); zo->applyPatch(L5,FT); // put the calculated pc-rel offset at L3 const auto new_offset = int32_t(orig_insn_addr - L1 + reloc_offset); ms.plopBytes(L6,reinterpret_cast<const char*>(&new_offset),4); // endianness of host must match target // should be few enough of these to always print cout<< "Had to trampoline " << disasm->getDisassembly() << " @"<<FA<<" to " << hex << L0 << "-" << L0+tramp_size-1 << " WRT=" << to_object_id << endl; } else { cout <<"WARN: insn patching help: "<< from_insn->getDisassembly()<<endl; } /* * other instructions are probably false positives in the disassembly process. let's just not patch them else assert(0); */ } void UnpinArm32_t::HandleAbsptrReloc(Instruction_t* from_insn, Relocation_t* reloc) { assert(0); } void UnpinArm32_t::HandleImmedptrReloc(Instruction_t* from_insn, Relocation_t* reloc) { assert(0); } void UnpinArm32_t::HandleCallbackReloc(Instruction_t* from_insn, Relocation_t* reloc) { assert(0); }