Skip to content
Snippets Groups Projects
Commit 3cec8348 authored by Jason Hiser's avatar Jason Hiser :tractor:
Browse files

support for short-imm instructions getting trampolined out-of-band

parent 0b946216
No related branches found
No related tags found
No related merge requests found
...@@ -337,7 +337,10 @@ void Unpin_t::DoUpdateForInstructions() ...@@ -337,7 +337,10 @@ void Unpin_t::DoUpdateForInstructions()
const auto mnemonic =disasm.getMnemonic(); const auto mnemonic =disasm.getMnemonic();
const auto is_adr_type =mnemonic=="adr"; const auto is_adr_type =mnemonic=="adr";
const auto is_adrp_type=mnemonic=="adrp"; const auto is_adrp_type=mnemonic=="adrp";
const auto is_ldr_type =mnemonic=="ldr"; const auto is_ldr_type = mnemonic=="ldr";
const auto is_ldr_int_type = is_ldr_type && disasm.getOperand(0).isGeneralPurposeRegister();
const auto is_ldr_fp_type = is_ldr_type && disasm.getOperand(0).isFpuRegister();
const auto mask1 =(1<< 1)-1;
const auto mask2 =(1<< 2)-1; const auto mask2 =(1<< 2)-1;
const auto mask5 =(1<< 5)-1; const auto mask5 =(1<< 5)-1;
const auto mask12=(1<<12)-1; const auto mask12=(1<<12)-1;
...@@ -372,51 +375,236 @@ void Unpin_t::DoUpdateForInstructions() ...@@ -372,51 +375,236 @@ void Unpin_t::DoUpdateForInstructions()
(int64_t)new_insn_pageno + (int64_t)reloc->GetAddend()+(int64_t)to_addr; (int64_t)new_insn_pageno + (int64_t)reloc->GetAddend()+(int64_t)to_addr;
// make sure no overflow. // make sure no overflow.
assert( ((new_imm21_ext << 43) >> 43) == new_imm21_ext); if(((new_imm21_ext << 43) >> 43) == new_imm21_ext)
const auto new_immhi19 = new_imm21_ext >> 2;
const auto new_immlo2 = new_imm21_ext & mask2;
const auto clean_new_insn= full_insn & ~(mask2<<29) & ~ (mask19 << 5);
const auto new_insn = clean_new_insn | ((new_immlo2&mask2) << 29) | ((new_immhi19&mask19)<<5);
// put the new instruction in the output
ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
if (m_verbose)
{ {
cout << "Relocating a adr(p) pcrel relocation with orig_pageno=" << hex const auto new_immhi19 = new_imm21_ext >> 2;
<< (orig_insn_pageno << 12) << " offset=(page-pc+" << imm21_ext << ")" << endl; const auto new_immlo2 = new_imm21_ext & mask2;
cout << "Based on: " << disasm.getDisassembly() << hex << " originally at " << orig_insn_addr const auto clean_new_insn= full_insn & ~(mask2<<29) & ~ (mask19 << 5);
<< " now located at : 0x" << hex << from_insn_location << " with offset=(page-pc + " const auto new_insn = clean_new_insn | ((new_immlo2&mask2) << 29) | ((new_immhi19&mask19)<<5);
<< new_imm21_ext << ")" << endl; // put the new instruction in the output
ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
if (m_verbose)
{
cout << "Relocating a adr(p) pcrel relocation with orig_pageno=" << hex
<< (orig_insn_pageno << 12) << " offset=(page-pc+" << imm21_ext << ")" << endl;
cout << "Based on: " << disasm.getDisassembly() << hex << " originally at " << orig_insn_addr
<< " now located at : 0x" << hex << from_insn_location << " with offset=(page-pc + "
<< new_imm21_ext << ")" << endl;
}
}
else
{
assert(is_adr_type); // don't even know what to do if the PAGE is too far away!
// imm21->64 bit address didn't work. Split it up into two parts.
/* the plan :
* FA: b L0
* FT:
* ..
* L0 adrp dest_reg, <addr-page number>
* L1 add dest_reg, dest_reg, (addr-page offset)
* L2: b ft
*/
const auto tramp_size=3*4; // 3 insns, 4 bytes each
const auto address_to_generate=imm21_ext+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr;
const auto destreg=full_insn&mask5;
const auto tramp_range=ms.GetFreeRange(tramp_size);
const auto tramp_start=tramp_range.GetStart();
// don't be too fancy, just reserve 12 bytes.
ms.SplitFreeRange({tramp_start,tramp_start+12});
const auto FA=from_insn_location;
const auto FT=from_insn_location+4;
const auto L0=tramp_start;
const auto L1=tramp_start+4;
const auto L2=tramp_start+8;
const auto branch_bytes=string("\x00\x00\x00\x14",4);
// const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr
const auto relocd_insn_pageno = L1>>12;
const auto address_to_generate_pageno = address_to_generate >> 12;
const auto address_to_generate_page_offset = address_to_generate & mask12;
const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
const auto relocd_immhi19 = relocd_imm21_ext >> 2;
const auto relocd_immlo2 = relocd_imm21_ext & mask2;
// this should be +/- 4gb, so we shouldn't fail now!
assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
// put an uncond branch at where the adr was.
// and make it point at L0
ms.PlopBytes(FA,branch_bytes.c_str(),4);
zo->ApplyPatch(FA,L0);
// adrp: 1 imm2lo 1 0000 immhi19 Rd
auto adrp_bytes=string("\x00\x00\x00\x90",4);
auto adrp_word =*(int*)adrp_bytes.c_str();
adrp_word|=destreg<<0;
adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
ms.PlopBytes(L0,(char*)&adrp_word,4);
// add64 imm12 = 1001 0001 00 imm12 Rn Rd
auto add_bytes=string("\x00\x00\x00\x91",4);
auto add_word =*(int*)add_bytes.c_str();
add_word|=destreg<<0;
add_word|=destreg<<5;
add_word|=address_to_generate_page_offset << 10 ;
ms.PlopBytes(L1,(char*)&add_word,4);
// put an uncond branch the end of the trampoline
// and make it jump at FT
ms.PlopBytes(L2,branch_bytes.c_str(),4);
zo->ApplyPatch(L2,FT);
// should be few enough of these to always print
cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to "
<< hex << L0 << "-" << L0+tramp_size << endl;
} }
} }
else if(is_ldr_type) else if(is_ldr_type)
{ {
// ldr: 0 x1 0110 0 0 imm19 Rt5 // ldr w/x reg : 0 x1 0110 0 0 imm19 Rt5, x1 indicate size (0,1 -> w/x)
// ldr s/d/q reg : opc2 0111 0 0 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q)
const auto imm19 = ((int64_t)full_insn >> 5 ) & mask19; const auto imm19 = ((int64_t)full_insn >> 5 ) & mask19;
const auto imm19_ext= (imm19 << 45) >> 45; const auto imm19_ext= (imm19 << 45) >> 45;
const auto referenced_addr=(imm19_ext<<2)+from_insn->GetAddress()->GetVirtualOffset()+4; const auto referenced_addr=(imm19_ext<<2)+from_insn->GetAddress()->GetVirtualOffset()+4;
const auto new_imm19_ext =((int64_t)referenced_addr-(int64_t)from_insn_location-4+(int64_t)reloc->GetAddend()+(int64_t)to_addr)>>2; const auto new_imm19_ext =((int64_t)referenced_addr-(int64_t)from_insn_location-4+(int64_t)reloc->GetAddend()+(int64_t)to_addr)>>2;
assert( ((new_imm19_ext << 45) >> 45) == new_imm19_ext); if( ((new_imm19_ext << 45) >> 45) == new_imm19_ext)
const auto clean_new_insn = full_insn & ~(mask19 << 5); {
const auto new_insn = clean_new_insn | ((new_imm19_ext & mask19)<<5); const auto clean_new_insn = full_insn & ~(mask19 << 5);
// put the new instruction in the output const auto new_insn = clean_new_insn | ((new_imm19_ext & mask19)<<5);
ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len); // put the new instruction in the output
if (m_verbose) ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
if (m_verbose)
{
cout << "Relocating a ldr pcrel relocation with orig_addr=" << hex
<< (referenced_addr) << " offset=(pc+" << imm19_ext << ")" << endl;
cout << "Based on: " << disasm.getDisassembly()
<< " now located at : 0x" << hex << from_insn_location << " with offset=(pc + "
<< new_imm19_ext << ")" << endl;
}
}
else
{ {
cout << "Relocating a ldr pcrel relocation with orig_addr=" << hex // imm19->64 bit address didn't work. Split it up into two parts.
<< (referenced_addr) << " offset=(pc+" << imm19_ext << ")" << endl;
cout << "Based on: " << disasm.getDisassembly() /* the plan :
<< " now located at : 0x" << hex << from_insn_location << " with offset=(pc + " * FA: b L0
<< new_imm19_ext << ")" << endl; * FT:
* ..
* L0 adrp dest_reg, <addr-page number>
* L1 ldr dest_reg, [dst_reg, #addr-page offset]
* L2: b ft
*/
const auto tramp_size=3*4; // 3 insns, 4 bytes each
const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr;
const auto destreg=full_insn&mask5;
const auto tramp_range=ms.GetFreeRange(tramp_size);
const auto tramp_start=tramp_range.GetStart();
// don't be too fancy, just reserve 12 bytes.
ms.SplitFreeRange({tramp_start,tramp_start+12});
const auto FA=from_insn_location;
const auto FT=from_insn_location+4;
const auto L0=tramp_start;
const auto L1=tramp_start+4;
const auto L2=tramp_start+8;
const auto branch_bytes=string("\x00\x00\x00\x14",4);
// const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr
const auto relocd_insn_pageno = L1>>12;
const auto address_to_generate_pageno = address_to_generate >> 12;
const auto address_to_generate_page_offset = address_to_generate & mask12;
const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
const auto relocd_immhi19 = relocd_imm21_ext >> 2;
const auto relocd_immlo2 = relocd_imm21_ext & mask2;
// this should be +/- 4gb, so we shouldn't fail now!
assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
// put an uncond branch at where the adr was.
// and make it point at L0
ms.PlopBytes(FA,branch_bytes.c_str(),4);
zo->ApplyPatch(FA,L0);
// adrp: 1 imm2lo 1 0000 immhi19 Rd
auto adrp_bytes=string("\x00\x00\x00\x90",4);
auto adrp_word =*(int*)adrp_bytes.c_str();
adrp_word|=destreg<<0;
adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
ms.PlopBytes(L0,(char*)&adrp_word,4);
if(is_ldr_int_type)
{
// convert: ldr w/x reg : 0 x1 011 0 00 ---imm19---- Rt5 x1 indicate size (0,1 -> w/x)
// to : ldr x/w reg : 1 x1 111 0 01 01 imm12 Rn5 Rt5 x1 indicates size (0,1 -> w/x)
auto new_ldr_bytes=string("\x00\x00\x40\xb9",4);
auto new_ldr_word =*(int*)new_ldr_bytes.c_str();
const auto orig_ldr_size_bit=(full_insn>>30)&mask1;
const auto scale=0x2|orig_ldr_size_bit;
const auto scaled_page_offset=(address_to_generate_page_offset>>scale) ;
new_ldr_word|=destreg<<0; // Rt
new_ldr_word|=destreg<<5; // Rn
new_ldr_word|=scaled_page_offset << 10 ; // imm12
new_ldr_word|=orig_ldr_size_bit << 30; // x1
ms.PlopBytes(L1,(char*)&new_ldr_word,4);
}
else if(is_ldr_fp_type)
{
// convert: ldr s/d/q reg: opc2 01 11 00 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q)
// to: ldr b/s/d/q reg: size2 11 11 01 opc2 imm12 Rn Rt
auto new_ldr_bytes=string("\x00\x00\x00\x3d",4);
auto new_ldr_word =*(int*)new_ldr_bytes.c_str();
const auto orig_ldr_opc_bits=(full_insn>>30)&mask2;
// decode size out of old ldr
const auto ldr_size=
orig_ldr_opc_bits == 0x0 ? 4u :
orig_ldr_opc_bits == 0x1 ? 8u :
orig_ldr_opc_bits == 0x2 ? 16u :
throw invalid_argument("cannot decode ldr floating-point access size");
// encode size field for new ldr.
const auto new_ldr_size_bits=
ldr_size == 4 ? 0x2u :
ldr_size == 8 ? 0x3u :
ldr_size == 16 ? 0x0u :
throw invalid_argument("cannot decode ldr floating-point access size");
// encode opc2
const auto new_ldr_opc2_bits=
ldr_size == 4 ? 0x1u :
ldr_size == 8 ? 0x1u :
ldr_size == 16 ? 0x3u :
throw invalid_argument("cannot decode ldr floating-point access size");
// add variable fields to new insn
new_ldr_word|=destreg<<0; // Rt
new_ldr_word|=destreg<<5; // Rn
new_ldr_word|=((address_to_generate_page_offset/ldr_size) << 10); // imm12
new_ldr_word|=(new_ldr_size_bits<<30); // size2
new_ldr_word|=(new_ldr_opc2_bits<<22); // opc2
ms.PlopBytes(L1,(char*)&new_ldr_word,4);
}
else
assert(0);
// put an uncond branch the end of the trampoline
// and make it jump at FT
ms.PlopBytes(L2,branch_bytes.c_str(),4);
zo->ApplyPatch(L2,FT);
// should be few enough of these to always print
cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to "
<< hex << L0 << "-" << L0+tramp_size-1 << endl;
} }
} }
else else
assert(0); assert(0);
} }
} }
// instruction has a absolute memory operand that needs it's displacement updated. // instruction has a absolute memory operand that needs it's displacement updated.
else if(reloc->GetType()==string("absoluteptr_to_scoop")) else if(reloc->GetType()==string("absoluteptr_to_scoop"))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment