Skip to content
Snippets Groups Projects
Commit a7927c3b authored by Jason Hiser's avatar Jason Hiser :tractor:
Browse files

fixed issue where ldr <float reg>, [pcrel] would get patched but destroy an int register.

parent 0ea0da05
No related branches found
No related tags found
No related merge requests found
......@@ -334,12 +334,13 @@ void Unpin_t::DoUpdateForInstructions()
{
assert(bo_wrt==nullptr); // not yet imp'd WRT offsetting.
assert(to_addr==0); // not yet imp'd WRT offsetting.
const auto mnemonic =disasm.getMnemonic();
const auto is_adr_type =mnemonic=="adr";
const auto is_adrp_type=mnemonic=="adrp";
const auto mnemonic = disasm.getMnemonic();
const auto is_adr_type = mnemonic=="adr";
const auto is_adrp_type = mnemonic=="adrp";
const auto is_ldr_type = mnemonic=="ldr";
const auto is_ldr_int_type = is_ldr_type && disasm.getOperand(0).isGeneralPurposeRegister();
const auto is_ldr_fp_type = is_ldr_type && disasm.getOperand(0).isFpuRegister();
const auto is_ldrsw_type = mnemonic=="ldrsw";
const auto mask1 =(1<< 1)-1;
const auto mask2 =(1<< 2)-1;
const auto mask5 =(1<< 5)-1;
......@@ -486,56 +487,59 @@ void Unpin_t::DoUpdateForInstructions()
}
else
{
// imm19->64 bit address didn't work. Split it up into two parts.
/* the plan :
* FA: b L0
* FT:
* ..
* L0 adrp dest_reg, <addr-page number>
* L1 ldr dest_reg, [dst_reg, #addr-page offset]
* L2: b ft
*/
const auto tramp_size=3*4; // 3 insns, 4 bytes each
const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr;
const auto destreg=full_insn&mask5;
const auto tramp_range=ms.GetFreeRange(tramp_size);
const auto tramp_start=tramp_range.GetStart();
// don't be too fancy, just reserve 12 bytes.
ms.SplitFreeRange({tramp_start,tramp_start+12});
const auto FA=from_insn_location;
const auto FT=from_insn_location+4;
const auto L0=tramp_start;
const auto L1=tramp_start+4;
const auto L2=tramp_start+8;
const auto branch_bytes=string("\x00\x00\x00\x14",4);
// const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr
const auto relocd_insn_pageno = L1>>12;
const auto address_to_generate_pageno = address_to_generate >> 12;
const auto address_to_generate_page_offset = address_to_generate & mask12;
const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
const auto relocd_immhi19 = relocd_imm21_ext >> 2;
const auto relocd_immlo2 = relocd_imm21_ext & mask2;
// this should be +/- 4gb, so we shouldn't fail now!
assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
// put an uncond branch at where the adr was.
// and make it point at L0
ms.PlopBytes(FA,branch_bytes.c_str(),4);
zo->ApplyPatch(FA,L0);
// adrp: 1 imm2lo 1 0000 immhi19 Rd
auto adrp_bytes=string("\x00\x00\x00\x90",4);
auto adrp_word =*(int*)adrp_bytes.c_str();
adrp_word|=destreg<<0;
adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
ms.PlopBytes(L0,(char*)&adrp_word,4);
if(is_ldr_int_type)
{
// imm19->64 bit address didn't work. Split it up into two parts.
/* the plan :
* FA: b L0
* FT:
* ..
* L0 adrp dest_reg, <addr-page number>
* L1 ldr dest_reg, [dst_reg, #addr-page offset]
* L2: b ft
*/
const auto tramp_size=3*4; // 3 insns, 4 bytes each
const auto tramp_range=ms.GetFreeRange(tramp_size);
const auto tramp_start=tramp_range.GetStart();
// don't be too fancy, just reserve 12 bytes.
ms.SplitFreeRange({tramp_start,tramp_start+12});
// and give the bytes some names
const auto L0=tramp_start;
const auto L1=tramp_start+4;
const auto L2=tramp_start+8;
// calculate the immediates for the new adr and ldr instruction.
const auto relocd_insn_pageno = L1>>12;
const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
const auto relocd_immhi19 = relocd_imm21_ext >> 2;
const auto relocd_immlo2 = relocd_imm21_ext & mask2;
// this should be +/- 4gb, so we shouldn't fail now!
assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
// put an uncond branch at where the adr was.
// and make it point at L0
ms.PlopBytes(FA,branch_bytes.c_str(),4);
zo->ApplyPatch(FA,L0);
// adrp: 1 imm2lo 1 0000 immhi19 Rd
auto adrp_bytes=string("\x00\x00\x00\x90",4);
auto adrp_word =*(int*)adrp_bytes.c_str();
adrp_word|=destreg<<0;
adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
ms.PlopBytes(L0,(char*)&adrp_word,4);
// convert: ldr w/x reg : 0 x1 011 0 00 ---imm19---- Rt5 x1 indicates size (0,1 -> w/x)
// to : ldr x/w reg : 1 x1 111 0 01 01 imm12 Rn5 Rt5 x1 indciates size (0,1 -> w/x)
auto new_ldr_bytes=string("\x00\x00\x40\xb9",4);
......@@ -548,9 +552,75 @@ void Unpin_t::DoUpdateForInstructions()
new_ldr_word|=scaled_page_offset << 10 ; // imm12
new_ldr_word|=orig_ldr_size_bit << 30; // x1
ms.PlopBytes(L1,(char*)&new_ldr_word,4);
// put an uncond branch the end of the trampoline
// and make it jump at FT
ms.PlopBytes(L2,branch_bytes.c_str(),4);
zo->ApplyPatch(L2,FT);
// should be few enough of these to always print
cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to "
<< hex << L0 << "-" << L0+tramp_size-1 << endl;
}
else if(is_ldr_fp_type)
{
/* the scheme for int-type operations doesn't work
* for fp-type because there is no free register.
* use this plan:
* FA: b L0
* FT:
* ..
* L0 str x0, [sp+128] ; 128 for red zoning
* L1 adrp x0, <addr-page number>
* L2 ldr dest_reg, [x0, #addr-page offset]
* L3 ldr x0, [sp+128] ; 128 for red zoning
* L4 b FT
*/
// allocate and reserve space for the code.
const auto tramp_size=5*4; // 3 insns, 4 bytes each
const auto tramp_range=ms.GetFreeRange(tramp_size);
const auto tramp_start=tramp_range.GetStart();
// don't be too fancy, just reserve 12 bytes.
ms.SplitFreeRange({tramp_start,tramp_start+12});
// give the bytes some names
const auto L0=tramp_start ;
const auto L1=tramp_start+4 ;
const auto L2=tramp_start+8 ;
const auto L3=tramp_start+12;
const auto L4=tramp_start+16;
// calculate the immediates for the new adr and ldr instruction.
const auto relocd_insn_pageno = L1>>12;
const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
const auto relocd_immhi19 = relocd_imm21_ext >> 2;
const auto relocd_immlo2 = relocd_imm21_ext & mask2;
// this should be +/- 4gb, so we shouldn't fail now!
assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
// put an uncond branch at where the adr was.
// and make it point at L0
ms.PlopBytes(FA,branch_bytes.c_str(),4);
zo->ApplyPatch(FA,L0);
// put save of x0 in place.
// diassembly: f81803e0 stur x0, [sp, #-128]
const auto strx0_bytes=string("\xe0\x03\x18\xf8",4);
ms.PlopBytes(L0,strx0_bytes.c_str(),4);
// adrp: 1 imm2lo 1 0000 immhi19 Rd
auto adrp_bytes=string("\x00\x00\x00\x90",4);
auto adrp_word =*(int*)adrp_bytes.c_str();
// adrp_word|=destreg<<0; ; destreg for this insn is x0.
adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
ms.PlopBytes(L1,(char*)&adrp_word,4);
// convert: ldr s/d/q reg: opc2 01 11 00 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q)
// to: ldr b/s/d/q reg: size2 11 11 01 opc2 imm12 Rn Rt
auto new_ldr_bytes=string("\x00\x00\x00\x3d",4);
......@@ -578,17 +648,116 @@ void Unpin_t::DoUpdateForInstructions()
ldr_size == 16 ? 0x3u :
throw invalid_argument("cannot decode ldr floating-point access size");
// add variable fields to new insn
new_ldr_word|=destreg<<0; // Rt
new_ldr_word|=destreg<<5; // Rn
// add variable fields to new insn and drop it in the mem space.
new_ldr_word|=destreg<<0; // Rt -- should be actual dest reg, not x0
// new_ldr_word|=destreg<<5; // Rn -- should be x0
new_ldr_word|=((address_to_generate_page_offset/ldr_size) << 10); // imm12
new_ldr_word|=(new_ldr_size_bits<<30); // size2
new_ldr_word|=(new_ldr_opc2_bits<<22); // opc2
ms.PlopBytes(L2,(char*)&new_ldr_word,4);
// drop in the restore of x0
// disassembly: f85803e0 ldur x0, [sp, #-128]
const auto ldrx0_bytes=string("\xe0\x03\x58\xf8",4);
ms.PlopBytes(L3,ldrx0_bytes.c_str(),4);
// put an uncond branch the end of the trampoline
// and make it jump at FT
ms.PlopBytes(L4,branch_bytes.c_str(),4);
zo->ApplyPatch(L4,FT);
// should be few enough of these to always print
cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to "
<< hex << L0 << "-" << L0+tramp_size-1 << endl;
ms.PlopBytes(L1,(char*)&new_ldr_word,4);
}
else
assert(0);
}
}
else if(is_ldrsw_type)
{
// ldrsw x reg : 1001 1000 imm19 Rt
const auto imm19 = ((int64_t)full_insn >> 5 ) & mask19;
const auto imm19_ext= (imm19 << 45) >> 45;
const auto referenced_addr=(imm19_ext<<2)+from_insn->GetAddress()->GetVirtualOffset()+4;
const auto new_imm19_ext =((int64_t)referenced_addr-(int64_t)from_insn_location-4+(int64_t)reloc->GetAddend()+(int64_t)to_addr)>>2;
if( ((new_imm19_ext << 45) >> 45) == new_imm19_ext)
{
const auto clean_new_insn = full_insn & ~(mask19 << 5);
const auto new_insn = clean_new_insn | ((new_imm19_ext & mask19)<<5);
// put the new instruction in the output
ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
if (m_verbose)
{
cout << "Relocating a ldrsw pcrel relocation with orig_addr=" << hex
<< (referenced_addr) << " offset=(pc+" << imm19_ext << ")" << endl;
cout << "Based on: " << disasm.getDisassembly()
<< " now located at : 0x" << hex << from_insn_location << " with offset=(pc + "
<< new_imm19_ext << ")" << endl;
}
}
else
{
// imm19->64 bit address didn't work. Split it up into two parts.
/* the plan :
* FA: b L0
* FT:
* ..
* L0 adrp dest_reg, <addr-page number>
* L1 ldr dest_reg, [dst_reg, #addr-page offset]
* L2: b ft
*/
const auto tramp_size=3*4; // 3 insns, 4 bytes each
const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr;
const auto destreg=full_insn&mask5;
const auto tramp_range=ms.GetFreeRange(tramp_size);
const auto tramp_start=tramp_range.GetStart();
// don't be too fancy, just reserve 12 bytes.
ms.SplitFreeRange({tramp_start,tramp_start+12});
const auto FA=from_insn_location;
const auto FT=from_insn_location+4;
const auto L0=tramp_start;
const auto L1=tramp_start+4;
const auto L2=tramp_start+8;
const auto branch_bytes=string("\x00\x00\x00\x14",4);
const auto relocd_insn_pageno = L1>>12;
const auto address_to_generate_pageno = address_to_generate >> 12;
const auto address_to_generate_page_offset = address_to_generate & mask12;
const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
const auto relocd_immhi19 = relocd_imm21_ext >> 2;
const auto relocd_immlo2 = relocd_imm21_ext & mask2;
// this should be +/- 4gb, so we shouldn't fail now!
assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
// put an uncond branch at where the adr was.
// and make it point at L0
ms.PlopBytes(FA,branch_bytes.c_str(),4);
zo->ApplyPatch(FA,L0);
// adrp: 1 imm2lo 1 0000 immhi19 Rd
auto adrp_bytes=string("\x00\x00\x00\x90",4);
auto adrp_word =*(int*)adrp_bytes.c_str();
adrp_word|=destreg<<0;
adrp_word |= ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
ms.PlopBytes(L0,(char*)&adrp_word,4);
// convert: ldrsw x reg : 1001 1000 ---imm19--- Rt
// to : ldrsw x reg : 1011 1001 10 imm12 Rn Rt
auto new_ldr_bytes=string("\x00\x00\x80\xb9",4);
auto new_ldr_word =*(int*)new_ldr_bytes.c_str();
const auto scale=0x2;
const auto scaled_page_offset=(address_to_generate_page_offset>>scale) ;
new_ldr_word|=destreg<<0; // Rt
new_ldr_word|=destreg<<5; // Rn
new_ldr_word|=scaled_page_offset << 10 ; // imm12
ms.PlopBytes(L1,(char*)&new_ldr_word,4);
// put an uncond branch the end of the trampoline
// and make it jump at FT
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment