From 3cec834849ef7d1f4d761409a4c50c8196838799 Mon Sep 17 00:00:00 2001
From: Jason Hiser <jdhiser@gmail.com>
Date: Mon, 31 Dec 2018 12:36:07 -0500
Subject: [PATCH] support for short-imm instructions getting trampolined
 out-of-band

---
 unpin.cpp | 246 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 217 insertions(+), 29 deletions(-)

diff --git a/unpin.cpp b/unpin.cpp
index 6a03fb8..d327a22 100644
--- a/unpin.cpp
+++ b/unpin.cpp
@@ -337,7 +337,10 @@ void Unpin_t::DoUpdateForInstructions()
 					const auto mnemonic    =disasm.getMnemonic();
 					const auto is_adr_type =mnemonic=="adr";
 					const auto is_adrp_type=mnemonic=="adrp";
-					const auto is_ldr_type =mnemonic=="ldr";
+					const auto is_ldr_type     = mnemonic=="ldr";
+					const auto is_ldr_int_type = is_ldr_type && disasm.getOperand(0).isGeneralPurposeRegister();
+					const auto is_ldr_fp_type  = is_ldr_type && disasm.getOperand(0).isFpuRegister();
+					const auto mask1 =(1<< 1)-1;
 					const auto mask2 =(1<< 2)-1;
 					const auto mask5 =(1<< 5)-1;
 					const auto mask12=(1<<12)-1;
@@ -372,51 +375,236 @@ void Unpin_t::DoUpdateForInstructions()
 								(int64_t)new_insn_pageno + (int64_t)reloc->GetAddend()+(int64_t)to_addr;
 
 						// make sure no overflow.
-						assert( ((new_imm21_ext << 43) >> 43) == new_imm21_ext);
-						const auto new_immhi19   = new_imm21_ext >> 2;
-						const auto new_immlo2    = new_imm21_ext  & mask2;
-						const auto clean_new_insn= full_insn & ~(mask2<<29) & ~ (mask19 << 5);
-						const auto new_insn      = clean_new_insn | ((new_immlo2&mask2) << 29) | ((new_immhi19&mask19)<<5);
-						// put the new instruction in the output
-						ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
-						if (m_verbose)
+						if(((new_imm21_ext << 43) >> 43) == new_imm21_ext)
 						{
-							cout << "Relocating a adr(p) pcrel relocation with orig_pageno=" << hex
-							     << (orig_insn_pageno << 12) << " offset=(page-pc+" << imm21_ext << ")"  << endl;
-							cout << "Based on: " << disasm.getDisassembly() << hex << " originally at "  << orig_insn_addr
-							     << " now located at : 0x" << hex << from_insn_location << " with offset=(page-pc + "
-							     << new_imm21_ext << ")" << endl;
+							const auto new_immhi19   = new_imm21_ext >> 2;
+							const auto new_immlo2    = new_imm21_ext  & mask2;
+							const auto clean_new_insn= full_insn & ~(mask2<<29) & ~ (mask19 << 5);
+							const auto new_insn      = clean_new_insn | ((new_immlo2&mask2) << 29) | ((new_immhi19&mask19)<<5);
+							// put the new instruction in the output
+							ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
+							if (m_verbose)
+							{
+								cout << "Relocating a adr(p) pcrel relocation with orig_pageno=" << hex
+								     << (orig_insn_pageno << 12) << " offset=(page-pc+" << imm21_ext << ")"  << endl;
+								cout << "Based on: " << disasm.getDisassembly() << hex << " originally at "  << orig_insn_addr
+								     << " now located at : 0x" << hex << from_insn_location << " with offset=(page-pc + "
+								     << new_imm21_ext << ")" << endl;
+							}
+						}
+						else
+						{
+							assert(is_adr_type); // don't even know what to do if the PAGE is too far away!
+							// imm21->64 bit address didn't work.  Split it up into two parts.
+
+							/* the plan :
+							 * FA: b   L0
+							 * FT:
+							 * ..
+							 * L0  adrp dest_reg, <addr-page number>
+							 * L1  add dest_reg, dest_reg, (addr-page offset)
+							 * L2: b ft
+							 */
+							const auto tramp_size=3*4; // 3 insns, 4 bytes each
+							const auto address_to_generate=imm21_ext+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr;
+							const auto destreg=full_insn&mask5;
+							const auto tramp_range=ms.GetFreeRange(tramp_size);
+							const auto tramp_start=tramp_range.GetStart();
+							// don't be too fancy, just reserve 12 bytes.
+							ms.SplitFreeRange({tramp_start,tramp_start+12});
+
+
+							const auto FA=from_insn_location;
+							const auto FT=from_insn_location+4;
+							const auto L0=tramp_start;
+							const auto L1=tramp_start+4;
+							const auto L2=tramp_start+8;
+							const auto branch_bytes=string("\x00\x00\x00\x14",4);
+							// const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr
+							const auto relocd_insn_pageno  = L1>>12;
+							const auto address_to_generate_pageno = address_to_generate >> 12;
+							const auto address_to_generate_page_offset = address_to_generate & mask12;
+							const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
+							const auto relocd_immhi19   = relocd_imm21_ext >> 2;
+							const auto relocd_immlo2    = relocd_imm21_ext  & mask2;
+
+							// this should be +/- 4gb, so we shouldn't fail now!
+							assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
+
+							// put an uncond branch at where the adr was.
+							// and make it point at L0
+							ms.PlopBytes(FA,branch_bytes.c_str(),4);
+							zo->ApplyPatch(FA,L0);
+
+							// adrp: 1 imm2lo 1 0000 immhi19 Rd
+							auto adrp_bytes=string("\x00\x00\x00\x90",4);
+							auto adrp_word =*(int*)adrp_bytes.c_str();
+							adrp_word|=destreg<<0;
+							adrp_word |=  ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
+							ms.PlopBytes(L0,(char*)&adrp_word,4);
+
+							// add64 imm12 = 1001 0001 00 imm12 Rn Rd
+							auto add_bytes=string("\x00\x00\x00\x91",4);
+							auto add_word =*(int*)add_bytes.c_str();
+							add_word|=destreg<<0;
+							add_word|=destreg<<5;
+							add_word|=address_to_generate_page_offset << 10 ;
+							ms.PlopBytes(L1,(char*)&add_word,4);
+
+							// put an uncond branch the end of the trampoline
+							// and make it jump at FT
+							ms.PlopBytes(L2,branch_bytes.c_str(),4);
+							zo->ApplyPatch(L2,FT);
+
+							// should be few enough of these to always print
+							cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to "
+							    << hex << L0 << "-" << L0+tramp_size << endl;
 						}
 					}
 					else if(is_ldr_type)
 					{
-						// ldr: 0 x1 0110 0 0 imm19 Rt5
+						// ldr w/x reg    : 0 x1 0110 0 0 imm19 Rt5, x1   indicate size (0,1 -> w/x) 
+						// ldr s/d/q reg  : opc2 0111 0 0 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q)
 						const auto imm19    = ((int64_t)full_insn >> 5 ) & mask19;
 						const auto imm19_ext= (imm19 << 45) >> 45;
 						const auto referenced_addr=(imm19_ext<<2)+from_insn->GetAddress()->GetVirtualOffset()+4;
 						const auto new_imm19_ext  =((int64_t)referenced_addr-(int64_t)from_insn_location-4+(int64_t)reloc->GetAddend()+(int64_t)to_addr)>>2;
-						assert( ((new_imm19_ext << 45) >> 45) == new_imm19_ext);
-						const auto clean_new_insn = full_insn & ~(mask19 << 5);
-						const auto new_insn       = clean_new_insn | ((new_imm19_ext & mask19)<<5);
-						// put the new instruction in the output
-						ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
-						if (m_verbose)
+						if( ((new_imm19_ext << 45) >> 45) == new_imm19_ext)
+						{
+							const auto clean_new_insn = full_insn & ~(mask19 << 5);
+							const auto new_insn       = clean_new_insn | ((new_imm19_ext & mask19)<<5);
+							// put the new instruction in the output
+							ms.PlopBytes(from_insn_location, (const char*)&new_insn, insn_bytes_len);
+							if (m_verbose)
+							{
+								cout << "Relocating a ldr pcrel relocation with orig_addr=" << hex
+								     << (referenced_addr) << " offset=(pc+" << imm19_ext << ")"  << endl;
+								cout << "Based on: " << disasm.getDisassembly() 
+								     << " now located at : 0x" << hex << from_insn_location << " with offset=(pc + "
+								     << new_imm19_ext << ")" << endl;
+							}
+						}
+						else
 						{
-							cout << "Relocating a ldr pcrel relocation with orig_addr=" << hex
-							     << (referenced_addr) << " offset=(pc+" << imm19_ext << ")"  << endl;
-							cout << "Based on: " << disasm.getDisassembly() 
-							     << " now located at : 0x" << hex << from_insn_location << " with offset=(pc + "
-							     << new_imm19_ext << ")" << endl;
+							// imm19->64 bit address didn't work.  Split it up into two parts.
+
+							/* the plan :
+							 * FA: b   L0
+							 * FT:
+							 * ..
+							 * L0  adrp dest_reg, <addr-page number>
+							 * L1  ldr dest_reg, [dst_reg, #addr-page offset]
+							 * L2: b ft
+							 */
+							const auto tramp_size=3*4; // 3 insns, 4 bytes each
+							const auto address_to_generate=(imm19_ext<<2)+orig_insn_addr+(int64_t)reloc->GetAddend()+(int64_t)to_addr;
+							const auto destreg=full_insn&mask5;
+							const auto tramp_range=ms.GetFreeRange(tramp_size);
+							const auto tramp_start=tramp_range.GetStart();
+							// don't be too fancy, just reserve 12 bytes.
+							ms.SplitFreeRange({tramp_start,tramp_start+12});
+
+
+							const auto FA=from_insn_location;
+							const auto FT=from_insn_location+4;
+							const auto L0=tramp_start;
+							const auto L1=tramp_start+4;
+							const auto L2=tramp_start+8;
+							const auto branch_bytes=string("\x00\x00\x00\x14",4);
+							// const auto updated_orig_insn_pageno = orig_insn_addr>>12; // orig_insn_pageno was shifted by 0 for adr
+							const auto relocd_insn_pageno  = L1>>12;
+							const auto address_to_generate_pageno = address_to_generate >> 12;
+							const auto address_to_generate_page_offset = address_to_generate & mask12;
+							const auto relocd_imm21_ext = (int64_t)address_to_generate_pageno - (int64_t)relocd_insn_pageno;
+							const auto relocd_immhi19   = relocd_imm21_ext >> 2;
+							const auto relocd_immlo2    = relocd_imm21_ext  & mask2;
+
+							// this should be +/- 4gb, so we shouldn't fail now!
+							assert(((relocd_imm21_ext << 43) >> 43) == relocd_imm21_ext);
+
+							// put an uncond branch at where the ldr was.
+							// and make it point at L0
+							ms.PlopBytes(FA,branch_bytes.c_str(),4);
+							zo->ApplyPatch(FA,L0);
+
+							// adrp: 1 imm2lo 1 0000 immhi19 Rd
+							auto adrp_bytes=string("\x00\x00\x00\x90",4);
+							auto adrp_word =*(int*)adrp_bytes.c_str();
+							adrp_word|=destreg<<0;
+							adrp_word |=  ((relocd_immlo2&mask2) << 29) | ((relocd_immhi19&mask19)<<5);
+							ms.PlopBytes(L0,(char*)&adrp_word,4);
+
+							if(is_ldr_int_type)
+							{
+								// convert: ldr w/x reg : 0 x1 011 0 00 ---imm19---- Rt5    x1 indicate size (0,1 -> w/x) 
+								// to     : ldr x/w reg : 1 x1 111 0 01 01 imm12 Rn5 Rt5    x1 indicates size (0,1 -> w/x)
+								auto new_ldr_bytes=string("\x00\x00\x40\xb9",4);
+								auto new_ldr_word =*(int*)new_ldr_bytes.c_str();
+								const auto orig_ldr_size_bit=(full_insn>>30)&mask1;
+								const auto scale=0x2|orig_ldr_size_bit;
+								const auto scaled_page_offset=(address_to_generate_page_offset>>scale) ;
+								new_ldr_word|=destreg<<0; // Rt
+								new_ldr_word|=destreg<<5; // Rn
+								new_ldr_word|=scaled_page_offset << 10 ; // imm12
+								new_ldr_word|=orig_ldr_size_bit << 30; // x1
+								ms.PlopBytes(L1,(char*)&new_ldr_word,4);
+							}
+							else if(is_ldr_fp_type)
+							{
+								// convert: ldr   s/d/q reg: opc2  01 11 00 imm19 Rt5, opc2 indicate size (00,01,10 -> s/d/q)
+								// to:      ldr b/s/d/q reg: size2 11 11 01 opc2 imm12 Rn Rt
+								auto new_ldr_bytes=string("\x00\x00\x00\x3d",4);
+								auto new_ldr_word =*(int*)new_ldr_bytes.c_str();
+								const auto orig_ldr_opc_bits=(full_insn>>30)&mask2;
+
+								// decode size out of old ldr
+								const auto ldr_size= 
+									orig_ldr_opc_bits == 0x0 ? 4u  :
+									orig_ldr_opc_bits == 0x1 ? 8u  :
+									orig_ldr_opc_bits == 0x2 ? 16u :
+									throw invalid_argument("cannot decode ldr floating-point access size");
+
+								// encode size field for new ldr.
+								const auto new_ldr_size_bits=
+									ldr_size == 4  ? 0x2u :
+									ldr_size == 8  ? 0x3u :
+									ldr_size == 16 ? 0x0u :
+									throw invalid_argument("cannot decode ldr floating-point access size");
+
+								// encode opc2
+								const auto new_ldr_opc2_bits=
+									ldr_size == 4  ? 0x1u :
+									ldr_size == 8  ? 0x1u :
+									ldr_size == 16 ? 0x3u :
+									throw invalid_argument("cannot decode ldr floating-point access size");
+
+								// add variable fields to new insn
+								new_ldr_word|=destreg<<0; // Rt
+								new_ldr_word|=destreg<<5; // Rn
+								new_ldr_word|=((address_to_generate_page_offset/ldr_size) << 10); // imm12
+								new_ldr_word|=(new_ldr_size_bits<<30); // size2
+								new_ldr_word|=(new_ldr_opc2_bits<<22); // opc2
+
+								ms.PlopBytes(L1,(char*)&new_ldr_word,4);
+							}
+							else
+								assert(0);
+
+							// put an uncond branch the end of the trampoline
+							// and make it jump at FT
+							ms.PlopBytes(L2,branch_bytes.c_str(),4);
+							zo->ApplyPatch(L2,FT);
+
+							// should be few enough of these to always print
+							cout<< "Had to trampoline " << disasm.getDisassembly() << "@"<<FA<<" to "
+							    << hex << L0 << "-" << L0+tramp_size-1 << endl;
 						}
 
 					}
 					else
 						assert(0);
 
-
-
 				}
-
 			}
 			// instruction has a absolute  memory operand that needs it's displacement updated.
 			else if(reloc->GetType()==string("absoluteptr_to_scoop"))
-- 
GitLab