From ae4f4cb6fd9ea7d5296b7d6fa920be578ccc9229 Mon Sep 17 00:00:00 2001
From: Jason Hiser <jdhiser@gmail.com>
Date: Mon, 31 Dec 2018 12:35:46 -0500
Subject: [PATCH] code cleanups, updates to arm patcher to support trampolining
 short-immediate instructions to out-of-band locations

---
 include/patcher/patcher_arm64.hpp |   2 +
 src/memory_space.cpp              |  43 ++++--------
 src/patcher_arm64.cpp             | 106 ++++++++++++++++++++++------
 src/zipr.cpp                      | 111 ++++++------------------------
 4 files changed, 121 insertions(+), 141 deletions(-)

diff --git a/include/patcher/patcher_arm64.hpp b/include/patcher/patcher_arm64.hpp
index d56705ecb..4afad8022 100644
--- a/include/patcher/patcher_arm64.hpp
+++ b/include/patcher/patcher_arm64.hpp
@@ -40,6 +40,8 @@ class ZiprPatcherARM64_t : public ZiprPatcherBase_t
         libIRDB::FileIR_t* m_firp;
         Zipr_SDK::MemorySpace_t &memory_space;
 
+	std::map<RangeAddress_t, RangeAddress_t> redirect_map;
+
 
 	public:
 
diff --git a/src/memory_space.cpp b/src/memory_space.cpp
index 1f2d58e94..e4189b9cc 100644
--- a/src/memory_space.cpp
+++ b/src/memory_space.cpp
@@ -41,7 +41,7 @@ void ZiprMemorySpace_t::SplitFreeRange(Range_t split_from)
 	RangeAddress_t counter, end;
 	for (counter = split_from.GetStart(), end = split_from.GetEnd();
 	     counter!=end;
-			 counter++)
+	     counter++)
 	{
 		SplitFreeRange(counter);
 	}
@@ -49,10 +49,10 @@ void ZiprMemorySpace_t::SplitFreeRange(Range_t split_from)
 
 void ZiprMemorySpace_t::SplitFreeRange(RangeAddress_t addr)
 {
-	RangeSet_t::iterator it=FindFreeRange(addr);
+	const auto it=FindFreeRange(addr);
 	assert(IsValidRange(it));
 
-	Range_t r=*it;
+	const auto r=*it;
 	if(r.GetStart()==r.GetEnd())
 	{
 		assert(addr==r.GetEnd());
@@ -80,7 +80,7 @@ void ZiprMemorySpace_t::MergeFreeRange(Range_t range)
 	RangeAddress_t counter, end;
 	for (counter = range.GetStart(), end = range.GetEnd();
 	     counter!=end;
-			 counter++)
+	     counter++)
 	{
 		MergeFreeRange(counter);
 	}
@@ -178,18 +178,15 @@ void ZiprMemorySpace_t::MergeFreeRange(RangeAddress_t addr)
 
 void ZiprMemorySpace_t::PrintMemorySpace(std::ostream &out)
 {
-	for( RangeSet_t::iterator it=free_ranges.begin();
-		it!=free_ranges.end();
-		++it)
+	for(auto r : free_ranges)
 	{
-		Range_t r = *it;
 		out <<"0x"<<std::hex<<r.GetStart()<<" - 0x"<<std::hex<<r.GetEnd()<<endl;
 	}
 }
 
 RangeSet_t::iterator ZiprMemorySpace_t::FindFreeRange(RangeAddress_t addr)
 {
-	RangeSet_t::iterator freer = free_ranges.find(Range_t(addr, addr)); 
+	auto freer = free_ranges.find(Range_t(addr, addr)); 
 	return freer;
 }
 
@@ -201,8 +198,8 @@ bool ZiprMemorySpace_t::IsValidRange(RangeSet_t::iterator it)
 std::pair<RangeSet_t::const_iterator,RangeSet_t::const_iterator>
 	ZiprMemorySpace_t::GetNearbyFreeRanges(const RangeAddress_t hint,size_t count)
 {
-	Range_t search(hint, hint+1);
-	RangeSet_t::const_iterator result = free_ranges.lower_bound(search);
+	const auto search=Range_t(hint, hint+1);
+	const auto result = free_ranges.lower_bound(search);
 	/*
 	 * TODO: Not quite sure what to make of this.
 	 */
@@ -212,11 +209,8 @@ std::pair<RangeSet_t::const_iterator,RangeSet_t::const_iterator>
 
 Range_t ZiprMemorySpace_t::GetLargeRange(void)
 {
-	for( RangeSet_t::iterator it=free_ranges.begin();
-		it!=free_ranges.end();
-		++it)
+	for(auto r : free_ranges)
 	{
-		Range_t r=*it;
 		if(r.GetEnd()==(RangeAddress_t)-1)
 			return r;
 	}
@@ -230,12 +224,9 @@ bool ZiprMemorySpace_t::SortRangeBySize(const Range_t &a, const Range_t &b)
 
 std::list<Range_t> ZiprMemorySpace_t::GetFreeRanges(size_t size)
 {
-	list<Range_t> result;
-	for( RangeSet_t::iterator it=free_ranges.begin();
-		it!=free_ranges.end();
-		++it)
+	auto result=list<Range_t>();
+	for(auto r : free_ranges)
 	{
-		Range_t r=*it;
 		if(r.GetEnd() - r.GetStart() >= (unsigned) size)
 			result.push_back(r);
 	}
@@ -245,13 +236,8 @@ std::list<Range_t> ZiprMemorySpace_t::GetFreeRanges(size_t size)
 
 Range_t ZiprMemorySpace_t::GetInfiniteFreeRange()
 {
-	vector<Range_t> v;
-	Range_t big_range;
-	for( RangeSet_t::iterator it=free_ranges.begin();
-		it!=free_ranges.end();
-		++it)
+	for(auto r : free_ranges)
 	{
-		Range_t r=*it;
 		if(r.GetEnd()==(RangeAddress_t)-1)
 			return r;
 	}
@@ -263,11 +249,8 @@ Range_t ZiprMemorySpace_t::GetFreeRange(int size)
 {
 	vector<Range_t> v;
 	Range_t big_range;
-	for( RangeSet_t::iterator it=free_ranges.begin();
-		it!=free_ranges.end();
-		++it)
+	for(auto r : free_ranges)
 	{
-		Range_t r=*it;
 		if(r.GetEnd()==(RangeAddress_t)-1)
 			big_range=r;
 		else if(r.GetEnd() - r.GetStart() >= (unsigned) size)
diff --git a/src/patcher_arm64.cpp b/src/patcher_arm64.cpp
index faad6ad7c..477789b09 100644
--- a/src/patcher_arm64.cpp
+++ b/src/patcher_arm64.cpp
@@ -72,6 +72,7 @@ void ZiprPatcherARM64_t::ApplyNopToPatch(RangeAddress_t addr)
 
 void ZiprPatcherARM64_t::ApplyPatch(RangeAddress_t from_addr, RangeAddress_t to_addr)
 { 
+
         const auto first_byte =(uint8_t)memory_space[from_addr+3];
         const auto second_byte=(uint8_t)memory_space[from_addr+2];
         const auto third_byte =(uint8_t)memory_space[from_addr+1];
@@ -112,35 +113,100 @@ void ZiprPatcherARM64_t::ApplyPatch(RangeAddress_t from_addr, RangeAddress_t to_
 
 	if(is_uncond_branch || is_uncond_branch_and_link)
 	{
+		// cout<<"Applying uncond branch patch from "<<hex<<from_addr<<" to "<<to_addr<<endl;
 		const auto non_imm_bits=32U-26U;	// 32 bits, imm26
 		// assert there's no overflow.
 		assert((uint64_t)(new_offset << non_imm_bits) == ((uint64_t)new_offset) << non_imm_bits);
 		// or in opcode for first byte.  set remaining bytes.
-		const auto trimmed_offset=new_offset & ((1<<26)-1);
-		const auto new_first_byte =               (trimmed_offset>> 0)&0xff;
-		const auto new_second_byte=               (trimmed_offset>> 8)&0xff;
-		const auto new_third_byte =               (trimmed_offset>>16)&0xff;
-		const auto new_fourth_byte=(opcode<<2) | ((trimmed_offset>>24)&0xff);
-		//cout<<"ARM64::Patching "<<hex<<from_addr+0<<" val="<<new_first_byte <<endl;
-		//cout<<"ARM64::Patching "<<hex<<from_addr+1<<" val="<<new_second_byte<<endl;
-		//cout<<"ARM64::Patching "<<hex<<from_addr+2<<" val="<<new_third_byte <<endl;
-		//cout<<"ARM64::Patching "<<hex<<from_addr+3<<" val="<<new_fourth_byte<<endl;
-		memory_space[from_addr+0]=new_first_byte;             
-		memory_space[from_addr+1]=new_second_byte;
-		memory_space[from_addr+2]=new_third_byte;
-		memory_space[from_addr+3]=new_fourth_byte;
+		const auto mask26=((1<<26)-1);
+		const auto trimmed_offset=new_offset & mask26;
+		memory_space[from_addr+0]=               (trimmed_offset>> 0)&0xff;
+		memory_space[from_addr+1]=               (trimmed_offset>> 8)&0xff;
+		memory_space[from_addr+2]=               (trimmed_offset>>16)&0xff;
+		memory_space[from_addr+3]=(opcode<<2) | ((trimmed_offset>>24)&0xff);
 	}
 	else if (is_branch_cond || is_compare_and_branch)
 	{
 		const auto non_mask_bits=32U-19;	// 32 bits, imm19
 		const auto mask19=(1<<19U)-1;
-		assert((uint64_t)(new_offset << non_mask_bits) == ((uint64_t)new_offset) << non_mask_bits);
-		const auto full_word_clean=full_word & ~(mask19<<5);
-		const auto full_word_new_offset=full_word_clean | ((new_offset&mask19)<<5);
-		memory_space[from_addr+0]=(full_word_new_offset>> 0)&0xff;
-		memory_space[from_addr+1]=(full_word_new_offset>> 8)&0xff;
-		memory_space[from_addr+2]=(full_word_new_offset>>16)&0xff;
-		memory_space[from_addr+3]=(full_word_new_offset>>24)&0xff;
+		if((uint64_t)(new_offset << non_mask_bits) == ((uint64_t)new_offset) << non_mask_bits)
+		{
+			// the branch offset works here!
+			const auto full_word_clean=full_word & ~(mask19<<5);
+			const auto full_word_new_offset=full_word_clean | ((new_offset&mask19)<<5);
+			memory_space[from_addr+0]=(full_word_new_offset>> 0)&0xff;
+			memory_space[from_addr+1]=(full_word_new_offset>> 8)&0xff;
+			memory_space[from_addr+2]=(full_word_new_offset>>16)&0xff;
+			memory_space[from_addr+3]=(full_word_new_offset>>24)&0xff;
+		}
+		else
+		{
+			// branch offset didn't work.
+			// hopefully we can get there with a direct branch.
+			/* the plan when the branch offset doesn't fit:
+			 * FA: b   L0
+			 * FT: 
+			 * ..
+			 * L0  b<cond> <args>, L2 # at tramp_start
+			 * L1  b FT
+			 * L2: b <target>
+			 */
+			const auto tramp_size=12;
+
+			// check to see if we already had to trampoline from_addr.  If so, 
+			// patch the trampoline, not the actual redirect.
+			auto tramp_start=RangeAddress_t(0);
+			const auto redirect_it=redirect_map.find(from_addr);
+			if(redirect_it==redirect_map.end())
+			{
+				// allocate new space in memory
+				const auto tramp_range=memory_space.GetFreeRange(tramp_size);
+				tramp_start=tramp_range.GetStart();
+				// don't be too fancy, just reserve 12 bytes.
+				memory_space.SplitFreeRange({tramp_start,tramp_start+tramp_size});
+				// record that we had to trampoline this!
+				redirect_map[from_addr]=tramp_start;
+			}
+			else
+			{
+				// use previous tramp space.
+				tramp_start=redirect_it->second;
+			}
+
+			const auto FA=from_addr;
+			const auto FT=from_addr+4;
+			const auto L0=tramp_start;
+			const auto L1=tramp_start+4;
+			const auto L2=tramp_start+8;
+			const auto branch_bytes=string("\x00\x00\x00\x014",4);
+
+			// put the cond branch in the trampoline, make it jump to L2
+			memory_space[L0+0]=memory_space[FA+0];
+			memory_space[L0+1]=memory_space[FA+1];
+			memory_space[L0+2]=memory_space[FA+2];
+			memory_space[L0+3]=memory_space[FA+3];
+			ApplyPatch(L0,L2);
+
+			// now make the original location jump to the trampoline
+			memory_space.PlopBytes(FA, branch_bytes.c_str(), 4);
+			ApplyPatch(FA,L0);// make it jump to L0 (the trampoline)
+			
+			// now drop down an uncond jump for L1, and make it go to FT
+			// (i.e., jump around the jump to the target)
+			memory_space.PlopBytes(L1, branch_bytes.c_str(), 4);
+			ApplyPatch(L1,FT);// make it jump to FT
+
+			// lastly, put down the uncond jump at L2, and make it go to the target
+			memory_space.PlopBytes(L2, branch_bytes.c_str(), 4);
+			ApplyPatch(L2,to_addr);// make it jump to the target
+
+			const auto disasm_str=DecodedInstruction_t(from_addr, (const void*)&full_word, 4).getDisassembly();
+
+			cout << "Had to trampline "<<disasm_str<< " at "<<hex<<from_addr
+			     << " to " << L0 << " - " << L0+tramp_size<< " for target "<<to_addr<<endl;
+
+
+		}
 	}
 	else if (is_test_and_branch)
 	{
diff --git a/src/zipr.cpp b/src/zipr.cpp
index 6cbceeb6a..3968a7b9b 100644
--- a/src/zipr.cpp
+++ b/src/zipr.cpp
@@ -780,15 +780,16 @@ void ZiprImpl_t::WriteDollops()
 
 			// sanity check that we didn't go passed the worst case size we calculate for this entry
 			const auto de_start_loc = entry_to_write->Place();
-			const auto should_end_before = de_start_loc + DetermineDollopEntrySize(entry_to_write, false);
-			assert(de_end_loc <= should_end_before);
+			const auto should_end_at = de_start_loc + DetermineDollopEntrySize(entry_to_write, false);
+			assert(de_end_loc == should_end_at);
 			/*
 			 * Build up a list of those dollop entries that we have
 			 * just written that have a target. See comment above 
 			 * ReplopDollopEntriesWithTargets() for the reason that
 			 * we have to do this.
 			 */
-			if (entry_to_write->TargetDollop())
+			const auto will_replop=entry_to_write->TargetDollop()!=nullptr;
+			if (will_replop)
 				m_des_to_replop.push_back(entry_to_write);
 		}
 	}
@@ -1738,16 +1739,13 @@ RangeAddress_t ZiprImpl_t::_PlopDollopEntry(DollopEntry_t *entry, RangeAddress_t
 	const auto insn = entry->Instruction();
 	const auto insn_wcis = DetermineInsnSize(insn, false);
 	RangeAddress_t updated_addr = 0;
-	auto placed_address = entry->Place();
 	RangeAddress_t target_address = 0;
 	auto placed_insn = false;
-
-	if (entry->TargetDollop() && entry->TargetDollop()->front())
+	const auto target_dollop=entry->TargetDollop();
+	if (target_dollop && target_dollop->front())
 	{
-		auto target_address_iter = final_insn_locations.find(entry->
-		                                                     TargetDollop()->
-		                                                     front()->
-		                                                     Instruction());
+		const auto entry_target_head_insn=entry-> TargetDollop()-> front()-> Instruction();
+		const auto target_address_iter = final_insn_locations.find(entry_target_head_insn);
 		if (target_address_iter != final_insn_locations.end())
 		{
 			target_address = target_address_iter->second;
@@ -1758,32 +1756,26 @@ RangeAddress_t ZiprImpl_t::_PlopDollopEntry(DollopEntry_t *entry, RangeAddress_t
 	}	
 
 
-	if (override_address != 0)
-		placed_address = override_address;
-	
+	auto placed_address = override_address == 0 ? entry->Place() : override_address;
 	const auto plop_it = plopping_plugins.find(insn);
 	if (plop_it != plopping_plugins.end())
 	{
 		for (auto pp : *(plop_it->second))
 		{
 			auto pp_placed_insn = false;
-			DLFunctionHandle_t handle = pp;
-			auto zpi = dynamic_cast<ZiprPluginInterface_t*>(handle);
-			updated_addr = std::max(zpi->PlopDollopEntry(entry,
-			                                             placed_address,
-			                                             target_address,
-		 	                                             insn_wcis,
-		 	                                             pp_placed_insn
-								    ),
-			                        updated_addr
-			                       );
+			const auto handle = pp;
+			const auto zpi = dynamic_cast<ZiprPluginInterface_t*>(handle);
+			const auto plugin_ret=zpi->PlopDollopEntry(entry, placed_address, target_address, insn_wcis, pp_placed_insn);
+			updated_addr = std::max(plugin_ret, updated_addr);
 			if (m_verbose)
+			{
 				cout << zpi->ToString() << " placed entry " 
 				     << std::hex << entry 
-						 << " at address: " << std::hex << placed_address 
-						 << " " << (pp_placed_insn ? "and placed" : "but did not place")
-						 << " the instruction."
-						 << endl;
+				     << " at address: " << std::hex << placed_address 
+				     << " " << (pp_placed_insn ? "and placed" : "but did not place")
+				     << " the instruction."
+				     << endl;
+			}
 		
 			placed_insn |= pp_placed_insn;
 		}
@@ -1830,69 +1822,6 @@ RangeAddress_t ZiprImpl_t::PlopDollopEntry(
 	string raw_data = insn->GetDataBits();
 	string orig_data = insn->GetDataBits();
 
-#if 0
-	/* functionality moved to unpin plugin. */
-
-	const auto operands=d.getOperands();
-	const auto is_instr_relative_it = find_if(ALLOF(operands),[](const DecodedOperand_t& op)
-	                                          { return op.isMemory() && op.isPcrel(); });
-
-	const auto is_instr_relative = is_instr_relative_it != operands.end(); 
-
-	if (is_instr_relative) 
-	{
-		uint32_t abs_displacement=0;
-		uint32_t *displacement=0;
-		char instr_raw[20] = {0,};
-		int size=0;
-		int offset=0;
-		assert(raw_data.length() <= 20);
-
-		/*
-		 * Which argument is relative? There must be one.
-		 */
-		auto relative_arg=*is_instr_relative_it;
-
-		/*
-		 * Calculate the offset into the instruction
-		 * of the displacement address.
-		 */
-		offset = d.getMemoryDisplacementOffset(relative_arg, insn); 
-
-		/*
-		 * The size of the displacement address must be
-		 * four at this point.
-		 */
-		size = relative_arg.getMemoryDisplacementEncodingSize(); 
-		assert(size == 4);
-
-		/*
-		 * Copy the instruction raw bytes to a place
-		 * where we can modify them.
-		 */
-		memcpy(instr_raw,raw_data.c_str(),raw_data.length());
-
-		/*
-		 * Calculate absolute displacement and relative
-		 * displacement.
-		 */
-		displacement = (uint32_t*)(&instr_raw[offset]);
-		abs_displacement = *displacement;
-		*displacement = abs_displacement - addr;
-
-		if(m_verbose)
-		{
-			cout<<"absolute displacement: "<< hex << abs_displacement<<endl;
-			cout<<"relative displacement: "<< hex << *displacement<<endl;
-		}
-
-		/*
-		 * Update the instruction with the relative displacement.
-		 */
-		raw_data.replace(0, raw_data.length(), instr_raw, raw_data.length());
-		insn->SetDataBits(raw_data);
-	}
-#endif
 
 	if(entry->TargetDollop() && entry->Instruction()->GetCallback()=="")
 	{
@@ -2722,7 +2651,7 @@ void ZiprImpl_t::UpdateScoops()
 			scoop->GetEnd()->SetVirtualOffset(frit->GetStart());
 		}
 
-		for(virtual_offset_t i=scoop->GetStart()->GetVirtualOffset();
+		for(auto i=scoop->GetStart()->GetVirtualOffset();
 		    i<= scoop->GetEnd()->GetVirtualOffset();
 		    i++ )
 		{
-- 
GitLab