From ef66c8c98ee8e67095e0f5f1566d9e89da5f49e0 Mon Sep 17 00:00:00 2001
From: Jason Hiser <jdhiser@gmail.com>
Date: Tue, 3 Dec 2019 20:31:13 -0500
Subject: [PATCH] added mips delay slot support

---
 include/arch/arch_base.hpp   |  7 +++
 include/arch/arch_mips32.hpp |  3 ++
 src/SConscript               |  1 +
 src/arch_base.cpp            | 31 ++++++++++++-
 src/arch_mips32.cpp          | 87 ++++++++++++++++++++++++++++++++++++
 src/patcher_mips32.cpp       | 70 ++++++++++++++++++++---------
 src/sizer_mips32.cpp         | 20 ++++++---
 src/zipr.cpp                 | 25 +++++++----
 8 files changed, 205 insertions(+), 39 deletions(-)
 create mode 100644 src/arch_mips32.cpp

diff --git a/include/arch/arch_base.hpp b/include/arch/arch_base.hpp
index 7c2949081..f84f5a7d8 100644
--- a/include/arch/arch_base.hpp
+++ b/include/arch/arch_base.hpp
@@ -1,18 +1,25 @@
 #ifndef ARCHBASE_HPP
 #define ARCHBASE_HPP
 
+
 class ZiprArchitectureHelperBase_t
 {
 	private:
 		const unique_ptr<ZiprPinnerBase_t > m_pinner ;
 		const unique_ptr<ZiprPatcherBase_t> m_patcher;
 		const unique_ptr<ZiprSizerBase_t  > m_sizer  ;
+
 		ZiprArchitectureHelperBase_t()=delete;
+
 	protected:
+		Zipr_t*                             m_zipr   ;
 		ZiprArchitectureHelperBase_t(Zipr_SDK::Zipr_t* p_zipr_obj);
+
 	public:
 		virtual IRDB_SDK::Instruction_t* createNewJumpInstruction(IRDB_SDK::FileIR_t *p_firp, IRDB_SDK::Instruction_t* p_existing)=0;
 		virtual IRDB_SDK::Instruction_t* createNewHaltInstruction(IRDB_SDK::FileIR_t *p_firp, IRDB_SDK::Instruction_t* p_existing)=0;
+		virtual RangeAddress_t           splitDollop             (IRDB_SDK::FileIR_t* firp, Zipr_SDK::Dollop_t* to_split, const RangeAddress_t p_cur_addr);
+
 		static std::unique_ptr<ZiprArchitectureHelperBase_t> factory(Zipr_SDK::Zipr_t* zipr_obj);
 
 		ZiprPinnerBase_t * getPinner () const { return m_pinner .get(); }
diff --git a/include/arch/arch_mips32.hpp b/include/arch/arch_mips32.hpp
index 5746425db..201673aff 100644
--- a/include/arch/arch_mips32.hpp
+++ b/include/arch/arch_mips32.hpp
@@ -29,5 +29,8 @@ class ZiprArchitectureHelperMIPS32_t : public ZiprArchitectureHelperBase_t
 			assert(d->valid());
 			return ret;
 		}
+
+		virtual RangeAddress_t splitDollop(FileIR_t* p_firp, Zipr_SDK::Dollop_t* to_split, const RangeAddress_t p_cur_addr);
+
 };
 #endif
diff --git a/src/SConscript b/src/SConscript
index ae89b03c2..5a5c4c88b 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -26,6 +26,7 @@ files=  '''
 	pewrite.cpp
 	ehwrite.cpp
 	arch_base.cpp
+	arch_mips32.cpp
 	pinner_arm64.cpp  
 	pinner_arm32.cpp  
 	pinner_mips32.cpp  
diff --git a/src/arch_base.cpp b/src/arch_base.cpp
index 0d2a34906..282624015 100644
--- a/src/arch_base.cpp
+++ b/src/arch_base.cpp
@@ -18,7 +18,8 @@ using namespace zipr;
 ZiprArchitectureHelperBase_t::ZiprArchitectureHelperBase_t(Zipr_SDK::Zipr_t* p_zipr_obj) :
 	m_pinner (ZiprPinnerBase_t ::factory(p_zipr_obj)),
 	m_patcher(ZiprPatcherBase_t::factory(p_zipr_obj)),
-	m_sizer  (ZiprSizerBase_t  ::factory(p_zipr_obj)) 
+	m_sizer  (ZiprSizerBase_t  ::factory(p_zipr_obj)),
+	m_zipr   (p_zipr_obj)
 {
 }
 
@@ -35,3 +36,31 @@ unique_ptr<ZiprArchitectureHelperBase_t> ZiprArchitectureHelperBase_t::factory(Z
 
 	return unique_ptr<ZiprArchitectureHelperBase_t>(ret);
 }
+
+/*
+ * Default dollop split: append a jump to to_split that targets its fallthrough dollop, queue the fallthrough for placement, and return the first address after the new jump.
+ */
+RangeAddress_t ZiprArchitectureHelperBase_t::splitDollop(FileIR_t* p_firp, Zipr_SDK::Dollop_t* to_split, const RangeAddress_t p_cur_addr)
+{
+	const auto zipr_impl   = dynamic_cast<ZiprImpl_t*>(m_zipr);
+	const auto fallthrough = to_split->getFallthroughDollop();
+	auto       cur_addr    = p_cur_addr;
+
+	/* zipr_impl is dereferenced below; fail loudly if m_zipr is not a ZiprImpl_t. */
+	assert(zipr_impl != nullptr);
+
+	/* build the jump, aim it at the fallthrough dollop and place it at the current address */
+	const auto patch    = createNewJumpInstruction(p_firp, nullptr);
+	const auto patch_de = new DollopEntry_t(patch, to_split);
+	patch_de->setTargetDollop(fallthrough);
+	patch_de->Place(cur_addr);
+	cur_addr += m_zipr->determineDollopEntrySize(patch_de, false);
+
+	to_split->push_back(patch_de);
+	to_split->setFallthroughPatched(true);
+	m_zipr->getPlacementQueue()->insert({fallthrough, cur_addr});
+
+	/* a new instruction was inserted, so check whether a plugin wants to plop it. */
+	zipr_impl->AskPluginsAboutPlopping(patch_de->getInstruction());
+	return cur_addr;
+}
diff --git a/src/arch_mips32.cpp b/src/arch_mips32.cpp
new file mode 100644
index 000000000..caa78ba5e
--- /dev/null
+++ b/src/arch_mips32.cpp
@@ -0,0 +1,87 @@
+#include <zipr_all.h>
+
+namespace zipr
+{
+#include <arch/arch_mips32.hpp>
+}
+#include <memory>
+
+using namespace std;
+using namespace IRDB_SDK;
+using namespace zipr;
+
+#define ALLOF(a) begin(a),end(a)
+
+
+
+/*
+ * MIPS32 dollop split.  Appends, in order: a copy of the delay-slot instruction (only if to_split ends in a branch), a jump to the fallthrough dollop, and a nop for that jump's own delay slot.  Returns the first address after them.
+ */
+RangeAddress_t ZiprArchitectureHelperMIPS32_t::splitDollop(FileIR_t* p_firp, Zipr_SDK::Dollop_t* to_split, const RangeAddress_t p_cur_addr)
+{
+	assert(!to_split->empty());
+	const auto last_de             = *to_split->rbegin(); // end of DollopEntryList
+	const auto last_insn           = last_de->getInstruction();
+	const auto last_insn_is_branch = DecodedInstruction_t::factory(last_insn)->isBranch();
+	const auto nop_bits            = string("\x00\x00\x00\x00",4);
+	const auto zipr_impl           = dynamic_cast<ZiprImpl_t*>(m_zipr);
+	const auto fallthrough         = to_split->getFallthroughDollop();
+	auto       cur_addr            = p_cur_addr;
+
+	/* both are dereferenced below; fail loudly instead of crashing on a null pointer. */
+	assert(zipr_impl != nullptr && fallthrough != nullptr);
+
+	/* append one instruction to to_split at cur_addr and advance cur_addr past it */
+	const auto add_instruction = [&](Instruction_t* patch) -> DollopEntry_t*
+		{
+			/* need a dollop entry, placed at the current address */
+			auto patch_de = new DollopEntry_t(patch, to_split);
+			patch_de->Place(cur_addr);
+
+			/* advance cur_addr */
+			cur_addr += m_zipr->determineDollopEntrySize(patch_de, false);
+
+			/* add it to the dollop */
+			to_split->push_back(patch_de);
+
+			/*
+			 * Since we inserted a new instruction, we should
+			 * check to see whether a plugin wants to plop it.
+			 */
+			zipr_impl->AskPluginsAboutPlopping(patch_de->getInstruction());
+
+			return patch_de;
+		};
+
+	/* if the dollop ends in some kind of branch we need the delay slot instruction */
+	if(last_insn_is_branch)
+	{
+		const auto is_delay_slot_reloc = [](const Relocation_t* reloc) -> bool  { return reloc->getType() == "delay_slot1"; } ;
+		const auto &last_insn_relocs   = last_insn->getRelocations();
+		const auto delay_slot_insn_it  = find_if(ALLOF(last_insn_relocs), is_delay_slot_reloc);
+		assert(delay_slot_insn_it != end(last_insn_relocs));
+		const auto delay_slot_insn     = dynamic_cast<Instruction_t*>((*delay_slot_insn_it)->getWRT());
+
+		/* if the reloc's WRT object is not an instruction, fall back to a nop */
+		auto new_delay_slot = delay_slot_insn == nullptr ?
+			IRDB_SDK::addNewDataBits(p_firp, nullptr, nop_bits) :
+			IRDB_SDK::addNewDataBits(p_firp, delay_slot_insn->getTarget(), delay_slot_insn->getDataBits());
+
+		/* add a copy of the delay slot instruction */
+		add_instruction(new_delay_slot);
+	}
+
+	/* add a jump instruction */
+	const auto target_insn = (*fallthrough->begin())->getInstruction()->getFallthrough(); // jumps over delay slot
+	add_instruction(createNewJumpInstruction(p_firp, target_insn))->setTargetDollop(fallthrough);
+
+	/* the new jump has a delay slot of its own; fill it with a nop */
+	add_instruction(IRDB_SDK::addNewDataBits(p_firp, nullptr, nop_bits));
+
+	/* queue the fallthrough exactly once, now that cur_addr is past everything we added */
+	m_zipr->getPlacementQueue()->insert({fallthrough, cur_addr});
+
+	/* finally, mark that we've patched this dollop to jump to the target */
+	to_split->setFallthroughPatched(true);
+	return cur_addr;
+}
diff --git a/src/patcher_mips32.cpp b/src/patcher_mips32.cpp
index 392a57175..211916316 100644
--- a/src/patcher_mips32.cpp
+++ b/src/patcher_mips32.cpp
@@ -64,28 +64,54 @@ void ZiprPatcherMIPS32_t::ApplyNopToPatch(RangeAddress_t addr)
 
 void ZiprPatcherMIPS32_t::ApplyPatch(RangeAddress_t from_addr, RangeAddress_t to_addr)
 { 
-#if 0
-        const auto first_byte  = (uint8_t)memory_space[from_addr+0];
-	assert(first_byte == 0x10); // beq $0
-#endif
-
-        const auto new_offset  = (int32_t)((to_addr) - (from_addr+4)) >> 2;
-
-
-	// Use a branch always.  In mips, this will be a  beq $0, $0, <label> as there is no branch always.
-	// format: 0001 00ss sstt iiii iiii iiii iiii iiii 
-	// ssss=0b0000
-	// tttt=0b0000
-	// i...i = (from_addr-to_addr)>>2
-	cout<<"Applying cond branch patch from "<<hex<<from_addr<<" to "<<to_addr<<endl;
-	const auto non_imm_bits = 16;
-	// assert there's no overflow.
-	assert((int64_t)(new_offset << non_imm_bits) == ((int64_t)new_offset) << non_imm_bits);
-	// or in opcode for first byte.  set remaining bytes.
-	const auto mask16         = ((1<<16)-1);
-	const auto trimmed_offset = new_offset & mask16;
-	memory_space[from_addr+3]  = (trimmed_offset>> 0)&0xff;
-	memory_space[from_addr+2]  = (trimmed_offset>> 8)&0xff;
+	const auto mask6      = 0b111111;
+	const auto first_byte = (uint8_t)memory_space[from_addr+0];
+	const auto top6bits   = (first_byte >> 2) & mask6;
+
+	// Instruction bytes are handled big-endian here: byte 0 carries the opcode, byte 3 the low target bits.
+	// Only the immediate/target field is rewritten; the opcode and register fields are left as they are.
+	if(
+		top6bits == 0b000100 ||  // beq
+		top6bits == 0b000001 ||  // bgez, bgezal, bltz, bltzal
+		top6bits == 0b000111 ||  // bgtz
+		top6bits == 0b000110 ||  // blez
+		top6bits == 0b000101     // bne
+		)
+	{
+		// PC-relative branch.  format: oooo ooss ssst tttt iiii iiii iiii iiii
+		// i...i = (to_addr - (from_addr+4)) >> 2, a signed 16-bit word offset
+		// measured from the instruction after the branch (its delay slot).
+		const auto new_offset  = (int32_t)((to_addr) - (from_addr+4)) >> 2;
+		cout<<"Applying cond branch patch from "<<hex<<from_addr<<" to "<<to_addr<<endl;
+		const auto imm_bits = 16;
+		// assert there's no overflow: the offset must fit the signed 16-bit field.
+		assert(-(1<<(imm_bits-1)) <= new_offset && new_offset < (1<<(imm_bits-1)));
+		// opcode/register bytes are untouched; set the two immediate bytes.
+		const auto mask16         = ((1<<16)-1);
+		const auto trimmed_offset = new_offset & mask16;
+		memory_space[from_addr+3]  = (trimmed_offset>> 0)&0xff;
+		memory_space[from_addr+2]  = (trimmed_offset>> 8)&0xff;
+	}
+	else if(top6bits == 0b000010 || top6bits == 0b000011) /* j and jal */
+	{
+		// Region jump.  format: oooo ooii iiii iiii iiii iiii iiii iiii
+		// i...i = bits 27..2 of to_addr; the CPU takes the upper address bits
+		// from the address of the delay slot (from_addr+4).
+		cout<<"Applying uncond jump patch from "<<hex<<from_addr<<" to "<<to_addr<<endl;
+		// assert the target is encodable: it must lie in the delay slot's 256MB region.
+		assert(((to_addr ^ (from_addr+4)) >> 28) == 0);
+		const auto mask26         = ((1<<26)-1);
+		const auto trimmed_offset = (int32_t)((to_addr >> 2) & mask26);
+		memory_space[from_addr+3]   = (trimmed_offset>> 0)&0b11111111;  /* low 8 bits */
+		memory_space[from_addr+2]   = (trimmed_offset>> 8)&0b11111111;  /* 2nd 8 bits */
+		memory_space[from_addr+1]   = (trimmed_offset>> 16)&0b11111111; /* 3rd 8 bits */
+		memory_space[from_addr+0]   = (first_byte & 0b11111100) | ((trimmed_offset>> 24)&0b11); /* keep opcode, replace (not OR into) the top 2 target bits */
+	}
+	else
+	{
+		// not a branch or jump encoding this patcher knows how to retarget.
+		assert(0);
+	}
 
 }
 
diff --git a/src/sizer_mips32.cpp b/src/sizer_mips32.cpp
index c29d5fbbd..e71c30dc5 100644
--- a/src/sizer_mips32.cpp
+++ b/src/sizer_mips32.cpp
@@ -9,17 +9,23 @@ namespace zipr
 using namespace zipr ;
 using namespace IRDB_SDK;
 
+#define ALLOF(a) begin(a),end(a)
 
-ZiprSizerMIPS32_t::ZiprSizerMIPS32_t(Zipr_SDK::Zipr_t* p_zipr_obj) : ZiprSizerBase_t(p_zipr_obj,4,4,4,4,4)
+const auto is_delay_slot_reloc = [](const Relocation_t* reloc) -> bool  { return reloc->getType() == "delay_slot1"; } ;
+
+
+ZiprSizerMIPS32_t::ZiprSizerMIPS32_t(Zipr_SDK::Zipr_t* p_zipr_obj) : ZiprSizerBase_t(p_zipr_obj,8,8,8,8,4)
 {
 }
 
 size_t ZiprSizerMIPS32_t::DetermineInsnSize(Instruction_t* insn, bool account_for_jump) const
 {
+	const auto is_branch = DecodedInstruction_t::factory(insn)->isBranch();
 	// need 4 bytes for insn, plus 4 bytes for a jump 
-	const auto size     = 4u;
-	const auto jmp_size = account_for_jump ? 4 : 0;
-	return size + jmp_size;
+	const auto size            = 4u; // bytes for the instruction itself
+	const auto delay_slot_size = is_branch        ? 4u : 0u; // a branch adds 4 more bytes, counted only when account_for_jump is set; presumably room for its delay-slot copy -- confirm
+	const auto jmp_size        = account_for_jump ? 8u + delay_slot_size : 0u; // 8 bytes, not 4: presumably the jump plus a nop for the jump's own delay slot -- confirm against splitDollop
+	return size + jmp_size ;
 }
 
 RangeAddress_t ZiprSizerMIPS32_t::PlopDollopEntryWithTarget(
@@ -27,9 +33,9 @@ RangeAddress_t ZiprSizerMIPS32_t::PlopDollopEntryWithTarget(
         RangeAddress_t override_place,
         RangeAddress_t override_target) const
 {
-        const auto addr        = (override_place  == 0) ? entry->getPlace()                 : override_place;
-        const auto target_addr = (override_target == 0) ? entry->getTargetDollop()->getPlace() : override_target;
-	const auto insn        = entry->getInstruction();
+        const auto addr               = (override_place  == 0) ? entry->getPlace()                 : override_place;
+        const auto target_addr        = (override_target == 0) ? entry->getTargetDollop()->getPlace() : override_target;
+	const auto insn               = entry->getInstruction();
 
 	// plop instruction an d make it target the right address.
 	memory_space.PlopBytes(addr, insn->getDataBits().c_str(), 4);
diff --git a/src/zipr.cpp b/src/zipr.cpp
index 5c2d6895f..14bfdc5be 100644
--- a/src/zipr.cpp
+++ b/src/zipr.cpp
@@ -1385,7 +1385,21 @@ void ZiprImpl_t::PlaceDollops()
 				   )
 				{
 
-					string patch_jump_string;
+					if (*m_vverbose)
+					{
+						const auto end_of_cur_dollop_insn  = (*to_place  ->rbegin())->getInstruction();
+						const auto start_of_ft_dollop_insn = (*fallthrough->begin())->getInstruction();
+						cout << "Not coalescing " 
+						     << end_of_cur_dollop_insn->getDisassembly() << "@" << hex << end_of_cur_dollop_insn->getAddress()->getVirtualOffset() 
+						     << " and " 
+						     << start_of_ft_dollop_insn->getDisassembly() << "@" << hex << start_of_ft_dollop_insn->getAddress()->getVirtualOffset() 
+						     << string((fallthrough->isPlaced()) ?  " because fallthrough is placed" : "")
+						     << string((!allowed_coalescing) ?  " because I am not allowed" : "") 
+						     << ".  Add jmp to fallthrough dollop (" << std::hex << fallthrough << ")." << '\n';
+					}
+
+					cur_addr = archhelper->splitDollop(m_firp,to_place, cur_addr);
+#if 0
 					auto patch = archhelper->createNewJumpInstruction(m_firp, nullptr);
 					auto patch_de = new DollopEntry_t(patch, to_place);
 
@@ -1397,14 +1411,6 @@ void ZiprImpl_t::PlaceDollops()
 					to_place->push_back(patch_de);
 					to_place->setFallthroughPatched(true);
 
-					if (*m_vverbose)
-						cout << "Not coalescing"
-						     << string((fallthrough->isPlaced()) ?  " because fallthrough is placed" : "")
-						     << string((!allowed_coalescing) ?  " because I am not allowed" : "")
-						     << "; Added jump (via " << std::hex << patch_de
-						     << " at " << std::hex << patch_de->getPlace() << ") "
-						     << "to fallthrough dollop (" << std::hex 
-						     << fallthrough << ")." << endl;
 
 					placement_queue.insert({fallthrough, cur_addr});
 					/*
@@ -1412,6 +1418,7 @@ void ZiprImpl_t::PlaceDollops()
 					 * check to see whether a plugin wants to plop it.
 					 */
 					AskPluginsAboutPlopping(patch_de->getInstruction());
+#endif
 
 					m_stats->total_did_not_coalesce++;
 
-- 
GitLab