From 8f0c1422c446d0da922a202f239f002e7cc59a78 Mon Sep 17 00:00:00 2001
From: Daniel Chen <dc3pe@virginia.edu>
Date: Sat, 15 Aug 2020 20:27:06 -0400
Subject: [PATCH] Update IRDB API to have insertAssemblyInstructionsBefore (and
 After) which includes an interface for multi-line assembly where it can fill
 in template parameters automatically for the caller.  See
 irdb-cookbook/init_stack.

---
 irdb-libs/SConscript                          |   1 +
 irdb-libs/libIRDB-cfg/src/SConscript          |   4 +-
 irdb-libs/libIRDB-core/include/fileir.hpp     |   3 +-
 irdb-libs/libIRDB-core/src/fileir.cpp         |   7 +-
 irdb-libs/libIRDB-core/src/instruction.cpp    |   3 +-
 irdb-libs/libIRDB-transform/src/SConscript    |   5 +-
 .../libIRDB-transform/src/rewrite_util.cpp    | 139 +++++++++++++++---
 irdb-libs/libIRDB-transform/src/transform.cpp |   8 +
 irdb-libs/libIRDB-util/src/SConscript         |   1 -
 irdb-libs/meds2pdb/SConscript                 |   1 -
 irdb-libs/thanos/SConscript                   |   1 -
 irdb-sdk                                      |   2 +-
 12 files changed, 138 insertions(+), 37 deletions(-)

diff --git a/irdb-libs/SConscript b/irdb-libs/SConscript
index 52e754f67..1cc8a80ff 100644
--- a/irdb-libs/SConscript
+++ b/irdb-libs/SConscript
@@ -68,6 +68,7 @@ libkeystone_path = env.Install("$SECURITY_TRANSFORMS_HOME/lib/", libkeystone_pat
 env.Command(os.environ['SECURITY_TRANSFORMS_HOME']+"/lib/libkeystone.so", libkeystone_path, "ln -s $SOURCE.abspath $TARGET.abspath")
 libkeystone=os.environ['SECURITY_TRANSFORMS_HOME']+"/lib/libkeystone.so"
 
+
 libehp=env.SConscript("libehp/SConscript")
 libehp=env.Install("$SECURITY_TRANSFORMS_HOME/lib", libehp);
 
diff --git a/irdb-libs/libIRDB-cfg/src/SConscript b/irdb-libs/libIRDB-cfg/src/SConscript
index 821622ad6..3740ebca0 100644
--- a/irdb-libs/libIRDB-cfg/src/SConscript
+++ b/irdb-libs/libIRDB-cfg/src/SConscript
@@ -15,8 +15,8 @@ cpppath='''
 	$SECURITY_TRANSFORMS_HOME/include/
 	$SECURITY_TRANSFORMS_HOME/libIRDB-core/include/
 	$SECURITY_TRANSFORMS_HOME/libIRDB-cfg/include/
-        $SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
- 	'''
+	$SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
+	'''
 libpath='''
 	$SECURITY_TRANSFORMS_HOME/lib
 	'''
diff --git a/irdb-libs/libIRDB-core/include/fileir.hpp b/irdb-libs/libIRDB-core/include/fileir.hpp
index 52c786a36..033a33694 100644
--- a/irdb-libs/libIRDB-core/include/fileir.hpp
+++ b/irdb-libs/libIRDB-core/include/fileir.hpp
@@ -17,8 +17,7 @@
  * URL   : http://www.zephyr-software.com/
  *
  */
-//#include <keystone/keystone.h>
-#include <keystone.h>
+#include <keystone/keystone.h>
 
 namespace libIRDB
 {
diff --git a/irdb-libs/libIRDB-core/src/fileir.cpp b/irdb-libs/libIRDB-core/src/fileir.cpp
index 0ca30aa02..2b7ee0498 100644
--- a/irdb-libs/libIRDB-core/src/fileir.cpp
+++ b/irdb-libs/libIRDB-core/src/fileir.cpp
@@ -20,7 +20,6 @@
  */
 
 #include <keystone.h>
-//#include <keystone/keystone.h>
 #include <all.hpp>
 #include <irdb-util>
 #include <cstdlib>
@@ -182,9 +181,6 @@ void FileIR_t::changeRegistryKey(IRDB_SDK::Instruction_t *p_orig, IRDB_SDK::Inst
 
 void FileIR_t::assemblestr(ks_engine * &ks, IRDB_SDK::Instruction_t *ins, const char * instruct, char * &encode, size_t &size, size_t &count) 
 {
-	// do ks_asm call here
-        //assert if err is equal to KS_ERR_OK
-        //Check if count = 1
 	if(ks_asm(ks, instruct, 0, (unsigned char **)&encode, &size, &count) != KS_ERR_OK) { //string or cstr
 		ks_free((unsigned char*)encode);
 		ks_close(ks);
@@ -224,6 +220,9 @@ void FileIR_t::assembleRegistry()
 
 	//Build and set assembly string
 	for(auto it : assembly_registry) {
+		// do ks_asm call here
+		//assert if err is equal to KS_ERR_OK
+		//Check if count = 1
 		assemblestr(ks, it.first, it.second.c_str(), encode, size, count);
 	}
 
diff --git a/irdb-libs/libIRDB-core/src/instruction.cpp b/irdb-libs/libIRDB-core/src/instruction.cpp
index d533a551f..44975ae7f 100644
--- a/irdb-libs/libIRDB-core/src/instruction.cpp
+++ b/irdb-libs/libIRDB-core/src/instruction.cpp
@@ -25,7 +25,6 @@
 #include <sstream>
 #include <iomanip>
 #include <irdb-util>
-//#include <keystone/keystone.h>
 #include <keystone.h>
 #include "cmdstr.hpp"
 
@@ -132,7 +131,7 @@ bool Instruction_t::assemble(string assembly)
     	const auto err = ks_open(arch, mode, &ks);
 		assert(err == KS_ERR_OK);        
 
-        ks_option(ks, KS_OPT_SYNTAX, KS_OPT_SYNTAX_NASM);
+	ks_option(ks, KS_OPT_SYNTAX, KS_OPT_SYNTAX_NASM);
 
         FileIR_t::assemblestr(ks, this, assembly.c_str(), encode, size, count);
         return true;
diff --git a/irdb-libs/libIRDB-transform/src/SConscript b/irdb-libs/libIRDB-transform/src/SConscript
index a46c77030..a346bd57b 100644
--- a/irdb-libs/libIRDB-transform/src/SConscript
+++ b/irdb-libs/libIRDB-transform/src/SConscript
@@ -15,12 +15,13 @@ cpppath='''
          $SECURITY_TRANSFORMS_HOME/libIRDB-cfg/include
          $SECURITY_TRANSFORMS_HOME/libIRDB-util/include
          $SECURITY_TRANSFORMS_HOME/libMEDSannotation/include/
-        $SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
+         $SECURITY_TRANSFORMS_HOME/third_party/capstone/include/capstone/
+         $SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
         '''
 
 
 LIBPATH="$SECURITY_TRANSFORMS_HOME/lib"
-LIBS=Split("irdb-core irdb-cfg irdb-util MEDSannotation")
+LIBS=Split("irdb-core irdb-cfg irdb-util MEDSannotation capstone keystone")
 
 myenv=myenv.Clone(CPPPATH=Split(cpppath))
 myenv.Append(CXXFLAGS = " -std=c++11 ")
diff --git a/irdb-libs/libIRDB-transform/src/rewrite_util.cpp b/irdb-libs/libIRDB-transform/src/rewrite_util.cpp
index 443236f80..268f57d8b 100644
--- a/irdb-libs/libIRDB-transform/src/rewrite_util.cpp
+++ b/irdb-libs/libIRDB-transform/src/rewrite_util.cpp
@@ -1,6 +1,9 @@
 
 #include <irdb-transform>
 #include <libIRDB-core.hpp>
+#include <keystone.h>
+#include <capstone.h>
+#include <bits/stdc++.h> 
 
 // Copied from PnTransform
 // @todo: create a utility library with the one interface
@@ -13,6 +16,7 @@ void copyInstruction(Instruction_t* src, Instruction_t* dest);
 Instruction_t* copyInstruction(FileIR_t* virp, Instruction_t* instr);
 Instruction_t* allocateNewInstruction(FileIR_t* virp, DatabaseID_t p_fileID,Function_t* func);
 Instruction_t* allocateNewInstruction(FileIR_t* virp, Instruction_t *template_instr);
+vector<string> assemblegroup(string group);
 
 
 
@@ -32,7 +36,6 @@ void setInstructionsDetails(FileIR_t* virp, Instruction_t *p_instr, string p_dat
         real_virp->GetInstructions().insert(p_instr);
 }
 
-
 //For all insertBefore functions:
 //The "first" instruction will have its contents replaced and a duplicate of "first" will be in the follow of first. 
 //This duplicate is returned since the user already has a pointer to first.
@@ -64,18 +67,48 @@ Instruction_t* IRDB_SDK::insertAssemblyBefore(FileIR_t* virp, Instruction_t* fir
 	return next;
 }
 
-#if 0
-Instruction_t* insertAssemblyBefore(FileIR_t* virp, Instruction_t* first, string assembly)
+void strReplace(std::string& str, const std::string& oldStr, const std::string& newStr)
 {
-	return insertAssemblyBefore(virp,first,assembly,NULL);
+  // Replaces every occurrence of oldStr in str with newStr, in place; text inserted by a replacement is never rescanned.
+  if(oldStr.empty()) return; // an empty pattern matches at every position, so the scan below would never terminate
+  for(auto pos = str.find(oldStr); pos != std::string::npos; pos = str.find(oldStr, pos + newStr.length())) {
+     str.replace(pos, oldStr.length(), newStr);
+  }
 }
 
+vector<Instruction_t*> IRDB_SDK::insertAssemblyInstructionsBefore(FileIR_t* firp, Instruction_t* before, string templateIns, vector<string> templateParams) {
+	// Fills the 1-based %%N placeholders in templateIns, assembles it, and inserts each encoding before `before`; returns the new instructions in order, then the relocated original.
+	// Substitute the highest index first: going upward, "%%1" would also match the first three characters of "%%10".
+	for(auto i = templateParams.size(); i > 0; i--) {
+		strReplace(templateIns, "%%"+to_string(i), templateParams[i-1]);
+	}
+	const auto databits = assemblegroup(templateIns);
+	auto results = vector<Instruction_t*>();
+	auto curins = before;
+	for(const auto& bits : databits) {
+		results.push_back(curins); // curins is overwritten with the new bytes; its old contents move to the instruction returned below
+		curins = insertDataBitsBefore(firp, curins, bits, NULL);
+	}
+	results.push_back(curins);
+	return results;
+}
 
-Instruction_t* insertDataBitsBefore(FileIR_t* virp, Instruction_t* first, string dataBits)
-{
-        return insertDataBitsBefore(virp,first,dataBits,NULL);
+vector<Instruction_t*> IRDB_SDK::insertAssemblyInstructionsAfter(FileIR_t* firp, Instruction_t* after, string templateIns, vector<string> templateParams) {
+	// Fills the 1-based %%N placeholders in templateIns, assembles it, and chains each encoding after `after`; returns `after` followed by the new instructions in order.
+	// Substitute the highest index first: going upward, "%%1" would also match the first three characters of "%%10".
+	for(auto i = templateParams.size(); i > 0; i--) {
+		strReplace(templateIns, "%%"+to_string(i), templateParams[i-1]);
+	}
+	const auto databits = assemblegroup(templateIns);
+	auto results = vector<Instruction_t*>();
+	results.push_back(after);
+	auto curins = after;
+	for(const auto& bits : databits) {
+		curins = insertDataBitsAfter(firp, curins, bits, NULL); // each new instruction is placed after the previously inserted one
+		results.push_back(curins);
+	}
+	return results;
 }
-#endif
 
 Instruction_t* IRDB_SDK::insertDataBitsBefore(FileIR_t* virp, Instruction_t* first, string dataBits, Instruction_t *target)
 {
@@ -111,14 +144,6 @@ Instruction_t* IRDB_SDK::insertAssemblyAfter(FileIR_t* virp, Instruction_t* firs
         return new_instr;
 }
 
-#if 0
-Instruction_t* insertAssemblyAfter(FileIR_t* virp, Instruction_t* first, string assembly)
-{
-        return insertAssemblyAfter(virp,first,assembly,NULL);
-
-}
-#endif
-
 Instruction_t* IRDB_SDK::insertDataBitsAfter(FileIR_t* virp, Instruction_t* first, string dataBits, Instruction_t *target)
 {
 	Instruction_t *new_instr = allocateNewInstruction(virp,first);
@@ -128,12 +153,84 @@ Instruction_t* IRDB_SDK::insertDataBitsAfter(FileIR_t* virp, Instruction_t* firs
         return new_instr;
 }
 
-#if 0
-Instruction_t* insertDataBitsAfter(FileIR_t* virp, Instruction_t* first, string dataBits)
-{
-        return insertDataBitsAfter(virp,first,dataBits,NULL);
+/** Assembles a group of instructions, separated by semicolons/newlines, into machine code and splits the result per instruction.
+ * Keystone assembles the whole group into one buffer; Capstone then disassembles that buffer only to learn each instruction's length.
+ * Param 1: The group of semicolon/newline delimited assembly instructions to be assembled.
+ * Returns: a vector with one string per assembled instruction, each holding that instruction's raw encoded bytes, in order.
+ * Throws: std::invalid_argument if the IRDB bit width or machine type has no Keystone/Capstone mapping; std::runtime_error if an
+ *         engine cannot be opened, assembly or disassembly fails, nothing is assembled, or the bytes cannot be fully split.
+ */
+vector<string> assemblegroup(string group) {
+	// Owns every handle and buffer acquired below so that each exit path, including every throw, releases them.
+	struct Resources {
+		ks_engine *ks = NULL;
+		unsigned char *encode = NULL;
+		csh handle = 0;
+		bool csOpen = false;
+		cs_insn *insn = NULL;
+		size_t cscount = 0;
+		~Resources() {
+			if(insn) cs_free(insn, cscount);
+			if(csOpen) cs_close(&handle);
+			if(encode) ks_free(encode);
+			if(ks) ks_close(ks);
+		}
+	} res;
+
+	const auto bits = FileIR_t::getArchitectureBitWidth();
+	const auto mode = (bits == 32) ? KS_MODE_32 :
+	                  (bits == 64) ? KS_MODE_64 :
+	                  throw std::invalid_argument("Cannot map IRDB bit size to keystone bit size");
+	const auto machinetype = FileIR_t::getArchitecture()->getMachineType();
+	const auto arch = (machinetype == IRDB_SDK::admtI386 || machinetype == IRDB_SDK::admtX86_64) ? KS_ARCH_X86 :
+	                  (machinetype == IRDB_SDK::admtArm32) ? KS_ARCH_ARM :
+	                  (machinetype == IRDB_SDK::admtAarch64) ? KS_ARCH_ARM64 :
+	                  (machinetype == IRDB_SDK::admtMips64 || machinetype == IRDB_SDK::admtMips32) ? KS_ARCH_MIPS :
+	                  throw std::invalid_argument("Cannot map IRDB architecture to keystone architure");
+
+	if(ks_open(arch, mode, &res.ks) != KS_ERR_OK) {
+		throw std::runtime_error("ERROR: ks_open() failure");
+	}
+	ks_option(res.ks, KS_OPT_SYNTAX, KS_OPT_SYNTAX_NASM);
+	auto size = (size_t)0;
+	auto count = (size_t)0;
+	if(ks_asm(res.ks, group.c_str(), 0, &res.encode, &size, &count) != KS_ERR_OK) {
+		throw std::runtime_error("ERROR: ks_asm() failed during instrunction assembly.");
+	}
+	if(!count) {
+		throw std::runtime_error("ERROR: no instructions assembled");
+	}
+
+	const auto csmode = (bits == 32) ? CS_MODE_32 :
+	                    (bits == 64) ? CS_MODE_64 :
+	                    throw std::invalid_argument("Cannot map IRDB bit size to keystone bit size");
+	const auto csarch = (machinetype == IRDB_SDK::admtI386 || machinetype == IRDB_SDK::admtX86_64) ? CS_ARCH_X86 :
+	                    (machinetype == IRDB_SDK::admtArm32) ? CS_ARCH_ARM :
+	                    (machinetype == IRDB_SDK::admtAarch64) ? CS_ARCH_ARM64 :
+	                    (machinetype == IRDB_SDK::admtMips64 || machinetype == IRDB_SDK::admtMips32) ? CS_ARCH_MIPS :
+	                    throw std::invalid_argument("Cannot map IRDB architecture to keystone architure");
+	if(cs_open(csarch, csmode, &res.handle) != CS_ERR_OK) {
+		throw std::runtime_error("ERROR: cs_open() failure");
+	}
+	res.csOpen = true;
+	// Capstone serves only as a length decoder here: the sizes it reports say where to cut the Keystone buffer.
+	res.cscount = cs_disasm(res.handle, res.encode, size, 0x1000, 0, &res.insn);
+	if(res.cscount == 0) {
+		throw std::runtime_error("ERROR: cs_disasm() failed during instrunction assembly.");
+	}
+	vector<string> assembled;
+	auto offset = (size_t)0;
+	for(size_t i = 0; i < res.cscount; i++) {
+		assembled.push_back(string((const char*)res.encode + offset, res.insn[i].size));
+		offset += res.insn[i].size;
+	}
+	// A short total means Capstone stopped before the end of the buffer; returning now would silently drop the trailing instructions.
+	if(offset != size) {
+		throw std::runtime_error("ERROR: cs_disasm() could not decode all assembled bytes.");
+	}
+	return assembled;
 }
-#endif
+
 
 Instruction_t* IRDB_SDK::addNewDataBits(FileIR_t* firp, Instruction_t *p_instr, string p_bits)
 {
diff --git a/irdb-libs/libIRDB-transform/src/transform.cpp b/irdb-libs/libIRDB-transform/src/transform.cpp
index 7b4217b50..0e8a1f974 100644
--- a/irdb-libs/libIRDB-transform/src/transform.cpp
+++ b/irdb-libs/libIRDB-transform/src/transform.cpp
@@ -60,6 +60,14 @@ Instruction_t* Transform_t::insertDataBitsAfter(Instruction_t* before, const str
 	return IRDB_SDK::insertDataBitsAfter(getFileIR(), before, the_asm, target);
 }
 
+vector<Instruction_t*> Transform_t::insertAssemblyInstructionsBefore(Instruction_t* before, string templateIns, vector<string> templateParams) {
+	return IRDB_SDK::insertAssemblyInstructionsBefore(getFileIR(), before, templateIns, templateParams); // thin wrapper: forwards all arguments to the IRDB_SDK free function, passing getFileIR() as the IR
+}
+
+vector<Instruction_t*> Transform_t::insertAssemblyInstructionsAfter(Instruction_t* after, string templateIns, vector<string> templateParams) {
+        return IRDB_SDK::insertAssemblyInstructionsAfter(getFileIR(), after, templateIns, templateParams); // thin wrapper: forwards all arguments to the IRDB_SDK free function, passing getFileIR() as the IR
+}
+
 Instruction_t* Transform_t::addNewDataBits(const string& p_bits)
 {
 	return IRDB_SDK::addNewDataBits(getFileIR(), p_bits);
diff --git a/irdb-libs/libIRDB-util/src/SConscript b/irdb-libs/libIRDB-util/src/SConscript
index 92ebc0c1f..fee25b597 100644
--- a/irdb-libs/libIRDB-util/src/SConscript
+++ b/irdb-libs/libIRDB-util/src/SConscript
@@ -17,7 +17,6 @@ cpppath='''
 	$SECURITY_TRANSFORMS_HOME/include/
 	$SECURITY_TRANSFORMS_HOME/libIRDB-core/include/
 	$SECURITY_TRANSFORMS_HOME/libIRDB-util/include/
-        $SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
 	'''
 
 #myenv.Append(CCFLAGS=" -Wall -W -Wextra -Wconversion ")
diff --git a/irdb-libs/meds2pdb/SConscript b/irdb-libs/meds2pdb/SConscript
index b795c0f23..bde4f83a3 100644
--- a/irdb-libs/meds2pdb/SConscript
+++ b/irdb-libs/meds2pdb/SConscript
@@ -12,7 +12,6 @@ cpppath='''
 	 $SECURITY_TRANSFORMS_HOME/libIRDB-core/include 
 	 $SECURITY_TRANSFORMS_HOME/libEXEIO/include 
 	 $SECURITY_TRANSFORMS_HOME/libMEDSannotation/include 
-        $SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
 	'''
 
 files=Glob( Dir('.').srcnode().abspath+"/*.cpp") + Glob(Dir('.').srcnode().abspath+"/*.c")
diff --git a/irdb-libs/thanos/SConscript b/irdb-libs/thanos/SConscript
index f137e4ebb..75ff75ee9 100644
--- a/irdb-libs/thanos/SConscript
+++ b/irdb-libs/thanos/SConscript
@@ -9,7 +9,6 @@ myenv.Replace(SECURITY_TRANSFORMS_HOME=os.environ['SECURITY_TRANSFORMS_HOME'])
 cpppath=''' 
 	 $IRDB_SDK/include 
 	 $SECURITY_TRANSFORMS_HOME/libIRDB-core/include 
-        $SECURITY_TRANSFORMS_HOME/third_party/keystone/include/keystone/
 	'''
 
 
diff --git a/irdb-sdk b/irdb-sdk
index 134c74743..350263009 160000
--- a/irdb-sdk
+++ b/irdb-sdk
@@ -1 +1 @@
-Subproject commit 134c7474304013b5738aec1898b9002f19158b55
+Subproject commit 350263009091239be69a13e4e15142dc11880dca
-- 
GitLab