From 6cb3e237c94d39cc4c4ee4bba4ab13c8cfc9fdf6 Mon Sep 17 00:00:00 2001
From: Daniel Chen <dc3pe@virginia.edu>
Date: Thu, 25 Jun 2020 09:25:06 -0400
Subject: [PATCH] Rework the assembly-string->binary conversion.

The old method involved writing assembly to a file, asking an external assembler to do the conversion, then processing the binary file to read the binary instructions back out.  This method is slow and relies on nasm, which isn't available on all machines or for the full variety of supported architectures.  The replacement is keystone, a 3rd party library that is linked to provide assembly->binary conversion.
---
 .gitmodules                                |   3 +
 irdb-libs/SConscript                       |  10 ++
 irdb-libs/libIRDB-core/include/fileir.hpp  |   2 +
 irdb-libs/libIRDB-core/src/SConscript      |   5 +-
 irdb-libs/libIRDB-core/src/fileir.cpp      | 122 ++++++++-------------
 irdb-libs/libIRDB-core/src/instruction.cpp |  70 ++++--------
 irdb-libs/third_party/keystone             |   1 +
 set_env_vars                               |   2 +-
 8 files changed, 84 insertions(+), 131 deletions(-)
 create mode 160000 irdb-libs/third_party/keystone

diff --git a/.gitmodules b/.gitmodules
index 4b0e2746b..2ad0cd402 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -48,3 +48,6 @@
 [submodule "irdb-libs/third_party/capstone"]
 	path = irdb-libs/third_party/capstone
 	url = git@git.zephyr-software.com:third-party-mirrors/capstone.git
+[submodule "irdb-libs/third_party/keystone"]
+	path = irdb-libs/third_party/keystone
+	url = https://github.com/keystone-engine/keystone/
diff --git a/irdb-libs/SConscript b/irdb-libs/SConscript
index 4058ec7f4..55347a8d5 100644
--- a/irdb-libs/SConscript
+++ b/irdb-libs/SConscript
@@ -12,6 +12,9 @@ if env.GetOption('clean'):
     os.chdir(os.environ['SECURITY_TRANSFORMS_HOME']+"/third_party/capstone")
     os.system("make clean")
     os.chdir(os.environ['SECURITY_TRANSFORMS_HOME'])
+    # Delete keystone's out-of-tree build directory. Do not chdir into it first: os.chdir raises OSError when the directory does not exist (e.g. cleaning a tree that was never built).
+    os.system("rm -rf " + os.environ['SECURITY_TRANSFORMS_HOME']+"/third_party/keystone/build")
+    os.chdir(os.environ['SECURITY_TRANSFORMS_HOME'])
 
 
 else:
@@ -20,6 +23,13 @@ else:
     print "Rebuilding libcapstone."
     jobs=env.GetOption('num_jobs')
     os.system("make -j "+str(jobs))
+    os.chdir(os.environ['SECURITY_TRANSFORMS_HOME']+"/third_party/keystone")
+    print "Rebuilding libkeystone."
+    # Build out-of-tree in ./build. NOTE(review): make-common.sh runs as a child process (it is not sourced), so the $BUILD_LIBS_ONLY/$BUILDTYPE/$ARCH values handed to cmake below are presumably empty -- confirm.
+    os.system("mkdir -p ./build")
+    os.chdir("build")
+    os.system('../make-common.sh lib_only; cmake -DBUILD_LIBS_ONLY=$BUILD_LIBS_ONLY -DLLVM_BUILD_32_BITS="$LLVM_BUILD_32_BITS" -DCMAKE_OSX_ARCHITECTURES="$ARCH" -DCMAKE_BUILD_TYPE=$BUILDTYPE -DBUILD_SHARED_LIBS=ON -DLLVM_TARGETS_TO_BUILD="all" -G "Unix Makefiles" ..; make -j' + str(jobs))
+    os.system("make DESTDIR=$SECURITY_TRANSFORMS_HOME/lib install")
     os.chdir(os.environ['SECURITY_TRANSFORMS_HOME'])
 
 
diff --git a/irdb-libs/libIRDB-core/include/fileir.hpp b/irdb-libs/libIRDB-core/include/fileir.hpp
index e40378200..033a33694 100644
--- a/irdb-libs/libIRDB-core/include/fileir.hpp
+++ b/irdb-libs/libIRDB-core/include/fileir.hpp
@@ -17,6 +17,7 @@
  * URL   : http://www.zephyr-software.com/
  *
  */
+#include <keystone/keystone.h>
 
 namespace libIRDB
 {
@@ -79,6 +80,7 @@ class FileIR_t : public BaseObj_t, virtual public IRDB_SDK::FileIR_t
 		// assembles the assembly isntructions for each registered instruction
 		// and clears the registry. RegisterAssembly registers the instruction
 		// to be assembled later. 
+		static void assemblestr(ks_engine * &ks, IRDB_SDK::Instruction_t *ins, const char * instruct, char * &encode, size_t &size, size_t &count);
 		void assembleRegistry();
 		void registerAssembly(IRDB_SDK::Instruction_t *instr, string assembly);
 		void unregisterAssembly(IRDB_SDK::Instruction_t *instr);
diff --git a/irdb-libs/libIRDB-core/src/SConscript b/irdb-libs/libIRDB-core/src/SConscript
index cbb1b62c8..7822295ec 100644
--- a/irdb-libs/libIRDB-core/src/SConscript
+++ b/irdb-libs/libIRDB-core/src/SConscript
@@ -50,16 +50,17 @@ cpppath='''
 	$SECURITY_TRANSFORMS_HOME/include/
 	$SECURITY_TRANSFORMS_HOME/libIRDB-core/include/
 	$SECURITY_TRANSFORMS_HOME/third_party/capstone/include/capstone/
+	$SECURITY_TRANSFORMS_HOME/third_party/keystone/include/
 	'''
 libpath='''
 	$SECURITY_TRANSFORMS_HOME/lib
         '''
 
-myenv.Append(CCFLAGS=" -Wall -std=c++11 -fmax-errors=2 ")
+myenv.Append(CCFLAGS=" -Wall -std=c++11 -fmax-errors=2")
 myenv.Append(LIBPATH=libpath)
 
 myenv=myenv.Clone(CPPPATH=Split(cpppath))
-mylib=myenv.SharedLibrary(libname, Split(files), LIBS=Split("pqxx capstone"))
+mylib=myenv.SharedLibrary(libname, Split(files), LIBS=Split("pqxx capstone keystone"))
 
 install=myenv.Install("$SECURITY_TRANSFORMS_HOME/lib/", mylib)
 Default(install)
diff --git a/irdb-libs/libIRDB-core/src/fileir.cpp b/irdb-libs/libIRDB-core/src/fileir.cpp
index 0277d7780..e72aac159 100644
--- a/irdb-libs/libIRDB-core/src/fileir.cpp
+++ b/irdb-libs/libIRDB-core/src/fileir.cpp
@@ -19,6 +19,7 @@
  *
  */
 
+#include <keystone/keystone.h>
 #include <all.hpp>
 #include <irdb-util>
 #include <cstdlib>
@@ -165,7 +166,7 @@ void FileIR_t::ReadFromDB()
 }
 
 
-void  FileIR_t::changeRegistryKey(IRDB_SDK::Instruction_t *p_orig, IRDB_SDK::Instruction_t *p_updated)
+void FileIR_t::changeRegistryKey(IRDB_SDK::Instruction_t *p_orig, IRDB_SDK::Instruction_t *p_updated)
 {
 	auto orig=dynamic_cast<libIRDB::Instruction_t*>(p_orig);
 	auto updated=dynamic_cast<libIRDB::Instruction_t*>(p_updated);
@@ -178,92 +179,55 @@ void  FileIR_t::changeRegistryKey(IRDB_SDK::Instruction_t *p_orig, IRDB_SDK::Ins
 	}
 }
 
+// Assembles `instruct` with the open engine `ks` and stores the bytes in `ins`; on failure closes `ks` and throws std::runtime_error. `size`/`count` receive ks_asm()'s outputs.
+void FileIR_t::assemblestr(ks_engine * &ks, IRDB_SDK::Instruction_t *ins, const char * instruct, char * &encode, size_t &size, size_t &count) 
+{
+	encode = nullptr; // callers reuse this variable across calls; never free a stale pointer from an earlier call
+	const auto failed = ks_asm(ks, instruct, 0, (unsigned char **)&encode, &size, &count) != KS_ERR_OK;
+	const auto bits = (failed || encode == nullptr) ? string() : string(encode, size);
+	ks_free((unsigned char*)encode); // the bytes were copied into `bits` above
+	encode = nullptr;
+	if(bits.empty()) { // keystone reported an error, or "succeeded" without emitting any bytes
+		const auto why = string(failed ? ks_strerror(ks_errno(ks)) : "no bytes were generated");
+		ks_close(ks);
+		throw std::runtime_error(string("ERROR: ks_asm() failed during instruction assembly of '") + instruct + "': " + why);
+	}
+	ins->setDataBits(bits);
+}
+
 void FileIR_t::assembleRegistry()
 {
 	if(assembly_registry.size() == 0)
 		return;
 
-	string assemblyFile = "tmp.asm";
-	string binaryOutputFile = "tmp.bin";
-
-	string command = "rm -f " + assemblyFile + " " + binaryOutputFile;
-	auto actual_exit = command_to_stream(command, cout); // system(command.c_str());
-	
-	assert(actual_exit == 0);
-	
-	ofstream asmFile;
-	asmFile.open(assemblyFile.c_str());
-	if(!asmFile.is_open())
-		assert(false);
-
-	asmFile<<"BITS "<<std::dec<<getArchitectureBitWidth()<<endl; 
-
-	for(auto it : assembly_registry)
-	{
-		asmFile<<it.second<<endl;
+	auto count = (size_t)0;
+	auto *encode = (char *)NULL;
+	auto size = (size_t)0;
+
+	const auto bits = getArchitectureBitWidth();
+	const auto machinetype = getArchitecture()->getMachineType();
+	const auto arch = (machinetype == IRDB_SDK::admtI386 || machinetype == IRDB_SDK::admtX86_64) ? KS_ARCH_X86 :
+	                  (machinetype == IRDB_SDK::admtArm32) ? KS_ARCH_ARM :
+	                  (machinetype == IRDB_SDK::admtAarch64) ? KS_ARCH_ARM64 :
+	                  (machinetype == IRDB_SDK::admtMips64 || machinetype == IRDB_SDK::admtMips32) ? KS_ARCH_MIPS :
+	                  throw std::invalid_argument("Cannot map IRDB architecture to keystone architure");
+	// Per keystone's ks_open(), ARM engines reject the x86/MIPS word-size flags: ARM32 takes KS_MODE_ARM, AArch64 only the endianness flag.
+	const auto mode = (arch == KS_ARCH_ARM) ? KS_MODE_ARM : (arch == KS_ARCH_ARM64) ? KS_MODE_LITTLE_ENDIAN :
+	                  (bits == 32) ? KS_MODE_32 : (bits == 64) ? KS_MODE_64 :
+	                  throw std::invalid_argument("Cannot map IRDB bit size to keystone bit size");
+
+	auto ks = (ks_engine *)NULL;
+	if(ks_open(arch, mode, &ks) != KS_ERR_OK) // checked unconditionally; an assert() would compile away under NDEBUG
+		throw std::runtime_error("ERROR: ks_open() failed; cannot assemble the registered instructions.");
+	ks_option(ks, KS_OPT_SYNTAX, KS_OPT_SYNTAX_NASM);
+
+	// Assemble every registered instruction; bind by const reference so the assembly string is not copied per iteration.
+	for(const auto &it : assembly_registry) {
+		assemblestr(ks, it.first, it.second.c_str(), encode, size, count);
 	}
-	asmFile.close();
-
-	command = string("nasm ") + assemblyFile + string(" -o ") + binaryOutputFile;
-	actual_exit = command_to_stream(command,cout); // system(command.c_str());
-	assert(actual_exit == 0);
-	
-	ifstream binreader;
-	unsigned int filesize;
-	binreader.open(binaryOutputFile.c_str(),ifstream::in|ifstream::binary);
-
-	assert(binreader.is_open());
-
-	binreader.seekg(0,ios::end);
-	filesize = binreader.tellg();
-	binreader.seekg(0,ios::beg);
-
-	unsigned char *binary_stream = new unsigned char[filesize];
-
-	binreader.read((char*)binary_stream,filesize);
-	binreader.close();
 
-	unsigned int index = 0;
-	registry_type::iterator reg_val =  assembly_registry.begin();
-
-	while(index < filesize)
-	{
-		//the number of registered instructions should not be exceeded
-		assert(reg_val != assembly_registry.end());
-		Instruction_t *instr = reg_val->first;
-
-
-		// disasm.EIP =  (UIntPtr)&binary_stream[index];
-		// int instr_len = Disasm(&disasm);
-
-		const auto p_disasm=DecodedInstruction_t::factory
-			(
-				/* fake start addr doesn't matter */0x1000, 
-				(void*)&binary_stream[index], 
-				(void*)&binary_stream[filesize]
-			);
-		const auto& disasm=*p_disasm;
-
-		assert(disasm.valid());
-		const auto instr_len=disasm.length();
-
-		string rawBits;
-		rawBits.resize(instr_len);
-		for(auto i=0U;i<instr_len;i++,index++)
-		{
-			rawBits[i] = binary_stream[index];
-		}
-
-		instr->setDataBits(rawBits);
-//		*verbose_logging << "doing instruction:" << ((Instruction_t*)instr)->getDisassembly() << " comment: " << ((Instruction_t*)instr)->getComment() << endl;
-		reg_val++;
-	}
-
-	assert(reg_val == assembly_registry.end());
-
-	delete [] binary_stream;
+	ks_close(ks);
 	assembly_registry.clear();
-
 }
 
 void FileIR_t::registerAssembly(IRDB_SDK::Instruction_t *p_instr, string assembly)
diff --git a/irdb-libs/libIRDB-core/src/instruction.cpp b/irdb-libs/libIRDB-core/src/instruction.cpp
index d723741e1..ceb81cac9 100644
--- a/irdb-libs/libIRDB-core/src/instruction.cpp
+++ b/irdb-libs/libIRDB-core/src/instruction.cpp
@@ -25,6 +25,7 @@
 #include <sstream>
 #include <iomanip>
 #include <irdb-util>
+#include <keystone/keystone.h>
 #include "cmdstr.hpp"
 
 #undef EIP
@@ -109,61 +110,32 @@ std::string Instruction_t::getDisassembly() const
 //
 bool Instruction_t::assemble(string assembly)
 {
-   const string assemblyFile = "tmp.asm"; 
-   const string binaryOutputFile = "tmp.bin";
+        const auto bits = FileIR_t::getArchitectureBitWidth();
+        auto count = (size_t)0;
+        auto encode = (char *)NULL;
+        auto size = (size_t)0;
 
-   //remove any preexisting assembly or nasm generated files
-   string command = "rm -f " + assemblyFile;
-   command_to_stream(command,cout);
-   command = "rm -f "+assemblyFile+".bin";
-   command_to_stream(command,cout);
+        const auto machinetype = FileIR_t::getArchitecture()->getMachineType();
 
-   ofstream asmFile;
-   asmFile.open(assemblyFile.c_str());
-   if(!asmFile.is_open())
-   {
-     return false;
-   }
+        const auto arch = (machinetype == IRDB_SDK::admtI386 || machinetype == IRDB_SDK::admtX86_64) ? KS_ARCH_X86 :
+                          (machinetype == IRDB_SDK::admtArm32) ? KS_ARCH_ARM :
+                          (machinetype == IRDB_SDK::admtAarch64) ? KS_ARCH_ARM64 :
+                          (machinetype == IRDB_SDK::admtMips64 || machinetype == IRDB_SDK::admtMips32) ? KS_ARCH_MIPS :
+                          throw std::invalid_argument("Cannot map IRDB architecture to keystone architure");
+        // Per keystone's ks_open(), ARM engines reject the x86/MIPS word-size flags: ARM32 takes KS_MODE_ARM, AArch64 only the endianness flag.
+        const auto mode = (arch == KS_ARCH_ARM) ? KS_MODE_ARM : (arch == KS_ARCH_ARM64) ? KS_MODE_LITTLE_ENDIAN :
+                          (bits == 32) ? KS_MODE_32 : (bits == 64) ? KS_MODE_64 :
+                          throw std::invalid_argument("Cannot map IRDB bit size to keystone bit size");
 
-   asmFile<<"BITS "<<std::dec<<FileIR_t::getArchitectureBitWidth()<<endl; 
+        auto ks = (ks_engine *)NULL;
+        if(ks_open(arch, mode, &ks) != KS_ERR_OK) return false; // checked unconditionally; an assert() would compile away under NDEBUG
 
-   asmFile<<assembly<<endl;
-   asmFile.close();
+        ks_option(ks, KS_OPT_SYNTAX, KS_OPT_SYNTAX_NASM);
 
-   command = "nasm " + assemblyFile + " -o "+ binaryOutputFile;
-   command_to_stream(command,cout);
+        FileIR_t::assemblestr(ks, this, assembly.c_str(), encode, size, count); // on failure this closes ks itself and throws
+        ks_close(ks); // success path: release the engine, which was otherwise leaked on every call
+        return true;
 
-    ifstream binreader;
-    unsigned int filesize;
-    binreader.open(binaryOutputFile.c_str(),ifstream::in|ifstream::binary);
-
-    if(!binreader.is_open())
-    {
-      return false;
-    }
-
-    binreader.seekg(0,ios::end);
-
-    filesize = binreader.tellg();
-
-    binreader.seekg(0,ios::beg);
-
-    if (filesize == 0) return false;
-
-    unsigned char *memblock = new unsigned char[filesize];
-
-    binreader.read((char*)memblock,filesize);
-    binreader.close();
-
-    string rawBits;
-    rawBits.resize(filesize);
-    for (auto i = 0U; i < filesize; ++i)
-      rawBits[i] = memblock[i];
-
-    // should erase those 2 files here
-
-    this->setDataBits(rawBits);
-    return true;
 }
 
 
diff --git a/irdb-libs/third_party/keystone b/irdb-libs/third_party/keystone
new file mode 160000
index 000000000..e405f7b1a
--- /dev/null
+++ b/irdb-libs/third_party/keystone
@@ -0,0 +1 @@
+Subproject commit e405f7b1a1125a590c49916c1b2df7c60c30802d
diff --git a/set_env_vars b/set_env_vars
index b19534c80..0486622a5 100755
--- a/set_env_vars
+++ b/set_env_vars
@@ -32,7 +32,7 @@ export PSZ=$PS_INSTALL/tools/ps_zipr.sh
 source ./irdb_vars
 source $PEASOUP_HOME/set_command_envs
 
-export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ZEST_RUNTIME/lib32:$ZEST_RUNTIME/lib64:$SECURITY_TRANSFORMS_HOME/lib"
+export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ZEST_RUNTIME/lib32:$ZEST_RUNTIME/lib64:$SECURITY_TRANSFORMS_HOME/lib:$SECURITY_TRANSFORMS_HOME/lib/usr/local/lib64/:$SECURITY_TRANSFORMS_HOME/lib/usr/local/lib/"
 
 
 # deprecated:  going away soon.
-- 
GitLab