From a485336d3d88928cf3a14928b2e6b90e1a0de0d3 Mon Sep 17 00:00:00 2001
From: jdh8d <jdh8d@git.zephyr-software.com>
Date: Fri, 13 Sep 2013 16:14:23 +0000
Subject: [PATCH] Fixed jmp label insns to work for 64-bit, where label may not
 be in range

Former-commit-id: 5d0d140f3e89e3e6cc457f692a7644c65112cdbc
---
 libIRDB/src/core/generate_spri.cpp | 235 ++++++++++++++++++-----------
 tools/spasm/spasm.cpp              |  45 ++++--
 2 files changed, 179 insertions(+), 101 deletions(-)

diff --git a/libIRDB/src/core/generate_spri.cpp b/libIRDB/src/core/generate_spri.cpp
index eb5276ae1..9c51a28f8 100644
--- a/libIRDB/src/core/generate_spri.cpp
+++ b/libIRDB/src/core/generate_spri.cpp
@@ -49,10 +49,32 @@ static string addressify(Instruction_t* insn);
 //
 // determine if this branch has a short offset that can't be represented as a long branch
 //
-static int needs_short_branch_rewrite(const DISASM &disasm)
+static bool needs_short_branch_rewrite(Instruction_t* newinsn, const DISASM &disasm)
 {
-	return   strstr(disasm.Instruction.Mnemonic, "jecxz" ) || strstr(disasm.Instruction.Mnemonic, "loop" ) || 
-		 strstr(disasm.Instruction.Mnemonic, "loopne") || strstr(disasm.Instruction.Mnemonic, "loope") ;
+	if   (strstr(disasm.Instruction.Mnemonic, "jecxz" ) || strstr(disasm.Instruction.Mnemonic, "loop" ) || 
+		 strstr(disasm.Instruction.Mnemonic, "loopne") || strstr(disasm.Instruction.Mnemonic, "loope") )
+		return true;
+	/* (strstr on "loop" already matches loopne/loope, so the last two tests above are redundant but harmless) */
+	/* 64-bit has more needs than this: other branches whose target is a raw address are rewritten too */
+	if(sizeof(void*)!=8)	/* NOTE(review): this is the pointer size of this build -- confirm it always matches the program being rewritten */
+		return false;
+
+	if(disasm.Instruction.BranchType==0)		/* non-branches, jumps, calls and returns don't need this rewrite */
+		return false;
+	if(disasm.Instruction.BranchType==JmpType)
+		return false;
+	if(disasm.Instruction.BranchType==CallType)
+		return false;
+	if(disasm.Instruction.BranchType==RetType)
+		return false;
+
+	/* all other branches (on x86-64) need further checking */
+	if(!newinsn->GetTarget())	/* no specified target, no need to modify it */
+		return false;
+	string new_target=labelfy(newinsn->GetTarget());	/* text used to name the target in the emitted output */
+	if (new_target.c_str()[0]=='0')	/* a leading '0' is an address, not a label: we're jumping back to the base instruction */
+		return true;
+	return false;	/* the target has a symbolic label: no rewrite */
 }
 
 
@@ -181,6 +203,14 @@ static string get_short_branch_label(Instruction_t *newinsn)
 		return "sj_" + labelfy(newinsn);
 }
 
+static string get_data_label(Instruction_t *newinsn)
+{
+	/* Label of the data slot that goes with newinsn: "da_" followed by the
+	 * instruction's own label.  With no instruction there is no slot, so the
+	 * result is the empty string. */
+	return newinsn ? "da_" + labelfy(newinsn) : string("");
+}
+
 static string getPostCallbackLabel(Instruction_t *newinsn)
 {
 	if (!newinsn)
@@ -195,6 +225,122 @@ static void emit_relocation(FileIR_t* fileIRp, ostream& fout, int offset, string
 	fout<<"\t"<<labelfy(insn)<<" rl " << offset << " "<< type <<  " " << URLToFile(fileIRp->GetFile()->GetURL()) <<endl;
 }
 
+
+void covert_jump_for_64bit(Instruction_t* newinsn, string &final, string new_target)
+{
+	/* Turn a direct branch to a raw address into an indirect branch through a data slot (x86-64 builds only). */
+	/* 'final' is the instruction text, rewritten in place; 'new_target' is the operand text contained in it. */
+	if(sizeof(void*)!=8)	/* same pointer-size test that needs_short_branch_rewrite uses */
+		return;
+
+	/* skip for labeled addresses: only operands that start with '0' are raw addresses */
+	if (new_target.c_str()[0]!='0')
+		return;
+
+	const string datalabel=get_data_label(newinsn);
+
+	/* convert a "call <addr>" into "call qword [rel data_label] \n  data_label ** dq <addr>" */
+	const string::size_type start=final.find(new_target,0);
+	/* keep the position in string's own index type; without the assert, substr(npos) below still throws */
+	assert(start!=string::npos);
+	final=final.substr(0,start)+" qword [ rel " +datalabel + "]\n\t"+ datalabel + " ** dq "+final.substr(start);
+}
+
+void emit_jump(FileIR_t* fileIRp, ostream& fout, DISASM& disasm, Instruction_t* newinsn, Instruction_t *old_insn, string & original_target)
+{
+	/* Write newinsn's branch to fout, with its target operand replaced by a label when one is known, followed by any relocation line it needs. */
+        string label=labelfy(newinsn);	/* NOTE(review): not referenced again in this function */
+        string complete_instr=string(disasm.CompleteInstr);	/* full text of the disassembled instruction */
+        string address_string=string(disasm.Argument1.ArgMnemonic);	/* text of the first operand (the old target) */
+	/* original_target is written only on the short-branch path below; old_insn is not used in this function. */
+
+	/* if we have a target instruction in the database (or the branch must be rewritten as a short branch) */
+	if(newinsn->GetTarget() || needs_short_branch_rewrite(newinsn,disasm))
+	{
+		/* change the target to be symbolic */
+
+		/* first get the new target */
+		string new_target;
+		if(newinsn->GetTarget())
+			new_target=labelfy(newinsn->GetTarget());
+		/* if this is a short branch, write this branch to jump to the next insn */
+		if(needs_short_branch_rewrite(newinsn,disasm))
+		{
+			new_target=get_short_branch_label(newinsn);
+
+			/* also get the real target if it's a short branch */
+			if(newinsn->GetTarget())
+				original_target=labelfy(newinsn->GetTarget());
+			else
+				original_target=address_string;
+		}
+
+		/* find the location in the disassembled string of the old target */
+		int start=complete_instr.find(address_string,0);
+		/* NOTE(review): find() returns string::npos when the operand text is absent; start is used unchecked below */
+		/* and build up a new string that has the label of the target instead of the address */
+		string final=complete_instr.substr(0,start) + new_target + complete_instr.substr(start+address_string.length());
+
+	
+		/* sanity, no segment registers for absolute mode */
+		assert(disasm.Argument1.SegmentReg==0);
+		/* on 64-bit builds a raw-address target is turned into an indirect branch through a data slot */
+		covert_jump_for_64bit(newinsn,final, new_target);
+
+		fout<<final<<endl;
+
+		if (new_target.c_str()[0]=='0')
+		{
+			// if we're jumping to an absolute address vs. a label, we will need a relocation for this jump instruction
+			if(
+ 		   	   disasm.Instruction.Opcode==0xeb || 	 // jmp with 8-bit addr  -- should be recompiled to 32-bit
+ 		   	   disasm.Instruction.Opcode==0xe8 || 	 // call with 32-bit addr 
+		   	   disasm.Instruction.Opcode==0xe9 	 // jmp with 32-bit addr
+
+			)
+			{
+				/* jmp/call have a 1-byte opcode, so the relocation is emitted at offset 1 */
+ 				emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
+			}
+			else
+			{
+				/* other jcc's often use a 2-byte opcode for far jmps (which is what spri will emit) */
+ 				emit_relocation(fileIRp, fout,2,"32-bit",newinsn);
+			}
+		}
+	}
+	else 	/* this instruction has a target, but it's not in the DB */
+	{
+		/* so we'll just emit the instruction and let it go back to the application text. */	
+		fout<<complete_instr<<endl;
+// needs relocation info.
+		if(complete_instr.compare("call 0x00000000")==0 ||
+		   complete_instr.compare("jmp 0x00000000")==0
+ 		  )
+		{
+			// just ignore these bogus instructions.
+		}
+		else
+		{
+			if(
+		   	   disasm.Instruction.Opcode==0xeb || 	 // jmp with 8-bit addr 
+		   	   disasm.Instruction.Opcode==0xe8 || 	 // call with 32-bit addr 
+		   	   disasm.Instruction.Opcode==0xe9 	 // jmp with 32-bit addr
+			  )
+			{
+				emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
+			}
+			else
+			{
+				// assert this is the "main" file and no relocation is necessary.
+				assert(strstr(fileIRp->GetFile()->GetURL().c_str(),"a.ncexe")!=0);
+			}
+		}
+	}
+}
+
+
+
 //
 // emit this instruction as spri code.
 //
@@ -275,88 +421,7 @@ static string emit_spri_instruction(FileIR_t* fileIRp, Instruction_t *newinsn, o
                 (disasm.Argument1.ArgType & CONSTANT_TYPE)!=0          // and has a constant argument type 1
           )
 	{
-
-		/* if we have a target instruction in the database */
-		if(newinsn->GetTarget() || needs_short_branch_rewrite(disasm))
-		{
-			/* change the target to be symbolic */
-	
-			/* first get the new target */
-			string new_target;
-			if(newinsn->GetTarget())
-				new_target=labelfy(newinsn->GetTarget());
-			/* if this is a short branch, write this branch to jump to the next insn */
-			if(needs_short_branch_rewrite(disasm))
-			{
-				new_target=get_short_branch_label(newinsn);
-
-				/* also get the real target if it's a short branch */
-				if(newinsn->GetTarget())
-					original_target=labelfy(newinsn->GetTarget());
-				else
-					original_target=address_string;
-			}
-
-			/* find the location in the disassembled string of the old target */
-			int start=complete_instr.find(address_string,0);
-
-			/* and build up a new string that has the label of the target instead of the address */
-			string final=complete_instr.substr(0,start) + new_target + complete_instr.substr(start+address_string.length());
-
-	
-			/* sanity, no segment registers for absolute mode */
-			assert(disasm.Argument1.SegmentReg==0);
-
-			fout<<final<<endl;
-
-			if (new_target.c_str()[0]=='0')
-			{
-				// if we're jumping to an absolute address vrs a label, we will need a relocation for this jump instruction
-				if(
- 			   	   disasm.Instruction.Opcode==0xeb || 	 // jmp with 8-bit addr  -- should be recompiled to 32-bit
- 			   	   disasm.Instruction.Opcode==0xe8 || 	 // jmp with 32-bit addr 
-			   	   disasm.Instruction.Opcode==0xe9 	 // call with 32-bit addr
-
-				)
-				{
-					/* jumps have a 1-byte opcode */
- 					emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
-				}
-				else
-				{
-					/* other jcc'often use a 2-byte opcode for far jmps (which is what spri will emit) */
- 					emit_relocation(fileIRp, fout,2,"32-bit",newinsn);
-				}
-			}
-		}
-		else 	/* this instruction has a target, but it's not in the DB */
-		{
-			/* so we'll just emit the instruction and let it go back to the application text. */	
-			fout<<complete_instr<<endl;
-// needs relocation info.
-			if(complete_instr.compare("call 0x00000000")==0 ||
-			   complete_instr.compare("jmp 0x00000000")==0
- 			  )
-			{
-				// just ignore these bogus instructions.
-			}
-			else
-			{
-				if(
-			   	   disasm.Instruction.Opcode==0xeb || 	 // jmp with 8-bit addr 
-			   	   disasm.Instruction.Opcode==0xe8 || 	 // jmp with 32-bit addr 
-			   	   disasm.Instruction.Opcode==0xe9 	 // call with 32-bit addr
-				  )
-				{
-					emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
-				}
-				else
-				{
-					// assert this is the "main" file and no relocation is necessary.
-					assert(strstr(fileIRp->GetFile()->GetURL().c_str(),"a.ncexe")!=0);
-				}
-			}
-		}
+		emit_jump(fileIRp, fout, disasm,newinsn,old_insn, original_target);
 	}
 	else
 	{
diff --git a/tools/spasm/spasm.cpp b/tools/spasm/spasm.cpp
index 608a020c7..7a7628232 100755
--- a/tools/spasm/spasm.cpp
+++ b/tools/spasm/spasm.cpp
@@ -2,6 +2,7 @@
 #include <vector>
 #include <regex.h>
 #include <iostream>
+#include <ios>
 #include <sstream>
 #include <fstream>
 #include <map>
@@ -10,6 +11,8 @@
 #include <climits>
 #include <cstring>
 #include <assert.h>
+#include <stdint.h>
+#include <algorithm>
 
 #include "ben_lib.h"
 
@@ -51,11 +54,11 @@ typedef struct bin_instruction {
 		
 
 
-static unsigned int const ORG_PC = 0xff000000;
+static uintptr_t const ORG_PC = 0xff000000;
 //padding is added to the ORG_PC for the first vpc
 //the padding amount is [0-PC_PADDING_MAX), i.e., not inclusive of PC_PADDING_MAX
 static unsigned int const PC_PADDING_MAX = 8001;
-static unsigned int vpc = ORG_PC; 
+static uintptr_t vpc = ORG_PC; 
 static map<string,string> symMap; 
 static map<string,string> callbackMap; 
 
@@ -90,7 +93,7 @@ static void resolveSymbols(const string &mapFile);
 //static vector<bin_instruction_t> parseBin(const string &binFile);
 //static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector<spasmline_t> &spasmlines, const string &symbolFilename);
 //static void printVector(const string &outputFile, const vector<string> &lines);
-static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception);
+static uintptr_t getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception);
 
 //
 // @todo: need to cache results
@@ -106,13 +109,13 @@ static string getCallbackAddress(const string &symbolFilename, const string &sym
 }
 
 
-static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception)
+static uintptr_t getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception)
 {
 	string symbolFullName = symbolFilename + "+" + symbol;
 	map<string,string>::iterator callbackMapIterator;
 	if(callbackMap.find(symbolFullName) != callbackMap.end())
 	{
-		return strtol(callbackMap[symbolFullName].c_str(),NULL,16);
+		return (uintptr_t)strtoull(callbackMap[symbolFullName].c_str(),NULL,16);
 	}
 
 // nm -a stratafier.o.exe | egrep " integer_overflow_detector$" | cut -f1 -d' '
@@ -138,7 +141,7 @@ static int getSymbolAddress(const string &symbolFilename, const string &symbol)
 
 	callbackMap[symbolFullName] = addressString;
 
-	return strtol(addressString.c_str(),NULL,16);
+	return (uintptr_t) strtoull(addressString.c_str(),NULL,16);
 }
 
 bool fexists(const string &filename)
@@ -156,9 +159,16 @@ void a2bspri(const vector<string> &input,const string &outFilename, const string
 
 	srand(time(0));
 
-	vpc += rand()%PC_PADDING_MAX;
+	/* make start at 0xff00000000000000 for x86-64 */
+	if(sizeof(void*)==8)
+	{
+		vpc<<=32;
+		vpc += rand();
+	}
+	else
+		vpc += rand()%PC_PADDING_MAX;
 
-	cout<<"VPC init loc: "<<hex<<vpc<<endl;
+	cout<<"VPC init loc: "<<hex<<nouppercase<<vpc<<endl;
 
 	for(unsigned int i=0;i<input.size();i++)
 	{
@@ -357,7 +367,7 @@ static void assemble(const string &assemblyFile)
 		nasm_bit_width="BITS 32";
 
 	asmFile<<nasm_bit_width<<endl;
-	asmFile<<"ORG 0x"<<hex<<vpc<<endl;
+	asmFile<<"ORG 0x"<<hex<<nouppercase<<vpc<<endl;
 	asmFile<<"[map symbols "<<assemblyFile<<".map]"<<endl;
 
 	spasmline_t sline;
@@ -506,24 +516,27 @@ static void resolveSymbols(const string &mapFile)
 		//and the third is the symbol.
 		char *endptr;
 		char *tok_c_str = const_cast<char*>(tokens[0].c_str());
-		long long addrval;
-		addrval = strtoll(tok_c_str,&endptr,16); 
+		uintptr_t addrval;
+		addrval = (uintptr_t)strtoull(tok_c_str,&endptr,16); 
 
-		if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN))
-		   || ((errno != 0 && addrval == 0) || endptr == tok_c_str))
+		if((errno == ERANGE && (addrval == (uintptr_t)ULLONG_MAX || addrval == (uintptr_t)0))
+		   || ((errno != 0 && addrval == (uintptr_t)0) || endptr == tok_c_str))
 		{
 			continue;
 		}
 
 		tok_c_str = const_cast<char*>(tokens[1].c_str());
-		addrval = strtoll(tok_c_str,&endptr,16); 
+		addrval = (uintptr_t)strtoull(tok_c_str,&endptr,16); 
 
-		if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN))
-		   || ((errno != 0 && addrval == 0) || endptr == tok_c_str))
+		if((errno == ERANGE && (addrval == (uintptr_t)ULLONG_MAX || addrval == (uintptr_t)0))
+		   || ((errno != 0 && addrval == (uintptr_t)0) || endptr == tok_c_str))
 		{
 			continue;
 		}
 
+		// convert tokens[1] to lower case 
+		transform(tokens[1].begin(), tokens[1].end(),tokens[1].begin(), ::tolower );
+
 		if(symMap.find(tokens[2]) != symMap.end())
 		{
 			symMap[tokens[2]] = tokens[1];
-- 
GitLab