Skip to content
Snippets Groups Projects
Commit a485336d authored by jdh8d's avatar jdh8d
Browse files

Fixed jump-to-label instructions to work on 64-bit, where the label may not be in range

Former-commit-id: 5d0d140f3e89e3e6cc457f692a7644c65112cdbc
parent 5c729099
No related branches found
No related tags found
No related merge requests found
......@@ -49,10 +49,32 @@ static string addressify(Instruction_t* insn);
//
// determine if this branch has a short offset that can't be represented as a long branch
//
static int needs_short_branch_rewrite(const DISASM &disasm)
static bool needs_short_branch_rewrite(Instruction_t* newinsn, const DISASM &disasm)
{
return strstr(disasm.Instruction.Mnemonic, "jecxz" ) || strstr(disasm.Instruction.Mnemonic, "loop" ) ||
strstr(disasm.Instruction.Mnemonic, "loopne") || strstr(disasm.Instruction.Mnemonic, "loope") ;
if (strstr(disasm.Instruction.Mnemonic, "jecxz" ) || strstr(disasm.Instruction.Mnemonic, "loop" ) ||
strstr(disasm.Instruction.Mnemonic, "loopne") || strstr(disasm.Instruction.Mnemonic, "loope") )
return true;
/* 64-bit has more needs than this */
if(sizeof(void*)!=8)
return false;
if(disasm.Instruction.BranchType==0) /* non-branches, jumps, calls and returns don't need this rewrite */
return false;
if(disasm.Instruction.BranchType==JmpType)
return false;
if(disasm.Instruction.BranchType==CallType)
return false;
if(disasm.Instruction.BranchType==RetType)
return false;
/* all other branches (on x86-64) need further checking */
if(!newinsn->GetTarget()) /* no specified target, no need to modify it */
return false;
string new_target=labelfy(newinsn->GetTarget());
if (new_target.c_str()[0]=='0') /* if we're jumping back to the base instruction */
return true;
return false;
}
......@@ -181,6 +203,14 @@ static string get_short_branch_label(Instruction_t *newinsn)
return "sj_" + labelfy(newinsn);
}
/* build the "da_"-prefixed label for an instruction; a null instruction yields an empty string */
static string get_data_label(Instruction_t *newinsn)
{
	if (newinsn)
		return "da_" + labelfy(newinsn);

	return string("");
}
static string getPostCallbackLabel(Instruction_t *newinsn)
{
if (!newinsn)
......@@ -195,6 +225,122 @@ static void emit_relocation(FileIR_t* fileIRp, ostream& fout, int offset, string
fout<<"\t"<<labelfy(insn)<<" rl " << offset << " "<< type << " " << URLToFile(fileIRp->GetFile()->GetURL()) <<endl;
}
/*
 * Rewrite an emitted branch line so that it goes through a data slot:
 *     "<insn> <addr>"  becomes  "<insn> qword [ rel da_X]\n\tda_X ** dq <addr>"
 * where da_X is get_data_label(newinsn).
 *
 * final      - the emitted text, modified in place
 * new_target - the target text to look for inside final
 *
 * Does nothing when this tool is built with 4-byte pointers, or when
 * new_target does not start with '0' (i.e. it is a label, not an address).
 */
void covert_jump_for_64bit(Instruction_t* newinsn, string &final, string new_target)
{
	/* skip for x86-32 */
	const bool is_32bit_build = (sizeof(void*)==4);
	if(is_32bit_build)
		return;

	/* skip for labeled addresses */
	const bool is_labeled = new_target.empty() || new_target[0]!='0';
	if(is_labeled)
		return;

	const string slot_label=get_data_label(newinsn);

	/* split the text at the first occurrence of the target */
	const string::size_type target_pos=final.find(new_target);
	const string before_target=final.substr(0,target_pos);
	const string from_target=final.substr(target_pos);

	/* the instruction now reads its destination from the slot, which holds the address */
	string rewritten=before_target;
	rewritten+=" qword [ rel ";
	rewritten+=slot_label;
	rewritten+="]\n\t";
	rewritten+=slot_label;
	rewritten+=" ** dq ";
	rewritten+=from_target;

	final=rewritten;
}
/*
 * Emit one direct branch instruction to fout, plus a relocation line when needed.
 *
 * fileIRp         - IR of the file being emitted (used for relocation output and the sanity assert)
 * fout            - output stream receiving the emitted text
 * disasm          - disassembly of newinsn
 * newinsn         - the instruction being emitted
 * old_insn        - not used by this function
 * original_target - set only when the branch needs a short-branch rewrite: receives the
 *                   label of the real target, or the disassembled operand text if the
 *                   instruction has no target
 */
void emit_jump(FileIR_t* fileIRp, ostream& fout, DISASM& disasm, Instruction_t* newinsn, Instruction_t *old_insn, string & original_target)
{
	/* NOTE(review): this label is computed but never read below */
	string label=labelfy(newinsn);
	const string whole_insn(disasm.CompleteInstr);
	const string operand_text(disasm.Argument1.ArgMnemonic);

	/*
	 * 0xeb = jmp with 8-bit offset (should be recompiled to 32-bit),
	 * 0xe8 = call with 32-bit offset, 0xe9 = jmp with 32-bit offset.
	 * All three have a 1-byte opcode.
	 */
	const bool one_byte_opcode=
		disasm.Instruction.Opcode==0xeb ||
		disasm.Instruction.Opcode==0xe8 ||
		disasm.Instruction.Opcode==0xe9;

	/* this instruction has a target, but it's not in the DB */
	if(!newinsn->GetTarget() && !needs_short_branch_rewrite(newinsn,disasm))
	{
		/* so we'll just emit the instruction and let it go back to the application text. */
		fout<<whole_insn<<endl;

		/* just ignore these bogus instructions. */
		if(whole_insn=="call 0x00000000" || whole_insn=="jmp 0x00000000")
			return;

		/* needs relocation info. */
		if(one_byte_opcode)
		{
			emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
		}
		else
		{
			/* assert this is the "main" file and no relocation is necessary. */
			assert(strstr(fileIRp->GetFile()->GetURL().c_str(),"a.ncexe")!=0);
		}
		return;
	}

	/* we have a target instruction in the database: change the target to be symbolic */
	string new_target;
	if(newinsn->GetTarget())
		new_target=labelfy(newinsn->GetTarget());

	/* if this is a short branch, write this branch to jump to the next insn */
	if(needs_short_branch_rewrite(newinsn,disasm))
	{
		new_target=get_short_branch_label(newinsn);

		/* also get the real target if it's a short branch */
		if(newinsn->GetTarget())
			original_target=labelfy(newinsn->GetTarget());
		else
			original_target=operand_text;
	}

	/* find the location in the disassembled string of the old target ... */
	const string::size_type operand_pos=whole_insn.find(operand_text);

	/* ... and build up a new string that has the label of the target instead of the address */
	const string before_operand=whole_insn.substr(0,operand_pos);
	const string after_operand=whole_insn.substr(operand_pos+operand_text.length());
	string final=before_operand + new_target + after_operand;

	/* sanity, no segment registers for absolute mode */
	assert(disasm.Argument1.SegmentReg==0);

	covert_jump_for_64bit(newinsn,final, new_target);

	fout<<final<<endl;

	/* if we're jumping to an absolute address vs. a label, we will need a relocation for this jump instruction */
	if(!new_target.empty() && new_target[0]=='0')
	{
		/*
		 * the relocation sits right after the opcode: 1 byte in for the jmp/call forms
		 * above; other jcc's often use a 2-byte opcode for far jmps (which is what spri will emit)
		 */
		emit_relocation(fileIRp, fout, one_byte_opcode ? 1 : 2, "32-bit", newinsn);
	}
}
//
// emit this instruction as spri code.
//
......@@ -275,88 +421,7 @@ static string emit_spri_instruction(FileIR_t* fileIRp, Instruction_t *newinsn, o
(disasm.Argument1.ArgType & CONSTANT_TYPE)!=0 // and has a constant argument type 1
)
{
/* if we have a target instruction in the database */
if(newinsn->GetTarget() || needs_short_branch_rewrite(disasm))
{
/* change the target to be symbolic */
/* first get the new target */
string new_target;
if(newinsn->GetTarget())
new_target=labelfy(newinsn->GetTarget());
/* if this is a short branch, write this branch to jump to the next insn */
if(needs_short_branch_rewrite(disasm))
{
new_target=get_short_branch_label(newinsn);
/* also get the real target if it's a short branch */
if(newinsn->GetTarget())
original_target=labelfy(newinsn->GetTarget());
else
original_target=address_string;
}
/* find the location in the disassembled string of the old target */
int start=complete_instr.find(address_string,0);
/* and build up a new string that has the label of the target instead of the address */
string final=complete_instr.substr(0,start) + new_target + complete_instr.substr(start+address_string.length());
/* sanity, no segment registers for absolute mode */
assert(disasm.Argument1.SegmentReg==0);
fout<<final<<endl;
if (new_target.c_str()[0]=='0')
{
// if we're jumping to an absolute address vrs a label, we will need a relocation for this jump instruction
if(
disasm.Instruction.Opcode==0xeb || // jmp with 8-bit addr -- should be recompiled to 32-bit
disasm.Instruction.Opcode==0xe8 || // jmp with 32-bit addr
disasm.Instruction.Opcode==0xe9 // call with 32-bit addr
)
{
/* jumps have a 1-byte opcode */
emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
}
else
{
/* other jcc'often use a 2-byte opcode for far jmps (which is what spri will emit) */
emit_relocation(fileIRp, fout,2,"32-bit",newinsn);
}
}
}
else /* this instruction has a target, but it's not in the DB */
{
/* so we'll just emit the instruction and let it go back to the application text. */
fout<<complete_instr<<endl;
// needs relocation info.
if(complete_instr.compare("call 0x00000000")==0 ||
complete_instr.compare("jmp 0x00000000")==0
)
{
// just ignore these bogus instructions.
}
else
{
if(
disasm.Instruction.Opcode==0xeb || // jmp with 8-bit addr
disasm.Instruction.Opcode==0xe8 || // jmp with 32-bit addr
disasm.Instruction.Opcode==0xe9 // call with 32-bit addr
)
{
emit_relocation(fileIRp, fout,1,"32-bit",newinsn);
}
else
{
// assert this is the "main" file and no relocation is necessary.
assert(strstr(fileIRp->GetFile()->GetURL().c_str(),"a.ncexe")!=0);
}
}
}
emit_jump(fileIRp, fout, disasm,newinsn,old_insn, original_target);
}
else
{
......
......@@ -2,6 +2,7 @@
#include <vector>
#include <regex.h>
#include <iostream>
#include <ios>
#include <sstream>
#include <fstream>
#include <map>
......@@ -10,6 +11,8 @@
#include <climits>
#include <cstring>
#include <assert.h>
#include <stdint.h>
#include <algorithm>
#include "ben_lib.h"
......@@ -51,11 +54,11 @@ typedef struct bin_instruction {
static unsigned int const ORG_PC = 0xff000000;
static uintptr_t const ORG_PC = 0xff000000;
//padding is added to the ORG_PC for the first vpc
//the padding amount is [0-PC_PADDING_MAX), i.e., not inclusive of PC_PADDING_MAX
static unsigned int const PC_PADDING_MAX = 8001;
static unsigned int vpc = ORG_PC;
static uintptr_t vpc = ORG_PC;
static map<string,string> symMap;
static map<string,string> callbackMap;
......@@ -90,7 +93,7 @@ static void resolveSymbols(const string &mapFile);
//static vector<bin_instruction_t> parseBin(const string &binFile);
//static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector<spasmline_t> &spasmlines, const string &symbolFilename);
//static void printVector(const string &outputFile, const vector<string> &lines);
static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception);
static uintptr_t getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception);
//
// @todo: need to cache results
......@@ -106,13 +109,13 @@ static string getCallbackAddress(const string &symbolFilename, const string &sym
}
static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception)
static uintptr_t getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception)
{
string symbolFullName = symbolFilename + "+" + symbol;
map<string,string>::iterator callbackMapIterator;
if(callbackMap.find(symbolFullName) != callbackMap.end())
{
return strtol(callbackMap[symbolFullName].c_str(),NULL,16);
return (uintptr_t)strtoull(callbackMap[symbolFullName].c_str(),NULL,16);
}
// nm -a stratafier.o.exe | egrep " integer_overflow_detector$" | cut -f1 -d' '
......@@ -138,7 +141,7 @@ static int getSymbolAddress(const string &symbolFilename, const string &symbol)
callbackMap[symbolFullName] = addressString;
return strtol(addressString.c_str(),NULL,16);
return (uintptr_t) strtoull(addressString.c_str(),NULL,16);
}
bool fexists(const string &filename)
......@@ -156,9 +159,16 @@ void a2bspri(const vector<string> &input,const string &outFilename, const string
srand(time(0));
vpc += rand()%PC_PADDING_MAX;
/* make start at 0xff00000000000000 for x86-64 */
if(sizeof(void*)==8)
{
vpc<<=32;
vpc += rand();
}
else
vpc += rand()%PC_PADDING_MAX;
cout<<"VPC init loc: "<<hex<<vpc<<endl;
cout<<"VPC init loc: "<<hex<<nouppercase<<vpc<<endl;
for(unsigned int i=0;i<input.size();i++)
{
......@@ -357,7 +367,7 @@ static void assemble(const string &assemblyFile)
nasm_bit_width="BITS 32";
asmFile<<nasm_bit_width<<endl;
asmFile<<"ORG 0x"<<hex<<vpc<<endl;
asmFile<<"ORG 0x"<<hex<<nouppercase<<vpc<<endl;
asmFile<<"[map symbols "<<assemblyFile<<".map]"<<endl;
spasmline_t sline;
......@@ -506,24 +516,27 @@ static void resolveSymbols(const string &mapFile)
//and the third is the symbol.
char *endptr;
char *tok_c_str = const_cast<char*>(tokens[0].c_str());
long long addrval;
addrval = strtoll(tok_c_str,&endptr,16);
uintptr_t addrval;
addrval = (uintptr_t)strtoull(tok_c_str,&endptr,16);
if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN))
|| ((errno != 0 && addrval == 0) || endptr == tok_c_str))
if((errno == ERANGE && (addrval == (uintptr_t)ULLONG_MAX || addrval == (uintptr_t)0))
|| ((errno != 0 && addrval == (uintptr_t)0) || endptr == tok_c_str))
{
continue;
}
tok_c_str = const_cast<char*>(tokens[1].c_str());
addrval = strtoll(tok_c_str,&endptr,16);
addrval = (uintptr_t)strtoull(tok_c_str,&endptr,16);
if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN))
|| ((errno != 0 && addrval == 0) || endptr == tok_c_str))
if((errno == ERANGE && (addrval == (uintptr_t)ULLONG_MAX || addrval == (uintptr_t)0))
|| ((errno != 0 && addrval == (uintptr_t)0) || endptr == tok_c_str))
{
continue;
}
// convert tokens[1] to lower case
transform(tokens[1].begin(), tokens[1].end(),tokens[1].begin(), ::tolower );
if(symMap.find(tokens[2]) != symMap.end())
{
symMap[tokens[2]] = tokens[1];
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment