From a485336d3d88928cf3a14928b2e6b90e1a0de0d3 Mon Sep 17 00:00:00 2001 From: jdh8d <jdh8d@git.zephyr-software.com> Date: Fri, 13 Sep 2013 16:14:23 +0000 Subject: [PATCH] Fixed jmp label insns to work for 64-bit, where label may not be in range Former-commit-id: 5d0d140f3e89e3e6cc457f692a7644c65112cdbc --- libIRDB/src/core/generate_spri.cpp | 235 ++++++++++++++++++----------- tools/spasm/spasm.cpp | 45 ++++-- 2 files changed, 179 insertions(+), 101 deletions(-) diff --git a/libIRDB/src/core/generate_spri.cpp b/libIRDB/src/core/generate_spri.cpp index eb5276ae1..9c51a28f8 100644 --- a/libIRDB/src/core/generate_spri.cpp +++ b/libIRDB/src/core/generate_spri.cpp @@ -49,10 +49,32 @@ static string addressify(Instruction_t* insn); // // determine if this branch has a short offset that can't be represented as a long branch // -static int needs_short_branch_rewrite(const DISASM &disasm) +static bool needs_short_branch_rewrite(Instruction_t* newinsn, const DISASM &disasm) { - return strstr(disasm.Instruction.Mnemonic, "jecxz" ) || strstr(disasm.Instruction.Mnemonic, "loop" ) || - strstr(disasm.Instruction.Mnemonic, "loopne") || strstr(disasm.Instruction.Mnemonic, "loope") ; + if (strstr(disasm.Instruction.Mnemonic, "jecxz" ) || strstr(disasm.Instruction.Mnemonic, "loop" ) || + strstr(disasm.Instruction.Mnemonic, "loopne") || strstr(disasm.Instruction.Mnemonic, "loope") ) + return true; + + /* 64-bit has more needs than this */ + if(sizeof(void*)!=8) + return false; + + if(disasm.Instruction.BranchType==0) /* non-branches, jumps, calls and returns don't need this rewrite */ + return false; + if(disasm.Instruction.BranchType==JmpType) + return false; + if(disasm.Instruction.BranchType==CallType) + return false; + if(disasm.Instruction.BranchType==RetType) + return false; + + /* all other branches (on x86-64) need further checking */ + if(!newinsn->GetTarget()) /* no specified target, no need to modify it */ + return false; + string new_target=labelfy(newinsn->GetTarget()); + if (new_target.c_str()[0]=='0') /* if we're jumping back to the base instruction */ + return true; + return false; } @@ -181,6 +203,14 @@ static string get_short_branch_label(Instruction_t *newinsn) return "sj_" + labelfy(newinsn); } +static string get_data_label(Instruction_t *newinsn) +{ + if (!newinsn) + return string(""); + else + return "da_" + labelfy(newinsn); +} + static string getPostCallbackLabel(Instruction_t *newinsn) { if (!newinsn) @@ -195,6 +225,122 @@ static void emit_relocation(FileIR_t* fileIRp, ostream& fout, int offset, string fout<<"\t"<<labelfy(insn)<<" rl " << offset << " "<< type << " " << URLToFile(fileIRp->GetFile()->GetURL()) <<endl; } + +void covert_jump_for_64bit(Instruction_t* newinsn, string &final, string new_target) +{ + /* skip for x86-32 */ + if(sizeof(void*)==4) + return; + + /* skip for labeled addresses */ + if (new_target.c_str()[0]!='0') + return; + + string datalabel=get_data_label(newinsn); + + /* convert a "call <addr>" into "call qword [rel data_label] \n data_label ** dq <addr>" */ + int start=final.find(new_target,0); + + final=final.substr(0,start)+" qword [ rel " +datalabel + "]\n\t"+ datalabel + " ** dq "+final.substr(start); + + return; +} + +void emit_jump(FileIR_t* fileIRp, ostream& fout, DISASM& disasm, Instruction_t* newinsn, Instruction_t *old_insn, string & original_target) +{ + + string label=labelfy(newinsn); + string complete_instr=string(disasm.CompleteInstr); + string address_string=string(disasm.Argument1.ArgMnemonic); + + + /* if we have a target instruction in the database */ + if(newinsn->GetTarget() || needs_short_branch_rewrite(newinsn,disasm)) + { + /* change the target to be symbolic */ + + /* first get the new target */ + string new_target; + if(newinsn->GetTarget()) + new_target=labelfy(newinsn->GetTarget()); + /* if this is a short branch, write this branch to jump to the next insn */ + if(needs_short_branch_rewrite(newinsn,disasm)) + { + new_target=get_short_branch_label(newinsn); + + /* also get the real target if it's a short branch */ + if(newinsn->GetTarget()) + original_target=labelfy(newinsn->GetTarget()); + else + original_target=address_string; + } + + /* find the location in the disassembled string of the old target */ + int start=complete_instr.find(address_string,0); + + /* and build up a new string that has the label of the target instead of the address */ + string final=complete_instr.substr(0,start) + new_target + complete_instr.substr(start+address_string.length()); + + + /* sanity, no segment registers for absolute mode */ + assert(disasm.Argument1.SegmentReg==0); + + covert_jump_for_64bit(newinsn,final, new_target); + + fout<<final<<endl; + + if (new_target.c_str()[0]=='0') + { + // if we're jumping to an absolute address vrs a label, we will need a relocation for this jump instruction + if( + disasm.Instruction.Opcode==0xeb || // jmp with 8-bit addr -- should be recompiled to 32-bit + disasm.Instruction.Opcode==0xe8 || // jmp with 32-bit addr + disasm.Instruction.Opcode==0xe9 // call with 32-bit addr + + ) + { + /* jumps have a 1-byte opcode */ + emit_relocation(fileIRp, fout,1,"32-bit",newinsn); + } + else + { + /* other jcc'often use a 2-byte opcode for far jmps (which is what spri will emit) */ + emit_relocation(fileIRp, fout,2,"32-bit",newinsn); + } + } + } + else /* this instruction has a target, but it's not in the DB */ + { + /* so we'll just emit the instruction and let it go back to the application text. */ + fout<<complete_instr<<endl; +// needs relocation info. + if(complete_instr.compare("call 0x00000000")==0 || + complete_instr.compare("jmp 0x00000000")==0 + ) + { + // just ignore these bogus instructions. + } + else + { + if( + disasm.Instruction.Opcode==0xeb || // jmp with 8-bit addr + disasm.Instruction.Opcode==0xe8 || // jmp with 32-bit addr + disasm.Instruction.Opcode==0xe9 // call with 32-bit addr + ) + { + emit_relocation(fileIRp, fout,1,"32-bit",newinsn); + } + else + { + // assert this is the "main" file and no relocation is necessary. + assert(strstr(fileIRp->GetFile()->GetURL().c_str(),"a.ncexe")!=0); + } + } + } +} + + + // // emit this instruction as spri code. // @@ -275,88 +421,7 @@ static string emit_spri_instruction(FileIR_t* fileIRp, Instruction_t *newinsn, o (disasm.Argument1.ArgType & CONSTANT_TYPE)!=0 // and has a constant argument type 1 ) { - - /* if we have a target instruction in the database */ - if(newinsn->GetTarget() || needs_short_branch_rewrite(disasm)) - { - /* change the target to be symbolic */ - - /* first get the new target */ - string new_target; - if(newinsn->GetTarget()) - new_target=labelfy(newinsn->GetTarget()); - /* if this is a short branch, write this branch to jump to the next insn */ - if(needs_short_branch_rewrite(disasm)) - { - new_target=get_short_branch_label(newinsn); - - /* also get the real target if it's a short branch */ - if(newinsn->GetTarget()) - original_target=labelfy(newinsn->GetTarget()); - else - original_target=address_string; - } - - /* find the location in the disassembled string of the old target */ - int start=complete_instr.find(address_string,0); - - /* and build up a new string that has the label of the target instead of the address */ - string final=complete_instr.substr(0,start) + new_target + complete_instr.substr(start+address_string.length()); - - - /* sanity, no segment registers for absolute mode */ - assert(disasm.Argument1.SegmentReg==0); - - fout<<final<<endl; - - if (new_target.c_str()[0]=='0') - { - // if we're jumping to an absolute address vrs a label, we will need a relocation for this jump instruction - if( - disasm.Instruction.Opcode==0xeb || // jmp with 8-bit addr -- should be recompiled to 32-bit - disasm.Instruction.Opcode==0xe8 || // jmp with 32-bit addr - disasm.Instruction.Opcode==0xe9 // call with 32-bit addr - - ) - { - /* jumps have a 1-byte opcode */ - emit_relocation(fileIRp, fout,1,"32-bit",newinsn); - } - else - { - /* other jcc'often use a 2-byte opcode for far jmps (which is what spri will emit) */ - emit_relocation(fileIRp, fout,2,"32-bit",newinsn); - } - } - } - else /* this instruction has a target, but it's not in the DB */ - { - /* so we'll just emit the instruction and let it go back to the application text. */ - fout<<complete_instr<<endl; -// needs relocation info. - if(complete_instr.compare("call 0x00000000")==0 || - complete_instr.compare("jmp 0x00000000")==0 - ) - { - // just ignore these bogus instructions. - } - else - { - if( - disasm.Instruction.Opcode==0xeb || // jmp with 8-bit addr - disasm.Instruction.Opcode==0xe8 || // jmp with 32-bit addr - disasm.Instruction.Opcode==0xe9 // call with 32-bit addr - ) - { - emit_relocation(fileIRp, fout,1,"32-bit",newinsn); - } - else - { - // assert this is the "main" file and no relocation is necessary. - assert(strstr(fileIRp->GetFile()->GetURL().c_str(),"a.ncexe")!=0); - } - } - } + emit_jump(fileIRp, fout, disasm,newinsn,old_insn, original_target); } else { diff --git a/tools/spasm/spasm.cpp b/tools/spasm/spasm.cpp index 608a020c7..7a7628232 100755 --- a/tools/spasm/spasm.cpp +++ b/tools/spasm/spasm.cpp @@ -2,6 +2,7 @@ #include <vector> #include <regex.h> #include <iostream> +#include <ios> #include <sstream> #include <fstream> #include <map> @@ -10,6 +11,8 @@ #include <climits> #include <cstring> #include <assert.h> +#include <stdint.h> +#include <algorithm> #include "ben_lib.h" @@ -51,11 +54,11 @@ typedef struct bin_instruction { -static unsigned int const ORG_PC = 0xff000000; +static uintptr_t const ORG_PC = 0xff000000; //padding is added to the ORG_PC for the first vpc //the padding amount is [0-PC_PADDING_MAX), i.e., not inclusive of PC_PADDING_MAX static unsigned int const PC_PADDING_MAX = 8001; -static unsigned int vpc = ORG_PC; +static uintptr_t vpc = ORG_PC; static map<string,string> symMap; static map<string,string> callbackMap; @@ -90,7 +93,7 @@ static void resolveSymbols(const string &mapFile); //static vector<bin_instruction_t> parseBin(const string &binFile); //static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector<spasmline_t> &spasmlines, const string &symbolFilename); //static void printVector(const string &outputFile, const vector<string> &lines); -static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception); +static uintptr_t getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception); // // @todo: need to cache results @@ -106,13 +109,13 @@ static string getCallbackAddress(const string &symbolFilename, const string &sym } -static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception) +static uintptr_t getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception) { string symbolFullName = symbolFilename + "+" + symbol; map<string,string>::iterator callbackMapIterator; if(callbackMap.find(symbolFullName) != callbackMap.end()) { - return strtol(callbackMap[symbolFullName].c_str(),NULL,16); + return (uintptr_t)strtoull(callbackMap[symbolFullName].c_str(),NULL,16); } // nm -a stratafier.o.exe | egrep " integer_overflow_detector$" | cut -f1 -d' ' @@ -138,7 +141,7 @@ static int getSymbolAddress(const string &symbolFilename, const string &symbol) callbackMap[symbolFullName] = addressString; - return strtol(addressString.c_str(),NULL,16); + return (uintptr_t) strtoull(addressString.c_str(),NULL,16); } bool fexists(const string &filename) @@ -156,9 +159,16 @@ void a2bspri(const vector<string> &input,const string &outFilename, const string srand(time(0)); - vpc += rand()%PC_PADDING_MAX; + /* make start at 0xff00000000000000 for x86-64 */ + if(sizeof(void*)==8) + { + vpc<<=32; + vpc += rand(); + } + else + vpc += rand()%PC_PADDING_MAX; - cout<<"VPC init loc: "<<hex<<vpc<<endl; + cout<<"VPC init loc: "<<hex<<nouppercase<<vpc<<endl; for(unsigned int i=0;i<input.size();i++) { @@ -357,7 +367,7 @@ static void assemble(const string &assemblyFile) nasm_bit_width="BITS 32"; asmFile<<nasm_bit_width<<endl; - asmFile<<"ORG 0x"<<hex<<vpc<<endl; + asmFile<<"ORG 0x"<<hex<<nouppercase<<vpc<<endl; asmFile<<"[map symbols "<<assemblyFile<<".map]"<<endl; spasmline_t sline; @@ -506,24 +516,27 @@ static void resolveSymbols(const string &mapFile) //and the third is the symbol. char *endptr; char *tok_c_str = const_cast<char*>(tokens[0].c_str()); - long long addrval; - addrval = strtoll(tok_c_str,&endptr,16); + uintptr_t addrval; + addrval = (uintptr_t)strtoull(tok_c_str,&endptr,16); - if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN)) - || ((errno != 0 && addrval == 0) || endptr == tok_c_str)) + if((errno == ERANGE && (addrval == (uintptr_t)ULLONG_MAX || addrval == (uintptr_t)0)) + || ((errno != 0 && addrval == (uintptr_t)0) || endptr == tok_c_str)) { continue; } tok_c_str = const_cast<char*>(tokens[1].c_str()); - addrval = strtoll(tok_c_str,&endptr,16); + addrval = (uintptr_t)strtoull(tok_c_str,&endptr,16); - if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN)) - || ((errno != 0 && addrval == 0) || endptr == tok_c_str)) + if((errno == ERANGE && (addrval == (uintptr_t)ULLONG_MAX || addrval == (uintptr_t)0)) + || ((errno != 0 && addrval == (uintptr_t)0) || endptr == tok_c_str)) { continue; } + // convert tokens[1] to lower case + transform(tokens[1].begin(), tokens[1].end(),tokens[1].begin(), ::tolower ); + if(symMap.find(tokens[2]) != symMap.end()) { symMap[tokens[2]] = tokens[1]; -- GitLab