diff --git a/.gitattributes b/.gitattributes index 8d853a1d4184d2bac1d50fe5975d9a60185ed99e..734cc226315010d3f68197116e6f6bae53e5ed55 100644 --- a/.gitattributes +++ b/.gitattributes @@ -239,6 +239,7 @@ libIRDB/include/core/fileir.hpp -text libIRDB/include/core/function.hpp -text libIRDB/include/core/instruction.hpp -text libIRDB/include/core/pqxxdb.hpp -text +libIRDB/include/core/reloc.hpp -text libIRDB/include/core/variantid.hpp -text libIRDB/include/libIRDB-cfg.hpp -text libIRDB/include/libIRDB-core.hpp -text @@ -261,6 +262,7 @@ libIRDB/src/core/pqxxdb.cpp -text libIRDB/src/core/variantid.cpp -text libIRDB/test/Makefile -text libIRDB/test/calc_conflicts.cpp -text +libIRDB/test/check_thunks.cpp -text libIRDB/test/clone.cpp -text libIRDB/test/create_variant.cpp -text libIRDB/test/create_variantir.cpp -text diff --git a/libIRDB/include/core/file.hpp b/libIRDB/include/core/file.hpp index f0b9d1e2329f9fef192687d32241f543c0a19af3..d6383297483e417bd35683c30d2d7426b7f16489 100644 --- a/libIRDB/include/core/file.hpp +++ b/libIRDB/include/core/file.hpp @@ -4,7 +4,7 @@ class File_t : public BaseObj_t public: // create new item. File_t(db_id_t file_id, db_id_t orig_fid, std::string url, std::string hash, std::string arch, int elfoid, - std::string atn, std::string ftn, std::string itn, db_id_t doipid); + std::string atn, std::string ftn, std::string itn, std::string rtn, db_id_t doipid); File_t(db_id_t file_id) : BaseObj_t(NULL) { assert(0);} // read from DB void WriteToDB() { assert(0); } // writes to DB ID is not -1. @@ -12,6 +12,7 @@ class File_t : public BaseObj_t std::string GetAddressTableName() { return address_table_name; } std::string GetFunctionTableName() { return function_table_name; } std::string GetInstructionTableName() { return instruction_table_name; } + std::string GetRelocationsTableName() { return relocs_table_name; } std::string GetURL() { return url; } void CreateTables(); @@ -23,6 +24,7 @@ class File_t : public BaseObj_t friend class AddressID_t; friend class Instruction_t; friend class VariantID_t; + friend class Relocation_t; @@ -34,5 +36,6 @@ class File_t : public BaseObj_t std::string address_table_name; std::string function_table_name; std::string instruction_table_name; + std::string relocs_table_name; int elfoid; }; diff --git a/libIRDB/include/core/fileir.hpp b/libIRDB/include/core/fileir.hpp index 308a5da1fe67092e42b9eda478aec2aba367762c..4c77987f3c3eaf39248ef8b00996ee74f5db58c1 100644 --- a/libIRDB/include/core/fileir.hpp +++ b/libIRDB/include/core/fileir.hpp @@ -15,6 +15,7 @@ class FileIR_t : public BaseObj_t std::set<Function_t*>& GetFunctions() { return funcs; } std::set<Instruction_t*>& GetInstructions() { return insns; } std::set<AddressID_t*>& GetAddresses() { return addrs; } + std::set<Relocation_t*>& GetRelocations() { return relocs; } // generate the spri rules into the output file, fout. void GenerateSPRI(std::ostream &fout); @@ -37,19 +38,26 @@ class FileIR_t : public BaseObj_t std::set<Function_t*> funcs; std::set<Instruction_t*> insns; std::set<AddressID_t*> addrs; + std::set<Relocation_t*> relocs; VariantID_t progid; File_t* fileptr; std::map<db_id_t,AddressID_t*> ReadAddrsFromDB(); std::map<db_id_t,Function_t*> ReadFuncsFromDB ( - std::map<db_id_t,AddressID_t*> addrMap + std::map<db_id_t,AddressID_t*> &addrMap ); std::map<db_id_t,Instruction_t*> ReadInsnsFromDB ( - std::map<db_id_t,Function_t*> funcMap, - std::map<db_id_t,AddressID_t*> addrMap + std::map<db_id_t,Function_t*> &funcMap, + std::map<db_id_t,AddressID_t*> &addrMap ) ; + void ReadRelocsFromDB + ( + std::map<db_id_t,Instruction_t*> &insnMap + ); + + }; diff --git a/libIRDB/include/core/instruction.hpp b/libIRDB/include/core/instruction.hpp index 1f3513822ea0eb0d192530d92fa87761dd8c0832..2bad0257e60224c4be258bce9198a6783dca96cd 100644 --- a/libIRDB/include/core/instruction.hpp +++ b/libIRDB/include/core/instruction.hpp @@ -40,11 +40,13 @@ class Instruction_t : public BaseObj_t void WriteToDB() { assert(0); } std::string WriteToDB(File_t *fid, db_id_t newid); int Disassemble(DISASM &d); - std::string getDisassembly(); + std::string getDisassembly(); bool Assemble(std::string assembly); bool IsFunctionExit() const; + std::set<Relocation_t*>& GetRelocations() { return relocs; } + static bool SetsStackPointer(DISASM *disasm); static bool SetsStackPointer(ARGTYPE* arg); @@ -59,4 +61,5 @@ class Instruction_t : public BaseObj_t std::string callback; // name of callback handler (if any) std::string comment; AddressID_t* indTarg; + std::set<Relocation_t*> relocs; }; diff --git a/libIRDB/include/core/reloc.hpp b/libIRDB/include/core/reloc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..21d18aa7de70ab9f55a5c745b829bcde398dd3e9 --- /dev/null +++ b/libIRDB/include/core/reloc.hpp @@ -0,0 +1,32 @@ +// An ELF file as represented by the DB +class Relocation_t : public BaseObj_t +{ + public: + + // create new item. + Relocation_t() : BaseObj_t(NULL), offset(0) {} // new reloc w/no data + + // a reloc read from the DB + Relocation_t(db_id_t reloc_id, int _offset, std::string _type) : + BaseObj_t(NULL), offset(_offset), type(_type) { SetBaseID(reloc_id); } + + Relocation_t(db_id_t reloc_id) : BaseObj_t(NULL) { assert(0);} // read from DB + void WriteToDB() { assert(0); } // writes to DB ID is not -1. + std::string WriteToDB(File_t* fid, Instruction_t* insn); // writes to DB, ID is not -1. + + void SetOffset(int off) { offset=off;} + int GetOffset() { return offset; } + void SetType(std::string ty) { type=ty;} + std::string GetType() { return type; } + + friend class FileIR_t; + friend class Function_t; + friend class AddressID_t; + friend class Instruction_t; + friend class VariantID_t; + + private: + int offset; // how far into the instruction the relocation should be applied. + std::string type; // a string that describes the relocation type. for now "32-bit" + // is the only allowable string +}; diff --git a/libIRDB/include/libIRDB-core.hpp b/libIRDB/include/libIRDB-core.hpp index 5b5c46cb71fdf1a302907d36d410eb5db4ea33dd..05064f88cc9d610cdfa6ef5bec70a54d9f433300 100644 --- a/libIRDB/include/libIRDB-core.hpp +++ b/libIRDB/include/libIRDB-core.hpp @@ -14,11 +14,13 @@ namespace libIRDB class VariantID_t; // forward decl for many classes class File_t; // forward decl for many classes +class Instruction_t; // forward decl for many classes #include <core/basetypes.hpp> #include <core/dbinterface.hpp> #include <core/doip.hpp> #include <core/baseobj.hpp> +#include <core/reloc.hpp> #include <core/address.hpp> #include <core/instruction.hpp> #include <core/file.hpp> diff --git a/libIRDB/src/core/file.cpp b/libIRDB/src/core/file.cpp index 247200f3aa8c29f8151bc44089f92353a2ad7d12..df776d585f6939a79be1f956a33ba1807b30c240 100644 --- a/libIRDB/src/core/file.cpp +++ b/libIRDB/src/core/file.cpp @@ -2,13 +2,19 @@ #include <all.hpp> #include <utils.hpp> #include <stdlib.h> +#include <fstream> +#include <iostream> + using namespace libIRDB; +using namespace std; + File_t::File_t(db_id_t myfile_id, db_id_t my_orig_fid, std::string myurl, std::string myhash, std::string myarch, int myoid, - std::string atn, std::string ftn, std::string itn, db_id_t mydoipid) : + std::string atn, std::string ftn, std::string itn, std::string rtn, db_id_t mydoipid) : BaseObj_t(NULL), url(myurl), hash(myhash), arch(myarch), elfoid(myoid), - address_table_name(atn), function_table_name(ftn), instruction_table_name(itn), orig_fid(my_orig_fid) + address_table_name(atn), function_table_name(ftn), instruction_table_name(itn), + relocs_table_name(rtn), orig_fid(my_orig_fid) { SetBaseID(myfile_id); @@ -17,47 +23,27 @@ File_t::File_t(db_id_t myfile_id, db_id_t my_orig_fid, std::string myurl, std::s void File_t::CreateTables() { -/* - * WARNING! If you edit these tables, you must also edit $PEASOUP_HOME/tools/db/*.tbl - */ - - dbintr->IssueQuery( - "CREATE TABLE " + address_table_name + - " ( " - " address_id SERIAL PRIMARY KEY, " - " file_id integer REFERENCES file_info, " - " vaddress_offset integer, " - " doip_id integer DEFAULT -1 " - ");" - ); - - dbintr->IssueQuery( - "CREATE TABLE " + function_table_name + - " ( " - " function_id SERIAL PRIMARY KEY, " - " file_id integer REFERENCES file_info, " - " name text, " - " stack_frame_size integer, " - " doip_id integer DEFAULT -1, " - " out_args_region_size integer, " - " use_frame_pointer integer " - "); " - ); - - dbintr->IssueQuery( - "CREATE TABLE " + instruction_table_name + - " ( " - "instruction_id SERIAL PRIMARY KEY, " - "address_id integer REFERENCES " + address_table_name + ", " + - "parent_function_id integer, " - "orig_address_id integer, " - "fallthrough_address_id integer DEFAULT -1, " - "target_address_id integer DEFAULT -1, " - "data bytea, " - "callback text, " - "comment text, " - "ind_target_address_id integer DEFAULT -1, " - "doip_id integer DEFAULT -1 " - ");" - ); + + // to avoid duplicate schemas for the DB, we're calling + // the script that has the table creation schema in it + string home(getenv("PEASOUP_HOME")); + string tmpfile= "db_script."+to_string(getpid()); + + string command=home+"/tools/db/pdb_create_program_tables.sh "+ + address_table_name+" "+ + function_table_name+" "+ + instruction_table_name+" "+ + relocs_table_name+" "+ + tmpfile; + + system(command.c_str()); + + + std::ifstream t(tmpfile.c_str()); + std::stringstream buffer; + buffer << t.rdbuf(); + + + dbintr->IssueQuery(buffer.str().c_str()); + } diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp index f4937af722a363c3345c41b354e12b2ed953bcbb..9fe0b90c91c298fc7530089a933b411f4dbe75b7 100644 --- a/libIRDB/src/core/fileir.cpp +++ b/libIRDB/src/core/fileir.cpp @@ -51,6 +51,7 @@ void FileIR_t::ReadFromDB() std::map<db_id_t,AddressID_t*> addrMap=ReadAddrsFromDB(); std::map<db_id_t,Function_t*> funcMap=ReadFuncsFromDB(addrMap); std::map<db_id_t,Instruction_t*> insnMap=ReadInsnsFromDB(funcMap,addrMap); + ReadRelocsFromDB(insnMap); UpdateEntryPoints(insnMap); } @@ -58,7 +59,7 @@ void FileIR_t::ReadFromDB() std::map<db_id_t,Function_t*> FileIR_t::ReadFuncsFromDB ( - std::map<db_id_t,AddressID_t*> addrMap + std::map<db_id_t,AddressID_t*> &addrMap ) { std::map<db_id_t,Function_t*> idMap; @@ -144,8 +145,8 @@ std::map<db_id_t,AddressID_t*> FileIR_t::ReadAddrsFromDB std::map<db_id_t,Instruction_t*> FileIR_t::ReadInsnsFromDB ( - std::map<db_id_t,Function_t*> funcMap, - std::map<db_id_t,AddressID_t*> addrMap + std::map<db_id_t,Function_t*> &funcMap, + std::map<db_id_t,AddressID_t*> &addrMap ) { std::map<db_id_t,Instruction_t*> idMap; @@ -224,6 +225,36 @@ std::map<db_id_t,Instruction_t*> FileIR_t::ReadInsnsFromDB return idMap; } +void FileIR_t::ReadRelocsFromDB + ( + std::map<db_id_t,Instruction_t*> &insnMap + ) +{ + std::string q= "select * from " + fileptr->relocs_table_name + " ; "; + dbintr->IssueQuery(q); + + while(!dbintr->IsDone()) + { + db_id_t reloc_id=atoi(dbintr->GetResultColumn("reloc_id").c_str()); + int reloc_offset=atoi(dbintr->GetResultColumn("reloc_offset").c_str()); + std::string reloc_type=(dbintr->GetResultColumn("reloc_type")); + db_id_t instruction_id=atoi(dbintr->GetResultColumn("instruction_id").c_str()); + db_id_t doipid=atoi(dbintr->GetResultColumn("doip_id").c_str()); + + Relocation_t *reloc=new Relocation_t(reloc_id,reloc_offset,reloc_type); + + assert(insnMap[instruction_id]!=NULL); + + insnMap[instruction_id]->GetRelocations().insert(reloc); + relocs.insert(reloc); + + dbintr->MoveToNextRow(); + } + +} + + + void FileIR_t::WriteToDB() { @@ -235,6 +266,7 @@ void FileIR_t::WriteToDB() dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->instruction_table_name + string(" cascade;")); dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->function_table_name + string(" cascade;")); dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->address_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->relocs_table_name + string(" cascade;")); /* and now that everything has an ID, let's write to the DB */ string q=string(""); @@ -272,11 +304,11 @@ void FileIR_t::WriteToDB() q=string(""); } } + dbintr->IssueQuery(q); } - void FileIR_t::SetBaseIDS() { #define MAX(a,b) (((a)>(b)) ? (a) : (b)) @@ -289,6 +321,8 @@ void FileIR_t::SetBaseIDS() j=MAX(j,(*i)->GetBaseID()); for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) j=MAX(j,(*i)->GetBaseID()); + for(std::set<Relocation_t*>::const_iterator i=relocs.begin(); i!=relocs.end(); ++i) + j=MAX(j,(*i)->GetBaseID()); /* increment past the max ID so we don't duplicate */ j++; @@ -303,4 +337,21 @@ void FileIR_t::SetBaseIDS() for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); + for(std::set<Relocation_t*>::const_iterator i=relocs.begin(); i!=relocs.end(); ++i) + if((*i)->GetBaseID()==NOT_IN_DATABASE) + (*i)->SetBaseID(j++); +} + +std::string Relocation_t::WriteToDB(File_t* fid, Instruction_t* myinsn) +{ + string q; + q ="insert into " + fid->relocs_table_name; + q+="(reloc_id,reloc_offset,reloc_type,instruction_id,doip_id) "+ + string(" VALUES (") + + string("'") + to_string(GetBaseID()) + string("', ") + + string("'") + to_string(offset) + string("', ") + + string("'") + (type) + string("', ") + + string("'") + to_string(myinsn->GetBaseID()) + string("', ") + + string("'") + to_string(GetDoipID()) + string("') ; ") ; + return q; } diff --git a/libIRDB/src/core/generate_spri.cpp b/libIRDB/src/core/generate_spri.cpp index fa6a79db16284caa230bc3d251198c19ab64a126..27fd5c8e3a40d2e5822504fde50fcb81001cdc15 100644 --- a/libIRDB/src/core/generate_spri.cpp +++ b/libIRDB/src/core/generate_spri.cpp @@ -13,6 +13,10 @@ using namespace libIRDB; using namespace std; +// forward decls for this file +static string qualified_addressify(FileIR_t* fileIRp, Instruction_t *insn); +static string labelfy(Instruction_t* insn); + // // the set of insturctions that have control @@ -51,12 +55,41 @@ static int needs_short_branch_rewrite(const DISASM &disasm) // // create a label for the given instruction // +static string qualified_labelfy(FileIR_t* fileIRp, Instruction_t* insn) +{ + if(!needs_spri_rule(insn, insnMap[insn])) + return qualified_addressify(fileIRp, insn); + + return labelfy(insn); +} + +static int label_offset=0; + +static void update_label_offset(FileIR_t *firp) +{ + int max=0; + for(set<Instruction_t*>::iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it) + { + Instruction_t *insn=*it; + if(insn->GetBaseID()>max) + max=insn->GetBaseID()+100; + } + label_offset+=max; +} + +static int IDToSPRIID(int id) +{ + return id+label_offset; +} + static string labelfy(Instruction_t* insn) { if(!needs_spri_rule(insn, insnMap[insn])) return addressify(insn); - return string("Label_insn_") + to_string(insn->GetBaseID()); + return string("LI_") + to_string(IDToSPRIID(insn->GetBaseID())); } @@ -78,12 +111,49 @@ static string addressify(Instruction_t* insn) } +static string URLToFile(string url) +{ + int loc=0; + + loc=url.find('/'); + while(loc!=string::npos) + { + url=url.substr(loc+1,url.length()-loc-1); + + loc=url.find('/'); + } + // maybe need to check filename for odd characters + + return url; +} + +static string qualify(FileIR_t* fileIRp) +{ + return URLToFile(fileIRp->GetFile()->GetURL()) + "+" ; +} + +static string qualify_address(FileIR_t* fileIRp, int addr) +{ + stringstream ss; + ss<< qualify(fileIRp) << "0x" << std::hex << (addr); + return ss.str(); +} + +static string qualified_addressify(FileIR_t* fileIRp, Instruction_t *insn) +{ + string address=addressify(insn); + if(address.c_str()[0]=='0') + return qualify(fileIRp) + address; + return address; + +} + static string get_short_branch_label(Instruction_t *newinsn) { if (!newinsn) return string(""); else - return "short_jump_" + labelfy(newinsn); + return "sj_" + labelfy(newinsn); } static string getPostCallbackLabel(Instruction_t *newinsn) @@ -91,13 +161,19 @@ static string getPostCallbackLabel(Instruction_t *newinsn) if (!newinsn) return string(""); else - return "post_callback_" + labelfy(newinsn); + return "pcb_" + labelfy(newinsn); +} + + +static void emit_relocation(FileIR_t* fileIRp, ostream& fout, int offset, string type, Instruction_t* insn) +{ + fout<<"\t"<<labelfy(insn)<<" rl " << offset << " "<< type << " " << URLToFile(fileIRp->GetFile()->GetURL()) <<endl; } // // emit this instruction as spri code. // -static string emit_spri_instruction(Instruction_t *newinsn, ostream& fout) +static string emit_spri_instruction(FileIR_t* fileIRp, Instruction_t *newinsn, ostream& fout) { string original_target; Instruction_t* old_insn=insnMap[newinsn]; @@ -166,6 +242,7 @@ static string emit_spri_instruction(Instruction_t *newinsn, ostream& fout) /* if we have a target instruction in the database */ if(newinsn->GetTarget() || needs_short_branch_rewrite(disasm)) { +// assert(0); // may need relocation info if target() is in the DB, but not being rewritten /* change the target to be symbolic */ /* first get the new target */ @@ -189,16 +266,25 @@ static string emit_spri_instruction(Instruction_t *newinsn, ostream& fout) /* and build up a new string that has the label of the target instead of the address */ string final=complete_instr.substr(0,start) + new_target + complete_instr.substr(start+address_string.length()); + /* sanity, no segment registers for absolute mode */ assert(disasm.Argument1.SegmentReg==0); - fout<<final; + fout<<final<<endl; + + if (new_target.c_str()[0]=='0') + { + // if we're jumping to an absolute address vrs a label, we will need a relocation for this jump instruction + emit_relocation(fileIRp, fout,1,"32-bit",newinsn); + } } else /* this instruction has a target, but it's not in the DB */ { /* so we'll just emit the instruction and let it go back to the application text. */ - fout<<complete_instr; + fout<<complete_instr<<endl; +// needs relocation info. + assert(0); } } else @@ -266,10 +352,14 @@ static string emit_spri_instruction(Instruction_t *newinsn, ostream& fout) } fout<<disasm.CompleteInstr; + fout<<endl; } - fout<<endl; - + for(set<Relocation_t*>::iterator it=newinsn->GetRelocations().begin(); it!=newinsn->GetRelocations().end(); ++it) + { + Relocation_t* this_reloc=*it; + emit_relocation(fileIRp, fout, this_reloc->GetOffset(),this_reloc->GetType(), newinsn); + } return original_target; } @@ -336,15 +426,9 @@ static bool needs_spri_rule(Instruction_t* newinsn,Instruction_t* oldinsn) // // emit the spri rule to redirect this instruction. // -static void emit_spri_rule(Instruction_t* newinsn, ostream& fout) +static void emit_spri_rule(FileIR_t* fileIRp, Instruction_t* newinsn, ostream& fout) { -#if 0 -We need to emit a rule of this form - L_insn_id -> . - . ** data bits with pc rel address taken care of. - . -> fallthrough label -#endif Instruction_t* old_insn=insnMap[newinsn]; @@ -361,28 +445,28 @@ We need to emit a rule of this form unmoved_insn_targets.find(newinsn) != unmoved_insn_targets.end() ) { - fout << addressify(newinsn) <<" -> ."<<endl; + fout << qualified_addressify(fileIRp, newinsn) <<" -> ."<<endl; } else { fout << "# eliding, no indirect targets"<<endl; - fout << addressify(newinsn) <<" -> 0x0 " <<endl; + fout << qualified_addressify(fileIRp, newinsn) <<" -> 0x0 " <<endl; } } else if (newinsn->GetIndirectBranchTargetAddress()) { - fout << "0x" << std::hex << newinsn->GetIndirectBranchTargetAddress()->GetVirtualOffset() <<" -> ."<<endl; + fout << qualify_address(fileIRp,newinsn->GetIndirectBranchTargetAddress()->GetVirtualOffset()) <<" -> ."<<endl; fout << ". -> "<< getPostCallbackLabel(newinsn) <<endl; } - string original_target=emit_spri_instruction(newinsn, fout); + string original_target=emit_spri_instruction(fileIRp, newinsn, fout); /* if there's a fallthrough instruction, jump to it. */ if(newinsn->GetFallthrough()) { - fout << ". -> " << labelfy(newinsn->GetFallthrough())<<endl; + fout << ". -> " << qualified_labelfy(fileIRp,newinsn->GetFallthrough())<<endl; } else { @@ -399,7 +483,7 @@ We need to emit a rule of this form { assert(old_insn); /* it's an error to insert a new, non-unconditional branch instruction * and not specify it's fallthrough */ - fout << ". -> 0x" << std::hex << old_insn->GetAddress()->GetVirtualOffset()+instr_len <<endl; + fout << ". -> " << qualify(fileIRp)<< "0x" << std::hex << old_insn->GetAddress()->GetVirtualOffset()+instr_len <<endl; } } @@ -410,6 +494,9 @@ We need to emit a rule of this form */ if(!original_target.empty()) { + /* qualify this target if necessary */ + if(original_target.c_str()[0]=='0') + original_target=qualify(fileIRp)+original_target; fout << "\t" << get_short_branch_label(newinsn) << "\t -> \t " << original_target << endl; } @@ -420,7 +507,7 @@ We need to emit a rule of this form // // generate a map from new instructions to old instructions // -static void generate_insn_to_insn_maps(FileIR_t *varirp, FileIR_t *orig_varirp) +static void generate_insn_to_insn_maps(FileIR_t *fileIRp, FileIR_t *orig_fileIRp) { static map<Instruction_t*,Instruction_t*> new_insnMap; insnMap=new_insnMap; // re-init the global instruction map. @@ -435,8 +522,8 @@ static void generate_insn_to_insn_maps(FileIR_t *varirp, FileIR_t *orig_varirp) /* loop through each insn in the original program */ for( - std::set<Instruction_t*>::const_iterator it=orig_varirp->GetInstructions().begin(); - it!=orig_varirp->GetInstructions().end(); + std::set<Instruction_t*>::const_iterator it=orig_fileIRp->GetInstructions().begin(); + it!=orig_fileIRp->GetInstructions().end(); ++it ) { @@ -458,8 +545,8 @@ static void generate_insn_to_insn_maps(FileIR_t *varirp, FileIR_t *orig_varirp) /* loop through the new variant and create the final mapping of new insn to old insn */ for( - std::set<Instruction_t*>::const_iterator it=varirp->GetInstructions().begin(); - it!=varirp->GetInstructions().end(); + std::set<Instruction_t*>::const_iterator it=fileIRp->GetInstructions().begin(); + it!=fileIRp->GetInstructions().end(); ++it ) { @@ -485,15 +572,31 @@ static void generate_insn_to_insn_maps(FileIR_t *varirp, FileIR_t *orig_varirp) // void FileIR_t::GenerateSPRI(ostream &fout) { - if(orig_variant_ir_p==NULL) + VariantID_t orig_varidp(progid.GetOriginalVariantID()); + assert(orig_varidp.IsRegistered()==true); + + for( + set<File_t*>::iterator it=orig_varidp.GetFiles().begin(); + it!=orig_varidp.GetFiles().end(); + ++it + ) { - VariantID_t orig_varidp(progid.GetOriginalVariantID()); - assert(orig_varidp.IsRegistered()==true); - orig_variant_ir_p=new FileIR_t(orig_varidp); + File_t* the_file=*it; + + if(the_file->GetBaseID()==fileptr->orig_fid) + { + fout <<"# Generating spri for "<< the_file->GetURL()<<endl; + + orig_variant_ir_p=new FileIR_t(orig_varidp,the_file); + this->GenerateSPRI(orig_variant_ir_p,fout); + delete orig_variant_ir_p; + orig_variant_ir_p=NULL; + } + } - this->GenerateSPRI(orig_variant_ir_p,fout); + } @@ -501,11 +604,11 @@ void FileIR_t::GenerateSPRI(ostream &fout) // generate_unmoved_insn_targets_set -- create the set of insturctions that have control // transfers to them (including fallthrough type control transfers) from instructions that do not need a spri rule. // -static void generate_unmoved_insn_targets_set(FileIR_t* varirp) +static void generate_unmoved_insn_targets_set(FileIR_t* fileIRp) { for( - set<Instruction_t*>::const_iterator it=varirp->GetInstructions().begin(); - it!=varirp->GetInstructions().end(); + set<Instruction_t*>::const_iterator it=fileIRp->GetInstructions().begin(); + it!=fileIRp->GetInstructions().end(); ++it ) { @@ -527,27 +630,27 @@ static void generate_unmoved_insn_targets_set(FileIR_t* varirp) } -void FileIR_t::GenerateSPRI(FileIR_t *orig_varirp, ostream &fout) +void FileIR_t::GenerateSPRI(FileIR_t *orig_fileIRp, ostream &fout) { // give 'this' a name - FileIR_t *varirp=this; + FileIR_t *fileIRp=this; SetBaseIDS(); // need unique ID to generate unique label name // generate the map from new instruction to old instruction needed for this transform. - generate_insn_to_insn_maps(varirp, orig_varirp); + generate_insn_to_insn_maps(fileIRp, orig_fileIRp); - // generate unmoved_insn_targets_set -- the set of insturctions that have control + // generate unmoved_insn_targets_set -- the set of instructions that have control // transfers to them (including fallthrough type control transfers) from instructions that do not need a spri rule. - generate_unmoved_insn_targets_set(varirp); + generate_unmoved_insn_targets_set(fileIRp); // // for each instruction, compare the new instruction with the original instruction and see if // they are the same. If so, do nothing, otherwise emit a rewrite rule for this instruction. // for( - std::set<Instruction_t*>::const_iterator it=varirp->GetInstructions().begin(); - it!=varirp->GetInstructions().end(); + std::set<Instruction_t*>::const_iterator it=fileIRp->GetInstructions().begin(); + it!=fileIRp->GetInstructions().end(); ++it ) { @@ -558,9 +661,10 @@ void FileIR_t::GenerateSPRI(FileIR_t *orig_varirp, ostream &fout) if(needs_spri_rule(newinsn,oldinsn)) { - emit_spri_rule(newinsn,fout); + emit_spri_rule(fileIRp,newinsn,fout); } } + update_label_offset(fileIRp); } diff --git a/libIRDB/src/core/instruction.cpp b/libIRDB/src/core/instruction.cpp index 14529259fa06cf9ca22635e2f8026bb8866c18b6..66d754c11de0e6a23bf60814d3f4cd20d379b2d6 100644 --- a/libIRDB/src/core/instruction.cpp +++ b/libIRDB/src/core/instruction.cpp @@ -173,6 +173,13 @@ string Instruction_t::WriteToDB(File_t *fid, db_id_t newid) string("'") + to_string(indirect_bt_id) + string("', ") + string("'") + to_string(GetDoipID()) + string("') ; ") ; + // for each relocation in this instruction + for(set<Relocation_t*>::iterator it=relocs.begin(); it!=relocs.end(); ++it) + { + Relocation_t* reloc=*it; + q+=reloc->WriteToDB(fid,this); + } + return q; } diff --git a/libIRDB/src/core/variantid.cpp b/libIRDB/src/core/variantid.cpp index a839ffb79c7b48421d95ddc3e2f480a53b769006..0608b7bd3cf135972235cee134ba582d4c588c06 100644 --- a/libIRDB/src/core/variantid.cpp +++ b/libIRDB/src/core/variantid.cpp @@ -15,62 +15,13 @@ VariantID_t::VariantID_t() : schema_ver=CURRENT_SCHEMA; orig_pid=-1; name=""; -#if 0 - address_table_name=""; - function_table_name=""; - instruction_table_name=""; -#endif } void VariantID_t::CreateTables() { -assert(0); -#if 0 -/* - * WARNING! If you edit these tables, you must also edit $PEASOUP_HOME/tools/db/*.tbl - */ - - dbintr->IssueQuery( - "CREATE TABLE " + address_table_name + - " ( " - " address_id SERIAL PRIMARY KEY, " - " file_id integer REFERENCES file_info, " - " vaddress_offset integer, " - " doip_id integer DEFAULT -1 " - ");" - ); - - dbintr->IssueQuery( - "CREATE TABLE " + function_table_name + - " ( " - " function_id SERIAL PRIMARY KEY, " - " file_id integer REFERENCES file_info, " - " name text, " - " stack_frame_size integer, " - " doip_id integer DEFAULT -1, " - " out_args_region_size integer, " - " use_frame_pointer integer " - "); " - ); - - dbintr->IssueQuery( - "CREATE TABLE " + instruction_table_name + - " ( " - "instruction_id SERIAL PRIMARY KEY, " - "address_id integer REFERENCES " + address_table_name + ", " + - "parent_function_id integer, " - "orig_address_id integer, " - "fallthrough_address_id integer DEFAULT -1, " - "target_address_id integer DEFAULT -1, " - "data bytea, " - "callback text, " - "comment text, " - "ind_target_address_id integer DEFAULT -1, " - "doip_id integer DEFAULT -1 " - ");" - ); -#endif + // note: this tables are now part of File_t. + assert(0); } VariantID_t::VariantID_t(db_id_t pid) : BaseObj_t(NULL) @@ -214,6 +165,7 @@ File_t* VariantID_t::CloneFile(File_t* fptr) std::string atn="atnfid"+to_string(newfid); std::string ftn="ftnfid"+to_string(newfid); std::string itn="itnfid"+to_string(newfid); + std::string rtn="rtnfid"+to_string(newfid); q ="update file_info set address_table_name='"; q+=atn; @@ -221,13 +173,16 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=ftn; q+="', instruction_table_name='"; q+=itn; + q+="', relocs_table_name='"; + q+=rtn; q+="' where file_id='"; q+=to_string(newfid); q+="' ; "; dbintr->IssueQuery(q); - File_t* newfile=new File_t(newfid, fptr->orig_fid, fptr->url, fptr->hash, fptr->arch, fptr->elfoid, atn, ftn, itn, fptr->GetDoipID()); + File_t* newfile=new File_t(newfid, fptr->orig_fid, fptr->url, fptr->hash, fptr->arch, fptr->elfoid, + atn, ftn, itn, rtn, fptr->GetDoipID()); newfile->CreateTables(); @@ -247,6 +202,11 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=" ; "; dbintr->IssueQuery(q); + q="drop table "; + q+=rtn; + q+=" ; "; + dbintr->IssueQuery(q); + // next issue SQL to clone each table q="select * into "; @@ -270,6 +230,15 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=" ;"; dbintr->IssueQuery(q); + q="select * into "; + q+=rtn; + q+=" from "; + q+=fptr->relocs_table_name; + q+=" ;"; + dbintr->IssueQuery(q); + + // update the variant dependency table to represent the deep clone + // update the variant dependency table to represent the deep clone q = "update variant_dependency set file_id='" + to_string(newfid) + @@ -307,11 +276,6 @@ std::ostream& libIRDB::operator<<(std::ostream& out, const VariantID_t& pid) "schema="<<pid.schema_ver<<":" "orig_pid="<<pid.orig_pid<<":" "name="<<pid.name<<":" -#if 0 - "ATN="<<pid.address_table_name<<":" - "FTN="<<pid.function_table_name<<":" - "ITN="<<pid.instruction_table_name<< -#endif ")" ; return out; } @@ -322,11 +286,6 @@ void VariantID_t::DropFromDB() assert(IsRegistered()); string q; -#if 0 - q =string("drop table ")+instruction_table_name + string(" cascade;"); - q+=string("drop table ")+address_table_name + string(" cascade;"); - q+=string("drop table ")+function_table_name + string(" cascade;"); -#endif q+=string("delete from variant_dependency where variant_id = '") + to_string(GetBaseID()) + string("';"); q+=string("delete from variant_info where variant_id = '") + to_string(GetBaseID()) + string("';"); @@ -334,9 +293,6 @@ void VariantID_t::DropFromDB() SetBaseID(NOT_IN_DATABASE); orig_pid=NOT_IN_DATABASE; -#if 0 - name=instruction_table_name=address_table_name=function_table_name=string(""); -#endif schema_ver=CURRENT_SCHEMA; } @@ -363,7 +319,7 @@ void VariantID_t::ReadFilesFromDB() { std::string q= "select file_info.orig_file_id, file_info.address_table_name, file_info.instruction_table_name, " - " file_info.function_table_name, file_info.file_id, file_info.url, file_info.hash," + " file_info.function_table_name, file_info.relocs_table_name, file_info.file_id, file_info.url, file_info.hash," " file_info.arch, file_info.type, file_info.elfoid, file_info.doip_id " " from file_info,variant_dependency " " where variant_dependency.variant_id = '" + to_string(GetBaseID()) + "' AND " @@ -385,10 +341,11 @@ void VariantID_t::ReadFilesFromDB() std::string atn=(BaseObj_t::dbintr->GetResultColumn("address_table_name")); std::string ftn=(BaseObj_t::dbintr->GetResultColumn("function_table_name")); std::string itn=(BaseObj_t::dbintr->GetResultColumn("instruction_table_name")); + std::string rtn=(BaseObj_t::dbintr->GetResultColumn("relocs_table_name")); - File_t *newfile=new File_t(file_id,orig_fid,url,hash,type,oid,atn,ftn,itn,doipid); + File_t *newfile=new File_t(file_id,orig_fid,url,hash,type,oid,atn,ftn,itn,rtn,doipid); //std::cout<<"Found file "<<file_id<<"."<<std::endl; diff --git a/libIRDB/test/Makefile b/libIRDB/test/Makefile index c83f7114d37cb8b136b6b136f2fef07f1e378eac..dac0ffcb032a837e43dd6466b7f92720e3da26f1 100644 --- a/libIRDB/test/Makefile +++ b/libIRDB/test/Makefile @@ -11,8 +11,8 @@ $(PROGS): ../lib/libIRDB-core.a ../lib/libIRDB-cfg.a read_ehframe.exe: unwind-pe.h g++ -g read_ehframe.cpp -DTEST -I../include/ -I../../beaengine/include -L ../lib/ -lIRDB-core -lIRDB-cfg -lpqxx -L ../../beaengine/lib/Linux.gnu.Debug -lBeaEngine_s_d -o $@ -fill_in_indtargs.exe: read_ehframe.o fill_in_indtargs.o - g++ -g fill_in_indtargs.o read_ehframe.o -I../include/ -I../../beaengine/include -L ../lib/ -lIRDB-core -lIRDB-cfg -lpqxx -L ../../beaengine/lib/Linux.gnu.Debug -lBeaEngine_s_d -o $@ +fill_in_indtargs.exe: read_ehframe.o fill_in_indtargs.o check_thunks.o + g++ -g fill_in_indtargs.o read_ehframe.o check_thunks.o -I../include/ -I../../beaengine/include -L ../lib/ -lIRDB-core -lIRDB-cfg -lpqxx -L ../../beaengine/lib/Linux.gnu.Debug -lBeaEngine_s_d -o $@ .o.exe: $< ../lib/libIRDB-core.a ../lib/libIRDB-cfg.a diff --git a/libIRDB/test/check_thunks.cpp b/libIRDB/test/check_thunks.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be3156e66fdc4cea2ef250369557e8a5b9ebb312 --- /dev/null +++ b/libIRDB/test/check_thunks.cpp @@ -0,0 +1,369 @@ + + + + + +#include <libIRDB-core.hpp> +#include <libIRDB-cfg.hpp> +#include <utils.hpp> +#include <iostream> +#include <stdlib.h> +#include "beaengine/BeaEngine.h" +#include <assert.h> +#include <string.h> +#include <elf.h> + + + + +using namespace libIRDB; +using namespace std; + +#define HIWORD(a) ((a)&0xFFFF0000) + + +/* + * check_for_thunk_offsets - check non-function thunks for extra offsets + */ +void check_for_thunk_offsets(FileIR_t* firp, Instruction_t *thunk_insn, string reg, string offset) +{ + + void possible_target(int p); + + + int thunk_base=thunk_insn->GetFallthrough()->GetAddress()->GetVirtualOffset()+ + strtol(offset.c_str(),NULL,16); + int thunk_call_addr=thunk_insn->GetAddress()->GetVirtualOffset(); + int thunk_call_offset=strtol(offset.c_str(),NULL,16); + + + /* don't check inserted thunk addresses */ + if(thunk_insn->GetAddress()->GetVirtualOffset()==0) + return; + + for( + set<Instruction_t*>::iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it + ) + { + // if it has a targ and fallthrough (quick test) it might be a call + Instruction_t* insn=*it; + DISASM d; + insn->Disassemble(d); + + if(string(d.Instruction.Mnemonic)==string("add ")) + { + // check that arg2 is a constant + if(HIWORD(d.Argument2.ArgType)!=CONSTANT_TYPE+ABSOLUTE_) + continue; + + string add_offset=string(d.Argument2.ArgMnemonic); + + int addoff=strtol(add_offset.c_str(),NULL,16); + + /* bounds check gently */ + if(0<addoff && addoff<100) + continue; + + /* record that there's a possible target here */ + cout <<"Possible thunk target (add): call:"<<thunk_call_addr<<" offset:"<<thunk_call_offset + <<" addoff: " << addoff << " total: "<< (thunk_base+addoff)<<endl; + possible_target(thunk_base+addoff); + } + else if(string(d.Instruction.Mnemonic)==string("lea ")) + { + assert (d.Argument2.ArgType==MEMORY_TYPE); + + /* no indexing please! */ + if(d.Argument2.Memory.IndexRegister!=0) + continue; + + int leaoff=d.Argument2.Memory.Displacement; + + /* bounds check gently */ + if(0<leaoff && leaoff<100) + continue; + + /* record that there's a possible target here */ + cout <<"Possible thunk target (lea): call:"<<thunk_call_addr<<" offset:"<<thunk_call_offset + <<" leaoff: " << leaoff << " total: "<< (thunk_base+leaoff)<<endl; + possible_target(thunk_base+leaoff); + + } + + } +} + + +/* + * check_func_for_thunk_offsets - we know that insn represents a thunk call, with reg+offset as the constant. + * check the rest of the function for offsets that might help form a code pointer. + */ +void check_func_for_thunk_offsets(Function_t *func, Instruction_t* thunk_insn, + string reg, string offset) +{ + + void possible_target(int p); + + + int thunk_base=thunk_insn->GetFallthrough()->GetAddress()->GetVirtualOffset()+ + strtol(offset.c_str(),NULL,16); + int thunk_call_addr=thunk_insn->GetAddress()->GetVirtualOffset(); + int thunk_call_offset=strtol(offset.c_str(),NULL,16); + + + /* don't check inserted thunk addresses */ + if(thunk_insn->GetAddress()->GetVirtualOffset()==0) + return; + + for( + set<Instruction_t*>::iterator it=func->GetInstructions().begin(); + it!=func->GetInstructions().end(); + ++it + ) + { + // if it has a targ and fallthrough (quick test) it might be a call + Instruction_t* insn=*it; + DISASM d; + insn->Disassemble(d); + + if(string(d.Instruction.Mnemonic)==string("add ")) + { + // check that arg2 is a constant + if(HIWORD(d.Argument2.ArgType)!=CONSTANT_TYPE+ABSOLUTE_) + continue; + + string add_offset=string(d.Argument2.ArgMnemonic); + + int addoff=strtol(add_offset.c_str(),NULL,16); + + /* bounds check gently */ + if(0<addoff && addoff<100) + continue; + + /* record that there's a possible target here */ + cout <<"Possible thunk target (add): call:"<<thunk_call_addr<<" offset:"<<thunk_call_offset + <<" addoff: " << addoff << " total: "<< (thunk_base+addoff)<<endl; + possible_target(thunk_base+addoff); + } + else if(string(d.Instruction.Mnemonic)==string("lea ")) + { + assert (d.Argument2.ArgType==MEMORY_TYPE); + + /* no indexing please! */ + if(d.Argument2.Memory.IndexRegister!=0) + continue; + + int leaoff=d.Argument2.Memory.Displacement; + + /* bounds check gently */ + if(0<leaoff && leaoff<100) + continue; + + /* record that there's a possible target here */ + cout <<"Possible thunk target (lea): call:"<<thunk_call_addr<<" offset:"<<thunk_call_offset + <<" leaoff: " << leaoff << " total: "<< (thunk_base+leaoff)<<endl; + possible_target(thunk_base+leaoff); + + } + + } +} + + +/* + * is_thunk_load - look for a mov reg<[esp], return reg in output parameter. + */ +bool is_thunk_load(Instruction_t* insn, string ®) +{ + DISASM d; + insn->Disassemble(d); + + if(string(d.Instruction.Mnemonic)!=string("mov ")) + return false; + + if(d.Argument2.ArgType!=MEMORY_TYPE || string(d.Argument2.ArgMnemonic)!=string("esp")) + return false; + + reg=string(d.Argument1.ArgMnemonic); + return true; +} + +/* + * is_ret - return trun if insn is a return + */ +bool is_ret(Instruction_t* insn) +{ + DISASM d; + insn->Disassemble(d); + + if(d.Instruction.BranchType!=RetType) + return false; + + return true; +} + + +/* + * is_thunk_call - check if this instruction is a call to a thunk function, return the thunk function's reg. + */ +/* note: reg is output paramater */ +bool is_thunk_call(Instruction_t* insn, string ®) +{ + DISASM d; + insn->Disassemble(d); + + /* not a call */ + if(d.Instruction.BranchType!=CallType) + return false; + + /* no target in IRDB */ + if(insn->GetTarget()==NULL) + return false; + + /* Target not the right type of load */ + if(!is_thunk_load(insn->GetTarget(),reg)) + return false; + + /* target has no FT? */ + if(!insn->GetTarget()->GetFallthrough()) + return false; + + /* target's FT is a return insn */ + if(!is_ret(insn->GetTarget()->GetFallthrough())) + return false; + + return true; +} + +/* + * is_thunk_add - Check the given instruction for an add of reg, return the constant K1 + */ +/* note: offset is an output parameter */ +bool is_thunk_add(Instruction_t *insn, string reg, string &offset) +{ + DISASM d; + insn->Disassemble(d); + + // make sure it's an add instruction + if(string(d.Instruction.Mnemonic)!=string("add ")) + return false; + + // check that it's an add of the proper reg + if(string(d.Argument1.ArgMnemonic)!=reg) + return false; + + // check that arg2 is a constant + if(HIWORD(d.Argument2.ArgType)!=CONSTANT_TYPE+ABSOLUTE_) + return false; + + offset=string(d.Argument2.ArgMnemonic); + + int intoff=strtol(offset.c_str(),NULL,16); + + /* bounds check gently */ + if(0<intoff && intoff<100) + return false; + + return true; +} + +/* + * check_func_for_thunk_calls - check this function for a thunk call (see check_for_thunks for description of thunk calls) + */ +void check_func_for_thunk_calls(Function_t* func) +{ + // for each insn in the func + for( + set<Instruction_t*>::iterator it=func->GetInstructions().begin(); + it!=func->GetInstructions().end(); + ++it + ) + { + // if it has a targ and fallthrough (quick test) it might be a call + Instruction_t* insn=*it; + /* check if we might be calling a thunk */ + if(insn->GetFallthrough() && insn->GetTarget()) + { + + // check for a call, followed by an add of reg (note the output params of reg and offset) + string reg,offset; + if(is_thunk_call(insn,reg) && + is_thunk_add(insn->GetFallthrough(),reg,offset)) + { + check_func_for_thunk_offsets(func,insn,reg,offset); + } + } + } +} + + + +void check_non_funcs_for_thunks(FileIR_t *firp) +{ + // for each insn in the func + for( + set<Instruction_t*>::iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it + ) + { + // if it has a targ and fallthrough (quick test) it might be a call + Instruction_t* insn=*it; + + /* these instructions/thunks are checked with the functions */ +#if 0 + if(insn->GetFunction()) + continue; +#endif + + /* check if we might be calling a thunk */ + if(insn->GetFallthrough() && insn->GetTarget()) + { + + // check for a call, followed by an add of reg (note the output params of reg and offset) + string reg,offset; + if(is_thunk_call(insn,reg) && + is_thunk_add(insn->GetFallthrough(),reg,offset)) + { + check_for_thunk_offsets(firp,insn,reg,offset); + } + } + } +} + + +/* + * check_for_thunks - + * + * check the program (file) for this pattern: + * + * call ebx_thunk + * L1: add K1,%ebx + * + * ebx_thunk: mov ebx <- [esp] + * ret + * + * If found, check the function for L1+K1+K2 (where K2 is any constant in the function) + * If L1+k1+k2 is found, and points at a code address (outside this function?), mark it as an indirect branch target. + * + */ +void check_for_thunks(FileIR_t* firp) +{ +// this doesn't work for now. and consequently, ILR won't work with -fPIC. need to fix for shared libs. +return; + for( + set<Function_t*>::iterator it=firp->GetFunctions().begin(); + it!=firp->GetFunctions().end(); + ++it + ) + { + Function_t* func=*it; + check_func_for_thunk_calls(func); + + } + + check_non_funcs_for_thunks(firp); +} + diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp index 28ca0ad43041fef0da941e7aaf90bb7ddc686f37..5eb634716b09fbf553f39adfb0af0486a5ecfc6b 100644 --- a/libIRDB/test/fill_in_indtargs.cpp +++ b/libIRDB/test/fill_in_indtargs.cpp @@ -276,11 +276,11 @@ void print_targets() */ void add_num_handle_fn_watches(FileIR_t * firp) { - /* Loop over the set of functions */ + /* Loop over the set of functions */ for( - set<Function_t*>::const_iterator it=firp->GetFunctions().begin(); - it!=firp->GetFunctions().end(); - ++it + set<Function_t*>::const_iterator it=firp->GetFunctions().begin(); + it!=firp->GetFunctions().end(); + ++it ) { Function_t *func=*it; @@ -336,17 +336,9 @@ void fill_in_indtargs(FileIR_t* firp, pqxxDB_t &pqxx_interface) Elf32_Half secnum, strndx, secndx; Elf32_Word secsize; - //fp = fopen(elf_file.c_str(),"rb"); int elfoid=firp->GetFile()->GetELFOID(); pqxx::largeobjectaccess loa(pqxx_interface.GetTransaction(), elfoid, PGSTD::ios::in); - - // if(!fp) - // { - // cerr<<"Cannot open "<<elf_file<<"."<<endl; - // exit(-1); - // } - /* allcoate memory */ Elf32_Ehdr elfhdr; @@ -359,9 +351,7 @@ void fill_in_indtargs(FileIR_t* firp, pqxxDB_t &pqxx_interface) /* Read Section headers */ Elf32_Shdr *sechdrs=(Elf32_Shdr*)malloc(sizeof(Elf32_Shdr)*secnum); - //fseek(fp, sec_hdr_off, SEEK_SET); loa.seek(sec_hdr_off, std::ios_base::beg); - //res=fread(sechdrs, sizeof(Elf32_Shdr), secnum, fp); loa.cread((char*)sechdrs, sizeof(Elf32_Shdr)* secnum); /* look through each section and record bounds */ @@ -376,7 +366,6 @@ void fill_in_indtargs(FileIR_t* firp, pqxxDB_t &pqxx_interface) cout<<"========================================="<<endl; cout<<"Targets from data sections are: " << endl; cout<<"# ATTRIBUTE total_indirect_targets_pass1="<<std::dec<<targets.size()<<endl; -// print_targets(); cout<<"========================================="<<endl; /* look through the instructions in the program for targets */ @@ -389,7 +378,6 @@ void fill_in_indtargs(FileIR_t* firp, pqxxDB_t &pqxx_interface) cout<<"========================================="<<endl; cout<<"All targets from data+instruction sections are: " << endl; cout<<"# ATTRIBUTE total_indirect_targets_pass2="<<std::dec<<targets.size()<<endl; -// print_targets(); cout<<"========================================="<<endl; /* Read the exception handler frame so that those indirect branches are accounted for */ @@ -399,7 +387,6 @@ void fill_in_indtargs(FileIR_t* firp, pqxxDB_t &pqxx_interface) cout<<"========================================="<<endl; cout<<"All targets from data+instruction+eh_header sections are: " << endl; cout<<"# ATTRIBUTE total_indirect_targets_pass3="<<std::dec<<targets.size()<<endl; -// print_targets(); cout<<"========================================="<<endl; @@ -411,12 +398,32 @@ void fill_in_indtargs(FileIR_t* firp, pqxxDB_t &pqxx_interface) print_targets(); cout<<"========================================="<<endl; - /* Add functions containing unsigned int params to the list */ - add_num_handle_fn_watches(firp); + /* now process the ranges that have exception handling */ + void check_for_thunks(FileIR_t* firp); + check_for_thunks(firp); + cout<<"========================================="<<endl; + cout<<"# ATTRIBUTE total_indirect_targets_pass5="<<std::dec<<targets.size()<<endl; + print_targets(); + cout<<"========================================="<<endl; + + + + + + + /* Add functions containing unsigned int params to the list */ + add_num_handle_fn_watches(firp); + /* now process the ranges that have exception handling */ + cout<<"========================================="<<endl; + cout<<"# ATTRIBUTE total_indirect_targets_pass6="<<std::dec<<targets.size()<<endl; + print_targets(); + cout<<"========================================="<<endl; + + + /* set the IR to have some instructions marked as IB targets */ mark_targets(firp); - } diff --git a/libIRDB/test/fix_calls.cpp b/libIRDB/test/fix_calls.cpp index 955874f0fba1682a30380ad35aa1d31b7a2f830a..21de06444517ad2861f9b814b3ec1f5078020ae0 100644 --- a/libIRDB/test/fix_calls.cpp +++ b/libIRDB/test/fix_calls.cpp @@ -185,7 +185,7 @@ void fix_call(Instruction_t* insn, FileIR_t *firp) int instr_len = Disasm(&disasm); - /* if this instruction is an inserted call instruction and we don't need to + /* if this instruction is an inserted call instruction than we don't need to * convert it for correctness' sake. */ if(insn->GetAddress()->GetVirtualOffset()==0) @@ -259,6 +259,13 @@ void fix_call(Instruction_t* insn, FileIR_t *firp) insn->SetDataBits(newbits); insn->SetComment(insn->GetComment()+" Push part"); + /* create a relocation for this instruction */ + Relocation_t* reloc=new Relocation_t; + reloc->SetOffset(1); + reloc->SetType("32-bit"); + insn->GetRelocations().insert(reloc); + firp->GetRelocations().insert(reloc); + /* If the fallthrough is not marked as indirectly branchable-to, then mark it so */ if(newindirtarg && !newindirtarg->GetIndirectBranchTargetAddress()) diff --git a/libIRDB/test/generate_spri.cpp b/libIRDB/test/generate_spri.cpp index 5132e3aa6097cfd83cfa027e9e8a3f8aea775cf3..b510a814d7fa36426daad350788d831df18f8696 100644 --- a/libIRDB/test/generate_spri.cpp +++ b/libIRDB/test/generate_spri.cpp @@ -31,7 +31,7 @@ main(int argc, char* argv[]) VariantID_t *varidp=NULL; - FileIR_t *varirp=NULL; + FileIR_t *firp=NULL; /* setup the interface to the sql server */ pqxxDB_t pqxx_interface; @@ -45,13 +45,22 @@ main(int argc, char* argv[]) assert(varidp->IsRegistered()==true); - // read the db - cerr<<"Reading variant "<<string(argv[1])<<" from database." << endl; - varirp=new FileIR_t(*varidp); - cerr<<"Reading variant "<<varidp->GetOriginalVariantID()<<" from database." << endl; + for(set<File_t*>::iterator it=varidp->GetFiles().begin(); + it!=varidp->GetFiles().end(); + ++it + ) + { + File_t* this_file=*it; + assert(this_file); + cerr<<"Reading variant "<<string(argv[1])<<":"<<this_file->GetURL() + <<" from database." << endl; - varirp->GenerateSPRI(*fout); + // read the db + firp=new FileIR_t(*varidp,this_file); + firp->GenerateSPRI(*fout); + delete firp; + } } catch (DatabaseError_t pnide) @@ -66,7 +75,6 @@ main(int argc, char* argv[]) ((ofstream*)fout)->close(); - delete varirp; delete varidp; } diff --git a/libIRDB/test/ilr.cpp b/libIRDB/test/ilr.cpp index c8a9e389ce6c3edcdba1681ecd1e41443852d548..11e85c5e758326268d898095f40b317b3b50c614 100644 --- a/libIRDB/test/ilr.cpp +++ b/libIRDB/test/ilr.cpp @@ -82,6 +82,10 @@ main(int argc, char* argv[]) File_t* this_file=*it; assert(this_file); + // ilr isnt working for shared libs yet. + if(this_file!=pidp->GetMainFile()) + continue; + // read the db firp=new FileIR_t(*pidp,this_file); diff --git a/tools/spasm/Makefile b/tools/spasm/Makefile index ff71ae8798f6010461025cd5f359e8eee4c8bf23..c646d822f620f5541c4d1cdf92d35d4364795d60 100644 --- a/tools/spasm/Makefile +++ b/tools/spasm/Makefile @@ -17,7 +17,7 @@ # CC=g++ -CFLAGS= -static -DUBUNTU -Wall -O3 +CFLAGS= -g# -DUBUNTU -Wall -O3 INCLUDE=-I. -I../../include -I../../xform -I../../beaengine/include LIBS=-L../../xform -lxform -L ../../beaengine/lib/Linux.gnu.Debug -lBeaEngine_s_d @@ -29,7 +29,7 @@ all: spasm echo build complete clean: - rm -f *.o core spasm + rm -f *.o core spasm *.map *.bspri *.asm *.bin spasm: $(OBJS) spasm.cpp Makefile spasm.h spasm_main.cpp ben_lib.cpp ben_lib.h $(CC) -o spasm $(INCLUDE) $(CFLAGS) spasm_main.cpp spasm.cpp ben_lib.cpp $(OBJS) $(LIBS) diff --git a/tools/spasm/spasm.cpp b/tools/spasm/spasm.cpp index e440d9f64f6bbc2bb28a7e41a10f10964a7cffb5..d8dcf8cdcce3414edec0ad9f9de3ca54695303fb 100755 --- a/tools/spasm/spasm.cpp +++ b/tools/spasm/spasm.cpp @@ -9,6 +9,7 @@ #include <cerrno> #include <climits> #include <cstring> +#include <assert.h> #include "ben_lib.h" #include "beaengine/BeaEngine.h" @@ -21,21 +22,23 @@ using namespace std; +//TODO: if I am getting rid of the requirement for 0x address prefixes, make sure comments reflec this + typedef struct spasmline { - string address; - string op; - string rhs; //represents "right hand side" - string comment; - bool commentOnly; - unsigned int lineNum; - } spasmline_t; + string address; + string op; + string rhs; //represents "right hand side" + string comment; + bool commentOnly; + unsigned int lineNum; +} spasmline_t; typedef struct bin_instruction { - string hex_str; - unsigned int size; - //char array is not by convention null terminated. - unsigned char raw_bin[50]; - } bin_instruction_t; + string hex_str; + unsigned int size; + //char array is not by convention null terminated. + unsigned char raw_bin[50]; +} bin_instruction_t; @@ -70,47 +73,61 @@ static string getCallbackAddress(const string &symbolFilename, const string &sym static int getSymbolAddress(const string &symbolFilename, const string &symbol) throw(exception) { - string symbolFullName = symbolFilename + "+" + symbol; - map<string,string>::iterator callbackMapIterator; - - if(callbackMap.find(symbolFullName) != callbackMap.end()) - { - return strtol(callbackMap[symbolFullName].c_str(),NULL,16); - } + string symbolFullName = symbolFilename + "+" + symbol; + map<string,string>::iterator callbackMapIterator; + if(callbackMap.find(symbolFullName) != callbackMap.end()) + { + return strtol(callbackMap[symbolFullName].c_str(),NULL,16); + } // nm -a stratafier.o.exe | egrep " integer_overflow_detector$" | cut -f1 -d' ' - string command = "nm -a " + symbolFilename + " | egrep \" " + symbol + "$\" | cut -f1 -d' '"; - char* address = new char[128]; + string command = "nm -a " + symbolFilename + " | egrep \" " + symbol + "$\" | cut -f1 -d' '"; + char* address = new char[128]; - FILE *fp = popen(command.c_str(), "r"); + FILE *fp = popen(command.c_str(), "r"); - fscanf(fp,"%s", address); - string addressString = string(address); + fscanf(fp,"%s", address); + string addressString = string(address); - pclose(fp); - delete [] address; + pclose(fp); + delete [] address; - callbackMap[symbolFullName] = addressString; + callbackMap[symbolFullName] = addressString; return strtol(addressString.c_str(),NULL,16); } -void a2bspri(const string &input, const string &output, const string &symbolFilename) throw(exception) +//void a2bspri(const string &input, const string &output, const string &symbolFilename) throw(exception) +void a2bspri(const vector<string> &input, const string &symbolFilename) throw(exception) { + for(int i=0;i<input.size();i++) + { + symMap.clear(); + vector<spasmline_t> spasmlines = getSpasmLines(input[i]); - vector<spasmline_t> spasmlines = getSpasmLines(input); - - vector<string> assembly = getAssembly(spasmlines); + vector<string> assembly = getAssembly(spasmlines); - assemble(assembly,input+".asm"); + assemble(assembly,input[i]+".asm"); - resolveSymbols(input+".asm.map"); + resolveSymbols(input[i]+".asm.map"); - vector<bin_instruction_t> binInstr = parseBin(input+".asm.bin"); + vector<bin_instruction_t> binInstr = parseBin(input[i]+".asm.bin"); - vector<string> spriLines = getSPRI(binInstr,spasmlines, symbolFilename); + vector<string> spriLines = getSPRI(binInstr,spasmlines, symbolFilename); - printVector(output,spriLines); + //if the input file ends with .aspri, strip suffix and replace with .bspri + string output = input[i]; + size_t pos = output.find(".aspri"); + if(pos != string::npos) + { + output = output.substr(0,pos); + } + //else just append .bspri + + output += ".bspri"; + + printVector(output,spriLines); + } } @@ -120,44 +137,35 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) int lineCount = 0; - string commentOnlyRegex = "^[[:blank:]]*(;|#).*$"; - string entryRedirectRegex = "^[[:blank:]]*0x[[:xdigit:]]+[[:blank:]]+->[[:blank:]]+([.]|[a-zA-Z0-9_]*|0x[:xdigit:]+)[[:blank:]]*((;|#).*)?$"; - string otherRedirectRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+->[[:blank:]]+((0x[[:xdigit:]]+)|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]*((;|#).*)?$"; - string insertRedirectRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+[-][|][[:blank:]]+0x[[:xdigit:]]+[[:blank:]]*((;|#).*)?$"; - string instructionRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+[*][*][[:blank:]]+.*$"; - string callbackRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*|0x[[:xdigit:]]+)[[:blank:]]+[(][)][[:blank:]]+.*$"; - - regex_t coPattern, erPattern, orPattern, irPattern, insPattern, cbPattern; - - if (regcomp(&coPattern, commentOnlyRegex.c_str(), REG_EXTENDED) != 0) - { - throw SpasmException("ERROR: program bug, regex compilation failure for commentOnlyRegex in getSpasmLines"); - } - - if (regcomp(&erPattern, entryRedirectRegex.c_str(), REG_EXTENDED) != 0) - { - throw SpasmException("ERROR: program bug, regex compilation failure for entryRedirectRegex in getSpasmLines"); - } - - if (regcomp(&orPattern, otherRedirectRegex.c_str(), REG_EXTENDED) != 0) - { - throw SpasmException("ERROR: program bug, regex compilation failure for otherRedirectRegex in getSpasmLines"); - } - - if (regcomp(&irPattern, insertRedirectRegex.c_str(), REG_EXTENDED) != 0) - { - throw SpasmException("ERROR: program bug, regex compilation failure for insertRedirectRegex in getSpasmLines"); - } + string regularAddressRegex = "0x[[:xdigit:]]+"; + string offsetAddressRegex = "[a-zA-Z0-9\\._-]+[[:blank:]]*[+][[:blank:]]*0x[[:xdigit:]]+|[a-zA-Z0-9\\._]+[[:blank:]]*[+][[:blank:]]*[[:xdigit:]]+"; - if (regcomp(&insPattern, instructionRegex.c_str(), REG_EXTENDED) != 0) - { - throw SpasmException("ERROR: program bug, regex compilation failure for instructionRegex in getSpasmLines"); - } + string allAddressRegex = regularAddressRegex + "|" + offsetAddressRegex; + + string commentOnlyRegex = "^[[:blank:]]*(;|#).*$"; + string entryRedirectRegex = "^[[:blank:]]*("+allAddressRegex + ")[[:blank:]]+(->)[[:blank:]]+([.]|[a-zA-Z0-9_]*|" + allAddressRegex + ")[[:blank:]]*((;|#).*)?$"; + string otherRedirectRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+(->)[[:blank:]]+(("+ allAddressRegex + ")|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]*((;|#).*)?$"; + string insertRedirectRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+([-][|])[[:blank:]]+("+allAddressRegex + ")[[:blank:]]*((;|#).*)?$"; + string instructionRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+([*][*])[[:blank:]]+.*$"; + string callbackRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+([(][)])[[:blank:]]+.*$"; + string relocRegex = "^[[:blank:]]*([.]|[a-zA-Z][a-zA-Z0-9_]*)[[:blank:]]+([r][l])[[:blank:]]+.*$"; + + regex_t coPattern, erPattern, orPattern, irPattern, insPattern, cbPattern, rlPattern; + +#define COMPILE_REGEX(pattern,the_string) \ + if (regcomp(&pattern, the_string.c_str(), REG_EXTENDED) != 0) \ + { \ + throw SpasmException("ERROR: program bug, regex compilation failure for " #the_string " in getSpasmLines"); \ + } - if (regcomp(&cbPattern, callbackRegex.c_str(), REG_EXTENDED) != 0) - { - throw SpasmException("ERROR: program bug, regex compilation failure for callbackRegex in getSpasmLines"); - } + COMPILE_REGEX(rlPattern,relocRegex); + COMPILE_REGEX(coPattern,commentOnlyRegex); + COMPILE_REGEX(erPattern,entryRedirectRegex); + COMPILE_REGEX(orPattern,otherRedirectRegex); + COMPILE_REGEX(irPattern,insertRedirectRegex); + COMPILE_REGEX(insPattern,instructionRegex); + COMPILE_REGEX(coPattern,commentOnlyRegex); + COMPILE_REGEX(cbPattern,callbackRegex); ifstream myfile; myfile.open(inputFile.c_str()); @@ -171,9 +179,9 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) { lineCount++; - spasmline_t spasmline; + spasmline_t spasmline; string line; - getline(myfile,line); + getline(myfile,line); vector<string> tokens; spasmline.address = ""; @@ -187,13 +195,12 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) ss<<lineCount; string strLineNum = ss.str(); - trim(line); + regmatch_t pmatch[5]; - tokenize(tokens,line," \t"); - - //empty line, skip - if (tokens.size()==0) - continue; + trim(line); + + if(line.length() == 0) + continue; //comment only line check if(regexec(&coPattern, line.c_str(), 0, NULL, 0)==0) @@ -207,24 +214,20 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) continue; } - if(regexec(&erPattern,line.c_str(),0,NULL,0)==0 || regexec(&orPattern,line.c_str(),0,NULL,0)==0 || - regexec(&irPattern,line.c_str(),0,NULL,0)==0 || regexec(&insPattern,line.c_str(),0,NULL,0)==0 || regexec(&cbPattern, line.c_str(),0,NULL,0)==0) + if(regexec(&erPattern,line.c_str(),5,pmatch,0)==0 || + regexec(&orPattern,line.c_str(),5,pmatch,0)==0 || + regexec(&irPattern,line.c_str(),5,pmatch,0)==0 || + regexec(&insPattern,line.c_str(),5,pmatch,0)==0 || + regexec(&cbPattern, line.c_str(),5,pmatch,0)==0 || + regexec(&rlPattern, line.c_str(),5,pmatch,0)==0) { - trim(tokens[1]); - trim(tokens[2]); - - spasmline.address = tokens[0]; - spasmline.op = tokens[1]; - spasmline.rhs = tokens[2]; - - //Since I tokenized by spaces, the right hand side (rhs), will be - //unnecessarily tokenized, so I must put it back together. I could - //have tokenized again to put the rhs back together but in the - //future, if a spasm operator can be found in the rhs we could have a problem. - for(unsigned int i=3;i<tokens.size();i++) - { - spasmline.rhs += " " + tokens[i]; - } + int mlen = pmatch[1].rm_eo - pmatch[1].rm_so; + spasmline.address = line.substr(pmatch[1].rm_so,mlen); + + mlen = pmatch[2].rm_eo - pmatch[2].rm_so; + spasmline.op = line.substr(pmatch[2].rm_so,mlen); + + spasmline.rhs = line.substr(pmatch[2].rm_eo); //There may be an inline comment, search rhs for ';'and split rhs accordingly for(unsigned int i=0;i<spasmline.rhs.length();i++) @@ -234,6 +237,7 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) spasmline.comment = spasmline.rhs.substr(i); //yea I am changing part of the guard in a loop, but I am breaking immediately spasmline.rhs = spasmline.rhs.substr(0,i); + break; } } @@ -244,8 +248,13 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) myfile.close(); throw SpasmException("ERROR: improperly formatted spasm line at " + strLineNum); } + + trim(spasmline.comment); + trim(spasmline.rhs); + trim(spasmline.op); + trim(spasmline.address); - lines.push_back(spasmline); + lines.push_back(spasmline); } regfree(&erPattern); @@ -253,6 +262,7 @@ static vector<spasmline_t> getSpasmLines(const string &inputFile) regfree(&orPattern); regfree(&coPattern); regfree(&insPattern); + regfree(&rlPattern); myfile.close(); return lines; @@ -267,7 +277,8 @@ static vector<string> getAssembly(const vector<spasmline_t> &lines) { spasmline_t sline = lines[i]; - if(sline.commentOnly) + // skip comments and relocations */ + if(sline.commentOnly || sline.op==string("rl")) continue; stringstream ss; @@ -280,10 +291,16 @@ static vector<string> getAssembly(const vector<spasmline_t> &lines) string lineOp = sline.op; string lineRH = sline.rhs; - //If not '.' or '0' the address is a label - if((lineAddr[0] != '.') && (lineAddr[0] != '0')) - { + //if lineAddr has a plus in it, if so it is an address + //optimally I would do all these checks with a regex, but + //hindsight is 20/20 + + //If not '.' or an offset address (<base> + <offset>) + //then the address is a label + //TODO: I really need to use regex for all checks like this + if(lineAddr.find("+") == string::npos && lineAddr[0] != '.' && lineAddr[0] != '0') + { if(symMap.find(lineAddr) != symMap.end()) { throw SpasmException("ERROR: multiple symbolic destination detected for symbol "+lineAddr+ " on line " + strLineNum); @@ -295,10 +312,9 @@ static vector<string> getAssembly(const vector<spasmline_t> &lines) if(lineOp.compare("->")==0) { - //Check if label or . (labels cannot start with 0 and all addresses must start with 0x) - //non-entry point redirections require one byte of space. This space is reserved with - //nop - if(lineAddr[0] != '0') + //Check if label or . + //non-entry point redirections require one byte of space. This space is reserved with nop + if(lineAddr.find("+") == string::npos && lineAddr[0] != '0') { lineRH = "nop"; } @@ -313,12 +329,12 @@ static vector<string> getAssembly(const vector<spasmline_t> &lines) } else if(lineOp.compare("()")==0) { - // this is a callback + // this is a callback /* - assemblyLine = "; "; - assemblyLine += lineAddr; - assemblyLine += " () "; - assemblyLine += " needToResolveAddressFor: "; + assemblyLine = "; "; + assemblyLine += lineAddr; + assemblyLine += " () "; + assemblyLine += " needToResolveAddressFor: "; */ string callback = lineRH; lineRH = "nop"; @@ -367,7 +383,7 @@ static void assemble(const vector<string> &assembly, const string &assemblyFile) asmFile<<NASM_BIT_WIDTH<<endl; char orgDirective[50]; - sprintf(orgDirective, "ORG 0x%x", ORG_PC); + sprintf(orgDirective, "ORG 0x%x", vpc); asmFile<<orgDirective<<endl; asmFile<<"[map symbols "<<assemblyFile<<".map]"<<endl; @@ -375,12 +391,12 @@ static void assemble(const vector<string> &assembly, const string &assemblyFile) { asmFile<<assembly[i]<<endl; } - asmFile.close(); + asmFile.close(); - command = "nasm -O1 -w-number-overflow " + assemblyFile + " -o "+assemblyFile+".bin"; - cout<<"Running nasm ("<<command<<")..."; - system(command.c_str()); - cout<<endl; + command = "nasm -O1 -w-number-overflow " + assemblyFile + " -o "+assemblyFile+".bin"; + cout<<"Running nasm ("<<command<<")..."; + system(command.c_str()); + cout<<endl; //see if the file was created ifstream filetest; @@ -435,7 +451,7 @@ static void resolveSymbols(const string &mapFile) addrval = strtoll(tok_c_str,&endptr,16); if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN)) - || ((errno != 0 && addrval == 0) || endptr == tok_c_str)) + || ((errno != 0 && addrval == 0) || endptr == tok_c_str)) { continue; } @@ -444,7 +460,7 @@ static void resolveSymbols(const string &mapFile) addrval = strtoll(tok_c_str,&endptr,16); if((errno == ERANGE && (addrval == LLONG_MAX || addrval == LLONG_MIN)) - || ((errno != 0 && addrval == 0) || endptr == tok_c_str)) + || ((errno != 0 && addrval == 0) || endptr == tok_c_str)) { continue; } @@ -538,6 +554,7 @@ static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector for(unsigned int i=0;i<spasmlines.size();i++) { + stringstream ss; ss <<spasmlines[i].lineNum; string strLineNum = ss.str(); @@ -573,22 +590,29 @@ static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector //which require no memory space, so push the instruction alone //no symbols are allowed on the rhs for these spasm instructions, therefore //there is no need to resolve any symbols - //Only virtual addresses may begin with 0 - if(address[0] == '0') + //The assumption is that spasm instruction lines (** ops), do not have + //actual addresses on the left hand side. + // if (is address and not a relocaion) + if((address.find("+") != string::npos || address[0] == '0')) { spri.push_back("");//ensures a space separates spri entry points //remove 0x of the address (not necessary but makes all addresses uniform) //rhs is replaced with current vpc - spriline = address.substr(2)+" "+op+" "; + //spriline = address.substr(2)+" "+op+" "; + + spriline = address + " " + op + " "; //rhs has a dot symbol if(rhs[0] == '.') spriline += vpcstr+" "; - else if(rhs[0] == '0') - spriline += rhs.substr(2)+" "; + else if(op.compare("rl") == 0 ) + spriline += rhs; + else if(rhs.find("+") != string::npos || rhs[0] == '0') + spriline += rhs; //rhs is a user defined symbol, and must be resolved else { + assert(op.compare("->")==0 || op.compare("-|")==0); if (symMap.find(rhs) == symMap.end()) throw SpasmException("ERROR: unresolved symbol " + rhs + " for symbol defined on aspri line " + strLineNum); @@ -604,7 +628,7 @@ static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector bin_instruction_t binLine = bin[bintop]; //If the address is a symbol, replace with resolved symbol address - //a symbol does not begin with a 0 and is not '.'. At this point + //a symbol is not '.' or a <base> + <offset> pattern. At this point //we have already weeded out all instructions that use a non-symbolic //address so we only check for '.'. if(address[0] != '.') @@ -640,23 +664,29 @@ static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector throw SpasmException(string("ERROR: could not resolve address for callback handler: " + rhs + " in symbol file: " + symbolFilename)); spriline += callbackAddress; } + // handle relocations + else if(op.compare("rl") == 0) + { + spriline += rhs; + spriline += "\t"+comments; + spri.push_back(spriline); + continue; + } //terminating and non-terminating redirects may have symbols on the right hand side //resolve them. - else if(op.compare("**") != 0) + else if(op.compare("->") == 0 || op.compare("-|") ==0) { - //If the current disassembled instruction is not nop, then something is out of sync if(bin[bintop].hex_str.compare("1 90") !=0) - throw SpasmException(string("ERROR: Bug detected in getSPRI, bin out of sync with spasm lines. ") + + throw SpasmException(string("ERROR: Bug detected in getSPRI, bin out of sync with spasm lines. ") + "Expected a place holder nop (1 90) for a SPRI redirect, but found " + bin[bintop].hex_str +". " + "Sync error occurs on line " + strLineNum + " of the SPASM input file"); //non-entry point redirects require one byte of memory incSize = 1; - //if rhs is a vpc, remove the leading 0x - if(rhs[0] == '0') - spriline += rhs.substr(2); + if(rhs.find("+") != string::npos || rhs[0] == '0') + spriline += rhs; //else the rhs must be a label else { @@ -677,6 +707,7 @@ static vector<string> getSPRI(const vector<bin_instruction_t> &bin, const vector } else { + assert(op.compare("**")==0); //Add a comment indicating the assembly used for this instruction if(comments.empty()) comments = "#"; diff --git a/tools/spasm/spasm.h b/tools/spasm/spasm.h index 614df22eaa87729dcef0c7529fa6a762f26d3bf3..4e6d52a980d2efc7264b1fcf58bfa0d8062d1513 100644 --- a/tools/spasm/spasm.h +++ b/tools/spasm/spasm.h @@ -4,9 +4,10 @@ #include <string> #include <exception> +#include <vector> - -void a2bspri(const std::string &input, const std::string &output, const std::string &elfFile) throw(std::exception); +//void a2bspri(const std::string &input, const std::string &output, const std::string &elfFile) throw(std::exception); +void a2bspri(const std::vector<std::string> &input, const std::string &symbolFilename) throw(std::exception); class SpasmException: public std::exception { diff --git a/tools/spasm/spasm_main.cpp b/tools/spasm/spasm_main.cpp index 92a9cd0215cbe689fdac9b8dc5800049b32ec714..026d30525fc24ca356ea0ca2bd4079484c8ca855 100644 --- a/tools/spasm/spasm_main.cpp +++ b/tools/spasm/spasm_main.cpp @@ -3,6 +3,7 @@ #include <fstream> #include <string> #include <cstdlib> +#include <vector> using namespace std; @@ -12,6 +13,13 @@ bool fexists(string filename) return ifile; } +void usage() +{ + cerr<<"SPASM usage:\n-s <symbol file> <input files>"<<endl; + exit(1); +} + + ///Utility SPASM's main int main(int argc, char *argv[]) { @@ -43,17 +51,17 @@ int main(int argc, char *argv[]) exit(1); } - input = string(argv[1]); output = string(argv[2]); - cout<<"Input:"<<input<<endl; cout<<"Output:"<<output<<endl; cout<<"Symbols:"<<elf<<endl; + vector<string> input_list; + input_list.push_back(input); try { - a2bspri(input,output,elf); + a2bspri(input_list,elf); } catch (SpasmException err) { diff --git a/tools/spasm/test.aspri b/tools/spasm/test.aspri index d03ce46016463b0d0586416f1aa0dffb31bc5aab..fe76b61187c6428f9409cd171f65ed2d9720b77c 100644 --- a/tools/spasm/test.aspri +++ b/tools/spasm/test.aspri @@ -5,7 +5,7 @@ #more comment tests # -0x00000010 -> . ; Spasm entry point +a.out+ 0x00000010 -> . ; Spasm entry point . ** sub esp, 10 L1 ** mov ebx, 10 ; . ** nop @@ -13,8 +13,8 @@ L1 ** mov ebx, 10 ; . ** jmp L1 L2 -> L1 . -> L2 -. -> 0x00000029 -. -| 0x00000013 +. -> a.out+0x00000029 +. -| a.out+00000013 . ** jmp L2 . ** nop . ** nop @@ -25,5 +25,5 @@ L2 -> L1 . ** nop . ** nop L3 ** mov esp, 19 -0xf000 -> L3 +a.out + 0xf000 -> L3