diff --git a/.gitattributes b/.gitattributes index 6fe4d734e76f499658f098da215f8aa6043b4640..9a9ae0d79789e30a3ad2bf59ec4f45a01e8a6910 100644 --- a/.gitattributes +++ b/.gitattributes @@ -177,6 +177,7 @@ libIRDB/test/create_variant.cpp -text libIRDB/test/create_variantir.cpp -text libIRDB/test/drop_variant.cpp -text libIRDB/test/fill_in_cfg.cpp -text +libIRDB/test/fill_in_indtargs.cpp -text libIRDB/test/fix_calls.cpp -text libIRDB/test/generate_spri.cpp -text libIRDB/test/ilr.cpp -text diff --git a/libIRDB/include/file.hpp b/libIRDB/include/file.hpp index 88b171243fb8d5f3c9bdb2cd1e3c6123527d91b7..af1fc6b385647b3fa1b65768a3fc1506d4132daa 100644 --- a/libIRDB/include/file.hpp +++ b/libIRDB/include/file.hpp @@ -3,13 +3,16 @@ class File_t : public BaseObj_t { public: // create new item. - File_t(db_id_t file_id, std::string url, std::string hash, std::string arch, db_id_t doipid); + File_t(db_id_t file_id, std::string url, std::string hash, std::string arch, int elfoid, db_id_t doipid); File_t(db_id_t file_id) : BaseObj_t(NULL) { assert(0);} // read from DB void WriteToDB() { assert(0); } // writes to DB ID is not -1. 
+ int GetELFOID() { return elfoid; }; + private: std::string url; std::string hash; std::string arch; + int elfoid; }; diff --git a/libIRDB/include/instruction.hpp b/libIRDB/include/instruction.hpp index ef90417cc86afb097468a98e93f5e4204764e65a..f9aed8e7a8dc9a260d06e57b6db7161c116c0462 100644 --- a/libIRDB/include/instruction.hpp +++ b/libIRDB/include/instruction.hpp @@ -11,8 +11,8 @@ class Instruction_t : public BaseObj_t Instruction_t(); - Instruction_t(db_id_t id, AddressID_t *addr, Function_t *func, db_id_t file_id, db_id_t orig_id, - std::string data, std::string comment, db_id_t doip_id); + Instruction_t(db_id_t id, AddressID_t *addr, Function_t *func, db_id_t orig_id, + std::string data, std::string comment, bool my_indTarg, db_id_t doip_id); AddressID_t* GetAddress() { return my_address; } Function_t* GetFunction() { return my_function; } @@ -24,11 +24,13 @@ class Instruction_t : public BaseObj_t void SetAddress(AddressID_t* newaddr) { my_address=newaddr; } void SetFunction(Function_t* func ) { my_function=func;} - void SetOriginalAddressID(AddressID_t) {assert(0); /* you shouldn't do this! */} + void SetOriginalAddressID(db_id_t origid) { orig_address_id=origid; /* you shouldn't do this, unless you know what you're doing! */} void SetFallthrough(Instruction_t* i) {fallthrough=i;} void SetTarget(Instruction_t* i) {target=i; } void SetDataBits(std::string orig ) { data=orig;} void SetComment(std::string orig ) { comment=orig;} + bool GetIsIndirectTarget() { return indTarg; } + void SetIsIndirectTarget(bool myIndTarg) { indTarg=myIndTarg; } void WriteToDB() { assert(0); } std::string WriteToDB(VariantID_t *vid, db_id_t newid); @@ -37,10 +39,10 @@ class Instruction_t : public BaseObj_t private: AddressID_t *my_address; Function_t *my_function; - db_id_t file_id; // const, should not change. db_id_t orig_address_id; // const, should not change. 
Instruction_t* fallthrough; Instruction_t* target; std::string data; std::string comment; + bool indTarg; }; diff --git a/libIRDB/include/pqxxdb.hpp b/libIRDB/include/pqxxdb.hpp index 390110544b1bea23c005f5915e3b0846e7dfdee1..b1d54a650f71143d7c6d8107c07d5cf71a3691cb 100644 --- a/libIRDB/include/pqxxdb.hpp +++ b/libIRDB/include/pqxxdb.hpp @@ -9,6 +9,10 @@ class pqxxDB_t : public DBinterface_t std::string GetResultColumn(std::string colname); bool IsDone(); void Commit(); + + pqxx::connection& GetConnection() { return conn; } + pqxx::work& GetTransaction() { return txn; } + private: pqxx::connection conn; pqxx::work txn; diff --git a/libIRDB/include/variantir.hpp b/libIRDB/include/variantir.hpp index 8b79eae2bb4b21ade91cb8bc03faaf440e5deda1..bf7bf2ce64b7e2ef52dce7fa6ffdc6ec182199f7 100644 --- a/libIRDB/include/variantir.hpp +++ b/libIRDB/include/variantir.hpp @@ -23,6 +23,8 @@ class VariantIR_t : public BaseObj_t // generate spri, assume that orig_varirp is the original variant. void generate_spri(VariantIR_t *orig_varirp, std::ostream &fout); + void SetBaseIDS(); + private: diff --git a/libIRDB/src/file.cpp b/libIRDB/src/file.cpp index f585d3b068af8366d6ea7eadf941a061c6d7616f..4b1da9d6b42529a64a9654b77621bd2cf492e06c 100644 --- a/libIRDB/src/file.cpp +++ b/libIRDB/src/file.cpp @@ -5,8 +5,8 @@ using namespace libIRDB; -File_t::File_t(db_id_t myfile_id, std::string myurl, std::string myhash, std::string myarch, db_id_t mydoipid) : - BaseObj_t(NULL), url(myurl), hash(myhash), arch(myarch) +File_t::File_t(db_id_t myfile_id, std::string myurl, std::string myhash, std::string myarch, int myoid, db_id_t mydoipid) : + BaseObj_t(NULL), url(myurl), hash(myhash), arch(myarch), elfoid(myoid) { SetBaseID(myfile_id); } diff --git a/libIRDB/src/generate_spri.cpp b/libIRDB/src/generate_spri.cpp index 0431ccfd795170ed8df25a711acf1829f4e9e0ea..a1ef0c066f226629c1733f75d4748da9b92c3886 100644 --- a/libIRDB/src/generate_spri.cpp +++ b/libIRDB/src/generate_spri.cpp @@ -19,6 +19,8 @@ 
using namespace std; // static map<Instruction_t*,Instruction_t*> insnMap; + + // // create a label for the given instruction // @@ -248,11 +250,21 @@ We need to emit a rule of this form Instruction_t* old_insn=insnMap[newinsn]; - fout << "#"<<endl; fout << "# Orig addr: "<<addressify(newinsn)<<" addr_id: "<< newinsn->GetBaseID()<<" with comment "<<newinsn->GetComment()<<endl; - fout << "#"<<endl; if(addressify(newinsn).c_str()[0]=='0') - fout << addressify(newinsn) <<" -> ."<<endl; + { + if(old_insn->GetIsIndirectTarget()) + { + fout << addressify(newinsn) <<" -> ."<<endl; + } + else + { + fout << "# eliding, no indirect targets"<<endl; + fout << addressify(newinsn) <<" -> . " <<endl; + fout << ". -> 0x0" << endl; + } + + } emit_spri_instruction(newinsn, fout); @@ -286,6 +298,7 @@ We need to emit a rule of this form } + // // generate a map from new instructions to old instructions // diff --git a/libIRDB/src/instruction.cpp b/libIRDB/src/instruction.cpp index 27f0f6f38a5a70e624a51be450af10ca78ffbc51..4baafa7ee043deb06e9e3738ab5a2e57eecbf275 100644 --- a/libIRDB/src/instruction.cpp +++ b/libIRDB/src/instruction.cpp @@ -8,8 +8,7 @@ using namespace std; Instruction_t::Instruction_t() : BaseObj_t(NULL), data(""), - comment(""), - file_id(NOT_IN_DATABASE) + comment("") { SetBaseID(NOT_IN_DATABASE); my_address=NULL; @@ -17,21 +16,22 @@ Instruction_t::Instruction_t() : orig_address_id=NOT_IN_DATABASE; fallthrough=NULL; target=NULL; + indTarg=true; } Instruction_t::Instruction_t(db_id_t id, AddressID_t *addr, Function_t *func, - db_id_t my_file_id, db_id_t orig_id, std::string thedata, std::string my_comment, + bool my_indTarg, db_id_t doip_id) : BaseObj_t(NULL), data(thedata), comment(my_comment), - file_id(my_file_id) + indTarg(my_indTarg) { SetBaseID(id); my_address=addr; @@ -64,18 +64,18 @@ string Instruction_t::WriteToDB(VariantID_t *vid, db_id_t newid) string q= string("insert into ")+vid->instruction_table_name + - string(" (instruction_id, address_id, 
parent_function_id, file_id, orig_address_id, fallthrough_address_id, target_address_id, data, comment, doip_id) ")+ + string(" (instruction_id, address_id, parent_function_id, orig_address_id, fallthrough_address_id, target_address_id, data, comment, is_indirect_target, doip_id) ")+ string(" VALUES (") + string("'") + to_string(GetBaseID()) + string("', ") + string("'") + to_string(my_address->GetBaseID()) + string("', ") + string("'") + to_string(func_id) + string("', ") + - string("'") + to_string(file_id) + string("', ") + string("'") + to_string(orig_address_id) + string("', ") + string("'") + to_string(ft_id) + string("', ") + string("'") + to_string(targ_id) + string("', ") + string("E'") + pqxx::escape_binary(data) + "'::bytea" + string(" , ") + // no ticks for this field // also need to append ::bytea string("'") + comment + string("', ") + + string("'") + to_string((int)indTarg) + string("', ") + string("'") + to_string(GetDoipID()) + string("') ; ") ; return q; diff --git a/libIRDB/src/variantid.cpp b/libIRDB/src/variantid.cpp index 09a3bc183a55c9e422ff141736ba6126b0e0354f..79b8284223f4eb838397fddb71470b16dba0f803 100644 --- a/libIRDB/src/variantid.cpp +++ b/libIRDB/src/variantid.cpp @@ -24,6 +24,9 @@ VariantID_t::VariantID_t() : void VariantID_t::CreateTables() { +/* + * WARNING! 
If you edit these tables, you must also edit $PEASOUP_HOME/tools/db/*.tbl + */ dbintr->IssueQuery( "CREATE TABLE " + address_table_name + @@ -52,13 +55,13 @@ void VariantID_t::CreateTables() "instruction_id SERIAL PRIMARY KEY, " "address_id integer REFERENCES " + address_table_name + ", " + "parent_function_id integer, " - "file_id integer REFERENCES file_info, " "orig_address_id integer, " "fallthrough_address_id integer, " "target_address_id integer, " "data bytea, " "comment text, " - "doip_id integer DEFAULT -1 " + "is_indirect_target boolean DEFAULT true, " + "doip_id integer DEFAULT -1 " ");" ); } @@ -129,9 +132,9 @@ bool VariantID_t::Register() if(NOT_IN_DATABASE==orig_pid) orig_pid=newid; - address_table_name="AddressTable_Variant"+to_string(GetBaseID())+"_address"; - function_table_name="FunctionTable_Variant"+to_string(GetBaseID())+"_function"; - instruction_table_name="InstructionTable_Variant"+to_string(GetBaseID())+"_instruction"; + address_table_name="Variant"+to_string(GetBaseID())+"_address"; + function_table_name="Variant"+to_string(GetBaseID())+"_function"; + instruction_table_name="Variant"+to_string(GetBaseID())+"_instruction"; BaseObj_t::dbintr->MoveToNextRow(); assert(BaseObj_t::dbintr->IsDone()); diff --git a/libIRDB/src/variantir.cpp b/libIRDB/src/variantir.cpp index a5e97580edf21436431228f4f6b7c04943b16520..2459f638638c5ab183a0336d29c56694ae70012b 100644 --- a/libIRDB/src/variantir.cpp +++ b/libIRDB/src/variantir.cpp @@ -30,7 +30,7 @@ std::map<db_id_t,File_t*> VariantIR_t::ReadFilesFromDB() std::map<db_id_t,File_t*> idMap; std::string q= "select file_info.file_id, file_info.url, file_info.hash," - " file_info.arch, file_info.type, file_info.doip_id " + " file_info.arch, file_info.type, file_info.elfoid, file_info.doip_id " " from file_info,variant_dependency " " where variant_dependency.variant_id = '" + to_string(progid.GetBaseID()) + "' AND " " file_info.file_id = variant_dependency.file_id ; "; @@ -45,9 +45,10 @@ 
std::map<db_id_t,File_t*> VariantIR_t::ReadFilesFromDB() std::string url=dbintr->GetResultColumn("url"); std::string hash=dbintr->GetResultColumn("hash"); std::string type=dbintr->GetResultColumn("type"); + int oid=atoi(dbintr->GetResultColumn("elfoid").c_str()); db_id_t doipid=atoi(dbintr->GetResultColumn("doip_id").c_str()); - File_t *newfile=new File_t(file_id,url,hash,type,doipid); + File_t *newfile=new File_t(file_id,url,hash,type,oid,doipid); //std::cout<<"Found file "<<file_id<<"."<<std::endl; @@ -154,7 +155,6 @@ std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB ( std::map<db // address_id integer REFERENCES #PROGNAME#_address, // parent_function_id integer, -// file_id integer REFERENCES file_info, // orig_address_id integer REFERENCES #PROGNAME#_address, // fallthrough_address_id integer, // target_address_id integer, @@ -166,20 +166,21 @@ std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB ( std::map<db db_id_t instruction_id=atoi(dbintr->GetResultColumn("instruction_id").c_str()); db_id_t aid=atoi(dbintr->GetResultColumn("address_id").c_str()); db_id_t parent_func_id=atoi(dbintr->GetResultColumn("parent_function_id").c_str()); - db_id_t file_id=atoi(dbintr->GetResultColumn("file_id").c_str()); db_id_t orig_address_id=atoi(dbintr->GetResultColumn("orig_address_id").c_str()); db_id_t fallthrough_address_id=atoi(dbintr->GetResultColumn("fallthrough_address_id").c_str()); db_id_t targ_address_id=atoi(dbintr->GetResultColumn("target_address_id").c_str()); std::string data=(dbintr->GetResultColumn("data")); std::string comment=(dbintr->GetResultColumn("comment")); + std::string isIndStr=(dbintr->GetResultColumn("is_indirect_target")); + bool indTarg= isIndStr==std::string("t"); db_id_t doipid=atoi(dbintr->GetResultColumn("doip_id").c_str()); + Instruction_t *newinsn=new Instruction_t(instruction_id, addrMap[aid], funcMap[parent_func_id], - file_id, orig_address_id, - data, comment, doipid); + data, comment, indTarg, doipid); 
if(funcMap[parent_func_id]) funcMap[parent_func_id]->GetInstructions().insert(newinsn); @@ -213,14 +214,37 @@ std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB ( std::map<db void VariantIR_t::WriteToDB() { + /* assign each item a unique ID */ + SetBaseIDS(); -#define MAX(a,b) (((a)>(b)) ? (a) : (b)) + db_id_t j=-1; dbintr->IssueQuery(string("TRUNCATE TABLE ")+ progid.instruction_table_name + string(" cascade;")); dbintr->IssueQuery(string("TRUNCATE TABLE ")+ progid.function_table_name + string(" cascade;")); dbintr->IssueQuery(string("TRUNCATE TABLE ")+ progid.address_table_name + string(" cascade;")); - + /* and now that everything has an ID, let's write to the DB */ + string q=string(""); + for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i) + q+=(*i)->WriteToDB(&progid,j); + dbintr->IssueQuery(q); + + q=string(""); + for(std::set<AddressID_t*>::const_iterator i=addrs.begin(); i!=addrs.end(); ++i) + q+=(*i)->WriteToDB(&progid,j); + dbintr->IssueQuery(q); + + q=string(""); + for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) + q+=(*i)->WriteToDB(&progid,j); + dbintr->IssueQuery(q); +} + + + +void VariantIR_t::SetBaseIDS() +{ +#define MAX(a,b) (((a)>(b)) ? 
(a) : (b)) /* find the highest database ID */ db_id_t j=0; @@ -230,7 +254,8 @@ void VariantIR_t::WriteToDB() j=MAX(j,(*i)->GetBaseID()); for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - + for(std::set<File_t*>::const_iterator i=files.begin(); i!=files.end(); ++i) + j=MAX(j,(*i)->GetBaseID()); /* for anything that's not yet in the DB, assign an ID to it */ for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i) @@ -242,23 +267,7 @@ void VariantIR_t::WriteToDB() for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - - - /* and now that everything has an ID, let's write to the DB */ - string q=string(""); - for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i, ++j) - q+=(*i)->WriteToDB(&progid,j); - dbintr->IssueQuery(q); - - q=string(""); - for(std::set<AddressID_t*>::const_iterator i=addrs.begin(); i!=addrs.end(); ++i,++j) - q+=(*i)->WriteToDB(&progid,j); - dbintr->IssueQuery(q); - - q=string(""); - for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i,++j) - q+=(*i)->WriteToDB(&progid,j); - dbintr->IssueQuery(q); + for(std::set<File_t*>::const_iterator i=files.begin(); i!=files.end(); ++i) + if((*i)->GetBaseID()==NOT_IN_DATABASE) + (*i)->SetBaseID(j++); } - - diff --git a/libIRDB/test/Makefile b/libIRDB/test/Makefile index a771592628128eabfba65e6aba634919c5c370e6..edee863122605f6ff76aa49298b727f3069860c5 100644 --- a/libIRDB/test/Makefile +++ b/libIRDB/test/Makefile @@ -1,7 +1,7 @@ .SUFFIXES: .exe .cpp -PROGS=print_variant.exe list_programs.exe create_variant.exe create_variantir.exe read_variantir.exe clone.exe ilr.exe drop_variant.exe generate_spri.exe fill_in_cfg.exe fix_calls.exe +PROGS=print_variant.exe list_programs.exe create_variant.exe create_variantir.exe read_variantir.exe clone.exe ilr.exe drop_variant.exe generate_spri.exe 
fill_in_cfg.exe fix_calls.exe fill_in_indtargs.exe all: $(PROGS) diff --git a/libIRDB/test/fill_in_cfg.cpp b/libIRDB/test/fill_in_cfg.cpp index 2a80985f2857fffebd3b56f6c56fa173c2db1ca1..534723572f84b90a79595a9525500f2b6d46fd98 100644 --- a/libIRDB/test/fill_in_cfg.cpp +++ b/libIRDB/test/fill_in_cfg.cpp @@ -6,6 +6,10 @@ #include <string.h> #include <map> #include <assert.h> +#include <elf.h> +#include <sys/mman.h> +#include <ctype.h> + #include "beaengine/BeaEngine.h" @@ -16,12 +20,22 @@ int bad_fallthrough_count=0; using namespace libIRDB; using namespace std; +set< pair<db_id_t,int> > missed_instructions; +int failed_target_count=0; + +pqxxDB_t pqxx_interface; + void populate_instruction_map ( map< pair<db_id_t,virtual_offset_t>, Instruction_t*> &insnMap, VariantIR_t *virp ) { + /* start from scratch each time */ + insnMap.clear(); + + + /* for each instruction in the IR */ for( set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); it!=virp->GetInstructions().end(); @@ -82,6 +96,8 @@ void set_fallthrough /* set the target for this insn */ if(fallthrough_insn!=0) insn->SetFallthrough(fallthrough_insn); + else + missed_instructions.insert(pair<db_id_t,int>(insn->GetAddress()->GetFileID(),virtual_offset)); } @@ -120,7 +136,7 @@ void set_target Instruction_t *target_insn=insnMap[p]; /* sanity, note we may see odd control transfers to 0x0 */ - if(target_insn==NULL && virtual_offset!=0) + if(target_insn==NULL) { unsigned char first_byte=0; if(insn->GetFallthrough()) @@ -138,10 +154,12 @@ void set_target ) { odd_target_count++; + target_insn=insn->GetFallthrough(); } else { - cout<<"Cannot set target for "<<std::hex<<insn->GetAddress()->GetVirtualOffset()<<"."<<endl; + if(virtual_offset!=0) + cout<<"Cannot set target (target="<< std::hex << virtual_offset << ") for "<<std::hex<<insn->GetAddress()->GetVirtualOffset()<<"."<<endl; bad_target_count++; } } @@ -149,68 +167,262 @@ void set_target /* set the target for this insn */ if(target_insn!=0) 
insn->SetTarget(target_insn); + else + missed_instructions.insert( pair<db_id_t,int>(insn->GetAddress()->GetFileID(),virtual_offset)); } } -void fill_in_cfg(VariantIR_t *virp) +File_t* find_file(VariantIR_t* virp, db_id_t fileid) { - map< pair<db_id_t,virtual_offset_t>, Instruction_t*> insnMap; - populate_instruction_map(insnMap, virp); - - cout << "Found "<<virp->GetInstructions().size()<<" instructions." <<endl; + set<File_t*> &files=virp->GetFiles(); for( - set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); - it!=virp->GetInstructions().end(); + set<File_t*>::iterator it=files.begin(); + it!=files.end(); ++it ) { - Instruction_t *insn=*it; - DISASM disasm; - memset(&disasm, 0, sizeof(DISASM)); + File_t* thefile=*it; + if(thefile->GetBaseID()==fileid) + return thefile; + } + return NULL; +} + +void add_new_instructions(VariantIR_t *virp) +{ + int found_instructions=0; + for( + set< pair<db_id_t,int> >::const_iterator it=missed_instructions.begin(); + it!=missed_instructions.end(); + ++it + ) + { + /* get the address we've missed */ + int missed_address=(*it).second; + + /* get the ID of the file that contains the missed address */ + db_id_t missed_fileid=(*it).first; + + /* figure out which file we're looking at */ + File_t* filep=find_file(virp,missed_fileid); + assert(filep); - disasm.Options = NasmSyntax + PrefixedNumeral; - disasm.Archi = 32; - disasm.EIP = (UIntPtr) insn->GetDataBits().c_str(); - disasm.VirtualAddr = insn->GetAddress()->GetVirtualOffset(); - int instr_len = Disasm(&disasm); + /* get the OID of the file */ + int elfoid=filep->GetELFOID(); - assert(instr_len==insn->GetDataBits().size()); - set_fallthrough(insnMap, &disasm, insn, virp); - set_target(insnMap, &disasm, insn, virp); + Elf32_Off sec_hdr_off, sec_off; + Elf32_Half secnum, strndx, secndx; + Elf32_Word secsize; + + pqxx::largeobjectaccess loa(pqxx_interface.GetTransaction(), elfoid, PGSTD::ios::in); + + + /* allocate memory */ + Elf32_Ehdr
elfhdr; + + /* Read ELF header */ + loa.cread((char*)&elfhdr, sizeof(Elf32_Ehdr)* 1); + + sec_hdr_off = elfhdr.e_shoff; + secnum = elfhdr.e_shnum; + strndx = elfhdr.e_shstrndx; + + /* Read Section headers */ + Elf32_Shdr *sechdrs=(Elf32_Shdr*)malloc(sizeof(Elf32_Shdr)*secnum); + loa.seek(sec_hdr_off, std::ios_base::beg); + loa.cread((char*)sechdrs, sizeof(Elf32_Shdr)* secnum); + + bool found=false; + + /* look through each section and find the missing target*/ + for (secndx=1; secndx<secnum; secndx++) + { + int flags = sechdrs[secndx].sh_flags; + + /* not a loaded section */ + if( (flags & SHF_ALLOC) != SHF_ALLOC) + continue; + /* loaded, and contains instruction, record the bounds */ + if( (flags & SHF_EXECINSTR) != SHF_EXECINSTR) + continue; + + int first=sechdrs[secndx].sh_addr; + int second=sechdrs[secndx].sh_addr+sechdrs[secndx].sh_size; + + /* is the missed instruction in this section */ + if(first<=missed_address && missed_address<=second) + { + /* found */ + found=true; + char* data=(char*)malloc(sechdrs[secndx].sh_size+16); /* +16 to account for a bogus-y instruction that wraps past the end of the section */ + memset(data,0, sechdrs[secndx].sh_size+16); /* bogus bits are always 0 */ + + /* grab the data from the ELF file for this section */ + loa.seek(sechdrs[secndx].sh_offset, std::ios_base::beg); + loa.read(data, sechdrs[secndx].sh_size * 1); + + int offset_into_section=missed_address-sechdrs[secndx].sh_addr; + + /* disassemble the instruction */ + DISASM disasm; + memset(&disasm, 0, sizeof(DISASM)); + + disasm.Options = NasmSyntax + PrefixedNumeral; + disasm.Archi = 32; + disasm.EIP = (UIntPtr) &data[offset_into_section]; + disasm.VirtualAddr = missed_address; + int instr_len = Disasm(&disasm); + + + /* get the new bits for an instruction */ + string newinsnbits; + newinsnbits.resize(instr_len); + for(int i=0;i<instr_len;i++) + newinsnbits[i]=data[offset_into_section+i]; + + /* create a new address */ + AddressID_t *newaddr=new AddressID_t(); + 
assert(newaddr); + newaddr->SetVirtualOffset(missed_address); + newaddr->SetFileID(missed_fileid); + + /* create a new instruction */ + Instruction_t *newinsn=new Instruction_t(); + assert(newinsn); + newinsn->SetAddress(newaddr); + newinsn->SetDataBits(newinsnbits); + newinsn->SetComment(string(disasm.CompleteInstr)+string(" from fill_in_cfg ")); + newinsn->SetAddress(newaddr); + /* fallthrough/target/is indirect will be set later */ + + /* insert into the IR */ + virp->GetInstructions().insert(newinsn); + virp->GetAddresses().insert(newaddr); + + + cout<<"Found new instruction, "<<newinsn->GetComment()<<", at "<<std::hex<<newinsn->GetAddress()->GetVirtualOffset()<<" in file "<<"<no name yet>"<<"."<<endl; + found_instructions++; + } + + } + if(!found) + { + failed_target_count++; + + cout<<"Cannot find address "<<std::hex<<missed_address<<" in file "<<"<no name yet>"<<"."<<endl; + } } - if(bad_target_count>0) - cout<<std::dec<<"Found "<<bad_target_count<<" bad targets."<<endl; - if(bad_target_count>0) - cout<<"Found "<<bad_fallthrough_count<<" bad fallthroughs."<<endl; - if(odd_target_count>0) - cout<<std::dec<<"Found "<<odd_target_count<<" odd targets (to jump over lock prefix)."<<endl; + cout<<"Found a total of "<<std::dec<<found_instructions<<" new instructions."<<endl; } -main(int argc, char* argv[]) +void fill_in_cfg(VariantIR_t *virp) { + int round=0; + + do + { + bad_target_count=0; + bad_fallthrough_count=0; + failed_target_count=0; + missed_instructions.clear(); + + map< pair<db_id_t,virtual_offset_t>, Instruction_t*> insnMap; + populate_instruction_map(insnMap, virp); + + cout << "Found "<<virp->GetInstructions().size()<<" instructions." 
<<endl; + + /* for each instruction, disassemble it and set the target/fallthrough */ + for( + set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); + it!=virp->GetInstructions().end(); + ++it + ) + { + Instruction_t *insn=*it; + DISASM disasm; + memset(&disasm, 0, sizeof(DISASM)); + + disasm.Options = NasmSyntax + PrefixedNumeral; + disasm.Archi = 32; + disasm.EIP = (UIntPtr) insn->GetDataBits().c_str(); + disasm.VirtualAddr = insn->GetAddress()->GetVirtualOffset(); + int instr_len = Disasm(&disasm); + + assert(instr_len==insn->GetDataBits().size()); + + set_fallthrough(insnMap, &disasm, insn, virp); + set_target(insnMap, &disasm, insn, virp); + + } + if(bad_target_count>0) + cout<<std::dec<<"Found "<<bad_target_count<<" bad targets at round "<<round<<endl; + if(bad_fallthrough_count>0) + cout<<"Found "<<bad_fallthrough_count<<" bad fallthroughs at round "<<round<<endl; + cout<<"Missed instruction count="<<missed_instructions.size()<<endl; - if(argc!=2) + add_new_instructions(virp); + + round++; + + /* keep trying this while we're resolving targets. if at any point we fail to resolve a new target/fallthrough address, then we give up */ + } while(missed_instructions.size()>failed_target_count); + + cout<<"Caution: Was unable to find instructions for these addresses:"<<endl; + for( + set< pair<db_id_t,int> >::const_iterator it=missed_instructions.begin(); + it!=missed_instructions.end(); + ++it + ) { - cerr<<"Usage: create_variant <id>"<<endl; - exit(-1); + /* get the address we've missed */ + int missed_address=(*it).second; + cout << missed_address << ", "; } + cout<<endl; + /* set the base IDs for all instructions */ + virp->SetBaseIDS(); + + /* for each instruction, set the original address id to be that of the address id, as fill_in_cfg is + * designed to work on only original programs. 
+ */ + for( + std::set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); + it!=virp->GetInstructions().end(); + ++it + ) + { + Instruction_t* insn=*it; + + insn->SetOriginalAddressID(insn->GetAddress()->GetBaseID()); + } + + +} +main(int argc, char* argv[]) +{ + + if(argc!=2) + { + cerr<<"Usage: fill_in_cfg <id>"<<endl; + exit(-1); + } + VariantID_t *pidp=NULL; VariantIR_t * virp=NULL; try { /* setup the interface to the sql server */ - pqxxDB_t pqxx_interface; BaseObj_t::SetInterface(&pqxx_interface); pidp=new VariantID_t(atoi(argv[1])); @@ -222,7 +434,6 @@ main(int argc, char* argv[]) // read the db virp=new VariantIR_t(*pidp); - fill_in_cfg(virp); // write the DB back and commit our changes diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d5675fcefdd59ac656a28aea5db3308428ddeaf0 --- /dev/null +++ b/libIRDB/test/fill_in_indtargs.cpp @@ -0,0 +1,293 @@ + + +#include <libIRDB.hpp> +#include <iostream> +#include <stdlib.h> +#include <string.h> +#include <map> +#include <assert.h> +#include <elf.h> +#include <sys/mman.h> +#include <ctype.h> + + +#include "beaengine/BeaEngine.h" + +int odd_target_count=0; +int bad_target_count=0; +int bad_fallthrough_count=0; + +using namespace libIRDB; +using namespace std; + + +set< pair <int,int> > bounds; +set<int> targets; + +void possible_target(int p) +{ + for( + set< pair <int,int> >::iterator it=bounds.begin(); + it!=bounds.end(); + ++it + ) + { + pair<int,int> bound=*it; + int start=bound.first; + int end=bound.second; + if(start<=p && p<=end) + targets.insert(p); + + } +} + +void handle_argument(ARGTYPE *arg) +{ + if( arg->ArgType == MEMORY_TYPE ) + possible_target(arg->Memory.Displacement); +} + +void mark_targets(VariantIR_t *virp) +{ + for( + set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); + it!=virp->GetInstructions().end(); + ++it + ) + { + Instruction_t *insn=*it; + int 
addr=insn->GetAddress()->GetVirtualOffset(); + + /* lookup in the list of targets */ + if(targets.find(addr)!=targets.end()) + insn->SetIsIndirectTarget(true); + else + insn->SetIsIndirectTarget(false); + + } + +} +void get_instruction_targets(VariantIR_t *virp) +{ + for( + set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); + it!=virp->GetInstructions().end(); + ++it + ) + { + Instruction_t *insn=*it; + DISASM disasm; + memset(&disasm, 0, sizeof(DISASM)); + + disasm.Options = NasmSyntax + PrefixedNumeral; + disasm.Archi = 32; + disasm.EIP = (UIntPtr) insn->GetDataBits().c_str(); + disasm.VirtualAddr = insn->GetAddress()->GetVirtualOffset(); + int instr_len = Disasm(&disasm); + + assert(instr_len==insn->GetDataBits().size()); + + /* calls indicate an indirect target, pc+sizeof(instruction) */ + if(disasm.Instruction.BranchType==CallType) + { + possible_target(disasm.VirtualAddr+instr_len); + } + /* other branches can't indicate an indirect branch target */ + else if(disasm.Instruction.BranchType) + continue; + + /* otherwise, any immediate is a possible branch target */ + possible_target(disasm.Instruction.Immediat); + + handle_argument(&disasm.Argument1); + handle_argument(&disasm.Argument2); + handle_argument(&disasm.Argument3); + + + + } + +} + +void get_executable_bounds(Elf32_Shdr *shdr, FILE* fp, VariantIR_t *virp) +{ + int flags = shdr->sh_flags; + + /* not a loaded section */ + if( (flags & SHF_ALLOC) != SHF_ALLOC) + return; + + /* loaded, and contains instruction, record the bounds */ + if( (flags & SHF_EXECINSTR) != SHF_EXECINSTR) + return; + + int first=shdr->sh_addr; + int second=shdr->sh_addr+shdr->sh_size; + + bounds.insert(pair<int,int>(first,second)); + + +} + +void infer_targets(Elf32_Shdr *shdr, FILE* fp, VariantIR_t *virp) +{ + int flags = shdr->sh_flags; + + if( (flags & SHF_ALLOC) != SHF_ALLOC) + /* not a loaded section */ + return; + + if( (flags & SHF_EXECINSTR) == SHF_EXECINSTR) + /* loaded, but contains instruction. 
we'll look through the VariantIR for this section. */ + return; + + char* data=(char*)malloc(shdr->sh_size); + + fseek(fp,shdr->sh_offset, SEEK_SET); + + int res=fread(data, shdr->sh_size, 1, fp); + assert(res==1); + + for(int i=0;i<=shdr->sh_size-sizeof(void*);i++) + { + int p=*(int*)&data[i]; + possible_target(p); + } + +} + + +void print_targets() +{ + int j=1; + for( + set<int>::iterator it=targets.begin(); + it!=targets.end(); + ++it, j++ + ) + { + int target=*it; + + cout<<std::hex<<target; + if(j%10 == 0) + cout<<endl; + else + cout<<", "; + } + + cout<<endl; +} + + +void fill_in_indtargs(VariantIR_t* virp, string elf_file) +{ + Elf32_Off sec_hdr_off, sec_off; + Elf32_Half secnum, strndx, secndx; + Elf32_Word secsize; + FILE *fp; + + fp = fopen(elf_file.c_str(),"rb"); + + if(!fp) + { + cerr<<"Cannot open "<<elf_file<<"."<<endl; + exit(-1); + } + + /* allocate memory */ + Elf32_Ehdr elfhdr; + + /* Read ELF header */ + int res=fread(&elfhdr, sizeof(Elf32_Ehdr), 1, fp); + assert(res==1); + sec_hdr_off = elfhdr.e_shoff; + secnum = elfhdr.e_shnum; + strndx = elfhdr.e_shstrndx; + + /* Read Section headers */ + Elf32_Shdr *sechdrs=(Elf32_Shdr*)malloc(sizeof(Elf32_Shdr)*secnum); + fseek(fp, sec_hdr_off, SEEK_SET); + res=fread(sechdrs, sizeof(Elf32_Shdr), secnum, fp); + assert(res==secnum); + + /* look through each section and record bounds */ + for (secndx=1; secndx<secnum; secndx++) + get_executable_bounds(&sechdrs[secndx], fp, virp); + + /* look through each section and look for target possibilities */ + for (secndx=1; secndx<secnum; secndx++) + infer_targets(&sechdrs[secndx], fp, virp); + + + cout<<"Targets from data sections are: " << endl; + print_targets(); + + /* look through the instructions in the program for targets */ + get_instruction_targets(virp); + + /* mark the entry point as a target */ + possible_target(elfhdr.e_entry); + + + cout<<"All targets from data sections are: " << endl; + print_targets(); + + /* set the IR to have some instructions marked
as IB targets */ + mark_targets(virp); + +} + + + + + +main(int argc, char* argv[]) +{ + + if(argc!=3) + { + cerr<<"Usage: fill_in_indtargs <id> <elffile>"<<endl; + exit(-1); + } + + + + + VariantID_t *pidp=NULL; + VariantIR_t * virp=NULL; + + try + { + /* setup the interface to the sql server */ + pqxxDB_t pqxx_interface; + BaseObj_t::SetInterface(&pqxx_interface); + + pidp=new VariantID_t(atoi(argv[1])); + + assert(pidp->IsRegistered()==true); + + cout<<"New Variant, after reading registration, is: "<<*pidp << endl; + + // read the db + virp=new VariantIR_t(*pidp); + + + fill_in_indtargs(virp,argv[2]); + + // write the DB back and commit our changes + virp->WriteToDB(); + pqxx_interface.Commit(); + + } + catch (DatabaseError_t pnide) + { + cout<<"Unexpected database error: "<<pnide<<endl; + exit(-1); + } + + assert(virp && pidp); + + + delete pidp; + delete virp; +} diff --git a/tools/meds2pdb/meds2pdb.cpp b/tools/meds2pdb/meds2pdb.cpp index c2a21e83d57e4c0f3f5b8894ca1de294b102752d..b3b02f4c20cd3844fc7db33e8ca3a31e39981718 100644 --- a/tools/meds2pdb/meds2pdb.cpp +++ b/tools/meds2pdb/meds2pdb.cpp @@ -51,7 +51,7 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi string instructionTable = programName + "_" + "instruction"; string query2 = "INSERT INTO " + instructionTable; - query2 += " (address_id, parent_function_id, file_id, orig_address_id, data, comment) VALUES "; + query2 += " (address_id, parent_function_id, orig_address_id, data, comment) VALUES "; for (int j = i; j < i + STRIDE; ++j) { @@ -85,7 +85,6 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi query2 += "("; query2 += txn.quote(address_id) + ","; // j is the address id query2 += txn.quote(parent_function_id) + ","; - query2 += txn.quote(fileID) + ","; query2 += txn.quote(orig_address_id) + ","; // encode instruction binary data information