diff --git a/.gitattributes b/.gitattributes index 7a3faca7fff6c9cad70b46204a872aac21afe957..8371f9635550df9298fdbc7e7365b87112550acf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -260,7 +260,6 @@ libIRDB/src/util/Makefile -text libIRDB/src/util/SConscript -text libIRDB/src/util/SConstruct -text libIRDB/src/util/insn_preds.cpp -text -libIRDB/test/Makefile.in -text libIRDB/test/SConscript -text libIRDB/test/SConstruct -text libIRDB/test/build_callgraph.cpp -text @@ -274,6 +273,7 @@ libIRDB/test/create_variantir.cpp -text libIRDB/test/drop_variant.cpp -text libIRDB/test/fill_in_cfg.cpp -text libIRDB/test/fill_in_indtargs.cpp -text +libIRDB/test/fill_in_indtargs.hpp -text libIRDB/test/find_strings.cpp -text libIRDB/test/fix_calls.cpp -text libIRDB/test/generate_spri.cpp -text @@ -308,12 +308,15 @@ libMEDSannotation/include/MEDS_FPTRShadowAnnotation.hpp -text libMEDSannotation/include/MEDS_FRSafeAnnotation.hpp -text libMEDSannotation/include/MEDS_FuncAnnotation.hpp -text libMEDSannotation/include/MEDS_FuncPrototypeAnnotation.hpp -text +libMEDSannotation/include/MEDS_IBAnnotation.hpp -text +libMEDSannotation/include/MEDS_IBTAnnotation.hpp -text libMEDSannotation/include/MEDS_InstructionCheckAnnotation.hpp -text libMEDSannotation/include/MEDS_ProblemFuncAnnotation.hpp -text libMEDSannotation/include/MEDS_Register.hpp -text libMEDSannotation/include/MEDS_SafeFuncAnnotation.hpp -text libMEDSannotation/include/MEDS_ShadowAnnotation.hpp -text libMEDSannotation/include/VirtualOffset.hpp -text +libMEDSannotation/include/libMEDSAnnotation.h -text libMEDSannotation/src/FuncExitAnnotation.cpp -text libMEDSannotation/src/MEDS_AnnotationParser.cpp -text libMEDSannotation/src/MEDS_DeadRegAnnotation.cpp -text diff --git a/libIRDB/include/core/fileir.hpp b/libIRDB/include/core/fileir.hpp index 942859926b11c809512a208746484f6bd494b72c..92a480a704e01148ea62f2170ac7491a597b7d63 100644 --- a/libIRDB/include/core/fileir.hpp +++ b/libIRDB/include/core/fileir.hpp @@ -31,103 +31,103 @@ 
class FileIR_t : public BaseObj_t public: // Create a Variant from the database - FileIR_t(const VariantID_t &newprogid, File_t* fid=NULL); - virtual ~FileIR_t(); - - // DB operations - void WriteToDB(); - - // accessors and mutators in one - FunctionSet_t& GetFunctions() { return funcs; } - InstructionSet_t& GetInstructions() { return insns; } - AddressSet_t& GetAddresses() { return addrs; } - RelocationSet_t& GetRelocations() { return relocs; } - DataScoopSet_t& GetDataScoops() { return scoops; } - ICFSSet_t& GetAllICFS() { return icfs_set; } - - // generate the spri rules into the output file, fout. - void GenerateSPRI(std::ostream &fout, bool with_ilr=false); - - // generate spri, assume that orig_varirp is the original variant. - void GenerateSPRI(FileIR_t *orig_varirp, std::ostream &fout, bool with_ilr=false); - - void SetBaseIDS(); - - File_t* GetFile() { return fileptr; } - - // Used for modifying a large number of instructions. AssembleRegistry - // assembles the assembly isntructions for each registered instruction - // and clears the registry. RegisterAssembly registers the instruction - // to be assembled later. - void AssembleRegistry(); - void RegisterAssembly(Instruction_t *instr, std::string assembly); - void UnregisterAssembly(Instruction_t *instr); - std::string LookupAssembly(Instruction_t *instr); - - //Needed for inserting assembly before an instruction. - //if orig is not registered, the function returns, otherwise - //the instruction/assembly mapping of orig->assembly is altered to - //updated->assembly - //removes the mapping for orig->assembly from the map. 
- void ChangeRegistryKey(Instruction_t* orig, Instruction_t* updated); - - static int GetArchitectureBitWidth(); - void SetArchitecture(); + FileIR_t(const VariantID_t &newprogid, File_t* fid=NULL); + virtual ~FileIR_t(); + + // DB operations + void WriteToDB(); + + // accessors and mutators in one + FunctionSet_t& GetFunctions() { return funcs; } + InstructionSet_t& GetInstructions() { return insns; } + AddressSet_t& GetAddresses() { return addrs; } + RelocationSet_t& GetRelocations() { return relocs; } + DataScoopSet_t& GetDataScoops() { return scoops; } + ICFSSet_t& GetAllICFS() { return icfs_set; } + + // generate the spri rules into the output file, fout. + void GenerateSPRI(std::ostream &fout, bool with_ilr=false); + + // generate spri, assume that orig_varirp is the original variant. + void GenerateSPRI(FileIR_t *orig_varirp, std::ostream &fout, bool with_ilr=false); + + void SetBaseIDS(); + + File_t* GetFile() { return fileptr; } + + // Used for modifying a large number of instructions. AssembleRegistry + // assembles the assembly isntructions for each registered instruction + // and clears the registry. RegisterAssembly registers the instruction + // to be assembled later. + void AssembleRegistry(); + void RegisterAssembly(Instruction_t *instr, std::string assembly); + void UnregisterAssembly(Instruction_t *instr); + std::string LookupAssembly(Instruction_t *instr); + + //Needed for inserting assembly before an instruction. + //if orig is not registered, the function returns, otherwise + //the instruction/assembly mapping of orig->assembly is altered to + //updated->assembly + //removes the mapping for orig->assembly from the map. 
+ void ChangeRegistryKey(Instruction_t* orig, Instruction_t* updated); + + static int GetArchitectureBitWidth(); + void SetArchitecture(); private: - static ArchitectureDescription_t *archdesc; - - #define ASM_REG_MAX_SIZE 500000 - - typedef std::map<Instruction_t*,std::string> registry_type; - - // a pointer to the original variants IR, NULL means not yet loaded. - FileIR_t* orig_variant_ir_p; - - registry_type assembly_registry; - - void ReadFromDB(); //accesses DB - - FunctionSet_t funcs; - InstructionSet_t insns; - AddressSet_t addrs; - RelocationSet_t relocs; - TypeSet_t types; - DataScoopSet_t scoops; - VariantID_t progid; - ICFSSet_t icfs_set; - File_t* fileptr; - - std::map<db_id_t,AddressID_t*> ReadAddrsFromDB(); - std::map<db_id_t,Function_t*> ReadFuncsFromDB - ( - std::map<db_id_t,AddressID_t*> &addrMap, - std::map<db_id_t,Type_t*> &typeMap - ); - std::map<db_id_t,DataScoop_t*> ReadScoopsFromDB - ( - std::map<db_id_t,AddressID_t*> &addrMap, - std::map<db_id_t,Type_t*> &typeMap - ); - std::map<db_id_t,Instruction_t*> ReadInsnsFromDB - ( - std::map<db_id_t,Function_t*> &funcMap, - std::map<db_id_t,AddressID_t*> &addrMap, - std::map<db_id_t,Instruction_t*> &addressToInstructionMap, - std::map<Instruction_t*, db_id_t> &unresolvedICFS - ); - void ReadRelocsFromDB - ( - std::map<db_id_t,BaseObj_t*> &insnMap - ); - - std::map<db_id_t, Type_t*> ReadTypesFromDB(TypeSet_t& types); - void ReadAllICFSFromDB(std::map<db_id_t,Instruction_t*> &addr2insnMap, - std::map<Instruction_t*, db_id_t> &unresolvedICFS); - - void CleanupICFS(); - void GarbageCollectICFS(); - void DedupICFS(); + static ArchitectureDescription_t *archdesc; + + #define ASM_REG_MAX_SIZE 500000 + + typedef std::map<Instruction_t*,std::string> registry_type; + + // a pointer to the original variants IR, NULL means not yet loaded. 
+ FileIR_t* orig_variant_ir_p; + + registry_type assembly_registry; + + void ReadFromDB(); //accesses DB + + FunctionSet_t funcs; + InstructionSet_t insns; + AddressSet_t addrs; + RelocationSet_t relocs; + TypeSet_t types; + DataScoopSet_t scoops; + VariantID_t progid; + ICFSSet_t icfs_set; + File_t* fileptr; + + std::map<db_id_t,AddressID_t*> ReadAddrsFromDB(); + std::map<db_id_t,Function_t*> ReadFuncsFromDB + ( + std::map<db_id_t,AddressID_t*> &addrMap, + std::map<db_id_t,Type_t*> &typeMap + ); + std::map<db_id_t,DataScoop_t*> ReadScoopsFromDB + ( + std::map<db_id_t,AddressID_t*> &addrMap, + std::map<db_id_t,Type_t*> &typeMap + ); + std::map<db_id_t,Instruction_t*> ReadInsnsFromDB + ( + std::map<db_id_t,Function_t*> &funcMap, + std::map<db_id_t,AddressID_t*> &addrMap, + std::map<db_id_t,Instruction_t*> &addressToInstructionMap, + std::map<Instruction_t*, db_id_t> &unresolvedICFS + ); + void ReadRelocsFromDB + ( + std::map<db_id_t,BaseObj_t*> &insnMap + ); + + std::map<db_id_t, Type_t*> ReadTypesFromDB(TypeSet_t& types); + void ReadAllICFSFromDB(std::map<db_id_t,Instruction_t*> &addr2insnMap, + std::map<Instruction_t*, db_id_t> &unresolvedICFS); + + void CleanupICFS(); + void GarbageCollectICFS(); + void DedupICFS(); }; diff --git a/libIRDB/include/core/icfs.hpp b/libIRDB/include/core/icfs.hpp index bb9e1d5be31abeffe2a31bf08efe773c52c41fa3..dc66f6f6ea51c209c99087324c4851fdb8ee5cd9 100644 --- a/libIRDB/include/core/icfs.hpp +++ b/libIRDB/include/core/icfs.hpp @@ -33,11 +33,14 @@ class ICFS_t : public InstructionSet_t, public BaseObj_t { public: ICFS_t(): BaseObj_t(NULL), m_icfs_analysis_status(ICFS_Analysis_Incomplete) {} + ICFS_t(const ICFS_Analysis_Status_t p_status) : BaseObj_t(NULL), m_icfs_analysis_status(p_status) {} ICFS_t(db_id_t p_set_id, const ICFS_Analysis_Status_t p_status = ICFS_Analysis_Incomplete); ICFS_t(db_id_t p_set_id, const std::string); std::string WriteToDB(File_t *fid); - ICFS_t& operator=(const InstructionSet_t &p_other); + + // this is bad 
-- you loose data with this operator=. + void SetTargets(const InstructionSet_t &other) { InstructionSet_t::operator=(other); diff --git a/libIRDB/include/core/scoop.hpp b/libIRDB/include/core/scoop.hpp index 82738d43e4a814b08c0b93f1ccf5ac40de6a24b0..96194283d187bfbb398a04fe7e3eb51892328ba7 100644 --- a/libIRDB/include/core/scoop.hpp +++ b/libIRDB/include/core/scoop.hpp @@ -30,29 +30,35 @@ class DataScoop_t : public BaseObj_t libIRDB::AddressID_t* p_start, libIRDB::AddressID_t* p_end, libIRDB::Type_t* p_type, - int p_permissions) + int p_permissions, + std::string p_contents) : BaseObj_t(NULL), name(p_name), start(p_start), end(p_end), type(p_type), - permissions(p_permissions) + permissions(p_permissions), + contents(p_contents) { + assert(start && end); SetBaseID(id); } std::string GetName() const { return name; } + std::string GetContents() const { return contents; } libIRDB::AddressID_t* GetStart() const { return start; } libIRDB::AddressID_t* GetEnd() const { return end; } libIRDB::Type_t* GetType() const { return type; } + libIRDB::virtual_offset_t GetSize() { assert(start && end); return end->GetVirtualOffset() - start->GetVirtualOffset() ; } bool isReadable() const { return (permissions & permissions_r) == permissions_r; } bool isWriteable() const { return (permissions & permissions_w) == permissions_w; }; bool isExecuteable() const { return (permissions & permissions_x) == permissions_x; }; void SetName(const std::string &n) { name=n; } - void SetStart( libIRDB::AddressID_t* addr) { start=addr; } - void SetEnd( libIRDB::AddressID_t* addr ) { end=addr; } + void SetContents(const std::string &n) { contents=n; } + void SetStart( libIRDB::AddressID_t* addr) { assert(addr); start=addr; } + void SetEnd( libIRDB::AddressID_t* addr ) { assert(addr); end=addr; } void SetType( libIRDB::Type_t* t) { type=t; } void SetReadable() { permissions |= permissions_r; } @@ -75,6 +81,7 @@ class DataScoop_t : public BaseObj_t libIRDB::AddressID_t* end; libIRDB::Type_t* type; 
int permissions; + std::string contents; }; diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp index 466b21c80b917140ff98091cde07abd37b486fc5..c623be425b2bc0f8ec092bcb87eef1f9d27db195 100644 --- a/libIRDB/src/core/fileir.cpp +++ b/libIRDB/src/core/fileir.cpp @@ -657,6 +657,8 @@ void FileIR_t::SetBaseIDS() j=MAX(j,(*i)->GetBaseID()); for(DataScoopSet_t::const_iterator i=scoops.begin(); i!=scoops.end(); ++i) j=MAX(j,(*i)->GetBaseID()); + for(ICFSSet_t::const_iterator i=icfs_set.begin(); i!=icfs_set.end(); ++i) + j=MAX(j,(*i)->GetBaseID()); /* increment past the max ID so we don't duplicate */ j++; @@ -680,6 +682,9 @@ void FileIR_t::SetBaseIDS() for(DataScoopSet_t::const_iterator i=scoops.begin(); i!=scoops.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); + for(ICFSSet_t::const_iterator i=icfs_set.begin(); i!=icfs_set.end(); ++i) + if((*i)->GetBaseID()==NOT_IN_DATABASE) + (*i)->SetBaseID(j++); } std::string Relocation_t::WriteToDB(File_t* fid, Instruction_t* myinsn) @@ -1174,8 +1179,9 @@ std::map<db_id_t,DataScoop_t*> FileIR_t::ReadScoopsFromDB db_id_t end_id=atoi(dbintr->GetResultColumn("end_address_id").c_str()); AddressID_t* end_addr=addrMap[end_id]; int permissions=atoi(dbintr->GetResultColumn("permissions").c_str()); + std::string contents=dbintr->GetResultColumn("data"); - DataScoop_t* newscoop=new DataScoop_t(sid,name,start_addr,end_addr,type,permissions); + DataScoop_t* newscoop=new DataScoop_t(sid,name,start_addr,end_addr,type,permissions,contents); assert(newscoop); GetDataScoops().insert(newscoop); dbintr->MoveToNextRow(); diff --git a/libIRDB/test/Makefile.in b/libIRDB/test/Makefile.in deleted file mode 100644 index daa6623fd8c634f19931cc86af16a8324cd67c6b..0000000000000000000000000000000000000000 --- a/libIRDB/test/Makefile.in +++ /dev/null @@ -1,34 +0,0 @@ - -CXX=@CXX@ -CC=@CC@ - -INCLUDES= -I ../../include -I../include/ -I../../beaengine/include -LIBS=-L ../lib/ -lIRDB-core -lIRDB-cfg -lIRDB-util -lpqxx -L 
$(SECURITY_TRANSFORMS_HOME)/lib -lBeaEngine_s_d -lpq -OPT=-g @EXTRA_CXXFLAGS@ -.SUFFIXES: .exe .cpp - -PROGS=print_variant.exe list_programs.exe create_variant.exe create_variantir.exe read_variantir.exe clone.exe ilr.exe \ - drop_variant.exe generate_spri.exe fill_in_cfg.exe fix_calls.exe fill_in_indtargs.exe unfix_calls.exe \ - find_strings.exe build_callgraph.exe build_preds.exe rename_function.exe pin_address.exe - -all: $(PROGS) - -$(PROGS): ../../lib/* - - -fill_in_indtargs.exe: read_ehframe.o fill_in_indtargs.o check_thunks.o - $(CXX) fill_in_indtargs.o read_ehframe.o check_thunks.o $(INCLUDES) $(LIBS) $(OPT) -o $@ - - -.o.exe: $< ../lib/libIRDB-core.a ../lib/libIRDB-cfg.a - $(CXX) $< $(INCLUDES) $(LIBS) $(OPT) -o $@ - -.cpp.o: $< - $(CXX) $< $(INCLUDES) $(LIBS) $(OPT) -o $@ -c - -# shouldn't be needed anymore -#read_ehframe.o: read_ehframe.cpp -# $(CXX) -w -fpermissive $(INCLUDES) $(LIBS) $(OPT) $^ -o $@ -c - -clean: - rm -f $(PROGS) *.o diff --git a/libIRDB/test/SConscript b/libIRDB/test/SConscript index 8d3ef63ca309ec82c9b0edf11dd319e3ca46a084..c1e9313c865f37b580b9f1a5129b58b89928f417 100644 --- a/libIRDB/test/SConscript +++ b/libIRDB/test/SConscript @@ -8,12 +8,13 @@ myenv.Replace(SECURITY_TRANSFORMS_HOME=os.environ['SECURITY_TRANSFORMS_HOME']) cpppath=''' $SECURITY_TRANSFORMS_HOME/include $SECURITY_TRANSFORMS_HOME/libIRDB/include + $SECURITY_TRANSFORMS_HOME/libMEDSannotation/include $SECURITY_TRANSFORMS_HOME/libEXEIO/include $SECURITY_TRANSFORMS_HOME/beaengine/include ''' LIBPATH="$SECURITY_TRANSFORMS_HOME/lib" -LIBS=Split( env.subst('$BASE_IRDB_LIBS')+ " IRDB-cfg IRDB-util") +LIBS=Split( env.subst('$BASE_IRDB_LIBS')+ " IRDB-cfg IRDB-util MEDSannotation") print "The libs for libIRDB/tests are:" print LIBS diff --git a/libIRDB/test/check_thunks.cpp b/libIRDB/test/check_thunks.cpp index 6d8f4cbe213ecf35ae015e26583fc1b307a1f579..531fc738a20aa4f9daf14fad068761b5d50595d3 100644 --- a/libIRDB/test/check_thunks.cpp +++ b/libIRDB/test/check_thunks.cpp @@ -34,6 
+34,7 @@ #include <elf.h> #include "check_thunks.hpp" +#include "fill_in_indtargs.hpp" using namespace libIRDB; @@ -75,7 +76,7 @@ void check_for_thunk_offsets(FileIR_t* firp, virtual_offset_t thunk_base) if(0<addoff && addoff<100) continue; - possible_target(thunk_base+addoff); + possible_target(thunk_base+addoff, 0, ibt_provenance_t::ibtp_text); } else if(string(d.Instruction.Mnemonic)==string("lea ")) { @@ -92,7 +93,7 @@ void check_for_thunk_offsets(FileIR_t* firp, virtual_offset_t thunk_base) continue; /* record that there's a possible target here */ - possible_target(thunk_base+leaoff); + possible_target(thunk_base+leaoff, 0, ibt_provenance_t::ibtp_text); } @@ -124,7 +125,6 @@ void check_func_for_thunk_offsets(Function_t *func, Instruction_t* thunk_insn, string reg, string offset) { - bool possible_target(virtual_offset_t p, virtual_offset_t at=0); virtual_offset_t thunk_base=thunk_insn->GetFallthrough()->GetAddress()->GetVirtualOffset()+ @@ -165,7 +165,7 @@ void check_func_for_thunk_offsets(Function_t *func, Instruction_t* thunk_insn, /* record that there's a possible target here */ // cout <<"Possible thunk target (add): call:"<<thunk_call_addr<<" offset:"<<thunk_call_offset // <<" addoff: " << addoff << " total: "<< (thunk_base+addoff)<<endl; - possible_target(thunk_base+addoff); + possible_target(thunk_base+addoff, 0, ibt_provenance_t::ibtp_text); } else if(string(d.Instruction.Mnemonic)==string("lea ")) { @@ -184,7 +184,7 @@ void check_func_for_thunk_offsets(Function_t *func, Instruction_t* thunk_insn, /* record that there's a possible target here */ // cout <<"Possible thunk target (lea): call:"<<thunk_call_addr<<" offset:"<<thunk_call_offset // <<" leaoff: " << leaoff << " total: "<< (thunk_base+leaoff)<<endl; - possible_target(thunk_base+leaoff); + possible_target(thunk_base+leaoff, 0, ibt_provenance_t::ibtp_text); } diff --git a/libIRDB/test/check_thunks.hpp b/libIRDB/test/check_thunks.hpp index 
d0d17b9861c72891871dc45ce37c1e298f8f24c5..ed7ce8d80dad51446ce7e8dfb1c59c0fdc364601 100644 --- a/libIRDB/test/check_thunks.hpp +++ b/libIRDB/test/check_thunks.hpp @@ -29,8 +29,5 @@ void find_all_module_starts(libIRDB::FileIR_t* firp, std::set<libIRDB::virtual_o void check_for_thunks(libIRDB::FileIR_t* firp, const std::set<libIRDB::virtual_offset_t>& thunk_bases); -bool possible_target(libIRDB::virtual_offset_t p, libIRDB::virtual_offset_t at=0); - - #endif diff --git a/libIRDB/test/fill_in_cfg.cpp b/libIRDB/test/fill_in_cfg.cpp index 354e876255b34b3b9407135668095f700f050a47..cd70a20bedba1cfe41f91be2b6dc47a88dcc4d88 100644 --- a/libIRDB/test/fill_in_cfg.cpp +++ b/libIRDB/test/fill_in_cfg.cpp @@ -469,7 +469,13 @@ void fill_in_scoops(FileIR_t *firp) endaddr->SetFileID(firp->GetFile()->GetBaseID()); firp->GetAddresses().insert(endaddr); - Type_t *chunk_type=NULL; /* FIXME -- need to figure out the type system for schoops, but NULL should remain valid */ + string the_contents; + the_contents.resize(elfiop.sections[secndx]->get_size()); + // deal with .bss segments that are 0 init'd. 
+ if (elfiop.sections[secndx]->get_data()) + the_contents.assign(elfiop.sections[secndx]->get_data(),elfiop.sections[secndx]->get_size()); + + Type_t *chunk_type=NULL; /* FIXME -- need to figure out the type system for scoops, but NULL should remain valid */ /* permissions */ int permissions= @@ -477,7 +483,7 @@ void fill_in_scoops(FileIR_t *firp) ( elfiop.sections[secndx]->isWriteable() << 1 ) | ( elfiop.sections[secndx]->isExecutable() << 0 ) ; - DataScoop_t *newscoop=new DataScoop_t(BaseObj_t::NOT_IN_DATABASE, name, startaddr, endaddr, NULL, permissions); + DataScoop_t *newscoop=new DataScoop_t(BaseObj_t::NOT_IN_DATABASE, name, startaddr, endaddr, NULL, permissions, the_contents); assert(newscoop); firp->GetDataScoops().insert(newscoop); diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp index ad367c4622e919c8c5b9863f334c493fa119df28..98be6f7be6823708a4b3f79ff0f7d5ec01c625b7 100644 --- a/libIRDB/test/fill_in_indtargs.cpp +++ b/libIRDB/test/fill_in_indtargs.cpp @@ -21,6 +21,7 @@ #include <libIRDB-core.hpp> #include <iostream> +#include <fstream> #include <limits> #include <stdlib.h> #include <string.h> @@ -36,54 +37,84 @@ #include <exeio.h> #include "beaengine/BeaEngine.h" #include "check_thunks.hpp" +#include "fill_in_indtargs.hpp" +#include "libMEDSAnnotation.h" using namespace libIRDB; using namespace std; using namespace EXEIO; +using namespace MEDS_Annotation; -int next_icfs_set_id = 0; -ICFS_t* hellnode_tgts = NULL; -//ICFS_t* indirect_calls = NULL; - +/* + * defines + */ #define arch_ptr_bytes() (firp->GetArchitectureBitWidth()/8) -int odd_target_count=0; -int bad_target_count=0; -int bad_fallthrough_count=0; +/* + * global variables + */ -bool is_possible_target(virtual_offset_t p, virtual_offset_t addr); +// +// record the ICFS for each branch, these can come from switch tables +// +map<Instruction_t*, ICFS_t> icfs_maps; +// the bounds of the executable sections in the pgm. 
set< pair <virtual_offset_t,virtual_offset_t> > bounds; -set<virtual_offset_t> targets; +// the set of (possible) targets we've found. +map<virtual_offset_t,ibt_provenance_t> targets; + +// the set of ranges represented by the eh_frame section, could be empty for non-elf files. set< pair< virtual_offset_t, virtual_offset_t> > ranges; -// a way to map an instruction to its set of predecessors. +// a way to map an instruction to its set of (direct) predecessors. map< Instruction_t* , InstructionSet_t > preds; // keep track of jmp tables -map< Instruction_t*, InstructionSet_t > jmptables; +map< Instruction_t*, ICFS_t > jmptables; + +// a map of virtual offset -> instruction for quick access. +map<virtual_offset_t,Instruction_t*> lookupInstructionMap; + + +/* + * Forward prototypes + */ -void check_for_PIC_switch_table32_type2(Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t>& thunk_bases); -void check_for_PIC_switch_table32_type3(Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t>& thunk_bases); -void check_for_PIC_switch_table32(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t>& thunk_bases); -void check_for_PIC_switch_table64(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop); -// get switch table structure, determine ib targets -// handle both 32 and 64 bit -void check_for_nonPIC_switch_table(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop); -void check_for_nonPIC_switch_table_pattern2(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop); +static void check_for_PIC_switch_table32_type2(Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t>& thunk_bases); +static void check_for_PIC_switch_table32_type3(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t>& thunk_bases); +static void 
check_for_PIC_switch_table32(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t>& thunk_bases); +static void check_for_PIC_switch_table64(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop); +static void check_for_nonPIC_switch_table(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop); +static void check_for_nonPIC_switch_table_pattern2(FileIR_t*, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop); -void check_for_indirect_jmp(FileIR_t* const firp, Instruction_t* const insn); -void check_for_indirect_call(FileIR_t* const firp, Instruction_t* const insn); -void check_for_ret(FileIR_t* const firp, Instruction_t* const insn); +extern void read_ehframe(FileIR_t* firp, EXEIO::exeio* ); + + + +template <class T> T MAX(T a, T b) +{ + return a>b ? a : b; +} + + +/* + * range - record a new eh_frame range into the ranges global variable. + * this is called from read_ehframe. + */ void range(virtual_offset_t start, virtual_offset_t end) { pair<virtual_offset_t,virtual_offset_t> foo(start,end); ranges.insert(foo); } + +/* + * is_in_range - determine if an address is referenced by the eh_frame section + */ bool is_in_range(virtual_offset_t p) { for( @@ -101,8 +132,14 @@ bool is_in_range(virtual_offset_t p) return false; } +/* + * process_range - go through each instruction. if it's a call, check to see if the return address is in a range. if so, mark it as a possible target. + */ void process_ranges(FileIR_t* firp) { +#if 0 +Do we still want to do this? doesn't fix_calls read the eh_frame itself now and deal with this appropriately ? 
+ for( set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); it!=firp->GetInstructions().end(); @@ -111,14 +148,6 @@ void process_ranges(FileIR_t* firp) { Instruction_t *insn=*it; DISASM disasm; -#if 0 - memset(&disasm, 0, sizeof(DISASM)); - - disasm.Options = NasmSyntax + PrefixedNumeral; - disasm.Archi = 32; - disasm.EIP = (UIntPtr) insn->GetDataBits().c_str(); - disasm.VirtualAddr = insn->GetAddress()->GetVirtualOffset(); -#endif int instr_len = insn->Disassemble(disasm); assert(instr_len==insn->GetDataBits().size()); @@ -130,9 +159,10 @@ void process_ranges(FileIR_t* firp) possible_target(disasm.VirtualAddr+instr_len); } } +#endif } -bool possible_target(virtual_offset_t p, virtual_offset_t addr) +bool possible_target(virtual_offset_t p, virtual_offset_t from_addr, ibt_provenance_t prov) { /* if(p!=(int)p) { @@ -141,16 +171,16 @@ bool possible_target(virtual_offset_t p, virtual_offset_t addr) return false; } */ - if(is_possible_target(p,addr)) + if(is_possible_target(p,from_addr)) { if(getenv("IB_VERBOSE")!=NULL) { - if(addr!=0) - cout<<"Found IB target address 0x"<<std::hex<<p<<" at 0x"<<addr<<std::dec<<endl; + if(from_addr!=0) + cout<<"Found IB target address 0x"<<std::hex<<p<<" at 0x"<<from_addr<<std::dec<<endl; else cout<<"Found IB target address 0x"<<std::hex<<p<<" from unknown location"<<endl; } - targets.insert(p); + targets[p].add(prov); return true; } return false; @@ -207,15 +237,14 @@ void handle_argument(ARGTYPE *arg, Instruction_t* insn) assert(insn); assert(insn->GetAddress()); possible_target(arg->Memory.Displacement+insn->GetAddress()->GetVirtualOffset()+ - insn->GetDataBits().length()); + insn->GetDataBits().length(), ibt_provenance_t::ibtp_text); } else - possible_target(arg->Memory.Displacement); + possible_target(arg->Memory.Displacement, ibt_provenance_t::ibtp_text); } } -static map<virtual_offset_t,Instruction_t*> lookupInstructionMap; void lookupInstruction_init(FileIR_t *firp) { lookupInstructionMap.clear(); @@ -235,83 
+264,6 @@ Instruction_t *lookupInstruction(FileIR_t *firp, virtual_offset_t virtual_offset return NULL; } -void mark_jmptables(FileIR_t *firp) -{ - map< Instruction_t*, InstructionSet_t >::iterator it; - for (it = jmptables.begin(); it != jmptables.end(); ++it) - { - Instruction_t* instr = it->first; - const InstructionSet_t &instruction_targets = it->second; - - // ignore if instr already marked complete. - // FIXME: assert that fill_in_indtarg analysis matches already complete analysis. - if(instr->GetIBTargets() && instr->GetIBTargets()->IsComplete()) - continue; - - assert(instruction_targets.size() > 0); - - ICFS_t* new_icfs = new ICFS_t(next_icfs_set_id++, ICFS_Analysis_Complete); - new_icfs->SetTargets(instruction_targets); - firp->GetAllICFS().insert(new_icfs); - - instr->SetIBTargets(new_icfs); - cout << "new icfs: jmp table[" << new_icfs->GetBaseID() << "]: size: " << new_icfs->size() << endl; - } -} - -bool allTargetsIndirectlyCalledFunctions(Instruction_t *instr) -{ - if (!instr->GetIBTargets()) - return false; - - ICFS_t *targets = instr->GetIBTargets(); - for(set<Instruction_t*>::const_iterator it=targets->begin(); - it!=targets->end(); ++it) - { - Instruction_t *insn = *it; - if (!insn->GetFunction()) - return false; - else if (!insn->GetFunction()->GetEntryPoint()) - return false; - /* NB: current API assumes only 1 entry point per function */ - else if (insn->GetFunction()->GetEntryPoint() != insn) - return false; - } - - return true; -} - -/* - pre: some ib targets may be incomplete - post: all icfs are either module_complete or complete -*/ -void patch_icfs(FileIR_t *firp) -{ - for (set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); - it!=firp->GetInstructions().end(); - ++it) - { - Instruction_t* instr = *it; - - if(instr->GetIBTargets() && !( - instr->GetIBTargets()->IsComplete() || - instr->GetIBTargets()->IsModuleComplete()) ) - { - assert(instr->GetIBTargets()->IsIncomplete()); - -/* - if 
(allTargetsIndirectlyCalledFunctions(instr)) { - cerr << "ib targets for: " << instr->getDisassembly() << " reassigned to indirectcalls node" << endl; - instr->SetIBTargets(indirect_calls); - } else { -*/ - cerr << "incomplete ib targets for: " << instr->getDisassembly() << " reassigned to hellnode" << endl; - instr->SetIBTargets(hellnode_tgts); -// } - } - } -} - void mark_targets(FileIR_t *firp) { for( @@ -352,7 +304,8 @@ void get_instruction_targets(FileIR_t *firp, EXEIO::exeio* elfiop, const set<vir assert(instr_len==insn->GetDataBits().size()); check_for_PIC_switch_table32_type2(insn,disasm, elfiop, thunk_bases); - check_for_PIC_switch_table32_type3(insn,disasm, elfiop, thunk_bases); + check_for_PIC_switch_table32_type3(firp,insn,disasm, elfiop, thunk_bases); + if (firp->GetArchitectureBitWidth()==32) check_for_PIC_switch_table32(firp, insn,disasm, elfiop, thunk_bases); else if (firp->GetArchitectureBitWidth()==64) @@ -366,25 +319,16 @@ void get_instruction_targets(FileIR_t *firp, EXEIO::exeio* elfiop, const set<vir if (jmptables.count(insn) == 0) check_for_nonPIC_switch_table_pattern2(firp, insn,disasm, elfiop); - // assign hellnode type to indirect jmps that are not detected - // to be switch tables - if (jmptables.count(insn) == 0) - check_for_indirect_jmp(firp, insn); - - // assign special hellnode type to indirect calls - check_for_indirect_call(firp, insn); - check_for_ret(firp, insn); - /* other branches can't indicate an indirect branch target */ if(disasm.Instruction.BranchType) continue; /* otherwise, any immediate is a possible branch target */ - possible_target(disasm.Instruction.Immediat); - + possible_target(disasm.Instruction.Immediat,ibt_provenance_t::ibtp_text); handle_argument(&disasm.Argument1, insn); handle_argument(&disasm.Argument2, insn); handle_argument(&disasm.Argument3, insn); + handle_argument(&disasm.Argument4, insn); } } @@ -439,7 +383,29 @@ void infer_targets(FileIR_t *firp, section* shdr) p=*(int*)&data[i]; else 
p=*(virtual_offset_t*)&data[i]; // 64 or 32-bit depending on sizeof uintptr_t, may need porting for cross platform analysis. - possible_target(p, i+shdr->get_address()); + + + + ibt_provenance_t prov; + if(shdr->get_name()==".init_array") + prov=ibt_provenance_t::ibtp_initarray; + else if(shdr->get_name()==".fini_array") + prov=ibt_provenance_t::ibtp_finiarray; + else if(shdr->get_name()==".got.plt") + prov=ibt_provenance_t::ibtp_gotplt; + else if(shdr->get_name()==".got") + prov=ibt_provenance_t::ibtp_got; + else if(shdr->get_name()==".dynsym") + prov=ibt_provenance_t::ibtp_dynsym; + else if(shdr->get_name()==".symtab") + prov=ibt_provenance_t::ibtp_symtab; + else if( ! shdr->isWriteable()) + prov=ibt_provenance_t::ibtp_data; + else + prov=ibt_provenance_t::ibtp_rodata; + + possible_target(p, i+shdr->get_address(), prov); + } } @@ -449,12 +415,12 @@ void print_targets() { int j=0; for( - set<virtual_offset_t>::iterator it=targets.begin(); + map<virtual_offset_t,ibt_provenance_t>::iterator it=targets.begin(); it!=targets.end(); ++it, j++ ) { - virtual_offset_t target=*it; + virtual_offset_t target=it->first; cout<<std::hex<<target; if(j%10 == 0) @@ -466,77 +432,6 @@ void print_targets() cout<<endl; } -/* - * - * add_num_handle_fn_watches - - * - * This function is a quick and dirty way to ensure that - * certain function call watches are not interfered with by ILR - * This is done by marking the functions of interest as indirect targets - * so that they receive a spri rule of the form <original_addr> -> <newaddr> - * - * Current function list: - * fread, fread_unlocked, - * fwrite, fwrite_unlocked, - * strncpy, strncat, strncmp, strxfrm - * memcpy, memmove, memcmp, memchr, memrchr, memset - * wcsncpy, wcsncat, wcsncmp, wcsxfrm - * wmemcpy, wmemmove, wmemcmp, wmemchr, memset - * - */ -void add_num_handle_fn_watches(FileIR_t * firp) -{ - /* Loop over the set of functions */ - for( - set<Function_t*>::const_iterator it=firp->GetFunctions().begin(); - 
it!=firp->GetFunctions().end(); - ++it - ) - { - Function_t *func=*it; - char *funcname=(char *)func->GetName().c_str(); - if(!func->GetEntryPoint()) - continue; - virtual_offset_t the_offset=func->GetEntryPoint()->GetAddress()->GetVirtualOffset(); - - /* macro to facilitate the checking */ -#define CHECK_FN(fname) \ - if(strcmp(#fname, funcname)==0) \ - { \ - possible_target(the_offset); \ - } - - /* - * if one that we want to watch, - * mark it as a possible target - */ - CHECK_FN(fread); - CHECK_FN(_IO_fread); - CHECK_FN(fread_unlocked); - CHECK_FN(fwrite); - CHECK_FN(_IO_fwrite); - CHECK_FN(fwrite_unlocked); - CHECK_FN(strncpy); - CHECK_FN(strncmp); - CHECK_FN(strxfrm); - CHECK_FN(memcpy); - CHECK_FN(memmove); - CHECK_FN(memcmp); - CHECK_FN(memchr); - CHECK_FN(memrchr); - CHECK_FN(memset); - CHECK_FN(wcsncpy); - CHECK_FN(wcsxfrm); - CHECK_FN(wmemcpy); - CHECK_FN(wmemmove); - CHECK_FN(wmemcmp); - CHECK_FN(wmemchr); - CHECK_FN(wmemset); - - } - -} - set<Instruction_t*> find_in_function(string needle, Function_t *haystack) { DISASM disasm; @@ -620,8 +515,10 @@ bool backup_until(const char* insn_type_regex, Instruction_t *& prev, Instructio /* * check_for_PIC_switch_table32 - look for switch tables in PIC code for 32-bit code. 
*/ -void check_for_PIC_switch_table32(FileIR_t *firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t> &thunk_bases) +static void check_for_PIC_switch_table32(FileIR_t *firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t> &thunk_bases) { + + ibt_provenance_t prov=ibt_provenance_t::ibtp_switchtable_type1; #if 0 /* here's typical code */ @@ -748,7 +645,7 @@ cout<<hex<<"Found switch dispatch at "<<I3->GetAddress()->GetVirtualOffset()<< " if(getenv("IB_VERBOSE")!=0) cout<<"Found switch table (thunk-relative) entry["<<dec<<i<<"], "<<hex<<thunk_base+table_entry<<endl; - if(!possible_target(thunk_base+table_entry,table_base+i*4)) + if(!possible_target(thunk_base+table_entry,table_base+i*4,prov)) break; Instruction_t *ibtarget = lookupInstruction(firp, thunk_base+table_entry); @@ -764,7 +661,9 @@ cout<<hex<<"Found switch dispatch at "<<I3->GetAddress()->GetVirtualOffset()<< " if (table_size == ibtargets.size() || table_size == (ibtargets.size()-1)) { cout << "pic32 (base pattern): valid switch table detected" << endl; - jmptables[I5] = ibtargets; + jmptables[I5].SetTargets(ibtargets); + jmptables[I5].SetAnalysisStatus(ICFS_Analysis_Complete); + } } else @@ -779,8 +678,9 @@ cout<<hex<<"Found switch dispatch at "<<I3->GetAddress()->GetVirtualOffset()<< " } -void check_for_PIC_switch_table32_type2(Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t> &thunk_bases) +static void check_for_PIC_switch_table32_type2(Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t> &thunk_bases) { + ibt_provenance_t prov=ibt_provenance_t::ibtp_switchtable_type2; #if 0 /* here's typical code */ @@ -854,7 +754,7 @@ cout<<hex<<"Found (type2) switch dispatch at "<<I3->GetAddress()->GetVirtualOffs const int32_t *table_entry_ptr=(const int32_t*)&(secdata[offset+i*4]); virtual_offset_t table_entry=*table_entry_ptr; -cout<<"Checking target base:" << std::hex << 
table_base+table_entry << ", " << table_base+i*4<<endl; +// cout<<"Checking target base:" << std::hex << table_base+table_entry << ", " << table_base+i*4<<endl; if(!is_possible_target(table_base+table_entry,table_base+i*4)) break; } @@ -874,7 +774,7 @@ cout<<"Checking target base:" << std::hex << table_base+table_entry << ", " << t if(getenv("IB_VERBOSE")!=0) cout<<"Found switch table (thunk-relative) entry["<<dec<<i<<"], "<<hex<<table_base+table_entry<<endl; - if(!possible_target(table_base+table_entry,table_base+i*4)) + if(!possible_target(table_base+table_entry,table_base+i*4,prov)) break; } } @@ -890,8 +790,9 @@ cout<<"Checking target base:" << std::hex << table_base+table_entry << ", " << t } -void check_for_PIC_switch_table32_type3(Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t> &thunk_bases) +static void check_for_PIC_switch_table32_type3(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop, const set<virtual_offset_t> &thunk_bases) { + ibt_provenance_t prov=ibt_provenance_t::ibtp_switchtable_type3; #if 0 /* here's typical code */ @@ -917,11 +818,6 @@ void check_for_PIC_switch_table32_type3(Instruction_t* insn, DISASM disasm, EXEI return; - /* could be jmp [reg1+addr], jmp [reg2*k+addr], jmp [reg1+reg2*k+addr], or jmp [addr] */ - if(getenv("IB_VERBOSE")!=0) - cout<<hex<<"Found (type3) candidate for switch dispatch for '"<<disasm.CompleteInstr<<"' at "<<I5->GetAddress()->GetVirtualOffset()<< " with table_base="<<table_base<<endl; - - // find the section with the data table EXEIO::section *pSec=find_section(table_base,elfiop); if(!pSec) @@ -946,26 +842,43 @@ void check_for_PIC_switch_table32_type3(Instruction_t* insn, DISASM disasm, EXEI if(getenv("IB_VERBOSE")!=0) cout<<"Checking target base:" << std::hex << table_entry << ", " << table_base+i*4<<endl; - if(!is_possible_target(table_entry,table_base+i*4)) - { - cout<<hex<<"Found (type3) candidate for switch dispatch for '"<<disasm.CompleteInstr<<"' at 
"<<I5->GetAddress()->GetVirtualOffset()<< " with table_base="<<table_base<<endl; - cout<<"Found table_entry "<<hex<<table_entry<<" is not valid\n"<<endl; - return; - } /* if there's no base register and no index reg, */ /* then this jmp can't have more than one valid table entry */ if( disasm.Argument1.Memory.BaseRegister==0 && disasm.Argument1.Memory.IndexRegister==0 ) { /* but the table can have 1 valid entry. */ - possible_target(table_entry,table_base+0*4); - cout<<hex<<"Found constant-memory dispatch ("<<disasm.CompleteInstr<<"') at "<<I5->GetAddress()->GetVirtualOffset()<< endl; + if(pSec->get_name()==".got.plt") + { + + + Instruction_t *ibtarget = lookupInstruction(firp, table_entry); + if(ibtarget) + { + jmptables[I5].insert(ibtarget); + jmptables[I5].SetAnalysisStatus(ICFS_Analysis_Complete); + possible_target(table_entry,table_base+0*4, ibt_provenance_t::ibtp_gotplt); + cout<<hex<<"Found plt dispatch ("<<disasm.CompleteInstr<<"') at "<<I5->GetAddress()->GetVirtualOffset()<< endl; + return; + } + } + if(pSec->isWriteable()) + possible_target(table_entry,table_base+0*4, ibt_provenance_t::ibtp_data); + else + possible_target(table_entry,table_base+0*4, ibt_provenance_t::ibtp_rodata); + cout<<hex<<"Found constant-memory dispatch from non- .got.plt location ("<<disasm.CompleteInstr<<"') at "<<I5->GetAddress()->GetVirtualOffset()<< endl; return; } + if(!is_possible_target(table_entry,table_base+i*4)) + { + cout<<hex<<"Found (type3) candidate for switch dispatch for '"<<disasm.CompleteInstr<<"' at "<<I5->GetAddress()->GetVirtualOffset()<< " with table_base="<<table_base<<endl; + cout<<"Found table_entry "<<hex<<table_entry<<" is not valid\n"<<endl; + return; + } } - cout<<hex<<"Found (type3) switch dispatch at "<<I5->GetAddress()->GetVirtualOffset()<< " with table_base="<<table_base<<endl; + cout<<hex<<"Definitely found (type3) switch dispatch at "<<I5->GetAddress()->GetVirtualOffset()<< " with table_base="<<table_base<<endl; /* did we finish the loop or break 
out? */ if(i==3) @@ -983,7 +896,7 @@ void check_for_PIC_switch_table32_type3(Instruction_t* insn, DISASM disasm, EXEI if(getenv("IB_VERBOSE")!=0) cout<<"Found switch table (thunk-relative) entry["<<dec<<i<<"], "<<hex<<table_entry<<endl; - if(!possible_target(table_entry,table_base+i*4)) + if(!possible_target(table_entry,table_base+i*4,prov)) return; } } @@ -1002,8 +915,9 @@ void check_for_PIC_switch_table32_type3(Instruction_t* insn, DISASM disasm, EXEI * if so, see if we can trace back a few instructions to find a * the start of the table. */ -void check_for_PIC_switch_table64(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop) +static void check_for_PIC_switch_table64(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop) { + ibt_provenance_t prov=ibt_provenance_t::ibtp_switchtable_type4; /* here's the pattern we're looking for */ #if 0 I1: 0x000000000044425a <+218>: cmp DWORD PTR [rax+0x8],0xd // bounds checking code, 0xd cases. switch(i) has i stored in [rax+8] in this e.g. 
@@ -1218,7 +1132,7 @@ DN: 0x4824XX: .long 0x4824e0-LN const int *table_entry_ptr=(const int*)&(secdata[offset]); virtual_offset_t table_entry=*table_entry_ptr; - if(!possible_target(D1+table_entry)) + if(!possible_target(D1+table_entry, 0/* from addr unknown */,prov)) break; if(getenv("IB_VERBOSE")) @@ -1255,7 +1169,8 @@ DN: 0x4824XX: .long 0x4824e0-LN if (table_size == ibtargets.size() || table_size == (ibtargets.size()-1)) { cout << "pic64: valid switch table detected" << endl; - jmptables[I8] = ibtargets; + jmptables[I8].SetTargets(ibtargets); + jmptables[I8].SetAnalysisStatus(ICFS_Analysis_Complete); } } } @@ -1273,8 +1188,9 @@ DN: 0x4824XX: .long 0x4824e0-LN nb: handles both 32 and 64 bit */ -void check_for_nonPIC_switch_table_pattern2(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop) +static void check_for_nonPIC_switch_table_pattern2(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop) { + ibt_provenance_t prov=ibt_provenance_t::ibtp_switchtable_type5; Instruction_t *I1 = NULL; Instruction_t *IJ = insn; @@ -1316,7 +1232,7 @@ void check_for_nonPIC_switch_table_pattern2(FileIR_t* firp, Instruction_t* insn, cout<<"(nonPIC-pattern2): size of jmp table: "<< table_size << endl; // find the section with the data table - EXEIO::section *pSec=find_section(table_offset,elfiop); + EXEIO::section *pSec=find_section(table_offset,elfiop); if(!pSec) { return; @@ -1342,6 +1258,7 @@ void check_for_nonPIC_switch_table_pattern2(FileIR_t* firp, Instruction_t* insn, const virtual_offset_t *table_entry_ptr=(const virtual_offset_t*)&(secdata[offset+i*arch_ptr_bytes()]); virtual_offset_t table_entry=*table_entry_ptr; + possible_target(table_entry,0,prov); Instruction_t *ibtarget = lookupInstruction(firp, table_entry); if (!ibtarget) { @@ -1357,7 +1274,8 @@ void check_for_nonPIC_switch_table_pattern2(FileIR_t* firp, Instruction_t* insn, cout << "(non-PIC) valid switch table found" << endl; - jmptables[IJ] = ibtargets; + 
jmptables[IJ].SetTargets(ibtargets); + jmptables[IJ].SetAnalysisStatus(ICFS_Analysis_Complete); } @@ -1374,8 +1292,9 @@ void check_for_nonPIC_switch_table_pattern2(FileIR_t* firp, Instruction_t* insn, nb: handles both 32 and 64 bit */ -void check_for_nonPIC_switch_table(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop) +static void check_for_nonPIC_switch_table(FileIR_t* firp, Instruction_t* insn, DISASM disasm, EXEIO::exeio* elfiop) { + ibt_provenance_t prov=ibt_provenance_t::ibtp_switchtable_type6; Instruction_t *I1 = NULL; Instruction_t *I2 = NULL; Instruction_t *I4 = NULL; @@ -1475,6 +1394,7 @@ void check_for_nonPIC_switch_table(FileIR_t* firp, Instruction_t* insn, DISASM d else assert(0 && "Unknown arch size."); + possible_target(table_entry, 0 /* from addr unknown */, prov); Instruction_t *ibtarget = lookupInstruction(firp, table_entry); if (!ibtarget) { if(getenv("IB_VERBOSE")) @@ -1488,191 +1408,500 @@ void check_for_nonPIC_switch_table(FileIR_t* firp, Instruction_t* insn, DISASM d } cout << "(non-PIC) valid switch table found" << endl; - jmptables[IJ] = ibtargets; + jmptables[IJ].SetTargets(ibtargets); + jmptables[IJ].SetAnalysisStatus(ICFS_Analysis_Complete); } -template <class T> T MAX(T a, T b) +void calc_preds(FileIR_t* firp) { - return a>b ? 
a : b; + preds.clear(); + for( + set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it + ) + { + Instruction_t* insn=*it; + if(insn->GetTarget()); + preds[insn->GetTarget()].insert(insn); + if(insn->GetFallthrough()); + preds[insn->GetFallthrough()].insert(insn); + } } -void icfs_init(FileIR_t* firp) + +void handle_ib_annot(FileIR_t* firp,Instruction_t* insn, MEDS_IBAnnotation* p_ib_annotation) { - assert(firp); - db_id_t max_id=0; - for(ICFSSet_t::iterator it=firp->GetAllICFS().begin(); it!=firp->GetAllICFS().end(); ++it) + if(p_ib_annotation->IsComplete()) { - max_id=MAX<db_id_t>(max_id, (*it)->GetBaseID()); + jmptables[insn].SetAnalysisStatus(ICFS_Analysis_Complete); + } +} +void handle_ibt_annot(FileIR_t* firp,Instruction_t* insn, MEDS_IBTAnnotation* p_ibt_annotation) +{ +/* + * ibt_prov reason codes + * static const provtype_t ibtp_stars_ret=1<<11; + * static const provtype_t ibtp_stars_switch=1<<12; + * static const provtype_t ibtp_stars_data=1<<13; + * static const provtype_t ibtp_stars_unknown=1<<14; + * static const provtype_t ibtp_stars_addressed=1<<15; + * static const provtype_t ibtp_stars_unreachable=1<<15; + */ +/* meds annotations + * typedef enum { SWITCH, RET, DATA, UNREACHABLE, ADDRESSED, UNKNOWN } ibt_reason_code_t; + */ + switch(p_ibt_annotation->GetReason()) + { + case MEDS_IBTAnnotation::SWITCH: + { + possible_target((EXEIO::virtual_offset_t)p_ibt_annotation->getVirtualOffset().getOffset(), + 0,ibt_provenance_t::ibtp_stars_switch); + libIRDB::virtual_offset_t addr=(libIRDB::virtual_offset_t)p_ibt_annotation->GetXrefAddr(); + Instruction_t* fromib=lookupInstruction(firp, addr); + Instruction_t* ibt=lookupInstruction(firp, p_ibt_annotation->getVirtualOffset().getOffset()); + if(fromib && ibt) + { + jmptables[fromib].insert(ibt); + } + else + { + cout<<"Warning: cannot find source or dest for switch icfs."<<endl; + } + break; + } + case MEDS_IBTAnnotation::RET: + { + /* we are not 
going to mark return points as IBTs yet. that's fix-calls job */ + // possible_target((EXEIO::virtual_offset_t)p_ibt_annotation->getVirtualOffset().getOffset(), + // 0,ibt_provenance_t::ibtp_stars_ret); + + + libIRDB::virtual_offset_t fromaddr=(libIRDB::virtual_offset_t)p_ibt_annotation->GetXrefAddr(); + Instruction_t* fromib=lookupInstruction(firp, fromaddr); + libIRDB::virtual_offset_t toaddr=p_ibt_annotation->getVirtualOffset().getOffset(); + Instruction_t* ibt=lookupInstruction(firp, toaddr); + if(fromib && ibt) + { + jmptables[fromib].insert(ibt); + } + else + { + cout<<"Warning: cannot find source ("<<hex<<fromaddr<<") or dest ("<<hex<<toaddr<<") for ret icfs."<<endl; + } + break; + } + case MEDS_IBTAnnotation::DATA: + { + possible_target((EXEIO::virtual_offset_t)p_ibt_annotation->getVirtualOffset().getOffset(), + 0,ibt_provenance_t::ibtp_stars_data); + break; + } + case MEDS_IBTAnnotation::UNREACHABLE: + { + possible_target((EXEIO::virtual_offset_t)p_ibt_annotation->getVirtualOffset().getOffset(), + 0,ibt_provenance_t::ibtp_stars_unreachable); + break; + } + case MEDS_IBTAnnotation::ADDRESSED: + { + possible_target((EXEIO::virtual_offset_t)p_ibt_annotation->getVirtualOffset().getOffset(), + 0,ibt_provenance_t::ibtp_stars_addressed); + break; + } + case MEDS_IBTAnnotation::UNKNOWN: + { + possible_target((EXEIO::virtual_offset_t)p_ibt_annotation->getVirtualOffset().getOffset(), + 0,ibt_provenance_t::ibtp_stars_unknown); + break; + } + default: + { + assert(0); // unexpected ibt annotation. 
+ } } - next_icfs_set_id = max_id+1; - cerr<<"Found max ICFS id=="<<max_id<<endl; - hellnode_tgts = new ICFS_t(next_icfs_set_id++, ICFS_Analysis_Module_Complete); -// indirect_calls = new ICFS_t(next_icfs_set_id++, ICFS_Analysis_Module_Complete); - firp->GetAllICFS().insert(hellnode_tgts); - cout << "new icfs: hellnode targets" << endl; - cout<<"icfs_init: size of ICFS set"<<firp->GetAllICFS().size()<<endl; -// firp->GetAllICFS().insert(indirect_calls); + + } -void icfs_set_indirect_calls(FileIR_t* const firp, ICFS_t* const targets) +void read_stars_xref_file(FileIR_t* firp) { - assert(firp && targets); - for( - FunctionSet_t::const_iterator it=firp->GetFunctions().begin(); - it!=firp->GetFunctions().end(); - ++it + + string BINARY_NAME="a.ncexe"; + string SHARED_OBJECTS_DIR="shared_objects"; + + string fileBasename = basename((char*)firp->GetFile()->GetURL().c_str()); + int ibs=0; + int ibts=0; + + MEDS_AnnotationParser annotationParser; + string annotationFilename; + // need to map filename to integer annotation file produced by STARS + // this should be retrieved from the IRDB but for now, we use files to store annotations + // convention from within the peasoup subdirectory is: + // a.ncexe.infoannot + // shared_objects/<shared-lib-filename>.infoannot + if (fileBasename==BINARY_NAME) + annotationFilename = BINARY_NAME; + else + annotationFilename = SHARED_OBJECTS_DIR + "/" + fileBasename ; + + annotationParser.parseFile(annotationFilename+".STARSxrefs"); + + for( + set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it ) - { - Function_t *func=*it; + { + Instruction_t* insn=*it; + virtual_offset_t irdb_vo = insn->GetAddress()->GetVirtualOffset(); + VirtualOffset vo(irdb_vo); - // no entry point, doesn't count - if(!func->GetEntryPoint()) - continue; + /* find it in the annotations */ + pair<MEDS_Annotations_t::iterator,MEDS_Annotations_t::iterator> ret; + ret = 
annotationParser.getAnnotations().equal_range(vo); + MEDS_IBAnnotation* p_ib_annotation; + MEDS_IBTAnnotation* p_ibt_annotation; - // if it's no an indirectly called function, it doesn't count - if(!func->GetEntryPoint()->GetIndirectBranchTargetAddress()) - continue; + /* for each annotation for this instruction */ + for (MEDS_Annotations_t::iterator ait = ret.first; ait != ret.second; ++ait) + { + /* is this annotation a funcSafe annotation? */ + p_ib_annotation=dynamic_cast<MEDS_IBAnnotation*>(ait->second); + if(p_ib_annotation && p_ib_annotation->isValid()) + { + ibs++; + handle_ib_annot(firp,insn,p_ib_annotation); + } + p_ibt_annotation=dynamic_cast<MEDS_IBTAnnotation*>(ait->second); + if(p_ibt_annotation && p_ibt_annotation->isValid()) + { + ibts++; + handle_ibt_annot(firp,insn,p_ibt_annotation); + } + } + } + + cout<<"Found "<<ibs<<" ibs and "<<ibts<<" ibts in the STARSxref file."<<endl; - targets->insert(func->GetEntryPoint()); +} + +void process_dynsym(FileIR_t* firp) +{ + FILE *dynsymfile = popen("$PS_OBJDUMP -T readeh_tmp_file.exe | $PS_GREP '^[0-9]\\+' | $PS_GREP -v UND | awk '{print $1;}' | $PS_GREP -v '^$'", "r"); + assert(dynsymfile); + virtual_offset_t target=0; + while( fscanf(dynsymfile, "%x", &target) != -1) + { + possible_target(target,0,ibt_provenance_t::ibtp_dynsym); } } -void icfs_set_hellnode_targets(FileIR_t* const firp, ICFS_t* const targets) + +ICFS_t* setup_hellnode(FileIR_t* firp, ibt_provenance_t allowed, ibt_provenance_t warn) { - assert(firp && targets); - for( - InstructionSet_t::const_iterator it=firp->GetInstructions().begin(); - it!=firp->GetInstructions().end(); ++it) + ICFS_t* hn=new ICFS_t(ICFS_Analysis_Module_Complete); + + for( + set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it + ) { Instruction_t* insn=*it; - if(insn->GetIndirectBranchTargetAddress()) + if(insn->GetIndirectBranchTargetAddress() == NULL) + continue; + + ibt_provenance_t 
prov=targets[insn->GetAddress()->GetVirtualOffset()]; + + if(prov.isPartiallySet(allowed)) + { + hn->insert(insn); + } + else if(prov.isPartiallySet(warn)) { - targets->insert(insn); + std::ofstream ofs ("warning.txt", std::ofstream::out); + ofs<<"Sanity issue: STARS marked something as an IBT that FII didn't find. Please debug."<<endl; } + + } -} + return hn; +} -void check_for_ret(FileIR_t* const firp, Instruction_t* const insn) +ICFS_t* setup_call_hellnode(FileIR_t* firp) { - assert(firp && insn); + ibt_provenance_t allowed= + ibt_provenance_t::ibtp_data | + ibt_provenance_t::ibtp_text | + ibt_provenance_t::ibtp_stars_addressed | + ibt_provenance_t::ibtp_unknown | + ibt_provenance_t::ibtp_stars_unreachable | + ibt_provenance_t::ibtp_rodata | + ibt_provenance_t::ibtp_initarray | // .init loops through the init_array, and calls them + ibt_provenance_t::ibtp_finiarray | // .fini loops through the fini_array, and calls them + ibt_provenance_t::ibtp_user; - DISASM d; - insn->Disassemble(d); + ibt_provenance_t warn= + ibt_provenance_t::ibtp_stars_unknown | // couldn't parse stars annotation's reason code + ibt_provenance_t::ibtp_got; // warn if we found something in zero-init'd got. - if(strstr(d.Instruction.Mnemonic, "ret")==NULL) - return; - // already analysed by ida. - if(insn->GetIBTargets() && insn->GetIBTargets()->IsComplete()) - return; +// would like to sanity check better. +// ibt_provenance_t::ibtp_stars_data | // warn if stars reports it's in data, but !allowed. + + /* + * these aren't good enough reasons for a call instruction to transfer somewhere. + * ibt_provenance_t::ibtp_eh_frame // only libc should xfer. + * ibt_provenance_t::ibtp_gotplt // only an analyzed jump should xfer. + * ibt_provenance_t::ibtp_entrypoint // only ld.so or kernel should xfer. + * ibt_provenance_t::ibtp_texttoprintf // shouldn't xfer if addr passed to printf. + * ibt_provenance_t::ibtp_dynsym // symbol resolved to other module, this module should xfer directly. 
+ * ibt_provenance_t::ibtp_symtab // user info only. + * ibt_provenance_t::ibtp_stars_ret // stars says a return goes here, calls shouldn't. + * ibt_provenance_t::ibtp_stars_switch // stars says switch target. + * ibt_provenance_t::ibtp_switchtable_type1 // FII switch targets. + * ibt_provenance_t::ibtp_switchtable_type2 + * ibt_provenance_t::ibtp_switchtable_type3 + * ibt_provenance_t::ibtp_switchtable_type4 + * ibt_provenance_t::ibtp_switchtable_type5 + * ibt_provenance_t::ibtp_switchtable_type6 + * ibt_provenance_t::ibtp_switchtable_type7 + * ibt_provenance_t::ibtp_switchtable_type8 + * ibt_provenance_t::ibtp_switchtable_type9 + * ibt_provenance_t::ibtp_switchtable_type10 + */ + + return setup_hellnode(firp,allowed,warn); - insn->SetIBTargets(hellnode_tgts); } -// find any indirect jumps in the pgm and mark them as having a hell node ICFS if they don't -// already have a complete ICFS. -void check_for_indirect_jmp(FileIR_t* const firp, Instruction_t* const insn) +ICFS_t* setup_jmp_hellnode(FileIR_t* firp) { - assert(firp && insn); + ibt_provenance_t allowed= + ibt_provenance_t::ibtp_data | + ibt_provenance_t::ibtp_text | + ibt_provenance_t::ibtp_stars_addressed | + ibt_provenance_t::ibtp_unknown | + ibt_provenance_t::ibtp_stars_unreachable | + ibt_provenance_t::ibtp_rodata | + ibt_provenance_t::ibtp_gotplt | + ibt_provenance_t::ibtp_user; + + ibt_provenance_t warn= + ibt_provenance_t::ibtp_stars_unknown | // couldn't parse stars annotation's reason code + ibt_provenance_t::ibtp_got; // warn if we found something in zero-init'd got. + +// ibt_provenance_t::ibtp_stars_data | // warn if stars reports it's in data, but !allowed. - // already analysed by ida. - if(insn->GetIBTargets() && insn->GetIBTargets()->IsComplete()) - return; + /* + * these aren't good enough reasons for a jmp instruction to transfer somewhere. + * ibt_provenance_t::ibtp_eh_frame // only libc should xfer. + * ibt_provenance_t::ibtp_initarray // only ld.so should xfer. 
+ * ibt_provenance_t::ibtp_finiarray // only ld.so should xfer. + * ibt_provenance_t::ibtp_entrypoint // only ld.so or kernel should xfer. + * ibt_provenance_t::ibtp_texttoprintf // shouldn't xfer if addr passed to printf. + * ibt_provenance_t::ibtp_dynsym // symbol resolved to other module, this module should xfer directly. + * ibt_provenance_t::ibtp_symtab // user info only. + * ibt_provenance_t::ibtp_stars_ret // stars says a return goes here, calls shouldn't. + * ibt_provenance_t::ibtp_stars_switch // stars says switch target. + * ibt_provenance_t::ibtp_switchtable_type1 // FII switch targets. + * ibt_provenance_t::ibtp_switchtable_type2 + * ibt_provenance_t::ibtp_switchtable_type3 + * ibt_provenance_t::ibtp_switchtable_type4 + * ibt_provenance_t::ibtp_switchtable_type5 + * ibt_provenance_t::ibtp_switchtable_type6 + * ibt_provenance_t::ibtp_switchtable_type7 + * ibt_provenance_t::ibtp_switchtable_type8 + * ibt_provenance_t::ibtp_switchtable_type9 + * ibt_provenance_t::ibtp_switchtable_type10 + */ - DISASM d; - insn->Disassemble(d); + return setup_hellnode(firp,allowed,warn); - if(strstr(d.Instruction.Mnemonic, "jmp")==NULL) - return; +} - if(d.Argument1.ArgType®ISTER_TYPE) - { - insn->SetIBTargets(hellnode_tgts); - } - else if(d.Argument1.ArgType&MEMORY_TYPE) + +ICFS_t* setup_ret_hellnode(FileIR_t* firp) +{ + ibt_provenance_t allowed= + ibt_provenance_t::ibtp_stars_ret | // stars says a return goes here, and this return isn't analyzeable. + ibt_provenance_t::ibtp_unknown | + ibt_provenance_t::ibtp_stars_unreachable | + ibt_provenance_t::ibtp_user; + + ibt_provenance_t warn= + ibt_provenance_t::ibtp_stars_unknown | // couldn't parse stars annotation's reason code + ibt_provenance_t::ibtp_got; // warn if we found something in zero-init'd got. + + + +// would like to sanity check better. +// ibt_provenance_t::ibtp_stars_data | // warn if stars reports it's in data, but !allowed. 
+ + + /* + * these aren't good enough reasons for a ret instruction to transfer somewhere. + * ibt_provenance_t::ibtp_eh_frame // only libc should xfer. + * ibt_provenance_t::ibtp_initarray // only ld.so should xfer. + * ibt_provenance_t::ibtp_finiarray // only ld.so should xfer. + * ibt_provenance_t::ibtp_entrypoint // only ld.so or kernel should xfer. + * ibt_provenance_t::ibtp_texttoprintf // shouldn't xfer if addr passed to printf. + * ibt_provenance_t::ibtp_dynsym // symbol resolved to other module, this module should xfer directly. + * ibt_provenance_t::ibtp_symtab // user info only. + * ibt_provenance_t::ibtp_stars_ret // stars says a return goes here, calls shouldn't. + * ibt_provenance_t::ibtp_stars_switch // stars says switch target. + * ibt_provenance_t::ibtp_switchtable_type1 // FII switch targets. + * ibt_provenance_t::ibtp_switchtable_type2 + * ibt_provenance_t::ibtp_switchtable_type3 + * ibt_provenance_t::ibtp_switchtable_type4 + * ibt_provenance_t::ibtp_switchtable_type5 + * ibt_provenance_t::ibtp_switchtable_type6 + * ibt_provenance_t::ibtp_switchtable_type7 + * ibt_provenance_t::ibtp_switchtable_type8 + * ibt_provenance_t::ibtp_switchtable_type9 + * ibt_provenance_t::ibtp_switchtable_type10 + * ibt_provenance_t::ibtp_data // returns likely shouldn't be used to jump to data or addressed text chunks. may need to relax later. + * ibt_provenance_t::ibtp_text + * ibt_provenance_t::ibtp_stars_addressed + * ibt_provenance_t::ibtp_rodata + * ibt_provenance_t::ibtp_gotplt + */ + + ICFS_t* ret_hell_node=setup_hellnode(firp,allowed,warn); + + + // add unmarked return points. fix_calls will deal with whether they need to be pinned or not later. 
+ for( + InstructionSet_t::const_iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it + ) { - insn->SetIBTargets(hellnode_tgts); + Instruction_t* insn=*it; + DISASM d; + insn->Disassemble(d); + if(string("call ")==d.Instruction.Mnemonic && insn->GetFallthrough()) + { + ret_hell_node->insert(insn->GetFallthrough()); + } } + + return ret_hell_node; + } -void check_for_indirect_call(FileIR_t* const firp, Instruction_t* const insn) +void print_icfs(FileIR_t* firp) { - assert(firp && insn); - - DISASM d; - insn->Disassemble(d); + cout<<"Printing ICFS sets."<<endl; + for( + InstructionSet_t::const_iterator it=firp->GetInstructions().begin(); + it!=firp->GetInstructions().end(); + ++it + ) + { + Instruction_t* insn=*it; + ICFS_t *icfs=insn->GetIBTargets(); - if (d.Instruction.BranchType!=CallType) - return; - - if(d.Argument1.ArgType&CONSTANT_TYPE) - return; + // not an IB + if(!icfs) + continue; - // already analysed by ida. - if(insn->GetIBTargets() && insn->GetIBTargets()->IsComplete()) - return; + cout<<hex<<insn->GetAddress()->GetVirtualOffset()<<" -> "; - insn->SetIBTargets(hellnode_tgts); -// insn->SetIBTargets(indirect_calls); + for(ICFS_t::const_iterator icfsit=icfs->begin(); icfsit!=icfs->end(); ++icfsit) + { + Instruction_t* target=*icfsit; + cout<<hex<<target->GetAddress()->GetVirtualOffset()<<" "; + } + cout<<endl; + } } - -void calc_preds(FileIR_t* firp) +void setup_icfs(FileIR_t* firp) { - preds.clear(); + // setup calls, jmps and ret hell nodes. 
+ ICFS_t *call_hell = setup_call_hellnode(firp); + firp->GetAllICFS().insert(call_hell); + + ICFS_t *jmp_hell = setup_jmp_hellnode(firp); + firp->GetAllICFS().insert(jmp_hell); + + ICFS_t *ret_hell = setup_ret_hellnode(firp); + firp->GetAllICFS().insert(ret_hell); + + + // for each instruction for( set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); it!=firp->GetInstructions().end(); ++it ) - { - Instruction_t* insn=*it; - if(insn->GetTarget()); - preds[insn->GetTarget()].insert(insn); - if(insn->GetFallthrough()); - preds[insn->GetFallthrough()].insert(insn); - } -} + { + // if we already got it complete (via stars or FII) + Instruction_t* insn=*it; + if(jmptables[insn].IsComplete()) + { +cout<<"jump table complete for "<<hex<<insn->GetAddress()->GetVirtualOffset()<<endl; + // get the strcuture into the IRDB + ICFS_t* nn=new ICFS_t(jmptables[insn]); + firp->GetAllICFS().insert(nn); + insn->SetIBTargets(nn); -void fill_in_indtargs(FileIR_t* firp, exeio* elfiop, std::list<virtual_offset_t> forced_pins) -{ - if(getenv("IB_VERBOSE")!=0) - for( - set<Instruction_t*>::const_iterator it=firp->GetInstructions().begin(); - it!=firp->GetInstructions().end(); - ++it - ) + // that's all we need to do + continue; + } + + // disassemble the instruction, and figure out which type of hell node we need. 
+ DISASM d; + insn->Disassemble(d); + if(string("ret ")==d.Instruction.Mnemonic) { - Instruction_t* insn=*it; - if(insn->GetIndirectBranchTargetAddress()) - cout<<"Insn at "<<insn->GetAddress()->GetVirtualOffset()<<" already has ibt "<< - insn->GetIndirectBranchTargetAddress()->GetVirtualOffset()<<endl; - +cout<<"using ret hell node for "<<hex<<insn->GetAddress()->GetVirtualOffset()<<endl; + insn->SetIBTargets(ret_hell); } + else if ( (string("call ")==d.Instruction.Mnemonic) && ((d.Argument1.ArgType&0xffff0000&CONSTANT_TYPE)!=CONSTANT_TYPE)) + { +cout<<"using call hell node for "<<hex<<insn->GetAddress()->GetVirtualOffset()<<endl; + // indirect call + insn->SetIBTargets(call_hell); + } + else if ( (string("jmp ")==d.Instruction.Mnemonic) && ((d.Argument1.ArgType&0xffff0000&CONSTANT_TYPE)!=CONSTANT_TYPE)) + { +cout<<"using jmp hell node for "<<hex<<insn->GetAddress()->GetVirtualOffset()<<endl; + // indirect jmp + insn->SetIBTargets(jmp_hell); + } + + } + if(getenv("IB_VERBOSE")!=NULL) + print_icfs(firp); +} + + + + +/* + * fill_in_indtargs - main driver routine for + */ +void fill_in_indtargs(FileIR_t* firp, exeio* elfiop, std::list<virtual_offset_t> forced_pins) +{ set<virtual_offset_t> thunk_bases; find_all_module_starts(firp,thunk_bases); - // reset global vars bounds.clear(); ranges.clear(); targets.clear(); + jmptables.clear(); + lookupInstruction_init(firp); calc_preds(firp); -#if 0 -/* info gotten from EXEIO class now. 
*/ - ::Elf64_Off sec_hdr_off, sec_off; - ::Elf_Half secnum, strndx, secndx; - ::Elf_Word secsize; - - /* Read ELF header */ - virtual_offset_t sec_hdr_off = elfiop->get_sections_offset(); - virtual_offset_t strndx = elfiop->get_section_name_str_index(); -#endif int secnum = elfiop->sections.size(); int secndx=0; @@ -1685,100 +1914,46 @@ void fill_in_indtargs(FileIR_t* firp, exeio* elfiop, std::list<virtual_offset_t> infer_targets(firp, elfiop->sections[secndx]); + /* should move to separate function */ std::list<virtual_offset_t>::iterator forced_iterator = forced_pins.begin(); for (; forced_iterator != forced_pins.end(); forced_iterator++) { - possible_target(*forced_iterator); + possible_target(*forced_iterator, 0, ibt_provenance_t::ibtp_user); } - cout<<"========================================="<<endl; - cout<<"Targets from data sections (and forces) are: " << endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass1="<<std::dec<<targets.size()<<endl; - print_targets(); - cout<<"========================================="<<endl; - /* look through the instructions in the program for targets */ get_instruction_targets(firp, elfiop, thunk_bases); /* mark the entry point as a target */ - possible_target(elfiop->get_entry()); - - - cout<<"========================================="<<endl; - cout<<"All targets from data+instruction sections are: " << endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass2="<<std::dec<<targets.size()<<endl; - print_targets(); - cout<<"========================================="<<endl; + possible_target(elfiop->get_entry(),0,ibt_provenance_t::ibtp_entrypoint); /* Read the exception handler frame so that those indirect branches are accounted for */ - void read_ehframe(FileIR_t* firp, EXEIO::exeio* ); + /* then now process the ranges and mark IBTs as necessarthat have exception handling */ read_ehframe(firp, elfiop); - - cout<<"========================================="<<endl; - cout<<"All targets from data+instruction+eh_header sections are: 
" << endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass3="<<std::dec<<targets.size()<<endl; - print_targets(); - cout<<"========================================="<<endl; - - - /* now process the ranges that have exception handling */ process_ranges(firp); - cout<<"========================================="<<endl; - cout<<"All targets from data+instruction+eh_header sections+eh_header_ranges are: " << endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass4="<<std::dec<<targets.size()<<endl; - print_targets(); - cout<<"========================================="<<endl; - - /* now process the ranges that have exception handling */ + + /* now, find the .GOT addr and process any pc-rel things for x86-32 ibts. */ check_for_thunks(firp, thunk_bases); - cout<<"========================================="<<endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass5="<<std::dec<<targets.size()<<endl; - print_targets(); - cout<<"========================================="<<endl; - /* Add functions containing unsigned int params to the list */ - add_num_handle_fn_watches(firp); - /* now process the ranges that have exception handling */ - cout<<"========================================="<<endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass6="<<std::dec<<targets.size()<<endl; - print_targets(); - cout<<"========================================="<<endl; + /* now deal with dynsym pins */ + process_dynsym(firp); + /* import info from stars */ + read_stars_xref_file(firp); - //FILE* dynsymfile = popen( "$PS_READELF --dyn-syms readeh_tmp_file.exe |grep 'FUNC GLOBAL DEFAULT'" - // "|grep -v 'FUNC GLOBAL DEFAULT UND' |sed 's/.*: *//'|cut -f1 -d' '", "r"); - FILE *dynsymfile = popen("$PS_OBJDUMP -T readeh_tmp_file.exe | $PS_GREP '^[0-9]\\+' | $PS_GREP -v UND | awk '{print $1;}' | $PS_GREP -v '^$'", "r"); - assert(dynsymfile); - virtual_offset_t target=0; - while( fscanf(dynsymfile, "%x", &target) != -1) - { - possible_target(target); - } 
cout<<"========================================="<<endl; - cout<<"# ATTRIBUTE total_indirect_targets_pass7="<<std::dec<<targets.size()<<endl; + cout<<"# ATTRIBUTE total_indirect_targets="<<std::dec<<targets.size()<<endl; print_targets(); cout<<"========================================="<<endl; - - /* set the IR to have some instructions marked as IB targets */ + /* set the IR to have some instructions marked as IB targets, and deal with the ICFS */ mark_targets(firp); -// icfs_set_indirect_calls(firp, indirect_calls); - icfs_set_hellnode_targets(firp, hellnode_tgts); - - mark_jmptables(firp); - - patch_icfs(firp); - - for(ICFSSet_t::const_iterator it=firp->GetAllICFS().begin(); - it != firp->GetAllICFS().end(); - ++it) - { - ICFS_t *icfs = *it; - cout << dec << "icfs set id: " << icfs->GetBaseID() << " #ibtargets: " << icfs->size() << " analysis status: " << icfs->GetAnalysisStatus() << endl; - } + // try to setup an ICFS for every IB. + setup_icfs(firp); } + main(int argc, char* argv[]) { int argc_iter = 2; @@ -1790,6 +1965,17 @@ main(int argc, char* argv[]) cerr<<"Usage: fill_in_indtargs <id> [addr,...]"<<endl; exit(-1); } + /* parse argumnets */ + for (argc_iter = 2; argc_iter < argc; argc_iter++) + { + char *end_ptr; + virtual_offset_t offset = strtol(argv[argc_iter], &end_ptr, 0); + if (*end_ptr == '\0') + { + cout << "force pinning: 0x" << std::hex << offset << endl; + forced_pins.push_back(offset); + } + } VariantID_t *pidp=NULL; FileIR_t * firp=NULL; @@ -1802,16 +1988,6 @@ main(int argc, char* argv[]) pidp=new VariantID_t(atoi(argv[1])); - for (argc_iter = 2; argc_iter < argc; argc_iter++) - { - char *end_ptr; - virtual_offset_t offset = strtol(argv[argc_iter], &end_ptr, 0); - if (*end_ptr == '\0') - { - cout << "force pinning: 0x" << std::hex << offset << endl; - forced_pins.push_back(offset); - } - } assert(pidp->IsRegistered()==true); @@ -1829,24 +2005,14 @@ main(int argc, char* argv[]) // read the db firp=new FileIR_t(*pidp, this_file); - 
lookupInstruction_init(firp); - icfs_init(firp); + // read the executeable file int elfoid=firp->GetFile()->GetELFOID(); pqxx::largeobject lo(elfoid); lo.to_file(pqxx_interface.GetTransaction(),"readeh_tmp_file.exe"); - - jmptables.clear(); - EXEIO::exeio* elfiop=new EXEIO::exeio; elfiop->load((const char*)"readeh_tmp_file.exe"); - - - EXEIO::dump::header(cout,*elfiop); - EXEIO::dump::section_headers(cout,*elfiop); - - // find all indirect branch targets fill_in_indtargs(firp, elfiop, forced_pins); @@ -1854,8 +2020,6 @@ main(int argc, char* argv[]) firp->WriteToDB(); delete firp; -// delete indirect_calls; - delete hellnode_tgts; } pqxx_interface.Commit(); diff --git a/libIRDB/test/fill_in_indtargs.hpp b/libIRDB/test/fill_in_indtargs.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7fea34de9e19ac18fa66a77f7c8e764542b0edcb --- /dev/null +++ b/libIRDB/test/fill_in_indtargs.hpp @@ -0,0 +1,118 @@ + +/* + * Copyright (c) 2014 - Zephyr Software LLC + * + * This file may be used and modified for non-commercial purposes as long as + * all copyright, permission, and nonwarranty notices are preserved. + * Redistribution is prohibited without prior written consent from Zephyr + * Software. + * + * Please contact the authors for restrictions applying to commercial use. + * + * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
//
// Records why an address was marked as a possible indirect branch target (IBT).
//
// The provenance is a bit set: every ibtp_* constant below is a distinct
// single-bit flag, and one object may carry any combination of them.
//
class ibt_provenance_t
{
	typedef unsigned int provtype_t;

	public:

		// Start with no provenance bits set.
		ibt_provenance_t() : value(0) { }

		// Start with exactly the bits of t set.  Left implicit: callers pass
		// raw ibtp_* flags where an ibt_provenance_t is expected.
		ibt_provenance_t(const provtype_t t) : value(t) { }

		static const provtype_t ibtp_eh_frame=1<<0;
		static const provtype_t ibtp_user=1<<1;	// requested by user
		static const provtype_t ibtp_gotplt=1<<2;
		static const provtype_t ibtp_initarray=1<<3;
		static const provtype_t ibtp_finiarray=1<<4;
		static const provtype_t ibtp_entrypoint=1<<5;
		static const provtype_t ibtp_data=1<<6;
		static const provtype_t ibtp_text=1<<7;
		static const provtype_t ibtp_texttoprintf=1<<8;
		static const provtype_t ibtp_dynsym=1<<9;
		static const provtype_t ibtp_symtab=1<<10;
		static const provtype_t ibtp_stars_ret=1<<11;
		static const provtype_t ibtp_stars_switch=1<<12;
		static const provtype_t ibtp_stars_data=1<<13;
		static const provtype_t ibtp_stars_unknown=1<<14;
		static const provtype_t ibtp_stars_addressed=1<<15;
		static const provtype_t ibtp_stars_unreachable=1<<16;
		static const provtype_t ibtp_switchtable_type1=1<<17;
		static const provtype_t ibtp_switchtable_type2=1<<18;
		static const provtype_t ibtp_switchtable_type3=1<<19;
		static const provtype_t ibtp_switchtable_type4=1<<20;
		static const provtype_t ibtp_switchtable_type5=1<<21;
		static const provtype_t ibtp_switchtable_type6=1<<22;
		static const provtype_t ibtp_switchtable_type7=1<<23;
		static const provtype_t ibtp_switchtable_type8=1<<24;
		static const provtype_t ibtp_switchtable_type9=1<<25;
		static const provtype_t ibtp_switchtable_type10=1<<26;
		static const provtype_t ibtp_rodata=1<<27;
		static const provtype_t ibtp_unknown=1<<28;	// completely unknown
		static const provtype_t ibtp_got=1<<29;	// got is 0 init'd, shouldn't see this one.

		// Merge the bits of t into this provenance (bits are never cleared).
		void add(provtype_t t) { value |=t; }
		void add(ibt_provenance_t t) { value |=t.value; }

		// True when every bit of t is set here (trivially true for an empty t).
		// Const-qualified so the query also works on const objects/references.
		bool isFullySet(provtype_t t) const { return (value&t) == t; }
		bool isFullySet(ibt_provenance_t t) const { return (value&t.value) == t.value; }

		// True when at least one bit of t is set here.
		bool isPartiallySet(provtype_t t) const { return (value&t) != 0; }
		bool isPartiallySet(ibt_provenance_t t) const { return (value&t.value) != 0; }

	private:

		// Bitwise OR of the ibtp_* flags recorded so far.
		provtype_t value;

};
"elfio/elfio_dump.hpp" +#include "fill_in_indtargs.hpp" + + using namespace libIRDB; using namespace std; @@ -512,9 +515,9 @@ classify_object_over_fdes (struct object *ob, fde *this_fde) for (; ! last_fde (ob, this_fde); this_fde = next_fde (this_fde)) { - printf("analysis addr=%p\n", this_fde); - printf("pgm addr=%p\n", (uintptr_t)this_fde+(uintptr_t)eh_offset); - printf("offset=%p\n", (uintptr_t)this_fde+(uintptr_t)eh_offset-(uintptr_t)eh_frame_addr); +// printf("analysis addr=%p\n", this_fde); +// printf("pgm addr=%p\n", (uintptr_t)this_fde+(uintptr_t)eh_offset); +// printf("offset=%p\n", (uintptr_t)this_fde+(uintptr_t)eh_offset-(uintptr_t)eh_frame_addr); struct dwarf_cie *this_cie; _Unwind_Ptr mask, pc_begin; @@ -599,13 +602,12 @@ void print_lsda_handlers(lsda_header_info* info, unsigned char* p) <<"cs_action: "<< cs_action << endl; #ifndef TEST - bool possible_target(uintptr_t p, uintptr_t at=0); /* the landing pad is a possible target if an exception is thrown */ - possible_target(cs_lp+info->Start); + possible_target(cs_lp+info->Start, 0, ibt_provenance_t::ibtp_eh_frame); /* and the return address is a possible oddity if it's used for walking the stack */ - possible_target(cs_len+cs_start+info->Start); + possible_target(cs_len+cs_start+info->Start, 0, ibt_provenance_t::ibtp_eh_frame); #endif } diff --git a/libMEDSannotation/include/MEDS_IBAnnotation.hpp b/libMEDSannotation/include/MEDS_IBAnnotation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..727bb696a9070dcbcba74fbb008dc63705aae32b --- /dev/null +++ b/libMEDSannotation/include/MEDS_IBAnnotation.hpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2014 - Zephyr Software LLC + * + * This file may be used and modified for non-commercial purposes as long as + * all copyright, permission, and nonwarranty notices are preserved. + * Redistribution is prohibited without prior written consent from Zephyr + * Software. + * + * Please contact the authors for restrictions applying to commercial use. 
+ * + * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * Author: Zephyr Software + * e-mail: jwd@zephyr-software.com + * URL : http://www.zephyr-software.com/ + * + */ + +#ifndef _MEDS_IBANNOTATION_H_ +#define _MEDS_IBANNOTATION_H_ + +#include <string> +#include "VirtualOffset.hpp" +#include "MEDS_AnnotationBase.hpp" + +namespace MEDS_Annotation +{ + +using namespace std; +using namespace MEDS_Annotation; + +// +// Class to handle one MEDS shadow annotation +// +class MEDS_IBAnnotation : public MEDS_AnnotationBase +{ + public: + typedef enum { SWITCH, RET, UNKNOWN } ib_type_t; + + MEDS_IBAnnotation( const string& p_rawLine) : the_type(UNKNOWN), count(0), complete(false) + { + setInvalid(); + parse(p_rawLine); + + }; + + ib_type_t GetType() const { return the_type; } + int GetCount() const { return count; } + bool IsComplete() const { return complete; } + + virtual const std::string toString() const + { + std::string ret="IB"; + if(IsComplete()) ret+=" COMPLETE"; + ret+=" Count=<na> type=<na>"; + return ret; + } + + protected: + + void parse(const string& p_rawLine) + { + string tofind="INSTR XREF FROMIB"; + size_t pos=p_rawLine.find(tofind); + if(pos==string::npos) + return; + + VirtualOffset vo(p_rawLine); + m_virtualOffset = vo; + + + // she be valid + setValid(); + + tofind="COMPLETE"; + size_t pos2=p_rawLine.find(tofind); + if(pos2!=string::npos) + { + pos=pos2; + complete=true; + } + + string rest=p_rawLine.substr(pos+tofind.length()); + istringstream is(rest); + is>>count; /* get count */ + + if(p_rawLine.find("RETURNTARGET")) set_type(RET); + if(p_rawLine.find("SWITCHTABLE")) set_type(SWITCH); + + } + + void set_type(ib_type_t t) + { + assert(the_type==UNKNOWN); + } + + private: + + ib_type_t the_type; + int count; + bool complete; +}; + +} + +#endif diff --git 
a/libMEDSannotation/include/MEDS_IBTAnnotation.hpp b/libMEDSannotation/include/MEDS_IBTAnnotation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0a29130570c16eb281f0be4affbfb0aef4578627 --- /dev/null +++ b/libMEDSannotation/include/MEDS_IBTAnnotation.hpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2014 - Zephyr Software LLC + * + * This file may be used and modified for non-commercial purposes as long as + * all copyright, permission, and nonwarranty notices are preserved. + * Redistribution is prohibited without prior written consent from Zephyr + * Software. + * + * Please contact the authors for restrictions applying to commercial use. + * + * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBTILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * Author: Zephyr Software + * e-mail: jwd@zephyr-software.com + * URL : http://www.zephyr-software.com/ + * + */ + +#ifndef _MEDS_IBTANNOTATION_H_ +#define _MEDS_IBTANNOTATION_H_ + +#include <string> +#include "VirtualOffset.hpp" +#include "MEDS_AnnotationBase.hpp" + +namespace MEDS_Annotation +{ + +using namespace std; +using namespace MEDS_Annotation; + +// +// Class to handle one MEDS shadow annotation +// +class MEDS_IBTAnnotation : public MEDS_AnnotationBase +{ + public: + typedef enum { SWITCH, RET, DATA, UNREACHABLE, ADDRESSED, UNKNOWN } ibt_reason_code_t; + + MEDS_IBTAnnotation( const string& p_rawLine) + : xref_addr(0), reason(UNKNOWN) + { + setInvalid(); + parse(p_rawLine); + + }; + + ApplicationAddress GetXrefAddr() { return xref_addr; } + ibt_reason_code_t GetReason() { return reason; } + + virtual const std::string toString() const + { + return "IBT"; + } + + protected: + + void parse(const string& p_rawLine) + { + string tofind="INSTR XREF IBT"; + size_t pos=p_rawLine.find(tofind); + if(pos==string::npos) + return; + + setValid(); + + VirtualOffset vo(p_rawLine); + m_virtualOffset = vo; 
+ + + stringstream stream(p_rawLine.substr(pos+tofind.length())); + + string from_type; + stream >> from_type; + + if(string("FROMIB") == from_type) + { + stream >> hex >> xref_addr; + } + else if(string("FROMDATA") == from_type) + { + stream >> hex >> xref_addr; + reason=DATA; + return; + } + else if(string("FROMUNKNOWN") == from_type) + { + // no other fields for from UNKNOWN + xref_addr=0; + } + + string reason_code; + stream >> reason_code; + + if(string("RETURNTARGET") == reason_code) + { reason=RET; } + else if(string("TAILCALLRETURNTARGET") == reason_code) + { reason=RET; } + else if(string("SWITCHTABLE") == reason_code) + { reason=SWITCH; } + else if(string("UNREACHABLEBLOCK") == reason_code) + { reason=UNREACHABLE; } + else if(string("CODEADDRESSTAKEN") == reason_code) + { reason=ADDRESSED; } + else + { reason=UNKNOWN; } + + } + + private: + + ApplicationAddress xref_addr; + ibt_reason_code_t reason; +}; + +} + +#endif diff --git a/libMEDSannotation/include/libMEDSAnnotation.h b/libMEDSannotation/include/libMEDSAnnotation.h new file mode 100644 index 0000000000000000000000000000000000000000..a4bb39f5d2ed179b79fb0d0d83d8dc444d899d03 --- /dev/null +++ b/libMEDSannotation/include/libMEDSAnnotation.h @@ -0,0 +1,22 @@ +#ifndef LIBMEDSAnnotation_h +#define LIBMEDSAnnotation_h + +#include "FuncExitAnnotation.hpp" +#include "MEDS_AnnotationBase.hpp" +#include "MEDS_AnnotationParser.hpp" +#include "MEDS_DeadRegAnnotation.hpp" +#include "MEDS_FPTRShadowAnnotation.hpp" +#include "MEDS_FRSafeAnnotation.hpp" +#include "MEDS_FuncAnnotation.hpp" +#include "MEDS_FuncPrototypeAnnotation.hpp" +#include "MEDS.hpp" +#include "MEDS_IBAnnotation.hpp" +#include "MEDS_IBTAnnotation.hpp" +#include "MEDS_InstructionCheckAnnotation.hpp" +#include "MEDS_ProblemFuncAnnotation.hpp" +#include "MEDS_Register.hpp" +#include "MEDS_SafeFuncAnnotation.hpp" +#include "MEDS_ShadowAnnotation.hpp" +#include "VirtualOffset.hpp" + +#endif diff --git 
a/libMEDSannotation/src/MEDS_AnnotationParser.cpp b/libMEDSannotation/src/MEDS_AnnotationParser.cpp index 2f181ec93e874f41492126edf777e2f3f5939e5e..c49e482b763203e4b0b3cf9acf5a8e5a81d35dfd 100644 --- a/libMEDSannotation/src/MEDS_AnnotationParser.cpp +++ b/libMEDSannotation/src/MEDS_AnnotationParser.cpp @@ -30,6 +30,8 @@ #include "MEDS_FRSafeAnnotation.hpp" #include "MEDS_FPTRShadowAnnotation.hpp" #include "MEDS_DeadRegAnnotation.hpp" +#include "MEDS_IBAnnotation.hpp" +#include "MEDS_IBTAnnotation.hpp" // @todo: multiple annotation per instruction @@ -91,7 +93,6 @@ void MEDS_AnnotationParser::parseFile(istream &p_inputStream) getline(p_inputStream, line); if (line.empty()) continue; -//cerr << "MEDS_AnnotationParser: line: " << line << endl; if(add_if_valid<MEDS_DeadRegAnnotation>(line)) continue; if(add_if_valid<MEDS_FPTRShadowAnnotation>(line)) continue; @@ -101,8 +102,9 @@ void MEDS_AnnotationParser::parseFile(istream &p_inputStream) if(add_if_valid<MEDS_ProblemFuncAnnotation>(line)) continue; if(add_if_valid<MEDS_FRSafeAnnotation>(line)) continue; if(add_if_valid<MEDS_FuncExitAnnotation>(line)) continue; + if(add_if_valid<MEDS_IBAnnotation>(line)) continue; + if(add_if_valid<MEDS_IBTAnnotation>(line)) continue; -// cout<<"Found annotation: "<<annot->toString()<<endl;\ } } diff --git a/tools/meds2pdb/meds2pdb.cpp b/tools/meds2pdb/meds2pdb.cpp index a74510bf5cfc4bb3d78d00d2e4a90ad26fb211b2..a01ae7e41d3a81fc0a519b4857db85917658ee5a 100644 --- a/tools/meds2pdb/meds2pdb.cpp +++ b/tools/meds2pdb/meds2pdb.cpp @@ -83,78 +83,6 @@ int get_file_id(char *progName, char *md5hash) } -void insert_icfs(int fileID, const vector<wahoo::Instruction*>& instructions) -{ - using namespace wahoo; - - connection conn; - work txn(conn); - txn.exec("SET client_encoding='LATIN1';"); - - int next_icfs_id=0; - - for (int i = 0; i < instructions.size(); i ++) - { - wahoo::Instruction *instruction = instructions[i]; - assert(instruction); - - const std::set<Instruction*> 
&ibts=instruction->getIBTs(); - - if(ibts.size()==0) - continue; - cerr<<"Found fromIB=="<<hex<<instruction->getAddress()<<endl; - - string query = "INSERT INTO " + icfsTable; - query += " (icfs_id,icfs_status) VALUES "; - query += "("; - query += txn.quote(next_icfs_id) + ","; - - if (instruction->isIbComplete()) - query += txn.quote(ICFS_ANALYSIS_COMPLETE_STR); - else - query += txn.quote(ICFS_ANALYSIS_INCOMPLETE_STR); - - query += ")"; - - string query2 = "INSERT INTO " + icfsMapTable; - query2 += " (icfs_id,address_id) VALUES "; - - for(set<Instruction*>::iterator it=ibts.begin(); it!=ibts.end(); it++) - { - cerr<<" Found toIBT=="<<hex<<(*it)->getAddress()<<endl; - if(it!=ibts.begin()) - query2+=","; - int target_address_id=instruction_to_addressid_map[*it]; - query2 += "("; - query2 += txn.quote(next_icfs_id) + ","; - query2 += txn.quote(target_address_id); - query2 += ")"; - - } - - // update icfs_id entry in instruction - app_iaddr_t instruction_id = address_to_instructionid_map[instruction->getAddress()]; - string query3 = "UPDATE " + instructionTable; - query3 += " SET icfs_id="; - query3 += txn.quote(next_icfs_id); - query3 += " WHERE instruction_id="; - query3 += txn.quote(instruction_id); - - query+=";"; - query2+=";"; - query3+=";"; - - txn.exec(query+query2+query3); - - cerr<<query+query2+query3<<endl; - next_icfs_id++; - } - cerr<<"Finished inserting ICFS into IR."<<endl; - txn.commit(); - -} - - // insert addresses & instructions into DB void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instructions, const vector<wahoo::Function*> &functions) { @@ -176,7 +104,7 @@ void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instruct query += " (address_id, file_id, vaddress_offset) VALUES "; string query2 = "INSERT INTO " + instructionTable; - query2 += " (instruction_id,address_id, ind_target_address_id, parent_function_id, orig_address_id, data, comment) VALUES "; + query2 += " (instruction_id,address_id, 
parent_function_id, orig_address_id, data, comment) VALUES "; for (int j = i; j < i + STRIDE; ++j) { @@ -184,8 +112,6 @@ void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instruct wahoo::Instruction *instruction = instructions[j]; app_iaddr_t addr = instruction->getAddress(); - int ind_target_address=instruction->getIBTAddress(); - address_to_instructionid_map[addr]=j; int address_id = next_address_id++; @@ -200,20 +126,6 @@ void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instruct query += txn.quote(string(buf)); query += ")"; - if(ind_target_address!=0) - { - query += ","; - ind_target_address= next_address_id++; - query += "("; - query += txn.quote(ind_target_address) + ","; - query += txn.quote(fileID) + ","; - sprintf(buf,"%lld", (long long)addr); - query += txn.quote(string(buf)); - query += ")"; - } - else - ind_target_address=-1; - int parent_function_id = -1; if (instruction->getFunction()) { @@ -226,7 +138,6 @@ void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instruct query2 += "("; query2 += txn.quote(my_to_string(j)) + ","; query2 += txn.quote(address_id) + ","; // the address id - query2 += txn.quote(ind_target_address) + ","; // the IBT address id query2 += txn.quote(parent_function_id) + ","; query2 += txn.quote(orig_address_id) + ","; @@ -531,9 +442,9 @@ void update_function_prototype(const vector<wahoo::Function*> &functions, char* int main(int argc, char **argv) { - if (argc != 12) + if (argc != 11) { - cerr << "usage: " << argv[0] << " <annotations file> <info annotation file> <file id> <func tab name> <insn tab name> <addr tab name> <types tab name> <icfs table name> <icfs map table name> <elf file> <STARSxref file>" << endl; + cerr << "usage: " << argv[0] << " <annotations file> <info annotation file> <file id> <func tab name> <insn tab name> <addr tab name> <types tab name> <icfs table name> <icfs map table name> <elf file>" << endl; return 1; } @@ -547,7 +458,6 @@ int 
main(int argc, char **argv) char *myicfsTable=argv[8]; char *myicfsMapTable=argv[9]; char *elfFile=argv[10]; - char *starsXrefFile=argv[11]; cout<<"Annotation file: "<< annotFile<<endl; cout<<"Info annotation file: "<< infoAnnotFile<<endl; @@ -559,7 +469,6 @@ int main(int argc, char **argv) cout<<"ICFSTab: "<< myicfsTable<<endl; cout<<"ICFSMapTab: "<< myicfsMapTable<<endl; cout<<"elfFile: "<< elfFile<<endl; - cout<<"xrefFile: "<< starsXrefFile<<endl; // set global vars for importing. functionTable=myFunctionTable; @@ -569,7 +478,7 @@ int main(int argc, char **argv) icfsTable=myicfsTable; icfsMapTable=myicfsMapTable; - Rewriter *rewriter = new Rewriter(elfFile, annotFile, starsXrefFile); + Rewriter *rewriter = new Rewriter(elfFile, annotFile); int fileID = atoi(fid); if(fileID<=0) @@ -588,8 +497,6 @@ int main(int argc, char **argv) insert_instructions(fileID, instructions, functions); update_functions(fileID, functions); - insert_icfs(fileID, instructions); - // add function prototype information to the IRDB update_function_prototype(functions, infoAnnotFile); exit(0); diff --git a/xform/instruction_descriptor.cpp b/xform/instruction_descriptor.cpp index 4fdfafae741a902817e067b60320ccb1cf1f0e52..50595f6cff2fa228f98820ef1f94693439ac4b3c 100644 --- a/xform/instruction_descriptor.cpp +++ b/xform/instruction_descriptor.cpp @@ -24,7 +24,6 @@ wahoo::Instruction::Instruction() { m_address = 0; - m_ibt_address = 0; m_size = -1; m_function = NULL; m_asm = ""; @@ -35,14 +34,11 @@ wahoo::Instruction::Instruction() m_varStackRef = false; m_isVisited = false; m_data = NULL; - m_ib_complete=false; - m_ibt_provenance = IBT_PROVENANCE_UNKNOWN; } wahoo::Instruction::Instruction(app_iaddr_t p_address, int p_size, Function* p_func) { m_address = p_address; - m_ibt_address = 0; m_size = p_size; m_function = p_func; m_isVisited = false; @@ -51,8 +47,6 @@ wahoo::Instruction::Instruction(app_iaddr_t p_address, int p_size, Function* p_f m_deallocSite = false; m_stackRef = false; m_data = 
NULL; - m_ib_complete=false; - m_ibt_provenance = IBT_PROVENANCE_UNKNOWN; } wahoo::Instruction::~Instruction() @@ -88,44 +82,3 @@ void wahoo::Instruction::markVarStackRef() m_varStackRef = true; } -void wahoo::Instruction::setIBTProvenance(char *p_provenance) -{ - std::string provenance(p_provenance); - - if (provenance == "RETURNTARGET") - { - m_ibt_provenance = IBT_PROVENANCE_RETURN; - } - else if (provenance == "SWITCHTABLE") - { - m_ibt_provenance = IBT_PROVENANCE_SWITCH_TABLE; - } - else if (provenance == "INDIRCALL") - { - m_ibt_provenance = IBT_PROVENANCE_INDIRECT_CALL; - } - else if (provenance == "COMPUTEDGOTOHEURISTIC") - { - m_ibt_provenance = IBT_PROVENANCE_COMPUTED_GOTO; - } - else if (provenance == "CODEADDRESSTAKEN") - { - m_ibt_provenance = IBT_PROVENANCE_CODE_ADDRESS_TAKEN; - } - else if (provenance == "UNREACHABLEBLOCK") - { - m_ibt_provenance = IBT_PROVENANCE_UNREACHABLE_BLOCK; - } - else if (provenance == "DATASEGMENT") - { - m_ibt_provenance = IBT_PROVENANCE_DATA_SEGMENT; - } - else if (provenance == "UNKNOWN") - { - m_ibt_provenance = IBT_PROVENANCE_UNKNOWN; - } - else - { - m_ibt_provenance = IBT_PROVENANCE_UNKNOWN; - } -} diff --git a/xform/instruction_descriptor.h b/xform/instruction_descriptor.h index 2826b6738f3ca437ed55162fd217d688e96778ec..5efdf3b65a647011e8293b2c5a7215b506868058 100644 --- a/xform/instruction_descriptor.h +++ b/xform/instruction_descriptor.h @@ -13,7 +13,6 @@ namespace wahoo { class Function; -enum IBTProvenance { IBT_PROVENANCE_UNKNOWN, IBT_PROVENANCE_RETURN, IBT_PROVENANCE_SWITCH_TABLE, IBT_PROVENANCE_INDIRECT_CALL, IBT_PROVENANCE_COMPUTED_GOTO, IBT_PROVENANCE_CODE_ADDRESS_TAKEN, IBT_PROVENANCE_UNREACHABLE_BLOCK, IBT_PROVENANCE_DATA_SEGMENT }; class Instruction { public: @@ -30,7 +29,6 @@ class Instruction { void markVarStackRef(); app_iaddr_t getAddress() const { return m_address; } - app_iaddr_t getIBTAddress() const { return m_ibt_address; } int getSize() const { return m_size; } Function* getFunction() const { 
return m_function; } string getAsm() const { return m_asm; } @@ -38,7 +36,6 @@ class Instruction { void setData(void *dataPtr, int len); unsigned char* getData() const { return m_data; } void setData(void *data) { m_data = (unsigned char*) data; } - void setIBTAddress(app_iaddr_t v) { m_ibt_address=v; } bool isStackRef() const { return m_stackRef; } bool isVarStackRef() const { return m_varStackRef; } @@ -49,18 +46,8 @@ class Instruction { void setVisited() { m_isVisited = true; } bool isVisited() const { return m_isVisited; } - // include Indirect branch targets for insructions. - void addIBT(Instruction* insn) { ibts.insert(insn); } - const std::set<Instruction*>& getIBTs() { return ibts; } - void markIbComplete(bool complete=true) { m_ib_complete=complete; } - bool isIbComplete() { return m_ib_complete; } - void setIBTProvenance(char *); - void setIBTProvenance(const IBTProvenance p_provenance) { m_ibt_provenance = p_provenance; } - IBTProvenance getIBTProvenance() const { return m_ibt_provenance; } - private: app_iaddr_t m_address; - app_iaddr_t m_ibt_address; int m_size; Function* m_function; string m_asm; @@ -73,9 +60,6 @@ class Instruction { bool m_isVisited; - std::set<Instruction*> ibts; - bool m_ib_complete; - IBTProvenance m_ibt_provenance; }; } diff --git a/xform/rewriter.cpp b/xform/rewriter.cpp index 6294f52b62d843a9c65047a0b7a875d5715140cf..ded32c916e80ff1d6222e9234042ff8ed9f6d282 100644 --- a/xform/rewriter.cpp +++ b/xform/rewriter.cpp @@ -38,14 +38,13 @@ using namespace std; -Rewriter::Rewriter(char *p_elfPath, char *p_annotationFilePath, char* p_xrefFilePath) +Rewriter::Rewriter(char *p_elfPath, char *p_annotationFilePath) { m_elfReader = new ElfReader(p_elfPath); // parse file and build up all the data structures readAnnotationFile(p_annotationFilePath); readElfFile(p_elfPath); - readXrefsFile(p_xrefFilePath); } Rewriter::~Rewriter() @@ -620,154 +619,6 @@ after_loop: dissassemble(); } -void Rewriter::readXrefsFile(char p_filename[]) -{ - 
vector<wahoo::Instruction*> instructions=getAllInstructions(); - map<app_iaddr_t,wahoo::Instruction*> addr_to_insn_map; - for (int j = 0; j < instructions.size(); ++j) - { - wahoo::Instruction *instr = instructions[j]; - assert(instr); - addr_to_insn_map[instr->getAddress()]=instr; - } - - set<app_iaddr_t> completeIBT; - - FILE* fin=fopen(p_filename, "r"); - - if(!fin) - { - fprintf(stderr,"Cannot open xref annotation file %s\n", p_filename); - return; - } - - int line=0; - - app_iaddr_t addr = 0; - union { int size, type;} size_type_u; - char type[200]; - char scope[200]; - char ibt[200]; - char fromib[200]; - char dest[200]; - do - { - - addr=0; - fscanf(fin, "%p%d", (void**)&addr, &size_type_u.size); - - if(feof(fin)) // deal with blank lines at the EOF - break; - fscanf(fin, "%s%s", type,scope); - if(feof(fin)) // deal with blank lines at the EOF - break; - - assert(strcmp(type,"INSTR")==0); - assert(strcmp(scope,"XREF")==0); - fscanf(fin, "%s", ibt); - if(feof(fin)) // deal with blank lines at the EOF - break; - - // check for instr xref ibt -/* - 4280c0 1 INSTR XREF IBT FROMIB 426558 RETURNTARGET - 426614 1 INSTR XREF IBT FROMIB 426580 RETURNTARGET - 4280c0 1 INSTR XREF IBT FROMIB 426580 RETURNTARGET - 4269d2 1 INSTR XREF IBT FROMIB 42689c RETURNTARGET - 4432bd 1 INSTR XREF IBT FROMIB 42689c RETURNTARGET - 447d4f 1 INSTR XREF IBT FROMIB 42689c RETURNTARGET - 42689c 1 INSTR XREF FROMIB COMPLETE 3 RETURNTARGET - -*/ - - if(string("IBT")==string(ibt)) - { - fscanf(fin, "%s", fromib); - if(feof(fin)) // deal with blank lines at the EOF - break; - - assert(strcmp(fromib,"FROMIB")==0 || strcmp(fromib,"FROMDATA")==0 - || strcmp(fromib,"FROMUNKNOWN")==0); - - wahoo::Instruction *instr = addr_to_insn_map[addr]; - if(instr) - { - // cout<<"Setting IBT for addr "<<std::hex<<addr<<std::dec<<endl; - char provenance[200]; - instr->setIBTAddress(addr); - if(strcmp(fromib,"FROMIB")==0) - { - // get the from point into memory. 
- app_iaddr_t from_addr = 0; - fscanf(fin, "%p %s", (void**)&from_addr, provenance); - - // find that instruction - wahoo::Instruction *from_instr = addr_to_insn_map[from_addr]; - assert(from_instr); - - // record in the IR listing. - from_instr->addIBT(instr); - - // set provenance info - instr->setIBTProvenance(provenance); - } - else if(strcmp(fromib,"FROMUNKNOWN")==0) - { - // 8049234 3 INSTR XREF IBT FROMUNKNOWN UNREACHABLEBLOCK - // COMPUTEDGOTOHEURISTIC | CODEADDRESSTAKEN | UNREACHABLEBLOCK - fscanf(fin, "%s", provenance); - instr->setIBTProvenance(provenance); - } - else if(strcmp(fromib,"FROMDATA")==0) - { - instr->setIBTProvenance("DATASEGMENT"); - } - - if(feof(fin)) // deal with blank lines at the EOF - break; - } - } - // check for instr xref fromib - else if(string("FROMIB")==string(ibt)) - { - // annotations can come in any order so the COMPLETE annotation for IB targets - // can come before/after the targets themselves - // in this loop, just keep track of instructions w/ complete targets - // 4004b6 1 INSTR XREF FROMIB COMPLETE 1 <provenance> - char complete[200]; - fscanf(fin, "%s", complete); - if(feof(fin)) // deal with blank lines at the EOF - break; - - if(strcmp(complete,"COMPLETE")==0) - { - char provenance[200]; - int num_targets; - completeIBT.insert(addr); - fscanf(fin, "%d %s", &num_targets, provenance); - if(feof(fin)) // deal with blank lines at the EOF - break; - } - } - - char remainder[2000]; - fgets(remainder, sizeof(remainder), fin); - line++; - - - } while(!feof(fin)); - - // let's backpatch all IB instructions with complete targets - set<app_iaddr_t>::const_iterator it; - for (it = completeIBT.begin(); it != completeIBT.end(); ++it) - { - wahoo::Instruction *instr = addr_to_insn_map[*it]; - assert(instr); - instr->markIbComplete(); - } - - fclose(fin); -} /* * Read MEDS annotation file and populate relevant hash table & data structures diff --git a/xform/rewriter.h b/xform/rewriter.h index 
c035835215fa86a001a66380de4dcd5f89a35584..2d83667133c5de46d5f6a20bd5b06f2e97d1cf80 100644 --- a/xform/rewriter.h +++ b/xform/rewriter.h @@ -13,7 +13,7 @@ using namespace std; class Rewriter { public: - Rewriter(char *p_elf, char *p_annotationFile, char* xrefFile=NULL); + Rewriter(char *p_elf, char *p_annotationFile); ~Rewriter(); virtual vector<wahoo::Function*> getAllFunctions(); @@ -26,7 +26,6 @@ class Rewriter protected: void readAnnotationFile(char []); - void readXrefsFile(char []); void readElfFile(char []); ElfReader *getElfReader() const { return m_elfReader; } FILE* getAsmSpri() { return m_spri; };