From a073df28b2eddc2c93daefa1d3f98aee89ce61e4 Mon Sep 17 00:00:00 2001 From: jdh8d <jdh8d@git.zephyr-software.com> Date: Wed, 29 Mar 2017 15:11:13 +0000 Subject: [PATCH] IR read/write of eh-programs and eh-headers to/from the db, including full IR support. zipr does not yet write new eh-hdr outputs. make sure you dropdb/createdb/pdb_setup, as there are many schema changes Former-commit-id: be2ca6b2079be9332f9ba1422837952f41217401 --- libIRDB/include/core/eh.hpp | 43 ++- libIRDB/include/core/file.hpp | 26 +- libIRDB/include/core/fileir.hpp | 32 +- libIRDB/include/core/instruction.hpp | 8 + libIRDB/include/libIRDB-core.hpp | 2 + libIRDB/src/core/SConscript | 7 +- libIRDB/src/core/file.cpp | 7 +- libIRDB/src/core/fileir.cpp | 287 +++++++++++++---- libIRDB/src/core/instruction.cpp | 72 +++-- libIRDB/src/core/variantid.cpp | 64 ++-- libIRDB/test/split_eh_frame.cpp | 452 +++++++++++++++++++++------ 11 files changed, 764 insertions(+), 236 deletions(-) diff --git a/libIRDB/include/core/eh.hpp b/libIRDB/include/core/eh.hpp index 517af5704..2d09b1c92 100644 --- a/libIRDB/include/core/eh.hpp +++ b/libIRDB/include/core/eh.hpp @@ -20,28 +20,39 @@ typedef std::string EhProgramInstruction_t; -typedef vector<EhProgramInstruction_t> EhProgramListing_t; +typedef std::vector<EhProgramInstruction_t> EhProgramListing_t; -class EhProgram_t : public BaseObj_t; +class EhProgram_t : public BaseObj_t { public: + EhProgram_t(db_id_t id, const uint64_t caf, const int64_t daf, const uint8_t p_ptrsize) + : BaseObj_t(NULL), code_alignment_factor(caf), data_alignment_factor(daf), ptrsize(p_ptrsize) + { SetBaseID(id); } + + + EhProgramListing_t& GetCIEProgram() { return cie_program; } + const EhProgramListing_t& GetCIEProgram() const { return cie_program; } - EhProgramListing_t& GetCieProgram() { return cie_program; } EhProgramListing_t& GetFDEProgram() { return fde_program; } + const EhProgramListing_t& GetFDEProgram() const { return fde_program; } uint64_t GetCodeAlignmentFactor() const { 
return code_alignment_factor; } void SetCodeAlignmentFactor(const uint64_t caf) { code_alignment_factor=caf; } - int64_t GetDataAlignmentFactor() const { return data_alignment_factor=daf; } - void SetDataAlignmentFactor(const int64_t daf) { return data_alignment_factor; } + int64_t GetDataAlignmentFactor() const { return data_alignment_factor; } + void SetDataAlignmentFactor(const int64_t daf) { data_alignment_factor=daf; } - std::string WriteToDB(File_t* fid, BaseObj_t* insn); // writes to DB, ID is not -1. + std::string WriteToDB(File_t* fid); // writes to DB, ID is not -1. + friend bool operator<(const EhProgram_t&a, const EhProgram_t&b); + + void print() const; private: + EhProgramListing_t cie_program; EhProgramListing_t fde_program; uint64_t code_alignment_factor; @@ -49,28 +60,36 @@ class EhProgram_t : public BaseObj_t; uint8_t ptrsize; // needed for interpreting programs }; +bool operator<(const EhProgram_t&a, const EhProgram_t&b); + +/* +struct EhProgramComparator_t { + bool operator() (const EhProgram_t* a, const EhProgram_t* b) { return *a < *b; } +}; +*/ -typedef vector<const Relocation_t*> TypeTable_t; +typedef std::set<EhProgram_t*> EhProgramSet_t; class EhCallSite_t : public BaseObj_t { public: - EhCallSite_t(const db_id_t id, const uint64_t enc=0, const Instruction_t* lp, const TypeTable_t& tt) : - BaseObj_t(NULL), tt_encoding(enc), landing_pad(lp), type_table(tt) { SetBaseID(id); } + EhCallSite_t(const db_id_t id, const uint64_t enc=0, Instruction_t* lp=NULL) : + BaseObj_t(NULL), tt_encoding(enc), landing_pad(lp) + { SetBaseID(id); } uint64_t GetTTEncoding() const { return tt_encoding; } void SetTTEncoding(const uint64_t p_tt) { tt_encoding=p_tt; } Instruction_t* GetLandingPad() const { return landing_pad; } - void SetLandingPad(const Instruction_t* lp) const { landing_pad=lp; } + void SetLandingPad(Instruction_t* lp) { landing_pad=lp; } - TypeTable_t& GetTypeTable() { return type_table; } + std::string WriteToDB(File_t* fid); // writes to DB, ID 
is not -1. private: uint64_t tt_encoding; Instruction_t* landing_pad; - TypeTable_t type_table; // pointers to the entries in the type table. }; +typedef std::set<EhCallSite_t*> EhCallSiteSet_t; diff --git a/libIRDB/include/core/file.hpp b/libIRDB/include/core/file.hpp index 26e297721..a702a427d 100644 --- a/libIRDB/include/core/file.hpp +++ b/libIRDB/include/core/file.hpp @@ -28,25 +28,27 @@ class File_t : public BaseObj_t const std::string &atn, const std::string &ftn, const std::string &itn, const std::string &icfs, const std::string &icfs_map, const std::string &rtn, const std::string &typ, const std::string &scoop, + const std::string &ehpgms, const std::string &ehcss, const db_id_t &doipid); File_t(db_id_t file_id) : BaseObj_t(NULL) { assert(0);} // read from DB void WriteToDB() { assert(0); } // writes to DB ID is not -1. - std::string GetAddressTableName() { return address_table_name; } - std::string GetFunctionTableName() { return function_table_name; } - std::string GetInstructionTableName() { return instruction_table_name; } -// xxx std::string GetIBTargetsTableName() { return ibtargets_table_name; } - std::string GetICFSTableName() { return icfs_table_name; } - std::string GetICFSMapTableName() { return icfs_map_table_name; } - std::string GetRelocationsTableName() { return relocs_table_name; } - std::string GetTypesTableName() { return types_table_name; } - std::string GetScoopTableName() { return scoop_table_name; } - std::string GetURL() { return url; } + std::string GetAddressTableName() const { return address_table_name; } + std::string GetFunctionTableName() const { return function_table_name; } + std::string GetInstructionTableName() const { return instruction_table_name; } + std::string GetICFSTableName() const { return icfs_table_name; } + std::string GetICFSMapTableName() const { return icfs_map_table_name; } + std::string GetRelocationsTableName() const { return relocs_table_name; } + std::string GetTypesTableName() const { return 
types_table_name; } + std::string GetScoopTableName() const { return scoop_table_name; } + std::string GetEhProgramTableName() const { return ehpgm_table_name; } + std::string GetEhCallSiteTableName() const { return ehcss_table_name; } + std::string GetURL() const { return url; } void CreateTables(); - int GetELFOID() { return elfoid; }; + int GetELFOID() const { return elfoid; }; friend class FileIR_t; friend class Function_t; @@ -76,5 +78,7 @@ class File_t : public BaseObj_t std::string relocs_table_name; std::string types_table_name; std::string scoop_table_name; + std::string ehpgm_table_name; + std::string ehcss_table_name; int elfoid; }; diff --git a/libIRDB/include/core/fileir.hpp b/libIRDB/include/core/fileir.hpp index f938a8125..ecf8ea3f2 100644 --- a/libIRDB/include/core/fileir.hpp +++ b/libIRDB/include/core/fileir.hpp @@ -40,17 +40,28 @@ class FileIR_t : public BaseObj_t // accessors and mutators in one FunctionSet_t& GetFunctions() { return funcs; } const FunctionSet_t& GetFunctions() const { return funcs; } + InstructionSet_t& GetInstructions() { return insns; } const InstructionSet_t& GetInstructions() const { return insns; } + AddressSet_t& GetAddresses() { return addrs; } const AddressSet_t& GetAddresses() const { return addrs; } + RelocationSet_t& GetRelocations() { return relocs; } const RelocationSet_t& GetRelocations() const { return relocs; } + DataScoopSet_t& GetDataScoops() { return scoops; } const DataScoopSet_t& GetDataScoops() const { return scoops; } + ICFSSet_t& GetAllICFS() { return icfs_set; } const ICFSSet_t& GetAllICFS() const { return icfs_set; } + EhProgramSet_t& GetAllEhPrograms() { return eh_pgms; } + const EhProgramSet_t& GetAllEhPrograms() const { return eh_pgms; } + + EhCallSiteSet_t& GetAllEhCallSites() { return eh_css; } + const EhCallSiteSet_t& GetAllEhCallSites() const { return eh_css; } + // generate the spri rules into the output file, fout. 
void GenerateSPRI(std::ostream &fout, bool with_ilr=false); @@ -111,25 +122,40 @@ class FileIR_t : public BaseObj_t VariantID_t progid; ICFSSet_t icfs_set; File_t* fileptr; + EhProgramSet_t eh_pgms; + EhCallSiteSet_t eh_css; std::map<db_id_t,AddressID_t*> ReadAddrsFromDB(); + std::map<db_id_t,EhProgram_t*> ReadEhPgmsFromDB(); + + std::map<db_id_t,EhCallSite_t*> ReadEhCallSitesFromDB + ( + std::map<EhCallSite_t*,db_id_t> &unresolvedEhCssLandingPads + ); + std::map<db_id_t,Function_t*> ReadFuncsFromDB ( std::map<db_id_t,AddressID_t*> &addrMap, - std::map<db_id_t,Type_t*> &typeMap + std::map<db_id_t,Type_t*> &typeMap, + std::map<Function_t*,db_id_t> &entry_points ); + std::map<db_id_t,DataScoop_t*> ReadScoopsFromDB ( std::map<db_id_t,AddressID_t*> &addrMap, std::map<db_id_t,Type_t*> &typeMap ); + std::map<db_id_t,Instruction_t*> ReadInsnsFromDB ( - std::map<db_id_t,Function_t*> &funcMap, - std::map<db_id_t,AddressID_t*> &addrMap, + const std::map<db_id_t,Function_t*> &funcMap, + const std::map<db_id_t,AddressID_t*> &addrMap, + const std::map<db_id_t,EhProgram_t*> &ehpgmMap, + const std::map<db_id_t,EhCallSite_t*> &ehcsMap, std::map<db_id_t,Instruction_t*> &addressToInstructionMap, std::map<Instruction_t*, db_id_t> &unresolvedICFS ); + void ReadRelocsFromDB ( std::map<db_id_t,BaseObj_t*> &insnMap diff --git a/libIRDB/include/core/instruction.hpp b/libIRDB/include/core/instruction.hpp index a843c12bc..ba1364041 100644 --- a/libIRDB/include/core/instruction.hpp +++ b/libIRDB/include/core/instruction.hpp @@ -19,6 +19,8 @@ */ class Function_t; // forward decls. +class EhProgram_t; // forward decls. +class EhCallSite_t; // forward decls. #define MAX_INSN_SIZE 32 // x86 really declares this as 16, but we'll allow // for bigger instructions, maybe from other machines? 
@@ -42,6 +44,8 @@ class Instruction_t : public BaseObj_t std::string GetDataBits() const { return data; } std::string GetCallback() const { return callback; } std::string GetComment() const { return comment; } + EhProgram_t* GetEhProgram() const { return eh_pgm; } + EhCallSite_t* GetEhCallSite_t() const { return eh_cs; } void SetAddress(AddressID_t* newaddr) { my_address=newaddr; } @@ -53,6 +57,8 @@ class Instruction_t : public BaseObj_t void SetDataBits(std::string orig) { data=orig; } void SetCallback(std::string orig) { callback=orig; } void SetComment(std::string orig) { comment=orig; } + void SetEhProgram(EhProgram_t* orig) { eh_pgm=orig; } + void SetEhCallSite(EhCallSite_t* orig) { eh_cs=orig; } AddressID_t* GetIndirectBranchTargetAddress() { return indTarg; } void SetIndirectBranchTargetAddress(AddressID_t* myIndTarg) { indTarg=myIndTarg; } @@ -81,4 +87,6 @@ class Instruction_t : public BaseObj_t std::string comment; AddressID_t* indTarg; ICFS_t* icfs; + EhProgram_t* eh_pgm; + EhCallSite_t* eh_cs; }; diff --git a/libIRDB/include/libIRDB-core.hpp b/libIRDB/include/libIRDB-core.hpp index 1cb8bae45..af385b596 100644 --- a/libIRDB/include/libIRDB-core.hpp +++ b/libIRDB/include/libIRDB-core.hpp @@ -22,6 +22,7 @@ #define libIRDB_core #include <string> +#include <vector> #include <set> #include <assert.h> #include <string.h> @@ -51,6 +52,7 @@ class Instruction_t; // forward decl for many classes #include <core/archdesc.hpp> #include <core/type.hpp> #include <core/scoop.hpp> +#include <core/eh.hpp> #include <core/fileir.hpp> #include <core/pqxxdb.hpp> diff --git a/libIRDB/src/core/SConscript b/libIRDB/src/core/SConscript index 4152768f8..194340eba 100644 --- a/libIRDB/src/core/SConscript +++ b/libIRDB/src/core/SConscript @@ -1,7 +1,7 @@ import os Import('env') -myenv=env +myenv=env.Clone() myenv.Replace(SECURITY_TRANSFORMS_HOME=os.environ['SECURITY_TRANSFORMS_HOME']) @@ -20,7 +20,10 @@ files= ''' type.cpp scoop.cpp variantid.cpp + eh.cpp + reloc.cpp ''' + 
cpppath=''' . $SECURITY_TRANSFORMS_HOME/include/ @@ -29,7 +32,7 @@ cpppath=''' $SECURITY_TRANSFORMS_HOME/beaengine/beaengineSources/Includes/ ''' -#myenv.Append(CCFLAGS=" -Wall -W -Wextra -Wconversion ") +myenv.Append(CCFLAGS=" -std=c++11 ") myenv=myenv.Clone(CPPPATH=Split(cpppath)) mylib=myenv.Library(libname, Split(files)) diff --git a/libIRDB/src/core/file.cpp b/libIRDB/src/core/file.cpp index 37438cbb7..86719ccec 100644 --- a/libIRDB/src/core/file.cpp +++ b/libIRDB/src/core/file.cpp @@ -35,12 +35,15 @@ File_t::File_t(const db_id_t &myfile_id, const db_id_t &my_orig_fid, const std:: const std::string &myhash, const std::string &myarch, const int &myoid, const std::string &atn, const std::string &ftn, const std::string &itn, const std::string &icfs, const std::string &icfs_map, const std::string &rtn, const std::string &typ, const std::string &scoop, + const std::string &ehpgms, const std::string &ehcss, const db_id_t &mydoipid) : BaseObj_t(NULL), url(myurl), hash(myhash), arch(myarch), elfoid(myoid), address_table_name(atn), function_table_name(ftn), instruction_table_name(itn), icfs_table_name(icfs), icfs_map_table_name(icfs_map), relocs_table_name(rtn), - types_table_name(typ), scoop_table_name(scoop), orig_fid(my_orig_fid) + types_table_name(typ), scoop_table_name(scoop), + ehpgm_table_name(ehpgms), ehcss_table_name(ehcss), + orig_fid(my_orig_fid) { SetBaseID(myfile_id); } @@ -63,6 +66,8 @@ void File_t::CreateTables() relocs_table_name+" "+ types_table_name+" "+ scoop_table_name+" "+ + ehpgm_table_name+" "+ + ehcss_table_name+" "+ tmpfile; system(command.c_str()); diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp index 20cc4980b..1e6f265ce 100644 --- a/libIRDB/src/core/fileir.cpp +++ b/libIRDB/src/core/fileir.cpp @@ -30,7 +30,6 @@ using namespace libIRDB; using namespace std; -static map<Function_t*,db_id_t> entry_points; #define SCOOP_CHUNK_SIZE (10*1024*1024) /* 10 mb */ @@ -38,7 +37,9 @@ static map<Function_t*,db_id_t> entry_points; 
#undef EIP -static void UpdateEntryPoints(std::map<db_id_t,Instruction_t*> &insnMap) +static void UpdateEntryPoints( + const std::map<db_id_t,Instruction_t*> &insnMap, + const map<Function_t*,db_id_t>& entry_points) { /* for each function, look up the instruction that's the entry point */ for( map<Function_t*,db_id_t>::const_iterator it=entry_points.begin(); @@ -49,14 +50,28 @@ static void UpdateEntryPoints(std::map<db_id_t,Instruction_t*> &insnMap) Function_t* func=(*it).first; db_id_t func_entry_id=(*it).second; - assert(func_entry_id==-1 || insnMap[func_entry_id]); - func->SetEntryPoint(insnMap[func_entry_id]); + assert(func_entry_id==-1 || insnMap.at(func_entry_id)); + func->SetEntryPoint(insnMap.at(func_entry_id)); // cout<<"Function named "<<func->GetName()<< " getting entry point set to "<<insnMap[func_entry_id]->GetComment()<<"."<<endl; } } -virtual_offset_t strtovo(std::string s) +static void UpdateUnresolvedEhCallSites( + const std::map<db_id_t,Instruction_t*> &insnMap, + const std::map<EhCallSite_t*,db_id_t> & unresolvedEhcss) +{ + for(const auto &i : unresolvedEhcss) + { + const auto& ehcs=i.first; + const auto& insnid=i.second; + const auto& insn=insnMap.at(insnid); + assert(insn); + ehcs->SetLandingPad(insn); + } +} + +static virtual_offset_t strtovo(std::string s) { return strtoint<virtual_offset_t>(s); } @@ -114,32 +129,32 @@ FileIR_t::~FileIR_t() // DB operations void FileIR_t::ReadFromDB() { - entry_points.clear(); + auto entry_points=map<Function_t*,db_id_t>(); + auto unresolvedICFS=std::map<Instruction_t*, db_id_t>(); + auto unresolvedEhCallSites=std::map<EhCallSite_t*,db_id_t>(); + auto objMap=std::map<db_id_t,BaseObj_t*>(); + auto addressToInstructionMap=std::map<db_id_t,Instruction_t*>(); - std::map<db_id_t,BaseObj_t*> objMap; - - std::map<db_id_t,Type_t*> typesMap = ReadTypesFromDB(types); - std::map<db_id_t,AddressID_t*> addrMap=ReadAddrsFromDB(); - std::map<db_id_t,Function_t*> funcMap=ReadFuncsFromDB(addrMap, typesMap); - 
std::map<db_id_t,DataScoop_t*> scoopMap=ReadScoopsFromDB(addrMap, typesMap); - - - std::map<db_id_t,Instruction_t*> addressToInstructionMap; - std::map<Instruction_t*, db_id_t> unresolvedICFS; - - std::map<db_id_t,Instruction_t*> insnMap=ReadInsnsFromDB(funcMap,addrMap,addressToInstructionMap, unresolvedICFS); + auto ehpgmMap = ReadEhPgmsFromDB(); + auto ehcsMap = ReadEhCallSitesFromDB(unresolvedEhCallSites); + auto typesMap = ReadTypesFromDB(types); + auto addrMap = ReadAddrsFromDB(); + auto funcMap = ReadFuncsFromDB(addrMap, typesMap,entry_points); + auto scoopMap = ReadScoopsFromDB(addrMap, typesMap); + auto insnMap = ReadInsnsFromDB(funcMap,addrMap,ehpgmMap,ehcsMap,addressToInstructionMap, unresolvedICFS); ReadAllICFSFromDB(addressToInstructionMap, unresolvedICFS); - - // put the scoops+instructions into the object map. - // if relocs end up on other objects, we'll need to add them to. for now only insns/scoops. + // put the scoops, instructions, and eh call sites into the object map. + // if relocs end up on other objects, we'll need to add them to. for now only these things. 
objMap.insert(insnMap.begin(), insnMap.end()); objMap.insert(scoopMap.begin(), scoopMap.end()); + objMap.insert(ehcsMap.begin(), ehcsMap.end()); ReadRelocsFromDB(objMap); - UpdateEntryPoints(insnMap); + UpdateEntryPoints(insnMap,entry_points); + UpdateUnresolvedEhCallSites(insnMap,unresolvedEhCallSites); } @@ -275,12 +290,13 @@ std::string FileIR_t::LookupAssembly(Instruction_t *instr) std::map<db_id_t,Function_t*> FileIR_t::ReadFuncsFromDB ( std::map<db_id_t,AddressID_t*> &addrMap, - std::map<db_id_t,Type_t*> &typesMap + std::map<db_id_t,Type_t*> &typesMap, + map<Function_t*,db_id_t> &entry_points ) { - std::map<db_id_t,Function_t*> idMap; + auto idMap=std::map<db_id_t,Function_t*> (); - std::string q= "select * from " + fileptr->function_table_name + " ; "; + auto q=std::string("select * from ") + fileptr->function_table_name + " ; "; dbintr->IssueQuery(q); @@ -334,6 +350,127 @@ std::map<db_id_t,Function_t*> FileIR_t::ReadFuncsFromDB } +std::map<db_id_t,EhCallSite_t*> FileIR_t::ReadEhCallSitesFromDB + ( + map<EhCallSite_t*,db_id_t> &unresolvedEhCssLandingPads // output arg. + ) +{ + auto ehcsMap=std::map<db_id_t,EhCallSite_t*>(); + + std::string q= "select * from " + fileptr->GetEhCallSiteTableName() + " ; "; + + for(dbintr->IssueQuery(q); !dbintr->IsDone(); dbintr->MoveToNextRow()) + { + /* + * ehcs_id integer, -- id of this object. + * tt_encoding integer, -- the encoding of the type table. + * lp_insn_id integer -- the landing pad instruction's id. + */ + + + const auto eh_cs_id=atoi(dbintr->GetResultColumn("ehcs_id").c_str()); + const auto tt_encoding=atoi(dbintr->GetResultColumn("tt_encoding").c_str()); + const auto lp_insn_id=atoi(dbintr->GetResultColumn("lp_insn_id").c_str()); + + auto newEhCs=new EhCallSite_t(eh_cs_id,tt_encoding,NULL); // create the call site with an unresolved LP + eh_css.insert(newEhCs); // record that it exists. + ehcsMap[eh_cs_id]=newEhCs; // record the map for when we read instructions. 
+ if(lp_insn_id != BaseObj_t::NOT_IN_DATABASE) + unresolvedEhCssLandingPads[newEhCs]=lp_insn_id; // note that the LP is unresolved + } + + return ehcsMap; +} + +std::map<db_id_t,EhProgram_t*> FileIR_t::ReadEhPgmsFromDB() +{ + auto idMap = std::map<db_id_t,EhProgram_t*>(); + + auto q=std::string("select * from ") + fileptr->ehpgm_table_name + " ; "; + dbintr->IssueQuery(q); + + auto decode_pgm=[](const string& encoded_pgm, EhProgramListing_t& decoded_pgm) + { + + auto split=[](const string& str, const string& delim, EhProgramListing_t& tokens) -> void + { + auto prev = size_t(0); + auto pos = size_t(0); + do + { + pos = str.find(delim, prev); + if (pos == string::npos) pos = str.length(); + string token = str.substr(prev, pos-prev); + if (!token.empty()) tokens.push_back(token); + prev = pos + delim.length(); + } + while (pos < str.length() && prev < str.length()); + }; + + auto decode_in_place=[](string& to_decode) -> void + { + auto charToHex=[](uint8_t value) -> uint8_t + { + if (value >= '0' && value <= '9') return value - '0'; + else if (value >= 'A' && value <= 'F') return value - 'A' + 10; + else if (value >= 'a' && value <= 'f') return value - 'a' + 10; + assert(false); + }; + + + auto out=string(""); + while(to_decode.size() > 0) + { + // to-decode should have pairs of characters that represent individual bytes. 
+ assert(to_decode.size() >= 2); + auto val = uint8_t ( charToHex(to_decode[0])*16 + charToHex(to_decode[1]) ); + out += val; + to_decode.erase(0,2); + } + + to_decode=out; + }; + + split(encoded_pgm, ",", decoded_pgm); + + // decode each one + for(auto& i : decoded_pgm) + decode_in_place(i); + + + }; + + while(!dbintr->IsDone()) + { + /* + * eh_pgm_id integer, + * caf integer, + * daf integer, + * ptrsize integer, + * cie_program text, + * fde_program text + */ + + + const auto eh_pgm_id=atoi(dbintr->GetResultColumn("eh_pgm_id").c_str()); + const auto caf=atoi(dbintr->GetResultColumn("caf").c_str()); + const auto daf=atoi(dbintr->GetResultColumn("daf").c_str()); + const auto ptrsize=atoi(dbintr->GetResultColumn("ptrsize").c_str()); + const auto& encoded_cie_program = dbintr->GetResultColumn("cie_program"); + const auto& encoded_fde_program = dbintr->GetResultColumn("fde_program"); + + auto new_ehpgm=new EhProgram_t(eh_pgm_id, caf, daf, ptrsize); + decode_pgm(encoded_cie_program, new_ehpgm->GetCIEProgram()); + decode_pgm(encoded_fde_program, new_ehpgm->GetFDEProgram()); + + idMap[eh_pgm_id]=new_ehpgm; + eh_pgms.insert(new_ehpgm); + dbintr->MoveToNextRow(); + } + + return idMap; +} + std::map<db_id_t,AddressID_t*> FileIR_t::ReadAddrsFromDB ( ) @@ -375,10 +512,12 @@ std::map<db_id_t,AddressID_t*> FileIR_t::ReadAddrsFromDB std::map<db_id_t,Instruction_t*> FileIR_t::ReadInsnsFromDB ( - std::map<db_id_t,Function_t*> &funcMap, - std::map<db_id_t,AddressID_t*> &addrMap, - std::map<db_id_t,Instruction_t*> &addressToInstructionMap, - std::map<Instruction_t*, db_id_t> &unresolvedICFS + const std::map<db_id_t,Function_t*> &funcMap, + const std::map<db_id_t,AddressID_t*> &addrMap, + const std::map<db_id_t,EhProgram_t*> &ehpgmMap, + const std::map<db_id_t,EhCallSite_t*> &ehcsMap, + std::map<db_id_t,Instruction_t*> &addressToInstructionMap, + std::map<Instruction_t*, db_id_t> &unresolvedICFS ) { std::map<db_id_t,Instruction_t*> idMap; @@ -412,6 +551,8 @@ 
std::map<db_id_t,Instruction_t*> FileIR_t::ReadInsnsFromDB db_id_t fallthrough_address_id=atoi(dbintr->GetResultColumn("fallthrough_address_id").c_str()); db_id_t targ_address_id=atoi(dbintr->GetResultColumn("target_address_id").c_str()); db_id_t icfs_id=atoi(dbintr->GetResultColumn("icfs_id").c_str()); + db_id_t eh_pgm_id=atoi(dbintr->GetResultColumn("ehpgm_id").c_str()); + db_id_t eh_cs_id=atoi(dbintr->GetResultColumn("ehcss_id").c_str()); std::string data=(dbintr->GetResultColumn("data")); std::string callback=(dbintr->GetResultColumn("callback")); std::string comment=(dbintr->GetResultColumn("comment")); @@ -420,20 +561,26 @@ std::map<db_id_t,Instruction_t*> FileIR_t::ReadInsnsFromDB std::string isIndStr=(dbintr->GetResultColumn("ind_target_address_id")); - AddressID_t* indTarg = NULL; + auto indTarg=(AddressID_t*)NULL; if (indirect_branch_target_address_id != NOT_IN_DATABASE) - indTarg = addrMap[indirect_branch_target_address_id]; + indTarg = addrMap.at(indirect_branch_target_address_id); + + auto parent_func=(Function_t*)NULL; + if(parent_func_id!= NOT_IN_DATABASE) parent_func=funcMap.at(parent_func_id); Instruction_t *newinsn=new Instruction_t(instruction_id, - addrMap[aid], - funcMap[parent_func_id], + addrMap.at(aid), + parent_func, orig_address_id, data, callback, comment, indTarg, doipid); + + if(eh_pgm_id != NOT_IN_DATABASE) newinsn->SetEhProgram(ehpgmMap.at(eh_pgm_id)); + if(eh_cs_id != NOT_IN_DATABASE) newinsn->SetEhCallSite(ehcsMap.at(eh_cs_id)); - if(funcMap[parent_func_id]) + if(parent_func) { - funcMap[parent_func_id]->GetInstructions().insert(newinsn); - newinsn->SetFunction(funcMap[parent_func_id]); + parent_func->GetInstructions().insert(newinsn); + newinsn->SetFunction(funcMap.at(parent_func_id)); } //std::cout<<"Found address "<<aid<<"."<<std::endl; @@ -528,6 +675,8 @@ void FileIR_t::WriteToDB() dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->types_table_name + string(" cascade;")); dbintr->IssueQuery(string("TRUNCATE TABLE ")+ 
fileptr->scoop_table_name + string(" cascade;")); dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->scoop_table_name+"_part2"+ string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->ehpgm_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->ehcss_table_name + string(" cascade;")); /* and now that everything has an ID, let's write to the DB */ @@ -663,6 +812,17 @@ void FileIR_t::WriteToDB() dbintr->IssueQuery(q); } + for(const auto& i : eh_pgms) + { + string q = i->WriteToDB(fileptr); + dbintr->IssueQuery(q); + } + for(const auto& i : eh_css) + { + string q = i->WriteToDB(fileptr); + dbintr->IssueQuery(q); + } + } @@ -672,63 +832,56 @@ void FileIR_t::SetBaseIDS() /* find the highest database ID */ db_id_t j=0; - for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i) + for(auto i=funcs.begin(); i!=funcs.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - for(std::set<AddressID_t*>::const_iterator i=addrs.begin(); i!=addrs.end(); ++i) + for(auto i=addrs.begin(); i!=addrs.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) + for(auto i=insns.begin(); i!=insns.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - for(std::set<Relocation_t*>::const_iterator i=relocs.begin(); i!=relocs.end(); ++i) + for(auto i=relocs.begin(); i!=relocs.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - for(std::set<Type_t*>::const_iterator i=types.begin(); i!=types.end(); ++i) + for(auto i=types.begin(); i!=types.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - for(DataScoopSet_t::const_iterator i=scoops.begin(); i!=scoops.end(); ++i) + for(auto i=scoops.begin(); i!=scoops.end(); ++i) j=MAX(j,(*i)->GetBaseID()); - for(ICFSSet_t::const_iterator i=icfs_set.begin(); i!=icfs_set.end(); ++i) + for(auto i=icfs_set.begin(); i!=icfs_set.end(); ++i) + j=MAX(j,(*i)->GetBaseID()); + for(auto i=eh_pgms.begin(); i!=eh_pgms.end(); ++i) + j=MAX(j,(*i)->GetBaseID()); + for(auto 
i=eh_css.begin(); i!=eh_css.end(); ++i) j=MAX(j,(*i)->GetBaseID()); /* increment past the max ID so we don't duplicate */ j++; /* for anything that's not yet in the DB, assign an ID to it */ - for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i) + for(auto i=funcs.begin(); i!=funcs.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - for(std::set<AddressID_t*>::const_iterator i=addrs.begin(); i!=addrs.end(); ++i) + for(auto i=addrs.begin(); i!=addrs.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) + for(auto i=insns.begin(); i!=insns.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - for(std::set<Relocation_t*>::const_iterator i=relocs.begin(); i!=relocs.end(); ++i) + for(auto i=relocs.begin(); i!=relocs.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - for(std::set<Type_t*>::const_iterator i=types.begin(); i!=types.end(); ++i) + for(auto i=types.begin(); i!=types.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - for(DataScoopSet_t::const_iterator i=scoops.begin(); i!=scoops.end(); ++i) + for(auto i=scoops.begin(); i!=scoops.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); - for(ICFSSet_t::const_iterator i=icfs_set.begin(); i!=icfs_set.end(); ++i) + for(auto i=icfs_set.begin(); i!=icfs_set.end(); ++i) + if((*i)->GetBaseID()==NOT_IN_DATABASE) + (*i)->SetBaseID(j++); + for(auto i=eh_pgms.begin(); i!=eh_pgms.end(); ++i) + if((*i)->GetBaseID()==NOT_IN_DATABASE) + (*i)->SetBaseID(j++); + for(auto i=eh_css.begin(); i!=eh_css.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) (*i)->SetBaseID(j++); -} - -std::string Relocation_t::WriteToDB(File_t* fid, BaseObj_t* myinsn) -{ - string q; - db_id_t wrt_id=wrt_obj ? 
wrt_obj->GetBaseID() : BaseObj_t::NOT_IN_DATABASE; - q ="insert into " + fid->relocs_table_name; - q+="(reloc_id,reloc_offset,reloc_type,instruction_id,wrt_id,addend,doip_id) "+ - string(" VALUES (") + - string("'") + to_string(GetBaseID()) + string("', ") + - string("'") + to_string(offset) + string("', ") + - string("'") + (type) + string("', ") + - string("'") + to_string(myinsn->GetBaseID()) + string("', ") + - string("'") + to_string(wrt_id) + string("', ") + - string("'") + to_string(addend) + string("', ") + - string("'") + to_string(GetDoipID()) + string("') ; ") ; - return q; } int FileIR_t::GetArchitectureBitWidth() diff --git a/libIRDB/src/core/instruction.cpp b/libIRDB/src/core/instruction.cpp index 708d2e15c..106ffad5f 100644 --- a/libIRDB/src/core/instruction.cpp +++ b/libIRDB/src/core/instruction.cpp @@ -34,16 +34,18 @@ Instruction_t::Instruction_t() : BaseObj_t(NULL), data(""), callback(""), - comment("") + comment(""), + my_address(NULL), + my_function(NULL), + orig_address_id(NOT_IN_DATABASE), + fallthrough(NULL), + target(NULL), + indTarg(NULL), + icfs(NULL), + eh_pgm(NULL), + eh_cs(NULL) { SetBaseID(NOT_IN_DATABASE); - my_address=NULL; - my_function=NULL; - orig_address_id=NOT_IN_DATABASE; - fallthrough=NULL; - target=NULL; - indTarg=NULL; - icfs=NULL; } Instruction_t::Instruction_t(db_id_t id, @@ -60,15 +62,17 @@ Instruction_t::Instruction_t(db_id_t id, data(thedata), callback(my_callback), comment(my_comment), - indTarg(my_indTarg) + indTarg(my_indTarg), + my_address(addr), + my_function(func), + orig_address_id(orig_id), + fallthrough(NULL), + target(NULL), + icfs(NULL), + eh_pgm(NULL), + eh_cs(NULL) { SetBaseID(id); - my_address=addr; - my_function=func; - orig_address_id=orig_id; - fallthrough=NULL; - target=NULL; - icfs=NULL; } int Instruction_t::Disassemble(DISASM &disasm) const @@ -164,31 +168,33 @@ string Instruction_t::WriteToDB(File_t *fid, db_id_t newid, bool p_withHeader) if(GetBaseID()==NOT_IN_DATABASE) SetBaseID(newid); - db_id_t 
func_id=NOT_IN_DATABASE; - if(my_function) - func_id=my_function->GetBaseID(); + auto func_id=NOT_IN_DATABASE; + if(my_function) func_id=my_function->GetBaseID(); - db_id_t ft_id=NOT_IN_DATABASE; - if(fallthrough) - ft_id=fallthrough->GetBaseID(); + auto ft_id=NOT_IN_DATABASE; + if(fallthrough) ft_id=fallthrough->GetBaseID(); - db_id_t targ_id=NOT_IN_DATABASE; - if(target) - targ_id=target->GetBaseID(); + auto targ_id=NOT_IN_DATABASE; + if(target) targ_id=target->GetBaseID(); - db_id_t icfs_id=NOT_IN_DATABASE; - if (icfs) - icfs_id=icfs->GetBaseID(); + auto icfs_id=NOT_IN_DATABASE; + if (icfs) icfs_id=icfs->GetBaseID(); - db_id_t indirect_bt_id=NOT_IN_DATABASE; - if(indTarg) - indirect_bt_id=indTarg->GetBaseID(); + auto indirect_bt_id=NOT_IN_DATABASE; + if(indTarg) indirect_bt_id=indTarg->GetBaseID(); + + auto eh_pgm_id=NOT_IN_DATABASE; + if(eh_pgm) eh_pgm_id=eh_pgm->GetBaseID(); + + auto eh_css_id=NOT_IN_DATABASE; + if(eh_cs) eh_css_id=eh_cs->GetBaseID(); string q; if (p_withHeader) q = string("insert into ")+fid->instruction_table_name + - string(" (instruction_id, address_id, parent_function_id, orig_address_id, fallthrough_address_id, target_address_id, icfs_id, data, callback, comment, ind_target_address_id, doip_id) VALUES "); + string(" (instruction_id, address_id, parent_function_id, orig_address_id, fallthrough_address_id, " + "target_address_id, icfs_id, data, callback, comment, ind_target_address_id, ehpgm_id, ehcss_id, doip_id) VALUES "); else q = ","; @@ -197,7 +203,7 @@ string Instruction_t::WriteToDB(File_t *fid, db_id_t newid, bool p_withHeader) for (size_t i = 0; i < data.length(); ++i) hex_data << setw(2) << (int)(data[i]&0xff); - q += string("('") + to_string(GetBaseID()) + string("', ") + + q += string("('") + to_string(GetBaseID()) + string("', ") + string("'") + to_string(my_address->GetBaseID()) + string("', ") + string("'") + to_string(func_id) + string("', ") + string("'") + to_string(orig_address_id) + string("', ") + @@ -208,6 +214,8 @@ 
string Instruction_t::WriteToDB(File_t *fid, db_id_t newid, bool p_withHeader) string("'") + callback + string("', ") + string("'") + comment + string("', ") + string("'") + to_string(indirect_bt_id) + string("', ") + + string("'") + to_string(eh_pgm_id) + string("', ") + + string("'") + to_string(eh_css_id) + string("', ") + string("'") + to_string(GetDoipID()) + string("') ") ; return q; diff --git a/libIRDB/src/core/variantid.cpp b/libIRDB/src/core/variantid.cpp index 76ac67587..bdeb2e0c6 100644 --- a/libIRDB/src/core/variantid.cpp +++ b/libIRDB/src/core/variantid.cpp @@ -191,30 +191,27 @@ File_t* VariantID_t::CloneFile(File_t* fptr) std::string dtn="dtnfid"+to_string(newfid); std::string dtn_part2="dtnfid"+to_string(newfid)+"_part2"; std::string typ="typfid"+to_string(newfid); - - q ="update file_info set address_table_name='"; - q+=atn; - q+="', function_table_name='"; - q+=ftn; - q+="', instruction_table_name='"; - q+=itn; - q+="', icfs_table_name='"; - q+=icfs; - q+="', icfs_map_table_name='"; - q+=icfsmap; - q+="', relocs_table_name='"; - q+=rtn; - q+="', types_table_name='"; - q+=typ; - q+="', scoop_table_name='"; - q+=dtn; + std::string ehp="ehpfid"+to_string(newfid); + std::string css="cssfid"+to_string(newfid); + + q ="update file_info set address_table_name='"+atn; + q+="', function_table_name='"+ftn; + q+="', instruction_table_name='"+itn; + q+="', icfs_table_name='"+icfs; + q+="', icfs_map_table_name='"+icfsmap; + q+="', relocs_table_name='"+rtn; + q+="', types_table_name='"+typ; + q+="', scoop_table_name='"+dtn; + q+="', ehpgm_table_name='"+ehp; + q+="', ehcss_table_name='"+css; q+="' where file_id='"; q+=to_string(newfid); q+="' ; "; dbintr->IssueQuery(q); - File_t* newfile=new File_t(newfid, fptr->orig_fid, fptr->url, fptr->hash, fptr->arch, fptr->elfoid, atn, ftn, itn, icfs, icfsmap, rtn, typ, dtn, fptr->GetDoipID()); + File_t* newfile=new File_t(newfid, fptr->orig_fid, fptr->url, fptr->hash, fptr->arch, + fptr->elfoid, atn, ftn, itn, icfs, 
icfsmap, rtn, typ, dtn, ehp, css, fptr->GetDoipID()); newfile->CreateTables(); @@ -264,6 +261,17 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=" ; "; dbintr->IssueQuery(q); + q="drop table "; + q+=ehp; + q+=" ; "; + dbintr->IssueQuery(q); + + q="drop table "; + q+=css; + q+=" ; "; + dbintr->IssueQuery(q); + + // next issue SQL to clone each table q="select * into "; q+=atn; @@ -328,6 +336,20 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=" ;"; dbintr->IssueQuery(q); + q="select * into "; + q+=ehp; + q+=" from "; + q+=fptr->ehpgm_table_name; + q+=" ;"; + dbintr->IssueQuery(q); + + q="select * into "; + q+=css; + q+=" from "; + q+=fptr->ehcss_table_name; + q+=" ;"; + dbintr->IssueQuery(q); + // update the variant dependency table to represent the deep clone q = "update variant_dependency set file_id='" + to_string(newfid) + @@ -410,7 +432,7 @@ void VariantID_t::ReadFilesFromDB() std::string q= "select file_info.orig_file_id, file_info.address_table_name, " " file_info.instruction_table_name, file_info.icfs_table_name,file_info.icfs_map_table_name, " " file_info.function_table_name, file_info.relocs_table_name, file_info.types_table_name, " - " file_info.scoop_table_name, file_info.file_id, file_info.url, file_info.hash," + " file_info.scoop_table_name, file_info.ehpgm_table_name, file_info.ehcss_table_name, file_info.file_id, file_info.url, file_info.hash," " file_info.arch, file_info.type, file_info.elfoid, file_info.doip_id " " from file_info,variant_dependency " " where variant_dependency.variant_id = '" + to_string(GetBaseID()) + "' AND " @@ -433,13 +455,15 @@ void VariantID_t::ReadFilesFromDB() std::string ftn=(BaseObj_t::dbintr->GetResultColumn("function_table_name")); std::string itn=(BaseObj_t::dbintr->GetResultColumn("instruction_table_name")); std::string dtn=(BaseObj_t::dbintr->GetResultColumn("scoop_table_name")); + std::string ehp=(BaseObj_t::dbintr->GetResultColumn("ehpgm_table_name")); + std::string 
css=(BaseObj_t::dbintr->GetResultColumn("ehcss_table_name")); std::string icfs=(BaseObj_t::dbintr->GetResultColumn("icfs_table_name")); std::string icfs_map=(BaseObj_t::dbintr->GetResultColumn("icfs_map_table_name")); std::string rtn=(BaseObj_t::dbintr->GetResultColumn("relocs_table_name")); std::string typ=(BaseObj_t::dbintr->GetResultColumn("types_table_name")); - File_t *newfile=new File_t(file_id,orig_fid,url,hash,type,oid,atn,ftn,itn,icfs,icfs_map,rtn,typ,dtn,doipid); + File_t *newfile=new File_t(file_id,orig_fid,url,hash,type,oid,atn,ftn,itn,icfs,icfs_map,rtn,typ,dtn,ehp,css,doipid); std::cout<<"Found file "<<file_id<<"."<<std::endl; std::cout<<" atn: " << atn << " ftn: " << ftn << " rtn: " << rtn << " typ: " << typ << std::endl; diff --git a/libIRDB/test/split_eh_frame.cpp b/libIRDB/test/split_eh_frame.cpp index bfe6a3ab1..ba20943bb 100644 --- a/libIRDB/test/split_eh_frame.cpp +++ b/libIRDB/test/split_eh_frame.cpp @@ -9,6 +9,7 @@ #include <assert.h> #include <elf.h> #include <algorithm> +#include <memory> #include <exeio.h> #include "beaengine/BeaEngine.h" @@ -20,6 +21,8 @@ using namespace std; using namespace EXEIO; using namespace libIRDB; +typedef map<virtual_offset_t, Instruction_t*> OffsetMap_t; + template <int ptrsize> class eh_frame_util_t { @@ -259,7 +262,7 @@ class eh_program_insn_t { public: - void print(uint64_t &pc) const + void print(uint64_t &pc, int64_t caf=1) const { // make sure uint8_t is an unsigned char. 
static_assert(std::is_same<unsigned char, uint8_t>::value, "uint8_t is not unsigned char"); @@ -276,7 +279,7 @@ class eh_program_insn_t case 1: { // case DW_CFA_advance_loc: - pc+=opcode_lower6; + pc+=(opcode_lower6*caf); cout<<" cfa_advance_loc "<<dec<<+opcode_lower6<<" to "<<hex<<pc<<endl; break; } @@ -359,21 +362,24 @@ class eh_program_insn_t case DW_CFA_advance_loc1: { auto loc=*(uint8_t*)(&data[pos]); - cout<<" advance_loc1 "<<+loc<<endl; + pc+=(loc*caf); + cout<<" advance_loc1 "<<+loc<<" to " <<pc << endl; break; } case DW_CFA_advance_loc2: { auto loc=*(uint16_t*)(&data[pos]); - cout<<" advance_loc1 "<<+loc<<endl; + pc+=(loc*caf); + cout<<" advance_loc2 "<<+loc<<" to " <<pc << endl; break; } case DW_CFA_advance_loc4: { auto loc=*(uint32_t*)(&data[pos]); - cout<<" advance_loc1 "<<+loc<<endl; + pc+=(loc*caf); + cout<<" advance_loc4 "<<+loc<<" to " <<pc << endl; break; } case DW_CFA_offset_extended: @@ -656,11 +662,110 @@ class eh_program_insn_t return false; } + bool isNop() const + { + auto data=program_bytes.data(); + auto opcode=program_bytes[0]; + auto opcode_upper2=(uint8_t)(opcode >> 6); + auto opcode_lower6=(uint8_t)(opcode & (0x3f)); + switch(opcode_upper2) + { + case 0: + { + switch(opcode_lower6) + { + + case DW_CFA_nop: + return true; + } + } + } + return false; + } + + bool Advance(uint64_t &cur_addr, uint64_t CAF) const + { + // make sure uint8_t is an unsigned char. 
+ static_assert(std::is_same<unsigned char, uint8_t>::value, "uint8_t is not unsigned char"); + + auto data=program_bytes.data(); + auto opcode=program_bytes[0]; + auto opcode_upper2=(uint8_t)(opcode >> 6); + auto opcode_lower6=(uint8_t)(opcode & (0x3f)); + auto pos=uint32_t(1); + auto max=program_bytes.size(); + + switch(opcode_upper2) + { + case 1: + { + // case DW_CFA_advance_loc: + cur_addr+=(opcode_lower6*CAF); + return true; + } + case 0: + { + switch(opcode_lower6) + { + case DW_CFA_set_loc: + { + assert(0); +/* + auto arg=uintptr_t(0xDEADBEEF); + switch(ptrsize) + { + case 4: + arg=*(uint32_t*)data[pos]; break; + case 8: + arg=*(uint64_t*)data[pos]; break; + } + cout<<" set_loc "<<hex<<arg<<endl; + break; +*/ + return true; + } + case DW_CFA_advance_loc1: + { + auto loc=*(uint8_t*)(&data[pos]); + cur_addr+=(loc*CAF); /* advance by the decoded 1-byte operand scaled by CAF, not by the opcode bits */ + return true; + } + + case DW_CFA_advance_loc2: + { + auto loc=*(uint16_t*)(&data[pos]); + cur_addr+=(loc*CAF); /* advance by the decoded 2-byte operand scaled by CAF */ + return true; + } + + case DW_CFA_advance_loc4: + { + auto loc=*(uint32_t*)(&data[pos]); + cur_addr+=(loc*CAF); /* advance by the decoded 4-byte operand scaled by CAF */ + return true; + } + } + } + } + return false; + } + + const vector<uint8_t>& GetBytes() const { return program_bytes; } + vector<uint8_t>& GetBytes() { return program_bytes; } + + + private: vector<uint8_t> program_bytes; }; +template <int ptrsize> +bool operator<(const eh_program_insn_t<ptrsize>& a, const eh_program_insn_t<ptrsize>& b) +{ + return a.GetBytes() < b.GetBytes(); +} + template <int ptrsize> class eh_program_t { @@ -704,10 +809,18 @@ class eh_program_t //cout<<endl; return false; } + const vector<eh_program_insn_t <ptrsize> >& GetInstructions() const { return instructions; } + vector<eh_program_insn_t <ptrsize> >& GetInstructions() { return instructions; } private: vector<eh_program_insn_t <ptrsize> > 
instructions; }; +template <int ptrsize> +bool operator<(const eh_program_t<ptrsize>& a, const eh_program_t<ptrsize>& b) +{ + return a.GetInstructions() < b.GetInstructions(); +} + 
template <int ptrsize> class cie_contents_t : eh_frame_util_t<ptrsize> { @@ -744,6 +857,9 @@ class cie_contents_t : eh_frame_util_t<ptrsize> fde_encoding(0) {} + const eh_program_t<ptrsize>& GetProgram() const { return eh_pgm; } + uint64_t GetCAF() const { return code_alignment_factor; } + int64_t GetDAF() const { return data_alignment_factor; } string GetAugmentation() const { return augmentation; } uint8_t GetLSDAEncoding() const { return lsda_encoding;} @@ -886,7 +1002,8 @@ class cie_contents_t : eh_frame_util_t<ptrsize> } void build_ir(Instruction_t* insn) const { - eh_pgm.print(); + // nothing to do? built up one level. + //eh_pgm.print(); } }; @@ -984,7 +1101,6 @@ class lsda_type_table_entry_t: private eh_frame_util_t<ptrsize> }; - template <int ptrsize> class lsda_call_site_t : private eh_frame_util_t<ptrsize> { @@ -1112,9 +1228,21 @@ class lsda_call_site_t : private eh_frame_util_t<ptrsize> return ( call_site_addr <=insn_addr && insn_addr<call_site_end_addr ); } - void build_ir(Instruction_t* insn, const vector<lsda_type_table_entry_t <ptrsize> > &type_table) const + void build_ir(Instruction_t* insn, const vector<lsda_type_table_entry_t <ptrsize> > &type_table, const uint8_t& tt_encoding, const OffsetMap_t& om, FileIR_t* firp) const { assert(appliesTo(insn)); + + // find landing pad instruction. + auto lp_insn=(Instruction_t*)NULL; + auto lp_it=om.find(landing_pad_addr); + if(lp_it!=om.end()) + lp_insn=lp_it->second; + + // create the callsite. 
+ auto new_ehcs = new EhCallSite_t(BaseObj_t::NOT_IN_DATABASE, tt_encoding, lp_insn); + firp->GetAllEhCallSites().insert(new_ehcs); + insn->SetEhCallSite(new_ehcs); + cout<<"landing pad addr : 0x"<<hex<<landing_pad_addr<<endl; if(action_table.size() == 0 ) { @@ -1127,15 +1255,30 @@ class lsda_call_site_t : private eh_frame_util_t<ptrsize> const auto action=p.GetAction(); if(action==0) { + auto newreloc=new Relocation_t(BaseObj_t::NOT_IN_DATABASE, 0, "type_table_entry", NULL, 0); + new_ehcs->GetRelocations().insert(newreloc); + firp->GetRelocations().insert(newreloc); cout<<"Cleanup only (no catches) ."<<endl; } else if(action>0) { const auto index=action - 1; - cout<<"Catch for type: "; - + //cout<<"Catch for type: "; // the type table reveral was done during parsing, type table is right-side-up now. - type_table.at(index).print(); + //type_table.at(index).print(); + auto wrt=(DataScoop_t*)NULL; + assert(index<type_table.size()); + if(type_table.at(index).GetTypeInfoPointer()!=0) + { + wrt=firp->FindScoop(type_table.at(index).GetTypeInfoPointer()); + assert(wrt); + } + auto offset=0; + if(wrt!=NULL) + offset=type_table.at(index).GetTypeInfoPointer()-wrt->GetStart()->GetVirtualOffset(); /* type-info pointer expressed relative to the start of the scoop that contains it */ + auto newreloc=new Relocation_t(BaseObj_t::NOT_IN_DATABASE, offset, "type_table_entry", wrt, 0); + new_ehcs->GetRelocations().insert(newreloc); + firp->GetRelocations().insert(newreloc); } else if(action<0) { @@ -1170,6 +1313,8 @@ class lsda_t : private eh_frame_util_t<ptrsize> vector<lsda_type_table_entry_t <ptrsize> > type_table; public: + + uint8_t GetTTEncoding() const { return type_table_encoding; } lsda_t() : landing_pad_base_encoding(0), @@ -1299,7 +1444,7 @@ class lsda_t : private eh_frame_util_t<ptrsize> p.print(); }); } - void build_ir(Instruction_t* insn) const + void build_ir(Instruction_t* insn, const OffsetMap_t& om, FileIR_t* firp) const { auto cs_it=find_if(call_site_table.begin(), 
call_site_table.end(), [&](const lsda_call_site_t<ptrsize>& p) { @@ -1308,7 +1453,7 @@ class 
lsda_t : private eh_frame_util_t<ptrsize> if(cs_it!= call_site_table.end()) { - cs_it->build_ir(insn, type_table); + cs_it->build_ir(insn, type_table, GetTTEncoding(), om, firp); } else { @@ -1347,6 +1492,7 @@ class fde_contents_t : eh_frame_util_t<ptrsize> lsda_addr(0) {} + bool appliesTo(const Instruction_t* insn) const { assert(insn && insn->GetAddress()); @@ -1355,8 +1501,13 @@ class fde_contents_t : eh_frame_util_t<ptrsize> return ( fde_start_addr<=insn_addr && insn_addr<fde_end_addr ); } - cie_contents_t<ptrsize> GetCIE() const { return cie_info; } - eh_program_t<ptrsize> GetProgram() const { return eh_pgm; } + uint64_t GetFDEStartAddress() const { return fde_start_addr; } + + const cie_contents_t<ptrsize>& GetCIE() const { return cie_info; } + cie_contents_t<ptrsize>& GetCIE() { return cie_info; } + + const eh_program_t<ptrsize>& GetProgram() const { return eh_pgm; } + eh_program_t<ptrsize>& GetProgram() { return eh_pgm; } bool parse_fde( const uint32_t &fde_position, @@ -1442,63 +1593,43 @@ class fde_contents_t : eh_frame_util_t<ptrsize> cout<<" No LSDA for this FDE."<<endl; } - void build_ir(Instruction_t* insn) const + void build_ir(Instruction_t* insn, const OffsetMap_t &om, FileIR_t* firp) const { // assert this is the right FDE. 
assert( fde_start_addr<= insn->GetAddress()->GetVirtualOffset() && insn->GetAddress()->GetVirtualOffset() <= fde_end_addr); - eh_pgm.print(fde_start_addr); - lsda.build_ir(insn); + //eh_pgm.print(fde_start_addr); + if(lsda_addr!=0) + lsda.build_ir(insn,om,firp); } }; +class split_eh_frame_t +{ + public: + virtual ~split_eh_frame_t() {} /* polymorphic base: implementations are destroyed through a split_eh_frame_t pointer */ + virtual bool parse()=0; + virtual void build_ir() const =0; +}; + template <int ptrsize> -class split_eh_frame_t +class split_eh_frame_impl_t : public split_eh_frame_t { private: - FileIR_t* firp; DataScoop_t* eh_frame_scoop; DataScoop_t* eh_frame_hdr_scoop; DataScoop_t* gcc_except_table_scoop; - map<virtual_offset_t,const Instruction_t*> offset_to_insn_map; + OffsetMap_t offset_to_insn_map; vector<cie_contents_t <ptrsize> > cies; vector<fde_contents_t <ptrsize> > fdes; - public: - - split_eh_frame_t(FileIR_t* p_firp) - : firp(p_firp), - eh_frame_scoop(NULL), - eh_frame_hdr_scoop(NULL), - gcc_except_table_scoop(NULL) - { - assert(firp!=NULL); - - // function to find a scoop by name. 
- auto lookup_scoop_by_name=[&](const string &name) -> DataScoop_t* - { - auto scoop_it=find_if(firp->GetDataScoops().begin(), firp->GetDataScoops().end(), [name](DataScoop_t* scoop) - { - return scoop->GetName()==name; - }); - - if(scoop_it!=firp->GetDataScoops().end()) - return *scoop_it; - return NULL; - }; - - eh_frame_scoop=lookup_scoop_by_name(".eh_frame"); - eh_frame_hdr_scoop=lookup_scoop_by_name(".eh_frame_hdr"); - gcc_except_table_scoop=lookup_scoop_by_name(".gcc_except_table"); - - } bool init_offset_map() { - for_each(firp->GetInstructions().begin(), firp->GetInstructions().end(), [&](const Instruction_t* i) + for_each(firp->GetInstructions().begin(), firp->GetInstructions().end(), [&](Instruction_t* i) { offset_to_insn_map[i->GetAddress()->GetVirtualOffset()]=i; }); @@ -1562,7 +1693,36 @@ class split_eh_frame_t return false; } - bool execute() + public: + + split_eh_frame_impl_t(FileIR_t* p_firp) + : firp(p_firp), + eh_frame_scoop(NULL), + eh_frame_hdr_scoop(NULL), + 
gcc_except_table_scoop(NULL) + { + assert(firp!=NULL); + + // function to find a scoop by name. + auto lookup_scoop_by_name=[&](const string &name) -> DataScoop_t* + { + auto scoop_it=find_if(firp->GetDataScoops().begin(), firp->GetDataScoops().end(), [name](DataScoop_t* scoop) + { + return scoop->GetName()==name; + }); + + if(scoop_it!=firp->GetDataScoops().end()) + return *scoop_it; + return NULL; + }; + + eh_frame_scoop=lookup_scoop_by_name(".eh_frame"); + eh_frame_hdr_scoop=lookup_scoop_by_name(".eh_frame_hdr"); + gcc_except_table_scoop=lookup_scoop_by_name(".gcc_except_table"); + + } + + bool parse() { if(eh_frame_scoop==NULL) return true; // no frame info in this binary @@ -1591,59 +1751,175 @@ class split_eh_frame_t void build_ir() const { - //print(); - for_each(firp->GetInstructions().begin(), firp->GetInstructions().end(), [&](Instruction_t* i) - { - build_ir(i); - }); - } + auto reusedpgms=size_t(0); + struct EhProgramComparator_t { + bool operator() (const EhProgram_t* a, const EhProgram_t* b) { return *a < *b; } + }; - // find the right cie and fde, and build the IR from those for this instruction. - void build_ir(Instruction_t* insn) const - { - auto fie_it=find_if(fdes.begin(), fdes.end(), [&](const fde_contents_t<ptrsize> &p) - { - return p.appliesTo(insn); - }); + // this is used to avoid adding duplicate entries to the program's IR, it allows a lookup by value + // instead of the IR's set which allows duplicates. + auto eh_program_cache = set<EhProgram_t*, EhProgramComparator_t>(); - if(fie_it!=fdes.end()) + // find the right cie and fde, and build the IR from those for this instruction. 
+ auto build_ir_insn=[&](Instruction_t* insn) -> void { + auto fie_it=find_if(fdes.begin(), fdes.end(), [&](const fde_contents_t<ptrsize> &p) + { + return p.appliesTo(insn); + }); - if(getenv("EHIR_VERBOSE")!=NULL) + if(fie_it!=fdes.end()) { - cout<<hex<<insn->GetBaseID()<<":"<<insn->getDisassembly()<<" -> "<<endl; - //fie_it->GetCIE().print(); - //fie_it->print(); + + if(getenv("EHIR_VERBOSE")!=NULL) + { + cout<<hex<<insn->GetAddress()->GetVirtualOffset()<<":" + <<insn->GetBaseID()<<":"<<insn->getDisassembly()<<" -> "<<endl; + //fie_it->GetCIE().print(); + //fie_it->print(); + } + + const auto fde_addr=fie_it->GetFDEStartAddress(); + const auto caf=fie_it->GetCIE().GetCAF(); + const auto daf=fie_it->GetCIE().GetDAF(); + const auto insn_addr=insn->GetAddress()->GetVirtualOffset(); + + auto import_pgm = [&](EhProgramListing_t& out_pgm, const eh_program_t<ptrsize> in_pgm) -> void + { + auto cur_addr=fde_addr; + for(const auto & insn : in_pgm.GetInstructions()) + { + if(insn.Advance(cur_addr, caf)) + { + if(cur_addr > insn_addr) + break; + } + else if(insn.isNop()) + { + // skip nops + } + else + { + string to_push(insn.GetBytes().begin(),insn.GetBytes().end()); + out_pgm.push_back(to_push); + } + + } + if(getenv("EHIR_VERBOSE")!=NULL) + { + cout<<"\tPgm has insn_count="<<out_pgm.size()<<endl; + } + }; + + // build an eh program on the stack; + + EhProgram_t ehpgm(BaseObj_t::NOT_IN_DATABASE,caf,daf,ptrsize); + import_pgm(ehpgm.GetCIEProgram(), fie_it->GetCIE().GetProgram()); + import_pgm(ehpgm.GetFDEProgram(), fie_it->GetProgram()); + + + if(getenv("EHIR_VERBOSE")!=NULL) + ehpgm.print(); + // see if we've already built this one. + auto ehpgm_it = eh_program_cache.find(&ehpgm) ; + if(ehpgm_it != eh_program_cache.end()) + { + // yes, use the cached program. + insn->SetEhProgram(*ehpgm_it); + if(getenv("EHIR_VERBOSE")!=NULL) + cout<<"Re-using existing Program!"<<endl; + reusedpgms++; + } + else /* doesn't yet exist! 
*/ + { + + // allocate in the heap so we can give it to the IR. + if(getenv("EHIR_VERBOSE")!=NULL) + cout<<"Allocating new Program!"<<endl; + EhProgram_t* newehpgm=new EhProgram_t(ehpgm); // copy constructor + + // add to the IR + firp->GetAllEhPrograms().insert(newehpgm); + + // record for this insn + insn->SetEhProgram(newehpgm); + + // update cache. + eh_program_cache.insert(newehpgm); + } + + // build the IR from the FDE. + fie_it->GetCIE().build_ir(insn); + fie_it->build_ir(insn, offset_to_insn_map,firp); + } + else + { + if(getenv("EHIR_VERBOSE")!=NULL) + { + cout<<hex<<insn->GetAddress()->GetVirtualOffset()<<":" + <<insn->GetBaseID()<<":"<<insn->getDisassembly()<<" has no FDE "<<endl; + } } - // build the IR from the FDE. - fie_it->GetCIE().build_ir(insn); - fie_it->build_ir(insn); - } - else + }; + + //for_each(firp->GetInstructions().begin(), firp->GetInstructions().end(), [&](Instruction_t* i) + //{ + // build_ir_insn(i); + //}); + for(Instruction_t* i : firp->GetInstructions()) { - if(getenv("EHIR_VERBOSE")!=NULL) - cout<<hex<<insn->GetBaseID()<<":"<<insn->getDisassembly()<<" has no FDE "<<endl; + build_ir_insn(i); } - - } + cout<<"#ATTRIBUTE total_eh_programs_created="<<dec<<firp->GetAllEhPrograms().size()<<endl; + cout<<"#ATTRIBUTE total_eh_programs_reused="<<dec<<reusedpgms<<endl; + cout<<"#ATTRIBUTE total_eh_programs="<<dec<<firp->GetAllEhPrograms().size()+reusedpgms<<endl; + + + auto remove_reloc=[&](Relocation_t* r) -> void + { + firp->GetRelocations().erase(r); + delete r; + }; + + auto remove_address=[&](AddressID_t* a) -> void + { + firp->GetAddresses().erase(a); + for(auto &r : a->GetRelocations()) remove_reloc(r); + for(auto &r : firp->GetRelocations()) assert(r->GetWRT() != a); + delete a; + }; + + auto remove_scoop=[&] (DataScoop_t* s) -> void + { + firp->GetDataScoops().erase(s); + remove_address(s->GetStart()); + remove_address(s->GetEnd()); + for(auto &r : s->GetRelocations()) remove_reloc(r); + for(auto &r : firp->GetRelocations()) 
assert(r->GetWRT() != s); + delete s; + }; + + // will put back in a min, removing for commit + //remove_scoop(eh_frame_scoop); + //remove_scoop(eh_frame_hdr_scoop); + //remove_scoop(gcc_except_table_scoop); + + } }; void split_eh_frame(FileIR_t* firp) { auto found_err=false; + //auto eh_frame_splitter=(unique_ptr<split_eh_frame_t>)NULL; + auto eh_frame_splitter=unique_ptr<split_eh_frame_t>((nullptr_t)NULL); if( firp->GetArchitectureBitWidth()==64) - { - split_eh_frame_t<8> eh_frame_splitter(firp); - found_err=eh_frame_splitter.execute(); - eh_frame_splitter.build_ir(); - } + eh_frame_splitter.reset(new split_eh_frame_impl_t<8>(firp)); else - { - split_eh_frame_t<4> eh_frame_splitter(firp); - found_err=eh_frame_splitter.execute(); - eh_frame_splitter.build_ir(); - } + eh_frame_splitter.reset(new split_eh_frame_impl_t<4>(firp)); + found_err=eh_frame_splitter->parse(); + eh_frame_splitter->build_ir(); + assert(!found_err); } -- GitLab