diff --git a/examples/cpp-examples/test_quick.sh b/examples/cpp-examples/test_quick.sh index 6b165884aa85c06338a97527e18873c8de28e280..d4cd527ec5b8556ce82f10dfc579d62ff14c8fe3 100755 --- a/examples/cpp-examples/test_quick.sh +++ b/examples/cpp-examples/test_quick.sh @@ -68,7 +68,7 @@ main() for src in $src_files1 do - for option in -O0 -O3 + for option in '-O3 -fno-reorder-blocks-and-partition' -O0 do #rida @@ -80,7 +80,7 @@ main() for src in $src_files2 do - for option in -O3 + for option in '-O3 -fno-reorder-blocks-and-partition' do #rida doit_meta $src "$option" "$rida_flags $p1_flags $ss_flags " diff --git a/irdb-libs/rida/rida.cpp b/irdb-libs/rida/rida.cpp index 73946595b4d2e53171c9fbb4594ceb69776d9809..792c150fc3a80946a4c1505dd5e5e2ec0448d703 100644 --- a/irdb-libs/rida/rida.cpp +++ b/irdb-libs/rida/rida.cpp @@ -13,646 +13,645 @@ #include <functional> #include <cstring> - using namespace std; using namespace EHP; using namespace EXEIO; -#define ALLOF(a) begin(a),end(a) +#define ALLOF(a) begin(a), end(a) -void usage(int argc, char* argv[]) +void usage(int argc, char *argv[]) { - cout<<"Usage: "<<argv[0]<<" input.exe output.annot>"<<endl; + cout << "Usage: " << argv[0] << " input.exe output.annot>" << endl; exit(1); } - - class CreateFunctions_t { - private: - unique_ptr<const EHFrameParser_t> ehp; - using Address_t = uint64_t; - class Range_t : public pair<Address_t,Address_t> - { - public: - Range_t(const Address_t &a, const Address_t &b) : pair<Address_t,Address_t>(a,b) { } - bool contains(const Address_t &c) const { return first <= c && c<second; } - }; - using RangeSet_t = set<Range_t>; - set < RangeSet_t > sccs; - map<RangeSet_t,string> funcNames; - bool verbose; - exeio_t exeio; - csh cshandle; - ofstream outfile; - execlass_t file_class; - MachineType_t machine_type; - friend ostream& operator<<(ostream& os, const CreateFunctions_t::RangeSet_t& rs); +private: + unique_ptr<const EHFrameParser_t> ehp; + using Address_t = uint64_t; + class Range_t : public pair<Address_t, Address_t> + { public: - CreateFunctions_t(const string &input_pgm, const string &output_annot, const bool p_verbose) - : - verbose(p_verbose), - exeio(input_pgm), - cshandle(), - file_class(exeio.get_class()), - machine_type(exeio.getMachineType()) + Range_t(const Address_t &a, const Address_t &b) : pair<Address_t, Address_t>(a, b) {} + bool contains(const Address_t &c) const { return first <= c && c < second; } + }; + using RangeSet_t = set<Range_t>; + set<RangeSet_t> sccs; + map<RangeSet_t, string> funcNames; + bool verbose; + exeio_t exeio; + csh cshandle; + ofstream outfile; + execlass_t file_class; + MachineType_t machine_type; + friend ostream &operator<<(ostream &os, const CreateFunctions_t::RangeSet_t &rs); + +public: + CreateFunctions_t(const string &input_pgm, const string &output_annot, const bool p_verbose) + : verbose(p_verbose), + exeio(input_pgm), + cshandle(), + file_class(exeio.get_class()), + machine_type(exeio.getMachineType()) + { + outfile.open(output_annot.c_str(), ofstream::out); + if (!outfile.is_open()) { - outfile.open(output_annot.c_str(), ofstream::out); - if(!outfile.is_open()) - { - cerr<<"Cannot open "<<output_annot<<endl; - exit(1); - } - ehp = EHFrameParser_t::factory(input_pgm); - if(verbose) - ehp->print(); - - if(file_class!=ELF64 && file_class != ELF32) - { - cerr<<"Rida can only process ELF files."<<endl; - exit(1); - } + cerr << "Cannot open " << output_annot << endl; + exit(1); + } + ehp = EHFrameParser_t::factory(input_pgm); + if (verbose) + ehp->print(); - const auto my_cs_mode = - machine_type == mtArm32 ? cs_mode(CS_MODE_LITTLE_ENDIAN) : - machine_type == mtAarch64 ? cs_mode(CS_MODE_LITTLE_ENDIAN) : - machine_type == mtMips32 ? cs_mode(CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN) : - file_class == ELF64 ? cs_mode(CS_MODE_64) : - file_class == ELF32 ? cs_mode(CS_MODE_32) : - throw std::runtime_error("Cannot handle ELF class"); - - const auto my_cs_arch = - machine_type == mtX86_64 ? CS_ARCH_X86 : - machine_type == mtI386 ? CS_ARCH_X86 : - machine_type == mtArm32 ? CS_ARCH_ARM : - machine_type == mtAarch64 ? CS_ARCH_ARM64 : - machine_type == mtMips32 ? CS_ARCH_MIPS : - throw std::runtime_error("Cannot handle architecture"); - - if (cs_open(my_cs_arch, my_cs_mode , &cshandle) != CS_ERR_OK) - { - cerr<<"Cannot initialize capstone"<<endl; - exit(1); - } + if (file_class != ELF64 && file_class != ELF32) + { + cerr << "Rida can only process ELF files." << endl; + exit(1); } - virtual ~CreateFunctions_t() + + const auto my_cs_mode = + machine_type == mtArm32 ? cs_mode(CS_MODE_LITTLE_ENDIAN) : machine_type == mtAarch64 ? cs_mode(CS_MODE_LITTLE_ENDIAN) + : machine_type == mtMips32 ? cs_mode(CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN) + : file_class == ELF64 ? cs_mode(CS_MODE_64) + : file_class == ELF32 ? cs_mode(CS_MODE_32) + : throw std::runtime_error("Cannot handle ELF class"); + + const auto my_cs_arch = + machine_type == mtX86_64 ? CS_ARCH_X86 : machine_type == mtI386 ? CS_ARCH_X86 + : machine_type == mtArm32 ? CS_ARCH_ARM + : machine_type == mtAarch64 ? CS_ARCH_ARM64 + : machine_type == mtMips32 ? CS_ARCH_MIPS + : throw std::runtime_error("Cannot handle architecture"); + + if (cs_open(my_cs_arch, my_cs_mode, &cshandle) != CS_ERR_OK) { - cs_close(&cshandle); + cerr << "Cannot initialize capstone" << endl; + exit(1); } + } + virtual ~CreateFunctions_t() + { + cs_close(&cshandle); + } + + void calculate() + { + ehframeToSccs(); + addSectionToSccs(".init"); + addSectionToSccs(".fini"); - void calculate() + if (file_class == ELF64) + { + class Extracter64 + { + public: + Elf64_Xword elf_r_sym(Elf64_Xword a) { return ELF64_R_SYM(a); } + Elf64_Xword elf_r_type(Elf64_Xword a) { return ELF64_R_TYPE(a); } + unsigned char elf_st_bind(unsigned char a) { return ELF64_ST_BIND(a); } + unsigned char elf_st_type(unsigned char a) { return ELF64_ST_TYPE(a); } + }; + pltSplit<Elf64_Sym, Elf64_Rela, Elf64_Rel, Extracter64>(".plt", ".plt.got", ".plt.sec"); + nameFunctions<Elf64_Sym, Extracter64>(); + } + else { + class Extracter32 + { + public: + Elf32_Word elf_r_sym(Elf32_Word a) { return ELF32_R_SYM(a); } + Elf32_Word elf_r_type(Elf32_Word a) { return ELF32_R_TYPE(a); } + unsigned char elf_st_bind(unsigned char a) { return ELF32_ST_BIND(a); } + unsigned char elf_st_type(unsigned char a) { return ELF32_ST_TYPE(a); } + }; + pltSplit<Elf32_Sym, Elf32_Rela, Elf32_Rel, Extracter32>(".plt", ".plt.got"); + nameFunctions<Elf32_Sym, Extracter32>(); + } + } + template <class T_Sym, class T_Extracter> + void nameFunctions() + { + // do symbol names. + parseSyms<T_Sym, T_Extracter>(".dynsym", ".dynstr"); + parseSyms<T_Sym, T_Extracter>(".symtab", ".strtab"); + + auto namedFunctions = 0U; + auto unnamedFunctions = 0U; + auto functions = 0U; - ehframeToSccs(); - addSectionToSccs(".init"); - addSectionToSccs(".fini"); - - if(file_class==ELF64) + // set default names + for (const auto &func : sccs) + { + assert(func.begin() != func.end()); + const auto first_range = *(func.begin()); + const auto startAddr = first_range.first; + std::stringstream ss; + ss << "sub_" << hex << startAddr; + const auto name = ss.str(); + + functions++; + if (funcNames[func] == "") // destructive test OK, next line sets if empty. { - class Extracter64 - { - public: - Elf64_Xword elf_r_sym (Elf64_Xword a) { return ELF64_R_SYM (a); } - Elf64_Xword elf_r_type(Elf64_Xword a) { return ELF64_R_TYPE(a); } - unsigned char elf_st_bind(unsigned char a) { return ELF64_ST_BIND(a); } - unsigned char elf_st_type(unsigned char a) { return ELF64_ST_TYPE(a); } - }; - pltSplit<Elf64_Sym, Elf64_Rela, Elf64_Rel, Extracter64>(".plt", ".plt.got"); - nameFunctions<Elf64_Sym, Extracter64>(); + unnamedFunctions++; + funcNames[func] = name; } else { - class Extracter32 - { - public: - Elf32_Word elf_r_sym (Elf32_Word a) { return ELF32_R_SYM (a); } - Elf32_Word elf_r_type(Elf32_Word a) { return ELF32_R_TYPE(a); } - unsigned char elf_st_bind(unsigned char a) { return ELF32_ST_BIND(a); } - unsigned char elf_st_type(unsigned char a) { return ELF32_ST_TYPE(a); } - }; - pltSplit<Elf32_Sym, Elf32_Rela, Elf32_Rel, Extracter32>(".plt", ".plt.got"); - nameFunctions<Elf32_Sym, Extracter32>(); + namedFunctions++; } - } - template<class T_Sym, class T_Extracter> - void nameFunctions() - { - // do symbol names. - parseSyms<T_Sym, T_Extracter>(".dynsym", ".dynstr"); - parseSyms<T_Sym, T_Extracter>(".symtab", ".strtab"); + cout << "#ATTRIBUTE functions=" << dec << functions << endl; + cout << "#ATTRIBUTE named_functions=" << dec << namedFunctions << endl; + cout << "#ATTRIBUTE uunamed_functions=" << dec << unnamedFunctions << endl; + } - auto namedFunctions=0U; - auto unnamedFunctions=0U; - auto functions=0U; + template <class T_Sym, class T_Extracter> + void parseSyms(const string &secName, const string &stringSecName) + { + const auto sec = exeio.sections[secName]; + if (!sec) + return; // err check - // set default names - for(const auto &func: sccs) - { - assert(func.begin() != func.end()); - const auto first_range=*(func.begin()); - const auto startAddr=first_range.first; - std::stringstream ss; - ss << "sub_" << hex << startAddr; - const auto name = ss.str(); - - functions++; - if(funcNames[func]=="") // destructive test OK, next line sets if empty. - { - unnamedFunctions++; - funcNames[func]=name; - } - else - { - namedFunctions++; - } - - } + const auto stringSec = exeio.sections[stringSecName]; + if (!stringSec) + return; // err check - cout<<"#ATTRIBUTE functions="<<dec<<functions<<endl; - cout<<"#ATTRIBUTE named_functions="<<dec<<namedFunctions<<endl; - cout<<"#ATTRIBUTE uunamed_functions="<<dec<<unnamedFunctions<<endl; - - } + const auto data = sec->get_data(); + const auto stringData = stringSec->get_data(); - template<class T_Sym, class T_Extracter> - void parseSyms(const string& secName, const string & stringSecName) + for (auto i = 0U; i + sizeof(T_Sym) <= (size_t)sec->get_size(); i += sizeof(T_Sym)) { - const auto sec=exeio.sections[secName]; - if(!sec) return; // err check - - const auto stringSec=exeio.sections[stringSecName]; - if(!stringSec) return; // err check - - const auto data=sec->get_data(); - const auto stringData=stringSec->get_data(); - - for(auto i=0U; i+sizeof(T_Sym) <= (size_t)sec->get_size(); i+=sizeof(T_Sym)) + const auto sym = reinterpret_cast<const T_Sym *>(data + i); + const auto value = sym->st_value; + if (value == 0) + continue; + + // works for both ELF64 and ELF32, macros defined the same. + const auto type = T_Extracter().elf_st_type(sym->st_info); + if (type != STT_FUNC) + continue; + + // functions with non-zero address at this point. + const auto name_offset = sym->st_name; + + // sanity check string length + if (name_offset < 0U || name_offset > (size_t)stringSec->get_size()) + continue; + + // get the name + const auto name = string(stringData + name_offset); + + // find a function + auto func_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { + return s.begin()->first == value; + }); + if (func_it != sccs.end()) { - const auto sym=reinterpret_cast<const T_Sym *>(data+i); - const auto value=sym->st_value; - if(value==0) - continue; - - // works for both ELF64 and ELF32, macros defined the same. - const auto type=T_Extracter().elf_st_type(sym->st_info); - if(type!=STT_FUNC) - continue; - - - // functions with non-zero address at this point. - const auto name_offset=sym->st_name; - - // sanity check string length - if(name_offset < 0U || name_offset > (size_t)stringSec->get_size()) - continue; - - // get the name - const auto name=string(stringData+name_offset); - - - // find a function - auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) - { - return s.begin() -> first == value; - }); - if(func_it!=sccs.end()) - { - cout<<"Setting function at "<<hex<<value<<" to name "<<name<<endl; - funcNames[*func_it]=name; - } - + cout << "Setting function at " << hex << value << " to name " << name << endl; + funcNames[*func_it] = name; } } + } - void ehframeToSccs() + void ehframeToSccs() + { + const auto fdes = ehp->getFDEs(); + for (const auto fde : *fdes) { - const auto fdes=ehp->getFDEs(); - for(const auto fde : *fdes) - //sccs.insert({ RangeSet_t({fde->getStartAddress(), fde->getEndAddress()})}); - sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(),fde->getEndAddress())})); + sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(), fde->getEndAddress())})); + } - cout<<hex; - if(getenv("SELF_VALIDATE")) - assert(fdes->size()>=0); + cout << hex; + if (getenv("SELF_VALIDATE")) + assert(fdes->size() >= 0); - for(const auto fde : *fdes) + for (const auto fde : *fdes) + { + if (verbose) + cout << "Found FDE at : " << fde->getStartAddress() << "-" << fde->getEndAddress() << endl; + auto pair = Range_t(fde->getStartAddress(), fde->getEndAddress()); + const auto lsda = fde->getLSDA(); + assert(lsda); + const auto callsites = lsda->getCallSites(); + assert(callsites); + + for (const auto cs : *callsites) { - if(verbose) - cout<<"Found FDE at : " << fde->getStartAddress() << "-"<<fde->getEndAddress()<<endl; - auto pair=Range_t(fde->getStartAddress(), fde->getEndAddress()); - const auto lsda=fde->getLSDA(); - assert(lsda); - const auto callsites=lsda->getCallSites(); - assert(callsites); - - for(const auto cs : *callsites) + if (verbose) + cout << "\tCall site (0x" << cs->getCallSiteAddress() << "-" << cs->getCallSiteEndAddress() + << ") with landing pad=0x" << cs->getLandingPadAddress() << endl; + if (cs->getLandingPadAddress() == 0x0) + continue; + auto set1_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { return s.find(pair) != s.end(); }); + assert(set1_it != sccs.end()); + + auto set2_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { + return find_if(ALLOF(s), [&](const Range_t &r) { return r.contains(cs->getLandingPadAddress()); }) != s.end(); + }); + assert(set2_it != sccs.end()); + auto set1 = *set1_it; + auto set2 = *set2_it; + if (set1 != set2) { - if(verbose) - cout<<"\tCall site (0x"<<cs->getCallSiteAddress()<<"-"<<cs->getCallSiteEndAddress() - <<") with landing pad=0x"<<cs->getLandingPadAddress()<<endl; - if(cs->getLandingPadAddress()==0x0) - continue; - auto set1_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) { return s.find(pair) != s.end(); } ); - assert(set1_it!=sccs.end()); - - auto set2_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) - { - return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(cs->getLandingPadAddress()); }) != s.end(); - }); - assert(set2_it!=sccs.end()); - auto set1=*set1_it; - auto set2=*set2_it; - if(set1!=set2) - { - sccs.erase(set1); - sccs.erase(set2); - auto set3=RangeSet_t(); - if(verbose) - cout<<"\tMerging: set1="<< hex<< set1 << " and set2="<<set2<<dec<<endl; - set_union(ALLOF(set1), ALLOF(set2), inserter(set3, set3.begin())); - sccs.insert(set3); - } + sccs.erase(set1); + sccs.erase(set2); + auto set3 = RangeSet_t(); + if (verbose) + cout << "\tMerging: set1=" << hex << set1 << " and set2=" << set2 << dec << endl; + set_union(ALLOF(set1), ALLOF(set2), inserter(set3, set3.begin())); + sccs.insert(set3); } } - - } + } - void addSectionToSccs(const string &sec_name) - { - const auto sec=exeio.sections[sec_name]; - if(sec==nullptr) - return; - const auto range=Range_t(sec->get_address(), sec->get_address()+sec->get_size()); - const auto ranges=RangeSet_t({range}); - sccs.insert(ranges); - } + void addSectionToSccs(const string &sec_name) + { + const auto sec = exeio.sections[sec_name]; + if (sec == nullptr) + return; + const auto range = Range_t(sec->get_address(), sec->get_address() + sec->get_size()); + const auto ranges = RangeSet_t({range}); + sccs.insert(ranges); + } - template<class T_Sym, class T_Rela, class T_Rel, class T_Extracter> - void pltSplit(const string &pltSecName, const string &endSecName) - { - const auto dynsymSec=exeio.sections[".dynsym"]; - const auto dynstrSec=exeio.sections[".dynstr"]; - const auto relapltSec=exeio.sections[".rela.plt"]; - const auto relpltSec=exeio.sections[".rel.plt"]; - const auto relSec=relapltSec ? relapltSec : relpltSec; - const auto relSecEntrySize=relapltSec ? sizeof(T_Rela) : sizeof(T_Rel); - - - const auto addRange=[&](const Address_t s, size_t len) - { - if(verbose) - cout<<"Adding PLT function "<<s<<" "<<len<<endl; - sccs.insert(RangeSet_t({Range_t({s,s+len})})); - }; + template <class T_Sym, class T_Rela, class T_Rel, class T_Extracter> + void pltSplit(const string &pltName, const string &endSecName, const string &pltSecName = "") + { + const auto dynsymSec = exeio.sections[".dynsym"]; + const auto dynstrSec = exeio.sections[".dynstr"]; + const auto relapltSec = exeio.sections[".rela.plt"]; + const auto relpltSec = exeio.sections[".rel.plt"]; + const auto relSec = relapltSec ? relapltSec : relpltSec; + const auto relSecEntrySize = relapltSec ? sizeof(T_Rela) : sizeof(T_Rel); + + const auto addRange = [&](const Address_t s, size_t len) { + if (verbose) + cout << "Adding PLT function " << hex << s << " " << len << endl; + sccs.insert(RangeSet_t({Range_t({s, s + len})})); + }; - const auto addName=[&](const Address_t addr, uint64_t symIndex) - { - if(!dynsymSec) return; - if(!dynstrSec) return; - if(!relSec) return; - - // get the data out of the plt section. - const auto relData=relSec->get_data(); - if(symIndex*relSecEntrySize >= (size_t)relSec->get_size()) return; - const auto relDataAsSymPtr=reinterpret_cast<const T_Rel *>(relData + symIndex*relSecEntrySize); - const auto &relEntry=*relDataAsSymPtr; - - // calculate index into dynsym, section. - const auto dynsymIndex=T_Extracter().elf_r_sym(relEntry.r_info); - const auto dynsymData=dynsymSec->get_data(); - const auto dynstrData=dynstrSec->get_data(); - - cout<<dec<<"At entry "<<symIndex<<", reloc entry has dynsym index "<<dynsymIndex<<endl; - - // the index into the .dynsym section for the relocation. - const auto dynsymDataAsSymPtr=reinterpret_cast<const T_Sym *>(dynsymData); - if(dynsymIndex*sizeof(T_Sym) >= (size_t)dynsymSec->get_size()) return; - - // get a reference to the dynsym entry. - const auto &dynsymEntry=dynsymDataAsSymPtr[dynsymIndex]; - // extra where in the string table the name is. - const auto name_offset=dynsymEntry.st_name; - - // sanity check string length - if(name_offset < 0U || name_offset > (size_t)dynstrSec->get_size()) - return; - - const auto applyName=[&](const string& part, const Address_t myAddr) - { - // get the name - const auto name=string(dynstrData+name_offset)+part+"@plt"; - - // find a function - auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) - { - return s.begin() -> first == myAddr; - }); - if(func_it!=sccs.end()) - { - cout<<"Setting function at "<<hex<<myAddr<<" to name "<<name<<endl; - funcNames[*func_it]=name; - } - }; - - applyName("part1", addr); - applyName("part2", addr+6); - }; + const auto addName = [&](const Address_t addr, uint64_t symIndex) { + if (!dynsymSec) + return; + if (!dynstrSec) + return; + if (!relSec) + return; - const auto pltSec=exeio.sections[pltSecName]; - if(pltSec==NULL) return; + // get the data out of the plt section. + const auto relData = relSec->get_data(); + if (symIndex * relSecEntrySize >= (size_t)relSec->get_size()) + return; + const auto relDataAsSymPtr = reinterpret_cast<const T_Rel *>(relData + symIndex * relSecEntrySize); + const auto &relEntry = *relDataAsSymPtr; - const auto startAddr=pltSec->get_address(); - const auto endAddr=pltSec->get_address()+pltSec->get_size(); + // calculate index into dynsym, section. + const auto dynsymIndex = T_Extracter().elf_r_sym(relEntry.r_info); + const auto dynsymData = dynsymSec->get_data(); + const auto dynstrData = dynstrSec->get_data(); - if(verbose) - cout<<"Found plt function range is "<<hex<<startAddr<<"-"<<endAddr<<endl; + cout << dec << "At entry " << symIndex << ", reloc entry has dynsym index " << dynsymIndex << endl; - const auto pltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) - { - return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(startAddr); }) != s.end(); - }); - // erase startAddr if found. - if(pltRange_it!=sccs.end()) - sccs.erase(pltRange_it); // invalidates all iterators + // the index into the .dynsym section for the relocation. + const auto dynsymDataAsSymPtr = reinterpret_cast<const T_Sym *>(dynsymData); + if (dynsymIndex * sizeof(T_Sym) >= (size_t)dynsymSec->get_size()) + return; - auto dynsymEntryIndex=0; + // get a reference to the dynsym entry. + const auto &dynsymEntry = dynsymDataAsSymPtr[dynsymIndex]; + // extra where in the string table the name is. + const auto name_offset = dynsymEntry.st_name; - const auto handle_x86_plt=[&]() - { - const auto plt_sec_data_ptr = pltSec->get_data(); - const auto plt_skip=16; - const auto plt_entry_size=16; - // Need to determine whether there is an "enhanced plt" in use. An enhanced plt - // uses bounded prefixes on the jump instructions to make sure that the plt - // entries are not poisoned. Use this array of bytes (which translates to an endbr64 - // instruction) in order to make the determination. - uint8_t enhanced_plt_signature[] = {0xf3, 0x0f, 0x1e, 0xfa}; - const auto use_enhanced_plt = !memcmp((const void*)(plt_sec_data_ptr+16), - (const void*)enhanced_plt_signature, - sizeof(enhanced_plt_signature)); - const auto plt_header_size = use_enhanced_plt ? 13 : 12; - const auto plt_entry_size_first_part = use_enhanced_plt ? 15 : 6; - - addRange(startAddr,plt_header_size); - for(auto i=startAddr+plt_skip; i<endAddr; i+=plt_skip) - { - addRange(i,plt_entry_size_first_part); - addRange(i+plt_entry_size_first_part,plt_entry_size-plt_entry_size_first_part); - addName(i,dynsymEntryIndex++); - } + // sanity check string length + if (name_offset < 0U || name_offset > (size_t)dynstrSec->get_size()) + return; - // Return whether or not we used an enhanced plt. - return use_enhanced_plt; - }; - const auto handle_arm64_plt=[&]() - { - const auto plt_entry_size=16; - const auto plt_header_size=8*4; + const auto applyName = [&](const string &part, const Address_t myAddr) { + // get the name + const auto name = string(dynstrData + name_offset) + part + "@plt"; - addRange(startAddr,plt_header_size); - for(auto i=startAddr+plt_header_size; i<endAddr; i+=plt_entry_size) + // find a function + auto func_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { + return s.begin()->first == myAddr; + }); + if (func_it != sccs.end()) { - addRange(i,plt_entry_size); - addName(i,dynsymEntryIndex++); + cout << "Setting function at " << hex << myAddr << " to name " << name << endl; + funcNames[*func_it] = name; } }; - const auto handle_arm32_plt=[&]() - { - const auto plt_entry_size=3*4; // 3 instructions - const auto plt_header_size=5*4; // 5 instructions - addRange(startAddr,plt_header_size); - for(auto i=startAddr+plt_header_size; i<endAddr; i+=plt_entry_size) - { - addRange(i,plt_entry_size); - addName(i,dynsymEntryIndex++); - } - }; + applyName("part1", addr); + applyName("part2", addr + 6); + }; - bool use_enhanced_x86_plt = false; - switch(machine_type) + const auto pltSec = exeio.sections[pltName]; + if (pltSec == NULL) + return; + + const auto startAddr = pltSec->get_address(); + const auto endAddr = pltSec->get_address() + pltSec->get_size(); + + if (verbose) + cout << "Found plt function range is " << hex << startAddr << "-" << endAddr << endl; + + const auto pltRange_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { + return find_if(ALLOF(s), [&](const Range_t &r) { return r.contains(startAddr); }) != s.end(); + }); + // erase startAddr if found. + if (pltRange_it != sccs.end()) + sccs.erase(pltRange_it); // invalidates all iterators + + auto dynsymEntryIndex = 0; + + const auto handle_x86_plt = [&]() { + const auto plt_sec_data_ptr = pltSec->get_data(); + const auto plt_skip = 16; + const auto plt_entry_size = 16; + // Need to determine whether there is an "enhanced plt" in use. An enhanced plt + // uses bounded prefixes on the jump instructions to make sure that the plt + // entries are not poisoned. Use this array of bytes (which translates to an endbr64 + // instruction) in order to make the determination. + uint8_t enhanced_plt_signature[] = {0xf3, 0x0f, 0x1e, 0xfa}; + const auto use_enhanced_plt = !memcmp((const void *)(plt_sec_data_ptr + 16), + (const void *)enhanced_plt_signature, + sizeof(enhanced_plt_signature)); + const auto plt_header_size = use_enhanced_plt ? 13 : 12; + const auto plt_entry_size_first_part = use_enhanced_plt ? 15 : 6; + + addRange(startAddr, plt_header_size); + for (auto i = startAddr + plt_skip; i < endAddr; i += plt_skip) { - case mtX86_64: - case mtI386: - use_enhanced_x86_plt = handle_x86_plt(); - break; - case mtAarch64: - handle_arm64_plt(); - break; - case mtArm32: - handle_arm32_plt(); - break; - default: - assert(0); + addRange(i, plt_entry_size_first_part); + addRange(i + plt_entry_size_first_part, plt_entry_size - plt_entry_size_first_part); + addName(i, dynsymEntryIndex++); + } - }; - cout<<"#ATTRIBUTE plt_entries="<<dec<<dynsymEntryIndex<<endl; + // Return whether or not we used an enhanced plt. + return use_enhanced_plt; + }; + const auto handle_arm64_plt = [&]() { + const auto plt_entry_size = 16; + const auto plt_header_size = 8 * 4; + addRange(startAddr, plt_header_size); + for (auto i = startAddr + plt_header_size; i < endAddr; i += plt_entry_size) + { + addRange(i, plt_entry_size); + addName(i, dynsymEntryIndex++); + } + }; + const auto handle_arm32_plt = [&]() { + const auto plt_entry_size = 3 * 4; // 3 instructions + const auto plt_header_size = 5 * 4; // 5 instructions - // deal with gotPlt Section. - const auto gotPltSec=exeio.sections[endSecName]; - if(gotPltSec==NULL) - return; + addRange(startAddr, plt_header_size); + for (auto i = startAddr + plt_header_size; i < endAddr; i += plt_entry_size) + { + addRange(i, plt_entry_size); + addName(i, dynsymEntryIndex++); + } + }; + bool use_enhanced_x86_plt = false; + switch (machine_type) + { + case mtX86_64: + case mtI386: + use_enhanced_x86_plt = handle_x86_plt(); + break; + case mtAarch64: + handle_arm64_plt(); + break; + case mtArm32: + handle_arm32_plt(); + break; + default: + assert(0); + }; + cout << "#ATTRIBUTE plt_entries=" << dec << dynsymEntryIndex << endl; + + // deal with gotPlt Section. + const auto gotPltSec = exeio.sections[endSecName]; + if (gotPltSec == NULL) + return; + + // Decide on the sizes/configuration of the plt entries depending on + // whether we are using enhanced plts. See handle_x86_plt() for more + // information about this choice. + const auto gotPltEntrySize = use_enhanced_x86_plt ? 16 : 8; + const auto gotPltRangeSize = use_enhanced_x86_plt ? 11 : 6; + const auto gotPltStartAddr = gotPltSec->get_address(); + + const auto gotPltRange_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { + return find_if(ALLOF(s), [&](const Range_t &r) { return r.contains(gotPltStartAddr); }) != s.end(); + }); + // erase startAddr if found. + if (gotPltRange_it != sccs.end()) + sccs.erase(gotPltRange_it); // invalidates all iterators + + auto gotpltEntries = 0U; + for (auto i = 0U; i + gotPltRangeSize < (size_t)gotPltSec->get_size(); i += gotPltEntrySize) + { + addRange(gotPltStartAddr + i, gotPltRangeSize); + gotpltEntries++; + } + cout << "#ATTRIBUTE gotplt_entries=" << dec << gotpltEntries << endl; - // Decide on the sizes/configuration of the plt entries depending on - // whether we are using enhanced plts. See handle_x86_plt() for more - // information about this choice. - const auto gotPltEntrySize= use_enhanced_x86_plt ? 16 : 8; - const auto gotPltRangeSize= use_enhanced_x86_plt ? 11 : 6; - const auto gotPltStartAddr=gotPltSec->get_address(); + // Deal with the .plt.sec section that calls via the got. + const auto pltSecSection = exeio.sections[pltSecName]; + if (pltSecSection == NULL) + return; - const auto gotPltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) - { - return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(gotPltStartAddr); }) != s.end(); - }); - // erase startAddr if found. - if(gotPltRange_it!=sccs.end()) - sccs.erase(gotPltRange_it); // invalidates all iterators + const auto pltSecStartAddr = pltSecSection->get_address(); + const auto pltSecEndAddr = pltSecSection->get_address() + pltSecSection->get_size(); + const auto handle_x86_pltSec = [&]() { + const auto plt_skip = 16; + const auto plt_entry_size = 11; - auto gotpltEntries=0U; - for(auto i=0U; i + gotPltRangeSize < (size_t)gotPltSec->get_size(); i+=gotPltEntrySize) + for (auto i = pltSecStartAddr; i < pltSecEndAddr; i += plt_skip) { - addRange(gotPltStartAddr+i,gotPltRangeSize); - gotpltEntries++; + addRange(i, plt_entry_size); } - cout<<"#ATTRIBUTE gotplt_entries="<<dec<<gotpltEntries<<endl; - + }; + const auto pltSecRange_it = find_if(ALLOF(sccs), [&](const RangeSet_t &s) { + return find_if(ALLOF(s), [&](const Range_t &r) { return r.contains(pltSecStartAddr); }) != s.end(); + }); + // erase startAddr if found. + if (pltSecRange_it != sccs.end()) + { + cout << "Erasing scc with " << dec << pltSecRange_it->size() << " entries." << endl; + sccs.erase(pltSecRange_it); // invalidates all iterators } - - void doBelongTos(const Range_t &range, const Address_t startAddr) + switch (machine_type) { - const auto sec=exeio.sections.findByAddress(range.first); - assert(sec); - const auto secEnd=exeio.sections.findByAddress(range.second-1); - assert(sec==secEnd); // same section. - const auto data=sec->get_data(); - const auto secStartAddr=sec->get_address(); - const auto range_len=range.second-range.first; - const auto the_code=(const uint8_t*)(data+(range.first-secStartAddr)); - - auto insn=(cs_insn *)nullptr; - - const auto count = cs_disasm(cshandle, the_code, range_len, range.first, 0, &insn); - if (count > 0) - { - for (auto j = 0U; j < count; j++) - { - outfile<<hex<<"\t"<<insn[j].address<<"\t"<<dec<<insn[j].size<<"\tINSTR BELONGTO\t"<<hex<<startAddr<< "\t; "<<insn[j].mnemonic << " " << insn[j].op_str<<endl; - } + case mtX86_64: + handle_x86_pltSec(); + break; + default: + assert(0); + }; + } - cs_free(insn, count); - } - else + void doBelongTos(const Range_t &range, const Address_t startAddr) + { + const auto sec = exeio.sections.findByAddress(range.first); + assert(sec); + const auto secEnd = exeio.sections.findByAddress(range.second - 1); + assert(sec == secEnd); // same section. + const auto data = sec->get_data(); + const auto secStartAddr = sec->get_address(); + const auto range_len = range.second - range.first; + const auto the_code = (const uint8_t *)(data + (range.first - secStartAddr)); + + auto insn = (cs_insn *)nullptr; + + const auto count = cs_disasm(cshandle, the_code, range_len, range.first, 0, &insn); + if (count > 0) + { + for (auto j = 0U; j < count; j++) { - cerr<<"ERROR: Failed to disassemble code at "<<range.first<<"-"<<range.second<<endl; - exit(1); + outfile << hex << "\t" << insn[j].address << "\t" << dec << insn[j].size << "\tINSTR BELONGTO\t" << hex << startAddr << "\t; " << insn[j].mnemonic << " " << insn[j].op_str << endl; } - + cs_free(insn, count); } - - void doBelongTos(const RangeSet_t &scc) + else { - const auto min=*scc.begin(); - const auto startAddr=min.first; - - for(auto range : scc) - doBelongTos(range,startAddr); - + cerr << "ERROR: Failed to disassemble code at " << range.first << "-" << range.second << endl; + exit(1); } + } - void writeAnnotations() - { - cout<<"The functions are:"<<endl; - auto i=0; - for(const auto &scc : sccs) - { - const auto min=*scc.begin(); - const auto max=*prev(scc.end()); - const auto size=max.second-min.first; - - cout<<"Function "<<dec<<i++<<" (" <<funcNames[scc] << ") is "<<hex<<min.first<<" "<<dec<<max.second-min.first<<endl; - const auto usefp=getUseFp(scc); - - outfile<<hex<<"\t"<<min.first<<"\t"<<dec<<size<<"\tFUNC GLOBAL\t"<<funcNames[scc]<<" "<< usefp << endl; - doBelongTos(scc); - } - if(getenv("SELF_VALIDATE")) - assert(sccs.size()>=0); - } + void doBelongTos(const RangeSet_t &scc) + { + const auto min = *scc.begin(); + const auto startAddr = min.first; - string getUseFp(const RangeSet_t scc) + for (auto range : scc) + doBelongTos(range, startAddr); + } + + void writeAnnotations() + { + cout << "The functions are:" << endl; + auto i = 0; + for (const auto &scc : sccs) { - assert(scc.begin()!=scc.end()); - const auto startAddr=scc.begin()->first; - const auto fde=ehp->findFDE(startAddr); - if(!fde) return "NOFP"; - const auto &ehprogram=fde->getProgram(); - const auto ehprogramInstructions=ehprogram.getInstructions(); - - const auto def_cfa_rbp_it = find_if(ALLOF(*ehprogramInstructions), [&](const EHProgramInstruction_t* insn) - { - assert(insn); - const auto &insnBytes=insn->getBytes(); - // 0xd, 0x5 is "def_cfa_register ebp" - // 0xd, 0x6 is "def_cfa_register rbp" - const auto reg=file_class==ELF64 ? (uint8_t)0x6 : (uint8_t)0x5; - return insnBytes==EHProgramInstructionByteVector_t({(uint8_t)0xd, reg }); - }); - return def_cfa_rbp_it == ehprogramInstructions->end() ? "NOFP" : "USEFP"; - } - + const auto min = *scc.begin(); + const auto max = *prev(scc.end()); + const auto size = max.second - min.first; + cout << "Function " << dec << i++ << " (" << funcNames[scc] << ") is " << hex << min.first << " " << dec << max.second - min.first << endl; + const auto usefp = getUseFp(scc); + outfile << hex << "\t" << min.first << "\t" << dec << size << "\tFUNC GLOBAL\t" << funcNames[scc] << " " << usefp << endl; + doBelongTos(scc); + } + if (getenv("SELF_VALIDATE")) + assert(sccs.size() >= 0); + } + string getUseFp(const RangeSet_t scc) + { + assert(scc.begin() != scc.end()); + const auto startAddr = scc.begin()->first; + const auto fde = ehp->findFDE(startAddr); + if (!fde) + return "NOFP"; + const auto &ehprogram = fde->getProgram(); + const auto ehprogramInstructions = ehprogram.getInstructions(); + + const auto def_cfa_rbp_it = find_if(ALLOF(*ehprogramInstructions), [&](const EHProgramInstruction_t *insn) { + assert(insn); + const auto &insnBytes = insn->getBytes(); + // 0xd, 0x5 is "def_cfa_register ebp" + // 0xd, 0x6 is "def_cfa_register rbp" + const auto reg = file_class == ELF64 ? (uint8_t)0x6 : (uint8_t)0x5; + return insnBytes == EHProgramInstructionByteVector_t({(uint8_t)0xd, reg}); + }); + return def_cfa_rbp_it == ehprogramInstructions->end() ? "NOFP" : "USEFP"; + } }; - -ostream& operator<<(ostream& os, const CreateFunctions_t::RangeSet_t& rs) +ostream &operator<<(ostream &os, const CreateFunctions_t::RangeSet_t &rs) { - for(const auto r : rs) + for (const auto r : rs) { - os<<"("<<r.first<<"-"<<r.second<<"), "; + os << "(" << r.first << "-" << r.second << "), "; } return os; } - - -int main(int argc, char* argv[]) +int main(int argc, char *argv[]) { - if(argc < 3) - { - usage(argc,argv); - exit(1); - } - // Parse some options for the transform - const static struct option long_options[] = { - {"verbose", no_argument, 0, 'v'}, - {"help", no_argument, 0, 'h'}, - {"usage", no_argument, 0, '?'}, - {0,0,0,0} - }; - auto short_opts="vh?"; - auto verbose=false; + if (argc < 3) + { + usage(argc, argv); + exit(1); + } + // Parse some options for the transform + const static struct option long_options[] = { + {"verbose", no_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"usage", no_argument, 0, '?'}, + {0, 0, 0, 0}}; + auto short_opts = "vh?"; + auto verbose = false; auto index = (int)0; - while(1) + while (1) { - int c = getopt_long(argc, argv,short_opts, long_options, &index); - if(c == -1) - break; - switch(c) + int c = getopt_long(argc, argv, short_opts, long_options, &index); + if (c == -1) + break; + switch (c) { - case 0: - break; - case 'v': - verbose=true; - break; - case '?': - case 'h': - usage(argc,argv); - exit(1); - break; - default: - break; - } - } - - - if(optind+2 > argc) + case 0: + break; + case 'v': + verbose = true; + break; + case '?': + case 'h': + usage(argc, argv); + exit(1); + break; + default: + break; + } + } + + if (optind + 2 > argc) { - usage(argc,argv); + usage(argc, argv); exit(1); } - auto input_pgm=string(argv[optind]); - auto output_annot=string(argv[optind+1]); - for(auto i=optind+2 ; i < argc; i++) + auto input_pgm = string(argv[optind]); + auto output_annot = string(argv[optind + 1]); + for (auto i = optind + 2; i < argc; i++) { - ofstream out(argv[i]); // touch file - if(!out.is_open()) + ofstream out(argv[i]); // touch file + if (!out.is_open()) { - cerr<<"Cannot touch file "<<argv[i]<<endl; + cerr << "Cannot touch file " << argv[i] << endl; exit(1); } - } try { - CreateFunctions_t create_funcs(input_pgm,output_annot,verbose); + CreateFunctions_t create_funcs(input_pgm, output_annot, verbose); create_funcs.calculate(); create_funcs.writeAnnotations(); } - catch(const exception& e) + catch (const exception &e) { cout << "Cannot run rida on input file: " << input_pgm << endl; cout << e.what() << endl;