diff --git a/libEXEIO/include/exeio.h b/libEXEIO/include/exeio.h
index 76e09882115bdf102ce4852f14ff52d6fade3293..1380e42ffb7c18b7d2cac44b13631b0ade75f171 100644
--- a/libEXEIO/include/exeio.h
+++ b/libEXEIO/include/exeio.h
@@ -44,7 +44,7 @@ namespace EXEIO
 			virtual ~exeio_backend_t() { }
 			virtual void dump_header(std::ostream& stream) =0;
 			virtual void dump_section_headers(std::ostream& stream) =0;
-			virtual void load(exeio_t* main, char* filename) =0;
+			virtual void load(exeio_t* main, const char* filename) =0;
 			virtual execlass_t get_class() =0;
 			virtual virtual_offset_t get_entry() =0;
 			virtual void* get_elfio() { return NULL; }
@@ -57,6 +57,28 @@ namespace EXEIO
 	{
 		public:
 			exeio_section_t* operator[](int i) { return the_sections[i]; }
+			// Look up a section by exact name; returns NULL when no section matches.
+			exeio_section_t* operator[](const std::string& name)
+			{
+				for(auto s: the_sections)
+					if(s->get_name()==name)
+						return s;
+				return NULL;
+			}
+			// Find the section whose [address, address+size) range holds addr;
+			// returns NULL when no section covers it.
+			exeio_section_t* findByAddress(const uint64_t addr)
+			{
+				for(auto s: the_sections)
+				{
+					// compare offsets so that address+size cannot wrap around.
+					const uint64_t start=s->get_address();
+					const uint64_t size=s->get_size();
+					if(start <= addr && addr-start < size)
+						return s;
+				}
+				return NULL;
+			}
 			int size() const { return (int)the_sections.size(); }
 
 			void add_section(exeio_section_t* sec)
@@ -77,13 +99,15 @@ namespace EXEIO
 		public:
 			// constructors
 			exeio_t() { Init(); }
-			exeio_t(char* filename) { Init(); load(filename); }
+			exeio_t(const char* filename) { Init(); load(filename); }
+			exeio_t(const std::string& filename) { Init(); load(filename.c_str()); }
 			virtual ~exeio_t() { delete backend; }
 
-			virtual void load(std::string filename) { load((char*)filename.c_str()); }
+			// convenience overload: forwards to load(const char*), no cast needed.
+			virtual void load(const std::string filename) { load(filename.c_str()); }
 
 			// load the file
-			virtual void load(char* fn);
+			virtual void load(const char* fn);
 
 			// trying to do minimal rewriting of code that uses
 			// ELFIO namespace.
diff --git a/libEXEIO/include/exeio_elf.h b/libEXEIO/include/exeio_elf.h index 91f2a39c2a3f684f7e6015177711257cd71c57f1..0b3dd175acb3c8b9cd5dd6889868e46412ecc7ef 100644 --- a/libEXEIO/include/exeio_elf.h +++ b/libEXEIO/include/exeio_elf.h @@ -59,7 +59,7 @@ namespace EXEIO // remove the elfio class. delete e; } - void load(exeio* the_main, char* filename) + void load(exeio* the_main, const char* filename) { main=the_main; e=new ELFIO::elfio; diff --git a/libEXEIO/include/exeio_pe.h b/libEXEIO/include/exeio_pe.h index 614f2df0102951cd5019bd688cd4af951a61dcb1..78f78d619cc488de5f2976534e89dfba06ece386 100644 --- a/libEXEIO/include/exeio_pe.h +++ b/libEXEIO/include/exeio_pe.h @@ -69,7 +69,7 @@ namespace EXEIO // remove the pe_bliss class. delete e; } - void load(exeio* the_main, char* filename) + void load(exeio* the_main, const char* filename) { main=the_main; diff --git a/libEXEIO/src/SConscript b/libEXEIO/src/SConscript index 824d768770c5ed792dedb65dbf6689f511e16436..e12e98d2a020018d830cf6c1331304c6332341d9 100644 --- a/libEXEIO/src/SConscript +++ b/libEXEIO/src/SConscript @@ -17,7 +17,9 @@ cpppath=''' $SECURITY_TRANSFORMS_HOME/pebliss/trunk/pe_lib/ ''' + myenv=myenv.Clone(CPPPATH=Split(cpppath)) +myenv.Append(CXXFLAGS=" -std=c++11 -Wall -Werror ") lib=myenv.Library(libname, Split(files)) install=myenv.Install("$SECURITY_TRANSFORMS_HOME/lib/", lib) diff --git a/libEXEIO/src/exeio_src.cpp b/libEXEIO/src/exeio_src.cpp index 94f6cb5db02cae4fd1ad3b08f98b5e876f8b348b..58ec9f58b794f6402db38942f3d6c0bc6cde3ac2 100644 --- a/libEXEIO/src/exeio_src.cpp +++ b/libEXEIO/src/exeio_src.cpp @@ -14,7 +14,7 @@ using namespace std; using namespace pe_bliss; #endif -void exeio::load(char* filename) +void exeio::load(const char* filename) { ifstream instream(filename); diff --git a/libEXEIO/test/SConscript b/libEXEIO/test/SConscript index 2b47d858e13b0928b350027440945f8928537a39..3982cf59c158574c0fe45309c02730d326d285b5 100644 --- a/libEXEIO/test/SConscript +++ b/libEXEIO/test/SConscript 
@@ -30,6 +30,7 @@ if "CYGWIN" in sysname: libs = libs + " iconv" myenv=myenv.Clone(CPPPATH=Split(cpppath), LIBS=Split(libs), LIBPATH=Split(libpath)) +myenv.Append(CXXFLAGS=" -Wall -Werror -std=c++11 ") pgm=myenv.Program(libname, Split(files)) install=myenv.Install("$SECURITY_TRANSFORMS_HOME/bin/", pgm) diff --git a/libMEDSannotation/SConscript b/libMEDSannotation/SConscript index 4c393970d2f32a2b3515dbe6b5eb407ee3f8aff3..62c64921b7dd2cb456df308c97e7a1b93c6b4434 100644 --- a/libMEDSannotation/SConscript +++ b/libMEDSannotation/SConscript @@ -26,7 +26,7 @@ cpppath=''' #CFLAGS="-fPIC " myenv=myenv.Clone(CPPPATH=Split(cpppath)) -myenv.Append(CXXFLAGS=" -std=c++11 ") +myenv.Append(CXXFLAGS=" -std=c++11 -Wall -Werror ") lib=myenv.Library(lib, Split(files)) install=myenv.Install("$SECURITY_TRANSFORMS_HOME/lib/", lib) diff --git a/libehp b/libehp index 482433201f9c480cfdabf4a374feecd47becd324..cc0a5d8ede2ba1cc307ecfd22e19ad4dd8806c6d 160000 --- a/libehp +++ b/libehp @@ -1 +1 @@ -Subproject commit 482433201f9c480cfdabf4a374feecd47becd324 +Subproject commit cc0a5d8ede2ba1cc307ecfd22e19ad4dd8806c6d diff --git a/tools/SConscript b/tools/SConscript index 0b57597713f5e340aae37daec7ecc729b385f1c9..31dfd20e9e71bad448a6d430e0fbdf39561a041f 100644 --- a/tools/SConscript +++ b/tools/SConscript @@ -13,6 +13,7 @@ dirs=''' spasm dump_map hook_start + rida ''' nobuild_dirs=''' diff --git a/tools/rida/SConscript b/tools/rida/SConscript new file mode 100644 index 0000000000000000000000000000000000000000..68e393c43bea056f94b6b613ee43700032cb6b6b --- /dev/null +++ b/tools/rida/SConscript @@ -0,0 +1,32 @@ +import os + + + +Import('env') +myenv=env.Clone() +myenv.Replace(SECURITY_TRANSFORMS_HOME=os.environ['SECURITY_TRANSFORMS_HOME']) + +cpppath=''' + $SECURITY_TRANSFORMS_HOME/include + $SECURITY_TRANSFORMS_HOME/libehp/include + $SECURITY_TRANSFORMS_HOME/libEXEIO/include + $SECURITY_TRANSFORMS_HOME/libcapstone/include + ''' + + +files=Glob( Dir('.').srcnode().abspath+"/*.cpp") + + 
+pgm="rida.exe"
+
+LIBPATH=Split('''
+	$SECURITY_TRANSFORMS_HOME/lib
+	$SECURITY_TRANSFORMS_HOME/libcapstone
+	''')
+LIBS=Split("ehp EXEIO pebliss capstone")
+myenv=myenv.Clone(CPPPATH=Split(cpppath))
+pgm=myenv.Program(pgm, files, LIBPATH=LIBPATH, LIBS=LIBS)
+install=myenv.Install("$SECURITY_TRANSFORMS_HOME/plugins_install/", pgm)
+Default(install)
+
+Return('install')
diff --git a/tools/rida/SConstruct b/tools/rida/SConstruct
new file mode 100644
index 0000000000000000000000000000000000000000..9c9d2a5d563079f60954e46d422025a7c18edb82
--- /dev/null
+++ b/tools/rida/SConstruct
@@ -0,0 +1,12 @@
+
+
+
+env=Environment()
+
+env.Append(CFLAGS=" -g -Wall -Werror -std=c++11 -fmax-errors=2")
+env.Append(CXXFLAGS=" -g -Wall -Werror -std=c++11 -fmax-errors=2")
+env.Append(LINKFLAGS=" -g -Wall -Werror -std=c++11 -fmax-errors=2")
+
+Export('env')
+install=SConscript("SConscript")
+Return('install')
diff --git a/tools/rida/rida.cpp b/tools/rida/rida.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ed54b50f3807df26b8adc7da42e3626e8fbeac6
--- /dev/null
+++ b/tools/rida/rida.cpp
@@ -0,0 +1,466 @@
+#include <iostream>
+#include <assert.h>
+#include <set>
+#include <map>
+#include <memory>
+#include <iterator>
+#include <algorithm>
+#include <getopt.h>
+#include <ehp.hpp>
+#include <exeio.h>
+#include <string>
+#include "capstone/capstone.h"
+#include <fstream>
+#include <elfio/elfio.hpp>
+#include <elf.h>
+
+
+using namespace std;
+using namespace EHP;
+using namespace EXEIO;
+using namespace ELFIO;
+
+#define ALLOF(a) begin(a),end(a)
+
+// Print the command-line synopsis and exit with a failure status.
+void usage(int argc, char* argv[])
+{
+	cout<<"Usage: "<<argv[0]<<" input.exe output.annot"<<endl;
+	exit(1);
+}
+
+
+
+// Recovers function ranges and names from an x86-64 ELF binary's EH frame,
+// sections and symbol tables, and writes them out as annotations.
+class CreateFunctions_t
+{
+	private:
+		shared_ptr<const EHFrameParser_t> ehp;
+		using Address_t = uint64_t;
+		// Half-open address range [first, second).
+		class Range_t : public pair<Address_t,Address_t>
+		{
+			public:
+				Range_t(const Address_t &a, const Address_t &b) : pair<Address_t,Address_t>(a,b) { }
+				// The end is exclusive: an address equal to 'second' starts the next range.
+				bool contains(const Address_t &c) const { return first <= c && c<second; }
+
+		};
+		using RangeSet_t = set<Range_t>;
+		set < RangeSet_t > sccs;
+		map<RangeSet_t,string> funcNames;
+		bool verbose;
+		exeio_t exeio;
+		csh cshandle;
+		ofstream outfile;
+
+		// Copy the NUL-terminated string at 'offset' of a 'size'-byte string
+		// table into 'out' without reading past the end of the table.
+		// Returns false if the table is missing or the offset is out of bounds.
+		static bool readString(const char* table, const size_t size, const size_t offset, string &out)
+		{
+			if(table==nullptr || offset >= size)
+				return false;
+			const auto strBegin=table+offset;
+			const auto tableEnd=table+size;
+			out.assign(strBegin, std::find(strBegin, tableEnd, '\0'));
+			return true;
+		}
+
+	public:
+		// Open the annotation output, parse the input's EH frame and create
+		// the capstone handle.  Exits if the output or capstone cannot be opened.
+		CreateFunctions_t(const string &input_pgm, const string &output_annot, const bool p_verbose)
+			:
+			verbose(p_verbose),
+			exeio(input_pgm),
+			cshandle()
+		{
+			outfile.open(output_annot.c_str(), ofstream::out);
+			if(!outfile.is_open())
+			{
+				cerr<<"Cannot open "<<output_annot<<endl;
+				exit(1);
+			}
+			ehp = EHFrameParser_t::factory(input_pgm);
+			if(verbose)
+				ehp->print();
+
+			if (cs_open(CS_ARCH_X86, CS_MODE_64, &cshandle) != CS_ERR_OK)
+			{
+				cerr<<"Cannot initialize capstone"<<endl;
+				exit(1);
+			}
+		}
+		virtual ~CreateFunctions_t()
+		{
+			cs_close(&cshandle);
+		}
+
+
+		// Compute the function ranges, then name them.
+		void calculate()
+		{
+			ehframeToSccs();
+			addSectionToSccs(".init");
+			addSectionToSccs(".fini");
+			pltSplit<ELFIO::Elf64_Sym>(".plt", ".plt.got");
+			// if exeio->elf class == 64-bit
+			nameFunctions<ELFIO::Elf64_Sym>();
+			// else
+			// nameFunctions<Elf32_Rela, Elf32_Rel, Elf32_Sym>();
+
+		}
+
+		// Give each function a default "sub_<start address>" name, then apply
+		// names from the dynamic and static symbol tables.
+		template<class T_Sym>
+		void nameFunctions()
+		{
+			// set default names
+			for(const auto &func: sccs)
+			{
+				assert(func.begin() != func.end());
+				const auto first_range=*(func.begin());
+				const auto startAddr=first_range.first;
+				const auto name=string()+"sub_"+to_string(startAddr);
+				if(funcNames[func]=="")	// destructive test OK, next line sets if empty.
+					funcNames[func]=name;
+			}
+
+			// do symbol names.
+			parseSyms<T_Sym>(".dynsym", ".dynstr");
+			parseSyms<T_Sym>(".symtab", ".strtab");
+		}
+
+		// Name functions from symbol table 'secName': every STT_FUNC symbol
+		// whose non-zero value equals a function's start address names it,
+		// using the string table 'stringSecName'.
+		template<class T_Sym>
+		void parseSyms(const string& secName, const string & stringSecName)
+		{
+			const auto sec=exeio.sections[secName];
+			if(!sec) return;	// err check
+
+			const auto stringSec=exeio.sections[stringSecName];
+			if(!stringSec) return;	// err check
+
+			const auto data=sec->get_data();
+			if(!data) return;	// no contents to parse
+			const auto stringData=stringSec->get_data();
+			const auto stringSize=static_cast<size_t>(stringSec->get_size());
+
+			for(auto i=0U; i+sizeof(T_Sym) <= (size_t)sec->get_size(); i+=sizeof(T_Sym))
+			{
+				const auto sym=reinterpret_cast<const T_Sym *>(data+i);
+				const auto value=sym->st_value;
+				if(value==0)
+					continue;
+
+				// works for both ELF64 and ELF32, macros defined the same.
+				const auto type=ELF64_ST_TYPE(sym->st_info);
+				if(type!=STT_FUNC)
+					continue;
+
+				// functions with non-zero address at this point.
+				// get the name; skip symbols whose name offset is out of bounds.
+				auto name=string();
+				if(!readString(stringData, stringSize, sym->st_name, name))
+					continue;
+
+				// find a function
+				auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
+					{
+						return s.begin() -> first == value;
+					});
+				if(func_it!=sccs.end())
+				{
+					cout<<"Setting function at "<<hex<<value<<" to name "<<name<<endl;
+					funcNames[*func_it]=name;
+				}
+
+			}
+		}
+
+		// Seed one single-range function per FDE, then merge the function
+		// holding each FDE with the function holding each of its call sites.
+		void ehframeToSccs()
+		{
+			const auto fdes=ehp->getFDEs();
+			for(const auto fde : *fdes)
+				sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(),fde->getEndAddress())}));
+
+			cout<<hex;
+			for(const auto fde : *fdes)
+			{
+				if(verbose)
+					cout<<"Found FDE at : " << fde->getStartAddress() << "-"<<fde->getEndAddress()<<endl;
+				auto pair=Range_t(fde->getStartAddress(), fde->getEndAddress());
+				const auto lsda=fde->getLSDA();
+				assert(lsda);
+				const auto callsites=lsda->getCallSites();
+				assert(callsites);
+
+				for(const auto cs : *callsites)
+				{
+					if(verbose)
+						cout<<"\tCall site (0x"<<cs->getCallSiteAddress()<<"-"<<cs->getCallSiteEndAddress()
+						    <<") with landing pad=0x"<<cs->getLandingPadAddress()<<endl;
+					auto set1_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) { return s.find(pair) != s.end(); } );
+					assert(set1_it!=sccs.end());
+
+					// NOTE(review): this matches on the call-site address; confirm
+					// whether the landing-pad address was intended instead.
+					auto set2_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
+						{
+							return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(cs->getCallSiteAddress()); }) != s.end();
+						});
+					assert(set2_it!=sccs.end());
+					auto set1=*set1_it;
+					auto set2=*set2_it;
+					sccs.erase(set1);
+					sccs.erase(set2);
+					auto set3=RangeSet_t();
+					set_union(ALLOF(set1), ALLOF(set2), inserter(set3, set3.begin()));
+					sccs.insert(set3);
+				}
+			}
+
+
+		}
+
+		// Add the whole of section 'sec_name' as one function; sections that
+		// are not present in the binary are skipped.
+		void addSectionToSccs(const string &sec_name)
+		{
+			const auto sec=exeio.sections[sec_name];
+			if(!sec) return;	// section is absent, nothing to add
+			const auto range=Range_t(sec->get_address(), sec->get_address()+sec->get_size());
+			const auto ranges=RangeSet_t({range});
+			sccs.insert(ranges);
+		}
+
+		// Replace the function covering 'pltSecName' with one function for the
+		// PLT header and one per 16-byte PLT entry, name the entries from
+		// .dynsym, and add section 'endSecName' (when present) as a function.
+		template<class T_Sym>
+		void pltSplit(const string &pltSecName, const string &endSecName)
+		{
+			const auto dynsymSec=exeio.sections[".dynsym"];
+			const auto dynstrSec=exeio.sections[".dynstr"];
+
+			const auto addRange=[&](const Address_t s, size_t len)
+			{
+				if(verbose)
+					cout<<"Adding PLT function "<<s<<" "<<len<<endl;
+				sccs.insert(RangeSet_t({Range_t(s,s+len)}));
+			};
+
+			const auto addName=[&](const Address_t addr, uint64_t dynsymIndex)
+			{
+
+				if(!dynsymSec) return;
+				if(!dynstrSec) return;
+
+				const auto dynsymData=dynsymSec->get_data();
+				const auto dynstrData=dynstrSec->get_data();
+				if(!dynsymData) return;
+
+				// the index into the .dynsym section for the relocation;
+				// the whole entry must lie inside the section.
+				const auto dynsymDataAsSymPtr=reinterpret_cast<const T_Sym *>(dynsymData);
+				if((dynsymIndex+1)*sizeof(T_Sym) > static_cast<size_t>(dynsymSec->get_size())) return;
+
+				// get a reference to the dynsym entry.
+				const auto &dynsymEntry=dynsymDataAsSymPtr[dynsymIndex];
+
+				// get the name; give up if its offset is outside .dynstr.
+				auto name=string();
+				if(!readString(dynstrData, static_cast<size_t>(dynstrSec->get_size()), dynsymEntry.st_name, name))
+					return;
+				name+="@plt";
+
+				// find a function
+				auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
+					{
+						return s.begin() -> first == addr;
+					});
+				if(func_it!=sccs.end())
+				{
+					cout<<"Setting function at "<<hex<<addr<<" to name "<<name<<endl;
+					funcNames[*func_it]=name;
+				}
+			};
+
+			const auto pltSec=exeio.sections[pltSecName];
+			assert(pltSec!=NULL);
+			const auto startAddr=pltSec->get_address();
+			const auto endAddr=pltSec->get_address()+pltSec->get_size();
+
+			if(verbose)
+				cout<<"Found plt function range is "<<hex<<startAddr<<"-"<<endAddr<<endl;
+
+			auto pltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
+				{
+					return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(startAddr); }) != s.end();
+				});
+			assert(pltRange_it!=sccs.end());
+			sccs.erase(pltRange_it);	// pltRange_it must not be used after this
+
+			const auto plt_skip=16;
+			const auto plt_header_size=12;
+			const auto plt_entry_size=16;
+
+			addRange(startAddr,plt_header_size);
+			// NOTE(review): assumes the n-th PLT entry corresponds to .dynsym entry n; confirm.
+			auto dynsymEntryIndex=1;
+			for(auto i=startAddr+plt_skip; i<endAddr; i+=plt_skip)
+			{
+				addRange(i,plt_entry_size);
+				addName(i,dynsymEntryIndex++);
+			}
+
+			// the trailing section is optional; add it only when it exists.
+			const auto gotPltSec=exeio.sections[endSecName];
+			if(gotPltSec!=NULL)
+				addRange(gotPltSec->get_address(),gotPltSec->get_size());
+
+		}
+
+		// Disassemble 'range' and emit one INSTR BELONGTO annotation per
+		// instruction, tying it to the function starting at 'startAddr'.
+		// Exits if capstone decodes no instruction in the range.
+		void doBelongTos(const Range_t &range, const Address_t startAddr)
+		{
+			const auto sec=exeio.sections.findByAddress(range.first);
+			assert(sec);
+			const auto secEnd=exeio.sections.findByAddress(range.second-1);
+			assert(sec==secEnd);	// same section.
+			const auto data=sec->get_data();
+			const auto secStartAddr=sec->get_address();
+			const auto range_len=range.second-range.first;
+			const auto the_code=(const uint8_t*)(data+(range.first-secStartAddr));
+
+			auto insn=(cs_insn *)nullptr;
+
+			const auto count = cs_disasm(cshandle, the_code, range_len, range.first, 0, &insn);
+			if (count > 0)
+			{
+				for (auto j = 0U; j < count; j++)
+				{
+					outfile<<hex<<"\t"<<insn[j].address<<"\t"<<dec<<insn[j].size<<"\tINSTR BELONGTO\t"<<hex<<startAddr<< "\t; "<<insn[j].mnemonic << " " << insn[j].op_str<<endl;
+				}
+
+				cs_free(insn, count);
+			}
+			else
+			{
+				cerr<<"ERROR: Failed to disassemble code at "<<range.first<<"-"<<range.second<<endl;
+				exit(1);
+			}
+
+
+		}
+
+		// Emit BELONGTO annotations for every range of function 'scc'.
+		void doBelongTos(const RangeSet_t &scc)
+		{
+			const auto min=*scc.begin();
+			const auto startAddr=min.first;
+
+			for(const auto &range : scc)
+				doBelongTos(range,startAddr);
+
+		}
+
+		// Print a summary of the functions and write a FUNC GLOBAL
+		// annotation followed by the BELONGTO annotations for each one.
+		void writeAnnotations()
+		{
+			cout<<"The functions are:"<<endl;
+			auto i=0;
+			for(const auto &scc : sccs)
+			{
+				const auto min=*scc.begin();
+				const auto max=*prev(scc.end());
+				const auto size=max.second-min.first;
+
+				cout<<"Function "<<dec<<i++<<" is "<<hex<<min.first<<" "<<dec<<max.second-min.first<<endl;
+
+				outfile<<hex<<"\t"<<min.first<<"\t"<<dec<<size<<"\tFUNC GLOBAL\t"<<funcNames[scc]<<endl;
+				doBelongTos(scc);
+			}
+		}
+
+
+};
+
+// Parse the command line, touch any extra files named after the two
+// required arguments, then compute and write the annotations.
+int main(int argc, char* argv[])
+{
+
+	if(argc < 3)
+	{
+		usage(argc,argv);
+		exit(1);
+	}
+	// Parse some options for the transform
+	const static struct option long_options[] = {
+		{"verbose", no_argument, 0, 'v'},
+		{"help", no_argument, 0, 'h'},
+		{"usage", no_argument, 0, '?'},
+		{0,0,0,0}
+	};
+	auto short_opts="vh?";
+	auto verbose=false;
+	auto index = (int)0;
+	while(1)
+	{
+		int c = getopt_long(argc, argv,short_opts, long_options, &index);
+		if(c == -1)
+			break;
+		switch(c)
+		{
+			case 0:
+				break;
+			case 'v':
+				verbose=true;
+				break;
+			case '?':
+			case 'h':
+				usage(argc,argv);
+				exit(1);
+				break;
+			default:
+				break;
+		}
+	}
+
+
+	if(optind+2 > argc)
+	{
+		usage(argc,argv);
+		exit(1);
+	}
+
+	auto input_pgm=string(argv[optind]);
+	auto output_annot=string(argv[optind+1]);
+	for(auto i=optind+2 ; i < argc; i++)
+	{
+		ofstream out(argv[i]);	// touch file
+		if(!out.is_open())
+		{
+			cerr<<"Cannot touch file "<<argv[i]<<endl;
+			exit(1);
+		}
+
+	}
+
+	CreateFunctions_t create_funcs(input_pgm,output_annot,verbose);
+	create_funcs.calculate();
+	create_funcs.writeAnnotations();
+
+	return 0;
+}