Newer
Older
#include <assert.h>
#include <set>
#include <algorithm>
#include <getopt.h>
#include <ehp.hpp>
#include <exeio.h>
#include <string>
#include "capstone/capstone.h"
#include <fstream>
#include <elf.h>
using namespace std;
using namespace EHP;
using namespace EXEIO;
#define ALLOF(a) begin(a),end(a)
void usage(int argc, char* argv[])
{
cout<<"Usage: "<<argv[0]<<" input.exe output.annot>"<<endl;
exit(1);
}
class CreateFunctions_t
{
private:
unique_ptr<const EHFrameParser_t> ehp;
using Address_t = uint64_t;
class Range_t : public pair<Address_t,Address_t>
{
public:
Range_t(const Address_t &a, const Address_t &b) : pair<Address_t,Address_t>(a,b) { }
bool contains(const Address_t &c) const { return first <= c && c<second; }
};
using RangeSet_t = set<Range_t>;
set < RangeSet_t > sccs;
map<RangeSet_t,string> funcNames;
bool verbose;
exeio_t exeio;
csh cshandle;
ofstream outfile;
friend ostream& operator<<(ostream& os, const CreateFunctions_t::RangeSet_t& rs);
public:
CreateFunctions_t(const string &input_pgm, const string &output_annot, const bool p_verbose)
:
verbose(p_verbose),
exeio(input_pgm),
file_class(exeio.get_class()),
machine_type(exeio.getMachineType())
{
outfile.open(output_annot.c_str(), ofstream::out);
if(!outfile.is_open())
{
cerr<<"Cannot open "<<output_annot<<endl;
exit(1);
}
ehp = EHFrameParser_t::factory(input_pgm);
if(verbose)
ehp->print();
if(file_class!=ELF64 && file_class != ELF32)
{
cerr<<"Rida can only process ELF files."<<endl;
exit(1);
}
const auto my_cs_mode =
machine_type == mtArm32 ? cs_mode(CS_MODE_LITTLE_ENDIAN) :
machine_type == mtAarch64 ? cs_mode(CS_MODE_LITTLE_ENDIAN) :
machine_type == mtMips32 ? cs_mode(CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN) :
file_class == ELF64 ? cs_mode(CS_MODE_64) :
file_class == ELF32 ? cs_mode(CS_MODE_32) :
throw std::runtime_error("Cannot handle ELF class");
machine_type == mtX86_64 ? CS_ARCH_X86 :
machine_type == mtI386 ? CS_ARCH_X86 :
machine_type == mtArm32 ? CS_ARCH_ARM :
throw std::runtime_error("Cannot handle architecture");
if (cs_open(my_cs_arch, my_cs_mode , &cshandle) != CS_ERR_OK)
{
cerr<<"Cannot initialize capstone"<<endl;
exit(1);
}
}
virtual ~CreateFunctions_t()
{
cs_close(&cshandle);
}
void calculate()
{
ehframeToSccs();
addSectionToSccs(".init");
addSectionToSccs(".fini");
if(file_class==ELF64)
{
class Extracter64
{
public:
Elf64_Xword elf_r_sym (Elf64_Xword a) { return ELF64_R_SYM (a); }
Elf64_Xword elf_r_type(Elf64_Xword a) { return ELF64_R_TYPE(a); }
unsigned char elf_st_bind(unsigned char a) { return ELF64_ST_BIND(a); }
unsigned char elf_st_type(unsigned char a) { return ELF64_ST_TYPE(a); }
};
pltSplit<Elf64_Sym, Elf64_Rela, Elf64_Rel, Extracter64>(".plt", ".plt.got");
nameFunctions<Elf64_Sym, Extracter64>();
}
else
{
class Extracter32
{
public:
Elf32_Word elf_r_sym (Elf32_Word a) { return ELF32_R_SYM (a); }
Elf32_Word elf_r_type(Elf32_Word a) { return ELF32_R_TYPE(a); }
unsigned char elf_st_bind(unsigned char a) { return ELF32_ST_BIND(a); }
unsigned char elf_st_type(unsigned char a) { return ELF32_ST_TYPE(a); }
};
pltSplit<Elf32_Sym, Elf32_Rela, Elf32_Rel, Extracter32>(".plt", ".plt.got");
nameFunctions<Elf32_Sym, Extracter32>();
}
template<class T_Sym, class T_Extracter>
parseSyms<T_Sym, T_Extracter>(".dynsym", ".dynstr");
parseSyms<T_Sym, T_Extracter>(".symtab", ".strtab");
auto namedFunctions=0U;
auto unnamedFunctions=0U;
auto functions=0U;
// set default names
for(const auto &func: sccs)
{
assert(func.begin() != func.end());
const auto first_range=*(func.begin());
const auto startAddr=first_range.first;
std::stringstream ss;
ss << "sub_" << hex << startAddr;
const auto name = ss.str();
if(funcNames[func]=="") // destructive test OK, next line sets if empty.

Jason Hiser
committed
{
unnamedFunctions++;
cout<<"#ATTRIBUTE functions="<<dec<<functions<<endl;
cout<<"#ATTRIBUTE named_functions="<<dec<<namedFunctions<<endl;
cout<<"#ATTRIBUTE uunamed_functions="<<dec<<unnamedFunctions<<endl;
template<class T_Sym, class T_Extracter>
void parseSyms(const string& secName, const string & stringSecName)
{
const auto sec=exeio.sections[secName];
if(!sec) return; // err check
const auto stringSec=exeio.sections[stringSecName];
if(!stringSec) return; // err check
const auto data=sec->get_data();
const auto stringData=stringSec->get_data();
for(auto i=0U; i+sizeof(T_Sym) <= (size_t)sec->get_size(); i+=sizeof(T_Sym))
{
const auto sym=reinterpret_cast<const T_Sym *>(data+i);
const auto value=sym->st_value;
if(value==0)
continue;
// works for both ELF64 and ELF32, macros defined the same.
const auto type=T_Extracter().elf_st_type(sym->st_info);
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
if(type!=STT_FUNC)
continue;
// functions with non-zero address at this point.
const auto name_offset=sym->st_name;
// sanity check string length
if(name_offset < 0U || name_offset > (size_t)stringSec->get_size())
continue;
// get the name
const auto name=string(stringData+name_offset);
// find a function
auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
{
return s.begin() -> first == value;
});
if(func_it!=sccs.end())
{
cout<<"Setting function at "<<hex<<value<<" to name "<<name<<endl;
funcNames[*func_it]=name;
}
}
}
void ehframeToSccs()
{
const auto fdes=ehp->getFDEs();
for(const auto fde : *fdes)
//sccs.insert({ RangeSet_t({fde->getStartAddress(), fde->getEndAddress()})});
sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(),fde->getEndAddress())}));
cout<<hex;
for(const auto fde : *fdes)
{
if(verbose)
cout<<"Found FDE at : " << fde->getStartAddress() << "-"<<fde->getEndAddress()<<endl;
auto pair=Range_t(fde->getStartAddress(), fde->getEndAddress());
const auto lsda=fde->getLSDA();
assert(lsda);
const auto callsites=lsda->getCallSites();
assert(callsites);
for(const auto cs : *callsites)
{
if(verbose)
cout<<"\tCall site (0x"<<cs->getCallSiteAddress()<<"-"<<cs->getCallSiteEndAddress()
<<") with landing pad=0x"<<cs->getLandingPadAddress()<<endl;
if(cs->getLandingPadAddress()==0x0)
continue;
auto set1_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) { return s.find(pair) != s.end(); } );
assert(set1_it!=sccs.end());
auto set2_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
{
return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(cs->getLandingPadAddress()); }) != s.end();
});
assert(set2_it!=sccs.end());
auto set1=*set1_it;
auto set2=*set2_it;
if(set1!=set2)
{
sccs.erase(set1);
sccs.erase(set2);
auto set3=RangeSet_t();
if(verbose)
cout<<"\tMerging: set1="<< hex<< set1 << " and set2="<<set2<<dec<<endl;
set_union(ALLOF(set1), ALLOF(set2), inserter(set3, set3.begin()));
sccs.insert(set3);
}
}
}
}
void addSectionToSccs(const string &sec_name)
{
const auto sec=exeio.sections[sec_name];
const auto range=Range_t(sec->get_address(), sec->get_address()+sec->get_size());
const auto ranges=RangeSet_t({range});
sccs.insert(ranges);
}
template<class T_Sym, class T_Rela, class T_Rel, class T_Extracter>
void pltSplit(const string &pltSecName, const string &endSecName)
{
const auto dynsymSec=exeio.sections[".dynsym"];
const auto dynstrSec=exeio.sections[".dynstr"];

Jason Hiser
committed
const auto relapltSec=exeio.sections[".rela.plt"];
const auto relpltSec=exeio.sections[".rel.plt"];
const auto relSec=relapltSec ? relapltSec : relpltSec;
const auto relSecEntrySize=relapltSec ? sizeof(T_Rela) : sizeof(T_Rel);
const auto addRange=[&](const Address_t s, size_t len)
{
if(verbose)
cout<<"Adding PLT function "<<s<<" "<<len<<endl;
sccs.insert(RangeSet_t({Range_t({s,s+len})}));
};

Jason Hiser
committed
const auto addName=[&](const Address_t addr, uint64_t symIndex)
{
if(!dynsymSec) return;
if(!dynstrSec) return;

Jason Hiser
committed
if(!relSec) return;
// get the data out of the plt section.
const auto relData=relSec->get_data();
if(symIndex*relSecEntrySize >= (size_t)relSec->get_size()) return;
const auto relDataAsSymPtr=reinterpret_cast<const T_Rel *>(relData + symIndex*relSecEntrySize);
const auto &relEntry=*relDataAsSymPtr;
// calculate index into dynsym, section.
const auto dynsymIndex=T_Extracter().elf_r_sym(relEntry.r_info);
const auto dynsymData=dynsymSec->get_data();
const auto dynstrData=dynstrSec->get_data();

Jason Hiser
committed
cout<<dec<<"At entry "<<symIndex<<", reloc entry has dynsym index "<<dynsymIndex<<endl;
// the index into the .dynsym section for the relocation.
const auto dynsymDataAsSymPtr=reinterpret_cast<const T_Sym *>(dynsymData);
if(dynsymIndex*sizeof(T_Sym) >= (size_t)dynsymSec->get_size()) return;
// get a reference to the dynsym entry.
const auto &dynsymEntry=dynsymDataAsSymPtr[dynsymIndex];
// extra where in the string table the name is.
const auto name_offset=dynsymEntry.st_name;
// sanity check string length
if(name_offset < 0U || name_offset > (size_t)dynstrSec->get_size())
return;

Jason Hiser
committed
const auto applyName=[&](const string& part, const Address_t myAddr)
{
// get the name
const auto name=string(dynstrData+name_offset)+part+"@plt";
// find a function
auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
{
return s.begin() -> first == myAddr;
});
if(func_it!=sccs.end())
{
cout<<"Setting function at "<<hex<<myAddr<<" to name "<<name<<endl;
funcNames[*func_it]=name;
}
};
applyName("part1", addr);
applyName("part2", addr+6);
};
const auto pltSec=exeio.sections[pltSecName];

Jason Hiser
committed
if(pltSec==NULL) return;
const auto startAddr=pltSec->get_address();
const auto endAddr=pltSec->get_address()+pltSec->get_size();
if(verbose)
cout<<"Found plt function range is "<<hex<<startAddr<<"-"<<endAddr<<endl;
const auto pltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
{
return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(startAddr); }) != s.end();
});
// erase startAddr if found.
if(pltRange_it!=sccs.end())
sccs.erase(pltRange_it); // invalidates all iterators

Jason Hiser
committed
auto dynsymEntryIndex=0;
const auto plt_skip=16;
const auto plt_header_size=12;
const auto plt_entry_size=16;
const auto plt_entry_size_first_part=6;
addRange(startAddr,plt_header_size);
for(auto i=startAddr+plt_skip; i<endAddr; i+=plt_skip)
{
addRange(i,plt_entry_size_first_part);
addRange(i+6,plt_entry_size-plt_entry_size_first_part);
addName(i,dynsymEntryIndex++);
}
};

Jason Hiser
committed
const auto handle_arm64_plt=[&]()
{
const auto plt_entry_size=16;
const auto plt_header_size=8*4;
addRange(startAddr,plt_header_size);
for(auto i=startAddr+plt_header_size; i<endAddr; i+=plt_entry_size)
{
addRange(i,plt_entry_size);
addName(i,dynsymEntryIndex++);
}
};

Jason Hiser
committed
const auto handle_arm32_plt=[&]()
{
const auto plt_entry_size=3*4; // 3 instructions
const auto plt_header_size=5*4; // 5 instructions
addRange(startAddr,plt_header_size);
for(auto i=startAddr+plt_header_size; i<endAddr; i+=plt_entry_size)
{
addRange(i,plt_entry_size);
addName(i,dynsymEntryIndex++);
}
};
switch(machine_type)
{
case mtX86_64:
case mtI386:
handle_x86_plt();
break;
case mtAarch64:

Jason Hiser
committed
handle_arm64_plt();
break;
case mtArm32:
handle_arm32_plt();

Jason Hiser
committed
// deal with gotPlt Section.

Jason Hiser
committed
if(gotPltSec==NULL)
return;
// both 32- and 64-bit, entries are 6 bytes, with 2 bytes of padding.

Jason Hiser
committed
const auto gotPltEntrySize=8;
const auto gotPltRangeSize=6;
const auto gotPltStartAddr=gotPltSec->get_address();
const auto gotPltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s)
{
return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(gotPltStartAddr); }) != s.end();
});
// erase startAddr if found.
if(gotPltRange_it!=sccs.end())
sccs.erase(gotPltRange_it); // invalidates all iterators
auto gotpltEntries=0U;

Jason Hiser
committed
for(auto i=0U; i + gotPltRangeSize < (size_t)gotPltSec->get_size(); i+=gotPltEntrySize)
{
addRange(gotPltStartAddr+i,gotPltRangeSize);

Jason Hiser
committed
}
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
}
void doBelongTos(const Range_t &range, const Address_t startAddr)
{
const auto sec=exeio.sections.findByAddress(range.first);
assert(sec);
const auto secEnd=exeio.sections.findByAddress(range.second-1);
assert(sec==secEnd); // same section.
const auto data=sec->get_data();
const auto secStartAddr=sec->get_address();
const auto range_len=range.second-range.first;
const auto the_code=(const uint8_t*)(data+(range.first-secStartAddr));
auto insn=(cs_insn *)nullptr;
const auto count = cs_disasm(cshandle, the_code, range_len, range.first, 0, &insn);
if (count > 0)
{
for (auto j = 0U; j < count; j++)
{
outfile<<hex<<"\t"<<insn[j].address<<"\t"<<dec<<insn[j].size<<"\tINSTR BELONGTO\t"<<hex<<startAddr<< "\t; "<<insn[j].mnemonic << " " << insn[j].op_str<<endl;
}
cs_free(insn, count);
}
else
{
cerr<<"ERROR: Failed to disassemble code at "<<range.first<<"-"<<range.second<<endl;
exit(1);
}
}
void doBelongTos(const RangeSet_t &scc)
{
const auto min=*scc.begin();
const auto startAddr=min.first;
for(auto range : scc)
doBelongTos(range,startAddr);
}
void writeAnnotations()
{
cout<<"The functions are:"<<endl;
auto i=0;
for(const auto &scc : sccs)
{
const auto min=*scc.begin();
const auto max=*prev(scc.end());
const auto size=max.second-min.first;
cout<<"Function "<<dec<<i++<<" (" <<funcNames[scc] << ") is "<<hex<<min.first<<" "<<dec<<max.second-min.first<<endl;
outfile<<hex<<"\t"<<min.first<<"\t"<<dec<<size<<"\tFUNC GLOBAL\t"<<funcNames[scc]<<" "<< usefp << endl;
string getUseFp(const RangeSet_t scc)
{
assert(scc.begin()!=scc.end());
const auto startAddr=scc.begin()->first;
const auto fde=ehp->findFDE(startAddr);
if(!fde) return "NOFP";
const auto &ehprogram=fde->getProgram();
const auto ehprogramInstructions=ehprogram.getInstructions();
const auto def_cfa_rbp_it = find_if(ALLOF(*ehprogramInstructions), [&](const EHProgramInstruction_t* insn)
{
assert(insn);
const auto &insnBytes=insn->getBytes();
const auto reg=file_class==ELF64 ? (uint8_t)0x6 : (uint8_t)0x5;
return insnBytes==EHProgramInstructionByteVector_t({(uint8_t)0xd, reg });
});
return def_cfa_rbp_it == ehprogramInstructions->end() ? "NOFP" : "USEFP";
}
ostream& operator<<(ostream& os, const CreateFunctions_t::RangeSet_t& rs)
{
for(const auto r : rs)
{
os<<"("<<r.first<<"-"<<r.second<<"), ";
}
return os;
}
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
int main(int argc, char* argv[])
{
if(argc < 3)
{
usage(argc,argv);
exit(1);
}
// Parse some options for the transform
const static struct option long_options[] = {
{"verbose", no_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
{"usage", no_argument, 0, '?'},
{0,0,0,0}
};
auto short_opts="vh?";
auto verbose=false;
auto index = (int)0;
while(1)
{
int c = getopt_long(argc, argv,short_opts, long_options, &index);
if(c == -1)
break;
switch(c)
{
case 0:
break;
case 'v':
verbose=true;
break;
case '?':
case 'h':
usage(argc,argv);
exit(1);
break;
default:
break;
}
}
if(optind+2 > argc)
{
usage(argc,argv);
exit(1);
}
auto input_pgm=string(argv[optind]);
auto output_annot=string(argv[optind+1]);
for(auto i=optind+2 ; i < argc; i++)
{
ofstream out(argv[i]); // touch file
if(!out.is_open())
{
cerr<<"Cannot touch file "<<argv[i]<<endl;
exit(1);
}
}
try
{
CreateFunctions_t create_funcs(input_pgm,output_annot,verbose);
create_funcs.calculate();
create_funcs.writeAnnotations();
}
catch(const exception& e)
{
cout << "Cannot run rida on input file: " << input_pgm << endl;
cout << e.what() << endl;
return 2;
}