Skip to content
Snippets Groups Projects
rida.cpp 17.7 KiB
Newer Older
Jason Hiser's avatar
Jason Hiser committed
#include <iostream>
#include <sstream>
Jason Hiser's avatar
Jason Hiser committed
#include <assert.h>
#include <set>
#include <algorithm>
#include <getopt.h>
#include <ehp.hpp>
#include <exeio.h>
#include <string>
#include "capstone/capstone.h"
#include <fstream>
#include <elf.h>
#include <functional>
Jason Hiser's avatar
Jason Hiser committed


// File-scope using-directives: unqualified names in the rest of this file may
// resolve into std, EHP, or EXEIO.
using namespace std;
using namespace EHP;
using namespace EXEIO;

// Expands to the iterator pair "begin(a),end(a)" so a whole container can be
// handed to an algorithm as ALLOF(c).  The argument expression appears twice
// in the expansion.
#define ALLOF(a) begin(a),end(a)

// Print the command-line synopsis to stdout and terminate the process with
// exit status 1.
//
// argc - unused; kept so the signature mirrors main()
// argv - argv[0] is printed as the program name
//
// This function does not return.
void usage(int argc, char* argv[])
{
	(void)argc;	// intentionally unused
	// The options listed here are the ones main() hands to getopt_long.
	cout<<"Usage: "<<argv[0]<<" [-v|--verbose] [-h|--help] input.exe output.annot"<<endl;
	exit(1);
}



class CreateFunctions_t
{
	private:
		// Parsed EH-frame information for the input program; assigned in the
		// constructor from EHFrameParser_t::factory().
		unique_ptr<const EHFrameParser_t> ehp;
Jason Hiser's avatar
Jason Hiser committed
		// Addresses are carried as unsigned 64-bit values.
		using Address_t = uint64_t;

		// An address interval stored as a (first, second) pair.  Membership is
		// half-open: first is inside the range, second is not.
		class Range_t : public pair<Address_t,Address_t>
		{
			public:
				// Build the range whose lower bound is lo and whose upper
				// (exclusive) bound is hi.
				Range_t(const Address_t &lo, const Address_t &hi)
					: pair<Address_t,Address_t>(lo,hi)
				{
				}

				// True when addr lies in [first, second).
				bool contains(const Address_t &addr) const
				{
					if(addr < first)
						return false;
					return addr < second;
				}
		};
		// A function is represented as a set of address ranges.
		using RangeSet_t = set<Range_t>;
		// Every function found so far, one RangeSet_t per function.
		set < RangeSet_t > sccs;
		// Name recorded for each function, keyed by the function's range set.
		map<RangeSet_t,string> funcNames;
		// When true, extra diagnostics are written to stdout.
		bool verbose;
		// Reader for the input executable; gives access to its sections.
		exeio_t exeio;
		// Capstone handle: opened in the constructor, closed in the destructor.
		csh cshandle;
		// Annotation file that the results are written to.
		ofstream outfile;
		// Class of the input file as reported by exeio; compared against ELF64/ELF32.
		execlass_t file_class;
Jason Hiser's avatar
Jason Hiser committed
		// Machine type reported by exeio.getMachineType(); used to pick the
		// capstone architecture and the PLT layout.
		MachineType_t machine_type;
		// Stream-insertion helper for RangeSet_t; declared a friend so it can
		// name the private RangeSet_t type.
		friend ostream& operator<<(ostream& os, const CreateFunctions_t::RangeSet_t& rs);
Jason Hiser's avatar
Jason Hiser committed
	public:
		CreateFunctions_t(const string &input_pgm, const string &output_annot, const bool p_verbose)
			: 
			verbose(p_verbose),
			exeio(input_pgm),
			cshandle(),
Jason Hiser's avatar
Jason Hiser committed
			file_class(exeio.get_class()),
			machine_type(exeio.getMachineType())
Jason Hiser's avatar
Jason Hiser committed
		{
			outfile.open(output_annot.c_str(), ofstream::out);
			if(!outfile.is_open())
			{
				cerr<<"Cannot open "<<output_annot<<endl;
				exit(1);
			}
			ehp = EHFrameParser_t::factory(input_pgm);
			if(verbose)
				ehp->print();

			if(file_class!=ELF64 && file_class != ELF32)
			{
				cerr<<"Rida can only process ELF files."<<endl;
				exit(1);
			}

			const auto my_cs_mode = 
				machine_type == mtArm32   ? cs_mode(CS_MODE_LITTLE_ENDIAN) :
				machine_type == mtAarch64 ? cs_mode(CS_MODE_LITTLE_ENDIAN) :
				machine_type == mtMips32  ? cs_mode(CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN) : 
				file_class   == ELF64     ? cs_mode(CS_MODE_64) : 
				file_class   == ELF32     ? cs_mode(CS_MODE_32) : 
				throw std::runtime_error("Cannot handle ELF class");
Jason Hiser's avatar
Jason Hiser committed

			const auto my_cs_arch = 
Jason Hiser's avatar
Jason Hiser committed
				machine_type == mtX86_64  ?  CS_ARCH_X86   : 
				machine_type == mtI386    ?  CS_ARCH_X86   :
				machine_type == mtArm32   ?  CS_ARCH_ARM   : 
Jason Hiser's avatar
Jason Hiser committed
				machine_type == mtAarch64 ?  CS_ARCH_ARM64 : 
				machine_type == mtMips32  ?  CS_ARCH_MIPS  : 
				throw std::runtime_error("Cannot handle architecture");
Jason Hiser's avatar
Jason Hiser committed

			if (cs_open(my_cs_arch, my_cs_mode , &cshandle) != CS_ERR_OK)
Jason Hiser's avatar
Jason Hiser committed
			{
				cerr<<"Cannot initialize capstone"<<endl;
				exit(1);
			}
		}
		// Closes the capstone handle stored in cshandle.
		virtual ~CreateFunctions_t()
		{
			cs_close(&cshandle);
		}


		void calculate()
		{
Jason Hiser's avatar
Jason Hiser committed
			ehframeToSccs();
			addSectionToSccs(".init");
			addSectionToSccs(".fini");
			{
				class Extracter64
				{
					public:
						Elf64_Xword  elf_r_sym (Elf64_Xword a) { return ELF64_R_SYM (a); }
						Elf64_Xword  elf_r_type(Elf64_Xword a) { return ELF64_R_TYPE(a); }
						unsigned char  elf_st_bind(unsigned char a) { return ELF64_ST_BIND(a); }
						unsigned char  elf_st_type(unsigned char a) { return ELF64_ST_TYPE(a); }
				};
				pltSplit<Elf64_Sym, Elf64_Rela, Elf64_Rel, Extracter64>(".plt", ".plt.got");
				nameFunctions<Elf64_Sym, Extracter64>();
			}
			else
			{
				class Extracter32
				{
					public:
						Elf32_Word  elf_r_sym (Elf32_Word a) { return ELF32_R_SYM (a); }
						Elf32_Word  elf_r_type(Elf32_Word a) { return ELF32_R_TYPE(a); }
						unsigned char  elf_st_bind(unsigned char a) { return ELF32_ST_BIND(a); }
						unsigned char  elf_st_type(unsigned char a) { return ELF32_ST_TYPE(a); }
				};
				pltSplit<Elf32_Sym, Elf32_Rela, Elf32_Rel, Extracter32>(".plt", ".plt.got");
				nameFunctions<Elf32_Sym, Extracter32>();
			}
		template<class T_Sym, class T_Extracter>
Jason Hiser's avatar
Jason Hiser committed
		void nameFunctions()
		{
Jason Hiser's avatar
Jason Hiser committed
			// do symbol names.
			parseSyms<T_Sym, T_Extracter>(".dynsym", ".dynstr");
			parseSyms<T_Sym, T_Extracter>(".symtab", ".strtab");
Jason Hiser's avatar
Jason Hiser committed

			auto namedFunctions=0U;
			auto unnamedFunctions=0U;
			auto functions=0U;

Jason Hiser's avatar
Jason Hiser committed
			// set default names 
			for(const auto &func: sccs)
			{
				assert(func.begin() != func.end());
				const auto first_range=*(func.begin());
				const auto startAddr=first_range.first;
				std::stringstream ss;
				ss << "sub_" << hex << startAddr;
				const auto name = ss.str();
Jason Hiser's avatar
Jason Hiser committed
				functions++;
				if(funcNames[func]=="")	// destructive test OK, next line sets if empty.
					funcNames[func]=name;
Jason Hiser's avatar
Jason Hiser committed
				}
				else
				{
					namedFunctions++;
				}
					
Jason Hiser's avatar
Jason Hiser committed
			}
Jason Hiser's avatar
Jason Hiser committed

			cout<<"#ATTRIBUTE functions="<<dec<<functions<<endl;
			cout<<"#ATTRIBUTE named_functions="<<dec<<namedFunctions<<endl;
			cout<<"#ATTRIBUTE uunamed_functions="<<dec<<unnamedFunctions<<endl;
		template<class T_Sym, class T_Extracter>
Jason Hiser's avatar
Jason Hiser committed
		void parseSyms(const string& secName, const string & stringSecName)
		{
			const auto sec=exeio.sections[secName];
			if(!sec) return;	// err check

			const auto stringSec=exeio.sections[stringSecName];
			if(!stringSec) return; // err check

			const auto data=sec->get_data();
			const auto stringData=stringSec->get_data();

			for(auto i=0U; i+sizeof(T_Sym) <= (size_t)sec->get_size(); i+=sizeof(T_Sym))
			{
				const auto sym=reinterpret_cast<const T_Sym *>(data+i);
				const auto value=sym->st_value;
				if(value==0) 
					continue;

				// works for both ELF64 and ELF32, macros defined the same.
				const auto type=T_Extracter().elf_st_type(sym->st_info);
Jason Hiser's avatar
Jason Hiser committed
				if(type!=STT_FUNC) 
					continue;


				// functions with non-zero address at this point.
				const auto name_offset=sym->st_name;
	
				// sanity check string length
				if(name_offset < 0U || name_offset > (size_t)stringSec->get_size())
					continue;

				// get the name
				const auto name=string(stringData+name_offset);
		

				// find a function 
				auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) 
					{ 
						return s.begin() -> first == value;
					});
				if(func_it!=sccs.end())
				{
					cout<<"Setting function at "<<hex<<value<<" to name "<<name<<endl;
					funcNames[*func_it]=name;
				}

			}
		}

		void ehframeToSccs()
		{
			const auto fdes=ehp->getFDEs();
			for(const auto fde : *fdes) 
				//sccs.insert({ RangeSet_t({fde->getStartAddress(), fde->getEndAddress()})});
				sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(),fde->getEndAddress())}));

			cout<<hex;
Anh Nguyen-Tuong's avatar
Anh Nguyen-Tuong committed
			if(getenv("SELF_VALIDATE"))
				assert(fdes->size()>=0);
Anh Nguyen-Tuong's avatar
Anh Nguyen-Tuong committed

Jason Hiser's avatar
Jason Hiser committed
			for(const auto fde : *fdes)
			{
				if(verbose)
					cout<<"Found FDE at : " << fde->getStartAddress() << "-"<<fde->getEndAddress()<<endl;
				auto pair=Range_t(fde->getStartAddress(), fde->getEndAddress());
				const auto lsda=fde->getLSDA();
				assert(lsda);
				const auto callsites=lsda->getCallSites();
				assert(callsites);

				for(const auto cs : *callsites)
				{
					if(verbose)
						cout<<"\tCall site (0x"<<cs->getCallSiteAddress()<<"-"<<cs->getCallSiteEndAddress()
						    <<") with landing pad=0x"<<cs->getLandingPadAddress()<<endl;
					if(cs->getLandingPadAddress()==0x0)
						continue;
Jason Hiser's avatar
Jason Hiser committed
					auto set1_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) { return s.find(pair) != s.end(); } );
					assert(set1_it!=sccs.end());

					auto set2_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) 
						{ 
							return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(cs->getLandingPadAddress()); }) != s.end(); 
Jason Hiser's avatar
Jason Hiser committed
						});
					assert(set2_it!=sccs.end());
					auto set1=*set1_it;
					auto set2=*set2_it;
					if(set1!=set2)
					{
						sccs.erase(set1);
						sccs.erase(set2);
						auto set3=RangeSet_t();
						if(verbose)
							cout<<"\tMerging: set1="<< hex<< set1 << " and set2="<<set2<<dec<<endl;
						set_union(ALLOF(set1), ALLOF(set2), inserter(set3, set3.begin()));
						sccs.insert(set3);
					}
Jason Hiser's avatar
Jason Hiser committed
				}
			}


		}

		void addSectionToSccs(const string &sec_name)
		{
			const auto sec=exeio.sections[sec_name];
			if(sec==nullptr)
				return;
Jason Hiser's avatar
Jason Hiser committed
			const auto range=Range_t(sec->get_address(), sec->get_address()+sec->get_size());
			const auto ranges=RangeSet_t({range});
			sccs.insert(ranges);
		}

		template<class T_Sym, class T_Rela, class T_Rel, class T_Extracter>
Jason Hiser's avatar
Jason Hiser committed
		void pltSplit(const string &pltSecName, const string &endSecName)
		{
			const auto dynsymSec=exeio.sections[".dynsym"];
			const auto dynstrSec=exeio.sections[".dynstr"];
			const auto relapltSec=exeio.sections[".rela.plt"];
			const auto relpltSec=exeio.sections[".rel.plt"];
			const auto relSec=relapltSec  ? relapltSec : relpltSec;
			const auto relSecEntrySize=relapltSec  ? sizeof(T_Rela) : sizeof(T_Rel);
			
Jason Hiser's avatar
Jason Hiser committed

			const auto addRange=[&](const Address_t s, size_t len)
			{
				if(verbose)
					cout<<"Adding PLT function "<<s<<" "<<len<<endl;
				sccs.insert(RangeSet_t({Range_t({s,s+len})}));
			};

			const auto addName=[&](const Address_t addr, uint64_t symIndex)
Jason Hiser's avatar
Jason Hiser committed
			{
				if(!dynsymSec) return;
				if(!dynstrSec) return;
				if(!relSec) return;
			
				// get the data out of the plt section.
				const auto relData=relSec->get_data();
				if(symIndex*relSecEntrySize >= (size_t)relSec->get_size()) return;
				const auto relDataAsSymPtr=reinterpret_cast<const T_Rel *>(relData + symIndex*relSecEntrySize);
				const auto &relEntry=*relDataAsSymPtr;

				// calculate index into dynsym, section.
				const auto dynsymIndex=T_Extracter().elf_r_sym(relEntry.r_info);
Jason Hiser's avatar
Jason Hiser committed
				const auto dynsymData=dynsymSec->get_data();
				const auto dynstrData=dynstrSec->get_data();

				cout<<dec<<"At entry "<<symIndex<<", reloc entry has dynsym index "<<dynsymIndex<<endl;

Jason Hiser's avatar
Jason Hiser committed
				// the index into the .dynsym section for the relocation.
				const auto dynsymDataAsSymPtr=reinterpret_cast<const T_Sym *>(dynsymData);
				if(dynsymIndex*sizeof(T_Sym) >= (size_t)dynsymSec->get_size()) return;

				// get a reference to the dynsym entry.
				const auto &dynsymEntry=dynsymDataAsSymPtr[dynsymIndex];
				// extra where in the string table the name is.
				const auto name_offset=dynsymEntry.st_name;
				
				// sanity check string length
				if(name_offset < 0U || name_offset > (size_t)dynstrSec->get_size())
					return;

				const auto applyName=[&](const string& part, const Address_t myAddr)
					{
						// get the name
						const auto name=string(dynstrData+name_offset)+part+"@plt";

						// find a function 
						auto func_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) 
							{ 
								return s.begin() -> first == myAddr;
							});
						if(func_it!=sccs.end())
						{
							cout<<"Setting function at "<<hex<<myAddr<<" to name "<<name<<endl;
							funcNames[*func_it]=name;
						}
					};

				applyName("part1", addr);
				applyName("part2", addr+6);
Jason Hiser's avatar
Jason Hiser committed
			};

			const auto pltSec=exeio.sections[pltSecName];
Jason Hiser's avatar
Jason Hiser committed
			const auto startAddr=pltSec->get_address();
			const auto endAddr=pltSec->get_address()+pltSec->get_size();

			if(verbose)
				cout<<"Found plt function range is "<<hex<<startAddr<<"-"<<endAddr<<endl;

Jason Hiser's avatar
Jason Hiser committed
			const auto pltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) 
Jason Hiser's avatar
Jason Hiser committed
				{ 
					return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(startAddr); }) != s.end(); 
				});
Jason Hiser's avatar
Jason Hiser committed
			// erase startAddr if found.
			if(pltRange_it!=sccs.end())
				sccs.erase(pltRange_it);	// invalidates all iterators
Jason Hiser's avatar
Jason Hiser committed

			const auto handle_x86_plt=[&]()
Jason Hiser's avatar
Jason Hiser committed
			{
Jason Hiser's avatar
Jason Hiser committed
				const auto plt_skip=16;
				const auto plt_header_size=12;
				const auto plt_entry_size=16;
				const auto plt_entry_size_first_part=6;

				addRange(startAddr,plt_header_size);
				for(auto i=startAddr+plt_skip; i<endAddr; i+=plt_skip) 
				{
					addRange(i,plt_entry_size_first_part);
					addRange(i+6,plt_entry_size-plt_entry_size_first_part);
					addName(i,dynsymEntryIndex++);
				}
			};
Jason Hiser's avatar
Jason Hiser committed
			{
				const auto plt_entry_size=16;
				const auto plt_header_size=8*4;

				addRange(startAddr,plt_header_size);
				for(auto i=startAddr+plt_header_size; i<endAddr; i+=plt_entry_size) 
				{
					addRange(i,plt_entry_size);
					addName(i,dynsymEntryIndex++);
				}
			};
			const auto handle_arm32_plt=[&]()
			{
				const auto plt_entry_size=3*4;	// 3 instructions
				const auto plt_header_size=5*4; // 5 instructions

				addRange(startAddr,plt_header_size);
				for(auto i=startAddr+plt_header_size; i<endAddr; i+=plt_entry_size) 
				{
					addRange(i,plt_entry_size);
					addName(i,dynsymEntryIndex++);
				}
			};
Jason Hiser's avatar
Jason Hiser committed

			switch(machine_type)
			{
				case mtX86_64:
				case mtI386: 
					handle_x86_plt();
					break;
				case mtAarch64:
Jason Hiser's avatar
Jason Hiser committed
					break;
				default:
					assert(0);

			};
Jason Hiser's avatar
Jason Hiser committed
			cout<<"#ATTRIBUTE plt_entries="<<dec<<dynsymEntryIndex<<endl;
Jason Hiser's avatar
Jason Hiser committed
			const auto gotPltSec=exeio.sections[endSecName];
			// both 32- and 64-bit, entries are 6 bytes, with 2 bytes of padding.
			const auto gotPltEntrySize=8;
			const auto gotPltRangeSize=6;
			const auto gotPltStartAddr=gotPltSec->get_address();

			const auto gotPltRange_it=find_if(ALLOF(sccs), [&](const RangeSet_t& s) 
				{ 
					return find_if(ALLOF(s), [&](const Range_t& r) { return r.contains(gotPltStartAddr); }) != s.end(); 
				});
			// erase startAddr if found.
			if(gotPltRange_it!=sccs.end())
				sccs.erase(gotPltRange_it);	// invalidates all iterators

			auto gotpltEntries=0U;
			for(auto i=0U; i + gotPltRangeSize < (size_t)gotPltSec->get_size(); i+=gotPltEntrySize)
			{
				addRange(gotPltStartAddr+i,gotPltRangeSize);
Jason Hiser's avatar
Jason Hiser committed
				gotpltEntries++;
Jason Hiser's avatar
Jason Hiser committed
			cout<<"#ATTRIBUTE gotplt_entries="<<dec<<gotpltEntries<<endl;
Jason Hiser's avatar
Jason Hiser committed
	
		}

		void doBelongTos(const Range_t &range, const Address_t startAddr)
		{
			const auto sec=exeio.sections.findByAddress(range.first);
			assert(sec);
			const auto secEnd=exeio.sections.findByAddress(range.second-1);
			assert(sec==secEnd);	 // same section.
			const auto data=sec->get_data();
			const auto secStartAddr=sec->get_address();
			const auto range_len=range.second-range.first;
			const auto the_code=(const uint8_t*)(data+(range.first-secStartAddr));

			auto insn=(cs_insn *)nullptr;

			const auto count = cs_disasm(cshandle, the_code, range_len, range.first, 0, &insn);
			if (count > 0) 
			{
				for (auto j = 0U; j < count; j++) 
				{
					outfile<<hex<<"\t"<<insn[j].address<<"\t"<<dec<<insn[j].size<<"\tINSTR BELONGTO\t"<<hex<<startAddr<< "\t; "<<insn[j].mnemonic << " " << insn[j].op_str<<endl;
				}

				cs_free(insn, count);
			} 
			else
			{
				cerr<<"ERROR: Failed to disassemble code at "<<range.first<<"-"<<range.second<<endl;
				exit(1);
			}


		}
Jason Hiser's avatar
Jason Hiser committed
		void doBelongTos(const RangeSet_t &scc)
		{
			const auto min=*scc.begin();
			const auto startAddr=min.first;

			for(auto range : scc)
				doBelongTos(range,startAddr);

		}

		void writeAnnotations()
		{
			cout<<"The functions are:"<<endl;
			auto i=0;
			for(const auto &scc : sccs)
			{
				const auto min=*scc.begin();
				const auto max=*prev(scc.end());
				const auto size=max.second-min.first;
		
				cout<<"Function "<<dec<<i++<<" (" <<funcNames[scc] << ") is "<<hex<<min.first<<" "<<dec<<max.second-min.first<<endl;
				const auto usefp=getUseFp(scc);
				outfile<<hex<<"\t"<<min.first<<"\t"<<dec<<size<<"\tFUNC GLOBAL\t"<<funcNames[scc]<<" "<< usefp << endl;
Jason Hiser's avatar
Jason Hiser committed
				doBelongTos(scc);
			}
Anh Nguyen-Tuong's avatar
Anh Nguyen-Tuong committed
			if(getenv("SELF_VALIDATE"))
				assert(sccs.size()>=0);
Jason Hiser's avatar
Jason Hiser committed
		}

		string getUseFp(const RangeSet_t scc)
		{
			assert(scc.begin()!=scc.end());
			const auto startAddr=scc.begin()->first;
			const auto fde=ehp->findFDE(startAddr);
			if(!fde) return "NOFP";
			const auto &ehprogram=fde->getProgram();
			const auto ehprogramInstructions=ehprogram.getInstructions();

			const auto def_cfa_rbp_it = find_if(ALLOF(*ehprogramInstructions), [&](const EHProgramInstruction_t* insn)
				{
					assert(insn);
					const auto &insnBytes=insn->getBytes();
					// 0xd, 0x5 is "def_cfa_register ebp" 
					// 0xd, 0x6 is "def_cfa_register rbp" 
					const auto reg=file_class==ELF64 ? (uint8_t)0x6 : (uint8_t)0x5;
					return insnBytes==EHProgramInstructionByteVector_t({(uint8_t)0xd, reg });
				});
			return def_cfa_rbp_it == ehprogramInstructions->end() ?  "NOFP" : "USEFP";
		}
// Write every range of rs to os as "(first-second), ", in set order, using
// whatever numeric base the stream is currently set to.  Returns os.
ostream& operator<<(ostream& os, const CreateFunctions_t::RangeSet_t& rs)
{
	for(auto it=rs.begin(); it!=rs.end(); ++it)
		os<<"("<<it->first<<"-"<<it->second<<"), ";
	return os;
}

Jason Hiser's avatar
Jason Hiser committed
// Program entry point.
//
// Command line: [options] input.exe output.annot [extra files...]
//   -v / --verbose           enable diagnostics
//   -h / --help, -? / --usage  print the usage text and exit with status 1
// Every argument after output.annot is opened for writing ("touched").
//
// Returns 0 on success and 2 when the analysis throws; the argument and
// file errors handled here exit with status 1.
int main(int argc, char* argv[])
{
	// at least the two positional arguments are required
	if(argc < 3)
	{
		usage(argc,argv);
		exit(1);
	}

	// Parse some options for the transform
	const static struct option long_options[] = {
		{"verbose", no_argument, 0, 'v'},
		{"help", no_argument, 0, 'h'},
		{"usage", no_argument, 0, '?'},
		{0,0,0,0}
	};
	auto short_opts="vh?";
	auto verbose=false;
	auto index = (int)0;

	for(;;)
	{
		const int c = getopt_long(argc, argv,short_opts, long_options, &index);
		if(c == -1)
			break;

		if(c == 'v')
		{
			verbose=true;
		}
		else if(c == '?' || c == 'h')
		{
			usage(argc,argv);
			exit(1);
		}
		// 0 and every other value: nothing to do
	}

	// the two positional arguments must still be present after the options
	if(optind+2 > argc)
	{
		usage(argc,argv);
		exit(1);
	}

	auto input_pgm=string(argv[optind]);
	auto output_annot=string(argv[optind+1]);

	for(auto extra=optind+2 ; extra < argc; extra++)
	{
		ofstream touched(argv[extra]);	// touch file
		if(touched.is_open())
			continue;

		cerr<<"Cannot touch file "<<argv[extra]<<endl;
		exit(1);
	}

	try
	{
		CreateFunctions_t create_funcs(input_pgm,output_annot,verbose);
		create_funcs.calculate();
		create_funcs.writeAnnotations();
	}
	catch(const exception& e)
	{
		cout << "Cannot run rida on input file: " << input_pgm << endl;
		cout << e.what() << endl;
		return 2;
	}

	return 0;
}