diff --git a/libIRDB/include/core/instruction.hpp b/libIRDB/include/core/instruction.hpp index 4cd9471dac272e1541cb670babe142cc2fc86dbd..e8c10a6bb128ddf86325f435c767bc337a5218bf 100644 --- a/libIRDB/include/core/instruction.hpp +++ b/libIRDB/include/core/instruction.hpp @@ -44,6 +44,10 @@ class Instruction_t : public BaseObj_t bool IsFunctionExit() const; + static bool SetsStackPointer(DISASM *disasm); + static bool SetsStackPointer(ARGTYPE* arg); + + private: AddressID_t *my_address; Function_t *my_function; diff --git a/libIRDB/src/cfg/CFG.cpp b/libIRDB/src/cfg/CFG.cpp index 4c185146a38b0161d736ebe24a5796452e74ad01..84c25fe1168730fcc3327afd6e7740239017fda4 100644 --- a/libIRDB/src/cfg/CFG.cpp +++ b/libIRDB/src/cfg/CFG.cpp @@ -74,6 +74,10 @@ void ControlFlowGraph_t::Build(Function_t* func) Instruction_t* insn=*it; BasicBlock_t* newblock=new BasicBlock_t; + /* record the entry block */ + if(insn==func->GetEntryPoint()) + entry=newblock; + assert( insn && newblock ); blocks.insert(newblock); diff --git a/libIRDB/src/core/generate_spri.cpp b/libIRDB/src/core/generate_spri.cpp index a032066c91588bf94bdf9727bb666c1c15654a21..9b8777504489c825dd26e54ba7c4b9d829724ebd 100644 --- a/libIRDB/src/core/generate_spri.cpp +++ b/libIRDB/src/core/generate_spri.cpp @@ -356,7 +356,7 @@ We need to emit a rule of this form { if( // if it's an indirect branch target - old_insn->GetIndirectBranchTargetAddress() || + newinsn->GetIndirectBranchTargetAddress() || // or the target of an unmodified instruction unmoved_insn_targets.find(newinsn) != unmoved_insn_targets.end() ) @@ -366,8 +366,7 @@ We need to emit a rule of this form else { fout << "# eliding, no indirect targets"<<endl; - fout << addressify(newinsn) <<" -> . " <<endl; - fout << ". -> 0x0" << endl; + fout << addressify(newinsn) <<" -> 0 " <<endl; } } diff --git a/libIRDB/src/core/instruction.cpp b/libIRDB/src/core/instruction.cpp index 82516007e1596f2b3e01fc5daebdc23aa0612027..d67eab8b447e8a94d66045079c9791db553eec77 100644 --- a/libIRDB/src/core/instruction.cpp +++ b/libIRDB/src/core/instruction.cpp @@ -193,3 +193,35 @@ bool Instruction_t::IsFunctionExit() const return false; } + +bool Instruction_t::SetsStackPointer(ARGTYPE* arg) +{ + if(arg->AccessMode!=WRITE) + return false; + int access_type=arg->ArgType & 0xFFFF0000; + + if(access_type==REGISTER_TYPE + GENERAL_REG +REG4) + return true; + return false; + +} + +bool Instruction_t::SetsStackPointer(DISASM* disasm) +{ + if(strstr(disasm->Instruction.Mnemonic, "push")!=NULL) + return true; + if(strstr(disasm->Instruction.Mnemonic, "pop")!=NULL) + return true; + if(strstr(disasm->Instruction.Mnemonic, "call")!=NULL) + return true; + if(disasm->Instruction.ImplicitModifiedRegs==REGISTER_TYPE+GENERAL_REG+REG4) + return true; + + + if(SetsStackPointer(&disasm->Argument1)) return true; + if(SetsStackPointer(&disasm->Argument2)) return true; + if(SetsStackPointer(&disasm->Argument3)) return true; + + return false; + +} diff --git a/libIRDB/src/core/variantir.cpp b/libIRDB/src/core/variantir.cpp index 1ce0f0726bd2fa1c95834804ba6450d7f9b07433..7516d6c5619a23fea768738799dd9e9933b9b114 100644 --- a/libIRDB/src/core/variantir.cpp +++ b/libIRDB/src/core/variantir.cpp @@ -272,17 +272,39 @@ void VariantIR_t::WriteToDB() /* and now that everything has an ID, let's write to the DB */ string q=string(""); for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i) + { q+=(*i)->WriteToDB(&progid,j); + if(q.size()>1024*1024) + { + dbintr->IssueQuery(q); + q=string(""); + } + + } dbintr->IssueQuery(q); q=string(""); for(std::set<AddressID_t*>::const_iterator i=addrs.begin(); i!=addrs.end(); ++i) + { q+=(*i)->WriteToDB(&progid,j); + if(q.size()>1024*1024) + { + dbintr->IssueQuery(q); + q=string(""); + } + } dbintr->IssueQuery(q); q=string(""); for(std::set<Instruction_t*>::const_iterator i=insns.begin(); i!=insns.end(); ++i) + { q+=(*i)->WriteToDB(&progid,j); + if(q.size()>1024*1024) + { + dbintr->IssueQuery(q); + q=string(""); + } + } dbintr->IssueQuery(q); } diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp index 0c4e5f1afffd84d7782fc0eb217034d1bd426bbf..8a8d30d790deb521ba9d89013c54c8674d355fa7 100644 --- a/libIRDB/test/fill_in_indtargs.cpp +++ b/libIRDB/test/fill_in_indtargs.cpp @@ -94,13 +94,22 @@ void get_instruction_targets(VariantIR_t *virp) assert(instr_len==insn->GetDataBits().size()); + + +/* we are moving the marking of callsite indirects into fix calls + * which has a better notion of whether the callsite indicates an + * indirect branch target at the next instruction + */ +#if 0 /* calls indicate an indirect target, pc+sizeof(instruction) */ if(disasm.Instruction.BranchType==CallType) { possible_target(disasm.VirtualAddr+instr_len); } + else +#endif /* other branches can't indicate an indirect branch target */ - else if(disasm.Instruction.BranchType) + if(disasm.Instruction.BranchType) continue; /* otherwise, any immediate is a possible branch target */ @@ -170,7 +179,7 @@ void infer_targets(Elf32_Shdr *shdr, FILE* fp, VariantIR_t *virp) void print_targets() { - int j=1; + int j=0; for( set<int>::iterator it=targets.begin(); it!=targets.end(); @@ -230,6 +239,7 @@ void fill_in_indtargs(VariantIR_t* virp, string elf_file) infer_targets(&sechdrs[secndx], fp, virp); + cout<<"# ATTRIBUTE total_indirect_targets_pass1="<<std::dec<<targets.size()<<endl; cout<<"Targets from data sections are: " << endl; print_targets(); @@ -240,6 +250,7 @@ void fill_in_indtargs(VariantIR_t* virp, string elf_file) possible_target(elfhdr.e_entry); + cout<<"# ATTRIBUTE total_indirect_targets_pass2="<<std::dec<<targets.size()<<endl; cout<<"All targets from data sections are: " << endl; print_targets(); diff --git a/libIRDB/test/fix_calls.cpp b/libIRDB/test/fix_calls.cpp index 42f4cdbd0dbc0beb04e5a65d239e8f3195ac50aa..8d92d32cd861b77e637fbd8f2e18014bd8fad4df 100644 --- a/libIRDB/test/fix_calls.cpp +++ b/libIRDB/test/fix_calls.cpp @@ -1,6 +1,8 @@ #include <libIRDB-core.hpp> +#include <libIRDB-cfg.hpp> +#include <utils.hpp> #include <iostream> #include <stdlib.h> #include "beaengine/BeaEngine.h" @@ -12,10 +14,149 @@ using namespace libIRDB; using namespace std; +long long no_target_insn=0; +long long target_not_in_function=0; +long long call_to_not_entry=0; +long long thunk_check=0; +long long found_pattern=0; + + +bool check_entry(bool &found, ControlFlowGraph_t* cfg) +{ + + BasicBlock_t *entry=cfg->GetEntry(); + found=false; + + for( + std::vector<Instruction_t*>::const_iterator it=entry->GetInstructions().begin(); + it!=entry->GetInstructions().end(); + ++it + ) + { + DISASM disasm; + Instruction_t* insn=*it; + insn->Disassemble(disasm); + if(Instruction_t::SetsStackPointer(&disasm)) + return false; + + if(strstr(disasm.CompleteInstr, "[esp]")) + { + found=true; + return true; + } + + } + return false; +} + + +bool call_needs_fix(Instruction_t* insn) +{ + Instruction_t *target=insn->GetTarget(); + DISASM disasm; + + string pattern;; + + /* if the target isn't in the IR */ + if(!target) + { + /* call 0's aren't to real locations */ + insn->Disassemble(disasm); + if(strcmp(disasm.CompleteInstr, "call 0x00000000")==0) + { + return false; + } + no_target_insn++; + /* then we need to fix it */ + return true; + } + + + Function_t* func=target->GetFunction(); + + /* if there's no function for this instruction */ + if(!func) + { + target_not_in_function++; + /* we need to fix it */ + return true; + } + + + /* build a cfg for this function */ + ControlFlowGraph_t* cfg=new ControlFlowGraph_t(func); + + assert(cfg->GetEntry()); + + /* if the call instruction isn't to a function entry point */ + if(cfg->GetEntry()->GetInstructions()[0]!=target) + { + call_to_not_entry++; + /* then we need to fix it */ + return true; + } + + + /* check the entry block for thunks, etc. */ + bool found; + bool ret=check_entry(found,cfg); + if(found) + { + if(ret) + thunk_check++; + return ret; + } + + /* now, search the function for stack references */ + + + /* determine what the stack ref. would look like */ + if(func->GetUseFramePointer()) + { + pattern="[ebp]"; + } + else + { + pattern="[esp+"+to_string(func->GetStackFrameSize())+"]"; + } + + + /* check each instruction */ + for( + std::set<Instruction_t*>::const_iterator it=func->GetInstructions().begin(); + it!=func->GetInstructions().end(); + ++it + ) + { + /* if the disassembly contains the string mentioned */ + DISASM disasm; + insn->Disassemble(disasm); + if(strstr(disasm.CompleteInstr, pattern.c_str())!=NULL) + { + found_pattern++; + /* then we need to fix this callsite */ + return true; + } + } + + /* otherwise, we think it's safe */ + return false; + +} + + + + + + + void fix_call(Instruction_t* insn, VariantIR_t *virp) { + /* record the possibly new indirect branch target if this call gets fixed */ + Instruction_t* newindirtarg=insn->GetFallthrough(); + /* disassemble */ DISASM disasm; memset(&disasm, 0, sizeof(DISASM)); @@ -94,6 +235,22 @@ void fix_call(Instruction_t* insn, VariantIR_t *virp) newbits[4]=(next_addr>>24) & 0xff; insn->SetDataBits(newbits); insn->SetComment(insn->GetComment()+" Push part"); + + + /* If the fallthrough is not marked as indirectly branchable-to, then mark it so */ + if(newindirtarg && !newindirtarg->GetIndirectBranchTargetAddress()) + { + /* create a new address for the IBTA */ + AddressID_t* newaddr = new AddressID_t; + assert(newaddr); + newaddr->SetFileID(newindirtarg->GetAddress()->GetFileID()); + newaddr->SetVirtualOffset(newindirtarg->GetAddress()->GetVirtualOffset()); + + /* set the insturction and include this address in the list of addrs */ + newindirtarg->SetIndirectBranchTargetAddress(newaddr); + virp->GetAddresses().insert(newaddr); + } + } @@ -128,7 +285,7 @@ main(int argc, char* argv[]) if(argc!=2) { - cerr<<"Usage: ilr <id>"<<endl; + cerr<<"Usage: fix_calls <id>"<<endl; exit(-1); } @@ -162,6 +319,8 @@ main(int argc, char* argv[]) cout<<"Fixing calls->push/jmp in variant "<<*pidp<< "." <<endl; + long long fixed_calls=0, not_fixed_calls=0, not_calls=0; + for( set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); it!=virp->GetInstructions().end(); @@ -170,10 +329,34 @@ main(int argc, char* argv[]) { Instruction_t* insn=*it; - if(is_call(insn)) - fix_call(insn, virp); + if(is_call(insn)) + { + if(call_needs_fix(insn)) + { + fixed_calls++; + fix_call(insn, virp); + } + else + not_fixed_calls++; + } + else + { + not_calls++; + } } + + cout << "# ATTRIBUTE fixed_calls="<<std::dec<<fixed_calls<<endl; + cout << "# ATTRIBUTE no_fix_needed_calls="<<std::dec<<not_fixed_calls<<endl; + cout << "# ATTRIBUTE other_instructions="<<std::dec<<not_calls<<endl; + cout << "# ATTRIBUTE fixed_ratio="<<std::dec<<(fixed_calls/((float)(not_fixed_calls+fixed_calls)))<<endl; + cout << "# ATTRIBUTE remaining_ratio="<<std::dec<<(not_fixed_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))<<endl; + cout << "# ATTRIBUTE no_target_insn="<<std::dec<< no_target_insn << endl; + cout << "# ATTRIBUTE target_not_in_function="<<std::dec<< target_not_in_function << endl; + cout << "# ATTRIBUTE call_to_not_entry="<<std::dec<< call_to_not_entry << endl; + cout << "# ATTRIBUTE thunk_check="<<std::dec<< thunk_check << endl; + cout << "# ATTRIBUTE found_pattern="<<std::dec<< found_pattern << endl; + cout<<"Writing variant "<<*pidp<<" back to database." << endl; virp->WriteToDB(); diff --git a/libIRDB/test/ilr.cpp b/libIRDB/test/ilr.cpp index 5a9bf3bcf6cc8998bf07ee45033b895164acc75c..38c12ac2adf93a7f9d4fcb73f07442323de9ad5d 100644 --- a/libIRDB/test/ilr.cpp +++ b/libIRDB/test/ilr.cpp @@ -46,6 +46,8 @@ main(int argc, char* argv[]) cout<<"Applying ILR to variant "<<*pidp<< "." <<endl; + long long unmoved_instr=0, moved_instr=0; + set<AddressID_t*> newaddressset; for( set<Instruction_t*>::const_iterator it=virp->GetInstructions().begin(); @@ -60,14 +62,22 @@ main(int argc, char* argv[]) if (insn->GetIndirectBranchTargetAddress()) { + unmoved_instr++; newaddressset.insert(insn->GetIndirectBranchTargetAddress()); } + else + moved_instr++; newaddressset.insert(newaddr); } virp->GetAddresses()=newaddressset; + + cout << "# ATTRIBUTE unmoved_instructions="<<std::dec<<unmoved_instr<<endl; + cout << "# ATTRIBUTE moved_instructions="<<std::dec<<moved_instr<<endl; + cout << "# ATTRIBUTE moved_ratio="<<std::dec<<(float)moved_instr/(moved_instr+unmoved_instr)<<endl; + cout<<"Writing variant "<<*pidp<<" back to database." << endl; virp->WriteToDB();