diff --git a/libIRDB/include/core/fileir.hpp b/libIRDB/include/core/fileir.hpp
index c88986f2a3ba2223347edbe448dd73699f797bf8..0aa9a60b9b2a91f805f0d237f7831477249bac49 100644
--- a/libIRDB/include/core/fileir.hpp
+++ b/libIRDB/include/core/fileir.hpp
@@ -80,7 +80,6 @@ class FileIR_t : public BaseObj_t
 	#define ASM_REG_MAX_SIZE 500000
 
 	typedef std::map<Instruction_t*,std::string> registry_type;
-// xxx	typedef std::map<Instruction_t*,InstructionCFGNode_t*> ICFGNodeMap_t;
 
 	// a pointer to the original variants IR, NULL means not yet loaded.
 	FileIR_t* orig_variant_ir_p;
@@ -119,5 +118,9 @@ class FileIR_t : public BaseObj_t
 	std::map<db_id_t, Type_t*> ReadTypesFromDB(TypeSet_t& types);
 	void ReadAllICFSFromDB(std::map<db_id_t,Instruction_t*> &addr2insnMap,
 		std::map<Instruction_t*, db_id_t> &unresolvedICFS);
+
+	void CleanupICFS();
+	void GarbageCollectICFS();
+	void DedupICFS();
 };
 
diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp
index 32c057c97d301d4b969350544a3fd3578e8f8342..528abf2335fb9f521b98f2066f760ce3cd98ca06 100644
--- a/libIRDB/src/core/fileir.cpp
+++ b/libIRDB/src/core/fileir.cpp
@@ -486,6 +486,8 @@ void FileIR_t::WriteToDB()
 	/* assign each item a unique ID */
 	SetBaseIDS();
 
+	CleanupICFS();
+
 	db_id_t j=-1;
 
 	dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->instruction_table_name + string(" cascade;"));
@@ -994,3 +996,136 @@ void FileIR_t::ReadAllICFSFromDB(std::map<db_id_t,Instruction_t*> &addr2instMap,
 		unresolved->SetIBTargets(icfs);
 	}
 }
+
+/*
+ * Remove from GetAllICFS() every ICFS that no instruction uses as its
+ * IB targets.  Entries are only dropped from the set, not deleted.
+ */
+void FileIR_t::GarbageCollectICFS()
+{
+	// every ICFS that at least one instruction points at
+	std::set<ICFS_t*> used_icfs;
+
+	for(set<Instruction_t*>::const_iterator it=this->GetInstructions().begin();
+		it!=this->GetInstructions().end();
+		++it)
+	{
+		Instruction_t* instr=*it;
+		if(instr && instr->GetIBTargets())
+		{
+			used_icfs.insert(instr->GetIBTargets());
+		}
+	}
+
+	ICFSSet_t& all_icfs=this->GetAllICFS();
+
+	// gather first, erase afterwards, so the set is not modified while
+	// it is being iterated
+	ICFSSet_t to_erase;
+	for(ICFSSet_t::const_iterator it=all_icfs.begin(); it!=all_icfs.end(); ++it)
+	{
+		ICFS_t* icfs = *it;
+		if (used_icfs.count(icfs) == 0)
+		{
+			to_erase.insert(icfs);
+		}
+	}
+
+	// report the number of entries actually being removed
+	if (!to_erase.empty())
+	{
+		cerr << "FileIR_t::GarbageCollectICFS(): WARNING: " << dec << to_erase.size() << " unused ICFS found. ";
+		cerr << "Deleting before committing to IRDB" << endl;
+	}
+
+	for(ICFSSet_t::const_iterator it=to_erase.begin(); it!=to_erase.end(); ++it)
+	{
+		all_icfs.erase(*it);
+	}
+}
+
+/*
+ * Collapse ICFS entries with identical contents.  For each group of equal
+ * entries one stays in GetAllICFS(); the others are dropped from the set
+ * (not deleted) and instructions that pointed at them are retargeted to
+ * the entry that stayed.
+ */
+void FileIR_t::DedupICFS()
+{
+	std::set<ICFS_t> unique_icfs;
+
+	ICFSSet_t& all_icfs=this->GetAllICFS();
+
+	// detect duplicate icfs
+	ICFSSet_t duplicates;
+	for(ICFSSet_t::iterator it=all_icfs.begin(); it!=all_icfs.end(); ++it)
+	{
+		ICFS_t* p=*it;
+		assert(p);
+		if (!unique_icfs.insert( *p ).second)
+		{
+			duplicates.insert(p);
+		}
+	}
+
+	// nothing to remap: skip the pass over all instructions
+	if (duplicates.empty())
+	{
+		return;
+	}
+
+	cerr << "FileIR_t::DedupICFS(): WARNING: " << dec << duplicates.size() << " duplicate ICFS out of " << all_icfs.size() << " total ICFS";
+	cerr << ". De-duplicating before committing to IRDB" << endl;
+
+	// remove duplicate icfs
+	for(ICFSSet_t::const_iterator it=duplicates.begin(); it!=duplicates.end(); ++it)
+	{
+		all_icfs.erase(*it);
+	}
+
+	// build duplicate icfs map: duplicate -> equal entry still in all_icfs
+	std::map<ICFS_t*, ICFS_t*> duplicate_map;
+	for(ICFSSet_t::const_iterator dup_it=duplicates.begin(); dup_it!=duplicates.end(); ++dup_it)
+	{
+		ICFS_t* icfs = *dup_it;
+		for(ICFSSet_t::iterator keep_it=all_icfs.begin(); keep_it!=all_icfs.end(); ++keep_it)
+		{
+			ICFS_t* t = *keep_it;
+
+			assert(t);
+			if (*icfs == *t)
+			{
+				duplicate_map[icfs] = t;
+				cerr << "FileIR_t::DedupICFS(): remap: icfs id " << icfs->GetBaseID() << " --> icsf id " << t->GetBaseID() << endl;
+				break;
+			}
+		}
+	}
+
+	// reassign ibtargets
+	for(set<Instruction_t*>::const_iterator it=this->GetInstructions().begin();
+		it!=this->GetInstructions().end();
+		++it)
+	{
+		Instruction_t* instr=*it;
+		if(!instr || !instr->GetIBTargets())
+		{
+			continue;
+		}
+
+		// find(), not operator[]: a lookup must not insert a NULL entry
+		// for every ICFS that is not a duplicate
+		std::map<ICFS_t*, ICFS_t*>::const_iterator remap=duplicate_map.find(instr->GetIBTargets());
+		if(remap!=duplicate_map.end())
+		{
+			instr->SetIBTargets(remap->second);
+		}
+	}
+}
+
+/* Prune unused ICFS entries, then merge duplicates among the rest. */
+void FileIR_t::CleanupICFS()
+{
+	GarbageCollectICFS();
+	DedupICFS();
+}
diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp
index d9db48c78554511a6d2cf89f36dac0ff584cc251..b3fcd80bf1ac36f94105134739a164cad328039f 100644
--- a/libIRDB/test/fill_in_indtargs.cpp
+++ b/libIRDB/test/fill_in_indtargs.cpp
@@ -41,10 +41,9 @@ using namespace libIRDB;
 using namespace std;
 using namespace EXEIO;
 
-int next_icfs_set_id = 2;
-
+int next_icfs_set_id = 0;
 ICFS_t* hellnode_tgts = NULL;
-ICFS_t* indirect_calls = NULL;
+//ICFS_t* indirect_calls = NULL;
 
 #define arch_ptr_bytes() (firp->GetArchitectureBitWidth()/8)
 
@@ -256,9 +255,7 @@ void mark_jmptables(FileIR_t *firp)
 
 			firp->GetAllICFS().insert(new_icfs);
 			instr->SetIBTargets(new_icfs);
-
-			if(getenv("IB_VERBOSE")!=0)
-				cout << "jmp table[" << new_icfs->GetBaseID() << "]: size: " << new_icfs->size() << endl;
+			cout << "new icfs: jmp table[" << new_icfs->GetBaseID() << "]: size: " << new_icfs->size() << endl;
 		}
 	}
 
@@ -302,13 +299,15 @@ void patch_icfs(FileIR_t *firp)
 			{
 				assert(instr->GetIBTargets()->IsIncomplete());
 
+/*
 				if (allTargetsIndirectlyCalledFunctions(instr)) {
 					cerr << "ib targets for: " << instr->getDisassembly() << " reassigned to indirectcalls node" << endl;
 					instr->SetIBTargets(indirect_calls);
 				} else {
-					cerr << "ib targets for: " << instr->getDisassembly() << " reassigned to hellnode" << endl;
+*/
+					cerr << "incomplete ib targets for: " << instr->getDisassembly() << " reassigned to hellnode" << endl;
 					instr->SetIBTargets(hellnode_tgts);
-				}
+//				}
 			}
 		}
 	}
@@ -1489,9 +1488,11 @@ void icfs_init(FileIR_t* firp)
 	next_icfs_set_id = max_id+1;
 	cerr<<"Found max ICFS id=="<<max_id<<endl;
 	hellnode_tgts = new ICFS_t(next_icfs_set_id++, ICFS_Analysis_Module_Complete);
-	indirect_calls = new ICFS_t(next_icfs_set_id++, ICFS_Analysis_Module_Complete);
+//	indirect_calls = new ICFS_t(next_icfs_set_id++, ICFS_Analysis_Module_Complete);
 	firp->GetAllICFS().insert(hellnode_tgts);
-	firp->GetAllICFS().insert(indirect_calls);
+	cout << "new icfs: hellnode targets" << endl;
+	cout<<"icfs_init: size of ICFS set"<<firp->GetAllICFS().size()<<endl;
+//	firp->GetAllICFS().insert(indirect_calls);
 }
 
 void icfs_set_indirect_calls(FileIR_t* const firp, ICFS_t* const targets)
@@ -1741,12 +1742,10 @@ void fill_in_indtargs(FileIR_t* firp, exeio* elfiop, std::list<virtual_offset_t>
 
 	cout<<"========================================="<<endl;
 
-
-
 	/* set the IR to have some instructions marked as IB targets */
 	mark_targets(firp);
 
-	icfs_set_indirect_calls(firp, indirect_calls);
+//	icfs_set_indirect_calls(firp, indirect_calls);
 	icfs_set_hellnode_targets(firp, hellnode_tgts);
 	mark_jmptables(firp);
 
@@ -1832,12 +1831,12 @@ main(int argc, char* argv[])
 
 		// find all indirect branch targets
 		fill_in_indtargs(firp, elfiop, forced_pins);
-		
+
 		// write the DB back and commit our changes
 		firp->WriteToDB();
 
 		delete firp;
-		delete indirect_calls;
+//		delete indirect_calls;
 		delete hellnode_tgts;
 	}
 