From ec396f1dce7af881fcf8e067bd5f5a26b3110eaf Mon Sep 17 00:00:00 2001 From: Jason Hiser <jdhiser@gmail.com> Date: Tue, 7 Aug 2018 09:41:21 -0400 Subject: [PATCH] revised cfg builder to build blocks for apparently unreachable code. Former-commit-id: c04807a76e1358c5b454fb64dd19a13fde642fd0 --- .gitignore | 2 +- libIRDB/include/cfg/BasicBlock.hpp | 3 +- libIRDB/include/cfg/CFG.hpp | 15 +++--- libIRDB/src/cfg/BasicBlock.cpp | 4 +- libIRDB/src/cfg/CFG.cpp | 83 +++++++++++++++++++++--------- 5 files changed, 71 insertions(+), 36 deletions(-) diff --git a/.gitignore b/.gitignore index 10e73a2eb..5477298a9 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ lib/ manifest.txt.config tools/meds2pdb/meds2pdb plugins_install/*.exe - +*.swp diff --git a/libIRDB/include/cfg/BasicBlock.hpp b/libIRDB/include/cfg/BasicBlock.hpp index 42a9a4bf8..7e6475280 100644 --- a/libIRDB/include/cfg/BasicBlock.hpp +++ b/libIRDB/include/cfg/BasicBlock.hpp @@ -53,8 +53,7 @@ class BasicBlock_t protected: - void BuildBlock(Function_t* func, - Instruction_t* insn, + void BuildBlock( Instruction_t* insn, const std::map<Instruction_t*,BasicBlock_t*> &insn2block_map ); diff --git a/libIRDB/include/cfg/CFG.hpp b/libIRDB/include/cfg/CFG.hpp index 647f916aa..ae2c6d0cd 100644 --- a/libIRDB/include/cfg/CFG.hpp +++ b/libIRDB/include/cfg/CFG.hpp @@ -26,24 +26,27 @@ class ControlFlowGraph_t { public: ControlFlowGraph_t(Function_t* func); - BasicBlock_t* GetEntry() const { return entry; } Function_t* GetFunction() const { return function; } + BasicBlockSet_t& GetBlocks() { return blocks; } + const BasicBlockSet_t& GetBlocks() const { return blocks; } void dump(std::ostream &os=std::cout) const { os<<*this; } - protected: + private: + // methods void Build(Function_t *func); + void alloc_blocks(const InstructionSet_t &starts, map<Instruction_t*,BasicBlock_t*>& insn2block_map); + void build_blocks(const map<Instruction_t*,BasicBlock_t*>& insn2block_map); + void find_unblocked_instructions(InstructionSet_t &starts, Function_t* func); - private: - std::set<BasicBlock_t*> blocks; + // data + BasicBlockSet_t blocks; BasicBlock_t* entry; Function_t* function; /* friends */ public: friend std::ostream& operator<<(std::ostream& os, const ControlFlowGraph_t& cfg); - BasicBlockSet_t& GetBlocks() { return blocks; } - const BasicBlockSet_t& GetBlocks() const { return blocks; } }; diff --git a/libIRDB/src/cfg/BasicBlock.cpp b/libIRDB/src/cfg/BasicBlock.cpp index bb0e5c7dc..ccba987cd 100644 --- a/libIRDB/src/cfg/BasicBlock.cpp +++ b/libIRDB/src/cfg/BasicBlock.cpp @@ -37,11 +37,11 @@ BasicBlock_t::BasicBlock_t() void BasicBlock_t::BuildBlock ( - Function_t* func, Instruction_t* insn, const map<Instruction_t*,BasicBlock_t*> &insn2block_map ) { + const auto &func=insn->GetFunction(); assert(insn); /* loop through the instructions for this block */ while(insn) @@ -116,7 +116,7 @@ void BasicBlock_t::BuildBlock break; /* check for a fallthrough out of the function */ - if(ft_insn && !is_in_container(func->GetInstructions(),ft_insn)) + if(ft_insn && ft_insn->GetFunction() != func) // !is_in_container(func->GetInstructions(),ft_insn)) break; diff --git a/libIRDB/src/cfg/CFG.cpp b/libIRDB/src/cfg/CFG.cpp index 2e1b80c83..cfbc731f8 100644 --- a/libIRDB/src/cfg/CFG.cpp +++ b/libIRDB/src/cfg/CFG.cpp @@ -25,10 +25,12 @@ using namespace std; using namespace libIRDB; +#define ALLOF(a) begin(a),end(a) + /* * FindTargets - locate all possible instructions that are the target of a jump instruction */ -static set<Instruction_t*> FindBlockStarts(Function_t* func) +static InstructionSet_t FindBlockStarts(Function_t* func) { InstructionSet_t targets; @@ -80,55 +82,86 @@ static set<Instruction_t*> FindBlockStarts(Function_t* func) return targets; } - - - ControlFlowGraph_t::ControlFlowGraph_t(Function_t* func) : entry(NULL), function(func) { Build(func); } -void ControlFlowGraph_t::Build(Function_t* func) -{ - set<Instruction_t*> starts=FindBlockStarts(func); - - map<Instruction_t*,BasicBlock_t*> insn2block_map; +void ControlFlowGraph_t::alloc_blocks(const InstructionSet_t &starts, map<Instruction_t*,BasicBlock_t*>& insn2block_map) +{ /* create a basic block for each instruction that starts a block */ - for( set<Instruction_t*>::const_iterator it=starts.begin(); - it!=starts.end(); - ++it - ) + for(const auto &insn : starts) { - Instruction_t* insn=*it; - BasicBlock_t* newblock=new BasicBlock_t; + if(is_in_container(insn2block_map,insn)) // already allocated + continue; - /* record the entry block */ - if(insn==func->GetEntryPoint()) - entry=newblock; + auto newblock=new BasicBlock_t; assert( insn && newblock ); blocks.insert(newblock); insn2block_map[insn]=newblock; } +} + +void ControlFlowGraph_t::build_blocks(const map<Instruction_t*,BasicBlock_t*>& insn2block_map) +{ /* Ask the basic block to set the fields for each block that need to be set */ - for( map<Instruction_t*,BasicBlock_t*>::const_iterator it=insn2block_map.begin(); - it!=insn2block_map.end(); - ++it - ) + for(const auto &it : insn2block_map) { - Instruction_t* insn=(*it).first; - BasicBlock_t* block=(*it).second; + const auto insn=it.first; + const auto block=it.second; + + if(block->GetInstructions().size()>0) // already built + continue; assert(insn && block); - block->BuildBlock(func, insn, insn2block_map); + block->BuildBlock(insn, insn2block_map); } +} + +void ControlFlowGraph_t::find_unblocked_instructions(InstructionSet_t &starts, Function_t* func) +{ + auto mapped_instructions=InstructionSet_t(); + auto missed_instructions=InstructionSet_t(); + for(const auto block : GetBlocks()) + mapped_instructions.insert(ALLOF(block->GetInstructions())); + + auto my_inserter=inserter(missed_instructions,missed_instructions.end()); + set_difference(ALLOF(func->GetInstructions()), ALLOF(mapped_instructions), my_inserter); + starts.insert(ALLOF(missed_instructions)); +} + + + +void ControlFlowGraph_t::Build(Function_t* func) +{ + auto starts=FindBlockStarts(func); + + auto insn2block_map=map<Instruction_t*,BasicBlock_t*> (); + + alloc_blocks(starts, insn2block_map); + build_blocks(insn2block_map); + /* record the entry block */ + entry=insn2block_map[func->GetEntryPoint()]; + + /* most functions are done now. */ + /* however, if a function has a (direct) side entrance, + * some code may appear unreachable and not be placed in + * a block -- here, we detect that code and create a + * new basic block for every instruction, as any may have a side entrance + */ + /* note: side entrances may miss a block start */ + /* in code that appears reachable from the entrance?! */ + find_unblocked_instructions(starts, func); + alloc_blocks(starts, insn2block_map); + build_blocks(insn2block_map); } -- GitLab