From ec396f1dce7af881fcf8e067bd5f5a26b3110eaf Mon Sep 17 00:00:00 2001
From: Jason Hiser <jdhiser@gmail.com>
Date: Tue, 7 Aug 2018 09:41:21 -0400
Subject: [PATCH] revised cfg builder to build blocks for apparently
 unreachable code.

Former-commit-id: c04807a76e1358c5b454fb64dd19a13fde642fd0
---
 .gitignore                         |  2 +-
 libIRDB/include/cfg/BasicBlock.hpp |  3 +-
 libIRDB/include/cfg/CFG.hpp        | 15 +++---
 libIRDB/src/cfg/BasicBlock.cpp     |  4 +-
 libIRDB/src/cfg/CFG.cpp            | 83 +++++++++++++++++++++---------
 5 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/.gitignore b/.gitignore
index 10e73a2eb..5477298a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,4 @@ lib/
 manifest.txt.config
 tools/meds2pdb/meds2pdb
 plugins_install/*.exe
-
+*.swp
diff --git a/libIRDB/include/cfg/BasicBlock.hpp b/libIRDB/include/cfg/BasicBlock.hpp
index 42a9a4bf8..7e6475280 100644
--- a/libIRDB/include/cfg/BasicBlock.hpp
+++ b/libIRDB/include/cfg/BasicBlock.hpp
@@ -53,8 +53,7 @@ class BasicBlock_t
 
 	protected:
 
-		void BuildBlock(Function_t* func,
-                		Instruction_t* insn,
+		void BuildBlock( Instruction_t* insn,
                 		const std::map<Instruction_t*,BasicBlock_t*> &insn2block_map
         			);
 
diff --git a/libIRDB/include/cfg/CFG.hpp b/libIRDB/include/cfg/CFG.hpp
index 647f916aa..ae2c6d0cd 100644
--- a/libIRDB/include/cfg/CFG.hpp
+++ b/libIRDB/include/cfg/CFG.hpp
@@ -26,24 +26,27 @@ class ControlFlowGraph_t
 {
 	public:
 		ControlFlowGraph_t(Function_t* func);
-
 		BasicBlock_t* GetEntry() const { return entry; }
 		Function_t* GetFunction() const { return function; }
+		BasicBlockSet_t& GetBlocks()   { return blocks; }
+		const BasicBlockSet_t& GetBlocks()   const { return blocks; }
 		void dump(std::ostream &os=std::cout) const { os<<*this; }
 
-	protected:
+	private:
+	// methods 
 		void Build(Function_t *func);
+		void alloc_blocks(const InstructionSet_t &starts, std::map<Instruction_t*,BasicBlock_t*>& insn2block_map); // add an empty block for each start not yet in the map
+		void build_blocks(const std::map<Instruction_t*,BasicBlock_t*>& insn2block_map); // fill in every mapped block that is still empty
+		void find_unblocked_instructions(InstructionSet_t &starts, Function_t* func); // add func's instructions that are in no block to starts
 
-	private:
-		std::set<BasicBlock_t*> blocks;
+	// data
+		BasicBlockSet_t blocks;
 		BasicBlock_t* entry;
 		Function_t* function;
 
 	/* friends */
 	public:
 		friend std::ostream& operator<<(std::ostream& os, const ControlFlowGraph_t& cfg);
-		BasicBlockSet_t& GetBlocks()   { return blocks; }
-		const BasicBlockSet_t& GetBlocks()   const { return blocks; }
 };
 
 
diff --git a/libIRDB/src/cfg/BasicBlock.cpp b/libIRDB/src/cfg/BasicBlock.cpp
index bb0e5c7dc..ccba987cd 100644
--- a/libIRDB/src/cfg/BasicBlock.cpp
+++ b/libIRDB/src/cfg/BasicBlock.cpp
@@ -37,11 +37,11 @@ BasicBlock_t::BasicBlock_t()
 
 void BasicBlock_t::BuildBlock
 	(
-		Function_t* func, 
 		Instruction_t* insn, 
 		const map<Instruction_t*,BasicBlock_t*> &insn2block_map
 	)
 {
+	const auto &func=insn->GetFunction();
 	assert(insn);
 	/* loop through the instructions for this block */
 	while(insn)
@@ -116,7 +116,7 @@ void BasicBlock_t::BuildBlock
 			break;
 
 		/* check for a fallthrough out of the function */
-		if(ft_insn && !is_in_container(func->GetInstructions(),ft_insn))
+		if(ft_insn && ft_insn->GetFunction() != func) //  !is_in_container(func->GetInstructions(),ft_insn))
 			break;
 
 
diff --git a/libIRDB/src/cfg/CFG.cpp b/libIRDB/src/cfg/CFG.cpp
index 2e1b80c83..cfbc731f8 100644
--- a/libIRDB/src/cfg/CFG.cpp
+++ b/libIRDB/src/cfg/CFG.cpp
@@ -25,10 +25,12 @@
 using namespace std;
 using namespace libIRDB;
 
+#define ALLOF(a) begin(a),end(a)
+
 /*
  *  FindTargets - locate all possible instructions that are the target of a jump instruction
  */
-static set<Instruction_t*> FindBlockStarts(Function_t* func) 
+static InstructionSet_t FindBlockStarts(Function_t* func) 
 {
 
 	InstructionSet_t targets;
@@ -80,55 +82,86 @@ static set<Instruction_t*> FindBlockStarts(Function_t* func)
 	return targets;
 }
 
-
-
-
 ControlFlowGraph_t::ControlFlowGraph_t(Function_t* func) :
 	entry(NULL), function(func)
 {
 	Build(func);	
 }
 
-void ControlFlowGraph_t::Build(Function_t* func)
-{
-	set<Instruction_t*> starts=FindBlockStarts(func);
-
-	map<Instruction_t*,BasicBlock_t*> insn2block_map;
 
+void ControlFlowGraph_t::alloc_blocks(const InstructionSet_t &starts, map<Instruction_t*,BasicBlock_t*>& insn2block_map)
+{	/* allocate a new, empty block for each start and record it in both blocks and insn2block_map */
 	/* create a basic block for each instruction that starts a block */
-	for(	set<Instruction_t*>::const_iterator it=starts.begin();
-		it!=starts.end();
-		++it
-	   )
+	for(const auto &insn : starts)
 	{
-		Instruction_t* insn=*it;
-		BasicBlock_t* newblock=new BasicBlock_t;
+		if(is_in_container(insn2block_map,insn)) // already allocated (Build calls this function twice)
+			continue;
 
-		/* record the entry block */
-		if(insn==func->GetEntryPoint())
-			entry=newblock;
+		auto  newblock=new BasicBlock_t; // stored in blocks below; not freed in this function
 
 		assert( insn && newblock );
 
 		blocks.insert(newblock);
 		insn2block_map[insn]=newblock;
 	}
+}
+
+void ControlFlowGraph_t::build_blocks(const map<Instruction_t*,BasicBlock_t*>& insn2block_map)
+{
+	/* fills in every mapped block that has no instructions yet; blocks that already have some are skipped */
 	/* Ask the basic block to set the fields for each block that need to be set */
-	for(	map<Instruction_t*,BasicBlock_t*>::const_iterator it=insn2block_map.begin();
-		it!=insn2block_map.end();
-		++it
-	   )
+	for(const auto &it : insn2block_map)
 	{
-		Instruction_t* insn=(*it).first;
-		BasicBlock_t* block=(*it).second;
+		const auto insn=it.first;
+		const auto block=it.second;
 
 		assert(insn && block);
+		/* the assert above has to come first: the check below dereferences block */
+		if(block->GetInstructions().size()>0) // already built
+			continue;
 
-		block->BuildBlock(func, insn, insn2block_map);
+		block->BuildBlock(insn, insn2block_map);
 
 	}
 
+}
+
+void ControlFlowGraph_t::find_unblocked_instructions(InstructionSet_t &starts, Function_t* func)
+{
+	/* gather every instruction that some existing block already contains */
+	auto covered=InstructionSet_t();
+	for(const auto blk : GetBlocks())
+		covered.insert(ALLOF(blk->GetInstructions()));
+
+	/* every instruction of func outside that set becomes an additional block start */
+	const auto &all_insns=func->GetInstructions();
+	set_difference(ALLOF(all_insns), ALLOF(covered), inserter(starts,starts.end()));
+}
+
+
+
+void ControlFlowGraph_t::Build(Function_t* func)
+{
+	/* pass 1: one block for each block start that FindBlockStarts reports */
+	auto starts=FindBlockStarts(func);
+	auto insn2block_map=map<Instruction_t*,BasicBlock_t*> ();
+	alloc_blocks(starts, insn2block_map);
+	build_blocks(insn2block_map);
+
+	/* record the entry block.  use find(), not operator[]:  operator[] would add a */
+	/* NULL block to the map when the entry point has no block, and the second */
+	/* build_blocks() pass below would then run into that NULL block. */
+	const auto entry_it=insn2block_map.find(func->GetEntryPoint());
+	entry = (entry_it==insn2block_map.end()) ? nullptr : entry_it->second;
+
+	/* pass 2: most functions are done now.  however, if a function has a (direct) */
+	/* side entrance, some code may appear unreachable and not be placed in a block. */
+	/* here, we detect that code and make every such instruction a block start, as */
+	/* any of them may have a side entrance. */
+	/* note: side entrances may still miss a block start in code that appears reachable from the entrance. */
+	find_unblocked_instructions(starts, func);
+	alloc_blocks(starts, insn2block_map);
+	build_blocks(insn2block_map);
 
 
 }
-- 
GitLab