From 7f33c215b366656465f8eea6c4e2962db519f9d0 Mon Sep 17 00:00:00 2001
From: Anh Nguyen-Tuong <zenpoems@gmail.com>
Date: Sat, 15 Dec 2018 22:26:48 -0500
Subject: [PATCH] Self validate

Former-commit-id: f694199159725b54e8460705bb503187bcd59fea
---
 libIRDB/test/fill_in_cfg.cpp      | 21 +++++++-
 libIRDB/test/fill_in_cfg.hpp      | 20 +++++---
 libIRDB/test/fill_in_indtargs.cpp | 11 +++--
 libIRDB/test/fix_calls.cpp        | 80 +++++++++++++++----------------
 tools/meds2pdb/meds2pdb.cpp       |  6 +++
 tools/rida/rida.cpp               |  5 ++
 6 files changed, 91 insertions(+), 52 deletions(-)

diff --git a/libIRDB/test/fill_in_cfg.cpp b/libIRDB/test/fill_in_cfg.cpp
index 901638f12..905f3bd53 100644
--- a/libIRDB/test/fill_in_cfg.cpp
+++ b/libIRDB/test/fill_in_cfg.cpp
@@ -106,7 +106,10 @@ void PopulateCFG::set_fallthrough
 
 	/* set the target for this insn */
 	if(fallthrough_insn!=0)
+	{
+		fallthroughs_set++;
 		insn->SetFallthrough(fallthrough_insn);
+	}
 	else
 		missed_instructions.insert(pair<db_id_t,virtual_offset_t>(insn->GetAddress()->GetFileID(),virtual_offset));
 }
@@ -180,7 +183,10 @@ void PopulateCFG::set_target
 
 		/* set the target for this insn */
 		if(target_insn!=0)
+		{
+			targets_set++;
 			insn->SetTarget(target_insn);
+		}
 		else
 			missed_instructions.insert( pair<db_id_t,virtual_offset_t>(insn->GetAddress()->GetFileID(),virtual_offset));
 
@@ -510,6 +516,7 @@ void PopulateCFG::fill_in_scoops(FileIR_t *firp)
 
 		bool is_relro=is_in_relro_segment(secndx);
 		DataScoop_t *newscoop=new DataScoop_t(max_base_id++, name, startaddr, endaddr, NULL, permissions, is_relro, the_contents);
+		scoops_detected++;
 		assert(newscoop);
 		firp->GetDataScoops().insert(newscoop);
 
@@ -650,8 +657,20 @@ int PopulateCFG::executeStep(IRDBObjects_t *const irdb_objects)
 		cerr<<"Unexpected error"<<endl;
 		return -1;
 	}
+
+        cout<<"#ATTRIBUTE targets_set="<<targets_set<<endl;
+        cout<<"#ATTRIBUTE fallthroughs_set="<<fallthroughs_set<<endl;
+        cout<<"#ATTRIBUTE scoops_detected="<<scoops_detected<<endl;
+
+	if(getenv("SELF_VALIDATE"))
+	{
+		assert(targets_set > 10);
+		assert(fallthroughs_set > 100);
+		assert(scoops_detected > 5 );
+	}
+
     
-    return 0;
+	return 0;
 }
 
 
diff --git a/libIRDB/test/fill_in_cfg.hpp b/libIRDB/test/fill_in_cfg.hpp
index 4925bafda..5beb37871 100644
--- a/libIRDB/test/fill_in_cfg.hpp
+++ b/libIRDB/test/fill_in_cfg.hpp
@@ -16,12 +16,16 @@ class PopulateCFG : public libIRDB::Transform_SDK::TransformStep_t
             variant_id(p_variant_id),
             fix_landing_pads(p_fix_landing_pads)
         {
-            odd_target_count = 0;
-            bad_target_count = 0;
-            bad_fallthrough_count = 0;
-            failed_target_count = 0U;
-       
-  	    elfiop = std::unique_ptr<EXEIO::exeio>(nullptr);
+		odd_target_count = 0;
+		bad_target_count = 0;
+		bad_fallthrough_count = 0;
+		failed_target_count = 0U;
+
+		targets_set=0;
+		fallthroughs_set=0;
+		scoops_detected=0;
+
+		elfiop = std::unique_ptr<EXEIO::exeio>(nullptr);
         }
 
 	~PopulateCFG(void) override
@@ -74,6 +78,10 @@ class PopulateCFG : public libIRDB::Transform_SDK::TransformStep_t
         int bad_target_count;
         int bad_fallthrough_count;
         unsigned int failed_target_count;
+
+	size_t targets_set=0;
+	size_t fallthroughs_set=0;
+	size_t scoops_detected=0;
         
         // non-optional
 	libIRDB::db_id_t variant_id;        
diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp
index d4955e8fa..34225d0f4 100644
--- a/libIRDB/test/fill_in_indtargs.cpp
+++ b/libIRDB/test/fill_in_indtargs.cpp
@@ -66,10 +66,6 @@ public:
  * class variables 
  */
 
-//
-// record the ICFS for each branch, these can come from switch tables
-// 
-map<Instruction_t*, ICFS_t> icfs_maps;
 
 // the bounds of the executable sections in the pgm.
 set< pair <virtual_offset_t,virtual_offset_t>  > bounds;
@@ -3058,6 +3054,13 @@ int executeStep(IRDBObjects_t *const irdb_objects)
                 cerr<<"Unexpected error"<<endl;
                 return -1;
         }
+
+	assert(getenv("SELF_VALIDATE")==nullptr || bounds.size() > 3 );
+	assert(getenv("SELF_VALIDATE")==nullptr || targets.size() > 100 );
+	assert(getenv("SELF_VALIDATE")==nullptr || ranges.size() > 5 );
+	assert(getenv("SELF_VALIDATE")==nullptr || preds.size() > 100 );
+	assert(getenv("SELF_VALIDATE")==nullptr || lookupInstructionMap.size() > 100 );
+
 	return 0;
 }
 
diff --git a/libIRDB/test/fix_calls.cpp b/libIRDB/test/fix_calls.cpp
index 84197c341..a513deb8a 100644
--- a/libIRDB/test/fix_calls.cpp
+++ b/libIRDB/test/fix_calls.cpp
@@ -78,15 +78,20 @@ using RangeSet_t = std::set<Range_t, Range_tCompare>;
 
 
 RangeSet_t eh_frame_ranges;
-long long no_target_insn=0;
-long long no_fallthrough_insn=0;
-long long target_not_in_function=0;
-long long call_to_not_entry=0;
-long long thunk_check=0;
-long long found_pattern=0;
-long long in_ehframe=0;
-long long no_fix_for_ib=0;
-long long no_fix_for_safefn=0;
+size_t no_target_insn=0;
+size_t no_fallthrough_insn=0;
+size_t target_not_in_function=0;
+size_t call_to_not_entry=0;
+size_t thunk_check=0;
+size_t found_pattern=0;
+size_t in_ehframe=0;
+size_t no_fix_for_ib=0;
+size_t no_fix_for_safefn=0;
+size_t other_fixes=0;
+size_t fixed_calls=0;
+size_t not_fixed_calls=0;
+size_t not_calls=0;
+
 bool opt_fix_icalls = false;
 bool opt_fix_safefn = true;
 
@@ -724,7 +729,7 @@ void mark_as_unpinned_ibt(FileIR_t* firp, Instruction_t* ret_point)
 // fix_all_calls - convert calls to push/jump pairs in the IR.  if fix_all is true, all calls are converted, 
 // else we attempt to detect the calls it is safe to convert.
 //
-void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all)
+void fix_all_calls(FileIR_t* firp, bool fix_all)
 {
 
         set<Instruction_t*,insn_less<Instruction_t*> > sorted_insns;
@@ -739,7 +744,6 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all)
                 sorted_insns.insert(insn);
         }
 
-	long long fixed_calls=0, not_fixed_calls=0, not_calls=0;
 
 	for(
 		set<Instruction_t*,insn_less<Instruction_t*> >::const_iterator it=sorted_insns.begin();
@@ -749,7 +753,7 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all)
 	{
 
 		Instruction_t* insn=*it;
-		if(getenv("STOP_FIX_CALLS_AT") && fixed_calls>=atoi(getenv("STOP_FIX_CALLS_AT")))
+		if(getenv("STOP_FIX_CALLS_AT") && static_cast<long long>(fixed_calls)>=atoi(getenv("STOP_FIX_CALLS_AT"))) // signed compare: a negative limit still stops immediately, as it did when the counter was long long
 			break;
 
 		if(is_call(insn)) 
@@ -761,7 +765,7 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all)
 			}
 			// we've been asked to fix all calls for funsies/cfi
 			// (and a bit about debugging fix-calls that's not important for anyone but jdh.
-			else if ( fix_all || (getenv("FIX_CALL_LIMIT") && not_fixed_calls>=atoi(getenv("FIX_CALL_LIMIT"))))
+			else if ( fix_all || (getenv("FIX_CALL_LIMIT") && static_cast<long long>(not_fixed_calls)>=atoi(getenv("FIX_CALL_LIMIT")))) // signed compare: a negative limit is always reached, as it was when the counter was long long
 			{
 				bool fix_me = true;
 				if (!opt_fix_safefn && can_skip_safe_function(insn))
@@ -799,33 +803,21 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all)
 		}
 	}
 
-
-	if(print_stats)
-	{
-		cout << "# ATTRIBUTE fix_calls::fixed_calls="<<std::dec<<fixed_calls<<endl;
-		cout << "# ATTRIBUTE fix_calls::no_fix_needed_calls="<<std::dec<<not_fixed_calls<<endl;
-		cout << "# ATTRIBUTE fix_calls::other_instructions="<<std::dec<<not_calls<<endl;
-		cout << "# ATTRIBUTE fix_calls::fixed_pct="<<std::fixed<<(((float)fixed_calls)/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl;
-		cout << "# ATTRIBUTE fix_calls::remaining_ratio="<<std::fixed<<((float)not_fixed_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl;
-		cout << "# ATTRIBUTE fix_calls::other_insts_ratio="<<std::fixed<<((float)not_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl;
-		cout << "# ATTRIBUTE fix_calls::no_target_insn="<<std::dec<< no_target_insn << endl;
-		cout << "# ATTRIBUTE fix_calls::no_fallthrough_insn="<<std::dec<< no_fallthrough_insn << endl;
-		cout << "# ATTRIBUTE fix_calls::target_not_in_function="<<std::dec<< target_not_in_function << endl;
-		cout << "# ATTRIBUTE fix_calls::call_to_not_entry="<<std::dec<< call_to_not_entry << endl;
-		cout << "# ATTRIBUTE fix_calls::thunk_check="<<std::dec<< thunk_check << endl;
-		cout << "# ATTRIBUTE fix_calls::found_pattern="<<std::dec<< found_pattern << endl;
-		cout << "# ATTRIBUTE fix_calls::in_ehframe="<<std::dec<< in_ehframe << endl;
-		cout << "# ATTRIBUTE fix_calls::no_fix_for_ib="<<std::dec<< no_fix_for_ib << endl;
-		cout << "# ATTRIBUTE fix_calls::no_fix_for_safefn="<<std::dec<< no_fix_for_safefn << endl;
-		no_target_insn=0;
-		no_fallthrough_insn=0;
-		target_not_in_function=0;
-		call_to_not_entry=0;
-		thunk_check=0;
-		found_pattern=0;
-		in_ehframe=0;
-		no_fix_for_ib=0;
-	}
+	cout << "# ATTRIBUTE fix_calls::fixed_calls="<<std::dec<<fixed_calls<<endl;
+	cout << "# ATTRIBUTE fix_calls::no_fix_needed_calls="<<std::dec<<not_fixed_calls<<endl;
+	cout << "# ATTRIBUTE fix_calls::other_instructions="<<std::dec<<not_calls<<endl;
+	cout << "# ATTRIBUTE fix_calls::fixed_pct="<<std::fixed<<(((float)fixed_calls)/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl;
+	cout << "# ATTRIBUTE fix_calls::remaining_ratio="<<std::fixed<<((float)not_fixed_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl;
+	cout << "# ATTRIBUTE fix_calls::other_insts_ratio="<<std::fixed<<((float)not_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl;
+	cout << "# ATTRIBUTE fix_calls::no_target_insn="<<std::dec<< no_target_insn << endl;
+	cout << "# ATTRIBUTE fix_calls::no_fallthrough_insn="<<std::dec<< no_fallthrough_insn << endl;
+	cout << "# ATTRIBUTE fix_calls::target_not_in_function="<<std::dec<< target_not_in_function << endl;
+	cout << "# ATTRIBUTE fix_calls::call_to_not_entry="<<std::dec<< call_to_not_entry << endl;
+	cout << "# ATTRIBUTE fix_calls::thunk_check="<<std::dec<< thunk_check << endl;
+	cout << "# ATTRIBUTE fix_calls::found_pattern="<<std::dec<< found_pattern << endl;
+	cout << "# ATTRIBUTE fix_calls::in_ehframe="<<std::dec<< in_ehframe << endl;
+	cout << "# ATTRIBUTE fix_calls::no_fix_for_ib="<<std::dec<< no_fix_for_ib << endl;
+	cout << "# ATTRIBUTE fix_calls::no_fix_for_safefn="<<std::dec<< no_fix_for_safefn << endl;
 }
 
 
@@ -908,6 +900,8 @@ void fix_other_pcrel(FileIR_t* firp, Instruction_t *insn, uintptr_t virt_offset)
 		insn->GetRelocations().insert(reloc);
 		firp->GetRelocations().insert(reloc);
 
+		other_fixes++;
+
 		disasm=DecodedInstruction_t(insn);
 		if(getenv("VERBOSE_FIX_CALLS"))
 			cout<<" Converted to: "<<disasm.getDisassembly() /*CompleteInstr*/<<endl;
@@ -950,6 +944,7 @@ void fix_other_pcrel(FileIR_t* firp)
 		fix_other_pcrel(firp,insn, insn->GetAddress()->GetVirtualOffset());
 		fix_safefr(firp,insn, insn->GetAddress()->GetVirtualOffset());
 	}
+	cout << "# ATTRIBUTE fix_calls::other_fixes="<<std::dec<<other_fixes<<endl;
 }
 
 //
@@ -1063,7 +1058,7 @@ int executeStep(IRDBObjects_t *const irdb_objects)
 			if(do_eh_frame)
         			read_ehframe(firp, elfiop);
 
-			fix_all_calls(firp,true,fix_all);
+			fix_all_calls(firp,fix_all);
 			fix_other_pcrel(firp);
 
 			cout<<"Done!"<<endl;
@@ -1081,6 +1076,9 @@ int executeStep(IRDBObjects_t *const irdb_objects)
                 return -1;
         }
 
+	assert(getenv("SELF_VALIDATE")==nullptr || (fixed_calls + other_fixes) > 5);
+	assert(getenv("SELF_VALIDATE")==nullptr || not_fixed_calls > 5);
+
 	return 0;
 }
 
diff --git a/tools/meds2pdb/meds2pdb.cpp b/tools/meds2pdb/meds2pdb.cpp
index 8b56919da..3eb03a6e1 100644
--- a/tools/meds2pdb/meds2pdb.cpp
+++ b/tools/meds2pdb/meds2pdb.cpp
@@ -96,6 +96,8 @@ void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instruct
 	// for each instruction:
 	//    populate instruction table
 
+	assert(getenv("SELF_VALIDATE")==nullptr || instructions.size() > 0 );
+
 	pqxx::tablewriter W_addrs(txn,addressTable);
 	for (auto i = 0U; i < instructions.size(); i ++ )
 	{
@@ -188,6 +190,8 @@ void insert_functions(int fileID, const vector<wahoo::Function*> &functions  )
   work txn(conn);
   txn.exec("SET client_encoding='LATIN1';");
 
+  assert(getenv("SELF_VALIDATE")==nullptr || functions.size() > 0 );
+
   // bulk insert of function information into the DB
   for (auto i = 0U; i < functions.size(); i += STRIDE)
   {  
@@ -233,6 +237,8 @@ void update_functions(int fileID, const vector<wahoo::Function*> &functions  )
   work txn(conn);
   txn.exec("SET client_encoding='LATIN1';");
 
+  assert(getenv("SELF_VALIDATE")==nullptr || functions.size() > 0 );
+
   // bulk insert of function information into the DB
   string query;
   for (auto i = 0U; i < functions.size(); i += STRIDE )
diff --git a/tools/rida/rida.cpp b/tools/rida/rida.cpp
index 9220e6cdc..08f9a9b1d 100644
--- a/tools/rida/rida.cpp
+++ b/tools/rida/rida.cpp
@@ -221,6 +221,9 @@ class CreateFunctions_t
 				sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(),fde->getEndAddress())}));
 
 			cout<<hex;
+			if(getenv("SELF_VALIDATE"))
+				assert(fdes->size()>0);
+
 			for(const auto fde : *fdes)
 			{
 				if(verbose)
@@ -453,6 +456,8 @@ class CreateFunctions_t
 				outfile<<hex<<"\t"<<min.first<<"\t"<<dec<<size<<"\tFUNC GLOBAL\t"<<funcNames[scc]<<" "<< usefp << endl;
 				doBelongTos(scc);
 			}
+			if(getenv("SELF_VALIDATE"))
+				assert(sccs.size()>0);
 		}
 
 		string getUseFp(const RangeSet_t scc)
-- 
GitLab