From e499a163db3ed845c8378aa665a2b6dab9451b63 Mon Sep 17 00:00:00 2001
From: jdh8d <jdh8d@git.zephyr-software.com>
Date: Mon, 3 Apr 2017 14:58:33 +0000
Subject: [PATCH] First working version of the EH IR. Enable with
 --step-option fill_in_indtargs:--split-eh-frame.

Former-commit-id: c728208252f028ce9e465c723a5a0110ab25d684
---
 libIRDB/include/core/eh.hpp           | 24 ++++---
 libIRDB/include/core/reloc.hpp        |  2 +-
 libIRDB/src/core/eh.cpp               | 11 +--
 libIRDB/src/core/fileir.cpp           | 22 +++++-
 libIRDB/test/fill_in_indtargs.cpp     | 30 ++++++--
 libIRDB/test/read_ehframe.cpp         |  7 ++
 libIRDB/test/split_eh_frame.cpp       | 98 ++++++++++++++++++---------
 libtransform/src/integertransform.cpp |  2 +-
 8 files changed, 137 insertions(+), 59 deletions(-)

diff --git a/libIRDB/include/core/eh.hpp b/libIRDB/include/core/eh.hpp
index 2d09b1c92..b99abb0dd 100644
--- a/libIRDB/include/core/eh.hpp
+++ b/libIRDB/include/core/eh.hpp
@@ -26,8 +26,13 @@ class EhProgram_t : public BaseObj_t
 {
 	public:
 
-	EhProgram_t(db_id_t id, const uint64_t caf, const int64_t daf, const uint8_t p_ptrsize)
-		: BaseObj_t(NULL), code_alignment_factor(caf), data_alignment_factor(daf), ptrsize(p_ptrsize) 
+	EhProgram_t(db_id_t id, const uint64_t caf, const int64_t daf, const uint8_t rr, const uint8_t p_ptrsize)
+		: 
+		BaseObj_t(NULL), 
+		code_alignment_factor(caf), 
+		data_alignment_factor(daf), 
+		return_register(rr), 
+		ptrsize(p_ptrsize) 
 	{ SetBaseID(id); }
 
 
@@ -43,6 +48,9 @@ class EhProgram_t : public BaseObj_t
         int64_t GetDataAlignmentFactor() const { return data_alignment_factor; }
         void SetDataAlignmentFactor(const int64_t daf) { data_alignment_factor=daf; }
 
+        int64_t GetReturnRegNumber() const { return return_register; }
+        void SetReturnRegNumber(const int64_t rr) { return_register=rr; }
+
         std::string WriteToDB(File_t* fid);    // writes to DB, ID is not -1.
 
 
@@ -52,22 +60,16 @@ class EhProgram_t : public BaseObj_t
 
 	private:
 
-
 	EhProgramListing_t cie_program;
 	EhProgramListing_t fde_program;
-        uint64_t code_alignment_factor;
-        int64_t data_alignment_factor;
+        uint8_t code_alignment_factor;
+        int8_t data_alignment_factor;
+        int8_t return_register;
 	uint8_t ptrsize; // needed for interpreting programs
 
 };
 bool operator<(const EhProgram_t&a, const EhProgram_t&b);
 
-/*
-struct EhProgramComparator_t {
-	bool operator() (const EhProgram_t* a, const EhProgram_t* b) { return *a < *b; }
-};
-*/
-
 typedef std::set<EhProgram_t*> EhProgramSet_t;
 
 class EhCallSite_t : public BaseObj_t
diff --git a/libIRDB/include/core/reloc.hpp b/libIRDB/include/core/reloc.hpp
index 37f77ccad..d8eb491dd 100644
--- a/libIRDB/include/core/reloc.hpp
+++ b/libIRDB/include/core/reloc.hpp
@@ -30,7 +30,7 @@ class Relocation_t : public BaseObj_t
         Relocation_t(db_id_t reloc_id, int _offset, std::string _type, BaseObj_t* p_wrt_obj=NULL, int32_t p_addend=0) :
 		BaseObj_t(NULL), offset(_offset), type(_type), wrt_obj(p_wrt_obj), addend(p_addend) { SetBaseID(reloc_id); }
 
-        Relocation_t(db_id_t reloc_id) : BaseObj_t(NULL), type(""), wrt_obj(NULL), addend(0) { assert(0);}          // read from DB       
+//        Relocation_t(db_id_t reloc_id) : BaseObj_t(NULL), type(""), wrt_obj(NULL), addend(0) { assert(0);}          // read from DB       
 
         void WriteToDB() { assert(0); }   // writes to DB ID is not -1.
         std::string WriteToDB(File_t* fid, BaseObj_t* insn);    // writes to DB, ID is not -1.
diff --git a/libIRDB/src/core/eh.cpp b/libIRDB/src/core/eh.cpp
index 6b2ca98d7..b09178d40 100644
--- a/libIRDB/src/core/eh.cpp
+++ b/libIRDB/src/core/eh.cpp
@@ -32,9 +32,9 @@ using namespace std;
 
 bool libIRDB::operator<(const EhProgram_t&a, const EhProgram_t&b)
 {
-        return  tie(a.cie_program,a.fde_program,a.code_alignment_factor,a.data_alignment_factor,a.ptrsize)
+        return  tie(a.cie_program,a.fde_program,a.code_alignment_factor,a.data_alignment_factor,a.return_register, a.ptrsize)
                 <
-                tie(b.cie_program,b.fde_program,b.code_alignment_factor,b.data_alignment_factor,b.ptrsize);
+                tie(b.cie_program,b.fde_program,b.code_alignment_factor,b.data_alignment_factor,b.return_register, b.ptrsize);
 }
 
 
@@ -95,11 +95,12 @@ std::string EhProgram_t::WriteToDB(File_t* fid)    // writes to DB, ID is not -1
 
 	string q;
 	q ="insert into " + fid->GetEhProgramTableName();
-	q+="(eh_pgm_id,caf,daf,ptrsize,cie_program,fde_program) "+
+	q+="(eh_pgm_id,caf,daf,return_register,ptrsize,cie_program,fde_program) "+
 		string(" VALUES (") +
 		string("'") + to_string(GetBaseID())          + string("', ") +
-		string("'") + to_string(code_alignment_factor)               + string("', ") +
-		string("'") + to_string(data_alignment_factor)               + string("', ") +
+		string("'") + to_string(+code_alignment_factor)               + string("', ") +
+		string("'") + to_string(+data_alignment_factor)               + string("', ") +
+		string("'") + to_string(+return_register)               + string("', ") +
 		string("'") + to_string(+ptrsize)               + string("', ") +
 		string("'") + encoded_cie_program               + string("', ") +
 		string("'") + encoded_fde_program               + string("') ; ");
diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp
index 1e6f265ce..8c8b0f340 100644
--- a/libIRDB/src/core/fileir.cpp
+++ b/libIRDB/src/core/fileir.cpp
@@ -146,11 +146,12 @@ void FileIR_t::ReadFromDB()
 
 	ReadAllICFSFromDB(addressToInstructionMap, unresolvedICFS);
 
-	// put the scoops, instructions, and eh call sites into the object map.
+	// put the scoops, instructions, ehpgms, eh call sites into the object map.
 	// if relocs end up on other objects, we'll need to add them to.  for now only these things.
 	objMap.insert(insnMap.begin(), insnMap.end());
 	objMap.insert(scoopMap.begin(), scoopMap.end());
 	objMap.insert(ehcsMap.begin(), ehcsMap.end());
+	objMap.insert(ehpgmMap.begin(), ehpgmMap.end());
 	ReadRelocsFromDB(objMap);
 
 	UpdateEntryPoints(insnMap,entry_points);
@@ -455,11 +456,12 @@ std::map<db_id_t,EhProgram_t*> FileIR_t::ReadEhPgmsFromDB()
 		const auto eh_pgm_id=atoi(dbintr->GetResultColumn("eh_pgm_id").c_str());
 		const auto caf=atoi(dbintr->GetResultColumn("caf").c_str());
 		const auto daf=atoi(dbintr->GetResultColumn("daf").c_str());
+		const auto rr=atoi(dbintr->GetResultColumn("return_register").c_str());
 		const auto ptrsize=atoi(dbintr->GetResultColumn("ptrsize").c_str());
 		const auto& encoded_cie_program = dbintr->GetResultColumn("cie_program");
 		const auto& encoded_fde_program = dbintr->GetResultColumn("fde_program");
 
-		auto new_ehpgm=new EhProgram_t(eh_pgm_id, caf, daf, ptrsize);
+		auto new_ehpgm=new EhProgram_t(eh_pgm_id, caf, daf, rr, ptrsize);
 		decode_pgm(encoded_cie_program, new_ehpgm->GetCIEProgram());
 		decode_pgm(encoded_fde_program, new_ehpgm->GetFDEProgram());
 
@@ -816,13 +818,27 @@ void FileIR_t::WriteToDB()
 	{
 		string q = i->WriteToDB(fileptr);
 		dbintr->IssueQuery(q);
+
+		string r="";
+		for(auto& reloc : i->GetRelocations())
+		{
+			r+=reloc->WriteToDB(fileptr,i);
+		}
+		if(r!="")
+			dbintr->IssueQuery(r);
 	}
 	for(const auto& i : eh_css)
 	{
 		string q = i->WriteToDB(fileptr);
 		dbintr->IssueQuery(q);
+		string r="";
+		for(auto& reloc : i->GetRelocations())
+		{
+			r+=reloc->WriteToDB(fileptr,i);
+		}
+		if(r!="")
+			dbintr->IssueQuery(r);
 	}
-
 }
 
 
diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp
index 93d8be06d..d129a80dd 100644
--- a/libIRDB/test/fill_in_indtargs.cpp
+++ b/libIRDB/test/fill_in_indtargs.cpp
@@ -2640,17 +2640,32 @@ void fill_in_indtargs(FileIR_t* firp, exeio* elfiop, std::list<virtual_offset_t>
 
 main(int argc, char* argv[])
 {
-	int argc_iter = 2;
-
-	std::list<virtual_offset_t> forced_pins;
+	auto argc_iter = (int)2;
+	auto split_eh_frame_opt=false;
+	auto forced_pins=std::list<virtual_offset_t> ();
 
 	if(argc<2)
 	{
-		cerr<<"Usage: fill_in_indtargs <id> [addr,...]"<<endl;
+		cerr<<"Usage: fill_in_indtargs <id> [--split-eh-frame] [addr,...]"<<endl;
 		exit(-1);
 	}
-	/* parse argumnets */
-	for (argc_iter = 2; argc_iter < argc; argc_iter++)
+
+	// parse dash-style options.
+	while(argc_iter < argc && argv[argc_iter][0]=='-')
+	{
+		if(string(argv[argc_iter])=="--split-eh-frame")
+		{
+			split_eh_frame_opt=true;
+			argc_iter++;
+		}
+		else
+		{
+			cerr<<"Unknown option: "<<argv[argc_iter]<<endl;
+			exit(2);
+		}
+	}
+	// parse addr arguments
+	for (; argc_iter < argc; argc_iter++)
 	{
 		char *end_ptr;
 		virtual_offset_t offset = strtol(argv[argc_iter], &end_ptr, 0);
@@ -2699,7 +2714,8 @@ main(int argc, char* argv[])
 
 			// find all indirect branch targets
 			fill_in_indtargs(firp, elfiop, forced_pins);
-			split_eh_frame(firp);
+			if(split_eh_frame_opt)
+				split_eh_frame(firp);
 			
 			// write the DB back and commit our changes 
 			firp->WriteToDB();
diff --git a/libIRDB/test/read_ehframe.cpp b/libIRDB/test/read_ehframe.cpp
index 4d69b5ead..b385b93c3 100644
--- a/libIRDB/test/read_ehframe.cpp
+++ b/libIRDB/test/read_ehframe.cpp
@@ -872,6 +872,13 @@ void read_ehframe(FileIR_t* virp, EXEIO::exeio* exeiop)
 	if(!elfiop)
 		return;	// skip entire analysis for non-elf files as eh-frame is way different.
 
+	const auto eh_frame_it=find_if(virp->GetDataScoops().begin(), virp->GetDataScoops().end(),
+		[](const DataScoop_t* scoop) { return scoop->GetName()==".eh_frame"; });
+
+	// either no eh_frame in the elf file, or fill_in_indtargs removed it because
+	// it was asked to import the EH IR. 
+	if(eh_frame_it==virp->GetDataScoops().end())
+		return;
 
 	int secndx=0;
 	int secnum=elfiop->sections.size(); 
diff --git a/libIRDB/test/split_eh_frame.cpp b/libIRDB/test/split_eh_frame.cpp
index ba20943bb..b0db20d0d 100644
--- a/libIRDB/test/split_eh_frame.cpp
+++ b/libIRDB/test/split_eh_frame.cpp
@@ -727,21 +727,21 @@ class eh_program_insn_t
 					case DW_CFA_advance_loc1:
 					{
 						auto loc=*(uint8_t*)(&data[pos]);
-						cur_addr+=(opcode_lower6*CAF);
+						cur_addr+=(loc*CAF);
 						return true;
 					}
 
 					case DW_CFA_advance_loc2:
 					{
 						auto loc=*(uint16_t*)(&data[pos]);
-						cur_addr+=(opcode_lower6*CAF);
+						cur_addr+=(loc*CAF);
 						return true;
 					}
 
 					case DW_CFA_advance_loc4:
 					{
 						auto loc=*(uint32_t*)(&data[pos]);
-						cur_addr+=(opcode_lower6*CAF);
+						cur_addr+=(loc*CAF);
 						return true;
 					}
 				}
@@ -860,6 +860,8 @@ class cie_contents_t : eh_frame_util_t<ptrsize>
 	const eh_program_t<ptrsize>& GetProgram() const { return eh_pgm; }
 	uint64_t GetCAF() const { return code_alignment_factor; }
 	int64_t GetDAF() const { return data_alignment_factor; }
+	uint64_t GetPersonality() const { return personality; }
+	uint64_t GetReturnRegister() const { return return_address_register_column; }
 
 	string GetAugmentation() const { return augmentation; }
 	uint8_t GetLSDAEncoding() const { return lsda_encoding;}
@@ -1273,12 +1275,18 @@ class lsda_call_site_t : private eh_frame_util_t<ptrsize>
 						wrt=firp->FindScoop(type_table.at(index).GetTypeInfoPointer());
 						assert(wrt);
 					}
-					auto offset=0;
+					const auto offset=0;
+					auto addend=0;
 					if(wrt!=NULL) 
-						type_table.at(index).GetTypeInfoPointer()-wrt->GetStart()->GetVirtualOffset();
-					auto newreloc=new Relocation_t(BaseObj_t::NOT_IN_DATABASE, offset, "type_table_entry", wrt, 0);
+						addend=type_table.at(index).GetTypeInfoPointer()-wrt->GetStart()->GetVirtualOffset();
+					auto newreloc=new Relocation_t(BaseObj_t::NOT_IN_DATABASE, offset, "type_table_entry", wrt, addend);
 					new_ehcs->GetRelocations().insert(newreloc);
 					firp->GetRelocations().insert(newreloc);
+
+					if(wrt==NULL)
+						cout<<"Catch all in action table"<<endl;
+					else
+						cout<<"Catch for type at "<<wrt->GetName()<<"+0x"<<hex<<addend<<"."<<endl;
 				}
 				else if(action<0)
 				{
@@ -1751,14 +1759,18 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 
 	void build_ir() const
 	{
+		typedef pair<EhProgram_t*,uint64_t> whole_pgm_t;
+
 		auto reusedpgms=size_t(0);
 		struct EhProgramComparator_t { 
-			bool operator() (const EhProgram_t* a, const EhProgram_t* b) { return *a < *b; } 
+//			bool operator() (const EhProgram_t* a, const EhProgram_t* b) { return *a < *b; } 
+			bool operator() (const whole_pgm_t& a, const whole_pgm_t& b) 
+			{ return tie(*a.first, a.second) < tie(*b.first,b.second); } 
 		};
 
 		// this is used to avoid adding duplicate entries to the program's IR, it allows a lookup by value
 		// instead of the IR's set which allows duplicates.
-		auto eh_program_cache = set<EhProgram_t*, EhProgramComparator_t>();
+		auto eh_program_cache = set<whole_pgm_t, EhProgramComparator_t>();
 
 		// find the right cie and fde, and build the IR from those for this instruction.
 		auto build_ir_insn=[&](Instruction_t* insn) -> void
@@ -1782,6 +1794,8 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 				const auto fde_addr=fie_it->GetFDEStartAddress();
 				const auto caf=fie_it->GetCIE().GetCAF(); 
 				const auto daf=fie_it->GetCIE().GetDAF(); 
+				const auto return_reg=fie_it->GetCIE().GetReturnRegister(); 
+				const auto personality=fie_it->GetCIE().GetPersonality(); 
 				const auto insn_addr=insn->GetAddress()->GetVirtualOffset();
 
 				auto import_pgm = [&](EhProgramListing_t& out_pgm, const eh_program_t<ptrsize> in_pgm) -> void
@@ -1813,7 +1827,7 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 
 				// build an eh program on the stack;
 
-				EhProgram_t ehpgm(BaseObj_t::NOT_IN_DATABASE,caf,daf,ptrsize);
+				auto ehpgm=EhProgram_t(BaseObj_t::NOT_IN_DATABASE,caf,daf,return_reg, ptrsize);
 				import_pgm(ehpgm.GetCIEProgram(), fie_it->GetCIE().GetProgram());
 				import_pgm(ehpgm.GetFDEProgram(), fie_it->GetProgram());
 
@@ -1821,11 +1835,11 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 				if(getenv("EHIR_VERBOSE")!=NULL)
 					ehpgm.print();
 				// see if we've already built this one.
-				auto ehpgm_it = eh_program_cache.find(&ehpgm) ;
+				auto ehpgm_it = eh_program_cache.find(whole_pgm_t(&ehpgm, personality)) ;
 				if(ehpgm_it != eh_program_cache.end())
 				{
 					// yes, use the cached program.
-					insn->SetEhProgram(*ehpgm_it);
+					insn->SetEhProgram(ehpgm_it->first);
 					if(getenv("EHIR_VERBOSE")!=NULL)
 						cout<<"Re-using existing Program!"<<endl;
 					reusedpgms++;
@@ -1833,19 +1847,44 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 				else /* doesn't yet exist! */
 				{
 					
-					// allocate in the heap so we can give it to the IR.
 					if(getenv("EHIR_VERBOSE")!=NULL)
 						cout<<"Allocating new Program!"<<endl;
-					EhProgram_t* newehpgm=new EhProgram_t(ehpgm); // copy constructor
 
-					// add to the IR
+					// allocate a new pgm in the heap so we can give it to the IR.
+					auto newehpgm=new EhProgram_t(ehpgm); // copy constructor
+					assert(newehpgm);
 					firp->GetAllEhPrograms().insert(newehpgm);
 
+					// allocate a relocation for the personality and give it to the IR.	
+					auto personality_scoop=firp->FindScoop(personality);
+					auto personality_insn_it=offset_to_insn_map.find(personality);
+					auto personality_insn=personality_insn_it==offset_to_insn_map.end() ? (Instruction_t*)NULL : personality_insn_it->second;
+					auto personality_obj = personality_scoop ? (BaseObj_t*)personality_scoop : (BaseObj_t*)personality_insn;
+					auto addend= personality_scoop ? personality - personality_scoop->GetStart()->GetVirtualOffset() : 0;
+					auto newreloc=new Relocation_t(BaseObj_t::NOT_IN_DATABASE, 0, "personality", personality_obj, addend);
+					assert(personality==0 || personality_obj!=NULL);
+					assert(newreloc);	
+
+					if(personality_obj==NULL)
+						cout<<"Null personality obj: 0x"<<hex<<personality<<endl;
+					else if(personality_scoop)
+						cout<<"Found personality scoop: 0x"<<hex<<personality<<" -> "
+						    <<personality_scoop->GetName()<<"+0x"<<hex<<addend<<endl;
+					else if(personality_insn)
+						cout<<"Found personality insn: 0x"<<hex<<personality<<" -> "
+						    <<personality_insn->GetBaseID()<<":"<<personality_insn->getDisassembly()<<endl;
+					else
+						assert(0);
+
+					newehpgm->GetRelocations().insert(newreloc);
+					firp->GetRelocations().insert(newreloc);
+
+
 					// record for this insn
 					insn->SetEhProgram(newehpgm);
 
 					// update cache.
-					eh_program_cache.insert(newehpgm);
+					eh_program_cache.insert(whole_pgm_t(newehpgm,personality));
 				}
 				
 				// build the IR from the FDE.
@@ -1863,19 +1902,6 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 			
 		};
 
-		//for_each(firp->GetInstructions().begin(), firp->GetInstructions().end(), [&](Instruction_t* i)
-		//{
-		//	build_ir_insn(i);
-		//});
-		for(Instruction_t* i : firp->GetInstructions())
-		{
-			build_ir_insn(i);
-		}
-
-		cout<<"#ATTRIBUTE total_eh_programs_created="<<dec<<firp->GetAllEhPrograms().size()<<endl;
-		cout<<"#ATTRIBUTE total_eh_programs_reused="<<dec<<reusedpgms<<endl;
-		cout<<"#ATTRIBUTE total_eh_programs="<<dec<<firp->GetAllEhPrograms().size()+reusedpgms<<endl;
-
 
 		auto remove_reloc=[&](Relocation_t* r) -> void
 		{
@@ -1901,10 +1927,20 @@ class split_eh_frame_impl_t : public split_eh_frame_t
 			delete s;
 		};
 
+		for(Instruction_t* i : firp->GetInstructions())
+		{
+			build_ir_insn(i);
+		}
+
+		cout<<"#ATTRIBUTE total_eh_programs_created="<<dec<<firp->GetAllEhPrograms().size()<<endl;
+		cout<<"#ATTRIBUTE total_eh_programs_reused="<<dec<<reusedpgms<<endl;
+		cout<<"#ATTRIBUTE total_eh_programs="<<dec<<firp->GetAllEhPrograms().size()+reusedpgms<<endl;
+
+
 		// will put back in a min, removing for commit
-		//remove_scoop(eh_frame_scoop);
-		//remove_scoop(eh_frame_hdr_scoop);
-		//remove_scoop(gcc_except_table_scoop);
+		remove_scoop(eh_frame_scoop);
+		remove_scoop(eh_frame_hdr_scoop);
+		remove_scoop(gcc_except_table_scoop);
 	
 	}
 };
diff --git a/libtransform/src/integertransform.cpp b/libtransform/src/integertransform.cpp
index 3811ffcbc..04417d1af 100644
--- a/libtransform/src/integertransform.cpp
+++ b/libtransform/src/integertransform.cpp
@@ -27,7 +27,7 @@
  * Find the first occurrence of find in s, ignore case.
  */
 static char *
-my_strcasestr(const char* s, char *find)
+my_strcasestr(const char* s, const char *find)
 {
         char c, sc;
         size_t len;
-- 
GitLab