From dbcc913071d9ee52f9151d14e1c8b667ca5628b5 Mon Sep 17 00:00:00 2001
From: jdh8d <jdh8d@git.zephyr-software.com>
Date: Thu, 18 Aug 2011 20:00:24 +0000
Subject: [PATCH] Fixes to record the function entry point

---
 Makefile                      |   1 +
 libIRDB/include/function.hpp  |   9 +-
 libIRDB/include/variantir.hpp |  19 ++--
 libIRDB/src/function.cpp      |  22 ++---
 libIRDB/src/variantid.cpp     |   4 +-
 libIRDB/src/variantir.cpp     |  59 +++++++++---
 tools/meds2pdb/meds2pdb.cpp   | 169 ++++++++++++++++++++++------------
 xform/elfreader.cpp           |   3 +-
 8 files changed, 189 insertions(+), 97 deletions(-)

diff --git a/Makefile b/Makefile
index 3a6fad0d4..4f8beac89 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@ all:
 	cd beaengine; make all
 	cd libIRDB; make all
 	cd xform; make all
+	cd libIRDB; make all
 	cd tools; make all
 
 clean:
diff --git a/libIRDB/include/function.hpp b/libIRDB/include/function.hpp
index a8ec98381..c1dd15b72 100644
--- a/libIRDB/include/function.hpp
+++ b/libIRDB/include/function.hpp
@@ -5,7 +5,8 @@ class Function_t : public BaseObj_t
 	
 	Function_t() : BaseObj_t(NULL) {}	// create a new function not in the db 
 
-	Function_t(db_id_t id, std::string name, int size, int oa_size, bool use_fp, File_t *file);	// create a function that's already in the DB  
+	// create a function that's already in the DB  
+	Function_t(db_id_t id, std::string name, int size, int oa_size, bool use_fp, Instruction_t *entry);	
 
         std::set<Instruction_t*>& GetInstructions() { return my_insns; }
 
@@ -17,8 +18,8 @@ class Function_t : public BaseObj_t
         void SetName(std::string newname)	 { name=newname; }
         void SetOutArgsRegionSize(int oa_size) {out_args_region_size=oa_size;}
 
-	void SetFile(File_t* file) {my_file=file;}
-	File_t* GetFile() { return my_file;}
+	void SetEntryPoint(Instruction_t *insn) {entry_point=insn;}
+	Instruction_t* GetEntryPoint() { return entry_point;}
 
 	void WriteToDB();		// we need the variant ID to write into a program.
 	std::string WriteToDB(VariantID_t *vid, db_id_t newid);
@@ -28,7 +29,7 @@ class Function_t : public BaseObj_t
 
 
     private:
-	File_t *my_file;
+	Instruction_t *entry_point;
         std::set<Instruction_t*> my_insns;
         int stack_frame_size;
         std::string name;
diff --git a/libIRDB/include/variantir.hpp b/libIRDB/include/variantir.hpp
index bf7bf2ce6..cedbb8839 100644
--- a/libIRDB/include/variantir.hpp
+++ b/libIRDB/include/variantir.hpp
@@ -41,13 +41,18 @@ class VariantIR_t : public BaseObj_t
         VariantID_t progid;
 
 	std::map<db_id_t,File_t*> ReadFilesFromDB();
-	std::map<db_id_t,Function_t*> ReadFuncsFromDB ( 	std::map<db_id_t,File_t*> fileMap) ;
-	std::map<db_id_t,AddressID_t*> ReadAddrsFromDB  ( 	std::map<db_id_t,File_t*> fileMap, 
-									std::map<db_id_t,Function_t*> funcMap) ;
-	std::map<db_id_t,Instruction_t*> ReadInsnsFromDB (	std::map<db_id_t,File_t*> fileMap, 
-									std::map<db_id_t,Function_t*> funcMap,
-									std::map<db_id_t,AddressID_t*> addrMap
-									) ;
+	std::map<db_id_t,AddressID_t*> ReadAddrsFromDB(std::map<db_id_t,File_t*> fileMap);
+	std::map<db_id_t,Function_t*> ReadFuncsFromDB
+		(
+		 	std::map<db_id_t,File_t*> fileMap,
+			std::map<db_id_t,AddressID_t*> addrMap
+		);
+	std::map<db_id_t,Instruction_t*> ReadInsnsFromDB 
+		(	
+			std::map<db_id_t,File_t*> fileMap, 
+			std::map<db_id_t,Function_t*> funcMap,
+			std::map<db_id_t,AddressID_t*> addrMap
+		) ;
 
 };
 
diff --git a/libIRDB/src/function.cpp b/libIRDB/src/function.cpp
index 2bc9d8dff..bc27e40f0 100644
--- a/libIRDB/src/function.cpp
+++ b/libIRDB/src/function.cpp
@@ -6,8 +6,8 @@ using namespace std;
 
 
 
-Function_t::Function_t(db_id_t id, std::string myname, int size, int oa_size, bool useFP, File_t* file)
-	: BaseObj_t(NULL), my_file(file)
+Function_t::Function_t(db_id_t id, std::string myname, int size, int oa_size, bool useFP, Instruction_t* entry)
+	: BaseObj_t(NULL), entry_point(entry)
 {
 	SetBaseID(id);
 	name=myname;
@@ -25,21 +25,21 @@ void Function_t::WriteToDB()
 string Function_t::WriteToDB(VariantID_t *vid, db_id_t newid)
 {
 	assert(vid);
-	assert(my_file);
+	assert(entry_point);
 
 	if(GetBaseID()==NOT_IN_DATABASE)
 		SetBaseID(newid);
 
 	string q=string("insert into ")+vid->function_table_name + 
-		string(" (function_id, file_id, name, stack_frame_size, out_args_region_size, use_frame_pointer, doip_id) ")+
+		string(" (function_id, entry_point_id, name, stack_frame_size, out_args_region_size, use_frame_pointer, doip_id) ")+
 		string(" VALUES (") + 
-		string("'") + to_string(GetBaseID()) 		+ string("', ") + 
-		string("'") + to_string(my_file->GetBaseID()) 	+ string("', ") + 
-		string("'") + name 				+ string("', ") + 
-		string("'") + to_string(stack_frame_size) 	+ string("', ") + 
-	        string("'") + to_string(out_args_region_size) 	+ string("', ") + 
-	        string("'") + to_string(use_fp) 	+ string("', ") + 
-		string("'") + to_string(GetDoipID()) 		+ string("') ; ") ;
+		string("'") + to_string(GetBaseID()) 		  + string("', ") + 
+		string("'") + to_string(entry_point->GetBaseID()) + string("', ") + 
+		string("'") + name 				  + string("', ") + 
+		string("'") + to_string(stack_frame_size) 	  + string("', ") + 
+	        string("'") + to_string(out_args_region_size) 	  + string("', ") + 
+	        string("'") + to_string(use_fp) 		  + string("', ") + 
+		string("'") + to_string(GetDoipID()) 		  + string("') ; ") ;
 
 	return q;
 }
diff --git a/libIRDB/src/variantid.cpp b/libIRDB/src/variantid.cpp
index 28c98e2ae..6019ffa10 100644
--- a/libIRDB/src/variantid.cpp
+++ b/libIRDB/src/variantid.cpp
@@ -58,8 +58,8 @@ void VariantID_t::CreateTables()
   		"address_id                integer REFERENCES " + address_table_name + ", " +
   		"parent_function_id        integer, "
   		"orig_address_id           integer, "
-  		"fallthrough_address_id    integer, "
-  		"target_address_id         integer, "
+  		"fallthrough_address_id    integer DEFAULT -1, "
+  		"target_address_id         integer DEFAULT -1, "
   		"data                      bytea, "
   		"callback                  text, "
   		"comment                   text, "
diff --git a/libIRDB/src/variantir.cpp b/libIRDB/src/variantir.cpp
index 5ca4f6d8a..1eb8050cb 100644
--- a/libIRDB/src/variantir.cpp
+++ b/libIRDB/src/variantir.cpp
@@ -6,6 +6,26 @@
 using namespace libIRDB;
 using namespace std;
 
+static map<Function_t*,db_id_t> entry_points;
+
+
+// Resolve the entry-point instruction ids recorded in entry_points (filled in
+// by ReadFuncsFromDB) to the Instruction_t objects held in insnMap.
+static void UpdateEntryPoints(std::map<db_id_t,Instruction_t*> 	&insnMap)
+{
+	// The iterator must not be static: a static one is initialized only on the
+	// first call, so every later call would start from a stale position.
+	for(map<Function_t*,db_id_t>::const_iterator it=entry_points.begin();
+		it!=entry_points.end(); ++it)
+	{
+		// find(), not operator[]: a missing id must not insert a NULL entry
+		std::map<db_id_t,Instruction_t*>::const_iterator insn=insnMap.find(it->second);
+		Instruction_t* entry=(insn==insnMap.end()) ? NULL : insn->second;
+		assert(entry);
+		it->first->SetEntryPoint(entry);
+	}
+}
+
 // Create a Variant from the database
 VariantIR_t::VariantIR_t(VariantID_t newprogid) : BaseObj_t(NULL)
 {
@@ -19,10 +39,14 @@ VariantIR_t::VariantIR_t(VariantID_t newprogid) : BaseObj_t(NULL)
 // DB operations
 void VariantIR_t::ReadFromDB()
 {
+	entry_points.clear();
+
 	std::map<db_id_t,File_t*>	fileMap=ReadFilesFromDB();
-	std::map<db_id_t,Function_t*> 	funcMap=ReadFuncsFromDB(fileMap);
-	std::map<db_id_t,AddressID_t*> 	addrMap=ReadAddrsFromDB(fileMap,funcMap);
+	std::map<db_id_t,AddressID_t*> 	addrMap=ReadAddrsFromDB(fileMap);
+	std::map<db_id_t,Function_t*> 	funcMap=ReadFuncsFromDB(fileMap, addrMap);
 	std::map<db_id_t,Instruction_t*> 	insnMap=ReadInsnsFromDB(fileMap,funcMap,addrMap);
+
+	UpdateEntryPoints(insnMap);
 }
 
 std::map<db_id_t,File_t*> VariantIR_t::ReadFilesFromDB()
@@ -64,14 +88,14 @@ std::map<db_id_t,File_t*> VariantIR_t::ReadFilesFromDB()
 
 std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB
 	(
-		std::map<db_id_t,File_t*> fileMap
+		std::map<db_id_t,File_t*> fileMap,
+        	std::map<db_id_t,AddressID_t*> addrMap
 	)
 {
 	std::map<db_id_t,Function_t*> idMap;
 
 	std::string q= "select * from " + progid.function_table_name + " ; ";
 
-
 	dbintr->IssueQuery(q);
 
 	while(!dbintr->IsDone())
@@ -79,7 +103,7 @@ std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB
 // function_id | file_id | name | stack_frame_size | out_args_region_size | use_frame_pointer | doip_id
 
 		db_id_t fid=atoi(dbintr->GetResultColumn("function_id").c_str());
-		db_id_t file_id=atoi(dbintr->GetResultColumn("file_id").c_str());
+		db_id_t entry_point_id=atoi(dbintr->GetResultColumn("entry_point_id").c_str());
 		std::string name=dbintr->GetResultColumn("name");
 		int sfsize=atoi(dbintr->GetResultColumn("stack_frame_size").c_str());
 		int oasize=atoi(dbintr->GetResultColumn("out_args_region_size").c_str());
@@ -94,8 +118,9 @@ std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB
 
 		db_id_t doipid=atoi(dbintr->GetResultColumn("doip_id").c_str());
 
-		Function_t *newfunc=new Function_t(fid,name,sfsize,oasize,useFP,fileMap[file_id]);
-
+		Function_t *newfunc=new Function_t(fid,name,sfsize,oasize,useFP,NULL); 
+		entry_points[newfunc]=entry_point_id;
+		
 //std::cout<<"Found function "<<name<<"."<<std::endl;
 
 		idMap[fid]=newfunc;
@@ -108,8 +133,11 @@ std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB
 	return idMap;
 }
 
-std::map<db_id_t,AddressID_t*> VariantIR_t::ReadAddrsFromDB  (         std::map<db_id_t,File_t*> fileMap,
-                                                                        std::map<db_id_t,Function_t*> funcMap) 
+
+std::map<db_id_t,AddressID_t*> VariantIR_t::ReadAddrsFromDB  
+	(
+	std::map<db_id_t,File_t*> fileMap
+	) 
 {
 	std::map<db_id_t,AddressID_t*> idMap;
 
@@ -146,10 +174,12 @@ std::map<db_id_t,AddressID_t*> VariantIR_t::ReadAddrsFromDB  (         std::map<
 }
 
 
-std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB (      std::map<db_id_t,File_t*> fileMap,
-                                                                        std::map<db_id_t,Function_t*> funcMap,
-                                                                        std::map<db_id_t,AddressID_t*> addrMap
-                                                                        ) 
+std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB 
+	(      
+	std::map<db_id_t,File_t*> fileMap,
+        std::map<db_id_t,Function_t*> funcMap,
+        std::map<db_id_t,AddressID_t*> addrMap
+        ) 
 {
 	std::map<db_id_t,Instruction_t*> idMap;
 	std::map<db_id_t,db_id_t> fallthroughs;
@@ -273,6 +303,9 @@ void VariantIR_t::SetBaseIDS()
 	for(std::set<File_t*>::const_iterator i=files.begin(); i!=files.end(); ++i)
 		j=MAX(j,(*i)->GetBaseID());
 
+	/* increment past the max ID so we don't duplicate */
+	j++;
+
 	/* for anything that's not yet in the DB, assign an ID to it */
 	for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i)
 		if((*i)->GetBaseID()==NOT_IN_DATABASE)
diff --git a/tools/meds2pdb/meds2pdb.cpp b/tools/meds2pdb/meds2pdb.cpp
index 93e5b5e46..b3f25136c 100644
--- a/tools/meds2pdb/meds2pdb.cpp
+++ b/tools/meds2pdb/meds2pdb.cpp
@@ -8,6 +8,22 @@
 using namespace std;
 using namespace pqxx;
 
+#include <sstream>
+
+// Render any value that supports operator<< on an ostream as a std::string.
+template <class T> inline std::string my_to_string (const T& t)
+{
+        std::ostringstream rendered;
+        rendered << t;
+        return rendered.str();
+}
+
+
+int next_address_id=0;
+
+map<app_iaddr_t,int> address_to_instructionid_map;
+
+
 // extract the file id from the md5 hash and the program name
 int get_file_id(char *progName, char *md5hash)
 {
@@ -29,20 +45,20 @@ int get_file_id(char *progName, char *md5hash)
   return -1; // error
 }
 
+
 // insert addresses & instructions into DB
 void insert_instructions(string programName, int fileID, vector<wahoo::Instruction*> instructions, vector<wahoo::Function*> functions)
 {
-  cerr << "Inserting instructions in the DB";
+  cerr << "Inserting instructions in the DB"<<endl;
   connection conn;
   work txn(conn);
   // for each instruction:
   //    (1) get address, insert into address table
   //    (2) populate instruction table
 
-  const int STRIDE = 1000;
   int count = 0;
 
-  for (int i = 0; i < instructions.size(); i += STRIDE)
+  for (int i = 0; i < instructions.size(); i ++ )
   {
     char buf[128];
     string addressTable = programName + "_" + "address";
@@ -51,29 +67,24 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi
 
     string instructionTable = programName + "_" + "instruction";
     string query2 = "INSERT INTO " + instructionTable;
-    query2 += " (address_id, parent_function_id, orig_address_id, data, comment) VALUES ";
+    query2 += " (instruction_id,address_id, parent_function_id, orig_address_id, data, comment) VALUES ";
 
-    for (int j = i; j < i + STRIDE; ++j)
-    {
-      if (j >= instructions.size()) break;
-      count++;
-
-      wahoo::Instruction *instruction = instructions[j];
+      wahoo::Instruction *instruction = instructions[i];
       app_iaddr_t   addr = instruction->getAddress();
 
+      address_to_instructionid_map[addr]=i;
+
+      int address_id = next_address_id++;
+
       // insert into address table
-      if (j != i) query += ",";
       query += "(";
-      query += txn.quote(j) + ",";
+      query += txn.quote(address_id) + ",";
       query += txn.quote(fileID) + ",";
       sprintf(buf,"%d", addr);
       query += txn.quote(string(buf));
       query += ")";
 
-      // insert into instruction table
-      if (j != i) query2 += ",";
 
-      int address_id = j;
       int parent_function_id = -1;
       if (instruction->getFunction())
       {
@@ -83,7 +94,8 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi
       string asmData = instruction->getAsm();
 
       query2 += "(";
-      query2 += txn.quote(address_id) + ","; // j is the address id
+      query2 += txn.quote(my_to_string(i)) + ",";
+      query2 += txn.quote(address_id) + ","; // i is the address id
       query2 += txn.quote(parent_function_id) + ","; 
       query2 += txn.quote(orig_address_id) + ","; 
 
@@ -109,7 +121,6 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi
       query2 += string(buf) + ","; 
       query2 += txn.quote(asmData); 
       query2 += ")";
-    }
 
 //   cerr << "Query: " << query << endl; 
 //   cerr << "Query2: " << query2 << endl; 
@@ -118,9 +129,87 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi
     txn.exec(query2); 
   }
 
-  cerr << "Committing all instructions - this may take a while";
+  cerr << "Committing all instructions - this may take a while"<<endl;
   txn.commit();
-  cerr << "Done inserting instructions in the DB";
+  cerr << "Done inserting instructions in the DB"<<endl;
+}
+
+
+// Insert one row per function into the <programName>_function table and give
+// each function its id (its index in `functions`) through setFunctionID().
+// entry_point_id is not written here; update_functions() fills it in after the
+// instructions have been inserted.  fileID is currently unused.
+void insert_functions( string programName, int fileID, const vector<wahoo::Function*> &functions  )
+{
+  connection conn;
+  work txn(conn);
+
+  // rows are sent in multi-row INSERT statements of at most STRIDE rows each
+  const size_t STRIDE = 25;
+  const size_t total = functions.size();
+  const string functionTable = programName + "_" + "function";
+
+  for (size_t first = 0; first < total; first += STRIDE)
+  {
+    // one past the last row of this batch, clamped to the end of the vector
+    const size_t last = (first + STRIDE < total) ? first + STRIDE : total;
+    string query = "INSERT INTO " + functionTable;
+    query += " (function_id, name, stack_frame_size, out_args_region_size, use_frame_pointer) VALUES ";
+    for (size_t j = first; j < last; ++j)
+    {
+      wahoo::Function *f = functions[j];
+      const string functionName = f->getName();
+      const int functionSize = f->getSize();
+      const int outArgsRegionSize = f->getOutArgsRegionSize();
+      const bool useFP = f->getUseFramePointer();
+      // the function's position in the vector doubles as its database id
+      const int function_id = static_cast<int>(j);
+      f->setFunctionID(function_id);
+
+      if (j != first) query += ",";	// separates the rows of the VALUES list
+      query += "(";
+      query += txn.quote(function_id) + ",";
+      query += txn.quote(functionName) + ",";
+      query += txn.quote(functionSize) + ",";	// goes into stack_frame_size
+      query += txn.quote(outArgsRegionSize) + ",";
+      query += txn.quote(useFP) + ")";
+    }
+    txn.exec(query);
+  }
+
+  txn.commit(); // must commit o/w everything will be rolled back
+}
+
+// Record each function's entry point: set entry_point_id in the
+// <programName>_function table to the id of the instruction at the function's
+// address.  Ids are looked up in address_to_instructionid_map, which
+// insert_instructions() fills in, so call this afterwards.  fileID is unused.
+void update_functions( string programName, int fileID, const vector<wahoo::Function*> &functions  )
+{
+  connection conn;
+  work txn(conn);
+
+  const string functionTable = programName + "_" + "function";
+  string query;	// every UPDATE statement, sent to the server in a single exec()
+  for (size_t i = 0; i < functions.size(); i++ )
+  {
+    wahoo::Function *f = functions[i];
+    const int function_id = f->getFunctionID();
+    // find(), not operator[]: operator[] would insert id 0 for an unknown
+    // address and silently make instruction 0 the function's entry point.
+    map<app_iaddr_t,int>::const_iterator insn = address_to_instructionid_map.find(f->getAddress());
+    if (insn == address_to_instructionid_map.end())
+    {
+      cerr << "No instruction at the address of function " << f->getName() << "; entry point not set" << endl;
+      continue;
+    }
+    query += "update " + functionTable;
+    query += " set entry_point_id = " + txn.quote(my_to_string(insn->second));
+    query += " where function_id = " + txn.quote(my_to_string(function_id)) + ";";
+  }
+
+  if (!query.empty()) txn.exec(query);	// nothing to send if no function was resolved
+  txn.commit(); // must commit o/w everything will be rolled back
+}
 
 int main(int argc, char **argv)
@@ -141,8 +230,6 @@ int main(int argc, char **argv)
   cerr << "hash-md5:" << md5hash << endl;
   cerr << "annotation file:" << annotFile << endl;
 
-  connection conn;
-  work txn(conn);
 
   Rewriter *rewriter = new Rewriter(elfFile, annotFile);
 
@@ -162,44 +249,8 @@ int main(int argc, char **argv)
   cerr << "Number of functions: " << functions.size() << endl;
   cerr << "Number of instructions: " << instructions.size() << endl;
 
-  // bulk insert of function information into the DB
-  const int STRIDE = 25;
-  int count = 0;
-  for (int i = 0; i < functions.size(); i += STRIDE)
-  {  
-    string functionTable = string(programName) + "_" + "function";
-    string query = "INSERT INTO " + functionTable;
-    query += " (function_id, file_id, name, stack_frame_size, out_args_region_size, use_frame_pointer) VALUES ";//*
-
-    for (int j = i; j < i + STRIDE; ++j)
-    {
-      if (j >= functions.size()) break;
-    
-      wahoo::Function *f = functions[j];
-      string functionName = f->getName();
-      app_iaddr_t functionAddress = f->getAddress();
-      int functionSize = f->getSize();
-      int function_id = j;
-      int outArgsRegionSize = f->getOutArgsRegionSize();//*
-      bool useFP = f->getUseFramePointer();//*
-
-      if (j != i) query += ",";
-      query += "(";
-      query += txn.quote(function_id) + ",";
-      query += txn.quote(fileID) + ",";
-      query += txn.quote(functionName) + ",";
-      query += txn.quote(functionSize) + ",";
-      query += txn.quote(outArgsRegionSize) + ",";
-      query += txn.quote(useFP) + ")";
-
-      f->setFunctionID(function_id);
-    }
-
-    txn.exec(query);
-  }
-
-  txn.commit(); // must commit o/w everything will be rolled back
 
+  insert_functions(programName, fileID, functions);
   insert_instructions(programName, fileID, instructions, functions);
+  update_functions(programName, fileID, functions);
 }
-
diff --git a/xform/elfreader.cpp b/xform/elfreader.cpp
index 7a354fef0..a9f15b624 100644
--- a/xform/elfreader.cpp
+++ b/xform/elfreader.cpp
@@ -21,10 +21,11 @@ ElfReader::ElfReader(char *p_elfFile)
     {    // For all sections
         const IELFISection* pSec = m_reader->GetSection( i );
         m_sections.push_back(pSec);
-
+#if 0
         std::cout << "Sec. name: " << pSec->GetName() 
                   << " Sec. offset: " << pSec->GetOffset() 
                   << " Sec. size: " << pSec->GetSize() << std::endl;
+#endif
 
     }
     std::cout << std::endl;
-- 
GitLab