diff --git a/Makefile b/Makefile index 3a6fad0d497cf093ad07bf564450ed738e21701b..4f8beac89eeeab3f7ea7239b3982131bc0ad8f00 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ all: cd beaengine; make all cd libIRDB; make all cd xform; make all + cd libIRDB; make all cd tools; make all clean: diff --git a/libIRDB/include/function.hpp b/libIRDB/include/function.hpp index a8ec983814a0b2cd06f577c6461c70b15d60743c..c1dd15b721a9b7788d66b4638c179644252cafe9 100644 --- a/libIRDB/include/function.hpp +++ b/libIRDB/include/function.hpp @@ -5,7 +5,8 @@ class Function_t : public BaseObj_t Function_t() : BaseObj_t(NULL) {} // create a new function not in the db - Function_t(db_id_t id, std::string name, int size, int oa_size, bool use_fp, File_t *file); // create a function that's already in the DB + // create a function that's already in the DB + Function_t(db_id_t id, std::string name, int size, int oa_size, bool use_fp, Instruction_t *entry); std::set<Instruction_t*>& GetInstructions() { return my_insns; } @@ -17,8 +18,8 @@ class Function_t : public BaseObj_t void SetName(std::string newname) { name=newname; } void SetOutArgsRegionSize(int oa_size) {out_args_region_size=oa_size;} - void SetFile(File_t* file) {my_file=file;} - File_t* GetFile() { return my_file;} + void SetEntryPoint(Instruction_t *insn) {entry_point=insn;} + Instruction_t* GetEntryPoint() { return entry_point;} void WriteToDB(); // we need the variant ID to write into a program. 
std::string WriteToDB(VariantID_t *vid, db_id_t newid); @@ -28,7 +29,7 @@ class Function_t : public BaseObj_t private: - File_t *my_file; + Instruction_t *entry_point; std::set<Instruction_t*> my_insns; int stack_frame_size; std::string name; diff --git a/libIRDB/include/variantir.hpp b/libIRDB/include/variantir.hpp index bf7bf2ce64b7e2ef52dce7fa6ffdc6ec182199f7..cedbb8839347e3bc7f6d7851ee836c136f7a89da 100644 --- a/libIRDB/include/variantir.hpp +++ b/libIRDB/include/variantir.hpp @@ -41,13 +41,18 @@ class VariantIR_t : public BaseObj_t VariantID_t progid; std::map<db_id_t,File_t*> ReadFilesFromDB(); - std::map<db_id_t,Function_t*> ReadFuncsFromDB ( std::map<db_id_t,File_t*> fileMap) ; - std::map<db_id_t,AddressID_t*> ReadAddrsFromDB ( std::map<db_id_t,File_t*> fileMap, - std::map<db_id_t,Function_t*> funcMap) ; - std::map<db_id_t,Instruction_t*> ReadInsnsFromDB ( std::map<db_id_t,File_t*> fileMap, - std::map<db_id_t,Function_t*> funcMap, - std::map<db_id_t,AddressID_t*> addrMap - ) ; + std::map<db_id_t,AddressID_t*> ReadAddrsFromDB(std::map<db_id_t,File_t*> fileMap); + std::map<db_id_t,Function_t*> ReadFuncsFromDB + ( + std::map<db_id_t,File_t*> fileMap, + std::map<db_id_t,AddressID_t*> addrMap + ); + std::map<db_id_t,Instruction_t*> ReadInsnsFromDB + ( + std::map<db_id_t,File_t*> fileMap, + std::map<db_id_t,Function_t*> funcMap, + std::map<db_id_t,AddressID_t*> addrMap + ) ; }; diff --git a/libIRDB/src/function.cpp b/libIRDB/src/function.cpp index 2bc9d8dffc8eb34cb1a296a46ec2c60d665954bc..bc27e40f03406beba2dc86342888db5be527ff67 100644 --- a/libIRDB/src/function.cpp +++ b/libIRDB/src/function.cpp @@ -6,8 +6,8 @@ using namespace std; -Function_t::Function_t(db_id_t id, std::string myname, int size, int oa_size, bool useFP, File_t* file) - : BaseObj_t(NULL), my_file(file) +Function_t::Function_t(db_id_t id, std::string myname, int size, int oa_size, bool useFP, Instruction_t* entry) + : BaseObj_t(NULL), entry_point(entry) { SetBaseID(id); name=myname; @@ -25,21 
+25,21 @@ void Function_t::WriteToDB() string Function_t::WriteToDB(VariantID_t *vid, db_id_t newid) { assert(vid); - assert(my_file); + assert(entry_point); if(GetBaseID()==NOT_IN_DATABASE) SetBaseID(newid); string q=string("insert into ")+vid->function_table_name + - string(" (function_id, file_id, name, stack_frame_size, out_args_region_size, use_frame_pointer, doip_id) ")+ + string(" (function_id, entry_point_id, name, stack_frame_size, out_args_region_size, use_frame_pointer, doip_id) ")+ string(" VALUES (") + - string("'") + to_string(GetBaseID()) + string("', ") + - string("'") + to_string(my_file->GetBaseID()) + string("', ") + - string("'") + name + string("', ") + - string("'") + to_string(stack_frame_size) + string("', ") + - string("'") + to_string(out_args_region_size) + string("', ") + - string("'") + to_string(use_fp) + string("', ") + - string("'") + to_string(GetDoipID()) + string("') ; ") ; + string("'") + to_string(GetBaseID()) + string("', ") + + string("'") + to_string(entry_point->GetBaseID()) + string("', ") + + string("'") + name + string("', ") + + string("'") + to_string(stack_frame_size) + string("', ") + + string("'") + to_string(out_args_region_size) + string("', ") + + string("'") + to_string(use_fp) + string("', ") + + string("'") + to_string(GetDoipID()) + string("') ; ") ; return q; } diff --git a/libIRDB/src/variantid.cpp b/libIRDB/src/variantid.cpp index 28c98e2aee7913defb99984f9f653366b0c0cec3..6019ffa10259f1284cf35d78ebe9dff6e67c0760 100644 --- a/libIRDB/src/variantid.cpp +++ b/libIRDB/src/variantid.cpp @@ -58,8 +58,8 @@ void VariantID_t::CreateTables() "address_id integer REFERENCES " + address_table_name + ", " + "parent_function_id integer, " "orig_address_id integer, " - "fallthrough_address_id integer, " - "target_address_id integer, " + "fallthrough_address_id integer DEFAULT -1, " + "target_address_id integer DEFAULT -1, " "data bytea, " "callback text, " "comment text, " diff --git a/libIRDB/src/variantir.cpp 
b/libIRDB/src/variantir.cpp index 5ca4f6d8ab4e885a3f79beab4bdc6097a870093c..1eb8050cbb7046f920ba651847eea5f342f9f495 100644 --- a/libIRDB/src/variantir.cpp +++ b/libIRDB/src/variantir.cpp @@ -6,6 +6,26 @@ using namespace libIRDB; using namespace std; +static map<Function_t*,db_id_t> entry_points; +/* Resolve the entry-point instruction IDs recorded in entry_points (filled by ReadFuncsFromDB) into Instruction_t pointers. */ +/* insnMap maps instruction DB IDs to instructions; every recorded ID is asserted to map to a non-NULL instruction. */ +static void UpdateEntryPoints(std::map<db_id_t,Instruction_t*> &insnMap) +{ + /* for each function, look up the instruction that's the entry point; the iterator must be a plain local (a static one is initialized only on the first call, so later calls would start at end() and skip every function) */ + for( map<Function_t*,db_id_t>::const_iterator it=entry_points.begin(); + it!=entry_points.end(); + ++it + ) + { + Function_t* func=(*it).first; + db_id_t func_entry_id=(*it).second; + + assert(insnMap[func_entry_id]); + func->SetEntryPoint(insnMap[func_entry_id]); + } + +} + // Create a Variant from the database VariantIR_t::VariantIR_t(VariantID_t newprogid) : BaseObj_t(NULL) { @@ -19,10 +39,14 @@ VariantIR_t::VariantIR_t(VariantID_t newprogid) : BaseObj_t(NULL) // DB operations void VariantIR_t::ReadFromDB() { + entry_points.clear(); + std::map<db_id_t,File_t*> fileMap=ReadFilesFromDB(); - std::map<db_id_t,Function_t*> funcMap=ReadFuncsFromDB(fileMap); - std::map<db_id_t,AddressID_t*> addrMap=ReadAddrsFromDB(fileMap,funcMap); + std::map<db_id_t,AddressID_t*> addrMap=ReadAddrsFromDB(fileMap); + std::map<db_id_t,Function_t*> funcMap=ReadFuncsFromDB(fileMap, addrMap); std::map<db_id_t,Instruction_t*> insnMap=ReadInsnsFromDB(fileMap,funcMap,addrMap); + + UpdateEntryPoints(insnMap); } std::map<db_id_t,File_t*> VariantIR_t::ReadFilesFromDB() @@ -64,14 +88,14 @@ std::map<db_id_t,File_t*> VariantIR_t::ReadFilesFromDB() std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB ( - std::map<db_id_t,File_t*> fileMap + std::map<db_id_t,File_t*> fileMap, + std::map<db_id_t,AddressID_t*> addrMap ) { std::map<db_id_t,Function_t*> idMap; std::string q= "select * from " + progid.function_table_name + " ; "; - dbintr->IssueQuery(q); while(!dbintr->IsDone()) @@ -79,7 +103,7 @@ std::map<db_id_t,Function_t*>
VariantIR_t::ReadFuncsFromDB // function_id | file_id | name | stack_frame_size | out_args_region_size | use_frame_pointer | doip_id db_id_t fid=atoi(dbintr->GetResultColumn("function_id").c_str()); - db_id_t file_id=atoi(dbintr->GetResultColumn("file_id").c_str()); + db_id_t entry_point_id=atoi(dbintr->GetResultColumn("entry_point_id").c_str()); std::string name=dbintr->GetResultColumn("name"); int sfsize=atoi(dbintr->GetResultColumn("stack_frame_size").c_str()); int oasize=atoi(dbintr->GetResultColumn("out_args_region_size").c_str()); @@ -94,8 +118,9 @@ std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB db_id_t doipid=atoi(dbintr->GetResultColumn("doip_id").c_str()); - Function_t *newfunc=new Function_t(fid,name,sfsize,oasize,useFP,fileMap[file_id]); - + Function_t *newfunc=new Function_t(fid,name,sfsize,oasize,useFP,NULL); + entry_points[newfunc]=entry_point_id; + //std::cout<<"Found function "<<name<<"."<<std::endl; idMap[fid]=newfunc; @@ -108,8 +133,11 @@ std::map<db_id_t,Function_t*> VariantIR_t::ReadFuncsFromDB return idMap; } -std::map<db_id_t,AddressID_t*> VariantIR_t::ReadAddrsFromDB ( std::map<db_id_t,File_t*> fileMap, - std::map<db_id_t,Function_t*> funcMap) + +std::map<db_id_t,AddressID_t*> VariantIR_t::ReadAddrsFromDB + ( + std::map<db_id_t,File_t*> fileMap + ) { std::map<db_id_t,AddressID_t*> idMap; @@ -146,10 +174,12 @@ std::map<db_id_t,AddressID_t*> VariantIR_t::ReadAddrsFromDB ( std::map< } -std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB ( std::map<db_id_t,File_t*> fileMap, - std::map<db_id_t,Function_t*> funcMap, - std::map<db_id_t,AddressID_t*> addrMap - ) +std::map<db_id_t,Instruction_t*> VariantIR_t::ReadInsnsFromDB + ( + std::map<db_id_t,File_t*> fileMap, + std::map<db_id_t,Function_t*> funcMap, + std::map<db_id_t,AddressID_t*> addrMap + ) { std::map<db_id_t,Instruction_t*> idMap; std::map<db_id_t,db_id_t> fallthroughs; @@ -273,6 +303,9 @@ void VariantIR_t::SetBaseIDS() for(std::set<File_t*>::const_iterator 
i=files.begin(); i!=files.end(); ++i) j=MAX(j,(*i)->GetBaseID()); + /* increment past the max ID so we don't duplicate */ + j++; + /* for anything that's not yet in the DB, assign an ID to it */ for(std::set<Function_t*>::const_iterator i=funcs.begin(); i!=funcs.end(); ++i) if((*i)->GetBaseID()==NOT_IN_DATABASE) diff --git a/tools/meds2pdb/meds2pdb.cpp b/tools/meds2pdb/meds2pdb.cpp index 93e5b5e4638e458310848f48d036a45dfdcb205b..b3f25136c7d4f17b0436bba7230adb744d662394 100644 --- a/tools/meds2pdb/meds2pdb.cpp +++ b/tools/meds2pdb/meds2pdb.cpp @@ -8,6 +8,22 @@ using namespace std; using namespace pqxx; +#include <sstream> + +template <class T> +inline std::string my_to_string (const T& t) +{ + std::stringstream ss; + ss << t; + return ss.str(); +} + + +int next_address_id=0; + +map<app_iaddr_t,int> address_to_instructionid_map; + + // extract the file id from the md5 hash and the program name int get_file_id(char *progName, char *md5hash) { @@ -29,20 +45,20 @@ int get_file_id(char *progName, char *md5hash) return -1; // error } + // insert addresses & instructions into DB void insert_instructions(string programName, int fileID, vector<wahoo::Instruction*> instructions, vector<wahoo::Function*> functions) { - cerr << "Inserting instructions in the DB"; + cerr << "Inserting instructions in the DB"<<endl; connection conn; work txn(conn); // for each instruction: // (1) get address, insert into address table // (2) populate instruction table - const int STRIDE = 1000; int count = 0; - for (int i = 0; i < instructions.size(); i += STRIDE) + for (int i = 0; i < instructions.size(); i ++ ) { char buf[128]; string addressTable = programName + "_" + "address"; @@ -51,29 +67,24 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi string instructionTable = programName + "_" + "instruction"; string query2 = "INSERT INTO " + instructionTable; - query2 += " (address_id, parent_function_id, orig_address_id, data, comment) VALUES "; + query2 += " 
(instruction_id,address_id, parent_function_id, orig_address_id, data, comment) VALUES "; - for (int j = i; j < i + STRIDE; ++j) - { - if (j >= instructions.size()) break; - count++; - - wahoo::Instruction *instruction = instructions[j]; + wahoo::Instruction *instruction = instructions[i]; app_iaddr_t addr = instruction->getAddress(); + address_to_instructionid_map[addr]=i; + + int address_id = next_address_id++; + // insert into address table - if (j != i) query += ","; query += "("; - query += txn.quote(j) + ","; + query += txn.quote(address_id) + ","; query += txn.quote(fileID) + ","; sprintf(buf,"%d", addr); query += txn.quote(string(buf)); query += ")"; - // insert into instruction table - if (j != i) query2 += ","; - int address_id = j; int parent_function_id = -1; if (instruction->getFunction()) { @@ -83,7 +94,8 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi string asmData = instruction->getAsm(); query2 += "("; - query2 += txn.quote(address_id) + ","; // j is the address id + query2 += txn.quote(my_to_string(i)) + ","; + query2 += txn.quote(address_id) + ","; // i is the address id query2 += txn.quote(parent_function_id) + ","; query2 += txn.quote(orig_address_id) + ","; @@ -109,7 +121,6 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi query2 += string(buf) + ","; query2 += txn.quote(asmData); query2 += ")"; - } // cerr << "Query: " << query << endl; // cerr << "Query2: " << query2 << endl; @@ -118,9 +129,87 @@ void insert_instructions(string programName, int fileID, vector<wahoo::Instructi txn.exec(query2); } - cerr << "Committing all instructions - this may take a while"; + cerr << "Committing all instructions - this may take a while"<<endl; txn.commit(); - cerr << "Done inserting instructions in the DB"; + cerr << "Done inserting instructions in the DB"<<endl; +} + + +void insert_functions( string programName, int fileID, const vector<wahoo::Function*> &functions ) +{ + connection 
conn; + work txn(conn); + + // bulk insert of function information into the DB + const int STRIDE = 25; + int count = 0; + for (int i = 0; i < functions.size(); i += STRIDE) + { + string functionTable = string(programName) + "_" + "function"; + string query = "INSERT INTO " + functionTable; + query += " (function_id, name, stack_frame_size, out_args_region_size, use_frame_pointer) VALUES "; + + + for (int j = i; j < i + STRIDE; ++j) + { + if (j >= functions.size()) break; + wahoo::Function *f = functions[j]; + string functionName = f->getName(); + app_iaddr_t functionAddress = f->getAddress(); + int functionSize = f->getSize(); + + int function_id = j; + f->setFunctionID(function_id); + + int outArgsRegionSize = f->getOutArgsRegionSize(); + bool useFP = f->getUseFramePointer(); + + if (j != i) query += ","; + query += "("; + query += txn.quote(function_id) + ","; + query += txn.quote(functionName) + ","; + query += txn.quote(functionSize) + ","; + query += txn.quote(outArgsRegionSize) + ","; + query += txn.quote(useFP) + ")"; + + } + + txn.exec(query); + } + + txn.commit(); // must commit o/w everything will be rolled back +} + +void update_functions( string programName, int fileID, const vector<wahoo::Function*> &functions ) +{ + connection conn; + work txn(conn); + + /* point each function row at its entry-point instruction: one UPDATE statement per function, all sent as a single batch */ + const string functionTable = string(programName) + "_" + "function"; + string query; + for (size_t i = 0; i < functions.size(); i++ ) + { + wahoo::Function *f = functions[i]; + /* use find(), not operator[]: operator[] would insert 0 for an unknown address and silently make instruction 0 the entry point */ + map<app_iaddr_t,int>::const_iterator entry = address_to_instructionid_map.find(f->getAddress()); + if (entry == address_to_instructionid_map.end()) + { + cerr << "update_functions: no instruction found at the entry address of function " << f->getName() << endl; + continue; + } + + /* entry->second is the instruction id that insert_instructions recorded for this address */ + query += "update " + functionTable; + query += " set entry_point_id = " + txn.quote(my_to_string(entry->second)); + query += " where function_id = " +
txn.quote(my_to_string(f->getFunctionID())); + query += ";"; + + + } + + txn.exec(query); + txn.commit(); // must commit o/w everything will be rolled back } int main(int argc, char **argv) @@ -141,8 +230,6 @@ int main(int argc, char **argv) cerr << "hash-md5:" << md5hash << endl; cerr << "annotation file:" << annotFile << endl; - connection conn; - work txn(conn); Rewriter *rewriter = new Rewriter(elfFile, annotFile); @@ -162,44 +249,8 @@ int main(int argc, char **argv) cerr << "Number of functions: " << functions.size() << endl; cerr << "Number of instructions: " << instructions.size() << endl; - // bulk insert of function information into the DB - const int STRIDE = 25; - int count = 0; - for (int i = 0; i < functions.size(); i += STRIDE) - { - string functionTable = string(programName) + "_" + "function"; - string query = "INSERT INTO " + functionTable; - query += " (function_id, file_id, name, stack_frame_size, out_args_region_size, use_frame_pointer) VALUES ";//* - - for (int j = i; j < i + STRIDE; ++j) - { - if (j >= functions.size()) break; - - wahoo::Function *f = functions[j]; - string functionName = f->getName(); - app_iaddr_t functionAddress = f->getAddress(); - int functionSize = f->getSize(); - int function_id = j; - int outArgsRegionSize = f->getOutArgsRegionSize();//* - bool useFP = f->getUseFramePointer();//* - - if (j != i) query += ","; - query += "("; - query += txn.quote(function_id) + ","; - query += txn.quote(fileID) + ","; - query += txn.quote(functionName) + ","; - query += txn.quote(functionSize) + ","; - query += txn.quote(outArgsRegionSize) + ","; - query += txn.quote(useFP) + ")"; - - f->setFunctionID(function_id); - } - - txn.exec(query); - } - - txn.commit(); // must commit o/w everything will be rolled back + insert_functions(programName, fileID, functions); insert_instructions(programName, fileID, instructions, functions); + update_functions(programName, fileID, functions); } - diff --git a/xform/elfreader.cpp b/xform/elfreader.cpp index
7a354fef05c3a19f1f5dc345d8a8f3dd5bf38f3c..a9f15b6243d21eed1a9df1409ecc4bf78b9145a2 100644 --- a/xform/elfreader.cpp +++ b/xform/elfreader.cpp @@ -21,10 +21,11 @@ ElfReader::ElfReader(char *p_elfFile) { // For all sections const IELFISection* pSec = m_reader->GetSection( i ); m_sections.push_back(pSec); - +#if 0 std::cout << "Sec. name: " << pSec->GetName() << " Sec. offset: " << pSec->GetOffset() << " Sec. size: " << pSec->GetSize() << std::endl; +#endif } std::cout << std::endl;