diff --git a/libIRDB/include/core/scoop.hpp b/libIRDB/include/core/scoop.hpp index e17f6efb321bf53131affa49dbf5e9b9d05230f5..1d9345c7d4bdb8e5a03d72ffc5a84c7b85e6a529 100644 --- a/libIRDB/include/core/scoop.hpp +++ b/libIRDB/include/core/scoop.hpp @@ -92,6 +92,8 @@ class DataScoop_t : public BaseObj_t void ClearRelRo() { is_relro=false; } std::string WriteToDB(File_t *fid, db_id_t newid); + std::string WriteToDBRange(File_t *fid, db_id_t newid, int start, int end, std::string table_name); + private: const static int permissions_r=4; diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp index c0861dbc5e5a2fa8386065900515658d22e9de59..a0d7830160b9ba6fd85ddc60c04eb8dadbcadb31 100644 --- a/libIRDB/src/core/fileir.cpp +++ b/libIRDB/src/core/fileir.cpp @@ -18,7 +18,6 @@ * */ - #include <all.hpp> #include <utils.hpp> #include <cstdlib> @@ -33,6 +32,9 @@ using namespace std; static map<Function_t*,db_id_t> entry_points; +#define SCOOP_CHUNK_SIZE (10*1024*1024) /* 10 mb */ + + #undef EIP @@ -516,14 +518,15 @@ void FileIR_t::WriteToDB() db_id_t j=-1; - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->instruction_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->icfs_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->icfs_map_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->function_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->address_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->relocs_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->types_table_name + string(" cascade;")); - dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->scoop_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->instruction_table_name + string(" cascade;")); + 
dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->icfs_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->icfs_map_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->function_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->address_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->relocs_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->types_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->scoop_table_name + string(" cascade;")); + dbintr->IssueQuery(string("TRUNCATE TABLE ")+ fileptr->scoop_table_name+"_part2"+ string(" cascade;")); /* and now that everything has an ID, let's write to the DB */ @@ -629,8 +632,8 @@ void FileIR_t::WriteToDB() Relocation_t* reloc=*it; r+=reloc->WriteToDB(fileptr,*i); } - - dbintr->IssueQuery(r); + if(r!="") + dbintr->IssueQuery(r); } dbintr->IssueQuery(q); @@ -646,14 +649,17 @@ void FileIR_t::WriteToDB() DataScoop_t* scoop = *it; assert(scoop); string q = scoop->WriteToDB(fileptr,j); + dbintr->IssueQuery(q); + q=""; const std::set<Relocation_t*> &the_relocs = scoop->GetRelocations(); for(set<Relocation_t*>::const_iterator rit=the_relocs.begin(); rit!=the_relocs.end(); ++rit) { Relocation_t* reloc=*rit; q+=reloc->WriteToDB(fileptr,scoop); } - dbintr->IssueQuery(q); + if(q!="") + dbintr->IssueQuery(q); } } @@ -1185,10 +1191,32 @@ std::map<db_id_t,DataScoop_t*> FileIR_t::ReadScoopsFromDB std::map<db_id_t,DataScoop_t*> scoopMap; - std::string q= "select * from " + fileptr->scoop_table_name + " ; "; - + //std::map<db_id_t,string> bonus_contents; + // + // read part 2 of the scoops. 
+ //std::string q= "select * from " + fileptr->scoop_table_name + "_part2 ; "; + //dbintr->IssueQuery(q); + //while(!dbintr->IsDone()) + //{ + // db_id_t sid=atoi(dbintr->GetResultColumn("scoop_id").c_str()); + // bonus_contents[sid]=dbintr->GetResultColumn("data"); + // dbintr->MoveToNextRow(); + //} + + + + // read part 1 of the scoops, and merge in the part 2s + // scoop_id SERIAL PRIMARY KEY, -- key + // name text DEFAULT '', -- string representation of the type + // type_id integer, -- the type of the data, as an index into the table table. + // start_address_id integer, -- address id for start. + // end_address_id integer, -- address id for end + // permissions integer, -- in umask format (bitmask for rwx) + // relro bit, -- is this scoop a relro scoop (i.e., is r/w until relocs are done). + // data bytea -- the actual bytes of the scoop + + string q= "select scoop_id,name,type_id,start_address_id,end_address_id,permissions,relro from " + fileptr->scoop_table_name + " ; "; dbintr->IssueQuery(q); - while(!dbintr->IsDone()) { @@ -1202,9 +1230,8 @@ std::map<db_id_t,DataScoop_t*> FileIR_t::ReadScoopsFromDB AddressID_t* end_addr=addrMap[end_id]; int permissions=atoi(dbintr->GetResultColumn("permissions").c_str()); bool is_relro=atoi(dbintr->GetResultColumn("relro").c_str()) != 0 ; - std::string contents=dbintr->GetResultColumn("data"); - DataScoop_t* newscoop=new DataScoop_t(sid,name,start_addr,end_addr,type,permissions,is_relro,contents); + DataScoop_t* newscoop=new DataScoop_t(sid,name,start_addr,end_addr,type,permissions,is_relro,""); assert(newscoop); GetDataScoops().insert(newscoop); dbintr->MoveToNextRow(); @@ -1212,6 +1239,58 @@ std::map<db_id_t,DataScoop_t*> FileIR_t::ReadScoopsFromDB scoopMap[sid]=newscoop; } + for(DataScoopSet_t::iterator it=GetDataScoops().begin(); it!=GetDataScoops().end(); ++it) + { + DataScoop_t* scoop=*it; + + q= "select length(data) from " + fileptr->scoop_table_name + " where scoop_id='"+to_string(scoop->GetBaseID())+"'; "; + 
dbintr->IssueQuery(q); + if(!dbintr->IsDone()) + { + int data_len=atoi(dbintr->GetResultColumn("length").c_str()); + for(int i=0;i<data_len;i+=SCOOP_CHUNK_SIZE) + { + string start_pos=to_string(i+1); /* SQL substr() positions are 1-based; starting at 0 loses the final byte when the length is an exact multiple of SCOOP_CHUNK_SIZE */ + string len_to_get=to_string(SCOOP_CHUNK_SIZE); + string field="substr(data,"+start_pos+","+len_to_get+")"; + q= "select "+field+" from " + fileptr->scoop_table_name + " where scoop_id='"+to_string(scoop->GetBaseID())+"'; "; + dbintr->IssueQuery(q); + + scoop->GetContents()+=dbintr->GetResultColumn("substr"); + + } + } + + + // read part 2 from db + q= "select length(data) from " + fileptr->scoop_table_name + "_part2 where scoop_id='"+to_string(scoop->GetBaseID())+"'; "; + dbintr->IssueQuery(q); + if(!dbintr->IsDone()) + { + int part2_len=atoi(dbintr->GetResultColumn("length").c_str()); + for(int i=0;i<part2_len; i+=SCOOP_CHUNK_SIZE) + { + string start_pos=to_string(i+1); /* 1-based substr() position, same reasoning as the part 1 loop */ + string len_to_get=to_string(SCOOP_CHUNK_SIZE); + string field="substr(data,"+start_pos+","+len_to_get+")"; + q= "select "+field+" from " + fileptr->scoop_table_name + "_part2 where scoop_id='"+to_string(scoop->GetBaseID())+"'; "; + dbintr->IssueQuery(q); + + scoop->GetContents()+=dbintr->GetResultColumn("substr"); + + } + } + } + for( DataScoopSet_t::const_iterator it=GetDataScoops().begin(); + it!=GetDataScoops().end(); + ++it + ) + { + DataScoop_t* scoop=*it; + assert(scoop->GetContents().size() == scoop->GetSize()); + } + + + return scoopMap; } diff --git a/libIRDB/src/core/scoop.cpp b/libIRDB/src/core/scoop.cpp index e727dcb29d6eacbc77c5bbe4b5c08d3f59d11085..80dd9ed63c115e4ccc5465d1747686828a882740 100644 --- a/libIRDB/src/core/scoop.cpp +++ b/libIRDB/src/core/scoop.cpp @@ -8,8 +8,28 @@ using namespace std; using namespace libIRDB; +#define SCOOP_THRESHOLD 990000000 /* almost 1gb -- leaving a bit of head room for overhead sql syntax overheads */ +#define SCOOP_CHUNK_SIZE (10*1024*1024) /* 10 mb */ + +//#define SCOOP_THRESHOLD 10 /* almost 1gb -- leaving a bit of head room for overhead sql syntax 
overheads */ +//#define SCOOP_CHUNK_SIZE (128) string DataScoop_t::WriteToDB(File_t *fid, db_id_t newid) +{ + string q= ""; + + if(contents.length() < SCOOP_THRESHOLD) + q+=WriteToDBRange(fid,newid, 0, contents.length(), fid->scoop_table_name); + else + { + q+=WriteToDBRange(fid,newid, 0, SCOOP_THRESHOLD, fid->scoop_table_name); + q+=WriteToDBRange(fid,newid , SCOOP_THRESHOLD, contents.length(), fid->scoop_table_name+"_part2"); + } + + return q; +} + +string DataScoop_t::WriteToDBRange(File_t *fid, db_id_t newid, int start, int end, string table_name) { /* @@ -27,7 +47,7 @@ string DataScoop_t::WriteToDB(File_t *fid, db_id_t newid) ostringstream hex_data; - string q=string("insert into ")+fid->scoop_table_name + + string q=string("insert into ")+table_name+ string(" (scoop_id, name, type_id, start_address_id, end_address_id, data, permissions, relro) ")+ string(" VALUES (") + string("'") + to_string(GetBaseID()) + string("', ") + @@ -35,22 +55,49 @@ string DataScoop_t::WriteToDB(File_t *fid, db_id_t newid) string("'") + to_string(type_id) + string("', ") + string("'") + to_string(GetStart()->GetBaseID()) + string("', ") + string("'") + to_string(GetEnd()->GetBaseID()) + string("', ") + - string("decode('"); + string("'") + /* empty data field -- for now + */ string("', ") + + string("'") + to_string(permissions) + string("', ") + + string("'") + to_string(is_relro) + string("'); ") ; + // add the table row with empty data field. + dbintr->IssueQuery(q); + - hex_data << setfill('0') << hex; - for (size_t i = 0; i < contents.length(); ++i) + // now try to append the data to the field in chunks. 
+ + string query_start="update "+table_name+" set data = data || decode('"; + string query_end="', 'hex') where scoop_id="+ string("'") + to_string(GetBaseID()) + string("'; ") ; + + + hex_data << query_start << setfill('0') << hex; + for (size_t i = start; i < end; ++i) { hex_data << setw(2) << (int)(contents[i]&0xff); - q+=hex_data.str(); - hex_data.str(""); // reset to empty - hex_data.clear(); + + stringstream::pos_type offset = hex_data.tellp(); + + if(offset > SCOOP_CHUNK_SIZE) + { + // q+=hex_data.str(); + // hex_data.str(""); // reset to empty + // hex_data.clear(); + + // tag the end, + hex_data << query_end; + + // append this chunk to the db. + dbintr->IssueQuery(hex_data.str()); + + // restart + hex_data.str(""); // reset to empty + hex_data.clear(); + hex_data << query_start << setfill('0') << hex; + } } - + hex_data << query_end; - q+= string("', 'hex'), ") + - string("'") + to_string(permissions) + string("', ") + - string("'") + to_string(is_relro) + string("'); ") ; + // append this chunk to the db. 
+ dbintr->IssueQuery(hex_data.str()); - return q; + return ""; } diff --git a/libIRDB/src/core/variantid.cpp b/libIRDB/src/core/variantid.cpp index 0492d3af2b184f3081492116096961867320f988..76ac67587876d3a5c86e8b338ac738012a8c5e8e 100644 --- a/libIRDB/src/core/variantid.cpp +++ b/libIRDB/src/core/variantid.cpp @@ -189,6 +189,7 @@ File_t* VariantID_t::CloneFile(File_t* fptr) std::string icfsmap="icfsmapfid"+to_string(newfid); std::string rtn="rtnfid"+to_string(newfid); std::string dtn="dtnfid"+to_string(newfid); + std::string dtn_part2="dtnfid"+to_string(newfid)+"_part2"; std::string typ="typfid"+to_string(newfid); q ="update file_info set address_table_name='"; @@ -258,6 +259,11 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=" ; "; dbintr->IssueQuery(q); + q="drop table "; + q+=dtn_part2; + q+=" ; "; + dbintr->IssueQuery(q); + // next issue SQL to clone each table q="select * into "; q+=atn; @@ -315,6 +321,12 @@ File_t* VariantID_t::CloneFile(File_t* fptr) q+=" ;"; dbintr->IssueQuery(q); + q="select * into "; + q+=dtn_part2; + q+=" from "; + q+=fptr->scoop_table_name+"_part2"; + q+=" ;"; + dbintr->IssueQuery(q); // update the variant dependency table to represent the deep clone q = "update variant_dependency set file_id='" + diff --git a/libIRDB/test/fill_in_cfg.cpp b/libIRDB/test/fill_in_cfg.cpp index 8f2df940be2186ceefba55d5107c2e8c7f435081..a78efb90ccce7367c0993e335ccefc40eb48c20a 100644 --- a/libIRDB/test/fill_in_cfg.cpp +++ b/libIRDB/test/fill_in_cfg.cpp @@ -18,8 +18,6 @@ * */ - - #include <libIRDB-core.hpp> #include <iostream> #include <fstream>