diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ab1f90757cf1dd3a2ecbee5e953c93bb47aa49e6..e8a67fa51b9bc2858f8523b55f56a8b7a9537f07 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -180,3 +180,35 @@ test_fib-ubuntu18: - ubuntu18 variables: OS: 'ubuntu18' + + +# +# elfdep test +# + +# template +.elfdep: &elfdep + stage: test + script: + - ./cicd_tests/elfdep.sh + +elfdep-ubuntu18: + <<: *elfdep + tags: + - ubuntu18 + variables: + OS: 'ubuntu18' + +elfdep-ubuntu16: + <<: *elfdep + tags: + - ubuntu16 + variables: + OS: 'ubuntu16' + +elfdep-centos75: + <<: *elfdep + tags: + - centos75 + variables: + OS: 'centos75' diff --git a/cicd_tests/elfdep.sh b/cicd_tests/elfdep.sh new file mode 100755 index 0000000000000000000000000000000000000000..9ba43c87cd0d7e134e0a9b54f5278e6faed9c0c7 --- /dev/null +++ b/cicd_tests/elfdep.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -e +set -x + +cd $CICD_MODULE_WORK_DIR/peasoup_umbrella +source set_env_vars + +cd $SECURITY_TRANSFORMS_HOME/libElfDep/test/ +./test-elfdep.sh diff --git a/libElfDep/src/elfdep.cpp b/libElfDep/src/elfdep.cpp index e673fe8039acaa8ee425f4737e5042d301cfd256..b2d3bcd12aa27aa127cc17d37752994473f38f5a 100644 --- a/libElfDep/src/elfdep.cpp +++ b/libElfDep/src/elfdep.cpp @@ -206,6 +206,7 @@ Instruction_t* ElfDependencies_t::ElfDependenciesImpl_t<T_Elf_Sym,T_Elf_Rela,T_E newinsn->GetRelocations().insert(newreloc); getFileIR()->GetRelocations().insert(newreloc); + newinsn->GetAddress()->SetFileID(getFileIR()->GetFile()->GetBaseID()); return newinsn; } diff --git a/libElfDep/test/SConscript b/libElfDep/test/SConscript index e07d67e74ee3ab43ca4acdfd671107c86bed4eea..e457589bcc480f365b4b5f164a57f68de53be2ba 100644 --- a/libElfDep/test/SConscript +++ b/libElfDep/test/SConscript @@ -32,7 +32,7 @@ files=Glob( Dir('.').srcnode().abspath+"/edt*.cpp") pgm="edt.exe" LIBPATH="$SECURITY_TRANSFORMS_HOME/lib" -LIBS=Split("stars "+ myenv.subst('$BASE_IRDB_LIBS')+ " IRDB-core pqxx BeaEngine_s_d transform MEDSannotation EXEIO pebliss ElfDep") 
+LIBS=Split("stars "+ myenv.subst('$BASE_IRDB_LIBS')+ " IRDB-core libIRDB-cfg libIRDB-util.so pqxx capstone transform MEDSannotation EXEIO pebliss ElfDep") myenv=myenv.Clone(CPPPATH=Split(cpppath)) pgm=myenv.Program(pgm, files, LIBPATH=LIBPATH, LIBS=LIBS) install=myenv.Install("$SECURITY_TRANSFORMS_HOME/plugins_install/", pgm) diff --git a/libElfDep/test/edt.cpp b/libElfDep/test/edt.cpp index 12e1ea24f4bbab211770e2bebe8589d88c9886f6..77cff3e41f288bc2d67fe976f0beb5e8a449b2a0 100644 --- a/libElfDep/test/edt.cpp +++ b/libElfDep/test/edt.cpp @@ -41,23 +41,23 @@ int ElfDep_Tester_t::execute() // insert the instrumentation auto tmp=insert_loc; - (void)insertAssemblyBefore(getFileIR(),tmp," push rdi") ; - tmp= insertAssemblyAfter(getFileIR(), tmp," push rsi ") ; - tmp= insertAssemblyAfter(getFileIR(), tmp," push rdx") ; - tmp= insertAssemblyAfter(getFileIR(), tmp," push rcx ") ; - tmp= insertAssemblyAfter(getFileIR(), tmp," push r8 ") ; - tmp= insertAssemblyAfter(getFileIR(), tmp," push r9 ") ; - tmp= insertAssemblyAfter(getFileIR(), tmp," call 0 ", edpcb) ; - tmp= insertAssemblyAfter(getFileIR(), tmp," L1: mov rcx, [rel L1]"); + (void)insertAssemblyBefore(tmp," push rdi") ; + tmp= insertAssemblyAfter(tmp," push rsi ") ; + tmp= insertAssemblyAfter(tmp," push rdx") ; + tmp= insertAssemblyAfter(tmp," push rcx ") ; + tmp= insertAssemblyAfter(tmp," push r8 ") ; + tmp= insertAssemblyAfter(tmp," push r9 ") ; + tmp= insertAssemblyAfter(tmp," call 0 ", edpcb) ; + tmp= insertAssemblyAfter(tmp," L1: mov rcx, [rel L1]"); auto got_insn=tmp; - tmp= insertAssemblyAfter(getFileIR(), tmp," inc dword [rcx]"); - tmp= insertAssemblyAfter(getFileIR(), tmp," call 0", edpcb); - tmp= insertAssemblyAfter(getFileIR(), tmp," pop r9"); - tmp= insertAssemblyAfter(getFileIR(), tmp," pop r8"); - tmp= insertAssemblyAfter(getFileIR(), tmp," pop rcx"); - tmp= insertAssemblyAfter(getFileIR(), tmp," pop rdx"); - tmp= insertAssemblyAfter(getFileIR(), tmp," pop rsi"); - tmp= insertAssemblyAfter(getFileIR(), 
tmp," pop rdi"); + tmp= insertAssemblyAfter(tmp," inc dword [rcx]"); + tmp= insertAssemblyAfter(tmp," call 0", edpcb); + tmp= insertAssemblyAfter(tmp," pop r9"); + tmp= insertAssemblyAfter(tmp," pop r8"); + tmp= insertAssemblyAfter(tmp," pop rcx"); + tmp= insertAssemblyAfter(tmp," pop rdx"); + tmp= insertAssemblyAfter(tmp," pop rsi"); + tmp= insertAssemblyAfter(tmp," pop rdi"); // map the load to point at the GOT entry. diff --git a/libElfDep/test/test-elfdep.sh b/libElfDep/test/test-elfdep.sh new file mode 100755 index 0000000000000000000000000000000000000000..39ac850f75d329f0223c1f2e177835917710a161 --- /dev/null +++ b/libElfDep/test/test-elfdep.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +TMP_ORIG=/tmp/testelfdep.ls.orig.$$ +TMP_ORIG2=/tmp/testelfdep.ls.orig.2.$$ +TMP_ELFDEP=/tmp/testelfdep.ls.elfdep.$$ + +cleanup_files() +{ + rm /tmp/testelfdep.ls* >/dev/null 2>&1 +} + +cleanup() +{ + echo "************" + echo "test failed." + echo "************" + + cleanup_files + exit 1 +} + + +# make sure xforms are built +scons || cleanup + +$PSZ /bin/ls ./xxx -c move_globals=on -o move_globals:--elftables -c edt=on || cleanup + +/bin/ls /tmp > $TMP_ORIG || cleanup + +./xxx /tmp > $TMP_ELFDEP || cleanup + +echo "Verify external vars was overwritten" +grep "var = 0" $TMP_ELFDEP || cleanup +grep "var = 1" $TMP_ELFDEP || cleanup + +echo "Verify same output" +grep -v "var =" $TMP_ELFDEP > $TMP_ORIG2 +diff $TMP_ORIG2 $TMP_ELFDEP + +cleanup_files + +echo +echo "test passed." +echo diff --git a/libElfDep/test/testit.sh b/libElfDep/test/testit.sh deleted file mode 100755 index 051b6c0133b5fc47f9a961516065161c9533ccdb..0000000000000000000000000000000000000000 --- a/libElfDep/test/testit.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -cleanup() -{ - echo "************" - echo "test failed." 
- echo "************" - exit 1 -} - - -# make sure xforms are built -scons || cleanup - -$PSZ /bin/ls ./xxx -c move_globals=on -o move_globals:--elftables -c edt=on || cleanup - -/bin/ls /tmp || cleanup -./xxx /tmp || cleanup - -echo -echo "test passed." -echo diff --git a/libIRDB/include/core/IRDB_Objects.hpp b/libIRDB/include/core/IRDB_Objects.hpp index 41f59ecf4ec35ffa28361c65d44acd860fcffaff..859d9177eeb36652af749fd363181df31a362874 100644 --- a/libIRDB/include/core/IRDB_Objects.hpp +++ b/libIRDB/include/core/IRDB_Objects.hpp @@ -41,11 +41,13 @@ class IRDBObjects_t pqxxDB_t* resetDBInterface(); // Write back variants and file IRs. Does NOT commit changes. - int writeBackFileIR(const db_id_t file_id); + int writeBackFileIR(const db_id_t file_id, std::ostream *verbose_logging=nullptr); int writeBackVariant(const db_id_t variant_id); // Does NOT write back its files' IRs - int writeBackAll(void); // Returns -1 if any writes fail. + int writeBackAll(std::ostream* verbose_logging=nullptr); // Returns -1 if any writes fail. 
void deleteAll(void); + + void tidyIR(); private: std::unique_ptr<pqxxDB_t> pqxx_interface; diff --git a/libIRDB/include/core/file.hpp b/libIRDB/include/core/file.hpp index 5a33f8aa4c475183403d2192511c9365a1c6ddde..8c30dc9a74fcb2cd496f553e4045b768d6e96ec9 100644 --- a/libIRDB/include/core/file.hpp +++ b/libIRDB/include/core/file.hpp @@ -49,6 +49,7 @@ class File_t : public BaseObj_t void CreateTables(); int GetELFOID() const { return elfoid; }; + db_id_t GetFileID() const {return orig_fid; }; friend class FileIR_t; friend class Function_t; diff --git a/libIRDB/include/core/fileir.hpp b/libIRDB/include/core/fileir.hpp index 7668dabaf3094753e0235894546627d1896356ea..d0b70244b35a70b1d3a796fb1ac889633840012b 100644 --- a/libIRDB/include/core/fileir.hpp +++ b/libIRDB/include/core/fileir.hpp @@ -37,7 +37,7 @@ class FileIR_t : public BaseObj_t virtual ~FileIR_t(); // DB operations - void WriteToDB(); + void WriteToDB(std::ostream *verbose_logging=&std::cerr); // accessors and mutators in one FunctionSet_t& GetFunctions() { return funcs; } @@ -169,9 +169,9 @@ class FileIR_t : public BaseObj_t void ReadAllICFSFromDB(std::map<db_id_t,Instruction_t*> &addr2insnMap, std::map<Instruction_t*, db_id_t> &unresolvedICFS); - void CleanupICFS(); - void GarbageCollectICFS(); - void DedupICFS(); + void CleanupICFS(std::ostream *verbose_logging=&std::cerr); + void GarbageCollectICFS(std::ostream *verbose_logging=&std::cerr); + void DedupICFS(std::ostream *verbose_logging=&std::cerr); std::clock_t ReadIRDB_start; diff --git a/libIRDB/include/core/reloc.hpp b/libIRDB/include/core/reloc.hpp index 514018178d9bfdb9fcdd9339233247ce767fd145..0f352c19aba25aef65e9a278fe63af54b2a462fe 100644 --- a/libIRDB/include/core/reloc.hpp +++ b/libIRDB/include/core/reloc.hpp @@ -24,7 +24,7 @@ class Relocation_t : public BaseObj_t public: // create new item. 
- Relocation_t() : BaseObj_t(NULL), offset(0), wrt_obj(NULL) {} // new reloc w/no data + Relocation_t() : BaseObj_t(NULL), offset(0), wrt_obj(NULL), addend(0) {} // new reloc w/no data // a reloc read from the DB Relocation_t(db_id_t reloc_id, int _offset, std::string _type, BaseObj_t* p_wrt_obj=NULL, int32_t p_addend=0) : diff --git a/libIRDB/include/util/IBT_Provenance.hpp b/libIRDB/include/util/IBT_Provenance.hpp index 7969904380357052caa648b645e694d60c4d1a54..07f2b0bf7c4a62142e0d4560f1de6a9e041cf4ae 100644 --- a/libIRDB/include/util/IBT_Provenance.hpp +++ b/libIRDB/include/util/IBT_Provenance.hpp @@ -6,41 +6,33 @@ class IBTProvenance_t { private: - typedef std::map<const Instruction_t*, Provenance_t> ProvMap_t; + + // types + using InsnProvMap_t = std::map<const Instruction_t*, Provenance_t>; + + // data + InsnProvMap_t prov_map; + static Provenance_t empty; + + // methods + void Init() {}; + void AddProvs(const Provenance_t& p, const InstructionSet_t& after) ; public: IBTProvenance_t(const FileIR_t* f=NULL) {Init(); if(f) AddFile(f);} - virtual ~IBTProvenance_t() {;} - virtual void AddFile(const FileIR_t* ); + virtual ~IBTProvenance_t() {} // default destructor not OK for some reason? 
+ void AddFile(const FileIR_t* ); - /*Provenance_t getProvenance(const Instruction_t* insn) const - { - return ((ProvMap_t) prov_map)[insn]; - }*/ - - Provenance_t& operator[] (const Instruction_t* i) - { - return prov_map[i]; - } const Provenance_t& operator[] (const Instruction_t* i) const { - ProvMap_t::const_iterator it=prov_map.find(i); + const auto it=prov_map.find(i); if (it!= prov_map.end()) return it->second; - static Provenance_t empty; return empty; } - protected: - virtual void Init() {}; - - private: - - virtual void AddProvs(const Instruction_t* before, const InstructionSet_t& after); - - ProvMap_t prov_map; }; #endif diff --git a/libIRDB/include/util/Provenance.hpp b/libIRDB/include/util/Provenance.hpp index c3d205055be6be4c9262f9c3d2d559861511b9f3..73a14041a09d185cf973c8d43e5a4d47bfe5ef5d 100644 --- a/libIRDB/include/util/Provenance.hpp +++ b/libIRDB/include/util/Provenance.hpp @@ -28,10 +28,16 @@ class Provenance_t prov.set((size_t) ProvType::IndCall); } + void addProv(const Provenance_t& other) + { + prov |= other.prov; + } + bool hasReturn() const { return prov.test((size_t) ProvType::Ret); } + bool hasIndirectJump() const { diff --git a/libIRDB/include/utils.hpp b/libIRDB/include/utils.hpp index 224f26a3d06de28cc0af03bed8ce9993ed7389d9..6f4a9046e6a03b03bbdc2eb0d320ae03434ba2e2 100644 --- a/libIRDB/include/utils.hpp +++ b/libIRDB/include/utils.hpp @@ -120,6 +120,9 @@ inline Funct for_randomOrder_each(const IterType &b, const IterType & e, const F return callback; } +int command_to_stream(const std::string& command, std::ostream& stream); + + #endif diff --git a/libIRDB/src/core/IRDB_Objects.cpp b/libIRDB/src/core/IRDB_Objects.cpp index 17ea07d06eb9da606dd3038b6525043b5680ea49..34c05f812b444211e05a0564381411a9888845fb 100644 --- a/libIRDB/src/core/IRDB_Objects.cpp +++ b/libIRDB/src/core/IRDB_Objects.cpp @@ -49,7 +49,7 @@ FileIR_t* IRDBObjects_t::addFileIR(const db_id_t variant_id, const db_id_t file_ } } -int IRDBObjects_t::writeBackFileIR(const 
db_id_t file_id) +int IRDBObjects_t::writeBackFileIR(const db_id_t file_id, ostream *verbose_logging) { const auto it = file_IR_map.find(file_id); @@ -65,7 +65,7 @@ int IRDBObjects_t::writeBackFileIR(const db_id_t file_id) // make sure static variable is set in the calling module -- IMPORTANT const auto & the_fileIR = (it->second).fileIR; the_fileIR->SetArchitecture(); - the_fileIR->WriteToDB(); + the_fileIR->WriteToDB(verbose_logging ? verbose_logging : &std::cerr); /* WriteToDB/DedupICFS dereference the stream unconditionally, so never forward nullptr */ return 0; } catch (DatabaseError_t pnide) @@ -180,14 +180,14 @@ void IRDBObjects_t::deleteVariant(const db_id_t variant_id) variant_map.erase(variant_id); } -int IRDBObjects_t::writeBackAll(void) +int IRDBObjects_t::writeBackAll(ostream *verbose_logging) { int ret_status = 0; // Write back FileIRs for(auto &file_pair : file_IR_map) { - const int result = IRDBObjects_t::writeBackFileIR((file_pair.second.file)->GetBaseID()); + const int result = IRDBObjects_t::writeBackFileIR((file_pair.second.file)->GetBaseID(), verbose_logging); if(result != 0) { ret_status = -1; @@ -211,7 +211,7 @@ void IRDBObjects_t::deleteAll(void) // Delete Variants (also deletes all files) for( auto &variant_pair : variant_map) { - IRDBObjects_t::deleteVariant((variant_pair.second)->GetBaseID()); + IRDBObjects_t::deleteVariant((variant_pair.second)->GetBaseID()); } } @@ -227,3 +227,15 @@ pqxxDB_t* IRDBObjects_t::resetDBInterface() return pqxx_interface.get(); } + +void IRDBObjects_t::tidyIR(void) +{ + /* For every loaded FileIR: assemble its registered instructions, then assign base IDs. */ + for( auto &variant_pair : file_IR_map) + { + const auto &file_ir_info = variant_pair.second; + auto fileIR=file_ir_info.fileIR.get(); + fileIR->AssembleRegistry(); + fileIR->SetBaseIDS(); + } +} diff --git a/libIRDB/src/core/address.cpp b/libIRDB/src/core/address.cpp index ea1cfe71e464c4dc51314cc29bb7ead62d6798d8..f06a0783a0aff1a79f45fa3386bf907a1171a6f2 100644 --- a/libIRDB/src/core/address.cpp +++ b/libIRDB/src/core/address.cpp @@ -22,6 +22,7 @@ #include <all.hpp> #include <utils.hpp> #include <stdlib.h> + using 
namespace libIRDB; using namespace std; diff --git a/libIRDB/src/core/decode_cs.cpp b/libIRDB/src/core/decode_cs.cpp index 72230e43fdff76df0b85d822cf2eb148f9f9bce2..ec824293540cf94c45ef18ac51ce895a61167310 100644 --- a/libIRDB/src/core/decode_cs.cpp +++ b/libIRDB/src/core/decode_cs.cpp @@ -596,7 +596,7 @@ virtual_offset_t DecodedInstructionCapstone_t::getMemoryDisplacementOffset(const //const auto encoding_size=t.getMemoryDisplacementEncodingSize(); //const auto x86 = &(the_insn->detail->x86); - const auto imm_count = cs_op_count(cs_handle->getHandle(), the_insn, X86_OP_IMM); + const auto imm_count = cs_op_count(cs_handle->getHandle(), the_insn, X86_OP_IMM); const auto disp_size=t.getMemoryDisplacementEncodingSize(); const auto imm=getImmediate(); const auto disp=t.getMemoryDisplacement(); @@ -619,8 +619,14 @@ virtual_offset_t DecodedInstructionCapstone_t::getMemoryDisplacementOffset(const const auto possible_imm_sizes= string(the_insn->mnemonic)=="movabs" ? set<int>({1,2,4,8}) : set<int>({1,2,4}); - for(const auto imm_size : possible_imm_sizes) + // Reverse iterate to find the maximum size value-match possible. + // E.g. with a mov [rbx*8 + 0x00000000],0x00000000 instruction, it would be easy to match + // a 1-byte immediate value of 0 and a 4-byte displacement value of zero, while + // the desired behavior is to match two 4-byte values of zero when searching + // for the actual start offsets of the displacement and immediate fields. 
+ for (auto imm_size_iter = possible_imm_sizes.crbegin(); imm_size_iter != possible_imm_sizes.crend(); ++imm_size_iter) { + const auto imm_size = (*imm_size_iter); if(the_insn->size < disp_size + imm_size) continue; diff --git a/libIRDB/src/core/file.cpp b/libIRDB/src/core/file.cpp index b7560f892ea421811aeafb8bbfb2b78c37b0f98b..7aa10476f6a13748430cde33781069b76d51e6f4 100644 --- a/libIRDB/src/core/file.cpp +++ b/libIRDB/src/core/file.cpp @@ -26,12 +26,11 @@ #include <fstream> #include <iostream> +#include <utils.hpp> + using namespace libIRDB; using namespace std; -static void ignore_result(int /* res */) { } - - File_t::File_t(const db_id_t &myfile_id, const db_id_t &my_orig_fid, const std::string &myurl, const std::string &myhash, const std::string &myarch, const int &myoid, @@ -82,7 +81,8 @@ void File_t::CreateTables() ehcss_table_name+" "+ tmpfile; - ignore_result(system(command.c_str())); + // ignore_result(system(command.c_str())); + command_to_stream(command,cout); std::ifstream t(tmpfile.c_str()); diff --git a/libIRDB/src/core/fileir.cpp b/libIRDB/src/core/fileir.cpp index 7e93e6562928d909c63d44fe917b28702cf71997..59ec13bcf49ea7734f7ec3b913527b09373e90a2 100644 --- a/libIRDB/src/core/fileir.cpp +++ b/libIRDB/src/core/fileir.cpp @@ -28,19 +28,40 @@ #include <stdlib.h> #include <sys/wait.h> #include <iomanip> -//#include <bea_deprecated.hpp> - - using namespace libIRDB; using namespace std; #define SCOOP_CHUNK_SIZE (10*1024*1024) /* 10 mb */ +#define ALLOF(a) begin(a),end(a) #undef EIP +int command_to_stream(const string& command, ostream& stream) +{ + auto redirect_command=command+" 2>&1 "; + auto buffer=array<char,128>(); + + std::cout << "Issuing subcommand: "<< command << std::endl; + auto pipe = popen(redirect_command.c_str(), "r"); + if (!pipe) + { + stream << "Couldn't start command:"<< strerror(errno) << endl; + return 1; + } + while (fgets(buffer.data(), 128, pipe) != NULL) + { + stream<<buffer.data(); + } + auto returnCode = pclose(pipe); + 
if(returnCode==-1) + stream << "Could not close pipe: "<< strerror(errno) << endl; + + std::cout << "Return code = "<<returnCode << std::endl; + return returnCode; +} static void UpdateEntryPoints( const std::map<db_id_t,Instruction_t*> &insnMap, @@ -191,14 +212,8 @@ void FileIR_t::AssembleRegistry() string binaryOutputFile = "tmp.bin"; string command = "rm -f " + assemblyFile + " " + binaryOutputFile; - int rt = system(command.c_str()); + auto actual_exit = command_to_stream(command, cout); // system(command.c_str()); - int actual_exit = -1; - //int actual_signal = -1; - - if (WIFEXITED(rt)) actual_exit = WEXITSTATUS(rt); - //else actual_signal = WTERMSIG(rt); - assert(actual_exit == 0); ofstream asmFile; @@ -218,14 +233,7 @@ void FileIR_t::AssembleRegistry() asmFile.close(); command = string("nasm ") + assemblyFile + string(" -o ") + binaryOutputFile; - rt = system(command.c_str()); - - actual_exit = -1; - //actual_signal = -1; - - if (WIFEXITED(rt)) actual_exit = WEXITSTATUS(rt); - //else actual_signal = WTERMSIG(rt); - + actual_exit = command_to_stream(command,cout); // system(command.c_str()); assert(actual_exit == 0); @@ -281,7 +289,7 @@ void FileIR_t::AssembleRegistry() } instr->SetDataBits(rawBits); -// cerr << "doing instruction:" << ((Instruction_t*)instr)->getDisassembly() << " comment: " << ((Instruction_t*)instr)->GetComment() << endl; +// *verbose_logging << "doing instruction:" << ((Instruction_t*)instr)->getDisassembly() << " comment: " << ((Instruction_t*)instr)->GetComment() << endl; reg_val++; } @@ -709,7 +717,7 @@ void FileIR_t::ReadRelocsFromDB } -void FileIR_t::WriteToDB() +void FileIR_t::WriteToDB(ostream *verbose_logging) { // const auto WriteIRDB_start = clock(); @@ -722,7 +730,7 @@ void FileIR_t::WriteToDB() /* assign each item a unique ID */ SetBaseIDS(); - CleanupICFS(); + CleanupICFS(verbose_logging); db_id_t j=-1; @@ -797,7 +805,8 @@ void FileIR_t::WriteToDB() // in the IRDB, or have an associated "old" instruction. 
// without these bits of information, the new instruction can't possibly execute correctly. // and we won't have the information necessary to emit spri. - cerr << "NULL fallthrough: offending instruction:" << ((Instruction_t*)insnp)->getDisassembly() << " comment: " << ((Instruction_t*)insnp)->GetComment() << endl; + + *verbose_logging << "NULL fallthrough: offending instruction:" << ((Instruction_t*)insnp)->getDisassembly() << " comment: " << ((Instruction_t*)insnp)->GetComment() << endl; assert(0); abort(); } @@ -816,7 +825,7 @@ void FileIR_t::WriteToDB() // in the IRDB, or have an associated "old" instruction. // without these bits of information, the new instruction can't possibly execute correctly. // and we won't have the information necessary to emit spri. - cerr << "Call must have a target; offending instruction:" << ((Instruction_t*)insnp)->getDisassembly() << " comment: " << ((Instruction_t*)insnp)->GetComment() << endl; + *verbose_logging << "Call must have a target; offending instruction:" << ((Instruction_t*)insnp)->getDisassembly() << " comment: " << ((Instruction_t*)insnp)->GetComment() << endl; assert(0); abort(); } @@ -1313,53 +1322,21 @@ void FileIR_t::ReadAllICFSFromDB(std::map<db_id_t,Instruction_t*> &addr2instMap, } } -void FileIR_t::GarbageCollectICFS() +void FileIR_t::GarbageCollectICFS(ostream* verbose_logging) { - std::set<ICFS_t*> used_icfs; - - for(set<Instruction_t*>::const_iterator it=this->GetInstructions().begin(); - it!=this->GetInstructions().end(); - ++it) - { - Instruction_t* instr=*it; - if(instr && instr->GetIBTargets()) - { - used_icfs.insert(instr->GetIBTargets()); - } - } - -/* - int unused_icfs = this->GetAllICFS().size() - used_icfs.size(); - if (unused_icfs > 0) - { - cerr << "FileIR_t::GarbageCollectICFS(): WARNING: " << dec << unused_icfs << " unused ICFS found. 
"; - cerr << "Deleting before committing to IRDB" << endl; - } -*/ - - ICFSSet_t to_erase; - for(ICFSSet_t::const_iterator it=this->GetAllICFS().begin(); - it != this->GetAllICFS().end(); - ++it) - { - ICFS_t* icfs = *it; - if (used_icfs.count(icfs) == 0) - { - to_erase.insert(icfs); - } - } - - for(ICFSSet_t::const_iterator it=to_erase.begin(); - it != to_erase.end(); - ++it) - { - ICFS_t* icfs = *it; - this->GetAllICFS().erase(icfs); - } - + auto used_icfs= ICFSSet_t(); + // get the IBTarget of each instruction into used_icfs + transform( ALLOF(insns), inserter(used_icfs, begin(used_icfs)), + [](const Instruction_t* insn) -> ICFS_t* { return insn->GetIBTargets(); } + ); + // we likely inserted null into the set, which we just will remove as a special ase. + used_icfs.erase(nullptr); + + // update the list to include only the used ones. + icfs_set=used_icfs; } -void FileIR_t::DedupICFS() +void FileIR_t::DedupICFS(ostream *verbose_logging) { std::set<ICFS_t> unique_icfs; @@ -1380,8 +1357,8 @@ void FileIR_t::DedupICFS() if (duplicates.size() > 0) { - cerr << "FileIR_t::DedupICFS(): WARNING: " << dec << duplicates.size() << " duplicate ICFS out of " << all_icfs.size() << " total ICFS"; - cerr << ". De-duplicating before committing to IRDB" << endl; + *verbose_logging << "FileIR_t::DedupICFS(): WARNING: " << dec << duplicates.size() << " duplicate ICFS out of " << all_icfs.size() << " total ICFS"; + *verbose_logging << ". 
De-duplicating before committing to IRDB" << endl; } // remove duplicate icfs @@ -1404,7 +1381,7 @@ void FileIR_t::DedupICFS() if (*icfs == *t) { duplicate_map[icfs] = t; - cerr << "FileIR_t::DedupICFS(): remap: icfs id " << icfs->GetBaseID() << " --> icsf id " << t->GetBaseID() << endl; + *verbose_logging << "FileIR_t::DedupICFS(): remap: icfs id " << icfs->GetBaseID() << " --> icsf id " << t->GetBaseID() << endl; break; } } @@ -1423,10 +1400,10 @@ void FileIR_t::DedupICFS() } } -void FileIR_t::CleanupICFS() +void FileIR_t::CleanupICFS(ostream *verbose_logging) { - GarbageCollectICFS(); - DedupICFS(); + GarbageCollectICFS(verbose_logging); + DedupICFS(verbose_logging); } std::map<db_id_t,DataScoop_t*> FileIR_t::ReadScoopsFromDB diff --git a/libIRDB/src/core/instruction.cpp b/libIRDB/src/core/instruction.cpp index 27a990218f4f83a65ef03faa8d418a325497700c..7a876821660082bf3116ac9b57082fdf200c7ade 100644 --- a/libIRDB/src/core/instruction.cpp +++ b/libIRDB/src/core/instruction.cpp @@ -24,15 +24,13 @@ #include <fstream> #include <sstream> #include <iomanip> +#include <utils.hpp> #undef EIP using namespace libIRDB; using namespace std; -static void ignore_result(int /* res */) { } - - Instruction_t::Instruction_t() : BaseObj_t(NULL), my_address(NULL), @@ -115,9 +113,9 @@ bool Instruction_t::Assemble(string assembly) //remove any preexisting assembly or nasm generated files string command = "rm -f " + assemblyFile; - ignore_result(system(command.c_str())); + command_to_stream(command,cout); command = "rm -f "+assemblyFile+".bin"; - ignore_result(system(command.c_str())); + command_to_stream(command,cout); ofstream asmFile; asmFile.open(assemblyFile.c_str()); @@ -132,7 +130,7 @@ bool Instruction_t::Assemble(string assembly) asmFile.close(); command = "nasm " + assemblyFile + " -o "+ binaryOutputFile; - ignore_result(system(command.c_str())); + command_to_stream(command,cout); ifstream binreader; unsigned int filesize; diff --git a/libIRDB/src/core/operand_cs.cpp 
b/libIRDB/src/core/operand_cs.cpp index c5a863494f171a80be903b57960e19486377d5ae..f74aff7b91e04bac2020ac16bd3eb16d37e6f74a 100644 --- a/libIRDB/src/core/operand_cs.cpp +++ b/libIRDB/src/core/operand_cs.cpp @@ -557,14 +557,53 @@ uint32_t DecodedOperandCapstone_t::getSegmentRegister() const set<string> write_only_operand_mnemonics= { + "seta", + "setae", + "setb", + "setbe", + "setc", "sete", + "setg", + "setge", + "setl", + "setle", + "setna", + "setnae", + "setnb", + "setnbe", + "setnc", "setne", + "setng", + "setnge", + "setnl", + "setnle", + "setno", + "setnp", + "setns", + "setnz", + "seto", + "setp", + "setpe", + "setpo", + "sets", + "setz", "fst", "fstp", "fist", "fistp" }; +set<string> write_first_operand_mnemonics= + { + "movups", + "movd", + "rol", + "movdqa", + "ror", + "movdqu", + "movq" + }; + set<string> read_only_operand_mnemonics= { // specal read-only op cases, rest are some form of compare. @@ -728,21 +767,37 @@ bool DecodedOperandCapstone_t::isWritten() const { const auto d=DecodedInstructionCapstone_t(my_insn); const auto d_mnemonic=d.getMnemonic(); + + // special case check: all operands are reads const auto room_it=read_only_operand_mnemonics.find(d_mnemonic); const auto in_room=(room_it!=end(read_only_operand_mnemonics)); if(in_room) return false; + + // special case check: all operands are writes const auto woom_it=write_only_operand_mnemonics.find(d_mnemonic); const auto in_woom=(woom_it!=end(write_only_operand_mnemonics)); if(in_woom) return true; + // special case check: first operand is writes + if(op_num==0) + { + const auto wfom_it=write_first_operand_mnemonics.find(d_mnemonic); + const auto in_wfom=(wfom_it!=end(write_first_operand_mnemonics)); + if(in_wfom) + return true; + } + + // special case of imul // imul has a 1-argument form which uses all it's operands if(d_mnemonic=="imul" && !d.hasOperand(1)) return false; + + + // default: use capstone's advice. 
const auto the_insn=static_cast<cs_insn*>(my_insn.get()); const auto &op = (the_insn->detail->x86.operands[op_num]); - return (op.access & CS_AC_WRITE)!=0; /* if(op_num!=0) diff --git a/libIRDB/src/util/IBT_Provenance.cpp b/libIRDB/src/util/IBT_Provenance.cpp index 429455f1d766abb574e5cae4bc84a79cfb0c87b1..9956da0bd03697c51417395a5ad79f3ba4ef9a41 100644 --- a/libIRDB/src/util/IBT_Provenance.cpp +++ b/libIRDB/src/util/IBT_Provenance.cpp @@ -8,47 +8,57 @@ using namespace libIRDB; using namespace std; +Provenance_t IBTProvenance_t::empty; -void IBTProvenance_t::AddProvs(const Instruction_t* before, const InstructionSet_t& afterset) + +void IBTProvenance_t::AddFile(const FileIR_t* firp) { - // Determine type of IB - const auto IndBranchAsm=DecodedInstruction_t(before); - - bool isIndJmp = IndBranchAsm.isUnconditionalBranch() && !IndBranchAsm.getOperand(0).isConstant(); - bool isIndCall = IndBranchAsm.isCall() && !IndBranchAsm.getOperand(0).isConstant(); - bool isRet = IndBranchAsm.isReturn(); - - // Set the provenance info of targets depending on the type of IB - for(auto insn : afterset) + + using ICFSProvMap_t = std::map<const ICFS_t*, Provenance_t>; + + auto icfs_prov_map = ICFSProvMap_t(); + + // collect before info for each icfs into icfs_prov_map + for(auto insn : firp->GetInstructions()) { + const auto &ibTargets=insn->GetIBTargets(); + if(!ibTargets) + continue; + + auto this_prov=Provenance_t(); + const auto IndBranchAsm=DecodedInstruction_t(insn); + const auto isIndJmp = IndBranchAsm.isUnconditionalBranch() && !IndBranchAsm.getOperand(0).isConstant(); + const auto isIndCall = IndBranchAsm.isCall() && !IndBranchAsm.getOperand(0).isConstant(); + const auto isRet = IndBranchAsm.isReturn(); + if(isIndJmp) { - prov_map[insn].addIndirectJump(); + this_prov.addIndirectJump(); } else if(isIndCall) { - prov_map[insn].addIndirectCall(); + this_prov.addIndirectCall(); } else if(isRet) { - prov_map[insn].addReturn(); + this_prov.addReturn(); } else { assert(0); } + + 
icfs_prov_map[ibTargets].addProv(this_prov); } -} -void IBTProvenance_t::AddFile(const FileIR_t* firp2) -{ - FileIR_t* firp=(FileIR_t*)firp2; // discarding const qualifier because we know we won't change the set - firp->AssembleRegistry(); // Takes time but I'm paranoid - for(auto insn : firp->GetInstructions()) + // deploy info for each target of the icfs + for(const auto &icfs : firp->GetAllICFS()) { - // If insn is an IB, add the type of IB to the targets' provenance info - if(insn->GetIBTargets()) - AddProvs(insn, *insn->GetIBTargets()); + assert(icfs); + for(const auto &insn : *icfs) + { + prov_map[insn].addProv(icfs_prov_map[icfs]); + } } } diff --git a/libIRDB/test/fill_in_cfg.cpp b/libIRDB/test/fill_in_cfg.cpp index 901638f121993e08248d05479da016e553e971b4..905f3bd5301070599a99ed3977cccd902ec8a15c 100644 --- a/libIRDB/test/fill_in_cfg.cpp +++ b/libIRDB/test/fill_in_cfg.cpp @@ -106,7 +106,10 @@ void PopulateCFG::set_fallthrough /* set the target for this insn */ if(fallthrough_insn!=0) + { + fallthroughs_set++; insn->SetFallthrough(fallthrough_insn); + } else missed_instructions.insert(pair<db_id_t,virtual_offset_t>(insn->GetAddress()->GetFileID(),virtual_offset)); } @@ -180,7 +183,10 @@ void PopulateCFG::set_target /* set the target for this insn */ if(target_insn!=0) + { + targets_set++; insn->SetTarget(target_insn); + } else missed_instructions.insert( pair<db_id_t,virtual_offset_t>(insn->GetAddress()->GetFileID(),virtual_offset)); @@ -510,6 +516,7 @@ void PopulateCFG::fill_in_scoops(FileIR_t *firp) bool is_relro=is_in_relro_segment(secndx); DataScoop_t *newscoop=new DataScoop_t(max_base_id++, name, startaddr, endaddr, NULL, permissions, is_relro, the_contents); + scoops_detected++; assert(newscoop); firp->GetDataScoops().insert(newscoop); @@ -650,8 +657,20 @@ int PopulateCFG::executeStep(IRDBObjects_t *const irdb_objects) cerr<<"Unexpected error"<<endl; return -1; } + + cout<<"#ATTRIBUTE targets_set="<<targets_set<<endl; + cout<<"#ATTRIBUTE 
fallthroughs_set="<<fallthroughs_set<<endl; + cout<<"#ATTRIBUTE scoops_detected="<<scoops_detected<<endl; + + if(getenv("SELF_VALIDATE")) + { + assert(targets_set > 10); + assert(fallthroughs_set > 100); + assert(scoops_detected > 5 ); + } + - return 0; + return 0; } diff --git a/libIRDB/test/fill_in_cfg.hpp b/libIRDB/test/fill_in_cfg.hpp index 4925bafda9045fc015ce8ca053ad0bf9bcb0090e..5beb37871fbc9ed11100e160271c39ac49f5e79b 100644 --- a/libIRDB/test/fill_in_cfg.hpp +++ b/libIRDB/test/fill_in_cfg.hpp @@ -16,12 +16,16 @@ class PopulateCFG : public libIRDB::Transform_SDK::TransformStep_t variant_id(p_variant_id), fix_landing_pads(p_fix_landing_pads) { - odd_target_count = 0; - bad_target_count = 0; - bad_fallthrough_count = 0; - failed_target_count = 0U; - - elfiop = std::unique_ptr<EXEIO::exeio>(nullptr); + odd_target_count = 0; + bad_target_count = 0; + bad_fallthrough_count = 0; + failed_target_count = 0U; + + targets_set=0; + fallthroughs_set=0; + scoops_detected=0; + + elfiop = std::unique_ptr<EXEIO::exeio>(nullptr); } ~PopulateCFG(void) override @@ -74,6 +78,10 @@ class PopulateCFG : public libIRDB::Transform_SDK::TransformStep_t int bad_target_count; int bad_fallthrough_count; unsigned int failed_target_count; + + size_t targets_set=0; + size_t fallthroughs_set=0; + size_t scoops_detected=0; // non-optional libIRDB::db_id_t variant_id; diff --git a/libIRDB/test/fill_in_indtargs.cpp b/libIRDB/test/fill_in_indtargs.cpp index e796cb2690325bae0b2527458bc2b1b0bbff7ce2..6281d476858d81a6c5c323d4052e1f1cc3d2a708 100644 --- a/libIRDB/test/fill_in_indtargs.cpp +++ b/libIRDB/test/fill_in_indtargs.cpp @@ -66,10 +66,6 @@ public: * class variables */ -// -// record the ICFS for each branch, these can come from switch tables -// -map<Instruction_t*, ICFS_t> icfs_maps; // the bounds of the executable sections in the pgm. 
set< pair <virtual_offset_t,virtual_offset_t> > bounds; @@ -655,11 +651,12 @@ I7: 08069391 <_gedit_app_ready+0x91> ret assert(disasm.getOperand(0).isRegister()); const auto I5_reg=disasm.getOperand(0).getString(); auto jmp_reg=string(); + auto add_reg=string(); // has to be a jump to a register now // backup and find the instruction that's an add before I8 - if(!backup_until(string()+"add "+I5_reg, I4, I5, I5_reg)) + if(!backup_until(string()+"add "+I5_reg+"|lea "+I5_reg, I4, I5, I5_reg)) { auto mov_insn=static_cast<Instruction_t*>(nullptr); if(!backup_until(string()+"mov "+I5_reg, mov_insn, I5, I5_reg)) @@ -673,15 +670,53 @@ I7: 08069391 <_gedit_app_ready+0x91> ret jmp_reg=mov_reg; } else - jmp_reg=I5_reg; + { + const auto d4=DecodedInstruction_t(I4); + if(d4.getMnemonic()=="lea") + { + const auto base_reg=d4.getOperand(1).getBaseRegister(); + switch(base_reg) + { + case 0/*REG0*/: jmp_reg="eax"; break; + case 1/*REG1*/: jmp_reg="ecx"; break; + case 2/*REG2*/: jmp_reg="edx"; break; + case 3/*REG3*/: jmp_reg="ebx"; break; + case 4/*REG4*/: jmp_reg="esp"; break; + case 5/*REG5*/: jmp_reg="ebp"; break; + case 6/*REG6*/: jmp_reg="esi"; break; + case 7/*REG7*/: jmp_reg="edi"; break; + default: + // no base register; + return; + } + const auto index_reg=d4.getOperand(1).getBaseRegister(); + switch(index_reg) + { + case 0/*REG0*/: add_reg="eax"; break; + case 1/*REG1*/: add_reg="ecx"; break; + case 2/*REG2*/: add_reg="edx"; break; + case 3/*REG3*/: add_reg="ebx"; break; + case 4/*REG4*/: add_reg="esp"; break; + case 5/*REG5*/: add_reg="ebp"; break; + case 6/*REG6*/: add_reg="esi"; break; + case 7/*REG7*/: add_reg="edi"; break; + default: + // no base register; + return; + } + } + else + { + jmp_reg=I5_reg; + if(!d4.getOperand(1).isRegister()) + return; + add_reg=d4.getOperand(1).getString(); + } + } - assert(jmp_reg!="" && I4!=nullptr); + assert(jmp_reg!="" && add_reg!="" && I4!=nullptr); - const auto d4=DecodedInstruction_t(I4); - if(!d4.getOperand(1).isRegister()) - 
return; - const auto add_reg=d4.getOperand(1).getString(); // backup and find the instruction that's an movsxd before I7 if(!backup_until(string()+"(mov "+jmp_reg+"|mov "+add_reg+")", I3, I4)) return; @@ -3019,6 +3054,13 @@ int executeStep(IRDBObjects_t *const irdb_objects) cerr<<"Unexpected error"<<endl; return -1; } + + assert(getenv("SELF_VALIDATE")==nullptr || bounds.size() > 3 ); + assert(getenv("SELF_VALIDATE")==nullptr || targets.size() > 100 ); + assert(getenv("SELF_VALIDATE")==nullptr || ranges.size() > 1 ); + assert(getenv("SELF_VALIDATE")==nullptr || preds.size() > 100 ); + assert(getenv("SELF_VALIDATE")==nullptr || lookupInstructionMap.size() > 100 ); + return 0; } diff --git a/libIRDB/test/fix_calls.cpp b/libIRDB/test/fix_calls.cpp index 84197c3410c857b226f96d4ea2e9c1b3a60308db..a2bb674d280e9f89db41195c88ec1470b57b5867 100644 --- a/libIRDB/test/fix_calls.cpp +++ b/libIRDB/test/fix_calls.cpp @@ -78,15 +78,20 @@ using RangeSet_t = std::set<Range_t, Range_tCompare>; RangeSet_t eh_frame_ranges; -long long no_target_insn=0; -long long no_fallthrough_insn=0; -long long target_not_in_function=0; -long long call_to_not_entry=0; -long long thunk_check=0; -long long found_pattern=0; -long long in_ehframe=0; -long long no_fix_for_ib=0; -long long no_fix_for_safefn=0; +size_t no_target_insn=0; +size_t no_fallthrough_insn=0; +size_t target_not_in_function=0; +size_t call_to_not_entry=0; +size_t thunk_check=0; +size_t found_pattern=0; +size_t in_ehframe=0; +size_t no_fix_for_ib=0; +size_t no_fix_for_safefn=0; +size_t other_fixes=0; +size_t fixed_calls=0; +size_t not_fixed_calls=0; +size_t not_calls=0; + bool opt_fix_icalls = false; bool opt_fix_safefn = true; @@ -724,7 +729,7 @@ void mark_as_unpinned_ibt(FileIR_t* firp, Instruction_t* ret_point) // fix_all_calls - convert calls to push/jump pairs in the IR. if fix_all is true, all calls are converted, // else we attempt to detect the calls it is safe to convert. 
// -void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all) +void fix_all_calls(FileIR_t* firp, bool fix_all) { set<Instruction_t*,insn_less<Instruction_t*> > sorted_insns; @@ -739,7 +744,6 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all) sorted_insns.insert(insn); } - long long fixed_calls=0, not_fixed_calls=0, not_calls=0; for( set<Instruction_t*,insn_less<Instruction_t*> >::const_iterator it=sorted_insns.begin(); @@ -749,7 +753,7 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all) { Instruction_t* insn=*it; - if(getenv("STOP_FIX_CALLS_AT") && fixed_calls>=atoi(getenv("STOP_FIX_CALLS_AT"))) + if(getenv("STOP_FIX_CALLS_AT") && fixed_calls>=(size_t)atoi(getenv("STOP_FIX_CALLS_AT"))) break; if(is_call(insn)) @@ -761,7 +765,7 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all) } // we've been asked to fix all calls for funsies/cfi // (and a bit about debugging fix-calls that's not important for anyone but jdh. - else if ( fix_all || (getenv("FIX_CALL_LIMIT") && not_fixed_calls>=atoi(getenv("FIX_CALL_LIMIT")))) + else if ( fix_all || (getenv("FIX_CALL_LIMIT") && not_fixed_calls>=(size_t)atoi(getenv("FIX_CALL_LIMIT")))) { bool fix_me = true; if (!opt_fix_safefn && can_skip_safe_function(insn)) @@ -799,33 +803,21 @@ void fix_all_calls(FileIR_t* firp, bool print_stats, bool fix_all) } } - - if(print_stats) - { - cout << "# ATTRIBUTE fix_calls::fixed_calls="<<std::dec<<fixed_calls<<endl; - cout << "# ATTRIBUTE fix_calls::no_fix_needed_calls="<<std::dec<<not_fixed_calls<<endl; - cout << "# ATTRIBUTE fix_calls::other_instructions="<<std::dec<<not_calls<<endl; - cout << "# ATTRIBUTE fix_calls::fixed_pct="<<std::fixed<<(((float)fixed_calls)/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl; - cout << "# ATTRIBUTE fix_calls::remaining_ratio="<<std::fixed<<((float)not_fixed_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl; - cout << "# ATTRIBUTE 
fix_calls::other_insts_ratio="<<std::fixed<<((float)not_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl; - cout << "# ATTRIBUTE fix_calls::no_target_insn="<<std::dec<< no_target_insn << endl; - cout << "# ATTRIBUTE fix_calls::no_fallthrough_insn="<<std::dec<< no_fallthrough_insn << endl; - cout << "# ATTRIBUTE fix_calls::target_not_in_function="<<std::dec<< target_not_in_function << endl; - cout << "# ATTRIBUTE fix_calls::call_to_not_entry="<<std::dec<< call_to_not_entry << endl; - cout << "# ATTRIBUTE fix_calls::thunk_check="<<std::dec<< thunk_check << endl; - cout << "# ATTRIBUTE fix_calls::found_pattern="<<std::dec<< found_pattern << endl; - cout << "# ATTRIBUTE fix_calls::in_ehframe="<<std::dec<< in_ehframe << endl; - cout << "# ATTRIBUTE fix_calls::no_fix_for_ib="<<std::dec<< no_fix_for_ib << endl; - cout << "# ATTRIBUTE fix_calls::no_fix_for_safefn="<<std::dec<< no_fix_for_safefn << endl; - no_target_insn=0; - no_fallthrough_insn=0; - target_not_in_function=0; - call_to_not_entry=0; - thunk_check=0; - found_pattern=0; - in_ehframe=0; - no_fix_for_ib=0; - } + cout << "# ATTRIBUTE fix_calls::fixed_calls="<<std::dec<<fixed_calls<<endl; + cout << "# ATTRIBUTE fix_calls::no_fix_needed_calls="<<std::dec<<not_fixed_calls<<endl; + cout << "# ATTRIBUTE fix_calls::other_instructions="<<std::dec<<not_calls<<endl; + cout << "# ATTRIBUTE fix_calls::fixed_pct="<<std::fixed<<(((float)fixed_calls)/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl; + cout << "# ATTRIBUTE fix_calls::remaining_ratio="<<std::fixed<<((float)not_fixed_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl; + cout << "# ATTRIBUTE fix_calls::other_insts_ratio="<<std::fixed<<((float)not_calls/((float)(not_fixed_calls+fixed_calls+not_calls)))*100.00<<"%"<<endl; + cout << "# ATTRIBUTE fix_calls::no_target_insn="<<std::dec<< no_target_insn << endl; + cout << "# ATTRIBUTE fix_calls::no_fallthrough_insn="<<std::dec<< no_fallthrough_insn << 
endl; + cout << "# ATTRIBUTE fix_calls::target_not_in_function="<<std::dec<< target_not_in_function << endl; + cout << "# ATTRIBUTE fix_calls::call_to_not_entry="<<std::dec<< call_to_not_entry << endl; + cout << "# ATTRIBUTE fix_calls::thunk_check="<<std::dec<< thunk_check << endl; + cout << "# ATTRIBUTE fix_calls::found_pattern="<<std::dec<< found_pattern << endl; + cout << "# ATTRIBUTE fix_calls::in_ehframe="<<std::dec<< in_ehframe << endl; + cout << "# ATTRIBUTE fix_calls::no_fix_for_ib="<<std::dec<< no_fix_for_ib << endl; + cout << "# ATTRIBUTE fix_calls::no_fix_for_safefn="<<std::dec<< no_fix_for_safefn << endl; } @@ -908,6 +900,8 @@ void fix_other_pcrel(FileIR_t* firp, Instruction_t *insn, uintptr_t virt_offset) insn->GetRelocations().insert(reloc); firp->GetRelocations().insert(reloc); + other_fixes++; + disasm=DecodedInstruction_t(insn); if(getenv("VERBOSE_FIX_CALLS")) cout<<" Converted to: "<<disasm.getDisassembly() /*CompleteInstr*/<<endl; @@ -950,6 +944,7 @@ void fix_other_pcrel(FileIR_t* firp) fix_other_pcrel(firp,insn, insn->GetAddress()->GetVirtualOffset()); fix_safefr(firp,insn, insn->GetAddress()->GetVirtualOffset()); } + cout << "# ATTRIBUTE fix_calls::other_fixes="<<std::dec<<other_fixes<<endl; } // @@ -1063,7 +1058,7 @@ int executeStep(IRDBObjects_t *const irdb_objects) if(do_eh_frame) read_ehframe(firp, elfiop); - fix_all_calls(firp,true,fix_all); + fix_all_calls(firp,fix_all); fix_other_pcrel(firp); cout<<"Done!"<<endl; @@ -1081,6 +1076,9 @@ int executeStep(IRDBObjects_t *const irdb_objects) return -1; } + assert(getenv("SELF_VALIDATE")==nullptr || (fixed_calls + other_fixes) > 5); + assert(getenv("SELF_VALIDATE")==nullptr || fix_all || not_fixed_calls > 5); + return 0; } diff --git a/libMEDSannotation/src/MEDS_DeadRegAnnotation.cpp b/libMEDSannotation/src/MEDS_DeadRegAnnotation.cpp index 2c03d1dd20dcc39195241cc10fc9d441007cbc52..cf2c43872262206ecccfb830555792a331d9d95a 100644 --- a/libMEDSannotation/src/MEDS_DeadRegAnnotation.cpp +++ 
b/libMEDSannotation/src/MEDS_DeadRegAnnotation.cpp @@ -63,7 +63,9 @@ void MEDS_DeadRegAnnotation::parse() VirtualOffset vo(m_rawInputLine); m_virtualOffset = vo; - cout <<"Found deadreg annotation in: "<<m_rawInputLine<<endl; + if (getenv("DEADREGS_VERBOSE")) { + cout <<"Found deadreg annotation in: "<<m_rawInputLine<<endl; + } // ignore result of getRegisterSet method because // we don't need to parse the rest of the line. Register::readRegisterSet(m_rawInputLine.substr(pos+tofind.length()), regset); diff --git a/libMEDSannotation/src/MEDS_FuncPrototypeAnnotation.cpp b/libMEDSannotation/src/MEDS_FuncPrototypeAnnotation.cpp index 6276cd62c1d85b72d5388562a12828de05a44027..8c0ae2c41b1a093f4d8c826669c38ae491d9d755 100644 --- a/libMEDSannotation/src/MEDS_FuncPrototypeAnnotation.cpp +++ b/libMEDSannotation/src/MEDS_FuncPrototypeAnnotation.cpp @@ -92,9 +92,8 @@ void MEDS_FuncPrototypeAnnotation::parse() char *zarg = strstr(buf, arg); if (zarg) { - char tmp[maxbufsize]; int meds_type; - sscanf(tmp,"%*s %d %*s", &meds_type); + sscanf(zarg,"%*s %d", &meds_type); MEDS_Arg marg(meds_type); addArg(marg); } diff --git a/libcapstone b/libcapstone index a6b87b7bc31499b9ec538701b438f7a60e9dda38..9408c0de4f3a4b2a3cc1cac3f22219301c561400 160000 --- a/libcapstone +++ b/libcapstone @@ -1 +1 @@ -Subproject commit a6b87b7bc31499b9ec538701b438f7a60e9dda38 +Subproject commit 9408c0de4f3a4b2a3cc1cac3f22219301c561400 diff --git a/libtransform/src/Rewrite_Utility.cpp b/libtransform/src/Rewrite_Utility.cpp index 38d4bae729d8a9879ab70ac5c771b40004e9fcd6..6df1334214ea74d19a915aaf4a0ab2af7401f8ea 100644 --- a/libtransform/src/Rewrite_Utility.cpp +++ b/libtransform/src/Rewrite_Utility.cpp @@ -149,7 +149,7 @@ Instruction_t* addNewDatabits(FileIR_t* firp, Instruction_t *p_instr, string p_b if (p_instr) newinstr = allocateNewInstruction(firp,p_instr->GetAddress()->GetFileID(), p_instr->GetFunction()); else - newinstr = allocateNewInstruction(firp,BaseObj_t::NOT_IN_DATABASE, NULL); + newinstr = 
allocateNewInstruction(firp,firp->GetFile()->GetFileID(), NULL); newinstr->SetDataBits(p_bits); @@ -168,7 +168,7 @@ Instruction_t* addNewAssembly(FileIR_t* firp, Instruction_t *p_instr, string p_a if (p_instr) newinstr = allocateNewInstruction(firp,p_instr->GetAddress()->GetFileID(), p_instr->GetFunction()); else - newinstr = allocateNewInstruction(firp,BaseObj_t::NOT_IN_DATABASE, NULL); + newinstr = allocateNewInstruction(firp,firp->GetFile()->GetFileID(), NULL); firp->RegisterAssembly(newinstr, p_asm); @@ -227,6 +227,8 @@ Instruction_t* allocateNewInstruction(FileIR_t* virp, db_id_t p_fileID,Function_ instr->SetFunction(func); instr->SetAddress(a); + if(func) + func->GetInstructions().insert(instr); virp->GetInstructions().insert(instr); virp->GetAddresses().insert(a); diff --git a/plugins_install/p1transform.sh b/plugins_install/p1transform.sh deleted file mode 100755 index c288c305cf6b7d1998a8d8de75101233b00a9225..0000000000000000000000000000000000000000 --- a/plugins_install/p1transform.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -PN_TIMEOUT_VALUE=21600 - -varid=$1 -shift - -$PEASOUP_HOME/tools/do_p1transform.sh $varid a.ncexe a.ncexe.annot $PEASOUP_HOME/tools/bed.sh $PN_TIMEOUT_VALUE $* - diff --git a/tools/meds2pdb/meds2pdb.cpp b/tools/meds2pdb/meds2pdb.cpp index 8b56919daba79ca6e2cf3c267eccf201c0744f51..3eb03a6e14240be1f723371e3bf604766a7dfa24 100644 --- a/tools/meds2pdb/meds2pdb.cpp +++ b/tools/meds2pdb/meds2pdb.cpp @@ -96,6 +96,8 @@ void insert_instructions(int fileID, const vector<wahoo::Instruction*> &instruct // for each instruction: // populate instruction table + assert(getenv("SELF_VALIDATE")==nullptr || instructions.size() > 0 ); + pqxx::tablewriter W_addrs(txn,addressTable); for (auto i = 0U; i < instructions.size(); i ++ ) { @@ -188,6 +190,8 @@ void insert_functions(int fileID, const vector<wahoo::Function*> &functions ) work txn(conn); txn.exec("SET client_encoding='LATIN1';"); + assert(getenv("SELF_VALIDATE")==nullptr || functions.size() > 
0 ); + // bulk insert of function information into the DB for (auto i = 0U; i < functions.size(); i += STRIDE) { @@ -233,6 +237,8 @@ void update_functions(int fileID, const vector<wahoo::Function*> &functions ) work txn(conn); txn.exec("SET client_encoding='LATIN1';"); + assert(getenv("SELF_VALIDATE")==nullptr || functions.size() > 0 ); + // bulk insert of function information into the DB string query; for (auto i = 0U; i < functions.size(); i += STRIDE ) diff --git a/tools/rida/rida.cpp b/tools/rida/rida.cpp index 9db875c238acc99f69ae56021eba22095b821f54..08f9a9b1d9c191971f036be757a74168404c0593 100644 --- a/tools/rida/rida.cpp +++ b/tools/rida/rida.cpp @@ -221,6 +221,9 @@ class CreateFunctions_t sccs.insert(RangeSet_t({Range_t(fde->getStartAddress(),fde->getEndAddress())})); cout<<hex; + if(getenv("SELF_VALIDATE")) + assert(fdes->size()>0); + for(const auto fde : *fdes) { if(verbose) @@ -267,6 +270,8 @@ class CreateFunctions_t void addSectionToSccs(const string &sec_name) { const auto sec=exeio.sections[sec_name]; + if(sec==nullptr) + return; const auto range=Range_t(sec->get_address(), sec->get_address()+sec->get_size()); const auto ranges=RangeSet_t({range}); sccs.insert(ranges); @@ -451,6 +456,8 @@ class CreateFunctions_t outfile<<hex<<"\t"<<min.first<<"\t"<<dec<<size<<"\tFUNC GLOBAL\t"<<funcNames[scc]<<" "<< usefp << endl; doBelongTos(scc); } + if(getenv("SELF_VALIDATE")) + assert(sccs.size()>0); } string getUseFp(const RangeSet_t scc) diff --git a/tools/selective_cfi/scfi_driver.cpp b/tools/selective_cfi/scfi_driver.cpp index cd5374c12928d4508f0298dafca8899a99064610..0bd71b6d0e37b7f2434c1cec006d175564909b37 100644 --- a/tools/selective_cfi/scfi_driver.cpp +++ b/tools/selective_cfi/scfi_driver.cpp @@ -218,6 +218,7 @@ int main(int argc, char **argv) cout<<"selective_cfi.exe started\n"; bool one_success = false; + bool seen_failures = false; for(set<File_t*>::iterator it=pidp->GetFiles().begin(); it!=pidp->GetFiles().end(); ++it) @@ -245,15 +246,18 @@ int 
main(int argc, char **argv) } else { + seen_failures = true; cout<<"Skipping (no changes) "<<this_file->GetURL()<<endl; } } catch (DatabaseError_t pnide) { + seen_failures = true; cerr << programName << ": Unexpected database error: " << pnide << "file url: " << this_file->GetURL() << endl; } catch (...) { + seen_failures = true; cerr << programName << ": Unexpected error file url: " << this_file->GetURL() << endl; } } // end file iterator @@ -265,6 +269,13 @@ int main(int argc, char **argv) pqxx_interface.Commit(); } - return 0; + if(seen_failures) + { + return 1; + } + else + { + return 0; + } } diff --git a/tools/selective_cfi/scfi_instr.cpp b/tools/selective_cfi/scfi_instr.cpp index b9f2dc20e082bdd2d7981f41da28f37b46ef08ab..1d968ae4c74b251a4b4b8cf1054964d342fe884e 100644 --- a/tools/selective_cfi/scfi_instr.cpp +++ b/tools/selective_cfi/scfi_instr.cpp @@ -327,9 +327,15 @@ bool SCFI_Instrument::mark_targets() } } } + cout<<"# ATTRIBUTE Selective_Control_Flow_Integrity::ind_targets_found="<<std::dec<<ind_targets<<endl; cout<<"# ATTRIBUTE Selective_Control_Flow_Integrity::targets_found="<<std::dec<<targets<<endl; cout<<"# ATTRIBUTE Selective_Control_Flow_Integrity::exe_nonce_targets_found="<<std::dec<<exe_nonce_targets<<endl; + + assert(getenv("SELF_VALIDATE")==nullptr || ind_targets > 5 ); + assert(getenv("SELF_VALIDATE")==nullptr || targets > 5 ); + assert(getenv("SELF_VALIDATE")==nullptr || !do_exe_nonce_for_call || exe_nonce_targets > 5 ); + return true; } @@ -1135,6 +1141,8 @@ bool SCFI_Instrument::instrument_jumps() cout<<"# ATTRIBUTE Selective_Control_Flow_Integrity::cfi_ret_complete="<<std::dec<<cfi_branch_ret_complete<<endl; display_histogram(cout, "cfi_ret_complete_histogram", rets); + assert(getenv("SELF_VALIDATE")==nullptr || cfi_branch_call_checks> 2); + assert(getenv("SELF_VALIDATE")==nullptr || cfi_branch_call_checks> 2); // 0 or 1 checks. 
cout<<"# ATTRIBUTE Selective_Control_Flow_Integrity::multimodule_checks="<< (unsigned int)(zestcfi_function_entry!=NULL) <<endl; diff --git a/tools/thanos/thanos.cpp b/tools/thanos/thanos.cpp index c48c0679aa41b4e3ad167d96c132ab316fb337dc..a5523b239d9bc8acb235532b6747c45ac9e095d3 100644 --- a/tools/thanos/thanos.cpp +++ b/tools/thanos/thanos.cpp @@ -9,6 +9,8 @@ #include <fcntl.h> #include <fstream> #include <ctime> +#include <ext/stdio_filebuf.h> + using namespace std; @@ -18,11 +20,14 @@ using namespace Transform_SDK; #define ALLOF(a) begin(a),end(a) // global to be used like cout/cerr for writing to the logs -ofstream thanos_log; +int thanos_log_fd=-1; +ostream *thanos_log; ostream *real_cout; ostream *real_cerr; string thanos_path; bool redirect_opt=true; +int new_stdout_fd=1; +int new_stderr_fd=2; class ThanosPlugin_t { @@ -51,6 +56,7 @@ class ThanosPlugin_t step_args(p_step_args) { } + void tidyIR(); int executeStep(TransformStep_t& the_step, const bool are_debugging); int commitAll(); @@ -69,18 +75,33 @@ PluginList_t getPlugins(const int argc, char const *const argv[]); int main(int argc, char* argv[]) { thanos_path=argv[0]; - ostream my_real_cerr(cerr.rdbuf()); - ostream my_real_cout(cout.rdbuf()); - real_cerr=&my_real_cerr; - real_cout=&my_real_cout; - thanos_log.open("logs/thanos.log", ofstream::out); + new_stdout_fd=dup(STDOUT_FILENO); + new_stderr_fd=dup(STDERR_FILENO); - if(!thanos_log) + __gnu_cxx::stdio_filebuf<char> stdout_filebuf(new_stdout_fd, ios::out); + __gnu_cxx::stdio_filebuf<char> stderr_filebuf(new_stderr_fd, ios::out); + + ostream my_real_cout(&stderr_filebuf); + ostream my_real_cerr(&stdout_filebuf); + real_cout=&my_real_cout; + real_cerr=&my_real_cerr; + + auto thanos_log_fileptr=fopen("logs/thanos.log", "a+"); + if(!thanos_log_fileptr) { - cerr<<"Cannot open logs/thanos.log"<<endl; + *real_cerr<<"Cannot open logs/thanos.log"<<endl; exit(1); } + thanos_log_fd=fileno(thanos_log_fileptr); + __gnu_cxx::stdio_filebuf<char> 
thanos_log_filebuf(thanos_log_fd, ios::out); + ostream thanos_log_stream(&thanos_log_filebuf); + thanos_log=&thanos_log_stream; + + // make sure stuff goes to the log unless otherwise indicated by using real_cout + dup2(thanos_log_fd, STDOUT_FILENO); + dup2(thanos_log_fd, STDERR_FILENO); + // get plugins auto argv_iter=1; while (true) @@ -105,8 +126,8 @@ int main(int argc, char* argv[]) // for now, usage is pretty strict to enable simple // parsing, because this program is only used by an // automated script - thanos_log << "Syntax error in arguments." << endl; - thanos_log << "USAGE: <thanos opts> (\"<step name> [-optional] [--step-args [ARGS]]\")+" << endl; + *thanos_log << "Syntax error in arguments." << endl; + *thanos_log << "USAGE: <thanos opts> (\"<step name> [-optional] [--step-args [ARGS]]\")+" << endl; return 1; } @@ -118,8 +139,8 @@ int main(int argc, char* argv[]) // if that returns failure AND the step is not optional if(result != 0 && !plugin->isOptional()) { - thanos_log << "A critical step failed: " << plugin->getStepName() << endl; - thanos_log << "If DEBUG_STEPS is not on, this failure could " + *thanos_log << "A critical step failed: " << plugin->getStepName() << endl; + *thanos_log << "If DEBUG_STEPS is not on, this failure could " << "be due to an earlier critical step." << endl; return 1; // critical step failed, abort } @@ -128,9 +149,9 @@ int main(int argc, char* argv[]) const int result = ThanosPlugin_t::saveChanges(); if(result != 0) { - thanos_log << "A critical step failed: " << (thanos_plugins.back())->getStepName() + *thanos_log << "A critical step failed: " << (thanos_plugins.back())->getStepName() << endl; - thanos_log << "If DEBUG_STEPS is not on, this failure could " + *thanos_log << "If DEBUG_STEPS is not on, this failure could " << "be due to an earlier critical step." 
<< endl; return 1; // critical step failed, abort } @@ -206,7 +227,7 @@ int ThanosPlugin_t::runPlugin() static const char *const base_path = getenv("SECURITY_TRANSFORMS_HOME"); if(base_path == NULL) { - thanos_log << "Environment variables not set." << endl; + *thanos_log << "Environment variables not set." << endl; return -1; } static const auto plugin_path (string(base_path).append("/plugins_install/")); @@ -215,7 +236,7 @@ int ThanosPlugin_t::runPlugin() if(dlhdl == NULL) { const auto err=dlerror(); - thanos_log<<"Cannot open "<<step_name<<": "<<err<<endl; + *thanos_log<<"Cannot open "<<step_name<<": "<<err<<endl; return -1; } @@ -223,7 +244,7 @@ int ThanosPlugin_t::runPlugin() if(sym == NULL) { const auto err=dlerror(); - thanos_log<<"Cannot find GetTransformStep in "<<step_name<<": "<<err<<endl; + *thanos_log<<"Cannot find GetTransformStep in "<<step_name<<": "<<err<<endl; return -1; } @@ -234,17 +255,14 @@ int ThanosPlugin_t::runPlugin() static const char *const are_debugging = getenv("DEBUG_STEPS"); - - auto saved_cerrbuf = cerr.rdbuf(); - auto saved_coutbuf = cout.rdbuf(); - ofstream logfile; + auto logfile=(FILE*)nullptr; auto are_logging = !((bool) are_debugging); if(are_logging) { // setup logging auto logfile_path = "./logs/"+step_name+".log"; - logfile.open(logfile_path,ofstream::out); + logfile=fopen(logfile_path.c_str(), "a+"); if(!logfile) { *real_cout<<"Cannot open log file "<<logfile_path<<endl; @@ -252,8 +270,13 @@ int ThanosPlugin_t::runPlugin() } if(redirect_opt) { - cout.rdbuf(logfile.rdbuf()); - cerr.rdbuf(logfile.rdbuf()); + dup2(fileno(logfile), STDOUT_FILENO); + dup2(fileno(logfile), STDERR_FILENO); + } + else + { + dup2(new_stdout_fd, STDOUT_FILENO); + dup2(new_stderr_fd, STDERR_FILENO); } } @@ -273,13 +296,15 @@ int ThanosPlugin_t::runPlugin() cout<< "#ATTRIBUTE end_time=" << end_time_str ; // endl in time_str cout<< "#ATTRIBUTE elapsed_time=" << elapsed_time<<endl; cout<< "#ATTRIBUTE step_name=" << step_name<<endl; - cout<< "#ATTRIBUTE 
step_command= " << thanos_path << " " << step_name - << " --step-args "; copy(ALLOF(step_args), ostream_iterator<string>(cout, " ")); cout<<endl; + cout<< "#ATTRIBUTE step_command= " << thanos_path << " \"" << step_name + << " --step-args "; copy(ALLOF(step_args), ostream_iterator<string>(cout, " ")); cout<<"\""<<endl; cout<< "#ATTRIBUTE step_exitcode="<<dec<<step_result<<endl; - cerr.rdbuf(saved_cerrbuf); - cout.rdbuf(saved_coutbuf); + dup2(thanos_log_fd, STDOUT_FILENO); + dup2(thanos_log_fd, STDERR_FILENO); + if(logfile) + fclose(logfile); the_step.reset(); // explicitly get rid of the handle to the library so we can close it. dlclose(dlhdl); @@ -289,6 +314,12 @@ int ThanosPlugin_t::runPlugin() } +void ThanosPlugin_t::tidyIR() +{ + optind=1; + shared_objects->tidyIR(); +} + int ThanosPlugin_t::executeStep(TransformStep_t& the_step, const bool are_debugging) { @@ -296,13 +327,15 @@ int ThanosPlugin_t::executeStep(TransformStep_t& the_step, const bool are_debugg flush(*real_cout); + tidyIR(); + const int parse_retval = the_step.parseArgs(step_args); if(parse_retval != 0) { *real_cout<<"Done. Command failed! ***************************************"<<endl; if(!step_optional) { - *real_cout<<"ERROR: The "<<the_step.getStepName()<<" step is necessary, but failed. Exiting early."<<endl; + *real_cout<<"ERROR: The "<<the_step.getStepName()<<" step is necessary, but options parsing failed. 
Exiting early."<<endl; } return parse_retval; } @@ -310,7 +343,7 @@ int ThanosPlugin_t::executeStep(TransformStep_t& the_step, const bool are_debugg pqxxDB_t* pqxx_interface = shared_objects->getDBInterface(); if(step_optional) { - const int error = shared_objects->writeBackAll(); + const int error = shared_objects->writeBackAll(thanos_log); if(error) { return 1; // the failure must be from a critical step, abort @@ -350,7 +383,7 @@ int ThanosPlugin_t::executeStep(TransformStep_t& the_step, const bool are_debugg if(step_optional) { // write changes to DB to see if it succeeds - const int error = shared_objects->writeBackAll(); + const int error = shared_objects->writeBackAll(thanos_log); if(error) { // abort changes by resetting DB interface @@ -366,7 +399,7 @@ int ThanosPlugin_t::executeStep(TransformStep_t& the_step, const bool are_debugg else if(are_debugging) { // write changes to DB in case next step fails - const int error = shared_objects->writeBackAll(); + const int error = shared_objects->writeBackAll(thanos_log); if(error) { return 1; // critical step failed, abort @@ -386,7 +419,7 @@ int ThanosPlugin_t::executeStep(TransformStep_t& the_step, const bool are_debugg int ThanosPlugin_t::saveChanges() { pqxxDB_t* pqxx_interface = shared_objects->getDBInterface(); - const int error = shared_objects->writeBackAll(); + const int error = shared_objects->writeBackAll(thanos_log); if(error) { return 1; // critical step failed, abort