diff --git a/SMPStaticAnalyzer b/SMPStaticAnalyzer index ccccd625a6e515e377728c5dcd4891d9224ddd22..152274cfbdc3fc428ea3583fa6311507e3aa7c9f 160000 --- a/SMPStaticAnalyzer +++ b/SMPStaticAnalyzer @@ -1 +1 @@ -Subproject commit ccccd625a6e515e377728c5dcd4891d9224ddd22 +Subproject commit 152274cfbdc3fc428ea3583fa6311507e3aa7c9f diff --git a/irdb-libs/ir_builders/back_search.hpp b/irdb-libs/ir_builders/back_search.hpp index 3ffd09d9c925416fd3cfd5bf7628c4b173564c0b..a3dd6bba72daad1e2d0e04cc487545cfc1fea8d5 100644 --- a/irdb-libs/ir_builders/back_search.hpp +++ b/irdb-libs/ir_builders/back_search.hpp @@ -59,17 +59,19 @@ void calc_preds(FileIR_t* firp) } -// search for an expression in prior instructions. -bool backup_until(const string &insn_type_regex_str, // what to search for +// search for an expression in prior instructions. Return whether expression was found. +// If an assignment is found, stopped_because_set is set to this instruction and false is returned +bool backup_until_or_move(const string &insn_type_regex_str, // what to search for Instruction_t *& prev, // output param -- the instruction we found. Instruction_t* orig, // where to start the search. + Instruction_t *& stopped_because_set, // output param -- the instruction that stopped the search because of stop_if_set. nullptr if the search was not stopped because of this reason const string & stop_if_set="", // stop if an operand that's written matches this expression. const string & stop_if_opcode="", // stop if an opcode matches this expression bool recursive=false, // search recursively? uint32_t max_insns=10000u, // max number of instructions to search through. uint32_t max_recursions=5u) // make number of blocks to recusive into { - + stopped_because_set = static_cast<Instruction_t *>(nullptr); const auto find_or_build_regex=[&] (const string& s) -> regex_t& { // declare a freer for regexs so they go away when the program ends. 
@@ -130,8 +132,10 @@ bool backup_until(const string &insn_type_regex_str, // what to search for { for(const auto &operand : disasm->getOperands()) { - if(operand->isWritten() && regexec(&stop_operand_expression, operand->getString().c_str(), 0, nullptr, 0) == 0) - return false; + if(operand->isWritten() && regexec(&stop_operand_expression, operand->getString().c_str(), 0, nullptr, 0) == 0) { + stopped_because_set = prev; + return false; + } } } // if we have a stop_if_opcode expresison, check the opcode to see if it matches. @@ -156,14 +160,16 @@ bool backup_until(const string &insn_type_regex_str, // what to search for { for(const auto &operand : disasm->getOperands()) { - if(operand->isWritten() && regexec(&stop_operand_expression, operand->getString().c_str(), 0, nullptr, 0) == 0) - return false; + if(operand->isWritten() && regexec(&stop_operand_expression, operand->getString().c_str(), 0, nullptr, 0) == 0) { + stopped_because_set = pred; + return false; + } } } // if we have a stop_if_opcode expresison, check the opcode to see if it matches. if(stop_if_opcode!="" && regexec(&stop_opcode_expression, disasm->getMnemonic().c_str(), 0, nullptr, 0) == 0) return false; - if(backup_until(insn_type_regex_str, prev, pred, stop_if_set, stop_if_opcode, recursive, max_insns, max_recursions/mypreds.size())) + if(backup_until_or_move(insn_type_regex_str, prev, pred, stopped_because_set, stop_if_set, stop_if_opcode, recursive, max_insns, max_recursions/mypreds.size())) return true; // reset for next call @@ -173,5 +179,28 @@ bool backup_until(const string &insn_type_regex_str, // what to search for return false; } +// search for an expression in prior instructions. +bool backup_until(const string &insn_type_regex_str, // what to search for + Instruction_t *& prev, // output param -- the instruction we found. + Instruction_t* orig, // where to start the search. + const string & stop_if_set="", // stop if an operand that's written matches this expression. 
+ const string & stop_if_opcode="", // stop if an opcode matches this expression + bool recursive=false, // search recursively? + uint32_t max_insns=10000u, // max number of instructions to search through. + uint32_t max_recursions=5u) // make number of blocks to recusive into +{ + // This function is just a proxy to keep the old API intact + auto discard = static_cast<Instruction_t *>(nullptr); + + return backup_until_or_move(insn_type_regex_str, + prev, + orig, + discard, + stop_if_set, + stop_if_opcode, + recursive, + max_insns, + max_recursions); +} #endif diff --git a/irdb-libs/ir_builders/fill_in_indtargs.cpp b/irdb-libs/ir_builders/fill_in_indtargs.cpp index d653b4025d2bc01558e4f3b2d27ff8674694ad83..a80f436d80f90f1bec56d260700af096cd7facd5 100644 --- a/irdb-libs/ir_builders/fill_in_indtargs.cpp +++ b/irdb-libs/ir_builders/fill_in_indtargs.cpp @@ -34,6 +34,7 @@ #include <sys/mman.h> #include <sys/types.h> #include <regex.h> +#include <regex> #include <ctype.h> #include <list> #include <stdio.h> @@ -117,7 +118,7 @@ static inline string regNoToX8664Reg(int regno) /* * Return the x86-32/64, 32-bit register corresponding to the regno passed in */ -static inline string regNoToX8632Reg(int regno) +static inline string regNoToX8632Reg(int regno) { switch(regno) { @@ -129,20 +130,72 @@ static inline string regNoToX8632Reg(int regno) case 5/*REG5*/: return "ebp"; case 6/*REG6*/: return "esi"; case 7/*REG7*/: return "edi"; - case 8/*REG8*/: return "r8w"; - case 9/*REG9*/: return "r9w"; - case 10/*REG10*/: return "r10w"; - case 11/*REG11*/: return "r11w"; - case 12/*REG12*/: return "r12w"; - case 13/*REG13*/: return "r13w"; - case 14/*REG14*/: return "r14w"; - case 15/*REG15*/: return "r15w"; + case 8/*REG8*/: return "r8d"; + case 9/*REG9*/: return "r9d"; + case 10/*REG10*/: return "r10d"; + case 11/*REG11*/: return "r11d"; + case 12/*REG12*/: return "r12d"; + case 13/*REG13*/: return "r13d"; + case 14/*REG14*/: return "r14d"; + case 15/*REG15*/: return "r15d"; default: 
/*
 * Shared lookup for the register-name helpers below: return names[regno] for
 * register numbers 0..15 and the empty string for anything else
 * (i.e. "no such register").
 */
static inline string regNoToNameInTable(const char* const (&names)[16], int regno)
{
	if (regno < 0 || regno >= 16)
		return "";
	return names[regno];
}

/*
 * Return the x86-32/64, 16-bit register name corresponding to the regno passed in.
 * Register numbers outside 0..15 yield "".
 */
static inline string regNoToX8616Reg(int regno)
{
	static const char* const names16[16] =
	{
		"ax",  "cx",  "dx",   "bx",   "sp",   "bp",   "si",   "di",
		"r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
	};
	return regNoToNameInTable(names16, regno);
}

/*
 * Return the x86-64, 8-bit register name corresponding to the regno passed in.
 * Numbers 4..7 map to spl/bpl/sil/dil; the high-byte names (ah/ch/dh/bh) are
 * never returned. Register numbers outside 0..15 yield "".
 */
static inline string regNoToX868Reg(int regno)
{
	static const char* const names8[16] =
	{
		"al",  "cl",  "dl",   "bl",   "spl",  "bpl",  "sil",  "dil",
		"r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
	};
	return regNoToNameInTable(names8, regno);
}
- if (!backup_until(table_index_str.c_str(), I7, I8, table_index_stop_if, "", true)) + if (!backup_until(table_index_str, I7, I8, table_index_stop_if, "", true)) return; const auto d7 = DecodedInstruction_t::factory(I7); @@ -2430,6 +2483,7 @@ V2: auto cmp_str = string(" do not match anything "); // to be updated inside if statement below auto bound_stopif = string(" do not match anything "); // to be updated inside if statement below auto and_str = string(" do not match anything "); // to be updated inside if statement below + int32_t index_reg_no = -1; // to be updated inside if statement below if (d6_op1_is_mem) { @@ -2442,17 +2496,20 @@ V2: */ if (!d6->getOperand(1)->hasBaseRegister()) return; - const auto base_reg = regNoToX8664Reg(d6->getOperand(1)->getBaseRegister()); + const auto base_reg = regNoToX8664Reg((int32_t) d6->getOperand(1)->getBaseRegister()); if (!d6->getOperand(1)->hasIndexRegister()) return; - const auto indexRegno = d6->getOperand(1)->getIndexRegister(); - const auto index_reg_64bit = regNoToX8664Reg(indexRegno); - const auto index_reg_32bit = regNoToX8632Reg(indexRegno); + index_reg_no = (int32_t) d6->getOperand(1)->getIndexRegister(); + const auto index_reg_64bit = regNoToX8664Reg(index_reg_no); + const auto index_reg_32bit = regNoToX8632Reg(index_reg_no); + const auto index_reg_16bit = regNoToX8616Reg(index_reg_no); + const auto index_reg_8bit = regNoToX868Reg(index_reg_no); + - cmp_str = "cmp " + index_reg_32bit + "|cmp " + index_reg_64bit; - bound_stopif = "^" + index_reg_32bit + "$|^" + index_reg_64bit + "$"; - and_str = "and " + index_reg_32bit + "|and " + index_reg_64bit; + cmp_str = "cmp (" + index_reg_8bit + "|" + index_reg_16bit + "|" + index_reg_32bit + "|" + index_reg_64bit + ")"; + bound_stopif = "^(" + index_reg_8bit + "|" + index_reg_16bit + "|" + index_reg_32bit + "|" + index_reg_64bit + ")$"; + and_str = "and (" + index_reg_8bit + "|" + index_reg_16bit + "|" + index_reg_32bit + "|" + index_reg_64bit + ")"; lea_string1 += 
base_reg; if (d6->getOperand(1)->getScaleValue() == 1) lea_string2 = "lea " + index_reg_64bit; @@ -2471,13 +2528,13 @@ V2: auto found_leas = InstructionSet_t(); - if (backup_until(lea_string1.c_str(), I5, I6, "", "", true)) + if (backup_until(lea_string1, I5, I6, "", "", true)) found_leas.insert(I5); - if (backup_until(lea_string2.c_str(), I5, I6, "", "", true)) + if (backup_until(lea_string2, I5, I6, "", "", true)) found_leas.insert(I5); // if we didn't find anything yet, .... - if (found_leas.size() == 0 && I6->getFunction()) + if (found_leas.empty() && I6->getFunction()) { cout << "Using find_in_function method." << endl; const auto tmp_found_leas = find_in_function("lea ", I6->getFunction()); @@ -2509,7 +2566,8 @@ V2: cmp_str, bound_stopif, and_str, - allow_unpins); + allow_unpins, + index_reg_no); } if (allow_unpins) jmptables[I8].AddSwitchType(prov); @@ -2608,11 +2666,17 @@ V2: Instruction_t *table_load_instruction, int32_t table_entry_multiplier, const ibt_provenance_t &switch_prov, - const string &cmp_str, - const string &bound_stopif, - const string &and_str, - const bool allow_unpins) + const string &p_cmp_str, + const string &p_bound_stopif, + const string &p_and_str, + const bool allow_unpins, + const int32_t index_reg_no) { + // Copy the strings for potential replace later on + string cmp_str = p_cmp_str; + string bound_stopif = p_bound_stopif; + string and_str = p_and_str; + auto table_load_disasm = DecodedInstruction_t::factory(lea_for_table_base); if (!(table_load_disasm->getOperand(1)->isMemory())) @@ -2666,20 +2730,146 @@ V2: // Thus, this default is sane for most cases, and is only applied when we absolutely // cannot find a bounds check on the table size. 
// - auto table_size = 255U; + auto table_size = 512U; + if(getenv("MAX_JUMP_TABLE_CLAMP_SIZE")){ + auto temp_table_size = strtol(getenv("MAX_JUMP_TABLE_CLAMP_SIZE"), nullptr, 10); + if(temp_table_size > 0){ + table_size = temp_table_size; + } + } auto found_table_size = false; auto I1 = static_cast<Instruction_t *>(nullptr); - if (backup_until(cmp_str.c_str(), I1, table_load_instruction, bound_stopif,"^jne$|^je$|^jeq$")) + + auto intermediate_write_instr = static_cast<Instruction_t *>(nullptr); + bool found_cmp = false; + bool found_and = false; + auto search_start_instr = table_load_instruction; + + // Check whether the index register has been renamed between bounds-check and use + // Only do this once + for( auto i = 0; i < 2 ; i++) { + found_cmp = backup_until_or_move(cmp_str, I1, search_start_instr, intermediate_write_instr, + bound_stopif, "^jne$|^je$|^jeq$"); + if (!found_cmp && intermediate_write_instr != nullptr) { + auto temp = intermediate_write_instr->getBaseID(); // Remember BaseId for sanity check + assert(temp != IRDB_SDK::BaseObj_t::NOT_IN_DATABASE); + found_and = backup_until_or_move(and_str, I1, search_start_instr, intermediate_write_instr, + bound_stopif); + if (!found_and && intermediate_write_instr != nullptr) { + // We should find the same possible write location for both checks + assert(temp == intermediate_write_instr->getBaseID()); + auto decoded_instr = DecodedInstruction_t::factory(intermediate_write_instr); + if( + // Is a reg to reg move + decoded_instr->getMnemonic().rfind("mov", 0) != string::npos && + decoded_instr->getOperand(0)->isRegister() && decoded_instr->getOperand(1)->isRegister() + // The source register is a equal to the target (modulo size differences) + && decoded_instr->getOperand(0)->getRegNumber() == decoded_instr->getOperand(1)->getRegNumber() + ){ + // If we have a move from self to self, restart searching from here, this is a case where there + // are multiple redirects like so: + // 0003bc30 cmp r12b, 0x21 + // ... 
+ // 0003bc3a movzx ecx, r12w <-- this is the actual move from temporary + // 0003bc3e movzx ecx, cl <-- we detect this as the move from temporary + // ... + // 0003bc48 movsxd rcx, dword [rdx+rcx*4] + + if (getenv("IB_VERBOSE") || getenv("DEBUG")) { + cout << "DEBUG: checking for register rename for index reg of jumptable @ 0x" + << hex << table_load_instruction->getAddress()->getVirtualOffset() << endl; + } + + search_start_instr = intermediate_write_instr; + continue; + } else { + // We have either found what we were looking for or hit a dead end + // We wanted to find a code pattern like this, which copies the index from a temporary register + // into the index reg: + // + // cmp rax, 0x1234 + // ... + // mov rbx, rax + // mov ..., [ ... rbx*4 ...] + + search_start_instr = intermediate_write_instr; + break; + } + } + } + if (found_and || found_cmp) + break; + } + + if(!found_cmp && !found_and && intermediate_write_instr != nullptr + && intermediate_write_instr->getDisassembly().rfind("mov", 0) != string::npos) { + auto decoded_instr = DecodedInstruction_t::factory(intermediate_write_instr); + if(decoded_instr->getOperand(1)->isRegister()){ + // We found a direct move from a temporary register into the index register. 
+ // Do the same search again starting from this move and check for a comparison with the temporary register + const auto index_reg_64bit = regNoToX8664Reg(index_reg_no); + const auto index_reg_32bit = regNoToX8632Reg(index_reg_no); + const auto temp_index_reg_64bit = + regNoToX8664Reg((int32_t) decoded_instr->getOperand(1)->getRegNumber()); + const auto temp_index_reg_32bit = + regNoToX8632Reg((int32_t) decoded_instr->getOperand(1)->getRegNumber()); + const auto temp_index_reg_16bit = + regNoToX8616Reg((int32_t) decoded_instr->getOperand(1)->getRegNumber()); + const auto temp_index_reg_8bit = + regNoToX868Reg((int32_t) decoded_instr->getOperand(1)->getRegNumber()); + + if(index_reg_32bit.empty() || index_reg_64bit.empty()){ + cout << "WARNING: Could not detect index register for table load at " << + hex << table_load_instruction->getAddress()->getVirtualOffset() << endl; + } else if (temp_index_reg_32bit.empty() || temp_index_reg_64bit.empty()) { + cout << "WARNING: Could not detect new temporary index register for move into index reg at " << + hex << intermediate_write_instr->getAddress()->getVirtualOffset() << endl; + } else { + auto to_replace = "\\([^()]*" + index_reg_32bit + "\\|" + index_reg_64bit + "\\)"; + auto to_replace_regex = std::regex(to_replace); + auto replace_with = "(" + temp_index_reg_8bit + "|" + temp_index_reg_16bit + "|" + temp_index_reg_32bit + "|" + temp_index_reg_64bit + ")"; + if (getenv("IB_VERBOSE") || getenv("DEBUG")) { + cout << "DEBUG: replacing index reg " << to_replace << " with " << replace_with << " because of instruction '" + << intermediate_write_instr->getDisassembly() << "' @ 0x" + << hex << intermediate_write_instr->getAddress()->getVirtualOffset() << endl; + } + + + cmp_str = std::regex_replace(cmp_str, to_replace_regex, replace_with); + bound_stopif = std::regex_replace(bound_stopif, to_replace_regex, replace_with); + and_str = std::regex_replace(and_str, to_replace_regex, replace_with); + + if (getenv("IB_VERBOSE") || 
getenv("DEBUG")) { + cout << "After replacement:" << endl << cmp_str << endl << bound_stopif << endl << and_str + << endl; + } + } + } else { + if (getenv("IB_VERBOSE") || getenv("DEBUG")) { + cout << "DEBUG: source operand for move to index reg is not a register '" + << intermediate_write_instr->getDisassembly() << "' @ 0x" + << hex << intermediate_write_instr->getAddress()->getVirtualOffset() << endl; + } + // We have hit a dead end for now, restore initial state + // This is probably a move from memory, where the index was compared in memory + // or before moving it to memory (an example is shown in the known issues list below) + // TODO: Think about handling this case + search_start_instr = table_load_instruction; + } + } + + if (backup_until(cmp_str, I1, search_start_instr, bound_stopif,"^jne$|^je$|^jeq$")) { const auto d1 = DecodedInstruction_t::factory(I1); table_size = d1->getImmediate(); // notes on table size: // readelf on ubuntu20 has a table size of 4. - cout << "pic64: found cmp-type I1 ('" << d1->getDisassembly() << "'), with table_size=" << table_size << "\n"; + cout << "pic64: found cmp-type I1 ('" << d1->getDisassembly() << "'), with table_size=" + << table_size << " at " << hex << I1->getAddress()->getVirtualOffset() << "\n"; found_table_size = true; } - else if (backup_until(and_str.c_str(), I1, table_load_instruction, bound_stopif)) + else if (backup_until(and_str, I1, search_start_instr, bound_stopif)) { const auto d1 = DecodedInstruction_t::factory(I1); const auto d1SecondOp = d1->getOperand(1); @@ -2698,20 +2888,31 @@ V2: } else { - // it's very common for the bound_stopif backup to stop before finding the compare - // because of a code pattern like this: - // - // cmp rax, 0x1234 - // ... - // mov rbx, rax - // mov ..., [ ... rbx*4 ...] - // - // As you can see, the mov rbx,rax would cause backup_until to stop there, - // missing the compare. 
- // For now, we tolerate this and let the no-table-size code find the table size - // but it might be useful to look for mov/movzx that did a register rename before - // the compare. - cout << "pic64: could not find size of switch table" << endl; + /* FIXME: Known Issues + We currently miss cases with default values enforced by byte loads and shifts: + 0004326d movzx eax, byte [r14] <-- eax <= 255 + 00043271 mov rdx, rax + 00043274 shr rdx, 0x6 <-- rdx <= (255 >> 6) = 3 + 00043278 lea rsi, [rel jump_table_ffae4] + 0004327f movsxd rdx, dword [rsi+rdx*4] + 00043283 add rdx, rsi + 00043286 jmp rdx + -- + We also miss cases where index and base reg are switched in one of the operations and the index is + compared in memory + cmp DWORD PTR [rbp-0x474],0x1d <-- check for default case + ja 0x55be3 + mov eax,DWORD PTR [rbp-0x474] <-- load index into table (without respecting the entry size) + lea rdx, [4*rax] <-- calculate index in table while respecting the entry size + lea rax,[rip+0xa3286] <-- load base + mov eax,DWORD PTR [rdx+rax] <-- load jump offset from table + cdqe + lea rdx,[rip+0xa327a] <-- load base + add rax,rdx + jmp rax + */ + cout << "pic64: could not find size of switch table for table load at " << + hex << table_load_instruction->getAddress()->getVirtualOffset() << endl; } const auto table_start_addr_it = direct_addresses.find(table_start_address); @@ -2749,7 +2950,7 @@ V2: table_entry_size == 1 ? VirtualOffset_t(*reinterpret_cast<const int8_t *>(table_entry_ptr)) : table_entry_size == 2 ? VirtualOffset_t(*reinterpret_cast<const int16_t *>(table_entry_ptr)) : table_entry_size == 4 ? VirtualOffset_t(*reinterpret_cast<const int32_t *>(table_entry_ptr)) : table_entry_size == 8 ? 
VirtualOffset_t(*reinterpret_cast<const int64_t *>(table_entry_ptr)) - : throw new invalid_argument("Cannot detect displacement size to load value "); + : throw invalid_argument("Cannot detect displacement size to load value "); const auto table_entry = raw_table_entry * table_entry_multiplier; if (!possible_target(D1 + table_entry, 0 /* from addr unknown */, switch_prov)) @@ -2845,6 +3046,17 @@ V2: * I8: jmp rdx * */ + /* TODO: check whether this is also impacted by the compiler behavior to rename the index before using it + * cmp eax,0x23 + * mov eax, eax <============== THIS + * ja 0x1400066d3 + * lea_for_table_base: lea rcx,[rip+0xffffffffffff9a7d] # 0x140000000 + * I6_2 movzx eax,BYTE PTR [rcx+rax*1+0x6a50] + * mov ebx, eax <============== OR THIS + * I6: mov edx,DWORD PTR [rcx+rbx*4+0x6a34] + * I7: add rdx,rcx + * I8: jmp rdx + */ // sanity check that I understand the variables of this function properly. // and grab the index reg diff --git a/irdb-libs/libIRDB-cfg/src/BasicBlock.cpp b/irdb-libs/libIRDB-cfg/src/BasicBlock.cpp index e89e6664e805291a2ffc4674957353cdae319fb9..e7cdd2024e0038a43bb2b53ae691e6013f9701c9 100644 --- a/irdb-libs/libIRDB-cfg/src/BasicBlock.cpp +++ b/irdb-libs/libIRDB-cfg/src/BasicBlock.cpp @@ -246,7 +246,7 @@ std::ostream& IRDB_SDK::operator<<(std::ostream& os, const IRDB_SDK::BasicBlock_ } void BasicBlock_t::dump(std::ostream& os) const { - os<<getIsExitBlock(); + os<< "Is ExitBlock:" << getIsExitBlock() << endl; os<<"\t ---- Starting block print -----" <<endl; for(auto i=0U;i<getInstructions().size();i++) { diff --git a/irdb-libs/libIRDB-core/src/assemblestr.hpp b/irdb-libs/libIRDB-core/src/assemblestr.hpp index b077bf6fac7b702ac8d5292105bd32565af67ed7..d23bab993e807548478501e2d4d281165f6deaad 100644 --- a/irdb-libs/libIRDB-core/src/assemblestr.hpp +++ b/irdb-libs/libIRDB-core/src/assemblestr.hpp @@ -6,9 +6,13 @@ static void assemblestr(ks_engine * &ks, IRDB_SDK::Instruction_t *ins, const char * instruct, char * &encode, size_t &size, 
size_t &count) { if(ks_asm(ks, instruct, 0, (unsigned char **)&encode, &size, &count) != KS_ERR_OK) { //string or cstr - ks_free((unsigned char*)encode); + auto error = ks_errno(ks); + ks_free((unsigned char*)encode); ks_close(ks); - throw std::runtime_error("ERROR: ks_asm() failed during instrunction assembly."); + auto msg = + string("ERROR: ks_asm() failed during instruction assembly. (instruction='") + + instruct + "', count=" + to_string(count) + ", error='" + ks_strerror(error) + "')"; + throw std::runtime_error(msg); } else { ins->setDataBits(string(encode, size)); diff --git a/irdb-libs/libIRDB-core/src/decode_csx86.cpp b/irdb-libs/libIRDB-core/src/decode_csx86.cpp index 34fe6a4333ff8c56fa50ecfb5e7c9c270228e920..f2d714435f5b4edd945cdd3be460af736e251929 100644 --- a/irdb-libs/libIRDB-core/src/decode_csx86.cpp +++ b/irdb-libs/libIRDB-core/src/decode_csx86.cpp @@ -304,8 +304,6 @@ string DecodedInstructionCapstoneX86_t::getDisassembly() const myReplace(full_str," dword ", " "); } - myReplace(full_str, "rip", to_string(the_insn->size)); - return full_str; } diff --git a/irdb-libs/libIRDB-core/src/operand_csx86.cpp b/irdb-libs/libIRDB-core/src/operand_csx86.cpp index f7c4216079ecdb3d17ca2906cff2da76e31fe4da..aa6135c2de87a44fd61f1534f6b9bdd0774cfa2e 100644 --- a/irdb-libs/libIRDB-core/src/operand_csx86.cpp +++ b/irdb-libs/libIRDB-core/src/operand_csx86.cpp @@ -178,7 +178,7 @@ static uint32_t to_reg_number(const x86_reg ®) return 15; default: break; } - assert(0); + throw std::invalid_argument("Not a register with an associated register number"); } // methods @@ -449,21 +449,29 @@ bool DecodedOperandCapstoneX86_t::isMemory() const bool DecodedOperandCapstoneX86_t::hasBaseRegister() const { - const auto the_insn=static_cast<cs_insn*>(my_insn.get()); - const auto &op = (the_insn->detail->x86.operands[op_num]); - const auto br_is_pc = - op.mem.base==X86_REG_RIP || - op.mem.base==X86_REG_EIP || - op.mem.base==X86_REG_IP; - const auto br_is_valid = 
op.mem.base!=X86_REG_INVALID; - return isMemory() && br_is_valid && !br_is_pc; + const auto the_insn=static_cast<cs_insn*>(my_insn.get()); + const auto &op = (the_insn->detail->x86.operands[op_num]); + const auto br_is_pc = + op.mem.base==X86_REG_RIP || + op.mem.base==X86_REG_EIP || + op.mem.base==X86_REG_IP; + const auto br_is_valid = + op.mem.base != X86_REG_INVALID && + op.mem.base != X86_REG_EIZ && + op.mem.base != X86_REG_RIZ; + return isMemory() && br_is_valid && !br_is_pc; } bool DecodedOperandCapstoneX86_t::hasIndexRegister() const { - const auto the_insn=static_cast<cs_insn*>(my_insn.get()); - const auto &op = (the_insn->detail->x86.operands[op_num]); - return isMemory() && op.mem.index!=X86_REG_INVALID; + const auto the_insn=static_cast<cs_insn*>(my_insn.get()); + const auto &op = (the_insn->detail->x86.operands[op_num]); + + const auto index_reg_is_valid = + op.mem.index != X86_REG_INVALID && + op.mem.index != X86_REG_EIZ && + op.mem.index != X86_REG_RIZ; + return isMemory() && index_reg_is_valid; } uint32_t DecodedOperandCapstoneX86_t::getBaseRegister() const diff --git a/irdb-libs/libIRDB-transform/src/rewrite_util.cpp b/irdb-libs/libIRDB-transform/src/rewrite_util.cpp index 268f57d8bfed0860c830d6f58942bf30b3324069..3fdf0456a259e7cfb8e8794d6e93c7f5a336cdde 100644 --- a/irdb-libs/libIRDB-transform/src/rewrite_util.cpp +++ b/irdb-libs/libIRDB-transform/src/rewrite_util.cpp @@ -52,6 +52,9 @@ Instruction_t* IRDB_SDK::insertAssemblyBefore(FileIR_t* virp, Instruction_t* fir next->setOriginalAddressID(first->getOriginalAddressID()); //"Null" out the original address (it should be as if the instruction was not in the database). 
first->setOriginalAddressID(BaseObj_t::NOT_IN_DATABASE); + // Keep Base ID in the original instruction + next->setBaseID(first->getBaseID()); + first->setBaseID(BaseObj_t::NOT_IN_DATABASE); auto real_first=dynamic_cast<libIRDB::Instruction_t*>(first); assert(real_first); real_first->GetRelocations().clear(); @@ -121,6 +124,9 @@ Instruction_t* IRDB_SDK::insertDataBitsBefore(FileIR_t* virp, Instruction_t* fir next->setOriginalAddressID(first->getOriginalAddressID()); //"Null" out the original address (it should be as if the instruction was not in the database). first->setOriginalAddressID(BaseObj_t::NOT_IN_DATABASE); + // Keep Base ID in the original instruction + next->setBaseID(first->getBaseID()); + first->setBaseID(BaseObj_t::NOT_IN_DATABASE); auto real_first=dynamic_cast<libIRDB::Instruction_t*>(first); assert(real_first); real_first->GetRelocations().clear(); diff --git a/irdb-libs/libIRDB-util/src/registers.cpp b/irdb-libs/libIRDB-util/src/registers.cpp index da3c6152d1f9ae755fcc4025d28a4af57a38afea..942d2b1e569b9807cd4a40195137922147917605 100644 --- a/irdb-libs/libIRDB-util/src/registers.cpp +++ b/irdb-libs/libIRDB-util/src/registers.cpp @@ -165,6 +165,14 @@ RegisterID_t IRDB_SDK::strToRegister(const string& p_reg) return rn_UNKNOWN; } +int IRDB_SDK::getRegisterBitWidth(const RegisterID_t p_reg){ + if(is64bitRegister(p_reg)) return 64; + if(is32bitRegister(p_reg)) return 32; + if(is16bitRegister(p_reg)) return 16; + if(is8bitRegister(p_reg)) return 8; + return -1; +} + bool IRDB_SDK::is8bitRegister(const RegisterID_t p_reg) { return p_reg == rn_AL || p_reg == rn_BL || p_reg == rn_CL || p_reg == rn_DL || @@ -399,33 +407,40 @@ RegisterID_t IRDB_SDK::convertRegisterTo16bit(const RegisterID_t p_reg) switch (p_reg) { - case rn_RAX: case rn_EAX: return rn_AX; - case rn_RBX: case rn_EBX: return rn_BX; - case rn_RCX: case rn_ECX: return rn_CX; - case rn_RDX: case rn_EDX: return rn_DX; - case rn_RBP: case rn_EBP: return rn_BP; - case rn_RSP: case rn_ESP: return 
rn_SP; - case rn_RSI: case rn_ESI: return rn_SI; - case rn_RDI: case rn_EDI: return rn_DI; - case rn_R8: case rn_R8D: return rn_R8W; - case rn_R9: case rn_R9D: return rn_R9W; - case rn_R10: case rn_R10D: return rn_R10W; - case rn_R11: case rn_R11D: return rn_R11W; - case rn_R12: case rn_R12D: return rn_R12W; - case rn_R13: case rn_R13D: return rn_R13W; - case rn_R14: case rn_R14D: return rn_R14W; - case rn_R15: case rn_R15D: return rn_R15W; + case rn_RAX: case rn_EAX: case rn_AH: case rn_AL: return rn_AX; + case rn_RBX: case rn_EBX: case rn_BH: case rn_BL: return rn_BX; + case rn_RCX: case rn_ECX: case rn_CH: case rn_CL: return rn_CX; + case rn_RDX: case rn_EDX: case rn_DH: case rn_DL: return rn_DX; + case rn_RBP: case rn_EBP: case rn_BPL: return rn_BP; + case rn_RSP: case rn_ESP: case rn_SPL: return rn_SP; + case rn_RSI: case rn_ESI: case rn_SIL: return rn_SI; + case rn_RDI: case rn_EDI: case rn_DIL: return rn_DI; + case rn_R8: case rn_R8D: case rn_R8B: return rn_R8W; + case rn_R9: case rn_R9D: case rn_R9B: return rn_R9W; + case rn_R10: case rn_R10D: case rn_R10B: return rn_R10W; + case rn_R11: case rn_R11D: case rn_R11B: return rn_R11W; + case rn_R12: case rn_R12D: case rn_R12B: return rn_R12W; + case rn_R13: case rn_R13D: case rn_R13B: return rn_R13W; + case rn_R14: case rn_R14D: case rn_R14B: return rn_R14W; + case rn_R15: case rn_R15D: case rn_R15B: return rn_R15W; default: return rn_UNKNOWN; break; } } +// Does not guarantee, whether the returned register is the lower or upper half +// of the lowest 16 bits RegisterID_t IRDB_SDK::convertRegisterTo8bit(const RegisterID_t p_reg) { - if (is16bitRegister(p_reg)) + if (is8bitRegister(p_reg)) return p_reg; + return convertRegisterTo8bitLower(p_reg); +} + +RegisterID_t IRDB_SDK::convertRegisterTo8bitLower(const RegisterID_t p_reg) +{ switch (p_reg) { case rn_RAX: case rn_EAX: case rn_AX: return rn_AL; @@ -450,3 +465,16 @@ RegisterID_t IRDB_SDK::convertRegisterTo8bit(const RegisterID_t p_reg) } } +RegisterID_t 
IRDB_SDK::convertRegisterTo8bitUpper(const RegisterID_t p_reg) +{ + switch (convertRegisterTo8bitLower(p_reg)) + { + case rn_AL: return rn_AH; + case rn_BL: return rn_BH; + case rn_CL: return rn_CH; + case rn_DL: return rn_DH; + default: + return rn_UNKNOWN; + break; + } +} \ No newline at end of file diff --git a/irdb-sdk b/irdb-sdk index 93748d2a2e75e973dae11659dcfbe9c9ae25523d..f3d491b57460bf89f648effe1693c5a909eca933 160000 --- a/irdb-sdk +++ b/irdb-sdk @@ -1 +1 @@ -Subproject commit 93748d2a2e75e973dae11659dcfbe9c9ae25523d +Subproject commit f3d491b57460bf89f648effe1693c5a909eca933