From f46ffa33bac36c0608604d9c6feec24f08f8c5dd Mon Sep 17 00:00:00 2001 From: Anh <zenpoems@gmail.com> Date: Wed, 3 Jul 2019 15:23:14 +0000 Subject: [PATCH] made backup_until includable as needed it in other places --- irdb-libs/ir_builders/back_search.hpp | 167 +++++++++++++++++++++ irdb-libs/ir_builders/fill_in_cfg.cpp | 78 ++++++++-- irdb-libs/ir_builders/fill_in_indtargs.cpp | 8 +- 3 files changed, 238 insertions(+), 15 deletions(-) create mode 100644 irdb-libs/ir_builders/back_search.hpp diff --git a/irdb-libs/ir_builders/back_search.hpp b/irdb-libs/ir_builders/back_search.hpp new file mode 100644 index 000000000..35c59b1c4 --- /dev/null +++ b/irdb-libs/ir_builders/back_search.hpp @@ -0,0 +1,167 @@ + +#ifndef back_search_hpp +#define back_search_hpp +/* + * Copyright (c) 2014 - Zephyr Software LLC + * + * This file may be used and modified for non-commercial purposes as long as + * all copyright, permission, and nonwarranty notices are preserved. + * Redistribution is prohibited without prior written consent from Zephyr + * Software. + * + * Please contact the authors for restrictions applying to commercial use. + * + * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * Author: Zephyr Software + * e-mail: jwd@zephyr-software.com + * URL : http://www.zephyr-software.com/ + * + */ + +#include <iostream> +#include <limits> +#include <string> +#include <algorithm> +#include <stdlib.h> +#include <string.h> +#include <map> +#include <assert.h> +#include <regex.h> +#include <ctype.h> +#include <stdio.h> +#include <cctype> + +using namespace IRDB_SDK; +using namespace std; + +/* + * defines + */ +#define ALLOF(a) begin(a),end(a) + + +// a way to map an instruction to its set of (direct) predecessors. 
+map< Instruction_t* , InstructionSet_t > preds; + +void calc_preds(FileIR_t* firp) +{ + preds.clear(); + for(auto insn : firp->getInstructions()) + { + if(insn->getTarget()) + preds[insn->getTarget()].insert(insn); + if(insn->getFallthrough()) + preds[insn->getFallthrough()].insert(insn); + } +} + + +bool backup_until(const string &insn_type_regex_str, + Instruction_t *& prev, + Instruction_t* orig, + const string & stop_if_set="", + bool recursive=false, + uint32_t max_insns=10000u, + uint32_t max_recursions=5u) +{ + + const auto find_or_build_regex=[&] (const string& s) -> regex_t& + { + // declare a freer for regexs so they go away when the program ends. + const auto regex_freer=[](regex_t* to_free) -> void + { + regfree(to_free); + delete to_free; + }; + // keep the map safe from anyone but me using it. + using regex_unique_ptr_t=unique_ptr<regex_t, decltype(regex_freer)>; + static map<string, regex_unique_ptr_t > regexs_used; + + if(s=="") + { + static regex_t empty; + return empty; + } + const auto it=regexs_used.find(s); + if(it==regexs_used.end()) + { + // allocate a new regex ptr + regexs_used.insert(pair<string,regex_unique_ptr_t>(s,move(regex_unique_ptr_t(new regex_t, regex_freer)))); + // and compile it. + auto &regex_ptr=regexs_used.at(s); + const auto ret=regcomp(regex_ptr.get(), s.c_str(), REG_EXTENDED); + // error check + assert(ret==0); + } + return *regexs_used.at(s).get(); + }; + + + // build regexs. + const auto &preg = find_or_build_regex(insn_type_regex_str); + const auto &stop_expression = find_or_build_regex(stop_if_set); + + + prev=orig; + while(preds[prev].size()==1 && max_insns > 0) + { + // dec max for next loop + max_insns--; + + // get the only item in the list. 
+ prev=*(preds[prev].begin()); + + + // get I7's disassembly + const auto disasm=DecodedInstruction_t::factory(prev); + + // check it's the requested type + if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0) + return true; + + if(stop_if_set!="") + { + for(const auto operand : disasm->getOperands()) + { + if(operand->isWritten() && regexec(&stop_expression, operand->getString().c_str(), 0, nullptr, 0) == 0) + return false; + } + } + + // otherwise, try backing up again. + } + if(recursive && max_insns > 0 && max_recursions > 0 ) + { + const auto myprev=prev; + // can't just use prev because recursive call will update it. + const auto &mypreds=preds[myprev]; + for(const auto pred : mypreds) + { + prev=pred;// mark that we are here, in case we return true here. + const auto disasm=DecodedInstruction_t::factory(pred); + // check it's the requested type + if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0) + return true; + if(stop_if_set!="") + { + for(const auto operand : disasm->getOperands()) + { + if(operand->isWritten() && regexec(&stop_expression, operand->getString().c_str(), 0, nullptr, 0) == 0) + return false; + } + } + if(backup_until(insn_type_regex_str, prev, pred, stop_if_set, recursive, max_insns, max_recursions/mypreds.size())) + return true; + + // reset for next call + prev=myprev; + } + } + return false; +} + + +#endif diff --git a/irdb-libs/ir_builders/fill_in_cfg.cpp b/irdb-libs/ir_builders/fill_in_cfg.cpp index 6d5e6ed4f..e1f694fd2 100644 --- a/irdb-libs/ir_builders/fill_in_cfg.cpp +++ b/irdb-libs/ir_builders/fill_in_cfg.cpp @@ -27,6 +27,7 @@ #include <ctype.h> #include "elfio/elfio.hpp" #include "split_eh_frame.hpp" +#include "back_search.hpp" using namespace std; using namespace EXEIO; @@ -494,13 +495,15 @@ void PopulateCFG::fill_in_scoops(FileIR_t *firp) void PopulateCFG::detect_scoops_in_code(FileIR_t *firp) { + // make sure preds are up to date for this + calc_preds(firp); + // data for this function 
auto already_scoopified=map<VirtualOffset_t,DataScoop_t*>(); const auto is_arm64 = firp->getArchitecture()->getMachineType() == admtAarch64; const auto is_arm32 = firp->getArchitecture()->getMachineType() == admtArm32; const auto is_arm_variant = is_arm32 || is_arm64; - const auto do_unpin = is_arm32; // only valid for arm64 if(!is_arm_variant) return; @@ -511,25 +514,71 @@ void PopulateCFG::detect_scoops_in_code(FileIR_t *firp) // look for ldr's with a pcrel operand const auto d = DecodedInstruction_t::factory(insn); const auto mnemonic = d->getMnemonic(); + const auto is_ldrd_variant = mnemonic.substr(0,4) == "ldrd"; const auto is_ldr_variant = mnemonic.substr(0,3) == "ldr"; const auto is_vldr_variant = mnemonic.substr(0,4) == "vldr"; - const auto is_relevant_ldr = is_ldr_variant || is_vldr_variant; + const auto is_relevant_ldr = is_ldr_variant || is_vldr_variant || is_ldrd_variant; if(!is_relevant_ldr) continue; + + + // extract op0 const auto op0 = d->getOperand(0); - // capstone reports ldrd instructions as having 2 "dest" operands. - // so we skip to the 3rd operand to get the memory op. - // todo: fix libirdb-core to fix this and skip the odd operand. - // todo: report to capstone that they are broken. - const auto mem_op = mnemonic[3]=='d' ? d->getOperand(2) : d->getOperand(1); - if( !mem_op->isPcrel()) continue; + // the address we detect as referenced by this instruction. + auto referenced_address = VirtualOffset_t(0); + auto do_unpin = is_arm32; + + if(is_ldrd_variant && !d->getOperand(2)->isPcrel()) + { + const auto mem_op = d->getOperand(2); + if( mem_op->hasIndexRegister() ) continue; + if( mem_op->getMemoryDisplacement() != 0 ) continue; + + const auto mem_str = mem_op->getString(); + const auto end_of_base_reg = mem_str.find(" "); + const auto find_reg = mem_str.substr(0,end_of_base_reg); + + // find the instruction that sets the base reg. 
+ auto add_pc_insn = (Instruction_t*)nullptr; + if(!backup_until( string()+"add.* "+find_reg+", pc, #", /* look for this pattern. */ + add_pc_insn, /* store instruction in add_pc_insn */ + insn, /* insn I10 */ + "^"+find_reg+"$" /* stop if find_reg set */ + )) + { + continue; + } + + const auto add_pc_insn_d = DecodedInstruction_t::factory(add_pc_insn); + + // record to unpin something. + referenced_address = add_pc_insn_d -> getOperand(2)->getConstant() + (is_arm32 ? add_pc_insn->getAddress()->getVirtualOffset() + 8 : 0); + do_unpin = false; + + } + else + { + + // capstone reports ldrd instructions as having 2 "dest" operands. + // so we skip to the 3rd operand to get the memory op. + // todo: fix libirdb-core to fix this and skip the odd operand. + // todo: report to capstone that they are broken. + const auto mem_op = d->getOperand(1); + if( !mem_op->isPcrel()) continue; + + // if there is an indexing operation, skip this instruction. + if( mem_op->hasIndexRegister()) continue; + + // sanity check that it's a memory operation, and extract fields + assert(mem_op->isMemory()); + + + referenced_address = mem_op->getMemoryDisplacement() + (is_arm32 ? insn->getAddress()->getVirtualOffset() + 8 : 0); + } - // if there is an indexing operation, skip this instruction. - if( mem_op->hasIndexRegister()) continue; + // no address found. + if(referenced_address == 0) continue; - // sanity check that it's a memory operation, and extract fields - assert(mem_op->isMemory()); - const auto referenced_address = mem_op->getMemoryDisplacement() + (is_arm32 ? insn->getAddress()->getVirtualOffset() + 8 : 0); const auto name = "data_in_text_"+to_hex_string(referenced_address); const auto op0_str = op0->getString(); @@ -537,7 +586,8 @@ void PopulateCFG::detect_scoops_in_code(FileIR_t *firp) if(is_arm32 && op0_str == "pc" ) continue; const auto referenced_size = // could use API call? - is_arm64 && op0_str[0]=='w' ? 4 : // arm64 regs + is_ldrd_variant ? 
8 : // special case for load int reg pair. + is_arm64 && op0_str[0]=='w' ? 4 : // arm64 regs is_arm64 && op0_str[0]=='x' ? 8 : is_arm64 && op0_str[0]=='s' ? 4 : is_arm64 && op0_str[0]=='d' ? 8 : diff --git a/irdb-libs/ir_builders/fill_in_indtargs.cpp b/irdb-libs/ir_builders/fill_in_indtargs.cpp index 93d0b963d..99da1812d 100644 --- a/irdb-libs/ir_builders/fill_in_indtargs.cpp +++ b/irdb-libs/ir_builders/fill_in_indtargs.cpp @@ -43,6 +43,7 @@ #include "check_thunks.hpp" #include "fill_in_indtargs.hpp" #include "libMEDSAnnotation.h" +#include "back_search.hpp" using namespace IRDB_SDK; using namespace std; @@ -95,8 +96,10 @@ map<VirtualOffset_t,ibt_provenance_t> targets; // the set of ranges represented by the eh_frame section, could be empty for non-elf files. set< pair< VirtualOffset_t, VirtualOffset_t> > ranges; +#if 0 // a way to map an instruction to its set of (direct) predecessors. map< Instruction_t* , InstructionSet_t > preds; +#endif // keep track of jmp tables map< Instruction_t*, fii_icfs > jmptables; @@ -544,7 +547,7 @@ set<Instruction_t*> find_in_function(string needle, Function_t *haystack) } - +#if 0 bool backup_until(const string &insn_type_regex_str, Instruction_t *& prev, Instruction_t* orig, @@ -648,6 +651,7 @@ bool backup_until(const string &insn_type_regex_str, } return false; } +#endif void check_for_arm32_switch_type1( @@ -2742,6 +2746,7 @@ void check_for_nonPIC_switch_table(FileIR_t* firp, Instruction_t* insn, const De jmptables[IJ].setAnalysisStatus(iasAnalysisComplete); } +#if 0 void calc_preds(FileIR_t* firp) { preds.clear(); @@ -2753,6 +2758,7 @@ void calc_preds(FileIR_t* firp) preds[insn->getFallthrough()].insert(insn); } } +#endif void handle_takes_address_annot(FileIR_t* firp,Instruction_t* insn, MEDS_TakesAddressAnnotation* p_takes_address_annotation) { -- GitLab