From a85692c233fb1e78ffb5f799252492157debcc77 Mon Sep 17 00:00:00 2001 From: Anh <zenpoems@gmail.com> Date: Sun, 3 Feb 2019 11:59:16 -0800 Subject: [PATCH] Refactor Zax --- afl_transforms/tools/zax/zax.cpp | 803 +----------------------- afl_transforms/tools/zax/zax.hpp | 88 +-- afl_transforms/tools/zax/zax_base.cpp | 720 +++++++++++++++++++++ afl_transforms/tools/zax/zax_base.hpp | 111 ++++ afl_transforms/tools/zax/zax_driver.cpp | 11 +- afl_transforms/tools/zax/zuntracer.cpp | 22 +- afl_transforms/tools/zax/zuntracer.hpp | 10 +- 7 files changed, 868 insertions(+), 897 deletions(-) create mode 100644 afl_transforms/tools/zax/zax_base.cpp create mode 100644 afl_transforms/tools/zax/zax_base.hpp diff --git a/afl_transforms/tools/zax/zax.cpp b/afl_transforms/tools/zax/zax.cpp index 620a218..e592add 100644 --- a/afl_transforms/tools/zax/zax.cpp +++ b/afl_transforms/tools/zax/zax.cpp @@ -29,8 +29,6 @@ #include <irdb-cfg> #include <irdb-transform> #include <irdb-elfdep> -#include <MEDS_DeadRegAnnotation.hpp> -#include <MEDS_SafeFuncAnnotation.hpp> #include "zax.hpp" @@ -39,256 +37,32 @@ using namespace IRDB_SDK; using namespace Zafl; using namespace MEDS_Annotation; -#define ALLOF(a) begin(a),end(a) - -Zax_t::Zax_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_forkServerEntryPoint, set<string> p_exitPoints, bool p_use_stars, bool p_autozafl, bool p_verbose) - : - Transform(p_variantIR), - m_dbinterface(p_dbinterface), - m_stars_analysis_engine(p_dbinterface), - m_fork_server_entry(p_forkServerEntryPoint), - m_exitpoints(p_exitPoints), - m_use_stars(p_use_stars), - m_autozafl(p_autozafl), - m_bb_graph_optimize(false), - m_forkserver_enabled(true), - m_breakupCriticalEdges(false), - m_verbose(p_verbose) -{ - if (m_use_stars) { - cout << "Use STARS analysis engine" << endl; - m_stars_analysis_engine.do_STARS(getFileIR()); - } - - auto ed=ElfDependencies_t::factory(getFileIR()); - if (p_autozafl) - { - cout << "autozafl library is on" << endl; 
- (void)ed->prependLibraryDepedencies("libautozafl.so"); - } - else - { - cout << "autozafl library is off" << endl; - (void)ed->prependLibraryDepedencies("libzafl.so"); - } - - if (m_verbose) - cout << "verbose mode is on" << endl; - else - cout << "verbose mode is off" << endl; - - // bind to external symbols declared in libzafl.so - m_plt_zafl_initAflForkServer=ed->appendPltEntry("zafl_initAflForkServer"); - m_trace_map = ed->appendGotEntry("zafl_trace_map"); - m_prev_id = ed->appendGotEntry("zafl_prev_id"); - - // let's not instrument these functions ever - // see isBlacklisted() for other blacklisted functions - m_blacklist.insert("init"); - m_blacklist.insert("_init"); - m_blacklist.insert("start"); - m_blacklist.insert("_start"); - m_blacklist.insert("fini"); - m_blacklist.insert("_fini"); - m_blacklist.insert("register_tm_clones"); - m_blacklist.insert("deregister_tm_clones"); - m_blacklist.insert("frame_dummy"); - m_blacklist.insert("__do_global_ctors_aux"); - m_blacklist.insert("__do_global_dtors_aux"); - m_blacklist.insert("__libc_csu_init"); - m_blacklist.insert("__libc_csu_fini"); - m_blacklist.insert("__libc_start_main"); - m_blacklist.insert("__gmon_start__"); - m_blacklist.insert("__cxa_atexit"); - m_blacklist.insert("__cxa_finalize"); - m_blacklist.insert("__assert_fail"); - m_blacklist.insert("free"); - m_blacklist.insert("fnmatch"); - m_blacklist.insert("readlinkat"); - m_blacklist.insert("malloc"); - m_blacklist.insert("calloc"); - m_blacklist.insert("realloc"); - m_blacklist.insert("argp_failure"); - m_blacklist.insert("argp_help"); - m_blacklist.insert("argp_state_help"); - m_blacklist.insert("argp_error"); - m_blacklist.insert("argp_parse"); - - m_labelid = 0; - - m_num_bb = 0; - m_num_bb_instrumented = 0; - m_num_bb_skipped = 0; - m_num_bb_skipped_pushjmp = 0; - m_num_bb_skipped_nop_padding = 0; - m_num_bb_skipped_innernode = 0; - m_num_bb_skipped_cbranch = 0; - m_num_bb_skipped_onlychild = 0; - m_num_bb_keep_exit_block = 0; - 
m_num_bb_keep_cbranch_back_edge = 0; - m_num_style_collafl = 0; -} - -void Zax_t::setBreakupCriticalEdges(const bool p_breakupEdges) -{ - m_breakupCriticalEdges = p_breakupEdges; -} - - -void create_got_reloc(FileIR_t* fir, pair<DataScoop_t*,int> wrt, Instruction_t* i) +Zax_t::Zax_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_forkServerEntryPoint, set<string> p_exitPoints, bool p_use_stars, bool p_autozafl) : ZaxBase_t(p_dbinterface, p_variantIR, p_forkServerEntryPoint, p_exitPoints, p_use_stars, p_autozafl) { - /* - auto r=new Relocation_t(BaseObj_t::NOT_IN_DATABASE, wrt.second, "pcrel", wrt.first); - fir->getRelocations().insert(r); - i->getRelocations().insert(r); - */ - (void)fir->addNewRelocation(i,wrt.second, "pcrel", wrt.first); -} - -RegisterSet_t get_dead_regs(Instruction_t* insn, MEDS_AnnotationParser &meds_ap_param) -{ - std::pair<MEDS_Annotations_t::iterator,MEDS_Annotations_t::iterator> ret; - - /* find it in the annotations */ - ret = meds_ap_param.getAnnotations().equal_range(insn->getBaseID()); - MEDS_DeadRegAnnotation* p_annotation; - - /* for each annotation for this instruction */ - for (MEDS_Annotations_t::iterator it = ret.first; it != ret.second; ++it) - { - p_annotation=dynamic_cast<MEDS_DeadRegAnnotation*>(it->second); - if(p_annotation==NULL) - continue; - - /* bad annotation? 
*/ - if(!p_annotation->isValid()) - continue; - - return p_annotation->getRegisterSet(); - } - - /* couldn't find the annotation, return an empty set.*/ - return RegisterSet_t(); -} - -// return intersection of candidates and allowed general-purpose registers -RegisterSet_t get_free_regs(const RegisterSet_t candidates, const RegisterSet_t allowed) -{ - std::set<RegisterName> free_regs; - set_intersection(candidates.begin(),candidates.end(),allowed.begin(),allowed.end(), - std::inserter(free_regs,free_regs.begin())); - return free_regs; -} - -static bool hasLeafAnnotation(Function_t* fn, MEDS_AnnotationParser &meds_ap_param) -{ - assert(fn); - const auto ret = meds_ap_param.getFuncAnnotations().equal_range(fn->getName()); - const auto sfa_it = find_if(ret.first, ret.second, [](const MEDS_Annotations_FuncPair_t &it) - { - auto p_annotation=dynamic_cast<MEDS_SafeFuncAnnotation*>(it.second); - if(p_annotation==NULL) - return false; - return p_annotation->isLeaf(); - } - ); - - return (sfa_it != ret.second); -} - -bool Zax_t::BB_isPaddingNop(const BasicBlock_t *p_bb) -{ - return p_bb->getInstructions().size()==1 && - p_bb->getPredecessors().size()==0 && - p_bb->getSuccessors().size()==1 && - p_bb->getInstructions()[0]->getDisassembly().find("nop")!=string::npos; -} - -bool Zax_t::BB_isPushJmp(const BasicBlock_t *p_bb) -{ - return p_bb->getInstructions().size()==2 && - p_bb->getInstructions()[0]->getDisassembly().find("push")!=string::npos && - p_bb->getInstructions()[1]->getDisassembly().find("jmp")!=string::npos; } /* - * Only allow instrumentation in whitelisted functions/instructions - * Each line in file is either a function name or address + * Return random block id + * Try to avoid duplicate ids */ -void Zax_t::setWhitelist(const string& p_whitelist) -{ - std::ifstream whitelistFile(p_whitelist); - if (!whitelistFile.is_open()) - throw; - std::string line; - while(whitelistFile >> line) - { - cout <<"Adding " << line << " to white list" << endl; - 
m_whitelist.insert(line); - } - whitelistFile.close(); -} - -/* - * Disallow instrumentation in blacklisted functions/instructions - * Each line in file is either a function name or address - */ -void Zax_t::setBlacklist(const string& p_blackList) -{ - std::ifstream blackListFile(p_blackList); - if (!blackListFile.is_open()) - throw; - std::string line; - while(blackListFile >> line) - { - cout <<"Adding " << line << " to black list" << endl; - m_blacklist.insert(line); - } - blackListFile.close(); -} - zafl_blockid_t Zax_t::get_blockid(const unsigned p_max) { - auto counter = 0; - auto blockid = 0; - - // only try getting new block id 100 times - // avoid returning duplicate if we can help it - while (counter++ < 100) { - blockid = rand() % p_max; - if (m_used_blockid.find(blockid) == m_used_blockid.end()) - { - m_used_blockid.insert(blockid); - return blockid; - } - } - return blockid; -} - -zafl_labelid_t Zax_t::get_labelid(const unsigned p_max) -{ - return m_labelid++; -} - -void Zax_t::insertExitPoint(Instruction_t *p_inst) -{ - assert(p_inst->getAddress()->getVirtualOffset()); - - if (p_inst->getFunction()) - cout << "in function: " << p_inst->getFunction()->getName() << " "; - - stringstream ss; - ss << hex << p_inst->getAddress()->getVirtualOffset(); - m_blacklist.insert(ss.str()); - - cout << "insert exit point at: 0x" << ss.str() << endl; - - auto tmp = p_inst; - insertAssemblyBefore(tmp, "xor edi, edi"); // rdi=0 - tmp = insertAssemblyAfter(tmp, "mov eax, 231"); // 231 = __NR_exit_group from <asm/unistd_64.h> - tmp = insertAssemblyAfter(tmp, "syscall"); // sys_exit_group(edi) + auto counter = 0; + auto blockid = 0; + + // only try getting new block id 100 times + // avoid returning duplicate if we can help it + while (counter++ < 100) { + blockid = rand() % p_max; + if (m_used_blockid.find(blockid) == m_used_blockid.end()) + { + m_used_blockid.insert(blockid); + return blockid; + } + } + return blockid; } - + /* Original afl instrumentation: block_id = 
<random>; @@ -555,544 +329,3 @@ void Zax_t::afl_instrument_bb(Instruction_t *p_inst, const bool p_honorRedZone, free(reg_prev_id); } -void Zax_t::insertForkServer(Instruction_t* p_entry) -{ - assert(p_entry); - - stringstream ss; - ss << "0x" << hex << p_entry->getAddress()->getVirtualOffset(); - cout << "inserting fork server code at address: " << ss.str() << dec << endl; - assert(p_entry->getAddress()->getVirtualOffset()); - - if (p_entry->getFunction()) { - cout << " function: " << p_entry->getFunction()->getName(); - cout << " ep instr: " << p_entry->getDisassembly() << endl; - } - cout << endl; - - // blacklist insertion point - cout << "Blacklisting entry point: " << ss.str() << endl; - m_blacklist.insert(ss.str()); - - // insert the instrumentation - auto tmp=p_entry; - const auto regs = vector<string>({ "rdi", "rsi", "rbp", "rdx", "rcx", "rbx", "rax", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}); - - // red zone - (void)insertAssemblyBefore(tmp, "lea rsp, [rsp-128]"); - // save flags and registrers - tmp = insertAssemblyAfter(tmp, "pushf ") ; - for (vector<string>::const_iterator rit = regs.begin(); rit != regs.end(); ++rit) - tmp = insertAssemblyAfter(tmp, " push " + *rit); - // call fork server initialization routine (in external library) - tmp = insertAssemblyAfter(tmp, "call 0 ", m_plt_zafl_initAflForkServer) ; - // restore registers and flags - for (vector<string>::const_reverse_iterator rit = regs.rbegin(); rit != regs.rend(); ++rit) - tmp = insertAssemblyAfter(tmp, " pop " + *rit) ; - tmp = insertAssemblyAfter(tmp, "popf ") ; - // red zome - tmp = insertAssemblyAfter(tmp, "lea rsp, [rsp+128]"); -} - -void Zax_t::insertForkServer(string p_forkServerEntry) -{ - assert(p_forkServerEntry.size() > 0); - - cout << "looking for fork server entry point: " << p_forkServerEntry << endl; - - if (std::isdigit(p_forkServerEntry[0])) - { - // find instruction to insert fork server based on address - const auto voffset = (VirtualOffset_t) 
std::strtoul(p_forkServerEntry.c_str(), NULL, 16); - auto instructions=find_if(getFileIR()->getInstructions().begin(), getFileIR()->getInstructions().end(), [&](const Instruction_t* i) { - return i->getAddress()->getVirtualOffset()==voffset; - }); - - if (instructions==getFileIR()->getInstructions().end()) - { - cerr << "Error: could not find address to insert fork server: " << p_forkServerEntry << endl; - throw; - } - - insertForkServer(*instructions); - } - else - { - // find entry point of specified function to insert fork server - auto entryfunc=find_if(getFileIR()->getFunctions().begin(), getFileIR()->getFunctions().end(), [&](const Function_t* f) { - return f->getName()==p_forkServerEntry; - }); - - - if(entryfunc==getFileIR()->getFunctions().end()) - { - cerr << "Error: could not find function to insert fork server: " << p_forkServerEntry << endl; - throw; - } - - cout << "inserting fork server code at entry point of function: " << p_forkServerEntry << endl; - auto entrypoint = (*entryfunc)->getEntryPoint(); - - if (!entrypoint) - { - cerr << "Could not find entry point for: " << p_forkServerEntry << endl; - throw; - } - insertForkServer(entrypoint); - } -} - -void Zax_t::setupForkServer() -{ - if (m_fork_server_entry.size()>0) - { - // user has specified entry point - insertForkServer(m_fork_server_entry); - } - else - { - // try to insert fork server at main - const auto &all_funcs=getFileIR()->getFunctions(); - const auto main_func_it=find_if(all_funcs.begin(), all_funcs.end(), [&](const Function_t* f) { return f->getName()=="main";}); - if(main_func_it!=all_funcs.end()) - { - insertForkServer("main"); - } - - } - - // it's ok not to have a fork server at all, e.g. 
libraries -} - -void Zax_t::insertExitPoints() -{ - for (auto exitp : m_exitpoints) - { - if (std::isdigit(exitp[0])) - { - // find instruction to insert fork server based on address - const auto voffset = (VirtualOffset_t) std::strtoul(exitp.c_str(), NULL, 16); - auto instructions=find_if(getFileIR()->getInstructions().begin(), getFileIR()->getInstructions().end(), [&](const Instruction_t* i) { - return i->getAddress()->getVirtualOffset()==voffset; - }); - - if (instructions==getFileIR()->getInstructions().end()) - { - cerr << "Error: could not find address to insert exit point: " << exitp << endl; - throw; - } - - insertExitPoint(*instructions); - } - else - { - // find function by name - auto func_iter=find_if(getFileIR()->getFunctions().begin(), getFileIR()->getFunctions().end(), [&](const Function_t* f) { - return f->getName()==exitp; - }); - - - if(func_iter==getFileIR()->getFunctions().end()) - { - cerr << "Error: could not find function to insert exit points: " << exitp << endl; - throw; - } - - cout << "inserting exit code at return points of function: " << exitp << endl; - for (auto i : (*func_iter)->getInstructions()) - { - if (i->getBaseID() >= 0) - { - const auto d=DecodedInstruction_t::factory(i); - - // if it's a return instruction, add exit point - if (d->isReturn()) - { - insertExitPoint(i); - } - } - } - } - } -} - -#ifdef DEPRECATE -static bool isConditionalBranch(const Instruction_t *i) -{ - const auto d=DecodedInstruction_t::factory(i); - return (d->isConditionalBranch()); -} - - -static void walkSuccessors(set<BasicBlock_t*> &p_visited_successors, BasicBlock_t *p_bb, BasicBlock_t *p_target) -{ - if (p_bb == NULL || p_target == NULL) - return; - - for (auto b : p_bb->getSuccessors()) - { - if (p_visited_successors.find(b) == p_visited_successors.end()) - { -// cout << "bb anchored at " << b->getInstructions()[0]->getBaseID() << " is a successor of bb anchored at " << p_bb->getInstructions()[0]->getBaseID() << endl; - 
p_visited_successors.insert(b); - if (p_visited_successors.find(p_target) != p_visited_successors.end()) - return; - walkSuccessors(p_visited_successors, b, p_target); - } - } -} -// @nb: move in BB class? -static bool hasBackEdge(BasicBlock_t *p_bb) -{ - assert(p_bb); - if (p_bb->getPredecessors().find(p_bb)!=p_bb->getPredecessors().end()) - return true; - if (p_bb->getSuccessors().find(p_bb)!=p_bb->getSuccessors().end()) - return true; - if (p_bb->getSuccessors().size() == 0) - return false; - - // walk successors recursively - set<BasicBlock_t*> all_successors; - - cout << "Walk successors for bb anchored at: " << p_bb->getInstructions()[0]->getBaseID() << endl; - walkSuccessors(all_successors, p_bb, p_bb); - if (all_successors.find(p_bb)!=all_successors.end()) - return true; - - return false; -} - -#endif - -// blacklist functions: -// - in blacklist -// - that start with '.' -// - that end with @plt -bool Zax_t::isBlacklisted(const Function_t *p_func) const -{ - return (p_func->getName()[0] == '.' 
|| - p_func->getName().find("@plt") != string::npos || - m_blacklist.find(p_func->getName())!=m_blacklist.end()); -} - -bool Zax_t::isWhitelisted(const Function_t *p_func) const -{ - if (m_whitelist.size() == 0) return true; - return (m_whitelist.find(p_func->getName())!=m_whitelist.end()); -} - -bool Zax_t::isBlacklisted(const Instruction_t *p_inst) const -{ - stringstream ss; - ss << "0x" << hex << p_inst->getAddress()->getVirtualOffset(); - return (m_blacklist.count(ss.str()) > 0 || isBlacklisted(p_inst->getFunction())); -} - -bool Zax_t::isWhitelisted(const Instruction_t *p_inst) const -{ - if (m_whitelist.size() == 0) return true; - - stringstream ss; - ss << "0x" << hex << p_inst->getAddress()->getVirtualOffset(); - return (m_whitelist.count(ss.str()) > 0 || isWhitelisted(p_inst->getFunction())); -} - -void Zax_t::setup() -{ - if (m_forkserver_enabled) - setupForkServer(); - else - cout << "Fork server has been disabled" << endl; - - insertExitPoints(); -} - -void Zax_t::teardown() -{ - dumpAttributes(); - dumpMap(); -} - -// in: control flow graph for a given function -// out: set of basic blocks to instrument -set<BasicBlock_t*> Zax_t::getBlocksToInstrument(ControlFlowGraph_t &cfg) -{ - static int bb_debug_id=-1; - - if (m_verbose) - cout << cfg << endl; - - auto keepers = set<BasicBlock_t*>(); - - for (auto &bb : cfg.getBlocks()) - { - assert(bb->getInstructions().size() > 0); - - bb_debug_id++; - - // already marked as a keeper - if (keepers.find(bb) != keepers.end()) - continue; - - // if whitelist specified, only allow instrumentation for functions/addresses in whitelist - if (m_whitelist.size() > 0) - { - if (!isWhitelisted(bb->getInstructions()[0])) - { - continue; - } - } - - if (isBlacklisted(bb->getInstructions()[0])) - continue; - - // debugging support - if (getenv("ZAFL_LIMIT_BEGIN")) - { - if (bb_debug_id < atoi(getenv("ZAFL_LIMIT_BEGIN"))) - continue; - } - - // debugging support - if (getenv("ZAFL_LIMIT_END")) - { - if (bb_debug_id >= 
atoi(getenv("ZAFL_LIMIT_END"))) - continue; - } - - // make sure we're not trying to instrument code we just inserted, e.g., fork server, added exit points - if (bb->getInstructions()[0]->getBaseID() < 0) - continue; - - // push/jmp pair, don't bother instrumenting - if (BB_isPushJmp(bb)) - { - m_num_bb_skipped_pushjmp++; - continue; - } - - // padding nop, don't bother - if (BB_isPaddingNop(bb)) - { - m_num_bb_skipped_nop_padding++; - continue; - } - - // optimization: - // inner node: 1 predecessor and 1 successor - // - // predecessor has only 1 successor (namely this bb) - // bb has 1 predecessor - if (m_bb_graph_optimize) - { - if (bb->getSuccessors().size() == 2 && bb->endsInConditionalBranch()) - { - m_num_bb_skipped_cbranch++; - continue; - } -#ifdef DEPRECATE - auto point_to_self = false; - if (bb->getPredecessors().find(bb)!=bb->getPredecessors().end()) { - point_to_self = true; - } - if (bb->getPredecessors().size()==1 && !point_to_self) - { - if (bb->getSuccessors().size() == 1 && - (!bb->getInstructions()[0]->getIndirectBranchTargetAddress())) - { - cout << "Skipping bb #" << dec << bb_debug_id << " because inner node with 1 predecessor and 1 successor" << endl; - m_num_bb_skipped_innernode++; - continue; - } - - const auto pred = *(bb->getPredecessors().begin()); - if (pred->getSuccessors().size() == 1) - { - if (!bb->getInstructions()[0]->getIndirectBranchTargetAddress()) - { - cout << "Skipping bb #" << dec << bb_debug_id << " because not ibta, <1,*> and preds <*,1>" << endl; - m_num_bb_skipped_onlychild++; - continue; - } - - if (pred->GetIsExitBlock()) - { - m_num_bb_skipped_onlychild++; - cout << "Skipping bb #" << dec << bb_debug_id << " because ibta, <1,*> and preds(exit_block) <*,1>" << endl; - continue; - } - } - } - - // optimization conditional branch: - // elide conditional branch when no back edges - if (bb->getSuccessors().size() == 2 && isConditionalBranch(bb->getInstructions()[bb->getInstructions().size()-1])) - { - - if 
(hasBackEdge(bb)) - { - cout << "Keeping bb #" << dec << bb_debug_id << " conditional branch has back edge" << endl; - m_num_bb_keep_cbranch_back_edge++; - keepers.insert(bb); - continue; - } - - for (auto &s: bb->getSuccessors()) - { - if (s->GetIsExitBlock() || s->getSuccessors().size()==0) - { - m_num_bb_keep_exit_block++; - keepers.insert(s); - } - } - - cout << "Skipping bb #" << dec << bb_debug_id << " because conditional branch with 2 successors" << endl; - m_num_bb_skipped_cbranch++; - continue; - } -#endif - } - - keepers.insert(bb); - } - return keepers; -} - -/* - * Execute the transform. - * - * preconditions: the FileIR is read as from the IRDB. valid file listing functions to auto-initialize - * postcondition: instructions added to auto-initialize stack for each specified function - * - */ -int Zax_t::execute() -{ - setup(); - - // for all functions - // build cfg and extract basic blocks - // for all basic blocks, figure out whether should be kept - // for all kept basic blocks - // add afl-compatible instrumentation - - struct BaseIDSorter - { - bool operator()( const Function_t* lhs, const Function_t* rhs ) const - { - return lhs->getBaseID() < rhs->getBaseID(); - } - }; - auto sortedFuncs=set<Function_t*, BaseIDSorter>( ALLOF(getFileIR()->getFunctions())); - for(auto f : sortedFuncs) - { - if (f == nullptr ) continue; - // skip instrumentation for blacklisted functions - if (isBlacklisted(f)) continue; - // skip if function has no entry point - if (!f->getEntryPoint()) continue; - - bool leafAnnotation = true; - if (m_use_stars) - { - leafAnnotation = hasLeafAnnotation(f, m_stars_analysis_engine.getAnnotations()); - } - -// auto cfg=ControlFlowGraph_t(f); - auto cfgp = ControlFlowGraph_t::factory(f); - auto &cfg = *cfgp; - - const auto num_blocks_in_func = cfg.getBlocks().size(); - m_num_bb += num_blocks_in_func; - - - auto keepers = getBlocksToInstrument(cfg); - struct BBSorter - { - bool operator()( const BasicBlock_t* lhs, const BasicBlock_t* 
rhs ) const - { - const auto lhs_insns=lhs->getInstructions(); - const auto rhs_insns=rhs->getInstructions(); - assert(lhs_insns[0]->getBaseID() != BaseObj_t::NOT_IN_DATABASE); - assert(rhs_insns[0]->getBaseID() != BaseObj_t::NOT_IN_DATABASE); - return lhs_insns[0]->getBaseID() < rhs_insns[0]->getBaseID(); - } - }; - auto sortedBasicBlocks = set<BasicBlock_t*, BBSorter> (ALLOF(keepers)); - for (auto &bb : sortedBasicBlocks) - { - auto collAflSingleton = false; - // for collAfl-style instrumentation, we want #predecessors==1 - // if the basic block entry point is an IBTA, we don't know the #predecessors - if (m_bb_graph_optimize && - bb->getPredecessors().size() == 1 && - !bb->getInstructions()[0]->getIndirectBranchTargetAddress() - ) - { - collAflSingleton = true; - m_num_style_collafl++; - - } - - afl_instrument_bb(bb->getInstructions()[0], leafAnnotation, collAflSingleton); - } - - - m_num_bb_instrumented += keepers.size(); - m_num_bb_skipped += (num_blocks_in_func - keepers.size()); - - if (m_verbose) - { - cout << "Post transformation CFG:" << endl; -// auto post_cfg=ControlFlowGraph_t(f); - auto post_cfg=ControlFlowGraph_t::factory(f); - cout << *post_cfg << endl; - } - - cout << "Function " << f->getName() << ": " << dec << keepers.size() << "/" << num_blocks_in_func << " basic blocks instrumented." 
<< endl; - }; - - teardown(); - - return 1; -} - -void Zax_t::dumpAttributes() -{ - cout << "#ATTRIBUTE num_bb=" << dec << m_num_bb << endl; - cout << "#ATTRIBUTE num_bb_instrumented=" << m_num_bb_instrumented << endl; - cout << "#ATTRIBUTE num_bb_skipped=" << m_num_bb_skipped << endl; - cout << "#ATTRIBUTE num_bb_skipped_pushjmp=" << m_num_bb_skipped_pushjmp << endl; - cout << "#ATTRIBUTE num_bb_skipped_nop_padding=" << m_num_bb_skipped_nop_padding << endl; - cout << "#ATTRIBUTE graph_optimize=" << boolalpha << m_bb_graph_optimize << endl; - if (m_bb_graph_optimize) - { - cout << "#ATTRIBUTE num_bb_skipped_cond_branch=" << m_num_bb_skipped_cbranch << endl; - cout << "#ATTRIBUTE num_bb_keep_cbranch_back_edge=" << m_num_bb_keep_cbranch_back_edge << endl; - cout << "#ATTRIBUTE num_bb_keep_exit_block=" << m_num_bb_keep_exit_block << endl; - cout << "#ATTRIBUTE num_style_collafl=" << m_num_style_collafl << endl; - cout << "#ATTRIBUTE num_bb_skipped_onlychild=" << m_num_bb_skipped_onlychild << endl; - cout << "#ATTRIBUTE num_bb_skipped_innernode=" << m_num_bb_skipped_innernode << endl; - } -} - -void Zax_t::dumpMap() -{ - // dump out modified basic block info - getFileIR()->setBaseIDS(); // make sure instructions have IDs - getFileIR()->assembleRegistry(); // make sure to assemble all instructions - - std::ofstream mapfile("zax.map"); - - mapfile << "# BLOCK_ID ID_EP:size ID_OLDEP:size (ID_INSTRUMENTATION:size)*" << endl; - for (auto &mb : m_modifiedBlocks) - { - const auto blockid = mb.first; - mapfile << dec << blockid << " "; - for (auto &entry : mb.second) - { - mapfile << hex << entry->getBaseID() << ":" << dec << entry->getDataBits().size() << " "; - } - mapfile << endl; - } -} diff --git a/afl_transforms/tools/zax/zax.hpp b/afl_transforms/tools/zax/zax.hpp index f8b0e2a..89be018 100644 --- a/afl_transforms/tools/zax/zax.hpp +++ b/afl_transforms/tools/zax/zax.hpp @@ -1,106 +1,28 @@ #ifndef _LIBTRANSFORM_ZAX_H #define _LIBTRANSFORM_ZAX_H -#include <irdb-core> 
-#include <irdb-cfg> -#include <irdb-transform> -#include <stars.h> -#include <MEDS_Register.hpp> - - -// utility functions -// @todo: move these functions into other libs for reuse -extern void create_got_reloc(IRDB_SDK::FileIR_t* fir, std::pair<IRDB_SDK::DataScoop_t*,int> wrt, IRDB_SDK::Instruction_t* i); -extern MEDS_Annotation::RegisterSet_t get_dead_regs(IRDB_SDK::Instruction_t* insn, MEDS_Annotation::MEDS_AnnotationParser &meds_ap_param); -extern MEDS_Annotation::RegisterSet_t get_free_regs(const MEDS_Annotation::RegisterSet_t candidates, const MEDS_Annotation::RegisterSet_t allowed); +#include "zax_base.hpp" namespace Zafl { - using namespace IRDB_SDK; - using namespace std; - typedef unsigned zafl_blockid_t; - typedef unsigned zafl_labelid_t; - typedef vector<Instruction_t*> BBRecord_t; - - // - // Transform to add afl-compatible instrumentation, including a fork server + // Implements afl-style edge coverage instrumentation // - class Zax_t : public Transform + class Zax_t : public ZaxBase_t { public: // explicitly disable default and copy constructors Zax_t() = delete; Zax_t(const Zafl::Zax_t&) = delete; - Zax_t(pqxxDB_t &p_dbinterface, FileIR_t *p_variantIR, string p_entry, set<string> p_exits, bool p_use_stars=false, bool p_autozafl=false, bool p_verbose=false); + Zax_t(pqxxDB_t &p_dbinterface, FileIR_t *p_variantIR, string p_entry, set<string> p_exits, bool p_use_stars=false, bool p_autozafl=false); virtual ~Zax_t() {}; - virtual int execute(); - void setWhitelist(const string& p_filename); - void setBlacklist(const string& p_filename); - void setBasicBlockOptimization(bool p_bb_graph_optimize) {m_bb_graph_optimize=p_bb_graph_optimize;} - void setEnableForkServer(bool p_forkserver_enabled) {m_forkserver_enabled=p_forkserver_enabled;} - void setBreakupCriticalEdges(const bool p_breakupCriticalEdges); protected: virtual zafl_blockid_t get_blockid(const unsigned p_maxid=0xFFFF); - virtual zafl_labelid_t get_labelid(const unsigned p_maxid=0xFFFF); - 
virtual set<BasicBlock_t*> getBlocksToInstrument(ControlFlowGraph_t &cfg); virtual void afl_instrument_bb(Instruction_t *inst, const bool p_hasLeafAnnotation, const bool p_collafl_optimization=false); - void insertExitPoint(Instruction_t *inst); - void insertForkServer(Instruction_t* p_entry); - void insertForkServer(string p_forkServerEntry); - void setupForkServer(); - void insertExitPoints(); - bool isBlacklisted(const Function_t*) const; - bool isWhitelisted(const Function_t*) const; - bool isBlacklisted(const Instruction_t*) const; - bool isWhitelisted(const Instruction_t*) const; - bool BB_isPushJmp(const BasicBlock_t *p_bb); - bool BB_isPaddingNop(const BasicBlock_t *p_bb); - virtual void setup(); - virtual void teardown(); - virtual void dumpMap(); - virtual void dumpAttributes(); - - protected: - pqxxDB_t& m_dbinterface; - STARS::IRDB_Interface_t m_stars_analysis_engine; - - string m_fork_server_entry; // string to specify fork server entry point - set<string> m_exitpoints; // set of strings to specify exit points - bool m_use_stars; // use STARS to have access to dead register info - bool m_autozafl; // link in library w/ auto fork server - bool m_bb_graph_optimize; // skip basic blocks based on graph - bool m_forkserver_enabled; // fork server enabled? 
- bool m_breakupCriticalEdges; - bool m_verbose; - - pair<DataScoop_t*,int> m_trace_map; // afl shared memory trace map - pair<DataScoop_t*,int> m_prev_id; // id of previous block - Instruction_t* m_plt_zafl_initAflForkServer; // plt entry for afl fork server initialization routine - - set<string> m_whitelist; // whitelisted functions and/or instructions - set<string> m_blacklist; // blacklisted functions and/or instructions - - zafl_labelid_t m_labelid; // internal bookkeeping to generate labels - - map<zafl_blockid_t, BBRecord_t> m_modifiedBlocks; // keep track of modified blocks - - // stats - unsigned m_num_bb; - unsigned m_num_bb_instrumented; - unsigned m_num_bb_skipped; - unsigned m_num_bb_skipped_pushjmp; - unsigned m_num_bb_skipped_nop_padding; - unsigned m_num_bb_skipped_innernode; - unsigned m_num_bb_skipped_cbranch; - unsigned m_num_bb_skipped_onlychild; - unsigned m_num_bb_keep_exit_block; - unsigned m_num_bb_keep_cbranch_back_edge; - unsigned m_num_style_collafl; private: - set<zafl_blockid_t> m_used_blockid; // internal bookkeeping to keep track of used block ids + set<zafl_blockid_t> m_used_blockid; // internal bookkeeping to keep track of used block ids }; } diff --git a/afl_transforms/tools/zax/zax_base.cpp b/afl_transforms/tools/zax/zax_base.cpp new file mode 100644 index 0000000..b64969a --- /dev/null +++ b/afl_transforms/tools/zax/zax_base.cpp @@ -0,0 +1,720 @@ +/*************************************************************************** + * Copyright (c) 2018-2019 Zephyr Software LLC. All rights reserved. + * + * This software is furnished under a license and/or other restrictive + * terms and may be used and copied only in accordance with such terms + * and the inclusion of the above copyright notice. This software or + * any other copies thereof may not be provided or otherwise made + * available to any other person without the express written consent + * of an authorized representative of Zephyr Software LCC. 
Title to, + * ownership of, and all rights in the software is retained by + * Zephyr Software LCC. + * + * Zephyr Software LLC. Proprietary Information + * + * Unless otherwise specified, the information contained in this + * directory, following this legend, and/or referenced herein is + * Zephyr Software LLC. (Zephyr) Proprietary Information. + * + * CONTACT INFO + * + * E-mail: jwd@zephyr-software.com + **************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <algorithm> +#include <cctype> +#include <sstream> +#include <irdb-cfg> +#include <irdb-transform> +#include <irdb-elfdep> +#include <MEDS_DeadRegAnnotation.hpp> +#include <MEDS_SafeFuncAnnotation.hpp> + +#include "zax_base.hpp" + +using namespace std; +using namespace IRDB_SDK; +using namespace Zafl; +using namespace MEDS_Annotation; + +#define ALLOF(a) begin(a),end(a) + +void create_got_reloc(FileIR_t* fir, pair<DataScoop_t*,int> wrt, Instruction_t* i) +{ + (void)fir->addNewRelocation(i,wrt.second, "pcrel", wrt.first); +} + +RegisterSet_t get_dead_regs(Instruction_t* insn, MEDS_AnnotationParser &meds_ap_param) +{ + std::pair<MEDS_Annotations_t::iterator,MEDS_Annotations_t::iterator> ret; + + /* find it in the annotations */ + ret = meds_ap_param.getAnnotations().equal_range(insn->getBaseID()); + MEDS_DeadRegAnnotation* p_annotation; + + /* for each annotation for this instruction */ + for (MEDS_Annotations_t::iterator it = ret.first; it != ret.second; ++it) + { + p_annotation=dynamic_cast<MEDS_DeadRegAnnotation*>(it->second); + if(p_annotation==NULL) + continue; + + /* bad annotation? 
*/ + if(!p_annotation->isValid()) + continue; + + return p_annotation->getRegisterSet(); + } + + /* couldn't find the annotation, return an empty set.*/ + return RegisterSet_t(); +} + +// return intersection of candidates and allowed general-purpose registers +RegisterSet_t get_free_regs(const RegisterSet_t candidates, const RegisterSet_t allowed) +{ + std::set<RegisterName> free_regs; + set_intersection(candidates.begin(),candidates.end(),allowed.begin(),allowed.end(), + std::inserter(free_regs,free_regs.begin())); + return free_regs; +} + +static bool hasLeafAnnotation(Function_t* fn, MEDS_AnnotationParser &meds_ap_param) +{ + assert(fn); + const auto ret = meds_ap_param.getFuncAnnotations().equal_range(fn->getName()); + const auto sfa_it = find_if(ret.first, ret.second, [](const MEDS_Annotations_FuncPair_t &it) + { + auto p_annotation=dynamic_cast<MEDS_SafeFuncAnnotation*>(it.second); + if(p_annotation==NULL) + return false; + return p_annotation->isLeaf(); + } + ); + + return (sfa_it != ret.second); +} + +bool ZaxBase_t::BB_isPaddingNop(const BasicBlock_t *p_bb) const +{ + return p_bb->getInstructions().size()==1 && + p_bb->getPredecessors().size()==0 && + p_bb->getSuccessors().size()==1 && + p_bb->getInstructions()[0]->getDisassembly().find("nop")!=string::npos; +} + +bool ZaxBase_t::BB_isPushJmp(const BasicBlock_t *p_bb) const +{ + return p_bb->getInstructions().size()==2 && + p_bb->getInstructions()[0]->getDisassembly().find("push")!=string::npos && + p_bb->getInstructions()[1]->getDisassembly().find("jmp")!=string::npos; +} + +ZaxBase_t::ZaxBase_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_forkServerEntryPoint, set<string> p_exitPoints, bool p_use_stars, bool p_autozafl) + : + Transform(p_variantIR), + m_dbinterface(p_dbinterface), + m_stars_analysis_engine(p_dbinterface), + m_use_stars(p_use_stars), + m_autozafl(p_autozafl), + m_bb_graph_optimize(false), + m_forkserver_enabled(true), + m_breakupCriticalEdges(false), + 
m_fork_server_entry(p_forkServerEntryPoint), + m_exitpoints(p_exitPoints) +{ + if (m_use_stars) { + cout << "Use STARS analysis engine" << endl; + m_stars_analysis_engine.do_STARS(getFileIR()); + } + + auto ed=ElfDependencies_t::factory(getFileIR()); + if (p_autozafl) + { + cout << "autozafl library is on" << endl; + (void)ed->prependLibraryDepedencies("libautozafl.so"); + } + else + { + cout << "autozafl library is off" << endl; + (void)ed->prependLibraryDepedencies("libzafl.so"); + } + + m_verbose = false; + + // bind to external symbols declared in libzafl.so + m_plt_zafl_initAflForkServer=ed->appendPltEntry("zafl_initAflForkServer"); + m_trace_map = ed->appendGotEntry("zafl_trace_map"); + m_prev_id = ed->appendGotEntry("zafl_prev_id"); + + // let's not instrument these functions ever + // see isBlacklisted() for other blacklisted functions + m_blacklist.insert("init"); + m_blacklist.insert("_init"); + m_blacklist.insert("start"); + m_blacklist.insert("_start"); + m_blacklist.insert("fini"); + m_blacklist.insert("_fini"); + m_blacklist.insert("register_tm_clones"); + m_blacklist.insert("deregister_tm_clones"); + m_blacklist.insert("frame_dummy"); + m_blacklist.insert("__do_global_ctors_aux"); + m_blacklist.insert("__do_global_dtors_aux"); + m_blacklist.insert("__libc_csu_init"); + m_blacklist.insert("__libc_csu_fini"); + m_blacklist.insert("__libc_start_main"); + m_blacklist.insert("__gmon_start__"); + m_blacklist.insert("__cxa_atexit"); + m_blacklist.insert("__cxa_finalize"); + m_blacklist.insert("__assert_fail"); + m_blacklist.insert("free"); + m_blacklist.insert("fnmatch"); + m_blacklist.insert("readlinkat"); + m_blacklist.insert("malloc"); + m_blacklist.insert("calloc"); + m_blacklist.insert("realloc"); + m_blacklist.insert("argp_failure"); + m_blacklist.insert("argp_help"); + m_blacklist.insert("argp_state_help"); + m_blacklist.insert("argp_error"); + m_blacklist.insert("argp_parse"); + + m_labelid = 0; + m_blockid = 0; + + m_num_bb = 0; + 
m_num_bb_instrumented = 0; + m_num_bb_skipped = 0; + m_num_bb_skipped_pushjmp = 0; + m_num_bb_skipped_nop_padding = 0; + m_num_bb_skipped_innernode = 0; + m_num_bb_skipped_cbranch = 0; + m_num_bb_skipped_onlychild = 0; + m_num_bb_keep_exit_block = 0; + m_num_bb_keep_cbranch_back_edge = 0; + m_num_style_collafl = 0; +} + +void ZaxBase_t::setVerbose(bool p_verbose) +{ + m_verbose = p_verbose; +} + +void ZaxBase_t::setBasicBlockOptimization(bool p_bb_graph_optimize) +{ + m_bb_graph_optimize = p_bb_graph_optimize; +} + +void ZaxBase_t::setEnableForkServer(bool p_forkserver_enabled) +{ + m_forkserver_enabled = p_forkserver_enabled; +} + +void ZaxBase_t::setBreakupCriticalEdges(bool p_breakupEdges) +{ + m_breakupCriticalEdges = p_breakupEdges; +} + +/* + * Only allow instrumentation in whitelisted functions/instructions + * Each line in file is either a function name or address + */ +void ZaxBase_t::setWhitelist(const string& p_whitelist) +{ + std::ifstream whitelistFile(p_whitelist); + if (!whitelistFile.is_open()) + throw; + std::string line; + while(whitelistFile >> line) + { + cout <<"Adding " << line << " to white list" << endl; + m_whitelist.insert(line); + } + whitelistFile.close(); +} + +/* + * Disallow instrumentation in blacklisted functions/instructions + * Each line in file is either a function name or address + */ +void ZaxBase_t::setBlacklist(const string& p_blackList) +{ + std::ifstream blackListFile(p_blackList); + if (!blackListFile.is_open()) + throw; + std::string line; + while(blackListFile >> line) + { + cout <<"Adding " << line << " to black list" << endl; + m_blacklist.insert(line); + } + blackListFile.close(); +} + +zafl_labelid_t ZaxBase_t::get_labelid(const unsigned p_max) +{ + return m_labelid++; +} + +zafl_blockid_t ZaxBase_t::get_blockid(const unsigned p_max) +{ + m_blockid = (m_blockid+1) % p_max; + return m_blockid; +} + +void ZaxBase_t::insertExitPoint(Instruction_t *p_inst) +{ + assert(p_inst->getAddress()->getVirtualOffset()); + + if 
(p_inst->getFunction()) + cout << "in function: " << p_inst->getFunction()->getName() << " "; + + stringstream ss; + ss << hex << p_inst->getAddress()->getVirtualOffset(); + m_blacklist.insert(ss.str()); + + cout << "insert exit point at: 0x" << ss.str() << endl; + + auto tmp = p_inst; + insertAssemblyBefore(tmp, "xor edi, edi"); // rdi=0 + tmp = insertAssemblyAfter(tmp, "mov eax, 231"); // 231 = __NR_exit_group from <asm/unistd_64.h> + tmp = insertAssemblyAfter(tmp, "syscall"); // sys_exit_group(edi) +} + +void ZaxBase_t::insertForkServer(Instruction_t* p_entry) +{ + assert(p_entry); + + stringstream ss; + ss << "0x" << hex << p_entry->getAddress()->getVirtualOffset(); + cout << "inserting fork server code at address: " << ss.str() << dec << endl; + assert(p_entry->getAddress()->getVirtualOffset()); + + if (p_entry->getFunction()) { + cout << " function: " << p_entry->getFunction()->getName(); + cout << " ep instr: " << p_entry->getDisassembly() << endl; + } + cout << endl; + + // blacklist insertion point + cout << "Blacklisting entry point: " << ss.str() << endl; + m_blacklist.insert(ss.str()); + + // insert the instrumentation + auto tmp=p_entry; + const auto regs = vector<string>({ "rdi", "rsi", "rbp", "rdx", "rcx", "rbx", "rax", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}); + + // red zone + (void)insertAssemblyBefore(tmp, "lea rsp, [rsp-128]"); + // save flags and registers + tmp = insertAssemblyAfter(tmp, "pushf ") ; + for (vector<string>::const_iterator rit = regs.begin(); rit != regs.end(); ++rit) + tmp = insertAssemblyAfter(tmp, " push " + *rit); + // call fork server initialization routine (in external library) + tmp = insertAssemblyAfter(tmp, "call 0 ", m_plt_zafl_initAflForkServer) ; + // restore registers and flags + for (vector<string>::const_reverse_iterator rit = regs.rbegin(); rit != regs.rend(); ++rit) + tmp = insertAssemblyAfter(tmp, " pop " + *rit) ; + tmp = insertAssemblyAfter(tmp, "popf ") ; + // red zone + tmp =
insertAssemblyAfter(tmp, "lea rsp, [rsp+128]"); +} + +void ZaxBase_t::insertForkServer(string p_forkServerEntry) +{ + assert(p_forkServerEntry.size() > 0); + + cout << "looking for fork server entry point: " << p_forkServerEntry << endl; + + if (std::isdigit(p_forkServerEntry[0])) + { + // find instruction to insert fork server based on address + const auto voffset = (VirtualOffset_t) std::strtoul(p_forkServerEntry.c_str(), NULL, 16); + auto instructions=find_if(getFileIR()->getInstructions().begin(), getFileIR()->getInstructions().end(), [&](const Instruction_t* i) { + return i->getAddress()->getVirtualOffset()==voffset; + }); + + if (instructions==getFileIR()->getInstructions().end()) + { + cerr << "Error: could not find address to insert fork server: " << p_forkServerEntry << endl; + throw; + } + + insertForkServer(*instructions); + } + else + { + // find entry point of specified function to insert fork server + auto entryfunc=find_if(getFileIR()->getFunctions().begin(), getFileIR()->getFunctions().end(), [&](const Function_t* f) { + return f->getName()==p_forkServerEntry; + }); + + + if(entryfunc==getFileIR()->getFunctions().end()) + { + cerr << "Error: could not find function to insert fork server: " << p_forkServerEntry << endl; + throw; + } + + cout << "inserting fork server code at entry point of function: " << p_forkServerEntry << endl; + auto entrypoint = (*entryfunc)->getEntryPoint(); + + if (!entrypoint) + { + cerr << "Could not find entry point for: " << p_forkServerEntry << endl; + throw; + } + insertForkServer(entrypoint); + } +} + +void ZaxBase_t::setupForkServer() +{ + if (m_fork_server_entry.size()>0) + { + // user has specified entry point + insertForkServer(m_fork_server_entry); + } + else + { + // try to insert fork server at main + const auto &all_funcs=getFileIR()->getFunctions(); + const auto main_func_it=find_if(all_funcs.begin(), all_funcs.end(), [&](const Function_t* f) { return f->getName()=="main";}); + if(main_func_it!=all_funcs.end()) 
+ { + insertForkServer("main"); + } + + } + + // it's ok not to have a fork server at all, e.g. libraries +} + +void ZaxBase_t::insertExitPoints() +{ + for (auto exitp : m_exitpoints) + { + if (std::isdigit(exitp[0])) + { + // find instruction to insert exit point based on address + const auto voffset = (VirtualOffset_t) std::strtoul(exitp.c_str(), NULL, 16); + auto instructions=find_if(getFileIR()->getInstructions().begin(), getFileIR()->getInstructions().end(), [&](const Instruction_t* i) { + return i->getAddress()->getVirtualOffset()==voffset; + }); + + if (instructions==getFileIR()->getInstructions().end()) + { + cerr << "Error: could not find address to insert exit point: " << exitp << endl; + throw; + } + + insertExitPoint(*instructions); + } + else + { + // find function by name + auto func_iter=find_if(getFileIR()->getFunctions().begin(), getFileIR()->getFunctions().end(), [&](const Function_t* f) { + return f->getName()==exitp; + }); + + + if(func_iter==getFileIR()->getFunctions().end()) + { + cerr << "Error: could not find function to insert exit points: " << exitp << endl; + throw; + } + + cout << "inserting exit code at return points of function: " << exitp << endl; + for (auto i : (*func_iter)->getInstructions()) + { + if (i->getBaseID() >= 0) + { + const auto d=DecodedInstruction_t::factory(i); + + // if it's a return instruction, add exit point + if (d->isReturn()) + { + insertExitPoint(i); + } + } + } + } + } +} + +// blacklist functions: +// - in blacklist +// - that start with '.' +// - that end with @plt +bool ZaxBase_t::isBlacklisted(const Function_t *p_func) const +{ + return (p_func->getName()[0] == '.'
|| + p_func->getName().find("@plt") != string::npos || + m_blacklist.find(p_func->getName())!=m_blacklist.end()); +} + +bool ZaxBase_t::isWhitelisted(const Function_t *p_func) const +{ + if (m_whitelist.size() == 0) return true; + return (m_whitelist.find(p_func->getName())!=m_whitelist.end()); +} + +bool ZaxBase_t::isBlacklisted(const Instruction_t *p_inst) const +{ + stringstream ss; + ss << "0x" << hex << p_inst->getAddress()->getVirtualOffset(); + return (m_blacklist.count(ss.str()) > 0 || isBlacklisted(p_inst->getFunction())); +} + +bool ZaxBase_t::isWhitelisted(const Instruction_t *p_inst) const +{ + if (m_whitelist.size() == 0) return true; + + stringstream ss; + ss << "0x" << hex << p_inst->getAddress()->getVirtualOffset(); + return (m_whitelist.count(ss.str()) > 0 || isWhitelisted(p_inst->getFunction())); +} + +void ZaxBase_t::setup() +{ + if (m_forkserver_enabled) + setupForkServer(); + else + cout << "Fork server has been disabled" << endl; + + insertExitPoints(); +} + +void ZaxBase_t::teardown() +{ + dumpAttributes(); + dumpMap(); +} + +// in: control flow graph for a given function +// out: set of basic blocks to instrument +set<BasicBlock_t*> ZaxBase_t::getBlocksToInstrument(ControlFlowGraph_t &cfg) +{ + static int bb_debug_id=-1; + + if (m_verbose) + cout << cfg << endl; + + auto keepers = set<BasicBlock_t*>(); + + for (auto &bb : cfg.getBlocks()) + { + assert(bb->getInstructions().size() > 0); + + bb_debug_id++; + + // already marked as a keeper + if (keepers.find(bb) != keepers.end()) + continue; + + // if whitelist specified, only allow instrumentation for functions/addresses in whitelist + if (!isWhitelisted(bb->getInstructions()[0])) + continue; + + if (isBlacklisted(bb->getInstructions()[0])) + continue; + + // debugging support + if (getenv("ZAFL_LIMIT_BEGIN")) + { + if (bb_debug_id < atoi(getenv("ZAFL_LIMIT_BEGIN"))) + continue; + } + + // debugging support + if (getenv("ZAFL_LIMIT_END")) + { + if (bb_debug_id >= 
atoi(getenv("ZAFL_LIMIT_END"))) + continue; + } + + // make sure we're not trying to instrument code we just inserted, e.g., fork server, added exit points + if (bb->getInstructions()[0]->getBaseID() < 0) + continue; + + // push/jmp pair, don't bother instrumenting + if (BB_isPushJmp(bb)) + { + m_num_bb_skipped_pushjmp++; + continue; + } + + // padding nop, don't bother + if (BB_isPaddingNop(bb)) + { + m_num_bb_skipped_nop_padding++; + continue; + } + + // optimization: + // inner node: 1 predecessor and 1 successor + // + // predecessor has only 1 successor (namely this bb) + // bb has 1 predecessor + if (m_bb_graph_optimize) + { + if (bb->getSuccessors().size() == 2 && bb->endsInConditionalBranch()) + { + // for now, until we get a more principled way of pruning the graph, + // make sure to keep both successors + for (auto next_bb : bb->getSuccessors()) + keepers.insert(next_bb); + m_num_bb_skipped_cbranch++; + continue; + } + } + + keepers.insert(bb); + } + return keepers; +} + +// by default, return the first instruction in block +Instruction_t* ZaxBase_t::getInstructionToInstrument(const BasicBlock_t *p_bb) +{ + if (!p_bb) return nullptr; + return p_bb->getInstructions()[0]; +} + +/* + * Execute the transform. + * + * preconditions: the FileIR is read as from the IRDB. 
valid file listing functions to auto-initialize + * postcondition: instructions added to auto-initialize stack for each specified function + * + */ +int ZaxBase_t::execute() +{ + setup(); + + // for all functions + // build cfg and extract basic blocks + // for all basic blocks, figure out whether should be kept + // for all kept basic blocks + // add afl-compatible instrumentation + + struct BaseIDSorter + { + bool operator()( const Function_t* lhs, const Function_t* rhs ) const + { + return lhs->getBaseID() < rhs->getBaseID(); + } + }; + auto sortedFuncs=set<Function_t*, BaseIDSorter>( ALLOF(getFileIR()->getFunctions())); + for(auto f : sortedFuncs) + { + if (f == nullptr ) continue; + // skip instrumentation for blacklisted functions + if (isBlacklisted(f)) continue; + // skip if function has no entry point + if (!f->getEntryPoint()) continue; + + bool leafAnnotation = true; + if (m_use_stars) + { + leafAnnotation = hasLeafAnnotation(f, m_stars_analysis_engine.getAnnotations()); + } + + auto cfgp = ControlFlowGraph_t::factory(f); + auto &cfg = *cfgp; + + const auto num_blocks_in_func = cfg.getBlocks().size(); + m_num_bb += num_blocks_in_func; + + auto keepers = getBlocksToInstrument(cfg); + struct BBSorter + { + bool operator()( const BasicBlock_t* lhs, const BasicBlock_t* rhs ) const + { + const auto lhs_insns=lhs->getInstructions(); + const auto rhs_insns=rhs->getInstructions(); + assert(lhs_insns[0]->getBaseID() != BaseObj_t::NOT_IN_DATABASE); + assert(rhs_insns[0]->getBaseID() != BaseObj_t::NOT_IN_DATABASE); + return lhs_insns[0]->getBaseID() < rhs_insns[0]->getBaseID(); + } + }; + auto sortedBasicBlocks = set<BasicBlock_t*, BBSorter> (ALLOF(keepers)); + for (auto &bb : sortedBasicBlocks) + { + auto collAflSingleton = false; + // for collAfl-style instrumentation, we want #predecessors==1 + // if the basic block entry point is an IBTA, we don't know the #predecessors + if (m_bb_graph_optimize && + bb->getPredecessors().size() == 1 && + 
!bb->getInstructions()[0]->getIndirectBranchTargetAddress() + ) + { + collAflSingleton = true; + m_num_style_collafl++; + + } + + auto instruction = getInstructionToInstrument(bb); + if (instruction) + afl_instrument_bb(instruction, leafAnnotation, collAflSingleton); + } + + m_num_bb_instrumented += keepers.size(); + m_num_bb_skipped += (num_blocks_in_func - keepers.size()); + + if (m_verbose) + { + cout << "Post transformation CFG:" << endl; + auto post_cfg=ControlFlowGraph_t::factory(f); + cout << *post_cfg << endl; + } + + cout << "Function " << f->getName() << ": " << dec << keepers.size() << "/" << num_blocks_in_func << " basic blocks instrumented." << endl; + }; + + teardown(); + + return 1; +} + +void ZaxBase_t::dumpAttributes() +{ + cout << "#ATTRIBUTE num_bb=" << dec << m_num_bb << endl; + cout << "#ATTRIBUTE num_bb_instrumented=" << m_num_bb_instrumented << endl; + cout << "#ATTRIBUTE num_bb_skipped=" << m_num_bb_skipped << endl; + cout << "#ATTRIBUTE num_bb_skipped_pushjmp=" << m_num_bb_skipped_pushjmp << endl; + cout << "#ATTRIBUTE num_bb_skipped_nop_padding=" << m_num_bb_skipped_nop_padding << endl; + cout << "#ATTRIBUTE graph_optimize=" << boolalpha << m_bb_graph_optimize << endl; + if (m_bb_graph_optimize) + { + cout << "#ATTRIBUTE num_bb_skipped_cond_branch=" << m_num_bb_skipped_cbranch << endl; + cout << "#ATTRIBUTE num_bb_keep_cbranch_back_edge=" << m_num_bb_keep_cbranch_back_edge << endl; + cout << "#ATTRIBUTE num_bb_keep_exit_block=" << m_num_bb_keep_exit_block << endl; + cout << "#ATTRIBUTE num_style_collafl=" << m_num_style_collafl << endl; + cout << "#ATTRIBUTE num_bb_skipped_onlychild=" << m_num_bb_skipped_onlychild << endl; + cout << "#ATTRIBUTE num_bb_skipped_innernode=" << m_num_bb_skipped_innernode << endl; + } +} + +// file dump of modified basic block info +void ZaxBase_t::dumpMap() +{ + getFileIR()->setBaseIDS(); // make sure instructions have IDs + getFileIR()->assembleRegistry(); // make sure to assemble all instructions + + 
std::ofstream mapfile("zax.map"); + + mapfile << "# BLOCK_ID ID_EP:size ID_OLDEP:size (ID_INSTRUMENTATION:size)*" << endl; + for (auto &mb : m_modifiedBlocks) + { + const auto blockid = mb.first; + mapfile << dec << blockid << " "; + for (auto &entry : mb.second) + { + mapfile << hex << entry->getBaseID() << ":" << dec << entry->getDataBits().size() << " "; + } + mapfile << endl; + } +} diff --git a/afl_transforms/tools/zax/zax_base.hpp b/afl_transforms/tools/zax/zax_base.hpp new file mode 100644 index 0000000..a5fd1a1 --- /dev/null +++ b/afl_transforms/tools/zax/zax_base.hpp @@ -0,0 +1,111 @@ +#ifndef _LIBTRANSFORM_ZAXBASE_H +#define _LIBTRANSFORM_ZAXBASE_H + +#include <irdb-core> +#include <irdb-cfg> +#include <irdb-transform> +#include <stars.h> +#include <MEDS_Register.hpp> + + +// utility functions +// @todo: move these functions into other libs for reuse +extern void create_got_reloc(IRDB_SDK::FileIR_t* fir, std::pair<IRDB_SDK::DataScoop_t*,int> wrt, IRDB_SDK::Instruction_t* i); +extern MEDS_Annotation::RegisterSet_t get_dead_regs(IRDB_SDK::Instruction_t* insn, MEDS_Annotation::MEDS_AnnotationParser &meds_ap_param); +extern MEDS_Annotation::RegisterSet_t get_free_regs(const MEDS_Annotation::RegisterSet_t candidates, const MEDS_Annotation::RegisterSet_t allowed); + +namespace Zafl +{ + using namespace IRDB_SDK; + using namespace std; + using zafl_blockid_t = unsigned; + using zafl_labelid_t = unsigned; + using BBRecord_t = vector<Instruction_t*>; + + /* + * Base class for afl-compatible instrumentation: + * - fork server + * - trace map + */ + class ZaxBase_t : public Transform + { + public: + ZaxBase_t() = delete; + ZaxBase_t(const Zafl::ZaxBase_t&) = delete; + virtual ~ZaxBase_t() {}; + virtual int execute(); + void setWhitelist(const string& p_filename); + void setBlacklist(const string& p_filename); + void setVerbose(bool); + void setBasicBlockOptimization(bool); + void setEnableForkServer(bool); + void setBreakupCriticalEdges(bool); + + protected: + 
ZaxBase_t(pqxxDB_t &p_dbinterface, FileIR_t *p_variantIR, string p_entry, set<string> p_exits, bool p_use_stars=false, bool p_autozafl=false); + + virtual void afl_instrument_bb(Instruction_t *inst, const bool p_hasLeafAnnotation, const bool p_collafl_optimization=false) = 0; + + virtual zafl_blockid_t get_blockid(const unsigned p_maxid=0xFFFF); + virtual zafl_labelid_t get_labelid(const unsigned p_maxid=0xFFFF); + virtual set<BasicBlock_t*> getBlocksToInstrument(ControlFlowGraph_t &cfg); + virtual Instruction_t* getInstructionToInstrument(const BasicBlock_t *p_bb); + virtual void setup(); + virtual void teardown(); + virtual void dumpAttributes(); + virtual void dumpMap(); + + void insertExitPoint(Instruction_t *inst); + void insertForkServer(Instruction_t* p_entry); + void insertForkServer(string p_forkServerEntry); + void setupForkServer(); + void insertExitPoints(); + bool isBlacklisted(const Function_t*) const; + bool isWhitelisted(const Function_t*) const; + bool isBlacklisted(const Instruction_t*) const; + bool isWhitelisted(const Instruction_t*) const; + bool BB_isPushJmp(const BasicBlock_t *p_bb) const; + bool BB_isPaddingNop(const BasicBlock_t *p_bb) const; + + protected: + pqxxDB_t& m_dbinterface; + STARS::IRDB_Interface_t m_stars_analysis_engine; + + bool m_use_stars; // use STARS to have access to dead register info + bool m_autozafl; // link in library w/ auto fork server + bool m_bb_graph_optimize; // skip basic blocks based on graph + bool m_forkserver_enabled; // fork server enabled? 
+ bool m_breakupCriticalEdges; + bool m_verbose; + + pair<DataScoop_t*,int> m_trace_map; // afl shared memory trace map + pair<DataScoop_t*,int> m_prev_id; // id of previous block + Instruction_t* m_plt_zafl_initAflForkServer; // plt entry for afl fork server initialization routine + + map<zafl_blockid_t, BBRecord_t> m_modifiedBlocks; // keep track of modified blocks + + // stats + unsigned m_num_bb; + unsigned m_num_bb_instrumented; + unsigned m_num_bb_skipped; + unsigned m_num_bb_skipped_pushjmp; + unsigned m_num_bb_skipped_nop_padding; + unsigned m_num_bb_skipped_innernode; + unsigned m_num_bb_skipped_cbranch; + unsigned m_num_bb_skipped_onlychild; + unsigned m_num_bb_keep_exit_block; + unsigned m_num_bb_keep_cbranch_back_edge; + unsigned m_num_style_collafl; + + private: + string m_fork_server_entry; // string to specify fork server entry point + set<string> m_exitpoints; // set of strings to specify exit points + set<string> m_whitelist; // whitelisted functions and/or instructions + set<string> m_blacklist; // blacklisted functions and/or instructions + zafl_labelid_t m_labelid; // internal bookkeeping to generate labels + zafl_blockid_t m_blockid; // internal bookkeeping to generate block ids + + }; +} + +#endif diff --git a/afl_transforms/tools/zax/zax_driver.cpp b/afl_transforms/tools/zax/zax_driver.cpp index aef0f15..261cb4f 100644 --- a/afl_transforms/tools/zax/zax_driver.cpp +++ b/afl_transforms/tools/zax/zax_driver.cpp @@ -188,16 +188,19 @@ int main(int argc, char **argv) try { - auto zax_raw= - untracer_mode ?
new ZUntracer_t(*pqxx_interface, firp.get(), entry_fork_server, exitpoints, use_stars, autozafl, verbose) : - new Zax_t(*pqxx_interface, firp.get(), entry_fork_server, exitpoints, use_stars, autozafl, verbose); - auto zax=unique_ptr<Zax_t>(zax_raw); + ZaxBase_t* zax_raw; + if (untracer_mode) + zax_raw = new ZUntracer_t(*pqxx_interface, firp.get(), entry_fork_server, exitpoints, use_stars, autozafl); + else + zax_raw = new Zax_t(*pqxx_interface, firp.get(), entry_fork_server, exitpoints, use_stars, autozafl); + auto zax = unique_ptr<ZaxBase_t>(zax_raw); if (whitelistFile.size()>0) zax->setWhitelist(whitelistFile); if (blacklistFile.size()>0) zax->setBlacklist(blacklistFile); + zax->setVerbose(verbose); zax->setBasicBlockOptimization(bb_graph_optimize); zax->setEnableForkServer(forkserver_enabled); zax->setBreakupCriticalEdges(breakup_critical_edges); diff --git a/afl_transforms/tools/zax/zuntracer.cpp b/afl_transforms/tools/zax/zuntracer.cpp index 076c49f..4c8c3e4 100644 --- a/afl_transforms/tools/zax/zuntracer.cpp +++ b/afl_transforms/tools/zax/zuntracer.cpp @@ -4,17 +4,8 @@ using namespace Zafl; using namespace MEDS_Annotation; -ZUntracer_t::ZUntracer_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_forkServerEntryPoint, set<string> p_exitPoints, bool p_use_stars, bool p_autozafl, bool p_verbose) : Zax_t(p_dbinterface, p_variantIR, p_forkServerEntryPoint, p_exitPoints, p_use_stars, p_autozafl, p_verbose) +ZUntracer_t::ZUntracer_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_forkServerEntryPoint, set<string> p_exitPoints, bool p_use_stars, bool p_autozafl) : ZaxBase_t(p_dbinterface, p_variantIR, p_forkServerEntryPoint, p_exitPoints, p_use_stars, p_autozafl) { - m_blockid = 0; -} - -zafl_blockid_t ZUntracer_t::get_blockid(const unsigned p_max) -{ -// assert (m_blockid < p_max); -// @todo: issue warning when wrapping around - m_blockid = (m_blockid+1) % p_max; - return m_blockid; } void 
ZUntracer_t::afl_instrument_bb(Instruction_t *p_inst, const bool p_redZoneHint, const bool p_collafl_optimization) @@ -170,13 +161,8 @@ set<BasicBlock_t*> ZUntracer_t::getBlocksToInstrument(ControlFlowGraph_t &cfg) continue; // if whitelist specified, only allow instrumentation for functions/addresses in whitelist - if (m_whitelist.size() > 0) - { - if (!isWhitelisted(bb->getInstructions()[0])) - { - continue; - } - } + if (!isWhitelisted(bb->getInstructions()[0])) + continue; if (isBlacklisted(bb->getInstructions()[0])) continue; @@ -247,6 +233,6 @@ int ZUntracer_t::execute() getFileIR()->assembleRegistry(); } - return Zax_t::execute(); + return ZaxBase_t::execute(); } diff --git a/afl_transforms/tools/zax/zuntracer.hpp b/afl_transforms/tools/zax/zuntracer.hpp index 1568b17..d664a39 100644 --- a/afl_transforms/tools/zax/zuntracer.hpp +++ b/afl_transforms/tools/zax/zuntracer.hpp @@ -1,7 +1,7 @@ #ifndef _LIBTRANSFORM_ZUNTRACER_H #define _LIBTRANSFORM_ZUNTRACER_H -#include "zax.hpp" +#include "zax_base.hpp" namespace Zafl { @@ -9,26 +9,22 @@ namespace Zafl using namespace IRDB_SDK; // Block-level instrumentation for Untracer - class ZUntracer_t : public Zax_t + class ZUntracer_t : public ZaxBase_t { public: ZUntracer_t() = delete; ZUntracer_t(const ZUntracer_t&) = delete; - ZUntracer_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_entry, set<string> p_exits, bool p_use_stars=false, bool p_autozafl=false, bool p_verbose=false); + ZUntracer_t(IRDB_SDK::pqxxDB_t &p_dbinterface, IRDB_SDK::FileIR_t *p_variantIR, string p_entry, set<string> p_exits, bool p_use_stars=false, bool p_autozafl=false); virtual ~ZUntracer_t() {}; virtual int execute(); protected: - virtual zafl_blockid_t get_blockid(const unsigned p_maxid = 0xFFFF); virtual void afl_instrument_bb(Instruction_t *p_inst, const bool p_hasLeafAnnotation, const bool p_collafl_optimization=false); virtual set<BasicBlock_t*> getBlocksToInstrument(ControlFlowGraph_t &cfg); private: void 
_afl_instrument_bb_fixed(Instruction_t *p_inst, char* p_tracemap_addr); void _afl_instrument_bb(Instruction_t *p_inst, const bool p_redZoneHint); - - private: - zafl_blockid_t m_blockid; }; } -- GitLab