// // SMPDataFlowAnalysis.cpp // // This module performs the fundamental data flow analyses needed for the // SMP project (Software Memory Protection). // #include <vector> #include <pro.h> #include <ida.hpp> #include <idp.hpp> #include <allins.hpp> #include <auto.hpp> #include <bytes.hpp> #include <funcs.hpp> #include <intel.hpp> #include <loader.hpp> #include <lines.hpp> #include <name.hpp> #include "SMPDataFlowAnalysis.h" #include "SMPStaticAnalyzer.h" // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG3 0 // verbose #define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions // Used for binary search by function number in SMPStaticAnalyzer.cpp // to trigger debugging output and find which instruction in which // function is causing a crash. bool SMPBinaryDebug = false; // Define instruction categories for data flow analysis. static SMPitype DFACategory[NN_last+1]; static char *RegNames[R_of + 1] = { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH", "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS", "DS", "FS", "GS", "CF", "ZF", "SF", "OF" }; // Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable // by allowing us to pick them up with an array index. static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6}; static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6}; // Text to be printed in each optimizing annotation explaining why // the annotation was emitted. static char *OptExplanation[LAST_OPT_CATEGORY + 1] = { "NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM", "Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr", "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest" }; // ***************************************************************** // Class DefOrUseList // ***************************************************************** // Default constructor. DefOrUseList::DefOrUseList(void) { return; } // Set a Def or Use into the list, along with its type. void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type) { this->Refs.push_back(Ref); this->Types.push_back(Type); return; } // Get a reference by index. op_t DefOrUseList::GetRef(size_t index) const { return Refs[index]; } SMPOperandType DefOrUseList::GetRefType(size_t index) const { return Types[index]; } // ***************************************************************** // Class SMPInstr // ***************************************************************** // Constructor for instruction. SMPInstr::SMPInstr(ea_t addr) { this->address = addr; this->analyzed = false; this->JumpTarget = false; return; } // Is the instruction the type that terminates a basic block? bool SMPInstr::IsBasicBlockTerminator() const { return ((type == JUMP) || (type == COND_BRANCH) || (type == INDIR_JUMP) || (type == RETURN)); } // Is the destination operand a memory reference? bool SMPInstr::HasDestMemoryOperand(void) const { bool MemDest = false; for (size_t index = 0; index < Defs.GetSize(); ++index) { MemDest = ((Defs.GetRef(index).type == o_mem) || (Defs.GetRef(index).type == o_phrase) || (Defs.GetRef(index).type == o_displ)); if (MemDest) break; } return MemDest; } // end of SMPInstr::HasDestMemoryOperand() // Is the destination operand a memory reference? bool SMPInstr::HasSourceMemoryOperand(void) const { bool MemSrc = false; for (size_t index = 0; index < Uses.GetSize(); ++index) { MemSrc = ((Uses.GetRef(index).type == o_mem) || (Uses.GetRef(index).type == o_phrase) || (Uses.GetRef(index).type == o_displ)); if (MemSrc) break; } return MemSrc; } // end of SMPInstr::HasSourceMemoryOperand() // Does the instruction whose flags are in F have a numeric type // as the second source operand? // NOTE: We can only analyze immediate values now, using a heuristic // that values in the range +/- 8K are numeric and others are // probably addresses. When data flow analyses are implemented, // we will be able to analyze many non-immediate operands. #define IMMEDNUM_LOWER -8191 #define IMMEDNUM_UPPER 8191 bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { bool SecondOpImm = (SMPcmd.Operands[1].type == o_imm); signed long TempImm; if (SecondOpImm) { TempImm = (signed long) SMPcmd.Operands[1].value; } #if SMP_DEBUG if (SecondOpImm && (0 > TempImm)) { #if 0 msg("Negative immediate: %d Hex: %x ASM: %s\n", TempImm, SMPcmd.Operands[1].value, disasm); #endif } else if ((!SecondOpImm) && (SMPcmd.Operands[1].type == o_imm)) { msg("Problem with flags on immediate src operand: %s\n", disasm); } #endif return (SecondOpImm && (TempImm > IMMEDNUM_LOWER) && (TempImm < IMMEDNUM_UPPER)); } // end of SMPInstr::IsSecondSrcOperandNumeric() // DEBUG Print DEF and/or USE for an operand. void PrintDefUse(ulong feature, int OpNum) { // CF_ macros number the operands from 1 to 6, while OpNum // is a 0 to 5 index into the insn_t.Operands[] array. switch (OpNum) { case 0: if (feature & CF_CHG1) msg(" DEF"); if (feature & CF_USE1) msg(" USE"); break; case 1: if (feature & CF_CHG2) msg(" DEF"); if (feature & CF_USE2) msg(" USE"); break; case 2: if (feature & CF_CHG3) msg(" DEF"); if (feature & CF_USE3) msg(" USE"); break; case 3: if (feature & CF_CHG4) msg(" DEF"); if (feature & CF_USE4) msg(" USE"); break; case 4: if (feature & CF_CHG5) msg(" DEF"); if (feature & CF_USE5) msg(" USE"); break; case 5: if (feature & CF_CHG6) msg(" DEF"); if (feature & CF_USE6) msg(" USE"); break; } return; } // end PrintDefUse() // DEBUG print SIB info for an operand. void PrintSIB(op_t Opnd) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); int ScaleFactor = sib_scale(Opnd); #define NAME_LEN 5 char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; if (BaseReg != R_bp) { // SIB code for NO BASE REG qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1); } if (IndexReg != R_sp) { // SIB code for NO INDEX REG qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1); } msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor); } // end PrintSIB() // DEBUG print operands for Inst. void SMPInstr::PrintOperands() const { op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = SMPcmd.Operands[i]; if (Opnd.type == o_void) continue; else if (Opnd.type == o_mem) { msg(" Operand %d : memory : addr: %x", i, Opnd.addr); PrintDefUse(features, i); if (Opnd.hasSIB) { // has SIB info PrintSIB(Opnd); } } else if (Opnd.type == o_phrase) { msg(" Operand %d : memory phrase :", i); PrintDefUse(features, i); if (Opnd.hasSIB) { // has SIB info PrintSIB(Opnd); } else { // no SIB info ushort BaseReg = Opnd.phrase; msg(" reg %s", RegNames[BaseReg]); } if (Opnd.addr != 0) { msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); } } else if (Opnd.type == o_displ) { ea_t offset = Opnd.addr; PrintDefUse(features, i); if (Opnd.hasSIB) { PrintSIB(Opnd); msg(" displ %d", offset); } else { ushort BaseReg = Opnd.reg; msg(" Operand %d : memory displ : reg %s displ %d", i, RegNames[BaseReg], offset); } } else if (Opnd.type == o_reg) { msg(" Operand %d : register", i); msg(" regno: %d", Opnd.reg); PrintDefUse(features, i); } else if (Opnd.type == o_imm) { msg(" Operand %d : immed", i); PrintDefUse(features, i); } else if (Opnd.type == o_far) { msg(" Operand %d : FarPtrImmed", i); PrintDefUse(features, i); } else if (Opnd.type == o_near) { msg(" Operand %d : NearPtrImmed", i); PrintDefUse(features, i); } else { msg(" Operand %d : unknown", i); PrintDefUse(features, i); } if (!(Opnd.showed())) msg(" HIDDEN "); } msg(" \n"); return; } // end of SMPInstr::PrintOperands() // Print out the destination operand list for the instruction, given // the OptCategory for the instruction as a hint. char * SMPInstr::DestString(int OptType) { static char DestList[MAXSTR]; if (OptType != 7) { if (SMPcmd.Operands[0].type != o_reg) { msg("Problem: destination operand not memory and not reg: %d %d %s \n", SMPcmd.Operands[0].type, SMPcmd.Operands[1].type, disasm); } else { ushort DestReg = SMPcmd.Operands[0].reg; qstrncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg])); #if 1 qstrncat(DestList, " ZZ ", MAXSTR); #endif return DestList; } } else { // OptType 7 could have one or two destinations. // NOTE: FIX later. Currently a clone of code above. ** #if SMP_DEBUG3 msg("OptType 7: %s\n", disasm); PrintOperands(); #endif if (SMPcmd.Operands[0].type != o_reg) { msg("Problem: destination operand not memory and not reg: %d %d %s\n", SMPcmd.Operands[0].type, SMPcmd.Operands[1].type, disasm); } else { ushort DestReg = SMPcmd.Operands[0].reg; qstrncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg])); #if 1 qstrncat(DestList, " ZZ ", MAXSTR); #endif return DestList; } } DestList[0] = '\0'; return DestList; } // end of SMPInstr::DestString() // Equality operator for SMPInstr. Key field is address. int SMPInstr::operator==(const SMPInstr &rhs) const { if (this->address != rhs.GetAddr()) return 0; else return 1; } // Inequality operator for SMPInstr. Key field is address. int SMPInstr::operator!=(const SMPInstr &rhs) const { return (this->address != rhs.GetAddr()); } // Less than operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<(const SMPInstr &rhs) const { return (this->address < rhs.GetAddr()); } // Get optimization category for instruction int SMPInstr::GetOptType(void) const { return OptType; } // Is this instruction the one that allocates space on the // stack for the local variables of size LocSize? bool SMPInstr::MDIsFrameAllocInstr(asize_t LocSize) const { // The frame allocating instruction should look like: // sub esp,48 or add esp,-64 etc. if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { if (Defs.GetRef(0).is_reg(R_sp)) { // We know that an addition or subtraction is being // performed on the stack pointer. This should not be // possible within the prologue except at the stack // frame allocation instruction, so return true. We // could be more robust in this analysis in the future. **!!** // CAUTION: If a compiler allocates 64 bytes for locals // and 16 bytes for outgoing arguments in a single // instruction: sub esp,80 // you cannot insist on finding sub esp,LocSize return true; } } return false; } // end of SMPInstr::MDIsFrameAllocInstr() // Is this instruction in the epilogue the one that deallocates the local // vars region of the stack frame? bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const { // The usual compiler idiom for the prologue on x86 is to // deallocate the local var space with: mov esp,ebp // It could be add esp,constant. We can be tricked by // add esp,constant when the constant is just the stack // adjustment after a call. We will have to insist that // the immediate operand have at least the value of // LocalVarsSize for this second form, and that UseFP be true // for the first form. if (UseFP && (this->SMPcmd.itype == NN_mov) && (this->Defs.GetRef(0).is_reg(R_sp)) && (this->Uses.GetRef(0).is_reg(R_bp))) return true; else if ((this->SMPcmd.itype == NN_add) && (this->Defs.GetRef(0).is_reg(R_sp)) && (this->Uses.GetRef(1).is_imm((uval_t) LocalVarsSize))) return true; else if ((this->SMPcmd.itype == NN_add) && (this->Defs.GetRef(0).is_reg(R_sp)) && (this->Uses.GetRef(1).type == o_imm)) { msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); return true; } else return false; } // end of SMPInstr::MDIsFrameDeallocInstr() // MACHINE DEPENDENT: Is instruction a return instruction? bool SMPInstr::MDIsReturnInstr(void) const { return ((SMPcmd.itype == NN_retn) || (SMPcmd.itype == NN_retf)); } // MACHINE DEPENDENT: Is instruction a POP instruction? #define FIRST_POP_INST NN_pop #define LAST_POP_INST NN_popfq bool SMPInstr::MDIsPopInstr(void) const { return ((SMPcmd.itype >= FIRST_POP_INST) && (SMPcmd.itype <= LAST_POP_INST)); } // MACHINE DEPENDENT: Is instruction a PUSH instruction? #define FIRST_PUSH_INST NN_push #define LAST_PUSH_INST NN_pushfq bool SMPInstr::MDIsPushInstr(void) const { return ((SMPcmd.itype >= FIRST_PUSH_INST) && (SMPcmd.itype <= LAST_PUSH_INST)); } // Analyze the instruction and its operands. void SMPInstr::Analyze(void) { if (this->analyzed) return; // Fill cmd structure with disassembly of instr ua_ana0(this->address); // Get the instr disassembly text. (void) generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1); // Remove interactive color-coding tags. tag_remove(this->disasm, this->disasm, 0); // Copy cmd to member variable SMPcmd. this->SMPcmd = cmd; // Get the canonical features into member variables features. this->features = cmd.get_canon_feature(); // Record what type of instruction this is, simplified for the needs // of data flow and type analysis. this->type = DFACategory[cmd.itype]; // Record optimization category. this->OptType = OptCategory[cmd.itype]; // Build the DEF and USE lists for the instruction. this->BuildSMPDefUseLists(); // Fix up machine dependent quirks in the def and use lists. this->MDFixupDefUseLists(); // Determine whether the instruction is a jump target by looking // at its cross references and seeing if it has "TO" code xrefs. xrefblk_t xrefs; for (bool ok = xrefs.first_to(this->address, XREF_FAR); ok; ok = xrefs.next_to()) { if ((xrefs.from != 0) && (xrefs.iscode)) { this->JumpTarget = true; break; } } this->analyzed = true; return; } // end of SMPInstr::Analyze() // Fill the Defs and Uses private data members. void SMPInstr::BuildSMPDefUseLists(void) { size_t OpNum; // Start with the Defs. for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & DefMacros[OpNum]) { // DEF this->Defs.SetRef(this->SMPcmd.Operands[OpNum]); } } // end for (OpNum = 0; ...) // Now, do the Uses. Uses have special case operations, because // any memory operand could have register uses in the addressing // expression, and we must create Uses for those registers. For // example: mov eax,[ebx + esi*2 + 044Ch] // This is a two-operand instruction with one def: eax. But // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi. // The first use is an op_t of type o_phrase (memory phrase), // which can be copied from cmd.Operands[1]. Likewise, we just // copy cmd.Operands[0] into the defs list. However, we must create // op_t types for register ebx and register esi and append them // to the Uses list. This is handled by the machine dependent // method MDFixupDefUseLists(). for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & UseMacros[OpNum]) { // USE this->Uses.SetRef(this->SMPcmd.Operands[OpNum]); } } // end for (OpNum = 0; ...) return; } // end of SMPInstr::BuildSMPDefUseLists() // Perform machine dependent ad hoc fixes to the def and use lists. // For example, some multiply and divide instructions in x86 implicitly // use and/or define register EDX. For memory phrase examples, see comment // in BuildSMPDefUseLists(). void SMPInstr::MDFixupDefUseLists(void) { return; } // Emit annotations for constants used as ptr offsets from EBP or // ESP into the stack frame. Only pay attention to EBP-relative // offsets if EBP is being used as a frame pointer (UseFP == true). void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) { op_t Opnd; #if 0 if ((this->address == 0x8048409) || (this->address == 0x81488a1)) { msg("PROBLEM INSTRUCTION: \n"); this->PrintOperands(); } #endif for (int i = 0; i < UA_MAXOP; ++i) { Opnd = SMPcmd.Operands[i]; if (Opnd.type == o_displ) { ea_t offset = Opnd.addr; if (Opnd.hasSIB) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); if (BaseReg == R_sp) { // EBP cannot be BaseReg in SIB // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } else if (IndexReg == R_bp) { // ESP cannot be IndexReg // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } } else { // no SIB ushort BaseReg = Opnd.reg; if (BaseReg == R_sp) { // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } else if (BaseReg == R_bp) { // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end if (Opnd.type == o_displ) else if (Opnd.type == o_phrase) { ea_t offset = 0; // mmStrata thinks [esp] is [esp+0] if (Opnd.hasSIB) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); if (BaseReg == R_sp) { // EBP cannot be BaseReg in SIB // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } else if (IndexReg == R_bp) { // ESP cannot be IndexReg // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } } else { // Something like [ecx] ushort BaseReg = Opnd.reg; if (BaseReg == R_sp) { // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } else if (BaseReg == R_bp) { // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end else if (Opnd.type == o_phrase) } // end for all operands return; } // end of SMPInstr::AnnotateStackConstants() // ***************************************************************** // Class SMPBasicBlock // ***************************************************************** // Constructor SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { this->FirstInstr = First; this->LastInstr = Last; this->IndirectJump = false; this->Returns = false; this->SharedTailChunk = false; } // Analyze basic block and fill data members. void SMPBasicBlock::Analyze() { if (LastInstr->GetDataFlowType() == INDIR_JUMP) { this->IndirectJump = true; } else if (LastInstr->MDIsReturnInstr()) { this->Returns = true; } } // end of SMPBasicBlock::Analyze() // ***************************************************************** // Class SMPFunction // ***************************************************************** // Constructor SMPFunction::SMPFunction(func_t *Info) { this->FuncInfo = Info; IndirectCalls = false; return; } // Figure out the different regions of the stack frame, and find the // instructions that allocate and deallocate the local variables space // on the stack frame. // The stack frame info will be used to emit stack // annotations when Analyze() reaches the stack allocation // instruction that sets aside space for local vars. // Set the address of the instruction at which these // annotations should be emitted. This should normally // be an instruction such as: sub esp,48 // However, for a function with no local variables at all, // we will need to determine which instruction should be // considered to be the final instruction of the function // prologue and return its address. // Likewise, we find the stack deallocating instruction in // the function epilogue. void SMPFunction::SetStackFrameInfo(void) { bool FoundAllocInstr = false; bool FoundDeallocInstr = false; // The sizes of the three regions of the stack frame other than the // return address are stored in the function structure. this->LocalVarsSize = this->FuncInfo->frsize; this->CalleeSavedRegsSize = this->FuncInfo->frregs; this->IncomingArgsSize = this->FuncInfo->argsize; // The return address size can be obtained in a machine independent // way by calling get_frame_retsize(). this->RetAddrSize = get_frame_retsize(this->FuncInfo); // IDA Pro has trouble with functions that do not have any local // variables. Unfortunately, the C library has plenty of these // functions. IDA usually claims that frregs is zero and frsize // is N, when the values should have been reversed. We can attempt // to detect this and fix it. bool FrameInfoFixed = this->MDFixFrameInfo(); #define SMP_DEBUG_FRAMEFIXUP 1 #if SMP_DEBUG_FRAMEFIXUP if (FrameInfoFixed) { msg("Fixed stack frame size info: %s\n", this->FuncName); SMPBasicBlock CurrBlock = this->Blocks.front(); msg("First basic block:\n"); for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr(); CurrInstr != CurrBlock.GetLastInstr(); ++CurrInstr) { msg("%s\n", CurrInstr->GetDisasm()); } msg("%s\n", CurrBlock.GetLastInstr()->GetDisasm()); } #endif // Now, if LocalVarsSize is not zero, we need to find the instruction // in the function prologue that allocates space on the stack for // local vars. This code could be made more robust in the future // by matching LocalVarsSize to the immediate value in the allocation // instruction. However, IDA Pro is sometimes a little off on this // number, claiming 4 bytes for functions with no locals at all, // assigning the 4 bytes used for a callee-saved register to the // local vars space and then claiming 0 bytes for callee-saved // registers. **!!** if (0 < this->LocalVarsSize) { for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) { ea_t addr = CurrInstr->GetAddr(); // Keep the most recent instruction in the DeallocInstr // in case we reach the return without seeing a dealloc. if (!FoundDeallocInstr) { this->LocalVarsDeallocInstr = addr; } if (!FoundAllocInstr && CurrInstr->MDIsFrameAllocInstr(this->LocalVarsSize)) { this->LocalVarsAllocInstr = addr; FoundAllocInstr = true; } else if (FoundAllocInstr) { // We can now start searching for the DeallocInstr. if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) { // Keep saving the most recent addr that looks // like the DeallocInstr until we reach the // end of the function. Last one to look like // it is used as the DeallocInstr. this->LocalVarsDeallocInstr = addr; FoundDeallocInstr = true; } } } // end for (list<SMPInstr>::iterator CurrInstr ... ) if (!FoundAllocInstr) { // Could not find the frame allocating instruction. Bad. // Emit diagnostic and use the first instruction in the // function. msg("ERROR: Could not find stack frame allocation in %s\n", FuncName); msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n", LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize); this->LocalVarsAllocInstr = this->FuncInfo->startEA; } if (!FoundDeallocInstr) { // Could not find the frame deallocating instruction. Bad. // Emit diagnostic and use the last instruction in the // function. msg("ERROR: Could not find stack frame deallocation in %s\n", FuncName); } } // else LocalVarsSize was zero, meaning that we need to search // for the end of the function prologue code and emit stack frame // annotations from that address (i.e. this method returns that // address). We will approximate this by finding the end of the // sequence of PUSH instructions at the beginning of the function. // The last PUSH instruction should be the last callee-save-reg // instruction. We can make this more robust in the future by // making sure that we do not count a PUSH of anything other than // a register. **!!** // NOTE: 2nd prologue instr is usually mov ebp,esp // THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE // THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0; else { ea_t SaveAddr = FuncInfo->startEA; for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) { insn_t CurrCmd = CurrInstr->GetCmd(); ea_t addr = CurrInstr->GetAddr(); if (CurrCmd.itype == NN_push) SaveAddr = addr; else break; } this->LocalVarsAllocInstr = SaveAddr; this->LocalVarsDeallocInstr = 0; } // end if (LocalVarsSize > 0) ... else ... #if 0 // Now we need to do the corresponding operations from the // end of the function to find the DeallocInstr in the // function epilogue. Because there is no addition to the // stack pointer to deallocate the local vars region, the // function epilogue will consist of (optional) pops of // callee-saved regs, followed by the return instruction. // Working backwards, we should find a return and then // stop when we do not find any more pops. if (0 >= LocalVarsSize) { this->LocalVarsDeallocInstr = NULL; } else { SaveAddr = FuncInfo->endEA - 1; bool FoundRet = false; do { ea_t addr = get_item_head(SaveAddr); flags_t InstrFlags = getFlags(addr); if (isCode(addr) && isHead(addr)) { ua_ana0(addr); if (!FoundRet) { // Just starting out. if (MDIsReturnInstr(cmd)) { FoundRet = true; SaveAddr = addr - 1; } else { msg("ERROR: Last instruction not a return.\n"); } } else { // Should be 0 or more POPs before the return. if (MDIsPopInstr(cmd)) { SaveAddr = addr - 1; } else if (FrameAllocInstr(cmd, this->LocalVarsSize)) { this->LocalVarsDeallocInstr = addr; } else { msg("ERROR: Frame deallocation not prior to POPs.\n"); this->LocalVarsDeallocInstr = SaveAddr + 1; } } // end if (!FoundRet) ... else ... } else { --SaveAddr; } // end if (isCode(addr) && isHead(addr)) } while (NULL == this->LocalVarsDeallocInstr); } // end if (0 >= this->LocalVarsSize) #endif // 0 return; } // end of SMPFunction::SetStackFrameInfo() // IDA Pro has trouble with functions that do not have any local // variables. Unfortunately, the C library has plenty of these // functions. IDA usually claims that frregs is zero and frsize // is N, when the values should have been reversed. We can attempt // to detect this and fix it. // Fixing means we will update the data members LocalVarsSize and // CalleeSavedRegsSize. bool SMPFunction::MDFixFrameInfo(void) { // Does this function fit the problem pattern, with zero for saved // regs and nonzero for local vars? if ((LocalVarsSize == 0) || (CalleeSavedRegsSize != 0)) return false; // Iterate through the first basic block in the function. If we find // a frame allocating Instr in it, then we have local vars. If not, // we don't, and LocalVarsSize should have been zero, not // CalleeSavedRegsSize, so swap them. SMPBasicBlock CurrBlock = this->Blocks.front(); for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr(); CurrInstr != CurrBlock.GetLastInstr(); // LastInstr is jump anyway ++CurrInstr) { if (CurrInstr->MDIsFrameAllocInstr(LocalVarsSize)) return false; } // We did not find a stack frame allocation instruction in the first // basic block, yet CalleeSavedRegsSize is zero and LocalVarsSize // is not zero. Swap them. this->CalleeSavedRegsSize = (ushort) this->LocalVarsSize; this->LocalVarsSize = 0; return true; } // end of SMPFunction::MDFixFrameInfo() // Emit the annotations describing the regions of the stack frame. void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) { ea_t addr = Instr->GetAddr(); if (0 < IncomingArgsSize) qfprintf(AnnotFile, "%x %d INARGS STACK esp + %d %s \n", addr, IncomingArgsSize, (LocalVarsSize + CalleeSavedRegsSize + RetAddrSize), Instr->GetDisasm()); if (0 < RetAddrSize) qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d ReturnAddress \n", addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize)); if (0 < CalleeSavedRegsSize) qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n", addr, CalleeSavedRegsSize, LocalVarsSize); if (0 < LocalVarsSize) qfprintf(AnnotFile, "%x %d LOCALFRAME STACK esp + %d LocalVars \n", addr, LocalVarsSize, 0); return; } // end of SMPFunction::EmitStackFrameAnnotations() // Main data flow analysis driver. Goes through the function and // fills all objects for instructions, basic blocks, and the function // itself. void SMPFunction::Analyze(void) { list<SMPInstr>::iterator FirstInBlock = this->Instrs.end(); // For starting a basic block list<SMPInstr>::iterator LastInBlock = this->Instrs.end(); // Terminating a basic block #if SMP_DEBUG_CONTROLFLOW msg("Entering SMPFunction::Analyze.\n"); #endif // Get some basic info from the FuncInfo structure. this->Size = this->FuncInfo->endEA - this->FuncInfo->startEA; this->UseFP = (0 != (this->FuncInfo->flags & (FUNC_FRAME | FUNC_BOTTOMBP))); this->StaticFunc = (0 != (this->FuncInfo->flags & FUNC_STATIC)); get_func_name(this->FuncInfo->startEA, this->FuncName, sizeof(this->FuncName) - 1); #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: got basic info.\n"); #endif // Cycle through all chunks that belong to the function. func_tail_iterator_t FuncTail(this->FuncInfo); size_t ChunkCounter = 0; for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const area_t &CurrChunk = FuncTail.chunk(); ++ChunkCounter; #if SMP_DEBUG_CHUNKS if (1 < ChunkCounter) msg("Found tail chunk for %s\n", this->FuncName); #endif // Build the instruction and block lists for the function. for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA; addr = get_item_end(addr)) { flags_t InstrFlags = getFlags(addr); if (isHead(InstrFlags) && isCode(InstrFlags)) { SMPInstr CurrInst = SMPInstr(addr); // Fill in the instruction data members. #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n"); #endif CurrInst.Analyze(); if (SMPBinaryDebug) { msg("Disasm: %s \n", CurrInst.GetDisasm()); } if (CurrInst.GetDataFlowType() == INDIR_CALL) this->IndirectCalls = true; // Before we insert the instruction into the instruction // list, determine if it is a jump target that does not // follow a basic block terminator. This is the special case // of a CASE in a SWITCH that falls through into another // CASE, for example. The first sequence of statements // was not terminated by a C "break;" statement, so it // looks like straight line code, but there is an entry // point at the beginning of the second CASE sequence and // we have to split basic blocks at the entry point. if ((FirstInBlock != this->Instrs.end()) && CurrInst.IsJumpTarget()) { #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: hit special jump target case.\n"); #endif LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { CurrBlock.SetShared(); } FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); } #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: putting CurrInst on list.\n"); #endif // Insert instruction at end of list. this->Instrs.push_back(CurrInst); // Find basic block leaders and terminators. if (FirstInBlock == this->Instrs.end()) { #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: setting FirstInBlock.\n"); #endif FirstInBlock = --(this->Instrs.end()); } if (CurrInst.IsBasicBlockTerminator()) { #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: found block terminator.\n"); #endif LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { CurrBlock.SetShared(); } FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); } } // end if (isHead(InstrFlags) && isCode(InstrFlags) } // end for (ea_t addr = FuncInfo->startEA; ... ) // Handle the special case in which a function does not terminate // with a return instruction or any other basic block terminator. // Sometimes IDA Pro sees a call to a NORET function and decides // to not include the dead code after it in the function. That // dead code includes the return instruction, so the function no // longer includes a return instruction and terminates with a CALL. if (FirstInBlock != this->Instrs.end()) { LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { CurrBlock.SetShared(); } FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); } } // end for (bool ChunkOK = ...) #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: set stack frame info.\n"); #endif // Figure out the stack frame and related info. this->SetStackFrameInfo(); return; } // end of SMPFunction::Analyze() void SMPFunction::EmitAnnotations(FILE *AnnotFile) { // Emit annotation for the function as a whole. if (this->StaticFunc) { qfprintf(AnnotFile, "%x %d FUNC LOCAL %s ", FuncInfo->startEA, this->Size, this->FuncName); } else { qfprintf(AnnotFile, "%x %d FUNC GLOBAL %s ", FuncInfo->startEA, this->Size, this->FuncName); } if (this->UseFP) { qfprintf(AnnotFile, "USEFP "); } else { qfprintf(AnnotFile, "NOFP "); } if (this->FuncInfo->does_return()) { qfprintf(AnnotFile, "\n"); } else { qfprintf(AnnotFile, "NORET \n"); } // Loop through all instructions in the function. // Output optimization annotations for those // instructions that do not require full computation // of their memory metadata by the Memory Monitor SDT. list<SMPInstr>::iterator CurrInst; bool AllocSeen = false; // Reached LocalVarsAllocInstr yet? bool DeallocTrigger = false; for (CurrInst = Instrs.begin(); CurrInst != Instrs.end(); ++CurrInst) { ea_t addr = CurrInst->GetAddr(); flags_t InstrFlags = getFlags(addr); int OptType = CurrInst->GetOptType(); char *disasm = CurrInst->GetDisasm(); bool MemDest = CurrInst->HasDestMemoryOperand(); bool MemSrc = CurrInst->HasSourceMemoryOperand(); bool SecondSrcOperandNum = CurrInst->IsSecondSrcOperandNumeric(InstrFlags); ++OptCount[OptType]; // keep count for debugging info // If this is the instruction which allocated space // for local variables, we want to emit annotations // describing the stack frame. if (addr == LocalVarsAllocInstr) { EmitStackFrameAnnotations(AnnotFile, CurrInst); AllocSeen = true; } // If this is the instruction which deallocated space // for local variables, we set a flag to remind us to // emit an annotation on the next instruction. // mmStrata wants the instruction AFTER the // deallocating instruction, so that it processes // the deallocation after it happens. It inserts // instrumentation before an instruction, not // after, so it will insert the deallocating // instrumentation before the first POP of callee-saved regs, // if there are any, or before the return, otherwise. if (addr == LocalVarsDeallocInstr) { DeallocTrigger = true; } else if (DeallocTrigger) { // Time for annotation qfprintf(AnnotFile, "%x %d DEALLOC STACK esp - %d %s\n", addr, LocalVarsSize, LocalVarsSize, disasm); DeallocTrigger = false; } #if SMP_DEBUG_MEM if (MemDest || MemSrc) { msg("OptType: %d %s", OptType, disasm); CurrInst->PrintOperands(); } #endif // Emit appropriate optimization annotations. bool SDTInstrumentation = false; switch (OptType) { case 0: // SDT will have to handle these { #if SMP_DEBUG_TYPE0 msg("OptType 0: %x %s\n", addr, disasm); #endif // mmStrata wants to suppress warnings on the PUSH // instructions that precede the LocalVarsAllocInstr // (i.e. the PUSHes of callee-saved regs). if (!AllocSeen && CurrInst->MDIsPushInstr()) { qfprintf(AnnotFile, "%x %d INSTR LOCAL NoWarn %s \n", addr, -3, disasm); } else { SDTInstrumentation = true; } break; } case 1: // nothing for SDT to do { qfprintf(AnnotFile, "%x %d INSTR LOCAL NoMetaUpdate %s \n", addr, -1, disasm); ++AnnotationCount[OptType]; break; } case 4: // INC, DEC, etc.: no SDT work unless MemDest { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%x %d INSTR LOCAL Always1stSrc %s \n", addr, -1, disasm); ++AnnotationCount[OptType]; break; } case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } if (SecondSrcOperandNum) { // treat as category 1 qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } break; } case 6: // Only OS code should include these; problem for SDT { if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%x %d INSTR LOCAL AlwaysPTR %s \n", addr, -OptType, disasm); ++AnnotationCount[OptType]; break; } case 8: // Implicitly writes to EDX:EAX, always numeric. { qfprintf(AnnotFile, "%x %d INSTR LOCAL n EDX EAX ZZ %s %s \n", addr, -2, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; SDTInstrumentation = true; break; } case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) { if (MemDest) { #if SMP_DEBUG // MemDest seems to happen too much. msg("Floating point MemDest: %s \n", disasm); #endif SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; break; } default: // 2,3,7: Optimization possibilities depend on operands { #if SMP_DEBUG2 if (OptType == 3) { // MOV instr class if (MemDest) { msg("MemDest on MOV: %s\n", disasm); } else if (!SecondSrcOperandNum) { msg("MOV: not 2nd op numeric: %s\n", disasm); CurrInst->PrintOperands(); } } #endif SDTInstrumentation = true; if (MemDest) { #if SMP_DEBUG_XOR if (OptType == 2) msg("MemDest on OptType 2: %s\n", disasm); #endif break; // treat as category 0 } if ((OptType == 2) || (OptType == 7) || SecondSrcOperandNum) { qfprintf(AnnotFile, "%x %d INSTR LOCAL n %s %s %s \n", addr, -2, CurrInst->DestString(OptType), OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } break; } } // end switch (OptType) // If mmStrata is going to have to deal with the // instruction, then we can annotate EBP and ESP // relative constant offsets. If we have emitted // an annotation of type -1, there is no point // in telling mmStrata about these constants. if (SDTInstrumentation) { CurrInst->AnnotateStackConstants(UseFP, AnnotFile); } } // end for (ea_t addr = FuncInfo->startEA; ...) return; } // Initialize the DFACategory[] array to define instruction classes // for the purposes of data flow analysis. void InitDFACategory(void) { // Default category is 0, not the start or end of a basic block. (void) memset(DFACategory, 0, sizeof(DFACategory)); DFACategory[NN_call] = CALL; // Call Procedure DFACategory[NN_callfi] = INDIR_CALL; // Indirect Call Far Procedure DFACategory[NN_callni] = INDIR_CALL; // Indirect Call Near Procedure DFACategory[NN_hlt] = HALT; // Halt DFACategory[NN_int] = CALL; // Call to Interrupt Procedure DFACategory[NN_into] = CALL; // Call to Interrupt Procedure if Overflow Flag = 1 DFACategory[NN_int3] = CALL; // Trap to Debugger DFACategory[NN_iretw] = RETURN; // Interrupt Return DFACategory[NN_iret] = RETURN; // Interrupt Return DFACategory[NN_iretd] = RETURN; // Interrupt Return (use32) DFACategory[NN_iretq] = RETURN; // Interrupt Return (use64) DFACategory[NN_ja] = COND_BRANCH; // Jump if Above (CF=0 & ZF=0) DFACategory[NN_jae] = COND_BRANCH; // Jump if Above or Equal (CF=0) DFACategory[NN_jb] = COND_BRANCH; // Jump if Below (CF=1) DFACategory[NN_jbe] = COND_BRANCH; // Jump if Below or Equal (CF=1 | ZF=1) DFACategory[NN_jc] = COND_BRANCH; // Jump if Carry (CF=1) DFACategory[NN_jcxz] = COND_BRANCH; // Jump if CX is 0 DFACategory[NN_jecxz] = COND_BRANCH; // Jump if ECX is 0 DFACategory[NN_jrcxz] = COND_BRANCH; // Jump if RCX is 0 DFACategory[NN_je] = COND_BRANCH; // Jump if Equal (ZF=1) DFACategory[NN_jg] = COND_BRANCH; // Jump if Greater (ZF=0 & SF=OF) DFACategory[NN_jge] = COND_BRANCH; // Jump if Greater or Equal (SF=OF) DFACategory[NN_jl] = COND_BRANCH; // Jump if Less (SF!=OF) DFACategory[NN_jle] = COND_BRANCH; // Jump if Less or Equal (ZF=1 | SF!=OF) DFACategory[NN_jna] = COND_BRANCH; // Jump if Not Above (CF=1 | ZF=1) DFACategory[NN_jnae] = COND_BRANCH; // Jump if Not Above or Equal (CF=1) DFACategory[NN_jnb] = COND_BRANCH; // Jump if Not Below (CF=0) DFACategory[NN_jnbe] = COND_BRANCH; // Jump if Not Below or Equal (CF=0 & ZF=0) DFACategory[NN_jnc] = COND_BRANCH; // Jump if Not Carry (CF=0) DFACategory[NN_jne] = COND_BRANCH; // Jump if Not Equal (ZF=0) DFACategory[NN_jng] = COND_BRANCH; // Jump if Not Greater (ZF=1 | SF!=OF) DFACategory[NN_jnge] = COND_BRANCH; // Jump if Not Greater or Equal (ZF=1) DFACategory[NN_jnl] = COND_BRANCH; // Jump if Not Less (SF=OF) DFACategory[NN_jnle] = COND_BRANCH; // Jump if Not Less or Equal (ZF=0 & SF=OF) DFACategory[NN_jno] = COND_BRANCH; // Jump if Not Overflow (OF=0) DFACategory[NN_jnp] = COND_BRANCH; // Jump if Not Parity (PF=0) DFACategory[NN_jns] = COND_BRANCH; // Jump if Not Sign (SF=0) DFACategory[NN_jnz] = COND_BRANCH; // Jump if Not Zero (ZF=0) DFACategory[NN_jo] = COND_BRANCH; // Jump if Overflow (OF=1) DFACategory[NN_jp] = COND_BRANCH; // Jump if Parity (PF=1) DFACategory[NN_jpe] = COND_BRANCH; // Jump if Parity Even (PF=1) DFACategory[NN_jpo] = COND_BRANCH; // Jump if Parity Odd (PF=0) DFACategory[NN_js] = COND_BRANCH; // Jump if Sign (SF=1) DFACategory[NN_jz] = COND_BRANCH; // Jump if Zero (ZF=1) DFACategory[NN_jmp] = JUMP; // Jump DFACategory[NN_jmpfi] = INDIR_JUMP; // Indirect Far Jump DFACategory[NN_jmpni] = INDIR_JUMP; // Indirect Near Jump DFACategory[NN_jmpshort] = JUMP; // Jump Short (only in 64-bit mode) DFACategory[NN_loopw] = COND_BRANCH; // Loop while ECX != 0 DFACategory[NN_loop] = COND_BRANCH; // Loop while CX != 0 DFACategory[NN_loopd] = COND_BRANCH; // Loop while ECX != 0 DFACategory[NN_loopq] = COND_BRANCH; // Loop while RCX != 0 DFACategory[NN_loopwe] = COND_BRANCH; // Loop while CX != 0 and ZF=1 DFACategory[NN_loope] = COND_BRANCH; // Loop while rCX != 0 and ZF=1 DFACategory[NN_loopde] = COND_BRANCH; // Loop while ECX != 0 and ZF=1 DFACategory[NN_loopqe] = COND_BRANCH; // Loop while RCX != 0 and ZF=1 DFACategory[NN_loopwne] = COND_BRANCH; // Loop while CX != 0 and ZF=0 DFACategory[NN_loopne] = COND_BRANCH; // Loop while rCX != 0 and ZF=0 DFACategory[NN_loopdne] = COND_BRANCH; // Loop while ECX != 0 and ZF=0 DFACategory[NN_loopqne] = COND_BRANCH; // Loop while RCX != 0 and ZF=0 DFACategory[NN_retn] = RETURN; // Return Near from Procedure DFACategory[NN_retf] = RETURN; // Return Far from Procedure // // Pentium instructions // DFACategory[NN_rsm] = HALT; // Resume from System Management Mode // Pentium II instructions DFACategory[NN_sysenter] = CALL; // Fast Transition to System Call Entry Point DFACategory[NN_sysexit] = CALL; // Fast Transition from System Call Entry Point // AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual DFACategory[NN_syscall] = CALL; // Low latency system call DFACategory[NN_sysret] = CALL; // Return from system call // VMX instructions DFACategory[NN_vmcall] = INDIR_CALL; // Call to VM Monitor return; } // end InitDFACategory()