// // SMPInstr.cpp // // This module performs the instruction level analyses needed for the // SMP project (Software Memory Protection). // #include <cstring> #include <pro.h> #include <assert.h> #include <ida.hpp> #include <idp.hpp> #include <allins.hpp> #include <auto.hpp> #include <bytes.hpp> #include <funcs.hpp> #include <intel.hpp> #include <loader.hpp> #include <lines.hpp> #include <name.hpp> #include "SMPStaticAnalyzer.h" #include "SMPDataFlowAnalysis.h" #include "SMPInstr.h" // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG_XOR 0 // Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable // by allowing us to pick them up with an array index. static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6}; static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6}; // Text to be printed in each optimizing annotation explaining why // the annotation was emitted. static char *OptExplanation[LAST_OPT_CATEGORY + 1] = { "NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM", "Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr", "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest" }; // ***************************************************************** // Class SMPInstr // ***************************************************************** // Constructor for instruction. SMPInstr::SMPInstr(ea_t addr) { this->address = addr; this->analyzed = false; this->JumpTarget = false; this->BlockTerm = false; this->DeadRegsString[0] = '\0'; return; } // Is the instruction the type that terminates a basic block? bool SMPInstr::IsBasicBlockTerminator() const { return ((type == JUMP) || (type == COND_BRANCH) || (type == INDIR_JUMP) || (type == RETURN)); } // Is the destination operand a memory reference? bool SMPInstr::HasDestMemoryOperand(void) const { bool MemDest = false; for (size_t index = 0; index < Defs.GetSize(); ++index) { optype_t CurrType = Defs.GetRef(index).GetOp().type; MemDest = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemDest) break; } return MemDest; } // end of SMPInstr::HasDestMemoryOperand() // Is a source operand a memory reference? bool SMPInstr::HasSourceMemoryOperand(void) const { bool MemSrc = false; for (size_t index = 0; index < Uses.GetSize(); ++index) { optype_t CurrType = Uses.GetRef(index).GetOp().type; MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemSrc) break; } return MemSrc; } // end of SMPInstr::HasSourceMemoryOperand() // Does the instruction whose flags are in F have a numeric type // as the second source operand? // NOTE: We can only analyze immediate values now, using a heuristic // that values in the range +/- 8K are numeric and others are // probably addresses. When data flow analyses are implemented, // we will be able to analyze many non-immediate operands. #define IMMEDNUM_LOWER -8191 #define IMMEDNUM_UPPER 8191 bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { bool SecondOpImm = (SMPcmd.Operands[1].type == o_imm); signed long TempImm; if (SecondOpImm) { TempImm = (signed long) SMPcmd.Operands[1].value; } #if SMP_DEBUG if (SecondOpImm && (0 > TempImm)) { #if 0 msg("Negative immediate: %d Hex: %x ASM: %s\n", TempImm, SMPcmd.Operands[1].value, disasm); #endif } else if ((!SecondOpImm) && (SMPcmd.Operands[1].type == o_imm)) { msg("Problem with flags on immediate src operand: %s\n", disasm); } #endif return (SecondOpImm && (TempImm > IMMEDNUM_LOWER) && (TempImm < IMMEDNUM_UPPER)); } // end of SMPInstr::IsSecondSrcOperandNumeric() // DEBUG print operands for Inst. void SMPInstr::PrintOperands(void) const { op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = SMPcmd.Operands[i]; PrintOneOperand(Opnd, this->features, i); } msg(" \n"); return; } // end of SMPInstr::PrintOperands() // Complete DEBUG printing. void SMPInstr::Dump(void) const { msg("%x %d SMPitype: %d %s\n", this->address, this->SMPcmd.size, (int) this->type, this->GetDisasm()); msg("USEs: "); this->Uses.Dump(); msg("DEFs: "); this->Defs.Dump(); return; } // end of SMPInstr::Dump() // Print out the destination operand list for the instruction, given // the OptCategory for the instruction as a hint. char * SMPInstr::DestString(int OptType) { static char DestList[MAXSTR]; int RegDestCount = 0; DestList[0] = 'Z'; // Make sure there are no leftovers from last call DestList[1] = 'Z'; DestList[2] = '\0'; for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { op_t DefOpnd = this->GetDef(DefIndex).GetOp(); if (DefOpnd.is_reg(X86_FLAGS_REG)) // don't print flags as a destination continue; if (o_reg == DefOpnd.type) { ushort DestReg = DefOpnd.reg; if (0 == RegDestCount) { qstrncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg])); } else { qstrncat(DestList, " ", MAXSTR); qstrncat(DestList, RegNames[DestReg], MAXSTR); } ++RegDestCount; } } if (0 >= RegDestCount) { msg("WARNING: No destination registers: %s\n", this->GetDisasm()); } else { qstrncat(DestList, " ZZ ", MAXSTR); } return DestList; } // end of SMPInstr::DestString() // Equality operator for SMPInstr. Key field is address. int SMPInstr::operator==(const SMPInstr &rhs) const { if (this->address != rhs.GetAddr()) return 0; else return 1; } // Inequality operator for SMPInstr. Key field is address. int SMPInstr::operator!=(const SMPInstr &rhs) const { return (this->address != rhs.GetAddr()); } // Less than operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<(const SMPInstr &rhs) const { return (this->address < rhs.GetAddr()); } // Less than or equal operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<=(const SMPInstr &rhs) const { return (this->address <= rhs.GetAddr()); } #define MD_FIRST_ENTER_INSTR NN_enterw #define MD_LAST_ENTER_INSTR NN_enterq // Is this instruction the one that allocates space on the // stack for the local variables? bool SMPInstr::MDIsFrameAllocInstr(void) const { // The frame allocating instruction should look like: // sub esp,48 or add esp,-64 etc. if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { if (Defs.GetRef(0).GetOp().is_reg(R_sp)) { // We know that an addition or subtraction is being // performed on the stack pointer. This should not be // possible within the prologue except at the stack // frame allocation instruction, so return true. We // could be more robust in this analysis in the future. **!!** // CAUTION: If a compiler allocates 64 bytes for locals // and 16 bytes for outgoing arguments in a single // instruction: sub esp,80 // you cannot insist on finding sub esp,LocSize // To make this more robust, we are going to insist that // an allocation of stack space is either performed by // adding a negative immediate value, or by subtracting // a positive immediate value. We will throw in, free of // charge, a subtraction of a register, which is how alloca() // usually allocates stack space. if (o_imm == Uses.GetRef(0).GetOp().type) { signed long TempImm = (signed long) Uses.GetRef(0).GetOp().value; if (((0 > TempImm) && (SMPcmd.itype == NN_add)) || ((0 < TempImm) && (SMPcmd.itype == NN_sub))) { return true; } } else if ((o_reg == Uses.GetRef(0).GetOp().type) && (SMPcmd.itype == NN_sub)) { // alloca() ? return true; } } } else if ((SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (SMPcmd.itype <= MD_LAST_ENTER_INSTR)) { return true; } return false; } // end of SMPInstr::MDIsFrameAllocInstr() // Is this instruction in the epilogue the one that deallocates the local // vars region of the stack frame? bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const { // The usual compiler idiom for the prologue on x86 is to // deallocate the local var space with: mov esp,ebp // It could be add esp,constant. We can be tricked by // add esp,constant when the constant is just the stack // adjustment after a call. We will have to insist that // the immediate operand have at least the value of // LocalVarsSize for this second form, and that UseFP be true // for the first form. if (UseFP && (this->SMPcmd.itype == NN_mov) && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) && (this->Uses.GetRef(0).GetOp().is_reg(R_bp))) return true; else if ((this->SMPcmd.itype == NN_add) && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) && (this->Uses.GetRef(1).GetOp().is_imm((uval_t) LocalVarsSize))) return true; else if ((this->SMPcmd.itype == NN_add) && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) && (this->Uses.GetRef(1).GetOp().type == o_imm)) { msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); return true; } else if (NN_leave == this->SMPcmd.itype) return true; else return false; } // end of SMPInstr::MDIsFrameDeallocInstr() // Is instruction a no-op? There are 1-byte, 2-byte, etc versions of no-ops. bool SMPInstr::MDIsNop(void) const { bool IsNop = false; ushort opcode = this->SMPcmd.itype; if (NN_nop == opcode) IsNop = true; else if (NN_mov == opcode) { if ((o_reg == this->SMPcmd.Operands[0].type) && this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) { // We have a register to register move with source == destination. IsNop = true; } } else if (NN_lea == opcode) { if ((o_reg == this->SMPcmd.Operands[0].type) && (o_displ == this->SMPcmd.Operands[1].type)) { // We are looking for 6-byte no-ops like lea esi,[esi+0] ushort destreg = this->SMPcmd.Operands[0].reg; if ((this->SMPcmd.Operands[1].hasSIB) && (destreg == (ushort) sib_base(this->SMPcmd.Operands[1]))) { IsNop = true; } else if (destreg == this->SMPcmd.Operands[1].reg) { IsNop = true; } } } return IsNop; } // end of SMPInstr::MDIsNop() // MACHINE DEPENDENT: Is instruction a return instruction? bool SMPInstr::MDIsReturnInstr(void) const { return ((SMPcmd.itype == NN_retn) || (SMPcmd.itype == NN_retf)); } // MACHINE DEPENDENT: Is instruction a POP instruction? #define FIRST_POP_INST NN_pop #define LAST_POP_INST NN_popfq bool SMPInstr::MDIsPopInstr(void) const { return ((SMPcmd.itype >= FIRST_POP_INST) && (SMPcmd.itype <= LAST_POP_INST)); } // MACHINE DEPENDENT: Is instruction a PUSH instruction? #define FIRST_PUSH_INST NN_push #define LAST_PUSH_INST NN_pushfq bool SMPInstr::MDIsPushInstr(void) const { return ((SMPcmd.itype >= FIRST_PUSH_INST) && (SMPcmd.itype <= LAST_PUSH_INST)); } // MACHINE DEPENDENT: Is instruction an ENTER instruction? #define FIRST_ENTER_INST NN_enterw #define LAST_ENTER_INST NN_enterq bool SMPInstr::MDIsEnterInstr(void) const { return ((SMPcmd.itype >= FIRST_ENTER_INST) && (SMPcmd.itype <= LAST_ENTER_INST)); } // MACHINE DEPENDENT: Is instruction a LEAVE instruction? #define FIRST_LEAVE_INST NN_leavew #define LAST_LEAVE_INST NN_leaveq bool SMPInstr::MDIsLeaveInstr(void) const { return ((SMPcmd.itype >= FIRST_LEAVE_INST) && (SMPcmd.itype <= LAST_LEAVE_INST)); } // MACHINE DEPENDENT: Does instruction use a callee-saved register? bool SMPInstr::MDUsesCalleeSavedReg(void) const { for (size_t index = 0; index < this->Uses.GetSize(); ++index) { op_t CurrUse = this->GetUse(index).GetOp(); if (CurrUse.is_reg(R_bp) || CurrUse.is_reg(R_si) || CurrUse.is_reg(R_di) || CurrUse.is_reg(R_bx)) { return true; } } return false; } // end of SMPInstr::MDUsesCalleeSavedReg() // Is the instruction a register to register copy of a stack pointer or frame pointer // into a general purpose register (which mmStrata will now need to track as a stack // relative pointer)? bool SMPInstr::MDIsStackPointerCopy(bool UseFP) const { if ((this->OptType == 3) && (this->GetDef(0).GetOp().type == o_reg) && (!(this->GetDef(0).GetOp().is_reg(R_sp)))) { if (UseFP) { if (this->GetUse(0).GetOp().is_reg(R_bp)) // Move of base pointer EBP into a general register return true; else if ((this->GetUse(0).GetOp().is_reg(R_sp)) && !(this->GetDef(0).GetOp().is_reg(R_bp))) // Move of ESP into something besides a base pointer return true; } else if (this->GetUse(0).GetOp().is_reg(R_sp)) { // Move of ESP into a register; no base pointer used in this function return true; } } return false; } // end of SMPInstr::MDIsStackPointerCopy() // Is instruction a branch (conditional or unconditional) to a // code target that is not in the current chunk? bool SMPInstr::IsBranchToFarChunk(void) const { func_t *CurrChunk = get_fchunk(this->address); bool FarBranch = false; if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) { // Instruction is a direct branch, conditional or unconditional if (this->NumUses() > 0) { op_t JumpTarget = this->GetUse(0).GetOp(); if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) { // Branches to a code address func_t *TargetChunk = get_fchunk(JumpTarget.addr); // Is target address within the same chunk as the branch? FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA); } } } return FarBranch; } // end of SMPInstr::IsBranchToFarChunk() // Analyze the instruction and its operands. void SMPInstr::Analyze(void) { if (this->analyzed) return; // Fill cmd structure with disassembly of instr ua_ana0(this->address); // Get the instr disassembly text. (void) generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1); // Remove interactive color-coding tags. tag_remove(this->disasm, this->disasm, 0); // Copy cmd to member variable SMPcmd. this->SMPcmd = cmd; // Get the canonical features into member variables features. this->features = cmd.get_canon_feature(); // Record what type of instruction this is, simplified for the needs // of data flow and type analysis. this->type = DFACategory[cmd.itype]; // Record optimization category. this->OptType = OptCategory[cmd.itype]; // Build the DEF and USE lists for the instruction. this->BuildSMPDefUseLists(); // Fix up machine dependent quirks in the def and use lists. this->MDFixupDefUseLists(); // Erase any duplicate references we just added by accident. this->Uses.EraseDuplicates(); this->Defs.EraseDuplicates(); // Set the type (NUMERIC or POINTER) of the DEFs and USEs if possible to determine // without context from other instructions. this->MDAnalyzeDefType(); this->MDAnalyzeUseType(); // Determine whether the instruction is a jump target by looking // at its cross references and seeing if it has "TO" code xrefs. xrefblk_t xrefs; for (bool ok = xrefs.first_to(this->address, XREF_FAR); ok; ok = xrefs.next_to()) { if ((xrefs.from != 0) && (xrefs.iscode)) { this->JumpTarget = true; break; } } this->analyzed = true; return; } // end of SMPInstr::Analyze() // Fill the Defs and Uses private data members. void SMPInstr::BuildSMPDefUseLists(void) { size_t OpNum; this->Defs.clear(); this->Uses.clear(); // Start with the Defs. for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & DefMacros[OpNum]) { // DEF op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) this->Defs.SetRef(TempOp); } } // end for (OpNum = 0; ...) // Now, do the Uses. Uses have special case operations, because // any memory operand could have register uses in the addressing // expression, and we must create Uses for those registers. For // example: mov eax,[ebx + esi*2 + 044Ch] // This is a two-operand instruction with one def: eax. But // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi. // The first use is an op_t of type o_phrase (memory phrase), // which can be copied from cmd.Operands[1]. Likewise, we just // copy cmd.Operands[0] into the defs list. However, we must create // op_t types for register ebx and register esi and append them // to the Uses list. This is handled by the machine dependent // method MDFixupDefUseLists(). for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & UseMacros[OpNum]) { // USE op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) this->Uses.SetRef(TempOp); } } // end for (OpNum = 0; ...) return; } // end of SMPInstr::BuildSMPDefUseLists() // If DefReg is not already in the DEF list, add a DEF for it. void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown) { bool AlreadySet = false; for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { if (this->GetDef(DefIndex).GetOp().is_reg(DefReg)) { AlreadySet = true; break; } } if (!AlreadySet) { op_t TempDef; TempDef.type = o_reg; TempDef.reg = DefReg; if (Shown) TempDef.set_showed(); else TempDef.clr_showed(); this->Defs.SetRef(TempDef); } return; } // end of SMPInstr::MDAddRegDef() // If UseReg is not already in the USE list, add a USE for it. void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown) { bool AlreadyUsed = false; for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) { if (this->GetUse(UseIndex).GetOp().is_reg(UseReg)) { AlreadyUsed = true; break; } } if (!AlreadyUsed) { op_t TempUse; TempUse.type = o_reg; TempUse.reg = UseReg; if (Shown) TempUse.set_showed(); else TempUse.clr_showed(); this->Uses.SetRef(TempUse); } return; } // end of SMPInstr::MDAddRegUse() // Perform machine dependent ad hoc fixes to the def and use lists. // For example, some multiply and divide instructions in x86 implicitly // use and/or define register EDX. For memory phrase examples, see comment // in BuildSMPDefUseLists(). void SMPInstr::MDFixupDefUseLists(void) { // First, handle the uses hidden in memory addressing modes. Note that we do not // care whether we are dealing with a memory destination operand or source // operand, because register USEs, not DEFs, happen within the addressing expressions. size_t OpNum; for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t Opnd = SMPcmd.Operands[OpNum]; if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) { if (Opnd.hasSIB) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); if (R_none != BaseReg) { op_t BaseOpnd = Opnd; // Init to current operand field values BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.reg = BaseReg; BaseOpnd.hasSIB = 0; BaseOpnd.set_showed(); this->Uses.SetRef(BaseOpnd); if (BaseOpnd.is_reg(R_bp)) { msg("WARNING: EBP base register in SIB: %s\n", this->GetDisasm()); } } else { msg("WARNING: R_none base register in SIB: %s\n", this->GetDisasm()); } if (R_none != IndexReg) { // Should we disallow R_sp here? **!!** op_t IndexOpnd = Opnd; // Init to current operand field values IndexOpnd.type = o_reg; // Change type and reg fields IndexOpnd.reg = IndexReg; IndexOpnd.hasSIB = 0; IndexOpnd.set_showed(); this->Uses.SetRef(IndexOpnd); if (IndexOpnd.is_reg(R_sp)) { msg("WARNING: ESP index register in SIB: %s\n", this->GetDisasm()); } } } else { // no SIB byte; can have base reg but no index reg ushort BaseReg = Opnd.reg; // cannot be R_none for no SIB case op_t BaseOpnd = Opnd; // Init to current operand field values BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.reg = BaseReg; BaseOpnd.hasSIB = 0; BaseOpnd.set_showed(); this->Uses.SetRef(BaseOpnd); } } // end if (o_phrase or o_displ operand) } // end for (all operands) // Now, handle special instruction categories that have implicit operands. if (NN_cmpxchg == this->SMPcmd.itype) { // x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis // sound by declaring that EAX is always a DEF. this->MDAddRegDef(R_ax, false); } // end if NN_cmpxchg else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) { // IDA does not include the stack pointer in the DEFs or USEs. this->MDAddRegDef(R_sp, false); this->MDAddRegUse(R_sp, false); } else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) { // Entire function prologue or epilogue microcoded. this->MDAddRegDef(R_sp, false); this->MDAddRegUse(R_sp, false); this->MDAddRegDef(R_bp, false); this->MDAddRegUse(R_bp, false); } else if (8 == this->GetOptType()) { // This category implicitly writes to EDX:EAX. this->MDAddRegDef(R_dx, false); this->MDAddRegDef(R_ax, false); } // end else if (8 == GetOptType) else if (7 == this->GetOptType()) { // Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX. // DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX. // DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro // sense, because they are not displayed in the disassembly text). For example: // mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the // source operand is only 8 bits wide, there is room to hold the result in AX // without using DX: mul bl means AX <-- AL*BL. // IMUL has forms with a hidden EAX or AX operand and forms with no implicit // operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that // EBX*EDX gets truncated and the result placed in EBX (no hidden operands). bool HiddenEAXUse = false; for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) { op_t TempUse = this->GetUse(UseIndex).GetOp(); if (!TempUse.showed()) { // hidden operand if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits this->MDAddRegUse(R_dx, false); this->MDAddRegDef(R_ax, false); this->MDAddRegDef(R_dx, false); } } } } // end else if (7 == OptType) // Next, add the flags register to the DEFs and USEs for those instructions that // are marked as defining or using flags. if (this->type == COND_BRANCH) { assert(SMPUsesFlags[this->SMPcmd.itype]); } if (SMPDefsFlags[this->SMPcmd.itype]) { this->MDAddRegDef(X86_FLAGS_REG, false); } if (SMPUsesFlags[this->SMPcmd.itype]) { this->MDAddRegUse(X86_FLAGS_REG, false); } #if 1 if (this->MDIsNop()) { // Clear the DEFs and USEs for no-ops. this->Defs.clear(); this->Uses.clear(); } #endif return; } // end of SMPInstr::MDFixupDefUseLists() // Set the type (NUMERIC or POINTER) of DEFs for this instruction if the type can // be determined from the OptType (optimization category). We can also set all // DEFs of the CPU flags to NUMERIC as they cannot be POINTER. void SMPInstr::MDAnalyzeDefType(void) { size_t index; // Optimization category 6 always produces a POINTER result, while categories // 2, 7, 8, and 9 always produce NUMERIC results. Categories 0 and 1 produce // different results for different opcodes. if (this->OptType == 6) { for (index = 0; index < this->Defs.GetSize(); ++index) { this->Defs.SetType(index, POINTER); } } else if ((this->OptType == 2) || (this->OptType >= 7 && this->OptType <= 9)) { for (index = 0; index < this->Defs.GetSize(); ++index) { this->Defs.SetType(index, NUMERIC); } } // Now, set all flags DEFs to NUMERIC. vector<DefOrUse>::iterator DefIter; index = 0; for (DefIter = this->Defs.GetFirstRef(); DefIter != this->Defs.GetLastRef(); ++DefIter) { op_t TempOp = DefIter->GetOp(); if (TempOp.is_reg(X86_FLAGS_REG)) { this->Defs.SetType(index, NUMERIC); } ++index; } return; } // end of SMPInstr::MDAnalyzeDefType() // Set the type (NUMERIC or POINTER) of USEs for this instruction if the type can // be determined from the OptType (optimization category). We can also set all // USEs of the CPU flags to NUMERIC as they cannot be POINTER. void SMPInstr::MDAnalyzeUseType(void) { size_t index; // We start out with a clone of the NUMERIC part of the corresponding function for DEFs. // The key idea is that if an instruction produces a NUMERIC result, its USES were // NUMERIC within this instruction. Thus, if an exclusive-or instruction produces a // result of type NUMERIC, its source operands were being used as numeric values // regardless of their shadow metadata type. That does not mean that we can change // the metadata to NUMERIC. You can certainly load a POINTER and use it as a NUMERIC, // an example being the hash function computation that hashes an address into a numeric // hash table index. What it does mean is that if all USEs for a particular SSA DEF // are NUMERIC, then there is no point in looking up the metadata for this variable // anywhere in this def-use chain. We can emit optimizing annotations when a complete // USE chain for a particular DEF is numeric. // Optimization categories 2, 7, 8, and 9 always produce NUMERIC results. // Categories 0 and 1 produce different results for different opcodes. if ((this->OptType == 2) || (this->OptType >= 7 && this->OptType <= 9)) { for (index = 0; index < this->Uses.GetSize(); ++index) { this->Uses.SetType(index, NUMERIC); } } // Now, set all flags USEs to NUMERIC. vector<DefOrUse>::iterator UseIter; index = 0; for (UseIter = this->Uses.GetFirstRef(); UseIter != this->Uses.GetLastRef(); ++UseIter) { op_t TempOp = UseIter->GetOp(); if (TempOp.is_reg(X86_FLAGS_REG)) { this->Uses.SetType(index, NUMERIC); } ++index; } return; } // end of SMPInstr::MDAnalyzeUseType() // Handle x86 opcode SIB byte annotations. void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); if (BaseReg == R_none) { msg("BaseReg of R_none at %x\n", this->address); } if (BaseReg == R_sp) { // ESP cannot be IndexReg // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm); } else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) { // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm); } return; } // end of MDAnnotateSIBStackConstants // Emit annotations for constants used as ptr offsets from EBP or // ESP into the stack frame. Only pay attention to EBP-relative // offsets if EBP is being used as a frame pointer (UseFP == true). void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) { op_t Opnd; #if 0 if (this->address == 0x80925f4) { msg("PROBLEM INSTRUCTION: \n"); this->PrintOperands(); } #endif for (int i = 0; i < UA_MAXOP; ++i) { Opnd = SMPcmd.Operands[i]; if (Opnd.type == o_displ) { ea_t offset = Opnd.addr; if (Opnd.hasSIB) { MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); } else { // no SIB ushort BaseReg = Opnd.reg; if (BaseReg == R_sp) { // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d displ %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } else if (UseFP && (BaseReg == R_bp)) { // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d displ %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end if (Opnd.type == o_displ) else if (Opnd.type == o_phrase) { ea_t offset = 0; // mmStrata thinks [esp] is [esp+0] if (Opnd.hasSIB) { MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); } else { // Something like [ecx] ushort BaseReg = Opnd.reg; if (BaseReg == R_sp) { // ESP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK %d displ %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } else if (UseFP && (BaseReg == R_bp)) { // EBP-relative constant offset qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK %d displ %s\n", SMPcmd.ea, SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end else if (Opnd.type == o_phrase) } // end for all operands // If we move a stack pointer or frame pointer into another register, we // need to annotate the implicit zero offset, e.g. mov edi,esp == mov edi,esp+0 // and edi is becoming a stack pointer that mmStrata needs to track. if (this->MDIsStackPointerCopy(UseFP)) { if (UseFP && this->GetUse(0).GetOp().is_reg(R_bp)) { qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK 0 displ %s\n", SMPcmd.ea, SMPcmd.size, disasm); } else { qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK 0 displ %s\n", SMPcmd.ea, SMPcmd.size, disasm); } } return; } // end of SMPInstr::AnnotateStackConstants() // Emit all annotations for the instruction. void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile) { ea_t addr = this->address; flags_t InstrFlags = getFlags(addr); bool MemDest = this->HasDestMemoryOperand(); bool MemSrc = this->HasSourceMemoryOperand(); bool SecondSrcOperandNum = this->IsSecondSrcOperandNumeric(InstrFlags); ++OptCount[OptType]; // keep count for debugging info #if SMP_DEBUG_MEM if (MemDest || MemSrc) { msg("OptType: %d %s", OptType, disasm); this->PrintOperands(); } #endif // Emit appropriate optimization annotations. bool SDTInstrumentation = false; switch (OptType) { case 0: // SDT will have to handle these { #if SMP_DEBUG_TYPE0 msg("OptType 0: %x %s\n", addr, disasm); #endif // mmStrata wants to suppress warnings on the PUSH // instructions that precede the LocalVarsAllocInstr // (i.e. the PUSHes of callee-saved regs). if (!AllocSeen && this->MDIsPushInstr()) { qfprintf(AnnotFile, "%x %d INSTR LOCAL NoWarn %s \n", addr, -3, disasm); } else { SDTInstrumentation = true; } break; } case 1: // nothing for SDT to do { qfprintf(AnnotFile, "%x %d INSTR LOCAL NoMetaUpdate %s \n", addr, -1, disasm); ++AnnotationCount[OptType]; break; } case 4: // INC, DEC, etc.: no SDT work unless MemDest { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%x %d INSTR LOCAL Always1stSrc %s \n", addr, -1, disasm); ++AnnotationCount[OptType]; break; } case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } if (SecondSrcOperandNum) { // treat as category 1 qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } break; } case 6: // Only OS code should include these; problem for SDT { if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%x %d INSTR LOCAL AlwaysPTR %s \n", addr, -OptType, disasm); ++AnnotationCount[OptType]; break; } case 8: // Implicitly writes to EDX:EAX, always numeric. { qfprintf(AnnotFile, "%x %d INSTR LOCAL n EDX EAX ZZ %s %s \n", addr, -2, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; SDTInstrumentation = true; break; } case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) { if (MemDest) { #if SMP_DEBUG // MemDest seems to happen too much. msg("Floating point MemDest: %s \n", disasm); #endif SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; break; } default: // 2,3,7: Optimization possibilities depend on operands { #if SMP_DEBUG2 if (OptType == 3) { // MOV instr class if (MemDest) { msg("MemDest on MOV: %s\n", disasm); } else if (!SecondSrcOperandNum) { msg("MOV: not 2nd op numeric: %s\n", disasm); this->PrintOperands(); } } #endif SDTInstrumentation = true; if (MemDest) { #if SMP_DEBUG_XOR if (OptType == 2) msg("MemDest on OptType 2: %s\n", disasm); #endif break; // treat as category 0 } if ((OptType == 2) || (OptType == 7) || SecondSrcOperandNum) { qfprintf(AnnotFile, "%x %d INSTR LOCAL n %s %s %s \n", addr, -2, this->DestString(OptType), OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } break; } } // end switch (OptType) // If mmStrata is going to have to deal with the // instruction, then we can annotate EBP and ESP // relative constant offsets. If we have emitted // an annotation of type -1, there is no point // in telling mmStrata about these constants. if (SDTInstrumentation) { this->AnnotateStackConstants(UseFP, AnnotFile); if (strlen(this->DeadRegsString) > 0) { // Optimize by informing mmStrata of dead registers. It can avoid saving // and restoring dead state. This is particularly important for EFLAGS, // as restoring the flags is a pipeline serializing instruction. qfprintf(AnnotFile, "%x %d INSTR DEADREGS %s ZZ %s \n", addr, this->SMPcmd.size, this->DeadRegsString, disasm); } } return; } // end of SMPInstr::EmitAnnotations()