/* * SMPInstr.cpp - <see below>. * * Copyright (c) 2000, 2001, 2010 - University of Virginia * * This file is part of the Memory Error Detection System (MEDS) infrastructure. * This file may be used and modified for non-commercial purposes as long as * all copyright, permission, and nonwarranty notices are preserved. * Redistribution is prohibited without prior written consent from the University * of Virginia. * * Please contact the authors for restrictions applying to commercial use. * * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Author: University of Virginia * e-mail: jwd@virginia.com * URL : http://www.cs.virginia.edu/ * * Additional copyrights 2010, 2011 by Zephyr Software LLC * e-mail: {clc,jwd}@zephyr-software.com * URL : http://www.zephyr-software.com/ * */ // // SMPInstr.cpp // // This module performs the instruction level analyses needed for the // SMP project (Software Memory Protection). // using namespace std; #include <string> #include <cstring> #include <pro.h> #include <assert.h> #include <ua.hpp> #include <ida.hpp> #include <idp.hpp> #include <allins.hpp> #include <auto.hpp> #include <bytes.hpp> #include <funcs.hpp> #include <intel.hpp> #include <loader.hpp> #include <lines.hpp> #include <name.hpp> #include "SMPDBInterface.h" #include "SMPStaticAnalyzer.h" #include "SMPDataFlowAnalysis.h" #include "SMPInstr.h" #include "SMPProgram.h" #include "ProfilerInformation.h" // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_BUILD_RTL 1 // should be left on, serious errors! 
// Verbose debugging switches (0 = off, 1 = on).
#define SMP_VERBOSE_DEBUG_BUILD_RTL 0
#define SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE 0
#define SMP_VERBOSE_DEBUG_INFER_TYPES 0
#define SMP_VERBOSE_DUMP 0
#define SMP_VERBOSE_FIND_POINTERS 0

// Analysis and annotation policy switches.
#define SMP_CALL_TRASHES_REGS 1 // Add DEFs of caller-saved regs to CALL instructions
#define SMP_BASEREG_POINTER_TYPE 1 // Initialize Base Register USEs to type POINTER?
#define SMP_OPTIMIZE_ADD_TO_NUMERIC 0 // optimizing annotation type -5
#define SMP_IDENTIFY_POINTER_ADDRESS_REG 0 // optimizing annotation POINTER
#define SMP_CHILDACCESS_ALL_CODE 0 // CHILDACCESS annotations for all funcs, or just analyzed funcs?
#define SPECIAL_CASE_CARRY_BORROW 0 // Treat sbb/adc different from sub/add annotations?
#define SMP_BUILD_SPECIAL_ADC_SBB_RTL 0 // Explicit RTL subtree for carry flag?
#define SMP_AGGRESSIVE_TYPE_INFERENCE 1 // Shorten iterations by quick propagation in InferOperatorType()
#define SMP_ANNOTATE_ALL_MEMORY_OPERANDS 0 // Info annotation for all memory read and write operands?

// Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable
// by allowing us to pick them up with an array index.
// Entry i holds the CF_CHG / CF_USE macro whose numeric suffix is i + 1.
static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6};
static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6};

// Text to be printed in each optimizing annotation explaining why
// the annotation was emitted.
// NOTE(review): sized LAST_TYPE_CATEGORY + 1, so presumably indexed by the
// optimization category number -- confirm against the code that reads it.
static const char *OptExplanation[LAST_TYPE_CATEGORY + 1] = {
	"NoOpt",
	"NoMetaUpdate",
	"AlwaysNUM",
	"NUMVia2ndSrcIMMEDNUM",
	"Always1stSrc",
	"1stSrcVia2ndSrcIMMEDNUM",
	"AlwaysPtr",
	"AlwaysNUM",
	"AlwaysNUM",
	"NUMViaFPRegDest",
	"NumericSources",
	"StackMemoryTracking",
	"NumericSources",
	"NumericMemDest",
	"NeverMemDest",
	"SafeIfNoIndexing"
};

// Printable operator names for the debug dumps. The table is subscripted
// directly with an operator value (see SMPGuard::Dump() and
// SMPRegTransfer::Dump()), so the order of the entries must be kept in step
// with the operator numbering.
static const char *OperatorText[LAST_SMP_OPERATOR + 1] = {
	"SMP_NULL_OPERATOR",
	"SMP_CALL",
	"SMP_INPUT",
	"SMP_OUTPUT",
	"SMP_ADDRESS_OF",
	"SMP_U_LEFT_SHIFT",
	"SMP_S_LEFT_SHIFT",
	"SMP_U_RIGHT_SHIFT",
	"SMP_S_RIGHT_SHIFT",
	"SMP_ROTATE_LEFT",
	"SMP_ROTATE_LEFT_CARRY",
	"SMP_ROTATE_RIGHT",
	"SMP_ROTATE_RIGHT_CARRY",
	"SMP_DECREMENT",
	"SMP_INCREMENT",
	"SMP_ADD",
	"SMP_ADD_CARRY",
	"SMP_SUBTRACT",
	"SMP_SUBTRACT_BORROW",
	"SMP_U_MULTIPLY",
	"SMP_S_MULTIPLY",
	"SMP_U_DIVIDE",
	"SMP_S_DIVIDE",
	"SMP_U_REMAINDER",
	"SMP_SIGN_EXTEND",
	"SMP_ZERO_EXTEND",
	"SMP_ASSIGN",
	"SMP_BITWISE_AND",
	"SMP_BITWISE_OR",
	"SMP_BITWISE_NOT",
	"SMP_BITWISE_XOR",
	"SMP_BITWISE_AND_NOT",
	"SMP_NEGATE",
	"SMP_S_COMPARE",
	"SMP_U_COMPARE",
	"SMP_LESS_THAN",
	"SMP_GREATER_THAN",
	"SMP_LESS_EQUAL",
	"SMP_GREATER_EQUAL",
	"SMP_EQUAL",
	"SMP_NOT_EQUAL",
	"SMP_LOGICAL_AND",
	"SMP_LOGICAL_OR",
	"SMP_UNARY_NUMERIC_OPERATION",
	"SMP_BINARY_NUMERIC_OPERATION",
	"SMP_SYSTEM_OPERATION",
	"SMP_UNARY_FLOATING_ARITHMETIC",
	"SMP_BINARY_FLOATING_ARITHMETIC",
	"SMP_REVERSE_SHIFT_U",
	"SMP_SHUFFLE",
	"SMP_COMPARE_EQ_AND_SET",
	"SMP_COMPARE_GT_AND_SET",
	"SMP_PACK_SIGNED",
	"SMP_PACK_UNSIGNED",
	"SMP_AVERAGE_UNSIGNED",
	"SMP_MULTIPLY_AND_ADD",
	"SMP_SUM_OF_DIFFS",
	"SMP_MAX_S",
	"SMP_MAX_U",
	"SMP_MIN_S",
	"SMP_MIN_U",
	"SMP_ABS_VALUE",
	"SMP_INTERLEAVE",
	"SMP_CONCATENATE"
};

// Does the CurrOperator definitely indicate a signed or unsigned operation?
bool OperatorHasSignedness(SMPoperator CurrOperator) { bool DetectedSignedness; switch (CurrOperator) { case SMP_NULL_OPERATOR: DetectedSignedness = false; break; case SMP_CALL: // CALL instruction DetectedSignedness = true; break; case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_ADDRESS_OF: // take effective address DetectedSignedness = false; break; case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_U_DIVIDE: case SMP_U_REMAINDER: case SMP_ZERO_EXTEND: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: case SMP_U_COMPARE: // unsigned compare (AND-based) DetectedSignedness = true; break; case SMP_S_LEFT_SHIFT: // signed left shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_S_MULTIPLY: case SMP_S_DIVIDE: case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: DetectedSignedness = true; break; case SMP_DECREMENT: case SMP_INCREMENT: case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow case SMP_ASSIGN: case SMP_BITWISE_AND: case SMP_BITWISE_OR: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC DetectedSignedness = false; break; case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same 
to our type system; all NUMERIC DetectedSignedness = true; break; case SMP_REVERSE_SHIFT_U: // Shift right operand by bit count in left operand case SMP_SHUFFLE: // Shuffle bytes, words, etc. within destination operation per source mask case SMP_COMPARE_EQ_AND_SET: // Compare for equality and set fields to all 1's or all 0's case SMP_COMPARE_GT_AND_SET: // Compare for greater-than and set fields to all 1's or all 0's case SMP_PACK_S: // Pack operands into extended-precision register, signed saturation for loss of precision case SMP_PACK_U: // Pack operands into extended-precision register, unsigned saturation for loss of precision case SMP_AVERAGE_U: // Average of unsigned operands case SMP_MULTIPLY_AND_ADD: // multiply and add (or multiply and accumulate) case SMP_SUM_OF_DIFFS: // sum over two vectors of absolute values of differences of their elements case SMP_INTERLEAVE: // extended-precision interleaving of bytes or words or dwords etc.; NUMERIC case SMP_CONCATENATE: // extended-precision concatenation; NUMERIC DetectedSignedness = true; break; default: DetectedSignedness = false; SMP_msg("ERROR: Unknown operator in OperatorHasSignedness: %d\n", CurrOperator); break; } // end switch on operator return DetectedSignedness; } // end of OperatorHasSignedness() // ***************************************************************** // Class SMPGuard // ***************************************************************** // Constructor SMPGuard::SMPGuard(void) { this->LeftOperand.type = o_void; this->RightOperand.type = o_void; this->GuardOp = SMP_NULL_OPERATOR; return; } // Debug print void SMPGuard::Dump(void) { SMP_msg("GUARD: "); PrintOperand(this->LeftOperand); SMP_msg(" %s ", OperatorText[this->GuardOp]); PrintOperand(this->RightOperand); SMP_msg(":"); return; } // end of SMPGuard::Dump() // ***************************************************************** // Class SMPRegTransfer // ***************************************************************** // Constructor 
SMPRegTransfer::SMPRegTransfer(void) { this->Guard = NULL; this->LeftOperand.type = o_void; this->RightOperand.type = o_void; this->RTop.oper = SMP_NULL_OPERATOR; this->RTop.type = UNINIT; #if SMP_TRACK_NONSPEC_OPER_TYPE this->RTop.NonSpeculativeType = UNINIT; #endif this->booleans1 = 0; this->RightRT = NULL; return; } // Destructor SMPRegTransfer::~SMPRegTransfer() { #if 0 SMP_msg("Destroying SMPRegTransfer.\n"); #endif if (NULL != this->RightRT) delete this->RightRT; if (NULL != this->Guard) delete this->Guard; return; } // Compute operand-dependent change in stack pointer value. sval_t SMPRegTransfer::ComputeStackPointerAlteration(bool IsLeaveInstr, sval_t IncomingDelta, sval_t FramePtrDelta) { sval_t delta = 0; // Search for the pattern: stack_pointer := ... if (SMP_ASSIGN == this->GetOperator()) { op_t DefOp = this->GetLeftOperand(); if (DefOp.is_reg(MD_STACK_POINTER_REG)) { // We have the code pattern stack_pointer := ... // We expect to find an overall RT structure of: // stack_pointer := stack_pointer binaryoperator constant // If the binaryoperator is addition or subtraction, then // we use the constant to determine our return value. // If the binaryoperator is a bitwise AND, then we return // a special code. Delta is unknown but can probably be treated // as zero, assuming that no stack-pointer-relative accesses to // the stack region above the current stack pointer occur later. // If we have a non-constant operand, we return an error code // to indicate that we cannot compute the delta. // If we don't have a binary operator, we return an error code, // unless we find a stack frame deallocation: esp := ebp if (!this->HasRightSubTree()) { op_t UseOp = this->GetRightOperand(); if (UseOp.is_reg(MD_FRAME_POINTER_REG)) { // !!!!****!!!! Should validate that frame pointer reg is used as frame pointer // Found esp := ebp (deallocation of stack frame) delta = FramePtrDelta - IncomingDelta; assert(delta >= 0); // e.g. 
-4 - -12 => +8 for an 8-byte frame } else { // !!!!****!!!!**** Need to look up deltas for registers that // were used to hold a copy of the stack pointer. // For now, just consider it an error. delta = (sval_t) SMP_STACK_DELTA_ERROR_CODE; } } else { SMPRegTransfer *RightRT = this->GetRightTree(); SMPoperator RightOperator = RightRT->GetOperator(); op_t RightDefOp = RightRT->GetLeftOperand(); if (IsLeaveInstr) { // Found the RT esp := ebp + 4 that adjusts the stack pointer // for an x86 LEAVE instruction. esp := ebp is equivalent // to esp := <esp when frame ptr was set up>, so delta becomes // <esp delta when frame ptr was set up> - IncomingDelta + 4. assert(SMP_ADD == RightOperator); assert(!RightRT->HasRightSubTree()); assert(RightDefOp.is_reg(X86_FRAME_POINTER_REG)); delta = 4 + FramePtrDelta - IncomingDelta; } else if (RightRT->HasRightSubTree()) { // Not the right code pattern; unknown effect on stack pointer. delta = (sval_t) SMP_STACK_DELTA_ERROR_CODE; } else if (!RightDefOp.is_reg(MD_STACK_POINTER_REG)) { // We have stack_pointer := something1 operator something2 // where something1 is not the stack_pointer. It might be // the frame pointer, e.g. SP := FP-12. We can compute this // stack delta because we know the delta when the FP was set up. op_t RightUseOp = RightRT->GetRightOperand(); if (RightDefOp.is_reg(MD_FRAME_POINTER_REG) && (o_imm == RightUseOp.type)) { // We have SP := FP operator immediate delta = FramePtrDelta - IncomingDelta; // partial computation: SP := BP assert(delta >= 0); // e.g. -4 - -12 => +8 for an 8-byte frame if (SMP_ADD == RightOperator) { delta += RightUseOp.value; } else if (SMP_SUBTRACT == RightOperator) { delta -= RightUseOp.value; } else { // Not the right code pattern; unknown effect on stack pointer. delta = (sval_t) SMP_STACK_DELTA_ERROR_CODE; } } else { // !!!!****!!!!**** Need to look up deltas for registers that // were used to hold a copy of the stack pointer. // For now, just consider it an error. 
delta = (sval_t) SMP_STACK_DELTA_ERROR_CODE; } } else { // We have stack_pointer := stack_pointer operator ... op_t RightUseOp = RightRT->GetRightOperand(); // Check the operator if (SMP_BITWISE_AND == RightOperator) { delta = (sval_t) SMP_STACK_POINTER_BITWISE_AND_CODE; } else { if (o_imm != RightUseOp.type) { // Don't know how to deal with adding non-constant to stack pointer delta = (sval_t) SMP_STACK_DELTA_ERROR_CODE; } else if (SMP_ADD == RightOperator) { delta = (sval_t) RightUseOp.value; } else if (SMP_SUBTRACT == RightOperator) { delta = (0 - ((sval_t) RightUseOp.value)); } else { delta = (sval_t) SMP_STACK_DELTA_ERROR_CODE; } } } } } } return delta; } // end of SMPRegTransfer::ComputeStackPointerAlteration() // Debug print void SMPRegTransfer::Dump(void) { if (NULL != this->Guard) this->Guard->Dump(); // Left operand if (o_void != this->LeftOperand.type) PrintOperand(this->LeftOperand); // Then the operator SMP_msg(" %s ", OperatorText[this->GetOperator()]); // then the right operand or subtree if (this->HasRightSubTree()) this->GetRightTree()->Dump(); else if (o_void != this->RightOperand.type) PrintOperand(this->RightOperand); return; } // ***************************************************************** // Class SMPRTL // ***************************************************************** // Constructor SMPRTL::SMPRTL() { this->ExtraKills.clear(); this->RTCount = 0; return; } // Destructor SMPRTL::~SMPRTL() { for (size_t index = 0; index < this->RTCount; ++index) { delete (this->RTvector[index]); } this->ExtraKills.clear(); return; } // Get methods SMPRegTransfer *SMPRTL::GetRT(size_t index) { if (index > this->RTCount) return NULL; else return this->RTvector[index]; } // Set methods void SMPRTL::push_back(SMPRegTransfer *NewEffect) { assert(SMP_RT_LIMIT > this->RTCount); this->RTvector[this->RTCount] = NewEffect; ++(this->RTCount); return; } // Printing methods void SMPRTL::Dump(void) { size_t index; if (0 < this->RTCount) { SMP_msg("RTL: "); for (index 
= 0; index < this->RTCount; ++index) { this->RTvector[index]->Dump(); } for (index = 0; index < this->ExtraKills.size(); ++index) { SMP_msg(" KILL: "); PrintOperand(this->ExtraKills.at(index)); } SMP_msg("\n"); } return; } // end of SMPRTL::Dump() // Accumulate stack pointer alteration total across all RTs. sval_t SMPRTL::TotalStackPointerAlteration(bool IsLeaveInstr, sval_t IncomingDelta, sval_t FramePtrDelta) { sval_t TotalDelta = 0; sval_t IncrementalDelta; for (size_t index = 0; index < this->RTCount; ++index) { IncrementalDelta = this->RTvector[index]->ComputeStackPointerAlteration(IsLeaveInstr, IncomingDelta, FramePtrDelta); if ((SMP_STACK_DELTA_ERROR_CODE == IncrementalDelta) || (SMP_STACK_POINTER_BITWISE_AND_CODE == IncrementalDelta)) { TotalDelta = IncrementalDelta; // pass code back break; // exit loop and return coded value } else { TotalDelta += IncrementalDelta; } } return TotalDelta; } // end of SMPRTL::TotalStackPointerAlteration() // ***************************************************************** // Class SMPInstr // ***************************************************************** // Constructor for instruction. 
SMPInstr::SMPInstr(ea_t addr) { this->SMPcmd.size = 0; this->address = addr; #if 0 this->ResetGoodRTL(); this->ResetJumpTarget(); this->ResetBlockTerm(); this->ResetTailCall(); this->ResetCondTailCall(); this->ResetCallUsedAsJump(); this->ResetDirectRecursiveCall(); this->ResetInterrupt(); #else this->booleans1 = 0; #endif #if 0 this->ResetNop(); this->ResetRegClearIdiom(); this->ResetDefsFlags(); this->ResetUsesFlags(); this->ResetFarBranchComputed(); this->ResetBranchesToFarChunk(); this->ResetIndirectMemWrite(); this->ResetIndirectMemRead(); #else this->booleans2 = 0; #endif #if 0 this->ResetLoadFromStack(); this->ResetMultiplicationBitsDiscarded(); this->ResetTypeInferenceComplete(); this->ResetCategoryInferenceComplete(); this->ResetDEFsTyped(); this->ResetUSEsTyped(); #else this->booleans3 = 0; #endif this->booleans4 = 0; this->DeadRegsString[0] = '\0'; this->CallTarget = BADADDR; this->FarBranchTarget = BADADDR; this->AddSubSourceType = UNINIT; this->AddSubUseType = UNINIT; this->AddSubSourceOp = InitOp; this->AddSubUseOp = InitOp; this->DEFMemOp = InitOp; this->USEMemOp = InitOp; this->LeaUSEMemOp = InitOp; this->MoveSource = InitOp; this->BasicBlock = NULL; this->features = 0; this->StackPtrOffset = 0; this->type = DEFAULT; this->OptType = 0; this->Defs.clear(); this->Uses.clear(); return; } // Destructor. SMPInstr::~SMPInstr() { this->Defs.clear(); this->Uses.clear(); return; } char *SMPInstr::GetDisasm(void) const { return DisAsmText.GetDisAsm(this->GetAddr()); } // Is the instruction the type that terminates a basic block? bool SMPInstr::IsBasicBlockTerminator() const { return ((type == JUMP) || (type == COND_BRANCH) || (type == INDIR_JUMP) || (type == RETURN)); } // Get non-flags DEF, usually for arithmetic opcode. 
set<DefOrUse, LessDefUse>::iterator SMPInstr::GetFirstNonFlagsDef(void) { set<DefOrUse, LessDefUse>::iterator DefIter; op_t DefOp; for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { DefOp = DefIter->GetOp(); if (!((o_reg == DefOp.type) && DefOp.is_reg(MD_FLAGS_REG))) break; // found a non-flags-reg DEF. } return DefIter; } // Is the destination operand a memory reference? bool SMPInstr::HasDestMemoryOperand(void) { return (o_void != this->DEFMemOp.type); } // end of SMPInstr::HasDestMemoryOperand() // Is a source operand a memory reference? bool SMPInstr::HasSourceMemoryOperand(void) { return (o_void != this->USEMemOp.type); } // end of SMPInstr::HasSourceMemoryOperand() // Get the first memory operand in the DEF list. op_t SMPInstr::MDGetMemDefOp(void) { return this->DEFMemOp; // cached value } // end of SMPInstr::MDGetMemDefOp() // Get the first memory operand in the USE list. op_t SMPInstr::MDGetMemUseOp(void) { return this->USEMemOp; // cached value } // end of SMPInstr::MDGetMemUseOp() // return BADADDR if not jump, target addr otherwise. ea_t SMPInstr::GetJumpTarget(void) { ea_t TargetAddr = BADADDR; if (this->HasGoodRTL()) { // We want to find an RTL of the form: inst_ptr_reg := code_addr SMPRegTransfer *CurrRT = this->RTL.GetRT(0); op_t DefOp = CurrRT->GetLeftOperand(); if (DefOp.is_reg(MD_INSTRUCTION_POINTER_REG)) { if ((SMP_ASSIGN == CurrRT->GetOperator()) && (!CurrRT->HasRightSubTree())) { op_t UseOp = CurrRT->GetRightOperand(); if ((o_near == UseOp.type) || (o_far == UseOp.type)) { // address TargetAddr = UseOp.addr; } } } } return TargetAddr; } // end SMPInstr::GetJumpTarget() // Does any USE have type NEGATEDPTR? 
bool SMPInstr::HasNegatedPtrUSE(void) { bool UseFound = false; set<DefOrUse, LessDefUse>::iterator UseIter; SMPOperandType UseType; for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseType = UseIter->GetType(); if (IsEqType(UseType, NEGATEDPTR)) { UseFound = true; break; } } return UseFound; } // end of SMPInstr::HasNegatedPtrUSE() // Detect indirect memory DEFs or USEs void SMPInstr::AnalyzeIndirectRefs(bool UseFP) { op_t DefMemOp = this->MDGetMemDefOp(); op_t UseMemOp = this->MDGetMemUseOp(); if (o_void != DefMemOp.type) { // Found a memory DEF. Is it indirect? if (MDIsIndirectMemoryOpnd(DefMemOp, UseFP)) { this->SetIndirectMemWrite(); } } if (o_void != UseMemOp.type) { // Found a memory USE. Is it indirect? if (MDIsIndirectMemoryOpnd(UseMemOp, UseFP)) { this->SetIndirectMemRead(); } } return; } // end of SMPInstr::AnalyzeIndirectRefs() set<DefOrUse, LessDefUse>::iterator SMPInstr::GetPointerAddressReg(op_t MemOp) { int BaseReg; int IndexReg; ushort ScaleFactor; ea_t displacement; set<DefOrUse, LessDefUse>::iterator PtrIter; if ((NULL == this->BasicBlock) || (NULL == this->BasicBlock->GetFunc())) { SMP_msg("ERROR: NULL member pointers in SMPInstr::GetPointerAddressReg() at %x \n", this->address); return this->GetLastUse(); } bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, displacement); if ((R_none != BaseReg) && (R_sp != BaseReg) && (!(UseFP && (R_bp == BaseReg)))) { op_t BaseOp = InitOp; BaseOp.type = o_reg; BaseOp.reg = (ushort) BaseReg; PtrIter = this->FindUse(BaseOp); assert(PtrIter != this->GetLastUse()); if (IsDataPtr(PtrIter->GetType())) { return PtrIter; } } if ((R_none != IndexReg) && (R_sp != IndexReg) && (!(UseFP && (R_bp == IndexReg)))) { op_t IndexOp = InitOp; IndexOp.type = o_reg; IndexOp.reg = (ushort) IndexReg; PtrIter = this->FindUse(IndexOp); assert(PtrIter != this->GetLastUse()); if (IsDataPtr(PtrIter->GetType())) { return PtrIter; } } 
PtrIter = this->GetLastUse(); return PtrIter; } // end of SMPInstr::GetPointerAddressReg() // Does the instruction whose flags are in F have a numeric type // as the second source operand? // NOTE: We can only analyze immediate values now. When data flow analyses are implemented, // we will be able to analyze many non-immediate operands. #define IMMEDNUM_LOWER -8191 #define IMMEDNUM_UPPER 8191 bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { bool SecondOpImm = (this->SMPcmd.Operands[1].type == o_imm); uval_t TempImm; if (SecondOpImm) { TempImm = this->SMPcmd.Operands[1].value; } return (SecondOpImm && IsImmedNumeric(TempImm)); } // end of SMPInstr::IsSecondSrcOperandNumeric() // Determine the type of the USE-only operand for add and subtract // instructions. If it is NUMERIC or PROF_NUMERIC, an optimizing // annotation will result. // As a byproduct, find the type of the USE/DEF operand as well. void SMPInstr::SetAddSubSourceType(void) { // Walk the RTL and find the operands we care about. // The RTL should look like: opnd1 := (opnd1 op opnd2), where op is // and add or subtract operator. Within the parentheses, the type // of opnd1 is our AddSubUseType and opnd1 is our AddSubUseOp, while // the type of opnd2 is our AddSubSourceType. if (this->RTL.GetCount() < 1) return; // no RTL, no leave types as UNINIT. assert(this->RTL.GetRT(0)->HasRightSubTree()); SMPRegTransfer *RightTree = this->RTL.GetRT(0)->GetRightTree(); op_t LeftOp, RightOp; LeftOp = RightTree->GetLeftOperand(); // Use (also DEF) operand #if SMP_BUILD_SPECIAL_ADC_SBB_RTL if ((NN_adc != this->SMPcmd.itype) && (NN_sbb != this->SMPcmd.itype)) { assert(!(RightTree->HasRightSubTree())); RightOp = RightTree->GetRightOperand(); // Src (non-DEF) operand } else { // Add with carry and subtract with borrow have an extra level // to the tree RTL, e.g. 
for add with carry: // opnd1 := (opnd1 + (opnd2 + carryflag)) assert(RightTree->HasRightSubTree()); RightTree = RightTree->GetRightTree(); RightOp = RightTree->GetLeftOperand(); } #else assert(!(RightTree->HasRightSubTree())); RightOp = RightTree->GetRightOperand(); // Src (non-DEF) operand #endif set<DefOrUse, LessDefUse>::iterator UseIter, SrcIter; SrcIter = this->FindUse(RightOp); assert(SrcIter != this->GetLastUse()); this->AddSubSourceType = SrcIter->GetType(); this->AddSubSourceOp = RightOp; UseIter = this->FindUse(LeftOp); assert(UseIter != this->GetLastUse()); this->AddSubUseType = UseIter->GetType(); this->AddSubUseOp = LeftOp; return; } // end of SMPInstr::SetAddSubSourceType() // Are all DEFs in the DEF set NUMERIC type? bool SMPInstr::AllDefsNumeric(void) { bool AllNumeric = (this->Defs.GetSize() > 0); // false if no DEFs, true otherwise set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { // We ignore the stack pointer for pop instructions and consider only // the register DEF of the pop. if (this->MDIsPopInstr() && CurrDef->GetOp().is_reg(R_sp)) continue; AllNumeric = (AllNumeric && IsNumeric(CurrDef->GetType())); } return AllNumeric; } // end of SMPInstr::AllDefsNumeric() // Were the types of any DEFs derived from profiler info? bool SMPInstr::AnyDefsProfiled(void) { bool profd = false; set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { profd = (profd || IsProfDerived(CurrDef->GetType())); } return profd; } // Do all DEFs have DEF_METADATA_UNUSED status? 
bool SMPInstr::AllDefMetadataUnused(void) { bool AllUnused = (this->Defs.GetSize() > 0); // false if no DEFs, true otherwise set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { AllUnused = (AllUnused && (DEF_METADATA_UNUSED == CurrDef->GetMetadataStatus())); } return AllUnused; } // end of SMPInstr::AllDefMetadataUnused() // DEBUG print operands for Inst. void SMPInstr::PrintOperands(void) const { op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = SMPcmd.Operands[i]; PrintOneOperand(Opnd, this->features, i); } SMP_msg(" \n"); return; } // end of SMPInstr::PrintOperands() // Complete DEBUG printing. void SMPInstr::Dump(void) { SMP_msg("%x %d SMPitype: %d %s\n", this->address, this->SMPcmd.size, (int) this->type, DisAsmText.GetDisAsm(this->GetAddr())); SMP_msg("USEs: "); this->Uses.Dump(); SMP_msg("DEFs: "); this->Defs.Dump(); this->RTL.Dump(); #if SMP_VERBOSE_DUMP this->PrintOperands(); #endif SMP_msg("\n"); return; } // end of SMPInstr::Dump() // Print out the destination operand list for the instruction, given // the OptCategory for the instruction as a hint. char * SMPInstr::DestString(int OptType) { static char DestList[MAXSTR]; int RegDestCount = 0; DestList[0] = 'Z'; // Make sure there are no leftovers from last call DestList[1] = 'Z'; DestList[2] = '\0'; set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { op_t DefOpnd = CurrDef->GetOp(); if (DefOpnd.is_reg(X86_FLAGS_REG)) // don't print flags as a destination continue; // We want to ignore the stack pointer DEF for pops and just include // the register DEF for the pop. 
if (DefOpnd.is_reg(R_sp) && this->MDIsPopInstr()) continue; if (o_reg == DefOpnd.type) { ushort DestReg = DefOpnd.reg; if (0 == RegDestCount) { SMP_strncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg])); } else { SMP_strncat(DestList, " ", MAXSTR); SMP_strncat(DestList, RegNames[DestReg], MAXSTR); } ++RegDestCount; } } if (0 >= RegDestCount) { SMP_msg("WARNING: No destination registers: %s\n", DisAsmText.GetDisAsm(this->GetAddr())); } else { SMP_strncat(DestList, " ZZ ", MAXSTR); } return DestList; } // end of SMPInstr::DestString() // Equality operator for SMPInstr. Key field is address. int SMPInstr::operator==(const SMPInstr &rhs) const { if (this->address != rhs.GetAddr()) return 0; else return 1; } // Inequality operator for SMPInstr. Key field is address. int SMPInstr::operator!=(const SMPInstr &rhs) const { return (this->address != rhs.GetAddr()); } // Less than operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<(const SMPInstr &rhs) const { return (this->address < rhs.GetAddr()); } // Less than or equal operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<=(const SMPInstr &rhs) const { return (this->address <= rhs.GetAddr()); } #define MD_FIRST_ENTER_INSTR NN_enterw #define MD_LAST_ENTER_INSTR NN_enterq // Is this instruction the one that allocates space on the // stack for the local variables? bool SMPInstr::MDIsFrameAllocInstr(void) { // The frame allocating instruction should look like: // sub esp,48 or add esp,-64 etc. op_t ESPOp = InitOp; ESPOp.type = o_reg; ESPOp.reg = R_sp; if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { if (this->GetLastDef() != this->Defs.FindRef(ESPOp)) { // We know that an addition or subtraction is being // performed on the stack pointer. This should not be // possible within the prologue except at the stack // frame allocation instruction, so return true. We // could be more robust in this analysis in the future. 
**!!** // CAUTION: If a compiler allocates 64 bytes for locals // and 16 bytes for outgoing arguments in a single // instruction: sub esp,80 // you cannot insist on finding sub esp,LocSize // To make this more robust, we are going to insist that // an allocation of stack space is either performed by // adding a negative immediate value, or by subtracting // a positive immediate value. We will throw in, free of // charge, a subtraction of a register, which is how alloca() // usually allocates stack space. // PHASE ORDERING: Should we use the Operands[] instead of the USE list? **!!** set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { if (o_imm == CurrUse->GetOp().type) { signed long TempImm = (signed long) CurrUse->GetOp().value; if (((0 > TempImm) && (this->SMPcmd.itype == NN_add)) || ((0 < TempImm) && (this->SMPcmd.itype == NN_sub))) { return true; } } else if ((o_reg == CurrUse->GetOp().type) && (!CurrUse->GetOp().is_reg(R_sp)) // skip the ESP operand && (this->SMPcmd.itype == NN_sub)) { // sub esp,reg: alloca() ? return true; } } } } else if ((this->SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (this->SMPcmd.itype <= MD_LAST_ENTER_INSTR)) { return true; } return false; } // end of SMPInstr::MDIsFrameAllocInstr() #define MD_FIRST_LEAVE_INSTR NN_leavew #define MD_LAST_LEAVE_INSTR NN_leaveq // Is this instruction in the epilogue the one that deallocates the local // vars region of the stack frame? bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) { // The usual compiler idiom for the prologue on x86 is to // deallocate the local var space with: mov esp,ebp // It could be add esp,constant. We can be tricked by // add esp,constant when the constant is just the stack // adjustment after a call. We will have to insist that // the immediate operand have at least the value of // LocalVarsSize for this second form, and that UseFP be true // for the first form. 
set<DefOrUse, LessDefUse>::iterator FirstDef = this->GetFirstDef(); set<DefOrUse, LessDefUse>::iterator FirstUse = this->GetFirstUse(); if ((SMPcmd.itype >= MD_FIRST_LEAVE_INSTR) && (SMPcmd.itype <= MD_LAST_LEAVE_INSTR)) return true; else if (this->HasDestMemoryOperand() || this->HasSourceMemoryOperand()) { // Don't get fooled by USE or DEF entries of EBP or ESP that come // from memory operands, e.g. mov eax,[ebp-20] return false; } else if (UseFP && (this->SMPcmd.itype == NN_mov) && (FirstDef->GetOp().is_reg(R_sp)) && (FirstUse->GetOp().is_reg(R_bp))) return true; else if ((this->SMPcmd.itype == NN_add) && (FirstDef->GetOp().is_reg(R_sp))) { set<DefOrUse, LessDefUse>::iterator SecondUse = ++FirstUse; if (SecondUse == this->Uses.GetLastRef()) return false; // no more USEs ... strange for ADD instruction if (SecondUse->GetOp().is_imm((uval_t) LocalVarsSize)) return true; else if (SecondUse->GetOp().type == o_imm) { signed long TempImm = (signed long) this->SMPcmd.Operands[1].value; if (0 > TempImm) // adding a negative to ESP; alloc, not dealloc return false; else { SMP_msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); return true; } } else return false; } else return false; } // end of SMPInstr::MDIsFrameDeallocInstr() // Is instruction a no-op? There are 1-byte, 2-byte, etc., versions of no-ops. bool SMPInstr::MDIsNop(void) const { bool IsNop = false; ushort opcode = this->SMPcmd.itype; // NOTE: More examples have arisen, e.g. xchg reg with itself. !!!!!! if (NN_nop == opcode) IsNop = true; else if (NN_mov == opcode) { if ((o_reg == this->SMPcmd.Operands[0].type) && this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) { // We have a register to register move with source == destination. 
IsNop = true; } } else if (NN_lea == opcode) { if ((o_reg == this->SMPcmd.Operands[0].type) && (o_displ == this->SMPcmd.Operands[1].type) && (0 == this->SMPcmd.Operands[1].addr)) { // We are looking for 6-byte no-ops like lea esi,[esi+0] ushort destreg = this->SMPcmd.Operands[0].reg; if ((this->SMPcmd.Operands[1].hasSIB) && (destreg == (ushort) sib_base(this->SMPcmd.Operands[1])) && (R_sp == sib_index(this->SMPcmd.Operands[1]))) { // R_sp signifies no SIB index register. So, we have // lea reg,[reg+0] with reg being the same in both place, // once as Operands[0] and once as the base reg in Operands[1]. IsNop = true; } else if (destreg == this->SMPcmd.Operands[1].reg) { IsNop = true; } } } return IsNop; } // end of SMPInstr::MDIsNop() // Is non-multiply arithmetic instruction that can possibly overflow? bool SMPInstr::MDIsOverflowingOpcode(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_adc == opcode) || (NN_add == opcode) || (NN_inc == opcode) || (NN_neg == opcode) || (NN_xadd == opcode)); } // Is non-multiply arithmetic instruction that can possibly underflow? bool SMPInstr::MDIsUnderflowingOpcode(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_dec == opcode) || (NN_sbb == opcode) || (NN_sub == opcode)); } // Is potentially benign overflow instruction? bool SMPInstr::MDIsMaybeBenignOverflowOpcode(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_adc == opcode) || (NN_add == opcode)); } // Is potentially benign underflow instruction? bool SMPInstr::MDIsMaybeBenignUnderflowOpcode(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_neg == opcode) || (NN_sbb == opcode) || (NN_sub == opcode)); } // Is definitely benign underflow instruction? // NOTE: Overlaps with MDIsMaybeBenignUnderflowOpcode(), so call this one first. 
bool SMPInstr::MDIsDefiniteBenignUnderflowOpcode(void) { unsigned short opcode = this->SMPcmd.itype; // gcc use: sbb edx,edx as a tricky way to get all zeroes or all ones into edx. // (Some sort of saturation?) // The "underflow" on the subtraction is irrelevant and benign. return ((NN_sbb == opcode) && (this->SubtractsFromItself())); } // Does a subtraction operator get applied to same left and right operands? bool SMPInstr::SubtractsFromItself(void) { bool SelfSubtract = false; size_t RTLCount = this->RTL.GetCount(); for (size_t index = 0; index < RTLCount; ++index) { SMPRegTransfer *CurrRT = this->RTL.GetRT(index); if ((CurrRT != NULL) && (CurrRT->HasRightSubTree())) { CurrRT = CurrRT->GetRightTree(); SMPoperator CurrOp = CurrRT->GetOperator(); if ((SMP_SUBTRACT_BORROW == CurrOp) || (SMP_SUBTRACT == CurrOp)) { if (!(CurrRT->HasRightSubTree())) { // NOTE: Must change this code when we build more precise SMP_SUBTRACT_BORROW RTL. op_t LeftOp = CurrRT->GetLeftOperand(); op_t RightOp = CurrRT->GetRightOperand(); SelfSubtract = IsEqOp(RightOp, LeftOp); } break; } } } return SelfSubtract; } // end of SMPInstr::SubtractsFromItself() // MACHINE DEPENDENT: Is instruction a return instruction? bool SMPInstr::MDIsReturnInstr(void) const { return ((this->SMPcmd.itype == NN_retn) || (this->SMPcmd.itype == NN_retf)); } // MACHINE DEPENDENT: Is instruction a POP instruction? #define FIRST_POP_INST NN_pop #define LAST_POP_INST NN_popfq bool SMPInstr::MDIsPopInstr(void) const { return ((this->SMPcmd.itype >= FIRST_POP_INST) && (this->SMPcmd.itype <= LAST_POP_INST)); } // MACHINE DEPENDENT: Is instruction a PUSH instruction? #define FIRST_PUSH_INST NN_push #define LAST_PUSH_INST NN_pushfq bool SMPInstr::MDIsPushInstr(void) const { return ((this->SMPcmd.itype >= FIRST_PUSH_INST) && (this->SMPcmd.itype <= LAST_PUSH_INST)); } // MACHINE DEPENDENT: Is instruction an ENTER instruction? 
bool SMPInstr::MDIsEnterInstr(void) const { return ((this->SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (this->SMPcmd.itype <= MD_LAST_ENTER_INSTR)); } // MACHINE DEPENDENT: Is instruction a LEAVE instruction? bool SMPInstr::MDIsLeaveInstr(void) const { return ((this->SMPcmd.itype >= MD_FIRST_LEAVE_INSTR) && (this->SMPcmd.itype <= MD_LAST_LEAVE_INSTR)); } // MACHINE DEPENDENT: Is instruction a HALT instruction? bool SMPInstr::MDIsHaltInstr(void) const { return (NN_hlt == this->SMPcmd.itype); } #define MD_FIRST_COND_MOVE_INSTR NN_cmova #define MD_LAST_COND_MOVE_INSTR NN_fcmovnu // MACHINE DEPENDENT: Is instruction a conditional move? bool SMPInstr::MDIsConditionalMoveInstr(void) const { return ((this->SMPcmd.itype >= MD_FIRST_COND_MOVE_INSTR) && (this->SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR)); } // MACHINE DEPENDENT: Do opcode/operands definitely indicate signed arithmetic? // Generally, this is only true for certain variants of multiplication and division. bool SMPInstr::MDIsSignedArithmetic(void) const { unsigned short opcode = this->SMPcmd.itype; if (NN_idiv == opcode) return true; if (NN_imul == opcode) { // If we discard the upper N bits of the multiplication result, then the // lower N bits are the same for signed and unsigned multiplication, and // gcc/g++ often use the IMUL opcode for both signed and unsigned multiplies // when only N bits of result are retained. Therefore, the SIGNED nature of // IMUL operands can only be inferred from the case in which 2N bits are kept. return (!(this->AreMultiplicationBitsDiscarded())); } else { // idiv and imul are only possible signed cases return false; } } // end of SMPInstr::MDIsSignedArithmetic() // MACHINE DEPENDENT: Is instruction a conditional jump based on an unsigned condition? 
bool SMPInstr::MDIsUnsignedBranch(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_ja == opcode) || (NN_jae == opcode) || (NN_jb == opcode) || (NN_jbe == opcode) || (NN_jna == opcode) || (NN_jnae == opcode) || (NN_jnb == opcode) || (NN_jnbe == opcode)); } // MACHINE DEPENDENT: Is instruction a conditional jump based on a signed condition? bool SMPInstr::MDIsSignedBranch(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_jg == opcode) || (NN_jge == opcode) || (NN_jl == opcode) || (NN_jle == opcode) || (NN_jng == opcode) || (NN_jnge == opcode) || (NN_jnl == opcode) || (NN_jnle == opcode) || (NN_js == opcode) || (NN_jns == opcode)); } // MACHINE DEPENDENT: Is instruction a boolean set based on an unsigned condition? bool SMPInstr::MDIsUnsignedSetValue(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_seta == opcode) || (NN_setae == opcode) || (NN_setb == opcode) || (NN_setbe == opcode) || (NN_setna == opcode) || (NN_setnae == opcode) || (NN_setnb == opcode) || (NN_setnbe == opcode)); } // MACHINE DEPENDENT: Is instruction a boolean set based on a signed condition? bool SMPInstr::MDIsSignedSetValue(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_setg == opcode) || (NN_setge == opcode) || (NN_setl == opcode) || (NN_setle == opcode) || (NN_setng == opcode) || (NN_setnge == opcode) || (NN_setnl == opcode) || (NN_setnle == opcode) || (NN_sets == opcode) || (NN_setns == opcode)); } // MACHINE DEPENDENT: Does instruction use a callee-saved register? 
bool SMPInstr::MDUsesCalleeSavedReg(void) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t CurrOp = CurrUse->GetOp(); if (CurrOp.is_reg(R_bp) || CurrOp.is_reg(R_si) || CurrOp.is_reg(R_di) || CurrOp.is_reg(R_bx)) { return true; } } return false; } // end of SMPInstr::MDUsesCalleeSavedReg() // Is the instruction a register to register copy of a stack pointer or frame pointer // into a general purpose register (which mmStrata will now need to track as a stack // relative pointer)? bool SMPInstr::MDIsStackPointerCopy(bool UseFP) { // OptType 3 indicates a move instruction // The lea instruction can perform three operand arithmetic, e.g. // lea ebx,[esp+12] is just ebx:=esp+12, so it is a stack pointer copy. if (((this->OptType == 3) || (NN_lea == this->SMPcmd.itype)) && (this->GetFirstDef()->GetOp().type == o_reg) && (!(this->GetFirstDef()->GetOp().is_reg(R_sp))) && (!(this->HasSourceMemoryOperand()))) { // reg to reg move if (UseFP) { if (this->GetFirstUse()->GetOp().is_reg(R_bp)) // Move of base pointer EBP into a general register return true; else if ((this->GetFirstUse()->GetOp().is_reg(R_sp)) && !(this->GetFirstDef()->GetOp().is_reg(R_bp))) // Move of ESP into something besides a base pointer return true; } else if (this->GetFirstUse()->GetOp().is_reg(R_sp)) { // Move of ESP into a register; no base pointer used in this function return true; } } return false; } // end of SMPInstr::MDIsStackPointerCopy() // Determine if the instruction saves or restores a pointer into the stack frame. // If it saves a stack pointer, set Save to true, set the StackDelta saved, and set // the operand that received the saved stack pointer into CopyOp. and return true. 
// If it restores a stack pointer, set Save to false, set CopyOp to the operand that // held the value being restored, set RestoreOp to the stack pointer or frame pointer // register (whichever was restored), leave StackDelta alone for later computation // based on reaching definitions, and return true. // For most instructions, no save or restore of a stack pointer, so return false. bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, bool &Save, sval_t &StackDelta, op_t &CopyOp, bool &Error) { bool StackPointerSaveOrRestore = false; // default unless we detect a save or restore of the stack or frame pointer size_t RTLCount = this->RTL.GetCount(); size_t RTLIndex; sval_t FPDelta = BADADDR; op_t TempOp = InitOp; int BaseReg, IndexReg, CopyReg; ushort Scale; ea_t offset; SMPoperator CurrOper; bool LookUpStackDelta = false; // Get stack delta from reaching defs for TempOp sval_t DeltaAdjust = 0; // add to StackDelta after computing from reaching defs, e.g. lea esp,[ecx-4] get TempOp of ecx // and DeltaAdjust of -4 Save = false; // default unless we detect a stack pointer save Error = false; if (UseFP) { FPDelta = this->GetBlock()->GetFunc()->GetFramePtrStackDelta(); } for (RTLIndex = 0; RTLIndex < RTLCount; ++RTLIndex) { bool FPRestore = false; // frame pointer is restored bool SPRestore = false; // stack pointer is restored SMPRegTransfer *CurrRT = this->RTL.GetRT(RTLIndex); op_t LeftOp = CurrRT->GetLeftOperand(); if (LeftOp.is_reg(MD_STACK_POINTER_REG)) { SPRestore = true; // temporary; might just be a push or pop RTL, etc., in which case we will reset. } else if (UseFP && LeftOp.is_reg(MD_FRAME_POINTER_REG)) { FPRestore = true; // likewise temporary } Save = (!(SPRestore || FPRestore)); // If we are assigning to the stack pointer reg or the frame pointer reg, we need to analyze the right // hand side of the RTL to see if it is a stack/frame pointer value, and not a simple push, pop, etc. 
CurrOper = CurrRT->GetOperator(); if (SMP_ASSIGN != CurrOper) { break; // not a regular RTL } if (!(CurrRT->HasRightSubTree())) { // Simple assignment to stack or frame pointer. op_t RightOp = CurrRT->GetRightOperand(); if (RightOp.is_reg(MD_STACK_POINTER_REG)) { // Must be the move of stack pointer into frame pointer in function prologue. assert(FPRestore); // not really a restore, but we had temporarily marked it as such above Save = true; StackDelta = this->GetStackPtrOffset(); // FP := SP, so saved delta is just current delta CopyOp = RightOp; StackPointerSaveOrRestore = true; FPRestore = false; break; } else if ((o_reg <= RightOp.type) && (o_displ >= RightOp.type)) { // register or memory if (SPRestore || FPRestore) { // stack or frame pointer is being restored; leave Save=false and set other outgoing arguments. TempOp = RightOp; CopyOp = RightOp; StackPointerSaveOrRestore = true; LookUpStackDelta = true; break; } else if (RightOp.is_reg(MD_STACK_POINTER_REG)) { // Stack pointer reg is being saved. 
StackDelta = this->GetStackPtrOffset(); // LeftOp := SP, so saved delta is just current delta CopyOp = LeftOp; StackPointerSaveOrRestore = true; break; } else if (UseFP && RightOp.is_reg(MD_FRAME_POINTER_REG)) { // Frame pointer is being saved StackDelta = FPDelta; CopyOp = LeftOp; StackPointerSaveOrRestore = true; break; } else { // RightOp is register or non-stack-pointer memory expr; either might hold stack delta TempOp = RightOp; CopyOp = LeftOp; LookUpStackDelta = true; // See if RightOp is holding a stack delta break; } } else { SMP_msg("ERROR: Invalid operand type for assignment to stack or frame pointer at %x\n", this->GetAddr()); StackPointerSaveOrRestore = false; break; } } else { // we have a right subtree in the CurrRT SMPRegTransfer *RightRT = CurrRT->GetRightTree(); // In order to have a right subtree, we must have something like: // lea esp,[ecx-4] which produces the RTL: esp := ecx - 4 // We should consider any other RTL structure besides a basic addition or // subtraction on the right subtree to be invalid. CurrOper = RightRT->GetOperator(); if ((SMP_ADD == CurrOper) || (SMP_SUBTRACT == CurrOper)) { op_t RightLeftOp = RightRT->GetLeftOperand(); if (o_reg == RightLeftOp.type) { if (RightRT->HasRightSubTree()) { // Complex RTL such as lea esp,[ebx+ecx*4] ; cannot analyze StackPointerSaveOrRestore = false; } else { op_t RightRightOp = RightRT->GetRightOperand(); if (o_imm != RightRightOp.type) { // Complex RTL such as lea esp,[ebx+ecx] ; cannot analyze StackPointerSaveOrRestore = false; } else { TempOp = RightLeftOp; DeltaAdjust = (sval_t) RightRightOp.value; if (SMP_SUBTRACT == CurrOper) { // Negate the stack delta adjustment, e.g. lea esp,[ecx-4] needs DeltaAdjust of -4, not 4. DeltaAdjust = (0 - DeltaAdjust); } LookUpStackDelta = true; } } } else { // weird RTL; LeftOp := (MemoryOp OPER ???) 
StackPointerSaveOrRestore = false; } } else { // not ADD or SUBTRACT StackPointerSaveOrRestore = false; } } } // end for all RTs in the RTL if (LookUpStackDelta) { bool StackAccess = false; // We need to set StackDelta based on the reaching defs for TempOp // A reg is probably a general register, but could have lea ebx,[esp+4] so it could be stack or frame pointer. if (TempOp.is_reg(MD_STACK_POINTER_REG)) { StackDelta = this->GetStackPtrOffset(); StackDelta += DeltaAdjust; LookUpStackDelta = false; // just got it; no need for reaching defs } else if (UseFP && TempOp.is_reg(MD_FRAME_POINTER_REG)) { StackDelta = FPDelta; StackDelta += DeltaAdjust; LookUpStackDelta = false; // just got it; no need for reaching defs } else if (o_reg == TempOp.type) { // general reg, not frame or stack pointer reg CopyReg = TempOp.reg; } else { MDExtractAddressFields(TempOp, BaseReg, IndexReg, Scale, offset); CopyReg = BaseReg; bool IndexedAccess = ((R_none != BaseReg) && (R_none != IndexReg)); if (IndexedAccess) { StackPointerSaveOrRestore = false; // Cannot analyze indexed accesses into the stack } else if (MDIsStackPtrReg(BaseReg, UseFP)) { StackAccess = true; } else { // memory expr that is not stack or frame pointer DeltaAdjust = (sval_t) TempOp.addr; // get normalized delta from addr field } } if (StackPointerSaveOrRestore && LookUpStackDelta) { op_t FindOp = InitOp; if (StackAccess) { FindOp = TempOp; } else { FindOp.type = o_reg; FindOp.reg = CopyReg; } if (this->GetBlock()->GetFunc()->IsInStackPtrCopySet(FindOp)) { // Screened out time wasters that are not in copy set; now, // look up reaching defs. // We need to find out which are the reaching definitions for the FindOp at the current InstAddr. this->GetBlock()->GetFunc()->ComputeTempReachingDefs(FindOp, this->GetAddr()); this->GetBlock()->GetFunc()->ComputeTempStackDeltaReachesList(FindOp); // See if TempStackDeltaReachesList has a consistent delta value. 
StackPointerSaveOrRestore = this->GetBlock()->GetFunc()->FindReachingStackDelta(StackDelta); // consistent SavedDelta value across entire list StackDelta += DeltaAdjust; } } } // end if (LookupStackDelta) if (!StackPointerSaveOrRestore && !Save) { // Any restore that could not be analyzed is an error. Error = true; } return StackPointerSaveOrRestore; } // end of SMPInstr::MDIsStackPtrSaveOrRestore() // If call instruction is to malloc(), set the DEF register EAX type to // HEAPPTR and return true. bool SMPInstr::MDFindMallocCall(op_t TargetOp) { bool changed = false; func_t *TargetFunc = get_func(TargetOp.addr); if (TargetFunc) { char FuncName[MAXSTR]; get_func_name(TargetFunc->startEA, FuncName, sizeof(FuncName) - 1); if (0 == strcmp("malloc", FuncName)) { // NOTE: Some compilers might call it __malloc ; make this more robust !!! #if SMP_VERBOSE_FIND_POINTERS SMP_msg("Found call to malloc at %x\n", this->addr); #endif op_t SearchOp = InitOp; SearchOp.type = o_reg; SearchOp.reg = R_ax; set<DefOrUse, LessDefUse>::iterator EAXDEF; EAXDEF = this->SetDefType(SearchOp, HEAPPTR); int SSANum = EAXDEF->GetSSANum(); changed = true; if (this->BasicBlock->IsLocalName(SearchOp)) { (void) this->BasicBlock->PropagateLocalDefType(SearchOp, HEAPPTR, this->GetAddr(), SSANum, false); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(SearchOp, HEAPPTR, SSANum, false); } } // end if "malloc" } // end if (TargetFunc) return changed; } // end of SMPInstr::MDFindMallocCall() // Is instruction a branch (conditional or unconditional) to a // code target that is not in the current chunk? 
bool SMPInstr::IsBranchToFarChunk(void) { if (this->IsFarBranchComputed()) { // answer is cached return this->IsBranchesToFarChunk(); } func_t *CurrChunk = get_fchunk(this->address); bool FarBranch = false; if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) { // Instruction is a direct branch, conditional or unconditional if (this->NumUses() > 0) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t JumpTarget = CurrUse->GetOp(); if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) { // Branches to a code address // stdclib sometimes has jumps to zero and calls to zero. These are dead code. if (0 != JumpTarget.addr) { func_t *TargetChunk = get_fchunk(JumpTarget.addr); // Is target address within the same chunk as the branch? FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA); if (FarBranch) { this->FarBranchTarget = JumpTarget.addr; } } } } } } if (FarBranch) { this->SetBranchesToFarChunk(); } this->SetFarBranchComputed(); return FarBranch; } // end of SMPInstr::IsBranchToFarChunk() set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) { return this->Uses.SetSSANum(CurrOp, SSASub); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) { return this->Defs.SetSSANum(CurrOp, SSASub); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) { return this->Uses.SetType(CurrOp, CurrType, this); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) { return this->Defs.SetType(CurrOp, CurrType, this); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) { return this->Defs.SetMetadata(CurrOp, Status); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) { return this->Defs.SetIndWrite(CurrOp, IndWriteFlag); }; set<DefOrUse, 
LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) { return this->Uses.SetNoTruncation(CurrOp, NoTruncFlag); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) { return this->Defs.SetNoOverflow(DefOp, NoOverflowFlag); }; // Analyze the instruction and its operands. void SMPInstr::Analyze(void) { bool DebugFlag = false; if (0x8049b00 == this->address) { // Setting up breakpoint line. DebugFlag = true; } // Fill cmd structure with disassembly of instr if (!SMPGetCmd(this->address, this->SMPcmd, this->features)) return; // Record what type of instruction this is, simplified for the needs // of data flow and type analysis. this->type = DFACategory[this->SMPcmd.itype]; // Record optimization category. this->OptType = OptCategory[this->SMPcmd.itype]; if ((NN_int == this->SMPcmd.itype) || (NN_into == this->SMPcmd.itype) || (NN_int3 == this->SMPcmd.itype)) { this->SetInterrupt(); } else { this->ResetInterrupt(); } // See if instruction is an ASM idiom for clearing a register. if (NN_xor == this->SMPcmd.itype) { ushort FirstReg; if (o_reg == this->SMPcmd.Operands[0].type) { FirstReg = this->SMPcmd.Operands[0].reg; if (this->SMPcmd.Operands[1].is_reg(FirstReg)) this->SetRegClearIdiom(); } } // See if instruction is simple nop or ASM idiom for nop. if (this->MDIsNop()) { this->SetNop(); } // Build the DEF and USE lists for the instruction. this->FindMemOps(); this->BuildSMPDefUseLists(); // Determine whether the instruction is a jump target by looking // at its cross references and seeing if it has "TO" code xrefs. SMP_xref_t xrefs; for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) { if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) { this->SetJumpTarget(); break; } } // If instruction is a call or indirect call, see if a call target has been recorded // by IDA Pro. 
if (this->GetDataFlowType() == INDIR_CALL) { for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL); ok; ok = xrefs.SMP_next_from()) { if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) { // Found a code target, with its address in CurrXrefs.to if (xrefs.GetTo() == (this->address + this->GetCmd().size)) { // A call instruction will have two targets: the fall through to the // next instruction, and the called function. We want to find // the called function. continue; } // We found a target, not the fall-through. this->CallTarget = xrefs.GetTo(); SMP_msg("Found indirect call target %x at %x\n", xrefs.GetTo(), this->address); break; } } // end for all code xrefs if (BADADDR == this->CallTarget) { SMP_msg("WARNING: Did not find indirect call target at %x\n", this->address); } } // end if INDIR_CALL else if (this->GetDataFlowType() == CALL) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { optype_t OpType = CurrUse->GetOp().type; if ((OpType == o_near) || (OpType == o_far)) { this->CallTarget = CurrUse->GetOp().addr; } } if (BADADDR == this->CallTarget) { SMP_msg("ERROR: Target not found for direct call at %x\n", this->address); } } if (DebugFlag) { SMP_msg("Analyzed debug instruction at %x\n", this->address); } return; } // end of SMPInstr::Analyze() // Analyze the floating point NOP marker instruction at the top of the function. void SMPInstr::AnalyzeMarker(void) { // Fill member variable SMPcmd structure with disassembly of instr (void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd)); this->SMPcmd.itype = NN_fnop; this->SMPcmd.size = 1; this->SMPcmd.ea = this->address; // Set the instr disassembly text. DisAsmText.SetMarkerInstText(this->GetAddr()); // Record what type of instruction this is, simplified for the needs // of data flow and type analysis. this->type = DFACategory[this->SMPcmd.itype]; // Record optimization category. 
this->OptType = OptCategory[this->SMPcmd.itype]; return; } // end of SMPInstr::AnalyzeMarker() // Detect oddities of call instructions, such as pseudo-calls that are // actually jumps within a function void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) { if (BADADDR != this->CallTarget) { if ((this->CallTarget > FirstFuncAddr) && (this->CallTarget <= LastFuncAddr)) { this->SetCallUsedAsJump(); } else { this->ResetCallUsedAsJump(); } if (this->CallTarget == FirstFuncAddr) { this->SetDirectRecursiveCall(); } else { this->ResetDirectRecursiveCall(); } if (this->IsCallUsedAsJump()) this->type = JUMP; } return; } // end of SMPInstr::AnalyzeCallInst() sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocDelta) { uint16 InstType = this->SMPcmd.itype; sval_t InstDelta = StackAlteration[InstType]; SMPitype FlowType = this->GetDataFlowType(); bool TailCall = this->IsTailCall(); if (this->IsCallUsedAsJump() || this->MDIsInterruptCall() || this->IsCondTailCall()) { // Call is used within function as a jump. Happens when setting up // thunk offsets, for example; OR, call is an interrupt call, in which // the interrupt return cleans up the stack, leaving a delta of zero, but // we do not have the system call code to analyze, OR, the call is a conditional // jump to another function (conditional tail call), in which case the current // function must have a return statement to fall into which will clean up the // only thing left on the stack (the return address) and the conditional jump // has no effect on the stack pointer. ; // leave InstDelta equal to negative or zero value from StackAlterationTable[] } else if ((CALL == FlowType) || (INDIR_CALL == FlowType) || TailCall) { // A real call instruction, which pushes a return address on the stack, // not a call used as a branch within the function. 
A return instruction // will usually cancel out the stack push that is implicit in the call, which // means that the function will have a net stack ptr delta of +4, which will // cancel out the -4 value of the call instruction and set the delta to zero. // However, this is not true in all cases, so we get the net stack ptr delta // directly from the called function unless it is an unresolved indirect call, // in which case we assume +4. !!!!****!!!! In the future, we could analyze // the code around an unresolved indirect call to see if it seems to be // removing items left on the stack by the callee. // SPECIAL CASE: A jump used as a tail call will have a stack ptr effect that is equal // to the net stack ptr effect of its target function, usually +4, whereas a jump // would otherwise have a net stack ptr effect of 0. ea_t CalledFuncAddr = this->GetCallTarget(); if ((BADADDR == CalledFuncAddr) || (0 == CalledFuncAddr)) { InstDelta = 0; } else { // We have a call target SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->GetFuncFromAddr(CalledFuncAddr); sval_t AdjustmentDelta; if (CalleeFunc) { if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) { // Phase ordering issue in the call graph. A mutually recursive clique of functions has to // be broken by starting processing somewhere, and all callees cannot be processed before // we start. If we got our stack down to zero and then made a tail call, then we have to assume // that the callee will use our return address, so we assume the default stack delta. If not a // tail call, we ask our function to see if the information is available from IDA Pro analyses, // or if it can be inferred from the fact that the call is followed by a stack adjustment. 
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %x ; normal delta assumed\n", this->GetAddr()); if (TailCall) { InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA; } else { AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr()); InstDelta += AdjustmentDelta; } } else if (!CalleeFunc->StackPtrAnalysisSucceeded()) { // Callee analyses were done, but they failed. In order to proceed, we have to assume // the same situation as we just did in the case where analyses have not been performed. SMP_msg("WARNING: Callee stack ptr analysis failed at inst %x ; normal delta assumed\n", this->GetAddr()); if (TailCall) { InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA; } else { AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr()); InstDelta += AdjustmentDelta; } } else { // Callee's analyses have succeeded, so get delta straight from callee. InstDelta += CalleeFunc->GetNetStackPtrDelta(); } } else { #if 0 SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n", CalledFuncAddr, this->GetAddr()); InstDelta = SMP_STACK_DELTA_ERROR_CODE; #else SMP_msg("WARNING: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n", CalledFuncAddr, this->GetAddr()); if (TailCall) { InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA; } else { InstDelta = 0; } #endif } } } // end CALL or INDIR_CALL or TailCall case else if (1 == InstDelta) { // value of 1 is trigger to investigate the RTL for the // true value, which cannot be found simply by table lookup // In the special case of an x86 LEAVE instruction, the effect // on the stack pointer is to deallocate the local frame size, // plus pop the saved frame pointer into EBP. Helper functions // need to know whether to look for this special case. 
bool IsLeaveInstr = this->MDIsLeaveInstr(); InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, IncomingDelta, PreAllocDelta); } return InstDelta; } // end of SMPInstr::AnalyzeStackPointerDelta() // Total the stack adjustment bytes, as happens after a call to a function that leaves // outgoing args on the stack or swallows incoming args from the stack. sval_t SMPInstr::FindStackAdjustment(void) { uint16 InstType = this->SMPcmd.itype; sval_t InstDelta = StackAlteration[InstType]; if (1 == InstDelta) { // value of 1 is trigger to investigate the RTL for the // true value, which cannot be found simply by table lookup // In the special case of an x86 LEAVE instruction, the effect // on the stack pointer is to deallocate the local frame size, // plus pop the saved frame pointer into EBP. Helper functions // need to know whether to look for this special case. bool IsLeaveInstr = this->MDIsLeaveInstr(); if (!IsLeaveInstr) { InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, 0, 0); } else { InstDelta = 0; // LEAVE is not the kind of instr we are looking for } } return InstDelta; } // end of SMPInstr::FindStackAdjustment() // Normalize stack operands to have a displacement from the stack pointer value on entry to the function, // rather than the current stack pointer value. // UseFP indicates we are usign a frame pointer in the function. // FPDelta holds the stack delta (normalized) for the frame pointer. // DefOp comes in with the operand to be normalized, and contains the normalized operand upon return. // Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.) 
bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp) { if (o_reg == DefOp.type) { return true; } else if (MDIsStackAccessOpnd(DefOp, UseFP)) { int SignedOffset = (int) DefOp.addr; sval_t NormalizedDelta; if (DefOp.reg == MD_FRAME_POINTER_REG) { // If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this // is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming // argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when // normalized to the entry point value of the stack pointer. In both cases, we can see that the // normalized stack delta is just FPDelta+SignedOffset. NormalizedDelta = FPDelta + (sval_t) SignedOffset; // Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the // NormalizedDelta just computed. DefOp.reg = MD_STACK_POINTER_REG; } else { assert(DefOp.reg == MD_STACK_POINTER_REG); // We only need to adjust the offset to reflect the change in the stack pointer since the function // was entered, e.g. [esp+4] is normalized to [esp-28] if the current esp value is 32 less than it // was upon function entry. We get the value "-32" in that case from a member variable. NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset; } DefOp.addr = (ea_t) NormalizedDelta; // common to frame and stack pointer cases return true; } else { return false; } } // end of SMPInstr::MDComputeNormalizedDataFlowOp() // Normalize stack operands in all DEFs and USEs to have stack deltas relative to the function entry stack pointer. // Return true if any stack DEFs or USEs were normalized. 
bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing) { bool StackOpFound = false; bool OpNormalized; set<DefOrUse, LessDefUse>::iterator DefIter, UseIter; list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> > DefWorkList, UseWorkList; list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> >::iterator WorkIter; op_t OldOp, NewOp; // Find all the DEFs that need changing, and put their iterators into a list. // Normalizing stack ops could change their sort order, hence we could skip over // a DEF in the set by erasing a DEF and reinserting a normalized DEF, so we // make all the changes after we iterate through the DEFS set. for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { OldOp = DefIter->GetOp(); NewOp = OldOp; if (o_reg != NewOp.type) { OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp); if (OpNormalized) { StackOpFound = true; pair<set<DefOrUse, LessDefUse>::iterator, op_t> DefItem(DefIter, NewOp); DefWorkList.push_back(DefItem); } } } // Now go through the DEF worklist and change stack operands to normalized stack operands. for (WorkIter = DefWorkList.begin(); WorkIter != DefWorkList.end(); ++WorkIter) { DefIter = WorkIter->first; DefIter = this->Defs.SetOp(DefIter, NewOp); } // Find all USEs that need changing, and build a second work list. for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { OldOp = UseIter->GetOp(); NewOp = OldOp; if (o_reg != NewOp.type) { OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp); if (OpNormalized) { StackOpFound = true; pair<set<DefOrUse, LessDefUse>::iterator, op_t> UseItem(UseIter, NewOp); UseWorkList.push_back(UseItem); } } } // Now go through the USE worklist and change stack operands to normalized stack operands. 
for (WorkIter = UseWorkList.begin(); WorkIter != UseWorkList.end(); ++WorkIter) { UseIter = WorkIter->first; UseIter = this->Uses.SetOp(UseIter, NewOp); } return StackOpFound; } // end of SMPInstr::MDNormalizeStackOps() // Find USE-not-DEF operand that is not the flags register. op_t SMPInstr::GetSourceOnlyOperand(void) { size_t OpNum; for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & DefMacros[OpNum]) { // DEF ; } else if (this->features & UseMacros[OpNum]) { // USE op_t CurrOp = this->SMPcmd.Operands[OpNum]; if (!(CurrOp.is_reg(X86_FLAGS_REG))) { return CurrOp; } } } // It is expected that increment, decrement, and floating point stores // will not have a USE-only operand. Increment and decrement have an // operand that is both USEd and DEFed, while the floating point stack // registers are implicit in most floating point opcodes. Also, exchange // and exchange-and-add instructions have multiple DEF-and-USE operands. int TypeGroup = SMPTypeCategory[this->SMPcmd.itype]; if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12) && (TypeGroup != 13)) { SMP_msg("ERROR: Could not find source only operand at %x in %s\n", this->address, DisAsmText.GetDisAsm(this->GetAddr())); } return InitOp; } // end of SMPInstr::GetSourceOnlyOperand() // Should apparent memory operands be ignored? e.g. 
lea opcode on x86 bool SMPInstr::MDIgnoreMemOps(void) { bool leaInst = (NN_lea == this->SMPcmd.itype); return leaInst; } // Find memory DEFs and USEs, store in DEFMemOp and USEMemOp void SMPInstr::FindMemOps(void) { size_t OpNum; if (!(this->MDIgnoreMemOps())) { for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory if (this->features & DefMacros[OpNum]) { // DEF if (this->DEFMemOp.type == o_void) { // only save first mem DEF this->DEFMemOp = TempOp; } } if (this->features & UseMacros[OpNum]) { // USE if (this->USEMemOp.type == o_void) { // only save first mem USE this->USEMemOp = TempOp; } } } } // end for (OpNum = 0; ...) } this->SetMemOpsFound(); return; } // end of SMPInstr::FindMemOps() // Fill the Defs and Uses private data members. void SMPInstr::BuildSMPDefUseLists(void) { size_t OpNum; bool DebugFlag = (0x8049b00 == this->GetAddr()); bool WidthDoubler = this->MDDoublesWidth(); this->Defs.clear(); this->Uses.clear(); // Start with the Defs. for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & DefMacros[OpNum]) { // DEF op_t TempOp = this->SMPcmd.Operands[OpNum]; if (WidthDoubler) { // Opcodes that sign-extend a byte to a word, or a word to a dword, // have only one operand. It is implicit, and it is the shorter USE. // That means the DEF will have the same width as the USE, e.g. if // we are sign-extending AX to EAX, the USE and DEF both be AX without // a special fix. We fix this problem with the DEF operand now. 
if (TempOp.dtyp == dt_byte) { TempOp.dtyp = dt_word; TempOp.reg = MDCanonicalizeSubReg(TempOp.reg); } else if (TempOp.dtyp == dt_word) { TempOp.dtyp = dt_dword; TempOp.reg = MDCanonicalizeSubReg(TempOp.reg); } else if (TempOp.dtyp == dt_dword) { TempOp.dtyp = dt_qword; } else { SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %x dtyp: %d\n", OpNum, this->address, TempOp.dtyp); } } if (MDKnownOperandType(TempOp)) { if (DebugFlag) { SMP_msg("DEBUG: Setting DEF for: "); PrintOperand(TempOp); SMP_msg("\n"); } if (o_reg == TempOp.type) { // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. TempOp.reg = MDCanonicalizeSubReg(TempOp.reg); } this->Defs.SetRef(TempOp); } } } // end for (OpNum = 0; ...) if (this->IsRegClearIdiom()) { // Something like xor eax,eax clears eax but does not really // use eax. It is the same as mov eax,0 and we don't want to // extend the prior def-use chain for eax to this instruction // by treating the instruction as xor eax,eax. Instead, we // build the DEF and USE lists and RTL as if it were mov eax,0. op_t ImmOp = InitOp; ImmOp.type = o_imm; ImmOp.value = 0; this->Uses.SetRef(ImmOp, NUMERIC); return; } // Now, do the Uses. Uses have special case operations, because // any memory operand could have register uses in the addressing // expression, and we must create Uses for those registers. For // example: mov eax,[ebx + esi*2 + 044Ch] // This is a two-operand instruction with one def: eax. But // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi. // The first use is an op_t of type o_phrase (memory phrase), // which can be copied from cmd.Operands[1]. Likewise, we just // copy cmd.Operands[0] into the defs list. However, we must create // op_t types for register ebx and register esi and append them // to the Uses list. This is handled by the machine dependent // method MDFixupDefUseLists(). 
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & UseMacros[OpNum]) { // USE op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (DebugFlag) { SMP_msg("DEBUG: Setting USE for: "); PrintOperand(TempOp); SMP_msg("\n"); } if (o_reg == TempOp.type) { // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. TempOp.reg = MDCanonicalizeSubReg(TempOp.reg); } this->Uses.SetRef(TempOp); } } } // end for (OpNum = 0; ...) return; } // end of SMPInstr::BuildSMPDefUseLists() // If DefReg is not already in the DEF list, add a DEF for it. void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) { op_t TempDef = InitOp; TempDef.type = o_reg; TempDef.reg = DefReg; if (Shown) TempDef.set_showed(); else TempDef.clr_showed(); this->Defs.SetRef(TempDef, Type); return; } // end of SMPInstr::MDAddRegDef() // If UseReg is not already in the USE list, add a USE for it. void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) { op_t TempUse = InitOp; TempUse.type = o_reg; TempUse.reg = UseReg; if (Shown) TempUse.set_showed(); else TempUse.clr_showed(); this->Uses.SetRef(TempUse, Type); return; } // end of SMPInstr::MDAddRegUse() // Perform machine dependent ad hoc fixes to the def and use lists. // For example, some multiply and divide instructions in x86 implicitly // use and/or define register EDX. For memory phrase examples, see comment // in BuildSMPDefUseLists(). void SMPInstr::MDFixupDefUseLists(void) { // First, handle the uses hidden in memory addressing modes. Note that we do not // care whether we are dealing with a memory destination operand or source // operand, because register USEs, not DEFs, happen within the addressing expressions. 
size_t OpNum; SMPOperandType RefType; int BaseReg; int IndexReg; ushort ScaleFactor; ea_t displacement; bool UseFP = true; bool HasIndexReg = false; bool SingleAddressReg = false; bool leaInst = (NN_lea == this->SMPcmd.itype); bool DebugFlag = (this->GetAddr() == 0x8086177); if (DebugFlag) { SMP_msg("DEBUG: Fixing up DEF-USE lists for debug location\n"); this->Dump(); } #if SMP_BASEREG_POINTER_TYPE // Some instructions are analyzed outside of any function or block when fixing up // the IDB, so we have to assume the block and func pointers might be NULL. if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc())) UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); #endif if (DebugFlag) { SMP_msg("DEBUG: UseFP = %d\n", UseFP); } for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t Opnd = SMPcmd.Operands[OpNum]; if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) { MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement); SingleAddressReg = ((0 == displacement) && ((R_none == BaseReg) || (R_none == IndexReg))); if (R_none != IndexReg) { op_t IndexOpnd = Opnd; // Init to current operand field values IndexOpnd.type = o_reg; // Change type and reg fields IndexOpnd.reg = (ushort) IndexReg; IndexOpnd.hasSIB = 0; IndexOpnd.set_showed(); // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg); if (0 == ScaleFactor) this->Uses.SetRef(IndexOpnd); else { // scaling == shift ==> NUMERIC HasIndexReg = true; this->Uses.SetRef(IndexOpnd, NUMERIC); } } if (R_none != BaseReg) { op_t BaseOpnd = Opnd; // Init to current operand field values BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.reg = (ushort) BaseReg; BaseOpnd.hasSIB = 0; BaseOpnd.set_showed(); // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. 
BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg); RefType = UNINIT; #if SMP_BASEREG_POINTER_TYPE // R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes(). // Other registers used as base registers should get their USEs as // base registers typed as POINTER, which might get refined later // to STACKPTR, GLOBALPTR, HEAPPTR, etc. // NOTE: the NN_lea opcode is often used without a true base register. // E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which // could not be done in one instruction without using the addressing // modes of the machine to do the arithmetic. We don't want to set the // USE of EAX to POINTER in this case, so we will conservatively skip // all lea instructions here. // We cannot be sure that a register is truly a base register unless // there is also an index register. E.g. with reg+displacement, we // could have memaddr+indexreg or basereg+offset, depending on what // the displacement is. The exception is if there is no offset and only // one addressing register, e.g. mov eax,[ebx]. if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp)) || leaInst || (!HasIndexReg && !SingleAddressReg)) { ; } else { RefType = POINTER; } #endif this->Uses.SetRef(BaseOpnd, RefType); } // end if R_none != BaseReg } // end if (o_phrase or o_displ operand) } // end for (all operands) // The lea (load effective address) instruction looks as if it has // a memory USE: lea ebx,[edx+esi] // However, this instruction is really just: ebx := edx+esi // Now that the above code has inserted the "addressing" registers // into the USE list, we should remove the "memory USE". if (leaInst) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t UseOp = CurrUse->GetOp(); if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) { this->LeaUSEMemOp = UseOp; this->EraseUse(CurrUse); this->USEMemOp = InitOp; break; } } } // Next, handle repeat prefices in the instructions. 
The Intel REPE/REPZ prefix // is just the text printed for SCAS/CMPS instructions that have a REP prefix. // Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and // REPNE/REPNZ only applies to SCAS and CMPS instructions. bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)); bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne)); if (HasRepPrefix && HasRepnePrefix) SMP_msg("REP and REPNE both present at %x %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); if (HasRepPrefix || HasRepnePrefix) { // All repeating instructions use ECX as the countdown register. op_t BaseOpnd = InitOp; BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.reg = R_cx; BaseOpnd.clr_showed(); this->Defs.SetRef(BaseOpnd, NUMERIC); this->Uses.SetRef(BaseOpnd, NUMERIC); } if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_scas) || (this->SMPcmd.itype == NN_movs) || (this->SMPcmd.itype == NN_stos)) { // ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS. // Only EDI is involved with SCAS/STOS. op_t BaseOpnd = InitOp; BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.clr_showed(); if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_movs)) { BaseOpnd.reg = R_si; this->Defs.SetRef(BaseOpnd, POINTER); this->Uses.SetRef(BaseOpnd, POINTER); } BaseOpnd.reg = R_di; this->Defs.SetRef(BaseOpnd, POINTER); this->Uses.SetRef(BaseOpnd, POINTER); } // Now, handle special instruction categories that have implicit operands. if (NN_cmpxchg == this->SMPcmd.itype) { // x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis // sound by declaring that EAX is always a DEF. this->MDAddRegDef(R_ax, false); } // end if NN_cmpxchg else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) { // IDA does not include the stack pointer in the DEFs or USEs. 
this->MDAddRegDef(R_sp, false); this->MDAddRegUse(R_sp, false); if (!this->MDIsReturnInstr()) { // We always reference [esp+0] or [esp-4], so add it to the DEF or USE list. op_t StackOp = InitOp; StackOp.type = o_displ; StackOp.reg = R_sp; if (this->MDIsPopInstr()) { StackOp.addr = 0; // [ESP+0] this->Uses.SetRef(StackOp); // USE } else { StackOp.addr = (ea_t) -4; // [ESP-4] this->Defs.SetRef(StackOp); // DEF } } } #if SMP_CALL_TRASHES_REGS else if ((this->type == CALL) || (this->type == INDIR_CALL) || this->IsTailCall()) { // We want to add the caller-saved registers to the USEs and DEFs lists this->MDAddRegDef(R_ax, false); this->MDAddRegDef(R_cx, false); this->MDAddRegDef(R_dx, false); this->MDAddRegUse(R_ax, false); this->MDAddRegUse(R_cx, false); this->MDAddRegUse(R_dx, false); #if 1 if (this->MDIsInterruptCall()) { #endif this->MDAddRegDef(R_bx, false); this->MDAddRegUse(R_bx, false); this->MDAddRegDef(R_si, false); this->MDAddRegUse(R_si, false); #if 1 } #endif } #endif else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) { // Entire function prologue or epilogue microcoded. this->MDAddRegDef(R_sp, false); this->MDAddRegUse(R_sp, false); this->MDAddRegDef(R_bp, false); this->MDAddRegUse(R_bp, false); } else if ((this->SMPcmd.itype == NN_maskmovq) || (this->SMPcmd.itype == NN_maskmovdqu)) { this->MDAddRegUse(R_di, false, POINTER); } else if (8 == this->GetOptType()) { // This category implicitly writes to EDX:EAX. this->MDAddRegDef(R_dx, false); this->MDAddRegDef(R_ax, false); } // end else if (8 == GetOptType) else if (7 == this->GetOptType()) { // Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX. // DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX. // DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro // sense, because they are not displayed in the disassembly text). For example: // mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. 
If the // source operand is only 8 bits wide, there is room to hold the result in AX // without using DX: mul bl means AX <-- AL*BL. // IMUL has forms with a hidden EAX or AX operand and forms with no implicit // operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that // EBX*EDX gets truncated and the result placed in EBX (no hidden operands). for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempUse = this->SMPcmd.Operands[OpNum]; if (!TempUse.showed()) { // hidden operand if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) { this->MDAddRegUse(R_dx, false); } this->MDAddRegDef(R_ax, false); this->MDAddRegDef(R_dx, false); } } } } // end else if (7 == OptType) #if 0 // The floating point instructions in type categories 14 and 15 often USE and DEF // the floating point register stack, e.g. pushing a value onto that stack is a // massive copy downward of stack locations. We don't really care about the USE of // the stack if the value being pushed came from elsewhere than the stack. For example, // an "fld" opcode pushes its source onto the stack. We build RTLs with a simple // move structure, but the RTL building can be fooled by seeing two "source" operands // in the USE list. if ((14 == SMPTypeCategory[this->SMPcmd.itype]) || (15 == SMPTypeCategory[this->SMPcmd.itype])) { } #endif #if 0 // Not true for LOOP instructions that use only the ECX counter register. if (this->type == COND_BRANCH) { assert(SMPUsesFlags[this->SMPcmd.itype]); } #endif // The return value register EAX is not quite like a caller-save or callee-save // register (technically, it is caller-save). Within a callee, it might appear // that EAX has become dead by the time a return instruction is reached, but // the USE that would make it not dead is in the caller. 
To prevent type inference // from mistakenly thinking that all USEs of EAX have been seen in the callee, // we add EAX to the USE list for all return instructions, as well as for all // tail calls, which are essentially returns in terms of data flow analysis. // This USE of EAX will always be of type UNINIT unless its DEF has a known type // that propagates to it. Thus, it will prevent an invalid back inference of the // DEF type from "all" USE types that are visible in the callee; even if they // were all NUMERIC, this return USE will be UNINIT and inhibit the invalid // type inference. EAX could be loaded with a pointer from memory, for example, // and USEd only in a comparison instruction, making it falsely appear to be // a NUMERIC, without this extra USE at the return instruction. // Because some of the library functions pass values around in EBX, EDI, etc., // we will add these general purpose registers to the USE list for returns // in order to prevent erroneous analyses of dead registers or unused // metadata. if ((this->type == RETURN) || this->IsTailCall()) { this->MDAddRegUse(R_ax, false); this->MDAddRegUse(R_bx, false); this->MDAddRegUse(R_cx, false); this->MDAddRegUse(R_dx, false); if (!UseFP) this->MDAddRegUse(R_bp, false); this->MDAddRegUse(R_si, false); this->MDAddRegUse(R_di, false); } // Next, add the flags register to the DEFs and USEs for those instructions that // are marked as defining or using flags. if (!this->IsDefsFlags() && SMPDefsFlags[this->SMPcmd.itype]) { this->MDAddRegDef(X86_FLAGS_REG, false); this->SetDefsFlags(); } if (!this->IsUsesFlags() && SMPUsesFlags[this->SMPcmd.itype]) { this->MDAddRegUse(X86_FLAGS_REG, false); this->SetUsesFlags(); } #if 1 if (this->IsNop()) { // Clear the DEFs and USEs for no-ops. // These include machine idioms for no-ops, e.g. mov esi,esi // or xchg ax,ax or lea esi,[esi]. 
this->Defs.clear(); this->Uses.clear(); this->MoveSource = InitOp; this->OptType = 1; } #endif if (DebugFlag) { SMP_msg("DEBUG after MDFixupDefUseLists:\n"); this->Dump(); } return; } // end of SMPInstr::MDFixupDefUseLists() // If we can definitely identify which part of the addressing expression // used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR // immediate, set the USE type for that register to POINTER and return true. // If we can find definite NUMERIC addressing registers that are not already // typed as NUMERIC, set their USE types to NUMERIC and return true. bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) { bool changed = false; int BaseReg; int IndexReg; op_t BaseOp = InitOp; op_t IndexOp = InitOp; SMPOperandType BaseType = UNKNOWN; SMPOperandType IndexType = UNKNOWN; ushort ScaleFactor; ea_t offset; set<DefOrUse, LessDefUse>::iterator BaseIter; set<DefOrUse, LessDefUse>::iterator IndexIter; if (NN_lea == this->SMPcmd.itype) return false; // lea instruction really has no memory operands if (NN_fnop == this->SMPcmd.itype) return false; // SSA marker instruction MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset); if (R_none != IndexReg) { IndexOp.type = o_reg; IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg); IndexIter = this->FindUse(IndexOp); assert(IndexIter != this->GetLastUse()); IndexType = IndexIter->GetType(); } if (R_none != BaseReg) { BaseOp.type = o_reg; BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg); BaseIter = this->FindUse(BaseOp); assert(BaseIter != this->GetLastUse()); BaseType = BaseIter->GetType(); } if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) { if ((R_none != IndexReg) && (!IsNumeric(IndexType))) { // We have an indexed access into the stack frame. // Set IndexReg USE type to NUMERIC. 
changed = true; IndexIter = this->SetUseType(IndexOp, NUMERIC); assert(IndexIter != this->GetLastUse()); } return changed; // stack accesses will get STACKPTR type in SetImmedTypes() } if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) { if ((R_none != BaseReg) && (!IsNumeric(BaseType))) { // We have an indexed access into the stack frame. // Set BaseReg USE type to NUMERIC. // Note that BaseReg is really an IndexReg and vice versa. changed = true; BaseIter = this->SetUseType(BaseOp, NUMERIC); assert(BaseIter != this->GetLastUse()); SMP_msg("WARNING: BaseReg is index, IndexReg is base: %s\n", DisAsmText.GetDisAsm(this->GetAddr())); } return changed; // stack accesses will get STACKPTR type in SetImmedTypes() } if (IsImmedGlobalAddress(offset)) { if ((R_none != IndexReg) && (!IsNumeric(IndexType))) { // We have an indexed access into a global. // Set IndexReg USE type to NUMERIC. changed = true; IndexIter = this->SetUseType(IndexOp, NUMERIC); assert(IndexIter != this->GetLastUse()); } if ((R_none != BaseReg) && (!IsNumeric(BaseType))) { // We have an indexed access into a global. // Set BaseReg USE type to NUMERIC. // Note that BaseReg is really an index register. changed = true; BaseIter = this->SetUseType(BaseOp, NUMERIC); assert(BaseIter != this->GetLastUse()); #if SMP_VERBOSE_FIND_POINTERS SMP_msg("WARNING: BaseReg used as index: %s\n", DisAsmText.GetDisAsm(this->GetAddr())); #endif } return changed; // global immediate is handled in SetImmedTypes() } // At this point, we must have a base address in a register, not used // to directly address the stack or a global. if ((0 < ScaleFactor) || (R_none == IndexReg)) { // IndexReg is scaled, meaning it is NUMERIC, so BaseReg must // be a POINTER; or IndexReg is not present, so BaseReg is the // only possible holder of an address. 
if (R_none != BaseReg) { if (UNINIT == BaseIter->GetType()) { changed = true; BaseIter = this->SetUseType(BaseOp, POINTER); assert(BaseIter != this->GetLastUse()); } } } else if (R_none == BaseReg) { // We have an unscaled IndexReg and no BaseReg and offset was // not a global offset, so IndexReg must be a POINTER. if (R_none != IndexReg) { if (UNINIT == IndexType) { changed = true; IndexIter = this->SetUseType(IndexOp, POINTER); assert(IndexIter != this->GetLastUse()); } } } else { // We have BaseReg and an unscaled IndexReg. // The only hope for typing something like [ebx+edx] is for // one register to already be typed NUMERIC, in which case // the other one must be a POINTER, or if one register is // already POINTER, then the other one must be NUMERIC. if (IsNumeric(BaseType)) { if (UNINIT == IndexType) { // Set to POINTER or PROF_POINTER changed = true; IndexIter = this->SetUseType(IndexOp, POINTER); assert(IndexIter != this->GetLastUse()); } else if (IsNumeric(IndexType)) { SMP_msg("ERROR: BaseReg and IndexReg both NUMERIC at %x: %s\n", this->address, DisAsmText.GetDisAsm(this->GetAddr())); } } else { // BaseReg was not NUMERIC if (UNINIT == BaseType) { // BaseReg is UNINIT if (IsNumeric(IndexType)) { changed = true; BaseIter = this->SetUseType(BaseOp, POINTER); assert(BaseIter != this->GetLastUse()); } else if (IsDataPtr(IndexType)) { // IndexReg is POINTER, so make BaseReg NUMERIC. changed = true; BaseIter = this->SetUseType(BaseOp, NUMERIC); assert(BaseIter != this->GetLastUse()); } } else if (IsDataPtr(BaseType)) { // BaseReg was a pointer type. IndexReg must be NUMERIC. 
if (UNINIT == IndexType) { changed = true; IndexIter = this->SetUseType(IndexOp, NUMERIC); assert(IndexIter != this->GetLastUse()); } else if (IsDataPtr(IndexType)) { SMP_msg("ERROR: BaseReg and IndexReg both POINTER at %x: %s\n", this->address, DisAsmText.GetDisAsm(this->GetAddr())); } } } } return changed; } // end of SMPInstr::MDFindPointerUse() // Are all DEFs typed to something besides UNINIT? bool SMPInstr::AllDEFsTyped(void) { if (this->AreDEFsTyped()) { return true; } bool FoundUNINIT = false; set<DefOrUse, LessDefUse>::iterator DefIter; for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { if (IsEqType(UNINIT, DefIter->GetType())) { FoundUNINIT = true; break; } } if (!FoundUNINIT) { this->SetDEFsTyped(); } return (!FoundUNINIT); } // end of SMPInstr::AllDEFsTyped() // Are all USEs typed to something besides UNINIT? bool SMPInstr::AllUSEsTyped(void) { if (this->AreUSEsTyped()) { return true; } bool FoundUNINIT = false; set<DefOrUse, LessDefUse>::iterator UseIter; for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { if (IsEqType(UNINIT, UseIter->GetType())) { FoundUNINIT = true; break; } } if (!FoundUNINIT) { this->SetUSEsTyped(); } return (!FoundUNINIT); } // end of SMPInstr::AllUSEsTyped() // UseOp is a USE reg, not just an address reg in a memory USE bool SMPInstr::IsNonAddressReg(op_t UseOp) const { bool FoundUse = false; ushort SearchReg = MDCanonicalizeSubReg(UseOp.reg); for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t Opnd = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE if (Opnd.type == o_reg) { ushort TestReg = MDCanonicalizeSubReg(Opnd.reg); if (TestReg == SearchReg) { FoundUse = true; break; } } } } return FoundUse; } // end of SMPInstr::IsNonAddressReg() // Is a sub-register of UseOp used as a shift counter in the RTL? // For example, UseOp could be ECX on an x86 machine, and CL // could be used as a shift or rotate counter. 
bool SMPInstr::IsSubRegUsedAsShiftCount(op_t UseOp) { bool ShiftCounter = false; if ((o_reg == UseOp.type) && this->MDIsShiftOrRotate()) { SMPRegTransfer *CurrRT = this->RTL.GetRT(0); assert(CurrRT->HasRightSubTree()); CurrRT = CurrRT->GetRightTree(); op_t ShiftCountOp = CurrRT->GetRightOperand(); if (o_reg == ShiftCountOp.type) { ushort UseReg = UseOp.reg; ushort ShiftCountReg = ShiftCountOp.reg; ushort WideUseReg = MDCanonicalizeSubReg(UseReg); ushort WideShiftCountReg = MDCanonicalizeSubReg(ShiftCountReg); if ((UseReg != ShiftCountReg) && (WideUseReg == WideShiftCountReg)) { // Registers were not equal, but their canonical enclosing // registers are equal. Because shift counters that are not // immediate are the 8-bit subregister in x86 (MD here !!!!!!) // it must be that the ShiftCountReg is a subreg of UseReg. // This is the condition we are looking for. ShiftCounter = true; } } } return ShiftCounter; } // end of SMPInstr::IsSubRegUsedAsShiftCount() // Is opcode a shift or rotate? // NOTE: We omit MMX/SSE unit shifts that do not use a general purpose // register as a shift counter, because right now this method is only // used as a helper for IsSubRegUsedAsShiftCount(). bool SMPInstr::MDIsShiftOrRotate(void) const { return (((NN_rcl <= SMPcmd.itype) && (NN_ror >= SMPcmd.itype)) || ((NN_sal <= SMPcmd.itype) && (NN_shr >= SMPcmd.itype)) || (NN_shld == SMPcmd.itype) || (NN_shrd == SMPcmd.itype)); } // end of SMPInstr::MDIsShiftOrRotate() // Does the shift or rotate RTL move the upper HalfBitWidth bits // into the lower half of the register? 
bool SMPInstr::ShiftMakesUpperBitsLower(size_t HalfBitWidth) { bool FullCircle = false; if (MD_NORMAL_MACHINE_BITWIDTH == (HalfBitWidth * 2)) { SMPRegTransfer *CurrRT = this->RTL.GetRT(0); if ((NULL != CurrRT) && (CurrRT->HasRightSubTree())) { CurrRT = CurrRT->GetRightTree(); SMPoperator CurrOper = CurrRT->GetOperator(); if ((SMP_U_RIGHT_SHIFT == CurrOper) || (SMP_S_RIGHT_SHIFT == CurrOper) || (SMP_ROTATE_LEFT == CurrOper) || (SMP_ROTATE_RIGHT == CurrOper)) { if (CurrRT->HasRightSubTree()) { // double-word shift CurrRT = CurrRT->GetRightTree(); } assert(!(CurrRT->HasRightSubTree())); op_t ShiftCount = CurrRT->GetRightOperand(); if (o_imm == ShiftCount.type) { uval_t ImmVal = ShiftCount.value; if (ImmVal == HalfBitWidth) { FullCircle = true; } } } } } return FullCircle; } // SMPInstr::ShiftMakesUpperBitsLower() #if 0 // Find SearchDelta in StackDeltaSet, inserting it if not found. Return whether it was initially found. bool SMPInstr::FindStackPtrDelta(sval_t SearchDelta) const { bool found = (this->StackDeltaSet.find(SearchDelta) != this->StackDeltaSet.end()); if (!found) { this->StackDeltaSet.insert(SearchDelta); if (SearchDelta < this->StackPtrOffset) { // Mimic IDA Pro, which seems to keep the biggest stack frame possible. // With negative stack deltas, this means the smallest stack delta is kept. this->SetStackPtrOffset(SearchDelta); } } return found; } // end of SMPInstr::FindStackPtrDelta() #endif // Set the type of all immediate operands found in the USE set. // Set all flags and floating point register USEs and DEFs to NUMERIC also. 
void SMPInstr::SetImmedTypes(bool UseFP) { set<DefOrUse, LessDefUse>::iterator CurrUse; set<DefOrUse, LessDefUse>::iterator CurrDef; op_t UseOp; op_t DefOp; uval_t ImmVal; bool DebugFlag = false; #if SMP_VERBOSE_DEBUG_BUILD_RTL DebugFlag = DebugFlag || (this->address == 0x805cd52) || (this->address == 0x805cd56); DebugFlag |= (0 == strncmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName(), 15)); #endif CurrUse = this->GetFirstUse(); while (CurrUse != this->GetLastUse()) { UseOp = CurrUse->GetOp(); if (DebugFlag) { SMP_msg("SetImmedTypes USE: "); PrintOperand(UseOp); SMP_msg("\n"); } if (o_imm == UseOp.type) { ImmVal = UseOp.value; if (IsImmedGlobalAddress((ea_t) ImmVal)) { if (DebugFlag) SMP_msg("Setting to GLOBALPTR\n"); CurrUse = this->SetUseType(UseOp, GLOBALPTR); } #if 0 else if (IsDataAddress((ea_t) ImmVal)) { // NOTE: We must call IsDataAddress() before we call IsImmedCodeAddress() // to catch the data addresses within the code address range. if (DebugFlag) SMP_msg("Setting to POINTER\n"); CurrUse = this->SetUseType(UseOp, POINTER); } #endif else if (this->MDIsInterruptCall() || IsImmedCodeAddress((ea_t) ImmVal)) { if (DebugFlag) SMP_msg("Setting to CODEPTR\n"); CurrUse = this->SetUseType(UseOp, CODEPTR); } else { // NUMERIC if (DebugFlag) SMP_msg("Setting to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } } else if (o_reg == UseOp.type) { if (UseOp.is_reg(X86_FLAGS_REG)) { if (DebugFlag) SMP_msg("Setting flags reg to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } #if 1 else if (UseOp.is_reg(R_sp) || (UseFP && UseOp.is_reg(R_bp))) { if (DebugFlag) SMP_msg("Setting reg to STACKPTR\n"); CurrUse = this->SetUseType(UseOp, STACKPTR); } #endif } #if 0 // could these registers have pointers in them? 
else if ((o_trreg == UseOp.type) ||(o_dbreg == UseOp.type) || (o_crreg == UseOp.type)) { if (DebugFlag) SMP_msg("Setting special reg to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } #endif else if ((o_fpreg == UseOp.type) || (o_mmxreg == UseOp.type) || (o_xmmreg == UseOp.type)) { if (DebugFlag) SMP_msg("Setting floating point reg to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } else if ((o_mem == UseOp.type) || (o_phrase == UseOp.type) || (o_displ == UseOp.type)) { // For memory operands, we need to identify the POINTER value that // is used in the addressing mode, if possible. (void) this->MDFindPointerUse(UseOp, UseFP); } ++CurrUse; } // end while all USEs via CurrUse CurrDef = this->GetFirstDef(); while (CurrDef != this->GetLastDef()) { DefOp = CurrDef->GetOp(); if (DebugFlag) { SMP_msg("SetImmedTypes DEF: "); PrintOperand(DefOp); SMP_msg("\n"); } if (DebugFlag) SMP_msg("FuncName: %s\n", this->BasicBlock->GetFunc()->GetFuncName()); if (o_reg == DefOp.type) { if (DefOp.is_reg(X86_FLAGS_REG)) { if (DebugFlag) SMP_msg("Setting flags reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); // No need to propagate this DEF type, as all flags will become NUMERIC. } #if 1 else if (DefOp.is_reg(R_sp) || (DefOp.is_reg(R_bp) && UseFP)) { if (DebugFlag) SMP_msg("Setting reg DEF to STACKPTR\n"); CurrDef = this->SetDefType(DefOp, STACKPTR); assert(CurrDef != this->Defs.GetLastRef()); // No need to propagate; all stack and frame pointers will become STACKPTR. } #endif } else if ((o_fpreg == DefOp.type) || (o_mmxreg == DefOp.type) || (o_xmmreg == DefOp.type)) { if (DebugFlag) SMP_msg("Setting floating point reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); // No need to propagate; all FP reg uses will become NUMERIC anyway. } #if 0 // could these registers have pointers in them? 
else if ((o_trreg == DefOp.type) || (o_dbreg == DefOp.type) || (o_crreg == DefOp.type)) { if (DebugFlag) SMP_msg("Setting special reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); } #endif else if ((o_mem == DefOp.type) || (o_phrase == DefOp.type) || (o_displ == DefOp.type)) { // For memory operands, we need to identify the POINTER value that // is used in the addressing mode, if possible. (void) this->MDFindPointerUse(DefOp, UseFP); } ++CurrDef; } // end while all DEFs via CurrDef return; } // end of SMPInstr::SetImmedTypes() // Is the instruction a load from the stack? void SMPInstr::MDFindLoadFromStack(bool UseFP) { set<DefOrUse, LessDefUse>::iterator UseIter; op_t UseOp; if ((3 == this->OptType) && (this->HasSourceMemoryOperand())) { // Loads and stores are OptCategory 3. We want only loads from the stack. for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseOp = UseIter->GetOp(); if (MDIsStackAccessOpnd(UseOp, UseFP)) { this->SetLoadFromStack(); break; } } } return; } // end of SMPInstr::MDFindLoadFromStack() // Determine if instr is inherently signed load instruction. // True if sign or zero-extended; pass out mask bits if true. bool SMPInstr::MDIsSignedLoad(unsigned short &SignMask) { unsigned short opcode = this->SMPcmd.itype; if (NN_movzx == opcode) { SignMask = FG_MASK_UNSIGNED; } else if (NN_movsx == opcode) { SignMask = FG_MASK_SIGNED; } else { return false; } return true; } // Infer sign, bit width, other type info for simple cases where all the info needed is // within the instruction or can be read from the FineGrainedStackTable in the SMPFunction. // NOTE: Must be called after SSA analysis is complete. 
void SMPInstr::MDSetWidthSignInfo(bool UseFP) { set<DefOrUse, LessDefUse>::iterator UseIter; set<DefOrUse, LessDefUse>::iterator DefIter; op_t UseOp, DefOp; struct FineGrainedInfo FGEntry; bool ValueWillChange; unsigned short SignMask, TempSign, WidthMask; int DefHashValue, UseHashValue; ea_t DefAddr; // for flags USE in conditional set int SSANum; // for flags USE in conditional set bool LocalFlags; // is flags register a local name? bool case1, case2, case3, case4, case5, case6; bool SignedSetOpcode = this->MDIsSignedSetValue(); bool UnsignedSetOpcode = this->MDIsUnsignedSetValue(); case1 = this->IsLoadFromStack(); case2 = this->MDIsSignedLoad(SignMask); // sets value of SignMask if it returns true case3 = (7 == this->OptType); // Multiplies and divides case4 = ((CALL == this->GetDataFlowType()) || (INDIR_CALL == this->GetDataFlowType())); case5 = (SignedSetOpcode || UnsignedSetOpcode); // set boolean based on flag condition case6 = this->MDDoublesWidth(); // convert byte to word, word to dword, etc. // Case 1: Load from stack location. if (case1) { bool success = false; for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseOp = UseIter->GetOp(); if (MDIsStackAccessOpnd(UseOp, UseFP)) { // Found the stack location being loaded into a register. Now we need // to get the sign and width info from the fine grained stack frame // analysis. success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, UseOp, FGEntry); assert(success); // Now we have signedness info in FGEntry. We need to OR it into the register target of the load. 
if (FGEntry.SignMiscInfo == 0) break; // nothing to OR in; save time for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { DefOp = DefIter->GetOp(); if (o_reg == DefOp.type) { DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); TempSign = FGEntry.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS; // Get both sign bit flags DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, TempSign); } else { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, TempSign); } break; // Should be only one register target for stack load, and no flags are set. } } break; // Only concerned with the stack operand } } assert(success); } // end if this->IsLoadFromStack() // Case 2: Loads that are sign-extended or zero-extended imply signed and unsigned, respectively. // NOTE: If from the stack, they were handled in Case 1, and the signedness of the stack location // was recorded a long time ago in SMPFunction::FindOutgoingArgsSize(); else if (case2) { DefIter = this->GetFirstDef(); while (DefIter != this->GetLastDef()) { // All non-memory DEFs besides the flags register should get the new SignMask ORed in. // On x86, there should only be one DEF for this move, and no flags, but we will generalize // in case other architectures are odd. DefOp = DefIter->GetOp(); if (!(IsMemOperand(DefOp) || MDIsFlagsReg(DefOp))) { DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); } } ++DefIter; } // If the signed load is from memory, the only USEs are the memory // operand and addressing registers. We do not want to claim that // EBX is signed in the instruction movsx eax,[ebx]. 
Only the DEF // register EAX and the memory location [EBX] are signed, and we // have no idea where [EBX] is, so we punt on all USEs if we have // a memory source operand. if (!(this->HasSourceMemoryOperand())) { UseIter = this->GetFirstUse(); while (UseIter != this->GetLastUse()) { // All non-memory USEs besides the flags register should get the new SignMask ORed in. UseOp = UseIter->GetOp(); if (!(IsMemOperand(UseOp) || MDIsFlagsReg(UseOp))) { UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(UseOp)) { this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); } } ++UseIter; } } } // end of case 2 // Case 3: multiplies and divides can be signed or unsigned. else if (case3) { // Multiplies and divides are type 7. if (this->MDIsSignedArithmetic()) { SignMask = FG_MASK_SIGNED; } else if (this->MDIsUnsignedArithmetic()) { SignMask = FG_MASK_UNSIGNED; } else { SignMask = 0; // unknown, uninitialized } if (0 != SignMask) { DefIter = this->GetFirstDef(); while (DefIter != this->GetLastDef()) { // All DEFs besides the flags register should get the new SignMask ORed in. DefOp = DefIter->GetOp(); if ((DefOp.type == o_reg) && (!(DefOp.is_reg(X86_FLAGS_REG)))) { DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); } } ++DefIter; } UseIter = this->GetFirstUse(); while (UseIter != this->GetLastUse()) { // All USEs besides the flags register should get the new SignMask ORed in. 
UseOp = UseIter->GetOp(); if ((UseOp.type == o_reg) && (!(UseOp.is_reg(X86_FLAGS_REG)))) { UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(UseOp)) { this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); } } ++UseIter; } } // end if (0 != SignMask) } // end of case 3 (multiplies and divides) // Case 4: Calls to library functions can reveal the type of the return register. else if (case4) { // Get name of function called. string FuncName = this->GetTrimmedCalledFunctionName(); // Get FG info, if any, for called function. GetLibFuncFGInfo(FuncName, FGEntry); // See if anything was returned in FGEntry. if ((FGEntry.SignMiscInfo != 0) || (FGEntry.SizeInfo != 0)) { // Need to update the FG info for the DEF of the return register. DefOp = InitOp; DefOp.type = o_reg; DefOp.reg = MD_RETURN_VALUE_REG; DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefFGInfo(DefHashValue, FGEntry); } else { this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, FGEntry); } } } // end of case4 (function calls) else if (case5) { // signed or unsigned conditional set opcode if (UnsignedSetOpcode) { SignMask = FG_MASK_UNSIGNED; } else { assert(SignedSetOpcode); SignMask = FG_MASK_SIGNED; } // Find the flags USE. 
UseOp.type = o_reg; // set up a dummy op for searching UseOp.reg = X86_FLAGS_REG; UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); UseOp = UseIter->GetOp(); // get full info in all fields of UseOp SSANum = UseIter->GetSSANum(); LocalFlags = this->GetBlock()->IsLocalName(UseOp); DefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, this->GetAddr(), SSANum, LocalFlags); // Pass DefAddr to recursive helper function to propagate signedness of the set opcode. this->GetBlock()->PropagateBranchSignedness(DefAddr, UseOp, SignMask); } else if (case6) { // sign extend to double the width of USE operand into DEF operand DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); DefOp = DefIter->GetOp(); assert(o_reg == DefOp.type); DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); SSANum = DefIter->GetSSANum(); DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum); UseIter = this->GetFirstUse(); assert(UseIter != this->GetLastUse()); UseOp = UseIter->GetOp(); assert(o_reg == UseOp.type); UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); assert(UseOp.reg == DefOp.reg); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); SignMask = FG_MASK_SIGNED; // opcodes do sign extension => signed // Mark DEF and USE as signed. if (this->GetBlock()->IsLocalName(DefOp)) { this->GetBlock()->UpdateDefSignMiscInfo(DefHashValue, SignMask); this->GetBlock()->UpdateUseSignMiscInfo(UseHashValue, SignMask); } else { this->GetBlock()->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); this->GetBlock()->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); } } // For all register DEFs and USEs, we should get the obvious register width info // updated. Need to use the RTL operands to get accurate widths. 
SMPRegTransfer *CurrRT; for (size_t index = 0; index < this->RTL.GetCount(); ++index) { CurrRT = this->RTL.GetRT(index); DefOp = CurrRT->GetLeftOperand(); // Avoid setting def width for case 2; we leave it as zero so that // later uses can determine whether the zero-extension or sign-extension // bits ever got used. See more discussion in EmitIntegerErrorAnnotations() // for the CHECK TRUNCATION case. // NOTE: case2 can be set to true even in the case1/case2 overlap case that // only passes through the case1 code above. This is intentional. We want // to leave the DEF width set to 0 for all of case2 including the case1 overlap. if (!case2) { if (MDIsGeneralPurposeReg(DefOp)) { WidthMask = ComputeOperandBitWidthMask(DefOp, 0); DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefWidthTypeInfo(DefHashValue, WidthMask); } else { this->BasicBlock->GetFunc()->UpdateDefWidthTypeInfo(DefHashValue, WidthMask); } } } if (CurrRT->HasRightSubTree()) { this->MDSetRTLRegWidthInfo(CurrRT->GetRightTree()); } else { UseOp = CurrRT->GetRightOperand(); this->SetRTLUseOpRegWidthInfo(UseOp); } } // end for all RTLs return; } // end of SMPInstr::MDSetWidthSignInfo() // Infer sign from the SMP types for USEs and DEFs. void SMPInstr::InferSignednessFromSMPTypes(bool UseFP) { // Start with registers only, infer that all kids of pointers are UNSIGNED. 
set<DefOrUse, LessDefUse>::iterator DefIter, UseIter; op_t DefOp, UseOp; int SSANum; int DefHashValue, UseHashValue; SMPOperandType DefType, UseType; unsigned short DefSignMiscInfo = FG_MASK_UNSIGNED, UseSignMiscInfo = FG_MASK_UNSIGNED; bool GlobalName; for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { DefOp = DefIter->GetOp(); if (MDIsGeneralPurposeReg(DefOp)) { DefType = DefIter->GetType(); if (IsDataPtr(DefType) || (CODEPTR == DefType)) { GlobalName = this->BasicBlock->GetFunc()->IsGlobalName(DefOp); SSANum = DefIter->GetSSANum(); DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum); if (GlobalName) { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, DefSignMiscInfo); } else { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, DefSignMiscInfo); } } } } for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseOp = UseIter->GetOp(); if (MDIsGeneralPurposeReg(UseOp)) { UseType = UseIter->GetType(); if (IsDataPtr(UseType) || (CODEPTR == UseType)) { GlobalName = this->BasicBlock->GetFunc()->IsGlobalName(UseOp); SSANum = UseIter->GetSSANum(); UseHashValue = HashGlobalNameAndSSA(UseOp, SSANum); if (GlobalName) { this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, UseSignMiscInfo); } else { this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, UseSignMiscInfo); } } } } return; } // end of SMPInstr::InferSignednessFromSMPTypes() // Helper to set width info for a UseOp from an RTL void SMPInstr::SetRTLUseOpRegWidthInfo(op_t UseOp) { unsigned short WidthMask; set<DefOrUse, LessDefUse>::iterator UseIter; unsigned int UseHashValue; if (MDIsGeneralPurposeReg(UseOp)) { WidthMask = ComputeOperandBitWidthMask(UseOp, 0); UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(UseOp)) { this->BasicBlock->UpdateUseWidthTypeInfo(UseHashValue, 
WidthMask); } else { this->BasicBlock->GetFunc()->UpdateUseWidthTypeInfo(UseHashValue, WidthMask); } } return; } // end of SMPInstr::SetRTLUseOpRegWidthInfo() // Walk the RTL and update the register USE operands' width info. void SMPInstr::MDSetRTLRegWidthInfo(SMPRegTransfer *CurrRT) { op_t UseOp; UseOp = CurrRT->GetLeftOperand(); this->SetRTLUseOpRegWidthInfo(UseOp); if (CurrRT->HasRightSubTree()) { this->MDSetRTLRegWidthInfo(CurrRT->GetRightTree()); } else { UseOp = CurrRT->GetRightOperand(); this->SetRTLUseOpRegWidthInfo(UseOp); } return; } // end of SMPInstr::MDSetRTLRegWidthInfo() // Do we not consider truncation on this type of instruction to be an error? bool SMPInstr::IsBenignTruncation(void) { bool benign = false; unsigned short SignMask; op_t UseOp, SearchOp; if (3 == this->GetOptType()) { // Move instruction bool ExtendedLoad = this->MDIsSignedLoad(SignMask); if (ExtendedLoad) { if (SignMask & FG_MASK_UNSIGNED) { // We have a zero-extended load. Compilers zero-extend both // signed (unfortunately) and unsigned sub-regs when they know // from the source language types that only the lower bits matter, // e.g. when a char has been stored in the lower bits and regardless // of whether that char was sign-extended or zero-extended previously, // only the char itself is useful info. Otherwise, the compiler could // move the whole register, e.g. instead of edi := zero-extend(cx), the // compiler could have generated edi := ecx. The zero-extension loads // are therefore not good candidates for truncation checks, as they lead // to lots of false positives. benign = true; #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++BenignTruncationCount; #endif } } else { // Move, and not extended load, which was handled above. 
// Next case: A move instruction whose USE falsely appears to be a truncation, // but in fact the apparently unused portion of the register is used later, e.g.: // mov [ebp-12],ax ; looks like EAX is being truncated to AX and stored // shr eax,16 ; gets upper 16 bits into lower 16 bits // mov [ebp-14],ax ; store what used to be the upper 16 bits of EAX // The first instruction will trigger a CHECK TRUNCATION annotation that // causes false positives. We need to analyze the context of the instruction // to see that the whole register EAX was used, so no truncation occurred. // The context analysis in the basic block will mark the second move as // a "truncation" that should be ignored, so we check the flag here to short // circuit redundant analysis. UseOp = this->GetMoveSource(); assert(o_void != UseOp.type); SearchOp = UseOp; if (o_reg == UseOp.type) { SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); } set<DefOrUse, LessDefUse>::iterator UseIter = this->FindUse(SearchOp); assert(UseIter != this->GetLastUse()); if (UseIter->DoesNotTruncate()) { benign = true; #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SuppressTruncationRegPiecesAllUsed; #endif } else { set<DefOrUse, LessDefUse>::iterator DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); int DefSSANum = DefIter->GetSSANum(); benign = this->GetBlock()->IsBenignTruncationDEF(DefIter->GetOp(), DefSSANum, this->GetAddr()); } } } return benign; } // end of SMPInstr::IsBenignTruncation() // Do we not consider overflow or underflow on this type of instruction to be an error? bool SMPInstr::IsBenignOverflow(void) { bool benign = false; set<DefOrUse, LessDefUse>::iterator DefIter; SMPOperandType DefType; int DefSSANum; ea_t DefAddr; op_t DefOp; if (this->MDIsDefiniteBenignUnderflowOpcode()) { // No further analysis at block or function scope is needed. 
benign = true; } else if (this->MDIsMaybeBenignUnderflowOpcode()) { // might have the subtract instruction // We are looking to suppress overflow and underflow warnings on the following // code sequence: PTR1-PTR2+1 gets a loop invariant code motion optimization // that pulls temp := 1-PTR2 out of the loop, and leaves temp2 := PTR1+temp // inside the loop. The hoisted subtraction could underflow, and the addition // that is not hoisted could overflow. The net effect of these two instructions // is benign, however, so we want to suppress underflow and overflow checks on // both of them, but only if we can match the pair of instructions. DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); if (DefIter->DoesNotOverflow()) { benign = true; // short circuit; already analyzed } else { DefType = DefIter->GetType(); if (IsEqType(DefType, NEGATEDPTR)) { // We have candidate subtract instruction DefSSANum = DefIter->GetSSANum(); DefAddr = this->GetAddr(); DefOp = DefIter->GetOp(); benign = this->GetBlock()->GetFunc()->IsBenignUnderflowDEF(DefOp, DefSSANum, DefAddr); } } } else if (this->MDIsMaybeBenignOverflowOpcode()) { // might have the add instruction DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); if (DefIter->DoesNotOverflow()) { benign = true; // short circuit; already analyzed } else { // Bad luck to encounter addition first. See what types tell us. if (this->HasNegatedPtrUSE()) { // We have candidate for addition. DefType = DefIter->GetType(); if (IsEqType(DefType, PTROFFSET)) { // Current instruction is definitely the addition instruction of some // benignly underflowing and overflowing pair of instructions. It does // not really matter that we have not found the subtraction yet; we will // get to it eventually. We should suppress overflow checks on this inst. DefOp = DefIter->GetOp(); this->Defs.SetNoOverflow(DefOp, true); benign = true; } else { // A bit of a quandary. 
Ideally, we would have successful SMP type inference // and always have PTROFFSET := POINTER + NEGATEDPTR. However, sometimes we // might have ?? := ?? + NEGATEDPTR. The instruction could be of the type // NEGATEDPTR := NUMERIC + NEGATEDPTR, so we cannot just assume here that // we have detected the benign case. We will be safe and do nothing. ; } } } } return benign; } // end of SMPInstr::IsBenignOverflow() // Do we detect a situation in which it is safe to check for signedness errors on // the stack write (return false), or not (return true to be safe). bool SMPInstr::SkipSignednessCheckOnStackWrite(int DefSSANum) { bool SkipCheck = true; op_t StackDefOp = this->DEFMemOp; size_t DefBitWidth = 8 * GetOpDataSize(StackDefOp); if (DefBitWidth < MD_NORMAL_MACHINE_BITWIDTH) { // If we are not dealing with a shortened bit width, then // the next load from the stack will not be sign-extended // or zero-extended. if (this->GetBlock()->IsStackOpNextUsedWithSignedness(StackDefOp, this->GetAddr(), DefSSANum)) { SkipCheck = false; } } return SkipCheck; } // end of SMPInstr::SkipSignednessCheckOnStackWrite() // Does inst pass an outgoing argument? bool SMPInstr::MDIsArgumentPass(void) { bool OutArgPass = false; // Current model is writing outargs to stack. For other compiler targets in the // future, we would also include pushes onto the stack. if (this->HasDestMemoryOperand() && (this->GetOptType() == 3)) { // move to memory if (this->GetBlock()->GetFunc()->OutArgsRegionComputed()) { op_t DefOp = this->DEFMemOp; OutArgPass = this->GetBlock()->GetFunc()->IsInOutgoingArgsRegion(DefOp); } } return OutArgPass; } // Trace UseOp through register moves back to its stack location or immediate value source. // Return true if we are passing an immediate or stack location back in UltSource. bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSource) { // If we hit an immediate value or a stack location, we are done. 
bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer(); op_t NewUseOp; // next UseOp up the move chain op_t DefOp, ImmOp; int NewUseSSANum; set<DefOrUse,LessDefUse>::iterator UseIter; bool LocalName; ea_t DefAddr; SMPInstr *DefInst; UltSource = InitOp; bool StackOp = MDIsStackAccessOpnd(UseOp, UseFP); bool RegisterOp = (UseOp.type == o_reg); if (this->GetOptType() == 3) { // move instruction if (UseOp.type == o_imm) { UltSource = UseOp; return true; } else if ((!RegisterOp) && (!StackOp)) { // We only trace the move chain through registers or stack locations to an ultimate // load-effective-address of a stack location, or a move of an immediate value. return false; } } else if (!this->MDIsLoadEffectiveAddressInstr()) { return false; } else { // Load effective address instruction. // If it is a stack location being loaded, trace succeeded, else it failed. if (StackOp) { UltSource = UseOp; return true; } else { return false; } } // If we reach this point, we have a move instruction but did not return true or false above. // Recursion case. Going back up the move chain has just produced a register or // a stack location, and we still need to find the stack address or immediate value // that was stored in the register or stack location. The stack location could hold // a pointer to a stack object, produced by an earlier LEA instruction, or it // could hold an immediate value (e.g. constant size argument passed to memset() or // similar function). LocalName = this->GetBlock()->IsLocalName(UseOp); DefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, this->GetAddr(), UseSSANum, LocalName); if ((BADADDR == DefAddr) || (DefAddr < (this->GetBlock()->GetFunc()->GetStartAddr() - 1))) { // Def was not found, or was found in Phi function (DefAddr was block number, not instruction addr). 
return false; } if (DefAddr < (this->GetBlock()->GetFirstAddr())) { // If DefAddr is 1 byte less than the first addr in the block, then // it is a pseudo-def in the global DU chains, signifying that the // value was LiveIn and the true DEF is in another block. We could // handle this in the future, but right now we will only deal with // the simpler case in which the move source can be traced // within the basic block. return false; } // Everything is OK so far; time to recurse up the move chain. DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(DefAddr); if (DefInst->GetOptType() == 3) { NewUseOp = DefInst->GetMoveSource(); } else if (DefInst->MDIsLoadEffectiveAddressInstr()) { NewUseOp = DefInst->GetLeaMemUseOp(); if (MDIsStackAccessOpnd(NewUseOp, UseFP)) { UltSource = NewUseOp; return true; } else { return false; } } // We don't have a move instruction or a load effective address instruction, which // can be used to move a stack address into a register. We don't try to trace through // arithmetic except for two easy cases. // Case 1: A register is cleared. Same as assigning immediate value zero to the reg. else if (DefInst->IsRegClearIdiom()) { UltSource.type = o_imm; UltSource.value = 0; // why would we memset a zero byte region? return true; } // Easy arithmetic Case 2: we have reg += ImmediateValue, and reg was DEFed by reg := LEA(StackLoc). else if (DefInst->MDIsAddImmediateToReg(DefOp, ImmOp)) { SMPInstr *NewDefInst; UseIter = DefInst->FindUse(DefOp); assert(UseIter != DefInst->GetLastUse()); NewUseSSANum = UseIter->GetSSANum(); LocalName = DefInst->GetBlock()->IsLocalName(DefOp); DefAddr = DefInst->GetBlock()->GetDefAddrFromUseAddr(DefOp, DefInst->GetAddr(), NewUseSSANum, LocalName); if ((BADADDR == DefAddr) || (DefAddr < (DefInst->GetBlock()->GetFunc()->GetStartAddr() - 1))) { // Def was not found, or was found in Phi function (DefAddr was block number, not instruction addr). 
return false; } NewDefInst = DefInst->GetBlock()->GetFunc()->GetInstFromAddr(DefAddr); if (NewDefInst->MDIsLoadEffectiveAddressInstr()) { NewUseOp = NewDefInst->GetLeaMemUseOp(); if (MDIsStackAccessOpnd(NewUseOp, UseFP)) { // We have the code sequence we were searching for when we first saw the // addition of an immediate value to a register, e.g.: // lea ebx,[ebp-2000] // add ebx,1000 // // We are essentially making this sequence into a single instruction: // lea ebx,[ebp-1000] // by adding the immediate value to the address offset. With a stack that grows // downward, it does not matter if we add 1000 to [esp+500] to produce [esp+1500], // or we add 1000 to [ebp-2000] to make [ebp-1000]. Either way, we are simulating the // addition of 1000 as we move up in the stack frame. NewUseOp.addr += ImmOp.value; // perform the address arithmetic addition UltSource = NewUseOp; return true; } else { return false; } } else { return false; } } else { // Not the kind of instruction we need; cut short the recursion. return false; } // NewUseOp is the move source operand that we seek. UseIter = DefInst->FindUse(NewUseOp); assert(UseIter != DefInst->GetLastUse()); NewUseSSANum = UseIter->GetSSANum(); // unused for immediates, used for regs and stack // Recurse return DefInst->TraceUltimateMoveSource(NewUseOp, NewUseSSANum, UltSource); } // end of SMPInstr::TraceUltimateMoveSource() // Infer DEF, USE, and RTL SMPoperator types within the instruction based on the type // of operator, the type category of the instruction, and the previously known types // of the operands. 
// Return value: true if any DEF, USE, or RTL operator type was changed during
//  this call. Control-flow instructions (data flow types JUMP..INDIR_CALL, and
//  tail calls) are typed entirely in the first phase below, marked complete,
//  and return true unconditionally. Instructions already marked complete, or
//  whose DEFs and USEs are all typed, return false.
// Phases, in order:
//  1. Early exits and pointer-use detection for memory operands.
//  2. Control-flow instructions: type the USEs directly, no RTL walk.
//  3. Inference from the instruction's type category alone (done once, guarded
//     by IsCategoryInferenceComplete()).
//  4. Walk of every register transfer in the RTL via InferOperatorType().
bool SMPInstr::InferTypes(void) {
	bool changed = false;  // return value
	int SSANum;
	int TypeCategory = SMPTypeCategory[this->SMPcmd.itype];
	set<DefOrUse, LessDefUse>::iterator CurrDef;
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	op_t DefOp = InitOp, UseOp = InitOp;
	bool DebugFlag = false;
	bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
	bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe();
	bool IsMemOp; // NOTE: shadowed by block-local IsMemOp declarations in cases 2.. and 4 below

#if SMP_VERBOSE_DEBUG_INFER_TYPES
	DebugFlag |= (0 == strcmp("InputMove", this->BasicBlock->GetFunc()->GetFuncName()));
#endif

	if (DebugFlag) {
		SMP_msg("opcode: %d TypeCategory: %d\n", this->SMPcmd.itype, TypeCategory);
	}

	// If we are already finished with all types, return false.
	if (this->IsTypeInferenceComplete())
		return false;

	if (this->AllDEFsTyped() && this->AllUSEsTyped()) {
		this->SetTypeInferenceComplete();
		return false;
	}

	// Memory operands are examined for pointer USEs on every call that gets
	//  this far, for both the destination and the source operand.
	if (this->HasDestMemoryOperand()) {
		changed |= this->MDFindPointerUse(this->MDGetMemDefOp(), UseFP);
	}
	if (this->HasSourceMemoryOperand()) {
		changed |= this->MDFindPointerUse(this->MDGetMemUseOp(), UseFP);
	}

	// The control flow instructions can be handled simply based on their type
	//  and do not need an RTL walk.
	SMPitype DFAType = this->GetDataFlowType();
	bool CallInst = ((DFAType == CALL) || (DFAType == INDIR_CALL) || this->IsTailCall());
	ushort IndirCallReg = R_none; // stays R_none unless this is an indirect call through a register
	if (DebugFlag) {
		SMP_msg("DFAType: %d CategoryInferenceComplete: %d\n", DFAType, this->IsCategoryInferenceComplete());
	}

	if (DFAType == INDIR_CALL) {
		op_t TargetOp = this->SMPcmd.Operands[0];
		if (TargetOp.type == o_reg)
			IndirCallReg = TargetOp.reg;
	}

	if (((DFAType >= JUMP) && (DFAType <= INDIR_CALL)) || this->IsTailCall()) {
		// All USEs are either the flags (NUMERIC) or the target address (CODEPTR).
		//  The exceptions are the USE list for interrupt calls, which includes
		//  the caller-saved regs, and indirect calls through a memory
		//  operand, such as call [ebx+esi+20h], where the memory operand
		//  is a CODEPTR but the addressing registers are a BaseReg and
		//  IndexReg as in any other memory addressing, and the saved
		//  regs on any call.
		CurrUse = this->GetFirstUse();
		while (CurrUse != this->GetLastUse()) {
			UseOp = CurrUse->GetOp();
			// SetUseType() returns an iterator that is assigned back to CurrUse
			//  before the ++CurrUse at the bottom of the loop.
			if (UseOp.is_reg(X86_FLAGS_REG))
				CurrUse = this->SetUseType(UseOp, NUMERIC);
			else if ((CurrUse->GetType() != CODEPTR)
				&& (!(this->MDIsInterruptCall() && (o_reg == UseOp.type)))
				&& (!(CallInst && (o_reg == UseOp.type)))
				&& (!(this->HasSourceMemoryOperand()
					&& (INDIR_CALL == this->GetDataFlowType())
					&& (o_reg == UseOp.type)))) {
				CurrUse = this->SetUseType(UseOp, CODEPTR);
				if (CallInst && (DFAType != INDIR_CALL)) {
					// If the call is to malloc(), then the DEF of the return
					//  register is of type HEAPPTR.
					// ****!!!!**** Could have INDIR_CALL resolved to malloc.
					changed |= this->MDFindMallocCall(UseOp);
				}
			}
			else if ((CurrUse->GetType() != CODEPTR) && CallInst && UseOp.is_reg(IndirCallReg)) {
				// Register USE on a call that is the indirect call target register.
				CurrUse = this->SetUseType(UseOp, CODEPTR);
			}
			++CurrUse;
		}
		this->SetTypeInferenceComplete();
		return true; // unconditional, regardless of the value of 'changed'
	}

	// First, see if we can infer something about DEFs and USEs just from the
	//  type category of the instruction.
	if (!this->IsCategoryInferenceComplete()) {
		bool MemPropagate = false;
		switch (TypeCategory) {
			case 0: // no inference possible just from type category
			case 1: // no inference possible just from type category
			case 3: // MOV instructions; inference will come from source to dest in RTL walk.
			case 5: // binary arithmetic; inference will come in RTL walk.
			case 10: // binary arithmetic; inference will come in RTL walk.
			case 11: // push and pop instructions; inference will come in RTL walk.
			case 12: // exchange instructions; inference will come in RTL walk.
				this->SetCategoryInferenceComplete();
				break;

			case 2: // Result type is always NUMERIC.
			case 7: // Result type is always NUMERIC.
			case 8: // Result type is always NUMERIC.
			case 9: // Result type is always NUMERIC.
			case 13: // Result type is always NUMERIC.
			case 14: // Result type is always NUMERIC.
			case 15: // Result type is always NUMERIC.
				// Every DEF that is not already NUMERIC is set to NUMERIC and,
				//  where allowed, the type is pushed to its uses.
				CurrDef = this->GetFirstDef();
				while (CurrDef != this->GetLastDef()) {
					if (!IsEqType(NUMERIC, CurrDef->GetType())) {
						DefOp = CurrDef->GetOp();
						SSANum = CurrDef->GetSSANum();
						CurrDef = this->SetDefType(DefOp, NUMERIC);
						changed = true;
						// Be conservative and only propagate register DEFs and SAFE stack locs. We
						//  can improve this in the future. **!!**
						// NOTE: these two declarations shadow the outer IsMemOp and MemPropagate.
						bool IsMemOp = (o_reg != DefOp.type);
						bool MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP);
#if SMP_PROPAGATE_MEM_TYPES
						;
#else
						// Be conservative and only propagate register DEFs and SAFE stack locs.
						//  We can improve this in the future. **!!**
						MemPropagate = MemPropagate && SafeFunc;
#endif
						if ((o_reg == DefOp.type) || MemPropagate) {
							if (this->BasicBlock->IsLocalName(DefOp)) {
								(void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, this->GetAddr(), SSANum, IsMemOp);
							}
							else { // global name
								this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
								(void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, SSANum, IsMemOp);
							}
						}
					}
					++CurrDef;
				}
				this->SetCategoryInferenceComplete();
				break;

			case 4: // Unary INC, DEC, etc.: dest=source, so type remains the same
				assert(1 == this->RTL.GetCount());
				assert(this->RTL.GetRT(0)->HasRightSubTree());
				UseOp = this->RTL.GetRT(0)->GetLeftOperand(); // USE == DEF
				CurrUse = this->Uses.FindRef(UseOp);
				assert(CurrUse != this->GetLastUse());
				// Nothing happens (and category inference is NOT marked complete)
				//  until the USE has a type; this case is retried on later calls.
				if (UNINIT != CurrUse->GetType()) {
					// Only one USE, and it has a type assigned, so assign that type
					//  to the DEF.
					CurrDef = this->GetFirstDef();
					while (CurrDef != this->GetLastDef()) {
						// Two DEFs: EFLAGS is NUMERIC, dest==source
						DefOp = CurrDef->GetOp();
						SSANum = CurrDef->GetSSANum();
						if (DefOp.is_reg(X86_FLAGS_REG)) {
							; // SetImmedTypes already made it NUMERIC
						}
						else {
							CurrDef = this->SetDefType(DefOp, CurrUse->GetType());
							// Be conservative and only propagate register DEFs and SAFE stack locs. We
							//  can improve this in the future. **!!**
							// NOTE: this declaration shadows the function-level IsMemOp.
							bool IsMemOp = (o_reg != DefOp.type);
							MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP);
#if SMP_PROPAGATE_MEM_TYPES
							;
#else
							// Be conservative and only propagate register DEFs and SAFE stack locs.
							//  We can improve this in the future. **!!**
							MemPropagate = MemPropagate && SafeFunc;
#endif
							if ((o_reg == DefOp.type) || MemPropagate) {
								if (this->BasicBlock->IsLocalName(DefOp)) {
									(void) this->BasicBlock->PropagateLocalDefType(DefOp, CurrUse->GetType(), this->GetAddr(), SSANum, IsMemOp);
								}
								else { // global name
									this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
									(void) this->BasicBlock->PropagateGlobalDefType(DefOp, CurrUse->GetType(), SSANum, IsMemOp);
								}
							}
						}
						++CurrDef;
					}
					this->SetCategoryInferenceComplete();
					changed = true;
					// The whole instruction is marked complete here; the RTL walk
					//  below still runs once on this call.
					this->SetTypeInferenceComplete();
				}
				break;

			case 6: // Result is always POINTER
				// Only the first DEF is typed here.
				DefOp = this->GetFirstDef()->GetOp();
				SSANum = this->GetFirstDef()->GetSSANum();
				CurrDef = this->SetDefType(DefOp, POINTER);
				this->SetCategoryInferenceComplete();
				changed = true;
				// Be conservative and only propagate register DEFs and SAFE stack locs. We
				//  can improve this in the future. **!!**
				IsMemOp = (o_reg != DefOp.type);
				MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP);
#if SMP_PROPAGATE_MEM_TYPES
				;
#else
				// Be conservative and only propagate register DEFs and SAFE stack locs.
				//  We can improve this in the future. **!!**
				MemPropagate = MemPropagate && SafeFunc;
#endif
				if ((o_reg == DefOp.type) || MemPropagate) {
					if (this->BasicBlock->IsLocalName(DefOp)) {
						(void) this->BasicBlock->PropagateLocalDefType(DefOp, POINTER, this->GetAddr(), SSANum, IsMemOp);
					}
					else { // global name
						this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
						(void) this->BasicBlock->PropagateGlobalDefType(DefOp, POINTER, SSANum, IsMemOp);
					}
				}
				break;

			default:
				SMP_msg("ERROR: Unknown type category for %s\n", DisAsmText.GetDisAsm(this->GetAddr()));
				this->SetCategoryInferenceComplete();
				break;
		} // end switch on TypeCategory
	} // end if (!CategoryInference)

	// Walk the RTL and infer types based on operators and operands.
	if (DebugFlag) {
		SMP_msg("RTcount: %zu\n", this->RTL.GetCount());
	}
	for (size_t index = 0; index < this->RTL.GetCount(); ++index) {
		SMPRegTransfer *CurrRT = this->RTL.GetRT(index);
		if (SMP_NULL_OPERATOR == CurrRT->GetOperator()) // nothing to infer
			continue;
		// Register transfers that already finished inference are skipped.
		if (!(CurrRT->IsTypeInferenceComplete())) {
			changed |= this->InferOperatorType(CurrRT);
		}
		if (DebugFlag) {
			SMP_msg("returned from InferOperatorType\n");
		}
	} // end for all RTs in the RTL
	return changed;
} // end of SMPInstr::InferTypes()

// Infer the type of an operator within an RT based on the types of its operands and
//  based on the operator itself. Recurse down the tree if necessary.
// Return true if the operator type of the RT is updated.
bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { bool updated = false; bool LeftNumeric, RightNumeric, OperNumeric; bool LeftPointer, RightPointer, OperPointer; bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe(); set<DefOrUse, LessDefUse>::iterator CurrDef; set<DefOrUse, LessDefUse>::iterator CurrUse; set<DefOrUse, LessDefUse>::iterator LeftUse; set<DefOrUse, LessDefUse>::iterator RightUse; SMPOperandType LeftType = UNINIT; SMPOperandType RightType = UNINIT; SMPOperandType OperType = UNINIT; op_t UseOp = InitOp, DefOp = InitOp, LeftOp = InitOp, RightOp = InitOp; SMPoperator CurrOp = CurrRT->GetOperator(); bool TypeInferenceFinished = false; #if SMP_VERBOSE_DEBUG_INFER_TYPES bool DebugFlag = false; #if 1 DebugFlag |= (0 == strcmp("InputMove", this->BasicBlock->GetFunc()->GetFuncName())); #endif DebugFlag = DebugFlag || ((this->address == 0x806453b) || (this->address == 0x806453e)); #endif #if SMP_VERBOSE_DEBUG_INFER_TYPES if (DebugFlag) { SMP_msg("Entered InferOperatorType for CurrOp: %d at %x\n", CurrOp, this->GetAddr()); } #endif if (CurrRT->IsTypeInferenceComplete()) { return updated; } switch (CurrOp) { case SMP_NULL_OPERATOR: TypeInferenceFinished = true; break; case SMP_CALL: // CALL instruction if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(CODEPTR, this); updated = true; UseOp = CurrRT->GetRightOperand(); CurrUse = this->Uses.FindRef(UseOp); assert(CurrUse != this->GetLastUse()); if (UNINIT == CurrUse->GetType()) { CurrUse = this->SetUseType(UseOp, CODEPTR); } else if (CODEPTR != CurrUse->GetType()) { SMP_msg("WARNING: call target is type %d, setting to CODEPTR at %x in %s\n", CurrUse->GetType(), this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); CurrUse = this->SetUseType(UseOp, CODEPTR); } } TypeInferenceFinished = true; break; case SMP_INPUT: // input from port if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(NUMERIC, this); updated = 
true; } break; case SMP_OUTPUT: // output to port if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } break; case SMP_SIGN_EXTEND: case SMP_ZERO_EXTEND: // Should we infer that all operands are NUMERIC? !!!???!!!! break; case SMP_ADDRESS_OF: // take effective address if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(POINTER, this); // Left operand is having its address taken, but we cannot infer what its // type is. updated = true; } break; case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_S_LEFT_SHIFT: // signed left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_S_MULTIPLY: case SMP_U_DIVIDE: case SMP_S_DIVIDE: case SMP_U_REMAINDER: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_U_COMPARE: // unsigned compare (AND-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_REVERSE_SHIFT_U: // all the same to our type system; all NUMERIC case SMP_SHUFFLE: // all the same to our type system; all NUMERIC case SMP_COMPARE_EQ_AND_SET: // packed compare for equality and set bits; all NUMERIC case SMP_COMPARE_GT_AND_SET: // packed 
compare for greater-than and set bits; all NUMERIC case SMP_PACK_S: // Pack operands into extended-precision register, signed saturation for loss of precision case SMP_PACK_U: // Pack operands into extended-precision register, unsigned saturation for loss of precision case SMP_AVERAGE_U: // Average of unsigned operands case SMP_MULTIPLY_AND_ADD: // multiply and add (or multiply and accumulate) case SMP_SUM_OF_DIFFS: // sum over two vectors of absolute values of differences of their elements case SMP_INTERLEAVE: // interleave fields from two packed operands; NUMERIC case SMP_CONCATENATE: // all the same to our type system; all NUMERIC if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } // Left operand should be NUMERIC if it exists. UseOp = CurrRT->GetLeftOperand(); if (UseOp.type != o_void) { CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { SMP_msg("SERIOUS WARNING: Adding missing USE of "); PrintOperand(UseOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); this->Uses.SetRef(UseOp, NUMERIC, -1); updated = true; } else if (UNINIT == CurrUse->GetType()) { CurrUse = this->SetUseType(UseOp, NUMERIC); updated = true; } } // Right operand should be NUMERIC if it exists. 
if (CurrRT->HasRightSubTree()) { // Recurse into subtree #if SMP_AGGRESSIVE_TYPE_INFERENCE if (UNINIT == CurrRT->GetRightTree()->GetOperatorType()) { CurrRT->GetRightTree()->SetOperatorType(NUMERIC, this); } #endif updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else { UseOp = CurrRT->GetRightOperand(); if (UseOp.type != o_void) { CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { SMP_msg("SERIOUS WARNING: Adding missing USE of "); PrintOperand(UseOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); this->Uses.SetRef(UseOp, NUMERIC, -1); updated = true; } else if (UNINIT == CurrUse->GetType()) { CurrUse = this->SetUseType(UseOp, NUMERIC); updated = true; } } } break; case SMP_NEGATE: // unary negation UseOp = CurrRT->GetLeftOperand(); assert(o_void != UseOp.type); CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { SMP_msg("SERIOUS WARNING: Adding missing USE of "); PrintOperand(UseOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); updated = true; } else { OperType = CurrRT->GetOperatorType(); LeftType = CurrUse->GetType(); // Only tricky cases are the negation of a POINTER or PTROFFSET. // Negation of PTROFFSET could be inefficient code that computed // PTR1 - PTR2 and later corrected it to PTR2 - PTR1 by negation. // The type remains PTROFFSET. Negating a POINTER could be an unusual // case similar to subtracting a POINTER from a NUMERIC. See comments // in the SMP_ADD case below, and also the SMP_SUBTRACT case. if (LeftType == PTROFFSET) { // Override any prior operator type, in case PTROFFSET was inferred late // in our analysis and the operator was set to NUMERIC. CurrRT->SetOperatorType(PTROFFSET, this); updated = true; } else if (IsDataPtr(LeftType)) { // Override any prior operator type, in case POINTER was inferred late // in our analysis and the operator was set to NUMERIC. 
CurrRT->SetOperatorType(NEGATEDPTR, this); updated = true; } else if (OperType == UNINIT) { // Default to NUMERIC for most negations. CurrRT->SetOperatorType(NUMERIC, this); // But, leave left operand type alone, in case an UNINIT operand // might be determined later to be PTROFFSET or NEGATEDPTR. // Leaving it alone causes us not to set TypeInferenceFinished to true // at the end of this function in the UNINIT case. updated = true; } } break; case SMP_INCREMENT: case SMP_DECREMENT: // The type of the left operand is propagated to the operator, or vice // versa, whichever receives a type first. assert(!CurrRT->HasRightSubTree()); UseOp = CurrRT->GetLeftOperand(); assert(o_void != UseOp.type); CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { SMP_msg("SERIOUS WARNING: Adding missing USE of "); PrintOperand(UseOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); this->Uses.SetRef(UseOp); updated = true; break; } if (UNINIT == CurrRT->GetOperatorType()) { if (UNINIT != CurrUse->GetType()) { // Propagate operand type up to the operator. CurrRT->SetOperatorType(CurrUse->GetType(), this); updated = true; } } else if (UNINIT == CurrUse->GetType()) { // Propagate operator type to operand. CurrUse = this->SetUseType(UseOp, CurrRT->GetOperatorType()); updated = true; } break; case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_BITWISE_AND: case SMP_BITWISE_OR: // Extract the current types of right and left operands and the operator. 
OperType = CurrRT->GetOperatorType(); LeftOp = CurrRT->GetLeftOperand(); CurrUse = this->Uses.FindRef(LeftOp); assert(CurrUse != this->GetLastUse()); // found it LeftType = CurrUse->GetType(); if (CurrRT->HasRightSubTree()) { updated |= this->InferOperatorType(CurrRT->GetRightTree()); RightType = CurrRT->GetRightTree()->GetOperatorType(); } else { RightOp = CurrRT->GetRightOperand(); if (o_void == RightOp.type) { SMP_msg("ERROR: void operand at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); return updated; } else { CurrUse = this->Uses.FindRef(RightOp); if (CurrUse == this->GetLastUse()) { SMP_msg("SERIOUS WARNING: Adding missing USE of "); PrintOperand(RightOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); this->Uses.SetRef(RightOp); updated = true; break; } else { RightType = CurrUse->GetType(); } } } // We have to know both operand types to infer the operator, or know the // operator type and one operand type to infer the other operand type. if ((UNINIT == OperType) && ((UNINIT == LeftType) || (UNINIT == RightType))) break; // If both operands are NUMERIC, operator and result are NUMERIC. // If one operand is NUMERIC and the other is a pointer type, // then the ADD operator and the result will inherit this second type, // while AND and OR operators will remain UNINIT (we don't know what // type "ptr AND 0xfffffff8" has until we see how it is used). LeftNumeric = IsEqType(NUMERIC, LeftType); RightNumeric = IsEqType(NUMERIC, RightType); LeftPointer = IsDataPtr(LeftType); RightPointer = IsDataPtr(RightType); if (UNINIT == OperType) { // Infer operator type from left and right operands. if (LeftNumeric && RightNumeric) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; break; } else if (LeftNumeric || RightNumeric) { // ADD of NUMERIC to non-NUMERIC preserves non-NUMERIC type. // AND and OR operations should leave the operator UNINIT for now. 
if (LeftNumeric && (UNINIT != RightType) && ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp))) { CurrRT->SetOperatorType(RightType, this); updated = true; break; } else if (RightNumeric && (UNINIT != LeftType) && ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp))) { CurrRT->SetOperatorType(LeftType, this); updated = true; break; } } else if (LeftPointer && RightPointer) { // Arithmetic on two pointers if ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp)) { CurrRT->SetOperatorType(UNKNOWN, this); updated = true; } else { // bitwise AND or OR of two pointers SMP_msg("WARNING: hash of two pointers at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); // hash operation? leave operator as UNINIT } break; } else if ((LeftPointer && IsEqType(RightType, PTROFFSET)) || (RightPointer && IsEqType(LeftType, PTROFFSET))) { // Arithmetic on PTR and PTROFFSET if ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp)) { // We assume (A-B) is being added to B or vice versa **!!** CurrRT->SetOperatorType(POINTER, this); updated = true; } else { // bitwise AND or OR of pointer and pointer difference SMP_msg("WARNING: hash of PTROFFSET and POINTER at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); // hash operation? leave operator as UNINIT } break; } else if ((LeftPointer && IsEqType(RightType, NEGATEDPTR)) || (RightPointer && IsEqType(LeftType, NEGATEDPTR))) { // Compiler optimizations can take a ptr expression such as: // PTR1 - PTR2 + 1 // and hoist the loop-invariant subexpression " - PTR2 + 1" // out of the loop as "1 - PTR2", which produces a NEGATEDPTR type. // When PTR1 gets its value determined inside the loop, then the // addition of PTR1 finally happens, producing a PTROFFSET type, // which is what the whole expression is. 
if ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp)) { CurrRT->SetOperatorType(PTROFFSET, this); updated = true; } else { // bitwise AND or OR of pointer and pointer difference SMP_msg("WARNING: hash of NEGATEDPTR and POINTER at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); // hash operation? leave operator as UNINIT } break; } } // end if UNINIT operator type else { // operator has type other than UNINIT // We make add-with-carry and subtract-with-borrow exceptions // to the type propagation. LeftOp could have POINTER type // inferred later; these instructions can change the type of // the register from POINTER to NUMERIC, unlike regular // add and subtract opcodes. OperNumeric = IsEqType(NUMERIC, OperType); OperPointer = IsDataPtr(OperType); if (OperNumeric) { if ((UNINIT == LeftType) && (SMP_ADD_CARRY != CurrOp)) { CurrUse = this->SetUseType(LeftOp, CurrRT->GetOperatorType()); updated = true; assert(CurrUse != this->GetLastUse()); break; } if (CurrRT->HasRightSubTree()) { // Must need to iterate through the right tree again, as the operator // has been typed. if (UNINIT == RightType) { CurrRT->GetRightTree()->SetOperatorType(CurrRT->GetOperatorType(), this); updated = true; updated |= this->InferOperatorType(CurrRT->GetRightTree()); } break; } else { // right operand; propagate operator type if needed if (UNINIT == RightType) { CurrUse = this->SetUseType(RightOp, CurrRT->GetOperatorType()); updated = true; assert(CurrUse != this->GetLastUse()); break; } } } } break; case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow // Extract the current types of right and left operands and the operator. 
OperType = CurrRT->GetOperatorType(); LeftOp = CurrRT->GetLeftOperand(); LeftUse = this->Uses.FindRef(LeftOp); assert(LeftUse != this->GetLastUse()); // found it LeftType = LeftUse->GetType(); if (CurrRT->HasRightSubTree()) { updated |= this->InferOperatorType(CurrRT->GetRightTree()); RightType = CurrRT->GetRightTree()->GetOperatorType(); } else { RightOp = CurrRT->GetRightOperand(); if (o_void == RightOp.type) { SMP_msg("ERROR: void operand in %s\n", DisAsmText.GetDisAsm(this->GetAddr())); return false; } else { RightUse = this->Uses.FindRef(RightOp); if (RightUse == this->GetLastUse()) { SMP_msg("WARNING: Adding missing USE of "); PrintOperand(RightOp); SMP_msg(" in %s\n", DisAsmText.GetDisAsm(this->GetAddr())); this->Uses.SetRef(RightOp); updated = true; break; } else { RightType = RightUse->GetType(); } } } // If left operand is NUMERIC, operator is NUMERIC. LeftNumeric = IsEqType(NUMERIC, LeftType); RightNumeric = IsEqType(NUMERIC, RightType); LeftPointer = IsDataPtr(LeftType); RightPointer = IsDataPtr(RightType); if (LeftNumeric) { // Subtracting anything from a NUMERIC leaves it NUMERIC or NEGATEDPTR, // in the special case in which a POINTER is subtracted from a NUMERIC. // See NEGATEDPTR comments in the ADD/AND operators case above. if (RightPointer) { CurrRT->SetOperatorType(NEGATEDPTR, this); updated = true; } else if (UNINIT == OperType) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } else if (IsNotEqType(NUMERIC, OperType) && IsNotEqType(NEGATEDPTR, OperType)) { SMP_msg("ERROR: SMP_SUBTRACT from NUMERIC should be NUMERIC or NEGATEDPTR operator."); SMP_msg(" Operator type is %d in: %s\n", OperType, DisAsmText.GetDisAsm(this->GetAddr())); } #if 0 if (!RightNumeric) { // Right operand is being used as a NUMERIC, so propagate NUMERIC to it. 
if (CurrRT->HasRightSubTree()) { CurrRT->GetRightTree()->SetOperatorType(NUMERIC, this); } else { RightUse = this->SetUseType(RightOp, NUMERIC); } updated = true; } #endif } // end if LeftNumeric else if (LeftPointer) { if (UNINIT == OperType) { // If we subtract another pointer type, we produce PTROFFSET. if (RightPointer) { CurrRT->SetOperatorType(PTROFFSET, this); updated = true; } else if (RightType == PTROFFSET) { // We assume B - (B - A) == A **!!** CurrRT->SetOperatorType(POINTER, this); SMP_msg("WARNING: PTR - PTROFFSET produces PTR at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); updated = true; } else if (RightNumeric) { // pointer minus NUMERIC keeps same pointer type CurrRT->SetOperatorType(LeftType, this); updated = true; } } else { // we have an operator type for the SMP_SUBTRACT OperNumeric = IsEqType(NUMERIC, OperType); OperPointer = IsDataPtr(OperType); if (CurrRT->HasRightSubTree()) { // Might need to iterate through the right tree again, if its operator // can be typed. if (UNINIT == RightType) { if (OperPointer) { // PTR := PTR - ?? ==> ?? is NUMERIC Why? ?? could be PTROFFSET CurrRT->GetRightTree()->SetOperatorType(NUMERIC, this); updated = true; updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else if (OperType == PTROFFSET) { // PTROFFSET := PTR - ?? ==> ?? is PTR CurrRT->GetRightTree()->SetOperatorType(LeftType, this); updated = true; updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else if (OperNumeric) { SMP_msg("WARNING: PTR - ?? produces NUMERIC at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } } break; } else { // right operand; propagate operator type if needed if (UNINIT == RightType) { if (OperPointer) { // PTR := PTR - ?? ==> ?? is NUMERIC Why? ?? could be PTROFFSET RightUse = this->SetUseType(RightOp, NUMERIC); updated = true; assert(RightUse != this->GetLastUse()); } else if (OperType == PTROFFSET) { // PTROFFSET := PTR - ?? ==> ?? 
is PTR RightUse = this->SetUseType(RightOp, LeftType); updated = true; } else if (OperNumeric) { SMP_msg("WARNING: PTR - ?? produces NUMERIC at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } break; } } } // end if OperType is UNINIT ... else ... } // end if LeftNumeric ... else if LeftPointer ... else if (UNINIT == LeftType) { // We make add-with-carry and subtract-with-borrow exceptions // to the type propagation. LeftOp could have POINTER type // inferred later; these instructions can change the type of // the register from POINTER to NUMERIC, unlike regular // add and subtract opcodes. if ((UNINIT != OperType) && (SMP_SUBTRACT_BORROW != CurrOp)) { LeftUse = this->SetUseType(LeftOp, OperType); assert(LeftUse != this->GetLastUse()); updated = true; } } break; case SMP_ASSIGN: // Extract the current types of right and left operands and SMP_ASSIGN operator. OperType = CurrRT->GetOperatorType(); DefOp = CurrRT->GetLeftOperand(); CurrDef = this->Defs.FindRef(DefOp); assert(CurrDef != this->GetLastDef()); // found it LeftType = CurrDef->GetType(); if (CurrRT->HasRightSubTree()) { updated |= this->InferOperatorType(CurrRT->GetRightTree()); RightType = CurrRT->GetRightTree()->GetOperatorType(); } else { UseOp = CurrRT->GetRightOperand(); if (o_void == UseOp.type) { SMP_msg("ERROR: void operand for SMP_ASSIGN in %s\n", DisAsmText.GetDisAsm(this->GetAddr())); return false; } else { CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { SMP_msg("WARNING: Adding missing USE of "); PrintOperand(UseOp); SMP_msg(" in %s\n", DisAsmText.GetDisAsm(this->GetAddr())); this->Uses.SetRef(UseOp); updated = true; break; } else { RightType = CurrUse->GetType(); } } } #if SMP_VERBOSE_DEBUG_INFER_TYPES if (DebugFlag) { SMP_msg("%x LeftType: %d OperatorType: %d RightType: %d\n", this->address, LeftType, OperType, RightType); } #endif if ((UNINIT == RightType) && (UNINIT == LeftType)) { break; } else if (UNINIT == OperType) { // UNINIT SMP_ASSIGN 
operator, but either LeftType or RightType is not UNINIT. bool UpdatedOperType = false; if (UNINIT != RightType) { // We have to special case conditional moves. Only if both operands // (the source and the prior value of the potential destination, // which was added to the USE set by BuildMoveRTL()) agree in type // can we propagate their common type to the operator and ultimately // to the DEF. if ((!this->MDIsConditionalMoveInstr()) || this->Uses.TypesAgreeNoFlags()) { CurrRT->SetOperatorType(RightType, this); updated = true; OperType = RightType; UpdatedOperType = true; } } else { // LeftType must not be UNINIT CurrRT->SetOperatorType(LeftType, this); updated = true; UpdatedOperType = true; } // Speed up type propagation by passing the RightType/OperType to the Def // on this iteration. if (UpdatedOperType) { // Propagate the new DEF type unless it is an indirect memory access. // Future: Propagate until re-DEF of addressing register terminates // the propagation. **!!** CurrDef = this->SetDefType(DefOp, OperType); LeftType = OperType; if (!MDIsIndirectMemoryOpnd(DefOp, this->BasicBlock->GetFunc()->UsesFramePointer())) { bool IsMemOp = (o_reg != DefOp.type); bool MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP); #if SMP_PROPAGATE_MEM_TYPES ; #else // Be conservative and only propagate register DEFs and SAFE stack locs. // We can improve this in the future. **!!** MemPropagate = MemPropagate && SafeFunc; #endif if ((o_reg == DefOp.type) || MemPropagate) { int SSANum = CurrDef->GetSSANum(); if (this->BasicBlock->IsLocalName(DefOp)) { (void) this->BasicBlock->PropagateLocalDefType(DefOp, LeftType, this->GetAddr(), SSANum, IsMemOp); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(DefOp, LeftType, SSANum, IsMemOp); } } } } break; } else if (UNINIT == LeftType) { // SMP_ASSIGN operator has type, so propagate it. 
CurrDef = this->SetDefType(DefOp, OperType); LeftType = OperType; updated = true; // Propagate the new DEF type unless it is an indirect memory access. // Future: Propagate until re-DEF of addressing register terminates // the propagation. **!!** if (!MDIsIndirectMemoryOpnd(DefOp, this->BasicBlock->GetFunc()->UsesFramePointer())) { bool IsMemOp = (o_reg != DefOp.type); bool MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP); #if SMP_PROPAGATE_MEM_TYPES ; #else // Be conservative and only propagate register DEFs and SAFE stack locs. // We can improve this in the future. **!!** MemPropagate = MemPropagate && SafeFunc; #endif if ((o_reg == DefOp.type) || MemPropagate) { int SSANum = CurrDef->GetSSANum(); if (this->BasicBlock->IsLocalName(DefOp)) { (void) this->BasicBlock->PropagateLocalDefType(DefOp, LeftType, this->GetAddr(), SSANum, IsMemOp); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(DefOp, LeftType, SSANum, IsMemOp); } } } break; } else if (UNINIT == RightType) { // SMP_ASSIGN operator has type, so propagate it. if (CurrRT->HasRightSubTree()) { CurrRT->GetRightTree()->SetOperatorType(OperType, this); updated = true; updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else { // For conditional moves, propagate to the pseudo-USE of the // destination register as well as the source operand. 
if (this->MDIsConditionalMoveInstr()) { CurrUse = this->FindUse(DefOp); assert(CurrUse != this->GetLastUse()); if (UNINIT == CurrUse->GetType()) CurrUse = this->SetUseType(DefOp, OperType); else if (OperType != CurrUse->GetType()) { SMP_msg("WARNING: Avoiding lattice oscillation from type %d to %d at %x for: ", CurrUse->GetType(), OperType, this->address); PrintOperand(CurrUse->GetOp()); SMP_msg("\n"); } } CurrUse = this->SetUseType(UseOp, OperType); updated = true; } break; } break; default: SMP_msg("ERROR: Unknown operator %d at %x in %s\n", CurrOp, this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); break; } // end switch on operator // Determine if type inference is finished for this register transfer. if (updated && (!TypeInferenceFinished)) { bool FinishedRight = false; bool FinishedLeft = false; bool FinishedOperator = (CurrRT->GetOperatorType() != UNINIT); if (FinishedOperator) { switch (CurrOp) { case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_SIGN_EXTEND: case SMP_ZERO_EXTEND: case SMP_ADDRESS_OF: // take effective address case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC case SMP_BITWISE_NOT: // unary operator case SMP_NEGATE: // unary negation case SMP_DECREMENT: case SMP_INCREMENT: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC // Unary operators have no right operand. FinishedRight = true; break; default: // All binary operators come here if (CurrRT->HasRightSubTree()) { FinishedRight = CurrRT->GetRightTree()->IsTypeInferenceComplete(); } else { UseOp = CurrRT->GetRightOperand(); if (UseOp.type != o_void) { CurrUse = this->Uses.FindRef(UseOp); assert(CurrUse != this->GetLastUse()); FinishedRight = (CurrUse->GetType() != UNINIT); } else { // if o_void, no further type inference on it is possible. 
FinishedRight = true; } } break; } // end switch on CurrOp if (FinishedRight) { // no point checking left op if right op is not finished DefOp = CurrRT->GetLeftOperand(); if (DefOp.type != o_void) { if (SMP_ASSIGN == CurrOp) { CurrDef = this->Defs.FindRef(DefOp); assert(CurrDef != this->GetLastDef()); FinishedLeft = (CurrDef->GetType() != UNINIT); } else { // not ASSIGN, so really a UseOp not DefOp CurrUse = this->Uses.FindRef(DefOp); assert(CurrUse != this->GetLastUse()); FinishedLeft = (CurrUse->GetType() != UNINIT); } } else { // if o_void, no further type inference on it is possible. FinishedLeft = true; } } TypeInferenceFinished = (FinishedLeft && FinishedRight); } // end if (FinishedOperator) } // end if (updated && (!TypeInferenceFinished)) if (TypeInferenceFinished) { CurrRT->SetTypeInferenceComplete(); } return updated; } // end of SMPInstr::InferOperatorType() // Transfer function: Does operator propagate signedness of its operands to its result? bool SMPInstr::DoesOperatorTransferSign(SMPoperator CurrOp) { bool transfer = false; switch (CurrOp) { case SMP_NULL_OPERATOR: case SMP_CALL: // CALL instruction case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC // No concept of signedness for some operators break; case SMP_ADDRESS_OF: // take effective address case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_U_DIVIDE: case SMP_U_REMAINDER: case SMP_ZERO_EXTEND: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: case SMP_U_COMPARE: // unsigned compare (AND-based) case SMP_S_LEFT_SHIFT: // signed left shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_S_MULTIPLY: case SMP_S_DIVIDE: 
case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: // Inherently unsigned and signed operators force the signedness // of their results, rather than propagating the signedness of // their operands. break; case SMP_DECREMENT: case SMP_INCREMENT: case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow case SMP_ASSIGN: case SMP_BITWISE_AND: case SMP_BITWISE_OR: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result transfer = true; break; case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_REVERSE_SHIFT_U: // Shift right operand by bit count in left operand case SMP_SHUFFLE: // Shuffle bytes, words, etc. 
within destination operation per source mask case SMP_COMPARE_EQ_AND_SET: // Compare for equality and set fields to all 1's or all 0's case SMP_COMPARE_GT_AND_SET: // Compare for greater-than and set fields to all 1's or all 0's case SMP_PACK_S: // Pack operands into extended-precision register, signed saturation for loss of precision case SMP_PACK_U: // Pack operands into extended-precision register, unsigned saturation for loss of precision case SMP_AVERAGE_U: // Average of unsigned operands case SMP_MULTIPLY_AND_ADD: // multiply and add (or multiply and accumulate) case SMP_SUM_OF_DIFFS: // sum over two vectors of absolute values of differences of their elements case SMP_INTERLEAVE: // extended-precision interleaving of bytes or words or dwords etc.; NUMERIC case SMP_CONCATENATE: // extended-precision concatenation; NUMERIC transfer = true; break; default: SMP_msg("ERROR: Unknown operator in %s\n", DisAsmText.GetDisAsm(this->GetAddr())); break; } // end switch on operator return transfer; } // end of SMPInstr::DoesOperatorTransferSign() // Initial inferences (if any) about FG info of operand based solely on the RTL operator type above it in RTL. bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo &InitFG) { bool changed = false; switch (CurrOp) { case SMP_NULL_OPERATOR: break; case SMP_CALL: // CALL instruction InitFG.SignMiscInfo |= FG_MASK_UNSIGNED; // target address is unsigned 32-bit InitFG.SizeInfo |= (MD_NORMAL_BITWIDTH_MASK | FG_MASK_CODEPOINTER); changed = true; break; case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_ADDRESS_OF: // take effective address case SMP_U_COMPARE: // unsigned compare (AND-based) case SMP_S_COMPARE: // signed compare (subtraction-based) // NOTE: The AND-based and subtraction-based comparisons are used // on lots of operands of all types, and the conditional jump that // follows determines signedness, not the operator. 
break; case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_U_DIVIDE: case SMP_U_REMAINDER: case SMP_ZERO_EXTEND: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: InitFG.SignMiscInfo |= FG_MASK_UNSIGNED; changed = true; break; case SMP_S_LEFT_SHIFT: // signed left shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_S_MULTIPLY: case SMP_S_DIVIDE: case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: // Special case: If signed multiply operator, it might sometimes // be used for unsigned operands when upper bits of the result // are discarded, because there is no difference in the result bits // between unsigned and signed multiplcation when only the lower // N bits are retained and the upper N bits are discarded. 
if ((SMP_S_MULTIPLY == CurrOp) && (!(this->MDIsSignedArithmetic()))) { break; } InitFG.SignMiscInfo |= FG_MASK_SIGNED; changed = true; break; case SMP_DECREMENT: case SMP_INCREMENT: case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow case SMP_ASSIGN: case SMP_BITWISE_AND: case SMP_BITWISE_OR: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC break; case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC InitFG.SignMiscInfo |= FG_MASK_SIGNED; InitFG.SizeInfo |= FG_MASK_FLOAT_MMX; changed = true; break; case SMP_REVERSE_SHIFT_U: // Shift right operand by bit count in left operand case SMP_SHUFFLE: // Shuffle bytes, words, etc. 
within destination operation per source mask case SMP_COMPARE_EQ_AND_SET: // Compare for equality and set fields to all 1's or all 0's case SMP_COMPARE_GT_AND_SET: // Compare for greater-than and set fields to all 1's or all 0's case SMP_PACK_S: // Pack operands into extended-precision register, signed saturation for loss of precision InitFG.SignMiscInfo |= FG_MASK_SIGNED; InitFG.SizeInfo |= (FG_MASK_FLOAT_MMX | FG_MASK_BITWIDTH_128); changed = true; break; case SMP_PACK_U: // Pack operands into extended-precision register, unsigned saturation for loss of precision case SMP_AVERAGE_U: // Average of unsigned operands case SMP_MULTIPLY_AND_ADD: // multiply and add (or multiply and accumulate) case SMP_SUM_OF_DIFFS: // sum over two vectors of absolute values of differences of their elements InitFG.SignMiscInfo |= FG_MASK_UNSIGNED; InitFG.SizeInfo |= (FG_MASK_FLOAT_MMX | FG_MASK_BITWIDTH_128); changed = true; break; case SMP_INTERLEAVE: // extended-precision interleaving of bytes or words or dwords etc.; NUMERIC case SMP_CONCATENATE: // extended-precision concatenation; NUMERIC InitFG.SignMiscInfo |= FG_MASK_SIGNED; InitFG.SizeInfo |= (FG_MASK_FLOAT_MMX | FG_MASK_BITWIDTH_128); changed = true; break; default: SMP_msg("ERROR: Unknown operator in %s\n", DisAsmText.GetDisAsm(this->GetAddr())); break; } // end switch on operator return changed; } // end of SMPInstr::InitFGInfoFromOperator() // Helper to take USE operand, find its SSANum, and return its UseHashValue. int SMPInstr::GetUseOpHashAndSSA(op_t UseOp, int &SSANum) { op_t SearchOp = UseOp; SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); set<DefOrUse, LessDefUse>::iterator UseIter = this->FindUse(SearchOp); assert(UseIter != this->GetLastUse()); SSANum = UseIter->GetSSANum(); int UseHashValue = HashGlobalNameAndSSA(SearchOp, SSANum); return UseHashValue; } // end of SMPInstr::GetUseOpHashAndSSA() // Helper to take DEF operand, find its SSANum, and return its DefHashValue. 
int SMPInstr::GetDefOpHashAndSSA(op_t DefOp, int &SSANum) { op_t SearchOp = DefOp; SearchOp.reg = MDCanonicalizeSubReg(DefOp.reg); set<DefOrUse, LessDefUse>::iterator DefIter = this->FindDef(SearchOp); assert(DefIter != this->GetLastDef()); SSANum = DefIter->GetSSANum(); int DefHashValue = HashGlobalNameAndSSA(SearchOp, SSANum); return DefHashValue; } // end of SMPInstr::GetDefOpHashAndSSA() // helper for InferOperatorFGInfo() to update DEF maps, return true if changed maps bool SMPInstr::UpdateDefOpFGInfo(op_t DefOp, struct FineGrainedInfo NewFG) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? set<DefOrUse, LessDefUse>::iterator DefIter; int SSANum; int DefHashValue; op_t SearchOp; bool LocalName; struct FineGrainedInfo OldFG, UnionFG; // If operator is inherently signed, then we will have // a sign bit set in NewFG from InitFGInfoFromOperator(). DefHashValue = this->GetDefOpHashAndSSA(DefOp, SSANum); LocalName = this->BasicBlock->IsLocalName(DefOp); if (LocalName) { // Get old FG info from block level. OldFG = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // global name // Get old FG info from function level. OldFG = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } UnionFG.SignMiscInfo = OldFG.SignMiscInfo | NewFG.SignMiscInfo; UnionFG.SizeInfo = OldFG.SizeInfo | NewFG.SizeInfo; if ((OldFG.SignMiscInfo != UnionFG.SignMiscInfo) || (OldFG.SizeInfo != UnionFG.SizeInfo)) { // The signs they are a-changin'. MapsChanged = true; if (LocalName) this->BasicBlock->UpdateDefFGInfo(DefHashValue, UnionFG); else this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, UnionFG); } return MapsChanged; } // end of SMPInstr::UpdateDefOpFGInfo() // helper for InferOperatorFGInfo() to update USE maps, return true if changed maps bool SMPInstr::UpdateUseOpFGInfo(op_t UseOp, struct FineGrainedInfo NewFG) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? 
int SSANum; int UseHashValue; bool LocalName; struct FineGrainedInfo OldFG, UnionFG; // If operator is inherently signed, then we will have // a sign bit set in NewFG from InitFGInfoFromOperator(). UseHashValue = this->GetUseOpHashAndSSA(UseOp, SSANum); LocalName = this->BasicBlock->IsLocalName(UseOp); if (LocalName) { // Get old FG info from block level. OldFG = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // global name // Get old FG info from function level. OldFG = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } UnionFG.SignMiscInfo = OldFG.SignMiscInfo | NewFG.SignMiscInfo; UnionFG.SizeInfo = OldFG.SizeInfo | NewFG.SizeInfo; if ((OldFG.SignMiscInfo != UnionFG.SignMiscInfo) || (OldFG.SizeInfo != UnionFG.SizeInfo)) { // The signs they are a-changin'. MapsChanged = true; if (LocalName) this->BasicBlock->UpdateUseFGInfo(UseHashValue, UnionFG); else this->BasicBlock->GetFunc()->UpdateUseFGInfo(UseHashValue, UnionFG); } return MapsChanged; } // end of SMPInstr::UpdateUseOpFGInfo() // Helper to fetch DEF signedness info for UseOp that has none. unsigned short SMPInstr::GetDefSignInfoFromUseOp(op_t UseOp) { int SSANum, UseHashValue; bool LocalName; UseHashValue = this->GetUseOpHashAndSSA(UseOp, SSANum); LocalName = this->BasicBlock->IsLocalName(UseOp); if (LocalName) { // Get old sign info from block level. return this->BasicBlock->GetDefSignMiscInfo(UseHashValue); } else { // global name // Get old sign info from function level. return this->BasicBlock->GetFunc()->GetDefSignMiscInfo(UseHashValue); } } // end of SMPInstr::GetDefSignInfoFromUseOp() // infer FG info, + width on FirstIter; pass out FG info for op subtree, return true if change made to any FG info map. bool SMPInstr::InferOperatorFGInfo(SMPRegTransfer *CurrRT, bool FirstIter, struct FineGrainedInfo &OpFG) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? bool NewChange = false; // Bit changes from InitFGInfoFromOperator() ? 
SMPoperator CurrOp = CurrRT->GetOperator(); struct FineGrainedInfo LeftFG, OldLeftFG; struct FineGrainedInfo RightFG, OldRightFG; op_t LeftOp, RightOp; unsigned short WidthMask, SignMask; bool CurrOpTransfersSign = this->DoesOperatorTransferSign(CurrOp); bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); bool success; int DefHashValue, UseHashValue, SSANum; // Recurse to the right first, so we can do a depth-first accumulation of FG info. RightFG.SignMiscInfo = 0; RightFG.SizeInfo = 0; if (CurrRT->HasRightSubTree()) { if (FirstIter) { // Get width as well as signedness NewChange = this->InitFGInfoFromOperator(CurrOp, RightFG); } // end if (FirstIter) MapsChanged |= this->InferOperatorFGInfo(CurrRT->GetRightTree(), FirstIter, RightFG); } else { RightOp = CurrRT->GetRightOperand(); if (RightOp.type == o_imm) { // If immediate operand is a data address or code address, we can infer that it is unsigned. uval_t ImmVal = RightOp.value; if (IsImmedGlobalAddress((ea_t) ImmVal)) { // Data address (type GLOBALPTR) RightFG.SignMiscInfo |= FG_MASK_UNSIGNED; } else if (this->MDIsInterruptCall() || IsImmedCodeAddress((ea_t) ImmVal)) { // Code address (type GLOBALPTR) RightFG.SignMiscInfo |= FG_MASK_UNSIGNED; } } else if ((RightOp.type == o_reg) && !RightOp.is_reg(MD_INSTRUCTION_POINTER_REG)) { if (FirstIter) { // Get width as well as signedness NewChange = this->InitFGInfoFromOperator(CurrOp, RightFG); WidthMask = ComputeOperandBitWidthMask(RightOp, 0); RightFG.SizeInfo |= WidthMask; } // end if (FirstIter) #define SMP_AGGRESSIVE_SIGN_TRANSFER 1 #if SMP_AGGRESSIVE_SIGN_TRANSFER else { // On all iterations other than 1st, see if USE has FG info. UseHashValue = this->GetUseOpHashAndSSA(RightOp, SSANum); if (this->BasicBlock->IsLocalName(RightOp)) { // Get FG info from block. RightFG = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Get FG info from function level. 
RightFG = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } } #endif // Propagate signedness on all iterations. // If operator is inherently signed, then we will have // a sign bit set in RightFG from InitFGInfoFromOperator(). if ((RightFG.SignMiscInfo == 0) && CurrOpTransfersSign) { // We have a USE with no sign info. See if we // can get sign info from the DEF of this USE so we can // transfer it up the RTL tree. RightFG.SignMiscInfo = (FG_MASK_SIGNEDNESS_BITS & (this->GetDefSignInfoFromUseOp(RightOp))); } if ((RightFG.SignMiscInfo != 0) || (RightFG.SizeInfo != 0)) MapsChanged |= this->UpdateUseOpFGInfo(RightOp, RightFG); } // end if (RightOP is o_reg) else if (MDIsStackAccessOpnd(RightOp, UseFP)) { // We used to assume that all FG info transfers from stack locations to // the target registers of stack loads happened in SMPInstr::MDSetWidthSignInfo(), // in an early pass that needed no iteration. The FG info was loaded from the // StackFGInfo that was computed in SMPFunction::FindOutgoingArgsSize() based solely // on whether the load was sign-extended or zero-extended. Of course, many stack // locations have neither kind of signed/unsigned load. So, if we see a store to // a stack location with no signedness, we transfer the signedness of the RightFG // to the stack location FGInfo in the code below that processes the LeftOp. // As a result, we now have a need to examine regular loads from the stack to // see if there is signedness info for the stack location. success = this->BasicBlock->GetFunc()->MDGetFGStackLocInfo(this->address, RightOp, RightFG); if (success) { SignMask = (RightFG.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); RightFG.SizeInfo = 0; // only want to transfer signedness } } } // end if (right subtree) else right operand LeftFG.SignMiscInfo = 0; LeftFG.SizeInfo = 0; LeftOp = CurrRT->GetLeftOperand(); bool OpIsDEF = (SMP_ASSIGN == CurrOp); // Skip control-flow assignments to the instruction pointer register. 
if ((LeftOp.type == o_reg) && !LeftOp.is_reg(MD_INSTRUCTION_POINTER_REG)) { if (FirstIter) { // Get width as well as signedness NewChange = this->InitFGInfoFromOperator(CurrOp, LeftFG); // Special case: For sign-extended and zero-extended loads, // we don't know whether the DEF will always be USEd as // the smaller or larger size. For example, we could // zero-extend a 16-bit stack location into a 32-bit register // just because the compiler always loads unsigned shorts // that way, but we might never use it as a 32-bit value. // So there is no truncation if we store only 16 bits later. // By setting the target of an extended load to zero width, // we signal that we want the maximum USE width to determine // whether the store is truncated (see EmitIntegerErrorAnnotations). WidthMask = ComputeOperandBitWidthMask(LeftOp, 0); if (OpIsDEF) { if (this->MDIsSignedLoad(SignMask)) { WidthMask = 0; } // DEF inherits sign from right hand side. LeftFG.SignMiscInfo |= RightFG.SignMiscInfo; } else if ((LeftFG.SignMiscInfo == 0) && CurrOpTransfersSign) { // We have a USE, not a DEF, with no sign info. See if we // can get sign info from the DEF of this USE so we can // transfer it up the RTL tree. LeftFG.SignMiscInfo = (FG_MASK_SIGNEDNESS_BITS & (this->GetDefSignInfoFromUseOp(LeftOp))); } LeftFG.SizeInfo |= WidthMask; if ((LeftFG.SignMiscInfo != 0) || (LeftFG.SizeInfo != 0)) { // Either NewChanged or CurrOpTransfersSign is true or we set WidthMask above. // See if we would change the FG map entry. if (OpIsDEF) { // Need DEF map info MapsChanged |= this->UpdateDefOpFGInfo(LeftOp, LeftFG); } else { // need USE map info MapsChanged |= this->UpdateUseOpFGInfo(LeftOp, LeftFG); } } // end if non-zero LeftFG info } // end if (FirstIter) #if SMP_AGGRESSIVE_SIGN_TRANSFER else { // On all iterations other than 1st, see if LeftOp has FG info. 
if (!OpIsDEF) { // LeftOp is a USE UseHashValue = this->GetUseOpHashAndSSA(LeftOp, SSANum); if (this->BasicBlock->IsLocalName(LeftOp)) { // Get FG info from block. LeftFG = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Get FG info from function level. LeftFG = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } } else { // LeftOp is a DEF DefHashValue = this->GetDefOpHashAndSSA(LeftOp, SSANum); if (this->BasicBlock->IsLocalName(LeftOp)) { // Get FG info from block. LeftFG = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Get FG info from function level. LeftFG = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } // See if RightFG has sign info to transfer to LeftFG. SignMask = (RightFG.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); if ((SignMask != 0) && (SignMask != (LeftFG.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS))) { // SignMask from RightFG has bits that will change LeftFG.SignMiscInfo. LeftFG.SignMiscInfo |= SignMask; MapsChanged |= this->UpdateDefOpFGInfo(LeftOp, LeftFG); } } } #endif } // end of register case for LeftOp else if (OpIsDEF && MDIsStackAccessOpnd(LeftOp, UseFP) && (!this->BasicBlock->GetFunc()->IsInOutgoingArgsRegion(LeftOp))) { // For stores into the stack, if the operand being stored has signedness // and the stack location has no signedness, then we have a case where // none of the loads from the stack location were signed, so it is // safe to infer signedness of the stack location based on what is being // stored into it, as no store signedness will conflict with load signedness. success = this->BasicBlock->GetFunc()->MDGetFGStackLocInfo(this->address, LeftOp, LeftFG); assert(success); if (0 == (LeftFG.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS)) { // No previous signedness info for the stack location. // Get signedness info from RightFG. SignMask = (RightFG.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); if ((0 != SignMask) && (FG_MASK_INCONSISTENT_SIGN != SignMask)) { // Operand being stored has signedness. 
// Transfer the signedness to the stack location. struct FineGrainedInfo TempFG; TempFG.SignMiscInfo = SignMask; TempFG.SizeInfo = 0; // just update signedness success = this->BasicBlock->GetFunc()->MDUpdateFGStackLocInfo(this->address, LeftOp, TempFG); MapsChanged |= success; } } } // Prepare to return FG info for operator. First, OR the left and right FG infos. if (NewChange || MapsChanged || CurrOpTransfersSign) { OpFG.SignMiscInfo |= LeftFG.SignMiscInfo; OpFG.SizeInfo |= LeftFG.SizeInfo; OpFG.SignMiscInfo |= RightFG.SignMiscInfo; OpFG.SizeInfo |= RightFG.SizeInfo; } // An operator could override the width or signedness info of its operands. if (CurrOp == SMP_ADDRESS_OF) { // Result is 32-bit data pointer. OpFG.SizeInfo &= (~FG_MASK_BITWIDTH_FIELDS); // clear all width bits OpFG.SizeInfo |= (FG_MASK_BITWIDTH_32 | FG_MASK_DATAPOINTER); OpFG.SignMiscInfo &= (~FG_MASK_SIGNED); OpFG.SignMiscInfo |= FG_MASK_UNSIGNED; } return MapsChanged; } // end of SMPInstr::InferOperatorFGInfo() // infer width on first pass, signedness on all passes bool SMPInstr::InferFGInfo(unsigned short IterCount) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? struct FineGrainedInfo OpFG; SMPitype DFType = this->GetDataFlowType(); assert(0 < IterCount); // start IterCount at 1, not 0. if (DFType != DEFAULT) { // We have a control flow instruction, e.g. call, return, branch, jump // No data operands unless these instructions are indirect through a register, // and the indirect operand is a memory operand in that case, e.g. [eax]. 
return MapsChanged; } for (size_t index = 0; index < this->RTL.GetCount(); ++index) { SMPRegTransfer *CurrRT = this->RTL.GetRT(index); if (SMP_NULL_OPERATOR == CurrRT->GetOperator()) // nothing to infer continue; OpFG.SignMiscInfo = 0; OpFG.SizeInfo = 0; MapsChanged |= this->InferOperatorFGInfo(CurrRT, (1 == IterCount), OpFG); if (SMP_CALL == CurrRT->GetOperator()) // no LeftOp DEF continue; } // end for all RTs in the RTL return MapsChanged; } // end of SMPInstr::InferFGInfo() // Get the meet of the metadata types of all non-flags DEFs. SMPMetadataType SMPInstr::GetDefMetadataType(void) { SMPMetadataType MeetType = DEF_METADATA_UNANALYZED; set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { SMPMetadataType CurrType; op_t DefOp = CurrDef->GetOp(); if (DefOp.is_reg(X86_FLAGS_REG)) continue; // flags are always unused metadata; irrelevant CurrType = CurrDef->GetMetadataStatus(); if (MeetType == CurrType) continue; // no meet operation to perform // Any time we find USED metadata, that overrides all other types. if (CurrType == DEF_METADATA_USED) return CurrType; if (MeetType == DEF_METADATA_UNANALYZED) MeetType = CurrType; else if (MeetType < DEF_METADATA_REDUNDANT) { // Conflict between types of different DEFs. It could be that // a multiply or divide instruction DEFs EAX and EDX, and one // of them is used in a store and the other is unused. In that // case, the final MeetType is USED and we can return. Or, if // one type is UNUSED and the other is REDUNDANT, we can set // the final type to the REDUNDANT type and return. The USED case // is handled above, so we must have the UNUSED vs. REDUNDANT case. assert(CurrType >= DEF_METADATA_REDUNDANT); MeetType = CurrType; } else { // MeetType REDUNDANT, not equal to CurrType. if (CurrType >= DEF_METADATA_REDUNDANT) { // One type is profile derived, both are REDUNDANT. 
MeetType = DEF_METADATA_PROF_REDUNDANT; } else { assert(DEF_METADATA_UNUSED == CurrType); // leave MeetType as REDUNDANT } } } // end for all DEFs return MeetType; } // end of SMPInstr::GetDefMetadataType() // Handle x86 opcode SIB byte annotations. void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) { int BaseReg; int IndexReg; ea_t displacement; ushort ScaleFactor; char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement); if (BaseReg == R_sp) { // ESP cannot be IndexReg // ESP-relative constant offset SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) { // EBP-relative constant offset SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } return; } // end of MDAnnotateSIBStackConstants // Emit annotations for constants used as ptr offsets from EBP or // ESP into the stack frame. Only pay attention to EBP-relative // offsets if EBP is being used as a frame pointer (UseFP == true). 
void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) { op_t Opnd; ea_t offset; int BaseReg; int IndexReg; ushort ScaleFactor; char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); #if 0 if (this->address == 0x80925f4) { SMP_msg("PROBLEM INSTRUCTION: \n"); this->PrintOperands(); } #endif for (int i = 0; i < UA_MAXOP; ++i) { Opnd = this->SMPcmd.Operands[i]; if ((Opnd.type == o_displ) || (Opnd.type == o_phrase)) MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset); if (Opnd.type == o_displ) { if (Opnd.hasSIB) { MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); } else { // no SIB if (BaseReg == R_sp) { // ESP-relative constant offset SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } else if (UseFP && (BaseReg == R_bp)) { // EBP-relative constant offset SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end if (Opnd.type == o_displ) else if (Opnd.type == o_phrase) { offset = 0; // mmStrata thinks [esp] is [esp+0] if (Opnd.hasSIB) { MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); } else { // Something like [ecx]; is it [esp] or [ebp] ? if (BaseReg == R_sp) { // ESP-relative constant offset SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } else if (UseFP && (BaseReg == R_bp)) { // EBP-relative constant offset SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end else if (Opnd.type == o_phrase) } // end for all operands // If we move a stack pointer or frame pointer into another register, we // need to annotate the implicit zero offset, e.g. mov edi,esp == mov edi,esp+0 // and edi is becoming a stack pointer that mmStrata needs to track. 
if (this->MDIsStackPointerCopy(UseFP)) { // Two possibilities: a move of the stack pointer, or an "lea" // opcode, e.g. lea eax,[eap+8] ==> eax:=esp+8. In the move // instruction (e.g. mov eax,esp), we have the implicit zero // offset from the stack pointer register, but in the lea case, // we might have zero or some other offset (lea eax,[esp] has // the implicit zero). int ESPoffset = 0; if (NN_lea == this->SMPcmd.itype) { ESPoffset = this->MDGetImmedUse(); } // NOTE: Looks like this next line should be "else" because an lea instruction // looks like it has a memory operand, hence it has already been handled above. // We are getting duplicate annotations for lea instructions. else { if (UseFP && this->GetFirstUse()->GetOp().is_reg(R_bp)) { SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, ESPoffset, disasm); } else { SMP_fprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, ESPoffset, disasm); } } } return; } // end of SMPInstr::AnnotateStackConstants() // Emit all annotations for the instruction in the absence of RTL type inference. void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE *AnnotFile, FILE *InfoAnnotFile) { ea_t addr = this->address; flags_t InstrFlags = getFlags(addr); bool MemDest = this->HasDestMemoryOperand(); bool MemSrc = this->HasSourceMemoryOperand(); bool SecondSrcOperandImmNum = this->IsSecondSrcOperandNumeric(InstrFlags); // assumes 2nd source is Imm or not-numeric?! bool NoWarnFlag = false; // NOWARN annotation emitted? char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); #if SMP_CHILDACCESS_ALL_CODE bool OrphanCode = (NULL == this->BasicBlock); ProfilerInformation *ProfInfo = NULL; if (!OrphanCode) ProfInfo = this->BasicBlock->GetFunc()->GetProg()->GetProfInfo(); #endif ++OptCount[OptType]; // keep count for debugging info #if SMP_ANNOTATE_ALL_MEMORY_OPERANDS // Emit informational annotations for memory operands. 
if (MemSrc) { op_t MemSrcOp = this->MDGetMemUseOp(); size_t SrcBitWidth = 8 * GetOpDataSize(MemSrcOp); SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMSRC %d", addr, this->SMPcmd.size, SrcBitWidth); AnnotPrintOperand(MemSrcOp, InfoAnnotFile); SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm); } if (MemDest) { op_t MemDestOp = this->MDGetMemDefOp(); size_t DestBitWidth = 8 * GetOpDataSize(MemDestOp); SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMDEF %d", addr, this->SMPcmd.size, DestBitWidth); AnnotPrintOperand(MemDestOp, InfoAnnotFile); SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm); } #endif // If the instruction is a CALL (or INDIR_CALL that has been resolved to // a single target address), then we need to see if the call is to a // function that has been forbidden by a security policy. If so, we // need to output a security alert. // In the near future, we will output SPRI instrumentation to prevent // the system/library call from executing. if ((BADADDR != this->CallTarget) && (!this->IsCallUsedAsJump())) { // We have a resolved call target address, either via direct or indirect call. 
string FuncName = this->GetTrimmedCalledFunctionName(); ZST_SysCallType FuncCallType = GetCallTypeFromFuncName(FuncName); ZST_Policy FuncCallPolicy = GetPolicyFromCallType(FuncCallType); if (ZST_DISALLOW == FuncCallPolicy) { if ((NULL != this->GetBlock()) && (NULL != this->GetBlock()->GetFunc())) { SMP_fprintf(ZST_AlarmFile, "ALARM: Call to %s will be disallowed at %x in %s\n", FuncName.c_str(), this->address, this->GetBlock()->GetFunc()->GetFuncName()); } else { SMP_fprintf(ZST_AlarmFile, "ALARM: Call to %s will be disallowed at %x\n", FuncName.c_str(), this->address); } SMP_fprintf(ZST_AlarmFile, "ALARM REASON: Call policy is DISALLOW for all calls of type %s\n", CallTypeNames[FuncCallType]); SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR SECURITYCALL Disallow 1 1 %s \n", addr, this->SMPcmd.size, disasm); } } #if SMP_DEBUG_MEM if (MemDest || MemSrc) { SMP_msg("OptType: %d %s", OptType, disasm); this->PrintOperands(); } #endif // Emit appropriate optimization annotations. bool SDTInstrumentation = false; switch (OptType) { case 0: // SDT will have to handle these { #if SMP_DEBUG_TYPE0 SMP_msg("OptType 0: %x %s\n", addr, disasm); #endif // mmStrata wants to suppress warnings on the PUSH // instructions that precede the LocalVarsAllocInstr // (i.e. the PUSHes of callee-saved regs). if ((!AllocSeen || !NeedsFrame) && this->MDIsPushInstr()) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL NoWarn %s \n", addr, -3, disasm); NoWarnFlag = true; } else { SDTInstrumentation = true; } break; } case 1: // nothing for SDT to do { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; break; } case 4: // INC, DEC, etc.: no SDT work unless MemDest { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL Always1stSrc %s \n", addr, -1, disasm); ++AnnotationCount[OptType]; break; } case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. 
{ if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } if (SecondSrcOperandImmNum && !this->MDIsFrameAllocInstr() #if SPECIAL_CASE_CARRY_BORROW && (this->SMPcmd.itype != NN_adc) && (this->SMPcmd.itype != NN_sbb) #endif ) { // treat as category 1 SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } else { SDTInstrumentation = true; } break; } case 6: // Only OS code should include these; problem for SDT { if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL AlwaysPTR %s \n", addr, -OptType, disasm); ++AnnotationCount[OptType]; break; } case 8: // Implicitly writes to EDX:EAX, always numeric. { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ZZ %s %s \n", addr, -2, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; SDTInstrumentation = true; break; } case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) { if (MemDest) { #if SMP_DEBUG2 // MemDest seems to happen too much. SMP_msg("Floating point MemDest: %s \n", disasm); #endif SDTInstrumentation = true; break; // treat as category 0 } SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; break; } case 10: // Implicitly writes to EDX:EAX and ECX, always numeric. 
{ SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ECX ZZ %s %s \n", addr, -2, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; SDTInstrumentation = true; break; } default: // 2,3,7: Optimization possibilities depend on operands { #if SMP_DEBUG2 if (OptType == 3) { // MOV instr class if (MemDest) { SMP_msg("MemDest on MOV: %s\n", disasm); } else if (!SecondSrcOperandNum) { SMP_msg("MOV: not 2nd op numeric: %s\n", disasm); this->PrintOperands(); } } #endif SDTInstrumentation = true; if (MemDest) { #if SMP_DEBUG_XOR if (OptType == 2) SMP_msg("MemDest on OptType 2: %s\n", disasm); #endif break; // treat as category 0 } if ((OptType == 2) || (OptType == 7) || SecondSrcOperandImmNum) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s %s %s \n", addr, -2, this->DestString(OptType), OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } break; } } // end switch (OptType) // always emit stack constant annotations, in case strata is // instrumenting all instructions, or trying to verify speculative annotations. this->AnnotateStackConstants(UseFP, AnnotFile); // If mmStrata is going to have to deal with the // instruction, then we can annotate EBP and ESP // relative constant offsets. If we have emitted // an annotation of type -1, there is no point // in telling mmStrata about these constants. // Likewise, we can tell mmStrata if a MemDest is an // non-directly-accessed child object. if (SDTInstrumentation || NoWarnFlag) { if (strlen(this->DeadRegsString) > 0) { // Optimize by informing mmStrata of dead registers. It can avoid saving // and restoring dead state. This is particularly important for EFLAGS, // as restoring the flags is a pipeline serializing instruction. 
SMP_fprintf(AnnotFile, "%10x %6d INSTR DEADREGS %s ZZ %s \n", addr, this->SMPcmd.size, this->DeadRegsString, disasm); } #if SMP_CHILDACCESS_ALL_CODE int ChildOffset, ChildSize; if (MemDest && !OrphanCode && ProfInfo->GetMemoryAccessInfo()->ComputeNonDirectAccessRegion(addr, ChildOffset, ChildSize)) { SMP_fprintf(AnnotFile, "%10x %6d INSTR CHILDACCESS %d %d ZZ %s \n", addr, this->SMPcmd.size, ChildOffset, ChildSize, disasm); } #endif } return; } // end of SMPInstr::EmitAnnotations() /** * Emits Safe Returns * Mark the type of the annotation as "-4". Currently the SDT is ignoring this * annotation. */ void SMPInstr::EmitSafeReturn(FILE *AnnotFile) { char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL SafeReturn %s\n", this->address, -4, disasm); } // Emit all annotations for the instruction using RTL type inference. void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE *AnnotFile, FILE *InfoAnnotFile) { ea_t addr = this->address; flags_t InstrFlags = getFlags(addr); int TypeGroup = SMPTypeCategory[this->SMPcmd.itype]; bool NumericDEFs = this->AllDefsNumeric(); // all DEFs are NUMERIC or CODEPTR bool ProfiledDEFs = this->AnyDefsProfiled(); // Some DEFs come from the profiler bool UnusedMetadata = this->AllDefMetadataUnused(); bool MemDest = this->HasDestMemoryOperand(); bool MemSrc = this->HasSourceMemoryOperand(); bool SecondSrcOperandImmNum = this->IsSecondSrcOperandNumeric(InstrFlags); // assumes 2nd source is imm or not-numeric?? bool NoWarnFlag = false; // NOWARN annotation emitted? bool CarryBorrow = ((this->SMPcmd.itype == NN_adc) || (this->SMPcmd.itype == NN_sbb)); // Do we have the special case in which a non-NUMERIC comes into // an add with carry or subtract with borrow and the result // has been inferred to be NUMERIC? 
bool TypeChange = CarryBorrow && (!IsNumeric(this->AddSubUseType)) && NumericDEFs; SMPMetadataType DefMetadataType = this->GetDefMetadataType(); ProfilerInformation *ProfInfo; ProfInfo = this->BasicBlock->GetFunc()->GetProg()->GetProfInfo(); char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); ++OptCount[this->OptType]; // keep count for debugging info if (this->IsNop()) TypeGroup = 1; // no-op idioms need their category reset // Emit appropriate optimization annotations. bool SDTInstrumentation = false; #if SMP_ANNOTATE_ALL_MEMORY_OPERANDS // Emit informational annotations for memory operands. if (MemSrc) { op_t MemSrcOp = this->MDGetMemUseOp(); size_t SrcBitWidth = 8 * GetOpDataSize(MemSrcOp); SMP_fprintf(InfoAnnotFile, "%10x %6zu INSTR MEMSRC %zu", addr, this->SMPcmd.size, SrcBitWidth); AnnotPrintOperand(MemSrcOp, InfoAnnotFile); SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm); } if (MemDest) { op_t MemDestOp = this->MDGetMemDefOp(); size_t DestBitWidth = 8 * GetOpDataSize(MemDestOp); SMP_fprintf(InfoAnnotFile, "%10x %6zu INSTR MEMDEF %zu", addr, this->SMPcmd.size, DestBitWidth); AnnotPrintOperand(MemDestOp, InfoAnnotFile); SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm); } #endif // If the instruction is a CALL (or INDIR_CALL that has been resolved to // a single target address), then we need to see if the call is to a // function that has been forbidden by a security policy. If so, we // need to output a security alert. // In the near future, we will output SPRI instrumentation to prevent // the system/library call from executing. if ((BADADDR != this->CallTarget) && (!this->IsCallUsedAsJump())) { // We have a resolved call target address, either via direct or indirect call. 
string FuncName = this->GetTrimmedCalledFunctionName(); ZST_SysCallType FuncCallType = GetCallTypeFromFuncName(FuncName); ZST_Policy FuncCallPolicy = GetPolicyFromCallType(FuncCallType); if (ZST_DISALLOW == FuncCallPolicy) { SMP_fprintf(ZST_AlarmFile, "ALARM: Call to %s will be disallowed at %x in %s\n", FuncName.c_str(), this->address, this->GetBlock()->GetFunc()->GetFuncName()); SMP_fprintf(ZST_AlarmFile, "ALARM REASON: Call policy is DISALLOW for all calls of type %s\n", CallTypeNames[FuncCallType]); SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR SECURITYCALL Disallow 1 1 %s \n", addr, this->SMPcmd.size, disasm); } } // If the DEF metadata is all unused, mmStrata can skip the instruction. // We omit this for groups 1 and 14, so that the metadata analysis // does not get statistical credit for instructions that were already // getting -1 annotations without analysis. // We also cannot skip NN_adc and NN_sbb instructions that change the // type of the incoming register. if ((1 != TypeGroup) && (14 != TypeGroup) && (!this->MDIsInterruptCall()) && !TypeChange) { if (UnusedMetadata) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL MetadataUnused %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; return; } else if (DEF_METADATA_REDUNDANT == DefMetadataType) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL MetadataRedundant %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; return; } else if (DEF_METADATA_PROF_REDUNDANT == DefMetadataType) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL MetadataRedundant %s \n", addr, -257, disasm); ++AnnotationCount[this->OptType]; // Profiler annotations could be backed off due to false // positives, in which case we will need stack constant // annotations. this->AnnotateStackConstants(UseFP, AnnotFile); return; } } switch (TypeGroup) { case 0: // SDT will have to handle these case 11: // PUSH/POP **!!** What if we push/pop NUMERIC type? Optimize? // --jdh // pop numeric's can be optimized with a numericdef annotation. 
// numeric push's can't immediately be optimized, but if the stack location // can be proven as dead metadata, then perhaps optimize. // --jdh // mmStrata wants to suppress warnings on the PUSH // instructions that precede the LocalVarsAllocInstr // (i.e. the PUSHes of callee-saved regs). if ((!AllocSeen || !NeedsFrame) && this->MDIsPushInstr()) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL NoWarn %s \n", addr, -3, disasm); NoWarnFlag = true; } else if (this->MDIsPopInstr() && NumericDEFs) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } else { SDTInstrumentation = true; } break; case 1: // nothing for SDT to do case 14: if (MemDest) { SMP_msg("ERROR: MemDest in Type Category 1 or 14: %x %s\n", addr, disasm); SDTInstrumentation = true; break; } SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; break; case 4: // INC, DEC, etc.: no SDT work unless MemDest if (MemDest || MemSrc) { // pretty conservative here? // could be more aggressive if we know there's no overflow. -- jdh SDTInstrumentation = true; break; // treat as category 0 } SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL Always1stSrc %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; break; case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. 
#if 1 if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } #endif this->SetAddSubSourceType(); if (SecondSrcOperandImmNum && !this->MDIsFrameAllocInstr() && !TypeChange #if SPECIAL_CASE_CARRY_BORROW && (!CarryBorrow) #endif ) { // treat as category 1 SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; } else if (IsEqType(NUMERIC, this->AddSubSourceType) && !this->MDIsFrameAllocInstr() && !TypeChange #if SPECIAL_CASE_CARRY_BORROW && (!CarryBorrow) #endif ) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL 2ndSrcNumeric %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; } else if (NumericDEFs) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } #if SMP_OPTIMIZE_ADD_TO_NUMERIC else if ((NN_add == this->SMPcmd.itype) && (!MemSrc) && IsNumeric(this->AddSubUseType)) { // reg1 := reg1 + reg2, where reg1 comes in as NUMERIC, // means that reg1 will get DEFed to the type of reg2, // whatever it is. If reg2 were known to be NUMERIC, // we would have hit one of the annotation cases above. SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s := %s ZZ AddToNumeric %s \n", addr, -5, RegNames[this->AddSubUseOp.reg], RegNames[this->AddSubSourceOp.reg], disasm); ++AnnotationCount[this->OptType]; } #endif else { SDTInstrumentation = true; } break; case 6: // Only OS code should include these; problem for SDT if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL AlwaysPTR %s \n", addr, -OptType, disasm); ++AnnotationCount[this->OptType]; break; case 8: // Implicitly writes to EDX:EAX, always numeric. 
if (this->OptType == 10) { // writes to ECX also SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ECX ZZ %s %s \n", addr, -2, OptExplanation[this->OptType], disasm); } else { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ZZ %s %s \n", addr, -2, OptExplanation[this->OptType], disasm); } ++AnnotationCount[this->OptType]; SDTInstrumentation = true; break; case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) if (MemDest) { SDTInstrumentation = true; #if 0 if (NumericDEFs) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } #endif } else { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; } break; case 10: // AND, OR, etc.: If all DEFs have been inferred to be // NUMERIC, then output optimizing annotation. SDTInstrumentation = true; if (MemDest) { // **!!** optimize with numeric annotation in future break; // treat as category 0 } else if (NumericDEFs) { // NUMERIC result because of NUMERIC sources SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } break; case 12: // Exchange, exchange and add, conditional exchange: All NUMERIC // sources ==> NUMERIC DEFs, so nothing for mmStrata to do. if (MemDest) { // **!!** optimize with numeric annotation in future SDTInstrumentation = true; break; // treat as category 0 } else if (NumericDEFs) { // NUMERIC result because of NUMERIC sources SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, ProfiledDEFs ? -256-1 : -1, OptExplanation[TypeGroup], disasm); ++AnnotationCount[this->OptType]; } else SDTInstrumentation = true; break; case 13: case 15: // Floating point, NUMERIC, possible memory destination. 
// If not memory destination, fpreg dest, so nothing for mmStrata to do. if (MemDest) { // **!!** optimize with numeric annotation in future SDTInstrumentation = true; break; // treat as category 0 } else { // NUMERIC floating register result; these regs are always NUMERIC SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[TypeGroup], disasm); ++AnnotationCount[this->OptType]; } break; default: // 2,3,7: Optimization possibilities depend on operands SDTInstrumentation = true; if (MemDest) { break; // treat as category 0 } if ((OptType == 2) || (OptType == 7) || SecondSrcOperandImmNum) { SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s %s %s \n", addr, -2, this->DestString(this->OptType), OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; } else if (NumericDEFs) { // NUMERIC move instruction SMP_fprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } break; } // end switch (OptType) // always annotate stack constants for the profiler, etc. this->AnnotateStackConstants(UseFP, AnnotFile); // If mmStrata is going to have to deal with the // instruction, then we can annotate EBP and ESP // relative constant offsets. If we have emitted // an annotation of type -1, there is no point // in telling mmStrata about these constants. // Likewise, we can tell mmStrata if a MemDest is an // non-directly-accessed child object. int ChildOffset, ChildSize; if (SDTInstrumentation || NoWarnFlag) { if (strlen(this->DeadRegsString) > 0) { // Optimize by informing mmStrata of dead registers. It can avoid saving // and restoring dead state. This is particularly important for EFLAGS, // as restoring the flags is a pipeline serializing instruction. 
SMP_fprintf(AnnotFile, "%10x %6d INSTR DEADREGS %s ZZ %s \n", addr, this->SMPcmd.size, this->DeadRegsString, disasm); } if (MemDest && ProfInfo->GetMemoryAccessInfo()->ComputeNonDirectAccessRegion(addr, ChildOffset, ChildSize)) { SMP_fprintf(AnnotFile, "%10x %6d INSTR CHILDACCESS %d %d ZZ %s \n", addr, this->SMPcmd.size, ChildOffset, ChildSize, disasm); } #if SMP_IDENTIFY_POINTER_ADDRESS_REG if (MemDest) { assert(this->HasDestMemoryOperand()); set<DefOrUse, LessDefUse>::iterator PtrUse; PtrUse = this->GetPointerAddressReg(this->DEFMemOp); if (PtrUse != this->GetLastUse()) { // found POINTER addr reg USE if (PtrUse->GetOp().type == o_reg) { ushort PtrReg = PtrUse->GetOp().reg; SMP_fprintf(AnnotFile, "%10x %6d INSTR POINTER reg %s ZZ %s \n", addr, this->SMPcmd.size, RegNames[PtrReg], disasm); } } } #endif } return; } // end of SMPInstr::EmitTypeAnnotations() // union of sign masks from 2 reg USEs for binary arithmetic unsigned short SMPInstr::SignMaskUnionFromUseRegs(void) { unsigned short UnionMask = 0; unsigned short UseSignMask, DefSignMask; set<DefOrUse, LessDefUse>::iterator UseIter; op_t UseOp; size_t RegOpCount = 0; int UseHashValue; struct FineGrainedInfo UseFGInfo, DefFGInfo; bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer(); for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseOp = UseIter->GetOp(); if (MDIsGeneralPurposeReg(UseOp) && (!MDIsStackPtrReg(UseOp.reg, UseFP))) { ++RegOpCount; UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); bool LocalUseName = this->BasicBlock->IsLocalName(UseOp); if (LocalUseName) { // Local name, find in basic block maps. UseFGInfo = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Global name, find in global maps. UseFGInfo = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } UseSignMask = (FG_MASK_SIGNEDNESS_BITS & UseFGInfo.SignMiscInfo); if (0 == UseSignMask) { // Try to get signedness from DEF. 
if (LocalUseName) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(UseHashValue); } else { // Global name, find in global maps. DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(UseHashValue); } DefSignMask = (FG_MASK_SIGNEDNESS_BITS & DefFGInfo.SignMiscInfo); UnionMask |= DefSignMask; } else { UnionMask |= UseSignMask; } } } if (2 > RegOpCount) { // only interested in binary arithmetic on two registers UnionMask = 0; } return UnionMask; } // SMPInstr::SignMaskUnionFromUseRegs() // emit check annotations for signedness, overflow, truncation, etc. void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) { set<DefOrUse, LessDefUse>::iterator UseIter, DefIter; op_t UseOp, DefOp; unsigned short UseWidthInfo, DefWidthInfo, SourceDefWidthInfo; unsigned short UseSignInfo, DefSignInfo, SourceDefSignInfo; unsigned short UseSignMask, DefSignMask, SourceDefSignMask; struct FineGrainedInfo UseFGInfo, DefFGInfo, SourceDefFGInfo; size_t UseBitWidth, DefBitWidth, UseMaxBitWidth, SourceDefBitWidth, DefMaxBitWidth; ea_t DefAddr; int UseHashValue, DefHashValue, SSANum, DefSSANum; bool OverflowOpcode = this->MDIsOverflowingOpcode(); bool UnderflowOpcode = this->MDIsUnderflowingOpcode(); bool CheckForOverflow; bool UseIsSigned, DefIsSigned, UseIsUnsigned, DefIsUnsigned, SourceDefIsSigned, SourceDefIsUnsigned; bool UseSignMixed, SourceDefSignMixed; // inconsistent signedness bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); bool SignednessCheckEmitted = false; bool SuppressSignednessCheck = false; // If we are not confident in check, set to true. bool PartialStore; // Store is fewer bits than is defined for the target, e.g. overwriting last 8 bits // of an int or a pointer. Cannot have signedness error in that case, as sign bit // is not affected. char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); // If the instruction is the beginning of an infinite loop, we want no annotations other than // the infinite loop annotation. 
if (this->IsFirstInBlock()) { if (this->GetBlock()->IsInfiniteSelfLoop()) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR INFINITELOOP %s \n", this->address, this->SMPcmd.size, disasm); return; } } // Case 1: Overflow on addition. // Case 2: Underflow on subtraction. if (OverflowOpcode || UnderflowOpcode) { // If the flags register DEF is dead, we need a CHECK OVERFLOW/UNDERFLOW annotation. DefOp = InitOp; DefOp.type = o_reg; DefOp.reg = MD_FLAGS_REG; DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); if (this->BasicBlock->IsDefDead(this->address, DefOp)) { DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); DefOp = DefIter->GetOp(); SSANum = DefIter->GetSSANum(); bool IgnoreOverflow = this->IsBenignOverflow(); #if SMP_MEASURE_NUMERIC_ANNOTATIONS if (IgnoreOverflow) ++BenignOverflowInstCount; #endif // Don't worry about stack space allocation instructions. The // program will crash long before the stack pointer underflows // below zero. if (!(IgnoreOverflow || ((o_reg == DefOp.type) && DefOp.is_reg(MD_STACK_POINTER_REG)))) { if (o_reg == DefOp.type) { if (this->GetBlock()->IsBenignOverflowDEF(DefOp, SSANum, this->GetAddr())) { IgnoreOverflow = true; #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++BenignOverflowDefCount; #endif // e.g. if we overflow on ECX but the next USE is of CL as // a shift counter, then program is intentionally tossing // most of the ECX bits anyway. } else { DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum); if (this->BasicBlock->IsLocalName(DefOp)) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Global name, find in global maps. DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } } } else if (MDIsStackAccessOpnd(DefOp, UseFP)) { bool success = this->BasicBlock->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo); assert(success); } else { // non-stack memory address; we know nothing about it. 
DefFGInfo.SignMiscInfo = 0; DefFGInfo.SizeInfo = 0; } if (!IgnoreOverflow) { #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++NumericAnnotationsCount12; #endif DefSignInfo = DefFGInfo.SignMiscInfo; DefSignMask = DefSignInfo & FG_MASK_SIGNEDNESS_BITS; DefWidthInfo = DefFGInfo.SizeInfo; DefBitWidth = LargestBitWidthFromMask(DefWidthInfo); if (0 == DefBitWidth) { // Could happen for non-stack memory operands, for example. DefBitWidth = MD_NORMAL_MACHINE_BITWIDTH; } if (DefSignMask == 0) { // If no sign info from DEF, see if we can get it from the USE regs. DefSignMask = this->SignMaskUnionFromUseRegs(); } if (OverflowOpcode) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu ", this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth); } else { // must be UnderflowOpcode SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK UNDERFLOW %s %zu ", this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth); } AnnotPrintOperand(DefOp, InfoAnnotFile); string SinkString(""); if (this->GetBlock()->GetFunc()->HasIntErrorCallSink(DefOp, SSANum, this->address, SinkString)) { SMP_fprintf(InfoAnnotFile, " ZZ %s %s \n", SinkString.c_str(), disasm); } else { SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm); } } // end if (!IgnoreOverflow) } // end if (!(IgnoreOverflow or DEF is stack ptr register)) #if SMP_MEASURE_NUMERIC_ANNOTATIONS else if (!IgnoreOverflow) { ++SuppressStackPtrOverflowCount; } #endif } // end if flags reg is dead #if SMP_MEASURE_NUMERIC_ANNOTATIONS else { ++SuppressLiveFlagsOverflowCount; } #endif } // end cases 1-2 // Case 3: Overflow on multiplication with upper bits discarded. if (this->MDIsMultiply()) { // There are four overflow sub-cases for x86: (A) the multiplication result // can go into EDX:EAX for 32x32=>64 bit multiplication; (B) the result // can go into DX:AX for 16x16=>32 bit; (C) the result can be in AX // for 8x8=>16 bit; (D) see below. 
The latter case (C) will be detected most easily // as a truncation in a later instruction, i.e. if only AL gets stored // later, then we check the AH bits at that time for a truncation // error. Because our SSA numbering lumps AL, AH, AX, and EAX into // a single canonicalized register, we would have a hard time using // SSA-based def-use chains to determine if AH is dead. // For the other two sub-cases, the question is whether EDX becomes dead // starting with the DEF of EDX in the multiply instruction. // Case (D) is where the multiply instruction discards the upper bits // of the multiply. // Sub-cases A&B are detected by checking if EDX is dead, and if so, then // emitting an annotation to check for the overflow flag. The x86 sets // overflow and carry flags on multiplication instructions based on whether // the result carries out of the lower half of the result to the upper half. // Sub-case D is also detected using flags, but we don't need to check whether EDX // is dead. We just need to detect that EDX is not in the DEF set in the // first place. We have a private member flag for that case. CheckForOverflow = false; if (this->AreMultiplicationBitsDiscarded()) { // Sub-case D CheckForOverflow = true; assert(this->RTL.GetCount() > 0); DefOp = this->RTL.GetRT(0)->GetLeftOperand(); DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); } else { // If the instruction were EDX:=EDX*foo, then it would be // the multiplication bits discarded case and would not // reach this else clause. Therefore, if we find EDX in // the DEF set, it is holding upper result bits of the // multiplication and we have the potential for sub-cases A&B // but not sub-case C. So, we check to see if the DEF of EDX // is dead. DefOp = InitOp; DefOp.type = o_reg; DefOp.reg = R_dx; DefIter = this->FindDef(DefOp); if (DefIter != this->GetLastDef()) { // We found DEF of EDX, so it is not AX:=AL*op8 sub-case C. 
// Now, is DEF of EDX dead (i.e. no uses?) CheckForOverflow = this->BasicBlock->IsDefDead(this->address, DefOp); if (CheckForOverflow) { DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); } } } // end if sub-case D else if sub-case A or B if (CheckForOverflow) { // need an annotation #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++NumericAnnotationsCount3; #endif if (this->BasicBlock->IsLocalName(DefOp)) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Global name, find in global maps. DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } DefWidthInfo = DefFGInfo.SizeInfo; DefBitWidth = LargestBitWidthFromMask(DefWidthInfo); DefSignMask = (DefFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); if (DefSignMask == 0) { // If no sign info from DEF, see if we can get it from the USE regs. DefSignMask = this->SignMaskUnionFromUseRegs(); } // Next two statements exclude the inconsistent sign case and the no sign info known case. DefIsSigned = (FG_MASK_SIGNED == DefSignMask); // exact, not bit-mask-AND DefIsUnsigned = (FG_MASK_UNSIGNED == DefSignMask); // exact, not bit-mask-AND if (this->MDIsUnsignedArithmetic() || DefIsUnsigned) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW UNSIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm); } else if (this->MDIsSignedArithmetic() || DefIsSigned) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW SIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm); } else { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW UNKNOWNSIGN %zu %s ZZ %s \n", this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm); } } // end if (CheckForOverflow) #if SMP_MEASURE_NUMERIC_ANNOTATIONS else { ++LiveMultiplyBitsCount; } #endif } // end of case 3 // Case 4: Signedness error on move. // Case 5: Truncation error on move. 
UseOp = this->GetMoveSource(); if ((3 == this->OptType) && (o_reg == UseOp.type) && (!(this->IsBenignTruncation()))) { // Possibilities for a move: reg to reg, mem to reg, or reg to mem. If we load // from memory into a register, we cannot track signedness in memory unless it // is a stack location. In that case, we record the signedness in the stack // map and transfer it to the reg DEF in SMPInstr.MDSetWidthSignInfo(). That // determines the signedness of the reg DEF and it cannot be in conflict with // the stack memory USE. The load from stack to reg also determines width // of the stack operand and we cannot have a truncation. So, we can restrict // our analysis of cases 4-5 to register-source move instructions, as we // have done in the condition above. // // Similarly, we cannot detect a signedness conflict if the destination is a // memory location that is not known to be a particular stack offset location. // // So, we only concern ourselves with signedness errors // when the USE operand of the move is a register, and the destination is another // register or a stack location. // // We can have a truncation error and a signedness error on a single instruction, so // we group them into common code. For example, move the lower half of a 32-bit unsigned // into a 16-bit signed destination. Upper bits set to 1 and discarded would be a // truncation, and setting the sign bit of the 16-bit signed destination would be a // signedness error. // // NOTE: Signedness errors are different from overflow and truncation errors. We // can have incomplete knowledge about an instructions operands and still determine // that truncation occurred. For example, if we do not know whether register EAX // is signed or unsigned, we can still say that storing only AX is a truncation error // if the upper half of EAX is a mixture of one and zero bits. 
If EAX is unsigned, // we could be more specific and insist that the upper half be all zero bits; if EAX // is signed, we could insist that the upper half of EAX be the sign-extension of AX. // We can avoid false positives by only declaring a truncation error when the upper // half of EAX is not all zero bits or all one bits. This approach allows a few // potential false negatives. With signedness, if we don't know the signedness // of one of the operands, we can only avoid false positives by doing no checks at // all. UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); UseBitWidth = 8 * GetOpDataSize(UseOp); // Now, the question is: Are we storing fewer bits than // we were using in our computations in this DEF-USE chain? // E.g. if we computed using 32 bits and then only store 16, // we have potential truncation error. But if we computed // using 16 bits all along, we have already checked for 16-bit // overflows on arithmetic in the DU chain and there can be no // truncation on this store. op_t SearchOp = UseOp; // Canonicalize sub-regs for searching DEFs and USEs. SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); SearchOp.dtyp = dt_dword; UseHashValue = HashGlobalNameAndSSA(SearchOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(SearchOp)) { // Local name, find in basic block maps. SourceDefFGInfo = this->BasicBlock->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Global name, find in global maps. 
SourceDefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } SourceDefWidthInfo = SourceDefFGInfo.SizeInfo; UseWidthInfo = UseFGInfo.SizeInfo; SourceDefBitWidth = LargestBitWidthFromMask(SourceDefWidthInfo); UseMaxBitWidth = LargestBitWidthFromMask(UseWidthInfo); UseSignMask = (UseFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); SourceDefSignInfo = SourceDefFGInfo.SignMiscInfo; SourceDefSignMask = (SourceDefSignInfo & FG_MASK_SIGNEDNESS_BITS); #if 1 // If we have no signedness info at all for the UseSignMask, but // the SourceDefSignMask has info, then we want to use the // SourceDefSignMask as our signedness info. This is because of // simple instruction sequences in which no signedness can be inferred // from the use, e.g.: // // imul eax,ebx ; eax := eax*ebx and eax&ebx are signed // mov [ebp-8],al ; store al on stack // // If we don't know the signedness of stack location [ebp-8], // then we will end up in the S->? case below and we will emit // a CHECK TRUNCATION UNKNOWNSIGN annotation. This discards the // knowledge we have that EAX is signed, from the IMUL opcode. if (0 == UseSignMask) { UseSignMask = SourceDefSignMask; } #endif // Next six statements exclude the inconsistent sign case and the no sign info known case. UseIsSigned = (FG_MASK_SIGNED == UseSignMask); // exact, not bit-mask-AND UseIsUnsigned = (FG_MASK_UNSIGNED == UseSignMask); // exact, not bit-mask-AND SourceDefIsSigned = (FG_MASK_SIGNED == SourceDefSignMask); // exact, not bit-mask-AND SourceDefIsUnsigned = (FG_MASK_UNSIGNED == SourceDefSignMask); // exact, not bit-mask-AND UseSignMixed = (FG_MASK_INCONSISTENT_SIGN == UseSignMask); // exclude uninit sign case SourceDefSignMixed = (FG_MASK_INCONSISTENT_SIGN == SourceDefSignMask); // exclude uninit sign case // Not only the CHECK SIGNEDNESS annotations depend on the signedness of the // source and destination operands. 
The CHECK TRUNCATION annotations come // in SIGNED, UNSIGNED, and UNKNOWNSIGN variants, so we need to get the // signedness of the destination operand before we proceeed. DefOp = this->RTL.GetRT(0)->GetLeftOperand(); // RTL must be dest := rhs op_t DestSearchOp = DefOp; bool StackDestination; // Outargs locations are reused for different function calls, so no inference of their // signedness is valid. We maintain a flag to suppress signedness checks on writes to // outargs locations on the stack. bool OutArgsWrite = false; if (o_reg == DestSearchOp.type) { StackDestination = false; DestSearchOp.reg = MDCanonicalizeSubReg(DefOp.reg); DestSearchOp.dtyp = dt_dword; } else if (!(MDIsStackAccessOpnd(DefOp, UseFP))) { // If destination of move is not a register and is not // a stack location, we cannot track its signedness and width. return; } else { StackDestination = true; } DefIter = this->FindDef(DestSearchOp); DefSSANum = DefIter->GetSSANum(); if (StackDestination) { // Fetch FG info from stack map. bool success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo); assert(success); OutArgsWrite = this->GetBlock()->GetFunc()->IsInOutgoingArgsRegion(DefOp); } else { // Fetch FG info from register FG info maps. DefHashValue = HashGlobalNameAndSSA(DestSearchOp, DefSSANum); if (this->BasicBlock->IsLocalName(DestSearchOp)) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Global name, find in global maps. DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } } DefSignMask = (DefFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); // Next two statements exclude the inconsistent sign case and the no sign info known case. DefIsSigned = (FG_MASK_SIGNED == DefSignMask); // exact, not bit-mask-AND DefIsUnsigned = (FG_MASK_UNSIGNED == DefSignMask); // exact, not bit-mask-AND // Get the Def bit width and maximum bit width for special cases. 
DefWidthInfo = DefFGInfo.SizeInfo; DefMaxBitWidth = LargestBitWidthFromMask(DefWidthInfo); // max width over all defs DefBitWidth = 8 * GetOpDataSize(DefOp); // width of def in current instruction PartialStore = (DefBitWidth < DefMaxBitWidth); if (StackDestination) { // We only do signedness checks in limited, safe analysis cases on stack writes. SuppressSignednessCheck = this->SkipSignednessCheckOnStackWrite(DefSSANum); } else { SuppressSignednessCheck = false; } // If we set the (source) DEF bit width to 0, it means we wanted to have the USEs determine // the width. This happens on sign-extended and zero-extended loads. If we zero-extend // a 16-bit value to 32 bits, then immediately store the lower 16 bits to a 16-bit location, // then the upper bits cannot have any overflow info yet. But if we do 32-bit arithmetic // on the zero-extended value, and then store the lower 16 bits, we need to check for // truncation. So, the key is whether the value ever got used as a 32-bit value. If it // did, check for truncation; if not, there is no need to check. if ((SourceDefBitWidth > UseBitWidth) || ((SourceDefBitWidth == 0) && (UseMaxBitWidth > UseBitWidth))) { // Original DEF (or subsequent USE) was wider than what we are storing now. unsigned short SourceDefReg = SearchOp.reg; unsigned short UseReg = UseOp.reg; if (SourceDefBitWidth == 0) { // Convert for printing annotation. SourceDefBitWidth = 8 * GetOpDataSize(SearchOp); } // OK, we need to check for possible truncation. But, how we check depends on the // signedness combinations of the source and destination operands of the move. // Each operand can be signed, unsigned, or of unknown sign (and we lump the // inconsistent sign case into the unknown sign case). So, we have a set of 3x3=9 // possible combinations of signedness. // Now we have the DefSignMask to compare to the UseSignMask. The nine possible // combinations, and the annotations we want to emit for each, are shown below. // S = SIGNED, U = UNSIGNED, and ? 
= unknown or inconsistent sign. // S => U indicates a SIGNED being stored into an UNSIGNED, for example. // Assume without loss of generality that register EAX is the source of // all the move instructions, and that only subword register AX is being stored. // We can perform all truncation and signedness checks on EAX just prior to // the move instruction, which is cheaper than performing checks on the // destination if the destination is in memory. // // U => U // U => S // S => U // U => ? // ? => U // // In these first five cases, EAX must be the zero-extension of AX else there is // a truncation error. In the three cases in which the source (EAX/AX) is UNSIGNED, // discarding upper bits that are not zero is obviously truncation. In the case // of S => U, if the upper bits of EAX are not all zeroes, then we either have // a large positive value of EAX that is being truncated, or EAX is negative and // the lower bits will be misinterpreted in the unsigned destination. Finally, // the ? => U case must be either U => U or S => U, and these two cases already // share the demand that EAX be the zero-extension of AX. So, these five cases // will receive the annotation: CHECK TRUNCATION UNSIGNED 32 EAX 16 AX which // means that EAX is tested against AX to see if it is the 32-bit zero-extension // of 16-bit reg AX. // In the U => S case, we can have a signedness error as well as truncation. Even // if the truncation check passes (all upper half bits of EAX are zero), the top // bit of AX might be 1, and this will be misinterpreted as a sign bit in the // destination. So, this case receives a second annotation: CHECK SIGNEDNESS SIGNED 16 AX. // In the two cases that involve signedness uncertainty, there are possible signedness // errors that we are not checking ar tun-time, because we do not have enough information // to perform the checks without generating many more false positives than true positives. // As a result, false negatives on signedness can occur. 
// // On to more of the 9 combinations: // // S => S // // In this case, EAX must be the sign-extension of AX. Because the destination is also // signed, nothing is lost if the sign-extension bits (all zeroes or all ones) are dropped. // We emit a CHECK TRUNCATION SIGNED 32 EAX 16 AX annotation to test EAX == sign-extended AX. // // S => ? // ? => S // ? => ? // // These final three cases all involve at least one operand of unknown signedness, and no // operands that are known to be unsigned. In each case, there are two possibilities: // either EAX must be the sign-extension of AX, or EAX must be the zero-extension of AX. // Because of the uncertainty that is represented by the question marks, we are not sure // which of these two cases we are dealing with. However, rather than just give up and // perform no run-time checks (to avoid false positives), we can still perform a run-time // check that will catch (perhaps most) true positives while causing no false positives. // We can insist that EAX must be EITHER the sign-extension or the zero-extension of AX. // To be neither type of extension of AX implies that some sort of truncation is happening. // So, we emit a CHECK TRUNCATION UNKNOWNSIGN 32 EAX 16 AX annotation, and the Strata // instrumentation will check for either EAX == sign-extended AX or EAX == zero-extended AX // being true. If neither is true, we raise a true positive alert. False negatives on // signedness errors are the result of the uncertainty, but all truncations are detected // for all nine cases. if (DefIsUnsigned || UseIsUnsigned) { // First five cases above: any UNSIGNED operand leads to CHECK TRUNCATION UNSIGNED annotation. 
SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION UNSIGNED %zu %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, SourceDefBitWidth, MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++TruncationAnnotationsCount; #endif if (!SuppressSignednessCheck && UseIsUnsigned && DefIsSigned && !OutArgsWrite && !PartialStore) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } #if SMP_MEASURE_NUMERIC_ANNOTATIONS else { ++SuppressSignednessOnTruncation; } #endif } else if (DefIsSigned && UseIsSigned) { // S => S case above. Emit CHECK TRUNCATION SIGNED annotation. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION SIGNED %zu %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, SourceDefBitWidth, MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++TruncationAnnotationsCount; #endif } else { // S => ?, ? => S, ? => ? cases above: CHECK TRUNCATION UNKNOWNSIGN annotation. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION UNKNOWNSIGN %zu %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, SourceDefBitWidth, MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++TruncationAnnotationsCount; #endif } #if 1 // Now check for signedness conflicts between the UseOp USEs and its DEF. if (!SuppressSignednessCheck && !OutArgsWrite && !PartialStore) { // Inferred signedness of outargs location is invalid anyway if (UseIsSigned && SourceDefIsUnsigned) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS UNSIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if (UseIsUnsigned && SourceDefIsSigned) { // Currently same annotation, but might differ in the future for better forensics // and more precise diagnostic messages. 
SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if ((!SourceDefSignMixed) && UseSignMixed) { // DEF has consistent and known signedness, USE is inconsistent. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, SignednessStrings[SourceDefSignMask], UseBitWidth, MDGetRegName(UseOp), disasm); } } #if SMP_MEASURE_NUMERIC_ANNOTATIONS else if (!(DefIsUnsigned || UseIsUnsigned)) { ++SuppressSignednessOnTruncation; } #endif #endif } // end if truncation else if (!SuppressSignednessCheck && !OutArgsWrite && !PartialStore) { // still need to check for signedness errors even if no truncation if (UseIsSigned && DefIsUnsigned) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS UNSIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SignednessWithoutTruncationCount; #endif } else if (UseIsUnsigned && DefIsSigned) { // Currently UNSIGNED and SIGNED annotations for signedness differ only in the // arithmetic saturation value applied as a recovery action. UNSIGNED means // left-hand-side is UNSIGNED so saturate a negative right-hand-side up to 0, // while SIGNED means lhs is SIGNED so saturate large rhs to 0x7fff.... // For inconsistent sign cases we default to SIGNED. 
SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SignednessWithoutTruncationCount; #endif } else if (UseIsSigned && SourceDefIsUnsigned) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS UNSIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SignednessWithoutTruncationCount; #endif } else if (UseIsUnsigned && SourceDefIsSigned) { // Currently same annotation, but might differ in the future for better forensics // and more precise diagnostic messages. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %zu %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SignednessWithoutTruncationCount; #endif } #if 0 else if ((!SourceDefSignMixed) && UseSignMixed) { // source DEF has consistent and known signedness, source USE is inconsistent. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %u %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SignednessWithoutTruncationCount; #endif } #endif } // end if truncation else check signedness } // end of cases 4-5, (3 == OptType) checking for TRUNCATION and SIGNEDNESS errors else if ((this->MDIsLoadEffectiveAddressInstr()) && (!(this->IsNop()))) { // Case 6: Load Effective Address opcodes can do arithmetic that silently overflows. this->MDEmitLeaOpcodeOverflowAnnotations(InfoAnnotFile); } // end of case 6, LoadEffectiveAddress instruction else if (this->MDDoublesWidth()) { // case 7: half of register is sign-extended into the full register. // Potential truncation error if entire register was DEFed first, because // only the lower half is used. 
That also makes a potential signedness // error, because the "sign bit" that is being extended is really a data bit, // not a sign bit. DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); DefOp = DefIter->GetOp(); assert(o_reg == DefOp.type); SSANum = DefIter->GetSSANum(); DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum); UseIter = this->GetFirstUse(); assert(UseIter != this->GetLastUse()); UseOp = UseIter->GetOp(); assert(o_reg == UseOp.type); SSANum = UseIter->GetSSANum(); UseHashValue = HashGlobalNameAndSSA(UseOp, SSANum); if (this->GetBlock()->IsLocalName(DefOp)) { DefFGInfo = this->GetBlock()->GetDefFGInfo(DefHashValue); SourceDefFGInfo = this->GetBlock()->GetDefFGInfo(UseHashValue); } else { DefFGInfo = this->GetBlock()->GetFunc()->GetDefFGInfo(DefHashValue); SourceDefFGInfo = this->GetBlock()->GetFunc()->GetDefFGInfo(UseHashValue); } DefWidthInfo = DefFGInfo.SizeInfo; DefMaxBitWidth = LargestBitWidthFromMask(DefWidthInfo); // max width over all defs SourceDefWidthInfo = SourceDefFGInfo.SizeInfo; SourceDefBitWidth = LargestBitWidthFromMask(SourceDefWidthInfo); // max width over all defs // Was the USE in the width-doubling instruction originally DEFed to have more bits // than are used in the width-doubling instruction? If so, we have truncation and // signedness errors to check at run time. UseBitWidth = (DefMaxBitWidth / 2); // USE should have been DEFed with this bit width if (SourceDefBitWidth > UseBitWidth) { // Make checks signed, because of the implications of sign extension by these opcodes. 
SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION SIGNED %zu %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, DefMaxBitWidth, MDGetRegName(DefOp), UseBitWidth, MDGetRegName(UseOp), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++WidthDoublingTruncationCount; #endif } } // end of case 7, doubles width inside register else if ((BADADDR != this->CallTarget) && (!this->IsCallUsedAsJump())) { // PEASOUP wants info on memset() calls. string FuncName = this->GetTrimmedCalledFunctionName(); if (0 == FuncName.compare("memset")) { op_t MemSetTarget; size_t MemSetSize; int StackOffset; if (this->GetBlock()->AnalyzeMemSet(this->GetAddr(), MemSetTarget, MemSetSize, StackOffset)) { if (0 < MemSetSize) { // Emit annotation. if (0 > StackOffset) { // Must be negative offset from EBP. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMSET STACKOFFSET_EBP %d SIZE %zu ZZ %s \n", this->address, this->SMPcmd.size, StackOffset, MemSetSize, disasm); } else { // Must be non-negative offset from ESP. SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMSET STACKOFFSET_ESP %d SIZE %zu ZZ %s \n", this->address, this->SMPcmd.size, StackOffset, MemSetSize, disasm); } } } } } // end of memset() case return; } // end of SMPInstr::EmitIntegerErrorAnnotations() #define MAX_OFFSET_STR_LEN 40 // Emit overflow annotations for lea opcodes that perform arithmetic that // can overflow without affecting the flags. 
void SMPInstr::MDEmitLeaOpcodeOverflowAnnotations(FILE *InfoAnnotFile) { set<DefOrUse, LessDefUse>::iterator UseIter, DefIter; size_t OpNum; int BaseReg; int IndexReg; ushort ScaleFactor; ea_t offset; int SignedOffset; bool ScaledIndexReg; bool SourceFound = false; int SSANum; int UseHashValue; struct FineGrainedInfo UseFGInfo, SourceDefFGInfo; unsigned short UseSignMask, SourceDefSignMask, BaseRegSignMask, TempSignMask; unsigned short BaseRegWidthInfo, UseBitWidthInfo; size_t BaseRegMaxWidth, IndexRegMaxWidth, TempMaxWidth; char OffsetString[MAX_OFFSET_STR_LEN]; op_t TempOp; for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset); SignedOffset = (int) offset; // IDA Pro makes offset unsigned for some reason. SourceFound = true; break; } } } assert(SourceFound); if (MDIsStackAccessOpnd(TempOp, this->BasicBlock->GetFunc()->UsesFramePointer())) { // If the lea operand is something like lea ebx,[esp+4] we do not want to waste // time checking for overflow, because stack pointers and frame pointers // are kept within integer bounds (and if not, something big and terrible will // happen long before integer bounds are overflowed or underflowed). #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++SuppressStackPtrOverflowCount; #endif return; } else { // Check for the benign overflow cases we want to ignore. 
DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); op_t DefOp = DefIter->GetOp(); SSANum = DefIter->GetSSANum(); if (this->GetBlock()->IsBenignOverflowDEF(DefOp, SSANum, this->GetAddr())) { #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++BenignOverflowDefCount; #endif return; } } char *disasm = DisAsmText.GetDisAsm(this->GetAddr()); ScaledIndexReg = (0 < ScaleFactor); if (0 != SignedOffset) { (void) SMP_snprintf(OffsetString, MAX_OFFSET_STR_LEN-2, "%d", SignedOffset); } // Build up the computational string one operation at a time and emit // CHECK OVERFLOW annotations after each step. string CurrString(""); const char *ScaleStrings[4] = {"", "*2", "*4", "*8"}; op_t RegOp; RegOp.type = o_reg; // Gather signedness info about BaseReg, if any, that will be used in multiple cases below. if (R_none != BaseReg) { RegOp.reg = MDCanonicalizeSubReg(BaseReg); RegOp.dtyp = dt_dword; UseIter = this->FindUse(RegOp); assert(UseIter != this->GetLastUse()); UseHashValue = HashGlobalNameAndSSA(RegOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(RegOp)) { // Local name, find in basic block maps. SourceDefFGInfo = this->BasicBlock->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Global name, find in global maps. SourceDefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } UseSignMask = (UseFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); SourceDefSignMask = (SourceDefFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); if (UseSignMask == 0) { // Get signedness info wherever it is available. 
UseSignMask = SourceDefSignMask; } BaseRegSignMask = UseSignMask; BaseRegWidthInfo = (UseFGInfo.SizeInfo & FG_MASK_BITWIDTH_FIELDS); if (0 == BaseRegWidthInfo) { BaseRegWidthInfo = (SourceDefFGInfo.SizeInfo & FG_MASK_BITWIDTH_FIELDS); } BaseRegMaxWidth = LargestBitWidthFromMask(BaseRegWidthInfo); } else { BaseRegMaxWidth = 0; BaseRegSignMask = 0; } if (R_none != IndexReg) { // Get signedness info for IndexReg. RegOp.reg = MDCanonicalizeSubReg(IndexReg); RegOp.dtyp = dt_dword; UseIter = this->FindUse(RegOp); assert(UseIter != this->GetLastUse()); UseHashValue = HashGlobalNameAndSSA(RegOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(RegOp)) { // Local name, find in basic block maps. SourceDefFGInfo = this->BasicBlock->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Global name, find in global maps. SourceDefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } UseSignMask = (UseFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); SourceDefSignMask = (SourceDefFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); if (UseSignMask == 0) { // Get signedness info wherever it is available. UseSignMask = SourceDefSignMask; } UseBitWidthInfo = (UseFGInfo.SizeInfo & FG_MASK_BITWIDTH_FIELDS); IndexRegMaxWidth = LargestBitWidthFromMask(UseBitWidthInfo); TempSignMask = (BaseRegSignMask | UseSignMask); TempMaxWidth = (BaseRegMaxWidth >= IndexRegMaxWidth) ? 
BaseRegMaxWidth : IndexRegMaxWidth; if (ScaledIndexReg) { assert((ScaleFactor >= 1) && (ScaleFactor <= 3)); assert((IndexReg >= R_ax) && (IndexReg <= R_bh)); CurrString += RegNames[IndexReg]; CurrString += ScaleStrings[ScaleFactor]; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[UseSignMask], IndexRegMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif if (R_none != BaseReg) { // Have BaseReg+IndexReg*ScaleFactor string TempStr(CurrString); CurrString.clear(); CurrString += RegNames[BaseReg]; CurrString += "+"; CurrString += TempStr; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[TempSignMask], TempMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif } if (0 != SignedOffset) { // We have [ BaseReg + ] IndexReg*ScaleFactor+offset [BaseReg or not] // CurrString has everything through the end of the scalefactor in either case. #if 0 // easier for instrumentation to always use ADD, even for negative offsets if (0 < SignedOffset) #endif CurrString += "+"; CurrString += OffsetString; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[TempSignMask], TempMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif } } else if (R_none != BaseReg) { // We have BaseReg+IndexReg, unscaled. 
CurrString += RegNames[BaseReg]; CurrString += "+"; CurrString += RegNames[IndexReg]; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[TempSignMask], TempMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif if (0 != SignedOffset) { // We have BaseReg + IndexReg + offset #if 0 // easier for instrumentation to always use ADD, even for negative offsets if (0 < SignedOffset) #endif CurrString += "+"; CurrString += OffsetString; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[TempSignMask], TempMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif } } else if (0 != SignedOffset) { // We have just IndexReg+offset. CurrString += RegNames[IndexReg]; #if 0 // easier for instrumentation to always use ADD, even for negative offsets if (0 < SignedOffset) #endif CurrString += "+"; CurrString += OffsetString; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[TempSignMask], TempMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif } else { // Just IndexReg, no BaseReg or offset, so nothing to do. ; } } else if ((R_none != BaseReg) && (SignedOffset != 0)) { // No index reg, scaled or otherwise. 
Just BaseReg+offset CurrString += RegNames[BaseReg]; #if 0 // easier for instrumentation to always use ADD, even for negative offsets if (0 < SignedOffset) #endif CurrString += "+"; CurrString += OffsetString; SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %zu %s ZZ %s \n", this->address, this->SMPcmd.size, LeaSignednessStrings[BaseRegSignMask], BaseRegMaxWidth, CurrString.c_str(), disasm); #if SMP_MEASURE_NUMERIC_ANNOTATIONS ++LeaInstOverflowCount; #endif } else { // Either just a BaseReg, or just an offset. Nothing to do. ; } return; } // end of SMPInstr::MDEmitLeaOpcodeOverflowAnnotations() // Go through the PUSH RTL and get the operand pushed. op_t SMPInstr::GetPushedOpnd(void) { op_t VoidOp = InitOp; if (NN_push == this->SMPcmd.itype) { for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE return TempOp; } } SMP_msg("ERROR: Could not find PUSH operand at %x %s\n", this->address, DisAsmText.GetDisAsm(this->GetAddr())); return VoidOp; } else { return VoidOp; } } // end of SMPInstr::GetPushedOpnd() // Get the immediate value used in the instruction. Return zero // if no immediate was used. int SMPInstr::MDGetImmedUse(void) { int ImmedVal = 0; set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t UseOp = CurrUse->GetOp(); if (o_imm == UseOp.type) { ImmedVal = (int) UseOp.value; break; } } return ImmedVal; } // end of SMPInstr::MDGetImmedUse() // Get funcname from call inst and remove "." and "_" prefices. // Asserts if this is not a call instruction. 
string SMPInstr::GetTrimmedCalledFunctionName(void) { SMPitype DFType = this->GetDataFlowType(); assert((CALL == DFType) || (INDIR_CALL == DFType) || this->IsTailCall()); ea_t FuncAddr = this->CallTarget; char IDA_func_name[MAXSTR]; size_t SkipCount; char *TempFuncName; if (CALL == DFType) { // We should have a good call target for direct calls. assert(BADADDR != FuncAddr); (void) get_func_name(FuncAddr, IDA_func_name, (size_t)(MAXSTR - 1)); SkipCount = strspn(IDA_func_name, "._"); TempFuncName = &(IDA_func_name[SkipCount]); string FuncName(TempFuncName); return FuncName; } else { // INDIR_CALL // We might have a resolved call target for indirect calls. if (BADADDR != FuncAddr) { // We have a resolved address for the indirect call. (void) get_func_name(FuncAddr, IDA_func_name, (size_t)(MAXSTR - 1)); SkipCount = strspn(IDA_func_name, "._"); TempFuncName = &(IDA_func_name[SkipCount]); string IndirFuncName(TempFuncName); return IndirFuncName; } else { // INDIR_CALL, no resolved target. string DummyFuncName("ZST_NEVER_MATCH_THIS_FUNC_NAME"); return DummyFuncName; } } } // end of SMPInstr::GetTrimmedCalledFunctionName() // Build the RTL for an instruction with a unary opcode bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) { size_t OpNum; bool DestFound = false; bool WidthDoubler = this->MDDoublesWidth(); SMPRegTransfer *TempRT = NULL; op_t VoidOp = InitOp; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; // Handle special cases first if (SMP_UNARY_FLOATING_ARITHMETIC == UnaryOp) { // Use of the floating register stack top is implicit DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FPRegOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(FPRegOp); RightRT->SetOperator(UnaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } else if ((NN_clc 
== this->SMPcmd.itype) || (NN_cld == this->SMPcmd.itype) || (NN_cmc == this->SMPcmd.itype) || (NN_stc == this->SMPcmd.itype) || (NN_std == this->SMPcmd.itype)) { // Flags register is implicit destination. DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; if (NN_cmc == this->SMPcmd.itype) { // complement carry flag USEs old carry flag RightRT->SetLeftOperand(FlagsOp); RightRT->SetOperator(SMP_BITWISE_NOT); } else { RightRT->SetLeftOperand(VoidOp); RightRT->SetOperator(UnaryOp); } RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } for (OpNum = 0; !DestFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; op_t LeftOp; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; LeftOp = TempOp; if (WidthDoubler) { // Opcodes that sign-extend a byte to a word, or a word to a dword, // have only one operand. It is implicit, and it is the shorter USE. // That means the DEF op will have the same width as the USE op, e.g. if // we are sign-extending AX to EAX, the USE op and DEF op will both be AX // without a special fix. We fix this problem with the DEF operand now. 
if (LeftOp.dtyp == dt_byte) { LeftOp.dtyp = dt_word; LeftOp.reg = MDCanonicalizeSubReg(LeftOp.reg); } else if (LeftOp.dtyp == dt_word) { LeftOp.dtyp = dt_dword; LeftOp.reg = MDCanonicalizeSubReg(LeftOp.reg); } else if (LeftOp.dtyp == dt_dword) { LeftOp.dtyp = dt_qword; } else { SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %x dtyp: %d\n", OpNum, this->address, LeftOp.dtyp); } } TempRT->SetLeftOperand(LeftOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(UnaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } } } // end for (OpNum = 0; ...) #if SMP_DEBUG_BUILD_RTL if (!DestFound) { SMP_msg("ERROR: Could not find unary operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } #endif return DestFound; } // end of SMPInstr::BuildUnaryRTL() #if 0 // Cleaner, no special-case version of BuildUnary2OpndRTL() // Build the RTL for an instruction of the form dest := unary_operator(src), with src != dest bool SMPInstr::BuildUnaryTwoOperandRTL(SMPoperator UnaryOp) { size_t OpNum; bool DestFound = false; bool SrcFound = false; op_t DestOp, SrcOp; SMPRegTransfer *TempRT = NULL; op_t VoidOp = InitOp; for (OpNum = 0; (!(DestFound && SourceFound)) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; DestOp = TempOp; } } else { // not dest, see if valid source if (MDKnownOperandType(TempOp)) { SourceFound = true; } } } // end for (OpNum = 0; ...) 
if (DestFound && SourceFound) { TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(DestOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(SrcOp); RightRT->SetOperator(UnaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } #if SMP_DEBUG_BUILD_RTL else { SMP_msg("ERROR: Could not find unary operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } #endif return DestFound; } // end of SMPInstr::BuildUnaryTwoOperandRTL() #endif // Build the RTL for an instruction with a binary arithmetic opcode bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp, bool HiddenFPStackOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool MemSrc = this->HasSourceMemoryOperand(); bool MemDest = this->HasDestMemoryOperand(); // Work around IDA pro error; they assumed that the pcmpeq and pcmpgt // families of instructions were just compares, so they do not tag // either operand as a DEF. Actually, the first operand has byte or // word or dword fields set to all 1's or all 0's based on the result // of the comparison. 
bool SrcIsReallyDest = ((SMP_COMPARE_EQ_AND_SET == BinaryOp) || (SMP_COMPARE_GT_AND_SET == BinaryOp)); SMPRegTransfer *TempRT = NULL; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t VoidOp = InitOp; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack // Handle special cases first if (HiddenFPStackOp) { // Use of the floating register stack top is implicit DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FPRegOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(FPRegOp); RightRT->SetOperator(BinaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); } for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if ((this->features & DefMacros[OpNum]) || (SrcIsReallyDest && (0 == OpNum))) { // DEF if (!DestFound && MDKnownOperandType(TempOp)) { // See comments just below for floating point sources. FP stores // are analogous to FP loads. if (!MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); if (this->IsRegClearIdiom()) { op_t ImmOp = InitOp; ImmOp.type = o_imm; ImmOp.value = 0; TempRT->SetRightOperand(ImmOp); SourceFound = true; // cause loop exit } else { RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } } else { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL SMP_msg("WARNING: Skipping DEF operand: "); PrintOperand(TempOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } else if (DestFound && (!HiddenFPStackOp)) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL SMP_msg("ERROR: Found two DEF operands: "); PrintOperand(TempOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } else { // USE if (!SourceFound && MDKnownOperandType(TempOp)) { // If this is a floating point instruction with the 
fpregs listed as // a USE and a memory operand also listed as a USE, then we want to // ignore the irrelevant USE of the fpreg stack. // Note that MemDest AND MemSrc means something like add mem,reg is being // processed, where the memory operand is both DEF and USE. if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { SourceFound = true; RightRT->SetRightOperand(TempOp); } } if (!(this->features & UseMacros[OpNum])) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE SMP_msg("WARNING: Operand neither DEF nor USE: "); PrintOperand(TempOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } // end if DEF ... else ... } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound) { assert(NULL != RightRT); if (DestFound && (NULL != TempRT)) delete TempRT; else delete RightRT; #if SMP_DEBUG_BUILD_RTL if (!DestFound) { SMP_msg("ERROR: Could not find binary DEF operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } else { SMP_msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); this->PrintOperands(); } #endif } else { this->RTL.push_back(TempRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildBinaryRTL() // Build the RTL for a load-effective-address instruction. 
bool SMPInstr::BuildLeaRTL(void) { size_t OpNum; bool DestFound = false; bool SourceFound = false; op_t DefOp = InitOp; op_t UseOp = InitOp; SMPRegTransfer *AssignRT = NULL; int BaseReg; int IndexReg; ushort ScaleFactor; ea_t offset; bool ScaledIndexReg; for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF DefOp = TempOp; DestFound = true; assert(o_reg == DefOp.type); } else { // USE if (!SourceFound && MDKnownOperandType(TempOp)) { if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { SourceFound = true; UseOp = TempOp; MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset); } else { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL SMP_msg("WARNING: Skipping USE operand: "); PrintOperand(TempOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } if (!(this->features & UseMacros[OpNum])) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE SMP_msg("WARNING: Operand neither DEF nor USE: "); PrintOperand(TempOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } // end if DEF ... else ... } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound) { #if SMP_DEBUG_BUILD_RTL if (!DestFound) { SMP_msg("ERROR: Could not find lea DEF operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } else { SMP_msg("ERROR: Could not find lea USE operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); this->PrintOperands(); } #endif } else { // Ready to build the RTL // We build the RTL down to the right, in reverse order, with any multiplication // of the index register by a scale factor at the bottom of the RTL tree. // Note that almost any combination of BaseReg, IndexReg, and offset can be present // or absent. 
AssignRT = new SMPRegTransfer; AssignRT->SetLeftOperand(DefOp); AssignRT->SetOperator(SMP_ASSIGN); ScaledIndexReg = ((ScaleFactor > 0) && (IndexReg != R_none)); op_t BaseOp = InitOp, IndexOp = InitOp, OffsetOp = InitOp, ScaleOp = InitOp; BaseOp.type = o_reg; BaseOp.reg = (ushort) BaseReg; IndexOp.type = o_reg; IndexOp.reg = (ushort) IndexReg; OffsetOp.type = o_imm; OffsetOp.value = (uval_t) offset; ScaleOp.type = o_imm; ScaleOp.value = (uval_t) ScaleFactor; if (ScaledIndexReg) { // First, build the subtree to scale the IndexReg. SMPRegTransfer *MultRT = new SMPRegTransfer; MultRT->SetLeftOperand(IndexOp); MultRT->SetOperator(SMP_U_LEFT_SHIFT); MultRT->SetRightOperand(ScaleOp); // Now, case on the possibilities for existence of the other address fields. if (0 != offset) { // Add the offset to the scaled index subtree. SMPRegTransfer *AddOffRT = new SMPRegTransfer; AddOffRT->SetLeftOperand(OffsetOp); AddOffRT->SetOperator(SMP_ADD); AddOffRT->SetRightTree(MultRT); // Add a BaseReg, if any. if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightTree(AddOffRT); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg AssignRT->SetRightTree(AddOffRT); } } // end if nonzero offset else { // no offset to add // Add a BaseReg, if any. if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightTree(MultRT); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg AssignRT->SetRightTree(MultRT); } } } // end if ScaleIndexReg else { // no scaled index register if (0 != offset) { if (R_none != IndexReg) { SMPRegTransfer *AddOffRT = new SMPRegTransfer; AddOffRT->SetLeftOperand(OffsetOp); AddOffRT->SetOperator(SMP_ADD); AddOffRT->SetRightOperand(IndexOp); // Add BaseReg, if any. 
if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightTree(AddOffRT); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg AssignRT->SetRightTree(AddOffRT); } } // end if valid IndexReg else { // no IndexReg // Add BaseReg, if any. if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightOperand(OffsetOp); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg, no IndexReg, just offset? if (UseOp.type != o_mem) { SMP_msg("ERROR: No BaseReg, no IndexReg at %x for %s\n", this->address, DisAsmText.GetDisAsm(this->GetAddr())); } AssignRT->SetRightOperand(OffsetOp); } } } // end if nonzero offset else { // no offset if ((R_none == BaseReg) || (R_none == IndexReg)) { SMP_msg("WARNING: lea used as move at %x for %s\n", this->address, DisAsmText.GetDisAsm(this->GetAddr())); if (R_none != BaseReg) AssignRT->SetRightOperand(BaseOp); else { assert(R_none != IndexReg); AssignRT->SetRightOperand(IndexOp); } } else { // we have a BaseReg and an IndexReg, unscaled, no offset SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightOperand(IndexOp); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } } // end if nonzero offset ... else ... } // end if (ScaledIndexReg) ... else ... 
this->RTL.push_back(AssignRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildLeaRTL() // Build the RTL for an double-word shift instruction bool SMPInstr::BuildDoubleShiftRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool CountFound = false; SMPRegTransfer *TempRT = NULL; SMPRegTransfer *RightRT = new SMPRegTransfer; SMPRegTransfer *LowerRightRT = new SMPRegTransfer; // The doubleword shifts operate as follows: shift the DEF register right or left by // the number of bits specified by the count, and shift in bits from the USE register, // but do not change the USE register. This is hard to represent accurately in an RTL, // so we create an RTL as follows: // // ASSIGN // / \ // DEF SHIFT // / \ // DEF SHIFT // / \ // USE count // // This records all the operands, but it makes it look like the result of the // lower shift is the counter for the upper shift, which is unintentional and // will have to be special cased. In all single word shifts, the lower shift // subtree would be replaced by a single count operand. The presence of a sub-tree // instead of an operand is the identifying marker for double-word shifts. 
op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; for (OpNum = 0; !(DestFound && SourceFound && CountFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); LowerRightRT->SetOperator(BinaryOp); RightRT->SetRightTree(LowerRightRT); } } else { // USE if (MDKnownOperandType(TempOp)) { if (!SourceFound) { SourceFound = true; LowerRightRT->SetLeftOperand(TempOp); } else { CountFound = true; LowerRightRT->SetRightOperand(TempOp); } } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound || !CountFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find double-shift operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); // The carry flag gets the last shifted out bit. this->RTL.ExtraKills.push_back(FlagsOp); } return (DestFound && SourceFound && CountFound); } // end of SMPInstr::BuildDoubleShiftRTL() // Build the RTL for a multiply or divide, which can have implicit EAX and/or EDX operands bool SMPInstr::BuildMultiplyDivideRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool HiddenEAXUse = false; bool ImplicitEDXUse = false; SMPRegTransfer *TempRT = NULL; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack op_t Immed1Op = InitOp; Immed1Op.type = o_imm; // immediate 1 for increment or decrement FPRegOp.value = 1; // Detect the cases in which EDX:EAX is the destination and EAX is a hidden operand. // See detailed comments on the multiply and divide instructions in MDFixupDefUseLists(). 
for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (!TempOp.showed()) { // hidden operand if (TempOp.is_reg(R_ax)) { // not R_al, so it is not 8 bits // This form always has a hidden use of EDX:EAX HiddenEAXUse = true; ImplicitEDXUse = true; } else if (TempOp.is_reg(R_al)) { // Use of AX register to hold 16-bit result is hidden, // but EDX is not needed to hold result bits. HiddenEAXUse = true; } } } for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; RightRT->SetRightOperand(TempOp); } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound) { assert(NULL != RightRT); if (DestFound && (NULL != TempRT)) delete TempRT; else delete RightRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find mul/div operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); if (ImplicitEDXUse) { // Need another effect for EDX, which was implicit. // Make a deep copy from existing EAX effect and change EAX dest to EDX. // For divisions, we also deep copy EAX effect and change EAX source to EDX. SMPRegTransfer *EDXRT = new SMPRegTransfer; SMPRegTransfer *EDXRightRT = new SMPRegTransfer; op_t EDXOp; EDXRT->SetOperator(SMP_ASSIGN); EDXOp = TempRT->GetLeftOperand(); assert(EDXOp.is_reg(R_ax)); EDXOp.reg = R_dx; EDXRT->SetLeftOperand(EDXOp); op_t SourceOp = RightRT->GetLeftOperand(); if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) { // Need to change left operand of RightRT to EDX. i.e. 
we are // changing the effect from eax := eax DIV foo to edx := edx DIV foo. assert(SourceOp.is_reg(R_ax)); EDXRightRT->SetLeftOperand(EDXOp); } else { // just use same source operands for multiplies EDXRightRT->SetLeftOperand(SourceOp); } EDXRightRT->SetOperator(BinaryOp); EDXRightRT->SetRightOperand(RightRT->GetRightOperand()); EDXRT->SetRightTree(EDXRightRT); this->RTL.push_back(EDXRT); this->ResetMultiplicationBitsDiscarded(); } else { // No implicit EDX effect. // If we had 8x8=>16 bit multiply with AL*op8=>AX there // is no discarding of result bits, else there is discarding. if (!HiddenEAXUse) this->SetMultiplicationBitsDiscarded(); } } return (DestFound && SourceFound); } // end of SMPInstr::BuildMultiplyDivideRTL() // Build the RTL for an instruction with a tertiary arithmetic opcode applied to // two operands plus an implied FLAGS operand, e.g. add with carry adds the carry bit // and two operands together; rotate through carry, etc. bool SMPInstr::BuildBinaryPlusFlagsRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; SMPRegTransfer *TempRT = NULL; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; SMPRegTransfer *RightRT = new SMPRegTransfer; SMPRegTransfer *FlagsRightRT = new SMPRegTransfer; for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; FlagsRightRT->SetLeftOperand(TempOp); FlagsRightRT->SetOperator(BinaryOp); FlagsRightRT->SetRightOperand(FlagsOp); RightRT->SetRightTree(FlagsRightRT); } } } // end for (OpNum = 0; ...) 
if (!DestFound || !SourceFound) { if (DestFound) delete TempRT; // also deletes linked in RightRT else delete RightRT; // will also delete FlagsRightRT if SourceFound is true if (!SourceFound) // FlagsRightRT not linked into RightRT yet delete FlagsRightRT; // .. so delete FlagsRightRT separately #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildBinaryPlusFlagsRTL() #define SMP_FIRST_SET_OPCODE NN_seta #define SMP_LAST_SET_OPCODE NN_setz // Build the RTL for an instruction of form dest := unary_operator(source), dest != source bool SMPInstr::BuildUnary2OpndRTL(SMPoperator UnaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; int opcode = this->SMPcmd.itype; bool ExtendedMove = ((NN_movsx == opcode) || (NN_movzx == opcode)); op_t VoidOp = InitOp; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t PortNumOp = InitOp; PortNumOp.type = o_reg; PortNumOp.reg = R_dx; op_t PortDataOp = InitOp; PortDataOp.type = o_reg; PortDataOp.reg = R_ax; // Handle special cases first. if ((SMP_FIRST_SET_OPCODE <= opcode) && (SMP_LAST_SET_OPCODE >= opcode)) { // Set instructions implicitly use the flags register. 
SourceFound = true; RightRT->SetLeftOperand(FlagsOp); } for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; if (NN_in == opcode) { SMP_msg("ERROR: Explicit DEF for IN from port opcode at %x : ", this->GetAddr()); PrintOperand(TempOp); SMP_msg("\n"); TempRT->SetLeftOperand(PortDataOp); TempRT->SetOperator(UnaryOp); } else if (NN_out == opcode) { TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(UnaryOp); } else { TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetRightOperand(VoidOp); RightRT->SetOperator(UnaryOp); TempRT->SetRightTree(RightRT); } } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; if (NN_in == opcode) { TempRT->SetRightOperand(TempOp); } else if (NN_out == opcode) { SMP_msg("ERROR: Explicit USE for OUT to port opcode at %x : ", this->GetAddr()); PrintOperand(TempOp); SMP_msg("\n"); TempRT->SetRightOperand(PortDataOp); } else { RightRT->SetLeftOperand(TempOp); if (ExtendedMove) this->MoveSource = TempOp; } } } } // end for (OpNum = 0; ...) if (!SourceFound && (NN_in == opcode)) { // Input from port is implicitly from port # in DX register if not // specified with an immediate operand. SourceFound = true; TempRT->SetRightOperand(PortNumOp); } if (!DestFound && (NN_in == opcode)) { // Input from port is implicitly to register AL, AX, or EAX // depending on the opcode and bit width mode. DestFound = true; TempRT->SetLeftOperand(PortDataOp); TempRT->SetOperator(UnaryOp); } if (!DestFound && (NN_out == opcode)) { // Output to port is implicitly to port # in DX register if not // specified with an immediate operand. 
DestFound = true; TempRT->SetLeftOperand(PortNumOp); TempRT->SetOperator(SMP_ASSIGN); } if (!SourceFound && (NN_out == opcode)) { // Output to port is implicitly from register AL, AX, or EAX // depending on the opcode and bit width mode. SourceFound = true; TempRT->SetRightOperand(PortDataOp); } if (!DestFound || !SourceFound) { if (!DestFound) delete RightRT; // never linked in to TempRT if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); if ((NN_in == opcode) || (NN_out == opcode)) delete RightRT; // unused for port I/O } return (DestFound && SourceFound); } // end of SMPInstr::BuildUnary2OpndRTL() // Build the RTL for an instruction of form dest := source, where dest != source bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool MemSrc = this->HasSourceMemoryOperand(); bool MemDest = this->HasDestMemoryOperand(); bool HasRepeatPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)) || (0 != (this->SMPcmd.auxpref & aux_repne)); int opcode = this->SMPcmd.itype; #if IDA_SDK_VERSION < 600 if ((NN_ldmxcsr == opcode) || (NN_stmxcsr == opcode)) { // IDA 5.1 does not have the R_mxcsr enumeration value, // so we cannot handle these opcodes. 
return false; } #endif SMPRegTransfer *TempRT = new SMPRegTransfer; op_t VoidOp = InitOp; op_t EAXOp = InitOp; EAXOp.type = o_reg; EAXOp.reg = R_ax; op_t ALOp = InitOp; ALOp.type = o_reg; ALOp.reg = R_al; ALOp.dtyp = dt_byte; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack FPRegOp.reg = 0; op_t PortNumOp = InitOp; PortNumOp.type = o_reg; PortNumOp.reg = R_dx; op_t PortDataOp = InitOp; PortDataOp.type = o_reg; PortDataOp.reg = R_ax; #if IDA_SDK_VERSION > 599 op_t MXCSROp = InitOp; // MMX Control & Status Register MXCSROp.type = o_reg; MXCSROp.reg = R_mxcsr; #endif op_t ZeroOp = InitOp; ZeroOp.type = o_imm; // immediate zero ZeroOp.value = 0; #if SMP_DEBUG_BUILD_RTL if (MemSrc && MemDest && (NN_movs != opcode)) { if (NN_stos != opcode) { SMP_msg("ERROR: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } else { // IDA incorrectly lists [EDI] as both DEF and USE, because reg EDI // is both DEF and USE in NN_stos. SMP_msg("WARNING: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } this->PrintOperands(); } #endif // First, handle special cases with implicit operands if (NN_lahf == opcode) { // load AH from flags TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(EAXOp); TempRT->SetRightOperand(FlagsOp); this->RTL.push_back(TempRT); return true; } if (NN_sahf == opcode) { // store AH to flags TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(FlagsOp); TempRT->SetRightOperand(EAXOp); this->RTL.push_back(TempRT); return true; } if ((NN_movs == opcode) || (NN_stos == opcode) || (NN_ins == opcode) || (NN_outs == opcode)) { // The ESI and EDI registers get incremented or decremented, depending // on the direction flag DF, for MOVS; only EDI for STOS and INS; // only ESI for OUTS. 
// This is true with or without a repeat prefix. op_t ESIOp = InitOp, EDIOp = InitOp; ESIOp.type = o_reg; ESIOp.reg = R_si; EDIOp.type = o_reg; EDIOp.reg = R_di; op_t ESIMemOp = InitOp, EDIMemOp = InitOp; // [esi] and [edi] ESIMemOp.type = o_phrase; ESIMemOp.reg = R_si; EDIMemOp.type = o_phrase; EDIMemOp.reg = R_di; if (NN_movs == opcode) { this->RTL.ExtraKills.push_back(ESIOp); this->RTL.ExtraKills.push_back(EDIOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(EDIMemOp); TempRT->SetRightOperand(ESIMemOp); DestFound = true; SourceFound = true; } else if (NN_stos == opcode) { this->RTL.ExtraKills.push_back(EDIOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(EDIMemOp); TempRT->SetRightOperand(ALOp); // default in case we don't find source later DestFound = true; } else if (NN_ins == opcode) { this->RTL.ExtraKills.push_back(EDIOp); TempRT->SetOperator(SMP_INPUT); TempRT->SetLeftOperand(EDIMemOp); TempRT->SetRightOperand(PortNumOp); DestFound = true; SourceFound = true; } else if (NN_outs == opcode) { this->RTL.ExtraKills.push_back(ESIOp); TempRT->SetOperator(SMP_OUTPUT); TempRT->SetLeftOperand(ESIMemOp); TempRT->SetRightOperand(PortNumOp); DestFound = true; SourceFound = true; } } // Some floating point instructions use the floating point register stack top as // an implicit source or destination, but the other operand of the load or store // is explicit, so we set the implicit operand and let control flow pass to the // main processing loop below. if ((NN_fld == opcode) || (NN_fbld == opcode) || (NN_fild == opcode)) { // Loads implicitly use the floating point stack top as destination. 
TempRT->SetLeftOperand(FPRegOp); TempRT->SetOperator(SMP_ASSIGN); DestFound = true; } else if ((NN_fst == opcode) || (NN_fstp == opcode) || (NN_fbstp == opcode) || (NN_fist == opcode) || (NN_fistp == opcode)) { // Stores implicitly use the floating point stack top as source TempRT->SetRightOperand(FPRegOp); SourceFound = true; // The "p" at the end of the opcode indicates that the floating point // register stack gets popped. if ((NN_fstp == opcode) || (NN_fbstp == opcode) || (NN_fistp == opcode)) { this->RTL.ExtraKills.push_back(FPRegOp); } } #if IDA_SDK_VERSION > 599 else if (NN_ldmxcsr == opcode) { // The MMX Control & Status Register is used implicitly. TempRT->SetLeftOperand(MXCSROp); DestFound = true; } else if (NN_stmxcsr == opcode) { // The MMX Control & Status Register is used implicitly. TempRT->SetRightOperand(MXCSROp); SourceFound = true; } #endif for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (!DestFound && MDKnownOperandType(TempOp)) { // See comments just below for floating point sources. FP stores // are analogous to FP loads. if (!MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { DestFound = true; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); } } } else { // USE if (!SourceFound && MDKnownOperandType(TempOp)) { // If this is a floating point instruction with the fpregs listed as // a USE and a memory operand also listed as a USE, then we want to // ignore the irrelevant USE of the fpreg stack. // Note that MemDest AND MemSrc means something like stosb is being // processed, where the memory operand is both DEF and USE to IDA // for mysterious reasons. 
if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { SourceFound = true; TempRT->SetRightOperand(TempOp); this->MoveSource = TempOp; } } if (this->features & UseMacros[OpNum]) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE SMP_msg("WARNING: Operand neither DEF nor USE: "); PrintOperand(TempOp); SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find move operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { // If the move is conditional, set the guard expression. if (SMP_NULL_OPERATOR != GuardOp) { SMPGuard *Guard1 = new SMPGuard; Guard1->SetLeftOperand(FlagsOp); Guard1->SetOperator(GuardOp); Guard1->SetRightOperand(ZeroOp); TempRT->SetGuard(Guard1); if (this->MDIsConditionalMoveInstr()) { // We need to represent the possibility that the DEF operand will not // be set because the move is conditional. We will add the DEF operand // into the USE set and special case our type inferences so that the // USE and the pseudo-USE (prior SSA value of the DEF operand) must // agree in type before we can be sure of the result type. 
assert(this->Defs.GetSize() == 1); this->Uses.SetRef(this->Defs.GetFirstRef()->GetOp()); } } this->RTL.push_back(TempRT); // Now, create the repeat prefix effects if (HasRepeatPrefix) { // Must be MOVS or STOS or INS or OUTS // The repeat causes USE and DEF of ECX as a counter SMPRegTransfer *CounterRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; CounterRT->SetLeftOperand(CountOp); CounterRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(CountOp); RightRT->SetOperator(SMP_UNARY_NUMERIC_OPERATION); RightRT->SetRightOperand(VoidOp); CounterRT->SetRightTree(RightRT); this->RTL.push_back(CounterRT); } } return (DestFound && SourceFound); } // end of SMPInstr::BuildMoveRTL() // Build the RTL for a load string instruction, which loads from ES:ESI into EAX // (or into AX or AL) and increments or decrements ESI based on the direction flag DF. bool SMPInstr::BuildLoadStringRTL(void) { bool DestFound = false; unsigned short ByteSize = 0; op_t DestOp = InitOp; DestOp.type = o_reg; DestOp.reg = R_al; for (size_t OpNum = 0; !DestFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if ((o_reg == TempOp.type) && (this->features & DefMacros[OpNum])) { // DEF DestFound = true; if (TempOp.is_reg(R_al)) { ByteSize = 1; } else if (TempOp.is_reg(R_ax)) { ByteSize = 4; DestOp.reg = R_ax; } else { SMP_msg("ERROR: Load string destination operand is neither AL nor EAX at %x\n", this->GetAddr()); ByteSize = 1; // default to AL destination } } } } // end for (OpNum = 0; ...) 
// Return false if we did not find a destination if (!DestFound) { return false; } op_t ZeroOp = InitOp; ZeroOp.type = o_imm; // immediate zero ZeroOp.value = 0; op_t OneOp = InitOp; OneOp.type = o_imm; // immediate one OneOp.value = 1; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t ESIOp = InitOp; ESIOp.type = o_reg; ESIOp.reg = R_si; op_t DerefESIOp = InitOp; DerefESIOp.type = o_phrase; DerefESIOp.reg = R_si; SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *GuardedIncRT = new SMPRegTransfer; SMPRegTransfer *GuardedDecRT = new SMPRegTransfer; SMPRegTransfer *RightIncRT = new SMPRegTransfer; SMPRegTransfer *RightDecRT = new SMPRegTransfer; // Build the load string RTL. Ignore ES segment register for now. // Load string is: AL := [ESI]; if (DF == 0) ESI += 1 else ESI -= 1; // for the 8-bit case, and EAX := [ESI]; if (DF == 0) ESI += 4 else ESI -= 4; // for the 32-bit case. // AL := [ESI] or EAX := [ESI] TempRT->SetLeftOperand(DestOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(DerefESIOp); this->RTL.push_back(TempRT); // Guarded increment RTL: If the DF (Direction Flag) in EFLAGS is 0, increment ESI // by ByteSize. op_t IncDecOp = InitOp; IncDecOp.type = o_imm; IncDecOp.value = ByteSize; SMPGuard *Guard1 = new SMPGuard; Guard1->SetLeftOperand(FlagsOp); Guard1->SetOperator(SMP_U_COMPARE); Guard1->SetRightOperand(ZeroOp); GuardedIncRT->SetGuard(Guard1); GuardedIncRT->SetLeftOperand(ESIOp); GuardedIncRT->SetOperator(SMP_ASSIGN); RightIncRT->SetLeftOperand(ESIOp); RightIncRT->SetOperator(SMP_ADD); RightIncRT->SetRightOperand(IncDecOp); GuardedIncRT->SetRightTree(RightIncRT); this->RTL.push_back(GuardedIncRT); // Guarded decrement RTL: If the DF (Direction Flag) in EFLAGS is 1, decrement ESI // by ByteSize. 
SMPGuard *Guard2 = new SMPGuard; Guard2->SetLeftOperand(FlagsOp); Guard2->SetOperator(SMP_U_COMPARE); Guard2->SetRightOperand(OneOp); GuardedDecRT->SetGuard(Guard2); GuardedDecRT->SetLeftOperand(ESIOp); GuardedDecRT->SetOperator(SMP_ASSIGN); RightDecRT->SetLeftOperand(ESIOp); RightDecRT->SetOperator(SMP_SUBTRACT); RightDecRT->SetRightOperand(IncDecOp); GuardedDecRT->SetRightTree(RightDecRT); this->RTL.push_back(GuardedDecRT); return true; } // end of SMPInstr::BuildLoadStringRTL() // Build the RTL for a compare string instruction, possibly with repeat prefix. bool SMPInstr::BuildCompareStringRTL(void) { size_t OpNum; bool Src1Found = false; bool Src2Found = false; bool HasRepeatPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)) || (0 != (this->SMPcmd.auxpref & aux_repne)); op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; op_t VoidOp = InitOp; SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (!Src1Found) { Src1Found = true; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(SMP_U_COMPARE); TempRT->SetRightTree(RightRT); if (this->features & DefMacros[OpNum]) // DEF SMP_msg("CMPS 1st opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE SMP_msg("CMPS 1st opnd is USE\n"); else SMP_msg("CMPS 1st opnd neither DEF nor USE\n"); } else { Src2Found = true; RightRT->SetRightOperand(TempOp); if (this->features & DefMacros[OpNum]) // DEF SMP_msg("CMPS 2nd opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE SMP_msg("CMPS 2nd opnd is USE\n"); else SMP_msg("CMPS 2nd opnd neither DEF nor USE\n"); } } } // end for (OpNum = 0; ...) 
if (!Src1Found || !Src2Found) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find CMPS operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); // Now, create the repeat prefix effects if (HasRepeatPrefix) { // The repeat causes USE and DEF of ECX as a counter SMPRegTransfer *CounterRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; CounterRT->SetLeftOperand(CountOp); CounterRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(CountOp); RightRT->SetOperator(SMP_UNARY_NUMERIC_OPERATION); RightRT->SetRightOperand(VoidOp); CounterRT->SetRightTree(RightRT); this->RTL.push_back(CounterRT); } } return (Src1Found && Src2Found); } // end of SMPInstr::BuildCompareStringRTL() // Build the RTL for an instruction of form dest := source, source := dest bool SMPInstr::BuildExchangeRTL(void) { size_t OpNum; bool Src1Found = false; bool Src2Found = false; SMPRegTransfer *TempRT = new SMPRegTransfer; // second effect, src := dest for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (!Src1Found) { Src1Found = true; TempRT->SetRightOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); #if SMP_VERBOSE_DEBUG_BUILD_RTL if (this->features & DefMacros[OpNum]) // DEF SMP_msg("XCHG 1st opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE SMP_msg("XCHG 1st opnd is USE\n"); else SMP_msg("XCHG 1st opnd neither DEF nor USE\n"); #endif } else { Src2Found = true; TempRT->SetLeftOperand(TempOp); if (this->features & DefMacros[OpNum]) // DEF SMP_msg("XCHG 2nd opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE SMP_msg("XCHG 2nd opnd is USE\n"); else SMP_msg("XCHG 2nd opnd neither DEF nor USE\n"); } } } // end for (OpNum = 0; ...) 
if (!Src1Found || !Src2Found) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find XCHG operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { // Create the first effect, dest := src SMPRegTransfer *FirstRT = new SMPRegTransfer; FirstRT->SetLeftOperand(TempRT->GetRightOperand()); FirstRT->SetRightOperand(TempRT->GetLeftOperand()); FirstRT->SetOperator(SMP_ASSIGN); this->RTL.push_back(FirstRT); // Push the second effect on the list, src := dest this->RTL.push_back(TempRT); } return (Src1Found && Src2Found); } // end of SMPInstr::BuildExchangeRTL() // Build the RTL for an instruction of form dest := dest + source, source := dest bool SMPInstr::BuildExchangeAddRTL(void) { size_t OpNum; bool Src1Found = false; bool Src2Found = false; SMPRegTransfer *TempRT = new SMPRegTransfer; // second effect, src := dest for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (!Src1Found) { Src1Found = true; TempRT->SetRightOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); if (this->features & DefMacros[OpNum]) // DEF SMP_msg("XADD 1st opnd is DEF\n"); // should be the case else if (this->features & UseMacros[OpNum]) // USE SMP_msg("WARNING: XADD 1st opnd is USE\n"); else SMP_msg("WARNING: XADD 1st opnd neither DEF nor USE\n"); } else { Src2Found = true; TempRT->SetLeftOperand(TempOp); if (this->features & DefMacros[OpNum]) // DEF SMP_msg("WARNING: XADD 2nd opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE SMP_msg("XADD 2nd opnd is USE\n"); // should be the case else SMP_msg("WARNING: XADD 2nd opnd neither DEF nor USE\n"); } } } // end for (OpNum = 0; ...) 
if (!Src1Found || !Src2Found) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find XADD operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { // Create the first effect, dest := dest + src SMPRegTransfer *FirstRT = new SMPRegTransfer; SMPRegTransfer *AddRT = new SMPRegTransfer; AddRT->SetLeftOperand(TempRT->GetRightOperand()); AddRT->SetOperator(SMP_ADD); AddRT->SetRightOperand(TempRT->GetLeftOperand()); FirstRT->SetLeftOperand(TempRT->GetRightOperand()); FirstRT->SetRightTree(AddRT); FirstRT->SetOperator(SMP_ASSIGN); this->RTL.push_back(FirstRT); // Push the second effect on the list, src := dest this->RTL.push_back(TempRT); } return (Src1Found && Src2Found); } // end of SMPInstr::BuildExchangeAddRTL() // Build the RTL for an instruction of form: // if (dest==EAX) dest := source else EAX := dest bool SMPInstr::BuildCompareExchangeRTL(void) { size_t OpNum; bool DestFound = false; bool SourceFound = false; op_t DestOp = InitOp; op_t SourceOp = InitOp; SMPRegTransfer *TempRT = new SMPRegTransfer; for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (this->features & DefMacros[OpNum]) { // DEF if (!DestFound) { DestFound = true; DestOp = TempOp; } else { SMP_msg("CMPXCHG has two DEF operands.\n"); } } else if (this->features & UseMacros[OpNum]) { // USE if (!SourceFound) { SourceFound = true; SourceOp = TempOp; } else { SMP_msg("CMPXCHG has two USE operands.\n"); } } } } // end for (OpNum = 0; ...) 
if (!DestFound || !SourceFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find CMPXCHG operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { // Create the first effect, if (dest == EAX) dest := src SMPGuard *Guard1 = new SMPGuard; op_t EAXOp = InitOp; EAXOp.type = o_reg; EAXOp.reg = R_ax; Guard1->SetLeftOperand(DestOp); Guard1->SetOperator(SMP_EQUAL); Guard1->SetRightOperand(EAXOp); SMPRegTransfer *FirstRT = new SMPRegTransfer; FirstRT->SetLeftOperand(DestOp); FirstRT->SetRightOperand(SourceOp); FirstRT->SetOperator(SMP_ASSIGN); FirstRT->SetGuard(Guard1); this->RTL.push_back(FirstRT); // Push the second effect on the list, if (dest!=EAX) dest := EAX SMPGuard *Guard2 = new SMPGuard; Guard2->SetLeftOperand(DestOp); Guard2->SetOperator(SMP_EQUAL); Guard2->SetRightOperand(EAXOp); TempRT->SetLeftOperand(DestOp); TempRT->SetRightOperand(EAXOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetGuard(Guard2); this->RTL.push_back(TempRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildCompareExchangeRTL() // Build the RTL for an extended FP concatenate and shift instruction bool SMPInstr::BuildPackShiftRTL(SMPoperator PackOp, SMPoperator ShiftOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool CountFound = false; SMPRegTransfer *TempRT = NULL; SMPRegTransfer *ShiftRT = new SMPRegTransfer; SMPRegTransfer *PackRT = new SMPRegTransfer; // RTL structure: top operator is assignment, next right operator is a reverse // shift with the shift count as its left operand, and lowest right operator // is the concatenation operator. Sequence of operations is pack, shift, assign. 
for (OpNum = 0; !(DestFound && SourceFound && CountFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); PackRT->SetLeftOperand(TempOp); PackRT->SetOperator(PackOp); ShiftRT->SetOperator(ShiftOp); ShiftRT->SetRightTree(PackRT); TempRT->SetRightTree(ShiftRT); } } else { // USE if (MDKnownOperandType(TempOp)) { if (!SourceFound) { SourceFound = true; PackRT->SetRightOperand(TempOp); } else { CountFound = true; ShiftRT->SetLeftOperand(TempOp); } } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound || !CountFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find MMX/XMM pack and shift operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); } return (DestFound && SourceFound && CountFound); } // end of SMPInstr::BuildPackShiftRTL() // Build the RTL for a compare or test instruction with an implicit EFLAGS destination operand bool SMPInstr::BuildFlagsDestBinaryRTL(SMPoperator BinaryOp) { size_t OpNum; int opcode = this->SMPcmd.itype; bool Source1Found = false; bool Source2Found = false; bool NoOperandsRequired = ((NN_scas == opcode) || (NN_cmps == opcode)); bool HasRepeatPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)) || (0 != (this->SMPcmd.auxpref & aux_repne)); SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t VoidOp = InitOp, FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack FPRegOp.reg = 0; // Some floating point instructions use the floating point register stack top as // an implicit source or destination, but the 
other operand of the load or store // is explicit, so we set the implicit operand and let control flow pass to the // main processing loop below. if ((NN_fcomi == opcode) || (NN_fucomi == opcode) || (NN_fcomip == opcode) || (NN_fucomip == opcode)) { // Compares implicitly use the floating point stack top as destination. TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(FPRegOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); Source1Found = true; // The "p" at the end of the opcode indicates that the floating point // register stack gets popped. if ((NN_fcomip == opcode) || (NN_fucomip == opcode)) { this->RTL.ExtraKills.push_back(FPRegOp); } } for (OpNum = 0; !(Source1Found && Source2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Found destination for compare or test at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { if (!Source1Found) { Source1Found = true; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } else { assert(!Source2Found); Source2Found = true; RightRT->SetRightOperand(TempOp); } } } } // end for (OpNum = 0; ...) // The compare string instruction always uses DS:ESI and ES:EDI as its source // operands, regardless of the explicit operands given, and might not have // explicit operands; explicit operands are just for documentation. // The scan string instruction uses EAX/AX/AH/AL and ES:EDI as its source // operands and might not have any explicit operands at all. 
if ((!NoOperandsRequired) && (!Source1Found || !Source2Found)) { if (!Source1Found) delete RightRT; else delete TempRT; #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find CMP/TEST operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); // Now, create the repeat prefix effects if (HasRepeatPrefix) { // Must be CMPS or SCAS // The repeat causes USE and DEF of ECX as a counter SMPRegTransfer *CounterRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; CounterRT->SetLeftOperand(CountOp); CounterRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(CountOp); RightRT->SetOperator(SMP_UNARY_NUMERIC_OPERATION); RightRT->SetRightOperand(VoidOp); CounterRT->SetRightTree(RightRT); this->RTL.push_back(CounterRT); } if ((NN_cmps == opcode) || (NN_scas == opcode)) { // The ESI and EDI registers get incremented or decremented, depending // on the direction flag DF, for CMPS; only EDI for SCAS. // This is true with or without a repeat prefix. 
op_t ESIOp = InitOp, EDIOp = InitOp; ESIOp.type = o_reg; ESIOp.reg = R_si; EDIOp.type = o_reg; EDIOp.reg = R_di; if (NN_cmps == opcode) { this->RTL.ExtraKills.push_back(ESIOp); } this->RTL.ExtraKills.push_back(EDIOp); } } return (NoOperandsRequired || (Source1Found && Source2Found)); } // end of SMPInstr::BuildFlagsDestBinaryRTL() // Build the RTL for a direct or indirect call instruction bool SMPInstr::BuildCallRTL(void) { size_t OpNum; bool SourceFound = false; op_t VoidOp = InitOp; SMPRegTransfer *TempRT = NULL; for (OpNum = 0; !SourceFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Found destination operand for call at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(VoidOp); TempRT->SetOperator(SMP_CALL); TempRT->SetRightOperand(TempOp); } } } // end for (OpNum = 0; ...) 
if (!SourceFound) { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find CALL operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { this->RTL.push_back(TempRT); } return SourceFound; } // end of SMPInstr::BuildCallRTL() // Build the RTL for a return instruction, with or without extra bytes popped off stack bool SMPInstr::BuildReturnRTL(void) { size_t OpNum; uval_t PopBytes = 4; // default: pop off return address for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Found destination operand for RET at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { if (o_imm == TempOp.type) { PopBytes += TempOp.value; } else { #if SMP_DEBUG_BUILD_RTL if (!(this->IsTailCall())) { SMP_msg("ERROR: Found unexpected operand for return at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } #endif } } } } // end for (OpNum = 0; ...) this->AddToStackPointer(PopBytes); return true; } // end of SMPInstr::BuildReturnRTL() // Build the RTL for an ENTER instruction bool SMPInstr::BuildEnterRTL(void) { // An "ENTER k,0" instruction with allocation k and nesting level 0 does the following: // push ebp // mov ebp,esp // sub esp,k // This can be modeled by the parallel effects: // [esp-4] := ebp; ebp := esp - 4; esp := esp - (k + 4) // If nesting level is greater than zero, we have a block structure language with // nested procedures, in which additional frame pointers are saved: // "ENTER k,n" pushes n additional frame pointers on the stack. We will only model // the change in the stack pointer here, and not attempt to transfer the display // pointers. A warning will be issued to the log file. 
Parallel effects are: // [esp-4] := ebp; ebp := esp - 4; esp := esp - (((k + n)*4)+4) // Note that k and n are immediate values so the final expression can be computed. size_t OpNum; uval_t NestingLevel = 0; uval_t AllocBytes = 0; bool AllocFound = false; bool NestingLevelFound = false; op_t StackPointerOp = InitOp; // ESP StackPointerOp.type = o_reg; StackPointerOp.reg = R_sp; op_t FramePointerOp = InitOp; // EBP FramePointerOp.type = o_reg; FramePointerOp.reg = R_bp; op_t Immed4Op = InitOp; // 4 Immed4Op.type = o_imm; Immed4Op.value = 4; op_t SavedEBP = InitOp; // [ESP-4], location of saved EBP SavedEBP.type = o_displ; SavedEBP.addr = (ea_t) -4; SavedEBP.reg = R_sp; for (OpNum = 0; !(AllocFound && NestingLevelFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Found destination operand for ENTER at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { if (o_imm == TempOp.type) { if (!AllocFound) { AllocBytes = TempOp.value; AllocFound = true; } else { NestingLevel = TempOp.value; NestingLevelFound = true; } } else { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Found unexpected operand for ENTER at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } } } } // end for (OpNum = 0; ...) 
if (!AllocFound) { #if SMP_DEBUG_BUILD_RTL SMP_msg("ERROR: Could not find allocation operand for ENTER at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); #endif } else { SMPRegTransfer *TempRT = new SMPRegTransfer; // Add first effect: [esp-4] := ebp TempRT->SetLeftOperand(SavedEBP); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(FramePointerOp); this->RTL.push_back(TempRT); TempRT = NULL; // Add second effect: ebp := esp - 4 TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FramePointerOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(StackPointerOp); RightRT->SetOperator(SMP_SUBTRACT); RightRT->SetRightOperand(Immed4Op); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); TempRT = NULL; RightRT = NULL; // Add final effect on stack pointer AllocBytes += ((4 * NestingLevel) + 4); if (0 != NestingLevel) { SMP_msg("WARNING: Nested procedures in ENTER instruction at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } this->SubFromStackPointer(AllocBytes); } return AllocFound; } // end of SMPInstr::BuildEnterRTL() // Build the RTL for an LEAVE instruction bool SMPInstr::BuildLeaveRTL(void) { // A LEAVE instruction simulates the following instructions: // mov ebp into esp (deallocates stack frame) // pop saved ebp off stack into ebp // We want to model these two instructions with three parallel effects: // esp := ebp; ebp := [ebp+0]; esp = esp + 4; // There cannot be two definitions of esp in the list of effects, so we do: // esp := ebp + 4; ebp := [ebp+0] as our two parallel effects op_t StackPointerOp = InitOp; // ESP StackPointerOp.type = o_reg; StackPointerOp.reg = R_sp; op_t FramePointerOp = InitOp; // EBP FramePointerOp.type = o_reg; FramePointerOp.reg = R_bp; op_t Immed4Op = InitOp; // 4 Immed4Op.type = o_imm; Immed4Op.value = 4; op_t SavedEBP = InitOp; // [EBP+0] SavedEBP.type = o_displ; SavedEBP.reg = R_bp; // Build first effect: ESP := EBP + 4 
SMPRegTransfer *TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(StackPointerOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetOperator(SMP_ADD); RightRT->SetLeftOperand(FramePointerOp); RightRT->SetRightOperand(Immed4Op); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); TempRT = NULL; RightRT = NULL; // Build second effect: EBP := [EBP+0] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FramePointerOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(SavedEBP); this->RTL.push_back(TempRT); TempRT = NULL; return true; } // end of SMPInstr::BuildLeaveRTL() // Build OptCategory 8 RTLs, which set system info into EDX:EAX. bool SMPInstr::BuildOptType8RTL(void) { op_t DestOp = InitOp; DestOp.type = o_reg; op_t VoidOp = InitOp; // Create the effect on EDX. SMPRegTransfer *TempRT = new SMPRegTransfer; DestOp.reg = R_dx; TempRT->SetLeftOperand(DestOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(VoidOp); RightRT->SetOperator(SMP_SYSTEM_OPERATION); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); // Create the effect on EAX. 
TempRT = NULL; RightRT = NULL; TempRT = new SMPRegTransfer; DestOp.reg = R_ax; TempRT->SetLeftOperand(DestOp); TempRT->SetOperator(SMP_ASSIGN); RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(VoidOp); RightRT->SetOperator(SMP_SYSTEM_OPERATION); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); return true; } // end of BuildOptType8RTL() // Build the RTL for a direct or indirect jump instruction bool SMPInstr::BuildJumpRTL(SMPoperator CondBranchOp) { size_t OpNum; bool TargetFound = false; SMPRegTransfer *TempRT = NULL; op_t EIPOp = InitOp, ZeroOp = InitOp, FlagsOp = InitOp; EIPOp.type = o_reg; EIPOp.reg = R_ip; ZeroOp.type = o_imm; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; #if 0 if (this->IsTailCall()) return this->BuildReturnRTL(); #endif for (OpNum = 0; !TargetFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE if (MDKnownOperandType(TempOp)) { TargetFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(EIPOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(TempOp); if (CondBranchOp != SMP_NULL_OPERATOR) { // Set up a guard expression comparing EFLAGS to zero. // NOTE: This is imprecise for value-set purposes, but OK for types. SMPGuard *BranchCondition = new SMPGuard; BranchCondition->SetOperator(CondBranchOp); // The conditional jumps on ECX==0 compare to ECX, not EFLAGS. if ((NN_jcxz <= this->SMPcmd.itype) && (NN_jrcxz >= this->SMPcmd.itype)) BranchCondition->SetLeftOperand(CountOp); else BranchCondition->SetLeftOperand(FlagsOp); BranchCondition->SetRightOperand(ZeroOp); TempRT->SetGuard(BranchCondition); } this->RTL.push_back(TempRT); } } } // end for (OpNum = 0; ...) 
#if SMP_DEBUG_BUILD_RTL if (!TargetFound) { SMP_msg("ERROR: Could not find jump target at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } #endif return TargetFound; } // end of SMPInstr::BuildJumpRTL() // Add to the stack pointer to deallocate stack space, e.g. for a pop instruction. void SMPInstr::AddToStackPointer(uval_t delta) { SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t StackOp = InitOp, DeltaOp = InitOp; StackOp.type = o_reg; StackOp.reg = R_sp; DeltaOp.type = o_imm; DeltaOp.value = delta; TempRT->SetLeftOperand(StackOp); // ESP := RightRT TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(StackOp); // ESP + delta RightRT->SetOperator(SMP_ADD); RightRT->SetRightOperand(DeltaOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); return; } // end of SMPInstr::AddToStackPointer() // Add to the stack pointer to deallocate stack space, e.g. for a pop instruction. void SMPInstr::SubFromStackPointer(uval_t delta) { SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t StackOp = InitOp, DeltaOp = InitOp; StackOp.type = o_reg; StackOp.reg = R_sp; DeltaOp.type = o_imm; DeltaOp.value = delta; TempRT->SetLeftOperand(StackOp); // ESP := RightRT TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(StackOp); // ESP - delta RightRT->SetOperator(SMP_SUBTRACT); RightRT->SetRightOperand(DeltaOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); return; } // end of SMPInstr::SubFromStackPointer() #define SMP_FIRST_POP_FLAGS NN_popfw #define SMP_LAST_POP_FLAGS NN_popfq #define SMP_FIRST_POP_ALL NN_popaw #define SMP_LAST_POP_ALL NN_popaq // Build the RTL for a pop instruction bool SMPInstr::BuildPopRTL(void) { size_t OpNum, OpSize; bool DestFound = false; SMPRegTransfer *TempRT = NULL; op_t StackOp = InitOp, FlagsOp = InitOp; StackOp.type = o_displ; StackOp.reg = R_sp; // StackOp.addr = 0; // [ESP+0] FlagsOp.type = o_reg; FlagsOp.reg 
= X86_FLAGS_REG; // Handle special cases first. if ((SMP_FIRST_POP_FLAGS <= this->SMPcmd.itype) && (SMP_LAST_POP_FLAGS >= this->SMPcmd.itype)) { TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->AddToStackPointer(4); return true; } if ((SMP_FIRST_POP_ALL <= this->SMPcmd.itype) && (SMP_LAST_POP_ALL >= this->SMPcmd.itype)) { // We pop off 7 registers from the 8 that were pushed on the stack. // The pushed stack pointer is ignored. Instead, the stack pointer value is // adjusted at the end, per the Intel instruction manuals. op_t RegOp = InitOp; RegOp.type = o_reg; // EDI comes from [ESP+0] RegOp.reg = R_di; StackOp.addr = 0; // [ESP+0] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ESI comes from [ESP+4] RegOp.reg = R_si; StackOp.addr = 4; // [ESP+4] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EBP comes from [ESP+8] RegOp.reg = R_bp; StackOp.addr = 8; // [ESP+8] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Skip over saved ESP at [ESP+12] // EBX comes from [ESP+16] RegOp.reg = R_bx; StackOp.addr = 16; // [ESP+16] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EDX comes from [ESP+20] RegOp.reg = R_dx; StackOp.addr = 20; // [ESP+20] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); 
TempRT = NULL; // ECX comes from [ESP+24] RegOp.reg = R_cx; StackOp.addr = 24; // [ESP+24] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EAX comes from [ESP+28] RegOp.reg = R_ax; StackOp.addr = 28; // [ESP+28] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->AddToStackPointer(32); return true; } // end for "pop all" instructions // If we reach this point, we have a simple POP instruction. for (OpNum = 0; !DestFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); StackOp.dtyp = TempOp.dtyp; // size of transfer TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); // Now create the stack pointer increment effect. OpSize = GetOpDataSize(TempOp); this->AddToStackPointer((uval_t) OpSize); } } } // end for (OpNum = 0; ...) #if SMP_DEBUG_BUILD_RTL if (!DestFound) { SMP_msg("ERROR: Could not find pop operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } #endif return DestFound; } // end of SMPInstr::BuildPopRTL() #define SMP_FIRST_PUSH_FLAGS NN_pushfw #define SMP_LAST_PUSH_FLAGS NN_pushfq #define SMP_FIRST_PUSH_ALL NN_pushaw #define SMP_LAST_PUSH_ALL NN_pushaq // Build the RTL for a push instruction bool SMPInstr::BuildPushRTL(void) { size_t OpNum, OpSize; bool SourceFound = false; SMPRegTransfer *TempRT = NULL; op_t StackOp = InitOp, FlagsOp = InitOp; StackOp.type = o_displ; StackOp.reg = R_sp; StackOp.addr = (ea_t) -4; // [ESP-4] FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; // Handle special cases first. 
if ((SMP_FIRST_PUSH_FLAGS <= this->SMPcmd.itype) && (SMP_LAST_PUSH_FLAGS >= this->SMPcmd.itype)) { TempRT = new SMPRegTransfer; TempRT->SetRightOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); // Now create the stack pointer increment effect. this->SubFromStackPointer(4); return true; } if ((SMP_FIRST_PUSH_ALL <= this->SMPcmd.itype) && (SMP_LAST_PUSH_ALL >= this->SMPcmd.itype)) { op_t RegOp = InitOp; RegOp.type = o_reg; // EDI goes to [ESP-32] RegOp.reg = R_di; StackOp.addr = (ea_t) -32; // [ESP-32] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ESI goes to [ESP-28] RegOp.reg = R_si; StackOp.addr = (ea_t) -28; // [ESP-28] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EBP goes to [ESP-24] RegOp.reg = R_bp; StackOp.addr = (ea_t) -24; // [ESP-24] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ESP goes to [ESP-20] RegOp.reg = R_sp; StackOp.addr = (ea_t) -20; // [ESP-20] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EBX goes to [ESP-16] RegOp.reg = R_bx; StackOp.addr = (ea_t) -16; // [ESP-16] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EDX goes to [ESP-12] RegOp.reg = R_dx; StackOp.addr = (ea_t) -12; // [ESP-12] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; 
// ECX goes to [ESP-8] RegOp.reg = R_cx; StackOp.addr = (ea_t) -8; // [ESP-8] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EAX goes to [ESP-4] RegOp.reg = R_ax; StackOp.addr = (ea_t) -4; // [ESP-4] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->SubFromStackPointer(32); return true; } // end for "pop all" instructions // If we reach this point, we have a simple PUSH instruction. for (OpNum = 0; !SourceFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; OpSize = GetOpDataSize(TempOp); TempRT = new SMPRegTransfer; TempRT->SetRightOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); StackOp.dtyp = TempOp.dtyp; // size of transfer StackOp.addr = (ea_t) (-((signed int) OpSize)); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->SubFromStackPointer((uval_t) OpSize); #if 0 this->RTL.Dump(); #endif } } } // end for (OpNum = 0; ...) #if SMP_DEBUG_BUILD_RTL if (!SourceFound) { SMP_msg("ERROR: Could not find push operand at %x for %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); } #endif return SourceFound; } // end of SMPInstr::BuildPushRTL() // Build RTL trees from the SMPcmd info. bool SMPInstr::BuildRTL(void) { op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; SMPRegTransfer *NopRT = NULL; // no-op register transfer // We don't want to explicitly represent the various no-ops except as NULL operations. // E.g. mov esi,esi should not generate DEF and USE of esi, because esi does not change. 
if (this->IsNop()) { NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; } switch (this->SMPcmd.itype) { case NN_aaa: // ASCII Adjust after Addition case NN_aad: // ASCII Adjust AX before Division case NN_aam: // ASCII Adjust AX after Multiply case NN_aas: // ASCII Adjust AL after Subtraction return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_adc: // Add with Carry #if SMP_BUILD_SPECIAL_ADC_SBB_RTL return this->BuildBinaryPlusFlagsRTL(SMP_ADD_CARRY); #else return this->BuildBinaryRTL(SMP_ADD_CARRY); #endif case NN_add: // Add return this->BuildBinaryRTL(SMP_ADD); case NN_and: // Logical AND return this->BuildBinaryRTL(SMP_BITWISE_AND); case NN_arpl: // Adjust RPL Field of Selector case NN_bound: // Check Array Index Against Bounds return false; break; case NN_bsf: // Bit Scan Forward case NN_bsr: // Bit Scan Reverse return this->BuildUnary2OpndRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_bt: // Bit Test return this->BuildFlagsDestBinaryRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_btc: // Bit Test and Complement case NN_btr: // Bit Test and Reset case NN_bts: // Bit Test and Set // Has effects on both the carry flag and the first operand this->RTL.ExtraKills.push_back(FlagsOp); return this->BuildBinaryRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_call: // Call Procedure case NN_callfi: // Indirect Call Far Procedure case NN_callni: // Indirect Call Near Procedure return this->BuildCallRTL(); case NN_cbw: // AL -> AX (with sign) case NN_cwde: // AX -> EAX (with sign) case NN_cdqe: // EAX -> RAX (with sign) return this->BuildUnaryRTL(SMP_SIGN_EXTEND); case NN_clc: // Clear Carry Flag case NN_cld: // Clear Direction Flag return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_cli: // Clear Interrupt Flag case NN_clts: // Clear Task-Switched Flag in CR0 // We don't track the interrupt flag or the special registers, // so we can just consider these to be no-ops. 
// NOTE: Shouldn't we killthe EFLAGS register on NN_cli ??!!??!! NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_cmc: // Complement Carry Flag return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_cmp: // Compare Two Operands return this->BuildFlagsDestBinaryRTL(SMP_S_COMPARE); case NN_cmps: // Compare Strings // Why do we no longer use BuildCompareStringRTL()? ****!!!!**** Test it! return this->BuildFlagsDestBinaryRTL(SMP_U_COMPARE); case NN_cwd: // AX -> DX:AX (with sign) case NN_cdq: // EAX -> EDX:EAX (with sign) case NN_cqo: // RAX -> RDX:RAX (with sign) return this->BuildUnary2OpndRTL(SMP_SIGN_EXTEND); case NN_daa: // Decimal Adjust AL after Addition case NN_das: // Decimal Adjust AL after Subtraction return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_dec: // Decrement by 1 return this->BuildUnaryRTL(SMP_DECREMENT); case NN_div: // Unsigned Divide return this->BuildMultiplyDivideRTL(SMP_U_DIVIDE); case NN_enterw: // Make Stack Frame for Procedure Parameters case NN_enter: // Make Stack Frame for Procedure Parameters case NN_enterd: // Make Stack Frame for Procedure Parameters case NN_enterq: // Make Stack Frame for Procedure Parameters return this->BuildEnterRTL(); case NN_hlt: // Halt // Treat as a no-op NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_idiv: // Signed Divide return this->BuildMultiplyDivideRTL(SMP_S_DIVIDE); case NN_imul: // Signed Multiply return this->BuildMultiplyDivideRTL(SMP_S_MULTIPLY); case NN_in: // Input from Port return this->BuildUnary2OpndRTL(SMP_INPUT); case NN_inc: // Increment by 1 return this->BuildUnaryRTL(SMP_INCREMENT); case NN_ins: // Input Byte(s) from Port to String return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_int: // Call to Interrupt Procedure case NN_into: // Call to Interrupt Procedure if Overflow Flag = 1 case 
NN_int3: // Trap to Debugger return this->BuildCallRTL(); case NN_iretw: // Interrupt Return case NN_iret: // Interrupt Return case NN_iretd: // Interrupt Return (use32) case NN_iretq: // Interrupt Return (use64) return this->BuildReturnRTL(); case NN_ja: // Jump if Above (CF=0 & ZF=0) case NN_jae: // Jump if Above or Equal (CF=0) case NN_jb: // Jump if Below (CF=1) case NN_jbe: // Jump if Below or Equal (CF=1 | ZF=1) case NN_jc: // Jump if Carry (CF=1) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jcxz: // Jump if CX is 0 case NN_jecxz: // Jump if ECX is 0 case NN_jrcxz: // Jump if RCX is 0 return this->BuildJumpRTL(SMP_EQUAL); // special case in BuildJumpRTL() case NN_je: // Jump if Equal (ZF=1) return this->BuildJumpRTL(SMP_EQUAL); case NN_jg: // Jump if Greater (ZF=0 & SF=OF) return this->BuildJumpRTL(SMP_GREATER_THAN); case NN_jge: // Jump if Greater or Equal (SF=OF) return this->BuildJumpRTL(SMP_GREATER_EQUAL); case NN_jl: // Jump if Less (SF!=OF) return this->BuildJumpRTL(SMP_LESS_THAN); case NN_jle: // Jump if Less or Equal (ZF=1 | SF!=OF) return this->BuildJumpRTL(SMP_LESS_EQUAL); case NN_jna: // Jump if Not Above (CF=1 | ZF=1) case NN_jnae: // Jump if Not Above or Equal (CF=1) case NN_jnb: // Jump if Not Below (CF=0) case NN_jnbe: // Jump if Not Below or Equal (CF=0 & ZF=0) a.k.a. ja case NN_jnc: // Jump if Not Carry (CF=0) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jne: // Jump if Not Equal (ZF=0) return this->BuildJumpRTL(SMP_NOT_EQUAL); case NN_jng: // Jump if Not Greater (ZF=1 | SF!=OF) a.k.a. jle return this->BuildJumpRTL(SMP_LESS_EQUAL); case NN_jnge: // Jump if Not Greater or Equal (SF != OF) ** return this->BuildJumpRTL(SMP_LESS_THAN); case NN_jnl: // Jump if Not Less (SF=OF) a.k.a. jge return this->BuildJumpRTL(SMP_GREATER_EQUAL); case NN_jnle: // Jump if Not Less or Equal (ZF=0 & SF=OF) a.k.a. 
jg return this->BuildJumpRTL(SMP_GREATER_THAN); case NN_jno: // Jump if Not Overflow (OF=0) case NN_jnp: // Jump if Not Parity (PF=0) case NN_jns: // Jump if Not Sign (SF=0) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jnz: // Jump if Not Zero (ZF=0) a.k.a. jne return this->BuildJumpRTL(SMP_NOT_EQUAL); case NN_jo: // Jump if Overflow (OF=1) case NN_jp: // Jump if Parity (PF=1) case NN_jpe: // Jump if Parity Even (PF=1) case NN_jpo: // Jump if Parity Odd (PF=0) case NN_js: // Jump if Sign (SF=1) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jz: // Jump if Zero (ZF=1) return this->BuildJumpRTL(SMP_EQUAL); case NN_jmp: // Jump case NN_jmpfi: // Indirect Far Jump case NN_jmpni: // Indirect Near Jump case NN_jmpshort: // Jump Short (not used) return this->BuildJumpRTL(SMP_NULL_OPERATOR); case NN_lahf: // Load Flags into AH Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_lar: // Load Access Right Byte return false; break; case NN_lea: // Load Effective Address return this->BuildLeaRTL(); break; case NN_leavew: // High Level Procedure Exit case NN_leave: // High Level Procedure Exit case NN_leaved: // High Level Procedure Exit case NN_leaveq: // High Level Procedure Exit return this->BuildLeaveRTL(); break; case NN_lgdt: // Load Global Descriptor Table Register case NN_lidt: // Load Interrupt Descriptor Table Register return false; break; case NN_lgs: // Load Full Pointer to GS:xx case NN_lss: // Load Full Pointer to SS:xx case NN_lds: // Load Full Pointer to DS:xx case NN_les: // Load Full Pointer to ES:xx case NN_lfs: // Load Full Pointer to FS:xx // These instructions differ from NN_lea only in setting // a segment register in addition to a pointer. We are // not yet tracking segment registers. 
return this->BuildLeaRTL(); break; case NN_lldt: // Load Local Descriptor Table Register case NN_lmsw: // Load Machine Status Word case NN_lock: // Assert LOCK# Signal Prefix return false; break; case NN_lods: // Load String return this->BuildLoadStringRTL(); break; case NN_loopw: // Loop while ECX != 0 case NN_loop: // Loop while CX != 0 case NN_loopd: // Loop while ECX != 0 case NN_loopq: // Loop while RCX != 0 case NN_loopwe: // Loop while CX != 0 and ZF=1 case NN_loope: // Loop while rCX != 0 and ZF=1 case NN_loopde: // Loop while ECX != 0 and ZF=1 case NN_loopqe: // Loop while RCX != 0 and ZF=1 case NN_loopwne: // Loop while CX != 0 and ZF=0 case NN_loopne: // Loop while rCX != 0 and ZF=0 case NN_loopdne: // Loop while ECX != 0 and ZF=0 case NN_loopqne: // Loop while RCX != 0 and ZF=0 return false; break; case NN_lsl: // Load Segment Limit case NN_ltr: // Load Task Register return false; break; case NN_mov: // Move Data case NN_movsp: // Move to/from Special Registers case NN_movs: // Move Byte(s) from String to String return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_movsx: // Move with Sign-Extend return this->BuildUnary2OpndRTL(SMP_SIGN_EXTEND); case NN_movzx: // Move with Zero-Extend return this->BuildUnary2OpndRTL(SMP_ZERO_EXTEND); case NN_mul: // Unsigned Multiplication of AL or AX return this->BuildMultiplyDivideRTL(SMP_U_MULTIPLY); case NN_neg: // Two's Complement Negation return this->BuildUnaryRTL(SMP_NEGATE); case NN_nop: // No Operation NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_not: // One's Complement Negation return this->BuildUnaryRTL(SMP_BITWISE_NOT); case NN_or: // Logical Inclusive OR return this->BuildBinaryRTL(SMP_BITWISE_OR); case NN_out: // Output to Port return this->BuildUnary2OpndRTL(SMP_OUTPUT); case NN_outs: // Output Byte(s) to Port return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_pop: // Pop a word from the Stack case NN_popaw: 
// Pop all General Registers case NN_popa: // Pop all General Registers case NN_popad: // Pop all General Registers (use32) case NN_popaq: // Pop all General Registers (use64) case NN_popfw: // Pop Stack into Flags Register case NN_popf: // Pop Stack into Flags Register case NN_popfd: // Pop Stack into Eflags Register case NN_popfq: // Pop Stack into Rflags Register return this->BuildPopRTL(); case NN_push: // Push Operand onto the Stack case NN_pushaw: // Push all General Registers case NN_pusha: // Push all General Registers case NN_pushad: // Push all General Registers (use32) case NN_pushaq: // Push all General Registers (use64) case NN_pushfw: // Push Flags Register onto the Stack case NN_pushf: // Push Flags Register onto the Stack case NN_pushfd: // Push Flags Register onto the Stack (use32) case NN_pushfq: // Push Flags Register onto the Stack (use64) return this->BuildPushRTL(); case NN_rcl: // Rotate Through Carry Left return this->BuildBinaryPlusFlagsRTL(SMP_ROTATE_LEFT_CARRY); case NN_rcr: // Rotate Through Carry Right return this->BuildBinaryPlusFlagsRTL(SMP_ROTATE_RIGHT_CARRY); case NN_rol: // Rotate Left return this->BuildBinaryRTL(SMP_ROTATE_LEFT); case NN_ror: // Rotate Right return this->BuildBinaryRTL(SMP_ROTATE_RIGHT); case NN_rep: // Repeat String Operation case NN_repe: // Repeat String Operation while ZF=1 case NN_repne: // Repeat String Operation while ZF=0 return false; break; case NN_retn: // Return Near from Procedure case NN_retf: // Return Far from Procedure return this->BuildReturnRTL(); case NN_sahf: // Store AH into Flags Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_sal: // Shift Arithmetic Left return this->BuildBinaryRTL(SMP_S_LEFT_SHIFT); case NN_sar: // Shift Arithmetic Right return this->BuildBinaryRTL(SMP_S_RIGHT_SHIFT); case NN_shl: // Shift Logical Left return this->BuildBinaryRTL(SMP_U_LEFT_SHIFT); case NN_shr: // Shift Logical Right return this->BuildBinaryRTL(SMP_U_RIGHT_SHIFT); case NN_sbb: // Integer 
Subtraction with Borrow #if SMP_BUILD_SPECIAL_ADC_SBB_RTL return this->BuildBinaryPlusFlagsRTL(SMP_SUBTRACT_BORROW); #else return this->BuildBinaryRTL(SMP_SUBTRACT_BORROW); #endif case NN_scas: // Scan String return this->BuildFlagsDestBinaryRTL(SMP_U_COMPARE); case NN_seta: // Set Byte if Above (CF=0 & ZF=0) case NN_setae: // Set Byte if Above or Equal (CF=0) case NN_setb: // Set Byte if Below (CF=1) case NN_setbe: // Set Byte if Below or Equal (CF=1 | ZF=1) case NN_setc: // Set Byte if Carry (CF=1) case NN_sete: // Set Byte if Equal (ZF=1) case NN_setg: // Set Byte if Greater (ZF=0 & SF=OF) case NN_setge: // Set Byte if Greater or Equal (SF=OF) case NN_setl: // Set Byte if Less (SF!=OF) case NN_setle: // Set Byte if Less or Equal (ZF=1 | SF!=OF) case NN_setna: // Set Byte if Not Above (CF=1 | ZF=1) case NN_setnae: // Set Byte if Not Above or Equal (CF=1) case NN_setnb: // Set Byte if Not Below (CF=0) case NN_setnbe: // Set Byte if Not Below or Equal (CF=0 & ZF=0) case NN_setnc: // Set Byte if Not Carry (CF=0) case NN_setne: // Set Byte if Not Equal (ZF=0) case NN_setng: // Set Byte if Not Greater (ZF=1 | SF!=OF) case NN_setnge: // Set Byte if Not Greater or Equal (ZF=1) case NN_setnl: // Set Byte if Not Less (SF=OF) case NN_setnle: // Set Byte if Not Less or Equal (ZF=0 & SF=OF) case NN_setno: // Set Byte if Not Overflow (OF=0) case NN_setnp: // Set Byte if Not Parity (PF=0) case NN_setns: // Set Byte if Not Sign (SF=0) case NN_setnz: // Set Byte if Not Zero (ZF=0) case NN_seto: // Set Byte if Overflow (OF=1) case NN_setp: // Set Byte if Parity (PF=1) case NN_setpe: // Set Byte if Parity Even (PF=1) case NN_setpo: // Set Byte if Parity Odd (PF=0) case NN_sets: // Set Byte if Sign (SF=1) case NN_setz: // Set Byte if Zero (ZF=1) // Destination always get set to NUMERIC 0 or 1, depending on // the condition and the relevant flags bits. 
Best way to model // this in an RTL is to perform an unspecified unary NUMERIC // operation on the flags register and assign the result to the // destination operand, making it always NUMERIC. return this->BuildUnary2OpndRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_sgdt: // Store Global Descriptor Table Register case NN_sidt: // Store Interrupt Descriptor Table Register return false; break; case NN_shld: // Double Precision Shift Left return this->BuildDoubleShiftRTL(SMP_U_LEFT_SHIFT); case NN_shrd: // Double Precision Shift Right return this->BuildDoubleShiftRTL(SMP_U_RIGHT_SHIFT); case NN_sldt: // Store Local Descriptor Table Register case NN_smsw: // Store Machine Status Word return false; break; case NN_stc: // Set Carry Flag case NN_std: // Set Direction Flag return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_sti: // Set Interrupt Flag NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_stos: // Store String return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_str: // Store Task Register return false; break; case NN_sub: // Integer Subtraction return this->BuildBinaryRTL(SMP_SUBTRACT); case NN_test: // Logical Compare return this->BuildFlagsDestBinaryRTL(SMP_U_COMPARE); case NN_verr: // Verify a Segment for Reading case NN_verw: // Verify a Segment for Writing case NN_wait: // Wait until BUSY# Pin is Inactive (HIGH) NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; if (NN_wait != this->SMPcmd.itype) this->RTL.ExtraKills.push_back(FlagsOp); return true; case NN_xchg: // Exchange Register/Memory with Register return this->BuildExchangeRTL(); case NN_xlat: // Table Lookup Translation return false; break; case NN_xor: // Logical Exclusive OR return this->BuildBinaryRTL(SMP_BITWISE_XOR); // // 486 instructions // case NN_cmpxchg: // Compare and Exchange return this->BuildCompareExchangeRTL(); case NN_bswap: // Swap 
bits in EAX return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_xadd: // t<-dest; dest<-src+dest; src<-t return this->BuildExchangeAddRTL(); case NN_invd: // Invalidate Data Cache case NN_wbinvd: // Invalidate Data Cache (write changes) case NN_invlpg: // Invalidate TLB entry NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // // Pentium instructions // case NN_rdmsr: // Read Machine Status Register return this->BuildOptType8RTL(); case NN_wrmsr: // Write Machine Status Register return false; break; case NN_cpuid: // Get CPU ID return this->BuildOptType8RTL(); case NN_cmpxchg8b: // Compare and Exchange Eight Bytes return false; break; case NN_rdtsc: // Read Time Stamp Counter return this->BuildOptType8RTL(); case NN_rsm: // Resume from System Management Mode NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // // Pentium Pro instructions // case NN_cmova: // Move if Above (CF=0 & ZF=0) case NN_cmovb: // Move if Below (CF=1) case NN_cmovbe: // Move if Below or Equal (CF=1 | ZF=1) return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_cmovg: // Move if Greater (ZF=0 & SF=OF) return this->BuildMoveRTL(SMP_GREATER_THAN); case NN_cmovge: // Move if Greater or Equal (SF=OF) return this->BuildMoveRTL(SMP_GREATER_EQUAL); case NN_cmovl: // Move if Less (SF!=OF) return this->BuildMoveRTL(SMP_LESS_THAN); case NN_cmovle: // Move if Less or Equal (ZF=1 | SF!=OF) return this->BuildMoveRTL(SMP_LESS_EQUAL); case NN_cmovnb: // Move if Not Below (CF=0) case NN_cmovno: // Move if Not Overflow (OF=0) case NN_cmovnp: // Move if Not Parity (PF=0) case NN_cmovns: // Move if Not Sign (SF=0) return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_cmovnz: // Move if Not Zero (ZF=0) return this->BuildMoveRTL(SMP_NOT_EQUAL); case NN_cmovo: // Move if Overflow (OF=1) case NN_cmovp: // Move if Parity (PF=1) case 
NN_cmovs: // Move if Sign (SF=1) return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_cmovz: // Move if Zero (ZF=1) return this->BuildMoveRTL(SMP_EQUAL); case NN_fcmovb: // Floating Move if Below return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmove: // Floating Move if Equal return this->BuildMoveRTL(SMP_EQUAL); case NN_fcmovbe: // Floating Move if Below or Equal return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovu: // Floating Move if Unordered return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovnb: // Floating Move if Not Below return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovne: // Floating Move if Not Equal return this->BuildMoveRTL(SMP_NOT_EQUAL); case NN_fcmovnbe: // Floating Move if Not Below or Equal return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovnu: // Floating Move if Not Unordered return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcomi: // FP Compare: result in EFLAGS case NN_fucomi: // FP Unordered Compare: result in EFLAGS case NN_fcomip: // FP Compare: result in EFLAGS: pop stack case NN_fucomip: // FP Unordered Compare: result in EFLAGS: pop stack return this->BuildFlagsDestBinaryRTL(SMP_S_COMPARE); break; case NN_rdpmc: // Read Performance Monitor Counter return this->BuildOptType8RTL(); // // FPP instructions // case NN_fld: // Load Real case NN_fst: // Store Real case NN_fstp: // Store Real and Pop return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_fxch: // Exchange Registers // FP registers remain NUMERIC anyway, so this is a no-op to our type system. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_fild: // Load Integer case NN_fist: // Store Integer case NN_fistp: // Store Integer and Pop case NN_fbld: // Load BCD case NN_fbstp: // Store BCD and Pop return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_fadd: // Add Real case NN_faddp: // Add Real and Pop case NN_fiadd: // Add Integer case NN_fsub: // Subtract Real case NN_fsubp: // Subtract Real and Pop case NN_fisub: // Subtract Integer case NN_fsubr: // Subtract Real Reversed case NN_fsubrp: // Subtract Real Reversed and Pop case NN_fisubr: // Subtract Integer Reversed case NN_fmul: // Multiply Real case NN_fmulp: // Multiply Real and Pop case NN_fimul: // Multiply Integer case NN_fdiv: // Divide Real case NN_fdivp: // Divide Real and Pop case NN_fidiv: // Divide Integer case NN_fdivr: // Divide Real Reversed case NN_fdivrp: // Divide Real Reversed and Pop case NN_fidivr: // Divide Integer Reversed return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC, true); case NN_fsqrt: // Square Root case NN_fscale: // Scale: st(0) <- st(0) * 2^st(1) case NN_fprem: // Partial Remainder case NN_frndint: // Round to Integer case NN_fxtract: // Extract exponent and significand case NN_fabs: // Absolute value case NN_fchs: // Change Sign return this->BuildUnaryRTL(SMP_UNARY_FLOATING_ARITHMETIC); case NN_fcom: // Compare Real case NN_fcomp: // Compare Real and Pop case NN_fcompp: // Compare Real and Pop Twice case NN_ficom: // Compare Integer case NN_ficomp: // Compare Integer and Pop case NN_ftst: // Test case NN_fxam: // Examine // Floating comparison instructions use FP reg stack locations // as sources and set only the FP flags. All of these are numeric // type and we don't track any of them, so all such instructions // can be considered to be no-ops. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_fptan: // Partial tangent case NN_fpatan: // Partial arctangent case NN_f2xm1: // 2^x - 1 case NN_fyl2x: // Y * lg2(X) case NN_fyl2xp1: // Y * lg2(X+1) // We can consider it a unary operation when both arguments come // off the floating point register stack, unless we ever start // modeling the different locations in the FP register stack. return this->BuildUnaryRTL(SMP_UNARY_FLOATING_ARITHMETIC); case NN_fldz: // Load +0.0 case NN_fld1: // Load +1.0 case NN_fldpi: // Load PI=3.14... case NN_fldl2t: // Load lg2(10) case NN_fldl2e: // Load lg2(e) case NN_fldlg2: // Load lg10(2) case NN_fldln2: // Load ln(2) case NN_finit: // Initialize Processor case NN_fninit: // Initialize Processor (no wait) case NN_fsetpm: // Set Protected Mode case NN_fldcw: // Load Control Word case NN_fstcw: // Store Control Word case NN_fnstcw: // Store Control Word (no wait) case NN_fstsw: // Store Status Word case NN_fnstsw: // Store Status Word (no wait) case NN_fclex: // Clear Exceptions case NN_fnclex: // Clear Exceptions (no wait) // Floating point stack and control word and flags operations // with no memory operands are no-ops to us. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_fstenv: // Store Environment case NN_fnstenv: // Store Environment (no wait) case NN_fldenv: // Load Environment case NN_fsave: // Save State case NN_fnsave: // Save State (no wait) case NN_frstor: // Restore State case NN_fincstp: // Increment Stack Pointer case NN_fdecstp: // Decrement Stack Pointer case NN_ffree: // Free Register return false; break; case NN_fnop: // No Operation NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_feni: // (8087 only) case NN_fneni: // (no wait) (8087 only) case NN_fdisi: // (8087 only) case NN_fndisi: // (no wait) (8087 only) return false; break; // // 80387 instructions // case NN_fprem1: // Partial Remainder ( < half ) case NN_fsincos: // t<-cos(st); st<-sin(st); push t case NN_fsin: // Sine case NN_fcos: // Cosine case NN_fucom: // Compare Unordered Real case NN_fucomp: // Compare Unordered Real and Pop case NN_fucompp: // Compare Unordered Real and Pop Twice // Floating point stack and control word and flags operations // with no memory operands are no-ops to us. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // // Instructions added 28.02.96 // case NN_setalc: // Set AL to Carry Flag case NN_svdc: // Save Register and Descriptor case NN_rsdc: // Restore Register and Descriptor case NN_svldt: // Save LDTR and Descriptor case NN_rsldt: // Restore LDTR and Descriptor case NN_svts: // Save TR and Descriptor case NN_rsts: // Restore TR and Descriptor case NN_icebp: // ICE Break Point case NN_loadall: // Load the entire CPU state from ES:EDI return false; break; // // MMX instructions // case NN_emms: // Empty MMX state NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; break; case NN_movd: // Move 32 bits case NN_movq: // Move 64 bits return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_packsswb: // Pack with Signed Saturation (Word->Byte) case NN_packssdw: // Pack with Signed Saturation (Dword->Word) return this->BuildBinaryRTL(SMP_PACK_S); break; case NN_packuswb: // Pack with Unsigned Saturation (Word->Byte) return this->BuildBinaryRTL(SMP_PACK_U); break; case NN_paddb: // Packed Add Byte case NN_paddw: // Packed Add Word case NN_paddd: // Packed Add Dword case NN_paddsb: // Packed Add with Saturation (Byte) case NN_paddsw: // Packed Add with Saturation (Word) case NN_paddusb: // Packed Add Unsigned with Saturation (Byte) case NN_paddusw: // Packed Add Unsigned with Saturation (Word) return this->BuildBinaryRTL(SMP_ADD); break; case NN_pand: // Bitwise Logical And return this->BuildBinaryRTL(SMP_BITWISE_AND); break; case NN_pandn: // Bitwise Logical And Not return this->BuildBinaryRTL(SMP_BITWISE_AND_NOT); break; case NN_pcmpeqb: // Packed Compare for Equal (Byte) case NN_pcmpeqw: // Packed Compare for Equal (Word) case NN_pcmpeqd: // Packed Compare for Equal (Dword) return this->BuildBinaryRTL(SMP_COMPARE_EQ_AND_SET); break; case NN_pcmpgtb: // Packed Compare for Greater 
Than (Byte) case NN_pcmpgtw: // Packed Compare for Greater Than (Word) case NN_pcmpgtd: // Packed Compare for Greater Than (Dword) return this->BuildBinaryRTL(SMP_COMPARE_EQ_AND_SET); break; case NN_pmaddwd: // Packed Multiply and Add return this->BuildBinaryRTL(SMP_MULTIPLY_AND_ADD); break; case NN_pmulhw: // Packed Multiply High case NN_pmullw: // Packed Multiply Low return this->BuildBinaryRTL(SMP_U_MULTIPLY); break; case NN_por: // Bitwise Logical Or return this->BuildBinaryRTL(SMP_BITWISE_OR); break; case NN_psllw: // Packed Shift Left Logical (Word) case NN_pslld: // Packed Shift Left Logical (Dword) case NN_psllq: // Packed Shift Left Logical (Qword) return this->BuildBinaryRTL(SMP_U_LEFT_SHIFT); break; case NN_psraw: // Packed Shift Right Arithmetic (Word) case NN_psrad: // Packed Shift Right Arithmetic (Dword) return this->BuildBinaryRTL(SMP_S_RIGHT_SHIFT); break; case NN_psrlw: // Packed Shift Right Logical (Word) case NN_psrld: // Packed Shift Right Logical (Dword) case NN_psrlq: // Packed Shift Right Logical (Qword) return this->BuildBinaryRTL(SMP_U_RIGHT_SHIFT); break; case NN_psubb: // Packed Subtract Byte case NN_psubw: // Packed Subtract Word case NN_psubd: // Packed Subtract Dword return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_psubsb: // Packed Subtract with Saturation (Byte) case NN_psubsw: // Packed Subtract with Saturation (Word) return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_psubusb: // Packed Subtract Unsigned with Saturation (Byte) case NN_psubusw: // Packed Subtract Unsigned with Saturation (Word) return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_punpckhbw: // Unpack High Packed Data (Byte->Word) case NN_punpckhwd: // Unpack High Packed Data (Word->Dword) case NN_punpckhdq: // Unpack High Packed Data (Dword->Qword) case NN_punpcklbw: // Unpack Low Packed Data (Byte->Word) case NN_punpcklwd: // Unpack Low Packed Data (Word->Dword) case NN_punpckldq: // Unpack Low Packed Data (Dword->Qword) return 
this->BuildBinaryRTL(SMP_INTERLEAVE); break; case NN_pxor: // Bitwise Logical Exclusive Or return this->BuildBinaryRTL(SMP_BITWISE_XOR); break; // // Undocumented Deschutes processor instructions // case NN_fxsave: // Fast save FP context case NN_fxrstor: // Fast restore FP context return false; break; // Pentium II instructions case NN_sysenter: // Fast Transition to System Call Entry Point case NN_sysexit: // Fast Transition from System Call Entry Point return false; break; // 3DNow! instructions case NN_pavgusb: // Packed 8-bit Unsigned Integer Averaging return this->BuildBinaryRTL(SMP_AVERAGE_U); break; case NN_pfadd: // Packed Floating-Point Addition case NN_pfsub: // Packed Floating-Point Subtraction case NN_pfsubr: // Packed Floating-Point Reverse Subtraction case NN_pfacc: // Packed Floating-Point Accumulate case NN_pfcmpge: // Packed Floating-Point Comparison: Greater or Equal case NN_pfcmpgt: // Packed Floating-Point Comparison: Greater case NN_pfcmpeq: // Packed Floating-Point Comparison: Equal case NN_pfmin: // Packed Floating-Point Minimum case NN_pfmax: // Packed Floating-Point Maximum case NN_pi2fd: // Packed 32-bit Integer to Floating-Point case NN_pf2id: // Packed Floating-Point to 32-bit Integer case NN_pfrcp: // Packed Floating-Point Reciprocal Approximation case NN_pfrsqrt: // Packed Floating-Point Reciprocal Square Root Approximation case NN_pfmul: // Packed Floating-Point Multiplication case NN_pfrcpit1: // Packed Floating-Point Reciprocal First Iteration Step case NN_pfrsqit1: // Packed Floating-Point Reciprocal Square Root First Iteration Step case NN_pfrcpit2: // Packed Floating-Point Reciprocal Second Iteration Step case NN_pmulhrw: // Packed Floating-Point 16-bit Integer Multiply with rounding case NN_femms: // Faster entry/exit of the MMX or floating-point state return false; break; case NN_prefetch: // Prefetch at least a 32-byte line into L1 data cache case NN_prefetchw: // Prefetch processor cache line into L1 data cache (mark as 
modified) // Prefetch opcodes are no-ops to us. NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // Pentium III instructions case NN_addps: // Packed Single-FP Add case NN_addss: // Scalar Single-FP Add case NN_andnps: // Bitwise Logical And Not for Single-FP case NN_andps: // Bitwise Logical And for Single-FP return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_cmpps: // Packed Single-FP Compare case NN_cmpss: // Scalar Single-FP Compare case NN_comiss: // Scalar Ordered Single-FP Compare and Set EFLAGS return false; break; case NN_cvtpi2ps: // Packed signed INT32 to Packed Single-FP conversion case NN_cvtps2pi: // Packed Single-FP to Packed INT32 conversion case NN_cvtsi2ss: // Scalar signed INT32 to Single-FP conversion case NN_cvtss2si: // Scalar Single-FP to signed INT32 conversion case NN_cvttps2pi: // Packed Single-FP to Packed INT32 conversion (truncate) case NN_cvttss2si: // Scalar Single-FP to signed INT32 conversion (truncate) return false; break; case NN_divps: // Packed Single-FP Divide case NN_divss: // Scalar Single-FP Divide return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_ldmxcsr: // Load Streaming SIMD Extensions Technology Control/Status Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_maxps: // Packed Single-FP Maximum case NN_maxss: // Scalar Single-FP Maximum return this->BuildBinaryRTL(SMP_MAX_S); break; case NN_minps: // Packed Single-FP Minimum case NN_minss: // Scalar Single-FP Minimum return this->BuildBinaryRTL(SMP_MIN_S); break; case NN_movaps: // Move Aligned Four Packed Single-FP case NN_movhlps: // Move High to Low Packed Single-FP case NN_movhps: // Move High Packed Single-FP case NN_movlhps: // Move Low to High Packed Single-FP case NN_movlps: // Move Low Packed Single-FP case NN_movmskps: // Move Mask to Register case NN_movss: // Move Scalar Single-FP case NN_movups: // Move Unaligned 
Four Packed Single-FP return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_mulps: // Packed Single-FP Multiply case NN_mulss: // Scalar Single-FP Multiply case NN_orps: // Bitwise Logical OR for Single-FP Data return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_rcpps: // Packed Single-FP Reciprocal case NN_rcpss: // Scalar Single-FP Reciprocal case NN_rsqrtps: // Packed Single-FP Square Root Reciprocal case NN_rsqrtss: // Scalar Single-FP Square Root Reciprocal return this->BuildUnary2OpndRTL(SMP_UNARY_FLOATING_ARITHMETIC); break; case NN_shufps: // Shuffle Single-FP return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_sqrtps: // Packed Single-FP Square Root case NN_sqrtss: // Scalar Single-FP Square Root return this->BuildUnary2OpndRTL(SMP_UNARY_FLOATING_ARITHMETIC); break; case NN_stmxcsr: // Store Streaming SIMD Extensions Technology Control/Status Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_subps: // Packed Single-FP Subtract case NN_subss: // Scalar Single-FP Subtract return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_ucomiss: // Scalar Unordered Single-FP Compare and Set EFLAGS case NN_unpckhps: // Unpack High Packed Single-FP Data case NN_unpcklps: // Unpack Low Packed Single-FP Data return false; break; case NN_xorps: // Bitwise Logical XOR for Single-FP Data return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_pavgb: // Packed Average (Byte) case NN_pavgw: // Packed Average (Word) return this->BuildBinaryRTL(SMP_AVERAGE_U); break; case NN_pextrw: // Extract Word case NN_pinsrw: // Insert Word return false; break; case NN_pmaxsw: // Packed Signed Integer Word Maximum return this->BuildBinaryRTL(SMP_MAX_S); break; case NN_pmaxub: // Packed Unsigned Integer Byte Maximum return this->BuildBinaryRTL(SMP_MAX_U); break; case NN_pminsw: // Packed Signed Integer Word Minimum return this->BuildBinaryRTL(SMP_MIN_S); break; case NN_pminub: // Packed Unsigned 
Integer Byte Minimum return this->BuildBinaryRTL(SMP_MIN_U); break; case NN_pmovmskb: // Move Byte Mask to Integer return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_pmulhuw: // Packed Multiply High Unsigned return this->BuildBinaryRTL(SMP_U_MULTIPLY); break; case NN_psadbw: // Packed Sum of Absolute Differences return this->BuildBinaryRTL(SMP_SUM_OF_DIFFS); break; case NN_pshufw: // Packed Shuffle Word return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_maskmovq: // Byte Mask write return false; break; case NN_movntps: // Move Aligned Four Packed Single-FP Non Temporal case NN_movntq: // Move 64 Bits Non Temporal return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_prefetcht0: // Prefetch to all cache levels case NN_prefetcht1: // Prefetch to all cache levels case NN_prefetcht2: // Prefetch to L2 cache case NN_prefetchnta: // Prefetch to L1 cache case NN_sfence: // Store Fence // Cache prefetch and store fence opcodes are no-ops to us. NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // Pentium III Pseudo instructions case NN_cmpeqps: // Packed Single-FP Compare EQ case NN_cmpltps: // Packed Single-FP Compare LT case NN_cmpleps: // Packed Single-FP Compare LE case NN_cmpunordps: // Packed Single-FP Compare UNORD case NN_cmpneqps: // Packed Single-FP Compare NOT EQ case NN_cmpnltps: // Packed Single-FP Compare NOT LT case NN_cmpnleps: // Packed Single-FP Compare NOT LE case NN_cmpordps: // Packed Single-FP Compare ORDERED case NN_cmpeqss: // Scalar Single-FP Compare EQ case NN_cmpltss: // Scalar Single-FP Compare LT case NN_cmpless: // Scalar Single-FP Compare LE case NN_cmpunordss: // Scalar Single-FP Compare UNORD case NN_cmpneqss: // Scalar Single-FP Compare NOT EQ case NN_cmpnltss: // Scalar Single-FP Compare NOT LT case NN_cmpnless: // Scalar Single-FP Compare NOT LE case NN_cmpordss: // Scalar Single-FP Compare ORDERED return false; break; // AMD K7 instructions case 
NN_pf2iw: // Packed Floating-Point to Integer with Sign Extend case NN_pfnacc: // Packed Floating-Point Negative Accumulate case NN_pfpnacc: // Packed Floating-Point Mixed Positive-Negative Accumulate case NN_pi2fw: // Packed 16-bit Integer to Floating-Point case NN_pswapd: // Packed Swap Double Word return false; break; // Undocumented FP instructions (thanks to norbert.juffa@adm.com) case NN_fstp1: // Alias of Store Real and Pop case NN_fcom2: // Alias of Compare Real case NN_fcomp3: // Alias of Compare Real and Pop case NN_fxch4: // Alias of Exchange Registers case NN_fcomp5: // Alias of Compare Real and Pop case NN_ffreep: // Free Register and Pop case NN_fxch7: // Alias of Exchange Registers case NN_fstp8: // Alias of Store Real and Pop case NN_fstp9: // Alias of Store Real and Pop return false; break; // Pentium 4 instructions case NN_addpd: // Add Packed Double-Precision Floating-Point Values case NN_addsd: // Add Scalar Double-Precision Floating-Point Values case NN_andnpd: // Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values case NN_andpd: // Bitwise Logical AND of Packed Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_clflush: // Flush Cache Line case NN_cmppd: // Compare Packed Double-Precision Floating-Point Values case NN_cmpsd: // Compare Scalar Double-Precision Floating-Point Values case NN_comisd: // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS return false; break; case NN_cvtdq2pd: // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values case NN_cvtdq2ps: // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values case NN_cvtpd2dq: // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvtpd2pi: // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvtpd2ps: // Convert Packed Double-Precision 
Floating-Point Values to Packed Single-Precision Floating-Point Values case NN_cvtpi2pd: // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values case NN_cvtps2dq: // Convert Packed Single-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvtps2pd: // Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values case NN_cvtsd2si: // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer case NN_cvtsd2ss: // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value case NN_cvtsi2sd: // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value case NN_cvtss2sd: // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value case NN_cvttpd2dq: // Convert With Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvttpd2pi: // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvttps2dq: // Convert With Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvttsd2si: // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer return false; break; case NN_divpd: // Divide Packed Double-Precision Floating-Point Values case NN_divsd: // Divide Scalar Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_lfence: // Load Fence NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_maskmovdqu: // Store Selected Bytes of Double Quadword return false; break; case NN_maxpd: // Return Maximum Packed Double-Precision Floating-Point Values case NN_maxsd: // Return Maximum Scalar Double-Precision Floating-Point Value return this->BuildBinaryRTL(SMP_MAX_S); break; case 
NN_mfence: // Memory Fence NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_minpd: // Return Minimum Packed Double-Precision Floating-Point Values case NN_minsd: // Return Minimum Scalar Double-Precision Floating-Point Value return this->BuildBinaryRTL(SMP_MIN_S); break; case NN_movapd: // Move Aligned Packed Double-Precision Floating-Point Values case NN_movdq2q: // Move Quadword from XMM to MMX Register case NN_movdqa: // Move Aligned Double Quadword case NN_movdqu: // Move Unaligned Double Quadword case NN_movhpd: // Move High Packed Double-Precision Floating-Point Values case NN_movlpd: // Move Low Packed Double-Precision Floating-Point Values return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_movmskpd: // Extract Packed Double-Precision Floating-Point Sign Mask return false; break; case NN_movntdq: // Store Double Quadword Using Non-Temporal Hint case NN_movnti: // Store Doubleword Using Non-Temporal Hint case NN_movntpd: // Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint case NN_movq2dq: // Move Quadword from MMX to XMM Register case NN_movsd: // Move Scalar Double-Precision Floating-Point Values case NN_movupd: // Move Unaligned Packed Double-Precision Floating-Point Values return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_mulpd: // Multiply Packed Double-Precision Floating-Point Values case NN_mulsd: // Multiply Scalar Double-Precision Floating-Point Values case NN_orpd: // Bitwise Logical OR of Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_paddq: // Add Packed Quadword Integers return false; break; case NN_pause: // Spin Loop Hint NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_pmuludq: // Multiply Packed Unsigned Doubleword Integers return false; break; case NN_pshufd: // Shuffle 
Packed Doublewords case NN_pshufhw: // Shuffle Packed High Words case NN_pshuflw: // Shuffle Packed Low Words return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_pslldq: // Shift Double Quadword Left Logical return this->BuildBinaryRTL(SMP_U_LEFT_SHIFT); break; case NN_psrldq: // Shift Double Quadword Right Logical return this->BuildBinaryRTL(SMP_U_RIGHT_SHIFT); break; case NN_psubq: // Subtract Packed Quadword Integers return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_punpckhqdq: // Unpack High Data case NN_punpcklqdq: // Unpack Low Data return this->BuildBinaryRTL(SMP_INTERLEAVE); break; case NN_shufpd: // Shuffle Packed Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_sqrtpd: // Compute Square Roots of Packed Double-Precision Floating-Point Values case NN_sqrtsd: // Compute Square Rootof Scalar Double-Precision Floating-Point Value return this->BuildUnary2OpndRTL(SMP_UNARY_FLOATING_ARITHMETIC); break; case NN_subpd: // Subtract Packed Double-Precision Floating-Point Values case NN_subsd: // Subtract Scalar Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; case NN_ucomisd: // Unordered Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS return false; break; case NN_unpckhpd: // Unpack and Interleave High Packed Double-Precision Floating-Point Values case NN_unpcklpd: // Unpack and Interleave Low Packed Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_INTERLEAVE); break; case NN_xorpd: // Bitwise Logical OR of Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); break; // AMD syscall/sysret instructions case NN_syscall: // Low latency system call case NN_sysret: // Return from system call // AMD64 instructions case NN_swapgs: // Exchange GS base with KernelGSBase MSR // New Pentium instructions (SSE3) case NN_movddup: // Move One Double-FP and Duplicate 
case NN_movshdup: // Move Packed Single-FP High and Duplicate case NN_movsldup: // Move Packed Single-FP Low and Duplicate return false; break; // Missing AMD64 instructions case NN_movsxd: // Move with Sign-Extend Doubleword case NN_cmpxchg16b: // Compare and Exchange 16 Bytes return false; break; // SSE3 instructions case NN_addsubpd: // Add /Sub packed DP FP numbers case NN_addsubps: // Add /Sub packed SP FP numbers case NN_haddpd: // Add horizontally packed DP FP numbers case NN_haddps: // Add horizontally packed SP FP numbers case NN_hsubpd: // Sub horizontally packed DP FP numbers case NN_hsubps: // Sub horizontally packed SP FP numbers case NN_monitor: // Set up a linear address range to be monitored by hardware case NN_mwait: // Wait until write-back store performed within the range specified by the MONITOR instruction case NN_fisttp: // Store ST in intXX (chop) and pop case NN_lddqu: // Load unaligned integer 128-bit return false; break; // SSSE3 instructions case NN_psignb: // Packed SIGN Byte case NN_psignw: // Packed SIGN Word case NN_psignd: // Packed SIGN Doubleword case NN_pshufb: // Packed Shuffle Bytes return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_pmulhrsw: // Packed Multiply High with Round and Scale case NN_pmaddubsw: // Multiply and Add Packed Signed and Unsigned Bytes case NN_phsubsw: // Packed Horizontal Subtract and Saturate case NN_phaddsw: // Packed Horizontal Add and Saturate case NN_phaddw: // Packed Horizontal Add Word case NN_phaddd: // Packed Horizontal Add Doubleword case NN_phsubw: // Packed Horizontal Subtract Word case NN_phsubd: // Packed Horizontal Subtract Doubleword return false; break; case NN_palignr: // Packed Align Right return this->BuildPackShiftRTL(SMP_CONCATENATE, SMP_REVERSE_SHIFT_U); break; case NN_pabsb: // Packed Absolute Value Byte case NN_pabsw: // Packed Absolute Value Word case NN_pabsd: // Packed Absolute Value Doubleword return false; break; // VMX instructions case NN_vmcall: // Call to VM Monitor 
case NN_vmclear: // Clear Virtual Machine Control Structure case NN_vmlaunch: // Launch Virtual Machine case NN_vmresume: // Resume Virtual Machine case NN_vmptrld: // Load Pointer to Virtual Machine Control Structure case NN_vmptrst: // Store Pointer to Virtual Machine Control Structure case NN_vmread: // Read Field from Virtual Machine Control Structure case NN_vmwrite: // Write Field from Virtual Machine Control Structure case NN_vmxoff: // Leave VMX Operation case NN_vmxon: // Enter VMX Operation return false; break; default: SMP_msg("ERROR: Unknown instruction opcode at %x : %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr())); break; } // end switch on opcode return true; } // end SMPInstr::BuildRTL() // Iterate through all reg transfers and call SyncRTLDefUse for each. void SMPInstr::SyncAllRTs(void) { for (size_t index = 0; index < this->RTL.GetCount(); ++index) { this->SyncRTLDefUse(this->RTL.GetRT(index)); } return; } // end of SMPInstr:SyncAllRTs() // Ensure that each operand of the RTL is found in the appropriate DEF or USE list. void SMPInstr::SyncRTLDefUse(SMPRegTransfer *CurrRT) { // The Guard expression and ExtraKills are almost never represented in the DEF and USE // lists. When they are, they are added in MDFixupDefUseLists(), so we ignore them here. // The only DEFs should come from left operands of SMP_ASSIGN operators, i.e. the effects // of register transfers. op_t LeftOp, RightOp; set<DefOrUse, LessDefUse>::iterator CurrDef, CurrUse; bool DebugFlag = false; #if SMP_VERBOSE_DEBUG_BUILD_RTL DebugFlag |= (0 == strcmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName())); #endif if (DebugFlag) { SMP_msg("SyncRTLDefUse entered. 
Dump of USE list:\n"); this->Uses.Dump(); } LeftOp = CurrRT->GetLeftOperand(); if (SMP_ASSIGN == CurrRT->GetOperator()) { assert(o_void != LeftOp.type); assert(o_imm != LeftOp.type); CurrDef = this->Defs.FindRef(LeftOp); if (CurrDef == this->GetLastDef() && !LeftOp.is_reg(R_ip)) { #if SMP_VERBOSE_DEBUG_BUILD_RTL SMP_msg("WARNING: DEF not found for SMP_ASSIGN in %s ; added op:", DisAsmText.GetDisAsm(this->GetAddr())); PrintOperand(LeftOp); SMP_msg("\n"); #endif this->Defs.SetRef(LeftOp, CurrRT->GetOperatorType()); } } else { // not SMP_ASSIGN; left operand should be a USE if (o_void != LeftOp.type) { CurrUse = this->Uses.FindRef(LeftOp); if (CurrUse == this->GetLastUse()) { #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE SMP_msg("WARNING: USE not found for "); PrintOperand(LeftOp); SMP_msg(" in %s ; added\n", DisAsmText.GetDisAsm(this->GetAddr())); #endif this->Uses.SetRef(LeftOp); } } } if (!CurrRT->HasRightSubTree()) { RightOp = CurrRT->GetRightOperand(); // right operand should be a USE if (o_void != RightOp.type) { CurrUse = this->Uses.FindRef(RightOp); if (CurrUse == this->GetLastUse()) { #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE SMP_msg("WARNING: USE not found for "); PrintOperand(RightOp); SMP_msg(" in %s ; added\n", DisAsmText.GetDisAsm(this->GetAddr())); #endif this->Uses.SetRef(RightOp); } } } else { // recurse into right subtree this->SyncRTLDefUse(CurrRT->GetRightTree()); } return; } // end of SMPInstr::SyncRTLDefUse() // SetOperatorType - set the type of the operator, take into account the speculative (profiler) status void SMPRegTransfer::SetOperatorType(SMPOperandType OpType, const SMPInstr* Instr) { SMPOperandType OldType = RTop.type; SMPOperandType NewType = OpType; if (Instr->GetBlock()->GetFunc()->GetIsSpeculative()) { NewType = (SMPOperandType) (((int)NewType) | PROF_BASE); #if SMP_TRACK_NONSPEC_OPER_TYPE if (!IsProfDerived(OldType)) RTop.NonSpeculativeType = OldType; #endif } RTop.type = NewType; } // end of SMPRegTransfer::SetOperatorType // Update the 
memory source operands to have the new type void SMPInstr::UpdateMemLoadTypes(SMPOperandType newType) { bool MemSrc = false; op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = this->SMPcmd.Operands[i]; optype_t CurrType = Opnd.type; if (this->features & UseMacros[i]) { // USE MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemSrc) { set<DefOrUse, LessDefUse>::iterator use = this->FindUse(Opnd); SMPOperandType type = use->GetType(); assert(newType == (NUMERIC|PROF_BASE)); switch (type) { case UNINIT: case CODEPTR: this->SetUseType(Opnd,newType); break; case POINTER: this->SetUseType(Opnd, (SMPOperandType)(UNKNOWN|PROF_BASE)); break; default: break; } } } } return ; } // end of SMPInstr::UpdateMemLoadTypes() // Return true if we have register DefOp += ImmOp. bool SMPInstr::MDIsAddImmediateToReg(op_t &DefOp, op_t &ImmOp) { bool FoundAddImmed = false; bool FoundImmed = false; bool FoundRegUse = false; if (NN_add == this->SMPcmd.itype) { set<DefOrUse, LessDefUse>::iterator UseIter = this->GetFirstUse(); while (UseIter != this->GetLastUse()) { op_t UseOp = UseIter->GetOp(); if (o_imm == UseOp.type) { ImmOp = UseOp; FoundImmed = true; } else if (o_reg == UseOp.type) { set<DefOrUse, LessDefUse>::iterator DefIter = this->GetFirstNonFlagsDef(); op_t TempDefOp = DefIter->GetOp(); if (o_reg != TempDefOp.type) { return false; } if (MDLessReg(UseOp.reg, TempDefOp.reg) || MDLessReg(TempDefOp.reg, UseOp.reg)) { return false; } // If we make it here, we have the same register DEFed as we found USEd. DefOp = TempDefOp; FoundRegUse = true; } ++UseIter; } FoundAddImmed = (FoundImmed && FoundRegUse); } return FoundAddImmed; } // end of SMPInstr::MDIsAddImmediateToReg()