/* * SMPInstr.cpp - <see below>. * * Copyright (c) 2000, 2001, 2010 - University of Virginia * * This file is part of the Memory Error Detection System (MEDS) infrastructure. * This file may be used and modified for non-commercial purposes as long as * all copyright, permission, and nonwarranty notices are preserved. * Redistribution is prohibited without prior written consent from the University * of Virginia. * * Please contact the authors for restrictions applying to commercial use. * * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Author: University of Virginia * e-mail: jwd@virginia.com * URL : http://www.cs.virginia.edu/ * * Additional copyrights 2010, 2011 by Zephyr Software LLC * e-mail: {clc,jwd}@zephyr-software.com * URL : http://www.zephyr-software.com/ * */ // // SMPInstr.cpp // // This module performs the instruction level analyses needed for the // SMP project (Software Memory Protection). // #include <cstring> #include <pro.h> #include <assert.h> #include <ida.hpp> #include <idp.hpp> #include <allins.hpp> #include <auto.hpp> #include <bytes.hpp> #include <funcs.hpp> #include <intel.hpp> #include <loader.hpp> #include <lines.hpp> #include <name.hpp> #include "SMPStaticAnalyzer.h" #include "SMPDataFlowAnalysis.h" #include "SMPInstr.h" #include "SMPProgram.h" #include "ProfilerInformation.h" // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_BUILD_RTL 1 // should be left on, serious errors! 
#define SMP_VERBOSE_DEBUG_BUILD_RTL 0 #define SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE 0 #define SMP_VERBOSE_DEBUG_INFER_TYPES 0 #define SMP_VERBOSE_DUMP 0 #define SMP_VERBOSE_FIND_POINTERS 0 #define SMP_CALL_TRASHES_REGS 1 // Add DEFs of caller-saved regs to CALL instructions #define SMP_BASEREG_POINTER_TYPE 1 // Initialize Base Register USEs to type POINTER? #define SMP_OPTIMIZE_ADD_TO_NUMERIC 0 // optimizing annotation type -5 #define SMP_IDENTIFY_POINTER_ADDRESS_REG 0 // optimizing annotation POINTER #define SMP_CHILDACCESS_ALL_CODE 0 // CHILDACCESS annotations for all funcs, or just analyzed funcs? #define SPECIAL_CASE_CARRY_BORROW 0 // Treat sbb/adc different from sub/add annotations? #define SMP_BUILD_SPECIAL_ADC_SBB_RTL 0 // Explicit RTL subtree for carry flag? // Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable // by allowing us to pick them up with an array index. static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6}; static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6}; // Text to be printed in each optimizing annotation explaining why // the annotation was emitted. 
// One entry per optimization category, 0 through LAST_TYPE_CATEGORY.
// NOTE(review): presumably indexed by the instruction's OptType value; the
//  indexing code is not in this part of the file -- confirm against the emitter.
static const char *OptExplanation[LAST_TYPE_CATEGORY + 1] = {
	"NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM",
	"Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr",
	"AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest", "NumericSources",
	"StackMemoryTracking", "NumericSources", "NumericMemDest",
	"NeverMemDest", "SafeIfNoIndexing"
};

// Printable name for each RTL operator. The Dump() methods below index this
//  table directly with an SMPoperator value, so the entries have to stay in
//  the same order as the SMPoperator enumeration (declared elsewhere).
static const char *OperatorText[LAST_SMP_OPERATOR + 1] = {
	"SMP_NULL_OPERATOR", "SMP_CALL", "SMP_INPUT", "SMP_OUTPUT", "SMP_ADDRESS_OF",
	"SMP_U_LEFT_SHIFT", "SMP_S_LEFT_SHIFT", "SMP_U_RIGHT_SHIFT", "SMP_S_RIGHT_SHIFT",
	"SMP_ROTATE_LEFT", "SMP_ROTATE_LEFT_CARRY", "SMP_ROTATE_RIGHT", "SMP_ROTATE_RIGHT_CARRY",
	"SMP_DECREMENT", "SMP_INCREMENT",
	"SMP_ADD", "SMP_ADD_CARRY", "SMP_SUBTRACT", "SMP_SUBTRACT_BORROW", "SMP_U_MULTIPLY",
	"SMP_S_MULTIPLY", "SMP_U_DIVIDE", "SMP_S_DIVIDE", "SMP_U_REMAINDER",
	"SMP_SIGN_EXTEND", "SMP_ZERO_EXTEND", "SMP_ASSIGN", "SMP_BITWISE_AND",
	"SMP_BITWISE_OR", "SMP_BITWISE_NOT", "SMP_BITWISE_XOR", "SMP_BITWISE_AND_NOT", "SMP_NEGATE",
	"SMP_S_COMPARE", "SMP_U_COMPARE", "SMP_LESS_THAN", "SMP_GREATER_THAN",
	"SMP_LESS_EQUAL", "SMP_GREATER_EQUAL", "SMP_EQUAL", "SMP_NOT_EQUAL",
	"SMP_LOGICAL_AND", "SMP_LOGICAL_OR", "SMP_UNARY_NUMERIC_OPERATION",
	"SMP_BINARY_NUMERIC_OPERATION", "SMP_SYSTEM_OPERATION",
	"SMP_UNARY_FLOATING_ARITHMETIC", "SMP_BINARY_FLOATING_ARITHMETIC",
	"SMP_REVERSE_SHIFT_U", "SMP_SHUFFLE", "SMP_COMPARE_EQ_AND_SET",
	"SMP_COMPARE_GT_AND_SET", "SMP_INTERLEAVE", "SMP_CONCATENATE"
};

// Does the CurrOperator definitely indicate a signed or unsigned operation?
bool OperatorHasSignedness(SMPoperator CurrOperator) { bool DetectedSignedness; switch (CurrOperator) { case SMP_NULL_OPERATOR: DetectedSignedness = false; break; case SMP_CALL: // CALL instruction DetectedSignedness = true; break; case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_ADDRESS_OF: // take effective address DetectedSignedness = false; break; case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_U_DIVIDE: case SMP_U_REMAINDER: case SMP_ZERO_EXTEND: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: case SMP_U_COMPARE: // unsigned compare (AND-based) DetectedSignedness = true; break; case SMP_S_LEFT_SHIFT: // signed left shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_S_MULTIPLY: case SMP_S_DIVIDE: case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: DetectedSignedness = true; break; case SMP_DECREMENT: case SMP_INCREMENT: case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow case SMP_ASSIGN: case SMP_BITWISE_AND: case SMP_BITWISE_OR: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC DetectedSignedness = false; break; case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same 
to our type system; all NUMERIC DetectedSignedness = true; break; case SMP_REVERSE_SHIFT_U: // Shift right operand by bit count in left operand case SMP_SHUFFLE: // Shuffle bytes, words, etc. within destination operation per source mask case SMP_COMPARE_EQ_AND_SET: // Compare for equality and set fields to all 1's or all 0's case SMP_COMPARE_GT_AND_SET: // Compare for greater-than and set fields to all 1's or all 0's case SMP_INTERLEAVE: // extended-precision interleaving of bytes or words or dwords etc.; NUMERIC case SMP_CONCATENATE: // extended-precision concatenation; NUMERIC DetectedSignedness = true; break; default: DetectedSignedness = false; msg("ERROR: Unknown operator in OperatorHasSignedness: %d\n", CurrOperator); break; } // end switch on operator return DetectedSignedness; } // end of OperatorHasSignedness() // ***************************************************************** // Class SMPGuard // ***************************************************************** // Constructor SMPGuard::SMPGuard(void) { this->LeftOperand.type = o_void; this->RightOperand.type = o_void; this->GuardOp = SMP_NULL_OPERATOR; return; } // Debug print void SMPGuard::Dump(void) { msg("GUARD: "); PrintOperand(this->LeftOperand); msg(" %s ", OperatorText[this->GuardOp]); PrintOperand(this->RightOperand); msg(":"); return; } // end of SMPGuard::Dump() // ***************************************************************** // Class SMPRegTransfer // ***************************************************************** // Constructor SMPRegTransfer::SMPRegTransfer(void) { this->Guard = NULL; this->LeftOperand.type = o_void; this->RightOperand.type = o_void; this->RTop.oper = SMP_NULL_OPERATOR; this->RTop.type = UNINIT; this->RTop.NonSpeculativeType = UNINIT; this->RightSubTree = false; this->RightRT = NULL; return; } // Destructor SMPRegTransfer::~SMPRegTransfer() { #if 0 msg("Destroying SMPRegTransfer.\n"); #endif if (NULL != this->RightRT) delete this->RightRT; if (NULL != this->Guard) 
delete this->Guard; return; } // Debug print void SMPRegTransfer::Dump(void) { if (NULL != this->Guard) this->Guard->Dump(); // Left operand if (o_void != this->LeftOperand.type) PrintOperand(this->LeftOperand); // Then the operator msg(" %s ", OperatorText[this->GetOperator()]); // then the right operand or subtree if (this->HasRightSubTree()) this->GetRightTree()->Dump(); else if (o_void != this->RightOperand.type) PrintOperand(this->RightOperand); return; } // ***************************************************************** // Class SMPRTL // ***************************************************************** // Constructor SMPRTL::SMPRTL() { this->ExtraKills.clear(); this->RTCount = 0; return; } // Destructor SMPRTL::~SMPRTL() { for (size_t index = 0; index < this->RTCount; ++index) { delete (this->RTvector[index]); } this->ExtraKills.clear(); return; } // Get methods SMPRegTransfer *SMPRTL::GetRT(size_t index) { if (index > this->RTCount) return NULL; else return this->RTvector[index]; } // Set methods void SMPRTL::push_back(SMPRegTransfer *NewEffect) { assert(SMP_RT_LIMIT > this->RTCount); this->RTvector[this->RTCount] = NewEffect; ++(this->RTCount); return; } // Printing methods void SMPRTL::Dump(void) { size_t index; if (0 < this->RTCount) { msg("RTL: "); for (index = 0; index < this->RTCount; ++index) { this->RTvector[index]->Dump(); } for (index = 0; index < this->ExtraKills.size(); ++index) { msg(" KILL: "); PrintOperand(this->ExtraKills.at(index)); } msg("\n"); } return; } // end of SMPRTL::Dump() // ***************************************************************** // Class SMPInstr // ***************************************************************** // Constructor for instruction. 
SMPInstr::SMPInstr(ea_t addr) { this->SMPcmd.size = 0; this->address = addr; this->analyzed = false; this->JumpTarget = false; this->Nop = false; this->BlockTerm = false; this->TailCall = false; this->CondTailCall = false; this->CallUsedAsJump = false; this->DirectRecursiveCall = false; this->Interrupt = false; this->RegClearIdiom = false; this->DeadRegsString[0] = '\0'; this->DefsFlags = false; this->UsesFlags = false; this->FarBranchComputed = false; this->BranchesToFarChunk = false; this->CallTarget = BADADDR; this->AddSubSourceType = UNINIT; this->AddSubUseType = UNINIT; this->AddSubSourceOp = InitOp; this->AddSubUseOp = InitOp; this->DestMemOp = InitOp; this->SrcMemOp = InitOp; this->DEFMemOp = InitOp; this->USEMemOp = InitOp; this->MoveSource = InitOp; this->IndirectMemRead = false; this->IndirectMemWrite = false; this->LoadFromStack = false; this->MultiplicationBitsDiscarded = false; this->TypeInferenceComplete = false; this->CategoryInferenceComplete = false; this->BasicBlock = NULL; this->features = 0; this->disasm[0] = '\0'; this->type = DEFAULT; this->OptType = 0; this->Defs.clear(); this->Uses.clear(); return; } // Destructor. SMPInstr::~SMPInstr() { this->Defs.clear(); this->Uses.clear(); return; } // Is the instruction the type that terminates a basic block? bool SMPInstr::IsBasicBlockTerminator() const { return ((type == JUMP) || (type == COND_BRANCH) || (type == INDIR_JUMP) || (type == RETURN)); } // Get non-flags DEF, usually for arithmetic opcode. set<DefOrUse, LessDefUse>::iterator SMPInstr::GetFirstNonFlagsDef(void) { set<DefOrUse, LessDefUse>::iterator DefIter; op_t DefOp; for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { DefOp = DefIter->GetOp(); if (!((o_reg == DefOp.type) && DefOp.is_reg(MD_FLAGS_REG))) break; // found a non-flags-reg DEF. } return DefIter; } // Is the destination operand a memory reference? 
bool SMPInstr::HasDestMemoryOperand(void) { bool MemDest = false; op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = this->SMPcmd.Operands[i]; optype_t CurrType = Opnd.type; if (this->features & DefMacros[i]) { // DEF MemDest = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemDest) { this->DestMemOp = Opnd; break; } } } return MemDest; } // end of SMPInstr::HasDestMemoryOperand() // Is a source operand a memory reference? bool SMPInstr::HasSourceMemoryOperand(void) { bool MemSrc = false; op_t Opnd; // NN_lea looks like it has a memory source, but it does not. if (NN_lea == this->SMPcmd.itype) return false; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = this->SMPcmd.Operands[i]; optype_t CurrType = Opnd.type; if (this->features & UseMacros[i]) { // USE MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemSrc) break; } } return MemSrc; } // end of SMPInstr::HasSourceMemoryOperand() // Get the first memory operand in the DEF list. op_t SMPInstr::MDGetMemDefOp(void) { set<DefOrUse, LessDefUse>::iterator DefIter; if (this->DEFMemOp.type != o_void) return this->DEFMemOp; // cached value op_t MemOp = InitOp; for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { optype_t DefType = DefIter->GetOp().type; if ((DefType >= o_mem) && (DefType <= o_displ)) { MemOp = DefIter->GetOp(); break; } } this->DEFMemOp = MemOp; // cache the value return MemOp; } // end of SMPInstr::MDGetMemDefOp() // Get the first memory operand in the USE list. 
op_t SMPInstr::MDGetMemUseOp(void) { set<DefOrUse, LessDefUse>::iterator UseIter; if (this->USEMemOp.type != o_void) return this->USEMemOp; // cached value op_t MemOp = InitOp; for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { optype_t UseType = UseIter->GetOp().type; if ((UseType >= o_mem) && (UseType <= o_displ)) { MemOp = UseIter->GetOp(); break; } } this->USEMemOp = MemOp; // cache the value return MemOp; } // end of SMPInstr::MDGetMemUseOp() // Detect indirect memory DEFs or USEs void SMPInstr::AnalyzeIndirectRefs(bool UseFP) { op_t DefMemOp = this->MDGetMemDefOp(); op_t UseMemOp = this->MDGetMemUseOp(); if (o_void != DefMemOp.type) { // Found a memory DEF. Is it indirect? if (MDIsIndirectMemoryOpnd(DefMemOp, UseFP)) { this->IndirectMemWrite = true; } } if (o_void != UseMemOp.type) { // Found a memory USE. Is it indirect? if (MDIsIndirectMemoryOpnd(UseMemOp, UseFP)) { this->IndirectMemRead = true; } } return; } // end of SMPInstr::AnalyzeIndirectRefs() set<DefOrUse, LessDefUse>::iterator SMPInstr::GetPointerAddressReg(op_t MemOp) { int BaseReg; int IndexReg; ushort ScaleFactor; ea_t displacement; set<DefOrUse, LessDefUse>::iterator PtrIter; if ((NULL == this->BasicBlock) || (NULL == this->BasicBlock->GetFunc())) { msg("ERROR: NULL member pointers in SMPInstr::GetPointerAddressReg() at %x \n", this->address); return this->GetLastUse(); } bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, displacement); if ((R_none != BaseReg) && (R_sp != BaseReg) && (!(UseFP && (R_bp == BaseReg)))) { op_t BaseOp = InitOp; BaseOp.type = o_reg; BaseOp.reg = (ushort) BaseReg; PtrIter = this->FindUse(BaseOp); assert(PtrIter != this->GetLastUse()); if (IsDataPtr(PtrIter->GetType())) { return PtrIter; } } if ((R_none != IndexReg) && (R_sp != IndexReg) && (!(UseFP && (R_bp == IndexReg)))) { op_t IndexOp = InitOp; IndexOp.type = o_reg; IndexOp.reg = (ushort) IndexReg; PtrIter = 
this->FindUse(IndexOp); assert(PtrIter != this->GetLastUse()); if (IsDataPtr(PtrIter->GetType())) { return PtrIter; } } PtrIter = this->GetLastUse(); return PtrIter; } // end of SMPInstr::GetPointerAddressReg() // Does the instruction whose flags are in F have a numeric type // as the second source operand? // NOTE: We can only analyze immediate values now. When data flow analyses are implemented, // we will be able to analyze many non-immediate operands. #define IMMEDNUM_LOWER -8191 #define IMMEDNUM_UPPER 8191 bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { bool SecondOpImm = (this->SMPcmd.Operands[1].type == o_imm); uval_t TempImm; if (SecondOpImm) { TempImm = this->SMPcmd.Operands[1].value; } return (SecondOpImm && IsImmedNumeric(TempImm)); } // end of SMPInstr::IsSecondSrcOperandNumeric() // Determine the type of the USE-only operand for add and subtract // instructions. If it is NUMERIC or PROF_NUMERIC, an optimizing // annotation will result. // As a byproduct, find the type of the USE/DEF operand as well. void SMPInstr::SetAddSubSourceType(void) { // Walk the RTL and find the operands we care about. // The RTL should look like: opnd1 := (opnd1 op opnd2), where op is // and add or subtract operator. Within the parentheses, the type // of opnd1 is our AddSubUseType and opnd1 is our AddSubUseOp, while // the type of opnd2 is our AddSubSourceType. if (this->RTL.GetCount() < 1) return; // no RTL, no leave types as UNINIT. assert(this->RTL.GetRT(0)->HasRightSubTree()); SMPRegTransfer *RightTree = this->RTL.GetRT(0)->GetRightTree(); op_t LeftOp, RightOp; LeftOp = RightTree->GetLeftOperand(); // Use (also DEF) operand #if SMP_BUILD_SPECIAL_ADC_SBB_RTL if ((NN_adc != this->SMPcmd.itype) && (NN_sbb != this->SMPcmd.itype)) { assert(!(RightTree->HasRightSubTree())); RightOp = RightTree->GetRightOperand(); // Src (non-DEF) operand } else { // Add with carry and subtract with borrow have an extra level // to the tree RTL, e.g. 
for add with carry: // opnd1 := (opnd1 + (opnd2 + carryflag)) assert(RightTree->HasRightSubTree()); RightTree = RightTree->GetRightTree(); RightOp = RightTree->GetLeftOperand(); } #else assert(!(RightTree->HasRightSubTree())); RightOp = RightTree->GetRightOperand(); // Src (non-DEF) operand #endif set<DefOrUse, LessDefUse>::iterator UseIter, SrcIter; SrcIter = this->FindUse(RightOp); assert(SrcIter != this->GetLastUse()); this->AddSubSourceType = SrcIter->GetType(); this->AddSubSourceOp = RightOp; UseIter = this->FindUse(LeftOp); assert(UseIter != this->GetLastUse()); this->AddSubUseType = UseIter->GetType(); this->AddSubUseOp = LeftOp; return; } // end of SMPInstr::SetAddSubSourceType() // Are all DEFs in the DEF set NUMERIC type? bool SMPInstr::AllDefsNumeric(void) { bool AllNumeric = (this->Defs.GetSize() > 0); // false if no DEFs, true otherwise set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { // We ignore the stack pointer for pop instructions and consider only // the register DEF of the pop. if (this->MDIsPopInstr() && CurrDef->GetOp().is_reg(R_sp)) continue; AllNumeric = (AllNumeric && IsNumeric(CurrDef->GetType())); } return AllNumeric; } // end of SMPInstr::AllDefsNumeric() // Were the types of any DEFs derived from profiler info? bool SMPInstr::AnyDefsProfiled(void) { bool profd = false; set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { profd = (profd || IsProfDerived(CurrDef->GetType())); } return profd; } // Do all DEFs have DEF_METADATA_UNUSED status? 
bool SMPInstr::AllDefMetadataUnused(void) { bool AllUnused = (this->Defs.GetSize() > 0); // false if no DEFs, true otherwise set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { AllUnused = (AllUnused && (DEF_METADATA_UNUSED == CurrDef->GetMetadataStatus())); } return AllUnused; } // end of SMPInstr::AllDefMetadataUnused() // DEBUG print operands for Inst. void SMPInstr::PrintOperands(void) const { op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = SMPcmd.Operands[i]; PrintOneOperand(Opnd, this->features, i); } msg(" \n"); return; } // end of SMPInstr::PrintOperands() // Complete DEBUG printing. void SMPInstr::Dump(void) { msg("%x %d SMPitype: %d %s\n", this->address, this->SMPcmd.size, (int) this->type, this->GetDisasm()); msg("USEs: "); this->Uses.Dump(); msg("DEFs: "); this->Defs.Dump(); this->RTL.Dump(); #if SMP_VERBOSE_DUMP this->PrintOperands(); #endif msg("\n"); return; } // end of SMPInstr::Dump() // Print out the destination operand list for the instruction, given // the OptCategory for the instruction as a hint. char * SMPInstr::DestString(int OptType) { static char DestList[MAXSTR]; int RegDestCount = 0; DestList[0] = 'Z'; // Make sure there are no leftovers from last call DestList[1] = 'Z'; DestList[2] = '\0'; set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) { op_t DefOpnd = CurrDef->GetOp(); if (DefOpnd.is_reg(X86_FLAGS_REG)) // don't print flags as a destination continue; // We want to ignore the stack pointer DEF for pops and just include // the register DEF for the pop. 
if (DefOpnd.is_reg(R_sp) && this->MDIsPopInstr()) continue; if (o_reg == DefOpnd.type) { ushort DestReg = DefOpnd.reg; if (0 == RegDestCount) { qstrncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg])); } else { qstrncat(DestList, " ", MAXSTR); qstrncat(DestList, RegNames[DestReg], MAXSTR); } ++RegDestCount; } } if (0 >= RegDestCount) { msg("WARNING: No destination registers: %s\n", this->GetDisasm()); } else { qstrncat(DestList, " ZZ ", MAXSTR); } return DestList; } // end of SMPInstr::DestString() // Equality operator for SMPInstr. Key field is address. int SMPInstr::operator==(const SMPInstr &rhs) const { if (this->address != rhs.GetAddr()) return 0; else return 1; } // Inequality operator for SMPInstr. Key field is address. int SMPInstr::operator!=(const SMPInstr &rhs) const { return (this->address != rhs.GetAddr()); } // Less than operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<(const SMPInstr &rhs) const { return (this->address < rhs.GetAddr()); } // Less than or equal operator for sorting SMPInstr lists. Key field is address. int SMPInstr::operator<=(const SMPInstr &rhs) const { return (this->address <= rhs.GetAddr()); } #define MD_FIRST_ENTER_INSTR NN_enterw #define MD_LAST_ENTER_INSTR NN_enterq // Is this instruction the one that allocates space on the // stack for the local variables? bool SMPInstr::MDIsFrameAllocInstr(void) { // The frame allocating instruction should look like: // sub esp,48 or add esp,-64 etc. op_t ESPOp = InitOp; ESPOp.type = o_reg; ESPOp.reg = R_sp; if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { if (this->GetLastDef() != this->Defs.FindRef(ESPOp)) { // We know that an addition or subtraction is being // performed on the stack pointer. This should not be // possible within the prologue except at the stack // frame allocation instruction, so return true. We // could be more robust in this analysis in the future. 
**!!** // CAUTION: If a compiler allocates 64 bytes for locals // and 16 bytes for outgoing arguments in a single // instruction: sub esp,80 // you cannot insist on finding sub esp,LocSize // To make this more robust, we are going to insist that // an allocation of stack space is either performed by // adding a negative immediate value, or by subtracting // a positive immediate value. We will throw in, free of // charge, a subtraction of a register, which is how alloca() // usually allocates stack space. // PHASE ORDERING: Should we use the Operands[] instead of the USE list? **!!** set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { if (o_imm == CurrUse->GetOp().type) { signed long TempImm = (signed long) CurrUse->GetOp().value; if (((0 > TempImm) && (this->SMPcmd.itype == NN_add)) || ((0 < TempImm) && (this->SMPcmd.itype == NN_sub))) { return true; } } else if ((o_reg == CurrUse->GetOp().type) && (!CurrUse->GetOp().is_reg(R_sp)) // skip the ESP operand && (this->SMPcmd.itype == NN_sub)) { // sub esp,reg: alloca() ? return true; } } } } else if ((this->SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (this->SMPcmd.itype <= MD_LAST_ENTER_INSTR)) { return true; } return false; } // end of SMPInstr::MDIsFrameAllocInstr() #define MD_FIRST_LEAVE_INSTR NN_leavew #define MD_LAST_LEAVE_INSTR NN_leaveq // Is this instruction in the epilogue the one that deallocates the local // vars region of the stack frame? bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) { // The usual compiler idiom for the prologue on x86 is to // deallocate the local var space with: mov esp,ebp // It could be add esp,constant. We can be tricked by // add esp,constant when the constant is just the stack // adjustment after a call. We will have to insist that // the immediate operand have at least the value of // LocalVarsSize for this second form, and that UseFP be true // for the first form. 
set<DefOrUse, LessDefUse>::iterator FirstDef = this->GetFirstDef(); set<DefOrUse, LessDefUse>::iterator FirstUse = this->GetFirstUse(); if ((SMPcmd.itype >= MD_FIRST_LEAVE_INSTR) && (SMPcmd.itype <= MD_LAST_LEAVE_INSTR)) return true; else if (this->HasDestMemoryOperand() || this->HasSourceMemoryOperand()) { // Don't get fooled by USE or DEF entries of EBP or ESP that come // from memory operands, e.g. mov eax,[ebp-20] return false; } else if (UseFP && (this->SMPcmd.itype == NN_mov) && (FirstDef->GetOp().is_reg(R_sp)) && (FirstUse->GetOp().is_reg(R_bp))) return true; else if ((this->SMPcmd.itype == NN_add) && (FirstDef->GetOp().is_reg(R_sp))) { set<DefOrUse, LessDefUse>::iterator SecondUse = ++FirstUse; if (SecondUse == this->Uses.GetLastRef()) return false; // no more USEs ... strange for ADD instruction if (SecondUse->GetOp().is_imm((uval_t) LocalVarsSize)) return true; else if (SecondUse->GetOp().type == o_imm) { signed long TempImm = (signed long) this->SMPcmd.Operands[1].value; if (0 > TempImm) // adding a negative to ESP; alloc, not dealloc return false; else { msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); return true; } } else return false; } else return false; } // end of SMPInstr::MDIsFrameDeallocInstr() // Is instruction a no-op? There are 1-byte, 2-byte, etc., versions of no-ops. bool SMPInstr::MDIsNop(void) const { bool IsNop = false; ushort opcode = this->SMPcmd.itype; // NOTE: More examples have arisen, e.g. xchg reg with itself. !!!!!! if (NN_nop == opcode) IsNop = true; else if (NN_mov == opcode) { if ((o_reg == this->SMPcmd.Operands[0].type) && this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) { // We have a register to register move with source == destination. 
IsNop = true; } } else if (NN_lea == opcode) { if ((o_reg == this->SMPcmd.Operands[0].type) && (o_displ == this->SMPcmd.Operands[1].type) && (0 == this->SMPcmd.Operands[1].addr)) { // We are looking for 6-byte no-ops like lea esi,[esi+0] ushort destreg = this->SMPcmd.Operands[0].reg; if ((this->SMPcmd.Operands[1].hasSIB) && (destreg == (ushort) sib_base(this->SMPcmd.Operands[1])) && (R_sp == sib_index(this->SMPcmd.Operands[1]))) { // R_sp signifies no SIB index register. So, we have // lea reg,[reg+0] with reg being the same in both place, // once as Operands[0] and once as the base reg in Operands[1]. IsNop = true; } else if (destreg == this->SMPcmd.Operands[1].reg) { IsNop = true; } } } return IsNop; } // end of SMPInstr::MDIsNop() // Is non-multiply arithmetic instruction that can possibly overflow? bool SMPInstr::MDIsOverflowingOpcode(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_adc == opcode) || (NN_add == opcode) || (NN_inc == opcode) || (NN_neg == opcode) || (NN_xadd == opcode)); } // Is non-multiply arithmetic instruction that can possibly underflow? bool SMPInstr::MDIsUnderflowingOpcode(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_dec == opcode) || (NN_sbb == opcode) || (NN_sub == opcode)); } // MACHINE DEPENDENT: Is instruction a return instruction? bool SMPInstr::MDIsReturnInstr(void) const { return ((this->SMPcmd.itype == NN_retn) || (this->SMPcmd.itype == NN_retf)); } // MACHINE DEPENDENT: Is instruction a POP instruction? #define FIRST_POP_INST NN_pop #define LAST_POP_INST NN_popfq bool SMPInstr::MDIsPopInstr(void) const { return ((this->SMPcmd.itype >= FIRST_POP_INST) && (this->SMPcmd.itype <= LAST_POP_INST)); } // MACHINE DEPENDENT: Is instruction a PUSH instruction? 
#define FIRST_PUSH_INST NN_push #define LAST_PUSH_INST NN_pushfq bool SMPInstr::MDIsPushInstr(void) const { return ((this->SMPcmd.itype >= FIRST_PUSH_INST) && (this->SMPcmd.itype <= LAST_PUSH_INST)); } // MACHINE DEPENDENT: Is instruction an ENTER instruction? bool SMPInstr::MDIsEnterInstr(void) const { return ((this->SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (this->SMPcmd.itype <= MD_LAST_ENTER_INSTR)); } // MACHINE DEPENDENT: Is instruction a LEAVE instruction? bool SMPInstr::MDIsLeaveInstr(void) const { return ((this->SMPcmd.itype >= MD_FIRST_LEAVE_INSTR) && (this->SMPcmd.itype <= MD_LAST_LEAVE_INSTR)); } #define MD_FIRST_COND_MOVE_INSTR NN_cmova #define MD_LAST_COND_MOVE_INSTR NN_fcmovnu // MACHINE DEPENDENT: Is instruction a conditional move? bool SMPInstr::MDIsConditionalMoveInstr(void) const { return ((this->SMPcmd.itype >= MD_FIRST_COND_MOVE_INSTR) && (this->SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR)); } // MACHINE DEPENDENT: Is instruction a conditional jump based on an unsigned condition? bool SMPInstr::MDIsUnsignedBranch(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_ja == opcode) || (NN_jae == opcode) || (NN_jb == opcode) || (NN_jbe == opcode) || (NN_jna == opcode) || (NN_jnae == opcode) || (NN_jnb == opcode) || (NN_jnbe == opcode)); } // MACHINE DEPENDENT: Is instruction a conditional jump based on a signed condition? bool SMPInstr::MDIsSignedBranch(void) const { unsigned short opcode = this->SMPcmd.itype; return ((NN_jg == opcode) || (NN_jge == opcode) || (NN_jl == opcode) || (NN_jle == opcode) || (NN_jng == opcode) || (NN_jnge == opcode) || (NN_jnl == opcode) || (NN_jnle == opcode) || (NN_js == opcode) || (NN_jns == opcode)); } // MACHINE DEPENDENT: Does instruction use a callee-saved register? 
bool SMPInstr::MDUsesCalleeSavedReg(void) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t CurrOp = CurrUse->GetOp(); if (CurrOp.is_reg(R_bp) || CurrOp.is_reg(R_si) || CurrOp.is_reg(R_di) || CurrOp.is_reg(R_bx)) { return true; } } return false; } // end of SMPInstr::MDUsesCalleeSavedReg() // Is the instruction a register to register copy of a stack pointer or frame pointer // into a general purpose register (which mmStrata will now need to track as a stack // relative pointer)? bool SMPInstr::MDIsStackPointerCopy(bool UseFP) { // OptType 3 indicates a move instruction // The lea instruction can perform three operand arithmetic, e.g. // lea ebx,[esp+12] is just ebx:=esp+12, so it is a stack pointer copy. if (((this->OptType == 3) || (NN_lea == this->SMPcmd.itype)) && (this->GetFirstDef()->GetOp().type == o_reg) && (!(this->GetFirstDef()->GetOp().is_reg(R_sp))) && (!(this->HasSourceMemoryOperand()))) { // reg to reg move if (UseFP) { if (this->GetFirstUse()->GetOp().is_reg(R_bp)) // Move of base pointer EBP into a general register return true; else if ((this->GetFirstUse()->GetOp().is_reg(R_sp)) && !(this->GetFirstDef()->GetOp().is_reg(R_bp))) // Move of ESP into something besides a base pointer return true; } else if (this->GetFirstUse()->GetOp().is_reg(R_sp)) { // Move of ESP into a register; no base pointer used in this function return true; } } return false; } // end of SMPInstr::MDIsStackPointerCopy() // If call instruction is to malloc(), set the DEF register EAX type to // HEAPPTR and return true. bool SMPInstr::MDFindMallocCall(op_t TargetOp) { bool changed = false; func_t *TargetFunc = get_func(TargetOp.addr); if (TargetFunc) { char FuncName[MAXSTR]; get_func_name(TargetFunc->startEA, FuncName, sizeof(FuncName) - 1); if (0 == strcmp("malloc", FuncName)) { // NOTE: Some compilers might call it __malloc ; make this more robust !!! 
#if SMP_VERBOSE_FIND_POINTERS msg("Found call to malloc at %x\n", this->addr); #endif op_t SearchOp = InitOp; SearchOp.type = o_reg; SearchOp.reg = R_ax; set<DefOrUse, LessDefUse>::iterator EAXDEF; EAXDEF = this->SetDefType(SearchOp, HEAPPTR); int SSANum = EAXDEF->GetSSANum(); changed = true; if (this->BasicBlock->IsLocalName(SearchOp)) { (void) this->BasicBlock->PropagateLocalDefType(SearchOp, HEAPPTR, this->GetAddr(), SSANum, false); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(SearchOp, HEAPPTR, SSANum, false); } } // end if "malloc" } // end if (TargetFunc) return changed; } // end of SMPInstr::MDFindMallocCall() // Is instruction a branch (conditional or unconditional) to a // code target that is not in the current chunk? bool SMPInstr::IsBranchToFarChunk(void) { if (this->FarBranchComputed) { // answer is cached return this->BranchesToFarChunk; } func_t *CurrChunk = get_fchunk(this->address); bool FarBranch = false; if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) { // Instruction is a direct branch, conditional or unconditional if (this->NumUses() > 0) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t JumpTarget = CurrUse->GetOp(); if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) { // Branches to a code address func_t *TargetChunk = get_fchunk(JumpTarget.addr); // Is target address within the same chunk as the branch? 
FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA); } } } } this->BranchesToFarChunk = FarBranch; this->FarBranchComputed = true; return FarBranch; } // end of SMPInstr::IsBranchToFarChunk() set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) { return this->Uses.SetSSANum(CurrOp, SSASub); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) { return this->Defs.SetSSANum(CurrOp, SSASub); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) { return this->Uses.SetType(CurrOp, CurrType, this); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) { return this->Defs.SetType(CurrOp, CurrType, this); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) { return this->Defs.SetMetadata(CurrOp, Status); }; set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) { return this->Defs.SetIndWrite(CurrOp, IndWriteFlag); }; // Analyze the instruction and its operands. void SMPInstr::Analyze(void) { if (this->analyzed) return; bool DebugFlag = false; if (0x8049b00 == this->address) { // Setting up breakpoint line. DebugFlag = true; } // Fill cmd structure with disassembly of instr #if IDA_SDK_VERSION < 600 int InstrLen = ua_ana0(this->address); #else int InstrLen = decode_insn(this->address); #endif if (0 >= InstrLen) { msg("ERROR: decode_insn failed at %x \n", this->address); this->SMPcmd = cmd; this->SMPcmd.size = 0; return; } // NOTE: Might do this on demand to save time. // Get the instr disassembly text. bool IDAsuccess = generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1); if (IDAsuccess) { // Remove interactive color-coding tags. 
ssize_t StringLen = tag_remove(this->disasm, this->disasm, 0); if (0 > StringLen) msg("ERROR: tag_remove failed at %x \n", this->address); } else { msg("ERROR: generate_disasm_line failed at %x \n", this->address); } // Copy cmd to member variable SMPcmd. this->SMPcmd = cmd; // Get the canonical features into member variables features. this->features = cmd.get_canon_feature(); // Record what type of instruction this is, simplified for the needs // of data flow and type analysis. this->type = DFACategory[cmd.itype]; // Record optimization category. this->OptType = OptCategory[cmd.itype]; this->Interrupt = ((NN_int == cmd.itype) || (NN_into == cmd.itype) || (NN_int3 == cmd.itype)); // See if instruction is an ASM idiom for clearing a register. if (NN_xor == this->SMPcmd.itype) { ushort FirstReg; if (o_reg == this->SMPcmd.Operands[0].type) { FirstReg = this->SMPcmd.Operands[0].reg; if (this->SMPcmd.Operands[1].is_reg(FirstReg)) this->RegClearIdiom = true; } } // See if instruction is simple nop or ASM idiom for nop. this->Nop = this->MDIsNop(); // Build the DEF and USE lists for the instruction. this->BuildSMPDefUseLists(); // Determine whether the instruction is a jump target by looking // at its cross references and seeing if it has "TO" code xrefs. xrefblk_t xrefs; for (bool ok = xrefs.first_to(this->address, XREF_FAR); ok; ok = xrefs.next_to()) { if ((xrefs.from != 0) && (xrefs.iscode)) { this->JumpTarget = true; break; } } // If instruction is a call or indirect call, see if a call target has been recorded // by IDA Pro. if (this->GetDataFlowType() == INDIR_CALL) { for (bool ok = xrefs.first_from(this->address, XREF_ALL); ok; ok = xrefs.next_from()) { if ((xrefs.to != 0) && (xrefs.iscode)) { // Found a code target, with its address in CurrXrefs.to if (xrefs.to == (this->address + this->GetCmd().size)) { // A call instruction will have two targets: the fall through to the // next instruction, and the called function. We want to find // the called function. 
continue; } // We found a target, not the fall-through. this->CallTarget = xrefs.to; msg("Found indirect call target %x at %x\n", xrefs.to, this->address); break; } } // end for all code xrefs if (BADADDR == this->CallTarget) { msg("WARNING: Did not find indirect call target at %x\n", this->address); } } // end if INDIR_CALL else if (this->GetDataFlowType() == CALL) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { optype_t OpType = CurrUse->GetOp().type; if ((OpType == o_near) || (OpType == o_far)) { this->CallTarget = CurrUse->GetOp().addr; } } if (BADADDR == this->CallTarget) { msg("WARNING: Target not found for direct call at %x\n", this->address); } } this->analyzed = true; if (DebugFlag) { msg("Analyzed debug instruction at %x\n", this->address); } return; } // end of SMPInstr::Analyze() // Analyze the floating point NOP marker instruction at the top of the function. void SMPInstr::AnalyzeMarker(void) { if (this->analyzed) return; // Fill member variable SMPcmd structure with disassembly of instr (void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd)); this->SMPcmd.itype = NN_fnop; this->SMPcmd.size = 1; this->SMPcmd.ea = this->address; // Get the instr disassembly text. qstrncpy(this->disasm, "\tfnop\t; Top of function SSA marker for SMP", sizeof(this->disasm) - 1); // Record what type of instruction this is, simplified for the needs // of data flow and type analysis. this->type = DFACategory[this->SMPcmd.itype]; // Record optimization category. 
this->OptType = OptCategory[this->SMPcmd.itype]; this->analyzed = true; return; } // end of SMPInstr::AnalyzeMarker() // Detect oddities of call instructions, such as pseudo-calls that are // actually jumps within a function void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) { if (BADADDR != this->CallTarget) { this->CallUsedAsJump = ((this->CallTarget > FirstFuncAddr) && (this->CallTarget <= LastFuncAddr)); this->DirectRecursiveCall = (this->CallTarget == FirstFuncAddr); if (this->CallUsedAsJump) this->type = JUMP; } return; } // Find USE-not-DEF operand that is not the flags register. op_t SMPInstr::GetSourceOnlyOperand(void) { size_t OpNum; for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & DefMacros[OpNum]) { // DEF ; } else if (this->features & UseMacros[OpNum]) { // USE op_t CurrOp = this->SMPcmd.Operands[OpNum]; if (!(CurrOp.is_reg(X86_FLAGS_REG))) { return CurrOp; } } } // It is expected that increment, decrement, and floating point stores // will not have a USE-only operand. Increment and decrement have an // operand that is both USEd and DEFed, while the floating point stack // registers are implicit in most floating point opcodes. Also, exchange // and exchange-and-add instructions have multiple DEF-and-USE operands. int TypeGroup = SMPTypeCategory[this->SMPcmd.itype]; if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12) && (TypeGroup != 13)) { msg("ERROR: Could not find source only operand at %x in %s\n", this->address, this->GetDisasm()); } return InitOp; } // end of SMPInstr::GetSourceOnlyOperand() // Fill the Defs and Uses private data members. void SMPInstr::BuildSMPDefUseLists(void) { size_t OpNum; bool DebugFlag = (0x8049b00 == this->GetAddr()); this->Defs.clear(); this->Uses.clear(); // Start with the Defs. 
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & DefMacros[OpNum]) { // DEF op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (DebugFlag) { msg("DEBUG: Setting DEF for: "); PrintOperand(TempOp); msg("\n"); } if (o_reg == TempOp.type) { // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. TempOp.reg = MDCanonicalizeSubReg(TempOp.reg); } this->Defs.SetRef(TempOp); } } } // end for (OpNum = 0; ...) if (this->RegClearIdiom) { // Something like xor eax,eax clears eax but does not really // use eax. It is the same as mov eax,0 and we don't want to // extend the prior def-use chain for eax to this instruction // by treating the instruction as xor eax,eax. Instead, we // build the DEF and USE lists and RTL as if it were mov eax,0. op_t ImmOp = InitOp; ImmOp.type = o_imm; this->Uses.SetRef(ImmOp, NUMERIC); return; } // Now, do the Uses. Uses have special case operations, because // any memory operand could have register uses in the addressing // expression, and we must create Uses for those registers. For // example: mov eax,[ebx + esi*2 + 044Ch] // This is a two-operand instruction with one def: eax. But // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi. // The first use is an op_t of type o_phrase (memory phrase), // which can be copied from cmd.Operands[1]. Likewise, we just // copy cmd.Operands[0] into the defs list. However, we must create // op_t types for register ebx and register esi and append them // to the Uses list. This is handled by the machine dependent // method MDFixupDefUseLists(). for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { if (this->features & UseMacros[OpNum]) { // USE op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (DebugFlag) { msg("DEBUG: Setting USE for: "); PrintOperand(TempOp); msg("\n"); } if (o_reg == TempOp.type) { // We want to map AH, AL, and AX to EAX, etc. 
throughout our data flow // analysis and type inference systems. TempOp.reg = MDCanonicalizeSubReg(TempOp.reg); } this->Uses.SetRef(TempOp); } } } // end for (OpNum = 0; ...) return; } // end of SMPInstr::BuildSMPDefUseLists() // If DefReg is not already in the DEF list, add a DEF for it. void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) { op_t TempDef = InitOp; TempDef.type = o_reg; TempDef.reg = DefReg; if (Shown) TempDef.set_showed(); else TempDef.clr_showed(); this->Defs.SetRef(TempDef, Type); return; } // end of SMPInstr::MDAddRegDef() // If UseReg is not already in the USE list, add a USE for it. void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) { op_t TempUse = InitOp; TempUse.type = o_reg; TempUse.reg = UseReg; if (Shown) TempUse.set_showed(); else TempUse.clr_showed(); this->Uses.SetRef(TempUse, Type); return; } // end of SMPInstr::MDAddRegUse() // Perform machine dependent ad hoc fixes to the def and use lists. // For example, some multiply and divide instructions in x86 implicitly // use and/or define register EDX. For memory phrase examples, see comment // in BuildSMPDefUseLists(). void SMPInstr::MDFixupDefUseLists(void) { // First, handle the uses hidden in memory addressing modes. Note that we do not // care whether we are dealing with a memory destination operand or source // operand, because register USEs, not DEFs, happen within the addressing expressions. 
size_t OpNum; SMPOperandType RefType; int BaseReg; int IndexReg; ushort ScaleFactor; ea_t displacement; bool UseFP = true; bool HasIndexReg = false; bool SingleAddressReg = false; bool leaInst = (NN_lea == this->SMPcmd.itype); bool DebugFlag = (this->GetAddr() == 0x8086177); if (DebugFlag) { msg("DEBUG: Fixing up DEF-USE lists for debug location\n"); this->Dump(); } #if SMP_BASEREG_POINTER_TYPE // Some instructions are analyzed outside of any function or block when fixing up // the IDB, so we have to assume the block and func pointers might be NULL. if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc())) UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); #endif if (DebugFlag) { msg("DEBUG: UseFP = %d\n", UseFP); } for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t Opnd = SMPcmd.Operands[OpNum]; if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) { MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement); SingleAddressReg = ((0 == displacement) && ((R_none == BaseReg) || (R_none == IndexReg))); if (R_none != IndexReg) { op_t IndexOpnd = Opnd; // Init to current operand field values IndexOpnd.type = o_reg; // Change type and reg fields IndexOpnd.reg = (ushort) IndexReg; IndexOpnd.hasSIB = 0; IndexOpnd.set_showed(); // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg); if (0 == ScaleFactor) this->Uses.SetRef(IndexOpnd); else { // scaling == shift ==> NUMERIC HasIndexReg = true; this->Uses.SetRef(IndexOpnd, NUMERIC); } } if (R_none != BaseReg) { op_t BaseOpnd = Opnd; // Init to current operand field values BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.reg = (ushort) BaseReg; BaseOpnd.hasSIB = 0; BaseOpnd.set_showed(); // We want to map AH, AL, and AX to EAX, etc. throughout our data flow // analysis and type inference systems. 
BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg); RefType = UNINIT; #if SMP_BASEREG_POINTER_TYPE // R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes(). // Other registers used as base registers should get their USEs as // base registers typed as POINTER, which might get refined later // to STACKPTR, GLOBALPTR, HEAPPTR, etc. // NOTE: the NN_lea opcode is often used without a true base register. // E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which // could not be done in one instruction without using the addressing // modes of the machine to do the arithmetic. We don't want to set the // USE of EAX to POINTER in this case, so we will conservatively skip // all lea instructions here. // We cannot be sure that a register is truly a base register unless // there is also an index register. E.g. with reg+displacement, we // could have memaddr+indexreg or basereg+offset, depending on what // the displacement is. The exception is if there is no offset and only // one addressing register, e.g. mov eax,[ebx]. if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp)) || leaInst || (!HasIndexReg && !SingleAddressReg)) { ; } else { RefType = POINTER; } #endif this->Uses.SetRef(BaseOpnd, RefType); } // end if R_none != BaseReg } // end if (o_phrase or o_displ operand) } // end for (all operands) // The lea (load effective address) instruction looks as if it has // a memory USE: lea ebx,[edx+esi] // However, this instruction is really just: ebx := edx+esi // Now that the above code has inserted the "addressing" registers // into the USE list, we should remove the "memory USE". if (leaInst) { set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t UseOp = CurrUse->GetOp(); if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) { this->EraseUse(CurrUse); break; } } } // Next, handle repeat prefices in the instructions. 
The Intel REPE/REPZ prefix // is just the text printed for SCAS/CMPS instructions that have a REP prefix. // Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and // REPNE/REPNZ only applies to SCAS and CMPS instructions. bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)); bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne)); if (HasRepPrefix && HasRepnePrefix) msg("REP and REPNE both present at %x %s\n", this->GetAddr(), this->GetDisasm()); if (HasRepPrefix || HasRepnePrefix) { // All repeating instructions use ECX as the countdown register. op_t BaseOpnd = InitOp; BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.reg = R_cx; BaseOpnd.clr_showed(); this->Defs.SetRef(BaseOpnd, NUMERIC); this->Uses.SetRef(BaseOpnd, NUMERIC); } if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_scas) || (this->SMPcmd.itype == NN_movs) || (this->SMPcmd.itype == NN_stos)) { // ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS. // Only EDI is involved with SCAS/STOS. op_t BaseOpnd = InitOp; BaseOpnd.type = o_reg; // Change type and reg fields BaseOpnd.clr_showed(); if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_movs)) { BaseOpnd.reg = R_si; this->Defs.SetRef(BaseOpnd, POINTER); this->Uses.SetRef(BaseOpnd, POINTER); } BaseOpnd.reg = R_di; this->Defs.SetRef(BaseOpnd, POINTER); this->Uses.SetRef(BaseOpnd, POINTER); } // Now, handle special instruction categories that have implicit operands. if (NN_cmpxchg == this->SMPcmd.itype) { // x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis // sound by declaring that EAX is always a DEF. this->MDAddRegDef(R_ax, false); } // end if NN_cmpxchg else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) { // IDA does not include the stack pointer in the DEFs or USEs. 
this->MDAddRegDef(R_sp, false); this->MDAddRegUse(R_sp, false); if (!this->MDIsReturnInstr()) { // We always reference [esp+0] or [esp-4], so add it to the DEF or USE list. op_t StackOp = InitOp; StackOp.type = o_displ; StackOp.reg = R_sp; if (this->MDIsPopInstr()) { StackOp.addr = 0; // [ESP+0] this->Uses.SetRef(StackOp); // USE } else { StackOp.addr = (ea_t) -4; // [ESP-4] this->Defs.SetRef(StackOp); // DEF } } } #if SMP_CALL_TRASHES_REGS else if ((this->type == CALL) || (this->type == INDIR_CALL)) { // We want to add the caller-saved registers to the USEs and DEFs lists this->MDAddRegDef(R_ax, false); this->MDAddRegDef(R_cx, false); this->MDAddRegDef(R_dx, false); this->MDAddRegUse(R_ax, false); this->MDAddRegUse(R_cx, false); this->MDAddRegUse(R_dx, false); #if 1 if (this->MDIsInterruptCall()) { #endif this->MDAddRegDef(R_bx, false); this->MDAddRegUse(R_bx, false); this->MDAddRegDef(R_si, false); this->MDAddRegUse(R_si, false); #if 1 } #endif } #endif else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) { // Entire function prologue or epilogue microcoded. this->MDAddRegDef(R_sp, false); this->MDAddRegUse(R_sp, false); this->MDAddRegDef(R_bp, false); this->MDAddRegUse(R_bp, false); } else if ((this->SMPcmd.itype == NN_maskmovq) || (this->SMPcmd.itype == NN_maskmovdqu)) { this->MDAddRegUse(R_di, false, POINTER); } else if (8 == this->GetOptType()) { // This category implicitly writes to EDX:EAX. this->MDAddRegDef(R_dx, false); this->MDAddRegDef(R_ax, false); } // end else if (8 == GetOptType) else if (7 == this->GetOptType()) { // Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX. // DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX. // DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro // sense, because they are not displayed in the disassembly text). For example: // mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. 
If the // source operand is only 8 bits wide, there is room to hold the result in AX // without using DX: mul bl means AX <-- AL*BL. // IMUL has forms with a hidden EAX or AX operand and forms with no implicit // operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that // EBX*EDX gets truncated and the result placed in EBX (no hidden operands). for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempUse = this->SMPcmd.Operands[OpNum]; if (!TempUse.showed()) { // hidden operand if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) { this->MDAddRegUse(R_dx, false); } this->MDAddRegDef(R_ax, false); this->MDAddRegDef(R_dx, false); } } } } // end else if (7 == OptType) #if 0 // The floating point instructions in type categories 14 and 15 often USE and DEF // the floating point register stack, e.g. pushing a value onto that stack is a // massive copy downward of stack locations. We don't really care about the USE of // the stack if the value being pushed came from elsewhere than the stack. For example, // an "fld" opcode pushes its source onto the stack. We build RTLs with a simple // move structure, but the RTL building can be fooled by seeing two "source" operands // in the USE list. if ((14 == SMPTypeCategory[this->SMPcmd.itype]) || (15 == SMPTypeCategory[this->SMPcmd.itype])) { } #endif #if 0 // Not true for LOOP instructions that use only the ECX counter register. if (this->type == COND_BRANCH) { assert(SMPUsesFlags[this->SMPcmd.itype]); } #endif // The return value register EAX is not quite like a caller-save or callee-save // register (technically, it is caller-save). Within a callee, it might appear // that EAX has become dead by the time a return instruction is reached, but // the USE that would make it not dead is in the caller. 
To prevent type inference // from mistakenly thinking that all USEs of EAX have been seen in the callee, // we add EAX to the USE list for all return instructions, as well as for all // tail calls, which are essentially returns in terms of data flow analysis. // This USE of EAX will always be of type UNINIT unless its DEF has a known type // that propagates to it. Thus, it will prevent an invalid back inference of the // DEF type from "all" USE types that are visible in the callee; even if they // were all NUMERIC, this return USE will be UNINIT and inhibit the invalid // type inference. EAX could be loaded with a pointer from memory, for example, // and USEd only in a comparison instruction, making it falsely appear to be // a NUMERIC, without this extra USE at the return instruction. // Because some of the library functions pass values around in EBX, EDI, etc., // we will add these general purpose registers to the USE list for returns // in order to prevent erroneous analyses of dead registers or unused // metadata. if ((this->type == RETURN) || this->IsTailCall()) { this->MDAddRegUse(R_ax, false); this->MDAddRegUse(R_bx, false); this->MDAddRegUse(R_cx, false); this->MDAddRegUse(R_dx, false); if (!UseFP) this->MDAddRegUse(R_bp, false); this->MDAddRegUse(R_si, false); this->MDAddRegUse(R_di, false); } // Next, add the flags register to the DEFs and USEs for those instructions that // are marked as defining or using flags. if (!this->DefsFlags && SMPDefsFlags[this->SMPcmd.itype]) { this->MDAddRegDef(X86_FLAGS_REG, false); this->DefsFlags = true; } if (!this->UsesFlags && SMPUsesFlags[this->SMPcmd.itype]) { this->MDAddRegUse(X86_FLAGS_REG, false); this->UsesFlags = true; } #if 1 if (this->IsNop()) { // Clear the DEFs and USEs for no-ops. // These include machine idioms for no-ops, e.g. mov esi,esi // or xchg ax,ax or lea esi,[esi]. 
this->Defs.clear(); this->Uses.clear(); this->MoveSource = InitOp; this->OptType = 1; } #endif if (DebugFlag) { msg("DEBUG after MDFixupDefUseLists:\n"); this->Dump(); } return; } // end of SMPInstr::MDFixupDefUseLists() // If we can definitely identify which part of the addressing expression // used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR // immediate, set the USE type for that register to POINTER and return true. // If we can find definite NUMERIC addressing registers that are not already // typed as NUMERIC, set their USE types to NUMERIC and return true. bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) { bool changed = false; int BaseReg; int IndexReg; op_t BaseOp = InitOp; op_t IndexOp = InitOp; SMPOperandType BaseType = UNKNOWN; SMPOperandType IndexType = UNKNOWN; ushort ScaleFactor; ea_t offset; set<DefOrUse, LessDefUse>::iterator BaseIter; set<DefOrUse, LessDefUse>::iterator IndexIter; if (NN_lea == this->SMPcmd.itype) return false; // lea instruction really has no memory operands if (NN_fnop == this->SMPcmd.itype) return false; // SSA marker instruction MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset); if (R_none != IndexReg) { IndexOp.type = o_reg; IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg); IndexIter = this->FindUse(IndexOp); assert(IndexIter != this->GetLastUse()); IndexType = IndexIter->GetType(); } if (R_none != BaseReg) { BaseOp.type = o_reg; BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg); BaseIter = this->FindUse(BaseOp); assert(BaseIter != this->GetLastUse()); BaseType = BaseIter->GetType(); } if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) { if ((R_none != IndexReg) && (!IsNumeric(IndexType))) { // We have an indexed access into the stack frame. // Set IndexReg USE type to NUMERIC. 
changed = true; IndexIter = this->SetUseType(IndexOp, NUMERIC); assert(IndexIter != this->GetLastUse()); } return changed; // stack accesses will get STACKPTR type in SetImmedTypes() } if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) { if ((R_none != BaseReg) && (!IsNumeric(BaseType))) { // We have an indexed access into the stack frame. // Set BaseReg USE type to NUMERIC. // Note that BaseReg is really an IndexReg and vice versa. changed = true; BaseIter = this->SetUseType(BaseOp, NUMERIC); assert(BaseIter != this->GetLastUse()); msg("WARNING: BaseReg is index, IndexReg is base: %s\n", this->GetDisasm()); } return changed; // stack accesses will get STACKPTR type in SetImmedTypes() } if (IsImmedGlobalAddress(offset)) { if ((R_none != IndexReg) && (!IsNumeric(IndexType))) { // We have an indexed access into a global. // Set IndexReg USE type to NUMERIC. changed = true; IndexIter = this->SetUseType(IndexOp, NUMERIC); assert(IndexIter != this->GetLastUse()); } if ((R_none != BaseReg) && (!IsNumeric(BaseType))) { // We have an indexed access into a global. // Set BaseReg USE type to NUMERIC. // Note that BaseReg is really an index register. changed = true; BaseIter = this->SetUseType(BaseOp, NUMERIC); assert(BaseIter != this->GetLastUse()); #if SMP_VERBOSE_FIND_POINTERS msg("WARNING: BaseReg used as index: %s\n", this->GetDisasm()); #endif } return changed; // global immediate is handled in SetImmedTypes() } // At this point, we must have a base address in a register, not used // to directly address the stack or a global. if ((0 < ScaleFactor) || (R_none == IndexReg)) { // IndexReg is scaled, meaning it is NUMERIC, so BaseReg must // be a POINTER; or IndexReg is not present, so BaseReg is the // only possible holder of an address. 
if (R_none != BaseReg) { if (UNINIT == BaseIter->GetType()) { changed = true; BaseIter = this->SetUseType(BaseOp, POINTER); assert(BaseIter != this->GetLastUse()); } } } else if (R_none == BaseReg) { // We have an unscaled IndexReg and no BaseReg and offset was // not a global offset, so IndexReg must be a POINTER. if (R_none != IndexReg) { if (UNINIT == IndexType) { changed = true; IndexIter = this->SetUseType(IndexOp, POINTER); assert(IndexIter != this->GetLastUse()); } } } else { // We have BaseReg and an unscaled IndexReg. // The only hope for typing something like [ebx+edx] is for // one register to already be typed NUMERIC, in which case // the other one must be a POINTER, or if one register is // already POINTER, then the other one must be NUMERIC. if (IsNumeric(BaseType)) { if (UNINIT == IndexType) { // Set to POINTER or PROF_POINTER changed = true; IndexIter = this->SetUseType(IndexOp, POINTER); assert(IndexIter != this->GetLastUse()); } else if (IsNumeric(IndexType)) { msg("ERROR: BaseReg and IndexReg both NUMERIC at %x: %s\n", this->address, this->GetDisasm()); } } else { // BaseReg was not NUMERIC if (UNINIT == BaseType) { // BaseReg is UNINIT if (IsNumeric(IndexType)) { changed = true; BaseIter = this->SetUseType(BaseOp, POINTER); assert(BaseIter != this->GetLastUse()); } else if (IsDataPtr(IndexType)) { // IndexReg is POINTER, so make BaseReg NUMERIC. changed = true; BaseIter = this->SetUseType(BaseOp, NUMERIC); assert(BaseIter != this->GetLastUse()); } } else if (IsDataPtr(BaseType)) { // BaseReg was a pointer type. IndexReg must be NUMERIC. if (UNINIT == IndexType) { changed = true; IndexIter = this->SetUseType(IndexOp, NUMERIC); assert(IndexIter != this->GetLastUse()); } else if (IsDataPtr(IndexType)) { msg("ERROR: BaseReg and IndexReg both POINTER at %x: %s\n", this->address, this->GetDisasm()); } } } } return changed; } // end of SMPInstr::MDFindPointerUse() // Are all DEFs typed to something besides UNINIT? 
bool SMPInstr::AllDEFsTyped(void) { bool FoundUNINIT = false; set<DefOrUse, LessDefUse>::iterator DefIter; for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { if (IsEqType(UNINIT, DefIter->GetType())) { FoundUNINIT = true; break; } } return (!FoundUNINIT); } // end of SMPInstr::AllDEFsTyped() // Are all USEs typed to something besides UNINIT? bool SMPInstr::AllUSEsTyped(void) { bool FoundUNINIT = false; set<DefOrUse, LessDefUse>::iterator UseIter; for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { if (IsEqType(UNINIT, UseIter->GetType())) { FoundUNINIT = true; break; } } return (!FoundUNINIT); } // end of SMPInstr::AllUSEsTyped() // Set the type of all immediate operands found in the USE set. // Set all flags and floating point register USEs and DEFs to NUMERIC also. void SMPInstr::SetImmedTypes(bool UseFP) { set<DefOrUse, LessDefUse>::iterator CurrUse; set<DefOrUse, LessDefUse>::iterator CurrDef; op_t UseOp; op_t DefOp; uval_t ImmVal; bool DebugFlag = false; #if SMP_VERBOSE_DEBUG_BUILD_RTL DebugFlag = DebugFlag || (this->address == 0x805cd52) || (this->address == 0x805cd56); DebugFlag |= (0 == strncmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName(), 15)); #endif CurrUse = this->GetFirstUse(); while (CurrUse != this->GetLastUse()) { UseOp = CurrUse->GetOp(); if (DebugFlag) { msg("SetImmedTypes USE: "); PrintOperand(UseOp); msg("\n"); } if (o_imm == UseOp.type) { ImmVal = UseOp.value; if (IsImmedGlobalAddress((ea_t) ImmVal)) { if (DebugFlag) msg("Setting to GLOBALPTR\n"); CurrUse = this->SetUseType(UseOp, GLOBALPTR); } else if (this->Interrupt || IsImmedCodeAddress((ea_t) ImmVal)) { if (DebugFlag) msg("Setting to CODEPTR\n"); CurrUse = this->SetUseType(UseOp, CODEPTR); } else { // NUMERIC if (DebugFlag) msg("Setting to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } } else if (o_reg == UseOp.type) { if (UseOp.is_reg(X86_FLAGS_REG)) { if (DebugFlag) msg("Setting flags reg to 
NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } #if 1 else if (UseOp.is_reg(R_sp) || (UseFP && UseOp.is_reg(R_bp))) { if (DebugFlag) msg("Setting reg to STACKPTR\n"); CurrUse = this->SetUseType(UseOp, STACKPTR); } #endif } #if 0 // could these registers have pointers in them? else if ((o_trreg == UseOp.type) ||(o_dbreg == UseOp.type) || (o_crreg == UseOp.type)) { if (DebugFlag) msg("Setting special reg to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } #endif else if ((o_fpreg == UseOp.type) || (o_mmxreg == UseOp.type) || (o_xmmreg == UseOp.type)) { if (DebugFlag) msg("Setting floating point reg to NUMERIC\n"); CurrUse = this->SetUseType(UseOp, NUMERIC); } else if ((o_mem == UseOp.type) || (o_phrase == UseOp.type) || (o_displ == UseOp.type)) { // For memory operands, we need to identify the POINTER value that // is used in the addressing mode, if possible. (void) this->MDFindPointerUse(UseOp, UseFP); } ++CurrUse; } // end while all USEs via CurrUse CurrDef = this->GetFirstDef(); while (CurrDef != this->GetLastDef()) { DefOp = CurrDef->GetOp(); if (DebugFlag) { msg("SetImmedTypes DEF: "); PrintOperand(DefOp); msg("\n"); } if (DebugFlag) msg("FuncName: %s\n", this->BasicBlock->GetFunc()->GetFuncName()); if (o_reg == DefOp.type) { if (DefOp.is_reg(X86_FLAGS_REG)) { if (DebugFlag) msg("Setting flags reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); // No need to propagate this DEF type, as all flags will become NUMERIC. } #if 1 else if (DefOp.is_reg(R_sp) || (DefOp.is_reg(R_bp) && UseFP)) { if (DebugFlag) msg("Setting reg DEF to STACKPTR\n"); CurrDef = this->SetDefType(DefOp, STACKPTR); assert(CurrDef != this->Defs.GetLastRef()); // No need to propagate; all stack and frame pointers will become STACKPTR. 
} #endif } else if ((o_fpreg == DefOp.type) || (o_mmxreg == DefOp.type) || (o_xmmreg == DefOp.type)) { if (DebugFlag) msg("Setting floating point reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); // No need to propagate; all FP reg uses will become NUMERIC anyway. } #if 0 // could these registers have pointers in them? else if ((o_trreg == DefOp.type) || (o_dbreg == DefOp.type) || (o_crreg == DefOp.type)) { if (DebugFlag) msg("Setting special reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); } #endif else if ((o_mem == DefOp.type) || (o_phrase == DefOp.type) || (o_displ == DefOp.type)) { // For memory operands, we need to identify the POINTER value that // is used in the addressing mode, if possible. (void) this->MDFindPointerUse(DefOp, UseFP); } ++CurrDef; } // end while all DEFs via CurrDef return; } // end of SMPInstr::SetImmedTypes() // Is the instruction a load from the stack? void SMPInstr::MDFindLoadFromStack(bool UseFP) { set<DefOrUse, LessDefUse>::iterator UseIter; op_t UseOp; if ((3 == this->OptType) && (this->HasSourceMemoryOperand())) { // Loads and stores are OptCategory 3. We want only loads from the stack. for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseOp = UseIter->GetOp(); if (MDIsStackAccessOpnd(UseOp, UseFP)) { this->LoadFromStack = true; break; } } } return; } // end of SMPInstr::MDFindLoadFromStack() // Determine if instr is inherently signed load instruction. // True if sign or zero-extended; pass out mask bits if true. bool SMPInstr::MDIsSignedLoad(unsigned short &SignMask) { unsigned short opcode = this->SMPcmd.itype; if (NN_movzx == opcode) { SignMask = FG_MASK_UNSIGNED; } else if (NN_movsx == opcode) { SignMask = FG_MASK_SIGNED; } else { return false; } return true; } // Infer sign, bit width, other type info for simple cases where all the info needed is // within the instruction or can be read from the FineGrainedStackTable in the SMPFunction. 
// NOTE: Must be called after SSA analysis is complete. void SMPInstr::MDSetWidthSignInfo(bool UseFP) { set<DefOrUse, LessDefUse>::iterator UseIter; set<DefOrUse, LessDefUse>::iterator DefIter; op_t UseOp, DefOp; struct FineGrainedInfo FGEntry; bool ValueWillChange; unsigned short SignMask, TempSign, WidthMask; int DefHashValue, UseHashValue; bool case1, case2, case3, case4; case1 = this->IsLoadFromStack(); case2 = this->MDIsSignedLoad(SignMask); case3 = (7 == this->OptType); case4 = (CALL == this->GetDataFlowType()); // Case 1: Load from stack location. if (case1) { bool success = false; for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { UseOp = UseIter->GetOp(); if (MDIsStackAccessOpnd(UseOp, UseFP)) { // Found the stack location being loaded into a register. Now we need // to get the sign and width info from the fine grained stack frame // analysis. success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, UseOp, FGEntry); assert(success); // Now we have signedness info in FGEntry. We need to OR it into the register target of the load. if (FGEntry.SignMiscInfo == 0) break; // nothing to OR in; save time for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { DefOp = DefIter->GetOp(); if (o_reg == DefOp.type) { DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); TempSign = FGEntry.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS; // Get both sign bit flags DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, TempSign); } else { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, TempSign); } break; // Should be only one register target for stack load, and no flags are set. } } break; // Only concerned with the stack operand } } assert(success); } // end if this->IsLoadFromStack() // Case 2: Loads that are sign-extended or zero-extended imply signed and unsigned, respectively. 
// NOTE: If from the stack, they were handled in Case 1, and the signedness of the stack location // was recorded a long time ago in SMPFunction::FindOutgoingArgsSize(); else if (case2) { DefIter = this->GetFirstDef(); while (DefIter != this->GetLastDef()) { // All non-memory DEFs besides the flags register should get the new SignMask ORed in. // On x86, there should only be one DEF for this move, and no flags, but we will generalize // in case other architectures are odd. DefOp = DefIter->GetOp(); if (!(IsMemOperand(DefOp) || MDIsFlagsReg(DefOp))) { DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); } } ++DefIter; } // If the signed load is from memory, the only USEs are the memory // operand and addressing registers. We do not want to claim that // EBX is signed in the instruction movsx eax,[ebx]. Only the DEF // register EAX and the memory location [EBX] are signed, and we // have no idea where [EBX] is, so we punt on all USEs if we have // a memory source operand. if (!(this->HasSourceMemoryOperand())) { UseIter = this->GetFirstUse(); while (UseIter != this->GetLastUse()) { // All non-memory USEs besides the flags register should get the new SignMask ORed in. UseOp = UseIter->GetOp(); if (!(IsMemOperand(UseOp) || MDIsFlagsReg(UseOp))) { UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(UseOp)) { this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); } } ++UseIter; } } } // end of case 2 // Case 3: multiplies and divides are signed or unsigned. else if (case3) { // Multiplies and divides are type 7. 
SignMask = FG_MASK_SIGNED; if (this->MDIsUnsignedArithmetic()) { SignMask = FG_MASK_UNSIGNED; } DefIter = this->GetFirstDef(); while (DefIter != this->GetLastDef()) { // All DEFs besides the flags register should get the new SignMask ORed in. DefOp = DefIter->GetOp(); if (!(DefOp.is_reg(X86_FLAGS_REG))) { DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); } } ++DefIter; } UseIter = this->GetFirstUse(); while (UseIter != this->GetLastUse()) { // All USEs besides the flags register should get the new SignMask ORed in. UseOp = UseIter->GetOp(); if (!(UseOp.is_reg(X86_FLAGS_REG))) { UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(UseOp)) { this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask); } else { this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); } } ++UseIter; } } // end of case 3 (multiplies and divides) // Case 4: Calls to library functions can reveal the type of the return register. else if (case4) { // Get name of function called. assert(!(this->RTL.GetRT(0)->HasRightSubTree())); UseOp = this->RTL.GetRT(0)->GetRightOperand(); ea_t FuncAddr = UseOp.addr; char IDA_func_name[MAXSTR]; (void) get_func_name(FuncAddr, IDA_func_name, (size_t)(MAXSTR - 1)); size_t SkipCount = strspn(IDA_func_name, "._"); char *TempFuncName = &(IDA_func_name[SkipCount]); string FuncName(TempFuncName); // Get FG info, if any, for called function. GetLibFuncFGInfo(FuncName, FGEntry); // See if anything was returned in FGEntry. if ((FGEntry.SignMiscInfo != 0) || (FGEntry.SizeInfo != 0)) { // Need to update the FG info for the DEF of the return register. 
DefOp = InitOp; DefOp.type = o_reg; DefOp.reg = MD_RETURN_VALUE_REG; DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefFGInfo(DefHashValue, FGEntry); } else { this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, FGEntry); } } } // end of case4 (function calls) // For all register DEFs and USEs, we should get the obvious register width info // updated. Need to use the RTL operands to get accurate widths. SMPRegTransfer *CurrRT; for (size_t index = 0; index < this->RTL.GetCount(); ++index) { CurrRT = this->RTL.GetRT(index); DefOp = CurrRT->GetLeftOperand(); // Avoid setting def width for case 2; we leave it as zero so that // later uses can determine whether the zero-extension or sign-extension // bits ever got used. See more discussion in EmitIntegerErrorAnnotations() // for the CHECK TRUNCATION case. // NOTE: case2 can be set to true even in the case1/case2 overlap case that // only passes through the case1 code above. This is intentional. We want // to leave the DEF width set to 0 for all of case2 including the case1 overlap. 
if (!case2 && MDIsGeneralPurposeReg(DefOp)) { WidthMask = ComputeOperandBitWidthMask(DefOp, 0); DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DefOp)) { this->BasicBlock->UpdateDefWidthTypeInfo(DefHashValue, WidthMask); } else { this->BasicBlock->GetFunc()->UpdateDefWidthTypeInfo(DefHashValue, WidthMask); } } if (CurrRT->HasRightSubTree()) { this->MDSetRTLRegWidthInfo(CurrRT->GetRightTree()); } else { UseOp = CurrRT->GetRightOperand(); this->SetRTLUseOpRegWidthInfo(UseOp); } } // end for all RTLs return; } // end of SMPInstr::MDSetWidthSignInfo() // Helper to set width info for a UseOp from an RTL void SMPInstr::SetRTLUseOpRegWidthInfo(op_t UseOp) { unsigned short WidthMask; set<DefOrUse, LessDefUse>::iterator UseIter; unsigned int UseHashValue; if (MDIsGeneralPurposeReg(UseOp)) { WidthMask = ComputeOperandBitWidthMask(UseOp, 0); UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(UseOp)) { this->BasicBlock->UpdateUseWidthTypeInfo(UseHashValue, WidthMask); } else { this->BasicBlock->GetFunc()->UpdateUseWidthTypeInfo(UseHashValue, WidthMask); } } return; } // end of SMPInstr::SetRTLUseOpRegWidthInfo() // Walk the RTL and update the register USE operands' width info. 
void SMPInstr::MDSetRTLRegWidthInfo(SMPRegTransfer *CurrRT) { op_t UseOp; UseOp = CurrRT->GetLeftOperand(); this->SetRTLUseOpRegWidthInfo(UseOp); if (CurrRT->HasRightSubTree()) { this->MDSetRTLRegWidthInfo(CurrRT->GetRightTree()); } else { UseOp = CurrRT->GetRightOperand(); this->SetRTLUseOpRegWidthInfo(UseOp); } return; } // end of SMPInstr::MDSetRTLRegWidthInfo() // Infer DEF, USE, and RTL SMPoperator types within the instruction based on the type // of operator, the type category of the instruction, and the previously known types // of the operands. bool SMPInstr::InferTypes(void) { bool changed = false; // return value int SSANum; int TypeCategory = SMPTypeCategory[this->SMPcmd.itype]; set<DefOrUse, LessDefUse>::iterator CurrDef; set<DefOrUse, LessDefUse>::iterator CurrUse; op_t DefOp = InitOp, UseOp = InitOp; bool DebugFlag = false; bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe(); bool IsMemOp; #if SMP_VERBOSE_DEBUG_INFER_TYPES DebugFlag |= (0 == strcmp("InputMove", this->BasicBlock->GetFunc()->GetFuncName())); #endif if (DebugFlag) { msg("opcode: %d TypeCategory: %d\n", this->SMPcmd.itype, TypeCategory); } // If we are already finished with all types, return false. if (this->TypeInferenceComplete) return false; if (this->AllDEFsTyped() && this->AllUSEsTyped()) { this->TypeInferenceComplete = true; return false; } if (this->HasDestMemoryOperand()) { changed |= this->MDFindPointerUse(this->MDGetMemDefOp(), UseFP); } if (this->HasSourceMemoryOperand()) { changed |= this->MDFindPointerUse(this->MDGetMemUseOp(), UseFP); } // The control flow instructions can be handled simply based on their type // and do not need an RTL walk. 
SMPitype DFAType = this->GetDataFlowType(); bool CallInst = ((DFAType == CALL) || (DFAType == INDIR_CALL)); ushort IndirCallReg = R_none; if (DebugFlag) { msg("DFAType: %d CategoryInferenceComplete: %d\n", DFAType, this->CategoryInferenceComplete); } if (DFAType == INDIR_CALL) { op_t TargetOp = this->SMPcmd.Operands[0]; if (TargetOp.type == o_reg) IndirCallReg = TargetOp.reg; } if ((DFAType >= JUMP) && (DFAType <= INDIR_CALL)) { // All USEs are either the flags (NUMERIC) or the target address (CODEPTR). // The exceptions are the USE list for interrupt calls, which includes // the caller-saved regs, and indirect calls through a memory // operand, such as call [ebx+esi+20h], where the memory operand // is a CODEPTR but the addressing registers are a BaseReg and // IndexReg as in any other memory addressing, and the caller-saved // regs on any call. CurrUse = this->GetFirstUse(); while (CurrUse != this->GetLastUse()) { UseOp = CurrUse->GetOp(); if (UseOp.is_reg(X86_FLAGS_REG)) CurrUse = this->SetUseType(UseOp, NUMERIC); else if ((CurrUse->GetType() != CODEPTR) && (!(this->MDIsInterruptCall() && (o_reg == UseOp.type))) && (!(CallInst && MDIsCallerSavedReg(UseOp))) && (!(this->HasSourceMemoryOperand() && (INDIR_CALL == this->GetDataFlowType()) && (o_reg == UseOp.type)))) { CurrUse = this->SetUseType(UseOp, CODEPTR); if (DFAType == CALL) { // If the call is to malloc(), then the DEF of the return // register is of type HEAPPTR. changed |= this->MDFindMallocCall(UseOp); } } else if ((CurrUse->GetType() != CODEPTR) && CallInst && UseOp.is_reg(IndirCallReg)) { CurrUse = this->SetUseType(UseOp, CODEPTR); } ++CurrUse; } this->TypeInferenceComplete = true; return true; } // First, see if we can infer something about DEFs and USEs just from the // type category of the instruction. 
if (!this->CategoryInferenceComplete) { bool MemPropagate = false; switch (TypeCategory) { case 0: // no inference possible just from type category case 1: // no inference possible just from type category case 3: // MOV instructions; inference will come from source to dest in RTL walk. case 5: // binary arithmetic; inference will come in RTL walk. case 10: // binary arithmetic; inference will come in RTL walk. case 11: // push and pop instructions; inference will come in RTL walk. case 12: // exchange instructions; inference will come in RTL walk. this->CategoryInferenceComplete = true; break; case 2: // Result type is always NUMERIC. case 7: // Result type is always NUMERIC. case 8: // Result type is always NUMERIC. case 9: // Result type is always NUMERIC. case 13: // Result type is always NUMERIC. case 14: // Result type is always NUMERIC. case 15: // Result type is always NUMERIC. CurrDef = this->GetFirstDef(); while (CurrDef != this->GetLastDef()) { if (!IsEqType(NUMERIC, CurrDef->GetType())) { DefOp = CurrDef->GetOp(); SSANum = CurrDef->GetSSANum(); CurrDef = this->SetDefType(DefOp, NUMERIC); changed = true; // Be conservative and only propagate register DEFs and SAFE stack locs. We // can improve this in the future. **!!** bool IsMemOp = (o_reg != DefOp.type); bool MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP); #if SMP_PROPAGATE_MEM_TYPES ; #else // Be conservative and only propagate register DEFs and SAFE stack locs. // We can improve this in the future. 
**!!** MemPropagate = MemPropagate && SafeFunc; #endif if ((o_reg == DefOp.type) || MemPropagate) { if (this->BasicBlock->IsLocalName(DefOp)) { (void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, this->GetAddr(), SSANum, IsMemOp); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, SSANum, IsMemOp); } } } ++CurrDef; } this->CategoryInferenceComplete = true; break; case 4: // Unary INC, DEC, etc.: dest=source, so type remains the same assert(1 == this->RTL.GetCount()); assert(this->RTL.GetRT(0)->HasRightSubTree()); UseOp = this->RTL.GetRT(0)->GetLeftOperand(); // USE == DEF CurrUse = this->Uses.FindRef(UseOp); assert(CurrUse != this->GetLastUse()); if (UNINIT != CurrUse->GetType()) { // Only one USE, and it has a type assigned, so assign that type // to the DEF. CurrDef = this->GetFirstDef(); while (CurrDef != this->GetLastDef()) { // Two DEFs: EFLAGS is NUMERIC, dest==source DefOp = CurrDef->GetOp(); SSANum = CurrDef->GetSSANum(); if (DefOp.is_reg(X86_FLAGS_REG)) { ; // SetImmedTypes already made it NUMERIC } else { CurrDef = this->SetDefType(DefOp, CurrUse->GetType()); // Be conservative and only propagate register DEFs and SAFE stack locs. We // can improve this in the future. **!!** bool IsMemOp = (o_reg != DefOp.type); MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP); #if SMP_PROPAGATE_MEM_TYPES ; #else // Be conservative and only propagate register DEFs and SAFE stack locs. // We can improve this in the future. 
**!!** MemPropagate = MemPropagate && SafeFunc; #endif if ((o_reg == DefOp.type) || MemPropagate) { if (this->BasicBlock->IsLocalName(DefOp)) { (void) this->BasicBlock->PropagateLocalDefType(DefOp, CurrUse->GetType(), this->GetAddr(), SSANum, IsMemOp); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(DefOp, CurrUse->GetType(), SSANum, IsMemOp); } } } ++CurrDef; } this->CategoryInferenceComplete = true; changed = true; this->TypeInferenceComplete = true; } break; case 6: // Result is always POINTER DefOp = this->GetFirstDef()->GetOp(); SSANum = this->GetFirstDef()->GetSSANum(); CurrDef = this->SetDefType(DefOp, POINTER); this->CategoryInferenceComplete = true; changed = true; // Be conservative and only propagate register DEFs and SAFE stack locs. We // can improve this in the future. **!!** IsMemOp = (o_reg != DefOp.type); MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP); #if SMP_PROPAGATE_MEM_TYPES ; #else // Be conservative and only propagate register DEFs and SAFE stack locs. // We can improve this in the future. **!!** MemPropagate = MemPropagate && SafeFunc; #endif if ((o_reg == DefOp.type) || MemPropagate) { if (this->BasicBlock->IsLocalName(DefOp)) { (void) this->BasicBlock->PropagateLocalDefType(DefOp, POINTER, this->GetAddr(), SSANum, IsMemOp); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(DefOp, POINTER, SSANum, IsMemOp); } } break; default: msg("ERROR: Unknown type category for %s\n", this->GetDisasm()); this->CategoryInferenceComplete = true; break; } // end switch on TypeCategory } // end if (!CategoryInference) // Walk the RTL and infer types based on operators and operands. 
if (DebugFlag) { msg("RTcount: %d\n", this->RTL.GetCount()); } for (size_t index = 0; index < this->RTL.GetCount(); ++index) { SMPRegTransfer *CurrRT = this->RTL.GetRT(index); if (SMP_NULL_OPERATOR == CurrRT->GetOperator()) // nothing to infer continue; changed |= this->InferOperatorType(CurrRT); if (DebugFlag) { msg("returned from InferOperatorType\n"); } } // end for all RTs in the RTL return changed; } // end of SMPInstr::InferTypes() // Infer the type of an operator within an RT based on the types of its operands and // based on the operator itself. Recurse down the tree if necessary. // Return true if the operator type of the RT is updated. bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { bool updated = false; bool LeftNumeric, RightNumeric; bool LeftPointer, RightPointer; bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe(); set<DefOrUse, LessDefUse>::iterator CurrDef; set<DefOrUse, LessDefUse>::iterator CurrUse; set<DefOrUse, LessDefUse>::iterator LeftUse; set<DefOrUse, LessDefUse>::iterator RightUse; SMPOperandType LeftType = UNINIT; SMPOperandType RightType = UNINIT; SMPOperandType OperType = UNINIT; op_t UseOp = InitOp, DefOp = InitOp, LeftOp = InitOp, RightOp = InitOp; SMPoperator CurrOp = CurrRT->GetOperator(); bool DebugFlag = false; #if SMP_VERBOSE_DEBUG_INFER_TYPES #if 1 DebugFlag |= (0 == strcmp("InputMove", this->BasicBlock->GetFunc()->GetFuncName())); #endif DebugFlag = DebugFlag || ((this->address == 0x806453b) || (this->address == 0x806453e)); #endif #if SMP_VERBOSE_DEBUG_INFER_TYPES if (DebugFlag) { msg("Entered InferOperatorType for CurrOp: %d\n", CurrOp); } #endif switch (CurrOp) { case SMP_NULL_OPERATOR: break; case SMP_CALL: // CALL instruction if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(CODEPTR, this); updated = true; UseOp = CurrRT->GetRightOperand(); CurrUse = this->Uses.FindRef(UseOp); assert(CurrUse != this->GetLastUse()); if (UNINIT == 
CurrUse->GetType()) { CurrUse = this->SetUseType(UseOp, CODEPTR); } else if (CODEPTR != CurrUse->GetType()) { msg("WARNING: call target is type %d, setting to CODEPTR at %x in %s\n", CurrUse->GetType(), this->GetAddr(), this->GetDisasm()); CurrUse = this->SetUseType(UseOp, CODEPTR); } } break; case SMP_INPUT: // input from port if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } break; case SMP_OUTPUT: // output to port if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } break; case SMP_SIGN_EXTEND: case SMP_ZERO_EXTEND: break; case SMP_ADDRESS_OF: // take effective address if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(POINTER, this); // Left operand is having its address taken, but we cannot infer what its // type is. updated = true; } break; case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_S_LEFT_SHIFT: // signed left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_S_MULTIPLY: case SMP_U_DIVIDE: case SMP_S_DIVIDE: case SMP_U_REMAINDER: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_NEGATE: // unary negation case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_U_COMPARE: // unsigned compare (AND-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC case SMP_UNARY_FLOATING_ARITHMETIC: // all 
the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_REVERSE_SHIFT_U: // all the same to our type system; all NUMERIC case SMP_SHUFFLE: // all the same to our type system; all NUMERIC case SMP_COMPARE_EQ_AND_SET: // packed compare for equality and set bits; all NUMERIC case SMP_COMPARE_GT_AND_SET: // packed compare for greater-than and set bits; all NUMERIC case SMP_INTERLEAVE: // interleave fields from two packed operands; NUMERIC case SMP_CONCATENATE: // all the same to our type system; all NUMERIC if (UNINIT == CurrRT->GetOperatorType()) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } // Left operand should be NUMERIC if it exists. UseOp = CurrRT->GetLeftOperand(); if (UseOp.type != o_void) { CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { msg("WARNING: Adding missing USE of "); PrintOperand(UseOp); msg(" in %s\n", this->GetDisasm()); this->Uses.SetRef(UseOp, NUMERIC, -1); updated = true; } else if (UNINIT == CurrUse->GetType()) { CurrUse = this->SetUseType(UseOp, NUMERIC); updated = true; } } // Right operand should be NUMERIC if it exists. if (CurrRT->HasRightSubTree()) { // Recurse into subtree updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else { UseOp = CurrRT->GetRightOperand(); if (UseOp.type != o_void) { CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { msg("WARNING: Adding missing USE of "); PrintOperand(UseOp); msg(" in %s\n", this->GetDisasm()); this->Uses.SetRef(UseOp, NUMERIC, -1); updated = true; } else if (UNINIT == CurrUse->GetType()) { CurrUse = this->SetUseType(UseOp, NUMERIC); updated = true; } } } break; case SMP_INCREMENT: case SMP_DECREMENT: // The type of the right operand is propagated to the operator, or vice // versa, whichever receives a type first. 
assert(!CurrRT->HasRightSubTree()); UseOp = CurrRT->GetLeftOperand(); assert(o_void != UseOp.type); CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { msg("WARNING: Adding missing USE of "); PrintOperand(UseOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); this->Uses.SetRef(UseOp); updated = true; break; } if (UNINIT == CurrRT->GetOperatorType()) { if (UNINIT != CurrUse->GetType()) { // Propagate operand type up to the operator. CurrRT->SetOperatorType(CurrUse->GetType(), this); updated = true; } } else if (UNINIT == CurrUse->GetType()) { // Propagate operator type to operand. CurrUse = this->SetUseType(UseOp, CurrRT->GetOperatorType()); updated = true; } break; case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_BITWISE_AND: case SMP_BITWISE_OR: // Extract the current types of right and left operands and the operator. LeftOp = CurrRT->GetLeftOperand(); CurrUse = this->Uses.FindRef(LeftOp); assert(CurrUse != this->GetLastUse()); // found it LeftType = CurrUse->GetType(); if (CurrRT->HasRightSubTree()) { RightType = CurrRT->GetRightTree()->GetOperatorType(); } else { RightOp = CurrRT->GetRightOperand(); if (o_void == RightOp.type) { msg("ERROR: void operand in %s\n", this->GetDisasm()); return false; } else { CurrUse = this->Uses.FindRef(RightOp); if (CurrUse == this->GetLastUse()) { msg("WARNING: Adding missing USE of "); PrintOperand(RightOp); msg(" in %s\n", this->GetDisasm()); this->Uses.SetRef(RightOp); updated = true; break; } else { RightType = CurrUse->GetType(); } } } // We have to know both operand types to infer the operator, or know the // operator type to infer the operand types. if ((UNINIT == CurrRT->GetOperatorType()) && ((UNINIT == LeftType) || (UNINIT == RightType))) break; // If both operands are NUMERIC, operator and result are NUMERIC. 
// If one operand is NUMERIC and the other is a pointer type, // then the ADD operator and the result will inherit this second type, // while AND and OR operators will remain UNINIT (we don't know what // type "ptr AND 0xfffffff8" has until we see how it is used). LeftNumeric = IsEqType(NUMERIC, LeftType); RightNumeric = IsEqType(NUMERIC, RightType); LeftPointer = IsDataPtr(LeftType); RightPointer = IsDataPtr(RightType); if (UNINIT == CurrRT->GetOperatorType()) { // Infer operator type from left and right operands. if (LeftNumeric && RightNumeric) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; break; } else if (LeftNumeric || RightNumeric) { // ADD of NUMERIC to non-NUMERIC preserves non-NUMERIC type. // AND and OR operations should leave the operator UNINIT for now. if (LeftNumeric && (UNINIT != RightType) && ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp))) { CurrRT->SetOperatorType(RightType, this); updated = true; break; } else if (RightNumeric && (UNINIT != LeftType) && ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp))) { CurrRT->SetOperatorType(LeftType, this); updated = true; break; } } else if (LeftPointer && RightPointer) { // Arithmetic on two pointers if ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp)) { CurrRT->SetOperatorType(UNKNOWN, this); updated = true; } else { // bitwise AND or OR of two pointers msg("WARNING: hash of two pointers at %x in %s\n", this->GetAddr(), this->GetDisasm()); // hash operation? leave operator as UNINIT } break; } else if ((LeftPointer && IsEqType(RightType, PTROFFSET)) || (RightPointer && IsEqType(LeftType, PTROFFSET))) { // Arithmetic on PTR and PTROFFSET if ((SMP_ADD == CurrOp) || (SMP_ADD_CARRY == CurrOp)) { // We assume (A-B) is being added to B or vice versa **!!** CurrRT->SetOperatorType(POINTER, this); updated = true; } else { // bitwise AND or OR of pointer and pointer difference msg("WARNING: hash of PTROFFSET and POINTER at %x in %s\n", this->GetAddr(), this->GetDisasm()); // hash operation? 
leave operator as UNINIT } break; } } // end if UNINIT operator type else { // operator has type other than UNINIT // We make add-with-carry and subtract-with-borrow exceptions // to the type propagation. LeftOp could have POINTER type // inferred later; these instructions can change the type of // the register from POINTER to NUMERIC, unlike regular // add and subtract opcodes. if ((UNINIT == LeftType) && (SMP_ADD_CARRY != CurrOp)) { CurrUse = this->SetUseType(LeftOp, CurrRT->GetOperatorType()); updated = true; assert(CurrUse != this->GetLastUse()); break; } if (CurrRT->HasRightSubTree()) { // Must need to iterate through the right tree again, as the operator // has been typed. if (UNINIT == RightType) { CurrRT->GetRightTree()->SetOperatorType(CurrRT->GetOperatorType(), this); updated = true; } updated |= this->InferOperatorType(CurrRT->GetRightTree()); break; } else { // right operand; propagate operator type if needed if (UNINIT == RightType) { CurrUse = this->SetUseType(RightOp, CurrRT->GetOperatorType()); updated = true; assert(CurrUse != this->GetLastUse()); break; } } } break; case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow // Extract the current types of right and left operands and the operator. 
OperType = CurrRT->GetOperatorType(); LeftOp = CurrRT->GetLeftOperand(); LeftUse = this->Uses.FindRef(LeftOp); assert(LeftUse != this->GetLastUse()); // found it LeftType = LeftUse->GetType(); if (CurrRT->HasRightSubTree()) { RightType = CurrRT->GetRightTree()->GetOperatorType(); } else { RightOp = CurrRT->GetRightOperand(); if (o_void == RightOp.type) { msg("ERROR: void operand in %s\n", this->GetDisasm()); return false; } else { RightUse = this->Uses.FindRef(RightOp); if (RightUse == this->GetLastUse()) { msg("WARNING: Adding missing USE of "); PrintOperand(RightOp); msg(" in %s\n", this->GetDisasm()); this->Uses.SetRef(RightOp); updated = true; break; } else { RightType = RightUse->GetType(); } } } // If left operand is NUMERIC, operator is NUMERIC. LeftNumeric = IsEqType(NUMERIC, LeftType); RightNumeric = IsEqType(NUMERIC, RightType); LeftPointer = IsDataPtr(LeftType); RightPointer = IsDataPtr(RightType); if (LeftNumeric) { // Subtracting anything from a NUMERIC leaves it NUMERIC. if (UNINIT == OperType) { CurrRT->SetOperatorType(NUMERIC, this); updated = true; } else if (NUMERIC != OperType) { msg("ERROR: SMP_SUBTRACT from NUMERIC should be NUMERIC operator."); msg(" Operator type is %d in: %s\n", OperType, this->GetDisasm()); } #if 0 if (!RightNumeric) { // Right operand is being used as a NUMERIC, so propagate NUMERIC to it. if (CurrRT->HasRightSubTree()) { CurrRT->GetRightTree()->SetOperatorType(NUMERIC, this); } else { RightUse = this->SetUseType(RightOp, NUMERIC); } updated = true; } #endif } // end if LeftNumeric else if (LeftPointer) { if (UNINIT == OperType) { // If we subtract another pointer type, we produce PTROFFSET. 
if (RightPointer) { CurrRT->SetOperatorType(PTROFFSET, this); updated = true; } else if (RightType == PTROFFSET) { // We assume B - (B - A) == A **!!** CurrRT->SetOperatorType(POINTER, this); msg("WARNING: PTR - PTROFFSET produces PTR in %s\n", this->GetDisasm()); updated = true; } else if (RightNumeric) { // pointer minus NUMERIC keeps same pointer type CurrRT->SetOperatorType(LeftType, this); updated = true; } } else { // we have an operator type for the SMP_SUBTRACT bool OperatorPointer = IsDataPtr(OperType); if (CurrRT->HasRightSubTree()) { // Must need to iterate through the right tree again, as the operator // has been typed. if (UNINIT == RightType) { if (OperatorPointer) { // PTR := PTR - ?? ==> ?? is NUMERIC CurrRT->GetRightTree()->SetOperatorType(NUMERIC, this); updated = true; } else if (OperType == PTROFFSET) { // PTROFFSET := PTR - ?? ==> ?? is PTR CurrRT->GetRightTree()->SetOperatorType(LeftType, this); updated = true; } } updated |= this->InferOperatorType(CurrRT->GetRightTree()); break; } else { // right operand; propagate operator type if needed if (UNINIT == RightType) { if (OperatorPointer) { // PTR := PTR - ?? ==> ?? is NUMERIC RightUse = this->SetUseType(RightOp, NUMERIC); updated = true; assert(RightUse != this->GetLastUse()); } else if (OperType == PTROFFSET) { // PTROFFSET := PTR - ?? ==> ?? is PTR RightUse = this->SetUseType(RightOp, LeftType); updated = true; } break; } } } // end if OperType is UNINIT ... else ... } // end if LeftNumeric ... else if LeftPointer ... else if (UNINIT == LeftType) { // We make add-with-carry and subtract-with-borrow exceptions // to the type propagation. LeftOp could have POINTER type // inferred later; these instructions can change the type of // the register from POINTER to NUMERIC, unlike regular // add and subtract opcodes. 
if ((UNINIT != OperType) && (SMP_SUBTRACT_BORROW != CurrOp)) { LeftUse = this->SetUseType(LeftOp, OperType); assert(LeftUse != this->GetLastUse()); updated = true; } } break; case SMP_ASSIGN: // Extract the current types of right and left operands and SMP_ASSIGN operator. OperType = CurrRT->GetOperatorType(); DefOp = CurrRT->GetLeftOperand(); CurrDef = this->Defs.FindRef(DefOp); assert(CurrDef != this->GetLastDef()); // found it LeftType = CurrDef->GetType(); if (CurrRT->HasRightSubTree()) { RightType = CurrRT->GetRightTree()->GetOperatorType(); } else { UseOp = CurrRT->GetRightOperand(); if (o_void == UseOp.type) { msg("ERROR: void operand for SMP_ASSIGN in %s\n", this->GetDisasm()); return false; } else { CurrUse = this->Uses.FindRef(UseOp); if (CurrUse == this->GetLastUse()) { msg("WARNING: Adding missing USE of "); PrintOperand(UseOp); msg(" in %s\n", this->GetDisasm()); this->Uses.SetRef(UseOp); updated = true; break; } else { RightType = CurrUse->GetType(); } } } // We keep it simple by only trying to propagate one step at a time, from // the right operand or tree up to the SMP_ASSIGN operator, then from // the operator to the left (DEF) operand, or from left up to operator // and down the right, depending on where the existing types are. if (DebugFlag) { msg("%x LeftType: %d OperatorType: %d RightType: %d\n", this->address, LeftType, OperType, RightType); } if ((UNINIT == RightType) && (UNINIT == LeftType)) { // We will only try to solve the right hand side on this iteration. if (CurrRT->HasRightSubTree()) { updated |= this->InferOperatorType(CurrRT->GetRightTree()); } break; } else if (UNINIT == OperType) { // UNINIT SMP_ASSIGN operator, but either LeftType or RightType is not UNINIT. if (UNINIT != RightType) { // We have to special case conditional moves. 
Only if both operands // (the source and the prior value of the potential destination, // which was added to the USE set by BuildMoveRTL()) agree in type // can we propagate their common type to the operator and ultimately // to the DEF. if ((!this->MDIsConditionalMoveInstr()) || this->Uses.TypesAgreeNoFlags()) { CurrRT->SetOperatorType(RightType, this); updated = true; } } else { CurrRT->SetOperatorType(LeftType, this); updated = true; } break; } else if (UNINIT == LeftType) { // SMP_ASSIGN operator has type, so propagate it. LeftType = OperType; CurrDef = this->SetDefType(DefOp, OperType); updated = true; // Propagate the new DEF type unless it is an indirect memory access. // Future: Propagate until re-DEF of addressing register terminates // the propagation. **!!** if (!MDIsIndirectMemoryOpnd(DefOp, this->BasicBlock->GetFunc()->UsesFramePointer())) { bool IsMemOp = (o_reg != DefOp.type); bool MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP); #if SMP_PROPAGATE_MEM_TYPES ; #else // Be conservative and only propagate register DEFs and SAFE stack locs. // We can improve this in the future. **!!** MemPropagate = MemPropagate && SafeFunc; #endif if ((o_reg == DefOp.type) || MemPropagate) { int SSANum = CurrDef->GetSSANum(); if (this->BasicBlock->IsLocalName(DefOp)) { (void) this->BasicBlock->PropagateLocalDefType(DefOp, LeftType, this->GetAddr(), SSANum, IsMemOp); } else { // global name this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false (void) this->BasicBlock->PropagateGlobalDefType(DefOp, LeftType, SSANum, IsMemOp); } } } break; } else if (UNINIT == RightType) { // SMP_ASSIGN operator has type, so propagate it. if (CurrRT->HasRightSubTree()) { CurrRT->GetRightTree()->SetOperatorType(OperType, this); updated = true; updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else { // For conditional moves, propagate to the pseudo-USE of the // destination register as well as the source operand. 
if (this->MDIsConditionalMoveInstr()) { CurrUse = this->FindUse(DefOp); assert(CurrUse != this->GetLastUse()); if (UNINIT == CurrUse->GetType()) CurrUse = this->SetUseType(DefOp, OperType); else if (OperType != CurrUse->GetType()) { msg("WARNING: Avoiding lattice oscillation from type %d to %d at %x for: ", CurrUse->GetType(), OperType, this->address); PrintOperand(CurrUse->GetOp()); msg("\n"); } } CurrUse = this->SetUseType(UseOp, OperType); updated = true; } break; } break; default: msg("ERROR: Unknown operator in %s\n", this->GetDisasm()); break; } // end switch on operator return updated; } // end of SMPInstr::InferOperatorType() // Transfer function: Does operator propagate signedness of its operands to its result? bool SMPInstr::DoesOperatorTransferSign(SMPoperator CurrOp) { bool transfer = false; switch (CurrOp) { case SMP_NULL_OPERATOR: case SMP_CALL: // CALL instruction case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC // No concept of signedness for some operators break; case SMP_ADDRESS_OF: // take effective address case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_U_DIVIDE: case SMP_U_REMAINDER: case SMP_ZERO_EXTEND: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: case SMP_U_COMPARE: // unsigned compare (AND-based) case SMP_S_LEFT_SHIFT: // signed left shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_S_MULTIPLY: case SMP_S_DIVIDE: case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: // Inherently 
unsigned and signed operators force the signedness // of their results, rather than propagating the signedness of // their operands. break; case SMP_DECREMENT: case SMP_INCREMENT: case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow case SMP_ASSIGN: case SMP_BITWISE_AND: case SMP_BITWISE_OR: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result transfer = true; break; case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_REVERSE_SHIFT_U: // Shift right operand by bit count in left operand case SMP_SHUFFLE: // Shuffle bytes, words, etc. within destination operation per source mask case SMP_COMPARE_EQ_AND_SET: // Compare for equality and set fields to all 1's or all 0's case SMP_COMPARE_GT_AND_SET: // Compare for greater-than and set fields to all 1's or all 0's case SMP_INTERLEAVE: // extended-precision interleaving of bytes or words or dwords etc.; NUMERIC case SMP_CONCATENATE: // extended-precision concatenation; NUMERIC transfer = true; break; default: msg("ERROR: Unknown operator in %s\n", this->GetDisasm()); break; } // end switch on operator return transfer; } // end of SMPInstr::DoesOperatorTransferSign() // Initial inferences (if any) about FG info of operand based solely on the RTL operator type above it in RTL. 
bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo &InitFG) { bool changed = false; switch (CurrOp) { case SMP_NULL_OPERATOR: break; case SMP_CALL: // CALL instruction InitFG.SignMiscInfo |= FG_MASK_UNSIGNED; // target address is unsigned 32-bit InitFG.SizeInfo |= (MD_NORMAL_BITWIDTH_MASK | FG_MASK_CODEPOINTER); changed = true; break; case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_ADDRESS_OF: // take effective address case SMP_U_COMPARE: // unsigned compare (AND-based) case SMP_S_COMPARE: // signed compare (subtraction-based) // NOTE: The AND-based and subtraction-based comparisons are used // on lots of operands of all types, and the conditional jump that // follows determines signedness, not the operator. break; case SMP_U_LEFT_SHIFT: // unsigned left shift case SMP_U_RIGHT_SHIFT: // unsigned right shift case SMP_ROTATE_LEFT: case SMP_ROTATE_LEFT_CARRY: // rotate left through carry case SMP_ROTATE_RIGHT: case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry case SMP_U_MULTIPLY: case SMP_U_DIVIDE: case SMP_U_REMAINDER: case SMP_ZERO_EXTEND: case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: InitFG.SignMiscInfo |= FG_MASK_UNSIGNED; changed = true; break; case SMP_S_LEFT_SHIFT: // signed left shift case SMP_S_RIGHT_SHIFT: // signed right shift case SMP_S_MULTIPLY: case SMP_S_DIVIDE: case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: case SMP_GREATER_EQUAL: InitFG.SignMiscInfo |= FG_MASK_SIGNED; changed = true; break; case SMP_DECREMENT: case SMP_INCREMENT: case SMP_ADD: case SMP_ADD_CARRY: // add with carry case SMP_SUBTRACT: case SMP_SUBTRACT_BORROW: // subtract with borrow case SMP_ASSIGN: case SMP_BITWISE_AND: case SMP_BITWISE_OR: case SMP_EQUAL: case SMP_NOT_EQUAL: case SMP_LOGICAL_AND: case SMP_LOGICAL_OR: case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; 
produces NUMERIC result case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC break; case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC InitFG.SignMiscInfo |= FG_MASK_SIGNED; InitFG.SizeInfo |= FG_MASK_FLOAT_MMX; changed = true; break; case SMP_REVERSE_SHIFT_U: // Shift right operand by bit count in left operand case SMP_SHUFFLE: // Shuffle bytes, words, etc. within destination operation per source mask case SMP_COMPARE_EQ_AND_SET: // Compare for equality and set fields to all 1's or all 0's case SMP_COMPARE_GT_AND_SET: // Compare for greater-than and set fields to all 1's or all 0's case SMP_INTERLEAVE: // extended-precision interleaving of bytes or words or dwords etc.; NUMERIC case SMP_CONCATENATE: // extended-precision concatenation; NUMERIC InitFG.SignMiscInfo |= FG_MASK_SIGNED; InitFG.SizeInfo |= (FG_MASK_FLOAT_MMX | FG_MASK_BITWIDTH_128); changed = true; break; default: msg("ERROR: Unknown operator in %s\n", this->GetDisasm()); break; } // end switch on operator return changed; } // end of SMPInstr::InitFGInfoFromOperator() // helper for InferOperatorFGInfo() to update DEF maps, return true if changed maps bool SMPInstr::UpdateDefOpFGInfo(op_t DefOp, struct FineGrainedInfo NewFG) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? set<DefOrUse, LessDefUse>::iterator DefIter; int SSANum; int DefHashValue; op_t SearchOp; bool LocalName; struct FineGrainedInfo OldFG, UnionFG; // If operator is inherently signed, then we will have // a sign bit set in NewFG from InitFGInfoFromOperator(). 
DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); SSANum = DefIter->GetSSANum(); SearchOp = DefOp; SearchOp.reg = MDCanonicalizeSubReg(DefOp.reg); DefHashValue = HashGlobalNameAndSSA(SearchOp, SSANum); LocalName = this->BasicBlock->IsLocalName(DefOp); if (LocalName) { // Get old FG info from block level. OldFG = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // global name // Get old FG info from function level. OldFG = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } UnionFG.SignMiscInfo = OldFG.SignMiscInfo | NewFG.SignMiscInfo; UnionFG.SizeInfo = OldFG.SizeInfo | NewFG.SizeInfo; if ((OldFG.SignMiscInfo != UnionFG.SignMiscInfo) || (OldFG.SizeInfo != UnionFG.SizeInfo)) { // The signs they are a-changin'. MapsChanged = true; if (LocalName) this->BasicBlock->UpdateDefFGInfo(DefHashValue, UnionFG); else this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, UnionFG); } return MapsChanged; } // end of SMPInstr::UpdateDefOpFGInfo() // helper for InferOperatorFGInfo() to update USE maps, return true if changed maps bool SMPInstr::UpdateUseOpFGInfo(op_t UseOp, struct FineGrainedInfo NewFG) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? set<DefOrUse, LessDefUse>::iterator UseIter; int SSANum; int UseHashValue; op_t SearchOp; bool LocalName; struct FineGrainedInfo OldFG, UnionFG; // If operator is inherently signed, then we will have // a sign bit set in NewFG from InitFGInfoFromOperator(). UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); SSANum = UseIter->GetSSANum(); SearchOp = UseOp; SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseHashValue = HashGlobalNameAndSSA(SearchOp, SSANum); LocalName = this->BasicBlock->IsLocalName(UseOp); if (LocalName) { // Get old FG info from block level. OldFG = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // global name // Get old FG info from function level. 
OldFG = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } UnionFG.SignMiscInfo = OldFG.SignMiscInfo | NewFG.SignMiscInfo; UnionFG.SizeInfo = OldFG.SizeInfo | NewFG.SizeInfo; if ((OldFG.SignMiscInfo != UnionFG.SignMiscInfo) || (OldFG.SizeInfo != UnionFG.SizeInfo)) { // The signs they are a-changin'. MapsChanged = true; if (LocalName) this->BasicBlock->UpdateUseFGInfo(UseHashValue, UnionFG); else this->BasicBlock->GetFunc()->UpdateUseFGInfo(UseHashValue, UnionFG); } return MapsChanged; } // end of SMPInstr::UpdateUseOpFGInfo() // Helper to fetch DEF signedness info for UseOp that has none. unsigned short SMPInstr::GetDefSignInfoFromUseOp(op_t UseOp) { set<DefOrUse, LessDefUse>::iterator UseIter; int SSANum, UseHashValue; op_t SearchOp; bool LocalName; UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); SSANum = UseIter->GetSSANum(); SearchOp = UseOp; SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); UseHashValue = HashGlobalNameAndSSA(SearchOp, SSANum); LocalName = this->BasicBlock->IsLocalName(SearchOp); if (LocalName) { // Get old sign info from block level. return this->BasicBlock->GetDefSignMiscInfo(UseHashValue); } else { // global name // Get old sign info from function level. return this->BasicBlock->GetFunc()->GetDefSignMiscInfo(UseHashValue); } } // end of SMPInstr::GetDefSignInfoFromUseOp() // infer FG info, + width on FirstIter; pass out FG info for op subtree, return true if change made to any FG info map. bool SMPInstr::InferOperatorFGInfo(SMPRegTransfer *CurrRT, bool FirstIter, struct FineGrainedInfo &OpFG) { // INCOMPLETE: Just doing the (FirstIter == true) work. bool MapsChanged = false; // Changes to maps of name/SSA to FG info? bool NewChange = false; // Bit changes from InitFGInfoFromOperator() ? 
SMPoperator CurrOp = CurrRT->GetOperator(); struct FineGrainedInfo LeftFG, OldLeftFG; struct FineGrainedInfo RightFG, OldRightFG; op_t LeftOp, RightOp; unsigned short WidthMask, SignMask; bool CurrOpTransfersSign = this->DoesOperatorTransferSign(CurrOp); // Recurse to the right first, so we can do a depth-first accumulation of FG info. RightFG.SignMiscInfo = 0; RightFG.SizeInfo = 0; if (CurrRT->HasRightSubTree()) { if (FirstIter) { // Get width as well as signedness NewChange = this->InitFGInfoFromOperator(CurrOp, RightFG); } // end if (FirstIter) MapsChanged |= this->InferOperatorFGInfo(CurrRT->GetRightTree(), FirstIter, RightFG); } else { RightOp = CurrRT->GetRightOperand(); if ((RightOp.type == o_reg) && !RightOp.is_reg(MD_INSTRUCTION_POINTER_REG)) { if (FirstIter) { // Get width as well as signedness NewChange = this->InitFGInfoFromOperator(CurrOp, RightFG); WidthMask = ComputeOperandBitWidthMask(RightOp, 0); RightFG.SizeInfo |= WidthMask; } // end if (FirstIter) // Propagate signedness on all iterations. // If operator is inherently signed, then we will have // a sign bit set in RightFG from InitFGInfoFromOperator(). if ((RightFG.SignMiscInfo == 0) && CurrOpTransfersSign) { // We have a USE with no sign info. See if we // can get sign info from the DEF of this USE so we can // transfer it up the RTL tree. RightFG.SignMiscInfo = (FG_MASK_SIGNEDNESS_BITS & (this->GetDefSignInfoFromUseOp(RightOp))); } if ((RightFG.SignMiscInfo != 0) || (RightFG.SizeInfo != 0)) MapsChanged |= this->UpdateUseOpFGInfo(RightOp, RightFG); } // end if (RightOP is o_reg) } // end if (right subtree) else right operand LeftFG.SignMiscInfo = 0; LeftFG.SizeInfo = 0; LeftOp = CurrRT->GetLeftOperand(); // Skip control-flow assignments to the instruction pointer register. 
if ((LeftOp.type == o_reg) && !LeftOp.is_reg(MD_INSTRUCTION_POINTER_REG)) { bool OpIsDEF = (SMP_ASSIGN == CurrOp); if (FirstIter) { // Get width as well as signedness NewChange = this->InitFGInfoFromOperator(CurrOp, LeftFG); // Special case: For sign-extended and zero-extended loads, // we don't know whether the DEF will always be USEd as // the smaller or larger size. For example, we could // zero-extend a 16-bit stack location into a 32-bit register // just because the compiler always loads unsigned shorts // that way, but we might never use it as a 32-bit value. // So there is no truncation if we store only 16 bits later. // By setting the target of an extended load to zero width, // we signal that we want the maximum USE width to determine // whether the store is truncated (see EmitIntegerErrorAnnotations). WidthMask = ComputeOperandBitWidthMask(LeftOp, 0); if (OpIsDEF) { if (this->MDIsSignedLoad(SignMask)) { WidthMask = 0; } // DEF inherits sign from right hand side. LeftFG.SignMiscInfo |= RightFG.SignMiscInfo; } else if ((LeftFG.SignMiscInfo == 0) && CurrOpTransfersSign) { // We have a USE, not a DEF, with no sign info. See if we // can get sign info from the DEF of this USE so we can // transfer it up the RTL tree. LeftFG.SignMiscInfo = (FG_MASK_SIGNEDNESS_BITS & (this->GetDefSignInfoFromUseOp(LeftOp))); } LeftFG.SizeInfo |= WidthMask; if ((LeftFG.SignMiscInfo != 0) || (LeftFG.SizeInfo != 0)) { // Either NewChanged or CurrOpTransfersSign is true or we set WidthMask above. // See if we would change the FG map entry. if (OpIsDEF) { // Need DEF map info MapsChanged |= this->UpdateDefOpFGInfo(LeftOp, LeftFG); } else { // need USE map info MapsChanged |= this->UpdateUseOpFGInfo(LeftOp, LeftFG); } } // end if non-zero LeftFG info } // end if (FirstIter) } // Prepare to return FG info for operator. First, OR the left and right FG infos. 
if (NewChange || MapsChanged || CurrOpTransfersSign) { OpFG.SignMiscInfo |= LeftFG.SignMiscInfo; OpFG.SizeInfo |= LeftFG.SizeInfo; OpFG.SignMiscInfo |= RightFG.SignMiscInfo; OpFG.SizeInfo |= RightFG.SizeInfo; } // An operator could override the width or signedness info of its operands. if (CurrOp == SMP_ADDRESS_OF) { // Result is 32-bit data pointer. OpFG.SizeInfo &= (~FG_MASK_BITWIDTH_FIELDS); // clear all width bits OpFG.SizeInfo |= (FG_MASK_BITWIDTH_32 | FG_MASK_DATAPOINTER); OpFG.SignMiscInfo &= (~FG_MASK_SIGNED); OpFG.SignMiscInfo |= FG_MASK_UNSIGNED; } return MapsChanged; } // end of SMPInstr::InferOperatorFGInfo() // infer width on first pass, signedness on all passes bool SMPInstr::InferFGInfo(unsigned short IterCount) { bool MapsChanged = false; // Changes to maps of name/SSA to FG info? struct FineGrainedInfo OpFG; SMPitype DFType = this->GetDataFlowType(); assert(0 < IterCount); // start IterCount at 1, not 0. if (DFType != DEFAULT) { // We have a control flow instruction, e.g. call, return, branch, jump // No data operands unless these instructions are indirect through a register, // and the indirect operand is a memory operand in that case, e.g. [eax]. return MapsChanged; } OpFG.SignMiscInfo = 0; OpFG.SizeInfo = 0; for (size_t index = 0; index < this->RTL.GetCount(); ++index) { SMPRegTransfer *CurrRT = this->RTL.GetRT(index); if (SMP_NULL_OPERATOR == CurrRT->GetOperator()) // nothing to infer continue; MapsChanged |= this->InferOperatorFGInfo(CurrRT, (1 == IterCount), OpFG); if (SMP_CALL == CurrRT->GetOperator()) // no LeftOp DEF continue; } // end for all RTs in the RTL return MapsChanged; } // end of SMPInstr::InferFGInfo() // Get the meet of the metadata types of all non-flags DEFs. 
SMPMetadataType SMPInstr::GetDefMetadataType(void) {
	// Folds the metadata status of every DEF except the flags register into a
	//  single value, starting from DEF_METADATA_UNANALYZED:
	//   - a DEF_METADATA_USED status is returned immediately;
	//   - the first status seen replaces UNANALYZED;
	//   - a status at or above DEF_METADATA_REDUNDANT replaces a lower running value;
	//   - two different statuses at or above DEF_METADATA_REDUNDANT meet at
	//     DEF_METADATA_PROF_REDUNDANT.
	SMPMetadataType Meet = DEF_METADATA_UNANALYZED;
	set<DefOrUse, LessDefUse>::iterator DefIter;

	for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
		if (DefIter->GetOp().is_reg(X86_FLAGS_REG))
			continue; // flags are always unused metadata; irrelevant

		SMPMetadataType Status = DefIter->GetMetadataStatus();
		if (Status == Meet)
			continue; // no meet operation to perform

		// Any time we find USED metadata, that overrides all other types.
		if (DEF_METADATA_USED == Status)
			return Status;

		if (DEF_METADATA_UNANALYZED == Meet) {
			Meet = Status;
			continue;
		}

		if (Meet < DEF_METADATA_REDUNDANT) {
			// Conflict between types of different DEFs. It could be that
			//  a multiply or divide instruction DEFs EAX and EDX, and one
			//  of them is used in a store and the other is unused. In that
			//  case, the final type is USED and was returned above. Or, if
			//  one type is UNUSED and the other is REDUNDANT, the REDUNDANT
			//  type wins. The USED case is handled above, so we must have
			//  the UNUSED vs. REDUNDANT case.
			assert(Status >= DEF_METADATA_REDUNDANT);
			Meet = Status;
			continue;
		}

		// Running value is REDUNDANT (or above) and differs from Status.
		if (Status >= DEF_METADATA_REDUNDANT) {
			// One type is profile derived, both are REDUNDANT.
			Meet = DEF_METADATA_PROF_REDUNDANT;
		}
		else {
			assert(DEF_METADATA_UNUSED == Status);
			// leave the running value as REDUNDANT
		}
	} // end for all DEFs

	return Meet;
} // end of SMPInstr::GetDefMetadataType()

// Handle x86 opcode SIB byte annotations.
void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) { int BaseReg; int IndexReg; ea_t displacement; ushort ScaleFactor; MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement); if (BaseReg == R_sp) { // ESP cannot be IndexReg // ESP-relative constant offset qfprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm); } else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) { // EBP-relative constant offset qfprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm); } return; } // end of MDAnnotateSIBStackConstants // Emit annotations for constants used as ptr offsets from EBP or // ESP into the stack frame. Only pay attention to EBP-relative // offsets if EBP is being used as a frame pointer (UseFP == true). void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) { op_t Opnd; ea_t offset; int BaseReg; int IndexReg; ushort ScaleFactor; #if 0 if (this->address == 0x80925f4) { msg("PROBLEM INSTRUCTION: \n"); this->PrintOperands(); } #endif for (int i = 0; i < UA_MAXOP; ++i) { Opnd = this->SMPcmd.Operands[i]; if ((Opnd.type == o_displ) || (Opnd.type == o_phrase)) MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset); if (Opnd.type == o_displ) { if (Opnd.hasSIB) { MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); } else { // no SIB if (BaseReg == R_sp) { // ESP-relative constant offset qfprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } else if (UseFP && (BaseReg == R_bp)) { // EBP-relative constant offset qfprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... 
} // end if (Opnd.type == o_displ) else if (Opnd.type == o_phrase) { offset = 0; // mmStrata thinks [esp] is [esp+0] if (Opnd.hasSIB) { MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); } else { // Something like [ecx]; is it [esp] or [ebp] ? if (BaseReg == R_sp) { // ESP-relative constant offset qfprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } else if (UseFP && (BaseReg == R_bp)) { // EBP-relative constant offset qfprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, offset, disasm); } } // end if (Opnd.hasSIB) ... else ... } // end else if (Opnd.type == o_phrase) } // end for all operands // If we move a stack pointer or frame pointer into another register, we // need to annotate the implicit zero offset, e.g. mov edi,esp == mov edi,esp+0 // and edi is becoming a stack pointer that mmStrata needs to track. if (this->MDIsStackPointerCopy(UseFP)) { // Two possibilities: a move of the stack pointer, or an "lea" // opcode, e.g. lea eax,[eap+8] ==> eax:=esp+8. In the move // instruction (e.g. mov eax,esp), we have the implicit zero // offset from the stack pointer register, but in the lea case, // we might have zero or some other offset (lea eax,[esp] has // the implicit zero). int ESPoffset = 0; if (NN_lea == this->SMPcmd.itype) { ESPoffset = this->MDGetImmedUse(); } // NOTE: Looks like this next line should be "else" because an lea instruction // looks like it has a memory operand, hence it has already been handled above. // We are getting duplicate annotations for lea instructions. 
else { if (UseFP && this->GetFirstUse()->GetOp().is_reg(R_bp)) { qfprintf(AnnotFile, "%10x %6d PTRIMMEDEBP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, ESPoffset, disasm); } else { qfprintf(AnnotFile, "%10x %6d PTRIMMEDESP STACK %d displ %s\n", this->SMPcmd.ea, this->SMPcmd.size, ESPoffset, disasm); } } } return; } // end of SMPInstr::AnnotateStackConstants() // Emit all annotations for the instruction in the absence of RTL type inference. void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE *AnnotFile) { ea_t addr = this->address; flags_t InstrFlags = getFlags(addr); bool MemDest = this->HasDestMemoryOperand(); bool MemSrc = this->HasSourceMemoryOperand(); bool SecondSrcOperandImmNum = this->IsSecondSrcOperandNumeric(InstrFlags); // assumes 2nd source is Imm or not-numeric?! bool NoWarnFlag = false; // NOWARN annotation emitted? #if SMP_CHILDACCESS_ALL_CODE bool OrphanCode = (NULL == this->BasicBlock); ProfilerInformation *ProfInfo = NULL; if (!OrphanCode) ProfInfo = this->BasicBlock->GetFunc()->GetProg()->GetProfInfo(); #endif ++OptCount[OptType]; // keep count for debugging info #if SMP_DEBUG_MEM if (MemDest || MemSrc) { msg("OptType: %d %s", OptType, disasm); this->PrintOperands(); } #endif // Emit appropriate optimization annotations. bool SDTInstrumentation = false; switch (OptType) { case 0: // SDT will have to handle these { #if SMP_DEBUG_TYPE0 msg("OptType 0: %x %s\n", addr, disasm); #endif // mmStrata wants to suppress warnings on the PUSH // instructions that precede the LocalVarsAllocInstr // (i.e. the PUSHes of callee-saved regs). 
if ((!AllocSeen || !NeedsFrame) && this->MDIsPushInstr()) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL NoWarn %s \n", addr, -3, disasm); NoWarnFlag = true; } else { SDTInstrumentation = true; } break; } case 1: // nothing for SDT to do { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; break; } case 4: // INC, DEC, etc.: no SDT work unless MemDest { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%10x %6d INSTR LOCAL Always1stSrc %s \n", addr, -1, disasm); ++AnnotationCount[OptType]; break; } case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. { if (MemDest || MemSrc) { SDTInstrumentation = true; break; // treat as category 0 } if (SecondSrcOperandImmNum && !this->MDIsFrameAllocInstr() #if SPECIAL_CASE_CARRY_BORROW && (this->SMPcmd.itype != NN_adc) && (this->SMPcmd.itype != NN_sbb) #endif ) { // treat as category 1 qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } else { SDTInstrumentation = true; } break; } case 6: // Only OS code should include these; problem for SDT { if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%10x %6d INSTR LOCAL AlwaysPTR %s \n", addr, -OptType, disasm); ++AnnotationCount[OptType]; break; } case 8: // Implicitly writes to EDX:EAX, always numeric. { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ZZ %s %s \n", addr, -2, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; SDTInstrumentation = true; break; } case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) { if (MemDest) { #if SMP_DEBUG2 // MemDest seems to happen too much. 
msg("Floating point MemDest: %s \n", disasm); #endif SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; break; } case 10: // Implicitly writes to EDX:EAX and ECX, always numeric. { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ECX ZZ %s %s \n", addr, -2, OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; SDTInstrumentation = true; break; } default: // 2,3,7: Optimization possibilities depend on operands { #if SMP_DEBUG2 if (OptType == 3) { // MOV instr class if (MemDest) { msg("MemDest on MOV: %s\n", disasm); } else if (!SecondSrcOperandNum) { msg("MOV: not 2nd op numeric: %s\n", disasm); this->PrintOperands(); } } #endif SDTInstrumentation = true; if (MemDest) { #if SMP_DEBUG_XOR if (OptType == 2) msg("MemDest on OptType 2: %s\n", disasm); #endif break; // treat as category 0 } if ((OptType == 2) || (OptType == 7) || SecondSrcOperandImmNum) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s %s %s \n", addr, -2, this->DestString(OptType), OptExplanation[OptType], disasm); ++AnnotationCount[OptType]; } break; } } // end switch (OptType) // always emit stack constant annotations, in case strata is // instrumenting all instructions, or trying to verify speculative annotations. this->AnnotateStackConstants(UseFP, AnnotFile); // If mmStrata is going to have to deal with the // instruction, then we can annotate EBP and ESP // relative constant offsets. If we have emitted // an annotation of type -1, there is no point // in telling mmStrata about these constants. // Likewise, we can tell mmStrata if a MemDest is an // non-directly-accessed child object. if (SDTInstrumentation || NoWarnFlag) { if (strlen(this->DeadRegsString) > 0) { // Optimize by informing mmStrata of dead registers. It can avoid saving // and restoring dead state. 
This is particularly important for EFLAGS, // as restoring the flags is a pipeline serializing instruction. qfprintf(AnnotFile, "%10x %6d INSTR DEADREGS %s ZZ %s \n", addr, this->SMPcmd.size, this->DeadRegsString, disasm); } #if SMP_CHILDACCESS_ALL_CODE int ChildOffset, ChildSize; if (MemDest && !OrphanCode && ProfInfo->GetMemoryAccessInfo()->ComputeNonDirectAccessRegion(addr, ChildOffset, ChildSize)) { qfprintf(AnnotFile, "%10x %6d INSTR CHILDACCESS %d %d ZZ %s \n", addr, this->SMPcmd.size, ChildOffset, ChildSize, disasm); } #endif } return; } // end of SMPInstr::EmitAnnotations() /** * Emits Safe Returns * Mark the type of the annotation as "-4". Currently the SDT is ignoring this * annotation. */ void SMPInstr::EmitSafeReturn(FILE *AnnotFile) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL SafeReturn %s\n", this->address, -4, disasm); } // Emit all annotations for the instruction using RTL type inference. void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE *AnnotFile) { ea_t addr = this->address; flags_t InstrFlags = getFlags(addr); int TypeGroup = SMPTypeCategory[this->SMPcmd.itype]; bool NumericDEFs = this->AllDefsNumeric(); // all DEFs are NUMERIC or CODEPTR bool ProfiledDEFs = this->AnyDefsProfiled(); // Some DEFs come from the profiler bool UnusedMetadata = this->AllDefMetadataUnused(); bool MemDest = this->HasDestMemoryOperand(); bool MemSrc = this->HasSourceMemoryOperand(); bool SecondSrcOperandImmNum = this->IsSecondSrcOperandNumeric(InstrFlags); // assumes 2nd source is imm or not-numeric?? bool NoWarnFlag = false; // NOWARN annotation emitted? bool CarryBorrow = ((this->SMPcmd.itype == NN_adc) || (this->SMPcmd.itype == NN_sbb)); // Do we have the special case in which a non-NUMERIC comes into // an add with carry or subtract with borrow and the result // has been inferred to be NUMERIC? 
bool TypeChange = CarryBorrow && (!IsNumeric(this->AddSubUseType)) && NumericDEFs; SMPMetadataType DefMetadataType = this->GetDefMetadataType(); ProfilerInformation *ProfInfo; ProfInfo = this->BasicBlock->GetFunc()->GetProg()->GetProfInfo(); ++OptCount[this->OptType]; // keep count for debugging info if (this->IsNop()) TypeGroup = 1; // no-op idioms need their category reset // Emit appropriate optimization annotations. bool SDTInstrumentation = false; // If the DEF metadata is all unused, mmStrata can skip the instruction. // We omit this for groups 1 and 14, so that the metadata analysis // does not get statistical credit for instructions that were already // getting -1 annotations without analysis. // We also cannot skip NN_adc and NN_sbb instructions that change the // type of the incoming register. if ((1 != TypeGroup) && (14 != TypeGroup) && (!this->MDIsInterruptCall()) && !TypeChange) { if (UnusedMetadata) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL MetadataUnused %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; return; } else if (DEF_METADATA_REDUNDANT == DefMetadataType) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL MetadataRedundant %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; return; } else if (DEF_METADATA_PROF_REDUNDANT == DefMetadataType) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL MetadataRedundant %s \n", addr, -257, disasm); ++AnnotationCount[this->OptType]; // Profiler annotations could be backed off due to false // positives, in which case we will need stack constant // annotations. this->AnnotateStackConstants(UseFP, AnnotFile); return; } } switch (TypeGroup) { case 0: // SDT will have to handle these case 11: // PUSH/POP **!!** What if we push/pop NUMERIC type? Optimize? // --jdh // pop numeric's can be optimized with a numericdef annotation. // numeric push's can't immediately be optimized, but if the stack location // can be proven as dead metadata, then perhaps optimize. 
// --jdh // mmStrata wants to suppress warnings on the PUSH // instructions that precede the LocalVarsAllocInstr // (i.e. the PUSHes of callee-saved regs). if ((!AllocSeen || !NeedsFrame) && this->MDIsPushInstr()) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL NoWarn %s \n", addr, -3, disasm); NoWarnFlag = true; } else if (this->MDIsPopInstr() && NumericDEFs) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } else { SDTInstrumentation = true; } break; case 1: // nothing for SDT to do case 14: if (MemDest) { msg("ERROR: MemDest in Type Category 1 or 14: %x %s\n", addr, disasm); SDTInstrumentation = true; break; } qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; break; case 4: // INC, DEC, etc.: no SDT work unless MemDest if (MemDest || MemSrc) { // pretty conservative here? // could be more aggressive if we know there's no overflow. -- jdh SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%10x %6d INSTR LOCAL Always1stSrc %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; break; case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. 
#if 1 if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } #endif this->SetAddSubSourceType(); if (SecondSrcOperandImmNum && !this->MDIsFrameAllocInstr() && !TypeChange #if SPECIAL_CASE_CARRY_BORROW && (!CarryBorrow) #endif ) { // treat as category 1 qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; } else if (IsEqType(NUMERIC, this->AddSubSourceType) && !this->MDIsFrameAllocInstr() && !TypeChange #if SPECIAL_CASE_CARRY_BORROW && (!CarryBorrow) #endif ) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL 2ndSrcNumeric %s \n", addr, -1, disasm); ++AnnotationCount[this->OptType]; } else if (NumericDEFs) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } #if SMP_OPTIMIZE_ADD_TO_NUMERIC else if ((NN_add == this->SMPcmd.itype) && (!MemSrc) && IsNumeric(this->AddSubUseType)) { // reg1 := reg1 + reg2, where reg1 comes in as NUMERIC, // means that reg1 will get DEFed to the type of reg2, // whatever it is. If reg2 were known to be NUMERIC, // we would have hit one of the annotation cases above. qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s := %s ZZ AddToNumeric %s \n", addr, -5, RegNames[this->AddSubUseOp.reg], RegNames[this->AddSubSourceOp.reg], disasm); ++AnnotationCount[this->OptType]; } #endif else { SDTInstrumentation = true; } break; case 6: // Only OS code should include these; problem for SDT if (MemDest) { SDTInstrumentation = true; break; // treat as category 0 } qfprintf(AnnotFile, "%10x %6d INSTR LOCAL AlwaysPTR %s \n", addr, -OptType, disasm); ++AnnotationCount[this->OptType]; break; case 8: // Implicitly writes to EDX:EAX, always numeric. 
if (this->OptType == 10) { // writes to ECX also qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ECX ZZ %s %s \n", addr, -2, OptExplanation[this->OptType], disasm); } else { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ZZ %s %s \n", addr, -2, OptExplanation[this->OptType], disasm); } ++AnnotationCount[this->OptType]; SDTInstrumentation = true; break; case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) if (MemDest) { SDTInstrumentation = true; #if 0 if (NumericDEFs) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } #endif } else { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; } break; case 10: // AND, OR, etc.: If all DEFs have been inferred to be // NUMERIC, then output optimizing annotation. SDTInstrumentation = true; if (MemDest) { // **!!** optimize with numeric annotation in future break; // treat as category 0 } else if (NumericDEFs) { // NUMERIC result because of NUMERIC sources qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } break; case 12: // Exchange, exchange and add, conditional exchange: All NUMERIC // sources ==> NUMERIC DEFs, so nothing for mmStrata to do. if (MemDest) { // **!!** optimize with numeric annotation in future SDTInstrumentation = true; break; // treat as category 0 } else if (NumericDEFs) { // NUMERIC result because of NUMERIC sources qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, ProfiledDEFs ? -256-1 : -1, OptExplanation[TypeGroup], disasm); ++AnnotationCount[this->OptType]; } else SDTInstrumentation = true; break; case 13: case 15: // Floating point, NUMERIC, possible memory destination. 
// If not memory destination, fpreg dest, so nothing for mmStrata to do. if (MemDest) { // **!!** optimize with numeric annotation in future SDTInstrumentation = true; break; // treat as category 0 } else { // NUMERIC floating register result; these regs are always NUMERIC qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr, -1, OptExplanation[TypeGroup], disasm); ++AnnotationCount[this->OptType]; } break; default: // 2,3,7: Optimization possibilities depend on operands SDTInstrumentation = true; if (MemDest) { break; // treat as category 0 } if ((OptType == 2) || (OptType == 7) || SecondSrcOperandImmNum) { qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s %s %s \n", addr, -2, this->DestString(this->OptType), OptExplanation[this->OptType], disasm); ++AnnotationCount[this->OptType]; } else if (NumericDEFs) { // NUMERIC move instruction qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n", addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm); ++AnnotationCount[this->OptType]; } break; } // end switch (OptType) // always annotate stack constants for the profiler, etc. this->AnnotateStackConstants(UseFP, AnnotFile); // If mmStrata is going to have to deal with the // instruction, then we can annotate EBP and ESP // relative constant offsets. If we have emitted // an annotation of type -1, there is no point // in telling mmStrata about these constants. // Likewise, we can tell mmStrata if a MemDest is an // non-directly-accessed child object. int ChildOffset, ChildSize; if (SDTInstrumentation || NoWarnFlag) { if (strlen(this->DeadRegsString) > 0) { // Optimize by informing mmStrata of dead registers. It can avoid saving // and restoring dead state. This is particularly important for EFLAGS, // as restoring the flags is a pipeline serializing instruction. 
qfprintf(AnnotFile, "%10x %6d INSTR DEADREGS %s ZZ %s \n", addr, this->SMPcmd.size, this->DeadRegsString, disasm); } if (MemDest && ProfInfo->GetMemoryAccessInfo()->ComputeNonDirectAccessRegion(addr, ChildOffset, ChildSize)) { qfprintf(AnnotFile, "%10x %6d INSTR CHILDACCESS %d %d ZZ %s \n", addr, this->SMPcmd.size, ChildOffset, ChildSize, disasm); } #if SMP_IDENTIFY_POINTER_ADDRESS_REG if (MemDest) { assert(o_void != this->DestMemOp.type); set<DefOrUse, LessDefUse>::iterator PtrUse; PtrUse = this->GetPointerAddressReg(this->DestMemOp); if (PtrUse != this->GetLastUse()) { // found POINTER addr reg USE if (PtrUse->GetOp().type == o_reg) { ushort PtrReg = PtrUse->GetOp().reg; qfprintf(AnnotFile, "%10x %6d INSTR POINTER reg %s ZZ %s \n", addr, this->SMPcmd.size, RegNames[PtrReg], disasm); } } } #endif } return; } // end of SMPInstr::EmitTypeAnnotations() // emit check annotations for signedness, overflow, truncation, etc. void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) { set<DefOrUse, LessDefUse>::iterator UseIter, DefIter; op_t UseOp, DefOp; unsigned short UseWidthInfo, DefWidthInfo, SourceDefWidthInfo; unsigned short UseSignInfo, DefSignInfo, SourceDefSignInfo; unsigned short UseSignMask, DefSignMask, SourceDefSignMask; struct FineGrainedInfo UseFGInfo, DefFGInfo, SourceDefFGInfo; size_t UseBitWidth, DefBitWidth, UseMaxBitWidth, SourceDefBitWidth; ea_t DefAddr; int UseHashValue, DefHashValue; bool OverflowOpcode = this->MDIsOverflowingOpcode(); bool UnderflowOpcode = this->MDIsUnderflowingOpcode(); bool CheckForOverflow; bool UseIsSigned, DefIsSigned, UseIsUnsigned, DefIsUnsigned, SourceDefIsSigned, SourceDefIsUnsigned; bool UseSignMixed, SourceDefSignMixed; // inconsistent signedness bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer(); bool SignednessCheckEmitted = false; // Case 1: Overflow on addition. // Case 2: Underflow on subtraction. 
if (OverflowOpcode || UnderflowOpcode) { // If the flags register DEF is dead, we need a CHECK OVERFLOW/UNDERFLOW annotation. DefOp = InitOp; DefOp.type = o_reg; DefOp.reg = MD_FLAGS_REG; DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); if (this->BasicBlock->IsDefDead(this->address, DefOp)) { DefIter = this->GetFirstNonFlagsDef(); assert(DefIter != this->GetLastDef()); DefOp = DefIter->GetOp(); // Don't worry about stack space allocation instructions. The // program will crash long before the stack pointer underflows // below zero. if (!((o_reg == DefOp.type) && DefOp.is_reg(MD_STACK_POINTER_REG))) { DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); if (o_reg == DefOp.type) { if (this->BasicBlock->IsLocalName(DefOp)) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Global name, find in global maps. DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } } else if (MDIsStackAccessOpnd(DefOp, UseFP)) { bool success = this->BasicBlock->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo); assert(success); } else { // non-stack memory address; we know nothing about it. DefFGInfo.SignMiscInfo = 0; DefFGInfo.SizeInfo = 0; } DefSignInfo = DefFGInfo.SignMiscInfo; DefSignMask = DefSignInfo & FG_MASK_SIGNEDNESS_BITS; DefWidthInfo = DefFGInfo.SizeInfo; DefBitWidth = LargestBitWidthFromMask(DefWidthInfo); if (0 == DefBitWidth) { // Could happen for non-stack memory operands, for example. 
DefBitWidth = MD_NORMAL_MACHINE_BITWIDTH; } if (OverflowOpcode) { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %d ", this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth, MDGetRegName(DefOp), disasm); } else { // must be UnderflowOpcode qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK UNDERFLOW %s %d ", this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth, MDGetRegName(DefOp), disasm); } AnnotPrintOperand(DefOp, InfoAnnotFile); qfprintf(InfoAnnotFile, " ZZ %s \n", this->disasm); } } // end if flags reg is dead } // end cases 1-2 // Case 3: Overflow on multiplication with upper bits discarded. if (this->MDIsMultiply()) { // There are four overflow sub-cases for x86: (A) the multiplication result // can go into EDX:EAX for 32x32=>64 bit multiplication; (B) the result // can go into DX:AX for 16x16=>32 bit; (C) the result can be in AX // for 8x8=>16 bit; (D) see below. The latter case (C) will be detected most easily // as a truncation in a later instruction, i.e. if only AL gets stored // later, then we check the AH bits at that time for a truncation // error. Because our SSA numbering lumps AL, AH, AX, and EAX into // a single canonicalized register, we would have a hard time using // SSA-based def-use chains to determine if AH is dead. // For the other two sub-cases, the question is whether EDX becomes dead // starting with the DEF of EDX in the multiply instruction. // Case (D) is where the multiply instruction discards the upper bits // of the multiply. // Sub-cases A&B are detected by checking if EDX is dead, and if so, then // emitting an annotation to check for the overflow flag. The x86 sets // overflow and carry flags on multiplication instructions based on whether // the result carries out of the lower half of the result to the upper half. // Sub-case D is also detected using flags, but we don't need to check whether EDX // is dead. 
We just need to detect that EDX is not in the DEF set in the // first place. We have a private member flag for that case. CheckForOverflow = false; if (this->MultiplicationBitsDiscarded) { // Sub-case D CheckForOverflow = true; assert(this->RTL.GetCount() > 0); DefOp = this->RTL.GetRT(0)->GetLeftOperand(); DefIter = this->FindDef(DefOp); assert(DefIter != this->GetLastDef()); DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); } else { // If the instruction were EDX:=EDX*foo, then it would be // the multiplication bits discarded case and would not // reach this else clause. Therefore, if we find EDX in // the DEF set, it is holding upper result bits of the // multiplication and we have the potential for sub-cases A&B // but not sub-case C. So, we check to see if the DEF of EDX // is dead. DefOp = InitOp; DefOp.type = o_reg; DefOp.reg = R_dx; DefIter = this->FindDef(DefOp); if (DefIter != this->GetLastDef()) { // We found DEF of EDX, so it is not AX:=AL*op8 sub-case C. // Now, is DEF of EDX dead (i.e. no uses?) CheckForOverflow = this->BasicBlock->IsDefDead(this->address, DefOp); if (CheckForOverflow) { DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); } } } // end if sub-case D else if sub-case A or B if (CheckForOverflow) { // need an annotation if (this->BasicBlock->IsLocalName(DefOp)) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Global name, find in global maps. 
DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } DefWidthInfo = DefFGInfo.SizeInfo; DefBitWidth = LargestBitWidthFromMask(DefWidthInfo); if (this->MDIsUnsignedArithmetic()) { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW UNSIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm); } else { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm); } } } // end of case 3 // Case 4: Signedness error on move. // Case 5: Truncation error on move. UseOp = this->GetMoveSource(); if ((3 == this->OptType) && (o_reg == UseOp.type)) { // Possibilities for a move: reg to reg, mem to reg, or reg to mem. If we load // from memory into a register, we cannot track signedness in memory unless it // is a stack location. In that case, we record the signedness in the stack // map and transfer it to the reg DEF in SMPInstr.MDSetWidthSignInfo(). That // determines the signedness of the reg DEF and it cannot be in conflict with // the stack memory USE. The load from stack to reg also determines width // of the stack operand and we cannot have a truncation. So, we can restrict // our analysis of cases 4-5 to register-source move instructions, as we // have done in the condition above. // // Similarly, we cannot detect a signedness conflict if the destination is a // memory location that is not known to be a particular stack offset location. // // So, we only concern ourselves with signedness errors // when the USE operand of the move is a register, and the destination is another // register or a stack location. // // We can have a truncation error and a signedness error on a single instruction, so // we group them into common code. For example, move the lower half of a 32-bit unsigned // into a 16-bit signed destination. 
Upper bits set to 1 and discarded would be a // truncation, and setting the sign bit of the 16-bit signed destination would be a // signedness error. // // NOTE: Signedness errors are different from overflow and truncation errors. We // can have incomplete knowledge about an instructions operands and still determine // that truncation occurred. For example, if we do not know whether register EAX // is signed or unsigned, we can still say that storing only AX is a truncation error // if the upper half of EAX is a mixture of one and zero bits. If EAX is unsigned, // we could be more specific and insist that the upper half be all zero bits; if EAX // is signed, we could insist that the upper half of EAX be the sign-extension of AX. // We can avoid false positives by only declaring a truncation error when the upper // half of EAX is not all zero bits or all one bits. This approach allows a few // potential false negatives. With signedness, if we don't know the signedness // of one of the operands, we can only avoid false positives by doing no checks at // all. UseIter = this->FindUse(UseOp); assert(UseIter != this->GetLastUse()); UseBitWidth = 8 * GetOpDataSize(UseOp); // Now, the question is: Are we storing fewer bits than // we were using in our computations in this DEF-USE chain? // E.g. if we computed using 32 bits and then only store 16, // we have potential truncation error. But if we computed // using 16 bits all along, we have already checked for 16-bit // overflows on arithmetic in the DU chain and there can be no // truncation on this store. op_t SearchOp = UseOp; // Canonicalize sub-regs for searching DEFs and USEs. SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); SearchOp.dtyp = dt_dword; UseHashValue = HashGlobalNameAndSSA(SearchOp, UseIter->GetSSANum()); if (this->BasicBlock->IsLocalName(SearchOp)) { // Local name, find in basic block maps. 
SourceDefFGInfo = this->BasicBlock->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetUseFGInfo(UseHashValue); } else { // Global name, find in global maps. SourceDefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(UseHashValue); UseFGInfo = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue); } SourceDefWidthInfo = SourceDefFGInfo.SizeInfo; UseWidthInfo = UseFGInfo.SizeInfo; SourceDefBitWidth = LargestBitWidthFromMask(SourceDefWidthInfo); UseMaxBitWidth = LargestBitWidthFromMask(UseWidthInfo); UseSignMask = (UseFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); SourceDefSignInfo = SourceDefFGInfo.SignMiscInfo; SourceDefSignMask = (SourceDefSignInfo & FG_MASK_SIGNEDNESS_BITS); // Next four statements exclude the inconsistent sign case and the no sign info known case. UseIsSigned = (FG_MASK_SIGNED == UseSignMask); // exact, not bit-mask-AND UseIsUnsigned = (FG_MASK_UNSIGNED == UseSignMask); // exact, not bit-mask-AND SourceDefIsSigned = (FG_MASK_SIGNED == SourceDefSignMask); // exact, not bit-mask-AND SourceDefIsUnsigned = (FG_MASK_UNSIGNED == SourceDefSignMask); // exact, not bit-mask-AND UseSignMixed = (FG_MASK_INCONSISTENT_SIGN == UseSignMask); // exclude uninit sign case SourceDefSignMixed = (FG_MASK_INCONSISTENT_SIGN == SourceDefSignMask); // exclude uninit sign case // Not only the CHECK SIGNEDNESS annotations depend on the signedness of the // source and destination operands. The CHECK TRUNCATION annotations come // in SIGNED, UNSIGNED, and UNKNOWNSIGN variants, so we need to get the // signedness of the destination operand before we proceeed. 
DefOp = this->RTL.GetRT(0)->GetLeftOperand(); // RTL must be dest := rhs op_t DestSearchOp = DefOp; bool StackDestination; if (o_reg == DestSearchOp.type) { StackDestination = false; DestSearchOp.reg = MDCanonicalizeSubReg(DefOp.reg); DestSearchOp.dtyp = dt_dword; } else if (!(MDIsStackAccessOpnd(DefOp, UseFP))) { // If destination of move is not a register and is not // a stack location, we cannot track its signedness and width. return; } else { StackDestination = true; } DefIter = this->FindDef(DestSearchOp); if (StackDestination) { // Fetch FG info from stack map. bool success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo); assert(success); } else { // Fetch FG info from register FG info maps. DefHashValue = HashGlobalNameAndSSA(DestSearchOp, DefIter->GetSSANum()); if (this->BasicBlock->IsLocalName(DestSearchOp)) { // Local name, find in basic block maps. DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue); } else { // Global name, find in global maps. DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue); } } DefSignMask = (DefFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS); // Next two statements exclude the inconsistent sign case and the no sign info known case. DefIsSigned = (FG_MASK_SIGNED == DefSignMask); // exact, not bit-mask-AND DefIsUnsigned = (FG_MASK_UNSIGNED == DefSignMask); // exact, not bit-mask-AND // If we set the (source) DEF bit width to 0, it means we wanted to have the USEs determine // the width. This happens on sign-extended and zero-extended loads. If we zero-extend // a 16-bit value to 32 bits, then immediately store the lower 16 bits to a 16-bit location, // then the upper bits cannot have any overflow info yet. But if we do 32-bit arithmetic // on the zero-extended value, and then store the lower 16 bits, we need to check for // truncation. So, the key is whether the value ever got used as a 32-bit value. If it // did, check for truncation; if not, there is no need to check. 
if ((SourceDefBitWidth > UseBitWidth) || ((SourceDefBitWidth == 0) && (UseMaxBitWidth > UseBitWidth))) { // Original DEF (or subsequent USE) was wider than what we are storing now. unsigned short SourceDefReg = SearchOp.reg; unsigned short UseReg = UseOp.reg; if (SourceDefBitWidth == 0) { // Convert for printing annotation. SourceDefBitWidth = 8 * GetOpDataSize(SearchOp); } // OK, we need to check for possible truncation. But, how we check depends on the // signedness combinations of the source and destination operands of the move. // Each operand can be signed, unsigned, or of unknown sign (and we lump the // inconsistent sign case into the unknown sign case). So, we have a set of 3x3=9 // possible combinations of signedness. // Now we have the DefSignMask to compare to the UseSignMask. The nine possible // combinations, and the annotations we want to emit for each, are shown below. // S = SIGNED, U = UNSIGNED, and ? = unknown or inconsistent sign. // S => U indicates a SIGNED being stored into an UNSIGNED, for example. // Assume without loss of generality that register EAX is the source of // all the move instructions, and that only subword register AX is being stored. // We can perform all truncation and signedness checks on EAX just prior to // the move instruction, which is cheaper than performing checks on the // destination if the destination is in memory. // // U => U // U => S // S => U // U => ? // ? => U // // In these first five cases, EAX must be the zero-extension of AX else there is // a truncation error. In the three cases in which the source (EAX/AX) is UNSIGNED, // discarding upper bits that are not zero is obviously truncation. In the case // of S => U, if the upper bits of EAX are not all zeroes, then we either have // a large positive value of EAX that is being truncated, or EAX is negative and // the lower bits will be misinterpreted in the unsigned destination. Finally, // the ? 
=> U case must be either U => U or S => U, and these two cases already // share the demand that EAX be the zero-extension of AX. So, these five cases // will receive the annotation: CHECK TRUNCATION UNSIGNED 32 EAX 16 AX which // means that EAX is tested against AX to see if it is the 32-bit zero-extension // of 16-bit reg AX. // In the U => S case, we can have a signedness error as well as truncation. Even // if the truncation check passes (all upper half bits of EAX are zero), the top // bit of AX might be 1, and this will be misinterpreted as a sign bit in the // destination. So, this case receives a second annotation: CHECK SIGNEDNESS SIGNED 16 AX. // In the two cases that involve signedness uncertainty, there are possible signedness // errors that we are not checking ar tun-time, because we do not have enough information // to perform the checks without generating many more false positives than true positives. // As a result, false negatives on signedness can occur. // // On to more of the 9 combinations: // // S => S // // In this case, EAX must be the sign-extension of AX. Because the destination is also // signed, nothing is lost if the sign-extension bits (all zeroes or all ones) are dropped. // We emit a CHECK TRUNCATION SIGNED 32 EAX 16 AX annotation to test EAX == sign-extended AX. // // S => ? // ? => S // ? => ? // // These final three cases all involve at least one operand of unknown signedness, and no // operands that are known to be unsigned. In each case, there are two possibilities: // either EAX must be the sign-extension of AX, or EAX must be the zero-extension of AX. // Because of the uncertainty that is represented by the question marks, we are not sure // which of these two cases we are dealing with. However, rather than just give up and // perform no run-time checks (to avoid false positives), we can still perform a run-time // check that will catch (perhaps most) true positives while causing no false positives. 
// We can insist that EAX must be EITHER the sign-extension or the zero-extension of AX. // To be neither type of extension of AX implies that some sort of truncation is happening. // So, we emit a CHECK TRUNCATION UNKNOWNSIGN 32 EAX 16 AX annotation, and the Strata // instrumentation will check for either EAX == sign-extended AX or EAX == zero-extended AX // being true. If neither is true, we raise a true positive alert. False negatives on // signedness errors are the result of the uncertainty, but all truncations are detected // for all nine cases. if (DefIsUnsigned || UseIsUnsigned) { // First five cases above: any UNSIGNED operand leads to CHECK TRUNCATION UNSIGNED annotation. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION UNSIGNED %d %s %d %s ZZ %s \n", this->address, this->SMPcmd.size, SourceDefBitWidth, MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm); if (UseIsUnsigned && DefIsSigned) { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } } else if (DefIsSigned && UseIsSigned) { // S => S case above. Emit CHECK TRUNCATION SIGNED annotation. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION SIGNED %d %s %d %s ZZ %s \n", this->address, this->SMPcmd.size, SourceDefBitWidth, MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm); } else { // S => ?, ? => S, ? => ? cases above: CHECK TRUNCATION UNKNOWNSIGN annotation. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION UNKNOWNSIGN %d %s %d %s ZZ %s \n", this->address, this->SMPcmd.size, SourceDefBitWidth, MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm); } #if 1 // Now check for signedness conflicts between the UseOp USEs and its DEF. 
if (UseIsSigned && SourceDefIsUnsigned) { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if (UseIsUnsigned && SourceDefIsSigned) { // Currently same annotation, but might differ in the future for better forensics // and more precise diagnostic messages. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if ((!SourceDefSignMixed) && UseSignMixed) { // DEF has consistent and known signedness, USE is inconsistent. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } #endif } // end if truncation else { // still need to check for signedness errors even if no truncation if (UseIsSigned && DefIsUnsigned) { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if (UseIsUnsigned && DefIsSigned) { // Currently same annotation, but might differ in the future for better forensics // and more precise diagnostic messages. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } #if 1 else if (UseIsSigned && SourceDefIsUnsigned) { qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if (UseIsUnsigned && SourceDefIsSigned) { // Currently same annotation, but might differ in the future for better forensics // and more precise diagnostic messages. 
qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } else if ((!SourceDefSignMixed) && UseSignMixed) { // DEF has consistent and known signedness, USE is inconsistent. qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n", this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm); } #endif } // end if truncation else check signedness } // end of cases 4-5, (3 == OptType) checking for TRUNCATION and SIGNEDNESS errors return; } // end of SMPInstr::EmitIntegerErrorAnnotations() // Go through the PUSH RTL and get the operand pushed. op_t SMPInstr::GetPushedOpnd(void) { op_t VoidOp = InitOp; if (NN_push == this->SMPcmd.itype) { for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE return TempOp; } } msg("ERROR: Could not find PUSH operand at %x %s\n", this->address, this->GetDisasm()); return VoidOp; } else { return VoidOp; } } // end of SMPInstr::GetPushedOpnd() // Get the immediate value used in the instruction. Return zero // if no immediate was used. 
int SMPInstr::MDGetImmedUse(void) { int ImmedVal = 0; set<DefOrUse, LessDefUse>::iterator CurrUse; for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { op_t UseOp = CurrUse->GetOp(); if (o_imm == UseOp.type) { ImmedVal = (int) UseOp.value; break; } } return ImmedVal; } // end of SMPInstr::MDGetImmedUse() // Build the RTL for an instruction with a unary opcode bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) { size_t OpNum; bool DestFound = false; SMPRegTransfer *TempRT = NULL; op_t VoidOp = InitOp; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; // Handle special cases first if (SMP_UNARY_FLOATING_ARITHMETIC == UnaryOp) { // Use of the floating register stack top is implicit DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FPRegOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(FPRegOp); RightRT->SetOperator(UnaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } else if ((NN_clc == this->SMPcmd.itype) || (NN_cld == this->SMPcmd.itype) || (NN_cmc == this->SMPcmd.itype) || (NN_stc == this->SMPcmd.itype) || (NN_std == this->SMPcmd.itype)) { // Flags register is implicit destination. 
DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; if (NN_cmc == this->SMPcmd.itype) { // complement carry flag USEs old carry flag RightRT->SetLeftOperand(FlagsOp); RightRT->SetOperator(SMP_BITWISE_NOT); } else { RightRT->SetLeftOperand(VoidOp); RightRT->SetOperator(UnaryOp); } RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } for (OpNum = 0; !DestFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(UnaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); } } } // end for (OpNum = 0; ...) #if SMP_DEBUG_BUILD_RTL if (!DestFound) { msg("ERROR: Could not find unary operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); } #endif return DestFound; } // end of SMPInstr::BuildUnaryRTL() // Build the RTL for an instruction with a binary arithmetic opcode bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool MemSrc = this->HasSourceMemoryOperand(); bool MemDest = this->HasDestMemoryOperand(); // Work around IDA pro error; they assumed that the pcmpeq and pcmpgt // families of instructions were just compares, so they do not tag // either operand as a DEF. Actually, the first operand has byte or // word or dword fields set to all 1's or all 0's based on the result // of the comparison. 
bool SrcIsReallyDest = ((SMP_COMPARE_EQ_AND_SET == BinaryOp) || (SMP_COMPARE_GT_AND_SET == BinaryOp)); SMPRegTransfer *TempRT = NULL; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t VoidOp = InitOp; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack // Handle special cases first if (SMP_BINARY_FLOATING_ARITHMETIC == BinaryOp) { // Use of the floating register stack top is implicit DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FPRegOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(FPRegOp); RightRT->SetOperator(BinaryOp); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); } for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if ((this->features & DefMacros[OpNum]) || (SrcIsReallyDest && (0 == OpNum))) { // DEF if (!DestFound && MDKnownOperandType(TempOp)) { // See comments just below for floating point sources. FP stores // are analogous to FP loads. 
if (!MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); if (this->RegClearIdiom) { op_t ImmOp = InitOp; ImmOp.type = o_imm; ImmOp.value = 0; TempRT->SetRightOperand(ImmOp); SourceFound = true; // cause loop exit } else { RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } } else { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL msg("WARNING: Skipping DEF operand: "); PrintOperand(TempOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); #endif } } else if (DestFound && (SMP_BINARY_FLOATING_ARITHMETIC != BinaryOp)) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL msg("ERROR: Found two DEF operands: "); PrintOperand(TempOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); #endif } } else { // USE if (!SourceFound && MDKnownOperandType(TempOp)) { // If this is a floating point instruction with the fpregs listed as // a USE and a memory operand also listed as a USE, then we want to // ignore the irrelevant USE of the fpreg stack. // Note that MemDest AND MemSrc means something like add mem,reg is being // processed, where the memory operand is both DEF and USE. if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { SourceFound = true; RightRT->SetRightOperand(TempOp); } } if (!(this->features & UseMacros[OpNum])) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE msg("WARNING: Operand neither DEF nor USE: "); PrintOperand(TempOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); #endif } } // end if DEF ... else ... } // end for (OpNum = 0; ...) 
if (!DestFound || !SourceFound) { assert(NULL != RightRT); if (DestFound && (NULL != TempRT)) delete TempRT; else delete RightRT; #if SMP_DEBUG_BUILD_RTL if (!DestFound) { msg("ERROR: Could not find binary DEF operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); } else { msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); this->PrintOperands(); } #endif } else { this->RTL.push_back(TempRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildBinaryRTL() // Build the RTL for a load-effective-address instruction. bool SMPInstr::BuildLeaRTL(void) { size_t OpNum; bool DestFound = false; bool SourceFound = false; op_t DefOp = InitOp; op_t UseOp = InitOp; SMPRegTransfer *AssignRT = NULL; int BaseReg; int IndexReg; ushort ScaleFactor; ea_t offset; bool ScaledIndexReg; for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF DefOp = TempOp; DestFound = true; assert(o_reg == DefOp.type); } else { // USE if (!SourceFound && MDKnownOperandType(TempOp)) { if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { SourceFound = true; UseOp = TempOp; MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset); } else { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL msg("WARNING: Skipping USE operand: "); PrintOperand(TempOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); #endif } } if (!(this->features & UseMacros[OpNum])) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE msg("WARNING: Operand neither DEF nor USE: "); PrintOperand(TempOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); #endif } } // end if DEF ... else ... } // end for (OpNum = 0; ...) 
if (!DestFound || !SourceFound) { #if SMP_DEBUG_BUILD_RTL if (!DestFound) { msg("ERROR: Could not find lea DEF operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); } else { msg("ERROR: Could not find lea USE operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); this->PrintOperands(); } #endif } else { // Ready to build the RTL // We build the RTL down to the right, in reverse order, with any multiplication // of the index register by a scale factor at the bottom of the RTL tree. // Note that almost any combination of BaseReg, IndexReg, and offset can be present // or absent. AssignRT = new SMPRegTransfer; AssignRT->SetLeftOperand(DefOp); AssignRT->SetOperator(SMP_ASSIGN); ScaledIndexReg = ((ScaleFactor > 0) && (IndexReg != R_none)); op_t BaseOp = InitOp, IndexOp = InitOp, OffsetOp = InitOp, ScaleOp = InitOp; BaseOp.type = o_reg; BaseOp.reg = (ushort) BaseReg; IndexOp.type = o_reg; IndexOp.reg = (ushort) IndexReg; OffsetOp.type = o_imm; OffsetOp.value = (uval_t) offset; ScaleOp.type = o_imm; ScaleOp.value = (uval_t) ScaleFactor; if (ScaledIndexReg) { // First, build the subtree to scale the IndexReg. SMPRegTransfer *MultRT = new SMPRegTransfer; MultRT->SetLeftOperand(IndexOp); MultRT->SetOperator(SMP_U_LEFT_SHIFT); MultRT->SetRightOperand(ScaleOp); // Now, case on the possibilities for existence of the other address fields. if (0 != offset) { // Add the offset to the scaled index subtree. SMPRegTransfer *AddOffRT = new SMPRegTransfer; AddOffRT->SetLeftOperand(OffsetOp); AddOffRT->SetOperator(SMP_ADD); AddOffRT->SetRightTree(MultRT); // Add a BaseReg, if any. if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightTree(AddOffRT); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg AssignRT->SetRightTree(AddOffRT); } } // end if nonzero offset else { // no offset to add // Add a BaseReg, if any. 
if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightTree(MultRT); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg AssignRT->SetRightTree(MultRT); } } } // end if ScaleIndexReg else { // no scaled index register if (0 != offset) { if (R_none != IndexReg) { SMPRegTransfer *AddOffRT = new SMPRegTransfer; AddOffRT->SetLeftOperand(OffsetOp); AddOffRT->SetOperator(SMP_ADD); AddOffRT->SetRightOperand(IndexOp); // Add BaseReg, if any. if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightTree(AddOffRT); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg AssignRT->SetRightTree(AddOffRT); } } // end if valid IndexReg else { // no IndexReg // Add BaseReg, if any. if (R_none != BaseReg) { SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightOperand(OffsetOp); // Link into assignment root tree. AssignRT->SetRightTree(AddBaseRT); } else { // no BaseReg, no IndexReg, just offset? if (UseOp.type != o_mem) { msg("ERROR: No BaseReg, no IndexReg at %x for %s\n", this->address, this->GetDisasm()); } AssignRT->SetRightOperand(OffsetOp); } } } // end if nonzero offset else { // no offset if ((R_none == BaseReg) || (R_none == IndexReg)) { msg("WARNING: lea used as move at %x for %s\n", this->address, this->GetDisasm()); if (R_none != BaseReg) AssignRT->SetRightOperand(BaseOp); else { assert(R_none != IndexReg); AssignRT->SetRightOperand(IndexOp); } } else { // we have a BaseReg and an IndexReg, unscaled, no offset SMPRegTransfer *AddBaseRT = new SMPRegTransfer; AddBaseRT->SetLeftOperand(BaseOp); AddBaseRT->SetOperator(SMP_ADD); AddBaseRT->SetRightOperand(IndexOp); // Link into assignment root tree. 
AssignRT->SetRightTree(AddBaseRT); } } // end if nonzero offset ... else ... } // end if (ScaledIndexReg) ... else ... this->RTL.push_back(AssignRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildLeaRTL() // Build the RTL for an double-word shift instruction bool SMPInstr::BuildDoubleShiftRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool CountFound = false; SMPRegTransfer *TempRT = NULL; SMPRegTransfer *RightRT = new SMPRegTransfer; SMPRegTransfer *LowerRightRT = new SMPRegTransfer; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; for (OpNum = 0; !(DestFound && SourceFound && CountFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); LowerRightRT->SetOperator(BinaryOp); RightRT->SetRightTree(LowerRightRT); } } else { // USE if (MDKnownOperandType(TempOp)) { if (!SourceFound) { SourceFound = true; LowerRightRT->SetLeftOperand(TempOp); } else { CountFound = true; LowerRightRT->SetRightOperand(TempOp); } } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound || !CountFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find double-shift operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); // The carry flag gets the last shifted out bit. 
this->RTL.ExtraKills.push_back(FlagsOp); } return (DestFound && SourceFound && CountFound); } // end of SMPInstr::BuildDoubleShiftRTL() // Build the RTL for a multiply or divide, which can have implicit EAX and/or EDX operands bool SMPInstr::BuildMultiplyDivideRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool HiddenEAXUse = false; bool ImplicitEDXUse = false; SMPRegTransfer *TempRT = NULL; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack op_t Immed1Op = InitOp; Immed1Op.type = o_imm; // immediate 1 for increment or decrement FPRegOp.value = 1; // Detect the cases in which EDX:EDX is the destination and EAX is a hidden operand. // See detailed comments on the multiply and divide instructions in MDFixupDefUseLists(). for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (!TempOp.showed()) { // hidden operand if (TempOp.is_reg(R_ax)) { // not R_al, so it is not 8 bits // This form always has a hidden use of EDX:EAX HiddenEAXUse = true; ImplicitEDXUse = true; } else if (TempOp.is_reg(R_al)) { // Use of AX register to hold 16-bit result is hidden, // but EDX is not needed to hold result bits. HiddenEAXUse = true; } } } for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; RightRT->SetRightOperand(TempOp); } } } // end for (OpNum = 0; ...) 
if (!DestFound || !SourceFound) { assert(NULL != RightRT); if (DestFound && (NULL != TempRT)) delete TempRT; else delete RightRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find mul/div operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); if (ImplicitEDXUse) { // Need another effect for EDX, which was implicit. // Make a deep copy from existing effect and change EAX dest to EDX. // For divisions, we also change EAX source to EDX. SMPRegTransfer *EDXRT = new SMPRegTransfer; SMPRegTransfer *EDXRightRT = new SMPRegTransfer; op_t EDXOp; EDXRT->SetOperator(SMP_ASSIGN); EDXOp = TempRT->GetLeftOperand(); assert(EDXOp.is_reg(R_ax)); EDXOp.reg = R_dx; EDXRT->SetLeftOperand(EDXOp); op_t SourceOp = RightRT->GetLeftOperand(); if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) { // Need to change left operand of RightRT to EDX. i.e. we are // changing the effect from eax := eax DIV foo to edx := edx DIV foo. assert(SourceOp.is_reg(R_ax)); EDXRightRT->SetLeftOperand(EDXOp); } else { // just use same source operands for multiplies EDXRightRT->SetLeftOperand(SourceOp); } EDXRightRT->SetOperator(BinaryOp); EDXRightRT->SetRightOperand(RightRT->GetRightOperand()); EDXRT->SetRightTree(EDXRightRT); this->RTL.push_back(EDXRT); this->MultiplicationBitsDiscarded = false; } else { // No implicit EDX effect. // If we had 8x8=>16 bit multiply with AL*op8=>AX there // is no discarding of result bits, else there is discarding. this->MultiplicationBitsDiscarded = (!HiddenEAXUse); } } return (DestFound && SourceFound); } // end of SMPInstr::BuildMultiplyDivideRTL() // Build the RTL for an instruction with a tertiary arithmetic opcode applied to // two operands plus an implied FLAGS operand, e.g. add with carry adds the carry bit // and two operands together; rotate through carry, etc. 
bool SMPInstr::BuildBinaryPlusFlagsRTL(SMPoperator BinaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; SMPRegTransfer *TempRT = NULL; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; SMPRegTransfer *RightRT = new SMPRegTransfer; SMPRegTransfer *FlagsRightRT = new SMPRegTransfer; for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; FlagsRightRT->SetLeftOperand(TempOp); FlagsRightRT->SetOperator(BinaryOp); FlagsRightRT->SetRightOperand(FlagsOp); RightRT->SetRightTree(FlagsRightRT); } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound) { if (DestFound) delete TempRT; // also deletes linked in RightRT else delete RightRT; // will also delete FlagsRightRT if SourceFound is true if (!SourceFound) // FlagsRightRT not linked into RightRT yet delete FlagsRightRT; // .. 
so delete FlagsRightRT separately #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildBinaryPlusFlagsRTL() #define SMP_FIRST_SET_OPCODE NN_seta #define SMP_LAST_SET_OPCODE NN_setz // Build the RTL for an instruction of form dest := unary_operator(source), dest != source bool SMPInstr::BuildUnary2OpndRTL(SMPoperator UnaryOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; int opcode = this->SMPcmd.itype; bool ExtendedMove = ((NN_movsx == opcode) || (NN_movzx == opcode)); op_t VoidOp = InitOp; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t PortNumOp = InitOp; PortNumOp.type = o_reg; PortNumOp.reg = R_dx; op_t PortDataOp = InitOp; PortDataOp.type = o_reg; PortDataOp.reg = R_ax; // Handle special cases first. if ((SMP_FIRST_SET_OPCODE <= opcode) && (SMP_LAST_SET_OPCODE >= opcode)) { // Set instructions implicitly use the flags register. 
SourceFound = true; RightRT->SetLeftOperand(FlagsOp); } for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; if (NN_in == opcode) { msg("ERROR: Explicit DEF for IN from port opcode at %x : ", this->GetAddr()); PrintOperand(TempOp); msg("\n"); TempRT->SetLeftOperand(PortDataOp); TempRT->SetOperator(UnaryOp); } else if (NN_out == opcode) { TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(UnaryOp); } else { TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetRightOperand(VoidOp); RightRT->SetOperator(UnaryOp); TempRT->SetRightTree(RightRT); } } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; if (NN_in == opcode) { TempRT->SetRightOperand(TempOp); } else if (NN_out == opcode) { msg("ERROR: Explicit USE for OUT to port opcode at %x : ", this->GetAddr()); PrintOperand(TempOp); msg("\n"); TempRT->SetRightOperand(PortDataOp); } else { RightRT->SetLeftOperand(TempOp); if (ExtendedMove) this->MoveSource = TempOp; } } } } // end for (OpNum = 0; ...) if (!SourceFound && (NN_in == opcode)) { // Input from port is implicitly from port # in DX register if not // specified with an immediate operand. SourceFound = true; TempRT->SetRightOperand(PortNumOp); } if (!DestFound && (NN_in == opcode)) { // Input from port is implicitly to register AL, AX, or EAX // depending on the opcode and bit width mode. DestFound = true; TempRT->SetLeftOperand(PortDataOp); TempRT->SetOperator(UnaryOp); } if (!DestFound && (NN_out == opcode)) { // Output to port is implicitly to port # in DX register if not // specified with an immediate operand. DestFound = true; TempRT->SetLeftOperand(PortNumOp); TempRT->SetOperator(SMP_ASSIGN); } if (!SourceFound && (NN_out == opcode)) { // Output to port is implicitly from register AL, AX, or EAX // depending on the opcode and bit width mode. 
SourceFound = true; TempRT->SetRightOperand(PortDataOp); } if (!DestFound || !SourceFound) { if (!DestFound) delete RightRT; // never linked in to TempRT if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); if ((NN_in == opcode) || (NN_out == opcode)) delete RightRT; // unused for port I/O } return (DestFound && SourceFound); } // end of SMPInstr::BuildUnary2OpndRTL() // Build the RTL for an instruction of form dest := source, dest != source bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool MemSrc = this->HasSourceMemoryOperand(); bool MemDest = this->HasDestMemoryOperand(); bool HasRepeatPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)) || (0 != (this->SMPcmd.auxpref & aux_repne)); int opcode = this->SMPcmd.itype; #if IDA_SDK_VERSION < 600 if ((NN_ldmxcsr == opcode) || (NN_stmxcsr == opcode)) { // IDA 5.1 does not have the R_mxcsr enumeration value, // so we cannot handle these opcodes. 
return false; } #endif SMPRegTransfer *TempRT = new SMPRegTransfer; op_t VoidOp = InitOp; op_t EAXOp = InitOp; EAXOp.type = o_reg; EAXOp.reg = R_ax; op_t ALOp = InitOp; ALOp.type = o_reg; ALOp.reg = R_al; ALOp.dtyp = dt_byte; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack FPRegOp.reg = 0; op_t PortNumOp = InitOp; PortNumOp.type = o_reg; PortNumOp.reg = R_dx; op_t PortDataOp = InitOp; PortDataOp.type = o_reg; PortDataOp.reg = R_ax; #if IDA_SDK_VERSION > 599 op_t MXCSROp = InitOp; // MMX Control & Status Register MXCSROp.type = o_reg; MXCSROp.reg = R_mxcsr; #endif op_t ZeroOp = InitOp; ZeroOp.type = o_imm; // immediate zero ZeroOp.value = 0; #if SMP_DEBUG_BUILD_RTL if (MemSrc && MemDest && (NN_movs != opcode)) { if (NN_stos != opcode) { msg("ERROR: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(), this->GetDisasm()); } else { // IDA incorrectly lists [EDI] as both DEF and USE, because reg EDI // is both DEF and USE in NN_stos. msg("WARNING: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(), this->GetDisasm()); } this->PrintOperands(); } #endif // First, handle special cases with implicit operands if (NN_lahf == opcode) { // load AH from flags TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(EAXOp); TempRT->SetRightOperand(FlagsOp); this->RTL.push_back(TempRT); return true; } if (NN_sahf == opcode) { // store AH to flags TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(FlagsOp); TempRT->SetRightOperand(EAXOp); this->RTL.push_back(TempRT); return true; } if ((NN_movs == opcode) || (NN_stos == opcode) || (NN_ins == opcode) || (NN_outs == opcode)) { // The ESI and EDI registers get incremented or decremented, depending // on the direction flag DF, for MOVS; only EDI for STOS and INS; // only ESI for OUTS. // This is true with or without a repeat prefix. 
op_t ESIOp = InitOp, EDIOp = InitOp; ESIOp.type = o_reg; ESIOp.reg = R_si; EDIOp.type = o_reg; EDIOp.reg = R_di; op_t ESIMemOp = InitOp, EDIMemOp = InitOp; // [esi] and [edi] ESIMemOp.type = o_phrase; ESIMemOp.reg = R_si; EDIMemOp.type = o_phrase; EDIMemOp.reg = R_di; if (NN_movs == opcode) { this->RTL.ExtraKills.push_back(ESIOp); this->RTL.ExtraKills.push_back(EDIOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(EDIMemOp); TempRT->SetRightOperand(ESIMemOp); DestFound = true; SourceFound = true; } else if (NN_stos == opcode) { this->RTL.ExtraKills.push_back(EDIOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(EDIMemOp); TempRT->SetRightOperand(ALOp); // default in case we don't find source later DestFound = true; } else if (NN_ins == opcode) { this->RTL.ExtraKills.push_back(EDIOp); TempRT->SetOperator(SMP_INPUT); TempRT->SetLeftOperand(EDIMemOp); TempRT->SetRightOperand(PortNumOp); DestFound = true; SourceFound = true; } else if (NN_outs == opcode) { this->RTL.ExtraKills.push_back(ESIOp); TempRT->SetOperator(SMP_OUTPUT); TempRT->SetLeftOperand(ESIMemOp); TempRT->SetRightOperand(PortNumOp); DestFound = true; SourceFound = true; } } // Some floating point instructions use the floating point register stack top as // an implicit source or destination, but the other operand of the load or store // is explicit, so we set the implicit operand and let control flow pass to the // main processing loop below. if ((NN_fld == opcode) || (NN_fbld == opcode) || (NN_fild == opcode)) { // Loads implicitly use the floating point stack top as destination. 
TempRT->SetLeftOperand(FPRegOp); TempRT->SetOperator(SMP_ASSIGN); DestFound = true; } else if ((NN_fst == opcode) || (NN_fstp == opcode) || (NN_fbstp == opcode) || (NN_fist == opcode) || (NN_fistp == opcode)) { // Stores implicitly use the floating point stack top as source TempRT->SetRightOperand(FPRegOp); SourceFound = true; // The "p" at the end of the opcode indicates that the floating point // register stack gets popped. if ((NN_fstp == opcode) || (NN_fbstp == opcode) || (NN_fistp == opcode)) { this->RTL.ExtraKills.push_back(FPRegOp); } } #if IDA_SDK_VERSION > 599 else if (NN_ldmxcsr == opcode) { // The MMX Control & Status Register is used implicitly. TempRT->SetLeftOperand(MXCSROp); DestFound = true; } else if (NN_stmxcsr == opcode) { // The MMX Control & Status Register is used implicitly. TempRT->SetRightOperand(MXCSROp); SourceFound = true; } #endif for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (!DestFound && MDKnownOperandType(TempOp)) { // See comments just below for floating point sources. FP stores // are analogous to FP loads. if (!MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { DestFound = true; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); } } } else { // USE if (!SourceFound && MDKnownOperandType(TempOp)) { // If this is a floating point instruction with the fpregs listed as // a USE and a memory operand also listed as a USE, then we want to // ignore the irrelevant USE of the fpreg stack. // Note that MemDest AND MemSrc means something like stosb is being // processed, where the memory operand is both DEF and USE to IDA // for mysterious reasons. 
if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { SourceFound = true; TempRT->SetRightOperand(TempOp); this->MoveSource = TempOp; } } if (this->features & UseMacros[OpNum]) { ; #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE msg("WARNING: Operand neither DEF nor USE: "); PrintOperand(TempOp); msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm()); #endif } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find move operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { // If the move is conditional, set the guard expression. if (SMP_NULL_OPERATOR != GuardOp) { SMPGuard *Guard1 = new SMPGuard; Guard1->SetLeftOperand(FlagsOp); Guard1->SetOperator(GuardOp); Guard1->SetRightOperand(ZeroOp); TempRT->SetGuard(Guard1); if (this->MDIsConditionalMoveInstr()) { // We need to represent the possibility that the DEF operand will not // be set because the move is conditional. We will add the DEF operand // into the USE set and special case our type inferences so that the // USE and the pseudo-USE (prior SSA value of the DEF operand) must // agree in type before we can be sure of the result type. 
assert(this->Defs.GetSize() == 1); this->Uses.SetRef(this->Defs.GetFirstRef()->GetOp()); } } this->RTL.push_back(TempRT); // Now, create the repeat prefix effects if (HasRepeatPrefix) { // Must be MOVS or STOS or INS or OUTS // The repeat causes USE and DEF of ECX as a counter SMPRegTransfer *CounterRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; CounterRT->SetLeftOperand(CountOp); CounterRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(CountOp); RightRT->SetOperator(SMP_UNARY_NUMERIC_OPERATION); RightRT->SetRightOperand(VoidOp); CounterRT->SetRightTree(RightRT); this->RTL.push_back(CounterRT); } } return (DestFound && SourceFound); } // end of SMPInstr::BuildMoveRTL() // Build the RTL for a compare string instruction, possibly with repeat prefix. bool SMPInstr::BuildCompareStringRTL(void) { size_t OpNum; bool Src1Found = false; bool Src2Found = false; bool HasRepeatPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)) || (0 != (this->SMPcmd.auxpref & aux_repne)); op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; op_t VoidOp = InitOp; SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (!Src1Found) { Src1Found = true; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(SMP_U_COMPARE); TempRT->SetRightTree(RightRT); if (this->features & DefMacros[OpNum]) // DEF msg("CMPS 1st opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE msg("CMPS 1st opnd is USE\n"); else msg("CMPS 1st opnd neither DEF nor USE\n"); } else { Src2Found = true; RightRT->SetRightOperand(TempOp); if (this->features & DefMacros[OpNum]) // DEF msg("CMPS 2nd opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) 
// USE msg("CMPS 2nd opnd is USE\n"); else msg("CMPS 2nd opnd neither DEF nor USE\n"); } } } // end for (OpNum = 0; ...) if (!Src1Found || !Src2Found) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find CMPS operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); // Now, create the repeat prefix effects if (HasRepeatPrefix) { // The repeat causes USE and DEF of ECX as a counter SMPRegTransfer *CounterRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; CounterRT->SetLeftOperand(CountOp); CounterRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(CountOp); RightRT->SetOperator(SMP_UNARY_NUMERIC_OPERATION); RightRT->SetRightOperand(VoidOp); CounterRT->SetRightTree(RightRT); this->RTL.push_back(CounterRT); } } return (Src1Found && Src2Found); } // end of SMPInstr::BuildCompareStringRTL() // Build the RTL for an instruction of form dest := source, source := dest bool SMPInstr::BuildExchangeRTL(void) { size_t OpNum; bool Src1Found = false; bool Src2Found = false; SMPRegTransfer *TempRT = new SMPRegTransfer; // second effect, src := dest for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (!Src1Found) { Src1Found = true; TempRT->SetRightOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); #if SMP_VERBOSE_DEBUG_BUILD_RTL if (this->features & DefMacros[OpNum]) // DEF msg("XCHG 1st opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE msg("XCHG 1st opnd is USE\n"); else msg("XCHG 1st opnd neither DEF nor USE\n"); #endif } else { Src2Found = true; TempRT->SetLeftOperand(TempOp); if (this->features & DefMacros[OpNum]) // DEF msg("XCHG 2nd opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE msg("XCHG 2nd opnd is USE\n"); else msg("XCHG 2nd opnd neither DEF nor USE\n"); } } } // end for (OpNum = 0; ...) 
if (!Src1Found || !Src2Found) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find XCHG operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { // Create the first effect, dest := src SMPRegTransfer *FirstRT = new SMPRegTransfer; FirstRT->SetLeftOperand(TempRT->GetRightOperand()); FirstRT->SetRightOperand(TempRT->GetLeftOperand()); FirstRT->SetOperator(SMP_ASSIGN); this->RTL.push_back(FirstRT); // Push the second effect on the list, src := dest this->RTL.push_back(TempRT); } return (Src1Found && Src2Found); } // end of SMPInstr::BuildExchangeRTL() // Build the RTL for an instruction of form dest := dest + source, source := dest bool SMPInstr::BuildExchangeAddRTL(void) { size_t OpNum; bool Src1Found = false; bool Src2Found = false; SMPRegTransfer *TempRT = new SMPRegTransfer; // second effect, src := dest for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (!Src1Found) { Src1Found = true; TempRT->SetRightOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); if (this->features & DefMacros[OpNum]) // DEF msg("XADD 1st opnd is DEF\n"); // should be the case else if (this->features & UseMacros[OpNum]) // USE msg("XADD 1st opnd is USE\n"); else msg("XADD 1st opnd neither DEF nor USE\n"); } else { Src2Found = true; TempRT->SetLeftOperand(TempOp); if (this->features & DefMacros[OpNum]) // DEF msg("XADD 2nd opnd is DEF\n"); else if (this->features & UseMacros[OpNum]) // USE msg("XADD 2nd opnd is USE\n"); // should be the case else msg("XADD 2nd opnd neither DEF nor USE\n"); } } } // end for (OpNum = 0; ...) 
if (!Src1Found || !Src2Found) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find XADD operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { // Create the first effect, dest := dest + src SMPRegTransfer *FirstRT = new SMPRegTransfer; SMPRegTransfer *AddRT = new SMPRegTransfer; AddRT->SetLeftOperand(TempRT->GetRightOperand()); AddRT->SetOperator(SMP_ADD); AddRT->SetRightOperand(TempRT->GetLeftOperand()); FirstRT->SetLeftOperand(TempRT->GetRightOperand()); FirstRT->SetRightTree(AddRT); FirstRT->SetOperator(SMP_ASSIGN); this->RTL.push_back(FirstRT); // Push the second effect on the list, src := dest this->RTL.push_back(TempRT); } return (Src1Found && Src2Found); } // end of SMPInstr::BuildExchangeAddRTL() // Build the RTL for an instruction of form: // if (dest==EAX) dest := source else EAX := dest bool SMPInstr::BuildCompareExchangeRTL(void) { size_t OpNum; bool DestFound = false; bool SourceFound = false; op_t DestOp = InitOp; op_t SourceOp = InitOp; SMPRegTransfer *TempRT = new SMPRegTransfer; for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (MDKnownOperandType(TempOp)) { if (this->features & DefMacros[OpNum]) { // DEF if (!DestFound) { DestFound = true; DestOp = TempOp; } else { msg("CMPXCHG has two DEF operands.\n"); } } else if (this->features & UseMacros[OpNum]) { // USE if (!SourceFound) { SourceFound = true; SourceOp = TempOp; } else { msg("CMPXCHG has two USE operands.\n"); } } } } // end for (OpNum = 0; ...) 
if (!DestFound || !SourceFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find CMPXCHG operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { // Create the first effect, if (dest == EAX) dest := src SMPGuard *Guard1 = new SMPGuard; op_t EAXOp = InitOp; EAXOp.type = o_reg; EAXOp.reg = R_ax; Guard1->SetLeftOperand(DestOp); Guard1->SetOperator(SMP_EQUAL); Guard1->SetRightOperand(EAXOp); SMPRegTransfer *FirstRT = new SMPRegTransfer; FirstRT->SetLeftOperand(DestOp); FirstRT->SetRightOperand(SourceOp); FirstRT->SetOperator(SMP_ASSIGN); FirstRT->SetGuard(Guard1); this->RTL.push_back(FirstRT); // Push the second effect on the list, if (dest!=EAX) dest := EAX SMPGuard *Guard2 = new SMPGuard; Guard2->SetLeftOperand(DestOp); Guard2->SetOperator(SMP_EQUAL); Guard2->SetRightOperand(EAXOp); TempRT->SetLeftOperand(DestOp); TempRT->SetRightOperand(EAXOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetGuard(Guard2); this->RTL.push_back(TempRT); } return (DestFound && SourceFound); } // end of SMPInstr::BuildCompareExchangeRTL() // Build the RTL for an extended FP concatenate and shift instruction bool SMPInstr::BuildPackShiftRTL(SMPoperator PackOp, SMPoperator ShiftOp) { size_t OpNum; bool DestFound = false; bool SourceFound = false; bool CountFound = false; SMPRegTransfer *TempRT = NULL; SMPRegTransfer *ShiftRT = new SMPRegTransfer; SMPRegTransfer *PackRT = new SMPRegTransfer; // RTL structure: top operator is assignment, next right operator is a reverse // shift with the shift count as its left operand, and lowest right operator // is the concatenation operator. Sequence of operations is pack, shift, assign. 
for (OpNum = 0; !(DestFound && SourceFound && CountFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); PackRT->SetLeftOperand(TempOp); PackRT->SetOperator(PackOp); ShiftRT->SetOperator(ShiftOp); ShiftRT->SetRightTree(PackRT); TempRT->SetRightTree(ShiftRT); } } else { // USE if (MDKnownOperandType(TempOp)) { if (!SourceFound) { SourceFound = true; PackRT->SetRightOperand(TempOp); } else { CountFound = true; ShiftRT->SetLeftOperand(TempOp); } } } } // end for (OpNum = 0; ...) if (!DestFound || !SourceFound || !CountFound) { if (NULL != TempRT) delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find MMX/XMM pack and shift operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); } return (DestFound && SourceFound && CountFound); } // end of SMPInstr::BuildPackShiftRTL() // Build the RTL for a compare or test instruction with an implicit EFLAGS destination operand bool SMPInstr::BuildFlagsDestBinaryRTL(SMPoperator BinaryOp) { size_t OpNum; int opcode = this->SMPcmd.itype; bool Source1Found = false; bool Source2Found = false; bool NoOperandsRequired = ((NN_scas == opcode) || (NN_cmps == opcode)); bool HasRepeatPrefix = (0 != (this->SMPcmd.auxpref & aux_rep)) || (0 != (this->SMPcmd.auxpref & aux_repne)); SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t VoidOp = InitOp, FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; op_t FPRegOp = InitOp; FPRegOp.type = o_fpreg; // floating point register stack FPRegOp.reg = 0; // Some floating point instructions use the floating point register stack top as // an implicit source or destination, but the other operand of the 
load or store // is explicit, so we set the implicit operand and let control flow pass to the // main processing loop below. if ((NN_fcomi == opcode) || (NN_fucomi == opcode) || (NN_fcomip == opcode) || (NN_fucomip == opcode)) { // Compares implicitly use the floating point stack top as destination. TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(FPRegOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); Source1Found = true; // The "p" at the end of the opcode indicates that the floating point // register stack gets popped. if ((NN_fcomip == opcode) || (NN_fucomip == opcode)) { this->RTL.ExtraKills.push_back(FPRegOp); } } for (OpNum = 0; !(Source1Found && Source2Found) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Found destination for compare or test at %x : %s\n", this->GetAddr(), this->GetDisasm()); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { if (!Source1Found) { Source1Found = true; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(TempOp); RightRT->SetOperator(BinaryOp); TempRT->SetRightTree(RightRT); } else { assert(!Source2Found); Source2Found = true; RightRT->SetRightOperand(TempOp); } } } } // end for (OpNum = 0; ...) // The compare string instruction always uses DS:ESI and ES:EDI as its source // operands, regardless of the explicit operands given, and might not have // explicit operands; explicit operands are just for documentation. // The scan string instruction uses EAX/AX/AH/AL and ES:EDI as its source // operands and might not have any explicit operands at all. 
if ((!NoOperandsRequired) && (!Source1Found || !Source2Found)) { if (!Source1Found) delete RightRT; else delete TempRT; #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find CMP/TEST operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); // Now, create the repeat prefix effects if (HasRepeatPrefix) { // Must be CMPS or SCAS // The repeat causes USE and DEF of ECX as a counter SMPRegTransfer *CounterRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; CounterRT->SetLeftOperand(CountOp); CounterRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(CountOp); RightRT->SetOperator(SMP_UNARY_NUMERIC_OPERATION); RightRT->SetRightOperand(VoidOp); CounterRT->SetRightTree(RightRT); this->RTL.push_back(CounterRT); } if ((NN_cmps == opcode) || (NN_scas == opcode)) { // The ESI and EDI registers get incremented or decremented, depending // on the direction flag DF, for CMPS; only EDI for SCAS. // This is true with or without a repeat prefix. 
op_t ESIOp = InitOp, EDIOp = InitOp; ESIOp.type = o_reg; ESIOp.reg = R_si; EDIOp.type = o_reg; EDIOp.reg = R_di; if (NN_cmps == opcode) { this->RTL.ExtraKills.push_back(ESIOp); } this->RTL.ExtraKills.push_back(EDIOp); } } return (NoOperandsRequired || (Source1Found && Source2Found)); } // end of SMPInstr::BuildFlagsDestBinaryRTL() // Build the RTL for a direct or indirect call instruction bool SMPInstr::BuildCallRTL(void) { size_t OpNum; bool SourceFound = false; op_t VoidOp = InitOp; SMPRegTransfer *TempRT = NULL; for (OpNum = 0; !SourceFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Found destination operand for call at %x : %s\n", this->GetAddr(), this->GetDisasm()); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(VoidOp); TempRT->SetOperator(SMP_CALL); TempRT->SetRightOperand(TempOp); } } } // end for (OpNum = 0; ...) 
if (!SourceFound) { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find CALL operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { this->RTL.push_back(TempRT); } return SourceFound; } // end of SMPInstr::BuildCallRTL() // Build the RTL for a return instruction, with or without extra bytes popped off stack bool SMPInstr::BuildReturnRTL(void) { size_t OpNum; uval_t PopBytes = 4; // default: pop off return address for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Found destination operand for RET at %x : %s\n", this->GetAddr(), this->GetDisasm()); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { if (o_imm == TempOp.type) { PopBytes += TempOp.value; } else { #if SMP_DEBUG_BUILD_RTL if (!(this->IsTailCall())) { msg("ERROR: Found unexpected operand for return at %x : %s\n", this->GetAddr(), this->GetDisasm()); } #endif } } } } // end for (OpNum = 0; ...) this->AddToStackPointer(PopBytes); return true; } // end of SMPInstr::BuildReturnRTL() // Build the RTL for an ENTER instruction bool SMPInstr::BuildEnterRTL(void) { // An "ENTER k,0" instruction with allocation k and nesting level 0 does the following: // push ebp // mov ebp,esp // sub esp,k // This can be modeled by the parallel effects: // [esp-4] := ebp; ebp := esp - 4; esp := esp - k // If nesting level is greater than zero, we have a block structure language with // nested procedures, in which additional frame pointers are saved: // "ENTER k,n" pushes n additional frame pointers on the stack. We will only model // the change in the stack pointer here, and not attempt to transfer the display // pointers. A warning will be issued to the log file. Parallel effects are: // [esp-4] := ebp; ebp := esp - 4; esp := esp - (k + n*4) // Note that k and n and immediate values so the final expression can be computed. 
size_t OpNum; uval_t NestingLevel = 0; uval_t AllocBytes = 0; bool AllocFound = false; bool NestingLevelFound = false; op_t StackPointerOp = InitOp; // ESP StackPointerOp.type = o_reg; StackPointerOp.reg = R_sp; op_t FramePointerOp = InitOp; // EBP FramePointerOp.type = o_reg; FramePointerOp.reg = R_bp; op_t Immed4Op = InitOp; // 4 Immed4Op.type = o_imm; Immed4Op.value = 4; op_t SavedEBP = InitOp; // [ESP-4], location of saved EBP SavedEBP.type = o_displ; SavedEBP.addr = (ea_t) -4; SavedEBP.reg = R_sp; for (OpNum = 0; !(AllocFound && NestingLevelFound) && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Found destination operand for ENTER at %x : %s\n", this->GetAddr(), this->GetDisasm()); #endif } } else { // USE if (MDKnownOperandType(TempOp)) { if (o_imm == TempOp.type) { if (!AllocFound) { AllocBytes = TempOp.value; AllocFound = true; } else { NestingLevel = TempOp.value; NestingLevelFound = true; } } else { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Found unexpected operand for ENTER at %x : %s\n", this->GetAddr(), this->GetDisasm()); #endif } } } } // end for (OpNum = 0; ...) 
if (!AllocFound) { #if SMP_DEBUG_BUILD_RTL msg("ERROR: Could not find allocation operand for ENTER at %x : %s\n", this->GetAddr(), this->GetDisasm()); #endif } else { SMPRegTransfer *TempRT = new SMPRegTransfer; // Add first effect: [esp-4] := ebp TempRT->SetLeftOperand(SavedEBP); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(FramePointerOp); this->RTL.push_back(TempRT); TempRT = NULL; // Add second effect: ebp := esp - 4 TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FramePointerOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(StackPointerOp); RightRT->SetOperator(SMP_SUBTRACT); RightRT->SetRightOperand(Immed4Op); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); TempRT = NULL; RightRT = NULL; // Add final effect on stack pointer AllocBytes += (4 * NestingLevel); if (0 != NestingLevel) { msg("WARNING: Nested procedures in ENTER instruction at %x : %s\n", this->GetAddr(), this->GetDisasm()); } this->AddToStackPointer(AllocBytes); } return AllocFound; } // end of SMPInstr::BuildEnterRTL() // Build the RTL for an LEAVE instruction bool SMPInstr::BuildLeaveRTL(void) { // A LEAVE instruction simulates the following instructions: // mov ebp into esp (deallocates stack frame) // pop saved ebp off stack into ebp // We will model these two instructions with three parallel effects: // esp := ebp; ebp := [ebp+0]; esp = esp + 4; // There cannot be two definitions of esp in the list of effects, so we do: // esp := ebp + 4; ebp := [ebp+0] as our two parallel effects op_t StackPointerOp = InitOp; // ESP StackPointerOp.type = o_reg; StackPointerOp.reg = R_sp; op_t FramePointerOp = InitOp; // EBP FramePointerOp.type = o_reg; FramePointerOp.reg = R_bp; op_t Immed4Op = InitOp; // 4 Immed4Op.type = o_imm; Immed4Op.value = 4; op_t SavedEBP = InitOp; // [EBP+0] SavedEBP.type = o_displ; SavedEBP.reg = R_bp; // Build first effect: ESP := EBP + 4 SMPRegTransfer *TempRT = new SMPRegTransfer; 
TempRT->SetLeftOperand(StackPointerOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetOperator(SMP_ADD); RightRT->SetLeftOperand(FramePointerOp); RightRT->SetRightOperand(Immed4Op); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); TempRT = NULL; RightRT = NULL; // Build second effect: EBP := [EBP+0] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FramePointerOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(SavedEBP); this->RTL.push_back(TempRT); TempRT = NULL; return true; } // end of SMPInstr::BuildLeaveRTL() // Build OptCategory 8 RTLs, which set system info into EDX:EAX. bool SMPInstr::BuildOptType8RTL(void) { op_t DestOp = InitOp; DestOp.type = o_reg; op_t VoidOp = InitOp; // Create the effect on EDX. SMPRegTransfer *TempRT = new SMPRegTransfer; DestOp.reg = R_dx; TempRT->SetLeftOperand(DestOp); TempRT->SetOperator(SMP_ASSIGN); SMPRegTransfer *RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(VoidOp); RightRT->SetOperator(SMP_SYSTEM_OPERATION); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); // Create the effect on EAX. 
TempRT = NULL; RightRT = NULL; TempRT = new SMPRegTransfer; DestOp.reg = R_ax; TempRT->SetLeftOperand(DestOp); TempRT->SetOperator(SMP_ASSIGN); RightRT = new SMPRegTransfer; RightRT->SetLeftOperand(VoidOp); RightRT->SetOperator(SMP_SYSTEM_OPERATION); RightRT->SetRightOperand(VoidOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); return true; } // end of BuildOptType8RTL() // Build the RTL for a direct or indirect jump instruction bool SMPInstr::BuildJumpRTL(SMPoperator CondBranchOp) { size_t OpNum; bool TargetFound = false; SMPRegTransfer *TempRT = NULL; op_t EIPOp = InitOp, ZeroOp = InitOp, FlagsOp = InitOp; EIPOp.type = o_reg; EIPOp.reg = R_ip; ZeroOp.type = o_imm; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; op_t CountOp = InitOp; CountOp.type = o_reg; CountOp.reg = R_cx; if (this->IsTailCall()) return this->BuildReturnRTL(); for (OpNum = 0; !TargetFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE if (MDKnownOperandType(TempOp)) { TargetFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(EIPOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(TempOp); if (CondBranchOp != SMP_NULL_OPERATOR) { // Set up a guard expression comparing EFLAGS to zero. // NOTE: This is imprecise for value-set purposes, but OK for types. SMPGuard *BranchCondition = new SMPGuard; BranchCondition->SetOperator(CondBranchOp); // The conditional jumps on ECX==0 compare to ECX, not EFLAGS. if ((NN_jcxz <= this->SMPcmd.itype) && (NN_jrcxz >= this->SMPcmd.itype)) BranchCondition->SetLeftOperand(CountOp); else BranchCondition->SetLeftOperand(FlagsOp); BranchCondition->SetRightOperand(ZeroOp); TempRT->SetGuard(BranchCondition); } this->RTL.push_back(TempRT); } } } // end for (OpNum = 0; ...) 
#if SMP_DEBUG_BUILD_RTL if (!TargetFound) { msg("ERROR: Could not find jump target at %x for %s\n", this->GetAddr(), this->GetDisasm()); } #endif return TargetFound; } // end of SMPInstr::BuildJumpRTL() // Add to the stack pointer to deallocate stack space, e.g. for a pop instruction. void SMPInstr::AddToStackPointer(uval_t delta) { SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t StackOp = InitOp, DeltaOp = InitOp; StackOp.type = o_reg; StackOp.reg = R_sp; DeltaOp.type = o_imm; DeltaOp.value = delta; TempRT->SetLeftOperand(StackOp); // ESP := RightRT TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(StackOp); // ESP + delta RightRT->SetOperator(SMP_ADD); RightRT->SetRightOperand(DeltaOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); return; } // end of SMPInstr::AddToStackPointer() // Add to the stack pointer to deallocate stack space, e.g. for a pop instruction. void SMPInstr::SubFromStackPointer(uval_t delta) { SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; op_t StackOp = InitOp, DeltaOp = InitOp; StackOp.type = o_reg; StackOp.reg = R_sp; DeltaOp.type = o_imm; DeltaOp.value = delta; TempRT->SetLeftOperand(StackOp); // ESP := RightRT TempRT->SetOperator(SMP_ASSIGN); RightRT->SetLeftOperand(StackOp); // ESP - delta RightRT->SetOperator(SMP_SUBTRACT); RightRT->SetRightOperand(DeltaOp); TempRT->SetRightTree(RightRT); this->RTL.push_back(TempRT); return; } // end of SMPInstr::SubFromStackPointer() #define SMP_FIRST_POP_FLAGS NN_popfw #define SMP_LAST_POP_FLAGS NN_popfq #define SMP_FIRST_POP_ALL NN_popaw #define SMP_LAST_POP_ALL NN_popaq // Build the RTL for a pop instruction bool SMPInstr::BuildPopRTL(void) { size_t OpNum, OpSize; bool DestFound = false; SMPRegTransfer *TempRT = NULL; op_t StackOp = InitOp, FlagsOp = InitOp; StackOp.type = o_displ; StackOp.reg = R_sp; // StackOp.addr = 0; // [ESP+0] FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; // 
Handle special cases first. if ((SMP_FIRST_POP_FLAGS <= this->SMPcmd.itype) && (SMP_LAST_POP_FLAGS >= this->SMPcmd.itype)) { TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->AddToStackPointer(4); return true; } if ((SMP_FIRST_POP_ALL <= this->SMPcmd.itype) && (SMP_LAST_POP_ALL >= this->SMPcmd.itype)) { // We pop off 7 registers from the 8 that were pushed on the stack. // The pushed stack pointer is ignored. Instead, the stack pointer value is // adjusted at the end, per the Intel instruction manuals. op_t RegOp = InitOp; RegOp.type = o_reg; // EDI comes from [ESP+0] RegOp.reg = R_di; StackOp.addr = 0; // [ESP+0] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ESI comes from [ESP+4] RegOp.reg = R_si; StackOp.addr = 4; // [ESP+4] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EBP comes from [ESP+8] RegOp.reg = R_bp; StackOp.addr = 8; // [ESP+8] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Skip over saved ESP at [ESP+12] // EBX comes from [ESP+16] RegOp.reg = R_bx; StackOp.addr = 16; // [ESP+16] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EDX comes from [ESP+20] RegOp.reg = R_dx; StackOp.addr = 20; // [ESP+20] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ECX 
comes from [ESP+24] RegOp.reg = R_cx; StackOp.addr = 24; // [ESP+24] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EAX comes from [ESP+28] RegOp.reg = R_ax; StackOp.addr = 28; // [ESP+28] TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->AddToStackPointer(32); return true; } // end for "pop all" instructions // If we reach this point, we have a simple POP instruction. for (OpNum = 0; !DestFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & DefMacros[OpNum]) { // DEF if (MDKnownOperandType(TempOp)) { DestFound = true; TempRT = new SMPRegTransfer; TempRT->SetLeftOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); StackOp.dtyp = TempOp.dtyp; // size of transfer TempRT->SetRightOperand(StackOp); this->RTL.push_back(TempRT); // Now create the stack pointer increment effect. OpSize = GetOpDataSize(TempOp); this->AddToStackPointer((uval_t) OpSize); } } } // end for (OpNum = 0; ...) #if SMP_DEBUG_BUILD_RTL if (!DestFound) { msg("ERROR: Could not find pop operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); } #endif return DestFound; } // end of SMPInstr::BuildPopRTL() #define SMP_FIRST_PUSH_FLAGS NN_pushfw #define SMP_LAST_PUSH_FLAGS NN_pushfq #define SMP_FIRST_PUSH_ALL NN_pushaw #define SMP_LAST_PUSH_ALL NN_pushaq // Build the RTL for a push instruction bool SMPInstr::BuildPushRTL(void) { size_t OpNum, OpSize; bool SourceFound = false; SMPRegTransfer *TempRT = NULL; op_t StackOp = InitOp, FlagsOp = InitOp; StackOp.type = o_displ; StackOp.reg = R_sp; StackOp.addr = (ea_t) -4; // [ESP-4] FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; // Handle special cases first. 
if ((SMP_FIRST_PUSH_FLAGS <= this->SMPcmd.itype) && (SMP_LAST_PUSH_FLAGS >= this->SMPcmd.itype)) { TempRT = new SMPRegTransfer; TempRT->SetRightOperand(FlagsOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); // Now create the stack pointer increment effect. this->SubFromStackPointer(4); return true; } if ((SMP_FIRST_PUSH_ALL <= this->SMPcmd.itype) && (SMP_LAST_PUSH_ALL >= this->SMPcmd.itype)) { op_t RegOp = InitOp; RegOp.type = o_reg; // EDI goes to [ESP-32] RegOp.reg = R_di; StackOp.addr = (ea_t) -32; // [ESP-32] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ESI goes to [ESP-28] RegOp.reg = R_si; StackOp.addr = (ea_t) -28; // [ESP-28] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EBP goes to [ESP-24] RegOp.reg = R_bp; StackOp.addr = (ea_t) -24; // [ESP-24] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // ESP goes to [ESP-20] RegOp.reg = R_sp; StackOp.addr = (ea_t) -20; // [ESP-20] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EBX goes to [ESP-16] RegOp.reg = R_bx; StackOp.addr = (ea_t) -16; // [ESP-16] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EDX goes to [ESP-12] RegOp.reg = R_dx; StackOp.addr = (ea_t) -12; // [ESP-12] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; 
// ECX goes to [ESP-8] RegOp.reg = R_cx; StackOp.addr = (ea_t) -8; // [ESP-8] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // EAX goes to [ESP-4] RegOp.reg = R_ax; StackOp.addr = (ea_t) -4; // [ESP-4] TempRT = new SMPRegTransfer; TempRT->SetRightOperand(RegOp); TempRT->SetOperator(SMP_ASSIGN); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->SubFromStackPointer(32); return true; } // end for "pop all" instructions // If we reach this point, we have a simple PUSH instruction. for (OpNum = 0; !SourceFound && (OpNum < UA_MAXOP); ++OpNum) { op_t TempOp = this->SMPcmd.Operands[OpNum]; if (this->features & UseMacros[OpNum]) { // USE if (MDKnownOperandType(TempOp)) { SourceFound = true; OpSize = GetOpDataSize(TempOp); TempRT = new SMPRegTransfer; TempRT->SetRightOperand(TempOp); TempRT->SetOperator(SMP_ASSIGN); StackOp.dtyp = TempOp.dtyp; // size of transfer StackOp.addr = (ea_t) (-((signed int) OpSize)); TempRT->SetLeftOperand(StackOp); this->RTL.push_back(TempRT); TempRT = NULL; // Now create the stack pointer increment effect. this->SubFromStackPointer((uval_t) OpSize); #if 0 this->RTL.Dump(); #endif } } } // end for (OpNum = 0; ...) #if SMP_DEBUG_BUILD_RTL if (!SourceFound) { msg("ERROR: Could not find push operand at %x for %s\n", this->GetAddr(), this->GetDisasm()); } #endif return SourceFound; } // end of SMPInstr::BuildPushRTL() // Build RTL trees from the SMPcmd info. bool SMPInstr::BuildRTL(void) { op_t FlagsOp = InitOp; FlagsOp.type = o_reg; FlagsOp.reg = X86_FLAGS_REG; SMPRegTransfer *NopRT = NULL; // no-op register transfer // We don't want to explicitly represent the various no-ops except as NULL operations. // E.g. mov esi,esi should not generate DEF and USE of esi, because esi does not change. 
if (this->IsNop()) { NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; } switch (this->SMPcmd.itype) { case NN_aaa: // ASCII Adjust after Addition case NN_aad: // ASCII Adjust AX before Division case NN_aam: // ASCII Adjust AX after Multiply case NN_aas: // ASCII Adjust AL after Subtraction return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_adc: // Add with Carry #if SMP_BUILD_SPECIAL_ADC_SBB_RTL return this->BuildBinaryPlusFlagsRTL(SMP_ADD_CARRY); #else return this->BuildBinaryRTL(SMP_ADD_CARRY); #endif case NN_add: // Add return this->BuildBinaryRTL(SMP_ADD); case NN_and: // Logical AND return this->BuildBinaryRTL(SMP_BITWISE_AND); case NN_arpl: // Adjust RPL Field of Selector case NN_bound: // Check Array Index Against Bounds return false; break; case NN_bsf: // Bit Scan Forward case NN_bsr: // Bit Scan Reverse return this->BuildUnary2OpndRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_bt: // Bit Test return this->BuildFlagsDestBinaryRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_btc: // Bit Test and Complement case NN_btr: // Bit Test and Reset case NN_bts: // Bit Test and Set // Has effects on both the carry flag and the first operand this->RTL.ExtraKills.push_back(FlagsOp); return this->BuildBinaryRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_call: // Call Procedure case NN_callfi: // Indirect Call Far Procedure case NN_callni: // Indirect Call Near Procedure return this->BuildCallRTL(); case NN_cbw: // AL -> AX (with sign) case NN_cwde: // AX -> EAX (with sign) case NN_cdqe: // EAX -> RAX (with sign) return this->BuildUnaryRTL(SMP_SIGN_EXTEND); case NN_clc: // Clear Carry Flag case NN_cld: // Clear Direction Flag return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_cli: // Clear Interrupt Flag case NN_clts: // Clear Task-Switched Flag in CR0 // We don't track the interrupt flag or the special registers, // so we can just consider these to be no-ops. 
// NOTE: Shouldn't we killthe EFLAGS register on NN_cli ??!!??!! NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_cmc: // Complement Carry Flag return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_cmp: // Compare Two Operands return this->BuildFlagsDestBinaryRTL(SMP_S_COMPARE); case NN_cmps: // Compare Strings // Why do we no longer use BuildCompareStringRTL()? ****!!!!**** Test it! return this->BuildFlagsDestBinaryRTL(SMP_U_COMPARE); case NN_cwd: // AX -> DX:AX (with sign) case NN_cdq: // EAX -> EDX:EAX (with sign) case NN_cqo: // RAX -> RDX:RAX (with sign) return this->BuildUnary2OpndRTL(SMP_SIGN_EXTEND); case NN_daa: // Decimal Adjust AL after Addition case NN_das: // Decimal Adjust AL after Subtraction return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_dec: // Decrement by 1 return this->BuildUnaryRTL(SMP_DECREMENT); case NN_div: // Unsigned Divide return this->BuildMultiplyDivideRTL(SMP_U_DIVIDE); case NN_enterw: // Make Stack Frame for Procedure Parameters case NN_enter: // Make Stack Frame for Procedure Parameters case NN_enterd: // Make Stack Frame for Procedure Parameters case NN_enterq: // Make Stack Frame for Procedure Parameters return this->BuildEnterRTL(); case NN_hlt: // Halt // Treat as a no-op NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_idiv: // Signed Divide return this->BuildMultiplyDivideRTL(SMP_S_DIVIDE); case NN_imul: // Signed Multiply return this->BuildMultiplyDivideRTL(SMP_S_MULTIPLY); case NN_in: // Input from Port return this->BuildUnary2OpndRTL(SMP_INPUT); case NN_inc: // Increment by 1 return this->BuildUnaryRTL(SMP_INCREMENT); case NN_ins: // Input Byte(s) from Port to String return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_int: // Call to Interrupt Procedure case NN_into: // Call to Interrupt Procedure if Overflow Flag = 1 case 
NN_int3: // Trap to Debugger return this->BuildCallRTL(); case NN_iretw: // Interrupt Return case NN_iret: // Interrupt Return case NN_iretd: // Interrupt Return (use32) case NN_iretq: // Interrupt Return (use64) return this->BuildReturnRTL(); case NN_ja: // Jump if Above (CF=0 & ZF=0) case NN_jae: // Jump if Above or Equal (CF=0) case NN_jb: // Jump if Below (CF=1) case NN_jbe: // Jump if Below or Equal (CF=1 | ZF=1) case NN_jc: // Jump if Carry (CF=1) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jcxz: // Jump if CX is 0 case NN_jecxz: // Jump if ECX is 0 case NN_jrcxz: // Jump if RCX is 0 return this->BuildJumpRTL(SMP_EQUAL); // special case in BuildJumpRTL() case NN_je: // Jump if Equal (ZF=1) return this->BuildJumpRTL(SMP_EQUAL); case NN_jg: // Jump if Greater (ZF=0 & SF=OF) return this->BuildJumpRTL(SMP_GREATER_THAN); case NN_jge: // Jump if Greater or Equal (SF=OF) return this->BuildJumpRTL(SMP_GREATER_EQUAL); case NN_jl: // Jump if Less (SF!=OF) return this->BuildJumpRTL(SMP_LESS_THAN); case NN_jle: // Jump if Less or Equal (ZF=1 | SF!=OF) return this->BuildJumpRTL(SMP_LESS_EQUAL); case NN_jna: // Jump if Not Above (CF=1 | ZF=1) case NN_jnae: // Jump if Not Above or Equal (CF=1) case NN_jnb: // Jump if Not Below (CF=0) case NN_jnbe: // Jump if Not Below or Equal (CF=0 & ZF=0) a.k.a. ja case NN_jnc: // Jump if Not Carry (CF=0) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jne: // Jump if Not Equal (ZF=0) return this->BuildJumpRTL(SMP_NOT_EQUAL); case NN_jng: // Jump if Not Greater (ZF=1 | SF!=OF) a.k.a. jle return this->BuildJumpRTL(SMP_LESS_EQUAL); case NN_jnge: // Jump if Not Greater or Equal (SF != OF) ** return this->BuildJumpRTL(SMP_LESS_THAN); case NN_jnl: // Jump if Not Less (SF=OF) a.k.a. jge return this->BuildJumpRTL(SMP_GREATER_EQUAL); case NN_jnle: // Jump if Not Less or Equal (ZF=0 & SF=OF) a.k.a. 
jg return this->BuildJumpRTL(SMP_GREATER_THAN); case NN_jno: // Jump if Not Overflow (OF=0) case NN_jnp: // Jump if Not Parity (PF=0) case NN_jns: // Jump if Not Sign (SF=0) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jnz: // Jump if Not Zero (ZF=0) a.k.a. jne return this->BuildJumpRTL(SMP_NOT_EQUAL); case NN_jo: // Jump if Overflow (OF=1) case NN_jp: // Jump if Parity (PF=1) case NN_jpe: // Jump if Parity Even (PF=1) case NN_jpo: // Jump if Parity Odd (PF=0) case NN_js: // Jump if Sign (SF=1) return this->BuildJumpRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_jz: // Jump if Zero (ZF=1) return this->BuildJumpRTL(SMP_EQUAL); case NN_jmp: // Jump case NN_jmpfi: // Indirect Far Jump case NN_jmpni: // Indirect Near Jump case NN_jmpshort: // Jump Short (not used) return this->BuildJumpRTL(SMP_NULL_OPERATOR); case NN_lahf: // Load Flags into AH Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_lar: // Load Access Right Byte return false; break; case NN_lea: // Load Effective Address return this->BuildLeaRTL(); break; case NN_leavew: // High Level Procedure Exit case NN_leave: // High Level Procedure Exit case NN_leaved: // High Level Procedure Exit case NN_leaveq: // High Level Procedure Exit return this->BuildLeaveRTL(); break; case NN_lgdt: // Load Global Descriptor Table Register case NN_lidt: // Load Interrupt Descriptor Table Register return false; break; case NN_lgs: // Load Full Pointer to GS:xx case NN_lss: // Load Full Pointer to SS:xx case NN_lds: // Load Full Pointer to DS:xx case NN_les: // Load Full Pointer to ES:xx case NN_lfs: // Load Full Pointer to FS:xx // These instructions differ from NN_lea only in setting // a segment register in addition to a pointer. We are // not yet tracking segment registers. 
return this->BuildLeaRTL(); break; case NN_lldt: // Load Local Descriptor Table Register case NN_lmsw: // Load Machine Status Word case NN_lock: // Assert LOCK# Signal Prefix case NN_lods: // Load String return false; break; case NN_loopw: // Loop while ECX != 0 case NN_loop: // Loop while CX != 0 case NN_loopd: // Loop while ECX != 0 case NN_loopq: // Loop while RCX != 0 case NN_loopwe: // Loop while CX != 0 and ZF=1 case NN_loope: // Loop while rCX != 0 and ZF=1 case NN_loopde: // Loop while ECX != 0 and ZF=1 case NN_loopqe: // Loop while RCX != 0 and ZF=1 case NN_loopwne: // Loop while CX != 0 and ZF=0 case NN_loopne: // Loop while rCX != 0 and ZF=0 case NN_loopdne: // Loop while ECX != 0 and ZF=0 case NN_loopqne: // Loop while RCX != 0 and ZF=0 return false; break; case NN_lsl: // Load Segment Limit case NN_ltr: // Load Task Register return false; break; case NN_mov: // Move Data case NN_movsp: // Move to/from Special Registers case NN_movs: // Move Byte(s) from String to String return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_movsx: // Move with Sign-Extend return this->BuildUnary2OpndRTL(SMP_SIGN_EXTEND); case NN_movzx: // Move with Zero-Extend return this->BuildUnary2OpndRTL(SMP_ZERO_EXTEND); case NN_mul: // Unsigned Multiplication of AL or AX return this->BuildMultiplyDivideRTL(SMP_U_MULTIPLY); case NN_neg: // Two's Complement Negation return this->BuildUnaryRTL(SMP_NEGATE); case NN_nop: // No Operation NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_not: // One's Complement Negation return this->BuildUnaryRTL(SMP_BITWISE_NOT); case NN_or: // Logical Inclusive OR return this->BuildBinaryRTL(SMP_BITWISE_OR); case NN_out: // Output to Port return this->BuildUnary2OpndRTL(SMP_OUTPUT); case NN_outs: // Output Byte(s) to Port return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_pop: // Pop a word from the Stack case NN_popaw: // Pop all General Registers case NN_popa: 
// Pop all General Registers case NN_popad: // Pop all General Registers (use32) case NN_popaq: // Pop all General Registers (use64) case NN_popfw: // Pop Stack into Flags Register case NN_popf: // Pop Stack into Flags Register case NN_popfd: // Pop Stack into Eflags Register case NN_popfq: // Pop Stack into Rflags Register return this->BuildPopRTL(); case NN_push: // Push Operand onto the Stack case NN_pushaw: // Push all General Registers case NN_pusha: // Push all General Registers case NN_pushad: // Push all General Registers (use32) case NN_pushaq: // Push all General Registers (use64) case NN_pushfw: // Push Flags Register onto the Stack case NN_pushf: // Push Flags Register onto the Stack case NN_pushfd: // Push Flags Register onto the Stack (use32) case NN_pushfq: // Push Flags Register onto the Stack (use64) return this->BuildPushRTL(); case NN_rcl: // Rotate Through Carry Left return this->BuildBinaryPlusFlagsRTL(SMP_ROTATE_LEFT_CARRY); case NN_rcr: // Rotate Through Carry Right return this->BuildBinaryPlusFlagsRTL(SMP_ROTATE_RIGHT_CARRY); case NN_rol: // Rotate Left return this->BuildBinaryRTL(SMP_ROTATE_LEFT); case NN_ror: // Rotate Right return this->BuildBinaryRTL(SMP_ROTATE_RIGHT); case NN_rep: // Repeat String Operation case NN_repe: // Repeat String Operation while ZF=1 case NN_repne: // Repeat String Operation while ZF=0 return false; break; case NN_retn: // Return Near from Procedure case NN_retf: // Return Far from Procedure return this->BuildReturnRTL(); case NN_sahf: // Store AH into Flags Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_sal: // Shift Arithmetic Left return this->BuildBinaryRTL(SMP_S_LEFT_SHIFT); case NN_sar: // Shift Arithmetic Right return this->BuildBinaryRTL(SMP_S_RIGHT_SHIFT); case NN_shl: // Shift Logical Left return this->BuildBinaryRTL(SMP_U_LEFT_SHIFT); case NN_shr: // Shift Logical Right return this->BuildBinaryRTL(SMP_U_RIGHT_SHIFT); case NN_sbb: // Integer Subtraction with Borrow #if 
SMP_BUILD_SPECIAL_ADC_SBB_RTL return this->BuildBinaryPlusFlagsRTL(SMP_SUBTRACT_BORROW); #else return this->BuildBinaryRTL(SMP_SUBTRACT_BORROW); #endif case NN_scas: // Scan String return this->BuildFlagsDestBinaryRTL(SMP_U_COMPARE); case NN_seta: // Set Byte if Above (CF=0 & ZF=0) case NN_setae: // Set Byte if Above or Equal (CF=0) case NN_setb: // Set Byte if Below (CF=1) case NN_setbe: // Set Byte if Below or Equal (CF=1 | ZF=1) case NN_setc: // Set Byte if Carry (CF=1) case NN_sete: // Set Byte if Equal (ZF=1) case NN_setg: // Set Byte if Greater (ZF=0 & SF=OF) case NN_setge: // Set Byte if Greater or Equal (SF=OF) case NN_setl: // Set Byte if Less (SF!=OF) case NN_setle: // Set Byte if Less or Equal (ZF=1 | SF!=OF) case NN_setna: // Set Byte if Not Above (CF=1 | ZF=1) case NN_setnae: // Set Byte if Not Above or Equal (CF=1) case NN_setnb: // Set Byte if Not Below (CF=0) case NN_setnbe: // Set Byte if Not Below or Equal (CF=0 & ZF=0) case NN_setnc: // Set Byte if Not Carry (CF=0) case NN_setne: // Set Byte if Not Equal (ZF=0) case NN_setng: // Set Byte if Not Greater (ZF=1 | SF!=OF) case NN_setnge: // Set Byte if Not Greater or Equal (ZF=1) case NN_setnl: // Set Byte if Not Less (SF=OF) case NN_setnle: // Set Byte if Not Less or Equal (ZF=0 & SF=OF) case NN_setno: // Set Byte if Not Overflow (OF=0) case NN_setnp: // Set Byte if Not Parity (PF=0) case NN_setns: // Set Byte if Not Sign (SF=0) case NN_setnz: // Set Byte if Not Zero (ZF=0) case NN_seto: // Set Byte if Overflow (OF=1) case NN_setp: // Set Byte if Parity (PF=1) case NN_setpe: // Set Byte if Parity Even (PF=1) case NN_setpo: // Set Byte if Parity Odd (PF=0) case NN_sets: // Set Byte if Sign (SF=1) case NN_setz: // Set Byte if Zero (ZF=1) // Destination always get set to NUMERIC 0 or 1, depending on // the condition and the relevant flags bits. 
Best way to model // this in an RTL is to perform an unspecified unary NUMERIC // operation on the flags register and assign the result to the // destination operand, making it always NUMERIC. return this->BuildUnary2OpndRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_sgdt: // Store Global Descriptor Table Register case NN_sidt: // Store Interrupt Descriptor Table Register return false; break; case NN_shld: // Double Precision Shift Left return this->BuildDoubleShiftRTL(SMP_U_LEFT_SHIFT); case NN_shrd: // Double Precision Shift Right return this->BuildDoubleShiftRTL(SMP_U_RIGHT_SHIFT); case NN_sldt: // Store Local Descriptor Table Register case NN_smsw: // Store Machine Status Word return false; break; case NN_stc: // Set Carry Flag case NN_std: // Set Direction Flag return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_sti: // Set Interrupt Flag NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_stos: // Store String return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_str: // Store Task Register return false; break; case NN_sub: // Integer Subtraction return this->BuildBinaryRTL(SMP_SUBTRACT); case NN_test: // Logical Compare return this->BuildFlagsDestBinaryRTL(SMP_U_COMPARE); case NN_verr: // Verify a Segment for Reading case NN_verw: // Verify a Segment for Writing case NN_wait: // Wait until BUSY# Pin is Inactive (HIGH) NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; if (NN_wait != this->SMPcmd.itype) this->RTL.ExtraKills.push_back(FlagsOp); return true; case NN_xchg: // Exchange Register/Memory with Register return this->BuildExchangeRTL(); case NN_xlat: // Table Lookup Translation return false; break; case NN_xor: // Logical Exclusive OR return this->BuildBinaryRTL(SMP_BITWISE_XOR); // // 486 instructions // case NN_cmpxchg: // Compare and Exchange return this->BuildCompareExchangeRTL(); case NN_bswap: // Swap 
bits in EAX return this->BuildUnaryRTL(SMP_UNARY_NUMERIC_OPERATION); case NN_xadd: // t<-dest; dest<-src+dest; src<-t return this->BuildExchangeAddRTL(); case NN_invd: // Invalidate Data Cache case NN_wbinvd: // Invalidate Data Cache (write changes) case NN_invlpg: // Invalidate TLB entry NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // // Pentium instructions // case NN_rdmsr: // Read Machine Status Register return this->BuildOptType8RTL(); case NN_wrmsr: // Write Machine Status Register return false; break; case NN_cpuid: // Get CPU ID return this->BuildOptType8RTL(); case NN_cmpxchg8b: // Compare and Exchange Eight Bytes return false; break; case NN_rdtsc: // Read Time Stamp Counter return this->BuildOptType8RTL(); case NN_rsm: // Resume from System Management Mode NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // // Pentium Pro instructions // case NN_cmova: // Move if Above (CF=0 & ZF=0) case NN_cmovb: // Move if Below (CF=1) case NN_cmovbe: // Move if Below or Equal (CF=1 | ZF=1) return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_cmovg: // Move if Greater (ZF=0 & SF=OF) return this->BuildMoveRTL(SMP_GREATER_THAN); case NN_cmovge: // Move if Greater or Equal (SF=OF) return this->BuildMoveRTL(SMP_GREATER_EQUAL); case NN_cmovl: // Move if Less (SF!=OF) return this->BuildMoveRTL(SMP_LESS_THAN); case NN_cmovle: // Move if Less or Equal (ZF=1 | SF!=OF) return this->BuildMoveRTL(SMP_LESS_EQUAL); case NN_cmovnb: // Move if Not Below (CF=0) case NN_cmovno: // Move if Not Overflow (OF=0) case NN_cmovnp: // Move if Not Parity (PF=0) case NN_cmovns: // Move if Not Sign (SF=0) return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_cmovnz: // Move if Not Zero (ZF=0) return this->BuildMoveRTL(SMP_NOT_EQUAL); case NN_cmovo: // Move if Overflow (OF=1) case NN_cmovp: // Move if Parity (PF=1) case 
NN_cmovs: // Move if Sign (SF=1) return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_cmovz: // Move if Zero (ZF=1) return this->BuildMoveRTL(SMP_EQUAL); case NN_fcmovb: // Floating Move if Below return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmove: // Floating Move if Equal return this->BuildMoveRTL(SMP_EQUAL); case NN_fcmovbe: // Floating Move if Below or Equal return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovu: // Floating Move if Unordered return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovnb: // Floating Move if Not Below return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovne: // Floating Move if Not Equal return this->BuildMoveRTL(SMP_NOT_EQUAL); case NN_fcmovnbe: // Floating Move if Not Below or Equal return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcmovnu: // Floating Move if Not Unordered return this->BuildMoveRTL(SMP_BINARY_NUMERIC_OPERATION); case NN_fcomi: // FP Compare: result in EFLAGS case NN_fucomi: // FP Unordered Compare: result in EFLAGS case NN_fcomip: // FP Compare: result in EFLAGS: pop stack case NN_fucomip: // FP Unordered Compare: result in EFLAGS: pop stack return this->BuildFlagsDestBinaryRTL(SMP_S_COMPARE); break; case NN_rdpmc: // Read Performance Monitor Counter return this->BuildOptType8RTL(); // // FPP instructions // case NN_fld: // Load Real case NN_fst: // Store Real case NN_fstp: // Store Real and Pop return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_fxch: // Exchange Registers // FP registers remain NUMERIC anyway, so this is a no-op to our type system. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_fild: // Load Integer case NN_fist: // Store Integer case NN_fistp: // Store Integer and Pop case NN_fbld: // Load BCD case NN_fbstp: // Store BCD and Pop return this->BuildMoveRTL(SMP_NULL_OPERATOR); case NN_fadd: // Add Real case NN_faddp: // Add Real and Pop case NN_fiadd: // Add Integer case NN_fsub: // Subtract Real case NN_fsubp: // Subtract Real and Pop case NN_fisub: // Subtract Integer case NN_fsubr: // Subtract Real Reversed case NN_fsubrp: // Subtract Real Reversed and Pop case NN_fisubr: // Subtract Integer Reversed case NN_fmul: // Multiply Real case NN_fmulp: // Multiply Real and Pop case NN_fimul: // Multiply Integer case NN_fdiv: // Divide Real case NN_fdivp: // Divide Real and Pop case NN_fidiv: // Divide Integer case NN_fdivr: // Divide Real Reversed case NN_fdivrp: // Divide Real Reversed and Pop case NN_fidivr: // Divide Integer Reversed return this->BuildBinaryRTL(SMP_BINARY_FLOATING_ARITHMETIC); case NN_fsqrt: // Square Root case NN_fscale: // Scale: st(0) <- st(0) * 2^st(1) case NN_fprem: // Partial Remainder case NN_frndint: // Round to Integer case NN_fxtract: // Extract exponent and significand case NN_fabs: // Absolute value case NN_fchs: // Change Sign return this->BuildUnaryRTL(SMP_UNARY_FLOATING_ARITHMETIC); case NN_fcom: // Compare Real case NN_fcomp: // Compare Real and Pop case NN_fcompp: // Compare Real and Pop Twice case NN_ficom: // Compare Integer case NN_ficomp: // Compare Integer and Pop case NN_ftst: // Test case NN_fxam: // Examine // Floating comparison instructions use FP reg stack locations // as sources and set only the FP flags. All of these are numeric // type and we don't track any of them, so all such instructions // can be considered to be no-ops. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_fptan: // Partial tangent case NN_fpatan: // Partial arctangent case NN_f2xm1: // 2^x - 1 case NN_fyl2x: // Y * lg2(X) case NN_fyl2xp1: // Y * lg2(X+1) // We can consider it a unary operation when both arguments come // off the floating point register stack, unless we ever start // modeling the different locations in the FP register stack. return this->BuildUnaryRTL(SMP_UNARY_FLOATING_ARITHMETIC); case NN_fldz: // Load +0.0 case NN_fld1: // Load +1.0 case NN_fldpi: // Load PI=3.14... case NN_fldl2t: // Load lg2(10) case NN_fldl2e: // Load lg2(e) case NN_fldlg2: // Load lg10(2) case NN_fldln2: // Load ln(2) case NN_finit: // Initialize Processor case NN_fninit: // Initialize Processor (no wait) case NN_fsetpm: // Set Protected Mode case NN_fldcw: // Load Control Word case NN_fstcw: // Store Control Word case NN_fnstcw: // Store Control Word (no wait) case NN_fstsw: // Store Status Word case NN_fnstsw: // Store Status Word (no wait) case NN_fclex: // Clear Exceptions case NN_fnclex: // Clear Exceptions (no wait) // Floating point stack and control word and flags operations // with no memory operands are no-ops to us. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_fstenv: // Store Environment case NN_fnstenv: // Store Environment (no wait) case NN_fldenv: // Load Environment case NN_fsave: // Save State case NN_fnsave: // Save State (no wait) case NN_frstor: // Restore State case NN_fincstp: // Increment Stack Pointer case NN_fdecstp: // Decrement Stack Pointer case NN_ffree: // Free Register return false; break; case NN_fnop: // No Operation NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; case NN_feni: // (8087 only) case NN_fneni: // (no wait) (8087 only) case NN_fdisi: // (8087 only) case NN_fndisi: // (no wait) (8087 only) return false; break; // // 80387 instructions // case NN_fprem1: // Partial Remainder ( < half ) case NN_fsincos: // t<-cos(st); st<-sin(st); push t case NN_fsin: // Sine case NN_fcos: // Cosine case NN_fucom: // Compare Unordered Real case NN_fucomp: // Compare Unordered Real and Pop case NN_fucompp: // Compare Unordered Real and Pop Twice // Floating point stack and control word and flags operations // with no memory operands are no-ops to us. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // // Instructions added 28.02.96 // case NN_setalc: // Set AL to Carry Flag case NN_svdc: // Save Register and Descriptor case NN_rsdc: // Restore Register and Descriptor case NN_svldt: // Save LDTR and Descriptor case NN_rsldt: // Restore LDTR and Descriptor case NN_svts: // Save TR and Descriptor case NN_rsts: // Restore TR and Descriptor case NN_icebp: // ICE Break Point case NN_loadall: // Load the entire CPU state from ES:EDI return false; break; // // MMX instructions // case NN_emms: // Empty MMX state return false; break; case NN_movd: // Move 32 bits case NN_movq: // Move 64 bits return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_packsswb: // Pack with Signed Saturation (Word->Byte) case NN_packssdw: // Pack with Signed Saturation (Dword->Word) case NN_packuswb: // Pack with Unsigned Saturation (Word->Byte) case NN_paddb: // Packed Add Byte case NN_paddw: // Packed Add Word case NN_paddd: // Packed Add Dword case NN_paddsb: // Packed Add with Saturation (Byte) case NN_paddsw: // Packed Add with Saturation (Word) case NN_paddusb: // Packed Add Unsigned with Saturation (Byte) case NN_paddusw: // Packed Add Unsigned with Saturation (Word) return false; break; case NN_pand: // Bitwise Logical And return this->BuildBinaryRTL(SMP_BITWISE_AND); break; case NN_pandn: // Bitwise Logical And Not return this->BuildBinaryRTL(SMP_BITWISE_AND_NOT); break; case NN_pcmpeqb: // Packed Compare for Equal (Byte) case NN_pcmpeqw: // Packed Compare for Equal (Word) case NN_pcmpeqd: // Packed Compare for Equal (Dword) return this->BuildBinaryRTL(SMP_COMPARE_EQ_AND_SET); break; case NN_pcmpgtb: // Packed Compare for Greater Than (Byte) case NN_pcmpgtw: // Packed Compare for Greater Than (Word) case NN_pcmpgtd: // Packed Compare for Greater Than (Dword) return this->BuildBinaryRTL(SMP_COMPARE_EQ_AND_SET); break; case NN_pmaddwd: // Packed Multiply 
and Add case NN_pmulhw: // Packed Multiply High case NN_pmullw: // Packed Multiply Low return false; break; case NN_por: // Bitwise Logical Or return this->BuildBinaryRTL(SMP_BITWISE_OR); break; case NN_psllw: // Packed Shift Left Logical (Word) case NN_pslld: // Packed Shift Left Logical (Dword) case NN_psllq: // Packed Shift Left Logical (Qword) return this->BuildBinaryRTL(SMP_U_LEFT_SHIFT); break; case NN_psraw: // Packed Shift Right Arithmetic (Word) case NN_psrad: // Packed Shift Right Arithmetic (Dword) return this->BuildBinaryRTL(SMP_S_RIGHT_SHIFT); break; case NN_psrlw: // Packed Shift Right Logical (Word) case NN_psrld: // Packed Shift Right Logical (Dword) case NN_psrlq: // Packed Shift Right Logical (Qword) return this->BuildBinaryRTL(SMP_U_RIGHT_SHIFT); break; case NN_psubb: // Packed Subtract Byte case NN_psubw: // Packed Subtract Word case NN_psubd: // Packed Subtract Dword return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_psubsb: // Packed Subtract with Saturation (Byte) case NN_psubsw: // Packed Subtract with Saturation (Word) return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_psubusb: // Packed Subtract Unsigned with Saturation (Byte) case NN_psubusw: // Packed Subtract Unsigned with Saturation (Word) return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_punpckhbw: // Unpack High Packed Data (Byte->Word) case NN_punpckhwd: // Unpack High Packed Data (Word->Dword) case NN_punpckhdq: // Unpack High Packed Data (Dword->Qword) case NN_punpcklbw: // Unpack Low Packed Data (Byte->Word) case NN_punpcklwd: // Unpack Low Packed Data (Word->Dword) case NN_punpckldq: // Unpack Low Packed Data (Dword->Qword) return this->BuildBinaryRTL(SMP_INTERLEAVE); break; case NN_pxor: // Bitwise Logical Exclusive Or return this->BuildBinaryRTL(SMP_BITWISE_XOR); break; // // Undocumented Deschutes processor instructions // case NN_fxsave: // Fast save FP context case NN_fxrstor: // Fast restore FP context return false; break; // Pentium II instructions 
case NN_sysenter: // Fast Transition to System Call Entry Point case NN_sysexit: // Fast Transition from System Call Entry Point return false; break; // 3DNow! instructions case NN_pavgusb: // Packed 8-bit Unsigned Integer Averaging case NN_pfadd: // Packed Floating-Point Addition case NN_pfsub: // Packed Floating-Point Subtraction case NN_pfsubr: // Packed Floating-Point Reverse Subtraction case NN_pfacc: // Packed Floating-Point Accumulate case NN_pfcmpge: // Packed Floating-Point Comparison: Greater or Equal case NN_pfcmpgt: // Packed Floating-Point Comparison: Greater case NN_pfcmpeq: // Packed Floating-Point Comparison: Equal case NN_pfmin: // Packed Floating-Point Minimum case NN_pfmax: // Packed Floating-Point Maximum case NN_pi2fd: // Packed 32-bit Integer to Floating-Point case NN_pf2id: // Packed Floating-Point to 32-bit Integer case NN_pfrcp: // Packed Floating-Point Reciprocal Approximation case NN_pfrsqrt: // Packed Floating-Point Reciprocal Square Root Approximation case NN_pfmul: // Packed Floating-Point Multiplication case NN_pfrcpit1: // Packed Floating-Point Reciprocal First Iteration Step case NN_pfrsqit1: // Packed Floating-Point Reciprocal Square Root First Iteration Step case NN_pfrcpit2: // Packed Floating-Point Reciprocal Second Iteration Step case NN_pmulhrw: // Packed Floating-Point 16-bit Integer Multiply with rounding case NN_femms: // Faster entry/exit of the MMX or floating-point state return false; break; case NN_prefetch: // Prefetch at least a 32-byte line into L1 data cache case NN_prefetchw: // Prefetch processor cache line into L1 data cache (mark as modified) // Prefetch opcodes are no-ops to us. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // Pentium III instructions case NN_addps: // Packed Single-FP Add case NN_addss: // Scalar Single-FP Add case NN_andnps: // Bitwise Logical And Not for Single-FP case NN_andps: // Bitwise Logical And for Single-FP case NN_cmpps: // Packed Single-FP Compare case NN_cmpss: // Scalar Single-FP Compare case NN_comiss: // Scalar Ordered Single-FP Compare and Set EFLAGS case NN_cvtpi2ps: // Packed signed INT32 to Packed Single-FP conversion case NN_cvtps2pi: // Packed Single-FP to Packed INT32 conversion case NN_cvtsi2ss: // Scalar signed INT32 to Single-FP conversion case NN_cvtss2si: // Scalar Single-FP to signed INT32 conversion case NN_cvttps2pi: // Packed Single-FP to Packed INT32 conversion (truncate) case NN_cvttss2si: // Scalar Single-FP to signed INT32 conversion (truncate) case NN_divps: // Packed Single-FP Divide case NN_divss: // Scalar Single-FP Divide return false; break; case NN_ldmxcsr: // Load Streaming SIMD Extensions Technology Control/Status Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_maxps: // Packed Single-FP Maximum case NN_maxss: // Scalar Single-FP Maximum case NN_minps: // Packed Single-FP Minimum case NN_minss: // Scalar Single-FP Minimum return false; break; case NN_movaps: // Move Aligned Four Packed Single-FP case NN_movhlps: // Move High to Low Packed Single-FP case NN_movhps: // Move High Packed Single-FP case NN_movlhps: // Move Low to High Packed Single-FP case NN_movlps: // Move Low Packed Single-FP case NN_movmskps: // Move Mask to Register case NN_movss: // Move Scalar Single-FP case NN_movups: // Move Unaligned Four Packed Single-FP return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_mulps: // Packed Single-FP Multiply case NN_mulss: // Scalar Single-FP Multiply case NN_orps: // Bitwise Logical OR for Single-FP Data case NN_rcpps: // Packed Single-FP Reciprocal case NN_rcpss: // 
Scalar Single-FP Reciprocal case NN_rsqrtps: // Packed Single-FP Square Root Reciprocal case NN_rsqrtss: // Scalar Single-FP Square Root Reciprocal case NN_shufps: // Shuffle Single-FP case NN_sqrtps: // Packed Single-FP Square Root case NN_sqrtss: // Scalar Single-FP Square Root return false; break; case NN_stmxcsr: // Store Streaming SIMD Extensions Technology Control/Status Register return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_subps: // Packed Single-FP Subtract case NN_subss: // Scalar Single-FP Subtract case NN_ucomiss: // Scalar Unordered Single-FP Compare and Set EFLAGS case NN_unpckhps: // Unpack High Packed Single-FP Data case NN_unpcklps: // Unpack Low Packed Single-FP Data case NN_xorps: // Bitwise Logical XOR for Single-FP Data case NN_pavgb: // Packed Average (Byte) case NN_pavgw: // Packed Average (Word) case NN_pextrw: // Extract Word case NN_pinsrw: // Insert Word case NN_pmaxsw: // Packed Signed Integer Word Maximum case NN_pmaxub: // Packed Unsigned Integer Byte Maximum case NN_pminsw: // Packed Signed Integer Word Minimum case NN_pminub: // Packed Unsigned Integer Byte Minimum return false; break; case NN_pmovmskb: // Move Byte Mask to Integer return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_pmulhuw: // Packed Multiply High Unsigned case NN_psadbw: // Packed Sum of Absolute Differences return false; break; case NN_pshufw: // Packed Shuffle Word return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_maskmovq: // Byte Mask write return false; break; case NN_movntps: // Move Aligned Four Packed Single-FP Non Temporal case NN_movntq: // Move 64 Bits Non Temporal return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_prefetcht0: // Prefetch to all cache levels case NN_prefetcht1: // Prefetch to all cache levels case NN_prefetcht2: // Prefetch to L2 cache case NN_prefetchnta: // Prefetch to L1 cache case NN_sfence: // Store Fence // Cache prefetch and store fence opcodes are no-ops to us. 
NopRT = new SMPRegTransfer; NopRT->SetOperator(SMP_NULL_OPERATOR); this->RTL.push_back(NopRT); NopRT = NULL; return true; // Pentium III Pseudo instructions case NN_cmpeqps: // Packed Single-FP Compare EQ case NN_cmpltps: // Packed Single-FP Compare LT case NN_cmpleps: // Packed Single-FP Compare LE case NN_cmpunordps: // Packed Single-FP Compare UNORD case NN_cmpneqps: // Packed Single-FP Compare NOT EQ case NN_cmpnltps: // Packed Single-FP Compare NOT LT case NN_cmpnleps: // Packed Single-FP Compare NOT LE case NN_cmpordps: // Packed Single-FP Compare ORDERED case NN_cmpeqss: // Scalar Single-FP Compare EQ case NN_cmpltss: // Scalar Single-FP Compare LT case NN_cmpless: // Scalar Single-FP Compare LE case NN_cmpunordss: // Scalar Single-FP Compare UNORD case NN_cmpneqss: // Scalar Single-FP Compare NOT EQ case NN_cmpnltss: // Scalar Single-FP Compare NOT LT case NN_cmpnless: // Scalar Single-FP Compare NOT LE case NN_cmpordss: // Scalar Single-FP Compare ORDERED return false; break; // AMD K7 instructions case NN_pf2iw: // Packed Floating-Point to Integer with Sign Extend case NN_pfnacc: // Packed Floating-Point Negative Accumulate case NN_pfpnacc: // Packed Floating-Point Mixed Positive-Negative Accumulate case NN_pi2fw: // Packed 16-bit Integer to Floating-Point case NN_pswapd: // Packed Swap Double Word return false; break; // Undocumented FP instructions (thanks to norbert.juffa@adm.com) case NN_fstp1: // Alias of Store Real and Pop case NN_fcom2: // Alias of Compare Real case NN_fcomp3: // Alias of Compare Real and Pop case NN_fxch4: // Alias of Exchange Registers case NN_fcomp5: // Alias of Compare Real and Pop case NN_ffreep: // Free Register and Pop case NN_fxch7: // Alias of Exchange Registers case NN_fstp8: // Alias of Store Real and Pop case NN_fstp9: // Alias of Store Real and Pop return false; break; // Pentium 4 instructions case NN_addpd: // Add Packed Double-Precision Floating-Point Values case NN_addsd: // Add Scalar Double-Precision 
Floating-Point Values case NN_andnpd: // Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values case NN_andpd: // Bitwise Logical AND of Packed Double-Precision Floating-Point Values case NN_clflush: // Flush Cache Line case NN_cmppd: // Compare Packed Double-Precision Floating-Point Values case NN_cmpsd: // Compare Scalar Double-Precision Floating-Point Values case NN_comisd: // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS case NN_cvtdq2pd: // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values case NN_cvtdq2ps: // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values case NN_cvtpd2dq: // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvtpd2pi: // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvtpd2ps: // Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values case NN_cvtpi2pd: // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values case NN_cvtps2dq: // Convert Packed Single-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvtps2pd: // Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values case NN_cvtsd2si: // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer case NN_cvtsd2ss: // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value case NN_cvtsi2sd: // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value case NN_cvtss2sd: // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value case NN_cvttpd2dq: // Convert With Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvttpd2pi: // Convert with Truncation Packed Double-Precision Floating-Point 
Values to Packed Doubleword Integers case NN_cvttps2dq: // Convert With Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers case NN_cvttsd2si: // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer case NN_divpd: // Divide Packed Double-Precision Floating-Point Values case NN_divsd: // Divide Scalar Double-Precision Floating-Point Values case NN_lfence: // Load Fence case NN_maskmovdqu: // Store Selected Bytes of Double Quadword case NN_maxpd: // Return Maximum Packed Double-Precision Floating-Point Values case NN_maxsd: // Return Maximum Scalar Double-Precision Floating-Point Value case NN_mfence: // Memory Fence case NN_minpd: // Return Minimum Packed Double-Precision Floating-Point Values case NN_minsd: // Return Minimum Scalar Double-Precision Floating-Point Value return false; break; case NN_movapd: // Move Aligned Packed Double-Precision Floating-Point Values case NN_movdq2q: // Move Quadword from XMM to MMX Register case NN_movdqa: // Move Aligned Double Quadword case NN_movdqu: // Move Unaligned Double Quadword case NN_movhpd: // Move High Packed Double-Precision Floating-Point Values case NN_movlpd: // Move Low Packed Double-Precision Floating-Point Values return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_movmskpd: // Extract Packed Double-Precision Floating-Point Sign Mask return false; break; case NN_movntdq: // Store Double Quadword Using Non-Temporal Hint case NN_movnti: // Store Doubleword Using Non-Temporal Hint case NN_movntpd: // Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint case NN_movq2dq: // Move Quadword from MMX to XMM Register case NN_movsd: // Move Scalar Double-Precision Floating-Point Values case NN_movupd: // Move Unaligned Packed Double-Precision Floating-Point Values return this->BuildMoveRTL(SMP_NULL_OPERATOR); break; case NN_mulpd: // Multiply Packed Double-Precision Floating-Point Values case NN_mulsd: // Multiply Scalar 
Double-Precision Floating-Point Values case NN_orpd: // Bitwise Logical OR of Double-Precision Floating-Point Values case NN_paddq: // Add Packed Quadword Integers case NN_pause: // Spin Loop Hint case NN_pmuludq: // Multiply Packed Unsigned Doubleword Integers return false; break; case NN_pshufd: // Shuffle Packed Doublewords case NN_pshufhw: // Shuffle Packed High Words case NN_pshuflw: // Shuffle Packed Low Words return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_pslldq: // Shift Double Quadword Left Logical return this->BuildBinaryRTL(SMP_U_LEFT_SHIFT); break; case NN_psrldq: // Shift Double Quadword Right Logical return this->BuildBinaryRTL(SMP_U_RIGHT_SHIFT); break; case NN_psubq: // Subtract Packed Quadword Integers return this->BuildBinaryRTL(SMP_SUBTRACT); break; case NN_punpckhqdq: // Unpack High Data case NN_punpcklqdq: // Unpack Low Data return this->BuildBinaryRTL(SMP_INTERLEAVE); break; case NN_shufpd: // Shuffle Packed Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_sqrtpd: // Compute Square Roots of Packed Double-Precision Floating-Point Values case NN_sqrtsd: // Compute Square Rootof Scalar Double-Precision Floating-Point Value case NN_subpd: // Subtract Packed Double-Precision Floating-Point Values case NN_subsd: // Subtract Scalar Double-Precision Floating-Point Values case NN_ucomisd: // Unordered Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS return false; break; case NN_unpckhpd: // Unpack and Interleave High Packed Double-Precision Floating-Point Values case NN_unpcklpd: // Unpack and Interleave Low Packed Double-Precision Floating-Point Values return this->BuildBinaryRTL(SMP_INTERLEAVE); break; case NN_xorpd: // Bitwise Logical OR of Double-Precision Floating-Point Values return false; break; // AMD syscall/sysret instructions case NN_syscall: // Low latency system call case NN_sysret: // Return from system call // AMD64 instructions case NN_swapgs: // 
Exchange GS base with KernelGSBase MSR // New Pentium instructions (SSE3) case NN_movddup: // Move One Double-FP and Duplicate case NN_movshdup: // Move Packed Single-FP High and Duplicate case NN_movsldup: // Move Packed Single-FP Low and Duplicate return false; break; // Missing AMD64 instructions case NN_movsxd: // Move with Sign-Extend Doubleword case NN_cmpxchg16b: // Compare and Exchange 16 Bytes return false; break; // SSE3 instructions case NN_addsubpd: // Add /Sub packed DP FP numbers case NN_addsubps: // Add /Sub packed SP FP numbers case NN_haddpd: // Add horizontally packed DP FP numbers case NN_haddps: // Add horizontally packed SP FP numbers case NN_hsubpd: // Sub horizontally packed DP FP numbers case NN_hsubps: // Sub horizontally packed SP FP numbers case NN_monitor: // Set up a linear address range to be monitored by hardware case NN_mwait: // Wait until write-back store performed within the range specified by the MONITOR instruction case NN_fisttp: // Store ST in intXX (chop) and pop case NN_lddqu: // Load unaligned integer 128-bit return false; break; // SSSE3 instructions case NN_psignb: // Packed SIGN Byte case NN_psignw: // Packed SIGN Word case NN_psignd: // Packed SIGN Doubleword case NN_pshufb: // Packed Shuffle Bytes return this->BuildBinaryRTL(SMP_SHUFFLE); break; case NN_pmulhrsw: // Packed Multiply High with Round and Scale case NN_pmaddubsw: // Multiply and Add Packed Signed and Unsigned Bytes case NN_phsubsw: // Packed Horizontal Subtract and Saturate case NN_phaddsw: // Packed Horizontal Add and Saturate case NN_phaddw: // Packed Horizontal Add Word case NN_phaddd: // Packed Horizontal Add Doubleword case NN_phsubw: // Packed Horizontal Subtract Word case NN_phsubd: // Packed Horizontal Subtract Doubleword return false; break; case NN_palignr: // Packed Align Right return this->BuildPackShiftRTL(SMP_CONCATENATE, SMP_REVERSE_SHIFT_U); break; case NN_pabsb: // Packed Absolute Value Byte case NN_pabsw: // Packed Absolute Value Word 
case NN_pabsd: // Packed Absolute Value Doubleword return false; break; // VMX instructions case NN_vmcall: // Call to VM Monitor case NN_vmclear: // Clear Virtual Machine Control Structure case NN_vmlaunch: // Launch Virtual Machine case NN_vmresume: // Resume Virtual Machine case NN_vmptrld: // Load Pointer to Virtual Machine Control Structure case NN_vmptrst: // Store Pointer to Virtual Machine Control Structure case NN_vmread: // Read Field from Virtual Machine Control Structure case NN_vmwrite: // Write Field from Virtual Machine Control Structure case NN_vmxoff: // Leave VMX Operation case NN_vmxon: // Enter VMX Operation return false; break; default: msg("ERROR: Unknown instruction opcode at %x : %s\n", this->GetAddr(), this->GetDisasm()); break; } // end switch on opcode return true; } // end SMPInstr::BuildRTL() // Iterate through all reg transfers and call SyncRTLDefUse for each. void SMPInstr::SyncAllRTs(void) { for (size_t index = 0; index < this->RTL.GetCount(); ++index) { this->SyncRTLDefUse(this->RTL.GetRT(index)); } return; } // end of SMPInstr:SyncAllRTs() // Ensure that each operand of the RTL is found in the appropriate DEF or USE list. void SMPInstr::SyncRTLDefUse(SMPRegTransfer *CurrRT) { // The Guard expression and ExtraKills are almost never represented in the DEF and USE // lists. When they are, they are added in MDFixupDefUseLists(), so we ignore them here. // The only DEFs should come from left operands of SMP_ASSIGN operators, i.e. the effects // of register transfers. op_t LeftOp, RightOp; set<DefOrUse, LessDefUse>::iterator CurrDef, CurrUse; bool DebugFlag = false; #if SMP_VERBOSE_DEBUG_BUILD_RTL DebugFlag |= (0 == strcmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName())); #endif if (DebugFlag) { msg("SyncRTLDefUse entered. 
Dump of USE list:\n"); this->Uses.Dump(); } LeftOp = CurrRT->GetLeftOperand(); if (SMP_ASSIGN == CurrRT->GetOperator()) { assert(o_void != LeftOp.type); assert(o_imm != LeftOp.type); CurrDef = this->Defs.FindRef(LeftOp); if (CurrDef == this->GetLastDef() && !LeftOp.is_reg(R_ip)) { #if SMP_VERBOSE_DEBUG_BUILD_RTL msg("WARNING: DEF not found for SMP_ASSIGN in %s ; added op:", this->GetDisasm()); PrintOperand(LeftOp); msg("\n"); #endif this->Defs.SetRef(LeftOp, CurrRT->GetOperatorType()); } } else { // not SMP_ASSIGN; left operand should be a USE if (o_void != LeftOp.type) { CurrUse = this->Uses.FindRef(LeftOp); if (CurrUse == this->GetLastUse()) { #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE msg("WARNING: USE not found for "); PrintOperand(LeftOp); msg(" in %s ; added\n", this->GetDisasm()); #endif this->Uses.SetRef(LeftOp); } } } if (!CurrRT->HasRightSubTree()) { RightOp = CurrRT->GetRightOperand(); // right operand should be a USE if (o_void != RightOp.type) { CurrUse = this->Uses.FindRef(RightOp); if (CurrUse == this->GetLastUse()) { #if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE msg("WARNING: USE not found for "); PrintOperand(RightOp); msg(" in %s ; added\n", this->GetDisasm()); #endif this->Uses.SetRef(RightOp); } } } else { // recurse into right subtree this->SyncRTLDefUse(CurrRT->GetRightTree()); } return; } // end of SMPInstr::SyncRTLDefUse() // SetOperatorType - set the type of the operator, take into account the speculative (profiler) status void SMPRegTransfer::SetOperatorType(SMPOperandType OpType, const SMPInstr* Instr) { SMPOperandType OldType = RTop.type; SMPOperandType NewType = OpType; if (Instr->GetBlock()->GetFunc()->GetIsSpeculative()) { NewType = (SMPOperandType) (((int)NewType) | PROF_BASE); if (!IsProfDerived(OldType)) RTop.NonSpeculativeType = OldType; } RTop.type = NewType; } // end of SMPRegTransfer::SetOperatorType // Update the memory source operands to have the new type void SMPInstr::UpdateMemLoadTypes(SMPOperandType newType) { bool MemSrc = false; 
op_t Opnd; for (int i = 0; i < UA_MAXOP; ++i) { Opnd = this->SMPcmd.Operands[i]; optype_t CurrType = Opnd.type; if (this->features & UseMacros[i]) { // USE MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemSrc) { set<DefOrUse, LessDefUse>::iterator use = this->FindUse(Opnd); SMPOperandType type = use->GetType(); assert(newType == (NUMERIC|PROF_BASE)); switch (type) { case UNINIT: case CODEPTR: this->SetUseType(Opnd,newType); break; case POINTER: this->SetUseType(Opnd, (SMPOperandType)(UNKNOWN|PROF_BASE)); break; default: break; } } } } return ; } // end of SMPInstr::UpdateMemLoadTypes()