Newer
Older
break;
}
}
}
// Next, handle repeat prefices in the instructions. The Intel REPE/REPZ prefix
// is just the text printed for SCAS/CMPS instructions that have a REP prefix.
// Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
// REPNE/REPNZ only applies to SCAS and CMPS instructions.
bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
if (HasRepPrefix && HasRepnePrefix)
clc5q
committed
SMP_msg("REP and REPNE both present at %x %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
if (HasRepPrefix || HasRepnePrefix) {
// All repeating instructions use ECX as the countdown register.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = R_cx;
BaseOpnd.clr_showed();
this->Defs.SetRef(BaseOpnd, NUMERIC);
this->Uses.SetRef(BaseOpnd, NUMERIC);
if ((opcode == NN_cmps) || (opcode == NN_scas) || (opcode == NN_movs) || (opcode == NN_stos)) {
// ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS.
// Only EDI is involved with SCAS/STOS.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.clr_showed();
if ((opcode == NN_cmps) || (opcode == NN_movs)) {
BaseOpnd.reg = R_si;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
}
BaseOpnd.reg = R_di;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
else if ((NN_loopw <= opcode) && (NN_loopqne >= opcode)) {
op_t LoopCounterOp = InitOp;
LoopCounterOp.type = o_reg;
LoopCounterOp.reg = R_cx;
this->Defs.SetRef(LoopCounterOp, NUMERIC);
this->Uses.SetRef(LoopCounterOp, NUMERIC);
}
// Now, handle special instruction categories that have implicit operands.
if (NN_cmpxchg == opcode) {
// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
// sound by declaring that EAX is always a DEF.
this->MDAddRegDef(R_ax, false);
} // end if NN_cmpxchg
else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) {
// IDA does not include the stack pointer in the DEFs or USEs.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
// We always reference [esp+0] or [esp-4], so add it to the DEF or USE list.
op_t StackOp = InitOp;
StackOp.type = o_displ;
StackOp.reg = R_sp;
if (this->MDIsPopInstr()) {
StackOp.addr = 0; // [ESP+0]
this->Uses.SetRef(StackOp); // USE
}
else {
StackOp.addr = (ea_t) -4; // [ESP-4]
this->Defs.SetRef(StackOp); // DEF
}
clc5q
committed
else if ((this->type == CALL) || (this->type == INDIR_CALL) || this->IsTailCall()) {
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
#if 1
if (this->MDIsInterruptCall()) {
#endif
this->MDAddRegDef(R_bx, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegDef(R_si, false);
this->MDAddRegUse(R_si, false);
}
#endif
else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) {
// Entire function prologue or epilogue microcoded.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
this->MDAddRegDef(R_bp, false);
this->MDAddRegUse(R_bp, false);
else if ((opcode == NN_maskmovq) || (opcode == NN_maskmovdqu)) {
else if (8 == this->GetOptType()) {
// This category implicitly writes to EDX:EAX.
this->MDAddRegDef(R_dx, false);
this->MDAddRegDef(R_ax, false);
} // end else if (8 == GetOptType)
else if (7 == this->GetOptType()) {
// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
// DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
// sense, because they are not displayed in the disassembly text). For example:
// mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
// source operand is only 8 bits wide, there is room to hold the result in AX
// without using DX: mul bl means AX <-- AL*BL.
// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
// operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
// EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempUse = this->SMPcmd.Operands[OpNum];
if (!TempUse.showed()) { // hidden operand
if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
this->MDAddRegUse(R_dx, false);
}
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_dx, false);
}
}
}
} // end else if (7 == OptType)
#if 0
// The floating point instructions in type categories 14 and 15 often USE and DEF
// the floating point register stack, e.g. pushing a value onto that stack is a
// massive copy downward of stack locations. We don't really care about the USE of
// the stack if the value being pushed came from elsewhere than the stack. For example,
// an "fld" opcode pushes its source onto the stack. We build RTLs with a simple
// move structure, but the RTL building can be fooled by seeing two "source" operands
// in the USE list.
if ((14 == SMPTypeCategory[this->SMPcmd.itype])
|| (15 == SMPTypeCategory[this->SMPcmd.itype])) {
}
#endif
clc5q
committed
#if 0 // Not true for LOOP instructions that use only the ECX counter register.
if (this->type == COND_BRANCH) {
assert(SMPUsesFlags[opcode]);
clc5q
committed
#endif
// The return value register EAX is not quite like a caller-save or callee-save
// register (technically, it is caller-save). Within a callee, it might appear
// that EAX has become dead by the time a return instruction is reached, but
// the USE that would make it not dead is in the caller. To prevent type inference
// from mistakenly thinking that all USEs of EAX have been seen in the callee,
// we add EAX to the USE list for all return instructions, as well as for all
// tail calls, which are essentially returns in terms of data flow analysis.
// This USE of EAX will always be of type UNINIT unless its DEF has a known type
// that propagates to it. Thus, it will prevent an invalid back inference of the
// DEF type from "all" USE types that are visible in the callee; even if they
// were all NUMERIC, this return USE will be UNINIT and inhibit the invalid
// type inference. EAX could be loaded with a pointer from memory, for example,
// and USEd only in a comparison instruction, making it falsely appear to be
// a NUMERIC, without this extra USE at the return instruction.
// Because some of the library functions pass values around in EBX, EDI, etc.,
// we will add these general purpose registers to the USE list for returns
// in order to prevent erroneous analyses of dead registers or unused
// metadata.
if ((this->type == RETURN) || this->IsTailCall()) {
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
if (!UseFP)
this->MDAddRegUse(R_bp, false);
this->MDAddRegUse(R_si, false);
this->MDAddRegUse(R_di, false);
}
clc5q
committed
// Next, add the flags register to the DEFs and USEs for those instructions that
// are marked as defining or using flags.
if (!this->IsDefsFlags() && SMPDefsFlags[opcode]) {
this->MDAddRegDef(X86_FLAGS_REG, false);
this->SetDefsFlags();
if (!this->IsUsesFlags() && SMPUsesFlags[opcode]) {
this->MDAddRegUse(X86_FLAGS_REG, false);
this->SetUsesFlags();
}
if (this->IsNop()) {
// Clear the DEFs and USEs for no-ops.
// These include machine idioms for no-ops, e.g. mov esi,esi
// or xchg ax,ax or lea esi,[esi].
this->Defs.clear();
this->Uses.clear();
this->MoveSource = InitOp;
this->DEFMemOp = InitOp;
this->USEMemOp = InitOp;
this->LeaUSEMemOp = InitOp;
this->OptType = 1;
}
#endif
clc5q
committed
SMP_msg("DEBUG after MDFixupDefUseLists:\n");
return;
} // end of SMPInstr::MDFixupDefUseLists()
// If we can definitely identify which part of the addressing expression
// used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR
// immediate, set the USE type for that register to POINTER and return true.
// If we can find definite NUMERIC addressing registers that are not already
// typed as NUMERIC, set their USE types to NUMERIC and return true.
bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
bool changed = false;
int BaseReg;
int IndexReg;
op_t BaseOp = InitOp;
op_t IndexOp = InitOp;
SMPOperandType BaseType = UNKNOWN;
SMPOperandType IndexType = UNKNOWN;
ushort ScaleFactor;
ea_t offset;
set<DefOrUse, LessDefUse>::iterator BaseIter;
set<DefOrUse, LessDefUse>::iterator IndexIter;
if (NN_lea == this->SMPcmd.itype)
return false; // lea instruction really has no memory operands
if (NN_fnop == this->SMPcmd.itype)
return false; // SSA marker instruction
MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset);
if (R_none != IndexReg) {
IndexOp.type = o_reg;
IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
IndexOp.dtyp = dt_dword; // Canonical 32-bit width
IndexIter = this->FindUse(IndexOp);
assert(IndexIter != this->GetLastUse());
IndexType = IndexIter->GetType();
}
if (R_none != BaseReg) {
BaseOp.type = o_reg;
BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
BaseOp.dtyp = dt_dword; // Canonical 32-bit width
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
BaseIter = this->FindUse(BaseOp);
assert(BaseIter != this->GetLastUse());
BaseType = BaseIter->GetType();
}
if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) {
if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
// We have an indexed access into the stack frame.
// Set IndexReg USE type to NUMERIC.
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
}
if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) {
if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
// We have an indexed access into the stack frame.
// Set BaseReg USE type to NUMERIC.
// Note that BaseReg is really an IndexReg and vice versa.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
clc5q
committed
SMP_msg("WARNING: BaseReg is index, IndexReg is base: %s\n",
DisAsmText.GetDisAsm(this->GetAddr()));
}
return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
}
if (IsImmedGlobalAddress(offset)) {
if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
// We have an indexed access into a global.
// Set IndexReg USE type to NUMERIC.
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
// We have an indexed access into a global.
// Set BaseReg USE type to NUMERIC.
// Note that BaseReg is really an index register.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
clc5q
committed
SMP_msg("WARNING: BaseReg used as index: %s\n", DisAsmText.GetDisAsm(this->GetAddr()));
clc5q
committed
return changed; // global immediate is handled in SetImmedTypes()
// At this point, we must have a base address in a register, not used
// to directly address the stack or a global.
if ((0 < ScaleFactor) || (R_none == IndexReg)) {
// IndexReg is scaled, meaning it is NUMERIC, so BaseReg must
// be a POINTER; or IndexReg is not present, so BaseReg is the
// only possible holder of an address.
if (R_none != BaseReg) {
if (UNINIT == BaseIter->GetType()) {
changed = true;
BaseIter = this->SetUseType(BaseOp, POINTER);
assert(BaseIter != this->GetLastUse());
}
}
}
else if (R_none == BaseReg) {
// We have an unscaled IndexReg and no BaseReg and offset was
// not a global offset, so IndexReg must be a POINTER.
if (R_none != IndexReg) {
changed = true;
IndexIter = this->SetUseType(IndexOp, POINTER);
assert(IndexIter != this->GetLastUse());
}
}
}
else { // We have BaseReg and an unscaled IndexReg.
// The only hope for typing something like [ebx+edx] is for
// one register to already be typed NUMERIC, in which case
// the other one must be a POINTER, or if one register is
// already POINTER, then the other one must be NUMERIC.
if (IsNumeric(BaseType)) {
if (UNINIT == IndexType) {
// Set to POINTER or PROF_POINTER
changed = true;
IndexIter = this->SetUseType(IndexOp, POINTER);
assert(IndexIter != this->GetLastUse());
}
else if (IsNumeric(IndexType)) {
clc5q
committed
SMP_msg("ERROR: BaseReg and IndexReg both NUMERIC at %x: %s\n",
this->address, DisAsmText.GetDisAsm(this->GetAddr()));
}
}
else { // BaseReg was not NUMERIC
if (UNINIT == BaseType) { // BaseReg is UNINIT
if (IsNumeric(IndexType)) {
changed = true;
BaseIter = this->SetUseType(BaseOp, POINTER);
assert(BaseIter != this->GetLastUse());
}
else if (IsDataPtr(IndexType)) {
// IndexReg is POINTER, so make BaseReg NUMERIC.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
}
}
else if (IsDataPtr(BaseType)) {
// BaseReg was a pointer type. IndexReg must be NUMERIC.
if (UNINIT == IndexType) {
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
else if (IsDataPtr(IndexType)) {
clc5q
committed
SMP_msg("ERROR: BaseReg and IndexReg both POINTER at %x: %s\n",
this->address, DisAsmText.GetDisAsm(this->GetAddr()));
}
}
}
}
return changed;
} // end of SMPInstr::MDFindPointerUse()
clc5q
committed
// Are all DEFs typed to something besides UNINIT?
bool SMPInstr::AllDEFsTyped(void) {
if (this->AreDEFsTyped()) {
return true;
}
clc5q
committed
bool FoundUNINIT = false;
set<DefOrUse, LessDefUse>::iterator DefIter;
for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
if (IsEqType(UNINIT, DefIter->GetType())) {
FoundUNINIT = true;
break;
}
}
if (!FoundUNINIT) {
this->SetDEFsTyped();
}
clc5q
committed
return (!FoundUNINIT);
} // end of SMPInstr::AllDEFsTyped()
// Are all USEs typed to something besides UNINIT?
bool SMPInstr::AllUSEsTyped(void) {
if (this->AreUSEsTyped()) {
return true;
}
clc5q
committed
bool FoundUNINIT = false;
set<DefOrUse, LessDefUse>::iterator UseIter;
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
if (IsEqType(UNINIT, UseIter->GetType())) {
FoundUNINIT = true;
break;
}
}
if (!FoundUNINIT) {
this->SetUSEsTyped();
}
clc5q
committed
return (!FoundUNINIT);
} // end of SMPInstr::AllUSEsTyped()
// Return true if UseOp is a USE reg, not just an address reg in a memory USE
clc5q
committed
bool SMPInstr::IsNonAddressReg(op_t UseOp) const {
bool FoundUse = false;
ushort SearchReg = MDCanonicalizeSubReg(UseOp.reg);
for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = this->SMPcmd.Operands[OpNum];
if (this->features & UseMacros[OpNum]) { // USE
if (Opnd.type == o_reg) {
ushort TestReg = MDCanonicalizeSubReg(Opnd.reg);
if (TestReg == SearchReg) {
FoundUse = true;
break;
}
}
}
}
return FoundUse;
} // end of SMPInstr::IsNonAddressReg()
// Return the immediate shift count of a shift or rotate instruction.
// Returns 0 when the instruction is not a shift/rotate (per MDIsShiftOrRotate())
// or when the count operand found in the RTL is not an immediate.
uval_t SMPInstr::MDGetShiftCount(void) const {
	if (!this->MDIsShiftOrRotate()) {
		return 0;
	}

	// The count is the right operand of the right subtree of the first RT.
	SMPRegTransfer *ShiftRT = this->RTL.GetRT(0);
	assert(ShiftRT->HasRightSubTree());
	ShiftRT = ShiftRT->GetRightTree();
	op_t CountOp = ShiftRT->GetRightOperand();

	uval_t Count = 0;
	if (o_imm == CountOp.type) {
		Count = CountOp.value;
	}
	return Count;
} // end of SMPInstr::MDGetShiftCount()
clc5q
committed
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
// Is a sub-register of UseOp used as a shift counter in the RTL?
// Example: UseOp is ECX on x86 while CL serves as the shift or rotate count.
// Returns false unless UseOp is a register, the instruction is a shift or
// rotate, and the count operand in the RTL is itself a register.
bool SMPInstr::IsSubRegUsedAsShiftCount(op_t UseOp) {
	if ((o_reg != UseOp.type) || !this->MDIsShiftOrRotate()) {
		return false;
	}

	// The count is the right operand of the right subtree of the first RT.
	SMPRegTransfer *ShiftRT = this->RTL.GetRT(0);
	assert(ShiftRT->HasRightSubTree());
	ShiftRT = ShiftRT->GetRightTree();
	op_t CountOp = ShiftRT->GetRightOperand();
	if (o_reg != CountOp.type) {
		return false;
	}

	ushort NarrowUseReg = UseOp.reg;
	ushort NarrowCountReg = CountOp.reg;
	ushort CanonUseReg = MDCanonicalizeSubReg(NarrowUseReg);
	ushort CanonCountReg = MDCanonicalizeSubReg(NarrowCountReg);

	// The raw registers differ but share a canonical enclosing register.
	// Non-immediate shift counters are the 8-bit subregister on x86
	// (machine dependent!), so the count register must be a subreg of UseOp.
	bool RawRegsDiffer = (NarrowUseReg != NarrowCountReg);
	bool SameEnclosingReg = (CanonUseReg == CanonCountReg);
	return (RawRegsDiffer && SameEnclosingReg);
} // end of SMPInstr::IsSubRegUsedAsShiftCount()
clc5q
committed
// Does UseOp ultimately come from a set-condition-code instruction?
// UseOp/UseSSANum identify the USE whose DEF is looked up through the
// enclosing basic block. The DEF chain is followed through Phi functions and
// through register-to-register moves; any other defining instruction ends the
// search with false. Only register operands are traced.
bool SMPInstr::IsOpSourceConditionCode(op_t UseOp, int UseSSANum) {
	bool LocalName = this->GetBlock()->IsLocalName(UseOp);
	ea_t UseAddr = this->GetAddr();
	ea_t UseDefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, UseAddr, UseSSANum, LocalName);
	ea_t FirstFuncAddr = this->GetBlock()->GetFunc()->GetFirstFuncAddr();

	if ((o_reg != UseOp.type) || (UseDefAddr == (FirstFuncAddr - 1))) {
		// Cannot search for non-reg defs by SSANum reliably.
		// FirstFuncAddr - 1 signifies the pseudo-inst to hold DEFs of regs
		// that are LiveIn to the function; pseudo-inst is not a condition code set.
		return false;
	}

	if (UseDefAddr < this->GetBlock()->GetFunc()->GetNumBlocks()) {
		// A block number was returned. That means the DEF is in a Phi Function.
		// We could trace all Phi USEs and see if all of them come from condition
		// codes, but a single Phi USE coming from a condition code is enough to
		// potentially lead to a false positive numeric error. We recurse on the
		// Phi USEs and declare success as soon as one of them succeeds.
		size_t BlockNum = (size_t) UseDefAddr;
		assert(!LocalName);
		SMPBasicBlock *PhiDefBlock = this->GetBlock()->GetFunc()->GetBlockByNum(BlockNum);
		assert(NULL != PhiDefBlock);
		if (PhiDefBlock->IsProcessed()) {
			return false; // block already visited; prevent infinite recursion
		}
		set<SMPPhiFunction, LessPhi>::iterator DefPhiIter = PhiDefBlock->FindPhi(UseOp);
		assert(DefPhiIter != PhiDefBlock->GetLastPhi());
		size_t PhiListSize = DefPhiIter->GetPhiListSize();
		PhiDefBlock->SetProcessed(true); // Prevent infinite recursion
		for (size_t UseIndex = 0; UseIndex < PhiListSize; ++UseIndex) {
			int PhiUseSSANum = DefPhiIter->GetUseSSANum(UseIndex);
			if (this->IsOpSourceConditionCode(UseOp, PhiUseSSANum)) {
				return true; // only one success on all Phi USEs is needed
			}
		}
		return false;
	}

	// The DEF is in an ordinary instruction.
	SMPInstr *DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(UseDefAddr);
	if (DefInst->MDIsAnySetValue()) {
		return true;
	}
	if (!DefInst->MDIsMoveInstr()) {
		// Not a move, not a condition code transfer. We must return false.
		return false;
	}

	op_t MoveUseOp = DefInst->GetMoveSource();
	if (o_reg != MoveUseOp.type) {
		return false; // pattern is simple; don't try to follow through memory
	}
	CanonicalizeOpnd(MoveUseOp);
	set<DefOrUse, LessDefUse>::iterator MoveUseIter = DefInst->FindUse(MoveUseOp);
	assert(MoveUseIter != DefInst->GetLastUse());
	int MoveUseSSANum = MoveUseIter->GetSSANum();
	return DefInst->IsOpSourceConditionCode(MoveUseOp, MoveUseSSANum); // recurse
} // end of SMPInstr::IsOpSourceConditionCode()
// Does UseOp ultimately come from a move-with-zero-extension instruction?
// UseOp/UseSSANum identify the USE whose DEF is looked up through the
// enclosing basic block. The DEF chain is followed through Phi functions and
// register-to-register moves. When TruncationCheck is true, instructions for
// which MDIsNonOverflowingBitManipulation() holds are also followed.
// Only register operands are traced.
bool SMPInstr::IsOpSourceZeroExtendedMove(op_t UseOp, int UseSSANum, bool TruncationCheck) {
	bool LocalName = this->GetBlock()->IsLocalName(UseOp);
	ea_t UseAddr = this->GetAddr();
	ea_t UseDefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, UseAddr, UseSSANum, LocalName);
	ea_t FirstFuncAddr = this->GetBlock()->GetFunc()->GetFirstFuncAddr();

	if ((o_reg != UseOp.type) || (UseDefAddr == (FirstFuncAddr - 1))) {
		// Cannot search for non-reg defs by SSANum reliably.
		// FirstFuncAddr - 1 signifies the pseudo-inst to hold DEFs of regs
		// that are LiveIn to the function; pseudo-inst is not a zero-extended load.
		return false;
	}

	if (UseDefAddr < this->GetBlock()->GetFunc()->GetNumBlocks()) {
		// A block number was returned. That means the DEF is in a Phi Function.
		// We could trace all Phi USEs and see if all of them come from zero-extended
		// moves into the UseOp register, but a single Phi USE coming from a
		// zero-extended move is enough to potentially lead to a false positive
		// numeric error. We recurse on the Phi USEs and declare success as soon
		// as one of them succeeds.
		size_t BlockNum = (size_t) UseDefAddr;
		assert(!LocalName);
		SMPBasicBlock *PhiDefBlock = this->GetBlock()->GetFunc()->GetBlockByNum(BlockNum);
		assert(NULL != PhiDefBlock);
		if (PhiDefBlock->IsProcessed()) {
			return false; // block already visited; prevent infinite recursion
		}
		set<SMPPhiFunction, LessPhi>::iterator DefPhiIter = PhiDefBlock->FindPhi(UseOp);
		assert(DefPhiIter != PhiDefBlock->GetLastPhi());
		size_t PhiListSize = DefPhiIter->GetPhiListSize();
		PhiDefBlock->SetProcessed(true); // Prevent infinite recursion
		for (size_t UseIndex = 0; UseIndex < PhiListSize; ++UseIndex) {
			int PhiUseSSANum = DefPhiIter->GetUseSSANum(UseIndex);
			if (this->IsOpSourceZeroExtendedMove(UseOp, PhiUseSSANum, TruncationCheck)) {
				return true; // only one success on all Phi USEs is needed
			}
		}
		return false;
	}

	// The DEF is in an ordinary instruction.
	SMPInstr *DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(UseDefAddr);
	unsigned short SignMask;
	if (DefInst->MDIsSignedLoad(SignMask)) {
		return (FG_MASK_UNSIGNED == SignMask);
	}

	if (DefInst->MDIsMoveInstr()) {
		op_t MoveUseOp = DefInst->GetMoveSource();
		if (o_reg != MoveUseOp.type) {
			return false; // pattern is simple; don't try to follow through memory
		}
		CanonicalizeOpnd(MoveUseOp);
		set<DefOrUse, LessDefUse>::iterator MoveUseIter = DefInst->FindUse(MoveUseOp);
		assert(MoveUseIter != DefInst->GetLastUse());
		int MoveUseSSANum = MoveUseIter->GetSSANum();
		return DefInst->IsOpSourceZeroExtendedMove(MoveUseOp, MoveUseSSANum, TruncationCheck); // recurse
	}

	if (TruncationCheck && DefInst->MDIsNonOverflowingBitManipulation()) {
		// Not a move, not a zero-extended move. We must return false for the non-truncation case,
		// but we allow non-overflowing bit manipulation instructions in the chain for truncation checks.
		// This is because of a benign code pattern:
		//   reg := zero-extended move
		//   reg := reg AND bit pattern
		//   reg := reg OR bit pattern
		//   store lower bits of reg
		// Compilers like to do 32-bit arithmetic. There was never any good reason otherwise to zero-extend the
		// value in the first instruction in the pattern. The lower bits that are stored at the end of the code
		// sequence are the only bits that ever mattered, so this is not really a truncation.
		set<DefOrUse, LessDefUse>::iterator BitUseIter = DefInst->FindUse(UseOp);
		if (BitUseIter == DefInst->GetLastUse()) {
			return false; // UseOp is not a USE of the defining instruction
		}
		int BitUseSSANum = BitUseIter->GetSSANum();
		return DefInst->IsOpSourceZeroExtendedMove(UseOp, BitUseSSANum, true); // recurse up the chain
	}

	return false;
} // end of SMPInstr::IsOpSourceZeroExtendedMove()
clc5q
committed
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
// Does UseOp ultimately come from a move-with-zero-extension instruction OR from a condition code OR from a right shift?
// UseOp/UseSSANum identify the USE whose DEF is looked up through the
// enclosing basic block. The DEF chain is followed through Phi functions and
// register-to-register moves. When TruncationCheck is true, instructions for
// which MDIsNonOverflowingBitManipulation() or MDIsSmallAdditionOrSubtraction()
// holds are also followed. Only register operands are traced.
bool SMPInstr::IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(op_t UseOp, int UseSSANum, bool TruncationCheck) {
	bool LocalName = this->GetBlock()->IsLocalName(UseOp);
	ea_t UseAddr = this->GetAddr();
	ea_t UseDefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, UseAddr, UseSSANum, LocalName);
	ea_t FirstFuncAddr = this->GetBlock()->GetFunc()->GetFirstFuncAddr();

	if ((o_reg != UseOp.type) || (UseDefAddr == (FirstFuncAddr - 1))) {
		// Cannot search for non-reg defs by SSANum reliably.
		// FirstFuncAddr - 1 signifies the pseudo-inst to hold DEFs of regs
		// that are LiveIn to the function; pseudo-inst is not a zero-extended load.
		return false;
	}

	if (UseDefAddr < this->GetBlock()->GetFunc()->GetNumBlocks()) {
		// A block number was returned. That means the DEF is in a Phi Function.
		// We could trace all Phi USEs and see if all of them come from zero-extended
		// moves into the UseOp register, but a single Phi USE coming from a
		// zero-extended move is enough to potentially lead to a false positive
		// numeric error. We recurse on the Phi USEs and declare success as soon
		// as one of them succeeds.
		size_t BlockNum = (size_t) UseDefAddr;
		assert(!LocalName);
		SMPBasicBlock *PhiDefBlock = this->GetBlock()->GetFunc()->GetBlockByNum(BlockNum);
		assert(NULL != PhiDefBlock);
		if (PhiDefBlock->IsProcessed()) {
			return false; // block already visited; prevent infinite recursion
		}
		set<SMPPhiFunction, LessPhi>::iterator DefPhiIter = PhiDefBlock->FindPhi(UseOp);
		assert(DefPhiIter != PhiDefBlock->GetLastPhi());
		size_t PhiListSize = DefPhiIter->GetPhiListSize();
		PhiDefBlock->SetProcessed(true); // Prevent infinite recursion
		for (size_t UseIndex = 0; UseIndex < PhiListSize; ++UseIndex) {
			int PhiUseSSANum = DefPhiIter->GetUseSSANum(UseIndex);
			if (this->IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(UseOp, PhiUseSSANum, TruncationCheck)) {
				return true; // only one success on all Phi USEs is needed
			}
		}
		return false;
	}

	// The DEF is in an ordinary instruction.
	SMPInstr *DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(UseDefAddr);
	unsigned short SignMask;
	if (DefInst->MDIsSignedLoad(SignMask)) {
		return (FG_MASK_UNSIGNED == SignMask);
	}

	if (DefInst->MDIsAnySetValue() || DefInst->MDIsShiftRight()) {
		return true;
	}

	if (DefInst->MDIsMoveInstr()) {
		op_t MoveUseOp = DefInst->GetMoveSource();
		if (o_reg != MoveUseOp.type) {
			return false; // pattern is simple; don't try to follow through memory
		}
		CanonicalizeOpnd(MoveUseOp);
		set<DefOrUse, LessDefUse>::iterator MoveUseIter = DefInst->FindUse(MoveUseOp);
		assert(MoveUseIter != DefInst->GetLastUse());
		int MoveUseSSANum = MoveUseIter->GetSSANum();
		return DefInst->IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(MoveUseOp, MoveUseSSANum, TruncationCheck); // recurse
	}

	if (TruncationCheck && (DefInst->MDIsNonOverflowingBitManipulation() || DefInst->MDIsSmallAdditionOrSubtraction())) {
		// Not a move, not a zero-extended move. We must return false for the non-truncation case,
		// but we allow non-overflowing bit manipulation instructions in the chain for truncation checks.
		// This is because of a benign code pattern:
		//   reg := zero-extended move
		//   reg := reg AND bit pattern
		//   reg := reg OR bit pattern
		//   store lower bits of reg
		// Compilers like to do 32-bit arithmetic. There was never any good reason otherwise to zero-extend the
		// value in the first instruction in the pattern. The lower bits that are stored at the end of the code
		// sequence are the only bits that ever mattered, so this is not really a truncation.
		// NOTE: We combine into this case additions or subtractions of small values, as they only operate on the
		// lower bits of the register.
		set<DefOrUse, LessDefUse>::iterator BitUseIter = DefInst->FindUse(UseOp);
		if (BitUseIter == DefInst->GetLastUse()) {
			return false; // UseOp is not a USE of the defining instruction
		}
		int BitUseSSANum = BitUseIter->GetSSANum();
		return DefInst->IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(UseOp, BitUseSSANum, true); // recurse up the chain
	}

	return false;
} // end of SMPInstr::IsOpSourceZeroExtendedMoveShiftRightOrConditionCode()
// Is opcode a shift or rotate?
// NOTE: MMX/SSE unit shifts that do not use a general purpose register as a
// shift counter are deliberately left out, because right now this method is
// only used as a helper for IsSubRegUsedAsShiftCount().
// The two range tests rely on the ordering of the NN_* opcode constants.
bool SMPInstr::MDIsShiftOrRotate(void) const {
	// Opcodes from NN_rcl through NN_ror.
	if ((NN_rcl <= SMPcmd.itype) && (NN_ror >= SMPcmd.itype)) {
		return true;
	}
	// Opcodes from NN_sal through NN_shr.
	if ((NN_sal <= SMPcmd.itype) && (NN_shr >= SMPcmd.itype)) {
		return true;
	}
	// Double-precision shifts.
	return ((NN_shld == SMPcmd.itype) || (NN_shrd == SMPcmd.itype));
} // end of SMPInstr::MDIsShiftOrRotate()
clc5q
committed
// Is opcode a shift to the right?
// True only for the NN_sar and NN_shr opcodes.
bool SMPInstr::MDIsShiftRight(void) const {
	if (NN_sar == SMPcmd.itype) {
		return true;
	}
	return (NN_shr == SMPcmd.itype);
}
// Does the shift or rotate RTL move the upper HalfBitWidth bits
// into the lower half of the register? Or, if MustBeHalfRegWidth is false,
// do we shift right by HalfBitWidth bits?
bool SMPInstr::ShiftMakesUpperBitsLower(size_t HalfBitWidth, bool MustBeHalfRegWidth) {
bool FullCircle = false;
if (!MustBeHalfRegWidth || (MD_NORMAL_MACHINE_BITWIDTH == (HalfBitWidth * 2))) {
SMPRegTransfer *CurrRT = this->RTL.GetRT(0);
if ((NULL != CurrRT) && (CurrRT->HasRightSubTree())) {
CurrRT = CurrRT->GetRightTree();
SMPoperator CurrOper = CurrRT->GetOperator();
bool LeftRotate = (SMP_ROTATE_LEFT == CurrOper);
if ((SMP_U_RIGHT_SHIFT == CurrOper) || (SMP_S_RIGHT_SHIFT == CurrOper)
|| LeftRotate || (SMP_ROTATE_RIGHT == CurrOper)) {
if (CurrRT->HasRightSubTree()) { // double-word shift
CurrRT = CurrRT->GetRightTree();
}
assert(!(CurrRT->HasRightSubTree()));
op_t ShiftCount = CurrRT->GetRightOperand();
if (o_imm == ShiftCount.type) {
uval_t ImmVal = ShiftCount.value;
// If we rotate left by e.g. 32-HalfBitWidth bits, then we are processing
// bytes or halfregs one at a time; if we rotate or shift right by HalfBitWidth,
// we are processing the register one HalfBitWidth at a time. We also a
if (MustBeHalfRegWidth || (!LeftRotate)) {
FullCircle = (HalfBitWidth == ImmVal);
}
else {
// Left rotate amount plus HalfBitWidth must add up to full register width
FullCircle = (MD_NORMAL_MACHINE_BITWIDTH == (ImmVal + HalfBitWidth));
}
}
}
}
}
return FullCircle;
} // SMPInstr::ShiftMakesUpperBitsLower()
clc5q
committed
#if 0
// Look up SearchDelta in StackDeltaSet and report whether it was already there.
// A delta that was not present is inserted, and StackPtrOffset is lowered to
// it when it is smaller than the current offset.
// NOTE: this code is compiled out by the surrounding #if 0.
bool SMPInstr::FindStackPtrDelta(sval_t SearchDelta) const {
	if (this->StackDeltaSet.find(SearchDelta) != this->StackDeltaSet.end()) {
		return true; // already recorded
	}

	this->StackDeltaSet.insert(SearchDelta);
	// Mimic IDA Pro, which seems to keep the biggest stack frame possible.
	// With negative stack deltas, this means the smallest stack delta is kept.
	if (SearchDelta < this->StackPtrOffset) {
		this->SetStackPtrOffset(SearchDelta);
	}
	return false;
} // end of SMPInstr::FindStackPtrDelta()
#endif
// Set the type of all immediate operands found in the USE set.
// Set all flags and floating point register USEs and DEFs to NUMERIC also,
// along with easily determined types for special cases.
//
// Summary of the typing rules applied below:
//  USEs: an immediate becomes GLOBALPTR, CODEPTR or NUMERIC depending on its value;
//        the flags register and floating point/MMX/XMM registers become NUMERIC;
//        R_sp (and R_bp when UseFP is true) becomes STACKPTR.
//  DEFs: the same flags, stack/frame pointer and floating point rules are applied.
//  Memory operands in either set are passed to MDFindPointerUse().
// UseFP: when true, R_bp operands are typed STACKPTR just like R_sp.
// NOTE(review): as shown here the body looks incomplete -- UseOp, DefOp and DebugFlag
//  have no visible declaration, several braces do not balance, and the bare "clc5q" /
//  "committed" lines look like revision-viewer residue rather than code. Confirm against
//  the repository copy before building.
void SMPInstr::SetImmedTypes(bool UseFP) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
set<DefOrUse, LessDefUse>::iterator CurrDef;
uval_t ImmVal;
#if SMP_VERBOSE_DEBUG_BUILD_RTL
// Verbose builds only: force debug output for two hard-coded addresses and for
//  any function whose name starts with "__libc_csu_fini".
DebugFlag = DebugFlag || (this->address == 0x805cd52) || (this->address == 0x805cd56);
DebugFlag |= (0 == strncmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName(), 15));
#endif
// Pass 1: walk the USE list and type each operand that can be typed locally.
CurrUse = this->GetFirstUse();
while (CurrUse != this->GetLastUse()) {
UseOp = CurrUse->GetOp();
clc5q
committed
SMP_msg("SetImmedTypes USE: ");
clc5q
committed
SMP_msg("\n");
// Immediate operand: classify its value as global address, code address, or plain number.
if (o_imm == UseOp.type) {
ImmVal = UseOp.value;
if (IsImmedGlobalAddress((ea_t) ImmVal)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting to GLOBALPTR\n");
// SetUseType() hands back an iterator, which becomes the new loop position.
CurrUse = this->SetUseType(UseOp, GLOBALPTR);
#if 0
else if (IsDataAddress((ea_t) ImmVal)) {
// NOTE: We must call IsDataAddress() before we call IsImmedCodeAddress()
// to catch the data addresses within the code address range.
clc5q
committed
if (DebugFlag) SMP_msg("Setting to POINTER\n");
CurrUse = this->SetUseType(UseOp, POINTER);
}
#endif
// Every immediate of an interrupt call, and any immediate that is a code address, is a CODEPTR.
else if (this->MDIsInterruptCall() || IsImmedCodeAddress((ea_t) ImmVal)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting to CODEPTR\n");
CurrUse = this->SetUseType(UseOp, CODEPTR);
}
else { // NUMERIC
clc5q
committed
if (DebugFlag) SMP_msg("Setting to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
// Register operand: only the flags register and the stack/frame pointer are typed here.
else if (o_reg == UseOp.type) {
if (UseOp.is_reg(X86_FLAGS_REG)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting flags reg to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
}
#if 1
else if (UseOp.is_reg(R_sp) || (UseFP && UseOp.is_reg(R_bp))) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting reg to STACKPTR\n");
CurrUse = this->SetUseType(UseOp, STACKPTR);
}
#endif
}
#if 0 // could these registers have pointers in them?
else if ((o_trreg == UseOp.type) ||(o_dbreg == UseOp.type) || (o_crreg == UseOp.type)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting special reg to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
}
#endif
else if ((o_fpreg == UseOp.type) || (o_mmxreg == UseOp.type) || (o_xmmreg == UseOp.type)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting floating point reg to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
}
else if ((o_mem == UseOp.type) || (o_phrase == UseOp.type) || (o_displ == UseOp.type)) {
// For memory operands, we need to identify the POINTER value that
// is used in the addressing mode, if possible.
// The return value of MDFindPointerUse() is deliberately ignored.
(void) this->MDFindPointerUse(UseOp, UseFP);
}
++CurrUse;
} // end while all USEs via CurrUse
// Pass 2: walk the DEF list and apply the register and memory operand rules.
CurrDef = this->GetFirstDef();
while (CurrDef != this->GetLastDef()) {
DefOp = CurrDef->GetOp();
if (DebugFlag) {
clc5q
committed
SMP_msg("SetImmedTypes DEF: ");
clc5q
committed
SMP_msg("\n");
clc5q
committed
if (DebugFlag) SMP_msg("FuncName: %s\n", this->BasicBlock->GetFunc()->GetFuncName());
if (DefOp.is_reg(X86_FLAGS_REG)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting flags reg DEF to NUMERIC\n");
CurrDef = this->SetDefType(DefOp, NUMERIC);
// No need to propagate this DEF type, as all flags will become NUMERIC.
}
#if 1
else if (DefOp.is_reg(R_sp) || (DefOp.is_reg(R_bp) && UseFP)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting reg DEF to STACKPTR\n");
CurrDef = this->SetDefType(DefOp, STACKPTR);
// SetDefType() must not hand back Defs.GetLastRef() (presumably the past-the-end marker -- confirm).
assert(CurrDef != this->Defs.GetLastRef());
// No need to propagate; all stack and frame pointers will become STACKPTR.
else if ((o_fpreg == DefOp.type) || (o_mmxreg == DefOp.type) || (o_xmmreg == DefOp.type)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting floating point reg DEF to NUMERIC\n");
CurrDef = this->SetDefType(DefOp, NUMERIC);
// No need to propagate; all FP reg uses will become NUMERIC anyway.
}
#if 0 // could these registers have pointers in them?
else if ((o_trreg == DefOp.type) || (o_dbreg == DefOp.type) || (o_crreg == DefOp.type)) {
clc5q
committed
if (DebugFlag) SMP_msg("Setting special reg DEF to NUMERIC\n");
CurrDef = this->SetDefType(DefOp, NUMERIC);
}
#endif
else if ((o_mem == DefOp.type) || (o_phrase == DefOp.type) || (o_displ == DefOp.type)) {
// For memory operands, we need to identify the POINTER value that
// is used in the addressing mode, if possible.
// The return value of MDFindPointerUse() is deliberately ignored.
(void) this->MDFindPointerUse(DefOp, UseFP);
}
++CurrDef;
} // end while all DEFs via CurrDef
return;
} // end of SMPInstr::SetImmedTypes()
// Is the instruction a load from the stack?
//  Calls SetLoadFromStack() when all of the following hold:
//   1. OptType is 3 (the load/store category),
//   2. the instruction has a source memory operand, and
//   3. at least one USE operand is a stack access according to MDIsStackAccessOpnd().
//  Otherwise the instruction is left unchanged.
// UseFP: forwarded to MDIsStackAccessOpnd() for each USE operand.
void SMPInstr::MDFindLoadFromStack(bool UseFP) {
	set<DefOrUse, LessDefUse>::iterator UseIter;
	op_t UseOp;

	if ((3 == this->OptType) && (this->HasSourceMemoryOperand())) {
		// Loads and stores are OptCategory 3. We want only loads from the stack.
		for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
			UseOp = UseIter->GetOp();
			if (MDIsStackAccessOpnd(UseOp, UseFP)) {
				// One stack-based source operand is enough; stop searching.
				this->SetLoadFromStack();
				break;
			}
		}
	}
	return;
} // end of SMPInstr::MDFindLoadFromStack()
// Report whether this instruction is an extending load whose signedness is implied
//  by the opcode itself.
//  NN_movsx: SignMask receives FG_MASK_SIGNED and the result is true.
//  NN_movzx: SignMask receives FG_MASK_UNSIGNED and the result is true.
//  Any other opcode: SignMask is left untouched and the result is false.
bool SMPInstr::MDIsSignedLoad(unsigned short &SignMask) {
	const unsigned short InstrType = this->SMPcmd.itype;

	if (NN_movsx == InstrType) {
		SignMask = FG_MASK_SIGNED;
		return true;
	}
	if (NN_movzx == InstrType) {
		SignMask = FG_MASK_UNSIGNED;
		return true;
	}
	return false;
}
clc5q
committed
// true if increment or addition of small positive immediate value
#define STARS_SMALL_POS_VALUE_LIMIT 16
bool SMPInstr::MDIsSmallPositiveAddition(void) {
unsigned short opcode = this->SMPcmd.itype;
bool found = (NN_inc == opcode);
if (!found && ((NN_add == opcode) || (NN_adc == opcode))) {
clc5q
committed
set<DefOrUse, LessDefUse>::iterator UseIter;
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
op_t UseOp = UseIter->GetOp();
if (o_imm == UseOp.type) {
uval_t ImmVal = UseOp.value;
if ((ImmVal <= STARS_SMALL_POS_VALUE_LIMIT) && (0 < ImmVal)) {
found = true;
break;
}
}
}
}
return found;
} // end of SMPInstr::MDIsSmallPositiveAddition()
// true if increment, decrement, or addition or subtraction of small immediate value
bool SMPInstr::MDIsSmallAdditionOrSubtraction(void) {
unsigned short opcode = this->SMPcmd.itype;
bool found = ((NN_inc == opcode) || (NN_dec == opcode));
if ((NN_add == opcode) || (NN_adc == opcode) || (NN_sub == opcode) || (NN_sbb == opcode)) {
set<DefOrUse, LessDefUse>::iterator UseIter;
for (UseIter = this->GetFirstUse(); !found && (UseIter != this->GetLastUse()); ++UseIter) {
op_t UseOp = UseIter->GetOp();
if (o_imm == UseOp.type) {
uval_t ImmVal = UseOp.value;
int SignedImmVal = (int) ImmVal;
found = ((SignedImmVal >= (-STARS_SMALL_POS_VALUE_LIMIT)) && (SignedImmVal <= STARS_SMALL_POS_VALUE_LIMIT));
}
}
}
return found;
} // end of SMPInstr::MDIsSmallAdditionOrSubtraction()
// Inst is move or register clear.
//  Returns true when IsRegClearIdiom() or MDIsMoveInstr() holds for this instruction.
// ValueFound (out): always written; true only when the assigned value is a known constant,
//  i.e. for the register clear idiom or a move whose MoveSource is an immediate.
// ConstValue (out): written only when ValueFound is set to true (0 for the clear idiom,
//  MoveSource.value for an immediate move); otherwise the caller's value is left untouched.
bool SMPInstr::MDIsSimpleAssignment(bool &ValueFound, uval_t &ConstValue) {
	bool Simple = false;

	ValueFound = false;
	if (this->IsRegClearIdiom()) {
		Simple = true;
		ValueFound = true;
		ConstValue = 0;
	}
	else if (this->MDIsMoveInstr()) {
		Simple = true;
		if (o_imm == this->MoveSource.type) {
			ValueFound = true;
			ConstValue = this->MoveSource.value;
		}
	}
	return Simple;
} // end of SMPInstr::MDIsSimpleAssignment()
// Inst clears register or adds or subtracts small immediate value, as is done with counter variables.
bool SMPInstr::IsCounterOperation(void) {
bool CounterOperation = false;
bool ImmedValueFound = false;
uval_t ConstValue = 1;
if (this->MDIsSimpleAssignment(ImmedValueFound, ConstValue)) {
CounterOperation = (ImmedValueFound && (0 == ConstValue));
}
else {
CounterOperation = this->MDIsSmallAdditionOrSubtraction();
}