Skip to content
Snippets Groups Projects
SMPInstr.cpp 514 KiB
Newer Older
	// Next, handle repeat prefixes in the instructions. The Intel REPE/REPZ prefix
	//  is just the text printed for SCAS/CMPS instructions that have a REP prefix.
	//  Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
	//  REPNE/REPNZ only applies to SCAS and CMPS instructions.
	bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
	bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
	if (HasRepPrefix && HasRepnePrefix)
		SMP_msg("REP and REPNE both present at %x %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
	if (HasRepPrefix || HasRepnePrefix) {
		// All repeating instructions use ECX as the countdown register.
		BaseOpnd.type = o_reg; // Change type and reg fields
		BaseOpnd.reg = R_cx;
		BaseOpnd.clr_showed();
clc5q's avatar
clc5q committed
		this->Defs.SetRef(BaseOpnd, NUMERIC);
		this->Uses.SetRef(BaseOpnd, NUMERIC);
	if ((opcode == NN_cmps) || (opcode == NN_scas) || (opcode == NN_movs) || (opcode == NN_stos)) {
		// ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS.
		//  Only EDI is involved with SCAS/STOS.
		BaseOpnd.type = o_reg; // Change type and reg fields
		BaseOpnd.clr_showed();
		if ((opcode == NN_cmps) || (opcode == NN_movs)) {
clc5q's avatar
clc5q committed
			this->Defs.SetRef(BaseOpnd, POINTER);
			this->Uses.SetRef(BaseOpnd, POINTER);
clc5q's avatar
clc5q committed
		this->Defs.SetRef(BaseOpnd, POINTER);
		this->Uses.SetRef(BaseOpnd, POINTER);
	else if ((NN_loopw <= opcode) && (NN_loopqne >= opcode)) {
		op_t LoopCounterOp = InitOp;
		LoopCounterOp.type = o_reg;
		LoopCounterOp.reg = R_cx;
		this->Defs.SetRef(LoopCounterOp, NUMERIC);
		this->Uses.SetRef(LoopCounterOp, NUMERIC);
	}
	// Now, handle special instruction categories that have implicit operands.
		// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
		//  sound by declaring that EAX is always a DEF.
		this->MDAddRegDef(R_ax, false);
	} // end if NN_cmpxchg
	else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) {
		// IDA does not include the stack pointer in the DEFs or USEs.
		this->MDAddRegDef(R_sp, false);
		this->MDAddRegUse(R_sp, false);
		if (!this->MDIsReturnInstr()) {
			// We always reference [esp+0] or [esp-4], so add it to the DEF or USE list.
			StackOp.type = o_displ;
			StackOp.reg = R_sp;
			if (this->MDIsPopInstr()) {
				StackOp.addr = 0;  // [ESP+0]
				this->Uses.SetRef(StackOp);  // USE
			}
			else {
				StackOp.addr = (ea_t) -4;  // [ESP-4]
				this->Defs.SetRef(StackOp); // DEF
			}
#if SMP_CALL_TRASHES_REGS
	else if ((this->type == CALL) || (this->type == INDIR_CALL) || this->IsTailCall()) {
		// We want to add the caller-saved registers to the USEs and DEFs lists
		this->MDAddRegDef(R_ax, false);
		this->MDAddRegDef(R_cx, false);
		this->MDAddRegDef(R_dx, false);
		this->MDAddRegUse(R_ax, false);
		this->MDAddRegUse(R_cx, false);
		this->MDAddRegUse(R_dx, false);
#if 1
			this->MDAddRegDef(R_bx, false);
			this->MDAddRegUse(R_bx, false);
			this->MDAddRegDef(R_si, false);
			this->MDAddRegUse(R_si, false);
	else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) {
		// Entire function prologue or epilogue microcoded.
		this->MDAddRegDef(R_sp, false);
		this->MDAddRegUse(R_sp, false);
		this->MDAddRegDef(R_bp, false);
		this->MDAddRegUse(R_bp, false);
	else if ((opcode == NN_maskmovq) || (opcode == NN_maskmovdqu)) {
clc5q's avatar
clc5q committed
		this->MDAddRegUse(R_di, false, POINTER);
	else if (8 == this->GetOptType()) {
		// This category implicitly writes to EDX:EAX.
		this->MDAddRegDef(R_dx, false);
		this->MDAddRegDef(R_ax, false);
	} // end else if (8 == GetOptType)
	else if (7 == this->GetOptType()) {
		// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
		//  DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
		// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
		//  sense, because they are not displayed in the disassembly text). For example:
		//  mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
		//  source operand is only 8 bits wide, there is room to hold the result in AX
		//  without using DX:  mul bl means AX <-- AL*BL.
		// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
		//  operands:  imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
		//  EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
		for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
			op_t TempUse = this->SMPcmd.Operands[OpNum];
			if (!TempUse.showed()) { // hidden operand
				if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
					if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
						this->MDAddRegUse(R_dx, false);
					}
					this->MDAddRegDef(R_ax, false);
					this->MDAddRegDef(R_dx, false);
				}
			}
		}
	} // end else if (7 == OptType)
#if 0
	// The floating point instructions in type categories 14 and 15 often USE and DEF
	//  the floating point register stack, e.g. pushing a value onto that stack is a
	//  massive copy downward of stack locations. We don't really care about the USE of
	//  the stack if the value being pushed came from elsewhere than the stack. For example,
	//  an "fld" opcode pushes its source onto the stack. We build RTLs with a simple
	//  move structure, but the RTL building can be fooled by seeing two "source" operands
	//  in the USE list.
	if ((14 == SMPTypeCategory[this->SMPcmd.itype])
		|| (15 == SMPTypeCategory[this->SMPcmd.itype])) {
	}
#endif

#if 0  // Not true for LOOP instructions that use only the ECX counter register.
	// The return value register EAX is not quite like a caller-save or callee-save
	//  register (technically, it is caller-save). Within a callee, it might appear
	//  that EAX has become dead by the time a return instruction is reached, but
	//  the USE that would make it not dead is in the caller. To prevent type inference
	//  from mistakenly thinking that all USEs of EAX have been seen in the callee,
	//  we add EAX to the USE list for all return instructions, as well as for all
	//  tail calls, which are essentially returns in terms of data flow analysis.
	// This USE of EAX will always be of type UNINIT unless its DEF has a known type
	//  that propagates to it. Thus, it will prevent an invalid back inference of the
	//  DEF type from "all" USE types that are visible in the callee; even if they
	//  were all NUMERIC, this return USE will be UNINIT and inhibit the invalid
	//  type inference. EAX could be loaded with a pointer from memory, for example,
	//  and USEd only in a comparison instruction, making it falsely appear to be
	//  a NUMERIC, without this extra USE at the return instruction.
	// Because some of the library functions pass values around in EBX, EDI, etc.,
	//  we will add these general purpose registers to the USE list for returns
	//  in order to prevent erroneous analyses of dead registers or unused
	//  metadata.
	if ((this->type == RETURN) || this->IsTailCall()) {
		this->MDAddRegUse(R_bx, false);
		this->MDAddRegUse(R_cx, false);
		this->MDAddRegUse(R_dx, false);
		if (!UseFP)
			this->MDAddRegUse(R_bp, false);
		this->MDAddRegUse(R_si, false);
		this->MDAddRegUse(R_di, false);
	// Next, add the flags register to the DEFs and USEs for those instructions that
	//  are marked as defining or using flags.
	if (!this->IsDefsFlags() && SMPDefsFlags[opcode]) {
		this->MDAddRegDef(X86_FLAGS_REG, false);
		this->SetDefsFlags();
	if (!this->IsUsesFlags() && SMPUsesFlags[opcode]) {
		this->MDAddRegUse(X86_FLAGS_REG, false);
		this->SetUsesFlags();
		// Clear the DEFs and USEs for no-ops.
		//  These include machine idioms for no-ops, e.g. mov esi,esi
		//  or xchg ax,ax or lea esi,[esi].
		this->Defs.clear();
		this->Uses.clear();
		this->MoveSource = InitOp;
		this->DEFMemOp = InitOp;
		this->USEMemOp = InitOp;
		this->LeaUSEMemOp = InitOp;
	if (DebugFlag) {
		SMP_msg("DEBUG after MDFixupDefUseLists:\n");
		this->Dump();
	}
	return;
} // end of SMPInstr::MDFixupDefUseLists()

// If we can definitely identify which part of the addressing expression
//  used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR
//  immediate, set the USE type for that register to POINTER and return true.
//  If we can find definite NUMERIC addressing registers that are not already
//  typed as NUMERIC, set their USE types to NUMERIC and return true.
//
// Parameters:
//  MemOp - memory operand whose base/index registers are to be typed.
//  UseFP - true if the frame pointer (EBP) is used as a frame pointer, in
//          which case EBP-based addressing is treated like ESP-based addressing.
// Returns: true if any USE type was changed by this call; false otherwise.
//
// NOTE(review): This body was reconstructed from a damaged listing. Closing
//  braces and one "else {" were restored from the indentation, and
//  "changed = true" was added on the three paths that set a USE to POINTER
//  so the return value matches the contract stated above. Please diff
//  against the repository copy before merging.
bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
	bool changed = false;
	int BaseReg;
	int IndexReg;
	op_t BaseOp = InitOp;
	op_t IndexOp = InitOp;
	SMPOperandType BaseType = UNKNOWN;
	SMPOperandType IndexType = UNKNOWN;
	ushort ScaleFactor;
	ea_t offset;
	set<DefOrUse, LessDefUse>::iterator BaseIter;
	set<DefOrUse, LessDefUse>::iterator IndexIter;

	if (NN_lea == this->SMPcmd.itype)
		return false;  // lea instruction really has no memory operands
	if (NN_fnop == this->SMPcmd.itype)
		return false;  // SSA marker instruction

	// Split the addressing expression into its components and look up the
	//  current USE type of each register that is present.
	MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset);
	if (R_none != IndexReg) {
		IndexOp.type = o_reg;
		IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
		IndexOp.dtyp = dt_dword; // Canonical 32-bit width
		IndexIter = this->FindUse(IndexOp);
		assert(IndexIter != this->GetLastUse());
		IndexType = IndexIter->GetType();
	}
	if (R_none != BaseReg) {
		BaseOp.type = o_reg;
		BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
		BaseOp.dtyp = dt_dword; // Canonical 32-bit width
		BaseIter = this->FindUse(BaseOp);
		assert(BaseIter != this->GetLastUse());
		BaseType = BaseIter->GetType();
	}

	// Case 1: stack pointer (or frame pointer) is the base register.
	if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) {
		if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
			// We have an indexed access into the stack frame.
			//  Set IndexReg USE type to NUMERIC.
			changed = true;
			IndexIter = this->SetUseType(IndexOp, NUMERIC);
			assert(IndexIter != this->GetLastUse());
		}
		return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
	}

	// Case 2: stack pointer (or frame pointer) appears in the index slot.
	if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) {
		if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
			// We have an indexed access into the stack frame.
			//  Set BaseReg USE type to NUMERIC.
			// Note that BaseReg is really an IndexReg and vice versa.
			changed = true;
			BaseIter = this->SetUseType(BaseOp, NUMERIC);
			assert(BaseIter != this->GetLastUse());
			SMP_msg("WARNING: BaseReg is index, IndexReg is base: %s\n",
				DisAsmText.GetDisAsm(this->GetAddr()));
		}
		return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
	}

	// Case 3: the displacement is the address of a global, so every register
	//  in the expression is an index into that global.
	if (IsImmedGlobalAddress(offset)) {
		if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
			// We have an indexed access into a global.
			//  Set IndexReg USE type to NUMERIC.
			changed = true;
			IndexIter = this->SetUseType(IndexOp, NUMERIC);
			assert(IndexIter != this->GetLastUse());
		}
		if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
			// We have an indexed access into a global.
			//  Set BaseReg USE type to NUMERIC.
			// Note that BaseReg is really an index register.
			changed = true;
			BaseIter = this->SetUseType(BaseOp, NUMERIC);
			assert(BaseIter != this->GetLastUse());
#if SMP_VERBOSE_FIND_POINTERS
			SMP_msg("WARNING: BaseReg used as index: %s\n", DisAsmText.GetDisAsm(this->GetAddr()));
#endif
		}
		return changed;  // global immediate is handled in SetImmedTypes()
	}

	// At this point, we must have a base address in a register, not used
	//  to directly address the stack or a global.
	if ((0 < ScaleFactor) || (R_none == IndexReg)) {
		// IndexReg is scaled, meaning it is NUMERIC, so BaseReg must
		//  be a POINTER; or IndexReg is not present, so BaseReg is the
		//  only possible holder of an address.
		if (R_none != BaseReg) {
			if (UNINIT == BaseIter->GetType()) {
				changed = true;
				BaseIter = this->SetUseType(BaseOp, POINTER);
				assert(BaseIter != this->GetLastUse());
			}
		}
	}
	else if (R_none == BaseReg) {
		// We have an unscaled IndexReg and no BaseReg and offset was
		//  not a global offset, so IndexReg must be a POINTER.
		if (R_none != IndexReg) {
			if (UNINIT == IndexType) {
				changed = true;
				IndexIter = this->SetUseType(IndexOp, POINTER);
				assert(IndexIter != this->GetLastUse());
			}
		}
	}
	else { // We have BaseReg and an unscaled IndexReg.
		// The only hope for typing something like [ebx+edx] is for
		//  one register to already be typed NUMERIC, in which case
		//  the other one must be a POINTER, or if one register is
		//  already POINTER, then the other one must be NUMERIC.
		if (IsNumeric(BaseType)) {
			if (UNINIT == IndexType) {
				// Set to POINTER or PROF_POINTER
				changed = true;
				IndexIter = this->SetUseType(IndexOp, POINTER);
				assert(IndexIter != this->GetLastUse());
			}
			else if (IsNumeric(IndexType)) {
				SMP_msg("ERROR: BaseReg and IndexReg both NUMERIC at %x: %s\n",
					this->address, DisAsmText.GetDisAsm(this->GetAddr()));
			}
		}
		else { // BaseReg is not typed NUMERIC
			if (UNINIT == BaseType) { // BaseReg is UNINIT
				if (IsNumeric(IndexType)) {
					// IndexReg is NUMERIC, so BaseReg must hold the address.
					changed = true;
					BaseIter = this->SetUseType(BaseOp, POINTER);
					assert(BaseIter != this->GetLastUse());
				}
				else if (IsDataPtr(IndexType)) {
					// IndexReg is POINTER, so make BaseReg NUMERIC.
					changed = true;
					BaseIter = this->SetUseType(BaseOp, NUMERIC);
					assert(BaseIter != this->GetLastUse());
				}
			}
			else if (IsDataPtr(BaseType)) {
				// BaseReg was a pointer type. IndexReg must be NUMERIC.
				if (UNINIT == IndexType) {
					changed = true;
					IndexIter = this->SetUseType(IndexOp, NUMERIC);
					assert(IndexIter != this->GetLastUse());
				}
				else if (IsDataPtr(IndexType)) {
					SMP_msg("ERROR: BaseReg and IndexReg both POINTER at %x: %s\n",
						this->address, DisAsmText.GetDisAsm(this->GetAddr()));
				}
			}
		}
	}

	return changed;
} // end of SMPInstr::MDFindPointerUse()

// Are all DEFs typed to something besides UNINIT?
bool SMPInstr::AllDEFsTyped(void) {
	if (this->AreDEFsTyped()) {
		return true;
	}
	bool FoundUNINIT = false;
	set<DefOrUse, LessDefUse>::iterator DefIter;
	for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
		if (IsEqType(UNINIT, DefIter->GetType())) {
			FoundUNINIT = true;
			break;
		}
	}
	if (!FoundUNINIT) {
		this->SetDEFsTyped();
	}
	return (!FoundUNINIT);
} // end of SMPInstr::AllDEFsTyped()

// Are all USEs typed to something besides UNINIT?
bool SMPInstr::AllUSEsTyped(void) {
	if (this->AreUSEsTyped()) {
		return true;
	}
	bool FoundUNINIT = false;
	set<DefOrUse, LessDefUse>::iterator UseIter;
	for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
		if (IsEqType(UNINIT, UseIter->GetType())) {
			FoundUNINIT = true;
			break;
		}
	}
	if (!FoundUNINIT) {
		this->SetUSEsTyped();
	}
	return (!FoundUNINIT);
} // end of SMPInstr::AllUSEsTyped()

// Return true if the register in UseOp appears as a plain register operand
//  that the instruction USEs, as opposed to appearing only as an address
//  register inside a memory operand. Registers are compared after
//  canonicalizing sub-registers with MDCanonicalizeSubReg().
bool SMPInstr::IsNonAddressReg(op_t UseOp) const { 
	const ushort WantedReg = MDCanonicalizeSubReg(UseOp.reg);
	for (size_t Slot = 0; Slot < UA_MAXOP; ++Slot) {
		if (!(this->features & UseMacros[Slot])) {
			continue; // this operand slot is not a USE
		}
		op_t Candidate = this->SMPcmd.Operands[Slot];
		if (o_reg != Candidate.type) {
			continue; // only direct register operands qualify
		}
		if (WantedReg == MDCanonicalizeSubReg(Candidate.reg)) {
			return true;
		}
	}
	return false;
} // end of SMPInstr::IsNonAddressReg()
// Return the immediate shift/rotate count of this instruction.
//  The count is read from the right operand of the right subtree of the
//  first RTL register transfer. Returns 0 if this is not a shift or rotate
//  (per MDIsShiftOrRotate()) or if the count operand is not an immediate.
uval_t SMPInstr::MDGetShiftCount(void) const {
	if (!this->MDIsShiftOrRotate()) {
		return 0;
	}

	SMPRegTransfer *TopRT = this->RTL.GetRT(0);
	assert(TopRT->HasRightSubTree());
	SMPRegTransfer *ShiftRT = TopRT->GetRightTree();
	op_t CountOp = ShiftRT->GetRightOperand();
	if (o_imm != CountOp.type) {
		return 0; // count is not an immediate (e.g. held in a register)
	}
	return CountOp.value;
} // end of SMPInstr::MDGetShiftCount()
// Is a sub-register of UseOp used as a shift counter in the RTL?
//  For example, UseOp could be ECX on an x86 machine, and CL
//  could be used as a shift or rotate counter.
// Returns true only when UseOp is a register, this instruction is a shift
//  or rotate, the shift count operand is a register different from
//  UseOp.reg, and both registers canonicalize to the same enclosing register.
bool SMPInstr::IsSubRegUsedAsShiftCount(op_t UseOp) {
	if (o_reg != UseOp.type) {
		return false;
	}
	if (!this->MDIsShiftOrRotate()) {
		return false;
	}

	// The shift count is the right operand of the right subtree of the
	//  first register transfer.
	SMPRegTransfer *TopRT = this->RTL.GetRT(0);
	assert(TopRT->HasRightSubTree());
	op_t CountOp = TopRT->GetRightTree()->GetRightOperand();
	if (o_reg != CountOp.type) {
		return false;
	}

	// Registers that differ, but whose canonical enclosing registers are
	//  equal, mean the count register is a subreg of UseOp's register:
	//  non-immediate shift counters are the 8-bit subregister in x86
	//  (machine dependent here !!!!!!).
	const ushort RawUseReg = UseOp.reg;
	const ushort RawCountReg = CountOp.reg;
	const ushort WideUse = MDCanonicalizeSubReg(RawUseReg);
	const ushort WideCount = MDCanonicalizeSubReg(RawCountReg);
	return ((RawUseReg != RawCountReg) && (WideUse == WideCount));
} // end of SMPInstr::IsSubRegUsedAsShiftCount()

// Does UseOp ultimately come from a set-condition-code instruction?
//
// Parameters:
//  UseOp     - operand USEd by this instruction; only register operands are traced.
//  UseSSANum - SSA number of that USE, used to locate its DEF.
// Returns: true if the DEF chain (followed through register-to-register moves
//  and Phi functions) reaches an instruction for which MDIsAnySetValue() is true.
//
// NOTE(review): Phi blocks are marked with SetProcessed(true) to stop infinite
//  recursion and the mark is not cleared in this function; presumably the
//  caller resets the flags before each top-level query -- confirm.
bool SMPInstr::IsOpSourceConditionCode(op_t UseOp, int UseSSANum) {
	bool FoundConditionalSetInst = false;
	bool LocalName = this->GetBlock()->IsLocalName(UseOp);
	ea_t UseAddr = this->GetAddr();
	ea_t UseDefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, UseAddr, UseSSANum, LocalName);
	ea_t FirstFuncAddr = this->GetBlock()->GetFunc()->GetFirstFuncAddr();

	if ((o_reg != UseOp.type) || (UseDefAddr == (FirstFuncAddr - 1))) {
		// Cannot search for non-reg defs by SSANum reliably.
		//  FirstFuncAddr - 1 signifies the pseudo-inst to hold DEFs of regs
		//  that are LiveIn to the function; pseudo-inst is not a condition code set.
		FoundConditionalSetInst = false; 
	}
	else if (UseDefAddr < this->GetBlock()->GetFunc()->GetNumBlocks()) {
		// A block number was returned. That means the DEF is in a Phi Function.
		//  We could trace all Phi USEs and see if all of them come from condition codes
		//  but we only need one of the Phi USEs to come from
		//  a condition code to potentially lead to a false positive numeric error. We
		//  will recurse on all Phi USEs, declaring success if we find a single one of them
		//  to come from a condition code.
		size_t BlockNum = (size_t) UseDefAddr;
		assert(!LocalName);
		SMPBasicBlock *PhiDefBlock = this->GetBlock()->GetFunc()->GetBlockByNum(BlockNum);
		assert(NULL != PhiDefBlock);
		if (!PhiDefBlock->IsProcessed()) { // Prevent infinite recursion
			set<SMPPhiFunction, LessPhi>::iterator DefPhiIter = PhiDefBlock->FindPhi(UseOp);
			assert(DefPhiIter != PhiDefBlock->GetLastPhi());
			size_t PhiListSize = DefPhiIter->GetPhiListSize();
			PhiDefBlock->SetProcessed(true); // Prevent infinite recursion
			for (size_t UseIndex = 0; UseIndex < PhiListSize; ++UseIndex) {
				int PhiUseSSANum = DefPhiIter->GetUseSSANum(UseIndex);
				if (this->IsOpSourceConditionCode(UseOp, PhiUseSSANum)) {
					FoundConditionalSetInst = true; // only one success on all Phi USEs is needed
					break;
				}
			}
		}
	}
	else {
		// The DEF is in a regular instruction at UseDefAddr.
		SMPInstr *DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(UseDefAddr);
		if (DefInst->MDIsAnySetValue()) {
			FoundConditionalSetInst = true;
		}
		else if (DefInst->MDIsMoveInstr()) {
			op_t MoveUseOp = DefInst->GetMoveSource();
			if (o_reg == MoveUseOp.type) { // pattern is simple; don't try to follow through memory
				CanonicalizeOpnd(MoveUseOp);
				set<DefOrUse, LessDefUse>::iterator MoveUseIter = DefInst->FindUse(MoveUseOp);
				assert(MoveUseIter != DefInst->GetLastUse());
				int MoveUseSSANum = MoveUseIter->GetSSANum();
				FoundConditionalSetInst = DefInst->IsOpSourceConditionCode(MoveUseOp, MoveUseSSANum); // recurse
			}
		}
		else {
			// Not a move, not a condition code transfer. We must return false.
			FoundConditionalSetInst = false;
		}
	}

	return FoundConditionalSetInst;
} // end of SMPInstr::IsOpSourceConditionCode()

// Does UseOp ultimately come from a move-with-zero-extension instruction?
clc5q's avatar
clc5q committed
bool SMPInstr::IsOpSourceZeroExtendedMove(op_t UseOp, int UseSSANum, bool TruncationCheck) {
	bool FoundMoveZX = false;
	bool LocalName = this->GetBlock()->IsLocalName(UseOp);
	ea_t UseAddr = this->GetAddr();
	ea_t UseDefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, UseAddr, UseSSANum, LocalName);
	ea_t FirstFuncAddr = this->GetBlock()->GetFunc()->GetFirstFuncAddr();

clc5q's avatar
clc5q committed
	if ((o_reg != UseOp.type) || (UseDefAddr == (FirstFuncAddr - 1))) {
		// Cannot search for non-reg defs by SSANum reliably.
		//  FirstFuncAddr - 1 signifies the pseudo-inst to hold DEFs of regs
		//  that are LiveIn to the function; pseudo-inst is not a zero-extended load.
		FoundMoveZX = false; 
	}
	else if (UseDefAddr < this->GetBlock()->GetFunc()->GetNumBlocks()) {
		// A block number was returned. That means the DEF is in a Phi Function.
		//  We could trace all Phi USEs and see if all of them come from zero-extended
		//  moves into the UseOp register, but we only need one of the Phi USEs to come from
		//  a zero-extended move to potentially lead to a false positive numeric error. We
		//  will recurse on all Phi USEs, declaring success if we find a single one of them
		//  to come from a zero-extended move.
clc5q's avatar
clc5q committed
		size_t BlockNum = (size_t) UseDefAddr;
		assert(!LocalName);
		SMPBasicBlock *PhiDefBlock = this->GetBlock()->GetFunc()->GetBlockByNum(BlockNum);
		assert(NULL != PhiDefBlock);
		if (!PhiDefBlock->IsProcessed()) { // Prevent infinite recursion
			set<SMPPhiFunction, LessPhi>::iterator DefPhiIter = PhiDefBlock->FindPhi(UseOp);
			assert(DefPhiIter != PhiDefBlock->GetLastPhi());
			size_t PhiListSize = DefPhiIter->GetPhiListSize();
			PhiDefBlock->SetProcessed(true); // Prevent infinite recursion
			for (size_t UseIndex = 0; UseIndex < PhiListSize; ++UseIndex) {
				int PhiUseSSANum = DefPhiIter->GetUseSSANum(UseIndex);
				if (this->IsOpSourceZeroExtendedMove(UseOp, PhiUseSSANum, TruncationCheck)) {
					FoundMoveZX = true; // only one success on all Phi USEs is needed
					break;
		}
	}
	else {
		SMPInstr *DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(UseDefAddr);
		unsigned short SignMask;
		if (DefInst->MDIsSignedLoad(SignMask)) {
			FoundMoveZX = (FG_MASK_UNSIGNED == SignMask);
		}
		else if (DefInst->MDIsMoveInstr()) {
			op_t MoveUseOp = DefInst->GetMoveSource();
			if (o_reg == MoveUseOp.type) { // pattern is simple; don't try to follow through memory
				CanonicalizeOpnd(MoveUseOp);
				set<DefOrUse, LessDefUse>::iterator MoveUseIter = DefInst->FindUse(MoveUseOp);
				assert(MoveUseIter != DefInst->GetLastUse());
				int MoveUseSSANum = MoveUseIter->GetSSANum();
clc5q's avatar
clc5q committed
				FoundMoveZX = DefInst->IsOpSourceZeroExtendedMove(MoveUseOp, MoveUseSSANum, TruncationCheck); // recurse
			}
		}
		else if (TruncationCheck && DefInst->MDIsNonOverflowingBitManipulation()) {
clc5q's avatar
clc5q committed
			// Not a move, not a zero-extended move. We must return false for the non-truncation case,
			//  but we allow non-overflowing bit manipulation instructions in the chain for truncation checks.
			//  This is because of a benign code pattern:
			//   reg: = zero-extended move
			//   reg := reg AND bit pattern
			//   reg := reg OR bit pattern
			//   store lower bits of reg
			//  Compilers like to do 32-bit arithmetic. There was never any good reason otherwise to zero-extend the
			//   value in the first instruction in the pattern. The lower bits that are stored at the end of the code
			//   sequence are the only bits that ever mattered, so this is not really a truncation.
			set<DefOrUse, LessDefUse>::iterator BitUseIter = DefInst->FindUse(UseOp);
			if (BitUseIter != DefInst->GetLastUse()) {
				int BitUseSSANum = BitUseIter->GetSSANum();
				FoundMoveZX = DefInst->IsOpSourceZeroExtendedMove(UseOp, BitUseSSANum, true); // recurse up the chain
	return FoundMoveZX;
} // end of SMPInstr::IsOpSourceZeroExtendedMove()
// Does UseOp ultimately come from a move-with-zero-extension instruction OR from a condition code OR from a right shift?
//
// Parameters:
//  UseOp           - operand USEd by this instruction; only register operands are traced.
//  UseSSANum       - SSA number of that USE, used to locate its DEF.
//  TruncationCheck - when true, non-overflowing bit manipulation and small
//                    addition/subtraction instructions in the DEF chain are
//                    traced through rather than ending the search.
// Returns: true if the DEF chain reaches a load for which MDIsSignedLoad()
//  reports FG_MASK_UNSIGNED, or an instruction for which MDIsAnySetValue()
//  or MDIsShiftRight() is true.
//
// NOTE(review): Phi blocks are marked with SetProcessed(true) to stop infinite
//  recursion and the mark is not cleared in this function; presumably the
//  caller resets the flags before each top-level query -- confirm.
bool SMPInstr::IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(op_t UseOp, int UseSSANum, bool TruncationCheck) {
	bool FoundMoveZXCC = false;
	bool LocalName = this->GetBlock()->IsLocalName(UseOp);
	ea_t UseAddr = this->GetAddr();
	ea_t UseDefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, UseAddr, UseSSANum, LocalName);
	ea_t FirstFuncAddr = this->GetBlock()->GetFunc()->GetFirstFuncAddr();

	if ((o_reg != UseOp.type) || (UseDefAddr == (FirstFuncAddr - 1))) {
		// Cannot search for non-reg defs by SSANum reliably.
		//  FirstFuncAddr - 1 signifies the pseudo-inst to hold DEFs of regs
		//  that are LiveIn to the function; pseudo-inst is not a zero-extended load.
		FoundMoveZXCC = false; 
	}
	else if (UseDefAddr < this->GetBlock()->GetFunc()->GetNumBlocks()) {
		// A block number was returned. That means the DEF is in a Phi Function.
		//  We could trace all Phi USEs and see if all of them come from zero-extended
		//  moves into the UseOp register, but we only need one of the Phi USEs to come from
		//  a zero-extended move to potentially lead to a false positive numeric error. We
		//  will recurse on all Phi USEs, declaring success if we find a single one of them
		//  to come from a zero-extended move.
		size_t BlockNum = (size_t) UseDefAddr;
		assert(!LocalName);
		SMPBasicBlock *PhiDefBlock = this->GetBlock()->GetFunc()->GetBlockByNum(BlockNum);
		assert(NULL != PhiDefBlock);
		if (!PhiDefBlock->IsProcessed()) { // Prevent infinite recursion
			set<SMPPhiFunction, LessPhi>::iterator DefPhiIter = PhiDefBlock->FindPhi(UseOp);
			assert(DefPhiIter != PhiDefBlock->GetLastPhi());
			size_t PhiListSize = DefPhiIter->GetPhiListSize();
			PhiDefBlock->SetProcessed(true); // Prevent infinite recursion
			for (size_t UseIndex = 0; UseIndex < PhiListSize; ++UseIndex) {
				int PhiUseSSANum = DefPhiIter->GetUseSSANum(UseIndex);
				if (this->IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(UseOp, PhiUseSSANum, TruncationCheck)) {
					FoundMoveZXCC = true; // only one success on all Phi USEs is needed
					break;
				}
			}
		}
	}
	else {
		// The DEF is in a regular instruction at UseDefAddr.
		SMPInstr *DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(UseDefAddr);
		unsigned short SignMask;
		if (DefInst->MDIsSignedLoad(SignMask)) {
			FoundMoveZXCC = (FG_MASK_UNSIGNED == SignMask);
		}
		else if (DefInst->MDIsAnySetValue() || DefInst->MDIsShiftRight()) {
			FoundMoveZXCC = true;
		}
		else if (DefInst->MDIsMoveInstr()) {
			op_t MoveUseOp = DefInst->GetMoveSource();
			if (o_reg == MoveUseOp.type) { // pattern is simple; don't try to follow through memory
				CanonicalizeOpnd(MoveUseOp);
				set<DefOrUse, LessDefUse>::iterator MoveUseIter = DefInst->FindUse(MoveUseOp);
				assert(MoveUseIter != DefInst->GetLastUse());
				int MoveUseSSANum = MoveUseIter->GetSSANum();
				FoundMoveZXCC = DefInst->IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(MoveUseOp, MoveUseSSANum, TruncationCheck); // recurse
			}
		}
		else if (TruncationCheck && (DefInst->MDIsNonOverflowingBitManipulation() || DefInst->MDIsSmallAdditionOrSubtraction())) {
			// Not a move, not a zero-extended move. We must return false for the non-truncation case,
			//  but we allow non-overflowing bit manipulation instructions in the chain for truncation checks.
			//  This is because of a benign code pattern:
			//   reg := zero-extended move
			//   reg := reg AND bit pattern
			//   reg := reg OR bit pattern
			//   store lower bits of reg
			//  Compilers like to do 32-bit arithmetic. There was never any good reason otherwise to zero-extend the
			//   value in the first instruction in the pattern. The lower bits that are stored at the end of the code
			//   sequence are the only bits that ever mattered, so this is not really a truncation.
			// NOTE: We combine into this case additions or subtractions of small values, as they only operate on the
			//  lower bits of the register.
			set<DefOrUse, LessDefUse>::iterator BitUseIter = DefInst->FindUse(UseOp);
			if (BitUseIter != DefInst->GetLastUse()) {
				int BitUseSSANum = BitUseIter->GetSSANum();
				FoundMoveZXCC = DefInst->IsOpSourceZeroExtendedMoveShiftRightOrConditionCode(UseOp, BitUseSSANum, true); // recurse up the chain
			}
		}
		else {
			FoundMoveZXCC = false;
		}
	}

	return FoundMoveZXCC;
} // end of SMPInstr::IsOpSourceZeroExtendedMoveShiftRightOrConditionCode()
// Is the opcode a shift or rotate?
//  True for opcodes in the ranges NN_rcl..NN_ror and NN_sal..NN_shr,
//  and for the double-word shifts NN_shld and NN_shrd.
// NOTE: We omit MMX/SSE unit shifts that do not use a general purpose
//  register as a shift counter, because right now this method is only
//  used as a helper for IsSubRegUsedAsShiftCount().
bool SMPInstr::MDIsShiftOrRotate(void) const {
	if ((SMPcmd.itype >= NN_rcl) && (SMPcmd.itype <= NN_ror)) {
		return true; // rotate opcode range
	}
	if ((SMPcmd.itype >= NN_sal) && (SMPcmd.itype <= NN_shr)) {
		return true; // shift opcode range
	}
	// Double-word shifts sit outside the two ranges above.
	return ((SMPcmd.itype == NN_shld) || (SMPcmd.itype == NN_shrd));
} // end of SMPInstr::MDIsShiftOrRotate()
// Is the opcode a right shift, i.e. NN_sar or NN_shr?
bool SMPInstr::MDIsShiftRight(void) const {
	if (SMPcmd.itype == NN_sar) {
		return true;
	}
	return (SMPcmd.itype == NN_shr);
}

// Does the shift or rotate RTL move the upper HalfBitWidth bits
//  into the lower half of the register? Or, if MustBeHalfRegWidth is false,
//  do we shift right by HalfBitWidth bits?
//
// Parameters:
//  HalfBitWidth       - number of bits expected to be moved down.
//  MustBeHalfRegWidth - if true, the answer can only be true when HalfBitWidth
//                       is exactly half of MD_NORMAL_MACHINE_BITWIDTH.
// Returns: true only if the first RTL register transfer is a right shift or a
//  rotate with an immediate count that matches the rules below; false otherwise.
bool SMPInstr::ShiftMakesUpperBitsLower(size_t HalfBitWidth, bool MustBeHalfRegWidth) {
	bool FullCircle = false; // the result; stays false unless a matching immediate count is found

	if (!MustBeHalfRegWidth || (MD_NORMAL_MACHINE_BITWIDTH == (HalfBitWidth * 2))) {
		SMPRegTransfer *CurrRT = this->RTL.GetRT(0);
		if ((NULL != CurrRT) && (CurrRT->HasRightSubTree())) {
			CurrRT = CurrRT->GetRightTree();
			SMPoperator CurrOper =  CurrRT->GetOperator();
			bool LeftRotate = (SMP_ROTATE_LEFT == CurrOper);
			if ((SMP_U_RIGHT_SHIFT == CurrOper) || (SMP_S_RIGHT_SHIFT == CurrOper)
				|| LeftRotate || (SMP_ROTATE_RIGHT == CurrOper)) {
				if (CurrRT->HasRightSubTree()) { // double-word shift
					CurrRT = CurrRT->GetRightTree();
				}
				assert(!(CurrRT->HasRightSubTree()));
				op_t ShiftCount = CurrRT->GetRightOperand();
				if (o_imm == ShiftCount.type) {
					uval_t ImmVal = ShiftCount.value;
					// If we rotate left by e.g. 32-HalfBitWidth bits, then we are processing
					//  bytes or halfregs one at a time; if we rotate or shift right by HalfBitWidth,
					//  we are processing the register one HalfBitWidth at a time.
					// When MustBeHalfRegWidth is true, a left rotate is also compared
					//  directly against HalfBitWidth.
					if (MustBeHalfRegWidth || (!LeftRotate)) {
						FullCircle = (HalfBitWidth == ImmVal);
					}
					else {
						// Left rotate amount plus HalfBitWidth must add up to full register width
						FullCircle = (MD_NORMAL_MACHINE_BITWIDTH == (ImmVal + HalfBitWidth));
					}
				}
			}
		}
	}
	return FullCircle;
} // SMPInstr::ShiftMakesUpperBitsLower()

#if 0
// Find SearchDelta in StackDeltaSet, inserting it if not found. Return whether it was initially found.
//  When a new delta is inserted and it is smaller than the current
//  StackPtrOffset, StackPtrOffset is updated to the new delta as well.
// NOTE(review): This method is declared const, yet it inserts into
//  this->StackDeltaSet and calls SetStackPtrOffset(); presumably those would
//  need to be mutable/const-callable for it to compile -- confirm before
//  re-enabling this disabled code.
bool SMPInstr::FindStackPtrDelta(sval_t SearchDelta) const {
	// Membership test happens before any insertion, so the return value
	//  reflects the state of the set on entry.
	bool found = (this->StackDeltaSet.find(SearchDelta) != this->StackDeltaSet.end());
	if (!found) {
		this->StackDeltaSet.insert(SearchDelta);
		if (SearchDelta < this->StackPtrOffset) {
			// Mimic IDA Pro, which seems to keep the biggest stack frame possible.
			//  With negative stack deltas, this means the smallest stack delta is kept.
			this->SetStackPtrOffset(SearchDelta);
		}
	}
	return found;
} // end of SMPInstr::FindStackPtrDelta()
#endif

// Set the type of all immediate operands found in the USE set.
// Set all flags and floating point register USEs and DEFs to NUMERIC also,
//  along with easily determined types for special cases.
// UseFP: when true, R_bp is typed STACKPTR exactly as R_sp is; the flag is also
//  forwarded to MDFindPointerUse() for memory operands.
// Every type change goes through SetUseType()/SetDefType(); the iterator each one
//  returns replaces the loop iterator before that iterator is advanced.
// NOTE(review): UseOp, ImmVal and DefOp are assigned below without a visible declaration,
//  and several closing braces / "else" lines appear to be absent from this listing --
//  confirm against the repository copy before changing any code here.
void SMPInstr::SetImmedTypes(bool UseFP) {
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	set<DefOrUse, LessDefUse>::iterator CurrDef;
	// DebugFlag gates the trace messages below; it can only become true in builds
	//  with SMP_VERBOSE_DEBUG_BUILD_RTL enabled.
	bool DebugFlag = false;
#if SMP_VERBOSE_DEBUG_BUILD_RTL
	DebugFlag = DebugFlag || (this->address == 0x805cd52) || (this->address == 0x805cd56);
	DebugFlag |= (0 == strncmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName(), 15));
#endif

	// Pass 1: walk the USE set and type each operand according to its operand kind.
	CurrUse = this->GetFirstUse();
	while (CurrUse != this->GetLastUse()) {
		UseOp = CurrUse->GetOp();
		if (DebugFlag) {
			PrintOperand(UseOp);
		if (o_imm == UseOp.type) {
			// Immediate operands: a global address becomes GLOBALPTR; otherwise an
			//  interrupt call or a code address becomes CODEPTR.
			ImmVal = UseOp.value;
			if (IsImmedGlobalAddress((ea_t) ImmVal)) {
				if (DebugFlag) SMP_msg("Setting to GLOBALPTR\n");
				CurrUse = this->SetUseType(UseOp, GLOBALPTR);
#if 0
			else if (IsDataAddress((ea_t) ImmVal)) {
				// NOTE: We must call IsDataAddress() before we call IsImmedCodeAddress()
				//  to catch the data addresses within the code address range.
				if (DebugFlag) SMP_msg("Setting to POINTER\n");
				CurrUse = this->SetUseType(UseOp, POINTER);
			}
#endif
			else if (this->MDIsInterruptCall() || IsImmedCodeAddress((ea_t) ImmVal)) {
				if (DebugFlag) SMP_msg("Setting to CODEPTR\n");
				CurrUse = this->SetUseType(UseOp, CODEPTR);
				// NOTE(review): the NUMERIC assignment below is presumably the final "else"
				//  fallback for immediates that are neither global nor code addresses -- confirm.
				if (DebugFlag) SMP_msg("Setting to NUMERIC\n");
				CurrUse = this->SetUseType(UseOp, NUMERIC);
		else if (o_reg == UseOp.type) {
			// Register USEs: only the flags register and the stack/frame pointers are typed here.
			if (UseOp.is_reg(X86_FLAGS_REG)) {
				if (DebugFlag) SMP_msg("Setting flags reg to NUMERIC\n");
				CurrUse = this->SetUseType(UseOp, NUMERIC);
			}
#if 1
			else if (UseOp.is_reg(R_sp) || (UseFP && UseOp.is_reg(R_bp))) {
				if (DebugFlag) SMP_msg("Setting reg to STACKPTR\n");
				CurrUse = this->SetUseType(UseOp, STACKPTR);
			}
#endif
		}
#if 0  // could these registers have pointers in them?
		else if ((o_trreg == UseOp.type) ||(o_dbreg == UseOp.type) || (o_crreg == UseOp.type)) {
			if (DebugFlag) SMP_msg("Setting special reg to NUMERIC\n");
			CurrUse = this->SetUseType(UseOp, NUMERIC);
		}
#endif
		else if ((o_fpreg == UseOp.type) || (o_mmxreg == UseOp.type) || (o_xmmreg == UseOp.type)) {
			// FP, MMX and XMM register USEs are all typed NUMERIC.
			if (DebugFlag) SMP_msg("Setting floating point reg to NUMERIC\n");
			CurrUse = this->SetUseType(UseOp, NUMERIC);
		}
		else if ((o_mem == UseOp.type) || (o_phrase == UseOp.type) || (o_displ == UseOp.type)) {
			// For memory operands, we need to identify the POINTER value that
			//  is used in the addressing mode, if possible.
			// The return value is deliberately discarded.
			(void) this->MDFindPointerUse(UseOp, UseFP);
		}
		++CurrUse;
	} // end while all USEs via CurrUse

	// Pass 2: walk the DEF set. Immediates are not handled here; only registers
	//  and memory operands are examined.
	CurrDef = this->GetFirstDef();
	while (CurrDef != this->GetLastDef()) {
		DefOp = CurrDef->GetOp();
		if (DebugFlag) {
			PrintOperand(DefOp);
		if (DebugFlag) SMP_msg("FuncName: %s\n", this->BasicBlock->GetFunc()->GetFuncName());
		if (o_reg == DefOp.type) {
			if (DefOp.is_reg(X86_FLAGS_REG)) {
				if (DebugFlag) SMP_msg("Setting flags reg DEF to NUMERIC\n");
				CurrDef = this->SetDefType(DefOp, NUMERIC);
				// No need to propagate this DEF type, as all flags will become NUMERIC.
			}
#if 1
			else if (DefOp.is_reg(R_sp) || (DefOp.is_reg(R_bp) && UseFP)) {
				if (DebugFlag) SMP_msg("Setting reg DEF to STACKPTR\n");
				CurrDef = this->SetDefType(DefOp, STACKPTR);
				// Sanity check: SetDefType() must not have returned the end-of-Defs iterator.
				assert(CurrDef != this->Defs.GetLastRef());
				// No need to propagate; all stack and frame pointers will become STACKPTR.
		else if ((o_fpreg == DefOp.type) || (o_mmxreg == DefOp.type) || (o_xmmreg == DefOp.type)) {
			if (DebugFlag) SMP_msg("Setting floating point reg DEF to NUMERIC\n");
			CurrDef = this->SetDefType(DefOp, NUMERIC);
			// No need to propagate; all FP reg uses will become NUMERIC anyway.
		}
#if 0  // could these registers have pointers in them?
		else if ((o_trreg == DefOp.type) || (o_dbreg == DefOp.type) || (o_crreg == DefOp.type)) {
			if (DebugFlag) SMP_msg("Setting special reg DEF to NUMERIC\n");
			CurrDef = this->SetDefType(DefOp, NUMERIC);
		}
#endif	
		else if ((o_mem == DefOp.type) || (o_phrase == DefOp.type) || (o_displ == DefOp.type)) {
			// For memory operands, we need to identify the POINTER value that
			//  is used in the addressing mode, if possible.
			// The return value is deliberately discarded.
			(void) this->MDFindPointerUse(DefOp, UseFP);
		}
		++CurrDef;
	} // end while all DEFs via CurrDef
	return;
} // end of SMPInstr::SetImmedTypes()

// Is the instruction a load from the stack?
void SMPInstr::MDFindLoadFromStack(bool UseFP) {
	set<DefOrUse, LessDefUse>::iterator UseIter;
	op_t UseOp;

	if ((3 == this->OptType) && (this->HasSourceMemoryOperand())) {
		// Loads and stores are OptCategory 3. We want only loads from the stack.
		for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
			UseOp = UseIter->GetOp();
			if (MDIsStackAccessOpnd(UseOp, UseFP)) {
				this->SetLoadFromStack();
				break;
			}
		}
	}
	return;
} // end of SMPInstr::MDFindLoadFromStack()

// Determine if instr is a load with inherent signedness (an extending move).
//  Returns true for both NN_movsx and NN_movzx, despite the function name.
//  SignMask receives FG_MASK_SIGNED for movsx and FG_MASK_UNSIGNED for movzx;
//  it is left unmodified when the return value is false.
bool SMPInstr::MDIsSignedLoad(unsigned short &SignMask) {
	const unsigned short InstrCode = this->SMPcmd.itype;

	if (NN_movsx == InstrCode) {
		SignMask = FG_MASK_SIGNED;
		return true;
	}
	if (NN_movzx == InstrCode) {
		SignMask = FG_MASK_UNSIGNED;
		return true;
	}
	// Any other opcode carries no inherent signedness.
	return false;
}

// true if increment or addition of small positive immediate value
#define STARS_SMALL_POS_VALUE_LIMIT 16
bool SMPInstr::MDIsSmallPositiveAddition(void) {
	unsigned short opcode = this->SMPcmd.itype;
	bool found = (NN_inc == opcode);

	if (!found && ((NN_add == opcode) || (NN_adc == opcode))) {
		set<DefOrUse, LessDefUse>::iterator UseIter;
		for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
			op_t UseOp = UseIter->GetOp();
			if (o_imm == UseOp.type) {
				uval_t ImmVal = UseOp.value;
				if ((ImmVal <= STARS_SMALL_POS_VALUE_LIMIT) && (0 < ImmVal)) {
					found = true;
					break;
				}
			}
		}
	}

	return found;
} // end of SMPInstr::MDIsSmallPositiveAddition()

// true if increment, decrement, or addition or subtraction of small immediate value
bool SMPInstr::MDIsSmallAdditionOrSubtraction(void) {
	unsigned short opcode = this->SMPcmd.itype;
	bool found = ((NN_inc == opcode) || (NN_dec == opcode));

	if ((NN_add == opcode) || (NN_adc == opcode) || (NN_sub == opcode) || (NN_sbb == opcode)) {
		set<DefOrUse, LessDefUse>::iterator UseIter;
		for (UseIter = this->GetFirstUse(); !found && (UseIter != this->GetLastUse()); ++UseIter) {
			op_t UseOp = UseIter->GetOp();
			if (o_imm == UseOp.type) {
				uval_t ImmVal = UseOp.value;
				int SignedImmVal = (int) ImmVal;
				found = ((SignedImmVal >= (-STARS_SMALL_POS_VALUE_LIMIT)) && (SignedImmVal <= STARS_SMALL_POS_VALUE_LIMIT));
			}
		}
	}

	return found;
} // end of SMPInstr::MDIsSmallAdditionOrSubtraction()

// Returns true when the instruction is a register-clear idiom or a move instruction.
//  ValueFound, ConstValue: set to (true, 0) for a register clear, or to
//  (true, immediate value) for a move whose MoveSource is an immediate.
//  In every other case both out-parameters are left exactly as the caller passed them.
bool SMPInstr::MDIsSimpleAssignment(bool &ValueFound, uval_t &ConstValue) {
	// A register clear assigns the constant zero.
	if (this->IsRegClearIdiom()) {
		ConstValue = 0;
		ValueFound = true;
		return true;
	}

	if (!this->MDIsMoveInstr()) {
		return false;
	}

	// A move is always simple; it only yields a known constant when the source is an immediate.
	if (o_imm == this->MoveSource.type) {
		ConstValue = this->MoveSource.value;
		ValueFound = true;
	}
	return true;
} // end of SMPInstr::MDIsSimpleAssignment()

// Inst clears register or adds or subtracts small immediate value, as is done with counter variables.
bool SMPInstr::IsCounterOperation(void) {
	bool CounterOperation = false;
	bool ImmedValueFound = false;
	uval_t ConstValue = 1;
	if (this->MDIsSimpleAssignment(ImmedValueFound, ConstValue)) {
		CounterOperation = (ImmedValueFound && (0 == ConstValue));
	}
	else {
		CounterOperation = this->MDIsSmallAdditionOrSubtraction();
	}