Skip to content
Snippets Groups Projects
SMPInstr.cpp 396 KiB
Newer Older
			}
			else {
				this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, FGEntry);
			}
		}
	} // end of case4 (function calls)
	else if (case5) { // signed or unsigned conditional set opcode
		if (UnsignedSetOpcode) {
			SignMask = FG_MASK_UNSIGNED;
		}
		else {
			assert(SignedSetOpcode);
			SignMask = FG_MASK_SIGNED;
		}
		// Find the flags USE.
		UseOp.type = o_reg; // set up a dummy op for searching
		UseOp.reg = X86_FLAGS_REG;
		UseIter = this->FindUse(UseOp);
		assert(UseIter != this->GetLastUse());
		UseOp = UseIter->GetOp(); // get full info in all fields of UseOp
		SSANum = UseIter->GetSSANum();
		LocalFlags = this->GetBlock()->IsLocalName(UseOp);

		DefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, this->GetAddr(), SSANum, LocalFlags);
		// Pass DefAddr to recursive helper function to propagate signedness of the set opcode.
		this->GetBlock()->PropagateBranchSignedness(DefAddr, UseOp, SignMask);
	}
	else if (case6) { // sign extend to double the width of USE operand into DEF operand
		DefIter = this->GetFirstNonFlagsDef();
		assert(DefIter != this->GetLastDef());
		DefOp = DefIter->GetOp();
		assert(o_reg == DefOp.type);
		DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
		SSANum =  DefIter->GetSSANum();
		DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum);
		UseIter = this->GetFirstUse();
		assert(UseIter != this->GetLastUse());
		UseOp = UseIter->GetOp();
		assert(o_reg == UseOp.type);
		UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
		assert(UseOp.reg == DefOp.reg);
		UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); 
		SignMask = FG_MASK_SIGNED;  // opcodes do sign extension => signed
		// Mark DEF and USE as signed.
		if (this->GetBlock()->IsLocalName(DefOp)) {
			this->GetBlock()->UpdateDefSignMiscInfo(DefHashValue, SignMask);
			this->GetBlock()->UpdateUseSignMiscInfo(UseHashValue, SignMask);
		}
		else {
			this->GetBlock()->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask);
			this->GetBlock()->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask);
		}
	}
	// For all register DEFs and USEs, we should get the obvious register width info
	//  updated. Need to use the RTL operands to get accurate widths.
	SMPRegTransfer *CurrRT;
	for (size_t index = 0; index < this->RTL.GetCount(); ++index) {
		CurrRT = this->RTL.GetRT(index);
		DefOp = CurrRT->GetLeftOperand();
		// Avoid setting def width for case 2; we leave it as zero so that
		//  later uses can determine whether the zero-extension or sign-extension
		//  bits ever got used. See more discussion in EmitIntegerErrorAnnotations()
		//  for the CHECK TRUNCATION case.
		// NOTE: case2 can be set to true even in the case1/case2 overlap case that
		//  only passes through the case1 code above. This is intentional. We want
		//  to leave the DEF width set to 0 for all of case2 including the case1 overlap.
		if (!case2) {
			if (MDIsGeneralPurposeReg(DefOp)) {
				WidthMask = ComputeOperandBitWidthMask(DefOp, 0);
				DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
				DefIter = this->FindDef(DefOp);
				assert(DefIter != this->GetLastDef());
				DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
				if (this->BasicBlock->IsLocalName(DefOp)) {
					this->BasicBlock->UpdateDefWidthTypeInfo(DefHashValue, WidthMask);
				}
				else {
					this->BasicBlock->GetFunc()->UpdateDefWidthTypeInfo(DefHashValue, WidthMask);
				}
			}
		}
		if (CurrRT->HasRightSubTree()) {
			this->MDSetRTLRegWidthInfo(CurrRT->GetRightTree());
		}
		else {
			UseOp = CurrRT->GetRightOperand();
			this->SetRTLUseOpRegWidthInfo(UseOp);
		}
	}  // end for all RTLs 

	return;
} // end of SMPInstr::MDSetWidthSignInfo()

// Infer sign from the SMP types for USEs and DEFs.
void SMPInstr::InferSignednessFromSMPTypes(bool UseFP) {
	// Start with registers only, infer that all kids of pointers are UNSIGNED.
	set<DefOrUse, LessDefUse>::iterator DefIter, UseIter;
	op_t DefOp, UseOp;
	int SSANum;
	int DefHashValue, UseHashValue;
	SMPOperandType DefType, UseType;
	unsigned short DefSignMiscInfo = FG_MASK_UNSIGNED, UseSignMiscInfo = FG_MASK_UNSIGNED;
	bool GlobalName;

	for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
		DefOp = DefIter->GetOp();
		if (MDIsGeneralPurposeReg(DefOp)) {
			DefType = DefIter->GetType();
			if (IsDataPtr(DefType) || (CODEPTR == DefType)) {
				GlobalName = this->BasicBlock->GetFunc()->IsGlobalName(DefOp);
				SSANum = DefIter->GetSSANum();
				DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum);
				if (GlobalName) {
					this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, DefSignMiscInfo);
				}
				else {
					this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, DefSignMiscInfo);
				}
			}
		}
	}

	for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
		UseOp = UseIter->GetOp();
		if (MDIsGeneralPurposeReg(UseOp)) {
			UseType = UseIter->GetType();
			if (IsDataPtr(UseType) || (CODEPTR == UseType)) {
				GlobalName = this->BasicBlock->GetFunc()->IsGlobalName(UseOp);
				SSANum = UseIter->GetSSANum();
				UseHashValue = HashGlobalNameAndSSA(UseOp, SSANum);
				if (GlobalName) {
					this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, UseSignMiscInfo);
				}
				else {
					this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, UseSignMiscInfo);
				}
			}
		}
	}

	return;
} // end of SMPInstr::InferSignednessFromSMPTypes()


// Record the bit width of a single USE operand taken from an RTL.
//  Operands that are not general purpose registers are ignored. The width mask
//  is computed from the operand as passed in; the register is then
//  canonicalized so the matching USE (and its SSA number) can be found.
//  Local names are updated in the basic block, other names in the function.
void SMPInstr::SetRTLUseOpRegWidthInfo(op_t UseOp) {
	if (!MDIsGeneralPurposeReg(UseOp)) {
		return; // nothing to record for non-register operands
	}

	// Compute the width first, while UseOp still names the sub-register from the RTL.
	unsigned short RegWidthMask = ComputeOperandBitWidthMask(UseOp, 0);

	UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
	set<DefOrUse, LessDefUse>::iterator FoundUse = this->FindUse(UseOp);
	assert(FoundUse != this->GetLastUse());

	unsigned int HashKey = HashGlobalNameAndSSA(UseOp, FoundUse->GetSSANum());
	if (!this->BasicBlock->IsLocalName(UseOp)) {
		this->BasicBlock->GetFunc()->UpdateUseWidthTypeInfo(HashKey, RegWidthMask);
	}
	else {
		this->BasicBlock->UpdateUseWidthTypeInfo(HashKey, RegWidthMask);
	}
} // end of SMPInstr::SetRTLUseOpRegWidthInfo()

// Walk an RTL tree rooted at CurrRT and update the width info of every
//  register USE operand found along the way. The tree is followed down its
//  chain of right subtrees: each node contributes its left operand, and the
//  final node (the one without a right subtree) also contributes its right operand.
void SMPInstr::MDSetRTLRegWidthInfo(SMPRegTransfer *CurrRT) {
	SMPRegTransfer *Node = CurrRT;

	while (true) {
		this->SetRTLUseOpRegWidthInfo(Node->GetLeftOperand());
		if (!Node->HasRightSubTree()) {
			break;
		}
		Node = Node->GetRightTree();
	}

	// Bottom of the chain: the right side is a plain operand.
	this->SetRTLUseOpRegWidthInfo(Node->GetRightOperand());
} // end of SMPInstr::MDSetRTLRegWidthInfo()

// Do we not consider truncation on this type of instruction to be an error?
bool SMPInstr::IsBenignTruncation(void) {
	bool benign = false;
	unsigned short SignMask;
	op_t UseOp, SearchOp;

	if (3 == this->GetOptType()) { // Move instruction
		bool ExtendedLoad = this->MDIsSignedLoad(SignMask);
		if (ExtendedLoad) {
			if (SignMask & FG_MASK_UNSIGNED) {
				// We have a zero-extended load. Compilers zero-extend both
				//  signed (unfortunately) and unsigned sub-regs when they know
				//  from the source language types that only the lower bits matter,
				//  e.g. when a char has been stored in the lower bits and regardless
				//  of whether that char was sign-extended or zero-extended previously,
				//  only the char itself is useful info. Otherwise, the compiler could
				//  move the whole register, e.g. instead of edi := zero-extend(cx), the
				//  compiler could have generated edi := ecx. The zero-extension loads
				//  are therefore not good candidates for truncation checks, as they lead
				//  to lots of false positives.
				benign = true;
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
				++BenignTruncationCount;
#endif
			}
		}
		else { // Move, and not extended load, which was handled above.
			// Next case: A move instruction whose USE falsely appears to be a truncation,
			//  but in fact the apparently unused portion of the register is used later, e.g.:
			//   mov [ebp-12],ax  ; looks like EAX is being truncated to AX and stored
			//   shr eax,16       ; gets upper 16 bits into lower 16 bits
			//   mov [ebp-14],ax  ; store what used to be the upper 16 bits of EAX
			// The first instruction will trigger a CHECK TRUNCATION annotation that
			//  causes false positives. We need to analyze the context of the instruction
			//  to see that the whole register EAX was used, so no truncation occurred.
			//  The context analysis in the basic block will mark the second move as
			//  a "truncation" that should be ignored, so we check the flag here to short
			//  circuit redundant analysis.
			UseOp = this->GetMoveSource();
			assert(o_void != UseOp.type);
			SearchOp = UseOp;
			if (o_reg == UseOp.type) {
				SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg);
			}
			set<DefOrUse, LessDefUse>::iterator UseIter = this->FindUse(SearchOp);
			assert(UseIter != this->GetLastUse());
			if (UseIter->DoesNotTruncate()) {
				benign = true;
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
				++SuppressTruncationRegPiecesAllUsed;
#endif
			}
			else {
				set<DefOrUse, LessDefUse>::iterator DefIter = this->GetFirstNonFlagsDef();
				assert(DefIter != this->GetLastDef());
				int DefSSANum = DefIter->GetSSANum();
				benign = this->GetBlock()->IsBenignTruncationDEF(DefIter->GetOp(), DefSSANum, this->GetAddr());
			}
		}
	}
	return benign;
} // end of SMPInstr::IsBenignTruncation()

// Do we not consider overflow or underflow on this type of instruction to be an error?
bool SMPInstr::IsBenignOverflow(void) {
	bool benign = false;
	set<DefOrUse, LessDefUse>::iterator DefIter;
	SMPOperandType DefType;
	int DefSSANum;
	ea_t DefAddr;
	op_t DefOp;

	if (this->MDIsDefiniteBenignUnderflowOpcode()) {
		// No further analysis at block or function scope is needed.
		benign = true;
	}
	else if (this->MDIsMaybeBenignUnderflowOpcode()) { // might have the subtract instruction
		// We are looking to suppress overflow and underflow warnings on the following
		//  code sequence: PTR1-PTR2+1 gets a loop invariant code motion optimization
		//  that pulls  temp := 1-PTR2 out of the loop, and leaves temp2 := PTR1+temp
		//  inside the loop. The hoisted subtraction could underflow, and the addition
		//  that is not hoisted could overflow. The net effect of these two instructions
		//  is benign, however, so we want to suppress underflow and overflow checks on
		//  both of them, but only if we can match the pair of instructions.
		DefIter = this->GetFirstNonFlagsDef();
		assert(DefIter != this->GetLastDef());
		if (DefIter->DoesNotOverflow()) {
			benign = true; // short circuit; already analyzed
		}
		else {
			DefType = DefIter->GetType();
			if (IsEqType(DefType, NEGATEDPTR)) { // We have candidate subtract instruction
				DefSSANum = DefIter->GetSSANum();
				DefAddr = this->GetAddr();
				DefOp = DefIter->GetOp();
				benign = this->GetBlock()->GetFunc()->IsBenignUnderflowDEF(DefOp, DefSSANum, DefAddr);
			}
		}
	}
	else if (this->MDIsMaybeBenignOverflowOpcode()) { // might have the add instruction
		DefIter = this->GetFirstNonFlagsDef();
		assert(DefIter != this->GetLastDef());
		if (DefIter->DoesNotOverflow()) {
			benign = true; // short circuit; already analyzed
		}
		else { // Bad luck to encounter addition first. See what types tell us.
			if (this->HasNegatedPtrUSE()) { // We have candidate for addition.
				DefType = DefIter->GetType();
				if (IsEqType(DefType, PTROFFSET)) {
					// Current instruction is definitely the addition instruction of some
					//  benignly underflowing and overflowing pair of instructions. It does
					//  not really matter that we have not found the subtraction yet; we will
					//  get to it eventually. We should suppress overflow checks on this inst.
					DefOp = DefIter->GetOp();
					this->Defs.SetNoOverflow(DefOp, true);
					benign = true;
				}
				else {
					// A bit of a quandary. Ideally, we would have successful SMP type inference
					//  and always have PTROFFSET := POINTER + NEGATEDPTR. However, sometimes we
					//  might have ?? := ?? + NEGATEDPTR. The instruction could be of the type
					//  NEGATEDPTR := NUMERIC + NEGATEDPTR, so we cannot just assume here that 
					//  we have detected the benign case. We will be safe and do nothing.
					;
				}
			}
		}
	}

	return benign;
} // end of SMPInstr::IsBenignOverflow()

// Do we detect a situation in which it is safe to check for signedness errors on
//  the stack write (return false), or not (return true to be safe).
bool SMPInstr::SkipSignednessCheckOnStackWrite(int DefSSANum) {
	bool SkipCheck = true;
	op_t StackDefOp = this->DEFMemOp;
	size_t DefBitWidth = 8 * GetOpDataSize(StackDefOp);
	if (DefBitWidth < MD_NORMAL_MACHINE_BITWIDTH) {
		// If we are not dealing with a shortened bit width, then
		//  the next load from the stack will not be sign-extended
		//  or zero-extended.
		if (this->GetBlock()->IsStackOpNextUsedWithSignedness(StackDefOp, this->GetAddr(), DefSSANum)) {
			SkipCheck = false;
		}
	}
	return SkipCheck;
} // end of SMPInstr::SkipSignednessCheckOnStackWrite()

// Does inst pass an outgoing argument?
bool SMPInstr::MDIsArgumentPass(void) {
	bool OutArgPass = false;

	// Current model is writing outargs to stack. For other compiler targets in the
	//  future, we would also include pushes onto the stack.
	if (this->HasDestMemoryOperand() && (this->GetOptType() == 3)) { // move to memory
		if (this->GetBlock()->GetFunc()->OutArgsRegionComputed()) {
			op_t DefOp = this->DEFMemOp;
			OutArgPass = this->GetBlock()->GetFunc()->IsInOutgoingArgsRegion(DefOp);
		}
	}
	return OutArgPass;
}

// Trace UseOp through register moves back to its stack location or immediate value source.
//  UseOp:     operand used by this instruction whose origin is wanted.
//  UseSSANum: SSA number of that USE (unused for immediates).
//  UltSource: output; receives the immediate or stack operand when the trace succeeds.
//  Return true if we are passing an immediate or stack location back in UltSource;
//  return false when the chain cannot be followed (non-move/non-LEA instruction,
//  DEF not found or found in a Phi function, DEF outside the current basic block,
//  or a defining instruction of a kind we do not trace through).
// NOTE(review): GetInstFromAddr() results are dereferenced without a check, as in the
//  original code; presumably it cannot fail for addresses that pass the tests below.
bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSource) {
	// If we hit an immediate value or a stack location, we are done.
	bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer();
	op_t NewUseOp; // next UseOp up the move chain
	op_t DefOp, ImmOp; // register and immediate of a reg += imm instruction; presumably filled in by MDIsAddImmediateToReg()
	int NewUseSSANum;
	set<DefOrUse,LessDefUse>::iterator UseIter;
	bool LocalName;
	ea_t DefAddr;
	SMPInstr *DefInst;
	bool StackOp = MDIsStackAccessOpnd(UseOp, UseFP);
	bool RegisterOp = (UseOp.type == o_reg);
	if (this->GetOptType() == 3) { // move instruction
		if (UseOp.type == o_imm) {
			UltSource = UseOp;
			return true;
		}
		else if ((!RegisterOp) && (!StackOp)) {
			// We only trace the move chain through registers or stack locations to an ultimate
			//  load-effective-address of a stack location, or a move of an immediate value.
			return false;
		}
	}
	else if (!this->MDIsLoadEffectiveAddressInstr()) {
		// Neither a move nor a load effective address: nothing to trace through.
		return false;
	}
	else { // Load effective address instruction.
		// If it is a stack location being loaded, trace succeeded, else it failed.
		if (StackOp) {
			UltSource = UseOp;
			return true;
		}
		else {
			return false;
		}
	}

	// If we reach this point, we have a move instruction but did not return true or false above.
	// Recursion case. Going back up the move chain has just produced a register or
	//  a stack location, and we still need to find the stack address or immediate value
	//  that was stored in the register or stack location. The stack location could hold
	//  a pointer to a stack object, produced by an earlier LEA instruction, or it
	//  could hold an immediate value (e.g. constant size argument passed to memset() or
	//  similar function).
	LocalName = this->GetBlock()->IsLocalName(UseOp);
	DefAddr = this->GetBlock()->GetDefAddrFromUseAddr(UseOp, this->GetAddr(), UseSSANum, LocalName);
	if ((BADADDR == DefAddr) || (DefAddr < (this->GetBlock()->GetFunc()->GetStartAddr() - 1))) {
		// Def was not found, or was found in Phi function (DefAddr was block number, not instruction addr).
		return false;
	}
	if (DefAddr < (this->GetBlock()->GetFirstAddr())) {
		// If DefAddr is 1 byte less than the first addr in the block, then
		//  it is a pseudo-def in the global DU chains, signifying that the
		//  value was LiveIn and the true DEF is in another block. We could
		//  handle this in the future, but right now we will only deal with
		//  the simpler case in which the move source can be traced
		//  within the basic block.
		return false;
	}

	// Everything is OK so far; time to recurse up the move chain.
	DefInst = this->GetBlock()->GetFunc()->GetInstFromAddr(DefAddr);
	if (DefInst->GetOptType() == 3) {
		NewUseOp = DefInst->GetMoveSource();
	}
	else if (DefInst->MDIsLoadEffectiveAddressInstr()) {
		NewUseOp = DefInst->GetLeaMemUseOp();
		if (MDIsStackAccessOpnd(NewUseOp, UseFP)) {
			UltSource = NewUseOp;
			return true;
		}
		else {
			return false;
		}
	}
	// We don't have a move instruction or a load effective address instruction, which
	//  can be used to move a stack address into a register. We don't try to trace through
	//  arithmetic except for two easy cases.
	// Case 1: A register is cleared. Same as assigning immediate value zero to the reg.
	else if (DefInst->IsRegClearIdiom()) {
		UltSource.type = o_imm;
		UltSource.value = 0;  // why would we memset a zero byte region?
		// Must return here: NewUseOp was never set, so the recursion below would be invalid.
		return true;
	}
	// Easy arithmetic Case 2: we have reg += ImmediateValue, and reg was DEFed by reg := LEA(StackLoc).
	else if (DefInst->MDIsAddImmediateToReg(DefOp, ImmOp)) {
		SMPInstr *NewDefInst;
		UseIter = DefInst->FindUse(DefOp);
		assert(UseIter != DefInst->GetLastUse());
		NewUseSSANum = UseIter->GetSSANum();
		LocalName = DefInst->GetBlock()->IsLocalName(DefOp);
		DefAddr = DefInst->GetBlock()->GetDefAddrFromUseAddr(DefOp, DefInst->GetAddr(), NewUseSSANum, LocalName);
		if ((BADADDR == DefAddr) || (DefAddr < (DefInst->GetBlock()->GetFunc()->GetStartAddr() - 1))) {
			// Def was not found, or was found in Phi function (DefAddr was block number, not instruction addr).
			return false;
		}
		NewDefInst = DefInst->GetBlock()->GetFunc()->GetInstFromAddr(DefAddr);
		if (NewDefInst->MDIsLoadEffectiveAddressInstr()) {
			NewUseOp = NewDefInst->GetLeaMemUseOp();
			if (MDIsStackAccessOpnd(NewUseOp, UseFP)) {
				// We have the code sequence we were searching for when we first saw the
				//  addition of an immediate value to a register, e.g.:
				//   lea ebx,[ebp-2000]
				//   add ebx,1000
				//
				// We are essentially making this sequence into a single instruction:
				//   lea ebx,[ebp-1000]
				//  by adding the immediate value to the address offset. With a stack that grows
				//  downward, it does not matter if we add 1000 to [esp+500] to produce [esp+1500],
				//  or we add 1000 to [ebp-2000] to make [ebp-1000]. Either way, we are simulating the
				//  addition of 1000 as we move up in the stack frame.
				NewUseOp.addr += ImmOp.value; // perform the address arithmetic addition
				UltSource = NewUseOp;
				return true;
			}
			else {
				return false;
			}
		}
		else {
			return false;
		}
	}
	else {
		// Not the kind of instruction we need; cut short the recursion.
		return false;
	}

	// Only the move case reaches this point; every other branch above has returned.
	// NewUseOp is the move source operand that we seek.
	UseIter = DefInst->FindUse(NewUseOp);
	assert(UseIter != DefInst->GetLastUse());
	NewUseSSANum = UseIter->GetSSANum();  // unused for immediates, used for regs and stack
	// Recurse
	return DefInst->TraceUltimateMoveSource(NewUseOp, NewUseSSANum, UltSource);

} // end of SMPInstr::TraceUltimateMoveSource()

// Infer DEF, USE, and RTL SMPoperator types within the instruction based on the type
//  of operator, the type category of the instruction, and the previously known types
//  of the operands.
// Visible stages, in order:
//  1. MDFindPointerUse() is applied to the memory DEF and memory USE operands, if present.
//  2. Control flow instructions (jumps, calls, tail calls) get their USE types set
//     directly, inference is marked complete, and the function returns true.
//  3. If category inference is not complete, DEF types are inferred from the
//     instruction's type category alone (switch on TypeCategory).
//  4. Each RT in the RTL with a non-null operator is passed to InferOperatorType().
// Returns the accumulated "changed" flag (true from the control flow path).
// NOTE(review): as it appears here, this function does not look complete. The
//  "#if SMP_VERBOSE_DEBUG_INFER_TYPES" below has no matching #endif, several
//  PropagateGlobalDefType() calls stop in the middle of their argument lists, braces
//  do not balance, and SSANum / IsMemOp are used without a visible declaration.
//  Confirm against the repository copy before changing any code in this function.
bool SMPInstr::InferTypes(void) {
	bool changed = false;  // return value
	int TypeCategory = SMPTypeCategory[this->SMPcmd.itype];
	set<DefOrUse, LessDefUse>::iterator CurrDef;
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	op_t DefOp = InitOp, UseOp = InitOp;
	bool DebugFlag = false;
	bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
	bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe();
#if SMP_VERBOSE_DEBUG_INFER_TYPES
	DebugFlag |= (0 == strcmp("InputMove", this->BasicBlock->GetFunc()->GetFuncName()));
	if (DebugFlag) {
		SMP_msg("opcode: %d TypeCategory: %d\n", this->SMPcmd.itype, TypeCategory);

	// If we are already finished with all types, return false.
	if (this->IsTypeInferenceComplete())
	if (this->AllDEFsTyped() && this->AllUSEsTyped()) {
		this->SetTypeInferenceComplete();
	// Let MDFindPointerUse() examine the memory DEF and memory USE operands.
	if (this->HasDestMemoryOperand()) {
		changed |= this->MDFindPointerUse(this->MDGetMemDefOp(), UseFP);
	}
	if (this->HasSourceMemoryOperand()) {
		changed |= this->MDFindPointerUse(this->MDGetMemUseOp(), UseFP);
	}

	// The control flow instructions can be handled simply based on their type
	//  and do not need an RTL walk.
	SMPitype DFAType = this->GetDataFlowType();
	bool CallInst = ((DFAType == CALL) || (DFAType == INDIR_CALL) || this->IsTailCall());
	ushort IndirCallReg = R_none;
		SMP_msg("DFAType: %d  CategoryInferenceComplete: %d\n",
			DFAType, this->IsCategoryInferenceComplete());
	// For an indirect call whose first operand is a register, remember that register.
	if (DFAType == INDIR_CALL) {
		op_t TargetOp = this->SMPcmd.Operands[0];
		if (TargetOp.type == o_reg)
			IndirCallReg = TargetOp.reg;
	}
	if (((DFAType >= JUMP) && (DFAType <= INDIR_CALL)) || this->IsTailCall()) {
		// All USEs are either the flags (NUMERIC) or the target address (CODEPTR).
		//  The exceptions are the USE list for interrupt calls, which includes
		//  the caller-saved regs, and indirect calls through a memory
		//  operand, such as call [ebx+esi+20h], where the memory operand
		//  is a CODEPTR but the addressing registers are a BaseReg and
		//  IndexReg as in any other memory addressing, and the saved
		//  regs on any call.
		CurrUse = this->GetFirstUse();
		while (CurrUse != this->GetLastUse()) {
			UseOp = CurrUse->GetOp();
			if (UseOp.is_reg(X86_FLAGS_REG))
				CurrUse = this->SetUseType(UseOp, NUMERIC);
			else if ((CurrUse->GetType() != CODEPTR)
				&& (!(this->MDIsInterruptCall() && (o_reg == UseOp.type)))
				&& (!(this->HasSourceMemoryOperand() 
					&& (INDIR_CALL == this->GetDataFlowType())
					&& (o_reg == UseOp.type)))) {
				CurrUse = this->SetUseType(UseOp, CODEPTR);
					// If the call is to malloc(), then the DEF of the return
					//  register is of type HEAPPTR.
					//  ****!!!!**** Could have INDIR_CALL resolved to malloc.
					changed |= this->MDFindMallocCall(UseOp);
				}
			else if ((CurrUse->GetType() != CODEPTR) && CallInst
				&& UseOp.is_reg(IndirCallReg)) {

				CurrUse = this->SetUseType(UseOp, CODEPTR);
			}
		this->SetTypeInferenceComplete();
		return true;
	}

	// First, see if we can infer something about DEFs and USEs just from the 
	//  type category of the instruction.
	if (!this->IsCategoryInferenceComplete()) {
		bool MemPropagate = false;
		switch (TypeCategory) {
			case 0: // no inference possible just from type category
			case 1: // no inference possible just from type category
			case 3:  // MOV instructions; inference will come from source to dest in RTL walk.
			case 5:  // binary arithmetic; inference will come in RTL walk.
			case 10:  // binary arithmetic; inference will come in RTL walk.
			case 11:  // push and pop instructions; inference will come in RTL walk.
			case 12:  // exchange instructions; inference will come in RTL walk.
				this->SetCategoryInferenceComplete();
				break;

			case 2: // Result type is always NUMERIC.
			case 7: // Result type is always NUMERIC.
			case 8: // Result type is always NUMERIC.
			case 9: // Result type is always NUMERIC.
			case 13: // Result type is always NUMERIC.
			case 14: // Result type is always NUMERIC.
			case 15: // Result type is always NUMERIC.
				CurrDef = this->GetFirstDef();
				while (CurrDef != this->GetLastDef()) {
					if (!IsEqType(NUMERIC, CurrDef->GetType())) {
						DefOp = CurrDef->GetOp();
						SSANum = CurrDef->GetSSANum();
						CurrDef = this->SetDefType(DefOp, NUMERIC);
						changed = true;
						// Be conservative and only propagate register DEFs and SAFE stack locs. We
						//  can improve this in the future. **!!**
						bool IsMemOp = (o_reg != DefOp.type);
						// NOTE(review): this declaration shadows the MemPropagate declared above the switch.
						bool MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP);
#if SMP_PROPAGATE_MEM_TYPES
						;
#else
						// Be conservative and only propagate register DEFs and SAFE stack locs.
						//  We can improve this in the future. **!!**
						MemPropagate = MemPropagate && SafeFunc;
#endif
						if ((o_reg == DefOp.type) || MemPropagate) {
							if (this->BasicBlock->IsLocalName(DefOp)) {
								(void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC,
									this->GetAddr(), SSANum, IsMemOp);
							}
							else { // global name
								this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
								(void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC,
				this->SetCategoryInferenceComplete();
			case 4: // Unary INC, DEC, etc.: dest=source, so type remains the same
				assert(1 == this->RTL.GetCount());
				assert(this->RTL.GetRT(0)->HasRightSubTree());
				UseOp = this->RTL.GetRT(0)->GetLeftOperand(); // USE == DEF
				CurrUse = this->Uses.FindRef(UseOp);
				assert(CurrUse != this->GetLastUse());
				if (UNINIT != CurrUse->GetType()) {
					// Only one USE, and it has a type assigned, so assign that type
					// to the DEF.
					CurrDef = this->GetFirstDef();
					while (CurrDef != this->GetLastDef()) {
						// Two DEFs: EFLAGS is NUMERIC, dest==source
						DefOp = CurrDef->GetOp();
						SSANum = CurrDef->GetSSANum();
						if (DefOp.is_reg(X86_FLAGS_REG)) {
							; // SetImmedTypes already made it NUMERIC
							CurrDef = this->SetDefType(DefOp, CurrUse->GetType());
							// Be conservative and only propagate register DEFs and SAFE stack locs. We
							//  can improve this in the future. **!!**
							bool IsMemOp = (o_reg != DefOp.type);
							MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP);
#if SMP_PROPAGATE_MEM_TYPES
							;
#else
							// Be conservative and only propagate register DEFs and SAFE stack locs.
							//  We can improve this in the future. **!!**
							MemPropagate = MemPropagate && SafeFunc;
#endif
							if ((o_reg == DefOp.type) || MemPropagate) {
								if (this->BasicBlock->IsLocalName(DefOp)) {
									(void) this->BasicBlock->PropagateLocalDefType(DefOp, CurrUse->GetType(),
										this->GetAddr(), SSANum, IsMemOp);
								}
								else { // global name
									this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
									(void) this->BasicBlock->PropagateGlobalDefType(DefOp, CurrUse->GetType(),
					this->SetCategoryInferenceComplete();
					changed = true;
					this->SetTypeInferenceComplete();
				}
				break;

			case 6: // Result is always POINTER
				DefOp = this->GetFirstDef()->GetOp();
				SSANum = this->GetFirstDef()->GetSSANum();
				CurrDef = this->SetDefType(DefOp, POINTER);
				this->SetCategoryInferenceComplete();
				changed = true;
				// Be conservative and only propagate register DEFs and SAFE stack locs. We
				//  can improve this in the future. **!!**
				IsMemOp = (o_reg != DefOp.type);
				MemPropagate = MDIsStackAccessOpnd(DefOp, UseFP);
#if SMP_PROPAGATE_MEM_TYPES
				;
#else
				// Be conservative and only propagate register DEFs and SAFE stack locs.
				//  We can improve this in the future. **!!**
				MemPropagate = MemPropagate && SafeFunc;
#endif
				if ((o_reg == DefOp.type) || MemPropagate)  {
					if (this->BasicBlock->IsLocalName(DefOp)) {
						(void) this->BasicBlock->PropagateLocalDefType(DefOp, POINTER,
							this->GetAddr(), SSANum, IsMemOp);
					}
					else { // global name
						this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
						(void) this->BasicBlock->PropagateGlobalDefType(DefOp, POINTER,
				SMP_msg("ERROR: Unknown type category for %s\n", DisAsmText.GetDisAsm(this->GetAddr()));
				this->SetCategoryInferenceComplete();
				break;
		} // end switch on TypeCategory
	} // end if (!CategoryInference)

	// Walk the RTL and infer types based on operators and operands.
	if (DebugFlag) {
		SMP_msg("RTcount: %zu\n", this->RTL.GetCount());
	// RTs with the null operator are skipped; the rest go to InferOperatorType()
	//  unless their own inference is already complete.
	for (size_t index = 0; index < this->RTL.GetCount(); ++index) {
		SMPRegTransfer *CurrRT = this->RTL.GetRT(index);
		if (SMP_NULL_OPERATOR == CurrRT->GetOperator()) // nothing to infer
			continue;
clc5q's avatar
clc5q committed
		if (!(CurrRT->IsTypeInferenceComplete())) {
			changed |= this->InferOperatorType(CurrRT);
		}
		if (DebugFlag) {
			SMP_msg("returned from InferOperatorType\n");
	} // end for all RTs in the RTL
	return changed;
} // end of SMPInstr::InferTypes()

// Infer the type of an operator within an RT based on the types of its operands and
//  based on the operator itself. Recurse down the tree if necessary.
// Return true if the operator type of the RT is updated.
bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) {
	bool updated = false;
	bool LeftNumeric, RightNumeric, OperNumeric;
	bool LeftPointer, RightPointer, OperPointer;
	bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
	bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe();
	set<DefOrUse, LessDefUse>::iterator CurrDef;
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	set<DefOrUse, LessDefUse>::iterator LeftUse;
	set<DefOrUse, LessDefUse>::iterator RightUse;
	SMPOperandType LeftType = UNINIT;
	SMPOperandType RightType = UNINIT;
	SMPOperandType OperType = UNINIT;
	op_t UseOp = InitOp, DefOp = InitOp, LeftOp = InitOp, RightOp = InitOp;
	SMPoperator CurrOp = CurrRT->GetOperator();
clc5q's avatar
clc5q committed
	bool TypeInferenceFinished = false;
#if SMP_VERBOSE_DEBUG_INFER_TYPES
#if 1
	DebugFlag |= (0 == strcmp("InputMove", this->BasicBlock->GetFunc()->GetFuncName()));
	DebugFlag = DebugFlag || ((this->address == 0x806453b) || (this->address == 0x806453e));
#if SMP_VERBOSE_DEBUG_INFER_TYPES
		SMP_msg("Entered InferOperatorType for CurrOp: %d at %x\n", CurrOp, this->GetAddr());
clc5q's avatar
clc5q committed

	if (CurrRT->IsTypeInferenceComplete()) {
		return updated;
	}

	switch (CurrOp) {
		case SMP_NULL_OPERATOR:
clc5q's avatar
clc5q committed
			TypeInferenceFinished = true;
			break;

		case SMP_CALL:  // CALL instruction
			if (UNINIT == CurrRT->GetOperatorType()) {
				CurrRT->SetOperatorType(CODEPTR, this);
				updated = true;
				UseOp = CurrRT->GetRightOperand();
				CurrUse = this->Uses.FindRef(UseOp);
				assert(CurrUse != this->GetLastUse());
				if (UNINIT == CurrUse->GetType()) {
					CurrUse = this->SetUseType(UseOp, CODEPTR);
				}
				else if (CODEPTR != CurrUse->GetType()) {
					SMP_msg("WARNING: call target is type %d, setting to CODEPTR at %x in %s\n",
						CurrUse->GetType(), this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
					CurrUse = this->SetUseType(UseOp, CODEPTR);
				}
clc5q's avatar
clc5q committed
			TypeInferenceFinished = true;
			break;

		case SMP_INPUT:  // input from port
			if (UNINIT == CurrRT->GetOperatorType()) {
				CurrRT->SetOperatorType(NUMERIC, this);
				updated = true;
			}
			break;

		case SMP_OUTPUT: // output to port
			if (UNINIT == CurrRT->GetOperatorType()) {
				CurrRT->SetOperatorType(NUMERIC, this);
				updated = true;
			}
			break;

		case SMP_SIGN_EXTEND:
		case SMP_ZERO_EXTEND:
clc5q's avatar
clc5q committed
			// Should we infer that all operands are NUMERIC?  !!!???!!!!
			break;

		case SMP_ADDRESS_OF: // take effective address
			if (UNINIT == CurrRT->GetOperatorType()) {
				CurrRT->SetOperatorType(POINTER, this);
				// Left operand is having its address taken, but we cannot infer what its
				//  type is.
				updated = true;
			}
			break;

		case SMP_U_LEFT_SHIFT: // unsigned left shift
		case SMP_S_LEFT_SHIFT: // signed left shift
		case SMP_U_RIGHT_SHIFT: // unsigned right shift
		case SMP_S_RIGHT_SHIFT: // signed right shift
		case SMP_ROTATE_LEFT:
		case SMP_ROTATE_LEFT_CARRY: // rotate left through carry
		case SMP_ROTATE_RIGHT:
		case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry
		case SMP_U_MULTIPLY:
		case SMP_S_MULTIPLY:
		case SMP_U_DIVIDE:
		case SMP_S_DIVIDE:
		case SMP_U_REMAINDER:
		case SMP_BITWISE_NOT: // unary operator
		case SMP_BITWISE_XOR:
		case SMP_S_COMPARE: // signed compare (subtraction-based)
		case SMP_U_COMPARE: // unsigned compare (AND-based)
		case SMP_LESS_THAN: // boolean test operators
		case SMP_GREATER_THAN:
		case SMP_LESS_EQUAL:
		case SMP_GREATER_EQUAL:
		case SMP_EQUAL:
		case SMP_NOT_EQUAL:
		case SMP_LOGICAL_AND:
		case SMP_LOGICAL_OR:
		case SMP_UNARY_NUMERIC_OPERATION:  // miscellaneous; produces NUMERIC result
		case SMP_BINARY_NUMERIC_OPERATION:  // miscellaneous; produces NUMERIC result
		case SMP_SYSTEM_OPERATION:   // for instructions such as CPUID, RDTSC, etc.; NUMERIC
		case SMP_UNARY_FLOATING_ARITHMETIC:  // all the same to our type system; all NUMERIC
		case SMP_BINARY_FLOATING_ARITHMETIC:  // all the same to our type system; all NUMERIC
		case SMP_REVERSE_SHIFT_U:   // all the same to our type system; all NUMERIC
		case SMP_SHUFFLE:   // all the same to our type system; all NUMERIC
		case SMP_COMPARE_EQ_AND_SET:   // packed compare for equality and set bits; all NUMERIC
		case SMP_COMPARE_GT_AND_SET:   // packed compare for greater-than and set bits; all NUMERIC
		case SMP_PACK_S:  // Pack operands into extended-precision register, signed saturation for loss of precision
		case SMP_PACK_U:  // Pack operands into extended-precision register, unsigned saturation for loss of precision
		case SMP_AVERAGE_U: // Average of unsigned operands
		case SMP_MULTIPLY_AND_ADD: // multiply and add (or multiply and accumulate)
		case SMP_SUM_OF_DIFFS: // sum over two vectors of absolute values of differences of their elements
		case SMP_INTERLEAVE:  // interleave fields from two packed operands; NUMERIC
		case SMP_CONCATENATE:   // all the same to our type system; all NUMERIC
			// Shared handling for every operator listed above: the operator and any
			//  operand that has no type yet are all marked NUMERIC. Types that were
			//  already inferred are left untouched. 'updated' records whether anything
			//  changed during this visit.
			if (UNINIT == CurrRT->GetOperatorType()) {
				CurrRT->SetOperatorType(NUMERIC, this);
				updated = true;
			}
			// Left operand should be NUMERIC if it exists.
			UseOp = CurrRT->GetLeftOperand();
			if (UseOp.type != o_void) {
				CurrUse = this->Uses.FindRef(UseOp);
				if (CurrUse == this->GetLastUse()) {
					// Operand was not found in the USE list: warn, then add it typed NUMERIC.
					// NOTE(review): the -1 argument is presumably an "unassigned" SSA
					//  number -- confirm against the SetRef() declaration.
					SMP_msg("SERIOUS WARNING: Adding missing USE of ");
					PrintOperand(UseOp);
					SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
					this->Uses.SetRef(UseOp, NUMERIC, -1);
					updated = true;
				}
				else if (UNINIT == CurrUse->GetType()) {
					// USE exists but has no type yet; give it NUMERIC.
					CurrUse = this->SetUseType(UseOp, NUMERIC);
					updated = true;
				}
			}
			// Right operand should be NUMERIC if it exists.
			if (CurrRT->HasRightSubTree()) {
				// Recurse into subtree
#if SMP_AGGRESSIVE_TYPE_INFERENCE
				// When aggressive inference is compiled in, an untyped subtree root is
				//  pre-typed NUMERIC before the recursive call.
				// NOTE(review): this pre-typing does not set 'updated' itself; confirm
				//  that is intended.
				if (UNINIT == CurrRT->GetRightTree()->GetOperatorType()) {
					CurrRT->GetRightTree()->SetOperatorType(NUMERIC, this);
				}
#endif
				updated |= this->InferOperatorType(CurrRT->GetRightTree());
			}
			else {
				// No subtree: the right side is a plain operand (possibly o_void),
				//  handled exactly like the left operand above.
				UseOp = CurrRT->GetRightOperand();
				if (UseOp.type != o_void) {
					CurrUse = this->Uses.FindRef(UseOp);
					if (CurrUse == this->GetLastUse()) {
						// Operand was not found in the USE list: warn, then add it typed NUMERIC.
						SMP_msg("SERIOUS WARNING: Adding missing USE of ");
						PrintOperand(UseOp);
						SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
						this->Uses.SetRef(UseOp, NUMERIC, -1);
						updated = true;
					}
					else if (UNINIT == CurrUse->GetType()) {
						// USE exists but has no type yet; give it NUMERIC.
						CurrUse = this->SetUseType(UseOp, NUMERIC);
						updated = true;
					}
				}
			}
			break;

		case SMP_NEGATE:    // unary negation
			// The operator type is derived from the type of the single (left) operand:
			//  PTROFFSET stays PTROFFSET, a data pointer becomes NEGATEDPTR, and
			//  anything else defaults to NUMERIC if the operator has no type yet.
			UseOp = CurrRT->GetLeftOperand();
			assert(o_void != UseOp.type);
			CurrUse = this->Uses.FindRef(UseOp);
			if (CurrUse == this->GetLastUse()) {
				// Operand was not found in the USE list. Name the operand in the warning
				//  and actually add the USE, so that 'updated' reflects a real change and
				//  the next pass finds the operand instead of warning again. The USE is
				//  added without a type (same call form as the SMP_INCREMENT case) because
				//  the operand type must stay open; see the comments in the else branch.
				SMP_msg("SERIOUS WARNING: Adding missing USE of ");
				PrintOperand(UseOp);
				SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
				this->Uses.SetRef(UseOp);
				updated = true;
			}
			else {
				OperType = CurrRT->GetOperatorType();
				LeftType = CurrUse->GetType();
				// Only tricky cases are the negation of a POINTER or PTROFFSET.
				//  Negation of PTROFFSET could be inefficient code that computed
				//  PTR1 - PTR2 and later corrected it to PTR2 - PTR1 by negation.
				//  The type remains PTROFFSET. Negating a POINTER could be an unusual
				//  case similar to subtracting a POINTER from a NUMERIC. See comments
				//  in the SMP_ADD case below, and also the SMP_SUBTRACT case.
				if (LeftType == PTROFFSET) {
					// Override any prior operator type, in case PTROFFSET was inferred late
					//  in our analysis and the operator was set to NUMERIC.
					CurrRT->SetOperatorType(PTROFFSET, this);
					updated = true;
				}
				else if (IsDataPtr(LeftType)) {
					// Override any prior operator type, in case POINTER was inferred late
					//  in our analysis and the operator was set to NUMERIC.
					CurrRT->SetOperatorType(NEGATEDPTR, this);
					updated = true;
				}
				else if (OperType == UNINIT) {
					// Default to NUMERIC for most negations.
					CurrRT->SetOperatorType(NUMERIC, this);
					// But, leave left operand type alone, in case an UNINIT operand
					//  might be determined later to be PTROFFSET or NEGATEDPTR.
					//  Leaving it alone causes us not to set TypeInferenceFinished to true
					//  at the end of this function in the UNINIT case.
					updated = true;
				}
			}
			break;

		case SMP_INCREMENT:
		case SMP_DECREMENT:
			// Increment and decrement have a single (left) operand. Operator and
			//  operand share one type: whichever of the two is typed first supplies
			//  the type for the other. If both are typed, or neither is, nothing changes.
			assert(!CurrRT->HasRightSubTree());
			UseOp = CurrRT->GetLeftOperand();
			assert(o_void != UseOp.type);
			CurrUse = this->Uses.FindRef(UseOp);
			if (CurrUse != this->GetLastUse()) {
				if (CurrRT->GetOperatorType() != UNINIT) {
					if (CurrUse->GetType() == UNINIT) {
						// Operator is typed, operand is not: push the type down.
						CurrUse = this->SetUseType(UseOp, CurrRT->GetOperatorType());
						updated = true;
					}
				}
				else if (CurrUse->GetType() != UNINIT) {
					// Operand is typed, operator is not: pull the type up.
					CurrRT->SetOperatorType(CurrUse->GetType(), this);
					updated = true;
				}
			}
			else {
				// Operand was not found in the USE list: warn and add it (untyped);
				//  type propagation is left for a later pass.
				SMP_msg("SERIOUS WARNING: Adding missing USE of ");
				PrintOperand(UseOp);
				SMP_msg(" at %x in %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
				this->Uses.SetRef(UseOp);
				updated = true;
			}
			break;

		case SMP_ADD: