Skip to content
Snippets Groups Projects
SMPInstr.cpp 416 KiB
Newer Older
		//  from memory operands, e.g. mov eax,[ebp-20]
		return false;
	}
	else if (UseFP && (this->SMPcmd.itype == NN_mov)
		&& (FirstDef->GetOp().is_reg(R_sp))
		&& (FirstUse->GetOp().is_reg(R_bp)))
		return true;
	else if ((this->SMPcmd.itype == NN_add)
		&& (FirstDef->GetOp().is_reg(R_sp))) {
		set<DefOrUse, LessDefUse>::iterator SecondUse = ++FirstUse;
		if (SecondUse == this->Uses.GetLastRef())
			return false;  // no more USEs ... strange for ADD instruction
		if (SecondUse->GetOp().is_imm((uval_t) LocalVarsSize))
			return true;
		else if (SecondUse->GetOp().type == o_imm) {
			signed long	TempImm = (signed long) this->SMPcmd.Operands[1].value;
			if (0 > TempImm) // adding a negative to ESP; alloc, not dealloc
				return false;
			else {
				SMP_msg("Used imprecise LocalVarsSize to find dealloc instr.\n");
	}
	else
		return false;
} // end of SMPInstr::MDIsFrameDeallocInstr()

// Is instruction a no-op? There are 1-byte, 2-byte, etc., versions of no-ops.
bool SMPInstr::MDIsNop(void) const {
	bool IsNop = false;
	ushort opcode = this->SMPcmd.itype;

	// NOTE: More examples have arisen, e.g. xchg reg with itself. !!!!!!

	if (NN_nop == opcode)
		IsNop = true;
	else if (NN_mov == opcode) {
		if ((o_reg == this->SMPcmd.Operands[0].type) 
			&& this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) {
			// We have a register to register move with source == destination.
			IsNop = true;
		}
	}
	else if (NN_lea == opcode) {
		if ((o_reg == this->SMPcmd.Operands[0].type)
			&& (o_displ == this->SMPcmd.Operands[1].type)
			&& (0 == this->SMPcmd.Operands[1].addr)) {
			// We are looking for 6-byte no-ops like lea esi,[esi+0]
				ushort destreg = this->SMPcmd.Operands[0].reg;
				if ((this->SMPcmd.Operands[1].hasSIB)
					&& (destreg == (ushort) sib_base(this->SMPcmd.Operands[1]))
					&& (R_sp == sib_index(this->SMPcmd.Operands[1]))) {
					// R_sp signifies no SIB index register. So, we have
					//  lea reg,[reg+0] with reg being the same in both place,
					//  once as Operands[0] and once as the base reg in Operands[1].
					IsNop = true;
				}
				else if (destreg == this->SMPcmd.Operands[1].reg) {
					IsNop = true;
				}
		}
	}
	return IsNop;
} // end of SMPInstr::MDIsNop()

// Is non-multiply arithmetic instruction that can possibly overflow?
//  True for adc, add, inc, neg and xadd; false for every other opcode.
bool SMPInstr::MDIsOverflowingOpcode(void) const {
	switch (this->SMPcmd.itype) {
		case NN_adc:
		case NN_add:
		case NN_inc:
		case NN_neg:
		case NN_xadd:
			return true;
		default:
			return false;
	}
}

// Is non-multiply arithmetic instruction that can possibly underflow?
//  True for dec, sbb and sub; false for every other opcode.
bool SMPInstr::MDIsUnderflowingOpcode(void) const {
	switch (this->SMPcmd.itype) {
		case NN_dec:
		case NN_sbb:
		case NN_sub:
			return true;
		default:
			return false;
	}
}

// Is potentially benign overflow instruction?
//  True for adc and add only.
bool SMPInstr::MDIsMaybeBenignOverflowOpcode(void) const {
	switch (this->SMPcmd.itype) {
		case NN_adc:
		case NN_add:
			return true;
		default:
			return false;
	}
}

// Is potentially benign underflow instruction?
//  True for neg, sbb and sub only.
bool SMPInstr::MDIsMaybeBenignUnderflowOpcode(void) const {
	switch (this->SMPcmd.itype) {
		case NN_neg:
		case NN_sbb:
		case NN_sub:
			return true;
		default:
			return false;
	}
}

// Is definitely benign underflow instruction?
//  NOTE: Overlaps with MDIsMaybeBenignUnderflowOpcode(), so call this one first.
//  True only for an sbb whose RTL subtracts an operand from itself.
bool SMPInstr::MDIsDefiniteBenignUnderflowOpcode(void) {
	// gcc use:  sbb edx,edx as a tricky way to get all zeroes or all ones into edx.
	//  (Some sort of saturation?)
	//  The "underflow" on the subtraction is irrelevant and benign.
	if (NN_sbb != this->SMPcmd.itype) {
		return false; // the RTL is only inspected for sbb
	}
	return this->SubtractsFromItself();
}

// Does a subtraction operator get applied to same left and right operands?
bool SMPInstr::SubtractsFromItself(void) {
	bool SelfSubtract = false;
	size_t RTLCount = this->RTL.GetCount();
	for (size_t index = 0; index < RTLCount; ++index) {
		SMPRegTransfer *CurrRT = this->RTL.GetRT(index);
		if ((CurrRT != NULL) && (CurrRT->HasRightSubTree())) {
			CurrRT = CurrRT->GetRightTree();
			SMPoperator CurrOp = CurrRT->GetOperator();
			if ((SMP_SUBTRACT_BORROW == CurrOp) || (SMP_SUBTRACT == CurrOp)) {
				if (!(CurrRT->HasRightSubTree())) {
					// NOTE: Must change this code when we build more precise SMP_SUBTRACT_BORROW RTL.
					op_t LeftOp = CurrRT->GetLeftOperand();
					op_t RightOp = CurrRT->GetRightOperand();
					SelfSubtract = IsEqOp(RightOp, LeftOp);
				}
				break;
			}
		}
	}
	return SelfSubtract;
} // end of SMPInstr::SubtractsFromItself()

// MACHINE DEPENDENT: Is instruction a return instruction?
//  True for the retn and retf opcodes.
bool SMPInstr::MDIsReturnInstr(void) const {
	switch (this->SMPcmd.itype) {
		case NN_retn:
		case NN_retf:
			return true;
		default:
			return false;
	}
}

// MACHINE DEPENDENT: Is instruction a POP instruction?
//  True when the opcode lies in the inclusive range [FIRST_POP_INST, LAST_POP_INST].
#define FIRST_POP_INST   NN_pop
#define LAST_POP_INST    NN_popfq
bool SMPInstr::MDIsPopInstr(void) const {
	if (this->SMPcmd.itype < FIRST_POP_INST) {
		return false; // below the POP opcode range
	}
	return (this->SMPcmd.itype <= LAST_POP_INST);
}

// MACHINE DEPENDENT: Is instruction a PUSH instruction?
//  True when the opcode lies in the inclusive range [FIRST_PUSH_INST, LAST_PUSH_INST].
#define FIRST_PUSH_INST   NN_push
#define LAST_PUSH_INST    NN_pushfq
bool SMPInstr::MDIsPushInstr(void) const {
	if (this->SMPcmd.itype < FIRST_PUSH_INST) {
		return false; // below the PUSH opcode range
	}
	return (this->SMPcmd.itype <= LAST_PUSH_INST);
}

// MACHINE DEPENDENT: Is instruction an ENTER instruction?
//  True when the opcode lies in the inclusive range
//  [MD_FIRST_ENTER_INSTR, MD_LAST_ENTER_INSTR].
bool SMPInstr::MDIsEnterInstr(void) const {
	if (this->SMPcmd.itype < MD_FIRST_ENTER_INSTR) {
		return false; // below the ENTER opcode range
	}
	return (this->SMPcmd.itype <= MD_LAST_ENTER_INSTR);
}

// MACHINE DEPENDENT: Is instruction a LEAVE instruction?
//  True when the opcode lies in the inclusive range
//  [MD_FIRST_LEAVE_INSTR, MD_LAST_LEAVE_INSTR].
bool SMPInstr::MDIsLeaveInstr(void) const {
	return ((this->SMPcmd.itype >= MD_FIRST_LEAVE_INSTR)
			&& (this->SMPcmd.itype <= MD_LAST_LEAVE_INSTR));
}

// MACHINE DEPENDENT: Is instruction a HALT instruction?
//  True only for the hlt opcode.
bool SMPInstr::MDIsHaltInstr(void) const {
	const bool IsHalt = (this->SMPcmd.itype == NN_hlt);
	return IsHalt;
}

#define MD_FIRST_COND_MOVE_INSTR NN_cmova
#define MD_LAST_COND_MOVE_INSTR  NN_fcmovnu
// MACHINE DEPENDENT: Is instruction a conditional move?
//  True when the opcode lies in the inclusive range
//  [MD_FIRST_COND_MOVE_INSTR, MD_LAST_COND_MOVE_INSTR].
bool SMPInstr::MDIsConditionalMoveInstr(void) const {
	return ((this->SMPcmd.itype >= MD_FIRST_COND_MOVE_INSTR)
			&& (this->SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR));
}

// MACHINE DEPENDENT: Is instruction any kind of move?
//  True for mov, movsx, movzx, and anything MDIsConditionalMoveInstr() accepts.
bool SMPInstr::MDIsMoveInstr(void) const {
	switch (this->SMPcmd.itype) {
		case NN_mov:
		case NN_movsx:
		case NN_movzx:
			return true;
		default:
			// Not a plain or extending move; defer to the conditional move test.
			return this->MDIsConditionalMoveInstr();
	}
}

// MACHINE DEPENDENT: Do opcode/operands definitely indicate signed arithmetic?
//  Generally, this is only true for certain variants of multiplication and division.
//  idiv is always signed; imul is signed only when no result bits are discarded.
bool SMPInstr::MDIsSignedArithmetic(void) const {
	switch (this->SMPcmd.itype) {
		case NN_idiv:
			return true;
		case NN_imul:
			// If we discard the upper N bits of the multiplication result, then the
			//  lower N bits are the same for signed and unsigned multiplication, and
			//  gcc/g++ often use the IMUL opcode for both signed and unsigned multiplies
			//  when only N bits of result are retained. Therefore, the SIGNED nature of
			//  IMUL operands can only be inferred from the case in which 2N bits are kept.
			return (!(this->AreMultiplicationBitsDiscarded()));
		default:
			// idiv and imul are only possible signed cases
			return false;
	}
} // end of SMPInstr::MDIsSignedArithmetic()

// MACHINE DEPENDENT: Is instruction a conditional jump based on an unsigned condition?
//  True for ja, jae, jb, jbe and their negated spellings jna, jnae, jnb, jnbe.
bool SMPInstr::MDIsUnsignedBranch(void) const {
	static const int UnsignedJumps[] = {
		NN_ja, NN_jae, NN_jb, NN_jbe,
		NN_jna, NN_jnae, NN_jnb, NN_jnbe
	};
	const unsigned short opcode = this->SMPcmd.itype;
	const size_t TableSize = sizeof(UnsignedJumps) / sizeof(UnsignedJumps[0]);
	for (size_t TableIndex = 0; TableIndex < TableSize; ++TableIndex) {
		if (UnsignedJumps[TableIndex] == opcode) {
			return true;
		}
	}
	return false;
}

// MACHINE DEPENDENT: Is instruction a conditional jump based on a signed condition?
//  True for jg, jge, jl, jle, their negated spellings, and js/jns.
bool SMPInstr::MDIsSignedBranch(void) const {
	static const int SignedJumps[] = {
		NN_jg, NN_jge, NN_jl, NN_jle,
		NN_jng, NN_jnge, NN_jnl, NN_jnle,
		NN_js, NN_jns
	};
	const unsigned short opcode = this->SMPcmd.itype;
	const size_t TableSize = sizeof(SignedJumps) / sizeof(SignedJumps[0]);
	for (size_t TableIndex = 0; TableIndex < TableSize; ++TableIndex) {
		if (SignedJumps[TableIndex] == opcode) {
			return true;
		}
	}
	return false;
}

// MACHINE DEPENDENT: Is instruction a boolean set based on an unsigned condition?
//  True for seta, setae, setb, setbe and their negated spellings.
bool SMPInstr::MDIsUnsignedSetValue(void) const {
	static const int UnsignedSets[] = {
		NN_seta, NN_setae, NN_setb, NN_setbe,
		NN_setna, NN_setnae, NN_setnb, NN_setnbe
	};
	const unsigned short opcode = this->SMPcmd.itype;
	const size_t TableSize = sizeof(UnsignedSets) / sizeof(UnsignedSets[0]);
	for (size_t TableIndex = 0; TableIndex < TableSize; ++TableIndex) {
		if (UnsignedSets[TableIndex] == opcode) {
			return true;
		}
	}
	return false;
}

// MACHINE DEPENDENT: Is instruction a boolean set based on a signed condition?
//  True for setg, setge, setl, setle, their negated spellings, and sets/setns.
bool SMPInstr::MDIsSignedSetValue(void) const {
	static const int SignedSets[] = {
		NN_setg, NN_setge, NN_setl, NN_setle,
		NN_setng, NN_setnge, NN_setnl, NN_setnle,
		NN_sets, NN_setns
	};
	const unsigned short opcode = this->SMPcmd.itype;
	const size_t TableSize = sizeof(SignedSets) / sizeof(SignedSets[0]);
	for (size_t TableIndex = 0; TableIndex < TableSize; ++TableIndex) {
		if (SignedSets[TableIndex] == opcode) {
			return true;
		}
	}
	return false;
}

// MACHINE DEPENDENT: Does instruction use a callee-saved register?
bool SMPInstr::MDUsesCalleeSavedReg(void) {
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
		op_t CurrOp = CurrUse->GetOp();
		if (CurrOp.is_reg(R_bp) || CurrOp.is_reg(R_si)
			|| CurrOp.is_reg(R_di) || CurrOp.is_reg(R_bx)) {
			return true;
		}
	}
	return false;
} // end of SMPInstr::MDUsesCalleeSavedReg()

// Is the instruction a register to register copy of a stack pointer or frame pointer
//  into a general purpose register (which mmStrata will now need to track as a stack
//  relative pointer)?
//  UseFP: true if the enclosing function uses a frame pointer. When true, a copy
//   of EBP into a register, or of ESP into a register other than EBP, counts;
//   when false, only a copy of ESP counts.
//  NOTE(review): the "if (UseFP)" guard below was reconstructed. The text this
//   was taken from had unbalanced braces and never read UseFP, while its comments
//   distinguish the frame pointer case from the "no base pointer used in this
//   function" case. Confirm against the repository copy.
bool SMPInstr::MDIsStackPointerCopy(bool UseFP) {
	// OptType 3 indicates a move instruction
	// The lea instruction can perform three operand arithmetic, e.g.
	//  lea ebx,[esp+12] is just ebx:=esp+12, so it is a stack pointer copy.
	if (((this->OptType == 3) || (NN_lea == this->SMPcmd.itype))
		&& (this->GetFirstDef()->GetOp().type == o_reg)
		&& (!(this->GetFirstDef()->GetOp().is_reg(R_sp)))
		&& (!(this->HasSourceMemoryOperand()))) { // reg to reg move
		if (UseFP) {
			if (this->GetFirstUse()->GetOp().is_reg(R_bp))
				// Move of base pointer EBP into a general register
				return true;
			else if ((this->GetFirstUse()->GetOp().is_reg(R_sp))
				&& !(this->GetFirstDef()->GetOp().is_reg(R_bp)))
				// Move of ESP into something besides a base pointer
				return true;
		}
		else if (this->GetFirstUse()->GetOp().is_reg(R_sp)) {
			// Move of ESP into a register; no base pointer used in this function
			return true;
		}
	}
	return false;
} // end of SMPInstr::MDIsStackPointerCopy()

// Does any RTL fit the alloca() pattern: stack_pointer -= non-immediate-operand
//  Returns true as soon as one RT reports IsAllocaRTL(). A NULL entry from
//  GetRT() is skipped, matching the guard in SubtractsFromItself().
bool SMPInstr::HasAllocaRTL(void) {
	bool FoundAlloca = false;
	size_t RTLCount = this->RTL.GetCount();
	size_t RTLIndex;

	for (RTLIndex = 0; RTLIndex < RTLCount; ++RTLIndex) {
		SMPRegTransfer *CurrRT = this->RTL.GetRT(RTLIndex);
		if ((CurrRT != NULL) && CurrRT->IsAllocaRTL()) {
			FoundAlloca = true;
			break;
		}
	}

	return FoundAlloca;
} // end of SMPInstr::HasAllocaRTL()

// Determine if the instruction saves or restores a pointer into the stack frame.
// If it saves a stack pointer, set Save to true, set the StackDelta saved, and set
//   the operand that received the saved stack pointer into CopyOp. and return true.
// If it restores a stack pointer, set Save to false, set CopyOp to the operand that
//   held the value being restored, leave StackDelta alone for later computation
//   based on reaching definitions, and return true.
// For most instructions, no save or restore of a stack pointer, so return false.
// Parameters:
//   UseFP      - true if the function uses a frame pointer; enables the frame pointer cases.
//   FPDelta    - stack delta assigned to StackDelta when the frame pointer value is copied.
//   Save       - [out] reset to false at the top of each RT iteration, then set as described above.
//   StackDelta - [out] written only on the paths described above.
//   CopyOp     - [out] see above.
//   Error      - [out] set to false on entry; set to true when an assignment to the
//                stack or frame pointer could not be analyzed.
// NOTE(review): an earlier version of this header also mentioned a RestoreOp output,
//   but the signature has no such parameter.
// NOTE(review): the text of this function is visibly incomplete (it was taken from a
//   rendered page that dropped lines). Each apparent gap is marked below; the code
//   itself is left untouched and must be checked against the repository copy.
bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, sval_t FPDelta, bool &Save, sval_t &StackDelta, op_t &CopyOp, bool &Error) {
	bool StackPointerSaveOrRestore;
	size_t RTLCount = this->RTL.GetCount();
	size_t RTLIndex;
	int BaseReg, IndexReg, CopyReg;
	ushort Scale;
	ea_t offset;
	SMPoperator CurrOper;
	bool LookUpStackDelta; // Get stack delta from reaching defs for TempOp
	sval_t DeltaAdjust; // add to StackDelta after computing from reaching defs, e.g. lea esp,[ecx-4] get TempOp of ecx
	                        //  and DeltaAdjust of -4
	// NOTE(review): TempOp is used below but no declaration is visible in this function;
	//  a declaration line appears to be missing here.

	Error = false;

	for (RTLIndex = 0; RTLIndex < RTLCount; ++RTLIndex) {
		bool FPRestore = false; // frame pointer is restored
		bool SPRestore = false; // stack pointer is restored
		StackPointerSaveOrRestore = false; // default unless we detect a save or restore of the stack or frame pointer
		TempOp = InitOp;
		LookUpStackDelta = false;
		DeltaAdjust = 0;
		Save = false; // default unless we detect a stack pointer save

		// The stack alignment instructions (SP := SP bitwise_and immediate_value)
		//  look like something that needs to be processed here, but we always ignore
		//  these instructions. They have a variable effect on the stack pointer, from zero
		//  to -15 delta, but we assume that the delta is zero. This works for us because
		//  no stack accesses will occur into the padding region.
		// Also, any instruction that definitely does not restore the stack pointer or
		//  frame pointer from an arbitrary register or memory location, e.g. a leave instruction
		//  in x86 CPUs, is already handled in normal stack delta computations and needs
		//  no lookups from reaching defs, etc.
		if (this->IsStackAlignmentInst() || this->MDIsLeaveInstr() || this->MDIsFrameAllocInstr()) {
			break; // exit and return false
		}

		SMPRegTransfer *CurrRT = this->RTL.GetRT(RTLIndex);
		CurrOper = CurrRT->GetOperator();
		if (SMP_ASSIGN != CurrOper) {
			break; // not a regular RTL
		}
		op_t LeftOp = CurrRT->GetLeftOperand();
		if (LeftOp.is_reg(MD_STACK_POINTER_REG)) {
			SPRestore = true; // temporary; might just be a push or pop RTL, etc., in which case we will reset.
		}
		else if (UseFP && LeftOp.is_reg(MD_FRAME_POINTER_REG)) {
			FPRestore = true; // likewise temporary
		}
		if (!(SPRestore || FPRestore)) {
#if 0
			if (LeftOp.is_reg(MD_FLAGS_REG)) {
				break; // No point in looking for a save into the flags register
			}
#endif
			Save = true;
		}

		// If we are assigning to the stack pointer reg or the frame pointer reg, we need to analyze the right
		//  hand side of the RTL to see if it is a stack/frame pointer value, and not a simple push, pop, etc.
		if (!(CurrRT->HasRightSubTree())) {
			op_t RightOp = CurrRT->GetRightOperand();
			if ((o_reg <= RightOp.type) && (o_displ >= RightOp.type)) { // register or memory
				if (RightOp.is_reg(MD_STACK_POINTER_REG)) {
					// Stack pointer reg is being saved.
					StackDelta = this->GetStackPtrOffset(); // LeftOp := SP, so saved delta is just current delta
					CopyOp = LeftOp;
					StackPointerSaveOrRestore = true;
					FPRestore = false; // treat FP := SP as a save of SP rather than a restoration of FP
				// NOTE(review): the brace closing the branch above is missing here, and other
				//  statements may have been dropped with it.
				else if (!SPRestore && UseFP && RightOp.is_reg(MD_FRAME_POINTER_REG)) {
					// Frame pointer is being saved
					StackDelta = FPDelta;
					CopyOp = LeftOp;
					StackPointerSaveOrRestore = true;
					break;
				}
				else if (SPRestore || FPRestore) {
					// stack or frame pointer is being restored; leave Save=false and set other outgoing arguments.
					TempOp = RightOp;
					CopyOp = RightOp;
					StackPointerSaveOrRestore = true;
					LookUpStackDelta = true;
				}
				else { // RightOp is register or non-stack-pointer memory expr; either might hold stack delta
					TempOp = RightOp;
					CopyOp = LeftOp;
					LookUpStackDelta = true; // See if RightOp is holding a stack delta
					StackPointerSaveOrRestore = true;
				// NOTE(review): lines appear to be missing here. The block below reports an
				//  invalid operand type, so it presumably belongs to the else of the
				//  "register or memory" test above -- confirm.
				if (SPRestore || FPRestore) {
					SMP_msg("ERROR: Invalid operand type for assignment to stack or frame pointer at %x\n", this->GetAddr());
				}
				StackPointerSaveOrRestore = false;
				break;
			}
		}
		else { // we have a right subtree in the CurrRT
			SMPRegTransfer *RightRT = CurrRT->GetRightTree();
			// In order to have a right subtree, we must have something like:
			//   lea esp,[ecx-4]  which produces the RTL: esp := ecx - 4
			// We should consider any other RTL structure besides a basic addition or
			//  subtraction on the right subtree to be invalid.
			CurrOper = RightRT->GetOperator();
			if ((SMP_ADD == CurrOper) || (SMP_SUBTRACT == CurrOper)) {
				op_t RightLeftOp = RightRT->GetLeftOperand();
				if (o_reg == RightLeftOp.type) {
					if (RightRT->HasRightSubTree()) {
						// Complex RTL such as lea esp,[ebx+ecx*4] ; cannot analyze
						StackPointerSaveOrRestore = false;
					}
					else {
						op_t RightRightOp = RightRT->GetRightOperand();
						if (o_imm != RightRightOp.type) {
							// Complex RTL such as lea esp,[ebx+ecx] ; cannot analyze
							StackPointerSaveOrRestore = false;
						}
						else {
							TempOp = RightLeftOp;
							DeltaAdjust = (sval_t) RightRightOp.value;
							if (SMP_SUBTRACT == CurrOper) {
								// Negate the stack delta adjustment, e.g. lea esp,[ecx-4] needs DeltaAdjust of -4, not 4.
								DeltaAdjust = (0 - DeltaAdjust);
							}
							LookUpStackDelta = true;
							StackPointerSaveOrRestore = true;
							if (SPRestore || FPRestore) {
								CopyOp = RightLeftOp;
							}
							else {
								CopyOp = LeftOp;
							}
						}
					}
				}
				else { // weird RTL; LeftOp := (MemoryOp OPER ???)
					StackPointerSaveOrRestore = false;
				}
			}
			else { // not ADD or SUBTRACT
				StackPointerSaveOrRestore = false;
			}
		}

		if (LookUpStackDelta) {
			bool StackAccess = false;
			// We need to set StackDelta based on the reaching defs for TempOp
			// A reg is probably a general register, but could have lea ebx,[esp+4] so it could be stack or frame pointer.
			if (TempOp.is_reg(MD_STACK_POINTER_REG)) {
				// Weed out RTs that increment or decrement the stack pointer, e.g. SP := SP -4.
				//  These are not the kind of "save" or "restore" RTs that we are tracking.
				if (CopyOp.is_reg(MD_STACK_POINTER_REG)) {
					StackPointerSaveOrRestore = false;
					SPRestore = false;
					FPRestore = false;
					Save = false;
				}
				else {
					StackDelta = this->GetStackPtrOffset();
					StackDelta += DeltaAdjust;
					LookUpStackDelta = false; // just got it; no need for reaching defs
					StackPointerSaveOrRestore = true;
				}
			}
			else if (UseFP && TempOp.is_reg(MD_FRAME_POINTER_REG)) {
				StackDelta = FPDelta;
				StackDelta += DeltaAdjust;
				LookUpStackDelta = false; // just got it; no need for reaching defs
				StackPointerSaveOrRestore = true;
			// NOTE(review): the brace closing the branch above is missing here.
			else if (o_reg == TempOp.type) { // general reg, not frame or stack pointer reg
				CopyReg = TempOp.reg;
				// NOTE(review): lines appear to be missing here. The address-field extraction
				//  below reads like the start of a separate memory-operand branch -- confirm.
				MDExtractAddressFields(TempOp, BaseReg, IndexReg, Scale, offset);
				CopyReg = BaseReg;
				bool IndexedAccess = ((R_none != BaseReg) && (R_none != IndexReg));
				if (IndexedAccess) {
					StackPointerSaveOrRestore = false;  // Cannot analyze indexed accesses into the stack
				}
				else if (MDIsStackPtrReg(BaseReg, UseFP)) {
					StackAccess = true;
				}
				else {
					// memory expr that is not stack or frame pointer
					DeltaAdjust = (sval_t) TempOp.addr; // get normalized delta from addr field
				}
			// NOTE(review): closing brace(s) appear to be missing before the lookup below.
			if (StackPointerSaveOrRestore && LookUpStackDelta) {
				// Choose the operand whose reaching definitions are searched: the whole
				//  operand for a stack access, otherwise the register numbered CopyReg.
				op_t FindOp = InitOp;
				if (StackAccess) {
					FindOp = TempOp;
				}
				else {
					FindOp.type = o_reg;
					FindOp.reg = CopyReg;
				}
				if (this->GetBlock()->GetFunc()->IsInStackPtrCopySet(FindOp)) {
					// Screened out time wasters that are not in copy set; now,
					//  look up reaching defs.
					// We need to find out which are the reaching definitions for the FindOp at the current InstAddr.
					this->GetBlock()->GetFunc()->ComputeTempReachingDefs(FindOp, this->GetAddr());
					this->GetBlock()->GetFunc()->ComputeTempStackDeltaReachesList(FindOp);
					// See if TempStackDeltaReachesList has a consistent delta value.
					StackPointerSaveOrRestore = this->GetBlock()->GetFunc()->FindReachingStackDelta(StackDelta); // consistent SavedDelta value across entire list
					StackDelta += DeltaAdjust;
				}
				else {
					StackPointerSaveOrRestore = false; // reset, not in stack pointer copy set
				}
			// NOTE(review): the brace closing the lookup block above is missing here.
		} // end if (LookupStackDelta)

		if (!StackPointerSaveOrRestore && !Save && (SPRestore || FPRestore)) {
			// Any restore that could not be analyzed is an error.
			Error = true;
			break; // error exit
		}
		else if (StackPointerSaveOrRestore) {
			if (FPRestore) {
				// If we succeeded in looking up a stack delta that goes into the frame pointer reg,
				//  then we want to consider this instruction to be a save of a stack delta into
				//  a register (which happens to be the frame pointer reg in this case).
				FPRestore = false;
				Save = true;
			// NOTE(review): the braces closing "if (FPRestore)" and, after the break,
			//  "else if (StackPointerSaveOrRestore)" are missing here.
			break; // assume only one save or restore in an instruction; exit with success
	} // end for all RTs in the RTL

	return StackPointerSaveOrRestore;
} // end of SMPInstr::MDIsStackPtrSaveOrRestore()

// If call instruction is to malloc(), set the DEF register EAX type to
//  HEAPPTR and return true.
//  TargetOp.addr is looked up with get_func(); false is returned when no
//  function is found there or its name is not exactly "malloc". On a match the
//  HEAPPTR type is also propagated from the EAX DEF, locally or globally
//  depending on IsLocalName().
bool SMPInstr::MDFindMallocCall(op_t TargetOp) {
	func_t *CalleeFunc = get_func(TargetOp.addr);
	if (!CalleeFunc) {
		return false;
	}

	char CalleeName[MAXSTR];
	get_func_name(CalleeFunc->startEA, CalleeName, sizeof(CalleeName) - 1);
	if (0 != strcmp("malloc", CalleeName)) {
		// NOTE: Some compilers might call it __malloc ; make this more robust !!!
		return false;
	}

#if SMP_VERBOSE_FIND_POINTERS
	SMP_msg("Found call to malloc at %x\n", this->addr);
#endif
	// Build an operand for the EAX register and type its DEF as a heap pointer.
	op_t ReturnRegOp = InitOp;
	ReturnRegOp.type = o_reg;
	ReturnRegOp.reg = R_ax;
	set<DefOrUse, LessDefUse>::iterator EAXDEF = this->SetDefType(ReturnRegOp, HEAPPTR);
	int DefSSANum = EAXDEF->GetSSANum();

	if (!(this->BasicBlock->IsLocalName(ReturnRegOp))) { // global name
		this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
		(void) this->BasicBlock->PropagateGlobalDefType(ReturnRegOp, HEAPPTR,
				DefSSANum, false);
	}
	else {
		(void) this->BasicBlock->PropagateLocalDefType(ReturnRegOp, HEAPPTR,
				this->GetAddr(), DefSSANum, false);
	}
	return true;
} // end of SMPInstr::MDFindMallocCall()

// Is instruction a branch (conditional or unconditional) to a
//  code target that is not in the current chunk?
//  The answer is cached: once computed, later calls return the stored flag.
//  When a far branch is found, FarBranchTarget records the target address.
//  NOTE(review): the closing braces of the nested blocks were missing in the
//   text this was taken from and have been restored in the only nesting that
//   matches the indentation. Confirm no other statements were dropped.
bool SMPInstr::IsBranchToFarChunk(void) {
	if (this->IsFarBranchComputed()) { // answer is cached
		return this->IsBranchesToFarChunk();
	}
	func_t *CurrChunk = get_fchunk(this->address);
	bool FarBranch = false;
	if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
		// Instruction is a direct branch, conditional or unconditional
		if (this->NumUses() > 0) {
			set<DefOrUse, LessDefUse>::iterator CurrUse;
			for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
				op_t JumpTarget = CurrUse->GetOp();
				if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
					// Branches to a code address
					// stdclib sometimes has jumps to zero and calls to zero. These are dead code.
					if (0 != JumpTarget.addr) {
						func_t *TargetChunk = get_fchunk(JumpTarget.addr);
						// Is target address within the same chunk as the branch?
						//  A missing chunk on either side is treated as "not the same
						//  chunk", which also avoids dereferencing a NULL CurrChunk.
						FarBranch = (NULL == TargetChunk) || (NULL == CurrChunk)
							|| (CurrChunk->startEA != TargetChunk->startEA);
						if (FarBranch) {
							this->FarBranchTarget = JumpTarget.addr;
						}
					}
				}
			}
		}
	}
	if (FarBranch) {
		this->SetBranchesToFarChunk();
	}
	this->SetFarBranchComputed();
	return FarBranch;
} // end of SMPInstr::IsBranchToFarChunk()

// Forward to the USE list's SetSSANum() for CurrOp with subscript SSASub and
//  return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
	return this->Uses.SetSSANum(CurrOp, SSASub);
}

// Forward to the DEF list's SetSSANum() for CurrOp with subscript SSASub and
//  return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
	return this->Defs.SetSSANum(CurrOp, SSASub);
}

// Forward to the USE list's SetType() for CurrOp with type CurrType, passing
//  this instruction along, and return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
	return this->Uses.SetType(CurrOp, CurrType, this);
}

// Forward to the DEF list's SetType() for CurrOp with type CurrType, passing
//  this instruction along, and return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
	return this->Defs.SetType(CurrOp, CurrType, this);
}

// Forward to the DEF list's SetMetadata() for CurrOp with status Status and
//  return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
	return this->Defs.SetMetadata(CurrOp, Status);
}

// Forward to the DEF list's SetIndWrite() for CurrOp with flag IndWriteFlag and
//  return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) {
	return this->Defs.SetIndWrite(CurrOp, IndWriteFlag);
}

// Forward to the USE list's SetNoTruncation() for CurrOp with flag NoTruncFlag
//  and return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) {
	return this->Uses.SetNoTruncation(CurrOp, NoTruncFlag);
}

// Forward to the DEF list's SetNoOverflow() for DefOp with flag NoOverflowFlag
//  and return the iterator it produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) {
	return this->Defs.SetNoOverflow(DefOp, NoOverflowFlag);
}

// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
	bool DebugFlag = false;
	if (0x8049b00 == this->address) {
		// Setting up breakpoint line.
		DebugFlag = true;
	}

	// Fill cmd structure with disassembly of instr
	if (!SMPGetCmd(this->address, this->SMPcmd, this->features))

	// Record what type of instruction this is, simplified for the needs
	//  of data flow and type analysis.
	this->type = DFACategory[this->SMPcmd.itype];
	// Record optimization category.
	this->OptType = OptCategory[this->SMPcmd.itype];
	if ((NN_int == this->SMPcmd.itype) || (NN_into == this->SMPcmd.itype) || (NN_int3 == this->SMPcmd.itype)) {
		this->SetInterrupt();
	}
	else {
		this->ResetInterrupt();
	}
clc5q's avatar
clc5q committed
	// See if instruction is an ASM idiom for clearing a register.
	if (NN_xor == this->SMPcmd.itype) {
		ushort FirstReg;
		if (o_reg == this->SMPcmd.Operands[0].type) {
			FirstReg = this->SMPcmd.Operands[0].reg;
			if (this->SMPcmd.Operands[1].is_reg(FirstReg))
				this->SetRegClearIdiom();
	// See if instruction is simple nop or ASM idiom for nop.
	if (this->MDIsNop()) {
		this->SetNop();
	}
	// Build the DEF and USE lists for the instruction.
	this->BuildSMPDefUseLists();

	// Determine whether the instruction is a jump target by looking
	//  at its cross references and seeing if it has "TO" code xrefs.
	SMP_xref_t xrefs;
	for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
		if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
			this->SetJumpTarget();
	// If instruction is a call or indirect call, see if a call target has been recorded
	//  by IDA Pro.
	if (this->GetDataFlowType() == INDIR_CALL) {
		for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL);
			ok = xrefs.SMP_next_from()) {
			if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) {
				// Found a code target, with its address in CurrXrefs.to
				if (xrefs.GetTo() == (this->address + this->GetCmd().size)) {
					// A call instruction will have two targets: the fall through to the
					//  next instruction, and the called function. We want to find
					//  the called function.
					continue;
				}
				// We found a target, not the fall-through.
				this->CallTarget = xrefs.GetTo();
				SMP_msg("Found indirect call target %x at %x\n",
					xrefs.GetTo(), this->address);
				break;
			}
		} // end for all code xrefs
		if (BADADDR == this->CallTarget) {
			SMP_msg("WARNING: Did not find indirect call target at %x\n",
				this->address);
		}
	} // end if INDIR_CALL
	else if (this->GetDataFlowType() == CALL) {
		set<DefOrUse, LessDefUse>::iterator CurrUse;
		for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
			optype_t OpType = CurrUse->GetOp().type;
			if ((OpType == o_near) || (OpType == o_far)) {
				this->CallTarget = CurrUse->GetOp().addr;
			}
		}
		if (BADADDR == this->CallTarget) {
			SMP_msg("ERROR: Target not found for direct call at %x\n", this->address);
		SMP_msg("Analyzed debug instruction at %x\n", this->address);
	return;
} // end of SMPInstr::Analyze()

// Analyze the floating point NOP marker instruction at the top of the function.
//  No disassembly is requested here: SMPcmd is zeroed and filled in by hand as a
//  1-byte fnop at this->address, then the categories are looked up as usual.
void SMPInstr::AnalyzeMarker(void) {
	// Start from an all-zero SMPcmd, then set only the fields we need.
	memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
	this->SMPcmd.ea = this->address;
	this->SMPcmd.size = 1;
	this->SMPcmd.itype = NN_fnop;

	// Set the instr disassembly text.
	DisAsmText.SetMarkerInstText(this->GetAddr());

	// Data flow / type analysis category, then optimization category.
	this->type = DFACategory[this->SMPcmd.itype];
	this->OptType = OptCategory[this->SMPcmd.itype];
} // end of SMPInstr::AnalyzeMarker()

// Detect oddities of call instructions, such as pseudo-calls that are
//  actually jumps within a function
//  FirstFuncAddr, LastFuncAddr: address bounds of the enclosing function.
//  A call whose target equals FirstFuncAddr is flagged as directly recursive.
//  A call whose target lies strictly between the two bounds is flagged as a
//  call used as a jump and its type becomes JUMP. Nothing is done when no call
//  target has been recorded (CallTarget == BADADDR).
//  NOTE(review): the closing braces of this function were missing in the text
//   this was taken from and have been restored; confirm no statements were
//   dropped with them.
void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
	if (BADADDR != this->CallTarget) {
		if (this->CallTarget == FirstFuncAddr) {
			this->SetDirectRecursiveCall();
		}
		else {
			this->ResetDirectRecursiveCall();
			if ((this->CallTarget > FirstFuncAddr)
					&& (this->CallTarget < LastFuncAddr)) {
				this->SetCallUsedAsJump();
				this->type = JUMP;
			}
			else {
				this->ResetCallUsedAsJump();
			}
		}
	}
	return;
} // end of SMPInstr::AnalyzeCallInst()

// Compute the stack pointer delta for this instruction.
//  The starting value comes from the StackAlteration[] table, indexed by opcode.
//  Calls are then adjusted using the callee's net stack delta where available,
//  and a table value of 1 triggers an RTL-based computation that receives
//  IncomingDelta and PreAllocDelta.
// NOTE(review): the text of this function is visibly incomplete (it was taken from a
//  rendered page that dropped lines). Each apparent gap is marked below; the code
//  itself is left untouched and must be checked against the repository copy.
sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocDelta) {
	uint16 InstType = this->SMPcmd.itype;
	sval_t InstDelta = StackAlteration[InstType];
	SMPitype FlowType = this->GetDataFlowType();
	// NOTE(review): TailCall is used below but no declaration is visible in this
	//  function; a declaration line appears to be missing here.
	if (this->IsCallUsedAsJump() || this->MDIsInterruptCall() || this->IsCondTailCall()) {
		// Call is used within function as a jump. Happens when setting up
		//  thunk offsets, for example; OR, call is an interrupt call, in which
		//  the interrupt return cleans up the stack, leaving a delta of zero, but
		//  we do not have the system call code to analyze, OR, the call is a conditional
		//  jump to another function (conditional tail call), in which case the current
		//  function must have a return statement to fall into which will clean up the
		//  only thing left on the stack (the return address) and the conditional jump
		//  has no effect on the stack pointer.
		; // leave InstDelta equal to negative or zero value from StackAlterationTable[]
	}
	else if (this->IsRecursiveCall()) {
		// We don't have the net stack delta for our own function yet, so we cannot
		//  look it up. We must assume that each call has no net effect on the stack delta.
		// Alternatively, we could call this->GetBlock()->GetFunc()->GetStackDeltaForCallee() as below.
		InstDelta = 0;
	}
	else if (this->IsAllocaCall()) {
		InstDelta = STARS_DEFAULT_ALLOCA_SIZE;
	}
	else if ((CALL == FlowType) || (INDIR_CALL == FlowType) || TailCall) {
		// A real call instruction, which pushes a return address on the stack,
		//  not a call used as a branch within the function. A return instruction
		//  will usually cancel out the stack push that is implicit in the call, which
		//  means that the function will have a net stack ptr delta of +4, which will
		//  cancel out the -4 value of the call instruction and set the delta to zero.
		//  However, this is not true in all cases, so we get the net stack ptr delta
		//  directly from the called function unless it is an unresolved indirect call,
		//  in which case we assume +4. !!!!****!!!! In the future, we could analyze
		//  the code around an unresolved indirect call to see if it seems to be
		//  removing items left on the stack by the callee.
		// SPECIAL CASE: A jump used as a tail call will have a stack ptr effect that is equal
		//  to the net stack ptr effect of its target function, usually +4, whereas a jump
		//  would otherwise have a net stack ptr effect of 0.
		ea_t CalledFuncAddr = this->GetCallTarget();
		if ((BADADDR == CalledFuncAddr) || (0 == CalledFuncAddr)) {
			InstDelta = 0;
		}
		else { // We have a call target
			SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->FindFunction(CalledFuncAddr);
			sval_t AdjustmentDelta;
			if (CalleeFunc) {
				if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) {
					// Phase ordering issue in the call graph. A mutually recursive clique of functions has to
					//  be broken by starting processing somewhere, and all callees cannot be processed before
					//  we start. If we got our stack down to zero and then made a tail call, then we have to assume
					//  that the callee will use our return address, so we assume the default stack delta. If not a
					//  tail call, we ask our function to see if the information is available from IDA Pro analyses,
					//  or if it can be inferred from the fact that the call is followed by a stack adjustment.
					if (TailCall) {
						InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
						SMP_msg("WARNING: Callee stack ptr analysis not yet performed at tail call inst %x ; normal delta assumed\n", this->GetAddr());
						// NOTE(review): an "else" separating the tail call case from the
						//  non-tail-call case appears to be missing here; compare the parallel
						//  "analysis failed" branch below.
						AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(CalledFuncAddr);
						SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %x ; stack adjustment used\n", this->GetAddr());
						// NOTE(review): AdjustmentDelta is assigned above but not used in the
						//  visible text of this branch; a line may be missing.
					}
				}
				else if (!CalleeFunc->StackPtrAnalysisSucceeded()) {
					// Callee analyses were done, but they failed. In order to proceed, we have to assume
					//  the same situation as we just did in the case where analyses have not been performed.
					SMP_msg("WARNING: Callee stack ptr analysis failed at inst %x ; normal delta assumed\n", this->GetAddr());
					if (TailCall) {
						InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
					}
					else {
						// NOTE(review): this call passes this->GetAddr(), while the branch
						//  above passes CalledFuncAddr to the same helper -- confirm which
						//  argument GetStackDeltaForCallee() expects.
						AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr());
						InstDelta += AdjustmentDelta;
					}
				}
				else {
					// Callee's analyses have succeeded, so get delta straight from callee.
					InstDelta += CalleeFunc->GetNetStackPtrDelta();
				}
			}
			else {
#if 0
				SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n",
					CalledFuncAddr, this->GetAddr());
				InstDelta = SMP_STACK_DELTA_ERROR_CODE;
#else
				SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n",
					CalledFuncAddr, this->GetAddr());
				if (TailCall) {
					InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
				}
				else {
					InstDelta = 0;
				}
#endif
			}
		}
	} // end CALL or INDIR_CALL or TailCall case
	else if (1 == InstDelta) { 
		// value of 1 is trigger to investigate the RTL for the 
		//  true value, which cannot be found simply by table lookup
		// In the special case of an x86 LEAVE instruction, the effect
		//  on the stack pointer is to deallocate the local frame size,
		//  plus pop the saved frame pointer into EBP. Helper functions
		//  need to know whether to look for this special case.
		bool IsLeaveInstr = this->MDIsLeaveInstr();
		InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, IncomingDelta, PreAllocDelta);
		// NOTE(review): the brace closing this branch and the statement returning the
		//  result are missing from the visible text before the end of the function.
} // end of SMPInstr::AnalyzeStackPointerDelta()

// Compute the number of stack adjustment bytes contributed by this instruction,
//  of the sort that follows a call to a function that leaves outgoing args on
//  the stack or swallows incoming args from the stack.
// Returns the StackAlteration[] table entry for the opcode, except when that
//  entry is the sentinel value 1: then the delta is totalled from the RTL, or
//  is zero if the instruction is a LEAVE.
sval_t SMPInstr::FindStackAdjustment(void) {
	const uint16 Opcode = this->SMPcmd.itype;
	const sval_t TableDelta = StackAlteration[Opcode];

	// Any table entry other than 1 is the final answer.
	if (1 != TableDelta) {
		return TableDelta;
	}

	// A table entry of 1 is the trigger to investigate the RTL for the true
	//  value, which cannot be found simply by table lookup.
	// An x86 LEAVE deallocates the local frame and pops the saved frame
	//  pointer into EBP; that is not the kind of adjustment we are looking
	//  for, so it contributes nothing.
	if (this->MDIsLeaveInstr()) {
		return 0;
	}

	// Not a LEAVE: let the RTL total the stack pointer alteration, with no
	//  incoming delta and no pre-allocation delta.
	return this->RTL.TotalStackPointerAlteration(false, 0, 0);
} // end of SMPInstr::FindStackAdjustment()

// Normalize stack operands to have a displacement from the stack pointer value on entry to the function,
//  rather than the current stack pointer value.
// UseFP indicates we are using a frame pointer in the function.
// FPDelta holds the stack delta (normalized) for the frame pointer.
// DefOp comes in with the operand to be normalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp) {
	if (o_reg == DefOp.type) {
		return true;
	}
	else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
		int SignedOffset = (int) DefOp.addr;
		sval_t NormalizedDelta;

		if (DefOp.hasSIB) {
			// We must deal with a potentially indexed memory expression. We want to
			//  normalize two different cases here: e.g. [esp+ebx+4] will become [esp+ebx-24]
			//  and [ebp+ebx-8] will become [esp+ebx-12] after normalization. A wrinkle
			//  on the second case is when the base register and index register are swapped
			//  in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves
			//  correcting the index/base reg order in the SIB, because an index reg of ESP
			//  is the SIB encoding for "no index register" and we cannot leave it like that.
			int BaseReg = sib_base(DefOp);
			int IndexReg = (int) sib_index(DefOp);
			if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register
				IndexReg = R_none;
			if (BaseReg == X86_STACK_POINTER_REG) {
				// We probably have an indexed ESP-relative operand.
				//  We leave the sib byte alone and normalize the offset.
				NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
			}
			else {
				// Must be EBP-relative.
				NormalizedDelta = FPDelta + (sval_t) SignedOffset;
				// Unfortunately, when we are dealing with a SIB byte in the opcode, we cannot
				//  just say DefOp.reg = MD_STACK_POINTER_REG to convert from the frame pointer
				//  to the stack pointer. Instead, we have to get into the nasty machine code
				//  level and change the SIB bits that specify either the base register or the
				//  index register, whichever one is the frame pointer.
				if (BaseReg == X86_FRAME_POINTER_REG) {
					// The three least significant bits of the SIB byte are the base register.
					//  They must contain a 5, which is the x86 value for register EBP, and we
					//  want to convert it to a 4, denoting register ESP. We can just zero out
					//  the least significant bit to accomplish that.
					DefOp.sib &= 0xfe;
				}
				else {
					// We sometimes have an instruction in which the frame pointer is used as
					//  the "index" register in the SIB byte, and the true index register is
					//  in the "base" register position in the SIB byte.
					assert(IndexReg == X86_FRAME_POINTER_REG);
					// The true index reg is in the lowest three bits, while the next three
					//  bits must contain a 5 (register EBP) and we want to make them a 4 (ESP).
					//  We must swap base and index regs as we normalize (see explanation above).
					char SIBtemp = DefOp.sib;
					char SIBindex = SIBtemp & 0x38;
					char SIBbase = SIBtemp & 0x07;
					assert ((SIBindex >> 3) == 5); // must be EBP
					SIBtemp &= 0xa0; // zero out lower 6 bits; upper 2 bits are scale factor - leave them alone
					SIBtemp &= (SIBbase << 3); // make old base reg (e.g. ebx) into a proper index reg
					SIBtemp |= 0x04; // make the new base reg be 4 (reg ESP)
					DefOp.sib = SIBtemp;
				}
				this->SetFPNormalizedToSP();
				// Add the stack pointer to the USE set for the instruction.
				this->MDAddRegUse(X86_STACK_POINTER_REG, false);
			}
		}

		else if (DefOp.reg == MD_FRAME_POINTER_REG) {
			// If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this
			//  is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming
			//  argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when
			//  normalized to the entry point value of the stack pointer. In both cases, we can see that the
			//  normalized stack delta is just FPDelta+SignedOffset.
			NormalizedDelta = FPDelta + (sval_t) SignedOffset;
			// Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the
			//  NormalizedDelta just computed.
			DefOp.reg = MD_STACK_POINTER_REG;
			this->SetFPNormalizedToSP();
			// Add the stack pointer to the USE set for the instruction.
			this->MDAddRegUse(DefOp.reg, false);