Skip to content
Snippets Groups Projects
SMPInstr.cpp 368 KiB
Newer Older
// MACHINE DEPENDENT: Is instruction a boolean set based on an unsigned condition?
bool SMPInstr::MDIsUnsignedSetValue(void) const {
	unsigned short opcode = this->SMPcmd.itype;
	return ((NN_seta == opcode) || (NN_setae == opcode) || (NN_setb == opcode) || (NN_setbe == opcode)
		|| (NN_setna == opcode) || (NN_setnae == opcode) || (NN_setnb == opcode) || (NN_setnbe == opcode));
}

// MACHINE DEPENDENT: Is instruction a boolean set based on a signed condition?
bool SMPInstr::MDIsSignedSetValue(void) const {
	unsigned short opcode = this->SMPcmd.itype;
	return ((NN_setg == opcode) || (NN_setge == opcode) || (NN_setl == opcode) || (NN_setle == opcode)
		|| (NN_setng == opcode) || (NN_setnge == opcode) || (NN_setnl == opcode) || (NN_setnle == opcode)
		|| (NN_sets == opcode) || (NN_setns == opcode));
}

// MACHINE DEPENDENT: Does instruction use a callee-saved register?
bool SMPInstr::MDUsesCalleeSavedReg(void) {
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
		op_t CurrOp = CurrUse->GetOp();
		if (CurrOp.is_reg(R_bp) || CurrOp.is_reg(R_si)
			|| CurrOp.is_reg(R_di) || CurrOp.is_reg(R_bx)) {
			return true;
		}
	}
	return false;
} // end of SMPInstr::MDUsesCalleeSavedReg()

// Is the instruction a register to register copy of a stack pointer or frame pointer
//  into a general purpose register (which mmStrata will now need to track as a stack 
//  relative pointer)?
bool SMPInstr::MDIsStackPointerCopy(bool UseFP) {
	// OptType 3 indicates a move instruction
	// The lea instruction can perform three operand arithmetic, e.g.
	//  lea ebx,[esp+12] is just ebx:=esp+12, so it is a stack pointer copy.
	if (((this->OptType == 3) || (NN_lea == this->SMPcmd.itype))
		&& (this->GetFirstDef()->GetOp().type == o_reg)
		&& (!(this->GetFirstDef()->GetOp().is_reg(R_sp)))
		&& (!(this->HasSourceMemoryOperand()))) { // reg to reg move
			if (this->GetFirstUse()->GetOp().is_reg(R_bp))
				// Move of base pointer EBP into a general register
				return true;
			else if ((this->GetFirstUse()->GetOp().is_reg(R_sp))
				&& !(this->GetFirstDef()->GetOp().is_reg(R_bp)))
				// Move of ESP into something besides a base pointer
				return true;
		}
		else if (this->GetFirstUse()->GetOp().is_reg(R_sp)) {
			// Move of ESP into a register; no base pointer used in this function
			return true;
		}
	}
	return false;
} // end of SMPInstr::MDIsStackPointerCopy()

// If call instruction is to malloc(), set the DEF register EAX type to
//  HEAPPTR and return true.
bool SMPInstr::MDFindMallocCall(op_t TargetOp) {
	bool changed = false;
	func_t *TargetFunc = get_func(TargetOp.addr);
	if (TargetFunc) {
		char FuncName[MAXSTR];
		get_func_name(TargetFunc->startEA, FuncName, sizeof(FuncName) - 1);
		if (0 == strcmp("malloc", FuncName)) {
			// NOTE: Some compilers might call it __malloc ; make this more robust !!!
#if SMP_VERBOSE_FIND_POINTERS
			SMP_msg("Found call to malloc at %x\n", this->addr);
#endif
			op_t SearchOp = InitOp;
			SearchOp.type = o_reg;
			SearchOp.reg = R_ax;
			set<DefOrUse, LessDefUse>::iterator EAXDEF;
			EAXDEF = this->SetDefType(SearchOp, HEAPPTR);
			int SSANum = EAXDEF->GetSSANum();
			changed = true;
			if (this->BasicBlock->IsLocalName(SearchOp)) {
				(void) this->BasicBlock->PropagateLocalDefType(SearchOp, HEAPPTR,
						this->GetAddr(), SSANum, false);
			}
			else { // global name
				this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
				(void) this->BasicBlock->PropagateGlobalDefType(SearchOp, HEAPPTR,
						SSANum, false);
			}
		} // end if "malloc"
	} // end if (TargetFunc)
	return changed;
} // end of SMPInstr::MDFindMallocCall()

// Is instruction a branch (conditional or unconditional) to a
//  code target that is not in the current chunk?
bool SMPInstr::IsBranchToFarChunk(void) {
	if (this->IsFarBranchComputed()) { // answer is cached
		return this->IsBranchesToFarChunk();
	func_t *CurrChunk = get_fchunk(this->address);
	bool FarBranch = false;
	if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
		// Instruction is a direct branch, conditional or unconditional
		if (this->NumUses() > 0) {
			set<DefOrUse, LessDefUse>::iterator CurrUse;
			for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
				op_t JumpTarget = CurrUse->GetOp();
				if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
					// Branches to a code address
					func_t *TargetChunk = get_fchunk(JumpTarget.addr);
					// Is target address within the same chunk as the branch?
					FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA);
					if (FarBranch) {
						this->FarBranchTarget = JumpTarget.addr;
					}
	if (FarBranch) {
		this->SetBranchesToFarChunk();
	}
	this->SetFarBranchComputed();
	return FarBranch;
} // end of SMPInstr::IsBranchToFarChunk()

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
	return this->Uses.SetSSANum(CurrOp, SSASub);
};

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
	return this->Defs.SetSSANum(CurrOp, SSASub);
};

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
	return this->Uses.SetType(CurrOp, CurrType, this);
};

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
	return this->Defs.SetType(CurrOp, CurrType, this);
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
	return this->Defs.SetMetadata(CurrOp, Status);
};

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) {
	return this->Defs.SetIndWrite(CurrOp, IndWriteFlag);
};

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) {
	return this->Uses.SetNoTruncation(CurrOp, NoTruncFlag);
};

set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) {
	return this->Defs.SetNoOverflow(DefOp, NoOverflowFlag);
};

// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
	bool DebugFlag = false;
	if (0x8049b00 == this->address) {
		// Setting up breakpoint line.
		DebugFlag = true;
	}

	// Fill cmd structure with disassembly of instr
	if (!SMPGetCmd(this->address, this->SMPcmd, this->features))

	// Record what type of instruction this is, simplified for the needs
	//  of data flow and type analysis.
	this->type = DFACategory[this->SMPcmd.itype];
	// Record optimization category.
	this->OptType = OptCategory[this->SMPcmd.itype];
	if ((NN_int == this->SMPcmd.itype) || (NN_into == this->SMPcmd.itype) || (NN_int3 == this->SMPcmd.itype)) {
		this->SetInterrupt();
	}
	else {
		this->ResetInterrupt();
	}
clc5q's avatar
clc5q committed
	// See if instruction is an ASM idiom for clearing a register.
	if (NN_xor == this->SMPcmd.itype) {
		ushort FirstReg;
		if (o_reg == this->SMPcmd.Operands[0].type) {
			FirstReg = this->SMPcmd.Operands[0].reg;
			if (this->SMPcmd.Operands[1].is_reg(FirstReg))
				this->SetRegClearIdiom();
	// See if instruction is simple nop or ASM idiom for nop.
	if (this->MDIsNop()) {
		this->SetNop();
	}
	// Build the DEF and USE lists for the instruction.
	this->BuildSMPDefUseLists();

	// Determine whether the instruction is a jump target by looking
	//  at its cross references and seeing if it has "TO" code xrefs.
	SMP_xref_t xrefs;
	for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
		if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
			this->SetJumpTarget();
	// If instruction is a call or indirect call, see if a call target has been recorded
	//  by IDA Pro.
	if (this->GetDataFlowType() == INDIR_CALL) {
		for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL);
			ok = xrefs.SMP_next_from()) {
			if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) {
				// Found a code target, with its address in CurrXrefs.to
				if (xrefs.GetTo() == (this->address + this->GetCmd().size)) {
					// A call instruction will have two targets: the fall through to the
					//  next instruction, and the called function. We want to find
					//  the called function.
					continue;
				}
				// We found a target, not the fall-through.
				this->CallTarget = xrefs.GetTo();
				SMP_msg("Found indirect call target %x at %x\n",
					xrefs.GetTo(), this->address);
				break;
			}
		} // end for all code xrefs
		if (BADADDR == this->CallTarget) {
			SMP_msg("WARNING: Did not find indirect call target at %x\n",
				this->address);
		}
	} // end if INDIR_CALL
	else if (this->GetDataFlowType() == CALL) {
		set<DefOrUse, LessDefUse>::iterator CurrUse;
		for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
			optype_t OpType = CurrUse->GetOp().type;
			if ((OpType == o_near) || (OpType == o_far)) {
				this->CallTarget = CurrUse->GetOp().addr;
			}
		}
		if (BADADDR == this->CallTarget) {
			SMP_msg("ERROR: Target not found for direct call at %x\n", this->address);
		SMP_msg("Analyzed debug instruction at %x\n", this->address);
	return;
} // end of SMPInstr::Analyze()

// Analyze the floating point NOP marker instruction at the top of the function.
void SMPInstr::AnalyzeMarker(void) {
	// Fill member variable SMPcmd structure with disassembly of instr
	(void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
	this->SMPcmd.itype = NN_fnop;
	this->SMPcmd.size = 1;
	this->SMPcmd.ea = this->address;
	// Set the instr disassembly text.
	DisAsmText.SetMarkerInstText(this->GetAddr());

	// Record what type of instruction this is, simplified for the needs
	//  of data flow and type analysis.
	this->type = DFACategory[this->SMPcmd.itype];
	// Record optimization category.
	this->OptType = OptCategory[this->SMPcmd.itype];

	return;
} // end of SMPInstr::AnalyzeMarker()

// Detect oddities of call instructions, such as pseudo-calls that are
//  actually jumps within a function
void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
	if (BADADDR != this->CallTarget) {
		if ((this->CallTarget > FirstFuncAddr)
				&& (this->CallTarget <= LastFuncAddr)) {
			this->SetCallUsedAsJump();
		}
		else {
			this->ResetCallUsedAsJump();
		}
		if (this->CallTarget == FirstFuncAddr) {
			this->SetDirectRecursiveCall();
		}
		else {
			this->ResetDirectRecursiveCall();
		}
		if (this->IsCallUsedAsJump())
// Find USE-not-DEF operand that is not the flags register.
op_t SMPInstr::GetSourceOnlyOperand(void) {
	size_t OpNum;
	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		if (this->features & DefMacros[OpNum]) { // DEF
			;
		}
		else if (this->features & UseMacros[OpNum]) { // USE
			op_t CurrOp = this->SMPcmd.Operands[OpNum];
			if (!(CurrOp.is_reg(X86_FLAGS_REG))) {
				return CurrOp;
			}
		}
	}
clc5q's avatar
clc5q committed
	// It is expected that increment, decrement, and floating point stores
	//  will not have a USE-only operand. Increment and decrement have an
	//  operand that is both USEd and DEFed, while the floating point stack
	//  registers are implicit in most floating point opcodes. Also, exchange
	//  and exchange-and-add instructions have multiple DEF-and-USE operands.
	int TypeGroup = SMPTypeCategory[this->SMPcmd.itype];
	if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12)
		&& (TypeGroup != 13)) {
		SMP_msg("ERROR: Could not find source only operand at %x in %s\n",
			this->address, DisAsmText.GetDisAsm(this->GetAddr()));
clc5q's avatar
clc5q committed
	}
} // end of SMPInstr::GetSourceOnlyOperand()

// Should apparent memory operands be ignored? e.g. lea opcode on x86
bool SMPInstr::MDIgnoreMemOps(void) {
	bool leaInst = (NN_lea == this->SMPcmd.itype);
	return leaInst;
}

// Find memory DEFs and USEs, store in DEFMemOp and USEMemOp
void SMPInstr::FindMemOps(void) {
	size_t OpNum;

	if (!(this->MDIgnoreMemOps())) {
		for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
			op_t TempOp = this->SMPcmd.Operands[OpNum];
			if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory
				if (this->features & DefMacros[OpNum]) { // DEF
					if (this->DEFMemOp.type == o_void) { // only save first mem DEF
						this->DEFMemOp = TempOp;
					}
				}
				if (this->features & UseMacros[OpNum]) { // USE
					if (this->USEMemOp.type == o_void) { // only save first mem USE
						this->USEMemOp = TempOp;
					}
				}
			}
		} // end for (OpNum = 0; ...)
	}
	this->SetMemOpsFound();
	return;
} // end of SMPInstr::FindMemOps()

// Fill the Defs and Uses private data members.
void SMPInstr::BuildSMPDefUseLists(void) {
	size_t OpNum;
	bool DebugFlag = (0x8049b00 == this->GetAddr());
	bool WidthDoubler = this->MDDoublesWidth();
	this->Defs.clear();
	this->Uses.clear();

	// Start with the Defs.
	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		if (this->features & DefMacros[OpNum]) { // DEF
			op_t TempOp = this->SMPcmd.Operands[OpNum];
			if (WidthDoubler) {
				// Opcodes that sign-extend a byte to a word, or a word to a dword, 
				//  have only one operand. It is implicit, and it is the shorter USE.
				//  That means the DEF will have the same width as the USE, e.g. if
				//  we are sign-extending AX to EAX, the USE and DEF both be AX without
				//  a special fix. We fix this problem with the DEF operand now.
				if (TempOp.dtyp == dt_byte) {
					TempOp.dtyp = dt_word;
					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
				}
				else if (TempOp.dtyp == dt_word) {
					TempOp.dtyp = dt_dword;
					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
				}
				else if (TempOp.dtyp == dt_dword) {
					TempOp.dtyp = dt_qword;
				}
				else {
					SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %x dtyp: %d\n", 
			if (MDKnownOperandType(TempOp)) {
				if (DebugFlag) {
					PrintOperand(TempOp);
				if (o_reg == TempOp.type) {
					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
					//  analysis and type inference systems.
					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
				}
		}
	} // end for (OpNum = 0; ...)

	if (this->IsRegClearIdiom()) {
clc5q's avatar
clc5q committed
		// Something like xor eax,eax clears eax but does not really
		//  use eax. It is the same as mov eax,0 and we don't want to
		//  extend the prior def-use chain for eax to this instruction
		//  by treating the instruction as xor eax,eax. Instead, we
		//  build the DEF and USE lists and RTL as if it were mov eax,0.
clc5q's avatar
clc5q committed
		ImmOp.type = o_imm;
clc5q's avatar
clc5q committed
		this->Uses.SetRef(ImmOp, NUMERIC);
		return;
	}

	// Now, do the Uses. Uses have special case operations, because
	//  any memory operand could have register uses in the addressing
	//  expression, and we must create Uses for those registers. For
	//  example:  mov eax,[ebx + esi*2 + 044Ch]
	//  This is a two-operand instruction with one def: eax. But
	//  there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi.
	//  The first use is an op_t of type o_phrase (memory phrase),
	//  which can be copied from cmd.Operands[1]. Likewise, we just
	//  copy cmd.Operands[0] into the defs list. However, we must create
	//  op_t types for register ebx and register esi and append them
	//  to the Uses list. This is handled by the machine dependent
	//  method MDFixupDefUseLists().
	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		if (this->features & UseMacros[OpNum]) { // USE
			op_t TempOp = this->SMPcmd.Operands[OpNum];
			if (MDKnownOperandType(TempOp)) {
				if (DebugFlag) {
					PrintOperand(TempOp);
				if (o_reg == TempOp.type) {
					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
					//  analysis and type inference systems.
					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
				}
				this->Uses.SetRef(TempOp);
		}
	} // end for (OpNum = 0; ...)

	return;
} // end of SMPInstr::BuildSMPDefUseLists()

// If DefReg is not already in the DEF list, add a DEF for it.
clc5q's avatar
clc5q committed
void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
	TempDef.type = o_reg;
	TempDef.reg = DefReg;
	if (Shown)
		TempDef.set_showed();
	else
		TempDef.clr_showed();
clc5q's avatar
clc5q committed
	this->Defs.SetRef(TempDef, Type);
	return;
} // end of SMPInstr::MDAddRegDef()

// If UseReg is not already in the USE list, add a USE for it.
clc5q's avatar
clc5q committed
void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) {
	TempUse.type = o_reg;
	TempUse.reg = UseReg;
	if (Shown)
		TempUse.set_showed();
	else
		TempUse.clr_showed();
clc5q's avatar
clc5q committed
	this->Uses.SetRef(TempUse, Type);
	return;
} // end of SMPInstr::MDAddRegUse()

// Perform machine dependent ad hoc fixes to the def and use lists.
//  For example, some multiply and divide instructions in x86 implicitly
//  use and/or define register EDX. For memory phrase examples, see comment
//  in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
	// First, handle the uses hidden in memory addressing modes. Note that we do not
	//  care whether we are dealing with a memory destination operand or source
	//  operand, because register USEs, not DEFs, happen within the addressing expressions.
	size_t OpNum;
	SMPOperandType RefType;
	int BaseReg;
	int IndexReg;
	ushort ScaleFactor;
	ea_t displacement;
	bool UseFP = true;
	bool HasIndexReg = false;
	bool SingleAddressReg = false;
	bool leaInst = (NN_lea == this->SMPcmd.itype);
	bool DebugFlag = (this->GetAddr() == 0x8086177);
	if (DebugFlag) {
		SMP_msg("DEBUG: Fixing up DEF-USE lists for debug location\n");
#if SMP_BASEREG_POINTER_TYPE
	// Some instructions are analyzed outside of any function or block when fixing up
	//  the IDB, so we have to assume the block and func pointers might be NULL.
	if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
		UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif

	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		op_t Opnd = SMPcmd.Operands[OpNum];
		if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
			MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
			SingleAddressReg = ((0 == displacement) 
				&& ((R_none == BaseReg) || (R_none == IndexReg)));
			if (R_none != IndexReg) { 
				op_t IndexOpnd = Opnd; // Init to current operand field values
				IndexOpnd.type = o_reg; // Change type and reg fields
				IndexOpnd.reg = (ushort) IndexReg;
				IndexOpnd.hasSIB = 0;
				IndexOpnd.set_showed();
				// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
				//  analysis and type inference systems.
				IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
				if (0 == ScaleFactor)
					this->Uses.SetRef(IndexOpnd);
				else { // scaling == shift ==> NUMERIC
					HasIndexReg = true;
					this->Uses.SetRef(IndexOpnd, NUMERIC);
			if (R_none != BaseReg) {
				op_t BaseOpnd = Opnd; // Init to current operand field values
				BaseOpnd.type = o_reg; // Change type and reg fields
				BaseOpnd.reg = (ushort) BaseReg;
				BaseOpnd.hasSIB = 0;
				// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
				//  analysis and type inference systems.
				BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
				RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
				// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
				//  Other registers used as base registers should get their USEs as
				//  base registers typed as POINTER, which might get refined later
				//  to STACKPTR, GLOBALPTR, HEAPPTR, etc.
				// NOTE: the NN_lea opcode is often used without a true base register.
				//  E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
				//  could not be done in one instruction without using the addressing
				//  modes of the machine to do the arithmetic. We don't want to set the
				//  USE of EAX to POINTER in this case, so we will conservatively skip
				//  all lea instructions here.
				// We cannot be sure that a register is truly a base register unless
				//  there is also an index register. E.g. with reg+displacement, we
				//  could have memaddr+indexreg or basereg+offset, depending on what
				//  the displacement is. The exception is if there is no offset and only
				//  one addressing register, e.g. mov eax,[ebx].
				if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
					|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
#endif
				this->Uses.SetRef(BaseOpnd, RefType);
			} // end if R_none != BaseReg
		} // end if (o_phrase or o_displ operand)
	} // end for (all operands)

	// The lea (load effective address) instruction looks as if it has
	//  a memory USE:  lea ebx,[edx+esi]
	//  However, this instruction is really just: ebx := edx+esi
	//  Now that the above code has inserted the "addressing" registers
	//  into the USE list, we should remove the "memory USE".
	if (leaInst) {
		set<DefOrUse, LessDefUse>::iterator CurrUse;
		for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
			op_t UseOp = CurrUse->GetOp();
			if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) {
				this->LeaUSEMemOp = UseOp;
	// Next, handle repeat prefices in the instructions. The Intel REPE/REPZ prefix
	//  is just the text printed for SCAS/CMPS instructions that have a REP prefix.
	//  Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
	//  REPNE/REPNZ only applies to SCAS and CMPS instructions.
	bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
	bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
	if (HasRepPrefix && HasRepnePrefix)
		SMP_msg("REP and REPNE both present at %x %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
	if (HasRepPrefix || HasRepnePrefix) {
		// All repeating instructions use ECX as the countdown register.
		BaseOpnd.type = o_reg; // Change type and reg fields
		BaseOpnd.reg = R_cx;
		BaseOpnd.clr_showed();
clc5q's avatar
clc5q committed
		this->Defs.SetRef(BaseOpnd, NUMERIC);
		this->Uses.SetRef(BaseOpnd, NUMERIC);
	if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_scas)
		|| (this->SMPcmd.itype == NN_movs) || (this->SMPcmd.itype == NN_stos)) {
		// ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS.
		//  Only EDI is involved with SCAS/STOS.
		BaseOpnd.type = o_reg; // Change type and reg fields
		BaseOpnd.clr_showed();
		if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_movs)) {
clc5q's avatar
clc5q committed
			this->Defs.SetRef(BaseOpnd, POINTER);
			this->Uses.SetRef(BaseOpnd, POINTER);
clc5q's avatar
clc5q committed
		this->Defs.SetRef(BaseOpnd, POINTER);
		this->Uses.SetRef(BaseOpnd, POINTER);
	// Now, handle special instruction categories that have implicit operands.
	if (NN_cmpxchg == this->SMPcmd.itype) {
		// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
		//  sound by declaring that EAX is always a DEF.
		this->MDAddRegDef(R_ax, false);
	} // end if NN_cmpxchg
	else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) {
		// IDA does not include the stack pointer in the DEFs or USEs.
		this->MDAddRegDef(R_sp, false);
		this->MDAddRegUse(R_sp, false);
		if (!this->MDIsReturnInstr()) {
			// We always reference [esp+0] or [esp-4], so add it to the DEF or USE list.
			StackOp.type = o_displ;
			StackOp.reg = R_sp;
			if (this->MDIsPopInstr()) {
				StackOp.addr = 0;  // [ESP+0]
				this->Uses.SetRef(StackOp);  // USE
			}
			else {
				StackOp.addr = (ea_t) -4;  // [ESP-4]
				this->Defs.SetRef(StackOp); // DEF
			}
#if SMP_CALL_TRASHES_REGS
	else if ((this->type == CALL) || (this->type == INDIR_CALL)) {
		// We want to add the caller-saved registers to the USEs and DEFs lists
		this->MDAddRegDef(R_ax, false);
		this->MDAddRegDef(R_cx, false);
		this->MDAddRegDef(R_dx, false);
		this->MDAddRegUse(R_ax, false);
		this->MDAddRegUse(R_cx, false);
		this->MDAddRegUse(R_dx, false);
#if 1
			this->MDAddRegDef(R_bx, false);
			this->MDAddRegUse(R_bx, false);
			this->MDAddRegDef(R_si, false);
			this->MDAddRegUse(R_si, false);
	else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) {
		// Entire function prologue or epilogue microcoded.
		this->MDAddRegDef(R_sp, false);
		this->MDAddRegUse(R_sp, false);
		this->MDAddRegDef(R_bp, false);
		this->MDAddRegUse(R_bp, false);
	else if ((this->SMPcmd.itype == NN_maskmovq)
			|| (this->SMPcmd.itype == NN_maskmovdqu)) {
clc5q's avatar
clc5q committed
		this->MDAddRegUse(R_di, false, POINTER);
	else if (8 == this->GetOptType()) {
		// This category implicitly writes to EDX:EAX.
		this->MDAddRegDef(R_dx, false);
		this->MDAddRegDef(R_ax, false);
	} // end else if (8 == GetOptType)
	else if (7 == this->GetOptType()) {
		// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
		//  DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
		// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
		//  sense, because they are not displayed in the disassembly text). For example:
		//  mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
		//  source operand is only 8 bits wide, there is room to hold the result in AX
		//  without using DX:  mul bl means AX <-- AL*BL.
		// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
		//  operands:  imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
		//  EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
		for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
			op_t TempUse = this->SMPcmd.Operands[OpNum];
			if (!TempUse.showed()) { // hidden operand
				if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
					if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
						this->MDAddRegUse(R_dx, false);
					}
					this->MDAddRegDef(R_ax, false);
					this->MDAddRegDef(R_dx, false);
				}
			}
		}
	} // end else if (7 == OptType)
#if 0
	// The floating point instructions in type categories 14 and 15 often USE and DEF
	//  the floating point register stack, e.g. pushing a value onto that stack is a
	//  massive copy downward of stack locations. We don't really care about the USE of
	//  the stack if the value being pushed came from elsewhere than the stack. For example,
	//  an "fld" opcode pushes its source onto the stack. We build RTLs with a simple
	//  move structure, but the RTL building can be fooled by seeing two "source" operands
	//  in the USE list.
	if ((14 == SMPTypeCategory[this->SMPcmd.itype])
		|| (15 == SMPTypeCategory[this->SMPcmd.itype])) {
	}
#endif

#if 0  // Not true for LOOP instructions that use only the ECX counter register.
	if (this->type == COND_BRANCH) {
		assert(SMPUsesFlags[this->SMPcmd.itype]);
	}
	// The return value register EAX is not quite like a caller-save or callee-save
	//  register (technically, it is caller-save). Within a callee, it might appear
	//  that EAX has become dead by the time a return instruction is reached, but
	//  the USE that would make it not dead is in the caller. To prevent type inference
	//  from mistakenly thinking that all USEs of EAX have been seen in the callee,
	//  we add EAX to the USE list for all return instructions, as well as for all
	//  tail calls, which are essentially returns in terms of data flow analysis.
	// This USE of EAX will always be of type UNINIT unless its DEF has a known type
	//  that propagates to it. Thus, it will prevent an invalid back inference of the
	//  DEF type from "all" USE types that are visible in the callee; even if they
	//  were all NUMERIC, this return USE will be UNINIT and inhibit the invalid
	//  type inference. EAX could be loaded with a pointer from memory, for example,
	//  and USEd only in a comparison instruction, making it falsely appear to be
	//  a NUMERIC, without this extra USE at the return instruction.
	// Because some of the library functions pass values around in EBX, EDI, etc.,
	//  we will add these general purpose registers to the USE list for returns
	//  in order to prevent erroneous analyses of dead registers or unused
	//  metadata.
	if ((this->type == RETURN) || this->IsTailCall()) {
		this->MDAddRegUse(R_bx, false);
		this->MDAddRegUse(R_cx, false);
		this->MDAddRegUse(R_dx, false);
		if (!UseFP)
			this->MDAddRegUse(R_bp, false);
		this->MDAddRegUse(R_si, false);
		this->MDAddRegUse(R_di, false);
	// Next, add the flags register to the DEFs and USEs for those instructions that
	//  are marked as defining or using flags.
	if (!this->IsDefsFlags() && SMPDefsFlags[this->SMPcmd.itype]) {
		this->MDAddRegDef(X86_FLAGS_REG, false);
		this->SetDefsFlags();
	if (!this->IsUsesFlags() && SMPUsesFlags[this->SMPcmd.itype]) {
		this->MDAddRegUse(X86_FLAGS_REG, false);
		this->SetUsesFlags();
		// Clear the DEFs and USEs for no-ops.
		//  These include machine idioms for no-ops, e.g. mov esi,esi
		//  or xchg ax,ax or lea esi,[esi].
		this->Defs.clear();
		this->Uses.clear();
		this->MoveSource = InitOp;
		this->OptType = 1;
	if (DebugFlag) {
		SMP_msg("DEBUG after MDFixupDefUseLists:\n");
		this->Dump();
	}
	return;
} // end of SMPInstr::MDFixupDefUseLists()

// If we can definitely identify which part of the addressing expression
//  used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR
//  immediate, set the USE type for that register to POINTER and return true.
//  If we can find definite NUMERIC addressing registers that are not already
//  typed as NUMERIC, set their USE types to NUMERIC and return true.
bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
	bool changed = false;
	int BaseReg;
	int IndexReg;
	op_t BaseOp = InitOp;
	op_t IndexOp = InitOp;
	SMPOperandType BaseType = UNKNOWN;
	SMPOperandType IndexType = UNKNOWN;
	ushort ScaleFactor;
	ea_t offset;
	set<DefOrUse, LessDefUse>::iterator BaseIter;
	set<DefOrUse, LessDefUse>::iterator IndexIter;

	if (NN_lea == this->SMPcmd.itype)
		return false;  // lea instruction really has no memory operands
	if (NN_fnop == this->SMPcmd.itype)
		return false;  // SSA marker instruction

	MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset);
	if (R_none != IndexReg) {
		IndexOp.type = o_reg;
		IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
		IndexIter = this->FindUse(IndexOp);
		assert(IndexIter != this->GetLastUse());
		IndexType = IndexIter->GetType();
	}
	if (R_none != BaseReg) {
		BaseOp.type = o_reg;
		BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
		BaseIter = this->FindUse(BaseOp);
		assert(BaseIter != this->GetLastUse());
		BaseType = BaseIter->GetType();
	}
	if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) {
		if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
			// We have an indexed access into the stack frame.
			//  Set IndexReg USE type to NUMERIC.
			changed = true;
			IndexIter = this->SetUseType(IndexOp, NUMERIC);
			assert(IndexIter != this->GetLastUse());
		}
		return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
	}
	if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) {
		if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
			// We have an indexed access into the stack frame.
			//  Set BaseReg USE type to NUMERIC.
			// Note that BaseReg is really an IndexReg and vice versa.
			changed = true;
			BaseIter = this->SetUseType(BaseOp, NUMERIC);
			assert(BaseIter != this->GetLastUse());
			SMP_msg("WARNING: BaseReg is index, IndexReg is base: %s\n",
				DisAsmText.GetDisAsm(this->GetAddr()));
		}
		return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
	}
	if (IsImmedGlobalAddress(offset)) {
		if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
			// We have an indexed access into a global.
			//  Set IndexReg USE type to NUMERIC.
			changed = true;
			IndexIter = this->SetUseType(IndexOp, NUMERIC);
			assert(IndexIter != this->GetLastUse());
		}
		if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
			// We have an indexed access into a global.
			//  Set BaseReg USE type to NUMERIC.
			// Note that BaseReg is really an index register.
			changed = true;
			BaseIter = this->SetUseType(BaseOp, NUMERIC);
			assert(BaseIter != this->GetLastUse());
clc5q's avatar
clc5q committed
#if SMP_VERBOSE_FIND_POINTERS
			SMP_msg("WARNING: BaseReg used as index: %s\n", DisAsmText.GetDisAsm(this->GetAddr()));
clc5q's avatar
clc5q committed
#endif
		return changed;  // global immediate is handled in SetImmedTypes()
	// At this point, we must have a base address in a register, not used
	//  to directly address the stack or a global.
	if ((0 < ScaleFactor) || (R_none == IndexReg)) {
		// IndexReg is scaled, meaning it is NUMERIC, so BaseReg must
		//  be a POINTER; or IndexReg is not present, so BaseReg is the
		//  only possible holder of an address.
		if (R_none != BaseReg) {
			if (UNINIT == BaseIter->GetType()) {
				BaseIter = this->SetUseType(BaseOp, POINTER);
				assert(BaseIter != this->GetLastUse());
		}
	}
	else if (R_none == BaseReg) {
		// We have an unscaled IndexReg and no BaseReg and offset was
		//  not a global offset, so IndexReg must be a POINTER.
		if (R_none != IndexReg) {
			if (UNINIT == IndexType) {
				IndexIter = this->SetUseType(IndexOp, POINTER);
				assert(IndexIter != this->GetLastUse());
			}
		}
	}
	else { // We have BaseReg and an unscaled IndexReg.
		// The only hope for typing something like [ebx+edx] is for
		//  one register to already be typed NUMERIC, in which case
		//  the other one must be a POINTER, or if one register is
		//  already POINTER, then the other one must be NUMERIC.
		if (IsNumeric(BaseType)) {
			if (UNINIT == IndexType) {
				// Set to POINTER or PROF_POINTER
				changed = true;
				IndexIter = this->SetUseType(IndexOp, POINTER);
				assert(IndexIter != this->GetLastUse());
			}
			else if (IsNumeric(IndexType)) {
				SMP_msg("ERROR: BaseReg and IndexReg both NUMERIC at %x: %s\n",
					this->address, DisAsmText.GetDisAsm(this->GetAddr()));
			if (UNINIT == BaseType) { // BaseReg is UNINIT
				if (IsNumeric(IndexType)) {
					BaseIter = this->SetUseType(BaseOp, POINTER);
					assert(BaseIter != this->GetLastUse());
				}
				else if (IsDataPtr(IndexType)) {
					// IndexReg is POINTER, so make BaseReg NUMERIC.
					changed = true;
					BaseIter = this->SetUseType(BaseOp, NUMERIC);
					assert(BaseIter != this->GetLastUse());
				}
			}
			else if (IsDataPtr(BaseType)) {
				// BaseReg was a pointer type. IndexReg must be NUMERIC.
				if (UNINIT == IndexType) {
					changed = true;
					IndexIter = this->SetUseType(IndexOp, NUMERIC);
					assert(IndexIter != this->GetLastUse());
				}
				else if (IsDataPtr(IndexType)) {
					SMP_msg("ERROR: BaseReg and IndexReg both POINTER at %x: %s\n",
						this->address, DisAsmText.GetDisAsm(this->GetAddr()));
		}
	}

	return changed;
} // end of SMPInstr::MDFindPointerUse()

// Are all DEFs typed to something besides UNINIT?
bool SMPInstr::AllDEFsTyped(void) {
	if (this->AreDEFsTyped()) {
		return true;
	}
	bool FoundUNINIT = false;
	set<DefOrUse, LessDefUse>::iterator DefIter;
	for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
		if (IsEqType(UNINIT, DefIter->GetType())) {
			FoundUNINIT = true;
			break;
		}
	}
	if (!FoundUNINIT) {
		this->SetDEFsTyped();
	}
	return (!FoundUNINIT);
} // end of SMPInstr::AllDEFsTyped()

// Are all USEs typed to something besides UNINIT?
bool SMPInstr::AllUSEsTyped(void) {
	if (this->AreUSEsTyped()) {
		return true;
	}
	bool FoundUNINIT = false;
	set<DefOrUse, LessDefUse>::iterator UseIter;
	for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
		if (IsEqType(UNINIT, UseIter->GetType())) {
			FoundUNINIT = true;
			break;
		}
	}
	if (!FoundUNINIT) {
		this->SetUSEsTyped();
	}
	return (!FoundUNINIT);
} // end of SMPInstr::AllUSEsTyped()

// UseOp is a USE reg, not just an address reg in a memory USE
bool SMPInstr::IsNonAddressReg(op_t UseOp) const { 
	bool FoundUse = false;
	ushort SearchReg = MDCanonicalizeSubReg(UseOp.reg);
	for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		op_t Opnd = this->SMPcmd.Operands[OpNum];
		if (this->features & UseMacros[OpNum]) { // USE
			if (Opnd.type == o_reg) {
				ushort TestReg = MDCanonicalizeSubReg(Opnd.reg);
				if (TestReg == SearchReg) {
					FoundUse = true;
					break;
				}