From c978f02acccbce2ab5e652ec8bb6066b52e8f549 Mon Sep 17 00:00:00 2001
From: clc5q <>
Date: Thu, 1 May 2008 14:06:49 +0000
Subject: [PATCH] Use new function MDExtractAddressFields to refactor code.

 SMPBasicBlock.cpp       |  30 +++-----
 SMPDataFlowAnalysis.cpp |  16 ++--
 SMPInstr.cpp            | 160 ++++++++++++++++------------------------
 3 files changed, 84 insertions(+), 122 deletions(-)

diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp
index d0ce90f5..70f5e50b 100644
--- a/SMPBasicBlock.cpp
+++ b/SMPBasicBlock.cpp
@@ -281,30 +281,24 @@ bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const {
 			else {
 				// Should we add Opnd1 to the KillSet every time we return true below? **!!**
 				op_t TempOp;
-				if (Opnd1.hasSIB) {
-					int BaseReg = sib_base(Opnd1);
-					short IndexReg = sib_index(Opnd1);
-					TempOp.type = o_reg;
+				TempOp.type = o_reg;
+				int BaseReg;
+				int IndexReg;
+				ushort ScaleFactor;
+				ea_t displacement;
+				MDExtractAddressFields(Opnd1, BaseReg, IndexReg, ScaleFactor, displacement);
+				if (R_none != BaseReg) {
 					TempOp.reg = (ushort) BaseReg;
 					if (this->KillSet.end() != this->KillSet.find(TempOp))
 						return true;
-					if (R_sp != IndexReg) { // Cannot have ESP index reg in SIB
-						TempOp.reg = (ushort) IndexReg;
-						if (this->KillSet.end() != this->KillSet.find(TempOp))
-							return true;
-					}
-				else { // no SIB
-					ushort BaseReg;
-					if (Opnd1.type == o_phrase)
-						BaseReg = Opnd1.phrase;
-					else // o_displ
-						BaseReg = Opnd1.reg;
-					TempOp.type = o_reg;
-					TempOp.reg = BaseReg;
+				if (R_none != IndexReg) { // R_none means no index reg (ESP can never be a SIB index reg)
+					TempOp.reg = (ushort) IndexReg;
 					if (this->KillSet.end() != this->KillSet.find(TempOp))
 						return true;
-				} // end if SIB ... else ...
+				}
 			} // end if (FoundInKillSet) ... else ...
diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp
index c8f5cb6a..6936181e 100644
--- a/SMPDataFlowAnalysis.cpp
+++ b/SMPDataFlowAnalysis.cpp
@@ -279,18 +279,20 @@ void PrintDefUse(ulong feature, int OpNum) {
 // DEBUG print SIB info for an operand.
 void PrintSIB(op_t Opnd) {
-	int BaseReg = sib_base(Opnd);
-	short IndexReg = sib_index(Opnd);
-	int ScaleFactor = sib_scale(Opnd);
+	int BaseReg;
+	int IndexReg;
+	ushort ScaleFactor;
+	ea_t offset;
 #define NAME_LEN 5
 	char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
 	char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
-#if 1
-	if (!((BaseReg == R_bp) && (Opnd.type == o_mem))) // EBP can be SIB code for NO BASE REG
+	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);
+	if (BaseReg != R_none)
 		qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1);
-	if (IndexReg != R_sp) { // SIB code for NO INDEX REG
+	if (IndexReg != R_none) {
 		qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1);
 	msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor);
diff --git a/SMPInstr.cpp b/SMPInstr.cpp
index 9d0b9ef1..1fb0af47 100644
--- a/SMPInstr.cpp
+++ b/SMPInstr.cpp
@@ -772,6 +772,10 @@ void SMPInstr::MDFixupDefUseLists(void) {
 	//  operand, because register USEs, not DEFs, happen within the addressing expressions.
 	size_t OpNum;
 	SMPOperandType RefType;
+	int BaseReg;
+	int IndexReg;
+	ushort ScaleFactor;
+	ea_t displacement;
 	bool UseFP = true;
 	bool HasIndexReg = false;
 	bool leaInst = (NN_lea == this->SMPcmd.itype);
@@ -791,103 +795,58 @@ void SMPInstr::MDFixupDefUseLists(void) {
 	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
 		op_t Opnd = SMPcmd.Operands[OpNum];
 		if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
-			if (Opnd.hasSIB) {
-				if (DebugFlag) {
-					msg("DEBUG: hasSIB\n");
-				}
-				int BaseReg = sib_base(Opnd);
-				short IndexReg = sib_index(Opnd);
-				if ((R_none != IndexReg) && (R_sp != IndexReg)) { 
-					op_t IndexOpnd = Opnd; // Init to current operand field values
-					IndexOpnd.type = o_reg; // Change type and reg fields
-					IndexOpnd.reg = IndexReg;
-					IndexOpnd.hasSIB = 0;
-					IndexOpnd.set_showed();
-					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-					//  analysis and type inference systems.
-					IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
-					if (0 == sib_scale(Opnd))
-						this->Uses.SetRef(IndexOpnd);
-					else {// scaling == shift ==> NUMERIC
-						HasIndexReg = true;
-						this->Uses.SetRef(IndexOpnd, NUMERIC);
-					}
-				}
-				if (R_none != BaseReg) {
-					op_t BaseOpnd = Opnd; // Init to current operand field values
-					BaseOpnd.type = o_reg; // Change type and reg fields
-					BaseOpnd.reg = BaseReg;
-					BaseOpnd.hasSIB = 0;
-					BaseOpnd.set_showed();
-					if (BaseOpnd.is_reg(R_bp) && (Opnd.type == o_mem)) {
-						if (DebugFlag) msg("EBP base reg ignored at %x\n", this->GetAddr());
-					}
-					else {
-						if (DebugFlag) msg("base reg %d not ignored at %x\n", BaseReg, this->GetAddr());
-						// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-						//  analysis and type inference systems.
-						BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
-						RefType = UNINIT;
-						// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
-						//  Other registers used as base registers should get their USEs as
-						//  base registers typed as POINTER, which might get refined later
-						//  to STACKPTR, GLOBALPTR, HEAPPTR, etc.
-						// NOTE: the NN_lea opcode is often used without a true base register.
-						//  E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
-						//  could not be done in one instruction without using the addressing
-						//  modes of the machine to do the arithmetic. We don't want to set the
-						//  USE of EAX to POINTER in this case, so we will conservatively skip
-						//  all lea instructions here.
-						// We cannot be sure that a register is truly a base register unless
-						//  there is also an index register. E.g. with reg+displacement, we
-						//  could have memaddr+indexreg or basereg+offset, depending on what
-						//  the displacement is.
-						if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
-							|| leaInst || (!HasIndexReg)) {
-							;
-						}
-						else {
-							RefType = POINTER;
-						}
-						this->Uses.SetRef(BaseOpnd, RefType);
-					}
-				}
-				else {
-					msg("WARNING: R_none base register in SIB: %s\n", this->GetDisasm());
+			MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
+			if (R_none != IndexReg) { 
+				op_t IndexOpnd = Opnd; // Init to current operand field values
+				IndexOpnd.type = o_reg; // Change type and reg fields
+				IndexOpnd.reg = IndexReg;
+				IndexOpnd.hasSIB = 0;
+				IndexOpnd.set_showed();
+				// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+				//  analysis and type inference systems.
+				IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
+				if (0 == ScaleFactor)
+					this->Uses.SetRef(IndexOpnd);
+				else { // scaling == shift ==> NUMERIC
+					HasIndexReg = true;
+					this->Uses.SetRef(IndexOpnd, NUMERIC);
-			else { // no SIB byte; can have base reg but no index reg
-				ushort BaseReg = Opnd.reg;  // cannot be R_none for no SIB case
+			if (R_none != BaseReg) {
 				op_t BaseOpnd = Opnd; // Init to current operand field values
 				BaseOpnd.type = o_reg; // Change type and reg fields
 				BaseOpnd.reg = BaseReg;
 				BaseOpnd.hasSIB = 0;
-				if (Opnd.type == o_mem) {
-					if (!(BaseOpnd.is_reg(0) || BaseOpnd.is_reg(R_bp))) {
-						msg("base reg %d ignored at %x\n", BaseOpnd.reg, this->GetAddr());
-					}
+				// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+				//  analysis and type inference systems.
+				BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
+				RefType = UNINIT;
+				// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
+				//  Other registers used as base registers should get their USEs as
+				//  base registers typed as POINTER, which might get refined later
+				// NOTE: the NN_lea opcode is often used without a true base register.
+				//  E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
+				//  could not be done in one instruction without using the addressing
+				//  modes of the machine to do the arithmetic. We don't want to set the
+				//  USE of EAX to POINTER in this case, so we will conservatively skip
+				//  all lea instructions here.
+				// We cannot be sure that a register is truly a base register unless
+				//  there is also an index register. E.g. with reg+displacement, we
+				//  could have memaddr+indexreg or basereg+offset, depending on what
+				//  the displacement is.
+				if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
+					|| leaInst || (!HasIndexReg)) {
+					;
 				else {
-					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-					//  analysis and type inference systems.
-					BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
-					RefType = UNINIT;
-					// See comments above for BaseReg POINTER types.
-					if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
-						|| leaInst || (!HasIndexReg)) {
-						;
-					}
-					else {
-						RefType = POINTER;
-					}
-					this->Uses.SetRef(BaseOpnd, RefType);
+					RefType = POINTER;
-			}
+				this->Uses.SetRef(BaseOpnd, RefType);
+			} // end if R_none != BaseReg
 		} // end if (o_phrase or o_displ operand)
 	} // end for (all operands)
@@ -1896,18 +1855,20 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) {
 // Handle x86 opcode SIB byte annotations.
 void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) {
-	int BaseReg = sib_base(Opnd);
-	short IndexReg = sib_index(Opnd);
-	if (BaseReg == R_none) {
-		msg("BaseReg of R_none at %x\n", this->address);
-	}
+	int BaseReg;
+	int IndexReg;
+	ea_t displacement;
+	ushort ScaleFactor;
+	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
 	if (BaseReg == R_sp) { // ESP cannot be IndexReg
 		// ESP-relative constant offset
 				"%10x %6d PTRIMMEDESP STACK %d displ %s\n",
 				this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm);
-	else if (UseFP && ((IndexReg == R_bp) || ((BaseReg == R_bp) && (Opnd.type != o_mem)))) {
+	else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) {
 		// EBP-relative constant offset
 				"%10x %6d PTRIMMEDEBP STACK %d displ %s\n",
@@ -1922,6 +1883,11 @@ void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offs
 //  offsets if EBP is being used as a frame pointer (UseFP == true).
 void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 	op_t Opnd;
+	ea_t offset;
+	int BaseReg;
+	int IndexReg;
+	ushort ScaleFactor;
 #if 0
 	if (this->address == 0x80925f4) {
@@ -1930,13 +1896,14 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 	for (int i = 0; i < UA_MAXOP; ++i) {
 		Opnd = SMPcmd.Operands[i];
+		if ((Opnd.type == o_displ) || (Opnd.type == o_phrase))
+			MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);
 		if (Opnd.type == o_displ) {
-			ea_t offset = Opnd.addr;
 			if (Opnd.hasSIB) {
 				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
 			else { // no SIB
-				ushort BaseReg = Opnd.reg;
 				if (BaseReg == R_sp) {
 					// ESP-relative constant offset
@@ -1952,12 +1919,11 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 			} // end if (Opnd.hasSIB) ... else ...
 		} // end if (Opnd.type == o_displ) 
 		else if (Opnd.type == o_phrase) {
-			ea_t offset = 0; // mmStrata thinks [esp] is [esp+0]
+			offset = 0; // mmStrata thinks [esp] is [esp+0]
 			if (Opnd.hasSIB) {
 				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
 			else { // Something like [ecx]; is it [esp] or [ebp] ?
-				ushort BaseReg = Opnd.reg;
 				if (BaseReg == R_sp) {
 					// ESP-relative constant offset