From da332408fc2c09cfbe820fc90fced7893f43889f Mon Sep 17 00:00:00 2001
From: clc5q <clc5q@git.zephyr-software.com>
Date: Fri, 28 Dec 2007 16:01:45 +0000
Subject: [PATCH] Implement SMPInstr::MDFixupDefUseLists,
 SMPInstr::MDAnnotateSIBStackConstants, and helpers

---
 SMPDataFlowAnalysis.cpp | 232 +++++++++++++++++++++++++++-------------
 SMPDataFlowAnalysis.h   |   5 +-
 2 files changed, 163 insertions(+), 74 deletions(-)

diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp
index f97526f3..d90ae4b7 100644
--- a/SMPDataFlowAnalysis.cpp
+++ b/SMPDataFlowAnalysis.cpp
@@ -235,7 +235,8 @@ void SMPInstr::PrintOperands() const {
 		else if (Opnd.type == o_mem) {
 			msg(" Operand %d : memory : addr: %x", i, Opnd.addr);
 			PrintDefUse(features, i);
-			if (Opnd.hasSIB) { // has SIB info
+			if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem?
+				msg(" Found SIB byte for o_mem operand ");
 				PrintSIB(Opnd);
 			}
 		}
@@ -299,43 +300,28 @@ void SMPInstr::PrintOperands() const {
 // Print out the destination operand list for the instruction, given
 //  the OptCategory for the instruction as a hint.
 char * SMPInstr::DestString(int OptType) {
-	static char DestList[MAXSTR];
-	if (OptType != 7) {
-		if (SMPcmd.Operands[0].type != o_reg) {
-			msg("Problem: destination operand not memory and not reg: %d %d %s \n",
-				SMPcmd.Operands[0].type, SMPcmd.Operands[1].type, disasm);
-		}
-		else {
-			ushort DestReg = SMPcmd.Operands[0].reg;
-			qstrncpy(DestList, RegNames[DestReg],
-					1 + strlen(RegNames[DestReg]));
-#if 1
-			qstrncat(DestList, " ZZ ", MAXSTR);
-#endif
-			return DestList;
+	// Builds a space-separated list of the register DEFs, then " ZZ ". OptType
+	//  is unused. The static buffer is cleared on every call so that a call
+	//  with no register DEF cannot return the previous call's string.
+	static char DestList[MAXSTR];
+	DestList[0] = '\0';
+	int RegDestCount = 0;
+	for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) {
+		op_t DefOpnd = this->GetDef(DefIndex);
+		if (o_reg == DefOpnd.type) {
+			if (0 < RegDestCount) { // separate successive register names
+				qstrncat(DestList, " ", MAXSTR);
+			}
+			qstrncat(DestList, RegNames[DefOpnd.reg], MAXSTR); // bounded by buffer size
+			++RegDestCount;
 		}
 	}
-	else { // OptType 7 could have one or two destinations.
-		// NOTE: FIX later. Currently a clone of code above.     **
-#if SMP_DEBUG3
-		msg("OptType 7: %s\n", disasm);
-		PrintOperands();
-#endif
-		if (SMPcmd.Operands[0].type != o_reg) {
-			msg("Problem: destination operand not memory and not reg: %d %d %s\n",
-				SMPcmd.Operands[0].type, SMPcmd.Operands[1].type, disasm);
-		}
-		else {
-			ushort DestReg = SMPcmd.Operands[0].reg;
-			qstrncpy(DestList, RegNames[DestReg],
-					1 + strlen(RegNames[DestReg]));
-#if 1
-			qstrncat(DestList, " ZZ ", MAXSTR);
-#endif
-			return DestList;
-		}
+	if (0 >= RegDestCount) {
+		msg("WARNING: No destination registers: %s\n", this->GetDisasm());
+	}
+	else {
+		qstrncat(DestList, " ZZ ", MAXSTR);
 	}
-	DestList[0] = '\0';
 	return DestList;
 } // end of SMPInstr::DestString()
 
@@ -357,11 +343,8 @@ int SMPInstr::operator<(const SMPInstr &rhs) const {
 	return (this->address < rhs.GetAddr());
 }
 
-// Get optimization category for instruction
-int SMPInstr::GetOptType(void) const {
-	return OptType;
-}
-
+#define MD_FIRST_ENTER_INSTR  NN_enterw
+#define MD_LAST_ENTER_INSTR NN_enterq
 // Is this instruction the one that allocates space on the
 //  stack for the local variables?
 bool SMPInstr::MDIsFrameAllocInstr(void) const {
@@ -397,6 +380,9 @@ bool SMPInstr::MDIsFrameAllocInstr(void) const {
 			}
 		}
 	}
+	else if ((SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (SMPcmd.itype <= MD_LAST_ENTER_INSTR)) {
+		return true;
+	}
 	return false;
 } // end of SMPInstr::MDIsFrameAllocInstr()
 
@@ -537,13 +523,142 @@ void SMPInstr::BuildSMPDefUseLists(void) {
 	return;
 } // end of SMPInstr::BuildSMPDefUseLists()
 
+// Add a register DEF for DefReg unless the DEF list already contains one.
+//  DefReg is the register number stored in the new operand's reg field.
+void SMPInstr::MDAddRegDef(ushort DefReg) {
+	for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) {
+		if (this->GetDef(DefIndex).is_reg(DefReg)) {
+			return; // already a DEF; nothing to add
+		}
+	}
+	// Value-initialize the operand so that fields other than type and reg
+	//  do not carry indeterminate stack contents into the DEF list.
+	//  NOTE(review): assumes op_t has no user-provided constructor -- confirm.
+	op_t TempDef = op_t();
+	TempDef.type = o_reg;
+	TempDef.reg = DefReg;
+	this->Defs.SetRef(TempDef);
+	return;
+} // end of SMPInstr::MDAddRegDef()
+
+// Add a register USE for UseReg unless the USE list already contains one.
+//  UseReg is the register number stored in the new operand's reg field.
+void SMPInstr::MDAddRegUse(ushort UseReg) {
+	for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) {
+		if (this->GetUse(UseIndex).is_reg(UseReg)) {
+			return; // already a USE; nothing to add
+		}
+	}
+	// Value-initialize the operand so that fields other than type and reg
+	//  are not indeterminate when USE entries are later queried via showed().
+	//  NOTE(review): assumes op_t has no user-provided constructor -- confirm.
+	op_t TempUse = op_t();
+	TempUse.type = o_reg;
+	TempUse.reg = UseReg;
+	this->Uses.SetRef(TempUse);
+	return;
+} // end of SMPInstr::MDAddRegUse()
+
 // Perform machine dependent ad hoc fixes to the def and use lists.
 //  For example, some multiply and divide instructions in x86 implicitly
 //  use and/or define register EDX. For memory phrase examples, see comment
 //  in BuildSMPDefUseLists().
 void SMPInstr::MDFixupDefUseLists(void) {
+	// First, handle the uses hidden in memory addressing modes. Note that we do not
+	//  care whether we are dealing with a memory destination operand or source
+	//  operand, because register USEs, not DEFs, happen within the addressing expressions.
+	size_t OpNum;
+	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
+		op_t Opnd = SMPcmd.Operands[OpNum];
+		if ((Opnd.type == o_phrase) || (Opnd.type == o_displ)) {
+			if (Opnd.hasSIB) {
+				int BaseReg = sib_base(Opnd);
+				short IndexReg = sib_index(Opnd);
+				if (R_none != BaseReg) {
+					op_t BaseOpnd = Opnd; // Init to current operand field values
+					BaseOpnd.type = o_reg; // Change type and reg fields
+					BaseOpnd.reg = BaseReg;
+					BaseOpnd.hasSIB = 0;
+					this->Uses.SetRef(BaseOpnd);
+				}
+				if (R_none != IndexReg) {
+					op_t IndexOpnd = Opnd; // Init to current operand field values
+					IndexOpnd.type = o_reg; // Change type and reg fields
+					IndexOpnd.reg = IndexReg;
+					IndexOpnd.hasSIB = 0;
+					this->Uses.SetRef(IndexOpnd);
+				}
+			}
+			else { // no SIB byte; can have base reg but no index reg
+				ushort BaseReg = Opnd.reg;  // cannot be R_none for no SIB case
+				op_t BaseOpnd = Opnd; // Init to current operand field values
+				BaseOpnd.type = o_reg; // Change type and reg fields
+				BaseOpnd.reg = BaseReg;
+				BaseOpnd.hasSIB = 0;
+				this->Uses.SetRef(BaseOpnd);
+			}
+		} // end if (o_phrase or o_displ operand)
+	} // end for (all operands)
+
+	// Now, handle special instruction categories that have implicit operands.
+	if (NN_cmpxchg == SMPcmd.itype) {
+		// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
+		//  sound by declaring that EAX is always a DEF.
+		this->MDAddRegDef(R_ax);
+	} // end if NN_cmpxchg
+	else if (8 == this->GetOptType()) {
+		// This category implicitly writes to EDX:EAX.
+		this->MDAddRegDef(R_dx);
+		this->MDAddRegDef(R_ax);
+	} // end else if (8 == GetOptType)
+	else if (7 == this->GetOptType()) {
+		// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
+		//  DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
+		// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
+		//  sense, because they are not displayed in the disassembly text). For example:
+		//  mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
+		//  source operand is only 8 bits wide, there is room to hold the result in AX
+		//  without using DX:  mul bl means AX <-- AL*BL.
+		// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
+		//  operands:  imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
+		//  EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
+		// Index-based scan: MDAddRegUse() below can add to the USE list mid-loop.
+		for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) {
+			op_t TempUse = this->GetUse(UseIndex);
+			if (!TempUse.showed()) { // hidden operand
+				if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
+					this->MDAddRegUse(R_dx);
+					this->MDAddRegDef(R_ax);
+					this->MDAddRegDef(R_dx);
+				}
+			}
+		}
+	} // end else if (7 == OptType)
 	return;
-}
+} // end of SMPInstr::MDFixupDefUseLists()
+
+// Emit the stack constant annotation for an operand that carries a SIB byte.
+void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) {
+	const int SibBase = sib_base(Opnd);
+	const short SibIndex = sib_index(Opnd);
+	if (R_none == SibBase) {
+		msg("BaseReg of R_none at %x\n", this->address);
+	}
+	// ESP can only appear as the base register, never as the index register.
+	if (R_sp == SibBase) {
+		qfprintf(AnnotFile,
+				"%x %d PTRIMMEDESP STACK %d displ %s\n",
+				this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm);
+		return;
+	}
+	// EBP-relative offsets are reported only when UseFP is set; EBP may be base or index.
+	if (UseFP && ((R_bp == SibBase) || (R_bp == SibIndex))) {
+		qfprintf(AnnotFile,
+				"%x %d PTRIMMEDEBP STACK %d displ %s\n",
+				this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm);
+	}
+	return;
+} // end of MDAnnotateSIBStackConstants
 
 // Emit annotations for constants used as ptr offsets from EBP or
 //  ESP into the stack frame. Only pay attention to EBP-relative
@@ -551,7 +666,7 @@ void SMPInstr::MDFixupDefUseLists(void) {
 void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 	op_t Opnd;
 #if 1
-	if (this->address == 0x8066d08) {
+	if (this->address == 0x80925f4) {
 		msg("PROBLEM INSTRUCTION: \n");
 		this->PrintOperands();
 	}
@@ -561,23 +676,7 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 		if (Opnd.type == o_displ) {
 			ea_t offset = Opnd.addr;
 			if (Opnd.hasSIB) {
-				int BaseReg = sib_base(Opnd);
-				short IndexReg = sib_index(Opnd);
-				if (BaseReg == R_none) {
-					msg("BaseReg of R_none at %x\n", this->address);
-				}
-				if (BaseReg == R_sp) { // ESP cannot be IndexReg
-					// ESP-relative constant offset
-					qfprintf(AnnotFile,
-							"%x %d PTRIMMEDESP STACK %d displ %s\n",
-							SMPcmd.ea, SMPcmd.size, offset, disasm);
-				}
-				else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) {
-					// EBP-relative constant offset
-					qfprintf(AnnotFile,
-							"%x %d PTRIMMEDEBP STACK %d displ %s\n",
-							SMPcmd.ea, SMPcmd.size, offset, disasm);
-				}
+				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
 			}
 			else { // no SIB
 				ushort BaseReg = Opnd.reg;
@@ -598,20 +697,7 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 		else if (Opnd.type == o_phrase) {
 			ea_t offset = 0; // mmStrata thinks [esp] is [esp+0]
 			if (Opnd.hasSIB) {
-				int BaseReg = sib_base(Opnd);
-				short IndexReg = sib_index(Opnd);
-				if (BaseReg == R_sp) { // ESP cannot be IndexReg
-					// ESP-relative constant offset
-					qfprintf(AnnotFile,
-							"%x %d PTRIMMEDESP STACK %d displ %s\n",
-							SMPcmd.ea, SMPcmd.size, offset, disasm);
-				}
-				else if (UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) {
-					// EBP-relative constant offset
-					qfprintf(AnnotFile,
-							"%x %d PTRIMMEDEBP STACK %d displ %s\n",
-							SMPcmd.ea, SMPcmd.size, offset, disasm);
-				}
+				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
 			}
 			else { // Something like [ecx]
 				ushort BaseReg = Opnd.reg;
diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h
index a6267bd9..c2ac2e83 100644
--- a/SMPDataFlowAnalysis.h
+++ b/SMPDataFlowAnalysis.h
@@ -105,7 +105,7 @@ public:
 	bool MDIsFrameAllocInstr(void) const;
 	bool MDIsFrameDeallocInstr(bool UseFP, asize_t LocSize) const;
 	bool MDUsesCalleeSavedReg(void) const;
-	int GetOptType(void) const;
+	inline int GetOptType(void) const { return OptType; };
 	inline SMPitype GetDataFlowType(void) const { return type; };
 	void PrintOperands(void) const;
 	char *DestString(int OptType);
@@ -125,6 +125,9 @@ private:
 	DefOrUseList Uses; // Uses list
 	void BuildSMPDefUseLists(void); // Build DEF and USE lists for instruction
 	void MDFixupDefUseLists(void); // Machine-dependent ad hoc fixes
+	void MDAddRegDef(ushort); // Add DEF of register if not already a DEF
+	void MDAddRegUse(ushort); // Add USE of register if not already a USE
+	void MDAnnotateSIBStackConstants(FILE *, op_t, ea_t, bool); // Handle x86 opcode SIB byte
 };  // end class SMPInstr
 
 // Class defining basic blocks.
-- 
GitLab