From f330bda53bf3c03b31d5c5bbc8032de2de2c1ddf Mon Sep 17 00:00:00 2001
From: clc5q <clc5q@git.zephyr-software.com>
Date: Wed, 9 Apr 2008 05:07:27 +0000
Subject: [PATCH] Fix LessOp and map subword regs to containing reg for data
 flow analyses.

---
 SMPDataFlowAnalysis.cpp | 47 ++++++++++++++++++++++++++++++-----------
 SMPDataFlowAnalysis.h   | 18 +++++++++-------
 SMPInstr.cpp            | 32 ++++++++++++++++++++++------
 3 files changed, 71 insertions(+), 26 deletions(-)

diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp
index a7005d20..ca63a9d0 100644
--- a/SMPDataFlowAnalysis.cpp
+++ b/SMPDataFlowAnalysis.cpp
@@ -109,26 +109,24 @@ size_t GetOpDataSize(op_t DataOp) {
 #define FIRST_x86_SUBWORD_REG R_al
 #define LAST_x86_SUBWORD_REG R_bh
 bool MDLessReg(const ushort Reg1, const ushort Reg2) {
-	bool FirstSubword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG));
-	bool SecondSubword = ((Reg2 >= FIRST_x86_SUBWORD_REG) && (Reg2 <= LAST_x86_SUBWORD_REG));
+	ushort SReg1 = MDCanonicalizeSubReg(Reg1);
+	ushort SReg2 = MDCanonicalizeSubReg(Reg2);
+	return (SReg1 < SReg2);
+} // end of MDLessReg()
+
+ushort MDCanonicalizeSubReg(const ushort Reg1) {
+	bool Subword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG));
 	ushort SReg1 = Reg1;
-	ushort SReg2 = Reg2;
 
-	if (FirstSubword) {
+	if (Subword) {
 		// See enumeration RegNo in intel.hpp.
 		if (SReg1 < 20)  // AL, CL, DL or BL
 			SReg1 -= 16;
 		else             // AH, CH, DH or BH
 			SReg1 -= 20;
 	}
-	if (SecondSubword) {
-		if (SReg2 < 20)
-			SReg2 -= 16;
-		else
-			SReg2 -= 20;
-	}
-	return (SReg1 < SReg2);
-} // end of MDLessReg()
+	return SReg1;
+} // end of MDCanonicalizeSubReg()
 
 // In SSA computations, we are storing the GlobalNames index into the op_t fields
 //  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
@@ -334,6 +332,11 @@ DefOrUse::DefOrUse(void) {
 
 // Constructor.
 DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) {
+	if (o_reg == Ref.type) {
+		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
+		//  and type inference systems.
+		Ref.reg = MDCanonicalizeSubReg(Ref.reg);
+	}
 	this->Operand = Ref;
 	this->OpType = Type;
 	this->SSANumber = SSASub;
@@ -576,6 +579,11 @@ SMPDefUseChain::SMPDefUseChain(void) {
 }
 
 SMPDefUseChain::SMPDefUseChain(op_t Name, ea_t Def) {
+	if (o_reg == Name.type) {
+		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
+		//  and type inference systems.
+		Name.reg = MDCanonicalizeSubReg(Name.reg);
+	}
 	this->SSAName = Name;
 	this->RefInstrs.push_back(Def);
 	return;
@@ -583,6 +591,11 @@ SMPDefUseChain::SMPDefUseChain(op_t Name, ea_t Def) {
 
 // Set the variable name.
 void SMPDefUseChain::SetName(op_t Name) {
+	if (o_reg == Name.type) {
+		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
+		//  and type inference systems.
+		Name.reg = MDCanonicalizeSubReg(Name.reg);
+	}
 	this->SSAName = Name;
 	return;
 }
@@ -624,11 +637,21 @@ SMPDUChainArray::SMPDUChainArray(void) {
 }
 
 SMPDUChainArray::SMPDUChainArray(op_t Name) {
+	if (o_reg == Name.type) {
+		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
+		//  and type inference systems.
+		Name.reg = MDCanonicalizeSubReg(Name.reg);
+	}
 	this->SSAName = Name;
 	return;
 }
 
 void SMPDUChainArray::SetName(op_t Name) {
+	if (o_reg == Name.type) {
+		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
+		//  and type inference systems.
+		Name.reg = MDCanonicalizeSubReg(Name.reg);
+	}
 	this->SSAName = Name;
 	return;
 }
diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h
index c9e383da..490fa3b6 100644
--- a/SMPDataFlowAnalysis.h
+++ b/SMPDataFlowAnalysis.h
@@ -46,6 +46,7 @@ void PrintOperand(op_t Opnd);
 // MACHINE DEPENDENT: Is operand type a known type that we want to analyze?
 bool MDKnownOperandType(op_t TempOp);
 
+ushort MDCanonicalizeSubReg(const ushort Reg1);
 bool MDLessReg(const ushort Reg1, const ushort Reg2);
 
 // MACHINE DEPENDENT: comparison class to permit sorting of op_t operands.
@@ -59,15 +60,17 @@ public:
 			case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg);
 			case o_mem: return (Opnd1.addr < Opnd2.addr);
 			case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib);
-							else if (Opnd2.hasSIB) return true;
-							else if (Opnd1.hasSIB) return false;
-							else return (Opnd1.phrase < Opnd2.phrase); // no SIB bytes
+							else if (Opnd2.hasSIB) return true;  // no SIB < has SIB
+							else if (Opnd1.hasSIB) return false; // no SIB < has SIB
+							else return MDLessReg(Opnd1.phrase, Opnd2.phrase); // no SIB bytes
 			case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB)
 							  return ((Opnd1.sib < Opnd2.sib) 
 								|| ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr)));
-							else if (Opnd2.hasSIB) return true;
-							else if (Opnd1.hasSIB) return false;
-							else return (Opnd1.addr < Opnd2.addr); // no SIB bytes
+							else if (Opnd2.hasSIB) return true;  // no SIB < has SIB
+							else if (Opnd1.hasSIB) return false; // no SIB < has SIB
+							else return ((Opnd1.addr < Opnd2.addr)
+								|| ((Opnd1.addr == Opnd2.addr) 
+								&& MDLessReg(Opnd1.reg, Opnd2.reg))); // no SIB bytes
 			case o_imm: return (Opnd1.value < Opnd2.value);
 			case o_far:  // fall through to o_near case
 			case o_near: return (Opnd1.addr < Opnd2.addr);
@@ -76,7 +79,7 @@ public:
 			case o_crreg:  // fall through
 			case o_fpreg:  // fall through
 			case o_mmxreg: // fall through
-			case o_xmmreg: return (Opnd1.reg < Opnd2.reg);
+			case o_xmmreg: return (Opnd1.reg < Opnd2.reg); // no subword regs to deal with
 
 			default: msg("Unknown operand type in LessOp.\n"); return false;
 		}; // end switch (Opnd1.type)
@@ -172,7 +175,6 @@ public:
 	// Get methods
 	// DefOrUse GetRef(size_t index) const;
 	inline size_t GetSize(void) const { return (size_t) Refs.size(); };
-	// inline DefOrUse *GetRefNum(size_t index) { return &Refs[index]; };
 	inline set<DefOrUse, LessDefUse>::iterator GetFirstRef(void) { return Refs.begin(); };
 	inline set<DefOrUse, LessDefUse>::iterator GetLastRef(void) { return Refs.end(); };
 	set<DefOrUse, LessDefUse>::iterator FindRef(op_t SearchOp);
diff --git a/SMPInstr.cpp b/SMPInstr.cpp
index 1946d541..02e47463 100644
--- a/SMPInstr.cpp
+++ b/SMPInstr.cpp
@@ -643,8 +643,14 @@ void SMPInstr::BuildSMPDefUseLists(void) {
 	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
 		if (this->features & DefMacros[OpNum]) { // DEF
 			op_t TempOp = this->SMPcmd.Operands[OpNum];
-			if (MDKnownOperandType(TempOp))
+			if (MDKnownOperandType(TempOp)) {
+				if (o_reg == TempOp.type) {
+					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+					//  analysis and type inference systems.
+					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
+				}
 				this->Defs.SetRef(TempOp);
+			}
 		}
 	} // end for (OpNum = 0; ...)
 
@@ -663,8 +669,14 @@ void SMPInstr::BuildSMPDefUseLists(void) {
 	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
 		if (this->features & UseMacros[OpNum]) { // USE
 			op_t TempOp = this->SMPcmd.Operands[OpNum];
-			if (MDKnownOperandType(TempOp))
+			if (MDKnownOperandType(TempOp)) {
+				if (o_reg == TempOp.type) {
+					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+					//  analysis and type inference systems.
+					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
+				}
 				this->Uses.SetRef(TempOp);
+			}
 		}
 	} // end for (OpNum = 0; ...)
 
@@ -724,6 +736,9 @@ void SMPInstr::MDFixupDefUseLists(void) {
 					}
 					else {
 						if (DebugFlag) msg("base reg %d not ignored at %x\n", BaseReg, this->GetAddr());
+						// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+						//  analysis and type inference systems.
+						BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
 						this->Uses.SetRef(BaseOpnd);
 					}
 				}
@@ -736,6 +751,9 @@ void SMPInstr::MDFixupDefUseLists(void) {
 					IndexOpnd.reg = IndexReg;
 					IndexOpnd.hasSIB = 0;
 					IndexOpnd.set_showed();
+					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+					//  analysis and type inference systems.
+					IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
 					if (0 == sib_scale(Opnd))
 						this->Uses.SetRef(IndexOpnd);
 					else // scaling == shift ==> NUMERIC
@@ -754,6 +772,9 @@ void SMPInstr::MDFixupDefUseLists(void) {
 				}
 				else {
 					if (DebugFlag) msg("base reg %d not ignored at %x\n", BaseReg, this->GetAddr());
+					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
+					//  analysis and type inference systems.
+					BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
 					this->Uses.SetRef(BaseOpnd);
 				}
 			}
@@ -849,9 +870,8 @@ void SMPInstr::MDFixupDefUseLists(void) {
 		// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
 		//  operands:  imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
 		//  EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
-		set<DefOrUse, LessDefUse>::iterator CurrUse;
-		for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
-			op_t TempUse = CurrUse->GetOp();
+		for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
+			op_t TempUse = SMPcmd.Operands[OpNum];
 			if (!TempUse.showed()) { // hidden operand
 				if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
 					if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
@@ -1743,7 +1763,7 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 			if (Opnd.hasSIB) {
 				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
 			}
-			else { // Something like [ecx]
+			else { // Something like [ecx]; is it [esp] or [ebp] ?
 				ushort BaseReg = Opnd.reg;
 				if (BaseReg == R_sp) {
 					// ESP-relative constant offset
-- 
GitLab