From 3bebaac353869682cdeed278a307d7185d1d81ab Mon Sep 17 00:00:00 2001
From: clc5q <>
Date: Wed, 14 May 2008 01:18:27 +0000
Subject: [PATCH] Infer local POINTER DEF from any POINTER USE; fix
 add/sub/interrupt problems.

 SMPBasicBlock.cpp |  57 +++++++++++++++++----
 SMPInstr.cpp      | 125 ++++++++++++++++++++++++++++++++++++++++++----
 SMPInstr.h        |   2 +
 3 files changed, 162 insertions(+), 22 deletions(-)

diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp
index d7ba6bbc..6673d378 100644
--- a/SMPBasicBlock.cpp
+++ b/SMPBasicBlock.cpp
@@ -980,7 +980,7 @@ bool SMPBasicBlock::InferLocalDefType(op_t DefOp, unsigned int LocIndex, ea_t De
 	bool changed = false;
 	bool DebugFlag = false;
-	DebugFlag |= (0 == strcmp("memset", this->MyFunc->GetFuncName()));
+	DebugFlag |= (0 == strcmp("weightadj", this->MyFunc->GetFuncName()));
 	if (DebugFlag) {
@@ -1030,6 +1030,11 @@ bool SMPBasicBlock::InferLocalDefType(op_t DefOp, unsigned int LocIndex, ea_t De
 	//  a single type (other than UNINIT), change the DEF type to match the USE type
 	//  and set changed to true.
 	bool FirstUseSeen = false;
+	bool FoundUninit = false;
+	bool FoundNumeric = false;
+	bool FoundPointer = false;
+	bool FoundUnknown = false;
+	SMPOperandType PtrType = UNINIT;
 	SMPOperandType UseType = UNINIT;
 	for (InstIter = this->Instrs.begin(); InstIter != this->Instrs.end(); ++InstIter) {
 		ea_t CurrAddr = (*InstIter)->GetAddr();
@@ -1038,26 +1043,56 @@ bool SMPBasicBlock::InferLocalDefType(op_t DefOp, unsigned int LocIndex, ea_t De
 		if (CurrAddr > StartAddr) {
 			CurrUse = (*InstIter)->FindUse(DefOp);
 			if (CurrUse != (*InstIter)->GetLastUse()) { // found a USE in the chain
-				if (!FirstUseSeen) {
+				if (!FirstUseSeen)
 					FirstUseSeen = true;
-					UseType = CurrUse->GetType();
-					if (UNINIT == UseType)
-						return false;
-				}
-				else {
-					if (IsNotEqType(CurrUse->GetType(), UseType))
-						return false;  // no consistent type
+				UseType = CurrUse->GetType();
+				if (UNINIT == UseType)
+					FoundUninit = true;
+				else if (IsNumeric(UseType))
+					FoundNumeric = true;
+				else if (IsUnknown(UseType))
+					FoundUnknown = true;
+				else if (IsDataPtr(UseType)) {
+					if (FoundPointer) {
+						if (IsNotEqType(PtrType, UseType)) {
+							msg("WARNING: Differing ptr types in local chain:");
+							msg(" Prev: %d Current: %d %s\n", PtrType, UseType,
+								(*InstIter)->GetDisasm());
+						}
+						PtrType = POINTER;
+					}
+					else {
+						FoundPointer = true;
+						PtrType = UseType;
+					}
 	} // end for all instructions
 	if (FirstUseSeen) {
-		// We have a consistent type, else we would have returned false above.
-		assert(UNINIT != UseType);
+		// See if we have a consistent type.
+		// If we see any definite POINTER uses, we must set the DEF
+		//  to type POINTER or a refinement of it, even when other USEs
+		//  in the chain are still UNINIT, UNKNOWN, or NUMERIC. Do not bail
+		//  out before this test, or the POINTER case (and the later
+		//  propagation of PtrType to UNINIT USEs) can never be reached.
+#if 1
+		if (FoundPointer)
+			UseType = PtrType;
+		else if (FoundNumeric && !FoundUninit && !FoundUnknown)
+			UseType = NUMERIC; // every USE seen in the chain was NUMERIC
+		else
+			return false; // no POINTER, but no consistent type
 		if (DebugFlag) msg("Inferring local DEF of type %d\n", UseType);
 		CurrDef = (*DefInstIter)->SetDefType(DefOp, UseType);
 		changed = true;
+		if (FoundPointer && FoundUninit) {
+			// We will propagate PtrType to the UNINIT elements of
+			//  the DEF-USE chain.
+			changed |= this->PropagateLocalDefType(DefOp, PtrType, DefAddr);
+		}
 	return changed;
diff --git a/SMPInstr.cpp b/SMPInstr.cpp
index 04a6d9d3..faedc54d 100644
--- a/SMPInstr.cpp
+++ b/SMPInstr.cpp
@@ -245,6 +245,38 @@ bool SMPInstr::HasSourceMemoryOperand(void) {
 	return MemSrc;
 } // end of SMPInstr::HasSourceMemoryOperand()
+// Walk the DEF list in order and hand back the first operand whose type
+//  lies in the range o_mem..o_displ. If no DEF qualifies, the operand
+//  returned has its type field set to o_void.
+op_t SMPInstr::MDGetMemDefOp(void) {
+	set<DefOrUse, LessDefUse>::iterator CurrDef = this->GetFirstDef();
+	while (CurrDef != this->GetLastDef()) {
+		op_t Candidate = CurrDef->GetOp();
+		bool OutsideMemRange = (Candidate.type < o_mem) || (Candidate.type > o_displ);
+		if (!OutsideMemRange)
+			return Candidate; // first memory DEF wins
+		++CurrDef;
+	}
+	// No memory operand among the DEFs; signal that with o_void.
+	op_t NoMemOp;
+	NoMemOp.type = o_void;
+	return NoMemOp;
+} // end of SMPInstr::MDGetMemDefOp()
+// Walk the USE list in order and hand back the first operand whose type
+//  lies in the range o_mem..o_displ. If no USE qualifies, the operand
+//  returned has its type field set to o_void.
+op_t SMPInstr::MDGetMemUseOp(void) {
+	set<DefOrUse, LessDefUse>::iterator CurrUse = this->GetFirstUse();
+	while (CurrUse != this->GetLastUse()) {
+		op_t Candidate = CurrUse->GetOp();
+		bool OutsideMemRange = (Candidate.type < o_mem) || (Candidate.type > o_displ);
+		if (!OutsideMemRange)
+			return Candidate; // first memory USE wins
+		++CurrUse;
+	}
+	// No memory operand among the USEs; signal that with o_void.
+	op_t NoMemOp;
+	NoMemOp.type = o_void;
+	return NoMemOp;
+} // end of SMPInstr::MDGetMemUseOp()
 // Does the instruction whose flags are in F have a numeric type
 //   as the second source operand?
 // NOTE: We can only analyze immediate values now. When data flow analyses are implemented,
@@ -267,18 +299,33 @@ bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const {
 //  annotation will result.
 void SMPInstr::SetAddSubSourceType(void) {
 	set<DefOrUse, LessDefUse>::iterator UseIter, DefIter;
+	bool MemSrc = this->HasSourceMemoryOperand();
+#if 0
 	// First, ensure that we are dealing with a register source.
-	if (this->HasSourceMemoryOperand()) {
+	if (MemSrc) {
 		this->AddSubSourceType = UNINIT;
 	// The USE and DEF lists will have the flags and the destination
-	//  operand in common for adds and subtracts. The USE-only operand
-	//  is the one we are concerned with.
+	//  operand in common for register adds and subtracts. The USE-only
+	//  operand is the one we are concerned with.
 	for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
-		if (this->GetLastDef() == this->FindDef(UseIter->GetOp())) {
+		op_t UseOp = UseIter->GetOp();
+		if (MemSrc) {
+			if ((UseOp.type >= o_mem) && (UseOp.type <= o_displ)) {
+				// Found the memory USE.
+				this->AddSubSourceType = UseIter->GetType();
+				msg("Set AddSubSourceType to %d at %x: %s\n", UseIter->GetType(),
+					this->address, this->GetDisasm());
+				break;
+			}
+		}
+		else if (this->GetLastDef() == this->FindDef(UseOp)) {
 			// Found the USE that is not a DEF
 			this->AddSubSourceType = UseIter->GetType();
@@ -1090,27 +1137,69 @@ bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
 	// At this point, we must have a base address in a register.
 	if ((0 < ScaleFactor) || (R_none == IndexReg)) {
 		// IndexReg is scaled, meaning it is NUMERIC, so BaseReg must
-		//  be a POINTER, or IndexReg is not present, so BaseReg is the
+		//  be a POINTER; or IndexReg is not present, so BaseReg is the
 		//  only possible holder of an address.
 		if (R_none != BaseReg) {
-			changed = true;
 			op_t BaseOp;
 			BaseOp.type = o_reg;
 			BaseOp.reg = MDCanonicalizeSubReg(BaseReg);
-			UseIter = this->SetUseType(BaseOp, POINTER);
+			UseIter = this->FindUse(BaseOp);
 			assert(UseIter != this->GetLastUse());
+			if (UNINIT == UseIter->GetType()) {
+				changed = true;
+				UseIter = this->SetUseType(BaseOp, POINTER);
+				assert(UseIter != this->GetLastUse());
+			}
 	else if (R_none == BaseReg) {
 		// We have an unscaled IndexReg and no BaseReg and offset was
 		//  not a global offset, so IndexReg must be a POINTER.
 		if (R_none != IndexReg) {
-			changed = true;
 			op_t IndexOp;
 			IndexOp.type = o_reg;
 			IndexOp.reg = MDCanonicalizeSubReg(IndexReg);
-			UseIter = this->SetUseType(IndexOp, POINTER);
+			UseIter = this->FindUse(IndexOp);
+			assert(UseIter != this->GetLastUse());
+			if (UNINIT == UseIter->GetType()) {
+				changed = true;
+				UseIter = this->SetUseType(IndexOp, POINTER);
+				assert(UseIter != this->GetLastUse());
+			}
+		}
+	}
+	else { // We have BaseReg and an unscaled IndexReg.
+		// The only hope for typing something like [ebx+edx] is for
+		//  one register to already be typed NUMERIC, in which case
+		//  the other one must be a POINTER.
+		op_t IndexOp;
+		IndexOp.type = o_reg;
+		IndexOp.reg = MDCanonicalizeSubReg(IndexReg);
+		op_t BaseOp;
+		BaseOp.type = o_reg;
+		BaseOp.reg = MDCanonicalizeSubReg(BaseReg);
+		UseIter = this->FindUse(BaseOp);
+		assert(UseIter != this->GetLastUse());
+		if (IsNumeric(UseIter->GetType())) {
+			UseIter = this->FindUse(IndexOp);
 			assert(UseIter != this->GetLastUse());
+			if (UNINIT == UseIter->GetType()) {
+				// Set to POINTER or PROF_POINTER
+				changed = true;
+				UseIter = this->SetUseType(IndexOp, POINTER);
+				assert(UseIter != this->GetLastUse());
+			}
+		}
+		else { // BaseReg was not NUMERIC
+			if (UNINIT == UseIter->GetType()) { // BaseReg is UNINIT
+				UseIter = this->FindUse(IndexOp);
+				assert(UseIter != this->GetLastUse());
+				if (IsNumeric(UseIter->GetType())) {
+					changed = true;
+					UseIter = this->SetUseType(BaseOp, POINTER);
+					assert(UseIter != this->GetLastUse());
+				}
+			}
@@ -1240,6 +1329,7 @@ bool SMPInstr::InferTypes(void) {
 	set<DefOrUse, LessDefUse>::iterator CurrUse;
 	op_t DefOp, UseOp;
 	bool DebugFlag = false;
+	bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
 	DebugFlag |= (0 == strcmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName()));
@@ -1252,6 +1342,13 @@ bool SMPInstr::InferTypes(void) {
 	if (this->TypeInferenceComplete)
 		return false;
+	if (this->HasDestMemoryOperand()) {
+		changed |= this->MDFindPointerUse(this->MDGetMemDefOp(), UseFP);
+	}
+	if (this->HasSourceMemoryOperand()) {
+		changed |= this->MDFindPointerUse(this->MDGetMemUseOp(), UseFP);
+	}
 	// The control flow instructions can be handled simply based on their type
 	//  and do not need an RTL walk.
 	SMPitype DFAType = this->GetDataFlowType();
@@ -1260,13 +1357,17 @@ bool SMPInstr::InferTypes(void) {
 	if ((DFAType >= JUMP) && (DFAType <= INDIR_CALL)) {
 		// All USEs are either the flags (NUMERIC) or the target address (CODEPTR).
+		//  The exception is the USE list for interrupt calls, which includes
+		//  the caller-saved regs.
 		CurrUse = this->GetFirstUse();
 		while (CurrUse != this->GetLastUse()) {
 			UseOp = CurrUse->GetOp();
 			if (UseOp.is_reg(X86_FLAGS_REG))
 				CurrUse = this->SetUseType(UseOp, NUMERIC);
-			else if (CurrUse->GetType() != CODEPTR)
+			else if ((CurrUse->GetType() != CODEPTR)
+				&& (!(this->MDIsInterruptCall() && (o_reg == UseOp.type)))) {
 				CurrUse = this->SetUseType(UseOp, CODEPTR);
+			}
 		this->TypeInferenceComplete = true;
@@ -2303,7 +2404,9 @@ void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile)
 			else if (IsEqType(NUMERIC, this->AddSubSourceType)
-				&& !this->MDIsFrameAllocInstr()) {
+				&& !this->MDIsFrameAllocInstr()
+				&& (this->SMPcmd.itype != NN_adc)
+				&& (this->SMPcmd.itype != NN_sbb)) {
 				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL 2ndSrcNumeric %s \n",
 						addr, -1, disasm);
diff --git a/SMPInstr.h b/SMPInstr.h
index 7c9b4471..a4ca2bd9 100644
--- a/SMPInstr.h
+++ b/SMPInstr.h
@@ -263,6 +263,8 @@ private:
 	void MDAddRegUse(ushort, bool); // Add USE of register if not already a USE
 	void MDAnnotateSIBStackConstants(FILE *, op_t, ea_t, bool); // Handle x86 opcode SIB byte
 	bool MDFindPointerUse(op_t MemOp, bool UseFP); // Set base reg to POINTER
+	op_t MDGetMemUseOp(void); // Get first memory operand in USE list (type o_void if none)
+	op_t MDGetMemDefOp(void); // Get first memory operand in DEF list (type o_void if none)
 	bool BuildUnaryRTL(SMPoperator UnaryOp); // helper for BuildRTL()
 	bool BuildUnary2OpndRTL(SMPoperator UnaryOp); // helper for BuildRTL()
 	bool BuildBinaryRTL(SMPoperator BinaryOp); // helper for BuildRTL()