From 08d807e172f79e3670890c866609c03e488c2e9a Mon Sep 17 00:00:00 2001
From: clc5q <clc5q@git.zephyr-software.com>
Date: Mon, 14 Nov 2011 06:50:33 +0000
Subject: [PATCH] Infer signedness and width of return value register from
 library function calls.

---
 SMPDataFlowAnalysis.cpp | 262 ++++++++++++++++++++++++++++++++++++++++
 SMPDataFlowAnalysis.h   |   3 +
 SMPInstr.cpp            |  45 ++++++-
 SMPStaticAnalyzer.cpp   |  11 +-
 4 files changed, 312 insertions(+), 9 deletions(-)

diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp
index 986a54ab..cd6235e9 100644
--- a/SMPDataFlowAnalysis.cpp
+++ b/SMPDataFlowAnalysis.cpp
@@ -36,6 +36,7 @@
 #include <set>
 #include <vector>
 #include <algorithm>
+#include <string>
 
 #include <cstring>
 
@@ -1330,6 +1331,267 @@ void SMPCompleteDUChains::Dump(void) {
 	return;
 } // end of SMPCompleteDUChains::Dump()
 
+static map<string, struct FineGrainedInfo> ReturnRegisterTypeMap; // library function name -> FG info for its return value; filled by InitLibFuncFGInfoMaps()
+
+// Look up FuncName in ReturnRegisterTypeMap. On a hit, copy the recorded FG
+//  info (e.g. atoi() -> signed int, strtoul() -> unsigned long) into InitFGInfo;
+//  on a miss, zero both fields so the caller can tell that nothing is known.
+void GetLibFuncFGInfo(string FuncName, struct FineGrainedInfo &InitFGInfo) {
+	map<string, struct FineGrainedInfo>::const_iterator MapPos = ReturnRegisterTypeMap.find(FuncName);
+
+	if (MapPos != ReturnRegisterTypeMap.end()) {
+		// Known library function: hand back its recorded sign/width info.
+		InitFGInfo = MapPos->second;
+		return;
+	}
+
+	// Unknown function: no sign or size information can be inferred.
+	InitFGInfo.SignMiscInfo = 0;
+	InitFGInfo.SizeInfo = 0;
+} // end of GetLibFuncFGInfo()
+
+// Populate ReturnRegisterTypeMap with the sign and width info that can be
+//  inferred for the return value from a library function name. Entries are
+//  grouped by return type, and FGEntry carries over between groups, so the
+//  group order matters. Every insert is asserted to be a new key.
+void InitLibFuncFGInfoMaps(void) {
+	op_t DummyOp = InitOp; // InitOp is filled in by IDAP_init before this call
+	struct FineGrainedInfo FGEntry;
+	pair<string, struct FineGrainedInfo> MapEntry;
+	pair<map<string, struct FineGrainedInfo>::iterator, bool> InsertResult;
+
+	// Add functions that return signed integers.
+	// NOTE(review): widths come from sizeof() in this build, not from the analyzed binary; confirm they match.
+	FGEntry.SignMiscInfo = FG_MASK_SIGNED;
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(int)));
+	MapEntry.second = FGEntry;
+
+	MapEntry.first = "atoi";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strcmp";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strncmp";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "memcmp";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isalnum";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isalpha";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "islower";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isupper";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isdigit";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isxdigit";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "iscntrl";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isgraph";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isblank";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isspace";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "isprint";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "ispunct";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strcoll";  // returns an int comparison result, like strcmp (not a pointer)
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return signed longs.
+	if (sizeof(long int) != sizeof(int)) {
+		FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(long int)));
+		MapEntry.second = FGEntry;
+	}
+
+	MapEntry.first = "atol";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strtol";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return signed long longs.
+	if (sizeof(long long int) != sizeof(long int)) {
+		FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(long long int)));
+		MapEntry.second = FGEntry;
+	}
+
+	MapEntry.first = "atoll";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strtoll";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return unsigned long longs.
+	FGEntry.SignMiscInfo = FG_MASK_UNSIGNED;
+	MapEntry.second = FGEntry;
+
+	MapEntry.first = "strtoull";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return unsigned longs.
+	if (sizeof(long long int) != sizeof(long int)) {
+		FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(long int)));
+		MapEntry.second = FGEntry;
+	}
+
+	MapEntry.first = "strtoul";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return size_t.
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(size_t)));
+	FGEntry.SignMiscInfo = FG_MASK_UNSIGNED;
+	MapEntry.second = FGEntry;
+
+	MapEntry.first = "strlen";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strxfrm";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strspn";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strcspn";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return (char *).
+	FGEntry.SizeInfo = (FG_MASK_DATAPOINTER | ComputeOperandBitWidthMask(DummyOp, sizeof(char *)));
+	FGEntry.SignMiscInfo = FG_MASK_UNSIGNED;
+	MapEntry.second = FGEntry;
+
+	MapEntry.first = "strcpy";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strncpy";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strcat";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strncat";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strchr";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strrchr";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strpbrk";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strstr";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strtok";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "strerror";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return (void *).
+	if (sizeof(void *) != sizeof(char *)) {
+		FGEntry.SizeInfo = (FG_MASK_DATAPOINTER | ComputeOperandBitWidthMask(DummyOp, sizeof(void *)));
+		MapEntry.second = FGEntry;
+	}
+
+	MapEntry.first = "malloc";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "calloc";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "realloc";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "memchr";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "memcpy";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "mempcpy";  // non-standard, found in glibc
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "memmove";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	MapEntry.first = "memset";
+	InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+	assert(InsertResult.second);
+
+	// Functions that return bool. (None are registered yet; the entry is only prepared here.)
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(bool)));
+	FGEntry.SignMiscInfo = FG_MASK_UNSIGNED;
+	MapEntry.second = FGEntry;
+
+	// NOTE: Add <math.h> functions later.
+} // end of InitLibFuncFGInfoMaps()
+
 // Initialize the DFACategory[] array to define instruction classes
 //   for the purposes of data flow analysis.
 void InitDFACategory(void) {
diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h
index 8f808b70..62a39beb 100644
--- a/SMPDataFlowAnalysis.h
+++ b/SMPDataFlowAnalysis.h
@@ -99,6 +99,9 @@ const char *MDGetRegName(op_t RegOp); // Distinguishes subword regs from their p
 #define X86_STACK_POINTER_REG R_sp
 #define MD_STACK_POINTER_REG X86_STACK_POINTER_REG
 
+#define X86_RETURN_VALUE_REG R_ax
+#define MD_RETURN_VALUE_REG X86_RETURN_VALUE_REG
+
 // Debug: print one operand from an instruction or DEF or USE list.
 void PrintDefUse(ulong feature, int OpNum);
 void PrintSIB(op_t Opnd);
diff --git a/SMPInstr.cpp b/SMPInstr.cpp
index 2e84ba1c..1a5936ec 100644
--- a/SMPInstr.cpp
+++ b/SMPInstr.cpp
@@ -1999,11 +1999,12 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 	bool ValueWillChange;
 	unsigned short SignMask, TempSign, WidthMask;
 	int DefHashValue, UseHashValue;
-	bool case1, case2, case3;
+	bool case1, case2, case3, case4;
 
 	case1 = this->IsLoadFromStack();
 	case2 = this->MDIsSignedLoad(SignMask);
 	case3 = (7 == this->OptType);
+	case4 = (CALL == this->GetDataFlowType());
 
 	// Case 1: Load from stack location.
 	if (case1) {
@@ -2128,7 +2129,40 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 			}
 			++UseIter;
 		}
-	}
+	} // end of case 3 (multiplies and divides)
+
+	// Case 4: Calls to library functions can reveal the type of the return register.
+	else if (case4) {
+		// Get name of function called.
+		assert(!(this->RTL.GetRT(0)->HasRightSubTree()));
+		UseOp = this->RTL.GetRT(0)->GetRightOperand();
+		ea_t FuncAddr = UseOp.addr;
+		char IDA_func_name[MAXSTR];
+		(void) get_func_name(FuncAddr, IDA_func_name, (size_t)(MAXSTR - 1));
+		size_t SkipCount = strspn(IDA_func_name, "._");
+		char *TempFuncName = &(IDA_func_name[SkipCount]);
+		string FuncName(TempFuncName);
+
+		// Get FG info, if any, for called function.
+		GetLibFuncFGInfo(FuncName, FGEntry);
+
+		// See if anything was returned in FGEntry.
+		if ((FGEntry.SignMiscInfo != 0) || (FGEntry.SizeInfo != 0)) {
+			// Need to update the FG info for the DEF of the return register.
+			DefOp = InitOp;
+			DefOp.type = o_reg;
+			DefOp.reg = MD_RETURN_VALUE_REG;
+			DefIter = this->FindDef(DefOp);
+			assert(DefIter != this->GetLastDef());
+			DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
+			if (this->BasicBlock->IsLocalName(DefOp)) {
+				this->BasicBlock->UpdateDefFGInfo(DefHashValue, FGEntry);
+			}
+			else {
+				this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, FGEntry);
+			}
+		}
+	} // end of case4 (function calls)
 
 	// For all register DEFs and USEs, we should get the obvious register width info
 	//  updated. Need to use the RTL operands to get accurate widths.
@@ -3150,6 +3184,11 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo
 		case SMP_INPUT:  // input from port
 		case SMP_OUTPUT: // output to port
 		case SMP_ADDRESS_OF: // take effective address
+		case SMP_U_COMPARE: // unsigned compare (AND-based)
+		case SMP_S_COMPARE: // signed compare (subtraction-based)
+			// NOTE: The AND-based and subtraction-based comparisons are used
+			//  on lots of operands of all types, and the conditional jump that
+			//  follows determines signedness, not the operator.
 			break;
 
 		case SMP_U_LEFT_SHIFT: // unsigned left shift
@@ -3165,7 +3204,6 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo
 		case SMP_BITWISE_NOT: // unary operator
 		case SMP_BITWISE_XOR:
 		case SMP_BITWISE_AND_NOT:
-		case SMP_U_COMPARE: // unsigned compare (AND-based)
 			InitFG.SignMiscInfo |= FG_MASK_UNSIGNED;
 			changed = true;
 			break;
@@ -3176,7 +3214,6 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo
 		case SMP_S_DIVIDE:
 		case SMP_SIGN_EXTEND:
 		case SMP_NEGATE:    // unary negation
-		case SMP_S_COMPARE: // signed compare (subtraction-based)
 		case SMP_LESS_THAN: // boolean test operators
 		case SMP_GREATER_THAN:
 		case SMP_LESS_EQUAL:
diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp
index 2212cfc3..c7ea35dc 100644
--- a/SMPStaticAnalyzer.cpp
+++ b/SMPStaticAnalyzer.cpp
@@ -201,11 +201,6 @@ int IDAP_init(void) {
  		return PLUGIN_SKIP;
 	}
 	hook_to_notification_point(HT_IDP, idp_callback, NULL);
-    InitOptCategory();
-	InitDFACategory();
-	InitTypeCategory();
-	InitSMPDefsFlags();
-	InitSMPUsesFlags();
 	DataReferentID = 1;
 	InitOp.type = o_void;
 	InitOp.addr = 0;
@@ -221,6 +216,12 @@ int IDAP_init(void) {
 	InitOp.specflag4 = 0;
 	InitOp.specval = 0;
 	InitOp.value = 0;
+    InitOptCategory();
+	InitDFACategory();
+	InitTypeCategory();
+	InitSMPDefsFlags();
+	InitSMPUsesFlags();
+	InitLibFuncFGInfoMaps();
 	return PLUGIN_KEEP;
 } // end of IDAP_init
 
-- 
GitLab