diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp
index 986a54abc7fccb185c7d68a58b09c1bb1f47acfe..cd6235e92de4ca8825a56c60396e835a88bf60fe 100644
--- a/SMPDataFlowAnalysis.cpp
+++ b/SMPDataFlowAnalysis.cpp
@@ -36,6 +36,7 @@
 #include <set>
 #include <vector>
 #include <algorithm>
+#include <string>
 
 #include <cstring>
 
@@ -1330,6 +1331,109 @@ void SMPCompleteDUChains::Dump(void) {
 	return;
 } // end of SMPCompleteDUChains::Dump()
 
+static map<string, struct FineGrainedInfo> ReturnRegisterTypeMap;
+
+// Look up the fine-grained (FG) info recorded for the return register of the
+// library function named FuncName (e.g. atoi() returns a signed integer,
+// while strtoul() returns an unsigned long). When the name is not in the map,
+// SignMiscInfo and SizeInfo of InitFGInfo are zeroed to signal "nothing known".
+void GetLibFuncFGInfo(string FuncName, struct FineGrainedInfo &InitFGInfo) {
+	map<string, struct FineGrainedInfo>::iterator FindIter = ReturnRegisterTypeMap.find(FuncName);
+
+	if (FindIter != ReturnRegisterTypeMap.end()) { // found
+		InitFGInfo = FindIter->second;
+	}
+	else { // not found
+		InitFGInfo.SignMiscInfo = 0;
+		InitFGInfo.SizeInfo = 0;
+	}
+	return;
+} // end of GetLibFuncFGInfo()
+
+// Map every name in FuncNames[] to a copy of Entry in ReturnRegisterTypeMap.
+// Each name must be new to the map; the assert catches a function that has
+// been listed under two different return types.
+template <size_t Count>
+static void AddLibFuncFGEntries(const char *const (&FuncNames)[Count], const struct FineGrainedInfo &Entry) {
+	pair<string, struct FineGrainedInfo> MapEntry;
+	pair<map<string, struct FineGrainedInfo>::iterator, bool> InsertResult;
+
+	MapEntry.second = Entry;
+	for (size_t Index = 0; Index < Count; ++Index) {
+		MapEntry.first = FuncNames[Index];
+		InsertResult = ReturnRegisterTypeMap.insert(MapEntry);
+		assert(InsertResult.second);
+	}
+	return;
+} // end of AddLibFuncFGEntries()
+
+// Initialize the lookup maps that are used to define the FG info that can
+// be inferred from a library function name. Each group of names below shares
+// one sign mask and one type/width mask for its return value.
+// NOTE(review): widths come from sizeof() in this build, i.e. the plugin's own
+// ABI; confirm that this matches the binaries being analyzed.
+void InitLibFuncFGInfoMaps(void) {
+	static const char *const SignedIntFuncs[] = {
+		"atoi", "strcmp", "strncmp", "memcmp",
+		"strcoll", // returns int like strcmp(), not (char *)
+		"isalnum", "isalpha", "islower", "isupper", "isdigit", "isxdigit",
+		"iscntrl", "isgraph", "isblank", "isspace", "isprint", "ispunct"
+	};
+	static const char *const SignedLongFuncs[] = { "atol", "strtol" };
+	static const char *const SignedLongLongFuncs[] = { "atoll", "strtoll" };
+	static const char *const UnsignedLongLongFuncs[] = { "strtoull" };
+	static const char *const UnsignedLongFuncs[] = { "strtoul" };
+	static const char *const SizeTFuncs[] = { "strlen", "strxfrm", "strspn", "strcspn" };
+	static const char *const CharPtrFuncs[] = {
+		"strcpy", "strncpy", "strcat", "strncat", "strchr", "strrchr",
+		"strpbrk", "strstr", "strtok", "strerror"
+	};
+	static const char *const VoidPtrFuncs[] = {
+		"malloc", "calloc", "realloc", "memchr", "memcpy",
+		"mempcpy", // non-standard, found in glibc
+		"memmove", "memset"
+	};
+	op_t DummyOp = InitOp;
+	struct FineGrainedInfo FGEntry;
+
+	// Functions that return signed integers.
+	FGEntry.SignMiscInfo = FG_MASK_SIGNED;
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(int)));
+	AddLibFuncFGEntries(SignedIntFuncs, FGEntry);
+
+	// Functions that return signed longs.
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(long int)));
+	AddLibFuncFGEntries(SignedLongFuncs, FGEntry);
+
+	// Functions that return signed long longs.
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(long long int)));
+	AddLibFuncFGEntries(SignedLongLongFuncs, FGEntry);
+
+	// Functions that return unsigned long longs (same width, opposite sign).
+	FGEntry.SignMiscInfo = FG_MASK_UNSIGNED;
+	AddLibFuncFGEntries(UnsignedLongLongFuncs, FGEntry);
+
+	// Functions that return unsigned longs.
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(long int)));
+	AddLibFuncFGEntries(UnsignedLongFuncs, FGEntry);
+
+	// Functions that return size_t.
+	FGEntry.SizeInfo = (FG_MASK_INTEGER | ComputeOperandBitWidthMask(DummyOp, sizeof(size_t)));
+	AddLibFuncFGEntries(SizeTFuncs, FGEntry);
+
+	// Functions that return (char *). Pointers are recorded as unsigned.
+	FGEntry.SizeInfo = (FG_MASK_DATAPOINTER | ComputeOperandBitWidthMask(DummyOp, sizeof(char *)));
+	AddLibFuncFGEntries(CharPtrFuncs, FGEntry);
+
+	// Functions that return (void *).
+	FGEntry.SizeInfo = (FG_MASK_DATAPOINTER | ComputeOperandBitWidthMask(DummyOp, sizeof(void *)));
+	AddLibFuncFGEntries(VoidPtrFuncs, FGEntry);
+
+	// NOTE: Add functions that return bool, and <math.h> functions, later.
+
+	return;
+} // end of InitLibFuncFGInfoMaps()
+
 // Initialize the DFACategory[] array to define instruction classes
 // for the purposes of data flow analysis.
void InitDFACategory(void) { diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index 8f808b70e9f31ee05a971baf470bfc6601d4c1f3..62a39beb49882657ce73c77f200070b9d91177af 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -99,6 +99,9 @@ const char *MDGetRegName(op_t RegOp); // Distinguishes subword regs from their p #define X86_STACK_POINTER_REG R_sp #define MD_STACK_POINTER_REG X86_STACK_POINTER_REG +#define X86_RETURN_VALUE_REG R_ax +#define MD_RETURN_VALUE_REG X86_RETURN_VALUE_REG + // Debug: print one operand from an instruction or DEF or USE list. void PrintDefUse(ulong feature, int OpNum); void PrintSIB(op_t Opnd); diff --git a/SMPInstr.cpp b/SMPInstr.cpp index 2e84ba1cd851094655bed1ade843d5f2e4a93a65..1a5936ece09a50f2932be695fe0dcda5d689a246 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -1999,11 +1999,12 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) { bool ValueWillChange; unsigned short SignMask, TempSign, WidthMask; int DefHashValue, UseHashValue; - bool case1, case2, case3; + bool case1, case2, case3, case4; case1 = this->IsLoadFromStack(); case2 = this->MDIsSignedLoad(SignMask); case3 = (7 == this->OptType); + case4 = (CALL == this->GetDataFlowType()); // Case 1: Load from stack location. if (case1) { @@ -2128,7 +2129,40 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) { } ++UseIter; } - } + } // end of case 3 (multiplies and divides) + + // Case 4: Calls to library functions can reveal the type of the return register. + else if (case4) { + // Get name of function called. + assert(!(this->RTL.GetRT(0)->HasRightSubTree())); + UseOp = this->RTL.GetRT(0)->GetRightOperand(); + ea_t FuncAddr = UseOp.addr; + char IDA_func_name[MAXSTR]; + (void) get_func_name(FuncAddr, IDA_func_name, (size_t)(MAXSTR - 1)); + size_t SkipCount = strspn(IDA_func_name, "._"); + char *TempFuncName = &(IDA_func_name[SkipCount]); + string FuncName(TempFuncName); + + // Get FG info, if any, for called function. 
+ GetLibFuncFGInfo(FuncName, FGEntry); + + // See if anything was returned in FGEntry. + if ((FGEntry.SignMiscInfo != 0) || (FGEntry.SizeInfo != 0)) { + // Need to update the FG info for the DEF of the return register. + DefOp = InitOp; + DefOp.type = o_reg; + DefOp.reg = MD_RETURN_VALUE_REG; + DefIter = this->FindDef(DefOp); + assert(DefIter != this->GetLastDef()); + DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); + if (this->BasicBlock->IsLocalName(DefOp)) { + this->BasicBlock->UpdateDefFGInfo(DefHashValue, FGEntry); + } + else { + this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, FGEntry); + } + } + } // end of case4 (function calls) // For all register DEFs and USEs, we should get the obvious register width info // updated. Need to use the RTL operands to get accurate widths. @@ -3150,6 +3184,11 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo case SMP_INPUT: // input from port case SMP_OUTPUT: // output to port case SMP_ADDRESS_OF: // take effective address + case SMP_U_COMPARE: // unsigned compare (AND-based) + case SMP_S_COMPARE: // signed compare (subtraction-based) + // NOTE: The AND-based and subtraction-based comparisons are used + // on lots of operands of all types, and the conditional jump that + // follows determines signedness, not the operator. 
break; case SMP_U_LEFT_SHIFT: // unsigned left shift @@ -3165,7 +3204,6 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo case SMP_BITWISE_NOT: // unary operator case SMP_BITWISE_XOR: case SMP_BITWISE_AND_NOT: - case SMP_U_COMPARE: // unsigned compare (AND-based) InitFG.SignMiscInfo |= FG_MASK_UNSIGNED; changed = true; break; @@ -3176,7 +3214,6 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo case SMP_S_DIVIDE: case SMP_SIGN_EXTEND: case SMP_NEGATE: // unary negation - case SMP_S_COMPARE: // signed compare (subtraction-based) case SMP_LESS_THAN: // boolean test operators case SMP_GREATER_THAN: case SMP_LESS_EQUAL: diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp index 2212cfc3c76e517f9eb3473916c21a69934dce39..c7ea35dcbf4ff32bad4dcf7f7d7665460152944f 100644 --- a/SMPStaticAnalyzer.cpp +++ b/SMPStaticAnalyzer.cpp @@ -201,11 +201,6 @@ int IDAP_init(void) { return PLUGIN_SKIP; } hook_to_notification_point(HT_IDP, idp_callback, NULL); - InitOptCategory(); - InitDFACategory(); - InitTypeCategory(); - InitSMPDefsFlags(); - InitSMPUsesFlags(); DataReferentID = 1; InitOp.type = o_void; InitOp.addr = 0; @@ -221,6 +216,12 @@ int IDAP_init(void) { InitOp.specflag4 = 0; InitOp.specval = 0; InitOp.value = 0; + InitOptCategory(); + InitDFACategory(); + InitTypeCategory(); + InitSMPDefsFlags(); + InitSMPUsesFlags(); + InitLibFuncFGInfoMaps(); return PLUGIN_KEEP; } // end of IDAP_init