From d57062b8f8c95d6da2f4a2ac4cec4a652e310bc5 Mon Sep 17 00:00:00 2001 From: clc5q <clc5q@git.zephyr-software.com> Date: Sun, 21 Jul 2013 02:04:26 +0000 Subject: [PATCH] Check for signedness errors on unsigned args to 19 C standard library functions. --- SMPBasicBlock.cpp | 34 ++++++++++ SMPBasicBlock.h | 9 ++- SMPDataFlowAnalysis.cpp | 141 ++++++++++++++++++++++++++++++++++++++++ SMPDataFlowAnalysis.h | 24 ++++++- SMPInstr.cpp | 23 ++++++- SMPInstr.h | 6 +- SMPStaticAnalyzer.cpp | 1 + 7 files changed, 230 insertions(+), 8 deletions(-) diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp index ce715fae..8b633322 100644 --- a/SMPBasicBlock.cpp +++ b/SMPBasicBlock.cpp @@ -693,6 +693,7 @@ set<DefOrUse, LessDefUse>::iterator SMPBasicBlock::GetGlobalDefIterFromDefAddr(o return DefIter; } // end of SMPBasicBlock::GetGlobalDefIterFromDefAddr() +#if 0 // never used // Given a USE operand and the address of its instr, return DEF using DU chains or Phi func. set<DefOrUse, LessDefUse>::iterator SMPBasicBlock::GetDefFromOpAddr(op_t UseOp, ea_t InstAddr, int SSANum) { ea_t DefAddr; @@ -736,6 +737,7 @@ set<DefOrUse, LessDefUse>::iterator SMPBasicBlock::GetDefFromOpAddr(op_t UseOp, return DefIter; } // end of SMPBasicBlock::GetDefFromOpAddr() +#endif // Given a USE operand and the address of its instr, return DEF addr using DU chains or Phi func. // If DEF is in a Phi function, we return the block number, which never conflicts with instruction @@ -1688,6 +1690,38 @@ bool SMPBasicBlock::PropagateDEFSignedness(void) { return changed; } // end of SMPBasicBlock::PropagateDEFSignedness() +// backtrack from CallAddr and mark unsigned args based on bitset +void SMPBasicBlock::MarkUnsignedArgs(ea_t CallAddr, unsigned int ArgPosBits) { + list<SMPInstr *>::reverse_iterator RevInstIter; + unsigned int UnsignedArgCount = CountBitsSet(ArgPosBits); + unsigned int UnsignedArgsFound = 0; + + for (RevInstIter = this->GetRevInstBegin(); RevInstIter != this->GetRevInstEnd(); ++RevInstIter) { + SMPInstr *CurrInst = (*RevInstIter); + ea_t InstAddr = CurrInst->GetAddr(); + if (InstAddr < CallAddr) { + SMPitype FlowType = CurrInst->GetDataFlowType(); + if ((FlowType == CALL) || (FlowType == INDIR_CALL)) { + // We have reached a call before the one we care about. Done. + break; + } + size_t ArgumentNumber; + if (CurrInst->MDIsArgumentPass(ArgumentNumber)) { + if (0 != (ArgPosBits & (1 << ArgumentNumber))) { + // ArgumentNumber matches a bit set in ArgPosBits. + CurrInst->SetUnsignedArg(); + ++UnsignedArgsFound; + if (UnsignedArgsFound >= UnsignedArgCount) { + break; // found them all + } + } + } + } + } + + return; +} // end of SMPBasicBlock::MarkUnsignedArgs() + // Find the stack target of a call to memset() and the size in bytes of the memset() region. // If the memset() target is not on the stack, return false. If the // size of the memset() region is not a constant, we also return false. diff --git a/SMPBasicBlock.h b/SMPBasicBlock.h index ecebbab6..a9bbc4ca 100644 --- a/SMPBasicBlock.h +++ b/SMPBasicBlock.h @@ -75,7 +75,7 @@ using namespace std; #define BLOCK_SET_REACHES_OUT_STALE 0x01 #define BLOCK_SET_LOOP_TAIL_BLOCK 0x02 #define BLOCK_SET_LOOP_HEADER_BLOCK 0x04 -#define BLOCK_SET_UNUSED4 0x08 +#define BLOCK_SET_CALLS_UNSIGNED_ARG_FUNC 0x08 #define BLOCK_SET_UNUSED5 0x10 #define BLOCK_SET_UNUSED6 0x20 #define BLOCK_SET_UNUSED7 0x40 @@ -83,7 +83,7 @@ using namespace std; #define BLOCK_RESET_REACHES_OUT_STALE 0xfe #define BLOCK_RESET_LOOP_TAIL_BLOCK 0xfd #define BLOCK_RESET_LOOP_HEADER_BLOCK 0xfb -#define BLOCK_RESET_UNUSED4 0xf7 +#define BLOCK_RESET_CALLS_UNSIGNED_ARG_FUNC 0xf7 #define BLOCK_RESET_UNUSED5 0xef #define BLOCK_RESET_UNUSED6 0xdf #define BLOCK_RESET_UNUSED7 0xbf @@ -149,8 +149,10 @@ public: int GetPredPosition(int BlockNum); // What is position # within Preds of BlockNum? set<SMPPhiFunction, LessPhi>::iterator FindPhi(op_t FindOp); set<op_t, LessOp>::iterator FindLocalName(op_t FindOp); +#if 0 // Given a USE operand and the address of its instr, return DEF from DU chains or Phi func. set<DefOrUse, LessDefUse>::iterator GetDefFromOpAddr(op_t UseOp, ea_t InstAddr, int SSANum); +#endif set<DefOrUse, LessDefUse>::iterator GetGlobalDefIterFromDefAddr(op_t DefOp, ea_t DefAddr); ea_t GetDefAddrFromUseAddr(op_t UseOp, ea_t InstAddr, int SSANum, bool LocalName); bool IsOpDestTruncatedWrite(op_t DefOp, int DefSSANum, ea_t DefAddr); // Does DefOp get written out in truncated form (lower bits only)? @@ -182,6 +184,7 @@ public: inline void SetFlagsDeadBeforeFirstKill(bool flag) { flag ? (booleans1 |= BLOCK_SET_FLAGS_DEAD_BEFORE_FIRST_KILL) : (booleans1 &= BLOCK_RESET_FLAGS_DEAD_BEFORE_FIRST_KILL); }; inline void SetReachesOutStale(void) { booleans2 |= BLOCK_SET_REACHES_OUT_STALE; }; inline void SetLoopHeaderBlock(void) { booleans2 |= BLOCK_SET_LOOP_HEADER_BLOCK; }; + inline void SetCallsUnsignedArgFunc(void) { booleans2 |= BLOCK_SET_CALLS_UNSIGNED_ARG_FUNC; }; inline void SetNumber(int Num) { BlockNum = Num; }; inline void SetOutgoingStackDelta(sval_t OutDelta) { OutgoingStackDelta = OutDelta; }; void LinkToPred(SMPBasicBlock *Predecessor); @@ -226,6 +229,7 @@ public: inline bool IsReachesOutStale(void) const { return (booleans2 & BLOCK_SET_REACHES_OUT_STALE); }; inline bool IsLoopTailBlock(void) const { return (booleans2 & BLOCK_SET_LOOP_TAIL_BLOCK); }; inline bool IsLoopHeaderBlock(void) const { return (booleans2 & BLOCK_SET_LOOP_HEADER_BLOCK); }; + inline bool CallsUnsignedArgFunc(void) const { return (booleans2 & BLOCK_SET_CALLS_UNSIGNED_ARG_FUNC); }; inline bool IsSelfLoop(void) const { return (IsLoopHeaderBlock() && (GetNumber() == GetLoopHeaderNumber())); }; inline bool IsLiveIn(op_t CurrOp) const { return (LiveInSet.end() != LiveInSet.find(CurrOp)); @@ -303,6 +307,7 @@ public: void PropagatePhiFGInfo(void); void PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsigned short SignMask); bool PropagateDEFSignedness(void); // propagate DEF signedness to USE if USE has no signedness info. + void MarkUnsignedArgs(ea_t CallAddr, unsigned int ArgPosBits); // backtrack from CallAddr and mark unsigned args based on bitset bool ComputeReachesInSet(void); // return true if changes were made to the ReachesInSet bool ComputeReachesOutSet(void); // return true if changes were made to the ReachesOutSet void UpdateDownExposedDefs(op_t DefOp, ea_t InstAddr); // Add DefOp, remove previous defs of DefOp that now do not reach the end of the block. diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp index 7efe8dc8..e6139185 100644 --- a/SMPDataFlowAnalysis.cpp +++ b/SMPDataFlowAnalysis.cpp @@ -1659,6 +1659,147 @@ void GetSinkStringForCallName(string CalleeName, string &SinkString) { return; } +// Map system or library call name to the argument number that +// should have an unsigned value and should be guarded from the +// signedness error that results from copying a signed value +// into the outgoing argument. Argument numbers are zero-based. +// We will return 0 when there is no argument to worry about +// for a particular library or system call name. +static map<string, unsigned int> UnsignedArgPositionMap; + +void InitUnsignedArgPositionMap(void) { + pair<string, unsigned int> MapEntry; + pair<map<string, unsigned int>::iterator, bool> InsertResult; + + // <string.h> + MapEntry.first = string("memchr"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("memcmp"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("memcpy"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("memmove"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("memset"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("strncat"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("strncmp"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("strncpy"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("strxfrm"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + // <stdlib.h> + MapEntry.first = string("malloc"); + MapEntry.second = STARS_ARG_POS_0; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("calloc"); + MapEntry.second = (STARS_ARG_POS_0 | STARS_ARG_POS_1); + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("realloc"); + MapEntry.second = STARS_ARG_POS_1; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("bsearch"); + MapEntry.second = (STARS_ARG_POS_2 | STARS_ARG_POS_3); + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("qsort"); + MapEntry.second = (STARS_ARG_POS_1 | STARS_ARG_POS_2); + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("mblen"); + MapEntry.second = STARS_ARG_POS_1; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("mbtowc"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("mbstowcs"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + MapEntry.first = string("wcstombs"); + MapEntry.second = STARS_ARG_POS_2; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + // <stdio.h> + MapEntry.first = string("setvbuf"); + MapEntry.second = STARS_ARG_POS_3; + InsertResult = UnsignedArgPositionMap.insert(MapEntry); + assert(InsertResult.second); + + return; +} + +// Return unsigned arg position bitset for call name from the sink map. +// If we don't care find the call name, we return 0. +void GetUnsignedArgPositionsForCallName(string CalleeName, unsigned int &ArgPosBits) { + map<string, unsigned int>::iterator MapIter; + + ArgPosBits = 0; // Change if found later + MapIter = UnsignedArgPositionMap.find(CalleeName); + + if (MapIter != UnsignedArgPositionMap.end()) { // found it + ArgPosBits = MapIter->second; + } + return; +} + +// Utility to count bits set in an unsigned int, e.g. ArgPosBits. +unsigned int CountBitsSet(unsigned int ArgPosBits) { + unsigned int count; // count accumulates the total bits set in ArgPosBits + for (count = 0; ArgPosBits; ++count) { + ArgPosBits &= (ArgPosBits - 1); // clear the least significant bit set + } + // Brian Kernighan's method goes through as many iterations as there are set bits. + // So if we have a 32-bit word with only the high bit set, then it will only go once through the loop. + // Published in 1988, the C Programming Language 2nd Ed. (by Brian W. Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. + // On April 19, 2006 Don Knuth pointed out to me that this method "was first published by Peter Wegner in CACM 3 (1960), 322. + // (Also discovered independently by Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)" + return count; +} + // Initialize the FG info for the return register from any library function // whose name implies that we know certain return values (e.g. atoi() returns // a signed integer, while strtoul() returns an unsigned long). diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index 47db8b62..501ae5b2 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -791,9 +791,31 @@ void InitLibFuncFGInfoMaps(void); // the case of integer error detection of a value passed to that call. void InitIntegerErrorCallSinkMap(void); - // Return sink string for call name from the sink map. // If we don't care find the call name, we return an empty string. void GetSinkStringForCallName(string CalleeName, string &SinkString); +// Init map system or library call name to the argument number that +// should have an unsigned value and should be guarded from the +// signedness error that results from copying a signed value +// into the outgoing argument. Argument numbers are zero-based; see bit defs below. +void InitUnsignedArgPositionMap(void); + +// Define argument position bits to put into UnsignedArgPositionMap. +// A system or library call name could have more than one argument +// position to protect from signedness errors. +#define STARS_ARG_POS_0 0x1 +#define STARS_ARG_POS_1 0x2 +#define STARS_ARG_POS_2 0x4 +#define STARS_ARG_POS_3 0x8 +#define STARS_ARG_POS_4 0x16 + +// Return unsigned argument position bitmap for call name from the unsigned arg position map. +// We will return 0 when there is no argument to worry about +// for a particular library or system call name. +void GetUnsignedArgPositionsForCallName(string CalleeName, unsigned int &ArgPosBits); + +// Utility to count bits set in an unsigned int, e.g. ArgPosBits. +unsigned int CountBitsSet(unsigned int ArgPosBits); + #endif diff --git a/SMPInstr.cpp b/SMPInstr.cpp index b648b9ae..9a2748b1 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -4652,6 +4652,17 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) { this->BasicBlock->GetFunc()->UpdateDefFGInfo(DefHashValue, FGEntry); } } + // See if we make a call to a library function that needs signedness checks on + // unsigned incoming args. + unsigned int ArgPosBits = 0; + GetUnsignedArgPositionsForCallName(FuncName, ArgPosBits); + if (0 < ArgPosBits) { + // Mark our basic block as containing this type of call, so signedness checks + // can be done later. + this->GetBlock()->SetCallsUnsignedArgFunc(); + // Find the argument assignments and mark them, to trigger later signedness checks. + this->GetBlock()->MarkUnsignedArgs(this->GetAddr(), ArgPosBits); + } } // end of case4 (function calls) else if (case5) { // signed or unsigned conditional set opcode if (UnsignedSetOpcode) { @@ -5032,7 +5043,7 @@ bool SMPInstr::IsBenignOverflow(int &IdiomCode) { // Do we detect a situation in which it is safe to check for signedness errors on // the stack write (return false), or not (return true to be safe). bool SMPInstr::SkipSignednessCheckOnStackWrite(int DefSSANum) { - bool SkipCheck = true; + bool SkipCheck = (!(this->IsUnsignedArg())); op_t StackDefOp = this->DEFMemOp; size_t DefBitWidth = 8 * GetOpDataSize(StackDefOp); if (DefBitWidth < MD_NORMAL_MACHINE_BITWIDTH) { @@ -8459,8 +8470,10 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile, list<size_t> &Lo bool StackDestination = false; // Outargs locations are reused for different function calls, so no inference of their // signedness is valid. We maintain a flag to suppress signedness checks on writes to - // outargs locations on the stack. + // outargs locations on the stack. We make an exception for the known unsigned args to + // certain library functions. bool OutArgsWrite = false; + bool IgnoreOutArgsWrite = false; bool NonStackMemDestination = false; if (o_reg == DestSearchOp.type) { StackDestination = false; @@ -8482,6 +8495,7 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile, list<size_t> &Lo bool success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo); assert(success); OutArgsWrite = this->GetBlock()->GetFunc()->IsInOutgoingArgsRegion(DefOp); + IgnoreOutArgsWrite = OutArgsWrite && (!(this->IsUnsignedArg())); // ignore out args write iff not a known unsigned argument } else if (NonStackMemDestination) { // We can check truncation of USE operand, but we know nothing about @@ -8744,7 +8758,7 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile, list<size_t> &Lo #endif #endif } // end if truncation - else if (!OutArgsWrite && !PartialStore) { // still need to check for signedness errors even if no truncation + else if (!IgnoreOutArgsWrite && !PartialStore) { // still need to check for signedness errors even if no truncation if (!SuppressSignednessCheck && (0 == IdiomCode)) { unsigned short DummySignMask; if (this->MDIsSignedLoad(DummySignMask)) { @@ -8753,6 +8767,9 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile, list<size_t> &Lo SuppressSignednessCheck = true; } } + if (this->IsUnsignedArg()) { + DefIsUnsigned = true; + } if (UseIsSigned && DefIsUnsigned) { if (!SuppressSignednessCheck || (0 == IdiomCode)) { SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS UNSIGNED %zu %s ZZ %s \n", diff --git a/SMPInstr.h b/SMPInstr.h index c9d9aff2..11004da1 100644 --- a/SMPInstr.h +++ b/SMPInstr.h @@ -318,7 +318,7 @@ private: #define INSTR_SET_ALLOCA 0x10 #define INSTR_SET_SUPPRESS_NUMERIC_ANNOTATION 0x20 #define INSTR_SET_HASH_OPERATION 0x40 -#define INSTR_SET_UNUSED128 0x80 +#define INSTR_SET_UNSIGNED_ARG 0x80 #define INSTR_RESET_DEFS_NORMALIZED 0xfe #define INSTR_RESET_INST_REMOVED 0xfd #define INSTR_RESET_STACK_ALIGNMENT 0xfb @@ -326,7 +326,7 @@ private: #define INSTR_RESET_ALLOCA 0xef #define INSTR_RESET_SUPPRESS_NUMERIC_ANNOTATION 0xdf #define INSTR_RESET_HASH_OPERATION 0xbf -#define INSTR_RESET_UNUSED128 0x7f +#define INSTR_RESET_UNSIGNED_ARG 0x7f class SMPInstr { public: @@ -389,6 +389,7 @@ public: inline void SetAllocaCall(void) { booleans4 |= INSTR_SET_ALLOCA; }; inline void SetSuppressNumericAnnotation(void) { booleans4 |= INSTR_SET_SUPPRESS_NUMERIC_ANNOTATION; }; inline void SetHashOperation(void) { booleans4 |= INSTR_SET_HASH_OPERATION; }; + inline void SetUnsignedArg(void) { booleans4 |= INSTR_SET_UNSIGNED_ARG; }; inline void AddDef(op_t DefOp, SMPOperandType DefType, int DefSSANum) { Defs.SetRef(DefOp, DefType, DefSSANum); } @@ -497,6 +498,7 @@ public: inline bool IsAllocaCall(void) const { return (booleans4 & INSTR_SET_ALLOCA); }; inline bool IsNumericAnnotationSuppressed(void) const { return (booleans4 & INSTR_SET_SUPPRESS_NUMERIC_ANNOTATION); }; inline bool IsHashOperation(void) const { return (booleans4 & INSTR_SET_HASH_OPERATION); }; + inline bool IsUnsignedArg(void) const { return (booleans4 & INSTR_SET_UNSIGNED_ARG); }; inline bool MDIsReducedWidthMove(void) const { return (MoveSource.dtyp < 2); }; // Are numeric values from a system call trusted input, so that all numeric errors // derived from the returned values should be treated as benign? diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp index 9e162d6b..7ddac40a 100644 --- a/SMPStaticAnalyzer.cpp +++ b/SMPStaticAnalyzer.cpp @@ -415,6 +415,7 @@ int IDAP_init(void) { InitSMPUsesFlags(); InitLibFuncFGInfoMaps(); InitIntegerErrorCallSinkMap(); + InitUnsignedArgPositionMap(); InitStackAlteration(); return PLUGIN_KEEP; } // end of IDAP_init -- GitLab