/* * SMPFunction.h - <see below>. * * Copyright (c) 2000, 2001, 2010 - University of Virginia * * This file is part of the Memory Error Detection System (MEDS) infrastructure. * This file may be used and modified for non-commercial purposes as long as * all copyright, permission, and nonwarranty notices are preserved. * Redistribution is prohibited without prior written consent from the University * of Virginia. * * Please contact the authors for restrictions applying to commercial use. * * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Author: University of Virginia * e-mail: jwd@virginia.com * URL : http://www.cs.virginia.edu/ * * Additional copyrights 2010, 2011 by Zephyr Software LLC * e-mail: {clc,jwd}@zephyr-software.com * URL : http://www.zephyr-software.com/ * */ #ifndef SMPFUNCTION_H #define SMPFUNCTION_H 1 // SMPFunction.h // // This header defines the interfaces needed for analyzing functions, performing live variable analysis, // putting code into SSA form, etc. #include <utility> #include <list> #include <vector> #include <map> #include <set> #include <cstddef> #include "SMPDBInterface.h" #ifdef STARS_IDA_INTERFACE #include <pro.h> #include <ida.hpp> #include <ua.hpp> #endif #include "SMPDataFlowAnalysis.h" #include "SMPInstr.h" #include "SMPBasicBlock.h" #include "ProfilerInformation.h" class SMPProgram; // forward declaration so we can declare a pointer to an SMPProgram using namespace std; // Use IDA info for switch tables to link indirect jumps to successor blocks? #define SMP_USE_SWITCH_TABLE_INFO 1 struct LocalVar { char VarName[MAXSMPVARSTR]; long offset; size_t size; }; // Comparison function for sorting. bool LocalVarCompare(const LocalVar &LV1, const LocalVar &LV2); // Entry for each byte address in the stack frame struct StackFrameEntry { struct LocalVar *VarPtr; // LocalVar that includes this offset long offset; // offset relative to incoming stack pointer bool Read; // was this entry ever read by an instruction? bool Written; // was this entry ever written by an instruction? bool AddressTaken; // did this entry have its address taken? bool ESPRelativeAccess; // ever accessed by ESP+const? bool EBPRelativeAccess; // ever accessed by EBP-const? (only if UseFP) bool IndexedAccess; // index reg of unknown value added to the base address }; enum FuncType { FUNC_UNKNOWN = 0, FUNC_SAFE = 1, FUNC_UNSAFE = 2 }; static char StaticFuncName[MAXSMPSTR]; // Class encapsulating all that the SMP static analyzer cares to know // about a function. class SMPFunction { public: // Constructors and destructors. SMPFunction(func_t *Info, SMPProgram *pgm); // Default constructor ~SMPFunction(); // Get methods inline SMPProgram *GetProg(void) const { return Program; }; inline const char *GetFuncName(void) const { get_func_name(FirstEA, StaticFuncName, MAXSMPSTR-1); return StaticFuncName; }; func_t *GetFuncInfo(void); inline long GetTypedDefs(void) const { return TypedDefs; }; inline long GetUntypedDefs(void) const { return UntypedDefs; }; inline long GetTypedPhiDefs(void) const { return TypedPhiDefs; }; inline long GetUntypedPhiDefs(void) const { return UntypedPhiDefs; }; inline long GetSafeBlocks(void) const { return SafeBlocks; }; inline long GetUnsafeBlocks(void) const { return UnsafeBlocks; }; inline set<op_t, LessOp>::iterator GetFirstGlobalName(void) { return GlobalNames.begin(); }; inline set<op_t, LessOp>::iterator GetLastGlobalName(void) { return GlobalNames.end(); }; inline size_t NumGlobalNames(void) { return GlobalNames.size(); }; inline set<op_t, LessOp>::iterator FindGlobalName(op_t SearchOp) { return GlobalNames.find(SearchOp); }; set<op_t, LessOp>::iterator GetFirstLiveIn(void); // LiveInSet.begin() set<op_t, LessOp>::iterator GetLastLiveIn(void); // LiveInSet.end() set<op_t, LessOp>::iterator GetFirstLiveOut(void); // LiveOutSet.begin() set<op_t, LessOp>::iterator GetLastLiveOut(void); // LiveOutSet.end() set<op_t, LessOp>::iterator GetFirstVarKill(void); // KillSet.begin() set<op_t, LessOp>::iterator GetLastVarKill(void); // KillSet.end() inline FuncType GetReturnAddressStatus(void) const { return ReturnAddrStatus;} inline const ea_t GetStartAddr(void) const { return FuncInfo.startEA; }; // exposing the start address of the function. Used in RecurseAndMark inline const vector<ea_t> GetCallTargets(void) const { return AllCallTargets; }; inline size_t GetNumCallTargets(void) const { return AllCallTargets.size(); }; inline ea_t GetCallTargetAddr(size_t index) const { return AllCallTargets.at(index); }; bool GetIsSpeculative() { return IsSpeculative; } inline size_t GetNumCallers(void) const { return AllCallSources.size(); }; bool MDGetFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct FineGrainedInfo &FGEntry); // Return fine grained stack entry for stack op TempOp from instruction at InstAddr ea_t GetGlobalDefAddr(op_t DefOp, int SSANum); // retrieve from GlobalDefAddrBySSA or return BADADDR SMPBasicBlock *GetBlockFromInstAddr(ea_t InstAddr); // retrieve from InstBlockMap or assert inline SMPBasicBlock *GetBlockByNum(size_t BlockIndex) const { return RPOBlocks.at(BlockIndex); }; SMPInstr *GetInstFromAddr(ea_t InstAddr); inline size_t GetNumBlocks(void) const { return Blocks.size(); }; set<SMPPhiFunction, LessPhi>::iterator GetPhiIterForPhiDef(size_t BlockNumber, op_t DefOp, int SSANum); // Given block # and PhiDef op_t and SSANum, return the Phi iterator or assert. // Six methods to get values from the maps of global reg/SSA to FG info. // For local names, see corresponding methods in SMPBasicBlock. unsigned short GetDefSignMiscInfo(int DefHashValue); unsigned short GetUseSignMiscInfo(int UseHashValue); unsigned short GetDefWidthTypeInfo(int DefHashValue); unsigned short GetUseWidthTypeInfo(int UseHashValue); struct FineGrainedInfo GetDefFGInfo(int DefHashValue); struct FineGrainedInfo GetUseFGInfo(int UseHashValue); // Set methods inline void IncTypedPhiDefs(void) { ++TypedPhiDefs; return; }; inline void IncUntypedPhiDefs(void) { ++UntypedPhiDefs; return; }; inline void DecTypedPhiDefs(void) { --TypedPhiDefs; return; }; inline void DecUntypedPhiDefs(void) { --UntypedPhiDefs; return; }; inline void SetReturnAddressStatus(FuncType funcType) { ReturnAddrStatus = funcType; } inline void SetFuncSafe(bool Status) { SafeFunc = Status; return; }; inline void SetSpecFuncSafe(bool Status) { SpecSafeFunc = Status; return; }; inline void SetNeedsFrame(bool Status) { NeedsStackReferent = Status; return; }; inline void SetSpecNeedsFrame(bool Status) { SpecNeedsStackReferent = Status; return; }; inline void SetIsSpeculative(bool IsS) { IsSpeculative = IsS; } void AddCallSource(ea_t addr); // Six methods to set values into the maps of global reg/SSA to FG info. // For local names, see corresponding methods in SMPBasicBlock. void UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo); void UpdateUseSignMiscInfo(int UseHashValue, unsigned short NewInfo); void UpdateDefWidthTypeInfo(int DefHashValue, unsigned short NewInfo); void UpdateUseWidthTypeInfo(int UseHashValue, unsigned short NewInfo); void UpdateDefFGInfo(int DefHashValue, struct FineGrainedInfo NewFG); void UpdateUseFGInfo(int UseHashValue, struct FineGrainedInfo NewFG); bool MDUpdateFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct FineGrainedInfo NewFG); // Return true if we update fine grained stack entry for stack op TempOp from instruction at InstAddr // Query methods inline bool HasIndirectCalls(void) const { return IndirectCalls; }; inline bool HasUnresolvedIndirectCalls(void) const { return UnresolvedIndirectCalls; }; inline bool HasIndirectJumps(void) const { return IndirectJumps; }; inline bool HasUnresolvedIndirectJumps(void) const { return UnresolvedIndirectJumps; }; inline bool IsDirectlyRecursive(void) const { return DirectlyRecursive; }; inline bool HasSharedChunks(void) const { return SharedChunks; }; inline bool HasGoodRTLs(void) const { return BuiltRTLs; }; inline bool IsAddrInFunc(ea_t addr) { return ((addr >= FuncInfo.startEA) && (addr <= FuncInfo.endEA)); } inline bool IsLibFunc(void) const { return LibFunc; }; inline bool IsLeaf(void) const { return (!IndirectCalls && DirectCallTargets.empty()); }; inline bool IsSafe(void) const { return SafeFunc; }; inline bool IsSpecSafe(void) const { return SpecSafeFunc; }; inline bool IsSafeCallee(void) const { return SafeCallee; }; inline bool IsSpecSafeCallee(void) const { return SafeCallee; }; inline bool NeedsStackFrame(void) const { return NeedsStackReferent; }; inline bool SpecNeedsStackFrame(void) const { return SpecNeedsStackReferent; }; inline bool WritesAboveReturnAddress(void) const { return WritesAboveRA; }; inline bool OutArgsRegionComputed(void) const { return OutgoingArgsComputed; }; bool IsInOutgoingArgsRegion(op_t DestOp); // Does DestOp fall within outgoing args area? inline bool IsGlobalName(op_t RefOp) const { return (GlobalNames.end() != GlobalNames.find(RefOp)); }; inline bool UsesFramePointer(void) const { return UseFP; }; inline bool HasGoodFGStackTable(void) const { return (!(FineGrainedStackTable.empty())); }; inline bool IsLiveIn(op_t CurrOp) const { return (LiveInSet.end() != LiveInSet.find(CurrOp)); } inline bool IsLiveOut(op_t CurrOp) const { return (LiveOutSet.end() != LiveOutSet.find(CurrOp)); } inline bool IsVarKill(op_t CurrOp) const { return (KillSet.end() != KillSet.find(CurrOp)); } // Printing methods void Dump(void); // debug dump // Analysis methods void ResetProcessedBlocks(void); // Set Processed flag to false in all blocks bool ComputeGlobalSets(void); // return true if LiveIn, LiveOut, Kill sets change void Analyze(void); // Analyze all instructions in function void EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile); void RPONumberBlocks(void); void SetLinks(void); // Link basic blocks and map instructions to blocks void LiveVariableAnalysis(void); // Perform Live Variable Analysis across all blocks void ComputeSSA(void); // Compute SSA form data structures void AliasAnalysis(void); // Find memory writes with possible aliases void InferTypes(bool FirstIter); // Determine NUMERIC, POINTER, etc. for all operands void InferFGInfo(void); // determine signedness and width info for all operands SMPOperandType InferGlobalDefType(op_t DefOp, int SSANum, SMPBasicBlock *DefBlock, bool CallInst, ea_t DefAddr); // Can DEF type be inferred from all USEs? void ApplyProfilerInformation(ProfilerInformation *pi); void AnalyzeMetadataLiveness(void); // Is metadata live or dead for each inst bool PropagateGlobalMetadata(op_t UseOp, SMPMetadataType Status, int SSANum); void FindRedundantMetadata(void); // Do consecutive DEFs have same type? bool IsBenignUnderflowDEF(op_t DefOp, int DefSSANum, size_t DefAddr); // Do we not care if DEF underflowed, due to how it is used? bool HasIntErrorCallSink(op_t DefOp, int DefSSANum, size_t DefAddr, string &SinkString); // DEF is passed to known system/lib call void FreeUnusedMemory2(void); // After loop 2 in SMPProgram::Analyze(), free memory void FreeUnusedMemory3(void); // After loop 3 in SMPProgram::Analyze(), free memory void FreeUnusedMemory4(void); // After loop 4 (type inference) in SMPProgram::Analyze(), free memory private: // Data SMPProgram* Program; // pointer to the program I'm part of func_t FuncInfo; ea_t FirstEA; // address of first instruction in the function #if 0 char FuncName[MAXSMPSTR]; #endif int BlockCount; // number of basic blocks in the function bool UseFP; // Does function use a frame pointer? bool StaticFunc; // Is function declared static? bool LibFunc; // is function a standard library function? bool IndirectCalls; // Does function make indirect calls? bool UnresolvedIndirectCalls; // Calls could not all be linked to targets bool IndirectJumps; // Does function make indirect jumps? bool UnresolvedIndirectJumps; // Jumps could not all be linked to targets bool DirectlyRecursive; // Calls itself bool SharedChunks; // Does function share a tail chunk with other functions? bool CallsAlloca; // Does function allocate stack space after initial allocation? bool AnalyzedSP; // Were stack pointer change points successfully analyzed? bool BuiltRTLs; // Were RTLs built succcessfully for all instructions? bool SafeFunc; // Function needs no bounds checking from mmStrata bool SpecSafeFunc; // Function needs no bounds checking from mmStrata bool SafeCallee; // SafeFunc AND Caller of this func does not need a stack referent bool SpecSafeCallee; // SafeFunc AND Caller of this func does not need a stack referent bool WritesAboveRA; // Function writes to stack above the return address bool NeedsStackReferent; // mmStrata will need a stack referent to do bounds checking bool SpecNeedsStackReferent; // mmStrata will need a stack referent to do bounds checking bool HasIndirectWrites; // Function has at least one indirect memory write bool PossibleIndirectCallTarget; // function address appears in data, could indicate indirect calls to it bool OutgoingArgsComputed; // Were able to compute OutgoingArgsSize bool GoodLocalVarTable; // LocalVarTable was built successfully bool IsSpeculative; // Have we started the speculative portion of the analysis for this function. long TypedDefs; // How many DEFs in instructions were not UNINIT type after InferTypes() long UntypedDefs; // How many DEFs in instructions are UNINIT type after InferTypes() long TypedPhiDefs; long UntypedPhiDefs; long SafeBlocks; // no unsafe memory writes in block; counter long UnsafeBlocks; // possibly unsafe memory write in block; counter size_t Size; // Function size in code bytes asize_t LocalVarsSize; // size of local vars region of stack frame ushort CalleeSavedRegsSize; // stack size of callee pushed regs int RetAddrSize; // size of return address on stack (4 for most machines) asize_t IncomingArgsSize; // size of incoming args on stack size_t OutgoingArgsSize; // portion of LocalVarsSize that is really outgoing args space ea_t LocalVarsAllocInstr; // address of instr that allocates stack frame ea_t LocalVarsDeallocInstr; // address of epilogue instr that deallocs frame adiff_t AllocPointDelta; // IDA sp_delta AFTER stack frame allocation instruction sval_t MinStackDelta; // smallest (negative) value that stack pointer reaches, relative // to the value it has at the entry point of the function sval_t MaxStackDelta; // highest (positive) value that stack pointer reaches, relative // to the value it has at the entry point of the function long LocalVarOffsetLimit; // upper bound on stack-relative offsets FuncType ReturnAddrStatus; // Marked true if the return address is safe from being overwritten list<SMPInstr *> Instrs; list<SMPBasicBlock *> Blocks; vector<ea_t> DirectCallTargets; // addresses called directly vector<ea_t> IndirectCallTargets; // addresses called by indirect calls vector<ea_t> AllCallTargets; // union of direct and indirect set<ea_t> AllCallSources; // functions that call this one map<ea_t, SMPBasicBlock *> InstBlockMap; vector<SMPBasicBlock *> RPOBlocks; vector<int> IDom; // Immediate dominators, indexed and valued by block RPO numbers vector<pair<int, list<int> > > DomTree; // Dominator tree, as parent # and list of children set<op_t, LessOp> GlobalNames; // operands used in more than one block; needed in SSA vector<list<int> > BlocksDefinedIn; // What blocks DEF each GlobalName; index = op # in GlobalNames vector<int> SSACounter; // SSA subscript #, indexed by GlobalNames op # vector<list<int> > SSAStack; // SSA stack of most recent SSA number, indexed by global # vector<struct LocalVar> LocalVarTable; // offset-sorted list of local vars / outgoing args vector<struct StackFrameEntry> StackFrameMap; // memory map of every byte on stack frame vector<struct FineGrainedInfo> FineGrainedStackTable; // built using opcode analysis, not IDA stack info vector<int> SavedRegLoc; // indexed by reg #; offset from return address of callee-saved reg vector<SMPOperandType> ReturnRegTypes; // indexed by reg #; inferred types upon return map<int, ea_t> GlobalDefAddrBySSA; // map hash of global name & SSANum to DEF inst addr // If global DEF for that SSA is found in a Phi function, we use block number instead of inst addr // Instruction addresses should never overlap block #s, as block #s start at 0 and top out at a few hundred. // NOTE: We are currently limiting this map to registers, not all global names. map<int, struct FineGrainedInfo> GlobalDefFGInfoBySSA; // map hash of global name & SSANum to DEF FG info. // NOTE: We are currently limiting this map to registers, not all global names. map<int, struct FineGrainedInfo> GlobalUseFGInfoBySSA; // map hash of global name & SSANum to USE FG info. // NOTE: We are currently limiting this map to registers, not all global names. // Three sets used in live variable analysis set<op_t, LessOp> KillSet; // registers killed in this function set<op_t, LessOp> LiveOutSet; // Live-Out registers in this function set<op_t, LessOp> LiveInSet; // registers live in to this function // Methods void EraseInstRange(ea_t FirstAddr, ea_t LastAddr); void SetStackFrameInfo(void); ea_t FindAllocPoint(asize_t); // Deal with difficult to find stack frame allocations bool MDFixFrameInfo(void); // Redefine stack regions for our needs bool MDFixUseFP(void); // Fix IDA errors affecting UseFP void BuildLocalVarTable(void); // Determine local variable boundaries on the stack void SemiNaiveLocalVarID(void); // Semi-naive algorithm for local var boundaries ID bool AuditLocalVarTable(void); // Check and correct IDA Pro listing of local frame members. void FindOutgoingArgsSize(void); // Find portion of local frame that is outgoing args bool WritesAboveLocalFrame(op_t DestOp); // Is DestOp direct stack write to caller's frame? bool IndexedWritesAboveLocalFrame(op_t DestOp); // Is DestOp direct stack write to caller's frame? bool MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP, bool &Indexed, bool &Signed, bool &Unsigned); // Find any stack memory access in TempOp, return offset, size, // whether the Frame Pointer was used and signedness (if sign-extended or zero-extended). bool FindAlloca(void); // true if found evidence of alloca() allocations void MDFindSavedRegs(void); // Fill in SavedRegLoc[] offsets void EmitStackFrameAnnotations(FILE *AnnotFile, SMPInstr *Instr); void ComputeIDoms(void); // Compute immediate dominators of all blocks into IDom[] int IntersectDoms(int, int) const; // Find Dom intersection (as IDom[] index) for 2 blocks void ComputeDomFrontiers(void); // Compute dominance frontiers for all blocks void ComputeGlobalNames(void); // Compute the GlobalNames set void ComputeBlocksDefinedIn(void); // Compute the BlocksDefinedIn vector void InsertPhiFunctions(void); // Insert SSA phi functions at top of each basic block void BuildDominatorTree(void); // Build the DomTree structure int SSANewNumber(size_t GlobNameIndex); // SSA helper: increment and return SSA number void SSARename(int BlockNumber); // SSA main helper: rename throughout block void SSARenumber(void); // Renumber SSA subscripts for all names void FreeSSAMemory(void); // After SSA #s are in DEFs and USEs, free SSA data structures. bool FindPossibleChainAlias(SMPInstr *CurrInst, op_t DefOp, int SSANum); // Does the DefOp DEF_USE chain starting at CurrInst contain an indirect mem write? bool FindChainAliasHelper(list<SMPBasicBlock *>::iterator CurrBlock, op_t DefOp); // recursive helper for global DefOp with DU-chain that traverses CFG bool ConditionalTypePropagation(void); // Apply SCC algorithm to unresolved Phi DEFs void MDFindReturnTypes(void); // Fill ReturnRegTypes[] void MarkFunctionSafe(void); // Does analysis to see if the function can be marked safe }; // end class SMPFunction #endif