#ifndef SMPDATAFLOWANALYSIS_H #define SMPDATAFLOWANALYSIS_H 1 // SMPDataFlowAnalysis.h // // This header defines the common enumerations and types needed by // instructions, basic blocks, and functions. #include <list> #include <vector> #include <map> #include <set> #include <cstddef> #include <pro.h> #include <ida.hpp> #include <ua.hpp> #include <intel.hpp> using namespace std; class SMPFunction; class SMPBasicBlock; class SMPInstr; class DefOrUseList; class SMPPhiFunction; // Value for an SSA subscript number before it is initialized by SSA renaming. #define SMP_SSA_UNINIT (-1) // Map register number to a string for printing or annotations. extern char *RegNames[]; // Use the carry flag as the surrogate for the EFLAGS register in the x86 architecture. // IDA Pro distinguishes among four different flag bits; we aggregate them. #define X86_FLAGS_REG R_cf // Debug: print one operand from an instruction or DEF or USE list. void PrintDefUse(ulong feature, int OpNum); void PrintSIB(op_t Opnd); void PrintOneOperand(op_t Opnd, ulong features, int OpNum); void PrintListOperand(op_t Opnd, int SSANum = SMP_SSA_UNINIT); void PrintOperand(op_t Opnd); // MACHINE DEPENDENT: Could operand be an indirect memory access? bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP); // MACHINE DEPENDENT: Is operand a stack memory access? bool MDIsStackAccessOpnd(op_t CurrOp, bool UseFP); // MACHINE DEPENDENT: Get base and index registers and displacement/addr from operand. void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Scale, ea_t &Offset); // MACHINE DEPENDENT: Is operand type a known type that we want to analyze? bool MDKnownOperandType(op_t TempOp); ushort MDCanonicalizeSubReg(const ushort Reg1); bool MDLessReg(const ushort Reg1, const ushort Reg2); // MACHINE DEPENDENT: comparison class to permit sorting of op_t operands. class LessOp { public: bool operator()(const op_t &Opnd1, const op_t &Opnd2) const { if (Opnd1.type != Opnd2.type) return (Opnd1.type < Opnd2.type); switch (Opnd1.type) { case o_void: return false; case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg); case o_mem: return (Opnd1.addr < Opnd2.addr); case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib); else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return MDLessReg(Opnd1.phrase, Opnd2.phrase); // no SIB bytes case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) || ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr))); else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return ((Opnd1.addr < Opnd2.addr) || ((Opnd1.addr == Opnd2.addr) && MDLessReg(Opnd1.reg, Opnd2.reg))); // no SIB bytes case o_imm: return (Opnd1.value < Opnd2.value); case o_far: // fall through to o_near case case o_near: return (Opnd1.addr < Opnd2.addr); case o_trreg: // fall through case o_dbreg: // fall through case o_crreg: // fall through case o_fpreg: // fall through case o_mmxreg: // fall through case o_xmmreg: return (Opnd1.reg < Opnd2.reg); // no subword regs to deal with default: msg("Unknown operand type in LessOp.\n"); return false; }; // end switch (Opnd1.type) } // end operator }; // end class LessOp // Get the size in bytes of the data type of an operand. size_t GetOpDataSize(op_t DataOp); // SMP will operate on a doubly linked list of instructions, which // will be grouped into basic blocks. We will include info about each // instruction that can not all be obtained easily in IDA Pro, and // is not all obtained easily from one place in IDA Pro. This // information will include all that we need to perform data flow // analyses. Because the LABEL and CASE could be applied to other // kinds of instructions during analysis, we define the enum values // as non-overlapping bits. This type might be stored as an int // in the future as a result of this design decision. enum SMPitype { DEFAULT = 0, // most instructions in middle of basic block LABEL = 1, // instr also has a label (can be jump target) UNUSED CASE = 2, // instr also has case label (is inside switch) UNUSED JUMP = 4, // unconditional branch COND_BRANCH = 8, // conditional branch INDIR_JUMP = 16, // indirect unconditional branch, CALL = 32, // direct call INDIR_CALL = 64, // indirect call RETURN = 128, // function return OR TAIL CALL HALT = 256 // execution stops }; // A primary goal of data flow analysis in SMP will be to identify // operands as numeric or pointer type. If an instruction definitely // produces a numeric result, an annotation could inform the mmStrata // run time monitor to simply record 'numeric' for the result register // and otherwise do nothing. This could greatly cut down the overhead // at run time. If the result register already has an mmStrata metadata // type of numeric, then the annotation could tell mmStrata to do nothing // at all, which is even better. In order to emit these annotations, we // will track all operands in a function as to their basic SMP types. #define PROF_BASE 256 enum SMPOperandType { // What type is a given register or memory operand? UNINIT = 0, // Operand type not yet analyzed; type lattice top NUMERIC = 1, // Definitely holds non-pointer value (char, int, etc.) CODEPTR = 2, // Definitely holds a code address; mmStrata considers this NUMERIC POINTER = 4, // Definitely holds a data address STACKPTR = 8, // Definitely holds a stack data address (refinement of POINTER) GLOBALPTR = 16, // Definitely holds a global static data address (refinement of POINTER) HEAPPTR = 32, // Definitely holds a heap data address (refinement of POINTER) PTROFFSET = 64, // Offset from a pointer, e.g. a difference between two pointers UNKNOWN = 128, // Might hold an address, might not (Bad!); type lattice bottom PROF_NUMERIC = NUMERIC + PROF_BASE, // NUMERIC, determined using profiler input PROF_CODEPTR = CODEPTR + PROF_BASE, // CODEPTR, determined using profiler input PROF_POINTER = POINTER + PROF_BASE, // POINTER, determined using profiler input PROF_STACKPTR = STACKPTR + PROF_BASE, // STACKPTR, determined using profiler input PROF_GLOBALPTR = GLOBALPTR + PROF_BASE, // GLOBALPTR, determined using profiler input PROF_HEAPPTR = HEAPPTR + PROF_BASE, // HEAPPTR, determined using profiler input PROF_PTROFFSET = PTROFFSET + PROF_BASE, // PTROFFSET, determined using profiler input PROF_UNKNOWN = UNKNOWN + PROF_BASE // UNKNOWN, determined using profiler input }; // Macros to query and compare SMPOperandType values. // Is OpType any kind of pointer to data, whether profile derived or not? #define IsDataPtr(OpType) (((OpType & (~PROF_BASE)) >= POINTER) && ((OpType & (~PROF_BASE)) <= HEAPPTR)) // Is OpType one of the refined data pointer types (stack, global, heap)? #define IsRefinedDataPtr(OpType) (((OpType & (~PROF_BASE)) >= STACKPTR) && ((OpType & (~PROF_BASE)) <= HEAPPTR)) // Is OpType the lattice bottom (overdefined) type UNKNOWN, profile derived or not? #define IsUnknown(OpType) ((OpType & (~PROF_BASE)) == UNKNOWN) // Is OpType1 the same type as OpType2, ignoring profile derivation? #define IsEqType(OpType1, OpType2) ((OpType1 & (~PROF_BASE)) == (OpType2 & (~PROF_BASE))) #define IsNotEqType(OpType1, OpType2) ((OpType1 & (~PROF_BASE)) != (OpType2 & (~PROF_BASE))) // Is OpType some kind of NUMERIC (i.e. NUMERIC or CODEPTR)? #define IsNumeric(OpType) (IsEqType(OpType, NUMERIC) || IsEqType(OpType, CODEPTR)) // Is OpType a profile derived type? #define IsProfDerived(OpType) (0 != (OpType & PROF_BASE)) // Make OpType into its equivalent profile derived type. #define MakeProfDerived(OpType) ((SMPOperandType)(OpType | PROF_BASE)) // Enumeration to keep track of whether the metadata for each DEF is // used, unused, redundant, etc. enum SMPMetadataType { DEF_METADATA_UNANALYZED = 0, // have not finished analyzing DEF_METADATA_UNUSED = 1, // will not be used in a later store inst DEF_METADATA_USED = 2, // will be used in bounds check for a store DEF_METADATA_REDUNDANT = 4 // DEF reg already has the same metadata // it would receive by updating from this inst }; class DefOrUse { public: // Constructors DefOrUse(void); DefOrUse(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); DefOrUse(const DefOrUse &CopyIn); // Operators DefOrUse &operator=(const DefOrUse &rhs); // Get methods inline op_t GetOp(void) const { return Operand; }; inline SMPOperandType GetType(void) const { return OpType; }; inline int GetSSANum(void) const { return SSANumber; }; inline SMPMetadataType GetMetadataStatus(void) const { return MetadataStatus; }; // Set methods inline void SetSSANum(int Num) { SSANumber = Num; }; void SetType(SMPOperandType Type, const SMPInstr* instr); inline void SetMetadataStatus(SMPMetadataType NewStatus) { MetadataStatus = NewStatus; }; // Printing methods void Dump(void) const; private: // Data op_t Operand; SMPOperandType OpType; SMPOperandType NonSpeculativeOpType; int SSANumber; SMPMetadataType MetadataStatus; }; // end of class DefOrUse // Comparison operator class to permit use of DefOrUse type in sets. class LessDefUse { public: bool operator()(const DefOrUse &Ref1, const DefOrUse &Ref2) const { set<op_t, LessOp>::value_compare OpComp = this->DummySet.value_comp(); // LessOp() return OpComp(Ref1.GetOp(), Ref2.GetOp()); } private: set<op_t, LessOp> DummySet; }; // Same class is used for both a DEF list and a USE list. class DefOrUseSet { public: // Constructors DefOrUseSet(void); // Get methods // DefOrUse GetRef(size_t index) const; inline size_t GetSize(void) const { return (size_t) Refs.size(); }; inline set<DefOrUse, LessDefUse>::iterator GetFirstRef(void) { return Refs.begin(); }; inline set<DefOrUse, LessDefUse>::iterator GetLastRef(void) { return Refs.end(); }; set<DefOrUse, LessDefUse>::iterator FindRef(op_t SearchOp); // Set methods void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); set<DefOrUse, LessDefUse>::iterator SetSSANum(op_t CurrOp, int NewSSASub); set<DefOrUse, LessDefUse>::iterator SetType(op_t CurrOp, SMPOperandType Type, const SMPInstr* Instr); set<DefOrUse, LessDefUse>::iterator SetMetadata(op_t CurrOp, SMPMetadataType Status); inline void clear(void) { Refs.clear(); return; }; // Printing methods void Dump(void); // Analysis methods bool TypesAgreeNoFlags(void); // Are all types consistent, ignoring flags registers? private: // Data set<DefOrUse, LessDefUse> Refs; // Defined or used operand with type and SSA subscript }; // end class DefOrUseSet // Same class is used for both a DEF list and a USE list. class DefOrUseList { public: // Constructors DefOrUseList(void); // Get methods DefOrUse GetRef(size_t index) const; inline size_t GetSize(void) const { return (size_t) Refs.size(); }; inline DefOrUse *GetRefNum(size_t index) { return &Refs[index]; }; inline vector<DefOrUse>::iterator GetFirstRef(void) { return Refs.begin(); }; inline vector<DefOrUse>::iterator GetLastRef(void) { return Refs.end(); }; inline int GetRefSSANum(size_t index) const { return Refs.at(index).GetSSANum(); }; inline SMPOperandType GetRefType(size_t index) const { return Refs.at(index).GetType(); }; // Set methods void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); void SetSSANum(size_t index, int NewSSASub); void SetType(size_t index, SMPOperandType Type, const SMPInstr* Instr); inline void clear(void) { Refs.clear(); return; }; // Printing methods void Dump(void) const; // Analysis methods void EraseDuplicates(void); // in case SMPInstr::MDFixupDefUseLists() adds duplicate private: // Data vector<DefOrUse> Refs; // Defined or used operand with type and SSA subscript }; // end class DefOrUseList class SMPPhiFunction { public: // Constructors SMPPhiFunction(int GlobIndex, const DefOrUse &Def); // Get methods inline int GetIndex(void) const { return index; }; inline size_t GetPhiListSize(void) const { return this->SubscriptedOps.GetSize(); }; inline DefOrUse GetPhiRef(size_t i) const { return this->SubscriptedOps.GetRef(i); }; inline DefOrUse *GetRefNum(size_t index) { return SubscriptedOps.GetRefNum(index); }; inline vector<DefOrUse>::iterator GetFirstOp(void) { return SubscriptedOps.GetFirstRef(); }; inline vector<DefOrUse>::iterator GetLastOp(void) { return SubscriptedOps.GetLastRef(); }; inline op_t GetAnyOp(void) const { return DefName.GetOp(); }; inline int GetDefSSANum(void) const { return DefName.GetSSANum(); }; inline int GetUseSSANum(size_t index) const { return SubscriptedOps.GetRefSSANum(index); }; inline SMPOperandType GetDefType(void) const { return DefName.GetType(); }; inline SMPOperandType GetUseType(size_t index) const { return SubscriptedOps.GetRefType(index); }; inline SMPMetadataType GetDefMetadata(void) const { return DefName.GetMetadataStatus(); }; // Set methods void PushBack(DefOrUse Ref); // add inputs to phi function void SetSSADef(int NewSSASub); void SetSSARef(size_t index, int NewSSASub); void SetDefType(SMPOperandType Type, const SMPInstr* instr); void SetRefType(size_t index, SMPOperandType Type, const SMPInstr* instr); void SetDefMetadata(SMPMetadataType Status); // Query methods bool HasTypedUses(void); // true ==> at least one USE is not UNINIT type // Printing methods void Dump(void) const; // Analysis methods SMPOperandType ConditionalMeetType(void) const; // meet operator over USE types private: int index; // index into SMPFunction::BlocksDefinedIn (i.e. global name index) DefOrUse DefName; // output of phi function DefOrUseList SubscriptedOps; // ordered list of inputs to phi function }; // end class SMPPhiFunction // Comparison function to permit sorting phi functions and keeping them in sets. class LessPhi { public: bool operator() (const SMPPhiFunction &Phi1, const SMPPhiFunction &Phi2) const { return (Phi1.GetIndex() < Phi2.GetIndex()); } }; // end of class LessPhi // In SSA computations, we are storing the GlobalNames index into the op_t fields // n, offb, and offo. This function extracts an unsigned int from these three 8-bit // fields. unsigned int ExtractGlobalIndex(op_t GlobalOp); void SetGlobalIndex(op_t *TempOp, size_t index); class SMPDefUseChain { public: // Constructors SMPDefUseChain(void); SMPDefUseChain(op_t Name, ea_t Def = BADADDR); // Get methods inline op_t GetName(void) const { return SSAName; }; inline ea_t GetDef(void) const { return RefInstrs.at(0); }; inline ea_t GetUse(size_t index) const { return RefInstrs.at(index + 1); }; inline ea_t GetLastUse(void) const { if (RefInstrs.size() > 1) return RefInstrs.back(); else return BADADDR; }; // Set methods void SetName(op_t Name); void SetDef(ea_t Def); void PushUse(ea_t Use); // Printing methods void Dump(int SSANum = (-1)); private: op_t SSAName; // What variable is defined and used in the chain? vector<ea_t> RefInstrs; // First is always DEF, rest are USE. }; // end class DefUseChain class SMPDUChainArray { public: // Constructor SMPDUChainArray(void); SMPDUChainArray(op_t Name); // Set methods. void SetName(op_t Name); // Printing methods. void Dump(void); // Data (public for convenience) vector<SMPDefUseChain> DUChains; // indexed by SSA number private: op_t SSAName; // What variable is used in all chains in the array? }; // end class SMPDUChainArray class SMPCompleteDUChains { public: // Printing methods. void Dump(void); // Data (public for convenience) vector<SMPDUChainArray> ChainsByName; // indexed by name index }; // end class SMPCompleteDUChains // Initialization routine for DFA category. extern SMPitype DFACategory[]; void InitDFACategory(void); // Initialization routine for Type category. extern int SMPTypeCategory[]; void InitTypeCategory(void); // Initializations for CPU flags DEF/USE. extern bool SMPDefsFlags[]; extern bool SMPUsesFlags[]; void InitSMPDefsFlags(void); void InitSMPUsesFlags(void); #endif