#ifndef SMPDATAFLOWANALYSIS_H
#define SMPDATAFLOWANALYSIS_H 1

// SMPDataFlowAnalysis.h
//
// This header defines the common enumerations and types needed by
//  instructions, basic blocks, and functions.

#include <list>
#include <vector>
#include <map>
#include <set>

#include <cstddef>

#include <pro.h>
#include <ida.hpp>
#include <ua.hpp>
#include <intel.hpp>

using namespace std;

class SMPFunction;
class SMPBasicBlock;
class SMPInstr;
class DefOrUseList;
class SMPPhiFunction;

// Value for an SSA subscript number before it is initialized by SSA renaming.
#define SMP_SSA_UNINIT  (-1)

// Map register number to a string for printing or annotations.
extern char *RegNames[];

// Use the carry flag as the surrogate for the EFLAGS register in the x86 architecture.
//  IDA Pro distinguishes among four different flag bits; we aggregate them.
#define X86_FLAGS_REG R_cf

// Debug: print one operand from an instruction or DEF or USE list.
void PrintDefUse(ulong feature, int OpNum);
void PrintSIB(op_t Opnd);
void PrintOneOperand(op_t Opnd, ulong features, int OpNum); 
void PrintListOperand(op_t Opnd, int SSANum = SMP_SSA_UNINIT); 
void PrintOperand(op_t Opnd);

// MACHINE DEPENDENT: Could operand be an indirect memory access?
bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP);

// MACHINE DEPENDENT: Is operand a stack memory access?
bool MDIsStackAccessOpnd(op_t CurrOp, bool UseFP);

// MACHINE DEPENDENT: Get base and index registers and displacement/addr from operand.
void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Scale, ea_t &Offset);

// MACHINE DEPENDENT: Is operand type a known type that we want to analyze?
bool MDKnownOperandType(op_t TempOp);

ushort MDCanonicalizeSubReg(const ushort Reg1);
bool MDLessReg(const ushort Reg1, const ushort Reg2);

// MACHINE DEPENDENT: comparison class to permit sorting of op_t operands.
class LessOp {
public:
	bool operator()(const op_t &Opnd1, const op_t &Opnd2) const {
		if (Opnd1.type != Opnd2.type)
			return (Opnd1.type < Opnd2.type);
		switch (Opnd1.type) {
			case o_void: return false;
			case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg);
			case o_mem: return (Opnd1.addr < Opnd2.addr);
			case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib);
							else if (Opnd2.hasSIB) return true;  // no SIB < has SIB
							else if (Opnd1.hasSIB) return false; // no SIB < has SIB
							else return MDLessReg(Opnd1.phrase, Opnd2.phrase); // no SIB bytes
			case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB)
							  return ((Opnd1.sib < Opnd2.sib) 
								|| ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr)));
							else if (Opnd2.hasSIB) return true;  // no SIB < has SIB
							else if (Opnd1.hasSIB) return false; // no SIB < has SIB
							else return ((Opnd1.addr < Opnd2.addr)
								|| ((Opnd1.addr == Opnd2.addr) 
								&& MDLessReg(Opnd1.reg, Opnd2.reg))); // no SIB bytes
			case o_imm: return (Opnd1.value < Opnd2.value);
			case o_far:  // fall through to o_near case
			case o_near: return (Opnd1.addr < Opnd2.addr);
			case o_trreg:  // fall through
			case o_dbreg:  // fall through
			case o_crreg:  // fall through
			case o_fpreg:  // fall through
			case o_mmxreg: // fall through
			case o_xmmreg: return (Opnd1.reg < Opnd2.reg); // no subword regs to deal with

			default: msg("Unknown operand type in LessOp.\n"); return false;
		}; // end switch (Opnd1.type)
	} // end operator
}; // end class LessOp

// Get the size in bytes of the data type of an operand.
size_t GetOpDataSize(op_t DataOp);

// SMP will operate on a doubly linked list of instructions, which
//  will be grouped into basic blocks. We will include info about each
//  instruction that can not all be obtained easily in IDA Pro, and
//  is not all obtained easily from one place in IDA Pro. This 
//  information will include all that we need to perform data flow 
//  analyses. Because the LABEL and CASE could be applied to other
//  kinds of instructions during analysis, we define the enum values
//  as non-overlapping bits. This type might be stored as an int
//  in the future as a result of this design decision.
enum SMPitype {
	DEFAULT = 0,     // most instructions in middle of basic block
	LABEL = 1,       // instr also has a label (can be jump target) UNUSED
	CASE = 2,        // instr also has case label (is inside switch) UNUSED
	JUMP = 4,        // unconditional branch
	COND_BRANCH = 8, // conditional branch
	INDIR_JUMP = 16, // indirect unconditional branch,
	CALL = 32,       // direct call
	INDIR_CALL = 64, // indirect call
	RETURN = 128,    // function return OR TAIL CALL
	HALT = 256       // execution stops
};

// A primary goal of data flow analysis in SMP will be to identify
//  operands as numeric or pointer type. If an instruction definitely
//  produces a numeric result, an annotation could inform the mmStrata
//  run time monitor to simply record 'numeric' for the result register
//  and otherwise do nothing. This could greatly cut down the overhead
//  at run time. If the result register already has an mmStrata metadata
//  type of numeric, then the annotation could tell mmStrata to do nothing
//  at all, which is even better. In order to emit these annotations, we
//  will track all operands in a function as to their basic SMP types.
#define PROF_BASE 256
enum SMPOperandType { // What type is a given register or memory operand?
	UNINIT = 0,      // Operand type not yet analyzed; type lattice top
	NUMERIC = 1,     // Definitely holds non-pointer value (char, int, etc.)
	CODEPTR = 2,     // Definitely holds a code address; mmStrata considers this NUMERIC
	POINTER = 4,     // Definitely holds a data address
	STACKPTR = 8,    // Definitely holds a stack data address (refinement of POINTER)
	GLOBALPTR = 16,   // Definitely holds a global static data address (refinement of POINTER)
	HEAPPTR = 32,     // Definitely holds a heap data address (refinement of POINTER)
	PTROFFSET = 64,   // Offset from a pointer, e.g. a difference between two pointers
	UNKNOWN = 128,     // Might hold an address, might not (Bad!); type lattice bottom
	PROF_NUMERIC = NUMERIC + PROF_BASE, // NUMERIC, determined using profiler input
	PROF_CODEPTR = CODEPTR + PROF_BASE, // CODEPTR, determined using profiler input
	PROF_POINTER = POINTER + PROF_BASE, // POINTER, determined using profiler input
	PROF_STACKPTR = STACKPTR + PROF_BASE, // STACKPTR, determined using profiler input
	PROF_GLOBALPTR = GLOBALPTR + PROF_BASE, // GLOBALPTR, determined using profiler input
	PROF_HEAPPTR = HEAPPTR + PROF_BASE, // HEAPPTR, determined using profiler input
	PROF_PTROFFSET = PTROFFSET + PROF_BASE, // PTROFFSET, determined using profiler input
	PROF_UNKNOWN = UNKNOWN + PROF_BASE // UNKNOWN, determined using profiler input
};

// Macros to query and compare SMPOperandType values.

// Is OpType any kind of pointer to data, whether profile derived or not?
#define IsDataPtr(OpType) (((OpType & (~PROF_BASE)) >= POINTER) && ((OpType & (~PROF_BASE)) <= HEAPPTR))

// Is OpType one of the refined data pointer types (stack, global, heap)?
#define IsRefinedDataPtr(OpType) (((OpType & (~PROF_BASE)) >= STACKPTR) && ((OpType & (~PROF_BASE)) <= HEAPPTR))

// Is OpType the lattice bottom (overdefined) type UNKNOWN, profile derived or not?
#define IsUnknown(OpType) ((OpType & (~PROF_BASE)) == UNKNOWN)

// Is OpType1 the same type as OpType2, ignoring profile derivation?
#define IsEqType(OpType1, OpType2)  ((OpType1 & (~PROF_BASE)) == (OpType2 & (~PROF_BASE)))
#define IsNotEqType(OpType1, OpType2)  ((OpType1 & (~PROF_BASE)) != (OpType2 & (~PROF_BASE)))

// Is OpType some kind of NUMERIC (i.e. NUMERIC or CODEPTR)?
#define IsNumeric(OpType) (IsEqType(OpType, NUMERIC) || IsEqType(OpType, CODEPTR))

// Is OpType a profile derived type?
#define IsProfDerived(OpType)  (0 != (OpType & PROF_BASE))

// Make OpType into its equivalent profile derived type.
#define MakeProfDerived(OpType) ((SMPOperandType)(OpType | PROF_BASE))

// Enumeration to keep track of whether the metadata for each DEF is
//  used, unused, redundant, etc.
enum SMPMetadataType {
	DEF_METADATA_UNANALYZED = 0, // have not finished analyzing
	DEF_METADATA_UNUSED = 1,   // will not be used in a later store inst
	DEF_METADATA_USED = 2,     // will be used in bounds check for a store
	DEF_METADATA_REDUNDANT = 4 // DEF reg already has the same metadata
								// it would receive by updating from this inst
};

class DefOrUse {
public:
	// Constructors
	DefOrUse(void);
	DefOrUse(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT);
	DefOrUse(const DefOrUse &CopyIn);
	// Operators
	DefOrUse &operator=(const DefOrUse &rhs);
	// Get methods
	inline op_t GetOp(void) const { return Operand; };
	inline SMPOperandType GetType(void) const { return OpType; };
	inline int GetSSANum(void) const { return SSANumber; };
	inline SMPMetadataType GetMetadataStatus(void) const { return MetadataStatus; };
	// Set methods
	inline void SetSSANum(int Num) { SSANumber = Num; };
	void SetType(SMPOperandType Type, const SMPInstr* instr); 
	inline void SetMetadataStatus(SMPMetadataType NewStatus) {
		MetadataStatus = NewStatus;
	};
	// Printing methods
	void Dump(void) const;
private:
	// Data
	op_t Operand;
	SMPOperandType OpType;
	SMPOperandType NonSpeculativeOpType;
	int SSANumber;
	SMPMetadataType MetadataStatus;
}; // end of class DefOrUse

// Comparison operator class to permit use of DefOrUse type in sets.
class LessDefUse {
public:
	bool operator()(const DefOrUse &Ref1, const DefOrUse &Ref2) const {
		set<op_t, LessOp>::value_compare OpComp = this->DummySet.value_comp(); // LessOp()
		return OpComp(Ref1.GetOp(), Ref2.GetOp());
	}
private:
	set<op_t, LessOp> DummySet;
};

// Same class is used for both a DEF list and a USE list.
class DefOrUseSet {
public:
	// Constructors
	DefOrUseSet(void);
	// Get methods
	// DefOrUse GetRef(size_t index) const;
	inline size_t GetSize(void) const { return (size_t) Refs.size(); };
	inline set<DefOrUse, LessDefUse>::iterator GetFirstRef(void) { return Refs.begin(); };
	inline set<DefOrUse, LessDefUse>::iterator GetLastRef(void) { return Refs.end(); };
	set<DefOrUse, LessDefUse>::iterator FindRef(op_t SearchOp);
	// Set methods
	void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT);
	set<DefOrUse, LessDefUse>::iterator SetSSANum(op_t CurrOp, int NewSSASub);
	set<DefOrUse, LessDefUse>::iterator SetType(op_t CurrOp, SMPOperandType Type, const SMPInstr* Instr);
	set<DefOrUse, LessDefUse>::iterator SetMetadata(op_t CurrOp, SMPMetadataType Status);
	inline void clear(void) { Refs.clear(); return; };
	// Printing methods
	void Dump(void);
	// Analysis methods
	bool TypesAgreeNoFlags(void); // Are all types consistent, ignoring flags registers?
private:
	// Data
	set<DefOrUse, LessDefUse> Refs;  // Defined or used operand with type and SSA subscript
};  // end class DefOrUseSet

// Same class is used for both a DEF list and a USE list.
class DefOrUseList {
public:
	// Constructors
	DefOrUseList(void);
	// Get methods
	DefOrUse GetRef(size_t index) const;
	inline size_t GetSize(void) const { return (size_t) Refs.size(); };
	inline DefOrUse *GetRefNum(size_t index) { return &Refs[index]; };
	inline vector<DefOrUse>::iterator GetFirstRef(void) { return Refs.begin(); };
	inline vector<DefOrUse>::iterator GetLastRef(void) { return Refs.end(); };
	inline int GetRefSSANum(size_t index) const { return Refs.at(index).GetSSANum(); };
	inline SMPOperandType GetRefType(size_t index) const { return Refs.at(index).GetType(); };
	// Set methods
	void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT);
	void SetSSANum(size_t index, int NewSSASub);
	void SetType(size_t index, SMPOperandType Type, const SMPInstr* Instr);
	inline void clear(void) { Refs.clear(); return; };
	// Printing methods
	void Dump(void) const;
	// Analysis methods
	void EraseDuplicates(void);  // in case SMPInstr::MDFixupDefUseLists() adds duplicate
private:
	// Data
	vector<DefOrUse> Refs;  // Defined or used operand with type and SSA subscript
};  // end class DefOrUseList

class SMPPhiFunction {
public:
	// Constructors
	SMPPhiFunction(int GlobIndex, const DefOrUse &Def);
	// Get methods
	inline int GetIndex(void) const { return index; };
	inline size_t GetPhiListSize(void) const { return this->SubscriptedOps.GetSize(); };
	inline DefOrUse GetPhiRef(size_t i) const { return this->SubscriptedOps.GetRef(i); };
	inline DefOrUse *GetRefNum(size_t index) { return SubscriptedOps.GetRefNum(index); };
	inline vector<DefOrUse>::iterator GetFirstOp(void) { return SubscriptedOps.GetFirstRef(); };
	inline vector<DefOrUse>::iterator GetLastOp(void) { return SubscriptedOps.GetLastRef(); };
	inline op_t GetAnyOp(void) const { return DefName.GetOp(); };
	inline int GetDefSSANum(void) const { return DefName.GetSSANum(); };
	inline int GetUseSSANum(size_t index) const { return SubscriptedOps.GetRefSSANum(index); };
	inline SMPOperandType GetDefType(void) const { return DefName.GetType(); };
	inline SMPOperandType GetUseType(size_t index) const { return SubscriptedOps.GetRefType(index); };
	inline SMPMetadataType GetDefMetadata(void) const { return DefName.GetMetadataStatus(); };
	// Set methods
	void PushBack(DefOrUse Ref); // add inputs to phi function
	void SetSSADef(int NewSSASub);
	void SetSSARef(size_t index, int NewSSASub);
	void SetDefType(SMPOperandType Type, const SMPInstr* instr);
	void SetRefType(size_t index, SMPOperandType Type, const SMPInstr* instr);
	void SetDefMetadata(SMPMetadataType Status);
	// Query methods
	bool HasTypedUses(void); // true ==> at least one USE is not UNINIT type
	// Printing methods
	void Dump(void) const;
	// Analysis methods
	SMPOperandType ConditionalMeetType(void) const; // meet operator over USE types
private:
	int index;  // index into SMPFunction::BlocksDefinedIn (i.e. global name index)
	DefOrUse DefName; // output of phi function
	DefOrUseList SubscriptedOps; // ordered list of inputs to phi function
}; // end class SMPPhiFunction

// Comparison function to permit sorting phi functions and keeping them in sets.
class LessPhi {
public:
	bool operator() (const SMPPhiFunction &Phi1, const SMPPhiFunction &Phi2) const {
		return (Phi1.GetIndex() < Phi2.GetIndex());
	}
}; // end of class LessPhi

// In SSA computations, we are storing the GlobalNames index into the op_t fields
//  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
//  fields.
unsigned int ExtractGlobalIndex(op_t GlobalOp);
void SetGlobalIndex(op_t *TempOp, size_t index);

class SMPDefUseChain {
public:
	// Constructors
	SMPDefUseChain(void);
	SMPDefUseChain(op_t Name, ea_t Def = BADADDR);
	// Get methods
	inline op_t GetName(void) const { return SSAName; };
	inline ea_t GetDef(void) const { return RefInstrs.at(0); };
	inline ea_t GetUse(size_t index) const { return RefInstrs.at(index + 1); };
	inline ea_t GetLastUse(void) const {
		if (RefInstrs.size() > 1) 
			return RefInstrs.back();
		else
			return BADADDR;
	};
	// Set methods
	void SetName(op_t Name);
	void SetDef(ea_t Def);
	void PushUse(ea_t Use);
	// Printing methods
	void Dump(int SSANum = (-1));
private:
	op_t SSAName;  // What variable is defined and used in the chain?
	vector<ea_t> RefInstrs; // First is always DEF, rest are USE.
}; // end class DefUseChain

class SMPDUChainArray {
public:
	// Constructor
	SMPDUChainArray(void);
	SMPDUChainArray(op_t Name);
	// Set methods.
	void SetName(op_t Name);
	// Printing methods.
	void Dump(void);
	// Data (public for convenience)
	vector<SMPDefUseChain> DUChains; // indexed by SSA number
private:
	op_t SSAName; // What variable is used in all chains in the array?
}; // end class SMPDUChainArray

class SMPCompleteDUChains {
public:
	// Printing methods.
	void Dump(void);
	// Data (public for convenience)
	vector<SMPDUChainArray> ChainsByName; // indexed by name index
}; // end class SMPCompleteDUChains

// Initialization routine for DFA category.
extern SMPitype DFACategory[];
void InitDFACategory(void);

// Initialization routine for Type category.
extern int SMPTypeCategory[];
void InitTypeCategory(void);

// Initializations for CPU flags DEF/USE.
extern bool SMPDefsFlags[];
extern bool SMPUsesFlags[];
void InitSMPDefsFlags(void);
void InitSMPUsesFlags(void);

#endif