Skip to content
Snippets Groups Projects
SMPDataFlowAnalysis.cpp 179 KiB
Newer Older
jdh8d's avatar
jdh8d committed
/*
 * SMPDataFlowAnalysis.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
jdh8d's avatar
jdh8d committed
 */

clc5q's avatar
clc5q committed
//
// SMPDataFlowAnalysis.cpp
//
// This module contains common types and helper classes needed for the
//   SMP project (Software Memory Protection).
//

#include <list>
#include <set>
clc5q's avatar
clc5q committed
#include <vector>
#include <algorithm>
clc5q's avatar
clc5q committed

#include <cstring>
clc5q's avatar
clc5q committed

#include <pro.h>
clc5q's avatar
clc5q committed
#include <assert.h>
clc5q's avatar
clc5q committed
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>

#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"
#include "SMPInstr.h"
#include "SMPBasicBlock.h"
#include "SMPFunction.h"
clc5q's avatar
clc5q committed

// Set these to 1 for debugging output
clc5q's avatar
clc5q committed
// Compile-time debug switches: 1 enables the corresponding debug output, 0 disables it.
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0  // Fixing up stack frame info the way we want the offsets
#define SMP_DEBUG_OPERAND_TYPES 1  // leave on; warnings that should never happen (used by MDKnownOperandType)
clc5q's avatar
clc5q committed
// Highest register number covered by the tables below.
#if IDA_SDK_VERSION > 560
#define MAX_IDA_REG R_mxcsr
#else
#define MAX_IDA_REG 80
#endif

// Printable register names, indexed by register number (see enumeration RegNo
//  in intel.hpp). Word-sized (16-bit) names are kept in WordRegStrings instead.
const char *RegNames[MAX_IDA_REG + 1] =
	{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
	  "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	  "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
	  "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
	  "DS", "FS", "GS", "CF", "ZF", "SF", "OF", "PF",
	  "AF", "TF", "IF", "DF", "EFLAGS", "FPU_ST0", "FPU_ST1", "FPU_ST2",
	  "FPU_ST3", "FPU_ST4", "FPU_ST5", "FPU_ST6", "FPU_ST7", "FPU_CTRL", "FPU_STAT", "FPU_TAGS",
	  "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7",
	  "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7",
	  "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15",
	  "MXCSR"
	};

// Register sizes in bytes, parallel to RegNames (same index, same length).
//  16-bit register accesses are not represented here; GetOpDataSize() detects
//  them through the operand's dtyp field.
//  NOTE(review): the MMX0-MMX7 row is listed as 16 bytes although MMX registers
//  are architecturally 8 bytes wide -- confirm whether this is intentional.
const unsigned char RegSizes[MAX_IDA_REG + 1] =
	{ 4, 4, 4, 4, 4, 4, 4, 4,
	  4, 4, 4, 4, 4, 4, 4, 4,
	  1, 1, 1, 1, 1, 1, 1, 1,
	  4, 4, 4, 4, 4, 4, 4, 4,
	  4, 4, 4, 4, 4, 4, 4, 4,
	  4, 4, 4, 4, 4, 8, 8, 8,
	  8, 8, 8, 8, 8, 4, 4, 4,
	  16, 16, 16, 16, 16, 16, 16, 16,
	  16, 16, 16, 16, 16, 16, 16, 16,
	  16, 16, 16, 16, 16, 16, 16, 16,
	  4
	};

// Name returned by MDGetRegName() when the operand is not a valid register operand.
const char *ErrorStrings[1] = { "ERROR_REG" };

// Names of the 16-bit registers; MDGetRegName() indexes this table directly by
//  register number for word-sized operands whose register lies in R_ax..R_bx.
const char *WordRegStrings[4] = { "AX", "CX", "DX", "BX" };

// Printable names for signedness values.
//  NOTE(review): the meaning of each index is not visible in this file section;
//  the first and last entries are intentionally or accidentally identical -- confirm.
const char *SignednessStrings[4] = { "UNKNOWNSIGN", "SIGNED", "UNSIGNED", "UNKNOWNSIGN" };

// Same layout as SignednessStrings, with a NOFLAG prefix on every entry.
//  NOTE(review): presumably used for LEA-style results that do not set flags -- confirm against callers.
const char *LeaSignednessStrings[4] = { "NOFLAGUNKNOWNSIGN", "NOFLAGSIGNED", "NOFLAGUNSIGNED", "NOFLAGUNKNOWNSIGN" };

// Return the printable name of a register operand.
//  Word-sized accesses to R_ax..R_bx yield the 16-bit name (AX, CX, DX, BX);
//  every other valid register yields its RegNames entry, so subword registers
//  keep their own names instead of their parent's.
//  Returns ErrorStrings[0] if RegOp is not a register operand or its register
//  number is R_none or beyond MAX_IDA_REG.
const char *MDGetRegName(op_t RegOp) {
	const bool ValidReg = ((o_reg == RegOp.type) && (R_none != RegOp.reg) && (RegOp.reg <= MAX_IDA_REG));
	if (!ValidReg) {
		return ErrorStrings[0];
	}

	const bool WordSized = (dt_word == RegOp.dtyp);
	if (WordSized && (R_ax <= RegOp.reg) && (R_bx >= RegOp.reg)) {
		// 16-bit registers
		return WordRegStrings[RegOp.reg];
	}

	return RegNames[RegOp.reg];
}


clc5q's avatar
clc5q committed
// Define instruction categories for data flow analysis.
//  One entry for each value in [0, NN_last]; presumably indexed by the IDA
//  instruction code -- TODO confirm against the code that fills the table.
SMPitype DFACategory[NN_last+1];
// Define instruction categories for data type analysis (sized like DFACategory).
int SMPTypeCategory[NN_last+1];
clc5q's avatar
clc5q committed

// Define which instructions define and use the CPU flags.
//  One entry for each value in [0, NN_last]; presumably indexed by the IDA
//  instruction code -- TODO confirm against the code that fills the tables.
bool SMPDefsFlags[NN_last + 1];
bool SMPUsesFlags[NN_last + 1];

// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map.
//  DefOp must be a register operand (asserted). The register number occupies
//  the low 16 bits and the SSA number is shifted into the bits above it, so
//  distinct (register, SSANum) pairs map to distinct values as long as SSANum
//  fits in the remaining upper bits.
//  The shift is performed on an unsigned value because left-shifting a negative
//  signed int is undefined behavior; the resulting bit pattern is unchanged.
int HashGlobalNameAndSSA(op_t DefOp, int SSANum) {
	assert(o_reg == DefOp.type);
	unsigned int HashValue = (((unsigned int) SSANum) << 16) | ((unsigned int) DefOp.reg);
	return (int) HashValue;
}

// Get the size in bytes of the data type of an operand.
//  Register operands: 2 for a word-sized access (detected via dtyp), otherwise
//  the RegSizes entry for the register. A register number beyond MAX_IDA_REG
//  is reported and treated as 4 bytes rather than indexing past the table.
//  All other operands: size derived from dtyp; an unexpected dtyp is reported
//  and treated as 4 bytes.
size_t GetOpDataSize(op_t DataOp) {
	if (o_reg == DataOp.type) {
		if (DataOp.dtyp == dt_word) {
			return 2; // 16-bit register access
		}
		if (DataOp.reg > MAX_IDA_REG) {
			// Same range check MDGetRegName() applies before indexing its table.
			msg("ERROR: register number %d out of range in GetOpDataSize()\n", DataOp.reg);
			return 4;
		}
		return RegSizes[DataOp.reg];
	}

	size_t DataSize;
	switch (DataOp.dtyp) {
		case dt_byte:
			DataSize = 1;
			break;
		case dt_word:
			DataSize = 2;
			break;
		case dt_dword:
		case dt_float:
		case dt_code:
		case dt_unicode:
		case dt_string:
			DataSize = 4;
			break;
		case dt_double:
		case dt_qword:
			DataSize = 8;
			break;
		case dt_packreal:
			DataSize = 12;
			break;
		case dt_byte16:
			DataSize = 16;
			break;
		case dt_fword:
			DataSize = 6;
			break;
		case dt_3byte:
			DataSize = 3;
			break;
		default:
			msg("ERROR: unexpected data type %d in GetOpDataSize() :", DataOp.dtyp);
			PrintOperand(DataOp);
			msg("\n");
			DataSize = 4;
			break;
	}
	return DataSize;
} // end of GetOpDataSize()

// Return one of the bit width masks for the current operand.
//  Pass in DataSize in bytes if known, else pass in DataSize = 0, in which
//  case the size is obtained from GetOpDataSize(CurrOp).
//  An unrecognized size is reported and the initial value 32 is returned.
//  NOTE(review): that fallback is the literal 32, not one of the
//  FG_MASK_BITWIDTH_* constants -- confirm this is what callers expect.
unsigned short ComputeOperandBitWidthMask(op_t CurrOp, size_t DataSize) {
	unsigned short BitWidthMask = 32;
	if (0 == DataSize)
		DataSize = GetOpDataSize(CurrOp);

	switch (DataSize) {
		case 1:
			BitWidthMask = FG_MASK_BITWIDTH_8;
			break;
		case 2:
			BitWidthMask = FG_MASK_BITWIDTH_16;
			break;
		case 3:
			BitWidthMask = FG_MASK_BITWIDTH_24;
			break;
		case 4:
			BitWidthMask = FG_MASK_BITWIDTH_32;
			break;
		case 6:
			BitWidthMask = FG_MASK_BITWIDTH_48;
			break;
		case 8:
			BitWidthMask = FG_MASK_BITWIDTH_64;
			break;
		case 12:
			BitWidthMask = FG_MASK_BITWIDTH_96;
			break;
		case 16:
			BitWidthMask = FG_MASK_BITWIDTH_128;
			break;
		case 32:
			BitWidthMask = FG_MASK_BITWIDTH_256;
			break;
		default:
			// Cast: %d expects an int, while size_t may be wider.
			msg("ERROR: Unknown DataSize: %d bytes Operand: ", (int) DataSize);
			PrintOperand(CurrOp);
			msg("\n");
			break;
	}
	return BitWidthMask;
} // end of ComputeOperandBitWidthMask()

// Compute largest bit width from a SignMiscInfo bit mask.
//  Only the bits selected by FG_MASK_BITWIDTH_FIELDS are examined. Returns the
//  width in bits (256 down to 8) of the widest flag that is set, or 0 if none is.
size_t LargestBitWidthFromMask(unsigned short WidthTypeInfo) {
	// Ordered from widest to narrowest so that the first hit is the answer.
	static const unsigned int WidthFlags[] = {
		FG_MASK_BITWIDTH_256, FG_MASK_BITWIDTH_128, FG_MASK_BITWIDTH_96,
		FG_MASK_BITWIDTH_64, FG_MASK_BITWIDTH_48, FG_MASK_BITWIDTH_32,
		FG_MASK_BITWIDTH_24, FG_MASK_BITWIDTH_16, FG_MASK_BITWIDTH_8
	};
	static const size_t WidthBits[] = { 256, 128, 96, 64, 48, 32, 24, 16, 8 };
	const size_t TableLen = sizeof(WidthBits) / sizeof(WidthBits[0]);

	const unsigned short FieldBits = WidthTypeInfo & FG_MASK_BITWIDTH_FIELDS;
	for (size_t i = 0; i < TableLen; ++i) {
		if (FieldBits & WidthFlags[i]) {
			return WidthBits[i];
		}
	}
	return 0;
} // end of LargestBitWidthFromMask()

// Is CurrOp a general purpose register? (not flags, instruction pointer, non-integer reg, etc.)
//  True only for register operands whose number falls in one of the two
//  general purpose ranges of enum RegNo (intel.hpp): R_ax..R_di or R_al..R_dil.
bool MDIsGeneralPurposeReg(op_t CurrOp) {
	if (o_reg != CurrOp.type) {
		return false;
	}
	if ((R_ax <= CurrOp.reg) && (R_di >= CurrOp.reg)) {
		return true;
	}
	return ((R_al <= CurrOp.reg) && (R_dil >= CurrOp.reg));
}

clc5q's avatar
clc5q committed
// We need to make subword registers equal to their containing registers when we
//  do comparisons, so that we will realize that register EAX is killed by a prior DEF
//  of register AL, for example, and vice versa. To keep sets ordered strictly,
//  we also have to make AL and AH be equal to each other as well as equal to EAX.
clc5q's avatar
clc5q committed
#define FIRST_x86_SUBWORD_REG R_al
#define LAST_x86_SUBWORD_REG R_bh
bool MDLessReg(const ushort Reg1, const ushort Reg2) {
	ushort SReg1 = MDCanonicalizeSubReg(Reg1);
	ushort SReg2 = MDCanonicalizeSubReg(Reg2);
	return (SReg1 < SReg2);
} // end of MDLessReg()

// Map a byte-sized subword register (AL..BH) to the number of its containing
//  register; any other register number is returned unchanged.
ushort MDCanonicalizeSubReg(const ushort Reg1) {
	bool Subword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG));
	ushort SReg1 = Reg1;
	if (Subword) {
		// See enumeration RegNo in intel.hpp.
		if (SReg1 < 20)  // AL, CL, DL or BL
			SReg1 -= 16;
		else             // AH, CH, DH or BH
			SReg1 -= 20;
	}
	return SReg1;
} // end of MDCanonicalizeSubReg()
clc5q's avatar
clc5q committed

// In SSA computations, we are storing the GlobalNames index into the op_t fields
//  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
//  fields: offo supplies bits 16..23, offb bits 8..15, and n bits 0..7.
//  Each field is masked to 8 bits so that sign extension of a char field cannot
//  leak into the result.
unsigned int ExtractGlobalIndex(op_t GlobalOp) {
	unsigned int index = 0;
	index |= (((unsigned int) GlobalOp.offo) & 0x000000ff);
	index <<= 8;
	index |= (((unsigned int) GlobalOp.offb) & 0x000000ff);
	index <<= 8;
	index |= (((unsigned int) GlobalOp.n) & 0x000000ff);
	return index;
}

// Store the low 24 bits of index into the op_t fields n (bits 0..7),
//  offb (bits 8..15) and offo (bits 16..23). Higher bits of index are discarded.
void SetGlobalIndex(op_t *TempOp, size_t index) {
	const size_t LowByte = index & 0x000000ff;
	const size_t MidByte = (index >> 8) & 0x000000ff;
	const size_t HighByte = (index >> 16) & 0x000000ff;

	TempOp->n = (char) LowByte;
	TempOp->offb = (char) MidByte;
	TempOp->offo = (char) HighByte;
}

// Return true if CurrOp could be an indirect memory reference.
//  Non-memory operands (anything but o_mem, o_phrase, o_displ) are never indirect.
//  A memory operand is treated as direct only when its addressing uses nothing
//  but the stack pointer, or the frame pointer EBP while UseFP is true;
//  any other base register, index register, or non-zero scale is
//  conservatively reported as indirect.
bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP) {
	const bool MemoryKind = ((o_mem == CurrOp.type) || (o_phrase == CurrOp.type) || (o_displ == CurrOp.type));
	if (!MemoryKind)
		return false;

	if (!CurrOp.hasSIB) {
		// no SIB; can have base register only
		const ushort Base = CurrOp.reg;
		if (o_mem == CurrOp.type) { // no base register for o_mem
			if ((0 != Base) && (R_bp != Base)) {
				msg("base reg %d ignored \n", Base);
			}
			return false;
		}
		const bool FramePtrAccess = ((R_bp == Base) && UseFP); // EBP used as frame pointer
		const bool StackPtrAccess = (R_sp == Base);            // ESP used as stack pointer
		return !(FramePtrAccess || StackPtrAccess);
	}

	// SIB byte present: examine index, scale and base in turn.
	bool Result = false;
	const int Base = sib_base(CurrOp);
	const short Index = sib_index(CurrOp);

	const bool HasIndex = ((R_none != Index) && (R_sp != Index));
	if (HasIndex && !((R_bp == Index) && UseFP))
		Result = true;

	if (0 != sib_scale(CurrOp))
		Result = true;

	if (R_none != Base) {
		// EBP means "no base register" for o_mem, and is a direct frame access
		//  when UseFP; ESP is a direct stack access.
		const bool BasePtrIsDirect = ((R_bp == Base) && ((o_mem == CurrOp.type) || UseFP));
		const bool StackPtrIsDirect = (R_sp == Base);
		if (!(BasePtrIsDirect || StackPtrIsDirect))
			Result = true; // conservative; some other register used for addressing
	}

	return Result;
} // end MDIsIndirectMemoryOpnd()

// Extract the base and index registers and scale factor and displacement from the
//  memory operand.
//  MemOp must be o_phrase, o_displ or o_mem (asserted).
//  Outputs: BaseReg / IndexReg are register numbers or R_none when absent;
//  Scale is the raw value from sib_scale() (0 when there is no index register);
//  Offset is MemOp.addr.
void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Scale, ea_t &Offset) {
	assert((MemOp.type == o_phrase) || (MemOp.type == o_displ) || (MemOp.type == o_mem));

	Scale = 0;
	BaseReg = R_none;
	IndexReg = R_none;
	Offset = MemOp.addr;

	if (MemOp.hasSIB) {
		BaseReg = sib_base(MemOp);
		IndexReg = (int) sib_index(MemOp);
		if (R_sp == IndexReg) // signifies no index register
			IndexReg = R_none;
		if (R_none != IndexReg) {
			Scale = (ushort) sib_scale(MemOp);
		}
		if ((BaseReg == R_bp) && (MemOp.type == o_mem)) {
			// EBP encodes "no base register" for o_mem operands.
			// **!!** BaseReg allowed for o_mem with SIB byte???
			BaseReg = R_none;
		}
	}
	else if (MemOp.type != o_mem) {
		// no SIB byte; can have base reg but no index reg or scale factor.
		//  (o_mem operands have no base register, so BaseReg stays R_none.)
		BaseReg = (int) MemOp.reg;  // cannot be R_none for no SIB case
	}

	return;
} // end of MDExtractAddressFields()

// Is CurrOp a memory operand? True for operand types o_mem, o_displ and o_phrase.
bool IsMemOperand(op_t CurrOp) {
	if (o_mem == CurrOp.type)
		return true;
	if (o_displ == CurrOp.type)
		return true;
	return (o_phrase == CurrOp.type);
}

// MACHINE DEPENDENT: Is CurrOp the flags register?
//  True only for a register operand that is X86_FLAGS_REG.
bool MDIsFlagsReg(op_t CurrOp) {
	if (o_reg != CurrOp.type)
		return false;
	return CurrOp.is_reg(X86_FLAGS_REG);
}

// MACHINE DEPENDENT: Is operand a stack memory access?
//  Only o_displ and o_phrase operands qualify. The access counts as a stack
//  access when its base register is the stack pointer, or the frame pointer
//  EBP while UseFP is true.
bool MDIsStackAccessOpnd(op_t CurrOp, bool UseFP) {
	const bool Addressable = ((o_displ == CurrOp.type) || (o_phrase == CurrOp.type));
	if (!Addressable) {
		return false;
	}

	int Base;
	int Index;     // extracted but not needed here
	ushort Scale;  // extracted but not needed here
	ea_t Disp;     // extracted but not needed here
	MDExtractAddressFields(CurrOp, Base, Index, Scale, Disp);

	if (R_sp == Base)
		return true;
	return (UseFP && (R_bp == Base));
} // end of MDIsStackAccessOpnd()

// MACHINE DEPENDENT: Is operand a caller-saved register?
//  True for register operands that canonicalize (subword -> containing
//  register) to EAX, ECX or EDX.
bool MDIsCallerSavedReg(op_t CurrOp) {
	if (o_reg != CurrOp.type)
		return false;

	switch (MDCanonicalizeSubReg(CurrOp.reg)) {
		case R_ax:
		case R_cx:
		case R_dx:
			return true;
		default:
			return false;
	}
} // end of MDIsCallerSavedReg()
// DEBUG Print DEF and/or USE for an operand.
//  feature holds the CF_CHGn / CF_USEn bits of the instruction; OpNum is the
//  0..5 index into the insn_t.Operands[] array (the CF_ macros number the same
//  operands 1..6). OpNum == -1 signals a DEF/USE/VarKillSet etc. operand that
//  is not an instruction operand; it, and any other out-of-range value,
//  prints nothing.
void PrintDefUse(ulong feature, int OpNum) {
	static const ulong ChangeBits[6] = { CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6 };
	static const ulong UseBits[6] = { CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6 };

	if ((OpNum < 0) || (OpNum > 5))
		return;

	if (feature & ChangeBits[OpNum])
		msg(" DEF");
	if (feature & UseBits[OpNum])
		msg(" USE");
} // end PrintDefUse()

// DEBUG print SIB info for an operand: base register name, index register
//  name ("None" when absent) and the raw scale value.
//  Names are copied into small fixed buffers with a size limit of NAME_LEN - 1.
void PrintSIB(op_t Opnd) {
#define NAME_LEN 5
	char BaseName[NAME_LEN] = "None";
	char IndexName[NAME_LEN] = "None";
	int Base;
	int Index;
	ushort Scale;
	ea_t Disp; // extracted but not printed here

	MDExtractAddressFields(Opnd, Base, Index, Scale, Disp);

	if (R_none != Base) {
		qstrncpy(BaseName, RegNames[Base], NAME_LEN - 1);
	}
	if (R_none != Index) {
		qstrncpy(IndexName, RegNames[Index], NAME_LEN - 1);
	}

	msg(" Base %s Index %s Scale %d", BaseName, IndexName, Scale);
} // end PrintSIB()

// Annotations: concisely print SIB info for an operand.
//  Writes " [base+index*scale" to OutFile, omitting whichever parts are absent.
//  The closing bracket is appended only when HasOffset is false; otherwise the
//  caller is expected to print the offset and the bracket.
//  NOTE(review): the printed multiplier is 1 << (raw scale - 1). If sib_scale()
//  returns the raw two-bit SIB field (where 1 means *2), this prints half the
//  architectural multiplier -- confirm against intel.hpp and annotation consumers.
void AnnotPrintSIB(op_t Opnd, bool HasOffset, FILE *OutFile) {
	int Base;
	int Index;
	ushort Scale;
	ea_t Disp; // extracted but not printed here
	char Text[MAXSTR] = {'[', '\0'};
	char ScaleText[4];

	MDExtractAddressFields(Opnd, Base, Index, Scale, Disp);

	const bool HaveBase = (R_none != Base);
	const bool HaveIndex = (R_none != Index);

	if (Scale > 0) {
		Scale = 1 << (Scale - 1);
		(void) qsnprintf(ScaleText, 4, "%d", Scale);
	}

	if (HaveBase) {
		op_t BaseOp = InitOp;
		BaseOp.type = o_reg;
		BaseOp.dtyp = (1 == RegSizes[Base]) ? dt_byte : dt_dword;
		BaseOp.reg = Base;
		(void) qstrncat(Text, MDGetRegName(BaseOp), MAXSTR-1);
	}

	if (HaveIndex) {
		op_t IndexOp = InitOp;
		IndexOp.type = o_reg;
		IndexOp.dtyp = (1 == RegSizes[Index]) ? dt_byte : dt_dword;
		IndexOp.reg = Index;
		if (HaveBase) {
			(void) qstrncat(Text, "+", MAXSTR-1);
		}
		(void) qstrncat(Text, MDGetRegName(IndexOp), MAXSTR-1);
		if (Scale > 0) {
			(void) qstrncat(Text, "*", MAXSTR-1);
			(void) qstrncat(Text, ScaleText, MAXSTR-1);
		}
	}

	if (!HaveBase && !HaveIndex) {
		msg("ERROR: No BaseReg, no IndexReg in SIB\n");
	}

	if (!HasOffset) // can close the brackets around regs
		(void) qstrncat(Text, "]", MAXSTR-1);
	qfprintf(OutFile, " %s", Text);
} // end AnnotPrintSIB()

// Debug: print one operand from an instruction or DEF or USE list, followed by
//  its DEF/USE status. Void operands print nothing.
void PrintOneOperand(op_t Opnd, ulong features, int OpNum) {
	if (o_void == Opnd.type)
		return;

	PrintOperand(Opnd);
	PrintDefUse(features, OpNum);
} // end of PrintOneOperand()

// Debug: print one operand.
//  Emits a description that depends on the operand type (memory forms include
//  SIB or base register details), then " HIDDEN " if the operand is not shown.
//  Void operands print nothing at all.
void PrintOperand(op_t Opnd) {
	if (Opnd.type == o_void)
		return;
	else if (Opnd.type == o_mem) {
		msg(" Operand: memory : addr: %x", Opnd.addr);
		if (Opnd.hasSIB) {
			PrintSIB(Opnd);
		}
	}
	else if (Opnd.type == o_phrase) {
		msg(" Operand: memory phrase :");
		if (Opnd.hasSIB) { // has SIB info
			PrintSIB(Opnd);
		}
		else { // no SIB info
			ushort BaseReg = Opnd.phrase;
			msg(" reg %s", RegNames[BaseReg]);
		}
		if (Opnd.addr != 0) {
			// A phrase operand is not expected to carry an address/displacement.
			msg(" \n WARNING: addr for o_phrase type: %x\n", Opnd.addr);
		}
	}
	else if (Opnd.type == o_displ) {
		msg(" Operand: memory displ :");
		ea_t offset = Opnd.addr;
		if (Opnd.hasSIB) {
			PrintSIB(Opnd);
			msg(" displ %d", offset);
		}
		else {
			ushort BaseReg = Opnd.reg;
			msg(" reg %s displ %d", RegNames[BaseReg], offset);
		}
	}
	else if (Opnd.type == o_reg) {
		msg(" Operand: register %s", RegNames[Opnd.reg]);
	}
	else if (Opnd.type == o_imm) {
		msg(" Operand: immed %d", Opnd.value);
	}
	else if (Opnd.type == o_far) {
		msg(" Operand: FarPtrImmed addr: %x", Opnd.addr);
	}
	else if (Opnd.type == o_near) {
		msg(" Operand: NearPtrImmed addr: %x", Opnd.addr);
	}
	else if (Opnd.type == o_trreg) {
		msg(" Operand: TaskReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_dbreg) {
		msg(" Operand: DebugReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_crreg) {
		msg(" Operand: ControlReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_fpreg) {
		msg(" Operand: FloatReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_mmxreg) {
		msg(" Operand: MMXReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_xmmreg) {
		msg(" Operand: XMMReg reg: %d", Opnd.reg);
	}
	else {
		msg(" Operand: unknown");
	}
	if (!(Opnd.showed()))
		msg(" HIDDEN ");
	return;
} // end of PrintOperand()

// Print an operand that has no features flags or operand position number, such
//  as the op_t types found in lists and sets throughout the blocks, phi functions, etc.
//  The operand description is followed by its SSA number. Void operands print nothing.
void PrintListOperand(op_t Opnd, int SSANum) {
	if (o_void == Opnd.type)
		return;

	PrintOperand(Opnd);
	msg(" SSANum: %d ", SSANum);
} // end of PrintListOperand()

// Annotations: concisely print one operand to OutFile.
//  Registers print by name, immediates in decimal, code addresses and o_mem
//  addresses in hex, and register-based memory operands in bracketed
//  "[reg+offset]" form (via AnnotPrintSIB when a SIB byte is present).
//  Any other operand type prints " ERROROP".
void AnnotPrintOperand(op_t Opnd, FILE *OutFile) {
	// Scratch register operand used to look up the base register's name.
	op_t RegOp = InitOp;
	RegOp.type = o_reg;
	RegOp.dtyp = dt_dword;

	if (o_reg == Opnd.type) {
		qfprintf(OutFile, " %s", MDGetRegName(Opnd));
		return;
	}
	if (o_imm == Opnd.type) {
		qfprintf(OutFile, " %d", Opnd.value);
		return;
	}
	if ((o_far == Opnd.type) || (o_near == Opnd.type)) {
		qfprintf(OutFile, " %x", Opnd.addr);
		return;
	}
	if (o_mem == Opnd.type) {
		qfprintf(OutFile, " %x", Opnd.addr);
		if (Opnd.hasSIB) {
			AnnotPrintSIB(Opnd, false, OutFile);
		}
		return;
	}
	if (o_phrase == Opnd.type) {
		if (Opnd.hasSIB) { // has SIB info
			AnnotPrintSIB(Opnd, false, OutFile);
		}
		else { // no SIB info
			const ushort PhraseReg = Opnd.phrase;
			RegOp.reg = PhraseReg;
			if (1 == RegSizes[PhraseReg])
				RegOp.dtyp = dt_byte;
			qfprintf(OutFile, " [%s]", MDGetRegName(RegOp));
		}
		if (0 != Opnd.addr) {
			msg(" \n WARNING: addr for o_phrase type: %x\n", Opnd.addr);
		}
		return;
	}
	if (o_displ == Opnd.type) {
		const int Displacement = (int) Opnd.addr;
		if (Opnd.hasSIB) {
			// AnnotPrintSIB leaves the bracket open when an offset follows.
			AnnotPrintSIB(Opnd, (0 != Displacement), OutFile);
			if (Displacement > 0) // print plus sign
				qfprintf(OutFile, "+%d]", Displacement);
			else if (Displacement < 0) // minus sign will print automatically
				qfprintf(OutFile, "%d]", Displacement);
		}
		else {
			const ushort DisplReg = Opnd.reg;
			RegOp.reg = DisplReg;
			if (1 == RegSizes[DisplReg])
				RegOp.dtyp = dt_byte;
			if (Displacement >= 0) // print plus sign
				qfprintf(OutFile, " [%s+%d]", MDGetRegName(RegOp), Displacement);
			else // minus sign will print automatically
				qfprintf(OutFile, " [%s%d]", MDGetRegName(RegOp), Displacement);
		}
		return;
	}

	qfprintf(OutFile, " ERROROP");
} // end of AnnotPrintOperand()

// MACHINE DEPENDENT: Is operand type a known type that we want to analyze?
//  Known types are those in the range o_reg..o_xmmreg. When
//  SMP_DEBUG_OPERAND_TYPES is enabled, any other non-void type is reported.
bool MDKnownOperandType(op_t TempOp) {
	bool GoodOpType = ((TempOp.type >= o_reg) && (TempOp.type <= o_xmmreg));
#if SMP_DEBUG_OPERAND_TYPES
	if (!GoodOpType && (o_void != TempOp.type)) {
		msg("WARNING: Operand type %d \n", TempOp.type);
	}
#endif 
	return GoodOpType;
}

clc5q's avatar
clc5q committed
// Meet function over any two types in the type lattice.
//  - UNINIT meets anything to give the other type.
//  - Equal types, or an unknown Type1, give Type1.
//  - Two numeric types give NUMERIC; two data pointer types give POINTER.
//  - Numeric meeting data pointer (either order), or anything meeting an
//    unknown Type2, gives UNKNOWN.
//  - Combinations not covered above are reported (for numeric / data pointer
//    Type1) and give UNKNOWN.
//  If either input is profiler-derived, a non-UNINIT result is marked
//  profiler-derived as well.
SMPOperandType SMPTypeMeet(SMPOperandType Type1, SMPOperandType Type2) {
	SMPOperandType MeetType = UNKNOWN;
	bool ProfDerived = IsProfDerived(Type1) || IsProfDerived(Type2);
	if (IsEqType(UNINIT, Type1))
		MeetType = Type2;
	else if (IsEqType(UNINIT, Type2) || IsEqType(Type1, Type2)
		|| IsUnknown(Type1))
		MeetType = Type1;
	else if (IsNumeric(Type1)) {
		if (IsNumeric(Type2))  // one is NUMERIC, one is CODEPTR
			MeetType = NUMERIC;
		else if (IsDataPtr(Type2) || IsUnknown(Type2))
			MeetType = UNKNOWN;
		else
			msg("ERROR #1 in SMPTypeMeet.\n");
	}
	else if (IsDataPtr(Type1)) {
		if (IsDataPtr(Type2))  // two different POINTER subtypes
			MeetType = POINTER;
		else if (IsNumeric(Type2) || IsUnknown(Type2))
			MeetType = UNKNOWN;
		else
			msg("ERROR #2 in SMPTypeMeet.\n");
	}
	if (ProfDerived && IsNotEqType(UNINIT, MeetType))
		MeetType = MakeProfDerived(MeetType);
	return MeetType;
} // end of SMPTypeMeet()

clc5q's avatar
clc5q committed
// *****************************************************************
// Class DefOrUse
// *****************************************************************

// Default constructor to make the compilers happy.
//  Produces a void operand with UNINIT types, unanalyzed metadata status and
//  no indirect-write flag.
//  NOTE(review): SSANumber is not assigned here; the project's "no SSA number"
//  constant is not visible in this part of the file -- confirm and set it.
DefOrUse::DefOrUse(void) {
	this->Operand.type = o_void;
	this->OpType = UNINIT;
	this->NonSpeculativeOpType = UNINIT;
	this->MetadataStatus = DEF_METADATA_UNANALYZED;
	this->IndWrite = false;
	return;
}

clc5q's avatar
clc5q committed
// Constructor.
//  Ref is the operand being defined or used, Type its initial (non-speculative)
//  type, SSASub its SSA subscript. Register operands are stored in canonical
//  form (subword register replaced by its containing register).
//  Type must not be profiler-derived (asserted).
DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) {
	if (o_reg == Ref.type) {
		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
		//  and type inference systems.
		Ref.reg = MDCanonicalizeSubReg(Ref.reg);
	}
	this->Operand = Ref;
	this->OpType = Type;

	assert(!IsProfDerived(Type));
	this->NonSpeculativeOpType = Type;
	this->SSANumber = SSASub;
	this->MetadataStatus = DEF_METADATA_UNANALYZED;
	this->IndWrite = false; // Dump() reads this flag, so it must start defined
	return;
}

// Copy constructor; delegates all member copying to the assignment operator.
DefOrUse::DefOrUse(const DefOrUse &CopyIn) {
	this->operator=(CopyIn);
}

// Assignment operator for copy constructor use.
//  Copies every data member, including the indirect-write flag, so that the
//  copy/erase/insert update pattern used by DefOrUseSet keeps all state.
DefOrUse &DefOrUse::operator=(const DefOrUse &rhs) {
	this->Operand = rhs.Operand;
	this->OpType = rhs.OpType;
	this->NonSpeculativeOpType = rhs.NonSpeculativeOpType;
	this->SSANumber = rhs.SSANumber;
	this->MetadataStatus = rhs.MetadataStatus;
	this->IndWrite = rhs.IndWrite;
	return *this;
}

// Set the operand type for this DEF or USE - don't forget to take
//  into account the speculative (profiler) status.
//  When the function containing Instr is being analyzed speculatively, the new
//  type is tagged with PROF_BASE, and the previous type is remembered as the
//  non-speculative type unless it was itself profiler-derived.
//  NOTE(review): only the closing braces were restored here; confirm against
//  version control whether a non-speculative else-branch (updating
//  NonSpeculativeOpType) also belongs in this function.
void DefOrUse::SetType(SMPOperandType Type, const SMPInstr *Instr) 
{
	SMPOperandType OldType = this->OpType;
	SMPOperandType NewType = Type;
	if (Instr->GetBlock()->GetFunc()->GetIsSpeculative()) {
		NewType = (SMPOperandType)(((int)NewType) | PROF_BASE);
		if (!IsProfDerived(OldType))
			this->NonSpeculativeOpType = OldType;
	}
	this->OpType = NewType;
	return;
}

// Set the indirect memory write flag.
void DefOrUse::SetIndWrite(bool IndMemWrite) {
	this->IndWrite = IndMemWrite;
	return;
}
// Debug printing.
void DefOrUse::Dump(void) const {
	PrintListOperand(this->Operand, this->SSANumber);
	if (IsEqType(this->OpType , NUMERIC))
	else if (IsEqType(this->OpType , CODEPTR))
		msg("C ");
	else if (IsEqType(this->OpType , POINTER))
	else if (IsEqType(this->OpType , STACKPTR))
		msg("S ");
	else if (IsEqType(this->OpType , GLOBALPTR))
		msg("G ");
	else if (IsEqType(this->OpType , HEAPPTR))
		msg("H ");
	else if (IsEqType(this->OpType , PTROFFSET))
		msg("O ");
	else if (IsEqType(this->OpType , UNKNOWN))
	if (IsProfDerived(this->OpType))
	// Don't write anything for UNINIT OpType

	// Emit the metadata status.
	if (DEF_METADATA_UNUSED == this->MetadataStatus)
		msg("Mn ");
	else if (DEF_METADATA_USED == this->MetadataStatus)
		msg("Mu ");
	else if (DEF_METADATA_REDUNDANT == this->MetadataStatus)
		msg("Mr ");
	// Is the DEF possibly aliased because of an indirect write in
	//  the DEF-USE chain?
	if (this->IndWrite)
		msg("Al* ");
	return;
} // end of DefOrUse::Dump()

// *****************************************************************
// Class DefOrUseSet
// *****************************************************************

// Default constructor. The Refs set starts out empty via its own constructor.
DefOrUseSet::DefOrUseSet(void) {
	return;
}

// Destructor. Empties the reference set.
DefOrUseSet::~DefOrUseSet() {
	this->Refs.clear();
	return;
}

// Find the reference for a given operand.
//  Returns an iterator to the matching set member, or Refs.end() if there is
//  none. The lookup key is a temporary DefOrUse built from SearchOp.
set<DefOrUse, LessDefUse>::iterator DefOrUseSet::FindRef(op_t SearchOp) {
	return this->Refs.find(DefOrUse(SearchOp));
}

// Insert a new DEF or USE; must be new, insert must succeed else we assert.
//  Returns an iterator to the newly inserted member.
set<DefOrUse, LessDefUse>::iterator DefOrUseSet::InsertRef(DefOrUse Ref) {
	pair<set<DefOrUse, LessDefUse>::iterator, bool> Outcome = this->Refs.insert(Ref);
	assert(Outcome.second);
	return Outcome.first;
}

// Set a Def or Use into the list, along with its type and SSA subscript.
//  Unlike InsertRef(), an already-present reference is silently left unchanged.
void DefOrUseSet::SetRef(op_t Ref, SMPOperandType Type, int SSASub) {
	this->Refs.insert(DefOrUse(Ref, Type, SSASub));
}

// Change the SSA subscript for a reference.
//  Set members cannot be modified in place, so the member is copied, the copy
//  is updated, the old member is erased, and the copy is inserted. The
//  reference for CurrOp must already exist (asserted).
//  Returns an iterator to the updated member.
set<DefOrUse, LessDefUse>::iterator DefOrUseSet::SetSSANum(op_t CurrOp, int NewSSASub) {
	set<DefOrUse, LessDefUse>::iterator OldRef = this->FindRef(CurrOp);
	assert(OldRef != this->Refs.end());

	// Remember the successor before erasing; it serves as the insertion hint.
	set<DefOrUse, LessDefUse>::iterator Hint = OldRef;
	++Hint;

	DefOrUse Updated(*OldRef);
	Updated.SetSSANum(NewSSASub);
	this->Refs.erase(OldRef);
	return this->Refs.insert(Hint, Updated);
} // end of DefOrUseSet::SetSSANum()

// Change the operand type for a reference.
//  To change a field within a set, we must grab a copy, change the copy,
//  delete the old set member, and insert the updated copy as a new member.
//  The reference for CurrOp must already exist (asserted). Instr is passed to
//  DefOrUse::SetType(), which uses it to determine speculative status.
//  Returns an iterator to the updated member.
set<DefOrUse, LessDefUse>::iterator DefOrUseSet::SetType(op_t CurrOp, SMPOperandType Type, const SMPInstr* Instr) {
	set<DefOrUse, LessDefUse>::iterator CurrRef = this->FindRef(CurrOp);
	assert(CurrRef != this->Refs.end());
	if (o_imm == CurrOp.type) {
		// An immediate's type is not expected to change once it has been set.
		if (UNINIT != CurrRef->GetType() && Type!=CurrRef->GetType() ) {
			msg("ERROR: Changing type of immediate from %d to %d : ", CurrRef->GetType(), Type);
			CurrRef->Dump();
			msg("\n");
		}
	}
	DefOrUse NewCopy = (*CurrRef);
	NewCopy.SetType(Type, Instr);
	this->Refs.erase(CurrRef);
	pair<set<DefOrUse, LessDefUse>::iterator, bool> InsertResult;
	InsertResult = this->Refs.insert(NewCopy);
	assert(InsertResult.second);
	CurrRef = InsertResult.first;
	return CurrRef;
} // end of DefOrUseSet::SetType()

// Change the Metadata type for a reference.
//  Set members cannot be modified in place, so the member is copied, the copy
//  is updated, the old member is erased, and the copy is inserted. The
//  reference for CurrOp must already exist and the re-insert must succeed
//  (both asserted). Returns an iterator to the updated member.
set<DefOrUse, LessDefUse>::iterator DefOrUseSet::SetMetadata(op_t CurrOp, SMPMetadataType Status) {
	set<DefOrUse, LessDefUse>::iterator OldRef = this->FindRef(CurrOp);
	assert(OldRef != this->Refs.end());

	DefOrUse Updated(*OldRef);
	Updated.SetMetadataStatus(Status);
	this->Refs.erase(OldRef);

	pair<set<DefOrUse, LessDefUse>::iterator, bool> Outcome = this->Refs.insert(Updated);
	assert(Outcome.second);
	return Outcome.first;
} // end of DefOrUseSet::SetMetadata()
// Change the indirect write status for a reference.
//  Set members cannot be modified in place, so the member is copied, the copy
//  is updated, the old member is erased, and the copy is inserted. The
//  reference for CurrOp must already exist and the re-insert must succeed
//  (both asserted). Returns an iterator to the updated member.
set<DefOrUse, LessDefUse>::iterator DefOrUseSet::SetIndWrite(op_t CurrOp, bool IndWriteFlag) {
	set<DefOrUse, LessDefUse>::iterator OldRef = this->FindRef(CurrOp);
	assert(OldRef != this->Refs.end());

	DefOrUse Updated(*OldRef);
	Updated.SetIndWrite(IndWriteFlag);
	this->Refs.erase(OldRef);

	pair<set<DefOrUse, LessDefUse>::iterator, bool> Outcome = this->Refs.insert(Updated);
	assert(Outcome.second);
	return Outcome.first;
} // end of DefOrUseSet::SetIndWrite()

// Debug printing.
void DefOrUseSet::Dump(void) {
	set<DefOrUse, LessDefUse>::iterator CurrRef;
	for (CurrRef = this->Refs.begin(); CurrRef != this->Refs.end(); ++CurrRef) {
		CurrRef->Dump();
	}
	msg("\n");
	return;
}

clc5q's avatar
clc5q committed
// Do all types agree, ignoring any flags registers in the set? This is used
//  for conditional move instructions; if all types agree, it does not matter
//  whether the move happens or not.
bool DefOrUseSet::TypesAgreeNoFlags(void) {
	bool FoundFirstUse = false;
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	SMPOperandType UseType = UNINIT;
	for (CurrUse = this->Refs.begin(); CurrUse != this->Refs.end(); ++CurrUse) {
		if (!(CurrUse->GetOp().is_reg(X86_FLAGS_REG))) { // ignore flags
			if (!FoundFirstUse) {
				FoundFirstUse = true;
				UseType = CurrUse->GetType();
			}
			else {
				if (IsNotEqType(CurrUse->GetType(), UseType)) {