/*
 * SMPDataFlowAnalysis.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
 */

clc5q's avatar
clc5q committed
//
// SMPDataFlowAnalysis.cpp
//
// This module contains common types and helper classes needed for the
//   SMP project (Software Memory Protection).
//

#include <list>
#include <set>
clc5q's avatar
clc5q committed
#include <vector>
#include <algorithm>
clc5q's avatar
clc5q committed

#include <cstring>
clc5q's avatar
clc5q committed

#include <pro.h>
clc5q's avatar
clc5q committed
#include <assert.h>
clc5q's avatar
clc5q committed
#include <ida.hpp>
#include <idp.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>

#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"
#include "SMPInstr.h"
#include "SMPBasicBlock.h"
#include "SMPFunction.h"
clc5q's avatar
clc5q committed

// Set these to 1 for debugging output
clc5q's avatar
clc5q committed
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0  // Fixing up stack frame info the way we want the offsets
#define SMP_DEBUG_OPERAND_TYPES 1  // leave on; warnings that should never happen
clc5q's avatar
clc5q committed
#define STARS_DEBUG_DUMP_IDENTIFY_HIDDEN_OPERANDS 0 // print HIDDEN if operand.showed() is false
clc5q's avatar
clc5q committed
#if IDA_SDK_VERSION > 560
#define MAX_IDA_REG R_mxcsr
#else
#define MAX_IDA_REG 80
#endif

// Printable register names, indexed by register number. The table holds
//  MAX_IDA_REG + 1 entries and ends with MXCSR.
// NOTE(review): the ordering presumably mirrors enum RegNo in intel.hpp --
//  confirm whenever the IDA SDK version changes.
const char *RegNames[MAX_IDA_REG + 1] =
	{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
	  "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	  "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
	  "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
	  "DS", "FS", "GS", "CF", "ZF", "SF", "OF", "PF",
	  "AF", "TF", "IF", "DF", "EFLAGS", "FPU_ST0", "FPU_ST1", "FPU_ST2",
	  "FPU_ST3", "FPU_ST4", "FPU_ST5", "FPU_ST6", "FPU_ST7", "FPU_CTRL", "FPU_STAT", "FPU_TAGS",
	  "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7",
	  "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7",
	  "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15",
	  "MXCSR"
	};

// Size in bytes recorded for each register; indexed by register number in the
//  same way as RegNames[], one row here per row of that table.
// NOTE(review): the SPL/BPL/SIL/DIL slots hold 4 and the MMX0..MMX7 slots hold 16;
//  confirm these are the intended widths.
const unsigned char RegSizes[MAX_IDA_REG + 1] =
	{ 4, 4, 4, 4, 4, 4, 4, 4,   // EAX .. EDI
	  4, 4, 4, 4, 4, 4, 4, 4,   // R8 .. R15
	  1, 1, 1, 1, 1, 1, 1, 1,   // AL .. BH
	  4, 4, 4, 4, 4, 4, 4, 4,   // SPL .. DIL, EIP, ES, CS, SS
	  4, 4, 4, 4, 4, 4, 4, 4,   // DS, FS, GS, CF .. PF
	  4, 4, 4, 4, 4, 8, 8, 8,   // AF .. DF, EFLAGS, FPU_ST0 .. FPU_ST2
	  8, 8, 8, 8, 8, 4, 4, 4,   // FPU_ST3 .. FPU_ST7, FPU_CTRL, FPU_STAT, FPU_TAGS
      16, 16, 16, 16, 16, 16, 16, 16,   // MMX0 .. MMX7
      16, 16, 16, 16, 16, 16, 16, 16,   // XMM0 .. XMM7
      16, 16, 16, 16, 16, 16, 16, 16,   // XMM8 .. XMM15
      4                                 // MXCSR
	};

// Name returned by MDGetRegName() when the operand is not a valid register operand.
const char *ErrorStrings[1] = { "ERROR_REG" };

// Names used by MDGetRegName() for 16-bit references to registers R_ax through R_bx;
//  indexed directly by the register number.
const char *WordRegStrings[4] = { "AX", "CX", "DX", "BX" };

// Printable signedness labels; the first and last entries are the same string.
// NOTE(review): the meaning of each index is defined elsewhere -- confirm against the users of this table.
const char *SignednessStrings[4] = { "UNKNOWNSIGN", "SIGNED", "UNSIGNED", "UNKNOWNSIGN" };

// The same four labels with a NOFLAG prefix.
// NOTE(review): presumably used for LEA-style instructions that do not set flags -- confirm.
const char *LeaSignednessStrings[4] = { "NOFLAGUNKNOWNSIGN", "NOFLAGSIGNED", "NOFLAGUNSIGNED", "NOFLAGUNKNOWNSIGN" };

// Return the printable name for a register operand. A 16-bit reference (dt_word)
//  to one of R_ax..R_bx gets its name from WordRegStrings[]; every other valid
//  register is named from RegNames[]. Operands that are not registers, or whose
//  register number is R_none or above MAX_IDA_REG, yield ErrorStrings[0].
const char *MDGetRegName(op_t RegOp) {
	const bool ValidReg = (o_reg == RegOp.type) && (R_none != RegOp.reg) && (RegOp.reg <= MAX_IDA_REG);
	if (!ValidReg) {
		return ErrorStrings[0];
	}

	// 16-bit registers are named separately from their 32-bit parents.
	const bool WordSized = (dt_word == RegOp.dtyp) && (R_ax <= RegOp.reg) && (R_bx >= RegOp.reg);
	return WordSized ? WordRegStrings[RegOp.reg] : RegNames[RegOp.reg];
}


clc5q's avatar
clc5q committed
// Define instruction categories for data flow analysis.
SMPitype DFACategory[NN_last+1];
// Define instruction categories for data type analysis.
int SMPTypeCategory[NN_last+1];
clc5q's avatar
clc5q committed

// Define which instructions define and use the CPU flags.
bool SMPDefsFlags[NN_last + 1];
bool SMPUsesFlags[NN_last + 1];

// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map.
//  The SSA number is shifted left by 16 bits and the register number is OR-ed in.
//  DefOp must be a register operand (asserted).
int HashGlobalNameAndSSA(op_t DefOp, int SSANum) {
	assert(o_reg == DefOp.type);
	// Shift in unsigned arithmetic: left-shifting a negative int is undefined behavior before C++20.
	const unsigned int ShiftedSSA = ((unsigned int) SSANum) << 16;
	return (int) (ShiftedSSA | ((unsigned int) DefOp.reg));
}

// Get the size in bytes of the data type of an operand.
//  Register operands take their size from RegSizes[], overridden to 2 bytes when the
//  dtyp field marks a 16-bit reference. All other operands are sized from dtyp; an
//  unexpected dtyp is reported and treated as 4 bytes.
// NOTE(review): this copy of the function had lost lines (an unterminated "#if 0", the end of
//  the register branch, and the body of an "#if IDA_SDK_VERSION > 599" block in the dt_byte16
//  case). The structure below is the minimal repair; compare against the repository version.
size_t GetOpDataSize(op_t DataOp) {
	size_t DataSize;
	if (o_reg == DataOp.type) {
		DataSize = RegSizes[DataOp.reg];
		if (DataOp.dtyp == dt_word) {
			// SMP_msg("Found 16-bit register using dtyp field.\n");
			DataSize = 2;
		}
		// The register size is final; the dtyp switch below is for non-register operands.
		return DataSize;
	}

	switch (DataOp.dtyp) {
		case dt_byte:
			DataSize = 1;
			break;
		case dt_word:
			DataSize = 2;
			break;
		case dt_dword:
		case dt_float:
		case dt_code:
		case dt_unicode:
		case dt_string:
			DataSize = 4;
			break;
		case dt_double:
		case dt_qword:
			DataSize = 8;
			break;
		case dt_packreal:
			DataSize = 12;
			break;
		case dt_byte16:
			// NOTE(review): an SDK-version-specific case label appears to have been lost here.
			DataSize = 16;
			break;
		case dt_fword:
			DataSize = 6;
			break;
		case dt_3byte:
			DataSize = 3;
			break;
		default:
			SMP_msg("ERROR: unexpected data type %d in GetOpDataSize() :", DataOp.dtyp);
			PrintOperand(DataOp);
			DataSize = 4;
			break;
	}
	return DataSize;
} // end of GetOpDataSize()

// Translate an operand's size in bytes into the matching FG_MASK_BITWIDTH_* flag.
//  DataSize is the size in bytes when the caller knows it; a DataSize of 0 means
//  "unknown", in which case the size is obtained from GetOpDataSize(CurrOp).
//  An unrecognized size is reported and the initial value 32 is returned.
unsigned short ComputeOperandBitWidthMask(op_t CurrOp, size_t DataSize) {
	const size_t ByteCount = (0 == DataSize) ? GetOpDataSize(CurrOp) : DataSize;
	unsigned short WidthMask = 32;  // returned unchanged when ByteCount is not recognized

	switch (ByteCount) {
		case 1:
			WidthMask = FG_MASK_BITWIDTH_8;
			break;
		case 2:
			WidthMask = FG_MASK_BITWIDTH_16;
			break;
		case 3:
			WidthMask = FG_MASK_BITWIDTH_24;
			break;
		case 4:
			WidthMask = FG_MASK_BITWIDTH_32;
			break;
		case 6:
			WidthMask = FG_MASK_BITWIDTH_48;
			break;
		case 8:
			WidthMask = FG_MASK_BITWIDTH_64;
			break;
		case 10:
			WidthMask = FG_MASK_BITWIDTH_80;
			break;
		case 12:
			WidthMask = FG_MASK_BITWIDTH_96;
			break;
		case 16:
			WidthMask = FG_MASK_BITWIDTH_128;
			break;
		case 32:
			WidthMask = FG_MASK_BITWIDTH_256;
			break;
		default:
			SMP_msg("ERROR: Unknown DataSize: %zu bytes ", ByteCount);
			break;
	}
	return WidthMask;
} // end of ComputeOperandBitWidthMask()

// Report the widest bit width flagged in a SignMiscInfo bit mask.
//  Only the bits selected by FG_MASK_BITWIDTH_FIELDS are examined. Returns the
//  width in bits, or 0 when none of the tested width flags is set.
// NOTE(review): FG_MASK_BITWIDTH_80 is not tested here although
//  ComputeOperandBitWidthMask() can produce it -- confirm that is intended.
size_t LargestBitWidthFromMask(unsigned short WidthTypeInfo) {
	// Ordered from widest to narrowest, so the first match is the largest width.
	static const struct {
		unsigned int FlagBit;
		size_t Bits;
	} WidthTable[] = {
		{ FG_MASK_BITWIDTH_256, 256 },
		{ FG_MASK_BITWIDTH_128, 128 },
		{ FG_MASK_BITWIDTH_96, 96 },
		{ FG_MASK_BITWIDTH_64, 64 },
		{ FG_MASK_BITWIDTH_48, 48 },
		{ FG_MASK_BITWIDTH_32, 32 },
		{ FG_MASK_BITWIDTH_24, 24 },
		{ FG_MASK_BITWIDTH_16, 16 },
		{ FG_MASK_BITWIDTH_8, 8 }
	};
	const size_t TableLen = sizeof(WidthTable) / sizeof(WidthTable[0]);
	const unsigned short WidthBits = WidthTypeInfo & FG_MASK_BITWIDTH_FIELDS;

	for (size_t Slot = 0; Slot < TableLen; ++Slot) {
		if (WidthBits & WidthTable[Slot].FlagBit) {
			return WidthTable[Slot].Bits;
		}
	}
	return 0;
} // end of LargestBitWidthFromMask()

// Is CurrOp a general purpose register? (not flags, instruction pointer, non-integer reg, etc.)
//  True only for register operands whose number lies in R_ax..R_di or in R_al..R_dil,
//  the two general purpose ranges of enum RegNo in intel.hpp.
bool MDIsGeneralPurposeReg(op_t CurrOp) {
	if (o_reg != CurrOp.type) {
		return false;
	}
	const bool InFirstRange = (R_ax <= CurrOp.reg) && (R_di >= CurrOp.reg);
	const bool InSecondRange = (R_al <= CurrOp.reg) && (R_dil >= CurrOp.reg);
	return (InFirstRange || InSecondRange);
}

// Are operands equal?
//  Operands of different types are never equal. Within a type, only the fields that
//  identify the operand are compared: register number and dtyp for o_reg, address for
//  o_mem/o_far/o_near, SIB byte (plus address for o_displ) for SIB-addressed memory,
//  base register (plus address for o_displ) for non-SIB memory, value for o_imm, and
//  register number for the special register classes. Unknown types are reported and
//  compare unequal.
bool IsEqOp(op_t Opnd1, op_t Opnd2) {
	if (Opnd1.type != Opnd2.type)
		return false;

	switch (Opnd1.type) {
		case o_void:
			return true;
		case o_reg:
			return ((Opnd1.reg == Opnd2.reg) && (Opnd1.dtyp == Opnd2.dtyp));
		case o_mem:
			return (Opnd1.addr == Opnd2.addr);
		case o_phrase:
			if (Opnd1.hasSIB && Opnd2.hasSIB)
				return (Opnd1.sib == Opnd2.sib);
			else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB))
				// Both lack a SIB byte: compare the base register, as the o_displ case does.
				//  Previously two identical no-SIB phrase operands compared unequal.
				return (Opnd1.phrase == Opnd2.phrase);
			else
				return false; // no SIB != has SIB
		case o_displ:
			if (Opnd1.hasSIB && Opnd2.hasSIB)
				return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr));
			else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB))
				return ((Opnd1.addr == Opnd2.addr) && (Opnd1.reg == Opnd2.reg));
			else
				return false; // no SIB != has SIB
		case o_imm:
			return (Opnd1.value == Opnd2.value);
		case o_far:  // fall through to o_near case
		case o_near:
			return (Opnd1.addr == Opnd2.addr);
		case o_trreg:  // fall through
		case o_dbreg:  // fall through
		case o_crreg:  // fall through
		case o_fpreg:  // fall through
		case o_mmxreg: // fall through
		case o_xmmreg:
			return (Opnd1.reg == Opnd2.reg); // no subword regs to deal with
		default:
			SMP_msg("ERROR: Unknown operand type in IsEqOp.\n");
			return false;
	} // end switch (Opnd1.type)
} // end of function IsEqOp()

// Are operands equal, ignoring bitwidth differences for register operands?
//  Same comparison as a field-by-field operand equality test, except that two o_reg
//  operands are equal whenever their register numbers match, whatever their dtyp.
//  Unknown operand types are reported and compare unequal.
bool IsEqOpIgnoreBitwidth(op_t Opnd1, op_t Opnd2) {
	if (Opnd1.type != Opnd2.type)
		return false;

	switch (Opnd1.type) {
		case o_void:
			return true;
		case o_reg:
			return (Opnd1.reg == Opnd2.reg);
		case o_mem:
			return (Opnd1.addr == Opnd2.addr);
		case o_phrase:
			if (Opnd1.hasSIB && Opnd2.hasSIB)
				return (Opnd1.sib == Opnd2.sib);
			else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB))
				// Both lack a SIB byte: compare the base register, as the o_displ case does.
				//  Previously two identical no-SIB phrase operands compared unequal.
				return (Opnd1.phrase == Opnd2.phrase);
			else
				return false; // no SIB != has SIB
		case o_displ:
			if (Opnd1.hasSIB && Opnd2.hasSIB)
				return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr));
			else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB))
				return ((Opnd1.addr == Opnd2.addr) && (Opnd1.reg == Opnd2.reg));
			else
				return false; // no SIB != has SIB
		case o_imm:
			return (Opnd1.value == Opnd2.value);
		case o_far:  // fall through to o_near case
		case o_near:
			return (Opnd1.addr == Opnd2.addr);
		case o_trreg:  // fall through
		case o_dbreg:  // fall through
		case o_crreg:  // fall through
		case o_fpreg:  // fall through
		case o_mmxreg: // fall through
		case o_xmmreg:
			return (Opnd1.reg == Opnd2.reg); // no subword regs to deal with
		default:
			SMP_msg("ERROR: Unknown operand type in IsEqOpIgnoreBitwidth.\n");
			return false;
	} // end switch (Opnd1.type)
} // end of function IsEqOpIgnoreBitwidth()

// We need to make subword registers equal to their containing registers when we
//  do comparisons, so that we will realize that register EAX is killed by a prior DEF
//  of register AL, for example, and vice versa. To keep sets ordered strictly,
//  we also have to make AL and AH be equal to each other as well as equal to EAX.
clc5q's avatar
clc5q committed
#define FIRST_x86_SUBWORD_REG R_al
#define LAST_x86_SUBWORD_REG R_bh
bool MDLessReg(const ushort Reg1, const ushort Reg2) {
	ushort SReg1 = MDCanonicalizeSubReg(Reg1);
	ushort SReg2 = MDCanonicalizeSubReg(Reg2);
	return (SReg1 < SReg2);
} // end of MDLessReg()

// Map a subword register in FIRST_x86_SUBWORD_REG..LAST_x86_SUBWORD_REG (AL..BL, AH..BH)
//  to the number of its containing register; any other register number is returned unchanged.
ushort MDCanonicalizeSubReg(const ushort Reg1) {
	bool Subword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG));
	ushort SReg1 = Reg1;
	if (Subword) {
		// See enumeration RegNo in intel.hpp.
		if (SReg1 < 20)  // AL, CL, DL or BL
			SReg1 -= 16;
		else             // AH, CH, DH or BH
			SReg1 -= 20;
	}
	return SReg1;
} // end of MDCanonicalizeSubReg()

// Canonicalize a register operand in place: its register number is replaced by the
//  result of MDCanonicalizeSubReg() and its dtyp is forced to dt_dword.
//  Operands of any other type are left untouched.
void CanonicalizeOpnd(op_t &TempOp) {
	if (o_reg != TempOp.type)
		return;

	TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
	TempOp.dtyp = dt_dword; // set to 32-bit width
}

// Is RegOp the stack pointer register, or (only when UseFP is set) the frame
//  pointer register? Always false for operands that are not registers.
bool MDIsStackOrFramePointerReg(op_t RegOp, bool UseFP) {
	if (o_reg != RegOp.type)
		return false;
	if (RegOp.is_reg(MD_STACK_POINTER_REG))
		return true;
	return (UseFP && RegOp.is_reg(MD_FRAME_POINTER_REG));
}

clc5q's avatar
clc5q committed
// In SSA computations, we are storing the GlobalNames index into the op_t fields
//  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
//  fields: offo supplies bits 16-23, offb bits 8-15, and n bits 0-7.
unsigned int ExtractGlobalIndex(op_t GlobalOp) {
	unsigned int index = 0;
	// Mask each field to 8 bits so a sign-extended char cannot spill into higher bits.
	index |= (((unsigned int) GlobalOp.offo) & 0x000000ff);
	index <<= 8;
	index |= (((unsigned int) GlobalOp.offb) & 0x000000ff);
	index <<= 8;
	index |= (((unsigned int) GlobalOp.n) & 0x000000ff);
	return index;
}

// Store the low 24 bits of index into the n (bits 0-7), offb (bits 8-15) and
//  offo (bits 16-23) fields of *TempOp. Higher bits of index are discarded.
void SetGlobalIndex(op_t *TempOp, size_t index) {
	const size_t LowByte = index & 0xff;
	const size_t MidByte = (index >> 8) & 0xff;
	const size_t HighByte = (index >> 16) & 0xff;

	TempOp->n = (char) LowByte;
	TempOp->offb = (char) MidByte;
	TempOp->offo = (char) HighByte;
}

// Return true if CurrOp could be an indirect memory reference.
//  Only o_mem, o_phrase and o_displ operands are considered. An operand is treated as
//  direct when the only registers used for addressing are the stack pointer or, if
//  UseFP is set, the frame pointer; any other addressing register, or a nonzero SIB
//  scale, makes it indirect (a conservative answer).
bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP) {
	const bool MemoryType = (o_mem == CurrOp.type) || (o_phrase == CurrOp.type) || (o_displ == CurrOp.type);
	if (!MemoryType)
		return false;

	bool IndirectFlag = false;

	if (!CurrOp.hasSIB) { // no SIB; can have base register only
		const ushort AddrReg = CurrOp.reg;
		if (o_mem == CurrOp.type) { // no base register for o_mem
			if ((0 != AddrReg) && (R_bp != AddrReg)) {
				SMP_msg("base reg %d ignored \n", AddrReg);
			}
		}
		else {
			const bool FramePtrAccess = (R_bp == AddrReg) && UseFP;  // EBP as frame pointer
			const bool StackPtrAccess = (R_sp == AddrReg);           // ESP as stack pointer
			if (!(FramePtrAccess || StackPtrAccess))
				IndirectFlag = true;
		}
		return IndirectFlag;
	}

	// SIB byte present: examine index register, scale, and base register in turn.
	const int SibBase = sib_base(CurrOp);
	const short SibIndex = sib_index(CurrOp);

	if ((R_none != SibIndex) && (R_sp != SibIndex)) {
		// An EBP index counts as direct only when EBP serves as the frame pointer.
		if (!((R_bp == SibIndex) && UseFP))
			IndirectFlag = true;
	}

	if (0 != sib_scale(CurrOp))
		IndirectFlag = true;

	if (R_none != SibBase) {
		// EBP ==> no base register for o_mem type; EBP with UseFP and ESP are direct accesses.
		const bool BaseIsBP = (SibBase == R_bp);
		const bool DirectBase = (BaseIsBP && ((CurrOp.type == o_mem) || UseFP)) || (SibBase == R_sp);
		if (!DirectBase)
			IndirectFlag = true; // conservative; some other register is used for addressing
	}

	return IndirectFlag;
} // end MDIsIndirectMemoryOpnd()

// Extract the base and index registers and scale factor and displacement from the
//  memory operand.
//  MemOp must be of type o_phrase, o_displ or o_mem (asserted).
//  Outputs:
//   BaseReg  - base register number, or R_none (always R_none for a non-SIB o_mem
//              operand, and for an o_mem operand whose SIB base is R_bp).
//   IndexReg - index register number, or R_none (an R_sp index means "no index").
//   Scale    - value of sib_scale() when there is an index register, else 0.
//   Offset   - the operand's addr field.
void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Scale, ea_t &Offset) {
	assert((MemOp.type == o_phrase) || (MemOp.type == o_displ) || (MemOp.type == o_mem));

	Scale = 0;
	BaseReg = R_none;
	IndexReg = R_none;
	Offset = MemOp.addr;

	if (MemOp.hasSIB) {
		BaseReg = sib_base(MemOp);
		IndexReg = (int) sib_index(MemOp);
		if (R_sp == IndexReg) // signifies no index register
			IndexReg = R_none;
		if (R_none != IndexReg) {
			Scale = (ushort) sib_scale(MemOp);
		}
		if (R_none != BaseReg) {
			if ((BaseReg == R_bp) && (MemOp.type == o_mem)) {
				BaseReg = R_none;
				// **!!** BaseReg allowed for o_mem with SIB byte???
			}
		}
	}
	else { // no SIB byte; can have base reg but no index reg or scale factor
		BaseReg = (int) MemOp.reg;  // cannot be R_none for no SIB case
		if (MemOp.type == o_mem) {
			BaseReg = R_none; // no Base register for o_mem operands
		}
	}

	return;
} // end of MDExtractAddressFields()

// Is CurrOp a memory operand? True exactly for the o_mem, o_displ and o_phrase types.
bool IsMemOperand(op_t CurrOp) {
	switch (CurrOp.type) {
		case o_mem:
		case o_displ:
		case o_phrase:
			return true;
		default:
			return false;
	}
}

// MACHINE DEPENDENT: Is CurrOp the flags register?
//  True only for a register operand that is X86_FLAGS_REG.
bool MDIsFlagsReg(op_t CurrOp) {
	if (o_reg != CurrOp.type)
		return false;
	return CurrOp.is_reg(X86_FLAGS_REG);
}

// MACHINE DEPENDENT: Is register a stack pointer or frame pointer?
//  The frame pointer register only counts when UseFP is true.
bool MDIsStackPtrReg(int RegNumber, bool UseFP) {
	return ((RegNumber == MD_STACK_POINTER_REG) || (UseFP && (RegNumber == MD_FRAME_POINTER_REG)));
} // end of MDIsStackPtrReg()

// MACHINE DEPENDENT: Is operand a stack memory access?
//  Only o_displ and o_phrase operands are considered; any other type returns false.
bool MDIsStackAccessOpnd(op_t CurrOp, bool UseFP) {
	int BaseReg;
	int IndexReg;
	ushort ScaleFactor;
	ea_t offset;

	if ((o_displ != CurrOp.type) && (o_phrase != CurrOp.type)) {
		return false;
	}

	MDExtractAddressFields(CurrOp, BaseReg, IndexReg, ScaleFactor, offset);
	// NOTE(review): this copy of the function had no return statement after the call above
	//  (falling off the end of a non-void function is undefined behavior). The test below
	//  treats the operand as a stack access when either addressing register is the stack
	//  pointer, or the frame pointer if UseFP is set -- confirm against the repository version.
	return (MDIsStackPtrReg(BaseReg, UseFP) || MDIsStackPtrReg(IndexReg, UseFP));
} // end of MDIsStackAccessOpnd()

// MACHINE DEPENDENT: Is operand a direct stack memory access?
//  True for an o_displ or o_phrase operand whose base register is the stack pointer
//  (or the frame pointer when UseFP is set) and which has no index register.
bool MDIsDirectStackAccessOpnd(op_t CurrOp, bool UseFP) {
	const bool AddressedOperand = (o_displ == CurrOp.type) || (o_phrase == CurrOp.type);
	if (!AddressedOperand) {
		return false;
	}

	int Base;
	int Index;
	ushort Scale;
	ea_t Displacement;
	MDExtractAddressFields(CurrOp, Base, Index, Scale, Displacement);

	// An index register rules out a direct access regardless of the base register.
	if (R_none != Index) {
		return false;
	}
	return MDIsStackPtrReg(Base, UseFP);
} // end of MDIsDirectStackAccessOpnd()

// MACHINE DEPENDENT: Is operand trackable in data flow analyses (i.e. a direct stack memory access or a register?)
bool MDIsDataFlowOpnd(op_t CurrOp, bool UseFP) {
	return ((o_reg == CurrOp.type) || MDIsDirectStackAccessOpnd(CurrOp, UseFP));
} // end of MDIsDataFlowOpnd()

// MACHINE DEPENDENT: Is operand a caller-saved register?
//  The register number is canonicalized first, so subword registers are judged by
//  their containing register. True for R_ax, R_cx and R_dx; false for non-registers.
bool MDIsCallerSavedReg(op_t CurrOp) {
	if (o_reg != CurrOp.type)
		return false;

	switch (MDCanonicalizeSubReg(CurrOp.reg)) {
		case R_ax:
		case R_cx:
		case R_dx:
			return true;
		default:
			return false;
	}
} // end of MDIsCallerSavedReg()
// DEBUG Print DEF and/or USE for an operand.
//  feature: the instruction's feature bits (tested against the CF_CHGn macros).
//  OpNum:   0-based operand index, or -1 for an operand that is not an instruction operand.
// NOTE(review): as the code stands, each "if" below guards only the following "break",
//  so nothing is ever printed, and when the tested bit is clear control falls through
//  into the next case. The statements that printed DEF/USE appear to be missing from
//  this copy of the file -- restore them from the repository version.
void PrintDefUse(ulong feature, int OpNum) {
	// CF_ macros number the operands from 1 to 6, while OpNum
	//  is a 0 to 5 index into the insn_t.Operands[] array.
	// OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc.
	//  operand and not an instruction operand.
	if (-1 == OpNum)
		return;
	switch (OpNum) {
		case 0:
			if (feature & CF_CHG1)
			break;
		case 1:
			if (feature & CF_CHG2)
			break;
		case 2:
			if (feature & CF_CHG3)
			break;
		case 3:
			if (feature & CF_CHG4)
			break;
		case 4:
			if (feature & CF_CHG5)
			break;
		case 5:
			if (feature & CF_CHG6)
			break;
	}
	return;
} // end PrintDefUse()

// DEBUG print SIB info for an operand.
//  Prints the base register name, index register name and scale obtained from
//  MDExtractAddressFields(); a missing register is printed as "None". Register names
//  are cut to NAME_LEN - 1 characters.
void PrintSIB(op_t Opnd) {
	int BaseReg;
	int IndexReg;
	ushort ScaleFactor;
	ea_t offset;
#define NAME_LEN 5
	char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
	char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	// At most NAME_LEN - 1 characters are copied, so the final '\0' from the
	//  initializers above always remains in place.
	if (BaseReg != R_none) {
		SMP_strncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1);
	}
	if (IndexReg != R_none) {
		SMP_strncpy(IndexName, RegNames[IndexReg], NAME_LEN -1);
	}
	SMP_msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor);
	return;
} // end PrintSIB()

// Annotations: concisely print SIB info for an operand.
//  Writes " [base+index*scale" to OutFile, leaving out the parts the operand does not
//  have. The closing bracket is appended here only when HasOffset is false; when it is
//  true the bracket is left open so that an offset can be printed after it.
//  An operand with neither base nor index register is reported as an error.
// NOTE(review): the scale is printed as 1 << (ScaleFactor - 1). If ScaleFactor is the raw
//  2-bit SIB scale field, a field value of 1 prints as "*1" rather than "*2" -- confirm the
//  encoding returned by sib_scale() before relying on the printed value.
void AnnotPrintSIB(op_t Opnd, bool HasOffset, FILE *OutFile) {
	int BaseReg;
	int IndexReg;
	ushort ScaleFactor;
	ea_t offset;
	char OutString[MAXSTR] = {'[', '\0'};
	char ScaleString[4] = {'\0'};  // only filled in when ScaleFactor > 0
	op_t BaseOp =  InitOp, IndexOp = InitOp;
	BaseOp.type = o_reg;
	IndexOp.type = o_reg;
	BaseOp.dtyp = dt_dword;
	IndexOp.dtyp = dt_dword;

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	if (ScaleFactor > 0) {
		ScaleFactor = 1 << (ScaleFactor - 1);
		(void) SMP_snprintf(ScaleString, 4, "%d", ScaleFactor);
	}

	if (BaseReg != R_none) {
		BaseOp.reg = BaseReg;
		if (RegSizes[BaseReg] == 1)
			BaseOp.dtyp = dt_byte;
		(void) SMP_strncat(OutString, MDGetRegName(BaseOp), MAXSTR-1);
		if (IndexReg != R_none) {
			IndexOp.reg = IndexReg;
			if (RegSizes[IndexReg] == 1)
				IndexOp.dtyp = dt_byte;
			(void) SMP_strncat(OutString, "+", MAXSTR-1);
			(void) SMP_strncat(OutString, MDGetRegName(IndexOp), MAXSTR-1);
			// ScaleString holds text only when a scale was computed above.
			if (ScaleFactor > 0) {
				(void) SMP_strncat(OutString, "*", MAXSTR-1);
				(void) SMP_strncat(OutString, ScaleString, MAXSTR-1);
			}
		}
	}
	else if (IndexReg != R_none) {
		IndexOp.reg = IndexReg;
		if (RegSizes[IndexReg] == 1)
			IndexOp.dtyp = dt_byte;
		(void) SMP_strncat(OutString, MDGetRegName(IndexOp), MAXSTR-1);
		if (ScaleFactor > 0) {
			(void) SMP_strncat(OutString, "*", MAXSTR-1);
			(void) SMP_strncat(OutString, ScaleString, MAXSTR-1);
		}
	}
	else {
		SMP_msg("ERROR: No BaseReg, no IndexReg in SIB\n");
	}

	if (!HasOffset) // can close the brackets around regs
		(void) SMP_strncat(OutString, "]", MAXSTR-1);
	SMP_fprintf(OutFile, " %s", OutString);
	return;
} // end of AnnotPrintSIB()

// Debug: print one operand from an instruction or DEF or USE list, followed by its
//  DEF/USE marking for operand position OpNum. Void operands print nothing.
void PrintOneOperand(op_t Opnd, ulong features, int OpNum) {
	if (o_void == Opnd.type)
		return;

	PrintOperand(Opnd);
	PrintDefUse(features, OpNum);
} // end of PrintOneOperand()

// Debug: print one operand.
//  Emits a short description chosen by operand type via SMP_msg(); void operands
//  print nothing. Memory operands with a SIB byte also get their SIB fields printed.
// NOTE(review): in this copy of the file the o_imm branch and the body of the
//  STARS_DEBUG_DUMP_IDENTIFY_HIDDEN_OPERANDS section are empty, and the no-SIB o_phrase
//  branch prints only the warning; their print statements appear to have been lost --
//  restore them from the repository version.
void PrintOperand(op_t Opnd) {
	if (Opnd.type == o_void)
		return;
	else if (Opnd.type == o_mem) {
		SMP_msg(" Operand: memory : addr: %x", Opnd.addr);
		if (Opnd.hasSIB) {
			PrintSIB(Opnd);
		}
	}
	else if (Opnd.type == o_phrase) {
		if (Opnd.hasSIB) { // has SIB info
			PrintSIB(Opnd);
		}
		else { // no SIB info
			SMP_msg(" \n WARNING: addr for o_phrase type: %x\n", Opnd.addr);
		}
	}
	else if (Opnd.type == o_displ) {
		ea_t offset = Opnd.addr;
		int SignedOffset = (int) offset;
		if (Opnd.hasSIB) {
			PrintSIB(Opnd);
			SMP_msg(" displ %d", SignedOffset);
		}
		else {
			ushort BaseReg = Opnd.reg;
			SMP_msg(" reg %s displ %d", RegNames[BaseReg], SignedOffset);
		}
	}
	else if (Opnd.type == o_reg) {
		SMP_msg(" Operand: register %s", RegNames[Opnd.reg]);
	}
	else if (Opnd.type == o_imm) {
		// Nothing is printed for immediates in this copy of the file (see NOTE above).
	}
	else if (Opnd.type == o_far) {
		SMP_msg(" Operand: FarPtrImmed addr: %x", Opnd.addr);
	}
	else if (Opnd.type == o_near) {
		SMP_msg(" Operand: NearPtrImmed addr: %x", Opnd.addr);
	}
	else if (Opnd.type == o_trreg) {
		SMP_msg(" Operand: TaskReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_dbreg) {
		SMP_msg(" Operand: DebugReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_crreg) {
		SMP_msg(" Operand: ControlReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_fpreg) {
		SMP_msg(" Operand: FloatReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_mmxreg) {
		SMP_msg(" Operand: MMXReg reg: %d", Opnd.reg);
	}
	else if (Opnd.type == o_xmmreg) {
		SMP_msg(" Operand: XMMReg reg: %d", Opnd.reg);
	}

#if STARS_DEBUG_DUMP_IDENTIFY_HIDDEN_OPERANDS
	// Body missing in this copy of the file (see NOTE above).
#endif
	return;
} // end of PrintOperand()

// Print an operand that has no features flags or operand position number, such
//  as the op_t types found in lists and sets throughout the blocks, phi functions, etc.
//  Void operands print nothing.
// NOTE(review): SSANum is not used in this copy of the function; the statement that
//  printed it appears to have been lost -- restore it from the repository version.
void PrintListOperand(op_t Opnd, int SSANum) {
	if (Opnd.type != o_void) {
		PrintOperand(Opnd);
	}
	return;
} // end of PrintListOperand()

// Annotations: concisely print one operand.
void AnnotPrintOperand(op_t Opnd, FILE *OutFile) { 
	op_t BaseOp = InitOp;
	op_t IndexOp = InitOp;
	BaseOp.type = o_reg;
	IndexOp.type = o_reg;
	BaseOp.dtyp = dt_dword;
	IndexOp.dtyp = dt_dword;

	if (Opnd.type == o_mem) {
			AnnotPrintSIB(Opnd, false, OutFile);
		}
	}
	else if (Opnd.type == o_phrase) {
		if (Opnd.hasSIB) { // has SIB info
			AnnotPrintSIB(Opnd, false, OutFile);
		}
		else { // no SIB info
			ushort BaseReg = Opnd.phrase;
			BaseOp.reg = BaseReg;
			if (RegSizes[BaseReg] == 1)
				BaseOp.dtyp = dt_byte;
			SMP_fprintf(OutFile, " [%s]", MDGetRegName(BaseOp));
			SMP_msg(" \n WARNING: addr for o_phrase type: %x\n", Opnd.addr);
		}
	}
	else if (Opnd.type == o_displ) {
		ea_t offset = Opnd.addr;
		int SignedOffset = (int) offset;
		if (Opnd.hasSIB) {
			AnnotPrintSIB(Opnd, (SignedOffset != 0), OutFile);
			if (SignedOffset > 0) // print plus sign
				SMP_fprintf(OutFile, "+%d]", SignedOffset);
			else if (SignedOffset < 0) // minus sign will print automatically

		}
		else {
			ushort BaseReg = Opnd.reg;
			BaseOp.reg = BaseReg;
			if (RegSizes[BaseReg] == 1)
				BaseOp.dtyp = dt_byte;
			if (SignedOffset >= 0) // print plus sign
				SMP_fprintf(OutFile, " [%s+%d]", MDGetRegName(BaseOp), SignedOffset);
			else // minus sign will print automatically
				SMP_fprintf(OutFile, " [%s%d]", MDGetRegName(BaseOp), SignedOffset);
		SMP_fprintf(OutFile, " %s", MDGetRegName(Opnd));
	}
	else if ((Opnd.type == o_far) || (Opnd.type == o_near)) {
// MACHINE DEPENDENT: Is operand type a known type that we want to analyze?
//  Known types are those in the range o_reg..o_xmmreg. When SMP_DEBUG_OPERAND_TYPES
//  is enabled, any other type except o_void is reported with a warning.
bool MDKnownOperandType(op_t TempOp) {
	bool GoodOpType = ((TempOp.type >= o_reg) && (TempOp.type <= o_xmmreg));
#if SMP_DEBUG_OPERAND_TYPES
	if (!GoodOpType && (o_void != TempOp.type)) {
		SMP_msg("WARNING: Operand type %d \n", TempOp.type);
	}
#endif
	return GoodOpType;
} // end of MDKnownOperandType()

// Meet function over any two types in the type lattice.
//  - UNINIT on either side yields the other type.
//  - Equal types, or an unknown Type1, yield Type1.
//  - Two different numeric types yield NUMERIC; two different data pointer types
//    yield POINTER.
//  - Every other combination yields UNKNOWN.
//  If either input is profiler-derived, the result is marked profiler-derived too,
//  unless the result is UNINIT.
SMPOperandType SMPTypeMeet(SMPOperandType Type1, SMPOperandType Type2) {
	SMPOperandType MeetType = UNKNOWN;
	bool ProfDerived = IsProfDerived(Type1) || IsProfDerived(Type2);
	if (IsEqType(UNINIT, Type1))
		MeetType = Type2;
	else if (IsEqType(UNINIT, Type2) || IsEqType(Type1, Type2)
		|| IsUnknown(Type1))
		MeetType = Type1;
	else if (IsNumeric(Type1)) {
		if (IsNumeric(Type2))  // one is NUMERIC, one is CODEPTR
			MeetType = NUMERIC;
		else if (IsDataPtr(Type2) || IsUnknown(Type2))
			MeetType = UNKNOWN;
	}
	else if (IsDataPtr(Type1)) {
		if (IsDataPtr(Type2))  // two different POINTER subtypes
			MeetType = POINTER;
		else if (IsNumeric(Type2) || IsUnknown(Type2))
			MeetType = UNKNOWN;
	}
	if (ProfDerived && IsNotEqType(UNINIT, MeetType))
		MeetType = MakeProfDerived(MeetType);
	return MeetType;
} // end of SMPTypeMeet()

// *****************************************************************
// Class DisAsmString
// *****************************************************************
// Construct an empty cache: no current address, zero length, empty string.
DisAsmString::DisAsmString(void) {
	this->CachedDisAsm[0] = '\0';
	this->StringLen = 0;
	this->CurrAddr = BADADDR;
}

// Return the disassembly text for the instruction at InstAddr.
//  The text is regenerated only when InstAddr differs from the address of the text
//  currently held; otherwise the held text is returned as is. Color-coding tags are
//  removed from freshly generated text. If generation fails, the error is reported
//  and an empty string is returned.
char *DisAsmString::GetDisAsm(ea_t InstAddr) {
	if (InstAddr != this->CurrAddr) {
		this->CurrAddr = InstAddr;
		bool IDAsuccess = generate_disasm_line(InstAddr, this->CachedDisAsm, sizeof(this->CachedDisAsm) - 1);
		if (IDAsuccess) {
			// Remove interactive color-coding tags.
			this->StringLen = tag_remove(this->CachedDisAsm, this->CachedDisAsm, 0);
			if (-1 >= StringLen) {
				SMP_msg("ERROR: tag_remove failed at addr %x \n", InstAddr);
			}
		}
		else {
			SMP_msg("ERROR: generate_disasm_line failed at addr %x \n", InstAddr);
			this->CachedDisAsm[0] = '\0';
		}
	}
	return (char *) this->CachedDisAsm;
} // end of DisAsmString::GetDisasm()

// Set the disasm text for the SSA marker instructions, which have no IDA Pro disasm because
//  they are pseudo-instructions that we add at the top of each function to hold LiveIn name info.
//  Nothing is done when InstAddr is already the current address.
void DisAsmString::SetMarkerInstText(ea_t InstAddr) {
	if (InstAddr == this->CurrAddr)
		return;

	this->CurrAddr = InstAddr;
	const size_t CopyLimit = sizeof(this->CachedDisAsm) - 1;
	SMP_strncpy(this->CachedDisAsm, "\tfnop\t; Top of function SSA marker for SMP", CopyLimit);
	this->StringLen = (ssize_t) strlen(this->CachedDisAsm);
} // end of DisAsmString::SetMarkerInstText()

clc5q's avatar
clc5q committed
// *****************************************************************
// Class DefOrUse
// *****************************************************************

// Default constructor to make the compilers happy.
//  Produces a void operand with UNINIT types, unanalyzed metadata and all flags cleared.
// NOTE(review): SSANumber is not initialized here; lines appear to have been lost from this
//  copy of the constructor -- compare against the repository version.
DefOrUse::DefOrUse(void) {
	this->Operand.type = o_void;
	this->OpType = UNINIT;
	this->NonSpeculativeOpType = UNINIT;
	this->MetadataStatus = DEF_METADATA_UNANALYZED;
	this->IndWrite = false;
	this->NoTruncation = false;
	this->NoOverflow = false;
	return;
}

// Constructor.
//  Ref:    the operand; register operands are canonicalized before being stored.
//  Type:   the operand type; must not be profiler-derived (asserted).
//  SSASub: the SSA number for this DEF or USE.
//  Metadata starts out unanalyzed and all flags start out cleared.
DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) {
	if (o_reg == Ref.type) {
		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
		//  and type inference systems.
		CanonicalizeOpnd(Ref);
	}
	this->Operand = Ref;
	this->OpType = Type;

	assert(!IsProfDerived(Type));
	this->NonSpeculativeOpType = Type;
	this->SSANumber = SSASub;
	this->MetadataStatus = DEF_METADATA_UNANALYZED;
	this->IndWrite = false;
	this->NoTruncation = false;
	this->NoOverflow = false;
	return;
}

// Copy constructor; delegates all member copying to the assignment operator.
DefOrUse::DefOrUse(const DefOrUse &CopyIn) {
	this->operator=(CopyIn);
}

// Assignment operator for copy constructor use.
//  Copies every member, including the IndWrite and NoOverflow flags, and returns *this.
DefOrUse &DefOrUse::operator=(const DefOrUse &rhs) {
	this->Operand = rhs.Operand;
	this->OpType = rhs.OpType;
	this->NonSpeculativeOpType = rhs.NonSpeculativeOpType;
	this->SSANumber = rhs.SSANumber;
	this->MetadataStatus = rhs.MetadataStatus;
	this->IndWrite = rhs.IndWrite;
	this->NoTruncation = rhs.NoTruncation;
	this->NoOverflow = rhs.NoOverflow;
	return *this;
}

// Set the operand type for this DEF or USE - don't forget to take
//  into account the speculative (profiler) status.
//  When the function containing Instr is being analyzed speculatively, the new type is
//  tagged with PROF_BASE, and the previous type is saved in NonSpeculativeOpType if
//  that previous type was not itself profiler-derived.
// NOTE(review): closing braces were missing from this copy of the function; if the
//  repository version also has an else branch for the non-speculative case, restore it.
void DefOrUse::SetType(SMPOperandType Type, const SMPInstr *Instr) 
{
	SMPOperandType OldType = this->OpType;
	SMPOperandType NewType = Type;
	if (Instr->GetBlock()->GetFunc()->GetIsSpeculative()) {
		NewType = (SMPOperandType)(((int)NewType) | PROF_BASE);
		if (!IsProfDerived(OldType))
			this->NonSpeculativeOpType = OldType;
	}
	this->OpType = NewType;
	return;
}

// Set the indirect memory write flag.
void DefOrUse::SetIndWrite(bool IndMemWrite) {
	this->IndWrite = IndMemWrite;
	return;
}
void DefOrUse::SetNoTruncation(bool NoTruncFlag) {
	this->NoTruncation = NoTruncFlag;
	return;
}

void DefOrUse::SetNoOverflow(bool NoOverflowFlag) {
	this->NoOverflow = NoOverflowFlag;
	return;
}

// Debug printing.
void DefOrUse::Dump(void) const {
	PrintListOperand(this->Operand, this->SSANumber);
	if (IsEqType(this->OpType , NUMERIC))
	else if (IsEqType(this->OpType , CODEPTR))
	else if (IsEqType(this->OpType , POINTER))
	else if (IsEqType(this->OpType , STACKPTR))