Skip to content
Snippets Groups Projects
SMPDataFlowAnalysis.cpp 393 KiB
Newer Older
jdh8d's avatar
jdh8d committed
/*
 * SMPDataFlowAnalysis.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
jdh8d's avatar
jdh8d committed
 */

clc5q's avatar
clc5q committed
//
// SMPDataFlowAnalysis.cpp
//
// This module contains common types and helper classes needed for the
//   SMP project (Software Memory Protection).
//

#include <list>
#include <set>
clc5q's avatar
clc5q committed
#include <vector>
#include <algorithm>
clc5q's avatar
clc5q committed

#include <cstring>
clc5q's avatar
clc5q committed

#include <pro.h>
clc5q's avatar
clc5q committed
#include <assert.h>
clc5q's avatar
clc5q committed
#include <ida.hpp>
#include <idp.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>

#include "interfaces/SMPDBInterface.h"
#include "base/SMPDataFlowAnalysis.h"
#include "base/SMPStaticAnalyzer.h"
#include "base/SMPInstr.h"
#include "base/SMPBasicBlock.h"
#include "base/SMPFunction.h"

using namespace std;
clc5q's avatar
clc5q committed

// Set these to 1 for debugging output
clc5q's avatar
clc5q committed
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0  // Fixing up stack frame info the way we want the offsets
#define SMP_DEBUG_OPERAND_TYPES 1  // leave on; warnings that should never happen
clc5q's avatar
clc5q committed
#define STARS_DEBUG_DUMP_IDENTIFY_HIDDEN_OPERANDS 0 // print HIDDEN if operand.showed() is false
clc5q's avatar
clc5q committed
#if IDA_SDK_VERSION > 560
#define MAX_IDA_REG R_last
clc5q's avatar
clc5q committed
#else
#define MAX_IDA_REG 80
#endif

// Printable register names, indexed by IDA register number.
//  The final entry is a placeholder name for an erroneous register number.
const char *RegNames[MAX_IDA_REG + 1] =
	{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
	  "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	  "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
	  "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
	  "DS", "FS", "GS", "CF", "ZF", "SF", "OF", "PF",
	  "AF", "TF", "IF", "DF", "EFLAGS", "FPU_ST0", "FPU_ST1", "FPU_ST2",
	  "FPU_ST3", "FPU_ST4", "FPU_ST5", "FPU_ST6", "FPU_ST7", "FPU_CTRL", "FPU_STAT", "FPU_TAGS",
      "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7",
      "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7",
      "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15",
      "MXCSR",
      "YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7",
      "YMM8", "YMM9", "YMM10", "YMM11", "YMM12", "YMM13", "YMM14", "YMM15",
	  "REG_ERROR"
	};

// NOTE: Review these sizes. Alter when annotation diffs can be isolated to the change.
//   !!!!****!!!! FP reg stack should be 10-byte registers, right?
const unsigned char RegSizes[MAX_IDA_REG + 1] =
	{ 4, 4, 4, 4, 4, 4, 4, 4,
	  8, 8, 8, 8, 8, 8, 8, 8,
	  1, 1, 1, 1, 4, 4, 4, 4,
	  4, 4, 4, 4, 4, 10, 10, 10,
	  10, 10, 10, 10, 10, 4, 4, 4,
      16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16,
      4,
      32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32,
	  4
unsigned char GetRegSize(uint16_t RegNum) {
	assert(RegNum != ((uint16_t) R_none));
	return RegSizes[RegNum];
}

// IDA operand dtyp code for each register, indexed by register number.
//  Read by GetRegDtyp(), which widens the dt_dword entries up to R_ip to
//  dt_qword when 64-bit operands are in use.
// NOTE(review): entries 56..63 line up with MMX0..MMX7 in RegNames[] but carry
//  the same dtyp as the XMM rows; MMX registers are 64 bits wide -- confirm
//  whether dt_byte16 is intended there.
const char RegDtyps[MAX_IDA_REG + 1] =
{ dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_dword,
dt_qword, dt_qword, dt_qword, dt_qword, dt_qword, dt_qword, dt_qword, dt_qword,
dt_byte, dt_byte, dt_byte, dt_byte, dt_byte, dt_byte, dt_byte, dt_byte,
dt_byte, dt_byte, dt_byte, dt_byte, dt_dword, dt_dword, dt_dword, dt_dword,
dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_dword,
dt_dword, dt_dword, dt_dword, dt_dword, dt_dword, dt_tbyte, dt_tbyte, dt_tbyte,
dt_tbyte, dt_tbyte, dt_tbyte, dt_tbyte, dt_tbyte, dt_word, dt_word, dt_word,
dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16,
dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16,
dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16, dt_byte16,
dt_word,
dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32,
dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32, dt_byte32,
dt_word 
};

// Name handed back when a register number or operand cannot be named.
const char *ErrorStrings[1] = { "ERROR_REG" };

// 16-bit names of the registers R_ax..R_di, indexed by register number.
const char *WordRegStrings[8] = { "AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI" };

// 64-bit names of the registers R_ax..R_di, indexed by register number.
const char *QWordRegStrings[8] = { "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI" };
// Signedness names; entries 0 and 3 are the same string.
// NOTE(review): the indexing convention is not visible in this part of the file -- confirm against users.
const char *SignednessStrings[4] = { "UNKNOWNSIGN", "SIGNED", "UNSIGNED", "UNKNOWNSIGN" };

// Same layout as SignednessStrings, with a NOFLAG prefix on every entry.
const char *LeaSignednessStrings[4] = { "NOFLAGUNKNOWNSIGN", "NOFLAGSIGNED", "NOFLAGUNSIGNED", "NOFLAGUNKNOWNSIGN" };

// Distinguishes subword regs from their parent regs
const char *MDGetRegNumName(uint16_t RegNum, uint16_t ByteWidth) {
	if ((R_none == RegNum) || (MAX_IDA_REG < RegNum))
		return ErrorStrings[0];
	else if ((ByteWidth == 2) && (RegNum >= R_ax) && (RegNum <= R_di)) {
		return WordRegStrings[RegNum];
	else if ((ByteWidth == 8) && (RegNum >= R_ax) && (RegNum <= R_di)) {
		return QWordRegStrings[RegNum];
// Return the printable name of a register operand, using its byte width to
//  pick between subword, 32-bit and 64-bit names (see MDGetRegNumName()).
//  Returns ErrorStrings[0] for a null operand or one that is not a register.
const char *MDGetRegName(STARSOpndTypePtr RegOp) {
	if ((nullptr == RegOp) || (!RegOp->IsRegOp()))
		return ErrorStrings[0];

	return MDGetRegNumName(RegOp->GetReg(), RegOp->GetByteWidth());
}
clc5q's avatar
clc5q committed
// Define instruction categories for data flow analysis.
SMPitype DFACategory[NN_last+1];
// Define instruction categories for data type analysis.
int SMPTypeCategory[NN_last+1];
clc5q's avatar
clc5q committed

// Define which instructions define and use the CPU flags.
bool SMPDefsFlags[NN_last + 1];
bool SMPUsesFlags[NN_last + 1];

// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map
//  DefOp must be a register operand; its register number fills the low bits
//  and SSANum is placed above bit 15.
int HashGlobalNameAndSSA(STARSOpndTypePtr DefOp, int SSANum) { 
	assert(DefOp->IsRegOp());
	// Shift in unsigned arithmetic: left-shifting a negative int is not well defined
	//  before C++20, and the bit pattern produced here is the one callers already key on.
	const unsigned int HighBits = (static_cast<unsigned int>(SSANum) << 16);
	const unsigned int LowBits = static_cast<unsigned int>(DefOp->GetReg());
	return static_cast<int>(HighBits | LowBits);
}

// Get the size in bytes of the data type of an operand.
size_t GetOpDataSize(STARSOpndTypePtr DataOp) {
	size_t DataSize;
	char OpDtyp = DataOp->GetOpDtyp();
	if (DataOp->IsRegOp()) {
		DataSize = RegSizes[DataOp->GetReg()];
		if (OpDtyp == dt_word) {
			SMP_msg("Found 16-bit register using dtyp field.\n");
#endif
		}
		else if (OpDtyp == dt_qword) {
			DataSize = 8;
#if 0
			SMP_msg("Found 64-bit register using dtyp field.\n");
		case dt_byte:
			DataSize = 1;
			break;
		case dt_word:
			DataSize = 2;
			break;
		case dt_dword:
		case dt_float:
		case dt_code:
		case dt_unicode:
		case dt_string:
			DataSize = 4;
			break;
		case dt_double:
		case dt_qword:
			DataSize = 8;
			break;
		case dt_packreal:
			DataSize = 12;
			break;
		case dt_byte16:
#if IDA_SDK_VERSION > 599
#endif
			DataSize = 16;
			break;
		case dt_fword:
			DataSize = 6;
			break;
		case dt_3byte:
			DataSize = 3;
			break;
		case dt_byte32:
			DataSize = 32;
			break;
		case dt_byte64:
			DataSize = 64;
			break;
			SMP_msg("ERROR: unexpected data type %d in GetOpDataSize() :", OpDtyp);
			PrintOperand(DataOp);
			break;
	}
	return DataSize;
} // end of GetOpDataSize()

// Look up the IDA Pro dtyp (operand size code) for register RegNum.
//  RegNum must not be R_none and must be below MAX_IDA_REG.
//  When the ISA byte width is 8 and Has64BitOpnds is set, a table entry of
//  dt_dword for any register numbered R_ip or lower is reported as dt_qword.
char GetRegDtyp(uint16 RegNum, bool Has64BitOpnds) {
	assert(((uint16_t) R_none) != RegNum);
	assert(MAX_IDA_REG > RegNum);

	const char TableDtyp = RegDtyps[RegNum];
	// 32-bit IDA general regs are 64-bit for x86-64
	const bool WidenTo64 = (8 == STARS_ISA_Bytewidth) && Has64BitOpnds
		&& (dt_dword == TableDtyp) && (R_ip >= RegNum);
	if (WidenTo64)
		return dt_qword;
	return TableDtyp;
}

// Map an operand's size in bytes onto the matching FG_MASK_BITWIDTH_* value.
//  DataSize: size in bytes if the caller knows it; pass 0 to have it computed
//   from CurrOp via GetOpDataSize().
//  An unrecognized size is reported through SMP_msg() and the initial value 32
//   is returned unchanged.
unsigned short ComputeOperandBitWidthMask(STARSOpndTypePtr CurrOp, size_t DataSize) {
	unsigned short WidthMask = 32;
	const size_t ByteCount = (0 == DataSize) ? GetOpDataSize(CurrOp) : DataSize;

	switch (ByteCount) {
		case 1:  WidthMask = FG_MASK_BITWIDTH_8;   break;
		case 2:  WidthMask = FG_MASK_BITWIDTH_16;  break;
		case 3:  WidthMask = FG_MASK_BITWIDTH_24;  break;
		case 4:  WidthMask = FG_MASK_BITWIDTH_32;  break;
		case 6:  WidthMask = FG_MASK_BITWIDTH_48;  break;
		case 8:  WidthMask = FG_MASK_BITWIDTH_64;  break;
		case 10: WidthMask = FG_MASK_BITWIDTH_80;  break;
		case 12: WidthMask = FG_MASK_BITWIDTH_96;  break;
		case 16: WidthMask = FG_MASK_BITWIDTH_128; break;
		case 32: WidthMask = FG_MASK_BITWIDTH_256; break;
		default:
			SMP_msg("ERROR: Unknown DataSize: %zu bytes ", ByteCount);
			break;
	}
	return WidthMask;
} // end of ComputeOperandBitWidthMask()

// Compute largest bit width from a SignMiscInfo bit mask.
size_t LargestBitWidthFromMask(unsigned short WidthTypeInfo) {
	unsigned short BitWidthMask = WidthTypeInfo & FG_MASK_BITWIDTH_FIELDS;
	size_t LargestWidth = 0;

	// Go from highest bit width to lowest.
	if (BitWidthMask & FG_MASK_BITWIDTH_256)
		LargestWidth = 256;
	else if (BitWidthMask & FG_MASK_BITWIDTH_128)
		LargestWidth = 128;
	else if (BitWidthMask & FG_MASK_BITWIDTH_96)
		LargestWidth = 96;
	else if (BitWidthMask & FG_MASK_BITWIDTH_64)
		LargestWidth = 64;
	else if (BitWidthMask & FG_MASK_BITWIDTH_48)
		LargestWidth = 48;
	else if (BitWidthMask & FG_MASK_BITWIDTH_32)
		LargestWidth = 32;
	else if (BitWidthMask & FG_MASK_BITWIDTH_24)
		LargestWidth = 24;
	else if (BitWidthMask & FG_MASK_BITWIDTH_16)
		LargestWidth = 16;
	else if (BitWidthMask & FG_MASK_BITWIDTH_8)
		LargestWidth = 8;

	return LargestWidth;
} // end of LargestBitWidthFromMask()

// Is CurrOp a general purpose register? (not flags, instruction pointer, non-integer reg, etc.)
//  Returns false for a null operand or a non-register operand; otherwise true
//  exactly when the register number lies in R_ax..R_dil inclusive.
bool MDIsGeneralPurposeReg(STARSOpndTypePtr CurrOp) {
	// Confirm the operand kind before reading its register field.
	if ((nullptr == CurrOp) || (!CurrOp->IsRegOp()))
		return false;

	const uint16_t CurrReg = CurrOp->GetReg();
	return ((CurrReg >= R_ax) && (CurrReg <= R_dil));
} // end of MDIsGeneralPurposeReg()

// We maintain a list of the caller-saved regs for the current binary's ABI.
//  This differs from 32-bit to 64-bit x86 binaries, as well as across other ISAs.
list<uint16> STARS_MDCallerSavedRegs;

// Rebuild STARS_MDCallerSavedRegs from scratch for the current binary.
//  The 64-bit register set is chosen only when __EA64__ is defined and
//  STARS_ISA_Bitwidth is 64; otherwise the 32-bit set is used.
void MDInitializeCallerSavedRegs(void) {
	bool Is64BitX86 = false;
#ifdef __EA64__
	Is64BitX86 = (64 == STARS_ISA_Bitwidth);
#endif
	if (Is64BitX86) {
		// 64-bit x86 passes arguments in EDI, ESI, EDX, ECX, R8 and R9
		//  (later arguments go on the stack); EAX, R10 and R11 are also
		//  caller-saved although they carry no arguments.
		STARS_MDCallerSavedRegs.assign({ R_ax, R_cx, R_dx, R_si, R_di, R_r8, R_r9, R_r10, R_r11 });
	}
	else {
		// 32-bit x86 uses EAX, ECX, EDX as caller-saved.
		STARS_MDCallerSavedRegs.assign({ R_ax, R_cx, R_dx });
	}
	return;
}

// Iterator to the first entry of the caller-saved register list.
list<uint16>::iterator GetFirstCallerSavedReg(void) {
	return std::begin(STARS_MDCallerSavedRegs);
}

// Past-the-end iterator of the caller-saved register list (not the last element itself).
list<uint16>::iterator GetLastCallerSavedReg(void) {
	return std::end(STARS_MDCallerSavedRegs);
}

// We maintain a list of the argument-passing regs for the current binary's ABI.
//  This differs from 32-bit to 64-bit x86 binaries, as well as across other ISAs.
//  The list is in order of argument position number. For x86-64, this means EDI,
//  ESI, EDX, ECX, R8, R9.
list<uint16> STARS_MDArgumentRegs;

// Rebuild STARS_MDArgumentRegs in argument-position order for the current binary.
//  The list is left empty unless __EA64__ is defined and STARS_ISA_Bitwidth is 64.
void MDInitializeArgumentRegs(void) {
	// Start from an empty list on every path, so that initializing more than once
	//  cannot append the argument registers a second time.
	STARS_MDArgumentRegs.clear();

	bool x86_64_ISA_flag = false;
#ifdef __EA64__
	x86_64_ISA_flag = (STARS_ISA_Bitwidth == 64);
#endif
	if (x86_64_ISA_flag) {
		STARS_MDArgumentRegs.push_back(R_di);
		STARS_MDArgumentRegs.push_back(R_si);
		STARS_MDArgumentRegs.push_back(R_dx);
		STARS_MDArgumentRegs.push_back(R_cx);
		STARS_MDArgumentRegs.push_back(R_r8);
		STARS_MDArgumentRegs.push_back(R_r9);
	}
	return;
}

// Iterator to the first entry of the argument register list.
list<uint16>::iterator GetFirstArgumentReg(void) {
	return std::begin(STARS_MDArgumentRegs);
}

// Past-the-end iterator of the argument register list (not the last element itself).
list<uint16>::iterator GetLastArgumentReg(void) {
	return std::end(STARS_MDArgumentRegs);
}

// Are the two operands equal?
//  Null operands and operands of different types never compare equal.
//  Which fields are compared depends on the operand type:
//   o_reg             register number and byte width
//   o_mem, near, far  address
//   o_phrase          SIB byte and specflag4 if both have a SIB byte, register if neither does
//   o_displ           as o_phrase, with the address compared as well
//   o_imm             immediate value
//   other reg kinds   register number only
//  An unrecognized operand type is logged and treated as not equal.
bool IsEqOp(STARSOpndTypePtr Opnd1, STARSOpndTypePtr Opnd2) {
	if ((nullptr == Opnd1) || (nullptr == Opnd2))
		return false;
	if (Opnd1->GetOpType() != Opnd2->GetOpType())
		return false;

	switch (Opnd1->GetOpType()) {
		case o_void:
			return true;

		case o_reg:
			return ((Opnd1->GetReg() == Opnd2->GetReg()) && (Opnd1->GetByteWidth() == Opnd2->GetByteWidth()));

		case o_mem:
			return (Opnd1->GetAddr() == Opnd2->GetAddr());

		case o_phrase: {
			const bool SIB1 = Opnd1->HasSIBByte();
			const bool SIB2 = Opnd2->HasSIBByte();
			if (SIB1 != SIB2)
				return false; // no SIB != has SIB
			if (SIB1)
				return ((Opnd1->GetSIB() == Opnd2->GetSIB()) && (Opnd1->GetSpecFlag4() == Opnd2->GetSpecFlag4()));
			return (Opnd1->GetReg() == Opnd2->GetReg()); // neither has SIB; compare register, e.g. [ebx] to [edx]
		}

		case o_displ: {
			const bool SIB1 = Opnd1->HasSIBByte();
			const bool SIB2 = Opnd2->HasSIBByte();
			if (SIB1 != SIB2)
				return false; // no SIB != has SIB
			if (SIB1)
				return ((Opnd1->GetSIB() == Opnd2->GetSIB()) && (Opnd1->GetAddr() == Opnd2->GetAddr()) && (Opnd1->GetSpecFlag4() == Opnd2->GetSpecFlag4()));
			return ((Opnd1->GetAddr() == Opnd2->GetAddr()) && (Opnd1->GetReg() == Opnd2->GetReg()));
		}

		case o_imm:
			return (Opnd1->GetImmedValue() == Opnd2->GetImmedValue());

		case o_far:  // fall through to o_near case
		case o_near:
			return (Opnd1->GetAddr() == Opnd2->GetAddr());

		case o_trreg:  // fall through
		case o_dbreg:  // fall through
		case o_crreg:  // fall through
		case o_fpreg:  // fall through
		case o_mmxreg: // fall through
		case o_xmmreg: // fall through
		case o_ymmreg:
			return (Opnd1->GetReg() == Opnd2->GetReg()); // no subword regs to deal with

		default:
			SMP_msg("ERROR: Unknown operand type in IsEqOp.\n");
			return false;
	} // end switch on operand type
} // end of function IsEqOp()

// Are operands equal, ignoring bitwidth differences for register operands?
//  Null operands and operands of different types never compare equal.
//  Register operands are compared by register number alone; every other
//  operand type is delegated to IsEqOp().
bool IsEqOpIgnoreBitwidth(STARSOpndTypePtr Opnd1, STARSOpndTypePtr Opnd2) {
	if ((nullptr == Opnd1) || (nullptr == Opnd2))
		return false;
	if (Opnd1->GetOpType() != Opnd2->GetOpType())
		return false;

	if (Opnd1->IsRegOp())
		return (Opnd1->GetReg() == Opnd2->GetReg()); // no concern for subword regs; AX == EAX == RAX

	return IsEqOp(Opnd1, Opnd2);
} // end of function IsEqOpIgnoreBitwidth()
clc5q's avatar
clc5q committed
// Ordering on register numbers that treats a subword register as its
//  containing register: both sides go through MDCanonicalizeSubReg() first.
//  This lets a DEF of AL be recognized as killing EAX and vice versa, and it
//  means AL and AH rank equal to each other as well as to EAX, which keeps
//  ordered sets strict.
bool MDLessReg(const uint16_t Reg1, const uint16_t Reg2) {
	return (MDCanonicalizeSubReg(Reg1) < MDCanonicalizeSubReg(Reg2));
} // end of MDLessReg()

// Equality on register numbers after both are canonicalized with
//  MDCanonicalizeSubReg(), so a subword register equals its containing register.
bool MDEqReg(const uint16_t Reg1, const uint16_t Reg2) {
	return (MDCanonicalizeSubReg(Reg1) == MDCanonicalizeSubReg(Reg2));
} // end of MDEqReg()

// Ordering on register operands: first by canonical (containing) register
//  number, then by byte width when the canonical registers are the same.
//  Both operands are dereferenced unconditionally and must be non-null.
bool MDLessRegOpnd(const STARSOpndTypePtr RegOp1, const STARSOpndTypePtr RegOp2) {
	const uint16_t SReg1 = MDCanonicalizeSubReg(RegOp1->GetReg());
	const uint16_t SReg2 = MDCanonicalizeSubReg(RegOp2->GetReg());
	if (SReg1 != SReg2)
		return (SReg1 < SReg2);
	return (RegOp1->GetByteWidth() < RegOp2->GetByteWidth());
} // end of MDLessRegOpnd()

// Map a subword register number onto the number of its containing register.
//  Registers outside FIRST_X86_SUBWORD_REG..LAST_X86_SUBWORD_REG are returned unchanged.
uint16_t MDCanonicalizeSubReg(const uint16_t Reg1) {
	const bool Subword = ((Reg1 >= FIRST_X86_SUBWORD_REG) && (Reg1 <= LAST_X86_SUBWORD_REG));
	uint16_t SReg1 = Reg1;
	if (Subword) {  // only subword registers are renumbered
		// See enumeration RegNo in intel.hpp.
		if (SReg1 < R_ah)  // AL, CL, DL or BL
			SReg1 -= (R_al - R_ax);
		else             // AH, CH, DH, BH, SPL, BPL, SIL, DIL
			SReg1 -= (R_ah - R_ax);
	}
	return SReg1;
} // end of MDCanonicalizeSubReg()

#if 0
// If TempOp is a register, call MDCanonicalizeSubReg() on it.
void CanonicalizeOpnd(STARSOpndTypePtr &TempOp) {
	if (TempOp->IsRegOp()) {
		uint16 NewReg = MDCanonicalizeSubReg(TempOp->GetReg());
		if (TempOp->GetReg() != NewReg) {
			TempOp->SetReg(NewReg);
// If TempOp is a register, call MDCanonicalizeSubReg() on it.
void CanonicalizeOpnd(STARSOpndTypePtr &TempOp) {
	if (TempOp->IsRegOp()) {
		if (4 > GetOpDataSize(TempOp)) {
			TempOp->SetReg(MDCanonicalizeSubReg(TempOp->GetReg()));
bool MDIsStackOrFramePointerReg(STARSOpndTypePtr RegOp, bool UseFP) {
	if (RegOp->IsRegOp()) {
		PtrReg = RegOp->MatchesReg(MD_STACK_POINTER_REG) || (UseFP && RegOp->MatchesReg(MD_FRAME_POINTER_REG));
clc5q's avatar
clc5q committed
// In SSA computations, the GlobalNames index is stored inside the operand.
//  This function reads it back through the operand's GetOpGlobalIndex() accessor.
unsigned int ExtractGlobalIndex(STARSOpndTypePtr GlobalOp) {
	return GlobalOp->GetOpGlobalIndex();
}

// Store a GlobalNames index in the operand through its SetOpGlobalIndex() accessor.
void SetGlobalIndex(STARSOpndTypePtr TempOp, size_t index) {
	TempOp->SetOpGlobalIndex(index);
}

bool MD_STARS_op256(const STARSOpndTypePtr &x)        // is VEX.L set?
  return ((x->GetSpecFlag4() & STARS_VEXPR) != 0) && ((x->GetSpecFlag4() & VEX_L) != 0);
bool MD_STARS_is_vsib(const STARSOpndTypePtr &x)  // does instruction use VSIB variant of the sib byte?
  return ((x->GetSpecFlag4() & STARS_VSIB) != 0);
int MD_STARS_sib_base(const STARSOpndTypePtr &x)                    // get extended sib base
#ifdef __EA64__
  if ( x->GetSpecFlag4() & REX_B )
    base |= 8;
#endif
  return base;
}

// Return the index register number encoded in the operand's SIB byte.
//  The index field is bits 3..5 of the SIB byte; under __EA64__ the REX_X bit
//  of specflag4 supplies the fourth bit. For a VSIB operand the result is
//  rebased onto the YMM registers (when MD_STARS_op256() holds) or the XMM
//  registers.
regnum_t MD_STARS_sib_index(const STARSOpndTypePtr &x)                   // get extended sib index
{
  regnum_t index = regnum_t((x->GetSIB() >> 3) & 7);
#ifdef __EA64__
  if ( x->GetSpecFlag4() & REX_X )
    index |= 8;
#endif
  if (MD_STARS_is_vsib(x))
    index += MD_STARS_op256(x) ? 81 /*R_ymm0*/ : 64 /*R_xmm0*/;
  return index;
}

// Return true if CurrOp could be an indirect memory reference.
bool MDIsIndirectMemoryOpnd(STARSOpndTypePtr CurrOp, bool UseFP) {
	if ((nullptr == CurrOp) || (! CurrOp->IsMemOp()))
		int BaseReg = MD_STARS_sib_base(CurrOp);
		short IndexReg = MD_STARS_sib_index(CurrOp);
		if ((R_none != IndexReg) && (MD_STACK_POINTER_REG != IndexReg)) { 
			if ((MD_FRAME_POINTER_REG == IndexReg) && UseFP)
		if (0 != CurrOp->GetSIBScaleFactor())
			if ((BaseReg == MD_FRAME_POINTER_REG) && CurrOp->IsStaticMemOp()) {
			else if ((BaseReg == MD_FRAME_POINTER_REG) && UseFP) 
				;  // EBP used as frame pointer for direct access
			else if (BaseReg == MD_STACK_POINTER_REG)
				;  // ESP used as stack pointer for direct access
			else
				indirect = true; // conservative; some register used for addressing
								// other than a stack or frame pointer
		}
	} // end if hasSIB
	else { // no SIB; can have base register only
		uint16_t BaseReg = CurrOp->GetReg();
		if (CurrOp->IsStaticMemOp()) { // no base register for o_mem
			if (!((0 == BaseReg) || (MD_FRAME_POINTER_REG == BaseReg))) {
				SMP_msg("base reg %d ignored \n", BaseReg);
		else if ((BaseReg == MD_FRAME_POINTER_REG) && UseFP) 
			;  // EBP used as frame pointer for direct access
		else if (BaseReg == MD_STACK_POINTER_REG)
			;  // ESP used as stack pointer for direct access
		else {
			indirect = true;
		}
	}

	return indirect;
} // end MDIsIndirectMemoryOpnd()

// Extract the base and index registers and scale factor and displacement from the
//  memory operand.
void MDExtractAddressFields(STARSOpndTypePtr MemOp, int &BaseReg, int &IndexReg, uint16_t &Scale, ea_t &Offset) {
	assert(MemOp->IsMemOp());
		BaseReg = MD_STARS_sib_base(MemOp);
		IndexReg = (int) MD_STARS_sib_index(MemOp);
		if (MD_STACK_POINTER_REG == IndexReg) // signifies no index register
			Scale = (uint16_t) MemOp->GetSIBScaleFactor();
			if ((BaseReg == MD_FRAME_POINTER_REG) && MemOp->IsStaticMemOp()) {
				// **!!** BaseReg allowed for o_mem with SIB byte???
			}
		}
	}
	else { // no SIB byte; can have base reg but no index reg or scale factor
		BaseReg = (int) MemOp->GetReg();  // cannot be R_none for no SIB case
		if (MemOp->IsStaticMemOp()) {
			BaseReg = R_none; // no Base register for o_mem operands
		}
	}

	return;
} // end of MDExtractAddressFields()

// Does CurrOp exist and report itself as a memory operand?
//  A null operand is not a memory operand.
bool IsMemOperand(STARSOpndTypePtr CurrOp) {
	if (nullptr == CurrOp)
		return false;
	return CurrOp->IsMemOp();
}

// MACHINE DEPENDENT: Is CurrOp the flags register?
//  A null operand is not the flags register.
bool MDIsFlagsReg(STARSOpndTypePtr CurrOp) {
	return ((nullptr != CurrOp) && CurrOp->MatchesReg(X86_FLAGS_REG));
}

// MACHINE DEPENDENT: Is register a stack pointer or frame pointer?
//  The frame pointer register only counts when UseFP is true.
bool MDIsStackPtrReg(int RegNumber, bool UseFP) {
	if (RegNumber == MD_STACK_POINTER_REG)
		return true;
	return (UseFP && (RegNumber == MD_FRAME_POINTER_REG));
}

// MACHINE DEPENDENT: Is operand a stack memory access?
bool MDIsStackAccessOpnd(STARSOpndTypePtr CurrOp, bool UseFP) {
	if ((nullptr == CurrOp) || ((!CurrOp->IsMemDisplacementOp()) && (!CurrOp->IsMemNoDisplacementOp()))) {
		return false;
	}

	MDExtractAddressFields(CurrOp, BaseReg, IndexReg, ScaleFactor, offset);
} // end of MDIsStackAccessOpnd()

// MACHINE DEPENDENT: Is operand a direct stack memory access?
//  Returns false for a null operand and for anything that is neither a
//  displacement nor a no-displacement memory operand. Otherwise the address
//  fields are extracted, and the access is direct when the base register is
//  the stack pointer (or the frame pointer, if UseFP) and there is no index register.
bool MDIsDirectStackAccessOpnd(STARSOpndTypePtr CurrOp, bool UseFP) {
	if ((nullptr == CurrOp) || ((! CurrOp->IsMemDisplacementOp()) && (! CurrOp->IsMemNoDisplacementOp()))) {
		return false;
	}

	// Outputs of MDExtractAddressFields(); only the base and index registers are used here.
	int BaseReg = R_none;
	int IndexReg = R_none;
	uint16_t ScaleFactor = 0;
	ea_t offset = 0;
	MDExtractAddressFields(CurrOp, BaseReg, IndexReg, ScaleFactor, offset);

	// An index register makes the effective address variable, so the access is not direct.
	return (MDIsStackPtrReg(BaseReg, UseFP) && (IndexReg == R_none));
} // end of MDIsDirectStackAccessOpnd()

// MACHINE DEPENDENT: Is operand trackable in data flow analyses (i.e. a direct stack memory access or a register?)
//  A null operand is not trackable.
bool MDIsDataFlowOpnd(STARSOpndTypePtr CurrOp, bool UseFP) {
	if (nullptr == CurrOp)
		return false;
	return (CurrOp->IsRegOp() || MDIsDirectStackAccessOpnd(CurrOp, UseFP));
}

// MACHINE DEPENDENT: Is operand a caller-saved register?
//  Returns false for a null operand or a non-register operand. The register is
//  canonicalized first, so a subword register answers for its containing register.
//  The answer comes from STARS_MDCallerSavedRegs, the per-ABI list built by
//  MDInitializeCallerSavedRegs(), so the 64-bit list entries are honored as well.
//  If that list is still empty, fall back to EAX/ECX/EDX, which appear in the
//  list for both ABIs.
bool MDIsCallerSavedReg(STARSOpndTypePtr CurrOp) {
	if ((nullptr == CurrOp) || (! CurrOp->IsRegOp()))
		return false;

	const uint16_t CurrReg = MDCanonicalizeSubReg(CurrOp->GetReg());
	if (STARS_MDCallerSavedRegs.empty()) {
		return ((R_ax == CurrReg) || (R_cx == CurrReg) || (R_dx == CurrReg));
	}
	return (STARS_MDCallerSavedRegs.end()
		!= std::find(STARS_MDCallerSavedRegs.begin(), STARS_MDCallerSavedRegs.end(), CurrReg));
} // end of MDIsCallerSavedReg()
// If CurrOp would change when reg is canonicalized, then return CurrOp->clone(), else return CurrOp;
//  "Would change" is decided by width: a register operand narrower than
//  STARS_ISA_Bytewidth is cloned. Null and non-register operands are returned as is.
STARSOpndTypePtr CloneIfSubwordReg(STARSOpndTypePtr CurrOp) {
	if ((nullptr == CurrOp) || (!CurrOp->IsRegOp()))
		return CurrOp;

	if (STARS_ISA_Bytewidth > CurrOp->GetByteWidth())
		return CurrOp->clone();

	return CurrOp;
} // end of CloneIfSubwordReg()

// Return a clone of CurrOp when later canonicalization or stack normalization
//  would modify it, else CurrOp itself. Stack access operands are always
//  cloned; anything else is decided by CloneIfSubwordReg().
STARSOpndTypePtr CloneIfNecessary(STARSOpndTypePtr CurrOp, bool UseFP) {
	const bool StackAccess = MDIsStackAccessOpnd(CurrOp, UseFP);
	if (!StackAccess)
		return CloneIfSubwordReg(CurrOp);
	return CurrOp->clone();
} // end of CloneIfNecessary()

// DEBUG Print DEF and/or USE for an operand.
//  feature: instruction feature bits, tested against the CF_CHGn masks.
//  OpNum:   0-based operand index, or -1 for an operand that does not belong
//           to an instruction.
// NOTE(review): as the body stands, each case is an if whose only statement is
//  the break that follows it, so nothing is ever printed. The output
//  statements appear to be missing -- TODO restore them.
void PrintDefUse(ulong feature, int OpNum) {
	// CF_ macros number the operands from 1 to 6, while OpNum
	//  is a 0 to 5 index into the insn_t.Operands[] array.
	// OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc.
	//  operand and not an instruction operand.
	if (-1 == OpNum)
		return;
	switch (OpNum) {
		case 0:
			if (feature & CF_CHG1)
			break;
		case 1:
			if (feature & CF_CHG2)
			break;
		case 2:
			if (feature & CF_CHG3)
			break;
		case 3:
			if (feature & CF_CHG4)
			break;
		case 4:
			if (feature & CF_CHG5)
			break;
		case 5:
			if (feature & CF_CHG6)
			break;
	}
	return;
} // end PrintDefUse()

// DEBUG print SIB info for an operand.
void PrintSIB(STARSOpndTypePtr Opnd) {
	int BaseReg;
	int IndexReg;
#define NAME_LEN 5
	char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
	char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	if (BaseReg != R_none)
		SMP_strncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1);
	if (IndexReg != R_none) {
		SMP_strncpy(IndexName, RegNames[IndexReg], NAME_LEN -1);
	SMP_msg(" Base %s Index %s Scale %d Flag4 %d", BaseName, IndexName, ScaleFactor, Opnd->GetSpecFlag4());
// Annotations: concisely print SIB info for an operand.
void AnnotPrintSIB(STARSOpndTypePtr Opnd, bool HasOffset, FILE *OutFile) {
	ea_t offset;
	char OutString[MAXSTR] = {'[', '\0'};
	char ScaleString[4];

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	if (ScaleFactor > 0) {
		ScaleFactor = 1 << (ScaleFactor - 1);
		(void) SMP_snprintf(ScaleString, 4, "%d", ScaleFactor);
		(void) SMP_strncat(OutString, MDGetRegNumName(BaseReg, RegSizes[BaseReg]), MAXSTR - 1);
			(void) SMP_strncat(OutString, "+", MAXSTR-1);
			(void) SMP_strncat(OutString, MDGetRegNumName(IndexReg, RegSizes[IndexReg]), MAXSTR - 1);
				(void) SMP_strncat(OutString, "*", MAXSTR-1);
				(void) SMP_strncat(OutString, ScaleString, MAXSTR-1);
		(void) SMP_strncat(OutString, MDGetRegNumName(IndexReg, RegSizes[IndexReg]), MAXSTR - 1);
			(void) SMP_strncat(OutString, "*", MAXSTR-1);
			(void) SMP_strncat(OutString, ScaleString, MAXSTR-1);
		SMP_msg("ERROR: No BaseReg, no IndexReg in SIB\n");
	if (!HasOffset) // can close the brackets around regs
		(void) SMP_strncat(OutString, "]", MAXSTR-1);
	SMP_fprintf(OutFile, " %s", OutString);
// Annotations: concisely print SIB info for an operand.
void SPARKAnnotPrintSIB(STARSOpndTypePtr Opnd, bool HasOffset, FILE *OutFile, uint16 SegReg, bool UseFP) {
clc5q's avatar
clc5q committed
	char OutString[MAXSTR] = {'(', '\0'};
	char ScaleString[4];

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

clc5q's avatar
clc5q committed
	bool SegRegPrefix = is_segreg((int) SegReg);
	if (SegRegPrefix) {
		// Emit segment register string unless it is just the stack segment plus a stack operand,
		//  where the stack segment is implied anyway.
		if ((SegReg == R_ss) && MDIsStackAccessOpnd(Opnd, UseFP)) {
clc5q's avatar
clc5q committed
			SegRegPrefix = false;
		}
		else {
			(void) SMP_strncat(OutString, "X86.", MAXSTR-1);
			(void) SMP_strncat(OutString, MDGetRegNumName(SegReg, RegSizes[SegReg]), MAXSTR-1);
	if (ScaleFactor > 0) {
		ScaleFactor = 1 << (ScaleFactor - 1);
		(void) SMP_snprintf(ScaleString, 4, "%d", ScaleFactor);
	}

	if (BaseReg != R_none) {
clc5q's avatar
clc5q committed
		if (SegRegPrefix) {
			(void) SMP_strncat(OutString, " + ", MAXSTR-1);
		}
		(void) SMP_strncat(OutString, "X86.", MAXSTR-1);
		(void) SMP_strncat(OutString, MDGetRegNumName(BaseReg, RegSizes[BaseReg]), MAXSTR-1);
		if (STARS_ISA_Bytewidth > RegSizes[BaseReg]) {
			++SubwordAddressRegCount;
		}
		if (IndexReg != R_none) {
			(void) SMP_strncat(OutString, " + ", MAXSTR-1);
clc5q's avatar
clc5q committed
			(void) SMP_strncat(OutString, "X86.", MAXSTR-1);
			(void) SMP_strncat(OutString, MDGetRegNumName(IndexReg, RegSizes[IndexReg]), MAXSTR-1);
			if (STARS_ISA_Bytewidth > RegSizes[IndexReg]) {
				++SubwordAddressRegCount;
			}
			if (ScaleFactor > 0) {
				(void) SMP_strncat(OutString, "*", MAXSTR-1);
				(void) SMP_strncat(OutString, ScaleString, MAXSTR-1);
			}
		}
	}
	else if (IndexReg != R_none) {
clc5q's avatar
clc5q committed
		if (SegRegPrefix) {
			(void) SMP_strncat(OutString, " + ", MAXSTR-1);
		}
		(void) SMP_strncat(OutString, "X86.", MAXSTR-1);
		(void) SMP_strncat(OutString, MDGetRegNumName(IndexReg, RegSizes[IndexReg]), MAXSTR - 1);
		if (STARS_ISA_Bytewidth > RegSizes[IndexReg]) {
			++SubwordAddressRegCount;
		}
		if (ScaleFactor > 0) {
			(void) SMP_strncat(OutString, "*", MAXSTR-1);
			(void) SMP_strncat(OutString, ScaleString, MAXSTR-1);
		}
	}
clc5q's avatar
clc5q committed
	else if (!SegRegPrefix) {
		SMP_msg("ERROR: No BaseReg, no IndexReg in SIB\n");
	}
clc5q's avatar
clc5q committed
	if (!HasOffset) // can close the parens around regs
		(void) SMP_strncat(OutString, ")", MAXSTR-1);
	SMP_fprintf(OutFile, " %s", OutString);
} // end SPARKAnnotPrintSIB()

// Debug: print one operand taken from an instruction or from a DEF or USE
//  list, followed by its DEF/USE annotation. Null and void operands print nothing.
void PrintOneOperand(STARSOpndTypePtr Opnd, uint32_t features, int OpNum) { 
	if ((nullptr == Opnd) || Opnd->IsVoidOp())
		return;

	PrintOperand(Opnd);
	PrintDefUse(features, OpNum);
} // end of PrintOneOperand()

// Debug: print one operand.
void PrintOperand(STARSOpndTypePtr Opnd) { 
	if ((nullptr == Opnd) || (Opnd->IsVoidOp()))
	else if (Opnd->IsStaticMemOp()) {
		SMP_msg(" Operand: memory : addr: %lx", (unsigned long) Opnd->GetAddr());
		if (Opnd->HasSIBByte()) {
	else if (Opnd->IsMemNoDisplacementOp()) {
		if (Opnd->HasSIBByte()) { // has SIB info
			PrintSIB(Opnd);
		}
		else { // no SIB info
			uint16_t BaseReg = Opnd->GetReg();
		if (Opnd->GetAddr() != 0) {
			SMP_msg(" \n ERROR: addr for o_phrase type: %lx\n", (unsigned long) Opnd->GetAddr());
	else if (Opnd->IsMemDisplacementOp()) {
		ea_t offset = Opnd->GetAddr();
		int SignedOffset = (int) offset;
			SMP_msg(" displ %d", SignedOffset);
			uint16_t BaseReg = Opnd->GetReg();
			SMP_msg(" reg %s displ %d", RegNames[BaseReg], SignedOffset);