Skip to content
Snippets Groups Projects
SMPDataFlowAnalysis.cpp 215 KiB
Newer Older
jdh8d's avatar
jdh8d committed
/*
 * SMPDataFlowAnalysis.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011, 2012, 2013, 2014, 2015 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
jdh8d's avatar
jdh8d committed
 */

clc5q's avatar
clc5q committed
//
// SMPDataFlowAnalysis.cpp
//
// This module contains common types and helper classes needed for the
clc5q's avatar
clc5q committed
//   SMP project (Software Memory Protection).
//

#include <list>
#include <set>
clc5q's avatar
clc5q committed
#include <vector>
#include <algorithm>
clc5q's avatar
clc5q committed
#include <cstring>
clc5q's avatar
clc5q committed

#include "interfaces/SMPDBInterface.h"
#include "base/SMPDataFlowAnalysis.h"
#include "base/SMPInstr.h"
#include "base/SMPBasicBlock.h"
#include "base/SMPFunction.h"

using namespace std;
clc5q's avatar
clc5q committed

// Set these to 1 for debugging output
clc5q's avatar
clc5q committed
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0  // Fixing up stack frame info the way we want the offsets
#define SMP_DEBUG_OPERAND_TYPES 1  // leave on; warnings that should never happen
clc5q's avatar
clc5q committed
#define STARS_DEBUG_DUMP_IDENTIFY_HIDDEN_OPERANDS 0 // print HIDDEN if operand.showed() is false
#define MAX_IDA_REG STARS_x86_R_last
clc5q's avatar
clc5q committed

clc5q's avatar
clc5q committed
// Report whether Item occurs anywhere in IntList.
//  Returns false for an empty list.
bool IsIntInList(const std::list<int> &IntList, int Item) {
	return (IntList.cend() != std::find(IntList.cbegin(), IntList.cend(), Item));
}

// Single-bit masks for extracting bits from a STARSBitSet unsigned char.
//  Entry i has only bit i set, least significant bit first.
const uint8_t STARSBitMasks[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
clc5q's avatar
clc5q committed
// Printable register names, indexed by register number (e.g. RegNames[BaseReg]).
//  The 32-bit names are used for the first eight general registers; width-specific
//  names for those registers come from the separate *RegStrings tables.
//  The block after YMM16..YMM31 names ZMM0..ZMM15; it previously repeated
//  "XMM0".."XMM15", which made those registers print under the wrong name.
//  NOTE(review): confirm the ZMM0..ZMM15 positions against the register enumeration.
const char *RegNames[MAX_IDA_REG + 1] =
	{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
	  "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	  "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
	  "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
	  "DS", "FS", "GS", "CF", "ZF", "SF", "OF", "PF",
	  "AF", "TF", "IF", "DF", "EFLAGS", "FPU_ST0", "FPU_ST1", "FPU_ST2",
	  "FPU_ST3", "FPU_ST4", "FPU_ST5", "FPU_ST6", "FPU_ST7", "FPU_CTRL", "FPU_STAT", "FPU_TAGS",
	  "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7",
	  "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7",
	  "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15",
	  "MXCSR",
	  "YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7",
	  "YMM8", "YMM9", "YMM10", "YMM11", "YMM12", "YMM13", "YMM14", "YMM15",
	  "BND0", "BND1", "BND2", "BND3",
	  "XMM16", "XMM17", "XMM18", "XMM19", "XMM20", "XMM21", "XMM22", "XMM23",
	  "XMM24", "XMM25", "XMM26", "XMM27", "XMM28", "XMM29", "XMM30", "XMM31",
	  "YMM16", "YMM17", "YMM18", "YMM19", "YMM20", "YMM21", "YMM22", "YMM23",
	  "YMM24", "YMM25", "YMM26", "YMM27", "YMM28", "YMM29", "YMM30", "YMM31",
	  "ZMM0", "ZMM1", "ZMM2", "ZMM3", "ZMM4", "ZMM5", "ZMM6", "ZMM7",
	  "ZMM8", "ZMM9", "ZMM10", "ZMM11", "ZMM12", "ZMM13", "ZMM14", "ZMM15",
	  "ZMM16", "ZMM17", "ZMM18", "ZMM19", "ZMM20", "ZMM21", "ZMM22", "ZMM23",
	  "ZMM24", "ZMM25", "ZMM26", "ZMM27", "ZMM28", "ZMM29", "ZMM30", "ZMM31",
	  "K0", "K1", "K2", "K3", "K4", "K5", "K6", "K7",
	};
// NOTE: Review these sizes. Alter when annotation diffs can be isolated to the change.
//   !!!!****!!!! FP reg stack should be 10-byte registers, right?
const unsigned char RegSizes[MAX_IDA_REG + 1] =
	{ 4, 4, 4, 4, 4, 4, 4, 4,
	  8, 8, 8, 8, 8, 8, 8, 8,
	  1, 1, 1, 1, 4, 2, 2, 2,
	  2, 2, 2, 4, 4, 4, 4, 4,
	  4, 4, 4, 4, 4, 10, 10, 10,
	  10, 10, 10, 10, 10, 4, 4, 4,
	  16, 16, 16, 16, 16, 16, 16, 16,
	  16, 16, 16, 16, 16, 16, 16, 16,
	  16, 16, 16, 16, 16, 16, 16, 16,
	  4,
	  32, 32, 32, 32, 32, 32, 32, 32,
	  32, 32, 32, 32, 32, 32, 32, 32,
// Return the size in bytes recorded in RegSizes[] for register RegNum.
//  RegNum must be a real register: not STARS_x86_R_none and no larger than
//  MAX_IDA_REG, the last valid index of RegSizes[]. Both conditions are
//  asserted so that an out-of-range number cannot silently read past the table.
unsigned char GetRegSize(STARS_regnum_t RegNum) {
	assert(RegNum != ((STARS_regnum_t) STARS_x86_R_none));
	assert(RegNum <= MAX_IDA_REG);
	return RegSizes[RegNum];
}

const char RegDtyps[MAX_IDA_REG + 1] =
{ STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword,
STARS_dt_qword, STARS_dt_qword, STARS_dt_qword, STARS_dt_qword, STARS_dt_qword, STARS_dt_qword, STARS_dt_qword, STARS_dt_qword,
STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_byte,
STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_byte, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword,
STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword,
STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_dword, STARS_dt_tbyte, STARS_dt_tbyte, STARS_dt_tbyte,
STARS_dt_tbyte, STARS_dt_tbyte, STARS_dt_tbyte, STARS_dt_tbyte, STARS_dt_tbyte, STARS_dt_word, STARS_dt_word, STARS_dt_word,
STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16,
STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16,
STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16, STARS_dt_byte16,
STARS_dt_word,
STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32,
STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32, STARS_dt_byte32,
STARS_dt_word 
// Name handed back by MDGetRegNumName() when it cannot name the register.
const char *ErrorStrings[1] = {
	"ERROR_REG"
};

// 16-bit names; MDGetRegNumName() indexes this by register number when
//  ByteWidth is 2 and the register lies in STARS_x86_R_ax..STARS_x86_R_di.
const char *WordRegStrings[8] = {
	"AX", "CX", "DX", "BX",
	"SP", "BP", "SI", "DI"
};

// 64-bit names for the same eight registers, selected when ByteWidth is 8.
const char *QWordRegStrings[8] = {
	"RAX", "RCX", "RDX", "RBX",
	"RSP", "RBP", "RSI", "RDI"
};
clc5q's avatar
clc5q committed
// Sub-register names for R8..R15. MDGetRegNumName() indexes each table with
//  (RegNum - STARS_x86_R_r8) and picks the table from the operand byte width.

// ByteWidth 4
const char *QDWordRegStrings[8] = {
	"R8D", "R9D", "R10D", "R11D",
	"R12D", "R13D", "R14D", "R15D"
};

// ByteWidth 2
const char *QWWordRegStrings[8] = {
	"R8W", "R9W", "R10W", "R11W",
	"R12W", "R13W", "R14W", "R15W"
};

// ByteWidth 1
const char *QByteRegStrings[8] = {
	"R8L", "R9L", "R10L", "R11L",
	"R12L", "R13L", "R14L", "R15L"
};

// Printable signedness labels. Entries 0 and 3 are both the "unknown" label.
//  NOTE(review): the index encoding is defined by the users of this table,
//  which are not visible in this section.
const char *SignednessStrings[4] = {
	"UNKNOWNSIGN",
	"SIGNED",
	"UNSIGNED",
	"UNKNOWNSIGN"
};

// Same layout as SignednessStrings, with every label prefixed by "NOFLAG".
const char *LeaSignednessStrings[4] = {
	"NOFLAGUNKNOWNSIGN",
	"NOFLAGSIGNED",
	"NOFLAGUNSIGNED",
	"NOFLAGUNKNOWNSIGN"
};

// Placeholder names that MDGetSPARKRegNumName() returns for the FP stack
//  registers, indexed by (RegNum - STARS_x86_R_st0).
//  NOTE(review): entries 1 through 7 are all "FloatingPointStackDummy1";
//  confirm whether distinct names per stack slot were intended.
const char *SPARKFloatingPointStackRegNames[8] = {
	"FloatingPointStackDummy",
	"FloatingPointStackDummy1",
	"FloatingPointStackDummy1",
	"FloatingPointStackDummy1",
	"FloatingPointStackDummy1",
	"FloatingPointStackDummy1",
	"FloatingPointStackDummy1",
	"FloatingPointStackDummy1"
};

const char *CFTTypeStrings[20] = { "FALL_THROUGH", "BRANCH_IF_THEN", "BRANCH_IF_THEN_ELSE", "JUMP_BEFORE_ELSE",
"LOOP_BACK", "LOOP_EXIT", "LOOP_CONTINUE", "JUMP_INTO_LOOP_TEST", "JUMP_TO_DEFAULT_CASE", "CASE_BREAK_TO_FOLLOW_NODE",
"JUMP_TO_SWITCH_INDIR_JUMP", "SHORT_CIRCUIT_BRANCH", "SHORT_CIRCUIT_LOOP_EXIT", "INVERTED_LOOP_EXIT", 
"INVERTED_LOOP_BACK", "SHORT_CIRCUIT_INVERTED_LOOP_EXIT", "", "", "", ""
// Distinguishes subword regs from their parent regs
const char *MDGetRegNumName(STARS_regnum_t RegNum, uint16_t ByteWidth) {
	if ((STARS_x86_R_none == RegNum) || (MAX_IDA_REG < RegNum))
		return ErrorStrings[0];
	else if ((ByteWidth == 2) && (RegNum >= STARS_x86_R_ax) && (RegNum <= STARS_x86_R_di)) {
		return WordRegStrings[RegNum];
	else if ((ByteWidth == 8) && (RegNum >= STARS_x86_R_ax) && (RegNum <= STARS_x86_R_di)) {
		return QWordRegStrings[RegNum];
clc5q's avatar
clc5q committed
	else if ((ByteWidth < 8) && (RegNum >= STARS_x86_R_r8) && (RegNum <= STARS_x86_R_r15)) {
		if (ByteWidth == 4)
			return QDWordRegStrings[RegNum - STARS_x86_R_r8];
		else if (ByteWidth == 2)
			return QWWordRegStrings[RegNum - STARS_x86_R_r8];
		else if (ByteWidth == 1)
			return QByteRegStrings[RegNum - STARS_x86_R_r8];
		else
			return ErrorStrings[0];
	}
clc5q's avatar
clc5q committed
} // end of MDGetRegNumName()
// Name a register for SPARK output.
//  Registers in STARS_x86_R_st0..STARS_x86_R_st7 get the placeholder name from
//  SPARKFloatingPointStackRegNames[]; every other register is named by
//  MDGetRegNumName(), which distinguishes subword regs from their parent regs.
const char *MDGetSPARKRegNumName(STARS_regnum_t RegNum, uint16_t ByteWidth) {
	const bool IsFPStackReg = ((RegNum >= STARS_x86_R_st0) && (RegNum <= STARS_x86_R_st7));
	if (!IsFPStackReg) {
		return MDGetRegNumName(RegNum, ByteWidth);
	}
	return SPARKFloatingPointStackRegNames[RegNum - STARS_x86_R_st0];
}

// Distinguishes subword regs from their parent regs
const char *MDGetRegName(const STARSOpndTypePtr &RegOp) {
	if (!(RegOp->IsRegOp() || RegOp->IsFloatingPointRegOp()))
	STARS_regnum_t RegNum = RegOp->GetReg();
	uint16_t ByteWidth = RegOp->GetByteWidth();
	return MDGetRegNumName(RegNum, ByteWidth);
}
clc5q's avatar
clc5q committed
// Define instruction categories for data flow analysis.
// Define instruction categories for data type analysis.
//  Holds STARS_NN_last + 1 int entries.
//  NOTE(review): presumably indexed by instruction opcode and filled in by an
//  initialization routine elsewhere -- neither is visible in this section; confirm.
int SMPTypeCategory[STARS_NN_last+1];
clc5q's avatar
clc5q committed

// Define which instructions define and use the CPU flags.
//  Each table holds STARS_NN_last + 1 bool entries.
//  NOTE(review): presumably indexed by instruction opcode and filled in by an
//  initialization routine elsewhere -- neither is visible in this section; confirm.
bool SMPDefsFlags[STARS_NN_last + 1];
bool SMPUsesFlags[STARS_NN_last + 1];
// Write the symbolic name of FlowType, followed by a newline, to the log
//  through SMP_msg(). A value with no case below is logged as "ERROR".
void DumpDataFlowType(const SMPitype FlowType) {
	switch (FlowType) {
	case DEFAULT:     SMP_msg("DEFAULT\n");     return;
	case LABEL:       SMP_msg("LABEL\n");       return;
	case CASE:        SMP_msg("CASE\n");        return;
	case JUMP:        SMP_msg("JUMP\n");        return;
	case COND_BRANCH: SMP_msg("COND_BRANCH\n"); return;
	case INDIR_JUMP:  SMP_msg("INDIR_JUMP\n");  return;
	case CALL:        SMP_msg("CALL\n");        return;
	case INDIR_CALL:  SMP_msg("INDIR_CALL\n");  return;
	case RETURN:      SMP_msg("RETURN\n");      return;
	case HALT:        SMP_msg("HALT\n");        return;
	default:          SMP_msg("ERROR\n");       return;
	}
} // end of DumpDataFlowType()

// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map.
//  DefOp:  operand being defined; only register operands are hashed.
//  SSANum: SSA number of the definition; placed in the bits above bit 15.
//  Returns (SSANum << 16) | register number for a register operand, else 0.
int HashGlobalNameAndSSA(const STARSOpndTypePtr &DefOp, int SSANum) {
	if (!DefOp->IsRegOp()) {
		return 0;
	}
	// Shift in unsigned arithmetic: left-shifting a negative int, or one that
	//  overflows into the sign bit, is undefined behavior for signed operands.
	//  The resulting bit pattern is unchanged for every well-defined input.
	const unsigned int RegBits = static_cast<unsigned int>((int)(DefOp->GetReg()));
	const unsigned int HashBits = ((static_cast<unsigned int>(SSANum) << 16) | RegBits);
	return static_cast<int>(HashBits);
}

// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map
int64_t HashGlobalStackNameAndSSA(const STARSOpndTypePtr &DefOp, int SSANum, bool UseFP) {
	int64_t HashValue = 0;
	assert(MDIsDirectStackAccessOpnd(DefOp, UseFP));
	HashValue = ((((int64_t) SSANum) << 32) | (((uint64_t)(DefOp->GetAddr())) & 0xffffffff));
	return HashValue;
// Get the size in bytes of the data type of an operand.
size_t GetOpDataSize(const STARSOpndTypePtr &DataOp) {
	size_t DataSize;
	char OpDtyp = DataOp->GetOpDtyp();
	if (DataOp->IsRegOp()) {
		DataSize = RegSizes[DataOp->GetReg()];
			SMP_msg("Found 16-bit register using dtyp field.\n");
#endif
		}
		else if (OpDtyp == STARS_dt_qword) {
			DataSize = 8;
#if 0
			SMP_msg("Found 64-bit register using dtyp field.\n");
			DataSize = 1;
			break;
			DataSize = 2;
			break;
		case STARS_dt_dword:
		case STARS_dt_float:
		case STARS_dt_code:
		case STARS_dt_unicode:
		case STARS_dt_string:
			DataSize = 4;
			break;
		case STARS_dt_double:
		case STARS_dt_qword:
			DataSize = 8;
			break;
			DataSize = 12;
			break;
		case STARS_dt_byte16:
		case STARS_dt_ldbl:
			DataSize = 16;
			break;
			DataSize = 6;
			break;
			DataSize = 3;
			break;
#if (IDA_SDK_VERSION >= 700)
		case STARS_dt_half:      // 2-byte floating point
			DataSize = 16;
			break;
#endif
			SMP_msg("ERROR: unexpected data type %d in GetOpDataSize() :", OpDtyp);
			PrintOperand(DataOp);
			DataSize = global_STARS_program->GetSTARS_ISA_dtyp();
			break;
	}
	return DataSize;
} // end of GetOpDataSize()

// Get the IDA Pro register size (dtyp) field
char GetRegDtyp(STARS_regnum_t RegNum, bool Has64BitOpnds) {
	assert(RegNum != ((STARS_regnum_t) STARS_x86_R_none));
	assert(RegNum < MAX_IDA_REG);
	char RegDtyp = RegDtyps[RegNum];
	if ((global_STARS_program->GetSTARS_ISA_Bytewidth() == 8) && Has64BitOpnds && (RegDtyp == STARS_dt_dword) && (RegNum <= STARS_x86_R_ip)) {
		// 32-bit IDA general regs are 64-bit for x86-64
// Return one of the bit width masks for the current operand.
//  Pass in DataSize in bytes if known, else pass in DataSize = 0.
unsigned short ComputeOperandBitWidthMask(const STARSOpndTypePtr &CurrOp, size_t DataSize) {
	unsigned short BitWidthMask = 32;
	if (4 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_32;
	else if (8 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_64;
	else if (1 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_8;
	else if (2 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_16;
	else if (16 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_128;
	else if (3 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_24;
	else if (6 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_48;
	else if (10 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_80;
	else if (12 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_96;
	else if (32 == DataSize)
		BitWidthMask = FG_MASK_BITWIDTH_256;
	else {
		SMP_msg("ERROR: Unknown DataSize: %zu bytes ", DataSize);
	}
	return BitWidthMask;
} // end of ComputeOperandBitWidthMask()

// Compute largest bit width from a SignMiscInfo bit mask.
size_t LargestBitWidthFromMask(unsigned short WidthTypeInfo) {
	unsigned short BitWidthMask = WidthTypeInfo & FG_MASK_BITWIDTH_FIELDS;
	size_t LargestWidth = 0;

	// Go from highest bit width to lowest.
	if (BitWidthMask & FG_MASK_BITWIDTH_256)
		LargestWidth = 256;
	else if (BitWidthMask & FG_MASK_BITWIDTH_128)
		LargestWidth = 128;
	else if (BitWidthMask & FG_MASK_BITWIDTH_96)
		LargestWidth = 96;
	else if (BitWidthMask & FG_MASK_BITWIDTH_64)
		LargestWidth = 64;
	else if (BitWidthMask & FG_MASK_BITWIDTH_48)
		LargestWidth = 48;
	else if (BitWidthMask & FG_MASK_BITWIDTH_32)
		LargestWidth = 32;
	else if (BitWidthMask & FG_MASK_BITWIDTH_24)
		LargestWidth = 24;
	else if (BitWidthMask & FG_MASK_BITWIDTH_16)
		LargestWidth = 16;
	else if (BitWidthMask & FG_MASK_BITWIDTH_8)
		LargestWidth = 8;

	return LargestWidth;
} // end of LargestBitWidthFromMask()

// Is CurrOp a general purpose register? (not flags, instruction pointer, non-integer reg, etc.)
bool MDIsGeneralPurposeReg(const STARSOpndTypePtr &CurrOp) {
	bool success = (nullptr != CurrOp);
	if (success) {
		// intel.hpp defines two ranges that are general purpose regs in enum RegNo.
		STARS_regnum_t CurrReg = CurrOp->GetReg();
		success = (CurrOp->IsRegOp() && ((CurrReg >= STARS_x86_R_ax) && (CurrReg <= STARS_x86_R_dil)));
bool IsEqOp(const STARSOpndTypePtr &Opnd1, const STARSOpndTypePtr &Opnd2) 
{
	if ((nullptr == Opnd1) && (nullptr == Opnd2))
		return true;
	if ((nullptr == Opnd1) || (nullptr == Opnd2))
		return false;
	// this expression is logically equiv. to "equal"
	//
	// truth table:
	//  
	//   O1 O2  O1<O2 !(O1<O2) O2<O1 !(O2<O1)  !(O1<O2)&&!(O2<O1) ==
		//    0  0      0        1     0        1                   1  1
		//    0  1      1        0     0        0                   0  0
		//    1  0      0        1     1        0                   0  0
		//    1  1      0        1     0        0                   1  1
	return !(*Opnd1 < *Opnd2) && !(*Opnd2 < *Opnd1);
// Are operands equal, ignoring bitwidth differences for register operands?
bool IsEqOpIgnoreBitwidth(const STARSOpndTypePtr &Opnd1, const STARSOpndTypePtr &Opnd2) {
		if (Opnd1->GetOpType() != Opnd2->GetOpType())
			return (MDCanonicalizeSubReg(Opnd1->GetReg()) == MDCanonicalizeSubReg(Opnd2->GetReg())); // no concern for subword regs; AX == EAX == RAX
		else
			return IsEqOp(Opnd1, Opnd2);
} // end of function IsEqOpIgnoreBitwidth()
// Are operands equal, ignoring value differences for immediate operands?
//  Two null operands are equal; a null and a non-null operand are not.
//  Operands of different types are never equal. Two immediates are always
//  equal here; every other pair is decided by IsEqOp().
bool IsEqOpIgnoreImmedValues(const STARSOpndTypePtr &Opnd1, const STARSOpndTypePtr &Opnd2) {
	const bool Missing1 = (nullptr == Opnd1);
	const bool Missing2 = (nullptr == Opnd2);
	if (Missing1 || Missing2) {
		return (Missing1 && Missing2);
	}

	if (Opnd1->GetOpType() != Opnd2->GetOpType()) {
		return false;
	}

	return (Opnd1->IsImmedOp() || IsEqOp(Opnd1, Opnd2));
} // end of function IsEqOpIgnoreImmedValues()


clc5q's avatar
clc5q committed
// We need to make subword registers equal to their containing registers when we
//  do comparisons, so that we will realize that register EAX is killed by a prior DEF
//  of register AL, for example, and vice versa. To keep sets ordered strictly,
//  we also have to make AL and AH be equal to each other as well as equal to EAX.
bool MDLessReg(const STARS_regnum_t Reg1, const STARS_regnum_t Reg2) {
	STARS_regnum_t SReg1 = MDCanonicalizeSubReg(Reg1);
	STARS_regnum_t SReg2 = MDCanonicalizeSubReg(Reg2);
	return (SReg1 < SReg2);
} // end of MDLessReg()

bool MDEqReg(const STARS_regnum_t Reg1, const STARS_regnum_t Reg2) {
	STARS_regnum_t SReg1 = MDCanonicalizeSubReg(Reg1);
	STARS_regnum_t SReg2 = MDCanonicalizeSubReg(Reg2);
	return (SReg1 == SReg2);
} // end of MDEqReg()

bool MDLessRegOpnd(const STARSOpndTypePtr &RegOp1, const STARSOpndTypePtr &RegOp2) {
	STARS_regnum_t SReg1 = MDCanonicalizeSubReg(RegOp1->GetReg());
	STARS_regnum_t SReg2 = MDCanonicalizeSubReg(RegOp2->GetReg());
	return ((SReg1 < SReg2) || ((SReg1 == SReg2) && (RegOp1->GetByteWidth() < RegOp2->GetByteWidth())));
// Map a subword register to the register that contains it.
//  Reg1: any register number.
//  Returns the containing register for AL/AH, CL/CH, DL/DH, BL/BH, SPL, BPL,
//  SIL and DIL; every other register number is returned unchanged.
//  The switch is entered only for numbers inside
//  FIRST_X86_SUBWORD_REG..LAST_X86_SUBWORD_REG. Without that guard, every
//  ordinary register reached the default case and tripped assert(false).
STARS_regnum_t MDCanonicalizeSubReg(const STARS_regnum_t Reg1) {
	const bool Subword = ((Reg1 >= FIRST_X86_SUBWORD_REG) && (Reg1 <= LAST_X86_SUBWORD_REG));
	STARS_regnum_t SReg1 = Reg1;
	if (Subword) {
		// See enumeration RegNo in intel.hpp.
		switch (SReg1) {
			case STARS_x86_R_al:
			case STARS_x86_R_ah:
				SReg1 = STARS_x86_R_ax;
				break;
			case STARS_x86_R_cl:
			case STARS_x86_R_ch:
				SReg1 = STARS_x86_R_cx;
				break;
			case STARS_x86_R_dl:
			case STARS_x86_R_dh:
				SReg1 = STARS_x86_R_dx;
				break;
			case STARS_x86_R_bl:
			case STARS_x86_R_bh:
				SReg1 = STARS_x86_R_bx;
				break;
			case STARS_x86_R_spl:
				SReg1 = STARS_x86_R_sp;
				break;
			case STARS_x86_R_bpl:
				SReg1 = STARS_x86_R_bp;
				break;
			case STARS_x86_R_sil:
				SReg1 = STARS_x86_R_si;
				break;
			case STARS_x86_R_dil:
				SReg1 = STARS_x86_R_di;
				break;
			default:
				// A number inside the subword range with no case above means the
				//  range macros and this switch have drifted apart.
				assert(false);
				break;
		} // end switch (SReg1)
	}
	return SReg1;
} // end of MDCanonicalizeSubReg()

#if 0
// If TempOp is a register, call MDCanonicalizeSubReg() on it.
void CanonicalizeOpnd(STARSOpndTypePtr &TempOp) {
	if (TempOp->IsRegOp()) {
		STARS_regnum_t NewReg = MDCanonicalizeSubReg(TempOp->GetReg());
		if (TempOp->GetReg() != NewReg) {
			TempOp->SetReg(NewReg);
// If TempOp is a register, call MDCanonicalizeSubReg() on it.
void CanonicalizeOpnd(STARSOpndTypePtr &TempOp) {
	if ((nullptr != TempOp) && TempOp->IsRegOp()) {
			TempOp->SetReg(MDCanonicalizeSubReg(TempOp->GetReg()));
#else
		STARS_regnum_t Reg1 = TempOp->GetReg();
		bool Subword = ((Reg1 >= FIRST_X86_SUBWORD_REG) && (Reg1 <= LAST_X86_SUBWORD_REG));
		if (Subword) {
			TempOp->SetReg(MDCanonicalizeSubReg(TempOp->GetReg()));
		}
#endif
		// Convert 32-bit regs to 64-bit on 64-bit binaries.
		uint16_t CanonicalByteWidth = global_STARS_program->GetSTARS_ISA_Bytewidth();
		if (TempOp->GetByteWidth() < CanonicalByteWidth) {
			TempOp->SetByteWidth(CanonicalByteWidth);
		}
bool MDIsStackOrFramePointerReg(const STARSOpndTypePtr &RegOp, bool UseFP) {
	if ((nullptr != RegOp) && RegOp->IsRegOp()) {
		PtrReg = RegOp->MatchesReg(MD_STACK_POINTER_REG) || (UseFP && RegOp->MatchesReg(MD_FRAME_POINTER_REG));
clc5q's avatar
clc5q committed
// In SSA computations, we are storing the GlobalNames index into the op_t fields
//  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
//  fields.
unsigned int ExtractGlobalIndex(const STARSOpndTypePtr &GlobalOp) {
	return GlobalOp->GetOpGlobalIndex();
void SetGlobalIndex(STARSOpndTypePtr TempOp, size_t index) {
	TempOp->SetOpGlobalIndex(index);
int MD_STARS_sib_base(const STARSOpndTypePtr &x) {                    // get extended sib base
short MD_STARS_sib_index(const STARSOpndTypePtr &x) {                  // get extended sib index
// Return true if CurrOp could be an indirect memory reference.
bool MDIsIndirectMemoryOpnd(const STARSOpndTypePtr &CurrOp, bool UseFP) {
	if ((nullptr == CurrOp) || (! CurrOp->IsMemOp()))
		STARS_RegNo BaseReg = (STARS_RegNo) MD_STARS_sib_base(CurrOp);
		STARS_RegNo IndexReg = (STARS_RegNo) MD_STARS_sib_index(CurrOp);
		if ((STARS_x86_R_none != IndexReg) && (MD_STACK_POINTER_REG != IndexReg)) { 
			if ((MD_FRAME_POINTER_REG == IndexReg) && UseFP && (!CurrOp->IsStaticMemOp()))
		if (0 != CurrOp->GetSIBScaleFactor())
		if (!indirect && (STARS_x86_R_none != BaseReg)) {
			if ((BaseReg == MD_FRAME_POINTER_REG) && CurrOp->IsStaticMemOp()) {
			else if ((BaseReg == MD_FRAME_POINTER_REG) && UseFP) 
				;  // EBP used as frame pointer for direct access
			else if (BaseReg == MD_STACK_POINTER_REG)
				;  // ESP used as stack pointer for direct access
			else
				indirect = true; // conservative; some register used for addressing
								// other than a stack or frame pointer
		}
	} // end if hasSIB
	else { // no SIB; can have base register only
		STARS_RegNo BaseReg = (STARS_RegNo) CurrOp->GetReg();
		if (CurrOp->IsStaticMemOp()) { // no base register for o_mem
			if (!((0 == BaseReg) || (MD_FRAME_POINTER_REG == BaseReg))) {
				SMP_msg("ERROR: o_mem base reg %d ignored \n", BaseReg);
		else if ((BaseReg == MD_FRAME_POINTER_REG) && UseFP) 
			;  // EBP used as frame pointer for direct access
		else if (BaseReg == MD_STACK_POINTER_REG)
			;  // ESP used as stack pointer for direct access
		else {
			indirect = true;
		}
	}

	return indirect;
} // end MDIsIndirectMemoryOpnd()

// Extract the base and index registers and scale factor and displacement from the
//  memory operand.
void MDExtractAddressFields(const STARSOpndTypePtr &MemOp, int &BaseReg, int &IndexReg, uint16_t &Scale, STARS_ea_t &Offset) {
	BaseReg = STARS_x86_R_none;
	IndexReg = STARS_x86_R_none;
		BaseReg = MD_STARS_sib_base(MemOp);
		IndexReg = (int) MD_STARS_sib_index(MemOp);
		if (MD_STACK_POINTER_REG == IndexReg) // signifies no index register
			IndexReg = STARS_x86_R_none;
		if (STARS_x86_R_none != IndexReg) {
			Scale = (uint16_t) MemOp->GetSIBScaleFactor();
		if (STARS_x86_R_none != BaseReg) {
				// Only IndexReg is allowed for o_mem with SIB byte
			}
		}
	}
	else { // no SIB byte; can have base reg but no index reg or scale factor
		BaseReg = (int) MemOp->GetReg();  // cannot be STARS_x86_R_none for no SIB case
		if (MemOp->IsStaticMemOp()) {
			BaseReg = STARS_x86_R_none; // no Base register for o_mem operands
// MACHINE DEPENDENT: Does MemOp have both a base reg and an index reg, or a scaled index reg?
//  An operand with a CS segment register, or whose register is STARS_x86_R_ip,
//  is reported as indexed without looking at its address fields.
//  Otherwise the fields come from MDExtractAddressFields(): the result is true
//  for a nonzero scale, or when both a base and an index register are present.
bool MDIsIndexedMemoryAccess(const STARSOpndTypePtr &MemOp) {
	const bool CodeSegAccess = (MemOp->HasSegReg() && (STARS_x86_R_cs == MemOp->GetSegReg()));
	if (CodeSegAccess || (MemOp->GetReg() == STARS_x86_R_ip)) {
		return true;
	}

	int BaseReg;
	int IndexReg;
	uint16_t Scale;
	STARS_ea_t Offset;
	MDExtractAddressFields(MemOp, BaseReg, IndexReg, Scale, Offset);

	if (Scale != 0) {
		return true;
	}
	return ((BaseReg != STARS_x86_R_none) && (IndexReg != STARS_x86_R_none));
}


// Is CurrOp a memory operand?
bool IsMemOperand(const STARSOpndTypePtr &CurrOp) {
	return ((nullptr != CurrOp) && CurrOp->IsMemOp());
}

// MACHINE DEPENDENT: Is CurrOp the flags register?
bool MDIsFlagsReg(const STARSOpndTypePtr &CurrOp) {
	return ((nullptr != CurrOp) && CurrOp->MatchesReg(X86_FLAGS_REG));
// MACHINE DEPENDENT: Is register a stack pointer or frame pointer?
bool MDIsStackPtrReg(int RegNumber, bool UseFP) {
	return ((RegNumber == MD_STACK_POINTER_REG) || (UseFP && (RegNumber == MD_FRAME_POINTER_REG)));
// MACHINE DEPENDENT: Is operand a stack memory access?
bool MDIsStackAccessOpnd(const STARSOpndTypePtr &CurrOp, bool UseFP) {
	if ((nullptr == CurrOp) || ((!CurrOp->IsMemDisplacementOp()) && (!CurrOp->IsMemNoDisplacementOp()))) {
		return false;
	}

	MDExtractAddressFields(CurrOp, BaseReg, IndexReg, ScaleFactor, offset);
} // end of MDIsStackAccessOpnd()

// MACHINE DEPENDENT: Is operand a direct stack memory access?
bool MDIsDirectStackAccessOpnd(const STARSOpndTypePtr &CurrOp, bool UseFP) {
	int BaseReg;
	int IndexReg;
	if ((nullptr == CurrOp) || ((! CurrOp->IsMemDisplacementOp()) && (! CurrOp->IsMemNoDisplacementOp()))) {
		return false;
	}

	MDExtractAddressFields(CurrOp, BaseReg, IndexReg, ScaleFactor, offset);
	// When the IndexReg is R_none, access is direct.
	return (MDIsStackPtrReg(BaseReg, UseFP) && (IndexReg == STARS_x86_R_none));
} // end of MDIsDirectStackAccessOpnd()

// MACHINE DEPENDENT: Is operand trackable in data flow analyses (i.e. a direct stack memory access or a register?)
bool MDIsDataFlowOpnd(const STARSOpndTypePtr &CurrOp, bool UseFP) {
		&& ((CurrOp->IsRegOp() && ((STARS_x86_R_ip > CurrOp->GetReg()) || (STARS_x86_R_gs < CurrOp->GetReg()))) || MDIsDirectStackAccessOpnd(CurrOp, UseFP)));
// MACHINE DEPENDENT: Is operand a caller-saved register?
bool MDIsCallerSavedReg(const STARSOpndTypePtr &CurrOp) {
		return false;
	STARS_regnum_t CurrReg = MDCanonicalizeSubReg(CurrOp->GetReg());
	return ((STARS_x86_R_ax == CurrReg) || (STARS_x86_R_cx == CurrReg) || (STARS_x86_R_dx == CurrReg));
} // end of MDIsCallerSavedReg()
// Return a clone of CurrOp when it is a register operand narrower than the
//  ISA byte width reported by global_STARS_program; otherwise return CurrOp
//  itself. Null and non-register operands are handed back unchanged.
STARSOpndTypePtr CloneIfSubwordReg(const STARSOpndTypePtr &CurrOp) {
	if ((nullptr == CurrOp) || (!CurrOp->IsRegOp())) {
		return CurrOp;
	}
	if (global_STARS_program->GetSTARS_ISA_Bytewidth() > CurrOp->GetByteWidth()) {
		return CurrOp->clone();
	}
	return CurrOp;
} // end of CloneIfSubwordReg()

// Return a clone of CurrOp when MDIsStackAccessOpnd() reports it as a stack
//  access; otherwise defer to CloneIfSubwordReg(), which clones only register
//  operands narrower than the ISA byte width.
STARSOpndTypePtr CloneIfNecessary(const STARSOpndTypePtr &CurrOp, bool UseFP) {
	if (!MDIsStackAccessOpnd(CurrOp, UseFP)) {
		return CloneIfSubwordReg(CurrOp);
	}
	return CurrOp->clone();
} // end of CloneIfNecessary()

// DEBUG Print DEF and/or USE for an operand.
//  feature: not read anywhere in the body as it stands.
//  OpNum:   operand index, or -1 for a non-instruction operand (see below).
// NOTE(review): as written, no path through this function prints anything --
//  the switch has a single empty case. Confirm whether the per-operand
//  printing was removed on purpose.
void PrintDefUse(unsigned long feature, int OpNum) {
	// CF_ macros number the operands from 1 to 6, while OpNum
	//  is a 0 to 5 index into the insn_t.Operands[] array.
	// OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc.
	//  operand and not an instruction operand.
	if (-1 == OpNum)
		return;
	switch (OpNum) {
		case 0:
			break;
	}
	return;
} // end PrintDefUse()

// DEBUG print SIB info for an operand.
void PrintSIB(const STARSOpndTypePtr &Opnd) {
jdh8d's avatar
jdh8d committed
	int BaseReg;
	int IndexReg;
	uint16_t ScaleFactor;
	STARS_ea_t offset;
#define NAME_LEN 5
	char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
	char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	if (BaseReg != STARS_x86_R_none)
		SMP_strncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1);

	if (IndexReg != STARS_x86_R_none) {
		SMP_strncpy(IndexName, RegNames[IndexReg], NAME_LEN -1);
	}
	SMP_msg(" Base %s Index %s Scale %d Flag4 %d", BaseName, IndexName, ScaleFactor, Opnd->GetSpecFlag4());
// Annotations: concisely print SIB info for an operand.
void AnnotPrintSIB(const STARSOpndTypePtr &Opnd, bool HasOffset, FILE *OutFile, char OutString[STARS_MAXSTR], bool Has64BitAddressing) {
	uint16_t ByteWidth = Opnd->GetByteWidth();
		ByteWidth = 8;

	SMP_strncat(OutString, "[", STARS_MAXSTR - 1);

	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	if (ScaleFactor > 0) {
		(void) SMP_snprintf(ScaleString, 4, "%d", ScaleFactor);
	if (BaseReg != STARS_x86_R_none) {
		(void) SMP_strncat(OutString, MDGetRegNumName((STARS_regnum_t) BaseReg, ByteWidth), STARS_MAXSTR - 1);
		if (IndexReg != STARS_x86_R_none) {
			(void) SMP_strncat(OutString, "+", STARS_MAXSTR - 1);
			(void) SMP_strncat(OutString, MDGetRegNumName((STARS_regnum_t) IndexReg, ByteWidth), STARS_MAXSTR - 1);
				(void) SMP_strncat(OutString, "*", STARS_MAXSTR - 1);
				(void) SMP_strncat(OutString, ScaleString, STARS_MAXSTR - 1);
	else if (IndexReg != STARS_x86_R_none) {
		(void) SMP_strncat(OutString, MDGetRegNumName((STARS_regnum_t) IndexReg, ByteWidth), STARS_MAXSTR - 1);
			(void) SMP_strncat(OutString, "*", STARS_MAXSTR - 1);
			(void) SMP_strncat(OutString, ScaleString, STARS_MAXSTR - 1);
		SMP_msg("ERROR: No BaseReg, no IndexReg in SIB\n");
	if (!HasOffset) // can close the brackets around regs
		(void) SMP_strncat(OutString, "]", STARS_MAXSTR - 1);
		
// Annotations: concisely print SIB info for an operand to OutString.
//  Opnd:               memory operand; its base, index and scale are fetched
//                      with MDExtractAddressFields().
//  HasOffset:          when false the closing "]" is appended here; when true
//                      the bracket is left open.
//  OutString:          destination C string; each append passes STARS_MAXSTR - 1
//                      as the limit to SMP_strncat().
//  Has64BitAddressing: when true registers are named at 8-byte width instead
//                      of the operand's own byte width.
//  Appends "[" then base, "+index" and "*scale" as present. If neither a base
//  nor an index register exists, an error is logged and nothing is appended
//  between the brackets.
void StringPrintSIB(const STARSOpndTypePtr &Opnd, bool HasOffset, char *OutString, bool Has64BitAddressing) {
	uint16_t RegWidth = Opnd->GetByteWidth();
	if (Has64BitAddressing) {
		RegWidth = 8;
	}

	// Every piece of output goes through the same bounded append.
	auto Emit = [OutString](const char *Piece) {
		(void) SMP_strncat(OutString, Piece, STARS_MAXSTR - 1);
	};

	Emit("[");

	int BaseReg;
	int IndexReg;
	uint16_t ScaleFactor;
	STARS_ea_t offset;
	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	// A nonzero scale is treated as a power-of-two exponent and turned into
	//  the multiplier that is printed.
	char ScaleString[4];
	if (ScaleFactor > 0) {
		ScaleFactor = 1 << ScaleFactor;
		(void) SMP_snprintf(ScaleString, 4, "%d", ScaleFactor);
	}

	const bool HasBase = (BaseReg != STARS_x86_R_none);
	const bool HasIndex = (IndexReg != STARS_x86_R_none);

	if (!HasBase && !HasIndex) {
		SMP_msg("ERROR: No BaseReg, no IndexReg in SIB\n");
	}
	else {
		if (HasBase) {
			Emit(MDGetRegNumName((STARS_regnum_t) BaseReg, RegWidth));
			if (HasIndex) {
				Emit("+");
			}
		}
		if (HasIndex) {
			Emit(MDGetRegNumName((STARS_regnum_t) IndexReg, RegWidth));
			if (ScaleFactor > 0) {
				Emit("*");
				Emit(ScaleString);
			}
		}
	}

	if (!HasOffset) { // can close the brackets around regs
		Emit("]");
	}
	return;
} // end StringPrintSIB()
// Annotations: concisely print SIB info for an operand.
void SPARKAnnotPrintSIB(const STARSOpndTypePtr &Opnd, bool HasOffset, FILE *OutFile, STARS_regnum_t SegReg, bool UseFP, bool Has64BitAddressing, bool UseSavedStackPtr) {
#if 1
	std::string OutString;
	SPARKAnnotSIBToString(Opnd, HasOffset, OutString, SegReg, UseFP, Has64BitAddressing, UseSavedStackPtr);
	SMP_fprintf(OutFile, " %s", OutString.c_str());
#else
	uint16_t ByteWidth = Opnd->GetByteWidth();
	char OutString[STARS_MAXSTR] = {'(', '\0'};
	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);

	bool SegRegPrefix = STARS_x86_is_segreg((int) SegReg);
clc5q's avatar
clc5q committed
	if (SegRegPrefix) {
		// Emit segment register string unless it is just the stack segment plus a stack operand,
		//  where the stack segment is implied anyway.
		if ((SegReg == STARS_x86_R_ss) && MDIsStackAccessOpnd(Opnd, UseFP)) {