//
// SMPDataFlowAnalysis.cpp
//
// This module performs the fundamental data flow analyses needed for the
//   SMP project (Software Memory Protection).
//

#include <vector>

#include <pro.h>
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>

#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"

// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0   // verbose
#define SMP_DEBUG3 0   // verbose
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0
// Used for binary search by function number in SMPStaticAnalyzer.cpp
//  to trigger debugging output and find which instruction in which
//  function is causing a crash.
bool SMPBinaryDebug = false;

// Instruction categories for data flow analysis, indexed by the
//  IDA Pro instruction code (cmd.itype).
static SMPitype DFACategory[NN_last+1];

// Printable register names, indexed by IDA Pro register number.
//  The entries point at string literals, which must never be written
//  through, so the table holds pointers to const char.
static const char *RegNames[R_of + 1] =
	{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
	  "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	  "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
	  "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
	  "DS", "FS", "GS", "CF", "ZF", "SF", "OF" 
	};

// Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable
//  by allowing us to pick them up with an array index (operand number
//  0 .. UA_MAXOP-1).
static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6};
static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6};

// Text to be printed in each optimizing annotation explaining why
//  the annotation was emitted. Indexed by optimization category
//  (the instruction's OptType). Read-only string literals, hence
//  pointers to const char.
static const char *OptExplanation[LAST_OPT_CATEGORY + 1] =
	{ "NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM",
	  "Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr",
	  "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest"
	};

// *****************************************************************
// Class DefOrUseList
// *****************************************************************

// Default constructor. Nothing is set explicitly here; the Refs and
//  Types members are left to their own default construction.
DefOrUseList::DefOrUseList(void) {
}

// Append one DEF or USE operand to the list. The operand goes at the end
//  of Refs and its operand type goes at the end of Types, so the two
//  containers stay parallel (same index refers to the same entry).
void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type) {
	Refs.push_back(Ref);
	Types.push_back(Type);
}

// Return a copy of the operand stored at position index.
//  The index is not range checked; callers must keep it below the
//  number of entries that have been added with SetRef().
op_t DefOrUseList::GetRef(size_t index) const {
	return this->Refs[index];
}

// Return the operand type recorded for the entry at position index.
//  The index is not range checked.
SMPOperandType DefOrUseList::GetRefType(size_t index) const {
	return this->Types[index];
}

// *****************************************************************
// Class SMPInstr
// *****************************************************************

// Constructor for instruction.
//  Records the instruction address and marks the instruction as not yet
//  analyzed and not (yet known to be) a jump target. All other members
//  are filled in later by Analyze().
SMPInstr::SMPInstr(ea_t addr)
	: address(addr), analyzed(false), JumpTarget(false) {
}

// Is the instruction the type that terminates a basic block?
//  True for the data flow categories JUMP, COND_BRANCH, INDIR_JUMP
//  and RETURN; false for every other category.
bool SMPInstr::IsBasicBlockTerminator() const {
	switch (type) {
		case JUMP:
		case COND_BRANCH:
		case INDIR_JUMP:
		case RETURN:
			return true;
		default:
			return false;
	}
}

// Does any destination (DEF) operand refer to memory?
//  An operand counts as a memory reference when its type is o_mem,
//  o_phrase or o_displ. Returns false when the DEF list is empty.
bool SMPInstr::HasDestMemoryOperand(void) const {
	const size_t DefCount = Defs.GetSize();
	for (size_t DefIndex = 0; DefIndex < DefCount; ++DefIndex) {
		switch (Defs.GetRef(DefIndex).type) {
			case o_mem:
			case o_phrase:
			case o_displ:
				return true; // first memory DEF settles the question
			default:
				break;
		}
	}
	return false;
} // end of SMPInstr::HasDestMemoryOperand()

// Does any source (USE) operand refer to memory?
//  An operand counts as a memory reference when its type is o_mem,
//  o_phrase or o_displ. Returns false when the USE list is empty.
bool SMPInstr::HasSourceMemoryOperand(void) const {
	size_t UseIndex = 0;
	const size_t UseCount = Uses.GetSize();
	while (UseIndex < UseCount) {
		switch (Uses.GetRef(UseIndex).type) {
			case o_mem:
			case o_phrase:
			case o_displ:
				return true; // first memory USE settles the question
			default:
				break;
		}
		++UseIndex;
	}
	return false;
} // end of SMPInstr::HasSourceMemoryOperand()

// Does the instruction whose flags are in F have a numeric type
//   as the second source operand?
// NOTE: We can only analyze immediate values now, using a heuristic
//   that small immediates are numeric and others are probably
//   addresses. When data flow analyses are implemented,
//   we will be able to analyze many non-immediate operands.
// The test is performed on SMPcmd.Operands[1]: the result is true only
//   when that operand is an immediate whose signed value lies strictly
//   between IMMEDNUM_LOWER and IMMEDNUM_UPPER. The parameter F is
//   currently not consulted.
#define IMMEDNUM_LOWER -8191
#define IMMEDNUM_UPPER 8191
bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const {
	if (o_imm != SMPcmd.Operands[1].type) {
		// Not an immediate: nothing we can classify as numeric yet.
		return false;
	}

	// Reinterpret the immediate as signed so that small negative
	//  constants (e.g. -4) fall inside the numeric range.
	const signed long TempImm = (signed long) SMPcmd.Operands[1].value;

	return ((TempImm > IMMEDNUM_LOWER) && (TempImm < IMMEDNUM_UPPER));
} // end of SMPInstr::IsSecondSrcOperandNumeric()

// DEBUG Print DEF and/or USE for an operand.
//  feature is the canonical feature bit set of the instruction and
//  OpNum is the 0-based operand index. Prints " DEF" when the operand
//  is changed by the instruction and " USE" when it is read. Operand
//  numbers outside 0..5 print nothing.
void PrintDefUse(ulong feature, int OpNum) {
	// CF_ macros number the operands from 1 to 6, while OpNum
	//  is a 0 to 5 index into the insn_t.Operands[] array.
	static const ulong ChangeBit[] =
		{ CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6 };
	static const ulong UseBit[] =
		{ CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6 };
	const int TableSize = (int) (sizeof(ChangeBit) / sizeof(ChangeBit[0]));

	if ((OpNum < 0) || (OpNum >= TableSize)) {
		return;
	}
	if (feature & ChangeBit[OpNum]) {
		msg(" DEF");
	}
	if (feature & UseBit[OpNum]) {
		msg(" USE");
	}
} // end PrintDefUse()

// DEBUG print SIB info for an operand.
//  Prints the base register, index register and scale factor decoded
//  from the operand's SIB byte. A register is printed as "None" when
//  the SIB byte encodes no index register (index value R_sp) or when
//  the decoded register number has no entry in RegNames.
void PrintSIB(op_t Opnd) {
	int BaseReg = sib_base(Opnd);
	short IndexReg = sib_index(Opnd);
	int ScaleFactor = sib_scale(Opnd);
#define NAME_LEN 5
	char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
	char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
	const int RegNameCount = (int) (sizeof(RegNames) / sizeof(RegNames[0]));

	// NOTE: a test for "no base register" (BaseReg != R_bp) used to be
	//  compiled out here, so the base name is copied for every base
	//  register value that has a name.
	// Never index RegNames with a negative or too-large register number.
	if ((BaseReg >= 0) && (BaseReg < RegNameCount)) {
		// qstrncpy() takes the size of the destination buffer and always
		//  terminates the result.
		qstrncpy(BaseName, RegNames[BaseReg], sizeof(BaseName));
	}
	if ((IndexReg != R_sp) // SIB code for NO INDEX REG
		&& (IndexReg >= 0) && (IndexReg < RegNameCount)) {
		qstrncpy(IndexName, RegNames[IndexReg], sizeof(IndexName));
	}
	msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor);
} // end PrintSIB()

// DEBUG print operands for Inst.
void SMPInstr::PrintOperands() const {
	op_t Opnd;
	for (int i = 0; i < UA_MAXOP; ++i) {
		Opnd = SMPcmd.Operands[i];
		if (Opnd.type == o_void)
			continue;
		else if (Opnd.type == o_mem) {
			msg(" Operand %d : memory : addr: %x", i, Opnd.addr);
			PrintDefUse(features, i);
			if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem?
				msg(" Found SIB byte for o_mem operand ");
clc5q's avatar
clc5q committed
				PrintSIB(Opnd);
			}
		}
		else if (Opnd.type == o_phrase) {
			msg(" Operand %d : memory phrase :", i);
			PrintDefUse(features, i);
			if (Opnd.hasSIB) { // has SIB info
				PrintSIB(Opnd);
			}
			else { // no SIB info
				ushort BaseReg = Opnd.phrase;
				msg(" reg %s", RegNames[BaseReg]);
			}
			if (Opnd.addr != 0) {
				msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr);
			}
		}
		else if (Opnd.type == o_displ) {
			ea_t offset = Opnd.addr;
			PrintDefUse(features, i);
			if (Opnd.hasSIB) {
				PrintSIB(Opnd);
				msg(" displ %d", offset);
			}
			else {
				ushort BaseReg = Opnd.reg;
				msg(" Operand %d : memory displ : reg %s displ %d", i,
					RegNames[BaseReg], offset);
			}
		}
		else if (Opnd.type == o_reg) {
			msg(" Operand %d : register", i);
			msg(" regno: %d", Opnd.reg);
			PrintDefUse(features, i);
		}
		else if (Opnd.type == o_imm) {
			msg(" Operand %d : immed", i);
			PrintDefUse(features, i);
		}
		else if (Opnd.type == o_far) {
			msg(" Operand %d : FarPtrImmed", i);
clc5q's avatar
clc5q committed
			PrintDefUse(features, i);
		}
		else if (Opnd.type == o_near) {
			msg(" Operand %d : NearPtrImmed", i);
clc5q's avatar
clc5q committed
			PrintDefUse(features, i);
		}
		else {
			msg(" Operand %d : unknown", i);
			PrintDefUse(features, i);
		}
		if (!(Opnd.showed()))
			msg(" HIDDEN ");
	}
	msg(" \n");
	return;
} // end of SMPInstr::PrintOperands()

// Build the destination operand list for the instruction, given
//  the OptCategory for the instruction as a hint (OptType is currently
//  not consulted).
// The result is the space-separated names of all register DEFs followed
//  by the terminator " ZZ ". When the instruction has no register DEFs a
//  warning is logged and an empty string is returned.
// The returned pointer refers to a static buffer that is overwritten by
//  the next call, so the caller must use or copy it before calling again.
char * SMPInstr::DestString(int OptType) {
	static char DestList[MAXSTR] = { '\0', '\0' };
	int RegDestCount = 0;

	// Start from an empty string on every call. Without this, an
	//  instruction that has no register DEFs would return whatever text
	//  the previous call left in the static buffer.
	DestList[0] = '\0';

	for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) {
		op_t DefOpnd = this->GetDef(DefIndex);
		if (o_reg == DefOpnd.type) {
			ushort DestReg = DefOpnd.reg;
			if (0 < RegDestCount) {
				qstrncat(DestList, " ", MAXSTR);
			}
			qstrncat(DestList, RegNames[DestReg], MAXSTR);
			++RegDestCount;
		}
	}
	if (0 >= RegDestCount) {
		msg("WARNING: No destination registers: %s\n", this->GetDisasm());
	}
	else {
		qstrncat(DestList, " ZZ ", MAXSTR);
	}
	return DestList;
} // end of SMPInstr::DestString()

// Equality operator for SMPInstr. Two instructions compare equal when
//  they have the same address. Returns 1 for equal, 0 otherwise.
int SMPInstr::operator==(const SMPInstr &rhs) const {
	return (rhs.GetAddr() == this->address) ? 1 : 0;
}

// Inequality operator for SMPInstr. Instructions differ when their
//  addresses differ. Returns 1 for different, 0 for same address.
int SMPInstr::operator!=(const SMPInstr &rhs) const {
	const bool SameAddress = (rhs.GetAddr() == this->address);
	return SameAddress ? 0 : 1;
}

// Less than operator for sorting SMPInstr lists. Instructions are
//  ordered by address. Returns 1 when this instruction's address is
//  lower than that of rhs, 0 otherwise.
int SMPInstr::operator<(const SMPInstr &rhs) const {
	return (rhs.GetAddr() > this->address) ? 1 : 0;
}

#define MD_FIRST_ENTER_INSTR  NN_enterw
#define MD_LAST_ENTER_INSTR NN_enterq
clc5q's avatar
clc5q committed
// Is this instruction the one that allocates space on the
//  stack for the local variables?
bool SMPInstr::MDIsFrameAllocInstr(void) const {
clc5q's avatar
clc5q committed
	// The frame allocating instruction should look like:
	//   sub esp,48   or   add esp,-64   etc.
	if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) {
		if (Defs.GetRef(0).is_reg(R_sp)) {
			// We know that an addition or subtraction is being
			//  performed on the stack pointer. This should not be
			//  possible within the prologue except at the stack
			//  frame allocation instruction, so return true. We
			//  could be more robust in this analysis in the future. **!!**
			// CAUTION: If a compiler allocates 64 bytes for locals
			//  and 16 bytes for outgoing arguments in a single
			//  instruction:  sub esp,80
			//  you cannot insist on finding sub esp,LocSize
			// To make this more robust, we are going to insist that
			//  an allocation of stack space is either performed by
			//  adding a negative immediate value, or by subtracting
			//  a positive immediate value. We will throw in, free of
			//  charge, a subtraction of a register, which is how alloca()
			//  usually allocates stack space.
			if (o_imm == Uses.GetRef(0).type) {
				signed long TempImm = (signed long) Uses.GetRef(0).value;
				if (((0 > TempImm) && (SMPcmd.itype == NN_add))
					|| ((0 < TempImm) && (SMPcmd.itype == NN_sub))) {
					return true;
				}
			}
			else if ((o_reg == Uses.GetRef(0).type)
				&& (SMPcmd.itype == NN_sub)) { // alloca() ?
				return true;
			}
clc5q's avatar
clc5q committed
		}
	}
	else if ((SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (SMPcmd.itype <= MD_LAST_ENTER_INSTR)) {
		return true;
	}
clc5q's avatar
clc5q committed
	return false;
} // end of SMPInstr::MDIsFrameAllocInstr()

// Is this instruction in the epilogue the one that deallocates the local
//  vars region of the stack frame?
// The usual compiler idiom for the epilogue on x86 is to
//  deallocate the local var space with:   mov esp,ebp
//  which is accepted only when UseFP is true. It could also be
//  add esp,constant. We can be tricked by add esp,constant when the
//  constant is just the stack adjustment after a call. An immediate
//  equal to LocalVarsSize is accepted silently; any other immediate is
//  still accepted, but a diagnostic is logged because the match is
//  imprecise. LEAVE is always accepted.
bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const {
	switch (this->SMPcmd.itype) {
		case NN_mov:
			// mov esp,ebp -- only meaningful with a frame pointer
			return (UseFP
				&& this->Defs.GetRef(0).is_reg(R_sp)
				&& this->Uses.GetRef(0).is_reg(R_bp));

		case NN_add:
			if (!this->Defs.GetRef(0).is_reg(R_sp)) {
				return false;
			}
			if (this->Uses.GetRef(1).is_imm((uval_t) LocalVarsSize)) {
				return true;
			}
			if (this->Uses.GetRef(1).type == o_imm) {
				msg("Used imprecise LocalVarsSize to find dealloc instr.\n");
				return true;
			}
			return false;

		case NN_leave:
			return true;

		default:
			return false;
	}
} // end of SMPInstr::MDIsFrameDeallocInstr()

// MACHINE DEPENDENT: Is instruction a return instruction?
//  True for the near (NN_retn) and far (NN_retf) return opcodes.
bool SMPInstr::MDIsReturnInstr(void) const {
	switch (SMPcmd.itype) {
		case NN_retn:
		case NN_retf:
			return true;
		default:
			return false;
	}
}

// MACHINE DEPENDENT: Is instruction a POP instruction?
//  True when the opcode lies in the inclusive range
//  FIRST_POP_INST .. LAST_POP_INST of instruction codes.
#define FIRST_POP_INST   NN_pop
#define LAST_POP_INST    NN_popfq
bool SMPInstr::MDIsPopInstr(void) const {
	if (SMPcmd.itype < FIRST_POP_INST) {
		return false;
	}
	return (SMPcmd.itype <= LAST_POP_INST);
}

// MACHINE DEPENDENT: Is instruction a PUSH instruction?
//  True when the opcode lies in the inclusive range
//  FIRST_PUSH_INST .. LAST_PUSH_INST of instruction codes.
#define FIRST_PUSH_INST   NN_push
#define LAST_PUSH_INST    NN_pushfq
bool SMPInstr::MDIsPushInstr(void) const {
	if (SMPcmd.itype < FIRST_PUSH_INST) {
		return false;
	}
	return (SMPcmd.itype <= LAST_PUSH_INST);
}

// MACHINE DEPENDENT: Does instruction use a callee-saved register?
//  Scans the USE list and reports true as soon as one USE is the
//  register EBX, EBP, ESI or EDI.
bool SMPInstr::MDUsesCalleeSavedReg(void) const {
	const size_t UseCount = this->Uses.GetSize();
	for (size_t UseIndex = 0; UseIndex != UseCount; ++UseIndex) {
		op_t CurrUse = this->GetUse(UseIndex);
		const bool CalleeSaved = CurrUse.is_reg(R_bx)
			|| CurrUse.is_reg(R_bp)
			|| CurrUse.is_reg(R_si)
			|| CurrUse.is_reg(R_di);
		if (CalleeSaved) {
			return true;
		}
	}
	return false;
} // end of SMPInstr::MDUsesCalleeSavedReg()

clc5q's avatar
clc5q committed
// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
	if (this->analyzed)
		return;

	// Fill cmd structure with disassembly of instr
	ua_ana0(this->address);
	// Get the instr disassembly text.
	(void) generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1);
	// Remove interactive color-coding tags.
	tag_remove(this->disasm, this->disasm, 0);
	// Copy cmd to member variable SMPcmd.
	this->SMPcmd = cmd;
	// Get the canonical features into member variables features.
	this->features = cmd.get_canon_feature();

	// Record what type of instruction this is, simplified for the needs
	//  of data flow and type analysis.
	this->type = DFACategory[cmd.itype];
	// Record optimization category.
	this->OptType = OptCategory[cmd.itype];

	// Build the DEF and USE lists for the instruction.
	this->BuildSMPDefUseLists();
	// Fix up machine dependent quirks in the def and use lists.
	this->MDFixupDefUseLists();

	// Determine whether the instruction is a jump target by looking
	//  at its cross references and seeing if it has "TO" code xrefs.
	xrefblk_t xrefs;
	for (bool ok = xrefs.first_to(this->address, XREF_FAR); ok; ok = xrefs.next_to()) {
		if ((xrefs.from != 0) && (xrefs.iscode)) {
			this->JumpTarget = true;
			break;
		}
	}

	this->analyzed = true;
	return;
} // end of SMPInstr::Analyze()

// Fill the Defs and Uses private data members.
//  Every operand slot whose "changed" feature bit is set is appended to
//  Defs, and every slot whose "used" feature bit is set is appended to
//  Uses, both in operand order.
// Uses need extra work that is NOT done here: any memory operand can
//  have register uses inside its addressing expression. For example:
//     mov eax,[ebx + esi*2 + 044Ch]
//  is a two-operand instruction with one def (eax) but three uses:
//  [ebx + esi*2 + 044Ch], ebx, and esi. Only the memory operand itself
//  is copied here from the operand array; the additional register uses
//  are appended by the machine dependent method MDFixupDefUseLists().
void SMPInstr::BuildSMPDefUseLists(void) {
	// Defs and Uses are separate lists, so collecting both in one pass
	//  over the operands yields the same contents and ordering as two
	//  separate passes.
	for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		const bool IsDef = (0 != (this->features & DefMacros[OpNum]));
		const bool IsUse = (0 != (this->features & UseMacros[OpNum]));
		if (IsDef) {
			this->Defs.SetRef(this->SMPcmd.Operands[OpNum]);
		}
		if (IsUse) {
			this->Uses.SetRef(this->SMPcmd.Operands[OpNum]);
		}
	}
} // end of SMPInstr::BuildSMPDefUseLists()

// If DefReg is not already in the DEF list, add a DEF for it.
//  The added operand is a register operand for DefReg with all other
//  fields zeroed.
void SMPInstr::MDAddRegDef(ushort DefReg) {
	for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) {
		if (this->GetDef(DefIndex).is_reg(DefReg)) {
			return; // already present; nothing to add
		}
	}

	// Value-initialize so that no field of the synthesized operand
	//  (flags, data type, address, value, ...) is left indeterminate;
	//  later passes read those fields, e.g. via showed().
	op_t TempDef = op_t();
	TempDef.type = o_reg;
	TempDef.reg = DefReg;
	this->Defs.SetRef(TempDef);
	return;
} // end of SMPInstr::MDAddRegDef()

// If UseReg is not already in the USE list, add a USE for it.
//  The added operand is a register operand for UseReg with all other
//  fields zeroed.
void SMPInstr::MDAddRegUse(ushort UseReg) {
	for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) {
		if (this->GetUse(UseIndex).is_reg(UseReg)) {
			return; // already present; nothing to add
		}
	}

	// Value-initialize so that no field of the synthesized operand
	//  (flags, data type, address, value, ...) is left indeterminate;
	//  later passes read those fields, e.g. via showed().
	op_t TempUse = op_t();
	TempUse.type = o_reg;
	TempUse.reg = UseReg;
	this->Uses.SetRef(TempUse);
	return;
} // end of SMPInstr::MDAddRegUse()

clc5q's avatar
clc5q committed
// Perform machine dependent ad hoc fixes to the def and use lists.
//  For example, some multiply and divide instructions in x86 implicitly
//  use and/or define register EDX. For memory phrase examples, see comment
//  in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
	// First, handle the uses hidden in memory addressing modes. Note that we do not
	//  care whether we are dealing with a memory destination operand or source
	//  operand, because register USEs, not DEFs, happen within the addressing expressions.
	size_t OpNum;
	for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
		op_t Opnd = SMPcmd.Operands[OpNum];
		if ((Opnd.type == o_phrase) || (Opnd.type == o_displ)) {
			if (Opnd.hasSIB) {
				int BaseReg = sib_base(Opnd);
				short IndexReg = sib_index(Opnd);
				if (R_none != BaseReg) {
					op_t BaseOpnd = Opnd; // Init to current operand field values
					BaseOpnd.type = o_reg; // Change type and reg fields
					BaseOpnd.reg = BaseReg;
					BaseOpnd.hasSIB = 0;
					this->Uses.SetRef(BaseOpnd);
				}
				if (R_none != IndexReg) {
					op_t IndexOpnd = Opnd; // Init to current operand field values
					IndexOpnd.type = o_reg; // Change type and reg fields
					IndexOpnd.reg = IndexReg;
					IndexOpnd.hasSIB = 0;
					this->Uses.SetRef(IndexOpnd);
				}
			}
			else { // no SIB byte; can have base reg but no index reg
				ushort BaseReg = Opnd.reg;  // cannot be R_none for no SIB case
				op_t BaseOpnd = Opnd; // Init to current operand field values
				BaseOpnd.type = o_reg; // Change type and reg fields
				BaseOpnd.reg = BaseReg;
				BaseOpnd.hasSIB = 0;
				this->Uses.SetRef(BaseOpnd);
			}
		} // end if (o_phrase or o_displ operand)
	} // end for (all operands)

	// Now, handle special instruction categories that have implicit operands.
	if (NN_cmpxchg == SMPcmd.itype) {
		// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
		//  sound by declaring that EAX is always a DEF.
		this->MDAddRegDef(R_ax);
	} // end if NN_cmpxchg
	else if (8 == this->GetOptType()) {
		// This category implicitly writes to EDX:EAX.
		this->MDAddRegDef(R_dx);
		this->MDAddRegDef(R_ax);
	} // end else if (8 == GetOptType)
	else if (7 == this->GetOptType()) {
		// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
		//  DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
		// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
		//  sense, because they are not displayed in the disassembly text). For example:
		//  mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
		//  source operand is only 8 bits wide, there is room to hold the result in AX
		//  without using DX:  mul bl means AX <-- AL*BL.
		// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
		//  operands:  imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
		//  EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
		bool HiddenEAXUse = false;
		for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) {
			op_t TempUse = this->GetUse(UseIndex);
			if (!TempUse.showed()) { // hidden operand
				if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
					this->MDAddRegUse(R_dx);
					this->MDAddRegDef(R_ax);
					this->MDAddRegDef(R_dx);
				}
			}
		}
	} // end else if (7 == OptType)
clc5q's avatar
clc5q committed
	return;
} // end of SMPInstr::MDFixupDefUseLists()

// Handle x86 opcode SIB byte annotations.
//  For a memory operand Opnd that has a SIB byte, writes a stack constant
//  annotation for the displacement offset to AnnotFile when the address
//  is ESP-based, or when it involves EBP (as base or index) and EBP is
//  in use as a frame pointer (UseFP). Writes nothing otherwise. A
//  diagnostic is logged if the decoded base register is R_none.
void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) {
	const int BaseReg = sib_base(Opnd);
	const short IndexReg = sib_index(Opnd);

	if (R_none == BaseReg) {
		msg("BaseReg of R_none at %x\n", this->address);
	}

	const bool EspRelative = (R_sp == BaseReg); // ESP cannot be IndexReg
	const bool EbpRelative = UseFP && ((R_bp == IndexReg) || (R_bp == BaseReg));

	if (EspRelative) {
		// ESP-relative constant offset
		qfprintf(AnnotFile,
				"%x %d PTRIMMEDESP STACK %d displ %s\n",
				this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm);
	}
	else if (EbpRelative) {
		// EBP-relative constant offset
		qfprintf(AnnotFile,
				"%x %d PTRIMMEDEBP STACK %d displ %s\n",
				this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm);
	}
} // end of MDAnnotateSIBStackConstants
clc5q's avatar
clc5q committed

// Emit annotations for constants used as ptr offsets from EBP or
//  ESP into the stack frame. Only pay attention to EBP-relative
//  offsets if EBP is being used as a frame pointer (UseFP == true).
void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
	op_t Opnd;
clc5q's avatar
clc5q committed
		msg("PROBLEM INSTRUCTION: \n");
		this->PrintOperands();
	}
#endif
	for (int i = 0; i < UA_MAXOP; ++i) {
		Opnd = SMPcmd.Operands[i];
		if (Opnd.type == o_displ) {
			ea_t offset = Opnd.addr;
			if (Opnd.hasSIB) {
				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
clc5q's avatar
clc5q committed
			}
			else { // no SIB
				ushort BaseReg = Opnd.reg;
				if (BaseReg == R_sp) {
					// ESP-relative constant offset
					qfprintf(AnnotFile,
							"%x %d PTRIMMEDESP STACK %d displ %s\n",
clc5q's avatar
clc5q committed
							SMPcmd.ea, SMPcmd.size, offset, disasm);
				}
				else if (UseFP && (BaseReg == R_bp)) {
clc5q's avatar
clc5q committed
					// EBP-relative constant offset
					qfprintf(AnnotFile,
							"%x %d PTRIMMEDEBP STACK %d displ %s\n",
clc5q's avatar
clc5q committed
							SMPcmd.ea, SMPcmd.size, offset, disasm);
				}
			} // end if (Opnd.hasSIB) ... else ...
		} // end if (Opnd.type == o_displ) 
		else if (Opnd.type == o_phrase) {
			ea_t offset = 0; // mmStrata thinks [esp] is [esp+0]
			if (Opnd.hasSIB) {
				MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP);
clc5q's avatar
clc5q committed
			}
			else { // Something like [ecx]
				ushort BaseReg = Opnd.reg;
				if (BaseReg == R_sp) {
					// ESP-relative constant offset
					qfprintf(AnnotFile,
							"%x %d PTRIMMEDESP STACK %d displ %s\n",
clc5q's avatar
clc5q committed
							SMPcmd.ea, SMPcmd.size, offset, disasm);
				}
				else if (UseFP && (BaseReg == R_bp)) {
clc5q's avatar
clc5q committed
					// EBP-relative constant offset
					qfprintf(AnnotFile,
							"%x %d PTRIMMEDEBP STACK %d displ %s\n",
clc5q's avatar
clc5q committed
							SMPcmd.ea, SMPcmd.size, offset, disasm);
				}
			} // end if (Opnd.hasSIB) ... else ...
		} // end else if (Opnd.type == o_phrase)
	} // end for all operands
	return;
} // end of SMPInstr::AnnotateStackConstants()

// Emit all annotations for the instruction.
//  At most one optimization annotation line is written to AnnotFile,
//  selected by the instruction's optimization category (OptType) and by
//  whether it has memory operands or a numeric second source operand.
//  If the instruction is flagged as needing SDT instrumentation, the
//  stack constant annotations are emitted as well.
// Parameters:
//   UseFP     - passed on to AnnotateStackConstants(); true when EBP is
//               being used as a frame pointer.
//   AllocSeen - when false, a PUSH in category 0 gets a NoWarn annotation
//               instead of SDT instrumentation (presumably false until
//               the local variable allocation instruction has been
//               reached -- confirm against the caller).
//   AnnotFile - file that receives the annotation lines.
// Side effects: increments OptCount[OptType] on every call, and
//  AnnotationCount[OptType] for most of the annotations emitted.
void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile) {
	ea_t addr = this->address;
	flags_t InstrFlags = getFlags(addr);
	bool MemDest = this->HasDestMemoryOperand();
	bool MemSrc = this->HasSourceMemoryOperand();
	bool SecondSrcOperandNum = this->IsSecondSrcOperandNumeric(InstrFlags);

	++OptCount[OptType]; // keep count for debugging info

#if SMP_DEBUG_MEM
	if (MemDest || MemSrc) {
		msg("OptType: %d %s", OptType, disasm);
		this->PrintOperands();
	}
#endif

	// Emit appropriate optimization annotations.
	// SDTInstrumentation is set in every case where the instruction is
	//  left for SDT to handle; it controls the stack constant
	//  annotations at the end of this function.
	bool SDTInstrumentation = false;
	switch (OptType) {
		case 0:  // SDT will have to handle these
		{
#if SMP_DEBUG_TYPE0
			msg("OptType 0: %x  %s\n", addr, disasm);
#endif
			// mmStrata wants to suppress warnings on the PUSH
			//  instructions that precede the LocalVarsAllocInstr
			//  (i.e. the PUSHes of callee-saved regs).
			if (!AllocSeen && this->MDIsPushInstr()) {
				qfprintf(AnnotFile, "%x %d INSTR LOCAL NoWarn %s \n",
						addr, -3, disasm);
			}
			else {
				SDTInstrumentation = true;
			}
			break;
		}

		case 1:  // nothing for SDT to do
		{	qfprintf(AnnotFile, "%x %d INSTR LOCAL NoMetaUpdate %s \n",
					addr, -1, disasm);
			++AnnotationCount[OptType];
			break;
		}

		case 4:  // INC, DEC, etc.: no SDT work unless MemDest
		{	if (MemDest || MemSrc) {
				SDTInstrumentation = true;
				break;  // treat as category 0
			}
			qfprintf(AnnotFile, "%x %d INSTR LOCAL Always1stSrc %s \n",
					addr, -1, disasm);
			++AnnotationCount[OptType];
			break;
		}

		case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work.
		{	if (MemDest || MemSrc) {
				SDTInstrumentation = true;
				break;  // treat as category 0
			}
			// With no memory operand and a non-numeric second source,
			//  nothing is emitted and SDTInstrumentation stays false.
			if (SecondSrcOperandNum) { // treat as category 1
				qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n",
						addr, -1, OptExplanation[OptType], disasm);
				++AnnotationCount[OptType];
			}
			break;
		}

		case 6: // Only OS code should include these; problem for SDT
		{	if (MemDest) {
				SDTInstrumentation = true;
				break;  // treat as category 0
			}
			// Note: the numeric field written here is -OptType, unlike
			//  the fixed -1 / -2 / -3 values used by the other cases.
			qfprintf(AnnotFile, "%x %d INSTR LOCAL AlwaysPTR %s \n",
					addr, -OptType, disasm);
			++AnnotationCount[OptType];
			break;
		}

		case 8: // Implicitly writes to EDX:EAX, always numeric.
		{	qfprintf(AnnotFile, "%x %d INSTR LOCAL n EDX EAX ZZ %s %s \n",
					addr, -2, OptExplanation[OptType], disasm);
			++AnnotationCount[OptType];
			SDTInstrumentation = true;
			break;
		}

		case 9:  // Either writes to FP reg (cat. 1) or memory (cat. 0)
		{	if (MemDest) {
#if SMP_DEBUG
				// MemDest seems to happen too much.
				msg("Floating point MemDest: %s \n", disasm);
#endif
				SDTInstrumentation = true;
				break; // treat as category 0
			}
			qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n",
					addr, -1, OptExplanation[OptType], disasm);
			++AnnotationCount[OptType];
			break;
		}

		default: // 2,3,7: Optimization possibilities depend on operands
		{ 
#if SMP_DEBUG2
			if (OptType ==  3) {  // MOV instr class
				if (MemDest) {
					msg("MemDest on MOV: %s\n", disasm);
				}
				else if (!SecondSrcOperandNum) {
					msg("MOV: not 2nd op numeric: %s\n", disasm);
						this->PrintOperands();
				}
			}
#endif
			// These categories are always instrumented by SDT; the
			//  annotation below is emitted in addition when it applies.
			SDTInstrumentation = true;
			if (MemDest) {
#if SMP_DEBUG_XOR
				if (OptType == 2)
					msg("MemDest on OptType 2: %s\n", disasm);
#endif
				break;  // treat as category 0
			}
			// Categories 2 and 7 always get the annotation; category 3
			//  only when the second source operand is numeric.
			if ((OptType == 2) || (OptType == 7) || SecondSrcOperandNum) {
				qfprintf(AnnotFile, "%x %d INSTR LOCAL n %s %s %s \n",
						addr, -2, this->DestString(OptType), 
						OptExplanation[OptType], disasm);
				++AnnotationCount[OptType];
			}
			break;
		}
	} // end switch (OptType)
	
	// If mmStrata is going to have to deal with the
	//  instruction, then we can annotate EBP and ESP
	//  relative constant offsets. If we have emitted
	//  an annotation of type -1, there is no point
	//  in telling mmStrata about these constants.
	if (SDTInstrumentation) {
		this->AnnotateStackConstants(UseFP, AnnotFile);
	}
	return;
} // end of SMPInstr::EmitAnnotations()

// *****************************************************************
// Class SMPBasicBlock
// *****************************************************************

// Constructor
//  A basic block is described by iterators to its first and last
//  instructions. The IndirectJump, Returns and SharedTailChunk flags all
//  start out false; Analyze() sets the first two.
SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last)
	: FirstInstr(First), LastInstr(Last), IndirectJump(false),
	  Returns(false), SharedTailChunk(false) {
}

// Analyze basic block and fill data members.
//  Only the last instruction of the block is examined: an indirect jump
//  sets IndirectJump; otherwise a return instruction sets Returns.
void SMPBasicBlock::Analyze() {
	if (INDIR_JUMP == LastInstr->GetDataFlowType()) {
		this->IndirectJump = true;
		return;
	}
	if (LastInstr->MDIsReturnInstr()) {
		this->Returns = true;
	}
} // end of SMPBasicBlock::Analyze()

// *****************************************************************
// Class SMPFunction
// *****************************************************************

// Constructor
//  Remembers the IDA Pro function descriptor and starts with the
//  IndirectCalls flag cleared.
SMPFunction::SMPFunction(func_t *Info)
	: FuncInfo(Info), IndirectCalls(false) {
}

// Figure out the different regions of the stack frame, and find the
//  instructions that allocate and deallocate the local variables space
//  on the stack frame.
// The stack frame info will be used to emit stack
//  annotations when Analyze() reaches the stack allocation
//  instruction that sets aside space for local vars.
// Set the address of the instruction at which these
//  annotations should be emitted. This should normally
//  be an instruction such as:  sub esp,48
//  However, for a function with no local variables at all,
//  we will need to determine which instruction should be
//  considered to be the final instruction of the function
//  prologue and return its address.
// Likewise, we find the stack deallocating instruction in
//  the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
	bool FoundAllocInstr = false;
	bool FoundDeallocInstr = false;

	// The sizes of the three regions of the stack frame other than the
	//  return address are stored in the function structure.
	this->LocalVarsSize = this->FuncInfo->frsize;
	this->CalleeSavedRegsSize = this->FuncInfo->frregs;
	this->IncomingArgsSize = this->FuncInfo->argsize;

	// The return address size can be obtained in a machine independent
	//  way by calling get_frame_retsize(). 
	this->RetAddrSize = get_frame_retsize(this->FuncInfo);

	// IDA Pro has trouble with functions that do not have any local
	//  variables. Unfortunately, the C library has plenty of these
	//  functions. IDA usually claims that frregs is zero and frsize
	//  is N, when the values should have been reversed. We can attempt
	//  to detect this and fix it.
	bool FrameInfoFixed = this->MDFixFrameInfo();

#if SMP_DEBUG_FRAMEFIXUP
	if (FrameInfoFixed) {
		msg("Fixed stack frame size info: %s\n", this->FuncName);
		SMPBasicBlock CurrBlock = this->Blocks.front();
		msg("First basic block:\n");
		for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr();
			CurrInstr != CurrBlock.GetLastInstr();
			++CurrInstr) {
			msg("%s\n", CurrInstr->GetDisasm());
		}
		msg("%s\n", CurrBlock.GetLastInstr()->GetDisasm());
	}
#endif

	// Now, if LocalVarsSize is not zero, we need to find the instruction
	//  in the function prologue that allocates space on the stack for
	//  local vars. This code could be made more robust in the future
	//  by matching LocalVarsSize to the immediate value in the allocation
	//  instruction. However, IDA Pro is sometimes a little off on this
clc5q's avatar
clc5q committed
	if (0 < this->LocalVarsSize) {
		for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
			CurrInstr != this->Instrs.end();
			++CurrInstr) {
			ea_t addr = CurrInstr->GetAddr();

			// Keep the most recent instruction in the DeallocInstr
			//  in case we reach the return without seeing a dealloc.
			if (!FoundDeallocInstr) {
				this->LocalVarsDeallocInstr = addr;
			}

			if (!FoundAllocInstr
				&& CurrInstr->MDIsFrameAllocInstr()) {
clc5q's avatar
clc5q committed
				this->LocalVarsAllocInstr = addr;
				FoundAllocInstr = true;
				// As soon as we have found the local vars allocation,
				//  we can try to fix incorrect sets of UseFP by IDA.
				// NOTE: We might want to extend this in the future to
				//  handle functions that have no locals.  **!!**
				bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
				if (FixedUseFP) {
					msg("Fixed UseFP in %s\n", this->FuncName);
				}
#endif
clc5q's avatar
clc5q committed
			}
			else if (FoundAllocInstr) {
				// We can now start searching for the DeallocInstr.
				if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
					// Keep saving the most recent addr that looks
					//  like the DeallocInstr until we reach the
					//  end of the function. Last one to look like
					//  it is used as the DeallocInstr.
					this->LocalVarsDeallocInstr = addr;
					FoundDeallocInstr = true;
				}
			}
		} // end for (list<SMPInstr>::iterator CurrInstr ... )
		if (!FoundAllocInstr) {
			// Could not find the frame allocating instruction.  Bad.
			// Emit diagnostic and use the first instruction in the
			//  function as a pseudo-allocation instruction to emit
			//  some stack frame info (return address, etc.)