Skip to content
Snippets Groups Projects
SMPDataFlowAnalysis.cpp 17.1 KiB
Newer Older
clc5q's avatar
clc5q committed
//
// SMPDataFlowAnalysis.cpp
//
// This module contains common types an helper classes needed for the
clc5q's avatar
clc5q committed
//   SMP project (Software Memory Protection).
//

#include <list>
#include <set>
clc5q's avatar
clc5q committed
#include <vector>
#include <algorithm>

#include <cstring>
clc5q's avatar
clc5q committed

#include <pro.h>
clc5q's avatar
clc5q committed
#include <assert.h>
clc5q's avatar
clc5q committed
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>

#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"

// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0   // verbose
#define SMP_DEBUG3 0   // verbose
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0
clc5q's avatar
clc5q committed
#define SMP_DEBUG_DATAFLOW 0

// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008
#define SMP_COMPUTE_LVA_SSA 0

char *RegNames[R_of + 1] =
	{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
	  "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	  "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
	  "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
	  "DS", "FS", "GS", "CF", "ZF", "SF", "OF" 
	};

clc5q's avatar
clc5q committed
// Define instruction categories for data flow analysis.
SMPitype DFACategory[NN_last+1];
clc5q's avatar
clc5q committed

clc5q's avatar
clc5q committed
// We need to make subword registers equal to their containing registers when we
//  do comparisons, so that we will realize that register EAX is killed by a prior DEF
//  of register AL, for example, and vice versa. To keep sets ordered strictly,
//  we also have to make AL and AH be equal to each other as well as equal to EAX.
clc5q's avatar
clc5q committed
#define FIRST_x86_SUBWORD_REG R_al
#define LAST_x86_SUBWORD_REG R_bh
bool MDLessReg(const ushort Reg1, const ushort Reg2) {
	bool FirstSubword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG));
	bool SecondSubword = ((Reg2 >= FIRST_x86_SUBWORD_REG) && (Reg2 <= LAST_x86_SUBWORD_REG));
	ushort SReg1 = Reg1;
	ushort SReg2 = Reg2;
	if (FirstSubword) {
		// See enumeration RegNo in intel.hpp.
		if (SReg1 < 20)  // AL, CL, DL or BL
			SReg1 -= 16;
		else             // AH, CH, DH or BH
			SReg1 -= 20;
	}
	if (SecondSubword) {
		if (SReg2 < 20)
			SReg2 -= 16;
		else
			SReg2 -= 20;
	return (SReg1 < SReg2);
clc5q's avatar
clc5q committed
} // end of MDLessReg()

// In SSA computations, we are storing the GlobalNames index into the op_t fields
//  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
//  fields.
unsigned int ExtractGlobalIndex(op_t GlobalOp) {
	unsigned int index = 0;
	index |= (((unsigned int) GlobalOp.offo) & 0x000000ff);
	index <<= 8;
	index |= (((unsigned int) GlobalOp.offb) & 0x000000ff);
	index <<= 8;
	index |= (((unsigned int) GlobalOp.n) & 0x000000ff);
clc5q's avatar
clc5q committed
	return index;
}

// DEBUG Print DEF and/or USE for an operand.
void PrintDefUse(ulong feature, int OpNum) {
	// CF_ macros number the operands from 1 to 6, while OpNum
	//  is a 0 to 5 index into the insn_t.Operands[] array.
	// OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc.
	//  operand and not an instruction operand.
	if (-1 == OpNum)
		return;
	switch (OpNum) {
		case 0:
			if (feature & CF_CHG1)
				msg(" DEF");
			if (feature & CF_USE1)
				msg(" USE");
			break;
		case 1:
			if (feature & CF_CHG2)
				msg(" DEF");
			if (feature & CF_USE2)
				msg(" USE");
			break;
		case 2:
			if (feature & CF_CHG3)
				msg(" DEF");
			if (feature & CF_USE3)
				msg(" USE");
			break;
		case 3:
			if (feature & CF_CHG4)
				msg(" DEF");
			if (feature & CF_USE4)
				msg(" USE");
			break;
		case 4:
			if (feature & CF_CHG5)
				msg(" DEF");
			if (feature & CF_USE5)
				msg(" USE");
			break;
		case 5:
			if (feature & CF_CHG6)
				msg(" DEF");
			if (feature & CF_USE6)
				msg(" USE");
			break;
	}
	return;
} // end PrintDefUse()

// DEBUG print SIB info for an operand.
void PrintSIB(op_t Opnd) {
	int BaseReg = sib_base(Opnd);
	short IndexReg = sib_index(Opnd);
	int ScaleFactor = sib_scale(Opnd);
#define NAME_LEN 5
	char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
	char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
#if 0
	if (BaseReg != R_bp) // SIB code for NO BASE REG
#endif
		qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1);

	if (IndexReg != R_sp) { // SIB code for NO INDEX REG
		qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1);
	}
	msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor);
} // end PrintSIB()

// Debug: print one operand from an instruction or DEF or USE list.
void PrintOneOperand(op_t Opnd, ulong features, int OpNum) { 
	if (Opnd.type == o_void)
		return;
	else if (Opnd.type == o_mem) {
		msg(" Operand %d : memory : addr: %x", OpNum, Opnd.addr);
		PrintDefUse(features, OpNum);
		if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem?
			msg(" Found SIB byte for o_mem operand ");
			PrintSIB(Opnd);
		}
	}
	else if (Opnd.type == o_phrase) {
		msg(" Operand %d : memory phrase :", OpNum);
		PrintDefUse(features, OpNum);
		if (Opnd.hasSIB) { // has SIB info
			PrintSIB(Opnd);
		}
		else { // no SIB info
			ushort BaseReg = Opnd.phrase;
			msg(" reg %s", RegNames[BaseReg]);
		}
		if (Opnd.addr != 0) {
			msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr);
		}
	}
	else if (Opnd.type == o_displ) {
		msg(" Operand %d : memory displ :", OpNum);
		ea_t offset = Opnd.addr;
		PrintDefUse(features, OpNum);
		if (Opnd.hasSIB) {
			PrintSIB(Opnd);
			msg(" displ %d", offset);
		}
		else {
			ushort BaseReg = Opnd.reg;
			msg(" reg %s displ %d", RegNames[BaseReg], offset);
		}
	}
	else if (Opnd.type == o_reg) {
		msg(" Operand %d : register %s", OpNum, RegNames[Opnd.reg]);
		PrintDefUse(features, OpNum);
	}
	else if (Opnd.type == o_imm) {
		msg(" Operand %d : immed", OpNum);
		PrintDefUse(features, OpNum);
	}
	else if (Opnd.type == o_far) {
		msg(" Operand %d : FarPtrImmed", OpNum);
		msg(" addr: %x", Opnd.addr);
		PrintDefUse(features, OpNum);
	}
	else if (Opnd.type == o_near) {
		msg(" Operand %d : NearPtrImmed", OpNum);
		msg(" addr: %x", Opnd.addr);
		PrintDefUse(features, OpNum);
	}
	else {
		msg(" Operand %d : unknown", OpNum);
		PrintDefUse(features, OpNum);
	}
	if (!(Opnd.showed()))
		msg(" HIDDEN ");
	return;
} // end of PrintOneOperand()

// Print an operand that has no features flags or operand position number, such
//  as the op_t types found in lists and sets throughout the blocks, phi functions, etc.
void PrintListOperand(op_t Opnd, int SSANum) {
	if (Opnd.type == o_void)
		return;
	else if (Opnd.type == o_mem) {
		msg(" Operand : memory : addr: %x", Opnd.addr);
		if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem?
			msg(" Found SIB byte for o_mem operand ");
			PrintSIB(Opnd);
		}
	}
	else if (Opnd.type == o_phrase) {
		msg(" Operand : memory phrase :");
		if (Opnd.hasSIB) { // has SIB info
			PrintSIB(Opnd);
		}
		else { // no SIB info
			ushort BaseReg = Opnd.phrase;
			msg(" reg %s", RegNames[BaseReg]);
		}
		if (Opnd.addr != 0) {
			msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr);
		}
	}
	else if (Opnd.type == o_displ) {
		msg(" Operand : memory displ :");
		ea_t offset = Opnd.addr;
		if (Opnd.hasSIB) {
			PrintSIB(Opnd);
			msg(" displ %d", offset);
		}
		else {
			ushort BaseReg = Opnd.reg;
			msg(" reg %s displ %d", RegNames[BaseReg], offset);
		}
	}
	else if (Opnd.type == o_reg) {
		msg(" Operand : register: %s", RegNames[Opnd.reg]);
	}
	else if (Opnd.type == o_imm) {
		msg(" Operand : immed %d", Opnd.value);
	}
	else if (Opnd.type == o_far) {
		msg(" Operand : FarPtrImmed addr: %x", Opnd.addr);
	}
	else if (Opnd.type == o_near) {
		msg(" Operand : NearPtrImmed addr: %x", Opnd.addr);
	}
	else {
		msg(" Operand : unknown");
	}
	
	msg(" SSANum: %d ", SSANum);

	if (!(Opnd.showed()))
		msg(" HIDDEN ");
	return;
} // end of PrintListOperand()

// MACHINE DEPENDENT: Is operand type a known type that we want to analyze?
bool MDKnownOperandType(op_t TempOp) {
	return ((TempOp.type >= o_reg) && (TempOp.type <= o_near));
}

clc5q's avatar
clc5q committed
// *****************************************************************
// Class DefOrUse
// *****************************************************************

// Default constructor to make the compilers happy.
DefOrUse::DefOrUse(void) {
	this->OpType = UNINIT;
	this->SSANumber = -2;
	return;
}

clc5q's avatar
clc5q committed
// Constructor.
DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) {
	this->Operand = Ref;
	this->OpType = Type;
	this->SSANumber = SSASub;
	return;
}

// Copy constructor.
DefOrUse::DefOrUse(const DefOrUse &CopyIn) {
	*this = CopyIn;
	return;
}

// Assignment operator for copy constructor use.
DefOrUse &DefOrUse::operator=(const DefOrUse &rhs) {
	this->Operand = rhs.Operand;
	this->OpType = rhs.OpType;
	this->SSANumber = rhs.SSANumber;
	return *this;
}

// Debug printing.
void DefOrUse::Dump(void) const {
	PrintListOperand(this->Operand, this->SSANumber);
	if (this->OpType == NUMERIC)
		msg("N ");
	else if (this->OpType == POINTER)
		msg("P ");
	else if (this->OpType == UNKNOWN)
		msg("U ");
	// Don't write anything for UNINIT OpType
	return;
} // end of DefOrUse::Dump()

clc5q's avatar
clc5q committed
// *****************************************************************
// Class DefOrUseList
// *****************************************************************

// Default constructor.
DefOrUseList::DefOrUseList(void) {
	return;
}

// Set a Def or Use into the list, along with its type.
clc5q's avatar
clc5q committed
void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type, int SSASub) {
	DefOrUse CurrRef(Ref, Type, SSASub);
	this->Refs.push_back(CurrRef);
clc5q's avatar
clc5q committed
	return;
}

// Get a reference by index.
clc5q's avatar
clc5q committed
DefOrUse DefOrUseList::GetRef(size_t index) const {
clc5q's avatar
clc5q committed
	return Refs[index];
}

// Change the SSA subscript for a reference.
void DefOrUseList::SetSSANum(size_t index, int NewSSASub) {
	this->Refs[index].SetSSANum(NewSSASub);
	return;
}

// Change the operand type for a reference.
void DefOrUseList::SetType(size_t index, SMPOperandType Type) {
	this->Refs[index].SetType(Type);
	return;
}

// Debug printing.
void DefOrUseList::Dump(void) const {
	for (size_t index = 0; index < this->Refs.size(); ++index) {
		Refs[index].Dump();
	}
	msg("\n");
	return;
}

// Erase duplicate entries, in case SMPInstr::MDFixupDefUseLists() adds one.
void DefOrUseList::EraseDuplicates(void) {
	set<op_t, LessOp> TempRefs; // Use STL set to find duplicates
	set<op_t, LessOp>::iterator TempIter;
	vector<DefOrUse>::iterator RefIter;

	RefIter = this->Refs.begin();
	while (RefIter != this->Refs.end()) {
		TempIter = TempRefs.find(RefIter->GetOp());
		if (TempIter == TempRefs.end()) { // not already in set
			TempRefs.insert(RefIter->GetOp());
			++RefIter;
		}
		else { // found it in set already
			RefIter = this->Refs.erase(RefIter);
		}
	}
	return;
} // end of DefOrUseList::EraseDuplicates()


clc5q's avatar
clc5q committed
// *****************************************************************
// Class SMPPhiFunction
// *****************************************************************

// Constructor
SMPPhiFunction::SMPPhiFunction(int GlobIndex, const DefOrUse &Def) {
clc5q's avatar
clc5q committed
	this->index = GlobIndex;
clc5q's avatar
clc5q committed
	return;
clc5q's avatar
clc5q committed
}

clc5q's avatar
clc5q committed
// Add a phi item to the list
void SMPPhiFunction::PushBack(DefOrUse Ref) {
	this->SubscriptedOps.SetRef(Ref.GetOp(), Ref.GetType(), Ref.GetSSANum());
	return;
}

// Set the SSA number of the defined variable.
void SMPPhiFunction::SetSSADef(int NewSSASub) {
	this->DefName.SetSSANum(NewSSASub);
	return;
}

// Set the SSA number of the input variable.
void SMPPhiFunction::SetSSARef(size_t index, int NewSSASub) {
	this->SubscriptedOps.SetSSANum(index, NewSSASub);
	return;
}

// Debug printing.
void SMPPhiFunction::Dump(void) const {
	msg(" DEF: ");
	this->DefName.Dump();
	msg(" USEs: ");
	this->SubscriptedOps.Dump();
	return;
}

clc5q's avatar
clc5q committed
// Initialize the DFACategory[] array to define instruction classes
//   for the purposes of data flow analysis.
void InitDFACategory(void) {
	// Default category is 0, not the start or end of a basic block.
	(void) memset(DFACategory, 0, sizeof(DFACategory));

DFACategory[NN_call] = CALL;                // Call Procedure
DFACategory[NN_callfi] = INDIR_CALL;              // Indirect Call Far Procedure
DFACategory[NN_callni] = INDIR_CALL;              // Indirect Call Near Procedure

DFACategory[NN_hlt] = HALT;                 // Halt

DFACategory[NN_int] = CALL;                 // Call to Interrupt Procedure
DFACategory[NN_into] = CALL;                // Call to Interrupt Procedure if Overflow Flag = 1
DFACategory[NN_int3] = CALL;                // Trap to Debugger
DFACategory[NN_iretw] = RETURN;               // Interrupt Return
DFACategory[NN_iret] = RETURN;                // Interrupt Return
DFACategory[NN_iretd] = RETURN;               // Interrupt Return (use32)
DFACategory[NN_iretq] = RETURN;               // Interrupt Return (use64)
DFACategory[NN_ja] = COND_BRANCH;                  // Jump if Above (CF=0 & ZF=0)
DFACategory[NN_jae] = COND_BRANCH;                 // Jump if Above or Equal (CF=0)
DFACategory[NN_jb] = COND_BRANCH;                  // Jump if Below (CF=1)
DFACategory[NN_jbe] = COND_BRANCH;                 // Jump if Below or Equal (CF=1 | ZF=1)
DFACategory[NN_jc] = COND_BRANCH;                  // Jump if Carry (CF=1)
DFACategory[NN_jcxz] = COND_BRANCH;                // Jump if CX is 0
DFACategory[NN_jecxz] = COND_BRANCH;               // Jump if ECX is 0
DFACategory[NN_jrcxz] = COND_BRANCH;               // Jump if RCX is 0
DFACategory[NN_je] = COND_BRANCH;                  // Jump if Equal (ZF=1)
DFACategory[NN_jg] = COND_BRANCH;                  // Jump if Greater (ZF=0 & SF=OF)
DFACategory[NN_jge] = COND_BRANCH;                 // Jump if Greater or Equal (SF=OF)
DFACategory[NN_jl] = COND_BRANCH;                  // Jump if Less (SF!=OF)
DFACategory[NN_jle] = COND_BRANCH;                 // Jump if Less or Equal (ZF=1 | SF!=OF)
DFACategory[NN_jna] = COND_BRANCH;                 // Jump if Not Above (CF=1 | ZF=1)
DFACategory[NN_jnae] = COND_BRANCH;                // Jump if Not Above or Equal (CF=1)
DFACategory[NN_jnb] = COND_BRANCH;                 // Jump if Not Below (CF=0)
DFACategory[NN_jnbe] = COND_BRANCH;                // Jump if Not Below or Equal (CF=0 & ZF=0)
DFACategory[NN_jnc] = COND_BRANCH;                 // Jump if Not Carry (CF=0)
DFACategory[NN_jne] = COND_BRANCH;                 // Jump if Not Equal (ZF=0)
DFACategory[NN_jng] = COND_BRANCH;                 // Jump if Not Greater (ZF=1 | SF!=OF)
DFACategory[NN_jnge] = COND_BRANCH;                // Jump if Not Greater or Equal (ZF=1)
DFACategory[NN_jnl] = COND_BRANCH;                 // Jump if Not Less (SF=OF)
DFACategory[NN_jnle] = COND_BRANCH;                // Jump if Not Less or Equal (ZF=0 & SF=OF)
DFACategory[NN_jno] = COND_BRANCH;                 // Jump if Not Overflow (OF=0)
DFACategory[NN_jnp] = COND_BRANCH;                 // Jump if Not Parity (PF=0)
DFACategory[NN_jns] = COND_BRANCH;                 // Jump if Not Sign (SF=0)
DFACategory[NN_jnz] = COND_BRANCH;                 // Jump if Not Zero (ZF=0)
DFACategory[NN_jo] = COND_BRANCH;                  // Jump if Overflow (OF=1)
DFACategory[NN_jp] = COND_BRANCH;                  // Jump if Parity (PF=1)
DFACategory[NN_jpe] = COND_BRANCH;                 // Jump if Parity Even (PF=1)
DFACategory[NN_jpo] = COND_BRANCH;                 // Jump if Parity Odd  (PF=0)
DFACategory[NN_js] = COND_BRANCH;                  // Jump if Sign (SF=1)
DFACategory[NN_jz] = COND_BRANCH;                  // Jump if Zero (ZF=1)
DFACategory[NN_jmp] = JUMP;                 // Jump
DFACategory[NN_jmpfi] = INDIR_JUMP;               // Indirect Far Jump
DFACategory[NN_jmpni] = INDIR_JUMP;               // Indirect Near Jump
DFACategory[NN_jmpshort] = JUMP;            // Jump Short (only in 64-bit mode)

DFACategory[NN_loopw] = COND_BRANCH;               // Loop while ECX != 0
DFACategory[NN_loop] = COND_BRANCH;                // Loop while CX != 0
DFACategory[NN_loopd] = COND_BRANCH;               // Loop while ECX != 0
DFACategory[NN_loopq] = COND_BRANCH;               // Loop while RCX != 0
DFACategory[NN_loopwe] = COND_BRANCH;              // Loop while CX != 0 and ZF=1
DFACategory[NN_loope] = COND_BRANCH;               // Loop while rCX != 0 and ZF=1
DFACategory[NN_loopde] = COND_BRANCH;              // Loop while ECX != 0 and ZF=1
DFACategory[NN_loopqe] = COND_BRANCH;              // Loop while RCX != 0 and ZF=1
DFACategory[NN_loopwne] = COND_BRANCH;             // Loop while CX != 0 and ZF=0
DFACategory[NN_loopne] = COND_BRANCH;              // Loop while rCX != 0 and ZF=0
DFACategory[NN_loopdne] = COND_BRANCH;             // Loop while ECX != 0 and ZF=0
DFACategory[NN_loopqne] = COND_BRANCH;             // Loop while RCX != 0 and ZF=0

DFACategory[NN_retn] = RETURN;                // Return Near from Procedure
DFACategory[NN_retf] = RETURN;                // Return Far from Procedure

//
//      Pentium instructions
//

DFACategory[NN_rsm] = HALT;                 // Resume from System Management Mode

//      Pentium II instructions

DFACategory[NN_sysenter] = CALL;            // Fast Transition to System Call Entry Point
DFACategory[NN_sysexit] = CALL;             // Fast Transition from System Call Entry Point

// AMD syscall/sysret instructions  NOTE: not AMD, found in Intel manual

DFACategory[NN_syscall] = CALL;             // Low latency system call
DFACategory[NN_sysret] = CALL;              // Return from system call

// VMX instructions

DFACategory[NN_vmcall] = INDIR_CALL;              // Call to VM Monitor

  return;

} // end InitDFACategory()