Skip to content
Snippets Groups Projects
SMPFunction.cpp 217 KiB
Newer Older
jdh8d's avatar
jdh8d committed
/*
 * SMPFunction.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
jdh8d's avatar
jdh8d committed
 */

//
// SMPFunction.cpp
//
// This module performs the fundamental data flow analyses needed for the
//   SMP project (Software Memory Protection) at the function level.
//

#include <list>
#include <set>
#include <vector>
#include <algorithm>

#include <cstring>

#include <pro.h>
#include <assert.h>
#include <ida.hpp>
#include <idp.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <allins.hpp>
#include <intel.hpp>
#include <name.hpp>
#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"
#include "SMPFunction.h"
#include "SMPBasicBlock.h"
#include "SMPInstr.h"

// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0   // verbose
#define SMP_DEBUG3 0   // verbose
#define SMP_DEBUG_CONTROLFLOW 0  // tells what processing stage is entered
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0
#define SMP_DEBUG_DATAFLOW 0
#define SMP_DEBUG_DATAFLOW_VERBOSE 0
#define SMP_DEBUG_TYPE_INFERENCE 0
clc5q's avatar
clc5q committed
#define SMP_DEBUG_PROFILED_TYPE_INFERENCE 0
#define SMP_DEBUG_STACK_GRANULARITY 0
clc5q's avatar
clc5q committed
#define SMP_DEBUG_FUNC 0
#define SMP_DEBUG_FUNC_SAFETY 1
clc5q's avatar
clc5q committed
#define SMP_VERBOSE_DEBUG_FUNC 0
#define SMP_DEBUG_BUILD_RTL 1   // leave this on; serious errors reported
#define SMP_DEBUG_UNINITIALIZED_SSA_NAMES 1
#define SMP_WARN_UNUSED_DEFS 0
clc5q's avatar
clc5q committed
#define SMP_DEBUG_SWITCH_TABLE_INFO 0
#define SMP_OPTIMIZE_BLOCK_PROFILING 0

// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008
// Compute fine-grained stack boundaries?
#define SMP_COMPUTE_STACK_GRANULARITY 1
// Use conditional type propagation on phi functions
#define SMP_CONDITIONAL_TYPE_PROPAGATION 0

// Kludges to fix IDA Pro 5.2 errors in cc1.ncexe
#define SMP_IDAPRO52_WORKAROUND 0

// Basic block number 0 is the top of the CFG lattice.
#define SMP_TOP_BLOCK 0 

// Set SharedTailChunks to TRUE for entire printf family
//  After we restructure the parent/tail structure of the database, this
//  will go away.
#define KLUDGE_VFPRINTF_FAMILY 1

// Used for binary search by function number in SMPStaticAnalyzer.cpp
//  to trigger debugging output and find which instruction in which
//  function is causing a crash.
bool SMPBinaryDebug = false;

using namespace std;


// helper function to determine if an object is in a vector
template <class T>	
bool vector_exists(const T &item, const vector<T> &vec) {
	for (size_t i = 0; i < vec.size(); ++i) {
		if (vec[i] == item)
			return true;
	}
	return false;
}
// Comparison function for sorting.
bool LocalVarCompare(const LocalVar &LV1, const LocalVar &LV2) {
	return (LV1.offset < LV2.offset);
}

// *****************************************************************
// Class SMPFunction
// *****************************************************************

// Constructor
SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) {
	this->FuncInfo = *Info;
#if 0
#endif
	this->BlockCount = 0;
	this->UseFP = false;
	this->StaticFunc = false;
	this->LibFunc = false;
	this->IndirectCalls = false;
	this->UnresolvedIndirectCalls = false;
	this->IndirectJumps = false;
	this->UnresolvedIndirectJumps = false;
	this->SharedChunks = false;
	this->CallsAlloca = false;
	this->BuiltRTLs = false;
	this->SpecSafeFunc = false;
	this->SafeCallee = false;
	this->SpecSafeCallee = false;
	this->SafeFunc = true;
	this->SpecSafeFunc = true;
	this->SafeCallee = true;
	this->SpecSafeCallee = true;
#endif
	this->NeedsStackReferent = true;
clc5q's avatar
clc5q committed
	this->SpecNeedsStackReferent = true;
	this->HasIndirectWrites = false;
	this->OutgoingArgsComputed = false;
	this->GoodLocalVarTable = false;
	this->OutgoingArgsSize = 0;
	this->TypedDefs = 0;
	this->UntypedDefs = 0;
	this->TypedPhiDefs = 0;
	this->UntypedPhiDefs = 0;
	this->SafeBlocks = 0;
	this->UnsafeBlocks = 0;
	this->Size = 0;
	this->LocalVarsSize = 0;
	this->CalleeSavedRegsSize = 0;
	this->RetAddrSize = 0;
	this->IncomingArgsSize = 0;
	this->OutgoingArgsSize = 0;
	this->LocalVarsAllocInstr = BADADDR;
	this->LocalVarsDeallocInstr = BADADDR;
	this->AllocPointDelta = 0;
	this->MinStackDelta = 0;
	this->ReturnAddrStatus = FUNC_UNKNOWN;
	this->SetIsSpeculative(false);

	this->Instrs.clear();
	this->Blocks.clear();
	this->DirectCallTargets.clear();
	this->IndirectCallTargets.clear();
	this->AllCallTargets.clear();
	this->InstBlockMap.clear();
	this->RPOBlocks.clear();
	this->IDom.clear();
	this->DomTree.clear();
	this->GlobalNames.clear();
	this->BlocksDefinedIn.clear();
	this->SSACounter.clear();
	this->SSAStack.clear();
	this->LocalVarTable.clear();
	this->StackFrameMap.clear();
	this->FineGrainedStackTable.clear();
	this->SavedRegLoc.clear();
	this->ReturnRegTypes.clear();
	this->LiveInSet.clear();
	this->LiveOutSet.clear();
	this->KillSet.clear();
	this->GlobalDefAddrBySSA.clear();

	for (int RegIndex = R_ax; RegIndex <= R_di; ++RegIndex) {
		this->SavedRegLoc.push_back(0); // zero offset means reg not saved
		this->ReturnRegTypes.push_back(UNINIT);
	}

} // end of SMPFunction() constructor
SMPFunction::~SMPFunction() {
	list<SMPInstr *>::iterator InstIter;
	for (InstIter = this->Instrs.begin(); InstIter != this->Instrs.end(); ++InstIter) {
		SMPInstr *CurrInst = (*InstIter);
		delete CurrInst;
	}

	list<SMPBasicBlock *>::iterator BlockIter;
	for (BlockIter = this->Blocks.begin(); BlockIter != this->Blocks.end(); ++BlockIter) {
		SMPBasicBlock *CurrBlock = (*BlockIter);
		delete CurrBlock;
	}
// Get a non-stale pointer to the func_t info for the current function.
func_t *SMPFunction::GetFuncInfo(void) {
	func_t *myPtr = SMP_get_func(this->FirstEA);
// Reset the Processed flags in all blocks to false.
void SMPFunction::ResetProcessedBlocks(void) {
	list<SMPBasicBlock *>::iterator CurrBlock;
	for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
		(*CurrBlock)->SetProcessed(false);
	}
	return;
} // end of SMPFunction::ResetProcessedBlocks()

// Return an iterator for the beginning of the LiveInSet. 
set<op_t, LessOp>::iterator SMPFunction::GetFirstLiveIn(void) {
	return this->LiveInSet.begin();
} // end of SMPBasicBlock::GetFirstLiveIn()

// Get termination iterator marker for the LiveIn set, for use by predecessors.
set<op_t, LessOp>::iterator SMPFunction::GetLastLiveIn(void) {
	return this->LiveInSet.end();
}

// Get iterator for the start of the LiveOut set.
set<op_t, LessOp>::iterator SMPFunction::GetFirstLiveOut(void) {
	return this->LiveOutSet.begin();
}

// Get termination iterator marker for the LiveOut set.
set<op_t, LessOp>::iterator SMPFunction::GetLastLiveOut(void) {
	return this->LiveOutSet.end();
}

// Get iterator for the start of the VarKill set.
set<op_t, LessOp>::iterator SMPFunction::GetFirstVarKill(void) {
	return this->KillSet.begin();
}

// Get termination iterator marker for the VarKill set.
set<op_t, LessOp>::iterator SMPFunction::GetLastVarKill(void) {
	return this->KillSet.end();
}

// Four methods to get values from the maps of global reg/SSA to FG info.
//  For local names, see corresponding methods in SMPBasicBlock.
unsigned short SMPFunction::GetDefSignMiscInfo(int DefHashValue) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue);
	if (MapIter != this->GlobalDefFGInfoBySSA.end())
		return MapIter->second.SignMiscInfo;
	else
		return 0;
} // end of SMPFunction::GetDefSignMiscInfo()

unsigned short SMPFunction::GetUseSignMiscInfo(int UseHashValue) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue);
	if (MapIter != this->GlobalUseFGInfoBySSA.end())
		return MapIter->second.SignMiscInfo;
	else
		return 0;
} // end of SMPFunction::GetUseSignMiscInfo()

unsigned short SMPFunction::GetDefWidthTypeInfo(int DefHashValue) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue);
	if (MapIter != this->GlobalDefFGInfoBySSA.end())
		return MapIter->second.SizeInfo;
	else
		return 0;
} // end of SMPFunction::GetDefWidthTypeInfo()

unsigned short SMPFunction::GetUseWidthTypeInfo(int UseHashValue) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue);
	if (MapIter != this->GlobalUseFGInfoBySSA.end())
		return MapIter->second.SizeInfo;
	else
		return 0;
} // end of SMPFunction::GetUseWidthTypeInfo()

struct FineGrainedInfo SMPFunction::GetDefFGInfo(int DefHashValue) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue);
	if (MapIter != this->GlobalDefFGInfoBySSA.end())
		return MapIter->second;
	else {
		struct FineGrainedInfo EmptyFG;
		EmptyFG.SignMiscInfo = 0;
		EmptyFG.SizeInfo = 0;
		return EmptyFG;
	}
} // end of SMPFunction::GetDefFGInfo()

struct FineGrainedInfo SMPFunction::GetUseFGInfo(int UseHashValue) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue);
	if (MapIter != this->GlobalUseFGInfoBySSA.end())
		return MapIter->second;
	else {
		struct FineGrainedInfo EmptyFG;
		EmptyFG.SignMiscInfo = 0;
		EmptyFG.SizeInfo = 0;
		return EmptyFG;
	}
} // end of SMPFunction::GetUseFGInfo()

// Add a caller to the list of all callers of this function.
void SMPFunction::AddCallSource(ea_t addr) {
	// Convert call instruction address to beginning address of the caller.
		SMP_msg("SERIOUS WARNING: Call location %x not in a function.\n", addr);
		return;
	}
	ea_t FirstAddr = FuncInfo->startEA;
	assert(BADADDR != FirstAddr);
	this->AllCallSources.insert(FirstAddr);
	return;
} // end of SMPFunction::AddCallSource()

// Six methods to set values into the maps of global reg/SSA to FG info.
//  For local names, see corresponding methods in SMPBasicBlock.
void SMPFunction::UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue);
	if (MapIter == this->GlobalDefFGInfoBySSA.end()) {
		// Not found; insert first.
		struct FineGrainedInfo NewFGInfo;
		NewFGInfo.SignMiscInfo = NewInfo;
		NewFGInfo.SizeInfo = 0;
		pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFGInfo);
		MapResult = this->GlobalDefFGInfoBySSA.insert(MapItem);
		assert(MapResult.second); // Was not previously found, insertion must work.
	}
	else { // found; just OR in the new bits.
		MapIter->second.SignMiscInfo |= NewInfo;
	}

	return;
} // end of SMPFunction::UpdateDefSignMiscInfo()

void SMPFunction::UpdateUseSignMiscInfo(int UseHashValue, unsigned short NewInfo) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue);
	if (MapIter == this->GlobalUseFGInfoBySSA.end()) {
		// Not found; insert first.
		struct FineGrainedInfo NewFGInfo;
		NewFGInfo.SignMiscInfo = NewInfo;
		NewFGInfo.SizeInfo = 0;
		pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFGInfo);
		MapResult = this->GlobalUseFGInfoBySSA.insert(MapItem);
		assert(MapResult.second); // Was not previously found, insertion must work.
	}
	else { // found; just OR in the new bits.
		MapIter->second.SignMiscInfo |= NewInfo;
	}

	return;
} // end of SMPFunction::UpdateUseSignMiscInfo()

void SMPFunction::UpdateDefWidthTypeInfo(int DefHashValue, unsigned short NewInfo) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue);
	if (MapIter == this->GlobalDefFGInfoBySSA.end()) {
		// Not found; insert first.
		struct FineGrainedInfo NewFGInfo;
		NewFGInfo.SignMiscInfo = 0;
		NewFGInfo.SizeInfo = NewInfo;
		pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFGInfo);
		MapResult = this->GlobalDefFGInfoBySSA.insert(MapItem);
		assert(MapResult.second); // Was not previously found, insertion must work.
	}
	else { // found; just OR in the new bits.
		MapIter->second.SizeInfo |= NewInfo;
	}

	return;
} // end of SMPFunction::UpdateDefWidthTypeInfo()

void SMPFunction::UpdateUseWidthTypeInfo(int UseHashValue, unsigned short NewInfo) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue);
	if (MapIter == this->GlobalUseFGInfoBySSA.end()) {
		// Not found; insert first.
		struct FineGrainedInfo NewFGInfo;
		NewFGInfo.SignMiscInfo = 0;
		NewFGInfo.SizeInfo = NewInfo;
		pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFGInfo);
		MapResult = this->GlobalUseFGInfoBySSA.insert(MapItem);
		assert(MapResult.second); // Was not previously found, insertion must work.
	}
	else { // found; just OR in the new bits.
		MapIter->second.SizeInfo |= NewInfo;
	}

	return;
} // end of SMPFunction::UpdateUseWidthTypeInfo()

void SMPFunction::UpdateDefFGInfo(int DefHashValue, struct FineGrainedInfo NewFG) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue);
	if (MapIter == this->GlobalDefFGInfoBySSA.end()) {
		// Not found; insert it.
		pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFG);
		MapResult = this->GlobalDefFGInfoBySSA.insert(MapItem);
		assert(MapResult.second); // Was not previously found, insertion must work.
	}
	else { // found; just put in the new bits.
		MapIter->second.SignMiscInfo |= NewFG.SignMiscInfo;
		MapIter->second.SizeInfo |= NewFG.SizeInfo;
	}

	return;
} // end of SMPFunction::UpdateDefFGInfo()

void SMPFunction::UpdateUseFGInfo(int UseHashValue, struct FineGrainedInfo NewFG) {
	map<int, struct FineGrainedInfo>::iterator MapIter;
	pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult;

	MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue);
	if (MapIter == this->GlobalUseFGInfoBySSA.end()) {
		// Not found; insert it.
		pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFG);
		MapResult = this->GlobalUseFGInfoBySSA.insert(MapItem);
		assert(MapResult.second); // Was not previously found, insertion must work.
	}
	else { // found; just put in the new bits.
		MapIter->second.SignMiscInfo |= NewFG.SignMiscInfo;
		MapIter->second.SizeInfo |= NewFG.SizeInfo;
	}

	return;
} // end of SMPFunction::UpdateUseFGInfo()

// Erase a range of instructions from the Instrs list, usually corresponding
//  the the range of a basic block.
void SMPFunction::EraseInstRange(ea_t FirstAddr, ea_t LastAddr) {
	list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
	SMPInstr *CurrInst;
	ea_t InstAddr;

	while (InstIter != this->Instrs.end()) {
		CurrInst = (*InstIter);
		InstAddr = CurrInst->GetAddr();
		if ((InstAddr >= FirstAddr) && (InstAddr <= LastAddr)) {
			InstIter = this->Instrs.erase(InstIter);
		}
		else {
			++InstIter;
		}
	}
} // end of SMPFunction::EraseInstRange()

// Figure out the different regions of the stack frame, and find the
//  instructions that allocate and deallocate the local variables space
//  on the stack frame.
// The stack frame info will be used to emit stack
//  annotations when Analyze() reaches the stack allocation
//  instruction that sets aside space for local vars.
// Set the address of the instruction at which these
//  annotations should be emitted. This should normally
//  be an instruction such as:  sub esp,48
//  However, for a function with no local variables at all,
//  we will need to determine which instruction should be
//  considered to be the final instruction of the function
//  prologue and return its address.
// Likewise, we find the stack deallocating instruction in
//  the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
	bool FoundAllocInstr = false;
	bool FoundDeallocInstr = false;
clc5q's avatar
clc5q committed
	bool DebugFlag = false;
#if SMP_DEBUG_FRAMEFIXUP
	DebugFlag |= (0 == strcmp(".init_proc", this->GetFuncName()));
#endif

	// The sizes of the three regions of the stack frame other than the
	//  return address are stored in the function structure.
	this->LocalVarsSize = this->FuncInfo.frsize;
	this->CalleeSavedRegsSize = this->FuncInfo.frregs;
	this->IncomingArgsSize = this->FuncInfo.argsize;

	// The return address size can be obtained in a machine independent
	//  way by calling get_frame_retsize(). 
	this->RetAddrSize = get_frame_retsize(this->GetFuncInfo());

	// IDA Pro has trouble with functions that do not have any local
	//  variables. Unfortunately, the C library has plenty of these
	//  functions. IDA usually claims that frregs is zero and frsize
	//  is N, when the values should have been reversed. We can attempt
	//  to detect this and fix it.
	bool FrameInfoFixed = this->MDFixFrameInfo();
#if SMP_DEBUG_CONTROLFLOW
	SMP_msg("Returned from MDFixFrameInfo()\n");
#if SMP_DEBUG_FRAMEFIXUP
	if (FrameInfoFixed) {
		SMP_msg("Fixed stack frame size info: %s\n", this->GetFuncName());
		SMPBasicBlock *CurrBlock = this->Blocks.front();
		for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock->GetFirstInstr();
			CurrInstr != CurrBlock->GetLastInstr();
			SMP_msg("%s\n", (*CurrInstr)->GetDisasm());
		}
	}
#endif

	// Now, if LocalVarsSize is not zero, we need to find the instruction
	//  in the function prologue that allocates space on the stack for
	//  local vars. This code could be made more robust in the future
	//  by matching LocalVarsSize to the immediate value in the allocation
	//  instruction. However, IDA Pro is sometimes a little off on this
	//  number. **!!**
	if (0 < this->LocalVarsSize) {
		if (DebugFlag) SMP_msg("Searching for alloc and dealloc\n");
		list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
#if SMP_USE_SSA_FNOP_MARKER
		++InstIter;  // skip marker instruction
		for ( ; InstIter != this->Instrs.end(); ++InstIter) {
			SMPInstr *CurrInst = (*InstIter);
			ea_t addr = CurrInst->GetAddr();

			// Keep the most recent instruction in the DeallocInstr
			//  in case we reach the return without seeing a dealloc.
			if (!FoundDeallocInstr) {
				this->LocalVarsDeallocInstr = addr;
			}

			if (!FoundAllocInstr
				&& CurrInst->MDIsFrameAllocInstr()) {
#if SMP_DEBUG_CONTROLFLOW
				SMP_msg("Returned from MDIsFrameAllocInstr()\n");
				this->LocalVarsAllocInstr = addr;
				FoundAllocInstr = true;
				if (DebugFlag) SMP_msg("Found alloc: %s\n", CurrInst->GetDisasm());
				// As soon as we have found the local vars allocation,
				//  we can try to fix incorrect sets of UseFP by IDA.
				// NOTE: We might want to extend this in the future to
				//  handle functions that have no locals.  **!!**
				bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_CONTROLFLOW
#if SMP_DEBUG_FRAMEFIXUP
				if (FixedUseFP) {
					SMP_msg("Fixed UseFP in %s\n", this->GetFuncName());
				}
#endif
			}
			else if (FoundAllocInstr) {
				// We can now start searching for the DeallocInstr.
				if (CurrInst->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
					// Keep saving the most recent addr that looks
					//  like the DeallocInstr until we reach the
					//  end of the function. Last one to look like
					//  it is used as the DeallocInstr.
#if SMP_DEBUG_CONTROLFLOW
					SMP_msg("Returned from MDIsFrameDeallocInstr()\n");
					this->LocalVarsDeallocInstr = addr;
					FoundDeallocInstr = true;
				}
					if (DebugFlag) SMP_msg("Not dealloc: %s\n", CurrInst->GetDisasm());
		} // end for (list<SMPInstr *>::iterator InstIter ... )
		if (!FoundAllocInstr) {
			// Could not find the frame allocating instruction.  Bad.
			// See if we can find the point at which the stack allocation reaches
			//  a total of FuncInfo.frsize+frregs, regardless of whether it happened by push
			//  instructions or some other means.
			this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize + this->FuncInfo.frregs);
#if SMP_DEBUG_CONTROLFLOW
			SMP_msg("Returned from FindAllocPoint()\n");
#if SMP_DEBUG_FRAMEFIXUP
			if (BADADDR == this->LocalVarsAllocInstr) {
				SMP_msg("ERROR: Could not find stack frame allocation in %s\n",
					this->GetFuncName());
				SMP_msg("LocalVarsSize: %d  SavedRegsSize: %d ArgsSize: %d\n",
					LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
			}
			else {
				SMP_msg("FindAllocPoint found %x for function %s\n",
					this->LocalVarsAllocInstr, this->GetFuncName());
			}
#endif
		}
#if SMP_DEBUG_FRAMEFIXUP
		if (!FoundDeallocInstr) {
			// Could not find the frame deallocating instruction.  Bad.
			// Emit diagnostic and use the last instruction in the
			// function.
			SMP_msg("ERROR: Could not find stack frame deallocation in %s\n",
				this->GetFuncName());
		}
#endif
	}
	// else LocalVarsSize was zero, meaning that we need to search 
	//  for the end of the function prologue code and emit stack frame
	//  annotations from that address (i.e. this method returns that
	//  address). We will approximate this by finding the end of the
	//  sequence of PUSH instructions at the beginning of the function.
	//  The last PUSH instruction should be the last callee-save-reg
	//  instruction. We can make this more robust in the future by
	//  making sure that we do not count a PUSH of anything other than
	//  a register. **!!**
	// NOTE: 2nd prologue instr is usually mov ebp,esp
	// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
	// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
	else {
		ea_t SaveAddr = this->FuncInfo.startEA;
		list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
#if SMP_USE_SSA_FNOP_MARKER
		++InstIter;  // skip marker instruction
		for ( ;	InstIter != this->Instrs.end(); ++InstIter) {
			SMPInstr *CurrInst = (*InstIter);
			insn_t CurrCmd = CurrInst->GetCmd();
			ea_t addr = CurrInst->GetAddr();
			if (CurrCmd.itype == NN_push)
				SaveAddr = addr;
			else
				break;
		}
		this->LocalVarsAllocInstr = SaveAddr;
		this->LocalVarsDeallocInstr = 0;
	} // end if (LocalVarsSize > 0) ... else ...

	for (list<SMPInstr *>::iterator InstIter = this->Instrs.begin(); InstIter != this->Instrs.end(); ++InstIter) {
		// We can finally search for stack loads now that UseFP has been fixed by
		//  MDFixUseFP(). Otherwise, we would do this in SMPInstr::Analyze(),
		//  but the UseFP flag is not ready that early.
		(*InstIter)->MDFindLoadFromStack(this->UseFP);

		// Fix up machine dependent quirks in the def and use lists.
		//  This used to be called from within SMPInstr.Analyze(), but info such as UseFP
		//  is not available that early.
		(*InstIter)->MDFixupDefUseLists();
	this->CallsAlloca = this->FindAlloca();

#if SMP_COMPUTE_STACK_GRANULARITY
	// Now, find the boundaries between local variables.
	this->BuildLocalVarTable();
#endif

	// Get callee-saved regs info for remediation use.
	if (FoundAllocInstr) {
		this->MDFindSavedRegs();
	}

	return;
} // end of SMPFunction::SetStackFrameInfo()

// IDA Pro defines the sizes of regions in the stack frame in a way
//  that suits its purposes but not ours. The frsize field of the func_info_t
//  structure measures the distance between the stack pointer and the
//  frame pointer (ESP and EBP in the x86). This region includes some
//  of the callee-saved registers. So, the frregs field only includes
//  the callee-saved registers that are above the frame pointer.
//  x86 standard prologue on gcc/linux:
//    push ebp      ; save old frame pointer
//    mov ebp,esp   ; new frame pointer = current stack pointer
//    push esi      ; callee save reg
//    push edi      ; callee save reg
//    sub esp,34h   ; allocate 52 bytes for local variables
//
//  Notice that EBP acquires its final frame pointer value AFTER the
//  old EBP has been pushed. This means that, of the three callee saved
//  registers, one is above where EBP points and two are below.
//  IDA Pro is concerned with generating readable addressing expressions
//  for items on the stack. None of the callee-saved regs will ever
//  be addressed in the function; they will be dormant until they are popped
//  off the stack in the function epilogue. In order to create readable
//  disassembled code, IDA defines named constant offsets for locals. These
//  offsets are negative values (x86 stack grows downward from EBP toward
//  ESP). When ESP_relative addressing occurs, IDA converts a statement:
//    mov eax,[esp+12]
//  into the statement:
//    mov eax,[esp+3Ch+var_30]
//  Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
//  var_30 is defined to have the value -30h == -48 decimal. So, the
//  "frame size" in IDA Pro is 60 bytes, and a certain local can be
//  addressed in ESP-relative manner as shown, or as [ebp+var_30] for
//  EBP-relative addressing. The interactive IDA user can then edit
//  the name var_30 to something mnemonic, such as "virus_size", and IDA
//  will replace all occurrences with the new name, so that code references
//  automatically become [ebp+virus_size]. As the user proceeds
//  interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
//  our analyses have a compiler perspective as well as a memory access
//  defense perspective. SMP distinguishes between callee saved regs,
//  which should not be overwritten in the function body, and local
//  variables, which can be written. We view the stack frame in logical
//  pieces: here are the saved regs, here are the locals, here is the
//  return address, etc. We don't care which direction from EBP the
//  callee-saved registers lie; we don't want to lump them in with the
//  local variables. We also don't like the fact that IDA Pro will take
//  the function prologue code shown above and declare frregs=4 and
//  frsize=60, because frsize no longer matches the stack allocation
//  statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
//  private data members for the function, while leaving the IDA database
//  alone because IDA needs to maintain its own definitions of these
//  variables.
// Fixing means we will update the data members LocalVarsSize and
//  CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
//  The prologue and epilogue code generated by gcc-linux is as discussed
//  above, while on Visual Studio and other Windows x86 compilers, the
//  saving of registers other than EBP happens AFTER local stack allocation.
//  A Windows version of the function would expect to see the pushing
//  of ESI and EDI AFTER the sub esp,34h statement.
bool SMPFunction::MDFixFrameInfo(void) {
	int SavedRegsSize = 0;
	int OtherPushesSize = 0;  // besides callee-saved regs
	int NewLocalsSize = 0;
	int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
	bool Changed = false;
	bool DebugFlag = (0 == strcmp("__libc_csu_init", this->GetFuncName()));

	// Iterate through the first basic block in the function. If we find
	//  a frame allocating Instr in it, then we have local vars. If not,
	//  we don't, and LocalVarsSize should have been zero. Count the callee
	//  register saves leading up to the local allocation. Set data members
	//  according to what we found if the values of the data members would
	//  change.
	SMPBasicBlock *CurrBlock = this->Blocks.front();
	list<SMPInstr *>::iterator CurrIter = CurrBlock->GetFirstInstr();
#if SMP_USE_SSA_FNOP_MARKER
	++CurrIter;  // skip marker instruction
	for ( ; CurrIter != CurrBlock->GetLastInstr(); ++CurrIter) {
		SMPInstr *CurrInstr = (*CurrIter);
		if (CurrInstr->MDIsPushInstr()) {
			// We will make the gcc-linux assumption that a PUSH in
			//  the first basic block, prior to the stack allocating
			//  instruction, is a callee register save. To make this
			//  more robust, we ensure that the register is from
			//  the callee saved group of registers, and that it has
			//  not been defined thus far in the function (else it might
			//  be a push of an outgoing argument to a call that happens
			//  in the first block when there are no locals). **!!!!**
			if (CurrInstr->MDUsesCalleeSavedReg()
				&& !CurrInstr->HasSourceMemoryOperand()) {
				SavedRegsSize += 4; // **!!** should check the size
				if (DebugFlag) SMP_msg("libc_csu_init SavedRegsSize: %d  %s\n", SavedRegsSize,
					CurrInstr->GetDisasm());
			}
			else {
				// Pushes of outgoing args can be scheduled so that
				//  they are mixed with the pushes of callee saved regs.
				OtherPushesSize += 4;
				if (DebugFlag) SMP_msg("libc_csu_init OtherPushesSize: %d  %s\n", OtherPushesSize,
					CurrInstr->GetDisasm());
			}
		}
		else if (CurrInstr->MDIsFrameAllocInstr()) {
			if (DebugFlag) SMP_msg("libc_csu_init allocinstr: %s\n", CurrInstr->GetDisasm());
			SavedRegsSize += OtherPushesSize;
			// Get the size being allocated.
			set<DefOrUse, LessDefUse>::iterator CurrUse;
			for (CurrUse = CurrInstr->GetFirstUse(); CurrUse != CurrInstr->GetLastUse(); ++CurrUse) {
				// Find the immediate operand.
				if (o_imm == CurrUse->GetOp().type) {
					// Get its value into LocalVarsSize.
					long AllocValue = (signed long) CurrUse->GetOp().value;
					// One compiler might have sub esp,24 and another
					//  might have add esp,-24. Take the absolute value.
					if (0 > AllocValue)
						AllocValue = -AllocValue;
					if (AllocValue != (long) this->LocalVarsSize) {
						Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
						if (AllocValue + SavedRegsSize != OldFrameTotal)
							SMP_msg("Total frame size changed: %s\n", this->GetFuncName());
#endif
						this->LocalVarsSize = (asize_t) AllocValue;
						this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
						NewLocalsSize = this->LocalVarsSize;
					}
					else { // Old value was correct; no change.
						NewLocalsSize = this->LocalVarsSize;
						if (SavedRegsSize != this->CalleeSavedRegsSize) {
							this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
							Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
							SMP_msg("Only callee regs size changed: %s\n", this->GetFuncName());
#endif
						}
					}
				} // end if (o_imm == ...)
			} // end for all uses
			break; // After frame allocation instr, we are done
		} // end if (push) .. elsif frame allocating instr
	} // end for all instructions in the first basic block

	// If we did not find an allocating instruction, see if it would keep
	//  the total size the same to set LocalVarsSize to 0 and to set
	//  CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
	//  might be better off to leave the numbers alone.
	if (!Changed && (NewLocalsSize == 0)) {
		if (DebugFlag) SMP_msg("libc_csu_init OldFrameTotal: %d \n", OldFrameTotal);
		if (OldFrameTotal == SavedRegsSize) {
clc5q's avatar
clc5q committed
			this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
			this->LocalVarsSize = 0;
			Changed = true;
		}
#if SMP_DEBUG_FRAMEFIXUP
		else {
			SMP_msg("Could not update frame sizes: %s\n", this->GetFuncName());
		}
#endif
	}

#if SMP_DEBUG_FRAMEFIXUP
	if ((0 < OtherPushesSize) && (0 < NewLocalsSize))
		SMP_msg("Extra pushes found of size %d in %s\n", OtherPushesSize,
			this->GetFuncName());
#endif

	return Changed;
} // end of SMPFunction::MDFixFrameInfo()

// Some functions have difficult to find stack allocations. For example, in some
//  version of glibc, strpbrk() zeroes out register ECX and then pushes it more than
//  100 times in order to allocate zero-ed out local vars space for a character translation
//  table. We will use the stack pointer analysis of IDA to find out if there is a point
//  in the first basic block at which the stack pointer reaches the allocation total
//  that IDA is expecting for the local vars region.
// If so, we return the address of the instruction at which ESP reaches its value, else
//  we return BADADDR.
ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
	sval_t TargetSize = - ((sval_t) OriginalLocSize);  // negate; stack grows down 

#if SMP_DEBUG_FRAMEFIXUP
	bool DebugFlag = (0 == strcmp("_dl_runtime_resolve", this->GetFuncName()));
		SMP_msg("%s OriginalLocSize: %d\n", this->GetFuncName(), OriginalLocSize);
	if (this->AnalyzedSP) {
		// Limit our analysis to the first basic block in the function.
		list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
#if SMP_USE_SSA_FNOP_MARKER
		++InstIter;  // skip marker instruction
		for ( ; InstIter != this->Instrs.end(); ++InstIter) {
			SMPInstr *CurrInst = (*InstIter);
			ea_t addr = CurrInst->GetAddr();
			// get_spd() returns a cumulative delta of ESP
			sval_t sp_delta = get_spd(this->GetFuncInfo(), addr);
#if SMP_DEBUG_FRAMEFIXUP
				SMP_msg("%s delta: %d at %x\n", this->GetFuncName(), sp_delta, addr);
			if (sp_delta == TargetSize) { // <= instead of == here?  **!!**
				// Previous instruction hit the frame size.
				if (InstIter == this->Instrs.begin()) {
					return BADADDR;  // cannot back up from first instruction
				}
				else {
					ea_t PrevAddr = (*(--InstIter))->GetAddr();
#if SMP_USE_SSA_FNOP_MARKER
					if ((*(this->Instrs.begin()))->GetAddr() == PrevAddr)
						return BADADDR;  // don't return marker instruction
					else
						return PrevAddr;
#else
					return PrevAddr;
#endif
			if (CurrInst->IsLastInBlock()) {
				// It could be that the current instruction will cause the stack pointer
				//  delta to reach the TargetSize. sp_delta is not updated until after the
				//  current instruction, so we need to look ahead one instruction if the
				//  current block falls through. On the other hand, if the current block
				//  ends with a jump or return, we cannot hit TargetSize.
				if (CurrInst->IsBasicBlockTerminator())
				list<SMPInstr *>::iterator NextInstIter = InstIter;
				++NextInstIter;
				if (NextInstIter == this->Instrs.end())
				sp_delta = get_spd(this->GetFuncInfo(), (*NextInstIter)->GetAddr());
				if (sp_delta == TargetSize) {
					// CurrInst will cause stack pointer delta to hit TargetSize.
					return addr;
				}
				else {
					return BADADDR;
				}
			} // end if LastInBlock
		} // end for all instructions
	} // end if (this->AnalyzedSP)
#if SMP_DEBUG_FRAMEFIXUP
	else {
		SMP_msg("AnalyzedSP is false for %s\n", this->GetFuncName());
	}
#endif
	return BADADDR;
} // end of SMPFunction::FindAllocPoint()

// IDA Pro is sometimes confused by a function that uses the frame pointer
//  register for other purposes. For the x86, a function that uses EBP
//  as a frame pointer would begin with: push ebp; mov ebp,esp to save
//  the old value of EBP and give it a new value as a frame pointer. The
//  allocation of local variable space would have to come AFTER the move
//  instruction. A function that begins: push ebp; push esi; sub esp,24
//  is obviously not using EBP as a frame pointer. IDA is apparently
//  confused by the push ebp instruction being the first instruction
//  in the function. We will reset UseFP to false in this case.
// The inverse problem happens with a function that begins with instructions
//  other than push ebp; mov ebp,esp; ... etc. but eventually has those
//  instructions in the first basic block. For example, a C compiler generates
//  for the first block of main():
//    lea ecx,[esp+arg0]
//    and esp, 0xfffffff0
//    push dword ptr [ecx-4]
//    push ebp
//    mov ebp,esp
//    push ecx
//    sub esp,<framesize>
//
//  This function is obviously using EBP as a frame pointer, but IDA Pro marks
//  the function as not using a frame pointer. We will reset UseFP to true in
//  this case.