Skip to content
Snippets Groups Projects
SMPDataFlowAnalysis.cpp 72.6 KiB
Newer Older
	// Find all upwardly exposed operands and killed operands in this block.
	list<list<SMPInstr>::iterator>::iterator CurrIter;
	for (CurrIter = this->Instrs.begin(); CurrIter != this->Instrs.end(); ++CurrIter) {
		list<SMPInstr>::iterator CurrInst = *CurrIter;

		// Dataflow equation for upward exposed variables: If a variable has not been
		//  killed yet in this block, starting from the top of the block, and it is used
		//  in the current instruction, then it is upwardly exposed.
		size_t limit = CurrInst->NumUses();
		for (size_t index = 0; index < limit; ++index) {
			if (this->MDAlreadyKilled(CurrInst->GetUse(index)))
				this->UpExposedSet.insert(CurrInst->GetUse(index));
		}
		// Dataflow equation for killed variables: If a variable is defined in any
		//  instruction in the block, it is killed by this block (i.e. prior definitions
		//  of that variable will not make it through the block).
		limit = CurrInst->NumDefs();
		for (size_t index = 0; index < limit; ++index) {
			this->KillSet.insert(CurrInst->GetDef(index));
		}
	} // end for all instrs in block
	this->IsLiveInStale = true;  // Would need to compute LiveInSet for first time
	return;
} // end of SMPBasicBlock::InitKilledExposed()

// Return an iterator for the beginning of the LiveInSet. If the set is stale,
//  recompute it first.
set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveIn(void) {
	if (this->IsLiveInStale) {
		// Dataflow equation: A variable is live-in to this block if it
		//  is upwardly exposed from this block, or if it passes through
		//  the block unchanged (i.e. it is not killed and is live out).
		this->LiveInSet.clear();
		set<op_t, LessOp>::iterator OutIter;
		this->LiveInSet.insert(this->UpExposedSet.begin(), this->UpExposedSet.end());
		for (OutIter = this->LiveOutSet.begin(); OutIter != this->LiveOutSet.end(); ++OutIter) {
			if (KillSet.end() != this->KillSet.find(*OutIter)) // Found live out but not killed
				this->LiveInSet.insert(*OutIter);
		}
		this->IsLiveInStale = false;
	}
	return this->LiveInSet.begin();
} // end of SMPBasicBlock::GetFirstLiveIn()

// Get termination iterator marker for the LiveIn set, for use by predecessors.
set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveIn(void) {
	// Does not matter if it is stale or not; end marker is the same
	return this->LiveInSet.end();
}

// Update the LiveOut set for the block.
// Return true if it changed, false otherwise.
bool SMPBasicBlock::UpdateLiveOut(void) {
	bool changed = false;
	set<op_t, LessOp> OldLiveOut(this->LiveOutSet); // save copy of old LiveOutSet
	this->LiveOutSet.clear();  // Clear it and rebuild it
	// Dataflow equation for LiveOutSet: If a variable is live-in for any successor
	//  block, it is live out for this block.
	list<list<SMPBasicBlock>::iterator>::iterator SuccIter;
	for (SuccIter = this->Successors.begin(); SuccIter != this->Successors.end(); ++SuccIter) {
		set<op_t, LessOp>::iterator InSuccIter;
		for (InSuccIter = (*SuccIter)->GetFirstLiveIn(); InSuccIter != (*SuccIter)->GetLastLiveIn(); ++InSuccIter) {
			this->LiveOutSet.insert(*InSuccIter);
		}
	}

	// Only remaining question: Did the LiveOutSet change?
	// Short cut: If the set cardinality changed, then the set changed.
	if (this->LiveOutSet.size() != OldLiveOut.size()) {
		changed = true;
	}
	else { // Same # of elements; move through in lockstep and compare.
		set<op_t, LessOp>::iterator NewIter = this->LiveOutSet.begin();
		set<op_t, LessOp>::iterator OldIter = OldLiveOut.begin();
		set<op_t, LessOp>::value_compare OpComp = OldLiveOut.value_comp(); // LessOp()
		while (OldIter != OldLiveOut.end()) { // both iters terminate simultaneously
			if (OpComp(*OldIter, *NewIter) || OpComp(*NewIter, *OldIter)) {
				changed = true;
				break;
			}
			++OldIter;
			++NewIter;
		}
	}

	if (changed)
		this->IsLiveInStale = true;

	OldLiveOut.clear();
	return changed;
} // end of SMPBasicBlock::UpdateLiveOut()

clc5q's avatar
clc5q committed
// *****************************************************************
// Class SMPFunction
// *****************************************************************

// Constructor
SMPFunction::SMPFunction(func_t *Info) {
	this->FuncInfo = Info;
	this->IndirectCalls = false;
	this->SharedChunks = false;
clc5q's avatar
clc5q committed
	return;
}

// Figure out the different regions of the stack frame, and find the
//  instructions that allocate and deallocate the local variables space
//  on the stack frame.
// The stack frame info will be used to emit stack
//  annotations when Analyze() reaches the stack allocation
//  instruction that sets aside space for local vars.
// Set the address of the instruction at which these
//  annotations should be emitted. This should normally
//  be an instruction such as:  sub esp,48
//  However, for a function with no local variables at all,
//  we will need to determine which instruction should be
//  considered to be the final instruction of the function
//  prologue and return its address.
// Likewise, we find the stack deallocating instruction in
//  the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
	bool FoundAllocInstr = false;
	bool FoundDeallocInstr = false;

	// The sizes of the three regions of the stack frame other than the
	//  return address are stored in the function structure.
	this->LocalVarsSize = this->FuncInfo->frsize;
	this->CalleeSavedRegsSize = this->FuncInfo->frregs;
	this->IncomingArgsSize = this->FuncInfo->argsize;

	// The return address size can be obtained in a machine independent
	//  way by calling get_frame_retsize(). 
	this->RetAddrSize = get_frame_retsize(this->FuncInfo);

	// IDA Pro has trouble with functions that do not have any local
	//  variables. Unfortunately, the C library has plenty of these
	//  functions. IDA usually claims that frregs is zero and frsize
	//  is N, when the values should have been reversed. We can attempt
	//  to detect this and fix it.
	bool FrameInfoFixed = this->MDFixFrameInfo();

#if SMP_DEBUG_FRAMEFIXUP
	if (FrameInfoFixed) {
		msg("Fixed stack frame size info: %s\n", this->FuncName);
		SMPBasicBlock CurrBlock = this->Blocks.front();
		msg("First basic block:\n");
		for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr();
clc5q's avatar
clc5q committed
			CurrInstr != CurrBlock.GetLastInstr();
			++CurrInstr) {
			msg("%s\n", (*CurrInstr)->GetDisasm());
clc5q's avatar
clc5q committed
		}
	}
#endif

	// Now, if LocalVarsSize is not zero, we need to find the instruction
	//  in the function prologue that allocates space on the stack for
	//  local vars. This code could be made more robust in the future
	//  by matching LocalVarsSize to the immediate value in the allocation
	//  instruction. However, IDA Pro is sometimes a little off on this
clc5q's avatar
clc5q committed
	if (0 < this->LocalVarsSize) {
		for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
			CurrInstr != this->Instrs.end();
			++CurrInstr) {
			ea_t addr = CurrInstr->GetAddr();

			// Keep the most recent instruction in the DeallocInstr
			//  in case we reach the return without seeing a dealloc.
			if (!FoundDeallocInstr) {
				this->LocalVarsDeallocInstr = addr;
			}

			if (!FoundAllocInstr
				&& CurrInstr->MDIsFrameAllocInstr()) {
clc5q's avatar
clc5q committed
				this->LocalVarsAllocInstr = addr;
				FoundAllocInstr = true;
				// As soon as we have found the local vars allocation,
				//  we can try to fix incorrect sets of UseFP by IDA.
				// NOTE: We might want to extend this in the future to
				//  handle functions that have no locals.  **!!**
				bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
				if (FixedUseFP) {
					msg("Fixed UseFP in %s\n", this->FuncName);
				}
#endif
clc5q's avatar
clc5q committed
			}
			else if (FoundAllocInstr) {
				// We can now start searching for the DeallocInstr.
				if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
					// Keep saving the most recent addr that looks
					//  like the DeallocInstr until we reach the
					//  end of the function. Last one to look like
					//  it is used as the DeallocInstr.
					this->LocalVarsDeallocInstr = addr;
					FoundDeallocInstr = true;
				}
			}
		} // end for (list<SMPInstr>::iterator CurrInstr ... )
		if (!FoundAllocInstr) {
			// Could not find the frame allocating instruction.  Bad.
			// Emit diagnostic and use the first instruction in the
			//  function as a pseudo-allocation instruction to emit
			//  some stack frame info (return address, etc.)
			this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo->frsize);
#if SMP_DEBUG_FRAMEFIXUP
			if (BADADDR == this->LocalVarsAllocInstr) {
				msg("ERROR: Could not find stack frame allocation in %s\n",
					FuncName);
				msg("LocalVarsSize: %d  SavedRegsSize: %d ArgsSize: %d\n",
					LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
			}
			else {
				msg("FindAllocPoint found %x for function %s\n",
					this->LocalVarsAllocInstr, this->GetFuncName());
			}
#endif
clc5q's avatar
clc5q committed
		}
clc5q's avatar
clc5q committed
		if (!FoundDeallocInstr) {
			// Could not find the frame deallocating instruction.  Bad.
			// Emit diagnostic and use the last instruction in the
			// function.
			msg("ERROR: Could not find stack frame deallocation in %s\n",
				FuncName);
		}
clc5q's avatar
clc5q committed
	}
	// else LocalVarsSize was zero, meaning that we need to search 
	//  for the end of the function prologue code and emit stack frame
	//  annotations from that address (i.e. this method returns that
	//  address). We will approximate this by finding the end of the
	//  sequence of PUSH instructions at the beginning of the function.
	//  The last PUSH instruction should be the last callee-save-reg
	//  instruction. We can make this more robust in the future by
	//  making sure that we do not count a PUSH of anything other than
	//  a register. **!!**
	// NOTE: 2nd prologue instr is usually mov ebp,esp
	// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
	// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
	else {
		ea_t SaveAddr = FuncInfo->startEA;
		for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
			CurrInstr != this->Instrs.end();
			++CurrInstr) {
			insn_t CurrCmd = CurrInstr->GetCmd();
			ea_t addr = CurrInstr->GetAddr();
			if (CurrCmd.itype == NN_push)
				SaveAddr = addr;
			else
				break;
		}
		this->LocalVarsAllocInstr = SaveAddr;
		this->LocalVarsDeallocInstr = 0;
	} // end if (LocalVarsSize > 0) ... else ...

#if 0
	// Now we need to do the corresponding operations from the
	//  end of the function to find the DeallocInstr in the
	//  function epilogue. Because there is no addition to the
	//  stack pointer to deallocate the local vars region, the
	//  function epilogue will consist of (optional) pops of
	//  callee-saved regs, followed by the return instruction.
	//  Working backwards, we should find a return and then 
	//  stop when we do not find any more pops.
	if (0 >= LocalVarsSize) {
		this->LocalVarsDeallocInstr = NULL;
	}
	else {
		SaveAddr = FuncInfo->endEA - 1;
		bool FoundRet = false;
		do {
			ea_t addr = get_item_head(SaveAddr);
			flags_t InstrFlags = getFlags(addr);
			if (isCode(addr) && isHead(addr)) {
				ua_ana0(addr);
				if (!FoundRet) { // Just starting out.
					if (MDIsReturnInstr(cmd)) {
						FoundRet = true;
						SaveAddr = addr - 1;
					}
					else {
						msg("ERROR: Last instruction not a return.\n");
					}
				}
				else { // Should be 0 or more POPs before the return.
					if (MDIsPopInstr(cmd)) {
						SaveAddr = addr - 1;
					}
					else if (FrameAllocInstr(cmd, this->LocalVarsSize)) {
						this->LocalVarsDeallocInstr = addr;
					}
					else {
						msg("ERROR: Frame deallocation not prior to POPs.\n");
						this->LocalVarsDeallocInstr = SaveAddr + 1;
					}
				} // end if (!FoundRet) ... else ...
			}
			else {
				--SaveAddr;
			} // end if (isCode(addr) && isHead(addr))
		} while (NULL == this->LocalVarsDeallocInstr);
	} // end if (0 >= this->LocalVarsSize)
#endif // 0
	return;
} // end of SMPFunction::SetStackFrameInfo()

// IDA Pro defines the sizes of regions in the stack frame in a way
//  that suits its purposes but not ours. the frsize field of the func_info_t
//  structure measures the distance between the stack pointer and the
//  frame pointer (ESP and EBP in the x86). This region includes some
//  of the callee-saved registers. So, the frregs field only includes
//  the callee-saved registers that are above the frame pointer.
//  x86 standard prologue on gcc/linux:
//    push ebp      ; save old frame pointer
//    mov ebp,esp   ; new frame pointer = current stack pointer
//    push esi      ; callee save reg
//    push edi      ; callee save reg
//    sub esp,34h   ; allocate 52 bytes for local variables
//
//  Notice that EBP acquires its final frame pointer value AFTER the
//  old EBP has been pushed. This means that, of the three callee saved
//  registers, one is above where EBP points and two are below.
//  IDA Pro is concerned with generating readable addressing expressions
//  for items on the stack. None of the callee-saved regs will ever
//  be addressed in the function; they will be dormant until they are popped
//  off the stack in the function epilogue. In order to create readable
//  disassembled code, IDA defines named constant offsets for locals. These
//  offsets are negative values (x86 stack grows downward from EBP toward
//  ESP). When ESP_relative addressing occurs, IDA converts a statement:
//    mov eax,[esp+12]
//  into the statement:
//    mov eax,[esp+3Ch+var_30]
//  Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
//  var_30 is defined to ahve the value -30h == -48 decimal. So, the
//  "frame size" in IDA Pro is 60 bytes, and a certain local can be
//  addressed in ESP-relative manner as shown, or as [ebp+var_30] for
//  EBP-relative addressing. The interactive IDA user can then edit
//  the name var_30 to something mnemonic, such as "virus_size", and IDA
//  will replace all occurrences with the new name, so that code references
//  automatically become [ebp+virus_size]. As the user proceeds
//  interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
//  our analyses have a compiler perspective as well as a memory access
//  defense perspective. SMP distinguishes between callee saved regs,
//  which should not be overwritten in the function body, and local
//  variables, which can be written. We view the stack frame in logical
//  pieces: here are the saved regs, here are the locals, here is the
//  return address, etc. We don't care which direction from EBP the
//  callee-saved registers lie; we don't want to lump them in with the
//  local variables. We also don't like the fact that IDA Pro will take
//  the function prologue code shown above and declare frregs=4 and
//  frsize=60, because frsize no longer matches the stack allocation
//  statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
//  private data members for the function, while leaving the IDA database
//  alone because IDA needs to maintain its own definitions of these
//  variables.
clc5q's avatar
clc5q committed
// Fixing means we will update the data members LocalVarsSize and
//  CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
//  The prologue and epilogue code generated by gcc-linux is as discussed
//  above, while on Visual Studio and other Windows x86 compilers, the
//  saving of registers other than EBP happens AFTER local stack allocation.
//  A Windows version of the function would expect to see the pushing
//  of ESI and EDI AFTER the sub esp,34h statement.
clc5q's avatar
clc5q committed
bool SMPFunction::MDFixFrameInfo(void) {
	int SavedRegsSize = 0;
	int OtherPushesSize = 0;  // besides callee-saved regs
	int NewLocalsSize = 0;
	int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
	bool Changed = false;
clc5q's avatar
clc5q committed

	// Iterate through the first basic block in the function. If we find
	//  a frame allocating Instr in it, then we have local vars. If not,
	//  we don't, and LocalVarsSize should have been zero. Count the callee
	//  register saves leading up to the local allocation. Set data members
	//  according to what we found if the values of the data members would
	//  change.
clc5q's avatar
clc5q committed
	SMPBasicBlock CurrBlock = this->Blocks.front();
	for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr();
		CurrIter != CurrBlock.GetLastInstr();
		++CurrIter) {
		list<SMPInstr>::iterator CurrInstr = *CurrIter;
		if (CurrInstr->MDIsPushInstr()) {
			// We will make the gcc-linux assumption that a PUSH in
			//  the first basic block, prior to the stack allocating
			//  instruction, is a callee register save. To make this
			//  more robust, we ensure that the register is from
			//  the callee saved group of registers, and that it has
			//  not been defined thus far in the function (else it might
			//  be a push of an outgoing argument to a call that happens
			//  in the first block when there are no locals). **!!!!**
			if (CurrInstr->MDUsesCalleeSavedReg()
				&& !CurrInstr->HasSourceMemoryOperand()) {
				SavedRegsSize += 4; // **!!** should check the size
			}
			else {
				// Pushes of outgoing args can be scheduled so that
				//  they are mixed with the pushes of callee saved regs.
				OtherPushesSize += 4;
			}
		}
		else if (CurrInstr->MDIsFrameAllocInstr()) {
			SavedRegsSize += OtherPushesSize;
			// Get the size being allocated.
			for (size_t index = 0; index < CurrInstr->NumUses(); ++index) {
				// Find the immediate operand.
				if (o_imm == CurrInstr->GetUse(index).type) {
					// Get its value into LocalVarsSize.
					long AllocValue = (signed long) CurrInstr->GetUse(index).value;
					// One compiler might have sub esp,24 and another
					//  might have add esp,-24. Take the absolute value.
					if (0 > AllocValue)
						AllocValue = -AllocValue;
					if (AllocValue != (long) this->LocalVarsSize) {
						Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
						if (AllocValue + SavedRegsSize != OldFrameTotal)
							msg("Total frame size changed: %s\n", this->FuncName);
#endif
						this->LocalVarsSize = (asize_t) AllocValue;
						this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
						NewLocalsSize = this->LocalVarsSize;
					}
					else { // Old value was correct; no change.
						NewLocalsSize = this->LocalVarsSize;
						if (SavedRegsSize != this->CalleeSavedRegsSize) {
							this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
							Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
							msg("Only callee regs size changed: %s\n", this->FuncName);
#endif
						}
					}
				} // end if (o_imm == ...)
			} // end for all uses
			break; // After frame allocation instr, we are done
		} // end if (push) .. elsif frame allocating instr
	} // end for all instructions in the first basic block

	// If we did not find an allocating instruction, see if it would keep
	//  the total size the same to set LocalVarsSize to 0 and to set
	//  CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
	//  might be better off to leave the numbers alone.
	if (!Changed && (NewLocalsSize == 0)) {
		if (OldFrameTotal == SavedRegsSize) {
			this->CalleeSavedRegsSize = SavedRegsSize;
			this->LocalVarsSize = 0;
			Changed = true;
		}
#if SMP_DEBUG_FRAMEFIXUP
		else {
			msg("Could not update frame sizes: %s\n", this->FuncName);
		}
#endif
clc5q's avatar
clc5q committed
	}

#if SMP_DEBUG_FRAMEFIXUP
	if ((0 < OtherPushesSize) && (0 < NewLocalsSize))
		msg("Extra pushes found of size %d in %s\n", OtherPushesSize,
			this->FuncName);
#endif

	return Changed;
clc5q's avatar
clc5q committed
} // end of SMPFunction::MDFixFrameInfo()

// Some functions have difficult to find stack allocations. For example, in some
//  version of glibc, strpbrk() zeroes out register ECX and then pushes it more than
//  100 times in order to allocate zero-ed out local vars space for a character translation
//  table. We will use the stack pointer analysis of IDA to find out if there is a point
//  in the first basic block at which the stack pointer reaches the allocation total
//  that IDA is expecting for the local vars region.
// If so, we return the address of the instruction at which ESP reaches its value, else
//  we return BADADDR.
ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
	bool DebugFlag = (0 == strncmp("strpbrk", this->GetFuncName(), 7));
	sval_t TargetSize = - ((sval_t) OriginalLocSize);  // negate; stack grows down 

#if SMP_DEBUG_FRAMEFIXUP
	if (DebugFlag)
		msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize);
#endif

	if (this->FuncInfo->analyzed_sp()) {
		// Limit our analysis to the first basic block in the function.
		list<SMPInstr>::iterator TempIter = *(--(this->Blocks.front().GetLastInstr()));
		ea_t AddrLimit = TempIter->GetAddr();
		for (list<list<SMPInstr>::iterator>::iterator CurrIter = this->Blocks.front().GetFirstInstr();
			CurrIter != this->Blocks.front().GetLastInstr();
			++CurrIter) {
				list<SMPInstr>::iterator CurrInstr = *CurrIter;
				ea_t addr = CurrInstr->GetAddr();
				// get_spd() returns a cumulative delta of ESP
				sval_t sp_delta = get_spd(this->FuncInfo, addr);
#if SMP_DEBUG_FRAMEFIXUP
				if (DebugFlag)
					msg("strpbrk delta: %d at %x\n", sp_delta, addr);
#endif
				if (sp_delta == TargetSize) {
					// Previous instruction hit the frame size.
					if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) {
						return BADADDR;  // cannot back up from first instruction
					}
					else {
						return (--CurrInstr)->GetAddr();
					}
				}
		}
		// SP delta is marked at the beginning of an instruction to show the SP
		//  after the effects of the previous instruction. Maybe the last instruction
		//  is the first time the SP achieves its desired value, which will not be shown
		//  until the first instruction of the next basic block if it just falls through.
		//  We can compute the delta AFTER the last instruction using get_spd+get_sp_delta.
		list<SMPInstr>::iterator FinalInstr = *(--(this->Blocks.front().GetLastInstr()));
		ea_t FinalAddr = FinalInstr->GetAddr();
		sval_t FinalDelta = get_spd(this->FuncInfo, FinalAddr);
		if (!FinalInstr->IsBasicBlockTerminator()) {
			// Special case. The basic block does not terminate with a branch or
			//  return, but falls through to the start of a loop, most likely.
			//  Thus, the last instruction CAN increase the sp_delta, unlike
			//  a jump or branch, and the sp_delta would not hit the target until
			//  the first instruction in the second block. We can examine the 
			//  effect on the stack pointer of this last instruction to see if it
			//  causes the SP delta to hit the OriginalLocSize.
			sval_t LastInstrDelta = get_sp_delta(this->FuncInfo, FinalAddr);
			if (TargetSize == (FinalDelta + LastInstrDelta)) {
				// Return very last instruction (don't back up 1 here)
				return FinalAddr;
			}
		}
	} // end if (this->FuncInfo->analyzed_sp())
#if SMP_DEBUG_FRAMEFIXUP
	else {
		msg("analyzed_sp() is false for %s\n", this->GetFuncName());
	}
#endif
	return BADADDR;
} // end of SMPFunction::FindAllocPoint()

// IDA Pro is sometimes confused by a function that uses the frame pointer
//  register for other purposes. For the x86, a function that uses EBP
//  as a frame pointer would begin with: push ebp; mov ebp,esp to save
//  the old value of EBP and give it a new value as a frame pointer. The
//  allocation of local variable space would have to come AFTER the move
//  instruction. A function that begins: push ebp; push esi; sub esp,24
//  is obviously not using EBP as a frame pointer. IDA is apparently
//  confused by the push ebp instruction being the first instruction
//  in the function. We will reset UseFP to false in this case.
// NOTE: This logic should work for both Linux and Windows x86 prologues.
bool SMPFunction::MDFixUseFP(void) {
	list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
	ea_t addr = CurrInstr->GetAddr();
	if (!UseFP)
		return false;  // Only looking to reset true to false.
	while (addr < this->LocalVarsAllocInstr) {
		size_t DefIndex = 0;
		while (DefIndex < CurrInstr->NumDefs()) {
			if (CurrInstr->GetDef(DefIndex).is_reg(R_bp))
				return false; // EBP got set before locals were allocated
			++DefIndex;
		}
		++CurrInstr;
		addr = CurrInstr->GetAddr();
	}
	// If we found no defs of the frame pointer before the local vars
	//  allocation, then the frame pointer register is not being used
	//  as a frame pointer, just as a general callee-saved register.
	this->UseFP = false;
	return true;
} // end of SMPFunction::MDFixUseFP()

clc5q's avatar
clc5q committed
// Emit the annotations describing the regions of the stack frame.
void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) {
	ea_t addr = Instr->GetAddr();

	if (0 < IncomingArgsSize)
		qfprintf(AnnotFile, "%x %d INARGS STACK esp + %d %s \n",
				addr, IncomingArgsSize,
				(LocalVarsSize + CalleeSavedRegsSize + RetAddrSize),
				Instr->GetDisasm());
	if (0 < RetAddrSize)
		qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d ReturnAddress \n",
				addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize));
	if (0 < CalleeSavedRegsSize)
		qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n",
				addr, CalleeSavedRegsSize, LocalVarsSize);
	if (0 < LocalVarsSize)
		qfprintf(AnnotFile, "%x %d LOCALFRAME STACK esp + %d LocalVars \n",
				addr, LocalVarsSize, 0);
	return;
} // end of SMPFunction::EmitStackFrameAnnotations() 

// Main data flow analysis driver. Goes through the function and
//  fills all objects for instructions, basic blocks, and the function
//  itself.
void SMPFunction::Analyze(void) {
	list<SMPInstr>::iterator FirstInBlock = this->Instrs.end();
	   // For starting a basic block
	list<SMPInstr>::iterator LastInBlock = this->Instrs.end();
	   // Terminating a basic block

#if SMP_DEBUG_CONTROLFLOW
	msg("Entering SMPFunction::Analyze.\n");
#endif

	// Get some basic info from the FuncInfo structure.
	this->Size = this->FuncInfo->endEA - this->FuncInfo->startEA;
	this->UseFP = (0 != (this->FuncInfo->flags & (FUNC_FRAME | FUNC_BOTTOMBP)));
	this->StaticFunc = (0 != (this->FuncInfo->flags & FUNC_STATIC));
	get_func_name(this->FuncInfo->startEA, this->FuncName,
		sizeof(this->FuncName) - 1);

#if SMP_DEBUG_CONTROLFLOW
	msg("SMPFunction::Analyze: got basic info.\n");
#endif

	// Cycle through all chunks that belong to the function.
	func_tail_iterator_t FuncTail(this->FuncInfo);
	size_t ChunkCounter = 0;
	for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) {
		const area_t &CurrChunk = FuncTail.chunk();
		++ChunkCounter;
		if (1 < ChunkCounter) {
			this->SharedChunks = true;
clc5q's avatar
clc5q committed
#if SMP_DEBUG_CHUNKS
			msg("Found tail chunk for %s at %x\n", this->FuncName, CurrChunk.startEA);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
		// Build the instruction and block lists for the function.
		for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA;
			addr = get_item_end(addr)) {
			flags_t InstrFlags = getFlags(addr);
			if (isHead(InstrFlags) && isCode(InstrFlags)) {
				SMPInstr CurrInst = SMPInstr(addr);
				// Fill in the instruction data members.
clc5q's avatar
clc5q committed
				msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n");
clc5q's avatar
clc5q committed
				CurrInst.Analyze();
				if (SMPBinaryDebug) {
					msg("Disasm:  %s \n", CurrInst.GetDisasm());
				}
				if (CurrInst.GetDataFlowType() == INDIR_CALL)
					this->IndirectCalls = true;

				// Before we insert the instruction into the instruction
				//  list, determine if it is a jump target that does not
				//  follow a basic block terminator. This is the special case
				//  of a CASE in a SWITCH that falls through into another
				//  CASE, for example. The first sequence of statements
				//  was not terminated by a C "break;" statement, so it
				//  looks like straight line code, but there is an entry
				//  point at the beginning of the second CASE sequence and
				//  we have to split basic blocks at the entry point.
				if ((FirstInBlock != this->Instrs.end())
					&& CurrInst.IsJumpTarget()) {
#if SMP_DEBUG_CONTROLFLOW
clc5q's avatar
clc5q committed
					msg("SMPFunction::Analyze: hit special jump target case.\n");
clc5q's avatar
clc5q committed
					LastInBlock = --(this->Instrs.end());
					SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock,
						LastInBlock);
					CurrBlock.Analyze();
					// If not the first chunk in the function, it is a shared
					//  tail chunk.
					if (ChunkCounter > 1) {
						CurrBlock.SetShared();
					}
					FirstInBlock = this->Instrs.end();
					LastInBlock = this->Instrs.end();
					this->Blocks.push_back(CurrBlock);
				}

#if SMP_DEBUG_CONTROLFLOW
clc5q's avatar
clc5q committed
		msg("SMPFunction::Analyze: putting CurrInst on list.\n");
clc5q's avatar
clc5q committed
				// Insert instruction at end of list.
				this->Instrs.push_back(CurrInst);
clc5q's avatar
clc5q committed
				// Find basic block leaders and terminators.
				if (FirstInBlock == this->Instrs.end()) {
#if SMP_DEBUG_CONTROLFLOW
clc5q's avatar
clc5q committed
		msg("SMPFunction::Analyze: setting FirstInBlock.\n");
clc5q's avatar
clc5q committed
					FirstInBlock = --(this->Instrs.end());
				}
				if (CurrInst.IsBasicBlockTerminator()) {
#if SMP_DEBUG_CONTROLFLOW
clc5q's avatar
clc5q committed
		msg("SMPFunction::Analyze: found block terminator.\n");
clc5q's avatar
clc5q committed
					LastInBlock = --(this->Instrs.end());
					SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock);
					CurrBlock.Analyze();
					// If not the first chunk in the function, it is a shared
					//  tail chunk.
					if (ChunkCounter > 1) {
						CurrBlock.SetShared();
					}
					FirstInBlock = this->Instrs.end();
					LastInBlock = this->Instrs.end();
					this->Blocks.push_back(CurrBlock);

					// Is the instruction a branch to a target outside the function? If
					//  so, this function has shared tail chunks.
					if (CurrInst.IsBranchToFarChunk()) {
						this->SharedChunks = true;
					}
clc5q's avatar
clc5q committed
				}
			} // end if (isHead(InstrFlags) && isCode(InstrFlags)
		} // end for (ea_t addr = FuncInfo->startEA; ... )

		// Handle the special case in which a function does not terminate
		//  with a return instruction or any other basic block terminator.
		//  Sometimes IDA Pro sees a call to a NORET function and decides
		//  to not include the dead code after it in the function. That
		//  dead code includes the return instruction, so the function no
		//  longer includes a return instruction and terminates with a CALL.
		if (FirstInBlock != this->Instrs.end()) {
			LastInBlock = --(this->Instrs.end());
			SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock);
			CurrBlock.Analyze();
			// If not the first chunk in the function, it is a shared
			//  tail chunk.
			if (ChunkCounter > 1) {
				CurrBlock.SetShared();
			}
			FirstInBlock = this->Instrs.end();
			LastInBlock = this->Instrs.end();
			this->Blocks.push_back(CurrBlock);
		}
	} // end for (bool ChunkOK = ...)

	// Set up basic block links and map of instructions to blocks.
	if (!(this->HasSharedChunks())) {
		this->SetLinks();
		this->LiveVariableAnalysis();
	}

clc5q's avatar
clc5q committed
#if SMP_DEBUG_CONTROLFLOW
	msg("SMPFunction::Analyze: set stack frame info.\n");
#endif
	// Figure out the stack frame and related info.
	this->SetStackFrameInfo();

	return;
} // end of SMPFunction::Analyze()

// Link basic blocks to their predecessors and successors, and build the map
//  of instruction addresses to basic blocks.
void SMPFunction::SetLinks(void) {
	list<SMPBasicBlock>::iterator CurrBlock;
#if SMP_DEBUG_DATAFLOW
	msg("SetLinks called for %s\n", this->GetFuncName());
#endif
	// First, set up the map of instructions to basic blocks.
	for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
		list<list<SMPInstr>::iterator>::iterator CurrInst;
		for (CurrInst = CurrBlock->GetFirstInstr();
			CurrInst != CurrBlock->GetLastInstr();
			++CurrInst) {
				pair<ea_t, list<SMPBasicBlock>::iterator> MapItem((*CurrInst)->GetAddr(),CurrBlock);
				InstBlockMap.insert(MapItem);
		}
	}

#if SMP_DEBUG_DATAFLOW
	msg("SetLinks finished mapping: %s\n", this->GetFuncName());
#endif
	// Next, set successors of each basic block, also setting up the predecessors in the
	//  process.
	for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
		list<SMPInstr>::iterator CurrInst = *(--(CurrBlock->GetLastInstr()));
		// Last instruction in block; set successors
		xrefblk_t CurrXrefs;
		for (bool ok = CurrXrefs.first_from(CurrInst->GetAddr(), XREF_ALL);
			ok;
			ok = CurrXrefs.next_from()) {
				if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) {
					// Found a code target, with its address in CurrXrefs.to
					map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry;
					MapEntry = this->InstBlockMap.find(CurrXrefs.to);
					if (MapEntry == this->InstBlockMap.end()) {
						msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.to,
							this->GetFuncName());
					}
					else {
						list<SMPBasicBlock>::iterator Target = MapEntry->second;
						// Make target block a successor of current block.
						CurrBlock->LinkToSucc(Target);
						// Make current block a predecessor of target block.
						Target->LinkToPred(CurrBlock);
					}
				}
		} // end for all xrefs
	} // end for all blocks

	return;
} // end of SMPFunction::SetLinks()

// Perform live variable analysis on all blocks in the function.
// See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm.
void SMPFunction::LiveVariableAnalysis(void) {
	list<SMPBasicBlock>::iterator CurrBlock;
	msg("LiveVariableAnalysis for %s\n", this->GetFuncName());

	for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
		// Initialize the Killed and UpwardExposed sets for each block.
		CurrBlock->InitKilledExposed();
	}

	bool changed;
	// Iterate over each block, updating LiveOut sets until no more changes are made.
	// NOTE: Would be more efficient if we computed a reverse post-order list of blocks
	//  and traversed this loop in reverse post-order. **!!**
	do {
		changed = false;
		for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
			changed |= CurrBlock->UpdateLiveOut();
		}
	} while (changed);

	return;
} // end of SMPFunction::LiveVariableAnalysis()

// Emit all annotations for the function, including all per-instruction
//  annotations.
clc5q's avatar
clc5q committed
void SMPFunction::EmitAnnotations(FILE *AnnotFile) {
	// Emit annotation for the function as a whole.
	if (this->StaticFunc) {
		qfprintf(AnnotFile,	"%x %d FUNC LOCAL  %s ", FuncInfo->startEA,
			this->Size, this->FuncName);
	}
	else {
		qfprintf(AnnotFile,	"%x %d FUNC GLOBAL %s ", FuncInfo->startEA,
			this->Size, this->FuncName);
	}
	if (this->UseFP) {
		qfprintf(AnnotFile, "USEFP ");
	}
	else {
		qfprintf(AnnotFile, "NOFP ");
	}
	if (this->FuncInfo->does_return()) {
		qfprintf(AnnotFile, "\n");
	}
	else {
		qfprintf(AnnotFile, "NORET \n");
	}

	// Loop through all instructions in the function.
	// Output optimization annotations for those
	//  instructions that do not require full computation
	//  of their memory metadata by the Memory Monitor SDT.
	list<SMPInstr>::iterator CurrInst;
	bool AllocSeen = false; // Reached LocalVarsAllocInstr yet?
	bool DeallocTrigger = false;
	for (CurrInst = Instrs.begin(); CurrInst != Instrs.end(); ++CurrInst) {
		ea_t addr = CurrInst->GetAddr();
		if (this->LocalVarsAllocInstr == addr) {
clc5q's avatar
clc5q committed
			AllocSeen = true;
			this->EmitStackFrameAnnotations(AnnotFile, CurrInst);
clc5q's avatar
clc5q committed
		}
		// If this is the instruction which deallocated space
		//  for local variables, we set a flag to remind us to 
		//  emit an annotation on the next instruction.
		// mmStrata wants the instruction AFTER the
		//  deallocating instruction, so that it processes
		//  the deallocation after it happens. It inserts
		//  instrumentation before an instruction, not
		//  after, so it will insert the deallocating
		//  instrumentation before the first POP of callee-saved regs,
		//  if there are any, or before the return, otherwise.
		if (addr == LocalVarsDeallocInstr) {
			DeallocTrigger = true;
		}
		else if (DeallocTrigger) { // Time for annotation
			qfprintf(AnnotFile,	"%x %d DEALLOC STACK esp - %d %s\n", addr,
				LocalVarsSize, LocalVarsSize, CurrInst->GetDisasm());
clc5q's avatar
clc5q committed
			DeallocTrigger = false;
		}

		CurrInst->EmitAnnotations(this->UseFP, AllocSeen, AnnotFile);
clc5q's avatar
clc5q committed
	}  // end for (ea_t addr = FuncInfo->startEA; ...)
	return;
} // end of SMPFunction::EmitAnnotations()
clc5q's avatar
clc5q committed

// Initialize the DFACategory[] array to define instruction classes
//   for the purposes of data flow analysis.
void InitDFACategory(void) {
	// Default category is 0, not the start or end of a basic block.
	(void) memset(DFACategory, 0, sizeof(DFACategory));

DFACategory[NN_call] = CALL;                // Call Procedure
DFACategory[NN_callfi] = INDIR_CALL;              // Indirect Call Far Procedure
DFACategory[NN_callni] = INDIR_CALL;              // Indirect Call Near Procedure

DFACategory[NN_hlt] = HALT;                 // Halt

DFACategory[NN_int] = CALL;                 // Call to Interrupt Procedure
DFACategory[NN_into] = CALL;                // Call to Interrupt Procedure if Overflow Flag = 1
DFACategory[NN_int3] = CALL;                // Trap to Debugger
DFACategory[NN_iretw] = RETURN;               // Interrupt Return
DFACategory[NN_iret] = RETURN;                // Interrupt Return
DFACategory[NN_iretd] = RETURN;               // Interrupt Return (use32)
DFACategory[NN_iretq] = RETURN;               // Interrupt Return (use64)
DFACategory[NN_ja] = COND_BRANCH;                  // Jump if Above (CF=0 & ZF=0)
DFACategory[NN_jae] = COND_BRANCH;                 // Jump if Above or Equal (CF=0)
DFACategory[NN_jb] = COND_BRANCH;                  // Jump if Below (CF=1)
DFACategory[NN_jbe] = COND_BRANCH;                 // Jump if Below or Equal (CF=1 | ZF=1)
DFACategory[NN_jc] = COND_BRANCH;                  // Jump if Carry (CF=1)
DFACategory[NN_jcxz] = COND_BRANCH;                // Jump if CX is 0
DFACategory[NN_jecxz] = COND_BRANCH;               // Jump if ECX is 0
DFACategory[NN_jrcxz] = COND_BRANCH;               // Jump if RCX is 0
DFACategory[NN_je] = COND_BRANCH;                  // Jump if Equal (ZF=1)
DFACategory[NN_jg] = COND_BRANCH;                  // Jump if Greater (ZF=0 & SF=OF)
DFACategory[NN_jge] = COND_BRANCH;                 // Jump if Greater or Equal (SF=OF)
DFACategory[NN_jl] = COND_BRANCH;                  // Jump if Less (SF!=OF)
DFACategory[NN_jle] = COND_BRANCH;                 // Jump if Less or Equal (ZF=1 | SF!=OF)
DFACategory[NN_jna] = COND_BRANCH;                 // Jump if Not Above (CF=1 | ZF=1)
DFACategory[NN_jnae] = COND_BRANCH;                // Jump if Not Above or Equal (CF=1)
DFACategory[NN_jnb] = COND_BRANCH;                 // Jump if Not Below (CF=0)
DFACategory[NN_jnbe] = COND_BRANCH;                // Jump if Not Below or Equal (CF=0 & ZF=0)
DFACategory[NN_jnc] = COND_BRANCH;                 // Jump if Not Carry (CF=0)
DFACategory[NN_jne] = COND_BRANCH;                 // Jump if Not Equal (ZF=0)
DFACategory[NN_jng] = COND_BRANCH;                 // Jump if Not Greater (ZF=1 | SF!=OF)
DFACategory[NN_jnge] = COND_BRANCH;                // Jump if Not Greater or Equal (ZF=1)
DFACategory[NN_jnl] = COND_BRANCH;                 // Jump if Not Less (SF=OF)
DFACategory[NN_jnle] = COND_BRANCH;                // Jump if Not Less or Equal (ZF=0 & SF=OF)
DFACategory[NN_jno] = COND_BRANCH;                 // Jump if Not Overflow (OF=0)
DFACategory[NN_jnp] = COND_BRANCH;                 // Jump if Not Parity (PF=0)
DFACategory[NN_jns] = COND_BRANCH;                 // Jump if Not Sign (SF=0)
DFACategory[NN_jnz] = COND_BRANCH;                 // Jump if Not Zero (ZF=0)
DFACategory[NN_jo] = COND_BRANCH;                  // Jump if Overflow (OF=1)
DFACategory[NN_jp] = COND_BRANCH;                  // Jump if Parity (PF=1)
DFACategory[NN_jpe] = COND_BRANCH;                 // Jump if Parity Even (PF=1)
DFACategory[NN_jpo] = COND_BRANCH;                 // Jump if Parity Odd  (PF=0)
DFACategory[NN_js] = COND_BRANCH;                  // Jump if Sign (SF=1)
DFACategory[NN_jz] = COND_BRANCH;                  // Jump if Zero (ZF=1)
DFACategory[NN_jmp] = JUMP;                 // Jump
DFACategory[NN_jmpfi] = INDIR_JUMP;               // Indirect Far Jump
DFACategory[NN_jmpni] = INDIR_JUMP;               // Indirect Near Jump
DFACategory[NN_jmpshort] = JUMP;            // Jump Short (only in 64-bit mode)

DFACategory[NN_loopw] = COND_BRANCH;               // Loop while ECX != 0
DFACategory[NN_loop] = COND_BRANCH;                // Loop while CX != 0
DFACategory[NN_loopd] = COND_BRANCH;               // Loop while ECX != 0
DFACategory[NN_loopq] = COND_BRANCH;               // Loop while RCX != 0
DFACategory[NN_loopwe] = COND_BRANCH;              // Loop while CX != 0 and ZF=1
DFACategory[NN_loope] = COND_BRANCH;               // Loop while rCX != 0 and ZF=1
DFACategory[NN_loopde] = COND_BRANCH;              // Loop while ECX != 0 and ZF=1
DFACategory[NN_loopqe] = COND_BRANCH;              // Loop while RCX != 0 and ZF=1
DFACategory[NN_loopwne] = COND_BRANCH;             // Loop while CX != 0 and ZF=0
DFACategory[NN_loopne] = COND_BRANCH;              // Loop while rCX != 0 and ZF=0
DFACategory[NN_loopdne] = COND_BRANCH;             // Loop while ECX != 0 and ZF=0
DFACategory[NN_loopqne] = COND_BRANCH;             // Loop while RCX != 0 and ZF=0

DFACategory[NN_retn] = RETURN;                // Return Near from Procedure
DFACategory[NN_retf] = RETURN;                // Return Far from Procedure

//
//      Pentium instructions
//

DFACategory[NN_rsm] = HALT;                 // Resume from System Management Mode

//      Pentium II instructions

DFACategory[NN_sysenter] = CALL;            // Fast Transition to System Call Entry Point
DFACategory[NN_sysexit] = CALL;             // Fast Transition from System Call Entry Point

// AMD syscall/sysret instructions  NOTE: not AMD, found in Intel manual

DFACategory[NN_syscall] = CALL;             // Low latency system call
DFACategory[NN_sysret] = CALL;              // Return from system call

// VMX instructions

DFACategory[NN_vmcall] = INDIR_CALL;              // Call to VM Monitor

  return;

} // end InitDFACategory()