Skip to content
Snippets Groups Projects
SMPFunction.cpp 339 KiB
Newer Older
	}
	return true;
} // end of SMPFunction::UseIDAStackPointerDeltas()

// Analyze changes to the stack pointer over all instructions.
bool SMPFunction::AnalyzeStackPointerDeltas(void) {
	list<pair<SMPBasicBlock *, sval_t> > WorkList;
	vector<SMPInstr *>::iterator InstIter;
	sval_t CurrentDelta = 0;
	sval_t DeltaIncrement = 0; // change when reprocessing a block in alloca()-calling function
	bool ConsistentNetDelta = true; // Net change to stack pointer is consistent at all RETURN locations
	bool ConflictingValuesSeen = false; // At least one block was entered with multiple deltas
	bool StackPointerRestoreSeen = false; // Stack pointer restored; must become true if ConflictingValuesSeen
	bool ReturnSeen = false;
	bool IDAProSucceeded = this->AnalyzedSP;

#if SMP_COMPARE_IDA_STARS_STACK_POINTER_DELTAS
	bool DebugFlag = (0 == strcmp("_aesni_set_encrypt_key", this->GetFuncName()));
	bool TraceFlag = (0 == strcmp("_aesni_set_encrypt_key", this->GetFuncName()));
	if (!this->HasGoodRTLs()) {
		SMP_msg("INFO: Using IDA Pro stack pointer deltas for BADRTLS function %s .\n", this->GetFuncName());
		(void) this->UseIDAStackPointerDeltas();
		this->AnalyzedSP = false;
		return false; // leave it unsolved
	}

	// Temporarily pull the functions that call alloca out of the stack pointer delta computations, so
	//  that we can focus on solving other problems.
	if (this->CallsAlloca || this->HasPushAfterFrameAlloc()) {
			(void) this->UseIDAStackPointerDeltas();
			return false; // leave it unsolved
		}
		else {
			SMP_msg("INFO: Using IDA Pro stack pointer deltas for alloca-calling function %s .\n", this->GetFuncName());
			return this->UseIDAStackPointerDeltas();
	// In order to precisely track stack deltas, we need to deal with instruction sequences that save the stack pointer
	//  and then restore it later. This requires a reaching definitions data flow analysis that includes, at a minimum,
	//  all stack definitions (normalized by stack delta, so that we do not confuse [esp+20] and [esp+20] where the values
	//  of esp are not the same). We also need to keep track of stack pointer saves in both registers and in stack locations.
	// In order for the information about saved stack pointer copies to be available as soon as we need them in the stack
	//  delta analysis, we have to perform both stack delta analysis and reaching definitions analysis at the same time. Luckily,
	//  both analyses are well suited to being performed as forward analyses starting from the entry basic block.
	//
	// Data structures for the reaching definitions analysis include a ReachesIn and a ReachesOut set for each basic block, a
	//  VarKill set for each block, and a DownExposedDefs set for each block. The VarKill set is shared with the later Live Variable
	//  Analysis (LVA), so we compute the VarKill and the UpExposed sets (UpExposed is only used by LVA) on the first pass through
	//  each block. The VarKill and all other LVA sets are sets of operands. The ReachesIn, ReachesOut, and DownExposedDefs sets
	//  are sets of definitions, where a definition is a pair<operand, instruction address>. The StackPtrCopySet is a triple of
	//   <operand, instruction address, stack delta>, arranged as a pair of pairs <operand, <addr, delta> >
	//
	// Algorithm: We maintain a WorkList of pairs <basic block pointer, incoming stack delta to that block>
	//
	// All sets are empty at the beginning.
	// Add the entry basic block to the WorkList, with IncomingDelta of zero.
	// while (WorkList is not empty) do
	//    de-queue first block from WorkList, obtain IncomingDelta
	//    Compute ReachesIn as the union of the ReachesOut of all predecesssor blocks
	//    if (block has not already been processed) then
	//       mark block as processed
	//       for each inst in block (forward iteration) do
	//           for each USE in inst do
	//              if USE operand not in VarKill set for block then
	//                  add USE operand to UpExposed set for block
	//              endif
	//              if USE operand is a stack pointer value AND it will produce DEF that is a stack pointer value then
	//                  if DEF is stack pointer register then  { a stack pointer value that was saved is being restored }
	//                      retrieve new stack pointer delta from saved value in StackPtrCopySet, looking it up in
	//                          that set using the reaching definitions for the USE operand. If inconsistent  ******
	//                  else { stack pointer value is being saved somewhere besides the stack pointer register }
	//                      add stack delta to StackPtrCopySet for DEF that is receiving it in current inst
	//                  endif
	//               endif
	//            endfor { each USE }
	//            for each DEF in inst do
	//               if register or stack operand then 
	//                  add to VarKill set
	//                  update DownExposedDefs set (insert, or replace current def for this operand)
	//               endif
	//            endfor { each DEF }
	//            Store IncomingDelta for current instruction
	//            Get change in delta for current instruction
	//            Add current change to IncomingDelta
	//       endfor { each inst }
	//       At end of block, make ReachesOut set be (ReachesIn minus VarKill) union DownExposedDefs
	//       For each successor block, add pairs <block pointer, IncomingDelta> to end of WorkList
	//    else { block has already been processed at least once}
	//       if IncomingDelta from WorkList is inconsistent with old IncomingDelta then
	//          if function calls alloca() then
	//             if new IncomingDelta makes stack frame look larger than old IncomingDelta then
	//                ignore new IncomingDelta and just process reaching definitions sets below
	//             else
	//                use new IncomingDelta and re-process deltas in this block to converge to
	//                 smallest stack frame, which means we are basically ignoring alloca()'s as much as possible.
	//             endif
	//          else
	//             Set AnalyzedSP to false, emit error message, clear WorkList and bail out of this function.
	//          endif
	//       endif { inconsistent IncomingDelta values }
	//       Recompute ReachesIn as union of ReachesOut of all predecessor blocks
	//       if ReachesIn set changed then
	//          recompute ReachesOut without examining instructions unless alloca() case requires iterating through instructions
	//       endif
	//       if any change in deltas or reaching definitions sets, then add block to end of WorkList along with all successor blocks.
	//    endif
	// end while

	// Mark all blocks as unprocessed
	this->ResetProcessedBlocks();

	this->AnalyzedSP = true;

	// Put the entry block on the work list.
	assert(!(this->Blocks.empty()));
	pair<SMPBasicBlock *, sval_t> WorkingPair (this->Blocks.front(), CurrentDelta);
	WorkList.push_back(WorkingPair);

	// While blocks exist on the work list
	//  if block already processed, confirm that we are re-entering
	//    the block with the same stack pointer delta as previously,
	//    and pop it off the work list
	//    otherwise declare the stack pointer to be un-analyzeable;
	//  else
	//     iterate through all instructions in the block, analyzing
	//     the stack pointer delta of each inst and accumulating current delta
	//     At the end of the block, put the successor blocks on the work list.
	// For both cases, maintain and update reaching definitions sets, and the
	//  UpExposed and VarKill sets that are used by LVA as well as reaching defs analysis.
	bool ReprocessingAllocaBlocks = false;
	bool ReachesInChanged;
	bool ReachesOutChanged = false;
	do {
		SMPBasicBlock *CurrBlock = WorkList.front().first;
		sval_t IncomingDelta = WorkList.front().second;

		if (0 < IncomingDelta) {
			SMP_msg("ERROR: Stack delta of %ld implies stack underflow in func at %lx\n",
				(long) IncomingDelta, (unsigned long) this->FirstEA);
			this->AnalyzedSP = false;
			WorkList.clear();
			break;
		}

		if (CurrBlock->IsProcessed()) { // already processed
			ReachesOutChanged = false;
#if 0
			ReachesInChanged = CurrBlock->ComputeReachesInSet();
			if (ReachesInChanged) {
				ReachesOutChanged = CurrBlock->ComputeReachesOutSet();
			}
#else
			if (CurrBlock->IsReachesOutStale()) {
				ReachesOutChanged = CurrBlock->ComputeReachesOutSet();
			}
#endif
			if (ReachesOutChanged) {
				// Push the successor blocks onto the work list
				sval_t SuccIncomingDelta = CurrBlock->GetOutgoingStackDelta();
				list<SMPBasicBlock *>::iterator SuccIter;
				for (SuccIter = CurrBlock->GetFirstSucc(); SuccIter != CurrBlock->GetLastSucc(); ++SuccIter) {
					pair<SMPBasicBlock *, sval_t> SuccPair (*SuccIter, SuccIncomingDelta);
					WorkList.push_back(SuccPair);
				}
			}
			InstIter = CurrBlock->GetFirstInst();
			sval_t PrevIncomingDelta = (*InstIter)->GetStackPtrOffset();
			if (IncomingDelta == PrevIncomingDelta) {
				// No error, already processed.
				WorkList.pop_front(); // discard already processed block.
			}
#if 1
			else if (this->CallsAlloca || this->HasPushAfterFrameAlloc()) {
#else
			else {
#endif
				ConflictingValuesSeen = true;
				// Calls to alloca() become additional stack allocations, which can produce
				//  multiple possible stack deltas for an instruction if different paths
				//  to the instruction do not hit the same alloca() calls, so it is not
				//  an error to have conflicting deltas in the functions that call alloca().
				//  We want to converge to the smallest magnitude deltas, which are the greatest
				//  values because the deltas are negative. This is the opposite of IDA Pro, which
				//  seems to use the largest stack deltas it has seen.
				if (PrevIncomingDelta >= IncomingDelta) {
					// Old incoming delta should be retained.
					WorkList.pop_front(); // discard already processed block.
				}
				else {
					CurrBlock->SetProcessed(false);
					ReprocessingAllocaBlocks = true;
					DeltaIncrement = IncomingDelta - PrevIncomingDelta;
					continue;  // Make the loop come around and process this block again, using
							   //  the new incoming delta. Because we do this only when it decreases
							   //  the stack size as seen by this block, no infinite loop is possible.
				}
			}
			else {
				this->AnalyzedSP = false;
				SMP_msg("ERROR: Stack delta: PrevIncoming is %ld NewIncoming is %ld at %lx\n",
					(long) PrevIncomingDelta, (long) IncomingDelta, (unsigned long) (*InstIter)->GetAddr());
				WorkList.clear();
			}
		}
		else { // not already processed
			// ReprocessingAllocaBlocks => Reaching definitions sets have already been computed; just need to do stack delta analysis
			ReachesOutChanged = false;
			ReachesInChanged = CurrBlock->ComputeReachesInSet();
			if (ReachesInChanged && ReprocessingAllocaBlocks) {
				// Because block is not truly being processed for the first time, the ReachesOut set can be
				//  recomputed without processing instructions, as the DEDefs set and VarKill set will never
				//  change after the first pass through the block.
				ReachesOutChanged = CurrBlock->ComputeReachesOutSet();
			}

			if (CurrBlock->IsReachesOutStale()) {
				ReachesOutChanged = CurrBlock->ComputeReachesOutSet();
			}
#endif
			CurrBlock->SetProcessed(true);
			WorkList.pop_front();
			for (InstIter = CurrBlock->GetFirstInst(); InstIter != CurrBlock->GetLastInst(); ++InstIter) {
				if (CurrInst->IsFloatNop()) {
					continue; // skip marker instruction
				}
clc5q's avatar
clc5q committed
				ea_t InstAddr = CurrInst->GetAddr();
				if (InstAddr == this->GetFirstFrameAllocInstAddr()) {
					// Record the reset point for frame deallocations
					this->PreAllocStackDelta = IncomingDelta;
				}

				CurrInst->SetStackPtrOffset(IncomingDelta);

				// Search for tail calls, defined strictly as having an incoming stack delta of zero and
				//  being jumps to far chunks.
				if ((0 == IncomingDelta) && (CurrInst->IsBranchToFarChunk())) {
					CurrInst->SetTailCall();
					SMP_msg("Found tail call at %lx from %s: %s\n", (unsigned long) InstAddr, this->GetFuncName(),
							CurrInst->GetDisasm());
#if SMP_COMPARE_IDA_STARS_STACK_POINTER_DELTAS
				if (DebugFlag && IDAProSucceeded && !(this->CallsAlloca || this->HasPushAfterFrameAlloc())) {
clc5q's avatar
clc5q committed
					sval_t IDAProDelta = get_spd(this->GetFuncInfo(), InstAddr);
					if ((IDAProDelta != IncomingDelta) && (!CurrInst->MDIsHaltInstr())) {
						// IDA Pro special-cases the HALT instruction to make it appear that the
						//  incoming stack delta is zero. We do no such special case delta adjudstment,
						//  so we suppress error messages, as our delta will be non-zero.
						SMP_msg("ERROR: At %lx IDA Pro has stack pointer delta of %ld and we compute %ld\n", (unsigned long) InstAddr,
							(long) IDAProDelta, (long) IncomingDelta);
					SMP_msg("INFO: Stack delta trace: %ld at %lx\n",
						(unsigned long) IncomingDelta, (unsigned long) InstAddr);

				// As soon as the stack ptr offset has been set for the current instruction, we can normalize
				//  all of its stack DEFs and USEs.
				bool StackOpsChanged = CurrInst->MDNormalizeStackOps(UseFP, this->GetFramePtrStackDelta(), ReprocessingAllocaBlocks, DeltaIncrement);
				// Dataflow equation for upward exposed variables: If a variable has not been
				//  killed yet in this block, starting from the top of the block, and it is used
				//  in the current instruction, then it is upwardly exposed.
				set<DefOrUse, LessDefUse>::iterator CurrUse;
				if (!ReprocessingAllocaBlocks) { // Only compute on first pass through block
					for (CurrUse = CurrInst->GetFirstUse(); CurrUse != CurrInst->GetLastUse(); ++CurrUse) {
						op_t UseOp = CurrUse->GetOp();
						CanonicalizeOpnd(UseOp);
						if (MDIsDataFlowOpnd(UseOp, this->UsesFramePointer())) {
							// We have a register or stack operand. If stack operand, it is normalized, i.e. EBP-4 might be ESP-8,
							//  where the ESP-8 refers to the value of ESP upon entry to the function, not its current value.
							//  This normalization makes each stack location uniquely named (no aliases at different code locations due
							//  to different values of ESP at different code locations).
							// We only track certain kinds of operands in our data flow analyses.
							// Only add non-immediate operands that are not already killed in this block.
							//  o_near and o_far operands are code addresses in immediate form, e.g.
							//  call _printf might be call 0x8048040, with o_near = 0x8048040.
							if (!(CurrBlock->MDAlreadyKilled(UseOp))) {
								CurrBlock->AddUpExposed(UseOp);
							}
						}
					}

				// Find stack pointer saves and restores.
				bool StackPtrSaved;
				sval_t SavedDelta;
				op_t CopyOperand;
				bool SavedDeltaHasNewValue = false;
				bool ErrorFlag = false;
				if (CurrInst->MDIsStackPtrSaveOrRestore(this->UsesFramePointer(), this->GetFramePtrStackDelta(), StackPtrSaved, SavedDelta, CopyOperand, ErrorFlag)) {
					// NOTE: If CopyOperand is a stack location, it is normalized.
					if (StackPtrSaved) {
						// Insert new entry into the StackPtrCopySet. For the ReprocessingAllocaBlocks case, this might be
						//  just a tricky update of the delta for an existing item in the set.
						bool DeltaInserted = this->AddToStackPtrCopySet(CopyOperand, InstAddr, SavedDelta);
						if (TraceFlag) {
							SMP_msg("INFO: Stack delta saved: %ld at %lx\n", (long) SavedDelta, (unsigned long) InstAddr);
					else { // stack pointer was restored from saved value
						StackPointerRestoreSeen = true;
						SavedDeltaHasNewValue = true; // no need to compute effect of restore instruction later
						if (ReprocessingAllocaBlocks) {
							// Now that the stack pointer has been restored, the effect of the alloca() should
							//  be undone. We no longer need to adjust delta values for the rest of the block.
							DeltaIncrement = 0;
						}
					}
				} // end if (CurrInst->MDIsStackPtrSaveOrRestore())
				else if (ErrorFlag) {
					this->AnalyzedSP = false;
					WorkList.clear();
					SMP_msg("ERROR: ErrorFlag=true from MDIsStackPtrSaveOrRestore() at %lx\n",
						(unsigned long) InstAddr);
				else if (CurrInst->MDIsLeaveInstr()) {
					// LEAVE is a restoration of a stack pointer, not processed by CurrInst->MDIsStackPtrSaveOrRestore()
					StackPointerRestoreSeen = true; 
				}

				// Update VarKill and DownExposedDefs sets for DEFs in current instruction.
				// Dataflow equation for killed variables: If a variable is defined in any
				//  instruction in the block, it is killed by this block (i.e. prior definitions
				//  of that variable will not make it through the block).
				if (!ReprocessingAllocaBlocks) { // Only compute on first pass through block
					set<DefOrUse, LessDefUse>::iterator CurrDef;
					for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) {
						op_t DefOp = CurrDef->GetOp();
						if (MDIsDataFlowOpnd(DefOp, this->UsesFramePointer())) {
							// We have a register or stack operand. If stack operand, it is normalized, i.e. EBP-4 might be ESP-8,
							//  where the ESP-8 refers to the value of ESP upon entry to the function, not its current value.
							//  This normalization makes each stack location uniquely named (no aliases at different code locations due
							//  to different values of ESP at different code locations).
							CurrBlock->AddVarKill(DefOp);
							CurrBlock->UpdateDownExposedDefs(DefOp, InstAddr);
				}

				if (SavedDeltaHasNewValue) {
					IncomingDelta = SavedDelta; // from restore instruction
				}
				else {
					CurrentDelta = CurrInst->AnalyzeStackPointerDelta(IncomingDelta, this->GetFramePtrStackDelta());
					if (SMP_STACK_POINTER_BITWISE_AND_CODE == CurrentDelta) {
						// For now, we ignore instructions that AND a constant into the stack pointer.
						CurrentDelta = 0;
						SMP_msg("WARNING: Stack pointer bitwise AND ignored at %lx\n",
							(unsigned long) CurrInst->GetAddr());
					}
					else if (SMP_STACK_DELTA_ERROR_CODE == CurrentDelta) {
						this->AnalyzedSP = false;
						SMP_msg("ERROR: Stack delta unanalyzeable at %lx\n", (unsigned long) InstAddr);
						WorkList.clear();
						break;
					}
					SMPitype FlowType = CurrInst->GetDataFlowType();
					IncomingDelta += CurrentDelta;
					if ((RETURN == FlowType) && (!CurrInst->IsCondTailCall()) && (!CurrInst->IsTailCall())) {
						// We hope to see a consistent outgoing delta from all RETURN points.
						//  We special-case the conditional jump used as tail call, because it must be followed
						//  by a real return instruction later. If the jump is taken, it acts as a return, but
						//  it has not yet popped the stack.
						// Also, a regular tail call always has the stack delta at zero and does not match
						//  the stack delta of actual return instructions elsewhere in the function.
						if (ReturnSeen) { // This is not the first RETURN seen.
							if (IncomingDelta != this->NetStackDelta) { // Inconsistent
								SMP_msg("ERROR: Inconsistent stack deltas at return instruction at %lx\n",
									(unsigned long) CurrInst->GetAddr());
								ConsistentNetDelta = false;
								this->AnalyzedSP = false;
								WorkList.clear();
								break;
							}
						}
						else { // First RETURN statement seen.
							ReturnSeen = true;
							this->NetStackDelta = IncomingDelta;
#if SMP_AUDIT_STACK_POINTER_DELTAS
							if (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA != IncomingDelta) {
								SMP_msg("WARNING: Stack delta not %d after return instruction at %lx\n", 
									CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA, (unsigned long) CurrInst->GetAddr());
#endif
						}
						// If we permitted inconsistent stack deltas previously, then the stack pointer has to
						//  have been restored, e.g. if we allocate a frame with sub esp,32 and then we later
						//  have paths that pass through an alloca() call, a push, etc., then the alloca() or
						//  push will not be undone by add esp,32. It must be undone by something like mov esp,ebp.
						if (ConflictingValuesSeen && !StackPointerRestoreSeen) {
							SMP_msg("ERROR: Inconsistent stack deltas seen, no stack pointer restore before return instruction at %lx\n",
								(unsigned long) CurrInst->GetAddr());
							this->AnalyzedSP = false;
							WorkList.clear();
							break;
				} // end if (SavedDeltaHasNewValue) ... else ...
			} // end for each instruction in WorkList block
			if (CurrBlock->IsReachesOutStale()) {
				ReachesOutChanged = CurrBlock->ComputeReachesOutSet();
			// Push the successor blocks onto the work list if anything changed
			if (this->AnalyzedSP) { // if we do not have an error already
				CurrBlock->SetOutgoingStackDelta(IncomingDelta); // record incoming delta for all successors
				if (ReachesOutChanged || (!ReprocessingAllocaBlocks)) { // if anything changed (deltas or reaching defs ReachOut set)
					list<SMPBasicBlock *>::iterator SuccIter;
					if (DebugFlag && (0 == IncomingDelta)) {
						SMP_msg("ERROR: Pushing WorkList items with IncomingDelta of zero. Dumping Block:\n");
						CurrBlock->Dump();
					}
					for (SuccIter = CurrBlock->GetFirstSucc(); SuccIter != CurrBlock->GetLastSucc(); ++SuccIter) {
						pair<SMPBasicBlock *, sval_t> SuccPair (*SuccIter, IncomingDelta);
						WorkList.push_back(SuccPair);
					}
		} // end if block already processed ... else ...
		ReprocessingAllocaBlocks = false; // reset to default before processing next worklist element
	} while (!WorkList.empty());

	this->STARSStackPtrAnalysisPerformed = true;
	if (this->AnalyzedSP) {
		if (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA != this->NetStackDelta) {
			SMP_msg("WARNING: Non-default stack ptr delta %ld for function: %s\n", (long) this->NetStackDelta, this->GetFuncName());
		}
		if (this->StackAdjustmentComputed 
			&& (this->GlobalStackAdjustment != (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA - this->NetStackDelta))) {
			// Oops. When program graph cycles caused us to try to compute the GlobalStackAdjustment as our best guess
			//  for this function's effect on the stack delta, we told our callers that these three values would cancel out.
			//  They do not. Our callers have now been using a bad stack delta for their call instructions. Too late for
			//  anything but a diagnostic message.
				SMP_msg("ERROR: Earlier GlobalStackAdjustment computation %ld does not agree with current NetStackDelta result for function: %s\n",
					(long) this->GlobalStackAdjustment, this->GetFuncName());
			SMP_msg("ERROR: Stack Ptr Delta Analysis succeeded in IDA, failed in STARS for %lx : %s\n", 
				(unsigned long) this->FirstEA, this->GetFuncName());
			SMP_msg("SUCCESS: Stack Ptr Delta Analysis failed in IDA, succeeded in STARS for %lx : %s\n",
				(unsigned long) this->FirstEA, this->GetFuncName());
	if (!this->AnalyzedSP) {
		(void) this->UseIDAStackPointerDeltas();
	}
	else {
		// Success, so try to find saved/restored register pairs so that we do not
		//  conservatively say that the function kills registers that it in fact preserves.
		this->FindPreservedRegs();
		this->ComputeGlobalSets();
	}

	// Cannot keep the reaching defs around on huge benchmarks, or we run out of memory.
	//  Once we have SSA form, we can obtain reaching defs info on the fly if we want it.
	list<SMPBasicBlock *>::iterator BlockIter;
	for (BlockIter = this->Blocks.begin(); BlockIter != this->Blocks.end(); ++BlockIter) {
		(*BlockIter)->FreeReachingDefsMemory();
	}

	return this->AnalyzedSP;
} // end of SMPFunction::AnalyzeStackPointerDeltas()

// Insert the arguments into the StackPtrCopySet; or, if a matching entry already exists
//  with a StackDelta of greater magnitude than StackDelta, update just the StackDelta.
// Return true if StackDelta was inserted, false if it was used to update an old entry.
bool SMPFunction::AddToStackPtrCopySet(op_t CopyOp, ea_t InstAddr, sval_t StackDelta) {
	bool NewInsertion;
	pair<ea_t, sval_t> InsertStackDefn(InstAddr, StackDelta);
	pair<op_t, pair<ea_t, sval_t> > InsertStackDefnOp(CopyOp, InsertStackDefn);
	set<pair<op_t, pair<ea_t, sval_t> >, LessStackDeltaCopy>::iterator FindIter;
	pair<set<pair<op_t, pair<ea_t, sval_t> >, LessStackDeltaCopy>::iterator, bool> InsertResult;

	FindIter = this->StackPtrCopySet.find(InsertStackDefnOp);
	if (FindIter == this->StackPtrCopySet.end()) {
		// Not already present; insert new triple.
		NewInsertion = true;
		InsertResult = this->StackPtrCopySet.insert(InsertStackDefnOp);
		assert(InsertResult.second);
	}
	else {
		// Already there; see if delta needs to be updated.
		NewInsertion = false;
		pair<op_t, pair<ea_t, sval_t> > OldStackDefnOp(*FindIter);
		// Favor a smaller stack frame for the alloca-calling functions, e.g. favor -24 over -32 as a delta.
		if (StackDelta > OldStackDefnOp.second.second) {
			// Replace the old entry with a new one.
			this->StackPtrCopySet.erase(FindIter);
			InsertResult = this->StackPtrCopySet.insert(InsertStackDefnOp);
			assert(InsertResult.second);
		}
	}

	return NewInsertion;
} // end of SMPFunction::AddToStackPtrCopySet()

// Determined which regs are not killed by func or its callees; set bits in PreservedRegsBitmap
void SMPFunction::FindPreservedRegs(void) {
	// First, find candidates from the pushes in the entry block.
	bool EntryBlockProcessed = false;
	bool ReturnBlockSeen = false;
	SMPBasicBlock *CurrBlock;
	map<uint32, sval_t> PushedRegsList, PoppedRegsList;
	list<SMPBasicBlock *>::iterator BlockIter;
	vector<SMPInstr *>::iterator InstIter;
	bool success = true;
	for (BlockIter = this->Blocks.begin(); success && (BlockIter != this->Blocks.end()); ++BlockIter) {
		CurrBlock = (*BlockIter);
		if (!EntryBlockProcessed) { // Must be first block
			EntryBlockProcessed = true;
			for (InstIter = CurrBlock->GetFirstInst(); InstIter != CurrBlock->GetLastInst(); ++InstIter) {
				SMPInstr *CurrInst = (*InstIter);
				if (CurrInst->MDIsPushInstr()) {
					// Get list of regs saved, and their relative stack offsets, from the RTLs.
					//  Could be a PushAllRegs instruction, so there might be more than one.
					success = CurrInst->GetPushedRegsList(PushedRegsList);
					if (!success)
						break;
				}
			}
		}
		else if (EntryBlockProcessed && CurrBlock->HasReturn()) {
			// See if we have pops that match the pushes.
			for (InstIter = CurrBlock->GetFirstInst(); InstIter != CurrBlock->GetLastInst(); ++InstIter) {
				SMPInstr *CurrInst = (*InstIter);
				if (CurrInst->MDIsPopInstr()) {
					// Get list of regs restored, and their relative stack offsets, from the RTLs.
					//  Could be a PopAllRegs instruction, so there might be more than one.
					success = CurrInst->GetPoppedRegsList(!ReturnBlockSeen, PoppedRegsList);
					if (!success)
						break;
				}
			}
			ReturnBlockSeen = true;
		}
	} // end for all blocks

	if (success) {
		// Try to match elements from the two lists.
		map<uint32, sval_t>::iterator PushIter, PopIter;
		while (!PushedRegsList.empty() && (!PoppedRegsList.empty())) {
			PushIter = PushedRegsList.begin();
			PopIter = PoppedRegsList.begin();
			uint32 PushedReg = PushIter->first;
			uint32 PoppedReg = PopIter->first;
			if (PushedReg == PoppedReg) {
				if (PushIter->second == PopIter->second) { // match
					this->PreservedRegsBitmap.set(PushedReg);
				}
				PushedRegsList.erase(PushIter);
				PoppedRegsList.erase(PopIter);
			}
			else if (PushedReg < PoppedReg) {
				PushedRegsList.erase(PushIter);
			}
			else { // must be PushedReg > PoppedReg
				PoppedRegsList.erase(PopIter);
			}
		}
	}

	PushedRegsList.clear();
	PoppedRegsList.clear();
	return;
} // end of SMPFunction::FindPreservedRegs()

void SMPFunction::FindAllAllocsAndDeallocs(void) {
	bool FoundAllocInstr = false;
	bool FoundDeallocInstr = false;
clc5q's avatar
clc5q committed
	bool DebugFlag = false;
#if SMP_DEBUG_FRAMEFIXUP
	DebugFlag |= (0 == strcmp("frame_dummy", this->GetFuncName()));
#endif

	// Now, if LocalVarsSize is not zero, we need to find the instruction
	//  in the function prologue that allocates space on the stack for
	//  local vars. This code could be made more robust in the future
	//  by matching LocalVarsSize to the immediate value in the allocation
	//  instruction. However, IDA Pro is sometimes a little off on this
	//  number. **!!**
	if (0 < this->LocalVarsSize) {
		if (DebugFlag) SMP_msg("Searching for alloc and dealloc\n");
		list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
#if SMP_USE_SSA_FNOP_MARKER
		++InstIter;  // skip marker instruction
		for ( ; InstIter != this->Instrs.end(); ++InstIter) {
			SMPInstr *CurrInst = (*InstIter);
			ea_t addr = CurrInst->GetAddr();

			// Keep the most recent instruction in the DeallocInstr
			//  in case we reach the return without seeing a dealloc.
			if (!FoundDeallocInstr) {
				this->LocalVarsDeallocInstr = addr;
			}

			if (!FoundAllocInstr
				&& CurrInst->MDIsFrameAllocInstr()) {
#if SMP_DEBUG_CONTROLFLOW
				SMP_msg("Returned from MDIsFrameAllocInstr()\n");
				this->LocalVarsAllocInstr = addr;
				FoundAllocInstr = true;
				if (DebugFlag) SMP_msg("Found alloc: %s\n", CurrInst->GetDisasm());
				// As soon as we have found the local vars allocation,
				//  we can try to fix incorrect sets of UseFP by IDA.
				// NOTE: We might want to extend this in the future to
				//  handle functions that have no locals.  **!!**
				bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
				if (FixedUseFP) {
					SMP_msg("Fixed UseFP in %s\n", this->GetFuncName());
				if (this->UsesFramePointer()) { // now that MDFixUseFP() has validated this flag ...
					this->FindFramePointerDelta(); // find stack delta that is saved in frame pointer in function prologue
				}
			}
			else if (FoundAllocInstr) {
				// We can now start searching for the DeallocInstr.
				if (CurrInst->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
					// Keep saving the most recent addr that looks
					//  like the DeallocInstr until we reach the
					//  end of the function. Last one to look like
					//  it is used as the DeallocInstr.
#if SMP_DEBUG_CONTROLFLOW
					SMP_msg("Returned from MDIsFrameDeallocInstr()\n");
					this->LocalVarsDeallocInstr = addr;
					FoundDeallocInstr = true;
				}
					if (DebugFlag) SMP_msg("Not dealloc: %s\n", CurrInst->GetDisasm());
		} // end for (list<SMPInstr *>::iterator InstIter ... )
		if (!FoundAllocInstr) {
			// Could not find the frame allocating instruction.  Bad.
			// See if we can find the point at which the stack allocation reaches
			//  a total of FuncInfo.frsize+frregs, regardless of whether it happened by push
			//  instructions or some other means.
			this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize + this->FuncInfo.frregs);
#if SMP_DEBUG_CONTROLFLOW
			SMP_msg("Returned from FindAllocPoint()\n");
#if SMP_DEBUG_FRAMEFIXUP
			if (BADADDR == this->LocalVarsAllocInstr) {
				SMP_msg("WARNING: Could not find stack frame allocation in %s\n",
					this->GetFuncName());
				SMP_msg("LocalVarsSize: %lu  SavedRegsSize: %u ArgsSize: %lu\n",
					(unsigned long) LocalVarsSize, CalleeSavedRegsSize, (unsigned long) IncomingArgsSize);
				SMP_msg("FindAllocPoint found %lx for function %s\n",
					(unsigned long) this->LocalVarsAllocInstr, this->GetFuncName());
#if SMP_DEBUG_FRAMEFIXUP
		if (!FoundDeallocInstr) {
			// Could not find the frame deallocating instruction.  Bad.
			// Emit diagnostic and use the last instruction in the
			// function.
			SMP_msg("WARNING: Could not find stack frame deallocation in %s\n",
				this->GetFuncName());
		}
#endif
	}
	// else LocalVarsSize was zero, meaning that we need to search 
	//  for the end of the function prologue code and emit stack frame
	//  annotations from that address (i.e. this method returns that
	//  address). We will approximate this by finding the end of the
	//  sequence of PUSH instructions at the beginning of the function.
	//  The last PUSH instruction should be the last callee-save-reg
	//  instruction. We can make this more robust in the future by
	//  making sure that we do not count a PUSH of anything other than
	//  a register. **!!**
	// NOTE: 2nd prologue instr is usually mov ebp,esp
	// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
	// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
	else {
		ea_t SaveAddr = this->FuncInfo.startEA;
		list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
#if SMP_USE_SSA_FNOP_MARKER
		++InstIter;  // skip marker instruction
		for ( ;	InstIter != this->Instrs.end(); ++InstIter) {
			SMPInstr *CurrInst = (*InstIter);
			insn_t CurrCmd = CurrInst->GetCmd();
			ea_t addr = CurrInst->GetAddr();
			if (CurrCmd.itype == NN_push)
				SaveAddr = addr;
			else
				break;
		}
		this->LocalVarsAllocInstr = SaveAddr;
		this->LocalVarsDeallocInstr = 0;
		// As soon as we have found the local vars allocation,
		//  we can try to fix incorrect sets of UseFP by IDA.
		// NOTE: We might want to extend this in the future to
		//  handle functions that have no locals.  **!!**
		bool FixedUseFP = this->MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
		if (FixedUseFP) {
			SMP_msg("Fixed UseFP in %s\n", this->GetFuncName());
		}
#endif
		if (this->UsesFramePointer()) { // now that MDFixUseFP() has validated this flag ...
			this->FindFramePointerDelta(); // find stack delta that is saved in frame pointer in function prologue
		}
	} // end if (LocalVarsSize > 0) ... else ...

	if (!FoundAllocInstr && (0 < this->LocalVarsSize) && this->IsLeaf()) {
		// The x86-64 ABI saves time by not allocating a local frame for some leaf functions,
		//  and just accesses locations below the stack as if they were allocated local vars.
		//  We still want the UseFP and FramePointerDelta members to be properly set.
		bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
		if (FixedUseFP) {
			SMP_msg("Fixed UseFP in %s\n", this->GetFuncName());
		}
#endif
		if (this->UsesFramePointer()) { // now that MDFixUseFP() has validated this flag ...
			this->FindFramePointerDelta(); // find stack delta that is saved in frame pointer in function prologue
			if (0 != this->FramePointerStackDelta) {
				SMP_msg("INFO: Found FramePointerStackDelta of %ld in frameless leaf function %s\n", 
					(long) this->FramePointerStackDelta, this->GetFuncName());
			}
	return;
} // end of SMPFunction::FindAllAllocsAndDeallocs()

// Compute FramePointerStackDelta as soon as possible so that it is available for SyncAllRTs().
void SMPFunction::FindFramePointerDelta(void) {
	bool FirstBlockProcessed = false;
	bool FPSaved = false;  // have seen push of frame pointer reg
	bool SPintoFP = false; // have seen copy of stack pointer into frame pointer
	sval_t IncomingDelta = 0;
	sval_t CurrentDelta;
	list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
#if SMP_USE_SSA_FNOP_MARKER
	++InstIter;  // skip marker instruction
#endif
	while (!FirstBlockProcessed && (InstIter != this->Instrs.end())) {
		SMPInstr *CurrInst = (*InstIter);
		// Accumulate stack delta values.
		CurrentDelta = CurrInst->AnalyzeStackPointerDelta(IncomingDelta, this->GetFramePtrStackDelta());
		if (SMP_STACK_POINTER_BITWISE_AND_CODE == CurrentDelta) {
			// For now, we ignore instructions that AND a constant into the stack pointer.
			CurrentDelta = 0;
		}
		else if (SMP_STACK_DELTA_ERROR_CODE == CurrentDelta) {
			this->AnalyzedSP = false;
			break; // error exit
		}
		// Look for initialization of frame pointer, record its stack delta
		FirstBlockProcessed = CurrInst->IsLastInBlock();
		if (!FPSaved) { // still looking for "push <framepointerreg>"
			if (CurrInst->MDIsPushInstr() && CurrInst->GetCmd().Operands[0].is_reg(MD_FRAME_POINTER_REG)) {
				FPSaved = true;
			}
		}
		else if (!SPintoFP) { // found "push <framepointerreg>", looking for "fp := sp"
			insn_t CurrCmd = CurrInst->GetCmd();
			if ((CurrCmd.itype == MD_MOVE_INSTRUCTION) 
				&& (CurrInst->GetFirstDef()->GetOp().is_reg(MD_FRAME_POINTER_REG))
				&& (CurrInst->GetFirstUse()->GetOp().is_reg(MD_STACK_POINTER_REG))) {
				SPintoFP = true;
				this->FramePointerStackDelta = IncomingDelta;
				FirstBlockProcessed = true; // stop looking
				assert(this->UsesFramePointer());
			}
		}
		IncomingDelta += CurrentDelta;
		++InstIter;
	}

	return;
} // end of SMPFunction::FindFramePointerDelta()

// Figure out the different regions of the stack frame, and find the
//  instructions that allocate and deallocate the local variables space
//  on the stack frame.
// The stack frame info will be used to emit stack
//  annotations when Analyze() reaches the stack allocation
//  instruction that sets aside space for local vars.
// Set the address of the instruction at which these
//  annotations should be emitted. This should normally
//  be an instruction such as:  sub esp,48
//  However, for a function with no local variables at all,
//  we will need to determine which instruction should be
//  considered to be the final instruction of the function
//  prologue and return its address.
// Likewise, we find the stack deallocating instruction in
//  the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
#if SMP_COMPUTE_STACK_GRANULARITY
	// Now, find the boundaries between local variables.
	this->BuildLocalVarTable();
#endif

	// Get callee-saved regs info for remediation use.
	if (BADADDR != this->GetFirstFrameAllocInstAddr()) {
	return;
} // end of SMPFunction::SetStackFrameInfo()

// IDA Pro defines the sizes of regions in the stack frame in a way
//  that suits its purposes but not ours. The frsize field of the func_info_t
//  structure measures the distance between the stack pointer and the
//  frame pointer (ESP and EBP in the x86). This region includes some
//  of the callee-saved registers. So, the frregs field only includes
//  the callee-saved registers that are above the frame pointer.
//  x86 standard prologue on gcc/linux:
//    push ebp      ; save old frame pointer
//    mov ebp,esp   ; new frame pointer = current stack pointer
//    push esi      ; callee save reg
//    push edi      ; callee save reg
//    sub esp,34h   ; allocate 52 bytes for local variables
//
//  Notice that EBP acquires its final frame pointer value AFTER the
//  old EBP has been pushed. This means that, of the three callee saved
//  registers, one is above where EBP points and two are below.
//  IDA Pro is concerned with generating readable addressing expressions
//  for items on the stack. None of the callee-saved regs will ever
//  be addressed in the function; they will be dormant until they are popped
//  off the stack in the function epilogue. In order to create readable
//  disassembled code, IDA defines named constant offsets for locals. These
//  offsets are negative values (x86 stack grows downward from EBP toward
//  ESP). When ESP_relative addressing occurs, IDA converts a statement:
//    mov eax,[esp+12]
//  into the statement:
//    mov eax,[esp+3Ch+var_30]
//  Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
//  var_30 is defined to have the value -30h == -48 decimal. So, the
//  "frame size" in IDA Pro is 60 bytes, and a certain local can be
//  addressed in ESP-relative manner as shown, or as [ebp+var_30] for
//  EBP-relative addressing. The interactive IDA user can then edit
//  the name var_30 to something mnemonic, such as "virus_size", and IDA
//  will replace all occurrences with the new name, so that code references
//  automatically become [ebp+virus_size]. As the user proceeds
//  interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
//  our analyses have a compiler perspective as well as a memory access
//  defense perspective. SMP distinguishes between callee saved regs,
//  which should not be overwritten in the function body, and local
//  variables, which can be written. We view the stack frame in logical
//  pieces: here are the saved regs, here are the locals, here is the
//  return address, etc. We don't care which direction from EBP the
//  callee-saved registers lie; we don't want to lump them in with the
//  local variables. We also don't like the fact that IDA Pro will take
//  the function prologue code shown above and declare frregs=4 and
//  frsize=60, because frsize no longer matches the stack allocation
//  statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
//  private data members for the function, while leaving the IDA database
//  alone because IDA needs to maintain its own definitions of these
//  variables.
// Fixing means we will update the data members LocalVarsSize and
//  CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
//  The prologue and epilogue code generated by gcc-linux is as discussed
//  above, while on Visual Studio and other Windows x86 compilers, the
//  saving of registers other than EBP happens AFTER local stack allocation.
//  A Windows version of the function would expect to see the pushing
//  of ESI and EDI AFTER the sub esp,34h statement.
bool SMPFunction::MDFixFrameInfo(void) {
	int SavedRegsSize = 0;
	int OtherPushesSize = 0;  // besides callee-saved regs
	int NewLocalsSize = 0;
	int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
	bool Changed = false;
	bool DebugFlag = (0 == strcmp("__libc_csu_init", this->GetFuncName()));

	// Iterate through the first basic block in the function. If we find
	//  a frame allocating Instr in it, then we have local vars. If not,
	//  we don't, and LocalVarsSize should have been zero. Count the callee
	//  register saves leading up to the local allocation. Set data members
	//  according to what we found if the values of the data members would
	//  change.
	SMPBasicBlock *CurrBlock = this->Blocks.front();
	vector<SMPInstr *>::iterator CurrIter = CurrBlock->GetFirstInst();
#if SMP_USE_SSA_FNOP_MARKER
	++CurrIter;  // skip marker instruction
	for ( ; CurrIter != CurrBlock->GetLastInst(); ++CurrIter) {
		SMPInstr *CurrInstr = (*CurrIter);
		if (CurrInstr->MDIsPushInstr()) {
			// We will make the gcc-linux assumption that a PUSH in
			//  the first basic block, prior to the stack allocating
			//  instruction, is a callee register save. To make this
			//  more robust, we ensure that the register is from
			//  the callee saved group of registers, and that it has
			//  not been defined thus far in the function (else it might
			//  be a push of an outgoing argument to a call that happens
			//  in the first block when there are no locals). **!!!!**
			if (CurrInstr->MDUsesCalleeSavedReg()
				&& !CurrInstr->HasSourceMemoryOperand()) {
				SavedRegsSize += 4; // **!!** should check the size
				if (DebugFlag) SMP_msg("libc_csu_init SavedRegsSize: %d  %s\n", SavedRegsSize,
					CurrInstr->GetDisasm());
			}
			else {
				// Pushes of outgoing args can be scheduled so that
				//  they are mixed with the pushes of callee saved regs.
				OtherPushesSize += 4;
				if (DebugFlag) SMP_msg("libc_csu_init OtherPushesSize: %d  %s\n", OtherPushesSize,
					CurrInstr->GetDisasm());
			}
		}
		else if (CurrInstr->MDIsFrameAllocInstr()) {
			if (DebugFlag) SMP_msg("libc_csu_init allocinstr: %s\n", CurrInstr->GetDisasm());
			SavedRegsSize += OtherPushesSize;
			// Get the size being allocated.
			set<DefOrUse, LessDefUse>::iterator CurrUse;
			for (CurrUse = CurrInstr->GetFirstUse(); CurrUse != CurrInstr->GetLastUse(); ++CurrUse) {
				// Find the immediate operand.
				if (o_imm == CurrUse->GetOp().type) {
					// Get its value into LocalVarsSize.
					long AllocValue = (signed long) CurrUse->GetOp().value;
					// One compiler might have sub esp,24 and another
					//  might have add esp,-24. Take the absolute value.
					if (0 > AllocValue)
						AllocValue = -AllocValue;
					if (AllocValue != (long) this->LocalVarsSize) {
						Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
						if (AllocValue + SavedRegsSize != OldFrameTotal)
							SMP_msg("Total frame size changed: %s OldTotal: %d NewTotal: %ld\n",
								this->GetFuncName(), OldFrameTotal, (AllocValue + SavedRegsSize));
#endif
						this->LocalVarsSize = (asize_t) AllocValue;
						this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
						NewLocalsSize = this->LocalVarsSize;
					}
					else { // Old value was correct; no change.
						NewLocalsSize = this->LocalVarsSize;
						if (SavedRegsSize != this->CalleeSavedRegsSize) {
							this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
							Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
							SMP_msg("Only callee regs size changed: %s\n", this->GetFuncName());
#endif
						}
					}
				} // end if (o_imm == ...)
			} // end for all uses
			break; // After frame allocation instr, we are done
		} // end if (push) .. elsif frame allocating instr
	} // end for all instructions in the first basic block

	// If we did not find an allocating instruction, see if it would keep
	//  the total size the same to set LocalVarsSize to 0 and to set
	//  CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
	//  might be better off to leave the numbers alone.
	if (!Changed && (NewLocalsSize == 0)) {
		if (DebugFlag) SMP_msg("libc_csu_init OldFrameTotal: %d \n", OldFrameTotal);
		if (OldFrameTotal == SavedRegsSize) {
clc5q's avatar
clc5q committed
			this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
			this->LocalVarsSize = 0;
			Changed = true;
		}
#if SMP_DEBUG_FRAMEFIXUP
		else {
			SMP_msg("Could not update frame sizes: %s\n", this->GetFuncName());