From 604fc5c9c9abb07b41a11b697fa68e8f33f602b0 Mon Sep 17 00:00:00 2001
From: clc5q <clc5q@git.zephyr-software.com>
Date: Fri, 22 Feb 2013 01:33:49 +0000
Subject: [PATCH] Analysis of stack pointer deltas and stack location accesses.

---
 SMPBasicBlock.cpp       |   95 +++-
 SMPBasicBlock.h         |    6 +-
 SMPDBInterface.cpp      |    1 -
 SMPDBInterface.h        |    2 +-
 SMPDataFlowAnalysis.cpp |   54 +-
 SMPDataFlowAnalysis.h   |   10 +
 SMPFunction.cpp         | 1091 ++++++++++++++++++++++++++++-----------
 SMPFunction.h           |   41 +-
 SMPInstr.cpp            |  847 +++++++++++++++++++++++-------
 SMPInstr.h              |   59 ++-
 SMPProgram.cpp          |   71 ++-
 SMPProgram.h            |    4 +-
 SMPStaticAnalyzer.cpp   |   20 +-
 13 files changed, 1730 insertions(+), 571 deletions(-)

diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp
index ed125077..b99b41be 100644
--- a/SMPBasicBlock.cpp
+++ b/SMPBasicBlock.cpp
@@ -74,6 +74,7 @@ SMPBasicBlock::SMPBasicBlock(SMPFunction *Func, list<SMPInstr *>::iterator First
 	this->FirstAddr = (*First)->GetAddr();
 	this->BlockNum = SMP_BLOCKNUM_UNINIT;
 	this->MyFunc = Func;
+	this->SetOutgoingStackDelta(0);
 #if 0
 	this->IncomingStackPtrDelta = 0;
 #endif
@@ -255,6 +256,19 @@ bool SMPBasicBlock::AllPredecessorsNumbered(void) {
 	return true;
 } // end of SMPBasicBlock::AllPredecessorsNumbered()
 
+// Depth-first traversal: flag this block as processed, then visit each successor in turn.
+void SMPBasicBlock::DepthFirstMark(void) {
+	if (!this->IsProcessed()) {
+		// First visit. Blocks that are already processed are skipped, which ends the recursion.
+		this->SetProcessed(true);
+		list<SMPBasicBlock *>::iterator SuccIter = this->Successors.begin();
+		while (SuccIter != this->Successors.end()) {
+			(*SuccIter)->DepthFirstMark();
+			++SuccIter;
+		}
+	}
+} // end of SMPBasicBlock::DepthFirstMark()
+
 // Are all instructions in the block no-ops?
 bool SMPBasicBlock::AllNops(void) {
 	size_t NopCount = 0;
@@ -448,7 +462,7 @@ bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const {
 } // end of SMPBasicBlock::MDAlreadyKilled()
 
 // Initialize the KilledSet and UpExposedSet for live variable analysis.
-void SMPBasicBlock::InitKilledExposed(void) {
+void SMPBasicBlock::InitKilledExposed(bool UseFP) {
 	// Find all upwardly exposed operands and killed operands in this block.
 	list<SMPInstr *>::iterator CurrIter;
 	for (CurrIter = this->Instrs.begin(); CurrIter != this->Instrs.end(); ++CurrIter) {
@@ -463,16 +477,19 @@ void SMPBasicBlock::InitKilledExposed(void) {
 			// Only add non-immediate operands that are not already killed in this block.
 			//  o_near and o_far operands are code addresses in immediate form, e.g.
 			//  call _printf might be call 0x8048040, with o_near = 0x8048040.
-			if ((!(this->MDAlreadyKilled(UseOp)))
-				&& (UseOp.type != o_imm) && (UseOp.type != o_near) && (UseOp.type != o_far))
+			if ((!(this->MDAlreadyKilled(UseOp))) && (MDIsDataFlowOpnd(UseOp, UseFP))) {
 				this->UpExposedSet.insert(UseOp);
+			}
 		}
 		// Dataflow equation for killed variables: If a variable is defined in any
 		//  instruction in the block, it is killed by this block (i.e. prior definitions
 		//  of that variable will not make it through the block).
 		set<DefOrUse, LessDefUse>::iterator CurrDef;
 		for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) {
-			this->KillSet.insert(CurrDef->GetOp());
+			op_t DefOp = CurrDef->GetOp();
+			if (MDIsDataFlowOpnd(DefOp, UseFP)) {
+				this->KillSet.insert(DefOp);
+			}
 		}
 	} // end for all instrs in block
 	this->SetLiveInStale(true);  // Would need to compute LiveInSet for first time
@@ -811,7 +828,7 @@ void SMPBasicBlock::UpdateDownExposedDefs(op_t DefOp, ea_t InstAddr) {
 	set<pair<op_t, ea_t>, LessDefinition>::iterator DEDefnIter = this->GetFirstDownExposedDefn();
 	while (DEDefnIter != this->GetLastDownExposedDefn()) {
 		op_t OldOp = DEDefnIter->first;
-		if (IsEqOp(OldOp, DefOp)) {
+		if (IsEqOpIgnoreBitwidth(OldOp, DefOp)) {
 			(void) this->DownExposedDefnSet.erase(DEDefnIter);
 			break; // save time; should never be more than one defn per DefOp in this set, unlike ReachesIn & ReachesOut
 		}
@@ -1082,6 +1099,14 @@ void SMPBasicBlock::SSALocalRenumber(void) {
 	return;
 } // end of SMPBasicBlock::SSALocalRenumber()
 
+// Release the reaching-definitions sets once no later analysis needs them.
+void SMPBasicBlock::FreeReachingDefsMemory(void) {
+	// Empty each of the three reaching-defs containers held by this block.
+	this->DownExposedDefnSet.clear();
+	this->ReachesOutSet.clear();
+	this->ReachesInSet.clear();
+} // end of SMPBasicBlock::FreeReachingDefsMemory()
+
 // Free SSA data structures that are no longer needed when all SSA numbers have
 //  been recorded in DEFs and USEs.
 void SMPBasicBlock::FreeSSAMemory(void) {
@@ -1254,7 +1279,7 @@ void SMPBasicBlock::PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsig
 	bool LocalDef = this->IsLocalName(SearchOp);
 
 	if (SearchOp.type == o_reg)
-		SearchOp.reg = MDCanonicalizeSubReg(SearchOp.reg);
+		CanonicalizeOpnd(SearchOp);
 	else
 		return;  // Limit to registers for now
 
@@ -1310,7 +1335,7 @@ void SMPBasicBlock::PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsig
 		for (DefIter = DefInst->GetFirstDef(); DefIter != DefInst->GetLastDef(); ++DefIter) {
 			DefOp = DefIter->GetOp();
 			if (DefOp.type == o_reg) {
-				DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
+				CanonicalizeOpnd(DefOp);
 				DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
 				if ((LocalDef && this->IsLocalName(DefOp))
 					|| (!LocalDef && DefBlock->IsLocalName(DefOp))) {
@@ -1354,7 +1379,7 @@ void SMPBasicBlock::PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsig
 			UseOp = UseIter->GetOp();
 			if ((UseOp.type == o_reg) && ((!ReadsMemory) || (DefInst->IsNonAddressReg(UseOp)))) { // don't want addressing registers
 #if 0  // USEs should already be canonicalized
-				UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+				CanonicalizeOpnd(UseOp);
 #endif
 				if (UseOp.is_reg(X86_FLAGS_REG)) { 
 					// don't need to propagate to flags after initial call to this method
@@ -1422,7 +1447,7 @@ void SMPBasicBlock::PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsig
 // Find the stack target of a call to memset() and the size in bytes of the memset() region.
 //  If the memset() target is not on the stack, return false. If the 
 //  size of the memset() region is not a constant, we also return false.
-bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &MemSetSize, int &StackOffset) {
+bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &MemSetSize, int &StackOffset, bool &FPRelativeTarget) {
 	unsigned short SignMask;
 	set<DefOrUse, LessDefUse>::iterator UseIter;
 	op_t DefOp, UseOp, UltimateSourceOp;
@@ -1430,7 +1455,9 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 	int DefSSANum, UseSSANum;
 	bool FoundTarget = false, FoundSize = false;
 	bool UseFP = this->GetFunc()->UsesFramePointer();
+	bool DummyFPRelative = false; // don't care about this for the size argument to memset(), which is o_imm anyway
 	list<SMPInstr *>::reverse_iterator InstRevIter;
+	sval_t CurrentDelta;
 
 	assert(MemSetAddr >= this->FirstAddr);
 	MemSetTarget = InitOp;
@@ -1441,6 +1468,8 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 	//  [ESP+4], which we do not care about. We will move backwards through the block until
 	//  we find the MemSetAddr, then we will trace the def-use SSA chain back to its 
 	//  source for each argument to memset() that we care about.
+	// NOTE: [ESP+0], [ESP+4], etc., do not look like that any more, because stack ops have
+	//  been normalized.
 	InstRevIter = this->Instrs.rbegin(); 
 	SMPInstr *CurrInst = (*InstRevIter);
 	InstAddr = CurrInst->GetAddr();
@@ -1465,6 +1494,7 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 			//  an EBP-relative DefOp, as arguments are not passed EBP-relative. So,
 			//  regardless of whether we actually use EBP as a frame pointer in this
 			//  function, we will pass FALSE as the second argument of this next query.
+			// NOTE: There are no more EBP-relative DEFs after normalization, anyway.
 			if (MDIsStackAccessOpnd(DefOp, false)) {
 				// Found write to stack, ESP-relative. Determine if it is [ESP+0] or [ESP+8].
 				int BaseReg, IndexReg;
@@ -1475,10 +1505,12 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 				if ((R_none == IndexReg) && (0 == ScaleFactor)) {
 					assert(BaseReg == MD_STACK_POINTER_REG);
 					int SignedOffset = (int) offset; // IDA Pro has signedness problem to fix here.
+					CurrentDelta = CurrInst->GetStackPtrOffset();
+					SignedOffset -= (int) CurrentDelta; // undo normalization of stack offset
 					if (0 == SignedOffset) {
 						UseOp = CurrInst->GetMoveSource();
 						if (o_reg == UseOp.type) {
-							UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+							CanonicalizeOpnd(UseOp);
 						}
 						if (o_void == UseOp.type) {
 							SMP_msg("ERROR: No move source at %x within AnalyzeMemSet().\n", InstAddr);
@@ -1488,7 +1520,7 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 							UseIter = CurrInst->FindUse(UseOp);
 							assert(UseIter != CurrInst->GetLastUse());
 							UseSSANum = UseIter->GetSSANum();
-							if (CurrInst->TraceUltimateMoveSource(UseOp, UseSSANum, UltimateSourceOp)) {
+							if (CurrInst->TraceUltimateMoveSource(UseOp, UseSSANum, UltimateSourceOp, FPRelativeTarget)) {
 								if (MDIsStackAccessOpnd(UltimateSourceOp, UseFP)) {
 									// memset() has a stack location as its target. Extract info from
 									//  UltimateSourceOp.
@@ -1498,7 +1530,7 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 									}
 									else {
 										MemSetTarget = UltimateSourceOp;
-										StackOffset = (int) offset;
+										StackOffset = (int) offset; // NOTE: this is a normalized offset
 										FoundTarget =  true; // success on argument #1
 										if (FoundSize) break;  // nothing left to find
 									}
@@ -1516,7 +1548,7 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 					else if (8 == SignedOffset) {
 						UseOp = CurrInst->GetMoveSource();
 						if (o_reg == UseOp.type) {
-							UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+							CanonicalizeOpnd(UseOp);
 						}
 						if (o_void == UseOp.type) {
 							SMP_msg("ERROR: No move source at %x within AnalyzeMemSet().\n", InstAddr);
@@ -1526,7 +1558,7 @@ bool SMPBasicBlock::AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &M
 							UseIter = CurrInst->FindUse(UseOp);
 							assert(UseIter != CurrInst->GetLastUse());
 							UseSSANum = UseIter->GetSSANum();
-							if (CurrInst->TraceUltimateMoveSource(UseOp, UseSSANum, UltimateSourceOp)) {
+							if (CurrInst->TraceUltimateMoveSource(UseOp, UseSSANum, UltimateSourceOp, DummyFPRelative)) {
 								if (o_imm != UltimateSourceOp.type) {
 									break; // return false if cannot find an immediate value for argument #3
 								}
@@ -3159,7 +3191,8 @@ bool SMPBasicBlock::IsBenignTruncationDEF(op_t DefOp, int DefSSANum, size_t DefA
 				UseOp = CurrInst->GetMoveSource();
 				SearchOp = UseOp;
 				if (UseOp.type == o_reg) {
-					SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+					SearchOp = UseOp;
+					CanonicalizeOpnd(SearchOp);
 				}
 				else {
 					break;  // Not a sub-register, cannot fit the pattern
@@ -3355,21 +3388,34 @@ bool SMPBasicBlock::IsStackOpNextUsedWithSignedness(op_t StackDefOp, ea_t DefAdd
 // Find stack adjustment code, if any, after CallAddr.
 sval_t SMPBasicBlock::ComputeStackAdjustmentAfterCall(ea_t CallAddr) {
 	sval_t AdjustmentBytes = 0;
+	sval_t PriorAdjustmentBytes = 0; // stack deltas before the call
 	list<SMPInstr *>::iterator InstIter;
 	ea_t InstAddr; // for debugging, breakpoints, etc.
-	bool FoundCallInst;
+	bool FoundCallInst = false;
+	bool FirstBlock = (this->GetFirstAddr() == this->GetFunc()->GetFirstFuncAddr());
 
 	for (InstIter = this->GetFirstInstr(); InstIter != this->GetLastInstr(); ++InstIter) {
 		SMPInstr *CurrInst = (*InstIter);
 		InstAddr = CurrInst->GetAddr();
 		// Two stage search: Find CALL instruction, then find next stack adjustment.
+		sval_t CurrentDelta = CurrInst->FindStackAdjustment();
+		SMPitype DataFlowType = CurrInst->GetDataFlowType();
 		if (!FoundCallInst) {
 			FoundCallInst = (InstAddr == CallAddr);
+			if (!FoundCallInst && !FirstBlock) {
+				PriorAdjustmentBytes += CurrentDelta;
+				if (JUMP <= DataFlowType) {
+					// Reset PriorAdjustmentBytes and exit. Basic block is too confusing
+					//  to analyze if multiple calls happen. We cannot tell if stack adjustments
+					//  between the first call and the second call are related to the first call
+					//  or the second call.
+					PriorAdjustmentBytes = 0;
+					break;
+				}
+			}
 		}
 		else {
-			SMPitype DataFlowType = CurrInst->GetDataFlowType();
 			if (JUMP > DataFlowType) {
-				sval_t CurrentDelta = CurrInst->FindStackAdjustment();
 				if (SMP_STACK_DELTA_ERROR_CODE == CurrentDelta) {
 					// An error will soon occur, no need for further computations.
 					AdjustmentBytes = 0;
@@ -3391,10 +3437,13 @@ sval_t SMPBasicBlock::ComputeStackAdjustmentAfterCall(ea_t CallAddr) {
 				//  push ebx ; caller-save before call to bar
 				//  call bar
 			}
-			else if (RETURN == DataFlowType) {
+			else if ((RETURN == DataFlowType) 
+				|| ((!this->GetFunc()->HasExplicitReturnInstruction()) && ((INDIR_JUMP == DataFlowType) || (JUMP == DataFlowType)))) {
 				// We have been counting up adjustment bytes that precede a RETURN. Those
 				//  adjustments are expected before a RETURN and have nothing to do with
-				//  the function call at CallAddr.
+				//  the function call at CallAddr. If the function has no explicit return instructions, we will
+				//  conservatively treat jumps as probable tail calls, which will later be given the RETURN
+				//  data flow type but have not yet been analyzed and marked.
 				AdjustmentBytes = 0;
 				SMP_msg("INFO: Resetting stack adjustment to zero at return inst at %x\n", InstAddr);
 				break;
@@ -3407,5 +3456,11 @@ sval_t SMPBasicBlock::ComputeStackAdjustmentAfterCall(ea_t CallAddr) {
 		}
 	}
 
+	if (AdjustmentBytes != 0) {
+		// PriorAdjustmentBytes is usually negative (making stack space before the call)
+		//  and AdjustmentBytes is usually positive (returning stack space after the call).
+		//  Adding them gets the net adjustment after the call.
+		AdjustmentBytes += PriorAdjustmentBytes;
+	}
 	return AdjustmentBytes;
 } // end of SMPBasicBlock::ComputeStackAdjustmentAfterCall()
\ No newline at end of file
diff --git a/SMPBasicBlock.h b/SMPBasicBlock.h
index a7883186..b7d52dbf 100644
--- a/SMPBasicBlock.h
+++ b/SMPBasicBlock.h
@@ -224,13 +224,15 @@ public:
 
 	// Analysis methods
 	bool AllPredecessorsNumbered(void);
+	void DepthFirstMark(void); // Depth-first traversal, mark as processed
 	void Analyze();
-	void InitKilledExposed(void); // Initialize KilledSet and UpExposedSet
+	void InitKilledExposed(bool UseFP); // Initialize KilledSet and UpExposedSet
 	bool UpdateLiveOut(void); // Iterate once on updating LiveOutSet; return true if changed
 	void AddToDomFrontier(int); // Add RPO block number to DomFrontier set.
 	void SetLocalNames(void); // Fille the LocalNames member set
 	void SSALocalRenumber(void); // Renumber references to local names
 	void CreateGlobalChains(void); // Create DEF-USE chains for global names used here
+	void FreeReachingDefsMemory(void); // Free memory for reaching defs sets after they are no longer needed
 	void FreeSSAMemory(void); // After SSA #s are in DEFs and USEs, free SSA data structures.
 	void FreeUnusedMemory4(void); // After loop 4 (type inference) in SMPProgram::Analyze(), free memory
 	bool IsGlobalRegDead(ea_t InstAddr, op_t Operand, unsigned int RegIndex) const; // Is global reg dead at InstAddr?
@@ -258,7 +260,7 @@ public:
 	
 	// Find the stack target of a call to memset() and the size in bytes of the memset() region.
 	//  If the memset() target is not on the stack, or the size is not a constant, return false.
-	bool AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &MemSetSize, int &SignedOffset);
+	bool AnalyzeMemSet(ea_t MemSetAddr, op_t &MemSetTarget, size_t &MemSetSize, int &SignedOffset, bool &FPRelativeTarget);
 	
 	bool IsInfiniteSelfLoop(void); // return true if block loops infinitely to itself
 	bool IsBenignOverflowDEF(op_t DefOp, int DefSSANum, size_t DefAddr); // Do we not care if DEF overflowed, due to how it is used?
diff --git a/SMPDBInterface.cpp b/SMPDBInterface.cpp
index 09107faa..4eea8972 100644
--- a/SMPDBInterface.cpp
+++ b/SMPDBInterface.cpp
@@ -58,7 +58,6 @@
 #include "SMPDBInterface.h"
 
 #ifdef STARS_IDA_INTERFACE
-xrefblk_t CurrSMP_xref;
 
 // Get instruction info by address from IDA Pro.
 bool SMPGetCmd(ea_t InstAddr, insn_t &SMPcmd, ulong &SMPfeatures) {
diff --git a/SMPDBInterface.h b/SMPDBInterface.h
index 3ff39d8b..bde36979 100644
--- a/SMPDBInterface.h
+++ b/SMPDBInterface.h
@@ -97,8 +97,8 @@ bool SMPGetCmd(ea_t InstAddr, insn_t &SMPcmd, ulong &SMPfeatures);
 #define SMP_feof(file) feof(file)
 #define SMP_fclose(file) qfclose(file)
 
-extern xrefblk_t CurrSMP_xref;
 struct SMP_xref_t {
+	xrefblk_t CurrSMP_xref;
 	ea_t GetFrom() { return CurrSMP_xref.from; };
 	ea_t GetTo() { return CurrSMP_xref.to; };
 	uchar GetIscode() { return CurrSMP_xref.iscode; };         // 1-is code reference; 0-is data reference
diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp
index 6b3f5498..c224b815 100644
--- a/SMPDataFlowAnalysis.cpp
+++ b/SMPDataFlowAnalysis.cpp
@@ -215,12 +215,14 @@ unsigned short ComputeOperandBitWidthMask(op_t CurrOp, size_t DataSize) {
 		BitWidthMask = FG_MASK_BITWIDTH_24;
 	else if (6 == DataSize)
 		BitWidthMask = FG_MASK_BITWIDTH_48;
+	else if (10 == DataSize)
+		BitWidthMask = FG_MASK_BITWIDTH_80;
 	else if (12 == DataSize)
 		BitWidthMask = FG_MASK_BITWIDTH_96;
 	else if (32 == DataSize)
 		BitWidthMask = FG_MASK_BITWIDTH_256;
 	else {
-		SMP_msg("ERROR: Unknown DataSize: %zu bytes Operand: ", DataSize);
+		SMP_msg("ERROR: Unknown DataSize: %zu bytes ", DataSize);
 		PrintOperand(CurrOp);
 		SMP_msg("\n");
 	}
@@ -292,6 +294,35 @@ bool IsEqOp(op_t Opnd1, op_t Opnd2) {
 		}; // end switch (Opnd1.type)}
 } // end of function IsEqOp()
 
+// Are operands equal, ignoring bitwidth differences for register operands? (The dtyp field is never compared.)
+bool IsEqOpIgnoreBitwidth(op_t Opnd1, op_t Opnd2) {
+		if (Opnd1.type != Opnd2.type)
+			return false;
+		switch (Opnd1.type) {
+			case o_void: return true;
+			case o_reg: return (Opnd1.reg == Opnd2.reg); // register number only; operand width is ignored
+			case o_mem: return (Opnd1.addr == Opnd2.addr);
+			case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib == Opnd2.sib);
+							else return false; // no SIB != has SIB; NOTE(review): two SIB-less phrases also compare unequal here -- confirm intended
+			case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB)
+							  return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr));
+						  else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB))
+							  return ((Opnd1.addr == Opnd2.addr) && (Opnd1.reg == Opnd2.reg));
+							else return false; // no SIB != has SIB
+			case o_imm: return (Opnd1.value == Opnd2.value);
+			case o_far:  // fall through to o_near case
+			case o_near: return (Opnd1.addr == Opnd2.addr);
+			case o_trreg:  // fall through
+			case o_dbreg:  // fall through
+			case o_crreg:  // fall through
+			case o_fpreg:  // fall through
+			case o_mmxreg: // fall through
+			case o_xmmreg: return (Opnd1.reg == Opnd2.reg); // no subword regs to deal with
+
+			default: SMP_msg("ERROR: Unknown operand type in IsEqOpIgnoreBitwidth.\n"); return false; // name this function, not IsEqOp
+		}; // end switch (Opnd1.type)
+} // end of function IsEqOpIgnoreBitwidth()
+
 // We need to make subword registers equal to their containing registers when we
 //  do comparisons, so that we will realize that register EAX is killed by a prior DEF
 //  of register AL, for example, and vice versa. To keep sets ordered strictly,
@@ -318,6 +349,14 @@ ushort MDCanonicalizeSubReg(const ushort Reg1) {
 	return SReg1;
 } // end of MDCanonicalizeSubReg()
 
+// Register operands get MDCanonicalizeSubReg() applied and dtyp forced to dword; all other operand types are untouched.
+void CanonicalizeOpnd(op_t &TempOp) {
+	if (o_reg != TempOp.type)
+		return; // only register operands are rewritten
+	TempOp.dtyp = dt_dword; // set to 32-bit width
+	TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
+} // end of CanonicalizeOpnd()
+
 // In SSA computations, we are storing the GlobalNames index into the op_t fields
 //  n, offb, and offo. This function extracts an unsigned int from these three 8-bit
 //  fields.
@@ -627,13 +666,14 @@ void PrintOperand(op_t Opnd) {
 	else if (Opnd.type == o_displ) {
 		SMP_msg(" Operand: memory displ :");
 		ea_t offset = Opnd.addr;
+		int SignedOffset = (int) offset;
 		if (Opnd.hasSIB) {
 			PrintSIB(Opnd);
-			SMP_msg(" displ %d", offset);
+			SMP_msg(" displ %d", SignedOffset);
 		}
 		else {
 			ushort BaseReg = Opnd.reg;
-			SMP_msg(" reg %s displ %d", RegNames[BaseReg], offset);
+			SMP_msg(" reg %s displ %d", RegNames[BaseReg], SignedOffset);
 		}
 	}
 	else if (Opnd.type == o_reg) {
@@ -856,7 +896,7 @@ DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) {
 	if (o_reg == Ref.type) {
 		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
 		//  and type inference systems.
-		Ref.reg = MDCanonicalizeSubReg(Ref.reg);
+		CanonicalizeOpnd(Ref);
 	}
 	this->Operand = Ref;
 	this->OpType = Type;
@@ -1391,7 +1431,7 @@ void SMPDefUseChain::SetName(op_t Name) {
 	if (o_reg == Name.type) {
 		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
 		//  and type inference systems.
-		Name.reg = MDCanonicalizeSubReg(Name.reg);
+		CanonicalizeOpnd(Name);
 	}
 	this->SSAName = Name;
 	return;
@@ -1444,7 +1484,7 @@ SMPDUChainArray::SMPDUChainArray(op_t Name, ea_t FirstAddrMinusOne) {
 	if (o_reg == Name.type) {
 		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
 		//  and type inference systems.
-		Name.reg = MDCanonicalizeSubReg(Name.reg);
+		CanonicalizeOpnd(Name);
 	}
 	this->SSAName = Name;
 	this->BaseAddr = FirstAddrMinusOne;
@@ -1465,7 +1505,7 @@ void SMPDUChainArray::SetName(op_t Name, ea_t FirstAddrMinusOne) {
 	if (o_reg == Name.type) {
 		// We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis
 		//  and type inference systems.
-		Name.reg = MDCanonicalizeSubReg(Name.reg);
+		CanonicalizeOpnd(Name);
 	}
 	this->SSAName = Name;
 	this->BaseAddr = FirstAddrMinusOne;
diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h
index dd99f606..32802296 100644
--- a/SMPDataFlowAnalysis.h
+++ b/SMPDataFlowAnalysis.h
@@ -111,6 +111,9 @@ const char *MDGetRegName(op_t RegOp); // Distinguishes subword regs from their p
 #define X86_MOVE_INSTRUCTION NN_mov
 #define MD_MOVE_INSTRUCTION X86_MOVE_INSTRUCTION
 
+#define X86_DEFAULT_RETURN_ADDRESS_SIZE 4
+#define MD_DEFAULT_RETURN_ADDRESS_SIZE X86_DEFAULT_RETURN_ADDRESS_SIZE
+
 // Debug: print one operand from an instruction or DEF or USE list.
 void PrintDefUse(ulong feature, int OpNum);
 void PrintSIB(op_t Opnd);
@@ -151,6 +154,9 @@ bool MDKnownOperandType(op_t TempOp);
 // MACHINE DEPENEDENT: Convert subword register to its enclosing register.
 ushort MDCanonicalizeSubReg(const ushort Reg1);
 
+// If TempOp is a register, call MDCanonicalizeSubReg() on it.
+void CanonicalizeOpnd(op_t &TempOp);
+
 // MACHINE DEPENDENT: Ordering function for register enum values, to use in set containers.
 bool MDLessReg(const ushort Reg1, const ushort Reg2);
 
@@ -163,6 +169,9 @@ int HashGlobalNameAndSSA(op_t DefOp, int SSANum);
 // Are operands equal?
 bool IsEqOp(op_t Opnd1, op_t Opnd2);
 
+// Are operands equal, ignoring bitwidth differences for register operands?
+bool IsEqOpIgnoreBitwidth(op_t Opnd1, op_t Opnd2);
+
 // Comparison class to use in sorted containers of IDA Pro addresses (type ea_t).
 class LessAddr {
 public:
@@ -418,6 +427,7 @@ struct FineGrainedInfo {
 #define FG_MASK_BITWIDTH_64 64
 #define FG_MASK_BITWIDTH_128 128
 #define FG_MASK_BITWIDTH_256 256
+#define FG_MASK_BITWIDTH_80 80   // two bits are set here
 #define FG_MASK_BITWIDTH_96 512
 #define FG_MASK_BITWIDTH_48 1024
 #define FG_MASK_BITWIDTH_24 2048
diff --git a/SMPFunction.cpp b/SMPFunction.cpp
index 44c9a336..204b5072 100644
--- a/SMPFunction.cpp
+++ b/SMPFunction.cpp
@@ -72,6 +72,7 @@ using namespace std;
 #define SMP_DEBUG_XOR 0
 #define SMP_DEBUG_CHUNKS 1  // tracking down tail chunks for functions
 #define SMP_DEBUG_FRAMEFIXUP 1
+#define SMP_DEBUG_FRAMEFIXUP_VERBOSE 0
 #define SMP_DEBUG_DATAFLOW 0
 #define SMP_DEBUG_DATAFLOW_VERBOSE 0
 #define SMP_DEBUG_TYPE_INFERENCE 0
@@ -86,7 +87,6 @@ using namespace std;
 #define SMP_DEBUG_SWITCH_TABLE_INFO 0
 #define SMP_OPTIMIZE_BLOCK_PROFILING 0
 #define SMP_DECLARE_INDIRECT_TARGETS_UNSAFE 1
-#define SMP_ANALYZE_STACK_POINTER 0
 #define SMP_AUDIT_STACK_POINTER_DELTAS 0
 #define SMP_COMPARE_IDA_STARS_STACK_POINTER_DELTAS 1
 
@@ -150,6 +150,7 @@ SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) {
 	this->UseFP = false;
 	this->StaticFunc = false;
 	this->LibFunc = false;
+	this->HasReturnInst = false;
 	this->IndirectCalls = false;
 	this->UnresolvedIndirectCalls = false;
 	this->IndirectJumps = false;
@@ -158,8 +159,10 @@ SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) {
 	this->SharedChunks = false;
 	this->UnsharedChunks = false;
 	this->CallsAlloca = false;
+	this->PushAfterLocalVarAlloc = false;
 	this->AnalyzedSP = false;
 	this->STARSStackPtrAnalysisPerformed = false;
+	this->StackAdjustmentComputed = false;
 	this->BuiltRTLs = false;
 #if 1  // default to unsafe
 	this->SafeFunc = false;
@@ -198,10 +201,14 @@ SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) {
 	this->AllocPointDelta = 0;
 	this->MinStackDelta = 0;
 	this->MaxStackDelta = 0;
+	this->MinStackAccessOffset = 0;
+	this->MaxStackAccessLimit = 0;
 	this->NetStackDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
 	this->PreAllocStackDelta = CALLING_CONVENTION_DEFAULT_PREFRAMEALLOC_STACK_DELTA;
 	this->FramePointerStackDelta = 0;
+	this->GlobalStackAdjustment = 0;
 	this->LocalVarOffsetLimit = 0;
+	this->IDAReturnAddressOffset = 0;
 
 	this->ReturnAddrStatus = FUNC_UNKNOWN;
 	this->SetIsSpeculative(false);
@@ -395,9 +402,112 @@ void SMPFunction::AddCallSource(ea_t addr) {
 	ea_t FirstAddr = FuncInfo->startEA;
 	assert(BADADDR != FirstAddr);
 	this->AllCallSources.insert(FirstAddr);
+	this->AllCallSites.insert(addr);
 	return;
 } // end of SMPFunction::AddCallSource()
 
+// Record that stack operand OldOp at InstAddr normalizes to NormalizedOp (NormalizedStackOpsMap, plus the inverse map when renormalization is possible).
+void SMPFunction::AddNormalizedStackOperand(op_t OldOp, ea_t InstAddr, op_t NormalizedOp) {
+	bool DuplicateCase = false; // e.g. inc [esp+8] will have [esp+8] as a DEF and a USE and maps will see [esp+8] twice
+	pair<map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator, bool> InsertResult;
+	pair<map<pair<op_t, ea_t>, map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator, LessDefinition>::iterator, bool> InverseInsertResult;
+	pair<op_t, ea_t> OldValue(OldOp, InstAddr);
+	pair<op_t, ea_t> InverseValue(OldOp, InstAddr); // OldOp was NormalizedOp when it was inserted previously
+	pair<pair<op_t, ea_t>, op_t> InsertValue(OldValue, NormalizedOp);
+	pair<op_t, ea_t> InverseInsertValue(NormalizedOp, InstAddr);
+	map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator OldIter = this->NormalizedStackOpsMap.begin();
+	pair<pair<op_t, ea_t>, map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator> InverseInsertTriple(InverseInsertValue, OldIter);
+	map<pair<op_t, ea_t>, map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator, LessDefinition>::iterator InverseIter; // same map type as InverseInsertResult.first
+
+	// If this function calls alloca(), stack operands could be normalized more than once.
+	//  Before we proceed, we update an old entry instead of inserting a new entry.
+	if (this->CallsAlloca || this->HasPushAfterFrameAlloc()) {
+		InverseIter = this->InverseNormalizedStackOpsMap.find(InverseValue);
+		if (InverseIter != this->InverseNormalizedStackOpsMap.end()) {
+			// We have our alloca() update case. We formerly mapped <A, InstAddr> to B.
+			//  Now B is being normalized to C. All we want to do is change the original
+			//  map entry so that we map <A, InstAddr> to C. In this manner, A is always the
+			//  original un-normalized stack op, available for lookup from an RTL.
+			OldIter = InverseIter->second; // OldIter points at map of <A, InstAddr> to B.
+			OldIter->second = NormalizedOp; // Change B to C
+			// Now we want to erase the Inverse map entry and insert a new one that maps
+			//  <C, InstAddr> to OldIter instead of mapping <B, InstAddr> to OldIter.
+			(void) this->InverseNormalizedStackOpsMap.erase(InverseIter);
+			InverseInsertTriple.second = OldIter;
+			InverseInsertResult = this->InverseNormalizedStackOpsMap.insert(InverseInsertTriple);
+			assert(InverseInsertResult.second);
+			return;
+		}
+		else {
+			// We might have the final difficult case: We have a combination of CallsAlloca and the
+			//  DuplicateCase described below (e.g. an increment of a stack location produces a DEF
+			//  and a USE of the same location, causing duplicate mappings to be attempted). We need
+			//  to detect the duplicate case here. What will happen is that, on the first call to this
+			//  method, we will map <A, InstAddr> to B, and reverse-map <B, InstAddr> to A. On the second
+			//  call to this method, we will detect the duplicate case and exit. On the third call, caused
+			//  by CallsAlloca, we are asked to map <B, InstAddr> to C, and we will correctly hit the code
+			//  just above, in the if-clause, to fix the A->B mapping to be an A->C mapping, and we will
+			//  erase the reverse mapping of B->A and replace it with the C->A reverse mapping. On the
+			//  fourth call to this method, we will not find a reverse mapping B->A any more, so the if-clause
+			//  does not execute. We can only detect this case by finding an existing C->A reverse mapping
+			//  and an existing A->C mapping to confirm our inference.
+			pair<op_t, ea_t> TestInverseValue(NormalizedOp, InstAddr);
+			InverseIter = this->InverseNormalizedStackOpsMap.find(TestInverseValue);
+			if (InverseIter != this->InverseNormalizedStackOpsMap.end()) {
+				// Found existing C->A inverse mapping. Is there an A->C mapping to confirm
+				//  our interpretation of the situation?
+				pair<op_t, ea_t> TestOldValue(InverseIter->second->first.first, InstAddr);
+				map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator TestOldIter;
+				TestOldIter = this->NormalizedStackOpsMap.find(TestOldValue);
+				if (TestOldIter != this->NormalizedStackOpsMap.end()) {
+					// We found a mapping from <A, InstAddr>.
+					if (IsEqOp(NormalizedOp, TestOldIter->second)) {
+						// The mapping is A->C as suspected.
+						return; // duplication; nothing to do in either map.
+					}
+				}
+			}
+		}
+	}
+	// At this point, we have no inverse map entry to worry about, because we are
+	//  normalizing this operand for the first time.
+	InsertResult = this->NormalizedStackOpsMap.insert(InsertValue);
+	OldIter = InsertResult.first;
+	if (!(InsertResult.second)) {
+		// Already had an entry. That should mean a rare case such as "inc [esp+8]" which
+		//  produces a USE and a DEF of the same address. We can confirm that the map has
+		//  the same normalized operand we were trying to insert. Otherwise, the collision
+		//  is fatal.
+		op_t OldOldOp = InsertResult.first->first.first;
+		op_t OldNormalizedOp = InsertResult.first->second;
+		assert(IsEqOp(OldOldOp, OldOp) && IsEqOp(OldNormalizedOp, NormalizedOp));
+		DuplicateCase = true;
+	}
+	if (this->CallsAlloca || this->HasPushAfterFrameAlloc()) {
+		// We need to add an entry to the inverse map.
+		InverseInsertTriple.second = OldIter;
+		InverseInsertResult = this->InverseNormalizedStackOpsMap.insert(InverseInsertTriple);
+		assert(InverseInsertResult.second || DuplicateCase);
+	}
+	return;
+} // SMPFunction::AddNormalizedStackOperand()
+
+// Look up the normalized form of stack operand RTLop recorded for InstAddr; non-stack operands pass through unchanged.
+op_t SMPFunction::GetNormalizedOperand(ea_t InstAddr, op_t RTLop) {
+	if (!MDIsStackAccessOpnd(RTLop, this->UsesFramePointer())) {
+		return RTLop; // not a stack access, so there is nothing to look up
+	}
+
+	// A stack operand is expected to have a map entry keyed by <RTLop, InstAddr>;
+	//  a missing entry trips the assertion below.
+	map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator MapIter;
+	pair<op_t, ea_t> LookupKey(RTLop, InstAddr);
+	MapIter = this->NormalizedStackOpsMap.find(LookupKey);
+	assert(MapIter != this->NormalizedStackOpsMap.end());
+	return MapIter->second;
+} // end of SMPFunction::GetNormalizedOperand()
+
+
 // Six methods to set values into the maps of global reg/SSA to FG info.
 //  For local names, see corresponding methods in SMPBasicBlock.
 void SMPFunction::UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo) {
@@ -574,11 +684,6 @@ void SMPFunction::ComputeTempReachingDefs(op_t TempOp, ea_t UseAddr) {
 			assert(InsertResult.second);
 		}
 	}
-	if (this->TempReachingDefs.empty() && (o_reg == TempOp.type)) {
-		SMP_msg("WARNING: Use of uninitialized variable at %x ", UseAddr);
-		PrintOperand(TempOp);
-		SMP_msg(" \n");
-	}
 	return;
 } // end of SMPFunction::ComputeTempReachingDefs()
 
@@ -636,36 +741,108 @@ bool SMPFunction::FindReachingStackDelta(sval_t &StackDelta) {
 	return UniqueDelta;
 } // end of SMPFunction::FindReachingStackDelta()
 
+// Find any apparent stack adjustment after the call instruction at CallAddr,
+//  confining our search to the basic block containing CallAddr.
+// Returns the value produced by the block analysis; zero is returned as is.
+sval_t SMPFunction::GetStackAdjustmentForCallee(ea_t CallAddr) {
+	SMPBasicBlock *CallBlock = this->GetBlockFromInstAddr(CallAddr);
+	assert(NULL != CallBlock);
+
+	// The block analysis result is the callee adjustment. Only non-zero
+	//  results are worth a log message.
+	sval_t CalleeAdjustment = CallBlock->ComputeStackAdjustmentAfterCall(CallAddr);
+	if (0 != CalleeAdjustment) {
+		SMP_msg("INFO: Block analysis produced callee adjustment of %d bytes after %x\n", CalleeAdjustment, CallAddr);
+	}
+
+	return CalleeAdjustment;
+} // end of SMPFunction::GetStackAdjustmentForCallee()
+
 // Get stack delta from a callee function that is unable to provide the info from
 //  its own analyses (e.g. analyses failed or have not been performed yet, due to
 //  a mutually recursive clique in the call graph). We have three approaches in
 //  this case: Use a default value, consult IDA Pro's analyses, or see if we can
 //  detect a stack adjustment after the call instruction, from which we could infer
-//  the stack delta of the callee.
-sval_t SMPFunction::GetStackDeltaForCallee(ea_t CallAddr) {
+//  the stack delta of the callee. We choose the last approach, and find the smallest
+//  adjustment among all call sites for the callee.
+sval_t SMPFunction::GetStackDeltaForCallee(ea_t CallTargetAddr) {
 	bool success = false;
 	sval_t CalleeDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
 
-#if 0
-#ifdef STARS_IDA_INTERFACE
-	if (this->FuncInfo.flags & (FUNC_SP_READY | FUNC_PURGED_OK)) {
-		// FUNC_SP_READY => AnalyzedSP; FUNC_PURGED_OK => analysis of effect on stack succeeded.
-		if (this->FuncInfo.argsize != 0) {
+	SMPFunction *CalleeFunc = this->GetProg()->FindFunction(CallTargetAddr);
+	if (NULL != CalleeFunc) {
+		sval_t GlobalAdjustment = CalleeFunc->ComputeGlobalStackAdjustment();
+		if (0 != GlobalAdjustment) {
+			CalleeDelta -= GlobalAdjustment;
+			SMP_msg("INFO: Global stack adjustment analysis produced callee delta of %d bytes after %x\n", CalleeDelta, CallTargetAddr);
 		}
 	}
-#endif // STARS_IDA_INTERFACE
-#endif
-
-	SMPBasicBlock *CallBlock = this->GetBlockFromInstAddr(CallAddr);
-	sval_t BlockAnalysisDelta = CallBlock->ComputeStackAdjustmentAfterCall(CallAddr);
-	if (0 != BlockAnalysisDelta) {
-		CalleeDelta -= BlockAnalysisDelta;
-		SMP_msg("INFO: Block analysis produced callee delta of %d bytes after %x\n", CalleeDelta, CallAddr);
-	}
 
 	return CalleeDelta;
 } // end of SMPFunction::GetStackDeltaForCallee()
 
+// Compute a consistent (or smallest) stack adjustment seen program-wide after all calls to the current function.
+//  A non-zero value is returned only when more than one call site exists, no examined call site shows a
+//  zero adjustment, and all recorded non-zero adjustments have the same sign.
+//  The result is cached in GlobalStackAdjustment and reused by later calls.
+sval_t SMPFunction::ComputeGlobalStackAdjustment(void) {
+	// Use cached value if already computed.
+	if (this->StackAdjustmentComputed) {
+		return this->GlobalStackAdjustment;
+	}
+
+	// Sentinel bounds: an adjustment is recorded only if it lies strictly between them.
+	//  A tracker still equal to its bound means that no adjustment of that sign was recorded.
+	const sval_t PositiveLimit = 10000;
+	const sval_t NegativeLimit = -10000;
+	sval_t SmallestPositive = PositiveLimit; // lowest positive adjustment detected
+	sval_t NearestNegative = NegativeLimit; // negative adjustment closest to zero detected
+	bool ZeroSeen = false;
+
+	// If only one call site, it is dangerous to draw conclusions about seeming "adjustments."
+	if (this->AllCallSites.size() > 1) {
+		set<ea_t>::iterator SiteIter = this->AllCallSites.begin();
+		while (!ZeroSeen && (SiteIter != this->AllCallSites.end())) {
+			ea_t SiteAddr = (*SiteIter);
+			++SiteIter;
+
+			// Find the function containing the call site; it performs the analysis of its own block.
+			func_t *CallerInfo = SMP_get_func(SiteAddr);
+			assert(NULL != CallerInfo);
+			SMPFunction *Caller = this->GetProg()->FindFunction(CallerInfo->startEA);
+			assert(NULL != Caller);
+			sval_t SiteAdjustment = Caller->GetStackAdjustmentForCallee(SiteAddr);
+
+			// Track, for each sign, the adjustment nearest to zero.
+			if (0 == SiteAdjustment) {
+				ZeroSeen = true; // any zero adjustment invalidates non-zero inferences; stop looking
+			}
+			else if (0 < SiteAdjustment) {
+				if (SiteAdjustment < SmallestPositive) {
+					SmallestPositive = SiteAdjustment;
+				}
+			}
+			else if (SiteAdjustment > NearestNegative) {
+				NearestNegative = SiteAdjustment;
+			}
+		}
+	}
+
+	// Only a consistent sign among the recorded adjustments yields a non-zero answer.
+	bool PositiveSeen = (SmallestPositive < PositiveLimit);
+	bool NegativeSeen = (NearestNegative > NegativeLimit);
+	sval_t GlobalAdjustment = 0;
+	if (!ZeroSeen && (PositiveSeen != NegativeSeen)) {
+		// Exactly one sign was seen; take the value of smallest magnitude for that sign.
+		GlobalAdjustment = PositiveSeen ? SmallestPositive : NearestNegative;
+	}
+
+	// Cache the value for future speed.
+	this->StackAdjustmentComputed = true;
+	this->GlobalStackAdjustment = GlobalAdjustment;
+	return GlobalAdjustment;
+} // end of SMPFunction::ComputeGlobalStackAdjustment()
+
 // Use IDA Pro stack pointer deltas instead of doing our own analysis.
 bool SMPFunction::UseIDAStackPointerDeltas(void) {
 	list<SMPInstr *>::iterator InstIter;
@@ -696,7 +873,10 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 	list<SMPInstr *>::iterator InstIter;
 	SMPInstr *CurrInst;
 	sval_t CurrentDelta = 0;
+	sval_t DeltaIncrement = 0; // change when reprocessing a block in alloca()-calling function
 	bool ConsistentNetDelta = true; // Net change to stack pointer is consistent at all RETURN locations
+	bool ConflictingValuesSeen = false; // At least one block was entered with multiple deltas
+	bool StackPointerRestoreSeen = false; // Stack pointer restored; must become true if ConflictingValuesSeen
 	bool ReturnSeen = false;
 	bool IDAProSucceeded = this->AnalyzedSP;
 	bool FirstBlockProcessed = false;
@@ -704,15 +884,14 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 	bool SPintoFP = false; // found move of stack pointer into frame pointer (FP init)
 
 #if SMP_COMPARE_IDA_STARS_STACK_POINTER_DELTAS
-	bool DebugFlag = (0 == strcmp("find_derivation", this->GetFuncName()));
-	bool TraceFlag = (0 == strcmp("_dl_start_profile", this->GetFuncName()));
-	bool IDATraceFlag = (0 == strcmp("do_length", this->GetFuncName()));
+	bool DebugFlag = (0 == strcmp("qsort", this->GetFuncName()));
+	bool TraceFlag = (0 == strcmp("qsort", this->GetFuncName()));
 #endif
 
-#if 1
+#if 0
 	// Temporarily pull the functions that call alloca out of the stack pointer delta computations, so
 	//  that we can focus on solving other problems.
-	if (this->CallsAlloca) {
+	if (this->CallsAlloca || this->HasPushAfterFrameAlloc()) {
 		if (!this->AnalyzedSP) {
 			(void) this->UseIDAStackPointerDeltas();
 			return false; // leave it unsolved
@@ -844,7 +1023,12 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 				// No error, already processed.
 				WorkList.pop_front(); // discard already processed block.
 			}
-			else if (this->CallsAlloca) {
+#if 1
+			else if (this->CallsAlloca || this->HasPushAfterFrameAlloc()) {
+#else
+			else {
+#endif
+				ConflictingValuesSeen = true;
 				// Calls to alloca() become additional stack allocations, which can produce
 				//  multiple possible stack deltas for an instruction if different paths
 				//  to the instruction do not hit the same alloca() calls, so it is not
@@ -859,17 +1043,20 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 				else {
 					CurrBlock->SetProcessed(false);
 					ReprocessingAllocaBlocks = true;
+					DeltaIncrement = IncomingDelta - PrevIncomingDelta;
 					continue;  // Make the loop come around and process this block again, using
 							   //  the new incoming delta. Because we do this only when it decreases
 							   //  the stack size as seen by this block, no infinite loop is possible.
 				}
 			}
+#if 1
 			else {
 				this->AnalyzedSP = false;
 				SMP_msg("ERROR: Stack delta: PrevIncoming is %d NewIncoming is %d at %x\n",
 					PrevIncomingDelta, IncomingDelta, (*InstIter)->GetAddr());
 				WorkList.clear();
 			}
+#endif
 		}
 		else { // not already processed
 			// ReprocessingAllocaBlocks => Reaching definitions sets have already been computed; just need to do stack delta analysis
@@ -885,23 +1072,29 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 			WorkList.pop_front();
 			for (InstIter = CurrBlock->GetFirstInstr(); InstIter != CurrBlock->GetLastInstr(); ++InstIter) {
 				CurrInst = (*InstIter);
+				if (CurrInst->IsFloatNop()) {
+					continue; // skip marker instruction
+				}
 				ea_t InstAddr = CurrInst->GetAddr();
 				if (InstAddr == this->GetFirstFrameAllocInstAddr()) {
 					// Record the reset point for frame deallocations
 					this->PreAllocStackDelta = IncomingDelta;
 				}
+
+				CurrInst->SetStackPtrOffset(IncomingDelta);
+
+				// Search for tail calls, defined strictly as having an incoming stack delta of zero and
+				//  being jumps to far chunks.
+				if ((0 == IncomingDelta) && (CurrInst->IsBranchToFarChunk())) {
+					CurrInst->SetTailCall();
 #if 0
-				if (this->CallsAlloca) { // keep deltas in a set; paths can lead to multiple deltas per inst
-					bool DeltaAlreadyFound = CurrInst->FindStackPtrDelta(IncomingDelta);
-				}
-				else {
+					SMP_msg("Found tail call at %x from %s: %s\n", InstAddr, this->GetFuncName(),
+							CurrInst->GetDisasm());
 #endif
-					CurrInst->SetStackPtrOffset(IncomingDelta);
-#if 0
 				}
-#endif
+
 #if SMP_COMPARE_IDA_STARS_STACK_POINTER_DELTAS
-				if (DebugFlag && IDAProSucceeded && !this->CallsAlloca) {
+				if (DebugFlag && IDAProSucceeded && !(this->CallsAlloca || this->HasPushAfterFrameAlloc())) {
 					sval_t IDAProDelta = get_spd(this->GetFuncInfo(), InstAddr);
 					if ((IDAProDelta != IncomingDelta) && (!CurrInst->MDIsHaltInstr())) {
 						// IDA Pro special-cases the HALT instruction to make it appear that the
@@ -918,26 +1111,7 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 
 				// As soon as the stack ptr offset has been set for the current instruction, we can normalize
 				//  all of its stack DEFs and USEs.
-				bool StackOpsChanged = CurrInst->MDNormalizeStackOps(UseFP, this->GetFramePtrStackDelta(), ReprocessingAllocaBlocks);
-				if (!FirstBlockProcessed) { // Look for initialization of frame pointer, record its stack delta
-					FirstBlockProcessed = CurrInst->IsLastInBlock();
-					if (!FPSaved) { // still looking for "push <framepointerreg>"
-						if (CurrInst->MDIsPushInstr() && CurrInst->GetCmd().Operands[0].is_reg(MD_FRAME_POINTER_REG)) {
-							FPSaved = true;
-						}
-					}
-					else if (!SPintoFP) { // found "push <framepointerreg>", looking for "fp := sp"
-						insn_t CurrCmd = CurrInst->GetCmd();
-						if ((CurrCmd.itype == MD_MOVE_INSTRUCTION) 
-							&& (CurrInst->GetFirstDef()->GetOp().is_reg(MD_FRAME_POINTER_REG))
-							&& (CurrInst->GetFirstUse()->GetOp().is_reg(MD_STACK_POINTER_REG))) {
-							SPintoFP = true;
-							this->FramePointerStackDelta = IncomingDelta;
-							FirstBlockProcessed = true; // stop looking
-							assert(this->UsesFramePointer());
-						}
-					}
-				}
+				bool StackOpsChanged = CurrInst->MDNormalizeStackOps(UseFP, this->GetFramePtrStackDelta(), ReprocessingAllocaBlocks, DeltaIncrement);
 
 				// Dataflow equation for upward exposed variables: If a variable has not been
 				//  killed yet in this block, starting from the top of the block, and it is used
@@ -946,6 +1120,7 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 				if (!ReprocessingAllocaBlocks) { // Only compute on first pass through block
 					for (CurrUse = CurrInst->GetFirstUse(); CurrUse != CurrInst->GetLastUse(); ++CurrUse) {
 						op_t UseOp = CurrUse->GetOp();
+						CanonicalizeOpnd(UseOp);
 						if (MDIsDataFlowOpnd(UseOp, this->UsesFramePointer())) {
 							// We have a register or stack operand. If stack operand, it is normalized, i.e. EBP-4 might be ESP-8,
 							//  where the ESP-8 refers to the value of ESP upon entry to the function, not its current value.
@@ -968,7 +1143,7 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 				op_t CopyOperand;
 				bool SavedDeltaHasNewValue = false;
 				bool ErrorFlag = false;
-				if (CurrInst->MDIsStackPtrSaveOrRestore(this->UsesFramePointer(), StackPtrSaved, SavedDelta, CopyOperand, ErrorFlag)) {
+				if (CurrInst->MDIsStackPtrSaveOrRestore(this->UsesFramePointer(), this->GetFramePtrStackDelta(), StackPtrSaved, SavedDelta, CopyOperand, ErrorFlag)) {
 					// NOTE: If CopyOperand is a stack location, it is normalized.
 					if (StackPtrSaved) {
 						// Insert new entry into the StackPtrCopySet. For the ReprocessingAllocaBlocks case, this might be
@@ -978,13 +1153,20 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 							SMP_msg("INFO: Stack delta saved: %d at %x\n", SavedDelta, InstAddr);
 						}
 					}
-					else { // stack pointer or frame pointer was restored
+					else { // stack pointer was restored from saved value
+						StackPointerRestoreSeen = true;
 						SavedDeltaHasNewValue = true; // no need to compute effect of restore instruction later
+						if (ReprocessingAllocaBlocks) {
+							// Now that the stack pointer has been restored, the effect of the alloca() should
+							//  be undone. We no longer need to adjust delta values for the rest of the block.
+							DeltaIncrement = 0;
+						}
 					}
 				} // end if (CurrInst->MDIsStackPtrSaveOrRestore())
 				else if (ErrorFlag) {
 					this->AnalyzedSP = false;
 					WorkList.clear();
+					SMP_msg("ERROR: ErrorFlag=true from MDIsStackPtrSaveOrRestore() at %x\n", InstAddr);
 					break;
 				}
 
@@ -1024,13 +1206,6 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 						break;
 					}
 					SMPitype FlowType = CurrInst->GetDataFlowType();
-					// Search for tail calls, defined strictly as having an incoming stack delta of zero and
-					//  being jumps to far chunks.
-					if ((0 == IncomingDelta) && (CurrInst->IsBranchToFarChunk())) {
-						CurrInst->SetTailCall();
-						SMP_msg("Found tail call at %x from %s: %s\n", InstAddr, this->GetFuncName(),
-								CurrInst->GetDisasm());
-					}
 					IncomingDelta += CurrentDelta;
 					if ((RETURN == FlowType) && (!CurrInst->IsCondTailCall())) {
 						// We hope to see a consistent outgoing delta from all RETURN points.
@@ -1049,11 +1224,22 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 						else { // First RETURN statement seen.
 							ReturnSeen = true;
 							this->NetStackDelta = IncomingDelta;
-	#if SMP_AUDIT_STACK_POINTER_DELTAS
-							if (0 != IncomingDelta) {
-								SMP_msg("WARNING: Stack delta not zero after return instruction at %x\n", CurrInst->GetAddr());
+#if SMP_AUDIT_STACK_POINTER_DELTAS
+							if (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA != IncomingDelta) {
+								SMP_msg("WARNING: Stack delta not %d after return instruction at %x\n", 
+									CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA, CurrInst->GetAddr());
 							}
-	#endif
+#endif
+						}
+						// If we permitted inconsistent stack deltas previously, then the stack pointer has to
+						//  have been restored, e.g. if we allocate a frame with sub esp,32 and then we later
+						//  have paths that pass through an alloca() call, a push, etc., then the alloca() or
+						//  push will not be undone by add esp,32. It must be undone by something like mov esp,ebp.
+						if (ConflictingValuesSeen && !StackPointerRestoreSeen) {
+							SMP_msg("ERROR: Inconsistent stack deltas seen, no stack pointer restore before return instruction at %x\n", CurrInst->GetAddr());
+							this->AnalyzedSP = false;
+							WorkList.clear();
+							break;
 						}
 					}
 				} // end if (SavedDeltaHasNewValue) ... else ...
@@ -1077,10 +1263,22 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 	} while (!WorkList.empty());
 
 	this->STARSStackPtrAnalysisPerformed = true;
-	if (this->AnalyzedSP && (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA != this->NetStackDelta)) {
-		SMP_msg("WARNING: Non-default stack ptr delta %d for function: %s\n", this->NetStackDelta, this->GetFuncName());
+	if (this->AnalyzedSP) {
+		if (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA != this->NetStackDelta) {
+			SMP_msg("WARNING: Non-default stack ptr delta %d for function: %s\n", this->NetStackDelta, this->GetFuncName());
+		}
+		if (this->StackAdjustmentComputed 
+			&& (this->GlobalStackAdjustment != (CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA - this->NetStackDelta))) {
+			// Oops. When program graph cycles caused us to try to compute the GlobalStackAdjustment as our best guess
+			//  for this function's effect on the stack delta, we told our callers that these three values would cancel out.
+			//  They do not. Our callers have now been using a bad stack delta for their call instructions. Too late for
+			//  anything but a diagnostic message.
+				SMP_msg("ERROR: Earlier GlobalStackAdjustment computation %d does not agree with current NetStackDelta result for function: %s\n",
+					this->GlobalStackAdjustment, this->GetFuncName());
+		}
 	}
 
+
 	if (IDAProSucceeded) {
 		if (!this->AnalyzedSP) {
 			SMP_msg("ERROR: Stack Ptr Delta Analysis succeeded in IDA, failed in STARS for %x : %s\n", this->FirstEA,
@@ -1093,6 +1291,17 @@ bool SMPFunction::AnalyzeStackPointerDeltas(void) {
 				this->GetFuncName());
 		}
 	}
+	if (!this->AnalyzedSP) {
+		(void) this->UseIDAStackPointerDeltas();
+	}
+
+	// Cannot keep the reaching defs around on huge benchmarks, or we run out of memory.
+	//  Once we have SSA form, we can obtain reaching defs info on the fly if we want it.
+	list<SMPBasicBlock *>::iterator BlockIter;
+	for (BlockIter = this->Blocks.begin(); BlockIter != this->Blocks.end(); ++BlockIter) {
+		(*BlockIter)->FreeReachingDefsMemory();
+	}
+
 	return this->AnalyzedSP;
 } // end of SMPFunction::AnalyzeStackPointerDeltas()
 
@@ -1172,14 +1381,14 @@ void SMPFunction::FindAllAllocsAndDeallocs(void) {
 				// NOTE: We might want to extend this in the future to
 				//  handle functions that have no locals.  **!!**
 				bool FixedUseFP = MDFixUseFP();
-#if SMP_DEBUG_CONTROLFLOW
-				SMP_msg("Returned from MDFixUseFP()\n");
-#endif
 #if SMP_DEBUG_FRAMEFIXUP
 				if (FixedUseFP) {
 					SMP_msg("Fixed UseFP in %s\n", this->GetFuncName());
 				}
 #endif
+				if (this->UsesFramePointer()) { // now that MDFixUseFP() has validated this flag ...
+					this->FindFramePointerDelta(); // find stack delta that is saved in frame pointer in function prologue
+				}
 			}
 			else if (FoundAllocInstr) {
 				// We can now start searching for the DeallocInstr.
@@ -1210,7 +1419,7 @@ void SMPFunction::FindAllAllocsAndDeallocs(void) {
 #endif
 #if SMP_DEBUG_FRAMEFIXUP
 			if (BADADDR == this->LocalVarsAllocInstr) {
-				SMP_msg("ERROR: Could not find stack frame allocation in %s\n",
+				SMP_msg("WARNING: Could not find stack frame allocation in %s\n",
 					this->GetFuncName());
 				SMP_msg("LocalVarsSize: %d  SavedRegsSize: %d ArgsSize: %d\n",
 					LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
@@ -1226,7 +1435,7 @@ void SMPFunction::FindAllAllocsAndDeallocs(void) {
 			// Could not find the frame deallocating instruction.  Bad.
 			// Emit diagnostic and use the last instruction in the
 			// function.
-			SMP_msg("ERROR: Could not find stack frame deallocation in %s\n",
+			SMP_msg("WARNING: Could not find stack frame deallocation in %s\n",
 				this->GetFuncName());
 		}
 #endif
@@ -1260,11 +1469,72 @@ void SMPFunction::FindAllAllocsAndDeallocs(void) {
 		}
 		this->LocalVarsAllocInstr = SaveAddr;
 		this->LocalVarsDeallocInstr = 0;
+		// As soon as we have found the local vars allocation,
+		//  we can try to fix incorrect sets of UseFP by IDA.
+		// NOTE: We might want to extend this in the future to
+		//  handle functions that have no locals.  **!!**
+		bool FixedUseFP = this->MDFixUseFP();
+#if SMP_DEBUG_FRAMEFIXUP
+		if (FixedUseFP) {
+			SMP_msg("Fixed UseFP in %s\n", this->GetFuncName());
+		}
+#endif
+		if (this->UsesFramePointer()) { // now that MDFixUseFP() has validated this flag ...
+			this->FindFramePointerDelta(); // find stack delta that is saved in frame pointer in function prologue
+		}
 	} // end if (LocalVarsSize > 0) ... else ...
 
 	return;
 } // end of SMPFunction::FindAllAllocsAndDeallocs()
 
+// Compute FramePointerStackDelta as soon as possible so that it is available for SyncAllRTs().
+//  Walks the first basic block looking for "push <framepointerreg>" and then "fp := sp"; see loop below.
+void SMPFunction::FindFramePointerDelta(void) {
+	bool FPSaved = false;  // have seen push of frame pointer reg
+	sval_t IncomingDelta = 0; // stack delta accumulated before the current instruction
+	list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
+#if SMP_USE_SSA_FNOP_MARKER
+	// Skip the marker instruction only if it is present, so that a real first instruction is never skipped.
+	if ((InstIter != this->Instrs.end()) && (*InstIter)->IsFloatNop())
+		++InstIter;
+#endif
+	bool FirstBlockProcessed = false;
+	while (!FirstBlockProcessed && (InstIter != this->Instrs.end())) {
+		SMPInstr *CurrInst = (*InstIter);
+		// Accumulate stack delta values.
+		sval_t CurrentDelta = CurrInst->AnalyzeStackPointerDelta(IncomingDelta, this->GetFramePtrStackDelta());
+		if (SMP_STACK_POINTER_BITWISE_AND_CODE == CurrentDelta) {
+			// For now, we ignore instructions that AND a constant into the stack pointer.
+			CurrentDelta = 0;
+		}
+		else if (SMP_STACK_DELTA_ERROR_CODE == CurrentDelta) {
+			this->AnalyzedSP = false;
+			break; // error exit
+		}
+		// Look for initialization of frame pointer, record its stack delta
+		FirstBlockProcessed = CurrInst->IsLastInBlock();
+		if (!FPSaved) { // still looking for "push <framepointerreg>"
+			if (CurrInst->MDIsPushInstr() && CurrInst->GetCmd().Operands[0].is_reg(MD_FRAME_POINTER_REG)) {
+				FPSaved = true;
+			}
+		}
+		else { // found "push <framepointerreg>", looking for "fp := sp"; the loop ends once it is found
+			insn_t CurrCmd = CurrInst->GetCmd();
+			if ((CurrCmd.itype == MD_MOVE_INSTRUCTION)
+				&& (CurrInst->GetFirstDef()->GetOp().is_reg(MD_FRAME_POINTER_REG))
+				&& (CurrInst->GetFirstUse()->GetOp().is_reg(MD_STACK_POINTER_REG))) {
+				this->FramePointerStackDelta = IncomingDelta;
+				FirstBlockProcessed = true; // stop looking
+				assert(this->UsesFramePointer());
+			}
+		}
+		IncomingDelta += CurrentDelta;
+		++InstIter;
+	}
+
+	return;
+} // end of SMPFunction::FindFramePointerDelta()
+
 // Figure out the different regions of the stack frame, and find the
 //  instructions that allocate and deallocate the local variables space
 //  on the stack frame.
@@ -1429,7 +1699,8 @@ bool SMPFunction::MDFixFrameInfo(void) {
 						Changed = true;
 #if SMP_DEBUG_FRAMEFIXUP
 						if (AllocValue + SavedRegsSize != OldFrameTotal)
-							SMP_msg("Total frame size changed: %s\n", this->GetFuncName());
+							SMP_msg("Total frame size changed: %s OldTotal: %d NewTotal: %ld\n",
+								this->GetFuncName(), OldFrameTotal, (AllocValue + SavedRegsSize));
 #endif
 						this->LocalVarsSize = (asize_t) AllocValue;
 						this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
@@ -1478,13 +1749,18 @@ bool SMPFunction::MDFixFrameInfo(void) {
 #if SMP_DEBUG_FRAMEFIXUP
 	if (Changed) {
 		SMP_msg("Fixed stack frame size info: %s\n", this->GetFuncName());
+#if SMP_DEBUG_FRAMEFIXUP_VERBOSE
 		SMPBasicBlock *CurrBlock = this->Blocks.front();
 		SMP_msg("First basic block:\n");
-		for (list<SMPInstr *>::iterator CurrInstr = CurrBlock->GetFirstInstr();
-			CurrInstr != CurrBlock->GetLastInstr();
-			++CurrInstr) {
+		list<SMPInstr *>::iterator CurrInstr = CurrBlock->GetFirstInstr();
+#if SMP_USE_SSA_FNOP_MARKER
+		++CurrInstr;
+#endif
+		while (CurrInstr != CurrBlock->GetLastInstr()) {
 			SMP_msg("%s\n", (*CurrInstr)->GetDisasm());
+			++CurrInstr;
 		}
+#endif
 	}
 #endif
 
@@ -1597,15 +1873,19 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
 //  this case.
 // NOTE: This logic should work for both Linux and Windows x86 prologues.
 bool SMPFunction::MDFixUseFP(void) {
+	bool OldUseFP = this->UsesFramePointer();
+	bool HasLocals = (0 < this->LocalVarsSize);
 	list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
-	ea_t addr = (*InstIter)->GetAddr();
+	ea_t addr;
 
 #if SMP_USE_SSA_FNOP_MARKER
 	++InstIter;  // skip marker instruction
 #endif
-	SMPInstr *CurrInst = (*InstIter);
+	SMPInstr *CurrInst;
 
+#if 0
 	if (!(this->UseFP)) {
+#endif
 		// See if we can detect the instruction "push ebp" followed by the instruction
 		//  "mov ebp,esp" in the first basic block. The instructions do not have to be
 		//  consecutive. If we find them, we will reset UseFP to true.
@@ -1613,6 +1893,8 @@ bool SMPFunction::MDFixUseFP(void) {
 		bool EBPSaved = false;
 		bool ESPintoEBP = false;
 		do {
+			CurrInst = (*InstIter);
+			addr = CurrInst->GetAddr();
 			FirstBlockProcessed = CurrInst->IsLastInBlock();
 			if (!EBPSaved) { // still looking for "push ebp"
 				if (CurrInst->MDIsPushInstr() && CurrInst->GetCmd().Operands[0].is_reg(R_bp)) {
@@ -1628,25 +1910,27 @@ bool SMPFunction::MDFixUseFP(void) {
 					FirstBlockProcessed = true; // exit loop
 				}
 			}
-			++InstIter;
-			CurrInst = (*InstIter);
-			addr = CurrInst->GetAddr();
 			// We must get EBP set to its frame pointer value before we reach the
 			//  local frame allocation instruction (i.e. the subtraction of locals space
 			//   from the stack pointer).
-			FirstBlockProcessed |= (addr >= this->LocalVarsAllocInstr);
+			if (HasLocals) {
+				FirstBlockProcessed |= (addr >= this->LocalVarsAllocInstr);
+			}
+			++InstIter;
 		} while (!FirstBlockProcessed);
 		// If we found ESPintoEBP, we also found EBPSaved first, and we need to change
 		//  this->UseFP to true and return true. Otherwise, return false.
 		this->UseFP = ESPintoEBP;
-		if (ESPintoEBP)
-			SMP_msg("INFO: MDFixUseFP reset UseFP to true for %s\n", this->GetFuncName());
-		return ESPintoEBP;
+		bool changed = (ESPintoEBP != OldUseFP);
+		if (changed)
+			SMP_msg("INFO: MDFixUseFP toggled UseFP for %s\n", this->GetFuncName());
+		return (changed);
+#if 0
 	} // end if (!(this->UseFP))
 
 	// At this point, this->UseFP must have been true on entry to this method and we will
 	//  check whether it should be reset to false.
-	while (addr < this->LocalVarsAllocInstr) {
+	while (addr <= this->LocalVarsAllocInstr) {
 		set<DefOrUse, LessDefUse>::iterator CurrDef = CurrInst->GetFirstDef();
 		while (CurrDef != CurrInst->GetLastDef()) {
 			if (CurrDef->GetOp().is_reg(R_bp))
@@ -1663,6 +1947,7 @@ bool SMPFunction::MDFixUseFP(void) {
 	this->UseFP = false;
 	SMP_msg("INFO: MDFixUseFP reset UseFP to false for %s\n", this->GetFuncName());
 	return true;
+#endif
 } // end of SMPFunction::MDFixUseFP()
 
 // Find the callee-saved reg offsets (negative offset from return address)
@@ -1773,6 +2058,7 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 	//  with other IDA Pro values, not with our corrected values.
 	list<SMPInstr *>::iterator InstIter;
 	bool DebugFlag = false;
+	bool FoundReturnAddress = false;
 	this->LocalVarOffsetLimit = -20000;
 #if SMP_DEBUG_STACK_GRANULARITY
 	DebugFlag |= (0 == strcmp("qSort3", this->GetFuncName()));
@@ -1819,6 +2105,10 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 			SMP_msg("ERROR: Rejected enormous stack offset %ld for var %s in func %s\n", offset, MemberName, this->GetFuncName());
 			continue;
 		}
+		if (!FoundReturnAddress && (2 == strlen(MemberName)) && (0 == strncmp(" r", MemberName, 2))) {
+			FoundReturnAddress = true;
+			this->IDAReturnAddressOffset = offset;
+		}
 		struct LocalVar TempLocal;
 		TempLocal.offset = offset;
 		TempLocal.size = Member->eoff - Member->soff; // audit later
@@ -1831,7 +2121,6 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 
 	// If AnalyzedSP is false, that is all we can do.
 	if (!this->AnalyzedSP) {
-		// No allocations; sometimes happens in library functions.
 		this->OutgoingArgsSize = 0;
 		this->MinStackDelta = 0;
 		this->AllocPointDelta = 0;
@@ -1849,7 +2138,7 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 	for ( ; InstIter != this->Instrs.end(); ++InstIter) {
 		SMPInstr *CurrInst = (*InstIter);
 		ea_t addr = CurrInst->GetAddr();
-		sval_t sp_delta = get_spd(this->GetFuncInfo(), addr);
+		sval_t sp_delta = CurrInst->GetStackPtrOffset();
 		if (sp_delta < this->MinStackDelta)
 			this->MinStackDelta = sp_delta;
 		if (sp_delta > this->MaxStackDelta)
@@ -1859,11 +2148,60 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 			//  because IDA updates the sp delta AFTER each instruction.
 			list<SMPInstr *>::iterator NextInstIter = InstIter;
 			++NextInstIter;
-			sp_delta = get_spd(this->GetFuncInfo(), (*NextInstIter)->GetAddr());
+			sp_delta = (*NextInstIter)->GetStackPtrOffset();
 			this->AllocPointDelta = sp_delta;
 		}
 	}
 
+	// Calculate min and max stack operand offsets accessed.
+	InstIter = this->Instrs.begin();
+#if SMP_USE_SSA_FNOP_MARKER
+	if ((*InstIter)->IsFloatNop())
+		++InstIter;  // skip marker instruction
+#endif
+	for ( ; InstIter != this->Instrs.end(); ++InstIter) {
+		SMPInstr *CurrInst = (*InstIter);
+		ea_t addr = CurrInst->GetAddr();
+		// Find the min and max stack offsets in DEFs and USEs.
+		op_t TempOp;
+		if (CurrInst->HasDestMemoryOperand() || CurrInst->MDIsPushInstr() || CurrInst->MDIsEnterInstr()) {
+			set<DefOrUse, LessDefUse>::iterator CurrDef;
+			for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) {
+				TempOp = CurrDef->GetOp();
+				if (TempOp.type != o_phrase && TempOp.type != o_displ)
+					continue;
+				this->UpdateMinMaxStackOffsets(CurrInst, TempOp);
+			} // end for all DEFs
+		}
+		if (CurrInst->HasSourceMemoryOperand() || CurrInst->MDIsPopInstr() || CurrInst->MDIsLeaveInstr() || CurrInst->MDIsLoadEffectiveAddressInstr()) {
+			if (CurrInst->MDIsLoadEffectiveAddressInstr()) {
+				TempOp = CurrInst->GetLeaMemUseOp();
+				if ((TempOp.type == o_phrase) || (TempOp.type == o_displ)) {
+					this->UpdateMinMaxStackOffsets(CurrInst, TempOp);
+				}
+			}
+			else {
+				set<DefOrUse, LessDefUse>::iterator CurrUse;
+				for (CurrUse = CurrInst->GetFirstUse(); CurrUse != CurrInst->GetLastUse(); ++CurrUse) {
+					TempOp = CurrUse->GetOp();
+					if ((TempOp.type != o_phrase) && (TempOp.type != o_displ))
+						continue;
+					this->UpdateMinMaxStackOffsets(CurrInst, TempOp);
+				} // end for all USEs
+			}
+		}
+	}
+	if (0 == this->MaxStackAccessLimit) {
+		// Never accessed any incoming args. However, we know the return address is on the stack,
+		//  and it is almost never accessed, so we want to record its presence.
+		this->MaxStackAccessLimit = MD_DEFAULT_RETURN_ADDRESS_SIZE;
+	}
+	if (this->MinStackAccessOffset > this->MinStackDelta) {
+		// Some functions allocate space that is not visibly accessed. We still want to make
+		//  our stack frame maps of maximum size, and MinStackDelta is used for normalizing offsets.
+		this->MinStackAccessOffset = this->MinStackDelta;
+	}
+
 	// IDA Pro sometimes fails to add stack frame members for all incoming args, etc.
 	//  Find and correct these omissions by examining stack accesses in instructions
 	//  and extend the LocalVarTable to cover whatever is out of range.
@@ -1891,8 +2229,7 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 		for (size_t VarIndex = 0; VarIndex < VarLimit; ++VarIndex) {
 			struct LocalVar TempLocEntry = this->LocalVarTable[VarIndex];
 			bool AboveLocalsRegion = (TempLocEntry.offset >= this->LocalVarsSize);
-			size_t TempSize = this->LocalVarTable[VarIndex + 1].offset
-				- TempLocEntry.offset;
+			size_t TempSize = this->LocalVarTable[VarIndex + 1].offset - TempLocEntry.offset;
 			int DiffSize = ((int) TempSize) - ((int) TempLocEntry.size);
 			// We don't have IDA Pro stack frame members for callee saved registers. This
 			//  omission can make it seem that there is a gap between the uppermost local
@@ -1901,7 +2238,7 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 			if (DiffSize > 0) { // We are expanding the size.
 				if (!AboveLocalsRegion && ((TempLocEntry.offset + TempLocEntry.size + DiffSize) > this->LocalVarsSize)) {
 					// Current local does not start above the locals region, but its new size will
-					//  carry it into the locals region.
+					//  carry it above the locals region.
 					if ((TempLocEntry.offset + TempLocEntry.size) > this->LocalVarsSize) {
 						// Weird. It already overlapped the callee saved regs region.
 						SMP_msg("WARNING: Local var at offset %ld size %zu in %s extends above local vars region.\n",
@@ -1915,7 +2252,7 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 			}
 			if (DiffSize < 0)
 				DiffSize = 0; // should not happen with sorted LocalVarTable unless duplicate entries.
-			if (DiffSize != 0) {
+			if (DiffSize != 0)  {
 #if SMP_DEBUG_STACK_GRANULARITY
 				SMP_msg("STACK INFO: Adjusted size for stack frame member at %ld in %s\n",
 					TempLocEntry.offset, this->GetFuncName());
@@ -1976,6 +2313,29 @@ void SMPFunction::SemiNaiveLocalVarID(void) {
 	return;
 } // end of SMPFunction::SemiNaiveLocalVarID()
 
+// Update MinStackAccessOffset and MaxStackAccessLimit if TempOp is a stack access.
+void SMPFunction::UpdateMinMaxStackOffsets(SMPInstr *CurrInst, op_t TempOp) {
+	ea_t offset;
+	int SignedOffset;
+	size_t DataSize;
+	bool UsedFramePointer;
+	bool IndexedAccess;
+	bool SignedMove;
+	bool UnsignedMove;
+
+	if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, this->MinStackDelta, offset, DataSize, UsedFramePointer,
+		IndexedAccess, SignedMove, UnsignedMove)) {
+		SignedOffset = (int) offset + (int) this->MinStackDelta; // Don't want zero-based for min/max finding
+		if (((sval_t) SignedOffset) < this->MinStackAccessOffset) {
+			this->MinStackAccessOffset = (sval_t) SignedOffset;
+		}
+		if (((sval_t)(SignedOffset + (int) DataSize)) > this->MaxStackAccessLimit) {
+			this->MaxStackAccessLimit = (sval_t)(SignedOffset + (int) DataSize);
+		}
+	}
+	return;
+} // end of SMPFunction::UpdateMinMaxStackOffsets()
+
 // Check and correct the LocalVarTable derived from IDA Pro stack frame members.
 //  Examine each instruction and see if any stack accesses are beyond the LocalVarTable
 //  and create new entries in the LocalVarTable if so.
@@ -1983,102 +2343,90 @@ bool SMPFunction::AuditLocalVarTable(void) {
 	list<SMPInstr *>::iterator InstIter;
 	int SignedOffset;
 
+	// For some functions, IDA Pro does not base its stack frame at the MinStackDelta. This
+	//  is detected by noting that the offset field for the saved return address is not
+	//  the negation of the MinStackDelta, e.g. offset is 12 and MinStackDelta is -16
+	//  for a function such as call_gmon_start, which has a temporary 4-byte decrease
+	//  in the stack delta for an internal thunk call that IDA Pro excludes from the
+	//  stack frame analysis, because it does not represent any local variable:
+	//     call next_instruction
+	//     pop ebx
+	// Instead, IDA Pro typically bases its stack frame at the AllocPointDelta. For many
+	//  functions, the MinStackDelta and the AllocPointDelta are the same. For some, they
+	//  are not the same, and for some functions, IDA Pro has a stack frame that is not
+	//  even based at the AllocPointDelta, because IDA Pro makes a mistake in its analyses
+	//  when the first basic block is interrupted by odd code such as a function call
+	//  before it reaches the frame allocation instruction.
+	// So, we need to align the local var table so that the base of the table is at the
+	//  AllocPointDelta and the saved return address falls at normalized address zero, i.e.
+	//  if AllocPointDelta is -28, then the LocalVarTable will start at offset zero as IDA
+	//  computes offsets, and the saved return address will fall at offset 28, the negation
+	//  of the AllocPointDelta. If the LocalVarTable does not conform to this pattern, we will
+	//  need to add 4-byte entries at the bottom of the table and adjust offsets until the return address
+	//  falls at the correct offset.
+	long IDAFrameAdjustment = (0 - this->IDAReturnAddressOffset - this->AllocPointDelta);
+	if (IDAFrameAdjustment != 0) {
+		SMP_msg("WARNING: %ld bytes IDAFrameAdjustment needed: Func at: %x\n",
+			IDAFrameAdjustment, this->FirstEA);
+		// We need to subtract (IDAReturnAddressOffset + this->AllocPointDelta) from the local var table offsets.
+		//  this->AllocPointDelta is negative, e.g. -44 for libc_csu_init in toy.exe, and IDAReturnAddressOffset
+		//  should be its negation (44 in that example), but is a smaller number (20 in the toy.exe example),
+		//  so we are subtracting (20 + -44) from each offset, meaning we are adding 24. We also add 24 to the 
+		//  value of this->LocalVarOffsetLimit, and create an entry at the bottom of the frame with a size of
+		//  24 in this example.
+		long LocalVarIncrement = (0 - (this->IDAReturnAddressOffset + this->AllocPointDelta));
+		if (LocalVarIncrement <= 0) {
+			SMP_msg("SERIOUS WARNING: Unexpected non-positive value for LocalVarIncrement: %ld Func at: %x\n",
+				LocalVarIncrement, this->FirstEA);
+		}
+		else {
+			for (size_t i = 0; i < this->LocalVarTable.size(); ++i) {
+				this->LocalVarTable[i].offset += LocalVarIncrement;
+			}
+			// Add dummy placeholders at bottom of LocalVarTable, four bytes each.
+			size_t TotalFillerSize = 0;
+			do {
+				struct LocalVar TempLocal;
+				char TempStr[20];
+				TempLocal.offset = (long) TotalFillerSize;
+				TempLocal.size = 4;
+				if (((long)(TempLocal.size + TotalFillerSize)) > LocalVarIncrement) {
+					TempLocal.size = (size_t)(LocalVarIncrement - (long) TotalFillerSize);
+				}
+				TotalFillerSize += TempLocal.size;
+				SMP_strncpy(TempLocal.VarName, "SMP_IDA_FixVar", sizeof(TempLocal.VarName) - 1);
+				(void) SMP_snprintf(TempStr, 18, "%ld", TempLocal.offset);
+				SMP_strncat(TempLocal.VarName, TempStr, sizeof(TempLocal.VarName) - 1);
+				this->LocalVarTable.push_back(TempLocal);
+			} while (((long)TotalFillerSize) < LocalVarIncrement);
+			this->LocalVarOffsetLimit += LocalVarIncrement;
+			this->FuncInfo.frsize += (asize_t) LocalVarIncrement;
+		}
+	}
+
 	// We cannot depend on IDA Pro making Member
 	//  entries for everything that is accessed on the stack.
 	//  When an incoming arg is accessed but no Member is
 	//  created, then LocalVarOffsetLimit will be too small
-	//  and we will get ERROR messages. Just loop through the
-	//  instructions, find offsets higher than the LocalVarTable
-	//  currently holds, and add new entries to LocalVarTable to
-	//  handle them.
-	// Iterate through all instructions and record stack frame accesses in the StackFrameMap.
-	InstIter = this->Instrs.begin();
-#if SMP_USE_SSA_FNOP_MARKER
-	if ((*InstIter)->IsFloatNop())
-		++InstIter;  // skip marker instruction
-#endif
-	for ( ; InstIter != this->Instrs.end(); ++InstIter) {
-		SMPInstr *CurrInst = (*InstIter);
-		ea_t InstAddr = CurrInst->GetAddr();
-		sval_t sp_delta = get_spd(this->GetFuncInfo(), InstAddr);
-		if (0 < sp_delta) {
-			// Stack underflow; about to assert
-			SMP_msg("Stack underflow at %x %s sp_delta: %d\n", CurrInst->GetAddr(),
-				CurrInst->GetDisasm(), sp_delta);
-			return false;
-		}
-		assert(0 >= sp_delta);
-		ea_t offset;
-		size_t DataSize;
-		bool UsedFramePointer;
-		bool IndexedAccess;
-		bool SignedMove;
-		bool UnsignedMove;
-		if (CurrInst->HasDestMemoryOperand()) {
-			// NOTE: We need to catch stack pushes here also (callee-saved regs). !!!!!*******!!!!!!!!
-			set<DefOrUse, LessDefUse>::iterator CurrDef;
-			for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) {
-				op_t TempOp = CurrDef->GetOp();
-				if (TempOp.type != o_phrase && TempOp.type != o_displ)
-					continue;
-				if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, sp_delta, offset, DataSize, UsedFramePointer,
-					IndexedAccess, SignedMove, UnsignedMove)) {
-					SignedOffset = (int) offset;
-					if (IndexedAccess && ((0 > SignedOffset) || ((offset + DataSize) > this->StackFrameMap.size()))) {
-						continue; // Indexed expressions can be within frame but offset is outside frame
-					}
-#if 0 // ls_O3.exe has IDA trouble on chunked function get_funky_string().
-					assert(0 <= SignedOffset);
-#else
-					if (0 > SignedOffset) { // negative offset but not Indexed; very bad
-						SMP_msg("ERROR: Negative stack offset at %x in %s. Abandoning LocalVar ID.\n", CurrInst->GetAddr(), this->GetFuncName());
-						return false;
-					}
-#endif
-					if ((SignedOffset + (long) DataSize) > this->LocalVarOffsetLimit) {
-						// Going out of range. Extend LocalVarTable.
-						struct LocalVar TempLocal;
-						char TempStr[20];
-						TempLocal.offset = (long) SignedOffset;
-						TempLocal.size = DataSize;
-						SMP_strncpy(TempLocal.VarName, "SMP_InArg", sizeof(TempLocal.VarName) - 1);
-						(void) SMP_snprintf(TempStr, 18, "%d", offset);
-						SMP_strncat(TempLocal.VarName, TempStr, sizeof(TempLocal.VarName) - 1);
-						this->LocalVarTable.push_back(TempLocal);
-						this->LocalVarOffsetLimit = (long) (SignedOffset + (long) DataSize);
-					}
-				}
-			}
-		}
-		if (CurrInst->HasSourceMemoryOperand()) {
-			set<DefOrUse, LessDefUse>::iterator CurrUse;
-			for (CurrUse = CurrInst->GetFirstUse(); CurrUse != CurrInst->GetLastUse(); ++CurrUse) {
-				op_t TempOp = CurrUse->GetOp();
-				if ((TempOp.type != o_phrase) && (TempOp.type != o_displ))
-					continue;
-				if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, sp_delta, offset, DataSize, UsedFramePointer,
-					IndexedAccess, SignedMove, UnsignedMove)) {
-					SignedOffset = (int) offset;
-					if (IndexedAccess && ((0 > SignedOffset) || ((offset + DataSize) > this->StackFrameMap.size()))) {
-						continue; // Indexed expressions can be within frame but offset is outside frame
-					}
-					assert(0 <= SignedOffset);
-					if ((SignedOffset + (long) DataSize) > this->LocalVarOffsetLimit) {
-						// Going out of range. Extend LocalVarTable.
-						struct LocalVar TempLocal;
-						char TempStr[20];
-						TempLocal.offset = (long) SignedOffset;
-						TempLocal.size = DataSize;
-						SMP_strncpy(TempLocal.VarName, "SMP_InArg", sizeof(TempLocal.VarName) - 1);
-						(void) SMP_snprintf(TempStr, 18, "%d", offset);
-						SMP_strncat(TempLocal.VarName, TempStr, sizeof(TempLocal.VarName) - 1);
-						this->LocalVarTable.push_back(TempLocal);
-						this->LocalVarOffsetLimit = (long) (SignedOffset + (long) DataSize);
-					}
-				}
-			}
+	//  and we will get ERROR messages. We already looped through the
+	//  instructions to find the MaxStackAccessLimit. If LocalVarOffsetLimit
+	//  is not big enough to reach from AllocPointDelta to MaxStackAccessLimit,
+	//  then add 4-byte incoming arg entries until it reaches.
+	while (this->LocalVarOffsetLimit < (long) this->MaxStackAccessLimit) {
+		// Extend LocalVarTable.
+		struct LocalVar TempLocal;
+		char TempStr[20];
+		TempLocal.offset = this->LocalVarOffsetLimit;
+		TempLocal.size = 4;
+		if ((TempLocal.size + TempLocal.offset) > ((long) this->MaxStackAccessLimit)) {
+			TempLocal.size = ((long) this->MaxStackAccessLimit) - TempLocal.offset;
 		}
-	} // end for all instructions
+		SMP_strncpy(TempLocal.VarName, "SMP_InArg", sizeof(TempLocal.VarName) - 1);
+		(void) SMP_snprintf(TempStr, 18, "%ld", TempLocal.offset);
+		SMP_strncat(TempLocal.VarName, TempStr, sizeof(TempLocal.VarName) - 1);
+		this->LocalVarTable.push_back(TempLocal);
+		this->LocalVarOffsetLimit += TempLocal.size;
+	}
 
 	// Fill in the gaps with new variables as well. SHOULD WE? WHY?
 
@@ -2116,28 +2464,16 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 	if ((0 <= this->MinStackDelta) || (0 <= this->AllocPointDelta)) {
 		// No allocations; sometimes happens in library functions.
 		this->OutgoingArgsSize = 0;
-		this->MinStackDelta = 0;
 		this->AllocPointDelta = 0;
-		return;
+		if ((this->MinStackDelta > this->MaxStackDelta) || (0 < this->MinStackDelta)) {
+			this->MinStackDelta = 0;
+		}
 	}
-	assert(0 > this->MinStackDelta);
+	assert(0 >= this->MinStackDelta);
 
 	// Allocate a vector of stack frame entries, one for each byte of the stack frame.
 	//  This will be our memory map for analyzing stack usage.
-	int limit = 0;
-#if 1
-	if (this->LocalVarOffsetLimit > 0) {
-		if (limit < (this->LocalVarOffsetLimit + this->MinStackDelta)) {
-			// Make room for incoming args, other stuff above local vars.
-			limit = this->LocalVarOffsetLimit + this->MinStackDelta;
-			if (this->MinStackDelta < this->AllocPointDelta) {
-				// Also have stuff below alloc point to make room for.
-				limit += (this->AllocPointDelta - this->MinStackDelta);
-			}
-		}
-	}
-#endif
-	for (int i = this->MinStackDelta; i < limit; ++i) {
+	for (int i = this->MinStackAccessOffset; i < this->MaxStackAccessLimit; ++i) {
 		struct StackFrameEntry TempEntry;
 		TempEntry.VarPtr = NULL;
 		TempEntry.offset = (long) i;
@@ -2163,18 +2499,27 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 #endif
 
 	// Fill in the VarPtr fields for each StackFrameMap entry.
-	if (0 <= this->AllocPointDelta) {
+	if (0 < this->AllocPointDelta) {
 		SMP_msg("FATAL ERROR: AllocPointDelta = %d in %s\n", this->AllocPointDelta, this->GetFuncName());
 	}
-	assert(0 > this->AllocPointDelta);
+	assert(0 >= this->AllocPointDelta);
+
+	// We were not able to adjust the LocalVarTable for a negative IDAFrameAdjustment back
+	//  in AuditLocalVarTable(), but we can use the negative adjustment value in this loop
+	//  to properly match the StackFrameMap entries to the LocalVarTable entries and avoid
+	//  an out of range error.
+	long IDAFrameAdjustment = (0 - this->IDAReturnAddressOffset - this->AllocPointDelta);
+	if (0 < IDAFrameAdjustment) {
+		IDAFrameAdjustment = 0; // only handling the negative case; positive was handled in AuditLocalVarTable()
+	}
 	for (size_t i = 0; i < this->LocalVarTable.size(); ++i) {
 		assert(this->LocalVarTable.at(i).offset >= 0);
-		// Picture that AllocPointDelta is -200, MinStackDelta is -210, and
+		// Picture that AllocPointDelta is -200, MinStackAccessOffset is -210, and
 		//  the LocalVarTable[i].offset is +8 (i.e. 8 bytes above alloc point).
 		//  Then base = 8 + (-200 - -210) = 8 + 10 = 18, the proper offset into
 		//  the StackFrameMap.
 		size_t base = (size_t) (this->LocalVarTable.at(i).offset
-			+ (this->AllocPointDelta - this->MinStackDelta));
+			+ (this->AllocPointDelta - this->MinStackAccessOffset) + IDAFrameAdjustment);
 		size_t limit = base + this->LocalVarTable.at(i).size;
 		if (limit > this->StackFrameMap.size()) {
 			SMP_msg("ERROR: FindOutArgsSize: base = %zu limit = %zu StackFrameMap size = %zu in %s\n",
@@ -2198,11 +2543,14 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 	for ( ; InstIter != this->Instrs.end(); ++InstIter) {
 		SMPInstr *CurrInst = (*InstIter);
 		ea_t InstAddr = CurrInst->GetAddr();
-		sval_t sp_delta = get_spd(this->GetFuncInfo(), InstAddr);
+		sval_t sp_delta = CurrInst->GetStackPtrOffset();
 		if (0 < sp_delta) {
 			// Stack underflow; about to assert
-			SMP_msg("FATAL ERROR: Stack underflow at %x %s sp_delta: %d\n", InstAddr,
+			SMP_msg("ERROR: Stack underflow at %x %s sp_delta: %d\n", InstAddr,
 				CurrInst->GetDisasm(), sp_delta);
+			this->OutgoingArgsComputed = false;
+			this->OutgoingArgsSize = 0;
+			return;
 		}
 		assert(0 >= sp_delta);
 		ea_t offset;
@@ -2217,10 +2565,10 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 				op_t TempOp = CurrDef->GetOp();
 				if (TempOp.type != o_phrase && TempOp.type != o_displ)
 					continue;
-				if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, sp_delta, offset, DataSize, UsedFramePointer,
+				if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, this->MinStackAccessOffset, offset, DataSize, UsedFramePointer,
 					IndexedAccess, SignedMove, UnsignedMove)) {
 					SignedOffset = (int) offset;
-					if (IndexedAccess && ((0 > SignedOffset) || ((offset + DataSize) > this->StackFrameMap.size()))) {
+					if (IndexedAccess && ((0 > SignedOffset) || ((SignedOffset + DataSize) > this->StackFrameMap.size()))) {
 						continue; // Indexed expressions can be within frame even when offset is outside frame
 					}
 					assert(0 <= SignedOffset);
@@ -2286,7 +2634,7 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 				op_t TempOp = CurrUse->GetOp();
 				if (TempOp.type != o_phrase && TempOp.type != o_displ)
 					continue;
-				if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, sp_delta, offset, DataSize, UsedFramePointer,
+				if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, this->MinStackAccessOffset, offset, DataSize, UsedFramePointer,
 					IndexedAccess, SignedMove, UnsignedMove)) {
 					SignedOffset = (int) offset;
 					if (IndexedAccess && ((0 > SignedOffset) || ((SignedOffset + DataSize) > this->StackFrameMap.size()))) {
@@ -2349,7 +2697,9 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 	} // end for all instructions
 
 	// If function is a leaf function, set OutgoingArgsSize to zero and return.
-	if (this->IsLeaf() && !(this->IsDirectlyRecursive())) {
+	// If function has no local frame allocation, ditto.
+	if ((this->IsLeaf() && !(this->IsDirectlyRecursive()))
+		|| (this->AllocPointDelta == 0)) {
 		this->OutgoingArgsSize = 0;
 		return;
 	}
@@ -2446,17 +2796,21 @@ void SMPFunction::FindOutgoingArgsSize(void) {
 //  stack pointer value) and the size in bytes of the data access. Also return whether the
 //  access was frame-pointer-relative, and whether signedness can be inferred due to a load
 //  from the stack being zero-extended or sign-extended.
+// NOTE: This function assumes that offsets are already normalized, i.e., the TempOp argument should come from a DEF or USE
+//  that has been normalized to the stack delta at function entry (ESP-relative: sp_delta is added if !Instr->AreDefsNormalized()).
 // NOTE: TempOp must be of type o_displ or o_phrase, as no other operand type could be a
 //  stack memory access.
-// sp_delta is the stack pointer delta of the current instruction, relative to the initial
-//  stack pointer value for the function.
+// BaseValue is either this->MinStackAccessOffset, or this->MinStackDelta (when this->MinStackAccessOffset is still
+//  being computed).
 // Return true if a stack memory access was found in TempOp, false otherwise.
-bool SMPFunction::MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP,
+bool SMPFunction::MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t BaseValue, ea_t &offset, size_t &DataSize, bool &FP,
 										  bool &Indexed, bool &Signed, bool &Unsigned) {
 	int BaseReg;
 	int IndexReg;
 	ushort ScaleFactor;
 	int SignedOffset;
+	sval_t sp_delta = Instr->GetStackPtrOffset();
+	ea_t InstAddr = Instr->GetAddr(); // helps debugging
 
 	assert((o_displ == TempOp.type) || (o_phrase == TempOp.type));
 	MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset);
@@ -2469,9 +2823,11 @@ bool SMPFunction::MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t s
 
 	if ((BaseReg == R_sp) || (IndexReg == R_sp)) {
 		// ESP-relative constant offset
-		SignedOffset += sp_delta; // base offsets from entry ESP value
-		SignedOffset -= this->MinStackDelta; // convert to StackFrameMap index
-		offset = (ea_t) SignedOffset;
+		if (!Instr->AreDefsNormalized()) {
+			SignedOffset += sp_delta; // base offsets from entry ESP value
+		}
+		SignedOffset -= BaseValue; // convert to StackFrameMap index
+		offset = (ea_t) SignedOffset; // write back to outgoing argument
 		// Get size of data written
 		DataSize = GetOpDataSize(TempOp);
 		FP = false;
@@ -2479,7 +2835,7 @@ bool SMPFunction::MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t s
 		unsigned short opcode = Instr->GetCmd().itype;
 		Unsigned = (opcode == NN_movzx);
 		Signed = (opcode == NN_movsx);
-		if ((0 > SignedOffset) && (!Indexed)) {
+		if ((0 > SignedOffset) && (!Indexed) && (BaseValue == this->MinStackAccessOffset)) {
 			// Consider asserting here.
 			SMP_msg("ERROR: Negative offset in MDGetStackOffsetAndSize for inst dump: \n");
 			Instr->Dump();
@@ -2488,17 +2844,19 @@ bool SMPFunction::MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t s
 	}
 	else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) {
 		SignedOffset -= this->FuncInfo.frregs; // base offsets from entry ESP value
-		SignedOffset -= this->MinStackDelta; // convert to StackFrameMap index
+		SignedOffset -= BaseValue; // convert to StackFrameMap index
 		offset = (ea_t) SignedOffset;
 		DataSize = GetOpDataSize(TempOp);
 		FP = true;
 		Indexed = ((BaseReg != R_none) && (IndexReg != R_none)); // two regs used
+		assert(Indexed || (!this->StackPtrAnalysisSucceeded()) || !this->HasSTARSStackPtrAnalysisCompleted()); // Else we should never get here with unnormalized stack operands
 		unsigned short opcode = Instr->GetCmd().itype;
 		Unsigned = (opcode == NN_movzx);
 		Signed = (opcode == NN_movsx);
-		if ((0 > SignedOffset) && (!Indexed)) {
+		if ((0 > SignedOffset) && (!Indexed) && (BaseValue == this->MinStackAccessOffset)) {
 			// Consider asserting here.
-			SMP_msg("ERROR: Negative offset in MDGetStackOffsetAndSize for inst dump: \n");
+			SMP_msg("ERROR: Negative offset %d in MDGetStackOffsetAndSize: frregs: %d MinStackDelta: %d Inst dump: \n",
+				SignedOffset, this->FuncInfo.frregs, this->MinStackDelta);
 			Instr->Dump();
 		}
 		return true;
@@ -2518,7 +2876,6 @@ bool SMPFunction::MDGetFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct FineGra
 
 	assert((o_displ == TempOp.type) || (o_phrase == TempOp.type));
 	MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset);
-	sval_t sp_delta = get_spd(this->GetFuncInfo(), InstAddr);
 
 	SignedOffset = (int) offset;
 
@@ -2527,12 +2884,12 @@ bool SMPFunction::MDGetFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct FineGra
 	}
 	if ((BaseReg == R_sp) || (IndexReg == R_sp)) {
 		// ESP-relative constant offset
-		SignedOffset += sp_delta; // base offsets from entry ESP value
-		SignedOffset -= this->MinStackDelta; // convert to StackFrameMap index
+		SignedOffset -= this->MinStackAccessOffset; // convert to StackFrameMap index
 	}
 	else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) {
+		assert(false); // should never get here with unnormalized stack operand
 		SignedOffset -= this->FuncInfo.frregs; // base offsets from entry ESP value
-		SignedOffset -= this->MinStackDelta; // convert to StackFrameMap index
+		SignedOffset -= this->MinStackAccessOffset; // convert to StackFrameMap index
 	}
 	else {
 		return false;
@@ -2563,7 +2920,6 @@ bool SMPFunction::MDUpdateFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct Fine
 
 	assert((o_displ == TempOp.type) || (o_phrase == TempOp.type));
 	MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset);
-	sval_t sp_delta = get_spd(this->GetFuncInfo(), InstAddr);
 
 	SignedOffset = (int) offset;
 
@@ -2572,12 +2928,12 @@ bool SMPFunction::MDUpdateFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct Fine
 	}
 	if ((BaseReg == R_sp) || (IndexReg == R_sp)) {
 		// ESP-relative constant offset
-		SignedOffset += sp_delta; // base offsets from entry ESP value
-		SignedOffset -= this->MinStackDelta; // convert to StackFrameMap index
+		SignedOffset -= this->MinStackAccessOffset; // convert to StackFrameMap index
 	}
 	else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) {
+		assert(false); // should never get here with unnormalized stack operands
 		SignedOffset -= this->FuncInfo.frregs; // base offsets from entry ESP value
-		SignedOffset -= this->MinStackDelta; // convert to StackFrameMap index
+		SignedOffset -= this->MinStackAccessOffset; // convert to StackFrameMap index
 	}
 	else {
 		return false;
@@ -2649,6 +3005,7 @@ set<SMPPhiFunction, LessPhi>::iterator SMPFunction::GetPhiIterForPhiDef(size_t B
 
 // Is DestOp within the outgoing args area? Assume it must be an ESP-relative
 //  DEF operand in order to be a write to the outgoing args area.
+// NOTE: DestOp should already be normalized to the entry stack delta.
 bool SMPFunction::IsInOutgoingArgsRegion(op_t DestOp) {
 	bool OutArgWrite = false;
 	int BaseReg, IndexReg;
@@ -2676,13 +3033,15 @@ bool SMPFunction::IsInOutgoingArgsRegion(op_t DestOp) {
 		OutArgWrite = true; // be conservative
 	}
 	else {
-		OutArgWrite = (offset < this->OutgoingArgsSize);
+		int SignedOffset = (int) offset;
+		SignedOffset -= this->MinStackDelta; // convert to zero-based from bottom of stack frame
+		OutArgWrite = (((size_t) SignedOffset) < this->OutgoingArgsSize);
 	}
 	return OutArgWrite;
 } // end of SMPFunction::IsInOutgoingArgsRegion()
 
 // Is DestOp a direct memory access above the local vars frame?
-bool SMPFunction::WritesAboveLocalFrame(op_t DestOp) {
+bool SMPFunction::WritesAboveLocalFrame(op_t DestOp, bool OpNormalized) {
 	bool InArgWrite = false;
 	int BaseReg, IndexReg;
 	ushort ScaleFactor;
@@ -2693,6 +3052,7 @@ bool SMPFunction::WritesAboveLocalFrame(op_t DestOp) {
 	SignedOffset = (long) offset;
 	bool ESPrelative = (BaseReg == R_sp) || (IndexReg == R_sp);
 	bool EBPrelative = this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp));
+	assert(!EBPrelative || !OpNormalized); // stack operands should be normalized by now
 	if (!(ESPrelative || EBPrelative))
 		return false;
 	if (((IndexReg != R_none) && (BaseReg != R_none))
@@ -2703,9 +3063,19 @@ bool SMPFunction::WritesAboveLocalFrame(op_t DestOp) {
 		return false;
 	}
 
-	InArgWrite = (ESPrelative && (SignedOffset > ((long) this->LocalVarsSize)))
+	// The next statement omits a complication: The possibility that OpNormalized is false,
+	//  and an ESPRelative access is above the stack frame. For the purposes of determining
+	//  whether a function is safe, this is irrelevant, because !OpNormalized would indicate
+	//  that AnalyzedSP is false, which will make the function unsafe anyway. Future uses for
+	//  other purposes need to fix this.
+	InArgWrite = (ESPrelative && OpNormalized && (SignedOffset >= 0))
 		|| (EBPrelative && (SignedOffset > 0));
 
+	if (InArgWrite && OpNormalized && (0 == SignedOffset)) {
+		SMP_msg("DANGER: Write to saved return address detected in function that begins at %x\n",
+			this->FirstEA);
+	}
+
 	return InArgWrite;
 }// end of SMPFunction::WritesAboveLocalFrame()
 
@@ -2720,15 +3090,16 @@ bool SMPFunction::IndexedWritesAboveLocalFrame(op_t DestOp) {
 	MDExtractAddressFields(DestOp, BaseReg, IndexReg, ScaleFactor, offset);
 	bool ESPrelative = (BaseReg == R_sp) || (IndexReg == R_sp);
 	bool EBPrelative = this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp));
+	assert(!EBPrelative || !this->StackPtrAnalysisSucceeded() || !this->HasSTARSStackPtrAnalysisCompleted()); // stack operands should be normalized by now
 	if (!(ESPrelative || EBPrelative))
 		return false;
 
 	SignedOffset = (int) offset;
-	InArgWrite = (ESPrelative && (SignedOffset > this->LocalVarsSize))
+	InArgWrite = (ESPrelative && (SignedOffset > 0))
 		|| (EBPrelative && (SignedOffset > 0));
 
 	return InArgWrite;
-}	 // end of SMPFunction::IndexedWritesAboveLocalFrame
+} // end of SMPFunction::IndexedWritesAboveLocalFrame()
 
 // Is CurrOp found anywhere in the StackPtrCopySet, regardless of which address and stack delta
 //  values are associated with it?
@@ -2754,20 +3125,33 @@ bool SMPFunction::IsInStackPtrCopySet(op_t CurrOp) {
 } // end of SMPFunction::IsInStackPtrCopySet()
 
 // Find evidence of calls to alloca(), which appear as stack space allocations (i.e.
-//  subtractions from the stack pointer) AFTER the local frame allocation instruction
+//  subtractions [of unknown values(?)] from the stack pointer) AFTER the local frame allocation instruction
 //  for this function.
 // Return true if such an allocation is found and false otherwise.
 bool SMPFunction::FindAlloca(void) {
-	list<SMPInstr *>::iterator CurrInst = this->Instrs.begin();
+	bool FoundAlloca = false;
+	list<SMPInstr *>::iterator InstIter = this->Instrs.begin();
+	SMPInstr *CurrInst;
+	ea_t InstAddr;
 #if SMP_USE_SSA_FNOP_MARKER
-	++CurrInst;  // skip marker instruction
+	++InstIter;  // skip marker instruction
 #endif
-	for ( ; CurrInst != this->Instrs.end(); ++CurrInst) {
-		if (((*CurrInst)->GetAddr() > this->LocalVarsAllocInstr) && (*CurrInst)->MDIsFrameAllocInstr()) {
-			return true;
+	for ( ; InstIter != this->Instrs.end(); ++InstIter) {
+		CurrInst = (*InstIter);
+		InstAddr = CurrInst->GetAddr();
+		if (InstAddr > this->LocalVarsAllocInstr) {
+			if (CurrInst->MDIsFrameAllocInstr()) {
+				FoundAlloca = true;
+				if (CurrInst->HasAllocaRTL()) {
+					CurrInst->SetAllocaCall();
+				}
+			}
+			else if (CurrInst->MDIsPushInstr()) {
+				this->PushAfterLocalVarAlloc = true;
+			}
 		}
 	}
-	return false;
+	return FoundAlloca;
 } // end of SMPFunction::FindAlloca()
 
 // Emit the annotations describing the regions of the stack frame.
@@ -2983,8 +3367,12 @@ void SMPFunction::Analyze(void) {
 						this->UnresolvedIndirectCalls = (!LinkedToTarget);
 					}
 				} // end if INDIR_CALL or CALL
-				else if (DataFlowType == INDIR_JUMP)
+				else if (DataFlowType == INDIR_JUMP) {
 					this->IndirectJumps = true;
+				}
+				else if (DataFlowType == RETURN) {
+					this->HasReturnInst = true;
+				}
 				// Add call targets for tail call jumps.
 				else if (CurrInst->IsBranchToFarChunk()) {
 					ea_t FarTargetAddr = CurrInst->GetFarBranchTarget();
@@ -3162,7 +3550,7 @@ void SMPFunction::Analyze(void) {
 			InstAddr = CurrInst->GetAddr();
 			CurrInst->SetBlock(CurrBlock->GetThisBlock());
 
-#if 1
+#if 0
 			if (this->AnalyzedSP) {
 				// Audit the IDA SP analysis.
 				sval_t sp_delta = get_spd(this->GetFuncInfo(), InstAddr);
@@ -3179,7 +3567,7 @@ void SMPFunction::Analyze(void) {
 						sp_delta);
 				}
 				else if (sp_delta == 0) {
-#if 1
+#if 0
 					// Search for tail calls.
 					if (CurrInst->IsBranchToFarChunk()) {
 						// After the stack has been restored to the point at which
@@ -3202,6 +3590,7 @@ void SMPFunction::Analyze(void) {
 #endif
 
 		} // end for each inst
+		CurrBlock->Analyze();
 	} // end for each block
 
 	// Set up basic block links and map of instructions to blocks.
@@ -3256,32 +3645,30 @@ void SMPFunction::AdvancedAnalysis(void) {
 		CurrInst = (*InstIter);
 		ea_t InstAddr = CurrInst->GetAddr(); // for debugging breakpoints
 		if (CurrInst->HasGoodRTL())
-			CurrInst->SyncAllRTs();
+			CurrInst->SyncAllRTs(this->UsesFramePointer(), this->GetFramePtrStackDelta());
 
 		// Detect indirect memory references.
 		CurrInst->AnalyzeIndirectRefs(this->UseFP);
 
+#if 0
 		// Is the instruction a branch to a target outside the function? If
 		//  so, this function has shared tail chunks.
 		if (CurrInst->IsBranchToFarChunk() && (!CurrInst->IsTailCall())) {
 			this->SharedChunks = true;
 		}
+#endif
 	} // end for all instructions
 
-	for (BlockIter = this->Blocks.begin(); BlockIter != this->Blocks.end(); ++BlockIter) {
-		(*BlockIter)->Analyze();
-	}
-
 	// Audit the call instructions and call targets.
 	//  !!!!****!!!! NOTE: Not sure the address range checks in this code are valid
 	//   for functions with scattered chunks.
 	if ((!this->AllCallTargets.empty()) || this->UnresolvedIndirectCalls) {
-		bool FoundBadCallTarget = false;
+		bool FoundInternalCallTarget = false;
 		vector<ea_t>::iterator CurrTarget = this->AllCallTargets.begin();
 		while (CurrTarget != this->AllCallTargets.end()) {
 			if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) {
 				// Found a call target that is within the function.
-				FoundBadCallTarget = true;
+				FoundInternalCallTarget = true;
 				if (this->FirstEA == *CurrTarget) { // Direct recursion, not a pseudo-jump
 					this->DirectlyRecursive = true;
 				}
@@ -3291,7 +3678,7 @@ void SMPFunction::AdvancedAnalysis(void) {
 				++CurrTarget;
 			}
 		}
-		if (FoundBadCallTarget) {
+		if (FoundInternalCallTarget) {
 			// We have to mark the pseudo-call instructions and audit the direct and
 			//  indirect call target vectors.
 
@@ -3329,32 +3716,25 @@ void SMPFunction::AdvancedAnalysis(void) {
 				++InstIter;
 			}
 #endif
-		} // end if (FoundBadCallTarget)
+		} // end if (FoundInternalCallTarget)
 	}
 
-	if (!(this->HasSharedChunks())) {
-
-#if 0
-		// Perform LVA and SSA steps.
-		if (!this->HasUnresolvedIndirectJumps()) {
-			this->LiveVariableAnalysis();
-			this->ComputeSSA();
-		}
-#endif
-
-		// Figure out the stack frame and related info.
+	// Figure out the stack frame and related info.
 #if SMP_ANALYZE_STACK_POINTER
-		(void) this->AnalyzeStackPointerDeltas();
+	(void) this->AnalyzeStackPointerDeltas();
 #else
-		(void) this->UseIDAStackPointerDeltas();
+	(void) this->UseIDAStackPointerDeltas();
 #endif
+
+#if SMP_DEBUG_CONTROLFLOW
+	SMP_msg("SMPFunction::Analyze: set stack frame info.\n");
+#endif
+	if (!(this->HasSharedChunks())) {
+
 		this->SetStackFrameInfo();
 
 	} // end if not shared chunks
 	else { // has shared chunks; still want to compute stack frame info
-#if SMP_DEBUG_CONTROLFLOW
-		SMP_msg("SMPFunction::Analyze: set stack frame info.\n");
-#endif
 #ifdef SMP_DEBUG_FUNC
 		SMP_msg(" %s has shared chunks \n", this->GetFuncName());
 #endif
@@ -3381,6 +3761,7 @@ size_t SMPFunction::UnprocessedCalleesCount(void) {
 		SMPFunction *CurrTarget = this->GetProg()->FindFunction(this->AllCallTargets.at(TargetIndex));
 		if (NULL == CurrTarget) {
 #if 0
+			// Bad call targets are removed in AdvancedAnalysis(), which comes later.
 			SMP_msg("ERROR: NULL CallTarget in UnprocessedCalleesCount() at TargetIndex %zu \n", TargetIndex);
 #endif
 		}
@@ -3716,7 +4097,7 @@ void SMPFunction::AnalyzeMetadataLiveness(void) {
 						//  writes, by definition of "safe." So, for safe funcs, only
 						//  the o_mem (globals) and indirect writes are live metadata.
 						if (this->SafeFunc && MDIsStackAccessOpnd(DefOp, this->UseFP)
-							&& (!this->WritesAboveLocalFrame(DefOp))
+							&& (!this->WritesAboveLocalFrame(DefOp, CurrInst->AreDefsNormalized()))
 							&& (!this->IsInOutgoingArgsRegion(DefOp))) {
 							++CurrDef;
 							SafeMemDest = true;
@@ -3728,8 +4109,9 @@ void SMPFunction::AnalyzeMetadataLiveness(void) {
 							ScaleFactor, offset);
 						if (R_none != BaseReg) {
 							BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
-							if (BaseOp.is_reg(R_sp) 
-								|| (this->UseFP && BaseOp.is_reg(R_bp))) {
+							BaseOp.dtyp = dt_dword; // canonical 32-bit width
+							if (BaseOp.is_reg(MD_STACK_POINTER_REG) 
+								|| (this->UseFP && BaseOp.is_reg(MD_FRAME_POINTER_REG))) {
 								; // do nothing; DEF handled by case above
 							}
 							else {
@@ -3752,6 +4134,7 @@ void SMPFunction::AnalyzeMetadataLiveness(void) {
 						} // end if R_none != BaseReg
 						if (R_none != IndexReg) {
 							IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
+							IndexOp.dtyp = dt_dword; // canonical 32-bit width
 							if (IndexOp.is_reg(R_sp) 
 								|| (this->UseFP && IndexOp.is_reg(R_bp))) {
 								; // do nothing; DEF handled by case above
@@ -4402,6 +4785,8 @@ bool SMPFunction::FindChainAliasHelper(list<SMPBasicBlock *>::iterator BlockIter
 void SMPFunction::SetLinks(void) {
 	list<SMPBasicBlock *>::iterator BlockIter;
 	SMPBasicBlock *CurrBlock;
+	list<SMPBasicBlock *> UnresolvedBranchWorkList;
+	ea_t InstAddr;
 #if SMP_DEBUG_DATAFLOW_VERBOSE
 	SMP_msg("SetLinks called for %s\n", this->GetFuncName());
 #endif
@@ -4412,7 +4797,8 @@ void SMPFunction::SetLinks(void) {
 		for (CurrInst = CurrBlock->GetFirstInstr();
 			CurrInst != CurrBlock->GetLastInstr();
 			++CurrInst) {
-				pair<ea_t, SMPBasicBlock *> MapItem((*CurrInst)->GetAddr(), CurrBlock);
+				InstAddr = (*CurrInst)->GetAddr();
+				pair<ea_t, SMPBasicBlock *> MapItem(InstAddr, CurrBlock);
 				InstBlockMap.insert(MapItem);
 		}
 	}
@@ -4426,6 +4812,7 @@ void SMPFunction::SetLinks(void) {
 		CurrBlock = (*BlockIter);
 		list<SMPInstr *>::iterator InstIter = (--(CurrBlock->GetLastInstr()));
 		SMPInstr *CurrInst = (*InstIter);
+		InstAddr = CurrInst->GetAddr();
 		bool CondTailCall = false;
 		if (CurrBlock->HasReturn()) {
 			if (!(CurrInst->IsCondTailCall())) {
@@ -4444,18 +4831,22 @@ void SMPFunction::SetLinks(void) {
 
 		// Last instruction in block; set successors
 		bool CallFlag = (CALL == CurrInst->GetDataFlowType());
+		bool IndirJumpFlag = (INDIR_JUMP == CurrInst->GetDataFlowType());
 		bool IndirCallFlag = (INDIR_CALL == CurrInst->GetDataFlowType());
+		// NOTE: Due to phase re-ordering, we cannot yet identify tail calls,
+		//  so CondTailCall and TailCallFlag will always be false, which is harmless.
+		//  SMPInstr::SetTailCall() will do a little cleanup later.
 		bool TailCallFlag = CondTailCall && CurrInst->IsCondTailCall();
-		bool IndirJumpFlag = (INDIR_JUMP == CurrInst->GetDataFlowType());
 		SMP_xref_t CurrXrefs;
 		bool LinkedToTarget = false;
 		for (bool ok = CurrXrefs.SMP_first_from(CurrInst->GetAddr(), XREF_ALL);
 			ok;
 			ok = CurrXrefs.SMP_next_from()) {
-				if ((CurrXrefs.GetTo() != 0) && (CurrXrefs.GetIscode())) {
+				ea_t TargetAddr = CurrXrefs.GetTo();
+				if ((TargetAddr != 0) && (CurrXrefs.GetIscode())) {
 					// Found a code target, with its address in CurrXrefs.to
 					if ((CallFlag || IndirCallFlag || TailCallFlag) 
-						&& (CurrXrefs.GetTo() != (CurrInst->GetAddr() + CurrInst->GetCmd().size))) {
+						&& (TargetAddr != (CurrInst->GetAddr() + CurrInst->GetCmd().size))) {
 						// A call instruction will have two targets: the fall through to the
 						//  next instruction, and the called function. We want to link to the
 						//  fall-through instruction, but not to the called function.
@@ -4464,11 +4855,14 @@ void SMPFunction::SetLinks(void) {
 						continue;
 					}
 					map<ea_t, SMPBasicBlock *>::iterator MapEntry;
-					MapEntry = this->InstBlockMap.find(CurrXrefs.GetTo());
+					MapEntry = this->InstBlockMap.find(TargetAddr);
 					if (MapEntry == this->InstBlockMap.end()) {
-						SMP_msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.GetTo(),
+						; // do nothing; probably a tail call (not yet identified)
+#if 0
+						SMP_msg("WARNING: addr %x not found in map for %s\n", TargetAddr,
 							this->GetFuncName());
 						SMP_msg(" Referenced from %s\n", CurrInst->GetDisasm());
+#endif
 					}
 					else {
 						SMPBasicBlock *Target = MapEntry->second;
@@ -4481,7 +4875,7 @@ void SMPFunction::SetLinks(void) {
 						if (IndirJumpFlag) {
 #if SMP_DEBUG_SWITCH_TABLE_INFO
 							SMP_msg("Switch table link: jump at %x target at %x\n",
-								CurrInst->GetAddr(), CurrXrefs.GetTo());
+								CurrInst->GetAddr(), TargetAddr);
 #else
 							;
 #endif
@@ -4492,6 +4886,7 @@ void SMPFunction::SetLinks(void) {
 		} // end for all xrefs
 		if (IndirJumpFlag && (!LinkedToTarget)) {
 			this->UnresolvedIndirectJumps = true;
+			UnresolvedBranchWorkList.push_back(CurrBlock);
 			SMP_msg("WARNING: Unresolved indirect jump at %x\n", CurrInst->GetAddr());
 		}
 		else if (IndirCallFlag && (!LinkedToTarget)) {
@@ -4500,6 +4895,78 @@ void SMPFunction::SetLinks(void) {
 		}
 	} // end for all blocks
 
+	// Mark all blocks that can be reached from the entry block, so we can find the unreachable ones.
+	this->ResetProcessedBlocks();
+	this->Blocks.front()->DepthFirstMark();
+	// We have two cases: (1) Unresolved indirect branches could be targeting the unmarked blocks, making
+	//  these blocks reachable, in which case we should link the unresolved branches to the unmarked blocks;
+	//  or (2) there are no unresolved branches, in which case the unmarked blocks are unreachable within
+	//  the function. They might be reachable from outside the function using exception handling jumps, but
+	//  that still would not allow us to link them into the CFG of this function properly, so in any case we
+	//  are deleting those unreachable blocks and not emitting annotations for them.
+	// NOTE: An odd new gcc recursion optimization uses indirect calls within the function, so
+	//  they can behave like indirect jumps. However, we don't want to link unresolved calls to unmarked blocks
+	//  at this time.
+	bool HellNodeCase = (!UnresolvedBranchWorkList.empty() && (this->HasUnresolvedIndirectCalls() || this->HasUnresolvedIndirectJumps()));
+	bool AddedMissingLinks = false;
+	bool changed;
+	do {
+		changed = false;
+		list<SMPBasicBlock *>::iterator BlockIter = this->Blocks.begin();
+		while (BlockIter != this->Blocks.end()) {
+			SMPBasicBlock *CurrBlock = (*BlockIter);
+			if (CurrBlock->IsProcessed()) {
+				++BlockIter;
+			}
+			else {
+				// Block cannot be reached from entry node, even after we have added links
+				//  on previous loop iterations.
+				if (!HellNodeCase) {
+					if (CurrBlock->AllNops())
+						SMP_msg("Removing all nops block at %x\n", CurrBlock->GetFirstAddr());
+					else
+						SMP_msg("Removing unreachable block at %x\n", CurrBlock->GetFirstAddr());
+					// Remove this block from the predecessors list of its successors.
+					list<SMPBasicBlock *>::iterator SuccIter;
+					ea_t TempAddr = CurrBlock->GetFirstAddr();
+					for (SuccIter = CurrBlock->GetFirstSucc(); SuccIter != CurrBlock->GetLastSucc(); ++SuccIter) {
+						(*SuccIter)->ErasePred(TempAddr);
+					}
+					// Remove the unreachable instructions from the function inst list.
+					list<SMPInstr *>::iterator InstIter;
+					InstIter = CurrBlock->GetFirstInstr();
+					ea_t FirstBadAddr = (*InstIter)->GetAddr();
+					InstIter = CurrBlock->GetLastInstr();
+					--InstIter; // get last real instruction
+					ea_t LastBadAddr = (*InstIter)->GetAddr();
+					this->EraseInstRange(FirstBadAddr, LastBadAddr);
+
+					// Finally, remove the block from the blocks list.
+					BlockIter = this->Blocks.erase(BlockIter);
+					this->BlockCount -= 1;
+				}
+				else { // HellNodeCase
+					// Block must be reachable only through an unresolved indirect branch.
+					// Make each unresolved indirect branch link to the block so it is reachable.
+					list<SMPBasicBlock *>::iterator WorkIter;
+					AddedMissingLinks = true;
+					for (WorkIter = UnresolvedBranchWorkList.begin(); WorkIter != UnresolvedBranchWorkList.end(); ++ WorkIter) {
+						SMPBasicBlock *WorkBlock = (*WorkIter);
+						WorkBlock->LinkToSucc(CurrBlock);
+					}
+					// Mark CurrBlock as now being reachable, along with every block reachable from it.
+					CurrBlock->DepthFirstMark();
+					++BlockIter;
+				}
+				changed = true;
+			} // end if (processed) ... else ...
+		} // end loop through blocks
+	} while (changed);
+	if (HellNodeCase && (!AddedMissingLinks)) {
+		SMP_msg("SERIOUS WARNING: Function at %x has unresolved indirect branches but no unreachable blocks.\n", this->FirstEA);
+	}
+
+#if 0
 	// If we have any blocks that are all no-ops and have no predecessors, remove those
 	//  blocks. They are dead and make the CFG no longer a lattice. Any blocks that have
 	//  no predecessors but are not all no-ops should also be removed with a different
@@ -4508,17 +4975,15 @@ void SMPFunction::SetLinks(void) {
 	//  we cannot conclude that a block with no predecessors is unreachable. Also, the block
 	//  order might be such that removal of a block makes an already processed block
 	//  unreachable, so we have to iterate until there are no more changes.
-	// NOTE: An odd new gcc recursion optimization uses indirect calls within the function, so
-	//  they can behave like indirect jumps.
+	bool NoPredecessors;
+	bool OnlyPredIsItself;
+	list<SMPBasicBlock *>::iterator CurrPred;
 #if SMP_USE_SWITCH_TABLE_INFO
 	if (!(this->HasUnresolvedIndirectJumps() || this->HasUnresolvedIndirectCalls())) {
 #else
 	if (!(this->HasIndirectJumps() || this->HasIndirectCalls())) {
 #endif
 		bool changed;
-		bool NoPredecessors;
-		bool OnlyPredIsItself;
-		list<SMPBasicBlock *>::iterator CurrPred;
 		do {
 			changed = false;
 			BlockIter = this->Blocks.begin();
@@ -4565,7 +5030,42 @@ void SMPFunction::SetLinks(void) {
 				}
 			} // end while all blocks after the first one
 		} while (changed);
-	} // end if not indirect jumps
+	} // end if not unresolved indirect jumps or indirect calls
+	else if (this->UnresolvedIndirectJumps) {
+		// Make each unresolved indirect branch have each block with no predecessor as a target,
+		//  so that the resulting CFG has a proper structure.
+		BlockIter = this->Blocks.begin();
+		++BlockIter; // The top block is expected to have no predecessors, which is not a CFG problem.
+		bool AddedMissingLinks = false;
+		while (BlockIter != this->Blocks.end()) {
+			CurrBlock = (*BlockIter);
+			OnlyPredIsItself = false;
+			CurrPred = CurrBlock->GetFirstPred();
+			NoPredecessors = (CurrPred == CurrBlock->GetLastPred());
+			if (!NoPredecessors) {
+				if ((*CurrPred)->GetFirstAddr() == CurrBlock->GetFirstAddr()) { // self-recursion
+					++CurrPred; // any more preds besides itself?
+					OnlyPredIsItself = (CurrPred == CurrBlock->GetLastPred());
+						// Only predecessor was the self-recursion if no more preds
+				}
+			}
+			if (NoPredecessors || OnlyPredIsItself) {
+				// Block must be reachable only through an unresolved indirect branch.
+				// Make each unresolved indirect branch link to the block so it is reachable.
+				list<SMPBasicBlock *>::iterator WorkIter;
+				AddedMissingLinks = true;
+				for (WorkIter = UnresolvedBranchWorkList.begin(); WorkIter != UnresolvedBranchWorkList.end(); ++ WorkIter) {
+					SMPBasicBlock *WorkBlock = (*WorkIter);
+					WorkBlock->LinkToSucc(CurrBlock);
+				}
+			}
+			++BlockIter;
+		} // end for all blocks
+		if (!AddedMissingLinks) {
+			SMP_msg("SERIOUS WARNING: Function at %x has unresolved indirect branches but no unreachable blocks.\n", this->FirstEA);
+		}
+	}
+#endif
 
 	return;
 } // end of SMPFunction::SetLinks()
@@ -4727,11 +5227,15 @@ void SMPFunction::LiveVariableAnalysis(void) {
 	SMP_msg("LiveVariableAnalysis for %s\n", this->GetFuncName());
 #endif
 
+#if SMP_ANALYZE_STACK_POINTER
+	;
+#else
 	for (BlockIter = this->Blocks.begin(); BlockIter != this->Blocks.end(); ++BlockIter) {
 		CurrBlock = (*BlockIter);
 		// Initialize the Killed and UpwardExposed sets for each block.
-		CurrBlock->InitKilledExposed();
+		CurrBlock->InitKilledExposed(this->UsesFramePointer());
 	}
+#endif
 
 	bool changed;
 	// Iterate over each block, updating LiveOut sets until no more changes are made.
@@ -6223,7 +6727,7 @@ void SMPFunction::EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile) {
 		}
 
 #ifndef SMP_REDUCED_ANALYSIS
-		if (this->HasGoodRTLs() && !this->HasUnresolvedIndirectJumps() && !this->HasSharedChunks()) {
+		if (this->StackPtrAnalysisSucceeded() && this->HasGoodRTLs() && !this->HasUnresolvedIndirectJumps() && !this->HasSharedChunks()) {
 			CurrInst->EmitTypeAnnotations(this->UseFP, AllocSeen, this->NeedsStackReferent, AnnotFile, InfoAnnotFile);
 			CurrInst->EmitIntegerErrorAnnotations(InfoAnnotFile);
 		}
@@ -6413,7 +6917,8 @@ void SMPFunction::MarkFunctionSafe() {
 				// If an lea instruction loads an address above
 				//  the stack frame, we must assume that writes
 				//  above the stack frame could occur.
-				if (this->WritesAboveLocalFrame(CurrInst->GetCmd().Operands[1]))
+				op_t TempOp = CurrInst->GetLeaMemUseOp();
+				if (this->WritesAboveLocalFrame(TempOp, CurrInst->AreUsesNormalized()))
 					WritesAboveLocalFrameIndirect = true;
 			}
 #if SMP_DEBUG_FUNC 
@@ -6466,7 +6971,7 @@ void SMPFunction::MarkFunctionSafe() {
 				bool StackPointerRelative = (BaseReg == R_sp);
 				if (StackPointerRelative || FramePointerRelative) {
 					if (IndexReg == R_none) {
-						bool tempWritesAboveLocalFrame = this->WritesAboveLocalFrame(Operand);
+						bool tempWritesAboveLocalFrame = this->WritesAboveLocalFrame(Operand, CurrInst->AreDefsNormalized());
 						WritesAboveLocalFrame |= tempWritesAboveLocalFrame;
 #if SMP_DEBUG_FUNC 
 						if (tempWritesAboveLocalFrame) {
diff --git a/SMPFunction.h b/SMPFunction.h
index 31401e84..b277932a 100644
--- a/SMPFunction.h
+++ b/SMPFunction.h
@@ -69,11 +69,16 @@ using namespace std;
 //  eliminated if possible.
 #define CALLING_CONVENTION_DEFAULT_PREFRAMEALLOC_STACK_DELTA -4
 
+// What default value should we assign to alloca stack frame allocations?
+#define STARS_DEFAULT_ALLOCA_SIZE -32
+
 // Use IDA info for switch tables to link indirect jumps to successor blocks?
 #define SMP_USE_SWITCH_TABLE_INFO 1
 
 // Detect function code fragments that are not shared with another function.
-#define STARS_FIND_UNSHARED_CHUNKS 0
+#define STARS_FIND_UNSHARED_CHUNKS 1
+
+#define SMP_ANALYZE_STACK_POINTER 1
 
 struct LocalVar {
 	char VarName[MAXSMPVARSTR];
@@ -116,6 +121,7 @@ public:
 	inline SMPProgram *GetProg(void) const { return Program; };
 	inline const char *GetFuncName(void) const { get_func_name(FirstEA, StaticFuncName, MAXSMPSTR-1); return StaticFuncName; };
 	func_t *GetFuncInfo(void);
+	inline ea_t GetFirstFuncAddr(void) const { return FirstEA; };
 	inline long GetTypedDefs(void) const { return TypedDefs; };
 	inline long GetUntypedDefs(void) const { return UntypedDefs; };
 	inline long GetTypedPhiDefs(void) const { return TypedPhiDefs; };
@@ -152,6 +158,7 @@ public:
 	SMPInstr *GetInstFromAddr(ea_t InstAddr);
 	ea_t GetFirstUnprocessedCallee(void);  // first addr of first callee in AllCallTargets with Processed == false
 	inline size_t GetNumBlocks(void) const { return Blocks.size(); };
+	op_t GetNormalizedOperand(ea_t InstAddr, op_t RTLop); // Return RTLop if not stack opnd; return normalized RTLop otherwise.
 	set<SMPPhiFunction, LessPhi>::iterator GetPhiIterForPhiDef(size_t BlockNumber, op_t DefOp, int SSANum);
 		// Given block # and PhiDef op_t and SSANum, return the Phi iterator or assert.
 	
@@ -179,6 +186,7 @@ public:
 	inline void SetSpecNeedsFrame(bool Status) { SpecNeedsStackReferent = Status; return; };
 	inline void SetIsSpeculative(bool IsS) { IsSpeculative = IsS; }
 	void AddCallSource(ea_t addr);
+	void AddNormalizedStackOperand(op_t OldOp, ea_t InstAddr, op_t NormalizedOp); // add to map for RTL lookup later
 
 	// Six methods to set values into the maps of global reg/SSA to FG info.
 	//  For local names, see corresponding methods in SMPBasicBlock.
@@ -196,6 +204,7 @@ public:
 	inline bool IsFuncProcessed(void) const { return FuncProcessed; };
 	inline bool StackPtrAnalysisSucceeded(void) const { return AnalyzedSP; };
 	inline bool HasSTARSStackPtrAnalysisCompleted(void) const { return STARSStackPtrAnalysisPerformed; };
+	inline bool HasExplicitReturnInstruction(void) const { return HasReturnInst; };
 	inline bool HasIndirectCalls(void) const { return IndirectCalls; };
 	inline bool HasUnresolvedIndirectCalls(void) const { return UnresolvedIndirectCalls; };
 	inline bool HasIndirectJumps(void) const { return IndirectJumps; };
@@ -203,6 +212,7 @@ public:
 	inline bool IsDirectlyRecursive(void) const { return DirectlyRecursive; };
 	inline bool HasSharedChunks(void) const { return SharedChunks; };
 	inline bool HasGoodRTLs(void) const { return BuiltRTLs; };
+	inline bool HasPushAfterFrameAlloc(void) const { return PushAfterLocalVarAlloc; };
 	inline bool IsAddrInFunc(ea_t addr) { return ((addr >= FuncInfo.startEA) && (addr <= FuncInfo.endEA)); }
 	inline bool IsLibFunc(void) const { return LibFunc; };
 	inline bool IsLeaf(void) const { return (!IndirectCalls && DirectCallTargets.empty()); };
@@ -212,7 +222,7 @@ public:
 	inline bool IsSpecSafeCallee(void) const { return SafeCallee; };
 	inline bool NeedsStackFrame(void) const { return NeedsStackReferent; };
 	inline bool SpecNeedsStackFrame(void) const { return SpecNeedsStackReferent; };
-	inline bool WritesAboveReturnAddress(void) const { return WritesAboveRA; };
+	inline bool WritesAboveReturnAddress(void) const { return WritesAboveRA; }; // don't use before fixing this member
 	inline bool OutArgsRegionComputed(void) const { return OutgoingArgsComputed; };
 	bool IsInOutgoingArgsRegion(op_t DestOp); // Does DestOp fall within outgoing args area?
 	inline bool IsGlobalName(op_t RefOp) const { return (GlobalNames.end() != GlobalNames.find(RefOp)); };
@@ -238,13 +248,13 @@ public:
 	void Analyze(void);  // Analyze all instructions in function
 	void AdvancedAnalysis(void); // Analyses that depend on whole program info but not SSA.
 	size_t UnprocessedCalleesCount(void); // Count of callees that have FuncProcessed == false
-	sval_t GetStackDeltaForCallee(ea_t CallAddr); // Get stack pointer delta for callee function, called at CallAddr
+	sval_t GetStackAdjustmentForCallee(ea_t CallAddr); // Get stack pointer adjustment in basic block, after CallAddr
+	sval_t GetStackDeltaForCallee(ea_t CallTargetAddr); // Get stack pointer delta for callee function, which starts at CallTargetAddr
+	sval_t ComputeGlobalStackAdjustment(void); // Find consistent or smallest stack adjustment after all calls to this function, program-wide
 	void ComputeTempReachingDefs(op_t TempOp, ea_t UseAddr); // Compute the TempReachingDefs set that reaches UseAddr for TempOp
 	void ComputeTempStackDeltaReachesList(op_t TempOp); // Compute the TempStackDeltaReachesList for TempOp for all DefAddrs in TempReachingDefs
 	bool FindReachingStackDelta(sval_t &StackDelta); // Find maximum stack delta in TempStackDeltaReachesList; return true if one consistent delta is in the list
 	void EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile);
-	void RPONumberBlocks(void);
-	void SetLinks(void); // Link basic blocks and map instructions to blocks
 	void LiveVariableAnalysis(void);  // Perform Live Variable Analysis across all blocks
 	void ComputeSSA(void); // Compute SSA form data structures
 	void AliasAnalysis(void); // Find memory writes with possible aliases
@@ -275,6 +285,7 @@ private:
 	bool UseFP;  // Does function use a frame pointer?
 	bool StaticFunc; // Is function declared static?
 	bool LibFunc; // is function a standard library function?
+	bool HasReturnInst; // Does function have a return instruction? (might just have a tail call)
 	bool IndirectCalls; // Does function make indirect calls?
 	bool UnresolvedIndirectCalls; // Calls could not all be linked to targets
 	bool IndirectJumps; // Does function make indirect jumps?
@@ -282,9 +293,11 @@ private:
 	bool DirectlyRecursive; // Calls itself
 	bool SharedChunks; // Does function share a tail chunk with other functions?
 	bool UnsharedChunks; // Does function have notcontiguous fragments that are not shared with other funcs?
-	bool CallsAlloca; // Does function allocate stack space after initial allocation?
+	bool CallsAlloca; // Does function allocate stack space after initial allocation? NOTE:SMPInstr::IsAllocaCall() excludes immediate value alloca calls
+	bool PushAfterLocalVarAlloc; // Does function push onto the stack after allocating local var space?
 	bool AnalyzedSP; // Were stack pointer change points successfully analyzed?
 	bool STARSStackPtrAnalysisPerformed; // Have we done our own stack pointer analysis yet?
+	bool StackAdjustmentComputed; // Have we cached a value for the stack adjustment seen after calls to this function throughout the program?
 	bool BuiltRTLs;  // Were RTLs built succcessfully for all instructions?
 	bool SafeFunc;  // Function needs no bounds checking from mmStrata
 	bool SpecSafeFunc;  // Function needs no bounds checking from mmStrata
@@ -318,13 +331,17 @@ private:
 						  // to the value it has at the entry point of the function
 	sval_t MaxStackDelta; // highest (positive) value that stack pointer reaches, relative
 						  // to the value it has at the entry point of the function
+	sval_t MinStackAccessOffset; // Normalized or unnormalized, min stack byte offset in any DEF or USE
+	sval_t MaxStackAccessLimit; // Normalized or unnormalized, 1 greater than max stack byte offset in any DEF or USE
 	sval_t NetStackDelta; // Net change to stack pointer after function returns; +4 for most functions,
 						  //  because they pop off the return address while returning.
 	sval_t PreAllocStackDelta; // Stack delta right before stack frame allocation, to which the stack
 								//  delta should be reset when we see an instruction that deallocates the
 								//  whole frame.
 	sval_t FramePointerStackDelta; // Stack delta when framepointer := stackpointer was encountered; zero if UseFP is false.
+	sval_t GlobalStackAdjustment; // Stack adjustment seen program-wide after calls to this function; zero or positive.
 	long LocalVarOffsetLimit; // upper bound on stack-relative offsets
+	long IDAReturnAddressOffset; // offset from local frame base of return address in IDA Pro stack frame 
 	FuncType  ReturnAddrStatus; // Marked true if the return address is safe from being overwritten
 	list<SMPInstr *> Instrs;
 	list<SMPBasicBlock *> Blocks;
@@ -332,6 +349,7 @@ private:
 	vector<ea_t> IndirectCallTargets; // addresses called by indirect calls
 	vector<ea_t> AllCallTargets; // union of direct and indirect
 	set<ea_t> AllCallSources; // functions that call this one
+	set<ea_t> AllCallSites; // instructions that call this function
 	map<ea_t, SMPBasicBlock *> InstBlockMap;
 	vector<SMPBasicBlock *> RPOBlocks;
 	vector<int> IDom; // Immediate dominators, indexed and valued by block RPO numbers
@@ -367,25 +385,32 @@ private:
 	set<pair<op_t, pair<ea_t, sval_t> >, LessStackDeltaCopy> StackPtrCopySet; // triple: operand holding copy, InstAddr where copy is made, stack delta for copy
 	list<pair<ea_t, sval_t> > TempStackDeltaReachesList;  // Used for temporary lookups of particular op_t in StackPtrCopySet.
 	set<ea_t, LessAddr> TempReachingDefs; // Temporary list of InstAddrs with defs of one op_t that reach a particular InstAddr.
+	map<pair<op_t, ea_t>, op_t, LessDefinition> NormalizedStackOpsMap; // normalized stack operands, indexed by instruction address (for lookup from RTLs).
+	map<pair<op_t, ea_t>, map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator, LessDefinition> InverseNormalizedStackOpsMap; // index: normalized op,
+				// mapped to: iterator into NormalizedStackOpsMap; only for use in functions that call alloca() and need to re-normalize stack ops repeatedly
 
 	// Methods
 	void EraseInstRange(ea_t FirstAddr, ea_t LastAddr);
+	void RPONumberBlocks(void);
+	void SetLinks(void); // Link basic blocks and map instructions to blocks
 	bool FindDistantCodeFragment(ea_t TargetAddr); // Is TargetAddr the start of a code fragment that belongs to this func, not a separate func?
 	bool AnalyzeStackPointerDeltas(void); // Analyze changes in stack pointer for all instructions; return AnalyzedSP
 	bool UseIDAStackPointerDeltas(void); // Use IDA Pro values instead of doing our own analysis
 	bool AddToStackPtrCopySet(op_t CopyOp, ea_t InstAddr, sval_t StackDelta); // return true if inserted, false if present already (update delta in that case)
 	void FindAllAllocsAndDeallocs(void); // Find all stack frame allocating and deallocating instructions and stack ptr offsets
+	void FindFramePointerDelta(void); // Compute FramePointerStackDelta
 	void SetStackFrameInfo(void);
 	ea_t FindAllocPoint(asize_t); // Deal with difficult to find stack frame allocations
 	bool MDFixFrameInfo(void); // Redefine stack regions for our needs
 	bool MDFixUseFP(void);  // Fix IDA errors affecting UseFP
 	void BuildLocalVarTable(void); // Determine local variable boundaries on the stack
 	void SemiNaiveLocalVarID(void); // Semi-naive algorithm for local var boundaries ID
+	void UpdateMinMaxStackOffsets(SMPInstr *CurrInst, op_t TempOp); // Update MinStackAccessOffset and MaxStackAccessLimit if TempOp is stack access
 	bool AuditLocalVarTable(void); // Check and correct IDA Pro listing of local frame members.
 	void FindOutgoingArgsSize(void); // Find portion of local frame that is outgoing args
-	bool WritesAboveLocalFrame(op_t DestOp); // Is DestOp direct stack write to caller's frame?
+	bool WritesAboveLocalFrame(op_t DestOp, bool OpNormalized); // Is DestOp direct stack write to caller's frame?
 	bool IndexedWritesAboveLocalFrame(op_t DestOp); // Is DestOp direct stack write to caller's frame?
-	bool MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize,
+	bool MDGetStackOffsetAndSize(SMPInstr *Instr, op_t TempOp, sval_t BaseValue, ea_t &offset, size_t &DataSize,
 		bool &FP, bool &Indexed, bool &Signed, bool &Unsigned);  // Find any stack memory access in TempOp, return offset, size,
 					// whether the Frame Pointer was used and signedness (if sign-extended or zero-extended).
 	bool FindAlloca(void); // true if found evidence of alloca() allocations
diff --git a/SMPInstr.cpp b/SMPInstr.cpp
index b812f571..7e925215 100644
--- a/SMPInstr.cpp
+++ b/SMPInstr.cpp
@@ -236,7 +236,7 @@ void SMPGuard::Dump(void) {
 // *****************************************************************
 // Class SMPRegTransfer
 // *****************************************************************
-// Constructor
+// Constructors
 SMPRegTransfer::SMPRegTransfer(void) {
 	this->Guard = NULL;
 	this->LeftOperand.type = o_void;
@@ -248,6 +248,7 @@ SMPRegTransfer::SMPRegTransfer(void) {
 #endif
 	this->booleans1 = 0;
 	this->RightRT = NULL;
+	this->ParentInst = NULL;
 	return;
 }
 
@@ -263,6 +264,60 @@ SMPRegTransfer::~SMPRegTransfer() {
 	return;
 }
 
+// Return the left operand; if the parent instruction reports normalized DEFs, return the function's normalized form of it.
+op_t SMPRegTransfer::GetLeftOperand(void) const {
+	op_t TempOp = this->LeftOperand;
+	// AreDefsNormalized() and AreUsesNormalized() should always agree. We consult the DEFs
+	//  flag for left operands even though a left operand is not always a DEF; results are the same.
+	//  NOTE: ParentInst is dereferenced unchecked, and the default constructor sets it to NULL.
+	if (this->ParentInst->AreDefsNormalized()) {
+		ea_t InstAddr = this->ParentInst->GetAddr();
+		TempOp = this->ParentInst->GetBlock()->GetFunc()->GetNormalizedOperand(InstAddr, TempOp);
+	}
+	return TempOp;
+} // end of SMPRegTransfer::GetLeftOperand()
+
+// Return the right operand; if the parent instruction reports normalized USEs, return the function's normalized form of it.
+op_t SMPRegTransfer::GetRightOperand(void) const {
+	op_t TempOp = this->RightOperand;
+	// AreDefsNormalized() and AreUsesNormalized() should always agree. We consult the USEs
+	//  flag for right operands, mirroring GetLeftOperand(), which consults the DEFs flag.
+	//  NOTE: ParentInst is dereferenced unchecked, and the default constructor sets it to NULL.
+	if (this->ParentInst->AreUsesNormalized()) {
+		ea_t InstAddr = this->ParentInst->GetAddr();
+		TempOp = this->ParentInst->GetBlock()->GetFunc()->GetNormalizedOperand(InstAddr, TempOp);
+	}
+	return TempOp;
+} // end of SMPRegTransfer::GetRightOperand()
+
+// Does this RTL subtract a non-immediate value from the stack pointer? Returns true only for that pattern.
+bool SMPRegTransfer::IsAllocaRTL(void) {
+	bool AllocaFound = false;
+	// Search for the pattern: stack_pointer := stack_pointer minus non-immediate
+	if ((SMP_ASSIGN == this->GetOperator()) && (this->HasRightSubTree())) {
+		op_t DefOp = this->GetLeftOperand();
+		if (DefOp.is_reg(MD_STACK_POINTER_REG)) {
+			// We have the code pattern stack_pointer := ...
+			SMPRegTransfer *RightRT = this->GetRightTree();
+			SMPoperator RightOperator = RightRT->GetOperator();
+			op_t RightDefOp = RightRT->GetLeftOperand(); // despite the name, this is the left operand of the right subtree
+			if ((RightDefOp.is_reg(MD_STACK_POINTER_REG)) && (SMP_SUBTRACT == RightOperator)) {
+				// We have the code pattern stack_pointer := stack_pointer minus ...
+				if (RightRT->HasRightSubTree()) {
+					AllocaFound = true; // subtrahend is itself a subtree, so it is not an immediate
+				}
+				else {
+					op_t RightUseOp = RightRT->GetRightOperand();
+					if (o_imm != RightUseOp.type) { // any operand type other than immediate counts
+						AllocaFound = true;
+					}
+				}
+			}
+		}
+	}
+	return AllocaFound;
+} // end of SMPRegTransfer::IsAllocaRTL()
+
 // Compute operand-dependent change in stack pointer value.
 sval_t SMPRegTransfer::ComputeStackPointerAlteration(bool IsLeaveInstr, sval_t IncomingDelta, sval_t FramePtrDelta) {
 	sval_t delta = 0;
@@ -874,7 +929,7 @@ int SMPInstr::operator<=(const SMPInstr &rhs) const {
 
 #define MD_FIRST_ENTER_INSTR  NN_enterw
 #define MD_LAST_ENTER_INSTR NN_enterq
-// Is this instruction the one that allocates space on the
+// Is this instruction one that allocates space on the
 //  stack for the local variables?
 bool SMPInstr::MDIsFrameAllocInstr(void) {
 	// The frame allocating instruction should look like:
@@ -1210,6 +1265,23 @@ bool SMPInstr::MDIsStackPointerCopy(bool UseFP) {
 	return false;
 } // end of SMPInstr::MDIsStackPointerCopy()
 
+// Report whether at least one register transfer of this instruction matches
+//  the alloca() pattern: stack_pointer -= non-immediate-operand
+bool SMPInstr::HasAllocaRTL(void) {
+	// The transfer count is fetched once, before scanning.
+	const size_t TransferLimit = this->RTL.GetCount();
+	size_t Position = 0;
+
+	while (Position < TransferLimit) {
+		SMPRegTransfer *Candidate = this->RTL.GetRT(Position);
+		if (Candidate->IsAllocaRTL()) {
+			return true; // first match settles the question
+		}
+		++Position;
+	}
+	return false; // no transfer matched the alloca() pattern
+} // end of SMPInstr::HasAllocaRTL()
+
 // Determine if the instruction saves or restores a pointer into the stack frame.
 // If it saves a stack pointer, set Save to true, set the StackDelta saved, and set
 //   the operand that received the saved stack pointer into CopyOp. and return true.
@@ -1218,30 +1290,48 @@ bool SMPInstr::MDIsStackPointerCopy(bool UseFP) {
 //   register (whichever was restored), leave StackDelta alone for later computation
 //   based on reaching definitions, and return true.
 // For most instructions, no save or restore of a stack pointer, so return false.
-bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, bool &Save, sval_t &StackDelta, op_t &CopyOp, bool &Error) {
-	bool StackPointerSaveOrRestore = false; // default unless we detect a save or restore of the stack or frame pointer
+bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, sval_t FPDelta, bool &Save, sval_t &StackDelta, op_t &CopyOp, bool &Error) {
+	bool StackPointerSaveOrRestore = false; // returned as-is when the RTL is empty and the loop below never runs; reset per RT inside the loop
 	size_t RTLCount = this->RTL.GetCount();
 	size_t RTLIndex;
-	sval_t FPDelta = BADADDR;
-	op_t TempOp = InitOp;
+	op_t TempOp;
 	int BaseReg, IndexReg, CopyReg;
 	ushort Scale;
 	ea_t offset;
 	SMPoperator CurrOper;
-	bool LookUpStackDelta = false; // Get stack delta from reaching defs for TempOp
-	sval_t DeltaAdjust = 0; // add to StackDelta after computing from reaching defs, e.g. lea esp,[ecx-4] get TempOp of ecx
+	bool LookUpStackDelta; // Get stack delta from reaching defs for TempOp
+	sval_t DeltaAdjust; // add to StackDelta after computing from reaching defs, e.g. lea esp,[ecx-4] get TempOp of ecx
 	                        //  and DeltaAdjust of -4
 
-	Save = false; // default unless we detect a stack pointer save
 	Error = false;
-	if (UseFP) {
-		FPDelta = this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
-	}
 
 	for (RTLIndex = 0; RTLIndex < RTLCount; ++RTLIndex) {
 		bool FPRestore = false; // frame pointer is restored
 		bool SPRestore = false; // stack pointer is restored
+		StackPointerSaveOrRestore = false; // default unless we detect a save or restore of the stack or frame pointer
+		TempOp = InitOp;
+		LookUpStackDelta = false;
+		DeltaAdjust = 0;
+		Save = false; // default unless we detect a stack pointer save
+
+		// The stack alignment instructions (SP := SP bitwise_and immediate_value)
+		//  look like something that needs to be processed here, but we always ignore
+		//  these instructions. They have a variable effect on the stack pointer, from zero
+		//  to -15 delta, but we assume that the delta is zero. This works for us because
+		//  no stack accesses will occur into the padding region.
+		// Also, any instruction that definitely does not restore the stack pointer or
+		//  frame pointer from an arbitrary register or memory location, e.g. a leave instruction
+		//  in x86 CPUs, is already handled in normal stack delta computations and needs
+		//  no lookups from reaching defs, etc.
+		if (this->IsStackAlignmentInst() || this->MDIsLeaveInstr() || this->MDIsFrameAllocInstr()) {
+			break; // exit and return false
+		}
+
 		SMPRegTransfer *CurrRT = this->RTL.GetRT(RTLIndex);
+		CurrOper = CurrRT->GetOperator();
+		if (SMP_ASSIGN != CurrOper) {
+			break; // not a regular RTL
+		}
 		op_t LeftOp = CurrRT->GetLeftOperand();
 		if (LeftOp.is_reg(MD_STACK_POINTER_REG)) {
 			SPRestore = true; // temporary; might just be a push or pop RTL, etc., in which case we will reset.
@@ -1249,59 +1339,56 @@ bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, bool &Save, sval_t &StackDe
 		else if (UseFP && LeftOp.is_reg(MD_FRAME_POINTER_REG)) {
 			FPRestore = true; // likewise temporary
 		}
-		Save = (!(SPRestore || FPRestore));
+		if (!(SPRestore || FPRestore)) {
+#if 0
+			if (LeftOp.is_reg(MD_FLAGS_REG)) {
+				break; // No point in looking for a save into the flags register
+			}
+#endif
+			Save = true;
+		}
 
 		// If we are assigning to the stack pointer reg or the frame pointer reg, we need to analyze the right
 		//  hand side of the RTL to see if it is a stack/frame pointer value, and not a simple push, pop, etc.
-		CurrOper = CurrRT->GetOperator();
-		if (SMP_ASSIGN != CurrOper) {
-			break; // not a regular RTL
-		}
 		if (!(CurrRT->HasRightSubTree())) {
-			// Simple assignment to stack or frame pointer.
+			// Simple assignment.
 			op_t RightOp = CurrRT->GetRightOperand();
-			if (RightOp.is_reg(MD_STACK_POINTER_REG)) {
-				// Must be the move of stack pointer into frame pointer in function prologue.
-				assert(FPRestore); // not really a restore, but we had temporarily marked it as such above
-				Save = true;
-				StackDelta = this->GetStackPtrOffset(); // FP := SP, so saved delta is just current delta
-				CopyOp = RightOp;
-				StackPointerSaveOrRestore = true;
-				FPRestore = false;
-				break;
-			}
-			else if ((o_reg <= RightOp.type) && (o_displ >= RightOp.type)) { // register or memory
-				if (SPRestore || FPRestore) {
-					// stack or frame pointer is being restored; leave Save=false and set other outgoing arguments.
-					TempOp = RightOp;
-					CopyOp = RightOp;
-					StackPointerSaveOrRestore = true;
-					LookUpStackDelta = true;
-					break;
-				}
-				else if (RightOp.is_reg(MD_STACK_POINTER_REG)) {
+			if ((o_reg <= RightOp.type) && (o_displ >= RightOp.type)) { // register or memory
+				if (RightOp.is_reg(MD_STACK_POINTER_REG)) {
 					// Stack pointer reg is being saved.
+					Save = true;
 					StackDelta = this->GetStackPtrOffset(); // LeftOp := SP, so saved delta is just current delta
 					CopyOp = LeftOp;
 					StackPointerSaveOrRestore = true;
+					FPRestore = false; // treat FP := SP as a save of SP rather than a restoration of FP
 					break;
 				}
-				else if (UseFP && RightOp.is_reg(MD_FRAME_POINTER_REG)) {
+				else if (!SPRestore && UseFP && RightOp.is_reg(MD_FRAME_POINTER_REG)) {
 					// Frame pointer is being saved
+					Save = true;
 					StackDelta = FPDelta;
 					CopyOp = LeftOp;
 					StackPointerSaveOrRestore = true;
 					break;
 				}
+				else if (SPRestore || FPRestore) {
+					// stack or frame pointer is being restored; leave Save=false and set other outgoing arguments.
+					TempOp = RightOp;
+					CopyOp = RightOp;
+					StackPointerSaveOrRestore = true;
+					LookUpStackDelta = true;
+				}
 				else { // RightOp is register or non-stack-pointer memory expr; either might hold stack delta
 					TempOp = RightOp;
 					CopyOp = LeftOp;
 					LookUpStackDelta = true; // See if RightOp is holding a stack delta
-					break;
+					StackPointerSaveOrRestore = true;
 				}
 			}
 			else {
-				SMP_msg("ERROR: Invalid operand type for assignment to stack or frame pointer at %x\n", this->GetAddr());
+				if (SPRestore || FPRestore) {
+					SMP_msg("ERROR: Invalid operand type for assignment to stack or frame pointer at %x\n", this->GetAddr());
+				}
 				StackPointerSaveOrRestore = false;
 				break;
 			}
@@ -1334,6 +1421,13 @@ bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, bool &Save, sval_t &StackDe
 								DeltaAdjust = (0 - DeltaAdjust);
 							}
 							LookUpStackDelta = true;
+							StackPointerSaveOrRestore = true;
+							if (SPRestore || FPRestore) {
+								CopyOp = RightLeftOp;
+							}
+							else {
+								CopyOp = LeftOp;
+							}
 						}
 					}
 				}
@@ -1345,67 +1439,93 @@ bool SMPInstr::MDIsStackPtrSaveOrRestore(bool UseFP, bool &Save, sval_t &StackDe
 				StackPointerSaveOrRestore = false;
 			}
 		}
-	} // end for all RTs in the RTL
 
-	if (LookUpStackDelta) {
-		bool StackAccess = false;
-		// We need to set StackDelta based on the reaching defs for TempOp
-		// A reg is probably a general register, but could have lea ebx,[esp+4] so it could be stack or frame pointer.
-		if (TempOp.is_reg(MD_STACK_POINTER_REG)) {
-			StackDelta = this->GetStackPtrOffset();
-			StackDelta += DeltaAdjust;
-			LookUpStackDelta = false; // just got it; no need for reaching defs
-		}
-		else if (UseFP && TempOp.is_reg(MD_FRAME_POINTER_REG)) {
-			StackDelta = FPDelta;
-			StackDelta += DeltaAdjust;
-			LookUpStackDelta = false; // just got it; no need for reaching defs
-		}
-		else if (o_reg == TempOp.type) { // general reg, not frame or stack pointer reg
-			CopyReg = TempOp.reg;
-		}
-		else {
-			MDExtractAddressFields(TempOp, BaseReg, IndexReg, Scale, offset);
-			CopyReg = BaseReg;
-			bool IndexedAccess = ((R_none != BaseReg) && (R_none != IndexReg));
-			if (IndexedAccess) {
-				StackPointerSaveOrRestore = false;  // Cannot analyze indexed accesses into the stack
+		if (LookUpStackDelta) {
+			bool StackAccess = false;
+			// We need to set StackDelta based on the reaching defs for TempOp
+			// A reg is probably a general register, but could have lea ebx,[esp+4] so it could be stack or frame pointer.
+			if (TempOp.is_reg(MD_STACK_POINTER_REG)) {
+				// Weed out RTs that increment or decrement the stack pointer, e.g. SP := SP -4.
+				//  These are not the kind of "save" or "restore" RTs that we are tracking.
+				if (CopyOp.is_reg(MD_STACK_POINTER_REG)) {
+					StackPointerSaveOrRestore = false;
+					SPRestore = false;
+					FPRestore = false;
+					Save = false;
+				}
+				else {
+					StackDelta = this->GetStackPtrOffset();
+					StackDelta += DeltaAdjust;
+					LookUpStackDelta = false; // just got it; no need for reaching defs
+					StackPointerSaveOrRestore = true;
+				}
+			}
+			else if (UseFP && TempOp.is_reg(MD_FRAME_POINTER_REG)) {
+				StackDelta = FPDelta;
+				StackDelta += DeltaAdjust;
+				LookUpStackDelta = false; // just got it; no need for reaching defs
+				StackPointerSaveOrRestore = true;
 			}
-			else if (MDIsStackPtrReg(BaseReg, UseFP)) {
-				StackAccess = true;
+			else if (o_reg == TempOp.type) { // general reg, not frame or stack pointer reg
+				CopyReg = TempOp.reg;
 			}
 			else {
-				// memory expr that is not stack or frame pointer
-				DeltaAdjust = (sval_t) TempOp.addr; // get normalized delta from addr field
+				MDExtractAddressFields(TempOp, BaseReg, IndexReg, Scale, offset);
+				CopyReg = BaseReg;
+				bool IndexedAccess = ((R_none != BaseReg) && (R_none != IndexReg));
+				if (IndexedAccess) {
+					StackPointerSaveOrRestore = false;  // Cannot analyze indexed accesses into the stack
+				}
+				else if (MDIsStackPtrReg(BaseReg, UseFP)) {
+					StackAccess = true;
+				}
+				else {
+					// memory expr that is not stack or frame pointer
+					DeltaAdjust = (sval_t) TempOp.addr; // get normalized delta from addr field
+				}
 			}
-		}
 
-		if (StackPointerSaveOrRestore && LookUpStackDelta) {
-			op_t FindOp = InitOp;
-			if (StackAccess) {
-				FindOp = TempOp;
+			if (StackPointerSaveOrRestore && LookUpStackDelta) {
+				op_t FindOp = InitOp;
+				if (StackAccess) {
+					FindOp = TempOp;
+				}
+				else {
+					FindOp.type = o_reg;
+					FindOp.reg = CopyReg;
+				}
+				if (this->GetBlock()->GetFunc()->IsInStackPtrCopySet(FindOp)) {
+					// Screened out time wasters that are not in copy set; now,
+					//  look up reaching defs.
+					// We need to find out which are the reaching definitions for the FindOp at the current InstAddr.
+					this->GetBlock()->GetFunc()->ComputeTempReachingDefs(FindOp, this->GetAddr());
+					this->GetBlock()->GetFunc()->ComputeTempStackDeltaReachesList(FindOp);
+					// See if TempStackDeltaReachesList has a consistent delta value.
+					StackPointerSaveOrRestore = this->GetBlock()->GetFunc()->FindReachingStackDelta(StackDelta); // consistent SavedDelta value across entire list
+					StackDelta += DeltaAdjust;
+				}
+				else {
+					StackPointerSaveOrRestore = false; // reset, not in stack pointer copy set
+				}
 			}
-			else {
-				FindOp.type = o_reg;
-				FindOp.reg = CopyReg;
-			}
-			if (this->GetBlock()->GetFunc()->IsInStackPtrCopySet(FindOp)) {
-				// Screened out time wasters that are not in copy set; now,
-				//  look up reaching defs.
-				// We need to find out which are the reaching definitions for the FindOp at the current InstAddr.
-				this->GetBlock()->GetFunc()->ComputeTempReachingDefs(FindOp, this->GetAddr());
-				this->GetBlock()->GetFunc()->ComputeTempStackDeltaReachesList(FindOp);
-				// See if TempStackDeltaReachesList has a consistent delta value.
-				StackPointerSaveOrRestore = this->GetBlock()->GetFunc()->FindReachingStackDelta(StackDelta); // consistent SavedDelta value across entire list
-				StackDelta += DeltaAdjust;
+		} // end if (LookupStackDelta)
+
+		if (!StackPointerSaveOrRestore && !Save && (SPRestore || FPRestore)) {
+			// Any restore that could not be analyzed is an error.
+			Error = true;
+			break; // error exit
+		}
+		else if (StackPointerSaveOrRestore) {
+			if (FPRestore) {
+				// If we succeeded in looking up a stack delta that goes into the frame pointer reg,
+				//  then we want to consider this instruction to be a save of a stack delta into
+				//  a register (which happens to be the frame pointer reg in this case).
+				FPRestore = false;
+				Save = true;
 			}
+			break; // assume only one save or restore in an instruction; exit with success
 		}
-	} // end if (LookupStackDelta)
-
-	if (!StackPointerSaveOrRestore && !Save) {
-		// Any restore that could not be analyzed is an error.
-		Error = true;
-	}
+	} // end for all RTs in the RTL
 
 	return StackPointerSaveOrRestore;
 } // end of SMPInstr::MDIsStackPtrSaveOrRestore()
@@ -1634,21 +1754,20 @@ void SMPInstr::AnalyzeMarker(void) {
 //  actually jumps within a function
 void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
 	if (BADADDR != this->CallTarget) {
-		if ((this->CallTarget > FirstFuncAddr)
-				&& (this->CallTarget <= LastFuncAddr)) {
-			this->SetCallUsedAsJump();
-		}
-		else {
-			this->ResetCallUsedAsJump();
-		}
 		if (this->CallTarget == FirstFuncAddr) {
 			this->SetDirectRecursiveCall();
 		}
 		else {
 			this->ResetDirectRecursiveCall();
+			if ((this->CallTarget > FirstFuncAddr) // target past the first address of the function ...
+					&& (this->CallTarget < LastFuncAddr)) { // ... and before LastFuncAddr. NOTE(review): a target equal to LastFuncAddr is not treated as a jump; confirm LastFuncAddr is exclusive
+				this->SetCallUsedAsJump();
+				this->type = JUMP; // a call flagged as used-as-jump is reclassified as a JUMP
+			}
+			else {
+				this->ResetCallUsedAsJump(); // target outside the range checked above
+			}
 		}
-		if (this->IsCallUsedAsJump())
-			this->type = JUMP;
 	}
 	return;
 } // end of SMPInstr::AnalyzeCallInst()
@@ -1670,6 +1789,15 @@ sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocD
 		//  has no effect on the stack pointer.
 		; // leave InstDelta equal to negative or zero value from StackAlterationTable[]
 	}
+	else if (this->IsRecursiveCall()) {
+		// We don't have the net stack delta for our own function yet, so we cannot
+		//  look it up. We must assume that each call has no net effect on the stack delta.
+		// Alternatively, we could call this->GetBlock()->GetFunc()->GetStackDeltaForCallee() as below.
+		InstDelta = 0;
+	}
+	else if (this->IsAllocaCall()) {
+		InstDelta = STARS_DEFAULT_ALLOCA_SIZE;
+	}
 	else if ((CALL == FlowType) || (INDIR_CALL == FlowType) || TailCall) {
 		// A real call instruction, which pushes a return address on the stack,
 		//  not a call used as a branch within the function. A return instruction
@@ -1689,7 +1817,7 @@ sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocD
 			InstDelta = 0;
 		}
 		else { // We have a call target
-			SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->GetFuncFromAddr(CalledFuncAddr);
+			SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->FindFunction(CalledFuncAddr);
 			sval_t AdjustmentDelta;
 			if (CalleeFunc) {
 				if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) {
@@ -1699,13 +1827,14 @@ sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocD
 					//  that the callee will use our return address, so we assume the default stack delta. If not a
 					//  tail call, we ask our function to see if the information is available from IDA Pro analyses,
 					//  or if it can be inferred from the fact that the call is followed by a stack adjustment.
-					SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %x ; normal delta assumed\n", this->GetAddr());
 					if (TailCall) {
 						InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
+						SMP_msg("WARNING: Callee stack ptr analysis not yet performed at tail call inst %x ; normal delta assumed\n", this->GetAddr());
 					}
 					else {
-						AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr());
+						AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(CalledFuncAddr);
 						InstDelta += AdjustmentDelta;
+						SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %x ; stack adjustment used\n", this->GetAddr());
 					}
 				}
 				else if (!CalleeFunc->StackPtrAnalysisSucceeded()) {
@@ -1731,7 +1860,7 @@ sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocD
 					CalledFuncAddr, this->GetAddr());
 				InstDelta = SMP_STACK_DELTA_ERROR_CODE;
 #else
-				SMP_msg("WARNING: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n",
+				SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n",
 					CalledFuncAddr, this->GetAddr());
 				if (TailCall) {
 					InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
@@ -1782,7 +1911,7 @@ sval_t SMPInstr::FindStackAdjustment(void) {
 
 // Normalize stack operands to have a displacement from the stack pointer value on entry to the function,
 //  rather than the current stack pointer value.
-// UseFP indicates we are usign a frame pointer in the function.
+// UseFP indicates we are using a frame pointer in the function.
 // FPDelta holds the stack delta (normalized) for the frame pointer.
 // DefOp comes in with the operand to be normalized, and contains the normalized operand upon return.
 // Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
@@ -1791,9 +1920,66 @@ bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &D
 		return true;
 	}
 	else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
+		op_t OldOp = DefOp;
 		int SignedOffset = (int) DefOp.addr;
 		sval_t NormalizedDelta;
-		if (DefOp.reg == MD_FRAME_POINTER_REG) {
+
+		if (DefOp.hasSIB) {
+			// We must deal with a potentially indexed memory expression. We want to
+			//  normalize two different cases here: e.g. [esp+ebx+4] will become [esp+ebx-24]
+			//  and [ebp+ebx-8] will become [esp+ebx-12] after normalization. A wrinkle
+			//  on the second case is when the base register and index register are swapped
+			//  in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves
+			//  correcting the index/base reg order in the SIB, because an index reg of ESP
+			//  is the SIB encoding for "no index register" and we cannot leave it like that.
+			int BaseReg = sib_base(DefOp);
+			int IndexReg = (int) sib_index(DefOp);
+			if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register
+				IndexReg = R_none;
+			if (BaseReg == X86_STACK_POINTER_REG) {
+				// We probably have an indexed ESP-relative operand.
+				//  We leave the sib byte alone and normalize the offset.
+				NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
+			}
+			else {
+				// Must be EBP-relative.
+				NormalizedDelta = FPDelta + (sval_t) SignedOffset;
+				// Unfortunately, when we are dealing with a SIB byte in the opcode, we cannot
+				//  just say DefOp.reg = MD_STACK_POINTER_REG to convert from the frame pointer
+				//  to the stack pointer. Instead, we have to get into the nasty machine code
+				//  level and change the SIB bits that specify either the base register or the
+				//  index register, whichever one is the frame pointer.
+				if (BaseReg == X86_FRAME_POINTER_REG) {
+					// The three least significant bits of the SIB byte are the base register.
+					//  They must contain a 5, which is the x86 value for register EBP, and we
+					//  want to convert it to a 4, denoting register ESP. We can just zero out
+					//  the least significant bit to accomplish that.
+					DefOp.sib &= 0xfe;
+				}
+				else {
+					// We sometimes have an instruction in which the frame pointer is used as
+					//  the "index" register in the SIB byte, and the true index register is
+					//  in the "base" register position in the SIB byte.
+					assert(IndexReg == X86_FRAME_POINTER_REG);
+					// The true index reg is in the lowest three bits, while the next three
+					//  bits must contain a 5 (register EBP) and we want to make them a 4 (ESP).
+					//  We must swap base and index regs as we normalize (see explanation above).
+					char SIBtemp = DefOp.sib;
+					char SIBindex = SIBtemp & 0x38;
+					char SIBbase = SIBtemp & 0x07;
+					assert ((SIBindex >> 3) == 5); // must be EBP
+					SIBtemp &= 0xc0; // zero out lower 6 bits; upper 2 bits (mask 0xc0) are scale factor - leave them alone
+					SIBtemp |= (SIBbase << 3); // OR the old base reg (e.g. ebx) into the index field; an AND here would wipe the scale bits
+					SIBtemp |= 0x04; // make the new base reg be 4 (reg ESP)
+					DefOp.sib = SIBtemp;
+				}
+				this->SetFPNormalizedToSP();
+				// Add the stack pointer to the USE set for the instruction.
+				this->MDAddRegUse(X86_STACK_POINTER_REG, false);
+			}
+		}
+
+		else if (DefOp.reg == MD_FRAME_POINTER_REG) {
 			// If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this
 			//  is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming
 			//  argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when
@@ -1803,6 +1989,9 @@ bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &D
 			// Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the
 			//  NormalizedDelta just computed.
 			DefOp.reg = MD_STACK_POINTER_REG;
+			this->SetFPNormalizedToSP();
+			// Add the stack pointer to the USE set for the instruction.
+			this->MDAddRegUse(DefOp.reg, false);
 		}
 		else {
 			assert(DefOp.reg == MD_STACK_POINTER_REG);
@@ -1812,6 +2001,13 @@ bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &D
 			NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
 		}
 		DefOp.addr = (ea_t) NormalizedDelta; // common to frame and stack pointer cases
+		if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
+			// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
+			//  displacement field. After normalization, it will have a displacement field, so
+			//  it has become an operand like [esp-32] and is now type o_displ.
+			DefOp.type = o_displ;
+		}
+		this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
 		return true;
 	}
 	else {
@@ -1821,9 +2017,13 @@ bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &D
 
 // Normalize stack operands in all DEFs and USEs to have stack deltas relative to the function entry stack pointer.
 // Return true if any stack DEFs or USEs were normalized.
-bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing) {
+bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing, sval_t DeltaIncrement) {
 	bool StackOpFound = false;
 	bool OpNormalized;
+	bool UniqueDEFMemOp = true; // Does DEFMemOp not match any DEFs?
+	bool UniqueUSEMemOp = true; // Does USEMemOp not match any USEs?
+	bool UniqueLeaUSEMemOp = true; // Does LeaUSEMemOp not match any USEs?
+	bool UniqueMoveSource = true; // Does MoveSource not match any USEs?
 	set<DefOrUse, LessDefUse>::iterator DefIter, UseIter;
 	list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> > DefWorkList, UseWorkList;
 	list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> >::iterator WorkIter;
@@ -1836,10 +2036,18 @@ bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing)
 	for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
 		OldOp = DefIter->GetOp();
 		NewOp = OldOp;
-		if (o_reg != NewOp.type) {
-			OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
+		if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
+			if (Recomputing) {
+				OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
+			}
+			else {
+				OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
+			}
 			if (OpNormalized) {
 				StackOpFound = true;
+				if (IsEqOp(OldOp, this->DEFMemOp)) {
+					UniqueDEFMemOp = false;
+				}
 				pair<set<DefOrUse, LessDefUse>::iterator, op_t> DefItem(DefIter, NewOp);
 				DefWorkList.push_back(DefItem);
 			}
@@ -1848,17 +2056,40 @@ bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing)
 	// Now go through the DEF worklist and change stack operands to normalized stack operands.
 	for (WorkIter = DefWorkList.begin(); WorkIter != DefWorkList.end(); ++WorkIter) {
 		DefIter = WorkIter->first;
-		DefIter = this->Defs.SetOp(DefIter, NewOp);
+		DefIter = this->Defs.SetOp(DefIter, WorkIter->second);
+	}
+	// Normalize op_t private data member DEFs.
+	if (Recomputing) {
+		OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueDEFMemOp, this->DEFMemOp);
+	}
+	else {
+		OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->DEFMemOp);
 	}
+	// Declare victory.
+	this->SetDefsNormalized();
 
 	// Find all USEs that need changing, and build a second work list.
 	for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
 		OldOp = UseIter->GetOp();
 		NewOp = OldOp;
-		if (o_reg != NewOp.type) {
-			OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
+		if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
+			if (Recomputing) {
+				OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
+			}
+			else {
+				OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
+			}
 			if (OpNormalized) {
 				StackOpFound = true;
+				if (IsEqOp(OldOp, this->USEMemOp)) {
+					UniqueUSEMemOp = false;
+				}
+				if (IsEqOp(OldOp, this->LeaUSEMemOp)) {
+					UniqueLeaUSEMemOp = false;
+				}
+				if (IsEqOp(OldOp, this->MoveSource)) {
+					UniqueMoveSource = false;
+				}
 				pair<set<DefOrUse, LessDefUse>::iterator, op_t> UseItem(UseIter, NewOp);
 				UseWorkList.push_back(UseItem);
 			}
@@ -1868,12 +2099,94 @@ bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing)
 	// Now go through the USE worklist and change stack operands to normalized stack operands.
 	for (WorkIter = UseWorkList.begin(); WorkIter != UseWorkList.end(); ++WorkIter) {
 		UseIter = WorkIter->first;
-		UseIter = this->Uses.SetOp(UseIter, NewOp);
+		UseIter = this->Uses.SetOp(UseIter, WorkIter->second);
+	}
+	// Normalize op_t private data member USEs.
+	if (Recomputing) {
+		OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueUSEMemOp, this->USEMemOp);
+		OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueLeaUSEMemOp, this->LeaUSEMemOp);
+		OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueMoveSource, this->MoveSource);
 	}
+	else {
+		OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->USEMemOp);
+		OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->LeaUSEMemOp);
+		OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->MoveSource);
+	}
+	// Declare victory.
+	this->SetUsesNormalized();
 
 	return StackOpFound;
 } // end of SMPInstr::MDNormalizeStackOps()
 
+// Re-normalize an SP-relative stack operand in a function that calls alloca() by adding DeltaIncrement to its stack displacement.
+// DefOp holds the operand to be re-normalized on entry and the re-normalized operand on return.
+// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
+bool SMPInstr::MDRecomputeNormalizedDataFlowOp(sval_t DeltaIncrement, bool UpdateMaps, op_t &DefOp) {
+	if (o_reg == DefOp.type) {
+		return true; // register operand: returned unchanged
+	}
+	if (!MDIsStackAccessOpnd(DefOp, this->GetBlock()->GetFunc()->UsesFramePointer())) {
+		return false; // neither a register nor a stack location
+	}
+	if (this->HasFPNormalizedToSP()) {
+		// FP-relative operands do not change in alloca() functions when the alloca()
+		//  causes the SP to change.
+		return true;
+	}
+
+	// What remains is simple: the ESP-relative displacement grows by DeltaIncrement,
+	//  whether or not a SIB byte is present.
+	op_t PriorOp = DefOp; // snapshot for the operand map, taken before DefOp is altered
+	sval_t ShiftedDelta = DeltaIncrement + (sval_t) ((int) DefOp.addr);
+	DefOp.addr = (ea_t) ShiftedDelta;
+
+	if ((0 != ShiftedDelta) && (o_phrase == DefOp.type)) {
+		// An operand such as [esp] is of type o_phrase because it has no
+		//  displacement field. With a nonzero displacement it now looks
+		//  like [esp-32], which is type o_displ.
+		DefOp.type = o_displ;
+	}
+
+	if (!UpdateMaps) {
+		// Duplicate entries (e.g. USEMemOp, DEFMemOp, MoveSource) are not
+		//  entered into the maps a second time.
+		return true;
+	}
+	// Record the old -> new operand mapping at this instruction's address.
+	this->GetBlock()->GetFunc()->AddNormalizedStackOperand(PriorOp, this->GetAddr(), DefOp);
+	return true;
+} // end of SMPInstr::MDRecomputeNormalizedDataFlowOp()
+
+// Undo stack normalization: if NormOp is a normalized stack memory operand, restore its unnormalized form.
+void SMPInstr::MDGetUnnormalizedOp(op_t &NormOp) {
+	bool FramePtrUsed = this->GetBlock()->GetFunc()->UsesFramePointer();
+	if (!(this->AreDefsNormalized()) || !MDIsStackAccessOpnd(NormOp, FramePtrUsed)) {
+		return; // DEFs not normalized, or not a stack operand: leave NormOp untouched
+	}
+
+	sval_t RawOffset = (sval_t) NormOp.addr;
+	if (!(this->HasFPNormalizedToSP())) {
+		// Operand stays stack-pointer-relative, but its offset should be a
+		//  positive offset from the current stack pointer rather than a
+		//  negative offset from the entry point of the function.
+		RawOffset -= this->GetStackPtrOffset();
+		assert(0 <= RawOffset);
+	}
+	else {
+		// Operand must go back to a frame-pointer-relative address, so the
+		//  frame pointer is put back as the base register.
+		if (NormOp.hasSIB) {
+			NormOp.sib |= 0x01; // SIB base register: stack pointer back to frame pointer
+		}
+		else {
+			NormOp.reg = MD_FRAME_POINTER_REG;
+		}
+		RawOffset -= this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
+	}
+	NormOp.addr = (ea_t) RawOffset;
+	return;
+} // end of SMPInstr::MDGetUnnormalizedOp()
+
 // Find USE-not-DEF operand that is not the flags register.
 op_t SMPInstr::GetSourceOnlyOperand(void) {
 	size_t OpNum;
@@ -1974,11 +2287,6 @@ void SMPInstr::BuildSMPDefUseLists(void) {
 					PrintOperand(TempOp);
 					SMP_msg("\n");
 				}
-				if (o_reg == TempOp.type) {
-					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-					//  analysis and type inference systems.
-					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
-				}
 				this->Defs.SetRef(TempOp);
 			}
 		}
@@ -2018,11 +2326,6 @@ void SMPInstr::BuildSMPDefUseLists(void) {
 					PrintOperand(TempOp);
 					SMP_msg("\n");
 				}
-				if (o_reg == TempOp.type) {
-					// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-					//  analysis and type inference systems.
-					TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
-				}
 				this->Uses.SetRef(TempOp);
 			}
 		}
@@ -2031,6 +2334,28 @@ void SMPInstr::BuildSMPDefUseLists(void) {
 	return;
 } // end of SMPInstr::BuildSMPDefUseLists()
 
+// Declare a branch/jump to be a tail call: flag it, retype it as RETURN, and add EAX/ECX/EDX to the DEF and USE lists.
+void SMPInstr::SetTailCall(void) { 
+	this->booleans1 |= INSTR_SET_TAIL_CALL; // record the tail call flag bit
+	if (this->type == COND_BRANCH) { // must be tested here, before type is overwritten with RETURN below
+		this->SetCondTailCall();
+	}
+	else {
+		this->ResetCondTailCall();
+	}
+	this->CallTarget = this->FarBranchTarget; // the branch target becomes the call target
+	this->type = RETURN; // from here on the instruction is classified as a return
+	this->GetBlock()->SetReturns(true); // mark the containing block as returning
+
+	// We want to add the caller-saved registers to the USEs and DEFs lists.
+	this->MDAddRegDef(R_ax, false); // second argument is the Shown parameter of MDAddRegDef()
+	this->MDAddRegDef(R_cx, false);
+	this->MDAddRegDef(R_dx, false);
+	this->MDAddRegUse(R_ax, false); // presumably the same (reg, Shown) convention as MDAddRegDef() -- verify
+	this->MDAddRegUse(R_cx, false);
+	this->MDAddRegUse(R_dx, false);
+} // end of SMPInstr::SetTailCall()
+
 // If DefReg is not already in the DEF list, add a DEF for it.
 void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
 	op_t TempDef = InitOp;
@@ -2104,9 +2429,6 @@ void SMPInstr::MDFixupDefUseLists(void) {
 				IndexOpnd.reg = (ushort) IndexReg;
 				IndexOpnd.hasSIB = 0;
 				IndexOpnd.set_showed();
-				// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-				//  analysis and type inference systems.
-				IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
 				if (0 == ScaleFactor)
 					this->Uses.SetRef(IndexOpnd);
 				else { // scaling == shift ==> NUMERIC
@@ -2120,9 +2442,6 @@ void SMPInstr::MDFixupDefUseLists(void) {
 				BaseOpnd.reg = (ushort) BaseReg;
 				BaseOpnd.hasSIB = 0;
 				BaseOpnd.set_showed();
-				// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
-				//  analysis and type inference systems.
-				BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
 				RefType = UNINIT;
 #if SMP_BASEREG_POINTER_TYPE
 				// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
@@ -2140,7 +2459,7 @@ void SMPInstr::MDFixupDefUseLists(void) {
 				//  could have memaddr+indexreg or basereg+offset, depending on what
 				//  the displacement is. The exception is if there is no offset and only
 				//  one addressing register, e.g. mov eax,[ebx].
-				if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
+				if (BaseOpnd.is_reg(MD_STACK_POINTER_REG) || (UseFP && BaseOpnd.is_reg(MD_FRAME_POINTER_REG))
 					|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
 					;
 				}
@@ -2358,6 +2677,9 @@ void SMPInstr::MDFixupDefUseLists(void) {
 		this->Defs.clear();
 		this->Uses.clear();
 		this->MoveSource = InitOp;
+		this->DEFMemOp = InitOp;
+		this->USEMemOp = InitOp;
+		this->LeaUSEMemOp = InitOp;
 		this->OptType = 1;
 	}
 #endif
@@ -2396,6 +2718,7 @@ bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
 	if (R_none != IndexReg) {
 		IndexOp.type = o_reg;
 		IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
+		IndexOp.dtyp = dt_dword; // Canonical 32-bit width
 		IndexIter = this->FindUse(IndexOp);
 		assert(IndexIter != this->GetLastUse());
 		IndexType = IndexIter->GetType();
@@ -2403,6 +2726,7 @@ bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
 	if (R_none != BaseReg) {
 		BaseOp.type = o_reg;
 		BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
+		BaseOp.dtyp = dt_dword; // Canonical 32-bit width
 		BaseIter = this->FindUse(BaseOp);
 		assert(BaseIter != this->GetLastUse());
 		BaseType = BaseIter->GetType();
@@ -2564,7 +2888,7 @@ bool SMPInstr::AllUSEsTyped(void) {
 	return (!FoundUNINIT);
 } // end of SMPInstr::AllUSEsTyped()
 
-// UseOp is a USE reg, not just an address reg in a memory USE
+// Return true if UseOp is a USE reg, not just an address reg in a memory USE
 bool SMPInstr::IsNonAddressReg(op_t UseOp) const { 
 	bool FoundUse = false;
 	ushort SearchReg = MDCanonicalizeSubReg(UseOp.reg);
@@ -2864,7 +3188,7 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 				for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
 					DefOp = DefIter->GetOp();
 					if (o_reg == DefOp.type) {
-						DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
+						CanonicalizeOpnd(DefOp);
 						TempSign = FGEntry.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS; // Get both sign bit flags
 						DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
 						if (this->BasicBlock->IsLocalName(DefOp)) {
@@ -2893,7 +3217,7 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 			//  in case other architectures are odd.
 			DefOp = DefIter->GetOp();
 			if (!(IsMemOperand(DefOp) || MDIsFlagsReg(DefOp))) {
-				DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
+				CanonicalizeOpnd(DefOp);
 				DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
 				if (this->BasicBlock->IsLocalName(DefOp)) {
 					this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask);
@@ -2917,7 +3241,7 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 				// All non-memory USEs besides the flags register should get the new SignMask ORed in.
 				UseOp = UseIter->GetOp();
 				if (!(IsMemOperand(UseOp) || MDIsFlagsReg(UseOp))) {
-					UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+					CanonicalizeOpnd(UseOp);
 					UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum());
 					if (this->BasicBlock->IsLocalName(UseOp)) {
 						this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask);
@@ -2948,7 +3272,7 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 				// All DEFs besides the flags register should get the new SignMask ORed in.
 				DefOp = DefIter->GetOp();
 				if ((DefOp.type == o_reg) && (!(DefOp.is_reg(X86_FLAGS_REG)))) {
-					DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
+					CanonicalizeOpnd(DefOp);
 					DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
 					if (this->BasicBlock->IsLocalName(DefOp)) {
 						this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask);
@@ -2965,7 +3289,7 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 				// All USEs besides the flags register should get the new SignMask ORed in.
 				UseOp = UseIter->GetOp();
 				if ((UseOp.type == o_reg) && (!(UseOp.is_reg(X86_FLAGS_REG)))) {
-					UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+					CanonicalizeOpnd(UseOp);
 					UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum());
 					if (this->BasicBlock->IsLocalName(UseOp)) {
 						this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask);
@@ -3030,14 +3354,12 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 		assert(DefIter != this->GetLastDef());
 		DefOp = DefIter->GetOp();
 		assert(o_reg == DefOp.type);
-		DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
 		SSANum =  DefIter->GetSSANum();
 		DefHashValue = HashGlobalNameAndSSA(DefOp, SSANum);
 		UseIter = this->GetFirstUse();
 		assert(UseIter != this->GetLastUse());
 		UseOp = UseIter->GetOp();
 		assert(o_reg == UseOp.type);
-		UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
 		assert(UseOp.reg == DefOp.reg);
 		UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); 
 		SignMask = FG_MASK_SIGNED;  // opcodes do sign extension => signed
@@ -3068,7 +3390,7 @@ void SMPInstr::MDSetWidthSignInfo(bool UseFP) {
 		if (!case2) {
 			if (MDIsGeneralPurposeReg(DefOp)) {
 				WidthMask = ComputeOperandBitWidthMask(DefOp, 0);
-				DefOp.reg = MDCanonicalizeSubReg(DefOp.reg);
+				CanonicalizeOpnd(DefOp);
 				DefIter = this->FindDef(DefOp);
 				assert(DefIter != this->GetLastDef());
 				DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
@@ -3151,7 +3473,7 @@ void SMPInstr::SetRTLUseOpRegWidthInfo(op_t UseOp) {
 
 	if (MDIsGeneralPurposeReg(UseOp)) {
 		WidthMask = ComputeOperandBitWidthMask(UseOp, 0);
-		UseOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+		CanonicalizeOpnd(UseOp);
 		UseIter = this->FindUse(UseOp);
 		assert(UseIter != this->GetLastUse());
 		UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum());
@@ -3225,7 +3547,7 @@ bool SMPInstr::IsBenignTruncation(void) {
 			assert(o_void != UseOp.type);
 			SearchOp = UseOp;
 			if (o_reg == UseOp.type) {
-				SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+				CanonicalizeOpnd(SearchOp);
 			}
 			set<DefOrUse, LessDefUse>::iterator UseIter = this->FindUse(SearchOp);
 			assert(UseIter != this->GetLastUse());
@@ -3349,7 +3671,7 @@ bool SMPInstr::MDIsArgumentPass(void) {
 
 // Trace UseOp through register moves back to its stack location or immediate value source.
 //  Return true if we are passing an immediate or stack location back in UltSource.
-bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSource) {
+bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSource, bool &FPRelative) {
 	// If we hit an immediate value or a stack location, we are done.
 	bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer();
 	op_t NewUseOp; // next UseOp up the move chain
@@ -3382,6 +3704,7 @@ bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSourc
 		// If it is a stack location being loaded, trace succeeded, else it failed.
 		if (StackOp) {
 			UltSource = UseOp;
+			FPRelative = this->HasFPNormalizedToSP();
 			return true;
 		}
 		else {
@@ -3423,6 +3746,7 @@ bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSourc
 		NewUseOp = DefInst->GetLeaMemUseOp();
 		if (MDIsStackAccessOpnd(NewUseOp, UseFP)) {
 			UltSource = NewUseOp;
+			FPRelative = DefInst->HasFPNormalizedToSP();
 			return true;
 		}
 		else {
@@ -3467,6 +3791,7 @@ bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSourc
 				//  addition of 1000 as we move up in the stack frame.
 				NewUseOp.addr += ImmOp.value; // perform the address arithmetic addition
 				UltSource = NewUseOp;
+				FPRelative = NewDefInst->HasFPNormalizedToSP();
 				return true;
 			}
 			else {
@@ -3487,7 +3812,7 @@ bool SMPInstr::TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSourc
 	assert(UseIter != DefInst->GetLastUse());
 	NewUseSSANum = UseIter->GetSSANum();  // unused for immediates, used for regs and stack
 	// Recurse
-	return DefInst->TraceUltimateMoveSource(NewUseOp, NewUseSSANum, UltSource);
+	return DefInst->TraceUltimateMoveSource(NewUseOp, NewUseSSANum, UltSource, FPRelative);
 
 } // end of SMPInstr::TraceUltimateMoveSource()
 
@@ -4751,7 +5076,7 @@ bool SMPInstr::InitFGInfoFromOperator(SMPoperator CurrOp, struct FineGrainedInfo
 // Helper to take USE operand, find its SSANum, and return its UseHashValue.
 int SMPInstr::GetUseOpHashAndSSA(op_t UseOp, int &SSANum) {
 	op_t SearchOp = UseOp;
-	SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+	CanonicalizeOpnd(SearchOp);
 	set<DefOrUse, LessDefUse>::iterator UseIter = this->FindUse(SearchOp);
 	assert(UseIter != this->GetLastUse());
 	SSANum = UseIter->GetSSANum();
@@ -4762,7 +5087,7 @@ int SMPInstr::GetUseOpHashAndSSA(op_t UseOp, int &SSANum) {
 // Helper to take DEF operand, find its SSANum, and return its DefHashValue.
 int SMPInstr::GetDefOpHashAndSSA(op_t DefOp, int &SSANum) {
 	op_t SearchOp = DefOp;
-	SearchOp.reg = MDCanonicalizeSubReg(DefOp.reg);
+	CanonicalizeOpnd(SearchOp);
 	set<DefOrUse, LessDefUse>::iterator DefIter = this->FindDef(SearchOp);
 	assert(DefIter != this->GetLastDef());
 	SSANum = DefIter->GetSSANum();
@@ -5153,6 +5478,7 @@ void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offs
 	ea_t displacement;
 	ushort ScaleFactor;
 	char *disasm = DisAsmText.GetDisAsm(this->GetAddr());
+	int SignedOffset = (int) offset;
 
 	MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
 
@@ -5160,13 +5486,13 @@ void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offs
 		// ESP-relative constant offset
 		SMP_fprintf(AnnotFile,
 				"%10x %6d PTRIMMEDESP STACK %d displ %s\n",
-				this->SMPcmd.ea, this->SMPcmd.size, offset, disasm);
+				this->SMPcmd.ea, this->SMPcmd.size, SignedOffset, disasm);
 	}
 	else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) {
 		// EBP-relative constant offset
 		SMP_fprintf(AnnotFile,
 				"%10x %6d PTRIMMEDEBP STACK %d displ %s\n",
-				this->SMPcmd.ea, this->SMPcmd.size, offset, disasm);
+				this->SMPcmd.ea, this->SMPcmd.size, SignedOffset, disasm);
 	}
 
 	return;
@@ -5182,6 +5508,7 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 	int IndexReg;
 	ushort ScaleFactor;
 	char *disasm = DisAsmText.GetDisAsm(this->GetAddr());
+	int SignedOffset;
 
 #if 0
 	if (this->address == 0x80925f4) {
@@ -5193,6 +5520,7 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 		Opnd = this->SMPcmd.Operands[i];
 		if ((Opnd.type == o_displ) || (Opnd.type == o_phrase))
 			MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, offset);
+		SignedOffset = (int) offset;
 
 		if (Opnd.type == o_displ) {
 			if (Opnd.hasSIB) {
@@ -5203,13 +5531,13 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 					// ESP-relative constant offset
 					SMP_fprintf(AnnotFile,
 							"%10x %6d PTRIMMEDESP STACK %d displ %s\n",
-							this->SMPcmd.ea, this->SMPcmd.size, offset, disasm);
+							this->SMPcmd.ea, this->SMPcmd.size, SignedOffset, disasm);
 				}
 				else if (UseFP && (BaseReg == R_bp)) {
 					// EBP-relative constant offset
 					SMP_fprintf(AnnotFile,
 							"%10x %6d PTRIMMEDEBP STACK %d displ %s\n",
-							this->SMPcmd.ea, this->SMPcmd.size, offset, disasm);
+							this->SMPcmd.ea, this->SMPcmd.size, SignedOffset, disasm);
 				}
 			} // end if (Opnd.hasSIB) ... else ...
 		} // end if (Opnd.type == o_displ) 
@@ -5223,13 +5551,13 @@ void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) {
 					// ESP-relative constant offset
 					SMP_fprintf(AnnotFile,
 							"%10x %6d PTRIMMEDESP STACK %d displ %s\n",
-							this->SMPcmd.ea, this->SMPcmd.size, offset, disasm);
+							this->SMPcmd.ea, this->SMPcmd.size, SignedOffset, disasm);
 				}
 				else if (UseFP && (BaseReg == R_bp)) {
 					// EBP-relative constant offset
 					SMP_fprintf(AnnotFile,
 							"%10x %6d PTRIMMEDEBP STACK %d displ %s\n",
-							this->SMPcmd.ea, this->SMPcmd.size, offset, disasm);
+							this->SMPcmd.ea, this->SMPcmd.size, SignedOffset, disasm);
 				}
 			} // end if (Opnd.hasSIB) ... else ...
 		} // end else if (Opnd.type == o_phrase)
@@ -5291,15 +5619,21 @@ void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE
 	if (MemSrc) {
 		op_t MemSrcOp = this->MDGetMemUseOp();
 		size_t SrcBitWidth = 8 * GetOpDataSize(MemSrcOp);
+		op_t AnnotDefOp = MemSrcOp;
+		// Need to unnormalize stack memory DEFs and USEs before printing annotations.
+		this->MDGetUnnormalizedOp(AnnotDefOp);
 		SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMSRC %d", addr, this->SMPcmd.size, SrcBitWidth);
-		AnnotPrintOperand(MemSrcOp, InfoAnnotFile);
+		AnnotPrintOperand(AnnotDefOp, InfoAnnotFile);
 		SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm);
 	}
 	if (MemDest) {
 		op_t MemDestOp = this->MDGetMemDefOp();
 		size_t DestBitWidth = 8 * GetOpDataSize(MemDestOp);
+		op_t AnnotDefOp = MemDestOp;
+		// Need to unnormalize stack memory DEFs and USEs before printing annotations.
+		this->MDGetUnnormalizedOp(AnnotDefOp);
 		SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMDEF %d", addr, this->SMPcmd.size, DestBitWidth);
-		AnnotPrintOperand(MemDestOp, InfoAnnotFile);
+		AnnotPrintOperand(AnnotDefOp, InfoAnnotFile);
 		SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm);
 	}
 #endif
@@ -5552,15 +5886,21 @@ void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame,
 	if (MemSrc) {
 		op_t MemSrcOp = this->MDGetMemUseOp();
 		size_t SrcBitWidth = 8 * GetOpDataSize(MemSrcOp);
+		op_t AnnotDefOp = MemSrcOp;
+		// Need to unnormalize stack memory DEFs and USEs before printing annotations.
+		this->MDGetUnnormalizedOp(AnnotDefOp);
 		SMP_fprintf(InfoAnnotFile, "%10x %6zu INSTR MEMSRC %zu", addr, this->SMPcmd.size, SrcBitWidth);
-		AnnotPrintOperand(MemSrcOp, InfoAnnotFile);
+		AnnotPrintOperand(AnnotDefOp, InfoAnnotFile);
 		SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm);
 	}
 	if (MemDest) {
 		op_t MemDestOp = this->MDGetMemDefOp();
 		size_t DestBitWidth = 8 * GetOpDataSize(MemDestOp);
+		op_t AnnotDefOp = MemDestOp;
+		// Need to unnormalize stack memory DEFs and USEs before printing annotations.
+		this->MDGetUnnormalizedOp(AnnotDefOp);
 		SMP_fprintf(InfoAnnotFile, "%10x %6zu INSTR MEMDEF %zu", addr, this->SMPcmd.size, DestBitWidth);
-		AnnotPrintOperand(MemDestOp, InfoAnnotFile);
+		AnnotPrintOperand(AnnotDefOp, InfoAnnotFile);
 		SMP_fprintf(InfoAnnotFile, " ZZ %s \n", disasm);
 	}
 #endif
@@ -5851,6 +6191,7 @@ void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame,
 				addr, this->SMPcmd.size, ChildOffset, ChildSize, disasm);
 		}
 #if SMP_IDENTIFY_POINTER_ADDRESS_REG
+		// WARNING: This old code was written prior to the normalization of stack operands.
 		if (MemDest) {
 			assert(this->HasDestMemoryOperand());
 			set<DefOrUse, LessDefUse>::iterator PtrUse;
@@ -6028,7 +6369,12 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) {
 						SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR CHECK UNDERFLOW %s %zu ",
 							this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth);
 					}
-					AnnotPrintOperand(DefOp, InfoAnnotFile);
+					op_t AnnotDefOp = DefOp;
+					if (o_reg != DefOp.type) {
+						// Need to unnormalize stack memory DEFs and USEs before printing annotations.
+						this->MDGetUnnormalizedOp(AnnotDefOp);
+					}
+					AnnotPrintOperand(AnnotDefOp, InfoAnnotFile);
 					string SinkString("");
 					if (this->GetBlock()->GetFunc()->HasIntErrorCallSink(DefOp, SSANum, this->address, SinkString)) {
 						SMP_fprintf(InfoAnnotFile, " ZZ %s %s \n", SinkString.c_str(), disasm);
@@ -6203,7 +6549,7 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) {
 		//  truncation on this store.
 		op_t SearchOp = UseOp;
 		// Canonicalize sub-regs for searching DEFs and USEs.
-		SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg);
+		CanonicalizeOpnd(SearchOp);
 		SearchOp.dtyp = dt_dword;
 		UseHashValue = HashGlobalNameAndSSA(SearchOp, UseIter->GetSSANum());
 
@@ -6265,8 +6611,7 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) {
 		bool OutArgsWrite = false;
 		if (o_reg == DestSearchOp.type) {
 			StackDestination = false;
-			DestSearchOp.reg = MDCanonicalizeSubReg(DefOp.reg);
-			DestSearchOp.dtyp = dt_dword;
+			CanonicalizeOpnd(DestSearchOp);
 		}
 		else if (!(MDIsStackAccessOpnd(DefOp, UseFP))) {
 			// If destination of move is not a register and is not
@@ -6565,16 +6910,22 @@ void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) {
 			op_t MemSetTarget;
 			size_t MemSetSize;
 			int StackOffset;
-			if (this->GetBlock()->AnalyzeMemSet(this->GetAddr(), MemSetTarget, MemSetSize, StackOffset)) {
+			bool FPRelativeTarget = false; // original target before stack delta normalization
+			if (this->GetBlock()->AnalyzeMemSet(this->GetAddr(), MemSetTarget, MemSetSize, StackOffset, FPRelativeTarget)) {
 				if (0 < MemSetSize) {
 					// Emit annotation.
-					if (0 > StackOffset) {
+					// NOTE: We want the unnormalized stack operand, so that Strata and SPRI will get
+					//  annotations that match what is seen in the assembly language.
+					if (FPRelativeTarget) {
 						// Must be negative offset from EBP.
+						assert(UseFP);
+						StackOffset -= (int) this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
 						SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMSET STACKOFFSET_EBP %d SIZE %zu ZZ %s \n",
 							this->address, this->SMPcmd.size, StackOffset, MemSetSize, disasm);
 					}
 					else {
 						// Must be non-negative offset from ESP.
+						StackOffset -= this->GetStackPtrOffset();
 						SMP_fprintf(InfoAnnotFile, "%10x %6d INSTR MEMSET STACKOFFSET_ESP %d SIZE %zu ZZ %s \n",
 							this->address, this->SMPcmd.size, StackOffset, MemSetSize, disasm);
 					}
@@ -6925,9 +7276,11 @@ bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) {
 		// Use of the floating register stack top is implicit
 		DestFound = true;
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(FPRegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		SMPRegTransfer *RightRT = new SMPRegTransfer;
+		RightRT->SetParentInst(this);
 		RightRT->SetLeftOperand(FPRegOp);
 		RightRT->SetOperator(UnaryOp);
 		RightRT->SetRightOperand(VoidOp);
@@ -6940,9 +7293,11 @@ bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) {
 		// Flags register is implicit destination.
 		DestFound = true;
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(FlagsOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		SMPRegTransfer *RightRT = new SMPRegTransfer;
+		RightRT->SetParentInst(this);
 		if (NN_cmc == this->SMPcmd.itype) { // complement carry flag USEs old carry flag
 			RightRT->SetLeftOperand(FlagsOp);
 			RightRT->SetOperator(SMP_BITWISE_NOT);
@@ -6963,6 +7318,7 @@ bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) {
 			if (MDKnownOperandType(TempOp)) {
 				DestFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				LeftOp = TempOp;
 
 				if (WidthDoubler) {
@@ -6992,6 +7348,7 @@ bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) {
 
 				TempRT->SetOperator(SMP_ASSIGN);
 				SMPRegTransfer *RightRT = new SMPRegTransfer;
+				RightRT->SetParentInst(this);
 				RightRT->SetLeftOperand(TempOp);
 				RightRT->SetOperator(UnaryOp);
 				RightRT->SetRightOperand(VoidOp);
@@ -7038,9 +7395,11 @@ bool SMPInstr::BuildUnaryTwoOperandRTL(SMPoperator UnaryOp) {
 
 	if (DestFound && SourceFound) {
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(DestOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		SMPRegTransfer *RightRT = new SMPRegTransfer;
+		RightRT->SetParentInst(this);
 		RightRT->SetLeftOperand(SrcOp);
 		RightRT->SetOperator(UnaryOp);
 		RightRT->SetRightOperand(VoidOp);
@@ -7070,8 +7429,10 @@ bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp, bool HiddenFPStackOp) {
 	//  of the comparison.
 	bool SrcIsReallyDest = ((SMP_COMPARE_EQ_AND_SET == BinaryOp) 
 		|| (SMP_COMPARE_GT_AND_SET == BinaryOp));
+	bool StackPointerModification = false; // SP := SP SMP_BITWISE_AND operand
 	SMPRegTransfer *TempRT = NULL;
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 
 	op_t VoidOp = InitOp;
 
@@ -7083,6 +7444,7 @@ bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp, bool HiddenFPStackOp) {
 		// Use of the floating register stack top is implicit
 		DestFound = true;
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(FPRegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		RightRT->SetLeftOperand(FPRegOp);
@@ -7101,6 +7463,7 @@ bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp, bool HiddenFPStackOp) {
 				if (!MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) {
 					DestFound = true;
 					TempRT = new SMPRegTransfer;
+					TempRT->SetParentInst(this);
 					TempRT->SetLeftOperand(TempOp);
 					TempRT->SetOperator(SMP_ASSIGN);
 					if (this->IsRegClearIdiom()) {
@@ -7114,6 +7477,9 @@ bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp, bool HiddenFPStackOp) {
 						RightRT->SetLeftOperand(TempOp);
 						RightRT->SetOperator(BinaryOp);
 						TempRT->SetRightTree(RightRT);
+						if (TempOp.is_reg(MD_STACK_POINTER_REG) && (SMP_BITWISE_AND == BinaryOp)) {
+							StackPointerModification = true; // searching for SP := SP AND immediate
+						}
 					}
 				}
 				else {
@@ -7144,6 +7510,9 @@ bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp, bool HiddenFPStackOp) {
 				if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) {
 					SourceFound = true;
 					RightRT->SetRightOperand(TempOp);
+					if (StackPointerModification && (o_imm == TempOp.type)) {
+						this->SetStackAlignmentInst();
+					}
 				}
 			}
 			if (!(this->features & UseMacros[OpNum])) {
@@ -7249,6 +7618,7 @@ bool SMPInstr::BuildLeaRTL(void) {
 		// Note that almost any combination of BaseReg, IndexReg, and offset can be present
 		//  or absent.
 		AssignRT = new SMPRegTransfer;
+		AssignRT->SetParentInst(this);
 		AssignRT->SetLeftOperand(DefOp);
 		AssignRT->SetOperator(SMP_ASSIGN);
 
@@ -7266,6 +7636,7 @@ bool SMPInstr::BuildLeaRTL(void) {
 		if (ScaledIndexReg) {
 			// First, build the subtree to scale the IndexReg.
 			SMPRegTransfer *MultRT = new SMPRegTransfer;
+			MultRT->SetParentInst(this);
 			MultRT->SetLeftOperand(IndexOp);
 			MultRT->SetOperator(SMP_U_LEFT_SHIFT);
 			MultRT->SetRightOperand(ScaleOp);
@@ -7273,12 +7644,14 @@ bool SMPInstr::BuildLeaRTL(void) {
 			if (0 != offset) {
 				// Add the offset to the scaled index subtree.
 				SMPRegTransfer *AddOffRT = new SMPRegTransfer;
+				AddOffRT->SetParentInst(this);
 				AddOffRT->SetLeftOperand(OffsetOp);
 				AddOffRT->SetOperator(SMP_ADD);
 				AddOffRT->SetRightTree(MultRT);
 				// Add a BaseReg, if any.
 				if (R_none != BaseReg) {
 					SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
+					AddBaseRT->SetParentInst(this);
 					AddBaseRT->SetLeftOperand(BaseOp);
 					AddBaseRT->SetOperator(SMP_ADD);
 					AddBaseRT->SetRightTree(AddOffRT);
@@ -7293,6 +7666,7 @@ bool SMPInstr::BuildLeaRTL(void) {
 				// Add a BaseReg, if any.
 				if (R_none != BaseReg) {
 					SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
+					AddBaseRT->SetParentInst(this);
 					AddBaseRT->SetLeftOperand(BaseOp);
 					AddBaseRT->SetOperator(SMP_ADD);
 					AddBaseRT->SetRightTree(MultRT);
@@ -7308,12 +7682,14 @@ bool SMPInstr::BuildLeaRTL(void) {
 			if (0 != offset) {
 				if (R_none != IndexReg) {
 					SMPRegTransfer *AddOffRT = new SMPRegTransfer;
+					AddOffRT->SetParentInst(this);
 					AddOffRT->SetLeftOperand(OffsetOp);
 					AddOffRT->SetOperator(SMP_ADD);
 					AddOffRT->SetRightOperand(IndexOp);
 					// Add BaseReg, if any.
 					if (R_none != BaseReg) {
 						SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
+						AddBaseRT->SetParentInst(this);
 						AddBaseRT->SetLeftOperand(BaseOp);
 						AddBaseRT->SetOperator(SMP_ADD);
 						AddBaseRT->SetRightTree(AddOffRT);
@@ -7328,6 +7704,7 @@ bool SMPInstr::BuildLeaRTL(void) {
 					// Add BaseReg, if any.
 					if (R_none != BaseReg) {
 						SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
+						AddBaseRT->SetParentInst(this);
 						AddBaseRT->SetLeftOperand(BaseOp);
 						AddBaseRT->SetOperator(SMP_ADD);
 						AddBaseRT->SetRightOperand(OffsetOp);
@@ -7356,6 +7733,7 @@ bool SMPInstr::BuildLeaRTL(void) {
 				}
 				else { // we have a BaseReg and an IndexReg, unscaled, no offset
 					SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
+					AddBaseRT->SetParentInst(this);
 					AddBaseRT->SetLeftOperand(BaseOp);
 					AddBaseRT->SetOperator(SMP_ADD);
 					AddBaseRT->SetRightOperand(IndexOp);
@@ -7377,7 +7755,9 @@ bool SMPInstr::BuildDoubleShiftRTL(SMPoperator BinaryOp) {
 	bool CountFound = false;
 	SMPRegTransfer *TempRT = NULL;
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	SMPRegTransfer *LowerRightRT = new SMPRegTransfer;
+	LowerRightRT->SetParentInst(this);
 
 	// The doubleword shifts operate as follows: shift the DEF register right or left by
 	//  the number of bits specified by the count, and shift in bits from the USE register,
@@ -7408,6 +7788,7 @@ bool SMPInstr::BuildDoubleShiftRTL(SMPoperator BinaryOp) {
 			if (MDKnownOperandType(TempOp)) {
 				DestFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(TempOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				RightRT->SetLeftOperand(TempOp);
@@ -7455,6 +7836,7 @@ bool SMPInstr::BuildMultiplyDivideRTL(SMPoperator BinaryOp) {
 	bool ImplicitEDXUse = false;
 	SMPRegTransfer *TempRT = NULL;
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 
 	op_t FPRegOp = InitOp;
 	FPRegOp.type = o_fpreg;  // floating point register stack
@@ -7487,6 +7869,7 @@ bool SMPInstr::BuildMultiplyDivideRTL(SMPoperator BinaryOp) {
 			if (MDKnownOperandType(TempOp)) {
 				DestFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(TempOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				RightRT->SetLeftOperand(TempOp);
@@ -7519,7 +7902,9 @@ bool SMPInstr::BuildMultiplyDivideRTL(SMPoperator BinaryOp) {
 			//  Make a deep copy from existing EAX effect and change EAX dest to EDX.
 			//  For divisions, we also deep copy EAX effect and change EAX source to EDX.
 			SMPRegTransfer *EDXRT = new SMPRegTransfer;
+			EDXRT->SetParentInst(this);
 			SMPRegTransfer *EDXRightRT = new SMPRegTransfer;
+			EDXRightRT->SetParentInst(this);
 			op_t EDXOp;
 			EDXRT->SetOperator(SMP_ASSIGN);
 			EDXOp = TempRT->GetLeftOperand();
@@ -7566,7 +7951,9 @@ bool SMPInstr::BuildBinaryPlusFlagsRTL(SMPoperator BinaryOp) {
 	FlagsOp.reg = X86_FLAGS_REG;
 
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	SMPRegTransfer *FlagsRightRT = new SMPRegTransfer;
+	FlagsRightRT->SetParentInst(this);
 
 	for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) {
 		op_t TempOp = this->SMPcmd.Operands[OpNum];
@@ -7574,6 +7961,7 @@ bool SMPInstr::BuildBinaryPlusFlagsRTL(SMPoperator BinaryOp) {
 			if (MDKnownOperandType(TempOp)) {
 				DestFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(TempOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				RightRT->SetLeftOperand(TempOp);
@@ -7617,7 +8005,9 @@ bool SMPInstr::BuildUnary2OpndRTL(SMPoperator UnaryOp) {
 	bool DestFound = false;
 	bool SourceFound = false;
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	int opcode = this->SMPcmd.itype;
 	bool ExtendedMove = ((NN_movsx == opcode) || (NN_movzx == opcode));
 
@@ -7752,6 +8142,7 @@ bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) {
 #endif
 
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 
 	op_t VoidOp = InitOp;
 
@@ -7797,12 +8188,12 @@ bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) {
 #if SMP_DEBUG_BUILD_RTL
 	if (MemSrc && MemDest && (NN_movs != opcode)) {
 		if (NN_stos != opcode) {
-			SMP_msg("ERROR: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(),
+			SMP_msg("ERROR: IDA Pro error: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(),
 				DisAsmText.GetDisAsm(this->GetAddr()));
 		}
 		else {  // IDA incorrectly lists [EDI] as both DEF and USE, because reg EDI
 			    //  is both DEF and USE in NN_stos.
-			SMP_msg("WARNING: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(),
+			SMP_msg("WARNING: Ignoring IDA Pro error: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(),
 				DisAsmText.GetDisAsm(this->GetAddr()));
 		}
 		this->PrintOperands();
@@ -7975,7 +8366,9 @@ bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) {
 		if (HasRepeatPrefix) { // Must be MOVS or STOS or INS or OUTS
 			// The repeat causes USE and DEF of ECX as a counter
 			SMPRegTransfer *CounterRT = new SMPRegTransfer;
+			CounterRT->SetParentInst(this);
 			SMPRegTransfer *RightRT = new SMPRegTransfer;
+			RightRT->SetParentInst(this);
 			CounterRT->SetLeftOperand(CountOp);
 			CounterRT->SetOperator(SMP_ASSIGN);
 			RightRT->SetLeftOperand(CountOp);
@@ -8044,10 +8437,15 @@ bool SMPInstr::BuildLoadStringRTL(void) {
 	DerefESIOp.reg = R_si;
 
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	SMPRegTransfer *GuardedIncRT = new SMPRegTransfer;
+	GuardedIncRT->SetParentInst(this);
 	SMPRegTransfer *GuardedDecRT = new SMPRegTransfer;
+	GuardedDecRT->SetParentInst(this);
 	SMPRegTransfer *RightIncRT = new SMPRegTransfer;
+	RightIncRT->SetParentInst(this);
 	SMPRegTransfer *RightDecRT = new SMPRegTransfer;
+	RightDecRT->SetParentInst(this);
 
 	// Build the load string RTL. Ignore ES segment register for now.
 	// Load string is:  AL := [ESI]; if (DF == 0) ESI += 1 else ESI -= 1;
@@ -8119,7 +8517,9 @@ bool SMPInstr::BuildCompareStringRTL(void) {
 	op_t VoidOp = InitOp;
 
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 
 	for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) {
 		op_t TempOp = this->SMPcmd.Operands[OpNum];
@@ -8164,7 +8564,9 @@ bool SMPInstr::BuildCompareStringRTL(void) {
 		if (HasRepeatPrefix) {
 			// The repeat causes USE and DEF of ECX as a counter
 			SMPRegTransfer *CounterRT = new SMPRegTransfer;
+			CounterRT->SetParentInst(this);
 			SMPRegTransfer *RightRT = new SMPRegTransfer;
+			RightRT->SetParentInst(this);
 			CounterRT->SetLeftOperand(CountOp);
 			CounterRT->SetOperator(SMP_ASSIGN);
 			RightRT->SetLeftOperand(CountOp);
@@ -8183,6 +8585,7 @@ bool SMPInstr::BuildExchangeRTL(void) {
 	bool Src1Found = false;
 	bool Src2Found = false;
 	SMPRegTransfer *TempRT = new SMPRegTransfer;  // second effect, src := dest
+	TempRT->SetParentInst(this);
 
 	for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) {
 		op_t TempOp = this->SMPcmd.Operands[OpNum];
@@ -8223,6 +8626,7 @@ bool SMPInstr::BuildExchangeRTL(void) {
 	else {
 		// Create the first effect, dest := src
 		SMPRegTransfer *FirstRT = new SMPRegTransfer;
+		FirstRT->SetParentInst(this);
 		FirstRT->SetLeftOperand(TempRT->GetRightOperand());
 		FirstRT->SetRightOperand(TempRT->GetLeftOperand());
 		FirstRT->SetOperator(SMP_ASSIGN);
@@ -8240,6 +8644,7 @@ bool SMPInstr::BuildExchangeAddRTL(void) {
 	bool Src2Found = false;
 
 	SMPRegTransfer *TempRT = new SMPRegTransfer;  // second effect, src := dest
+	TempRT->SetParentInst(this);
 
 	for (OpNum = 0; !(Src1Found && Src2Found) && (OpNum < UA_MAXOP); ++OpNum) {
 		op_t TempOp = this->SMPcmd.Operands[OpNum];
@@ -8278,7 +8683,9 @@ bool SMPInstr::BuildExchangeAddRTL(void) {
 	else {
 		// Create the first effect, dest := dest + src
 		SMPRegTransfer *FirstRT = new SMPRegTransfer;
+		FirstRT->SetParentInst(this);
 		SMPRegTransfer *AddRT = new SMPRegTransfer;
+		AddRT->SetParentInst(this);
 		AddRT->SetLeftOperand(TempRT->GetRightOperand());
 		AddRT->SetOperator(SMP_ADD);
 		AddRT->SetRightOperand(TempRT->GetLeftOperand());
@@ -8301,6 +8708,7 @@ bool SMPInstr::BuildCompareExchangeRTL(void) {
 	op_t DestOp = InitOp;
 	op_t SourceOp = InitOp;
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 
 	for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) {
 		op_t TempOp = this->SMPcmd.Operands[OpNum];
@@ -8343,6 +8751,7 @@ bool SMPInstr::BuildCompareExchangeRTL(void) {
 		Guard1->SetOperator(SMP_EQUAL);
 		Guard1->SetRightOperand(EAXOp);
 		SMPRegTransfer *FirstRT = new SMPRegTransfer;
+		FirstRT->SetParentInst(this);
 		FirstRT->SetLeftOperand(DestOp);
 		FirstRT->SetRightOperand(SourceOp);
 		FirstRT->SetOperator(SMP_ASSIGN);
@@ -8370,7 +8779,9 @@ bool SMPInstr::BuildPackShiftRTL(SMPoperator PackOp, SMPoperator ShiftOp) {
 	bool CountFound = false;
 	SMPRegTransfer *TempRT = NULL;
 	SMPRegTransfer *ShiftRT = new SMPRegTransfer;
+	ShiftRT->SetParentInst(this);
 	SMPRegTransfer *PackRT = new SMPRegTransfer;
+	PackRT->SetParentInst(this);
 
 	// RTL structure: top operator is assignment, next right operator is a reverse
 	//  shift with the shift count as its left operand, and lowest right operator
@@ -8381,6 +8792,7 @@ bool SMPInstr::BuildPackShiftRTL(SMPoperator PackOp, SMPoperator ShiftOp) {
 			if (MDKnownOperandType(TempOp)) {
 				DestFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(TempOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				PackRT->SetLeftOperand(TempOp);
@@ -8428,7 +8840,9 @@ bool SMPInstr::BuildFlagsDestBinaryRTL(SMPoperator BinaryOp) {
 		|| (0 != (this->SMPcmd.auxpref & aux_repne));
 
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 
 	op_t VoidOp = InitOp, FlagsOp = InitOp;
 
@@ -8512,7 +8926,9 @@ bool SMPInstr::BuildFlagsDestBinaryRTL(SMPoperator BinaryOp) {
 		if (HasRepeatPrefix) { // Must be CMPS or SCAS
 			// The repeat causes USE and DEF of ECX as a counter
 			SMPRegTransfer *CounterRT = new SMPRegTransfer;
+			CounterRT->SetParentInst(this);
 			SMPRegTransfer *RightRT = new SMPRegTransfer;
+			RightRT->SetParentInst(this);
 			CounterRT->SetLeftOperand(CountOp);
 			CounterRT->SetOperator(SMP_ASSIGN);
 			RightRT->SetLeftOperand(CountOp);
@@ -8560,6 +8976,7 @@ bool SMPInstr::BuildCallRTL(void) {
 			if (MDKnownOperandType(TempOp)) {
 				SourceFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(VoidOp);
 				TempRT->SetOperator(SMP_CALL);
 				TempRT->SetRightOperand(TempOp);
@@ -8692,6 +9109,7 @@ bool SMPInstr::BuildEnterRTL(void) {
 	}
 	else {
 		SMPRegTransfer *TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 
 		// Add first effect: [esp-4] := ebp
 		TempRT->SetLeftOperand(SavedEBP);
@@ -8702,9 +9120,11 @@ bool SMPInstr::BuildEnterRTL(void) {
 
 		// Add second effect: ebp := esp - 4
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(FramePointerOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		SMPRegTransfer *RightRT = new SMPRegTransfer;
+		RightRT->SetParentInst(this);
 		RightRT->SetLeftOperand(StackPointerOp);
 		RightRT->SetOperator(SMP_SUBTRACT);
 		RightRT->SetRightOperand(Immed4Op);
@@ -8751,9 +9171,11 @@ bool SMPInstr::BuildLeaveRTL(void) {
 
 	// Build first effect:  ESP := EBP + 4
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	TempRT->SetLeftOperand(StackPointerOp);
 	TempRT->SetOperator(SMP_ASSIGN);
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	RightRT->SetOperator(SMP_ADD);
 	RightRT->SetLeftOperand(FramePointerOp);
 	RightRT->SetRightOperand(Immed4Op);
@@ -8764,6 +9186,7 @@ bool SMPInstr::BuildLeaveRTL(void) {
 
 	// Build second effect: EBP := [EBP+0]
 	TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	TempRT->SetLeftOperand(FramePointerOp);
 	TempRT->SetOperator(SMP_ASSIGN);
 	TempRT->SetRightOperand(SavedEBP);
@@ -8782,10 +9205,12 @@ bool SMPInstr::BuildOptType8RTL(void) {
 
 	// Create the effect on EDX.
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	DestOp.reg = R_dx;
 	TempRT->SetLeftOperand(DestOp);
 	TempRT->SetOperator(SMP_ASSIGN);
 	SMPRegTransfer *RightRT =  new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	RightRT->SetLeftOperand(VoidOp);
 	RightRT->SetOperator(SMP_SYSTEM_OPERATION);
 	RightRT->SetRightOperand(VoidOp);
@@ -8796,10 +9221,12 @@ bool SMPInstr::BuildOptType8RTL(void) {
 	TempRT = NULL;
 	RightRT = NULL;
 	TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	DestOp.reg = R_ax;
 	TempRT->SetLeftOperand(DestOp);
 	TempRT->SetOperator(SMP_ASSIGN);
 	RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	RightRT->SetLeftOperand(VoidOp);
 	RightRT->SetOperator(SMP_SYSTEM_OPERATION);
 	RightRT->SetRightOperand(VoidOp);
@@ -8839,6 +9266,7 @@ bool SMPInstr::BuildJumpRTL(SMPoperator CondBranchOp) {
 			if (MDKnownOperandType(TempOp)) {
 				TargetFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(EIPOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				TempRT->SetRightOperand(TempOp);
@@ -8871,7 +9299,9 @@ bool SMPInstr::BuildJumpRTL(SMPoperator CondBranchOp) {
 // Add to the stack pointer to deallocate stack space, e.g. for a pop instruction.
 void SMPInstr::AddToStackPointer(uval_t delta) {
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	op_t StackOp = InitOp, DeltaOp = InitOp;
 
 	StackOp.type = o_reg;
@@ -8890,10 +9320,12 @@ void SMPInstr::AddToStackPointer(uval_t delta) {
 	return;
 } // end of SMPInstr::AddToStackPointer()
 
-// Add to the stack pointer to deallocate stack space, e.g. for a pop instruction.
+// Subtract from the stack pointer to allocate stack space, e.g. for a push instruction.
 void SMPInstr::SubFromStackPointer(uval_t delta) {
 	SMPRegTransfer *TempRT = new SMPRegTransfer;
+	TempRT->SetParentInst(this);
 	SMPRegTransfer *RightRT = new SMPRegTransfer;
+	RightRT->SetParentInst(this);
 	op_t StackOp = InitOp, DeltaOp = InitOp;
 
 	StackOp.type = o_reg;
@@ -8931,6 +9363,7 @@ bool SMPInstr::BuildPopRTL(void) {
 	// Handle special cases first.
 	if ((SMP_FIRST_POP_FLAGS <= this->SMPcmd.itype) && (SMP_LAST_POP_FLAGS >= this->SMPcmd.itype)) {
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(FlagsOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -8953,6 +9386,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_di;
 		StackOp.addr = 0;  // [ESP+0]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -8963,6 +9397,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_si;
 		StackOp.addr = 4;  // [ESP+4]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -8973,6 +9408,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_bp;
 		StackOp.addr = 8;  // [ESP+8]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -8985,6 +9421,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_bx;
 		StackOp.addr = 16;  // [ESP+16]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -8995,6 +9432,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_dx;
 		StackOp.addr = 20;  // [ESP+20]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -9005,6 +9443,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_cx;
 		StackOp.addr = 24;  // [ESP+24]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -9015,6 +9454,7 @@ bool SMPInstr::BuildPopRTL(void) {
 		RegOp.reg = R_ax;
 		StackOp.addr = 28;  // [ESP+28]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetLeftOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetRightOperand(StackOp);
@@ -9033,6 +9473,7 @@ bool SMPInstr::BuildPopRTL(void) {
 			if (MDKnownOperandType(TempOp)) {
 				DestFound = true;
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetLeftOperand(TempOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				StackOp.dtyp = TempOp.dtyp;  // size of transfer
@@ -9072,6 +9513,7 @@ bool SMPInstr::BuildPushRTL(void) {
 	// Handle special cases first.
 	if ((SMP_FIRST_PUSH_FLAGS <= this->SMPcmd.itype) && (SMP_LAST_PUSH_FLAGS >= this->SMPcmd.itype)) {
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(FlagsOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9089,6 +9531,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_di;
 		StackOp.addr = (ea_t) -32;  // [ESP-32]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9099,6 +9542,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_si;
 		StackOp.addr = (ea_t) -28;  // [ESP-28]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9109,6 +9553,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_bp;
 		StackOp.addr = (ea_t) -24;  // [ESP-24]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9119,6 +9564,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_sp;
 		StackOp.addr = (ea_t) -20;  // [ESP-20]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9129,6 +9575,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_bx;
 		StackOp.addr = (ea_t) -16;  // [ESP-16]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9139,6 +9586,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_dx;
 		StackOp.addr = (ea_t) -12;  // [ESP-12]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9149,6 +9597,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_cx;
 		StackOp.addr = (ea_t) -8;  // [ESP-8]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9159,6 +9608,7 @@ bool SMPInstr::BuildPushRTL(void) {
 		RegOp.reg = R_ax;
 		StackOp.addr = (ea_t) -4;  // [ESP-4]
 		TempRT = new SMPRegTransfer;
+		TempRT->SetParentInst(this);
 		TempRT->SetRightOperand(RegOp);
 		TempRT->SetOperator(SMP_ASSIGN);
 		TempRT->SetLeftOperand(StackOp);
@@ -9178,6 +9628,7 @@ bool SMPInstr::BuildPushRTL(void) {
 				SourceFound = true;
 				OpSize = GetOpDataSize(TempOp);
 				TempRT = new SMPRegTransfer;
+				TempRT->SetParentInst(this);
 				TempRT->SetRightOperand(TempOp);
 				TempRT->SetOperator(SMP_ASSIGN);
 				StackOp.dtyp = TempOp.dtyp;  // size of transfer
@@ -9213,6 +9664,7 @@ bool SMPInstr::BuildRTL(void) {
 	//  E.g. mov esi,esi should not generate DEF and USE of esi, because esi does not change.
 	if (this->IsNop()) {
 		NopRT = new SMPRegTransfer;
+		NopRT->SetParentInst(this);
 		NopRT->SetOperator(SMP_NULL_OPERATOR);
 		this->RTL.push_back(NopRT);
 		NopRT = NULL;
@@ -9278,6 +9730,7 @@ bool SMPInstr::BuildRTL(void) {
 			//  so we can just consider these to be no-ops.
 			// NOTE: Shouldn't we killthe EFLAGS register on NN_cli ??!!??!!
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9317,6 +9770,7 @@ bool SMPInstr::BuildRTL(void) {
 		case NN_hlt:                 // Halt
 			// Treat as a no-op
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9505,6 +9959,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_nop:                 // No Operation
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9651,6 +10106,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_sti:                 // Set Interrupt Flag
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9673,6 +10129,7 @@ bool SMPInstr::BuildRTL(void) {
 		case NN_verw:                // Verify a Segment for Writing
 		case NN_wait:                // Wait until BUSY# Pin is Inactive (HIGH)
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9708,6 +10165,7 @@ bool SMPInstr::BuildRTL(void) {
 		case NN_wbinvd:              // Invalidate Data Cache (write changes)
 		case NN_invlpg:              // Invalidate TLB entry
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9736,6 +10194,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_rsm:                 // Resume from System Management Mode
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9827,6 +10286,7 @@ bool SMPInstr::BuildRTL(void) {
 		case NN_fxch:                // Exchange Registers
 			// FP registers remain NUMERIC anyway, so this is a no-op to our type system.
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9880,6 +10340,7 @@ bool SMPInstr::BuildRTL(void) {
 			//  type and we don't track any of them, so all such instructions
 			//  can be considered to be no-ops.
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9915,6 +10376,7 @@ bool SMPInstr::BuildRTL(void) {
 			// Floating point stack and control word and flags operations
 			//  with no memory operands are no-ops to us.
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9934,6 +10396,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_fnop:                // No Operation
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9961,6 +10424,7 @@ bool SMPInstr::BuildRTL(void) {
 			// Floating point stack and control word and flags operations
 			//  with no memory operands are no-ops to us.
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -9989,6 +10453,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_emms:                // Empty MMX state
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -10146,6 +10611,7 @@ bool SMPInstr::BuildRTL(void) {
 		case NN_prefetchw:           // Prefetch processor cache line into L1 data cache (mark as modified)
 			// Prefetch opcodes are no-ops to us.
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -10304,6 +10770,7 @@ bool SMPInstr::BuildRTL(void) {
 		case NN_sfence:              // Store Fence
 			// Cache prefetch and store fence opcodes are no-ops to us.
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -10396,6 +10863,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_lfence:              // Load Fence
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -10412,6 +10880,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_mfence:              // Memory Fence
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -10456,6 +10925,7 @@ bool SMPInstr::BuildRTL(void) {
 
 		case NN_pause:               // Spin Loop Hint
 			NopRT = new SMPRegTransfer;
+			NopRT->SetParentInst(this);
 			NopRT->SetOperator(SMP_NULL_OPERATOR);
 			this->RTL.push_back(NopRT);
 			NopRT = NULL;
@@ -10608,15 +11078,15 @@ bool SMPInstr::BuildRTL(void) {
 } // end SMPInstr::BuildRTL()
 
 // Iterate through all reg transfers and call SyncRTLDefUse for each.
-void SMPInstr::SyncAllRTs(void) {
+void SMPInstr::SyncAllRTs(bool UseFP, sval_t FPDelta) {
 	for (size_t index = 0; index < this->RTL.GetCount(); ++index) {
-		this->SyncRTLDefUse(this->RTL.GetRT(index));
+		this->SyncRTLDefUse(this->RTL.GetRT(index), UseFP, FPDelta);
 	}
 	return;
 } // end of SMPInstr:SyncAllRTs()
 
 // Ensure that each operand of the RTL is found in the appropriate DEF or USE list.
-void SMPInstr::SyncRTLDefUse(SMPRegTransfer *CurrRT) {
+void SMPInstr::SyncRTLDefUse(SMPRegTransfer *CurrRT, bool UseFP, sval_t FPDelta) {
 	// The Guard expression and ExtraKills are almost never represented in the DEF and USE
 	//  lists. When they are, they are added in MDFixupDefUseLists(), so we ignore them here.
 
@@ -10676,7 +11146,7 @@ void SMPInstr::SyncRTLDefUse(SMPRegTransfer *CurrRT) {
 		}
 	}
 	else { // recurse into right subtree
-		this->SyncRTLDefUse(CurrRT->GetRightTree());
+		this->SyncRTLDefUse(CurrRT->GetRightTree(), UseFP, FPDelta);
 	}
 	return;
 } // end of SMPInstr::SyncRTLDefUse()
@@ -10696,31 +11166,26 @@ void SMPRegTransfer::SetOperatorType(SMPOperandType OpType, const SMPInstr* Inst
 	RTop.type = NewType; 
 } // end of SMPRegTransfer::SetOperatorType
 
-// Update the memory source operands to have the new type
+// Retype a memory source operand (USE) from profiling info; stops after the first USE it updates.
 void SMPInstr::UpdateMemLoadTypes(SMPOperandType newType) {
 	bool MemSrc = false;
     op_t Opnd;
-	for (int i = 0; i < UA_MAXOP; ++i) {
-		Opnd = this->SMPcmd.Operands[i];
+	set<DefOrUse, LessDefUse>::iterator UseIter;
+	for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
+		Opnd = UseIter->GetOp();
 		optype_t CurrType = Opnd.type;
-		if (this->features & UseMacros[i]) { // USE
-			MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ));
-			if (MemSrc) {
-				set<DefOrUse, LessDefUse>::iterator use = this->FindUse(Opnd);
-				SMPOperandType type = use->GetType();
-
-				assert(newType == (NUMERIC|PROF_BASE));
-				switch (type) {
-					case UNINIT:
-					case CODEPTR:
-						this->SetUseType(Opnd,newType);
-						break;
-					case POINTER:
-						this->SetUseType(Opnd, (SMPOperandType)(UNKNOWN|PROF_BASE));
-						break;
-					default:
-						break;
-				}
+		MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ));
+		if (MemSrc) {
+			SMPOperandType type = UseIter->GetType();
+
+			assert(newType == (NUMERIC|PROF_BASE));
+			if (type == UNINIT) {
+				this->SetUseType(Opnd, newType);
+				break;
+			}
+			else if (type >= POINTER) {
+				this->SetUseType(Opnd, (SMPOperandType)(UNKNOWN|PROF_BASE));
+				break;
 			}
 		}
 	}
diff --git a/SMPInstr.h b/SMPInstr.h
index 9c8c5806..00f4bf21 100644
--- a/SMPInstr.h
+++ b/SMPInstr.h
@@ -44,7 +44,7 @@
 using namespace std;
 
 // Value to signal error in computing stack pointer alteration by instruction or RTL.
-#define SMP_STACK_DELTA_ERROR_CODE  0xfbfaf0fd
+#define SMP_STACK_DELTA_ERROR_CODE  0xebfaf0fd
 // Value to signal that stack pointer is bitwise ANDed to align it; unknown delta that can
 //  probably be ignored.
 #define SMP_STACK_POINTER_BITWISE_AND_CODE 0xebeae0ed
@@ -180,8 +180,8 @@ public:
 	// Get methods
 	inline SMPoperator GetOperator(void) { return RTop.oper; };
 	inline SMPOperandType GetOperatorType(void) { return RTop.type; };
-	inline op_t &GetLeftOperand(void) { return LeftOperand; };
-	inline op_t &GetRightOperand(void) { return RightOperand; };
+	op_t GetLeftOperand(void) const;
+	op_t GetRightOperand(void) const;
 	inline SMPRegTransfer *GetRightTree(void) { return RightRT; };
 	inline SMPGuard *GetGuard(void) { return Guard; };
 	// Set methods
@@ -192,12 +192,14 @@ public:
 	inline void SetRightTree(SMPRegTransfer *RightTree) { RightRT = RightTree; booleans1 |= RTL_SET_RIGHT_SUBTREE; };
 	inline void SetGuard(SMPGuard *NewGuard) { Guard = NewGuard; };
 	inline void SetTypeInferenceComplete(void) { booleans1 |= RTL_SET_TYPE_INFERENCE_COMPLETE; };
+	inline void SetParentInst(SMPInstr *Parent) { ParentInst = Parent; };
 	// Query methods
 	inline bool HasRightSubTree(void) const { return (booleans1 & RTL_SET_RIGHT_SUBTREE); };
 	inline bool IsTypeInferenceComplete(void) const { return (booleans1 & RTL_SET_TYPE_INFERENCE_COMPLETE); };
 	// Printing methods
 	void Dump(void);
 	// Analysis methods
+	bool IsAllocaRTL(void); // RT shows alloca pattern, i.e. subtracts unknown (non-const) value from stack pointer
 	sval_t ComputeStackPointerAlteration(bool IsLeaveInstr, sval_t IncomingDelta, sval_t FramePtrDelta);  // For instruction with operand-dependent effects on stack pointer.
 private:
 	SMPGuard *Guard;   // guard expression
@@ -210,6 +212,7 @@ private:
 #endif
 	op_t RightOperand; // valid only if RightSubTree is false
 	SMPRegTransfer *RightRT; // right subtree, valid only if RightSubTree is true
+	SMPInstr *ParentInst; // SMPInstr that contains this RegTransfer
 }; // end class SMPRegTransfer
 
 #define SMP_RT_LIMIT  9    // how many register transfers could be in an RTL?
@@ -292,17 +295,17 @@ private:
 // Masks to set bits to use as booleans4 in SMPInstr.
 #define INSTR_SET_DEFS_NORMALIZED 0x01
 #define INSTR_SET_USES_NORMALIZED 0x02
-#define INSTR_SET_UNUSED4 0x04
-#define INSTR_SET_UNUSED8 0x08
-#define INSTR_SET_UNUSED16 0x10
+#define INSTR_SET_STACK_ALIGNMENT 0x04
+#define INSTR_SET_FP_NORMALIZED_TO_SP 0x08
+#define INSTR_SET_ALLOCA 0x10
 #define INSTR_SET_UNUSED32 0x20
 #define INSTR_SET_UNUSED64 0x40
 #define INSTR_SET_UNUSED128 0x80
 #define INSTR_RESET_DEFS_NORMALIZED 0xfe
 #define INSTR_RESET_USES_NORMALIZED 0xfd
-#define INSTR_RESET_UNUSED4 0xfb
-#define INSTR_RESET_UNUSED8 0xf7
-#define INSTR_RESET_UNUSED16 0xef
+#define INSTR_RESET_STACK_ALIGNMENT 0xfb
+#define INSTR_RESET_FP_NORMALIZED_TO_SP 0xf7
+#define INSTR_RESET_ALLOCA 0xef
 #define INSTR_RESET_UNUSED32 0xdf
 #define INSTR_RESET_UNUSED64 0xbf
 #define INSTR_RESET_UNUSED128 0x7f
@@ -355,15 +358,11 @@ public:
 	inline void SetBlock(SMPBasicBlock *Block) { BasicBlock = Block; };
 	inline void SetCmd(insn_t cmd) { SMPcmd = cmd; return; };
 	inline void SetTerminatesBlock(void) { booleans1 |= INSTR_SET_BLOCK_TERM; };
-	inline void SetTailCall(void) { 
-		booleans1 |= INSTR_SET_TAIL_CALL;
-		if (type == COND_BRANCH) SetCondTailCall(); else ResetCondTailCall();
-		CallTarget = FarBranchTarget;
-		type = RETURN;
-	};
+	void SetTailCall(void);
 	inline void SetFirstInBlock(void) { 
 		booleans3 |= INSTR_SET_FIRST_IN_BLOCK;
 	};
+	inline void SetAllocaCall(void) { booleans4 |= INSTR_SET_ALLOCA; };
 	inline void AddDef(op_t DefOp, SMPOperandType DefType, int DefSSANum) {
 		Defs.SetRef(DefOp, DefType, DefSSANum);
 	}
@@ -382,8 +381,6 @@ public:
 	inline void EraseUse(set<DefOrUse, LessDefUse>::iterator UseIter)  { Uses.EraseRef(UseIter); };
 	inline void SetDeadRegs(char RegsString[]) { qstrncpy(DeadRegsString, RegsString, sizeof(DeadRegsString) - 1); };
 	inline void SetStackPtrOffset(sval_t Delta) { StackPtrOffset = Delta; };
-	inline void SetDefsNormalized(void) { booleans4 |= INSTR_SET_DEFS_NORMALIZED; }
-	inline void SetUsesNormalized(void) { booleans4 |= INSTR_SET_USES_NORMALIZED; }
 
 	// Query methods
 	bool HasDestMemoryOperand(void); // Does instruction write to memory?
@@ -399,6 +396,7 @@ public:
 	inline bool IsTailCall(void) const { return (booleans1 & INSTR_SET_TAIL_CALL); };
 	inline bool IsCondTailCall(void) const { return (booleans1 & INSTR_SET_COND_TAIL_CALL); };
 	inline bool IsCallUsedAsJump(void) const { return (booleans1 & INSTR_SET_CALL_USED_AS_JUMP); };
+	inline bool IsRecursiveCall(void) const { return (booleans1 & INSTR_SET_DIRECT_RECURSIVE_CALL); };
 	inline bool MDIsInterruptCall(void) const { return (booleans1 & INSTR_SET_INTERRUPT); };
 	inline bool IsNop(void) const { return (booleans2 & INSTR_SET_NOP); }; // instruction is simple or complex no-op
 	inline bool IsFloatNop(void) const { return (SMPcmd.itype == NN_fnop); };
@@ -444,6 +442,9 @@ public:
 	inline bool HasGoodRTL(void) const { return (booleans1 & INSTR_SET_GOODRTL); };
 	inline bool AreDefsNormalized(void) const { return (booleans4 & INSTR_SET_DEFS_NORMALIZED); };
 	inline bool AreUsesNormalized(void) const { return (booleans4 & INSTR_SET_USES_NORMALIZED); };
+	inline bool IsStackAlignmentInst(void) const { return (booleans4 & INSTR_SET_STACK_ALIGNMENT); };
+	inline bool HasFPNormalizedToSP(void) const { return (booleans4 & INSTR_SET_FP_NORMALIZED_TO_SP); };
+	inline bool IsAllocaCall(void) const { return (booleans4 & INSTR_SET_ALLOCA); };
 #if 0
 	bool FindStackPtrDelta(sval_t SearchDelta) const; // Is SearchDelta in the set of possible stack pointer deltas for this inst?
 													  //  If not, add it to the set.
@@ -465,13 +466,16 @@ public:
 	sval_t FindStackAdjustment(void); // Find amount of stack adjustment, e.g. if this inst is after a call
 	bool MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp); // Normalize stack operands to use incoming stack delta; leave others as is
 		// return true if register or stack memory operand, false otherwise
-	bool MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing); // Iterate through Defs and Uses, calling MDComputeNormalizedDataFlowOp(); true if changed DEFs or USEs
-	bool MDIsStackPtrSaveOrRestore(bool UseFP, bool &Save, sval_t &StackDelta, op_t &CopyOp, bool &Error); // is stack/frame pointer saved to/restored from CopyOp
+	bool MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing, sval_t DeltaIncrement = 0);
+		// Iterate through Defs and Uses, calling MDComputeNormalizedDataFlowOp(); true if changed DEFs or USEs
+	void MDGetUnnormalizedOp(op_t &NormOp); // If NormOp is a normalized stack memory operand, unnormalize it.
+	bool HasAllocaRTL(void); // RTL shows alloca pattern, i.e. subtracts non-const value from stack pointer
+	bool MDIsStackPtrSaveOrRestore(bool UseFP, sval_t FPDelta, bool &Save, sval_t &StackDelta, op_t &CopyOp, bool &Error); // is stack/frame pointer saved to/restored from CopyOp
 	void MDFixupDefUseLists(void); // Machine-dependent ad hoc fixes
 	void MDFindLoadFromStack(bool UseFP); // determine if instruction is load from stack loc
 	bool MDIsSignedLoad(unsigned short &SignMask); // true if sign or zero-extended; pass out mask bits
 	bool BuildRTL(void);   // Build RTL trees; return true if successfully built.
-	void SyncAllRTs(void); // calls SyncRTLDefUse() for all RTs in RTL
+	void SyncAllRTs(bool UseFP, sval_t FPDelta); // calls SyncRTLDefUse() for all RTs in RTL
 	op_t GetPushedOpnd(void); // Extract source operand from PUSH RTL
 	int MDGetImmedUse(void); // Get immed value from USE list of inst
 	bool MDIsArgumentPass(void); // Does inst pass an outgoing argument?
@@ -496,7 +500,7 @@ public:
 
 	// Trace UseOp through register moves back to its stack location or immediate value source.
 	//  Return true if we are passing an immediate or stack location back in UltSource.
-	bool TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSource);
+	bool TraceUltimateMoveSource(op_t UseOp, int UseSSANum, op_t &UltSource, bool &FPRelative);
 
 private:
 	// Data 
@@ -548,6 +552,9 @@ private:
 #if 0 // now in booleans4
 	bool NormalizedDefs; // stack offsets in DEFs have been normalized with respect to incoming stack delta.
 	bool NormalizedUses; // stack offsets in USEs have been normalized with respect to incoming stack delta.
+	bool StackAlignmentInst; // AND of stack pointer with mask to align stack frame
+	bool FPNormalizedToSP; // inst has frame-pointer-relative stack memory access that has been normalized to stack-pointer-relative
+	bool AllocaCall;    // subtracts unknown value from stack pointer to allocate temp memory on stack
 #endif
 	char DeadRegsString[MAXSMPDEADREGSSTR]; // Registers that are dead at this instruction
 	ea_t CallTarget; // target address of direct or indirect call instruction; BADADDR if unknown or not a call
@@ -627,6 +634,11 @@ private:
 	inline bool AreMemOpsFound(void) const { return (booleans3 & INSTR_SET_MEM_DEF_USE_COMPUTED); };
 	inline void SetMemOpsFound(void) { booleans3 |= INSTR_SET_MEM_DEF_USE_COMPUTED; };
 
+	inline void SetDefsNormalized(void) { booleans4 |= INSTR_SET_DEFS_NORMALIZED; } // set booleans4 bit replacing the old NormalizedDefs flag (DEF stack offsets normalized)
+	inline void SetUsesNormalized(void) { booleans4 |= INSTR_SET_USES_NORMALIZED; } // set booleans4 bit replacing the old NormalizedUses flag (USE stack offsets normalized)
+	inline void SetStackAlignmentInst(void) { booleans4 |= INSTR_SET_STACK_ALIGNMENT; } // set booleans4 bit: inst ANDs stack pointer with a mask to align the stack frame
+	inline void SetFPNormalizedToSP(void) { booleans4 |= INSTR_SET_FP_NORMALIZED_TO_SP; }; // set booleans4 bit: FP-relative stack access has been normalized to SP-relative
+
 	bool AllDefsNumeric(void);  // true if all DEFs are NUMERIC or CODEPTR
 	bool AnyDefsProfiled(void);	// true if any defs are profile derived
 	bool AllDefMetadataUnused(void); // true if all DEF metadata not needed
@@ -671,7 +683,7 @@ private:
 	void AddToStackPointer(uval_t delta); // helper for BuildRTL()
 	void SubFromStackPointer(uval_t delta); // helper for BuildRTL()
 	bool BuildMultiplyDivideRTL(SMPoperator BinaryOp);  // helper for BuildRTL()
-	void SyncRTLDefUse(SMPRegTransfer *CurrRT); // Ensure that all RTL operands are in the DEF/USE lists
+	void SyncRTLDefUse(SMPRegTransfer *CurrRT, bool UseFP, sval_t FPDelta); // Ensure that all RTL operands are in the DEF/USE lists
 	bool InferOperatorType(SMPRegTransfer *CurrRT); // return true if type updated
 	void SetAddSubSourceType(void);
 	int GetUseOpHashAndSSA(op_t UseOp, int &SSANum); // helper to find SSANum and create & return hash value
@@ -696,6 +708,9 @@ private:
 	SMPMetadataType GetDefMetadataType(void); // metadata type of non-flags DEF
 
 	bool MDIsAddImmediateToReg(op_t &DefOp, op_t &ImmOp); // return true if we have register DefOp += ImmOp.
+
+	bool MDRecomputeNormalizedDataFlowOp(sval_t DeltaIncrement, bool UpdateMaps, op_t &DefOp); // Alter stack delta for SP-relative stack operands in alloca-calling functions
+		// return true if register or stack memory operand, false otherwise
 };  // end class SMPInstr
 
 #endif
diff --git a/SMPProgram.cpp b/SMPProgram.cpp
index 8e914a1a..956d9241 100644
--- a/SMPProgram.cpp
+++ b/SMPProgram.cpp
@@ -170,6 +170,17 @@ SMPProgram::~SMPProgram(void) {
 	return;
 }
 
+// Look up the function whose FuncMap key is FirstAddr. Returns the mapped
+//  SMPFunction pointer, or NULL when FuncMap has no entry for FirstAddr.
+//  Unlike FuncMap[FirstAddr], a failed lookup does not insert a new entry.
+SMPFunction *SMPProgram::FindFunction(ea_t FirstAddr) {
+	map<ea_t, SMPFunction *>::iterator Found = this->FuncMap.find(FirstAddr);
+	if (Found == this->FuncMap.end()) {
+		return NULL; // no entry for this address
+	}
+	return Found->second;
+} // end of SMPProgram::FindFunction()
+
 #ifdef STARS_IDA_INTERFACE
 
 // Determine static global variable boundaries.
@@ -464,14 +475,38 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 		assert(NULL != CurrFunc);
 		pair<ea_t, SMPFunction *> TempFunc(FuncInfo->startEA, CurrFunc);
 		this->FuncMap.insert(TempFunc);
-		this->TempFuncMap.insert(TempFunc); // make a copy for processing
 		CurrFunc->Analyze();
 	} // end for (size_t FuncIndex = 0; ...) 
 
+	// Find any unshared fragments that were added to FuncMap before it was
+	//  discovered that they were unshared fragments of other functions, and
+	//  remove them. Put all valid functions into the TempFuncMap copy.
+	map<ea_t, SMPFunction*>::iterator MapIter = this->FuncMap.begin();
+	map<ea_t, SMPFunction*>::iterator NextMapIter = MapIter;
+	while (MapIter != this->FuncMap.end()) {
+		++NextMapIter;
+		CurrFunc = MapIter->second;
+		if (NULL == CurrFunc) {
+			this->FuncMap.erase(MapIter);
+		}
+		else if (this->UnsharedFragments.find(CurrFunc->GetFirstFuncAddr()) != this->UnsharedFragments.end()) {
+			this->FuncMap.erase(MapIter);
+		}
+		else {
+			pair<map<ea_t, SMPFunction*>::iterator, bool> InsertResult;
+			InsertResult = this->TempFuncMap.insert(*MapIter);  // make a copy for processing
+			assert(InsertResult.second);
+		}
+		MapIter = NextMapIter;
+	}
+
 #if SMP_COUNT_MEMORY_ALLOCATIONS
 	SMPFuncCount += this->FuncMap.size();
 	SMPFuncCount += this->TempFuncMap.size();
 #endif
+#if SMP_DEBUG
+	SMP_msg("INFO: Number of functions in FuncMap: %zu\n", this->FuncMap.size());
+#endif
 
 	while (!(this->TempFuncMap.empty())) {
 		this->PrioritizeCallGraph();
@@ -488,10 +523,6 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 	this->ComputeGlobalSets();
 #endif
 
-#if SMP_DEBUG
-	SMP_msg("Number of functions in FuncMap: %zu\n", this->FuncMap.size());
-#endif
-
 	// In order to reduce memory consumption, emit the global data annotations now,
 	//  and then release the memory for the global data. Note that this means we
 	//  cannot presently apply type inference info to the global data table. If we
@@ -502,7 +533,6 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 	this->GlobalNameMap.clear();
 
 	// LOOP 2: safe return address analysis.
-	map<ea_t, SMPFunction*>::iterator MapIter;
 	for (MapIter = this->FuncMap.begin(); MapIter != this->FuncMap.end(); ++MapIter) {
 		CurrFunc = MapIter->second;
 		if (NULL == CurrFunc) {
@@ -530,9 +560,8 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 			continue;
 		}
 		CurrFunc->FreeUnusedMemory2(); // free memory
-#if 1
-		if (!(CurrFunc->HasUnresolvedIndirectJumps()
-			|| CurrFunc->HasSharedChunks())) {
+		if (CurrFunc->StackPtrAnalysisSucceeded() && (!(CurrFunc->HasUnresolvedIndirectJumps()
+			|| CurrFunc->HasSharedChunks()))) {
 			if (DebugFlag) {
 				SMP_msg("Performing LVA for %s.\n", CurrFunc->GetFuncName());
 			}
@@ -540,15 +569,10 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 			if (DebugFlag) SMP_msg("Computing SSA.\n");
 			CurrFunc->ComputeSSA();
 			if (DebugFlag) SMP_msg("Finished SSA.\n");
-		} // end if not (unresolved indirect jumps or shared chunks)
-#endif
 
-		if (!(CurrFunc->HasUnresolvedIndirectJumps()
-			|| CurrFunc->HasSharedChunks())) {
 #if SMP_DEBUG_OPTIMIZATIONS
 			SMP_msg("Analyzing metadata for function %s\n", CurrFunc->GetFuncName());
 #endif
-			DebugFlag = (0 == strcmp("strchrnul", CurrFunc->GetFuncName()));
 			CurrFunc->AnalyzeMetadataLiveness();
 			if (DebugFlag) SMP_msg("Finished analyzing metadata.\n");
 		}
@@ -580,11 +604,11 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 		size_t NumCallTargets = CurrFunc->GetNumCallTargets();
 		for (size_t i = 0; i < NumCallTargets; ++i) {
 			ea_t CallAddr = CurrFunc->GetCallTargetAddr(i);
-			SMPFunction *ChildInstance = FuncMap[CallAddr];
+			SMPFunction *ChildInstance = this->FindFunction(CallAddr);
 			if (!ChildInstance) {
 #if SMP_DEBUG_FUNC
 				// if a call target doesnt have a SMPFunction instance note it down
-				SMP_msg(" Function doesnt have SMPFunction instance at %x \n", CallAddr);
+				SMP_msg("ERROR: Function does not have SMPFunction instance at %x \n", CallAddr);
 #endif
 				continue;
 			}
@@ -601,7 +625,7 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 #endif // 0 or 1
 
 #if SMP_USE_SWITCH_TABLE_INFO
-		if (CurrFunc->HasGoodRTLs() && !CurrFunc->HasUnresolvedIndirectJumps() && !CurrFunc->HasSharedChunks()) {
+		if (CurrFunc->StackPtrAnalysisSucceeded() && CurrFunc->HasGoodRTLs() && !CurrFunc->HasUnresolvedIndirectJumps() && !CurrFunc->HasSharedChunks()) {
 #else
 		if (CurrFunc->HasGoodRTLs() && !CurrFunc->HasIndirectJumps() && !CurrFunc->HasSharedChunks()) {
 #endif
@@ -612,8 +636,8 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 			CurrFunc->InferTypes(true);
 			CurrFunc->FindRedundantMetadata();
 
-			// If no profiler annotations are available, save time.
-			if (0 < pi->GetProfilerAnnotationCount()) {
+			// Apply profiler information.
+			if (0 < pi->GetProfilerAnnotationCount()) { // If no profiler annotations are available, save time.
 				CurrFunc->ApplyProfilerInformation(pi);
 				CurrFunc->InferTypes(false);
 				CurrFunc->FindRedundantMetadata();
@@ -628,6 +652,9 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 
 			// Infer fine-grained info (signedness, bit widths, etc.)
 			CurrFunc->InferFGInfo();
+			if (0 == strcmp("__mktime_internal", CurrFunc->GetFuncName())) {
+				CurrFunc->Dump();
+			}
 
 #if SMP_DEBUG_OPTIMIZATIONS_VERBOSE
 			if (DebugFlag) {
@@ -652,7 +679,7 @@ void SMPProgram::Analyze(ProfilerInformation *pi) {
 } // end of SMPProgram::Analyze()
 
 // Does chunk at ChunkAddr belong exclusively to FuncHead?
-// If so, return true and add ChunkAddr to ProcessedFragments set.
+// If so, return true and add ChunkAddr to UnsharedFragments set.
 bool SMPProgram::IsChunkUnshared(ea_t ChunkAddr, ea_t FuncHeadStart, ea_t FuncHeadEnd) {
 	bool Unshared = true;
 	SMP_xref_t CurrXrefs;
@@ -825,11 +852,11 @@ FuncType SMPProgram::RecurseAndMarkRetAdd(SMPFunction* FuncAttrib) {
 	vector<ea_t> CallTargets = FuncAttrib->GetCallTargets();
 	for (size_t i = 0; i < CallTargets.size(); i++) {
 		ea_t CallAddr = CallTargets[i];
-		SMPFunction* ChildInstance = FuncMap[CallAddr];
+		SMPFunction* ChildInstance = this->FindFunction(CallAddr);
 		if (!ChildInstance) {
 #if SMP_DEBUG_FUNC
 			// if a call target doesnt have a SMPFunction instance note it down
-			SMP_msg(" ERROR: Function does not have SMPFunction instance at %x \n", CallAddr);
+			SMP_msg("ERROR: Function does not have SMPFunction instance at %x \n", CallAddr);
 #endif
 			continue;
 		}
diff --git a/SMPProgram.h b/SMPProgram.h
index 685d0719..c42e0e5d 100644
--- a/SMPProgram.h
+++ b/SMPProgram.h
@@ -124,7 +124,8 @@ public:
 		return GlobalNameMap.end();
 	}
 	inline ProfilerInformation *GetProfInfo(void) { return ProfInfo; };
-	inline SMPFunction *GetFuncFromAddr(ea_t FirstAddr) { return FuncMap[FirstAddr]; };
+	SMPFunction* FindFunction(ea_t FirstAddr); // get function from first addr in function
+	inline size_t GetFuncCount(void) const { return FuncMap.size(); }; // number of entries currently in FuncMap
 	// Set methods
 	void ProfGranularityFinished(FILE *AnnotFile, FILE *InfoAnnotFile);  // notification from ProfilerInformation
 	bool InsertUnsharedFragment(ea_t TargetAddr); // Add code fragment starting address to set; return false if already in set, true otherwise
@@ -140,7 +141,6 @@ public:
 	bool IsChunkUnshared(ea_t ChunkAddr, ea_t FuncHeadStart, ea_t FuncHeadEnd); // Does chunk at ChunkAddr belong exclusively to FuncHead?
 								// If so, return true and add ChunkAddr to ProcessedFragments set.
 
-	inline SMPFunction* FindFunction(ea_t addr) { return FuncMap[addr]; }
 private:
 	// Data
 	bool ProfilerGranularityComplete;  // Profiler-based granularity inference complete
diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp
index 25c3c02d..beb192ee 100644
--- a/SMPStaticAnalyzer.cpp
+++ b/SMPStaticAnalyzer.cpp
@@ -38,6 +38,7 @@ using namespace std;
 #include <set>
 
 #include <string>
+#include <ctime>
 
 #include <pro.h>
 #include <ida.hpp>
@@ -82,6 +83,8 @@ int FuncsProcessed = 0;
 #define SMP_FIND_ORPHANS 1  // find code outside of functions
 #define SMP_DEBUG_CODE_ORPHANS 1 // Detect whether we are causing code to be orphaned
 
+#define SMP_IDAP_RUN_DELAY 0  // Delay in IDAP_run() so we can attach debugger to process.
+
 #if SMP_DEBUG_CODE_ORPHANS
 set<ea_t> CodeOrphans;
 #endif
@@ -456,6 +459,19 @@ void IDAP_run(int arg) {
 	string AsmFileSuffix(".asm");
 	AsmFileName += AsmFileSuffix;
 
+	// For debugging, we can add a delay loop so we have time to attach gdb to the
+	// running process and set a breakpoint.
+#if SMP_IDAP_RUN_DELAY
+	time_t start;
+	time_t current;
+
+	time(&start);
+	printf("delay for 15 seconds.\n");
+	do {
+		time(&current);
+	} while(difftime(current,start) < 15.0);
+#endif
+
 	ea_t RecentAddr;
 #if SMP_DEBUG_CODE_ORPHANS
 	CodeOrphans.clear();
@@ -3131,7 +3147,7 @@ StackAlteration[NN_leave] = 1;               // High Level Procedure Exit
 StackAlteration[NN_leaved] = 1;              // High Level Procedure Exit        **
 StackAlteration[NN_leaveq] = 1;              // High Level Procedure Exit        **
 StackAlteration[NN_mov] = 1;                 // Move Data ; could be esp := ebp (deallocate stack frame) or esp := ebx (unknown)
-StackAlteration[NN_pop] = 4;                 // Pop a word from the Stack
+StackAlteration[NN_pop] = 1;                 // Pop a word from the Stack  ; could be 16-bit or 32-bit operand, etc.
 StackAlteration[NN_popaw] = 14;               // Pop all General Registers
 StackAlteration[NN_popa] = 28;                // Pop all General Registers
 StackAlteration[NN_popad] = 28;               // Pop all General Registers (use32)
@@ -3140,7 +3156,7 @@ StackAlteration[NN_popfw] = 2;               // Pop Stack into Flags Register
 StackAlteration[NN_popf] = 4;                // Pop Stack into Flags Register         **
 StackAlteration[NN_popfd] = 4;               // Pop Stack into Eflags Register        **
 StackAlteration[NN_popfq] = 8;               // Pop Stack into Rflags Register        **
-StackAlteration[NN_push] = -4;                // Push Operand onto the Stack
+StackAlteration[NN_push] = 1;                // Push Operand onto the Stack  ; could be 16-bit or 32-bit operand, etc.
 StackAlteration[NN_pushaw] = -14;              // Push all General Registers
 StackAlteration[NN_pusha] = -28;               // Push all General Registers
 StackAlteration[NN_pushad] = -28;              // Push all General Registers (use32)
-- 
GitLab