From a04dd1279347d94f53279efc131e23bd10b76c84 Mon Sep 17 00:00:00 2001
From: Clark Coleman <clc@zephyr-software.com>
Date: Mon, 24 Dec 2018 17:04:05 -0500
Subject: [PATCH] Improved induction variable identification for optimized
 binary code patterns.

---
 include/base/SMPDataFlowAnalysis.h |   4 +
 include/base/SMPFunction.h         |   5 +-
 include/base/SMPInstr.h            |   2 +-
 src/base/SMPDataFlowAnalysis.cpp   |  44 ++
 src/base/SMPFunction.cpp           | 855 +++++++++++++++++++++++------
 src/base/SMPInstr.cpp              |  18 +-
 6 files changed, 766 insertions(+), 162 deletions(-)

diff --git a/include/base/SMPDataFlowAnalysis.h b/include/base/SMPDataFlowAnalysis.h
index 8daf1a79..9ccf2969 100644
--- a/include/base/SMPDataFlowAnalysis.h
+++ b/include/base/SMPDataFlowAnalysis.h
@@ -1035,6 +1035,7 @@ public:
 	void AllocateBits(std::size_t Size); // allocate Size bits, initialized to zero.
 	void SetBit(std::size_t BitIndex);
 	void ResetBit(std::size_t BitIndex);
+	void UnionSets(const STARSBitSet &BitSet2); // Set every bit in this set that is set in BitSet2; sizes must match (asserted).
 	// Query methods
 	bool IsAnyBitSet(void) const; // Returns false if all bits are zero, true otherwise.
 	int FindHighestBitSet(void) const; // Return highest index set; -1 if no bits set.
@@ -1048,6 +1049,9 @@ private:
 	// Methods
 }; // end class STARSBitSet
 
+STARSBitSet STARSBitSetIntersection(const STARSBitSet &BitSet1, const STARSBitSet &BitSet2); // Return a new set of the bits set in both arguments; sizes must match (asserted).
+STARSBitSet STARSBitSetUnion(const STARSBitSet &BitSet1, const STARSBitSet &BitSet2); // Return a new set of the bits set in either argument; sizes must match (asserted).
+
 // Initialization routine for DFA category is STARS_Program_t::InitDFACategory().
 extern SMPitype DFACategory[STARS_NN_last + 1];
 
diff --git a/include/base/SMPFunction.h b/include/base/SMPFunction.h
index 91a94404..5ec25c2e 100644
--- a/include/base/SMPFunction.h
+++ b/include/base/SMPFunction.h
@@ -212,6 +212,7 @@ struct InductionVarFamily {
 	STARSDepIndVarVector DependentInductionVars;
 	STARSExpression *BIVInitExpr;
 	STARSExpression *BIVLimitExpr;
+	STARSBitSet BIVSSAClosure; // each bit set indicates its index value is an SSA number in BIV chains in loop
 };
 
 // We can have multiple basic induction vars for one loop, hence multiple families in a list.
@@ -945,6 +946,8 @@ private:
 	void DetectLoopInvariantDEFs2(void); // Collect a set of loop-invariant DEFs with the inst IDs of the DEFs.
 	bool IsUseLoopInvariantDEF(std::size_t LoopIndex, const STARSOpndTypePtr &UseOp, SMPInstr *UseInst); // Track UseOp to its DEF, see if loop-invariant for LoopIndex
 	void DetectLoopInductionVars(void); // Detect basic and dependent loop induction variable families for all loops
+	bool FindIVLiveRangeClosure(const std::size_t LoopIndex, const PhiSetIter PhiIter, STARSBitSet &SSAClosure); // Find SSAClosure from PhiIter for loop; return true if IV definition is satisfied
+	void DetectLoopInductionVars2(void); // Detect basic and dependent loop induction variable families for all loops; use binary-specific analyses
 	void FindDependentInductionVar(std::size_t LoopIndex, struct DependentInductionVar &DIV, STARSOpndTypePtr Add1, STARSOpndTypePtr Add2, STARSOpndTypePtr Mult1, STARSOpndTypePtr Mult2, SMPoperator RhsOperator, SMPInstr *DefInst); // pass in results of DefInst::IsDependentInductionVarArithmetic()
 	bool ReplaceLoopRegWithConst(std::size_t LoopIndex, STARSOpndTypePtr &RegOp, SMPInstr *UseInst); // If RegOp USE in UseInst is an SCCP constant, create an immediate operand for it.
 	void ComputeInArgConstValues(void); // Fill InArgConstValues with SCCP values from call sites.
@@ -957,7 +960,7 @@ private:
 	void CombineMemoryExprs(STARSMemWriteExprsList &MemWriteExprWidths, STARSExprSet &MemWriteExprs, std::vector<std::set<STARS_ea_t> > &StackPtrCopies); // Combine items in the expr set that have common regs, e.g. (RDI+1) and (RDI-2), into fewer set entries
 	bool CreateSPARKMemoryReadRangeExprs(std::size_t LoopIndex, bool RecordLoopRegs, std::set<int> &LoopRegHashes, STARSExprSet &MemWriteExprs, STARSMemWriteExprsList &MemWriteExprWidths, std::vector<std::set<STARS_ea_t> > &StackPtrCopiesVector); // Create a Memory range expression set for indirect or indexed memory writes in the loop; return true on success
 	bool ReplaceBIVWithExpr(std::size_t LoopIndex, const struct InductionVarFamily &IVFamily, STARSExpression *CurrExpr, bool InitCase); // Replace occurrences of IVFamily.BIV in CurrExpr with BIV InitExpr or LimitExpr
-	bool ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed); // Replace all BIVs in CurrExpr with lower or upper limit (depending on InitCase) exprs for BIV
+	bool ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed, std::set<int> &RegsReplaced); // Replace all BIVs in CurrExpr with lower or upper limit (depending on InitCase) exprs for BIV; RegsReplaced collects hashes of reg+SSA operands already replaced, to avoid infinite recursion
 	bool ReplaceAllDIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed); // Replace all DIVs in CurrExpr with lower or upper limit (depending on InitCase) exprs for DIV
 	bool ReplaceAllIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed); // wrapper to call ReplaceAllBIVsWithExprs() and ReplaceAllDIVsWithExprs()
 	bool ExpandExprToInArgs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, std::set<STARS_ea_t> &StackPtrCopySet); // expand, replace IVs, simplify, until no more can be done
diff --git a/include/base/SMPInstr.h b/include/base/SMPInstr.h
index b76e63e3..9a54d8b8 100644
--- a/include/base/SMPInstr.h
+++ b/include/base/SMPInstr.h
@@ -836,7 +836,7 @@ public:
 	bool IsBasicInductionVarArithmetic(STARSOpndTypePtr &RhsOperand, SMPoperator &RhsOperator) const; // RTL is simple x := x op y where op is +, -, or *
 	// RTL is linear function Add1 + Mult1*Mult2 + Add2 or some portion of it, with nullptr returned for missing operands.
 	bool IsDependentInductionVarArithmetic(STARSOpndTypePtr &Mult1, STARSOpndTypePtr &Mult2, STARSOpndTypePtr &Add1, STARSOpndTypePtr &Add2, SMPoperator &RhsOperator);
-	bool IsAcceptableBIVOperation(void) const; // BIV arithmetic or other opcode acceptable for BIV definition in binaries
+	bool IsAcceptableBIVOperation(bool &RegSpilled, bool &RegUnspilled) const; // BIV arithmetic or other opcode acceptable for BIV definition in binaries
 	bool MDIsSubregMaskInst(std::size_t &BytesMasked); // is AND operation that masks off lower BytesMasked bytes
 	inline bool MDIsBitwiseNotOpcode(void) const { return (STARS_NN_not == GetIDAOpcode()); };
 	inline bool MDIsBitwiseAndOpcode(void) const { return (STARS_NN_and == GetIDAOpcode()); };
diff --git a/src/base/SMPDataFlowAnalysis.cpp b/src/base/SMPDataFlowAnalysis.cpp
index 0dc8b7f6..89117f37 100644
--- a/src/base/SMPDataFlowAnalysis.cpp
+++ b/src/base/SMPDataFlowAnalysis.cpp
@@ -3976,6 +3976,50 @@ size_t STARSBitSet::CountSetBits(void) const {
 	return BitCount;
 } // end of STARSBitSet::CountSetBits()
 
+void STARSBitSet::UnionSets(const STARSBitSet &BitSet2) {
+	// In-place union: OR every set bit of BitSet2 into this set. Both sets must be the same size.
+	const size_t BitCount = this->GetNumBits();
+	assert(BitCount == BitSet2.GetNumBits());
+	for (size_t BitIndex = 0; BitIndex < BitCount; ++BitIndex) {
+		if (!BitSet2.GetBit(BitIndex)) {
+			continue; // nothing to contribute at this position
+		}
+		this->SetBit(BitIndex);
+	}
+	return;
+} // end of STARSBitSet::UnionSets()
+
+STARSBitSet STARSBitSetIntersection(const STARSBitSet &BitSet1, const STARSBitSet &BitSet2) {
+	// Build a fresh set whose bit i is set only when bit i is set in both equally-sized inputs.
+	const size_t BitCount = BitSet1.GetNumBits();
+	assert(BitCount == BitSet2.GetNumBits());
+
+	STARSBitSet CommonBits;
+	CommonBits.AllocateBits(BitCount);
+	for (size_t BitIndex = 0; BitIndex < BitCount; ++BitIndex) {
+		const bool InBoth = BitSet1.GetBit(BitIndex) && BitSet2.GetBit(BitIndex);
+		if (InBoth)
+			CommonBits.SetBit(BitIndex);
+	}
+	return CommonBits;
+} // end of STARSBitSetIntersection()
+
+STARSBitSet STARSBitSetUnion(const STARSBitSet &BitSet1, const STARSBitSet &BitSet2) {
+	// Build a fresh set whose bit i is set when bit i is set in at least one of the equally-sized inputs.
+	const size_t BitCount = BitSet1.GetNumBits();
+	assert(BitCount == BitSet2.GetNumBits());
+
+	STARSBitSet CombinedBits;
+	CombinedBits.AllocateBits(BitCount);
+	for (size_t BitIndex = 0; BitIndex < BitCount; ++BitIndex) {
+		const bool InEither = BitSet1.GetBit(BitIndex) || BitSet2.GetBit(BitIndex);
+		if (InEither)
+			CombinedBits.SetBit(BitIndex);
+	}
+	return CombinedBits;
+} // end of STARSBitSetUnion()
+
+
 // Map system or library call name to FG info about its return value.
 map<string, struct FineGrainedInfo> ReturnRegisterTypeMap;
 
diff --git a/src/base/SMPFunction.cpp b/src/base/SMPFunction.cpp
index d66e371f..f05af7a1 100644
--- a/src/base/SMPFunction.cpp
+++ b/src/base/SMPFunction.cpp
@@ -4209,10 +4209,16 @@ int SMPFunction::GetBlockNumForPhiDef(const STARSOpndTypePtr &DefOp, int SSANum)
 
 // Retrieve block iterator for InstAddr from InstBlockMap; assert if failure
 SMPBasicBlock *SMPFunction::GetBlockFromInstAddr(STARS_ea_t InstAddr) {
-	map<STARS_ea_t, SMPBasicBlock *>::iterator MapEntry;
-	MapEntry = this->InstBlockMap.find(InstAddr);
-	assert(MapEntry != this->InstBlockMap.end());
-	return MapEntry->second;
+	// Addresses that are not block-number pseudo-IDs are looked up in InstBlockMap; a miss asserts.
+	if (!STARS_IsBlockNumPseudoID(InstAddr)) {
+		map<STARS_ea_t, SMPBasicBlock *>::iterator FoundEntry = this->InstBlockMap.find(InstAddr);
+		assert(FoundEntry != this->InstBlockMap.end());
+		return FoundEntry->second;
+	}
+
+	// A block-number pseudo-ID carries its block number, so the block is fetched by number instead.
+	const int PseudoBlockNum = STARS_GetBlockNumFromPseudoID(InstAddr);
+	return this->GetBlockByNum((size_t) PseudoBlockNum);
 }
 
 // return -1 if not in InstBlockMap, block # otherwise
@@ -6599,7 +6605,7 @@ void SMPFunction::ComputeSSA(void) {
 		if (this->HasStructuredControlFlow()) {
 			this->DetectLoopInvariantDEFs();
 			if (global_STARS_program->ShouldSTARSPerformDeepLoopAnalyses()) {
-				this->DetectLoopInductionVars();
+				this->DetectLoopInductionVars2();
 			}
 		}
 	}
@@ -7530,6 +7536,456 @@ void SMPFunction::DetectLoopInductionVars(void) {
 	return;
 } // end of SMPFunction::DetectLoopInductionVars()
 
+struct IVTracker {
+	STARSBitSet CurrSSAClosure; // bit N set => SSA number N belongs to this tracked chain of DEFs and USEs
+	bool ValidIVOpsOnly = true; // cleared once a DEF in the chain is not an acceptable IV operation; defaulted so it is never read uninitialized
+};
+
+list<struct IVTracker>::iterator FindSSAClosure(list<struct IVTracker> &IVTrackerList, const int SSANum1, const int SSANum2) {
+	// Return the first tracker whose closure already contains either SSA number; end() if none does.
+	const size_t FirstBit = (size_t) SSANum1;
+	const size_t SecondBit = (size_t) SSANum2;
+	list<struct IVTracker>::iterator TrackerIter = IVTrackerList.begin();
+	for ( ; TrackerIter != IVTrackerList.end(); ++TrackerIter) {
+		if (TrackerIter->CurrSSAClosure.GetBit(FirstBit) || TrackerIter->CurrSSAClosure.GetBit(SecondBit))
+			break; // found a closure holding one of the SSA numbers
+	}
+	return TrackerIter;
+} // end of FindSSAClosure()
+
+// Find SSAClosure from PhiIter for loop; return true if IV definition is satisfied, i.e. every DEF of the
+//  Phi operand whose SSA number ends up in the header Phi's closure was an acceptable IV operation.
+bool SMPFunction::FindIVLiveRangeClosure(const size_t LoopIndex, const PhiSetIter PhiIter, STARSBitSet &SSAClosure) {
+	list<size_t> BlockNumList;
+	this->BuildLoopBlockList(LoopIndex, BlockNumList);
+	int HeaderBlockNum = this->LoopHeadBlockNumbers[LoopIndex];
+
+	// Start the closure with the SSA numbers in the Phi function.
+	list<struct IVTracker> IVTrackerList; // for all working closures except the Phi closure.
+	struct IVTracker PhiIVTracker;
+	PhiIVTracker.ValidIVOpsOnly = true;
+	assert(0 <= this->GetMaxStackSSANum());
+	PhiIVTracker.CurrSSAClosure.AllocateBits((size_t) this->GetMaxStackSSANum());
+	bool RegSpilled = false;
+	bool RegUnspilled = false;
+
+	STARSOpndTypePtr PhiOp = PhiIter->GetAnyOp();
+	// For all instructions in the loop, find SSA nums for the PhiOp,
+	//  whether USE or DEF. Accumulate closures, merge closures when
+	//  they overlap, track whether operations are acceptable for an IV.
+	for (const size_t BlockNum : BlockNumList) {
+		SMPBasicBlock *CurrBlock = this->GetBlockByNum(BlockNum);
+		// Get Phi function DEF and USE SSA numbers.
+		PhiSetIter CurrPhiIter = CurrBlock->FindPhi(PhiOp);
+		if (CurrPhiIter != CurrBlock->GetLastPhi()) {
+			// See if this set of SSA nums overlaps the PhiIVTracker, or this is the header block.
+			bool MainPhiTracker = (BlockNum == ((size_t) HeaderBlockNum));
+			if (!MainPhiTracker) {
+				for (size_t PhiUseIndex = 0; PhiUseIndex < CurrPhiIter->GetPhiListSize(); ++PhiUseIndex) {
+					int SSANum = CurrPhiIter->GetUseSSANum(PhiUseIndex);
+					if (PhiIVTracker.CurrSSAClosure.GetBit((size_t)SSANum)) {
+						MainPhiTracker = true;
+						break;
+					}
+				}
+			}
+			if (MainPhiTracker) {
+				int SSANum = CurrPhiIter->GetDefSSANum();
+				PhiIVTracker.CurrSSAClosure.SetBit((size_t)SSANum);
+				for (size_t PhiUseIndex = 0; PhiUseIndex < CurrPhiIter->GetPhiListSize(); ++PhiUseIndex) {
+					SSANum = CurrPhiIter->GetUseSSANum(PhiUseIndex);
+					PhiIVTracker.CurrSSAClosure.SetBit((size_t)SSANum);
+				}
+			}
+			else {
+				// Find in another tracking set, or create a new one. Only the first two Phi USEs are probed here; other overlaps are left to the merge passes below.
+				int SSANum1 = CurrPhiIter->GetUseSSANum(0);
+				int SSANum2 = CurrPhiIter->GetUseSSANum(1);
+				list<struct IVTracker>::iterator TrackIter = FindSSAClosure(IVTrackerList, SSANum1, SSANum2);
+				if (TrackIter != IVTrackerList.end()) {
+					int SSANum = CurrPhiIter->GetDefSSANum();
+					(*TrackIter).CurrSSAClosure.SetBit((size_t)SSANum);
+					for (size_t PhiUseIndex = 0; PhiUseIndex < CurrPhiIter->GetPhiListSize(); ++PhiUseIndex) {
+						SSANum = CurrPhiIter->GetUseSSANum(PhiUseIndex);
+						(*TrackIter).CurrSSAClosure.SetBit((size_t)SSANum);
+					}
+				}
+				else { // Create new tracking set.
+					struct IVTracker NewTrackingSet;
+					NewTrackingSet.CurrSSAClosure.AllocateBits((size_t) this->GetMaxStackSSANum());
+					NewTrackingSet.ValidIVOpsOnly = true;
+					int SSANum = CurrPhiIter->GetDefSSANum();
+					NewTrackingSet.CurrSSAClosure.SetBit((size_t)SSANum);
+					for (size_t PhiUseIndex = 0; PhiUseIndex < CurrPhiIter->GetPhiListSize(); ++PhiUseIndex) {
+						SSANum = CurrPhiIter->GetUseSSANum(PhiUseIndex);
+						NewTrackingSet.CurrSSAClosure.SetBit((size_t)SSANum);
+					}
+					IVTrackerList.push_back(NewTrackingSet);
+				}
+			}
+		} // end if found in current block phi functions
+
+		if (!CurrBlock->IsVarKill(PhiOp))
+			continue; // save time
+
+		for (vector<SMPInstr *>::const_iterator InstIter = CurrBlock->GetFirstConstInst();
+			InstIter != CurrBlock->GetLastConstInst(); ++InstIter) {
+			SMPInstr *CurrInst = (*InstIter);
+			STARSDefUseIter DefIter = CurrInst->FindDef(PhiOp);
+			if (DefIter != CurrInst->GetLastDef()) {
+				int DefSSANum = DefIter->GetSSANum();
+				// See if PhiOp is also a USE, find its closure set.
+				STARSDefUseIter UseIter = CurrInst->FindUse(PhiOp);
+				int UseSSANum = DefSSANum; // lets us use FindSSAClosure() later
+				if (UseIter != CurrInst->GetLastUse()) {
+					UseSSANum = UseIter->GetSSANum();
+				}
+				if ((0 > UseSSANum) || (0 > DefSSANum))
+					return false;
+				STARSOpndTypePtr RightOp = nullptr;
+				SMPoperator CurrOperator;
+				bool CurrSpilled = false;
+				bool CurrUnspilled = false;
+				bool CurrValidIVOp = CurrInst->IsAcceptableBIVOperation(CurrSpilled, CurrUnspilled) || CurrInst->IsBasicInductionVarArithmetic(RightOp, CurrOperator);
+				// Most common case is we only have one closure.
+				if (PhiIVTracker.CurrSSAClosure.GetBit((size_t)UseSSANum) || PhiIVTracker.CurrSSAClosure.GetBit((size_t)DefSSANum)) {
+					// Make sure both bits are set.
+					PhiIVTracker.CurrSSAClosure.SetBit((size_t)UseSSANum);
+					PhiIVTracker.CurrSSAClosure.SetBit((size_t)DefSSANum);
+					RegSpilled = (CurrSpilled || RegSpilled);
+					RegUnspilled = (CurrUnspilled || RegUnspilled);
+					PhiIVTracker.ValidIVOpsOnly = PhiIVTracker.ValidIVOpsOnly && CurrValidIVOp;
+				}
+				else {
+					// Locate the matching closure, or create a new one.
+					list<struct IVTracker>::iterator MatchingIter = FindSSAClosure(IVTrackerList, DefSSANum, UseSSANum);
+					if (MatchingIter == IVTrackerList.end()) {
+						// Need new closure set.
+						struct IVTracker NewIVClosure;
+						NewIVClosure.CurrSSAClosure.AllocateBits((size_t) this->GetMaxStackSSANum());
+						NewIVClosure.CurrSSAClosure.SetBit((size_t) DefSSANum); // record the chain members; an empty
+						NewIVClosure.CurrSSAClosure.SetBit((size_t) UseSSANum); //  closure could never be matched or merged later
+						NewIVClosure.ValidIVOpsOnly = CurrValidIVOp;
+						IVTrackerList.push_back(NewIVClosure);
+					}
+					else { // Matched a closure
+						// Ensure both SSA Num bits are set.
+						(*MatchingIter).CurrSSAClosure.SetBit((size_t) DefSSANum);
+						(*MatchingIter).CurrSSAClosure.SetBit((size_t) UseSSANum);
+						// Update valid IV ops flag if needed.
+						(*MatchingIter).ValidIVOpsOnly = (*MatchingIter).ValidIVOpsOnly && CurrValidIVOp;
+					}
+				}
+			}
+		} // end for all inst iters in block
+	} // end for all blocks in loop
+
+	// See if any of the closures need to be merged.
+	list<struct IVTracker>::iterator CurrIter = IVTrackerList.begin();
+	while (CurrIter != IVTrackerList.end()) {
+		list<struct IVTracker>::iterator NextIter = CurrIter;
+		++NextIter;
+		while (NextIter != IVTrackerList.end()) {
+			STARSBitSet IntersectionSet = STARSBitSetIntersection((*CurrIter).CurrSSAClosure, (*NextIter).CurrSSAClosure);
+			if (IntersectionSet.IsAnyBitSet()) { // some intersection found
+				// Merge list entries.
+				(*CurrIter).CurrSSAClosure.UnionSets((*NextIter).CurrSSAClosure);
+				(*CurrIter).ValidIVOpsOnly = ((*CurrIter).ValidIVOpsOnly && (*NextIter).ValidIVOpsOnly);
+				NextIter = IVTrackerList.erase(NextIter);
+			}
+			else {
+				++NextIter;
+			}
+		}
+		++CurrIter;
+	}
+
+	// Finally, see if the Phi closure needs to be merged with any of the remaining closures.
+	CurrIter = IVTrackerList.begin();
+	while (CurrIter != IVTrackerList.end()) {
+		STARSBitSet IntersectionSet = STARSBitSetIntersection((*CurrIter).CurrSSAClosure, PhiIVTracker.CurrSSAClosure);
+		if (IntersectionSet.IsAnyBitSet()) { // some intersection found
+			// Merge list entries.
+			PhiIVTracker.CurrSSAClosure.UnionSets((*CurrIter).CurrSSAClosure);
+			PhiIVTracker.ValidIVOpsOnly = (PhiIVTracker.ValidIVOpsOnly && (*CurrIter).ValidIVOpsOnly);
+		}
+		++CurrIter;
+	}
+
+	// Should we demand perfect matching of RegSpilled and RegUnspilled? Track them in
+	//  each element of the IVTrackerList?
+	SSAClosure = PhiIVTracker.CurrSSAClosure;
+
+	return PhiIVTracker.ValidIVOpsOnly;
+} // end of SMPFunction::FindIVLiveRangeClosure()
+
+// Detect basic and dependent loop induction variable families for all loops.
+//  Use analyses suitable for binaries, in which register assignment could
+//  make use of a register in a live range that does not overlap the live range
+//  of the induction var operations, e.g.:
+//
+//  phi function for reg at loop header
+//  possible use of phi def
+//  spill reg to stack location
+//  re-use reg in all kinds of ways, not as an IV
+//  restore reg from spill location
+//  do induction var arithmetic on reg
+//
+//  Textbook definitions of an IV are based on source code and don't
+//  have to deal with register assignment, split live ranges, etc.
+//  A textbook algorithm will disqualify the reg as an IV due to
+//  the live range "re-use reg in all kinds of ways, not as an IV" above.
+//  Only DEFs whose SSA numbers are in the closure from FindIVLiveRangeClosure() are examined as BIV candidates.
+void SMPFunction::DetectLoopInductionVars2(void) {
+	bool UseFP = this->UsesFramePointer();
+	bool VerboseOutput = global_stars_interface->VerboseLoopsMode();
+
+	// Initialize induction var vector with entries that have BIV*0 and SMP_SSA_UNINIT as markers
+	this->LoopInductionVars.resize(this->LoopCount);
+	struct InductionVarTriple DummyBIV;
+	SMPInstr *FirstInst = this->GetInstFromAddr(this->GetFirstFuncAddr());
+	STARSOpndTypePtr ZeroOp = FirstInst->MakeImmediateOpnd(0);
+	DefOrUse MultUse(ZeroOp, UNKNOWN);
+	DummyBIV.Multiplier = MultUse;
+	struct InductionVarFamily DummyFamily;
+	DummyFamily.BIVIncomingSSANum = SMP_SSA_UNINIT;
+	DummyFamily.BasicInductionVar = DummyBIV;
+	DummyFamily.BIVInitExpr = nullptr;
+	DummyFamily.BIVLimitExpr = nullptr;
+	for (size_t LoopIndex = 0; LoopIndex < this->LoopCount; ++LoopIndex) {
+		this->LoopInductionVars[LoopIndex].push_back(DummyFamily);
+	}
+
+	// Go through the Phi functions in the header blocks for all loops to search for basic induction var candidates.
+	for (size_t LoopIndex = 0; LoopIndex < this->LoopHeadBlockNumbers.size(); ++LoopIndex) {
+		int HeaderBlockNum = this->LoopHeadBlockNumbers[LoopIndex];
+		bool FoundBIV = false;
+		list<size_t> BlockNumList;
+		this->BuildLoopBlockList(LoopIndex, BlockNumList);
+
+		SMPBasicBlock *HeaderBlock = this->GetBlockByNum((size_t)HeaderBlockNum);
+		// Look at all register Phi functions in the HeaderBlock.
+		for (PhiSetIter PhiIter = HeaderBlock->GetFirstPhi(); PhiIter != HeaderBlock->GetLastPhi(); ++PhiIter) {
+			STARSOpndTypePtr PhiDefOp = PhiIter->GetAnyOp();
+			if (!MDIsDataFlowOpnd(PhiDefOp, UseFP))
+				continue;
+
+			STARSBitSet SSAClosure;
+			SSAClosure.AllocateBits((size_t) this->GetMaxStackSSANum());
+			if (!FindIVLiveRangeClosure(LoopIndex, PhiIter, SSAClosure))
+				continue;
+
+			struct InductionVarTriple BIVTriple;
+			struct InductionVarFamily CurrentFamily;
+			size_t PhiUseListSize = PhiIter->GetPhiListSize();
+			CurrentFamily.BIVInitExpr = nullptr;
+			CurrentFamily.BIVLimitExpr = nullptr;
+			CurrentFamily.BIVIncomingSSANum = SMP_SSA_UNINIT;
+			size_t InsideBIVFoundCount = 0;
+			STARS_ea_t OutsideDefAddr = STARS_BADADDR;
+			int OutsideSSANum = -1;
+
+			// Find outside the loop DEF.
+			for (size_t index = 0; index < PhiUseListSize; ++index) {
+				int SSANum = PhiIter->GetUseSSANum(index);
+				STARS_ea_t DefAddr = this->GetGlobalDefAddr(PhiDefOp, SSANum);
+				SMPBasicBlock *DefBlock = this->GetBlockFromInstAddr(DefAddr);
+				if (!this->IsBlockInLoop(DefBlock->GetNumber(), LoopIndex)) {
+					OutsideDefAddr = DefAddr;
+					OutsideSSANum = SSANum;
+					break;
+				}
+			}
+
+			if (0 > OutsideSSANum)
+				continue; // not valid IV
+
+			for (const size_t BlockNum : BlockNumList) {
+				SMPBasicBlock *CurrBlock = this->GetBlockByNum(BlockNum);
+				if (!CurrBlock->IsVarKill(PhiDefOp))
+					continue; // save time
+
+				for (vector<SMPInstr *>::const_iterator InstIter = CurrBlock->GetFirstConstInst();
+					InstIter != CurrBlock->GetLastConstInst(); ++InstIter) {
+					SMPInstr *CurrInst = (*InstIter);
+					STARS_ea_t InstAddr = CurrInst->GetAddr();
+					STARSDefUseIter DefIter = CurrInst->FindDef(PhiDefOp);
+					if (DefIter != CurrInst->GetLastDef()) {
+						int DefSSANum = DefIter->GetSSANum();
+						assert(0 <= DefSSANum);
+						if (!SSAClosure.GetBit((size_t) DefSSANum))
+							continue; // not part of IV live range
+						CurrentFamily.BIVInsideLoopDefAddrs.push_back(InstAddr);
+
+						STARSOpndTypePtr SecondOp = nullptr;
+						SMPoperator BIVOperator = SMP_NULL_OPERATOR;
+						if (CurrInst->IsBasicInductionVarArithmetic(SecondOp, BIVOperator)) {
+							// Last question: Is the "y" operand in x := x op y loop-invariant?
+							bool BIVarFound = ((nullptr != SecondOp) && SecondOp->IsImmedOp());
+							int SecondOpSSANum = SMP_SSA_UNINIT;
+							if (!BIVarFound && MDIsDataFlowOpnd(SecondOp, UseFP) && this->IsGlobalName(SecondOp)) {
+								STARSOpndTypePtr SearchOp = CloneIfNecessary(SecondOp, UseFP);
+								CanonicalizeOpnd(SearchOp);
+								STARSDefUseIter SecondOpUseIter = CurrInst->FindUse(SearchOp);
+								assert(SecondOpUseIter != CurrInst->GetLastUse());
+								SecondOpSSANum = SecondOpUseIter->GetSSANum();
+
+								// Last chance to detect BIV: Is SecondOp a USE of a loop-invariant DEF?
+								//  Note that block-local names are loop-variant and are already excluded.
+								//  The USE being traced is in CurrInst, so InstAddr is passed as the use address.
+								STARS_ea_t SecondOpDefAddr = CurrBlock->GetDefAddrFromUseAddr(SearchOp, InstAddr, SecondOpUseIter->GetSSANum(), false);
+								assert(STARS_BADADDR != SecondOpDefAddr);
+								int SecondOpDefBlockNum = SMP_BLOCKNUM_UNINIT;
+								bool SecondOpDEFIsInst = false;
+								if (STARS_IsSSAMarkerPseudoID(SecondOpDefAddr)) { // SSA Marker inst
+									SecondOpDefBlockNum = 0;
+									assert(SecondOpSSANum == 0);
+								}
+								else if (STARS_IsBlockNumPseudoID(SecondOpDefAddr)) {
+									SecondOpDefBlockNum = STARS_GetBlockNumFromPseudoID(SecondOpDefAddr);
+								}
+								else {
+									SecondOpDEFIsInst = true;
+									SecondOpDefBlockNum = this->GetBlockNumFromInstAddr(SecondOpDefAddr);
+								}
+								if (this->IsBlockInLoop(SecondOpDefBlockNum, LoopIndex)) {
+									if (SecondOpDEFIsInst) {
+										// See if SecondOpDEF inst is on the loop-invariant DEFs list.
+										if (this->LoopInvariantDEFs[LoopIndex].find(SecondOpDefAddr) != this->LoopInvariantDEFs[LoopIndex].end()) {
+											BIVarFound = true;
+										}
+									}
+								}
+								else { // DEF of SecondOp is outside the loop entirely
+									BIVarFound = true;
+								}
+							}
+							if (BIVarFound) {
+								DefOrUse PhiDef(PhiDefOp, PhiIter->GetDefType(), PhiIter->GetDefSSANum());
+								BIVTriple.InductionVar = PhiDef;
+								BIVTriple.SubtractAddend = (SMP_SUBTRACT == BIVOperator);
+								if ((SMP_ADD == BIVOperator) || (SMP_SUBTRACT == BIVOperator)) {
+									// Adding an operand to itself is a strength reduction of multiplying by two.
+									if (IsEqOpIgnoreBitwidth(PhiDefOp, SecondOp)) {
+										if (SMP_ADD == BIVOperator) { // SMP_SUBTRACT would be zeroing out, not a progression.
+											// Make triple: 2*InductionVar + 0
+											STARSOpndTypePtr MultiplierOp = CurrInst->MakeImmediateOpnd(2);
+											DefOrUse MultUse(MultiplierOp, NUMERIC);
+											BIVTriple.Multiplier = MultUse;
+											STARSOpndTypePtr AddendOp = CurrInst->MakeImmediateOpnd(0);
+											DefOrUse AddendUse(AddendOp, NUMERIC);
+											BIVTriple.Addend = AddendUse;
+										}
+									}
+									else {
+										// Make triple: 1*InductionVar + SecondOp
+										DefOrUse AddendUse(SecondOp, NUMERIC, SecondOpSSANum);
+										BIVTriple.Addend = AddendUse;
+										STARSOpndTypePtr MultiplierOp = CurrInst->MakeImmediateOpnd(1);
+										DefOrUse MultUse(MultiplierOp, NUMERIC);
+										BIVTriple.Multiplier = MultUse;
+									}
+								}
+								else if ((SMP_S_LEFT_SHIFT == BIVOperator) || (SMP_U_LEFT_SHIFT == BIVOperator)) {
+									// Must be a shift left by constant number of bits, same as multiply.
+									assert(SecondOp->IsImmedOp());
+									// Make triple: (1 << SecondOp)*InductionVar + 0
+									STARS_uval_t ShiftCount = SecondOp->GetImmedValue();
+									// Shift a STARS_uval_t one rather than an int literal, so the result is computed in the destination type.
+									STARS_uval_t MultiplierValue = (((STARS_uval_t) 1) << ShiftCount);
+									STARSOpndTypePtr AddendOp = CurrInst->MakeImmediateOpnd(0);
+									DefOrUse AddendUse(AddendOp, NUMERIC);
+									BIVTriple.Addend = AddendUse;
+									STARSOpndTypePtr MultiplierOp = CurrInst->MakeImmediateOpnd(MultiplierValue);
+									DefOrUse MultUse(MultiplierOp, NUMERIC, SecondOpSSANum);
+									BIVTriple.Multiplier = MultUse;
+								}
+								else {
+									assert((SMP_U_MULTIPLY == BIVOperator) || (SMP_S_MULTIPLY == BIVOperator));
+									// Make triple: SecondOp*InductionVar + 0
+									STARSOpndTypePtr AddendOp = CurrInst->MakeImmediateOpnd(0);
+									DefOrUse AddendUse(AddendOp, NUMERIC);
+									BIVTriple.Addend = AddendUse;
+									DefOrUse MultUse(SecondOp, NUMERIC, SecondOpSSANum);
+									BIVTriple.Multiplier = MultUse;
+								}
+								if (0 < InsideBIVFoundCount) { // not first BIV candidate
+									// Must match previous candidates.
+									if (EqualInductionVars(BIVTriple, CurrentFamily.BasicInductionVar)) {
+										// All is well; no need to update.
+										FoundBIV = true;
+										++InsideBIVFoundCount;
+									}
+									else {
+										FoundBIV = false;
+										SMP_msg("ERROR: Non-matching BIV candidates in loop %d in func %s\n",
+											(int) LoopIndex, this->GetFuncName());
+										break;
+									}
+								}
+								else { // First BIV candidate
+									CurrentFamily.BIVIncomingSSANum = OutsideSSANum;
+									CurrentFamily.BIVIncomingDefAddr = OutsideDefAddr;
+									CurrentFamily.BasicInductionVar = BIVTriple;
+									FoundBIV = true;
+									++InsideBIVFoundCount;
+								}
+								if (FoundBIV && VerboseOutput) {
+									SMP_msg("INFO: Basic Induction Var for func at %llx for Loop %d :", (uint64_t) this->GetFirstFuncAddr(), (int) LoopIndex);
+									DumpInductionVar(BIVTriple);
+								}
+							}
+						} // end if BasicInductionVarArithmetic found
+					}
+				} // end for all insts in block
+			} // end for all blocks in loop
+			if (0 <= CurrentFamily.BIVIncomingSSANum) { // valid IV family
+				CurrentFamily.BIVSSAClosure = SSAClosure;
+				this->LoopInductionVars[LoopIndex].push_back(CurrentFamily);
+				SMP_msg("INFO: BIVFoundCount success for func at %llx for Loop %d \n", (uint64_t) this->GetFirstFuncAddr(), (int) LoopIndex);
+			}
+		} // end for all Phi functions in header block
+		if (!FoundBIV) {
+			SMP_msg("ERROR: LOOP: BIV not found for loop %d at %llx in func %s\n", (int) LoopIndex,
+				(uint64_t)HeaderBlock->GetFirstAddr(), this->GetFuncName());
+			++STARS_LoopInductionVarIDFailures;
+		}
+		else {
+			++STARS_LoopInductionVarIDSuccesses;
+		}
+	} // end for all loops
+
+	// Look for dependent induction variables, which are linear functions of other induction variables.
+	list<size_t> BlockList;
+	for (size_t LoopIndex = 0; LoopIndex < this->LoopHeadBlockNumbers.size(); ++LoopIndex) {
+		this->BuildLoopBlockList(LoopIndex, BlockList);
+		for (list<size_t>::const_iterator BlockIter = BlockList.cbegin(); BlockIter != BlockList.cend(); ++BlockIter) {
+			SMPBasicBlock *CurrBlock = this->GetBlockByNum(*BlockIter);
+			for (vector<SMPInstr *>::iterator InstIter = CurrBlock->GetFirstInst(); InstIter != CurrBlock->GetLastInst(); ++InstIter) {
+				SMPInstr *CurrInst = (*InstIter);
+				STARSOpndTypePtr Mult1 = nullptr, Mult2 = nullptr, Add1 = nullptr, Add2 = nullptr;
+				SMPoperator RhsOperator = SMP_NULL_OPERATOR;
+
+				if (CurrInst->IsDependentInductionVarArithmetic(Mult1, Mult2, Add1, Add2, RhsOperator)) {
+					// See if one of Mult1, Mult2, Add1 or Add2 is a basic induction variable.
+					struct DependentInductionVar DIV;
+					this->FindDependentInductionVar(LoopIndex, DIV, Add1, Add2, Mult1, Mult2, RhsOperator, CurrInst);
+					if (VerboseOutput && (STARS_BADADDR != DIV.DIVDefAddr)) {
+						SMP_msg("INFO: Dependent Induction Variable for loop # %d found at %llx\n", (int) LoopIndex, (uint64_t)CurrInst->GetAddr());
+						PrintOperand(DIV.DIV.GetOp());
+						SMP_msg("\n");
+						DumpInductionVar(DIV.IVExpr);
+					}
+				}
+			}
+		}
+
+		BlockList.clear();
+	}
+
+	return;
+} // end of SMPFunction::DetectLoopInductionVars2()
+
 // Is CurrOp a BIV or DIV for loop with LoopIndex? return iterator and position in family if true,
 //  where we signal BIV with FamilyIndex == 0 and DIV with FamilyIndex == 1+vectorindex.
 //  Do not modify ListIter or FamilyIndex if we are returning false.
@@ -9191,7 +9647,7 @@ bool SMPFunction::ReplaceBIVWithExpr(std::size_t LoopIndex, const struct Inducti
 } // end of SMPFunction::ReplaceBIVWithExpr()
 
 // Replace all BIVs in CurrExpr with lower or upper limit (depending on InitCase) exprs for BIV
-bool SMPFunction::ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed) {
+bool SMPFunction::ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed, set<int> &RegsReplaced) {
 	bool success = true;
 	bool UseFP = this->UsesFramePointer();
 	size_t FamilyIndex;
@@ -9212,99 +9668,134 @@ bool SMPFunction::ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression
 						// Matched. Replace CurrExpr->LeftOperand with InitExpr or LimitExpr->GetRightTree()
 						//  from the BIV matched.
 						//  For LimitExpr, the LeftOperand is compared to the RightOperand or RightTree, so the right side is the limit.
-						if (InitCase) {
-							if (nullptr == IVFamily.BIVInitExpr) {
-								assert(0 <= IVLoopIndex);
-								success = false;
-								SMP_msg("ERROR: null IVFamily.BIVInitExpr: IVFamily Dump follows.\n");
-								DumpInductionVarFamily(IVFamily);
+
+						// Avoid infinite recursion by seeing if this operand has already been replaced.
+						bool AlreadyReplaced = false;
+						if (CurrOp->IsRegOp()) {
+							int RegHashIndex = HashGlobalNameAndSSA(CurrOp, CurrSSANum);
+							if (RegsReplaced.find(RegHashIndex) != RegsReplaced.cend()) {
+								success = true;
+								AlreadyReplaced = true;
 							}
 							else {
-								STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
-
-								// The InitExpr for an IV can be part of the InitExpr
-								//  for a memory access. Avoid infinite recursion by seeing
-								//  if we have matching exprs.
-								if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
-									STARSExpression *FinalExpr = ReplaceExpr->Clone();
-									CurrExpr->SetLeftTree(FinalExpr);
-									CurrExpr->SetLeftOperand(nullptr);
-									CurrExpr->SetParentInst(FinalExpr->GetParentInst());
-									changed = true;
-									success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed); // recurse
-								}
-								else { // Exprs are equal
-									success = true; // no more replacing can be done
-								}
+								(void) RegsReplaced.insert(RegHashIndex);
 							}
 						}
-						else { // LimitExpr case
-							if (nullptr == IVFamily.BIVLimitExpr) {
-								assert(0 <= IVLoopIndex);
-								success = false;
-								SMP_msg("ERROR: null IVFamily.BIVLimitExpr: IVFamily Dump follows.\n");
-								DumpInductionVarFamily(IVFamily);
-							}
-							else {
-								// NOTE: There are two cases. If BIVLimitExpr is for the BIV that terminates
-								//  the loop, then it will have a relational operator, e.g. RCX < k. If BIVLimitExpr
-								//  is for a secondary BIV, it will have the form RSI + k, which means "incoming value
-								//  of RSI plus k," where we need the InitExpr for RSI to determine the actual limit.
-								if (IsRelationalOperator(IVFamily.BIVLimitExpr->GetOperator())) {
-									if (IVFamily.BIVLimitExpr->HasRightSubTree()) {
-										STARSExpression *ReplaceExpr = IVFamily.BIVLimitExpr->GetRightTree();
-										
-										// The LimitExpr for an IV can be part of the LimitExpr
-										//  for a memory access. Avoid infinite recursion by seeing
-										//  if we have matching exprs.
-										if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
-											STARSExpression *FinalExpr = ReplaceExpr->Clone();
-											CurrExpr->SetLeftTree(FinalExpr);
-											CurrExpr->SetLeftOperand(nullptr);
-											CurrExpr->SetParentInst(FinalExpr->GetParentInst());
-											changed = true;
-											success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed); // recurse
-										}
-										else { // Exprs are equal
-											success = true; // no more replacing can be done
-										}
-									}
-									else { // just a RightOperand, so replace CurrExpr->LeftOperand with BIVLimitExpr->RightOperand
-										bool SameOpnds = ((CurrExpr->GetLeftSSANum() == IVFamily.BIVLimitExpr->GetRightSSANum())
-											&& IsEqOp(CurrExpr->GetConstLeftOperand(), IVFamily.BIVLimitExpr->GetConstRightOperand()));
-										if (!SameOpnds) {
-											CurrExpr->SetLeftOperand(IVFamily.BIVLimitExpr->GetRightOperand());
-											CurrExpr->SetLeftUseAddr(IVFamily.BIVLimitExpr->GetRightUseAddr());
-											CurrExpr->SetLeftSSANum(IVFamily.BIVLimitExpr->GetRightSSANum());
-											CurrExpr->SetLeftPreLoopDefAddr(IVFamily.BIVLimitExpr->GetRightPreLoopDefAddr());
-											success = true;
-											CurrExpr->SetParentInst(IVFamily.BIVLimitExpr->GetParentInst());
-											changed = true;
-										}
-										else { // Opnds are equal
-											success = true; // no more replacing can be done
-										}
-									}
+						if (!AlreadyReplaced) {
+							if (InitCase) {
+								if (nullptr == IVFamily.BIVInitExpr) {
+									assert(0 <= IVLoopIndex);
+									success = false;
+									SMP_msg("ERROR: null IVFamily.BIVInitExpr: IVFamily Dump follows.\n");
+									DumpInductionVarFamily(IVFamily);
 								}
 								else {
-									// Secondary BIV case. Simplest approach is to get the BIVInitExpr
-									//  and use it to replace the left operand as a new left tree.
 									STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
 
+									// The InitExpr for an IV can be part of the InitExpr
+									//  for a memory access. Avoid infinite recursion by seeing
+									//  if we have matching exprs.
 									if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
 										STARSExpression *FinalExpr = ReplaceExpr->Clone();
 										CurrExpr->SetLeftTree(FinalExpr);
 										CurrExpr->SetLeftOperand(nullptr);
 										CurrExpr->SetParentInst(FinalExpr->GetParentInst());
 										changed = true;
-										success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed); // recurse
+										success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed, RegsReplaced); // recurse
 									}
 									else { // Exprs are equal
 										success = true; // no more replacing can be done
 									}
 								}
 							}
-						} // end if (InitCase) ... else ...
+							else { // LimitExpr case
+								if (nullptr == IVFamily.BIVLimitExpr) {
+									assert(0 <= IVLoopIndex);
+									success = false;
+									SMP_msg("ERROR: null IVFamily.BIVLimitExpr: IVFamily Dump follows.\n");
+									DumpInductionVarFamily(IVFamily);
+								}
+								else {
+									// NOTE: There are two cases. If BIVLimitExpr is for the BIV that terminates
+									//  the loop, then it will have a relational operator, e.g. RCX < k. If BIVLimitExpr
+									//  is for a secondary BIV, it will have the form RSI + k, which means "incoming value
+									//  of RSI plus k," where we need the InitExpr for RSI to determine the actual limit.
+									if (IsRelationalOperator(IVFamily.BIVLimitExpr->GetOperator())) {
+										if (IVFamily.BIVLimitExpr->HasRightSubTree()) {
+											STARSExpression *ReplaceExpr = IVFamily.BIVLimitExpr->GetRightTree();
+
+											// The LimitExpr for an IV can be part of the LimitExpr
+											//  for a memory access. Avoid infinite recursion by seeing
+											//  if we have matching exprs.
+											if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
+												// We are about to recurse into the cloned ReplaceExpr. If CurrExpr
+												//  already matches the left subtree of ReplaceExpr, substituting
+												//  again would recurse without end, so stop here.
+												bool LeftMatch = (ReplaceExpr->HasLeftSubTree() &&
+													(!((*(ReplaceExpr->GetLeftTree()) < *CurrExpr) || (*CurrExpr < *(ReplaceExpr->GetLeftTree())))));
+												if (LeftMatch) {
+													success = true; // further replacing will recurse infinitely
+												}
+												else {
+													STARSExpression *FinalExpr = ReplaceExpr->Clone();
+													CurrExpr->SetLeftTree(FinalExpr);
+													CurrExpr->SetLeftOperand(nullptr);
+													CurrExpr->SetParentInst(FinalExpr->GetParentInst());
+													changed = true;
+													success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed, RegsReplaced); // recurse
+												}
+											}
+											else { // Exprs are equal
+												success = true; // no more replacing can be done
+											}
+										}
+										else { // just a RightOperand, so replace CurrExpr->LeftOperand with BIVLimitExpr->RightOperand
+											bool SameOpnds = ((CurrExpr->GetLeftSSANum() == IVFamily.BIVLimitExpr->GetRightSSANum())
+												&& IsEqOp(CurrExpr->GetConstLeftOperand(), IVFamily.BIVLimitExpr->GetConstRightOperand()));
+											if (!SameOpnds) {
+												CurrExpr->SetLeftOperand(IVFamily.BIVLimitExpr->GetRightOperand());
+												CurrExpr->SetLeftUseAddr(IVFamily.BIVLimitExpr->GetRightUseAddr());
+												CurrExpr->SetLeftSSANum(IVFamily.BIVLimitExpr->GetRightSSANum());
+												CurrExpr->SetLeftPreLoopDefAddr(IVFamily.BIVLimitExpr->GetRightPreLoopDefAddr());
+												success = true;
+												CurrExpr->SetParentInst(IVFamily.BIVLimitExpr->GetParentInst());
+												changed = true;
+											}
+											else { // Opnds are equal
+												success = true; // no more replacing can be done
+											}
+										}
+									}
+									else {
+										// Secondary BIV case. Simplest approach is to get the BIVInitExpr
+										//  and use it to replace the left operand as a new left tree.
+										STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
+
+										if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
+											// We are about to recurse into the cloned ReplaceExpr. If CurrExpr
+											//  already matches the left subtree of ReplaceExpr, substituting
+											//  again would recurse without end, so stop here.
+											bool LeftMatch = (ReplaceExpr->HasLeftSubTree() &&
+												(!((*(ReplaceExpr->GetLeftTree()) < *CurrExpr) || (*CurrExpr < *(ReplaceExpr->GetLeftTree())))));
+											if (LeftMatch) {
+												success = true; // further replacing will recurse infinitely
+											}
+											else {
+												STARSExpression *FinalExpr = ReplaceExpr->Clone();
+												CurrExpr->SetLeftTree(FinalExpr);
+												CurrExpr->SetLeftOperand(nullptr);
+												CurrExpr->SetParentInst(FinalExpr->GetParentInst());
+												changed = true;
+												success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed, RegsReplaced); // recurse
+											}
+										}
+										else { // Exprs are equal
+											success = true; // no more replacing can be done
+										}
+									}
+								}
+							} // end if (InitCase) ... else ...
+						}
 					}
 					else {
 						// Alternative: Should we not insist on SSANum match? !!!!****!!!!
@@ -9317,7 +9808,7 @@ bool SMPFunction::ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression
 		}
 	}
 	else {
-		success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed); // recurse
+		success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetLeftTree(), InitCase, changed, RegsReplaced); // recurse
 	}
 	if (!success)
 		return success;
@@ -9335,100 +9826,145 @@ bool SMPFunction::ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression
 						&& IsEqOp(CurrOp, IVFamily.BasicInductionVar.InductionVar.GetOp())) {
 						// Matched. Replace CurrExpr->RightOperand with InitExpr or LimitExpr->GetRightTree() or LimitExpr->GetRightOperand().
 						//  For LimitExpr, the LeftOperand is compared to to the RightOperand or RightTree, so the right side is the limit.
-						if (InitCase) {
-							if (nullptr == IVFamily.BIVInitExpr) {
-								assert(0 <= IVLoopIndex);
-								success = false;
-								SMP_msg("ERROR: null IVFamily.BIVInitExpr: IVFamily Dump follows.\n");
-								DumpInductionVarFamily(IVFamily);
+
+						// Avoid infinite recursion by seeing if this operand has already been replaced.
+						bool AlreadyReplaced = false;
+						if (CurrOp->IsRegOp()) {
+							int RegHashIndex = HashGlobalNameAndSSA(CurrOp, CurrSSANum);
+							if (RegsReplaced.find(RegHashIndex) != RegsReplaced.cend()) {
+								success = true;
+								AlreadyReplaced = true;
 							}
 							else {
-								STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
-
-								// The InitExpr for an IV can be part of the InitExpr
-								//  for a memory access. Avoid infinite recursion by seeing
-								//  if we have matching exprs.
-								if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
-									STARSExpression *FinalExpr = ReplaceExpr->Clone();
-									CurrExpr->SetRightTree(FinalExpr);
-									CurrExpr->SetRightOperand(nullptr);
-									CurrExpr->SetParentInst(FinalExpr->GetParentInst());
-									changed = true;
-									success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed); // recurse
-								}
-								else { // Exprs are equal
-									success = true; // no more replacing can be done
-								}
+								(void)RegsReplaced.insert(RegHashIndex);
 							}
 						}
-						else if (nullptr == IVFamily.BIVLimitExpr) { // LimitExpr case, safeguard
-							assert(0 <= IVLoopIndex);
-							success = false;
-							SMP_msg("ERROR: null IVFamily.BIVLimitExpr: IVFamily Dump follows.\n");
-							DumpInductionVarFamily(IVFamily);
-						}
-						else { // LimitExpr case, safe to proceed
-							// NOTE: There are two cases. If BIVLimitExpr is for the BIV that terminates
-							//  the loop, then it will have a relational operator, e.g. RCX < k. If BIVLimitExpr
-							//  is for a secondary BIV, it will have the form RSI + k, which means "incoming value
-							//  of RSI plus k," where we need the InitExpr for RSI to determine the actual limit.
-							if (IsRelationalOperator(IVFamily.BIVLimitExpr->GetOperator())) {
-								if (IVFamily.BIVLimitExpr->HasRightSubTree()) {
-									STARSExpression *ReplaceExpr = IVFamily.BIVLimitExpr->GetRightTree();
-
-									// The LimitExpr for an IV can be part of the LimitExpr
+						if (!AlreadyReplaced) {
+							if (InitCase) {
+								if (nullptr == IVFamily.BIVInitExpr) {
+									assert(0 <= IVLoopIndex);
+									success = false;
+									SMP_msg("ERROR: null IVFamily.BIVInitExpr: IVFamily Dump follows.\n");
+									DumpInductionVarFamily(IVFamily);
+								}
+								else {
+									STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
+
+									// The InitExpr for an IV can be part of the InitExpr
 									//  for a memory access. Avoid infinite recursion by seeing
 									//  if we have matching exprs.
 									if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
-										STARSExpression *FinalExpr = ReplaceExpr->Clone();
-										CurrExpr->SetRightTree(FinalExpr);
-										CurrExpr->SetRightOperand(nullptr);
-										CurrExpr->SetParentInst(FinalExpr->GetParentInst());
-										changed = true;
-										success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed); // recurse
+										// We are about to recurse into the cloned ReplaceExpr. If CurrExpr
+										//  already matches the left subtree of ReplaceExpr, substituting
+										//  again would recurse without end, so stop here.
+										bool LeftMatch = (ReplaceExpr->HasLeftSubTree() &&
+											(!((*(ReplaceExpr->GetLeftTree()) < *CurrExpr) || (*CurrExpr < *(ReplaceExpr->GetLeftTree())))));
+										if (LeftMatch) {
+											success = true; // further replacing will recurse infinitely
+										}
+										else {
+											STARSExpression *FinalExpr = ReplaceExpr->Clone();
+											CurrExpr->SetRightTree(FinalExpr);
+											CurrExpr->SetRightOperand(nullptr);
+											CurrExpr->SetParentInst(FinalExpr->GetParentInst());
+											changed = true;
+											success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed, RegsReplaced); // recurse
+										}
 									}
 									else { // Exprs are equal
 										success = true; // no more replacing can be done
 									}
 								}
-								else { // just a RightOperand, so replace CurrExpr->RightOperand with BIVLimitExpr->RightOperand
-									bool SameOpnds = ((CurrExpr->GetRightSSANum() == IVFamily.BIVLimitExpr->GetRightSSANum())
-										&& IsEqOp(CurrExpr->GetConstRightOperand(), IVFamily.BIVLimitExpr->GetConstRightOperand()));
-									if (!SameOpnds) {
-										CurrExpr->SetRightOperand(IVFamily.BIVLimitExpr->GetRightOperand());
-										CurrExpr->SetRightUseAddr(IVFamily.BIVLimitExpr->GetRightUseAddr());
-										CurrExpr->SetRightSSANum(IVFamily.BIVLimitExpr->GetRightSSANum());
-										CurrExpr->SetRightPreLoopDefAddr(IVFamily.BIVLimitExpr->GetRightPreLoopDefAddr());
-										CurrExpr->SetParentInst(IVFamily.BIVLimitExpr->GetParentInst());
-										changed = true;
-										success = true;
+							}
+							else if (nullptr == IVFamily.BIVLimitExpr) { // LimitExpr case, safeguard
+								assert(0 <= IVLoopIndex);
+								success = false;
+								SMP_msg("ERROR: null IVFamily.BIVLimitExpr: IVFamily Dump follows.\n");
+								DumpInductionVarFamily(IVFamily);
+							}
+							else { // LimitExpr case, safe to proceed
+								// NOTE: There are two cases. If BIVLimitExpr is for the BIV that terminates
+								//  the loop, then it will have a relational operator, e.g. RCX < k. If BIVLimitExpr
+								//  is for a secondary BIV, it will have the form RSI + k, which means "incoming value
+								//  of RSI plus k," where we need the InitExpr for RSI to determine the actual limit.
+								if (IsRelationalOperator(IVFamily.BIVLimitExpr->GetOperator())) {
+									if (IVFamily.BIVLimitExpr->HasRightSubTree()) {
+										STARSExpression *ReplaceExpr = IVFamily.BIVLimitExpr->GetRightTree();
+
+										// The LimitExpr for an IV can be part of the LimitExpr
+										//  for a memory access. Avoid infinite recursion by seeing
+										//  if we have matching exprs.
+										if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
+											// We are about to recurse into the cloned ReplaceExpr. If CurrExpr
+											//  already matches the left subtree of ReplaceExpr, substituting
+											//  again would recurse without end, so stop here.
+											bool LeftMatch = (ReplaceExpr->HasLeftSubTree() &&
+												(!((*(ReplaceExpr->GetLeftTree()) < *CurrExpr) || (*CurrExpr < *(ReplaceExpr->GetLeftTree())))));
+											if (LeftMatch) {
+												success = true; // further replacing will recurse infinitely
+											}
+											else {
+												STARSExpression *FinalExpr = ReplaceExpr->Clone();
+												CurrExpr->SetRightTree(FinalExpr);
+												CurrExpr->SetRightOperand(nullptr);
+												CurrExpr->SetParentInst(FinalExpr->GetParentInst());
+												changed = true;
+												success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed, RegsReplaced); // recurse
+											}
+										}
+										else { // Exprs are equal
+											success = true; // no more replacing can be done
+										}
 									}
-									else { // Opnds are equal
-										success = true; // no more replacing can be done
+									else { // just a RightOperand, so replace CurrExpr->RightOperand with BIVLimitExpr->RightOperand
+										bool SameOpnds = ((CurrExpr->GetRightSSANum() == IVFamily.BIVLimitExpr->GetRightSSANum())
+											&& IsEqOp(CurrExpr->GetConstRightOperand(), IVFamily.BIVLimitExpr->GetConstRightOperand()));
+										if (!SameOpnds) {
+											CurrExpr->SetRightOperand(IVFamily.BIVLimitExpr->GetRightOperand());
+											CurrExpr->SetRightUseAddr(IVFamily.BIVLimitExpr->GetRightUseAddr());
+											CurrExpr->SetRightSSANum(IVFamily.BIVLimitExpr->GetRightSSANum());
+											CurrExpr->SetRightPreLoopDefAddr(IVFamily.BIVLimitExpr->GetRightPreLoopDefAddr());
+											CurrExpr->SetParentInst(IVFamily.BIVLimitExpr->GetParentInst());
+											changed = true;
+											success = true;
+										}
+										else { // Opnds are equal
+											success = true; // no more replacing can be done
+										}
 									}
 								}
-							}
-							else {
-								// Secondary BIV case. Simplest approach is to get the BIVInitExpr
-								//  and use it to replace the right operand as a new right tree.
-								STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
+								else {
+									// Secondary BIV case. Simplest approach is to get the BIVInitExpr
+									//  and use it to replace the right operand as a new right tree.
+									STARSExpression *ReplaceExpr = IVFamily.BIVInitExpr;
 
-								// The InitExpr for an IV can be part of the InitExpr
-								//  for a memory access. Avoid infinite recursion by seeing
-								//  if we have matching exprs.
-								if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
-									STARSExpression *FinalExpr = ReplaceExpr->Clone();
-									CurrExpr->SetRightTree(FinalExpr);
-									CurrExpr->SetRightOperand(nullptr);
-									CurrExpr->SetParentInst(FinalExpr->GetParentInst());
-									changed = true;
-									success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed); // recurse
-								}
-								else { // Exprs are equal
-									success = true; // no more replacing can be done
+									// The InitExpr for an IV can be part of the InitExpr
+									//  for a memory access. Avoid infinite recursion by seeing
+									//  if we have matching exprs.
+									if ((*ReplaceExpr < *CurrExpr) || (*CurrExpr < *ReplaceExpr)) {
+										// We are about to recurse into the cloned ReplaceExpr. If CurrExpr
+										//  already matches the left subtree of ReplaceExpr, substituting
+										//  again would recurse without end, so stop here.
+										bool LeftMatch = (ReplaceExpr->HasLeftSubTree() &&
+											(!((*(ReplaceExpr->GetLeftTree()) < *CurrExpr) || (*CurrExpr < *(ReplaceExpr->GetLeftTree())))));
+										if (LeftMatch) {
+											success = true; // further replacing will recurse infinitely
+										}
+										else {
+											STARSExpression *FinalExpr = ReplaceExpr->Clone();
+											CurrExpr->SetRightTree(FinalExpr);
+											CurrExpr->SetRightOperand(nullptr);
+											CurrExpr->SetParentInst(FinalExpr->GetParentInst());
+											changed = true;
+											success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed, RegsReplaced); // recurse
+										}
+									}
+									else { // Exprs are equal
+										success = true; // no more replacing can be done
+									}
 								}
-							}
-						} // end if (InitCase) ... else ...
+							} // end if (InitCase) ... else ...
+						}
 					}
 					else {
 						// Alternative: Should we not insist on SSANum match? !!!!****!!!!
@@ -9441,7 +9977,7 @@ bool SMPFunction::ReplaceAllBIVsWithExprs(std::size_t LoopIndex, STARSExpression
 		}
 	}
 	else {
-		success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed); // recurse
+		success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr->GetRightTree(), InitCase, changed, RegsReplaced); // recurse
 	}
 
 	return success;
@@ -9601,7 +10137,8 @@ bool SMPFunction::ReplaceAllDIVsWithExprs(std::size_t LoopIndex, STARSExpression
 
 // wrapper to call ReplaceAllBIVsWithExprs() and ReplaceAllDIVsWithExprs()
 bool SMPFunction::ReplaceAllIVsWithExprs(std::size_t LoopIndex, STARSExpression *CurrExpr, bool InitCase, bool &changed) {
-	bool success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr, InitCase, changed);
+	set<int> RegsReplaced;
+	bool success = this->ReplaceAllBIVsWithExprs(LoopIndex, CurrExpr, InitCase, changed, RegsReplaced);
 	if (!success)
 		return success;
 	success = this->ReplaceAllDIVsWithExprs(LoopIndex, CurrExpr, InitCase, changed); 
diff --git a/src/base/SMPInstr.cpp b/src/base/SMPInstr.cpp
index 29102a66..bc9d4963 100644
--- a/src/base/SMPInstr.cpp
+++ b/src/base/SMPInstr.cpp
@@ -8111,12 +8111,28 @@ bool SMPInstr::IsDependentInductionVarArithmetic(STARSOpndTypePtr &Mult1, STARSO
 } // end of SMPInstr::IsDependentInductionVarArithmetic()
 
 // BIV arithmetic or other opcode acceptable for BIV definition in binaries
-bool SMPInstr::IsAcceptableBIVOperation(void) const {
+//  RegSpilled and RegUnspilled are only ever set to true here, never reset to false.
+bool SMPInstr::IsAcceptableBIVOperation(bool &RegSpilled, bool &RegUnspilled) const {
 	bool BIVOp = false;
+	STARSOpndTypePtr RightOp = nullptr;
 	// We permit a basic induction var (BIV) to be modified by
 	//  recursive calls.
 	if (this->IsRecursiveCall())
 		BIVOp = true;
+	else if (this->IsSimpleCopyNoNorm(RightOp)) {
+		bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer();
+		// See if the copy is a register spill (direct stack store) or unspill (direct stack load); BIVOp stays false either way.
+		if (this->HasDestMemoryOperand()) {
+			if (MDIsDirectStackAccessOpnd(this->GetMemDef(), UseFP)) {
+				RegSpilled = true;
+			}
+		}
+		else if (this->HasSourceMemoryOperand()) {
+			if (MDIsDirectStackAccessOpnd(this->GetMemUse(), UseFP)) {
+				RegUnspilled = true;
+			}
+		}
+	}
 
 	return BIVOp;
 } // end of SMPInstr::IsAcceptableBIVOperation()
-- 
GitLab