From a72fb792873ce7e9e322f78c0fbc9360ff75e33d Mon Sep 17 00:00:00 2001
From: Clark Coleman <clc@zephyr-software.com>
Date: Thu, 24 Sep 2020 19:06:41 -0400
Subject: [PATCH] Begin identifying C/C++ loop continue statement code patterns
 for SPARK translation.

---
 include/base/SMPFunction.h |   4 +-
 src/base/SMPFunction.cpp   | 174 +++++++++++++++++++++++++++++++++++--
 2 files changed, 172 insertions(+), 6 deletions(-)

diff --git a/include/base/SMPFunction.h b/include/base/SMPFunction.h
index 00a9fabc..f1225471 100644
--- a/include/base/SMPFunction.h
+++ b/include/base/SMPFunction.h
@@ -596,7 +596,7 @@ public:
 	void ComputeSSA(void); // Compute SSA form data structures
 	bool DoesBlockDominateBlock(int HeadBlockNum, int TailBlockNum) const; // Does basic block HeadBlockNum dominate basic block TailBlockNum?
 	bool IsBlockInAnyLoop(int BlockNum) const; // Is block (with block # BlockNum) inside any loop?
-	bool IsBlockInLoop(int BlockNum, std::size_t LoopNum); // Is block (with block # BlockNum) inside loop # LoopNum?
+	bool IsBlockInLoop(int BlockNum, std::size_t LoopNum) const; // Is block (with block # BlockNum) inside loop # LoopNum?
 	bool AreBlocksInSameLoops(const int BlockNum1, const int BlockNum2) const;
 	bool DoesEdgeExitLoop(const int BlockNum1, const int BlockNum2) const; // Does going from BlockNum1 to BlockNum2 exit a loop?
 	void BuildLoopList(int BlockNum, std::list<std::size_t> &LoopList); // build list of loop numbers that BlockNum is part of.
@@ -1050,6 +1050,8 @@ private:
 	bool CounterVarHelper(const STARSOpndTypePtr &DefOp, int DefSSANum, int BlockNum, bool LocalName, std::list<std::pair<int, STARS_ea_t> > &CounterSSANums); // recursive helper for FindCounterVariables()
 	bool ConditionalTypePropagation(void); // Apply SCC algorithm to unresolved Phi DEFs
 	bool PropagateSignedness(void); // Propagate signedness FG info from DEFs to USEs whenever there is no USE sign info.
+	bool FindLoopContinuePattern(const int BranchBlockNum); // Does code starting at COND_BRANCH in BranchBlockNum represent a C/C++ loop continue statement?
+	bool LoopContinueHelper(const int BranchBlockNum, const int InnerMostLoopNum, const SMPBasicBlock *CurrentBlock);
 	void MarkSpecialNumericErrorCases(void); // Detect and mark special cases before emitting numeric error annotations.
 	void EmitReturnTargetAnnotations(void); // Emit Indirect Branch Target destinations for return instructions in this func.
 	void EmitFuncPtrShadowingAnnotations(FILE *InfoAnnotFile); // Emit annotations for func ptr shadowing defense
diff --git a/src/base/SMPFunction.cpp b/src/base/SMPFunction.cpp
index 09c4445e..533ba2a1 100644
--- a/src/base/SMPFunction.cpp
+++ b/src/base/SMPFunction.cpp
@@ -691,7 +691,7 @@ bool SMPFunction::ComputeInOutRegs(bool InheritPass, bool &WritesMem, bool &Call
 			}
 		}
 		else if (CurrBlock->HasCallInstruction()) { // InheritPass
-			int LoopNum = this->GetInnermostLoopNum(CurrBlock->GetNumber());
+			int LoopNum = (this->GetNumLoops() > 0) ? this->GetInnermostLoopNum(CurrBlock->GetNumber()) : -1;
 			size_t LoopNumOffsetByOne = (size_t)(LoopNum + 1);
 			// Note: LoopNum could be -1 if we are outside of any loop.
 			list<size_t> LoopList;
@@ -4425,7 +4425,8 @@ int SMPFunction::GetLoopNumFromTestBlockNum(int BlockNum) const {
 int SMPFunction::GetInnermostLoopNum(const int BlockNum) const {
 	int LoopNum = -1;
 	size_t MinBlockCount = 100000;
-	if ((BlockNum >= 0) && (this->LoopCount > 0) && this->IsBlockInAnyLoop(BlockNum)) {
+	bool FuncHasLoops = (this->LoopCount > 0);
+	if ((BlockNum >= 0) && FuncHasLoops && this->IsBlockInAnyLoop(BlockNum)) {
 		// Find loop containing this BlockNum with the fewest number of blocks.
 		for (size_t LoopIndex = 0; LoopIndex < this->FuncLoopsByBlock[(size_t) BlockNum].GetNumBits(); ++LoopIndex) {
 			if (this->FuncLoopsByBlock[(size_t) BlockNum].GetBit(LoopIndex)) {
@@ -4437,6 +4438,10 @@ int SMPFunction::GetInnermostLoopNum(const int BlockNum) const {
 			}
 		}
 	}
+	// Is the answer always the lowest loop number?
+	if (FuncHasLoops && this->FuncLoopsByBlock[(size_t)BlockNum].FindHighestBitSet() != LoopNum) {
+		SMP_msg("INFO: Innermost loop num is not highest loop num for block %d in %s\n", BlockNum, this->GetFuncName());
+	}
 	return LoopNum;
 } // end of SMPFunction::GetInnermostLoopNum()
 
@@ -10671,7 +10676,7 @@ void SMPFunction::DetectInArgRegsNeededForMemWriteExprs(void) {
 			int OuterLoopIndex = this->GetOutermostLoopNum((int) BlockIndex);
 			// OuterLoopIndex could be -1 if CurrInst is not in a loop
 			bool NotInLoop = (0 > OuterLoopIndex);
-			int InnerLoopIndex = this->GetInnermostLoopNum((int) BlockIndex);
+			int InnerLoopIndex = NotInLoop ? -1 : this->GetInnermostLoopNum((int) BlockIndex);
 			size_t InnerLoopNumPlusOne = (size_t)(InnerLoopIndex + 1);
 			list<size_t> LoopList; // fill on demand
 			for (vector<SMPInstr *>::iterator InstIter = CurrBlock->GetFirstInst(); InstIter != CurrBlock->GetLastInst(); ++InstIter) {
@@ -13979,7 +13984,7 @@ bool SMPFunction::IsBlockInAnyLoop(int BlockNum) const {
 } // end of SMPFunction::IsBlockInAnyLoop()
 
 // Is block (with block # BlockNum) inside loop # LoopNum?
-bool SMPFunction::IsBlockInLoop(int BlockNum, std::size_t LoopNum) {
+bool SMPFunction::IsBlockInLoop(int BlockNum, std::size_t LoopNum) const {
 	if (0 == this->LoopCount)
 		return false;
 	assert(((size_t) BlockNum) < this->GetNumBlocks());
@@ -16532,6 +16537,165 @@ int SMPFunction::GetLoopNumFromHeaderBlockNum(const int HeaderBlockNum) const {
 	return LoopNumberFound;
 } // end of SMPFunction::GetLoopNumFromHeaderBlockNum()
 
+// Distinguish between code that must be translated as a C/C++ loop "continue" statement, and
+//  code in a loop that could just be an optimized form of an if-then statement inside the loop.
+//  See detailed examples in comments below.
+bool SMPFunction::FindLoopContinuePattern(const int BranchBlockNum) {
+	// There are two cases of a COND_BRANCH at the end of a block inside a loop that
+	//  both target the header block of the loop. In the first case, the fall-through
+	//  exits the loop. This case is simple and the COND_BRANCH is marked as a LOOP_BACK.
+	//  In the second case, the fall-through remains inside the innermost loop containing
+	//  the COND_BRANCH block. While the BranchBlockNum basic block is still a loop tail block,
+	//  as is any block that has a back edge to the loop header, translation to SPARK Ada
+	//  can be tricky.
+	//
+	// This second case has two subcases with respect to SPARK Ada translation. In the simpler
+	//  subcase, the COND_BRANCH is merely guarding the code from the COND_BRANCH to the end of
+	//  the loop:
+	//
+	//  loop
+	//    if (cond1) then
+	//       [code 1 here]
+	//       return; or break; or call non-returning function;
+	//    else
+	//       [code 2 here]
+	//       if (!cond2) then
+	//          loop back to loop header;
+	//       endif;
+	//       [code 3 here]
+	//    endif;
+	//  endloop;
+	//
+	//  This code is just a compiler-optimized form of the following source code:
+	//
+	//  loop
+	//    if (cond1) then
+	//       [code 1 here]
+	//       return; or break; or call non-returning function;
+	//    else
+	//       [code 2 here]
+	//       if (cond2) then
+	//          [code 3 here]
+	//       endif;
+	//    endif;
+	//  endloop;
+	//
+	//  If compiled without optimization, the final endif would need to be
+	//   a jump back to the loop header. To reduce jumps and branches, the
+	//   compiler can take advantage of the fact that it already has to have
+	//   a COND_BRANCH at the test of the innermost if-then and reverse that
+	//   cond2 into !cond2 and make the target be the loop header block. 
+	//
+	// The problem here is that the sequence in which a COND_BRANCH either 
+	//  takes a back edge to the loop header or falls through to code that is
+	//  still within the loop looks like a C/C++ loop continue statement, and
+	//  the SPARK Ada 2012 and Ada 2017 language standards still lack a loop
+	//  continue statement. The Ada 2017 rationale document says that control
+	//  flow would be less readable with a continue statement, and they actually
+	//  recommend putting a label before the endloop and using an explicit goto
+	//  statement (!) to reach it when you want to perform a loop continue.
+	//  SPARK Ada does not permit use of the goto statement, being a subset of
+	//  Ada designed for formal analysis.
+	//
+	// Contrast the above code with a true continue statement:
+	//
+	//  loop
+	//    if (cond1) then
+	//       [code 1 here]
+	//    else
+	//       [code 2 here]
+	//       if (cond2) then
+	//          continue;
+	//       endif;
+	//		 [code 3 here]
+	//    endif;
+	//    [code 4 here]
+	//  endloop;
+	//
+	//  The else branch can be restructured to put [code 3] inside an if-then
+	//   that is contained in the else branch. But [code 4] is reachable by the
+	//   then branch as well as the else branch, and it cannot be contained within
+	//   the else branch by restructuring. As with all code restructurings, we must
+	//   either clone code or introduce a boolean flag to keep track of our path, e.g.:
+	//
+	//  loop
+	//    bool ElseBranchContinueTaken = false;
+	//    if (cond1) then
+	//       [code 1 here]
+	//    else
+	//       [code 2 here]
+	//       if (cond2) then
+	//          ElseBranchContinueTaken = true;
+	//       endif;
+	//       if (!ElseBranchContinueTaken) then
+	//		    [code 3 here]
+	//       endif;
+	//    endif;
+	//    if (!ElseBranchContinueTaken) then
+	//       [code 4 here]
+	//    endif;
+	//  endloop;
+	//
+	//  So, one subcase has complicated problems in SPARK Ada translation, while the other
+	//   has a condition that can be inverted and become an if-then statement.
+	//
+	//  We can distinguish between these two subcases using dominator tree information.
+	//   Beginning at the COND_BRANCH in question, we want to answer the following question:
+	//   Following only forward edges, can you reach a basic block that is not dominated by
+	//   the COND_BRANCH block yet is still within the innermost loop containing the COND_BRANCH
+	//   block? The [code 4] block in the complex subcase is an example. This is a true loop continue
+	//   case that requires complex translation. If we fail to find such a block, then this code
+	//   pattern is NOT a loop continue statement and our SPARK Ada translation just needs to know
+	//   not to treat it like a LOOP_BACK that falls out of the loop.
+
+	const int InnerMostLoopIndex = this->GetInnermostLoopNum(BranchBlockNum);
+	const SMPBasicBlock *BranchBlock = this->GetBlockByNum((size_t)BranchBlockNum);
+	this->ResetProcessedBlocks();
+
+	return this->LoopContinueHelper(BranchBlockNum, InnerMostLoopIndex, BranchBlock);
+} // end of SMPFunction::FindLoopContinuePattern()
+
+// Recursive helper for FindLoopContinuePattern(). See lengthy comments in that method.
+bool SMPFunction::LoopContinueHelper(const int BranchBlockNum, const int InnerMostLoopNum, const SMPBasicBlock *CurrentBlock) {
+	// Find all forward edges from CurrentBlock. See which ones stay within InnerMostLoopNum, then
+	//  test the dominator tree info.
+	bool FoundContinuePattern = false;
+	int CurrBlockNum = CurrentBlock->GetNumber();
+
+	for (list<SMPBasicBlock *>::const_iterator SuccIter = CurrentBlock->GetFirstConstSucc(); SuccIter != CurrentBlock->GetLastConstSucc(); ++SuccIter) {
+		SMPBasicBlock *SuccBlock = (*SuccIter);
+		int SuccBlockNum = SuccBlock->GetNumber();
+		if ((SuccBlockNum != CurrBlockNum) && (!SuccBlock->IsProcessed())) { // avoid self-loops
+			SuccBlock->SetProcessed(true);
+			if (this->IsBlockInLoop(SuccBlockNum, InnerMostLoopNum)) {
+				// Is this a forward edge?
+				if (!this->DoesBlockDominateBlock(SuccBlockNum, BranchBlockNum)) {
+					// Have we escaped the branch we started in, which can be detected
+					//  by the fact that BranchBlockNum does not dominate SuccBlock?
+					if (!this->DoesBlockDominateBlock(BranchBlockNum, SuccBlockNum)) {
+						// SuccBlock is in innermost loop with BranchBlockNum, but we
+						//  cannot enclose it with an if-then inside the BranchBlockNum
+						//  code sequence, so it is a true loop continue pattern.
+						FoundContinuePattern = true;
+						break;
+					}
+					else {
+						// Recurse into SuccBlock.
+						if (this->LoopContinueHelper(BranchBlockNum, InnerMostLoopNum, SuccBlock)) {
+							// Found one block definitely part of a loop continue code pattern.
+							FoundContinuePattern = true;
+							break;
+						}
+					}
+				}
+			}
+		}
+	} // end for all successors
+
+	return FoundContinuePattern;
+} // end of SMPFunction::LoopContinueHelper()
+
+
 // Detect and mark special cases before emitting numeric error and other security annotations.
 void SMPFunction::MarkSpecialNumericErrorCases(void) {
 	list<SMPBasicBlock *>::iterator BlockIter;
@@ -16635,7 +16799,7 @@ void SMPFunction::MarkSpecialNumericErrorCases(void) {
 								if (!this->IsBlockInLoop(FollowBlockNum, (size_t)LoopIndex)) {
 									this->UpdateLoopFollowBlockNum(HeaderBlockNum, FollowBlockNum);
 								}
-								else {
+								else if (this->FindLoopContinuePattern((int) BlockIndex)) {
 									SMP_msg("WARNING: SPARK: LOOP_CONTINUE case detected in block %d FollowBlock %d of %s\n", 
 										CurrBlock->GetNumber(), FollowBlockNum, this->GetFuncName());
 									this->DumpDotCFG();
-- 
GitLab