From a72fb792873ce7e9e322f78c0fbc9360ff75e33d Mon Sep 17 00:00:00 2001 From: Clark Coleman <clc@zephyr-software.com> Date: Thu, 24 Sep 2020 19:06:41 -0400 Subject: [PATCH] Begin identifying C/C++ loop continue statement code patterns for SPARK translation. --- include/base/SMPFunction.h | 4 +- src/base/SMPFunction.cpp | 174 +++++++++++++++++++++++++++++++++++-- 2 files changed, 172 insertions(+), 6 deletions(-) diff --git a/include/base/SMPFunction.h b/include/base/SMPFunction.h index 00a9fabc..f1225471 100644 --- a/include/base/SMPFunction.h +++ b/include/base/SMPFunction.h @@ -596,7 +596,7 @@ public: void ComputeSSA(void); // Compute SSA form data structures bool DoesBlockDominateBlock(int HeadBlockNum, int TailBlockNum) const; // Does basic block HeadBlockNum dominate basic block TailBlockNum? bool IsBlockInAnyLoop(int BlockNum) const; // Is block (with block # BlockNum) inside any loop? - bool IsBlockInLoop(int BlockNum, std::size_t LoopNum); // Is block (with block # BlockNum) inside loop # LoopNum? + bool IsBlockInLoop(int BlockNum, std::size_t LoopNum) const; // Is block (with block # BlockNum) inside loop # LoopNum? bool AreBlocksInSameLoops(const int BlockNum1, const int BlockNum2) const; bool DoesEdgeExitLoop(const int BlockNum1, const int BlockNum2) const; // Does going from BlockNum1 to BlockNum2 exit a loop? void BuildLoopList(int BlockNum, std::list<std::size_t> &LoopList); // build list of loop numbers that BlockNum is part of. @@ -1050,6 +1050,8 @@ private: bool CounterVarHelper(const STARSOpndTypePtr &DefOp, int DefSSANum, int BlockNum, bool LocalName, std::list<std::pair<int, STARS_ea_t> > &CounterSSANums); // recursive helper for FindCounterVariables() bool ConditionalTypePropagation(void); // Apply SCC algorithm to unresolved Phi DEFs bool PropagateSignedness(void); // Propagate signedness FG info from DEFs to USEs whenever there is no USE sign info. + bool FindLoopContinuePattern(const int BranchBlockNum); // Does code starting at COND_BRANCH in BranchBlockNum represent a C/C++ loop continue statement? + bool LoopContinueHelper(const int BranchBlockNum, const int InnerMostLoopNum, const SMPBasicBlock *CurrentBlock); void MarkSpecialNumericErrorCases(void); // Detect and mark special cases before emitting numeric error annotations. void EmitReturnTargetAnnotations(void); // Emit Indirect Branch Target destinations for return instructions in this func. void EmitFuncPtrShadowingAnnotations(FILE *InfoAnnotFile); // Emit annotations for func ptr shadowing defense diff --git a/src/base/SMPFunction.cpp b/src/base/SMPFunction.cpp index 09c4445e..533ba2a1 100644 --- a/src/base/SMPFunction.cpp +++ b/src/base/SMPFunction.cpp @@ -691,7 +691,7 @@ bool SMPFunction::ComputeInOutRegs(bool InheritPass, bool &WritesMem, bool &Call } } else if (CurrBlock->HasCallInstruction()) { // InheritPass - int LoopNum = this->GetInnermostLoopNum(CurrBlock->GetNumber()); + int LoopNum = (this->GetNumLoops() > 0) ? this->GetInnermostLoopNum(CurrBlock->GetNumber()) : -1; size_t LoopNumOffsetByOne = (size_t)(LoopNum + 1); // Note: LoopNum could be -1 if we are outside of any loop. list<size_t> LoopList; @@ -4425,7 +4425,8 @@ int SMPFunction::GetLoopNumFromTestBlockNum(int BlockNum) const { int SMPFunction::GetInnermostLoopNum(const int BlockNum) const { int LoopNum = -1; size_t MinBlockCount = 100000; - if ((BlockNum >= 0) && (this->LoopCount > 0) && this->IsBlockInAnyLoop(BlockNum)) { + bool FuncHasLoops = (this->LoopCount > 0); + if ((BlockNum >= 0) && FuncHasLoops && this->IsBlockInAnyLoop(BlockNum)) { // Find loop containing this BlockNum with the fewest number of blocks. for (size_t LoopIndex = 0; LoopIndex < this->FuncLoopsByBlock[(size_t) BlockNum].GetNumBits(); ++LoopIndex) { if (this->FuncLoopsByBlock[(size_t) BlockNum].GetBit(LoopIndex)) { @@ -4437,6 +4438,10 @@ int SMPFunction::GetInnermostLoopNum(const int BlockNum) const { } } } + // Is the answer always the lowest loop number? + if (FuncHasLoops && this->FuncLoopsByBlock[(size_t)BlockNum].FindHighestBitSet() != LoopNum) { + SMP_msg("INFO: Innermost loop num is not highest loop num for block %d in %s\n", BlockNum, this->GetFuncName()); + } return LoopNum; } // end of SMPFunction::GetInnermostLoopNum() @@ -10671,7 +10676,7 @@ void SMPFunction::DetectInArgRegsNeededForMemWriteExprs(void) { int OuterLoopIndex = this->GetOutermostLoopNum((int) BlockIndex); // OuterLoopIndex could be -1 if CurrInst is not in a loop bool NotInLoop = (0 > OuterLoopIndex); - int InnerLoopIndex = this->GetInnermostLoopNum((int) BlockIndex); + int InnerLoopIndex = NotInLoop ? -1 : this->GetInnermostLoopNum((int) BlockIndex); size_t InnerLoopNumPlusOne = (size_t)(InnerLoopIndex + 1); list<size_t> LoopList; // fill on demand for (vector<SMPInstr *>::iterator InstIter = CurrBlock->GetFirstInst(); InstIter != CurrBlock->GetLastInst(); ++InstIter) { @@ -13979,7 +13984,7 @@ bool SMPFunction::IsBlockInAnyLoop(int BlockNum) const { } // end of SMPFunction::IsBlockInAnyLoop() // Is block (with block # BlockNum) inside loop # LoopNum? -bool SMPFunction::IsBlockInLoop(int BlockNum, std::size_t LoopNum) { +bool SMPFunction::IsBlockInLoop(int BlockNum, std::size_t LoopNum) const { if (0 == this->LoopCount) return false; assert(((size_t) BlockNum) < this->GetNumBlocks()); @@ -16532,6 +16537,165 @@ int SMPFunction::GetLoopNumFromHeaderBlockNum(const int HeaderBlockNum) const { return LoopNumberFound; } // end of SMPFunction::GetLoopNumFromHeaderBlockNum() +// Distinguish between code that must be translated as a C/C++ loop "continue" statement, and +// code in a loop that could just be an optimized form of an if-then statement inside the loop. +// See detailed examples in comments below. +bool SMPFunction::FindLoopContinuePattern(const int BranchBlockNum) { + // There are two cases of a COND_BRANCH at the end of a block inside a loop that + // both target the header block of the loop. In the first case, the fall-through + // exits the loop. This case is simple and the COND_BRANCH is marked as a LOOP_BACK. + // In the second case, the fall-through remains inside the innermost loop containing + // the COND_BRANCH block. While the BranchBlockNum basic block is still a loop tail block, + // as is any block that has a back edge to the loop header, translation to SPARK Ada + // can be tricky. + // + // This second case has two subcases with respect to SPARK Ada translation. In the simpler + // subcase, the COND_BRANCH is merely guarding the code from the COND_BRANCH to the end of + // the loop: + // + // loop + // if (cond1) then + // [code 1 here] + // return; or break; or call non-returning function; + // else + // [code 2 here] + // if (!cond2) then + // loop back to loop header; + // endif; + // [code 3 here] + // endif; + // endloop; + // + // This code is just a compiler-optimized form of the following source code: + // + // loop + // if (cond1) then + // [code 1 here] + // return; or break; or call non-returning function; + // else + // [code 2 here] + // if (cond2) then + // [code 3 here] + // endif; + // endif; + // endloop; + // + // If compiled without optimization, the final endif would need to be + // a jump back to the loop header. To reduce jumps and branches, the + // compiler can take advantage of the fact that it already has to have + // a COND_BRANCH at the test of the innermost if-then and reverse that + // cond2 into !cond2 and make the target be the loop header block. + // + // The problem here is that the sequence in which a COND_BRANCH either + // takes a back edge to the loop header or falls through to code that is + // still within the loop looks like a C/C++ loop continue statement, and + // the SPARK Ada 2012 and Ada 2017 language standards still lack a loop + // continue statement. The Ada 2017 rationale document says that control + // flow would be less readable with a continue statement, and they actually + // recommend putting a label before the endloop and using an explicit goto + // statement (!) to reach it when you want to perform a loop continue. + // SPARK Ada does not permit use of the goto statement, being a subset of + // Ada designed for formal analysis. + // + // Contrast the above code with a true continue statement: + // + // loop + // if (cond1) then + // [code 1 here] + // else + // [code 2 here] + // if (cond2) then + // continue; + // endif; + // [code 3 here] + // endif; + // [code 4 here] + // endloop; + // + // The else branch can be restructured to put [code 3] inside an if-then + // that is contained in the else branch. But [code 4] is reachable by the + // then branch as well as the else branch, and it cannot be contained within + // the else branch by restructuring. As with all code restructurings, we must + // either clone code or introduce a boolean flag to keep track of our path, e.g.: + // + // loop + // bool ElseBranchContinueTaken = false; + // if (cond1) then + // [code 1 here] + // else + // [code 2 here] + // if (cond2) then + // ElseBranchContinueTaken = true; + // endif; + // if (!ElseBranchContinueTaken) then + // [code 3 here] + // endif; + // endif; + // if (!ElseBranchContinueTaken) then + // [code 4 here] + // endif; + // endloop; + // + // So, one subcase has complicated problems in SPARK Ada translation, while the other + // has a condition that can be inverted and become an if-then statement. + // + // We can distinguish between these two subcases using dominator tree information. + // Beginning at the COND_BRANCH in question, we want to answer the following question: + // Following only forward edges, can you reach a basic block that is not dominated by + // the COND_BRANCH block yet is still within the innermost loop containing the COND_BRANCH + // block? The [code 4] block in the complex subcase is an example. This is a true loop continue + // case that requires complex translation. If we fail to find such a block, then this code + // pattern is NOT a loop continue statement and our SPARK Ada translation just needs to know + // not to treat it like a LOOP_BACK that falls out of the loop. + + const int InnerMostLoopIndex = this->GetInnermostLoopNum(BranchBlockNum); + const SMPBasicBlock *BranchBlock = this->GetBlockByNum((size_t)BranchBlockNum); + this->ResetProcessedBlocks(); + + return this->LoopContinueHelper(BranchBlockNum, InnerMostLoopIndex, BranchBlock); +} // end of SMPFunction::FindLoopContinuePattern() + +// Recursive helper for FindLoopContinuePattern(). See lengthy comments in that method. +bool SMPFunction::LoopContinueHelper(const int BranchBlockNum, const int InnerMostLoopNum, const SMPBasicBlock *CurrentBlock) { + // Find all forward edges from CurrentBlock. See which ones stay within InnerMostLoopNum, then + // test the dominator tree info. + bool FoundContinuePattern = false; + int CurrBlockNum = CurrentBlock->GetNumber(); + + for (list<SMPBasicBlock *>::const_iterator SuccIter = CurrentBlock->GetFirstConstSucc(); SuccIter != CurrentBlock->GetLastConstSucc(); ++SuccIter) { + SMPBasicBlock *SuccBlock = (*SuccIter); + int SuccBlockNum = SuccBlock->GetNumber(); + if ((SuccBlockNum != CurrBlockNum) && (!SuccBlock->IsProcessed())) { // avoid self-loops + SuccBlock->SetProcessed(true); + if (this->IsBlockInLoop(SuccBlockNum, InnerMostLoopNum)) { + // Is this a forward edge? + if (!this->DoesBlockDominateBlock(SuccBlockNum, BranchBlockNum)) { + // Have we escaped the branch we started in, which can be detected + // by the fact that BranchBlockNum does not dominate SuccBlock? + if (!this->DoesBlockDominateBlock(BranchBlockNum, SuccBlockNum)) { + // SuccBlock is in innermost loop with BranchBlockNum, but we + // cannot enclose it with an if-then inside the BranchBlockNum + // code sequence, so it is a true loop continue pattern. + FoundContinuePattern = true; + break; + } + else { + // Recurse into SuccBlock. + if (this->LoopContinueHelper(BranchBlockNum, InnerMostLoopNum, SuccBlock)) { + // Found one block definitely part of a loop continue code pattern. + FoundContinuePattern = true; + break; + } + } + } + } + } + } // end for all successors + + return FoundContinuePattern; +} // end of SMPFunction::LoopContinueHelper() + + // Detect and mark special cases before emitting numeric error and other security annotations. void SMPFunction::MarkSpecialNumericErrorCases(void) { list<SMPBasicBlock *>::iterator BlockIter; @@ -16635,7 +16799,7 @@ void SMPFunction::MarkSpecialNumericErrorCases(void) { if (!this->IsBlockInLoop(FollowBlockNum, (size_t)LoopIndex)) { this->UpdateLoopFollowBlockNum(HeaderBlockNum, FollowBlockNum); } - else { + else if (this->FindLoopContinuePattern((int) BlockIndex)) { SMP_msg("WARNING: SPARK: LOOP_CONTINUE case detected in block %d FollowBlock %d of %s\n", CurrBlock->GetNumber(), FollowBlockNum, this->GetFuncName()); this->DumpDotCFG(); -- GitLab