From 024eb37a9e8b0815052bae106d7a96bc0db09166 Mon Sep 17 00:00:00 2001
From: clc5q <clc5q@git.zephyr-software.com>
Date: Sat, 10 Sep 2011 00:29:41 +0000
Subject: [PATCH] Identify self-recursive calls and pseudo-calls used as jumps
 within functions.

---
 SMPFunction.cpp | 115 +++++++++++++++++++++++++++++++-----------------
 SMPFunction.h   |   3 ++
 SMPInstr.cpp    |  67 +++++++++++++++++++++++++---
 SMPInstr.h      |   6 +++
 4 files changed, 145 insertions(+), 46 deletions(-)

diff --git a/SMPFunction.cpp b/SMPFunction.cpp
index 060ae3e7..44fbaf9b 100644
--- a/SMPFunction.cpp
+++ b/SMPFunction.cpp
@@ -139,6 +139,7 @@ SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) {
 	this->UnresolvedIndirectCalls = false;
 	this->IndirectJumps = false;
 	this->UnresolvedIndirectJumps = false;
+	this->DirectlyRecursive = false;
 	this->SharedChunks = false;
 	this->CallsAlloca = false;
 	this->AnalyzedSP = false;
@@ -1561,49 +1562,27 @@ void SMPFunction::Analyze(void) {
 					FoundAllCallers = true; // only do this for first inst
 				}
 
-				if (CurrInst.GetDataFlowType() == INDIR_CALL) {
-					this->IndirectCalls = true;
-					// See if IDA has determined all possible targets
-					//  of the indirect call.
-					bool LinkedToTarget = false;
-					for (bool ok = CurrXrefs.first_from(CurrInst.GetAddr(), XREF_ALL);
-						ok;
-						ok = CurrXrefs.next_from()) {
-						if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) {
-							// Found a code target, with its address in CurrXrefs.to
-							if (CurrXrefs.to == (CurrInst.GetAddr() + CurrInst.GetCmd().size)) {
-								// A call instruction will have two targets: the fall through to the
-								//  next instruction, and the called function. We want to find
-								//  the called function.
-								continue;
-							}
-							// We found a target, not the fall-through.
-							LinkedToTarget = true;
-							this->IndirectCallTargets.push_back(CurrXrefs.to);
-							this->AllCallTargets.push_back(CurrXrefs.to);
-							msg("Found indirect call target %x at %x\n",
-								CurrXrefs.to, CurrInst.GetAddr());
+				SMPitype DataFlowType = CurrInst.GetDataFlowType();
+				if ((DataFlowType == INDIR_CALL)|| (DataFlowType == CALL)) {
+					// See if IDA has determined the target of the call.
+					ea_t TargetAddr = CurrInst.GetCallTarget();
+					bool LinkedToTarget = (BADADDR != TargetAddr);
+					if (LinkedToTarget) {
+						this->AllCallTargets.push_back(TargetAddr);
+						if (INDIR_CALL == DataFlowType) {
+							this->IndirectCallTargets.push_back(TargetAddr);
 						}
-					} // end for all code xfrefs
-					this->UnresolvedIndirectCalls = (!LinkedToTarget);
-					if (!LinkedToTarget) {
-						msg("WARNING: Did not find indirect call target at %x\n",
-							CurrInst.GetAddr());
-					}
-				} // end if INDIR_CALL
-				else if (CurrInst.GetDataFlowType() == INDIR_JUMP)
-					this->IndirectJumps = true;
-				else if (CurrInst.GetDataFlowType() == CALL) {
-					set<DefOrUse, LessDefUse>::iterator CurrUse;
-					for (CurrUse = CurrInst.GetFirstUse(); CurrUse != CurrInst.GetLastUse(); ++CurrUse) {
-						optype_t OpType = CurrUse->GetOp().type;
-						if ((OpType == o_near) || (OpType == o_far)) {
-							ea_t CallTarget = CurrUse->GetOp().addr;
-							this->DirectCallTargets.push_back(CallTarget);
-							this->AllCallTargets.push_back(CallTarget);
+						else {
+							this->DirectCallTargets.push_back(TargetAddr);
 						}
 					}
-				}
+					if (DataFlowType == INDIR_CALL) {
+						this->IndirectCalls = true;
+						this->UnresolvedIndirectCalls = (!LinkedToTarget);
+					}
+				} // end if INDIR_CALL or CALL
+				else if (DataFlowType == INDIR_JUMP)
+					this->IndirectJumps = true;
 
 				// Before we insert the instruction into the instruction
 				//  list, determine if it is a jump target that does not
@@ -1769,6 +1748,62 @@ void SMPFunction::Analyze(void) {
 		// Figure out the stack frame and related info.
 		this->SetStackFrameInfo();
 	}
+
+	// Audit the call instructions and call targets.
+	if ((!this->AllCallTargets.empty()) || this->UnresolvedIndirectCalls) {
+		bool FoundBadCallTarget = false;
+		vector<ea_t>::iterator CurrTarget = this->AllCallTargets.begin();
+		while (CurrTarget != this->AllCallTargets.end()) {
+			if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) {
+				// Found a call target that is within the function.
+				FoundBadCallTarget = true;
+				if (this->FirstEA == *CurrTarget) { // Direct recursion, not a pseudo-jump
+					this->DirectlyRecursive = true;
+				}
+				CurrTarget = this->AllCallTargets.erase(CurrTarget);
+			}
+			else {
+				++CurrTarget;
+			}
+		}
+		if (FoundBadCallTarget) {
+			// We have to mark the pseudo-call instructions and audit the direct and
+			//  indirect call target vectors.
+
+			// Audit direct call targets.
+			CurrTarget = this->DirectCallTargets.begin();
+			while (CurrTarget != this->DirectCallTargets.end()) {
+				if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) {
+					// Found a call target that is within the function.
+					CurrTarget = this->DirectCallTargets.erase(CurrTarget);
+				}
+				else {
+					++CurrTarget;
+				}
+			}
+			// Audit indirect call targets.
+			CurrTarget = this->IndirectCallTargets.begin();
+			while (CurrTarget != this->IndirectCallTargets.end()) {
+				if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) {
+					// Found a call target that is within the function.
+					CurrTarget = this->IndirectCallTargets.erase(CurrTarget);
+				}
+				else {
+					++CurrTarget;
+				}
+			}
+			// Find calls used as jumps.
+			list<SMPInstr>::iterator InstIter = this->Instrs.begin();
+			while (InstIter != this->Instrs.end()) {
+				SMPitype InstFlow = InstIter->GetDataFlowType();
+				if ((CALL == InstFlow) || (INDIR_CALL == InstFlow)) {
+					InstIter->AnalyzeCallInst(this->FirstEA, this->FuncInfo.endEA);
+				}
+				++InstIter;
+			}
+		} // end if (FoundBadCallTarget)
+	}
+
 	this->MarkFunctionSafe();
 } // end of SMPFunction::Analyze()
 
diff --git a/SMPFunction.h b/SMPFunction.h
index 4fe8ecba..15cb0768 100644
--- a/SMPFunction.h
+++ b/SMPFunction.h
@@ -134,8 +134,10 @@ public:
 	inline bool HasUnresolvedIndirectCalls(void) const { return UnresolvedIndirectCalls; };
 	inline bool HasIndirectJumps(void) const { return IndirectJumps; };
 	inline bool HasUnresolvedIndirectJumps(void) const { return UnresolvedIndirectJumps; };
+	inline bool IsDirectlyRecursive() const { return this->DirectlyRecursive; } // set when a call target equals the function's first address
 	inline bool HasSharedChunks(void) const { return SharedChunks; };
 	inline bool HasGoodRTLs(void) const { return BuiltRTLs; };
+	inline bool IsAddrInFunc(ea_t addr) const { return ((addr >= FuncInfo.startEA) && (addr < FuncInfo.endEA)); } // endEA is excluded from the function's address range
 	inline bool IsLibFunc(void) const { return LibFunc; };
 	inline bool IsLeaf(void) const { return (!IndirectCalls && DirectCallTargets.empty()); };
 	inline bool IsSafe(void) const { return SafeFunc; };
@@ -192,6 +194,7 @@ private:
 	bool UnresolvedIndirectCalls; // Calls could not all be linked to targets
 	bool IndirectJumps; // Does function make indirect jumps?
 	bool UnresolvedIndirectJumps; // Jumps could not all be linked to targets
+	bool DirectlyRecursive; // Calls itself
 	bool SharedChunks; // Does function share a tail chunk with other functions?
 	bool CallsAlloca; // Does function allocate stack space after initial allocation?
 	bool AnalyzedSP; // Were stack pointer change points successfully analyzed?
diff --git a/SMPInstr.cpp b/SMPInstr.cpp
index 2c091e48..e40bf9b7 100644
--- a/SMPInstr.cpp
+++ b/SMPInstr.cpp
@@ -235,11 +235,14 @@ SMPInstr::SMPInstr(ea_t addr) {
 	this->BlockTerm = false;
 	this->TailCall = false;
 	this->CondTailCall = false;
+	this->CallUsedAsJump = false;
+	this->DirectRecursiveCall = false;
 	this->Interrupt = false;
 	this->RegClearIdiom = false;
 	this->DeadRegsString[0] = '\0';
 	this->DefsFlags = false;
 	this->UsesFlags = false;
+	this->CallTarget = BADADDR;
 	this->AddSubSourceType = UNINIT;
 	this->AddSubUseType = UNINIT;
 	this->AddSubSourceOp = InitOp;
@@ -925,6 +928,7 @@ void SMPInstr::Analyze(void) {
 		this->SMPcmd.size = 0;
 		return;
 	}
+	// NOTE: Might do this on demand to save time.
 	// Get the instr disassembly text.
 	bool IDAsuccess = generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1);
 	if (IDAsuccess) {
@@ -974,6 +978,50 @@ void SMPInstr::Analyze(void) {
 		}
 	}
 
+	// If instruction is a call or indirect call, see if a call target has been recorded
+	//  by IDA Pro.
+	if (this->GetDataFlowType() == INDIR_CALL) {
+		for (bool ok = xrefs.first_from(this->address, XREF_ALL);
+			ok;
+			ok = xrefs.next_from()) {
+			if ((xrefs.to != 0) && (xrefs.iscode)) {
+				// Found a code target, with its address in xrefs.to
+				if (xrefs.to == (this->address + this->GetCmd().size)) {
+					// A call instruction will have two targets: the fall through to the
+					//  next instruction, and the called function. We want to find
+					//  the called function.
+					continue;
+				}
+				// We found a target, not the fall-through.
+				this->CallTarget = xrefs.to;
+				msg("Found indirect call target %x at %x\n",
+					xrefs.to, this->address);
+				break;
+			}
+		} // end for all code xrefs
+		if (BADADDR == this->CallTarget) {
+			msg("WARNING: Did not find indirect call target at %x\n",
+				this->address);
+		}
+	} // end if INDIR_CALL
+	else if (this->GetDataFlowType() == CALL) {
+		set<DefOrUse, LessDefUse>::iterator CurrUse;
+		for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
+			optype_t OpType = CurrUse->GetOp().type;
+			if ((OpType == o_near) || (OpType == o_far)) {
+				this->CallTarget = CurrUse->GetOp().addr;
+			}
+		}
+		if (BADADDR == this->CallTarget) {
+			msg("WARNING: Target not found for direct call at %x\n", this->address);
+		}
+	}
+
+	// Detect the case in which a call instruction is actually a fancy jump within
+	//  the function and no call target should be expected.
+	if (BADADDR != this->CallTarget) {
+	}
+
 	this->analyzed = true;
 	return;
 } // end of SMPInstr::Analyze()
@@ -1002,6 +1050,19 @@ void SMPInstr::AnalyzeMarker(void) {
 	return;
 } // end of SMPInstr::AnalyzeMarker()
 
+// Classify a call whose target is known: a target at FirstFuncAddr is direct recursion; a target
+//  in (FirstFuncAddr, LastFuncAddr] is a pseudo-call acting as a jump, and the instruction type becomes JUMP.
+void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
+	if (BADADDR == this->CallTarget)
+		return; // no recorded target: leave both flags and the instruction type untouched
+	const ea_t Target = this->CallTarget;
+	this->DirectRecursiveCall = (FirstFuncAddr == Target);
+	// LastFuncAddr is treated as inclusive. NOTE(review): confirm callers do not pass an exclusive end address.
+	this->CallUsedAsJump = ((FirstFuncAddr < Target) && (Target <= LastFuncAddr));
+	if (this->CallUsedAsJump)
+		this->type = JUMP;
+}
+
 // Find USE-not-DEF operand that is not the flags register.
 op_t SMPInstr::GetSourceOnlyOperand(void) {
 	size_t OpNum;
@@ -2752,12 +2813,6 @@ void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE
 	}
 #endif
 
-	/* if we have no block, we're not part of a function */
-	if(GetBlock()==NULL)
-		/* and so we should print that we belong to no function */
-                qfprintf(AnnotFile, "%10x %6d INSTR BELONGTO 0 %s\n", GetAddr(), this->SMPcmd.size, GetDisasm());
-
-
 	// Emit appropriate optimization annotations.
 	bool SDTInstrumentation = false;
 	switch (OptType) {
diff --git a/SMPInstr.h b/SMPInstr.h
index 67cc27bc..94bed908 100644
--- a/SMPInstr.h
+++ b/SMPInstr.h
@@ -187,6 +187,7 @@ public:
 
 	// Get methods
 	inline ea_t GetAddr(void) const { return address; };
+	inline ea_t GetCallTarget() const { return this->CallTarget; } // BADADDR if unknown or not a call
 	inline char *GetDisasm(void) const { return (char *) disasm; };
 	inline SMPBasicBlock *GetBlock(void) const { return BasicBlock; };
 	inline set<DefOrUse, LessDefUse>::iterator GetFirstUse(void) { return Uses.GetFirstRef(); };
@@ -243,6 +244,7 @@ public:
 	bool IsBranchToFarChunk(void);  // instr jumps outside current chunk
 	inline bool IsTailCall(void) const { return TailCall; };
 	inline bool IsCondTailCall(void) const { return CondTailCall; };
+	inline bool IsCallUsedAsJump() const { return this->CallUsedAsJump; } // call instruction that is actually a jump within the function
 	inline bool MDIsInterruptCall(void) const { return Interrupt; };
 	bool MDIsNop(void) const; // instruction is simple or complex no-op
 	bool MDIsPushInstr(void) const;
@@ -270,6 +272,7 @@ public:
 	set<DefOrUse, LessDefUse>::iterator GetPointerAddressReg(op_t MemOp);
 	void Analyze(void); // Fill in basic data for instruction.
 	void AnalyzeMarker(void); // Fill in basic data for top of function pseudo-instruction.
+	void AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr); // Detect pseudo-calls
 	void AnalyzeIndirectRefs(bool UseFP); // Detect indirect memory operands
 	bool BuildRTL(void);   // Build RTL trees; return true if successfully built.
 	void SyncAllRTs(void); // calls SyncRTLDefUse() for all RTs in RTL
@@ -299,11 +302,14 @@ private:
 	bool BlockTerm;  // This instruction terminates a basic block.
 	bool TailCall;  // This instruction is a tail call (jump to far chunk with stack restored).
 	bool CondTailCall;  // Tail call is conditional branch.
+	bool CallUsedAsJump; // Call instruction, but actually a jump within the function.
+	bool DirectRecursiveCall; // Call to first address in same function
 	bool Interrupt;  // Instruction is a software interrupt call.
 	bool RegClearIdiom; // ASM idiom for move zero into register?
 	char DeadRegsString[MAXSTR]; // Registers that are dead at this instruction
 	bool DefsFlags;  // Instr DEFs the flags
 	bool UsesFlags;  // Instr USEs the flags
+	ea_t CallTarget; // target address of direct or indirect call instruction; BADADDR if unknown or not a call
 	SMPOperandType AddSubSourceType;  // Source op (USE only) type for add/sub
 	op_t AddSubSourceOp; // operand corresponding to AddSubSourceType
 	SMPOperandType AddSubUseType; // type of USE that is also DEFed by add /sub
-- 
GitLab