From 024eb37a9e8b0815052bae106d7a96bc0db09166 Mon Sep 17 00:00:00 2001 From: clc5q <clc5q@git.zephyr-software.com> Date: Sat, 10 Sep 2011 00:29:41 +0000 Subject: [PATCH] Identify self-recursive calls and pseudo-calls used as jumps within functions. --- SMPFunction.cpp | 115 +++++++++++++++++++++++++++++++----------------- SMPFunction.h | 3 ++ SMPInstr.cpp | 67 +++++++++++++++++++++++++--- SMPInstr.h | 6 +++ 4 files changed, 145 insertions(+), 46 deletions(-) diff --git a/SMPFunction.cpp b/SMPFunction.cpp index 060ae3e7..44fbaf9b 100644 --- a/SMPFunction.cpp +++ b/SMPFunction.cpp @@ -139,6 +139,7 @@ SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) { this->UnresolvedIndirectCalls = false; this->IndirectJumps = false; this->UnresolvedIndirectJumps = false; + this->DirectlyRecursive = false; this->SharedChunks = false; this->CallsAlloca = false; this->AnalyzedSP = false; @@ -1561,49 +1562,27 @@ void SMPFunction::Analyze(void) { FoundAllCallers = true; // only do this for first inst } - if (CurrInst.GetDataFlowType() == INDIR_CALL) { - this->IndirectCalls = true; - // See if IDA has determined all possible targets - // of the indirect call. - bool LinkedToTarget = false; - for (bool ok = CurrXrefs.first_from(CurrInst.GetAddr(), XREF_ALL); - ok; - ok = CurrXrefs.next_from()) { - if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) { - // Found a code target, with its address in CurrXrefs.to - if (CurrXrefs.to == (CurrInst.GetAddr() + CurrInst.GetCmd().size)) { - // A call instruction will have two targets: the fall through to the - // next instruction, and the called function. We want to find - // the called function. - continue; - } - // We found a target, not the fall-through. - LinkedToTarget = true; - this->IndirectCallTargets.push_back(CurrXrefs.to); - this->AllCallTargets.push_back(CurrXrefs.to); - msg("Found indirect call target %x at %x\n", - CurrXrefs.to, CurrInst.GetAddr()); + SMPitype DataFlowType = CurrInst.GetDataFlowType(); + if ((DataFlowType == INDIR_CALL)|| (DataFlowType == CALL)) { + // See if IDA has determined the target of the call. + ea_t TargetAddr = CurrInst.GetCallTarget(); + bool LinkedToTarget = (BADADDR != TargetAddr); + if (LinkedToTarget) { + this->AllCallTargets.push_back(TargetAddr); + if (INDIR_CALL == DataFlowType) { + this->IndirectCallTargets.push_back(TargetAddr); } - } // end for all code xfrefs - this->UnresolvedIndirectCalls = (!LinkedToTarget); - if (!LinkedToTarget) { - msg("WARNING: Did not find indirect call target at %x\n", - CurrInst.GetAddr()); - } - } // end if INDIR_CALL - else if (CurrInst.GetDataFlowType() == INDIR_JUMP) - this->IndirectJumps = true; - else if (CurrInst.GetDataFlowType() == CALL) { - set<DefOrUse, LessDefUse>::iterator CurrUse; - for (CurrUse = CurrInst.GetFirstUse(); CurrUse != CurrInst.GetLastUse(); ++CurrUse) { - optype_t OpType = CurrUse->GetOp().type; - if ((OpType == o_near) || (OpType == o_far)) { - ea_t CallTarget = CurrUse->GetOp().addr; - this->DirectCallTargets.push_back(CallTarget); - this->AllCallTargets.push_back(CallTarget); + else { + this->DirectCallTargets.push_back(TargetAddr); } } - } + if (DataFlowType == INDIR_CALL) { + this->IndirectCalls = true; + this->UnresolvedIndirectCalls = (!LinkedToTarget); + } + } // end if INDIR_CALL or CALL + else if (DataFlowType == INDIR_JUMP) + this->IndirectJumps = true; // Before we insert the instruction into the instruction // list, determine if it is a jump target that does not @@ -1769,6 +1748,62 @@ void SMPFunction::Analyze(void) { // Figure out the stack frame and related info. this->SetStackFrameInfo(); } + + // Audit the call instructions and call targets. + if ((!this->AllCallTargets.empty()) || this->UnresolvedIndirectCalls) { + bool FoundBadCallTarget = false; + vector<ea_t>::iterator CurrTarget = this->AllCallTargets.begin(); + while (CurrTarget != this->AllCallTargets.end()) { + if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) { + // Found a call target that is within the function. + FoundBadCallTarget = true; + if (this->FirstEA == *CurrTarget) { // Direct recursion, not a pseudo-jump + this->DirectlyRecursive = true; + } + CurrTarget = this->AllCallTargets.erase(CurrTarget); + } + else { + ++CurrTarget; + } + } + if (FoundBadCallTarget) { + // We have to mark the pseudo-call instructions and audit the direct and + // indirect call target vectors. + + // Audit direct call targets. + CurrTarget = this->DirectCallTargets.begin(); + while (CurrTarget != this->DirectCallTargets.end()) { + if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) { + // Found a call target that is within the function. + CurrTarget = this->DirectCallTargets.erase(CurrTarget); + } + else { + ++CurrTarget; + } + } + // Audit indirect call targets. + CurrTarget = this->IndirectCallTargets.begin(); + while (CurrTarget != this->IndirectCallTargets.end()) { + if ((this->FirstEA <= *CurrTarget) && (this->FuncInfo.endEA >= *CurrTarget)) { + // Found a call target that is within the function. + CurrTarget = this->IndirectCallTargets.erase(CurrTarget); + } + else { + ++CurrTarget; + } + } + // Find calls used as jumps. + list<SMPInstr>::iterator InstIter = this->Instrs.begin(); + while (InstIter != this->Instrs.end()) { + SMPitype InstFlow = InstIter->GetDataFlowType(); + if ((CALL == InstFlow) || (INDIR_CALL == InstFlow)) { + InstIter->AnalyzeCallInst(this->FirstEA, this->FuncInfo.endEA); + } + ++InstIter; + } + } // end if (FoundBadCallTarget) + } + this->MarkFunctionSafe(); } // end of SMPFunction::Analyze() diff --git a/SMPFunction.h b/SMPFunction.h index 4fe8ecba..15cb0768 100644 --- a/SMPFunction.h +++ b/SMPFunction.h @@ -134,8 +134,10 @@ public: inline bool HasUnresolvedIndirectCalls(void) const { return UnresolvedIndirectCalls; }; inline bool HasIndirectJumps(void) const { return IndirectJumps; }; inline bool HasUnresolvedIndirectJumps(void) const { return UnresolvedIndirectJumps; }; + inline bool IsDirectlyRecursive(void) const { return DirectlyRecursive; }; inline bool HasSharedChunks(void) const { return SharedChunks; }; inline bool HasGoodRTLs(void) const { return BuiltRTLs; }; + inline bool IsAddrInFunc(ea_t addr) { return ((addr >= FuncInfo.startEA) && (addr <= FuncInfo.endEA)); } inline bool IsLibFunc(void) const { return LibFunc; }; inline bool IsLeaf(void) const { return (!IndirectCalls && DirectCallTargets.empty()); }; inline bool IsSafe(void) const { return SafeFunc; }; @@ -192,6 +194,7 @@ private: bool UnresolvedIndirectCalls; // Calls could not all be linked to targets bool IndirectJumps; // Does function make indirect jumps? bool UnresolvedIndirectJumps; // Jumps could not all be linked to targets + bool DirectlyRecursive; // Calls itself bool SharedChunks; // Does function share a tail chunk with other functions? bool CallsAlloca; // Does function allocate stack space after initial allocation? bool AnalyzedSP; // Were stack pointer change points successfully analyzed? diff --git a/SMPInstr.cpp b/SMPInstr.cpp index 2c091e48..e40bf9b7 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -235,11 +235,14 @@ SMPInstr::SMPInstr(ea_t addr) { this->BlockTerm = false; this->TailCall = false; this->CondTailCall = false; + this->CallUsedAsJump = false; + this->DirectRecursiveCall = false; this->Interrupt = false; this->RegClearIdiom = false; this->DeadRegsString[0] = '\0'; this->DefsFlags = false; this->UsesFlags = false; + this->CallTarget = BADADDR; this->AddSubSourceType = UNINIT; this->AddSubUseType = UNINIT; this->AddSubSourceOp = InitOp; @@ -925,6 +928,7 @@ void SMPInstr::Analyze(void) { this->SMPcmd.size = 0; return; } + // NOTE: Might do this on demand to save time. // Get the instr disassembly text. bool IDAsuccess = generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1); if (IDAsuccess) { @@ -974,6 +978,50 @@ void SMPInstr::Analyze(void) { } } + // If instruction is a call or indirect call, see if a call target has been recorded + // by IDA Pro. + if (this->GetDataFlowType() == INDIR_CALL) { + for (bool ok = xrefs.first_from(this->address, XREF_ALL); + ok; + ok = xrefs.next_from()) { + if ((xrefs.to != 0) && (xrefs.iscode)) { + // Found a code target, with its address in CurrXrefs.to + if (xrefs.to == (this->address + this->GetCmd().size)) { + // A call instruction will have two targets: the fall through to the + // next instruction, and the called function. We want to find + // the called function. + continue; + } + // We found a target, not the fall-through. + this->CallTarget = xrefs.to; + msg("Found indirect call target %x at %x\n", + xrefs.to, this->address); + break; + } + } // end for all code xrefs + if (BADADDR == this->CallTarget) { + msg("WARNING: Did not find indirect call target at %x\n", + this->address); + } + } // end if INDIR_CALL + else if (this->GetDataFlowType() == CALL) { + set<DefOrUse, LessDefUse>::iterator CurrUse; + for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) { + optype_t OpType = CurrUse->GetOp().type; + if ((OpType == o_near) || (OpType == o_far)) { + this->CallTarget = CurrUse->GetOp().addr; + } + } + if (BADADDR == this->CallTarget) { + msg("WARNING: Target not found for direct call at %x\n", this->address); + } + } + + // Detect the case in which a call instruction is actually a fancy jump within + // the function and no call target should be expected. + if (BADADDR != this->CallTarget) { + } + this->analyzed = true; return; } // end of SMPInstr::Analyze() @@ -1002,6 +1050,19 @@ void SMPInstr::AnalyzeMarker(void) { return; } // end of SMPInstr::AnalyzeMarker() +// Detect oddities of call instructions, such as pseudo-calls that are +// actually jumps within a function +void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) { + if (BADADDR != this->CallTarget) { + this->CallUsedAsJump = ((this->CallTarget > FirstFuncAddr) + && (this->CallTarget <= LastFuncAddr)); + this->DirectRecursiveCall = (this->CallTarget == FirstFuncAddr); + if (this->CallUsedAsJump) + this->type = JUMP; + } + return; +} + // Find USE-not-DEF operand that is not the flags register. op_t SMPInstr::GetSourceOnlyOperand(void) { size_t OpNum; @@ -2752,12 +2813,6 @@ void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE } #endif - /* if we have no block, we're not part of a function */ - if(GetBlock()==NULL) - /* and so we should print that we belong to no function */ - qfprintf(AnnotFile, "%10x %6d INSTR BELONGTO 0 %s\n", GetAddr(), this->SMPcmd.size, GetDisasm()); - - // Emit appropriate optimization annotations. bool SDTInstrumentation = false; switch (OptType) { diff --git a/SMPInstr.h b/SMPInstr.h index 67cc27bc..94bed908 100644 --- a/SMPInstr.h +++ b/SMPInstr.h @@ -187,6 +187,7 @@ public: // Get methods inline ea_t GetAddr(void) const { return address; }; + inline ea_t GetCallTarget(void) const { return CallTarget; }; inline char *GetDisasm(void) const { return (char *) disasm; }; inline SMPBasicBlock *GetBlock(void) const { return BasicBlock; }; inline set<DefOrUse, LessDefUse>::iterator GetFirstUse(void) { return Uses.GetFirstRef(); }; @@ -243,6 +244,7 @@ public: bool IsBranchToFarChunk(void); // instr jumps outside current chunk inline bool IsTailCall(void) const { return TailCall; }; inline bool IsCondTailCall(void) const { return CondTailCall; }; + inline bool IsCallUsedAsJump(void) const { return CallUsedAsJump; }; inline bool MDIsInterruptCall(void) const { return Interrupt; }; bool MDIsNop(void) const; // instruction is simple or complex no-op bool MDIsPushInstr(void) const; @@ -270,6 +272,7 @@ public: set<DefOrUse, LessDefUse>::iterator GetPointerAddressReg(op_t MemOp); void Analyze(void); // Fill in basic data for instruction. void AnalyzeMarker(void); // Fill in basic data for top of function pseudo-instruction. + void AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr); // Detect pseudo-calls void AnalyzeIndirectRefs(bool UseFP); // Detect indirect memory operands bool BuildRTL(void); // Build RTL trees; return true if successfully built. void SyncAllRTs(void); // calls SyncRTLDefUse() for all RTs in RTL @@ -299,11 +302,14 @@ private: bool BlockTerm; // This instruction terminates a basic block. bool TailCall; // This instruction is a tail call (jump to far chunk with stack restored). bool CondTailCall; // Tail call is conditional branch. + bool CallUsedAsJump; // Call instruction, but actually a jump within the function. + bool DirectRecursiveCall; // Call to first address in same function bool Interrupt; // Instruction is a software interrupt call. bool RegClearIdiom; // ASM idiom for move zero into register? char DeadRegsString[MAXSTR]; // Registers that are dead at this instruction bool DefsFlags; // Instr DEFs the flags bool UsesFlags; // Instr USEs the flags + ea_t CallTarget; // target address of direct or indirect call instruction; BADADDR if unknown or not a call SMPOperandType AddSubSourceType; // Source op (USE only) type for add/sub op_t AddSubSourceOp; // operand corresponding to AddSubSourceType SMPOperandType AddSubUseType; // type of USE that is also DEFed by add /sub -- GitLab