diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp index d90ae4b755256c0c4b0abe97b484410e87466333..4546bde8da9d07dc7d49727b67d3b806d917a13a 100644 --- a/SMPDataFlowAnalysis.cpp +++ b/SMPDataFlowAnalysis.cpp @@ -5,7 +5,8 @@ // SMP project (Software Memory Protection). // -#include <vector> +#include <list> +#include <set> #include <pro.h> #include <ida.hpp> @@ -30,6 +31,7 @@ #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions #define SMP_DEBUG_FRAMEFIXUP 0 +#define SMP_DEBUG_DATAFLOW 1 // Used for binary search by function number in SMPStaticAnalyzer.cpp // to trigger debugging output and find which instruction in which @@ -116,7 +118,7 @@ bool SMPInstr::HasDestMemoryOperand(void) const { return MemDest; } // end of SMPInstr::HasDestMemoryOperand() -// Is the destination operand a memory reference? +// Is a source operand a memory reference? bool SMPInstr::HasSourceMemoryOperand(void) const { bool MemSrc = false; for (size_t index = 0; index < Uses.GetSize(); ++index) { @@ -343,6 +345,11 @@ int SMPInstr::operator<(const SMPInstr &rhs) const { return (this->address < rhs.GetAddr()); } +// Less than or equal operator for sorting SMPInstr lists. Key field is address. +int SMPInstr::operator<=(const SMPInstr &rhs) const { + return (this->address <= rhs.GetAddr()); +} + #define MD_FIRST_ENTER_INSTR NN_enterw #define MD_LAST_ENTER_INSTR NN_enterq // Is this instruction the one that allocates space on the @@ -450,6 +457,26 @@ bool SMPInstr::MDUsesCalleeSavedReg(void) const { return false; } // end of SMPInstr::MDUsesCalleeSavedReg() +// Is instruction a branch (conditional or unconditional) to a +// code target that is not in the current chunk? 
+bool SMPInstr::IsBranchToFarChunk(void) const {
+	func_t *CurrChunk = get_fchunk(this->address);
+	bool FarBranch = false;
+	if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
+		// Instruction is a direct branch, conditional or unconditional
+		if (this->NumUses() > 0) {
+			op_t JumpTarget = this->GetUse(0);
+			if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
+				// Branches to a code address
+				func_t *TargetChunk = get_fchunk(JumpTarget.addr);
+				// Same chunk as the branch? An address with no chunk (NULL) on either side counts as far.
+				FarBranch = (NULL == CurrChunk) || (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA);
+			}
+		}
+	}
+	return FarBranch;
+} // end of SMPInstr::IsBranchToFarChunk()
+
 // Analyze the instruction and its operands.
 void SMPInstr::Analyze(void) {
 	if (this->analyzed)
@@ -581,7 +608,7 @@ void SMPInstr::MDFixupDefUseLists(void) {
 				BaseOpnd.hasSIB = 0;
 				this->Uses.SetRef(BaseOpnd);
 			}
-			if (R_none != IndexReg) {
+			if (R_none != IndexReg) { // Should we disallow R_sp here? **!!**
 				op_t IndexOpnd = Opnd; // Init to current operand field values
 				IndexOpnd.type = o_reg; // Change type and reg fields
 				IndexOpnd.reg = IndexReg;
@@ -606,6 +633,11 @@ void SMPInstr::MDFixupDefUseLists(void) {
 		// sound by declaring that EAX is always a DEF.
 		this->MDAddRegDef(R_ax);
 	} // end if NN_cmpxchg
+	else if (this->MDIsPopInstr() || this->MDIsPushInstr()) {
+		// IDA does not include the stack pointer in the DEFs or USEs.
+		this->MDAddRegDef(R_sp);
+		this->MDAddRegUse(R_sp);
+	}
 	else if (8 == this->GetOptType()) {
 		// This category implicitly writes to EDX:EAX.
this->MDAddRegDef(R_dx); @@ -870,23 +902,194 @@ void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile) { // Constructor SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { - this->FirstInstr = First; - this->LastInstr = Last; this->IndirectJump = false; this->Returns = false; this->SharedTailChunk = false; + list<SMPInstr>::iterator CurrInst = First; + while (CurrInst != Last) { + this->Instrs.push_back(CurrInst); + ++CurrInst; + } + this->Instrs.push_back(CurrInst); // Add last instruction +} + +// Link to predecessor block. +void SMPBasicBlock::LinkToPred(list<SMPBasicBlock>::iterator Predecessor) { + this->Predecessors.push_back(Predecessor); + return; +} + +// Link to successor block. +void SMPBasicBlock::LinkToSucc(list<SMPBasicBlock>::iterator Successor) { + this->Successors.push_back(Successor); + return; } // Analyze basic block and fill data members. void SMPBasicBlock::Analyze() { - if (LastInstr->GetDataFlowType() == INDIR_JUMP) { + if (Instrs.back()->GetDataFlowType() == INDIR_JUMP) { this->IndirectJump = true; } - else if (LastInstr->MDIsReturnInstr()) { + else if (Instrs.back()->MDIsReturnInstr()) { this->Returns = true; } } // end of SMPBasicBlock::Analyze() +// Return true if anything already in the KillSet would kill the operand value. +bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const { + // We have assembly language operands that can be complex, such as + // [ebx + esi*4 + 04h]. If ebx or esi have been killed, then this memory + // phrase should be considered killed. We could be even more conservative + // with base addresses, declaring an entire array killed whenever its base + // address appears in a definition, for example. We will do that if it proves + // to be necessary. + bool FoundInKillSet = (KillSet.end() != KillSet.find(Opnd1)); + switch (Opnd1.type) { + // Some types are simple to test for equality. 
+		case o_void:
+		case o_reg:
+		case o_mem:
+		case o_imm:
+		case o_far:
+		case o_near:
+			// Look in KillSet. These simple types should be found there with
+			// no complicated comparisons needed.
+			return FoundInKillSet;
+		case o_phrase:
+		case o_displ:
+			// If found directly in KillSet, return true. Otherwise, see if any registers
+			// used in the memory addressing expression were killed.
+			if (!FoundInKillSet) {
+				// Should we add Opnd1 to the KillSet every time we return true below? **!!**
+				op_t TempOp;
+				if (Opnd1.hasSIB) {
+					int BaseReg = sib_base(Opnd1);
+					short IndexReg = sib_index(Opnd1);
+					TempOp.type = o_reg;
+					TempOp.reg = (ushort) BaseReg;
+					if (this->KillSet.end() != this->KillSet.find(TempOp))
+						return true;
+					if (R_sp != IndexReg) { // Cannot have ESP index reg in SIB
+						TempOp.reg = (ushort) IndexReg;
+						if (this->KillSet.end() != this->KillSet.find(TempOp))
+							return true;
+					}
+				}
+				else { // no SIB
+					ushort BaseReg;
+					if (Opnd1.type == o_phrase)
+						BaseReg = Opnd1.phrase;
+					else // o_displ
+						BaseReg = Opnd1.reg;
+					TempOp.type = o_reg;
+					TempOp.reg = BaseReg;
+					if (this->KillSet.end() != this->KillSet.find(TempOp))
+						return true;
+				} // end if SIB ... else ...
+			} // end if (!FoundInKillSet)
+			// The answer is known here; return so a valid operand never falls into the default case.
+			return FoundInKillSet;
+		default:
+			msg("Unknown operand type in AlreadyKilled.\n");
+	} // end of switch on Opnd1.type
+
+	return false;
+} // end of SMPBasicBlock::MDAlreadyKilled()
+
+// Initialize the KillSet and UpExposedSet for live variable analysis.
+void SMPBasicBlock::InitKilledExposed(void) {
+	// Find all upwardly exposed operands and killed operands in this block.
+	list<list<SMPInstr>::iterator>::iterator CurrIter;
+	for (CurrIter = this->Instrs.begin(); CurrIter != this->Instrs.end(); ++CurrIter) {
+		list<SMPInstr>::iterator CurrInst = *CurrIter;
+
+		// Dataflow equation for upward exposed variables: If a variable has not been
+		// killed yet in this block, starting from the top of the block, and it is used
+		// in the current instruction, then it is upwardly exposed.
+		size_t limit = CurrInst->NumUses();
+		for (size_t index = 0; index < limit; ++index) {
+			if (!this->MDAlreadyKilled(CurrInst->GetUse(index))) // used before any kill in this block
+				this->UpExposedSet.insert(CurrInst->GetUse(index));
+		}
+		// Dataflow equation for killed variables: If a variable is defined in any
+		// instruction in the block, it is killed by this block (i.e. prior definitions
+		// of that variable will not make it through the block).
+		limit = CurrInst->NumDefs();
+		for (size_t index = 0; index < limit; ++index) {
+			this->KillSet.insert(CurrInst->GetDef(index));
+		}
+	} // end for all instrs in block
+	this->IsLiveInStale = true; // Would need to compute LiveInSet for first time
+	return;
+} // end of SMPBasicBlock::InitKilledExposed()
+
+// Return an iterator for the beginning of the LiveInSet. If the set is stale,
+// recompute it first.
+set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveIn(void) {
+	if (this->IsLiveInStale) {
+		// Dataflow equation: A variable is live-in to this block if it
+		// is upwardly exposed from this block, or if it passes through
+		// the block unchanged (i.e. it is not killed and is live out).
+		this->LiveInSet.clear();
+		set<op_t, LessOp>::iterator OutIter;
+		this->LiveInSet.insert(this->UpExposedSet.begin(), this->UpExposedSet.end());
+		for (OutIter = this->LiveOutSet.begin(); OutIter != this->LiveOutSet.end(); ++OutIter) {
+			if (this->KillSet.end() == this->KillSet.find(*OutIter)) // Found live out but not killed
+				this->LiveInSet.insert(*OutIter);
+		}
+		this->IsLiveInStale = false;
+	}
+	return this->LiveInSet.begin();
+} // end of SMPBasicBlock::GetFirstLiveIn()
+
+// Get termination iterator marker for the LiveIn set, for use by predecessors.
+set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveIn(void) {
+	// Does not matter if it is stale or not; end marker is the same
+	return this->LiveInSet.end();
+}
+
+// Update the LiveOut set for the block.
+// Return true if it changed, false otherwise.
+bool SMPBasicBlock::UpdateLiveOut(void) {
+	bool changed = false;
+	set<op_t, LessOp> OldLiveOut(this->LiveOutSet); // save copy of old LiveOutSet
+	this->LiveOutSet.clear(); // Clear it and rebuild it
+	// Dataflow equation for LiveOutSet: If a variable is live-in for any successor
+	// block, it is live out for this block.
+	list<list<SMPBasicBlock>::iterator>::iterator SuccIter;
+	for (SuccIter = this->Successors.begin(); SuccIter != this->Successors.end(); ++SuccIter) {
+		set<op_t, LessOp>::iterator InSuccIter;
+		for (InSuccIter = (*SuccIter)->GetFirstLiveIn(); InSuccIter != (*SuccIter)->GetLastLiveIn(); ++InSuccIter) {
+			this->LiveOutSet.insert(*InSuccIter);
+		}
+	}
+
+	// Only remaining question: Did the LiveOutSet change?
+	// Short cut: If the set cardinality changed, then the set changed.
+	if (this->LiveOutSet.size() != OldLiveOut.size()) {
+		changed = true;
+	}
+	else { // Same # of elements; move through in lockstep and compare.
+ set<op_t, LessOp>::iterator NewIter = this->LiveOutSet.begin(); + set<op_t, LessOp>::iterator OldIter = OldLiveOut.begin(); + set<op_t, LessOp>::value_compare OpComp = OldLiveOut.value_comp(); // LessOp() + while (OldIter != OldLiveOut.end()) { // both iters terminate simultaneously + if (OpComp(*OldIter, *NewIter) || OpComp(*NewIter, *OldIter)) { + changed = true; + break; + } + ++OldIter; + ++NewIter; + } + } + + if (changed) + this->IsLiveInStale = true; + + OldLiveOut.clear(); + return changed; +} // end of SMPBasicBlock::UpdateLiveOut() + // ***************************************************************** // Class SMPFunction // ***************************************************************** @@ -894,7 +1097,8 @@ void SMPBasicBlock::Analyze() { // Constructor SMPFunction::SMPFunction(func_t *Info) { this->FuncInfo = Info; - IndirectCalls = false; + this->IndirectCalls = false; + this->SharedChunks = false; return; } @@ -939,12 +1143,11 @@ void SMPFunction::SetStackFrameInfo(void) { msg("Fixed stack frame size info: %s\n", this->FuncName); SMPBasicBlock CurrBlock = this->Blocks.front(); msg("First basic block:\n"); - for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr(); + for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr(); CurrInstr != CurrBlock.GetLastInstr(); ++CurrInstr) { - msg("%s\n", CurrInstr->GetDisasm()); + msg("%s\n", (*CurrInstr)->GetDisasm()); } - msg("%s\n", CurrBlock.GetLastInstr()->GetDisasm()); } #endif @@ -1174,14 +1377,15 @@ bool SMPFunction::MDFixFrameInfo(void) { // according to what we found if the values of the data members would // change. 
SMPBasicBlock CurrBlock = this->Blocks.front(); - for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr(); - CurrInstr != CurrBlock.GetLastInstr(); // LastInstr is jump anyway - ++CurrInstr) { + for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr(); + CurrIter != CurrBlock.GetLastInstr(); + ++CurrIter) { + list<SMPInstr>::iterator CurrInstr = *CurrIter; if (CurrInstr->MDIsPushInstr()) { // We will make the gcc-linux assumption that a PUSH in // the first basic block, prior to the stack allocating // instruction, is a callee register save. To make this - // more robust, we should ensure that the register is from + // more robust, we ensure that the register is from // the callee saved group of registers, and that it has // not been defined thus far in the function (else it might // be a push of an outgoing argument to a call that happens @@ -1279,10 +1483,12 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { if (this->FuncInfo->analyzed_sp()) { // Limit our analysis to the first basic block in the function. - ea_t AddrLimit = this->Blocks.front().GetLastInstr()->GetAddr(); - for (list<SMPInstr>::iterator CurrInstr = this->Blocks.front().GetFirstInstr(); - CurrInstr != this->Blocks.front().GetLastInstr(); - ++CurrInstr) { + list<SMPInstr>::iterator TempIter = *(--(this->Blocks.front().GetLastInstr())); + ea_t AddrLimit = TempIter->GetAddr(); + for (list<list<SMPInstr>::iterator>::iterator CurrIter = this->Blocks.front().GetFirstInstr(); + CurrIter != this->Blocks.front().GetLastInstr(); + ++CurrIter) { + list<SMPInstr>::iterator CurrInstr = *CurrIter; ea_t addr = CurrInstr->GetAddr(); // get_spd() returns a cumulative delta of ESP sval_t sp_delta = get_spd(this->FuncInfo, addr); @@ -1292,7 +1498,7 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { #endif if (sp_delta == TargetSize) { // Previous instruction hit the frame size. 
- if (CurrInstr == this->Blocks.front().GetFirstInstr()) { + if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) { return BADADDR; // cannot back up from first instruction } else { @@ -1302,24 +1508,13 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { } // SP delta is marked at the beginning of an instruction to show the SP // after the effects of the previous instruction. Maybe the last instruction - // is the first time the SP shows its desired value. - list<SMPInstr>::iterator FinalInstr = this->Blocks.front().GetLastInstr(); + // is the first time the SP achieves its desired value, which will not be shown + // until the first instruction of the next basic block if it just falls through. + // We can compute the delta AFTER the last instruction using get_spd+get_sp_delta. + list<SMPInstr>::iterator FinalInstr = *(--(this->Blocks.front().GetLastInstr())); ea_t FinalAddr = FinalInstr->GetAddr(); sval_t FinalDelta = get_spd(this->FuncInfo, FinalAddr); -#if SMP_DEBUG_FRAMEFIXUP - if (DebugFlag) - msg("strpbrk FinalDelta: %d\n", FinalDelta); -#endif - if (TargetSize == FinalDelta) { - // Back up one instruction - if (FinalAddr == this->Blocks.front().GetFirstInstr()->GetAddr()) { - return BADADDR; // cannot back up from first instruction - } - else { - return (--FinalInstr)->GetAddr(); - } - } - else if (!FinalInstr->IsBasicBlockTerminator()) { + if (!FinalInstr->IsBasicBlockTerminator()) { // Special case. The basic block does not terminate with a branch or // return, but falls through to the start of a loop, most likely. 
// Thus, the last instruction CAN increase the sp_delta, unlike @@ -1425,10 +1620,12 @@ void SMPFunction::Analyze(void) { for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const area_t &CurrChunk = FuncTail.chunk(); ++ChunkCounter; + if (1 < ChunkCounter) { + this->SharedChunks = true; #if SMP_DEBUG_CHUNKS - if (1 < ChunkCounter) msg("Found tail chunk for %s at %x\n", this->FuncName, CurrChunk.startEA); #endif + } // Build the instruction and block lists for the function. for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA; addr = get_item_end(addr)) { @@ -1457,9 +1654,9 @@ void SMPFunction::Analyze(void) { // we have to split basic blocks at the entry point. if ((FirstInBlock != this->Instrs.end()) && CurrInst.IsJumpTarget()) { - #if SMP_DEBUG_CONTROLFLOW +#if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: hit special jump target case.\n"); - #endif +#endif LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); @@ -1474,23 +1671,23 @@ void SMPFunction::Analyze(void) { this->Blocks.push_back(CurrBlock); } - #if SMP_DEBUG_CONTROLFLOW +#if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: putting CurrInst on list.\n"); - #endif +#endif // Insert instruction at end of list. this->Instrs.push_back(CurrInst); - + // Find basic block leaders and terminators. 
if (FirstInBlock == this->Instrs.end()) { - #if SMP_DEBUG_CONTROLFLOW +#if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: setting FirstInBlock.\n"); - #endif +#endif FirstInBlock = --(this->Instrs.end()); } if (CurrInst.IsBasicBlockTerminator()) { - #if SMP_DEBUG_CONTROLFLOW +#if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: found block terminator.\n"); - #endif +#endif LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); CurrBlock.Analyze(); @@ -1502,6 +1699,12 @@ void SMPFunction::Analyze(void) { FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); + + // Is the instruction a branch to a target outside the function? If + // so, this function has shared tail chunks. + if (CurrInst.IsBranchToFarChunk()) { + this->SharedChunks = true; + } } } // end if (isHead(InstrFlags) && isCode(InstrFlags) } // end for (ea_t addr = FuncInfo->startEA; ... ) @@ -1527,6 +1730,12 @@ void SMPFunction::Analyze(void) { } } // end for (bool ChunkOK = ...) + // Set up basic block links and map of instructions to blocks. + if (!(this->HasSharedChunks())) { + this->SetLinks(); + this->LiveVariableAnalysis(); + } + #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: set stack frame info.\n"); #endif @@ -1536,6 +1745,83 @@ void SMPFunction::Analyze(void) { return; } // end of SMPFunction::Analyze() +// Link basic blocks to their predecessors and successors, and build the map +// of instruction addresses to basic blocks. +void SMPFunction::SetLinks(void) { + list<SMPBasicBlock>::iterator CurrBlock; +#if SMP_DEBUG_DATAFLOW + msg("SetLinks called for %s\n", this->GetFuncName()); +#endif + // First, set up the map of instructions to basic blocks. 
+ for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + list<list<SMPInstr>::iterator>::iterator CurrInst; + for (CurrInst = CurrBlock->GetFirstInstr(); + CurrInst != CurrBlock->GetLastInstr(); + ++CurrInst) { + pair<ea_t, list<SMPBasicBlock>::iterator> MapItem((*CurrInst)->GetAddr(),CurrBlock); + InstBlockMap.insert(MapItem); + } + } + +#if SMP_DEBUG_DATAFLOW + msg("SetLinks finished mapping: %s\n", this->GetFuncName()); +#endif + // Next, set successors of each basic block, also setting up the predecessors in the + // process. + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + list<SMPInstr>::iterator CurrInst = *(--(CurrBlock->GetLastInstr())); + // Last instruction in block; set successors + xrefblk_t CurrXrefs; + for (bool ok = CurrXrefs.first_from(CurrInst->GetAddr(), XREF_ALL); + ok; + ok = CurrXrefs.next_from()) { + if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) { + // Found a code target, with its address in CurrXrefs.to + map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry; + MapEntry = this->InstBlockMap.find(CurrXrefs.to); + if (MapEntry == this->InstBlockMap.end()) { + msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.to, + this->GetFuncName()); + } + else { + list<SMPBasicBlock>::iterator Target = MapEntry->second; + // Make target block a successor of current block. + CurrBlock->LinkToSucc(Target); + // Make current block a predecessor of target block. + Target->LinkToPred(CurrBlock); + } + } + } // end for all xrefs + } // end for all blocks + + return; +} // end of SMPFunction::SetLinks() + +// Perform live variable analysis on all blocks in the function. +// See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm. 
+void SMPFunction::LiveVariableAnalysis(void) { + list<SMPBasicBlock>::iterator CurrBlock; + msg("LiveVariableAnalysis for %s\n", this->GetFuncName()); + + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + // Initialize the Killed and UpwardExposed sets for each block. + CurrBlock->InitKilledExposed(); + } + + bool changed; + // Iterate over each block, updating LiveOut sets until no more changes are made. + // NOTE: Would be more efficient if we computed a reverse post-order list of blocks + // and traversed this loop in reverse post-order. **!!** + do { + changed = false; + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + changed |= CurrBlock->UpdateLiveOut(); + } + } while (changed); + + return; +} // end of SMPFunction::LiveVariableAnalysis() + // Emit all annotations for the function, including all per-instruction // annotations. void SMPFunction::EmitAnnotations(FILE *AnnotFile) { diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index c2ac2e83a7c131364d28706e68b6151c2112692f..b330be35188c06b354220a4b24a7612de0de17df 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -9,6 +9,8 @@ #include <list> #include <vector> +#include <map> +#include <set> #include <cstddef> @@ -24,6 +26,34 @@ class SMPBasicBlock; class SMPInstr; class DefOrUseList; +// MACHINE DEPENDENT: comparison class to permit sorting of op_t operands. +class LessOp { +public: + bool operator()(const op_t &Opnd1, const op_t &Opnd2) const { + if (Opnd1.type != Opnd2.type) + return (Opnd1.type < Opnd2.type); + switch (Opnd1.type) { + case o_void: return false; + case o_reg: return (Opnd1.reg < Opnd2.reg); // **!!** al < eax? etc. 
+			case o_mem: return (Opnd1.addr < Opnd2.addr);
+			case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib);
+				else if (Opnd2.hasSIB) return true;
+				else if (Opnd1.hasSIB) return false;
+				else return (Opnd1.phrase < Opnd2.phrase); // no SIB bytes
+			case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB)
+					return ((Opnd1.sib < Opnd2.sib)
+						|| ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr)));
+				else if (Opnd2.hasSIB) return true;
+				else if (Opnd1.hasSIB) return false;
+				else return ((Opnd1.reg < Opnd2.reg) || ((Opnd1.reg == Opnd2.reg) && (Opnd1.addr < Opnd2.addr))); // no SIB bytes: order by base reg, then displacement
+			case o_imm: return (Opnd1.value < Opnd2.value);
+			case o_far: // fall through to o_near case
+			case o_near: return (Opnd1.addr < Opnd2.addr);
+			default: msg("Unknown operand type.\n"); return false;
+		}; // end switch (Opnd1.type)
+	}
+}; // end class LessOp
+
 // SMP will operate on a doubly linked list of instructions, which
 // will be grouped into basic blocks. We will include info about each
 // instruction that can not all be obtained easily in IDA Pro, and
@@ -85,6 +115,7 @@ public:
 	inline char *GetDisasm(void) { return disasm; };
 	int operator==(const SMPInstr &rhs) const;
 	int operator<(const SMPInstr &rhs) const;
+	int operator<=(const SMPInstr &rhs) const;
 	int operator!=(const SMPInstr &rhs) const;
 	inline op_t GetUse(size_t index) const { return Uses.GetRef(index); };
 	inline op_t GetDef(size_t index) const { return Defs.GetRef(index); };
@@ -99,6 +130,7 @@ public:
 	bool IsSecondSrcOperandNumeric(flags_t F) const;
 	bool IsBasicBlockTerminator(void) const;
 	inline bool IsJumpTarget(void) const { return JumpTarget; };
+	bool IsBranchToFarChunk(void) const; // instr jumps outside current chunk
 	bool MDIsPushInstr(void) const;
 	bool MDIsPopInstr(void) const;
 	bool MDIsReturnInstr(void) const;
@@ -134,22 +166,33 @@ private:
 class SMPBasicBlock {
 public:
 	SMPBasicBlock(list<SMPInstr>::iterator FirstInstr, list<SMPInstr>::iterator LastInstr);
-	void
LinkToPred(list<SMPBasicBlock>::iterator Predecessor); + void LinkToSucc(list<SMPBasicBlock>::iterator Successor); inline bool HasIndirectJump(void) const { return IndirectJump; }; inline bool HasReturn(void) const { return Returns; }; inline bool IsShared(void) const { return SharedTailChunk; }; inline void SetShared(void) { SharedTailChunk = true; }; - inline list<SMPInstr>::iterator GetFirstInstr(void) const { return FirstInstr; }; - inline list<SMPInstr>::iterator GetLastInstr(void) const { return LastInstr; }; + inline list<list<SMPInstr>::iterator>::iterator GetFirstInstr(void) { return Instrs.begin(); }; + inline list<list<SMPInstr>::iterator>::iterator GetLastInstr(void) { return Instrs.end(); }; void Analyze(); + void InitKilledExposed(void); // Initialize KilledSet and UpExposedSet + bool UpdateLiveOut(void); // Iterate once on updating LiveOutSet; return true if changed + set<op_t, LessOp>::iterator GetFirstLiveIn(void); // First LiveIn for use by predecessor + set<op_t, LessOp>::iterator GetLastLiveIn(void); // Last LiveIn for use by predecessor private: - list<SMPInstr>::iterator FirstInstr; - list<SMPInstr>::iterator LastInstr; - list<SMPBasicBlock> Predecessors; - list<SMPBasicBlock> Successors; + list<list<SMPInstr>::iterator> Instrs; + list<list<SMPBasicBlock>::iterator> Predecessors; + list<list<SMPBasicBlock>::iterator> Successors; + // Three sets used in live variable analysis + set<op_t, LessOp> KillSet; // variables killed in this block + set<op_t, LessOp> UpExposedSet; // upward exposed variables in this block + set<op_t, LessOp> LiveOutSet; // Live-Out variables in this block + set<op_t, LessOp> LiveInSet; // contribution to predecessor's live-out iteration bool IndirectJump; // contains an indirect jump instruction bool Returns; // contains a return instruction bool SharedTailChunk; // is part of a code chunk shared among functions + bool IsLiveInStale; // Has LiveOutSet changed since LiveInSet was computed? 
+ bool MDAlreadyKilled(op_t) const; // Was op_t killed by something already in KillSet? }; // Class encapsulating all that the SMP static analyzer cares to know @@ -161,13 +204,18 @@ public: void EmitAnnotations(FILE *AnnotFile); inline bool HasIndirectCalls(void) const { return IndirectCalls; }; inline const char *GetFuncName(void) const { return FuncName; }; + inline bool HasSharedChunks(void) const { return SharedChunks; }; + void SetLinks(void); // Link basic blocks and map instructions to blocks + void LiveVariableAnalysis(void); // Perform Live Variable Analysis across all blocks private: func_t *FuncInfo; list<SMPInstr> Instrs; list<SMPBasicBlock> Blocks; + map<ea_t, list<SMPBasicBlock>::iterator> InstBlockMap; bool UseFP; // Does function use a frame pointer? bool StaticFunc; // Is function declared static? bool IndirectCalls; // Does function make indirect calls? + bool SharedChunks; // Does function share a tail chunk with other functions? char FuncName[MAXSTR]; size_t Size; // Function size in code bytes asize_t LocalVarsSize; // size of local vars region of stack frame