diff --git a/.gitattributes b/.gitattributes index df04dabc3b2a85ae46ec951c82a0e957839253f6..3a5a6eeaf2d9bc760a91fccd8bb8869472511aab 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,8 +3,14 @@ /README.txt -text /SMP-analyze.sh -text /SMP.idc -text +/SMPBasicBlock.cpp -text +/SMPBasicBlock.h -text /SMPDataFlowAnalysis.cpp -text /SMPDataFlowAnalysis.h -text +/SMPFunction.cpp -text +/SMPFunction.h -text +/SMPInstr.cpp -text +/SMPInstr.h -text /SMPStaticAnalyzer.cpp -text /SMPStaticAnalyzer.h -text /SMPStaticAnalyzer.plx -text diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6e87bd51afa5f8b4a07651a9fa5c0147d5ef09b2 --- /dev/null +++ b/SMPBasicBlock.cpp @@ -0,0 +1,425 @@ +// +// SMPBasicBlock.cpp +// +// This module performs the fundamental basic block level analyses needed for the +// SMP project (Software Memory Protection). +// + +#include <list> +#include <set> +#include <vector> +#include <algorithm> + +#include <cstring> + +#include <pro.h> +#include <assert.h> +#include <ida.hpp> +#include <idp.hpp> +#include <auto.hpp> +#include <bytes.hpp> +#include <funcs.hpp> +#include <name.hpp> +#include <allins.hpp> +#include <intel.hpp> + +#include "SMPDataFlowAnalysis.h" +#include "SMPBasicBlock.h" +#include "SMPInstr.h" + +// Basic block number 0 is the top of the CFG lattice. +#define SMP_TOP_BLOCK 0 + +// ***************************************************************** +// Class SMPBasicBlock +// ***************************************************************** + +// Constructor +SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { + this->IndirectJump = false; + this->Returns = false; + this->SharedTailChunk = false; + this->BlockNum = SMP_BLOCKNUM_UNINIT; + this->FirstAddr = First->GetAddr(); + list<SMPInstr>::iterator CurrInst = First; + while (CurrInst != Last) { + this->Instrs.push_back(CurrInst); + ++CurrInst; + } + this->Instrs.push_back(CurrInst); // Add last instruction +} + +// Get address of first instruction in the block. +ea_t SMPBasicBlock::GetFirstAddr(void) const { + return this->FirstAddr; +} + +// Equality operator for SMPBasicBlock. Key field is address of first instruction. +int SMPBasicBlock::operator==(const SMPBasicBlock &rhs) const { + if (rhs.GetFirstAddr() != this->FirstAddr) + return 0; + else + return 1; +} + +// Link to predecessor block. +void SMPBasicBlock::LinkToPred(list<SMPBasicBlock>::iterator Predecessor) { + this->Predecessors.push_back(Predecessor); + return; +} + +// Link to successor block. +void SMPBasicBlock::LinkToSucc(list<SMPBasicBlock>::iterator Successor) { + this->Successors.push_back(Successor); + return; +} + +// See if all predecessors have set their ordering number. +bool SMPBasicBlock::AllPredecessorsNumbered(void) { + list<list<SMPBasicBlock>::iterator>::iterator CurrPred; + for (CurrPred = this->Predecessors.begin(); CurrPred != this->Predecessors.end(); ++CurrPred) { + // Don't count current block, in case we have a one-block loop with this block + // as its own predecessor. + if (**CurrPred == *this) + continue; + if ((*CurrPred)->GetNumber() == SMP_BLOCKNUM_UNINIT) + return false; + } + return true; +} // end of SMPBasicBlock::AllPredecessorsNumbered() + +// Are all instructions in the block no-ops? +bool SMPBasicBlock::AllNops(void) { + size_t NopCount = 0; + size_t GoodCount = 0; // non-nop instructions + list<list<SMPInstr>::iterator>::iterator CurrInst; + for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { + if ((*CurrInst)->MDIsNop()) + ++NopCount; + else + ++GoodCount; + } + return ((0 == GoodCount) && (0 < NopCount)); +} // end of SMPBasicBlock::AllNops() + +// Analyze basic block and fill data members. +void SMPBasicBlock::Analyze() { + if (Instrs.back()->GetDataFlowType() == INDIR_JUMP) { + this->IndirectJump = true; + } + else if (Instrs.back()->MDIsReturnInstr()) { + this->Returns = true; + } +} // end of SMPBasicBlock::Analyze() + +// DEBUG dump of block +void SMPBasicBlock::Dump(void) { + msg("Dump of basic block %d\n", this->BlockNum); + // Dump dataflow analysis sets and links before dumping instructions. + list<list<SMPBasicBlock>::iterator>::iterator CurrLink; + msg("Predecessors: "); + for (CurrLink = this->Predecessors.begin(); CurrLink != this->Predecessors.end(); ++CurrLink) { + msg("%d ", (*CurrLink)->GetNumber()); + } + msg("\n"); + msg("Successors: "); + for (CurrLink = this->Successors.begin(); CurrLink != this->Successors.end(); ++CurrLink) { + msg("%d ", (*CurrLink)->GetNumber()); + } + msg("\n"); + set<op_t, LessOp>::iterator SetItem; + msg("VarKill set: "); + for (SetItem = this->KillSet.begin(); SetItem != this->KillSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("UpExposed set: "); + for (SetItem = this->UpExposedSet.begin(); SetItem != this->UpExposedSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("LiveIn set: "); + for (SetItem = this->LiveInSet.begin(); SetItem != this->LiveInSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("LiveOut set: "); + for (SetItem = this->LiveOutSet.begin(); SetItem != this->LiveOutSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("Dominance frontier: "); + set<int>::iterator DomIter; + for (DomIter = this->DomFrontier.begin(); DomIter != this->DomFrontier.end(); ++DomIter) { + msg("%d ", *DomIter); + } + msg("\n"); + set<SMPPhiFunction, LessPhi>::iterator PhiIter; + for (PhiIter = this->PhiFunctions.begin(); PhiIter != this->PhiFunctions.end(); ++PhiIter) { + msg("Phi function for %d : ", PhiIter->GetIndex()); +#if 0 // cannot make this compile on linux/g++ + // Dump out all phi operands + vector<DefOrUse>::iterator DefIter; + for (DefIter = PhiIter->GetFirstOp(); DefIter != PhiIter->GetLastOp(); ++DefIter) { + PrintOneOperand(DefIter->GetOp(), 0, -1); + msg(" SSAnum %d ", DefIter->GetSSANum()); + } +#else // see if the compiler likes it this way! + for (size_t i = 0; i < PhiIter->GetPhiListSize(); ++i) { + DefOrUse PhiRef = PhiIter->GetPhiRef(i); + PrintOneOperand(PhiRef.GetOp(), 0, -1); + msg(" SSAnum %d ", PhiRef.GetSSANum()); + } +#endif + msg("\n"); + } + + if (this->IndirectJump) + msg("Has indirect jump. "); + if (this->Returns) + msg("Has return. "); + if (this->SharedTailChunk) + msg("Is shared tail chunk block. "); + msg("\n"); + + // Now, dump all the instructions. + list<list<SMPInstr>::iterator>::iterator CurrInst; + for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { + msg("%x : %s\n", (*CurrInst)->GetAddr(), (*CurrInst)->GetDisasm()); + (*CurrInst)->PrintOperands(); + } + msg("\n"); + return; +} // end of SMPBasicBlock::Dump() + +// Return true if anything already in the KillSet would kill the operand value. +bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const { + // We have assembly language operands that can be complex, such as + // [ebx + esi*4 + 04h]. If ebx or esi have been killed, then this memory + // phrase should be considered killed. We could be even more conservative + // with base addresses, declaring an entire array killed whenever its base + // address appears in a definition, for example. We will do that if it proves + // to be necessary. + bool FoundInKillSet = (KillSet.end() != KillSet.find(Opnd1)); + switch (Opnd1.type) { + // Some types are simple to test for equality. + case o_void: + case o_reg: + case o_mem: + case o_imm: + case o_far: + case o_near: + // Look in KillSet. These simple types should be found there with + // no complicated comparisons needed. + return FoundInKillSet; + case o_phrase: + case o_displ: + // If found directly in KillSet, return true. Otherwise, see if any registers + // used in the memory addressing expression were killed. + if (FoundInKillSet) + return true; + else { + // Should we add Opnd1 to the KillSet every time we return true below? **!!** + op_t TempOp; + if (Opnd1.hasSIB) { + int BaseReg = sib_base(Opnd1); + short IndexReg = sib_index(Opnd1); + TempOp.type = o_reg; + TempOp.reg = (ushort) BaseReg; + if (this->KillSet.end() != this->KillSet.find(TempOp)) + return true; + if (R_sp != IndexReg) { // Cannot have ESP index reg in SIB + TempOp.reg = (ushort) IndexReg; + if (this->KillSet.end() != this->KillSet.find(TempOp)) + return true; + } + } + else { // no SIB + ushort BaseReg; + if (Opnd1.type == o_phrase) + BaseReg = Opnd1.phrase; + else // o_displ + BaseReg = Opnd1.reg; + TempOp.type = o_reg; + TempOp.reg = BaseReg; + if (this->KillSet.end() != this->KillSet.find(TempOp)) + return true; + } // end if SIB ... else ... + } // end if (FoundInKillSet) ... else ... + break; + default: + msg("Unknown operand type %d in MDAlreadyKilled, block %d\n", Opnd1.type, this->BlockNum); + } // end of switch on Opnd1.type + + return false; +} // end of SMPBasicBlock::MDAlreadyKilled() + +// Initialize the KilledSet and UpExposedSet for live variable analysis. +void SMPBasicBlock::InitKilledExposed(void) { + // Find all upwardly exposed operands and killed operands in this block. + list<list<SMPInstr>::iterator>::iterator CurrIter; + for (CurrIter = this->Instrs.begin(); CurrIter != this->Instrs.end(); ++CurrIter) { + list<SMPInstr>::iterator CurrInst = *CurrIter; + + // Dataflow equation for upward exposed variables: If a variable has not been + // killed yet in this block, starting from the top of the block, and it is used + // in the current instruction, then it is upwardly exposed. + size_t limit = CurrInst->NumUses(); + for (size_t index = 0; index < limit; ++index) { + op_t UseOp = CurrInst->GetUse(index).GetOp(); + // Only add non-immediate operands that are not already killed in this block. + // o_near and o_far operands are code addresses in immediate form, e.g. + // call _printf might be call 0x8048040, with o_near = 0x8048040. + if ((!(this->MDAlreadyKilled(UseOp))) + && (UseOp.type != o_imm) && (UseOp.type != o_near) && (UseOp.type != o_far)) + this->UpExposedSet.insert(CurrInst->GetUse(index).GetOp()); + } + // Dataflow equation for killed variables: If a variable is defined in any + // instruction in the block, it is killed by this block (i.e. prior definitions + // of that variable will not make it through the block). + limit = CurrInst->NumDefs(); + for (size_t index = 0; index < limit; ++index) { + this->KillSet.insert(CurrInst->GetDef(index).GetOp()); + } + } // end for all instrs in block + this->IsLiveInStale = true; // Would need to compute LiveInSet for first time + return; +} // end of SMPBasicBlock::InitKilledExposed() + +// Return an iterator for the beginning of the LiveInSet. If the set is stale, +// recompute it first. +set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveIn(void) { + if (this->IsLiveInStale) { + // Dataflow equation: A variable is live-in to this block if it + // is upwardly exposed from this block, or if it passes through + // the block unchanged (i.e. it is not killed and is live out). + this->LiveInSet.clear(); + set<op_t, LessOp>::iterator OutIter; + for (OutIter = this->UpExposedSet.begin(); OutIter != this->UpExposedSet.end(); ++OutIter) { + this->LiveInSet.insert(*OutIter); + } + for (OutIter = this->LiveOutSet.begin(); OutIter != this->LiveOutSet.end(); ++OutIter) { + if (KillSet.end() == this->KillSet.find(*OutIter)) // Found live out but not killed + this->LiveInSet.insert(*OutIter); + } + this->IsLiveInStale = false; + } + return this->LiveInSet.begin(); +} // end of SMPBasicBlock::GetFirstLiveIn() + +// Get termination iterator marker for the LiveIn set, for use by predecessors. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveIn(void) { + // Does not matter if it is stale or not; end marker is the same + return this->LiveInSet.end(); +} + +// Get iterator for the start of the LiveOut set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveOut(void) { + return this->LiveOutSet.begin(); +} + +// Get termination iterator marker for the LiveOut set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveOut(void) { + return this->LiveOutSet.end(); +} + +// Get iterator for the start of the VarKill set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstVarKill(void) { + return this->KillSet.begin(); +} + +// Get termination iterator marker for the VarKill set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastVarKill(void) { + return this->KillSet.end(); +} + +// Get iterator for the start of the UpExposed set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstUpExposed(void) { + return this->UpExposedSet.begin(); +} + +// Get termination iterator marker for the UpExposed set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastUpExposed(void) { + return this->UpExposedSet.end(); +} + +// Get iterator for the start of the DomFrontier set. +set<int>::iterator SMPBasicBlock::GetFirstDomFrontier(void) { + return this->DomFrontier.begin(); +} + +// Get termination iterator marker for the DomFrontier set. +set<int>::iterator SMPBasicBlock::GetLastDomFrontier(void) { + return this->DomFrontier.end(); +} + +// Get iterator for first Phi function. +set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::GetFirstPhi(void) { + return this->PhiFunctions.begin(); +} + +// Get termination iterator marker for Phi functions set. +set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::GetLastPhi(void) { + return this->PhiFunctions.end(); +} + +// Update the LiveOut set for the block. +// Return true if it changed, false otherwise. +bool SMPBasicBlock::UpdateLiveOut(void) { + bool changed = false; + set<op_t, LessOp> OldLiveOut(this->LiveOutSet); // save copy of old LiveOutSet + this->LiveOutSet.clear(); // Clear it and rebuild it + // Dataflow equation for LiveOutSet: If a variable is live-in for any successor + // block, it is live out for this block. + list<list<SMPBasicBlock>::iterator>::iterator SuccIter; + for (SuccIter = this->Successors.begin(); SuccIter != this->Successors.end(); ++SuccIter) { + set<op_t, LessOp>::iterator InSuccIter; + for (InSuccIter = (*SuccIter)->GetFirstLiveIn(); InSuccIter != (*SuccIter)->GetLastLiveIn(); ++InSuccIter) { + this->LiveOutSet.insert(*InSuccIter); + } + } + + // Only remaining question: Did the LiveOutSet change? + // Short cut: If the set cardinality changed, then the set changed. + if (this->LiveOutSet.size() != OldLiveOut.size()) { + changed = true; + } + else { // Same # of elements; move through in lockstep and compare. + set<op_t, LessOp>::iterator NewIter = this->LiveOutSet.begin(); + set<op_t, LessOp>::iterator OldIter = OldLiveOut.begin(); + set<op_t, LessOp>::value_compare OpComp = OldLiveOut.value_comp(); // LessOp() + while (OldIter != OldLiveOut.end()) { // both iters terminate simultaneously + if (OpComp(*OldIter, *NewIter) || OpComp(*NewIter, *OldIter)) { + changed = true; + break; + } + ++OldIter; + ++NewIter; + } + } + + if (changed) + this->IsLiveInStale = true; + + OldLiveOut.clear(); + return changed; +} // end of SMPBasicBlock::UpdateLiveOut() + +// Insert RPO number block into the dominance frontier set. +void SMPBasicBlock::AddToDomFrontier(int block) { + this->DomFrontier.insert(block); + return; +} // end of SMPBasicBlock::AddToDomFrontier() + +// Add a phi function to the list of phi functions entering this block. +// If phi function for this global name already existed in the block, +// return false because no new phi function was added; else return true. +bool SMPBasicBlock::AddPhi(SMPPhiFunction NewPhi) { + if (this->PhiFunctions.end() == this->PhiFunctions.find(NewPhi)) { + this->PhiFunctions.insert(NewPhi); + return true; + } + else + return false; +} // end of SMPBasicBlock::AddPhi() + diff --git a/SMPBasicBlock.h b/SMPBasicBlock.h new file mode 100644 index 0000000000000000000000000000000000000000..0ae9a69d817846961c726ba4c3093e46f7feafde --- /dev/null +++ b/SMPBasicBlock.h @@ -0,0 +1,111 @@ +#ifndef SMPBASICBLOCK_H +#define SMPBASICBLOCK_H 1 + +// SMPBasicBlock.h +// +// This header defines the interfaces needed for analyzing basic blocks. + +#include <list> +#include <set> + +#include <cstddef> + +#include <pro.h> +#include <ida.hpp> +#include <ua.hpp> + +#include "SMPDataFlowAnalysis.h" +#include "SMPInstr.h" + +using namespace std; + +// Value for a block number before it is initialized. +#define SMP_BLOCKNUM_UNINIT (-1) + +// Value for an SSA subscript number before it is initialized by SSA renaming. +#define SMP_SSA_UNINIT (-1) + +// Class defining basic blocks. +class SMPBasicBlock { +public: + // Constructors + SMPBasicBlock(list<SMPInstr>::iterator FirstInstr, list<SMPInstr>::iterator LastInstr); + // Operators + int operator==(const SMPBasicBlock &rhs) const; + // Get methods + ea_t GetFirstAddr(void) const; + inline int GetNumber(void) const { return BlockNum; } + inline list<list<SMPInstr>::iterator>::iterator GetFirstInstr(void) { return Instrs.begin(); }; + inline list<list<SMPInstr>::iterator>::iterator GetLastInstr(void) { return Instrs.end(); }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetFirstPred(void) { + return Predecessors.begin(); + }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetLastPred(void) { + return Predecessors.end(); + }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetFirstSucc(void) { + return Successors.begin(); + }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetLastSucc(void) { + return Successors.end(); + }; + inline size_t GetNumPreds(void) const { return Predecessors.size(); }; + set<op_t, LessOp>::iterator GetFirstLiveIn(void); // LiveInSet.begin() + set<op_t, LessOp>::iterator GetLastLiveIn(void); // LiveInSet.end() + set<op_t, LessOp>::iterator GetFirstLiveOut(void); // LiveOutSet.begin() + set<op_t, LessOp>::iterator GetLastLiveOut(void); // LiveOutSet.end() + set<op_t, LessOp>::iterator GetFirstVarKill(void); // KillSet.begin() + set<op_t, LessOp>::iterator GetLastVarKill(void); // KillSet.end() + set<op_t, LessOp>::iterator GetFirstUpExposed(void); // UpExposedSet.begin() + set<op_t, LessOp>::iterator GetLastUpExposed(void); // UpExposedSet.end() + set<int>::iterator GetFirstDomFrontier(void); // DomFrontier.begin() + set<int>::iterator GetLastDomFrontier(void); // DomFrontier.end() + set<SMPPhiFunction, LessPhi>::iterator GetFirstPhi(void); // PhiFunctions.begin() + set<SMPPhiFunction, LessPhi>::iterator GetLastPhi(void); // PhiFunctions.end() + // Set methods + inline void SetShared(void) { SharedTailChunk = true; }; + inline void SetNumber(int Num) { BlockNum = Num; }; + void LinkToPred(list<SMPBasicBlock>::iterator Predecessor); + void LinkToSucc(list<SMPBasicBlock>::iterator Successor); + bool AddPhi(SMPPhiFunction NewPhi); + // Query methods + inline bool HasIndirectJump(void) const { return IndirectJump; }; + inline bool HasReturn(void) const { return Returns; }; + inline bool IsShared(void) const { return SharedTailChunk; }; + bool AllNops(void); // Are all instructions in the block no-ops? + inline bool IsLiveIn(op_t CurrOp) const { + return (LiveInSet.end() != LiveInSet.find(CurrOp)); + } + // Printing methods + void Dump(void); + // Analysis methods + bool AllPredecessorsNumbered(void); + void Analyze(); + void InitKilledExposed(void); // Initialize KilledSet and UpExposedSet + bool UpdateLiveOut(void); // Iterate once on updating LiveOutSet; return true if changed + void AddToDomFrontier(int); // Add RPO block number to DomFrontier set. +private: + // Data + ea_t FirstAddr; + int BlockNum; // Number for block ordering algorithms + list<list<SMPInstr>::iterator> Instrs; + list<list<SMPBasicBlock>::iterator> Predecessors; + list<list<SMPBasicBlock>::iterator> Successors; + // Four sets used in live variable analysis + set<op_t, LessOp> KillSet; // variables killed in this block + set<op_t, LessOp> UpExposedSet; // upward exposed variables in this block + set<op_t, LessOp> LiveOutSet; // Live-Out variables in this block + set<op_t, LessOp> LiveInSet; // contribution to predecessor's live-out iteration + // SSA data structures + set<int> DomFrontier; // Dominance frontier for block, as set of RPO block numbers + set<SMPPhiFunction, LessPhi> PhiFunctions; // SSA incoming edge phi functions + // cached query results + bool IndirectJump; // contains an indirect jump instruction + bool Returns; // contains a return instruction + bool SharedTailChunk; // is part of a code chunk shared among functions + bool IsLiveInStale; // Has LiveOutSet changed since LiveInSet was computed? + // Methods + bool MDAlreadyKilled(op_t) const; // Was op_t killed by something already in KillSet? +}; + +#endif diff --git a/SMPFunction.cpp b/SMPFunction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a8e06e2acddc7c959551201077a41aa205e3750a --- /dev/null +++ b/SMPFunction.cpp @@ -0,0 +1,1301 @@ +// +// SMPFunction.cpp +// +// This module performs the fundamental data flow analyses needed for the +// SMP project (Software Memory Protection) at the function level. +// + +#include <list> +#include <set> +#include <vector> +#include <algorithm> + +#include <cstring> + +#include <pro.h> +#include <assert.h> +#include <ida.hpp> +#include <idp.hpp> +#include <auto.hpp> +#include <bytes.hpp> +#include <funcs.hpp> +#include <allins.hpp> +#include <intel.hpp> +#include <name.hpp> + +#include "SMPDataFlowAnalysis.h" +#include "SMPStaticAnalyzer.h" +#include "SMPFunction.h" +#include "SMPBasicBlock.h" +#include "SMPInstr.h" + +// Set to 1 for debugging output +#define SMP_DEBUG 1 +#define SMP_DEBUG2 0 // verbose +#define SMP_DEBUG3 0 // verbose +#define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered +#define SMP_DEBUG_XOR 0 +#define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions +#define SMP_DEBUG_FRAMEFIXUP 0 +#define SMP_DEBUG_DATAFLOW 0 + +// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008 +#define SMP_COMPUTE_LVA_SSA 0 + +// Basic block number 0 is the top of the CFG lattice. +#define SMP_TOP_BLOCK 0 + +// Set SharedTailChunks to TRUE for entire printf family +// After we restructure the parent/tail structure of the database, this +// will go away. +#define KLUDGE_VFPRINTF_FAMILY 1 + +// Used for binary search by function number in SMPStaticAnalyzer.cpp +// to trigger debugging output and find which instruction in which +// function is causing a crash. +bool SMPBinaryDebug = false; + + +// ***************************************************************** +// Class SMPFunction +// ***************************************************************** + +// Constructor +SMPFunction::SMPFunction(func_t *Info) { + this->FuncInfo = *Info; + this->IndirectCalls = false; + this->SharedChunks = false; + return; +} + +// Figure out the different regions of the stack frame, and find the +// instructions that allocate and deallocate the local variables space +// on the stack frame. +// The stack frame info will be used to emit stack +// annotations when Analyze() reaches the stack allocation +// instruction that sets aside space for local vars. +// Set the address of the instruction at which these +// annotations should be emitted. This should normally +// be an instruction such as: sub esp,48 +// However, for a function with no local variables at all, +// we will need to determine which instruction should be +// considered to be the final instruction of the function +// prologue and return its address. +// Likewise, we find the stack deallocating instruction in +// the function epilogue. +void SMPFunction::SetStackFrameInfo(void) { + bool FoundAllocInstr = false; + bool FoundDeallocInstr = false; + + // The sizes of the three regions of the stack frame other than the + // return address are stored in the function structure. + this->LocalVarsSize = this->FuncInfo.frsize; + this->CalleeSavedRegsSize = this->FuncInfo.frregs; + this->IncomingArgsSize = this->FuncInfo.argsize; + + // The return address size can be obtained in a machine independent + // way by calling get_frame_retsize(). + this->RetAddrSize = get_frame_retsize(&(this->FuncInfo)); + + // IDA Pro has trouble with functions that do not have any local + // variables. Unfortunately, the C library has plenty of these + // functions. IDA usually claims that frregs is zero and frsize + // is N, when the values should have been reversed. We can attempt + // to detect this and fix it. + bool FrameInfoFixed = this->MDFixFrameInfo(); + +#if SMP_DEBUG_FRAMEFIXUP + if (FrameInfoFixed) { + msg("Fixed stack frame size info: %s\n", this->FuncName); + SMPBasicBlock CurrBlock = this->Blocks.front(); + msg("First basic block:\n"); + for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr(); + CurrInstr != CurrBlock.GetLastInstr(); + ++CurrInstr) { + msg("%s\n", (*CurrInstr)->GetDisasm()); + } + } +#endif + + // Now, if LocalVarsSize is not zero, we need to find the instruction + // in the function prologue that allocates space on the stack for + // local vars. This code could be made more robust in the future + // by matching LocalVarsSize to the immediate value in the allocation + // instruction. However, IDA Pro is sometimes a little off on this + // number. **!!** + if (0 < this->LocalVarsSize) { + for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); + CurrInstr != this->Instrs.end(); + ++CurrInstr) { + ea_t addr = CurrInstr->GetAddr(); + + // Keep the most recent instruction in the DeallocInstr + // in case we reach the return without seeing a dealloc. + if (!FoundDeallocInstr) { + this->LocalVarsDeallocInstr = addr; + } + + if (!FoundAllocInstr + && CurrInstr->MDIsFrameAllocInstr()) { + this->LocalVarsAllocInstr = addr; + FoundAllocInstr = true; + // As soon as we have found the local vars allocation, + // we can try to fix incorrect sets of UseFP by IDA. + // NOTE: We might want to extend this in the future to + // handle functions that have no locals. **!!** + bool FixedUseFP = MDFixUseFP(); +#if SMP_DEBUG_FRAMEFIXUP + if (FixedUseFP) { + msg("Fixed UseFP in %s\n", this->FuncName); + } +#endif + } + else if (FoundAllocInstr) { + // We can now start searching for the DeallocInstr. + if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) { + // Keep saving the most recent addr that looks + // like the DeallocInstr until we reach the + // end of the function. Last one to look like + // it is used as the DeallocInstr. + this->LocalVarsDeallocInstr = addr; + FoundDeallocInstr = true; + } + } + } // end for (list<SMPInstr>::iterator CurrInstr ... ) + if (!FoundAllocInstr) { + // Could not find the frame allocating instruction. Bad. + // Emit diagnostic and use the first instruction in the + // function as a pseudo-allocation instruction to emit + // some stack frame info (return address, etc.) + this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize); +#if SMP_DEBUG_FRAMEFIXUP + if (BADADDR == this->LocalVarsAllocInstr) { + msg("ERROR: Could not find stack frame allocation in %s\n", + FuncName); + msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n", + LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize); + } + else { + msg("FindAllocPoint found %x for function %s\n", + this->LocalVarsAllocInstr, this->GetFuncName()); + } +#endif + } +#if SMP_DEBUG_FIX_FRAMEINFO + if (!FoundDeallocInstr) { + // Could not find the frame deallocating instruction. Bad. + // Emit diagnostic and use the last instruction in the + // function. + msg("ERROR: Could not find stack frame deallocation in %s\n", + FuncName); + } +#endif + } + // else LocalVarsSize was zero, meaning that we need to search + // for the end of the function prologue code and emit stack frame + // annotations from that address (i.e. this method returns that + // address). We will approximate this by finding the end of the + // sequence of PUSH instructions at the beginning of the function. + // The last PUSH instruction should be the last callee-save-reg + // instruction. We can make this more robust in the future by + // making sure that we do not count a PUSH of anything other than + // a register. **!!** + // NOTE: 2nd prologue instr is usually mov ebp,esp + // THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE + // THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0; + else { + ea_t SaveAddr = this->FuncInfo.startEA; + for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); + CurrInstr != this->Instrs.end(); + ++CurrInstr) { + insn_t CurrCmd = CurrInstr->GetCmd(); + ea_t addr = CurrInstr->GetAddr(); + if (CurrCmd.itype == NN_push) + SaveAddr = addr; + else + break; + } + this->LocalVarsAllocInstr = SaveAddr; + this->LocalVarsDeallocInstr = 0; + } // end if (LocalVarsSize > 0) ... else ... + +#if 0 + // Now we need to do the corresponding operations from the + // end of the function to find the DeallocInstr in the + // function epilogue. Because there is no addition to the + // stack pointer to deallocate the local vars region, the + // function epilogue will consist of (optional) pops of + // callee-saved regs, followed by the return instruction. + // Working backwards, we should find a return and then + // stop when we do not find any more pops. + if (0 >= LocalVarsSize) { + this->LocalVarsDeallocInstr = NULL; + } + else { + SaveAddr = this->FuncInfo.endEA - 1; + bool FoundRet = false; + do { + ea_t addr = get_item_head(SaveAddr); + flags_t InstrFlags = getFlags(addr); + if (isCode(addr) && isHead(addr)) { + ua_ana0(addr); + if (!FoundRet) { // Just starting out. + if (MDIsReturnInstr(cmd)) { + FoundRet = true; + SaveAddr = addr - 1; + } + else { + msg("ERROR: Last instruction not a return.\n"); + } + } + else { // Should be 0 or more POPs before the return. + if (MDIsPopInstr(cmd)) { + SaveAddr = addr - 1; + } + else if (FrameAllocInstr(cmd, this->LocalVarsSize)) { + this->LocalVarsDeallocInstr = addr; + } + else { + msg("ERROR: Frame deallocation not prior to POPs.\n"); + this->LocalVarsDeallocInstr = SaveAddr + 1; + } + } // end if (!FoundRet) ... else ... + } + else { + --SaveAddr; + } // end if (isCode(addr) && isHead(addr)) + } while (NULL == this->LocalVarsDeallocInstr); + } // end if (0 >= this->LocalVarsSize) +#endif // 0 + return; +} // end of SMPFunction::SetStackFrameInfo() + +// IDA Pro defines the sizes of regions in the stack frame in a way +// that suits its purposes but not ours. the frsize field of the func_info_t +// structure measures the distance between the stack pointer and the +// frame pointer (ESP and EBP in the x86). This region includes some +// of the callee-saved registers. So, the frregs field only includes +// the callee-saved registers that are above the frame pointer. +// x86 standard prologue on gcc/linux: +// push ebp ; save old frame pointer +// mov ebp,esp ; new frame pointer = current stack pointer +// push esi ; callee save reg +// push edi ; callee save reg +// sub esp,34h ; allocate 52 bytes for local variables +// +// Notice that EBP acquires its final frame pointer value AFTER the +// old EBP has been pushed. This means that, of the three callee saved +// registers, one is above where EBP points and two are below. +// IDA Pro is concerned with generating readable addressing expressions +// for items on the stack. None of the callee-saved regs will ever +// be addressed in the function; they will be dormant until they are popped +// off the stack in the function epilogue. In order to create readable +// disassembled code, IDA defines named constant offsets for locals. These +// offsets are negative values (x86 stack grows downward from EBP toward +// ESP). When ESP_relative addressing occurs, IDA converts a statement: +// mov eax,[esp+12] +// into the statement: +// mov eax,[esp+3Ch+var_30] +// Here, 3Ch == 60 decimal is the distance between ESP and EBP, and +// var_30 is defined to ahve the value -30h == -48 decimal. So, the +// "frame size" in IDA Pro is 60 bytes, and a certain local can be +// addressed in ESP-relative manner as shown, or as [ebp+var_30] for +// EBP-relative addressing. The interactive IDA user can then edit +// the name var_30 to something mnemonic, such as "virus_size", and IDA +// will replace all occurrences with the new name, so that code references +// automatically become [ebp+virus_size]. As the user proceeds +// interactively, he eventually produces very understandable code. +// This all makes sense for producing readable assembly text. However, +// our analyses have a compiler perspective as well as a memory access +// defense perspective. SMP distinguishes between callee saved regs, +// which should not be overwritten in the function body, and local +// variables, which can be written. We view the stack frame in logical +// pieces: here are the saved regs, here are the locals, here is the +// return address, etc. We don't care which direction from EBP the +// callee-saved registers lie; we don't want to lump them in with the +// local variables. We also don't like the fact that IDA Pro will take +// the function prologue code shown above and declare frregs=4 and +// frsize=60, because frsize no longer matches the stack allocation +// statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12. +// So, the task of this function is to fix these stack sizes in our +// private data members for the function, while leaving the IDA database +// alone because IDA needs to maintain its own definitions of these +// variables. +// Fixing means we will update the data members LocalVarsSize and +// CalleeSavedRegsSize. +// NOTE: This function is both machine dependent and platform dependent. +// The prologue and epilogue code generated by gcc-linux is as discussed +// above, while on Visual Studio and other Windows x86 compilers, the +// saving of registers other than EBP happens AFTER local stack allocation. +// A Windows version of the function would expect to see the pushing +// of ESI and EDI AFTER the sub esp,34h statement. +bool SMPFunction::MDFixFrameInfo(void) { + int SavedRegsSize = 0; + int OtherPushesSize = 0; // besides callee-saved regs + int NewLocalsSize = 0; + int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize; + bool Changed = false; + + // Iterate through the first basic block in the function. If we find + // a frame allocating Instr in it, then we have local vars. If not, + // we don't, and LocalVarsSize should have been zero. Count the callee + // register saves leading up to the local allocation. Set data members + // according to what we found if the values of the data members would + // change. + SMPBasicBlock CurrBlock = this->Blocks.front(); + for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr(); + CurrIter != CurrBlock.GetLastInstr(); + ++CurrIter) { + list<SMPInstr>::iterator CurrInstr = *CurrIter; + if (CurrInstr->MDIsPushInstr()) { + // We will make the gcc-linux assumption that a PUSH in + // the first basic block, prior to the stack allocating + // instruction, is a callee register save. To make this + // more robust, we ensure that the register is from + // the callee saved group of registers, and that it has + // not been defined thus far in the function (else it might + // be a push of an outgoing argument to a call that happens + // in the first block when there are no locals). **!!!!** + if (CurrInstr->MDUsesCalleeSavedReg() + && !CurrInstr->HasSourceMemoryOperand()) { + SavedRegsSize += 4; // **!!** should check the size + } + else { + // Pushes of outgoing args can be scheduled so that + // they are mixed with the pushes of callee saved regs. + OtherPushesSize += 4; + } + } + else if (CurrInstr->MDIsFrameAllocInstr()) { + SavedRegsSize += OtherPushesSize; + // Get the size being allocated. + for (size_t index = 0; index < CurrInstr->NumUses(); ++index) { + // Find the immediate operand. + if (o_imm == CurrInstr->GetUse(index).GetOp().type) { + // Get its value into LocalVarsSize. + long AllocValue = (signed long) CurrInstr->GetUse(index).GetOp().value; + // One compiler might have sub esp,24 and another + // might have add esp,-24. Take the absolute value. + if (0 > AllocValue) + AllocValue = -AllocValue; + if (AllocValue != (long) this->LocalVarsSize) { + Changed = true; +#if SMP_DEBUG_FRAMEFIXUP + if (AllocValue + SavedRegsSize != OldFrameTotal) + msg("Total frame size changed: %s\n", this->FuncName); +#endif + this->LocalVarsSize = (asize_t) AllocValue; + this->CalleeSavedRegsSize = (ushort) SavedRegsSize; + NewLocalsSize = this->LocalVarsSize; + } + else { // Old value was correct; no change. + NewLocalsSize = this->LocalVarsSize; + if (SavedRegsSize != this->CalleeSavedRegsSize) { + this->CalleeSavedRegsSize = (ushort) SavedRegsSize; + Changed = true; +#if SMP_DEBUG_FRAMEFIXUP + msg("Only callee regs size changed: %s\n", this->FuncName); +#endif + } + } + } // end if (o_imm == ...) + } // end for all uses + break; // After frame allocation instr, we are done + } // end if (push) .. elsif frame allocating instr + } // end for all instructions in the first basic block + + // If we did not find an allocating instruction, see if it would keep + // the total size the same to set LocalVarsSize to 0 and to set + // CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we + // might be better off to leave the numbers alone. + if (!Changed && (NewLocalsSize == 0)) { + if (OldFrameTotal == SavedRegsSize) { + this->CalleeSavedRegsSize = SavedRegsSize; + this->LocalVarsSize = 0; + Changed = true; + } +#if SMP_DEBUG_FRAMEFIXUP + else { + msg("Could not update frame sizes: %s\n", this->FuncName); + } +#endif + } + +#if SMP_DEBUG_FRAMEFIXUP + if ((0 < OtherPushesSize) && (0 < NewLocalsSize)) + msg("Extra pushes found of size %d in %s\n", OtherPushesSize, + this->FuncName); +#endif + + return Changed; +} // end of SMPFunction::MDFixFrameInfo() + +// Some functions have difficult to find stack allocations. For example, in some +// version of glibc, strpbrk() zeroes out register ECX and then pushes it more than +// 100 times in order to allocate zero-ed out local vars space for a character translation +// table. We will use the stack pointer analysis of IDA to find out if there is a point +// in the first basic block at which the stack pointer reaches the allocation total +// that IDA is expecting for the local vars region. +// If so, we return the address of the instruction at which ESP reaches its value, else +// we return BADADDR. +ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { + bool DebugFlag = (0 == strncmp("strpbrk", this->GetFuncName(), 7)); + sval_t TargetSize = - ((sval_t) OriginalLocSize); // negate; stack grows down + +#if SMP_DEBUG_FRAMEFIXUP + if (DebugFlag) + msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize); +#endif + + if (this->FuncInfo.analyzed_sp()) { + // Limit our analysis to the first basic block in the function. + list<SMPInstr>::iterator TempIter = *(--(this->Blocks.front().GetLastInstr())); + ea_t AddrLimit = TempIter->GetAddr(); + for (list<list<SMPInstr>::iterator>::iterator CurrIter = this->Blocks.front().GetFirstInstr(); + CurrIter != this->Blocks.front().GetLastInstr(); + ++CurrIter) { + list<SMPInstr>::iterator CurrInstr = *CurrIter; + ea_t addr = CurrInstr->GetAddr(); + // get_spd() returns a cumulative delta of ESP + sval_t sp_delta = get_spd(&(this->FuncInfo), addr); +#if SMP_DEBUG_FRAMEFIXUP + if (DebugFlag) + msg("strpbrk delta: %d at %x\n", sp_delta, addr); +#endif + if (sp_delta == TargetSize) { + // Previous instruction hit the frame size. + if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) { + return BADADDR; // cannot back up from first instruction + } + else { + return (--CurrInstr)->GetAddr(); + } + } + } + // SP delta is marked at the beginning of an instruction to show the SP + // after the effects of the previous instruction. Maybe the last instruction + // is the first time the SP achieves its desired value, which will not be shown + // until the first instruction of the next basic block if it just falls through. + // We can compute the delta AFTER the last instruction using get_spd+get_sp_delta. + list<SMPInstr>::iterator FinalInstr = *(--(this->Blocks.front().GetLastInstr())); + ea_t FinalAddr = FinalInstr->GetAddr(); + sval_t FinalDelta = get_spd(&(this->FuncInfo), FinalAddr); + if (!FinalInstr->IsBasicBlockTerminator()) { + // Special case. The basic block does not terminate with a branch or + // return, but falls through to the start of a loop, most likely. + // Thus, the last instruction CAN increase the sp_delta, unlike + // a jump or branch, and the sp_delta would not hit the target until + // the first instruction in the second block. We can examine the + // effect on the stack pointer of this last instruction to see if it + // causes the SP delta to hit the OriginalLocSize. + sval_t LastInstrDelta = get_sp_delta(&(this->FuncInfo), FinalAddr); + if (TargetSize == (FinalDelta + LastInstrDelta)) { + // Return very last instruction (don't back up 1 here) + return FinalAddr; + } + } + } // end if (this->FuncInfo.analyzed_sp()) +#if SMP_DEBUG_FRAMEFIXUP + else { + msg("analyzed_sp() is false for %s\n", this->GetFuncName()); + } +#endif + return BADADDR; +} // end of SMPFunction::FindAllocPoint() + +// IDA Pro is sometimes confused by a function that uses the frame pointer +// register for other purposes. For the x86, a function that uses EBP +// as a frame pointer would begin with: push ebp; mov ebp,esp to save +// the old value of EBP and give it a new value as a frame pointer. The +// allocation of local variable space would have to come AFTER the move +// instruction. A function that begins: push ebp; push esi; sub esp,24 +// is obviously not using EBP as a frame pointer. IDA is apparently +// confused by the push ebp instruction being the first instruction +// in the function. We will reset UseFP to false in this case. +// NOTE: This logic should work for both Linux and Windows x86 prologues. +bool SMPFunction::MDFixUseFP(void) { + list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); + ea_t addr = CurrInstr->GetAddr(); + if (!UseFP) + return false; // Only looking to reset true to false. + while (addr < this->LocalVarsAllocInstr) { + size_t DefIndex = 0; + while (DefIndex < CurrInstr->NumDefs()) { + if (CurrInstr->GetDef(DefIndex).GetOp().is_reg(R_bp)) + return false; // EBP got set before locals were allocated + ++DefIndex; + } + ++CurrInstr; + addr = CurrInstr->GetAddr(); + } + // If we found no defs of the frame pointer before the local vars + // allocation, then the frame pointer register is not being used + // as a frame pointer, just as a general callee-saved register. + this->UseFP = false; + return true; +} // end of SMPFunction::MDFixUseFP() + +// Emit the annotations describing the regions of the stack frame. +void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) { + ea_t addr = Instr->GetAddr(); + + if (0 < IncomingArgsSize) + qfprintf(AnnotFile, "%x %d INARGS STACK esp + %d %s \n", + addr, IncomingArgsSize, + (LocalVarsSize + CalleeSavedRegsSize + RetAddrSize), + Instr->GetDisasm()); + if (0 < RetAddrSize) + qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d ReturnAddress \n", + addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize)); + if (0 < CalleeSavedRegsSize) + qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n", + addr, CalleeSavedRegsSize, LocalVarsSize); + if (0 < LocalVarsSize) + qfprintf(AnnotFile, "%x %d LOCALFRAME STACK esp + %d LocalVars \n", + addr, LocalVarsSize, 0); + return; +} // end of SMPFunction::EmitStackFrameAnnotations() + +// Main data flow analysis driver. Goes through the function and +// fills all objects for instructions, basic blocks, and the function +// itself. +void SMPFunction::Analyze(void) { + list<SMPInstr>::iterator FirstInBlock = this->Instrs.end(); + // For starting a basic block + list<SMPInstr>::iterator LastInBlock = this->Instrs.end(); + // Terminating a basic block + +#if SMP_DEBUG_CONTROLFLOW + msg("Entering SMPFunction::Analyze.\n"); +#endif + + // Get some basic info from the FuncInfo structure. + this->Size = this->FuncInfo.endEA - this->FuncInfo.startEA; + this->UseFP = (0 != (this->FuncInfo.flags & (FUNC_FRAME | FUNC_BOTTOMBP))); + this->StaticFunc = (0 != (this->FuncInfo.flags & FUNC_STATIC)); + get_func_name(this->FuncInfo.startEA, this->FuncName, + sizeof(this->FuncName) - 1); + this->BlockCount = 0; + +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: got basic info.\n"); +#endif + + // Cycle through all chunks that belong to the function. + func_tail_iterator_t FuncTail(&(this->FuncInfo)); + size_t ChunkCounter = 0; + for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { + const area_t &CurrChunk = FuncTail.chunk(); + ++ChunkCounter; + if (1 < ChunkCounter) { + this->SharedChunks = true; +#if SMP_DEBUG_CHUNKS + msg("Found tail chunk for %s at %x\n", this->FuncName, CurrChunk.startEA); +#endif + } + // Build the instruction and block lists for the function. + for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA; + addr = get_item_end(addr)) { + flags_t InstrFlags = getFlags(addr); + if (isHead(InstrFlags) && isCode(InstrFlags)) { + SMPInstr CurrInst = SMPInstr(addr); + // Fill in the instruction data members. +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n"); +#endif + CurrInst.Analyze(); + if (SMPBinaryDebug) { + msg("Disasm: %s \n", CurrInst.GetDisasm()); + } + if (CurrInst.GetDataFlowType() == INDIR_CALL) + this->IndirectCalls = true; + + // Before we insert the instruction into the instruction + // list, determine if it is a jump target that does not + // follow a basic block terminator. This is the special case + // of a CASE in a SWITCH that falls through into another + // CASE, for example. The first sequence of statements + // was not terminated by a C "break;" statement, so it + // looks like straight line code, but there is an entry + // point at the beginning of the second CASE sequence and + // we have to split basic blocks at the entry point. + if ((FirstInBlock != this->Instrs.end()) + && CurrInst.IsJumpTarget()) { +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: hit special jump target case.\n"); +#endif + LastInBlock = --(this->Instrs.end()); + SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, + LastInBlock); + CurrBlock.Analyze(); + // If not the first chunk in the function, it is a shared + // tail chunk. + if (ChunkCounter > 1) { + CurrBlock.SetShared(); + } + FirstInBlock = this->Instrs.end(); + LastInBlock = this->Instrs.end(); + this->Blocks.push_back(CurrBlock); + this->BlockCount += 1; + } + +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: putting CurrInst on list.\n"); +#endif + // Insert instruction at end of list. + this->Instrs.push_back(CurrInst); + + // Find basic block leaders and terminators. + if (FirstInBlock == this->Instrs.end()) { +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: setting FirstInBlock.\n"); +#endif + FirstInBlock = --(this->Instrs.end()); + } + if (CurrInst.IsBasicBlockTerminator()) { +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: found block terminator.\n"); +#endif + LastInBlock = --(this->Instrs.end()); + SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); + CurrBlock.Analyze(); + // If not the first chunk in the function, it is a shared + // tail chunk. + if (ChunkCounter > 1) { + CurrBlock.SetShared(); + } + FirstInBlock = this->Instrs.end(); + LastInBlock = this->Instrs.end(); + this->Blocks.push_back(CurrBlock); + this->BlockCount += 1; + + // Is the instruction a branch to a target outside the function? If + // so, this function has shared tail chunks. + if (CurrInst.IsBranchToFarChunk()) { + this->SharedChunks = true; + } + } + } // end if (isHead(InstrFlags) && isCode(InstrFlags) + } // end for (ea_t addr = FuncInfo.startEA; ... ) + + // Handle the special case in which a function does not terminate + // with a return instruction or any other basic block terminator. + // Sometimes IDA Pro sees a call to a NORET function and decides + // to not include the dead code after it in the function. That + // dead code includes the return instruction, so the function no + // longer includes a return instruction and terminates with a CALL. + if (FirstInBlock != this->Instrs.end()) { + LastInBlock = --(this->Instrs.end()); + SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); + CurrBlock.Analyze(); + // If not the first chunk in the function, it is a shared + // tail chunk. + if (ChunkCounter > 1) { + CurrBlock.SetShared(); + } + FirstInBlock = this->Instrs.end(); + LastInBlock = this->Instrs.end(); + this->Blocks.push_back(CurrBlock); + this->BlockCount += 1; + } + } // end for (bool ChunkOK = ...) + +#if KLUDGE_VFPRINTF_FAMILY + if (0 != strstr(this->GetFuncName(), "printf")) { + this->SharedChunks = true; + msg("Kludging function %s\n", this->GetFuncName()); + } +#endif + + // Set up basic block links and map of instructions to blocks. + if (!(this->HasSharedChunks())) { + this->SetLinks(); +#if SMP_COMPUTE_LVA_SSA + this->RPONumberBlocks(); + this->LiveVariableAnalysis(); + this->ComputeSSA(); + bool DumpFlag = (0 == strcmp("main", this->GetFuncName())); + DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); + DumpFlag |= (0 == strcmp(".init_proc", this->GetFuncName())); +#if 0 + DumpFlag = true; +#endif + if (DumpFlag) + this->Dump(); +#endif + } + +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: set stack frame info.\n"); +#endif + // Figure out the stack frame and related info. + this->SetStackFrameInfo(); + + return; +} // end of SMPFunction::Analyze() + +// Compute SSA form data structures across the function. +void SMPFunction::ComputeSSA(void) { +#if 1 + this->ComputeIDoms(); + this->ComputeDomFrontiers(); + this->ComputeGlobalNames(); + this->ComputeBlocksDefinedIn(); + this->InsertPhiFunctions(); + this->SSARenumber(); +#endif + return; +} // end of SMPFunction::ComputeSSA() + +// Link basic blocks to their predecessors and successors, and build the map +// of instruction addresses to basic blocks. +void SMPFunction::SetLinks(void) { + list<SMPBasicBlock>::iterator CurrBlock; +#if SMP_DEBUG_DATAFLOW + msg("SetLinks called for %s\n", this->GetFuncName()); +#endif + // First, set up the map of instructions to basic blocks. + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + list<list<SMPInstr>::iterator>::iterator CurrInst; + for (CurrInst = CurrBlock->GetFirstInstr(); + CurrInst != CurrBlock->GetLastInstr(); + ++CurrInst) { + pair<ea_t, list<SMPBasicBlock>::iterator> MapItem((*CurrInst)->GetAddr(),CurrBlock); + InstBlockMap.insert(MapItem); + } + } + +#if SMP_DEBUG_DATAFLOW + msg("SetLinks finished mapping: %s\n", this->GetFuncName()); +#endif + // Next, set successors of each basic block, also setting up the predecessors in the + // process. + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + list<SMPInstr>::iterator CurrInst = *(--(CurrBlock->GetLastInstr())); + // Last instruction in block; set successors + bool CallFlag = (CALL == CurrInst->GetDataFlowType()); + xrefblk_t CurrXrefs; + for (bool ok = CurrXrefs.first_from(CurrInst->GetAddr(), XREF_ALL); + ok; + ok = CurrXrefs.next_from()) { + if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) { + // Found a code target, with its address in CurrXrefs.to + if (CallFlag && (CurrXrefs.to != (CurrInst->GetAddr() + CurrInst->GetCmd().size))) { + // A call instruction will have two targets: the fall through to the + // next instruction, and the called function. We want to link to the + // fall-through instruction, but not to the called function. + // Some blocks end with a call just because the fall-through instruction + // is a jump target from elsewhere. + continue; + } + map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry; + MapEntry = this->InstBlockMap.find(CurrXrefs.to); + if (MapEntry == this->InstBlockMap.end()) { + msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.to, + this->GetFuncName()); + msg(" Referenced from %s\n", CurrInst->GetDisasm()); + } + else { + list<SMPBasicBlock>::iterator Target = MapEntry->second; + // Make target block a successor of current block. + CurrBlock->LinkToSucc(Target); + // Make current block a predecessor of target block. + Target->LinkToPred(CurrBlock); + } + } + } // end for all xrefs + } // end for all blocks + + // If we have any blocks that are all no-ops and have no predecessors, remove those + // blocks. They are dead and make the CFG no longer a lattice. + CurrBlock = this->Blocks.begin(); + while (CurrBlock != this->Blocks.end()) { + if (CurrBlock->AllNops() && (CurrBlock->GetFirstPred() == CurrBlock->GetLastPred())) { + msg("Removing all nops block at %x\n", CurrBlock->GetFirstAddr()); + CurrBlock = this->Blocks.erase(CurrBlock); + this->BlockCount -= 1; + } + else + ++CurrBlock; + } + + return; +} // end of SMPFunction::SetLinks() + +// Number all basic blocks in reverse postorder (RPO) and set RPOBlocks vector to +// access them. +void SMPFunction::RPONumberBlocks(void) { + bool DebugFlag = (0 == strcmp("vfprintf", this->GetFuncName())); + int CurrNum = 0; + list<list<SMPBasicBlock>::iterator> WorkList; + + // Number the first block with 0. + list<SMPBasicBlock>::iterator CurrBlock = this->Blocks.begin(); +#if 0 + if (this->RPOBlocks.capacity() <= (size_t) this->BlockCount) { + msg("Reserving %d RPOBlocks old value: %d\n", 2+this->BlockCount, this->RPOBlocks.capacity()); + this->RPOBlocks.reserve(2 + this->BlockCount); + this->RPOBlocks.assign(2 + this->BlockCount, this->Blocks.end()); + } +#endif + CurrBlock->SetNumber(CurrNum); + this->RPOBlocks.push_back(CurrBlock); + ++CurrNum; + // Push the first block's successors onto the work list. + list<list<SMPBasicBlock>::iterator>::iterator CurrSucc = CurrBlock->GetFirstSucc(); + while (CurrSucc != CurrBlock->GetLastSucc()) { + WorkList.push_back(*CurrSucc); + ++CurrSucc; + } + + // Use the WorkList to iterate through all blocks in the function + list<list<SMPBasicBlock>::iterator>::iterator CurrListItem = WorkList.begin(); + bool change; + while (!WorkList.empty()) { + change = false; + while (CurrListItem != WorkList.end()) { + if ((*CurrListItem)->GetNumber() != SMP_BLOCKNUM_UNINIT) { + // Duplicates get pushed onto the WorkList because a block + // can be the successor of multiple other blocks. If it is + // already numbered, it is a duplicate and can be removed + // from the list. + CurrListItem = WorkList.erase(CurrListItem); + change = true; + continue; + } + if ((*CurrListItem)->AllPredecessorsNumbered()) { + // Ready to be numbered. + (*CurrListItem)->SetNumber(CurrNum); +#if 0 + msg("Set RPO number %d\n", CurrNum); + if (DebugFlag && (7 == CurrNum)) + this->Dump(); +#endif + this->RPOBlocks.push_back(*CurrListItem); + ++CurrNum; + change = true; + // Push its unnumbered successors onto the work list. + CurrSucc = (*CurrListItem)->GetFirstSucc(); + while (CurrSucc != (*CurrListItem)->GetLastSucc()) { + if ((*CurrSucc)->GetNumber() == SMP_BLOCKNUM_UNINIT) + WorkList.push_back(*CurrSucc); + ++CurrSucc; + } + CurrListItem = WorkList.erase(CurrListItem); + } + else { + ++CurrListItem; + } + } // end while (CurrListItem != WorkList.end()) + if (change) { + // Reset CurrListItem to beginning of work list for next iteration. + CurrListItem = WorkList.begin(); + } + else { + // Loops can cause us to not be able to find a WorkList item that has + // all predecessors numbered. Take the WorkList item with the lowest address + // and number it so we can proceed. + CurrListItem = WorkList.begin(); + ea_t LowAddr = (*CurrListItem)->GetFirstAddr(); + list<list<SMPBasicBlock>::iterator>::iterator SaveItem = CurrListItem; + ++CurrListItem; + while (CurrListItem != WorkList.end()) { + if (LowAddr > (*CurrListItem)->GetFirstAddr()) { + SaveItem = CurrListItem; + LowAddr = (*CurrListItem)->GetFirstAddr(); + } + ++CurrListItem; + } + // SaveItem should now be numbered. + (*SaveItem)->SetNumber(CurrNum); + msg("Picked LowAddr %x and set RPO number %d\n", LowAddr, CurrNum); + this->RPOBlocks.push_back(*SaveItem); + ++CurrNum; + // Push its unnumbered successors onto the work list. + CurrSucc = (*SaveItem)->GetFirstSucc(); + while (CurrSucc != (*SaveItem)->GetLastSucc()) { + if ((*CurrSucc)->GetNumber() == SMP_BLOCKNUM_UNINIT) + WorkList.push_back(*CurrSucc); + ++CurrSucc; + } + CurrListItem = WorkList.erase(SaveItem); + CurrListItem = WorkList.begin(); + } // end if (change) ... else ... + } // end while work list is nonempty + return; +} // end of SMPFunction::RPONumberBlocks() + +// Perform live variable analysis on all blocks in the function. +// See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm. +void SMPFunction::LiveVariableAnalysis(void) { + list<SMPBasicBlock>::iterator CurrBlock; + msg("LiveVariableAnalysis for %s\n", this->GetFuncName()); + + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + // Initialize the Killed and UpwardExposed sets for each block. + CurrBlock->InitKilledExposed(); + } + + bool changed; + // Iterate over each block, updating LiveOut sets until no more changes are made. + // NOTE: LVA is more efficient when computed over a reverse post-order list of blocks. +#if 1 + do { + changed = false; + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + changed |= CurrBlock->UpdateLiveOut(); + } + } while (changed); +#else // Use reverse postorder + do { + changed = false; + for (size_t index = 0; index < this->RPOBlocks.size(); ++index) { + CurrBlock = this->RPOBlocks[index]; + changed |= CurrBlock->UpdateLiveOut(); + } + } while (changed); +#endif + return; +} // end of SMPFunction::LiveVariableAnalysis() + +// Return the IDom index that is the end of the intersection prefix of the Dom sets of +// the two blocks designated by the RPO numbers passed in. +// See Cooper & Torczon, "Engineering a Compiler" 1st edition figure 9.8. +int SMPFunction::IntersectDoms(int block1, int block2) const { + int finger1 = block1; + int finger2 = block2; + while (finger1 != finger2) { + while (finger1 > finger2) + finger1 = this->IDom.at(finger1); + while (finger2 > finger1) + finger2 = this->IDom.at(finger2); + } + return finger1; +} // end of SMPFunction::IntersectDoms() + +// Compute immediate dominators of all blocks into IDom[] vector. +void SMPFunction::ComputeIDoms(void) { + bool DebugFlag = (0 == strcmp("vfprintf", this->GetFuncName())); + // Initialize the IDom[] vector to uninitialized values for all blocks. + this->IDom.reserve(this->BlockCount); + this->IDom.assign(this->BlockCount, SMP_BLOCKNUM_UNINIT); + if (DebugFlag) msg("BlockCount = %d\n", this->BlockCount); + this->IDom[0] = 0; // Start block dominated only by itself + bool changed; + do { + changed = false; + for (size_t RPONum = 1; RPONum < (size_t) this->BlockCount; ++RPONum) { + if (DebugFlag) msg("RPONum %d\n", RPONum); + if (DebugFlag) { + msg("RPOBlocks vector size: %d\n", this->RPOBlocks.size()); + for (size_t index = 0; index < this->RPOBlocks.size(); ++index) { + msg("RPOBlocks entry %d is %d\n", index, RPOBlocks[index]->GetNumber()); + } + } + list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at(RPONum); + // if (DebugFlag) msg("CurrBlock: %x\n", CurrBlock._Ptr); + list<list<SMPBasicBlock>::iterator>::iterator CurrPred; + // Initialize NewIdom to the first processed predecessor of block RPONum. + int NewIdom = SMP_BLOCKNUM_UNINIT; + for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { + if (DebugFlag) msg("Pred: %d\n", (*CurrPred)->GetNumber()); + int PredIDOM = this->IDom.at((*CurrPred)->GetNumber()); + if (DebugFlag) msg("Pred IDom: %d\n", PredIDOM); + if (SMP_BLOCKNUM_UNINIT != PredIDOM) { + NewIdom = (*CurrPred)->GetNumber(); + break; + } + } + if (NewIdom == SMP_BLOCKNUM_UNINIT) + msg("Failure on NewIdom in ComputeIDoms for %s\n", this->GetFuncName()); + assert(NewIdom != SMP_BLOCKNUM_UNINIT); + // Loop through all predecessors of block RPONum except block NewIdom. + // Set NewIdom to the intersection of its Dom set and the Doms set of + // each predecessor that has had its Doms set computed. + for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { + int PredNum = (*CurrPred)->GetNumber(); + if (DebugFlag) msg("PredNum: %d\n", PredNum); + int PredIDOM = this->IDom.at(PredNum); + if (DebugFlag) msg("PredIDOM: %d\n", PredIDOM); + if ((SMP_BLOCKNUM_UNINIT == PredIDOM) || (NewIdom == PredIDOM)) { + // Skip predecessors that have uncomputed Dom sets, or are the + // current NewIdom. + continue; + } + if (DebugFlag) msg("Old NewIdom value: %d\n", NewIdom); + NewIdom = this->IntersectDoms(PredNum, NewIdom); + if (DebugFlag) msg("New NewIdom value: %d\n", NewIdom); + } + // If NewIdom is not the value currently in vector IDom[], update the + // vector entry and set changed to true. + if (NewIdom != this->IDom.at(RPONum)) { + if (DebugFlag) msg("IDOM changed from %d to %d\n", this->IDom.at(RPONum), NewIdom); + this->IDom[RPONum] = NewIdom; + changed = true; + } + } + } while (changed); + return; +} // end of SMPFunction::ComputeIDoms() + +// Compute dominance frontier sets for each block. +void SMPFunction::ComputeDomFrontiers(void) { + list<SMPBasicBlock>::iterator CurrBlock; + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + // We look only at join points in the CFG, as per Cooper/Torczon chapter 9. + if (1 < CurrBlock->GetNumPreds()) { // join point; more than 1 predecessor + int runner; + list<list<SMPBasicBlock>::iterator>::iterator CurrPred; + for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { + // For each predecessor, we run up the IDom[] vector and add CurrBlock to the + // DomFrontier for all blocks that are between CurrPred and IDom[CurrBlock], + // not including IDom[CurrBlock] itself. + runner = (*CurrPred)->GetNumber(); + while (runner != this->IDom.at(CurrBlock->GetNumber())) { + // Cooper/Harvey/Kennedy paper does not quite agree with the later + // text by Cooper/Torczon. Text says that the start node has no IDom + // in the example on pages 462-463, but it shows an IDOM for the + // root node in Figure 9.9 of value == itself. The first edition text + // on p.463 seems correct, as the start node dominates every node and + // thus should have no dominance frontier. + if (SMP_TOP_BLOCK == runner) + break; + (*CurrPred)->AddToDomFrontier(CurrBlock->GetNumber()); + runner = this->IDom.at(runner); + } + } // end for all predecessors + } // end if join point + } // end for all blocks + return; +} // end of SMPFunction::ComputeDomFrontiers() + +// Compute the GlobalNames set, which includes all operands that are used in more than +// one basic block. It is the union of all UpExposedSets of all blocks. +void SMPFunction::ComputeGlobalNames(void) { + set<op_t, LessOp>::iterator SetIter; + list<SMPBasicBlock>::iterator CurrBlock; + unsigned int index = 0; + if (this->Blocks.size() < 2) + return; // cannot have global names if there is only one block + + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + for (SetIter = CurrBlock->GetFirstUpExposed(); SetIter != CurrBlock->GetLastUpExposed(); ++SetIter) { + op_t TempOp = *SetIter; + msg("Global Name: "); + PrintOneOperand(TempOp, 0, -1); + set<op_t, LessOp>::iterator AlreadyInSet = this->GlobalNames.find(TempOp); + if (AlreadyInSet != this->GlobalNames.end()) { + // Already in GlobalNames, so don't assign an index number or call insert. + msg(" already in GlobalNames.\n"); + continue; + } + + // The GlobalNames set will have the complete collection of operands that we are + // going to number in our SSA computations. We now assign an operand number + // within the op_t structure for each, so that we can index into the + // BlocksUsedIn[] vector, for example. This operand number is not to be + // confused with SSA numbers. + // We use the operand number field op_t.n for the lower 8 bits, and the offset + // fields op_t.offb:op_t.offo for the upper 16 bits. We are overwriting IDA + // values here, but operands in the data flow analysis sets should never be + // inserted back into the program anyway. + TempOp.n = (char) (index & 0x000000ff); + TempOp.offb = (char) ((index & 0x0000ff00) >> 8); + TempOp.offo = (char) ((index & 0x00ff0000) >> 16); + ++index; + this->GlobalNames.insert(TempOp); + msg(" inserted as index %d\n", ExtractGlobalIndex(TempOp)); + } + } + + assert(16777215 >= this->GlobalNames.size()); // index fits in 24 bits + return; +} // end of SMPFunction::ComputeGlobalNames() + +// For each item in GlobalNames, record the blocks that DEF the item. +void SMPFunction::ComputeBlocksDefinedIn(void) { + // Loop through all basic blocks and examine all DEFs. For Global DEFs, record + // the block number in BlocksDefinedIn. The VarKillSet records DEFs without + // having to examine every instruction. + list<SMPBasicBlock>::iterator CurrBlock; + this->BlocksDefinedIn.clear(); + for (size_t i = 0; i < this->GlobalNames.size(); ++i) { + list<int> TempList; + this->BlocksDefinedIn.push_back(TempList); + } + msg("Number of GlobalNames: %d\n", this->GlobalNames.size()); + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + set<op_t, LessOp>::iterator KillIter; + for (KillIter = CurrBlock->GetFirstVarKill(); KillIter != CurrBlock->GetLastVarKill(); ++KillIter) { + // If killed item is not a block-local item (it is global), record it. + set<op_t, LessOp>::iterator NameIter = this->GlobalNames.find(*KillIter); + if (NameIter != this->GlobalNames.end()) { // found in GlobalNames set + // We have a kill of a global name. Get index from three 8-bit fields. + unsigned int index = ExtractGlobalIndex(*NameIter); +#if 0 + msg("VarKill item offo: %d offb: %d n: %d index: %d\n", NameIter->offo, NameIter->offb, NameIter->n, index); +#endif + assert(index < this->GlobalNames.size()); + // index is a valid subscript for the BlocksDefinedIn vector. Push the + // current block number onto the list of blocks that define this global name. + this->BlocksDefinedIn[index].push_back(CurrBlock->GetNumber()); + } + } + } + return; +} // end of SMPFunction::ComputeBlocksDefinedIn() + +// Compute the phi functions at the entry point of each basic block that is a join point. +void SMPFunction::InsertPhiFunctions(void) { + set<op_t, LessOp>::iterator NameIter; + list<int> WorkList; // list of block numbers + for (NameIter = this->GlobalNames.begin(); NameIter != this->GlobalNames.end(); ++NameIter) { + int CurrNameIndex = (int) (ExtractGlobalIndex(*NameIter)); + // Initialize the work list to all blocks that define the current name. + WorkList.clear(); + list<int>::iterator WorkIter; + for (WorkIter = this->BlocksDefinedIn.at((size_t) CurrNameIndex).begin(); + WorkIter != this->BlocksDefinedIn.at((size_t) CurrNameIndex).end(); + ++WorkIter) { + WorkList.push_back(*WorkIter); + } + + // Iterate through the work list, inserting phi functions for the current name + // into all the blocks in the dominance frontier of each work list block. + // Insert into the work list each block that had a phi function added. + while (!WorkList.empty()) { + msg("WorkList size: %d\n", WorkList.size()); + list<int>::iterator WorkIter = WorkList.begin(); + while (WorkIter != WorkList.end()) { + set<int>::iterator DomFrontIter; + list<SMPBasicBlock>::iterator WorkBlock = this->RPOBlocks[*WorkIter]; + for (DomFrontIter = WorkBlock->GetFirstDomFrontier(); + DomFrontIter != WorkBlock->GetLastDomFrontier(); + ++DomFrontIter) { + list<SMPBasicBlock>::iterator PhiBlock = this->RPOBlocks[*DomFrontIter]; + // Before inserting a phi function for the current name in *PhiBlock, + // see if the current name is LiveIn for *PhiBlock. If not, there + // is no need for the phi function. This check is what makes the SSA + // a fully pruned SSA. + if (PhiBlock->IsLiveIn(*NameIter)) { + size_t NumPreds = PhiBlock->GetNumPreds(); + SMPPhiFunction CurrPhi(CurrNameIndex); + DefOrUse CurrRef(*NameIter); + for (size_t NumCopies = 0; NumCopies < NumPreds; ++NumCopies) { + CurrPhi.PushBack(CurrRef); + } + if (PhiBlock->AddPhi(CurrPhi)) { + // If not already in Phi set, new phi function was inserted. + WorkList.push_back(PhiBlock->GetNumber()); + msg("Added phi for name %d at top of block %d\n", CurrNameIndex, PhiBlock->GetNumber()); + } + } + } // end for all blocks in the dominance frontier + // Remove current block number from the work list + WorkIter = WorkList.erase(WorkIter); + } // end for all block numbers in the work list + } // end while the work list is not empty + } // end for all elements of the GlobalNames set + return; +} // end of SMPFunction::InsertPhiFunctions() + +void SMPFunction::SSARenumber(void) { + + // **!!** Get this into CVS and patch in the code later after final debugging + return; +} + +// Emit all annotations for the function, including all per-instruction +// annotations. +void SMPFunction::EmitAnnotations(FILE *AnnotFile) { + // Emit annotation for the function as a whole. + if (this->StaticFunc) { + qfprintf(AnnotFile, "%x %d FUNC LOCAL %s ", this->FuncInfo.startEA, + this->Size, this->FuncName); + } + else { + qfprintf(AnnotFile, "%x %d FUNC GLOBAL %s ", this->FuncInfo.startEA, + this->Size, this->FuncName); + } + if (this->UseFP) { + qfprintf(AnnotFile, "USEFP "); + } + else { + qfprintf(AnnotFile, "NOFP "); + } + if (this->FuncInfo.does_return()) { + qfprintf(AnnotFile, "\n"); + } + else { + qfprintf(AnnotFile, "NORET \n"); + } + + // Loop through all instructions in the function. + // Output optimization annotations for those + // instructions that do not require full computation + // of their memory metadata by the Memory Monitor SDT. + list<SMPInstr>::iterator CurrInst; + bool AllocSeen = false; // Reached LocalVarsAllocInstr yet? + bool DeallocTrigger = false; + for (CurrInst = Instrs.begin(); CurrInst != Instrs.end(); ++CurrInst) { + ea_t addr = CurrInst->GetAddr(); + if (this->LocalVarsAllocInstr == addr) { + AllocSeen = true; + this->EmitStackFrameAnnotations(AnnotFile, CurrInst); + } + // If this is the instruction which deallocated space + // for local variables, we set a flag to remind us to + // emit an annotation on the next instruction. + // mmStrata wants the instruction AFTER the + // deallocating instruction, so that it processes + // the deallocation after it happens. It inserts + // instrumentation before an instruction, not + // after, so it will insert the deallocating + // instrumentation before the first POP of callee-saved regs, + // if there are any, or before the return, otherwise. + if (addr == LocalVarsDeallocInstr) { + DeallocTrigger = true; + } + else if (DeallocTrigger) { // Time for annotation + qfprintf(AnnotFile, "%x %d DEALLOC STACK esp - %d %s\n", addr, + LocalVarsSize, LocalVarsSize, CurrInst->GetDisasm()); + DeallocTrigger = false; + } + + CurrInst->EmitAnnotations(this->UseFP, AllocSeen, AnnotFile); + } // end for (ea_t addr = FuncInfo.startEA; ...) + return; +} // end of SMPFunction::EmitAnnotations() + +// Debug output dump. +void SMPFunction::Dump(void) { + list<SMPBasicBlock>::iterator CurrBlock; + msg("Debug dump for function: %s\n", this->GetFuncName()); + for (size_t index = 0; index < this->IDom.size(); ++index) { + msg("IDOM for %d: %d\n", index, this->IDom.at(index)); + } + msg("Global names: \n"); + set<op_t, LessOp>::iterator NameIter; + for (NameIter = this->GlobalNames.begin(); NameIter != this->GlobalNames.end(); ++NameIter) { + msg("index: %d ", ExtractGlobalIndex(*NameIter)); + PrintOneOperand(*NameIter, 0, -1); + msg("\n"); + } + msg("Blocks each name is defined in: \n"); + for (size_t index = 0; index < this->BlocksDefinedIn.size(); ++index) { + msg("Name index: %d Blocks: ", index); + list<int>::iterator BlockIter; + for (BlockIter = this->BlocksDefinedIn.at(index).begin(); + BlockIter != this->BlocksDefinedIn.at(index).end(); + ++BlockIter) { + msg("%d ", *BlockIter); + } + msg("\n"); + } + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + // Dump out the function number and data flow sets before the instructions. + CurrBlock->Dump(); + } + return; +} // end of SMPFunction::Dump() + diff --git a/SMPFunction.h b/SMPFunction.h new file mode 100644 index 0000000000000000000000000000000000000000..d1dce5fb37bce677d2973adb077052b401ca4df7 --- /dev/null +++ b/SMPFunction.h @@ -0,0 +1,85 @@ +#ifndef SMPFUNCTION_H +#define SMPFUNCTION_H 1 + +// SMPFunction.h +// +// This header defines the interfaces needed for analyzing functions, performing live variable analysis, +// putting code into SSA form, etc. + +#include <list> +#include <vector> +#include <map> +#include <set> + +#include <cstddef> + +#include <pro.h> +#include <ida.hpp> +#include <ua.hpp> + +#include "SMPDataFlowAnalysis.h" +#include "SMPInstr.h" +#include "SMPBasicBlock.h" + +using namespace std; + +// Class encapsulating all that the SMP static analyzer cares to know +// about a function. +class SMPFunction { +public: + // Constructors + SMPFunction(func_t *Info); // Default constructor + // Get methods + inline const char *GetFuncName(void) const { return FuncName; }; + // Set methods + // Query methods + inline bool HasIndirectCalls(void) const { return IndirectCalls; }; + inline bool HasSharedChunks(void) const { return SharedChunks; }; + // Printing methods + void Dump(void); // debug dump + // Analysis methods + void Analyze(void); // Analyze all instructions in function + void EmitAnnotations(FILE *AnnotFile); + void RPONumberBlocks(void); + void SetLinks(void); // Link basic blocks and map instructions to blocks + void LiveVariableAnalysis(void); // Perform Live Variable Analysis across all blocks + void ComputeSSA(void); // Compute SSA form data structures +private: + // Data + func_t FuncInfo; + char FuncName[MAXSTR]; + list<SMPInstr> Instrs; + list<SMPBasicBlock> Blocks; + map<ea_t, list<SMPBasicBlock>::iterator> InstBlockMap; + vector<list<SMPBasicBlock>::iterator> RPOBlocks; + vector<int> IDom; // Immediate dominators, indexed and valued by block RPO numbers + set<op_t, LessOp> GlobalNames; // operands used in more than one block; needed in SSA + vector<list<int> > BlocksDefinedIn; // What blocks DEF each GlobalName; index = op # in GlobalNames + int BlockCount; // number of basic blocks in the function + bool UseFP; // Does function use a frame pointer? + bool StaticFunc; // Is function declared static? + bool IndirectCalls; // Does function make indirect calls? + bool SharedChunks; // Does function share a tail chunk with other functions? + size_t Size; // Function size in code bytes + asize_t LocalVarsSize; // size of local vars region of stack frame + ushort CalleeSavedRegsSize; // stack size of callee pushed regs + int RetAddrSize; // size of return address on stack (4 for most machines) + asize_t IncomingArgsSize; // size of incoming args on stack + ea_t LocalVarsAllocInstr; // address of instr that allocates stack frame + ea_t LocalVarsDeallocInstr; // address of epilogue instr that deallocs frame + // Methods + void SetStackFrameInfo(void); + ea_t FindAllocPoint(asize_t); // Deal with difficult to find stack frame allocations + bool MDFixFrameInfo(void); // Redefine stack regions for our needs + bool MDFixUseFP(void); // Fix IDA errors affecting UseFP + void EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr); + void ComputeIDoms(void); // Compute immediate dominators of all blocks into IDom[] + int IntersectDoms(int, int) const; // Find Dom intersection (as IDom[] index) for 2 blocks + void ComputeDomFrontiers(void); // Compute dominance frontiers for all blocks + void ComputeGlobalNames(void); // Compute the GlobalNames set + void ComputeBlocksDefinedIn(void); // Compute the BlocksDefinedIn vector + void InsertPhiFunctions(void); // Insert SSA phi functions at top of each basic block + void SSARenumber(void); // Renumber SSA subscripts for all names +}; // end class SMPFunction + +#endif diff --git a/SMPInstr.cpp b/SMPInstr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..158f4208ff0cd46a5ccddcb8cd57d00f372e6af4 --- /dev/null +++ b/SMPInstr.cpp @@ -0,0 +1,960 @@ +// +// SMPInstr.cpp +// +// This module performs the instruction level analyses needed for the +// SMP project (Software Memory Protection). +// + +#include <cstring> + +#include <pro.h> +#include <assert.h> +#include <ida.hpp> +#include <idp.hpp> +#include <allins.hpp> +#include <auto.hpp> +#include <bytes.hpp> +#include <funcs.hpp> +#include <intel.hpp> +#include <loader.hpp> +#include <lines.hpp> +#include <name.hpp> + +#include "SMPStaticAnalyzer.h" +#include "SMPDataFlowAnalysis.h" +#include "SMPInstr.h" + +// Set to 1 for debugging output +#define SMP_DEBUG 1 +#define SMP_DEBUG2 0 // verbose +#define SMP_DEBUG_XOR 0 + +static char *RegNames[R_of + 1] = + { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH", + "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS", + "DS", "FS", "GS", "CF", "ZF", "SF", "OF" + }; + +// Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable +// by allowing us to pick them up with an array index. +static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6}; +static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6}; + +// Text to be printed in each optimizing annotation explaining why +// the annotation was emitted. +static char *OptExplanation[LAST_OPT_CATEGORY + 1] = + { "NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM", + "Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr", + "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest" + }; + + +// ***************************************************************** +// Class SMPInstr +// ***************************************************************** + +// Constructor for instruction. +SMPInstr::SMPInstr(ea_t addr) { + this->address = addr; + this->analyzed = false; + this->JumpTarget = false; + return; +} + +// Is the instruction the type that terminates a basic block? +bool SMPInstr::IsBasicBlockTerminator() const { + return ((type == JUMP) || (type == COND_BRANCH) + || (type == INDIR_JUMP) || (type == RETURN)); +} + +// Is the destination operand a memory reference? +bool SMPInstr::HasDestMemoryOperand(void) const { + bool MemDest = false; + for (size_t index = 0; index < Defs.GetSize(); ++index) { + optype_t CurrType = Defs.GetRef(index).GetOp().type; + MemDest = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); + if (MemDest) + break; + } + return MemDest; +} // end of SMPInstr::HasDestMemoryOperand() + +// Is a source operand a memory reference? +bool SMPInstr::HasSourceMemoryOperand(void) const { + bool MemSrc = false; + for (size_t index = 0; index < Uses.GetSize(); ++index) { + optype_t CurrType = Uses.GetRef(index).GetOp().type; + MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); + if (MemSrc) + break; + } + return MemSrc; +} // end of SMPInstr::HasSourceMemoryOperand() + +// Does the instruction whose flags are in F have a numeric type +// as the second source operand? +// NOTE: We can only analyze immediate values now, using a heuristic +// that values in the range +/- 8K are numeric and others are +// probably addresses. When data flow analyses are implemented, +// we will be able to analyze many non-immediate operands. +#define IMMEDNUM_LOWER -8191 +#define IMMEDNUM_UPPER 8191 +bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { + bool SecondOpImm = (SMPcmd.Operands[1].type == o_imm); + signed long TempImm; + + if (SecondOpImm) { + TempImm = (signed long) SMPcmd.Operands[1].value; + } + +#if SMP_DEBUG + if (SecondOpImm && (0 > TempImm)) { +#if 0 + msg("Negative immediate: %d Hex: %x ASM: %s\n", TempImm, + SMPcmd.Operands[1].value, disasm); +#endif + } + else if ((!SecondOpImm) && (SMPcmd.Operands[1].type == o_imm)) { + msg("Problem with flags on immediate src operand: %s\n", disasm); + } +#endif + + return (SecondOpImm && (TempImm > IMMEDNUM_LOWER) + && (TempImm < IMMEDNUM_UPPER)); +} // end of SMPInstr::IsSecondSrcOperandNumeric() + +// DEBUG Print DEF and/or USE for an operand. +void PrintDefUse(ulong feature, int OpNum) { + // CF_ macros number the operands from 1 to 6, while OpNum + // is a 0 to 5 index into the insn_t.Operands[] array. + // OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc. + // operand and not an instruction operand. + if (-1 == OpNum) + return; + switch (OpNum) { + case 0: + if (feature & CF_CHG1) + msg(" DEF"); + if (feature & CF_USE1) + msg(" USE"); + break; + case 1: + if (feature & CF_CHG2) + msg(" DEF"); + if (feature & CF_USE2) + msg(" USE"); + break; + case 2: + if (feature & CF_CHG3) + msg(" DEF"); + if (feature & CF_USE3) + msg(" USE"); + break; + case 3: + if (feature & CF_CHG4) + msg(" DEF"); + if (feature & CF_USE4) + msg(" USE"); + break; + case 4: + if (feature & CF_CHG5) + msg(" DEF"); + if (feature & CF_USE5) + msg(" USE"); + break; + case 5: + if (feature & CF_CHG6) + msg(" DEF"); + if (feature & CF_USE6) + msg(" USE"); + break; + } + return; +} // end PrintDefUse() + +// DEBUG print SIB info for an operand. +void PrintSIB(op_t Opnd) { + int BaseReg = sib_base(Opnd); + short IndexReg = sib_index(Opnd); + int ScaleFactor = sib_scale(Opnd); +#define NAME_LEN 5 + char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; + char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; +#if 0 + if (BaseReg != R_bp) // SIB code for NO BASE REG +#endif + qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1); + + if (IndexReg != R_sp) { // SIB code for NO INDEX REG + qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1); + } + msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor); +} // end PrintSIB() + +// Debug: print one operand from an instruction or DEF or USE list. +void PrintOneOperand(op_t Opnd, ulong features, int OpNum) { + if (Opnd.type == o_void) + return; + else if (Opnd.type == o_mem) { + msg(" Operand %d : memory : addr: %x", OpNum, Opnd.addr); + PrintDefUse(features, OpNum); + if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem? + msg(" Found SIB byte for o_mem operand "); + PrintSIB(Opnd); + } + } + else if (Opnd.type == o_phrase) { + msg(" Operand %d : memory phrase :", OpNum); + PrintDefUse(features, OpNum); + if (Opnd.hasSIB) { // has SIB info + PrintSIB(Opnd); + } + else { // no SIB info + ushort BaseReg = Opnd.phrase; + msg(" reg %s", RegNames[BaseReg]); + } + if (Opnd.addr != 0) { + msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); + } + } + else if (Opnd.type == o_displ) { + msg(" Operand %d : memory displ :", OpNum); + ea_t offset = Opnd.addr; + PrintDefUse(features, OpNum); + if (Opnd.hasSIB) { + PrintSIB(Opnd); + msg(" displ %d", offset); + } + else { + ushort BaseReg = Opnd.reg; + msg(" reg %s displ %d", RegNames[BaseReg], offset); + } + } + else if (Opnd.type == o_reg) { + msg(" Operand %d : register", OpNum); + msg(" regno: %d", Opnd.reg); + PrintDefUse(features, OpNum); + } + else if (Opnd.type == o_imm) { + msg(" Operand %d : immed", OpNum); + PrintDefUse(features, OpNum); + } + else if (Opnd.type == o_far) { + msg(" Operand %d : FarPtrImmed", OpNum); + msg(" addr: %x", Opnd.addr); + PrintDefUse(features, OpNum); + } + else if (Opnd.type == o_near) { + msg(" Operand %d : NearPtrImmed", OpNum); + msg(" addr: %x", Opnd.addr); + PrintDefUse(features, OpNum); + } + else { + msg(" Operand %d : unknown", OpNum); + PrintDefUse(features, OpNum); + } + if (!(Opnd.showed())) + msg(" HIDDEN "); + return; +} // end of PrintOneOperand() + +// DEBUG print operands for Inst. +void SMPInstr::PrintOperands(void) const { + op_t Opnd; + for (int i = 0; i < UA_MAXOP; ++i) { + Opnd = SMPcmd.Operands[i]; + PrintOneOperand(Opnd, this->features, i); + } + msg(" \n"); + return; +} // end of SMPInstr::PrintOperands() + +// Print out the destination operand list for the instruction, given +// the OptCategory for the instruction as a hint. +char * SMPInstr::DestString(int OptType) { + static char DestList[MAXSTR] = { '\0', '\0' }; + int RegDestCount = 0; + for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { + op_t DefOpnd = this->GetDef(DefIndex).GetOp(); + if (o_reg == DefOpnd.type) { + ushort DestReg = DefOpnd.reg; + if (0 == RegDestCount) { + qstrncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg])); + } + else { + qstrncat(DestList, " ", MAXSTR); + qstrncat(DestList, RegNames[DestReg], MAXSTR); + } + ++RegDestCount; + } + } + if (0 >= RegDestCount) { + msg("WARNING: No destination registers: %s\n", this->GetDisasm()); + } + else { + qstrncat(DestList, " ZZ ", MAXSTR); + } + return DestList; +} // end of SMPInstr::DestString() + +// Equality operator for SMPInstr. Key field is address. +int SMPInstr::operator==(const SMPInstr &rhs) const { + if (this->address != rhs.GetAddr()) + return 0; + else + return 1; +} + +// Inequality operator for SMPInstr. Key field is address. +int SMPInstr::operator!=(const SMPInstr &rhs) const { + return (this->address != rhs.GetAddr()); +} + +// Less than operator for sorting SMPInstr lists. Key field is address. +int SMPInstr::operator<(const SMPInstr &rhs) const { + return (this->address < rhs.GetAddr()); +} + +// Less than or equal operator for sorting SMPInstr lists. Key field is address. +int SMPInstr::operator<=(const SMPInstr &rhs) const { + return (this->address <= rhs.GetAddr()); +} + +#define MD_FIRST_ENTER_INSTR NN_enterw +#define MD_LAST_ENTER_INSTR NN_enterq +// Is this instruction the one that allocates space on the +// stack for the local variables? +bool SMPInstr::MDIsFrameAllocInstr(void) const { + // The frame allocating instruction should look like: + // sub esp,48 or add esp,-64 etc. + if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { + if (Defs.GetRef(0).GetOp().is_reg(R_sp)) { + // We know that an addition or subtraction is being + // performed on the stack pointer. This should not be + // possible within the prologue except at the stack + // frame allocation instruction, so return true. We + // could be more robust in this analysis in the future. **!!** + // CAUTION: If a compiler allocates 64 bytes for locals + // and 16 bytes for outgoing arguments in a single + // instruction: sub esp,80 + // you cannot insist on finding sub esp,LocSize + // To make this more robust, we are going to insist that + // an allocation of stack space is either performed by + // adding a negative immediate value, or by subtracting + // a positive immediate value. We will throw in, free of + // charge, a subtraction of a register, which is how alloca() + // usually allocates stack space. + if (o_imm == Uses.GetRef(0).GetOp().type) { + signed long TempImm = (signed long) Uses.GetRef(0).GetOp().value; + if (((0 > TempImm) && (SMPcmd.itype == NN_add)) + || ((0 < TempImm) && (SMPcmd.itype == NN_sub))) { + return true; + } + } + else if ((o_reg == Uses.GetRef(0).GetOp().type) + && (SMPcmd.itype == NN_sub)) { // alloca() ? + return true; + } + } + } + else if ((SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (SMPcmd.itype <= MD_LAST_ENTER_INSTR)) { + return true; + } + return false; +} // end of SMPInstr::MDIsFrameAllocInstr() + +// Is this instruction in the epilogue the one that deallocates the local +// vars region of the stack frame? +bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const { + // The usual compiler idiom for the prologue on x86 is to + // deallocate the local var space with: mov esp,ebp + // It could be add esp,constant. We can be tricked by + // add esp,constant when the constant is just the stack + // adjustment after a call. We will have to insist that + // the immediate operand have at least the value of + // LocalVarsSize for this second form, and that UseFP be true + // for the first form. + if (UseFP && (this->SMPcmd.itype == NN_mov) + && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) + && (this->Uses.GetRef(0).GetOp().is_reg(R_bp))) + return true; + else if ((this->SMPcmd.itype == NN_add) + && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) + && (this->Uses.GetRef(1).GetOp().is_imm((uval_t) LocalVarsSize))) + return true; + else if ((this->SMPcmd.itype == NN_add) + && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) + && (this->Uses.GetRef(1).GetOp().type == o_imm)) { + msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); + return true; + } + else if (NN_leave == this->SMPcmd.itype) + return true; + else + return false; +} // end of SMPInstr::MDIsFrameDeallocInstr() + +// Is instruction a no-op? There are 1-byte, 2-byte, etc versions of no-ops. +bool SMPInstr::MDIsNop(void) const { + bool IsNop = false; + ushort opcode = this->SMPcmd.itype; + if (NN_nop == opcode) + IsNop = true; + else if (NN_mov == opcode) { + if ((o_reg == this->SMPcmd.Operands[0].type) + && this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) { + // We have a register to register move with source == destination. + IsNop = true; + } + } + else if (NN_lea == opcode) { + if ((o_reg == this->SMPcmd.Operands[0].type) + && (o_displ == this->SMPcmd.Operands[1].type)) { + // We are looking for 6-byte no-ops like lea esi,[esi+0] + ushort destreg = this->SMPcmd.Operands[0].reg; + if ((this->SMPcmd.Operands[1].hasSIB) + && (destreg == (ushort) sib_base(this->SMPcmd.Operands[1]))) { + IsNop = true; + } + else if (destreg == this->SMPcmd.Operands[1].reg) { + IsNop = true; + } + } + } + return IsNop; +} // end of SMPInstr::MDIsNop() + +// MACHINE DEPENDENT: Is instruction a return instruction? +bool SMPInstr::MDIsReturnInstr(void) const { + return ((SMPcmd.itype == NN_retn) || (SMPcmd.itype == NN_retf)); +} + +// MACHINE DEPENDENT: Is instruction a POP instruction? +#define FIRST_POP_INST NN_pop +#define LAST_POP_INST NN_popfq +bool SMPInstr::MDIsPopInstr(void) const { + return ((SMPcmd.itype >= FIRST_POP_INST) + && (SMPcmd.itype <= LAST_POP_INST)); +} + +// MACHINE DEPENDENT: Is instruction a PUSH instruction? +#define FIRST_PUSH_INST NN_push +#define LAST_PUSH_INST NN_pushfq +bool SMPInstr::MDIsPushInstr(void) const { + return ((SMPcmd.itype >= FIRST_PUSH_INST) + && (SMPcmd.itype <= LAST_PUSH_INST)); +} + +// MACHINE DEPENDENT: Is instruction an ENTER instruction? +#define FIRST_ENTER_INST NN_enterw +#define LAST_ENTER_INST NN_enterq +bool SMPInstr::MDIsEnterInstr(void) const { + return ((SMPcmd.itype >= FIRST_ENTER_INST) + && (SMPcmd.itype <= LAST_ENTER_INST)); +} + +// MACHINE DEPENDENT: Is instruction a LEAVE instruction? +#define FIRST_LEAVE_INST NN_leavew +#define LAST_LEAVE_INST NN_leaveq +bool SMPInstr::MDIsLeaveInstr(void) const { + return ((SMPcmd.itype >= FIRST_LEAVE_INST) + && (SMPcmd.itype <= LAST_LEAVE_INST)); +} + +// MACHINE DEPENDENT: Does instruction use a callee-saved register? +bool SMPInstr::MDUsesCalleeSavedReg(void) const { + for (size_t index = 0; index < this->Uses.GetSize(); ++index) { + op_t CurrUse = this->GetUse(index).GetOp(); + if (CurrUse.is_reg(R_bp) || CurrUse.is_reg(R_si) + || CurrUse.is_reg(R_di) || CurrUse.is_reg(R_bx)) { + return true; + } + } + return false; +} // end of SMPInstr::MDUsesCalleeSavedReg() + +// Is the instruction a register to register copy of a stack pointer or frame pointer +// into a general purpose register (which mmStrata will now need to track as a stack +// relative pointer)? +bool SMPInstr::MDIsStackPointerCopy(bool UseFP) const { + if ((this->OptType == 3) && (this->GetDef(0).GetOp().type == o_reg) + && (!(this->GetDef(0).GetOp().is_reg(R_sp)))) { + if (UseFP) { + if (this->GetUse(0).GetOp().is_reg(R_bp)) + // Move of base pointer EBP into a general register + return true; + else if ((this->GetUse(0).GetOp().is_reg(R_sp)) + && !(this->GetDef(0).GetOp().is_reg(R_bp))) + // Move of ESP into something besides a base pointer + return true; + } + else if (this->GetUse(0).GetOp().is_reg(R_sp)) { + // Move of ESP into a register; no base pointer used in this function + return true; + } + } + return false; +} // end of SMPInstr::MDIsStackPointerCopy() + +// Is instruction a branch (conditional or unconditional) to a +// code target that is not in the current chunk? +bool SMPInstr::IsBranchToFarChunk(void) const { + func_t *CurrChunk = get_fchunk(this->address); + bool FarBranch = false; + if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) { + // Instruction is a direct branch, conditional or unconditional + if (this->NumUses() > 0) { + op_t JumpTarget = this->GetUse(0).GetOp(); + if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) { + // Branches to a code address + func_t *TargetChunk = get_fchunk(JumpTarget.addr); + // Is target address within the same chunk as the branch? + FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA); + } + } + } + return FarBranch; +} // end of SMPInstr::IsBranchToFarChunk() + +// Analyze the instruction and its operands. +void SMPInstr::Analyze(void) { + if (this->analyzed) + return; + + // Fill cmd structure with disassembly of instr + ua_ana0(this->address); + // Get the instr disassembly text. + (void) generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1); + // Remove interactive color-coding tags. + tag_remove(this->disasm, this->disasm, 0); + // Copy cmd to member variable SMPcmd. + this->SMPcmd = cmd; + // Get the canonical features into member variables features. + this->features = cmd.get_canon_feature(); + + // Record what type of instruction this is, simplified for the needs + // of data flow and type analysis. + this->type = DFACategory[cmd.itype]; + // Record optimization category. + this->OptType = OptCategory[cmd.itype]; + + // Build the DEF and USE lists for the instruction. + this->BuildSMPDefUseLists(); + // Fix up machine dependent quirks in the def and use lists. + this->MDFixupDefUseLists(); + + // Determine whether the instruction is a jump target by looking + // at its cross references and seeing if it has "TO" code xrefs. + xrefblk_t xrefs; + for (bool ok = xrefs.first_to(this->address, XREF_FAR); ok; ok = xrefs.next_to()) { + if ((xrefs.from != 0) && (xrefs.iscode)) { + this->JumpTarget = true; + break; + } + } + + this->analyzed = true; + return; +} // end of SMPInstr::Analyze() + +// Fill the Defs and Uses private data members. +void SMPInstr::BuildSMPDefUseLists(void) { + size_t OpNum; + + // Start with the Defs. + for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { + if (this->features & DefMacros[OpNum]) { // DEF + this->Defs.SetRef(this->SMPcmd.Operands[OpNum]); + } + } // end for (OpNum = 0; ...) + + // Now, do the Uses. Uses have special case operations, because + // any memory operand could have register uses in the addressing + // expression, and we must create Uses for those registers. For + // example: mov eax,[ebx + esi*2 + 044Ch] + // This is a two-operand instruction with one def: eax. But + // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi. + // The first use is an op_t of type o_phrase (memory phrase), + // which can be copied from cmd.Operands[1]. Likewise, we just + // copy cmd.Operands[0] into the defs list. However, we must create + // op_t types for register ebx and register esi and append them + // to the Uses list. This is handled by the machine dependent + // method MDFixupDefUseLists(). + for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { + if (this->features & UseMacros[OpNum]) { // USE + this->Uses.SetRef(this->SMPcmd.Operands[OpNum]); + } + } // end for (OpNum = 0; ...) + + return; +} // end of SMPInstr::BuildSMPDefUseLists() + +// If DefReg is not already in the DEF list, add a DEF for it. +void SMPInstr::MDAddRegDef(ushort DefReg) { + bool AlreadySet = false; + for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { + if (this->GetDef(DefIndex).GetOp().is_reg(DefReg)) { + AlreadySet = true; + break; + } + } + if (!AlreadySet) { + op_t TempDef; + TempDef.type = o_reg; + TempDef.reg = DefReg; + this->Defs.SetRef(TempDef); + } + return; +} // end of SMPInstr::MDAddRegDef() + +// If UseReg is not already in the USE list, add a USE for it. +void SMPInstr::MDAddRegUse(ushort UseReg) { + bool AlreadyUsed = false; + for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) { + if (this->GetUse(UseIndex).GetOp().is_reg(UseReg)) { + AlreadyUsed = true; + break; + } + } + if (!AlreadyUsed) { + op_t TempUse; + TempUse.type = o_reg; + TempUse.reg = UseReg; + this->Uses.SetRef(TempUse); + } + return; +} // end of SMPInstr::MDAddRegUse() + +// Perform machine dependent ad hoc fixes to the def and use lists. +// For example, some multiply and divide instructions in x86 implicitly +// use and/or define register EDX. For memory phrase examples, see comment +// in BuildSMPDefUseLists(). +void SMPInstr::MDFixupDefUseLists(void) { + // First, handle the uses hidden in memory addressing modes. Note that we do not + // care whether we are dealing with a memory destination operand or source + // operand, because register USEs, not DEFs, happen within the addressing expressions. + size_t OpNum; + for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { + op_t Opnd = SMPcmd.Operands[OpNum]; + if ((Opnd.type == o_phrase) || (Opnd.type == o_displ)) { + if (Opnd.hasSIB) { + int BaseReg = sib_base(Opnd); + short IndexReg = sib_index(Opnd); + if (R_none != BaseReg) { + op_t BaseOpnd = Opnd; // Init to current operand field values + BaseOpnd.type = o_reg; // Change type and reg fields + BaseOpnd.reg = BaseReg; + BaseOpnd.hasSIB = 0; + this->Uses.SetRef(BaseOpnd); + } + if (R_none != IndexReg) { // Should we disallow R_sp here? **!!** + op_t IndexOpnd = Opnd; // Init to current operand field values + IndexOpnd.type = o_reg; // Change type and reg fields + IndexOpnd.reg = IndexReg; + IndexOpnd.hasSIB = 0; + this->Uses.SetRef(IndexOpnd); + } + } + else { // no SIB byte; can have base reg but no index reg + ushort BaseReg = Opnd.reg; // cannot be R_none for no SIB case + op_t BaseOpnd = Opnd; // Init to current operand field values + BaseOpnd.type = o_reg; // Change type and reg fields + BaseOpnd.reg = BaseReg; + BaseOpnd.hasSIB = 0; + this->Uses.SetRef(BaseOpnd); + } + } // end if (o_phrase or o_displ operand) + } // end for (all operands) + + // Now, handle special instruction categories that have implicit operands. + if (NN_cmpxchg == SMPcmd.itype) { + // x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis + // sound by declaring that EAX is always a DEF. + this->MDAddRegDef(R_ax); + } // end if NN_cmpxchg + else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) { + // IDA does not include the stack pointer in the DEFs or USEs. + this->MDAddRegDef(R_sp); + this->MDAddRegUse(R_sp); + } + else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) { + // Entire function prologue or epilogue microcoded. + this->MDAddRegDef(R_sp); + this->MDAddRegUse(R_sp); + this->MDAddRegDef(R_bp); + this->MDAddRegUse(R_bp); + } + else if (8 == this->GetOptType()) { + // This category implicitly writes to EDX:EAX. + this->MDAddRegDef(R_dx); + this->MDAddRegDef(R_ax); + } // end else if (8 == GetOptType) + else if (7 == this->GetOptType()) { + // Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX. + // DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX. + // DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro + // sense, because they are not displayed in the disassembly text). For example: + // mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the + // source operand is only 8 bits wide, there is room to hold the result in AX + // without using DX: mul bl means AX <-- AL*BL. + // IMUL has forms with a hidden EAX or AX operand and forms with no implicit + // operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that + // EBX*EDX gets truncated and the result placed in EBX (no hidden operands). + bool HiddenEAXUse = false; + for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) { + op_t TempUse = this->GetUse(UseIndex).GetOp(); + if (!TempUse.showed()) { // hidden operand + if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits + this->MDAddRegUse(R_dx); + this->MDAddRegDef(R_ax); + this->MDAddRegDef(R_dx); + } + } + } + } // end else if (7 == OptType) + return; +} // end of SMPInstr::MDFixupDefUseLists() + +// Handle x86 opcode SIB byte annotations. +void SMPInstr::MDAnnotateSIBStackConstants(FILE *AnnotFile, op_t Opnd, ea_t offset, bool UseFP) { + int BaseReg = sib_base(Opnd); + short IndexReg = sib_index(Opnd); + if (BaseReg == R_none) { + msg("BaseReg of R_none at %x\n", this->address); + } + if (BaseReg == R_sp) { // ESP cannot be IndexReg + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d displ %s\n", + this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm); + } + else if (UseFP && ((IndexReg == R_bp) || (BaseReg == R_bp))) { + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d displ %s\n", + this->SMPcmd.ea, this->SMPcmd.size, offset, this->disasm); + } + + return; +} // end of MDAnnotateSIBStackConstants + +// Emit annotations for constants used as ptr offsets from EBP or +// ESP into the stack frame. Only pay attention to EBP-relative +// offsets if EBP is being used as a frame pointer (UseFP == true). +void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) { + op_t Opnd; +#if 0 + if (this->address == 0x80925f4) { + msg("PROBLEM INSTRUCTION: \n"); + this->PrintOperands(); + } +#endif + for (int i = 0; i < UA_MAXOP; ++i) { + Opnd = SMPcmd.Operands[i]; + if (Opnd.type == o_displ) { + ea_t offset = Opnd.addr; + if (Opnd.hasSIB) { + MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); + } + else { // no SIB + ushort BaseReg = Opnd.reg; + if (BaseReg == R_sp) { + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d displ %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + else if (UseFP && (BaseReg == R_bp)) { + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d displ %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + } // end if (Opnd.hasSIB) ... else ... + } // end if (Opnd.type == o_displ) + else if (Opnd.type == o_phrase) { + ea_t offset = 0; // mmStrata thinks [esp] is [esp+0] + if (Opnd.hasSIB) { + MDAnnotateSIBStackConstants(AnnotFile, Opnd, offset, UseFP); + } + else { // Something like [ecx] + ushort BaseReg = Opnd.reg; + if (BaseReg == R_sp) { + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d displ %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + else if (UseFP && (BaseReg == R_bp)) { + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d displ %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + } // end if (Opnd.hasSIB) ... else ... + } // end else if (Opnd.type == o_phrase) + } // end for all operands + + // If we move a stack pointer or frame pointer into another register, we + // need to annotate the implicit zero offset, e.g. mov edi,esp == mov edi,esp+0 + // and edi is becoming a stack pointer that mmStrata needs to track. + if (this->MDIsStackPointerCopy(UseFP)) { + if (UseFP && this->GetUse(0).GetOp().is_reg(R_bp)) { + qfprintf(AnnotFile, "%x %d PTRIMMEDEBP STACK 0 displ %s\n", + SMPcmd.ea, SMPcmd.size, disasm); + } + else { + qfprintf(AnnotFile, "%x %d PTRIMMEDESP STACK 0 displ %s\n", + SMPcmd.ea, SMPcmd.size, disasm); + } + } + + return; +} // end of SMPInstr::AnnotateStackConstants() + +// Emit all annotations for the instruction. +void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile) { + ea_t addr = this->address; + flags_t InstrFlags = getFlags(addr); + bool MemDest = this->HasDestMemoryOperand(); + bool MemSrc = this->HasSourceMemoryOperand(); + bool SecondSrcOperandNum = this->IsSecondSrcOperandNumeric(InstrFlags); + + ++OptCount[OptType]; // keep count for debugging info + +#if SMP_DEBUG_MEM + if (MemDest || MemSrc) { + msg("OptType: %d %s", OptType, disasm); + this->PrintOperands(); + } +#endif + + // Emit appropriate optimization annotations. + bool SDTInstrumentation = false; + switch (OptType) { + case 0: // SDT will have to handle these + { +#if SMP_DEBUG_TYPE0 + msg("OptType 0: %x %s\n", addr, disasm); +#endif + // mmStrata wants to suppress warnings on the PUSH + // instructions that precede the LocalVarsAllocInstr + // (i.e. the PUSHes of callee-saved regs). + if (!AllocSeen && this->MDIsPushInstr()) { + qfprintf(AnnotFile, "%x %d INSTR LOCAL NoWarn %s \n", + addr, -3, disasm); + } + else { + SDTInstrumentation = true; + } + break; + } + + case 1: // nothing for SDT to do + { qfprintf(AnnotFile, "%x %d INSTR LOCAL NoMetaUpdate %s \n", + addr, -1, disasm); + ++AnnotationCount[OptType]; + break; + } + + case 4: // INC, DEC, etc.: no SDT work unless MemDest + { if (MemDest || MemSrc) { + SDTInstrumentation = true; + break; // treat as category 0 + } + qfprintf(AnnotFile, "%x %d INSTR LOCAL Always1stSrc %s \n", + addr, -1, disasm); + ++AnnotationCount[OptType]; + break; + } + + case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. + { if (MemDest || MemSrc) { + SDTInstrumentation = true; + break; // treat as category 0 + } + if (SecondSrcOperandNum) { // treat as category 1 + qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", + addr, -1, OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + } + break; + } + + case 6: // Only OS code should include these; problem for SDT + { if (MemDest) { + SDTInstrumentation = true; + break; // treat as category 0 + } + qfprintf(AnnotFile, "%x %d INSTR LOCAL AlwaysPTR %s \n", + addr, -OptType, disasm); + ++AnnotationCount[OptType]; + break; + } + + case 8: // Implicitly writes to EDX:EAX, always numeric. + { qfprintf(AnnotFile, "%x %d INSTR LOCAL n EDX EAX ZZ %s %s \n", + addr, -2, OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + SDTInstrumentation = true; + break; + } + + case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) + { if (MemDest) { +#if SMP_DEBUG + // MemDest seems to happen too much. + msg("Floating point MemDest: %s \n", disasm); +#endif + SDTInstrumentation = true; + break; // treat as category 0 + } + qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", + addr, -1, OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + break; + } + + default: // 2,3,7: Optimization possibilities depend on operands + { +#if SMP_DEBUG2 + if (OptType == 3) { // MOV instr class + if (MemDest) { + msg("MemDest on MOV: %s\n", disasm); + } + else if (!SecondSrcOperandNum) { + msg("MOV: not 2nd op numeric: %s\n", disasm); + this->PrintOperands(); + } + } +#endif + SDTInstrumentation = true; + if (MemDest) { +#if SMP_DEBUG_XOR + if (OptType == 2) + msg("MemDest on OptType 2: %s\n", disasm); +#endif + break; // treat as category 0 + } + if ((OptType == 2) || (OptType == 7) || SecondSrcOperandNum) { + qfprintf(AnnotFile, "%x %d INSTR LOCAL n %s %s %s \n", + addr, -2, this->DestString(OptType), + OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + } + break; + } + } // end switch (OptType) + + // If mmStrata is going to have to deal with the + // instruction, then we can annotate EBP and ESP + // relative constant offsets. If we have emitted + // an annotation of type -1, there is no point + // in telling mmStrata about these constants. + if (SDTInstrumentation) { + this->AnnotateStackConstants(UseFP, AnnotFile); + } + return; +} // end of SMPInstr::EmitAnnotations() diff --git a/SMPInstr.h b/SMPInstr.h new file mode 100644 index 0000000000000000000000000000000000000000..86c36bd5d4b48d1963e4041f73ee8ad6817117c5 --- /dev/null +++ b/SMPInstr.h @@ -0,0 +1,87 @@ +#ifndef SMPINSTR_H +#define SMPINSTR_H 1 + +// SMPInstr.h +// +// This header defines the interfaces needed for analyzing instructions. + +#include <cstddef> + +#include <pro.h> +#include <ida.hpp> +#include <ua.hpp> + +#include "SMPDataFlowAnalysis.h" + +using namespace std; + +// Debug: print one operand from an instruction or DEF or USE list. +void PrintOneOperand(op_t Opnd, ulong features, int OpNum); + +class SMPInstr { +public: + // Constructors + SMPInstr(ea_t addr); + // Operators + int operator==(const SMPInstr &rhs) const; + int operator<(const SMPInstr &rhs) const; + int operator<=(const SMPInstr &rhs) const; + int operator!=(const SMPInstr &rhs) const; + // Get methods + inline ea_t GetAddr(void) const { return address; }; + inline char *GetDisasm(void) { return disasm; }; + inline DefOrUse GetUse(size_t index) const { return Uses.GetRef(index); }; + inline DefOrUse GetDef(size_t index) const { return Defs.GetRef(index); }; + inline size_t NumUses(void) const { return Uses.GetSize(); }; + inline size_t NumDefs(void) const { return Defs.GetSize(); }; + inline insn_t GetCmd(void) const { return SMPcmd; }; + inline int GetOptType(void) const { return OptType; }; + inline SMPitype GetDataFlowType(void) const { return type; }; + // Set methods + inline void SetCmd(insn_t cmd) { SMPcmd = cmd; return; }; + // Query methods + bool HasDestMemoryOperand(void) const; // Does instruction write to memory? + bool HasSourceMemoryOperand(void) const; // Does instruction read from memory? + bool IsSecondSrcOperandNumeric(flags_t F) const; + bool IsBasicBlockTerminator(void) const; + inline bool IsJumpTarget(void) const { return JumpTarget; }; + bool IsBranchToFarChunk(void) const; // instr jumps outside current chunk + bool MDIsNop(void) const; // instruction is simple or complex no-op + bool MDIsPushInstr(void) const; + bool MDIsPopInstr(void) const; + bool MDIsReturnInstr(void) const; + bool MDIsEnterInstr(void) const; + bool MDIsLeaveInstr(void) const; + bool MDIsStackPointerCopy(bool UseFP) const; // copies ESP or EBP to register + bool MDIsFrameAllocInstr(void) const; + bool MDIsFrameDeallocInstr(bool UseFP, asize_t LocSize) const; + bool MDUsesCalleeSavedReg(void) const; + // Printing methods + void PrintOperands(void) const; + char *DestString(int OptType); + // Analysis methods + void Analyze(void); // Fill in basic data for instruction. + void AnnotateStackConstants(bool UseFP, FILE *AnnotFile); + void EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile); +private: + // Data + insn_t SMPcmd; // copy of 'cmd' for this instruction + ulong features; // Canonical features for SMPcmd + char disasm[MAXSTR]; // Disassembly text of instruction + DefOrUseList Defs; // Definitions list + DefOrUseList Uses; // Uses list + SMPitype type; // Data flow analysis category + int OptType; // Optimization category (see OptCategory[]) + ea_t address; // Code address for 1st byte of instruction + bool analyzed; // Has instr been analyzed yet, setting type + // and DEF and USE lists? + bool JumpTarget; // Is Instr the target of any jumps or branches? + // Methods + void BuildSMPDefUseLists(void); // Build DEF and USE lists for instruction + void MDFixupDefUseLists(void); // Machine-dependent ad hoc fixes + void MDAddRegDef(ushort); // Add DEF of register if not already a DEF + void MDAddRegUse(ushort); // Add USE of register if not already a USE + void MDAnnotateSIBStackConstants(FILE *, op_t, ea_t, bool); // Handle x86 opcode SIB byte +}; // end class SMPInstr + +#endif