diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp index 4546bde8da9d07dc7d49727b67d3b806d917a13a..2bde46b1421faf2034945646ed57eeb2fe1fbce3 100644 --- a/SMPDataFlowAnalysis.cpp +++ b/SMPDataFlowAnalysis.cpp @@ -7,8 +7,13 @@ #include <list> #include <set> +#include <vector> +#include <algorithm> + +#include <cstring> #include <pro.h> +#include <assert.h> #include <ida.hpp> #include <idp.hpp> #include <allins.hpp> @@ -31,7 +36,18 @@ #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions #define SMP_DEBUG_FRAMEFIXUP 0 -#define SMP_DEBUG_DATAFLOW 1 +#define SMP_DEBUG_DATAFLOW 0 + +// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008 +#define SMP_COMPUTE_LVA_SSA 0 + +// Basic block number 0 is the top of the CFG lattice. +#define SMP_TOP_BLOCK 0 + +// Set SharedTailChunks to TRUE for entire printf family +// After we restructure the parent/tail structure of the database, this +// will go away. +#define KLUDGE_VFPRINTF_FAMILY 1 // Used for binary search by function number in SMPStaticAnalyzer.cpp // to trigger debugging output and find which instruction in which @@ -62,6 +78,60 @@ static char *OptExplanation[LAST_OPT_CATEGORY + 1] = "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest" }; +// We need to make subword registers equal to their containing registers when we +// do comparisons, so that we will realize that register EAX is killed by a prior DEF +// of register AL, for example. However, we do not want AL and AH to be equal to each other. +#define FIRST_x86_SUBWORD_REG R_al +#define LAST_x86_SUBWORD_REG R_bh +bool MDLessReg(const ushort Reg1, const ushort Reg2) { + bool FirstSubword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG)); + bool SecondSubword = ((Reg2 >= FIRST_x86_SUBWORD_REG) && (Reg2 <= LAST_x86_SUBWORD_REG)); + + // Only complexity comes when one is subword and the other is not. 
+ if (FirstSubword == SecondSubword) + return (Reg1 < Reg2); // simple case + else { + if (FirstSubword) { + // See enumeration RegNo in intel.hpp. + if (((Reg1 < 20) && ((Reg1 - Reg2) == 16)) + || ((Reg1 >= 20) && ((Reg1 - Reg2) == 20))) + return false; // subword matches enclosing register + else + return (Reg1 < Reg2); + } + else { // must be SecondSubword + if (((Reg2 < 20) && ((Reg2 - Reg1) == 16)) + || ((Reg2 >= 20) && ((Reg2 - Reg1) == 20))) + return false; // subword matches enclosing register + else + return (Reg1 < Reg2); + } + } +} // end of MDLessReg() + +// In SSA computations, we are storing the GlobalNames index into the op_t fields +// n, offb, and offo. This function extracts an unsigned int from these three 8-bit +// fields. +unsigned int ExtractGlobalIndex(op_t GlobalOp) { + unsigned int index = (unsigned int) GlobalOp.offo; + index <<= 16; + index |= (((unsigned int) GlobalOp.offb) << 8); + index |= ((unsigned int) GlobalOp.n); + return index; +} + +// ***************************************************************** +// Class DefOrUse +// ***************************************************************** + +// Constructor. +DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) { + this->Operand = Ref; + this->OpType = Type; + this->SSANumber = SSASub; + return; +} + // ***************************************************************** // Class DefOrUseList // ***************************************************************** @@ -72,21 +142,34 @@ DefOrUseList::DefOrUseList(void) { } // Set a Def or Use into the list, along with its type. -void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type) { - this->Refs.push_back(Ref); - this->Types.push_back(Type); +void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type, int SSASub) { + DefOrUse CurrRef(Ref, Type, SSASub); + this->Refs.push_back(CurrRef); return; } // Get a reference by index. 
-op_t DefOrUseList::GetRef(size_t index) const { +DefOrUse DefOrUseList::GetRef(size_t index) const { return Refs[index]; } -SMPOperandType DefOrUseList::GetRefType(size_t index) const { - return Types[index]; +// ***************************************************************** +// Class SMPPhiFunction +// ***************************************************************** + +// Constructor +SMPPhiFunction::SMPPhiFunction(int GlobIndex) { + this->index = GlobIndex; + return; } +// Add a phi item to the list +void SMPPhiFunction::PushBack(DefOrUse Ref) { + this->SubscriptedOps.SetRef(Ref.GetOp(), Ref.GetType(), Ref.GetSSANum()); + return; +} + + // ***************************************************************** // Class SMPInstr // ***************************************************************** @@ -109,9 +192,8 @@ bool SMPInstr::IsBasicBlockTerminator() const { bool SMPInstr::HasDestMemoryOperand(void) const { bool MemDest = false; for (size_t index = 0; index < Defs.GetSize(); ++index) { - MemDest = ((Defs.GetRef(index).type == o_mem) - || (Defs.GetRef(index).type == o_phrase) - || (Defs.GetRef(index).type == o_displ)); + optype_t CurrType = Defs.GetRef(index).GetOp().type; + MemDest = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemDest) break; } @@ -122,9 +204,8 @@ bool SMPInstr::HasDestMemoryOperand(void) const { bool SMPInstr::HasSourceMemoryOperand(void) const { bool MemSrc = false; for (size_t index = 0; index < Uses.GetSize(); ++index) { - MemSrc = ((Uses.GetRef(index).type == o_mem) - || (Uses.GetRef(index).type == o_phrase) - || (Uses.GetRef(index).type == o_displ)); + optype_t CurrType = Uses.GetRef(index).GetOp().type; + MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ)); if (MemSrc) break; } @@ -167,6 +248,10 @@ bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { void PrintDefUse(ulong feature, int OpNum) { // CF_ macros number the operands from 1 to 6, while OpNum // is a 0 to 5 
index into the insn_t.Operands[] array. + // OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc. + // operand and not an instruction operand. + if (-1 == OpNum) + return; switch (OpNum) { case 0: if (feature & CF_CHG1) @@ -227,73 +312,79 @@ void PrintSIB(op_t Opnd) { msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor); } // end PrintSIB() -// DEBUG print operands for Inst. -void SMPInstr::PrintOperands() const { - op_t Opnd; - for (int i = 0; i < UA_MAXOP; ++i) { - Opnd = SMPcmd.Operands[i]; - if (Opnd.type == o_void) - continue; - else if (Opnd.type == o_mem) { - msg(" Operand %d : memory : addr: %x", i, Opnd.addr); - PrintDefUse(features, i); - if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem? - msg(" Found SIB byte for o_mem operand "); - PrintSIB(Opnd); - } - } - else if (Opnd.type == o_phrase) { - msg(" Operand %d : memory phrase :", i); - PrintDefUse(features, i); - if (Opnd.hasSIB) { // has SIB info - PrintSIB(Opnd); - } - else { // no SIB info - ushort BaseReg = Opnd.phrase; - msg(" reg %s", RegNames[BaseReg]); - } - if (Opnd.addr != 0) { - msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); - } - } - else if (Opnd.type == o_displ) { - ea_t offset = Opnd.addr; - PrintDefUse(features, i); - if (Opnd.hasSIB) { - PrintSIB(Opnd); - msg(" displ %d", offset); - } - else { - ushort BaseReg = Opnd.reg; - msg(" Operand %d : memory displ : reg %s displ %d", i, - RegNames[BaseReg], offset); - } +// Debug: print one operand from an instruction or DEF or USE list. +void PrintOneOperand(op_t Opnd, ulong features, int OpNum) { + if (Opnd.type == o_void) + return; + else if (Opnd.type == o_mem) { + msg(" Operand %d : memory : addr: %x", OpNum, Opnd.addr); + PrintDefUse(features, OpNum); + if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem? 
+ msg(" Found SIB byte for o_mem operand "); + PrintSIB(Opnd); } - else if (Opnd.type == o_reg) { - msg(" Operand %d : register", i); - msg(" regno: %d", Opnd.reg); - PrintDefUse(features, i); + } + else if (Opnd.type == o_phrase) { + msg(" Operand %d : memory phrase :", OpNum); + PrintDefUse(features, OpNum); + if (Opnd.hasSIB) { // has SIB info + PrintSIB(Opnd); } - else if (Opnd.type == o_imm) { - msg(" Operand %d : immed", i); - PrintDefUse(features, i); + else { // no SIB info + ushort BaseReg = Opnd.phrase; + msg(" reg %s", RegNames[BaseReg]); } - else if (Opnd.type == o_far) { - msg(" Operand %d : FarPtrImmed", i); - msg(" addr: %x", Opnd.addr); - PrintDefUse(features, i); + if (Opnd.addr != 0) { + msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); } - else if (Opnd.type == o_near) { - msg(" Operand %d : NearPtrImmed", i); - msg(" addr: %x", Opnd.addr); - PrintDefUse(features, i); + } + else if (Opnd.type == o_displ) { + msg(" Operand %d : memory displ :", OpNum); + ea_t offset = Opnd.addr; + PrintDefUse(features, OpNum); + if (Opnd.hasSIB) { + PrintSIB(Opnd); + msg(" displ %d", offset); } else { - msg(" Operand %d : unknown", i); - PrintDefUse(features, i); + ushort BaseReg = Opnd.reg; + msg(" reg %s displ %d", RegNames[BaseReg], offset); } - if (!(Opnd.showed())) - msg(" HIDDEN "); + } + else if (Opnd.type == o_reg) { + msg(" Operand %d : register", OpNum); + msg(" regno: %d", Opnd.reg); + PrintDefUse(features, OpNum); + } + else if (Opnd.type == o_imm) { + msg(" Operand %d : immed", OpNum); + PrintDefUse(features, OpNum); + } + else if (Opnd.type == o_far) { + msg(" Operand %d : FarPtrImmed", OpNum); + msg(" addr: %x", Opnd.addr); + PrintDefUse(features, OpNum); + } + else if (Opnd.type == o_near) { + msg(" Operand %d : NearPtrImmed", OpNum); + msg(" addr: %x", Opnd.addr); + PrintDefUse(features, OpNum); + } + else { + msg(" Operand %d : unknown", OpNum); + PrintDefUse(features, OpNum); + } + if (!(Opnd.showed())) + msg(" HIDDEN "); + return; +} 
// end of PrintOneOperand() + +// DEBUG print operands for Inst. +void SMPInstr::PrintOperands(void) const { + op_t Opnd; + for (int i = 0; i < UA_MAXOP; ++i) { + Opnd = SMPcmd.Operands[i]; + PrintOneOperand(Opnd, this->features, i); } msg(" \n"); return; @@ -305,7 +396,7 @@ char * SMPInstr::DestString(int OptType) { static char DestList[MAXSTR] = { '\0', '\0' }; int RegDestCount = 0; for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { - op_t DefOpnd = this->GetDef(DefIndex); + op_t DefOpnd = this->GetDef(DefIndex).GetOp(); if (o_reg == DefOpnd.type) { ushort DestReg = DefOpnd.reg; if (0 == RegDestCount) { @@ -358,7 +449,7 @@ bool SMPInstr::MDIsFrameAllocInstr(void) const { // The frame allocating instruction should look like: // sub esp,48 or add esp,-64 etc. if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { - if (Defs.GetRef(0).is_reg(R_sp)) { + if (Defs.GetRef(0).GetOp().is_reg(R_sp)) { // We know that an addition or subtraction is being // performed on the stack pointer. This should not be // possible within the prologue except at the stack @@ -374,14 +465,14 @@ bool SMPInstr::MDIsFrameAllocInstr(void) const { // a positive immediate value. We will throw in, free of // charge, a subtraction of a register, which is how alloca() // usually allocates stack space. - if (o_imm == Uses.GetRef(0).type) { - signed long TempImm = (signed long) Uses.GetRef(0).value; + if (o_imm == Uses.GetRef(0).GetOp().type) { + signed long TempImm = (signed long) Uses.GetRef(0).GetOp().value; if (((0 > TempImm) && (SMPcmd.itype == NN_add)) || ((0 < TempImm) && (SMPcmd.itype == NN_sub))) { return true; } } - else if ((o_reg == Uses.GetRef(0).type) + else if ((o_reg == Uses.GetRef(0).GetOp().type) && (SMPcmd.itype == NN_sub)) { // alloca() ? return true; } @@ -405,16 +496,16 @@ bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const { // LocalVarsSize for this second form, and that UseFP be true // for the first form. 
if (UseFP && (this->SMPcmd.itype == NN_mov) - && (this->Defs.GetRef(0).is_reg(R_sp)) - && (this->Uses.GetRef(0).is_reg(R_bp))) + && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) + && (this->Uses.GetRef(0).GetOp().is_reg(R_bp))) return true; else if ((this->SMPcmd.itype == NN_add) - && (this->Defs.GetRef(0).is_reg(R_sp)) - && (this->Uses.GetRef(1).is_imm((uval_t) LocalVarsSize))) + && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) + && (this->Uses.GetRef(1).GetOp().is_imm((uval_t) LocalVarsSize))) return true; else if ((this->SMPcmd.itype == NN_add) - && (this->Defs.GetRef(0).is_reg(R_sp)) - && (this->Uses.GetRef(1).type == o_imm)) { + && (this->Defs.GetRef(0).GetOp().is_reg(R_sp)) + && (this->Uses.GetRef(1).GetOp().type == o_imm)) { msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); return true; } @@ -424,6 +515,36 @@ bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const { return false; } // end of SMPInstr::MDIsFrameDeallocInstr() +// MACHINE DEPENDENT: Is instruction a no-op? Returns true for NN_nop, for mov reg,reg with source == destination, and for lea reg,[reg+0]; returns false for everything else. +bool SMPInstr::MDIsNop(void) const { + bool IsNop = false; + ushort opcode = this->SMPcmd.itype; + if (NN_nop == opcode) + IsNop = true; + else if (NN_mov == opcode) { + if ((o_reg == this->SMPcmd.Operands[0].type) + && this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) { + // We have a register to register move with source == destination. + IsNop = true; + } + } + else if (NN_lea == opcode) { + if ((o_reg == this->SMPcmd.Operands[0].type) + && (o_displ == this->SMPcmd.Operands[1].type) && (0 == this->SMPcmd.Operands[1].addr)) { + // We are looking for 6-byte no-ops like lea esi,[esi+0]. The displacement (held in addr for o_displ) must be zero; lea esi,[esi+4] is an add, not a no-op. NOTE(review): a SIB index register is not examined here -- confirm lea esi,[esi+eax+0] cannot be misclassified. + ushort destreg = this->SMPcmd.Operands[0].reg; + if ((this->SMPcmd.Operands[1].hasSIB) + && (destreg == (ushort) sib_base(this->SMPcmd.Operands[1]))) { + IsNop = true; + } + else if (destreg == this->SMPcmd.Operands[1].reg) { + IsNop = true; + } + } + } + return IsNop; +} // end of SMPInstr::MDIsNop() + // MACHINE DEPENDENT: Is instruction a return instruction?
bool SMPInstr::MDIsReturnInstr(void) const { return ((SMPcmd.itype == NN_retn) || (SMPcmd.itype == NN_retf)); @@ -445,10 +566,26 @@ bool SMPInstr::MDIsPushInstr(void) const { && (SMPcmd.itype <= LAST_PUSH_INST)); } +// MACHINE DEPENDENT: Is instruction an ENTER instruction? +#define FIRST_ENTER_INST NN_enterw +#define LAST_ENTER_INST NN_enterq +bool SMPInstr::MDIsEnterInstr(void) const { + return ((SMPcmd.itype >= FIRST_ENTER_INST) + && (SMPcmd.itype <= LAST_ENTER_INST)); +} + +// MACHINE DEPENDENT: Is instruction a LEAVE instruction? +#define FIRST_LEAVE_INST NN_leavew +#define LAST_LEAVE_INST NN_leaveq +bool SMPInstr::MDIsLeaveInstr(void) const { + return ((SMPcmd.itype >= FIRST_LEAVE_INST) + && (SMPcmd.itype <= LAST_LEAVE_INST)); +} + // MACHINE DEPENDENT: Does instruction use a callee-saved register? bool SMPInstr::MDUsesCalleeSavedReg(void) const { for (size_t index = 0; index < this->Uses.GetSize(); ++index) { - op_t CurrUse = this->GetUse(index); + op_t CurrUse = this->GetUse(index).GetOp(); if (CurrUse.is_reg(R_bp) || CurrUse.is_reg(R_si) || CurrUse.is_reg(R_di) || CurrUse.is_reg(R_bx)) { return true; @@ -465,7 +602,7 @@ bool SMPInstr::IsBranchToFarChunk(void) const { if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) { // Instruction is a direct branch, conditional or unconditional if (this->NumUses() > 0) { - op_t JumpTarget = this->GetUse(0); + op_t JumpTarget = this->GetUse(0).GetOp(); if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) { // Branches to a code address func_t *TargetChunk = get_fchunk(JumpTarget.addr); @@ -554,7 +691,7 @@ void SMPInstr::BuildSMPDefUseLists(void) { void SMPInstr::MDAddRegDef(ushort DefReg) { bool AlreadySet = false; for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { - if (this->GetDef(DefIndex).is_reg(DefReg)) { + if (this->GetDef(DefIndex).GetOp().is_reg(DefReg)) { AlreadySet = true; break; } @@ -572,7 +709,7 @@ void SMPInstr::MDAddRegDef(ushort DefReg) { void SMPInstr::MDAddRegUse(ushort 
UseReg) { bool AlreadyUsed = false; for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) { - if (this->GetUse(UseIndex).is_reg(UseReg)) { + if (this->GetUse(UseIndex).GetOp().is_reg(UseReg)) { AlreadyUsed = true; break; } @@ -633,11 +770,18 @@ void SMPInstr::MDFixupDefUseLists(void) { // sound by declaring that EAX is always a DEF. this->MDAddRegDef(R_ax); } // end if NN_cmpxchg - else if (this->MDIsPopInstr() || this->MDIsPushInstr()) { + else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) { // IDA does not include the stack pointer in the DEFs or USEs. this->MDAddRegDef(R_sp); this->MDAddRegUse(R_sp); } + else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) { + // Entire function prologue or epilogue microcoded. + this->MDAddRegDef(R_sp); + this->MDAddRegUse(R_sp); + this->MDAddRegDef(R_bp); + this->MDAddRegUse(R_bp); + } else if (8 == this->GetOptType()) { // This category implicitly writes to EDX:EAX. this->MDAddRegDef(R_dx); @@ -656,7 +800,7 @@ void SMPInstr::MDFixupDefUseLists(void) { // EBX*EDX gets truncated and the result placed in EBX (no hidden operands). 
bool HiddenEAXUse = false; for (size_t UseIndex = 0; UseIndex < this->NumUses(); ++UseIndex) { - op_t TempUse = this->GetUse(UseIndex); + op_t TempUse = this->GetUse(UseIndex).GetOp(); if (!TempUse.showed()) { // hidden operand if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits this->MDAddRegUse(R_dx); @@ -900,11 +1044,15 @@ void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile) { // Class SMPBasicBlock // ***************************************************************** +#define SMP_BLOCKNUM_UNINIT (-1) + // Constructor SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { this->IndirectJump = false; this->Returns = false; this->SharedTailChunk = false; + this->BlockNum = SMP_BLOCKNUM_UNINIT; + this->FirstAddr = First->GetAddr(); list<SMPInstr>::iterator CurrInst = First; while (CurrInst != Last) { this->Instrs.push_back(CurrInst); @@ -913,6 +1061,19 @@ SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::ite this->Instrs.push_back(CurrInst); // Add last instruction } +// Get address of first instruction in the block. +ea_t SMPBasicBlock::GetFirstAddr(void) const { + return this->FirstAddr; +} + +// Equality operator for SMPBasicBlock. Key field is address of first instruction. +int SMPBasicBlock::operator==(const SMPBasicBlock &rhs) const { + if (rhs.GetFirstAddr() != this->FirstAddr) + return 0; + else + return 1; +} + // Link to predecessor block. void SMPBasicBlock::LinkToPred(list<SMPBasicBlock>::iterator Predecessor) { this->Predecessors.push_back(Predecessor); @@ -925,6 +1086,34 @@ void SMPBasicBlock::LinkToSucc(list<SMPBasicBlock>::iterator Successor) { return; } +// See if all predecessors have set their ordering number. 
+bool SMPBasicBlock::AllPredecessorsNumbered(void) { + list<list<SMPBasicBlock>::iterator>::iterator CurrPred; + for (CurrPred = this->Predecessors.begin(); CurrPred != this->Predecessors.end(); ++CurrPred) { + // Don't count current block, in case we have a one-block loop with this block + // as its own predecessor. + if (**CurrPred == *this) + continue; + if ((*CurrPred)->GetNumber() == SMP_BLOCKNUM_UNINIT) + return false; + } + return true; +} // end of SMPBasicBlock::AllPredecessorsNumbered() + +// Are all instructions in the block no-ops? +bool SMPBasicBlock::AllNops(void) { + size_t NopCount = 0; + size_t GoodCount = 0; // non-nop instructions + list<list<SMPInstr>::iterator>::iterator CurrInst; + for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { + if ((*CurrInst)->MDIsNop()) + ++NopCount; + else + ++GoodCount; + } + return ((0 == GoodCount) && (0 < NopCount)); +} // end of SMPBasicBlock::AllNops() + // Analyze basic block and fill data members. void SMPBasicBlock::Analyze() { if (Instrs.back()->GetDataFlowType() == INDIR_JUMP) { @@ -935,6 +1124,86 @@ void SMPBasicBlock::Analyze() { } } // end of SMPBasicBlock::Analyze() +// DEBUG dump of block +void SMPBasicBlock::Dump(void) { + msg("Dump of basic block %d\n", this->BlockNum); + // Dump dataflow analysis sets and links before dumping instructions. 
+ list<list<SMPBasicBlock>::iterator>::iterator CurrLink; + msg("Predecessors: "); + for (CurrLink = this->Predecessors.begin(); CurrLink != this->Predecessors.end(); ++CurrLink) { + msg("%d ", (*CurrLink)->GetNumber()); + } + msg("\n"); + msg("Successors: "); + for (CurrLink = this->Successors.begin(); CurrLink != this->Successors.end(); ++CurrLink) { + msg("%d ", (*CurrLink)->GetNumber()); + } + msg("\n"); + set<op_t, LessOp>::iterator SetItem; + msg("VarKill set: "); + for (SetItem = this->KillSet.begin(); SetItem != this->KillSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("UpExposed set: "); + for (SetItem = this->UpExposedSet.begin(); SetItem != this->UpExposedSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("LiveIn set: "); + for (SetItem = this->LiveInSet.begin(); SetItem != this->LiveInSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("LiveOut set: "); + for (SetItem = this->LiveOutSet.begin(); SetItem != this->LiveOutSet.end(); ++SetItem) { + PrintOneOperand(*SetItem, 0, -1); + } + msg("\n"); + msg("Dominance frontier: "); + set<int>::iterator DomIter; + for (DomIter = this->DomFrontier.begin(); DomIter != this->DomFrontier.end(); ++DomIter) { + msg("%d ", *DomIter); + } + msg("\n"); + set<SMPPhiFunction, LessPhi>::iterator PhiIter; + for (PhiIter = this->PhiFunctions.begin(); PhiIter != this->PhiFunctions.end(); ++PhiIter) { + msg("Phi function for %d : ", PhiIter->GetIndex()); +#if 0 // cannot make this compile on linux/g++ + // Dump out all phi operands + vector<DefOrUse>::iterator DefIter; + for (DefIter = PhiIter->GetFirstOp(); DefIter != PhiIter->GetLastOp(); ++DefIter) { + PrintOneOperand(DefIter->GetOp(), 0, -1); + msg(" SSAnum %d ", DefIter->GetSSANum()); + } +#else // see if the compiler likes it this way! 
+ for (size_t i = 0; i < PhiIter->GetPhiListSize(); ++i) { + DefOrUse PhiRef = PhiIter->GetPhiRef(i); + PrintOneOperand(PhiRef.GetOp(), 0, -1); + msg(" SSAnum %d ", PhiRef.GetSSANum()); + } +#endif + msg("\n"); + } + + if (this->IndirectJump) + msg("Has indirect jump. "); + if (this->Returns) + msg("Has return. "); + if (this->SharedTailChunk) + msg("Is shared tail chunk block. "); + msg("\n"); + + // Now, dump all the instructions. + list<list<SMPInstr>::iterator>::iterator CurrInst; + for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { + msg("%x : %s\n", (*CurrInst)->GetAddr(), (*CurrInst)->GetDisasm()); + (*CurrInst)->PrintOperands(); + } + msg("\n"); + return; +} // end of SMPBasicBlock::Dump() + // Return true if anything already in the KillSet would kill the operand value. bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const { // We have assembly language operands that can be complex, such as @@ -989,8 +1258,9 @@ bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const { return true; } // end if SIB ... else ... } // end if (FoundInKillSet) ... else ... + break; default: - msg("Unknown operand type in AlreadyKilled.\n"); + msg("Unknown operand type %d in MDAlreadyKilled, block %d\n", Opnd1.type, this->BlockNum); } // end of switch on Opnd1.type return false; @@ -1008,15 +1278,20 @@ void SMPBasicBlock::InitKilledExposed(void) { // in the current instruction, then it is upwardly exposed. size_t limit = CurrInst->NumUses(); for (size_t index = 0; index < limit; ++index) { - if (this->MDAlreadyKilled(CurrInst->GetUse(index))) - this->UpExposedSet.insert(CurrInst->GetUse(index)); + op_t UseOp = CurrInst->GetUse(index).GetOp(); + // Only add non-immediate operands that are not already killed in this block. + // o_near and o_far operands are code addresses in immediate form, e.g. + // call _printf might be call 0x8048040, with o_near = 0x8048040. 
+ if ((!(this->MDAlreadyKilled(UseOp))) + && (UseOp.type != o_imm) && (UseOp.type != o_near) && (UseOp.type != o_far)) + this->UpExposedSet.insert(UseOp); // UseOp already holds this USE operand; no need to fetch and copy it again } // Dataflow equation for killed variables: If a variable is defined in any // instruction in the block, it is killed by this block (i.e. prior definitions // of that variable will not make it through the block). limit = CurrInst->NumDefs(); for (size_t index = 0; index < limit; ++index) { - this->KillSet.insert(CurrInst->GetDef(index)); + this->KillSet.insert(CurrInst->GetDef(index).GetOp()); } } // end for all instrs in block this->IsLiveInStale = true; // Would need to compute LiveInSet for first time @@ -1032,9 +1307,11 @@ set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveIn(void) { // the block unchanged (i.e. it is not killed and is live out). this->LiveInSet.clear(); set<op_t, LessOp>::iterator OutIter; - this->LiveInSet.insert(this->UpExposedSet.begin(), this->UpExposedSet.end()); + for (OutIter = this->UpExposedSet.begin(); OutIter != this->UpExposedSet.end(); ++OutIter) { + this->LiveInSet.insert(*OutIter); + } for (OutIter = this->LiveOutSet.begin(); OutIter != this->LiveOutSet.end(); ++OutIter) { - if (KillSet.end() != this->KillSet.find(*OutIter)) // Found live out but not killed + if (KillSet.end() == this->KillSet.find(*OutIter)) // Found live out but not killed this->LiveInSet.insert(*OutIter); } this->IsLiveInStale = false; @@ -1048,6 +1325,56 @@ set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveIn(void) { return this->LiveInSet.end(); } +// Get iterator for the start of the LiveOut set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveOut(void) { + return this->LiveOutSet.begin(); +} + +// Get termination iterator marker for the LiveOut set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveOut(void) { + return this->LiveOutSet.end(); +} + +// Get iterator for the start of the VarKill set.
+set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstVarKill(void) { + return this->KillSet.begin(); +} + +// Get termination iterator marker for the VarKill set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastVarKill(void) { + return this->KillSet.end(); +} + +// Get iterator for the start of the UpExposed set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstUpExposed(void) { + return this->UpExposedSet.begin(); +} + +// Get termination iterator marker for the UpExposed set. +set<op_t, LessOp>::iterator SMPBasicBlock::GetLastUpExposed(void) { + return this->UpExposedSet.end(); +} + +// Get iterator for the start of the DomFrontier set. +set<int>::iterator SMPBasicBlock::GetFirstDomFrontier(void) { + return this->DomFrontier.begin(); +} + +// Get termination iterator marker for the DomFrontier set. +set<int>::iterator SMPBasicBlock::GetLastDomFrontier(void) { + return this->DomFrontier.end(); +} + +// Get iterator for first Phi function. +set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::GetFirstPhi(void) { + return this->PhiFunctions.begin(); +} + +// Get termination iterator marker for Phi functions set. +set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::GetLastPhi(void) { + return this->PhiFunctions.end(); +} + // Update the LiveOut set for the block. // Return true if it changed, false otherwise. bool SMPBasicBlock::UpdateLiveOut(void) { @@ -1090,13 +1417,31 @@ bool SMPBasicBlock::UpdateLiveOut(void) { return changed; } // end of SMPBasicBlock::UpdateLiveOut() +// Insert RPO number block into the dominance frontier set. +void SMPBasicBlock::AddToDomFrontier(int block) { + this->DomFrontier.insert(block); + return; +} // end of SMPBasicBlock::AddToDomFrontier() + +// Add a phi function to the list of phi functions entering this block. +// If phi function for this global name already existed in the block, +// return false because no new phi function was added; else return true. 
+bool SMPBasicBlock::AddPhi(SMPPhiFunction NewPhi) { + if (this->PhiFunctions.end() == this->PhiFunctions.find(NewPhi)) { + this->PhiFunctions.insert(NewPhi); + return true; + } + else + return false; +} // end of SMPBasicBlock::AddPhi() + // ***************************************************************** // Class SMPFunction // ***************************************************************** // Constructor SMPFunction::SMPFunction(func_t *Info) { - this->FuncInfo = Info; + this->FuncInfo = *Info; this->IndirectCalls = false; this->SharedChunks = false; return; @@ -1123,13 +1468,13 @@ void SMPFunction::SetStackFrameInfo(void) { // The sizes of the three regions of the stack frame other than the // return address are stored in the function structure. - this->LocalVarsSize = this->FuncInfo->frsize; - this->CalleeSavedRegsSize = this->FuncInfo->frregs; - this->IncomingArgsSize = this->FuncInfo->argsize; + this->LocalVarsSize = this->FuncInfo.frsize; + this->CalleeSavedRegsSize = this->FuncInfo.frregs; + this->IncomingArgsSize = this->FuncInfo.argsize; // The return address size can be obtained in a machine independent // way by calling get_frame_retsize(). - this->RetAddrSize = get_frame_retsize(this->FuncInfo); + this->RetAddrSize = get_frame_retsize(&(this->FuncInfo)); // IDA Pro has trouble with functions that do not have any local // variables. Unfortunately, the C library has plenty of these @@ -1201,7 +1546,7 @@ void SMPFunction::SetStackFrameInfo(void) { // Emit diagnostic and use the first instruction in the // function as a pseudo-allocation instruction to emit // some stack frame info (return address, etc.) 
- this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo->frsize); + this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize); #if SMP_DEBUG_FRAMEFIXUP if (BADADDR == this->LocalVarsAllocInstr) { msg("ERROR: Could not find stack frame allocation in %s\n", @@ -1238,7 +1583,7 @@ void SMPFunction::SetStackFrameInfo(void) { // THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE // THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0; else { - ea_t SaveAddr = FuncInfo->startEA; + ea_t SaveAddr = this->FuncInfo.startEA; for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) { @@ -1266,7 +1611,7 @@ void SMPFunction::SetStackFrameInfo(void) { this->LocalVarsDeallocInstr = NULL; } else { - SaveAddr = FuncInfo->endEA - 1; + SaveAddr = this->FuncInfo.endEA - 1; bool FoundRet = false; do { ea_t addr = get_item_head(SaveAddr); @@ -1405,9 +1750,9 @@ bool SMPFunction::MDFixFrameInfo(void) { // Get the size being allocated. for (size_t index = 0; index < CurrInstr->NumUses(); ++index) { // Find the immediate operand. - if (o_imm == CurrInstr->GetUse(index).type) { + if (o_imm == CurrInstr->GetUse(index).GetOp().type) { // Get its value into LocalVarsSize. - long AllocValue = (signed long) CurrInstr->GetUse(index).value; + long AllocValue = (signed long) CurrInstr->GetUse(index).GetOp().value; // One compiler might have sub esp,24 and another // might have add esp,-24. Take the absolute value. if (0 > AllocValue) @@ -1481,7 +1826,7 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize); #endif - if (this->FuncInfo->analyzed_sp()) { + if (this->FuncInfo.analyzed_sp()) { // Limit our analysis to the first basic block in the function. 
list<SMPInstr>::iterator TempIter = *(--(this->Blocks.front().GetLastInstr())); ea_t AddrLimit = TempIter->GetAddr(); @@ -1491,7 +1836,7 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { list<SMPInstr>::iterator CurrInstr = *CurrIter; ea_t addr = CurrInstr->GetAddr(); // get_spd() returns a cumulative delta of ESP - sval_t sp_delta = get_spd(this->FuncInfo, addr); + sval_t sp_delta = get_spd(&(this->FuncInfo), addr); #if SMP_DEBUG_FRAMEFIXUP if (DebugFlag) msg("strpbrk delta: %d at %x\n", sp_delta, addr); @@ -1513,7 +1858,7 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { // We can compute the delta AFTER the last instruction using get_spd+get_sp_delta. list<SMPInstr>::iterator FinalInstr = *(--(this->Blocks.front().GetLastInstr())); ea_t FinalAddr = FinalInstr->GetAddr(); - sval_t FinalDelta = get_spd(this->FuncInfo, FinalAddr); + sval_t FinalDelta = get_spd(&(this->FuncInfo), FinalAddr); if (!FinalInstr->IsBasicBlockTerminator()) { // Special case. The basic block does not terminate with a branch or // return, but falls through to the start of a loop, most likely. @@ -1522,13 +1867,13 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { // the first instruction in the second block. We can examine the // effect on the stack pointer of this last instruction to see if it // causes the SP delta to hit the OriginalLocSize. 
- sval_t LastInstrDelta = get_sp_delta(this->FuncInfo, FinalAddr); + sval_t LastInstrDelta = get_sp_delta(&(this->FuncInfo), FinalAddr); if (TargetSize == (FinalDelta + LastInstrDelta)) { // Return very last instruction (don't back up 1 here) return FinalAddr; } } - } // end if (this->FuncInfo->analyzed_sp()) + } // end if (this->FuncInfo.analyzed_sp()) #if SMP_DEBUG_FRAMEFIXUP else { msg("analyzed_sp() is false for %s\n", this->GetFuncName()); @@ -1555,7 +1900,7 @@ bool SMPFunction::MDFixUseFP(void) { while (addr < this->LocalVarsAllocInstr) { size_t DefIndex = 0; while (DefIndex < CurrInstr->NumDefs()) { - if (CurrInstr->GetDef(DefIndex).is_reg(R_bp)) + if (CurrInstr->GetDef(DefIndex).GetOp().is_reg(R_bp)) return false; // EBP got set before locals were allocated ++DefIndex; } @@ -1604,18 +1949,19 @@ void SMPFunction::Analyze(void) { #endif // Get some basic info from the FuncInfo structure. - this->Size = this->FuncInfo->endEA - this->FuncInfo->startEA; - this->UseFP = (0 != (this->FuncInfo->flags & (FUNC_FRAME | FUNC_BOTTOMBP))); - this->StaticFunc = (0 != (this->FuncInfo->flags & FUNC_STATIC)); - get_func_name(this->FuncInfo->startEA, this->FuncName, + this->Size = this->FuncInfo.endEA - this->FuncInfo.startEA; + this->UseFP = (0 != (this->FuncInfo.flags & (FUNC_FRAME | FUNC_BOTTOMBP))); + this->StaticFunc = (0 != (this->FuncInfo.flags & FUNC_STATIC)); + get_func_name(this->FuncInfo.startEA, this->FuncName, sizeof(this->FuncName) - 1); + this->BlockCount = 0; #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: got basic info.\n"); #endif // Cycle through all chunks that belong to the function. 
- func_tail_iterator_t FuncTail(this->FuncInfo); + func_tail_iterator_t FuncTail(&(this->FuncInfo)); size_t ChunkCounter = 0; for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const area_t &CurrChunk = FuncTail.chunk(); @@ -1669,6 +2015,7 @@ void SMPFunction::Analyze(void) { FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); + this->BlockCount += 1; } #if SMP_DEBUG_CONTROLFLOW @@ -1699,6 +2046,7 @@ void SMPFunction::Analyze(void) { FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); + this->BlockCount += 1; // Is the instruction a branch to a target outside the function? If // so, this function has shared tail chunks. @@ -1707,7 +2055,7 @@ void SMPFunction::Analyze(void) { } } } // end if (isHead(InstrFlags) && isCode(InstrFlags) - } // end for (ea_t addr = FuncInfo->startEA; ... ) + } // end for (ea_t addr = FuncInfo.startEA; ... ) // Handle the special case in which a function does not terminate // with a return instruction or any other basic block terminator. @@ -1727,13 +2075,33 @@ void SMPFunction::Analyze(void) { FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); + this->BlockCount += 1; } } // end for (bool ChunkOK = ...) +#if KLUDGE_VFPRINTF_FAMILY + if (0 != strstr(this->GetFuncName(), "printf")) { + this->SharedChunks = true; + msg("Kludging function %s\n", this->GetFuncName()); + } +#endif + // Set up basic block links and map of instructions to blocks. 
if (!(this->HasSharedChunks())) { this->SetLinks(); +#if SMP_COMPUTE_LVA_SSA + this->RPONumberBlocks(); this->LiveVariableAnalysis(); + this->ComputeSSA(); + bool DumpFlag = (0 == strcmp("main", this->GetFuncName())); + DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); + DumpFlag |= (0 == strcmp(".init_proc", this->GetFuncName())); +#if 0 + DumpFlag = true; +#endif + if (DumpFlag) + this->Dump(); +#endif } #if SMP_DEBUG_CONTROLFLOW @@ -1745,6 +2113,19 @@ void SMPFunction::Analyze(void) { return; } // end of SMPFunction::Analyze() +// Compute SSA form data structures across the function. +void SMPFunction::ComputeSSA(void) { +#if 1 + this->ComputeIDoms(); + this->ComputeDomFrontiers(); + this->ComputeGlobalNames(); + this->ComputeBlocksDefinedIn(); + this->InsertPhiFunctions(); + this->SSARenumber(); +#endif + return; +} // end of SMPFunction::ComputeSSA() + // Link basic blocks to their predecessors and successors, and build the map // of instruction addresses to basic blocks. void SMPFunction::SetLinks(void) { @@ -1771,17 +2152,27 @@ void SMPFunction::SetLinks(void) { for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { list<SMPInstr>::iterator CurrInst = *(--(CurrBlock->GetLastInstr())); // Last instruction in block; set successors + bool CallFlag = (CALL == CurrInst->GetDataFlowType()); xrefblk_t CurrXrefs; for (bool ok = CurrXrefs.first_from(CurrInst->GetAddr(), XREF_ALL); ok; ok = CurrXrefs.next_from()) { if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) { // Found a code target, with its address in CurrXrefs.to + if (CallFlag && (CurrXrefs.to != (CurrInst->GetAddr() + CurrInst->GetCmd().size))) { + // A call instruction will have two targets: the fall through to the + // next instruction, and the called function. We want to link to the + // fall-through instruction, but not to the called function. + // Some blocks end with a call just because the fall-through instruction + // is a jump target from elsewhere. 
+ continue; + } map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry; MapEntry = this->InstBlockMap.find(CurrXrefs.to); if (MapEntry == this->InstBlockMap.end()) { msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.to, this->GetFuncName()); + msg(" Referenced from %s\n", CurrInst->GetDisasm()); } else { list<SMPBasicBlock>::iterator Target = MapEntry->second; @@ -1794,9 +2185,125 @@ void SMPFunction::SetLinks(void) { } // end for all xrefs } // end for all blocks + // If we have any blocks that are all no-ops and have no predecessors, remove those + // blocks. They are dead and make the CFG no longer a lattice. + CurrBlock = this->Blocks.begin(); + while (CurrBlock != this->Blocks.end()) { + if (CurrBlock->AllNops() && (CurrBlock->GetFirstPred() == CurrBlock->GetLastPred())) { + msg("Removing all nops block at %x\n", CurrBlock->GetFirstAddr()); + CurrBlock = this->Blocks.erase(CurrBlock); + this->BlockCount -= 1; + } + else + ++CurrBlock; + } + return; } // end of SMPFunction::SetLinks() +// Number all basic blocks in reverse postorder (RPO) and set RPOBlocks vector to +// access them. +void SMPFunction::RPONumberBlocks(void) { + bool DebugFlag = (0 == strcmp("vfprintf", this->GetFuncName())); + int CurrNum = 0; + list<list<SMPBasicBlock>::iterator> WorkList; + + // Number the first block with 0. + list<SMPBasicBlock>::iterator CurrBlock = this->Blocks.begin(); +#if 0 + if (this->RPOBlocks.capacity() <= (size_t) this->BlockCount) { + msg("Reserving %d RPOBlocks old value: %d\n", 2+this->BlockCount, this->RPOBlocks.capacity()); + this->RPOBlocks.reserve(2 + this->BlockCount); + this->RPOBlocks.assign(2 + this->BlockCount, this->Blocks.end()); + } +#endif + CurrBlock->SetNumber(CurrNum); + this->RPOBlocks.push_back(CurrBlock); + ++CurrNum; + // Push the first block's successors onto the work list. 
+ list<list<SMPBasicBlock>::iterator>::iterator CurrSucc = CurrBlock->GetFirstSucc(); + while (CurrSucc != CurrBlock->GetLastSucc()) { + WorkList.push_back(*CurrSucc); + ++CurrSucc; + } + + // Use the WorkList to iterate through all blocks in the function + list<list<SMPBasicBlock>::iterator>::iterator CurrListItem = WorkList.begin(); + bool change; + while (!WorkList.empty()) { + change = false; + while (CurrListItem != WorkList.end()) { + if ((*CurrListItem)->GetNumber() != SMP_BLOCKNUM_UNINIT) { + // Duplicates get pushed onto the WorkList because a block + // can be the successor of multiple other blocks. If it is + // already numbered, it is a duplicate and can be removed + // from the list. + CurrListItem = WorkList.erase(CurrListItem); + change = true; + continue; + } + if ((*CurrListItem)->AllPredecessorsNumbered()) { + // Ready to be numbered. + (*CurrListItem)->SetNumber(CurrNum); +#if 0 + msg("Set RPO number %d\n", CurrNum); + if (DebugFlag && (7 == CurrNum)) + this->Dump(); +#endif + this->RPOBlocks.push_back(*CurrListItem); + ++CurrNum; + change = true; + // Push its unnumbered successors onto the work list. + CurrSucc = (*CurrListItem)->GetFirstSucc(); + while (CurrSucc != (*CurrListItem)->GetLastSucc()) { + if ((*CurrSucc)->GetNumber() == SMP_BLOCKNUM_UNINIT) + WorkList.push_back(*CurrSucc); + ++CurrSucc; + } + CurrListItem = WorkList.erase(CurrListItem); + } + else { + ++CurrListItem; + } + } // end while (CurrListItem != WorkList.end()) + if (change) { + // Reset CurrListItem to beginning of work list for next iteration. + CurrListItem = WorkList.begin(); + } + else { + // Loops can cause us to not be able to find a WorkList item that has + // all predecessors numbered. Take the WorkList item with the lowest address + // and number it so we can proceed. 
+ CurrListItem = WorkList.begin(); + ea_t LowAddr = (*CurrListItem)->GetFirstAddr(); + list<list<SMPBasicBlock>::iterator>::iterator SaveItem = CurrListItem; + ++CurrListItem; + while (CurrListItem != WorkList.end()) { + if (LowAddr > (*CurrListItem)->GetFirstAddr()) { + SaveItem = CurrListItem; + LowAddr = (*CurrListItem)->GetFirstAddr(); + } + ++CurrListItem; + } + // SaveItem should now be numbered. + (*SaveItem)->SetNumber(CurrNum); + msg("Picked LowAddr %x and set RPO number %d\n", LowAddr, CurrNum); + this->RPOBlocks.push_back(*SaveItem); + ++CurrNum; + // Push its unnumbered successors onto the work list. + CurrSucc = (*SaveItem)->GetFirstSucc(); + while (CurrSucc != (*SaveItem)->GetLastSucc()) { + if ((*CurrSucc)->GetNumber() == SMP_BLOCKNUM_UNINIT) + WorkList.push_back(*CurrSucc); + ++CurrSucc; + } + CurrListItem = WorkList.erase(SaveItem); + CurrListItem = WorkList.begin(); + } // end if (change) ... else ... + } // end while work list is nonempty + return; +} // end of SMPFunction::RPONumberBlocks() + // Perform live variable analysis on all blocks in the function. // See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm. void SMPFunction::LiveVariableAnalysis(void) { @@ -1810,28 +2317,283 @@ void SMPFunction::LiveVariableAnalysis(void) { bool changed; // Iterate over each block, updating LiveOut sets until no more changes are made. - // NOTE: Would be more efficient if we computed a reverse post-order list of blocks - // and traversed this loop in reverse post-order. **!!** + // NOTE: LVA is more efficient when computed over a reverse post-order list of blocks. 
+#if 1 do { changed = false; for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { changed |= CurrBlock->UpdateLiveOut(); } } while (changed); - +#else // Use reverse postorder + do { + changed = false; + for (size_t index = 0; index < this->RPOBlocks.size(); ++index) { + CurrBlock = this->RPOBlocks[index]; + changed |= CurrBlock->UpdateLiveOut(); + } + } while (changed); +#endif return; } // end of SMPFunction::LiveVariableAnalysis() +// Return the IDom index that is the end of the intersection prefix of the Dom sets of +// the two blocks designated by the RPO numbers passed in. +// See Cooper & Torczon, "Engineering a Compiler" 1st edition figure 9.8. +int SMPFunction::IntersectDoms(int block1, int block2) const { + int finger1 = block1; + int finger2 = block2; + while (finger1 != finger2) { + while (finger1 > finger2) + finger1 = this->IDom.at(finger1); + while (finger2 > finger1) + finger2 = this->IDom.at(finger2); + } + return finger1; +} // end of SMPFunction::IntersectDoms() + +// Compute immediate dominators of all blocks into IDom[] vector. +void SMPFunction::ComputeIDoms(void) { + bool DebugFlag = (0 == strcmp("vfprintf", this->GetFuncName())); + // Initialize the IDom[] vector to uninitialized values for all blocks. 
+ this->IDom.reserve(this->BlockCount); + this->IDom.assign(this->BlockCount, SMP_BLOCKNUM_UNINIT); + if (DebugFlag) msg("BlockCount = %d\n", this->BlockCount); + this->IDom[0] = 0; // Start block dominated only by itself + bool changed; + do { + changed = false; + for (size_t RPONum = 1; RPONum < (size_t) this->BlockCount; ++RPONum) { + if (DebugFlag) msg("RPONum %d\n", RPONum); + if (DebugFlag) { + msg("RPOBlocks vector size: %d\n", this->RPOBlocks.size()); + for (size_t index = 0; index < this->RPOBlocks.size(); ++index) { + msg("RPOBlocks entry %d is %d\n", index, RPOBlocks[index]->GetNumber()); + } + } + list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at(RPONum); + // if (DebugFlag) msg("CurrBlock: %x\n", CurrBlock._Ptr); + list<list<SMPBasicBlock>::iterator>::iterator CurrPred; + // Initialize NewIdom to the first processed predecessor of block RPONum. + int NewIdom = SMP_BLOCKNUM_UNINIT; + for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { + if (DebugFlag) msg("Pred: %d\n", (*CurrPred)->GetNumber()); + int PredIDOM = this->IDom.at((*CurrPred)->GetNumber()); + if (DebugFlag) msg("Pred IDom: %d\n", PredIDOM); + if (SMP_BLOCKNUM_UNINIT != PredIDOM) { + NewIdom = (*CurrPred)->GetNumber(); + break; + } + } + if (NewIdom == SMP_BLOCKNUM_UNINIT) + msg("Failure on NewIdom in ComputeIDoms for %s\n", this->GetFuncName()); + assert(NewIdom != SMP_BLOCKNUM_UNINIT); + // Loop through all predecessors of block RPONum except block NewIdom. + // Set NewIdom to the intersection of its Dom set and the Doms set of + // each predecessor that has had its Doms set computed. 
+ for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { + int PredNum = (*CurrPred)->GetNumber(); + if (DebugFlag) msg("PredNum: %d\n", PredNum); + int PredIDOM = this->IDom.at(PredNum); + if (DebugFlag) msg("PredIDOM: %d\n", PredIDOM); + if ((SMP_BLOCKNUM_UNINIT == PredIDOM) || (NewIdom == PredIDOM)) { + // Skip predecessors that have uncomputed Dom sets, or are the + // current NewIdom. + continue; + } + if (DebugFlag) msg("Old NewIdom value: %d\n", NewIdom); + NewIdom = this->IntersectDoms(PredNum, NewIdom); + if (DebugFlag) msg("New NewIdom value: %d\n", NewIdom); + } + // If NewIdom is not the value currently in vector IDom[], update the + // vector entry and set changed to true. + if (NewIdom != this->IDom.at(RPONum)) { + if (DebugFlag) msg("IDOM changed from %d to %d\n", this->IDom.at(RPONum), NewIdom); + this->IDom[RPONum] = NewIdom; + changed = true; + } + } + } while (changed); + return; +} // end of SMPFunction::ComputeIDoms() + +// Compute dominance frontier sets for each block. +void SMPFunction::ComputeDomFrontiers(void) { + list<SMPBasicBlock>::iterator CurrBlock; + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + // We look only at join points in the CFG, as per Cooper/Torczon chapter 9. + if (1 < CurrBlock->GetNumPreds()) { // join point; more than 1 predecessor + int runner; + list<list<SMPBasicBlock>::iterator>::iterator CurrPred; + for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { + // For each predecessor, we run up the IDom[] vector and add CurrBlock to the + // DomFrontier for all blocks that are between CurrPred and IDom[CurrBlock], + // not including IDom[CurrBlock] itself. + runner = (*CurrPred)->GetNumber(); + while (runner != this->IDom.at(CurrBlock->GetNumber())) { + // Cooper/Harvey/Kennedy paper does not quite agree with the later + // text by Cooper/Torczon.
Text says that the start node has no IDom + // in the example on pages 462-463, but it shows an IDOM for the + // root node in Figure 9.9 of value == itself. The first edition text + // on p.463 seems correct, as the start node dominates every node and + // thus should have no dominance frontier. + if (SMP_TOP_BLOCK == runner) + break; + this->RPOBlocks.at(runner)->AddToDomFrontier(CurrBlock->GetNumber()); + runner = this->IDom.at(runner); + } + } // end for all predecessors + } // end if join point + } // end for all blocks + return; +} // end of SMPFunction::ComputeDomFrontiers() + +// Compute the GlobalNames set, which includes all operands that are used in more than +// one basic block. It is the union of all UpExposedSets of all blocks. +void SMPFunction::ComputeGlobalNames(void) { + set<op_t, LessOp>::iterator SetIter; + list<SMPBasicBlock>::iterator CurrBlock; + unsigned int index = 0; + if (this->Blocks.size() < 2) + return; // cannot have global names if there is only one block + + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + for (SetIter = CurrBlock->GetFirstUpExposed(); SetIter != CurrBlock->GetLastUpExposed(); ++SetIter) { + op_t TempOp = *SetIter; + msg("Global Name: "); + PrintOneOperand(TempOp, 0, -1); + set<op_t, LessOp>::iterator AlreadyInSet = this->GlobalNames.find(TempOp); + if (AlreadyInSet != this->GlobalNames.end()) { + // Already in GlobalNames, so don't assign an index number or call insert. + msg(" already in GlobalNames.\n"); + continue; + } + + // The GlobalNames set will have the complete collection of operands that we are + // going to number in our SSA computations. We now assign an operand number + // within the op_t structure for each, so that we can index into the + // BlocksUsedIn[] vector, for example. This operand number is not to be + // confused with SSA numbers. + // We use the operand number field op_t.n for the lower 8 bits, and the offset + // fields op_t.offb:op_t.offo for the upper 16 bits.
We are overwriting IDA + // values here, but operands in the data flow analysis sets should never be + // inserted back into the program anyway. + TempOp.n = (char) (index & 0x000000ff); + TempOp.offb = (char) ((index & 0x0000ff00) >> 8); + TempOp.offo = (char) ((index & 0x00ff0000) >> 16); + ++index; + this->GlobalNames.insert(TempOp); + msg(" inserted as index %d\n", ExtractGlobalIndex(TempOp)); + } + } + + assert(16777215 >= this->GlobalNames.size()); // index fits in 24 bits + return; +} // end of SMPFunction::ComputeGlobalNames() + +// For each item in GlobalNames, record the blocks that DEF the item. +void SMPFunction::ComputeBlocksDefinedIn(void) { + // Loop through all basic blocks and examine all DEFs. For Global DEFs, record + // the block number in BlocksDefinedIn. The VarKillSet records DEFs without + // having to examine every instruction. + list<SMPBasicBlock>::iterator CurrBlock; + this->BlocksDefinedIn.clear(); + for (size_t i = 0; i < this->GlobalNames.size(); ++i) { + list<int> TempList; + this->BlocksDefinedIn.push_back(TempList); + } + msg("Number of GlobalNames: %d\n", this->GlobalNames.size()); + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + set<op_t, LessOp>::iterator KillIter; + for (KillIter = CurrBlock->GetFirstVarKill(); KillIter != CurrBlock->GetLastVarKill(); ++KillIter) { + // If killed item is not a block-local item (it is global), record it. + set<op_t, LessOp>::iterator NameIter = this->GlobalNames.find(*KillIter); + if (NameIter != this->GlobalNames.end()) { // found in GlobalNames set + // We have a kill of a global name. Get index from three 8-bit fields. + unsigned int index = ExtractGlobalIndex(*NameIter); +#if 0 + msg("VarKill item offo: %d offb: %d n: %d index: %d\n", NameIter->offo, NameIter->offb, NameIter->n, index); +#endif + assert(index < this->GlobalNames.size()); + // index is a valid subscript for the BlocksDefinedIn vector. 
Push the + // current block number onto the list of blocks that define this global name. + this->BlocksDefinedIn[index].push_back(CurrBlock->GetNumber()); + } + } + } + return; +} // end of SMPFunction::ComputeBlocksDefinedIn() + +// Compute the phi functions at the entry point of each basic block that is a join point. +void SMPFunction::InsertPhiFunctions(void) { + set<op_t, LessOp>::iterator NameIter; + list<int> WorkList; // list of block numbers + for (NameIter = this->GlobalNames.begin(); NameIter != this->GlobalNames.end(); ++NameIter) { + int CurrNameIndex = (int) (ExtractGlobalIndex(*NameIter)); + // Initialize the work list to all blocks that define the current name. + WorkList.clear(); + list<int>::iterator WorkIter; + for (WorkIter = this->BlocksDefinedIn.at((size_t) CurrNameIndex).begin(); + WorkIter != this->BlocksDefinedIn.at((size_t) CurrNameIndex).end(); + ++WorkIter) { + WorkList.push_back(*WorkIter); + } + + // Iterate through the work list, inserting phi functions for the current name + // into all the blocks in the dominance frontier of each work list block. + // Insert into the work list each block that had a phi function added. + while (!WorkList.empty()) { + msg("WorkList size: %d\n", WorkList.size()); + list<int>::iterator WorkIter = WorkList.begin(); + while (WorkIter != WorkList.end()) { + set<int>::iterator DomFrontIter; + list<SMPBasicBlock>::iterator WorkBlock = this->RPOBlocks[*WorkIter]; + for (DomFrontIter = WorkBlock->GetFirstDomFrontier(); + DomFrontIter != WorkBlock->GetLastDomFrontier(); + ++DomFrontIter) { + list<SMPBasicBlock>::iterator PhiBlock = this->RPOBlocks[*DomFrontIter]; + // Before inserting a phi function for the current name in *PhiBlock, + // see if the current name is LiveIn for *PhiBlock. If not, there + // is no need for the phi function. This check is what makes the SSA + // a fully pruned SSA. 
+ if (PhiBlock->IsLiveIn(*NameIter)) { + size_t NumPreds = PhiBlock->GetNumPreds(); + SMPPhiFunction CurrPhi(CurrNameIndex); + DefOrUse CurrRef(*NameIter); + for (size_t NumCopies = 0; NumCopies < NumPreds; ++NumCopies) { + CurrPhi.PushBack(CurrRef); + } + if (PhiBlock->AddPhi(CurrPhi)) { + // If not already in Phi set, new phi function was inserted. + WorkList.push_back(PhiBlock->GetNumber()); + msg("Added phi for name %d at top of block %d\n", CurrNameIndex, PhiBlock->GetNumber()); + } + } + } // end for all blocks in the dominance frontier + // Remove current block number from the work list + WorkIter = WorkList.erase(WorkIter); + } // end for all block numbers in the work list + } // end while the work list is not empty + } // end for all elements of the GlobalNames set + return; +} // end of SMPFunction::InsertPhiFunctions() + +void SMPFunction::SSARenumber(void) { + + // **!!** Get this into CVS and patch in the code later after final debugging + return; +} + // Emit all annotations for the function, including all per-instruction // annotations. void SMPFunction::EmitAnnotations(FILE *AnnotFile) { // Emit annotation for the function as a whole. if (this->StaticFunc) { - qfprintf(AnnotFile, "%x %d FUNC LOCAL %s ", FuncInfo->startEA, + qfprintf(AnnotFile, "%x %d FUNC LOCAL %s ", this->FuncInfo.startEA, this->Size, this->FuncName); } else { - qfprintf(AnnotFile, "%x %d FUNC GLOBAL %s ", FuncInfo->startEA, + qfprintf(AnnotFile, "%x %d FUNC GLOBAL %s ", this->FuncInfo.startEA, this->Size, this->FuncName); } if (this->UseFP) { @@ -1840,7 +2602,7 @@ void SMPFunction::EmitAnnotations(FILE *AnnotFile) { else { qfprintf(AnnotFile, "NOFP "); } - if (this->FuncInfo->does_return()) { + if (this->FuncInfo.does_return()) { qfprintf(AnnotFile, "\n"); } else { @@ -1880,10 +2642,42 @@ void SMPFunction::EmitAnnotations(FILE *AnnotFile) { } CurrInst->EmitAnnotations(this->UseFP, AllocSeen, AnnotFile); - } // end for (ea_t addr = FuncInfo->startEA; ...) 
+ } // end for (ea_t addr = FuncInfo.startEA; ...) return; } // end of SMPFunction::EmitAnnotations() +// Debug output dump. +void SMPFunction::Dump(void) { + list<SMPBasicBlock>::iterator CurrBlock; + msg("Debug dump for function: %s\n", this->GetFuncName()); + for (size_t index = 0; index < this->IDom.size(); ++index) { + msg("IDOM for %d: %d\n", index, this->IDom.at(index)); + } + msg("Global names: \n"); + set<op_t, LessOp>::iterator NameIter; + for (NameIter = this->GlobalNames.begin(); NameIter != this->GlobalNames.end(); ++NameIter) { + msg("index: %d ", ExtractGlobalIndex(*NameIter)); + PrintOneOperand(*NameIter, 0, -1); + msg("\n"); + } + msg("Blocks each name is defined in: \n"); + for (size_t index = 0; index < this->BlocksDefinedIn.size(); ++index) { + msg("Name index: %d Blocks: ", index); + list<int>::iterator BlockIter; + for (BlockIter = this->BlocksDefinedIn.at(index).begin(); + BlockIter != this->BlocksDefinedIn.at(index).end(); + ++BlockIter) { + msg("%d ", *BlockIter); + } + msg("\n"); + } + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + // Dump out the function number and data flow sets before the instructions. + CurrBlock->Dump(); + } + return; +} // end of SMPFunction::Dump() + // Initialize the DFACategory[] array to define instruction classes // for the purposes of data flow analysis. void InitDFACategory(void) { diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index b330be35188c06b354220a4b24a7612de0de17df..0f718269e9de2f13b21e43c5e3ada8c80f75fca5 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -25,6 +25,12 @@ class SMPFunction; class SMPBasicBlock; class SMPInstr; class DefOrUseList; +class SMPPhiFunction; + +// Value for an SSA subscript number before it is initialized by SSA renaming. +#define SMP_SSA_UNINIT (-1) + +bool MDLessReg(const ushort Reg1, const ushort Reg2); // MACHINE DEPENDENT: comparison class to permit sorting of op_t operands. 
class LessOp { @@ -34,7 +40,7 @@ public: return (Opnd1.type < Opnd2.type); switch (Opnd1.type) { case o_void: return false; - case o_reg: return (Opnd1.reg < Opnd2.reg); // **!!** al < eax? etc. + case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg); case o_mem: return (Opnd1.addr < Opnd2.addr); case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib); else if (Opnd2.hasSIB) return true; @@ -51,7 +57,7 @@ public: case o_near: return (Opnd1.addr < Opnd2.addr); default: msg("Unknown operand type.\n"); return false; }; // end switch (Opnd1.type) - } + } // end operator }; // end class LessOp // SMP will operate on a doubly linked list of instructions, which @@ -92,69 +98,124 @@ enum SMPOperandType { // What type is a given register or memory operand? UNKNOWN // Might hold an address, might not (Bad!) }; +class DefOrUse { +public: + // Constructors + DefOrUse(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); + // Get methods + inline op_t GetOp(void) const { return Operand; }; + inline SMPOperandType GetType(void) const { return OpType; }; + inline int GetSSANum(void) const { return SSANumber; }; + // Set methods + inline void SetSSANum(int Num) { SSANumber = Num; }; + inline void SetType(SMPOperandType Type) { OpType = Type; }; +private: + // Data + op_t Operand; + SMPOperandType OpType; + int SSANumber; +}; // end of class DefOrUse + // Same class is used for both a DEF list and a USE list. 
class DefOrUseList { public: + // Constructors DefOrUseList(void); - void SetRef(op_t Ref, SMPOperandType Type = UNINIT); - op_t GetRef(size_t index) const; - SMPOperandType GetRefType(size_t index) const; + // Get methods + DefOrUse GetRef(size_t index) const; inline size_t GetSize(void) const { return (size_t) Refs.size(); }; + inline vector<DefOrUse>::iterator GetFirstRef(void) { return Refs.begin(); }; + inline vector<DefOrUse>::iterator GetLastRef(void) { return Refs.end(); }; + // Set methods + void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); private: - vector<op_t> Refs; // Defined or used operand - vector<SMPOperandType> Types; // indices coupled to Refs + // Data + vector<DefOrUse> Refs; // Defined or used operand with type and SSA subscript }; // end class DefOrUseList +class SMPPhiFunction { +public: + // Constructors + SMPPhiFunction(int GlobIndex); + // Get methods + inline int GetIndex(void) const { return index; }; + inline size_t GetPhiListSize(void) const { return this->SubscriptedOps.GetSize(); }; + inline DefOrUse GetPhiRef(size_t i) const { return this->SubscriptedOps.GetRef(i); }; + inline vector<DefOrUse>::iterator GetFirstOp(void) { return SubscriptedOps.GetFirstRef(); }; + inline vector<DefOrUse>::iterator GetLastOp(void) { return SubscriptedOps.GetLastRef(); }; + // Set methods + void PushBack(DefOrUse Ref); +private: + int index; // index into SMPFunction::BlocksDefinedIn + DefOrUseList SubscriptedOps; +}; // end class SMPPhiFunction + +// Comparison function to permit sorting phi functions and keeping them in sets. +class LessPhi { +public: + bool operator() (const SMPPhiFunction &Phi1, const SMPPhiFunction &Phi2) const { + return (Phi1.GetIndex() < Phi2.GetIndex()); + } +}; // end of class LessPhi + class SMPInstr { public: - // Default constructor + // Constructors SMPInstr(ea_t addr); - // Fill in basic data for instruction. 
- void Analyze(void); - inline ea_t GetAddr(void) const { return address; }; - inline char *GetDisasm(void) { return disasm; }; + // Operators int operator==(const SMPInstr &rhs) const; int operator<(const SMPInstr &rhs) const; int operator<=(const SMPInstr &rhs) const; int operator!=(const SMPInstr &rhs) const; - inline op_t GetUse(size_t index) const { return Uses.GetRef(index); }; - inline op_t GetDef(size_t index) const { return Defs.GetRef(index); }; + // Get methods + inline ea_t GetAddr(void) const { return address; }; + inline char *GetDisasm(void) { return disasm; }; + inline DefOrUse GetUse(size_t index) const { return Uses.GetRef(index); }; + inline DefOrUse GetDef(size_t index) const { return Defs.GetRef(index); }; inline size_t NumUses(void) const { return Uses.GetSize(); }; inline size_t NumDefs(void) const { return Defs.GetSize(); }; inline insn_t GetCmd(void) const { return SMPcmd; }; + inline int GetOptType(void) const { return OptType; }; + inline SMPitype GetDataFlowType(void) const { return type; }; + // Set methods inline void SetCmd(insn_t cmd) { SMPcmd = cmd; return; }; - // Does instruction write to memory? - bool HasDestMemoryOperand(void) const; - // Does instruction read from memory? - bool HasSourceMemoryOperand(void) const; + // Query methods + bool HasDestMemoryOperand(void) const; // Does instruction write to memory? + bool HasSourceMemoryOperand(void) const; // Does instruction read from memory? 
bool IsSecondSrcOperandNumeric(flags_t F) const; bool IsBasicBlockTerminator(void) const; inline bool IsJumpTarget(void) const { return JumpTarget; }; bool IsBranchToFarChunk(void) const; // instr jumps outside current chunk + bool MDIsNop(void) const; // instruction is simple or complex no-op bool MDIsPushInstr(void) const; bool MDIsPopInstr(void) const; bool MDIsReturnInstr(void) const; + bool MDIsEnterInstr(void) const; + bool MDIsLeaveInstr(void) const; bool MDIsFrameAllocInstr(void) const; bool MDIsFrameDeallocInstr(bool UseFP, asize_t LocSize) const; bool MDUsesCalleeSavedReg(void) const; - inline int GetOptType(void) const { return OptType; }; - inline SMPitype GetDataFlowType(void) const { return type; }; + // Printing methods void PrintOperands(void) const; char *DestString(int OptType); + // Analysis methods + void Analyze(void); // Fill in basic data for instruction. void AnnotateStackConstants(bool UseFP, FILE *AnnotFile); void EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile); private: + // Data + insn_t SMPcmd; // copy of 'cmd' for this instruction + ulong features; // Canonical features for SMPcmd + char disasm[MAXSTR]; // Disassembly text of instruction + DefOrUseList Defs; // Definitions list + DefOrUseList Uses; // Uses list SMPitype type; // Data flow analysis category int OptType; // Optimization category (see OptCategory[]) ea_t address; // Code address for 1st byte of instruction bool analyzed; // Has instr been analyzed yet, setting type // and DEF and USE lists? bool JumpTarget; // Is Instr the target of any jumps or branches? 
- insn_t SMPcmd; // copy of 'cmd' for this instruction - ulong features; // Canonical features for SMPcmd - char disasm[MAXSTR]; // Disassembly text of instruction - DefOrUseList Defs; // Definitions list - DefOrUseList Uses; // Uses list + // Methods void BuildSMPDefUseLists(void); // Build DEF and USE lists for instruction void MDFixupDefUseLists(void); // Machine-dependent ad hoc fixes void MDAddRegDef(ushort); // Add DEF of register if not already a DEF @@ -165,33 +226,83 @@ private: // Class defining basic blocks. class SMPBasicBlock { public: + // Constructors SMPBasicBlock(list<SMPInstr>::iterator FirstInstr, list<SMPInstr>::iterator LastInstr); + // Operators + int operator==(const SMPBasicBlock &rhs) const; + // Get methods + ea_t GetFirstAddr(void) const; + inline int GetNumber(void) const { return BlockNum; } + inline list<list<SMPInstr>::iterator>::iterator GetFirstInstr(void) { return Instrs.begin(); }; + inline list<list<SMPInstr>::iterator>::iterator GetLastInstr(void) { return Instrs.end(); }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetFirstPred(void) { + return Predecessors.begin(); + }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetLastPred(void) { + return Predecessors.end(); + }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetFirstSucc(void) { + return Successors.begin(); + }; + inline list<list<SMPBasicBlock>::iterator>::iterator GetLastSucc(void) { + return Successors.end(); + }; + inline size_t GetNumPreds(void) const { return Predecessors.size(); }; + set<op_t, LessOp>::iterator GetFirstLiveIn(void); // LiveInSet.begin() + set<op_t, LessOp>::iterator GetLastLiveIn(void); // LiveInSet.end() + set<op_t, LessOp>::iterator GetFirstLiveOut(void); // LiveOutSet.begin() + set<op_t, LessOp>::iterator GetLastLiveOut(void); // LiveOutSet.end() + set<op_t, LessOp>::iterator GetFirstVarKill(void); // KillSet.begin() + set<op_t, LessOp>::iterator GetLastVarKill(void); // KillSet.end() + set<op_t, LessOp>::iterator 
GetFirstUpExposed(void); // UpExposedSet.begin() + set<op_t, LessOp>::iterator GetLastUpExposed(void); // UpExposedSet.end() + set<int>::iterator GetFirstDomFrontier(void); // DomFrontier.begin() + set<int>::iterator GetLastDomFrontier(void); // DomFrontier.end() + set<SMPPhiFunction, LessPhi>::iterator GetFirstPhi(void); // PhiFunctions.begin() + set<SMPPhiFunction, LessPhi>::iterator GetLastPhi(void); // PhiFunctions.end() + // Set methods + inline void SetShared(void) { SharedTailChunk = true; }; + inline void SetNumber(int Num) { BlockNum = Num; }; void LinkToPred(list<SMPBasicBlock>::iterator Predecessor); void LinkToSucc(list<SMPBasicBlock>::iterator Successor); + bool AddPhi(SMPPhiFunction NewPhi); + // Query methods inline bool HasIndirectJump(void) const { return IndirectJump; }; inline bool HasReturn(void) const { return Returns; }; inline bool IsShared(void) const { return SharedTailChunk; }; - inline void SetShared(void) { SharedTailChunk = true; }; - inline list<list<SMPInstr>::iterator>::iterator GetFirstInstr(void) { return Instrs.begin(); }; - inline list<list<SMPInstr>::iterator>::iterator GetLastInstr(void) { return Instrs.end(); }; + bool AllNops(void); // Are all instructions in the block no-ops? + inline bool IsLiveIn(op_t CurrOp) const { + return (LiveInSet.end() != LiveInSet.find(CurrOp)); + } + // Printing methods + void Dump(void); + // Analysis methods + bool AllPredecessorsNumbered(void); void Analyze(); void InitKilledExposed(void); // Initialize KilledSet and UpExposedSet bool UpdateLiveOut(void); // Iterate once on updating LiveOutSet; return true if changed - set<op_t, LessOp>::iterator GetFirstLiveIn(void); // First LiveIn for use by predecessor - set<op_t, LessOp>::iterator GetLastLiveIn(void); // Last LiveIn for use by predecessor + void AddToDomFrontier(int); // Add RPO block number to DomFrontier set. 
private: + // Data + ea_t FirstAddr; + int BlockNum; // Number for block ordering algorithms list<list<SMPInstr>::iterator> Instrs; list<list<SMPBasicBlock>::iterator> Predecessors; list<list<SMPBasicBlock>::iterator> Successors; - // Three sets used in live variable analysis + // Four sets used in live variable analysis set<op_t, LessOp> KillSet; // variables killed in this block set<op_t, LessOp> UpExposedSet; // upward exposed variables in this block set<op_t, LessOp> LiveOutSet; // Live-Out variables in this block set<op_t, LessOp> LiveInSet; // contribution to predecessor's live-out iteration + // SSA data structures + set<int> DomFrontier; // Dominance frontier for block, as set of RPO block numbers + set<SMPPhiFunction, LessPhi> PhiFunctions; // SSA incoming edge phi functions + // cached query results bool IndirectJump; // contains an indirect jump instruction bool Returns; // contains a return instruction bool SharedTailChunk; // is part of a code chunk shared among functions bool IsLiveInStale; // Has LiveOutSet changed since LiveInSet was computed? + // Methods bool MDAlreadyKilled(op_t) const; // Was op_t killed by something already in KillSet? }; @@ -199,24 +310,39 @@ private: // about a function. 
class SMPFunction { public: + // Constructors SMPFunction(func_t *Info); // Default constructor - void Analyze(void); // Analyze all instructions in function - void EmitAnnotations(FILE *AnnotFile); - inline bool HasIndirectCalls(void) const { return IndirectCalls; }; + // Get methods inline const char *GetFuncName(void) const { return FuncName; }; + // Set methods + // Query methods + inline bool HasIndirectCalls(void) const { return IndirectCalls; }; inline bool HasSharedChunks(void) const { return SharedChunks; }; + // Printing methods + void Dump(void); // debug dump + // Analysis methods + void Analyze(void); // Analyze all instructions in function + void EmitAnnotations(FILE *AnnotFile); + void RPONumberBlocks(void); void SetLinks(void); // Link basic blocks and map instructions to blocks void LiveVariableAnalysis(void); // Perform Live Variable Analysis across all blocks + void ComputeSSA(void); // Compute SSA form data structures private: - func_t *FuncInfo; + // Data + func_t FuncInfo; + char FuncName[MAXSTR]; list<SMPInstr> Instrs; list<SMPBasicBlock> Blocks; map<ea_t, list<SMPBasicBlock>::iterator> InstBlockMap; + vector<list<SMPBasicBlock>::iterator> RPOBlocks; + vector<int> IDom; // Immediate dominators, indexed and valued by block RPO numbers + set<op_t, LessOp> GlobalNames; // operands used in more than one block; needed in SSA + vector<list<int> > BlocksDefinedIn; // What blocks DEF each GlobalName; index = op # in GlobalNames + int BlockCount; // number of basic blocks in the function bool UseFP; // Does function use a frame pointer? bool StaticFunc; // Is function declared static? bool IndirectCalls; // Does function make indirect calls? bool SharedChunks; // Does function share a tail chunk with other functions? 
- char FuncName[MAXSTR]; size_t Size; // Function size in code bytes asize_t LocalVarsSize; // size of local vars region of stack frame ushort CalleeSavedRegsSize; // stack size of callee pushed regs @@ -224,17 +350,22 @@ private: asize_t IncomingArgsSize; // size of incoming args on stack ea_t LocalVarsAllocInstr; // address of instr that allocates stack frame ea_t LocalVarsDeallocInstr; // address of epilogue instr that deallocs frame + // Methods void SetStackFrameInfo(void); ea_t FindAllocPoint(asize_t); // Deal with difficult to find stack frame allocations bool MDFixFrameInfo(void); // Redefine stack regions for our needs bool MDFixUseFP(void); // Fix IDA errors affecting UseFP void EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr); + void ComputeIDoms(void); // Compute immediate dominators of all blocks into IDom[] + int IntersectDoms(int, int) const; // Find Dom intersection (as IDom[] index) for 2 blocks + void ComputeDomFrontiers(void); // Compute dominance frontiers for all blocks + void ComputeGlobalNames(void); // Compute the GlobalNames set + void ComputeBlocksDefinedIn(void); // Compute the BlocksDefinedIn vector + void InsertPhiFunctions(void); // Insert SSA phi functions at top of each basic block + void SSARenumber(void); // Renumber SSA subscripts for all names }; // end class SMPFunction // Initialization routine for DFA category. void InitDFACategory(void); -bool IsDestMemoryOperand(insn_t); -bool IsSrcMemoryOperand(insn_t); - #endif