// // SMPFunction.cpp // // This module performs the fundamental data flow analyses needed for the // SMP project (Software Memory Protection) at the function level. // #include <utility> #include <list> #include <set> #include <vector> #include <algorithm> #include <cstring> #include <cstdlib> #include <pro.h> #include <assert.h> #include <ida.hpp> #include <idp.hpp> #include <auto.hpp> #include <bytes.hpp> #include <funcs.hpp> #include <allins.hpp> #include <intel.hpp> #include <name.hpp> #include "SMPDataFlowAnalysis.h" #include "SMPStaticAnalyzer.h" #include "SMPFunction.h" #include "SMPBasicBlock.h" #include "SMPInstr.h" // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG3 0 // verbose #define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions #define SMP_DEBUG_FRAMEFIXUP 0 #define SMP_DEBUG_DATAFLOW 0 #define SMP_DEBUG_STACK_GRANULARITY 0 #define SMP_DEBUG_BUILD_RTL 1 // Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008 #define SMP_COMPUTE_LVA_SSA 1 // Compute fine-grained stack boundaries? #define SMP_COMPUTE_STACK_GRANULARITY 1 // Basic block number 0 is the top of the CFG lattice. #define SMP_TOP_BLOCK 0 // Set SharedTailChunks to TRUE for entire printf family // After we restructure the parent/tail structure of the database, this // will go away. #define KLUDGE_VFPRINTF_FAMILY 1 // Used for binary search by function number in SMPStaticAnalyzer.cpp // to trigger debugging output and find which instruction in which // function is causing a crash. bool SMPBinaryDebug = false; // ***************************************************************** // Class SMPFunction // ***************************************************************** // Constructor SMPFunction::SMPFunction(func_t *Info) { this->FuncInfo = *Info; this->IndirectCalls = false; this->SharedChunks = false; this->CallsAlloca = false; this->OutgoingArgsSize = 0; this->LocalVarTable.clear(); this->StackFrameMap.clear(); this->DirectCallTargets.clear(); return; } // Figure out the different regions of the stack frame, and find the // instructions that allocate and deallocate the local variables space // on the stack frame. // The stack frame info will be used to emit stack // annotations when Analyze() reaches the stack allocation // instruction that sets aside space for local vars. // Set the address of the instruction at which these // annotations should be emitted. This should normally // be an instruction such as: sub esp,48 // However, for a function with no local variables at all, // we will need to determine which instruction should be // considered to be the final instruction of the function // prologue and return its address. // Likewise, we find the stack deallocating instruction in // the function epilogue. void SMPFunction::SetStackFrameInfo(void) { bool FoundAllocInstr = false; bool FoundDeallocInstr = false; // The sizes of the three regions of the stack frame other than the // return address are stored in the function structure. this->LocalVarsSize = this->FuncInfo.frsize; this->CalleeSavedRegsSize = this->FuncInfo.frregs; this->IncomingArgsSize = this->FuncInfo.argsize; // The return address size can be obtained in a machine independent // way by calling get_frame_retsize(). this->RetAddrSize = get_frame_retsize(&(this->FuncInfo)); // IDA Pro has trouble with functions that do not have any local // variables. Unfortunately, the C library has plenty of these // functions. IDA usually claims that frregs is zero and frsize // is N, when the values should have been reversed. We can attempt // to detect this and fix it. bool FrameInfoFixed = this->MDFixFrameInfo(); #if SMP_DEBUG_FRAMEFIXUP if (FrameInfoFixed) { msg("Fixed stack frame size info: %s\n", this->FuncName); SMPBasicBlock CurrBlock = this->Blocks.front(); msg("First basic block:\n"); for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr(); CurrInstr != CurrBlock.GetLastInstr(); ++CurrInstr) { msg("%s\n", (*CurrInstr)->GetDisasm()); } } #endif // Now, if LocalVarsSize is not zero, we need to find the instruction // in the function prologue that allocates space on the stack for // local vars. This code could be made more robust in the future // by matching LocalVarsSize to the immediate value in the allocation // instruction. However, IDA Pro is sometimes a little off on this // number. **!!** if (0 < this->LocalVarsSize) { for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) { ea_t addr = CurrInstr->GetAddr(); // Keep the most recent instruction in the DeallocInstr // in case we reach the return without seeing a dealloc. if (!FoundDeallocInstr) { this->LocalVarsDeallocInstr = addr; } if (!FoundAllocInstr && CurrInstr->MDIsFrameAllocInstr()) { this->LocalVarsAllocInstr = addr; FoundAllocInstr = true; // As soon as we have found the local vars allocation, // we can try to fix incorrect sets of UseFP by IDA. // NOTE: We might want to extend this in the future to // handle functions that have no locals. **!!** bool FixedUseFP = MDFixUseFP(); #if SMP_DEBUG_FRAMEFIXUP if (FixedUseFP) { msg("Fixed UseFP in %s\n", this->FuncName); } #endif } else if (FoundAllocInstr) { // We can now start searching for the DeallocInstr. if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) { // Keep saving the most recent addr that looks // like the DeallocInstr until we reach the // end of the function. Last one to look like // it is used as the DeallocInstr. this->LocalVarsDeallocInstr = addr; FoundDeallocInstr = true; } } } // end for (list<SMPInstr>::iterator CurrInstr ... ) if (!FoundAllocInstr) { // Could not find the frame allocating instruction. Bad. // See if we can find the point at which the stack allocation reaches // a total of FuncInfo.frsize, regardless of whether it happened by push // instructions or some other means. this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize + this->FuncInfo.frregs); #if SMP_DEBUG_FRAMEFIXUP if (BADADDR == this->LocalVarsAllocInstr) { msg("ERROR: Could not find stack frame allocation in %s\n", FuncName); msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n", LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize); } else { msg("FindAllocPoint found %x for function %s\n", this->LocalVarsAllocInstr, this->GetFuncName()); } #endif } #if SMP_DEBUG_FIX_FRAMEINFO if (!FoundDeallocInstr) { // Could not find the frame deallocating instruction. Bad. // Emit diagnostic and use the last instruction in the // function. msg("ERROR: Could not find stack frame deallocation in %s\n", FuncName); } #endif } // else LocalVarsSize was zero, meaning that we need to search // for the end of the function prologue code and emit stack frame // annotations from that address (i.e. this method returns that // address). We will approximate this by finding the end of the // sequence of PUSH instructions at the beginning of the function. // The last PUSH instruction should be the last callee-save-reg // instruction. We can make this more robust in the future by // making sure that we do not count a PUSH of anything other than // a register. **!!** // NOTE: 2nd prologue instr is usually mov ebp,esp // THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE // THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0; else { ea_t SaveAddr = this->FuncInfo.startEA; for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) { insn_t CurrCmd = CurrInstr->GetCmd(); ea_t addr = CurrInstr->GetAddr(); if (CurrCmd.itype == NN_push) SaveAddr = addr; else break; } this->LocalVarsAllocInstr = SaveAddr; this->LocalVarsDeallocInstr = 0; } // end if (LocalVarsSize > 0) ... else ... #if 0 // Now we need to do the corresponding operations from the // end of the function to find the DeallocInstr in the // function epilogue. Because there is no addition to the // stack pointer to deallocate the local vars region, the // function epilogue will consist of (optional) pops of // callee-saved regs, followed by the return instruction. // Working backwards, we should find a return and then // stop when we do not find any more pops. if (0 >= LocalVarsSize) { this->LocalVarsDeallocInstr = NULL; } else { SaveAddr = this->FuncInfo.endEA - 1; bool FoundRet = false; do { ea_t addr = get_item_head(SaveAddr); flags_t InstrFlags = getFlags(addr); if (isCode(addr) && isHead(addr)) { ua_ana0(addr); if (!FoundRet) { // Just starting out. if (MDIsReturnInstr(cmd)) { FoundRet = true; SaveAddr = addr - 1; } else { msg("ERROR: Last instruction not a return.\n"); } } else { // Should be 0 or more POPs before the return. if (MDIsPopInstr(cmd)) { SaveAddr = addr - 1; } else if (FrameAllocInstr(cmd, this->LocalVarsSize)) { this->LocalVarsDeallocInstr = addr; } else { msg("ERROR: Frame deallocation not prior to POPs.\n"); this->LocalVarsDeallocInstr = SaveAddr + 1; } } // end if (!FoundRet) ... else ... } else { --SaveAddr; } // end if (isCode(addr) && isHead(addr)) } while (NULL == this->LocalVarsDeallocInstr); } // end if (0 >= this->LocalVarsSize) #endif // 0 this->CallsAlloca = this->FindAlloca(); #if SMP_COMPUTE_STACK_GRANULARITY // Now, find the boundaries between local variables. this->BuildLocalVarTable(); #endif return; } // end of SMPFunction::SetStackFrameInfo() // IDA Pro defines the sizes of regions in the stack frame in a way // that suits its purposes but not ours. The frsize field of the func_info_t // structure measures the distance between the stack pointer and the // frame pointer (ESP and EBP in the x86). This region includes some // of the callee-saved registers. So, the frregs field only includes // the callee-saved registers that are above the frame pointer. // x86 standard prologue on gcc/linux: // push ebp ; save old frame pointer // mov ebp,esp ; new frame pointer = current stack pointer // push esi ; callee save reg // push edi ; callee save reg // sub esp,34h ; allocate 52 bytes for local variables // // Notice that EBP acquires its final frame pointer value AFTER the // old EBP has been pushed. This means that, of the three callee saved // registers, one is above where EBP points and two are below. // IDA Pro is concerned with generating readable addressing expressions // for items on the stack. None of the callee-saved regs will ever // be addressed in the function; they will be dormant until they are popped // off the stack in the function epilogue. In order to create readable // disassembled code, IDA defines named constant offsets for locals. These // offsets are negative values (x86 stack grows downward from EBP toward // ESP). When ESP_relative addressing occurs, IDA converts a statement: // mov eax,[esp+12] // into the statement: // mov eax,[esp+3Ch+var_30] // Here, 3Ch == 60 decimal is the distance between ESP and EBP, and // var_30 is defined to ahve the value -30h == -48 decimal. So, the // "frame size" in IDA Pro is 60 bytes, and a certain local can be // addressed in ESP-relative manner as shown, or as [ebp+var_30] for // EBP-relative addressing. The interactive IDA user can then edit // the name var_30 to something mnemonic, such as "virus_size", and IDA // will replace all occurrences with the new name, so that code references // automatically become [ebp+virus_size]. As the user proceeds // interactively, he eventually produces very understandable code. // This all makes sense for producing readable assembly text. However, // our analyses have a compiler perspective as well as a memory access // defense perspective. SMP distinguishes between callee saved regs, // which should not be overwritten in the function body, and local // variables, which can be written. We view the stack frame in logical // pieces: here are the saved regs, here are the locals, here is the // return address, etc. We don't care which direction from EBP the // callee-saved registers lie; we don't want to lump them in with the // local variables. We also don't like the fact that IDA Pro will take // the function prologue code shown above and declare frregs=4 and // frsize=60, because frsize no longer matches the stack allocation // statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12. // So, the task of this function is to fix these stack sizes in our // private data members for the function, while leaving the IDA database // alone because IDA needs to maintain its own definitions of these // variables. // Fixing means we will update the data members LocalVarsSize and // CalleeSavedRegsSize. // NOTE: This function is both machine dependent and platform dependent. // The prologue and epilogue code generated by gcc-linux is as discussed // above, while on Visual Studio and other Windows x86 compilers, the // saving of registers other than EBP happens AFTER local stack allocation. // A Windows version of the function would expect to see the pushing // of ESI and EDI AFTER the sub esp,34h statement. bool SMPFunction::MDFixFrameInfo(void) { int SavedRegsSize = 0; int OtherPushesSize = 0; // besides callee-saved regs int NewLocalsSize = 0; int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize; bool Changed = false; // Iterate through the first basic block in the function. If we find // a frame allocating Instr in it, then we have local vars. If not, // we don't, and LocalVarsSize should have been zero. Count the callee // register saves leading up to the local allocation. Set data members // according to what we found if the values of the data members would // change. SMPBasicBlock CurrBlock = this->Blocks.front(); for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr(); CurrIter != CurrBlock.GetLastInstr(); ++CurrIter) { list<SMPInstr>::iterator CurrInstr = *CurrIter; if (CurrInstr->MDIsPushInstr()) { // We will make the gcc-linux assumption that a PUSH in // the first basic block, prior to the stack allocating // instruction, is a callee register save. To make this // more robust, we ensure that the register is from // the callee saved group of registers, and that it has // not been defined thus far in the function (else it might // be a push of an outgoing argument to a call that happens // in the first block when there are no locals). **!!!!** if (CurrInstr->MDUsesCalleeSavedReg() && !CurrInstr->HasSourceMemoryOperand()) { SavedRegsSize += 4; // **!!** should check the size } else { // Pushes of outgoing args can be scheduled so that // they are mixed with the pushes of callee saved regs. OtherPushesSize += 4; } } else if (CurrInstr->MDIsFrameAllocInstr()) { SavedRegsSize += OtherPushesSize; // Get the size being allocated. for (size_t index = 0; index < CurrInstr->NumUses(); ++index) { // Find the immediate operand. if (o_imm == CurrInstr->GetUse(index).GetOp().type) { // Get its value into LocalVarsSize. long AllocValue = (signed long) CurrInstr->GetUse(index).GetOp().value; // One compiler might have sub esp,24 and another // might have add esp,-24. Take the absolute value. if (0 > AllocValue) AllocValue = -AllocValue; if (AllocValue != (long) this->LocalVarsSize) { Changed = true; #if SMP_DEBUG_FRAMEFIXUP if (AllocValue + SavedRegsSize != OldFrameTotal) msg("Total frame size changed: %s\n", this->FuncName); #endif this->LocalVarsSize = (asize_t) AllocValue; this->CalleeSavedRegsSize = (ushort) SavedRegsSize; NewLocalsSize = this->LocalVarsSize; } else { // Old value was correct; no change. NewLocalsSize = this->LocalVarsSize; if (SavedRegsSize != this->CalleeSavedRegsSize) { this->CalleeSavedRegsSize = (ushort) SavedRegsSize; Changed = true; #if SMP_DEBUG_FRAMEFIXUP msg("Only callee regs size changed: %s\n", this->FuncName); #endif } } } // end if (o_imm == ...) } // end for all uses break; // After frame allocation instr, we are done } // end if (push) .. elsif frame allocating instr } // end for all instructions in the first basic block // If we did not find an allocating instruction, see if it would keep // the total size the same to set LocalVarsSize to 0 and to set // CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we // might be better off to leave the numbers alone. if (!Changed && (NewLocalsSize == 0)) { if (OldFrameTotal == SavedRegsSize) { this->CalleeSavedRegsSize = SavedRegsSize; this->LocalVarsSize = 0; Changed = true; } #if SMP_DEBUG_FRAMEFIXUP else { msg("Could not update frame sizes: %s\n", this->FuncName); } #endif } #if SMP_DEBUG_FRAMEFIXUP if ((0 < OtherPushesSize) && (0 < NewLocalsSize)) msg("Extra pushes found of size %d in %s\n", OtherPushesSize, this->FuncName); #endif return Changed; } // end of SMPFunction::MDFixFrameInfo() // Some functions have difficult to find stack allocations. For example, in some // version of glibc, strpbrk() zeroes out register ECX and then pushes it more than // 100 times in order to allocate zero-ed out local vars space for a character translation // table. We will use the stack pointer analysis of IDA to find out if there is a point // in the first basic block at which the stack pointer reaches the allocation total // that IDA is expecting for the local vars region. // If so, we return the address of the instruction at which ESP reaches its value, else // we return BADADDR. ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { bool DebugFlag = (0 == strncmp("strpbrk", this->GetFuncName(), 7)); sval_t TargetSize = - ((sval_t) OriginalLocSize); // negate; stack grows down #if SMP_DEBUG_FRAMEFIXUP if (DebugFlag) msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize); #endif if (this->AnalyzedSP) { // Limit our analysis to the first basic block in the function. list<SMPInstr>::iterator CurrInstr; for (CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) { ea_t addr = CurrInstr->GetAddr(); // get_spd() returns a cumulative delta of ESP sval_t sp_delta = get_spd(&(this->FuncInfo), addr); #if SMP_DEBUG_FRAMEFIXUP if (DebugFlag) msg("strpbrk delta: %d at %x\n", sp_delta, addr); #endif if (sp_delta == TargetSize) { // Previous instruction hit the frame size. if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) { return BADADDR; // cannot back up from first instruction } else { return (--CurrInstr)->GetAddr(); } } if (CurrInstr->IsLastInBlock()) { // It could be that the current instruction will cause the stack pointer // delta to reach the TargetSize. sp_delta is not updated until after the // current instruction, so we need to look ahead one instruction if the // current block falls through. On the other hand, if the current block // ends with a jump or return, we cannot hit TargetSize. if (CurrInstr->IsBasicBlockTerminator()) return BADADDR; list<SMPInstr>::iterator NextInstr = CurrInstr; ++NextInstr; if (NextInstr == this->Instrs.end()) return BADADDR; sp_delta = get_spd(&(this->FuncInfo), NextInstr->GetAddr()); if (sp_delta == TargetSize) { // CurrInstr will cause stack pointer delta to hit TargetSize. return addr; } else { return BADADDR; } } // end if LastInBlock } // end for all instructions } // end if (this->AnalyzedSP) #if SMP_DEBUG_FRAMEFIXUP else { msg("AnalyzedSP is false for %s\n", this->GetFuncName()); } #endif return BADADDR; } // end of SMPFunction::FindAllocPoint() // IDA Pro is sometimes confused by a function that uses the frame pointer // register for other purposes. For the x86, a function that uses EBP // as a frame pointer would begin with: push ebp; mov ebp,esp to save // the old value of EBP and give it a new value as a frame pointer. The // allocation of local variable space would have to come AFTER the move // instruction. A function that begins: push ebp; push esi; sub esp,24 // is obviously not using EBP as a frame pointer. IDA is apparently // confused by the push ebp instruction being the first instruction // in the function. We will reset UseFP to false in this case. // The inverse problem happens with a function that begins with instructions // other than push ebp; mov ebp,esp; ... etc. but eventually has those // instructions in the first basic block. For example, a C compiler generates // for the first block of main(): // lea ecx,[esp+arg0] // and esp, 0xfffffff0 // push dword ptr [ecx-4] // push ebp // mov ebp,esp // push ecx // sub esp,<framesize> // // This function is obviously using EBP as a frame pointer, but IDA Pro marks // the function as not using a frame pointer. We will reset UseFP to true in // this case. // NOTE: This logic should work for both Linux and Windows x86 prologues. bool SMPFunction::MDFixUseFP(void) { list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); ea_t addr = CurrInstr->GetAddr(); if (!(this->UseFP)) { // See if we can detect the instruction "push ebp" followed by the instruction // "mov ebp,esp" in the first basic block. The instructions do not have to be // consecutive. If we find them, we will reset UseFP to true. bool FirstBlockProcessed = false; bool EBPSaved = false; bool ESPintoEBP = false; do { FirstBlockProcessed = CurrInstr->IsLastInBlock(); if (!EBPSaved) { // still looking for "push ebp" if (CurrInstr->MDIsPushInstr() && CurrInstr->GetCmd().Operands[0].is_reg(R_bp)) { EBPSaved = true; } } else if (!ESPintoEBP) { // found "push ebp", looking for "mov ebp,esp" insn_t CurrCmd = CurrInstr->GetCmd(); if ((CurrCmd.itype == NN_mov) && (CurrInstr->GetDef(0).GetOp().is_reg(R_bp)) && (CurrInstr->GetUse(0).GetOp().is_reg(R_sp))) { ESPintoEBP = true; FirstBlockProcessed = true; // exit loop } } ++CurrInstr; addr = CurrInstr->GetAddr(); // We must get EBP set to its frame pointer value before we reach the // local frame allocation instruction (i.e. the subtraction of locals space // from the stack pointer). FirstBlockProcessed |= (addr >= this->LocalVarsAllocInstr); } while (!FirstBlockProcessed); // If we found ESPintoEBP, we also found EBPSaved first, and we need to change // this->UseFP to true and return true. Otherwise, return false. this->UseFP = ESPintoEBP; return ESPintoEBP; } // end if (!(this->UseFP)) // At this point, this->UseFP must have been true on entry to this method and we will // check whether it should be reset to false. while (addr < this->LocalVarsAllocInstr) { size_t DefIndex = 0; while (DefIndex < CurrInstr->NumDefs()) { if (CurrInstr->GetDef(DefIndex).GetOp().is_reg(R_bp)) return false; // EBP got set before locals were allocated ++DefIndex; } ++CurrInstr; addr = CurrInstr->GetAddr(); } // If we found no defs of the frame pointer before the local vars // allocation, then the frame pointer register is not being used // as a frame pointer, just as a general callee-saved register. this->UseFP = false; return true; } // end of SMPFunction::MDFixUseFP() // Determine local variable boundaries in the stack frame. void SMPFunction::BuildLocalVarTable(void) { // Currently we just use the info that IDA Pro has inferred from the direct // addressing of stack locations. this->SemiNaiveLocalVarID(); return; } // end of SMPFunction::BuildLocalVarTable() // Use the local variable offset list from IDA's stack frame structure to compute // the table of local variable boundaries. void SMPFunction::SemiNaiveLocalVarID(void) { // NOTE: We use IDA Pro's offsets from this->FuncInfo (e.g. frsize) and NOT // our own corrected values in our private data members. The offsets we // read from the stack frame structure returned by get_frame() are consistent // with other IDA Pro values, not with our corrected values. bool DebugFlag = false; #if SMP_DEBUG_STACK_GRANULARITY DebugFlag |= (0 == strcmp("qSort3", this->GetFuncName())); #endif func_t *FuncPtr = get_func(this->FuncInfo.startEA); if (NULL == FuncPtr) { msg("ERROR in SMPFunction::SemiNaiveLocalVarID; no func ptr\n"); } assert(NULL != FuncPtr); struc_t *StackFrame = get_frame(FuncPtr); if (NULL == StackFrame) { msg("WARNING: No stack frame info from get_frame for %s\n", this->GetFuncName()); return; } member_t *Member = StackFrame->members; for (size_t i = 0; i < StackFrame->memqty; ++i, ++Member) { long offset; char MemberName[MAXSTR] = {'\0'}; if (NULL == Member) { msg("NULL stack frame member pointer in %s\n", this->GetFuncName()); break; } get_member_name(Member->id, MemberName, MAXSTR - 1); if (MemberName == NULL) { #if SMP_DEBUG_STACK_GRANULARITY msg("NULL stack frame member in %s\n", this->GetFuncName()); #endif continue; } offset = Member->soff; if (MemberName[0] == ' ') { #if SMP_DEBUG_STACK_GRANULARITY msg("NULL stack frame name at offset %d in %s\n", offset, this->GetFuncName()); #endif MemberName[1] = '\0'; } if (DebugFlag) { msg("%s local var %s at offset %d\n", this->GetFuncName(), MemberName, offset); } if (offset >= (long) this->LocalVarsSize) break; // Stop after processing locals and outgoing args #if 0 // We want the offset from the stack pointer after local frame allocation. // This subtraction would make it relative to the original stack pointer. offset -= this->FuncInfo.frsize; #endif struct LocalVar TempLocal; TempLocal.offset = offset; TempLocal.size = -1; // compute later qstrncpy(TempLocal.VarName, MemberName, MAXSTR - 1); this->LocalVarTable.push_back(TempLocal); } // end for all stack frame members if (this->LocalVarTable.empty()) return; #if SMP_DEBUG_STACK_GRANULARITY msg("Computing %d local var sizes\n", this->LocalVarTable.size()); #endif // Now we want to fill in the size field for each local for (size_t VarIndex = 0; VarIndex < (this->LocalVarTable.size() - 1); ++VarIndex) { this->LocalVarTable[VarIndex].size = this->LocalVarTable[VarIndex + 1].offset - this->LocalVarTable[VarIndex].offset; } #if SMP_DEBUG_STACK_GRANULARITY msg("Computing last local var size for frsize %d\n", this->FuncInfo.frsize); #endif // Size of last local is total frsize minus savedregs in frame minus offset of last local if (this->LocalVarTable.size() > 0) { size_t SavedRegsSpace = 0; // portion of frsize that is saved regs, not locals. if (this->CalleeSavedRegsSize > this->FuncInfo.frregs) { // IDA Pro counts the save of EBP in frregs, but then EBP gets its new // value and callee saved regs other than the old EBP push get counted // in frsize rather than frregs. CalleeSavedRegsSize includes all saved // regs on the stack, both above and below the current EBP offset. // NOTE: For windows, this has to be done differently, as callee saved regs // happen at the bottom of the local frame, not the top. #if 0 SavedRegsSpace = this->CalleeSavedRegsSize - this->FuncInfo.frregs; #else SavedRegsSpace = this->FuncInfo.frsize - this->LocalVarsSize; #endif } this->LocalVarTable[this->LocalVarTable.size() - 1].size = this->FuncInfo.frsize - SavedRegsSpace - this->LocalVarTable[this->LocalVarTable.size() - 1].offset; } this->LocalVarOffsetLimit = this->LocalVarTable.back().offset + (adiff_t) this->LocalVarTable.back().size; assert(this->LocalVarOffsetLimit <= (adiff_t) this->FuncInfo.frsize); // Find out how many of the locals are really outgoing args. if (this->AnalyzedSP && !this->CallsAlloca && (BADADDR != this->LocalVarsAllocInstr)) { this->FindOutgoingArgsSize(); } else { msg("FindOutgoingArgsSize not called for %s ", this->GetFuncName()); msg("AnalyzedSP: %d CallsAlloca: %d LocalVarsAllocInstr: %x \n", this->AnalyzedSP, this->CallsAlloca, this->LocalVarsAllocInstr); } return; } // end of SMPFunction::SemiNaiveLocalVarID() // Determine how many bytes at the bottom of the stack frame (i.e. at bottom of // this->LocalVarsSize) are used for outgoing args. This is the case when the cdecl // calling convention is used, e.g. gcc/linux allocates local var space + out args space // in a single allocation and then writes outarg values directly to ESP+0, ESP+4, etc. void SMPFunction::FindOutgoingArgsSize(void) { // Compute the lowest value reached by the stack pointer. list<SMPInstr>::iterator CurrInst; this->MinStackDelta = 20000; // Final value should be negative bool DebugFlag = false; #if SMP_DEBUG_STACK_GRANULARITY DebugFlag = (0 == strcmp("simtest", this->GetFuncName())); #endif for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { ea_t addr = CurrInst->GetAddr(); sval_t sp_delta = get_spd(&(this->FuncInfo), addr); if (sp_delta < this->MinStackDelta) this->MinStackDelta = sp_delta; if (addr == this->LocalVarsAllocInstr) { // Total stack pointer delta is sp_delta for the next instruction, // because IDA updates the sp delta AFTER each instruction. list<SMPInstr>::iterator NextInst = CurrInst; ++NextInst; sp_delta = get_spd(&(this->FuncInfo), NextInst->GetAddr()); this->AllocPointDelta = sp_delta; } } #if SMP_DEBUG_STACK_GRANULARITY msg("AllocPointDelta: %d MinStackDelta: %d\n", this->AllocPointDelta, this->MinStackDelta); #endif assert(0 > this->MinStackDelta); // Allocate a vector of stack frame entries, one for each byte of the stack frame. // This will be our memory map for analyzing stack usage. int limit = 0; #if 1 if (this->LocalVarOffsetLimit > 0) limit = this->LocalVarOffsetLimit; #endif for (int i = this->MinStackDelta; i < limit; ++i) { struct StackFrameEntry TempEntry; TempEntry.VarPtr = NULL; TempEntry.offset = (long) i; TempEntry.Read = false; TempEntry.Written = false; TempEntry.AddressTaken = false; TempEntry.ESPRelativeAccess = false; TempEntry.EBPRelativeAccess = false; this->StackFrameMap.push_back(TempEntry); } // Fill in the VarPtr fields for each StackFrameMap entry. assert(0 > this->AllocPointDelta); for (size_t i = 0; i < this->LocalVarTable.size(); ++i) { assert(this->LocalVarTable.at(i).offset >= 0); // Picture that AllocPointDelta is -200, MinStackDelta is -210, and // the LocalVarTable[i].offset is +8 (i.e. 8 bytes above alloc point). // Then base = 8 + (-200 - -210) = 8 + 10 = 18, the proper offset into // the StackFrameMap. size_t base = (size_t) (this->LocalVarTable.at(i).offset + (this->AllocPointDelta - this->MinStackDelta)); size_t limit = base + this->LocalVarTable.at(i).size; if (limit > this->StackFrameMap.size()) { msg("ERROR: base = %d limit = %d StackFrameMap size = %d\n", base, limit, this->StackFrameMap.size()); } assert(limit <= this->StackFrameMap.size()); for (size_t MapIndex = base; MapIndex < limit; ++MapIndex) { this->StackFrameMap[MapIndex].VarPtr = &(this->LocalVarTable.at(i)); } } // Iterate through all instructions and record stack frame accesses in the StackFrameMap. for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { sval_t sp_delta = get_spd(&(this->FuncInfo), CurrInst->GetAddr()); if (0 < sp_delta) { // Stack underflow; about to assert msg("Stack underflow at %x %s sp_delta: %d\n", CurrInst->GetAddr(), CurrInst->GetDisasm(), sp_delta); } assert(0 >= sp_delta); ea_t offset; size_t DataSize; bool UsedFramePointer; if (CurrInst->HasDestMemoryOperand()) { for (size_t DefIndex = 0; DefIndex < CurrInst->NumDefs(); ++DefIndex) { op_t TempOp = CurrInst->GetDef(DefIndex).GetOp(); if (TempOp.type != o_phrase && TempOp.type != o_displ) continue; if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) { assert(0 <= offset); if (offset >= this->FuncInfo.frsize) continue; // limit processing to outgoing args and locals if ((offset + DataSize) > this->StackFrameMap.size()) { msg("ERROR: offset = %d DataSize = %d FrameMapSize = %d\n", offset, DataSize, this->StackFrameMap.size()); } assert((offset + DataSize) <= this->StackFrameMap.size()); for (int j = 0; j < (int) DataSize; ++j) { this->StackFrameMap[offset + j].Written = true; if (!UsedFramePointer) this->StackFrameMap[offset + j].ESPRelativeAccess = true; else this->StackFrameMap[offset + j].EBPRelativeAccess = true; } } } // end for all DEFs } // end if DestMemoryOperand if (CurrInst->HasSourceMemoryOperand()) { for (size_t UseIndex = 0; UseIndex < CurrInst->NumUses(); ++UseIndex) { op_t TempOp = CurrInst->GetUse(UseIndex).GetOp(); if (TempOp.type != o_phrase && TempOp.type != o_displ) continue; if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) { assert(0 <= offset); if (offset >= this->FuncInfo.frsize) continue; // limit processing to outgoing args and locals if ((offset + DataSize) > this->StackFrameMap.size()) { msg("ERROR: offset = %d DataSize = %d FrameMapSize = %d\n", offset, DataSize, this->StackFrameMap.size()); } assert((offset + DataSize) <= this->StackFrameMap.size()); for (int j = 0; j < (int) DataSize; ++j) { this->StackFrameMap[offset + j].Read = true; if (!UsedFramePointer) this->StackFrameMap[offset + j].ESPRelativeAccess = true; else this->StackFrameMap[offset + j].EBPRelativeAccess = true; } } } } // end if SourceMemoryOperand // NOTE: Detect taking the address of stack locations. **!!** } // end for all instructions // If function is a leaf function, set OutgoingArgsSize to zero and return. if (this->IsLeaf()) { this->OutgoingArgsSize = 0; return; } // For non-leaf functions, set the OutgoingArgsSize to the write-only, ESP-relative // region of the bottom of the StackFrameMap. for (size_t MapIndex = 0; MapIndex < this->StackFrameMap.size(); ++MapIndex) { // Some of the bottom of the stack frame might be below the local frame allocation. // These are pushes that happened after allocation, etc. We skip over these // locations and define the outgoing args region to start strictly at the bottom // of the local frame allocation. struct StackFrameEntry TempEntry = this->StackFrameMap.at(MapIndex); if (DebugFlag) { msg("StackFrameMap entry %d: offset: %d Read: %d Written: %d ESP: %d EBP: %d\n", MapIndex, TempEntry.offset, TempEntry.Read, TempEntry.Written, TempEntry.ESPRelativeAccess, TempEntry.EBPRelativeAccess); } if (TempEntry.offset < this->AllocPointDelta) continue; if (TempEntry.Read || TempEntry.EBPRelativeAccess || !TempEntry.Written || !TempEntry.ESPRelativeAccess) break; this->OutgoingArgsSize++; } // Sometimes we encounter unused stack space above the outgoing args. Lump this space // in with the outgoing args. We detect this by noting when the outgoing args space // has only partially used the space assigned to a local var. if ((0 < this->OutgoingArgsSize) && (this->OutgoingArgsSize < this->FuncInfo.frsize)) { long MapIndex = (this->AllocPointDelta - this->MinStackDelta); assert(0 <= MapIndex); MapIndex += (((long) this->OutgoingArgsSize) - 1); struct StackFrameEntry TempEntry = this->StackFrameMap.at((size_t) MapIndex); if (this->OutgoingArgsSize < (TempEntry.VarPtr->offset + TempEntry.VarPtr->size)) { msg("OutGoingArgsSize = %d", this->OutgoingArgsSize); this->OutgoingArgsSize = TempEntry.VarPtr->offset + TempEntry.VarPtr->size; msg(" adjusted to %d\n", this->OutgoingArgsSize); } } return; } // end of SMPFunction::FindOutgoingArgsSize() // If TempOp reads or writes to a stack location, return the offset (relative to the initial // stack pointer value) and the size in bytes of the data access. // sp_delta is the stack pointer delta of the current instruction, relative to the initial // stack pointer value for the function. // Return true if a stack memory access was found in TempOp, false otherwise. bool SMPFunction::MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP) { ushort BaseReg; ushort IndexReg; if (TempOp.type == o_displ) { offset = TempOp.addr; } else { // o_phrase offset = 0; // implicit zero, as in [esp] ==> [esp+0] } if (TempOp.hasSIB) { BaseReg = sib_base(TempOp); IndexReg = sib_index(TempOp); } else { // no SIB BaseReg = TempOp.reg; IndexReg = R_none; } if ((BaseReg == R_sp) || (IndexReg == R_sp)) { // ESP-relative constant offset offset += sp_delta; // base offsets from entry ESP value offset -= this->MinStackDelta; // convert to StackFrameMap index // Get size of data written DataSize = GetOpDataSize(TempOp); FP = false; return true; } else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) { offset -= this->FuncInfo.frregs; // base offsets from entry ESP value offset -= this->MinStackDelta; // convert to StackFrameMap index DataSize = GetOpDataSize(TempOp); FP = true; return true; } else { return false; } } // end of SMPFunction::MDGetStackOffsetAndSize() // Find evidence of calls to alloca(), which appear as stack space allocations (i.e. // subtractions from the stack pointer) AFTER the local frame allocation instruction // for this function. // Return true if such an allocation is found and false otherwise. bool SMPFunction::FindAlloca(void) { list<SMPInstr>::iterator CurrInst; for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { if ((CurrInst->GetAddr() > this->LocalVarsAllocInstr) && CurrInst->MDIsFrameAllocInstr()) { return true; } } return false; } // end of SMPFunction::FindAlloca() // Emit the annotations describing the regions of the stack frame. void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) { ea_t addr = Instr->GetAddr(); #if 0 if (0 < IncomingArgsSize) { qfprintf(AnnotFile, "%10x %6d INARGS STACK esp + %d %s \n", addr, IncomingArgsSize, (LocalVarsSize + CalleeSavedRegsSize + RetAddrSize), Instr->GetDisasm()); } #endif if (0 < RetAddrSize) { qfprintf(AnnotFile, "%10x %6d MEMORYHOLE STACK esp + %d ReturnAddress \n", addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize)); } if (0 < CalleeSavedRegsSize) { qfprintf(AnnotFile, "%10x %6d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n", addr, CalleeSavedRegsSize, LocalVarsSize); } if (0 < LocalVarsSize) { unsigned long ParentReferentID = DataReferentID++; qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d PARENT LocalFrame LOCALFRAME\n", addr, LocalVarsSize, ParentReferentID, 0); #if SMP_COMPUTE_STACK_GRANULARITY if (this->AnalyzedSP && !this->CallsAlloca && (BADADDR != this->LocalVarsAllocInstr)) { // We can only fine-grain the stack frame if we were able to analyze the stack if (this->OutgoingArgsSize > 0) { qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d CHILDOF %d OFFSET %d OutArgsRegion OUTARGS\n", addr, this->OutgoingArgsSize, DataReferentID, 0, ParentReferentID, 0); ++DataReferentID; } #if SMP_DEBUG_STACK_GRANULARITY msg("LocalVarTable of size %d for function %s\n", this->LocalVarTable.size(), this->GetFuncName()); #endif for (size_t i = 0; i < this->LocalVarTable.size(); ++i) { #if SMP_DEBUG_STACK_GRANULARITY msg("Entry %d offset %d size %d name %s\n", i, this->LocalVarTable[i].offset, this->LocalVarTable[i].size, this->LocalVarTable[i].VarName); #endif // Don't emit annotations for incoming or outgoing args or anything else // above or below the current local frame. if ((this->LocalVarTable[i].offset >= (long) this->FuncInfo.frsize) || (this->LocalVarTable[i].offset < (long) this->OutgoingArgsSize)) continue; qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d CHILDOF %d OFFSET %d LOCALVAR %s \n", addr, this->LocalVarTable[i].size, DataReferentID, this->LocalVarTable[i].offset, ParentReferentID, this->LocalVarTable[i].offset, this->LocalVarTable[i].VarName); ++DataReferentID; } } // end if (this->AnalyzedSP and not Alloca .... ) #endif } // end if (0 < LocalVarsSize) return; } // end of SMPFunction::EmitStackFrameAnnotations() // Main data flow analysis driver. Goes through the function and // fills all objects for instructions, basic blocks, and the function // itself. void SMPFunction::Analyze(void) { list<SMPInstr>::iterator FirstInBlock = this->Instrs.end(); // For starting a basic block list<SMPInstr>::iterator LastInBlock = this->Instrs.end(); // Terminating a basic block #if SMP_DEBUG_CONTROLFLOW msg("Entering SMPFunction::Analyze.\n"); #endif // Get some basic info from the FuncInfo structure. this->Size = this->FuncInfo.endEA - this->FuncInfo.startEA; this->UseFP = (0 != (this->FuncInfo.flags & (FUNC_FRAME | FUNC_BOTTOMBP))); this->StaticFunc = (0 != (this->FuncInfo.flags & FUNC_STATIC)); get_func_name(this->FuncInfo.startEA, this->FuncName, sizeof(this->FuncName) - 1); this->BlockCount = 0; this->AnalyzedSP = this->FuncInfo.analyzed_sp(); #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: got basic info.\n"); #endif // Cycle through all chunks that belong to the function. func_tail_iterator_t FuncTail(&(this->FuncInfo)); size_t ChunkCounter = 0; for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const area_t &CurrChunk = FuncTail.chunk(); ++ChunkCounter; if (1 < ChunkCounter) { this->SharedChunks = true; #if SMP_DEBUG_CHUNKS msg("Found tail chunk for %s at %x\n", this->FuncName, CurrChunk.startEA); #endif } // Build the instruction and block lists for the function. for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA; addr = get_item_end(addr)) { flags_t InstrFlags = getFlags(addr); if (isHead(InstrFlags) && isCode(InstrFlags)) { SMPInstr CurrInst = SMPInstr(addr); // Fill in the instruction data members. #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n"); #endif CurrInst.Analyze(); if (SMPBinaryDebug) { msg("Disasm: %s \n", CurrInst.GetDisasm()); } if (this->AnalyzedSP) { // Audit the IDA SP analysis. sval_t sp_delta = get_spd(&(this->FuncInfo), addr); // sp_delta is difference between current value of stack pointer // and value of the stack pointer coming into the function. It // is updated AFTER each instruction. Thus, it should not get back // above zero (e.g. to +4) until after a return instruction. if (sp_delta > 0) { // Stack pointer has underflowed, according to IDA's analysis, // which is probably incorrect. this->AnalyzedSP = false; msg("Resetting AnalyzedSP to false for %s\n", this->GetFuncName()); msg("Underflowing instruction: %s sp_delta: %d\n", CurrInst.GetDisasm(), sp_delta); } } if (CurrInst.GetDataFlowType() == INDIR_CALL) this->IndirectCalls = true; else if (CurrInst.GetDataFlowType() == CALL) { for (size_t i = 0; i < CurrInst.NumUses(); ++i) { optype_t OpType = CurrInst.GetUse(i).GetOp().type; if ((OpType == o_near) || (OpType == o_far)) { ea_t CallTarget = CurrInst.GetUse(i).GetOp().addr; this->DirectCallTargets.push_back(CallTarget); } } } // Before we insert the instruction into the instruction // list, determine if it is a jump target that does not // follow a basic block terminator. This is the special case // of a CASE in a SWITCH that falls through into another // CASE, for example. The first sequence of statements // was not terminated by a C "break;" statement, so it // looks like straight line code, but there is an entry // point at the beginning of the second CASE sequence and // we have to split basic blocks at the entry point. if ((FirstInBlock != this->Instrs.end()) && CurrInst.IsJumpTarget()) { #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: hit special jump target case.\n"); #endif LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { CurrBlock.SetShared(); } FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); this->BlockCount += 1; } #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: putting CurrInst on list.\n"); #endif // Insert instruction at end of list. this->Instrs.push_back(CurrInst); // Find basic block leaders and terminators. if (FirstInBlock == this->Instrs.end()) { #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: setting FirstInBlock.\n"); #endif FirstInBlock = --(this->Instrs.end()); } if (CurrInst.IsBasicBlockTerminator()) { #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: found block terminator.\n"); #endif LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { CurrBlock.SetShared(); } FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); this->BlockCount += 1; // Is the instruction a branch to a target outside the function? If // so, this function has shared tail chunks. if (CurrInst.IsBranchToFarChunk()) { this->SharedChunks = true; } } } // end if (isHead(InstrFlags) && isCode(InstrFlags) } // end for (ea_t addr = FuncInfo.startEA; ... ) // Handle the special case in which a function does not terminate // with a return instruction or any other basic block terminator. // Sometimes IDA Pro sees a call to a NORET function and decides // to not include the dead code after it in the function. That // dead code includes the return instruction, so the function no // longer includes a return instruction and terminates with a CALL. if (FirstInBlock != this->Instrs.end()) { LastInBlock = --(this->Instrs.end()); SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { CurrBlock.SetShared(); } FirstInBlock = this->Instrs.end(); LastInBlock = this->Instrs.end(); this->Blocks.push_back(CurrBlock); this->BlockCount += 1; } } // end for (bool ChunkOK = ...) #if KLUDGE_VFPRINTF_FAMILY if (0 != strstr(this->GetFuncName(), "printf")) { this->SharedChunks = true; msg("Kludging function %s\n", this->GetFuncName()); } #endif // Set up basic block links and map of instructions to blocks. if (!(this->HasSharedChunks())) { bool DumpFlag = false; #if SMP_DEBUG_DATAFLOW DumpFlag |= (0 == strcmp("main", this->GetFuncName())); DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); DumpFlag |= (0 == strcmp("simtest", this->GetFuncName())); #endif this->SetLinks(); #if SMP_COMPUTE_LVA_SSA list<SMPInstr>::iterator CurrInst; bool GoodRTL; for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) { // Build tree RTLs for the instruction. GoodRTL = CurrInst->BuildRTL(); #if SMP_DEBUG_BUILD_RTL if (!GoodRTL) { msg("ERROR: Cannot build RTL at %x for %s\n", CurrInst->GetAddr(), CurrInst->GetDisasm()); } #if 0 else { msg("Built RTL at %x for %s\n", CurrInst->GetAddr(), CurrInst->GetDisasm()); } #endif #endif } this->RPONumberBlocks(); this->LiveVariableAnalysis(); this->ComputeSSA(); if (DumpFlag) this->Dump(); #endif } #if SMP_DEBUG_CONTROLFLOW msg("SMPFunction::Analyze: set stack frame info.\n"); #endif // Figure out the stack frame and related info. this->SetStackFrameInfo(); return; } // end of SMPFunction::Analyze() // Compute SSA form data structures across the function. void SMPFunction::ComputeSSA(void) { #if SMP_DEBUG_DATAFLOW bool DumpFlag = (0 == strcmp("main", this->GetFuncName())); DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); DumpFlag |= (0 == strcmp("_init_proc", this->GetFuncName())); #endif #if 0 if (DumpFlag) this->Dump(); #endif this->ComputeIDoms(); this->ComputeDomFrontiers(); this->ComputeGlobalNames(); this->ComputeBlocksDefinedIn(); this->InsertPhiFunctions(); this->BuildDominatorTree(); this->SSARenumber(); list<SMPBasicBlock>::iterator CurrBlock; for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { CurrBlock->SetLocalNames(); CurrBlock->SSALocalRenumber(); #if 1 CurrBlock->MarkDeadRegs(); #endif } #if SMP_DEBUG_DATAFLOW if (DumpFlag) this->Dump(); #endif return; } // end of SMPFunction::ComputeSSA() // Link basic blocks to their predecessors and successors, and build the map // of instruction addresses to basic blocks. void SMPFunction::SetLinks(void) { list<SMPBasicBlock>::iterator CurrBlock; #if SMP_DEBUG_DATAFLOW msg("SetLinks called for %s\n", this->GetFuncName()); #endif // First, set up the map of instructions to basic blocks. for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { list<list<SMPInstr>::iterator>::iterator CurrInst; for (CurrInst = CurrBlock->GetFirstInstr(); CurrInst != CurrBlock->GetLastInstr(); ++CurrInst) { pair<ea_t, list<SMPBasicBlock>::iterator> MapItem((*CurrInst)->GetAddr(),CurrBlock); InstBlockMap.insert(MapItem); } } #if SMP_DEBUG_DATAFLOW msg("SetLinks finished mapping: %s\n", this->GetFuncName()); #endif // Next, set successors of each basic block, also setting up the predecessors in the // process. for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { list<SMPInstr>::iterator CurrInst = *(--(CurrBlock->GetLastInstr())); // Last instruction in block; set successors bool CallFlag = (CALL == CurrInst->GetDataFlowType()); xrefblk_t CurrXrefs; for (bool ok = CurrXrefs.first_from(CurrInst->GetAddr(), XREF_ALL); ok; ok = CurrXrefs.next_from()) { if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) { // Found a code target, with its address in CurrXrefs.to if (CallFlag && (CurrXrefs.to != (CurrInst->GetAddr() + CurrInst->GetCmd().size))) { // A call instruction will have two targets: the fall through to the // next instruction, and the called function. We want to link to the // fall-through instruction, but not to the called function. // Some blocks end with a call just because the fall-through instruction // is a jump target from elsewhere. continue; } map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry; MapEntry = this->InstBlockMap.find(CurrXrefs.to); if (MapEntry == this->InstBlockMap.end()) { msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.to, this->GetFuncName()); msg(" Referenced from %s\n", CurrInst->GetDisasm()); } else { list<SMPBasicBlock>::iterator Target = MapEntry->second; // Make target block a successor of current block. CurrBlock->LinkToSucc(Target); // Make current block a predecessor of target block. Target->LinkToPred(CurrBlock); } } } // end for all xrefs } // end for all blocks // If we have any blocks that are all no-ops and have no predecessors, remove those // blocks. They are dead and make the CFG no longer a lattice. Any blocks that have // no predecessors but are not all no-ops should also be removed with a different // log message. CurrBlock = this->Blocks.begin(); ++CurrBlock; // don't delete the top block, no matter what. while (CurrBlock != this->Blocks.end()) { if (CurrBlock->GetFirstPred() == CurrBlock->GetLastPred()) { if (CurrBlock->AllNops()) msg("Removing all nops block at %x\n", CurrBlock->GetFirstAddr()); else msg("Removing block with no predecessors at %x\n", CurrBlock->GetFirstAddr()); // Remove this block from the predecessors list of its successors. list<list<SMPBasicBlock>::iterator>::iterator SuccIter; ea_t TempAddr = CurrBlock->GetFirstAddr(); for (SuccIter = CurrBlock->GetFirstSucc(); SuccIter != CurrBlock->GetLastSucc(); ++SuccIter) { (*SuccIter)->ErasePred(TempAddr); } // Finally, remove the block from the blocks list. CurrBlock = this->Blocks.erase(CurrBlock); this->BlockCount -= 1; } else ++CurrBlock; } return; } // end of SMPFunction::SetLinks() // Number all basic blocks in reverse postorder (RPO) and set RPOBlocks vector to // access them. void SMPFunction::RPONumberBlocks(void) { bool DebugFlag = false; #if SMP_DEBUG_DATAFLOW DebugFlag = (0 == strcmp("__ieee754_pow", this->GetFuncName())); if (DebugFlag) msg("Entered RPONumberBlocks\n"); #endif int CurrNum = 0; list<list<SMPBasicBlock>::iterator> WorkList; // Number the first block with 0. list<SMPBasicBlock>::iterator CurrBlock = this->Blocks.begin(); #if 0 if (this->RPOBlocks.capacity() <= (size_t) this->BlockCount) { msg("Reserving %d RPOBlocks old value: %d\n", 2+this->BlockCount, this->RPOBlocks.capacity()); this->RPOBlocks.reserve(2 + this->BlockCount); this->RPOBlocks.assign(2 + this->BlockCount, this->Blocks.end()); } #endif CurrBlock->SetNumber(CurrNum); this->RPOBlocks.push_back(CurrBlock); ++CurrNum; // Push the first block's successors onto the work list. list<list<SMPBasicBlock>::iterator>::iterator CurrSucc = CurrBlock->GetFirstSucc(); while (CurrSucc != CurrBlock->GetLastSucc()) { WorkList.push_back(*CurrSucc); ++CurrSucc; } // Use the WorkList to iterate through all blocks in the function list<list<SMPBasicBlock>::iterator>::iterator CurrListItem = WorkList.begin(); bool change; while (!WorkList.empty()) { change = false; while (CurrListItem != WorkList.end()) { if ((*CurrListItem)->GetNumber() != SMP_BLOCKNUM_UNINIT) { // Duplicates get pushed onto the WorkList because a block // can be the successor of multiple other blocks. If it is // already numbered, it is a duplicate and can be removed // from the list. CurrListItem = WorkList.erase(CurrListItem); change = true; continue; } if ((*CurrListItem)->AllPredecessorsNumbered()) { // Ready to be numbered. (*CurrListItem)->SetNumber(CurrNum); #if 0 msg("Set RPO number %d\n", CurrNum); if (DebugFlag && (7 == CurrNum)) this->Dump(); #endif this->RPOBlocks.push_back(*CurrListItem); ++CurrNum; change = true; // Push its unnumbered successors onto the work list. CurrSucc = (*CurrListItem)->GetFirstSucc(); while (CurrSucc != (*CurrListItem)->GetLastSucc()) { if ((*CurrSucc)->GetNumber() == SMP_BLOCKNUM_UNINIT) WorkList.push_back(*CurrSucc); ++CurrSucc; } CurrListItem = WorkList.erase(CurrListItem); } else { ++CurrListItem; } } // end while (CurrListItem != WorkList.end()) if (change) { // Reset CurrListItem to beginning of work list for next iteration. CurrListItem = WorkList.begin(); } else { // Loops can cause us to not be able to find a WorkList item that has // all predecessors numbered. Take the WorkList item with the lowest address // and number it so we can proceed. CurrListItem = WorkList.begin(); ea_t LowAddr = (*CurrListItem)->GetFirstAddr(); list<list<SMPBasicBlock>::iterator>::iterator SaveItem = CurrListItem; ++CurrListItem; while (CurrListItem != WorkList.end()) { if (LowAddr > (*CurrListItem)->GetFirstAddr()) { SaveItem = CurrListItem; LowAddr = (*CurrListItem)->GetFirstAddr(); } ++CurrListItem; } // SaveItem should now be numbered. (*SaveItem)->SetNumber(CurrNum); #if SMP_DEBUG_DATAFLOW msg("Picked LowAddr %x and set RPO number %d\n", LowAddr, CurrNum); #endif this->RPOBlocks.push_back(*SaveItem); ++CurrNum; // Push its unnumbered successors onto the work list. CurrSucc = (*SaveItem)->GetFirstSucc(); while (CurrSucc != (*SaveItem)->GetLastSucc()) { if ((*CurrSucc)->GetNumber() == SMP_BLOCKNUM_UNINIT) WorkList.push_back(*CurrSucc); ++CurrSucc; } CurrListItem = WorkList.erase(SaveItem); CurrListItem = WorkList.begin(); } // end if (change) ... else ... } // end while work list is nonempty return; } // end of SMPFunction::RPONumberBlocks() // Perform live variable analysis on all blocks in the function. // See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm. void SMPFunction::LiveVariableAnalysis(void) { list<SMPBasicBlock>::iterator CurrBlock; #if SMP_DEBUG_DATAFLOW msg("LiveVariableAnalysis for %s\n", this->GetFuncName()); bool DebugFlag = (0 == strcmp("__ieee754_pow", this->GetFuncName())); #endif for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { // Initialize the Killed and UpwardExposed sets for each block. CurrBlock->InitKilledExposed(); } bool changed; // Iterate over each block, updating LiveOut sets until no more changes are made. // NOTE: LVA is more efficient when computed over a reverse post-order list of blocks // from the inverted CFG. We have an RPO list from the forward CFG. #if 1 do { changed = false; for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { changed |= CurrBlock->UpdateLiveOut(); } } while (changed); #else // Use reverse postorder do { changed = false; for (size_t index = 0; index < this->RPOBlocks.size(); ++index) { CurrBlock = this->RPOBlocks[index]; changed |= CurrBlock->UpdateLiveOut(); } } while (changed); #endif #if SMP_DEBUG_DATAFLOW if (DebugFlag) msg("Exiting LiveVariableAnalysis\n"); #endif return; } // end of SMPFunction::LiveVariableAnalysis() // Return the IDom index that is the end of the intersection prefix of the Dom sets of // the two blocks designated by the RPO numbers passed in. // See Cooper & Torczon, "Engineering a Compiler" 1st edition figure 9.8. int SMPFunction::IntersectDoms(int block1, int block2) const { int finger1 = block1; int finger2 = block2; while (finger1 != finger2) { while (finger1 > finger2) finger1 = this->IDom.at(finger1); while (finger2 > finger1) finger2 = this->IDom.at(finger2); } return finger1; } // end of SMPFunction::IntersectDoms() // Compute immediate dominators of all blocks into IDom[] vector. void SMPFunction::ComputeIDoms(void) { bool DebugFlag = false; #if SMP_DEBUG_DATAFLOW DebugFlag = (0 == strcmp("__ieee754_pow", this->GetFuncName())); if (DebugFlag) msg("Entered ComputeIDoms\n"); #endif // Initialize the IDom[] vector to uninitialized values for all blocks. this->IDom.reserve(this->BlockCount); this->IDom.assign(this->BlockCount, SMP_BLOCKNUM_UNINIT); if (DebugFlag) msg("BlockCount = %d\n", this->BlockCount); this->IDom[0] = 0; // Start block dominated only by itself bool changed; do { changed = false; for (size_t RPONum = 1; RPONum < (size_t) this->BlockCount; ++RPONum) { if (DebugFlag) msg("RPONum %d\n", RPONum); if (DebugFlag) { msg("RPOBlocks vector size: %d\n", this->RPOBlocks.size()); for (size_t index = 0; index < this->RPOBlocks.size(); ++index) { msg("RPOBlocks entry %d is %d\n", index, RPOBlocks[index]->GetNumber()); } } list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at(RPONum); // if (DebugFlag) msg("CurrBlock: %x\n", CurrBlock._Ptr); list<list<SMPBasicBlock>::iterator>::iterator CurrPred; // Initialize NewIdom to the first processed predecessor of block RPONum. int NewIdom = SMP_BLOCKNUM_UNINIT; for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { int PredNum = (*CurrPred)->GetNumber(); if (DebugFlag) msg("Pred: %d\n", PredNum); // **!!** See comment below about unreachable blocks. if (SMP_BLOCKNUM_UNINIT == PredNum) continue; int PredIDOM = this->IDom.at(PredNum); if (DebugFlag) msg("Pred IDom: %d\n", PredIDOM); if (SMP_BLOCKNUM_UNINIT != PredIDOM) { NewIdom = PredNum; break; } } if (NewIdom == SMP_BLOCKNUM_UNINIT) msg("Failure on NewIdom in ComputeIDoms for %s\n", this->GetFuncName()); assert(NewIdom != SMP_BLOCKNUM_UNINIT); // Loop through all predecessors of block RPONum except block NewIdom. // Set NewIdom to the intersection of its Dom set and the Doms set of // each predecessor that has had its Doms set computed. for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { int PredNum = (*CurrPred)->GetNumber(); if (DebugFlag) msg("PredNum: %d\n", PredNum); // **!!** We could avoid failure on unreachable basic blocks // by executing a continue statement if PredNum is -1. Long term solution // is to prune out unreachable basic blocks. if (PredNum == SMP_BLOCKNUM_UNINIT) continue; int PredIDOM = this->IDom.at(PredNum); if (DebugFlag) msg("PredIDOM: %d\n", PredIDOM); if ((SMP_BLOCKNUM_UNINIT == PredIDOM) || (NewIdom == PredIDOM)) { // Skip predecessors that have uncomputed Dom sets, or are the // current NewIdom. continue; } if (DebugFlag) msg("Old NewIdom value: %d\n", NewIdom); NewIdom = this->IntersectDoms(PredNum, NewIdom); if (DebugFlag) msg("New NewIdom value: %d\n", NewIdom); } // If NewIdom is not the value currently in vector IDom[], update the // vector entry and set changed to true. if (NewIdom != this->IDom.at(RPONum)) { if (DebugFlag) msg("IDOM changed from %d to %d\n", this->IDom.at(RPONum), NewIdom); this->IDom[RPONum] = NewIdom; changed = true; } } } while (changed); return; } // end of SMPFunction::ComputeIDoms() // Compute dominance frontier sets for each block. void SMPFunction::ComputeDomFrontiers(void) { list<SMPBasicBlock>::iterator CurrBlock; for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { // We look only at join points in the CFG, as per Cooper/Torczon chapter 9. if (1 < CurrBlock->GetNumPreds()) { // join point; more than 1 predecessor int runner; list<list<SMPBasicBlock>::iterator>::iterator CurrPred; for (CurrPred = CurrBlock->GetFirstPred(); CurrPred != CurrBlock->GetLastPred(); ++CurrPred) { // For each predecessor, we run up the IDom[] vector and add CurrBlock to the // DomFrontier for all blocks that are between CurrPred and IDom[CurrBlock], // not including IDom[CurrBlock] itself. runner = (*CurrPred)->GetNumber(); while (runner != this->IDom.at(CurrBlock->GetNumber())) { // Cooper/Harvey/Kennedy paper does not quite agree with the later // text by Cooper/Torczon. Text says that the start node has no IDom // in the example on pages 462-463, but it shows an IDOM for the // root node in Figure 9.9 of value == itself. The first edition text // on p.463 seems correct, as the start node dominates every node and // thus should have no dominance frontier. if (SMP_TOP_BLOCK == runner) break; (*CurrPred)->AddToDomFrontier(CurrBlock->GetNumber()); runner = this->IDom.at(runner); } } // end for all predecessors } // end if join point } // end for all blocks return; } // end of SMPFunction::ComputeDomFrontiers() // Compute the GlobalNames set, which includes all operands that are used in more than // one basic block. It is the union of all UpExposedSets of all blocks. void SMPFunction::ComputeGlobalNames(void) { set<op_t, LessOp>::iterator SetIter; list<SMPBasicBlock>::iterator CurrBlock; unsigned int index = 0; if (this->Blocks.size() < 2) return; // cannot have global names if there is only one block bool DebugFlag = false; #if SMP_DEBUG_DATAFLOW DebugFlag = (0 == strcmp("__ieee754_pow", this->GetFuncName())); #endif for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { for (SetIter = CurrBlock->GetFirstUpExposed(); SetIter != CurrBlock->GetLastUpExposed(); ++SetIter) { op_t TempOp = *SetIter; // The GlobalNames set will have the complete collection of operands that we are // going to number in our SSA computations. We now assign an operand number // within the op_t structure for each, so that we can index into the // BlocksUsedIn[] vector, for example. This operand number is not to be // confused with SSA numbers. // We use the operand number field op_t.n for the lower 8 bits, and the offset // fields op_t.offb:op_t.offo for the upper 16 bits. We are overwriting IDA // values here, but operands in the data flow analysis sets should never be // inserted back into the program anyway. SetGlobalIndex(&TempOp, index); #if SMP_DEBUG_DATAFLOW msg("Global Name: "); PrintListOperand(TempOp); #endif set<op_t, LessOp>::iterator AlreadyInSet; pair<set<op_t, LessOp>::iterator, bool> InsertResult; InsertResult = this->GlobalNames.insert(TempOp); if (!InsertResult.second) { // Already in GlobalNames, so don't assign an index number. ; #if SMP_DEBUG_DATAFLOW msg(" already in GlobalNames.\n"); #endif } else { ++index; #if SMP_DEBUG_DATAFLOW msg(" inserted as index %d\n", ExtractGlobalIndex(TempOp)); #endif } } // for each upward exposed item in the current block } // for each basic block assert(16777215 >= this->GlobalNames.size()); // index fits in 24 bits return; } // end of SMPFunction::ComputeGlobalNames() // For each item in GlobalNames, record the blocks that DEF the item. void SMPFunction::ComputeBlocksDefinedIn(void) { // Loop through all basic blocks and examine all DEFs. For Global DEFs, record // the block number in BlocksDefinedIn. The VarKillSet records DEFs without // having to examine every instruction. list<SMPBasicBlock>::iterator CurrBlock; this->BlocksDefinedIn.clear(); for (size_t i = 0; i < this->GlobalNames.size(); ++i) { list<int> TempList; this->BlocksDefinedIn.push_back(TempList); } #if SMP_DEBUG_DATAFLOW msg("Number of GlobalNames: %d\n", this->GlobalNames.size()); msg("Size of BlocksDefinedIn: %d\n", this->BlocksDefinedIn.size()); #endif for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { set<op_t, LessOp>::iterator KillIter; for (KillIter = CurrBlock->GetFirstVarKill(); KillIter != CurrBlock->GetLastVarKill(); ++KillIter) { // If killed item is not a block-local item (it is global), record it. set<op_t, LessOp>::iterator NameIter = this->GlobalNames.find(*KillIter); if (NameIter != this->GlobalNames.end()) { // found in GlobalNames set // We have a kill of a global name. Get index from three 8-bit fields. unsigned int index = ExtractGlobalIndex(*NameIter); if (index >= this->GlobalNames.size()) { // We are about to assert false. msg("ComputeBlocksDefinedIn: Bad index: %d limit: %d\n", index, this->GlobalNames.size()); msg("Block number %d\n", CurrBlock->GetNumber()); msg("Killed item: "); PrintListOperand(*KillIter); msg("\n"); msg("This is a fatal error.\n"); } assert(index < this->GlobalNames.size()); // index is a valid subscript for the BlocksDefinedIn vector. Push the // current block number onto the list of blocks that define this global name. this->BlocksDefinedIn[index].push_back(CurrBlock->GetNumber()); } } } return; } // end of SMPFunction::ComputeBlocksDefinedIn() // Compute the phi functions at the entry point of each basic block that is a join point. void SMPFunction::InsertPhiFunctions(void) { set<op_t, LessOp>::iterator NameIter; list<int> WorkList; // list of block numbers bool DebugFlag = false; #if SMP_DEBUG_DATAFLOW DebugFlag = (0 == strcmp("__ieee754_pow", this->GetFuncName())); #endif if (DebugFlag) msg("GlobalNames size: %d\n", this->GlobalNames.size()); for (NameIter = this->GlobalNames.begin(); NameIter != this->GlobalNames.end(); ++NameIter) { int CurrNameIndex = (int) (ExtractGlobalIndex(*NameIter)); if (DebugFlag) msg("CurrNameIndex: %d\n", CurrNameIndex); #if 0 DebugFlag = (DebugFlag && (6 == CurrNameIndex)); #endif // Initialize the work list to all blocks that define the current name. WorkList.clear(); list<int>::iterator WorkIter; for (WorkIter = this->BlocksDefinedIn.at((size_t) CurrNameIndex).begin(); WorkIter != this->BlocksDefinedIn.at((size_t) CurrNameIndex).end(); ++WorkIter) { WorkList.push_back(*WorkIter); } // Iterate through the work list, inserting phi functions for the current name // into all the blocks in the dominance frontier of each work list block. // Insert into the work list each block that had a phi function added. while (!WorkList.empty()) { #if SMP_DEBUG_DATAFLOW msg("WorkList size: %d\n", WorkList.size()); #endif list<int>::iterator WorkIter = WorkList.begin(); while (WorkIter != WorkList.end()) { set<int>::iterator DomFrontIter; #if SMP_DEBUG_DATAFLOW msg("WorkIter: %d\n", *WorkIter); #endif if (DebugFlag && (*WorkIter > this->BlockCount)) { msg("ERROR: WorkList block # %d out of range.\n", *WorkIter); } list<SMPBasicBlock>::iterator WorkBlock = this->RPOBlocks[*WorkIter]; for (DomFrontIter = WorkBlock->GetFirstDomFrontier(); DomFrontIter != WorkBlock->GetLastDomFrontier(); ++DomFrontIter) { #if SMP_DEBUG_DATAFLOW msg("DomFront: %d\n", *DomFrontIter); #endif if (DebugFlag && (*DomFrontIter > this->BlockCount)) { msg("ERROR: DomFront block # %d out of range.\n", *DomFrontIter); } list<SMPBasicBlock>::iterator PhiBlock = this->RPOBlocks[*DomFrontIter]; // Before inserting a phi function for the current name in *PhiBlock, // see if the current name is LiveIn for *PhiBlock. If not, there // is no need for the phi function. This check is what makes the SSA // a fully pruned SSA. if (PhiBlock->IsLiveIn(*NameIter)) { size_t NumPreds = PhiBlock->GetNumPreds(); DefOrUse CurrRef(*NameIter); SMPPhiFunction CurrPhi(CurrNameIndex, CurrRef); for (size_t NumCopies = 0; NumCopies < NumPreds; ++NumCopies) { CurrPhi.PushBack(CurrRef); // inputs to phi } if (PhiBlock->AddPhi(CurrPhi)) { // If not already in Phi set, new phi function was inserted. WorkList.push_back(PhiBlock->GetNumber()); #if SMP_DEBUG_DATAFLOW msg("Added phi for name %d at top of block %d\n", CurrNameIndex, PhiBlock->GetNumber()); #endif } } else { if (DebugFlag) { msg("Global %d not LiveIn for block %d\n", CurrNameIndex, PhiBlock->GetNumber()); } } } // end for all blocks in the dominance frontier // Remove current block number from the work list if (DebugFlag) { msg("Removing block %d from work list.\n", *WorkIter); } WorkIter = WorkList.erase(WorkIter); } // end for all block numbers in the work list } // end while the work list is not empty if (DebugFlag) msg("WorkList empty.\n"); } // end for all elements of the GlobalNames set return; } // end of SMPFunction::InsertPhiFunctions() // Build the dominator tree. void SMPFunction::BuildDominatorTree(void) { size_t index; // First, fill the DomTree vector with the parent numbers filled in and the child lists // left empty. for (index = 0; index < this->IDom.size(); ++index) { pair<int, list<int> > DomTreeEntry; DomTreeEntry.first = this->IDom.at(index); DomTreeEntry.second.clear(); this->DomTree.push_back(DomTreeEntry); } // Now, push the children onto the appropriate lists. for (index = 0; index < this->IDom.size(); ++index) { // E.g. if block 5 has block 3 as a parent, then we fetch the number 3 // using the expression this->DomTree.at(index).first, which was just // initialized in the previous loop. Then we go to DomTree entry 3 and push // the number 5 on its child list. int parent = this->DomTree.at(index).first; if (parent != index) // block can dominate itself, but not in DomTree! this->DomTree.at(parent).second.push_back((int) index); } return; } // end of SMPFunction::BuildDominatorTree() // Helper for SSA subscript renumbering: return the next SSA number for the global name // and increment the SSACounter to prepare the next number. Push the returned number onto // the SSAStack for the global name. int SMPFunction::SSANewNumber(size_t GlobNameIndex) { int Subscript = this->SSACounter.at(GlobNameIndex); ++(this->SSACounter[GlobNameIndex]); this->SSAStack[GlobNameIndex].push_back(Subscript); return Subscript; } // end of SMPFunction::SSANewNumber() // Main helper for SSA subscript renumbering. Renumber within block throughout its phi // functions, then its DEFs and USEs, then its phi successors. Recurse then on all // successors in the dominator tree. void SMPFunction::SSARename(int BlockNumber) { assert(0 <= BlockNumber); assert(BlockNumber < this->BlockCount); list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at((size_t) BlockNumber); bool DumpFlag = false; #if SMP_DEBUG_DATAFLOW DumpFlag |= (0 == strcmp("main", this->GetFuncName())); DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); DumpFlag |= (0 == strcmp("image_to_texture", this->GetFuncName())); #endif if (DumpFlag) msg("Entered SSARename for block number %d\n", BlockNumber); // For each phi function at the top of the block, rename the DEF of the phi function // using SSANewNumber() on the global name index. set<SMPPhiFunction, LessPhi>::iterator CurrPhi; list<SMPPhiFunction> TempPhiList; int GlobalNameIndex; for (CurrPhi = CurrBlock->GetFirstPhi(); CurrPhi != CurrBlock->GetLastPhi(); ++CurrPhi) { GlobalNameIndex = CurrPhi->GetIndex(); assert(0 <= GlobalNameIndex); int NewSSANum = this->SSANewNumber((size_t) GlobalNameIndex); #if 0 // g++ is a pain in the neck and won't allow changes to the set item // through CurrPhi, which it types as a const iterator, so this next line does // not compile in g++. CurrPhi->SetSSADef(NewSSANum); #else SMPPhiFunction TempPhi = (*CurrPhi); TempPhi.SetSSADef(NewSSANum); TempPhiList.push_back(TempPhi); #endif } // Go back through the Phi function set and replace the items that need to be updated. // Thank you g++ for being a pain. list<SMPPhiFunction>::iterator TempIter; for (TempIter = TempPhiList.begin(); TempIter != TempPhiList.end(); ++TempIter) { // Use the op_t from the first phi use, because they are all the same. bool Erased = CurrBlock->ErasePhi(TempIter->GetPhiRef(0).GetOp()); assert(Erased); // Now we can add back the phi function that had the DEF SSA number changed. bool Added = CurrBlock->AddPhi(*TempIter); assert(Added); } TempPhiList.clear(); if (DumpFlag) msg("Processed phi functions at top.\n"); // For each instruction in the block, rename all global USEs and then all global DEFs. list<list<SMPInstr>::iterator>::iterator CurrInst; for (CurrInst = CurrBlock->GetFirstInstr(); CurrInst != CurrBlock->GetLastInstr(); ++CurrInst) { size_t RefIndex; for (RefIndex = 0; RefIndex < (*CurrInst)->NumUses(); ++RefIndex) { DefOrUse CurrUse = (*CurrInst)->GetUse(RefIndex); // See if Use is a global name. set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(CurrUse.GetOp()); if (GlobIter != this->GlobalNames.end()) { // found it unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter); if (GlobIndex > this->SSAStack.size()) { // Get some debug info out to the log file before we crash. msg("Bad GlobIndex: %d\n", GlobIndex); msg("Error in function %s\n", this->GetFuncName()); exit(EXIT_FAILURE); } // Set the SSA number for this use to the top of stack SSA # (back()) int NewSSANum; if (this->SSAStack.at(GlobIndex).empty()) { // No top of stack entry to read. #if SMP_DEBUG_DATAFLOW msg("WARNING: function %s : Use of uninitialized variable: ", this->GetFuncName()); msg(" Variable: "); PrintListOperand(*GlobIter); msg(" Block number: %d Address: %x Instruction: %s\n", BlockNumber, (*CurrInst)->GetAddr(), (*CurrInst)->GetDisasm()); #endif NewSSANum = SMP_SSA_UNINIT; } else { NewSSANum = this->SSAStack.at(GlobIndex).back(); } (*CurrInst)->SetUseSSA(RefIndex, NewSSANum); } } // end for all USEs for (RefIndex = 0; RefIndex < (*CurrInst)->NumDefs(); ++RefIndex) { DefOrUse CurrDef = (*CurrInst)->GetDef(RefIndex); // See if Def is a global name. set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(CurrDef.GetOp()); if (GlobIter != this->GlobalNames.end()) { // found it unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter); // Set the SSA number for this DEF to the SSANewNumber top of stack (*CurrInst)->SetDefSSA(RefIndex, this->SSANewNumber(GlobIndex)); } } // end for all DEFs } // end for all instructions if (DumpFlag) msg("Processed all instructions.\n"); // For all control flow graph (not dominator tree) successors, fill in the current // (outgoing) SSA number in the corresponding USE slot in the phi function, for all // global names appearing in phi functions. list<list<SMPBasicBlock>::iterator>::iterator SuccIter; for (SuccIter = CurrBlock->GetFirstSucc(); SuccIter != CurrBlock->GetLastSucc(); ++SuccIter) { // What position in the Preds list of this successor is CurrBlock? int ListPos = (*SuccIter)->GetPredPosition(BlockNumber); assert(0 <= ListPos); // Go through all phi functions in this successor. At ListPos position in the // incoming arguments for that phi function, set the SSA number to the SSA number // in the top of stack entry for the global name associated with that phi function. set<SMPPhiFunction, LessPhi>::iterator CurrPhi; for (CurrPhi = (*SuccIter)->GetFirstPhi(); CurrPhi != (*SuccIter)->GetLastPhi(); ++CurrPhi) { int GlobIndex = CurrPhi->GetIndex(); int CurrSSA; if (this->SSAStack.at(GlobIndex).empty()) { // No top of stack entry to read. #if SMP_DEBUG_DATAFLOW msg("WARNING: function %s : Path to use of uninitialized variable: ", this->GetFuncName()); msg(" Variable: "); PrintListOperand(CurrPhi->GetAnyOp()); msg(" Block number: %d Successor block number: %d\n", BlockNumber, (*SuccIter)->GetNumber()); #endif CurrSSA = SMP_SSA_UNINIT; } else { CurrSSA = this->SSAStack.at(GlobIndex).back(); // fetch from top of stack } #if 0 // g++ is a pain in the neck and won't allow changes to the set item // through CurrPhi, which it types as a const iterator, so this next line does // not compile in g++. C++ does not know how to distinguish between changing // the field that ordering is based on, and other fields, so g++ has to be // strict, I guess. CurrPhi->SetSSARef(ListPos, CurrSSA); #else SMPPhiFunction TempPhi = (*CurrPhi); TempPhi.SetSSARef(ListPos, CurrSSA); TempPhiList.push_back(TempPhi); if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) { msg("BlockNumber: %d ListPos: %d\n", BlockNumber, ListPos); } #endif } // end for all phi functions in successor // Go back through the Phi function set and replace the items that need to be updated. // Thank you g++ for being a pain. for (TempIter = TempPhiList.begin(); TempIter != TempPhiList.end(); ++TempIter) { if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) { msg("Special before phi dump:\n"); set<SMPPhiFunction, LessPhi>::iterator FoundPhi; FoundPhi = (*SuccIter)->FindPhi(TempIter->GetAnyOp()); FoundPhi->Dump(); } // Use the op_t from the first phi use, because they are all the same. bool Erased = (*SuccIter)->ErasePhi(TempIter->GetPhiRef(0).GetOp()); assert(Erased); // Now we can add back the phi function that had one SSA number changed. bool Added = (*SuccIter)->AddPhi(*TempIter); assert(Added); if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) { msg("Special after phi dump:\n"); set<SMPPhiFunction, LessPhi>::iterator FoundPhi; FoundPhi = (*SuccIter)->FindPhi(TempIter->GetAnyOp()); FoundPhi->Dump(); } } TempPhiList.clear(); } // end for all successors of CurrBlock if (DumpFlag) msg("Processed successor phi functions.\n"); // For each successor in the dominator tree, recurse. list<int>::iterator ChildIter; for (ChildIter = this->DomTree[BlockNumber].second.begin(); ChildIter != this->DomTree[BlockNumber].second.end(); ++ChildIter) { this->SSARename(*ChildIter); } if (DumpFlag) msg("Finished recursion.\n"); // Pop off all SSAStack entries pushed during this block. I.e. for each global name, // pop its SSAStack once per DEF and once per phi function in this block. for (CurrPhi = CurrBlock->GetFirstPhi(); CurrPhi != CurrBlock->GetLastPhi(); ++CurrPhi) { GlobalNameIndex = CurrPhi->GetIndex(); this->SSAStack.at((size_t) GlobalNameIndex).pop_back(); } if (DumpFlag) msg("Popped off entries due to phi functions.\n"); for (CurrInst = CurrBlock->GetFirstInstr(); CurrInst != CurrBlock->GetLastInstr(); ++CurrInst) { size_t RefIndex; for (RefIndex = 0; RefIndex < (*CurrInst)->NumDefs(); ++RefIndex) { DefOrUse CurrDef = (*CurrInst)->GetDef(RefIndex); // See if Def is a global name. set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(CurrDef.GetOp()); if (GlobIter != this->GlobalNames.end()) { // found it unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter); this->SSAStack.at((size_t) GlobIndex).pop_back(); } } // end for all DEFs } // end for all instructions if (DumpFlag) msg("Popped off entries due to instructions.\n"); return; } // end of SMPFunction::SSARename() // Main driver of SSA subscript renumbering. void SMPFunction::SSARenumber(void) { if (0 >= this->GlobalNames.size()) return; // no names to renumber // Initialize stacks and counters of SSA numbers. size_t GlobIndex; for (GlobIndex = 0; GlobIndex < this->GlobalNames.size(); ++GlobIndex) { list<int> DummyList; this->SSACounter.push_back(0); this->SSAStack.push_back(DummyList); } // Recurse through the dominator tree starting with node 0. this->SSARename(0); return; } // Emit all annotations for the function, including all per-instruction // annotations. void SMPFunction::EmitAnnotations(FILE *AnnotFile) { // Emit annotation for the function as a whole. if (this->StaticFunc) { qfprintf(AnnotFile, "%10x %6d FUNC LOCAL %s ", this->FuncInfo.startEA, this->Size, this->FuncName); } else { qfprintf(AnnotFile, "%10x %6d FUNC GLOBAL %s ", this->FuncInfo.startEA, this->Size, this->FuncName); } if (this->UseFP) { qfprintf(AnnotFile, "USEFP "); } else { qfprintf(AnnotFile, "NOFP "); } if (this->FuncInfo.does_return()) { qfprintf(AnnotFile, "\n"); } else { qfprintf(AnnotFile, "NORET \n"); } // Loop through all instructions in the function. // Output optimization annotations for those // instructions that do not require full computation // of their memory metadata by the Memory Monitor SDT. list<SMPInstr>::iterator CurrInst; bool AllocSeen = false; // Reached LocalVarsAllocInstr yet? bool DeallocTrigger = false; for (CurrInst = Instrs.begin(); CurrInst != Instrs.end(); ++CurrInst) { ea_t addr = CurrInst->GetAddr(); if (this->LocalVarsAllocInstr == addr) { AllocSeen = true; this->EmitStackFrameAnnotations(AnnotFile, CurrInst); } // If this is the instruction which deallocated space // for local variables, we set a flag to remind us to // emit an annotation on the next instruction. // mmStrata wants the instruction AFTER the // deallocating instruction, so that it processes // the deallocation after it happens. It inserts // instrumentation before an instruction, not // after, so it will insert the deallocating // instrumentation before the first POP of callee-saved regs, // if there are any, or before the return, otherwise. if (addr == LocalVarsDeallocInstr) { DeallocTrigger = true; } else if (DeallocTrigger) { // Time for annotation qfprintf(AnnotFile, "%10x %6d DEALLOC STACK esp - %d %s\n", addr, LocalVarsSize, LocalVarsSize, CurrInst->GetDisasm()); DeallocTrigger = false; } CurrInst->EmitAnnotations(this->UseFP, AllocSeen, AnnotFile); } // end for (ea_t addr = FuncInfo.startEA; ...) return; } // end of SMPFunction::EmitAnnotations() // Debug output dump. void SMPFunction::Dump(void) { list<SMPBasicBlock>::iterator CurrBlock; msg("Debug dump for function: %s\n", this->GetFuncName()); for (size_t index = 0; index < this->IDom.size(); ++index) { msg("IDOM for %d: %d\n", index, this->IDom.at(index)); } for (size_t index = 0; index < this->DomTree.size(); ++index) { msg("DomTree for %d: ", index); list<int>::iterator DomIter; for (DomIter = this->DomTree.at(index).second.begin(); DomIter != this->DomTree.at(index).second.end(); ++DomIter) { msg("%d ", *DomIter); } msg("\n"); } msg("Global names: \n"); set<op_t, LessOp>::iterator NameIter; for (NameIter = this->GlobalNames.begin(); NameIter != this->GlobalNames.end(); ++NameIter) { msg("index: %d ", ExtractGlobalIndex(*NameIter)); PrintListOperand(*NameIter); msg("\n"); } msg("Blocks each name is defined in: \n"); for (size_t index = 0; index < this->BlocksDefinedIn.size(); ++index) { msg("Name index: %d Blocks: ", index); list<int>::iterator BlockIter; for (BlockIter = this->BlocksDefinedIn.at(index).begin(); BlockIter != this->BlocksDefinedIn.at(index).end(); ++BlockIter) { msg("%d ", *BlockIter); } msg("\n"); } for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { // Dump out the function number and data flow sets before the instructions. CurrBlock->Dump(); } return; } // end of SMPFunction::Dump()