//
// SMPFunction.cpp
//
// This module performs the fundamental data flow analyses needed for the
// SMP project (Software Memory Protection) at the function level.
//
#include <utility>
#include <list>
#include <set>
#include <vector>
#include <algorithm>
#include <cstring>
#include <cstdlib>
#include <pro.h>
#include <assert.h>
#include <ida.hpp>
#include <idp.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <allins.hpp>
#include <intel.hpp>
#include <name.hpp>
#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"
#include "SMPFunction.h"
#include "SMPBasicBlock.h"
#include "SMPInstr.h"
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0
#define SMP_DEBUG_DATAFLOW 0
#define SMP_DEBUG_TYPE_INFERENCE 0
#define SMP_DEBUG_STACK_GRANULARITY 0
#define SMP_DEBUG_BUILD_RTL 1 // leave this on; serious errors reported
#define SMP_DEBUG_UNINITIALIZED_SSA_NAMES 1
// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008
#define SMP_COMPUTE_LVA_SSA 1
// Compute fine-grained stack boundaries?
#define SMP_COMPUTE_STACK_GRANULARITY 1
// Insert a floating no-op instruction at top of each function to hold SSA DEFs
// of LiveIn names?
#define SMP_USE_SSA_FNOP_MARKER 1
// Use conditional type propagation on phi functions
#define SMP_CONDITIONAL_TYPE_PROPAGATION 0
// Kludges to fix IDA Pro 5.2 errors in cc1.ncexe
#define SMP_IDAPRO52_WORKAROUND 0
// Basic block number 0 is the top of the CFG lattice.
#define SMP_TOP_BLOCK 0
// Set SharedTailChunks to TRUE for entire printf family
// After we restructure the parent/tail structure of the database, this
// will go away.
#define KLUDGE_VFPRINTF_FAMILY 1
// Used for binary search by function number in SMPStaticAnalyzer.cpp
// to trigger debugging output and find which instruction in which
// function is causing a crash.
bool SMPBinaryDebug = false;
using namespace std;
// Helper: report whether an object is present in a vector.
//   item - value to look for (compared with T's operator==)
//   vec  - vector to search; an empty vector always yields false
// Returns true as soon as one matching element exists.
template <class T>
bool vector_exists(const T &item, const std::vector<T> &vec) {
    return std::find(vec.begin(), vec.end(), item) != vec.end();
}
// *****************************************************************
// Class SMPFunction
// *****************************************************************
// Constructor
// Set up an SMPFunction in its "nothing analyzed yet" state.
//   Info - IDA function record; copied by value into FuncInfo, so it must not be NULL.
//   pgm  - stored in Program; not dereferenced here.
// Every boolean flag set here starts out false, every container starts out empty, and
// SavedRegLoc / ReturnRegTypes get one slot per register index in [R_ax, R_di].
SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) {
    assert(NULL != Info); // *Info is copied below
    this->Program = pgm;
    this->FuncInfo = *Info;

    // Control flow and chunk properties.
    this->IndirectCalls = false;
    this->UnresolvedIndirectCalls = false;
    this->IndirectJumps = false;
    this->UnresolvedIndirectJumps = false;
    this->SharedChunks = false;
    this->UseFP = false;

    // Safety classification; the alternative set of defaults is compiled out.
#if 1
    this->SafeFunc = false;
#else
    this->SafeFunc = true;
    this->SpecSafeFunc = true;
    this->SafeCallee = true;
    this->SpecSafeCallee = true;
#endif
    this->WritesAboveRA = false;
    this->HasIndirectWrites = false;
    this->OutgoingArgsComputed = false;
    this->BlockCount = 0;
    this->ReturnAddrStatus = FUNC_UNKNOWN;
    this->SetIsSpeculative(false);

    // Start every container from a known-empty state.
    this->Blocks.clear();
    this->DirectCallTargets.clear();
    this->IndirectCallTargets.clear();
    this->AllCallTargets.clear();
    this->AllCallSources.clear();
    this->InstBlockMap.clear();
    this->RPOBlocks.clear();
    this->IDom.clear();
    this->DomTree.clear();
    this->BlocksDefinedIn.clear();
    this->SSACounter.clear();
    this->SSAStack.clear();
    this->LocalVarTable.clear();
    this->StackFrameMap.clear();
    this->ReturnRegTypes.clear();
    this->SavedRegLoc.clear();

    // One entry per register index so both vectors can be indexed by register number.
    for (int RegIndex = R_ax; RegIndex <= R_di; ++RegIndex) {
        this->SavedRegLoc.push_back(0); // zero offset means reg not saved
        this->ReturnRegTypes.push_back(UNINIT);
    }
} // end of SMPFunction() constructor
// Reset the Processed flags in all blocks to false.
void SMPFunction::ResetProcessedBlocks(void) {
list<SMPBasicBlock>::iterator CurrBlock;
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
CurrBlock->SetProcessed(false);
}
return;
} // end of SMPFunction::ResetProcessedBlocks()
// Add a caller to the list of all callers of this function.
void SMPFunction::AddCallSource(ea_t addr) {
    // Callers are recorded by the start address of the function that contains
    // the call instruction, not by the address of the call itself.
    func_t *CallerInfo = get_func(addr);
    if (NULL != CallerInfo) {
        ea_t CallerStart = CallerInfo->startEA;
        assert(BADADDR != CallerStart);
        this->AllCallSources.insert(CallerStart);
    }
    else {
        // The call site does not belong to any known function; nothing is recorded.
        msg("SERIOUS WARNING: Call location %x not in a function.\n", addr);
    }
} // end of SMPFunction::AddCallSource()
// Figure out the different regions of the stack frame, and find the
// instructions that allocate and deallocate the local variables space
// on the stack frame.
// The stack frame info will be used to emit stack
// annotations when Analyze() reaches the stack allocation
// instruction that sets aside space for local vars.
// Set the address of the instruction at which these
// annotations should be emitted. This should normally
// be an instruction such as: sub esp,48
// However, for a function with no local variables at all,
// we will need to determine which instruction should be
// considered to be the final instruction of the function
// prologue and return its address.
// Likewise, we find the stack deallocating instruction in
// the function epilogue.
// Summary of what the visible code does:
//  1. Copies frsize / frregs / argsize from this->FuncInfo into LocalVarsSize,
//     CalleeSavedRegsSize and IncomingArgsSize, and sets RetAddrSize from
//     get_frame_retsize().
//  2. Calls MDFixFrameInfo(), which may adjust those sizes.
//  3. Records LocalVarsAllocInstr and LocalVarsDeallocInstr (two different strategies
//     depending on whether LocalVarsSize is nonzero).
//  4. Sets CallsAlloca, optionally builds the local variable table, and calls
//     MDFindSavedRegs() when an allocation instruction was matched.
// NOTE(review): in this text DebugFlag is used without a visible declaration, the
// #endif after the DebugFlag update and the #endif after the "Could not find stack
// frame deallocation" message have no visible matching #if, and the bare numeric lines
// in the body are not C++. Source lines appear to be missing here -- confirm against
// the repository copy before changing any logic.
void SMPFunction::SetStackFrameInfo(void) {
bool FoundAllocInstr = false;
bool FoundDeallocInstr = false;
DebugFlag |= (0 == strcmp(".init_proc", this->GetFuncName()));
#endif
// The sizes of the three regions of the stack frame other than the
// return address are stored in the function structure.
this->LocalVarsSize = this->FuncInfo.frsize;
this->CalleeSavedRegsSize = this->FuncInfo.frregs;
this->IncomingArgsSize = this->FuncInfo.argsize;
// The return address size can be obtained in a machine independent
// way by calling get_frame_retsize().
this->RetAddrSize = get_frame_retsize(&(this->FuncInfo));
// IDA Pro has trouble with functions that do not have any local
// variables. Unfortunately, the C library has plenty of these
// functions. IDA usually claims that frregs is zero and frsize
// is N, when the values should have been reversed. We can attempt
// to detect this and fix it.
bool FrameInfoFixed = this->MDFixFrameInfo();
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDFixFrameInfo()\n");
#endif
#if SMP_DEBUG_FRAMEFIXUP
if (FrameInfoFixed) {
msg("Fixed stack frame size info: %s\n", this->FuncName);
SMPBasicBlock CurrBlock = this->Blocks.front();
msg("First basic block:\n");
for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr();
CurrInstr != CurrBlock.GetLastInstr();
++CurrInstr) {
msg("%s\n", (*CurrInstr)->GetDisasm());
}
}
#endif
// Now, if LocalVarsSize is not zero, we need to find the instruction
// in the function prologue that allocates space on the stack for
// local vars. This code could be made more robust in the future
// by matching LocalVarsSize to the immediate value in the allocation
// instruction. However, IDA Pro is sometimes a little off on this
// number. **!!**
if (0 < this->LocalVarsSize) {
if (DebugFlag) msg("Searching for alloc and dealloc\n");
// Single pass over every instruction of the function: the first
// MDIsFrameAllocInstr() match becomes the alloc instruction, and after that every
// MDIsFrameDeallocInstr() match overwrites the dealloc instruction.
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin() == CurrInstr)
continue; // skip marker instruction
#endif
ea_t addr = CurrInstr->GetAddr();
// Keep the most recent instruction in the DeallocInstr
// in case we reach the return without seeing a dealloc.
if (!FoundDeallocInstr) {
this->LocalVarsDeallocInstr = addr;
}
if (!FoundAllocInstr
&& CurrInstr->MDIsFrameAllocInstr()) {
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDIsFrameAllocInstr()\n");
#endif
this->LocalVarsAllocInstr = addr;
FoundAllocInstr = true;
if (DebugFlag) msg("Found alloc: %s\n", CurrInstr->GetDisasm());
// As soon as we have found the local vars allocation,
// we can try to fix incorrect sets of UseFP by IDA.
// NOTE: We might want to extend this in the future to
// handle functions that have no locals. **!!**
bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDFixUseFP()\n");
#endif
#if SMP_DEBUG_FRAMEFIXUP
if (FixedUseFP) {
msg("Fixed UseFP in %s\n", this->FuncName);
}
#endif
}
else if (FoundAllocInstr) {
// We can now start searching for the DeallocInstr.
if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
// Keep saving the most recent addr that looks
// like the DeallocInstr until we reach the
// end of the function. Last one to look like
// it is used as the DeallocInstr.
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDIsFrameDeallocInstr()\n");
#endif
this->LocalVarsDeallocInstr = addr;
FoundDeallocInstr = true;
}
else {
if (DebugFlag) msg("Not dealloc: %s\n", CurrInstr->GetDisasm());
}
}
} // end for (list<SMPInstr>::iterator CurrInstr ... )
if (!FoundAllocInstr) {
// Could not find the frame allocating instruction. Bad.
// See if we can find the point at which the stack allocation reaches
// a total of FuncInfo.frsize+frregs, regardless of whether it happened by push
// instructions or some other means.
// Note that the target passed here uses IDA's original sizes from FuncInfo, not the
// data members that MDFixFrameInfo() may have adjusted, and that FoundAllocInstr
// stays false even if FindAllocPoint() returns a usable address.
this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize + this->FuncInfo.frregs);
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from FindAllocPoint()\n");
#endif
#if SMP_DEBUG_FRAMEFIXUP
if (BADADDR == this->LocalVarsAllocInstr) {
msg("ERROR: Could not find stack frame allocation in %s\n",
FuncName);
msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n",
LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
}
else {
msg("FindAllocPoint found %x for function %s\n",
this->LocalVarsAllocInstr, this->GetFuncName());
}
#endif
}
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
if (!FoundDeallocInstr) {
// Could not find the frame deallocating instruction. Bad.
// Emit diagnostic and use the last instruction in the
// function.
msg("ERROR: Could not find stack frame deallocation in %s\n",
FuncName);
}
#endif
}
// else LocalVarsSize was zero, meaning that we need to search
// for the end of the function prologue code and emit stack frame
// annotations from that address (i.e. this method returns that
// address). We will approximate this by finding the end of the
// sequence of PUSH instructions at the beginning of the function.
// The last PUSH instruction should be the last callee-save-reg
// instruction. We can make this more robust in the future by
// making sure that we do not count a PUSH of anything other than
// a register. **!!**
// NOTE: 2nd prologue instr is usually mov ebp,esp
// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
else {
// SaveAddr stays at the function start address if the first real instruction
// is not a PUSH.
ea_t SaveAddr = this->FuncInfo.startEA;
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin() == CurrInstr)
continue; // skip marker instruction
#endif
insn_t CurrCmd = CurrInstr->GetCmd();
ea_t addr = CurrInstr->GetAddr();
if (CurrCmd.itype == NN_push)
SaveAddr = addr;
else
break;
}
this->LocalVarsAllocInstr = SaveAddr;
// No deallocation instruction is recorded on this path; zero is stored instead.
this->LocalVarsDeallocInstr = 0;
} // end if (LocalVarsSize > 0) ... else ...
this->CallsAlloca = this->FindAlloca();
#if SMP_COMPUTE_STACK_GRANULARITY
// Now, find the boundaries between local variables.
this->BuildLocalVarTable();
#endif
// Get callee-saved regs info for remediation use.
// FoundAllocInstr is only set by the MDIsFrameAllocInstr() match above, so neither the
// FindAllocPoint() fallback nor the zero-locals path reaches this call.
if (FoundAllocInstr) {
this->MDFindSavedRegs();
}
return;
} // end of SMPFunction::SetStackFrameInfo()
// IDA Pro defines the sizes of regions in the stack frame in a way
// that suits its purposes but not ours. The frsize field of the func_info_t
// structure measures the distance between the stack pointer and the
// frame pointer (ESP and EBP in the x86). This region includes some
// of the callee-saved registers. So, the frregs field only includes
// the callee-saved registers that are above the frame pointer.
// x86 standard prologue on gcc/linux:
// push ebp ; save old frame pointer
// mov ebp,esp ; new frame pointer = current stack pointer
// push esi ; callee save reg
// push edi ; callee save reg
// sub esp,34h ; allocate 52 bytes for local variables
//
// Notice that EBP acquires its final frame pointer value AFTER the
// old EBP has been pushed. This means that, of the three callee saved
// registers, one is above where EBP points and two are below.
// IDA Pro is concerned with generating readable addressing expressions
// for items on the stack. None of the callee-saved regs will ever
// be addressed in the function; they will be dormant until they are popped
// off the stack in the function epilogue. In order to create readable
// disassembled code, IDA defines named constant offsets for locals. These
// offsets are negative values (x86 stack grows downward from EBP toward
// ESP). When ESP_relative addressing occurs, IDA converts a statement:
// mov eax,[esp+12]
// into the statement:
// mov eax,[esp+3Ch+var_30]
// Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
// var_30 is defined to have the value -30h == -48 decimal. So, the
// "frame size" in IDA Pro is 60 bytes, and a certain local can be
// addressed in ESP-relative manner as shown, or as [ebp+var_30] for
// EBP-relative addressing. The interactive IDA user can then edit
// the name var_30 to something mnemonic, such as "virus_size", and IDA
// will replace all occurrences with the new name, so that code references
// automatically become [ebp+virus_size]. As the user proceeds
// interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
// our analyses have a compiler perspective as well as a memory access
// defense perspective. SMP distinguishes between callee saved regs,
// which should not be overwritten in the function body, and local
// variables, which can be written. We view the stack frame in logical
// pieces: here are the saved regs, here are the locals, here is the
// return address, etc. We don't care which direction from EBP the
// callee-saved registers lie; we don't want to lump them in with the
// local variables. We also don't like the fact that IDA Pro will take
// the function prologue code shown above and declare frregs=4 and
// frsize=60, because frsize no longer matches the stack allocation
// statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
// private data members for the function, while leaving the IDA database
// alone because IDA needs to maintain its own definitions of these
// variables.
// Fixing means we will update the data members LocalVarsSize and
// CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
// The prologue and epilogue code generated by gcc-linux is as discussed
// above, while on Visual Studio and other Windows x86 compilers, the
// saving of registers other than EBP happens AFTER local stack allocation.
// A Windows version of the function would expect to see the pushing
// of ESI and EDI AFTER the sub esp,34h statement.
// Reconcile LocalVarsSize and CalleeSavedRegsSize with the prologue found in the
// first basic block.
//   - Each PUSH seen before the frame allocation is counted as 4 bytes, either as a
//     callee-saved register save or as an "other" push.
//   - When a frame allocating instruction is found, the absolute value of its immediate
//     operand becomes LocalVarsSize and all pushes counted so far become
//     CalleeSavedRegsSize.
//   - When no new locals size was established and the old total equals the counted
//     register saves, the frame is treated as having no locals at all.
// Only the private data members are written; this->FuncInfo is left alone.
// Returns true if either data member was changed, false otherwise (including when the
// function has no basic blocks yet).
bool SMPFunction::MDFixFrameInfo(void) {
    int SavedRegsSize = 0;
    int OtherPushesSize = 0; // besides callee-saved regs
    int NewLocalsSize = 0;
    int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
    bool Changed = false;
    bool DebugFlag = (0 == strcmp("__libc_csu_init", this->GetFuncName()));

    // Without a first basic block there is no prologue to inspect.
    if (this->Blocks.empty())
        return false;

    // Iterate through the first basic block in the function. If we find
    // a frame allocating Instr in it, then we have local vars. If not,
    // we don't, and LocalVarsSize should have been zero. Count the callee
    // register saves leading up to the local allocation. Set data members
    // according to what we found if the values of the data members would
    // change.
    SMPBasicBlock &CurrBlock = this->Blocks.front(); // inspect in place; no copy needed
    for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr();
        CurrIter != CurrBlock.GetLastInstr();
        ++CurrIter) {
#if SMP_USE_SSA_FNOP_MARKER
        if (CurrBlock.GetFirstInstr() == CurrIter)
            continue; // skip marker instruction
#endif
        list<SMPInstr>::iterator CurrInstr = *CurrIter;
        if (CurrInstr->MDIsPushInstr()) {
            // We will make the gcc-linux assumption that a PUSH in
            // the first basic block, prior to the stack allocating
            // instruction, is a callee register save. To make this
            // more robust, we ensure that the register is from
            // the callee saved group of registers, and that it has
            // not been defined thus far in the function (else it might
            // be a push of an outgoing argument to a call that happens
            // in the first block when there are no locals). **!!!!**
            if (CurrInstr->MDUsesCalleeSavedReg()
                && !CurrInstr->HasSourceMemoryOperand()) {
                SavedRegsSize += 4; // **!!** should check the size
                if (DebugFlag) msg("libc_csu_init SavedRegsSize: %d %s\n", SavedRegsSize,
                    CurrInstr->GetDisasm());
            }
            else {
                // Pushes of outgoing args can be scheduled so that
                // they are mixed with the pushes of callee saved regs.
                OtherPushesSize += 4;
                if (DebugFlag) msg("libc_csu_init OtherPushesSize: %d %s\n", OtherPushesSize,
                    CurrInstr->GetDisasm());
            }
        }
        else if (CurrInstr->MDIsFrameAllocInstr()) {
            if (DebugFlag) msg("libc_csu_init allocinstr: %s\n", CurrInstr->GetDisasm());
            // Everything pushed ahead of the allocation is counted with the saved regs.
            SavedRegsSize += OtherPushesSize;
            // Get the size being allocated.
            set<DefOrUse, LessDefUse>::iterator CurrUse;
            for (CurrUse = CurrInstr->GetFirstUse(); CurrUse != CurrInstr->GetLastUse(); ++CurrUse) {
                // Find the immediate operand.
                if (o_imm == CurrUse->GetOp().type) {
                    // Get its value into LocalVarsSize.
                    long AllocValue = (signed long) CurrUse->GetOp().value;
                    // One compiler might have sub esp,24 and another
                    // might have add esp,-24. Take the absolute value.
                    if (0 > AllocValue)
                        AllocValue = -AllocValue;
                    if (AllocValue != (long) this->LocalVarsSize) {
                        Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
                        if (AllocValue + SavedRegsSize != OldFrameTotal)
                            msg("Total frame size changed: %s\n", this->FuncName);
#endif
                        this->LocalVarsSize = (asize_t) AllocValue;
                        this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
                        NewLocalsSize = this->LocalVarsSize;
                    }
                    else { // Old value was correct; no change.
                        NewLocalsSize = this->LocalVarsSize;
                        if (SavedRegsSize != this->CalleeSavedRegsSize) {
                            this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
                            Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
                            msg("Only callee regs size changed: %s\n", this->FuncName);
#endif
                        }
                    }
                } // end if (o_imm == ...)
            } // end for all uses
            break; // After frame allocation instr, we are done
        } // end if (push) .. elsif frame allocating instr
    } // end for all instructions in the first basic block

    // If we did not find an allocating instruction, see if it would keep
    // the total size the same to set LocalVarsSize to 0 and to set
    // CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
    // might be better off to leave the numbers alone.
    if (!Changed && (NewLocalsSize == 0)) {
        // Exactly one conversion for exactly one argument.
        if (DebugFlag) msg("libc_csu_init OldFrameTotal: %d\n", OldFrameTotal);
        if (OldFrameTotal == SavedRegsSize) {
            // Both members are updated together so that their sum stays OldFrameTotal.
            this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
            this->LocalVarsSize = 0;
            Changed = true;
        }
#if SMP_DEBUG_FRAMEFIXUP
        else {
            msg("Could not update frame sizes: %s\n", this->FuncName);
        }
#endif
    }
#if SMP_DEBUG_FRAMEFIXUP
    if ((0 < OtherPushesSize) && (0 < NewLocalsSize))
        msg("Extra pushes found of size %d in %s\n", OtherPushesSize,
            this->FuncName);
#endif
    return Changed;
} // end of SMPFunction::MDFixFrameInfo()
// Some functions have difficult to find stack allocations. For example, in some
// version of glibc, strpbrk() zeroes out register ECX and then pushes it more than
// 100 times in order to allocate zero-ed out local vars space for a character translation
// table. We will use the stack pointer analysis of IDA to find out if there is a point
// in the first basic block at which the stack pointer reaches the allocation total
// that IDA is expecting for the local vars region.
// If so, we return the address of the instruction at which ESP reaches its value, else
// we return BADADDR.
// Parameter: OriginalLocSize is the allocation total to look for; it is negated into
// TargetSize because get_spd() deltas are compared against a negative value here.
// Returns an instruction address on success, BADADDR otherwise.
// NOTE(review): this text is not compilable as it stands. The two
// "#if SMP_DEBUG_FRAMEFIXUP" directives that guard the DebugFlag messages have no
// visible #endif, the braces opened at "if (sp_delta == TargetSize) {" and its inner
// "else {" are never closed, and the final "else" (whose message mentions AnalyzedSP)
// has no visible matching "if". Source lines appear to be missing -- confirm against the
// repository copy before changing any logic.
ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
bool DebugFlag = (0 == strcmp("_dl_runtime_resolve", this->GetFuncName()));
sval_t TargetSize = - ((sval_t) OriginalLocSize); // negate; stack grows down
#if SMP_DEBUG_FRAMEFIXUP
if (DebugFlag)
msg("%s OriginalLocSize: %d\n", this->GetFuncName(), OriginalLocSize);
// Limit our analysis to the first basic block in the function.
list<SMPInstr>::iterator CurrInstr;
for (CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) {
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin() == CurrInstr)
continue; // skip marker instruction
#endif
ea_t addr = CurrInstr->GetAddr();
// get_spd() returns a cumulative delta of ESP
sval_t sp_delta = get_spd(&(this->FuncInfo), addr);
#if SMP_DEBUG_FRAMEFIXUP
if (DebugFlag)
msg("%s delta: %d at %x\n", this->GetFuncName(), sp_delta, addr);
if (sp_delta == TargetSize) { // <= instead of == here? **!!**
// Previous instruction hit the frame size.
if (CurrInstr == this->Instrs.begin()) {
return BADADDR; // cannot back up from first instruction
}
else {
// The delta at addr already equals the target, so the address reported is that of
// the instruction before it.
ea_t PrevAddr = (--CurrInstr)->GetAddr();
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin()->GetAddr() == PrevAddr)
return BADADDR; // don't return marker instruction
else
return PrevAddr;
#else
return PrevAddr;
#endif
if (CurrInstr->IsLastInBlock()) {
// It could be that the current instruction will cause the stack pointer
// delta to reach the TargetSize. sp_delta is not updated until after the
// current instruction, so we need to look ahead one instruction if the
// current block falls through. On the other hand, if the current block
// ends with a jump or return, we cannot hit TargetSize.
if (CurrInstr->IsBasicBlockTerminator())
return BADADDR;
list<SMPInstr>::iterator NextInstr = CurrInstr;
++NextInstr;
if (NextInstr == this->Instrs.end())
return BADADDR;
sp_delta = get_spd(&(this->FuncInfo), NextInstr->GetAddr());
if (sp_delta == TargetSize) {
// CurrInstr will cause stack pointer delta to hit TargetSize.
return addr;
}
else {
return BADADDR;
}
} // end if LastInBlock
} // end for all instructions
#if SMP_DEBUG_FRAMEFIXUP
else {
msg("AnalyzedSP is false for %s\n", this->GetFuncName());
}
#endif
return BADADDR;
} // end of SMPFunction::FindAllocPoint()
// IDA Pro is sometimes confused by a function that uses the frame pointer
// register for other purposes. For the x86, a function that uses EBP
// as a frame pointer would begin with: push ebp; mov ebp,esp to save
// the old value of EBP and give it a new value as a frame pointer. The
// allocation of local variable space would have to come AFTER the move
// instruction. A function that begins: push ebp; push esi; sub esp,24
// is obviously not using EBP as a frame pointer. IDA is apparently
// confused by the push ebp instruction being the first instruction
// in the function. We will reset UseFP to false in this case.
// The inverse problem happens with a function that begins with instructions
// other than push ebp; mov ebp,esp; ... etc. but eventually has those
// instructions in the first basic block. For example, a C compiler generates
// for the first block of main():
// lea ecx,[esp+arg0]
// and esp, 0xfffffff0
// push dword ptr [ecx-4]
// push ebp
// mov ebp,esp
// push ecx
// sub esp,<framesize>
//
// This function is obviously using EBP as a frame pointer, but IDA Pro marks
// the function as not using a frame pointer. We will reset UseFP to true in
// this case.
// NOTE: This logic should work for both Linux and Windows x86 prologues.
// Correct this->UseFP by inspecting the instructions that precede
// this->LocalVarsAllocInstr.
//   - UseFP false on entry: look for "push ebp" followed (not necessarily immediately)
//     by "mov ebp,esp" before the end of the first block or the allocation instruction;
//     if both are seen, UseFP becomes true.
//   - UseFP true on entry: if no instruction before the allocation instruction defines
//     EBP, UseFP is reset to false.
// Returns true when UseFP was changed, false when it was left as it was (including the
// case where there are no instructions to examine).
bool SMPFunction::MDFixUseFP(void) {
    list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
    if (CurrInstr == this->Instrs.end())
        return false; // nothing to examine
#if SMP_USE_SSA_FNOP_MARKER
    ++CurrInstr; // skip marker instruction
    if (CurrInstr == this->Instrs.end())
        return false; // only the marker is present
#endif

    if (!(this->UseFP)) {
        // See if we can detect the instruction "push ebp" followed by the instruction
        // "mov ebp,esp" in the first basic block. The instructions do not have to be
        // consecutive. If we find them, we will reset UseFP to true.
        bool FirstBlockProcessed = false;
        bool EBPSaved = false;
        bool ESPintoEBP = false;
        do {
            FirstBlockProcessed = CurrInstr->IsLastInBlock();
            if (!EBPSaved) { // still looking for "push ebp"
                if (CurrInstr->MDIsPushInstr() && CurrInstr->GetCmd().Operands[0].is_reg(R_bp)) {
                    EBPSaved = true;
                }
            }
            else if (!ESPintoEBP) { // found "push ebp", looking for "mov ebp,esp"
                insn_t CurrCmd = CurrInstr->GetCmd();
                if ((CurrCmd.itype == NN_mov)
                    && (CurrInstr->GetFirstDef()->GetOp().is_reg(R_bp))
                    && (CurrInstr->GetFirstUse()->GetOp().is_reg(R_sp))) {
                    ESPintoEBP = true;
                    FirstBlockProcessed = true; // exit loop
                }
            }
            ++CurrInstr;
            if (CurrInstr == this->Instrs.end())
                break; // ran out of instructions; never dereference end()
            // We must get EBP set to its frame pointer value before we reach the
            // local frame allocation instruction (i.e. the subtraction of locals space
            // from the stack pointer).
            FirstBlockProcessed |= (CurrInstr->GetAddr() >= this->LocalVarsAllocInstr);
        } while (!FirstBlockProcessed);
        // If we found ESPintoEBP, we also found EBPSaved first, and we need to change
        // this->UseFP to true and return true. Otherwise, return false.
        this->UseFP = ESPintoEBP;
        return ESPintoEBP;
    } // end if (!(this->UseFP))

    // At this point, this->UseFP must have been true on entry to this method and we will
    // check whether it should be reset to false.
    while ((CurrInstr != this->Instrs.end())
        && (CurrInstr->GetAddr() < this->LocalVarsAllocInstr)) {
        // Visit every DEF of the instruction; the iterator must advance on each pass.
        set<DefOrUse, LessDefUse>::iterator CurrDef;
        for (CurrDef = CurrInstr->GetFirstDef(); CurrDef != CurrInstr->GetLastDef(); ++CurrDef) {
            if (CurrDef->GetOp().is_reg(R_bp))
                return false; // EBP got set before locals were allocated
        }
        ++CurrInstr;
    }
    // If we found no defs of the frame pointer before the local vars
    // allocation, then the frame pointer register is not being used
    // as a frame pointer, just as a general callee-saved register.
    this->UseFP = false;
    msg("MDFixUseFP reset UseFP to false for %s\n", this->GetFuncName());
    return true;
} // end of SMPFunction::MDFixUseFP()
// Find the callee-saved reg offsets (negative offset from return address)
// for all registers pushed onto the stack before the stack frame allocation
// instruction.
void SMPFunction::MDFindSavedRegs(void) {
list<SMPInstr>::iterator CurrInst;
int RegIndex;
func_t *CurrFunc = get_func(this->GetStartAddr());
assert(NULL != CurrFunc);
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
if (CurrInst->GetAddr() > this->LocalVarsAllocInstr)
break;
if (!(CurrInst->MDIsPushInstr()))
continue;
sval_t CurrOffset = get_spd(CurrFunc, CurrInst->GetAddr());
if (CurrInst->GetCmd().itype == NN_push) {
op_t PushedReg = CurrInst->GetPushedOpnd();
if (o_reg == PushedReg.type) {
RegIndex = (int) PushedReg.reg;
if (RegIndex > R_di) {
msg("WARNING: Skipping save of register %d\n", RegIndex);
continue;
}
if (this->SavedRegLoc.at((size_t) RegIndex) == 0) {
this->SavedRegLoc[(size_t) RegIndex] = CurrOffset - 4;
}
else {
msg("WARNING: Multiple saves of register %d\n", RegIndex);
}
} // end if register push operand
} // end if PUSH instruction
else if (NN_pusha == CurrInst->GetCmd().itype) {
// **!!** Handle pushes of all regs.
this->SavedRegLoc[(size_t) R_ax] = CurrOffset - 4;
this->SavedRegLoc[(size_t) R_cx] = CurrOffset - 8;
this->SavedRegLoc[(size_t) R_dx] = CurrOffset - 12;
this->SavedRegLoc[(size_t) R_bx] = CurrOffset - 16;
this->SavedRegLoc[(size_t) R_sp] = CurrOffset - 20;
this->SavedRegLoc[(size_t) R_bp] = CurrOffset - 24;
this->SavedRegLoc[(size_t) R_si] = CurrOffset - 28;
this->SavedRegLoc[(size_t) R_di] = CurrOffset - 32;
break; // all regs accounted for
}
else if (CurrInst->MDIsEnterInstr()) {
this->SavedRegLoc[(size_t) R_bp] = CurrOffset - 4;
}
} // end for all instructions
return;
} // end of SMPFunction::MDFindSavedRegs()
// Compute the ReturnRegTypes[] as the meet over all register types
// at all return instructions.
// For every block that contains a return, fold the type of each register USE of the
// return instruction into ReturnRegTypes[] with SMPTypeMeet(). Only register operands
// with an index no greater than R_di are considered; all other USEs are ignored.
void SMPFunction::MDFindReturnTypes(void) {
    list<SMPBasicBlock>::iterator CurrBlock;
    for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
        if (!CurrBlock->HasReturn())
            continue;
        // Get the types of all registers at the RETURN point.
        // Calculate the meet function over them.
        // Stepping back once from GetLastInstr() is expected to land on the RETURN
        // itself; the assert below checks that expectation.
        list<list<SMPInstr>::iterator>::iterator InstIter = CurrBlock->GetLastInstr();
        --InstIter;
        assert(RETURN == (*InstIter)->GetDataFlowType());
        set<DefOrUse, LessDefUse>::iterator CurrUse;
        for (CurrUse = (*InstIter)->GetFirstUse();
            CurrUse != (*InstIter)->GetLastUse();
            ++CurrUse) {
            op_t UseOp = CurrUse->GetOp();
            if ((o_reg != UseOp.type) || (R_di < UseOp.reg))
                continue; // not one of the registers tracked in ReturnRegTypes
            this->ReturnRegTypes[UseOp.reg]
                = SMPTypeMeet(this->ReturnRegTypes.at(UseOp.reg),
                    CurrUse->GetType());
        } // for all USEs in the RETURN instruction
    } // end for all blocks
    return;
} // end of SMPFunction::MDFindReturnTypes()
// Determine local variable boundaries in the stack frame.
void SMPFunction::BuildLocalVarTable(void) {
    // Only one strategy is available at present: rely on what IDA Pro has
    // inferred from the direct addressing of stack locations.
    this->SemiNaiveLocalVarID();
} // end of SMPFunction::BuildLocalVarTable()
// Use the local variable offset list from IDA's stack frame structure to compute
// the table of local variable boundaries.
// Walks the members of the frame returned by get_frame(), pushing one LocalVar
// (offset and name) onto this->LocalVarTable per member and stopping at the first
// member whose offset reaches this->LocalVarsSize. It then fills in the size fields,
// sets this->LocalVarOffsetLimit, and calls FindOutgoingArgsSize() when the stack
// pointer analysis succeeded, no alloca was seen, and the allocation instruction is known.
// NOTE(review): this copy of the function looks incomplete -- MemberName is used
// without a visible declaration, several "#if SMP_DEBUG_STACK_GRANULARITY" lines have
// no visible "#endif", and the braces do not balance. Confirm against the repository.
void SMPFunction::SemiNaiveLocalVarID(void) {
// NOTE: We use IDA Pro's offsets from this->FuncInfo (e.g. frsize) and NOT
// our own corrected values in our private data members. The offsets we
// read from the stack frame structure returned by get_frame() are consistent
// with other IDA Pro values, not with our corrected values.
bool DebugFlag = false;
#if SMP_DEBUG_STACK_GRANULARITY
DebugFlag |= (0 == strcmp("qSort3", this->GetFuncName()));
#endif
// Fetch IDA's function record; a missing record is logged and then asserted on.
func_t *FuncPtr = get_func(this->FuncInfo.startEA);
if (NULL == FuncPtr) {
msg("ERROR in SMPFunction::SemiNaiveLocalVarID; no func ptr\n");
}
assert(NULL != FuncPtr);
// No frame structure means there is nothing to tabulate; warn and leave the table empty.
struc_t *StackFrame = get_frame(FuncPtr);
if (NULL == StackFrame) {
msg("WARNING: No stack frame info from get_frame for %s\n", this->GetFuncName());
return;
}
// Visit the frame members in the order they are stored in the members array.
member_t *Member = StackFrame->members;
for (size_t i = 0; i < StackFrame->memqty; ++i, ++Member) {
long offset;
if (NULL == Member) {
msg("NULL stack frame member pointer in %s\n", this->GetFuncName());
break;
}
// NOTE(review): MemberName is the output buffer here, but its declaration is not
// visible in this copy of the function.
get_member_name(Member->id, MemberName, MAXSMPVARSTR - 1);
if (MemberName == NULL) {
// NOTE(review): no "#endif" is visible for the "#if" below.
#if SMP_DEBUG_STACK_GRANULARITY
msg("NULL stack frame member in %s\n", this->GetFuncName());
continue;
}
offset = Member->soff;
if (MemberName[0] == ' ') {
// NOTE(review): no "#endif" is visible for the "#if" below.
#if SMP_DEBUG_STACK_GRANULARITY
msg("NULL stack frame name at offset %d in %s\n", offset, this->GetFuncName());
MemberName[1] = '\0';
}
if (DebugFlag) {
msg("%s local var %s at offset %d\n", this->GetFuncName(), MemberName, offset);
}
if (offset >= (long) this->LocalVarsSize)
break; // Stop after processing locals and outgoing args
#if 0
// We want the offset from the stack pointer after local frame allocation.
// This subtraction would make it relative to the original stack pointer.
offset -= this->FuncInfo.frsize;
#endif
// Record this member; its size field is filled in after the loop.
struct LocalVar TempLocal;
TempLocal.offset = offset;
qstrncpy(TempLocal.VarName, MemberName, sizeof(TempLocal.VarName) - 1);
this->LocalVarTable.push_back(TempLocal);
} // end for all stack frame members
if (this->LocalVarTable.empty())
return;
// NOTE(review): no "#endif" is visible for the "#if" below.
#if SMP_DEBUG_STACK_GRANULARITY
msg("Computing %d local var sizes\n", this->LocalVarTable.size());
// Now we want to fill in the size field for each local
// Every entry except the last: size is the distance to the next entry's offset.
for (size_t VarIndex = 0; VarIndex < (this->LocalVarTable.size() - 1); ++VarIndex) {
this->LocalVarTable[VarIndex].size = this->LocalVarTable[VarIndex + 1].offset
- this->LocalVarTable[VarIndex].offset;
}
#if SMP_DEBUG_STACK_GRANULARITY
msg("Computing last local var size for frsize %d\n", this->FuncInfo.frsize);
#endif
// Size of last local is total frsize minus savedregs in frame minus offset of last local
if (this->LocalVarTable.size() > 0) {
size_t SavedRegsSpace = 0; // portion of frsize that is saved regs, not locals.
if (this->CalleeSavedRegsSize > this->FuncInfo.frregs) {
// IDA Pro counts the save of EBP in frregs, but then EBP gets its new
// value and callee saved regs other than the old EBP push get counted
// in frsize rather than frregs. CalleeSavedRegsSize includes all saved
// regs on the stack, both above and below the current EBP offset.
// NOTE: For windows, this has to be done differently, as callee saved regs
// happen at the bottom of the local frame, not the top.
#if 0
SavedRegsSpace = this->CalleeSavedRegsSize - this->FuncInfo.frregs;
#else
SavedRegsSpace = this->FuncInfo.frsize - this->LocalVarsSize;
#endif
// NOTE(review): only one closing brace follows this assignment although two "if"
// blocks were opened above; a brace appears to be missing from this copy.
this->LocalVarTable[this->LocalVarTable.size() - 1].size = this->FuncInfo.frsize
- SavedRegsSpace - this->LocalVarTable[this->LocalVarTable.size() - 1].offset;
}
// The limit is one past the last byte covered by the final table entry.
this->LocalVarOffsetLimit = this->LocalVarTable.back().offset
+ (adiff_t) this->LocalVarTable.back().size;
assert(this->LocalVarOffsetLimit <= (adiff_t) this->FuncInfo.frsize);
// Find out how many of the locals are really outgoing args.
if (this->AnalyzedSP && !this->CallsAlloca && (BADADDR != this->LocalVarsAllocInstr)) {
this->FindOutgoingArgsSize();
}
else {
msg("FindOutgoingArgsSize not called for %s ", this->GetFuncName());
msg("AnalyzedSP: %d CallsAlloca: %d LocalVarsAllocInstr: %x \n",
this->AnalyzedSP, this->CallsAlloca, this->LocalVarsAllocInstr);
}
return;
} // end of SMPFunction::SemiNaiveLocalVarID()
// Determine how many bytes at the bottom of the stack frame (i.e. at bottom of
// this->LocalVarsSize) are used for outgoing args. This is the case when the cdecl
// calling convention is used, e.g. gcc/linux allocates local var space + out args space
// in a single allocation and then writes outarg values directly to ESP+0, ESP+4, etc.
// Steps, in order: (1) scan all instructions for the minimum stack pointer delta and
// the delta just after the frame allocation instruction; (2) build this->StackFrameMap
// with one entry per byte from MinStackDelta up to the local var limit; (3) point each
// map entry at the LocalVarTable entry covering it; (4) mark each byte read/written and
// whether it was accessed via ESP or EBP; (5) count the write-only, ESP-only bytes at
// the allocation point into this->OutgoingArgsSize.
// NOTE(review): the bare numeric lines and the "clc5q" / "committed" lines inside this
// function are not C++; they look like residue from a web blame view and should be
// stripped. Some real lines also appear to be missing (see notes below).
void SMPFunction::FindOutgoingArgsSize(void) {
// Compute the lowest value reached by the stack pointer.
list<SMPInstr>::iterator CurrInst;
this->MinStackDelta = 20000; // Final value should be negative
bool DebugFlag = false;
#if SMP_DEBUG_STACK_GRANULARITY
DebugFlag = (0 == strcmp("error_for_asm", this->GetFuncName()));
#endif
// Set before the analysis runs, so it is true even on the early-return paths below.
this->OutgoingArgsComputed = true;
if (DebugFlag) {
msg("DEBUG: Entered FindOutgoingArgsSize for %s\n", this->GetFuncName());
// NOTE(review): neither the "#endif" for the "#if" below nor the closing brace of the
// "if (DebugFlag)" above is visible in this copy.
#if SMP_IDAPRO52_WORKAROUND
this->OutgoingArgsSize = 16;
return;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin() == CurrInst)
continue; // skip marker instruction
#endif
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
ea_t addr = CurrInst->GetAddr();
sval_t sp_delta = get_spd(&(this->FuncInfo), addr);
if (sp_delta < this->MinStackDelta)
this->MinStackDelta = sp_delta;
if (addr == this->LocalVarsAllocInstr) {
// Total stack pointer delta is sp_delta for the next instruction,
// because IDA updates the sp delta AFTER each instruction.
// NOTE(review): NextInst is dereferenced without checking it against Instrs.end().
list<SMPInstr>::iterator NextInst = CurrInst;
++NextInst;
sp_delta = get_spd(&(this->FuncInfo), NextInst->GetAddr());
this->AllocPointDelta = sp_delta;
}
}
#if SMP_DEBUG_STACK_GRANULARITY
msg("AllocPointDelta: %d MinStackDelta: %d\n", this->AllocPointDelta, this->MinStackDelta);
#endif
assert(0 > this->MinStackDelta);
// Allocate a vector of stack frame entries, one for each byte of the stack frame.
// This will be our memory map for analyzing stack usage.
int limit = 0;
#if 1
if (this->LocalVarOffsetLimit > 0)
limit = this->LocalVarOffsetLimit;
#endif
// Map index 0 corresponds to offset MinStackDelta; every flag starts out false.
for (int i = this->MinStackDelta; i < limit; ++i) {
struct StackFrameEntry TempEntry;
TempEntry.VarPtr = NULL;
TempEntry.offset = (long) i;
TempEntry.Read = false;
TempEntry.Written = false;
TempEntry.AddressTaken = false;
TempEntry.ESPRelativeAccess = false;
TempEntry.EBPRelativeAccess = false;
this->StackFrameMap.push_back(TempEntry);
}
// Fill in the VarPtr fields for each StackFrameMap entry.
if (0 <= this->AllocPointDelta) {
msg("FATAL ERROR: AllocPointDelta = %d in %s\n", this->AllocPointDelta, this->GetFuncName());
}
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
assert(0 > this->AllocPointDelta);
for (size_t i = 0; i < this->LocalVarTable.size(); ++i) {
assert(this->LocalVarTable.at(i).offset >= 0);
// Picture that AllocPointDelta is -200, MinStackDelta is -210, and
// the LocalVarTable[i].offset is +8 (i.e. 8 bytes above alloc point).
// Then base = 8 + (-200 - -210) = 8 + 10 = 18, the proper offset into
// the StackFrameMap.
size_t base = (size_t) (this->LocalVarTable.at(i).offset
+ (this->AllocPointDelta - this->MinStackDelta));
size_t limit = base + this->LocalVarTable.at(i).size;
if (limit > this->StackFrameMap.size()) {
msg("ERROR: base = %d limit = %d StackFrameMap size = %d\n", base, limit,
this->StackFrameMap.size());
}
assert(limit <= this->StackFrameMap.size());
// The stored pointers refer to elements of LocalVarTable, so they stay valid only
// while that vector is not resized.
for (size_t MapIndex = base; MapIndex < limit; ++MapIndex) {
this->StackFrameMap[MapIndex].VarPtr = &(this->LocalVarTable.at(i));
}
}
// Iterate through all instructions and record stack frame accesses in the StackFrameMap.
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin() == CurrInst)
continue; // skip marker instruction
#endif
sval_t sp_delta = get_spd(&(this->FuncInfo), CurrInst->GetAddr());
if (0 < sp_delta) {
// Stack underflow; about to assert
msg("Stack underflow at %x %s sp_delta: %d\n", CurrInst->GetAddr(),
CurrInst->GetDisasm(), sp_delta);
}
assert(0 >= sp_delta);
// Outputs of MDGetStackOffsetAndSize(); only meaningful when it returns true.
ea_t offset;
size_t DataSize;
bool UsedFramePointer;
if (CurrInst->HasDestMemoryOperand()) {
set<DefOrUse, LessDefUse>::iterator CurrDef;
for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) {
op_t TempOp = CurrDef->GetOp();
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
if (TempOp.type != o_phrase && TempOp.type != o_displ)
continue;
if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) {
// NOTE(review): offset is declared as ea_t above; if ea_t is unsigned this assert
// can never fail -- confirm.
assert(0 <= offset);
if (offset >= this->FuncInfo.frsize)
continue; // limit processing to outgoing args and locals
if ((offset + DataSize) > this->StackFrameMap.size()) {
msg("ERROR: offset = %d DataSize = %d FrameMapSize = %d\n",
offset, DataSize, this->StackFrameMap.size());
}
assert((offset + DataSize) <= this->StackFrameMap.size());
// Mark every byte of the access as written and note which register addressed it.
for (int j = 0; j < (int) DataSize; ++j) {
this->StackFrameMap[offset + j].Written = true;
if (!UsedFramePointer)
this->StackFrameMap[offset + j].ESPRelativeAccess = true;
else
this->StackFrameMap[offset + j].EBPRelativeAccess = true;
}
}
} // end for all DEFs
} // end if DestMemoryOperand
if (CurrInst->HasSourceMemoryOperand()) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = CurrInst->GetFirstUse(); CurrUse != CurrInst->GetLastUse(); ++CurrUse) {
op_t TempOp = CurrUse->GetOp();
if (TempOp.type != o_phrase && TempOp.type != o_displ)
continue;
if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) {
assert(0 <= offset);
if (offset >= this->FuncInfo.frsize)
continue; // limit processing to outgoing args and locals
if ((offset + DataSize) > this->StackFrameMap.size()) {
msg("ERROR: offset = %d DataSize = %d FrameMapSize = %d\n",
offset, DataSize, this->StackFrameMap.size());
}
assert((offset + DataSize) <= this->StackFrameMap.size());
// Same bookkeeping as for DEFs, but the bytes are marked as read.
for (int j = 0; j < (int) DataSize; ++j) {
this->StackFrameMap[offset + j].Read = true;
if (!UsedFramePointer)
this->StackFrameMap[offset + j].ESPRelativeAccess = true;
else
this->StackFrameMap[offset + j].EBPRelativeAccess = true;
}
}
// NOTE(review): the closing brace of the "for all USEs" loop is not visible here.
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
} // end if SourceMemoryOperand
// NOTE: Detect taking the address of stack locations. **!!**
} // end for all instructions
// If function is a leaf function, set OutgoingArgsSize to zero and return.
if (this->IsLeaf()) {
this->OutgoingArgsSize = 0;
return;
}
// For non-leaf functions, set the OutgoingArgsSize to the write-only, ESP-relative
// region of the bottom of the StackFrameMap.
// NOTE(review): OutgoingArgsSize is incremented below without being reset in this
// function; presumably it is zero on entry -- confirm where it is initialized.
for (size_t MapIndex = 0; MapIndex < this->StackFrameMap.size(); ++MapIndex) {
// Some of the bottom of the stack frame might be below the local frame allocation.
// These are pushes that happened after allocation, etc. We skip over these
// locations and define the outgoing args region to start strictly at the bottom
// of the local frame allocation.
struct StackFrameEntry TempEntry = this->StackFrameMap.at(MapIndex);
if (DebugFlag) {
msg("StackFrameMap entry %d: offset: %d Read: %d Written: %d ESP: %d EBP: %d\n",
MapIndex, TempEntry.offset, TempEntry.Read, TempEntry.Written,
TempEntry.ESPRelativeAccess, TempEntry.EBPRelativeAccess);
}
if (TempEntry.offset < this->AllocPointDelta)
continue;
// The region ends at the first byte that was read, touched via EBP, never written,
// or never accessed via ESP.
if (TempEntry.Read || TempEntry.EBPRelativeAccess || !TempEntry.Written
|| !TempEntry.ESPRelativeAccess)
break;
this->OutgoingArgsSize++;
}
// Sometimes we encounter unused stack space above the outgoing args. Lump this space
// in with the outgoing args. We detect this by noting when the outgoing args space
// has only partially used the space assigned to a local var.
if ((0 < this->OutgoingArgsSize) && (this->OutgoingArgsSize < this->FuncInfo.frsize)) {
// Map index of the last byte counted as an outgoing arg.
long MapIndex = (this->AllocPointDelta - this->MinStackDelta);
assert(0 <= MapIndex);
MapIndex += (((long) this->OutgoingArgsSize) - 1);
struct StackFrameEntry TempEntry = this->StackFrameMap.at((size_t) MapIndex);
// NOTE(review): VarPtr is dereferenced without a NULL check; entries start out with
// VarPtr == NULL and are only filled in for bytes covered by a LocalVarTable entry.
if (this->OutgoingArgsSize < (TempEntry.VarPtr->offset + TempEntry.VarPtr->size)) {
clc5q
committed
#if SMP_DEBUG_FRAMEFIXUP
msg("OutGoingArgsSize = %d", this->OutgoingArgsSize);
clc5q
committed
#endif
this->OutgoingArgsSize = TempEntry.VarPtr->offset + TempEntry.VarPtr->size;
clc5q
committed
#if SMP_DEBUG_FRAMEFIXUP
msg(" adjusted to %d\n", this->OutgoingArgsSize);
clc5q
committed
#endif
}
}
return;
} // end of SMPFunction::FindOutgoingArgsSize()
// If TempOp reads or writes to a stack location, compute the location and the size in
// bytes of the data access.
// NOTE: TempOp must be of type o_displ or o_phrase, as no other operand type could be a
//  stack memory access.
// Parameters:
//  TempOp   - the memory operand to examine.
//  sp_delta - the stack pointer delta of the current instruction, relative to the initial
//             stack pointer value for the function.
//  offset   - output; always overwritten with the operand's displacement. When true is
//             returned it has additionally been rebased to the entry stack pointer and
//             then had this->MinStackDelta subtracted, i.e. it is an index into
//             this->StackFrameMap rather than a raw stack offset.
//  DataSize - output; size of the access in bytes. Written only when true is returned.
//  FP       - output; true for a frame pointer (EBP) relative access, false for a stack
//             pointer (ESP) relative access. Written only when true is returned.
// Return true if a stack memory access was found in TempOp, false otherwise.
bool SMPFunction::MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP) {
	int BaseReg;
	int IndexReg;
	ushort ScaleFactor;

	assert((o_displ == TempOp.type) || (o_phrase == TempOp.type));
	MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset);
	if (TempOp.type == o_phrase) {
		assert(offset == 0); // implicit zero, as in [esp] ==> [esp+0]
	}
	if ((BaseReg == R_sp) || (IndexReg == R_sp)) {
		// ESP-relative constant offset
		offset += sp_delta; // base offsets from entry ESP value
		offset -= this->MinStackDelta; // convert to StackFrameMap index
		// Get size of data written
		DataSize = GetOpDataSize(TempOp);
		FP = false;
		return true;
	}
	else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) {
		// EBP-relative access; only recognized when the function uses a frame pointer.
		offset -= this->FuncInfo.frregs; // base offsets from entry ESP value
		offset -= this->MinStackDelta; // convert to StackFrameMap index
		DataSize = GetOpDataSize(TempOp);
		FP = true;
		return true;
	}
	else {
		// Neither ESP nor (frame pointer) EBP is involved: not a stack access.
		return false;
	}
} // end of SMPFunction::MDGetStackOffsetAndSize()
// Decide whether DestOp is a write into the outgoing args area of the frame.
// Only a plain ESP-relative operand (no second register, no scale factor) can
// qualify. Until the outgoing args size has been computed, every such operand is
// conservatively reported as an outgoing args write.
bool SMPFunction::WritesToOutgoingArgs(op_t DestOp) {
	// A leaf function is never reported as writing outgoing args.
	if (this->IsLeaf()) {
		return false;
	}

	int BaseReg, IndexReg;
	ushort ScaleFactor;
	ea_t offset;
	MDExtractAddressFields(DestOp, BaseReg, IndexReg, ScaleFactor, offset);

	const bool BaseIsSP = (R_sp == BaseReg);
	const bool IndexIsSP = (R_sp == IndexReg);
	if (!BaseIsSP && !IndexIsSP) {
		return false; // not addressed through the stack pointer at all
	}

	// ESP combined with another register, or any scaling, is an indexed access.
	const bool PairedWithOtherReg = (BaseIsSP && (R_none != IndexReg))
		|| (IndexIsSP && (R_none != BaseReg));
	if (PairedWithOtherReg || (0 < ScaleFactor)) {
		msg("WARNING: WritesToOutgoingArgs called with indexed write.");
		PrintOperand(DestOp);
		return false;
	}

	if (!this->OutgoingArgsComputed) {
		return true; // be conservative
	}
	return (offset < this->OutgoingArgsSize);
} // end of SMPFunction::WritesToOutgoingArgs()
// Is DestOp a direct memory access above the local vars frame?
// Returns true when DestOp is ESP-relative with a displacement greater than
// this->LocalVarsSize, or (when the function uses a frame pointer) EBP-relative with a
// positive displacement. Operands using neither register return false. Indexed operands
// (both a base and an index register, or a scale factor) are reported with a warning
// and also return false.
bool SMPFunction::WritesAboveLocalFrame(op_t DestOp) {
	bool InArgWrite = false;
	int BaseReg, IndexReg;
	ushort ScaleFactor;
	ea_t offset;

	MDExtractAddressFields(DestOp, BaseReg, IndexReg, ScaleFactor, offset);
	bool ESPrelative = (BaseReg == R_sp) || (IndexReg == R_sp);
	bool EBPrelative = this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp));
	if (!(ESPrelative || EBPrelative))
		return false;
	if (((IndexReg != R_none) && (BaseReg != R_none))
		|| (0 < ScaleFactor)) {
		msg("WARNING: WritesAboveLocalFrame called with indexed write.");
		PrintOperand(DestOp);
		return false;
	}
	// The displacement arrives in an ea_t. Reinterpret it through adiff_t (the signed
	// type used for address differences elsewhere in this file) before widening, so that
	// a negative displacement such as [ebp-8] stays negative whatever the width of long.
	const long SignedOffset = (long) ((adiff_t) offset);
	InArgWrite = (ESPrelative && (SignedOffset > ((long) this->LocalVarsSize)))
		|| (EBPrelative && (SignedOffset > 0));
	return InArgWrite;
}// end of SMPFunction::WritesAboveLocalFrame()
// Counterpart of WritesAboveLocalFrame() that accepts indexed operands: no warning is
// issued and no rejection is made for a second register or a scale factor.
// Returns true when DestOp is ESP-relative with a displacement greater than
// this->LocalVarsSize, or (when the function uses a frame pointer) EBP-relative with a
// positive displacement; false otherwise.
bool SMPFunction::IndexedWritesAboveLocalFrame(op_t DestOp)
{
	bool InArgWrite = false;
	int BaseReg, IndexReg;
	ushort ScaleFactor;
	ea_t offset;

	MDExtractAddressFields(DestOp, BaseReg, IndexReg, ScaleFactor, offset);
	bool ESPrelative = (BaseReg == R_sp) || (IndexReg == R_sp);
	bool EBPrelative = this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp));
	if (!(ESPrelative || EBPrelative))
		return false;
	// Compare as signed values, exactly as WritesAboveLocalFrame() does. Testing the raw
	// ea_t against zero would treat a negative displacement (e.g. [ebp+esi-8], which lies
	// inside the local frame) as a huge positive offset above the frame.
	const long SignedOffset = (long) ((adiff_t) offset);
	InArgWrite = (ESPrelative && (SignedOffset > ((long) this->LocalVarsSize)))
		|| (EBPrelative && (SignedOffset > 0));
	return InArgWrite;
} // end of SMPFunction::IndexedWritesAboveLocalFrame()
// Find evidence of calls to alloca(), which appear as stack space allocations (i.e.
// subtractions from the stack pointer) AFTER the local frame allocation instruction
// for this function.
// Return true if such an allocation is found and false otherwise.
bool SMPFunction::FindAlloca(void) {
list<SMPInstr>::iterator CurrInst;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.begin() == CurrInst)
continue; // skip marker instruction
#endif
if ((CurrInst->GetAddr() > this->LocalVarsAllocInstr) && CurrInst->MDIsFrameAllocInstr()) {
return true;
}
}
return false;
} // end of SMPFunction::FindAlloca()
// Emit the annotations describing the regions of the stack frame.
// Writes to AnnotFile, tagging every line with the address of Instr:
//  - a MEMORYHOLE line for the return address, if RetAddrSize is nonzero;
//  - a MEMORYHOLE line for the callee-saved registers, if CalleeSavedRegsSize is nonzero;
//  - a PARENT DATAREF line for the whole local frame, if LocalVarsSize is nonzero;
//  - when SMP_COMPUTE_STACK_GRANULARITY is enabled and the stack could be analyzed,
//    CHILDOF DATAREF lines for the outgoing args region and for the LocalVarTable
//    entries that lie between OutgoingArgsSize and frsize.
// Each DATAREF line consumes one value of the DataReferentID counter.
// NOTE(review): this copy looks incomplete -- several "#if" lines have no visible
// "#endif" and the closing braces do not balance. Confirm against the repository.
void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) {
ea_t addr = Instr->GetAddr();
#if 0
if (0 < IncomingArgsSize) {
qfprintf(AnnotFile, "%10x %6d INARGS STACK esp + %d %s \n",
addr, IncomingArgsSize,
(LocalVarsSize + CalleeSavedRegsSize + RetAddrSize),
Instr->GetDisasm());
}
#endif
// The return address is reported at esp + (locals + callee-saved registers).
if (0 < RetAddrSize) {
qfprintf(AnnotFile, "%10x %6d MEMORYHOLE STACK esp + %d ReturnAddress \n",
addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize));
}
// The callee-saved registers are reported at esp + LocalVarsSize.
// NOTE(review): the closing brace of this "if" is not visible in this copy.
if (0 < CalleeSavedRegsSize) {
qfprintf(AnnotFile, "%10x %6d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n",
addr, CalleeSavedRegsSize, LocalVarsSize);
if (0 < LocalVarsSize) {
// The ID reserved here is the parent that all child regions below refer to.
unsigned long ParentReferentID = DataReferentID++;
qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d PARENT LocalFrame LOCALFRAME\n",
addr, LocalVarsSize, ParentReferentID, 0);
#if SMP_COMPUTE_STACK_GRANULARITY
if (this->AnalyzedSP && !this->CallsAlloca && (BADADDR != this->LocalVarsAllocInstr)) {
// We can only fine-grain the stack frame if we were able to analyze the stack
// NOTE(review): the closing brace of the "if" below and the "#endif" lines for the
// debug "#if" blocks that follow are not visible in this copy.
if (this->OutgoingArgsSize > 0) {
qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d CHILDOF %d OFFSET %d OutArgsRegion OUTARGS\n",
addr, this->OutgoingArgsSize, DataReferentID, 0, ParentReferentID, 0);
++DataReferentID;
#if SMP_DEBUG_STACK_GRANULARITY
msg("LocalVarTable of size %d for function %s\n", this->LocalVarTable.size(),
this->GetFuncName());
for (size_t i = 0; i < this->LocalVarTable.size(); ++i) {
#if SMP_DEBUG_STACK_GRANULARITY
msg("Entry %d offset %d size %d name %s\n", i, this->LocalVarTable[i].offset,
this->LocalVarTable[i].size, this->LocalVarTable[i].VarName);
// Don't emit annotations for incoming or outgoing args or anything else
// above or below the current local frame.
if ((this->LocalVarTable[i].offset >= (long) this->FuncInfo.frsize)
|| (this->LocalVarTable[i].offset < (long) this->OutgoingArgsSize))
continue;
// The variable's frame offset is emitted twice: as the esp offset and as the
// OFFSET within the parent region.
qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d CHILDOF %d OFFSET %d LOCALVAR %s \n",
addr, this->LocalVarTable[i].size, DataReferentID,
this->LocalVarTable[i].offset, ParentReferentID,
this->LocalVarTable[i].offset, this->LocalVarTable[i].VarName);
++DataReferentID;
} // end if (this->AnalyzedSP and not Alloca .... )
} // end if (0 < LocalVarsSize)
return;
} // end of SMPFunction::EmitStackFrameAnnotations()
// Main data flow analysis driver. Goes through the function and
// fills all objects for instructions, basic blocks, and the function
// itself.
// In order: reads basic properties from this->FuncInfo; iterates over every chunk of
// the function, creating and analyzing one SMPInstr per code head and grouping the
// instructions into SMPBasicBlock objects; links each instruction to the stored copy of
// its block; then, for functions without shared chunks, sets up block links, RPO
// numbering, stack frame info and RTLs. Functions with shared chunks only get stack
// frame info.
// NOTE(review): the bare numeric lines and the "clc5q" / "committed" lines inside this
// function are not C++; they look like residue from a web blame view and should be
// stripped. Several real lines (closing braces, "#endif") also appear to be missing;
// see the notes below and confirm against the repository.
void SMPFunction::Analyze(void) {
clc5q
committed
bool FoundAllCallers = false;
// Instrs.end() in FirstInBlock means "no basic block is currently open".
list<SMPInstr>::iterator FirstInBlock = this->Instrs.end();
// For starting a basic block
list<SMPInstr>::iterator LastInBlock = this->Instrs.end();
// Terminating a basic block
#if SMP_DEBUG_CONTROLFLOW
msg("Entering SMPFunction::Analyze.\n");
#endif
// Get some basic info from the FuncInfo structure.
this->Size = this->FuncInfo.endEA - this->FuncInfo.startEA;
this->UseFP = (0 != (this->FuncInfo.flags & (FUNC_FRAME | FUNC_BOTTOMBP)));
this->StaticFunc = (0 != (this->FuncInfo.flags & FUNC_STATIC));
this->LibFunc = (0 != (this->FuncInfo.flags & FUNC_LIB));
get_func_name(this->FuncInfo.startEA, this->FuncName,
sizeof(this->FuncName) - 1);
this->BlockCount = 0;
this->AnalyzedSP = this->FuncInfo.analyzed_sp();
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: got basic info.\n");
#endif
// Cycle through all chunks that belong to the function.
func_tail_iterator_t FuncTail(&(this->FuncInfo));
size_t ChunkCounter = 0;
for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) {
const area_t &CurrChunk = FuncTail.chunk();
++ChunkCounter;
// Any chunk after the first one marks the function as having shared chunks.
if (1 < ChunkCounter) {
this->SharedChunks = true;
#if SMP_DEBUG_CHUNKS
msg("Found tail chunk for %s at %x\n", this->FuncName, CurrChunk.startEA);
#endif
}
// Build the instruction and block lists for the function.
for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA;
addr = get_item_end(addr)) {
flags_t InstrFlags = getFlags(addr);
if (isHead(InstrFlags) && isCode(InstrFlags)) {
SMPInstr CurrInst = SMPInstr(addr);
// Fill in the instruction data members.
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n");
#endif
CurrInst.Analyze();
if (SMPBinaryDebug) {
msg("Disasm: %s \n", CurrInst.GetDisasm());
}
#if SMP_USE_SSA_FNOP_MARKER
if (this->Instrs.empty()) {
// First instruction in function. We want to create a pseudo-instruction
// at the top of the function that can hold SSA DEFs for LiveIn names
// to the function. We use a floating point no-op as the pseudo-inst.
// The code address is one less than the start address of the function.
SMPInstr MarkerInst = SMPInstr(addr - 1);
MarkerInst.AnalyzeMarker();
assert(FirstInBlock == this->Instrs.end());
this->Instrs.push_back(MarkerInst);
}
#endif
if (this->AnalyzedSP) {
// Audit the IDA SP analysis.
sval_t sp_delta = get_spd(&(this->FuncInfo), addr);
// sp_delta is difference between current value of stack pointer
// and value of the stack pointer coming into the function. It
// is updated AFTER each instruction. Thus, it should not get back
// above zero (e.g. to +4) until after a return instruction.
if (sp_delta > 0) {
// Stack pointer has underflowed, according to IDA's analysis,
// which is probably incorrect.
this->AnalyzedSP = false;
msg("Resetting AnalyzedSP to false for %s\n", this->GetFuncName());
msg("Underflowing instruction: %s sp_delta: %d\n", CurrInst.GetDisasm(),
sp_delta);
}
else if (sp_delta == 0) {
// Search for tail calls.
if (CurrInst.IsBranchToFarChunk()) {
// After the stack has been restored to the point at which
// we are ready to return, we instead find a jump to a
// far chunk. This is the classic tail call optimization:
// the return statement has been replaced with a jump to
// another function, which will return not to this function,
// but to the caller of this function.
CurrInst.SetTailCall();
msg("Found tail call at %x from %s: %s\n", addr, this->GetFuncName(),
CurrInst.GetDisasm());
// Just like a return instruction, we must make
// DEF-USE chains reach the tail call.
CurrInst.MDAddRegUse(R_ax, false);
CurrInst.MDAddRegUse(R_bx, false);
CurrInst.MDAddRegUse(R_cx, false);
CurrInst.MDAddRegUse(R_dx, false);
CurrInst.MDAddRegUse(R_bp, false);
CurrInst.MDAddRegUse(R_si, false);
CurrInst.MDAddRegUse(R_di, false);
// NOTE(review): the closing braces for the tail call "if", the "else if" and the
// "if (this->AnalyzedSP)" above are not visible in this copy.
clc5q
committed
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
// Find all functions that call the current function.
xrefblk_t CurrXrefs;
if (!FoundAllCallers) {
for (bool ok = CurrXrefs.first_to(CurrInst.GetAddr(), XREF_ALL);
ok;
ok = CurrXrefs.next_to()) {
if ((CurrXrefs.from != 0) && (CurrXrefs.iscode)) {
// Make sure it is not a fall-through. Must be a
// control-flow instruction of some sort, including
// direct or indirect calls or tail calls.
SMPInstr CallInst(CurrXrefs.from);
CallInst.Analyze();
SMPitype CallType = CallInst.GetDataFlowType();
if ((COND_BRANCH <= CallType) && (RETURN >= CallType)) {
// Found a caller, with its call address in CurrXrefs.from
this->AddCallSource(CurrXrefs.from);
}
}
}
FoundAllCallers = true; // only do this for first inst
}
if (CurrInst.GetDataFlowType() == INDIR_CALL) {
this->IndirectCalls = true;
// See if IDA has determined all possible targets
// of the indirect call.
bool LinkedToTarget = false;
for (bool ok = CurrXrefs.first_from(CurrInst.GetAddr(), XREF_ALL);
ok;
ok = CurrXrefs.next_from()) {
if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) {
// Found a code target, with its address in CurrXrefs.to
if (CurrXrefs.to == (CurrInst.GetAddr() + CurrInst.GetCmd().size)) {
// A call instruction will have two targets: the fall through to the
// next instruction, and the called function. We want to find
// the called function.
continue;
}
// We found a target, not the fall-through.
LinkedToTarget = true;
this->IndirectCallTargets.push_back(CurrXrefs.to);
this->AllCallTargets.push_back(CurrXrefs.to);
msg("Found indirect call target %x at %x\n",
CurrXrefs.to, CurrInst.GetAddr());
}
} // end for all code xfrefs
// This flag is overwritten for each indirect call, so it reflects the last one seen.
this->UnresolvedIndirectCalls = (!LinkedToTarget);
if (!LinkedToTarget) {
msg("WARNING: Did not find indirect call target at %x\n",
CurrInst.GetAddr());
}
} // end if INDIR_CALL
else if (CurrInst.GetDataFlowType() == INDIR_JUMP)
this->IndirectJumps = true;
else if (CurrInst.GetDataFlowType() == CALL) {
// Direct call: record each near/far code operand as a call target.
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = CurrInst.GetFirstUse(); CurrUse != CurrInst.GetLastUse(); ++CurrUse) {
optype_t OpType = CurrUse->GetOp().type;
if ((OpType == o_near) || (OpType == o_far)) {
ea_t CallTarget = CurrUse->GetOp().addr;
this->DirectCallTargets.push_back(CallTarget);
this->AllCallTargets.push_back(CallTarget);
// NOTE(review): the closing braces for the operand "if", the USE loop and the
// "else if (... == CALL)" above are not visible in this copy.
// Before we insert the instruction into the instruction
// list, determine if it is a jump target that does not
// follow a basic block terminator. This is the special case
// of a CASE in a SWITCH that falls through into another
// CASE, for example. The first sequence of statements
// was not terminated by a C "break;" statement, so it
// looks like straight line code, but there is an entry
// point at the beginning of the second CASE sequence and
// we have to split basic blocks at the entry point.
if ((FirstInBlock != this->Instrs.end())
&& CurrInst.IsJumpTarget()) {
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: hit special jump target case.\n");
#endif
// Close the open block at the last instruction already on the list.
LastInBlock = --(this->Instrs.end());
SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock,
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
LastInBlock);
CurrBlock.Analyze();
// If not the first chunk in the function, it is a shared
// tail chunk.
if (ChunkCounter > 1) {
CurrBlock.SetShared();
}
FirstInBlock = this->Instrs.end();
LastInBlock = this->Instrs.end();
this->Blocks.push_back(CurrBlock);
this->BlockCount += 1;
}
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: putting CurrInst on list.\n");
#endif
// Insert instruction at end of list.
this->Instrs.push_back(CurrInst);
// Find basic block leaders and terminators.
if (FirstInBlock == this->Instrs.end()) {
// NOTE(review): no "#endif" is visible for the "#if SMP_DEBUG_CONTROLFLOW" below, nor
// for the "#if SMP_USE_SSA_FNOP_MARKER" / "#else" pair that follows it.
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: setting FirstInBlock.\n");
#if SMP_USE_SSA_FNOP_MARKER
if (2 == this->Instrs.size()) {
// Just pushed first real instruction, after the fnop marker.
FirstInBlock = this->Instrs.begin();
}
else {
FirstInBlock = --(this->Instrs.end());
}
#else
FirstInBlock = --(this->Instrs.end());
}
if (CurrInst.IsBasicBlockTerminator()) {
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: found block terminator.\n");
#endif
LastInBlock = --(this->Instrs.end());
SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock);
CurrBlock.Analyze();
// If not the first chunk in the function, it is a shared
// tail chunk.
if (ChunkCounter > 1) {
CurrBlock.SetShared();
}
FirstInBlock = this->Instrs.end();
LastInBlock = this->Instrs.end();
this->Blocks.push_back(CurrBlock);
this->BlockCount += 1;
// Is the instruction a branch to a target outside the function? If
// so, this function has shared tail chunks.
if (CurrInst.IsBranchToFarChunk() && (!CurrInst.IsTailCall())) {
this->SharedChunks = true;
}
}
} // end if (isHead(InstrFlags) && isCode(InstrFlags)
} // end for (ea_t addr = CurrChunk.startEA; ... )
// Handle the special case in which a function does not terminate
// with a return instruction or any other basic block terminator.
// Sometimes IDA Pro sees a call to a NORET function and decides
// to not include the dead code after it in the function. That
// dead code includes the return instruction, so the function no
// longer includes a return instruction and terminates with a CALL.
if (FirstInBlock != this->Instrs.end()) {
LastInBlock = --(this->Instrs.end());
SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock);
CurrBlock.Analyze();
// If not the first chunk in the function, it is a shared
// tail chunk.
if (ChunkCounter > 1) {
CurrBlock.SetShared();
}
FirstInBlock = this->Instrs.end();
LastInBlock = this->Instrs.end();
this->Blocks.push_back(CurrBlock);
this->BlockCount += 1;
}
} // end for (bool ChunkOK = ...)
// Now that we have all instructions and basic blocks, link each instruction
// to its basic block. Note that the instruction has to be linked to the copy
// of the basic block in this->Blocks(), not to the original SMPBasicBlock
// object that was constructed and destructed on the stack above. (Ouch!
// Very painful memory corruption debugging lesson.)
list<SMPBasicBlock>::iterator CurrBlock;
list<list<SMPInstr>::iterator>::iterator InstIter;
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
for (InstIter = CurrBlock->GetFirstInstr(); InstIter != CurrBlock->GetLastInstr(); ++InstIter) {
(*InstIter)->SetBlock(CurrBlock->GetThisBlock());
}
}
// Kludge: any function whose name contains "printf" is treated as having shared chunks.
#if KLUDGE_VFPRINTF_FAMILY
if (0 != strstr(this->GetFuncName(), "printf")) {
this->SharedChunks = true;
msg("Kludging function %s\n", this->GetFuncName());
}
#endif
#if SMP_IDAPRO52_WORKAROUND
if (0 == strcmp(this->GetFuncName(), "error_for_asm")) {
this->SharedChunks = true;
msg("Kludging function %s\n", this->GetFuncName());
}
#endif
// Set up basic block links and map of instructions to blocks.
if (!(this->HasSharedChunks())) {
this->SetLinks();
this->RPONumberBlocks();
// Figure out the stack frame and related info.
this->SetStackFrameInfo();
clc5q
committed
list<SMPInstr>::iterator CurrInst;
bool GoodRTL;
// BuiltRTLs ends up true only if BuildRTL() succeeded for every instruction.
this->BuiltRTLs = true;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
// Build tree RTLs for the instruction.
GoodRTL = CurrInst->BuildRTL();
this->BuiltRTLs = (this->BuiltRTLs && GoodRTL);
clc5q
committed
if (!GoodRTL) {
msg("ERROR: Cannot build RTL at %x for %s\n", CurrInst->GetAddr(),
CurrInst->GetDisasm());
}
clc5q
committed
if (GoodRTL)
CurrInst->SyncAllRTs();
// Detect indirect memory references.
CurrInst->AnalyzeIndirectRefs(this->UseFP);
clc5q
committed
} // end for all instructions
} // end if not shared chunks
else { // has shared chunks; still want to compute stack frame info
// NOTE(review): no "#endif" is visible for the "#if SMP_DEBUG_CONTROLFLOW" below.
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: set stack frame info.\n");
#ifdef SMP_DEBUG_FUNC
msg(" %s has shared chunks \n", this->GetFuncName());
#endif
// Figure out the stack frame and related info.
this->SetStackFrameInfo();
}
} // end of SMPFunction::Analyze()
// For each instruction, mark the non-flags-reg DEFs as having live
// metadata (mmStrata needs to fetch and track this metadata for this
// instruction) or dead metadata (won't be used as addressing reg, won't
// be stored to memory, won't be returned to caller).
void SMPFunction::AnalyzeMetadataLiveness(void) {
bool changed;
int BaseReg;
int IndexReg;
ushort ScaleFactor;
ea_t offset;
op_t BaseOp, IndexOp, ReturnOp, DefOp, UseOp;
BaseOp.type = o_reg;
IndexOp.type = o_reg;
ReturnOp.type = o_reg;
list<SMPInstr>::iterator CurrInst;
set<DefOrUse, LessDefUse>::iterator CurrDef;
set<DefOrUse, LessDefUse>::iterator CurrUse;
set<DefOrUse, LessDefUse>::iterator NextUse;
bool DebugFlag = false;
if (0 == strcmp("gl_transform_vb_part1", this->GetFuncName())) {
DebugFlag = true;
}
bool SafeMemDest;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
SafeMemDest = false; // true for some SafeFunc instructions
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
// Skip the SSA marker instruction.
if (NN_fnop == CurrInst->GetCmd().itype)
continue;
CurrDef = CurrInst->GetFirstDef();
while (CurrDef != CurrInst->GetLastDef()) {
if (DEF_METADATA_UNANALYZED == CurrDef->GetMetadataStatus()) {
DefOp = CurrDef->GetOp();
// Handle special registers never used as address regs.
if (DefOp.is_reg(X86_FLAGS_REG)
|| ((o_trreg <= DefOp.type) && (o_xmmreg >= DefOp.type))) {
CurrDef = CurrInst->SetDefMetadata(DefOp,
DEF_METADATA_UNUSED);
changed = true;
}
else if (DefOp.is_reg(R_sp)
|| (this->UseFP && DefOp.is_reg(R_bp))) {
// Stack pointer register DEFs always have live
// metadata, but we don't need to propagate back
// through particular DEF-USE chains.
CurrDef = CurrInst->SetDefMetadata(DefOp, DEF_METADATA_USED);
changed = true;
}
else if ((o_mem <= DefOp.type) && (o_displ >= DefOp.type)) {
// DEF is a memory operand. The addressing registers
// therefore have live metadata, and the memory metadata is live.
// EXCEPTION: If the function is Safe, then direct stack writes
// to local variables (above the outgoing args area of the frame)
// are not live metadata, and there will be no indirect local frame
// writes, by definition of "safe." So, for safe funcs, only
// the o_mem (globals) and indirect writes are live metadata.
if (this->SafeFunc && MDIsStackAccessOpnd(DefOp, this->UseFP)
&& (!this->WritesAboveLocalFrame(DefOp))
&& (!this->WritesToOutgoingArgs(DefOp))) {
++CurrDef;
SafeMemDest = true;
continue;
}
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
CurrDef = CurrInst->SetDefMetadata(DefOp, DEF_METADATA_USED);
changed = true;
MDExtractAddressFields(DefOp, BaseReg, IndexReg,
ScaleFactor, offset);
if (R_none != BaseReg) {
BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
if (BaseOp.is_reg(R_sp)
|| (this->UseFP && BaseOp.is_reg(R_bp))) {
; // do nothing; DEF handled by case above
}
else {
CurrUse = CurrInst->FindUse(BaseOp);
if (CurrUse == CurrInst->GetLastUse()) {
msg("ERROR: BaseReg %d not in USE list at %x for %s\n",
BaseOp.reg, CurrInst->GetAddr(),
CurrInst->GetDisasm());
}
assert(CurrUse != CurrInst->GetLastUse());
if (this->IsGlobalName(BaseOp)) {
changed |= this->PropagateGlobalMetadata(BaseOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
else {
changed |= CurrInst->GetBlock()->PropagateLocalMetadata(BaseOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
}
} // end if R_none != BaseReg
if (R_none != IndexReg) {
IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
if (IndexOp.is_reg(R_sp)
|| (this->UseFP && IndexOp.is_reg(R_bp))) {
; // do nothing; DEF handled by case above
}
else {
CurrUse = CurrInst->FindUse(IndexOp);
if (CurrUse == CurrInst->GetLastUse()) {
msg("ERROR: IndexReg %d not in USE list at %x for %s\n",
IndexOp.reg, CurrInst->GetAddr(),
CurrInst->GetDisasm());
}
assert(CurrUse != CurrInst->GetLastUse());
if (0 != ScaleFactor) {
; // mmStrata knows scaled reg is NUMERIC
// ... its metadata is not fetched
}
else if (this->IsGlobalName(IndexOp)) {
changed |= this->PropagateGlobalMetadata(IndexOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
else {
changed |= CurrInst->GetBlock()->PropagateLocalMetadata(IndexOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
}
} // end if R_none != IndexReg
} // end if X86_FLAGS_REG .. else if stack ptr ...
} // end if unanalyzed metadata usage
++CurrDef;
} // end while processing DEFs
if ((RETURN == CurrInst->GetDataFlowType())
|| (CALL == CurrInst->GetDataFlowType())
|| (INDIR_CALL == CurrInst->GetDataFlowType())) {
// The EAX and EDX registers can be returned to the caller,
// which might use their metadata. They show up as USEs
// of the return instruction. Some library functions
// pass return values in non-standard ways. e.g. through
// EBX or EDI, so we treat all return regs the same.
// For CALL instructions, values can be passed in caller-saved
// registers, unfortunately, so the metadata is live-in.
CurrUse = CurrInst->GetFirstUse();
while (CurrUse != CurrInst->GetLastUse()) {
NextUse = CurrUse;
++NextUse;
ReturnOp = CurrUse->GetOp();
if ((o_reg == ReturnOp.type) &&
(!ReturnOp.is_reg(X86_FLAGS_REG))) {
if (this->IsGlobalName(ReturnOp)) {
changed |= this->PropagateGlobalMetadata(ReturnOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
else {
changed |= CurrInst->GetBlock()->PropagateLocalMetadata(ReturnOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
}
CurrUse = NextUse;
} // end while all USEs
} // end if return or call
else if (CurrInst->HasDestMemoryOperand()
// Memory writes cause a lot of metadata usage.
// Addressing registers in the memory destination
// have live metadata used in bounds checking. The
// register being stored to memory could end up being
// used in some other bounds checking, unless we
// have precise memory tracking and know that it
// won't.
// We handled the addressing registers above, so we
// handle the register written to memory here.
// The same exception applies as above: If the destination
// memory operand is not a stack write, then safe functions
// do not need to track the metadata.
if (SafeMemDest) {
continue; // go to next instruction
}
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
CurrUse = CurrInst->GetFirstUse();
while (CurrUse != CurrInst->GetLastUse()) {
NextUse = CurrUse;
++NextUse;
UseOp = CurrUse->GetOp();
// NOTE: **!!** To be less conservative, we
// should propagate less for exchange category
// instructions.
if ((UseOp.type == o_reg) && (!UseOp.is_reg(R_sp))
&& (!(this->UseFP && UseOp.is_reg(R_bp)))
&& (!UseOp.is_reg(X86_FLAGS_REG))) {
if (this->IsGlobalName(UseOp)) {
changed |= this->PropagateGlobalMetadata(UseOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
else {
changed |= CurrInst->GetBlock()->PropagateLocalMetadata(UseOp,
DEF_METADATA_USED, CurrUse->GetSSANum());
}
} // end if register
CurrUse = NextUse;
} // end while all USEs
} // end if call or return else if memdest ...
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
} // end for all instructions
} while (changed);
// All DEFs that still have status DEF_METADATA_UNANALYZED can now
// be marked as DEF_METADATA_UNUSED.
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
if (NN_fnop == CurrInst->GetCmd().itype)
continue;
CurrDef = CurrInst->GetFirstDef();
while (CurrDef != CurrInst->GetLastDef()) {
if (DEF_METADATA_UNANALYZED == CurrDef->GetMetadataStatus()) {
CurrDef = CurrInst->SetDefMetadata(CurrDef->GetOp(),
DEF_METADATA_UNUSED);
assert(CurrDef != CurrInst->GetLastDef());
}
++CurrDef;
}
}
return;
} // end of SMPFunction::AnalyzeMetadataLiveness()
// Propagate the metadata Status for UseOp/SSANum to its global DEF.
//
// The DEF is searched for first among the instructions and, if not found
// there, among the Phi functions of every block. When the DEF's status
// differs from Status it is updated, and the status is then propagated
// recursively to the USEs that feed that DEF.
//
// Returns true if any DEF metadata was changed by this call (including the
// recursive propagation). Returns false immediately when SSANum is negative
// or UseOp is o_void. If no DEF is found at all, an error is logged.
bool SMPFunction::PropagateGlobalMetadata(op_t UseOp, SMPMetadataType Status, int SSANum) {
	bool changed = false;

	if ((0 > SSANum) || (o_void == UseOp.type))
		return false;

	// Find the DEF of UseOp with SSANum.
	bool FoundDef = false;
	list<SMPInstr>::iterator CurrInst;

	for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
		set<DefOrUse, LessDefUse>::iterator CurrDef;
		set<DefOrUse, LessDefUse>::iterator CurrUse;
		CurrDef = CurrInst->FindDef(UseOp);
		if (CurrDef != CurrInst->GetLastDef()) {
			if (SSANum == CurrDef->GetSSANum()) {
				FoundDef = true;
				if (Status != CurrDef->GetMetadataStatus()) {
					CurrDef = CurrInst->SetDefMetadata(UseOp, Status);
					changed = (CurrDef != CurrInst->GetLastDef());

					// If source operand was memory, we have two cases.
					//  (1) The instruction could be a load, in which
					//  case we should simply terminate the
					//  propagation, because the prior DEF of a memory
					//  location is always considered live metadata
					//  already, and we do not want to propagate liveness
					//  to the address regs in the USE list.
					//  EXCEPTION: For safe funcs, we propagate liveness
					//   for stack locations.
					//  (2) We could have an arithmetic operation such
					//  as reg := reg arithop memsrc. In this case, we
					//  still do not want to propagate through the memsrc,
					//  (with the same safe func EXCEPTION),
					//  but the register is both DEF and USE and we need
					//  to propagate through the register.
					if (CurrInst->HasSourceMemoryOperand()) {
						if (this->SafeFunc) {
							op_t MemSrcOp = CurrInst->MDGetMemUseOp();
							assert(o_void != MemSrcOp.type);
							if (MDIsStackAccessOpnd(MemSrcOp, this->UseFP)) {
								// We have a SafeFunc stack access. This is
								//  the EXCEPTION case where we want to
								//  propagate metadata liveness for a memory
								//  location.
								CurrUse = CurrInst->FindUse(MemSrcOp);
								assert(CurrUse != CurrInst->GetLastUse());
								if (this->IsGlobalName(MemSrcOp)) {
									changed |= this->PropagateGlobalMetadata(MemSrcOp,
										Status, CurrUse->GetSSANum());
								}
								else {
									changed |= CurrInst->GetBlock()->PropagateLocalMetadata(MemSrcOp,
										Status, CurrUse->GetSSANum());
								}
							} // end if stack access operand
						} // end if SafeFunc
						if (3 == CurrInst->GetOptType()) { // move inst
							break; // load address regs are not live metadata
						}
						else if ((5 == CurrInst->GetOptType())
							|| (NN_and == CurrInst->GetCmd().itype)
							|| (NN_or == CurrInst->GetCmd().itype)
							|| (NN_xor == CurrInst->GetCmd().itype)) {
							// add, subtract, and, or with memsrc
							// Find the DEF reg in the USE list.
							CurrUse = CurrInst->FindUse(UseOp);
							assert(CurrUse != CurrInst->GetLastUse());
							changed |= this->PropagateGlobalMetadata(UseOp,
								Status, CurrUse->GetSSANum());
							break;
						}
					} // end if memory source

					// Now, propagate the metadata status to all the
					//  non-memory, non-flags-reg, non-special-reg
					//  (i.e. regular registers) USEs.
					CurrUse = CurrInst->GetFirstUse();
					while (CurrUse != CurrInst->GetLastUse()) {
						// Capture the successor before propagating, matching
						//  the USE loops in AnalyzeMetadataLiveness().
						set<DefOrUse, LessDefUse>::iterator NextUse = CurrUse;
						++NextUse;
						// Distinct name: must not shadow the UseOp parameter.
						op_t SrcOp = CurrUse->GetOp();
						// NOTE: **!!** To be less conservative, we
						//  should propagate less for exchange category
						//  instructions.
						if ((SrcOp.type == o_reg) && (!SrcOp.is_reg(R_sp))
							&& (!(this->UseFP && SrcOp.is_reg(R_bp)))
							&& (!SrcOp.is_reg(X86_FLAGS_REG))) {
							if (this->IsGlobalName(SrcOp)) {
								changed |= this->PropagateGlobalMetadata(SrcOp,
									Status, CurrUse->GetSSANum());
							}
							else {
								changed |= CurrInst->GetBlock()->PropagateLocalMetadata(SrcOp,
									Status, CurrUse->GetSSANum());
							}
						} // end if regular register
						CurrUse = NextUse;
					} // end while all USEs
				} // end if status differs
				break; // DEF located; stop scanning instructions
			}
		}
	} // end for all instructions

	if (!FoundDef) {
		// Check the Phi functions
		list<SMPBasicBlock>::iterator CurrBlock;
		for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
			set<SMPPhiFunction, LessPhi>::iterator DefPhi;
			DefPhi = CurrBlock->FindPhi(UseOp);
			if (DefPhi != CurrBlock->GetLastPhi()) {
				if (SSANum == DefPhi->GetDefSSANum()) {
					if (Status != DefPhi->GetDefMetadata()) {
						// The status is stored before recursing, so a Phi
						//  reached again through a cycle sees a matching
						//  status and does not recurse further.
						DefPhi = CurrBlock->SetPhiDefMetadata(UseOp, Status);
						changed = true;
						// If the Phi DEF has live metadata, then the Phi
						//  USEs each have live metadata. Propagate.
						int UseSSANum;
						for (size_t index = 0; index < DefPhi->GetPhiListSize(); ++index) {
							UseSSANum = DefPhi->GetUseSSANum(index);
							// UseSSANum can be -1 in some cases because
							//  we conservatively make EAX and EDX be USEs
							//  of all return instructions, when the function
							//  might have a void return type, making it
							//  appear as if an uninitialized EAX or EDX
							//  could make it to the return block.
							if (0 <= UseSSANum) {
								changed |= this->PropagateGlobalMetadata(UseOp,
									Status, UseSSANum);
							}
						}
					}
					FoundDef = true;
					break;
				}
			}
		} // end for all blocks
	} // end if !FoundDef

	if (!FoundDef) {
		// The format string has a %d; it must be given SSANum.
		msg("ERROR: Could not find DEF of SSANum %d for: ", SSANum);
		PrintOperand(UseOp);
		msg(" in function %s\n", this->GetFuncName());
	}

	return changed;
} // end of SMPFunction::PropagateGlobalMetadata()
// Find consecutive DEFs of the same type and mark the second one redundant.
void SMPFunction::FindRedundantMetadata(void) {
list<SMPBasicBlock>::iterator CurrBlock;
bool changed = false;
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
changed |= CurrBlock->FindRedundantLocalMetadata(this->SafeFunc);
}
return;
} // end of SMPFunction::FindRedundantMetadata()
// Compute SSA form data structures across the function.
void SMPFunction::ComputeSSA(void) {
bool DumpFlag = false;
#if SMP_DEBUG_DATAFLOW
DumpFlag |= (0 == strcmp("main", this->GetFuncName()));
DumpFlag |= (0 == strcmp("call_gmon_start", this->GetFuncName()));
DumpFlag |= (0 == strcmp("_init_proc", this->GetFuncName()));
#if 0
DebugFlag |= (0 == strcmp("call_gmon_start", this->GetFuncName()));
#endif
#if 0
DumpFlag |= (0 == strcmp("_nl_find_msg", this->GetFuncName()));
if (DumpFlag)
this->Dump();
#endif
this->ComputeIDoms();
this->ComputeDomFrontiers();
this->ComputeGlobalNames();
this->ComputeBlocksDefinedIn();
this->InsertPhiFunctions();
this->BuildDominatorTree();
list<SMPBasicBlock>::iterator CurrBlock;
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
CurrBlock->SetLocalNames();
CurrBlock->SSALocalRenumber();
if (DebugFlag) CurrBlock->Dump();
#if SMP_FULL_LIVENESS_ANALYSIS
CurrBlock->CreateGlobalChains();
#endif
#if 1
CurrBlock->MarkDeadRegs();
#endif
}
#if SMP_DEBUG_DATAFLOW
if (DumpFlag)
this->Dump();
#endif
return;
} // end of SMPFunction::ComputeSSA()
// Find memory writes (DEFs) with possible aliases
void SMPFunction::AliasAnalysis(void) {
// First task: Mark which memory DEFs MIGHT be aliased because an
// indirect memory write occurs somewhere in the DEF-USE chain.
// Memory DEF-USE chains with no possible aliasing can be subjected
// to type inference and type-based optimizing annotations, e.g. a
// register spill to memory followed by retrieval from spill memory
// followed by NUMERIC USEs should be typed as a continuous NUMERIC
// chain if there is no possibility of aliasing.
// Preparatory step: For each indirect write, mark all def-use chains
// (maintained at the basic block level) that include the indirect
// write instruction. If there are no indirect writes in the function,
// leave all DEFs marked as unaliased and exit.
if (!(this->HasIndirectWrites))
return;
list<SMPBasicBlock>::iterator CurrBlock;
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
list<list<SMPInstr>::iterator>::iterator CurrInst;
for (CurrInst = CurrBlock->GetFirstInstr();
CurrInst != CurrBlock->GetLastInstr();
++CurrInst) {
if ((*CurrInst)->HasIndirectMemoryWrite()) {
CurrBlock->MarkIndWriteChains((*CurrInst)->GetAddr());
// Until we get true aliasing analysis, any indirect write
// is classified as may-be-aliased.
CurrBlock->SetMaybeAliased(true);
}
} // end for all insts in block
} // end for all blocks in function
// Step one: Find only the memory DEFs to start with.
list<SMPInstr>::iterator CurrInst;
bool FoundIndWrite = false;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
if (CurrInst->HasDestMemoryOperand()) {
// Starting with the DEF instruction, traverse the control flow
// until we run into (A) the re-definition of the operand, including
// a re-definition of any of its addressing registers, or (B) an
// indirect write. Return false if condition A terminates the
// search, and true if condition B terminates the search.
this->ResetProcessedBlocks();
op_t MemDefOp = CurrInst->MDGetMemDefOp();
assert(o_void != MemDefOp.type);
set<DefOrUse, LessDefUse>::iterator CurrMemDef = CurrInst->FindDef(MemDefOp);
assert(CurrMemDef != CurrInst->GetLastDef());
int SSANum = CurrMemDef->GetSSANum();
FoundIndWrite = this->FindPossibleChainAlias(CurrInst, MemDefOp, SSANum);
if (FoundIndWrite) {
// Mark the DEF as aliased.
CurrMemDef = CurrInst->SetDefIndWrite(CurrMemDef->GetOp(), true);
break; // Don't waste time after first alias found
}
} // end if inst has dest memory operand
} // end for all instructions
return;
} // end of SMPFunction::AliasAnalysis()
// Does the DefOp DEF_USE chain have an indirect mem write starting at CurrInst?
bool SMPFunction::FindPossibleChainAlias(list<SMPInstr>::iterator CurrInst, op_t DefOp, int SSANum) {
// Starting with the DEF instruction, traverse the control flow
// until we run into (A) the re-definition of the operand, including
// a re-definition of any of its addressing registers, or (B) an
// indirect write. Return false if condition A terminates the
// search, and true if condition B terminates the search.
SMPBasicBlock *CurrBlock = CurrInst->GetBlock();
if (!(CurrBlock->IsProcessed())) {
CurrBlock->SetProcessed(true);
}
else
return false; // block already processed
// Proceed by cases:
ea_t DefAddr = CurrInst->GetAddr();
// Case 1: Local name. Return the IndWrite flag for the local Def-Use
// chain begun by CurrInst.
if (CurrBlock->IsLocalName(DefOp)) {
return CurrBlock->GetLocalDUChainIndWrite(DefOp, SSANum);
}
// Case 2: Global name.
// Case 2A: If Def-Use chain within this block for this memory operand
// has its IndWrite flag set to true, then stop and return true.
else if (CurrBlock->GetGlobalDUChainIndWrite(DefOp, DefAddr)) {
return true;
}
// Case 2B: Else if Def-Use chain is not the last chain in this block
// for this operand, then there must be a later redefinition of the
// memory operand (with new SSA number assigned) later in this block.
// Because we did not fall into case 2A, we know there is no IndWrite
// within the current memory operand's chain, so we return false.
else if (CurrBlock->IsLastGlobalChain(DefOp, DefAddr)) {
return false;
}
// Case 2C: Else if current memory operand is NOT LiveOut, even though
// this is the last def-use chain in the block, then there is no more
// traversing of the control flow graph to be done. The chain has ended
// without encountering an IndWrite, so return false.
else if (!(CurrBlock->IsLiveOut(DefOp))) {
return false;
}
// Case 2D: We have passed all previous checks, so we must have a memory
Loading
Loading full blame...