break;
}
}
else { // we have a right subtree in the CurrRT
SMPRegTransfer *RightRT = CurrRT->GetRightTree();
// In order to have a right subtree, we must have something like:
// lea esp,[ecx-4] which produces the RTL: esp := ecx - 4
// We should consider any other RTL structure besides a basic addition or
// subtraction on the right subtree to be invalid.
CurrOper = RightRT->GetOperator();
if ((SMP_ADD == CurrOper) || (SMP_SUBTRACT == CurrOper)) {
op_t RightLeftOp = RightRT->GetLeftOperand();
if (o_reg == RightLeftOp.type) {
if (RightRT->HasRightSubTree()) {
// Complex RTL such as lea esp,[ebx+ecx*4] ; cannot analyze
StackPointerSaveOrRestore = false;
}
else {
op_t RightRightOp = RightRT->GetRightOperand();
if (o_imm != RightRightOp.type) {
// Complex RTL such as lea esp,[ebx+ecx] ; cannot analyze
StackPointerSaveOrRestore = false;
}
else {
TempOp = RightLeftOp;
DeltaAdjust = (sval_t) RightRightOp.value;
if (SMP_SUBTRACT == CurrOper) {
// Negate the stack delta adjustment, e.g. lea esp,[ecx-4] needs DeltaAdjust of -4, not 4.
DeltaAdjust = (0 - DeltaAdjust);
}
LookUpStackDelta = true;
StackPointerSaveOrRestore = true;
if (SPRestore || FPRestore) {
CopyOp = RightLeftOp;
}
else {
CopyOp = LeftOp;
}
}
}
}
else { // weird RTL; LeftOp := (MemoryOp OPER ???)
StackPointerSaveOrRestore = false;
}
}
else { // not ADD or SUBTRACT
StackPointerSaveOrRestore = false;
}
}
if (LookUpStackDelta) {
bool StackAccess = false;
bool NonStackMemAccess = false;
// We need to set StackDelta based on the reaching defs for TempOp
// A reg operand is probably a general register, but with something like lea ebx,[esp+4] it could be the stack or frame pointer.
if (TempOp.is_reg(MD_STACK_POINTER_REG)) {
// Weed out RTs that increment or decrement the stack pointer, e.g. SP := SP -4.
// These are not the kind of "save" or "restore" RTs that we are tracking.
if (CopyOp.is_reg(MD_STACK_POINTER_REG)) {
StackPointerSaveOrRestore = false;
SPRestore = false;
FPRestore = false;
Save = false;
}
else {
StackDelta = this->GetStackPtrOffset();
StackDelta += DeltaAdjust;
LookUpStackDelta = false; // just got it; no need for reaching defs
StackPointerSaveOrRestore = true;
}
}
else if (UseFP && TempOp.is_reg(MD_FRAME_POINTER_REG)) {
StackDelta = FPDelta;
StackDelta += DeltaAdjust;
LookUpStackDelta = false; // just got it; no need for reaching defs
StackPointerSaveOrRestore = true;
}
else if (o_reg == TempOp.type) { // general reg, not frame or stack pointer reg
CopyReg = TempOp.reg;
MDExtractAddressFields(TempOp, BaseReg, IndexReg, Scale, offset);
CopyReg = BaseReg;
bool IndexedAccess = ((R_none != BaseReg) && (R_none != IndexReg));
if (IndexedAccess) {
StackPointerSaveOrRestore = false; // Cannot analyze indexed accesses into the stack
}
else if (MDIsStackPtrReg(BaseReg, UseFP)) {
StackAccess = true;
}
else {
// memory expr that is not stack or frame pointer
NonStackMemAccess = true; // something like [ecx] might actually turn out to be stack access
DeltaAdjust = (sval_t) TempOp.addr; // get normalized delta from addr field
}
if (StackPointerSaveOrRestore && LookUpStackDelta) {
op_t FindOp = InitOp;
if (StackAccess) {
FindOp = TempOp;
}
else {
FindOp.type = o_reg;
FindOp.reg = CopyReg;
}
if (this->GetBlock()->GetFunc()->IsInStackPtrCopySet(FindOp)) {
// Screened out time wasters that are not in copy set; now,
// look up reaching defs.
// We need to find the reaching definitions for FindOp at the current instruction address.
this->GetBlock()->GetFunc()->ComputeTempReachingDefs(FindOp, this->GetAddr());
this->GetBlock()->GetFunc()->ComputeTempStackDeltaReachesList(FindOp);
// See if TempStackDeltaReachesList has a consistent delta value.
StackPointerSaveOrRestore = this->GetBlock()->GetFunc()->FindReachingStackDelta(StackDelta); // consistent SavedDelta value across entire list
StackDelta += DeltaAdjust;
if (StackPointerSaveOrRestore && NonStackMemAccess) {
// We have something like [ecx] or [ecx+DeltaAdjust]. It turns out that
// ECX has a copy of the stack pointer or frame pointer in it, but that
// does not mean that the memory location [ecx] has a copy of a stack or
// frame pointer. We need to look up the normalized stack address [esp+StackDelta]
// in the StackPtrCopySet just like we did for ECX before we conclude that a
// stack pointer save or restore is happening.
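// Illustrative example (values assumed): if the reaching definitions show that [ecx]
// corresponds to a normalized StackDelta of -20, then FindOp below becomes the
// entry-relative stack location [esp-20], and we check whether that location is in
// the StackPtrCopySet.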
FindOp = InitOp;
FindOp.type = o_displ;
FindOp.reg = MD_STACK_POINTER_REG;
FindOp.addr = StackDelta;
if (this->GetBlock()->GetFunc()->IsInStackPtrCopySet(FindOp)) {
// Screened out time wasters that are not in copy set; now,
// look up reaching defs.
// We need to find the reaching definitions for FindOp at the current instruction address.
this->GetBlock()->GetFunc()->ComputeTempReachingDefs(FindOp, this->GetAddr());
this->GetBlock()->GetFunc()->ComputeTempStackDeltaReachesList(FindOp);
// See if TempStackDeltaReachesList has a consistent delta value.
StackPointerSaveOrRestore = this->GetBlock()->GetFunc()->FindReachingStackDelta(StackDelta); // consistent SavedDelta value across entire list
// StackPointerSaveOrRestore will now be true only if [ecx] pointed to a saved stack or frame pointer with consistent delta.
}
else {
// E.g. [ecx] pointed to stack location that was not holding a saved stack or frame pointer.
StackPointerSaveOrRestore = false;
}
}
}
else {
StackPointerSaveOrRestore = false; // reset, not in stack pointer copy set
}
}
}
} // end if (LookUpStackDelta)
if (!StackPointerSaveOrRestore && !Save && (SPRestore || FPRestore)) {
// Any restore that could not be analyzed is an error.
Error = true;
break; // error exit
}
else if (StackPointerSaveOrRestore) {
if (FPRestore) {
// If we succeeded in looking up a stack delta that goes into the frame pointer reg,
// then we want to consider this instruction to be a save of a stack delta into
// a register (which happens to be the frame pointer reg in this case).
FPRestore = false;
Save = true;
}
break; // assume only one save or restore in an instruction; exit with success
}
} // end for all RTs in the RTL
return StackPointerSaveOrRestore;
} // end of SMPInstr::MDIsStackPtrSaveOrRestore()
// If call instruction is to malloc(), set the DEF register EAX type to
// HEAPPTR and return true.
bool SMPInstr::MDFindMallocCall(op_t TargetOp) {
bool changed = false;
func_t *TargetFunc = get_func(TargetOp.addr);
if (TargetFunc) {
char FuncName[MAXSTR];
get_func_name(TargetFunc->startEA, FuncName, sizeof(FuncName) - 1);
if (0 == strcmp("malloc", FuncName)) {
// NOTE: Some compilers might call it __malloc ; make this more robust !!!
#if SMP_VERBOSE_FIND_POINTERS
SMP_msg("Found call to malloc at %x\n", this->addr);
#endif
op_t SearchOp = InitOp;
SearchOp.type = o_reg;
SearchOp.reg = R_ax;
set<DefOrUse, LessDefUse>::iterator EAXDEF;
EAXDEF = this->SetDefType(SearchOp, HEAPPTR);
int SSANum = EAXDEF->GetSSANum();
changed = true;
if (this->BasicBlock->IsLocalName(SearchOp)) {
(void) this->BasicBlock->PropagateLocalDefType(SearchOp, HEAPPTR,
this->GetAddr(), SSANum, false);
}
else { // global name
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
(void) this->BasicBlock->PropagateGlobalDefType(SearchOp, HEAPPTR,
SSANum, false);
}
} // end if "malloc"
} // end if (TargetFunc)
return changed;
} // end of SMPInstr::MDFindMallocCall()
// Is instruction a branch (conditional or unconditional) to a
// code target that is not in the current chunk?
bool SMPInstr::IsBranchToFarChunk(void) {
if (this->IsFarBranchComputed()) { // answer is cached
return this->IsBranchesToFarChunk();
}
func_t *CurrChunk = get_fchunk(this->address);
bool FarBranch = false;
if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
// Instruction is a direct branch, conditional or unconditional
if (this->NumUses() > 0) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t JumpTarget = CurrUse->GetOp();
if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
// Branches to a code address
// stdclib sometimes has jumps to zero and calls to zero. These are dead code.
if (0 != JumpTarget.addr) {
func_t *TargetChunk = get_fchunk(JumpTarget.addr);
// Is target address within the same chunk as the branch?
FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA);
if (FarBranch) {
this->FarBranchTarget = JumpTarget.addr;
this->SetBranchesToFarChunk();
}
}
}
} // end for all USEs
}
}
this->SetFarBranchComputed();
return FarBranch;
} // end of SMPInstr::IsBranchToFarChunk()
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
return this->Uses.SetSSANum(CurrOp, SSASub);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
return this->Defs.SetSSANum(CurrOp, SSASub);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
return this->Uses.SetType(CurrOp, CurrType, this);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
return this->Defs.SetType(CurrOp, CurrType, this);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
return this->Defs.SetMetadata(CurrOp, Status);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) {
return this->Defs.SetIndWrite(CurrOp, IndWriteFlag);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) {
return this->Uses.SetNoTruncation(CurrOp, NoTruncFlag);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) {
return this->Defs.SetNoOverflow(DefOp, NoOverflowFlag);
};
// Set the DeadRegsBitmap entry for Regnum.
void SMPInstr::SetRegDead(size_t RegNum) {
this->DeadRegsBitmap.set(RegNum);
return;
}
// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
bool DebugFlag = false;
if (0x8049b00 == this->address) {
// Setting up breakpoint line.
DebugFlag = true;
}
// Fill cmd structure with disassembly of instr
if (!SMPGetCmd(this->address, this->SMPcmd, this->features))
return; // cannot analyze the instruction without its disassembly
unsigned short opcode = this->SMPcmd.itype;
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
// Record optimization category.
this->OptType = OptCategory[opcode];
if ((NN_int == opcode) || (NN_into == opcode) || (NN_int3 == opcode)) {
this->SetInterrupt();
}
else {
this->ResetInterrupt();
}
// Fix the IDA Pro mistakes in the operand list.
this->MDFixupIDAProOperandList();
// See if instruction is an ASM idiom for clearing a register.
if ((NN_xor == opcode) || (NN_lea == opcode)) {
ushort FirstReg;
if (o_reg == this->SMPcmd.Operands[0].type) {
FirstReg = this->SMPcmd.Operands[0].reg;
op_t SecondOpnd = this->SMPcmd.Operands[1];
if (NN_xor == opcode) {
// Check for xor of reg with itself
if (SecondOpnd.is_reg(FirstReg)) {
this->SetRegClearIdiom();
}
}
else { // must be lea
// check for lea reg,[nobasereg+nonindexreg+0]
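// e.g. "lea eax,[0]" (no base reg, no index reg, zero displacement) loads the
// constant address 0 into EAX, which clears the register.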
if ((SecondOpnd.type >= o_mem) && (SecondOpnd.type <= o_displ)) {
int BaseReg, IndexReg;
ushort ScaleFactor;
ea_t Offset;
MDExtractAddressFields(SecondOpnd, BaseReg, IndexReg, ScaleFactor, Offset);
if ((R_none == BaseReg) && (R_none == IndexReg) && (0 == Offset)) {
this->SetRegClearIdiom();
}
}
}
}
}
// See if instruction is simple nop or ASM idiom for nop.
if (this->MDIsNop()) {
this->SetNop();
}
// Build the DEF and USE lists for the instruction.
this->FindMemOps();
this->BuildSMPDefUseLists();
// Determine whether the instruction is a jump target by looking
// at its cross references and seeing if it has "TO" code xrefs.
SMP_xref_t xrefs;
for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
this->SetJumpTarget();
break;
}
}
// If instruction is a call or indirect call, see if a call target has been recorded
// by IDA Pro.
if (this->GetDataFlowType() == INDIR_CALL) {
for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL); ok; ok = xrefs.SMP_next_from()) {
if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) {
// Found a code target, with its address in xrefs.to
if (xrefs.GetTo() == (this->address + this->GetCmd().size)) {
// A call instruction will have two targets: the fall through to the
// next instruction, and the called function. We want to find
// the called function.
continue;
}
// We found a target, not the fall-through.
this->CallTarget = xrefs.GetTo();
SMP_msg("Found indirect call target %lx at %lx\n",
(unsigned long) xrefs.GetTo(), (unsigned long) this->address);
break;
}
} // end for all code xrefs
if (BADADDR == this->CallTarget) {
SMP_msg("WARNING: Did not find indirect call target at %lx\n",
(unsigned long) this->address);
}
} // end if INDIR_CALL
else if (this->GetDataFlowType() == CALL) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
optype_t OpType = CurrUse->GetOp().type;
if ((OpType == o_near) || (OpType == o_far)) {
this->CallTarget = CurrUse->GetOp().addr;
}
}
if (BADADDR == this->CallTarget) {
SMP_msg("ERROR: Target not found for direct call at %lx\n", (unsigned long) this->address);
}
}
if (DebugFlag) {
SMP_msg("Analyzed debug instruction at %lx\n", (unsigned long) this->address);
}
return;
} // end of SMPInstr::Analyze()
// Analyze the floating point NOP marker instruction at the top of the function.
void SMPInstr::AnalyzeMarker(void) {
// Fill member variable SMPcmd structure with disassembly of instr
(void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
this->SMPcmd.itype = NN_fnop;
this->SMPcmd.size = 1;
this->SMPcmd.ea = this->address;
// Set the instr disassembly text.
DisAsmText.SetMarkerInstText(this->GetAddr());
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
this->type = DFACategory[this->SMPcmd.itype];
// Record optimization category.
this->OptType = OptCategory[this->SMPcmd.itype];
return;
} // end of SMPInstr::AnalyzeMarker()
// Detect oddities of call instructions, such as pseudo-calls that are
// actually jumps within a function
void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
if (BADADDR != this->CallTarget) {
if (this->CallTarget == FirstFuncAddr) {
this->SetDirectRecursiveCall();
}
else {
this->ResetDirectRecursiveCall();
if ((this->CallTarget > FirstFuncAddr)
&& (this->CallTarget < LastFuncAddr)) {
this->SetCallUsedAsJump();
this->type = JUMP;
}
else {
this->ResetCallUsedAsJump();
}
}
}
return;
} // end of SMPInstr::AnalyzeCallInst()
sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocDelta) {
uint16 InstType = this->SMPcmd.itype;
sval_t InstDelta = StackAlteration[InstType];
SMPitype FlowType = this->GetDataFlowType();
bool TailCall = this->IsTailCall();
if (this->IsCallUsedAsJump() || this->MDIsInterruptCall() || this->IsCondTailCall()) {
// Call is used within function as a jump. Happens when setting up
// thunk offsets, for example; OR, call is an interrupt call, in which
// the interrupt return cleans up the stack, leaving a delta of zero, but
// we do not have the system call code to analyze, OR, the call is a conditional
// jump to another function (conditional tail call), in which case the current
// function must have a return statement to fall into which will clean up the
// only thing left on the stack (the return address) and the conditional jump
// has no effect on the stack pointer.
; // leave InstDelta equal to negative or zero value from StackAlteration[]
}
else if (this->IsRecursiveCall()) {
// We don't have the net stack delta for our own function yet, so we cannot
// look it up. We must assume that each call has no net effect on the stack delta.
// Alternatively, we could call this->GetBlock()->GetFunc()->GetStackDeltaForCallee() as below.
InstDelta = 0;
}
else if (this->IsAllocaCall()) {
InstDelta = STARS_DEFAULT_ALLOCA_SIZE;
}
else if ((CALL == FlowType) || (INDIR_CALL == FlowType) || TailCall) {
// A real call instruction, which pushes a return address on the stack,
// not a call used as a branch within the function. A return instruction
// will usually cancel out the stack push that is implicit in the call, which
// means that the function will have a net stack ptr delta of +4, which will
// cancel out the -4 value of the call instruction and set the delta to zero.
// However, this is not true in all cases, so we get the net stack ptr delta
// directly from the called function unless it is an unresolved indirect call,
// in which case we assume +4. !!!!****!!!! In the future, we could analyze
// the code around an unresolved indirect call to see if it seems to be
// removing items left on the stack by the callee.
// SPECIAL CASE: A jump used as a tail call will have a stack ptr effect that is equal
// to the net stack ptr effect of its target function, usually +4, whereas a jump
// would otherwise have a net stack ptr effect of 0.
ea_t CalledFuncAddr = this->GetCallTarget();
if ((BADADDR == CalledFuncAddr) || (0 == CalledFuncAddr)) {
InstDelta = 0;
}
else { // We have a call target
SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->FindFunction(CalledFuncAddr);
sval_t AdjustmentDelta;
if (CalleeFunc) {
if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) {
// Phase ordering issue in the call graph. A mutually recursive clique of functions has to
// be broken by starting processing somewhere, and all callees cannot be processed before
// we start. If we got our stack down to zero and then made a tail call, then we have to assume
// that the callee will use our return address, so we assume the default stack delta. If not a
// tail call, we ask our function to see if the information is available from IDA Pro analyses,
// or if it can be inferred from the fact that the call is followed by a stack adjustment.
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at tail call inst %lx ; normal delta assumed\n",
(unsigned long) this->GetAddr());
}
else {
AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(CalledFuncAddr);
InstDelta += AdjustmentDelta;
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %lx ; stack adjustment used\n",
(unsigned long) this->GetAddr());
}
}
else if (!CalleeFunc->StackPtrAnalysisSucceeded()) {
// Callee analyses were done, but they failed. In order to proceed, we have to assume
// the same situation as we just did in the case where analyses have not been performed.
SMP_msg("WARNING: Callee stack ptr analysis failed at inst %lx ; normal delta assumed\n",
(unsigned long) this->GetAddr());
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
}
else {
AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr());
InstDelta += AdjustmentDelta;
}
}
else {
// Callee's analyses have succeeded, so get delta straight from callee.
InstDelta += CalleeFunc->GetNetStackPtrDelta();
}
}
else {
#if 0
SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %lx in inst %lx\n",
(unsigned long) CalledFuncAddr, (unsigned long) this->GetAddr());
InstDelta = SMP_STACK_DELTA_ERROR_CODE;
#else
SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %lx in inst %lx\n",
(unsigned long) CalledFuncAddr, (unsigned long) this->GetAddr());
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
}
else {
InstDelta = 0;
}
#endif
}
}
} // end CALL or INDIR_CALL or TailCall case
else if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
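// (On x86, LEAVE is equivalent to "mov esp,ebp" followed by "pop ebp", so its
// stack pointer effect depends on the current frame size rather than a fixed constant.)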
bool IsLeaveInstr = this->MDIsLeaveInstr();
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, IncomingDelta, PreAllocDelta);
}
return InstDelta;
} // end of SMPInstr::AnalyzeStackPointerDelta()
// Total the stack adjustment bytes, as happens after a call to a function that leaves
// outgoing args on the stack or swallows incoming args from the stack.
sval_t SMPInstr::FindStackAdjustment(void) {
uint16 InstType = this->SMPcmd.itype;
sval_t InstDelta = StackAlteration[InstType];
if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
bool IsLeaveInstr = this->MDIsLeaveInstr();
if (!IsLeaveInstr) {
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, 0, 0);
}
else {
InstDelta = 0; // LEAVE is not the kind of instr we are looking for
}
}
return InstDelta;
} // end of SMPInstr::FindStackAdjustment()
// Normalize stack operands to have a displacement from the stack pointer value on entry to the function,
// rather than the current stack pointer value.
// UseFP indicates we are using a frame pointer in the function.
// FPDelta holds the stack delta (normalized) for the frame pointer.
// DefOp comes in with the operand to be normalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp) {
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
op_t OldOp = DefOp;
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta;
if (DefOp.hasSIB) {
// We must deal with a potentially indexed memory expression. We want to
// normalize two different cases here: e.g. [esp+ebx+4] will become [esp+ebx-24]
// and [ebp+ebx-8] will become [esp+ebx-12] after normalization. A wrinkle
// on the second case is when the base register and index register are swapped
// in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves
// correcting the index/base reg order in the SIB, because an index reg of ESP
// is the SIB encoding for "no index register" and we cannot leave it like that.
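// Reminder of the x86 SIB byte layout: bits 7-6 hold the scale, bits 5-3 the index
// register, and bits 2-0 the base register; ESP (4) in the index field means "no index".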
int BaseReg = sib_base(DefOp);
int IndexReg = (int) sib_index(DefOp);
if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register
IndexReg = R_none;
if (BaseReg == X86_STACK_POINTER_REG) {
// We probably have an indexed ESP-relative operand.
// We leave the sib byte alone and normalize the offset.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
else {
// Must be EBP-relative.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Unfortunately, when we are dealing with a SIB byte in the opcode, we cannot
// just say DefOp.reg = MD_STACK_POINTER_REG to convert from the frame pointer
// to the stack pointer. Instead, we have to get into the nasty machine code
// level and change the SIB bits that specify either the base register or the
// index register, whichever one is the frame pointer.
if (BaseReg == X86_FRAME_POINTER_REG) {
// The three least significant bits of the SIB byte are the base register.
// They must contain a 5, which is the x86 value for register EBP, and we
// want to convert it to a 4, denoting register ESP. We can just zero out
// the least significant bit to accomplish that.
DefOp.sib &= 0xfe;
}
else {
// We sometimes have an instruction in which the frame pointer is used as
// the "index" register in the SIB byte, and the true index register is
// in the "base" register position in the SIB byte.
assert(IndexReg == X86_FRAME_POINTER_REG);
// The true index reg is in the lowest three bits, while the next three
// bits must contain a 5 (register EBP) and we want to make them a 4 (ESP).
// We must swap base and index regs as we normalize (see explanation above).
char SIBtemp = DefOp.sib;
char SIBindex = SIBtemp & 0x38;
char SIBbase = SIBtemp & 0x07;
assert ((SIBindex >> 3) == 5); // must be EBP
SIBtemp &= 0xa0; // zero out lower 6 bits; upper 2 bits are scale factor - leave them alone
SIBtemp &= (SIBbase << 3); // make old base reg (e.g. ebx) into a proper index reg
SIBtemp |= 0x04; // make the new base reg be 4 (reg ESP)
DefOp.sib = SIBtemp;
}
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(X86_STACK_POINTER_REG, false);
}
}
else if (DefOp.reg == MD_FRAME_POINTER_REG) {
// If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this
// is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming
// argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when
// normalized to the entry point value of the stack pointer. In both cases, we can see that the
// normalized stack delta is just FPDelta+SignedOffset.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the
// NormalizedDelta just computed.
DefOp.reg = MD_STACK_POINTER_REG;
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(DefOp.reg, false);
}
else {
assert(DefOp.reg == MD_STACK_POINTER_REG);
// We only need to adjust the offset to reflect the change in the stack pointer since the function
// was entered, e.g. [esp+4] is normalized to [esp-28] if the current esp value is 32 less than it
// was upon function entry. We get the value "-32" in that case from a member variable.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
DefOp.addr = (ea_t) NormalizedDelta; // common to frame and stack pointer cases
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
return true;
}
else {
return false;
}
} // end of SMPInstr::MDComputeNormalizedDataFlowOp()
// Normalize stack operands in all DEFs and USEs to have stack deltas relative to the function entry stack pointer.
// Return true if any stack DEFs or USEs were normalized.
bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing, sval_t DeltaIncrement) {
bool StackOpFound = false;
bool OpNormalized;
bool UniqueDEFMemOp = true; // Does DEFMemOp not match any DEFs?
bool UniqueUSEMemOp = true; // Does USEMemOp not match any USEs?
bool UniqueLeaUSEMemOp = true; // Does LeaUSEMemOp not match any USEs?
bool UniqueMoveSource = true; // Does MoveSource not match any USEs?
set<DefOrUse, LessDefUse>::iterator DefIter, UseIter;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> > DefWorkList, UseWorkList;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> >::iterator WorkIter;
op_t OldOp, NewOp;
// Find all the DEFs that need changing, and put their iterators into a list.
// Normalizing stack ops could change their sort order, hence we could skip over
// a DEF in the set by erasing a DEF and reinserting a normalized DEF, so we
// make all the changes after we iterate through the DEFS set.
for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
OldOp = DefIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->DEFMemOp)) {
UniqueDEFMemOp = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> DefItem(DefIter, NewOp);
DefWorkList.push_back(DefItem);
}
}
}
// Now go through the DEF worklist and change stack operands to normalized stack operands.
for (WorkIter = DefWorkList.begin(); WorkIter != DefWorkList.end(); ++WorkIter) {
DefIter = WorkIter->first;
DefIter = this->Defs.SetOp(DefIter, WorkIter->second);
}
// Normalize op_t private data member DEFs.
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueDEFMemOp, this->DEFMemOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->DEFMemOp);
}
// Find all USEs that need changing, and build a second work list.
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
OldOp = UseIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->USEMemOp)) {
UniqueUSEMemOp = false;
}
if (IsEqOp(OldOp, this->GetLeaMemUseOp())) {
UniqueLeaUSEMemOp = false;
}
if (IsEqOp(OldOp, this->MoveSource)) {
UniqueMoveSource = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> UseItem(UseIter, NewOp);
UseWorkList.push_back(UseItem);
}
}
}
// Now go through the USE worklist and change stack operands to normalized stack operands.
for (WorkIter = UseWorkList.begin(); WorkIter != UseWorkList.end(); ++WorkIter) {
UseIter = WorkIter->first;
UseIter = this->Uses.SetOp(UseIter, WorkIter->second);
}
// Normalize op_t private data member USEs.
op_t TempLeaMemOp = this->GetLeaMemUseOp();
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueUSEMemOp, this->USEMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueLeaUSEMemOp, TempLeaMemOp);
if (OpNormalized)
this->SetLeaMemUseOp(TempLeaMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueMoveSource, this->MoveSource);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->USEMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, TempLeaMemOp);
if (OpNormalized)
this->SetLeaMemUseOp(TempLeaMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->MoveSource);
}
// Declare victory.
this->SetDefsNormalized();
return StackOpFound;
} // end of SMPInstr::MDNormalizeStackOps()
// Renormalize SP-relative stack operands in functions that call alloca() by adding DeltaIncrement to their stack displacements.
// DefOp comes in with the operand to be renormalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
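// Illustrative example (values assumed): with DeltaIncrement == -16, an SP-relative
// operand previously normalized to [esp-28] is renormalized below to [esp-44].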
bool SMPInstr::MDRecomputeNormalizedDataFlowOp(sval_t DeltaIncrement, bool UpdateMaps, op_t &DefOp) {
op_t OldOp = DefOp;
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, this->GetBlock()->GetFunc()->UsesFramePointer())) {
if (this->HasFPNormalizedToSP()) {
// FP-relative operands do no change in alloca() functions when the alloca()
// causes the SP to change.
return true;
}
// The remaining cases are simple. The ESP-relative displacement is incremented by
// DeltaIncrement, regardless of the presence of a SIB byte.
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta = DeltaIncrement + (sval_t) SignedOffset;
DefOp.addr = (ea_t) NormalizedDelta;
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
if (UpdateMaps) { // We don't update maps for duplicate entries, e.g. USEMemOp, DEFMemOp, MoveSource
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
}
return true;
}
else {
return false;
}
} // end of SMPInstr::MDRecomputeNormalizedDataFlowOp()
// If NormOp is a normalized stack memory operand, unnormalize it.
void SMPInstr::MDGetUnnormalizedOp(op_t &NormOp) {
sval_t SignedOffset;
bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer();
if (this->AreDefsNormalized() && MDIsStackAccessOpnd(NormOp, UseFP)) {
if (this->HasFPNormalizedToSP()) {
// Need to convert NormOp back to frame-pointer-relative address.
if (NormOp.hasSIB) {
// Convert base register from stack pointer back to frame pointer.
NormOp.sib |= 0x01;
}
else {
NormOp.reg = MD_FRAME_POINTER_REG;
}
SignedOffset = (sval_t) NormOp.addr;
SignedOffset -= this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
}
else {
// NormOp should remain stack-pointer-relative address, but it
// should be a positive offset from the current stack pointer instead
// of a negative offset from the entry point of the function.
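// Example (values assumed): a normalized operand [esp-28] with a current stack pointer
// offset of -32 unnormalizes to [esp+4] below.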
SignedOffset = (sval_t) NormOp.addr;
SignedOffset -= this->GetStackPtrOffset();
assert(0 <= SignedOffset);
}
NormOp.addr = (ea_t) SignedOffset;
}
return;
} // end of SMPInstr::MDGetUnnormalizedOp()
// Find USE-not-DEF operand that is not the flags register.
op_t SMPInstr::GetSourceOnlyOperand(void) {
size_t OpNum;
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
;
}
else if (this->features & UseMacros[OpNum]) { // USE
op_t CurrOp = this->SMPcmd.Operands[OpNum];
if (!(CurrOp.is_reg(X86_FLAGS_REG))) {
return CurrOp;
}
}
}
// It is expected that increment, decrement, and floating point stores
// will not have a USE-only operand. Increment and decrement have an
// operand that is both USEd and DEFed, while the floating point stack
// registers are implicit in most floating point opcodes. Also, exchange
// and exchange-and-add instructions have multiple DEF-and-USE operands.
int TypeGroup = SMPTypeCategory[this->SMPcmd.itype];
if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12)
&& (TypeGroup != 13)) {
SMP_msg("ERROR: Could not find source only operand at %lx in %s\n",
(unsigned long) this->address, DisAsmText.GetDisAsm(this->GetAddr()));
}
return InitOp;
} // end of SMPInstr::GetSourceOnlyOperand()
// Should apparent memory operands be ignored? e.g. lea opcode on x86
bool SMPInstr::MDIgnoreMemOps(void) {
bool leaInst = (NN_lea == this->SMPcmd.itype);
return leaInst;
}
// Find memory DEFs and USEs, store in DEFMemOp and USEMemOp
void SMPInstr::FindMemOps(void) {
size_t OpNum;
if (!(this->MDIgnoreMemOps())) {
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempOp = this->SMPcmd.Operands[OpNum];
if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory
if (this->features & DefMacros[OpNum]) { // DEF
if (this->DEFMemOp.type == o_void) { // only save first mem DEF
this->DEFMemOp = TempOp;
}
}
if (this->features & UseMacros[OpNum]) { // USE
if (this->USEMemOp.type == o_void) { // only save first mem USE
this->USEMemOp = TempOp;
}
}
}
} // end for (OpNum = 0; ...)
}
this->SetMemOpsFound();
return;
} // end of SMPInstr::FindMemOps()
// Fix problems with the operands list in SMPcmd.
void SMPInstr::MDFixupIDAProOperandList(void) {
// IDA Pro often takes the instruction imul eax,0x80 and creates the following operands and features bits:
// Opnd[0] = EAX, both DEF and USE
// Opnd[1] = EAX, just USE
// Opnd[2] = immediate, neither DEF nor USE
// Our RTL building keys in on the DEF/USE bits in features, so this looks like imul eax,eax to us.
// We want it to look like:
// Opnd[0] = EAX, both DEF and USE
// Opnd[1] = immediate, just USE
if (NN_imul == this->SMPcmd.itype) {
op_t Opnd2 = this->SMPcmd.Operands[2];
if ((!(this->features & DefMacros[2]))
&& (!(this->features & UseMacros[2]))) {
if (o_void != Opnd2.type) {
// We have a third operand that is neither DEF nor USE.
SMP_msg("INFO: Fixing IMUL operand list at %lx\n", (unsigned long) this->GetAddr());
this->Dump();
// Two cases: Operands[0] == Operands[1], e.g. imul eax,Opnd2
// or else three-operand form: e.g. imul eax,ecx,Opnd2
// For the three-operand form, make sure Opnd0 is DEF only, others
// are USE only. For the two-operand form, make sure Opnd0 is DEF and USE,
// Opnd1 is current Opnd2 and is USE only.
op_t Opnd0 = this->SMPcmd.Operands[0];
op_t Opnd1 = this->SMPcmd.Operands[1];
if (IsEqOp(Opnd0, Opnd1)) {
// No need for three-operand form.
this->features |= DefMacros[0];
this->features |= UseMacros[0];
this->SMPcmd.Operands[1] = Opnd2;
this->SMPcmd.Operands[2] = InitOp;
}
else { // Must have three-operand form.
this->features |= UseMacros[2]; // set missing USE bit.
this->features &= (~UseMacros[0]); // Ensure no USE of Opnd0.
}
this->Dump();
}
}
}
return;
} // SMPInstr::MDFixupIDAProOperandList()
// Fill the Defs and Uses private data members.
void SMPInstr::BuildSMPDefUseLists(void) {
size_t OpNum;
bool DebugFlag = (0x8049b00 == this->GetAddr());
bool WidthDoubler = this->MDDoublesWidth();
this->Defs.clear();
this->Uses.clear();
// Start with the Defs.
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (WidthDoubler) {
// Opcodes that sign-extend a byte to a word, or a word to a dword,
// have only one operand. It is implicit, and it is the shorter USE.
// That means the DEF will have the same width as the USE, e.g. if
// we are sign-extending AX to EAX, the USE and DEF both be AX without
// a special fix. We fix this problem with the DEF operand now.
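// For example, cwde sign-extends AX into EAX; IDA Pro lists the single implicit
// operand as AX, so without widening here both the USE and the DEF would appear as AX.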
if (TempOp.dtyp == dt_byte) {
TempOp.dtyp = dt_word;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_word) {
TempOp.dtyp = dt_dword;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_dword) {
TempOp.dtyp = dt_qword;
}
else {
SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %lx dtyp: %d\n",
OpNum, (unsigned long) this->address, TempOp.dtyp);
}
}
if (MDKnownOperandType(TempOp)) {