Newer
Older
// not a call used as a branch within the function. A return instruction
clc5q
committed
// will usually cancel out the stack push that is implicit in the call, which
// means that the function will have a net stack ptr delta of +4, which will
// cancel out the -4 value of the call instruction and set the delta to zero.
// However, this is not true in all cases, so we get the net stack ptr delta
// directly from the called function unless it is an unresolved indirect call,
// in which case we assume +4. !!!!****!!!! In the future, we could analyze
// the code around an unresolved indirect call to see if it seems to be
// removing items left on the stack by the callee.
clc5q
committed
// SPECIAL CASE: A jump used as a tail call will have a stack ptr effect that is equal
// to the net stack ptr effect of its target function, usually +4, whereas a jump
// would otherwise have a net stack ptr effect of 0.
clc5q
committed
ea_t CalledFuncAddr = this->GetCallTarget();
if ((BADADDR == CalledFuncAddr) || (0 == CalledFuncAddr)) {
InstDelta = 0;
}
else { // We have a call target
SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->FindFunction(CalledFuncAddr);
clc5q
committed
sval_t AdjustmentDelta;
if (CalleeFunc) {
if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) {
// Phase ordering issue in the call graph. A mutually recursive clique of functions has to
// be broken by starting processing somewhere, and all callees cannot be processed before
// we start. If we got our stack down to zero and then made a tail call, then we have to assume
// that the callee will use our return address, so we assume the default stack delta. If not a
// tail call, we ask our function to see if the information is available from IDA Pro analyses,
// or if it can be inferred from the fact that the call is followed by a stack adjustment.
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at tail call inst %lx ; normal delta assumed\n",
(unsigned long) this->GetAddr());
clc5q
committed
}
else {
AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(CalledFuncAddr);
clc5q
committed
InstDelta += AdjustmentDelta;
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %lx ; stack adjustment used\n",
(unsigned long) this->GetAddr());
clc5q
committed
}
}
else if (!CalleeFunc->StackPtrAnalysisSucceeded()) {
// Callee analyses were done, but they failed. In order to proceed, we have to assume
// the same situation as we just did in the case where analyses have not been performed.
SMP_msg("WARNING: Callee stack ptr analysis failed at inst %lx ; normal delta assumed\n",
(unsigned long) this->GetAddr());
clc5q
committed
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
}
else {
AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr());
InstDelta += AdjustmentDelta;
}
}
else {
// Callee's analyses have succeeded, so get delta straight from callee.
InstDelta += CalleeFunc->GetNetStackPtrDelta();
}
}
else {
#if 0
SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %lx in inst %lx\n",
(unsigned long) CalledFuncAddr, (unsigned long) this->GetAddr());
clc5q
committed
InstDelta = SMP_STACK_DELTA_ERROR_CODE;
#else
SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %lx in inst %lx\n",
(unsigned long) CalledFuncAddr, (unsigned long) this->GetAddr());
clc5q
committed
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
}
else {
InstDelta = 0;
}
#endif
}
}
} // end CALL or INDIR_CALL or TailCall case
else if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
bool IsLeaveInstr = this->MDIsLeaveInstr();
clc5q
committed
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, IncomingDelta, PreAllocDelta);
clc5q
committed
return InstDelta;
} // end of SMPInstr::AnalyzeStackPointerDelta()
clc5q
committed
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
// Total the stack adjustment bytes, as happens after a call to a function that leaves
// outgoing args on the stack or swallows incoming args from the stack.
sval_t SMPInstr::FindStackAdjustment(void) {
uint16 InstType = this->SMPcmd.itype;
sval_t InstDelta = StackAlteration[InstType];
if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
bool IsLeaveInstr = this->MDIsLeaveInstr();
if (!IsLeaveInstr) {
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, 0, 0);
}
else {
InstDelta = 0; // LEAVE is not the kind of instr we are looking for
}
}
return InstDelta;
} // end of SMPInstr::FindStackAdjustment()
// Normalize stack operands to have a displacement from the stack pointer value on entry to the function,
// rather than the current stack pointer value.
// UseFP indicates we are using a frame pointer in the function.
// FPDelta holds the stack delta (normalized) for the frame pointer.
// DefOp comes in with the operand to be normalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp) {
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
op_t OldOp = DefOp;
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta;
if (DefOp.hasSIB) {
// We must deal with a potentially indexed memory expression. We want to
// normalize two different cases here: e.g. [esp+ebx+4] will become [esp+ebx-24]
// and [ebp+ebx-8] will become [esp+ebx-12] after normalization. A wrinkle
// on the second case is when the base register and index register are swapped
// in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves
// correcting the index/base reg order in the SIB, because an index reg of ESP
// is the SIB encoding for "no index register" and we cannot leave it like that.
int BaseReg = MD_STARS_sib_base(DefOp);
int IndexReg = (int) MD_STARS_sib_index(DefOp);
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register
IndexReg = R_none;
if (BaseReg == X86_STACK_POINTER_REG) {
// We probably have an indexed ESP-relative operand.
// We leave the sib byte alone and normalize the offset.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
else {
// Must be EBP-relative.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Unfortunately, when we are dealing with a SIB byte in the opcode, we cannot
// just say DefOp.reg = MD_STACK_POINTER_REG to convert from the frame pointer
// to the stack pointer. Instead, we have to get into the nasty machine code
// level and change the SIB bits that specify either the base register or the
// index register, whichever one is the frame pointer.
if (BaseReg == X86_FRAME_POINTER_REG) {
// The three least significant bits of the SIB byte are the base register.
// They must contain a 5, which is the x86 value for register EBP, and we
// want to convert it to a 4, denoting register ESP. We can just zero out
// the least significant bit to accomplish that.
DefOp.sib &= 0xfe;
}
else {
// We sometimes have an instruction in which the frame pointer is used as
// the "index" register in the SIB byte, and the true index register is
// in the "base" register position in the SIB byte.
assert(IndexReg == X86_FRAME_POINTER_REG);
// The true index reg is in the lowest three bits, while the next three
// bits must contain a 5 (register EBP) and we want to make them a 4 (ESP).
// We must swap base and index regs as we normalize (see explanation above).
char SIBtemp = DefOp.sib;
char SIBindex = SIBtemp & 0x38;
char SIBbase = SIBtemp & 0x07;
assert ((SIBindex >> 3) == 5); // must be EBP
SIBtemp &= 0xa0; // zero out lower 6 bits; upper 2 bits are scale factor - leave them alone
SIBtemp &= (SIBbase << 3); // make old base reg (e.g. ebx) into a proper index reg
SIBtemp |= 0x04; // make the new base reg be 4 (reg ESP)
DefOp.sib = SIBtemp;
}
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(X86_STACK_POINTER_REG, false);
}
}
else if (DefOp.reg == MD_FRAME_POINTER_REG) {
// If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this
// is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming
// argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when
// normalized to the entry point value of the stack pointer. In both cases, we can see that the
// normalized stack delta is just FPDelta+SignedOffset.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the
// NormalizedDelta just computed.
DefOp.reg = MD_STACK_POINTER_REG;
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(DefOp.reg, false);
}
else {
assert(DefOp.reg == MD_STACK_POINTER_REG);
// We only need to adjust the offset to reflect the change in the stack pointer since the function
// was entered, e.g. [esp+4] is normalized to [esp-28] if the current esp value is 32 less than it
// was upon function entry. We get the value "-32" in that case from a member variable.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
DefOp.addr = (ea_t) NormalizedDelta; // common to frame and stack pointer cases
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
return true;
}
else {
return false;
}
} // end of SMPInstr::MDComputeNormalizedDataFlowOp()
// Normalize stack operands in all DEFs and USEs to have stack deltas relative to the function entry stack pointer.
// Return true if any stack DEFs or USEs were normalized.
bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing, sval_t DeltaIncrement) {
bool StackOpFound = false;
bool OpNormalized;
bool UniqueDEFMemOp = true; // Does DEFMemOp not match any DEFs?
bool UniqueUSEMemOp = true; // Does USEMemOp not match any USEs?
bool UniqueLeaUSEMemOp = true; // Does LeaUSEMemOp not match any USEs?
bool UniqueMoveSource = true; // Does MoveSource not match any USEs?
set<DefOrUse, LessDefUse>::iterator DefIter, UseIter;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> > DefWorkList, UseWorkList;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> >::iterator WorkIter;
op_t OldOp, NewOp;
// Find all the DEFs that need changing, and put their iterators into a list.
// Normalizing stack ops could change their sort order, hence we could skip over
// a DEF in the set by erasing a DEF and reinserting a normalized DEF, so we
// make all the changes after we iterate through the DEFS set.
for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
OldOp = DefIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->DEFMemOp)) {
UniqueDEFMemOp = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> DefItem(DefIter, NewOp);
DefWorkList.push_back(DefItem);
}
}
}
// Now go through the DEF worklist and change stack operands to normalized stack operands.
for (WorkIter = DefWorkList.begin(); WorkIter != DefWorkList.end(); ++WorkIter) {
DefIter = WorkIter->first;
DefIter = this->Defs.SetOp(DefIter, WorkIter->second);
}
// Normalize op_t private data member DEFs.
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueDEFMemOp, this->DEFMemOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->DEFMemOp);
}
// Find all USEs that need changing, and build a second work list.
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
OldOp = UseIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->USEMemOp)) {
UniqueUSEMemOp = false;
}
if (IsEqOp(OldOp, this->GetLeaMemUseOp())) {
UniqueLeaUSEMemOp = false;
}
if (IsEqOp(OldOp, this->MoveSource)) {
UniqueMoveSource = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> UseItem(UseIter, NewOp);
UseWorkList.push_back(UseItem);
}
}
}
// Now go through the USE worklist and change stack operands to normalized stack operands.
for (WorkIter = UseWorkList.begin(); WorkIter != UseWorkList.end(); ++WorkIter) {
UseIter = WorkIter->first;
UseIter = this->Uses.SetOp(UseIter, WorkIter->second);
}
// Normalize op_t private data member USEs.
op_t TempLeaMemOp = this->GetLeaMemUseOp();
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueUSEMemOp, this->USEMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueLeaUSEMemOp, TempLeaMemOp);
if (OpNormalized)
this->SetLeaMemUseOp(TempLeaMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueMoveSource, this->MoveSource);
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->USEMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, TempLeaMemOp);
if (OpNormalized)
this->SetLeaMemUseOp(TempLeaMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->MoveSource);
}
// Declare victory.
this->SetDefsNormalized();
return StackOpFound;
} // end of SMPInstr::MDNormalizeStackOps()
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
// Renormalize SP-relative stack operands in functions that call alloca() by adding DeltaIncrement to their stack displacements.
// DefOp comes in with the operand to be renormalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDRecomputeNormalizedDataFlowOp(sval_t DeltaIncrement, bool UpdateMaps, op_t &DefOp) {
op_t OldOp = DefOp;
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, this->GetBlock()->GetFunc()->UsesFramePointer())) {
if (this->HasFPNormalizedToSP()) {
// FP-relative operands do no change in alloca() functions when the alloca()
// causes the SP to change.
return true;
}
// The remaining cases are simple. The ESP-relative displacement is incremented by
// DeltaIncrement, regardless of the presence of a SIB byte.
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta = DeltaIncrement + (sval_t) SignedOffset;
DefOp.addr = (ea_t) NormalizedDelta;
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
if (UpdateMaps) { // We don't update maps for duplicate entries, e.g. USEMemOp, DEFMemOp, MoveSource
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
}
return true;
}
else {
return false;
}
} // end of SMPInstr::MDRecomputeNormalizedDataFlowOp()
// If NormOp is a normalized stack memory operand, unnormalize it.
void SMPInstr::MDGetUnnormalizedOp(op_t &NormOp) {
sval_t SignedOffset;
bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer();
if (this->AreDefsNormalized() && MDIsStackAccessOpnd(NormOp, UseFP)) {
if (this->HasFPNormalizedToSP()) {
// Need to convert NormOp back to frame-pointer-relative address.
if (NormOp.hasSIB) {
// Convert base register from stack pointer back to frame pointer.
NormOp.sib |= 0x01;
}
else {
NormOp.reg = MD_FRAME_POINTER_REG;
}
SignedOffset = (sval_t) NormOp.addr;
SignedOffset -= this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
}
else {
// NormOp should remain stack-pointer-relative address, but it
// should be a positive offset from the current stack pointer instead
// of a negative offset from the entry point of the function.
SignedOffset = (sval_t) NormOp.addr;
SignedOffset -= this->GetStackPtrOffset();
assert((0 <= SignedOffset) || this->GetBlock()->GetFunc()->DoesStackFrameExtendPastStackTop());
}
NormOp.addr = (ea_t) SignedOffset;
}
return;
} // end of SMPInstr::MDGetUnnormalizedOp()
// Find USE-not-DEF operand that is not the flags register.
op_t SMPInstr::GetSourceOnlyOperand(void) {
size_t OpNum;
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
;
}
else if (this->features & UseMacros[OpNum]) { // USE
op_t CurrOp = this->SMPcmd.Operands[OpNum];
if (!(CurrOp.is_reg(X86_FLAGS_REG))) {
return CurrOp;
}
}
}
// It is expected that increment, decrement, and floating point stores
// will not have a USE-only operand. Increment and decrement have an
// operand that is both USEd and DEFed, while the floating point stack
// registers are implicit in most floating point opcodes. Also, exchange
// and exchange-and-add instructions have multiple DEF-and-USE operands.
int TypeGroup = SMPTypeCategory[this->SMPcmd.itype];
if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12)
&& (TypeGroup != 13)) {
SMP_msg("ERROR: Could not find source only operand at %lx in %s\n",
(unsigned long) this->address, DisAsmText.GetDisAsm(this->GetAddr()));
return InitOp;
} // end of SMPInstr::GetSourceOnlyOperand()
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
// Should apparent memory operands be ignored? e.g. lea opcode on x86
bool SMPInstr::MDIgnoreMemOps(void) {
bool leaInst = (NN_lea == this->SMPcmd.itype);
return leaInst;
}
// Find memory DEFs and USEs, store in DEFMemOp and USEMemOp
void SMPInstr::FindMemOps(void) {
size_t OpNum;
if (!(this->MDIgnoreMemOps())) {
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempOp = this->SMPcmd.Operands[OpNum];
if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory
if (this->features & DefMacros[OpNum]) { // DEF
if (this->DEFMemOp.type == o_void) { // only save first mem DEF
this->DEFMemOp = TempOp;
}
}
if (this->features & UseMacros[OpNum]) { // USE
if (this->USEMemOp.type == o_void) { // only save first mem USE
this->USEMemOp = TempOp;
}
}
}
} // end for (OpNum = 0; ...)
}
this->SetMemOpsFound();
return;
} // end of SMPInstr::FindMemOps()
// Fix problems with the operands list in SMPcmd.
void SMPInstr::MDFixupIDAProOperandList(void) {
// IDA Pro often takes the instruction imul eax,0x80 and creates the following operands and features bits:
// Opnd[0] = EAX, both DEF and USE
// Opnd[1] = EAX, just USE
// Opnd[2] = immediate, neither DEF nor USE
// Our RTL building keys in on the DEF/USE bits in features, so this looks like imul eax,eax to us.
// We want it to look like:
// Opnd[0] = EAX, both DEF and USE
// Opnd[1] = immediate, just USE
if (NN_imul == this->SMPcmd.itype) {
op_t Opnd2 = this->SMPcmd.Operands[2];
if ((!(this->features & DefMacros[2]))
&& (!(this->features & UseMacros[2]))) {
if (o_void != Opnd2.type) {
// We have a third operand that is neither DEF nor USE.
SMP_msg("INFO: Fixing IMUL operand list at %lx\n", (unsigned long) this->GetAddr());
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
this->Dump();
// Two cases: Operands[0] == Operands[1], e.g. imul eax,Opnd2
// or else three-operand form: e.g. imul eax,ecx,Opnd2
// For the three-operand form, make sure Opnd0 is DEF only, others
// are USE only. For the two-operand form, make sure Opnd0 is DEF and USE,
// Opnd1 is current Opnd2 and is USE only.
op_t Opnd0 = this->SMPcmd.Operands[0];
op_t Opnd1 = this->SMPcmd.Operands[1];
if (IsEqOp(Opnd0, Opnd1)) {
// No need for three-operand form.
this->features |= DefMacros[0];
this->features |= UseMacros[0];
this->SMPcmd.Operands[1] = Opnd2;
this->SMPcmd.Operands[2] = InitOp;
}
else { // Must have three-operand form.
this->features |= UseMacros[2]; // set missing USE bit.
this->features &= (~UseMacros[0]); // Ensure no USE of Opnd0.
}
this->Dump();
}
}
}
return;
} // SMPInstr::MDFixupIDAProOperandList()
// Fill the Defs and Uses private data members.
void SMPInstr::BuildSMPDefUseLists(void) {
size_t OpNum;
bool DebugFlag = (0x8049b00 == this->GetAddr());
bool WidthDoubler = this->MDDoublesWidth();
this->Defs.clear();
this->Uses.clear();
// Start with the Defs.
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (WidthDoubler) {
// Opcodes that sign-extend a byte to a word, or a word to a dword,
// have only one operand. It is implicit, and it is the shorter USE.
// That means the DEF will have the same width as the USE, e.g. if
// we are sign-extending AX to EAX, the USE and DEF both be AX without
// a special fix. We fix this problem with the DEF operand now.
if (TempOp.dtyp == dt_byte) {
TempOp.dtyp = dt_word;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_word) {
TempOp.dtyp = dt_dword;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_dword) {
TempOp.dtyp = dt_qword;
}
else {
SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %lx dtyp: %d\n",
OpNum, (unsigned long) this->address, TempOp.dtyp);
}
}
if (MDKnownOperandType(TempOp)) {
clc5q
committed
SMP_msg("DEBUG: Setting DEF for: ");
clc5q
committed
SMP_msg("\n");
this->Defs.SetRef(TempOp);
}
} // end for (OpNum = 0; ...)
if (this->IsRegClearIdiom()) {
// Something like xor eax,eax clears eax but does not really
// use eax. It is the same as mov eax,0 and we don't want to
// extend the prior def-use chain for eax to this instruction
// by treating the instruction as xor eax,eax. Instead, we
// build the DEF and USE lists and RTL as if it were mov eax,0.
op_t ImmOp = InitOp;
ImmOp.value = 0;
ImmOp.dtyp = this->GetOperandDtypField();
this->Uses.SetRef(ImmOp, NUMERIC);
return;
}
// Now, do the Uses. Uses have special case operations, because
// any memory operand could have register uses in the addressing
// expression, and we must create Uses for those registers. For
// example: mov eax,[ebx + esi*2 + 044Ch]
// This is a two-operand instruction with one def: eax. But
// there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi.
// The first use is an op_t of type o_phrase (memory phrase),
// which can be copied from cmd.Operands[1]. Likewise, we just
// copy cmd.Operands[0] into the defs list. However, we must create
// op_t types for register ebx and register esi and append them
// to the Uses list. This is handled by the machine dependent
// method MDFixupDefUseLists().
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & UseMacros[OpNum]) { // USE
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (MDKnownOperandType(TempOp)) {
clc5q
committed
SMP_msg("DEBUG: Setting USE for: ");
clc5q
committed
SMP_msg("\n");
this->Uses.SetRef(TempOp);
}
} // end for (OpNum = 0; ...)
return;
} // end of SMPInstr::BuildSMPDefUseLists()
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
// Declare a branch/jump to be a tail call, clean up def/use lists.
void SMPInstr::SetTailCall(void) {
this->booleans1 |= INSTR_SET_TAIL_CALL;
if (this->type == COND_BRANCH) {
this->SetCondTailCall();
}
else {
this->ResetCondTailCall();
}
this->CallTarget = this->FarBranchTarget;
this->type = RETURN;
this->GetBlock()->SetReturns(true);
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
} // end of SMPInstr::SetTailCall()
// record original Lea instruction [pseudo-]memory operand.
void SMPInstr::SetLeaMemUseOp(op_t NewLeaOperand) {
if (NULL != this->BasicBlock) {
this->GetBlock()->GetFunc()->AddLeaOperand(this->GetAddr(), NewLeaOperand);
}
return;
}
// If DefReg is not already in the DEF list, add a DEF for it.
void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
op_t TempDef = InitOp;
TempDef.type = o_reg;
TempDef.reg = DefReg;
TempDef.dtyp = this->GetOperandDtypField();
if (Shown)
TempDef.set_showed();
else
TempDef.clr_showed();
return;
} // end of SMPInstr::MDAddRegDef()
// If UseReg is not already in the USE list, add a USE for it.
void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) {
op_t TempUse = InitOp;
TempUse.type = o_reg;
TempUse.reg = UseReg;
TempUse.dtyp = this->GetOperandDtypField();
if (Shown)
TempUse.set_showed();
else
TempUse.clr_showed();
return;
} // end of SMPInstr::MDAddRegUse()
// Perform machine dependent ad hoc fixes to the def and use lists.
// For example, some multiply and divide instructions in x86 implicitly
// use and/or define register EDX. For memory phrase examples, see comment
// in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
// First, handle the uses hidden in memory addressing modes. Note that we do not
// care whether we are dealing with a memory destination operand or source
// operand, because register USEs, not DEFs, happen within the addressing expressions.
size_t OpNum;
SMPOperandType RefType;
unsigned short opcode = this->SMPcmd.itype;
int BaseReg;
int IndexReg;
ushort ScaleFactor;
ea_t displacement;
bool UseFP = true;
bool HasIndexReg = false;
bool leaInst = (NN_lea == opcode);
bool DebugFlag = (this->GetAddr() == 0x8086177);
clc5q
committed
SMP_msg("DEBUG: Fixing up DEF-USE lists for debug location\n");
#if SMP_BASEREG_POINTER_TYPE
// Some instructions are analyzed outside of any function or block when fixing up
// the IDB, so we have to assume the block and func pointers might be NULL.
if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif
if (DebugFlag) {
clc5q
committed
SMP_msg("DEBUG: UseFP = %d\n", UseFP);
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = SMPcmd.Operands[OpNum];
if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
SingleAddressReg = ((0 == displacement)
&& ((R_none == BaseReg) || (R_none == IndexReg)));
if (R_none != IndexReg) {
op_t IndexOpnd = Opnd; // Init to current operand field values
IndexOpnd.type = o_reg; // Change type and reg fields
IndexOpnd.reg = (ushort) IndexReg;
IndexOpnd.hasSIB = 0;
IndexOpnd.set_showed();
if (0 == ScaleFactor)
this->Uses.SetRef(IndexOpnd);
else { // scaling == shift ==> NUMERIC
HasIndexReg = true;
this->Uses.SetRef(IndexOpnd, NUMERIC);
if (R_none != BaseReg) {
op_t BaseOpnd = Opnd; // Init to current operand field values
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = (ushort) BaseReg;
BaseOpnd.set_showed();
RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
// Other registers used as base registers should get their USEs as
// base registers typed as POINTER, which might get refined later
// to STACKPTR, GLOBALPTR, HEAPPTR, etc.
// NOTE: the NN_lea opcode is often used without a true base register.
// E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
// could not be done in one instruction without using the addressing
// modes of the machine to do the arithmetic. We don't want to set the
// USE of EAX to POINTER in this case, so we will conservatively skip
// all lea instructions here.
// We cannot be sure that a register is truly a base register unless
// there is also an index register. E.g. with reg+displacement, we
// could have memaddr+indexreg or basereg+offset, depending on what
// the displacement is. The exception is if there is no offset and only
// one addressing register, e.g. mov eax,[ebx].
if (BaseOpnd.is_reg(MD_STACK_POINTER_REG) || (UseFP && BaseOpnd.is_reg(MD_FRAME_POINTER_REG))
|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
#endif
this->Uses.SetRef(BaseOpnd, RefType);
} // end if R_none != BaseReg
} // end if (o_phrase or o_displ operand)
} // end for (all operands)
// The lea (load effective address) instruction looks as if it has
// a memory USE: lea ebx,[edx+esi]
// However, this instruction is really just: ebx := edx+esi
// Now that the above code has inserted the "addressing" registers
// into the USE list, we should remove the "memory USE".
if (leaInst) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t UseOp = CurrUse->GetOp();
if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) {
this->EraseUse(CurrUse);
this->USEMemOp = InitOp;
break;
}
}
}
// Next, handle repeat prefices in the instructions. The Intel REPE/REPZ prefix
// is just the text printed for SCAS/CMPS instructions that have a REP prefix.
// Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
// REPNE/REPNZ only applies to SCAS and CMPS instructions.
bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
if (HasRepPrefix && HasRepnePrefix)
SMP_msg("REP and REPNE both present at %lx %s\n", (unsigned long) this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
if (HasRepPrefix || HasRepnePrefix) {
// All repeating instructions use ECX as the countdown register.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = R_cx;
BaseOpnd.clr_showed();
BaseOpnd.dtyp = this->GetOperandDtypField();
this->Defs.SetRef(BaseOpnd, NUMERIC);
this->Uses.SetRef(BaseOpnd, NUMERIC);
if ((opcode == NN_cmps) || (opcode == NN_scas) || (opcode == NN_movs) || (opcode == NN_stos)) {
// ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS.
// Only EDI is involved with SCAS/STOS.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.clr_showed();
BaseOpnd.dtyp = this->GetOperandDtypField();
if ((opcode == NN_cmps) || (opcode == NN_movs)) {
BaseOpnd.reg = R_si;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
}
BaseOpnd.reg = R_di;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
else if ((NN_loopw <= opcode) && (NN_loopqne >= opcode)) {
op_t LoopCounterOp = InitOp;
LoopCounterOp.type = o_reg;
LoopCounterOp.reg = R_cx;
LoopCounterOp.dtyp = this->GetOperandDtypField();
this->Defs.SetRef(LoopCounterOp, NUMERIC);
this->Uses.SetRef(LoopCounterOp, NUMERIC);
}
// Now, handle special instruction categories that have implicit operands.
if (NN_cmpxchg == opcode) {
// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
// sound by declaring that EAX is always a DEF.
this->MDAddRegDef(R_ax, false);
} // end if NN_cmpxchg
else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) {
// IDA does not include the stack pointer in the DEFs or USEs.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
// We always reference [esp+0] or [esp-4], so add it to the DEF or USE list.
op_t StackOp = InitOp;
StackOp.type = o_displ;
StackOp.reg = R_sp;
StackOp.dtyp = this->GetOperandDtypField();
if (this->MDIsPopInstr()) {
StackOp.addr = 0; // [ESP+0]
this->Uses.SetRef(StackOp); // USE
}
else {
StackOp.addr = (- ((ea_t) STARS_ISA_Bytewidth)); // [ESP-4]
this->Defs.SetRef(StackOp); // DEF
}
clc5q
committed
else if ((this->type == CALL) || (this->type == INDIR_CALL) || this->IsTailCall()) {
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
#if 1
if (this->MDIsInterruptCall()) {
#endif
this->MDAddRegDef(R_bx, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegDef(R_si, false);
this->MDAddRegUse(R_si, false);
}
#endif
else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) {
// Entire function prologue or epilogue microcoded.
this->MDAddRegDef(MD_STACK_POINTER_REG, false);
this->MDAddRegUse(MD_STACK_POINTER_REG, false);
this->MDAddRegDef(MD_FRAME_POINTER_REG, false);
this->MDAddRegUse(MD_FRAME_POINTER_REG, false);
else if ((opcode == NN_maskmovq) || (opcode == NN_maskmovdqu)) {
else if (8 == this->GetOptType()) {
// This category implicitly writes to EDX:EAX.
this->MDAddRegDef(R_dx, false);
this->MDAddRegDef(R_ax, false);
} // end else if (8 == GetOptType)
else if (7 == this->GetOptType()) {
// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
// DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
// sense, because they are not displayed in the disassembly text). For example:
// mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
// source operand is only 8 bits wide, there is room to hold the result in AX
// without using DX: mul bl means AX <-- AL*BL.
// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
// operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
// EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempUse = this->SMPcmd.Operands[OpNum];
if (!TempUse.showed()) { // hidden operand
if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
this->MDAddRegUse(R_dx, false);
}
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_dx, false);
}
}
}
} // end else if (7 == OptType)
#if 0
// The floating point instructions in type categories 14 and 15 often USE and DEF
// the floating point register stack, e.g. pushing a value onto that stack is a
// massive copy downward of stack locations. We don't really care about the USE of
// the stack if the value being pushed came from elsewhere than the stack. For example,
// an "fld" opcode pushes its source onto the stack. We build RTLs with a simple
// move structure, but the RTL building can be fooled by seeing two "source" operands
// in the USE list.
if ((14 == SMPTypeCategory[this->SMPcmd.itype])
|| (15 == SMPTypeCategory[this->SMPcmd.itype])) {
}
#endif
clc5q
committed
#if 0 // Not true for LOOP instructions that use only the ECX counter register.
if (this->type == COND_BRANCH) {
assert(SMPUsesFlags[opcode]);
clc5q
committed
#endif
// The return value register EAX is not quite like a caller-save or callee-save
// register (technically, it is caller-save). Within a callee, it might appear
// that EAX has become dead by the time a return instruction is reached, but
// the USE that would make it not dead is in the caller. To prevent type inference
// from mistakenly thinking that all USEs of EAX have been seen in the callee,
// we add EAX to the USE list for all return instructions, as well as for all
// tail calls, which are essentially returns in terms of data flow analysis.
// This USE of EAX will always be of type UNINIT unless its DEF has a known type
// that propagates to it. Thus, it will prevent an invalid back inference of the
// DEF type from "all" USE types that are visible in the callee; even if they
// were all NUMERIC, this return USE will be UNINIT and inhibit the invalid
// type inference. EAX could be loaded with a pointer from memory, for example,
// and USEd only in a comparison instruction, making it falsely appear to be
// a NUMERIC, without this extra USE at the return instruction.
// Because some of the library functions pass values around in EBX, EDI, etc.,
// we will add these general purpose registers to the USE list for returns
// in order to prevent erroneous analyses of dead registers or unused
// metadata.
if ((this->type == RETURN) || this->IsTailCall()) {
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
this->MDAddRegUse(MD_FRAME_POINTER_REG, false);
this->MDAddRegUse(R_si, false);
this->MDAddRegUse(R_di, false);
}
clc5q
committed
// Next, add the flags register to the DEFs and USEs for those instructions that
// are marked as defining or using flags.
if (!this->IsDefsFlags() && SMPDefsFlags[opcode]) {
this->MDAddRegDef(X86_FLAGS_REG, false);
this->SetDefsFlags();
if (!this->IsUsesFlags() && SMPUsesFlags[opcode]) {
this->MDAddRegUse(X86_FLAGS_REG, false);
this->SetUsesFlags();
}
if (this->IsNop()) {
// Clear the DEFs and USEs for no-ops.
// These include machine idioms for no-ops, e.g. mov esi,esi
// or xchg ax,ax or lea esi,[esi].
this->Defs.clear();
this->Uses.clear();
this->MoveSource = InitOp;
this->DEFMemOp = InitOp;
this->USEMemOp = InitOp;
this->OptType = 1;
}
#endif
clc5q
committed
SMP_msg("DEBUG after MDFixupDefUseLists:\n");
return;
} // end of SMPInstr::MDFixupDefUseLists()
// If we can definitely identify which part of the addressing expression
// used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR
// immediate, set the USE type for that register to POINTER and return true.
// If we can find definite NUMERIC addressing registers that are not already
// typed as NUMERIC, set their USE types to NUMERIC and return true.
bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
bool changed = false;
int BaseReg;
int IndexReg;
op_t BaseOp = InitOp;
op_t IndexOp = InitOp;
SMPOperandType BaseType = UNKNOWN;
SMPOperandType IndexType = UNKNOWN;
ushort ScaleFactor;
ea_t offset;
set<DefOrUse, LessDefUse>::iterator BaseIter;
set<DefOrUse, LessDefUse>::iterator IndexIter;
if (NN_lea == this->SMPcmd.itype)
return false; // lea instruction really has no memory operands
if (NN_fnop == this->SMPcmd.itype)
return false; // SSA marker instruction
MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset);
if (R_none != IndexReg) {
IndexOp.type = o_reg;
IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
IndexOp.dtyp = this->GetOperandDtypField(); // Canonical reg width
IndexIter = this->FindUse(IndexOp);
assert(IndexIter != this->GetLastUse());
IndexType = IndexIter->GetType();
}
if (R_none != BaseReg) {
BaseOp.type = o_reg;
BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
BaseOp.dtyp = this->GetOperandDtypField(); // Canonical reg width