Newer
Older
} // end of SMPInstr::IsBranchToFarChunk()
// Forward the SSA subscript SSASub for operand CurrOp to the USE container and
// hand back the iterator that Uses.SetSSANum() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
    set<DefOrUse, LessDefUse>::iterator UpdatedUse = this->Uses.SetSSANum(CurrOp, SSASub);
    return UpdatedUse;
}
// Forward the SSA subscript SSASub for operand CurrOp to the DEF container and
// hand back the iterator that Defs.SetSSANum() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
    set<DefOrUse, LessDefUse>::iterator UpdatedDef = this->Defs.SetSSANum(CurrOp, SSASub);
    return UpdatedDef;
}
// Forward the operand type CurrType for CurrOp to the USE container, passing this
// instruction along, and hand back the iterator that Uses.SetType() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
    set<DefOrUse, LessDefUse>::iterator UpdatedUse = this->Uses.SetType(CurrOp, CurrType, this);
    return UpdatedUse;
}
// Forward the operand type CurrType for CurrOp to the DEF container, passing this
// instruction along, and return the iterator that Defs.SetType() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
    return this->Defs.SetType(CurrOp, CurrType, this);
}
// Forward the metadata status Status for CurrOp to the DEF container and hand
// back the iterator that Defs.SetMetadata() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
    set<DefOrUse, LessDefUse>::iterator UpdatedDef = this->Defs.SetMetadata(CurrOp, Status);
    return UpdatedDef;
}
// Forward IndWriteFlag for CurrOp to the DEF container and hand back the
// iterator that Defs.SetIndWrite() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) {
    set<DefOrUse, LessDefUse>::iterator UpdatedDef = this->Defs.SetIndWrite(CurrOp, IndWriteFlag);
    return UpdatedDef;
}
// Forward NoTruncFlag for CurrOp to the USE container and hand back the
// iterator that Uses.SetNoTruncation() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) {
    set<DefOrUse, LessDefUse>::iterator UpdatedUse = this->Uses.SetNoTruncation(CurrOp, NoTruncFlag);
    return UpdatedUse;
}
clc5q
committed
// Forward NoOverflowFlag for DefOp to the DEF container and hand back the
// iterator that Defs.SetNoOverflow() produces.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) {
    set<DefOrUse, LessDefUse>::iterator UpdatedDef = this->Defs.SetNoOverflow(DefOp, NoOverflowFlag);
    return UpdatedDef;
}
clc5q
committed
// Set the DeadRegsBitmap entry for Regnum.
void SMPInstr::SetRegDead(size_t RegNum) {
this->DeadRegsBitmap.set(RegNum);
return;
}
// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
bool DebugFlag = false;
if (0x8049b00 == this->address) {
// Setting up breakpoint line.
DebugFlag = true;
}
// Fill cmd structure with disassembly of instr
clc5q
committed
if (!SMPGetCmd(this->address, this->SMPcmd, this->features))
unsigned short opcode = this->SMPcmd.itype;
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
// Record optimization category.
this->OptType = OptCategory[opcode];
if ((NN_int == opcode) || (NN_into == opcode) || (NN_int3 == opcode)) {
this->SetInterrupt();
}
else {
this->ResetInterrupt();
}
// Fix the IDA Pro mistakes in the operand list.
this->MDFixupIDAProOperandList();
// See if instruction is an ASM idiom for clearing a register.
if ((NN_xor == opcode) || (NN_lea == opcode)) {
ushort FirstReg;
if (o_reg == this->SMPcmd.Operands[0].type) {
FirstReg = this->SMPcmd.Operands[0].reg;
op_t SecondOpnd = this->SMPcmd.Operands[1];
if (NN_xor == opcode) {
// Check for xor of reg with itself
if (SecondOpnd.is_reg(FirstReg)) {
this->SetRegClearIdiom();
}
}
else { // must be lea
// check for lea reg,[nobasereg+nonindexreg+0]
if ((SecondOpnd.type >= o_mem) && (SecondOpnd.type <= o_displ)) {
int BaseReg, IndexReg;
ushort ScaleFactor;
ea_t Offset;
MDExtractAddressFields(SecondOpnd, BaseReg, IndexReg, ScaleFactor, Offset);
if ((R_none == BaseReg) && (R_none == IndexReg) && (0 == Offset)) {
this->SetRegClearIdiom();
}
}
}
// See if instruction is simple nop or ASM idiom for nop.
if (this->MDIsNop()) {
this->SetNop();
}
// Build the DEF and USE lists for the instruction.
this->FindMemOps();
this->BuildSMPDefUseLists();
// Determine whether the instruction is a jump target by looking
// at its cross references and seeing if it has "TO" code xrefs.
clc5q
committed
SMP_xref_t xrefs;
for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
this->SetJumpTarget();
break;
}
}
// If instruction is a call or indirect call, see if a call target has been recorded
// by IDA Pro.
if (this->GetDataFlowType() == INDIR_CALL) {
clc5q
committed
for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL);
ok;
clc5q
committed
ok = xrefs.SMP_next_from()) {
if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) {
// Found a code target, with its address in xrefs.to
clc5q
committed
if (xrefs.GetTo() == (this->address + this->GetCmd().size)) {
// A call instruction will have two targets: the fall through to the
// next instruction, and the called function. We want to find
// the called function.
continue;
}
// We found a target, not the fall-through.
clc5q
committed
this->CallTarget = xrefs.GetTo();
SMP_msg("Found indirect call target %lx at %lx\n",
(unsigned long) xrefs.GetTo(), (unsigned long) this->address);
break;
}
} // end for all code xrefs
if (BADADDR == this->CallTarget) {
SMP_msg("WARNING: Did not find indirect call target at %lx\n",
(unsigned long) this->address);
}
} // end if INDIR_CALL
else if (this->GetDataFlowType() == CALL) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
optype_t OpType = CurrUse->GetOp().type;
if ((OpType == o_near) || (OpType == o_far)) {
this->CallTarget = CurrUse->GetOp().addr;
}
}
if (BADADDR == this->CallTarget) {
SMP_msg("ERROR: Target not found for direct call at %lx\n", (unsigned long) this->address);
}
}
if (DebugFlag) {
SMP_msg("Analyzed debug instruction at %lx\n", (unsigned long) this->address);
}
return;
} // end of SMPInstr::Analyze()
// Analyze the floating point NOP marker instruction at the top of the function.
// Nothing is disassembled here: SMPcmd is synthesized as an NN_fnop of size 1
// located at this instruction's address.
void SMPInstr::AnalyzeMarker(void) {
    // Zero the whole SMPcmd member, then populate only the fields the marker needs.
    (void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
    this->SMPcmd.ea = this->address;
    this->SMPcmd.size = 1;
    this->SMPcmd.itype = NN_fnop;

    // Set the instr disassembly text.
    DisAsmText.SetMarkerInstText(this->GetAddr());

    // Both category tables are indexed by the opcode that was just stored:
    // the data flow / type analysis category first, then the optimization category.
    const unsigned short MarkerOpcode = this->SMPcmd.itype;
    this->type = DFACategory[MarkerOpcode];
    this->OptType = OptCategory[MarkerOpcode];
} // end of SMPInstr::AnalyzeMarker()
// Detect oddities of call instructions, such as pseudo-calls that are
// actually jumps within a function.
// FirstFuncAddr and LastFuncAddr bound the enclosing function. A call target equal
// to FirstFuncAddr is flagged as a direct recursive call; a target strictly between
// the two addresses is flagged as a call used as a jump and the instruction type
// becomes JUMP. Nothing is changed when no call target has been recorded.
void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
    if (BADADDR == this->CallTarget) {
        return;
    }

    if (this->CallTarget == FirstFuncAddr) {
        this->SetDirectRecursiveCall();
    }
    else {
        this->ResetDirectRecursiveCall();
        if ((this->CallTarget > FirstFuncAddr)
            && (this->CallTarget < LastFuncAddr)) {
            this->SetCallUsedAsJump();
            this->type = JUMP;
        }
        else {
            this->ResetCallUsedAsJump();
        }
    }
    return;
} // end of SMPInstr::AnalyzeCallInst()
clc5q
committed
// Compute and return the stack pointer delta caused by this instruction.
// The starting value comes from the StackAlteration[] table, indexed by opcode, and
// is then refined for calls, tail calls, alloca calls, and opcodes whose table entry
// is the trigger value 1. IncomingDelta and PreAllocDelta are only forwarded to
// RTL.TotalStackPointerAlteration() in that last case.
sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocDelta) {
    uint16 InstType = this->SMPcmd.itype;
    sval_t InstDelta = StackAlteration[InstType];
    SMPitype FlowType = this->GetDataFlowType();
    bool TailCall = this->IsTailCall();

    if (this->IsCallUsedAsJump() || this->MDIsInterruptCall() || this->IsCondTailCall()) {
        // Call is used within function as a jump. Happens when setting up
        // thunk offsets, for example; OR, call is an interrupt call, in which
        // the interrupt return cleans up the stack, leaving a delta of zero, but
        // we do not have the system call code to analyze, OR, the call is a conditional
        // jump to another function (conditional tail call), in which case the current
        // function must have a return statement to fall into which will clean up the
        // only thing left on the stack (the return address) and the conditional jump
        // has no effect on the stack pointer.
        ; // leave InstDelta equal to negative or zero value from StackAlteration[]
    }
    else if (this->IsRecursiveCall() || TailCall) {
        // We don't have the net stack delta for our own function yet, so we cannot
        // look it up. We must assume that each call has no net effect on the stack delta.
        // Alternatively, we could call this->GetBlock()->GetFunc()->GetStackDeltaForCallee() as below.
        // Also, a tail call happens when the stack delta is down to zero, and the callee does not
        // return to the caller, unlike the call cases below, so the callee's net stack delta is
        // irrelevant to the caller.
        InstDelta = 0;
    }
    else if (this->IsAllocaCall()) {
        InstDelta = STARS_DEFAULT_ALLOCA_SIZE;
    }
    else if ((CALL == FlowType) || (INDIR_CALL == FlowType)) {
        // A real call instruction, which pushes a return address on the stack,
        // not a call used as a branch within the function. A return instruction
        // will usually cancel out the stack push that is implicit in the call, which
        // means that the function will have a net stack ptr delta of +4, which will
        // cancel out the -4 value of the call instruction and set the delta to zero.
        // However, this is not true in all cases, so we get the net stack ptr delta
        // directly from the called function unless it is an unresolved indirect call,
        // in which case we assume +4. !!!!****!!!! In the future, we could analyze
        // the code around an unresolved indirect call to see if it seems to be
        // removing items left on the stack by the callee.
        // SPECIAL CASE: A jump used as a tail call will have a stack ptr effect that is equal
        // to the net stack ptr effect of its target function, usually +4, whereas a jump
        // would otherwise have a net stack ptr effect of 0.
        // NOTE(review): TailCall is always false inside this arm, because a true
        // TailCall is consumed by one of the earlier arms; the "if (TailCall)"
        // branches below therefore cannot execute as written -- confirm intent.
        ea_t CalledFuncAddr = this->GetCallTarget();
        if ((BADADDR == CalledFuncAddr) || (0 == CalledFuncAddr)) {
            InstDelta = 0;
        }
        else { // We have a call target
            SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->FindFunction(CalledFuncAddr);
            sval_t AdjustmentDelta;
            if (CalleeFunc) {
                if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) {
                    // Phase ordering issue in the call graph. A mutually recursive clique of functions has to
                    // be broken by starting processing somewhere, and all callees cannot be processed before
                    // we start. If we got our stack down to zero and then made a tail call, then we have to assume
                    // that the callee will use our return address, so we assume the default stack delta. If not a
                    // tail call, we ask our function to see if the information is available from IDA Pro analyses,
                    // or if it can be inferred from the fact that the call is followed by a stack adjustment.
                    if (TailCall) {
                        InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
                        SMP_msg("WARNING: Callee stack ptr analysis not yet performed at tail call inst %lx ; normal delta assumed\n",
                            (unsigned long) this->GetAddr());
                    }
                    else {
                        AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(CalledFuncAddr);
                        InstDelta += AdjustmentDelta;
                        SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %lx ; stack adjustment used\n",
                            (unsigned long) this->GetAddr());
                    }
                }
                else if (!CalleeFunc->StackPtrAnalysisSucceeded()) {
                    // Callee analyses were done, but they failed. In order to proceed, we have to assume
                    // the same situation as we just did in the case where analyses have not been performed.
                    SMP_msg("WARNING: Callee stack ptr analysis failed at inst %lx ; normal delta assumed\n",
                        (unsigned long) this->GetAddr());
                    if (TailCall) {
                        InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
                    }
                    else {
                        // NOTE(review): this passes the instruction address, whereas the
                        // analysis-not-completed case above passes CalledFuncAddr to the
                        // same method; one of the two is probably wrong -- confirm.
                        AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr());
                        InstDelta += AdjustmentDelta;
                    }
                }
                else {
                    // Callee's analyses have succeeded, so get delta straight from callee.
                    InstDelta += CalleeFunc->GetNetStackPtrDelta();
                }
            }
            else {
#if 0
                SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %lx in inst %lx\n",
                    (unsigned long) CalledFuncAddr, (unsigned long) this->GetAddr());
                InstDelta = SMP_STACK_DELTA_ERROR_CODE;
#else
                SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %lx in inst %lx\n",
                    (unsigned long) CalledFuncAddr, (unsigned long) this->GetAddr());
                if (TailCall) {
                    InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
                }
                else {
                    InstDelta = 0;
                }
#endif
            }
        }
    } // end CALL or INDIR_CALL or TailCall case
    else if (1 == InstDelta) {
        // value of 1 is trigger to investigate the RTL for the
        // true value, which cannot be found simply by table lookup
        // In the special case of an x86 LEAVE instruction, the effect
        // on the stack pointer is to deallocate the local frame size,
        // plus pop the saved frame pointer into EBP. Helper functions
        // need to know whether to look for this special case.
        bool IsLeaveInstr = this->MDIsLeaveInstr();
        InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, IncomingDelta, PreAllocDelta);
    }
    return InstDelta;
} // end of SMPInstr::AnalyzeStackPointerDelta()
clc5q
committed
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
// Total the stack adjustment bytes, as happens after a call to a function that leaves
// outgoing args on the stack or swallows incoming args from the stack.
// Returns the StackAlteration[] entry for the opcode, except that the trigger value 1
// is replaced by zero for a LEAVE instruction and by the RTL-derived total otherwise.
sval_t SMPInstr::FindStackAdjustment(void) {
    const uint16 Opcode = this->SMPcmd.itype;
    const sval_t TableDelta = StackAlteration[Opcode];

    // Any table value other than 1 is already the answer.
    if (1 != TableDelta) {
        return TableDelta;
    }

    // A table value of 1 means the real delta cannot be found by table lookup
    // and the RTL has to be consulted. On x86, LEAVE deallocates the local frame
    // and pops the saved frame pointer into EBP, which is not the kind of
    // adjustment being totaled here, so it counts as zero.
    if (this->MDIsLeaveInstr()) {
        return 0;
    }
    return this->RTL.TotalStackPointerAlteration(false, 0, 0);
} // end of SMPInstr::FindStackAdjustment()
// Normalize stack operands to have a displacement from the stack pointer value on entry to the function,
// rather than the current stack pointer value.
// UseFP indicates we are using a frame pointer in the function.
// FPDelta holds the stack delta (normalized) for the frame pointer.
// DefOp comes in with the operand to be normalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp) {
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
op_t OldOp = DefOp;
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta;
if (DefOp.hasSIB) {
// We must deal with a potentially indexed memory expression. We want to
// normalize two different cases here: e.g. [esp+ebx+4] will become [esp+ebx-24]
// and [ebp+ebx-8] will become [esp+ebx-12] after normalization. A wrinkle
// on the second case is when the base register and index register are swapped
// in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves
// correcting the index/base reg order in the SIB, because an index reg of ESP
// is the SIB encoding for "no index register" and we cannot leave it like that.
int BaseReg = MD_STARS_sib_base(DefOp);
int IndexReg = (int) MD_STARS_sib_index(DefOp);
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register
IndexReg = R_none;
if (BaseReg == X86_STACK_POINTER_REG) {
// We probably have an indexed ESP-relative operand.
// We leave the sib byte alone and normalize the offset.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
else {
// Must be EBP-relative.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Unfortunately, when we are dealing with a SIB byte in the opcode, we cannot
// just say DefOp.reg = MD_STACK_POINTER_REG to convert from the frame pointer
// to the stack pointer. Instead, we have to get into the nasty machine code
// level and change the SIB bits that specify either the base register or the
// index register, whichever one is the frame pointer.
if (BaseReg == X86_FRAME_POINTER_REG) {
// The three least significant bits of the SIB byte are the base register.
// They must contain a 5, which is the x86 value for register EBP, and we
// want to convert it to a 4, denoting register ESP. We can just zero out
// the least significant bit to accomplish that.
DefOp.sib &= 0xfe;
}
else {
// We sometimes have an instruction in which the frame pointer is used as
// the "index" register in the SIB byte, and the true index register is
// in the "base" register position in the SIB byte.
assert(IndexReg == X86_FRAME_POINTER_REG);
// The true index reg is in the lowest three bits, while the next three
// bits must contain a 5 (register EBP) and we want to make them a 4 (ESP).
// We must swap base and index regs as we normalize (see explanation above).
char SIBtemp = DefOp.sib;
char SIBindex = SIBtemp & 0x38;
char SIBbase = SIBtemp & 0x07;
assert ((SIBindex >> 3) == 5); // must be EBP
SIBtemp &= 0xa0; // zero out lower 6 bits; upper 2 bits are scale factor - leave them alone
SIBtemp &= (SIBbase << 3); // make old base reg (e.g. ebx) into a proper index reg
SIBtemp |= 0x04; // make the new base reg be 4 (reg ESP)
DefOp.sib = SIBtemp;
}
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(X86_STACK_POINTER_REG, false);
}
}
else if (DefOp.reg == MD_FRAME_POINTER_REG) {
// If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this
// is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming
// argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when
// normalized to the entry point value of the stack pointer. In both cases, we can see that the
// normalized stack delta is just FPDelta+SignedOffset.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the
// NormalizedDelta just computed.
DefOp.reg = MD_STACK_POINTER_REG;
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(DefOp.reg, false);
}
else {
assert(DefOp.reg == MD_STACK_POINTER_REG);
// We only need to adjust the offset to reflect the change in the stack pointer since the function
// was entered, e.g. [esp+4] is normalized to [esp-28] if the current esp value is 32 less than it
// was upon function entry. We get the value "-32" in that case from a member variable.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
DefOp.addr = (ea_t) NormalizedDelta; // common to frame and stack pointer cases
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
return true;
}
else {
return false;
}
} // end of SMPInstr::MDComputeNormalizedDataFlowOp()
// Normalize stack operands in all DEFs and USEs to have stack deltas relative to the function entry stack pointer.
// Return true if any stack DEFs or USEs were normalized.
bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing, sval_t DeltaIncrement) {
bool StackOpFound = false;
bool OpNormalized;
bool UniqueDEFMemOp = true; // Does DEFMemOp not match any DEFs?
bool UniqueUSEMemOp = true; // Does USEMemOp not match any USEs?
bool UniqueLeaUSEMemOp = true; // Does LeaUSEMemOp not match any USEs?
bool UniqueMoveSource = true; // Does MoveSource not match any USEs?
set<DefOrUse, LessDefUse>::iterator DefIter, UseIter;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> > DefWorkList, UseWorkList;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> >::iterator WorkIter;
op_t OldOp, NewOp;
// Find all the DEFs that need changing, and put their iterators into a list.
// Normalizing stack ops could change their sort order, hence we could skip over
// a DEF in the set by erasing a DEF and reinserting a normalized DEF, so we
// make all the changes after we iterate through the DEFS set.
for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
OldOp = DefIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->DEFMemOp)) {
UniqueDEFMemOp = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> DefItem(DefIter, NewOp);
DefWorkList.push_back(DefItem);
}
}
}
// Now go through the DEF worklist and change stack operands to normalized stack operands.
for (WorkIter = DefWorkList.begin(); WorkIter != DefWorkList.end(); ++WorkIter) {
DefIter = WorkIter->first;
DefIter = this->Defs.SetOp(DefIter, WorkIter->second);
}
// Normalize op_t private data member DEFs.
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueDEFMemOp, this->DEFMemOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->DEFMemOp);
}
// Find all USEs that need changing, and build a second work list.
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
OldOp = UseIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->USEMemOp)) {
UniqueUSEMemOp = false;
}
if (IsEqOp(OldOp, this->GetLeaMemUseOp())) {
UniqueLeaUSEMemOp = false;
}
if (IsEqOp(OldOp, this->MoveSource)) {
UniqueMoveSource = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> UseItem(UseIter, NewOp);
UseWorkList.push_back(UseItem);
}
}
}
// Now go through the USE worklist and change stack operands to normalized stack operands.
for (WorkIter = UseWorkList.begin(); WorkIter != UseWorkList.end(); ++WorkIter) {
UseIter = WorkIter->first;
UseIter = this->Uses.SetOp(UseIter, WorkIter->second);
}
// Normalize op_t private data member USEs.
op_t TempLeaMemOp = this->GetLeaMemUseOp();
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueUSEMemOp, this->USEMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueLeaUSEMemOp, TempLeaMemOp);
if (OpNormalized)
this->SetLeaMemUseOp(TempLeaMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueMoveSource, this->MoveSource);
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->USEMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, TempLeaMemOp);
if (OpNormalized)
this->SetLeaMemUseOp(TempLeaMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->MoveSource);
}
// Declare victory.
this->SetDefsNormalized();
return StackOpFound;
} // end of SMPInstr::MDNormalizeStackOps()
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
// Renormalize SP-relative stack operands in functions that call alloca() by adding DeltaIncrement to their stack displacements.
// DefOp comes in with the operand to be renormalized, and contains the normalized operand upon return.
// UpdateMaps controls whether the old/new operand pair is recorded with the enclosing function.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDRecomputeNormalizedDataFlowOp(sval_t DeltaIncrement, bool UpdateMaps, op_t &DefOp) {
    // Registers always take part in data flow analysis and need no adjustment.
    if (o_reg == DefOp.type) {
        return true;
    }

    // Anything that is not a stack access is excluded.
    const bool FuncUsesFP = this->GetBlock()->GetFunc()->UsesFramePointer();
    if (!MDIsStackAccessOpnd(DefOp, FuncUsesFP)) {
        return false;
    }

    // FP-relative operands do not change in alloca() functions when the alloca()
    // causes the SP to change.
    if (this->HasFPNormalizedToSP()) {
        return true;
    }

    // Remember the incoming operand so the old -> new mapping can be recorded.
    const op_t PriorOp = DefOp;

    // The remaining cases are simple. The ESP-relative displacement is incremented by
    // DeltaIncrement, regardless of the presence of a SIB byte.
    const int OldDisplacement = (int) DefOp.addr;
    const sval_t NewDisplacement = DeltaIncrement + (sval_t) OldDisplacement;
    DefOp.addr = (ea_t) NewDisplacement;

    // mov [esp],eax has an [esp] operand of type o_phrase, because there is no
    // displacement field. Once it carries a nonzero displacement it is an operand
    // like [esp-32] and must be typed o_displ.
    if ((0 != NewDisplacement) && (o_phrase == DefOp.type)) {
        DefOp.type = o_displ;
    }

    // We don't update maps for duplicate entries, e.g. USEMemOp, DEFMemOp, MoveSource
    if (UpdateMaps) {
        this->GetBlock()->GetFunc()->AddNormalizedStackOperand(PriorOp, this->GetAddr(), DefOp);
    }
    return true;
} // end of SMPInstr::MDRecomputeNormalizedDataFlowOp()
// If NormOp is a normalized stack memory operand, unnormalize it.
// NormOp is modified in place; it is left untouched unless this instruction's DEFs
// have been normalized and NormOp is a stack access operand.
void SMPInstr::MDGetUnnormalizedOp(op_t &NormOp) {
    const bool FuncUsesFP = this->GetBlock()->GetFunc()->UsesFramePointer();
    if (!this->AreDefsNormalized() || !MDIsStackAccessOpnd(NormOp, FuncUsesFP)) {
        return;
    }

    sval_t Displacement;
    if (!this->HasFPNormalizedToSP()) {
        // NormOp should remain stack-pointer-relative address, but it
        // should be a positive offset from the current stack pointer instead
        // of a negative offset from the entry point of the function.
        Displacement = (sval_t) NormOp.addr;
        Displacement -= this->GetStackPtrOffset();
        assert((0 <= Displacement) || this->GetBlock()->GetFunc()->DoesStackFrameExtendPastStackTop());
    }
    else {
        // Need to convert NormOp back to frame-pointer-relative address.
        if (!NormOp.hasSIB) {
            NormOp.reg = MD_FRAME_POINTER_REG;
        }
        else {
            // Convert base register from stack pointer back to frame pointer.
            NormOp.sib |= 0x01;
        }
        Displacement = (sval_t) NormOp.addr;
        Displacement -= this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
    }
    NormOp.addr = (ea_t) Displacement;
} // end of SMPInstr::MDGetUnnormalizedOp()
// Find USE-not-DEF operand that is not the flags register.
// Returns the first such operand in SMPcmd.Operands. When none exists, InitOp is
// returned, and an error is logged unless the opcode's SMPTypeCategory group is one
// of those expected to lack a source-only operand.
op_t SMPInstr::GetSourceOnlyOperand(void) {
    for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
        if (this->features & DefMacros[OpNum]) { // DEF, so not source-only
            continue;
        }
        if (this->features & UseMacros[OpNum]) { // USE
            op_t CurrOp = this->SMPcmd.Operands[OpNum];
            if (!(CurrOp.is_reg(X86_FLAGS_REG))) {
                return CurrOp;
            }
        }
    }

    // It is expected that increment, decrement, and floating point stores
    // will not have a USE-only operand. Increment and decrement have an
    // operand that is both USEd and DEFed, while the floating point stack
    // registers are implicit in most floating point opcodes. Also, exchange
    // and exchange-and-add instructions have multiple DEF-and-USE operands.
    int TypeGroup = SMPTypeCategory[this->SMPcmd.itype];
    if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12)
        && (TypeGroup != 13)) {
        SMP_msg("ERROR: Could not find source only operand at %lx in %s\n",
            (unsigned long) this->address, DisAsmText.GetDisAsm(this->GetAddr()));
    }
    return InitOp;
} // end of SMPInstr::GetSourceOnlyOperand()
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
// Should apparent memory operands be ignored? e.g. lea opcode on x86
// True exactly when this instruction's opcode is NN_lea.
bool SMPInstr::MDIgnoreMemOps(void) {
    return (NN_lea == this->SMPcmd.itype);
}
// Find memory DEFs and USEs, store in DEFMemOp and USEMemOp
void SMPInstr::FindMemOps(void) {
size_t OpNum;
if (!(this->MDIgnoreMemOps())) {
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempOp = this->SMPcmd.Operands[OpNum];
if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory
if (this->features & DefMacros[OpNum]) { // DEF
if (this->DEFMemOp.type == o_void) { // only save first mem DEF
this->DEFMemOp = TempOp;
}
}
if (this->features & UseMacros[OpNum]) { // USE
if (this->USEMemOp.type == o_void) { // only save first mem USE
this->USEMemOp = TempOp;
}
}
}
} // end for (OpNum = 0; ...)
}
this->SetMemOpsFound();
return;
} // end of SMPInstr::FindMemOps()
// Fix problems with the operands list in SMPcmd.
// Only NN_imul instructions whose third operand exists but is marked neither DEF
// nor USE are changed; every other instruction is left as is.
void SMPInstr::MDFixupIDAProOperandList(void) {
    // IDA Pro often takes the instruction imul eax,0x80 and creates the following operands and features bits:
    // Opnd[0] = EAX, both DEF and USE
    // Opnd[1] = EAX, just USE
    // Opnd[2] = immediate, neither DEF nor USE
    // Our RTL building keys in on the DEF/USE bits in features, so this looks like imul eax,eax to us.
    // We want it to look like:
    // Opnd[0] = EAX, both DEF and USE
    // Opnd[1] = immediate, just USE
    if (NN_imul != this->SMPcmd.itype) {
        return;
    }

    op_t Opnd2 = this->SMPcmd.Operands[2];
    const bool Opnd2IsDefOrUse = (this->features & DefMacros[2]) || (this->features & UseMacros[2]);
    if (Opnd2IsDefOrUse || (o_void == Opnd2.type)) {
        return; // nothing to repair
    }

    // We have a third operand that is neither DEF nor USE.
    SMP_msg("INFO: Fixing IMUL operand list at %lx\n", (unsigned long) this->GetAddr());
    this->Dump(); // state before the fix

    // Two cases: Operands[0] == Operands[1], e.g. imul eax,Opnd2
    // or else three-operand form: e.g. imul eax,ecx,Opnd2
    // For the three-operand form, make sure Opnd0 is DEF only, others
    // are USE only. For the two-operand form, make sure Opnd0 is DEF and USE,
    // Opnd1 is current Opnd2 and is USE only.
    op_t Opnd0 = this->SMPcmd.Operands[0];
    op_t Opnd1 = this->SMPcmd.Operands[1];
    if (IsEqOp(Opnd0, Opnd1)) {
        // No need for three-operand form.
        this->features |= DefMacros[0];
        this->features |= UseMacros[0];
        this->SMPcmd.Operands[1] = Opnd2;
        this->SMPcmd.Operands[2] = InitOp;
    }
    else { // Must have three-operand form.
        this->features |= UseMacros[2]; // set missing USE bit.
        this->features &= (~UseMacros[0]); // Ensure no USE of Opnd0.
    }
    this->Dump(); // state after the fix
    return;
} // SMPInstr::MDFixupIDAProOperandList()
// Fill the Defs and Uses private data members.
// Both containers are cleared first. DEFs come from the operands whose DEF bit is set
// in features; USEs come from the operands whose USE bit is set, except that a
// register-clearing idiom gets a single zero-valued USE instead.
void SMPInstr::BuildSMPDefUseLists(void) {
    size_t OpNum;
    bool DebugFlag = (0x8049b00 == this->GetAddr());
    bool WidthDoubler = this->MDDoublesWidth();

    this->Defs.clear();
    this->Uses.clear();

    // Start with the Defs.
    for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
        if (this->features & DefMacros[OpNum]) { // DEF
            op_t TempOp = this->SMPcmd.Operands[OpNum];
            if (WidthDoubler) {
                // Opcodes that sign-extend a byte to a word, or a word to a dword,
                // have only one operand. It is implicit, and it is the shorter USE.
                // That means the DEF will have the same width as the USE, e.g. if
                // we are sign-extending AX to EAX, the USE and DEF would both be AX
                // without a special fix. We fix this problem with the DEF operand now.
                if (TempOp.dtyp == dt_byte) {
                    TempOp.dtyp = dt_word;
                    TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
                }
                else if (TempOp.dtyp == dt_word) {
                    TempOp.dtyp = dt_dword;
                    TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
                }
                else if (TempOp.dtyp == dt_dword) {
                    TempOp.dtyp = dt_qword;
                }
                else {
                    SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %lx dtyp: %d\n",
                        OpNum, (unsigned long) this->address, TempOp.dtyp);
                }
            }
            if (MDKnownOperandType(TempOp)) {
                // NOTE(review): the guard on DebugFlag is restored here because the flag
                // was otherwise unused and these messages are labelled DEBUG; a statement
                // printing the operand between the two messages appears to be missing -- confirm.
                if (DebugFlag) {
                    SMP_msg("DEBUG: Setting DEF for: ");
                    SMP_msg("\n");
                }
                this->Defs.SetRef(TempOp);
            }
        } // end if DEF
    } // end for (OpNum = 0; ...)

    if (this->IsRegClearIdiom()) {
        // Something like xor eax,eax clears eax but does not really
        // use eax. It is the same as mov eax,0 and we don't want to
        // extend the prior def-use chain for eax to this instruction
        // by treating the instruction as xor eax,eax. Instead, we
        // build the DEF and USE lists and RTL as if it were mov eax,0.
        // NOTE(review): ImmOp keeps whatever type field InitOp carries; confirm
        // that an explicit immediate operand type is not required here.
        op_t ImmOp = InitOp;
        ImmOp.value = 0;
        ImmOp.dtyp = this->GetOperandDtypField();
        this->Uses.SetRef(ImmOp, NUMERIC);
        return;
    }

    // Now, do the Uses. Uses have special case operations, because
    // any memory operand could have register uses in the addressing
    // expression, and we must create Uses for those registers. For
    // example: mov eax,[ebx + esi*2 + 044Ch]
    // This is a two-operand instruction with one def: eax. But
    // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi.
    // The first use is an op_t of type o_phrase (memory phrase),
    // which can be copied from cmd.Operands[1]. Likewise, we just
    // copy cmd.Operands[0] into the defs list. However, we must create
    // op_t types for register ebx and register esi and append them
    // to the Uses list. This is handled by the machine dependent
    // method MDFixupDefUseLists().
    for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
        if (this->features & UseMacros[OpNum]) { // USE
            op_t TempOp = this->SMPcmd.Operands[OpNum];
            if (MDKnownOperandType(TempOp)) {
                // NOTE(review): same restored DebugFlag guard as in the DEF loop -- confirm.
                if (DebugFlag) {
                    SMP_msg("DEBUG: Setting USE for: ");
                    SMP_msg("\n");
                }
                this->Uses.SetRef(TempOp);
            }
        } // end if USE
    } // end for (OpNum = 0; ...)
    return;
} // end of SMPInstr::BuildSMPDefUseLists()
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
// Declare a branch/jump to be a tail call, clean up def/use lists.
void SMPInstr::SetTailCall(void) {
this->booleans1 |= INSTR_SET_TAIL_CALL;
if (this->type == COND_BRANCH) {
this->SetCondTailCall();
}
else {
this->ResetCondTailCall();
}
this->CallTarget = this->FarBranchTarget;
this->type = RETURN;
this->GetBlock()->SetReturns(true);
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
} // end of SMPInstr::SetTailCall()
// Record original lea instruction [pseudo-]memory operand.
//  The operand is handed to the enclosing function's AddLeaOperand(), together
//  with this instruction's address.
//  Some instructions are analyzed outside of any function or block, so both the
//  block pointer and the function pointer are checked; if either is NULL the
//  operand is not recorded.
void SMPInstr::SetLeaMemUseOp(op_t NewLeaOperand) {
	if (NULL == this->BasicBlock) {
		return; // not inside a basic block
	}
	if (NULL == this->GetBlock()->GetFunc()) {
		return; // block is not attached to a function
	}
	this->GetBlock()->GetFunc()->AddLeaOperand(this->GetAddr(), NewLeaOperand);
	return;
}
// If DefReg is not already in the DEF list, add a DEF for it.
//  DefReg: register number placed in the new operand's reg field.
//  Shown:  true sets the operand's "showed" flag, false clears it.
//  Type:   operand type passed to the DEF list along with the operand.
void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
	// Build a register operand with the width of this instruction's operands.
	op_t TempDef = InitOp;
	TempDef.type = o_reg;
	TempDef.reg = DefReg;
	TempDef.dtyp = this->GetOperandDtypField();
	if (Shown)
		TempDef.set_showed();
	else
		TempDef.clr_showed();
	// Insert the operand into the DEF list; without this call the operand built
	//  above would be discarded and Type would be ignored.
	// NOTE(review): presumably SetRef() leaves an already-present DEF in place,
	//  as the header comment requires -- confirm against the SetRef() implementation.
	this->Defs.SetRef(TempDef, Type);
	return;
} // end of SMPInstr::MDAddRegDef()
// If UseReg is not already in the USE list, add a USE for it.
//  UseReg: register number placed in the new operand's reg field.
//  Shown:  true sets the operand's "showed" flag, false clears it.
//  Type:   operand type passed to the USE list along with the operand.
void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) {
	// Build a register operand with the width of this instruction's operands.
	op_t TempUse = InitOp;
	TempUse.type = o_reg;
	TempUse.reg = UseReg;
	TempUse.dtyp = this->GetOperandDtypField();
	if (Shown)
		TempUse.set_showed();
	else
		TempUse.clr_showed();
	// Insert the operand into the USE list; without this call the operand built
	//  above would be discarded and Type would be ignored.
	// NOTE(review): presumably SetRef() leaves an already-present USE in place,
	//  as the header comment requires -- confirm against the SetRef() implementation.
	this->Uses.SetRef(TempUse, Type);
	return;
} // end of SMPInstr::MDAddRegUse()
// Perform machine dependent ad hoc fixes to the def and use lists.
// For example, some multiply and divide instructions in x86 implicitly
// use and/or define register EDX. For memory phrase examples, see comment
// in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
// First, handle the uses hidden in memory addressing modes. Note that we do not
// care whether we are dealing with a memory destination operand or source
// operand, because register USEs, not DEFs, happen within the addressing expressions.
size_t OpNum;
SMPOperandType RefType;
unsigned short opcode = this->SMPcmd.itype;
int BaseReg;
int IndexReg;
ushort ScaleFactor;
ea_t displacement;
bool UseFP = true;
bool HasIndexReg = false;
bool leaInst = (NN_lea == opcode);
bool DebugFlag = (this->GetAddr() == 0x8086177);
clc5q
committed
SMP_msg("DEBUG: Fixing up DEF-USE lists for debug location\n");
#if SMP_BASEREG_POINTER_TYPE
// Some instructions are analyzed outside of any function or block when fixing up
// the IDB, so we have to assume the block and func pointers might be NULL.
if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif
if (DebugFlag) {
clc5q
committed
SMP_msg("DEBUG: UseFP = %d\n", UseFP);
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = SMPcmd.Operands[OpNum];
if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
SingleAddressReg = ((0 == displacement)
&& ((R_none == BaseReg) || (R_none == IndexReg)));
if (R_none != IndexReg) {
op_t IndexOpnd = Opnd; // Init to current operand field values
clc5q
committed
IndexOpnd.dtyp = STARS_ISA_dtyp; // Full width for addressing regs
IndexOpnd.type = o_reg; // Change type and reg fields
IndexOpnd.reg = (ushort) IndexReg;
IndexOpnd.hasSIB = 0;
IndexOpnd.set_showed();
if (0 == ScaleFactor)
this->Uses.SetRef(IndexOpnd);
else { // scaling == shift ==> NUMERIC
HasIndexReg = true;
this->Uses.SetRef(IndexOpnd, NUMERIC);
if (R_none != BaseReg) {
op_t BaseOpnd = Opnd; // Init to current operand field values
clc5q
committed
BaseOpnd.dtyp = STARS_ISA_dtyp; // Full width for addressing regs
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = (ushort) BaseReg;
BaseOpnd.set_showed();
RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
// Other registers used as base registers should get their USEs as
// base registers typed as POINTER, which might get refined later
// to STACKPTR, GLOBALPTR, HEAPPTR, etc.
// NOTE: the NN_lea opcode is often used without a true base register.
// E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
// could not be done in one instruction without using the addressing
// modes of the machine to do the arithmetic. We don't want to set the
// USE of EAX to POINTER in this case, so we will conservatively skip
// all lea instructions here.
// We cannot be sure that a register is truly a base register unless
// there is also an index register. E.g. with reg+displacement, we
// could have memaddr+indexreg or basereg+offset, depending on what
// the displacement is. The exception is if there is no offset and only
// one addressing register, e.g. mov eax,[ebx].
if (BaseOpnd.is_reg(MD_STACK_POINTER_REG) || (UseFP && BaseOpnd.is_reg(MD_FRAME_POINTER_REG))
|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
#endif
this->Uses.SetRef(BaseOpnd, RefType);
} // end if R_none != BaseReg
} // end if (o_phrase or o_displ operand)
} // end for (all operands)
// The lea (load effective address) instruction looks as if it has
// a memory USE: lea ebx,[edx+esi]
// However, this instruction is really just: ebx := edx+esi
// Now that the above code has inserted the "addressing" registers
// into the USE list, we should remove the "memory USE".
if (leaInst) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t UseOp = CurrUse->GetOp();
if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) {
this->EraseUse(CurrUse);
this->USEMemOp = InitOp;
break;
}
}
}
// Next, handle repeat prefices in the instructions. The Intel REPE/REPZ prefix
// is just the text printed for SCAS/CMPS instructions that have a REP prefix.
// Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
// REPNE/REPNZ only applies to SCAS and CMPS instructions.
bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
if (HasRepPrefix && HasRepnePrefix)
SMP_msg("REP and REPNE both present at %lx %s\n", (unsigned long) this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
if (HasRepPrefix || HasRepnePrefix) {
// All repeating instructions use ECX as the countdown register.