if (this->BasicBlock->IsLocalName(SearchOp)) {
(void) this->BasicBlock->PropagateLocalDefType(SearchOp, HEAPPTR,
this->GetAddr(), SSANum, false);
}
else { // global name
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
(void) this->BasicBlock->PropagateGlobalDefType(SearchOp, HEAPPTR,
SSANum, false);
}
} // end if "malloc"
} // end if (TargetFunc)
return changed;
} // end of SMPInstr::MDFindMallocCall()
// Is instruction a branch (conditional or unconditional) to a
// code target that is not in the current chunk?
bool SMPInstr::IsBranchToFarChunk(void) {
if (this->IsFarBranchComputed()) { // answer is cached
return this->IsBranchesToFarChunk();
}
func_t *CurrChunk = get_fchunk(this->address);
bool FarBranch = false;
if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
// Instruction is a direct branch, conditional or unconditional
if (this->NumUses() > 0) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t JumpTarget = CurrUse->GetOp();
if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
// Branches to a code address
// stdclib sometimes has jumps to zero and calls to zero. These are dead code.
if (0 != JumpTarget.addr) {
func_t *TargetChunk = get_fchunk(JumpTarget.addr);
// Is target address within the same chunk as the branch?
FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA);
if (FarBranch) {
this->FarBranchTarget = JumpTarget.addr;
}
}
}
}
}
}
if (FarBranch) {
this->SetBranchesToFarChunk();
}
this->SetFarBranchComputed();
return FarBranch;
} // end of SMPInstr::IsBranchToFarChunk()
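// Thin accessor wrappers: each method below simply forwards to the corresponding
// setter on this instruction's DEF or USE set (Defs/Uses) and returns the resulting
// set iterator.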
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
return this->Uses.SetSSANum(CurrOp, SSASub);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
return this->Defs.SetSSANum(CurrOp, SSASub);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
return this->Uses.SetType(CurrOp, CurrType, this);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
return this->Defs.SetType(CurrOp, CurrType, this);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
return this->Defs.SetMetadata(CurrOp, Status);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) {
return this->Defs.SetIndWrite(CurrOp, IndWriteFlag);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) {
return this->Uses.SetNoTruncation(CurrOp, NoTruncFlag);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) {
return this->Defs.SetNoOverflow(DefOp, NoOverflowFlag);
};
// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
bool DebugFlag = false;
if (0x8049b00 == this->address) {
// Setting up breakpoint line.
DebugFlag = true;
}
// Fill cmd structure with disassembly of instr
if (!SMPGetCmd(this->address, this->SMPcmd, this->features))
return; // bail out if the disassembly could not be obtained
unsigned short opcode = this->SMPcmd.itype;
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
this->type = DFACategory[opcode];
// Record optimization category.
this->OptType = OptCategory[opcode];
if ((NN_int == opcode) || (NN_into == opcode) || (NN_int3 == opcode)) {
this->SetInterrupt();
}
else {
this->ResetInterrupt();
}
// Fix the IDA Pro mistakes in the operand list.
this->MDFixupIDAProOperandList();
// See if instruction is an ASM idiom for clearing a register.
if ((NN_xor == opcode) || (NN_lea == opcode)) {
ushort FirstReg;
if (o_reg == this->SMPcmd.Operands[0].type) {
FirstReg = this->SMPcmd.Operands[0].reg;
op_t SecondOpnd = this->SMPcmd.Operands[1];
if (NN_xor == opcode) {
// Check for xor of reg with itself
if (SecondOpnd.is_reg(FirstReg)) {
this->SetRegClearIdiom();
}
}
else { // must be lea
// check for lea reg,[nobasereg+nonindexreg+0]
if ((SecondOpnd.type >= o_mem) && (SecondOpnd.type <= o_displ)) {
int BaseReg, IndexReg;
ushort ScaleFactor;
ea_t Offset;
MDExtractAddressFields(SecondOpnd, BaseReg, IndexReg, ScaleFactor, Offset);
if ((R_none == BaseReg) && (R_none == IndexReg) && (0 == Offset)) {
this->SetRegClearIdiom();
}
}
}
}
}
// See if instruction is simple nop or ASM idiom for nop.
if (this->MDIsNop()) {
this->SetNop();
}
// Build the DEF and USE lists for the instruction.
this->FindMemOps();
this->BuildSMPDefUseLists();
// Determine whether the instruction is a jump target by looking
// at its cross references and seeing if it has "TO" code xrefs.
SMP_xref_t xrefs;
for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
this->SetJumpTarget();
break;
}
}
// If instruction is a call or indirect call, see if a call target has been recorded
// by IDA Pro.
if (this->GetDataFlowType() == INDIR_CALL) {
for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL);
ok;
ok = xrefs.SMP_next_from()) {
if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) {
// Found a code target, with its address in CurrXrefs.to
if (xrefs.GetTo() == (this->address + this->GetCmd().size)) {
// A call instruction will have two targets: the fall through to the
// next instruction, and the called function. We want to find
// the called function.
continue;
}
// We found a target, not the fall-through.
this->CallTarget = xrefs.GetTo();
SMP_msg("Found indirect call target %x at %x\n",
xrefs.GetTo(), this->address);
break;
}
} // end for all code xrefs
if (BADADDR == this->CallTarget) {
SMP_msg("WARNING: Did not find indirect call target at %x\n",
this->address);
}
} // end if INDIR_CALL
else if (this->GetDataFlowType() == CALL) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
optype_t OpType = CurrUse->GetOp().type;
if ((OpType == o_near) || (OpType == o_far)) {
this->CallTarget = CurrUse->GetOp().addr;
}
}
if (BADADDR == this->CallTarget) {
SMP_msg("ERROR: Target not found for direct call at %x\n", this->address);
}
}
if (DebugFlag) {
SMP_msg("Analyzed debug instruction at %x\n", this->address);
}
return;
} // end of SMPInstr::Analyze()
// Analyze the floating point NOP marker instruction at the top of the function.
void SMPInstr::AnalyzeMarker(void) {
// Fill member variable SMPcmd structure with disassembly of instr
(void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
this->SMPcmd.itype = NN_fnop;
this->SMPcmd.size = 1;
this->SMPcmd.ea = this->address;
// Set the instr disassembly text.
DisAsmText.SetMarkerInstText(this->GetAddr());
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
this->type = DFACategory[this->SMPcmd.itype];
// Record optimization category.
this->OptType = OptCategory[this->SMPcmd.itype];
return;
} // end of SMPInstr::AnalyzeMarker()
// Detect oddities of call instructions, such as pseudo-calls that are
// actually jumps within a function
void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
if (BADADDR != this->CallTarget) {
if (this->CallTarget == FirstFuncAddr) {
this->SetDirectRecursiveCall();
}
else {
this->ResetDirectRecursiveCall();
if ((this->CallTarget > FirstFuncAddr)
&& (this->CallTarget < LastFuncAddr)) {
this->SetCallUsedAsJump();
this->type = JUMP;
}
else {
this->ResetCallUsedAsJump();
}
}
}
return;
} // end of SMPInstr::AnalyzeCallInst()
sval_t SMPInstr::AnalyzeStackPointerDelta(sval_t IncomingDelta, sval_t PreAllocDelta) {
uint16 InstType = this->SMPcmd.itype;
sval_t InstDelta = StackAlteration[InstType];
SMPitype FlowType = this->GetDataFlowType();
bool TailCall = this->IsTailCall();
if (this->IsCallUsedAsJump() || this->MDIsInterruptCall() || this->IsCondTailCall()) {
// Call is used within function as a jump. Happens when setting up
// thunk offsets, for example; OR, call is an interrupt call, in which
// the interrupt return cleans up the stack, leaving a delta of zero, but
// we do not have the system call code to analyze, OR, the call is a conditional
// jump to another function (conditional tail call), in which case the current
// function must have a return statement to fall into which will clean up the
// only thing left on the stack (the return address) and the conditional jump
// has no effect on the stack pointer.
; // leave InstDelta equal to negative or zero value from StackAlterationTable[]
}
else if (this->IsRecursiveCall()) {
// We don't have the net stack delta for our own function yet, so we cannot
// look it up. We must assume that each call has no net effect on the stack delta.
// Alternatively, we could call this->GetBlock()->GetFunc()->GetStackDeltaForCallee() as below.
InstDelta = 0;
}
else if (this->IsAllocaCall()) {
InstDelta = STARS_DEFAULT_ALLOCA_SIZE;
}
else if ((CALL == FlowType) || (INDIR_CALL == FlowType) || TailCall) {
// A real call instruction, which pushes a return address on the stack,
// not a call used as a branch within the function. A return instruction
// will usually cancel out the stack push that is implicit in the call, which
// means that the function will have a net stack ptr delta of +4, which will
// cancel out the -4 value of the call instruction and set the delta to zero.
// However, this is not true in all cases, so we get the net stack ptr delta
// directly from the called function unless it is an unresolved indirect call,
// in which case we assume +4. !!!!****!!!! In the future, we could analyze
// the code around an unresolved indirect call to see if it seems to be
// removing items left on the stack by the callee.
// SPECIAL CASE: A jump used as a tail call will have a stack ptr effect that is equal
// to the net stack ptr effect of its target function, usually +4, whereas a jump
// would otherwise have a net stack ptr effect of 0.
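// Illustrative arithmetic (not from the original source): on x86-32 the call itself
// contributes -4 for the pushed return address. A callee ending in a plain RET has a
// net delta of +4, so the call site nets 0; a callee ending in RET 8 has a net delta
// of +12, so the call site nets -4 + 12 = +8 (the callee popped two dword arguments).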
ea_t CalledFuncAddr = this->GetCallTarget();
if ((BADADDR == CalledFuncAddr) || (0 == CalledFuncAddr)) {
InstDelta = 0;
}
else { // We have a call target
SMPFunction *CalleeFunc = this->GetBlock()->GetFunc()->GetProg()->FindFunction(CalledFuncAddr);
sval_t AdjustmentDelta;
if (CalleeFunc) {
if (!CalleeFunc->HasSTARSStackPtrAnalysisCompleted()) {
// Phase ordering issue in the call graph. A mutually recursive clique of functions has to
// be broken by starting processing somewhere, and all callees cannot be processed before
// we start. If we got our stack down to zero and then made a tail call, then we have to assume
// that the callee will use our return address, so we assume the default stack delta. If not a
// tail call, we ask our function to see if the information is available from IDA Pro analyses,
// or if it can be inferred from the fact that the call is followed by a stack adjustment.
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at tail call inst %x ; normal delta assumed\n", this->GetAddr());
}
else {
AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(CalledFuncAddr);
InstDelta += AdjustmentDelta;
SMP_msg("WARNING: Callee stack ptr analysis not yet performed at inst %x ; stack adjustment used\n", this->GetAddr());
}
}
else if (!CalleeFunc->StackPtrAnalysisSucceeded()) {
// Callee analyses were done, but they failed. In order to proceed, we have to assume
// the same situation as we just did in the case where analyses have not been performed.
SMP_msg("WARNING: Callee stack ptr analysis failed at inst %x ; normal delta assumed\n", this->GetAddr());
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
}
else {
AdjustmentDelta = this->GetBlock()->GetFunc()->GetStackDeltaForCallee(this->GetAddr());
InstDelta += AdjustmentDelta;
}
}
else {
// Callee's analyses have succeeded, so get delta straight from callee.
InstDelta += CalleeFunc->GetNetStackPtrDelta();
}
}
else {
#if 0
SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n",
CalledFuncAddr, this->GetAddr());
InstDelta = SMP_STACK_DELTA_ERROR_CODE;
#else
SMP_msg("ERROR: SMPInstr::AnalyzeStackPointerDelta failed to find func at %x in inst %x\n",
CalledFuncAddr, this->GetAddr());
if (TailCall) {
InstDelta = CALLING_CONVENTION_DEFAULT_FUNCTION_STACK_DELTA;
}
else {
InstDelta = 0;
}
#endif
}
}
} // end CALL or INDIR_CALL or TailCall case
else if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
bool IsLeaveInstr = this->MDIsLeaveInstr();
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, IncomingDelta, PreAllocDelta);
}
return InstDelta;
} // end of SMPInstr::AnalyzeStackPointerDelta()
// Total the stack adjustment bytes, as happens after a call to a function that leaves
// outgoing args on the stack or swallows incoming args from the stack.
sval_t SMPInstr::FindStackAdjustment(void) {
uint16 InstType = this->SMPcmd.itype;
sval_t InstDelta = StackAlteration[InstType];
if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
bool IsLeaveInstr = this->MDIsLeaveInstr();
if (!IsLeaveInstr) {
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, 0, 0);
}
else {
InstDelta = 0; // LEAVE is not the kind of instr we are looking for
}
}
return InstDelta;
} // end of SMPInstr::FindStackAdjustment()
// Normalize stack operands to have a displacement from the stack pointer value on entry to the function,
// rather than the current stack pointer value.
// UseFP indicates we are using a frame pointer in the function.
// FPDelta holds the stack delta (normalized) for the frame pointer.
// DefOp comes in with the operand to be normalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
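// Illustrative example (values assumed, not from the original source): if the stack pointer
// is currently 32 bytes below its value at function entry, [esp+4] normalizes to [esp-28];
// with FPDelta == -4, [ebp+8] normalizes to [esp+4] (the first incoming argument).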
bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &DefOp) {
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
op_t OldOp = DefOp;
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta;
if (DefOp.hasSIB) {
// We must deal with a potentially indexed memory expression. We want to
// normalize two different cases here: e.g. [esp+ebx+4] will become [esp+ebx-24]
// and [ebp+ebx-8] will become [esp+ebx-12] after normalization. A wrinkle
// on the second case is when the base register and index register are swapped
// in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves
// correcting the index/base reg order in the SIB, because an index reg of ESP
// is the SIB encoding for "no index register" and we cannot leave it like that.
int BaseReg = sib_base(DefOp);
int IndexReg = (int) sib_index(DefOp);
if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register
IndexReg = R_none;
if (BaseReg == X86_STACK_POINTER_REG) {
// We probably have an indexed ESP-relative operand.
// We leave the sib byte alone and normalize the offset.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
else {
// Must be EBP-relative.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Unfortunately, when we are dealing with a SIB byte in the opcode, we cannot
// just say DefOp.reg = MD_STACK_POINTER_REG to convert from the frame pointer
// to the stack pointer. Instead, we have to get into the nasty machine code
// level and change the SIB bits that specify either the base register or the
// index register, whichever one is the frame pointer.
if (BaseReg == X86_FRAME_POINTER_REG) {
// The three least significant bits of the SIB byte are the base register.
// They must contain a 5, which is the x86 value for register EBP, and we
// want to convert it to a 4, denoting register ESP. We can just zero out
// the least significant bit to accomplish that.
DefOp.sib &= 0xfe;
}
else {
// We sometimes have an instruction in which the frame pointer is used as
// the "index" register in the SIB byte, and the true index register is
// in the "base" register position in the SIB byte.
assert(IndexReg == X86_FRAME_POINTER_REG);
// The true index reg is in the lowest three bits, while the next three
// bits must contain a 5 (register EBP) and we want to make them a 4 (ESP).
// We must swap base and index regs as we normalize (see explanation above).
char SIBtemp = DefOp.sib;
char SIBindex = SIBtemp & 0x38;
char SIBbase = SIBtemp & 0x07;
assert ((SIBindex >> 3) == 5); // must be EBP
SIBtemp &= 0xc0; // zero out lower 6 bits; upper 2 bits are scale factor - leave them alone
SIBtemp |= (SIBbase << 3); // make old base reg (e.g. ebx) into a proper index reg
SIBtemp |= 0x04; // make the new base reg be 4 (reg ESP)
DefOp.sib = SIBtemp;
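// Worked example (illustrative, using the standard x86 SIB layout scale[7:6] index[5:3] base[2:0]):
// for [ebx+ebp-4] the incoming SIB byte is 00 101 011 = 0x2b (index EBP, base EBX); the bit
// manipulation above yields 00 011 100 = 0x1c (index EBX, base ESP), i.e. an [esp+ebx+disp] operand.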
}
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(X86_STACK_POINTER_REG, false);
}
}
else if (DefOp.reg == MD_FRAME_POINTER_REG) {
// If FPDelta is -4 and SignedOffset is +8, then we have [ebp+8] as DefOp, and this
// is equivalent to [esp+4] where esp has its entry value, i.e. this would be the first incoming
// argument. If SignedOffset is -12, we have [ebp-12] as DefOp, and this is [esp-16] when
// normalized to the entry point value of the stack pointer. In both cases, we can see that the
// normalized stack delta is just FPDelta+SignedOffset.
NormalizedDelta = FPDelta + (sval_t) SignedOffset;
// Now, we simply convert the memory operand from EBP to ESP and replace the SignedOffset with the
// NormalizedDelta just computed.
DefOp.reg = MD_STACK_POINTER_REG;
this->SetFPNormalizedToSP();
// Add the stack pointer to the USE set for the instruction.
this->MDAddRegUse(DefOp.reg, false);
}
else {
assert(DefOp.reg == MD_STACK_POINTER_REG);
// We only need to adjust the offset to reflect the change in the stack pointer since the function
// was entered, e.g. [esp+4] is normalized to [esp-28] if the current esp value is 32 less than it
// was upon function entry. We get the value "-32" in that case from a member variable.
NormalizedDelta = this->GetStackPtrOffset() + (sval_t) SignedOffset;
}
DefOp.addr = (ea_t) NormalizedDelta; // common to frame and stack pointer cases
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
return true;
}
else {
return false;
}
} // end of SMPInstr::MDComputeNormalizedDataFlowOp()
// Normalize stack operands in all DEFs and USEs to have stack deltas relative to the function entry stack pointer.
// Return true if any stack DEFs or USEs were normalized.
bool SMPInstr::MDNormalizeStackOps(bool UseFP, sval_t FPDelta, bool Recomputing, sval_t DeltaIncrement) {
bool StackOpFound = false;
bool OpNormalized;
bool UniqueDEFMemOp = true; // Does DEFMemOp not match any DEFs?
bool UniqueUSEMemOp = true; // Does USEMemOp not match any USEs?
bool UniqueLeaUSEMemOp = true; // Does LeaUSEMemOp not match any USEs?
bool UniqueMoveSource = true; // Does MoveSource not match any USEs?
set<DefOrUse, LessDefUse>::iterator DefIter, UseIter;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> > DefWorkList, UseWorkList;
list<pair<set<DefOrUse, LessDefUse>::iterator, op_t> >::iterator WorkIter;
op_t OldOp, NewOp;
// Find all the DEFs that need changing, and put their iterators into a list.
// Normalizing stack ops could change their sort order, hence we could skip over
// a DEF in the set by erasing a DEF and reinserting a normalized DEF, so we
// make all the changes after we iterate through the DEFS set.
for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
OldOp = DefIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->DEFMemOp)) {
UniqueDEFMemOp = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> DefItem(DefIter, NewOp);
DefWorkList.push_back(DefItem);
}
}
}
// Now go through the DEF worklist and change stack operands to normalized stack operands.
for (WorkIter = DefWorkList.begin(); WorkIter != DefWorkList.end(); ++WorkIter) {
DefIter = WorkIter->first;
DefIter = this->Defs.SetOp(DefIter, WorkIter->second);
}
// Normalize op_t private data member DEFs.
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueDEFMemOp, this->DEFMemOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->DEFMemOp);
}
// Find all USEs that need changing, and build a second work list.
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
OldOp = UseIter->GetOp();
NewOp = OldOp;
if ((o_reg != NewOp.type) && (o_imm != NewOp.type)) {
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, true, NewOp);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, NewOp);
}
if (OpNormalized) {
StackOpFound = true;
if (IsEqOp(OldOp, this->USEMemOp)) {
UniqueUSEMemOp = false;
}
if (IsEqOp(OldOp, this->GetLeaMemUseOp())) {
UniqueLeaUSEMemOp = false;
}
if (IsEqOp(OldOp, this->MoveSource)) {
UniqueMoveSource = false;
}
pair<set<DefOrUse, LessDefUse>::iterator, op_t> UseItem(UseIter, NewOp);
UseWorkList.push_back(UseItem);
}
}
}
// Now go through the USE worklist and change stack operands to normalized stack operands.
for (WorkIter = UseWorkList.begin(); WorkIter != UseWorkList.end(); ++WorkIter) {
UseIter = WorkIter->first;
UseIter = this->Uses.SetOp(UseIter, WorkIter->second);
}
// Normalize op_t private data member USEs.
if (Recomputing) {
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueUSEMemOp, this->USEMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueLeaUSEMemOp, this->LeaUSEMemOp);
OpNormalized = this->MDRecomputeNormalizedDataFlowOp(DeltaIncrement, UniqueMoveSource, this->MoveSource);
}
else {
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->USEMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->LeaUSEMemOp);
OpNormalized = this->MDComputeNormalizedDataFlowOp(UseFP, FPDelta, this->MoveSource);
}
// Declare victory.
this->SetDefsNormalized();
return StackOpFound;
} // end of SMPInstr::MDNormalizeStackOps()
// Renormalize SP-relative stack operands in functions that call alloca() by adding DeltaIncrement to their stack displacements.
// DefOp comes in with the operand to be renormalized, and contains the normalized operand upon return.
// Return true if operand is a register or stack location, false otherwise (true => include in data flow analysis sets and SSA.)
bool SMPInstr::MDRecomputeNormalizedDataFlowOp(sval_t DeltaIncrement, bool UpdateMaps, op_t &DefOp) {
op_t OldOp = DefOp;
if (o_reg == DefOp.type) {
return true;
}
else if (MDIsStackAccessOpnd(DefOp, this->GetBlock()->GetFunc()->UsesFramePointer())) {
if (this->HasFPNormalizedToSP()) {
// FP-relative operands do not change in alloca() functions when the alloca()
// causes the SP to change.
return true;
}
// The remaining cases are simple. The ESP-relative displacement is incremented by
// DeltaIncrement, regardless of the presence of a SIB byte.
int SignedOffset = (int) DefOp.addr;
sval_t NormalizedDelta = DeltaIncrement + (sval_t) SignedOffset;
DefOp.addr = (ea_t) NormalizedDelta;
if ((o_phrase == DefOp.type) && (0 != NormalizedDelta)) {
// mov [esp],eax has an [esp] operand of type o_phrase, because there is no
// displacement field. After normalization, it will have a displacement field, so
// it has become an operand like [esp-32] and is now type o_displ.
DefOp.type = o_displ;
}
if (UpdateMaps) { // We don't update maps for duplicate entries, e.g. USEMemOp, DEFMemOp, MoveSource
this->GetBlock()->GetFunc()->AddNormalizedStackOperand(OldOp, this->GetAddr(), DefOp);
}
return true;
}
else {
return false;
}
} // end of SMPInstr::MDRecomputeNormalizedDataFlowOp()
// If NormOp is a normalized stack memory operand, unnormalize it.
void SMPInstr::MDGetUnnormalizedOp(op_t &NormOp) {
sval_t SignedOffset;
bool UseFP = this->GetBlock()->GetFunc()->UsesFramePointer();
if (this->AreDefsNormalized() && MDIsStackAccessOpnd(NormOp, UseFP)) {
if (this->HasFPNormalizedToSP()) {
// Need to convert NormOp back to frame-pointer-relative address.
if (NormOp.hasSIB) {
// Convert base register from stack pointer back to frame pointer.
NormOp.sib |= 0x01;
}
else {
NormOp.reg = MD_FRAME_POINTER_REG;
}
SignedOffset = (sval_t) NormOp.addr;
SignedOffset -= this->GetBlock()->GetFunc()->GetFramePtrStackDelta();
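// Illustrative example (values assumed): with a frame-pointer stack delta of -8, a
// normalized operand [esp-12] that was originally FP-relative unnormalizes to
// addr = -12 - (-8) = -4, i.e. [ebp-4].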
}
else {
// NormOp should remain stack-pointer-relative address, but it
// should be a positive offset from the current stack pointer instead
// of a negative offset from the entry point of the function.
SignedOffset = (sval_t) NormOp.addr;
SignedOffset -= this->GetStackPtrOffset();
assert(0 <= SignedOffset);
}
NormOp.addr = (ea_t) SignedOffset;
}
return;
} // end of SMPInstr::MDGetUnnormalizedOp()
// Find USE-not-DEF operand that is not the flags register.
op_t SMPInstr::GetSourceOnlyOperand(void) {
size_t OpNum;
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
;
}
else if (this->features & UseMacros[OpNum]) { // USE
op_t CurrOp = this->SMPcmd.Operands[OpNum];
if (!(CurrOp.is_reg(X86_FLAGS_REG))) {
return CurrOp;
}
}
}
// It is expected that increment, decrement, and floating point stores
// will not have a USE-only operand. Increment and decrement have an
// operand that is both USEd and DEFed, while the floating point stack
// registers are implicit in most floating point opcodes. Also, exchange
// and exchange-and-add instructions have multiple DEF-and-USE operands.
int TypeGroup = SMPTypeCategory[this->SMPcmd.itype];
if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12)
&& (TypeGroup != 13)) {
SMP_msg("ERROR: Could not find source only operand at %x in %s\n",
this->address, DisAsmText.GetDisAsm(this->GetAddr()));
}
return InitOp;
} // end of SMPInstr::GetSourceOnlyOperand()
// Should apparent memory operands be ignored? e.g. lea opcode on x86
bool SMPInstr::MDIgnoreMemOps(void) {
bool leaInst = (NN_lea == this->SMPcmd.itype);
return leaInst;
}
// Find memory DEFs and USEs, store in DEFMemOp and USEMemOp
void SMPInstr::FindMemOps(void) {
size_t OpNum;
if (!(this->MDIgnoreMemOps())) {
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempOp = this->SMPcmd.Operands[OpNum];
if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory
if (this->features & DefMacros[OpNum]) { // DEF
if (this->DEFMemOp.type == o_void) { // only save first mem DEF
this->DEFMemOp = TempOp;
}
}
if (this->features & UseMacros[OpNum]) { // USE
if (this->USEMemOp.type == o_void) { // only save first mem USE
this->USEMemOp = TempOp;
}
}
}
} // end for (OpNum = 0; ...)
}
this->SetMemOpsFound();
return;
} // end of SMPInstr::FindMemOps()
// Fix problems with the operands list in SMPcmd.
void SMPInstr::MDFixupIDAProOperandList(void) {
// IDA Pro often takes the instruction imul eax,0x80 and creates the following operands and features bits:
// Opnd[0] = EAX, both DEF and USE
// Opnd[1] = EAX, just USE
// Opnd[2] = immediate, neither DEF nor USE
// Our RTL building keys in on the DEF/USE bits in features, so this looks like imul eax,eax to us.
// We want it to look like:
// Opnd[0] = EAX, both DEF and USE
// Opnd[1] = immediate, just USE
if (NN_imul == this->SMPcmd.itype) {
op_t Opnd2 = this->SMPcmd.Operands[2];
if ((!(this->features & DefMacros[2]))
&& (!(this->features & UseMacros[2]))) {
if (o_void != Opnd2.type) {
// We have a third operand that is neither DEF nor USE.
SMP_msg("INFO: Fixing IMUL operand list at %x\n", this->GetAddr());
this->Dump();
// Two cases: Operands[0] == Operands[1], e.g. imul eax,Opnd2
// or else three-operand form: e.g. imul eax,ecx,Opnd2
// For the three-operand form, make sure Opnd0 is DEF only, others
// are USE only. For the two-operand form, make sure Opnd0 is DEF and USE,
// Opnd1 is current Opnd2 and is USE only.
op_t Opnd0 = this->SMPcmd.Operands[0];
op_t Opnd1 = this->SMPcmd.Operands[1];
if (IsEqOp(Opnd0, Opnd1)) {
// No need for three-operand form.
this->features |= DefMacros[0];
this->features |= UseMacros[0];
this->SMPcmd.Operands[1] = Opnd2;
this->SMPcmd.Operands[2] = InitOp;
}
else { // Must have three-operand form.
this->features |= UseMacros[2]; // set missing USE bit.
this->features &= (~UseMacros[0]); // Ensure no USE of Opnd0.
}
this->Dump();
}
}
}
return;
} // SMPInstr::MDFixupIDAProOperandList()
// Fill the Defs and Uses private data members.
void SMPInstr::BuildSMPDefUseLists(void) {
size_t OpNum;
bool DebugFlag = (0x8049b00 == this->GetAddr());
bool WidthDoubler = this->MDDoublesWidth();
this->Defs.clear();
this->Uses.clear();
// Start with the Defs.
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (WidthDoubler) {
// Opcodes that sign-extend a byte to a word, or a word to a dword,
// have only one operand. It is implicit, and it is the shorter USE.
// That means the DEF will have the same width as the USE, e.g. if
// we are sign-extending AX to EAX, the USE and DEF would both be AX without
// a special fix. We fix this problem with the DEF operand now.
if (TempOp.dtyp == dt_byte) {
TempOp.dtyp = dt_word;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_word) {
TempOp.dtyp = dt_dword;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_dword) {
TempOp.dtyp = dt_qword;
}
else {
SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %x dtyp: %d\n",
OpNum, this->address, TempOp.dtyp);
}
}
if (MDKnownOperandType(TempOp)) {
SMP_msg("DEBUG: Setting DEF for: ");
SMP_msg("\n");
this->Defs.SetRef(TempOp);
}
}
} // end for (OpNum = 0; ...)
if (this->IsRegClearIdiom()) {
// Something like xor eax,eax clears eax but does not really
// use eax. It is the same as mov eax,0 and we don't want to
// extend the prior def-use chain for eax to this instruction
// by treating the instruction as xor eax,eax. Instead, we
// build the DEF and USE lists and RTL as if it were mov eax,0.
op_t ImmOp = InitOp;
ImmOp.type = o_imm;
ImmOp.value = 0;
this->Uses.SetRef(ImmOp, NUMERIC);
return;
}
// Now, do the Uses. Uses have special case operations, because
// any memory operand could have register uses in the addressing
// expression, and we must create Uses for those registers. For
// example: mov eax,[ebx + esi*2 + 044Ch]
// This is a two-operand instruction with one def: eax. But
// there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi.
// The first use is an op_t of type o_phrase (memory phrase),
// which can be copied from cmd.Operands[1]. Likewise, we just
// copy cmd.Operands[0] into the defs list. However, we must create
// op_t types for register ebx and register esi and append them
// to the Uses list. This is handled by the machine dependent
// method MDFixupDefUseLists().
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & UseMacros[OpNum]) { // USE
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (MDKnownOperandType(TempOp)) {
SMP_msg("DEBUG: Setting USE for: ");
SMP_msg("\n");
this->Uses.SetRef(TempOp);
}
}
} // end for (OpNum = 0; ...)
return;
} // end of SMPInstr::BuildSMPDefUseLists()
// Declare a branch/jump to be a tail call, clean up def/use lists.
void SMPInstr::SetTailCall(void) {
this->booleans1 |= INSTR_SET_TAIL_CALL;
if (this->type == COND_BRANCH) {
this->SetCondTailCall();
}
else {
this->ResetCondTailCall();
}
this->CallTarget = this->FarBranchTarget;
this->type = RETURN;
this->GetBlock()->SetReturns(true);
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
} // end of SMPInstr::SetTailCall()
// If DefReg is not already in the DEF list, add a DEF for it.
void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
op_t TempDef = InitOp;
TempDef.type = o_reg;
TempDef.reg = DefReg;
if (Shown)
TempDef.set_showed();
else
TempDef.clr_showed();
this->Defs.SetRef(TempDef, Type);
return;
} // end of SMPInstr::MDAddRegDef()
// If UseReg is not already in the USE list, add a USE for it.
void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) {
op_t TempUse = InitOp;
TempUse.type = o_reg;
TempUse.reg = UseReg;
if (Shown)
TempUse.set_showed();
else
TempUse.clr_showed();
this->Uses.SetRef(TempUse, Type);
return;
} // end of SMPInstr::MDAddRegUse()
// Perform machine dependent ad hoc fixes to the def and use lists.
// For example, some multiply and divide instructions in x86 implicitly
// use and/or define register EDX. For memory phrase examples, see comment
// in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
// First, handle the uses hidden in memory addressing modes. Note that we do not
// care whether we are dealing with a memory destination operand or source
// operand, because register USEs, not DEFs, happen within the addressing expressions.
size_t OpNum;
SMPOperandType RefType;
unsigned short opcode = this->SMPcmd.itype;
int BaseReg;
int IndexReg;
ushort ScaleFactor;
ea_t displacement;
bool UseFP = true;
bool HasIndexReg = false;
bool leaInst = (NN_lea == opcode);
bool DebugFlag = (this->GetAddr() == 0x8086177);
if (DebugFlag) {
SMP_msg("DEBUG: Fixing up DEF-USE lists for debug location\n");
}
#if SMP_BASEREG_POINTER_TYPE
// Some instructions are analyzed outside of any function or block when fixing up
// the IDB, so we have to assume the block and func pointers might be NULL.
if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif
if (DebugFlag) {
SMP_msg("DEBUG: UseFP = %d\n", UseFP);
}
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = SMPcmd.Operands[OpNum];
if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
bool SingleAddressReg = ((0 == displacement)
&& ((R_none == BaseReg) || (R_none == IndexReg)));
if (R_none != IndexReg) {
op_t IndexOpnd = Opnd; // Init to current operand field values
IndexOpnd.type = o_reg; // Change type and reg fields
IndexOpnd.reg = (ushort) IndexReg;
IndexOpnd.hasSIB = 0;
IndexOpnd.set_showed();
if (0 == ScaleFactor)
this->Uses.SetRef(IndexOpnd);
else { // scaling == shift ==> NUMERIC
HasIndexReg = true;
this->Uses.SetRef(IndexOpnd, NUMERIC);
}
}
if (R_none != BaseReg) {
op_t BaseOpnd = Opnd; // Init to current operand field values
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = (ushort) BaseReg;
BaseOpnd.set_showed();
RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
// Other registers used as base registers should get their USEs as
// base registers typed as POINTER, which might get refined later
// to STACKPTR, GLOBALPTR, HEAPPTR, etc.
// NOTE: the NN_lea opcode is often used without a true base register.
// E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
// could not be done in one instruction without using the addressing
// modes of the machine to do the arithmetic. We don't want to set the
// USE of EAX to POINTER in this case, so we will conservatively skip
// all lea instructions here.
// We cannot be sure that a register is truly a base register unless
// there is also an index register. E.g. with reg+displacement, we
// could have memaddr+indexreg or basereg+offset, depending on what
// the displacement is. The exception is if there is no offset and only
// one addressing register, e.g. mov eax,[ebx].
if (BaseOpnd.is_reg(MD_STACK_POINTER_REG) || (UseFP && BaseOpnd.is_reg(MD_FRAME_POINTER_REG))
|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
#endif
this->Uses.SetRef(BaseOpnd, RefType);
#if SMP_BASEREG_POINTER_TYPE
}
#endif
} // end if R_none != BaseReg
} // end if (o_phrase or o_displ operand)
} // end for (all operands)
// The lea (load effective address) instruction looks as if it has
// a memory USE: lea ebx,[edx+esi]
// However, this instruction is really just: ebx := edx+esi
// Now that the above code has inserted the "addressing" registers
// into the USE list, we should remove the "memory USE".
if (leaInst) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t UseOp = CurrUse->GetOp();
if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) {
this->LeaUSEMemOp = UseOp;
this->EraseUse(CurrUse);
this->USEMemOp = InitOp;