Newer
Older
clc5q
committed
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
// Does a subtraction operator get applied to same left and right operands?
bool SMPInstr::SubtractsFromItself(void) {
bool SelfSubtract = false;
size_t RTLCount = this->RTL.GetCount();
for (size_t index = 0; index < RTLCount; ++index) {
SMPRegTransfer *CurrRT = this->RTL.GetRT(index);
if ((CurrRT != NULL) && (CurrRT->HasRightSubTree())) {
CurrRT = CurrRT->GetRightTree();
SMPoperator CurrOp = CurrRT->GetOperator();
if ((SMP_SUBTRACT_BORROW == CurrOp) || (SMP_SUBTRACT == CurrOp)) {
if (!(CurrRT->HasRightSubTree())) {
// NOTE: Must change this code when we build more precise SMP_SUBTRACT_BORROW RTL.
op_t LeftOp = CurrRT->GetLeftOperand();
op_t RightOp = CurrRT->GetRightOperand();
SelfSubtract = IsEqOp(RightOp, LeftOp);
}
break;
}
}
}
return SelfSubtract;
} // end of SMPInstr::SubtractsFromItself()
// MACHINE DEPENDENT: Is instruction a return instruction?
bool SMPInstr::MDIsReturnInstr(void) const {
return ((this->SMPcmd.itype == NN_retn) || (this->SMPcmd.itype == NN_retf));
}
// MACHINE DEPENDENT: Is instruction a POP instruction?
#define FIRST_POP_INST NN_pop
#define LAST_POP_INST NN_popfq
bool SMPInstr::MDIsPopInstr(void) const {
return ((this->SMPcmd.itype >= FIRST_POP_INST)
&& (this->SMPcmd.itype <= LAST_POP_INST));
}
// MACHINE DEPENDENT: Is instruction a PUSH instruction?
#define FIRST_PUSH_INST NN_push
#define LAST_PUSH_INST NN_pushfq
bool SMPInstr::MDIsPushInstr(void) const {
return ((this->SMPcmd.itype >= FIRST_PUSH_INST)
&& (this->SMPcmd.itype <= LAST_PUSH_INST));
}
// MACHINE DEPENDENT: Is instruction an ENTER instruction?
bool SMPInstr::MDIsEnterInstr(void) const {
return ((this->SMPcmd.itype >= MD_FIRST_ENTER_INSTR)
&& (this->SMPcmd.itype <= MD_LAST_ENTER_INSTR));
}
// MACHINE DEPENDENT: Is instruction a LEAVE instruction?
bool SMPInstr::MDIsLeaveInstr(void) const {
return ((this->SMPcmd.itype >= MD_FIRST_LEAVE_INSTR)
&& (this->SMPcmd.itype <= MD_LAST_LEAVE_INSTR));
// MACHINE DEPENDENT: Is instruction a HALT instruction?
bool SMPInstr::MDIsHaltInstr(void) const {
return (NN_hlt == this->SMPcmd.itype);
}
#define MD_FIRST_COND_MOVE_INSTR NN_cmova
#define MD_LAST_COND_MOVE_INSTR NN_fcmovnu
// MACHINE DEPENDENT: Is instruction a conditional move?
bool SMPInstr::MDIsConditionalMoveInstr(void) const {
return ((this->SMPcmd.itype >= MD_FIRST_COND_MOVE_INSTR)
&& (this->SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR));
}
// MACHINE DEPENDENT: Do opcode/operands definitely indicate signed arithmetic?
// Generally, this is only true for certain variants of multiplication and division.
bool SMPInstr::MDIsSignedArithmetic(void) const {
unsigned short opcode = this->SMPcmd.itype;
if (NN_idiv == opcode)
return true;
if (NN_imul == opcode) {
// If we discard the upper N bits of the multiplication result, then the
// lower N bits are the same for signed and unsigned multiplication, and
// gcc/g++ often use the IMUL opcode for both signed and unsigned multiplies
// when only N bits of result are retained. Therefore, the SIGNED nature of
// IMUL operands can only be inferred from the case in which 2N bits are kept.
return (!(this->AreMultiplicationBitsDiscarded()));
}
else { // idiv and imul are only possible signed cases
return false;
}
} // end of SMPInstr::MDIsSignedArithmetic()
// MACHINE DEPENDENT: Is instruction a conditional jump based on an unsigned condition?
bool SMPInstr::MDIsUnsignedBranch(void) const {
unsigned short opcode = this->SMPcmd.itype;
return ((NN_ja == opcode) || (NN_jae == opcode) || (NN_jb == opcode) || (NN_jbe == opcode)
|| (NN_jna == opcode) || (NN_jnae == opcode) || (NN_jnb == opcode) || (NN_jnbe == opcode));
}
// MACHINE DEPENDENT: Is instruction a conditional jump based on a signed condition?
bool SMPInstr::MDIsSignedBranch(void) const {
unsigned short opcode = this->SMPcmd.itype;
return ((NN_jg == opcode) || (NN_jge == opcode) || (NN_jl == opcode) || (NN_jle == opcode)
|| (NN_jng == opcode) || (NN_jnge == opcode) || (NN_jnl == opcode) || (NN_jnle == opcode)
|| (NN_js == opcode) || (NN_jns == opcode));
}
// MACHINE DEPENDENT: Is instruction a boolean set based on an unsigned condition?
bool SMPInstr::MDIsUnsignedSetValue(void) const {
unsigned short opcode = this->SMPcmd.itype;
return ((NN_seta == opcode) || (NN_setae == opcode) || (NN_setb == opcode) || (NN_setbe == opcode)
|| (NN_setna == opcode) || (NN_setnae == opcode) || (NN_setnb == opcode) || (NN_setnbe == opcode));
}
// MACHINE DEPENDENT: Is instruction a boolean set based on a signed condition?
bool SMPInstr::MDIsSignedSetValue(void) const {
unsigned short opcode = this->SMPcmd.itype;
return ((NN_setg == opcode) || (NN_setge == opcode) || (NN_setl == opcode) || (NN_setle == opcode)
|| (NN_setng == opcode) || (NN_setnge == opcode) || (NN_setnl == opcode) || (NN_setnle == opcode)
|| (NN_sets == opcode) || (NN_setns == opcode));
}
// MACHINE DEPENDENT: Does instruction use a callee-saved register?
bool SMPInstr::MDUsesCalleeSavedReg(void) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t CurrOp = CurrUse->GetOp();
if (CurrOp.is_reg(R_bp) || CurrOp.is_reg(R_si)
|| CurrOp.is_reg(R_di) || CurrOp.is_reg(R_bx)) {
return true;
}
}
return false;
} // end of SMPInstr::MDUsesCalleeSavedReg()
// Is the instruction a register to register copy of a stack pointer or frame pointer
// into a general purpose register (which mmStrata will now need to track as a stack
// relative pointer)?
bool SMPInstr::MDIsStackPointerCopy(bool UseFP) {
// OptType 3 indicates a move instruction
// The lea instruction can perform three operand arithmetic, e.g.
// lea ebx,[esp+12] is just ebx:=esp+12, so it is a stack pointer copy.
if (((this->OptType == 3) || (NN_lea == this->SMPcmd.itype))
&& (this->GetFirstDef()->GetOp().type == o_reg)
&& (!(this->GetFirstDef()->GetOp().is_reg(R_sp)))
&& (!(this->HasSourceMemoryOperand()))) { // reg to reg move
if (this->GetFirstUse()->GetOp().is_reg(R_bp))
// Move of base pointer EBP into a general register
return true;
else if ((this->GetFirstUse()->GetOp().is_reg(R_sp))
&& !(this->GetFirstDef()->GetOp().is_reg(R_bp)))
// Move of ESP into something besides a base pointer
return true;
}
else if (this->GetFirstUse()->GetOp().is_reg(R_sp)) {
// Move of ESP into a register; no base pointer used in this function
return true;
}
}
return false;
} // end of SMPInstr::MDIsStackPointerCopy()
// If call instruction is to malloc(), set the DEF register EAX type to
// HEAPPTR and return true.
bool SMPInstr::MDFindMallocCall(op_t TargetOp) {
bool changed = false;
func_t *TargetFunc = get_func(TargetOp.addr);
if (TargetFunc) {
char FuncName[MAXSTR];
get_func_name(TargetFunc->startEA, FuncName, sizeof(FuncName) - 1);
if (0 == strcmp("malloc", FuncName)) {
// NOTE: Some compilers might call it __malloc ; make this more robust !!!
#if SMP_VERBOSE_FIND_POINTERS
clc5q
committed
SMP_msg("Found call to malloc at %x\n", this->addr);
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
#endif
op_t SearchOp = InitOp;
SearchOp.type = o_reg;
SearchOp.reg = R_ax;
set<DefOrUse, LessDefUse>::iterator EAXDEF;
EAXDEF = this->SetDefType(SearchOp, HEAPPTR);
int SSANum = EAXDEF->GetSSANum();
changed = true;
if (this->BasicBlock->IsLocalName(SearchOp)) {
(void) this->BasicBlock->PropagateLocalDefType(SearchOp, HEAPPTR,
this->GetAddr(), SSANum, false);
}
else { // global name
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
(void) this->BasicBlock->PropagateGlobalDefType(SearchOp, HEAPPTR,
SSANum, false);
}
} // end if "malloc"
} // end if (TargetFunc)
return changed;
} // end of SMPInstr::MDFindMallocCall()
// Is instruction a branch (conditional or unconditional) to a
// code target that is not in the current chunk?
bool SMPInstr::IsBranchToFarChunk(void) {
if (this->IsFarBranchComputed()) { // answer is cached
return this->IsBranchesToFarChunk();
func_t *CurrChunk = get_fchunk(this->address);
bool FarBranch = false;
if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
// Instruction is a direct branch, conditional or unconditional
if (this->NumUses() > 0) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t JumpTarget = CurrUse->GetOp();
if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
// Branches to a code address
func_t *TargetChunk = get_fchunk(JumpTarget.addr);
// Is target address within the same chunk as the branch?
FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA);
if (FarBranch) {
this->FarBranchTarget = JumpTarget.addr;
}
if (FarBranch) {
this->SetBranchesToFarChunk();
}
this->SetFarBranchComputed();
return FarBranch;
} // end of SMPInstr::IsBranchToFarChunk()
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
return this->Uses.SetSSANum(CurrOp, SSASub);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
return this->Defs.SetSSANum(CurrOp, SSASub);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
return this->Uses.SetType(CurrOp, CurrType, this);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
return this->Defs.SetType(CurrOp, CurrType, this);
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
return this->Defs.SetMetadata(CurrOp, Status);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefIndWrite(op_t CurrOp, bool IndWriteFlag) {
return this->Defs.SetIndWrite(CurrOp, IndWriteFlag);
};
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseNoTruncate(op_t CurrOp, bool NoTruncFlag) {
return this->Uses.SetNoTruncation(CurrOp, NoTruncFlag);
};
clc5q
committed
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefNoOverflow(op_t DefOp, bool NoOverflowFlag) {
return this->Defs.SetNoOverflow(DefOp, NoOverflowFlag);
};
// Analyze the instruction and its operands.
void SMPInstr::Analyze(void) {
bool DebugFlag = false;
if (0x8049b00 == this->address) {
// Setting up breakpoint line.
DebugFlag = true;
}
// Fill cmd structure with disassembly of instr
clc5q
committed
if (!SMPGetCmd(this->address, this->SMPcmd, this->features))
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
clc5q
committed
this->type = DFACategory[this->SMPcmd.itype];
// Record optimization category.
clc5q
committed
this->OptType = OptCategory[this->SMPcmd.itype];
clc5q
committed
if ((NN_int == this->SMPcmd.itype) || (NN_into == this->SMPcmd.itype) || (NN_int3 == this->SMPcmd.itype)) {
this->SetInterrupt();
}
else {
this->ResetInterrupt();
}
// See if instruction is an ASM idiom for clearing a register.
if (NN_xor == this->SMPcmd.itype) {
ushort FirstReg;
if (o_reg == this->SMPcmd.Operands[0].type) {
FirstReg = this->SMPcmd.Operands[0].reg;
if (this->SMPcmd.Operands[1].is_reg(FirstReg))
this->SetRegClearIdiom();
// See if instruction is simple nop or ASM idiom for nop.
if (this->MDIsNop()) {
this->SetNop();
}
// Build the DEF and USE lists for the instruction.
this->FindMemOps();
this->BuildSMPDefUseLists();
// Determine whether the instruction is a jump target by looking
// at its cross references and seeing if it has "TO" code xrefs.
clc5q
committed
SMP_xref_t xrefs;
for (bool ok = xrefs.SMP_first_to(this->address, XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
this->SetJumpTarget();
break;
}
}
// If instruction is a call or indirect call, see if a call target has been recorded
// by IDA Pro.
if (this->GetDataFlowType() == INDIR_CALL) {
clc5q
committed
for (bool ok = xrefs.SMP_first_from(this->address, XREF_ALL);
ok;
clc5q
committed
ok = xrefs.SMP_next_from()) {
if ((xrefs.GetTo() != 0) && (xrefs.GetIscode())) {
// Found a code target, with its address in CurrXrefs.to
clc5q
committed
if (xrefs.GetTo() == (this->address + this->GetCmd().size)) {
// A call instruction will have two targets: the fall through to the
// next instruction, and the called function. We want to find
// the called function.
continue;
}
// We found a target, not the fall-through.
clc5q
committed
this->CallTarget = xrefs.GetTo();
SMP_msg("Found indirect call target %x at %x\n",
xrefs.GetTo(), this->address);
break;
}
} // end for all code xrefs
if (BADADDR == this->CallTarget) {
clc5q
committed
SMP_msg("WARNING: Did not find indirect call target at %x\n",
this->address);
}
} // end if INDIR_CALL
else if (this->GetDataFlowType() == CALL) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
optype_t OpType = CurrUse->GetOp().type;
if ((OpType == o_near) || (OpType == o_far)) {
this->CallTarget = CurrUse->GetOp().addr;
}
}
if (BADADDR == this->CallTarget) {
clc5q
committed
SMP_msg("ERROR: Target not found for direct call at %x\n", this->address);
}
}
if (DebugFlag) {
clc5q
committed
SMP_msg("Analyzed debug instruction at %x\n", this->address);
}
return;
} // end of SMPInstr::Analyze()
// Analyze the floating point NOP marker instruction at the top of the function.
void SMPInstr::AnalyzeMarker(void) {
// Fill member variable SMPcmd structure with disassembly of instr
(void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
this->SMPcmd.itype = NN_fnop;
this->SMPcmd.size = 1;
this->SMPcmd.ea = this->address;
// Set the instr disassembly text.
DisAsmText.SetMarkerInstText(this->GetAddr());
// Record what type of instruction this is, simplified for the needs
// of data flow and type analysis.
this->type = DFACategory[this->SMPcmd.itype];
// Record optimization category.
this->OptType = OptCategory[this->SMPcmd.itype];
return;
} // end of SMPInstr::AnalyzeMarker()
// Detect oddities of call instructions, such as pseudo-calls that are
// actually jumps within a function
void SMPInstr::AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr) {
if (BADADDR != this->CallTarget) {
if ((this->CallTarget > FirstFuncAddr)
&& (this->CallTarget <= LastFuncAddr)) {
this->SetCallUsedAsJump();
}
else {
this->ResetCallUsedAsJump();
}
if (this->CallTarget == FirstFuncAddr) {
this->SetDirectRecursiveCall();
}
else {
this->ResetDirectRecursiveCall();
}
if (this->IsCallUsedAsJump())
this->type = JUMP;
}
return;
}
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
sval_t SMPInstr::AnalyzeStackPointerDelta(void) {
sval_t InstDelta;
sval_t TotalDelta = 0;
uint16 InstType = this->SMPcmd.itype;
SMPitype FlowType = this->GetDataFlowType();
InstDelta = StackAlteration[InstType];
if (this->IsCallUsedAsJump()) {
// Call is used within function as a jump. Happens when setting up
// thunk offsets, for example.
; // leave InstDelta equal to negative value from StackAlterationTable[]
}
else if ((CALL == FlowType) || (INDIR_CALL == FlowType)) {
// A real call instruction, which pushes a return address on the stack,
// not a call used as a branch within the function. A return instruction
// will cancel out the stack push that is implicit in the call, so reset
// the delta to zero.
InstDelta = 0;
}
else if (1 == InstDelta) {
// value of 1 is trigger to investigate the RTL for the
// true value, which cannot be found simply by table lookup
// In the special case of an x86 LEAVE instruction, the effect
// on the stack pointer is to deallocate the local frame size,
// plus pop the saved frame pointer into EBP. Helper functions
// need to know whether to look for this special case.
bool IsLeaveInstr = this->MDIsLeaveInstr();
asize_t LocalVarsSize = this->GetBlock()->GetFunc()->GetLocalVarsSize();
InstDelta = this->RTL.TotalStackPointerAlteration(IsLeaveInstr, LocalVarsSize);
}
if ((SMP_STACK_DELTA_ERROR_CODE != InstDelta) && (SMP_STACK_POINTER_BITWISE_AND_CODE != InstDelta)) {
// If no special code, accumulate total delta in InstDelta; else leave code in InstDelta.
TotalDelta += InstDelta;
}
return TotalDelta;
} // end of SMPInstr::AnalyzeStackPointerDelta()
// Find USE-not-DEF operand that is not the flags register.
op_t SMPInstr::GetSourceOnlyOperand(void) {
size_t OpNum;
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
;
}
else if (this->features & UseMacros[OpNum]) { // USE
op_t CurrOp = this->SMPcmd.Operands[OpNum];
if (!(CurrOp.is_reg(X86_FLAGS_REG))) {
return CurrOp;
}
}
}
// It is expected that increment, decrement, and floating point stores
// will not have a USE-only operand. Increment and decrement have an
// operand that is both USEd and DEFed, while the floating point stack
// registers are implicit in most floating point opcodes. Also, exchange
// and exchange-and-add instructions have multiple DEF-and-USE operands.
int TypeGroup = SMPTypeCategory[this->SMPcmd.itype];
if ((TypeGroup != 2) && (TypeGroup != 4) && (TypeGroup != 9) && (TypeGroup != 12)
&& (TypeGroup != 13)) {
clc5q
committed
SMP_msg("ERROR: Could not find source only operand at %x in %s\n",
this->address, DisAsmText.GetDisAsm(this->GetAddr()));
return InitOp;
} // end of SMPInstr::GetSourceOnlyOperand()
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
// Should apparent memory operands be ignored? e.g. lea opcode on x86
bool SMPInstr::MDIgnoreMemOps(void) {
bool leaInst = (NN_lea == this->SMPcmd.itype);
return leaInst;
}
// Find memory DEFs and USEs, store in DEFMemOp and USEMemOp
void SMPInstr::FindMemOps(void) {
size_t OpNum;
if (!(this->MDIgnoreMemOps())) {
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempOp = this->SMPcmd.Operands[OpNum];
if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) { // memory
if (this->features & DefMacros[OpNum]) { // DEF
if (this->DEFMemOp.type == o_void) { // only save first mem DEF
this->DEFMemOp = TempOp;
}
}
if (this->features & UseMacros[OpNum]) { // USE
if (this->USEMemOp.type == o_void) { // only save first mem USE
this->USEMemOp = TempOp;
}
}
}
} // end for (OpNum = 0; ...)
}
this->SetMemOpsFound();
return;
} // end of SMPInstr::FindMemOps()
// Fill the Defs and Uses private data members.
void SMPInstr::BuildSMPDefUseLists(void) {
size_t OpNum;
bool DebugFlag = (0x8049b00 == this->GetAddr());
bool WidthDoubler = this->MDDoublesWidth();
this->Defs.clear();
this->Uses.clear();
// Start with the Defs.
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & DefMacros[OpNum]) { // DEF
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (WidthDoubler) {
// Opcodes that sign-extend a byte to a word, or a word to a dword,
// have only one operand. It is implicit, and it is the shorter USE.
// That means the DEF will have the same width as the USE, e.g. if
// we are sign-extending AX to EAX, the USE and DEF both be AX without
// a special fix. We fix this problem with the DEF operand now.
if (TempOp.dtyp == dt_byte) {
TempOp.dtyp = dt_word;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_word) {
TempOp.dtyp = dt_dword;
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
else if (TempOp.dtyp == dt_dword) {
TempOp.dtyp = dt_qword;
}
else {
clc5q
committed
SMP_msg("ERROR: Instruction operand %zu not 1,2, or 4 bytes at %x dtyp: %d\n",
OpNum, this->address, TempOp.dtyp);
}
}
if (MDKnownOperandType(TempOp)) {
clc5q
committed
SMP_msg("DEBUG: Setting DEF for: ");
clc5q
committed
SMP_msg("\n");
if (o_reg == TempOp.type) {
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
this->Defs.SetRef(TempOp);
}
} // end for (OpNum = 0; ...)
if (this->IsRegClearIdiom()) {
// Something like xor eax,eax clears eax but does not really
// use eax. It is the same as mov eax,0 and we don't want to
// extend the prior def-use chain for eax to this instruction
// by treating the instruction as xor eax,eax. Instead, we
// build the DEF and USE lists and RTL as if it were mov eax,0.
op_t ImmOp = InitOp;
ImmOp.value = 0;
this->Uses.SetRef(ImmOp, NUMERIC);
return;
}
// Now, do the Uses. Uses have special case operations, because
// any memory operand could have register uses in the addressing
// expression, and we must create Uses for those registers. For
// example: mov eax,[ebx + esi*2 + 044Ch]
// This is a two-operand instruction with one def: eax. But
// there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi.
// The first use is an op_t of type o_phrase (memory phrase),
// which can be copied from cmd.Operands[1]. Likewise, we just
// copy cmd.Operands[0] into the defs list. However, we must create
// op_t types for register ebx and register esi and append them
// to the Uses list. This is handled by the machine dependent
// method MDFixupDefUseLists().
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
if (this->features & UseMacros[OpNum]) { // USE
op_t TempOp = this->SMPcmd.Operands[OpNum];
if (MDKnownOperandType(TempOp)) {
clc5q
committed
SMP_msg("DEBUG: Setting USE for: ");
clc5q
committed
SMP_msg("\n");
if (o_reg == TempOp.type) {
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
}
this->Uses.SetRef(TempOp);
}
} // end for (OpNum = 0; ...)
return;
} // end of SMPInstr::BuildSMPDefUseLists()
// If DefReg is not already in the DEF list, add a DEF for it.
void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
op_t TempDef = InitOp;
TempDef.type = o_reg;
TempDef.reg = DefReg;
if (Shown)
TempDef.set_showed();
else
TempDef.clr_showed();
return;
} // end of SMPInstr::MDAddRegDef()
// If UseReg is not already in the USE list, add a USE for it.
void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) {
op_t TempUse = InitOp;
TempUse.type = o_reg;
TempUse.reg = UseReg;
if (Shown)
TempUse.set_showed();
else
TempUse.clr_showed();
return;
} // end of SMPInstr::MDAddRegUse()
// Perform machine dependent ad hoc fixes to the def and use lists.
// For example, some multiply and divide instructions in x86 implicitly
// use and/or define register EDX. For memory phrase examples, see comment
// in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
// First, handle the uses hidden in memory addressing modes. Note that we do not
// care whether we are dealing with a memory destination operand or source
// operand, because register USEs, not DEFs, happen within the addressing expressions.
size_t OpNum;
SMPOperandType RefType;
int BaseReg;
int IndexReg;
ushort ScaleFactor;
ea_t displacement;
bool UseFP = true;
bool HasIndexReg = false;
bool leaInst = (NN_lea == this->SMPcmd.itype);
bool DebugFlag = (this->GetAddr() == 0x8086177);
clc5q
committed
SMP_msg("DEBUG: Fixing up DEF-USE lists for debug location\n");
#if SMP_BASEREG_POINTER_TYPE
// Some instructions are analyzed outside of any function or block when fixing up
// the IDB, so we have to assume the block and func pointers might be NULL.
if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif
if (DebugFlag) {
clc5q
committed
SMP_msg("DEBUG: UseFP = %d\n", UseFP);
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = SMPcmd.Operands[OpNum];
if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
SingleAddressReg = ((0 == displacement)
&& ((R_none == BaseReg) || (R_none == IndexReg)));
if (R_none != IndexReg) {
op_t IndexOpnd = Opnd; // Init to current operand field values
IndexOpnd.type = o_reg; // Change type and reg fields
IndexOpnd.reg = (ushort) IndexReg;
IndexOpnd.hasSIB = 0;
IndexOpnd.set_showed();
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
if (0 == ScaleFactor)
this->Uses.SetRef(IndexOpnd);
else { // scaling == shift ==> NUMERIC
HasIndexReg = true;
this->Uses.SetRef(IndexOpnd, NUMERIC);
if (R_none != BaseReg) {
op_t BaseOpnd = Opnd; // Init to current operand field values
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = (ushort) BaseReg;
BaseOpnd.set_showed();
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
// Other registers used as base registers should get their USEs as
// base registers typed as POINTER, which might get refined later
// to STACKPTR, GLOBALPTR, HEAPPTR, etc.
// NOTE: the NN_lea opcode is often used without a true base register.
// E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
// could not be done in one instruction without using the addressing
// modes of the machine to do the arithmetic. We don't want to set the
// USE of EAX to POINTER in this case, so we will conservatively skip
// all lea instructions here.
// We cannot be sure that a register is truly a base register unless
// there is also an index register. E.g. with reg+displacement, we
// could have memaddr+indexreg or basereg+offset, depending on what
// the displacement is. The exception is if there is no offset and only
// one addressing register, e.g. mov eax,[ebx].
if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
#endif
this->Uses.SetRef(BaseOpnd, RefType);
} // end if R_none != BaseReg
} // end if (o_phrase or o_displ operand)
} // end for (all operands)
// The lea (load effective address) instruction looks as if it has
// a memory USE: lea ebx,[edx+esi]
// However, this instruction is really just: ebx := edx+esi
// Now that the above code has inserted the "addressing" registers
// into the USE list, we should remove the "memory USE".
if (leaInst) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t UseOp = CurrUse->GetOp();
if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) {
this->LeaUSEMemOp = UseOp;
this->EraseUse(CurrUse);
this->USEMemOp = InitOp;
break;
}
}
}
// Next, handle repeat prefices in the instructions. The Intel REPE/REPZ prefix
// is just the text printed for SCAS/CMPS instructions that have a REP prefix.
// Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
// REPNE/REPNZ only applies to SCAS and CMPS instructions.
bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
if (HasRepPrefix && HasRepnePrefix)
clc5q
committed
SMP_msg("REP and REPNE both present at %x %s\n", this->GetAddr(), DisAsmText.GetDisAsm(this->GetAddr()));
if (HasRepPrefix || HasRepnePrefix) {
// All repeating instructions use ECX as the countdown register.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = R_cx;
BaseOpnd.clr_showed();
this->Defs.SetRef(BaseOpnd, NUMERIC);
this->Uses.SetRef(BaseOpnd, NUMERIC);
if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_scas)
|| (this->SMPcmd.itype == NN_movs) || (this->SMPcmd.itype == NN_stos)) {
// ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS.
// Only EDI is involved with SCAS/STOS.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.clr_showed();
if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_movs)) {
BaseOpnd.reg = R_si;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
}
BaseOpnd.reg = R_di;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
// Now, handle special instruction categories that have implicit operands.
if (NN_cmpxchg == this->SMPcmd.itype) {
// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
// sound by declaring that EAX is always a DEF.
this->MDAddRegDef(R_ax, false);
} // end if NN_cmpxchg
else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) {
// IDA does not include the stack pointer in the DEFs or USEs.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
// We always reference [esp+0] or [esp-4], so add it to the DEF or USE list.
op_t StackOp = InitOp;
StackOp.type = o_displ;
StackOp.reg = R_sp;
if (this->MDIsPopInstr()) {
StackOp.addr = 0; // [ESP+0]
this->Uses.SetRef(StackOp); // USE
}
else {
StackOp.addr = (ea_t) -4; // [ESP-4]
this->Defs.SetRef(StackOp); // DEF
}
#if SMP_CALL_TRASHES_REGS
else if ((this->type == CALL) || (this->type == INDIR_CALL)) {
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
#if 1
if (this->MDIsInterruptCall()) {
#endif
this->MDAddRegDef(R_bx, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegDef(R_si, false);
this->MDAddRegUse(R_si, false);
}
#endif
else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) {
// Entire function prologue or epilogue microcoded.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
this->MDAddRegDef(R_bp, false);
this->MDAddRegUse(R_bp, false);
else if ((this->SMPcmd.itype == NN_maskmovq)
|| (this->SMPcmd.itype == NN_maskmovdqu)) {
else if (8 == this->GetOptType()) {
// This category implicitly writes to EDX:EAX.
this->MDAddRegDef(R_dx, false);
this->MDAddRegDef(R_ax, false);
} // end else if (8 == GetOptType)
else if (7 == this->GetOptType()) {
// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
// DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
// sense, because they are not displayed in the disassembly text). For example:
// mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
// source operand is only 8 bits wide, there is room to hold the result in AX
// without using DX: mul bl means AX <-- AL*BL.
// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
// operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
// EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempUse = this->SMPcmd.Operands[OpNum];
if (!TempUse.showed()) { // hidden operand
if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
this->MDAddRegUse(R_dx, false);
}
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_dx, false);
}
}
}
} // end else if (7 == OptType)
#if 0
// The floating point instructions in type categories 14 and 15 often USE and DEF
// the floating point register stack, e.g. pushing a value onto that stack is a
// massive copy downward of stack locations. We don't really care about the USE of
// the stack if the value being pushed came from elsewhere than the stack. For example,
// an "fld" opcode pushes its source onto the stack. We build RTLs with a simple
// move structure, but the RTL building can be fooled by seeing two "source" operands
// in the USE list.
if ((14 == SMPTypeCategory[this->SMPcmd.itype])
|| (15 == SMPTypeCategory[this->SMPcmd.itype])) {
}
#endif
clc5q
committed
#if 0 // Not true for LOOP instructions that use only the ECX counter register.
if (this->type == COND_BRANCH) {
assert(SMPUsesFlags[this->SMPcmd.itype]);
}
clc5q
committed
#endif
// The return value register EAX is not quite like a caller-save or callee-save
// register (technically, it is caller-save). Within a callee, it might appear
// that EAX has become dead by the time a return instruction is reached, but
// the USE that would make it not dead is in the caller. To prevent type inference
// from mistakenly thinking that all USEs of EAX have been seen in the callee,
// we add EAX to the USE list for all return instructions, as well as for all
// tail calls, which are essentially returns in terms of data flow analysis.
// This USE of EAX will always be of type UNINIT unless its DEF has a known type
// that propagates to it. Thus, it will prevent an invalid back inference of the
// DEF type from "all" USE types that are visible in the callee; even if they
// were all NUMERIC, this return USE will be UNINIT and inhibit the invalid
// type inference. EAX could be loaded with a pointer from memory, for example,
// and USEd only in a comparison instruction, making it falsely appear to be
// a NUMERIC, without this extra USE at the return instruction.
// Because some of the library functions pass values around in EBX, EDI, etc.,
// we will add these general purpose registers to the USE list for returns
// in order to prevent erroneous analyses of dead registers or unused
// metadata.
if ((this->type == RETURN) || this->IsTailCall()) {
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
if (!UseFP)
this->MDAddRegUse(R_bp, false);
this->MDAddRegUse(R_si, false);
this->MDAddRegUse(R_di, false);
}
clc5q
committed
// Next, add the flags register to the DEFs and USEs for those instructions that
// are marked as defining or using flags.
if (!this->IsDefsFlags() && SMPDefsFlags[this->SMPcmd.itype]) {
this->MDAddRegDef(X86_FLAGS_REG, false);
this->SetDefsFlags();
if (!this->IsUsesFlags() && SMPUsesFlags[this->SMPcmd.itype]) {
this->MDAddRegUse(X86_FLAGS_REG, false);
this->SetUsesFlags();
}
if (this->IsNop()) {
// Clear the DEFs and USEs for no-ops.
// These include machine idioms for no-ops, e.g. mov esi,esi
// or xchg ax,ax or lea esi,[esi].
this->Defs.clear();
this->Uses.clear();
this->MoveSource = InitOp;
this->OptType = 1;
}
#endif
clc5q
committed
SMP_msg("DEBUG after MDFixupDefUseLists:\n");
return;
} // end of SMPInstr::MDFixupDefUseLists()
// If we can definitely identify which part of the addressing expression
// used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR
// immediate, set the USE type for that register to POINTER and return true.
// If we can find definite NUMERIC addressing registers that are not already
// typed as NUMERIC, set their USE types to NUMERIC and return true.
bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
bool changed = false;
int BaseReg;
int IndexReg;
op_t BaseOp = InitOp;
op_t IndexOp = InitOp;
SMPOperandType BaseType = UNKNOWN;
SMPOperandType IndexType = UNKNOWN;
ushort ScaleFactor;
ea_t offset;
set<DefOrUse, LessDefUse>::iterator BaseIter;
set<DefOrUse, LessDefUse>::iterator IndexIter;
if (NN_lea == this->SMPcmd.itype)
return false; // lea instruction really has no memory operands
if (NN_fnop == this->SMPcmd.itype)
return false; // SSA marker instruction
MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset);
if (R_none != IndexReg) {
IndexOp.type = o_reg;
IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
IndexIter = this->FindUse(IndexOp);
assert(IndexIter != this->GetLastUse());
IndexType = IndexIter->GetType();
}
if (R_none != BaseReg) {
BaseOp.type = o_reg;
BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
BaseIter = this->FindUse(BaseOp);
assert(BaseIter != this->GetLastUse());
BaseType = BaseIter->GetType();
}
if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) {
if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
// We have an indexed access into the stack frame.
// Set IndexReg USE type to NUMERIC.
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
}
if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) {
if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
// We have an indexed access into the stack frame.
// Set BaseReg USE type to NUMERIC.
// Note that BaseReg is really an IndexReg and vice versa.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
clc5q
committed
SMP_msg("WARNING: BaseReg is index, IndexReg is base: %s\n",
DisAsmText.GetDisAsm(this->GetAddr()));
}
return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
}
if (IsImmedGlobalAddress(offset)) {
if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
// We have an indexed access into a global.
// Set IndexReg USE type to NUMERIC.
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}