Newer
Older
//
// SMPInstr.cpp
//
// This module performs the instruction level analyses needed for the
// SMP project (Software Memory Protection).
//
#include <cstring>
#include <pro.h>
#include <assert.h>
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>
#include "SMPStaticAnalyzer.h"
#include "SMPDataFlowAnalysis.h"
#include "SMPInstr.h"
#include "SMPProgram.h"
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_BUILD_RTL 1 // should be left on, serious errors!
#define SMP_VERBOSE_DEBUG_BUILD_RTL 0
#define SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE 0
#define SMP_VERBOSE_DEBUG_INFER_TYPES 0
#define SMP_CALL_TRASHES_REGS 1 // Add DEFs of caller-saved regs to CALL instructions
#define SMP_BASEREG_POINTER_TYPE 1 // Initialize Base Register USEs to type POINTER?
// The CF_CHG1..CF_CHG6 and CF_USE1..CF_USE6 feature bits are separate macros.
// These tables let the bit for operand N be selected with an array index,
// e.g. (features & DefMacros[N]) tests whether operand N is defined.
static ulong DefMacros[UA_MAXOP] = {
    CF_CHG1, CF_CHG2, CF_CHG3,
    CF_CHG4, CF_CHG5, CF_CHG6
};
static ulong UseMacros[UA_MAXOP] = {
    CF_USE1, CF_USE2, CF_USE3,
    CF_USE4, CF_USE5, CF_USE6
};
// Text to be printed in each optimizing annotation explaining why
// the annotation was emitted. The array has LAST_TYPE_CATEGORY + 1
// slots and sixteen strings are supplied here.
// NOTE(review): presumably indexed by optimization category number --
// confirm against the annotation emitter.
static char *OptExplanation[LAST_TYPE_CATEGORY + 1] =
{ "NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM",
  "Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr",
  "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest", "NumericSources",
  "StackMemoryTracking", "NumericSources", "NumericMemDest",
  "NeverMemDest", "SafeIfNoIndexing"
};
// Printable names for RTL operators; the Dump() methods index this table
// with an operator value to print it.
// NOTE(review): the entry order presumably mirrors the operator enumeration
// declared elsewhere -- keep the two in sync.
static char *OperatorText[LAST_SMP_OPERATOR + 1] = {
    // Miscellaneous
    "SMP_NULL_OPERATOR", "SMP_CALL", "SMP_INPUT", "SMP_OUTPUT", "SMP_ADDRESS_OF",
    // Shifts and rotates
    "SMP_U_LEFT_SHIFT", "SMP_S_LEFT_SHIFT",
    "SMP_U_RIGHT_SHIFT", "SMP_S_RIGHT_SHIFT",
    "SMP_ROTATE_LEFT", "SMP_ROTATE_LEFT_CARRY",
    "SMP_ROTATE_RIGHT", "SMP_ROTATE_RIGHT_CARRY",
    // Integer arithmetic
    "SMP_DECREMENT", "SMP_INCREMENT",
    "SMP_ADD", "SMP_ADD_CARRY",
    "SMP_SUBTRACT", "SMP_SUBTRACT_BORROW",
    "SMP_U_MULTIPLY", "SMP_S_MULTIPLY",
    "SMP_U_DIVIDE", "SMP_S_DIVIDE",
    "SMP_U_REMAINDER",
    // Extension, assignment, and bitwise operators
    "SMP_SIGN_EXTEND", "SMP_ZERO_EXTEND", "SMP_ASSIGN",
    "SMP_BITWISE_AND", "SMP_BITWISE_OR", "SMP_BITWISE_NOT", "SMP_BITWISE_XOR",
    "SMP_NEGATE",
    // Comparisons and relations
    "SMP_S_COMPARE", "SMP_U_COMPARE",
    "SMP_LESS_THAN", "SMP_GREATER_THAN",
    "SMP_LESS_EQUAL", "SMP_GREATER_EQUAL",
    "SMP_EQUAL", "SMP_NOT_EQUAL",
    // Logical connectives
    "SMP_LOGICAL_AND", "SMP_LOGICAL_OR",
    // Catch-all categories
    "SMP_UNARY_NUMERIC_OPERATION", "SMP_BINARY_NUMERIC_OPERATION",
    "SMP_SYSTEM_OPERATION",
    "SMP_UNARY_FLOATING_ARITHMETIC", "SMP_BINARY_FLOATING_ARITHMETIC"
};
// *****************************************************************
// Class SMPGuard
// *****************************************************************
// Constructor: both operands start out void and the guard operator is
// the null operator.
SMPGuard::SMPGuard(void) {
    this->LeftOperand.type = o_void;
    this->RightOperand.type = o_void;
    this->GuardOp = SMP_NULL_OPERATOR;
    return;
}
// Debug print. Emits "GUARD: <left operand> <operator name> <right operand>:"
// through msg() and PrintOperand().
void SMPGuard::Dump(void) {
    const char *GuardOpName = OperatorText[this->GuardOp];

    msg("GUARD: ");
    PrintOperand(this->LeftOperand);
    msg(" %s ", GuardOpName);
    PrintOperand(this->RightOperand);
    msg(":");
} // end of SMPGuard::Dump()
// *****************************************************************
// Class SMPRegTransfer
// *****************************************************************
// Constructor: produces an empty register transfer with no guard, void
// operands, the null operator with an UNINIT type, and no right subtree.
SMPRegTransfer::SMPRegTransfer(void) {
    // Owned pointers start out empty.
    this->Guard = NULL;
    this->RightRT = NULL;
    this->RightSubTree = false;

    // Operands are void until filled in.
    this->LeftOperand.type = o_void;
    this->RightOperand.type = o_void;

    // Operator record.
    this->RTop.oper = SMP_NULL_OPERATOR;
    this->RTop.NonSpeculativeType = UNINIT;
}
// Destructor: releases the right subtree first and then the guard.
// Applying delete to a null pointer is a no-op, so no explicit NULL
// tests are required.
SMPRegTransfer::~SMPRegTransfer() {
    delete this->RightRT;
    delete this->Guard;
}
// Debug print: optional guard, optional left operand, the operator name,
// then either the right subtree (recursively) or the right operand.
void SMPRegTransfer::Dump(void) {
    if (NULL != this->Guard) {
        this->Guard->Dump();
    }

    // A void left operand is simply omitted.
    if (o_void != this->LeftOperand.type) {
        PrintOperand(this->LeftOperand);
    }

    msg(" %s ", OperatorText[this->GetOperator()]);

    // The right-hand side is either a nested transfer or a plain operand.
    if (this->HasRightSubTree()) {
        this->GetRightTree()->Dump();
        return;
    }
    if (o_void != this->RightOperand.type) {
        PrintOperand(this->RightOperand);
    }
}
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// *****************************************************************
// Class SMPRTL
// *****************************************************************
// Constructor: an RTL starts with zero register transfers and an empty
// list of extra kills.
SMPRTL::SMPRTL() {
    this->RTCount = 0;
    this->ExtraKills.clear();
}
// Destructor: deletes each of the RTCount register transfers stored in
// RTvector (in index order) and then empties the extra-kills list.
SMPRTL::~SMPRTL() {
    size_t Slot = 0;
    while (Slot < this->RTCount) {
        delete this->RTvector[Slot];
        ++Slot;
    }
    this->ExtraKills.clear();
}
// Get methods
// Return the register transfer stored at position index, or NULL when
// index is out of range. Valid positions are 0 .. RTCount-1; the slot at
// RTCount has not been filled by push_back(), so it must be rejected too.
SMPRegTransfer *SMPRTL::GetRT(size_t index) {
    if (index >= this->RTCount)
        return NULL;
    else
        return this->RTvector[index];
}
// Set methods
// Append NewEffect in the next free RTvector slot and bump the count.
// Asserts that fewer than SMP_RT_LIMIT transfers are stored beforehand.
void SMPRTL::push_back(SMPRegTransfer *NewEffect) {
    assert(SMP_RT_LIMIT > this->RTCount);
    this->RTvector[this->RTCount++] = NewEffect;
}
// Printing methods
void SMPRTL::Dump(void) {
size_t index;
if (0 < this->RTCount) {
msg("RTL: ");
for (index = 0; index < this->RTCount; ++index) {
this->RTvector[index]->Dump();
}
for (index = 0; index < this->ExtraKills.size(); ++index) {
msg(" KILL: ");
PrintOperand(this->ExtraKills.at(index));
}
msg("\n");
}
return;
} // end of SMPRTL::Dump()
// *****************************************************************
// Class SMPInstr
// *****************************************************************
// Constructor for instruction. Records the address and puts every status
// flag into its "not yet analyzed" state. The instruction-specific fields
// (SMPcmd, features, type, OptType, disasm) are filled in by Analyze() or
// AnalyzeMarker().
SMPInstr::SMPInstr(ea_t addr) {
    this->address = addr;
    this->analyzed = false;
    this->JumpTarget = false;
    this->BlockTerm = false;
    this->CondTailCall = false;
    this->Interrupt = false;
    // Analyze() only ever sets RegClearIdiom to true, and
    // BuildSMPDefUseLists() reads it unconditionally, so it needs a
    // defined starting value.
    this->RegClearIdiom = false;
    this->DeadRegsString[0] = '\0';
    this->DefsFlags = false;
    this->UsesFlags = false;
    this->AddSubSourceType = UNINIT;
    this->TypeInferenceComplete = false;
    this->CategoryInferenceComplete = false;
    this->BasicBlock = NULL;
    return;
}
// Is the instruction the type that terminates a basic block?
bool SMPInstr::IsBasicBlockTerminator() const {
return ((type == JUMP) || (type == COND_BRANCH)
|| (type == INDIR_JUMP) || (type == RETURN));
}
// Is the destination operand a memory reference?
// Scans the operands that the feature bits mark as DEFs and returns true
// as soon as one has a memory operand type (o_mem, o_phrase, or o_displ).
// Returns false when no operand is a DEF or none of the DEFs is memory.
bool SMPInstr::HasDestMemoryOperand(void) {
    bool MemDest = false; // result when no DEF operand is found at all
    op_t Opnd;
    for (int i = 0; i < UA_MAXOP; ++i) {
        Opnd = SMPcmd.Operands[i];
        optype_t CurrType = Opnd.type;
        if (this->features & DefMacros[i]) { // DEF
            MemDest = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ));
            if (MemDest)
                break;
        }
    }
    return MemDest;
} // end of SMPInstr::HasDestMemoryOperand()
// Is a source operand a memory reference?
bool SMPInstr::HasSourceMemoryOperand(void) {
// NN_lea looks like it has a memory source, but it does not.
if (NN_lea == this->SMPcmd.itype)
return false;
for (int i = 0; i < UA_MAXOP; ++i) {
Opnd = SMPcmd.Operands[i];
optype_t CurrType = Opnd.type;
if (this->features & UseMacros[i]) { // USE
MemSrc = ((CurrType == o_mem) || (CurrType == o_phrase) || (CurrType == o_displ));
if (MemSrc)
break;
}
}
return MemSrc;
} // end of SMPInstr::HasSourceMemoryOperand()
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
// Return the first DEF whose operand type lies in the range o_mem..o_displ.
// If the DEF set holds no such operand, the result has type o_void.
op_t SMPInstr::MDGetMemDefOp(void) {
    set<DefOrUse, LessDefUse>::iterator Cursor = this->GetFirstDef();
    while (Cursor != this->GetLastDef()) {
        op_t Candidate = Cursor->GetOp();
        if ((o_mem <= Candidate.type) && (o_displ >= Candidate.type)) {
            return Candidate;
        }
        ++Cursor;
    }

    // No memory DEF was found.
    op_t NoMemOp;
    NoMemOp.type = o_void;
    return NoMemOp;
} // end of SMPInstr::MDGetMemDefOp()
// Return the first USE whose operand type lies in the range o_mem..o_displ.
// If the USE set holds no such operand, the result has type o_void.
op_t SMPInstr::MDGetMemUseOp(void) {
    set<DefOrUse, LessDefUse>::iterator Cursor = this->GetFirstUse();
    while (Cursor != this->GetLastUse()) {
        op_t Candidate = Cursor->GetOp();
        if ((o_mem <= Candidate.type) && (o_displ >= Candidate.type)) {
            return Candidate;
        }
        ++Cursor;
    }

    // No memory USE was found.
    op_t NoMemOp;
    NoMemOp.type = o_void;
    return NoMemOp;
} // end of SMPInstr::MDGetMemUseOp()
// Is the second operand of this instruction an immediate that
// IsImmedNumeric() accepts as numeric? The flags argument F is not consulted.
// NOTE: Only immediate values can be analyzed for now. Once data flow
// analyses are available, many non-immediate operands can be handled too.
#define IMMEDNUM_LOWER -8191
#define IMMEDNUM_UPPER 8191
bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const {
    if (o_imm != SMPcmd.Operands[1].type) {
        return false; // not an immediate; nothing to analyze
    }
    const uval_t ImmedValue = SMPcmd.Operands[1].value;
    return IsImmedNumeric(ImmedValue);
} // end of SMPInstr::IsSecondSrcOperandNumeric()
// Record, in AddSubSourceType, the type of the source operand of an add or
// subtract instruction. If it is NUMERIC or PROF_NUMERIC, an optimizing
// annotation will result.
//  - With a memory source, the first memory USE (o_mem..o_displ) supplies
//    the type.
//  - Otherwise the first USE that is not also a DEF supplies the type. The
//    USE and DEF lists share the flags and the destination operand for
//    register adds and subtracts, so the USE-only operand is the source.
// AddSubSourceType is left untouched when no USE qualifies.
void SMPInstr::SetAddSubSourceType(void) {
    const bool MemSrc = this->HasSourceMemoryOperand();
    set<DefOrUse, LessDefUse>::iterator UseIter = this->GetFirstUse();

    while (UseIter != this->GetLastUse()) {
        op_t UseOp = UseIter->GetOp();
        bool FoundSource;
        if (MemSrc) {
            // Only a memory USE counts when the source is in memory.
            FoundSource = ((UseOp.type >= o_mem) && (UseOp.type <= o_displ));
        }
        else {
            // A USE with no matching DEF is the USE-only operand.
            FoundSource = (this->GetLastDef() == this->FindDef(UseOp));
        }

        if (FoundSource) {
            this->AddSubSourceType = UseIter->GetType();
#if SMP_VERBOSE_DEBUG_INFER_TYPES
            msg("Set AddSubSourceType to %d at %x: %s\n", UseIter->GetType(),
                this->address, this->GetDisasm());
#endif
            break;
        }
        ++UseIter;
    }
} // end of SMPInstr::SetAddSubSourceType()
// Are all DEFs in the DEF set NUMERIC type?
bool SMPInstr::AllDefsNumeric(void) {
bool AllNumeric = (this->Defs.GetSize() > 0); // false if no DEFs, true otherwise
set<DefOrUse, LessDefUse>::iterator CurrDef;
for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) {
clc5q
committed
AllNumeric = (AllNumeric && IsNumeric(CurrDef->GetType()));
}
return AllNumeric;
} // end of SMPInstr::AllDefsNumeric()
// Do all DEFs have DEF_METADATA_UNUSED status?
bool SMPInstr::AllDefMetadataUnused(void) {
bool AllUnused = (this->Defs.GetSize() > 0); // false if no DEFs, true otherwise
set<DefOrUse, LessDefUse>::iterator CurrDef;
for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) {
AllUnused = (AllUnused
&& (DEF_METADATA_UNUSED == CurrDef->GetMetadataStatus()));
}
return AllUnused;
} // end of SMPInstr::AllDefMetadataUnused()
// DEBUG print operands for Inst.
void SMPInstr::PrintOperands(void) const {
op_t Opnd;
for (int i = 0; i < UA_MAXOP; ++i) {
Opnd = SMPcmd.Operands[i];
PrintOneOperand(Opnd, this->features, i);
}
msg(" \n");
return;
} // end of SMPInstr::PrintOperands()
// Complete DEBUG printing: a header line (address, size, data flow type,
// disassembly text), the USE set, the DEF set, and the RTL. The raw
// operands are printed as well when SMP_VERBOSE_DUMP is enabled.
void SMPInstr::Dump(void) {
    const int DataFlowType = (int) this->type;
    msg("%x %d SMPitype: %d %s\n", this->address, this->SMPcmd.size, DataFlowType,
        this->GetDisasm());

    msg("USEs: ");
    this->Uses.Dump();

    msg("DEFs: ");
    this->Defs.Dump();

    this->RTL.Dump();

#if SMP_VERBOSE_DUMP
    this->PrintOperands();
#endif
} // end of SMPInstr::Dump()
// Print out the destination operand list for the instruction, given
// the OptCategory for the instruction as a hint.
char * SMPInstr::DestString(int OptType) {
static char DestList[MAXSTR];
DestList[0] = 'Z'; // Make sure there are no leftovers from last call
DestList[1] = 'Z';
DestList[2] = '\0';
set<DefOrUse, LessDefUse>::iterator CurrDef;
for (CurrDef = this->GetFirstDef(); CurrDef != this->GetLastDef(); ++CurrDef) {
op_t DefOpnd = CurrDef->GetOp();
if (DefOpnd.is_reg(X86_FLAGS_REG)) // don't print flags as a destination
continue;
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
if (o_reg == DefOpnd.type) {
ushort DestReg = DefOpnd.reg;
if (0 == RegDestCount) {
qstrncpy(DestList, RegNames[DestReg], 1 + strlen(RegNames[DestReg]));
}
else {
qstrncat(DestList, " ", MAXSTR);
qstrncat(DestList, RegNames[DestReg], MAXSTR);
}
++RegDestCount;
}
}
if (0 >= RegDestCount) {
msg("WARNING: No destination registers: %s\n", this->GetDisasm());
}
else {
qstrncat(DestList, " ZZ ", MAXSTR);
}
return DestList;
} // end of SMPInstr::DestString()
// Equality operator for SMPInstr: 1 when both instructions have the same
// address (the key field), 0 otherwise.
int SMPInstr::operator==(const SMPInstr &rhs) const {
    return (this->address == rhs.GetAddr()) ? 1 : 0;
}
// Inequality operator for SMPInstr: 1 when the addresses (the key field)
// differ, 0 when they match.
int SMPInstr::operator!=(const SMPInstr &rhs) const {
    const bool AddressesDiffer = (this->address != rhs.GetAddr());
    return AddressesDiffer ? 1 : 0;
}
// Less than operator for sorting SMPInstr lists: orders instructions by
// address, the key field.
int SMPInstr::operator<(const SMPInstr &rhs) const {
    const bool ComesFirst = (this->address < rhs.GetAddr());
    return ComesFirst ? 1 : 0;
}
// Less than or equal operator for sorting SMPInstr lists: orders
// instructions by address, the key field.
int SMPInstr::operator<=(const SMPInstr &rhs) const {
    const bool NotAfter = (this->address <= rhs.GetAddr());
    return NotAfter ? 1 : 0;
}
#define MD_FIRST_ENTER_INSTR NN_enterw
#define MD_LAST_ENTER_INSTR NN_enterq
// Is this instruction the one that allocates space on the
// stack for the local variables?
bool SMPInstr::MDIsFrameAllocInstr(void) {
// The frame allocating instruction should look like:
// sub esp,48 or add esp,-64 etc.
op_t ESPOp;
ESPOp.type = o_reg;
ESPOp.reg = R_sp;
if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) {
if (this->GetLastDef() != this->Defs.FindRef(ESPOp)) {
// We know that an addition or subtraction is being
// performed on the stack pointer. This should not be
// possible within the prologue except at the stack
// frame allocation instruction, so return true. We
// could be more robust in this analysis in the future. **!!**
// CAUTION: If a compiler allocates 64 bytes for locals
// and 16 bytes for outgoing arguments in a single
// instruction: sub esp,80
// you cannot insist on finding sub esp,LocSize
// To make this more robust, we are going to insist that
// an allocation of stack space is either performed by
// adding a negative immediate value, or by subtracting
// a positive immediate value. We will throw in, free of
// charge, a subtraction of a register, which is how alloca()
// usually allocates stack space.
// PHASE ORDERING: Should we use the Operands[] instead of the USE list? **!!**
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
if (o_imm == CurrUse->GetOp().type) {
signed long TempImm = (signed long) CurrUse->GetOp().value;
if (((0 > TempImm) && (SMPcmd.itype == NN_add))
|| ((0 < TempImm) && (SMPcmd.itype == NN_sub))) {
return true;
}
}
else if ((o_reg == CurrUse->GetOp().type)
&& (!CurrUse->GetOp().is_reg(R_sp)) // skip the ESP operand
&& (SMPcmd.itype == NN_sub)) { // sub esp,reg: alloca() ?
return true;
}
}
}
}
else if ((SMPcmd.itype >= MD_FIRST_ENTER_INSTR) && (SMPcmd.itype <= MD_LAST_ENTER_INSTR)) {
return true;
}
return false;
} // end of SMPInstr::MDIsFrameAllocInstr()
#define MD_FIRST_LEAVE_INSTR NN_leavew
#define MD_LAST_LEAVE_INSTR NN_leaveq
// Is this instruction in the epilogue the one that deallocates the local
// vars region of the stack frame?
bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) {
// The usual compiler idiom for the prologue on x86 is to
// deallocate the local var space with: mov esp,ebp
// It could be add esp,constant. We can be tricked by
// add esp,constant when the constant is just the stack
// adjustment after a call. We will have to insist that
// the immediate operand have at least the value of
// LocalVarsSize for this second form, and that UseFP be true
// for the first form.
set<DefOrUse, LessDefUse>::iterator FirstDef = this->GetFirstDef();
set<DefOrUse, LessDefUse>::iterator FirstUse = this->GetFirstUse();
if ((SMPcmd.itype >= MD_FIRST_LEAVE_INSTR) && (SMPcmd.itype <= MD_LAST_LEAVE_INSTR))
return true;
else if (this->HasDestMemoryOperand() || this->HasSourceMemoryOperand()) {
// Don't get fooled by USE or DEF entries of EBP or ESP that come
// from memory operands, e.g. mov eax,[ebp-20]
return false;
}
else if (UseFP && (this->SMPcmd.itype == NN_mov)
&& (FirstDef->GetOp().is_reg(R_sp))
&& (FirstUse->GetOp().is_reg(R_bp)))
return true;
else if ((this->SMPcmd.itype == NN_add)
&& (FirstDef->GetOp().is_reg(R_sp))) {
set<DefOrUse, LessDefUse>::iterator SecondUse = ++FirstUse;
if (SecondUse == this->Uses.GetLastRef())
return false; // no more USEs ... strange for ADD instruction
if (SecondUse->GetOp().is_imm((uval_t) LocalVarsSize))
return true;
else if (SecondUse->GetOp().type == o_imm) {
signed long TempImm = (signed long) this->SMPcmd.Operands[1].value;
if (0 > TempImm) // adding a negative to ESP; alloc, not dealloc
return false;
else {
msg("Used imprecise LocalVarsSize to find dealloc instr.\n");
return true;
}
}
else
return false;
}
else
return false;
} // end of SMPInstr::MDIsFrameDeallocInstr()
// Is instruction a no-op? There are 1-byte, 2-byte, etc., versions of no-ops.
bool SMPInstr::MDIsNop(void) const {
bool IsNop = false;
ushort opcode = this->SMPcmd.itype;
IsNop = true;
else if (NN_mov == opcode) {
if ((o_reg == this->SMPcmd.Operands[0].type)
&& this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) {
// We have a register to register move with source == destination.
IsNop = true;
}
}
else if (NN_lea == opcode) {
if ((o_reg == this->SMPcmd.Operands[0].type)
&& (o_displ == this->SMPcmd.Operands[1].type)
&& (0 == this->SMPcmd.Operands[1].addr)) {
// We are looking for 6-byte no-ops like lea esi,[esi+0]
ushort destreg = this->SMPcmd.Operands[0].reg;
if ((this->SMPcmd.Operands[1].hasSIB)
&& (destreg == (ushort) sib_base(this->SMPcmd.Operands[1]))
&& (R_sp == sib_index(this->SMPcmd.Operands[1]))) {
// R_sp signifies no SIB index register. So, we have
// lea reg,[reg+0] with reg being the same in both place,
// once as Operands[0] and once as the base reg in Operands[1].
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
IsNop = true;
}
else if (destreg == this->SMPcmd.Operands[1].reg) {
IsNop = true;
}
}
}
return IsNop;
} // end of SMPInstr::MDIsNop()
// MACHINE DEPENDENT: Is instruction a return instruction?
// True for the NN_retn and NN_retf opcodes.
bool SMPInstr::MDIsReturnInstr(void) const {
    if (NN_retn == SMPcmd.itype)
        return true;
    return (NN_retf == SMPcmd.itype);
}
// MACHINE DEPENDENT: Is instruction a POP instruction?
// True when the opcode lies in the inclusive range NN_pop .. NN_popfq.
#define FIRST_POP_INST NN_pop
#define LAST_POP_INST NN_popfq
bool SMPInstr::MDIsPopInstr(void) const {
    if (SMPcmd.itype < FIRST_POP_INST)
        return false;
    return (SMPcmd.itype <= LAST_POP_INST);
}
// MACHINE DEPENDENT: Is instruction a PUSH instruction?
// True when the opcode lies in the inclusive range NN_push .. NN_pushfq.
#define FIRST_PUSH_INST NN_push
#define LAST_PUSH_INST NN_pushfq
bool SMPInstr::MDIsPushInstr(void) const {
    if (SMPcmd.itype < FIRST_PUSH_INST)
        return false;
    return (SMPcmd.itype <= LAST_PUSH_INST);
}
// MACHINE DEPENDENT: Is instruction an ENTER instruction?
// True when the opcode lies in the inclusive range
// MD_FIRST_ENTER_INSTR .. MD_LAST_ENTER_INSTR.
bool SMPInstr::MDIsEnterInstr(void) const {
    if (SMPcmd.itype < MD_FIRST_ENTER_INSTR)
        return false;
    return (SMPcmd.itype <= MD_LAST_ENTER_INSTR);
}
// MACHINE DEPENDENT: Is instruction a LEAVE instruction?
// True when the opcode lies in the inclusive range
// MD_FIRST_LEAVE_INSTR .. MD_LAST_LEAVE_INSTR.
bool SMPInstr::MDIsLeaveInstr(void) const {
    return ((SMPcmd.itype >= MD_FIRST_LEAVE_INSTR)
        && (SMPcmd.itype <= MD_LAST_LEAVE_INSTR));
}
#define MD_FIRST_COND_MOVE_INSTR NN_cmova
#define MD_LAST_COND_MOVE_INSTR NN_fcmovnu
// MACHINE DEPENDENT: Is instruction a conditional move?
// True when the opcode lies in the inclusive range NN_cmova .. NN_fcmovnu.
bool SMPInstr::MDIsConditionalMoveInstr(void) const {
    if (SMPcmd.itype < MD_FIRST_COND_MOVE_INSTR)
        return false;
    return (SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR);
}
// MACHINE DEPENDENT: Does instruction use a callee-saved register?
bool SMPInstr::MDUsesCalleeSavedReg(void) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t CurrOp = CurrUse->GetOp();
if (CurrOp.is_reg(R_bp) || CurrOp.is_reg(R_si)
|| CurrOp.is_reg(R_di) || CurrOp.is_reg(R_bx)) {
return true;
}
}
return false;
} // end of SMPInstr::MDUsesCalleeSavedReg()
// Is the instruction a register to register copy of a stack pointer or frame pointer
// into a general purpose register (which mmStrata will now need to track as a stack
// relative pointer)?
// UseFP says whether the enclosing function uses a frame pointer. If so, a
// copy of EBP counts, and so does a copy of ESP into anything except EBP.
// If not, any copy of ESP counts.
bool SMPInstr::MDIsStackPointerCopy(bool UseFP) {
    // OptType 3 indicates a move instruction
    if ((this->OptType == 3) && (this->GetFirstDef()->GetOp().type == o_reg)
        && (!(this->GetFirstDef()->GetOp().is_reg(R_sp)))
        && (!(this->HasSourceMemoryOperand()))) { // reg to reg move
        if (UseFP) {
            if (this->GetFirstUse()->GetOp().is_reg(R_bp))
                // Move of base pointer EBP into a general register
                return true;
            else if ((this->GetFirstUse()->GetOp().is_reg(R_sp))
                && !(this->GetFirstDef()->GetOp().is_reg(R_bp)))
                // Move of ESP into something besides a base pointer
                return true;
        }
        else if (this->GetFirstUse()->GetOp().is_reg(R_sp)) {
            // Move of ESP into a register; no base pointer used in this function
            return true;
        }
    }
    return false;
} // end of SMPInstr::MDIsStackPointerCopy()
// Is instruction a branch (conditional or unconditional) to a
// code target that is not in the current chunk?
bool SMPInstr::IsBranchToFarChunk(void) {
func_t *CurrChunk = get_fchunk(this->address);
bool FarBranch = false;
if ((JUMP | COND_BRANCH) & this->GetDataFlowType()) {
// Instruction is a direct branch, conditional or unconditional
if (this->NumUses() > 0) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t JumpTarget = CurrUse->GetOp();
if ((o_near == JumpTarget.type) || (o_far == JumpTarget.type)) {
// Branches to a code address
func_t *TargetChunk = get_fchunk(JumpTarget.addr);
// Is target address within the same chunk as the branch?
FarBranch = (NULL == TargetChunk) || (CurrChunk->startEA != TargetChunk->startEA);
}
}
}
}
return FarBranch;
} // end of SMPInstr::IsBranchToFarChunk()
// Forward to Uses.SetSSANum() with operand CurrOp and SSA number SSASub,
// and return the iterator it yields.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseSSA(op_t CurrOp, int SSASub) {
    set<DefOrUse, LessDefUse>::iterator UpdatedUse = this->Uses.SetSSANum(CurrOp, SSASub);
    return UpdatedUse;
}
// Forward to Defs.SetSSANum() with operand CurrOp and SSA number SSASub,
// and return the iterator it yields.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefSSA(op_t CurrOp, int SSASub) {
    set<DefOrUse, LessDefUse>::iterator UpdatedDef = this->Defs.SetSSANum(CurrOp, SSASub);
    return UpdatedDef;
}
// Forward to Uses.SetType() with operand CurrOp, type CurrType, and this
// instruction, and return the iterator it yields.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetUseType(op_t CurrOp, SMPOperandType CurrType) {
    set<DefOrUse, LessDefUse>::iterator UpdatedUse = this->Uses.SetType(CurrOp, CurrType, this);
    return UpdatedUse;
}
// Forward to Defs.SetType() with operand CurrOp, type CurrType, and this
// instruction, and return the iterator it yields.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefType(op_t CurrOp, SMPOperandType CurrType) {
    return this->Defs.SetType(CurrOp, CurrType, this);
};
// Forward to Defs.SetMetadata() with operand CurrOp and metadata status
// Status, and return the iterator it yields.
set<DefOrUse, LessDefUse>::iterator SMPInstr::SetDefMetadata(op_t CurrOp, SMPMetadataType Status) {
    set<DefOrUse, LessDefUse>::iterator UpdatedDef = this->Defs.SetMetadata(CurrOp, Status);
    return UpdatedDef;
}
// Analyze the instruction and its operands. Does nothing if the
// instruction was analyzed before. Otherwise, in order:
//   1. disassemble at this->address into the global cmd and keep a copy
//      in SMPcmd, plus the tag-free disassembly text in disasm;
//   2. record the canonical feature bits, the data flow category, the
//      optimization category, and whether this is an interrupt opcode;
//   3. recognize the xor reg,reg register-clearing idiom;
//   4. build the DEF and USE lists and apply the machine dependent fixups;
//   5. mark the instruction as a jump target if it has a code xref to it.
void SMPInstr::Analyze(void) {
    if (this->analyzed) {
        return;
    }

    // Fill cmd structure with disassembly of instr
    ua_ana0(this->address);

    // Fetch the disassembly text, then strip the interactive color-coding tags.
    (void) generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1);
    tag_remove(this->disasm, this->disasm, 0);

    // Keep private copies of the decoded instruction and its canonical features.
    this->SMPcmd = cmd;
    this->features = cmd.get_canon_feature();

    // Instruction categories, simplified for the needs of data flow and
    // type analysis, and for optimizing annotations.
    this->type = DFACategory[cmd.itype];
    this->OptType = OptCategory[cmd.itype];

    switch (cmd.itype) {
        case NN_int:
        case NN_into:
        case NN_int3:
            this->Interrupt = true;
            break;
        default:
            this->Interrupt = false;
            break;
    }

    // See if instruction is an ASM idiom for clearing a register:
    // xor with the same register in both operand positions.
    if ((NN_xor == this->SMPcmd.itype)
        && (o_reg == this->SMPcmd.Operands[0].type)
        && this->SMPcmd.Operands[1].is_reg(this->SMPcmd.Operands[0].reg)) {
        this->RegClearIdiom = true;
    }

    // Build the DEF and USE lists, then fix up machine dependent quirks in them.
    this->BuildSMPDefUseLists();
    this->MDFixupDefUseLists();

    // The instruction is a jump target if any "TO" cross reference is a
    // code xref with a nonzero source address.
    xrefblk_t xrefs;
    bool MoreXrefs = xrefs.first_to(this->address, XREF_FAR);
    while (MoreXrefs) {
        if ((xrefs.from != 0) && (xrefs.iscode)) {
            this->JumpTarget = true;
            break;
        }
        MoreXrefs = xrefs.next_to();
    }

    this->analyzed = true;
} // end of SMPInstr::Analyze()
// Analyze the floating point NOP marker instruction at the top of the
// function. Nothing is disassembled: SMPcmd is zeroed and filled in by hand
// as a one-byte NN_fnop at this->address, the disassembly text is a fixed
// string, and the two category fields are looked up for NN_fnop.
// Does nothing if the instruction was analyzed before.
void SMPInstr::AnalyzeMarker(void) {
    if (this->analyzed) {
        return;
    }

    // Synthesize the instruction record.
    (void) memset(&(this->SMPcmd), 0, sizeof(this->SMPcmd));
    this->SMPcmd.ea = this->address;
    this->SMPcmd.size = 1;
    this->SMPcmd.itype = NN_fnop;

    // Fixed disassembly text for the marker.
    qstrncpy(this->disasm, "\tfnop\t; Top of function SSA marker for SMP",
        sizeof(this->disasm) - 1);

    // Data flow category and optimization category.
    this->type = DFACategory[this->SMPcmd.itype];
    this->OptType = OptCategory[this->SMPcmd.itype];

    this->analyzed = true;
} // end of SMPInstr::AnalyzeMarker()
// Find the first operand that is a USE but not a DEF and is not the flags
// register. If there is none, return an operand of type o_void. An error
// is logged in that case unless the type category is 2, 4, 9, 12, or 13.
op_t SMPInstr::GetSourceOnlyOperand(void) {
    for (size_t Slot = 0; Slot < UA_MAXOP; ++Slot) {
        if (this->features & DefMacros[Slot]) {
            continue; // DEF, possibly USE as well: not source-only
        }
        if (!(this->features & UseMacros[Slot])) {
            continue; // neither DEF nor USE
        }
        op_t Candidate = this->SMPcmd.Operands[Slot];
        if (!(Candidate.is_reg(X86_FLAGS_REG))) {
            return Candidate;
        }
    }

    // It is expected that increment, decrement, and floating point stores
    // will not have a USE-only operand. Increment and decrement have an
    // operand that is both USEd and DEFed, while the floating point stack
    // registers are implicit in most floating point opcodes. Also, exchange
    // and exchange-and-add instructions have multiple DEF-and-USE operands.
    switch (SMPTypeCategory[this->SMPcmd.itype]) {
        case 2:
        case 4:
        case 9:
        case 12:
        case 13:
            break; // expected to lack a source-only operand
        default:
            msg("ERROR: Could not find source only operand at %x in %s\n",
                this->address, this->GetDisasm());
            break;
    }

    op_t NoOperand;
    NoOperand.type = o_void;
    return NoOperand;
} // end of SMPInstr::GetSourceOnlyOperand()
// Fill the Defs and Uses private data members from the operands that the
// feature bits mark as DEF or USE. Register operands are canonicalized with
// MDCanonicalizeSubReg() first. For the register-clearing idiom, the USE
// list holds only an immediate zero of type NUMERIC.
void SMPInstr::BuildSMPDefUseLists(void) {
    size_t OpNum;
    // Hard-coded address at which extra debug output is produced.
    bool DebugFlag = (0x804837b == this->GetAddr());
    this->Defs.clear();
    this->Uses.clear();
    // Start with the Defs.
    for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
        if (this->features & DefMacros[OpNum]) { // DEF
            op_t TempOp = this->SMPcmd.Operands[OpNum];
            if (MDKnownOperandType(TempOp)) {
                if (DebugFlag) {
                    msg("DEBUG: Setting DEF for: ");
                    PrintOperand(TempOp);
                    msg("\n");
                }
                if (o_reg == TempOp.type) {
                    // We want to map AH, AL, and AX to EAX, etc. throughout our data flow
                    // analysis and type inference systems.
                    TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
                }
                this->Defs.SetRef(TempOp);
            }
        }
    } // end for (OpNum = 0; ...)
    if (this->RegClearIdiom) {
        // Something like xor eax,eax clears eax but does not really
        // use eax. It is the same as mov eax,0 and we don't want to
        // extend the prior def-use chain for eax to this instruction
        // by treating the instruction as xor eax,eax. Instead, we
        // build the DEF and USE lists and RTL as if it were mov eax,0.
        op_t ImmOp;
        ImmOp.type = o_imm;
        ImmOp.value = 0;
        this->Uses.SetRef(ImmOp, NUMERIC);
        return;
    }
    // Now, do the Uses. Uses have special case operations, because
    // any memory operand could have register uses in the addressing
    // expression, and we must create Uses for those registers. For
    // example: mov eax,[ebx + esi*2 + 044Ch]
    // This is a two-operand instruction with one def: eax. But
    // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi.
    // The first use is an op_t of type o_phrase (memory phrase),
    // which can be copied from cmd.Operands[1]. Likewise, we just
    // copy cmd.Operands[0] into the defs list. However, we must create
    // op_t types for register ebx and register esi and append them
    // to the Uses list. This is handled by the machine dependent
    // method MDFixupDefUseLists().
    for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
        if (this->features & UseMacros[OpNum]) { // USE
            op_t TempOp = this->SMPcmd.Operands[OpNum];
            if (MDKnownOperandType(TempOp)) {
                if (DebugFlag) {
                    msg("DEBUG: Setting USE for: ");
                    PrintOperand(TempOp);
                    msg("\n");
                }
                if (o_reg == TempOp.type) {
                    // We want to map AH, AL, and AX to EAX, etc. throughout our data flow
                    // analysis and type inference systems.
                    TempOp.reg = MDCanonicalizeSubReg(TempOp.reg);
                }
                this->Uses.SetRef(TempOp);
            }
        }
    } // end for (OpNum = 0; ...)
    return;
} // end of SMPInstr::BuildSMPDefUseLists()
// If DefReg is not already in the DEF list, add a DEF for it.
// Shown selects whether the new operand is flagged as showed; Type is the
// operand type recorded with the new DEF.
// NOTE(review): assumes Defs.SetRef() accepts an (operand, type) pair the
// way Uses.SetRef() is called elsewhere in this file -- confirm.
void SMPInstr::MDAddRegDef(ushort DefReg, bool Shown, SMPOperandType Type) {
    op_t TempDef;
    TempDef.type = o_reg;
    TempDef.reg = DefReg;
    if (Shown)
        TempDef.set_showed();
    else
        TempDef.clr_showed();
    // FindRef() yielding the end iterator means the register is not a DEF yet.
    if (this->GetLastDef() == this->Defs.FindRef(TempDef))
        this->Defs.SetRef(TempDef, Type);
    return;
} // end of SMPInstr::MDAddRegDef()
// If UseReg is not already in the USE list, add a USE for it.
// Shown selects whether the new operand is flagged as showed; Type is the
// operand type recorded with the new USE.
// NOTE(review): assumes Uses offers FindRef() the way Defs does elsewhere
// in this file -- confirm.
void SMPInstr::MDAddRegUse(ushort UseReg, bool Shown, SMPOperandType Type) {
    op_t TempUse;
    TempUse.type = o_reg;
    TempUse.reg = UseReg;
    if (Shown)
        TempUse.set_showed();
    else
        TempUse.clr_showed();
    // FindRef() yielding the end iterator means the register is not a USE yet.
    if (this->Uses.GetLastRef() == this->Uses.FindRef(TempUse))
        this->Uses.SetRef(TempUse, Type);
    return;
} // end of SMPInstr::MDAddRegUse()
// Perform machine dependent ad hoc fixes to the def and use lists.
// For example, some multiply and divide instructions in x86 implicitly
// use and/or define register EDX. For memory phrase examples, see comment
// in BuildSMPDefUseLists().
void SMPInstr::MDFixupDefUseLists(void) {
// First, handle the uses hidden in memory addressing modes. Note that we do not
// care whether we are dealing with a memory destination operand or source
// operand, because register USEs, not DEFs, happen within the addressing expressions.
size_t OpNum;
SMPOperandType RefType;
int BaseReg;
int IndexReg;
ushort ScaleFactor;
ea_t displacement;
bool UseFP = true;
bool HasIndexReg = false;
bool leaInst = (NN_lea == this->SMPcmd.itype);
bool DebugFlag = (this->GetAddr() == 0x804837b);
if (DebugFlag) {
msg("DEBUG: Fixing up DEF-USE lists for debug location\n");
this->Dump();
}
#if SMP_BASEREG_POINTER_TYPE
// Some instructions are analyzed outside of any function or block when fixing up
// the IDB, so we have to assume the block and func pointers might be NULL.
if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = SMPcmd.Operands[OpNum];
if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
SingleAddressReg = ((0 == displacement)
&& ((R_none == BaseReg) || (R_none == IndexReg)));
if (R_none != IndexReg) {
op_t IndexOpnd = Opnd; // Init to current operand field values
IndexOpnd.type = o_reg; // Change type and reg fields
IndexOpnd.reg = IndexReg;
IndexOpnd.hasSIB = 0;
IndexOpnd.set_showed();
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
if (0 == ScaleFactor)
this->Uses.SetRef(IndexOpnd);
else { // scaling == shift ==> NUMERIC
HasIndexReg = true;
this->Uses.SetRef(IndexOpnd, NUMERIC);
if (R_none != BaseReg) {
op_t BaseOpnd = Opnd; // Init to current operand field values
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = BaseReg;
BaseOpnd.hasSIB = 0;
BaseOpnd.set_showed();
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
// Other registers used as base registers should get their USEs as
// base registers typed as POINTER, which might get refined later
// to STACKPTR, GLOBALPTR, HEAPPTR, etc.
// NOTE: the NN_lea opcode is often used without a true base register.
// E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
// could not be done in one instruction without using the addressing
// modes of the machine to do the arithmetic. We don't want to set the
// USE of EAX to POINTER in this case, so we will conservatively skip
// all lea instructions here.
// We cannot be sure that a register is truly a base register unless
// there is also an index register. E.g. with reg+displacement, we
// could have memaddr+indexreg or basereg+offset, depending on what
// the displacement is. The exception is if there is no offset and only
// one addressing register, e.g. mov eax,[ebx].
if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
|| leaInst || (!HasIndexReg && !SingleAddressReg)) {