Newer
Older
#if SMP_BASEREG_POINTER_TYPE
// Some instructions are analyzed outside of any function or block when fixing up
// the IDB, so we have to assume the block and func pointers might be NULL.
if ((NULL != this->BasicBlock) && (NULL != this->BasicBlock->GetFunc()))
UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
#endif
if (DebugFlag) {
msg("DEBUG: UseFP = %d\n", UseFP);
}
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t Opnd = SMPcmd.Operands[OpNum];
if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) {
MDExtractAddressFields(Opnd, BaseReg, IndexReg, ScaleFactor, displacement);
SingleAddressReg = ((0 == displacement)
&& ((R_none == BaseReg) || (R_none == IndexReg)));
if (R_none != IndexReg) {
op_t IndexOpnd = Opnd; // Init to current operand field values
IndexOpnd.type = o_reg; // Change type and reg fields
IndexOpnd.reg = (ushort) IndexReg;
IndexOpnd.hasSIB = 0;
IndexOpnd.set_showed();
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
IndexOpnd.reg = MDCanonicalizeSubReg(IndexOpnd.reg);
if (0 == ScaleFactor)
this->Uses.SetRef(IndexOpnd);
else { // scaling == shift ==> NUMERIC
HasIndexReg = true;
this->Uses.SetRef(IndexOpnd, NUMERIC);
if (R_none != BaseReg) {
op_t BaseOpnd = Opnd; // Init to current operand field values
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = (ushort) BaseReg;
BaseOpnd.set_showed();
// We want to map AH, AL, and AX to EAX, etc. throughout our data flow
// analysis and type inference systems.
BaseOpnd.reg = MDCanonicalizeSubReg(BaseOpnd.reg);
RefType = UNINIT;
#if SMP_BASEREG_POINTER_TYPE
// R_sp and R_bp will get type STACKPTR in SMPInstr::SetImmedTypes().
// Other registers used as base registers should get their USEs as
// base registers typed as POINTER, which might get refined later
// to STACKPTR, GLOBALPTR, HEAPPTR, etc.
// NOTE: the NN_lea opcode is often used without a true base register.
// E.g. lea eax,[eax+eax+5] is an x86 idiom for eax:=eax*2+5, which
// could not be done in one instruction without using the addressing
// modes of the machine to do the arithmetic. We don't want to set the
// USE of EAX to POINTER in this case, so we will conservatively skip
// all lea instructions here.
// We cannot be sure that a register is truly a base register unless
// there is also an index register. E.g. with reg+displacement, we
// could have memaddr+indexreg or basereg+offset, depending on what
// the displacement is. The exception is if there is no offset and only
// one addressing register, e.g. mov eax,[ebx].
if (BaseOpnd.is_reg(R_sp) || (UseFP && BaseOpnd.is_reg(R_bp))
|| leaInst || (!HasIndexReg && !SingleAddressReg)) {
#endif
this->Uses.SetRef(BaseOpnd, RefType);
} // end if R_none != BaseReg
} // end if (o_phrase or o_displ operand)
} // end for (all operands)
// The lea (load effective address) instruction looks as if it has
// a memory USE: lea ebx,[edx+esi]
// However, this instruction is really just: ebx := edx+esi
// Now that the above code has inserted the "addressing" registers
// into the USE list, we should remove the "memory USE".
if (leaInst) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
op_t UseOp = CurrUse->GetOp();
if ((o_mem <= UseOp.type) && (o_displ >= UseOp.type)) {
this->EraseUse(CurrUse);
break;
}
}
}
// Next, handle repeat prefices in the instructions. The Intel REPE/REPZ prefix
// is just the text printed for SCAS/CMPS instructions that have a REP prefix.
// Only two distinct prefix codes are actually defined: REP and REPNE/REPNZ, and
// REPNE/REPNZ only applies to SCAS and CMPS instructions.
bool HasRepPrefix = (0 != (this->SMPcmd.auxpref & aux_rep));
bool HasRepnePrefix = (0 != (this->SMPcmd.auxpref & aux_repne));
if (HasRepPrefix && HasRepnePrefix)
msg("REP and REPNE both present at %x %s\n", this->GetAddr(), this->GetDisasm());
if (HasRepPrefix || HasRepnePrefix) {
// All repeating instructions use ECX as the countdown register.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.reg = R_cx;
BaseOpnd.clr_showed();
this->Defs.SetRef(BaseOpnd, NUMERIC);
this->Uses.SetRef(BaseOpnd, NUMERIC);
if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_scas)
|| (this->SMPcmd.itype == NN_movs) || (this->SMPcmd.itype == NN_stos)) {
// ESI and EDI are USEd and DEFed to point to source and dest strings for CMPS/MOVS.
// Only EDI is involved with SCAS/STOS.
op_t BaseOpnd = InitOp;
BaseOpnd.type = o_reg; // Change type and reg fields
BaseOpnd.clr_showed();
if ((this->SMPcmd.itype == NN_cmps) || (this->SMPcmd.itype == NN_movs)) {
BaseOpnd.reg = R_si;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
}
BaseOpnd.reg = R_di;
this->Defs.SetRef(BaseOpnd, POINTER);
this->Uses.SetRef(BaseOpnd, POINTER);
// Now, handle special instruction categories that have implicit operands.
if (NN_cmpxchg == this->SMPcmd.itype) {
// x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis
// sound by declaring that EAX is always a DEF.
this->MDAddRegDef(R_ax, false);
} // end if NN_cmpxchg
else if (this->MDIsPopInstr() || this->MDIsPushInstr() || this->MDIsReturnInstr()) {
// IDA does not include the stack pointer in the DEFs or USEs.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
// We always reference [esp+0] or [esp-4], so add it to the DEF or USE list.
op_t StackOp = InitOp;
StackOp.type = o_displ;
StackOp.reg = R_sp;
if (this->MDIsPopInstr()) {
StackOp.addr = 0; // [ESP+0]
this->Uses.SetRef(StackOp); // USE
}
else {
StackOp.addr = (ea_t) -4; // [ESP-4]
this->Defs.SetRef(StackOp); // DEF
}
#if SMP_CALL_TRASHES_REGS
else if ((this->type == CALL) || (this->type == INDIR_CALL)) {
// We want to add the caller-saved registers to the USEs and DEFs lists
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_cx, false);
this->MDAddRegDef(R_dx, false);
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
#if 1
if (this->MDIsInterruptCall()) {
#endif
this->MDAddRegDef(R_bx, false);
this->MDAddRegUse(R_bx, false);
#if 1
}
#endif
else if (this->MDIsEnterInstr() || this->MDIsLeaveInstr()) {
// Entire function prologue or epilogue microcoded.
this->MDAddRegDef(R_sp, false);
this->MDAddRegUse(R_sp, false);
this->MDAddRegDef(R_bp, false);
this->MDAddRegUse(R_bp, false);
else if (this->SMPcmd.itype == NN_maskmovq) {
else if (8 == this->GetOptType()) {
// This category implicitly writes to EDX:EAX.
this->MDAddRegDef(R_dx, false);
this->MDAddRegDef(R_ax, false);
} // end else if (8 == GetOptType)
else if (7 == this->GetOptType()) {
// Category 7 instructions sometimes write implicitly to EDX:EAX or DX:AX.
// DX is the same as EDX to IDA Pro (and SMP); ditto for EAX and AX.
// DIV, IDIV, and MUL all have hidden EAX or AX operands (hidden in the IDA Pro
// sense, because they are not displayed in the disassembly text). For example:
// mul ebx means EDX:EAX <-- EAX*EBX, and mul bx means DX:AX <-- AX*BX. If the
// source operand is only 8 bits wide, there is room to hold the result in AX
// without using DX: mul bl means AX <-- AL*BL.
// IMUL has forms with a hidden EAX or AX operand and forms with no implicit
// operands: imul ebx means EDX:EAX <-- EAX*EBX, but imul ebx,edx means that
// EBX*EDX gets truncated and the result placed in EBX (no hidden operands).
for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
op_t TempUse = SMPcmd.Operands[OpNum];
if (!TempUse.showed()) { // hidden operand
if (TempUse.is_reg(R_ax)) { // not R_al, so it is not 8 bits
if ((NN_div == this->SMPcmd.itype) || (NN_idiv == this->SMPcmd.itype)) {
this->MDAddRegUse(R_dx, false);
}
this->MDAddRegDef(R_ax, false);
this->MDAddRegDef(R_dx, false);
}
}
}
} // end else if (7 == OptType)
#if 0
// The floating point instructions in type categories 14 and 15 often USE and DEF
// the floating point register stack, e.g. pushing a value onto that stack is a
// massive copy downward of stack locations. We don't really care about the USE of
// the stack if the value being pushed came from elsewhere than the stack. For example,
// an "fld" opcode pushes its source onto the stack. We build RTLs with a simple
// move structure, but the RTL building can be fooled by seeing two "source" operands
// in the USE list.
if ((14 == SMPTypeCategory[this->SMPcmd.itype])
|| (15 == SMPTypeCategory[this->SMPcmd.itype])) {
}
#endif
clc5q
committed
#if 0 // Not true for LOOP instructions that use only the ECX counter register.
if (this->type == COND_BRANCH) {
assert(SMPUsesFlags[this->SMPcmd.itype]);
}
clc5q
committed
#endif
// The return value register EAX is not quite like a caller-save or callee-save
// register (technically, it is caller-save). Within a callee, it might appear
// that EAX has become dead by the time a return instruction is reached, but
// the USE that would make it not dead is in the caller. To prevent type inference
// from mistakenly thinking that all USEs of EAX have been seen in the callee,
// we add EAX to the USE list for all return instructions, as well as for all
// tail calls, which are essentially returns in terms of data flow analysis.
// This USE of EAX will always be of type UNINIT unless its DEF has a known type
// that propagates to it. Thus, it will prevent an invalid back inference of the
// DEF type from "all" USE types that are visible in the callee; even if they
// were all NUMERIC, this return USE will be UNINIT and inhibit the invalid
// type inference. EAX could be loaded with a pointer from memory, for example,
// and USEd only in a comparison instruction, making it falsely appear to be
// a NUMERIC, without this extra USE at the return instruction.
// Because some of the library functions pass values around in EBX, EDI, etc.,
// we will add these general purpose registers to the USE list for returns
// in order to prevent erroneous analyses of dead registers or unused
// metadata.
if (this->type == RETURN) {
this->MDAddRegUse(R_ax, false);
this->MDAddRegUse(R_bx, false);
this->MDAddRegUse(R_cx, false);
this->MDAddRegUse(R_dx, false);
#if 0 // need to solve phase ordering problem first
if (!UseFP)
#endif
this->MDAddRegUse(R_bp, false);
this->MDAddRegUse(R_si, false);
this->MDAddRegUse(R_di, false);
}
clc5q
committed
// Next, add the flags register to the DEFs and USEs for those instructions that
// are marked as defining or using flags.
if (!this->DefsFlags && SMPDefsFlags[this->SMPcmd.itype]) {
this->MDAddRegDef(X86_FLAGS_REG, false);
this->DefsFlags = true;
if (!this->UsesFlags && SMPUsesFlags[this->SMPcmd.itype]) {
this->MDAddRegUse(X86_FLAGS_REG, false);
this->UsesFlags = true;
}
#if 1
if (this->MDIsNop()) {
// Clear the DEFs and USEs for no-ops.
this->Defs.clear();
this->Uses.clear();
}
#endif
msg("DEBUG after MDFixupDefUseLists:\n");
return;
} // end of SMPInstr::MDFixupDefUseLists()
// If we can definitely identify which part of the addressing expression
// used in MemOp is the POINTER type, and it is not a STACKPTR or GLOBALPTR
// immediate, set the USE type for that register to POINTER and return true.
// If we can find definite NUMERIC addressing registers that are not already
// typed as NUMERIC, set their USE types to NUMERIC and return true.
bool SMPInstr::MDFindPointerUse(op_t MemOp, bool UseFP) {
bool changed = false;
int BaseReg;
int IndexReg;
op_t BaseOp = InitOp;
op_t IndexOp = InitOp;
SMPOperandType BaseType = UNKNOWN;
SMPOperandType IndexType = UNKNOWN;
ushort ScaleFactor;
ea_t offset;
set<DefOrUse, LessDefUse>::iterator BaseIter;
set<DefOrUse, LessDefUse>::iterator IndexIter;
if (NN_lea == this->SMPcmd.itype)
return false; // lea instruction really has no memory operands
if (NN_fnop == this->SMPcmd.itype)
return false; // SSA marker instruction
MDExtractAddressFields(MemOp, BaseReg, IndexReg, ScaleFactor, offset);
if (R_none != IndexReg) {
IndexOp.type = o_reg;
IndexOp.reg = MDCanonicalizeSubReg((ushort) IndexReg);
IndexIter = this->FindUse(IndexOp);
assert(IndexIter != this->GetLastUse());
IndexType = IndexIter->GetType();
}
if (R_none != BaseReg) {
BaseOp.type = o_reg;
BaseOp.reg = MDCanonicalizeSubReg((ushort) BaseReg);
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
BaseIter = this->FindUse(BaseOp);
assert(BaseIter != this->GetLastUse());
BaseType = BaseIter->GetType();
}
if ((R_sp == BaseReg) || (UseFP && (R_bp == BaseReg))) {
if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
// We have an indexed access into the stack frame.
// Set IndexReg USE type to NUMERIC.
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
}
if ((R_sp == IndexReg) || (UseFP && (R_bp == IndexReg))) {
if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
// We have an indexed access into the stack frame.
// Set BaseReg USE type to NUMERIC.
// Note that BaseReg is really an IndexReg and vice versa.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
msg("WARNING: BaseReg is index, IndexReg is base: %s\n",
this->GetDisasm());
}
return changed; // stack accesses will get STACKPTR type in SetImmedTypes()
}
if (IsImmedGlobalAddress(offset)) {
if ((R_none != IndexReg) && (!IsNumeric(IndexType))) {
// We have an indexed access into a global.
// Set IndexReg USE type to NUMERIC.
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
if ((R_none != BaseReg) && (!IsNumeric(BaseType))) {
// We have an indexed access into a global.
// Set BaseReg USE type to NUMERIC.
// Note that BaseReg is really an index register.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
msg("WARNING: BaseReg used as index: %s\n", this->GetDisasm());
}
return changed; // handled in SetImmedTypes()
}
// At this point, we must have a base address in a register, not used
// to directly address the stack or a global.
if ((0 < ScaleFactor) || (R_none == IndexReg)) {
// IndexReg is scaled, meaning it is NUMERIC, so BaseReg must
// be a POINTER; or IndexReg is not present, so BaseReg is the
// only possible holder of an address.
if (R_none != BaseReg) {
if (UNINIT == BaseIter->GetType()) {
changed = true;
BaseIter = this->SetUseType(BaseOp, POINTER);
assert(BaseIter != this->GetLastUse());
}
}
}
else if (R_none == BaseReg) {
// We have an unscaled IndexReg and no BaseReg and offset was
// not a global offset, so IndexReg must be a POINTER.
if (R_none != IndexReg) {
changed = true;
IndexIter = this->SetUseType(IndexOp, POINTER);
assert(IndexIter != this->GetLastUse());
}
}
}
else { // We have BaseReg and an unscaled IndexReg.
// The only hope for typing something like [ebx+edx] is for
// one register to already be typed NUMERIC, in which case
// the other one must be a POINTER, or if one register is
// already POINTER, then the other one must be NUMERIC.
if (IsNumeric(BaseType)) {
if (UNINIT == IndexType) {
// Set to POINTER or PROF_POINTER
changed = true;
IndexIter = this->SetUseType(IndexOp, POINTER);
assert(IndexIter != this->GetLastUse());
}
else if (IsNumeric(IndexType)) {
msg("ERROR: BaseReg and IndexReg both NUMERIC at %x: %s\n",
this->address, this->GetDisasm());
}
}
else { // BaseReg was not NUMERIC
if (UNINIT == BaseType) { // BaseReg is UNINIT
if (IsNumeric(IndexType)) {
changed = true;
BaseIter = this->SetUseType(BaseOp, POINTER);
assert(BaseIter != this->GetLastUse());
}
else if (IsDataPtr(IndexType)) {
// IndexReg is POINTER, so make BaseReg NUMERIC.
changed = true;
BaseIter = this->SetUseType(BaseOp, NUMERIC);
assert(BaseIter != this->GetLastUse());
}
}
else if (IsDataPtr(BaseType)) {
// BaseReg was a pointer type. IndexReg must be NUMERIC.
if (UNINIT == IndexType) {
changed = true;
IndexIter = this->SetUseType(IndexOp, NUMERIC);
assert(IndexIter != this->GetLastUse());
}
else if (IsDataPtr(IndexType)) {
msg("ERROR: BaseReg and IndexReg both POINTER at %x: %s\n",
this->address, this->GetDisasm());
}
}
}
}
return changed;
} // end of SMPInstr::MDFindPointerUse()
clc5q
committed
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
// Are all DEFs typed to something besides UNINIT?
bool SMPInstr::AllDEFsTyped(void) {
bool FoundUNINIT = false;
set<DefOrUse, LessDefUse>::iterator DefIter;
for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) {
if (IsEqType(UNINIT, DefIter->GetType())) {
FoundUNINIT = true;
break;
}
}
return (!FoundUNINIT);
} // end of SMPInstr::AllDEFsTyped()
// Are all USEs typed to something besides UNINIT?
bool SMPInstr::AllUSEsTyped(void) {
bool FoundUNINIT = false;
set<DefOrUse, LessDefUse>::iterator UseIter;
for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) {
if (IsEqType(UNINIT, UseIter->GetType())) {
FoundUNINIT = true;
break;
}
}
return (!FoundUNINIT);
} // end of SMPInstr::AllUSEsTyped()
// Set the type of all immediate operands found in the USE set.
// Set all flags and floating point register USEs and DEFs to NUMERIC also.
void SMPInstr::SetImmedTypes(bool UseFP) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
set<DefOrUse, LessDefUse>::iterator CurrDef;
uval_t ImmVal;
#if SMP_VERBOSE_DEBUG_BUILD_RTL
DebugFlag = DebugFlag || (this->address == 0x805cd52) || (this->address == 0x805cd56);
DebugFlag |= (0 == strncmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName(), 15));
#endif
CurrUse = this->GetFirstUse();
while (CurrUse != this->GetLastUse()) {
UseOp = CurrUse->GetOp();
if (DebugFlag) {
msg("SetImmedTypes USE: ");
PrintOperand(UseOp);
msg("\n");
}
if (o_imm == UseOp.type) {
ImmVal = UseOp.value;
if (IsImmedGlobalAddress((ea_t) ImmVal)) {
if (DebugFlag) msg("Setting to GLOBALPTR\n");
CurrUse = this->SetUseType(UseOp, GLOBALPTR);
else if (this->Interrupt || IsImmedCodeAddress((ea_t) ImmVal)) {
if (DebugFlag) msg("Setting to CODEPTR\n");
CurrUse = this->SetUseType(UseOp, CODEPTR);
}
else { // NUMERIC
if (DebugFlag) msg("Setting to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
else if (o_reg == UseOp.type) {
if (UseOp.is_reg(X86_FLAGS_REG)) {
if (DebugFlag) msg("Setting flags reg to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
}
#if 1
else if (UseOp.is_reg(R_sp) || (UseFP && UseOp.is_reg(R_bp))) {
if (DebugFlag) msg("Setting reg to STACKPTR\n");
CurrUse = this->SetUseType(UseOp, STACKPTR);
}
#endif
}
#if 0 // could these registers have pointers in them?
else if ((o_trreg == UseOp.type) ||(o_dbreg == UseOp.type) || (o_crreg == UseOp.type)) {
if (DebugFlag) msg("Setting special reg to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
}
#endif
else if ((o_fpreg == UseOp.type) || (o_mmxreg == UseOp.type) || (o_xmmreg == UseOp.type)) {
if (DebugFlag) msg("Setting floating point reg to NUMERIC\n");
CurrUse = this->SetUseType(UseOp, NUMERIC);
}
else if ((o_mem == UseOp.type) || (o_phrase == UseOp.type) || (o_displ == UseOp.type)) {
// For memory operands, we need to identify the POINTER value that
// is used in the addressing mode, if possible.
(void) this->MDFindPointerUse(UseOp, UseFP);
}
++CurrUse;
} // end while all USEs via CurrUse
CurrDef = this->GetFirstDef();
while (CurrDef != this->GetLastDef()) {
DefOp = CurrDef->GetOp();
if (DebugFlag) {
msg("SetImmedTypes DEF: ");
PrintOperand(DefOp);
msg("\n");
}
if (DebugFlag) msg("FuncName: %s\n", this->BasicBlock->GetFunc()->GetFuncName());
if (o_reg == DefOp.type) {
if (DefOp.is_reg(X86_FLAGS_REG)) {
if (DebugFlag) msg("Setting flags reg DEF to NUMERIC\n");
CurrDef = this->SetDefType(DefOp, NUMERIC);
// No need to propagate this DEF type, as all flags will become NUMERIC.
}
#if 1
else if (DefOp.is_reg(R_sp) || (DefOp.is_reg(R_bp) && UseFP)) {
if (DebugFlag) msg("Setting reg DEF to STACKPTR\n");
CurrDef = this->SetDefType(DefOp, STACKPTR);
assert(CurrDef != this->Defs.GetLastRef());
// No need to propagate; all stack and frame pointers will become STACKPTR.
else if ((o_fpreg == DefOp.type) || (o_mmxreg == DefOp.type) || (o_xmmreg == DefOp.type)) {
if (DebugFlag) msg("Setting floating point reg DEF to NUMERIC\n");
CurrDef = this->SetDefType(DefOp, NUMERIC);
// No need to propagate; all FP reg uses will become NUMERIC anyway.
}
#if 0 // could these registers have pointers in them?
else if ((o_trreg == DefOp.type) || (o_dbreg == DefOp.type) || (o_crreg == DefOp.type)) {
if (DebugFlag) msg("Setting special reg DEF to NUMERIC\n");
CurrDef = this->SetDefType(DefOp, NUMERIC);
}
#endif
else if ((o_mem == DefOp.type) || (o_phrase == DefOp.type) || (o_displ == DefOp.type)) {
// For memory operands, we need to identify the POINTER value that
// is used in the addressing mode, if possible.
(void) this->MDFindPointerUse(DefOp, UseFP);
}
++CurrDef;
} // end while all DEFs via CurrDef
return;
} // end of SMPInstr::SetImmedTypes()
// Infer DEF, USE, and RTL SMPoperator types within the instruction based on the type
// of operator, the type category of the instruction, and the previously known types
// of the operands.
bool SMPInstr::InferTypes(void) {
bool changed = false;
int TypeCategory = SMPTypeCategory[this->SMPcmd.itype];
set<DefOrUse, LessDefUse>::iterator CurrDef;
set<DefOrUse, LessDefUse>::iterator CurrUse;
op_t DefOp = InitOp, UseOp = InitOp;
bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe();
#if SMP_VERBOSE_DEBUG_INFER_TYPES
DebugFlag |= (0 == strcmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName()));
#endif
if (DebugFlag) {
msg("opcode: %d TypeCategory: %d\n", this->SMPcmd.itype, TypeCategory);
}
// If we are already finished with all types, return false.
if (this->TypeInferenceComplete)
return false;
clc5q
committed
if (this->AllDEFsTyped() && this->AllUSEsTyped()) {
this->TypeInferenceComplete = true;
return false;
}
if (this->HasDestMemoryOperand()) {
changed |= this->MDFindPointerUse(this->MDGetMemDefOp(), UseFP);
}
if (this->HasSourceMemoryOperand()) {
changed |= this->MDFindPointerUse(this->MDGetMemUseOp(), UseFP);
}
// The control flow instructions can be handled simply based on their type
// and do not need an RTL walk.
SMPitype DFAType = this->GetDataFlowType();
bool CallInst = ((DFAType == CALL) || (DFAType == INDIR_CALL));
ushort IndirCallReg = R_none;
if (DFAType == INDIR_CALL) {
op_t TargetOp = this->GetCmd().Operands[0];
if (TargetOp.type == o_reg)
IndirCallReg = TargetOp.reg;
}
clc5q
committed
msg("DFAType: %d CategoryInferenceComplete: %d\n",
DFAType, this->CategoryInferenceComplete);
if ((DFAType >= JUMP) && (DFAType <= INDIR_CALL)) {
// All USEs are either the flags (NUMERIC) or the target address (CODEPTR).
// The exceptions are the USE list for interrupt calls, which includes
// the caller-saved regs, and indirect calls through a memory
// operand, such as call [ebx+esi+20h], where the memory operand
// is a CODEPTR but the addressing registers are a BaseReg and
// IndexReg as in any other memory addressing, and the caller-saved
// regs on any call.
CurrUse = this->GetFirstUse();
while (CurrUse != this->GetLastUse()) {
UseOp = CurrUse->GetOp();
if (UseOp.is_reg(X86_FLAGS_REG))
CurrUse = this->SetUseType(UseOp, NUMERIC);
else if ((CurrUse->GetType() != CODEPTR)
&& (!(this->MDIsInterruptCall() && (o_reg == UseOp.type)))
&& (!(CallInst && MDIsCallerSavedReg(UseOp)))
&& (!(this->HasSourceMemoryOperand()
&& (INDIR_CALL == this->GetDataFlowType())
&& (o_reg == UseOp.type)))) {
CurrUse = this->SetUseType(UseOp, CODEPTR);
}
else if ((CurrUse->GetType() != CODEPTR) && CallInst
&& UseOp.is_reg(IndirCallReg)) {
CurrUse = this->SetUseType(UseOp, CODEPTR);
}
++CurrUse;
}
this->TypeInferenceComplete = true;
return true;
}
// First, see if we can infer something about DEFs and USEs just from the
// type category of the instruction.
clc5q
committed
if (!this->CategoryInferenceComplete) {
switch (TypeCategory) {
case 0: // no inference possible just from type category
case 1: // no inference possible just from type category
case 3: // MOV instructions; inference will come from source to dest in RTL walk.
case 5: // binary arithmetic; inference will come in RTL walk.
case 10: // binary arithmetic; inference will come in RTL walk.
case 11: // push and pop instructions; inference will come in RTL walk.
case 12: // exchange instructions; inference will come in RTL walk.
clc5q
committed
this->CategoryInferenceComplete = true;
break;
case 2: // Result type is always NUMERIC.
case 7: // Result type is always NUMERIC.
case 8: // Result type is always NUMERIC.
case 9: // Result type is always NUMERIC.
case 13: // Result type is always NUMERIC.
case 14: // Result type is always NUMERIC.
case 15: // Result type is always NUMERIC.
CurrDef = this->GetFirstDef();
while (CurrDef != this->GetLastDef()) {
if (!IsEqType(NUMERIC, CurrDef->GetType())) {
SSANum = CurrDef->GetSSANum();
CurrDef = this->SetDefType(DefOp, NUMERIC);
changed = true;
clc5q
committed
// Be conservative and only propagate register DEFs and SAFE stack locs. We
// can improve this in the future. **!!**
if ((o_reg == DefOp.type)
|| (SafeFunc && MDIsStackAccessOpnd(DefOp, UseFP))) {
if (this->BasicBlock->IsLocalName(DefOp)) {
(void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC,
clc5q
committed
this->GetAddr(), SSANum);
}
else { // global name
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
(void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC,
SSANum);
}
clc5q
committed
this->CategoryInferenceComplete = true;
case 4: // Unary INC, DEC, etc.: dest=source, so type remains the same
assert(this->RTL.GetRT(0)->HasRightSubTree());
UseOp = this->RTL.GetRT(0)->GetLeftOperand(); // USE == DEF
CurrUse = this->Uses.FindRef(UseOp);
assert(CurrUse != this->GetLastUse());
if (UNINIT != CurrUse->GetType()) {
// Only one USE, and it has a type assigned, so assign that type
// to the DEF.
CurrDef = this->GetFirstDef();
while (CurrDef != this->GetLastDef()) {
// Two DEFs: EFLAGS is NUMERIC, dest==source
DefOp = CurrDef->GetOp();
SSANum = CurrDef->GetSSANum();
if (DefOp.is_reg(X86_FLAGS_REG)) {
; // SetImmedTypes already made it NUMERIC
CurrDef = this->SetDefType(DefOp, CurrUse->GetType());
clc5q
committed
// Be conservative and only propagate register DEFs and SAFE stack locs. We
// can improve this in the future. **!!**
if ((o_reg == DefOp.type)
|| (SafeFunc && MDIsStackAccessOpnd(DefOp, UseFP))) {
if (this->BasicBlock->IsLocalName(DefOp)) {
(void) this->BasicBlock->PropagateLocalDefType(DefOp, CurrUse->GetType(),
clc5q
committed
this->GetAddr(), SSANum);
}
else { // global name
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
(void) this->BasicBlock->PropagateGlobalDefType(DefOp, CurrUse->GetType(),
SSANum);
}
clc5q
committed
this->CategoryInferenceComplete = true;
clc5q
committed
this->TypeInferenceComplete = true;
}
break;
case 6: // Result is always POINTER
DefOp = this->GetFirstDef()->GetOp();
SSANum = this->GetFirstDef()->GetSSANum();
CurrDef = this->SetDefType(DefOp, POINTER);
clc5q
committed
this->CategoryInferenceComplete = true;
clc5q
committed
// Be conservative and only propagate register DEFs and SAFE stack locs. We
// can improve this in the future. **!!**
if ((o_reg == DefOp.type)
|| (SafeFunc && MDIsStackAccessOpnd(DefOp, UseFP))) {
if (this->BasicBlock->IsLocalName(DefOp)) {
(void) this->BasicBlock->PropagateLocalDefType(DefOp, POINTER,
clc5q
committed
this->GetAddr(), SSANum);
}
else { // global name
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false
(void) this->BasicBlock->PropagateGlobalDefType(DefOp, POINTER,
SSANum);
}
break;
default:
msg("ERROR: Unknown type category for %s\n", this->GetDisasm());
clc5q
committed
this->CategoryInferenceComplete = true;
break;
} // end switch on TypeCategory
} // end if (!CategoryInference)
// Walk the RTL and infer types based on operators and operands.
if (DebugFlag) {
msg("RTcount: %d\n", this->RTL.GetCount());
}
for (size_t index = 0; index < this->RTL.GetCount(); ++index) {
SMPRegTransfer *CurrRT = this->RTL.GetRT(index);
if (SMP_NULL_OPERATOR == CurrRT->GetOperator()) // nothing to infer
continue;
changed |= this->InferOperatorType(CurrRT);
if (DebugFlag) {
msg("returned from InferOperatorType\n");
}
} // end for all RTs in the RTL
return changed;
} // end of SMPInstr::InferTypes()
// Infer the type of an operator within an RT based on the types of its operands and
// based on the operator itself. Recurse down the tree if necessary.
// Return true if the operator type of the RT is updated.
bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) {
bool updated = false;
bool LeftNumeric, RightNumeric;
bool LeftPointer, RightPointer;
bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
bool SafeFunc = this->BasicBlock->GetFunc()->IsSafe();
set<DefOrUse, LessDefUse>::iterator CurrDef;
set<DefOrUse, LessDefUse>::iterator CurrUse;
set<DefOrUse, LessDefUse>::iterator LeftUse;
set<DefOrUse, LessDefUse>::iterator RightUse;
SMPOperandType LeftType = UNINIT;
SMPOperandType RightType = UNINIT;
SMPOperandType OperType = UNINIT;
op_t UseOp = InitOp, DefOp = InitOp, LeftOp = InitOp, RightOp = InitOp;
SMPoperator CurrOp = CurrRT->GetOperator();
#if SMP_VERBOSE_DEBUG_INFER_TYPES
#if 0
DebugFlag |= (0 == strcmp("strtok", this->BasicBlock->GetFunc()->GetFuncName()));
#endif
DebugFlag = DebugFlag || ((this->address == 0x805cd52) || (this->address == 0x805cd56));
#if SMP_VERBOSE_DEBUG_INFER_TYPES
if (DebugFlag) {
msg("Entered InferOperatorType for CurrOp: %d\n", CurrOp);
}
switch (CurrOp) {
case SMP_NULL_OPERATOR:
break;
case SMP_CALL: // CALL instruction
if (UNINIT == CurrRT->GetOperatorType()) {
CurrRT->SetOperatorType(CODEPTR, this);
UseOp = CurrRT->GetRightOperand();
CurrUse = this->Uses.FindRef(UseOp);
assert(CurrUse != this->GetLastUse());
if (UNINIT == CurrUse->GetType()) {
CurrUse = this->SetUseType(UseOp, CODEPTR);
}
else if (CODEPTR != CurrUse->GetType()) {
msg("WARNING: call target is type %d, setting to CODEPTR at %x in %s\n",
CurrUse->GetType(), this->GetAddr(), this->GetDisasm());
CurrUse = this->SetUseType(UseOp, CODEPTR);
}
}
break;
case SMP_INPUT: // input from port
if (UNINIT == CurrRT->GetOperatorType()) {
CurrRT->SetOperatorType(UNKNOWN, this); // Leave DEF as UNINIT and infer later
updated = true;
}
break;
case SMP_OUTPUT: // output to port
case SMP_SIGN_EXTEND:
case SMP_ZERO_EXTEND:
break;
case SMP_ADDRESS_OF: // take effective address
if (UNINIT == CurrRT->GetOperatorType()) {
CurrRT->SetOperatorType(POINTER, this);
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
// Left operand is having its address taken, but we cannot infer what its
// type is.
updated = true;
}
break;
case SMP_U_LEFT_SHIFT: // unsigned left shift
case SMP_S_LEFT_SHIFT: // signed left shift
case SMP_U_RIGHT_SHIFT: // unsigned right shift
case SMP_S_RIGHT_SHIFT: // signed right shift
case SMP_ROTATE_LEFT:
case SMP_ROTATE_LEFT_CARRY: // rotate left through carry
case SMP_ROTATE_RIGHT:
case SMP_ROTATE_RIGHT_CARRY: // rotate right through carry
case SMP_U_MULTIPLY:
case SMP_S_MULTIPLY:
case SMP_U_DIVIDE:
case SMP_S_DIVIDE:
case SMP_U_REMAINDER:
case SMP_BITWISE_NOT: // unary operator
case SMP_BITWISE_XOR:
case SMP_NEGATE: // unary negation
case SMP_S_COMPARE: // signed compare (subtraction-based)
case SMP_U_COMPARE: // unsigned compare (AND-based)
case SMP_LESS_THAN: // boolean test operators
case SMP_GREATER_THAN:
case SMP_LESS_EQUAL:
case SMP_GREATER_EQUAL:
case SMP_EQUAL:
case SMP_NOT_EQUAL:
case SMP_LOGICAL_AND:
case SMP_LOGICAL_OR:
case SMP_UNARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result
case SMP_BINARY_NUMERIC_OPERATION: // miscellaneous; produces NUMERIC result
case SMP_SYSTEM_OPERATION: // for instructions such as CPUID, RDTSC, etc.; NUMERIC
case SMP_UNARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC
case SMP_BINARY_FLOATING_ARITHMETIC: // all the same to our type system; all NUMERIC
if (UNINIT == CurrRT->GetOperatorType()) {
CurrRT->SetOperatorType(NUMERIC, this);
updated = true;
}
// Left operand should be NUMERIC if it exists.
UseOp = CurrRT->GetLeftOperand();
if (UseOp.type != o_void) {
CurrUse = this->Uses.FindRef(UseOp);
if (CurrUse == this->GetLastUse()) {
msg("WARNING: Adding missing USE of ");
PrintOperand(UseOp);
msg(" in %s\n", this->GetDisasm());
this->Uses.SetRef(UseOp, NUMERIC, -1);
updated = true;
}
else if (UNINIT == CurrUse->GetType()) {
CurrUse = this->SetUseType(UseOp, NUMERIC);
updated = true;
}
}
// Right operand should be NUMERIC if it exists.
if (CurrRT->HasRightSubTree()) {
// Recurse into subtree
updated |= this->InferOperatorType(CurrRT->GetRightTree());
}
else {
UseOp = CurrRT->GetRightOperand();
if (UseOp.type != o_void) {
CurrUse = this->Uses.FindRef(UseOp);
if (CurrUse == this->GetLastUse()) {
msg("WARNING: Adding missing USE of ");
PrintOperand(UseOp);
msg(" in %s\n", this->GetDisasm());
this->Uses.SetRef(UseOp, NUMERIC, -1);
updated = true;
}
else if (UNINIT == CurrUse->GetType()) {
CurrUse = this->SetUseType(UseOp, NUMERIC);
updated = true;
}
}
}
break;
case SMP_INCREMENT:
case SMP_DECREMENT:
// The type of the right operand is propagated to the operator, or vice
// versa, whichever receives a type first.
assert(!CurrRT->HasRightSubTree());
UseOp = CurrRT->GetLeftOperand();
assert(o_void != UseOp.type);
CurrUse = this->Uses.FindRef(UseOp);
if (CurrUse == this->GetLastUse()) {
msg("WARNING: Adding missing USE of ");
PrintOperand(UseOp);
msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm());
this->Uses.SetRef(UseOp);
updated = true;
break;
}
if (UNINIT == CurrRT->GetOperatorType()) {
if (UNINIT != CurrUse->GetType()) {
// Propagate operand type up to the operator.
CurrRT->SetOperatorType(CurrUse->GetType(), this);
updated = true;
}
}
else if (UNINIT == CurrUse->GetType()) {
// Propagate operator type to operand.
CurrUse = this->SetUseType(UseOp, CurrRT->GetOperatorType());
updated = true;
}
break;
case SMP_BITWISE_AND:
case SMP_BITWISE_OR:
// Extract the current types of right and left operands and the operator.
LeftOp = CurrRT->GetLeftOperand();
CurrUse = this->Uses.FindRef(LeftOp);
assert(CurrUse != this->GetLastUse()); // found it
LeftType = CurrUse->GetType();
if (CurrRT->HasRightSubTree()) {
RightType = CurrRT->GetRightTree()->GetOperatorType();
}
else {
RightOp = CurrRT->GetRightOperand();
if (o_void == RightOp.type) {
msg("ERROR: void operand in %s\n", this->GetDisasm());
return false;
if (CurrUse == this->GetLastUse()) {
msg("WARNING: Adding missing USE of ");
msg(" in %s\n", this->GetDisasm());
updated = true;
break;