diff --git a/SMPInstr.cpp b/SMPInstr.cpp index 419540c02b1dc59225ee052e2d0e523ce2b4d34a..9d0b9ef1f37680742a45a2732406d9a5f98e5e3f 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -195,6 +195,7 @@ SMPInstr::SMPInstr(ea_t addr) { this->analyzed = false; this->JumpTarget = false; this->BlockTerm = false; + this->Interrupt = false; this->DeadRegsString[0] = '\0'; this->DefsFlags = false; this->UsesFlags = false; @@ -521,6 +522,14 @@ bool SMPInstr::MDIsLeaveInstr(void) const { && (SMPcmd.itype <= MD_LAST_LEAVE_INSTR)); } +#define MD_FIRST_COND_MOVE_INSTR NN_cmova +#define MD_LAST_COND_MOVE_INSTR NN_fcmovnu +// MACHINE DEPENDENT: Is instruction a conditional move? Returns true when SMPcmd.itype lies in the inclusive range [MD_FIRST_COND_MOVE_INSTR, MD_LAST_COND_MOVE_INSTR], i.e. NN_cmova through NN_fcmovnu. NOTE(review): assumes every opcode between those two bounds is a conditional move -- confirm against the opcode enumeration. +bool SMPInstr::MDIsConditionalMoveInstr(void) const { + return ((SMPcmd.itype >= MD_FIRST_COND_MOVE_INSTR) + && (SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR)); +} + // MACHINE DEPENDENT: Does instruction use a callee-saved register? bool SMPInstr::MDUsesCalleeSavedReg(void) { set<DefOrUse, LessDefUse>::iterator CurrUse; @@ -620,6 +629,8 @@ void SMPInstr::Analyze(void) { // Record optimization category. this->OptType = OptCategory[cmd.itype]; + this->Interrupt = ((NN_int == cmd.itype) || (NN_into == cmd.itype) || (NN_int3 == cmd.itype)); + // Build the DEF and USE lists for the instruction. this->BuildSMPDefUseLists(); // Fix up machine dependent quirks in the def and use lists. @@ -1009,6 +1020,24 @@ void SMPInstr::MDFixupDefUseLists(void) { assert(SMPUsesFlags[this->SMPcmd.itype]); } #endif + // The return value register EAX is not quite like a caller-save or callee-save + // register (technically, it is caller-save). Within a callee, it might appear + // that EAX has become dead by the time a return instruction is reached, but + // the USE that would make it not dead is in the caller. 
To prevent type inference + // from mistakenly thinking that all USEs of EAX have been seen in the callee, + // we add EAX to the USE list for all return instructions, as well as for all + // tail calls, which are essentially returns in terms of data flow analysis. + // This USE of EAX will always be of type UNINIT unless its DEF has a known type + // that propagates to it. Thus, it will prevent an invalid back inference of the + // DEF type from "all" USE types that are visible in the callee; even if they + // were all NUMERIC, this return USE will be UNINIT and inhibit the invalid + // type inference. EAX could be loaded with a pointer from memory, for example, + // and USEd only in a comparison instruction, making it falsely appear to be + // a NUMERIC, without this extra USE at the return instruction. + if (this->type == RETURN) { + this->MDAddRegUse(R_ax, false); + } + // Next, add the flags register to the DEFs and USEs for those instructions that // are marked as defining or using flags. 
if (!this->DefsFlags && SMPDefsFlags[this->SMPcmd.itype]) { @@ -1044,7 +1073,8 @@ void SMPInstr::SetImmedTypes(bool UseFP) { op_t DefOp; uval_t ImmVal; bool DebugFlag = false; -#if SMP_VERBOSE_DEBUG_BUILT_RTL +#if SMP_VERBOSE_DEBUG_BUILD_RTL + DebugFlag = DebugFlag || (this->address == 0x805cd52) || (this->address == 0x805cd56); DebugFlag |= (0 == strncmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName(), 15)); #endif @@ -1062,7 +1092,7 @@ void SMPInstr::SetImmedTypes(bool UseFP) { if (DebugFlag) msg("Setting to GLOBALPTR\n"); CurrUse = this->SetUseType(UseOp, GLOBALPTR); } - else if (IsImmedCodeAddress((ea_t) ImmVal)) { + else if (this->Interrupt || IsImmedCodeAddress((ea_t) ImmVal)) { if (DebugFlag) msg("Setting to CODEPTR\n"); CurrUse = this->SetUseType(UseOp, CODEPTR); } @@ -1129,13 +1159,6 @@ void SMPInstr::SetImmedTypes(bool UseFP) { else if ((o_trreg == DefOp.type) || (o_dbreg == DefOp.type) || (o_crreg == DefOp.type)) { if (DebugFlag) msg("Setting special reg DEF to NUMERIC\n"); CurrDef = this->SetDefType(DefOp, NUMERIC); - if (this->BasicBlock->IsLocalName(DefOp)) { - (void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, this->GetAddr()); - } - else { // global name - this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false - (void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, CurrDef->GetSSANum()); - } } #endif ++CurrDef; @@ -1179,7 +1202,7 @@ bool SMPInstr::InferTypes(void) { UseOp = CurrUse->GetOp(); if (UseOp.is_reg(X86_FLAGS_REG)) CurrUse = this->SetUseType(UseOp, NUMERIC); - else + else if (CurrUse->GetType() != CODEPTR) CurrUse = this->SetUseType(UseOp, CODEPTR); ++CurrUse; } @@ -1215,14 +1238,18 @@ bool SMPInstr::InferTypes(void) { SSANum = CurrDef->GetSSANum(); CurrDef = this->SetDefType(DefOp, NUMERIC); changed = true; - if (this->BasicBlock->IsLocalName(DefOp)) { - (void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, - this->GetAddr()); - } - else { // global name - 
this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false - (void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, - SSANum); + // Be conservative and only propagate register DEFs. We can improve + // this in the future. **!!** + if (o_reg == DefOp.type) { + if (this->BasicBlock->IsLocalName(DefOp)) { + (void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, + this->GetAddr()); + } + else { // global name + this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false + (void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, + SSANum); + } } } ++CurrDef; @@ -1245,27 +1272,22 @@ bool SMPInstr::InferTypes(void) { DefOp = CurrDef->GetOp(); SSANum = CurrDef->GetSSANum(); if (DefOp.is_reg(X86_FLAGS_REG)) { - CurrDef = this->SetDefType(DefOp, NUMERIC); - if (this->BasicBlock->IsLocalName(DefOp)) { - (void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, - this->GetAddr()); - } - else { // global name - this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false - (void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, - SSANum); - } + ; // SetImmedTypes already made it NUMERIC } else { CurrDef = this->SetDefType(DefOp, CurrUse->GetType()); - if (this->BasicBlock->IsLocalName(DefOp)) { - (void) this->BasicBlock->PropagateLocalDefType(DefOp, NUMERIC, - this->GetAddr()); - } - else { // global name - this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false - (void) this->BasicBlock->PropagateGlobalDefType(DefOp, NUMERIC, - SSANum); + // Be conservative and only propagate register DEFs. We can improve + // this in the future. 
**!!** + if (o_reg == DefOp.type) { + if (this->BasicBlock->IsLocalName(DefOp)) { + (void) this->BasicBlock->PropagateLocalDefType(DefOp, CurrUse->GetType(), + this->GetAddr()); + } + else { // global name + this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false + (void) this->BasicBlock->PropagateGlobalDefType(DefOp, CurrUse->GetType(), + SSANum); + } } } ++CurrDef; @@ -1281,14 +1303,18 @@ bool SMPInstr::InferTypes(void) { CurrDef = this->SetDefType(DefOp, POINTER); CategoryInference = true; changed = true; - if (this->BasicBlock->IsLocalName(DefOp)) { - (void) this->BasicBlock->PropagateLocalDefType(DefOp, POINTER, - this->GetAddr()); - } - else { // global name - this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false - (void) this->BasicBlock->PropagateGlobalDefType(DefOp, POINTER, - SSANum); + // Be conservative and only propagate register DEFs. We can improve + // this in the future. **!!** + if (o_reg == DefOp.type) { + if (this->BasicBlock->IsLocalName(DefOp)) { + (void) this->BasicBlock->PropagateLocalDefType(DefOp, POINTER, + this->GetAddr()); + } + else { // global name + this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false + (void) this->BasicBlock->PropagateGlobalDefType(DefOp, POINTER, + SSANum); + } } break; @@ -1333,14 +1359,16 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { SMPoperator CurrOp = CurrRT->GetOperator(); bool DebugFlag = false; #if SMP_VERBOSE_DEBUG_INFER_TYPES - DebugFlag |= (0 == strcmp("__libc_csu_fini", this->BasicBlock->GetFunc()->GetFuncName())); +#if 0 + DebugFlag |= (0 == strcmp("strtok", this->BasicBlock->GetFunc()->GetFuncName())); +#endif + DebugFlag = DebugFlag || ((this->address == 0x805cd52) || (this->address == 0x805cd56)); #endif - // See if we can infer the operator type from the operator itself. If so, - // we don't need to recurse down the tree because lower operators no longer - // matter to the final DEF. 
#if SMP_VERBOSE_DEBUG_INFER_TYPES - msg("Entered InferOperatorType for CurrOp: %d\n", CurrOp); + if (DebugFlag) { + msg("Entered InferOperatorType for CurrOp: %d\n", CurrOp); + } #endif switch (CurrOp) { case SMP_NULL_OPERATOR: @@ -1546,13 +1574,14 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { break; } else if (LeftNumeric || RightNumeric) { - if (LeftNumeric && (UNINIT != RightType)) { - // **!!** subtract numeric - pointer ==> PTROFFSET; fix !! + // ADD of NUMERIC to non-NUMERIC preserves non-NUMERIC type. + // AND and OR operations should leave the operator UNINIT for now. + if (LeftNumeric && (UNINIT != RightType) && (SMP_ADD == CurrOp)) { CurrRT->SetOperatorType(RightType); updated = true; break; } - else if (RightNumeric && (UNINIT != LeftType)) { + else if (RightNumeric && (UNINIT != LeftType) && (SMP_ADD == CurrOp)) { CurrRT->SetOperatorType(LeftType); updated = true; break; @@ -1563,9 +1592,6 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { if (SMP_ADD == CurrOp) { CurrRT->SetOperatorType(UNKNOWN); } - else if (SMP_SUBTRACT == CurrOp) { - CurrRT->SetOperatorType(PTROFFSET); - } else { // bitwise AND or OR of two pointers msg("WARNING: hash of two pointers in %s\n", this->GetDisasm()); CurrRT->SetOperatorType(NUMERIC); // hash operation? @@ -1722,7 +1748,7 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { assert(RightUse != this->GetLastUse()); } else if (OperType == PTROFFSET) { - // PTROFFSET := PTR - ?? ==? ?? is PTR + // PTROFFSET := PTR - ?? ==> ?? is PTR RightUse = this->SetUseType(RightOp, LeftType); updated = true; } @@ -1740,9 +1766,9 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { } break; - case SMP_ASSIGN: // Extract the current types of right and left operands and SMP_ASSIGN operator. 
+ OperType = CurrRT->GetOperatorType(); DefOp = CurrRT->GetLeftOperand(); CurrDef = this->Defs.FindRef(DefOp); assert(CurrDef != this->GetLastDef()); // found it @@ -1775,6 +1801,10 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { // the right operand or tree up to the SMP_ASSIGN operator, then from // the operator to the left (DEF) operand, or from left up to operator // and down the right, depending on where the existing types are. + if (DebugFlag) { + msg("%x LeftType: %d OperatorType: %d RightType: %d\n", this->address, LeftType, + OperType, RightType); + } if ((UNINIT == RightType) && (UNINIT == LeftType)) { // We will only try to solve the right hand side on this iteration. if (CurrRT->HasRightSubTree()) { @@ -1782,42 +1812,76 @@ bool SMPInstr::InferOperatorType(SMPRegTransfer *CurrRT) { } break; } - else if (UNINIT == CurrRT->GetOperatorType()) { + else if (UNINIT == OperType) { // UNINIT SMP_ASSIGN operator, but either LeftType or RightType is not UNINIT. - if (UNINIT != RightType) - CurrRT->SetOperatorType(RightType); - else + if (UNINIT != RightType) { + // We have to special case conditional moves. Only if both operands + // (the source and the prior value of the potential destination, + // which was added to the USE set by BuildMoveRTL()) agree in type + // can we propagate their common type to the operator and ultimately + // to the DEF. + if ((!this->MDIsConditionalMoveInstr()) || this->Uses.TypesAgreeNoFlags()) { + CurrRT->SetOperatorType(RightType); + updated = true; + } + } + else { CurrRT->SetOperatorType(LeftType); - updated = true; + updated = true; + } break; } else if (UNINIT == LeftType) { // SMP_ASSIGN operator has type, so propagate it. 
- LeftType = CurrRT->GetOperatorType(); - CurrDef = this->SetDefType(DefOp, LeftType); + LeftType = OperType; + CurrDef = this->SetDefType(DefOp, OperType); updated = true; - if (this->BasicBlock->IsLocalName(DefOp)) { - (void) this->BasicBlock->PropagateLocalDefType(DefOp, LeftType, - this->GetAddr()); - } - else { // global name - this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false - int SSANum = CurrDef->GetSSANum(); - (void) this->BasicBlock->PropagateGlobalDefType(DefOp, LeftType, - SSANum); + // Propagate the new DEF type unless it is an indirect memory access. + // Future: Propagate until re-DEF of addressing register terminates + // the propagation. **!!** + if (!MDIsIndirectMemoryOpnd(DefOp, this->BasicBlock->GetFunc()->UsesFramePointer())) { + // Be conservative and only propagate register DEFs. We can improve + // this in the future. **!!** + if (o_reg == DefOp.type) { + if (this->BasicBlock->IsLocalName(DefOp)) { + (void) this->BasicBlock->PropagateLocalDefType(DefOp, LeftType, + this->GetAddr()); + } + else { // global name + this->BasicBlock->GetFunc()->ResetProcessedBlocks(); // set Processed to false + int SSANum = CurrDef->GetSSANum(); + (void) this->BasicBlock->PropagateGlobalDefType(DefOp, LeftType, + SSANum); + } + } } break; } else if (UNINIT == RightType) { // SMP_ASSIGN operator has type, so propagate it. if (CurrRT->HasRightSubTree()) { - CurrRT->GetRightTree()->SetOperatorType(CurrRT->GetOperatorType()); + CurrRT->GetRightTree()->SetOperatorType(OperType); + updated = true; updated |= this->InferOperatorType(CurrRT->GetRightTree()); } else { - CurrUse = this->SetUseType(UseOp, CurrRT->GetOperatorType()); + // For conditional moves, propagate to the pseudo-USE of the + // destination register as well as the source operand. 
+ if (this->MDIsConditionalMoveInstr()) { + CurrUse = this->FindUse(DefOp); + assert(CurrUse != this->GetLastUse()); + if (UNINIT == CurrUse->GetType()) + CurrUse = this->SetUseType(DefOp, OperType); + else if (OperType != CurrUse->GetType()) { + msg("WARNING: Avoiding lattice oscillation from type %d to %d at %x for: ", + CurrUse->GetType(), OperType, this->address); + PrintOperand(CurrUse->GetOp()); + msg("\n"); + } + } + CurrUse = this->SetUseType(UseOp, OperType); + updated = true; } - updated = true; break; } break; @@ -2923,6 +2987,15 @@ bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) { Guard1->SetOperator(GuardOp); Guard1->SetRightOperand(ZeroOp); TempRT->SetGuard(Guard1); + if (this->MDIsConditionalMoveInstr()) { + // We need to represent the possibility that the DEF operand will not + // be set because the move is conditional. We will add the DEF operand + // into the USE set and special case our type inferences so that the + // USE and the pseudo-USE (prior SSA value of the DEF operand) must + // agree in type before we can be sure of the result type. 
+ assert(this->Defs.GetSize() == 1); + this->Uses.SetRef(this->Defs.GetFirstRef()->GetOp()); + } } this->RTL.push_back(TempRT); // Now, create the repeat prefix effects diff --git a/SMPInstr.h b/SMPInstr.h index 0ceacf5e9326e4e5e5663918bf4b9ec693691217..637720303ecf63daa367f7922c854f937f2fbd7b 100644 --- a/SMPInstr.h +++ b/SMPInstr.h @@ -189,12 +189,14 @@ public: inline bool IsLastInBlock(void) const { return BlockTerm; }; // does terminate its block inline bool IsJumpTarget(void) { return JumpTarget; }; bool IsBranchToFarChunk(void); // instr jumps outside current chunk + inline bool MDIsInterruptCall(void) const { return Interrupt; }; // instr is a software interrupt call (returns the Interrupt member) bool MDIsNop(void) const; // instruction is simple or complex no-op bool MDIsPushInstr(void) const; bool MDIsPopInstr(void) const; bool MDIsReturnInstr(void) const; bool MDIsEnterInstr(void) const; bool MDIsLeaveInstr(void) const; + bool MDIsConditionalMoveInstr(void) const; // instr is a conditional move bool MDIsStackPointerCopy(bool UseFP); // copies ESP or EBP to register bool MDIsFrameAllocInstr(void); bool MDIsFrameDeallocInstr(bool UseFP, asize_t LocSize); @@ -233,6 +235,7 @@ private: // and DEF and USE lists? bool JumpTarget; // Is Instr the target of any jumps or branches? bool BlockTerm; // This instruction terminates a basic block. + bool Interrupt; // Instruction is a software interrupt call. char DeadRegsString[MAXSTR]; // Registers that are dead at this instruction bool DefsFlags; // Instr DEFs the flags bool UsesFlags; // Instr USEs the flags