diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp index 09813c9ecf36f522812879abc400d3a883792c9d..f56b1b5466320416bf9e0ee578f613f719081470 100644 --- a/SMPBasicBlock.cpp +++ b/SMPBasicBlock.cpp @@ -26,6 +26,9 @@ #include "SMPDataFlowAnalysis.h" #include "SMPBasicBlock.h" #include "SMPInstr.h" +#include "SMPFunction.h" + +#define SMP_DEBUG_DATAFLOW 0 // Basic block number 0 is the top of the CFG lattice. #define SMP_TOP_BLOCK 0 @@ -35,12 +38,13 @@ // ***************************************************************** // Constructor -SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { +SMPBasicBlock::SMPBasicBlock(SMPFunction *Func, list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { this->IndirectJump = false; this->Returns = false; this->SharedTailChunk = false; this->BlockNum = SMP_BLOCKNUM_UNINIT; this->FirstAddr = First->GetAddr(); + this->MyFunc = Func; list<SMPInstr>::iterator CurrInst = First; while (CurrInst != Last) { this->Instrs.push_back(CurrInst); @@ -162,6 +166,9 @@ void SMPBasicBlock::Dump(void) { PhiIter->Dump(); } + msg("DEF-USE chains for block-local names: \n"); + this->LocalDUChains.Dump(); + if (this->IndirectJump) msg("Has indirect jump. "); if (this->Returns) @@ -431,6 +438,187 @@ void SMPBasicBlock::AddToDomFrontier(int block) { return; } // end of SMPBasicBlock::AddToDomFrontier() +// Fill the set of non-global names used in this block. Set local indices that can be +// used later to index into a vector of def-use chains. 
+void SMPBasicBlock::SetLocalNames(void) { + list<list<SMPInstr>::iterator>::iterator InstIter; + size_t LocalIndex = 0; +#if SMP_DEBUG_DATAFLOW + msg("Entered SetLocalNames.\n"); +#endif + for (InstIter = this->Instrs.begin(); InstIter != this->Instrs.end(); ++InstIter) { + size_t DefIndex; + for (DefIndex = 0; DefIndex < (*InstIter)->NumDefs(); ++DefIndex) { + op_t DefOp = (*InstIter)->GetDef(DefIndex).GetOp(); + if (!(this->MyFunc->IsGlobalName(DefOp))) { + // Not in global names set + if (this->LocalNames.end() == this->LocalNames.find(DefOp)) { + // Not yet in LocalNames, so add it. + SetGlobalIndex(&DefOp, LocalIndex); + this->LocalNames.insert(DefOp); + ++LocalIndex; + } + } + } + } + return; +} // end of SMPBasicBlock::SetLocalNames() + +// Create local DEF-USE chains and renumber all references to all names in LocalNames. +void SMPBasicBlock::SSALocalRenumber(void) { + bool DebugFlag = (0 == strcmp(".init_proc", this->MyFunc->GetFuncName())); + size_t NumLocals = this->LocalNames.size(); + size_t LocalIndex; + vector<int> SSAIndex; + // Initialize SSAIndex and DUChain values for each local name. + set<op_t, LessOp>::iterator NameIter; + if (DebugFlag) + msg("LocalNames size: %d\n", NumLocals); + this->LocalDUChains.ChainsByName.clear(); + this->LocalDUChains.ChainsByName.reserve(NumLocals); + op_t TempOp; + TempOp.type = o_void; + SMPDUChainArray Temp(TempOp); + if (DebugFlag) + msg("Initializing ChainsByName.\n"); + this->LocalDUChains.ChainsByName.assign(NumLocals, Temp); + for (NameIter = this->LocalNames.begin(); NameIter != this->LocalNames.end(); ++NameIter) { + LocalIndex = (size_t) ExtractGlobalIndex(*NameIter); + SSAIndex.push_back(-1); // init SSA indices to -1; first DEF will make it 0 + // Set the local name for each DU chain array. 
+ if (LocalIndex > this->LocalDUChains.ChainsByName.size()) + msg("LocalIndex %d out of bounds in SSALocalRenumber.\n", LocalIndex); + if (DebugFlag) + msg("Setting name for LocalIndex = %d\n", LocalIndex); + this->LocalDUChains.ChainsByName.at(LocalIndex).SetName(*NameIter); + } + + // Iterate through all instructions in the block. For each DEF of a local name, + // set a new SSA index and start a new DU chain. For each USE of a local name, + // set the current SSA index for that name and add the instruction to the DU + // chain. + list<list<SMPInstr>::iterator>::iterator InstIter; + for (InstIter = this->Instrs.begin(); InstIter != this->Instrs.end(); ++InstIter) { + size_t UseIndex; + size_t NameIndex; + // USEs get referenced logically before DEFs, so start with them. + for (UseIndex = 0; UseIndex < (*InstIter)->NumUses(); ++UseIndex) { + op_t UseOp = (*InstIter)->GetUse(UseIndex).GetOp(); + set<op_t, LessOp>::iterator UseNameIter = this->LocalNames.find(UseOp); + if (UseNameIter != this->LocalNames.end()) { + // Found USE of a local name. + NameIndex = ExtractGlobalIndex(*UseNameIter); + if (NameIndex > SSAIndex.size()) + msg("NameIndex %d out of range in SSALocalRenumber.\n", NameIndex); + int SSANum = SSAIndex.at(NameIndex); + // Update the SSA subscript in the DEF-USE list for the instruction. + (*InstIter)->SetUseSSA(UseIndex, SSANum); + if (SSANum >= 0) { // skip USE before DEF names + if ((size_t) SSANum > this->LocalDUChains.ChainsByName.at(NameIndex).DUChains.size()) + msg("SSANum %d out of range in SSALocalRenumber.\n", SSANum); + // Push address of USE instruction onto the DEF-USE chain for this SSANum. + this->LocalDUChains.ChainsByName.at(NameIndex).DUChains.at(SSANum).PushUse((*InstIter)->GetAddr()); + } + } + } // end for all USEs in the instruction. + // Now do the DEFs. 
+ size_t DefIndex; + for (DefIndex = 0; DefIndex < (*InstIter)->NumDefs(); ++DefIndex) { + op_t DefOp = (*InstIter)->GetDef(DefIndex).GetOp(); + set<op_t, LessOp>::iterator DefNameIter = this->LocalNames.find(DefOp); + if (DefNameIter != this->LocalNames.end()) { + // Found DEF of a local name. + NameIndex = ExtractGlobalIndex(*DefNameIter); + if (NameIndex > SSAIndex.size()) + msg("DEF NameIndex %d out of range in SSALocalRenumber.\n", NameIndex); + ++SSAIndex[NameIndex]; // move up to next SSA subscript number + int SSANum = SSAIndex.at(NameIndex); + // Put new SSA number into DEF list entry in the instruction. + (*InstIter)->SetDefSSA(DefIndex, SSANum); + // Before the push_back() call, we should have # chains equal to new SSANum + assert(SSANum == this->LocalDUChains.ChainsByName.at(NameIndex).DUChains.size()); + SetGlobalIndex(&DefOp, NameIndex); + // Set up a new DU chain and push it onto the vector of DU chains for this + // local name. Push the DEF onto the list (it will be the first reference + // on the list, because the list was newly created). + SMPDefUseChain TempDefChain(DefOp, (*InstIter)->GetAddr()); + this->LocalDUChains.ChainsByName.at(NameIndex).DUChains.push_back(TempDefChain); + } + } // end for all DEFs in the instruction + } // end for all instructions in the block + return; +} // end of SMPBasicBlock::SSALocalRenumber() + +// If no DEF-USE chains overlap the instrumentation point for InstAddr (which logically +// falls between the USEs of the instruction at InstAddr and its DEFs), for register RegIndex, +// then it is safe for the instrumentation to use that register without saving and +// restoring its value. Return true in this case, false if there is DEF-USE overlap. +bool SMPBasicBlock::IsRegDead(ea_t InstAddr, unsigned int RegIndex) const { + size_t SSAIndex; + // See if any DEF-USE chains overlap InstAddr's USEs. 
+ for (SSAIndex = 0; SSAIndex < this->LocalDUChains.ChainsByName.at(RegIndex).DUChains.size(); ++SSAIndex) { + ea_t StartAddr = this->LocalDUChains.ChainsByName.at(RegIndex).DUChains.at(SSAIndex).GetDef(); + if (StartAddr >= InstAddr) + continue; // cannot overlap USE if DEF is after InstAddr + ea_t LastAddr = this->LocalDUChains.ChainsByName.at(RegIndex).DUChains.at(SSAIndex).GetLastUse(); + if (LastAddr >= InstAddr) + return false; + } + return true; // no DEF-USE chains overlapped the instrumentation point +} // end of SMPBasicBlock::IsRegDead() + +// Mark the registers that are dead for each instruction in the block. +void SMPBasicBlock::MarkDeadRegs(void) { + // **!!** We will limit ourselves to local names for now. Most of the dead register + // optimization benefit comes from the EFLAGS register, which is usually not a + // global name. + set<op_t, LessOp>::iterator NameIter; + list<list<SMPInstr>::iterator>::iterator InstIter; + op_t FlagsOp; + FlagsOp.type = o_reg; + FlagsOp.reg = X86_FLAGS_REG; + char DeadString[MAXSTR]; + for (InstIter = this->Instrs.begin(); InstIter != this->Instrs.end(); ++InstIter) { + DeadString[0] = '\0'; + // First, put EFLAGS at beginning of string if it is dead. + NameIter = this->LocalNames.find(FlagsOp); + if (NameIter != this->LocalNames.end()) { + // EFLAGS is in the LocalNames + unsigned int RegIndex = ExtractGlobalIndex(*NameIter); + if (this->IsRegDead((*InstIter)->GetAddr(), RegIndex)) { + qstrncat(DeadString, " EFLAGS", sizeof(DeadString) - 1); + } + } + // Now, process all other local regs and skip EFLAGS. + for (NameIter = this->LocalNames.begin(); NameIter != this->LocalNames.end(); ++NameIter) { + if (NameIter->type != o_reg) + continue; + // We never want to consider ESP to be available for instrumentation. + if (NameIter->is_reg(R_sp)) + continue; + // Until we analyze interprocedural register use, it is safest to not + // use EBP for instrumentation. This could change with more analysis. 
+ if (NameIter->is_reg(R_bp)) + continue; + + unsigned int RegIndex = ExtractGlobalIndex(*NameIter); + int RegNum = NameIter->reg; + if (RegNum == X86_FLAGS_REG) { + // We moved EFLAGS to the beginning, per annotations spec + continue; + } + if (this->IsRegDead((*InstIter)->GetAddr(), RegIndex)) { + qstrncat(DeadString, " ", sizeof(DeadString) - 1); + qstrncat(DeadString, RegNames[RegNum], sizeof(DeadString) - 1); + } + } // end for all local names + if (strlen(DeadString) > 1) { + (*InstIter)->SetDeadRegs(DeadString); + } + } // end for all instructions + return; +} // end of SMPBasicBlock::MarkDeadRegs() + // erase() block starting at FirstAddr from Preds list void SMPBasicBlock::ErasePred(ea_t FirstAddr) { list<list<SMPBasicBlock>::iterator>::iterator PredIter; @@ -454,7 +642,7 @@ bool SMPBasicBlock::ErasePhi(op_t OldOp) { return false; // did not find, cannot erase this->PhiFunctions.erase(PhiIter); return true; -} +} // end of SMPBasicBlock::ErasePhi() // Add a phi function to the list of phi functions entering this block. // If phi function for this global name already existed in the block, diff --git a/SMPBasicBlock.h b/SMPBasicBlock.h index aec611178157c54afb4b45e0e32a1f56cf70412a..e6ee3d017c89ff4158944bb99b8940f806f8876d 100644 --- a/SMPBasicBlock.h +++ b/SMPBasicBlock.h @@ -29,7 +29,7 @@ using namespace std; class SMPBasicBlock { public: // Constructors - SMPBasicBlock(list<SMPInstr>::iterator FirstInstr, list<SMPInstr>::iterator LastInstr); + SMPBasicBlock(SMPFunction *Func, list<SMPInstr>::iterator FirstInstr, list<SMPInstr>::iterator LastInstr); // Operators int operator==(const SMPBasicBlock &rhs) const; // Get methods @@ -88,10 +88,15 @@ public: void InitKilledExposed(void); // Initialize KilledSet and UpExposedSet bool UpdateLiveOut(void); // Iterate once on updating LiveOutSet; return true if changed void AddToDomFrontier(int); // Add RPO block number to DomFrontier set. 
+ void SetLocalNames(void); // Fill the LocalNames member set + void SSALocalRenumber(void); // Renumber references to local names + bool IsRegDead(ea_t InstAddr, unsigned int RegIndex) const; // Is local reg dead at InstAddr? + void MarkDeadRegs(void); // Find dead registers for each mmStrata-instrumented instruction private: // Data ea_t FirstAddr; int BlockNum; // Number for block ordering algorithms + SMPFunction *MyFunc; // function containing this block list<list<SMPInstr>::iterator> Instrs; list<list<SMPBasicBlock>::iterator> Predecessors; list<list<SMPBasicBlock>::iterator> Successors; @@ -103,6 +108,8 @@ private: // SSA data structures set<int> DomFrontier; // Dominance frontier for block, as set of RPO block numbers set<SMPPhiFunction, LessPhi> PhiFunctions; // SSA incoming edge phi functions + set<op_t, LessOp> LocalNames; // non-global names referenced in this block + SMPCompleteDUChains LocalDUChains; // def-use chains for local names // cached query results bool IndirectJump; // contains an indirect jump instruction bool Returns; // contains a return instruction diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp index 48821266a06393c73011d4032c76cd68d8c2975e..4b4c3d45a27c47aeeb1cfa2828a33d7884a22665 100644 --- a/SMPDataFlowAnalysis.cpp +++ b/SMPDataFlowAnalysis.cpp @@ -52,6 +52,10 @@ char *RegNames[R_of + 1] = // Define instruction categories for data flow analysis. SMPitype DFACategory[NN_last+1]; +// Define which instructions define and use the CPU flags. +bool SMPDefsFlags[NN_last + 1]; +bool SMPUsesFlags[NN_last + 1]; + // We need to make subword registers equal to their containing registers when we // do comparisons, so that we will realize that register EAX is killed by a prior DEF // of register AL, for example, and vice versa. 
To keep sets ordered strictly, @@ -93,6 +97,13 @@ unsigned int ExtractGlobalIndex(op_t GlobalOp) { return index; } +void SetGlobalIndex(op_t *TempOp, size_t index) { + TempOp->n = (char) (index & 0x000000ff); + TempOp->offb = (char) ((index & 0x0000ff00) >> 8); + TempOp->offo = (char) ((index & 0x00ff0000) >> 16); + return; +} + // DEBUG Print DEF and/or USE for an operand. void PrintDefUse(ulong feature, int OpNum) { // CF_ macros number the operands from 1 to 6, while OpNum @@ -439,6 +450,97 @@ void SMPPhiFunction::Dump(void) const { return; } +// ***************************************************************** +// Class SMPDefUseChain +// ***************************************************************** + +// Constructors +SMPDefUseChain::SMPDefUseChain(void) { + this->SSAName.type = o_void; + this->RefInstrs.push_back(BADADDR); + return; +} + +SMPDefUseChain::SMPDefUseChain(op_t Name, ea_t Def) { + this->SSAName = Name; + this->RefInstrs.push_back(Def); + return; +} + +// Set the variable name. +void SMPDefUseChain::SetName(op_t Name) { + this->SSAName = Name; + return; +} + +// Set the DEF instruction. +void SMPDefUseChain::SetDef(ea_t Def) { + this->RefInstrs[0] = Def; + return; +} + +// Push a USE onto the list +void SMPDefUseChain::PushUse(ea_t Use) { + this->RefInstrs.push_back(Use); + return; +} + +// DEBUG dump. 
+void SMPDefUseChain::Dump(int SSANum) { + msg("DEF-USE chain for: "); + PrintListOperand(this->SSAName, SSANum); + if (this->RefInstrs.size() < 1) { + msg(" no references.\n"); + return; + } + msg("\n DEF: %x USEs: ", this->RefInstrs.at(0)); + size_t index; + for (index = 1; index < this->RefInstrs.size(); ++index) + msg("%x ", this->RefInstrs.at(index)); + msg("\n"); + return; +} // end of SMPDefUseChain::Dump() + +// ***************************************************************** +// Class SMPDUChainArray +// ***************************************************************** +SMPDUChainArray::SMPDUChainArray(void) { + this->SSAName.type = o_void; + return; +} + +SMPDUChainArray::SMPDUChainArray(op_t Name) { + this->SSAName = Name; + return; +} + +void SMPDUChainArray::SetName(op_t Name) { + this->SSAName = Name; + return; +} + +// DEBUG dump. +void SMPDUChainArray::Dump(void) { + size_t index; + for (index = 0; index < this->DUChains.size(); ++index) { + this->DUChains.at(index).Dump((int) index); + } + return; +} + +// ***************************************************************** +// Class SMPCompleteDUChains +// ***************************************************************** + +// DEBUG dump. +void SMPCompleteDUChains::Dump(void) { + size_t index; + for (index = 0; index < this->ChainsByName.size(); ++index) { + this->ChainsByName.at(index).Dump(); + } + return; +} // end of SMPCompleteDUChains::Dump() + // Initialize the DFACategory[] array to define instruction classes // for the purposes of data flow analysis. void InitDFACategory(void) { @@ -535,3 +637,836 @@ DFACategory[NN_vmcall] = INDIR_CALL; // Call to VM Monitor return; } // end InitDFACategory() + +// Initialize the SMPDefsFlags[] array to define how we emit +// optimizing annotations. +void InitSMPDefsFlags(void) { + // Default value is true. Many instructions set the flags. 
+ (void) memset(SMPDefsFlags, true, sizeof(SMPDefsFlags)); + +SMPDefsFlags[NN_null] = false; // Unknown Operation +SMPDefsFlags[NN_bound] = false; // Check Array Index Against Bounds +SMPDefsFlags[NN_call] = false; // Call Procedure +SMPDefsFlags[NN_callfi] = false; // Indirect Call Far Procedure +SMPDefsFlags[NN_callni] = false; // Indirect Call Near Procedure +SMPDefsFlags[NN_cbw] = false; // AL -> AX (with sign) +SMPDefsFlags[NN_cwde] = false; // AX -> EAX (with sign) +SMPDefsFlags[NN_cdqe] = false; // EAX -> RAX (with sign) +SMPDefsFlags[NN_clts] = false; // Clear Task-Switched Flag in CR0 +SMPDefsFlags[NN_cwd] = false; // AX -> DX:AX (with sign) +SMPDefsFlags[NN_cdq] = false; // EAX -> EDX:EAX (with sign) +SMPDefsFlags[NN_cqo] = false; // RAX -> RDX:RAX (with sign) +SMPDefsFlags[NN_enterw] = false; // Make Stack Frame for Procedure Parameters +SMPDefsFlags[NN_enter] = false; // Make Stack Frame for Procedure Parameters +SMPDefsFlags[NN_enterd] = false; // Make Stack Frame for Procedure Parameters +SMPDefsFlags[NN_enterq] = false; // Make Stack Frame for Procedure Parameters +SMPDefsFlags[NN_hlt] = false; // Halt +SMPDefsFlags[NN_in] = false; // Input from Port +SMPDefsFlags[NN_ins] = false; // Input Byte(s) from Port to String +SMPDefsFlags[NN_iretw] = false; // Interrupt Return +SMPDefsFlags[NN_iret] = false; // Interrupt Return +SMPDefsFlags[NN_iretd] = false; // Interrupt Return (use32) +SMPDefsFlags[NN_iretq] = false; // Interrupt Return (use64) +SMPDefsFlags[NN_ja] = false; // Jump if Above (CF=0 & ZF=0) +SMPDefsFlags[NN_jae] = false; // Jump if Above or Equal (CF=0) +SMPDefsFlags[NN_jb] = false; // Jump if Below (CF=1) +SMPDefsFlags[NN_jbe] = false; // Jump if Below or Equal (CF=1 | ZF=1) +SMPDefsFlags[NN_jc] = false; // Jump if Carry (CF=1) +SMPDefsFlags[NN_jcxz] = false; // Jump if CX is 0 +SMPDefsFlags[NN_jecxz] = false; // Jump if ECX is 0 +SMPDefsFlags[NN_jrcxz] = false; // Jump if RCX is 0 +SMPDefsFlags[NN_je] = false; // Jump if Equal (ZF=1) 
+SMPDefsFlags[NN_jg] = false; // Jump if Greater (ZF=0 & SF=OF) +SMPDefsFlags[NN_jge] = false; // Jump if Greater or Equal (SF=OF) +SMPDefsFlags[NN_jl] = false; // Jump if Less (SF!=OF) +SMPDefsFlags[NN_jle] = false; // Jump if Less or Equal (ZF=1 | SF!=OF) +SMPDefsFlags[NN_jna] = false; // Jump if Not Above (CF=1 | ZF=1) +SMPDefsFlags[NN_jnae] = false; // Jump if Not Above or Equal (CF=1) +SMPDefsFlags[NN_jnb] = false; // Jump if Not Below (CF=0) +SMPDefsFlags[NN_jnbe] = false; // Jump if Not Below or Equal (CF=0 & ZF=0) +SMPDefsFlags[NN_jnc] = false; // Jump if Not Carry (CF=0) +SMPDefsFlags[NN_jne] = false; // Jump if Not Equal (ZF=0) +SMPDefsFlags[NN_jng] = false; // Jump if Not Greater (ZF=1 | SF!=OF) +SMPDefsFlags[NN_jnge] = false; // Jump if Not Greater or Equal (ZF=1) +SMPDefsFlags[NN_jnl] = false; // Jump if Not Less (SF=OF) +SMPDefsFlags[NN_jnle] = false; // Jump if Not Less or Equal (ZF=0 & SF=OF) +SMPDefsFlags[NN_jno] = false; // Jump if Not Overflow (OF=0) +SMPDefsFlags[NN_jnp] = false; // Jump if Not Parity (PF=0) +SMPDefsFlags[NN_jns] = false; // Jump if Not Sign (SF=0) +SMPDefsFlags[NN_jnz] = false; // Jump if Not Zero (ZF=0) +SMPDefsFlags[NN_jo] = false; // Jump if Overflow (OF=1) +SMPDefsFlags[NN_jp] = false; // Jump if Parity (PF=1) +SMPDefsFlags[NN_jpe] = false; // Jump if Parity Even (PF=1) +SMPDefsFlags[NN_jpo] = false; // Jump if Parity Odd (PF=0) +SMPDefsFlags[NN_js] = false; // Jump if Sign (SF=1) +SMPDefsFlags[NN_jz] = false; // Jump if Zero (ZF=1) +SMPDefsFlags[NN_jmp] = false; // Jump +SMPDefsFlags[NN_jmpfi] = false; // Indirect Far Jump +SMPDefsFlags[NN_jmpni] = false; // Indirect Near Jump +SMPDefsFlags[NN_jmpshort] = false; // Jump Short (not used) +SMPDefsFlags[NN_lahf] = false; // Load Flags into AH Register +SMPDefsFlags[NN_lea] = false; // Load Effective Address +SMPDefsFlags[NN_leavew] = false; // High Level Procedure Exit +SMPDefsFlags[NN_leave] = false; // High Level Procedure Exit +SMPDefsFlags[NN_leaved] = false; // High 
Level Procedure Exit +SMPDefsFlags[NN_leaveq] = false; // High Level Procedure Exit +SMPDefsFlags[NN_lgdt] = false; // Load Global Descriptor Table Register +SMPDefsFlags[NN_lidt] = false; // Load Interrupt Descriptor Table Register +SMPDefsFlags[NN_lgs] = false; // Load Full Pointer to GS:xx +SMPDefsFlags[NN_lss] = false; // Load Full Pointer to SS:xx +SMPDefsFlags[NN_lds] = false; // Load Full Pointer to DS:xx +SMPDefsFlags[NN_les] = false; // Load Full Pointer to ES:xx +SMPDefsFlags[NN_lfs] = false; // Load Full Pointer to FS:xx +SMPDefsFlags[NN_loopwe] = false; // Loop while CX != 0 and ZF=1 +SMPDefsFlags[NN_loope] = false; // Loop while rCX != 0 and ZF=1 +SMPDefsFlags[NN_loopde] = false; // Loop while ECX != 0 and ZF=1 +SMPDefsFlags[NN_loopqe] = false; // Loop while RCX != 0 and ZF=1 +SMPDefsFlags[NN_loopwne] = false; // Loop while CX != 0 and ZF=0 +SMPDefsFlags[NN_loopne] = false; // Loop while rCX != 0 and ZF=0 +SMPDefsFlags[NN_loopdne] = false; // Loop while ECX != 0 and ZF=0 +SMPDefsFlags[NN_loopqne] = false; // Loop while RCX != 0 and ZF=0 +SMPDefsFlags[NN_ltr] = false; // Load Task Register +SMPDefsFlags[NN_mov] = false; // Move Data +SMPDefsFlags[NN_movsp] = false; // Move to/from Special Registers +SMPDefsFlags[NN_movs] = false; // Move Byte(s) from String to String +SMPDefsFlags[NN_movsx] = false; // Move with Sign-Extend +SMPDefsFlags[NN_movzx] = false; // Move with Zero-Extend +SMPDefsFlags[NN_nop] = false; // No Operation +SMPDefsFlags[NN_out] = false; // Output to Port +SMPDefsFlags[NN_outs] = false; // Output Byte(s) to Port +SMPDefsFlags[NN_pop] = false; // Pop a word from the Stack +SMPDefsFlags[NN_popaw] = false; // Pop all General Registers +SMPDefsFlags[NN_popa] = false; // Pop all General Registers +SMPDefsFlags[NN_popad] = false; // Pop all General Registers (use32) +SMPDefsFlags[NN_popaq] = false; // Pop all General Registers (use64) +SMPDefsFlags[NN_push] = false; // Push Operand onto the Stack +SMPDefsFlags[NN_pushaw] = false; // Push 
all General Registers +SMPDefsFlags[NN_pusha] = false; // Push all General Registers +SMPDefsFlags[NN_pushad] = false; // Push all General Registers (use32) +SMPDefsFlags[NN_pushaq] = false; // Push all General Registers (use64) +SMPDefsFlags[NN_pushfw] = false; // Push Flags Register onto the Stack +SMPDefsFlags[NN_pushf] = false; // Push Flags Register onto the Stack +SMPDefsFlags[NN_pushfd] = false; // Push Flags Register onto the Stack (use32) +SMPDefsFlags[NN_pushfq] = false; // Push Flags Register onto the Stack (use64) +SMPDefsFlags[NN_rep] = false; // Repeat String Operation +SMPDefsFlags[NN_repe] = false; // Repeat String Operation while ZF=1 +SMPDefsFlags[NN_repne] = false; // Repeat String Operation while ZF=0 +SMPDefsFlags[NN_retn] = false; // Return Near from Procedure +SMPDefsFlags[NN_retf] = false; // Return Far from Procedure +SMPDefsFlags[NN_shl] = false; // Shift Logical Left +SMPDefsFlags[NN_shr] = false; // Shift Logical Right +SMPDefsFlags[NN_seta] = false; // Set Byte if Above (CF=0 & ZF=0) +SMPDefsFlags[NN_setae] = false; // Set Byte if Above or Equal (CF=0) +SMPDefsFlags[NN_setb] = false; // Set Byte if Below (CF=1) +SMPDefsFlags[NN_setbe] = false; // Set Byte if Below or Equal (CF=1 | ZF=1) +SMPDefsFlags[NN_setc] = false; // Set Byte if Carry (CF=1) +SMPDefsFlags[NN_sete] = false; // Set Byte if Equal (ZF=1) +SMPDefsFlags[NN_setg] = false; // Set Byte if Greater (ZF=0 & SF=OF) +SMPDefsFlags[NN_setge] = false; // Set Byte if Greater or Equal (SF=OF) +SMPDefsFlags[NN_setl] = false; // Set Byte if Less (SF!=OF) +SMPDefsFlags[NN_setle] = false; // Set Byte if Less or Equal (ZF=1 | SF!=OF) +SMPDefsFlags[NN_setna] = false; // Set Byte if Not Above (CF=1 | ZF=1) +SMPDefsFlags[NN_setnae] = false; // Set Byte if Not Above or Equal (CF=1) +SMPDefsFlags[NN_setnb] = false; // Set Byte if Not Below (CF=0) +SMPDefsFlags[NN_setnbe] = false; // Set Byte if Not Below or Equal (CF=0 & ZF=0) +SMPDefsFlags[NN_setnc] = false; // Set Byte if Not Carry (CF=0) 
+SMPDefsFlags[NN_setne] = false; // Set Byte if Not Equal (ZF=0) +SMPDefsFlags[NN_setng] = false; // Set Byte if Not Greater (ZF=1 | SF!=OF) +SMPDefsFlags[NN_setnge] = false; // Set Byte if Not Greater or Equal (ZF=1) +SMPDefsFlags[NN_setnl] = false; // Set Byte if Not Less (SF=OF) +SMPDefsFlags[NN_setnle] = false; // Set Byte if Not Less or Equal (ZF=0 & SF=OF) +SMPDefsFlags[NN_setno] = false; // Set Byte if Not Overflow (OF=0) +SMPDefsFlags[NN_setnp] = false; // Set Byte if Not Parity (PF=0) +SMPDefsFlags[NN_setns] = false; // Set Byte if Not Sign (SF=0) +SMPDefsFlags[NN_setnz] = false; // Set Byte if Not Zero (ZF=0) +SMPDefsFlags[NN_seto] = false; // Set Byte if Overflow (OF=1) +SMPDefsFlags[NN_setp] = false; // Set Byte if Parity (PF=1) +SMPDefsFlags[NN_setpe] = false; // Set Byte if Parity Even (PF=1) +SMPDefsFlags[NN_setpo] = false; // Set Byte if Parity Odd (PF=0) +SMPDefsFlags[NN_sets] = false; // Set Byte if Sign (SF=1) +SMPDefsFlags[NN_setz] = false; // Set Byte if Zero (ZF=1) +SMPDefsFlags[NN_sgdt] = false; // Store Global Descriptor Table Register +SMPDefsFlags[NN_sidt] = false; // Store Interrupt Descriptor Table Register +SMPDefsFlags[NN_sldt] = false; // Store Local Descriptor Table Register +SMPDefsFlags[NN_str] = false; // Store Task Register +SMPDefsFlags[NN_wait] = false; // Wait until BUSY# Pin is Inactive (HIGH) +SMPDefsFlags[NN_xchg] = false; // Exchange Register/Memory with Register + +// +// 486 instructions +// + +SMPDefsFlags[NN_bswap] = false; // Swap bytes in register +SMPDefsFlags[NN_invd] = false; // Invalidate Data Cache +SMPDefsFlags[NN_wbinvd] = false; // Invalidate Data Cache (write changes) +SMPDefsFlags[NN_invlpg] = false; // Invalidate TLB entry + +// +// Pentium instructions +// + +SMPDefsFlags[NN_rdmsr] = false; // Read Machine Status Register +SMPDefsFlags[NN_wrmsr] = false; // Write Machine Status Register +SMPDefsFlags[NN_cpuid] = false; // Get CPU ID +SMPDefsFlags[NN_rdtsc] = false; // Read Time Stamp Counter + +// +// 
Pentium Pro instructions +// + +SMPDefsFlags[NN_cmova] = false; // Move if Above (CF=0 & ZF=0) +SMPDefsFlags[NN_cmovb] = false; // Move if Below (CF=1) +SMPDefsFlags[NN_cmovbe] = false; // Move if Below or Equal (CF=1 | ZF=1) +SMPDefsFlags[NN_cmovg] = false; // Move if Greater (ZF=0 & SF=OF) +SMPDefsFlags[NN_cmovge] = false; // Move if Greater or Equal (SF=OF) +SMPDefsFlags[NN_cmovl] = false; // Move if Less (SF!=OF) +SMPDefsFlags[NN_cmovle] = false; // Move if Less or Equal (ZF=1 | SF!=OF) +SMPDefsFlags[NN_cmovnb] = false; // Move if Not Below (CF=0) +SMPDefsFlags[NN_cmovno] = false; // Move if Not Overflow (OF=0) +SMPDefsFlags[NN_cmovnp] = false; // Move if Not Parity (PF=0) +SMPDefsFlags[NN_cmovns] = false; // Move if Not Sign (SF=0) +SMPDefsFlags[NN_cmovnz] = false; // Move if Not Zero (ZF=0) +SMPDefsFlags[NN_cmovo] = false; // Move if Overflow (OF=1) +SMPDefsFlags[NN_cmovp] = false; // Move if Parity (PF=1) +SMPDefsFlags[NN_cmovs] = false; // Move if Sign (SF=1) +SMPDefsFlags[NN_cmovz] = false; // Move if Zero (ZF=1) +SMPDefsFlags[NN_fcmovb] = false; // Floating Move if Below +SMPDefsFlags[NN_fcmove] = false; // Floating Move if Equal +SMPDefsFlags[NN_fcmovbe] = false; // Floating Move if Below or Equal +SMPDefsFlags[NN_fcmovu] = false; // Floating Move if Unordered +SMPDefsFlags[NN_fcmovnb] = false; // Floating Move if Not Below +SMPDefsFlags[NN_fcmovne] = false; // Floating Move if Not Equal +SMPDefsFlags[NN_fcmovnbe] = false; // Floating Move if Not Below or Equal +SMPDefsFlags[NN_fcmovnu] = false; // Floating Move if Not Unordered +SMPDefsFlags[NN_rdpmc] = false; // Read Performance Monitor Counter + +// +// FPP instructuions +// + +SMPDefsFlags[NN_fld] = false; // Load Real +SMPDefsFlags[NN_fst] = false; // Store Real +SMPDefsFlags[NN_fstp] = false; // Store Real and Pop +SMPDefsFlags[NN_fxch] = false; // Exchange Registers +SMPDefsFlags[NN_fild] = false; // Load Integer +SMPDefsFlags[NN_fist] = false; // Store Integer +SMPDefsFlags[NN_fistp] = false; // 
Store Integer and Pop +SMPDefsFlags[NN_fbld] = false; // Load BCD +SMPDefsFlags[NN_fbstp] = false; // Store BCD and Pop +SMPDefsFlags[NN_fadd] = false; // Add Real +SMPDefsFlags[NN_faddp] = false; // Add Real and Pop +SMPDefsFlags[NN_fiadd] = false; // Add Integer +SMPDefsFlags[NN_fsub] = false; // Subtract Real +SMPDefsFlags[NN_fsubp] = false; // Subtract Real and Pop +SMPDefsFlags[NN_fisub] = false; // Subtract Integer +SMPDefsFlags[NN_fsubr] = false; // Subtract Real Reversed +SMPDefsFlags[NN_fsubrp] = false; // Subtract Real Reversed and Pop +SMPDefsFlags[NN_fisubr] = false; // Subtract Integer Reversed +SMPDefsFlags[NN_fmul] = false; // Multiply Real +SMPDefsFlags[NN_fmulp] = false; // Multiply Real and Pop +SMPDefsFlags[NN_fimul] = false; // Multiply Integer +SMPDefsFlags[NN_fdiv] = false; // Divide Real +SMPDefsFlags[NN_fdivp] = false; // Divide Real and Pop +SMPDefsFlags[NN_fidiv] = false; // Divide Integer +SMPDefsFlags[NN_fdivr] = false; // Divide Real Reversed +SMPDefsFlags[NN_fdivrp] = false; // Divide Real Reversed and Pop +SMPDefsFlags[NN_fidivr] = false; // Divide Integer Reversed +SMPDefsFlags[NN_fsqrt] = false; // Square Root +SMPDefsFlags[NN_fscale] = false; // Scale: st(0) <- st(0) * 2^st(1) +SMPDefsFlags[NN_fprem] = false; // Partial Remainder +SMPDefsFlags[NN_frndint] = false; // Round to Integer +SMPDefsFlags[NN_fxtract] = false; // Extract exponent and significand +SMPDefsFlags[NN_fabs] = false; // Absolute value +SMPDefsFlags[NN_fchs] = false; // Change Sign +SMPDefsFlags[NN_ficom] = false; // Compare Integer +SMPDefsFlags[NN_ficomp] = false; // Compare Integer and Pop +SMPDefsFlags[NN_ftst] = false; // Test +SMPDefsFlags[NN_fxam] = false; // Examine +SMPDefsFlags[NN_fptan] = false; // Partial tangent +SMPDefsFlags[NN_fpatan] = false; // Partial arctangent +SMPDefsFlags[NN_f2xm1] = false; // 2^x - 1 +SMPDefsFlags[NN_fyl2x] = false; // Y * lg2(X) +SMPDefsFlags[NN_fyl2xp1] = false; // Y * lg2(X+1) +SMPDefsFlags[NN_fldz] = false; // Load +0.0 
+SMPDefsFlags[NN_fld1] = false; // Load +1.0 +SMPDefsFlags[NN_fldpi] = false; // Load PI=3.14... +SMPDefsFlags[NN_fldl2t] = false; // Load lg2(10) +SMPDefsFlags[NN_fldl2e] = false; // Load lg2(e) +SMPDefsFlags[NN_fldlg2] = false; // Load lg10(2) +SMPDefsFlags[NN_fldln2] = false; // Load ln(2) +SMPDefsFlags[NN_finit] = false; // Initialize Processor +SMPDefsFlags[NN_fninit] = false; // Initialize Processor (no wait) +SMPDefsFlags[NN_fsetpm] = false; // Set Protected Mode +SMPDefsFlags[NN_fldcw] = false; // Load Control Word +SMPDefsFlags[NN_fstcw] = false; // Store Control Word +SMPDefsFlags[NN_fnstcw] = false; // Store Control Word (no wait) +SMPDefsFlags[NN_fstsw] = false; // Store Status Word to memory or AX +SMPDefsFlags[NN_fnstsw] = false; // Store Status Word (no wait) to memory or AX +SMPDefsFlags[NN_fclex] = false; // Clear Exceptions +SMPDefsFlags[NN_fnclex] = false; // Clear Exceptions (no wait) +SMPDefsFlags[NN_fstenv] = false; // Store Environment +SMPDefsFlags[NN_fnstenv] = false; // Store Environment (no wait) +SMPDefsFlags[NN_fldenv] = false; // Load Environment +SMPDefsFlags[NN_fsave] = false; // Save State +SMPDefsFlags[NN_fnsave] = false; // Save State (no wait) +SMPDefsFlags[NN_frstor] = false; // Restore State +SMPDefsFlags[NN_fincstp] = false; // Increment Stack Pointer +SMPDefsFlags[NN_fdecstp] = false; // Decrement Stack Pointer +SMPDefsFlags[NN_ffree] = false; // Free Register +SMPDefsFlags[NN_fnop] = false; // No Operation +SMPDefsFlags[NN_feni] = false; // (8087 only) +SMPDefsFlags[NN_fneni] = false; // (no wait) (8087 only) +SMPDefsFlags[NN_fdisi] = false; // (8087 only) +SMPDefsFlags[NN_fndisi] = false; // (no wait) (8087 only) + +// +// 80387 instructions +// + +SMPDefsFlags[NN_fprem1] = false; // Partial Remainder ( < half ) +SMPDefsFlags[NN_fsincos] = false; // t<-cos(st); st<-sin(st); push t +SMPDefsFlags[NN_fsin] = false; // Sine +SMPDefsFlags[NN_fcos] = false; // Cosine +SMPDefsFlags[NN_fucom] = false; // Compare Unordered Real 
+SMPDefsFlags[NN_fucomp] = false; // Compare Unordered Real and Pop +SMPDefsFlags[NN_fucompp] = false; // Compare Unordered Real and Pop Twice + +// +// Instructions added 28.02.96 +// + +SMPDefsFlags[NN_svdc] = false; // Save Register and Descriptor +SMPDefsFlags[NN_rsdc] = false; // Restore Register and Descriptor +SMPDefsFlags[NN_svldt] = false; // Save LDTR and Descriptor +SMPDefsFlags[NN_rsldt] = false; // Restore LDTR and Descriptor +SMPDefsFlags[NN_svts] = false; // Save TR and Descriptor +SMPDefsFlags[NN_rsts] = false; // Restore TR and Descriptor +SMPDefsFlags[NN_icebp] = false; // ICE Break Point + +// +// MMX instructions +// + +SMPDefsFlags[NN_emms] = false; // Empty MMX state +SMPDefsFlags[NN_movd] = false; // Move 32 bits +SMPDefsFlags[NN_movq] = false; // Move 64 bits +SMPDefsFlags[NN_packsswb] = false; // Pack with Signed Saturation (Word->Byte) +SMPDefsFlags[NN_packssdw] = false; // Pack with Signed Saturation (Dword->Word) +SMPDefsFlags[NN_packuswb] = false; // Pack with Unsigned Saturation (Word->Byte) +SMPDefsFlags[NN_paddb] = false; // Packed Add Byte +SMPDefsFlags[NN_paddw] = false; // Packed Add Word +SMPDefsFlags[NN_paddd] = false; // Packed Add Dword +SMPDefsFlags[NN_paddsb] = false; // Packed Add with Saturation (Byte) +SMPDefsFlags[NN_paddsw] = false; // Packed Add with Saturation (Word) +SMPDefsFlags[NN_paddusb] = false; // Packed Add Unsigned with Saturation (Byte) +SMPDefsFlags[NN_paddusw] = false; // Packed Add Unsigned with Saturation (Word) +SMPDefsFlags[NN_pand] = false; // Bitwise Logical And +SMPDefsFlags[NN_pandn] = false; // Bitwise Logical And Not +SMPDefsFlags[NN_pcmpeqb] = false; // Packed Compare for Equal (Byte) +SMPDefsFlags[NN_pcmpeqw] = false; // Packed Compare for Equal (Word) +SMPDefsFlags[NN_pcmpeqd] = false; // Packed Compare for Equal (Dword) +SMPDefsFlags[NN_pcmpgtb] = false; // Packed Compare for Greater Than (Byte) +SMPDefsFlags[NN_pcmpgtw] = false; // Packed Compare for Greater Than (Word) 
+SMPDefsFlags[NN_pcmpgtd] = false; // Packed Compare for Greater Than (Dword) +SMPDefsFlags[NN_pmaddwd] = false; // Packed Multiply and Add +SMPDefsFlags[NN_pmulhw] = false; // Packed Multiply High +SMPDefsFlags[NN_pmullw] = false; // Packed Multiply Low +SMPDefsFlags[NN_por] = false; // Bitwise Logical Or +SMPDefsFlags[NN_psllw] = false; // Packed Shift Left Logical (Word) +SMPDefsFlags[NN_pslld] = false; // Packed Shift Left Logical (Dword) +SMPDefsFlags[NN_psllq] = false; // Packed Shift Left Logical (Qword) +SMPDefsFlags[NN_psraw] = false; // Packed Shift Right Arithmetic (Word) +SMPDefsFlags[NN_psrad] = false; // Packed Shift Right Arithmetic (Dword) +SMPDefsFlags[NN_psrlw] = false; // Packed Shift Right Logical (Word) +SMPDefsFlags[NN_psrld] = false; // Packed Shift Right Logical (Dword) +SMPDefsFlags[NN_psrlq] = false; // Packed Shift Right Logical (Qword) +SMPDefsFlags[NN_psubb] = false; // Packed Subtract Byte +SMPDefsFlags[NN_psubw] = false; // Packed Subtract Word +SMPDefsFlags[NN_psubd] = false; // Packed Subtract Dword +SMPDefsFlags[NN_psubsb] = false; // Packed Subtract with Saturation (Byte) +SMPDefsFlags[NN_psubsw] = false; // Packed Subtract with Saturation (Word) +SMPDefsFlags[NN_psubusb] = false; // Packed Subtract Unsigned with Saturation (Byte) +SMPDefsFlags[NN_psubusw] = false; // Packed Subtract Unsigned with Saturation (Word) +SMPDefsFlags[NN_punpckhbw] = false; // Unpack High Packed Data (Byte->Word) +SMPDefsFlags[NN_punpckhwd] = false; // Unpack High Packed Data (Word->Dword) +SMPDefsFlags[NN_punpckhdq] = false; // Unpack High Packed Data (Dword->Qword) +SMPDefsFlags[NN_punpcklbw] = false; // Unpack Low Packed Data (Byte->Word) +SMPDefsFlags[NN_punpcklwd] = false; // Unpack Low Packed Data (Word->Dword) +SMPDefsFlags[NN_punpckldq] = false; // Unpack Low Packed Data (Dword->Qword) +SMPDefsFlags[NN_pxor] = false; // Bitwise Logical Exclusive Or + +// +// Undocumented Deschutes processor instructions +// + +SMPDefsFlags[NN_fxsave] = false; // 
Fast save FP context +SMPDefsFlags[NN_fxrstor] = false; // Fast restore FP context + +// Pentium II instructions + +SMPDefsFlags[NN_sysexit] = false; // Fast Transition from System Call Entry Point + +// 3DNow! instructions + +SMPDefsFlags[NN_pavgusb] = false; // Packed 8-bit Unsigned Integer Averaging +SMPDefsFlags[NN_pfadd] = false; // Packed Floating-Point Addition +SMPDefsFlags[NN_pfsub] = false; // Packed Floating-Point Subtraction +SMPDefsFlags[NN_pfsubr] = false; // Packed Floating-Point Reverse Subtraction +SMPDefsFlags[NN_pfacc] = false; // Packed Floating-Point Accumulate +SMPDefsFlags[NN_pfcmpge] = false; // Packed Floating-Point Comparison, Greater or Equal +SMPDefsFlags[NN_pfcmpgt] = false; // Packed Floating-Point Comparison, Greater +SMPDefsFlags[NN_pfcmpeq] = false; // Packed Floating-Point Comparison, Equal +SMPDefsFlags[NN_pfmin] = false; // Packed Floating-Point Minimum +SMPDefsFlags[NN_pfmax] = false; // Packed Floating-Point Maximum +SMPDefsFlags[NN_pi2fd] = false; // Packed 32-bit Integer to Floating-Point +SMPDefsFlags[NN_pf2id] = false; // Packed Floating-Point to 32-bit Integer +SMPDefsFlags[NN_pfrcp] = false; // Packed Floating-Point Reciprocal Approximation +SMPDefsFlags[NN_pfrsqrt] = false; // Packed Floating-Point Reciprocal Square Root Approximation +SMPDefsFlags[NN_pfmul] = false; // Packed Floating-Point Multiplication +SMPDefsFlags[NN_pfrcpit1] = false; // Packed Floating-Point Reciprocal First Iteration Step +SMPDefsFlags[NN_pfrsqit1] = false; // Packed Floating-Point Reciprocal Square Root First Iteration Step +SMPDefsFlags[NN_pfrcpit2] = false; // Packed Floating-Point Reciprocal Second Iteration Step +SMPDefsFlags[NN_pmulhrw] = false; // Packed Floating-Point 16-bit Integer Multiply with rounding +SMPDefsFlags[NN_femms] = false; // Faster entry/exit of the MMX or floating-point state +SMPDefsFlags[NN_prefetch] = false; // Prefetch at least a 32-byte line into L1 data cache +SMPDefsFlags[NN_prefetchw] = false; // Prefetch 
processor cache line into L1 data cache (mark as modified) + + +// Pentium III instructions + +SMPDefsFlags[NN_addps] = false; // Packed Single-FP Add +SMPDefsFlags[NN_addss] = false; // Scalar Single-FP Add +SMPDefsFlags[NN_andnps] = false; // Bitwise Logical And Not for Single-FP +SMPDefsFlags[NN_andps] = false; // Bitwise Logical And for Single-FP +SMPDefsFlags[NN_cmpps] = false; // Packed Single-FP Compare +SMPDefsFlags[NN_cmpss] = false; // Scalar Single-FP Compare +SMPDefsFlags[NN_cvtpi2ps] = false; // Packed signed INT32 to Packed Single-FP conversion +SMPDefsFlags[NN_cvtps2pi] = false; // Packed Single-FP to Packed INT32 conversion +SMPDefsFlags[NN_cvtsi2ss] = false; // Scalar signed INT32 to Single-FP conversion +SMPDefsFlags[NN_cvtss2si] = false; // Scalar Single-FP to signed INT32 conversion +SMPDefsFlags[NN_cvttps2pi] = false; // Packed Single-FP to Packed INT32 conversion (truncate) +SMPDefsFlags[NN_cvttss2si] = false; // Scalar Single-FP to signed INT32 conversion (truncate) +SMPDefsFlags[NN_divps] = false; // Packed Single-FP Divide +SMPDefsFlags[NN_divss] = false; // Scalar Single-FP Divide +SMPDefsFlags[NN_ldmxcsr] = false; // Load Streaming SIMD Extensions Technology Control/Status Register +SMPDefsFlags[NN_maxps] = false; // Packed Single-FP Maximum +SMPDefsFlags[NN_maxss] = false; // Scalar Single-FP Maximum +SMPDefsFlags[NN_minps] = false; // Packed Single-FP Minimum +SMPDefsFlags[NN_minss] = false; // Scalar Single-FP Minimum +SMPDefsFlags[NN_movaps] = false; // Move Aligned Four Packed Single-FP +SMPDefsFlags[NN_movhlps] = false; // Move High to Low Packed Single-FP +SMPDefsFlags[NN_movhps] = false; // Move High Packed Single-FP +SMPDefsFlags[NN_movlhps] = false; // Move Low to High Packed Single-FP +SMPDefsFlags[NN_movlps] = false; // Move Low Packed Single-FP +SMPDefsFlags[NN_movmskps] = false; // Move Mask to Register +SMPDefsFlags[NN_movss] = false; // Move Scalar Single-FP +SMPDefsFlags[NN_movups] = false; // Move Unaligned Four Packed 
Single-FP +SMPDefsFlags[NN_mulps] = false; // Packed Single-FP Multiply +SMPDefsFlags[NN_mulss] = false; // Scalar Single-FP Multiply +SMPDefsFlags[NN_orps] = false; // Bitwise Logical OR for Single-FP Data +SMPDefsFlags[NN_rcpps] = false; // Packed Single-FP Reciprocal +SMPDefsFlags[NN_rcpss] = false; // Scalar Single-FP Reciprocal +SMPDefsFlags[NN_rsqrtps] = false; // Packed Single-FP Square Root Reciprocal +SMPDefsFlags[NN_rsqrtss] = false; // Scalar Single-FP Square Root Reciprocal +SMPDefsFlags[NN_shufps] = false; // Shuffle Single-FP +SMPDefsFlags[NN_sqrtps] = false; // Packed Single-FP Square Root +SMPDefsFlags[NN_sqrtss] = false; // Scalar Single-FP Square Root +SMPDefsFlags[NN_stmxcsr] = false; // Store Streaming SIMD Extensions Technology Control/Status Register +SMPDefsFlags[NN_subps] = false; // Packed Single-FP Subtract +SMPDefsFlags[NN_subss] = false; // Scalar Single-FP Subtract +SMPDefsFlags[NN_unpckhps] = false; // Unpack High Packed Single-FP Data +SMPDefsFlags[NN_unpcklps] = false; // Unpack Low Packed Single-FP Data +SMPDefsFlags[NN_xorps] = false; // Bitwise Logical XOR for Single-FP Data +SMPDefsFlags[NN_pavgb] = false; // Packed Average (Byte) +SMPDefsFlags[NN_pavgw] = false; // Packed Average (Word) +SMPDefsFlags[NN_pextrw] = false; // Extract Word +SMPDefsFlags[NN_pinsrw] = false; // Insert Word +SMPDefsFlags[NN_pmaxsw] = false; // Packed Signed Integer Word Maximum +SMPDefsFlags[NN_pmaxub] = false; // Packed Unsigned Integer Byte Maximum +SMPDefsFlags[NN_pminsw] = false; // Packed Signed Integer Word Minimum +SMPDefsFlags[NN_pminub] = false; // Packed Unsigned Integer Byte Minimum +SMPDefsFlags[NN_pmovmskb] = false; // Move Byte Mask to Integer +SMPDefsFlags[NN_pmulhuw] = false; // Packed Multiply High Unsigned +SMPDefsFlags[NN_psadbw] = false; // Packed Sum of Absolute Differences +SMPDefsFlags[NN_pshufw] = false; // Packed Shuffle Word +SMPDefsFlags[NN_maskmovq] = false; // Byte Mask write +SMPDefsFlags[NN_movntps] = false; // Move 
Aligned Four Packed Single-FP Non Temporal +SMPDefsFlags[NN_movntq] = false; // Move 64 Bits Non Temporal +SMPDefsFlags[NN_prefetcht0] = false; // Prefetch to all cache levels +SMPDefsFlags[NN_prefetcht1] = false; // Prefetch to all cache levels +SMPDefsFlags[NN_prefetcht2] = false; // Prefetch to L2 cache +SMPDefsFlags[NN_prefetchnta] = false; // Prefetch to L1 cache +SMPDefsFlags[NN_sfence] = false; // Store Fence + +// Pentium III Pseudo instructions + +SMPDefsFlags[NN_cmpeqps] = false; // Packed Single-FP Compare EQ +SMPDefsFlags[NN_cmpltps] = false; // Packed Single-FP Compare LT +SMPDefsFlags[NN_cmpleps] = false; // Packed Single-FP Compare LE +SMPDefsFlags[NN_cmpunordps] = false; // Packed Single-FP Compare UNORD +SMPDefsFlags[NN_cmpneqps] = false; // Packed Single-FP Compare NOT EQ +SMPDefsFlags[NN_cmpnltps] = false; // Packed Single-FP Compare NOT LT +SMPDefsFlags[NN_cmpnleps] = false; // Packed Single-FP Compare NOT LE +SMPDefsFlags[NN_cmpordps] = false; // Packed Single-FP Compare ORDERED +SMPDefsFlags[NN_cmpeqss] = false; // Scalar Single-FP Compare EQ +SMPDefsFlags[NN_cmpltss] = false; // Scalar Single-FP Compare LT +SMPDefsFlags[NN_cmpless] = false; // Scalar Single-FP Compare LE +SMPDefsFlags[NN_cmpunordss] = false; // Scalar Single-FP Compare UNORD +SMPDefsFlags[NN_cmpneqss] = false; // Scalar Single-FP Compare NOT EQ +SMPDefsFlags[NN_cmpnltss] = false; // Scalar Single-FP Compare NOT LT +SMPDefsFlags[NN_cmpnless] = false; // Scalar Single-FP Compare NOT LE +SMPDefsFlags[NN_cmpordss] = false; // Scalar Single-FP Compare ORDERED + +// AMD K7 instructions + +// Revisit AMD if we port to it. 
+SMPDefsFlags[NN_pf2iw] = false; // Packed Floating-Point to Integer with Sign Extend +SMPDefsFlags[NN_pfnacc] = false; // Packed Floating-Point Negative Accumulate +SMPDefsFlags[NN_pfpnacc] = false; // Packed Floating-Point Mixed Positive-Negative Accumulate +SMPDefsFlags[NN_pi2fw] = false; // Packed 16-bit Integer to Floating-Point +SMPDefsFlags[NN_pswapd] = false; // Packed Swap Double Word + +// Undocumented FP instructions (thanks to norbert.juffa@adm.com) + +SMPDefsFlags[NN_fstp1] = false; // Alias of Store Real and Pop +SMPDefsFlags[NN_fxch4] = false; // Alias of Exchange Registers +SMPDefsFlags[NN_ffreep] = false; // Free Register and Pop +SMPDefsFlags[NN_fxch7] = false; // Alias of Exchange Registers +SMPDefsFlags[NN_fstp8] = false; // Alias of Store Real and Pop +SMPDefsFlags[NN_fstp9] = false; // Alias of Store Real and Pop + +// Pentium 4 instructions + +SMPDefsFlags[NN_addpd] = false; // Add Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_addsd] = false; // Add Scalar Double-Precision Floating-Point Values +SMPDefsFlags[NN_andnpd] = false; // Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_andpd] = false; // Bitwise Logical AND of Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_clflush] = false; // Flush Cache Line +SMPDefsFlags[NN_cmppd] = false; // Compare Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_cmpsd] = false; // Compare Scalar Double-Precision Floating-Point Values +SMPDefsFlags[NN_cvtdq2pd] = false; // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values +SMPDefsFlags[NN_cvtdq2ps] = false; // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_cvtpd2dq] = false; // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers +SMPDefsFlags[NN_cvtpd2pi] = false; // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers 
+SMPDefsFlags[NN_cvtpd2ps] = false; // Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values +SMPDefsFlags[NN_cvtpi2pd] = false; // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_cvtps2dq] = false; // Convert Packed Single-Precision Floating-Point Values to Packed Doubleword Integers +SMPDefsFlags[NN_cvtps2pd] = false; // Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_cvtsd2si] = false; // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer +SMPDefsFlags[NN_cvtsd2ss] = false; // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value +SMPDefsFlags[NN_cvtsi2sd] = false; // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value +SMPDefsFlags[NN_cvtss2sd] = false; // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value +SMPDefsFlags[NN_cvttpd2dq] = false; // Convert With Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers +SMPDefsFlags[NN_cvttpd2pi] = false; // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers +SMPDefsFlags[NN_cvttps2dq] = false; // Convert With Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers +SMPDefsFlags[NN_cvttsd2si] = false; // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer +SMPDefsFlags[NN_divpd] = false; // Divide Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_divsd] = false; // Divide Scalar Double-Precision Floating-Point Values +SMPDefsFlags[NN_lfence] = false; // Load Fence +SMPDefsFlags[NN_maskmovdqu] = false; // Store Selected Bytes of Double Quadword +SMPDefsFlags[NN_maxpd] = false; // Return Maximum Packed Double-Precision Floating-Point Values 
+SMPDefsFlags[NN_maxsd] = false; // Return Maximum Scalar Double-Precision Floating-Point Value +SMPDefsFlags[NN_mfence] = false; // Memory Fence +SMPDefsFlags[NN_minpd] = false; // Return Minimum Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_minsd] = false; // Return Minimum Scalar Double-Precision Floating-Point Value +SMPDefsFlags[NN_movapd] = false; // Move Aligned Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_movdq2q] = false; // Move Quadword from XMM to MMX Register +SMPDefsFlags[NN_movdqa] = false; // Move Aligned Double Quadword +SMPDefsFlags[NN_movdqu] = false; // Move Unaligned Double Quadword +SMPDefsFlags[NN_movhpd] = false; // Move High Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_movlpd] = false; // Move Low Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_movmskpd] = false; // Extract Packed Double-Precision Floating-Point Sign Mask +SMPDefsFlags[NN_movntdq] = false; // Store Double Quadword Using Non-Temporal Hint +SMPDefsFlags[NN_movnti] = false; // Store Doubleword Using Non-Temporal Hint +SMPDefsFlags[NN_movntpd] = false; // Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint +SMPDefsFlags[NN_movq2dq] = false; // Move Quadword from MMX to XMM Register +SMPDefsFlags[NN_movsd] = false; // Move Scalar Double-Precision Floating-Point Values +SMPDefsFlags[NN_movupd] = false; // Move Unaligned Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_mulpd] = false; // Multiply Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_mulsd] = false; // Multiply Scalar Double-Precision Floating-Point Values +SMPDefsFlags[NN_orpd] = false; // Bitwise Logical OR of Double-Precision Floating-Point Values +SMPDefsFlags[NN_paddq] = false; // Add Packed Quadword Integers +SMPDefsFlags[NN_pause] = false; // Spin Loop Hint +SMPDefsFlags[NN_pmuludq] = false; // Multiply Packed Unsigned Doubleword Integers +SMPDefsFlags[NN_pshufd] = false; // Shuffle Packed 
Doublewords +SMPDefsFlags[NN_pshufhw] = false; // Shuffle Packed High Words +SMPDefsFlags[NN_pshuflw] = false; // Shuffle Packed Low Words +SMPDefsFlags[NN_pslldq] = false; // Shift Double Quadword Left Logical +SMPDefsFlags[NN_psrldq] = false; // Shift Double Quadword Right Logical +SMPDefsFlags[NN_psubq] = false; // Subtract Packed Quadword Integers +SMPDefsFlags[NN_punpckhqdq] = false; // Unpack High Data +SMPDefsFlags[NN_punpcklqdq] = false; // Unpack Low Data +SMPDefsFlags[NN_shufpd] = false; // Shuffle Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_sqrtpd] = false; // Compute Square Roots of Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_sqrtsd] = false; // Compute Square Rootof Scalar Double-Precision Floating-Point Value +SMPDefsFlags[NN_subpd] = false; // Subtract Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_subsd] = false; // Subtract Scalar Double-Precision Floating-Point Values +SMPDefsFlags[NN_unpckhpd] = false; // Unpack and Interleave High Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_unpcklpd] = false; // Unpack and Interleave Low Packed Double-Precision Floating-Point Values +SMPDefsFlags[NN_xorpd] = false; // Bitwise Logical OR of Double-Precision Floating-Point Values + + +// AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual + + +// AMD64 instructions NOTE: not AMD, found in Intel manual + +SMPDefsFlags[NN_swapgs] = false; // Exchange GS base with KernelGSBase MSR + +// New Pentium instructions (SSE3) + +SMPDefsFlags[NN_movddup] = false; // Move One Double-FP and Duplicate +SMPDefsFlags[NN_movshdup] = false; // Move Packed Single-FP High and Duplicate +SMPDefsFlags[NN_movsldup] = false; // Move Packed Single-FP Low and Duplicate + +// Missing AMD64 instructions NOTE: also found in Intel manual + +SMPDefsFlags[NN_movsxd] = false; // Move with Sign-Extend Doubleword + +// SSE3 instructions + +SMPDefsFlags[NN_addsubpd] = false; // Add /Sub packed DP FP numbers 
+SMPDefsFlags[NN_addsubps] = false; // Add /Sub packed SP FP numbers +SMPDefsFlags[NN_haddpd] = false; // Add horizontally packed DP FP numbers +SMPDefsFlags[NN_haddps] = false; // Add horizontally packed SP FP numbers +SMPDefsFlags[NN_hsubpd] = false; // Sub horizontally packed DP FP numbers +SMPDefsFlags[NN_hsubps] = false; // Sub horizontally packed SP FP numbers +SMPDefsFlags[NN_monitor] = false; // Set up a linear address range to be monitored by hardware +SMPDefsFlags[NN_mwait] = false; // Wait until write-back store performed within the range specified by the MONITOR instruction +SMPDefsFlags[NN_fisttp] = false; // Store ST in intXX (chop) and pop +SMPDefsFlags[NN_lddqu] = false; // Load unaligned integer 128-bit + +// SSSE3 instructions + +SMPDefsFlags[NN_psignb] = false; // Packed SIGN Byte +SMPDefsFlags[NN_psignw] = false; // Packed SIGN Word +SMPDefsFlags[NN_psignd] = false; // Packed SIGN Doubleword +SMPDefsFlags[NN_pshufb] = false; // Packed Shuffle Bytes +SMPDefsFlags[NN_pmulhrsw] = false; // Packed Multiply High with Round and Scale +SMPDefsFlags[NN_pmaddubsw] = false; // Multiply and Add Packed Signed and Unsigned Bytes +SMPDefsFlags[NN_phsubsw] = false; // Packed Horizontal Subtract and Saturate +SMPDefsFlags[NN_phaddsw] = false; // Packed Horizontal Add and Saturate +SMPDefsFlags[NN_phaddw] = false; // Packed Horizontal Add Word +SMPDefsFlags[NN_phaddd] = false; // Packed Horizontal Add Doubleword +SMPDefsFlags[NN_phsubw] = false; // Packed Horizontal Subtract Word +SMPDefsFlags[NN_phsubd] = false; // Packed Horizontal Subtract Doubleword +SMPDefsFlags[NN_palignr] = false; // Packed Align Right +SMPDefsFlags[NN_pabsb] = false; // Packed Absolute Value Byte +SMPDefsFlags[NN_pabsw] = false; // Packed Absolute Value Word +SMPDefsFlags[NN_pabsd] = false; // Packed Absolute Value Doubleword + +// VMX instructions + +SMPDefsFlags[NN_last] = false; + + return; + +} // end InitSMPDefsFlags() + +// Initialize the SMPUsesFlags[] array to define how we emit 
+// optimizing annotations. +void InitSMPUsesFlags(void) { + // Default value is false. Few instructions use the flags. + (void) memset(SMPUsesFlags, false, sizeof(SMPUsesFlags)); + +SMPUsesFlags[NN_null] = true; // Unknown Operation +SMPUsesFlags[NN_adc] = true; // Add with Carry +SMPUsesFlags[NN_into] = true; // Call to Interrupt Procedure if Overflow Flag = 1 +SMPUsesFlags[NN_ja] = true; // Jump if Above (CF=0 & ZF=0) +SMPUsesFlags[NN_jae] = true; // Jump if Above or Equal (CF=0) +SMPUsesFlags[NN_jb] = true; // Jump if Below (CF=1) +SMPUsesFlags[NN_jbe] = true; // Jump if Below or Equal (CF=1 | ZF=1) +SMPUsesFlags[NN_jc] = true; // Jump if Carry (CF=1) +SMPUsesFlags[NN_jcxz] = true; // Jump if CX is 0 +SMPUsesFlags[NN_jecxz] = true; // Jump if ECX is 0 +SMPUsesFlags[NN_jrcxz] = true; // Jump if RCX is 0 +SMPUsesFlags[NN_je] = true; // Jump if Equal (ZF=1) +SMPUsesFlags[NN_jg] = true; // Jump if Greater (ZF=0 & SF=OF) +SMPUsesFlags[NN_jge] = true; // Jump if Greater or Equal (SF=OF) +SMPUsesFlags[NN_jl] = true; // Jump if Less (SF!=OF) +SMPUsesFlags[NN_jle] = true; // Jump if Less or Equal (ZF=1 | SF!=OF) +SMPUsesFlags[NN_jna] = true; // Jump if Not Above (CF=1 | ZF=1) +SMPUsesFlags[NN_jnae] = true; // Jump if Not Above or Equal (CF=1) +SMPUsesFlags[NN_jnb] = true; // Jump if Not Below (CF=0) +SMPUsesFlags[NN_jnbe] = true; // Jump if Not Below or Equal (CF=0 & ZF=0) +SMPUsesFlags[NN_jnc] = true; // Jump if Not Carry (CF=0) +SMPUsesFlags[NN_jne] = true; // Jump if Not Equal (ZF=0) +SMPUsesFlags[NN_jng] = true; // Jump if Not Greater (ZF=1 | SF!=OF) +SMPUsesFlags[NN_jnge] = true; // Jump if Not Greater or Equal (ZF=1) +SMPUsesFlags[NN_jnl] = true; // Jump if Not Less (SF=OF) +SMPUsesFlags[NN_jnle] = true; // Jump if Not Less or Equal (ZF=0 & SF=OF) +SMPUsesFlags[NN_jno] = true; // Jump if Not Overflow (OF=0) +SMPUsesFlags[NN_jnp] = true; // Jump if Not Parity (PF=0) +SMPUsesFlags[NN_jns] = true; // Jump if Not Sign (SF=0) +SMPUsesFlags[NN_jnz] = true; // Jump if 
Not Zero (ZF=0) +SMPUsesFlags[NN_jo] = true; // Jump if Overflow (OF=1) +SMPUsesFlags[NN_jp] = true; // Jump if Parity (PF=1) +SMPUsesFlags[NN_jpe] = true; // Jump if Parity Even (PF=1) +SMPUsesFlags[NN_jpo] = true; // Jump if Parity Odd (PF=0) +SMPUsesFlags[NN_js] = true; // Jump if Sign (SF=1) +SMPUsesFlags[NN_jz] = true; // Jump if Zero (ZF=1) +SMPUsesFlags[NN_lahf] = true; // Load Flags into AH Register +SMPUsesFlags[NN_loopwe] = true; // Loop while CX != 0 and ZF=1 +SMPUsesFlags[NN_loope] = true; // Loop while rCX != 0 and ZF=1 +SMPUsesFlags[NN_loopde] = true; // Loop while ECX != 0 and ZF=1 +SMPUsesFlags[NN_loopqe] = true; // Loop while RCX != 0 and ZF=1 +SMPUsesFlags[NN_loopwne] = true; // Loop while CX != 0 and ZF=0 +SMPUsesFlags[NN_loopne] = true; // Loop while rCX != 0 and ZF=0 +SMPUsesFlags[NN_loopdne] = true; // Loop while ECX != 0 and ZF=0 +SMPUsesFlags[NN_loopqne] = true; // Loop while RCX != 0 and ZF=0 +SMPUsesFlags[NN_pushfw] = true; // Push Flags Register onto the Stack +SMPUsesFlags[NN_pushf] = true; // Push Flags Register onto the Stack +SMPUsesFlags[NN_pushfd] = true; // Push Flags Register onto the Stack (use32) +SMPUsesFlags[NN_pushfq] = true; // Push Flags Register onto the Stack (use64) +SMPUsesFlags[NN_repe] = true; // Repeat String Operation while ZF=1 +SMPUsesFlags[NN_repne] = true; // Repeat String Operation while ZF=0 +SMPUsesFlags[NN_sahf] = true; // Store AH into Flags Register +SMPUsesFlags[NN_shl] = true; // Shift Logical Left +SMPUsesFlags[NN_shr] = true; // Shift Logical Right +SMPUsesFlags[NN_sbb] = true; // Integer Subtraction with Borrow +SMPUsesFlags[NN_seta] = true; // Set Byte if Above (CF=0 & ZF=0) +SMPUsesFlags[NN_setae] = true; // Set Byte if Above or Equal (CF=0) +SMPUsesFlags[NN_setb] = true; // Set Byte if Below (CF=1) +SMPUsesFlags[NN_setbe] = true; // Set Byte if Below or Equal (CF=1 | ZF=1) +SMPUsesFlags[NN_setc] = true; // Set Byte if Carry (CF=1) +SMPUsesFlags[NN_sete] = true; // Set Byte if Equal (ZF=1) 
+SMPUsesFlags[NN_setg] = true; // Set Byte if Greater (ZF=0 & SF=OF) +SMPUsesFlags[NN_setge] = true; // Set Byte if Greater or Equal (SF=OF) +SMPUsesFlags[NN_setl] = true; // Set Byte if Less (SF!=OF) +SMPUsesFlags[NN_setle] = true; // Set Byte if Less or Equal (ZF=1 | SF!=OF) +SMPUsesFlags[NN_setna] = true; // Set Byte if Not Above (CF=1 | ZF=1) +SMPUsesFlags[NN_setnae] = true; // Set Byte if Not Above or Equal (CF=1) +SMPUsesFlags[NN_setnb] = true; // Set Byte if Not Below (CF=0) +SMPUsesFlags[NN_setnbe] = true; // Set Byte if Not Below or Equal (CF=0 & ZF=0) +SMPUsesFlags[NN_setnc] = true; // Set Byte if Not Carry (CF=0) +SMPUsesFlags[NN_setne] = true; // Set Byte if Not Equal (ZF=0) +SMPUsesFlags[NN_setng] = true; // Set Byte if Not Greater (ZF=1 | SF!=OF) +SMPUsesFlags[NN_setnge] = true; // Set Byte if Not Greater or Equal (ZF=1) +SMPUsesFlags[NN_setnl] = true; // Set Byte if Not Less (SF=OF) +SMPUsesFlags[NN_setnle] = true; // Set Byte if Not Less or Equal (ZF=0 & SF=OF) +SMPUsesFlags[NN_setno] = true; // Set Byte if Not Overflow (OF=0) +SMPUsesFlags[NN_setnp] = true; // Set Byte if Not Parity (PF=0) +SMPUsesFlags[NN_setns] = true; // Set Byte if Not Sign (SF=0) +SMPUsesFlags[NN_setnz] = true; // Set Byte if Not Zero (ZF=0) +SMPUsesFlags[NN_seto] = true; // Set Byte if Overflow (OF=1) +SMPUsesFlags[NN_setp] = true; // Set Byte if Parity (PF=1) +SMPUsesFlags[NN_setpe] = true; // Set Byte if Parity Even (PF=1) +SMPUsesFlags[NN_setpo] = true; // Set Byte if Parity Odd (PF=0) +SMPUsesFlags[NN_sets] = true; // Set Byte if Sign (SF=1) +SMPUsesFlags[NN_setz] = true; // Set Byte if Zero (ZF=1) +SMPUsesFlags[NN_stos] = true; // Store String + +// +// 486 instructions +// + +// +// Pentium instructions +// + +SMPUsesFlags[NN_cpuid] = true; // Get CPU ID +SMPUsesFlags[NN_cmpxchg8b] = true; // Compare and Exchange Eight Bytes + +// +// Pentium Pro instructions +// + +SMPUsesFlags[NN_cmova] = true; // Move if Above (CF=0 & ZF=0) +SMPUsesFlags[NN_cmovb] = true; // Move if 
Below (CF=1) +SMPUsesFlags[NN_cmovbe] = true; // Move if Below or Equal (CF=1 | ZF=1) +SMPUsesFlags[NN_cmovg] = true; // Move if Greater (ZF=0 & SF=OF) +SMPUsesFlags[NN_cmovge] = true; // Move if Greater or Equal (SF=OF) +SMPUsesFlags[NN_cmovl] = true; // Move if Less (SF!=OF) +SMPUsesFlags[NN_cmovle] = true; // Move if Less or Equal (ZF=1 | SF!=OF) +SMPUsesFlags[NN_cmovnb] = true; // Move if Not Below (CF=0) +SMPUsesFlags[NN_cmovno] = true; // Move if Not Overflow (OF=0) +SMPUsesFlags[NN_cmovnp] = true; // Move if Not Parity (PF=0) +SMPUsesFlags[NN_cmovns] = true; // Move if Not Sign (SF=0) +SMPUsesFlags[NN_cmovnz] = true; // Move if Not Zero (ZF=0) +SMPUsesFlags[NN_cmovo] = true; // Move if Overflow (OF=1) +SMPUsesFlags[NN_cmovp] = true; // Move if Parity (PF=1) +SMPUsesFlags[NN_cmovs] = true; // Move if Sign (SF=1) +SMPUsesFlags[NN_cmovz] = true; // Move if Zero (ZF=1) +SMPUsesFlags[NN_fcmovb] = true; // Floating Move if Below +SMPUsesFlags[NN_fcmove] = true; // Floating Move if Equal +SMPUsesFlags[NN_fcmovbe] = true; // Floating Move if Below or Equal +SMPUsesFlags[NN_fcmovu] = true; // Floating Move if Unordered +SMPUsesFlags[NN_fcmovnb] = true; // Floating Move if Not Below +SMPUsesFlags[NN_fcmovne] = true; // Floating Move if Not Equal +SMPUsesFlags[NN_fcmovnbe] = true; // Floating Move if Not Below or Equal +SMPUsesFlags[NN_fcmovnu] = true; // Floating Move if Not Unordered + +// +// FPP instructuions +// + + +// +// 80387 instructions +// + + +// +// Instructions added 28.02.96 +// + +SMPUsesFlags[NN_setalc] = true; // Set AL to Carry Flag + +// +// MMX instructions +// + + +// +// Undocumented Deschutes processor instructions +// + + +// Pentium II instructions + + +// 3DNow! instructions + + +// Pentium III instructions + + +// Pentium III Pseudo instructions + + +// AMD K7 instructions + +// Revisit AMD if we port to it. 
+ +// Undocumented FP instructions (thanks to norbert.juffa@adm.com) + +// Pentium 4 instructions + + + +// AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual + +// AMD64 instructions NOTE: not AMD, found in Intel manual + + +// New Pentium instructions (SSE3) + + +// Missing AMD64 instructions NOTE: also found in Intel manual + + +// SSE3 instructions + + +// SSSE3 instructions + + +// VMX instructions + + +SMPUsesFlags[NN_last] = false; + + return; + +} // end InitSMPUsesFlags() + diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index 3a860b091259ddbb766973bb676ace0ebc1f22ab..1ad9044f4c2c9b8ec432497ff2efbfbc1b86e54b 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -28,8 +28,13 @@ class SMPPhiFunction; // Value for an SSA subscript number before it is initialized by SSA renaming. #define SMP_SSA_UNINIT (-1) +// Map register number to a string for printing or annotations. extern char *RegNames[]; +// Use the carry flag as the surrogate for the EFLAGS register in the x86 architecture. +// IDA Pro distinguishes among four different flag bits; we aggregate them. +#define X86_FLAGS_REG R_cf + // Debug: print one operand from an instruction or DEF or USE list. void PrintDefUse(ulong feature, int OpNum); void PrintSIB(op_t Opnd); @@ -193,9 +198,60 @@ public: // n, offb, and offo. This function extracts an unsigned int from these three 8-bit // fields. 
unsigned int ExtractGlobalIndex(op_t GlobalOp); +void SetGlobalIndex(op_t *TempOp, size_t index); + +class SMPDefUseChain { +public: + // Constructors + SMPDefUseChain(void); + SMPDefUseChain(op_t Name, ea_t Def = BADADDR); + // Get methods + inline op_t GetName(void) const { return SSAName; }; + inline ea_t GetDef(void) const { return RefInstrs.at(0); }; + inline ea_t GetUse(size_t index) const { return RefInstrs.at(index + 1); }; + inline ea_t GetLastUse(void) const { return RefInstrs.back(); }; + // Set methods + void SetName(op_t Name); + void SetDef(ea_t Def); + void PushUse(ea_t Use); + // Printing methods + void Dump(int SSANum = (-1)); +private: + op_t SSAName; // What variable is defined and used in the chain? + vector<ea_t> RefInstrs; // First is always DEF, rest are USE. +}; // end class DefUseChain + +class SMPDUChainArray { +public: + // Constructor + SMPDUChainArray(void); + SMPDUChainArray(op_t Name); + // Set methods. + void SetName(op_t Name); + // Printing methods. + void Dump(void); + // Data (public for convenience) + vector<SMPDefUseChain> DUChains; // indexed by SSA number +private: + op_t SSAName; // What variable is used in all chains in the array? +}; // end class SMPDUChainArray + +class SMPCompleteDUChains { +public: + // Printing methods. + void Dump(void); + // Data (public for convenience) + vector<SMPDUChainArray> ChainsByName; // indexed by name index +}; // end class SMPCompleteDUChains // Initialization routine for DFA category. extern SMPitype DFACategory[]; void InitDFACategory(void); +// Initializations for CPU flags DEF/USE. +extern bool SMPDefsFlags[]; +extern bool SMPUsesFlags[]; +void InitSMPDefsFlags(void); +void InitSMPUsesFlags(void); + #endif diff --git a/SMPFunction.cpp b/SMPFunction.cpp index c8979efa8b921b9e7414024d58174a6b91ce26e4..a69fdf6f315ba2715a95d4b2692feb107a1f576d 100644 --- a/SMPFunction.cpp +++ b/SMPFunction.cpp @@ -42,7 +42,7 @@ #define SMP_DEBUG_DATAFLOW 0 // Compute LVA/SSA or not? 
Turn it off for NICECAP demo on 31-JAN-2008 -#define SMP_COMPUTE_LVA_SSA 0 +#define SMP_COMPUTE_LVA_SSA 1 // Basic block number 0 is the top of the CFG lattice. #define SMP_TOP_BLOCK 0 @@ -514,12 +514,62 @@ ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) { // is obviously not using EBP as a frame pointer. IDA is apparently // confused by the push ebp instruction being the first instruction // in the function. We will reset UseFP to false in this case. +// The inverse problem happens with a function that begins with instructions +// other than push ebp; mov ebp,esp; ... etc. but eventually has those +// instructions in the first basic block. For example, a C compiler generates +// for the first block of main(): +// lea ecx,[esp+arg0] +// and esp, 0xfffffff0 +// push dword ptr [ecx-4] +// push ebp +// mov ebp,esp +// push ecx +// sub esp,<framesize> +// +// This function is obviously using EBP as a frame pointer, but IDA Pro marks +// the function as not using a frame pointer. We will reset UseFP to true in +// this case. // NOTE: This logic should work for both Linux and Windows x86 prologues. bool SMPFunction::MDFixUseFP(void) { list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); ea_t addr = CurrInstr->GetAddr(); - if (!UseFP) - return false; // Only looking to reset true to false. + if (!(this->UseFP)) { + // See if we can detect the instruction "push ebp" followed by the instruction + // "mov ebp,esp" in the first basic block. The instructions do not have to be + // consecutive. If we find them, we will reset UseFP to true. 
+ bool FirstBlockProcessed = false; + bool EBPSaved = false; + bool ESPintoEBP = false; + do { + FirstBlockProcessed = CurrInstr->IsLastInBlock(); + if (!EBPSaved) { // still looking for "push ebp" + if (CurrInstr->MDIsPushInstr() && CurrInstr->GetCmd().Operands[0].is_reg(R_bp)) { + EBPSaved = true; + } + } + else if (!ESPintoEBP) { // found "push ebp", looking for "mov ebp,esp" + insn_t CurrCmd = CurrInstr->GetCmd(); + if ((CurrCmd.itype == NN_mov) && (CurrInstr->GetDef(0).GetOp().is_reg(R_bp)) + && (CurrInstr->GetUse(0).GetOp().is_reg(R_sp))) { + ESPintoEBP = true; + FirstBlockProcessed = true; // exit loop + } + } + ++CurrInstr; + addr = CurrInstr->GetAddr(); + // We must get EBP set to its frame pointer value before we reach the + // local frame allocation instruction (i.e. the subtraction of locals space + // from the stack pointer). + FirstBlockProcessed |= (addr >= this->LocalVarsAllocInstr); + } while (!FirstBlockProcessed); + // If we found ESPintoEBP, we also found EBPSaved first, and we need to change + // this->UseFP to true and return true. Otherwise, return false. + this->UseFP = ESPintoEBP; + return ESPintoEBP; + } // end if (!(this->UseFP)) + + // At this point, this->UseFP must have been true on entry to this method and we will + // check whether it should be reset to false. 
while (addr < this->LocalVarsAllocInstr) { size_t DefIndex = 0; while (DefIndex < CurrInstr->NumDefs()) { @@ -627,7 +677,7 @@ void SMPFunction::Analyze(void) { msg("SMPFunction::Analyze: hit special jump target case.\n"); #endif LastInBlock = --(this->Instrs.end()); - SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, + SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared @@ -659,7 +709,7 @@ void SMPFunction::Analyze(void) { msg("SMPFunction::Analyze: found block terminator.\n"); #endif LastInBlock = --(this->Instrs.end()); - SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); + SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. @@ -688,7 +738,7 @@ void SMPFunction::Analyze(void) { // longer includes a return instruction and terminates with a CALL. if (FirstInBlock != this->Instrs.end()) { LastInBlock = --(this->Instrs.end()); - SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); + SMPBasicBlock CurrBlock = SMPBasicBlock(this, FirstInBlock, LastInBlock); CurrBlock.Analyze(); // If not the first chunk in the function, it is a shared // tail chunk. @@ -711,9 +761,12 @@ void SMPFunction::Analyze(void) { // Set up basic block links and map of instructions to blocks. 
if (!(this->HasSharedChunks())) { - bool DumpFlag = (0 == strcmp("main", this->GetFuncName())); + bool DumpFlag = false; +#if SMP_DEBUG_DATAFLOW + DumpFlag |= (0 == strcmp("main", this->GetFuncName())); DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); - DumpFlag |= (0 == strcmp("__ieee754_pow", this->GetFuncName())); + DumpFlag |= (0 == strcmp("frame_dummy", this->GetFuncName())); +#endif this->SetLinks(); #if SMP_COMPUTE_LVA_SSA this->RPONumberBlocks(); @@ -738,10 +791,10 @@ void SMPFunction::ComputeSSA(void) { #if SMP_DEBUG_DATAFLOW bool DumpFlag = (0 == strcmp("main", this->GetFuncName())); DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); - DumpFlag |= (0 == strcmp("__ieee754_pow", this->GetFuncName())); + DumpFlag |= (0 == strcmp("_init_proc", this->GetFuncName())); #endif -#if SMP_DEBUG_DATAFLOW +#if 0 if (DumpFlag) this->Dump(); #endif @@ -753,6 +806,14 @@ void SMPFunction::ComputeSSA(void) { this->InsertPhiFunctions(); this->BuildDominatorTree(); this->SSARenumber(); + list<SMPBasicBlock>::iterator CurrBlock; + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + CurrBlock->SetLocalNames(); + CurrBlock->SSALocalRenumber(); +#if 1 + CurrBlock->MarkDeadRegs(); +#endif + } #if SMP_DEBUG_DATAFLOW if (DumpFlag) this->Dump(); @@ -961,8 +1022,8 @@ void SMPFunction::RPONumberBlocks(void) { // See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm. void SMPFunction::LiveVariableAnalysis(void) { list<SMPBasicBlock>::iterator CurrBlock; - msg("LiveVariableAnalysis for %s\n", this->GetFuncName()); #if SMP_DEBUG_DATAFLOW + msg("LiveVariableAnalysis for %s\n", this->GetFuncName()); bool DebugFlag = (0 == strcmp("__ieee754_pow", this->GetFuncName())); #endif @@ -1147,9 +1208,7 @@ void SMPFunction::ComputeGlobalNames(void) { // fields op_t.offb:op_t.offo for the upper 16 bits. 
We are overwriting IDA // values here, but operands in the data flow analysis sets should never be // inserted back into the program anyway. - TempOp.n = (char) (index & 0x000000ff); - TempOp.offb = (char) ((index & 0x0000ff00) >> 8); - TempOp.offo = (char) ((index & 0x00ff0000) >> 16); + SetGlobalIndex(&TempOp, index); #if SMP_DEBUG_DATAFLOW msg("Global Name: "); @@ -1352,9 +1411,12 @@ void SMPFunction::SSARename(int BlockNumber) { assert(BlockNumber < this->BlockCount); list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at((size_t) BlockNumber); - bool DumpFlag = (0 == strcmp("main", this->GetFuncName())); + bool DumpFlag = false; +#if SMP_DEBUG_DATAFLOW + DumpFlag |= (0 == strcmp("main", this->GetFuncName())); DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); DumpFlag |= (0 == strcmp("image_to_texture", this->GetFuncName())); +#endif if (DumpFlag) msg("Entered SSARename for block number %d\n", BlockNumber); diff --git a/SMPFunction.h b/SMPFunction.h index d532b4655c4f0e79e2558ed2e713b6d2c51ea2a1..94f092919139c8eaaad1fa4ce743650190a0e3fa 100644 --- a/SMPFunction.h +++ b/SMPFunction.h @@ -36,6 +36,7 @@ public: // Query methods inline bool HasIndirectCalls(void) const { return IndirectCalls; }; inline bool HasSharedChunks(void) const { return SharedChunks; }; + bool IsGlobalName(op_t RefOp) const { return (GlobalNames.end() != GlobalNames.find(RefOp)); }; // Printing methods void Dump(void); // debug dump // Analysis methods diff --git a/SMPInstr.cpp b/SMPInstr.cpp index 135359eab3933dbf138f0367f945ba00cdc257c6..0140d728a601fadeae1baf31659daf8149cda6af 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -29,8 +29,6 @@ #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG_XOR 0 -#define X86_FLAGS_REG R_cf - // Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable // by allowing us to pick them up with an array index. 
static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6}; @@ -55,6 +53,7 @@ SMPInstr::SMPInstr(ea_t addr) { this->analyzed = false; this->JumpTarget = false; this->BlockTerm = false; + this->DeadRegsString[0] = '\0'; return; } @@ -145,10 +144,15 @@ void SMPInstr::Dump(void) const { // Print out the destination operand list for the instruction, given // the OptCategory for the instruction as a hint. char * SMPInstr::DestString(int OptType) { - static char DestList[MAXSTR] = { '\0', '\0' }; + static char DestList[MAXSTR]; int RegDestCount = 0; + DestList[0] = 'Z'; // Make sure there are no leftovers from last call + DestList[1] = 'Z'; + DestList[2] = '\0'; for (size_t DefIndex = 0; DefIndex < this->NumDefs(); ++DefIndex) { op_t DefOpnd = this->GetDef(DefIndex).GetOp(); + if (DefOpnd.is_reg(X86_FLAGS_REG)) // don't print flags as a destination + continue; if (o_reg == DefOpnd.type) { ushort DestReg = DefOpnd.reg; if (0 == RegDestCount) { @@ -531,7 +535,7 @@ void SMPInstr::MDFixupDefUseLists(void) { size_t OpNum; for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { op_t Opnd = SMPcmd.Operands[OpNum]; - if ((Opnd.type == o_phrase) || (Opnd.type == o_displ)) { + if ((Opnd.type == o_phrase) || (Opnd.type == o_displ) || (Opnd.type == o_mem)) { if (Opnd.hasSIB) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); @@ -542,6 +546,12 @@ void SMPInstr::MDFixupDefUseLists(void) { BaseOpnd.hasSIB = 0; BaseOpnd.set_showed(); this->Uses.SetRef(BaseOpnd); + if (BaseOpnd.is_reg(R_bp)) { + msg("WARNING: EBP base register in SIB: %s\n", this->GetDisasm()); + } + } + else { + msg("WARNING: R_none base register in SIB: %s\n", this->GetDisasm()); } if (R_none != IndexReg) { // Should we disallow R_sp here? 
**!!** op_t IndexOpnd = Opnd; // Init to current operand field values @@ -550,6 +560,9 @@ void SMPInstr::MDFixupDefUseLists(void) { IndexOpnd.hasSIB = 0; IndexOpnd.set_showed(); this->Uses.SetRef(IndexOpnd); + if (IndexOpnd.is_reg(R_sp)) { + msg("WARNING: ESP index register in SIB: %s\n", this->GetDisasm()); + } } } else { // no SIB byte; can have base reg but no index reg @@ -565,7 +578,7 @@ void SMPInstr::MDFixupDefUseLists(void) { } // end for (all operands) // Now, handle special instruction categories that have implicit operands. - if (NN_cmpxchg == SMPcmd.itype) { + if (NN_cmpxchg == this->SMPcmd.itype) { // x86 Compare and Exchange conditionally sets EAX. We must keep data flow analysis // sound by declaring that EAX is always a DEF. this->MDAddRegDef(R_ax, false); @@ -611,6 +624,18 @@ void SMPInstr::MDFixupDefUseLists(void) { } } // end else if (7 == OptType) + // Next, add the flags register to the DEFs and USEs for those instructions that + // are marked as defining or using flags. + if (this->type == COND_BRANCH) { + assert(SMPUsesFlags[this->SMPcmd.itype]); + } + if (SMPDefsFlags[this->SMPcmd.itype]) { + this->MDAddRegDef(X86_FLAGS_REG, false); + } + if (SMPUsesFlags[this->SMPcmd.itype]) { + this->MDAddRegUse(X86_FLAGS_REG, false); + } + #if 1 if (this->MDIsNop()) { // Clear the DEFs and USEs for no-ops. @@ -932,6 +957,13 @@ void SMPInstr::EmitAnnotations(bool UseFP, bool AllocSeen, FILE *AnnotFile) { // in telling mmStrata about these constants. if (SDTInstrumentation) { this->AnnotateStackConstants(UseFP, AnnotFile); + if (strlen(this->DeadRegsString) > 0) { + // Optimize by informing mmStrata of dead registers. It can avoid saving + // and restoring dead state. This is particularly important for EFLAGS, + // as restoring the flags is a pipeline serializing instruction. 
+ qfprintf(AnnotFile, "%x %d INSTR DEADREGS %s ZZ %s \n", + addr, this->SMPcmd.size, this->DeadRegsString, disasm); + } } return; } // end of SMPInstr::EmitAnnotations() diff --git a/SMPInstr.h b/SMPInstr.h index 959a9706328344526b53498726b794396b31f147..952a4a7ea5632d06309b4e60daf1beb9287ff1b2 100644 --- a/SMPInstr.h +++ b/SMPInstr.h @@ -39,6 +39,7 @@ public: inline void SetTerminatesBlock(void) { BlockTerm = true; }; inline void SetUseSSA(size_t index, int SSASub) { Uses.SetSSANum(index, SSASub); return; }; inline void SetDefSSA(size_t index, int SSASub) { Defs.SetSSANum(index, SSASub); return; }; + inline void SetDeadRegs(char RegsString[]) { qstrncpy(DeadRegsString, RegsString, MAXSTR - 1); return; }; // Query methods bool HasDestMemoryOperand(void) const; // Does instruction write to memory? bool HasSourceMemoryOperand(void) const; // Does instruction read from memory? @@ -79,6 +80,7 @@ private: // and DEF and USE lists? bool JumpTarget; // Is Instr the target of any jumps or branches? bool BlockTerm; // This instruction terminates a basic block. + char DeadRegsString[MAXSTR]; // Registers that are dead at this instruction // Methods void BuildSMPDefUseLists(void); // Build DEF and USE lists for instruction void MDFixupDefUseLists(void); // Machine-dependent ad hoc fixes diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp index 76bd1eacae343a4ca1b9132fe49b60fb0ad93af3..8a8a857386ff9fc6c0c99245a446ecf3a6176deb 100644 --- a/SMPStaticAnalyzer.cpp +++ b/SMPStaticAnalyzer.cpp @@ -160,6 +160,8 @@ int IDAP_init(void) { hook_to_notification_point(HT_IDP, idp_callback, NULL); InitOptCategory(); InitDFACategory(); + InitSMPDefsFlags(); + InitSMPUsesFlags(); return PLUGIN_KEEP; } // end of IDAP_init