diff --git a/SMPDBInterface.cpp b/SMPDBInterface.cpp index f14760d1e94777f5681232537e668e549ba413a8..ecd5305376df153ff6b3f4ee747227cabd10d224 100644 --- a/SMPDBInterface.cpp +++ b/SMPDBInterface.cpp @@ -55,6 +55,8 @@ #include <name.hpp> #include "SMPDBInterface.h" +#include "SMPStaticAnalyzer.h" +#include "SMPDataFlowAnalysis.h" #ifdef STARS_IDA_INTERFACE @@ -81,6 +83,37 @@ bool SMPGetCmd(ea_t InstAddr, insn_t &SMPcmd, ulong &SMPfeatures) { // Get the canonical features into argument SMPfeatures. SMPfeatures = cmd.get_canon_feature(); + for (int i = 0; i < UA_MAXOP; ++i) { + SMPcmd.Operands[i].specflag4 = 0; +#ifdef __EA64__ + if (STARS_ISA_Bitwidth == 64) { + // Copy the cmd.rex prefix into the op_t.specflag4 field of every operand (SIB or not). + // NOTE(review): MD_STARS_op256() reads VEX_L from specflag4; confirm it is never needed outside this 64-bit path. + SMPcmd.Operands[i].specflag4 = SMPcmd.rex; + } +#endif + // See comments on STARS_VEXPR and STARS_VSIB in SMPDataFlowAnalysis.h. + // These bits do not (as of IDA Pro 6.4) conflict with cmd.rex bits. + if ((cmd.auxpref & aux_vexpr) != 0) { + SMPcmd.Operands[i].specflag4 |= STARS_VEXPR; + } + + switch (SMPcmd.itype) { + case NN_vgatherdps: + case NN_vgatherdpd: + case NN_vgatherqps: + case NN_vgatherqpd: + case NN_vpgatherdd: + case NN_vpgatherdq: + case NN_vpgatherqd: + case NN_vpgatherqq: + SMPcmd.Operands[i].specflag4 |= STARS_VSIB; + break; // do not fall through into default + default: + ; + } + } + return success; } // end of SMPGetCmd() diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp index 10c8b6596eeb55b43dd74542b4deaf536fd7c50e..91d911cfb60a4c6b88bf61e0dddf93676f10e870 100644 --- a/SMPDataFlowAnalysis.cpp +++ b/SMPDataFlowAnalysis.cpp @@ -279,10 +279,11 @@ bool IsEqOp(op_t Opnd1, op_t Opnd2) { case o_void: return true; case o_reg: return ((Opnd1.reg == Opnd2.reg) && (Opnd1.dtyp == Opnd2.dtyp)); case o_mem: return (Opnd1.addr == Opnd2.addr); - case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib == Opnd2.sib); - else return false; // no SIB != has SIB + case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return 
((Opnd1.sib == Opnd2.sib) && (Opnd1.specflag4 == Opnd2.specflag4)); + else if (Opnd1.hasSIB || Opnd2.hasSIB) return false; // no SIB != has SIB + else return (Opnd1.reg == Opnd2.reg); // neither has SIB; compare register, e.g. [ebx] to [edx] case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB) - return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr)); + return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr) && (Opnd1.specflag4 == Opnd2.specflag4)); else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB)) return ((Opnd1.addr == Opnd2.addr) && (Opnd1.reg == Opnd2.reg)); else return false; // no SIB != has SIB @@ -308,10 +309,11 @@ bool IsEqOpIgnoreBitwidth(op_t Opnd1, op_t Opnd2) { case o_void: return true; case o_reg: return (Opnd1.reg == Opnd2.reg); case o_mem: return (Opnd1.addr == Opnd2.addr); - case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib == Opnd2.sib); - else return false; // no SIB != has SIB + case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib == Opnd2.sib) && (Opnd1.specflag4 == Opnd2.specflag4)); + else if (Opnd1.hasSIB || Opnd2.hasSIB) return false; // no SIB != has SIB + else return (Opnd1.reg == Opnd2.reg); // neither has SIB; compare register, e.g. [ebx] to [edx] case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB) - return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr)); + return ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr == Opnd2.addr) && (Opnd1.specflag4 == Opnd2.specflag4)); else if ((!Opnd1.hasSIB) && (!Opnd2.hasSIB)) return ((Opnd1.addr == Opnd2.addr) && (Opnd1.reg == Opnd2.reg)); else return false; // no SIB != has SIB @@ -391,6 +393,39 @@ void SetGlobalIndex(op_t *TempOp, size_t index) { return; } + +bool MD_STARS_op256(const op_t &x) // is VEX.L set? +{ + return ((x.specflag4 & STARS_VEXPR) != 0) && ((x.specflag4 & VEX_L) != 0); +} + +bool MD_STARS_is_vsib(const op_t &x) // does instruction use VSIB variant of the sib byte? 
+{ + return ((x.specflag4 & STARS_VSIB) != 0); +} + +int MD_STARS_sib_base(const op_t &x) // get extended sib base +{ + int base = x.sib & 7; +#ifdef __EA64__ + if ( x.specflag4 & REX_B ) + base |= 8; +#endif + return base; +} + +regnum_t MD_STARS_sib_index(const op_t &x) // get extended sib index +{ + regnum_t index = regnum_t((x.sib >> 3) & 7); +#ifdef __EA64__ + if ( x.specflag4 & REX_X ) + index |= 8; +#endif + if (MD_STARS_is_vsib(x)) + index += MD_STARS_op256(x) ? 81 /*R_ymm0*/ : 64 /*R_xmm0*/; + return index; +} + // Return true if CurrOp could be an indirect memory reference. bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP) { bool indirect = false; @@ -398,8 +433,8 @@ bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP) { return false; if (CurrOp.hasSIB) { - int BaseReg = sib_base(CurrOp); - short IndexReg = sib_index(CurrOp); + int BaseReg = MD_STARS_sib_base(CurrOp); + short IndexReg = MD_STARS_sib_index(CurrOp); if ((R_none != IndexReg) && (MD_STACK_POINTER_REG != IndexReg)) { if ((MD_FRAME_POINTER_REG == IndexReg) && UseFP) ; @@ -451,8 +486,8 @@ void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Sca Offset = MemOp.addr; if (MemOp.hasSIB) { - BaseReg = sib_base(MemOp); - IndexReg = (int) sib_index(MemOp); + BaseReg = MD_STARS_sib_base(MemOp); + IndexReg = (int) MD_STARS_sib_index(MemOp); if (MD_STACK_POINTER_REG == IndexReg) // signifies no index register IndexReg = R_none; if (R_none != IndexReg) { @@ -601,7 +636,7 @@ void PrintSIB(op_t Opnd) { if (IndexReg != R_none) { SMP_strncpy(IndexName, RegNames[IndexReg], NAME_LEN -1); } - SMP_msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor); + SMP_msg(" Base %s Index %s Scale %d Flag4 %d", BaseName, IndexName, ScaleFactor, Opnd.specflag4); } // end PrintSIB() // Annotations: concisely print SIB info for an operand. 
@@ -1088,8 +1123,12 @@ set<DefOrUse, LessDefUse>::iterator DefOrUseSet::InsertRef(DefOrUse Ref) { // Set a Def or Use into the list, along with its type. void DefOrUseSet::SetRef(op_t Ref, SMPOperandType Type, int SSASub) { + pair<set<DefOrUse, LessDefUse>::iterator, bool> InsertResult; DefOrUse CurrRef(Ref, Type, SSASub); - this->Refs.insert(CurrRef); + InsertResult = this->Refs.insert(CurrRef); + if ((!(InsertResult.second)) && (o_reg != Ref.type)) { + SMP_msg("ERROR: Inserted duplicate DEF or USE.\n"); + } return; } diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index d2570b1b78325475f59be97a4e1bbf14de876447..eb76f439a0ca2a5917cd6b9595bd2e352576b4da 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -121,6 +121,17 @@ const char *MDGetRegName(op_t RegOp); // Distinguishes subword regs from their p #define X86_BINARY_NOP_INSTRUCTION 0x90 #define MD_BINARY_NOP_INSTRUCTION X86_BINARY_NOP_INSTRUCTION +// Part of our solution to the dense encoding of x86-64 is to record some instruction +// prefix and auxprefix info in each operand, so we can continue to do data flow analysis +// on operands long after the instructions containing the operands have gone stale. +// IDA Pro has macros that operate on their global variable 'cmd' which is only current +// while analyzing an inst, and is too big to pass around with all operands in Phi DEFs, +// LiveIn and LiveOut sets, SSA numbering and operand comparisons, etc. So, we record +// the insnpref byte (a.k.a. cmd.rex) into each operand in the specflag4 field, then we +// record the following two bits of info into unused bits in specflag4. +const int STARS_VEXPR = 0x40; // IDA Pro vexpr() was true on inst containing current op_t +const int STARS_VSIB = 0x20; // IDA Pro is_vsib() was true on inst containing current op_t + // Debug: print one operand from an instruction or DEF or USE list. 
void PrintDefUse(ulong feature, int OpNum); void PrintSIB(op_t Opnd); @@ -134,6 +145,12 @@ void AnnotPrintOperand(op_t Opnd, FILE *OutFile); // MACHINE DEPENDENT: Could operand be an indirect memory access? bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP); +// MACHINE DEPENDENT: Extract base reg from SIB byte of operand. +int MD_STARS_sib_base(const op_t &x); + +// MACHINE DEPENDENT: Extract index reg from SIB byte of operand. +regnum_t MD_STARS_sib_index(const op_t &x); + // Is CurrOp a memory operand? bool IsMemOperand(op_t CurrOp); @@ -206,13 +223,14 @@ public: case o_void: return false; case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg); case o_mem: return (Opnd1.addr < Opnd2.addr); - case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib); + case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) || ((Opnd1.sib == Opnd2.sib) && (Opnd1.specflag4 < Opnd2.specflag4))); else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return MDLessReg(Opnd1.phrase, Opnd2.phrase); // no SIB bytes case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) - || ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr))); + || ((Opnd1.sib == Opnd2.sib) + && ((Opnd1.addr < Opnd2.addr) || ((Opnd1.addr == Opnd2.addr) && (Opnd1.specflag4 < Opnd2.specflag4))))); else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return ((Opnd1.addr < Opnd2.addr) @@ -249,13 +267,14 @@ public: case o_void: return false; case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg); case o_mem: return (Opnd1.addr < Opnd2.addr); - case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib); + case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) || ((Opnd1.sib == Opnd2.sib) && (Opnd1.specflag4 < Opnd2.specflag4))); else if (Opnd2.hasSIB) return true; // no SIB < has SIB 
else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return MDLessReg(Opnd1.phrase, Opnd2.phrase); // no SIB bytes case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) - || ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr))); + || ((Opnd1.sib == Opnd2.sib) + && ((Opnd1.addr < Opnd2.addr) || ((Opnd1.addr == Opnd2.addr) && (Opnd1.specflag4 < Opnd2.specflag4))))); else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return ((Opnd1.addr < Opnd2.addr) @@ -292,13 +311,13 @@ public: case o_void: return false; case o_reg: return MDLessReg(Opnd1.reg, Opnd2.reg); case o_mem: return (Opnd1.addr < Opnd2.addr); - case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return (Opnd1.sib < Opnd2.sib); + case o_phrase: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) || ((Opnd1.sib == Opnd2.sib) && (Opnd1.specflag4 < Opnd2.specflag4))); else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return MDLessReg(Opnd1.phrase, Opnd2.phrase); // no SIB bytes case o_displ: if (Opnd1.hasSIB && Opnd2.hasSIB) return ((Opnd1.sib < Opnd2.sib) - || ((Opnd1.sib == Opnd2.sib) && (Opnd1.addr < Opnd2.addr))); + || ((Opnd1.sib == Opnd2.sib) && ((Opnd1.addr < Opnd2.addr) || ((Opnd1.addr == Opnd2.addr) && (Opnd1.specflag4 < Opnd2.specflag4))))); // specflag4 breaks ties only when addr is equal, keeping a strict weak ordering else if (Opnd2.hasSIB) return true; // no SIB < has SIB else if (Opnd1.hasSIB) return false; // no SIB < has SIB else return ((Opnd1.addr < Opnd2.addr) diff --git a/SMPFunction.cpp b/SMPFunction.cpp index 51fbb0afaf9b90b860ba4833d313c517ea41d3d0..f45a84e501cbe8118a5c3c487d9bb2cbc84d9d45 100644 --- a/SMPFunction.cpp +++ b/SMPFunction.cpp @@ -452,6 +452,7 @@ void SMPFunction::AddLeaOperand(ea_t addr, op_t LeaOperand) { // Add input arguments to the NormalizedStackOpsMap. void SMPFunction::AddNormalizedStackOperand(op_t OldOp, ea_t InstAddr, op_t NormalizedOp) { bool DuplicateCase = false; // e.g. 
inc [esp+8] will have [esp+8] as a DEF and a USE and maps will see [esp+8] twice + bool DebugFlag = (InstAddr == 0xb79b); // NOTE(review): hard-coded debug address; confirm it should ship pair<map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator, bool> InsertResult; pair<map<pair<op_t, ea_t>, map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator, LessDefinition>::iterator, bool> InverseInsertResult; pair<op_t, ea_t> OldValue(OldOp, InstAddr); @@ -532,6 +533,17 @@ void SMPFunction::AddNormalizedStackOperand(op_t OldOp, ea_t InstAddr, op_t Norm InverseInsertResult = this->InverseNormalizedStackOpsMap.insert(InverseInsertTriple); assert(InverseInsertResult.second || DuplicateCase); } + if (DebugFlag) { // debug aid: dump every (operand, address) key currently in NormalizedStackOpsMap + map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator StackMapIter; + SMP_msg("DEBUG: NormalizedStackOpsMap size: %zd\n", this->NormalizedStackOpsMap.size()); + for (StackMapIter = this->NormalizedStackOpsMap.begin(); StackMapIter != this->NormalizedStackOpsMap.end(); ++ StackMapIter) { + op_t MapKeyOp = StackMapIter->first.first; // distinct name: must not shadow parameter OldOp + ea_t MapKeyAddr = StackMapIter->first.second; // distinct name: must not shadow parameter InstAddr + SMP_msg("DEBUG: NormalizedStackOps: "); + PrintOperand(MapKeyOp); + SMP_msg(" addr: %lx\n", (unsigned long) MapKeyAddr); + } + } return; } // SMPFunction::AddNormalizedStackOperand() @@ -554,6 +566,18 @@ map<int, struct STARS_SCCP_Const_Struct>::iterator SMPFunction::InsertGlobalCons // Return RTLop if not stack opnd; return normalized RTLop otherwise. 
op_t SMPFunction::GetNormalizedOperand(ea_t InstAddr, op_t RTLop) { op_t NormOp; + bool DebugFlag = (0xb79b == InstAddr); // NOTE(review): hard-coded debug address; confirm it should ship + if (DebugFlag) { // debug aid: dump every (operand, address) key currently in NormalizedStackOpsMap + map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator StackMapIter; + SMP_msg("DEBUG: NormalizedStackOpsMap size: %zd\n", this->NormalizedStackOpsMap.size()); + for (StackMapIter = this->NormalizedStackOpsMap.begin(); StackMapIter != this->NormalizedStackOpsMap.end(); ++ StackMapIter) { + op_t MapKeyOp = StackMapIter->first.first; + ea_t MapKeyAddr = StackMapIter->first.second; // distinct name: parameter InstAddr is still used below + SMP_msg("DEBUG: NormalizedStackOps: "); + PrintOperand(MapKeyOp); + SMP_msg(" addr: %lx\n", (unsigned long) MapKeyAddr); + } + } if (MDIsStackAccessOpnd(RTLop, this->UsesFramePointer())) { pair<op_t, ea_t> OldDefn(RTLop, InstAddr); map<pair<op_t, ea_t>, op_t, LessDefinition>::iterator FindIter = this->NormalizedStackOpsMap.find(OldDefn); diff --git a/SMPInstr.cpp b/SMPInstr.cpp index 5353809b12bafcd3461acc94f6f6621dbf226b52..b68c02dec99a151ccebb890950351aced859a815 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -1335,8 +1335,8 @@ bool SMPInstr::MDIsNop(void) const { // We are looking for 6-byte no-ops like lea esi,[esi+0] ushort destreg = this->SMPcmd.Operands[0].reg; if ((this->SMPcmd.Operands[1].hasSIB) - && (destreg == (ushort) sib_base(this->SMPcmd.Operands[1])) - && (R_sp == sib_index(this->SMPcmd.Operands[1]))) { + && (destreg == (ushort) MD_STARS_sib_base(this->SMPcmd.Operands[1])) + && (R_sp == MD_STARS_sib_index(this->SMPcmd.Operands[1]))) { // R_sp signifies no SIB index register. So, we have // lea reg,[reg+0] with reg being the same in both place, // once as Operands[0] and once as the base reg in Operands[1]. @@ -2631,8 +2631,8 @@ bool SMPInstr::MDComputeNormalizedDataFlowOp(bool UseFP, sval_t FPDelta, op_t &D // in the SIB byte, and we make [ebx+ebp-4] into [esp+ebx-12], which involves // correcting the index/base reg order in the SIB, because an index reg of ESP // is the SIB encoding for "no index register" and we cannot leave it like that. 
- int BaseReg = sib_base(DefOp); - int IndexReg = (int) sib_index(DefOp); + int BaseReg = MD_STARS_sib_base(DefOp); + int IndexReg = (int) MD_STARS_sib_index(DefOp); if (X86_STACK_POINTER_REG == IndexReg) // signifies no index register IndexReg = R_none; if (BaseReg == X86_STACK_POINTER_REG) { diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp index 80c0c406b7c14667c5563b9baafa675e1bada1f3..68d33236f6156800c675e60a4ec88da041153fd5 100644 --- a/SMPStaticAnalyzer.cpp +++ b/SMPStaticAnalyzer.cpp @@ -383,6 +383,7 @@ int IDAP_init(void) { error("Processor must be x86."); return PLUGIN_SKIP; } +#if 0 // too early to detect 64-bit; moved to IDAP_run() if (inf.is_64bit()) { STARS_ISA_Bitwidth = 64; STARS_ISA_dtyp = dt_qword; @@ -394,6 +395,7 @@ int IDAP_init(void) { SMP_msg("INFO: 32-bit binary detected.\n"); } STARS_ISA_Bytewidth = (STARS_ISA_Bitwidth / 8); +#endif hook_to_notification_point(HT_IDP, idp_callback, NULL); DataReferentID = 1; @@ -436,7 +438,6 @@ int IDAP_init(void) { #endif InitOp.type = o_void; InitOp.addr = 0; - InitOp.dtyp = STARS_ISA_dtyp; InitOp.flags = 0; InitOp.n = 0; InitOp.offb = 0; @@ -453,15 +454,6 @@ int IDAP_init(void) { #ifdef STARS_IRDB_INTERFACE SMPLogFile = NULL; #endif - InitOptCategory(); - InitDFACategory(); - InitTypeCategory(); - InitSMPDefsFlags(); - InitSMPUsesFlags(); - InitLibFuncFGInfoMaps(); - InitIntegerErrorCallSinkMap(); - InitUnsignedArgPositionMap(); - InitStackAlteration(); return PLUGIN_KEEP; } // end of IDAP_init @@ -475,6 +467,29 @@ void IDAP_run(int arg) { FILE *InfoAnnotFile; FILE *AsmFile; + if (inf.is_64bit()) { + STARS_ISA_Bitwidth = 64; + STARS_ISA_dtyp = dt_qword; + SMP_msg("INFO: 64-bit binary detected.\n"); + } + else { + STARS_ISA_Bitwidth = 32; + STARS_ISA_dtyp = dt_dword; + SMP_msg("INFO: 32-bit binary detected.\n"); + } + STARS_ISA_Bytewidth = (STARS_ISA_Bitwidth / 8); + InitOp.dtyp = STARS_ISA_dtyp; + + InitOptCategory(); + InitDFACategory(); + InitTypeCategory(); + InitSMPDefsFlags(); + 
InitSMPUsesFlags(); + InitLibFuncFGInfoMaps(); + InitIntegerErrorCallSinkMap(); + InitUnsignedArgPositionMap(); + InitStackAlteration(); + #ifdef STARS_IRDB_INTERFACE string ZSTLogFileName(RootFileName); string LogFileSuffix(".STARSlog");