// // SMPDataFlowAnalysis.cpp // // This module contains common types an helper classes needed for the // SMP project (Software Memory Protection). // #include <list> #include <set> #include <vector> #include <algorithm> #include <cstring> #include <pro.h> #include <assert.h> #include <ida.hpp> #include <idp.hpp> #include <allins.hpp> #include <auto.hpp> #include <bytes.hpp> #include <funcs.hpp> #include <intel.hpp> #include <loader.hpp> #include <lines.hpp> #include <name.hpp> #include "SMPDataFlowAnalysis.h" #include "SMPStaticAnalyzer.h" // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG3 0 // verbose #define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered #define SMP_DEBUG_XOR 0 #define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions #define SMP_DEBUG_FRAMEFIXUP 0 #define SMP_DEBUG_DATAFLOW 0 // Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008 #define SMP_COMPUTE_LVA_SSA 0 char *RegNames[R_of + 1] = { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH", "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS", "DS", "FS", "GS", "CF", "ZF", "SF", "OF" }; // Define instruction categories for data flow analysis. SMPitype DFACategory[NN_last+1]; // We need to make subword registers equal to their containing registers when we // do comparisons, so that we will realize that register EAX is killed by a prior DEF // of register AL, for example, and vice versa. To keep sets ordered strictly, // we also have to make AL and AH be equal to each other as well as equal to EAX. #define FIRST_x86_SUBWORD_REG R_al #define LAST_x86_SUBWORD_REG R_bh bool MDLessReg(const ushort Reg1, const ushort Reg2) { bool FirstSubword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG)); bool SecondSubword = ((Reg2 >= FIRST_x86_SUBWORD_REG) && (Reg2 <= LAST_x86_SUBWORD_REG)); ushort SReg1 = Reg1; ushort SReg2 = Reg2; if (FirstSubword) { // See enumeration RegNo in intel.hpp. if (SReg1 < 20) // AL, CL, DL or BL SReg1 -= 16; else // AH, CH, DH or BH SReg1 -= 20; } if (SecondSubword) { if (SReg2 < 20) SReg2 -= 16; else SReg2 -= 20; } return (SReg1 < SReg2); } // end of MDLessReg() // In SSA computations, we are storing the GlobalNames index into the op_t fields // n, offb, and offo. This function extracts an unsigned int from these three 8-bit // fields. unsigned int ExtractGlobalIndex(op_t GlobalOp) { unsigned int index = 0; index |= (((unsigned int) GlobalOp.offo) & 0x000000ff); index <<= 8; index |= (((unsigned int) GlobalOp.offb) & 0x000000ff); index <<= 8; index |= (((unsigned int) GlobalOp.n) & 0x000000ff); return index; } // DEBUG Print DEF and/or USE for an operand. void PrintDefUse(ulong feature, int OpNum) { // CF_ macros number the operands from 1 to 6, while OpNum // is a 0 to 5 index into the insn_t.Operands[] array. // OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc. // operand and not an instruction operand. if (-1 == OpNum) return; switch (OpNum) { case 0: if (feature & CF_CHG1) msg(" DEF"); if (feature & CF_USE1) msg(" USE"); break; case 1: if (feature & CF_CHG2) msg(" DEF"); if (feature & CF_USE2) msg(" USE"); break; case 2: if (feature & CF_CHG3) msg(" DEF"); if (feature & CF_USE3) msg(" USE"); break; case 3: if (feature & CF_CHG4) msg(" DEF"); if (feature & CF_USE4) msg(" USE"); break; case 4: if (feature & CF_CHG5) msg(" DEF"); if (feature & CF_USE5) msg(" USE"); break; case 5: if (feature & CF_CHG6) msg(" DEF"); if (feature & CF_USE6) msg(" USE"); break; } return; } // end PrintDefUse() // DEBUG print SIB info for an operand. void PrintSIB(op_t Opnd) { int BaseReg = sib_base(Opnd); short IndexReg = sib_index(Opnd); int ScaleFactor = sib_scale(Opnd); #define NAME_LEN 5 char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; #if 0 if (BaseReg != R_bp) // SIB code for NO BASE REG #endif qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1); if (IndexReg != R_sp) { // SIB code for NO INDEX REG qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1); } msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor); } // end PrintSIB() // Debug: print one operand from an instruction or DEF or USE list. void PrintOneOperand(op_t Opnd, ulong features, int OpNum) { if (Opnd.type == o_void) return; else if (Opnd.type == o_mem) { msg(" Operand %d : memory : addr: %x", OpNum, Opnd.addr); PrintDefUse(features, OpNum); if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem? msg(" Found SIB byte for o_mem operand "); PrintSIB(Opnd); } } else if (Opnd.type == o_phrase) { msg(" Operand %d : memory phrase :", OpNum); PrintDefUse(features, OpNum); if (Opnd.hasSIB) { // has SIB info PrintSIB(Opnd); } else { // no SIB info ushort BaseReg = Opnd.phrase; msg(" reg %s", RegNames[BaseReg]); } if (Opnd.addr != 0) { msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); } } else if (Opnd.type == o_displ) { msg(" Operand %d : memory displ :", OpNum); ea_t offset = Opnd.addr; PrintDefUse(features, OpNum); if (Opnd.hasSIB) { PrintSIB(Opnd); msg(" displ %d", offset); } else { ushort BaseReg = Opnd.reg; msg(" reg %s displ %d", RegNames[BaseReg], offset); } } else if (Opnd.type == o_reg) { msg(" Operand %d : register %s", OpNum, RegNames[Opnd.reg]); PrintDefUse(features, OpNum); } else if (Opnd.type == o_imm) { msg(" Operand %d : immed", OpNum); PrintDefUse(features, OpNum); } else if (Opnd.type == o_far) { msg(" Operand %d : FarPtrImmed", OpNum); msg(" addr: %x", Opnd.addr); PrintDefUse(features, OpNum); } else if (Opnd.type == o_near) { msg(" Operand %d : NearPtrImmed", OpNum); msg(" addr: %x", Opnd.addr); PrintDefUse(features, OpNum); } else { msg(" Operand %d : unknown", OpNum); PrintDefUse(features, OpNum); } if (!(Opnd.showed())) msg(" HIDDEN "); return; } // end of PrintOneOperand() // Print an operand that has no features flags or operand position number, such // as the op_t types found in lists and sets throughout the blocks, phi functions, etc. void PrintListOperand(op_t Opnd, int SSANum) { if (Opnd.type == o_void) return; else if (Opnd.type == o_mem) { msg(" Operand : memory : addr: %x", Opnd.addr); if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem? msg(" Found SIB byte for o_mem operand "); PrintSIB(Opnd); } } else if (Opnd.type == o_phrase) { msg(" Operand : memory phrase :"); if (Opnd.hasSIB) { // has SIB info PrintSIB(Opnd); } else { // no SIB info ushort BaseReg = Opnd.phrase; msg(" reg %s", RegNames[BaseReg]); } if (Opnd.addr != 0) { msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); } } else if (Opnd.type == o_displ) { msg(" Operand : memory displ :"); ea_t offset = Opnd.addr; if (Opnd.hasSIB) { PrintSIB(Opnd); msg(" displ %d", offset); } else { ushort BaseReg = Opnd.reg; msg(" reg %s displ %d", RegNames[BaseReg], offset); } } else if (Opnd.type == o_reg) { msg(" Operand : register: %s", RegNames[Opnd.reg]); } else if (Opnd.type == o_imm) { msg(" Operand : immed %d", Opnd.value); } else if (Opnd.type == o_far) { msg(" Operand : FarPtrImmed addr: %x", Opnd.addr); } else if (Opnd.type == o_near) { msg(" Operand : NearPtrImmed addr: %x", Opnd.addr); } else { msg(" Operand : unknown"); } msg(" SSANum: %d ", SSANum); if (!(Opnd.showed())) msg(" HIDDEN "); return; } // end of PrintListOperand() // MACHINE DEPENDENT: Is operand type a known type that we want to analyze? bool MDKnownOperandType(op_t TempOp) { return ((TempOp.type >= o_reg) && (TempOp.type <= o_near)); } // ***************************************************************** // Class DefOrUse // ***************************************************************** // Default constructor to make the compilers happy. DefOrUse::DefOrUse(void) { this->OpType = UNINIT; this->SSANumber = -2; return; } // Constructor. DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) { this->Operand = Ref; this->OpType = Type; this->SSANumber = SSASub; return; } // Copy constructor. DefOrUse::DefOrUse(const DefOrUse &CopyIn) { *this = CopyIn; return; } // Assignment operator for copy constructor use. DefOrUse &DefOrUse::operator=(const DefOrUse &rhs) { this->Operand = rhs.Operand; this->OpType = rhs.OpType; this->SSANumber = rhs.SSANumber; return *this; } // Debug printing. void DefOrUse::Dump(void) const { PrintListOperand(this->Operand, this->SSANumber); if (this->OpType == NUMERIC) msg("N "); else if (this->OpType == POINTER) msg("P "); else if (this->OpType == UNKNOWN) msg("U "); // Don't write anything for UNINIT OpType return; } // end of DefOrUse::Dump() // ***************************************************************** // Class DefOrUseList // ***************************************************************** // Default constructor. DefOrUseList::DefOrUseList(void) { return; } // Set a Def or Use into the list, along with its type. void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type, int SSASub) { DefOrUse CurrRef(Ref, Type, SSASub); this->Refs.push_back(CurrRef); return; } // Get a reference by index. DefOrUse DefOrUseList::GetRef(size_t index) const { return Refs[index]; } // Change the SSA subscript for a reference. void DefOrUseList::SetSSANum(size_t index, int NewSSASub) { this->Refs[index].SetSSANum(NewSSASub); return; } // Change the operand type for a reference. void DefOrUseList::SetType(size_t index, SMPOperandType Type) { this->Refs[index].SetType(Type); return; } // Debug printing. void DefOrUseList::Dump(void) const { for (size_t index = 0; index < this->Refs.size(); ++index) { Refs[index].Dump(); } msg("\n"); return; } // Erase duplicate entries, in case SMPInstr::MDFixupDefUseLists() adds one. void DefOrUseList::EraseDuplicates(void) { set<op_t, LessOp> TempRefs; // Use STL set to find duplicates set<op_t, LessOp>::iterator TempIter; vector<DefOrUse>::iterator RefIter; RefIter = this->Refs.begin(); while (RefIter != this->Refs.end()) { TempIter = TempRefs.find(RefIter->GetOp()); if (TempIter == TempRefs.end()) { // not already in set TempRefs.insert(RefIter->GetOp()); ++RefIter; } else { // found it in set already RefIter = this->Refs.erase(RefIter); } } return; } // end of DefOrUseList::EraseDuplicates() // ***************************************************************** // Class SMPPhiFunction // ***************************************************************** // Constructor SMPPhiFunction::SMPPhiFunction(int GlobIndex, const DefOrUse &Def) { this->index = GlobIndex; this->DefName = Def; return; } // Add a phi item to the list void SMPPhiFunction::PushBack(DefOrUse Ref) { this->SubscriptedOps.SetRef(Ref.GetOp(), Ref.GetType(), Ref.GetSSANum()); return; } // Set the SSA number of the defined variable. void SMPPhiFunction::SetSSADef(int NewSSASub) { this->DefName.SetSSANum(NewSSASub); return; } // Set the SSA number of the input variable. void SMPPhiFunction::SetSSARef(size_t index, int NewSSASub) { this->SubscriptedOps.SetSSANum(index, NewSSASub); return; } // Debug printing. void SMPPhiFunction::Dump(void) const { msg(" DEF: "); this->DefName.Dump(); msg(" USEs: "); this->SubscriptedOps.Dump(); return; } // Initialize the DFACategory[] array to define instruction classes // for the purposes of data flow analysis. void InitDFACategory(void) { // Default category is 0, not the start or end of a basic block. (void) memset(DFACategory, 0, sizeof(DFACategory)); DFACategory[NN_call] = CALL; // Call Procedure DFACategory[NN_callfi] = INDIR_CALL; // Indirect Call Far Procedure DFACategory[NN_callni] = INDIR_CALL; // Indirect Call Near Procedure DFACategory[NN_hlt] = HALT; // Halt DFACategory[NN_int] = CALL; // Call to Interrupt Procedure DFACategory[NN_into] = CALL; // Call to Interrupt Procedure if Overflow Flag = 1 DFACategory[NN_int3] = CALL; // Trap to Debugger DFACategory[NN_iretw] = RETURN; // Interrupt Return DFACategory[NN_iret] = RETURN; // Interrupt Return DFACategory[NN_iretd] = RETURN; // Interrupt Return (use32) DFACategory[NN_iretq] = RETURN; // Interrupt Return (use64) DFACategory[NN_ja] = COND_BRANCH; // Jump if Above (CF=0 & ZF=0) DFACategory[NN_jae] = COND_BRANCH; // Jump if Above or Equal (CF=0) DFACategory[NN_jb] = COND_BRANCH; // Jump if Below (CF=1) DFACategory[NN_jbe] = COND_BRANCH; // Jump if Below or Equal (CF=1 | ZF=1) DFACategory[NN_jc] = COND_BRANCH; // Jump if Carry (CF=1) DFACategory[NN_jcxz] = COND_BRANCH; // Jump if CX is 0 DFACategory[NN_jecxz] = COND_BRANCH; // Jump if ECX is 0 DFACategory[NN_jrcxz] = COND_BRANCH; // Jump if RCX is 0 DFACategory[NN_je] = COND_BRANCH; // Jump if Equal (ZF=1) DFACategory[NN_jg] = COND_BRANCH; // Jump if Greater (ZF=0 & SF=OF) DFACategory[NN_jge] = COND_BRANCH; // Jump if Greater or Equal (SF=OF) DFACategory[NN_jl] = COND_BRANCH; // Jump if Less (SF!=OF) DFACategory[NN_jle] = COND_BRANCH; // Jump if Less or Equal (ZF=1 | SF!=OF) DFACategory[NN_jna] = COND_BRANCH; // Jump if Not Above (CF=1 | ZF=1) DFACategory[NN_jnae] = COND_BRANCH; // Jump if Not Above or Equal (CF=1) DFACategory[NN_jnb] = COND_BRANCH; // Jump if Not Below (CF=0) DFACategory[NN_jnbe] = COND_BRANCH; // Jump if Not Below or Equal (CF=0 & ZF=0) DFACategory[NN_jnc] = COND_BRANCH; // Jump if Not Carry (CF=0) DFACategory[NN_jne] = COND_BRANCH; // Jump if Not Equal (ZF=0) DFACategory[NN_jng] = COND_BRANCH; // Jump if Not Greater (ZF=1 | SF!=OF) DFACategory[NN_jnge] = COND_BRANCH; // Jump if Not Greater or Equal (ZF=1) DFACategory[NN_jnl] = COND_BRANCH; // Jump if Not Less (SF=OF) DFACategory[NN_jnle] = COND_BRANCH; // Jump if Not Less or Equal (ZF=0 & SF=OF) DFACategory[NN_jno] = COND_BRANCH; // Jump if Not Overflow (OF=0) DFACategory[NN_jnp] = COND_BRANCH; // Jump if Not Parity (PF=0) DFACategory[NN_jns] = COND_BRANCH; // Jump if Not Sign (SF=0) DFACategory[NN_jnz] = COND_BRANCH; // Jump if Not Zero (ZF=0) DFACategory[NN_jo] = COND_BRANCH; // Jump if Overflow (OF=1) DFACategory[NN_jp] = COND_BRANCH; // Jump if Parity (PF=1) DFACategory[NN_jpe] = COND_BRANCH; // Jump if Parity Even (PF=1) DFACategory[NN_jpo] = COND_BRANCH; // Jump if Parity Odd (PF=0) DFACategory[NN_js] = COND_BRANCH; // Jump if Sign (SF=1) DFACategory[NN_jz] = COND_BRANCH; // Jump if Zero (ZF=1) DFACategory[NN_jmp] = JUMP; // Jump DFACategory[NN_jmpfi] = INDIR_JUMP; // Indirect Far Jump DFACategory[NN_jmpni] = INDIR_JUMP; // Indirect Near Jump DFACategory[NN_jmpshort] = JUMP; // Jump Short (only in 64-bit mode) DFACategory[NN_loopw] = COND_BRANCH; // Loop while ECX != 0 DFACategory[NN_loop] = COND_BRANCH; // Loop while CX != 0 DFACategory[NN_loopd] = COND_BRANCH; // Loop while ECX != 0 DFACategory[NN_loopq] = COND_BRANCH; // Loop while RCX != 0 DFACategory[NN_loopwe] = COND_BRANCH; // Loop while CX != 0 and ZF=1 DFACategory[NN_loope] = COND_BRANCH; // Loop while rCX != 0 and ZF=1 DFACategory[NN_loopde] = COND_BRANCH; // Loop while ECX != 0 and ZF=1 DFACategory[NN_loopqe] = COND_BRANCH; // Loop while RCX != 0 and ZF=1 DFACategory[NN_loopwne] = COND_BRANCH; // Loop while CX != 0 and ZF=0 DFACategory[NN_loopne] = COND_BRANCH; // Loop while rCX != 0 and ZF=0 DFACategory[NN_loopdne] = COND_BRANCH; // Loop while ECX != 0 and ZF=0 DFACategory[NN_loopqne] = COND_BRANCH; // Loop while RCX != 0 and ZF=0 DFACategory[NN_retn] = RETURN; // Return Near from Procedure DFACategory[NN_retf] = RETURN; // Return Far from Procedure // // Pentium instructions // DFACategory[NN_rsm] = HALT; // Resume from System Management Mode // Pentium II instructions DFACategory[NN_sysenter] = CALL; // Fast Transition to System Call Entry Point DFACategory[NN_sysexit] = CALL; // Fast Transition from System Call Entry Point // AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual DFACategory[NN_syscall] = CALL; // Low latency system call DFACategory[NN_sysret] = CALL; // Return from system call // VMX instructions DFACategory[NN_vmcall] = INDIR_CALL; // Call to VM Monitor return; } // end InitDFACategory()