From 9c32ac7990f073bf9e7630afc40ca1b660303088 Mon Sep 17 00:00:00 2001 From: clc5q <clc5q@git.zephyr-software.com> Date: Thu, 13 Dec 2007 16:27:38 +0000 Subject: [PATCH] 29-NOV-2007 snapshot. --- .gitattributes | 3 + SMPDataFlowAnalysis.cpp | 1310 +++++++++++++++++++++++++++++++++++++++ SMPDataFlowAnalysis.h | 181 ++++++ SMPStaticAnalyzer.h | 15 + 4 files changed, 1509 insertions(+) create mode 100644 SMPDataFlowAnalysis.cpp create mode 100644 SMPDataFlowAnalysis.h create mode 100644 SMPStaticAnalyzer.h diff --git a/.gitattributes b/.gitattributes index c533adcb..0ad7893c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,6 +3,9 @@ /README.txt -text /SMP-analyze.sh -text /SMP.idc -text +/SMPDataFlowAnalysis.cpp -text +/SMPDataFlowAnalysis.h -text /SMPStaticAnalyzer.cpp -text +/SMPStaticAnalyzer.h -text /makefile -text /plugin.script -text diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp new file mode 100644 index 00000000..eb982058 --- /dev/null +++ b/SMPDataFlowAnalysis.cpp @@ -0,0 +1,1310 @@ +// +// SMPDataFlowAnalysis.cpp +// +// This module performs the fundamental data flow analyses needed for the +// SMP project (Software Memory Protection). +// + +#include <vector> + +#include <pro.h> +#include <ida.hpp> +#include <idp.hpp> +#include <allins.hpp> +#include <auto.hpp> +#include <bytes.hpp> +#include <funcs.hpp> +#include <intel.hpp> +#include <loader.hpp> +#include <lines.hpp> +#include <name.hpp> + +#include "SMPDataFlowAnalysis.h" +#include "SMPStaticAnalyzer.h" + +// Set to 1 for debugging output +#define SMP_DEBUG 1 +#define SMP_DEBUG2 0 // verbose +#define SMP_DEBUG3 0 // verbose +#define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered +#define SMP_DEBUG_XOR 0 +#define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions + +// Used for binary search by function number in SMPStaticAnalyzer.cpp +// to trigger debugging output and find which instruction in which +// function is causing a crash. +bool SMPBinaryDebug = false; + +// Define instruction categories for data flow analysis. +static SMPitype DFACategory[NN_last+1]; + +static char *RegNames[R_of + 1] = + { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH", + "SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS", + "DS", "FS", "GS", "CF", "ZF", "SF", "OF" + }; + +// Make the CF_CHG1 .. CF_CHG6 and CF_USE1..CF_USE6 macros more usable +// by allowing us to pick them up with an array index. +static ulong DefMacros[UA_MAXOP] = {CF_CHG1, CF_CHG2, CF_CHG3, CF_CHG4, CF_CHG5, CF_CHG6}; +static ulong UseMacros[UA_MAXOP] = {CF_USE1, CF_USE2, CF_USE3, CF_USE4, CF_USE5, CF_USE6}; + +// Text to be printed in each optimizing annotation explaining why +// the annotation was emitted. +static char *OptExplanation[LAST_OPT_CATEGORY + 1] = + { "NoOpt", "NoMetaUpdate", "AlwaysNUM", "NUMVia2ndSrcIMMEDNUM", + "Always1stSrc", "1stSrcVia2ndSrcIMMEDNUM", "AlwaysPtr", + "AlwaysNUM", "AlwaysNUM", "NUMViaFPRegDest" + }; + +// ***************************************************************** +// Class DefOrUseList +// ***************************************************************** + +// Default constructor. +DefOrUseList::DefOrUseList(void) { + return; +} + +// Set a Def or Use into the list, along with its type. +void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type) { + this->Refs.push_back(Ref); + this->Types.push_back(Type); + return; +} + +// Get a reference by index. +op_t DefOrUseList::GetRef(size_t index) const { + return Refs[index]; +} + +SMPOperandType DefOrUseList::GetRefType(size_t index) const { + return Types[index]; +} + +// ***************************************************************** +// Class SMPInstr +// ***************************************************************** + +// Constructor for instruction. +SMPInstr::SMPInstr(ea_t addr) { + this->address = addr; + this->analyzed = false; + this->JumpTarget = false; + return; +} + +// Is the instruction the type that terminates a basic block? +bool SMPInstr::IsBasicBlockTerminator() const { + return ((type == JUMP) || (type == COND_BRANCH) + || (type == INDIR_JUMP) || (type == RETURN)); +} + +// Is the destination operand a memory reference? +bool SMPInstr::HasDestMemoryOperand(void) const { + bool MemDest = false; + for (size_t index = 0; index < Defs.GetSize(); ++index) { + MemDest = ((Defs.GetRef(index).type == o_mem) + || (Defs.GetRef(index).type == o_phrase) + || (Defs.GetRef(index).type == o_displ)); + if (MemDest) + break; + } + return MemDest; +} // end of SMPInstr::HasDestMemoryOperand() + +// Is the destination operand a memory reference? +bool SMPInstr::HasSourceMemoryOperand(void) const { + bool MemSrc = false; + for (size_t index = 0; index < Uses.GetSize(); ++index) { + MemSrc = ((Uses.GetRef(index).type == o_mem) + || (Uses.GetRef(index).type == o_phrase) + || (Uses.GetRef(index).type == o_displ)); + if (MemSrc) + break; + } + return MemSrc; +} // end of SMPInstr::HasSourceMemoryOperand() + +// Does the instruction whose flags are in F have a numeric type +// as the second source operand? +// NOTE: We can only analyze immediate values now, using a heuristic +// that values in the range +/- 8K are numeric and others are +// probably addresses. When data flow analyses are implemented, +// we will be able to analyze many non-immediate operands. +#define IMMEDNUM_LOWER -8191 +#define IMMEDNUM_UPPER 8191 +bool SMPInstr::IsSecondSrcOperandNumeric(flags_t F) const { + bool SecondOpImm = (SMPcmd.Operands[1].type == o_imm); + signed long TempImm; + + if (SecondOpImm) { + TempImm = (signed long) SMPcmd.Operands[1].value; + } + +#if SMP_DEBUG + if (SecondOpImm && (0 > TempImm)) { +#if 0 + msg("Negative immediate: %d Hex: %x ASM: %s\n", TempImm, + SMPcmd.Operands[1].value, disasm); +#endif + } + else if ((!SecondOpImm) && (SMPcmd.Operands[1].type == o_imm)) { + msg("Problem with flags on immediate src operand: %s\n", disasm); + } +#endif + + return (SecondOpImm && (TempImm > IMMEDNUM_LOWER) + && (TempImm < IMMEDNUM_UPPER)); +} // end of SMPInstr::IsSecondSrcOperandNumeric() + +// DEBUG Print DEF and/or USE for an operand. +void PrintDefUse(ulong feature, int OpNum) { + // CF_ macros number the operands from 1 to 6, while OpNum + // is a 0 to 5 index into the insn_t.Operands[] array. + switch (OpNum) { + case 0: + if (feature & CF_CHG1) + msg(" DEF"); + if (feature & CF_USE1) + msg(" USE"); + break; + case 1: + if (feature & CF_CHG2) + msg(" DEF"); + if (feature & CF_USE2) + msg(" USE"); + break; + case 2: + if (feature & CF_CHG3) + msg(" DEF"); + if (feature & CF_USE3) + msg(" USE"); + break; + case 3: + if (feature & CF_CHG4) + msg(" DEF"); + if (feature & CF_USE4) + msg(" USE"); + break; + case 4: + if (feature & CF_CHG5) + msg(" DEF"); + if (feature & CF_USE5) + msg(" USE"); + break; + case 5: + if (feature & CF_CHG6) + msg(" DEF"); + if (feature & CF_USE6) + msg(" USE"); + break; + } + return; +} // end PrintDefUse() + +// DEBUG print SIB info for an operand. +void PrintSIB(op_t Opnd) { + int BaseReg = sib_base(Opnd); + short IndexReg = sib_index(Opnd); + int ScaleFactor = sib_scale(Opnd); +#define NAME_LEN 5 + char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; + char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'}; + if (BaseReg != R_bp) { // SIB code for NO BASE REG + qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1); + } + if (IndexReg != R_sp) { // SIB code for NO INDEX REG + qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1); + } + msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor); +} // end PrintSIB() + +// DEBUG print operands for Inst. +void SMPInstr::PrintOperands() const { + op_t Opnd; + for (int i = 0; i < UA_MAXOP; ++i) { + Opnd = SMPcmd.Operands[i]; + if (Opnd.type == o_void) + continue; + else if (Opnd.type == o_mem) { + msg(" Operand %d : memory : addr: %x", i, Opnd.addr); + PrintDefUse(features, i); + if (Opnd.hasSIB) { // has SIB info + PrintSIB(Opnd); + } + } + else if (Opnd.type == o_phrase) { + msg(" Operand %d : memory phrase :", i); + PrintDefUse(features, i); + if (Opnd.hasSIB) { // has SIB info + PrintSIB(Opnd); + } + else { // no SIB info + ushort BaseReg = Opnd.phrase; + msg(" reg %s", RegNames[BaseReg]); + } + if (Opnd.addr != 0) { + msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr); + } + } + else if (Opnd.type == o_displ) { + ea_t offset = Opnd.addr; + PrintDefUse(features, i); + if (Opnd.hasSIB) { + PrintSIB(Opnd); + msg(" displ %d", offset); + } + else { + ushort BaseReg = Opnd.reg; + msg(" Operand %d : memory displ : reg %s displ %d", i, + RegNames[BaseReg], offset); + } + } + else if (Opnd.type == o_reg) { + msg(" Operand %d : register", i); + msg(" regno: %d", Opnd.reg); + PrintDefUse(features, i); + } + else if (Opnd.type == o_imm) { + msg(" Operand %d : immed", i); + PrintDefUse(features, i); + } + else if (Opnd.type == o_far) { + msg(" Operand %d : FarPtrImmed", i); + PrintDefUse(features, i); + } + else if (Opnd.type == o_near) { + msg(" Operand %d : NearPtrImmed", i); + PrintDefUse(features, i); + } + else { + msg(" Operand %d : unknown", i); + PrintDefUse(features, i); + } + if (!(Opnd.showed())) + msg(" HIDDEN "); + } + msg(" \n"); + return; +} // end of SMPInstr::PrintOperands() + +// Print out the destination operand list for the instruction, given +// the OptCategory for the instruction as a hint. +char * SMPInstr::DestString(int OptType) { + static char DestList[MAXSTR]; + if (OptType != 7) { + if (SMPcmd.Operands[0].type != o_reg) { + msg("Problem: destination operand not memory and not reg: %d %d %s \n", + SMPcmd.Operands[0].type, SMPcmd.Operands[1].type, disasm); + } + else { + ushort DestReg = SMPcmd.Operands[0].reg; + qstrncpy(DestList, RegNames[DestReg], + 1 + strlen(RegNames[DestReg])); +#if 1 + qstrncat(DestList, " ZZ ", MAXSTR); +#endif + return DestList; + } + } + else { // OptType 7 could have one or two destinations. + // NOTE: FIX later. Currently a clone of code above. ** +#if SMP_DEBUG3 + msg("OptType 7: %s\n", disasm); + PrintOperands(); +#endif + if (SMPcmd.Operands[0].type != o_reg) { + msg("Problem: destination operand not memory and not reg: %d %d %s\n", + SMPcmd.Operands[0].type, SMPcmd.Operands[1].type, disasm); + } + else { + ushort DestReg = SMPcmd.Operands[0].reg; + qstrncpy(DestList, RegNames[DestReg], + 1 + strlen(RegNames[DestReg])); +#if 1 + qstrncat(DestList, " ZZ ", MAXSTR); +#endif + return DestList; + } + } + DestList[0] = '\0'; + return DestList; +} // end of SMPInstr::DestString() + +// Equality operator for SMPInstr. Key field is address. +int SMPInstr::operator==(const SMPInstr &rhs) const { + if (this->address != rhs.GetAddr()) + return 0; + else + return 1; +} + +// Inequality operator for SMPInstr. Key field is address. +int SMPInstr::operator!=(const SMPInstr &rhs) const { + return (this->address != rhs.GetAddr()); +} + +// Less than operator for sorting SMPInstr lists. Key field is address. +int SMPInstr::operator<(const SMPInstr &rhs) const { + return (this->address < rhs.GetAddr()); +} + +// Get optimization category for instruction +int SMPInstr::GetOptType(void) const { + return OptType; +} + +// Is this instruction the one that allocates space on the +// stack for the local variables of size LocSize? +bool SMPInstr::MDIsFrameAllocInstr(asize_t LocSize) const { + // The frame allocating instruction should look like: + // sub esp,48 or add esp,-64 etc. + if ((SMPcmd.itype == NN_sub) || (SMPcmd.itype == NN_add)) { + if (Defs.GetRef(0).is_reg(R_sp)) { + // We know that an addition or subtraction is being + // performed on the stack pointer. This should not be + // possible within the prologue except at the stack + // frame allocation instruction, so return true. We + // could be more robust in this analysis in the future. **!!** + // CAUTION: If a compiler allocates 64 bytes for locals + // and 16 bytes for outgoing arguments in a single + // instruction: sub esp,80 + // you cannot insist on finding sub esp,LocSize + return true; + } + } + return false; +} // end of SMPInstr::MDIsFrameAllocInstr() + +// Is this instruction in the epilogue the one that deallocates the local +// vars region of the stack frame? +bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) const { + // The usual compiler idiom for the prologue on x86 is to + // deallocate the local var space with: mov esp,ebp + // It could be add esp,constant. We can be tricked by + // add esp,constant when the constant is just the stack + // adjustment after a call. We will have to insist that + // the immediate operand have at least the value of + // LocalVarsSize for this second form, and that UseFP be true + // for the first form. + if (UseFP && (this->SMPcmd.itype == NN_mov) + && (this->Defs.GetRef(0).is_reg(R_sp)) + && (this->Uses.GetRef(0).is_reg(R_bp))) + return true; + else if ((this->SMPcmd.itype == NN_add) + && (this->Defs.GetRef(0).is_reg(R_sp)) + && (this->Uses.GetRef(1).is_imm((uval_t) LocalVarsSize))) + return true; + else if ((this->SMPcmd.itype == NN_add) + && (this->Defs.GetRef(0).is_reg(R_sp)) + && (this->Uses.GetRef(1).type == o_imm)) { + msg("Used imprecise LocalVarsSize to find dealloc instr.\n"); + return true; + } + else + return false; +} // end of SMPInstr::MDIsFrameDeallocInstr() + +// MACHINE DEPENDENT: Is instruction a return instruction? +bool SMPInstr::MDIsReturnInstr(void) const { + return ((SMPcmd.itype == NN_retn) || (SMPcmd.itype == NN_retf)); +} + +// MACHINE DEPENDENT: Is instruction a POP instruction? +#define FIRST_POP_INST NN_pop +#define LAST_POP_INST NN_popfq +bool SMPInstr::MDIsPopInstr(void) const { + return ((SMPcmd.itype >= FIRST_POP_INST) + && (SMPcmd.itype <= LAST_POP_INST)); +} + +// MACHINE DEPENDENT: Is instruction a PUSH instruction? +#define FIRST_PUSH_INST NN_push +#define LAST_PUSH_INST NN_pushfq +bool SMPInstr::MDIsPushInstr(void) const { + return ((SMPcmd.itype >= FIRST_PUSH_INST) + && (SMPcmd.itype <= LAST_PUSH_INST)); +} + +// Analyze the instruction and its operands. +void SMPInstr::Analyze(void) { + if (this->analyzed) + return; + + // Fill cmd structure with disassembly of instr + ua_ana0(this->address); + // Get the instr disassembly text. + (void) generate_disasm_line(this->address, this->disasm, sizeof(this->disasm) - 1); + // Remove interactive color-coding tags. + tag_remove(this->disasm, this->disasm, 0); + // Copy cmd to member variable SMPcmd. + this->SMPcmd = cmd; + // Get the canonical features into member variables features. + this->features = cmd.get_canon_feature(); + + // Record what type of instruction this is, simplified for the needs + // of data flow and type analysis. + this->type = DFACategory[cmd.itype]; + // Record optimization category. + this->OptType = OptCategory[cmd.itype]; + + // Build the DEF and USE lists for the instruction. + this->BuildSMPDefUseLists(); + // Fix up machine dependent quirks in the def and use lists. + this->MDFixupDefUseLists(); + + // Determine whether the instruction is a jump target by looking + // at its cross references and seeing if it has "TO" code xrefs. + xrefblk_t xrefs; + for (bool ok = xrefs.first_to(this->address, XREF_FAR); ok; ok = xrefs.next_to()) { + if ((xrefs.from != 0) && (xrefs.iscode)) { + this->JumpTarget = true; + break; + } + } + + this->analyzed = true; + return; +} // end of SMPInstr::Analyze() + +// Fill the Defs and Uses private data members. +void SMPInstr::BuildSMPDefUseLists(void) { + size_t OpNum; + + // Start with the Defs. + for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { + if (this->features & DefMacros[OpNum]) { // DEF + this->Defs.SetRef(this->SMPcmd.Operands[OpNum]); + } + } // end for (OpNum = 0; ...) + + // Now, do the Uses. Uses have special case operations, because + // any memory operand could have register uses in the addressing + // expression, and we must create Uses for those registers. For + // example: mov eax,[ebx + esi*2 + 044Ch] + // This is a two-operand instruction with one def: eax. But + // there are three uses: [ebx + esi*2 + 044Ch], ebx, and esi. + // The first use is an op_t of type o_phrase (memory phrase), + // which can be copied from cmd.Operands[1]. Likewise, we just + // copy cmd.Operands[0] into the defs list. However, we must create + // op_t types for register ebx and register esi and append them + // to the Uses list. This is handled by the machine dependent + // method MDFixupDefUseLists(). + for (OpNum = 0; OpNum < UA_MAXOP; ++OpNum) { + if (this->features & UseMacros[OpNum]) { // USE + this->Uses.SetRef(this->SMPcmd.Operands[OpNum]); + } + } // end for (OpNum = 0; ...) + + return; +} // end of SMPInstr::BuildSMPDefUseLists() + +// Perform machine dependent ad hoc fixes to the def and use lists. +// For example, some multiply and divide instructions in x86 implicitly +// use and/or define register EDX. For memory phrase examples, see comment +// in BuildSMPDefUseLists(). +void SMPInstr::MDFixupDefUseLists(void) { + return; +} + +// Emit annotations for constants used as ptr offsets from EBP or +// ESP into the stack frame. Only pay attention to EBP-relative +// offsets if EBP is being used as a frame pointer (UseFP == true). +void SMPInstr::AnnotateStackConstants(bool UseFP, FILE *AnnotFile) { + op_t Opnd; +#if 0 + if ((this->address == 0x8048409) || (this->address == 0x81488a1)) { + msg("PROBLEM INSTRUCTION: \n"); + this->PrintOperands(); + } +#endif + for (int i = 0; i < UA_MAXOP; ++i) { + Opnd = SMPcmd.Operands[i]; + if (Opnd.type == o_displ) { + ea_t offset = Opnd.addr; + if (Opnd.hasSIB) { + int BaseReg = sib_base(Opnd); + short IndexReg = sib_index(Opnd); + if (BaseReg == R_sp) { // EBP cannot be BaseReg in SIB + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + else if (IndexReg == R_bp) { // ESP cannot be IndexReg + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + } + else { // no SIB + ushort BaseReg = Opnd.reg; + if (BaseReg == R_sp) { + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + else if (BaseReg == R_bp) { + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + } // end if (Opnd.hasSIB) ... else ... + } // end if (Opnd.type == o_displ) + else if (Opnd.type == o_phrase) { + ea_t offset = 0; // mmStrata thinks [esp] is [esp+0] + if (Opnd.hasSIB) { + int BaseReg = sib_base(Opnd); + short IndexReg = sib_index(Opnd); + if (BaseReg == R_sp) { // EBP cannot be BaseReg in SIB + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + else if (IndexReg == R_bp) { // ESP cannot be IndexReg + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + } + else { // Something like [ecx] + ushort BaseReg = Opnd.reg; + if (BaseReg == R_sp) { + // ESP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDESP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + else if (BaseReg == R_bp) { + // EBP-relative constant offset + qfprintf(AnnotFile, + "%x %d PTRIMMEDEBP STACK %d %s\n", + SMPcmd.ea, SMPcmd.size, offset, disasm); + } + } // end if (Opnd.hasSIB) ... else ... + } // end else if (Opnd.type == o_phrase) + } // end for all operands + return; +} // end of SMPInstr::AnnotateStackConstants() + +// ***************************************************************** +// Class SMPBasicBlock +// ***************************************************************** + +// Constructor +SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) { + this->FirstInstr = First; + this->LastInstr = Last; + this->IndirectJump = false; + this->Returns = false; + this->SharedTailChunk = false; +} + +// Analyze basic block and fill data members. +void SMPBasicBlock::Analyze() { + if (LastInstr->GetDataFlowType() == INDIR_JUMP) { + this->IndirectJump = true; + } + else if (LastInstr->MDIsReturnInstr()) { + this->Returns = true; + } +} // end of SMPBasicBlock::Analyze() + +// ***************************************************************** +// Class SMPFunction +// ***************************************************************** + +// Constructor +SMPFunction::SMPFunction(func_t *Info) { + this->FuncInfo = Info; + IndirectCalls = false; + return; +} + +// Figure out the different regions of the stack frame, and find the +// instructions that allocate and deallocate the local variables space +// on the stack frame. +// The stack frame info will be used to emit stack +// annotations when Analyze() reaches the stack allocation +// instruction that sets aside space for local vars. +// Set the address of the instruction at which these +// annotations should be emitted. This should normally +// be an instruction such as: sub esp,48 +// However, for a function with no local variables at all, +// we will need to determine which instruction should be +// considered to be the final instruction of the function +// prologue and return its address. +// Likewise, we find the stack deallocating instruction in +// the function epilogue. +void SMPFunction::SetStackFrameInfo(void) { + bool FoundAllocInstr = false; + bool FoundDeallocInstr = false; + + // The sizes of the three regions of the stack frame other than the + // return address are stored in the function structure. + this->LocalVarsSize = this->FuncInfo->frsize; + this->CalleeSavedRegsSize = this->FuncInfo->frregs; + this->IncomingArgsSize = this->FuncInfo->argsize; + + // The return address size can be obtained in a machine independent + // way by calling get_frame_retsize(). + this->RetAddrSize = get_frame_retsize(this->FuncInfo); + + // IDA Pro has trouble with functions that do not have any local + // variables. Unfortunately, the C library has plenty of these + // functions. IDA usually claims that frregs is zero and frsize + // is N, when the values should have been reversed. We can attempt + // to detect this and fix it. + bool FrameInfoFixed = this->MDFixFrameInfo(); + +#define SMP_DEBUG_FRAMEFIXUP 1 +#if SMP_DEBUG_FRAMEFIXUP + if (FrameInfoFixed) { + msg("Fixed stack frame size info: %s\n", this->FuncName); + SMPBasicBlock CurrBlock = this->Blocks.front(); + msg("First basic block:\n"); + for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr(); + CurrInstr != CurrBlock.GetLastInstr(); + ++CurrInstr) { + msg("%s\n", CurrInstr->GetDisasm()); + } + msg("%s\n", CurrBlock.GetLastInstr()->GetDisasm()); + } +#endif + + // Now, if LocalVarsSize is not zero, we need to find the instruction + // in the function prologue that allocates space on the stack for + // local vars. This code could be made more robust in the future + // by matching LocalVarsSize to the immediate value in the allocation + // instruction. However, IDA Pro is sometimes a little off on this + // number, claiming 4 bytes for functions with no locals at all, + // assigning the 4 bytes used for a callee-saved register to the + // local vars space and then claiming 0 bytes for callee-saved + // registers. **!!** + if (0 < this->LocalVarsSize) { + for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); + CurrInstr != this->Instrs.end(); + ++CurrInstr) { + ea_t addr = CurrInstr->GetAddr(); + + // Keep the most recent instruction in the DeallocInstr + // in case we reach the return without seeing a dealloc. + if (!FoundDeallocInstr) { + this->LocalVarsDeallocInstr = addr; + } + + if (!FoundAllocInstr + && CurrInstr->MDIsFrameAllocInstr(this->LocalVarsSize)) { + this->LocalVarsAllocInstr = addr; + FoundAllocInstr = true; + } + else if (FoundAllocInstr) { + // We can now start searching for the DeallocInstr. + if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) { + // Keep saving the most recent addr that looks + // like the DeallocInstr until we reach the + // end of the function. Last one to look like + // it is used as the DeallocInstr. + this->LocalVarsDeallocInstr = addr; + FoundDeallocInstr = true; + } + } + } // end for (list<SMPInstr>::iterator CurrInstr ... ) + if (!FoundAllocInstr) { + // Could not find the frame allocating instruction. Bad. + // Emit diagnostic and use the first instruction in the + // function. + msg("ERROR: Could not find stack frame allocation in %s\n", + FuncName); + msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n", + LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize); + this->LocalVarsAllocInstr = this->FuncInfo->startEA; + } + if (!FoundDeallocInstr) { + // Could not find the frame deallocating instruction. Bad. + // Emit diagnostic and use the last instruction in the + // function. + msg("ERROR: Could not find stack frame deallocation in %s\n", + FuncName); + } + } + // else LocalVarsSize was zero, meaning that we need to search + // for the end of the function prologue code and emit stack frame + // annotations from that address (i.e. this method returns that + // address). We will approximate this by finding the end of the + // sequence of PUSH instructions at the beginning of the function. + // The last PUSH instruction should be the last callee-save-reg + // instruction. We can make this more robust in the future by + // making sure that we do not count a PUSH of anything other than + // a register. **!!** + // NOTE: 2nd prologue instr is usually mov ebp,esp + // THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE + // THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0; + else { + ea_t SaveAddr = FuncInfo->startEA; + for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin(); + CurrInstr != this->Instrs.end(); + ++CurrInstr) { + insn_t CurrCmd = CurrInstr->GetCmd(); + ea_t addr = CurrInstr->GetAddr(); + if (CurrCmd.itype == NN_push) + SaveAddr = addr; + else + break; + } + this->LocalVarsAllocInstr = SaveAddr; + this->LocalVarsDeallocInstr = 0; + } // end if (LocalVarsSize > 0) ... else ... + +#if 0 + // Now we need to do the corresponding operations from the + // end of the function to find the DeallocInstr in the + // function epilogue. Because there is no addition to the + // stack pointer to deallocate the local vars region, the + // function epilogue will consist of (optional) pops of + // callee-saved regs, followed by the return instruction. + // Working backwards, we should find a return and then + // stop when we do not find any more pops. + if (0 >= LocalVarsSize) { + this->LocalVarsDeallocInstr = NULL; + } + else { + SaveAddr = FuncInfo->endEA - 1; + bool FoundRet = false; + do { + ea_t addr = get_item_head(SaveAddr); + flags_t InstrFlags = getFlags(addr); + if (isCode(addr) && isHead(addr)) { + ua_ana0(addr); + if (!FoundRet) { // Just starting out. + if (MDIsReturnInstr(cmd)) { + FoundRet = true; + SaveAddr = addr - 1; + } + else { + msg("ERROR: Last instruction not a return.\n"); + } + } + else { // Should be 0 or more POPs before the return. + if (MDIsPopInstr(cmd)) { + SaveAddr = addr - 1; + } + else if (FrameAllocInstr(cmd, this->LocalVarsSize)) { + this->LocalVarsDeallocInstr = addr; + } + else { + msg("ERROR: Frame deallocation not prior to POPs.\n"); + this->LocalVarsDeallocInstr = SaveAddr + 1; + } + } // end if (!FoundRet) ... else ... + } + else { + --SaveAddr; + } // end if (isCode(addr) && isHead(addr)) + } while (NULL == this->LocalVarsDeallocInstr); + } // end if (0 >= this->LocalVarsSize) +#endif // 0 + return; +} // end of SMPFunction::SetStackFrameInfo() + +// IDA Pro has trouble with functions that do not have any local +// variables. Unfortunately, the C library has plenty of these +// functions. IDA usually claims that frregs is zero and frsize +// is N, when the values should have been reversed. We can attempt +// to detect this and fix it. +// Fixing means we will update the data members LocalVarsSize and +// CalleeSavedRegsSize. +bool SMPFunction::MDFixFrameInfo(void) { + // Does this function fit the problem pattern, with zero for saved + // regs and nonzero for local vars? + if ((LocalVarsSize == 0) || (CalleeSavedRegsSize != 0)) + return false; + + // Iterate through the first basic block in the function. If we find + // a frame allocating Instr in it, then we have local vars. If not, + // we don't, and LocalVarsSize should have been zero, not + // CalleeSavedRegsSize, so swap them. + SMPBasicBlock CurrBlock = this->Blocks.front(); + for (list<SMPInstr>::iterator CurrInstr = CurrBlock.GetFirstInstr(); + CurrInstr != CurrBlock.GetLastInstr(); // LastInstr is jump anyway + ++CurrInstr) { + if (CurrInstr->MDIsFrameAllocInstr(LocalVarsSize)) + return false; + } + // We did not find a stack frame allocation instruction in the first + // basic block, yet CalleeSavedRegsSize is zero and LocalVarsSize + // is not zero. Swap them. + this->CalleeSavedRegsSize = (ushort) this->LocalVarsSize; + this->LocalVarsSize = 0; + return true; +} // end of SMPFunction::MDFixFrameInfo() + +// Emit the annotations describing the regions of the stack frame. +void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) { + ea_t addr = Instr->GetAddr(); + + if (0 < IncomingArgsSize) + qfprintf(AnnotFile, "%x %d INARGS STACK esp + %d %s \n", + addr, IncomingArgsSize, + (LocalVarsSize + CalleeSavedRegsSize + RetAddrSize), + Instr->GetDisasm()); + if (0 < RetAddrSize) + qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d ReturnAddress \n", + addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize)); + if (0 < CalleeSavedRegsSize) + qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n", + addr, CalleeSavedRegsSize, LocalVarsSize); + if (0 < LocalVarsSize) + qfprintf(AnnotFile, "%x %d LOCALFRAME STACK esp + %d LocalVars \n", + addr, LocalVarsSize, 0); + return; +} // end of SMPFunction::EmitStackFrameAnnotations() + +// Main data flow analysis driver. Goes through the function and +// fills all objects for instructions, basic blocks, and the function +// itself. +void SMPFunction::Analyze(void) { + list<SMPInstr>::iterator FirstInBlock = this->Instrs.end(); + // For starting a basic block + list<SMPInstr>::iterator LastInBlock = this->Instrs.end(); + // Terminating a basic block + +#if SMP_DEBUG_CONTROLFLOW + msg("Entering SMPFunction::Analyze.\n"); +#endif + + // Get some basic info from the FuncInfo structure. + this->Size = this->FuncInfo->endEA - this->FuncInfo->startEA; + this->UseFP = (0 != (this->FuncInfo->flags & (FUNC_FRAME | FUNC_BOTTOMBP))); + this->StaticFunc = (0 != (this->FuncInfo->flags & FUNC_STATIC)); + get_func_name(this->FuncInfo->startEA, this->FuncName, + sizeof(this->FuncName) - 1); + +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: got basic info.\n"); +#endif + + // Cycle through all chunks that belong to the function. + func_tail_iterator_t FuncTail(this->FuncInfo); + size_t ChunkCounter = 0; + for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { + const area_t &CurrChunk = FuncTail.chunk(); + ++ChunkCounter; +#if SMP_DEBUG_CHUNKS + if (1 < ChunkCounter) + msg("Found tail chunk for %s\n", this->FuncName); +#endif + // Build the instruction and block lists for the function. + for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA; + addr = get_item_end(addr)) { + flags_t InstrFlags = getFlags(addr); + if (isHead(InstrFlags) && isCode(InstrFlags)) { + SMPInstr CurrInst = SMPInstr(addr); + // Fill in the instruction data members. + #if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n"); + #endif + CurrInst.Analyze(); + if (SMPBinaryDebug) { + msg("Disasm: %s \n", CurrInst.GetDisasm()); + } + if (CurrInst.GetDataFlowType() == INDIR_CALL) + this->IndirectCalls = true; + + // Before we insert the instruction into the instruction + // list, determine if it is a jump target that does not + // follow a basic block terminator. This is the special case + // of a CASE in a SWITCH that falls through into another + // CASE, for example. The first sequence of statements + // was not terminated by a C "break;" statement, so it + // looks like straight line code, but there is an entry + // point at the beginning of the second CASE sequence and + // we have to split basic blocks at the entry point. + if ((FirstInBlock != this->Instrs.end()) + && CurrInst.IsJumpTarget()) { + #if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: hit special jump target case.\n"); + #endif + LastInBlock = --(this->Instrs.end()); + SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, + LastInBlock); + CurrBlock.Analyze(); + // If not the first chunk in the function, it is a shared + // tail chunk. + if (ChunkCounter > 1) { + CurrBlock.SetShared(); + } + FirstInBlock = this->Instrs.end(); + LastInBlock = this->Instrs.end(); + this->Blocks.push_back(CurrBlock); + } + + #if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: putting CurrInst on list.\n"); + #endif + // Insert instruction at end of list. + this->Instrs.push_back(CurrInst); + + // Find basic block leaders and terminators. + if (FirstInBlock == this->Instrs.end()) { + #if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: setting FirstInBlock.\n"); + #endif + FirstInBlock = --(this->Instrs.end()); + } + if (CurrInst.IsBasicBlockTerminator()) { + #if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: found block terminator.\n"); + #endif + LastInBlock = --(this->Instrs.end()); + SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); + CurrBlock.Analyze(); + // If not the first chunk in the function, it is a shared + // tail chunk. + if (ChunkCounter > 1) { + CurrBlock.SetShared(); + } + FirstInBlock = this->Instrs.end(); + LastInBlock = this->Instrs.end(); + this->Blocks.push_back(CurrBlock); + } + } // end if (isHead(InstrFlags) && isCode(InstrFlags) + } // end for (ea_t addr = FuncInfo->startEA; ... ) + + // Handle the special case in which a function does not terminate + // with a return instruction or any other basic block terminator. + // Sometimes IDA Pro sees a call to a NORET function and decides + // to not include the dead code after it in the function. That + // dead code includes the return instruction, so the function no + // longer includes a return instruction and terminates with a CALL. + if (FirstInBlock != this->Instrs.end()) { + LastInBlock = --(this->Instrs.end()); + SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock); + CurrBlock.Analyze(); + // If not the first chunk in the function, it is a shared + // tail chunk. + if (ChunkCounter > 1) { + CurrBlock.SetShared(); + } + FirstInBlock = this->Instrs.end(); + LastInBlock = this->Instrs.end(); + this->Blocks.push_back(CurrBlock); + } + } // end for (bool ChunkOK = ...) + +#if SMP_DEBUG_CONTROLFLOW + msg("SMPFunction::Analyze: set stack frame info.\n"); +#endif + // Figure out the stack frame and related info. + this->SetStackFrameInfo(); + + return; +} // end of SMPFunction::Analyze() + +void SMPFunction::EmitAnnotations(FILE *AnnotFile) { + // Emit annotation for the function as a whole. + if (this->StaticFunc) { + qfprintf(AnnotFile, "%x %d FUNC LOCAL %s ", FuncInfo->startEA, + this->Size, this->FuncName); + } + else { + qfprintf(AnnotFile, "%x %d FUNC GLOBAL %s ", FuncInfo->startEA, + this->Size, this->FuncName); + } + if (this->UseFP) { + qfprintf(AnnotFile, "USEFP "); + } + else { + qfprintf(AnnotFile, "NOFP "); + } + if (this->FuncInfo->does_return()) { + qfprintf(AnnotFile, "\n"); + } + else { + qfprintf(AnnotFile, "NORET \n"); + } + + // Loop through all instructions in the function. + // Output optimization annotations for those + // instructions that do not require full computation + // of their memory metadata by the Memory Monitor SDT. + list<SMPInstr>::iterator CurrInst; + bool AllocSeen = false; // Reached LocalVarsAllocInstr yet? + bool DeallocTrigger = false; + for (CurrInst = Instrs.begin(); CurrInst != Instrs.end(); ++CurrInst) { + ea_t addr = CurrInst->GetAddr(); + flags_t InstrFlags = getFlags(addr); + int OptType = CurrInst->GetOptType(); + char *disasm = CurrInst->GetDisasm(); + bool MemDest = CurrInst->HasDestMemoryOperand(); + bool MemSrc = CurrInst->HasSourceMemoryOperand(); + bool SecondSrcOperandNum = CurrInst->IsSecondSrcOperandNumeric(InstrFlags); + + ++OptCount[OptType]; // keep count for debugging info + + // If this is the instruction which allocated space + // for local variables, we want to emit annotations + // describing the stack frame. + if (addr == LocalVarsAllocInstr) { + EmitStackFrameAnnotations(AnnotFile, CurrInst); + AllocSeen = true; + } + + // If this is the instruction which deallocated space + // for local variables, we set a flag to remind us to + // emit an annotation on the next instruction. + // mmStrata wants the instruction AFTER the + // deallocating instruction, so that it processes + // the deallocation after it happens. It inserts + // instrumentation before an instruction, not + // after, so it will insert the deallocating + // instrumentation before the first POP of callee-saved regs, + // if there are any, or before the return, otherwise. + if (addr == LocalVarsDeallocInstr) { + DeallocTrigger = true; + } + else if (DeallocTrigger) { // Time for annotation + qfprintf(AnnotFile, "%x %d DEALLOC STACK esp - %d %s\n", addr, + LocalVarsSize, LocalVarsSize, disasm); + DeallocTrigger = false; + } + +#if SMP_DEBUG_MEM + if (MemDest || MemSrc) { + msg("OptType: %d %s", OptType, disasm); + CurrInst->PrintOperands(); + } +#endif + + // Emit appropriate optimization annotations. + bool SDTInstrumentation = false; + switch (OptType) { + case 0: // SDT will have to handle these + { +#if SMP_DEBUG_TYPE0 + msg("OptType 0: %x %s\n", addr, disasm); +#endif + // mmStrata wants to suppress warnings on the PUSH + // instructions that precede the LocalVarsAllocInstr + // (i.e. the PUSHes of callee-saved regs). + if (!AllocSeen && CurrInst->MDIsPushInstr()) { + qfprintf(AnnotFile, "%x %d INSTR LOCAL NoWarn %s \n", + addr, -3, disasm); + } + else { + SDTInstrumentation = true; + } + break; + } + + case 1: // nothing for SDT to do + { qfprintf(AnnotFile, "%x %d INSTR LOCAL NoMetaUpdate %s \n", + addr, -1, disasm); + ++AnnotationCount[OptType]; + break; + } + + case 4: // INC, DEC, etc.: no SDT work unless MemDest + { if (MemDest || MemSrc) { + SDTInstrumentation = true; + break; // treat as category 0 + } + qfprintf(AnnotFile, "%x %d INSTR LOCAL Always1stSrc %s \n", + addr, -1, disasm); + ++AnnotationCount[OptType]; + break; + } + + case 5: // ADD, etc.: If numeric 2nd src operand, no SDT work. + { if (MemDest || MemSrc) { + SDTInstrumentation = true; + break; // treat as category 0 + } + if (SecondSrcOperandNum) { // treat as category 1 + qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", + addr, -1, OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + } + break; + } + + case 6: // Only OS code should include these; problem for SDT + { if (MemDest) { + SDTInstrumentation = true; + break; // treat as category 0 + } + qfprintf(AnnotFile, "%x %d INSTR LOCAL AlwaysPTR %s \n", + addr, -OptType, disasm); + ++AnnotationCount[OptType]; + break; + } + + case 8: // Implicitly writes to EDX:EAX, always numeric. + { qfprintf(AnnotFile, "%x %d INSTR LOCAL n EDX EAX ZZ %s %s \n", + addr, -2, OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + SDTInstrumentation = true; + break; + } + + case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0) + { if (MemDest) { +#if SMP_DEBUG + // MemDest seems to happen too much. + msg("Floating point MemDest: %s \n", disasm); +#endif + SDTInstrumentation = true; + break; // treat as category 0 + } + qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n", + addr, -1, OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + break; + } + + default: // 2,3,7: Optimization possibilities depend on operands + { +#if SMP_DEBUG2 + if (OptType == 3) { // MOV instr class + if (MemDest) { + msg("MemDest on MOV: %s\n", disasm); + } + else if (!SecondSrcOperandNum) { + msg("MOV: not 2nd op numeric: %s\n", disasm); + CurrInst->PrintOperands(); + } + } +#endif + SDTInstrumentation = true; + if (MemDest) { +#if SMP_DEBUG_XOR + if (OptType == 2) + msg("MemDest on OptType 2: %s\n", disasm); +#endif + break; // treat as category 0 + } + if ((OptType == 2) || (OptType == 7) || SecondSrcOperandNum) { + qfprintf(AnnotFile, "%x %d INSTR LOCAL n %s %s %s \n", + addr, -2, CurrInst->DestString(OptType), + OptExplanation[OptType], disasm); + ++AnnotationCount[OptType]; + } + break; + } + } // end switch (OptType) + // If mmStrata is going to have to deal with the + // instruction, then we can annotate EBP and ESP + // relative constant offsets. If we have emitted + // an annotation of type -1, there is no point + // in telling mmStrata about these constants. + if (SDTInstrumentation) { + CurrInst->AnnotateStackConstants(UseFP, AnnotFile); + } + } // end for (ea_t addr = FuncInfo->startEA; ...) + return; +} + +// Initialize the DFACategory[] array to define instruction classes +// for the purposes of data flow analysis. +void InitDFACategory(void) { + // Default category is 0, not the start or end of a basic block. + (void) memset(DFACategory, 0, sizeof(DFACategory)); + +DFACategory[NN_call] = CALL; // Call Procedure +DFACategory[NN_callfi] = INDIR_CALL; // Indirect Call Far Procedure +DFACategory[NN_callni] = INDIR_CALL; // Indirect Call Near Procedure + +DFACategory[NN_hlt] = HALT; // Halt + +DFACategory[NN_int] = CALL; // Call to Interrupt Procedure +DFACategory[NN_into] = CALL; // Call to Interrupt Procedure if Overflow Flag = 1 +DFACategory[NN_int3] = CALL; // Trap to Debugger +DFACategory[NN_iretw] = RETURN; // Interrupt Return +DFACategory[NN_iret] = RETURN; // Interrupt Return +DFACategory[NN_iretd] = RETURN; // Interrupt Return (use32) +DFACategory[NN_iretq] = RETURN; // Interrupt Return (use64) +DFACategory[NN_ja] = COND_BRANCH; // Jump if Above (CF=0 & ZF=0) +DFACategory[NN_jae] = COND_BRANCH; // Jump if Above or Equal (CF=0) +DFACategory[NN_jb] = COND_BRANCH; // Jump if Below (CF=1) +DFACategory[NN_jbe] = COND_BRANCH; // Jump if Below or Equal (CF=1 | ZF=1) +DFACategory[NN_jc] = COND_BRANCH; // Jump if Carry (CF=1) +DFACategory[NN_jcxz] = COND_BRANCH; // Jump if CX is 0 +DFACategory[NN_jecxz] = COND_BRANCH; // Jump if ECX is 0 +DFACategory[NN_jrcxz] = COND_BRANCH; // Jump if RCX is 0 +DFACategory[NN_je] = COND_BRANCH; // Jump if Equal (ZF=1) +DFACategory[NN_jg] = COND_BRANCH; // Jump if Greater (ZF=0 & SF=OF) +DFACategory[NN_jge] = COND_BRANCH; // Jump if Greater or Equal (SF=OF) +DFACategory[NN_jl] = COND_BRANCH; // Jump if Less (SF!=OF) +DFACategory[NN_jle] = COND_BRANCH; // Jump if Less or Equal (ZF=1 | SF!=OF) +DFACategory[NN_jna] = COND_BRANCH; // Jump if Not Above (CF=1 | ZF=1) +DFACategory[NN_jnae] = COND_BRANCH; // Jump if Not Above or Equal (CF=1) +DFACategory[NN_jnb] = COND_BRANCH; // Jump if Not Below (CF=0) +DFACategory[NN_jnbe] = COND_BRANCH; // Jump if Not Below or Equal (CF=0 & ZF=0) +DFACategory[NN_jnc] = COND_BRANCH; // Jump if Not Carry (CF=0) +DFACategory[NN_jne] = COND_BRANCH; // Jump if Not Equal (ZF=0) +DFACategory[NN_jng] = COND_BRANCH; // Jump if Not Greater (ZF=1 | SF!=OF) +DFACategory[NN_jnge] = COND_BRANCH; // Jump if Not Greater or Equal (ZF=1) +DFACategory[NN_jnl] = COND_BRANCH; // Jump if Not Less (SF=OF) +DFACategory[NN_jnle] = COND_BRANCH; // Jump if Not Less or Equal (ZF=0 & SF=OF) +DFACategory[NN_jno] = COND_BRANCH; // Jump if Not Overflow (OF=0) +DFACategory[NN_jnp] = COND_BRANCH; // Jump if Not Parity (PF=0) +DFACategory[NN_jns] = COND_BRANCH; // Jump if Not Sign (SF=0) +DFACategory[NN_jnz] = COND_BRANCH; // Jump if Not Zero (ZF=0) +DFACategory[NN_jo] = COND_BRANCH; // Jump if Overflow (OF=1) +DFACategory[NN_jp] = COND_BRANCH; // Jump if Parity (PF=1) +DFACategory[NN_jpe] = COND_BRANCH; // Jump if Parity Even (PF=1) +DFACategory[NN_jpo] = COND_BRANCH; // Jump if Parity Odd (PF=0) +DFACategory[NN_js] = COND_BRANCH; // Jump if Sign (SF=1) +DFACategory[NN_jz] = COND_BRANCH; // Jump if Zero (ZF=1) +DFACategory[NN_jmp] = JUMP; // Jump +DFACategory[NN_jmpfi] = INDIR_JUMP; // Indirect Far Jump +DFACategory[NN_jmpni] = INDIR_JUMP; // Indirect Near Jump +DFACategory[NN_jmpshort] = JUMP; // Jump Short (only in 64-bit mode) + +DFACategory[NN_loopw] = COND_BRANCH; // Loop while ECX != 0 +DFACategory[NN_loop] = COND_BRANCH; // Loop while CX != 0 +DFACategory[NN_loopd] = COND_BRANCH; // Loop while ECX != 0 +DFACategory[NN_loopq] = COND_BRANCH; // Loop while RCX != 0 +DFACategory[NN_loopwe] = COND_BRANCH; // Loop while CX != 0 and ZF=1 +DFACategory[NN_loope] = COND_BRANCH; // Loop while rCX != 0 and ZF=1 +DFACategory[NN_loopde] = COND_BRANCH; // Loop while ECX != 0 and ZF=1 +DFACategory[NN_loopqe] = COND_BRANCH; // Loop while RCX != 0 and ZF=1 +DFACategory[NN_loopwne] = COND_BRANCH; // Loop while CX != 0 and ZF=0 +DFACategory[NN_loopne] = COND_BRANCH; // Loop while rCX != 0 and ZF=0 +DFACategory[NN_loopdne] = COND_BRANCH; // Loop while ECX != 0 and ZF=0 +DFACategory[NN_loopqne] = COND_BRANCH; // Loop while RCX != 0 and ZF=0 + +DFACategory[NN_retn] = RETURN; // Return Near from Procedure +DFACategory[NN_retf] = RETURN; // Return Far from Procedure + +// +// Pentium instructions +// + +DFACategory[NN_rsm] = HALT; // Resume from System Management Mode + +// Pentium II instructions + +DFACategory[NN_sysenter] = CALL; // Fast Transition to System Call Entry Point +DFACategory[NN_sysexit] = CALL; // Fast Transition from System Call Entry Point + +// AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual + +DFACategory[NN_syscall] = CALL; // Low latency system call +DFACategory[NN_sysret] = CALL; // Return from system call + +// VMX instructions + +DFACategory[NN_vmcall] = INDIR_CALL; // Call to VM Monitor + + return; + +} // end InitDFACategory() diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h new file mode 100644 index 00000000..318c861d --- /dev/null +++ b/SMPDataFlowAnalysis.h @@ -0,0 +1,181 @@ +#ifndef SMPDATAFLOWANALYSIS_H +#define SMPDATAFLOWANALYSIS_H 1 + +// SMPDataFlowAnalysis.h +// +// This header defines the interfaces needed for analyzing instructions, +// basic blocks, and functions, performing live variable analysis, +// putting code into SSA form, etc. + +#include <list> +#include <vector> + +#include <cstddef> + +#include <pro.h> +#include <ida.hpp> +#include <ua.hpp> +#include <intel.hpp> + +using namespace std; + +class SMPFunction; +class SMPBasicBlock; +class SMPInstr; +class DefOrUseList; + +// SMP will operate on a doubly linked list of instructions, which +// will be grouped into basic blocks. We will include info about each +// instruction that can not all be obtained easily in IDA Pro, and +// is not all obtained easily from one place in IDA Pro. This +// information will include all that we need to perform data flow +// analyses. Because the LABEL and CASE could be applied to other +// kinds of instructions during analysis, we define the enum values +// as non-overlapping bits. This type might be stored as an int +// in the future as a result of this design decision. +enum SMPitype { + DEFAULT = 0, // most instructions in middle of basic block + LABEL = 1, // instr also has a label (can be jump target) + CASE = 2, // instr also has case label (is inside switch) + JUMP = 4, // unconditional branch + COND_BRANCH = 8, // conditional branch + INDIR_JUMP = 16, // indirect unconditional branch, + CALL = 32, // direct call + INDIR_CALL = 64, // indirect call + RETURN = 128, // function return + HALT = 256 // execution stops +}; + +// A primary goal of data flow analysis in SMP will be to identify +// operands as numeric or pointer type. If an instruction definitely +// produces a numeric result, an annotation could inform the mmStrata +// run time monitor to simply record 'numeric' for the result register +// and otherwise do nothing. This could greatly cut down the overhead +// at run time. If the result register already has an mmStrata metadata +// type of numeric, then the annotation could tell mmStrata to do nothing +// at all, which is even better. In order to emit these annotations, we +// will track all operands in a function as to their basic SMP types. +enum SMPOperandType { // What type is a given register or memory operand? + UNINIT, // Operand type not yet analyzed + NUMERIC, // Definitely holds non-pointer value (char, int, etc.) + POINTER, // Definitely holds an address + UNKNOWN // Might hold an address, might not (Bad!) +}; + +// Same class is used for both a DEF list and a USE list. +class DefOrUseList { +public: + DefOrUseList(void); + void SetRef(op_t Ref, SMPOperandType Type = UNINIT); + op_t GetRef(size_t index) const; + SMPOperandType GetRefType(size_t index) const; + inline size_t GetSize(void) const { return (size_t) Refs.size(); }; +private: + vector<op_t> Refs; // Defined or used operand + vector<SMPOperandType> Types; // indices coupled to Refs +}; // end class DefOrUseList + +class SMPInstr { +public: + // Default constructor + SMPInstr(ea_t addr); + // Fill in basic data for instruction. + void Analyze(void); + inline ea_t GetAddr(void) const { return address; }; + inline char *GetDisasm(void) { return disasm; }; + int operator==(const SMPInstr &rhs) const; + int operator<(const SMPInstr &rhs) const; + int operator!=(const SMPInstr &rhs) const; + inline insn_t GetCmd(void) const { return SMPcmd; }; + inline void SetCmd(insn_t cmd) { SMPcmd = cmd; return; }; + // Does instruction write to memory? + bool HasDestMemoryOperand(void) const; + // Does instruction read from memory? + bool HasSourceMemoryOperand(void) const; + bool IsSecondSrcOperandNumeric(flags_t F) const; + bool IsBasicBlockTerminator(void) const; + inline bool IsJumpTarget(void) const { return JumpTarget; }; + bool MDIsPushInstr(void) const; + bool MDIsPopInstr(void) const; + bool MDIsReturnInstr(void) const; + bool MDIsFrameAllocInstr(asize_t LocSize) const; + bool MDIsFrameDeallocInstr(bool UseFP, asize_t LocSize) const; + int GetOptType(void) const; + inline SMPitype GetDataFlowType(void) const { return type; }; + void PrintOperands(void) const; + char *DestString(int OptType); + void AnnotateStackConstants(bool UseFP, FILE *AnnotFile); +private: + SMPitype type; // Data flow analysis category + int OptType; // Optimization category (see OptCategory[]) + ea_t address; // Code address for 1st byte of instruction + bool analyzed; // Has instr been analyzed yet, setting type + // and DEF and USE lists? + bool JumpTarget; // Is Instr the target of any jumps or branches? + insn_t SMPcmd; // copy of 'cmd' for this instruction + ulong features; // Canonical features for SMPcmd + char disasm[MAXSTR]; // Disassembly text of instruction + DefOrUseList Defs; // Definitions list + DefOrUseList Uses; // Uses list + void BuildSMPDefUseLists(void); // Build DEF and USE lists for instruction + void MDFixupDefUseLists(void); // Machine-dependent ad hoc fixes +}; // end class SMPInstr + +// Class defining basic blocks. +class SMPBasicBlock { +public: + SMPBasicBlock(list<SMPInstr>::iterator FirstInstr, list<SMPInstr>::iterator LastInstr); + void LinkToPred(SMPBasicBlock *Predecessor); + inline bool HasIndirectJump(void) const { return IndirectJump; }; + inline bool HasReturn(void) const { return Returns; }; + inline bool IsShared(void) const { return SharedTailChunk; }; + inline void SetShared(void) { SharedTailChunk = true; }; + inline list<SMPInstr>::iterator GetFirstInstr(void) const { return FirstInstr; }; + inline list<SMPInstr>::iterator GetLastInstr(void) const { return LastInstr; }; + void Analyze(); +private: + list<SMPInstr>::iterator FirstInstr; + list<SMPInstr>::iterator LastInstr; + list<SMPBasicBlock> Predecessors; + list<SMPBasicBlock> Successors; + bool IndirectJump; // contains an indirect jump instruction + bool Returns; // contains a return instruction + bool SharedTailChunk; // is part of a code chunk shared among functions +}; + +// Class encapsulating all that the SMP static analyzer cares to know +// about a function. +class SMPFunction { +public: + SMPFunction(func_t *Info); // Default constructor + void Analyze(void); // Analyze all instructions in function + void EmitAnnotations(FILE *AnnotFile); + inline bool HasIndirectCalls(void) const { return IndirectCalls; }; + inline const char *GetFuncName(void) const { return FuncName; }; +private: + func_t *FuncInfo; + list<SMPInstr> Instrs; + list<SMPBasicBlock> Blocks; + bool UseFP; // Does function use a frame pointer? + bool StaticFunc; // Is function declared static? + bool IndirectCalls; // Does function make indirect calls? + char FuncName[MAXSTR]; + size_t Size; // Function size in code bytes + asize_t LocalVarsSize; // size of local vars region of stack frame + ushort CalleeSavedRegsSize; // stack size of callee pushed regs + int RetAddrSize; // size of return address on stack (4 for most machines) + asize_t IncomingArgsSize; // size of incoming args on stack + ea_t LocalVarsAllocInstr; // address of instr that allocates stack frame + ea_t LocalVarsDeallocInstr; // address of epilogue instr that deallocs frame + void SetStackFrameInfo(void); + bool MDFixFrameInfo(void); + void EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr); +}; // end class SMPFunction + +// Initialization routine for DFA category. +void InitDFACategory(void); + +bool IsDestMemoryOperand(insn_t); +bool IsSrcMemoryOperand(insn_t); + +#endif diff --git a/SMPStaticAnalyzer.h b/SMPStaticAnalyzer.h new file mode 100644 index 00000000..3d5f6d44 --- /dev/null +++ b/SMPStaticAnalyzer.h @@ -0,0 +1,15 @@ +#ifndef SMPSTATICANALYZER_H +#define SMPSTATICANALYZER_H 1 + +#include <intel.hpp> + +#define LAST_OPT_CATEGORY 9 + +extern int OptCategory[]; +// Keep statistics on how many instructions we saw in each optimization +// category, and how many optimizing annotations were emitted for +// each category. +extern int OptCount[LAST_OPT_CATEGORY + 1]; +extern int AnnotationCount[LAST_OPT_CATEGORY + 1]; + +#endif -- GitLab