Skip to content
Snippets Groups Projects
SMPStaticAnalyzer.cpp 140 KiB
Newer Older
jdh8d's avatar
jdh8d committed
/*
 * SMPStaticAnalyzer.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
jdh8d's avatar
jdh8d committed
 */

//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
//   (Software Memory Protection).
//

#include <set>
#include <pro.h>
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>
#include <ua.hpp>

#include "SMPStaticAnalyzer.h"
#include "SMPDataFlowAnalysis.h"
#include "SMPFunction.h"
#include "SMPInstr.h"
#include "ProfilerInformation.h"


// Compile-time debugging switches for this file. Each macro is a 0/1 flag
//  tested by the preprocessor; the trailing comment on each line names the
//  output or behavior it controls.
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG3 0   // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0  // Find & fix data addresses in code segments

// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
//  which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356  // How many funcs to process in problem search
// Global counter, explicitly initialized to zero.
//  NOTE(review): presumably incremented once per analyzed function and
//  compared against SMP_DEBUG_COUNT when SMP_BINARY_DEBUG is enabled --
//  confirm at the use sites, which are not in this section.
int FuncsProcessed = 0;

#define SMP_FIXUP_IDB 0  // Try to fix the IDA database?
clc5q's avatar
clc5q committed
#define SMP_DEBUG_FIXUP_IDB 0  // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1  // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 1 // Detect whether we are causing code to be orphaned

#if SMP_DEBUG_CODE_ORPHANS
// Set of addresses; only compiled in when SMP_DEBUG_CODE_ORPHANS is nonzero.
//  NOTE(review): presumably the addresses of code detected as orphaned
//  (see the flag's description above) -- confirm where it is populated.
set<ea_t> CodeOrphans;
#endif
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
//  into the middle of an instruction to IDA Pro, causing it to not collect instructions
//  into a procedure. We replace these bytes with no-op opcodes because none of our analyses
//  care about LOCK prefixes. We store the addresses where we have done the replacement in a
//  set in case we ever care.
#define X86_LOCK_PREFIX 0xF0
set<ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
// File-local counter, explicitly initialized to zero.
//  NOTE(review): presumably counts invocations of the STARS_custom_ana()
//  callback mentioned above -- confirm at the callback definition.
static unsigned long CustomAnaCallCount = 0;

// Define optimization categories for instructions.
//  The table has NN_last + 1 entries.
//  NOTE(review): presumably indexed by the IDA instruction code, so that
//  every code up to and including NN_last has a slot -- confirm.
int OptCategory[NN_last + 1];
// Initialize the OptCategory[] array. (Declaration only; the definition
//  is not in this section.)
void InitOptCategory(void);

// Record which opcodes change the stack pointer, and by how many
//  bytes up (reduction in stack size for stacks that grow downward)
//  or down (increase in stack size for stacks that grow downward).
//  The table has NN_last + 1 entries of type sval_t.
//  NOTE(review): the sign convention for "up" versus "down" is not
//  stated here -- check InitStackAlteration() before relying on it.
sval_t StackAlteration[NN_last + 1];
// Initialize the StackAlteration[] array. (Declaration only; the
//  definition is not in this section.)
void InitStackAlteration(void);

// Keep statistics on how many instructions we saw in each optimization
//  category, and how many optimizing annotations were emitted for
//  each category.
//  Both tables have LAST_OPT_CATEGORY + 1 entries. They have no explicit
//  initializer, so as namespace-scope objects they start out zeroed.
int OptCount[LAST_OPT_CATEGORY + 1];
int AnnotationCount[LAST_OPT_CATEGORY + 1];

// Unique data referent number to use in data annotations.
//  No explicit initializer: starts at zero as a namespace-scope object.
unsigned long DataReferentID;

// Debugging counters for analyzing memory usage.
//  NOTE(review): the names suggest counts of unused instruction, block,
//  struct, and int objects -- confirm what each one measures where it
//  is updated.
unsigned long UnusedInstrCount;
unsigned long UnusedBlockCount;
unsigned long UnusedStructCount;
unsigned long UnusedIntCount;

// Optional instrumentation counters. SMP_COUNT_MEMORY_ALLOCATIONS and
//  SMP_MEASURE_NUMERIC_ANNOTATIONS are not defined in this section;
//  presumably they come from one of the project headers included above.
// NOTE(review): two #if directives are opened in this span but only one
//  #endif is visible, so as written the SMP_COUNT_MEMORY_ALLOCATIONS
//  region stays open past the numeric-annotation counters. Confirm
//  whether an #endif is missing after SMPDefUseChainBytes or whether the
//  region is intentionally closed further down the file.
#if SMP_COUNT_MEMORY_ALLOCATIONS
// Counters for analyzing memory use for allocated and used objects.
unsigned long SMPInstCount;
unsigned long SMPBlockCount;
unsigned long SMPFuncCount;
unsigned long SMPGlobalVarCount;
unsigned long SMPLocalVarCount;
unsigned long SMPDefUseChainCount;
unsigned long SMPInstBytes;
unsigned long SMPDefUseChainBytes;
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
// Counters for numeric annotations. The "case N" labels below refer to
//  annotation cases that are defined elsewhere, not in this section.
unsigned long NumericAnnotationsCount12; // cases 1 and 2
unsigned long NumericAnnotationsCount3;  // case 3
unsigned long TruncationAnnotationsCount; // case 4
unsigned long SignednessWithoutTruncationCount; // case 5
unsigned long LeaInstOverflowCount; // case 6
unsigned long WidthDoublingTruncationCount; // case 7
unsigned long BenignOverflowInstCount;
unsigned long BenignOverflowDefCount;
unsigned long SuppressStackPtrOverflowCount;
unsigned long SuppressLiveFlagsOverflowCount;
unsigned long LiveMultiplyBitsCount;
unsigned long BenignTruncationCount;
unsigned long SuppressTruncationRegPiecesAllUsed;
unsigned long SuppressSignednessOnTruncation;
#endif
// The types of data objects based on their first operand flags.
//  Sixteen names followed by a null (0) terminator entry.
//  NOTE(review): presumably indexed by a 4-bit operand-type value taken
//  from the IDA flags -- confirm at the lookup site.
const char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR",
		"SEG", "OFFSET", "NUMBIN", "NUMOCT", "ENUM", "FORCED", 
		"STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN", 
		"UNKNOWN", "UNKNOWN", 0};

// Filename (not including path) of executable being analyzed.
//  File-local buffer of MAXSTR characters.
static char RootFileName[MAXSTR];

// strings for printing ZST_SysCallType
//  NOTE(review): the order of the four names presumably matches the
//  enumerator order of ZST_SysCallType -- confirm against its declaration.
const char *CallTypeNames[4] = { "Unrestricted", "High-Privilege", "File-Access", "Network-Access" };

clc5q's avatar
clc5q committed
// Operand type that can have all fields initialized to o_void and zero
//  values, to be used to copy-initialize operands that we are adding to
//  RTLs and DEF and USE lists.
//  The fields are not set here; the code that fills them in is outside
//  this section.
op_t InitOp; 

// File foo.exe.alarms for Zephyr Security Toolkit security alarm messages.
//  Not opened here; the code that opens and closes it is outside this
//  section.
FILE *ZST_AlarmFile;

// Code addresses identified by a disassembler, such as objdump on
//  Linux. These can be used to improve the code vs. data identification
//  of IDA Pro.
vector<ea_t> DisasmLocs;
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
vector<ea_t> IDAProLocs;

// Function start and end addresses (for function entry chunks only).
//  Kept here because IDA Pro 5.1 seems to have a memory overwriting
//  problem when iterating through all functions in the program. An existing
//  func_t *ChunkInfo data structure was getting overwritten by one of the 
//  function func_t data structures, causing changes of startEA and endEA among
//  other things.
//  This struct is a private copy of just those two addresses, so they
//  do not have to be re-read from a func_t later.
struct SMP_bounds_t {
	ea_t startEA;  // start address of the function entry chunk
	// End address of the function entry chunk.
	//  NOTE(review): presumably follows the same convention as the
	//  func_t endEA it is copied from -- confirm whether that bound is
	//  inclusive or exclusive.
	ea_t endEA;
};

// Collection of SMP_bounds_t records (start/end address pairs).
//  NOTE(review): presumably one record per function entry chunk --
//  confirm where the vector is filled.
vector<SMP_bounds_t> FuncBounds;

// List of functions that need to be reanalyzed after all the code fixup
//  and code discovery is complete. Kept as a list of addresses; any address
//  within the function is good enough to designate it.
list<ea_t> FuncReanalyzeList;

// A code region that has been converted from data but has code addresses that
Loading
Loading full blame...