Newer
Older
/*
* SMPStaticAnalyzer.cpp - <see below>.
*
* Copyright (c) 2000, 2001, 2010 - University of Virginia
*
* This file is part of the Memory Error Detection System (MEDS) infrastructure.
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
* Additional copyrights 2010, 2011 by Zephyr Software LLC
* e-mail: {clc,jwd}@zephyr-software.com
* URL : http://www.zephyr-software.com/
*/
//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
// (Software Memory Protection).
//
using namespace std;
clc5q
committed
#include <list>
#include <vector>
clc5q
committed
#include <string>
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
clc5q
committed
#include <nalt.hpp>
#include <name.hpp>
#include <ua.hpp>
#include "SMPStaticAnalyzer.h"
clc5q
committed
#include "SMPDBInterface.h"
#include "SMPDataFlowAnalysis.h"
clc5q
committed
#include "SMPProgram.h"
#include "SMPFunction.h"
#include "SMPInstr.h"
#include "ProfilerInformation.h"
// Set to 1 for debugging output
#define SMP_DEBUG 1
clc5q
committed
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
clc5q
committed
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0 // Find & fix data addresses in code segments
// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
// which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356 // How many funcs to process in problem search
int FuncsProcessed = 0;
#define SMP_FIXUP_IDB 0 // Try to fix the IDA database?
#define SMP_DEBUG_FIXUP_IDB 0 // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1 // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 1 // Detect whether we are causing code to be orphaned
#if SMP_DEBUG_CODE_ORPHANS
set<ea_t> CodeOrphans;
#endif
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
// into the middle of an instruction to IDA Pro, causing it to not collect instructions
// into a procedure. We replace these bytes with no-op opcodes because none of our analyses
// care about LOCK prefixes. We store the addresses where we have done the replacement in a
// set in case we ever care.
// Byte value of the x86 LOCK prefix that is replaced with a no-op opcode.
#define X86_LOCK_PREFIX 0xF0
set<ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
// NOTE(review): presumably counts invocations of the custom ana callback -- confirm at its use sites.
static unsigned long CustomAnaCallCount = 0;
// Define optimization categories for instructions.
int OptCategory[NN_last + 1];
// Initialize the OptCategory[] array.
void InitOptCategory(void);
// Record which opcodes change the stack pointer, and by how many
// bytes up (reduction in stack size for stacks that grow downward)
// or down (increase in stack size for stacks that grow downward).
sval_t StackAlteration[NN_last + 1];
// Initialize the StackAlteration[] array.
void InitStackAlteration(void);
// Keep statistics on how many instructions we saw in each optimization
// category, and how many optimizing annotations were emitted for
// each category.
int OptCount[LAST_OPT_CATEGORY + 1];
int AnnotationCount[LAST_OPT_CATEGORY + 1];
// Unique data referent number to use in data annotations.
unsigned long DataReferentID;
// Debugging counters for analyzing memory usage.
unsigned long UnusedInstrCount;
unsigned long UnusedBlockCount;
unsigned long UnusedStructCount;
unsigned long UnusedIntCount;
#if SMP_COUNT_MEMORY_ALLOCATIONS
// Counters for analyzing memory use for allocated and used objects.
unsigned long SMPInstCount;
unsigned long SMPBlockCount;
unsigned long SMPFuncCount;
unsigned long SMPGlobalVarCount;
unsigned long SMPLocalVarCount;
unsigned long SMPDefUseChainCount;
unsigned long SMPInstBytes;
unsigned long SMPDefUseChainBytes;
// The types of data objects based on their first operand flags.
const char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR",
"SEG", "OFFSET", "NUMBIN", "NUMOCT", "ENUM", "FORCED",
"STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN",
"UNKNOWN", "UNKNOWN", 0};
clc5q
committed
// Filename (not including path) of executable being analyzed.
static char RootFileName[MAXSTR];
// strings for printing ZST_SysCallType
const char *CallTypeNames[4] = { "Unrestricted", "High-Privilege", "File-Access", "Network-Access" };
DisAsmString DisAsmText;
// Operand type that can have all fields initialized to o_void and zero
// values, to be used to copy-initialize operands that we are adding to
// RTLs and DEF and USE lists.
op_t InitOp;
// File foo.exe.alarms for Zephyr Security Toolkit security alarm messages.
FILE *ZST_AlarmFile;
clc5q
committed
// Code addresses identified by a disassembler, such as objdump on
// Linux. These can be used to improve the code vs. data identification
// of IDA Pro.
vector<ea_t> DisasmLocs;
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
vector<ea_t> IDAProLocs;
// Function start and end addresses (for function entry chunks only).
// Kept here because IDA Pro 5.1 seems to have a memory overwriting
// problem when iterating through all functions in the program. An existing
// func_t *ChunkInfo data structure was getting overwritten by one of the
// function func_t data structures, causing changes of startEA and endEA among
// other things.
// Plain pair of addresses delimiting an address range.
struct SMP_bounds_t {
ea_t startEA; // Start address of the range.
ea_t endEA; // End address of the range. NOTE(review): presumably one past the last byte, as with IDA's own startEA/endEA pairs -- confirm.
};
// Locally kept start/end addresses of function entry chunks (rationale is in
// the comment preceding SMP_bounds_t).
vector<SMP_bounds_t> FuncBounds;
// List of functions that need to be reanalyzed after all the code fixup
// and code discovery is complete. Kept as a list of addresses; any address
// within the function is good enough to designate it.
list<ea_t> FuncReanalyzeList;
// A code region that has been converted from data but has code addresses that
// need to be reanalyzed. This is usually because a former data address is
// now a jump to a code target that is still a data address. We have to wait
// until the target has become code before IDA will accept the jump as valid.
class FixupRegion {
public:
FixupRegion(SMP_bounds_t);
inline ea_t GetStart(void) const { return CodeRegion.startEA; };
inline ea_t GetEnd(void) const { return CodeRegion.endEA; };
inline void SetStart(ea_t addr) { CodeRegion.startEA = addr; };
list<ea_t> FixupInstrs; // easier to expose than to encapsulate
private:
SMP_bounds_t CodeRegion;
};
FixupRegion::FixupRegion(SMP_bounds_t Range) {
this->CodeRegion = Range;
Loading
Loading full blame...