Newer
Older
/*
* SMPStaticAnalyzer.cpp - <see below>.
*
* Copyright (c) 2000, 2001, 2010 - University of Virginia
*
* This file is part of the Memory Error Detection System (MEDS) infrastructure.
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
* Additional copyrights 2010, 2011 by Zephyr Software LLC
* e-mail: {clc,jwd}@zephyr-software.com
* URL : http://www.zephyr-software.com/
*/
//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
// (Software Memory Protection).
//
using namespace std;
clc5q
committed
#include <list>
#include <vector>
clc5q
committed
#include <string>
clc5q
committed
#include <ida.hpp>
#include <idp.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
clc5q
committed
#include <nalt.hpp>
#include <name.hpp>
#include <ua.hpp>
#include "SMPStaticAnalyzer.h"
clc5q
committed
#include "SMPDBInterface.h"
#include "SMPDataFlowAnalysis.h"
clc5q
committed
#include "SMPProgram.h"
#include "SMPFunction.h"
#include "SMPInstr.h"
#include "ProfilerInformation.h"
// Set to 1 for debugging output
#define SMP_DEBUG 1
clc5q
committed
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
clc5q
committed
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0 // Find & fix data addresses in code segments
// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
// which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356 // How many funcs to process in problem search
int FuncsProcessed = 0;
#define SMP_FIXUP_IDB 0 // Try to fix the IDA database?
#define SMP_DEBUG_FIXUP_IDB 0 // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1 // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 1 // Detect whether we are causing code to be orphaned
#define SMP_IDAP_RUN_DELAY 0 // Delay in IDAP_run() so we can attach debugger to process.
clc5q
committed
#define STARS_GENERATE_ASM_FILE 1 // Generate ASM file at end of processing?
#if SMP_DEBUG_CODE_ORPHANS
set<ea_t> CodeOrphans;
#endif
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
// into the middle of an instruction to IDA Pro, causing it to not collect instructions
// into a procedure. We replace these bytes with no-op opcodes because none of our analyses
// care about LOCK prefices. We store the addresses where we have done the replacement in a
// set in case we ever care.
#define X86_LOCK_PREFIX 0xF0
set<ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
static unsigned long CustomAnaCallCount = 0;
// Define optimization categories for instructions.
int OptCategory[NN_last + 1];
// Initialize the OptCategory[] array.
void InitOptCategory(void);
// Record which opcodes change the stack pointer, and by how many
// bytes up (reduction in stack size for stacks that grow downward)
// or down (increase in stack size for stacks that grow downward).
sval_t StackAlteration[NN_last + 1];
// Initialize the StackAlteration[] array.
void InitStackAlteration(void);
// Keep statistics on how many instructions we saw in each optimization
// category, and how many optimizing annotations were emitted for
// each category.
int OptCount[LAST_OPT_CATEGORY + 1];
int AnnotationCount[LAST_OPT_CATEGORY + 1];
// Unique data referent number to use in data annotations.
unsigned long DataReferentID;
// Debugging counters for analyzing memory usage.
unsigned long UnusedInstrCount;
unsigned long UnusedBlockCount;
unsigned long UnusedStructCount;
unsigned long UnusedIntCount;
// Counters for measuring SCCP success in finding constant DEFs.
unsigned long ConstantDEFCount;
unsigned long AlwaysTakenBranchCount;
unsigned long NeverTakenBranchCount;
#if SMP_COUNT_MEMORY_ALLOCATIONS
// Counters for analyzing memory use for allocated and used objects.
unsigned long SMPInstCount;
unsigned long SMPBlockCount;
unsigned long SMPFuncCount;
unsigned long SMPGlobalVarCount;
unsigned long SMPLocalVarCount;
unsigned long SMPDefUseChainCount;
unsigned long SMPInstBytes;
unsigned long SMPDefUseChainBytes;
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
unsigned long NumericAnnotationsCount12; // cases 1 and 2
unsigned long NumericAnnotationsCount3; // case 3
unsigned long TruncationAnnotationsCount; // case 4
unsigned long SignednessWithoutTruncationCount; // case 5
unsigned long LeaInstOverflowCount; // case 6
unsigned long WidthDoublingTruncationCount; // case 7
unsigned long BenignOverflowInstCount;
unsigned long BenignOverflowDefCount;
unsigned long SuppressStackPtrOverflowCount;
unsigned long SuppressLiveFlagsOverflowCount;
unsigned long LiveMultiplyBitsCount;
unsigned long BenignTruncationCount;
unsigned long SuppressTruncationRegPiecesAllUsed;
unsigned long SuppressSignednessOnTruncation;
#endif
// The types of data objects based on their first operand flags.
const char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR",
"SEG", "OFFSET", "NUMBIN", "NUMOCT", "ENUM", "FORCED",
"STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN",
"UNKNOWN", "UNKNOWN", 0};
clc5q
committed
// Filename (not including path) of executable being analyzed.
static char RootFileName[MAXSTR];
// strings for printing ZST_SysCallType
const char *CallTypeNames[4] = { "Unrestricted", "High-Privilege", "File-Access", "Network-Access" };
DisAsmString DisAsmText;
// Operand type that can have all fields initialized to o_void and zero
// values, to be used to copy-initialize operands that we are adding to
// RTLs and DEF and USE lists.
op_t InitOp;
// File foo.exe.alarms for Zephyr Security Toolkit security alarm messages.
FILE *ZST_AlarmFile;
// File for code xref targets (helps ILR, makes IRDB more complete)
FILE *STARS_XrefsFile;
clc5q
committed
// Code addresses identified by a disassembler, such as objdump on
// Linux. These can be used to improve the code vs. data identification
// of IDA Pro.
vector<ea_t> DisasmLocs;
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
vector<ea_t> IDAProLocs;
// Bit masks for extracting bits from a STARSBitSet unsigned char.
const unsigned char STARSBitMasks[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
clc5q
committed
// Function start and end addresses (for function entry chunks only).
// Kept here because IDA Pro 5.1 seems to have a memory overwriting
// problem when iterating through all functions in the program. An existing
// func_t *ChunkInfo data structure was getting overwritten by one of the
// function func_t data structures, causing changes of startEA and endEA among
// other things.
// Plain pair of addresses delimiting a range. IDAP_run() fills one of these
// from func_t::startEA / func_t::endEA for every function, and FixupRegion
// stores one as the limits of its code region.
struct SMP_bounds_t {
ea_t startEA; // address at which the range starts
ea_t endEA; // address at which the range ends (NOTE(review): presumably exclusive, as loops in this file use addr < endEA -- confirm)
};
vector<SMP_bounds_t> FuncBounds;
// List of functions that need to be reanalyzed after all the code fixup
// and code discovery is complete. Kept as a list of addresses; any address
// within the function is good enough to designate it.
list<ea_t> FuncReanalyzeList;
// A code region that has been converted from data but has code addresses that
// need to be reanalyzed. This is usually because a former data address is
// now a jump to a code target that is still a data address. We have to wait
// until the target has become code before IDA will accept the jump as valid.
class FixupRegion {
public:
FixupRegion(SMP_bounds_t);
inline ea_t GetStart(void) const { return CodeRegion.startEA; };
inline ea_t GetEnd(void) const { return CodeRegion.endEA; };
inline void SetStart(ea_t addr) { CodeRegion.startEA = addr; };
list<ea_t> FixupInstrs; // easier to expose than to encapsulate
private:
SMP_bounds_t CodeRegion;
};
FixupRegion::FixupRegion(SMP_bounds_t Range) {
this->CodeRegion = Range;
return;
}
// List of code regions that were not completely analysed because of jump to
// data considerations.
list<FixupRegion> CodeReanalyzeList;
clc5q
committed
// Map library function names to their system call type.
map<string, ZST_SysCallType> ZST_FuncTypeMap;
// Map system call types to their Zephyr Security Toolkit security policy.
map<ZST_SysCallType, ZST_Policy> ZST_TypePolicyMap;
// Set of whitelisted file locations.
set<string> ZST_FileLocWhitelist;
// Set of whitelisted network locations.
set<string> ZST_NetworkLocWhitelist;
// Set of blacklisted file locations.
set<string> ZST_FileLocBlacklist;
// Set of blacklisted network locations.
set<string> ZST_NetworkLocBlacklist;
// Set of system call names whose returned values should be trusted to have only benign numeric errors.
set<string> ZST_SystemCallNumericWhitelist;
void IDAP_run(int);
clc5q
committed
// Functions for diagnosing and/or fixing problems in the IDA database.
void FixupIDB(void); // Driver for all other fixing functions.
void FindDataInCode(void);
void AuditTailChunkOwnership(void);
void FindOrphanedCode(segment_t *, FILE *, FILE *);
void Debug_FindOrphanedCode(segment_t *, bool);
clc5q
committed
void FixCodeIdentification(void);
int FixupNewCodeChunks(void);
void AuditCodeTargets(void);
ea_t FindNewFuncLimit(ea_t);
void SpecialDebugOutput(void);
clc5q
committed
void RemoveIDACodeAddr(ea_t);
void ZST_InitPolicies(const char *);
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
static unsigned long DebugCounter = 0;
// Turn LOCK prefix into no-op when detected. Each is one byte in length.
//
// Reads the byte at CurrentAddr. If it is not X86_LOCK_PREFIX, nothing is
// modified and false is returned. Otherwise the address is recorded in
// LockPreficesRemoved, the global instruction record `cmd` is rewritten as a
// one-byte NN_nop, and true is returned.
// CustomAnaCallCount is incremented on every call, whatever the outcome.
bool STARS_custom_ana(ea_t CurrentAddr) {
	int code = get_byte(CurrentAddr);
	++CustomAnaCallCount;
	if (X86_LOCK_PREFIX != code) {
		return false;
	}

#define STARS_ANA_DEBUG_DELAY 0
#if STARS_ANA_DEBUG_DELAY
	// Debugging aid: busy-wait for 15 seconds the first time a LOCK prefix
	// is seen (DebugCounter is still zero), e.g. to attach a debugger.
	if (DebugCounter == 0) {
		time_t start;
		time_t current;
		time(&start);
		printf("delay for 15 seconds.\n");
		do {
			time(&current);
		} while (difftime(current, start) < 15.0);
		++DebugCounter;
	}
#endif

	// The set silently ignores an address that was already recorded, so the
	// insertion result is not needed.
	(void) LockPreficesRemoved.insert(CurrentAddr);
	cmd.itype = NN_nop; // make it a no-op
	cmd.size = 1; // one-byte no-op
#if 0
	cmd.auxpref = 0; // clear prefix and flags fields
	cmd.segpref = 0;
	cmd.insnpref = 0;
	cmd.flags = 0;
#endif
	return true;
} // end of STARS_custom_ana()
// Notification hook for processor-module events.
//
// * custom_ana (only when STARS_REMOVE_LOCK_PREFIX is enabled): fetches the
//   next instruction byte; a LOCK prefix is recorded in LockPreficesRemoved
//   and decoded as NN_nop, returning cmd.size + 1. Any other byte returns 0.
// * auto_empty_finally: checks that the input is ELF or PE, runs IDAP_run(0),
//   optionally prints the LOCK-prefix statistics, then calls qexit(0).
// Every other event returns 0. The va_list argument is not examined.
static int idaapi idp_callback(void *, int event_id, va_list va) {
#if STARS_REMOVE_LOCK_PREFIX
	if (processor_t::custom_ana == event_id) {
		const ea_t CurrentAddr = cmd.ea;
		const int code = ua_next_byte();
		++CustomAnaCallCount;
		if (X86_LOCK_PREFIX != code) {
			return 0;
		}
		(void) LockPreficesRemoved.insert(CurrentAddr);
		cmd.itype = NN_nop; // make it a no-op
		return (int) (cmd.size + 1);
	}
#endif // STARS_REMOVE_LOCK_PREFIX

	if (event_id != ph.auto_empty_finally) {
		return 0; // did not process any event
	}

	// IDA analysis is done.
	// Ensure correct working environment.
	const bool SupportedFormat = (inf.filetype == f_ELF) || (inf.filetype == f_PE);
	if (!SupportedFormat) {
		error("Executable format must be PE or ELF.");
		return 0;
	}

	IDAP_run(0);

#if STARS_REMOVE_LOCK_PREFIX
	SMP_msg("INFO: Calls to STARS_custom_ana: %lu \n", CustomAnaCallCount);
	SMP_msg("INFO: Number of LOCK prefices eliminated: %zu \n", LockPreficesRemoved.size());
	LockPreficesRemoved.clear();
#endif // STARS_REMOVE_LOCK_PREFIX

	qexit(0);
	return 0; // did not process any event
} // end of idp_callback()
int IDAP_init(void) {
#if 0 // We are now calling from the SMP.idc script.
// Skip this plugin if it was not specified by the user on the
// command line.
if (get_plugin_options("SMPStaticAnalyzer") == NULL) {
clc5q
committed
SMP_msg("IDAP_init point 2.\n");
return PLUGIN_SKIP;
}
#endif
// Ensure correct working environment.
if (ph.id != PLFM_386) {
error("Processor must be x86.");
return PLUGIN_SKIP;
}
hook_to_notification_point(HT_IDP, idp_callback, NULL);
DataReferentID = 1;
UnusedStructCount = 0;
UnusedIntCount = 0;
ConstantDEFCount = 0;
AlwaysTakenBranchCount = 0;
NeverTakenBranchCount = 0;
#if SMP_COUNT_MEMORY_ALLOCATIONS
SMPInstCount = 0;
SMPBlockCount = 0;
SMPDefUseChainCount = 0;
SMPFuncCount = 0;
SMPGlobalVarCount = 0;
SMPLocalVarCount = 0;
SMPInstBytes = 0;
SMPDefUseChainBytes = 0;
#endif
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
NumericAnnotationsCount12 = 0;
NumericAnnotationsCount3 = 0;
TruncationAnnotationsCount = 0;
SignednessWithoutTruncationCount = 0;
LeaInstOverflowCount = 0;
WidthDoublingTruncationCount = 0;
BenignOverflowInstCount = 0;
BenignOverflowDefCount = 0;
SuppressStackPtrOverflowCount = 0;
SuppressLiveFlagsOverflowCount = 0;
LiveMultiplyBitsCount = 0;
BenignTruncationCount = 0;
SuppressTruncationRegPiecesAllUsed = 0;
SuppressSignednessOnTruncation = 0;
InitOp.type = o_void;
InitOp.addr = 0;
InitOp.dtyp = dt_dword;
InitOp.flags = 0;
InitOp.n = 0;
InitOp.offb = 0;
InitOp.offo = 0;
InitOp.reg = R_none;
InitOp.specflag1 = 0;
InitOp.specflag2 = 0;
InitOp.specflag3 = 0;
InitOp.specflag4 = 0;
InitOp.specval = 0;
InitOp.value = 0;
InitOp.set_showed();
ZST_AlarmFile = NULL;
clc5q
committed
#ifdef STARS_IRDB_INTERFACE
SMPLogFile = NULL;
#endif
InitOptCategory();
InitDFACategory();
InitTypeCategory();
InitSMPDefsFlags();
InitSMPUsesFlags();
InitLibFuncFGInfoMaps();
clc5q
committed
InitIntegerErrorCallSinkMap();
InitUnsignedArgPositionMap();
return PLUGIN_KEEP;
} // end of IDAP_init
// Plugin shutdown: detach idp_callback() from the HT_IDP notification point.
void IDAP_term(void) {
	unhook_from_notification_point(HT_IDP, idp_callback, NULL);
}
// Main analysis driver; idp_callback() calls it with 0 once IDA's own analysis
// has finished. The `arg` parameter is not used in the visible body.
// Visible steps: derive the output file names from the root file name, build
// an SMPProgram and a ProfilerInformation, open the .annot, .infoannot,
// .alarms and .STARSxrefs outputs, read the security policies, record the
// bounds of every function, emit annotations, report orphaned code and the
// per-category counters, then close the outputs. std::bad_alloc is caught and
// reported as MEMORYEXHAUSTED in the info-annotation file.
// NOTE(review): several preprocessor branches below have no visible
// #else/#endif partner, some braces have no visible closer, and a few names
// (SymsFile, AsmFile, LowestCodeAddress) are not declared in the visible
// code -- confirm against the complete file before changing any logic here.
void IDAP_run(int arg) {
FILE *AnnotFile;
FILE *InfoAnnotFile;
clc5q
committed
clc5q
committed
#ifdef STARS_IRDB_INTERFACE
// Open <root>.STARSlog; fall back to stderr when it cannot be opened.
// NOTE(review): RootFileName is filled by get_root_filename() only further
// down in this function -- confirm it already holds the name at this point.
string ZSTLogFileName(RootFileName);
string LogFileSuffix(".STARSlog");
ZSTLogFileName += LogFileSuffix;
SMPLogFile = SMP_fopen(ZSTLogFileName.c_str(), "w");
if (NULL == SMPLogFile) {
error("ERROR: Cannot open STARS log file %s\n", ZSTLogFileName.c_str());
error("Redirecting to stderr.\n");
SMPLogFile = stderr;
}
#endif
#if SMP_DEBUG
clc5q
committed
SMP_msg("Beginning IDAP_run.\n");
clc5q
committed
SMP_msg("IDA SDK version: %d \n", IDA_SDK_VERSION);
#if 1
// Report the object sizes of DefOrUse and op_t.
// NOTE(review): size_t values are printed with %u -- confirm this matches the
// width of size_t on the target build.
DefOrUse DummyRef;
op_t DummyOperand;
size_t RefObjectSize = sizeof(DummyRef), OpndSize = sizeof(DummyOperand);
SMP_msg("INFO: Size of DefOrUse: %u Size of op_t: %u \n", RefObjectSize, OpndSize);
#endif
// Open the output file.
clc5q
committed
// FileLen receives the result of get_root_filename() but is not read again in
// the visible code.
ssize_t FileLen;
FileLen = get_root_filename(RootFileName, sizeof(RootFileName) - 1);
// Every output name is the root file name plus a fixed suffix.
string AnnotFileName(RootFileName);
clc5q
committed
string FileSuffix(".annot");
AnnotFileName += FileSuffix;
string InfoAnnotFileName(RootFileName);
string InfoFileSuffix(".infoannot");
InfoAnnotFileName += InfoFileSuffix;
string ZSTPolicyFileName(RootFileName);
string PolicyFileSuffix(".policy");
ZSTPolicyFileName += PolicyFileSuffix;
string ZSTAlarmFileName(RootFileName);
string AlarmFileSuffix(".alarms");
ZSTAlarmFileName += AlarmFileSuffix;
string AsmFileName(RootFileName);
string AsmFileSuffix(".asm");
AsmFileName += AsmFileSuffix;
string XrefsFileName(RootFileName);
string XrefsFileSuffix(".STARSxrefs");
XrefsFileName += XrefsFileSuffix;
// For debugging, we can add a delay loop so we have time to attach gdb to the
// running process and set a breakpoint.
#if SMP_IDAP_RUN_DELAY
// NOTE(review): `¤t` below looks like a mangled `&current`; it is only
// compiled when SMP_IDAP_RUN_DELAY is non-zero.
time_t start;
time_t current;
time(&start);
printf("delay for 15 seconds.\n");
do {
time(¤t);
} while(difftime(current,start) < 15.0);
#endif
ea_t RecentAddr;
#if SMP_DEBUG_CODE_ORPHANS
// First orphan pass, before the analysis: scan every code segment.
CodeOrphans.clear();
RecentAddr = BADADDR;
#if IDA_SDK_VERSION < 600
clc5q
committed
// NOTE(review): two alternative loop headers follow with no visible #else
// between them -- presumably one per SDK version; confirm.
for (int SegIndex = 0; SegIndex < SMP_get_segm_qty(); ++SegIndex) {
segment_t *seg = SMP_getnseg(SegIndex);
clc5q
committed
for (segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->startEA;
#endif
if (seg->type == SEG_CODE)
Debug_FindOrphanedCode(seg, true);
}
#endif
clc5q
committed
SMPProgram *CurrProg = new SMPProgram();
CurrProg->AnalyzeData(); // Analyze static data in the executable
clc5q
committed
// read the Profiler generated information into a new prof_info class
ProfilerInformation *prof_info = new ProfilerInformation(AnnotFileName.c_str(), CurrProg);
clc5q
committed
// Open the four output files. Each failure path closes the files opened so far
// and returns.
// NOTE(review): none of these early returns deletes CurrProg -- confirm
// whether that is intended.
AnnotFile = SMP_fopen(AnnotFileName.c_str(), "w");
if (NULL == AnnotFile) {
error("FATAL ERROR: Cannot open output file %s\n", AnnotFileName.c_str());
delete prof_info;
return;
}
clc5q
committed
InfoAnnotFile = SMP_fopen(InfoAnnotFileName.c_str(), "w");
if (NULL == InfoAnnotFile) {
error("FATAL ERROR: Cannot open output file %s\n", InfoAnnotFileName.c_str());
// NOTE(review): unlike the sibling failure paths, prof_info is not deleted here.
SMP_fclose(AnnotFile);
return;
}
clc5q
committed
ZST_AlarmFile = SMP_fopen(ZSTAlarmFileName.c_str(), "w");
if (NULL == ZST_AlarmFile) {
error("FATAL ERROR: Cannot open security alarms file %s\n", ZSTAlarmFileName.c_str());
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
delete prof_info;
return;
}
STARS_XrefsFile = SMP_fopen(XrefsFileName.c_str(), "w");
if (NULL == STARS_XrefsFile) {
error("FATAL ERROR: Cannot open STARS code xrefs file %s\n", XrefsFileName.c_str());
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
delete prof_info;
return;
}
// Read the Zephyr Security Toolkit system call security policies, if available.
ZST_InitPolicies(ZSTPolicyFileName.c_str());
// Zero the per-category statistics before analysis starts.
(void) memset(OptCount, 0, sizeof(OptCount));
(void) memset(AnnotationCount, 0, sizeof(AnnotationCount));
try { // We will catch memory exhaustion errors.
clc5q
committed
// Record the start and end addresses for all function entry
// chunks in the program.
FuncBounds.reserve(10 + get_func_qty());
for (size_t FuncIndex = 0; FuncIndex < get_func_qty(); ++FuncIndex) {
// NOTE(review): FuncInfo is dereferenced without a NULL check -- confirm
// getn_func() cannot fail for an index below get_func_qty().
func_t *FuncInfo = getn_func(FuncIndex);
SMP_bounds_t temp;
temp.startEA = FuncInfo->startEA;
temp.endEA = FuncInfo->endEA;
FuncBounds.push_back(temp);
}
#if SMP_DEBUG_DATA_ONLY
clc5q
committed
// NOTE(review): SymsFile is not declared anywhere in the visible code.
SMP_fclose(SymsFile);
clc5q
committed
return;
#endif
// Pre-audit the IDA database by seeing if the distinction
// between code and data can be improved, and if all branches
// and calls have proper code targets and code cross references.
#if SMP_FIXUP_IDB
clc5q
committed
#endif
// Only when profiler annotations were read: let the memory access information
// infer data granularity.
if (0 < prof_info->GetProfilerAnnotationCount()) {
clc5q
committed
SMP_msg("Calling InferDataGranularity\n");
SMP_msg("ptr to MemoryAccessInfo: %p\n", prof_info->GetMemoryAccessInfo());
prof_info->GetMemoryAccessInfo()->InferDataGranularity();
clc5q
committed
SMP_msg("Returned from InferDataGranularity\n");
}
CurrProg->ProfGranularityFinished(AnnotFile, InfoAnnotFile);
CurrProg->EmitAnnotations(AnnotFile, InfoAnnotFile);
clc5q
committed
#if SMP_DEBUG_CODE_ORPHANS
// Second orphan pass, after the analysis, over every code segment.
RecentAddr = BADADDR;
#if IDA_SDK_VERSION < 600
clc5q
committed
for (int SegIndex = 0; SegIndex < SMP_get_segm_qty(); ++SegIndex) {
segment_t *seg = SMP_getnseg(SegIndex);
clc5q
committed
for (segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->startEA;
#endif
if (seg->type == SEG_CODE)
Debug_FindOrphanedCode(seg, true);
}
#endif
// Report code outside of functions for every code segment, writing to both
// annotation files.
RecentAddr = BADADDR;
clc5q
committed
for (int SegIndex = 0; SegIndex < SMP_get_segm_qty(); ++SegIndex) {
segment_t *seg = SMP_getnseg(SegIndex);
clc5q
committed
// NOTE(review): in this visible loop RecentAddr is never advanced, unlike the
// loops above -- confirm against the complete file.
for (segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
clc5q
committed
if (seg->type == SEG_CODE)
FindOrphanedCode(seg, AnnotFile, InfoAnnotFile);
clc5q
committed
}
clc5q
committed
// Print the instruction and annotation counts for every optimization category.
for (int OptType = 0; OptType <= LAST_OPT_CATEGORY; ++OptType) {
clc5q
committed
SMP_msg("Optimization Category Count %d: %d Annotations: %d\n",
OptType, OptCount[OptType], AnnotationCount[OptType]);
}
clc5q
committed
// Close the outputs; the success marker is the last line written to the
// info-annotation file.
SMP_fclose(AnnotFile);
SMP_fprintf(InfoAnnotFile, " 8000000 2 SUCCESS ANALYSISCOMPLETED\n");
clc5q
committed
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
SMP_fclose(STARS_XrefsFile);
clc5q
committed
#if STARS_GENERATE_ASM_FILE
// Write the disassembly to <root>.asm via gen_file().
// NOTE(review): AsmFile and LowestCodeAddress are not declared in the visible
// code, and the if/else structure around gen_file() is incomplete here.
AsmFile = SMP_fopen(AsmFileName.c_str(), "w");
if (NULL == AsmFile) {
error("FATAL ERROR: Cannot open ASM file %s\n", AsmFileName.c_str());
int FileResult = gen_file(OFILE_ASM, AsmFile, LowestCodeAddress, 0xffffffff, (GENFLG_MAPSEG | GENFLG_ASMTYPE));
clc5q
committed
SMP_msg("ERROR: Could not generate ASM file.\n");
clc5q
committed
// NOTE(review): prof_info is not deleted on this normal path in the visible
// code -- confirm.
delete CurrProg;
}
// Memory exhaustion: free the analysis objects, record the failure, close the
// outputs.
// NOTE(review): the exception is caught by value, and the files are written
// and closed here even though the try body may already have closed them --
// confirm no throwing code runs after the SMP_fclose() calls above.
catch (std::bad_alloc) {
delete CurrProg;
delete prof_info;
error("FATAL ERROR: Memory exhausted.\n");
SMP_fprintf(InfoAnnotFile, " 8000000 2 ERROR MEMORYEXHAUSTED\n");
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
SMP_fclose(STARS_XrefsFile);
return;
}
} // end IDAP_run()
// Descriptive strings referenced by the PLUGIN descriptor below.
char IDAP_comment[] = "ZephyrSoftware STARS (Static Analyzer for Reliability and Security)";
char IDAP_help[] = "Good luck";
char IDAP_name[] = "SMPStaticAnalyzer";
char IDAP_hotkey[] = "Alt-J";
// Plugin descriptor tying together the interface version, the three entry
// points defined in this file and the strings above.
// NOTE(review): field meanings are inferred from the initializer order and the
// names used here -- confirm against the plugin_t declaration in the IDA SDK.
plugin_t PLUGIN = {
IDP_INTERFACE_VERSION,
IDAP_init, // initialization entry point
IDAP_term, // termination entry point
IDAP_run, // run entry point
IDAP_comment,
IDAP_help,
IDAP_name,
IDAP_hotkey
};
clc5q
committed
// Find all code addresses in the IDA database and enter them into
// IDAProLocs. Find all code addresses identified by the external
// disassembler (e.g. objdump) and enter them into DisasmLocs.
// Fills DisasmLocs from the text file <RootFileName>.SMPobjdump (one leading
// hexadecimal address per line, rest of the line ignored) and then fills
// IDAProLocs with every address in a code segment that IDA flags as the head
// of a code item.
// If the file cannot be opened, or yields no addresses, the function returns
// early and IDAProLocs is left untouched.
// NOTE(review): the segment loop below shows two alternative loop headers and
// fewer closing braces than opening ones, and RecentAddr is not declared in
// the visible code -- confirm against the complete file before editing.
void FindCodeAddresses(void) {
// Read in code addresses as found by an external disassembler.
ea_t CurrDisasmAddr;
string DisasmFileName(RootFileName);
string FileSuffix(".SMPobjdump");
DisasmFileName += FileSuffix;
clc5q
committed
FILE *DisasmFile = SMP_fopen(DisasmFileName.c_str(), "r");
clc5q
committed
if (NULL == DisasmFile) {
error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str());
return;
}
#define DISASM_RESERVE_SIZE 50000
// Reserve room up front so the vector does not have to grow for the first
// DISASM_RESERVE_SIZE addresses.
DisasmLocs.reserve(DISASM_RESERVE_SIZE);
// NOTE(review): "%x" stores through an unsigned int pointer -- confirm ea_t
// has that width on the target build.
int ScanReturn = qfscanf(DisasmFile, "%x", &CurrDisasmAddr);
while (1 == ScanReturn) {
int NextChar;
DisasmLocs.push_back(CurrDisasmAddr);
// Swallow the rest of the input line and get the next address.
do {
NextChar = qfgetc(DisasmFile);
} while ((EOF != NextChar) && ('\n' != NextChar));
ScanReturn = qfscanf(DisasmFile, "%x", &CurrDisasmAddr);
} // end while (1 == ScanReturn)
// size() is unsigned, so this condition is true only for an empty vector.
if (0 >= DisasmLocs.size()) {
clc5q
committed
SMP_msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str());
SMP_fclose(DisasmFile);
clc5q
committed
return;
}
else {
clc5q
committed
SMP_msg("%zu Disasm addresses read from %s\n", DisasmLocs.size(),
clc5q
committed
DisasmFileName.c_str());
clc5q
committed
SMP_fclose(DisasmFile);
clc5q
committed
}
// Find all the code locs in the IDA Pro database. As we find
// them, store them in IDAProLocs.
clc5q
committed
for (int SegIndex = 0; SegIndex < SMP_get_segm_qty(); ++SegIndex) {
segment_t *seg = SMP_getnseg(SegIndex);
clc5q
committed
for (segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
clc5q
committed
if (SEG_CODE != seg->type)
continue;
// Walk the segment item by item; get_item_end() supplies the next address.
for (ea_t addr = seg->startEA; addr < seg->endEA; addr = get_item_end(addr)) {
flags_t InstrFlags = getFlags(addr);
if (isHead(InstrFlags) && isCode(InstrFlags)) {
IDAProLocs.push_back(addr);
clc5q
committed
// Extra trace output for one hard-coded address window (0x806cda4..0x806cf99).
if ((0x806cda4 <= addr) && (0x806cf99 >= addr))
clc5q
committed
SMP_msg("IDA code addr: %x\n", addr);
clc5q
committed
} // end if (isHead(addr) && isCode(addr)
#if SMP_DEBUG_FIXUP_IDB
// Same hard-coded window: report items that are not code heads.
else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) {
if (!isHead(InstrFlags))
clc5q
committed
SMP_msg("Weirdness: not isHead at %x\n", addr);
clc5q
committed
if (isUnknown(InstrFlags)) {
clc5q
committed
SMP_msg("Weirdness: isUnknown at %x\n", addr);
clc5q
committed
}
}
#endif
} // end for (ea_t addr = seg->startEA; ...)
clc5q
committed
return;
} // end FindCodeAddresses()
// Return true if addr is not a proper beginning address for an instruction.
// Return false otherwise.
// An instruction is claimed to be misaligned when the external disassembler's
// address list (DisasmLocs) is available and does not contain addr. This is
// useful for dealing with errors in IDA code identification, in which a large
// code section is identified as data but some instructions in the middle of
// the "data" are identified as code; IDA often starts on the wrong boundary
// in these cases.
// When DisasmLocs is empty there is no reference to compare against, so no
// misalignment is reported (false).
// Precondition: DisasmLocs is sorted in ascending address order.
bool IsCodeMisaligned(ea_t addr) {
	// Without external addresses nothing can be proven misaligned; this also
	// keeps the search below from indexing into an empty vector.
	if (DisasmLocs.empty())
		return false;

	// Binary search over the half-open index range [low, high).
	size_t low = 0;
	size_t high = DisasmLocs.size(); // never used as an index itself
	while (low < high) {
		const size_t mid = low + (high - low) / 2;
		if (addr == DisasmLocs[mid])
			return false; // known instruction start
		if (addr < DisasmLocs[mid])
			high = mid;
		else // addr > DisasmLocs[mid]
			low = mid + 1;
	}
	return true; // not among the known instruction starts
} // end of IsCodeMisaligned()
// Delete addr from IDAProLocs if it is present; do nothing otherwise
// (including when IDAProLocs is empty).
// Precondition: IDAProLocs is sorted in ascending address order because of
// the way in which it was generated, so a binary search can be used.
void RemoveIDACodeAddr(ea_t addr) {
	// Binary search over the half-open index range [low, high). An empty
	// vector gives low == high, so no element is ever read in that case.
	size_t low = 0;
	size_t high = IDAProLocs.size(); // never used as an index itself
	while (low < high) {
		const size_t mid = low + (high - low) / 2;
		if (addr == IDAProLocs[mid]) {
			// IDAProLocs[mid] contains addr.
			vector<ea_t>::iterator RemovalIterator = IDAProLocs.begin();
			RemovalIterator += mid;
			(void) IDAProLocs.erase(RemovalIterator);
			return;
		}
		if (addr < IDAProLocs[mid])
			high = mid;
		else // addr > IDAProLocs[mid]
			low = mid + 1;
	}
	return; // addr was not recorded
} // end of RemoveIDACodeAddr()
// Top-level driver of the IDA database (IDB) repair chain. It runs the
// individual fixing passes in a fixed order; once it returns, the database has
// been repaired as far as these passes are able to.
void FixupIDB(void) {
	// Collect the external and the IDA code addresses first; the later
	// passes consult them.
	FindCodeAddresses();
#if SMP_DEBUG_FIXUP_IDB
	SpecialDebugOutput();
#endif

	AuditCodeTargets();
	FindDataInCode();
	AuditTailChunkOwnership();

	// The code-identification fixes are attempted only when external
	// disassembler addresses were read.
	if (!DisasmLocs.empty()) {
		FixCodeIdentification();
		(void) FixupNewCodeChunks(); // the returned count is not used here
	}

	// The address lists are not needed once the fixes are done.
	DisasmLocs.clear();
	IDAProLocs.clear();
} // end of FixupIDB()
// Find and print all data head addresses in code segments.
// If an isolated code instruction is found in the midst of a run
// of data bytes and has no code xrefs jumping to it, it is not
// reachable as code and is undoubtedly a mixup by IDA. Possibly
// the whole data region will be converted to code later, in which
// case the isolated code is not necessarily properly aligned and
// parsed at its present address, so we are glad to convert it into
// data anyway so that FindDataToConvert() will succeed on it later.
// Data to code conversion, and isolated code detection, are inhibited
// by IDA identifying several consecutive instructions in the midst
// of a data region, with the code addresses not agreeing with the
// external disassembler's code addresses. We will convert these
// misaligned instructions to data as we detect them. We will also
// convert unexplored bytes (isUnknown(flags) == true) into data if
// they are in the midst of a data sequence.
#define MIN_DATARUN_LEN 24 // #bytes on either side of "isolated" code
void FindDataInCode(void) {
size_t DataRunLen = 0; // How many data bytes in a row have we seen?
bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
// Now looking for data
ea_t IsolatedCodeAddr;
int IsolatedCodeLen;
int InstrLen;
clc5q
committed
bool InstOK;
insn_t LocalCmd;
ulong LocalFeatures;
clc5q
committed
clc5q
committed
for (int SegIndex = 0; SegIndex < SMP_get_segm_qty(); ++SegIndex) {
segment_t *seg = SMP_getnseg(SegIndex);
clc5q
committed
for (segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
clc5q
committed
if (SEG_CODE != seg->type)
continue;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
char SegName[MAXSTR];
clc5q
committed
ssize_t SegNameSize = SMP_get_segm_name(seg, SegName, sizeof(SegName) - 1);
SMP_msg("Non-code addresses for code segment %s from %x to %x\n",
clc5q
committed
SegName, seg->startEA, seg->endEA);
#endif
for (ea_t addr = seg->startEA; addr < seg->endEA; addr = get_item_end(addr)) {
flags_t AddrFlags = getFlags(addr);
if (isHead(AddrFlags)) {
if (isData(AddrFlags)) {
DataRunLen += get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Data: %x\n", addr);
clc5q
committed
#endif
if (MIN_DATARUN_LEN <= DataRunLen) {
if (IsolatedCodeTrigger) {
// Saw data, then one isolated code, then data
do_unknown_range(IsolatedCodeAddr, IsolatedCodeLen, DOUNK_SIMPLE);
RemoveIDACodeAddr(IsolatedCodeAddr);
if (do_data_ex(IsolatedCodeAddr, byteflag(),
IsolatedCodeLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted isolated code to data: %x\n",
clc5q
committed
IsolatedCodeAddr);
}
else {
clc5q
committed
SMP_msg("Failed to convert isolated code to data: %x len: %x\n",
clc5q
committed
IsolatedCodeAddr, IsolatedCodeLen);
}
IsolatedCodeTrigger = false;
} // end if (IsolatedCodeTrigger)
} // end if (MIN_DATARUN_LEN <= DataRunLen)
} // end if (isData(AddrFlags)
else if (isUnknown(AddrFlags)) {
// Just in case; unknown usually means not head or tail
// If in a data run, convert to data.
InstrLen = get_item_size(addr);
clc5q
committed
SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q
committed
if (0 < DataRunLen) {
if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen += InstrLen;
}
else {
clc5q
committed
SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen = 0;
IsolatedCodeTrigger = false;
}
}
}
else if (isCode(AddrFlags)) { // must be true
if (MIN_DATARUN_LEN <= DataRunLen) {
clc5q
committed
SMP_msg("DataRunLen: %d at %x\n", DataRunLen, addr);
clc5q
committed
InstOK = SMPGetCmd(addr, LocalCmd, LocalFeatures);
assert(InstOK);
InstrLen = (int) LocalCmd.size;
// We don't check the returned InstrLen for validity because IsCodeMisaligned()
// will check for validity immediately below.
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Calling IsCodeMisaligned: len %d\n", InstrLen);
clc5q
committed
#endif
if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Code was misaligned.\n");
clc5q
committed
#endif
do_unknown_range(addr, InstrLen, DOUNK_SIMPLE);
RemoveIDACodeAddr(addr);
if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted misaligned code to data at %x : len: %x\n",
clc5q
committed
addr, InstrLen);
clc5q
committed
// Step back so data gets processed.
DataRunLen += get_item_size(addr);
continue; // skip reset of DataRunLen
}
else {
clc5q
committed
SMP_msg("Misaligned code left as unknown at %x : len: %x\n",
clc5q
committed
addr, InstrLen);
clc5q
committed
IsolatedCodeTrigger = false;
}
} // end if (IsCodeMisaligned() ...)
else if (!hasRef(AddrFlags)) {
// No references at all --> isolated code.
IsolatedCodeTrigger = true;
IsolatedCodeAddr = addr;
IsolatedCodeLen = InstrLen;
}
else {
clc5q
committed
SMP_xref_t xb;
bool ok = xb.SMP_first_to(addr, XREF_ALL);
clc5q
committed
if (!ok) {
// No code xrefs to this target addr.
IsolatedCodeTrigger = true;
IsolatedCodeAddr = addr;
IsolatedCodeLen = InstrLen;
}
}
} // end if (MIN_DATARUN_LEN <= DataRunLen)
else if (IsolatedCodeTrigger) {
// Two instructions in a row does not fit the pattern.
IsolatedCodeTrigger = false;
}
DataRunLen = 0;
} // end if (isData) ... else if (isUnknown) ... else isCode
} // end if (isHead)
else if (isUnknown(AddrFlags)) {