/*
* SMPStaticAnalyzer.cpp - <see below>.
*
* Copyright (c) 2000, 2001, 2010 - University of Virginia
*
* This file is part of the Memory Error Detection System (MEDS) infrastructure.
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
* Additional copyrights 2010, 2011 by Zephyr Software LLC
* e-mail: {clc,jwd}@zephyr-software.com
* URL : http://www.zephyr-software.com/
*/
//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
// (Software Memory Protection).
//
using namespace std;
clc5q
committed
#include <algorithm>
#include <list>
#include <string>
#include <vector>
clc5q
committed
clc5q
committed
#include "SMPDBInterface.h"
#include "SMPDataFlowAnalysis.h"
clc5q
committed
#include "SMPProgram.h"
#include "SMPFunction.h"
#include "SMPInstr.h"
#include "ProfilerInformation.h"
#define SMP_DEBUG_DELAY 0 // for setting an early breakpoint
// Set to 1 for debugging output
#define SMP_DEBUG 1
clc5q
committed
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
clc5q
committed
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0 // Find & fix data addresses in code segments
// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
// which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356 // How many funcs to process in problem search
int FuncsProcessed = 0;
#define SMP_FIXUP_IDB 0 // Try to fix the IDA database?
#define SMP_DEBUG_FIXUP_IDB 0 // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1 // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 1 // Detect whether we are causing code to be orphaned
#define SMP_IDAP_RUN_DELAY 0 // Delay in IDAP_run() so we can attach debugger to process.
clc5q
committed
#define STARS_GENERATE_ASM_FILE 1 // Generate ASM file at end of processing?
#define STARS_GENERATE_DIF_FILE STARS_SCCP_CONVERT_UNREACHABLE_BLOCKS // If we optimize, generate DIF file
clc5q
committed
static SMPProgram *CurrProg = NULL;
#if SMP_DEBUG_CODE_ORPHANS
set<ea_t> CodeOrphans;
#endif
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
// into the middle of an instruction to IDA Pro, causing it to not collect instructions
// into a procedure. We replace these bytes with no-op opcodes because none of our analyses
// care about LOCK prefices. We store the addresses where we have done the replacement in a
// set in case we ever care.
#define X86_LOCK_PREFIX 0xF0
set<ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
static unsigned long CustomAnaCallCount = 0;
// Define optimization categories for instructions.
int OptCategory[NN_last + 1];
// Initialize the OptCategory[] array.
void InitOptCategory(void);
// Flag to force reduced analysis so we don't run out of virtual memory
bool STARS_PerformReducedAnalysis;
// Indentation level when emitting SPARK Ada translation of the RTLs.
unsigned short STARS_SPARK_IndentCount;
// Record which opcodes change the stack pointer, and by how many
// bytes up (reduction in stack size for stacks that grow downward)
// or down (increase in stack size for stacks that grow downward).
sval_t StackAlteration[NN_last + 1];
// Initialize the StackAlteration[] array.
void InitStackAlteration(void);
// Keep statistics on how many instructions we saw in each optimization
// category, and how many optimizing annotations were emitted for
// each category.
int OptCount[LAST_OPT_CATEGORY + 1];
int AnnotationCount[LAST_OPT_CATEGORY + 1];
// Unique data referent number to use in data annotations.
unsigned long DataReferentID;
// Debugging counters for analyzing memory usage.
unsigned long UnusedInstrCount;
unsigned long UnusedBlockCount;
unsigned long UnusedStructCount;
unsigned long UnusedIntCount;
// Counters for dead metadata analysis.
unsigned long DeadMetadataCount;
unsigned long LiveMetadataCount;
// Counters for indirect jump resolution.
unsigned long ResolvedIndirectJumpCount;
unsigned long UnresolvedIndirectJumpCount;
// Counters for measuring SCCP success in finding constant DEFs.
unsigned long ConstantDEFCount;
unsigned long AlwaysTakenBranchCount;
unsigned long NeverTakenBranchCount;
// Counters for accessing less than machine register width.
unsigned long SubwordRegCount;
unsigned long SubwordMemCount;
unsigned long SubwordAddressRegCount;
unsigned long SPARKOperandCount; // total operands printed
#if SMP_COUNT_MEMORY_ALLOCATIONS
// Counters for analyzing memory use for allocated and used objects.
// They are reset to zero in IDAP_init() under the same conditional.
unsigned long SMPInstCount;
unsigned long SMPBlockCount;
unsigned long SMPFuncCount;
unsigned long SMPGlobalVarCount;
unsigned long SMPLocalVarCount;
unsigned long SMPDefUseChainCount;
unsigned long SMPInstBytes;
unsigned long SMPDefUseChainBytes;
#endif // SMP_COUNT_MEMORY_ALLOCATIONS
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
unsigned long NumericAnnotationsCount12; // cases 1 and 2
unsigned long NumericAnnotationsCount3; // case 3
unsigned long TruncationAnnotationsCount; // case 4
unsigned long SignednessWithoutTruncationCount; // case 5
unsigned long LeaInstOverflowCount; // case 6
unsigned long WidthDoublingTruncationCount; // case 7
unsigned long BenignOverflowInstCount;
unsigned long BenignOverflowDefCount;
unsigned long SuppressStackPtrOverflowCount;
unsigned long SuppressLiveFlagsOverflowCount;
unsigned long LiveMultiplyBitsCount;
unsigned long BenignTruncationCount;
unsigned long SuppressTruncationRegPiecesAllUsed;
unsigned long SuppressSignednessOnTruncation;
#endif
#if STARS_SCCP_GATHER_STATISTICS
// Counters for analyzing Sparse Conditional Constant Propagation effectiveness.
unsigned long SCCPFuncsWithArgWriteCount;
unsigned long SCCPFuncsWithConstantArgWriteCount;
unsigned long SCCPOutgoingArgWriteCount;
unsigned long SCCPConstantOutgoingArgWriteCount;
#endif
// Counter for max # of basic blocks seen in one function.
unsigned long STARS_MaxBlockCount;
// Is the binary a 32-bit, 64-bit, etc. instruction set architecture
size_t STARS_ISA_Bitwidth;
size_t STARS_ISA_Bytewidth;
char STARS_ISA_dtyp;
int STARS_MD_LAST_SAVED_REG_NUM;
// The types of data objects based on their first operand flags.
const char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR",
"SEG", "OFFSET", "NUMBIN", "NUMOCT", "ENUM", "FORCED",
"STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN",
"UNKNOWN", "UNKNOWN", 0};
clc5q
committed
// Filename (not including path) of executable being analyzed.
clc5q
committed
// strings for printing ZST_SysCallType
const char *CallTypeNames[4] = { "Unrestricted", "High-Privilege", "File-Access", "Network-Access" };
DisAsmString DisAsmText;
// Operand type that can have all fields initialized to o_void and zero
// values, to be used to copy-initialize operands that we are adding to
// RTLs and DEF and USE lists.
STARSOpndType InitOp;
// File foo.exe.alarms for Zephyr Security Toolkit security alarm messages.
FILE *ZST_AlarmFile;
// File for code xref targets (helps ILR, makes IRDB more complete)
FILE *STARS_XrefsFile;
// File to provide details on fast returns, safe and unsafe return-address functions, etc.
FILE *STARS_CallReturnFile;
FILE *ZST_SPARKSourceFile;
FILE *ZST_SPARKHeaderFile;
clc5q
committed
// Code addresses identified by a disassembler, such as objdump on
// Linux. These can be used to improve the code vs. data identification
// of IDA Pro.
vector<ea_t> DisasmLocs;
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
vector<ea_t> IDAProLocs;
// Bit masks for extracting bits from a STARSBitSet unsigned char.
const unsigned char STARSBitMasks[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
clc5q
committed
// Function start and end addresses (for function entry chunks only).
// Kept here because IDA Pro 5.1 seems to have a memory overwriting
// problem when iterating through all functions in the program. An existing
// STARS_Function_t *ChunkInfo data structure was getting overwritten by one of the
// function STARS_Function_t data structures, causing changes of startEA and endEA among
clc5q
committed
// other things.
vector<SMP_bounds_t> FuncBounds;
// List of functions that need to be reanalyzed after all the code fixup
// and code discovery is complete. Kept as a list of addresses; any address
// within the function is good enough to designate it.
list<ea_t> FuncReanalyzeList;
// A code region that has been converted from data but has code addresses that
// need to be reanalyzed. This is usually because a former data address is
// now a jump to a code target that is still a data address. We have to wait
// until the target has become code before IDA will accept the jump as valid.
class FixupRegion {
public:
FixupRegion(SMP_bounds_t);
inline ea_t GetStart(void) const { return CodeRegion.startEA; };
inline ea_t GetEnd(void) const { return CodeRegion.endEA; };
inline void SetStart(ea_t addr) { CodeRegion.startEA = addr; };
list<ea_t> FixupInstrs; // easier to expose than to encapsulate
private:
SMP_bounds_t CodeRegion;
};
// Record the supplied bounds as this region's CodeRegion; FixupInstrs starts empty.
FixupRegion::FixupRegion(SMP_bounds_t Range) : CodeRegion(Range) {
}
// List of code regions that were not completely analysed because of jump to
// data considerations.
list<FixupRegion> CodeReanalyzeList;
clc5q
committed
// Map library function names to their system call type.
map<string, ZST_SysCallType> ZST_FuncTypeMap;
// Map system call types to their Zephyr Security Toolkit security policy.
map<ZST_SysCallType, ZST_Policy> ZST_TypePolicyMap;
// Set of whitelisted file locations.
set<string> ZST_FileLocWhitelist;
// Set of whitelisted network locations.
set<string> ZST_NetworkLocWhitelist;
// Set of blacklisted file locations.
set<string> ZST_FileLocBlacklist;
// Set of blacklisted network locations.
set<string> ZST_NetworkLocBlacklist;
// Set of system call names whose returned values should be trusted to have only benign numeric errors.
set<string> ZST_SystemCallNumericWhitelist;
void IDAP_run(int);
clc5q
committed
// Functions for diagnosing and/or fixing problems in the IDA database.
void FixupIDB(void); // Driver for all other fixing functions.
void FindDataInCode(void);
void AuditTailChunkOwnership(void);
void FindOrphanedCode(STARS_Segment_t *, FILE *, FILE *);
void Debug_FindOrphanedCode(STARS_Segment_t *, bool);
clc5q
committed
void FixCodeIdentification(void);
int FixupNewCodeChunks(void);
void AuditCodeTargets(void);
void SpecialDebugOutput(void);
clc5q
committed
void RemoveIDACodeAddr(ea_t);
void ZST_InitPolicies(const char *);
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
static unsigned long DebugCounter = 0;
// Turn LOCK prefix into no-op when detected. Each is one byte in length.
// Inspect the byte at CurrentAddr. If it is an x86 LOCK prefix (X86_LOCK_PREFIX),
// record the address in LockPreficesRemoved and rewrite the global cmd so that
// the byte is treated as a one-byte no-op.
//
// Returns true when the byte was a LOCK prefix and cmd was rewritten, false
// otherwise. CustomAnaCallCount is incremented on every call.
//
// The assert documents the expectation that each LOCK prefix address is seen
// only once; it fires (in builds with assertions enabled) on a repeat.
bool STARS_custom_ana(ea_t CurrentAddr) {
	int code = get_byte(CurrentAddr);
	++CustomAnaCallCount;
	if (X86_LOCK_PREFIX != code) {
		return false;
	}

#define STARS_ANA_DEBUG_DELAY 0
#if STARS_ANA_DEBUG_DELAY
	// One-time 15 second busy-wait so a debugger can be attached.
	if (DebugCounter == 0) {
		time_t start;
		time_t current;
		time(&start);
		printf("delay for 15 seconds.\n");
		do {
			time(&current);
		} while (difftime(current, start) < 15.0);
		++DebugCounter;
	}
#endif

	pair<set<ea_t>::iterator, bool> InsertResult = LockPreficesRemoved.insert(CurrentAddr);
	assert(InsertResult.second);
	(void) InsertResult; // only inspected by the assert

	cmd.itype = NN_nop; // make it a no-op
	cmd.size = 1;       // one-byte no-op
#if 0
	cmd.auxpref = 0;    // clear prefix and flags fields
	cmd.segpref = 0;
	cmd.insnpref = 0;
	cmd.flags = 0;
#endif
	return true;
} // end of STARS_custom_ana()
// Processor-module notification callback (hooked in IDAP_init()).
//
// Handles two events:
//  * processor_t::custom_ana (only when STARS_REMOVE_LOCK_PREFIX is set):
//    a LOCK prefix byte is recorded in LockPreficesRemoved and turned into a
//    no-op. The return value is cmd.size + 1 when handled, 0 otherwise.
//    NOTE(review): "length + 1" is presumably the IDA SDK convention for
//    custom_ana -- confirm against the SDK version in use.
//  * ph.auto_empty_finally (IDA auto-analysis finished): runs the STARS
//    analysis via IDAP_run(0) and then exits IDA with qexit(0).
//
// Returns 0 for every event that is not handled.
static int idaapi idp_callback(void *, int event_id, va_list va) {
	(void) va; // no event handled here carries arguments we read

#if STARS_REMOVE_LOCK_PREFIX
	if (event_id == processor_t::custom_ana) {
		ea_t CurrentAddr = cmd.ea;
#if 1
		int code = ua_next_byte();
		++CustomAnaCallCount;
		if (X86_LOCK_PREFIX == code) {
			(void) LockPreficesRemoved.insert(CurrentAddr);
			cmd.itype = NN_nop; // make it a no-op
			return (int) (cmd.size + 1);
		}
		else {
			return 0;
		}
#else
		if (STARS_custom_ana(CurrentAddr)) {
			return 1; // handled event
		}
#endif
	}
#endif // STARS_REMOVE_LOCK_PREFIX

	if (event_id == ph.auto_empty_finally) { // IDA analysis is done
		// Handle Cyber Grand Challenge project variant of ELF.
#define STARS_CGC_FILETYPE 16705
		if ((inf.filetype != f_ELF) && (inf.filetype != f_PE) && (inf.filetype != STARS_CGC_FILETYPE)) {
			// NOTE(review): the body of this guard was missing from the source
			// (unbalanced braces); rejecting unsupported formats here mirrors
			// the processor check in IDAP_init() -- confirm the message text.
			error("Executable format must be PE, ELF or CGC.");
			return 0;
		}
		IDAP_run(0);
#if STARS_REMOVE_LOCK_PREFIX
		SMP_msg("INFO: Calls to STARS_custom_ana: %lu \n", CustomAnaCallCount);
		SMP_msg("INFO: Number of LOCK prefices eliminated: %zu \n", LockPreficesRemoved.size());
		LockPreficesRemoved.clear();
#endif // STARS_REMOVE_LOCK_PREFIX
		qexit(0);
	}
	return 0; // did not process any event
} // end of idp_callback()
int IDAP_init(void) {
/* init the interface */
global_stars_interface=new STARS_IDA_Interface_t;
#if 0 // We are now calling from the SMP.idc script.
// Skip this plugin if it was not specified by the user on the
// command line.
if (get_plugin_options("SMPStaticAnalyzer") == NULL) {
clc5q
committed
SMP_msg("IDAP_init point 2.\n");
return PLUGIN_SKIP;
}
#endif
// Ensure correct working environment.
if (ph.id != PLFM_386) {
error("Processor must be x86.");
return PLUGIN_SKIP;
}
#if 0 // too early to detect 64-bit; moved to IDAP_run()
STARS_ISA_Bytewidth = (STARS_ISA_Bitwidth / 8);
hook_to_notification_point(HT_IDP, idp_callback, NULL);
STARS_PerformReducedAnalysis = false;
DataReferentID = 1;
UnusedStructCount = 0;
UnusedIntCount = 0;
DeadMetadataCount = 0;
LiveMetadataCount = 0;
ResolvedIndirectJumpCount = 0;
UnresolvedIndirectJumpCount = 0;
ConstantDEFCount = 0;
AlwaysTakenBranchCount = 0;
NeverTakenBranchCount = 0;
SubwordRegCount = 0;
SubwordMemCount = 0;
SubwordAddressRegCount = 0;
SPARKOperandCount = 0;
#if SMP_COUNT_MEMORY_ALLOCATIONS
SMPInstCount = 0;
SMPBlockCount = 0;
SMPDefUseChainCount = 0;
SMPFuncCount = 0;
SMPGlobalVarCount = 0;
SMPLocalVarCount = 0;
SMPInstBytes = 0;
SMPDefUseChainBytes = 0;
#endif
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
NumericAnnotationsCount12 = 0;
NumericAnnotationsCount3 = 0;
TruncationAnnotationsCount = 0;
SignednessWithoutTruncationCount = 0;
LeaInstOverflowCount = 0;
WidthDoublingTruncationCount = 0;
BenignOverflowInstCount = 0;
BenignOverflowDefCount = 0;
SuppressStackPtrOverflowCount = 0;
SuppressLiveFlagsOverflowCount = 0;
LiveMultiplyBitsCount = 0;
BenignTruncationCount = 0;
SuppressTruncationRegPiecesAllUsed = 0;
SuppressSignednessOnTruncation = 0;
#endif
#if STARS_SCCP_GATHER_STATISTICS
SCCPFuncsWithArgWriteCount = 0;
SCCPFuncsWithConstantArgWriteCount = 0;
SCCPOutgoingArgWriteCount = 0;
SCCPConstantOutgoingArgWriteCount = 0;
STARS_MaxBlockCount = 0;
clc5q
committed
InitOp.type = o_void;
clc5q
committed
InitOp.flags = 0;
InitOp.set_showed();
// NOTE: InitOp.dtyp field is initialized in IDAP_run() to 32 or 64 bits.
clc5q
committed
InitOp.value = 0;
InitOp.addr = 0;
InitOp.specval = 0;
InitOp.specflag1 = 0;
InitOp.specflag2 = 0;
InitOp.specflag3 = 0;
InitOp.specflag4 = 0;
ZST_AlarmFile = NULL;
ZST_SPARKSourceFile = NULL;
ZST_SPARKHeaderFile = NULL;
clc5q
committed
#ifdef STARS_IRDB_INTERFACE
SMPLogFile = NULL;
#endif
return PLUGIN_KEEP;
} // end of IDAP_init
// Plugin termination entry point: removes the idp_callback() hook that
// IDAP_init() installed on the HT_IDP notification point.
void IDAP_term(void) {
	unhook_from_notification_point(HT_IDP, idp_callback, NULL);
}
void IDAP_run(int arg) {
FILE *AnnotFile;
FILE *InfoAnnotFile;
clc5q
committed
// For debugging, we can add a delay loop so we have time to attach gdb to the
// running process and set a breakpoint.
#if SMP_DEBUG_DELAY
time_t start;
time_t current;
time(&start);
printf("delay for 15 seconds.\n");
do {
time(¤t);
} while(difftime(current,start) < 15.0);
#endif
if (inf.is_64bit()) {
STARS_ISA_Bitwidth = 64;
STARS_ISA_dtyp = dt_qword;
STARS_MD_LAST_SAVED_REG_NUM = R_r15;
SMP_msg("INFO: 64-bit binary detected.\n");
}
else {
STARS_ISA_Bitwidth = 32;
STARS_ISA_dtyp = dt_dword;
STARS_MD_LAST_SAVED_REG_NUM = R_di;
SMP_msg("INFO: 32-bit binary detected.\n");
}
STARS_ISA_Bytewidth = (STARS_ISA_Bitwidth / 8);
InitOp.dtyp = STARS_ISA_dtyp;
InitOptCategory();
InitDFACategory();
InitTypeCategory();
InitSMPDefsFlags();
InitSMPUsesFlags();
InitLibFuncFGInfoMaps();
InitIntegerErrorCallSinkMap();
InitUnsignedArgPositionMap();
clc5q
committed
InitTaintWarningArgPositionMap();
InitPointerArgPositionMap();
clc5q
committed
MDInitializeCallerSavedRegs();
clc5q
committed
#ifdef STARS_IRDB_INTERFACE
string ZSTLogFileName(RootFileName);
string LogFileSuffix(".STARSlog");
ZSTLogFileName += LogFileSuffix;
SMPLogFile = SMP_fopen(ZSTLogFileName.c_str(), "w");
if (NULL == SMPLogFile) {
error("ERROR: Cannot open STARS log file %s\n", ZSTLogFileName.c_str());
error("Redirecting to stderr.\n");
SMPLogFile = stderr;
}
#endif
#if SMP_DEBUG
clc5q
committed
SMP_msg("Beginning IDAP_run.\n");
clc5q
committed
SMP_msg("IDA SDK version: %d \n", IDA_SDK_VERSION);
STARSOpndType DummyOperand;
size_t RefObjectSize = sizeof(DummyRef), OpndSize = sizeof(DummyOperand);
SMP_msg("INFO: Size of DefOrUse: %zu Size of op_t: %zu \n", RefObjectSize, OpndSize);
// Open the output file.
clc5q
committed
ssize_t FileLen;
FileLen = get_root_filename(RootFileName, sizeof(RootFileName) - 1);
string AnnotFileName(RootFileName);
clc5q
committed
string FileSuffix(".annot");
AnnotFileName += FileSuffix;
string InfoAnnotFileName(RootFileName);
string InfoFileSuffix(".infoannot");
InfoAnnotFileName += InfoFileSuffix;
string ZSTPolicyFileName(RootFileName);
string PolicyFileSuffix(".policy");
ZSTPolicyFileName += PolicyFileSuffix;
string ZSTAlarmFileName(RootFileName);
string AlarmFileSuffix(".alarms");
ZSTAlarmFileName += AlarmFileSuffix;
string AsmFileName(RootFileName);
string AsmFileSuffix(".asm");
AsmFileName += AsmFileSuffix;
string DifFileName(RootFileName);
string DifFileSuffix(".dif");
DifFileName += DifFileSuffix;
string XrefsFileName(RootFileName);
string XrefsFileSuffix(".STARSxrefs");
XrefsFileName += XrefsFileSuffix;
string CallRetFileName(RootFileName);
string CallRetFileSuffix(".STARScallreturn");
CallRetFileName += CallRetFileSuffix;
#if ZST_EMIT_SPARK_ADA_TRANSLATION
string SPARKSourceFileName(RootFileName);
string SPARKSourceFileSuffix(".ZSTSPARK.adb");
SPARKSourceFileName += SPARKSourceFileSuffix;
string SPARKHeaderFileName(RootFileName);
string SPARKHeaderFileSuffix(".ZSTSPARK.ads");
SPARKHeaderFileName += SPARKHeaderFileSuffix;
#endif
// For debugging, we can add a delay loop so we have time to attach gdb to the
// running process and set a breakpoint.
#if SMP_IDAP_RUN_DELAY
time_t start;
time_t current;
time(&start);
printf("delay for 15 seconds.\n");
do {
time(¤t);
} while(difftime(current,start) < 15.0);
#endif
ea_t RecentAddr;
#if SMP_DEBUG_CODE_ORPHANS
CodeOrphans.clear();
RecentAddr = BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
Debug_FindOrphanedCode(seg, true);
}
#endif
STARS_XrefsFile = SMP_fopen(XrefsFileName.c_str(), "w");
if (NULL == STARS_XrefsFile) {
error("FATAL ERROR: Cannot open STARS code xrefs file %s\n", XrefsFileName.c_str());
return;
}
STARS_CallReturnFile = SMP_fopen(CallRetFileName.c_str(), "w");
if (NULL == STARS_CallReturnFile) {
error("FATAL ERROR: Cannot open STARS calls and returns info file %s\n", CallRetFileName.c_str());
SMP_fclose(STARS_XrefsFile);
return;
}
CurrProg = new SMPProgram();
CurrProg->AnalyzeData(); // Analyze static data in the executable
clc5q
committed
// read the Profiler generated information into a new prof_info class
ProfilerInformation *prof_info = new ProfilerInformation(AnnotFileName.c_str(), CurrProg);
clc5q
committed
AnnotFile = SMP_fopen(AnnotFileName.c_str(), "w");
if (NULL == AnnotFile) {
error("FATAL ERROR: Cannot open output file %s\n", AnnotFileName.c_str());
SMP_fclose(STARS_XrefsFile);
SMP_fclose(STARS_CallReturnFile);
delete prof_info;
return;
}
clc5q
committed
InfoAnnotFile = SMP_fopen(InfoAnnotFileName.c_str(), "w");
if (NULL == InfoAnnotFile) {
error("FATAL ERROR: Cannot open output file %s\n", InfoAnnotFileName.c_str());
SMP_fclose(STARS_XrefsFile);
SMP_fclose(STARS_CallReturnFile);
SMP_fclose(AnnotFile);
return;
}
clc5q
committed
ZST_AlarmFile = SMP_fopen(ZSTAlarmFileName.c_str(), "w");
if (NULL == ZST_AlarmFile) {
error("FATAL ERROR: Cannot open security alarms file %s\n", ZSTAlarmFileName.c_str());
SMP_fclose(STARS_XrefsFile);
SMP_fclose(STARS_CallReturnFile);
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
delete prof_info;
return;
}
#if ZST_EMIT_SPARK_ADA_TRANSLATION
ZST_SPARKSourceFile = SMP_fopen(SPARKSourceFileName.c_str(), "w");
if (NULL == ZST_SPARKSourceFile) {
error("FATAL ERROR: Cannot open SPARK-Ada source output file %s\n", SPARKSourceFileName.c_str());
SMP_fclose(STARS_XrefsFile);
SMP_fclose(STARS_CallReturnFile);
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
delete prof_info;
return;
}
ZST_SPARKHeaderFile = SMP_fopen(SPARKHeaderFileName.c_str(), "w");
if (NULL == ZST_SPARKHeaderFile) {
error("FATAL ERROR: Cannot open SPARK-Ada header output file %s\n", SPARKHeaderFileName.c_str());
SMP_fclose(STARS_XrefsFile);
SMP_fclose(STARS_CallReturnFile);
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
SMP_fclose(ZST_SPARKSourceFile);
delete prof_info;
return;
}
#endif
// Read the Zephyr Security Toolkit system call security policies, if available.
ZST_InitPolicies(ZSTPolicyFileName.c_str());
(void) memset(OptCount, 0, sizeof(OptCount));
(void) memset(AnnotationCount, 0, sizeof(AnnotationCount));
try { // We will catch memory exhaustion errors.
clc5q
committed
// Record the start and end addresses for all function entry
// chunks in the program.
FuncBounds.reserve(10 + get_func_qty());
for (size_t FuncIndex = 0; FuncIndex < SMP_get_func_qty(); ++FuncIndex) {
STARS_Function_t *FuncInfo = SMP_getn_func(FuncIndex);
clc5q
committed
SMP_bounds_t temp;
temp.startEA = FuncInfo->get_startEA();
temp.endEA = FuncInfo->get_endEA();
clc5q
committed
FuncBounds.push_back(temp);
}
#if SMP_DEBUG_DATA_ONLY
clc5q
committed
SMP_fclose(SymsFile);
clc5q
committed
return;
#endif
// Pre-audit the IDA database by seeing if the distinction
// between code and data can be improved, and if all branches
// and calls have proper code targets and code cross references.
#if SMP_FIXUP_IDB
clc5q
committed
#endif
if (0 < prof_info->GetProfilerAnnotationCount()) {
clc5q
committed
SMP_msg("Calling InferDataGranularity\n");
SMP_msg("ptr to MemoryAccessInfo: %p\n", prof_info->GetMemoryAccessInfo());
prof_info->GetMemoryAccessInfo()->InferDataGranularity();
clc5q
committed
SMP_msg("Returned from InferDataGranularity\n");
}
CurrProg->ProfGranularityFinished(AnnotFile, InfoAnnotFile);
CurrProg->Analyze(prof_info, AnnotFile, InfoAnnotFile);
if (!STARS_PerformReducedAnalysis) {
CurrProg->EmitAnnotations(AnnotFile, InfoAnnotFile);
}
clc5q
committed
#if SMP_DEBUG_CODE_ORPHANS
RecentAddr = BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
Debug_FindOrphanedCode(seg, true);
}
#endif
RecentAddr = BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
FindOrphanedCode(seg, AnnotFile, InfoAnnotFile);
clc5q
committed
}
clc5q
committed
for (int OptType = 0; OptType <= LAST_OPT_CATEGORY; ++OptType) {
clc5q
committed
SMP_msg("Optimization Category Count %d: %d Annotations: %d\n",
OptType, OptCount[OptType], AnnotationCount[OptType]);
}
clc5q
committed
SMP_fclose(AnnotFile);
SMP_fprintf(InfoAnnotFile, " 8000000 2 SUCCESS ANALYSISCOMPLETED\n");
clc5q
committed
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
SMP_fclose(STARS_XrefsFile);
SMP_fclose(STARS_CallReturnFile);
#if ZST_EMIT_SPARK_ADA_TRANSLATION
SMP_fclose(ZST_SPARKSourceFile);
SMP_fclose(ZST_SPARKHeaderFile);
if (!STARS_PerformReducedAnalysis) {
clc5q
committed
#if STARS_GENERATE_ASM_FILE
AsmFile = SMP_fopen(AsmFileName.c_str(), "w");
if (NULL == AsmFile) {
error("FATAL ERROR: Cannot open ASM file %s\n", AsmFileName.c_str());
}
else {
int FileResult = gen_file(OFILE_ASM, AsmFile, LowestCodeAddress, 0xffffffff, (GENFLG_MAPSEG | GENFLG_ASMTYPE));
if (0 >= FileResult) {
SMP_msg("ERROR: Could not generate ASM file.\n");
}
SMP_fclose(AsmFile);
#endif
#if STARS_GENERATE_DIF_FILE
DifFile = SMP_fopen(DifFileName.c_str(), "w");
if (NULL == DifFile) {
error("FATAL ERROR: Cannot open DIF file %s\n", DifFileName.c_str());
}
else {
int FileResult = gen_file(OFILE_DIF, DifFile, LowestCodeAddress, BADADDR, 0);
if (0 >= FileResult) {
SMP_msg("ERROR: Could not generate DIF file.\n");
}
SMP_fclose(DifFile);
}
SMP_msg("INFO: Files closed, freeing memory and exiting.\n");
SMP_msg("INFO: Deleted prof_info.\n");
delete CurrProg;
SMP_msg("INFO: Deleted CurrProg. Returning to IDA Pro.\n");
}
catch (std::bad_alloc) {
delete CurrProg;
delete prof_info;
error("FATAL ERROR: Memory exhausted.\n");
SMP_fprintf(InfoAnnotFile, " 8000000 2 ERROR MEMORYEXHAUSTED\n");
SMP_fclose(AnnotFile);
SMP_fclose(InfoAnnotFile);
SMP_fclose(ZST_AlarmFile);
SMP_fclose(STARS_XrefsFile);
return;
}
} // end IDAP_run()
char IDAP_comment[] = "ZephyrSoftware STARS (Static Analyzer for Reliability and Security)";
char IDAP_help[] = "Good luck";
char IDAP_name[] = "SMPStaticAnalyzer";
char IDAP_hotkey[] = "Alt-J";
plugin_t PLUGIN = {
IDP_INTERFACE_VERSION,
IDAP_init,
IDAP_term,
IDAP_run,
IDAP_comment,
IDAP_help,
IDAP_name,
IDAP_hotkey
};
clc5q
committed
// Find all code addresses in the IDA database and enter them into
// IDAProLocs. Find all code addresses identified by the external
// disassembler (e.g. objdump) and enter them into DisasmLocs.
void FindCodeAddresses(void) {
// Read in code addresses as found by an external disassembler.
ea_t CurrDisasmAddr;
string DisasmFileName(RootFileName);
string FileSuffix(".SMPobjdump");
DisasmFileName += FileSuffix;
clc5q
committed
FILE *DisasmFile = SMP_fopen(DisasmFileName.c_str(), "r");
clc5q
committed
if (NULL == DisasmFile) {
error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str());
return;
}
#define DISASM_RESERVE_SIZE 50000
DisasmLocs.reserve(DISASM_RESERVE_SIZE);
unsigned long TempAddr;
int ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
CurrDisasmAddr = (ea_t) TempAddr;
clc5q
committed
while (1 == ScanReturn) {
int NextChar;
DisasmLocs.push_back(CurrDisasmAddr);
// Swallow the rest of the input line and get the next address.
do {
NextChar = qfgetc(DisasmFile);
} while ((EOF != NextChar) && ('\n' != NextChar));
ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
CurrDisasmAddr = (ea_t) TempAddr;
clc5q
committed
} // end while (1 == ScanReturn)
if (0 >= DisasmLocs.size()) {
clc5q
committed
SMP_msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str());
SMP_fclose(DisasmFile);
clc5q
committed
return;
}
else {
clc5q
committed
SMP_msg("%zu Disasm addresses read from %s\n", DisasmLocs.size(),
clc5q
committed
DisasmFileName.c_str());
clc5q
committed
SMP_fclose(DisasmFile);
clc5q
committed
}
// Find all the code locs in the IDA Pro database. As we find
// them, store them in IDAProLocs.
for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (!seg->IsCodeSegment())
clc5q
committed
continue;
for (ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = get_item_end(addr)) {
clc5q
committed
flags_t InstrFlags = getFlags(addr);
if (isHead(InstrFlags) && isCode(InstrFlags)) {
IDAProLocs.push_back(addr);
clc5q
committed
if ((0x806cda4 <= addr) && (0x806cf99 >= addr))
clc5q
committed
SMP_msg("IDA code addr: %x\n", addr);
clc5q
committed
} // end if (isHead(addr) && isCode(addr)
#if SMP_DEBUG_FIXUP_IDB
else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) {
if (!isHead(InstrFlags))
clc5q
committed
SMP_msg("Weirdness: not isHead at %x\n", addr);
clc5q
committed
if (isUnknown(InstrFlags)) {
clc5q
committed
SMP_msg("Weirdness: isUnknown at %x\n", addr);
clc5q
committed
}
}
#endif
} // end for (ea_t addr = seg->startEA; ...)
clc5q
committed
return;
} // end FindCodeAddresses()
// Return true if addr is not a proper beginning address for an instruction.
// Return false otherwise.
// Currently, we claim that an instruction is misaligned if DisasmLocs does
// not contain it. This function is useful for dealing with errors in IDA
// code identification, in which a large code section is identified as data,
// but some instructions in the middle of the "data" are identified as
// code but IDA often starts on the wrong boundary in these cases.
bool IsCodeMisaligned(ea_t addr) {
	// An address counts as misaligned exactly when the external disassembler
	// did not list it. DisasmLocs is sorted in ascending address order because
	// of the way in which it was generated, so a binary search applies.
	// An empty DisasmLocs reports every address as misaligned instead of
	// indexing past the end of the vector.
	return !std::binary_search(DisasmLocs.begin(), DisasmLocs.end(), addr);
} // end of IsCodeMisaligned()
// Remove addr from IDAProLocs if it is present; do nothing otherwise
// (including when IDAProLocs is empty).
void RemoveIDACodeAddr(ea_t addr) {
	// IDAProLocs is sorted in ascending address order because of the way in
	// which it was generated, so locate the candidate with a binary search.
	vector<ea_t>::iterator RemovalIterator =
		std::lower_bound(IDAProLocs.begin(), IDAProLocs.end(), addr);
	if ((RemovalIterator != IDAProLocs.end()) && (*RemovalIterator == addr)) {
		IDAProLocs.erase(RemovalIterator);
	}
	return;
} // end of RemoveIDACodeAddr()
// Driver for all other fixing functions. Upon its return, the IDA
// database (IDB file) should be fixed up as much as we can fix it.
// Run the database fix-up passes in order: collect code addresses, audit code
// targets, find data inside code, audit tail chunk ownership and, when the
// external disassembler supplied addresses, fix code identification and the
// resulting new code chunks. Both address vectors are emptied afterwards.
void FixupIDB(void) {
	FindCodeAddresses();
#if SMP_DEBUG_FIXUP_IDB
	SpecialDebugOutput();
#endif
	AuditCodeTargets();
	FindDataInCode();
	AuditTailChunkOwnership();
	if (!DisasmLocs.empty()) {
		FixCodeIdentification();
		const int fixes = FixupNewCodeChunks();
#if SMP_DEBUG_FIXUP_IDB
		// NOTE(review): the original debug statement was missing from the
		// source; this message is a stand-in -- confirm the intended text.
		SMP_msg("FixupIDB: FixupNewCodeChunks() returned %d\n", fixes);
#else
		(void) fixes;
#endif
	}
	DisasmLocs.clear();
	IDAProLocs.clear();
} // end of FixupIDB()
// Find and print all data head addresses in code segments.
// If an isolated code instruction is found in the midst of a run
// of data bytes and has no code xrefs jumping to it, it is not
// reachable as code and is undoubtedly a mixup by IDA. Possibly
// the whole data region will be converted to code later, in which
// case the isolated code is not necessarily properly aligned and
// parsed at its present address, so we are glad to convert it into
// data anyway so that FindDataToConvert() will succeed on it later.
// Data to code conversion, and isolated code detection, are inhibited
// by IDA identifying several consecutive instructions in the midst
// of a data region, with the code addresses not agreeing with the
// external disassembler's code addresses. We will convert these
// misaligned instructions to data as we detect them. We will also
// convert unexplored bytes (isUnknown(flags) == true) into data if
// they are in the midst of a data sequence.
#define MIN_DATARUN_LEN 24 // #bytes on either side of "isolated" code
void FindDataInCode(void) {
size_t DataRunLen = 0; // How many data bytes in a row have we seen?
bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
// Now looking for data
ea_t IsolatedCodeAddr;
int IsolatedCodeLen;
int InstrLen;
clc5q
committed
bool InstOK;
insn_t LocalCmd;
uint32 LocalFeatures;
clc5q
committed
for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (!seg->IsCodeSegment())
clc5q
committed
continue;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
char SegName[MAXSTR];
clc5q
committed
ssize_t SegNameSize = SMP_get_segm_name(seg, SegName, sizeof(SegName) - 1);
SMP_msg("Non-code addresses for code segment %s from %x to %x\n",
clc5q
committed
SegName, seg->startEA, seg->endEA);
#endif
for (ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = get_item_end(addr)) {
clc5q
committed
flags_t AddrFlags = getFlags(addr);
if (isHead(AddrFlags)) {
if (isData(AddrFlags)) {
DataRunLen += get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Data: %x\n", addr);
clc5q
committed
#endif
if (MIN_DATARUN_LEN <= DataRunLen) {
if (IsolatedCodeTrigger) {
// Saw data, then one isolated code, then data
do_unknown_range(IsolatedCodeAddr, IsolatedCodeLen, DOUNK_SIMPLE);
RemoveIDACodeAddr(IsolatedCodeAddr);
if (do_data_ex(IsolatedCodeAddr, byteflag(),
IsolatedCodeLen, BADNODE)) {
SMP_msg("Converted isolated code to data: %lx\n",
(unsigned long) IsolatedCodeAddr);
clc5q
committed
}
else {
SMP_msg("Failed to convert isolated code to data: %lx len: %x\n",
(unsigned long) IsolatedCodeAddr, IsolatedCodeLen);
clc5q
committed
}
IsolatedCodeTrigger = false;
} // end if (IsolatedCodeTrigger)
} // end if (MIN_DATARUN_LEN <= DataRunLen)
} // end if (isData(AddrFlags)
else if (isUnknown(AddrFlags)) {
// Just in case; unknown usually means not head or tail
// If in a data run, convert to data.
InstrLen = get_item_size(addr);
clc5q
committed
SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q
committed
if (0 < DataRunLen) {
if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen += InstrLen;
}
else {
clc5q
committed
SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen = 0;
IsolatedCodeTrigger = false;
}
}
}
else if (isCode(AddrFlags)) { // must be true
if (MIN_DATARUN_LEN <= DataRunLen) {
clc5q
committed
SMP_msg("DataRunLen: %d at %x\n", DataRunLen, addr);
clc5q
committed
InstOK = SMPGetCmd(addr, LocalCmd, LocalFeatures);
assert(InstOK);
InstrLen = (int) LocalCmd.size;
// We don't check the returned InstrLen for validity because IsCodeMisaligned()
// will check for validity immediately below.
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Calling IsCodeMisaligned: len %d\n", InstrLen);
clc5q
committed
#endif
if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Code was misaligned.\n");
clc5q
committed
#endif
do_unknown_range(addr, InstrLen, DOUNK_SIMPLE);
RemoveIDACodeAddr(addr);
if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted misaligned code to data at %x : len: %x\n",
clc5q
committed
addr, InstrLen);
clc5q
committed
// Step back so data gets processed.
DataRunLen += get_item_size(addr);
continue; // skip reset of DataRunLen
}
else {
clc5q
committed
SMP_msg("Misaligned code left as unknown at %x : len: %x\n",
clc5q
committed
addr, InstrLen);
clc5q
committed
IsolatedCodeTrigger = false;
}
} // end if (IsCodeMisaligned() ...)
else if (!hasRef(AddrFlags)) {
// No references at all --> isolated code.
IsolatedCodeTrigger = true;
IsolatedCodeAddr = addr;
IsolatedCodeLen = InstrLen;
}
else {
clc5q
committed
SMP_xref_t xb;
bool ok = xb.SMP_first_to(addr, XREF_ALL);
clc5q
committed
if (!ok) {
// No code xrefs to this target addr.
IsolatedCodeTrigger = true;
IsolatedCodeAddr = addr;
IsolatedCodeLen = InstrLen;
}
}
} // end if (MIN_DATARUN_LEN <= DataRunLen)
else if (IsolatedCodeTrigger) {
// Two instructions in a row does not fit the pattern.
IsolatedCodeTrigger = false;
}
DataRunLen = 0;
} // end if (isData) ... else if (isUnknown) ... else isCode
} // end if (isHead)
else if (isUnknown(AddrFlags)) {
// If in a data run, convert to data.
InstrLen = get_item_size(addr);
clc5q
committed
SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q
committed
if (0 < DataRunLen) {
if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen += InstrLen;
}
else {
clc5q
committed
SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen = 0;
IsolatedCodeTrigger = false;
}
}
}
} // end for (ea_t addr = seg->startEA; ...)
clc5q
committed
return;
} // end of FindDataInCode()
// The choices that IDA makes for deciding which parent function of a
// TAIL chunk is the primary owner of the tail can be counterintuitive.
// A function entry can both fall into and jump to a tail chunk that
// is contiguous with it, yet the "owner" might be a function that is
// far below it in the executable address space. This function is meant
// to change the ownership to a more sensible arrangement.
// NOTE(review): the body is empty here, so no ownership is changed at
//  present -- confirm whether the implementation lives elsewhere.
void AuditTailChunkOwnership(void) {
    return;
} // end of AuditTailChunkOwnership()
// If the addresses signified from DisasmIndex to IDAProIndex are
// all considered data and do NOT follow a return instruction,
// return false and update AreaSize to reflect the area to be
// converted.
// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
ea_t PrevIDAAddr;
ea_t NextIDAAddr;
size_t ShadowDisasmIndex = DisasmIndex - 1;
ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
bool CannotConvert = false; // return value
bool DebugAddress = false;
#if SMP_DEBUG_FIXUP_IDB
DebugAddress = (DisasmAddr == 0x806c19a);
#endif
if (DebugAddress) {
clc5q
committed
SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex);
SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(),
clc5q
committed
DisasmLocs.size());
}
if (IDAProIndex >= IDAProLocs.size()) {
// Have already processed the last IDA address.
clc5q
committed
if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n");
clc5q
committed
return true;
}
else if (DisasmIndex >= DisasmLocs.size()) {
// Strange. Last Disasm address is only one to convert, and
// IDA still has addresses after that?
clc5q
committed
if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n");
clc5q
committed
return true;
}
else if (IDAProIndex < 2) {
// We have Disasm addrs before the very first IDA addr. We
// don't trust this boundary case.
clc5q
committed
if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n");
clc5q
committed
return true;
}
NextIDAAddr = IDAProLocs[IDAProIndex - 1];
PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
if (DebugAddress) SMP_msg(" PrevIDAAddr: %lx NextIDAAddr: %lx\n", (unsigned long) PrevIDAAddr, (unsigned long) NextIDAAddr);
clc5q
committed
// See if previous IDA address was a return.
flags_t PrevFlags = getFlags(PrevIDAAddr);
if (!isCode(PrevFlags) || !isHead(PrevFlags)) {
SMP_msg("PrevIDAAddr %lx not isCode or not isHead.\n", (unsigned long) PrevIDAAddr);
clc5q
committed
return true;
}
SMPInstr PrevInstr(PrevIDAAddr);
PrevInstr.Analyze();
clc5q
committed
if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n");
clc5q
committed
if (PrevInstr.MDIsReturnInstr()) {
// Right after a return come no-ops and 2-byte no-ops
// that are just for alignment. IDA does not seem to be
// happy when we convert all those to code.
clc5q
committed
if (DebugAddress) SMP_msg(" Data followed a return instruction.\n");
clc5q
committed
return true;
}
// Now, see if the area from DisasmAddr to NextIDAAddr is all data
// according to IDA.
while (DisasmAddr < NextIDAAddr) {
flags_t DataFlags = getFlags(DisasmAddr);
if (isTail(DataFlags)) {
if (DebugAddress) SMP_msg(" tail byte: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
DisasmAddr = get_item_end(DisasmAddr);
}
else if (isData(DataFlags)) {
if (DebugAddress) SMP_msg(" data byte: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
DisasmAddr = get_item_end(DisasmAddr);
}
else if (isCode(DataFlags)) {
// How could this ever happen?
if (DebugAddress) SMP_msg(" isCode: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
return true;
}
else { // must be isUnknown()
// Very conservative here; only want to convert when the whole
// region is data, because that is a symptom of IDA missing
// a piece of code within a function (usually a piece of code
// that is only reachable via an indirect jump).
if (DebugAddress) SMP_msg(" Not isData: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
return true;
}
if (DebugAddress) SMP_msg(" new DisasmAddr: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
} // end while (DisasmAddr < NextIDAAddr)
clc5q
committed
if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
clc5q
committed
if (!CannotConvert) {
// Success.
DisasmAddr = DisasmLocs[ShadowDisasmIndex];
AreaSize = NextIDAAddr - DisasmAddr;
if (DebugAddress) {
clc5q
committed
SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
clc5q
committed
AreaSize, ShadowDisasmIndex, DisasmIndex);
clc5q
committed
SMP_msg(" exiting FindDataToConvert()\n");
SMP_msg("\n");
clc5q
committed
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
}
} // end if (!CannotConvert)
return CannotConvert;
} // end of FindDataToConvert()
// Does a converted code region look like a function prologue? If so,
// we should not include it in the previous function.
// Placeholder: the address range is ignored and the answer is always false.
bool IsFunctionPrologue(ea_t StartAddr, ea_t EndAddr) {
    (void) StartAddr; // unused until prologue detection is written
    (void) EndAddr;
    const bool LooksLikePrologue = false; // **!!** TODO
    return LooksLikePrologue;
} // end of IsFunctionPrologue()
// Patch program bytes that could not be converted from
// data to code, if it can be determined that the bytes represent code
// that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
// often are found in optimized glibc code because gcc was able to
// determine that a function pointer was zero and did constant propagation,
// but unfortunately was not able to determine that the code was unreachable.
// IDA will not succeed in ua_code() for "call 0", but there is no danger
// of a working program ever executing this code. Replacing the call with
// no-ops permits us to continue converting a contiguous range of data to
// code, and permits IDA to reanalyze the function later.
// Returns true if program bytes were patched and the patched location
// could then be converted to code; false in every other case.
bool MDPatchUnconvertedBytes(ea_t CurrDisasmAddr) {
    flags_t AddrFlags = getFlags(CurrDisasmAddr);
    if (isData(AddrFlags) || isTail(AddrFlags)) {
        // Bytes should have been converted to unknown already.
        SMP_msg("Cannot patch data bytes or tail bytes at %lx\n", (unsigned long) CurrDisasmAddr);
        return false;
    }

    SMPInstr PatchInstr(CurrDisasmAddr);
    PatchInstr.Analyze();
    int InstrLen = PatchInstr.GetCmd().size;
    if (0 >= InstrLen) {
        SMP_msg("decode_insn() failed on patch location %lx\n", (unsigned long) CurrDisasmAddr);
        return false;
    }
    if (PatchInstr.GetCmd().itype != NN_call) {
        SMP_msg("Cannot patch non-call instruction at %lx\n", (unsigned long) CurrDisasmAddr);
        return false;
    }

    PatchInstr.PrintOperands();
    STARSOpndType CallDest = PatchInstr.GetFirstUse()->GetOp();
    if ((o_near != CallDest.type) || (0 != CallDest.addr)) {
        SMP_msg("Cannot patch call unless it is call near ptr 0 at %lx\n",
            (unsigned long) CurrDisasmAddr);
        return false;
    }

    // Overwrite every byte of the call with a one-byte no-op.
    ea_t PatchAddr = CurrDisasmAddr;
    for (int i = 0; i < InstrLen; ++i) {
        bool ok = patch_byte(PatchAddr, 0x90); // x86 no-op
        if (!ok) {
            SMP_msg("patch_byte() failed at %lx\n", (unsigned long) PatchAddr);
            return false;
        }
        ++PatchAddr;
    }
    SMP_msg("Patched %d bytes successfully at %lx\n", InstrLen, (unsigned long) CurrDisasmAddr);

    // Retry the data-to-code conversion on the patched bytes.
#if IDA_SDK_VERSION < 600
    InstrLen = ua_code(CurrDisasmAddr);
#else
    InstrLen = create_insn(CurrDisasmAddr);
#endif
    if (0 >= InstrLen) {
        SMP_msg(" ... but ua_code() still failed!\n");
        return false;
    }
    return true;
} // end of MDPatchUnconvertedBytes()
// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
clc5q
committed
// and an external disassembler (in DisasmLocs). Compare the lists and
// try to convert addresses to code that are found in DisasmLocs but
// not in IDAProLocs. Emit warnings when IDAProLocs has a code address
// not found in DisasmLocs.
void FixCodeIdentification(void) {
size_t DisasmIndex = 0;
ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
size_t IDAProIndex = 0;
ea_t CurrAddr = IDAProLocs[IDAProIndex++];
while (DisasmIndex <= DisasmLocs.size()) {
// If the current address is less than the current
// external disasm address, we have the rare case in
// which IDA Pro has identified an address as code
// but the external disasm has not. Emit a warning
// message and go on to the next IDA address.
if (CurrAddr < CurrDisasmAddr) {
SMPInstr TempInstr(CurrAddr);
TempInstr.Analyze();
SMP_msg("AUDIT: Address %lx is code in IDB but not in external disassembler: %s\n",
(unsigned long) CurrAddr, TempInstr.GetDisasm());
clc5q
committed
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
if (IDAProIndex < IDAProLocs.size())
CurrAddr = IDAProLocs[IDAProIndex++];
else {
// Last IDA addr; might still process Disasm addrs
// after loop exit.
break;
}
}
else if (CurrAddr == CurrDisasmAddr) {
// If equal, no problem, we are moving through the
// code addresses in lockstep. Grab the next address
// from each source.
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
++DisasmIndex; // cause loop exit; skip cleanup loop
}
if (IDAProIndex < IDAProLocs.size())
CurrAddr = IDAProLocs[IDAProIndex++];
else {
// Last IDA addr; might still process Disasm addrs
// after loop exit in cleanup loop.
break;
}
}
else {
// We must have CurrAddr > CurrDisasmAddr. That means
// IDA has jumped over some code addresses in
// DisasmLocs. We need to try to convert addresses
// to code until we can reach the current addr.
// For now, we will address only the case in which IDA
// has identified addresses as data bytes, and the
// external disassembler(e.g. objdump) has identified
// the same addresses as code. We only want to deal with
// contiguous areas of data-to-code conversion that do NOT
// follow a return statement.
int AreaSize = 0;
ea_t AreaStart = CurrDisasmAddr;
ea_t AreaEnd;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("CurrDisasmAddr: %x CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
clc5q
committed
#endif
bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
if (SkipArea) {
// Skip over the extra external disasm addresses.
while (CurrDisasmAddr < CurrAddr)
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
// Convert the overlooked code region to unexplored.
AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
clc5q
committed
#endif
do_unknown_range(AreaStart, AreaSize, DOUNK_SIMPLE);
SMP_bounds_t ConvertRegion;
ConvertRegion.startEA = AreaStart;
ConvertRegion.endEA = AreaEnd;
FixupRegion CurrRegion(ConvertRegion);
CodeReanalyzeList.push_back(CurrRegion);
clc5q
committed
bool AllConverted = true;
bool AllNops = true;
clc5q
committed
do {
flags_t InstrFlags = getFlags(CurrDisasmAddr);
if (!isUnknown(InstrFlags)) {
SMP_msg("Sync problem in FixCodeID: %lx\n", (unsigned long) CurrDisasmAddr);
clc5q
committed
}
else {
int InstrLen = ua_code(CurrDisasmAddr);
int InstrLen = create_insn(CurrDisasmAddr);
clc5q
committed
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(CurrDisasmAddr);
NewInstr.Analyze();
if (!NewInstr.IsNop())
AllNops = false;
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
#if 0
clc5q
committed
SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
clc5q
committed
InstrLen, NewInstr.GetDisasm());
#endif
clc5q
committed
#endif
}
else {
if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q
committed
SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q
committed
AllConverted = false;
clc5q
committed
SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q
committed
}
}
} // end if (isCode(InstrFlags) ... else ...
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
// cause loops to exit
CurrDisasmAddr = CurrAddr;
++DisasmIndex; // skip cleanup loop
}
} while (CurrDisasmAddr < CurrAddr);
if (AllConverted && AllNops) {
// We want to convert the region back to unexplored bytes
// and take it off the work list. Regions that are all nops
// create data flow analysis problems sometimes. The region
// is often unreachable code and produces a basic block with
// no predecessors within a function. This often happens when
// an optimizing compiler uses nops as padding to align jump
// targets on cache line bounaries. With no fall through into
// the nops, they are unreachable and should be left as unknown.
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
CurrRegion.GetEnd());
do_unknown_range(CurrRegion.GetStart(),
CurrRegion.GetEnd() - CurrRegion.GetStart(), DOUNK_SIMPLE);
CodeReanalyzeList.pop_back();
}
clc5q
committed
} // end if (SkipArea) ... else ...
} // end if (addr < CurrDisasmAddr) .. else if ... else ...
} // end while (DisasmIndex <= DisasmLocs.size()
#if 0 // Make this code use FindDataToConvert() **!!**
// Cleanup loop:
// If there are still Disasm addrs to process, try to turn them
// into code in the IDB.
while (DisasmIndex <= DisasmLocs.size()) {
flags_t InstrFlags = getFlags(CurrDisasmAddr);
if (isCode(InstrFlags)) {
clc5q
committed
SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
// Clear bytes to unexplored.
clc5q
committed
segment_t *seg = SMP_getseg(CurrDisasmAddr);
clc5q
committed
if (SEG_CODE == seg->type) {
do_unknown_range(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, DOUNK_SIMPLE);
}
else {
// Might be safest to just discontinue processing
// if we wander into a non-code segment.
// DisasmLocs should not have an entire code segment
// that IDA Pro missed.
break;
}
int InstrLen = ua_code(CurrDisasmAddr);
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(CurrDisasmAddr);
NewInstr.Analyze();
clc5q
committed
SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
clc5q
committed
NewInstr.GetDisasm());
}
else {
clc5q
committed
SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q
committed
}
} // end if (isCode(InstrFlags) ... else ...
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
++DisasmIndex; // cause loop to exit
}
} // end while (DisasmIndex <= DisasmLocs.size()
#endif
return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
// Earlier failures are usually because the instruction branches to an address that has not
// yet been converted from data to code, so ua_code() failed. Now that all data to code
// conversions have completed, ua_code() should succeed.
// Return the number of instructions successfully analyzed.
int FixupNewCodeChunks(void) {
list<FixupRegion>::iterator CurrRegion;
int changes = 0;
for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
bool AllConverted = true;
bool AllNops = true;
bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
if (NoFixups) {
CurrRegion->SetStart(BADADDR); // mark for removal
continue; // skip to next region
}
list<ea_t>::iterator CurrInstr;
for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr) {
#if IDA_SDK_VERSION < 600
int InstrLen = ua_code(*CurrInstr);
#else
int InstrLen = create_insn(*CurrInstr);
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(*CurrInstr);
NewInstr.Analyze();
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
#endif
if (!NewInstr.IsNop()) {
AllNops = false;
*CurrInstr = BADADDR; // mark for removal
}
}
else {
AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
}
} // end for all instrs in CurrRegion
if (AllConverted && !AllNops) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
CurrRegion->SetStart(BADADDR); // mark for removal
}
else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
do_unknown_range(CurrRegion->GetStart(),
CurrRegion->GetEnd() - CurrRegion->GetStart(), DOUNK_SIMPLE);
CurrRegion->SetStart(BADADDR); // mark for removal
}
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
else {
// Remove only the instructions that were fixed up.
CurrInstr = CurrRegion->FixupInstrs.begin();
while (CurrInstr != CurrRegion->FixupInstrs.end()) {
if (BADADDR == *CurrInstr) {
CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
}
else {
++CurrInstr;
}
}
}
} // end for all regions in the CodeReanalyzeList
// Remove completed regions from the CodeReanalyzeList
CurrRegion = CodeReanalyzeList.begin();
while (CurrRegion != CodeReanalyzeList.end()) {
if (BADADDR == CurrRegion->GetStart())
CurrRegion = CodeReanalyzeList.erase(CurrRegion);
else
++CurrRegion;
}
#if 0
if (AllConverted) {
if (IsFunctionPrologue(AreaStart, AreaEnd)) {
// Create a new function entry chunk here.
// **!!** TODO
;
}
else {
// Extend the previous chunk to include the
// converted code.
ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
STARS_Function_t *PrevChunk = get_fchunk(PrevIDAAddr);
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
SMP_msg(" STARS_Function_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0 // temporary for debugging
if (is_func_entry(PrevChunk)) {
// Extend the func entry to contain the new code.
if (func_setend(PrevIDAAddr, AreaEnd)) {
clc5q
committed
SMP_msg("Func extended to include code from %x to %x\n",
AreaStart, AreaEnd);
FuncReanalyzeList.push_back(PrevIDAAddr);
}
else {
clc5q
committed
SMP_msg("Failed to extend func from %x to %x\n",
AreaStart, AreaEnd);
}
}
else { // tail
// See if this works for function tails, also.
// Extend the func entry to contain the new code.
if (func_setend(PrevIDAAddr, AreaEnd)) {
clc5q
committed
SMP_msg("Tail extended to include code from %x to %x\n",
AreaStart, AreaEnd);
STARS_Function_t *TailOwner = get_func(PrevChunk->owner);
FuncReanalyzeList.push_back(PrevIDAAddr);
}
else {
clc5q
committed
SMP_msg("Failed to extend tail from %x to %x\n",
AreaStart, AreaEnd);
}
} // end if (is_func_entry()) ... else ...
#endif
} // end if (IsFunctionPrologue()) ... else ...
} // end if (AllConverted)
else {
clc5q
committed
SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
}
#endif
return changes;
} // end of FixupNewCodeChunnks()
// Audit the IDA code database by looking at all instructions in the
// code segment and printing all those that are not contained in a
// function. Emit the context-free annotations that we are able to
// emit on a per-instruction basis.
// Unexplored bytes are offered to IDA for conversion to code; when that
// succeeds and a disassembly line can be produced, the new instruction is
// annotated in isolation as well.
void FindOrphanedCode(STARS_Segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
    char disasm[MAXSTR]; // scratch buffer for generate_disasm_line()
    for (ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
        addr = get_item_end(addr)) {
        flags_t InstrFlags = getFlags(addr);
        if (isTail(InstrFlags))
            continue;
        if (isHead(InstrFlags) && isCode(InstrFlags)) {
            ea_t FirstFuncAddr;
            if (!(CurrProg->IsInstAddrStillInFunction(addr, FirstFuncAddr))) {
                SMPInstr CurrInst(addr);
                CurrInst.Analyze();
                SMP_msg("Orphan code at %lx : %s\n", (unsigned long) addr, CurrInst.GetDisasm());
                // TODO: If there are code xrefs to the orphan code,
                // see what kind. If a CALL, and orphan code looks
                // like a prologue, make a function. If a JUMP of
                // some kind, then make a function chunk and make
                // it a tail of all functions that jump to it. **!!**

                // Do machine-dependent fixes for DEF and USE lists.
                // The fixes can help produce better annotations.
                CurrInst.MDFixupDefUseLists();
                // If instruction is still not included in a code chunk,
                // emit annotations for it in isolation.
                CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
                // If instruction is an indirect branch, emit an XREF
                // annotation for each of its targets.
                SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
                if ((CurrDataFlow == INDIR_JUMP) || (CurrDataFlow == INDIR_CALL)) {
                    SMP_xref_t xrefs;
                    for (bool ok = xrefs.SMP_first_from(addr, XREF_ALL); ok; ok = xrefs.SMP_next_from()) {
                        if (xrefs.GetTo() != 0) {
                            if (xrefs.GetIscode() && (xrefs.GetType() != fl_F)) {
                                // Found a code target, with its address in xrefs.to
                                PrintCodeToCodeXref(addr, xrefs.GetTo(), CurrInst.GetSize());
                            }
                        }
                    }
                }
            }
        }
        else if (isUnknown(InstrFlags)) {
            SMP_msg("Unanalyzed byte at %lx\n", (unsigned long) addr);
            // Can IDA analyze this to be code?
            int InstrLen;
#if IDA_SDK_VERSION < 600
            InstrLen = ua_code(addr);
#else
            InstrLen = create_insn(addr);
#endif
            // Only treat the address as an instruction if the conversion worked.
            if (InstrLen > 0) {
                bool IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
                if (IDAsuccess) {
                    // Remove interactive color-coding tags.
                    ssize_t StringLen = tag_remove(disasm, disasm, 0);
                    if (-1 >= StringLen) {
                        SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
                    }
                    else {
                        SMP_msg("Successfully analyzed! %s\n", disasm);
                        SMPInstr UnknownInstr(addr);
                        UnknownInstr.Analyze();
                        // TODO: Get new code into a chunk. **!!**
                        // If instruction is still not included in a code chunk,
                        // emit annotations for it in isolation.
                        UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
                    }
                }
                else {
                    SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
                }
            }
        }
    } // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()
// Version of FindOrphanedCode that does not emit annotations but can be used
// to determine at what point in time code becomes orphaned.
// Every code head in CurrSeg that SMP_get_func() does not place in a function
// is inserted into CodeOrphans; on any run other than the first, an address
// that was not already in that set is reported as newly orphaned.
void Debug_FindOrphanedCode(STARS_Segment_t *CurrSeg, bool FirstRun) {
    const ea_t WatchedAddr = 0x8050db0; // address singled out for a debug message
    ea_t addr = CurrSeg->get_startEA();
    while (addr < CurrSeg->get_endEA()) {
        const flags_t ItemFlags = getFlags(addr);
        if (isHead(ItemFlags) && isCode(ItemFlags)) {
            STARS_Function_t *OwningFunc = SMP_get_func(addr);
            if (NULL == OwningFunc) { // Code not in a func; orphaned
                pair<set<ea_t>::iterator, bool> InsertResult = CodeOrphans.insert(addr);
                const bool FirstSighting = InsertResult.second;
                if (addr == WatchedAddr) {
                    SMP_msg("DEBUG: Orphaned code addr %lx found.\n", (unsigned long) addr);
                }
                if (FirstSighting && !FirstRun) {
                    SMP_msg("SERIOUS WARNING: Newly orphaned code at %lx \n", (unsigned long) addr);
                }
            }
        }
        addr = get_item_end(addr);
    } // end while (addr < CurrSeg->get_endEA())
} // end of Debug_FindOrphanedCode()
// Audit the IDA database with respect to branches and calls. They should
// each have valid code targets (not data or unknown bytes) and the code
// cross references should reflect the linkage.
// The body is empty, so calling this performs no audit.
void AuditCodeTargets(void) {
    return;
} // end of AuditCodeTargets()
void SpecialDebugOutput(void) {
char disasm[MAXSTR];
vector<ea_t> ProblemAddrs;
ProblemAddrs.push_back(0x8066d08);
bool IDAsuccess;
int InstLen;
ssize_t StringLen;
clc5q
committed
insn_t LocalCmd;
uint32 LocalFeatures;
for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
ea_t addr = ProblemAddrs[index];
flags_t InstrFlags = getFlags(addr);
if (isCode(InstrFlags) && isHead(InstrFlags)) {
clc5q
committed
IDAsuccess = SMPGetCmd(addr, LocalCmd, LocalFeatures);
InstLen = (int) LocalCmd.size;
if ((IDAsuccess) && (0 < InstLen)) {
IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
if (IDAsuccess) {
StringLen = tag_remove(disasm, disasm, 0);
if (-1 < StringLen)
SMP_msg("Problem addr %lx : %s\n", (unsigned long) addr, disasm);
SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
SMP_msg("ERROR: decode_insn failed at addr %lx \n", (unsigned long) addr);
}
}
return;
} // end of SpecialDebugOutput()
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
// Convert a call type string from the policy file, such as "FILECALLS", to the
// corresponding ZST_SysCallType, such as ZST_FILE_CALL.
// Matching is exact and case-sensitive; any string other than the three
// recognized names maps to ZST_UNMONITORED_CALL.
ZST_SysCallType ConvertStringToCallType(char *Str2) {
    if (0 == strcmp(Str2, "PRIVILEGECALLS"))
        return ZST_HIGHPRIVILEGE_CALL;
    if (0 == strcmp(Str2, "FILECALLS"))
        return ZST_FILE_CALL;
    if (0 == strcmp(Str2, "NETWORKCALLS"))
        return ZST_NETWORK_CALL;
    return ZST_UNMONITORED_CALL;
} // end of ConvertStringToCallType()
// Convert a policy string from the policy file, such as "DISALLOW", to
// the corresponding ZST_Policy value, such as ZST_DISALLOW.
// Matching is exact and case-sensitive; any string other than the three
// recognized names maps to ZST_ALLOWALL.
ZST_Policy ConvertStringToPolicy(char *Str3) {
    if (0 == strcmp(Str3, "DISALLOW"))
        return ZST_DISALLOW;
    if (0 == strcmp(Str3, "WHITELIST"))
        return ZST_WHITELIST;
    if (0 == strcmp(Str3, "BLACKLIST"))
        return ZST_BLACKLIST;
    // error handling precedes calls to this function
    return ZST_ALLOWALL;
} // end of ConvertStringToPolicy()
// Given a function name, return its Zephyr Security Toolkit call type.
// The name is looked up in ZST_FuncTypeMap; a name with no entry yields
// ZST_UNMONITORED_CALL (it might not even be a system call).
ZST_SysCallType GetCallTypeFromFuncName(string SysCallName) {
    map<string, ZST_SysCallType>::iterator Entry = ZST_FuncTypeMap.find(SysCallName);
    const bool Known = (Entry != ZST_FuncTypeMap.end());
    return Known ? Entry->second : ZST_UNMONITORED_CALL;
} // end of GetCallTypeFromFuncName()
// Get the user-specified security policy for the given call type.
// The call type is looked up in ZST_TypePolicyMap; when it has no entry
// the policy defaults to ZST_ALLOWALL.
ZST_Policy GetPolicyFromCallType(ZST_SysCallType CallType) {
    map<ZST_SysCallType, ZST_Policy>::iterator Entry = ZST_TypePolicyMap.find(CallType);
    const bool HasPolicy = (Entry != ZST_TypePolicyMap.end());
    return HasPolicy ? Entry->second : ZST_ALLOWALL;
} // end of GetPolicyFromCallType()
// Given a call type and called function name, is it on the location whitelist
// for that call type?
// File calls are checked against ZST_FileLocWhitelist and network calls
// against ZST_NetworkLocWhitelist, by exact match; other call types
// always yield false.
// NOTE: HANDLE CASE IN WHICH WHITELISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationWhitelisted(ZST_SysCallType CallType, string LocationName) {
    if (CallType == ZST_FILE_CALL) {
        set<string>::iterator Hit = ZST_FileLocWhitelist.find(LocationName);
        return (ZST_FileLocWhitelist.end() != Hit);
    }
    if (CallType == ZST_NETWORK_CALL) {
        set<string>::iterator Hit = ZST_NetworkLocWhitelist.find(LocationName);
        return (ZST_NetworkLocWhitelist.end() != Hit);
    }
    // should not be here
    return false;
} // end of IsLocationWhitelisted()
// Is the given location name on the location blacklist for the given call
// type? Only ZST_FILE_CALL and ZST_NETWORK_CALL have blacklists; every other
// call type yields false (callers should not pass one).
// NOTE: Only exact matches are found. The case in which a blacklisted
// location is a prefix, terminating in a slash, still needs to be handled.
bool IsLocationBlacklisted(ZST_SysCallType CallType, string LocationName) {
	if (CallType == ZST_FILE_CALL) {
		return (0 != ZST_FileLocBlacklist.count(LocationName));
	}
	if (CallType == ZST_NETWORK_CALL) {
		return (0 != ZST_NetworkLocBlacklist.count(LocationName));
	}
	return false; // should not be here
} // end of IsLocationBlacklisted()
// Is the called function a trusted input, i.e. one whose returned values
// produce only benign numeric errors when used in arithmetic? The answer is
// simply whether the name appears in ZST_SystemCallNumericWhitelist.
bool IsNumericSafeSystemCall(string CallName) {
	return (0 != ZST_SystemCallNumericWhitelist.count(CallName));
} // end of IsNumericSafeSystemCall()
// Utility functions to print code xrefs to STARS_XrefsFile
// Emit one "INSTR XREF IBT FROMIB" line to STARS_XrefsFile: the target code
// address, the instruction size, then the address the xref comes from.
void PrintCodeToCodeXref(ea_t FromAddr, ea_t ToAddr, size_t InstrSize) {
	// Addresses are converted to unsigned long to match the %lx conversions.
	// NOTE(review): if ea_t is wider than unsigned long on some platform, the
	// upper bits would be dropped here -- confirm.
	const unsigned long TargetAddr = static_cast<unsigned long>(ToAddr);
	const unsigned long SourceAddr = static_cast<unsigned long>(FromAddr);
	SMP_fprintf(STARS_XrefsFile, "%10lx %6zu INSTR XREF IBT FROMIB %10lx \n",
		TargetAddr, InstrSize, SourceAddr);
}
// Emit one "INSTR XREF IBT FROMDATA" line to STARS_XrefsFile: the target code
// address, the instruction size, then the data address the xref comes from.
void PrintDataToCodeXref(ea_t FromDataAddr, ea_t ToCodeAddr, size_t InstrSize) {
	// Addresses are converted to unsigned long to match the %lx conversions.
	// NOTE(review): if ea_t is wider than unsigned long on some platform, the
	// upper bits would be dropped here -- confirm.
	const unsigned long TargetCodeAddr = static_cast<unsigned long>(ToCodeAddr);
	const unsigned long SourceDataAddr = static_cast<unsigned long>(FromDataAddr);
	SMP_fprintf(STARS_XrefsFile, "%10lx %6zu INSTR XREF IBT FROMDATA %10lx \n",
		TargetCodeAddr, InstrSize, SourceDataAddr);
}
// Sizes of the character buffers (terminating NUL included) that hold the
// fields read from each line of the policy file.
// These two constants should agree with their counterparts in ZST-policy.c.
// The field widths hard-coded in the qfscanf() format string in
// ZST_InitPolicies() (%63s and %1023s) are these sizes minus one; they must
// be updated by hand if either size changes.
#define ZST_MAX_FILE_NAME_LEN 1024
#define ZST_MAX_CALL_NAME_LEN 64
// Read the foo.exe.policy file to initialize our security policies for system calls.
void ZST_InitPolicies(const char *PolicyFileName) {
clc5q
committed
FILE *PolicyFile = SMP_fopen(PolicyFileName, "r");
char Str1[ZST_MAX_CALL_NAME_LEN], Str2[ZST_MAX_CALL_NAME_LEN], Str3[ZST_MAX_FILE_NAME_LEN];
string SafeSystemCall1("gettimeofday");
ZST_SystemCallNumericWhitelist.insert(SafeSystemCall1);
if (NULL != PolicyFile) {
clc5q
committed
while (!SMP_feof(PolicyFile)) {
int ItemsRead = qfscanf(PolicyFile, "%63s %63s %1023s", Str1, Str2, Str3);
if (3 != ItemsRead) {
clc5q
committed
SMP_msg("ERROR: Line in %s had %d items instead of the required 3; line ignored.\n", PolicyFileName, ItemsRead);
}
else {
string ThirdStr(Str3);
pair<set<string>::iterator, bool> SetInsertResult;
if (0 == strcmp(Str1, "SECURITYPOLICY")) {
ZST_SysCallType TempCallType = ConvertStringToCallType(Str2);
ZST_Policy TempPolicy = ConvertStringToPolicy(Str3);
pair<map<ZST_SysCallType, ZST_Policy>::iterator, bool> InsertResult;
pair<ZST_SysCallType, ZST_Policy> TempPair(TempCallType, TempPolicy);
InsertResult = ZST_TypePolicyMap.insert(TempPair);
if (!(InsertResult.second)) {
clc5q
committed
SMP_msg("ERROR: Could not insert security policy %s for %s. Possible duplicate or conflicting policies.\n",
Str3, Str2);
}
}
else if (0 == strcmp(Str1, "FILELOCATION")) {
if (0 == strcmp(Str2, "WHITELIST")) {
SetInsertResult = ZST_FileLocWhitelist.insert(ThirdStr);
if (!(SetInsertResult.second)) {
clc5q
committed
SMP_msg("WARNING: Duplicate file whitelist location %s ignored.\n", Str3);
}
}
else if (0 == strcmp(Str2, "BLACKLIST")) {
SetInsertResult = ZST_FileLocBlacklist.insert(ThirdStr);
if (!(SetInsertResult.second)) {
clc5q
committed
SMP_msg("WARNING: Duplicate file blacklist location %s ignored.\n", Str3);
}
}
else {
clc5q
committed
SMP_msg("ERROR: Unknown second field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
}
}
else if (0 == strcmp(Str1, "NETWORKLOCATION")) {
if (0 == strcmp(Str2, "WHITELIST")) {
SetInsertResult = ZST_NetworkLocWhitelist.insert(ThirdStr);
if (!(SetInsertResult.second)) {
clc5q
committed
SMP_msg("WARNING: Duplicate network whitelist location %s ignored.\n", Str3);
}
}
else if (0 == strcmp(Str2, "BLACKLIST")) {
SetInsertResult = ZST_NetworkLocBlacklist.insert(ThirdStr);
if (!(SetInsertResult.second)) {
clc5q
committed
SMP_msg("WARNING: Duplicate network blacklist location %s ignored.\n", Str3);
}
}
else {
clc5q
committed
SMP_msg("ERROR: Unknown second field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
}
}
else {
clc5q
committed
SMP_msg("ERROR: Unknown first field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
}
}
}
clc5q
committed
if (0 == SMP_fclose(PolicyFile)) {
SMP_msg("Policy file %s successfully closed; all policies recorded.\n", PolicyFileName);
}
else {
clc5q
committed
SMP_msg("ERROR: fclose failed on policy file %s. However, policies should be in effect.\n", PolicyFileName);
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
}
// Now, initialize the system call name maps.
pair<map<string, ZST_SysCallType>::iterator, bool> FuncInsertResult;
// Do all the high privilege calls first.
string SysFuncName("putenv");
pair<string, ZST_SysCallType> FuncNamePolicyPair(SysFuncName, ZST_HIGHPRIVILEGE_CALL);
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setenv");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setegid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("seteuid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setgid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setpgid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setregid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setreuid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("setuid");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("execl");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("execv");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("execle");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("execve");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("execlp");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("execvp");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("system");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
// Now do all the file operation calls.
FuncNamePolicyPair.second = ZST_FILE_CALL;
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("chdir");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("chmod");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("chown");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("creat");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("creat64");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("fopen");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("freopen");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("open");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("open64");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("mknod");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("remove");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("rmdir");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
FuncNamePolicyPair.first.clear();
FuncNamePolicyPair.first.append("unlink");
FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
assert(FuncInsertResult.second);
// Finally, handle all the network connection calls.
FuncNamePolicyPair.second = ZST_NETWORK_CALL;
FuncNamePolicyPair.first.clear();
Loading
Loading full blame...