Skip to content
Snippets Groups Projects
SMPStaticAnalyzer.cpp 179 KiB
Newer Older
jdh8d's avatar
jdh8d committed
/*
 * SMPStaticAnalyzer.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
jdh8d's avatar
jdh8d committed
 */

//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
//   (Software Memory Protection).
//

#include <set>
#include "SMPStaticAnalyzer.h"
#include "SMPDataFlowAnalysis.h"
#include "SMPFunction.h"
#include "SMPInstr.h"
#include "ProfilerInformation.h"
#define SMP_DEBUG_DELAY 0   // for setting an early breakpoint

// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG3 0   // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0  // Find & fix data addresses in code segments

// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
//  which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356  // How many funcs to process in problem search
int FuncsProcessed = 0;

#define SMP_FIXUP_IDB 0  // Try to fix the IDA database?
clc5q's avatar
clc5q committed
#define SMP_DEBUG_FIXUP_IDB 0  // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1  // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 1 // Detect whether we are causing code to be orphaned

#define SMP_IDAP_RUN_DELAY 0  // Delay in IDAP_run() so we can attach debugger to process.
#define STARS_GENERATE_ASM_FILE 1  // Generate ASM file at end of processing?
#define STARS_GENERATE_DIF_FILE STARS_SCCP_CONVERT_UNREACHABLE_BLOCKS  // If we optimize, generate DIF file
static SMPProgram *CurrProg = NULL;
jdh8d's avatar
jdh8d committed
STARS_Interface_t* global_stars_interface=NULL;
#if SMP_DEBUG_CODE_ORPHANS
set<ea_t> CodeOrphans;
#endif
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
//  into the middle of an instruction to IDA Pro, causing it to not collect instructions
//  into a procedure. We replace these bytes with no-op opcodes because none of our analyses
//  care about LOCK prefices. We store the addresses where we have done the replacement in a 
//  set in case we ever care.
#define X86_LOCK_PREFIX 0xF0
set<ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
static unsigned long CustomAnaCallCount = 0;

// Define optimization categories for instructions.
int OptCategory[NN_last + 1];
// Initialize the OptCategory[] array.
void InitOptCategory(void);

// Flag to force reduced analysis so we don't run out of virtual memory
bool STARS_PerformReducedAnalysis;

// Indentation level when emitting SPARK Ada translation of the RTLs.
unsigned short STARS_SPARK_IndentCount;

// Record which opcodes change the stack pointer, and by how many
//  bytes up (reduction in stack size for stacks that grow downward)
//  or down (increase in stack size for stacks that grow downward).
sval_t StackAlteration[NN_last + 1];
// Initialize the StackAlteration[] array.
void InitStackAlteration(void);

// Keep statistics on how many instructions we saw in each optimization
//  category, and how many optimizing annotations were emitted for
//  each category.
int OptCount[LAST_OPT_CATEGORY + 1];
int AnnotationCount[LAST_OPT_CATEGORY + 1];

// Unique data referent number to use in data annotations.
unsigned long DataReferentID;

// Debugging counters for analyzing memory usage.
unsigned long UnusedInstrCount;
unsigned long UnusedBlockCount;
unsigned long UnusedStructCount;
unsigned long UnusedIntCount;

// Counters for dead metadata analysis.
unsigned long DeadMetadataCount;
unsigned long LiveMetadataCount;

// Counters for indirect jump resolution.
unsigned long ResolvedIndirectJumpCount;
unsigned long UnresolvedIndirectJumpCount;

// Counters for measuring SCCP success in finding constant DEFs.
unsigned long ConstantDEFCount;
unsigned long AlwaysTakenBranchCount;
unsigned long NeverTakenBranchCount;

// Counters for accessing less than machine register width.
unsigned long SubwordRegCount;
unsigned long SubwordMemCount;
unsigned long SubwordAddressRegCount;
unsigned long SPARKOperandCount; // total operands printed

#if SMP_COUNT_MEMORY_ALLOCATIONS
// Counters for analyzing memory use for allocated and used objects.
unsigned long SMPInstCount;
unsigned long SMPBlockCount;
unsigned long SMPFuncCount;
unsigned long SMPGlobalVarCount;
unsigned long SMPLocalVarCount;
unsigned long SMPDefUseChainCount;
unsigned long SMPInstBytes;
unsigned long SMPDefUseChainBytes;
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
unsigned long NumericAnnotationsCount12; // cases 1 and 2
unsigned long NumericAnnotationsCount3;  // case 3
unsigned long TruncationAnnotationsCount; // case 4
unsigned long SignednessWithoutTruncationCount; // case 5
unsigned long LeaInstOverflowCount; // case 6
unsigned long WidthDoublingTruncationCount; // case 7
unsigned long BenignOverflowInstCount;
unsigned long BenignOverflowDefCount;
unsigned long SuppressStackPtrOverflowCount;
unsigned long SuppressLiveFlagsOverflowCount;
unsigned long LiveMultiplyBitsCount;
unsigned long BenignTruncationCount;
unsigned long SuppressTruncationRegPiecesAllUsed;
unsigned long SuppressSignednessOnTruncation;
#endif
clc5q's avatar
clc5q committed
#if STARS_SCCP_GATHER_STATISTICS
// Counters for analyzing Sparse Conditional Constant Propagation effectiveness.
unsigned long SCCPFuncsWithArgWriteCount;
unsigned long SCCPFuncsWithConstantArgWriteCount;
unsigned long SCCPOutgoingArgWriteCount;
unsigned long SCCPConstantOutgoingArgWriteCount;
#endif

// Counter for max # of basic blocks seen in one function.
unsigned long STARS_MaxBlockCount;

clc5q's avatar
clc5q committed
// Is the binary a 32-bit, 64-bit, etc. instruction set architecture
size_t STARS_ISA_Bitwidth;
size_t STARS_ISA_Bytewidth;
char STARS_ISA_dtyp;
int STARS_MD_LAST_SAVED_REG_NUM;

// The types of data objects based on their first operand flags.
const char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR",
		"SEG", "OFFSET", "NUMBIN", "NUMOCT", "ENUM", "FORCED", 
		"STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN", 
		"UNKNOWN", "UNKNOWN", 0};

// Filename (not including path) of executable being analyzed.
clc5q's avatar
clc5q committed
char RootFileName[MAXSTR];
// strings for printing ZST_SysCallType
const char *CallTypeNames[4] = { "Unrestricted", "High-Privilege", "File-Access", "Network-Access" };

clc5q's avatar
clc5q committed
// Operand type that can have all fields initialized to o_void and zero
//  values, to be used to copy-initialize operands that we are adding to
//  RTLs and DEF and USE lists.
clc5q's avatar
clc5q committed

// File foo.exe.alarms for Zephyr Security Toolkit security alarm messages.
FILE *ZST_AlarmFile;

// File for code xref targets (helps ILR, makes IRDB more complete)
FILE *STARS_XrefsFile;

// File to provide details on fast returns, safe and unsafe return-address functions, etc.
FILE *STARS_CallReturnFile;

FILE *ZST_SPARKSourceFile;
FILE *ZST_SPARKHeaderFile;
// Code addresses identified by a disassembler, such as objdump on
//  Linux. These can be used to improve the code vs. data identification
//  of IDA Pro.
vector<ea_t> DisasmLocs;
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
vector<ea_t> IDAProLocs;

// Bit masks for extracting bits from a STARSBitSet unsigned char.
const unsigned char STARSBitMasks[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };

// Function start and end addresses (for function entry chunks only).
//  Kept here because IDA Pro 5.1 seems to have a memory overwriting
//  problem when iterating through all functions in the program. An existing
//  STARS_Function_t *ChunkInfo data structure was getting overwritten by one of the 
//  function STARS_Function_t data structures, causing changes of startEA and endEA among
//  other things.

vector<SMP_bounds_t> FuncBounds;

// List of functions that need to be reanalyzed after all the code fixup
//  and code discovery is complete. Kept as a list of addresses; any address
//  within the function is good enough to designate it.
list<ea_t> FuncReanalyzeList;

// A code region that has been converted from data but has code addresses that
//  need to be reanalyzed. This is usually because a former data address is
//  now a jump to a code target that is still a data address. We have to wait
//  until the target has become code before IDA will accept the jump as valid.
class FixupRegion {
public:
	FixupRegion(SMP_bounds_t);
	inline ea_t GetStart(void) const { return CodeRegion.startEA; };
	inline ea_t GetEnd(void) const { return CodeRegion.endEA; };
	inline void SetStart(ea_t addr) { CodeRegion.startEA = addr; };
	list<ea_t> FixupInstrs; // easier to expose than to encapsulate
private:
	SMP_bounds_t CodeRegion;
};

FixupRegion::FixupRegion(SMP_bounds_t Range) {
	this->CodeRegion = Range;
	return;
}

// List of code regions that were not completely analysed because of jump to
//  data considerations.
list<FixupRegion> CodeReanalyzeList;
// Map library function names to their system call type.
map<string, ZST_SysCallType> ZST_FuncTypeMap;

// Map system call types to their Zephyr Security Toolkit security policy.
map<ZST_SysCallType, ZST_Policy> ZST_TypePolicyMap;

// Set of whitelisted file locations.
set<string> ZST_FileLocWhitelist;

// Set of whitelisted network locations.
set<string> ZST_NetworkLocWhitelist;

// Set of blacklisted file locations.
set<string> ZST_FileLocBlacklist;

// Set of blacklisted network locations.
set<string> ZST_NetworkLocBlacklist;

// Set of system call names whose returned values should be trusted to have only benign numeric errors.
set<string> ZST_SystemCallNumericWhitelist;

// Functions for diagnosing and/or fixing problems in the IDA database.
void FixupIDB(void);  // Driver for all other fixing functions.
void FindDataInCode(void);
void AuditTailChunkOwnership(void);
jdh8d's avatar
jdh8d committed
void FindOrphanedCode(STARS_Segment_t *, FILE *, FILE *);
void Debug_FindOrphanedCode(STARS_Segment_t *, bool);
void AuditCodeTargets(void);
void SpecialDebugOutput(void);
void ZST_InitPolicies(const char *);
static unsigned long DebugCounter = 0;

// Turn LOCK prefix into no-op when detected. Each is one byte in length.
bool STARS_custom_ana(ea_t CurrentAddr) {
	int code = get_byte(CurrentAddr);
	++CustomAnaCallCount;
	if (X86_LOCK_PREFIX != code) {
		return false;
	}
	else {
#define STARS_ANA_DEBUG_DELAY 0
#if STARS_ANA_DEBUG_DELAY
		if (DebugCounter == 0) {
			time_t start;
			time_t current;

			time(&start);
			printf("delay for 15 seconds.\n");
			do {
				time(&current);
			} while(difftime(current,start) < 15.0);
			++DebugCounter;
		}
#endif		
		pair<set<ea_t>::iterator, bool> InsertResult;
		InsertResult = LockPreficesRemoved.insert(CurrentAddr);
		assert(InsertResult.second);
		cmd.itype = NN_nop; // make it a no-op
		cmd.size = 1; // one-byte no-op
#if 0
		cmd.auxpref = 0; // clear prefix and flags fields
		cmd.segpref = 0;
		cmd.insnpref = 0;
		cmd.flags = 0;
#endif
		return true;
	}
} // end of STARS_custom_ana()

static int idaapi idp_callback(void *, int event_id, va_list va) {
#if STARS_REMOVE_LOCK_PREFIX
	if (event_id == processor_t::custom_ana) {
		ea_t CurrentAddr = cmd.ea;
#if 1
		int code = ua_next_byte();
		++CustomAnaCallCount;
		if (X86_LOCK_PREFIX == code) {
			pair<set<ea_t>::iterator, bool> InsertResult;
			InsertResult = LockPreficesRemoved.insert(CurrentAddr);
			cmd.itype = NN_nop; // make it a no-op
			return (int) (cmd.size + 1);
		}
		else {
			return 0;
		}
#else
		if (STARS_custom_ana(CurrentAddr)) {
			return 1; // handled event
		}
#endif
	}
#endif // STARS_REMOVE_LOCK_PREFIX

	if (event_id == ph.auto_empty_finally) {   // IDA analysis is done
		// Ensure correct working environment.
		// Handle Cyber Grand Challenge project variant of ELF.
#define STARS_CGC_FILETYPE 16705
		if ((inf.filetype != f_ELF) && (inf.filetype != f_PE) && (inf.filetype != STARS_CGC_FILETYPE)) {
jdh8d's avatar
jdh8d committed
			error("Executable format must be PE, ELF or CGC.");
			return 0;
		}
#if STARS_REMOVE_LOCK_PREFIX
		SMP_msg("INFO: Calls to STARS_custom_ana: %lu \n", CustomAnaCallCount);
		SMP_msg("INFO: Number of LOCK prefices eliminated: %zu \n", LockPreficesRemoved.size());
		LockPreficesRemoved.clear();
#endif // STARS_REMOVE_LOCK_PREFIX

	return 0;  // did not process any event
} // end of idp_callback()

	/* init the interface */
	global_stars_interface=new STARS_IDA_Interface_t;


#if 0 // We are now calling from the SMP.idc script.
	// Skip this plugin if it was not specified by the user on the
	//  command line.
	if (get_plugin_options("SMPStaticAnalyzer") == NULL) {
		return PLUGIN_SKIP;
	}
#endif
	// Ensure correct working environment.
	if (ph.id != PLFM_386) {
		error("Processor must be x86.");
 		return PLUGIN_SKIP;
	}
#if 0 // too early to detect 64-bit; moved to IDAP_run()
	if (inf.is_64bit()) {
clc5q's avatar
clc5q committed
		STARS_ISA_Bitwidth = 64;
		STARS_ISA_dtyp = dt_qword;
		SMP_msg("INFO: 64-bit binary detected.\n");
clc5q's avatar
clc5q committed
	}
	else {
		STARS_ISA_Bitwidth = 32;
		STARS_ISA_dtyp = dt_dword;
		SMP_msg("INFO: 32-bit binary detected.\n");
clc5q's avatar
clc5q committed
	}
	STARS_ISA_Bytewidth = (STARS_ISA_Bitwidth / 8);
	hook_to_notification_point(HT_IDP, idp_callback, NULL);
	STARS_PerformReducedAnalysis = false;
	STARS_SPARK_IndentCount = 1;
	UnusedStructCount = 0;
	UnusedIntCount = 0;
	DeadMetadataCount = 0;
	LiveMetadataCount = 0;
	ResolvedIndirectJumpCount = 0;
	UnresolvedIndirectJumpCount = 0;
	ConstantDEFCount = 0;
	AlwaysTakenBranchCount = 0;
	NeverTakenBranchCount = 0;
	SubwordRegCount = 0;
	SubwordMemCount = 0;
	SubwordAddressRegCount = 0;
	SPARKOperandCount = 0;
#if SMP_COUNT_MEMORY_ALLOCATIONS
	SMPInstCount = 0;
	SMPBlockCount = 0;
	SMPDefUseChainCount = 0;
	SMPFuncCount = 0;
	SMPGlobalVarCount = 0;
	SMPLocalVarCount = 0;
	SMPInstBytes = 0;
	SMPDefUseChainBytes = 0;
#endif
#if SMP_MEASURE_NUMERIC_ANNOTATIONS
	NumericAnnotationsCount12 = 0; 
	NumericAnnotationsCount3 = 0; 
	TruncationAnnotationsCount = 0;
	SignednessWithoutTruncationCount = 0;
	LeaInstOverflowCount = 0; 
	WidthDoublingTruncationCount = 0;
	BenignOverflowInstCount = 0;
	BenignOverflowDefCount = 0;
	SuppressStackPtrOverflowCount = 0;
	SuppressLiveFlagsOverflowCount = 0;
	LiveMultiplyBitsCount = 0;
	BenignTruncationCount = 0;
	SuppressTruncationRegPiecesAllUsed = 0;
	SuppressSignednessOnTruncation = 0;
clc5q's avatar
clc5q committed
#endif
#if STARS_SCCP_GATHER_STATISTICS
	SCCPFuncsWithArgWriteCount = 0;
	SCCPFuncsWithConstantArgWriteCount = 0;
	SCCPOutgoingArgWriteCount = 0;
	SCCPConstantOutgoingArgWriteCount = 0;
#endif
clc5q's avatar
clc5q committed
	InitOp.n = 0;
clc5q's avatar
clc5q committed
	InitOp.offb = 0;
	InitOp.offo = 0;
	InitOp.flags = 0;
	InitOp.set_showed();
	// NOTE: InitOp.dtyp field is initialized in IDAP_run() to 32 or 64 bits.
clc5q's avatar
clc5q committed
	InitOp.reg = R_none;
clc5q's avatar
clc5q committed
	InitOp.specflag1 = 0;
	InitOp.specflag2 = 0;
	InitOp.specflag3 = 0;
	InitOp.specflag4 = 0;
	ZST_SPARKSourceFile = NULL;
	ZST_SPARKHeaderFile = NULL;
#ifdef STARS_IRDB_INTERFACE
	SMPLogFile = NULL;
#endif
	return PLUGIN_KEEP;
} // end of IDAP_init

void IDAP_term(void) {
	unhook_from_notification_point(HT_IDP, idp_callback, NULL);
	return;
}

void IDAP_run(int arg) {
	FILE *AnnotFile;
	FILE *InfoAnnotFile;
	FILE *AsmFile;
	// For debugging, we can add a delay loop so we have time to attach gdb to the
	// running process and set a breakpoint.
#if SMP_DEBUG_DELAY
	time_t start;
	time_t current;

	time(&start);
	printf("delay for 15 seconds.\n");
	do {
		time(&current);
	} while(difftime(current,start) < 15.0);
#endif

	if (inf.is_64bit()) {
		STARS_ISA_Bitwidth = 64;
		STARS_ISA_dtyp = dt_qword;
		STARS_MD_LAST_SAVED_REG_NUM = R_r15;
		SMP_msg("INFO: 64-bit binary detected.\n");
	}
	else {
		STARS_ISA_Bitwidth = 32;
		STARS_ISA_dtyp = dt_dword;
		STARS_MD_LAST_SAVED_REG_NUM = R_di;
		SMP_msg("INFO: 32-bit binary detected.\n");
	}
	STARS_ISA_Bytewidth = (STARS_ISA_Bitwidth / 8);
	InitOp.dtyp = STARS_ISA_dtyp;

    InitOptCategory();
	InitDFACategory();
	InitTypeCategory();
	InitSMPDefsFlags();
	InitSMPUsesFlags();
	InitLibFuncFGInfoMaps();
	InitIntegerErrorCallSinkMap();
	InitUnsignedArgPositionMap();
	InitStackAlteration();
	MDInitializeArgumentRegs();
#ifdef STARS_IRDB_INTERFACE
	string ZSTLogFileName(RootFileName);
	string LogFileSuffix(".STARSlog");
	ZSTLogFileName += LogFileSuffix;
	SMPLogFile = SMP_fopen(ZSTLogFileName.c_str(), "w");
	if (NULL == SMPLogFile) {
		error("ERROR: Cannot open STARS log file %s\n", ZSTLogFileName.c_str());
		error("Redirecting to stderr.\n");
		SMPLogFile = stderr;
	}
#endif

	SMP_msg("IDA SDK version: %d \n", IDA_SDK_VERSION);
#if 1
	DefOrUse DummyRef;
	size_t RefObjectSize = sizeof(DummyRef), OpndSize = sizeof(DummyOperand);
	SMP_msg("INFO: Size of DefOrUse: %zu Size of op_t: %zu \n", RefObjectSize, OpndSize);
	ssize_t FileLen;
	FileLen = get_root_filename(RootFileName, sizeof(RootFileName) - 1);
	string AnnotFileName(RootFileName);
	AnnotFileName += FileSuffix;
	string InfoAnnotFileName(RootFileName);
	string InfoFileSuffix(".infoannot");
	InfoAnnotFileName += InfoFileSuffix;
	string ZSTPolicyFileName(RootFileName);
	string PolicyFileSuffix(".policy");
	ZSTPolicyFileName += PolicyFileSuffix;
	string ZSTAlarmFileName(RootFileName);
	string AlarmFileSuffix(".alarms");
	ZSTAlarmFileName += AlarmFileSuffix;
	string AsmFileName(RootFileName);
	string AsmFileSuffix(".asm");
	AsmFileName += AsmFileSuffix;
	string DifFileName(RootFileName);
	string DifFileSuffix(".dif");
	DifFileName += DifFileSuffix;
	string XrefsFileName(RootFileName);
	string XrefsFileSuffix(".STARSxrefs");
	XrefsFileName += XrefsFileSuffix;
	string CallRetFileName(RootFileName);
	string CallRetFileSuffix(".STARScallreturn");
	CallRetFileName += CallRetFileSuffix;
#if ZST_EMIT_SPARK_ADA_TRANSLATION
	string SPARKSourceFileName(RootFileName);
	string SPARKSourceFileSuffix(".ZSTSPARK.adb");
	SPARKSourceFileName += SPARKSourceFileSuffix;
	string SPARKHeaderFileName(RootFileName);
	string SPARKHeaderFileSuffix(".ZSTSPARK.ads");
	SPARKHeaderFileName += SPARKHeaderFileSuffix;
#endif
	// For debugging, we can add a delay loop so we have time to attach gdb to the
	// running process and set a breakpoint.
#if SMP_IDAP_RUN_DELAY
	time_t start;
	time_t current;

	time(&start);
	printf("delay for 15 seconds.\n");
	do {
		time(&current);
	} while(difftime(current,start) < 15.0);
#endif

	ea_t RecentAddr;
#if SMP_DEBUG_CODE_ORPHANS
	CodeOrphans.clear();
	RecentAddr = BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			Debug_FindOrphanedCode(seg, true);
	}
#endif

	STARS_XrefsFile = SMP_fopen(XrefsFileName.c_str(), "w");
	if (NULL == STARS_XrefsFile) {
		error("FATAL ERROR: Cannot open STARS code xrefs file %s\n", XrefsFileName.c_str());
		return;
	}

	STARS_CallReturnFile = SMP_fopen(CallRetFileName.c_str(), "w");
	if (NULL == STARS_CallReturnFile) {
		error("FATAL ERROR: Cannot open STARS calls and returns info file %s\n", CallRetFileName.c_str());
		SMP_fclose(STARS_XrefsFile);
		return;
	}

jdh8d's avatar
jdh8d committed
	CurrProg->AnalyzeData(); // Analyze static data in the executable
	// read the Profiler generated information into a new prof_info class 
	ProfilerInformation *prof_info = new ProfilerInformation(AnnotFileName.c_str(), CurrProg);
	AnnotFile = SMP_fopen(AnnotFileName.c_str(), "w");
	if (NULL == AnnotFile) {
		error("FATAL ERROR: Cannot open output file %s\n", AnnotFileName.c_str());
		SMP_fclose(STARS_XrefsFile);
		SMP_fclose(STARS_CallReturnFile);
	InfoAnnotFile = SMP_fopen(InfoAnnotFileName.c_str(), "w");
	if (NULL == InfoAnnotFile) {
		error("FATAL ERROR: Cannot open output file %s\n", InfoAnnotFileName.c_str());
		SMP_fclose(STARS_XrefsFile);
		SMP_fclose(STARS_CallReturnFile);
jdh8d's avatar
jdh8d committed
		delete prof_info;
	ZST_AlarmFile = SMP_fopen(ZSTAlarmFileName.c_str(), "w");
	if (NULL == ZST_AlarmFile) {
		error("FATAL ERROR: Cannot open security alarms file %s\n", ZSTAlarmFileName.c_str());
		SMP_fclose(STARS_XrefsFile);
		SMP_fclose(STARS_CallReturnFile);
		SMP_fclose(AnnotFile);
		SMP_fclose(InfoAnnotFile);
#if ZST_EMIT_SPARK_ADA_TRANSLATION
	ZST_SPARKSourceFile = SMP_fopen(SPARKSourceFileName.c_str(), "w");
	if (NULL == ZST_SPARKSourceFile) {
		error("FATAL ERROR: Cannot open SPARK-Ada source output file %s\n", SPARKSourceFileName.c_str());
		SMP_fclose(STARS_XrefsFile);
		SMP_fclose(STARS_CallReturnFile);
		SMP_fclose(AnnotFile);
		SMP_fclose(InfoAnnotFile);
		SMP_fclose(ZST_AlarmFile);
		delete prof_info;
		return;
	}
	ZST_SPARKHeaderFile = SMP_fopen(SPARKHeaderFileName.c_str(), "w");
	if (NULL == ZST_SPARKHeaderFile) {
		error("FATAL ERROR: Cannot open SPARK-Ada header output file %s\n", SPARKHeaderFileName.c_str());
		SMP_fclose(STARS_XrefsFile);
		SMP_fclose(STARS_CallReturnFile);
		SMP_fclose(AnnotFile);
		SMP_fclose(InfoAnnotFile);
		SMP_fclose(ZST_AlarmFile);
		SMP_fclose(ZST_SPARKSourceFile);
		delete prof_info;
		return;
	}
#endif

	// Read the Zephyr Security Toolkit system call security policies, if available.
	ZST_InitPolicies(ZSTPolicyFileName.c_str());

	(void) memset(OptCount, 0, sizeof(OptCount));
	(void) memset(AnnotationCount, 0, sizeof(AnnotationCount));

	try { // We will catch memory exhaustion errors.

	// Record the start and end addresses for all function entry
	//  chunks in the program.
	FuncBounds.reserve(10 + get_func_qty());
jdh8d's avatar
jdh8d committed
	for (size_t FuncIndex = 0; FuncIndex < SMP_get_func_qty(); ++FuncIndex) {
		STARS_Function_t *FuncInfo = SMP_getn_func(FuncIndex);
jdh8d's avatar
jdh8d committed
		temp.startEA = FuncInfo->get_startEA();
		temp.endEA = FuncInfo->get_endEA();
jdh8d's avatar
jdh8d committed
	FindDataInCode();
	FixCodeIdentification();
jdh8d's avatar
jdh8d committed
	delete prof_info;
	return;
#endif

	// Pre-audit the IDA database by seeing if the distinction
	//  between code and data can be improved, and if all branches
	//  and calls have proper code targets and code cross references.
#if SMP_FIXUP_IDB
jdh8d's avatar
jdh8d committed
	FixupIDB();
	if (0 < prof_info->GetProfilerAnnotationCount()) {
		SMP_msg("Calling InferDataGranularity\n");
		SMP_msg("ptr to MemoryAccessInfo: %p\n", prof_info->GetMemoryAccessInfo());
		prof_info->GetMemoryAccessInfo()->InferDataGranularity();
		SMP_msg("Returned from InferDataGranularity\n");
	CurrProg->ProfGranularityFinished(AnnotFile, InfoAnnotFile);
	CurrProg->Analyze(prof_info, AnnotFile, InfoAnnotFile);
	if (!STARS_PerformReducedAnalysis) {
		CurrProg->EmitAnnotations(AnnotFile, InfoAnnotFile);
	}
#if SMP_DEBUG_CODE_ORPHANS
	RecentAddr = BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			Debug_FindOrphanedCode(seg, true);
	}
#endif

clc5q's avatar
clc5q committed
#if SMP_FIND_ORPHANS
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			FindOrphanedCode(seg, AnnotFile, InfoAnnotFile);
	for (int OptType = 0; OptType <= LAST_OPT_CATEGORY; ++OptType) {
		SMP_msg("Optimization Category Count %d:  %d Annotations: %d\n",
			OptType, OptCount[OptType], AnnotationCount[OptType]);
	}

	SMP_fprintf(InfoAnnotFile, "   8000000      2 SUCCESS ANALYSISCOMPLETED\n");
	SMP_fclose(InfoAnnotFile);
	SMP_fclose(ZST_AlarmFile);
#if ZST_EMIT_SPARK_ADA_TRANSLATION
	SMP_fclose(ZST_SPARKSourceFile);
	SMP_fclose(ZST_SPARKHeaderFile);
	if (!STARS_PerformReducedAnalysis) {
		AsmFile = SMP_fopen(AsmFileName.c_str(), "w");
		if (NULL == AsmFile) {
			error("FATAL ERROR: Cannot open ASM file %s\n", AsmFileName.c_str());
		}
		else {
			int FileResult = gen_file(OFILE_ASM, AsmFile, LowestCodeAddress, 0xffffffff, (GENFLG_MAPSEG | GENFLG_ASMTYPE));
			if (0 >= FileResult) {
				SMP_msg("ERROR: Could not generate ASM file.\n");
			}
			SMP_fclose(AsmFile);
clc5q's avatar
clc5q committed
		}
#endif
#if STARS_GENERATE_DIF_FILE
		DifFile = SMP_fopen(DifFileName.c_str(), "w");
		if (NULL == DifFile) {
			error("FATAL ERROR: Cannot open DIF file %s\n", DifFileName.c_str());
		}
		else {
			int FileResult = gen_file(OFILE_DIF, DifFile, LowestCodeAddress, BADADDR, 0);
			if (0 >= FileResult) {
				SMP_msg("ERROR: Could not generate DIF file.\n");
			}
			SMP_fclose(DifFile);
clc5q's avatar
clc5q committed
#endif
	}
	SMP_msg("INFO: Files closed, freeing memory and exiting.\n");
jdh8d's avatar
jdh8d committed
	delete prof_info;
	SMP_msg("INFO: Deleted prof_info.\n");
	delete CurrProg;
	SMP_msg("INFO: Deleted CurrProg. Returning to IDA Pro.\n");
	}
	catch (std::bad_alloc) {
		delete CurrProg;
		delete prof_info;
		error("FATAL ERROR: Memory exhausted.\n");
		SMP_fprintf(InfoAnnotFile, "   8000000      2 ERROR MEMORYEXHAUSTED\n");
		SMP_fclose(AnnotFile);
		SMP_fclose(InfoAnnotFile);
		SMP_fclose(ZST_AlarmFile);
char IDAP_comment[] = "ZephyrSoftware STARS (Static Analyzer for Reliability and Security)";
char IDAP_help[] = "Good luck";
char IDAP_name[] = "SMPStaticAnalyzer";
char IDAP_hotkey[] = "Alt-J";

plugin_t PLUGIN = {
	IDP_INTERFACE_VERSION,
	PLUGIN_PROC | PLUGIN_MOD,
	IDAP_init,
	IDAP_term,
	IDAP_run,
	IDAP_comment,
	IDAP_help,
	IDAP_name,
	IDAP_hotkey
};

// Find all code addresses in the IDA database and enter them into
//  IDAProLocs. Find all code addresses identified by the external
//  disassembler (e.g. objdump) and enter them into DisasmLocs.
void FindCodeAddresses(void) {
	// Read in code addresses as found by an external disassembler.
	ea_t CurrDisasmAddr;
	string DisasmFileName(RootFileName);
	string FileSuffix(".SMPobjdump");
	DisasmFileName += FileSuffix;
	FILE *DisasmFile = SMP_fopen(DisasmFileName.c_str(), "r");
	if (NULL == DisasmFile) {
		error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str());
		return;
	}

#define DISASM_RESERVE_SIZE  50000
	DisasmLocs.reserve(DISASM_RESERVE_SIZE);
	unsigned long TempAddr;
	int ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
	CurrDisasmAddr = (ea_t) TempAddr;
	while (1 == ScanReturn) {
		int NextChar;
		DisasmLocs.push_back(CurrDisasmAddr);
		// Swallow the rest of the input line and get the next address.
		do {
			NextChar = qfgetc(DisasmFile);
		} while ((EOF != NextChar) && ('\n' != NextChar));
		ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
		CurrDisasmAddr = (ea_t) TempAddr;
	} // end while (1 == ScanReturn)
	if (0 >= DisasmLocs.size()) {
		SMP_msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str());
		SMP_fclose(DisasmFile);
		DisasmLocs.clear();
		SMP_msg("%zu Disasm addresses read from %s\n", DisasmLocs.size(),
	}

	// Find all the code locs in the IDA Pro database. As we find
	//  them, store them in IDAProLocs.
	ea_t RecentAddr = BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (!seg->IsCodeSegment())
jdh8d's avatar
jdh8d committed
		for (ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = get_item_end(addr)) {
			flags_t InstrFlags = getFlags(addr);
			if (isHead(InstrFlags) && isCode(InstrFlags)) {
				IDAProLocs.push_back(addr);
clc5q's avatar
clc5q committed
#if 0
				if ((0x806cda4 <= addr) && (0x806cf99 >= addr))
clc5q's avatar
clc5q committed
#endif
			} // end if (isHead(addr) && isCode(addr)
#if SMP_DEBUG_FIXUP_IDB
			else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) {
				if (!isHead(InstrFlags))
					SMP_msg("Weirdness: not isHead at %x\n", addr);
					SMP_msg("Weirdness: isUnknown at %x\n", addr);
	} // end for all segments
	return;
} // end FindCodeAddresses()

// Return true if addr is not a proper beginning address for an instruction.
// Return false otherwise.
// Currently, we claim that an instruction is misaligned if DisasmLocs does
//  not contain it. This function is useful for dealing with errors in IDA
//  code identification, in which a large code section is identified as data,
//  but some instructions in the middle of the "data" are identified as
//  code but IDA often starts on the wrong boundary in these cases.
bool IsCodeMisaligned(ea_t addr) {
	// Do a binary search for addr within DisasmLocs, which is sorted
	//  in ascending address order because of the way in which it was
	//  generated.
	size_t min = 0;
	size_t max = DisasmLocs.size();  // don't access DisasmLocs[max]
	size_t index = (min + max) / 2;
	
	while (addr != DisasmLocs[index]) {
		if (min >= (max - 1))
			return true;
#if 0
		SMP_msg("min: %d max: %d index: %d\n", min, max, index);
#endif
		if (addr < DisasmLocs[index])
			max = index;
		else // must be addr > DisasmLocs[index];
			min = index;
	
		index = (min + max) / 2;
	}

	return false;
} // end of IsCodeMisaligned()

void RemoveIDACodeAddr(ea_t addr) {
	// Do a binary search for addr within IDAProLocs, which is sorted
	//  in ascending address order because of the way in which it was
	//  generated. Delete the element of IDAProLocs if found.
	size_t min = 0;
	size_t max = IDAProLocs.size();  // don't access IDAProLocs[max]
	size_t index = (min + max) / 2;
	
	while (addr != IDAProLocs[index]) {
		if (min >= (max - 1))
			return;
#if 0
		SMP_msg("min: %d max: %d index: %d\n", min, max, index);
#endif
		if (addr < IDAProLocs[index])
			max = index;
		else // must be addr > IDAProLocs[index];
			min = index;
	
		index = (min + max) / 2;
	}

	// IDAProLocs[index] contains addr.
	vector<ea_t>::iterator RemovalIterator = IDAProLocs.begin();
	RemovalIterator += index;
	RemovalIterator = IDAProLocs.erase(RemovalIterator);
	return;
} // end of RemoveIDACodeAddr()

// Driver for all other fixing functions. Upon its return, the IDA
//  database (IDB file) should be fixed up as much as we can fix it.
void FixupIDB(void) {
	FindCodeAddresses();
#if SMP_DEBUG_FIXUP_IDB
	SpecialDebugOutput();
#endif
	AuditCodeTargets();
	FindDataInCode();
	AuditTailChunkOwnership();
	if (DisasmLocs.size() > 0) {
		FixCodeIdentification();
		int fixes = FixupNewCodeChunks();
		SpecialDebugOutput();
	}
	DisasmLocs.clear();
	IDAProLocs.clear();
} // end of FixupIDB()

// Find and print all data head addresses in code segments. 
// If an isolated code instruction is found in the midst of a run
//  of data bytes and has no code xrefs jumping to it, it is not
//  reachable as code and is undoubtedly a mixup by IDA. Possibly
//  the whole data region will be converted to code later, in which
//  case the isolated code is not necessarily properly aligned and
//  parsed at its present address, so we are glad to convert it into
//  data anyway so that FindDataToConvert() will succeed on it later.
// Data to code conversion, and isolated code detection, are inhibited
//  by IDA identifying several consecutive instructions in the midst
//  of a data region, with the code addresses not agreeing with the
//  external disassembler's code addresses. We will convert these
//  misaligned instructions to data as we detect them. We will also
//  convert unexplored bytes (isUnknown(flags) == true) into data if
//  they are in the midst of a data sequence.
#define MIN_DATARUN_LEN 24  // #bytes on either side of "isolated" code
void FindDataInCode(void) {
	size_t DataRunLen = 0; // How many data bytes in a row have we seen?
	bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
									// Now looking for data
	ea_t IsolatedCodeAddr;
	int IsolatedCodeLen;
	int InstrLen;
	ea_t RecentAddr = BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (!seg->IsCodeSegment())
		ssize_t SegNameSize = SMP_get_segm_name(seg, SegName, sizeof(SegName) - 1);
		SMP_msg("Non-code addresses for code segment %s from %x to %x\n",
jdh8d's avatar
jdh8d committed
		for (ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = get_item_end(addr)) {
			flags_t AddrFlags = getFlags(addr);
			if (isHead(AddrFlags)) {
				if (isData(AddrFlags)) {
					DataRunLen += get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
#endif
					if (MIN_DATARUN_LEN <= DataRunLen) {
						if (IsolatedCodeTrigger) {
							// Saw data, then one isolated code, then data
							do_unknown_range(IsolatedCodeAddr, IsolatedCodeLen, DOUNK_SIMPLE);
							RemoveIDACodeAddr(IsolatedCodeAddr);
							if (do_data_ex(IsolatedCodeAddr, byteflag(),
								IsolatedCodeLen, BADNODE)) {
									SMP_msg("Converted isolated code to data: %lx\n",
										(unsigned long) IsolatedCodeAddr);
								SMP_msg("Failed to convert isolated code to data: %lx len: %x\n",
									(unsigned long) IsolatedCodeAddr, IsolatedCodeLen);
							}
							IsolatedCodeTrigger = false;
						} // end if (IsolatedCodeTrigger)
					} // end if (MIN_DATARUN_LEN <= DataRunLen)
				} // end if (isData(AddrFlags)
				else if (isUnknown(AddrFlags)) {
					// Just in case; unknown usually means not head or tail
					// If in a data run, convert to data.
					InstrLen = get_item_size(addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
					SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
					if (0 < DataRunLen) {
						if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
							SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
							SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
							DataRunLen = 0;
							IsolatedCodeTrigger = false;
						}
					}
				}
				else if (isCode(AddrFlags)) {  // must be true
					if (MIN_DATARUN_LEN <= DataRunLen) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("DataRunLen: %d at %x\n", DataRunLen, addr);
						InstOK = SMPGetCmd(addr, LocalCmd, LocalFeatures);
						assert(InstOK);

						InstrLen = (int) LocalCmd.size;

						// We don't check the returned InstrLen for validity because IsCodeMisaligned()
						//  will check for validity immediately below.
						SMP_msg("Calling IsCodeMisaligned: len %d\n", InstrLen);
#endif
						if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
#endif
							do_unknown_range(addr, InstrLen, DOUNK_SIMPLE);
							RemoveIDACodeAddr(addr);
							if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("Converted misaligned code to data at %x : len: %x\n",
clc5q's avatar
clc5q committed
#endif
								// Step back so data gets processed.
								DataRunLen += get_item_size(addr);
								continue; // skip reset of DataRunLen
							}
							else {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("Misaligned code left as unknown at %x : len: %x\n",
clc5q's avatar
clc5q committed
#endif
								IsolatedCodeTrigger = false;
							}
						} // end if (IsCodeMisaligned() ...)
						else if (!hasRef(AddrFlags)) {
							// No references at all --> isolated code.
							IsolatedCodeTrigger = true;
							IsolatedCodeAddr = addr;
							IsolatedCodeLen = InstrLen;
						}
						else {
							SMP_xref_t xb;
							bool ok = xb.SMP_first_to(addr, XREF_ALL);
							if (!ok) {
								// No code xrefs to this target addr.
								IsolatedCodeTrigger = true;
								IsolatedCodeAddr = addr;
								IsolatedCodeLen = InstrLen;
							}
						}
					} // end if (MIN_DATARUN_LEN <= DataRunLen)
					else if (IsolatedCodeTrigger) {
						// Two instructions in a row does not fit the pattern.
						IsolatedCodeTrigger = false;
					}
					DataRunLen = 0;
				} // end if (isData) ... else if (isUnknown) ... else isCode
			} // end if (isHead)
			else if (isUnknown(AddrFlags)) {
				// If in a data run, convert to data.
				InstrLen = get_item_size(addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
				if (0 < DataRunLen) {
					if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
						DataRunLen = 0;
						IsolatedCodeTrigger = false;
					}
				}
			}
		} // end for (ea_t addr =  seg->startEA; ...)
	} // end for all segments
	return;
} // end of FindDataInCode()


// The choices that IDA makes for deciding which parent function of a
//  TAIL chunk is the primary owner of the tail can be counterintuitive.
//  A function entry can both fall into and jump to a tail chunk that
//  is contiguous with it, yet the "owner" might be a function that is
//  far below it in the executable address space. This function will
//  change the ownership to a more sensible arrangement.
void AuditTailChunkOwnership(void) {
jdh8d's avatar
jdh8d committed
	SMP_AuditTailChunkOwnership();
} // end of AuditTailChunkOwnership()

// If the addresses signified from DisasmIndex to IDAProIndex are
//  all considered data and do NOT follow a return instruction,
//  return false and update AreaSize to reflect the area to be
//  converted.
// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
	ea_t PrevIDAAddr;
	ea_t NextIDAAddr;
	size_t ShadowDisasmIndex = DisasmIndex - 1;
	ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
	bool CannotConvert = false;  // return value
	bool DebugAddress = false;
#if SMP_DEBUG_FIXUP_IDB
	DebugAddress = (DisasmAddr == 0x806c19a);
#endif

	if (DebugAddress) {
		SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex);
		SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(),
			DisasmLocs.size());
	}
	if (IDAProIndex >= IDAProLocs.size()) {
		// Have already processed the last IDA address.
		if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n");
		return true;
	}
	else if (DisasmIndex >= DisasmLocs.size()) {
		// Strange. Last Disasm address is only one to convert, and
		//  IDA still has addresses after that?
		if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n");
		return true;
	}
	else if (IDAProIndex < 2) {
		// We have Disasm addrs before the very first IDA addr. We
		//  don't trust this boundary case.
		if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n");
		return true;
	}
	NextIDAAddr = IDAProLocs[IDAProIndex - 1];
	PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
	if (DebugAddress) SMP_msg(" PrevIDAAddr: %lx NextIDAAddr: %lx\n", (unsigned long) PrevIDAAddr, (unsigned long) NextIDAAddr);

	// See if previous IDA address was a return.
	flags_t PrevFlags = getFlags(PrevIDAAddr);
	if (!isCode(PrevFlags) || !isHead(PrevFlags)) {
		SMP_msg("PrevIDAAddr %lx not isCode or not isHead.\n", (unsigned long) PrevIDAAddr);
		return true;
	}
	SMPInstr PrevInstr(PrevIDAAddr);
	PrevInstr.Analyze();
	if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n");
	if (PrevInstr.MDIsReturnInstr()) {
		// Right after a return come no-ops and 2-byte no-ops
		//  that are just for alignment. IDA does not seem to be
		//  happy when we convert all those to code.
		if (DebugAddress) SMP_msg(" Data followed a return instruction.\n");
		return true;
	}
	// Now, see if the area from DisasmAddr to NextIDAAddr is all data
	//  according to IDA.
	while (DisasmAddr < NextIDAAddr) {
		flags_t DataFlags = getFlags(DisasmAddr);
		if (isTail(DataFlags)) {
			if (DebugAddress) SMP_msg(" tail byte: %lx\n", (unsigned long) DisasmAddr);
			DisasmAddr = get_item_end(DisasmAddr);
		}
		else if (isData(DataFlags)) {
			if (DebugAddress) SMP_msg(" data byte: %lx\n", (unsigned long) DisasmAddr);
			DisasmAddr = get_item_end(DisasmAddr);
		}
		else if (isCode(DataFlags)) {
			// How could this ever happen?
			if (DebugAddress) SMP_msg(" isCode: %lx\n", (unsigned long) DisasmAddr);
			return true;
		}
		else { // must be isUnknown()
			// Very conservative here; only want to convert when the whole
			//  region is data, because that is a symptom of IDA missing
			//  a piece of code within a function (usually a piece of code
			//  that is only reachable via an indirect jump).
			if (DebugAddress) SMP_msg(" Not isData: %lx\n", (unsigned long) DisasmAddr);
		if (DebugAddress) SMP_msg(" new DisasmAddr: %lx\n", (unsigned long) DisasmAddr);
	if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
	if (!CannotConvert) {
		// Success.
		DisasmAddr = DisasmLocs[ShadowDisasmIndex];
		AreaSize = NextIDAAddr - DisasmAddr;
		if (DebugAddress) { 
			SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
			SMP_msg(" exiting FindDataToConvert()\n");
			SMP_msg("\n");
		}
	} // end if (!CannotConvert)
	return CannotConvert;
} // end of FindDataToConvert()

// Does a converted code region look like a function prologue? If so,
//  we should not include it in the previous function.
bool IsFunctionPrologue(ea_t StartAddr, ea_t EndAddr) {
	return false;  // **!!** TODO 
} // end of IsFunctionPrologue()

// Patch program bytes that could not be converted from
//  data to code, if it can be determined that the bytes represent code
//  that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
//  often are found in optimized glibc code because gcc was able to
//  determine that a function pointer was zero and did constant propagation,
//  but unfortunately was not able to determine that the code was unreachable.
//  IDA will not succeed in ua_code() for "call 0", but there is no danger
//  of a working program ever executing this code. Replacing the call with
//  no-ops permits us to continue converting a contiguous range of data to
//  code, and permits IDA to reanalyze the function later.
// Returns true if program bytes were patched.
bool MDPatchUnconvertedBytes(ea_t CurrDisasmAddr) {
	flags_t AddrFlags = getFlags(CurrDisasmAddr);
	if (isData(AddrFlags) || isTail(AddrFlags)) {
		// Bytes should have been converted to unknown already.
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Cannot patch data bytes or tail bytes at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		return false;
	}
	SMPInstr PatchInstr(CurrDisasmAddr);
	PatchInstr.Analyze();
	int InstrLen = PatchInstr.GetCmd().size;
	if (0 >= InstrLen) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("decode_insn() failed on patch location %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		return false;
	}
	else {
		if (PatchInstr.GetCmd().itype != NN_call) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch non-call instruction at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		STARSOpndType CallDest = PatchInstr.GetFirstUse()->GetOp();
		if ((o_near != CallDest.type) || (0 != CallDest.addr)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch call unless it is call near ptr 0 at %x",
clc5q's avatar
clc5q committed
#endif
			return false;
		}
		ea_t PatchAddr = CurrDisasmAddr;
		for (int i = 0; i < InstrLen; ++i) {
			bool ok = patch_byte(PatchAddr, 0x90);  // x86 no-op
			if (!ok) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("patch_byte() failed at %x\n", PatchAddr);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Patched %d bytes successfully at %x\n", InstrLen, CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
#if IDA_SDK_VERSION < 600
		InstrLen = ua_code(CurrDisasmAddr);
#else
		InstrLen = create_insn(CurrDisasmAddr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg(" ... but ua_code() still failed!\n");
clc5q's avatar
clc5q committed
#endif
			return false;
		}
	} // end if (0 >= InstrLen) ... else ...
	return true;
} // end of MDPatchUnconvertedBytes()

// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
//  and an external disassembler (in DisasmLocs). Compare the lists and
//  try to convert addresses to code that are found in DisasmLocs but
//  not in IDAProLocs. Emit warnings when IDAProLocs has a code address
//  not found in DisasmLocs.
void FixCodeIdentification(void) {
	size_t DisasmIndex = 0;
	ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
	size_t IDAProIndex = 0;
	ea_t CurrAddr = IDAProLocs[IDAProIndex++];

	while (DisasmIndex <= DisasmLocs.size()) {
		// If the current address is less than the current
		//  external disasm address, we have the rare case in
		//  which IDA Pro has identified an address as code
		//  but the external disasm has not. Emit a warning
		//  message and go on to the next IDA address.
		if (CurrAddr < CurrDisasmAddr) {
			SMPInstr TempInstr(CurrAddr);
			TempInstr.Analyze();
			SMP_msg("AUDIT: Address %lx is code in IDB but not in external disassembler: %s\n",
				(unsigned long) CurrAddr, TempInstr.GetDisasm());
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit.
				break;
			}
		}
		else if (CurrAddr == CurrDisasmAddr) {
			// If equal, no problem, we are moving through the
			//  code addresses in lockstep. Grab the next address
			//  from each source.
			if (DisasmIndex < DisasmLocs.size()) {
				CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else {
				++DisasmIndex;  // cause loop exit; skip cleanup loop
			}
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit in cleanup loop.
				break;
			}
		}
		else {
			// We must have CurrAddr > CurrDisasmAddr. That means
			//  IDA has jumped over some code addresses in
			//  DisasmLocs. We need to try to convert addresses
			//  to code until we can reach the current addr.
			// For now, we will address only the case in which IDA
			//  has identified addresses as data bytes, and the
			//  external disassembler(e.g. objdump) has identified
			//  the same addresses as code. We only want to deal with
			//  contiguous areas of data-to-code conversion that do NOT
			//  follow a return statement.
			int AreaSize = 0;
			ea_t AreaStart = CurrDisasmAddr;
			ea_t AreaEnd;
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("CurrDisasmAddr: %x  CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
#endif
			bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
			if (SkipArea) {
				// Skip over the extra external disasm addresses.
				while (CurrDisasmAddr < CurrAddr)
					CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else { 
				// Convert the overlooked code region to unexplored.
				AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
#endif
				do_unknown_range(AreaStart, AreaSize, DOUNK_SIMPLE);
				SMP_bounds_t ConvertRegion;
				ConvertRegion.startEA = AreaStart;
				ConvertRegion.endEA = AreaEnd;
				FixupRegion CurrRegion(ConvertRegion);
				CodeReanalyzeList.push_back(CurrRegion);
				do {
					flags_t InstrFlags = getFlags(CurrDisasmAddr);
					if (!isUnknown(InstrFlags)) {
						SMP_msg("Sync problem in FixCodeID: %lx\n", (unsigned long) CurrDisasmAddr);
#if IDA_SDK_VERSION < 600
						int InstrLen = ua_code(CurrDisasmAddr);
						int InstrLen = create_insn(CurrDisasmAddr);
						if (InstrLen > 0) { // Successfully converted to code
							SMPInstr NewInstr(CurrDisasmAddr);
							NewInstr.Analyze();
							SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
#endif
						}
						else {
							if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q's avatar
clc5q committed
								;
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
								CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
							}
						}
					} // end if (isCode(InstrFlags) ... else ...
					if (DisasmIndex < DisasmLocs.size()) {
						CurrDisasmAddr = DisasmLocs[DisasmIndex++];
					}
					else {
						// cause loops to exit
						CurrDisasmAddr = CurrAddr;
						++DisasmIndex; // skip cleanup loop
					}
				} while (CurrDisasmAddr < CurrAddr);
				if (AllConverted && AllNops) {
					// We want to convert the region back to unexplored bytes
					//  and take it off the work list. Regions that are all nops
					//  create data flow analysis problems sometimes. The region
					//  is often unreachable code and produces a basic block with
					//  no predecessors within a function. This often happens when
					//  an optimizing compiler uses nops as padding to align jump
					//  targets on cache line bounaries. With no fall through into
					//  the nops, they are unreachable and should be left as unknown.
					SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
					do_unknown_range(CurrRegion.GetStart(),
						CurrRegion.GetEnd() - CurrRegion.GetStart(), DOUNK_SIMPLE);
					CodeReanalyzeList.pop_back();
				}
			} // end if (SkipArea) ... else ...
		} // end if (addr < CurrDisasmAddr) .. else if ... else ...
	} // end while (DisasmIndex <= DisasmLocs.size()

#if 0  // Make this code use FindDataToConvert()  **!!**
	// Cleanup loop:
	// If there are still Disasm addrs to process, try to turn them
	//  into code in the IDB.
	while (DisasmIndex <= DisasmLocs.size()) {
		flags_t InstrFlags = getFlags(CurrDisasmAddr);
		if (isCode(InstrFlags)) {
			SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
			segment_t *seg = SMP_getseg(CurrDisasmAddr);
			if (SEG_CODE == seg->type) {
				do_unknown_range(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, DOUNK_SIMPLE);
			}
			else {
				// Might be safest to just discontinue processing
				//  if we wander into a non-code segment.
				//  DisasmLocs should not have an entire code segment
				//  that IDA Pro missed.
				break;
			}
			int InstrLen = ua_code(CurrDisasmAddr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(CurrDisasmAddr);
				NewInstr.Analyze();
				SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
				SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
			}
		} // end if (isCode(InstrFlags) ... else ...
		if (DisasmIndex < DisasmLocs.size()) {
			CurrDisasmAddr = DisasmLocs[DisasmIndex++];
		}
		else {
			++DisasmIndex; // cause loop to exit
		}
	} // end while (DisasmIndex <= DisasmLocs.size()
#endif

	return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
//  Earlier failures are usually because the instruction branches to an address that has not
//  yet been converted from data to code, so ua_code() failed. Now that all data to code
//  conversions have completed, ua_code() should succeed.
// Return the number of instructions successfully analyzed.
int FixupNewCodeChunks(void) {
	list<FixupRegion>::iterator CurrRegion;
	int changes = 0;
	for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
		bool AllConverted = true;
		bool AllNops = true;
		bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
		if (NoFixups) {
			CurrRegion->SetStart(BADADDR);  // mark for removal
			continue;  // skip to next region
		}
		list<ea_t>::iterator CurrInstr;
		for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr)  {
#if IDA_SDK_VERSION < 600
			int InstrLen = ua_code(*CurrInstr);
#else
			int InstrLen = create_insn(*CurrInstr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(*CurrInstr);
				NewInstr.Analyze();
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
					AllNops = false;
					*CurrInstr = BADADDR; // mark for removal
				}
			}
			else {
				AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
			}
		} // end for all instrs in CurrRegion
		if (AllConverted && !AllNops) {
			SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			CurrRegion->SetStart(BADADDR); // mark for removal
		}
		else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			do_unknown_range(CurrRegion->GetStart(),
				CurrRegion->GetEnd() - CurrRegion->GetStart(), DOUNK_SIMPLE);
			CurrRegion->SetStart(BADADDR); // mark for removal
		}
		else {
			// Remove only the instructions that were fixed up.
			CurrInstr = CurrRegion->FixupInstrs.begin(); 
			while (CurrInstr != CurrRegion->FixupInstrs.end()) {
				if (BADADDR == *CurrInstr) {
					CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
				}
				else {
					++CurrInstr;
				}
			}
		}
	} // end for all regions in the CodeReanalyzeList

	// Remove completed regions from the CodeReanalyzeList
	CurrRegion = CodeReanalyzeList.begin();
	while (CurrRegion != CodeReanalyzeList.end()) {
		if (BADADDR == CurrRegion->GetStart())
			CurrRegion = CodeReanalyzeList.erase(CurrRegion);
		else
			++CurrRegion;
	}

#if 0
	if (AllConverted) {
					if (IsFunctionPrologue(AreaStart, AreaEnd)) {
						// Create a new function entry chunk here.
						//  **!!** TODO
						;
					}
					else {
						// Extend the previous chunk to include the
						//  converted code.
						ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
						STARS_Function_t *PrevChunk = get_fchunk(PrevIDAAddr);
						SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
						SMP_msg(" STARS_Function_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0  // temporary for debugging
						if (is_func_entry(PrevChunk)) {
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Func extended to include code from %x to %x\n",
									AreaStart, AreaEnd);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend func from %x to %x\n",
									AreaStart, AreaEnd);
							}
						}
						else { // tail
							// See if this works for function tails, also.
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Tail extended to include code from %x to %x\n",
								STARS_Function_t *TailOwner = get_func(PrevChunk->owner);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend tail from %x to %x\n",
									AreaStart, AreaEnd);
							}
						} // end if (is_func_entry()) ... else ...
#endif
					} // end if (IsFunctionPrologue()) ... else ...
				} // end if (AllConverted)
				else {
					SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
				}
#endif

	return changes;
} // end of FixupNewCodeChunnks()

// Audit the IDA code database by looking at all instructions in the
//  code segment and printing all those that are not contained in a
//  function. Emit the context-free annotations that we are able to
//  emit on a per-instruction basis.
jdh8d's avatar
jdh8d committed
void FindOrphanedCode(STARS_Segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
	char disasm[MAXSTR];
jdh8d's avatar
jdh8d committed
	for (ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
		addr = get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			ea_t FirstFuncAddr;
			if (!(CurrProg->IsInstAddrStillInFunction(addr, FirstFuncAddr))) {
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm());
clc5q's avatar
clc5q committed
#endif
				// TODO: If there are code xrefs to the orphan code,
				//  see what kind. If a CALL, and orphan code looks
				//  like a prologue, make a function. If a JUMP of
				//  some kind, then make a function chunk and make
				//  it a tail of all functions that jump to it. **!!**

				// Do machine-dependent fixes for DEF and USE lists.
				//  The fixes can help produce better annotations.
				CurrInst.MDFixupDefUseLists();

				// If instruction is still not included in a code chunk,
				//  emit annotations for it in isolation.
				CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);

				// If instruction is an indirect branch, emit an XREF
				//  annotation for each of its targets.
				SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
				if ((CurrDataFlow == INDIR_JUMP) || (CurrDataFlow == INDIR_CALL)) {
					SMP_xref_t xrefs;
					for (bool ok = xrefs.SMP_first_from(addr, XREF_ALL); ok; ok = xrefs.SMP_next_from()) {
						if (xrefs.GetTo() != 0) {
							if (xrefs.GetIscode() && (xrefs.GetType() != fl_F)) {
								// Found a code target, with its address in xrefs.to
								PrintCodeToCodeXref(addr, xrefs.GetTo(), CurrInst.GetSize());
							}
						}
					}
				}

			}
		}
		else if (isUnknown(InstrFlags)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
clc5q's avatar
clc5q committed
#endif
			// Can IDA analyze this to be code?
			int InstrLen;
#if IDA_SDK_VERSION < 600
			InstrLen = ua_code(addr);
#else
			InstrLen = create_insn(addr);
#endif
			if (InstrLen > 0) {
				bool IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					// Remove interactive color-coding tags.
					ssize_t StringLen = tag_remove(disasm, disasm, 0);
					if (-1 >= StringLen) {
						SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Successfully analyzed!  %s\n", disasm);
clc5q's avatar
clc5q committed
#endif
						SMPInstr UnknownInstr(addr);
						UnknownInstr.Analyze();
						// TODO: Get new code into a chunk.  **!!**
						// If instruction is still not included in a code chunk,
						//  emit annotations for it in isolation.
						UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
					SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()

// Version of FindOrphanedCode that does not emit annotations but can be used
//  to determine at what point in time code becomes orphaned.
jdh8d's avatar
jdh8d committed
void Debug_FindOrphanedCode(STARS_Segment_t *CurrSeg, bool FirstRun) {
	ea_t DebugAddr = 0x8050db0;
jdh8d's avatar
jdh8d committed
	for (ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
		addr = get_item_end(addr)) {

		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			STARS_Function_t *CurrFunc = SMP_get_func(addr);
			if (NULL == CurrFunc) {  // Code not in a func; orphaned
				pair<set<ea_t>::iterator, bool> pairib;
				pairib = CodeOrphans.insert(addr);
				if (DebugAddr == addr) {
					SMP_msg("DEBUG: Orphaned code addr %lx found.\n", (unsigned long) addr);
				}
				if ((!FirstRun) && (pairib.second)) {
					SMP_msg("SERIOUS WARNING: Newly orphaned code at %lx \n", (unsigned long) addr);
				}
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of Debug_FindOrphanedCode()

// Audit the IDA database with respect to branches and calls. They should
//  each have valid code targets (not data or unknown bytes) and the code
//  cross references should reflect the linkage.
void AuditCodeTargets(void) {
	SMP_AuditCodeTargets();
} // end of AuditCodeTargets()


void SpecialDebugOutput(void) {
	char disasm[MAXSTR];
	vector<ea_t> ProblemAddrs;
	ProblemAddrs.push_back(0x8066d08);
	bool IDAsuccess;
	int InstLen;
	ssize_t StringLen;

	for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
		ea_t addr = ProblemAddrs[index];
		flags_t InstrFlags = getFlags(addr);
		if (isCode(InstrFlags) && isHead(InstrFlags)) {
			IDAsuccess = SMPGetCmd(addr, LocalCmd, LocalFeatures);
			InstLen = (int) LocalCmd.size;

			if ((IDAsuccess) && (0 < InstLen)) {
				IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					StringLen = tag_remove(disasm, disasm, 0);
					if (-1 < StringLen)
						SMP_msg("Problem addr %lx : %s\n", (unsigned long) addr, disasm);
						SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
					SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
				SMP_msg("ERROR: decode_insn failed at addr %lx \n", (unsigned long) addr);
		}
	}
	return;
} // end of SpecialDebugOutput()

// Convert a call type string from the policy file, such as "FILECALLS", to the
//  corresponding ZST_SysCallType, such as ZST_FILE_CALL.
ZST_SysCallType ConvertStringToCallType(char *Str2) {
	ZST_SysCallType ReturnVal;
	if (0 == strcmp("PRIVILEGECALLS", Str2)) {
		ReturnVal = ZST_HIGHPRIVILEGE_CALL;
	}
	else if (0 == strcmp("FILECALLS", Str2)) {
		ReturnVal = ZST_FILE_CALL;
	}
	else if (0 == strcmp("NETWORKCALLS", Str2)) {
		ReturnVal = ZST_NETWORK_CALL;
	}
	else {
		ReturnVal = ZST_UNMONITORED_CALL;
	}
	return ReturnVal;
} // end of ConvertStringToCallType()

// Convert a policy string from the policy file, such as "DISALLOW", to
//  the corresponding ZST_Policy value, such as ZST_DISALLOW.
ZST_Policy ConvertStringToPolicy(char *Str3) {
	ZST_Policy ReturnVal;
	if (0 == strcmp("DISALLOW", Str3)) {
		ReturnVal = ZST_DISALLOW;
	}
	else if (0 == strcmp("WHITELIST", Str3)) {
		ReturnVal = ZST_WHITELIST;
	}
	else if (0 == strcmp("BLACKLIST", Str3)) {
		ReturnVal = ZST_BLACKLIST;
	}
	else { // error handling precedes calls to this function
		ReturnVal = ZST_ALLOWALL;
	}
	return ReturnVal;
} // end of ConvertStringToPolicy()

// Given a function name, return its Zephyr Security Toolkit call type.
ZST_SysCallType GetCallTypeFromFuncName(string SysCallName) {
	ZST_SysCallType ReturnVal;
	map<string, ZST_SysCallType>::iterator FindIter = ZST_FuncTypeMap.find(SysCallName);
	if (FindIter == ZST_FuncTypeMap.end()) { // not found; might not even be system call
		ReturnVal = ZST_UNMONITORED_CALL;
	}
	else {
		ReturnVal = FindIter->second;
	}
	return ReturnVal;
} // end of GetCallTypeFromFuncName()

// Get the user-specified security policy for the given call type.
ZST_Policy GetPolicyFromCallType(ZST_SysCallType CallType) {
	ZST_Policy ReturnVal;
	map<ZST_SysCallType, ZST_Policy>::iterator FindIter = ZST_TypePolicyMap.find(CallType);
	if (FindIter == ZST_TypePolicyMap.end()) {
		// Policy not found; default to ALLOW_ALL
		ReturnVal = ZST_ALLOWALL;
	}
	else {
		ReturnVal = FindIter->second;
	}
	return ReturnVal;
} // end of GetPolicyFromCallType()

// Given a call type and called function name, is it on the location whitelist
//  for that call type?
// NOTE: HANDLE CASE IN WHICH WHITELISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationWhitelisted(ZST_SysCallType CallType, string LocationName) {
	set<string>::iterator FindIter;
	bool ReturnVal;

	if (CallType == ZST_FILE_CALL) {
		FindIter = ZST_FileLocWhitelist.find(LocationName);
		ReturnVal = (FindIter != ZST_FileLocWhitelist.end());
	}
	else if (CallType == ZST_NETWORK_CALL) {
		FindIter = ZST_NetworkLocWhitelist.find(LocationName);
		ReturnVal = (FindIter != ZST_NetworkLocWhitelist.end());
	}
	else { // should not be here
		ReturnVal = false;
	}
	return ReturnVal;
} // end of IsLocationWhitelisted()

// Given a call type and called function name, is it on the location blacklist
//  for that call type?
// NOTE: HANDLE CASE IN WHICH BLACKLISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationBlacklisted(ZST_SysCallType CallType, string LocationName) {
	set<string>::iterator FindIter;
	bool ReturnVal;

	if (CallType == ZST_FILE_CALL) {
		FindIter = ZST_FileLocBlacklist.find(LocationName);
		ReturnVal = (FindIter != ZST_FileLocBlacklist.end());
	}
	else if (CallType == ZST_NETWORK_CALL) {
		FindIter = ZST_NetworkLocBlacklist.find(LocationName);
		ReturnVal = (FindIter != ZST_NetworkLocBlacklist.end());
	}
	else { // should not be here
		ReturnVal = false;
	}
	return ReturnVal;
}

// Given a called function name, does it produce only benign numeric errors when
//  its returned values are used in arithmetic? (i.e. it is a trusted input)
bool IsNumericSafeSystemCall(string CallName) {
	set<string>::iterator FindIter = ZST_SystemCallNumericWhitelist.find(CallName);
	bool ReturnVal = (FindIter != ZST_SystemCallNumericWhitelist.end());
	return ReturnVal;
}

// Utility functions to print code xrefs to STARS_XrefsFile
void PrintCodeToCodeXref(ea_t FromAddr, ea_t ToAddr, size_t InstrSize) {
	SMP_fprintf(STARS_XrefsFile, "%10lx %6zu INSTR XREF IBT FROMIB %10lx \n",
						(unsigned long) ToAddr, InstrSize, (unsigned long) FromAddr);
	return;
}

void PrintDataToCodeXref(ea_t FromDataAddr, ea_t ToCodeAddr, size_t InstrSize) {
	SMP_fprintf(STARS_XrefsFile, "%10lx %6zu INSTR XREF IBT FROMDATA %10lx \n",
						(unsigned long) ToCodeAddr, InstrSize, (unsigned long) FromDataAddr);
// These two constants should agree with their counterparts in ZST-policy.c.
#define ZST_MAX_FILE_NAME_LEN 1024
#define ZST_MAX_CALL_NAME_LEN 64
// Read the foo.exe.policy file to initialize our security policies for system calls.
void ZST_InitPolicies(const char *PolicyFileName) {
	FILE *PolicyFile = SMP_fopen(PolicyFileName, "r");
	char Str1[ZST_MAX_CALL_NAME_LEN], Str2[ZST_MAX_CALL_NAME_LEN], Str3[ZST_MAX_FILE_NAME_LEN];

	string SafeSystemCall1("gettimeofday");
	ZST_SystemCallNumericWhitelist.insert(SafeSystemCall1);

			int ItemsRead = qfscanf(PolicyFile, "%63s %63s %1023s", Str1, Str2, Str3);
			if (3 != ItemsRead) {
				SMP_msg("ERROR: Line in %s had %d items instead of the required 3; line ignored.\n", PolicyFileName, ItemsRead);
				pair<set<string>::iterator, bool> SetInsertResult;
				if (0 == strcmp(Str1, "SECURITYPOLICY")) {
					ZST_SysCallType TempCallType = ConvertStringToCallType(Str2);
					ZST_Policy TempPolicy = ConvertStringToPolicy(Str3);
					pair<map<ZST_SysCallType, ZST_Policy>::iterator, bool> InsertResult;
					pair<ZST_SysCallType, ZST_Policy> TempPair(TempCallType, TempPolicy);
					InsertResult = ZST_TypePolicyMap.insert(TempPair);
					if (!(InsertResult.second)) {
						SMP_msg("ERROR: Could not insert security policy %s for %s. Possible duplicate or conflicting policies.\n",
							Str3, Str2);
					}
				}
				else if (0 == strcmp(Str1, "FILELOCATION")) {
					if (0 == strcmp(Str2, "WHITELIST")) {
						SetInsertResult = ZST_FileLocWhitelist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate file whitelist location %s ignored.\n", Str3);
						}
					}
					else if (0 == strcmp(Str2, "BLACKLIST")) {
						SetInsertResult = ZST_FileLocBlacklist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate file blacklist location %s ignored.\n", Str3);
						SMP_msg("ERROR: Unknown second field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
					}
				}
				else if (0 == strcmp(Str1, "NETWORKLOCATION")) {
					if (0 == strcmp(Str2, "WHITELIST")) {
						SetInsertResult = ZST_NetworkLocWhitelist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate network whitelist location %s ignored.\n", Str3);
						}
					}
					else if (0 == strcmp(Str2, "BLACKLIST")) {
						SetInsertResult = ZST_NetworkLocBlacklist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate network blacklist location %s ignored.\n", Str3);
						SMP_msg("ERROR: Unknown second field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
					SMP_msg("ERROR: Unknown first field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
		if (0 == SMP_fclose(PolicyFile)) {
			SMP_msg("Policy file %s successfully closed; all policies recorded.\n", PolicyFileName);
			SMP_msg("ERROR: fclose failed on policy file %s. However, policies should be in effect.\n", PolicyFileName);
		}
		// Now, initialize the system call name maps.
		pair<map<string, ZST_SysCallType>::iterator, bool> FuncInsertResult;
		// Do all the high privilege calls first.
		string SysFuncName("putenv");
		pair<string, ZST_SysCallType> FuncNamePolicyPair(SysFuncName, ZST_HIGHPRIVILEGE_CALL);
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setenv");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setegid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("seteuid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setgid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setpgid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setregid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setreuid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("setuid");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("execl");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("execv");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("execle");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("execve");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("execlp");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("execvp");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("system");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);

		// Now do all the file operation calls.
		FuncNamePolicyPair.second = ZST_FILE_CALL;
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("chdir");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("chmod");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("chown");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("creat");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("creat64");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("fopen");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("freopen");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("open");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("open64");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("mknod");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("remove");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("rmdir");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);
		FuncNamePolicyPair.first.clear();
		FuncNamePolicyPair.first.append("unlink");
		FuncInsertResult = ZST_FuncTypeMap.insert(FuncNamePolicyPair);
		assert(FuncInsertResult.second);

		// Finally, handle all the network connection calls.
		FuncNamePolicyPair.second = ZST_NETWORK_CALL;
		FuncNamePolicyPair.first.clear();
Loading
Loading full blame...