/*
 * SMPStaticAnalyzer.cpp - <see below>.
 *
 * Copyright (c) 2000, 2001, 2010 - University of Virginia 
 *
 * This file is part of the Memory Error Detection System (MEDS) infrastructure.
 * This file may be used and modified for non-commercial purposes as long as 
 * all copyright, permission, and nonwarranty notices are preserved.  
 * Redistribution is prohibited without prior written consent from the University 
 * of Virginia.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: University of Virginia
 * e-mail: jwd@virginia.com
 * URL   : http://www.cs.virginia.edu/
 *
 * Additional copyrights 2010, 2011, 2012, 2013, 2014, 2015 by Zephyr Software LLC
 * e-mail: {clc,jwd}@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 */

//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
//   (Software Memory Protection).
//

#include <algorithm>
#include <set>
#include <interfaces/idapro/all.h>
#if 0
#include <ida.hpp>
#include <idp.hpp>
#include <ua.hpp>
#include <bytes.hpp>
#endif
#include <loader.hpp>
#include "interfaces/STARSTypes.h"
#include "interfaces/STARSIDATypes.h"
#include "interfaces/SMPDBInterface.h"
#include "base/SMPStaticAnalyzer.h"
#include "base/SMPDataFlowAnalysis.h"
#include "base/SMPProgram.h"
#include "base/SMPFunction.h"
#include "base/SMPInstr.h"
#include "base/ProfilerInformation.h"
#include "interfaces/abstract/STARSOp.h"
#include "interfaces/abstract/STARSInterface.h"
#include "interfaces/idapro/STARSInterface.h"
#include "interfaces/idapro/STARSProgram.h"
// Set to 1 to make IDAP_run() busy-wait for 15 seconds on entry, leaving time
//  to attach a debugger and set an early breakpoint.
#define SMP_DEBUG_DELAY 0   // for setting an early breakpoint

// Compile-time switches for diagnostic output. Each is tested with #if, so
//  they must remain preprocessor macros with the value 0 or 1.
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG3 0   // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0  // Find & fix data addresses in code segments

// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
//  which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356  // How many funcs to process in problem search
// Count of functions processed so far. NOTE(review): presumably compared
//  against SMP_DEBUG_COUNT when SMP_BINARY_DEBUG is enabled; the code that
//  updates it is not in this part of the file -- confirm.
int FuncsProcessed = 0;

#define SMP_FIXUP_IDB 0  // Try to fix the IDA database? NOTE: Needs lots of updating before re-enabling.
clc5q's avatar
clc5q committed
#define SMP_DEBUG_FIXUP_IDB 0  // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1  // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 0 // Detect whether we are causing code to be orphaned
#define SMP_IDAP_RUN_DELAY 0  // Delay in IDAP_run() so we can attach debugger to process.
#define STARS_GENERATE_DIF_FILE STARS_SCCP_CONVERT_UNREACHABLE_BLOCKS  // If we optimize, generate DIF file
static SMPProgram *CurrProg = NULL;
STARS_Interface_t* global_stars_interface = NULL;
STARS_Program_t *global_STARS_program = NULL;
// Should we convert the x86 LOCK prefix byte to a no-op to avoid
//  IDA Pro problems with instructions that jump past the LOCK
//  prefix and look like they are jumping into the middle of an
//  instruction?
#define STARS_REMOVE_LOCK_PREFIX 0
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
//  into the middle of an instruction to IDA Pro, causing it to not collect instructions
//  into a procedure. We replace these bytes with no-op opcodes because none of our analyses
//  care about LOCK prefices. We store the addresses where we have done the replacement in a 
//  set in case we ever care.
#define X86_LOCK_PREFIX 0xF0
set<STARS_ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
static unsigned long CustomAnaCallCount = 0;

// Code addresses identified by a disassembler, such as objdump on
//  Linux. These can be used to improve the code vs. data identification
//  of IDA Pro.
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.

// List of functions that need to be reanalyzed after all the code fixup
//  and code discovery is complete. Kept as a list of addresses; any address
//  within the function is good enough to designate it.
list<STARS_ea_t> FuncReanalyzeList;

// A code region that has been converted from data but has code addresses that
//  need to be reanalyzed. This is usually because a former data address is
//  now a jump to a code target that is still a data address. We have to wait
//  until the target has become code before IDA will accept the jump as valid.
class FixupRegion {
public:
	FixupRegion(SMP_bounds_t);
	inline STARS_ea_t GetStart(void) const { return CodeRegion.startEA; };
	inline STARS_ea_t GetEnd(void) const { return CodeRegion.endEA; };
	inline void SetStart(STARS_ea_t addr) { CodeRegion.startEA = addr; };
	list<STARS_ea_t> FixupInstrs; // easier to expose than to encapsulate
private:
	SMP_bounds_t CodeRegion;
};

// Record Range as the bounds of this region; the list of instructions to
//  fix up starts out empty.
FixupRegion::FixupRegion(SMP_bounds_t Range) : CodeRegion(Range) {
}

// List of code regions that were not completely analysed because of jump to
//  data considerations. Each entry is a FixupRegion carrying the instruction
//  addresses that still need another analysis attempt.
list<FixupRegion> CodeReanalyzeList;

// Lookup tables for the Zephyr Security Toolkit (ZST) system call policies.

// Map library function names to their system call type.
map<string, ZST_SysCallType> ZST_FuncTypeMap;

// Map system call types to their Zephyr Security Toolkit security policy.
map<ZST_SysCallType, ZST_Policy> ZST_TypePolicyMap;

// Set of whitelisted file locations.
set<string> ZST_FileLocWhitelist;

// Set of whitelisted network locations.
set<string> ZST_NetworkLocWhitelist;

// Set of blacklisted file locations.
set<string> ZST_FileLocBlacklist;

// Set of blacklisted network locations.
set<string> ZST_NetworkLocBlacklist;

// Set of system call names whose returned values should be trusted to have only benign numeric errors.
set<string> ZST_SystemCallNumericWhitelist;
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
bool IDAP_run(size_t);
#endif

// Functions for diagnosing and/or fixing problems in the IDA database.
void FixupIDB(void);  // Driver for all other fixing functions.
void FindDataInCode(void);
void AuditTailChunkOwnership(void);
jdh8d's avatar
jdh8d committed
void FindOrphanedCode(STARS_Segment_t *, FILE *, FILE *);
void Debug_FindOrphanedCode(STARS_Segment_t *, bool);
void FindLinksFromOrphanedCode(STARS_Segment_t *);
void AuditCodeTargets(void);
void SpecialDebugOutput(void);
void RemoveIDACodeAddr(STARS_ea_t);
static unsigned long DebugCounter = 0;

// Custom instruction-analysis helper: treat an x86 LOCK prefix byte as a
//  one-byte no-op.
// Reads the byte at CurrentAddr and bumps CustomAnaCallCount. If the byte is
//  not X86_LOCK_PREFIX, returns false and changes nothing else. Otherwise the
//  address is recorded in LockPreficesRemoved (the assert fires if the same
//  address is recorded twice), and for IDA SDK versions before 7.0 the global
//  cmd record is rewritten as a one-byte NN_nop. Returns true in that case.
bool STARS_custom_ana(STARS_ea_t CurrentAddr) {
	// static_assert(sizeof(STARS_ea_t) == sizeof(uintptr_t), "Sizeof mismatch between STARS_ea_t and uintptr_t");
	const int FirstByte = get_byte(CurrentAddr);
	++CustomAnaCallCount;
	if (X86_LOCK_PREFIX != FirstByte) {
		return false; // not a LOCK prefix; nothing to do
	}

	// Optional one-time stall so that a debugger can be attached.
#define STARS_ANA_DEBUG_DELAY 0
#if STARS_ANA_DEBUG_DELAY
	if (0 == DebugCounter) {
		time_t start;
		time_t current;

		time(&start);
		printf("delay for 15 seconds.\n");
		do {
			time(&current);
		} while (difftime(current, start) < 15.0);
		++DebugCounter;
	}
#endif

	// Remember where the prefix was found; each address is expected only once.
	const bool FirstTimeSeen = LockPreficesRemoved.insert(CurrentAddr).second;
	assert(FirstTimeSeen);
	(void) FirstTimeSeen; // keeps release builds (NDEBUG) warning-free

#if (IDA_SDK_VERSION < 700)
	cmd.itype = NN_nop; // make it a no-op
	cmd.size = 1; // one-byte no-op
#if 0
	cmd.auxpref = 0; // clear prefix and flags fields
	cmd.segpref = 0;
	cmd.insnpref = 0;
	cmd.flags = 0;
#endif
#endif
	return true;
} // end of STARS_custom_ana()

clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
static int idaapi idp_callback(void *, int event_id, va_list va) {
clc5q's avatar
clc5q committed
#else
static ssize_t idaapi idp_callback(void *, int event_id, va_list va) {
#endif

#if STARS_REMOVE_LOCK_PREFIX
	if (event_id == processor_t::custom_ana) {
#if 1
		int code = ua_next_byte();
		++CustomAnaCallCount;
		if (X86_LOCK_PREFIX == code) {
			pair<set<STARS_ea_t>::iterator, bool> InsertResult;
			InsertResult = LockPreficesRemoved.insert(CurrentAddr);
			cmd.itype = NN_nop; // make it a no-op
			return (int) (cmd.size + 1);
		}
		else {
			return 0;
		}
#else
		if (STARS_custom_ana(CurrentAddr)) {
			return 1; // handled event
		}
#endif
	}
#endif // STARS_REMOVE_LOCK_PREFIX
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
	int CheckCode = ph.auto_empty_finally;
#else
	idb_event::event_code_t CheckCode = idb_event::auto_empty_finally;
#endif
	if (event_id == (int) CheckCode) {   // IDA analysis is done
		// Ensure correct working environment.
		// Handle Cyber Grand Challenge project variant of ELF.
#define STARS_CGC_FILETYPE 16705
		bool CGCFile = (inf.filetype == STARS_CGC_FILETYPE);
		if ((inf.filetype != f_ELF) && (inf.filetype != f_PE) && (!CGCFile)) {
			SMP_msg("ERROR: FileType is %d \n", inf.filetype);
jdh8d's avatar
jdh8d committed
			error("Executable format must be PE, ELF or CGC.");
			return 0;
		}
		else if (CGCFile) {
an7s's avatar
an7s committed
			global_stars_interface->SetCGCBinary();
		}
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
		bool success = IDAP_run(0);
#endif
#if STARS_REMOVE_LOCK_PREFIX
		SMP_msg("INFO: Calls to STARS_custom_ana: %lu \n", CustomAnaCallCount);
		SMP_msg("INFO: Number of LOCK prefices eliminated: %zu \n", LockPreficesRemoved.size());
		LockPreficesRemoved.clear();
#endif // STARS_REMOVE_LOCK_PREFIX

	return 0;  // did not process any event
} // end of idp_callback()

	/* init the interface */
	global_stars_interface = new STARS_IDA_Interface_t;
	global_STARS_program = new STARS_IDA_Program_t;
#if 0 // We are now calling from the SMP.idc script.
	// Skip this plugin if it was not specified by the user on the
	//  command line.
	if (get_plugin_options("SMPStaticAnalyzer") == NULL) {
		return PLUGIN_SKIP;
	}
#endif
	// Ensure correct working environment.
	if (ph.id != PLFM_386) {
		error("Processor must be x86.");
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
	bool hooked = hook_to_notification_point(HT_IDP, idp_callback, NULL);
#else
	// auto_empty_finally is now hooked to HT_IDB instead of HT_IDP
clc5q's avatar
clc5q committed
	bool hooked = hook_to_notification_point(HT_IDB, idp_callback, NULL);
#endif
#ifdef STARS_IRDB_INTERFACE
	SMPLogFile = NULL;
#endif
	return PLUGIN_KEEP;
} // end of IDAP_init

void IDAP_term(void) {
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
	unhook_from_notification_point(HT_IDP, idp_callback, NULL);
clc5q's avatar
clc5q committed
#else
	// auto_empty_finally is now hooked to HT_IDB instead of HT_IDP
clc5q's avatar
clc5q committed
	unhook_from_notification_point(HT_IDB, idp_callback, NULL);
#endif
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
bool IDAP_run(size_t arg) {
#endif
clc5q's avatar
clc5q committed
	FILE *AnnotFile = NULL;
	FILE *InfoAnnotFile = NULL;
	FILE *AsmFile;
	// For debugging, we can add a delay loop so we have time to attach gdb to the
	// running process and set a breakpoint.
#if SMP_DEBUG_DELAY
	time_t start;
	time_t current;

	time(&start);
	printf("delay for 15 seconds.\n");
	do {
		time(&current);
	} while(difftime(current,start) < 15.0);
#endif

#if SMP_DEBUG
	SMP_msg("Beginning IDAP_run.\n");
#endif

	SMP_msg("IDA SDK version: %d \n", IDA_SDK_VERSION);

	DefOrUse DummyRef;
	STARSOpndType DummyOperand;
	std::size_t RefObjectSize = sizeof(DummyRef), OpndSize = sizeof(DummyOperand);
	SMP_msg("INFO: Size of DefOrUse: %zu Size of op_t: %zu \n", RefObjectSize, OpndSize);
	SMP_msg("INFO: Size of STARS_ea_t: %zu Size of uintptr_t: %zu \n", sizeof(STARS_ea_t), sizeof(uintptr_t));
	SMP_msg("INFO: Size of SMPInstr: %zu Size of SMPBasicBlock: %zu \n", sizeof(SMPInstr), sizeof(SMPBasicBlock));
	if (inf.is_64bit()) {
		global_STARS_program->Set64BitBinary();
		SMP_msg("INFO: 64-bit binary detected.\n");
	}
	else {
		global_STARS_program->Set32BitBinary();
		SMP_msg("INFO: 32-bit binary detected.\n");
	}
clc5q's avatar
clc5q committed
	global_STARS_program->SetIDAProDriverFlag();
	// See if command-line options for our plugin got passed to IDA Pro.
	const char *PluginOptionsString = ::get_plugin_options("SMPStaticAnalyzer");
	bool GenAsmFlag = true;
	if (nullptr != PluginOptionsString) {
		SMP_msg("INFO: Command-line plugin options string: %s\n", PluginOptionsString);
		if (nullptr != strstr(PluginOptionsString, "Reduce2")) {
			global_STARS_program->SetLevel2ReducedProcessingFlag(true);
			SMP_msg("INFO: Level 2 Reduced processing requested via command-line options.\n");
		}
		else if (nullptr != strstr(PluginOptionsString, "Reduced")) {
			global_STARS_program->SetReducedProcessingFlag(true);
			SMP_msg("INFO: Reduced processing requested via command-line options.\n");
		}
		else {
			SMP_msg("INFO: Command-line options did not include the Reduced option.\n");
		}
		if (nullptr != strstr(PluginOptionsString, "ShadowFuncPtrs")) {
			global_STARS_program->SetFuncPtrShadowingFlag(true);
			SMP_msg("INFO: Function pointer shadowing requested via command-line options.\n");
		}
		if (nullptr != strstr(PluginOptionsString, "ShadowArgs")) {
			global_STARS_program->SetArgShadowingFlag(true);
			SMP_msg("INFO: Critical argument shadowing requested via command-line options.\n");
		}
		if (nullptr != strstr(PluginOptionsString, "ImproveCFG")) {
			global_STARS_program->SetCFGImprovementFlag(true);
			SMP_msg("INFO: Maximum CFG improvement requested via command-line options.\n");
		}
		if (nullptr != strstr(PluginOptionsString, "AnalyzeLoops")) {
			global_STARS_program->SetLoopAnalysesFlag(true);
			SMP_msg("INFO: Deep loop analyses requested via command-line options.\n");
		}
		if (nullptr != strstr(PluginOptionsString, "SPARK")) {
			global_STARS_program->SetSPARKFlag(true);
			SMP_msg("INFO: Translation to SPARK Ada requested via command-line options.\n");
		}
		if (nullptr != strstr(PluginOptionsString, "NoGenAsm")) {
			GenAsmFlag = false;
			SMP_msg("INFO: ASM file generation requested via command-line options.\n");
		}
	}
	else {
		SMP_msg("INFO: No command-line options found.\n");
	}

	global_STARS_program->DetermineRootFileName();

	// read the Profiler generated information into a new prof_info class 
	CurrProg = new SMPProgram();
	// Open the files for Xrefs from data, etc. Wait for profiling data to be read from the
	//  main annotation file before reopening it for output.
	if (!(global_STARS_program->OpenSecondaryFilesOnly())) {
		SMP_msg("FATAL ERROR: At least one file could not be opened.\n");
		error("FATAL ERROR: At least one file could not be opened.\n");
		delete global_STARS_program;
		delete global_stars_interface;
#if (IDA_SDK_VERSION < 700)
		return;
#else
		return false;
#endif
	}

	CurrProg->AnalyzeData(); // Analyze static data in the executable

	global_STARS_program->FindCodeAddressesTaken(CurrProg); // find code addresses in read-only data segments
	// Note: ProfilerInformation must come after the call above to AnalyzeData().
	ProfilerInformation *prof_info = new ProfilerInformation(global_STARS_program->GetAnnotFileName().c_str(), CurrProg);

	if (!global_STARS_program->OpenMainAnnotationFile()) {
		SMP_msg("FATAL ERROR: At least one file could not be opened.\n");
		error("FATAL ERROR: At least one file could not be opened.\n");
		delete global_STARS_program;
		delete global_stars_interface;
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
		return false;
#endif
	string AsmFileName(global_STARS_program->GetRootFileName());
	string AsmFileSuffix(".asm");
	AsmFileName += AsmFileSuffix;
	string DifFileName(global_STARS_program->GetRootFileName());
	string DifFileSuffix(".dif");
	DifFileName += DifFileSuffix;
	// For debugging, we can add a delay loop so we have time to attach gdb to the
	// running process and set a breakpoint.
#if SMP_IDAP_RUN_DELAY
	time_t start;
	time_t current;

	time(&start);
	printf("delay for 15 seconds.\n");
	do {
		time(&current);
	} while(difftime(current,start) < 15.0);
#endif

#if SMP_DEBUG_CODE_ORPHANS
	CodeOrphans.clear();
	RecentAddr = STARS_BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			Debug_FindOrphanedCode(seg, true);
	}
	// Read the Zephyr Security Toolkit system call security policies, if available.
	global_STARS_program->ZST_InitPolicies();
	try { // We will catch memory exhaustion errors.

jdh8d's avatar
jdh8d committed
	FindDataInCode();
	FixCodeIdentification();
jdh8d's avatar
jdh8d committed
	delete prof_info;
	delete CurrProg;
	delete global_STARS_program;
	delete global_stars_interface;
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
	return true;
#endif
#endif

	// Pre-audit the IDA database by seeing if the distinction
	//  between code and data can be improved, and if all branches
	//  and calls have proper code targets and code cross references.
#if SMP_FIXUP_IDB
jdh8d's avatar
jdh8d committed
	FixupIDB();
	if (0 < prof_info->GetProfilerAnnotationCount()) {
		SMP_msg("Calling InferDataGranularity\n");
		SMP_msg("ptr to MemoryAccessInfo: %p\n", prof_info->GetMemoryAccessInfo());
		prof_info->GetMemoryAccessInfo()->InferDataGranularity();
		SMP_msg("Returned from InferDataGranularity\n");
	
	CurrProg->ProfGranularityFinished(global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());

	RecentAddr = STARS_BADADDR;
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			FindLinksFromOrphanedCode(seg);
	}

	CurrProg->Analyze(prof_info, global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
	if (global_STARS_program->ShouldSTARSPerformFullAnalysis()) {
		CurrProg->EmitAnnotations(global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
	// Process the instructions that are not in functions (generally, an IDA problem, or just no-ops for
	//  alignment purposes).
#if SMP_DEBUG_CODE_ORPHANS
	RecentAddr = STARS_BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			Debug_FindOrphanedCode(seg, true);
	}
#endif

clc5q's avatar
clc5q committed
#if SMP_FIND_ORPHANS
	RecentAddr = STARS_BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (seg->IsCodeSegment())
			FindOrphanedCode(seg, global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
	for (int OptType = 0; OptType <= LAST_OPT_CATEGORY; ++OptType) {
		SMP_msg("Optimization Category Count %d:  %d Annotations: %d\n",
			OptType, global_STARS_program->GetOptCount(OptType), global_STARS_program->GetAnnotationCount(OptType));
	SMP_fprintf(global_STARS_program->GetInfoAnnotFile(), "   8000000      2 SUCCESS ANALYSISCOMPLETED\n");
	global_STARS_program->CloseFiles();
	if (!global_STARS_program->ShouldSTARSPerformReducedAnalysis()) {
		if (GenAsmFlag) {
			AsmFile = SMP_fopen(AsmFileName.c_str(), "w");
			if (NULL == AsmFile) {
				error("FATAL ERROR: Cannot open ASM file %s\n", AsmFileName.c_str());
			}
			else {
				int FileResult = gen_file(OFILE_ASM, AsmFile, LowestCodeAddress, 0xffffffff, (GENFLG_MAPSEG | GENFLG_ASMTYPE));
				if (0 >= FileResult) {
					SMP_msg("ERROR: Could not generate ASM file.\n");
				}
				SMP_fclose(AsmFile);
clc5q's avatar
clc5q committed
		}
#if STARS_GENERATE_DIF_FILE
		DifFile = SMP_fopen(DifFileName.c_str(), "w");
		if (NULL == DifFile) {
			error("FATAL ERROR: Cannot open DIF file %s\n", DifFileName.c_str());
		}
		else {
			int FileResult = gen_file(OFILE_DIF, DifFile, LowestCodeAddress, STARS_BADADDR, 0);
			if (0 >= FileResult) {
				SMP_msg("ERROR: Could not generate DIF file.\n");
			}
			SMP_fclose(DifFile);
clc5q's avatar
clc5q committed
#endif
	} // end if not reduced analysis

	SMP_msg("INFO: Files closed, freeing memory and exiting.\n");
jdh8d's avatar
jdh8d committed
	delete prof_info;
	SMP_msg("INFO: Deleted prof_info.\n");
	delete CurrProg;
	SMP_msg("INFO: Deleted CurrProg. Returning to IDA Pro.\n");
	delete global_STARS_program;
	delete global_stars_interface;
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
	return true;
#endif
clc5q's avatar
clc5q committed
		SMP_msg("FATAL ERROR: Memory exhausted.\n");
		if (NULL != InfoAnnotFile) {
			SMP_fprintf(InfoAnnotFile, "   8000000      2 ERROR MEMORYEXHAUSTED\n");
			SMP_fclose(InfoAnnotFile);
		}
		if (NULL != AnnotFile) {
			SMP_fclose(AnnotFile);
		}
		delete CurrProg;
		delete prof_info;
		delete global_STARS_program;
		delete global_stars_interface;
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
clc5q's avatar
clc5q committed
#else
		return true;
#endif
// Descriptive strings handed to IDA Pro through the PLUGIN descriptor below.
char IDAP_comment[] = "Zephyr Software STARS (Static Analyzer for Reliability and Security)";
char IDAP_help[] = "Good luck";
char IDAP_hotkey[] = "Alt-J";

// Plugin descriptor that IDA Pro reads when it loads this module.
// NOTE(review): the per-field remarks follow the plugin_t layout in the IDA
//  SDK (loader.hpp); confirm them against the SDK version being built with.
plugin_t PLUGIN = {
	IDP_INTERFACE_VERSION,     // SDK interface version
	PLUGIN_PROC | PLUGIN_MOD,  // plugin flags
	IDAP_init,                 // initialization callback
	IDAP_term,                 // termination callback
	IDAP_run,                  // invocation callback
	IDAP_comment,              // comment string
	IDAP_help,                 // help string
	IDAP_name,                 // plugin name
	IDAP_hotkey                // hotkey string
};

// Find all code addresses in the IDA database and enter them into
//  IDAProLocs. Find all code addresses identified by the external
//  disassembler (e.g. objdump) and enter them into DisasmLocs.
void FindCodeAddresses(void) {
	// Read in code addresses as found by an external disassembler.
	STARS_ea_t CurrDisasmAddr;
	string DisasmFileName(global_STARS_program->GetRootFileName());
	string FileSuffix(".SMPobjdump");
	DisasmFileName += FileSuffix;
	FILE *DisasmFile = SMP_fopen(DisasmFileName.c_str(), "r");
	if (NULL == DisasmFile) {
		error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str());
		return;
	}

#define DISASM_RESERVE_SIZE  50000
	DisasmLocs.reserve(DISASM_RESERVE_SIZE);
	unsigned long TempAddr;
	int ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
	CurrDisasmAddr = (STARS_ea_t) TempAddr;
	while (1 == ScanReturn) {
		int NextChar;
		DisasmLocs.push_back(CurrDisasmAddr);
		// Swallow the rest of the input line and get the next address.
		do {
			NextChar = qfgetc(DisasmFile);
		} while ((EOF != NextChar) && ('\n' != NextChar));
		ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
		CurrDisasmAddr = (STARS_ea_t) TempAddr;
	} // end while (1 == ScanReturn)
	if (0 >= DisasmLocs.size()) {
		SMP_msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str());
		SMP_fclose(DisasmFile);
		DisasmLocs.clear();
		SMP_msg("%zu Disasm addresses read from %s\n", DisasmLocs.size(),
	}

	// Find all the code locs in the IDA Pro database. As we find
	//  them, store them in IDAProLocs.
	STARS_ea_t RecentAddr = STARS_BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (!seg->IsCodeSegment())
		for (STARS_ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = SMP_get_item_end(addr)) {
clc5q's avatar
clc5q committed
			flags_t InstrFlags = SMP_getFlags(addr);
			if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
clc5q's avatar
clc5q committed
#if 0
				if ((0x806cda4 <= addr) && (0x806cf99 >= addr))
clc5q's avatar
clc5q committed
#endif
			} // end if (SMP_isHead(addr) && SMP_isCode(addr)
#if SMP_DEBUG_FIXUP_IDB
			else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) {
				if (!SMP_isHead(InstrFlags))
					SMP_msg("Weirdness: not isHead at %x\n", addr);
clc5q's avatar
clc5q committed
				if (SMP_isUnknown(InstrFlags)) {
					SMP_msg("Weirdness: isUnknown at %x\n", addr);
		} // end for (STARS_ea_t addr = seg->startEA; ...)
	} // end for all segments
	return;
} // end FindCodeAddresses()

// Return true if addr is not a proper beginning address for an instruction.
// Return false otherwise.
// Currently, we claim that an instruction is misaligned if DisasmLocs does
//  not contain it. This function is useful for dealing with errors in IDA
//  code identification, in which a large code section is identified as data,
//  but some instructions in the middle of the "data" are identified as
//  code but IDA often starts on the wrong boundary in these cases.
// DisasmLocs must be sorted in ascending address order, which it is because
//  of the way in which it was generated.
// When DisasmLocs is empty no address can be found in it, so every addr is
//  reported as misaligned. The previous hand-written search indexed
//  DisasmLocs[0] and computed (max - 1) on an unsigned zero in that case,
//  which read out of bounds and could not terminate correctly.
bool IsCodeMisaligned(STARS_ea_t addr) {
	const bool KnownInstrStart = std::binary_search(DisasmLocs.begin(), DisasmLocs.end(), addr);
	return !KnownInstrStart;
} // end of IsCodeMisaligned()

// Delete addr from IDAProLocs if it is present; do nothing otherwise.
// IDAProLocs must be sorted in ascending address order, which it is because
//  of the way in which it was generated, so a binary search is used.
// An empty IDAProLocs is handled: the previous hand-written search indexed
//  IDAProLocs[0] and computed (max - 1) on an unsigned zero in that case.
void RemoveIDACodeAddr(STARS_ea_t addr) {
	// lower_bound yields the first element that is not less than addr, or
	//  end() if there is none.
	auto Candidate = std::lower_bound(IDAProLocs.begin(), IDAProLocs.end(), addr);
	if ((Candidate != IDAProLocs.end()) && (*Candidate == addr)) {
		IDAProLocs.erase(Candidate);
	}
	return;
} // end of RemoveIDACodeAddr()

// Top-level driver for the IDA database repair passes. When it returns, the
//  IDA database (IDB file) has been fixed up as far as these passes can
//  manage, and the two address tables used along the way have been emptied.
void FixupIDB(void) {
	// Gather the code addresses that the later passes consult.
	FindCodeAddresses();
#if SMP_DEBUG_FIXUP_IDB
	SpecialDebugOutput();
#endif

	// Audits that run whether or not external disassembly data was found.
	AuditCodeTargets();
	FindDataInCode();
	AuditTailChunkOwnership();

	// The remaining repairs need the external disassembler's addresses.
	const bool HaveDisasmAddrs = !DisasmLocs.empty();
	if (HaveDisasmAddrs) {
		FixCodeIdentification();
		(void) FixupNewCodeChunks(); // the returned count is not used here
		SpecialDebugOutput();
	}

	// Neither address table is needed once the fix-up is finished.
	DisasmLocs.clear();
	IDAProLocs.clear();
} // end of FixupIDB()

// Find and print all data head addresses in code segments. 
// If an isolated code instruction is found in the midst of a run
//  of data bytes and has no code xrefs jumping to it, it is not
//  reachable as code and is undoubtedly a mixup by IDA. Possibly
//  the whole data region will be converted to code later, in which
//  case the isolated code is not necessarily properly aligned and
//  parsed at its present address, so we are glad to convert it into
//  data anyway so that FindDataToConvert() will succeed on it later.
// Data to code conversion, and isolated code detection, are inhibited
//  by IDA identifying several consecutive instructions in the midst
//  of a data region, with the code addresses not agreeing with the
//  external disassembler's code addresses. We will convert these
//  misaligned instructions to data as we detect them. We will also
//  convert unexplored bytes (SMP_isUnknown(flags) == true) into data if
//  they are in the midst of a data sequence.
#define MIN_DATARUN_LEN 24  // #bytes on either side of "isolated" code
void FindDataInCode(void) {
	size_t DataRunLen = 0; // How many data bytes in a row have we seen?
	bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
									// Now looking for data
	STARS_ea_t IsolatedCodeAddr = STARS_BADADDR;
	int IsolatedCodeLen = 0;
	STARS_ea_t RecentAddr = STARS_BADADDR;
jdh8d's avatar
jdh8d committed
	for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) {
		RecentAddr = seg->get_startEA();
		if (!seg->IsCodeSegment())
		STARS_ssize_t SegNameSize = SMP_get_segm_name(seg, SegName, sizeof(SegName) - 1);
		SMP_msg("Non-code addresses for code segment %s from %x to %x\n",
		for (STARS_ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = SMP_get_item_end(addr)) {
clc5q's avatar
clc5q committed
			flags_t AddrFlags = SMP_getFlags(addr);
			if (SMP_isHead(AddrFlags)) {
clc5q's avatar
clc5q committed
				if (SMP_isData(AddrFlags)) {
					DataRunLen += get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
#endif
					if (MIN_DATARUN_LEN <= DataRunLen) {
						if (IsolatedCodeTrigger) {
							// Saw data, then one isolated code, then data
clc5q's avatar
clc5q committed
							SMP_del_items(IsolatedCodeAddr, IsolatedCodeLen, 0);
clc5q's avatar
clc5q committed
							if (SMP_create_data(IsolatedCodeAddr, SMP_byteflag(),
									SMP_msg("Converted isolated code to data: %lx\n",
										(unsigned long) IsolatedCodeAddr);
								SMP_msg("Failed to convert isolated code to data: %llx len: %x\n",
									(uint64_t) IsolatedCodeAddr, IsolatedCodeLen);
							}
							IsolatedCodeTrigger = false;
						} // end if (IsolatedCodeTrigger)
					} // end if (MIN_DATARUN_LEN <= DataRunLen)
clc5q's avatar
clc5q committed
				} // end if (SMP_isData(AddrFlags)
				else if (SMP_isUnknown(AddrFlags)) {
					// Just in case; unknown usually means not head or tail
					// If in a data run, convert to data.
					InstrLen = get_item_size(addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
					SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
						if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
							SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
							SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
				else if (SMP_isCode(AddrFlags)) {  // must be true
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("DataRunLen: %d at %x\n", DataRunLen, addr);
						SMPInstr TempInst(addr);
						TempInst.Analyze();
						InstrLen = (int) TempInst.GetSize();
						// We don't check the returned InstrLen for validity because IsCodeMisaligned()
						//  will check for validity immediately below.
						SMP_msg("Calling IsCodeMisaligned: len %d\n", InstrLen);
#endif
						if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
clc5q's avatar
clc5q committed
							SMP_del_items(addr, InstrLen, 0);
clc5q's avatar
clc5q committed
							if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("Converted misaligned code to data at %x : len: %x\n",
clc5q's avatar
clc5q committed
#endif
								// Step back so data gets processed.
								DataRunLen += get_item_size(addr);
								continue; // skip reset of DataRunLen
							}
							else {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("Misaligned code left as unknown at %x : len: %x\n",
clc5q's avatar
clc5q committed
#endif
								IsolatedCodeTrigger = false;
							}
						} // end if (IsCodeMisaligned() ...)
clc5q's avatar
clc5q committed
						else if (!SMP_hasRef(AddrFlags)) {
							// No references at all --> isolated code.
							IsolatedCodeTrigger = true;
							IsolatedCodeAddr = addr;
							IsolatedCodeLen = InstrLen;
						}
						else {
							SMP_xref_t xb;
							bool ok = xb.SMP_first_to(addr, XREF_ALL);
							if (!ok) {
								// No code xrefs to this target addr.
								IsolatedCodeTrigger = true;
								IsolatedCodeAddr = addr;
								IsolatedCodeLen = InstrLen;
							}
						}
					} // end if (MIN_DATARUN_LEN <= DataRunLen)
					else if (IsolatedCodeTrigger) {
						// Two instructions in a row does not fit the pattern.
						IsolatedCodeTrigger = false;
					}
					DataRunLen = 0;
clc5q's avatar
clc5q committed
				} // end if (SMP_isData) ... else if (isUnknown) ... else SMP_isCode
clc5q's avatar
clc5q committed
			else if (SMP_isUnknown(AddrFlags)) {
				// If in a data run, convert to data.
				InstrLen = get_item_size(addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
					if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
		} // end for (STARS_ea_t addr =  seg->startEA; ...)
	} // end for all segments
	return;
} // end of FindDataInCode()


// The choices that IDA makes for deciding which parent function of a
//  TAIL chunk is the primary owner of the tail can be counterintuitive.
//  A function entry can both fall into and jump to a tail chunk that
//  is contiguous with it, yet the "owner" might be a function that is
//  far below it in the executable address space. This function will
//  change the ownership to a more sensible arrangement.
void AuditTailChunkOwnership(void) {