/*
* SMPStaticAnalyzer.cpp - <see below>.
*
* Copyright (c) 2000, 2001, 2010 - University of Virginia
*
* This file is part of the Memory Error Detection System (MEDS) infrastructure.
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
* Additional copyrights 2010, 2011, 2012, 2013, 2014, 2015 by Zephyr Software LLC
* e-mail: {clc,jwd}@zephyr-software.com
* URL : http://www.zephyr-software.com/
*/
//
// SMPStaticAnalyzer.cpp
//
// This plugin performs the static analyses needed for the SMP project
// (Software Memory Protection).
//
clc5q
committed
#include <list>
#include <vector>
clc5q
committed
#include <string>

Jason Hiser
committed
#include <sstream>
clc5q
committed
#if 0 // should all be in interfaces/idapro/all.h
#include <pro.h>
#include <ua.hpp>
#include <bytes.hpp>
#include <loader.hpp> // for plugin_t
#include "interfaces/STARSTypes.h"
#include "interfaces/STARSIDATypes.h"
#include "interfaces/SMPDBInterface.h"
#include "base/SMPStaticAnalyzer.h"
#include "base/SMPDataFlowAnalysis.h"
#include "base/SMPProgram.h"
#include "base/SMPFunction.h"
#include "base/SMPInstr.h"
#include "base/ProfilerInformation.h"
#include "interfaces/abstract/STARSOp.h"
#include "interfaces/abstract/STARSInterface.h"
#include "interfaces/idapro/STARSInterface.h"
#include "interfaces/idapro/STARSProgram.h"
using namespace std;
clc5q
committed
// Compile-time debugging and feature switches for this file.
#define SMP_DEBUG_DELAY 0 // for setting an early breakpoint
#define SMP_DELAY_TIME 25.0 // 25 seconds
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_MEM 0 // print memory operands
#define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
#define SMP_DEBUG_DATA_ONLY 0 // Find & fix data addresses in code segments
// Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
// which function is causing a problem.
#define SMP_BINARY_DEBUG 0
#define SMP_DEBUG_COUNT 356 // How many funcs to process in problem search
// Running count of functions; presumably compared against SMP_DEBUG_COUNT
// during the binary search described above -- TODO confirm against its users.
int FuncsProcessed = 0;
#define SMP_FIXUP_IDB 0 // Try to fix the IDA database? NOTE: Needs lots of updating before re-enabling.
#define SMP_DEBUG_FIXUP_IDB 0 // debugging output for FixupIDB chain
#define SMP_FIND_ORPHANS 1 // find code outside of functions
#define SMP_DEBUG_CODE_ORPHANS 0 // Detect whether we are causing code to be orphaned
#define SMP_IDAP_RUN_DELAY 0 // Delay in IDAP_run() so we can attach debugger to process.
#define STARS_GENERATE_DIF_FILE STARS_SCCP_CONVERT_UNREACHABLE_BLOCKS // If we optimize, generate DIF file
clc5q
committed
typedef op_t STARSOpndType;
static SMPProgram *CurrProg = nullptr;
STARS_Interface_t* global_stars_interface = nullptr;
STARS_Program_t *global_STARS_program = nullptr;
set<STARS_ea_t> CodeOrphans;
// Should we convert the x86 LOCK prefix byte to a no-op to avoid
// IDA Pro problems with instructions that jump past the LOCK
// prefix and look like they are jumping into the middle of an
// instruction?
#define STARS_REMOVE_LOCK_PREFIX 0
// Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping
// into the middle of an instruction to IDA Pro, causing it to not collect instructions
// into a procedure. We replace these bytes with no-op opcodes because none of our analyses
// care about LOCK prefices. We store the addresses where we have done the replacement in a
// set in case we ever care.
#define X86_LOCK_PREFIX 0xF0
set<STARS_ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback.
static unsigned long CustomAnaCallCount = 0;
clc5q
committed
// Code addresses identified by a disassembler, such as objdump on
// Linux. These can be used to improve the code vs. data identification
// of IDA Pro.
vector<STARS_ea_t> DisasmLocs;
clc5q
committed
// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
vector<STARS_ea_t> IDAProLocs;
clc5q
committed
// List of functions that need to be reanalyzed after all the code fixup
// and code discovery is complete. Kept as a list of addresses; any address
// within the function is good enough to designate it.
list<STARS_ea_t> FuncReanalyzeList;
// A code region that has been converted from data but has code addresses that
// need to be reanalyzed. This is usually because a former data address is
// now a jump to a code target that is still a data address. We have to wait
// until the target has become code before IDA will accept the jump as valid.
class FixupRegion {
public:
FixupRegion(SMP_bounds_t);
inline STARS_ea_t GetStart(void) const { return CodeRegion.startEA; };
inline STARS_ea_t GetEnd(void) const { return CodeRegion.endEA; };
inline void SetStart(STARS_ea_t addr) { CodeRegion.startEA = addr; };
list<STARS_ea_t> FixupInstrs; // easier to expose than to encapsulate
private:
SMP_bounds_t CodeRegion;
};
FixupRegion::FixupRegion(SMP_bounds_t Range) {
this->CodeRegion = Range;
return;
}
// List of code regions that were not completely analysed because of jump to
// data considerations.
list<FixupRegion> CodeReanalyzeList;
// Map library function names to their system call type.
map<string, ZST_SysCallType> ZST_FuncTypeMap;
// Map system call types to their Zephyr Security Toolkit security policy.
map<ZST_SysCallType, ZST_Policy> ZST_TypePolicyMap;
// Set of whitelisted file locations.
set<string> ZST_FileLocWhitelist;
// Set of whitelisted network locations.
set<string> ZST_NetworkLocWhitelist;
// Set of blacklisted file locations.
set<string> ZST_FileLocBlacklist;
// Set of blacklisted network locations.
set<string> ZST_NetworkLocBlacklist;
// Set of system call names whose returned values should be trusted to have only benign numeric errors.
set<string> ZST_SystemCallNumericWhitelist;
clc5q
committed
// Functions for diagnosing and/or fixing problems in the IDA database.
void FixupIDB(void); // Driver for all other fixing functions.
void FindDataInCode(void);
void AuditTailChunkOwnership(void);
void FindOrphanedCode(STARS_Segment_t *, FILE *, FILE *);
void Debug_FindOrphanedCode(STARS_Segment_t *, bool);
void FindLinksFromOrphanedCode(STARS_Segment_t *);
void FixCodeIdentification(void);
int FixupNewCodeChunks(void);
void AuditCodeTargets(void);
void SpecialDebugOutput(void);
void RemoveIDACodeAddr(STARS_ea_t);
// Ensures the optional attach-debugger delay in STARS_custom_ana() runs only once.
static unsigned long DebugCounter = 0;
// Turn LOCK prefix into no-op when detected. Each is one byte in length.
//
// Reads the byte at CurrentAddr and counts the call in CustomAnaCallCount.
// Returns false when the byte is not X86_LOCK_PREFIX. Otherwise records the
// address in LockPreficesRemoved, rewrites the global `cmd` as a one-byte
// NN_nop, and returns true.
bool STARS_custom_ana(STARS_ea_t CurrentAddr) {
	// static_assert(sizeof(STARS_ea_t) == sizeof(uintptr_t), "Sizeof mismatch between STARS_ea_t and uintptr_t");
	int code = ::get_byte(CurrentAddr);
	++CustomAnaCallCount;
	if (X86_LOCK_PREFIX != code) {
		return false;
	}

#define STARS_ANA_DEBUG_DELAY 0
#if STARS_ANA_DEBUG_DELAY
	// One-time busy-wait so a debugger can be attached before the first rewrite.
	if (DebugCounter == 0) {
		time_t start;
		time_t current;
		time(&start);
		printf("delay for 15 seconds.\n");
		do {
			time(&current);
		} while(difftime(current,start) < 15.0);
		++DebugCounter;
	}
#endif

	// Each LOCK prefix address is expected to be seen only once; the assert
	// fires if the same address is presented again.
	pair<set<STARS_ea_t>::iterator, bool> InsertResult = LockPreficesRemoved.insert(CurrentAddr);
	assert(InsertResult.second);
	cmd.itype = NN_nop; // make it a no-op
	cmd.size = 1; // one-byte no-op
#if 0
	cmd.auxpref = 0; // clear prefix and flags fields
	cmd.segpref = 0;
	cmd.insnpref = 0;
	cmd.flags = 0;
#endif
	return true;
} // end of STARS_custom_ana()
#if (IDA_SDK_VERSION > 749)
// Plugin module object used with IDA SDK versions above 749: run() forwards
// to IDAP_run(), and destruction performs the IDAP_term() cleanup.
struct STARS_plugmod_t : public plugmod_t {
	// Forward the plugin invocation to IDAP_run() and return its result.
	bool idaapi run(std::size_t arg) override {
		return IDAP_run(arg);
	}
	// Unhook notifications when the module object is destroyed.
	~STARS_plugmod_t() { IDAP_term(); }
};
// The single module instance; allocated in IDAP_init().
static STARS_plugmod_t *PluginModuleStruct = nullptr;
#endif
// Notification callback registered with hook_to_notification_point() in
// IDAP_init() and removed in IDAP_term().
//
// When STARS_REMOVE_LOCK_PREFIX is enabled, a processor_t::custom_ana event
// is handled by turning an x86 LOCK prefix byte into a no-op. Independently
// of that, the auto_empty_finally event (IDA analysis is done) triggers the
// STARS analysis run.
//
// NOTE(review): several lines of this function appear to be missing from the
// visible text: the #if that pairs with the first #else below, the #if that
// pairs with the #elif near the end, the declaration of `success`, and the
// closing braces / return statement. Confirm against the original file.
static int idaapi idp_callback(void *, int event_id, va_list va) {
#else
static ssize_t idaapi idp_callback(void *, int event_id, va_list va) {
#endif
#if STARS_REMOVE_LOCK_PREFIX
if (event_id == processor_t::custom_ana) {
STARS_ea_t CurrentAddr = cmd.ea;
#if 1
// Inline variant: fetch a byte with ua_next_byte() and test it directly.
int code = ua_next_byte();
++CustomAnaCallCount;
if (X86_LOCK_PREFIX == code) {
// Remember where the prefix was replaced.
pair<set<STARS_ea_t>::iterator, bool> InsertResult;
InsertResult = LockPreficesRemoved.insert(CurrentAddr);
cmd.itype = NN_nop; // make it a no-op
return (int) (cmd.size + 1);
}
else {
return 0;
}
#else
// Alternative variant: delegate the check and rewrite to STARS_custom_ana().
if (STARS_custom_ana(CurrentAddr)) {
return 1; // handled event
}
#endif
}
#endif // STARS_REMOVE_LOCK_PREFIX
// The event code that signals the end of IDA analysis is obtained differently
// depending on the SDK version.
#if (IDA_SDK_VERSION < 700)
int CheckCode = ph.auto_empty_finally;
#else
idb_event::event_code_t CheckCode = idb_event::auto_empty_finally;
#endif
if (event_id == (int) CheckCode) { // IDA analysis is done
// Handle Cyber Grand Challenge project variant of ELF.
#define STARS_CGC_FILETYPE 16705
bool CGCFile = (inf.filetype == STARS_CGC_FILETYPE);
// Only ELF, PE and CGC file types are expected; anything else is reported.
if ((inf.filetype != f_ELF) && (inf.filetype != f_PE) && (!CGCFile)) {
SMP_msg("ERROR: FileType is %d \n", inf.filetype);
// NOTE(review): the lines that close this error branch and open the
// SDK-version conditional around the three run calls are not visible here.
IDAP_run(0);
#elif (IDA_SDK_VERSION < 750)
success = IDAP_run(0);
#else
success = PluginModuleStruct->run(0);
#if STARS_REMOVE_LOCK_PREFIX
// Report how often the LOCK-prefix handling ran, then drop the bookkeeping.
SMP_msg("INFO: Calls to STARS_custom_ana: %lu \n", CustomAnaCallCount);
SMP_msg("INFO: Number of LOCK prefices eliminated: %zu \n", LockPreficesRemoved.size());
LockPreficesRemoved.clear();
#endif // STARS_REMOVE_LOCK_PREFIX
// Plugin shutdown: detach idp_callback from the notification point it was
// hooked to. From SDK version 700 on that is HT_IDB (auto_empty_finally
// moved there from HT_IDP); older SDKs use HT_IDP.
void IDAP_term(void) {
#if (IDA_SDK_VERSION >= 700)
	::unhook_from_notification_point(HT_IDB, idp_callback, nullptr);
#else
	::unhook_from_notification_point(HT_IDP, idp_callback, nullptr);
#endif
} // end of IDAP_term()
// Plugin initialization: allocates the global interface/program objects,
// checks that the processor is x86, ARM or MIPS, and hooks idp_callback to
// the notification point that delivers auto_empty_finally.
//
// NOTE(review): the preprocessor conditionals that select between the two
// signatures, the two return values of the processor check, and the two hook
// calls appear to be missing from the visible text (only stray #else/#endif
// remain), as may be braces and the final return of the plugmod_t variant.
// The statements are kept exactly as found; confirm against the original file.
int IDAP_init(void) {
static plugmod_t * idaapi IDAP_init(void) {
PluginModuleStruct = new STARS_plugmod_t;
global_stars_interface = new STARS_IDA_Interface_t;
global_STARS_program = new STARS_IDA_Program_t;
SMP_msg("IDAP_init entered.\n");
// Ensure correct working environment.
if (ph.id != PLFM_386 && ph.id != PLFM_ARM && ph.id != PLFM_MIPS)
error("Processor must be x86, ARM or MIPS.");
return PLUGIN_SKIP;
#else
return nullptr;
#endif
bool hooked = hook_to_notification_point(HT_IDP, idp_callback, nullptr);
// auto_empty_finally is now hooked to HT_IDB instead of HT_IDP
bool hooked = hook_to_notification_point(HT_IDB, idp_callback, nullptr);
#ifdef STARS_IRDB_INTERFACE
SMPLogFile = nullptr;
#endif
return PLUGIN_KEEP;
} // end of IDAP_init
// Main driver of the STARS analysis, invoked once IDA's own analysis is done.
//
// In order, the visible code: logs build and object-size information, records
// whether the binary is 32- or 64-bit and its processor type, applies the
// plugin command-line options, opens the secondary output files, analyzes
// static data, finds code addresses taken (non-shared objects only), reads
// profiler information, opens the main annotation file, runs the program
// analysis, emits annotations, processes orphaned code, prints optimization
// statistics, optionally generates ASM and DIF files, and frees the global
// objects.
//
// NOTE(review): many lines of this function are missing from the visible text
// (preprocessor conditionals, braces, the catch handler for the try block, and
// some statements). The surviving statements are kept in their original order
// and wording; gaps are flagged below. Confirm against the original file.
void IDAP_run(int arg) {
bool idaapi IDAP_run(std::size_t arg) {
FILE *AnnotFile = nullptr;
FILE *InfoAnnotFile = nullptr;
// For debugging, we can add a delay loop so we have time to attach gdb to the
// running process and set a breakpoint.
#if SMP_DEBUG_DELAY
time_t start;
time_t current;
time(&start);
SMP_msg("delay for 25 seconds.\n");
printf("delay for 25 seconds.\n");
do {
time(&current);
} while(difftime(current,start) < SMP_DELAY_TIME);
// NOTE(review): the #endif closing SMP_DEBUG_DELAY is not visible here.
#if SMP_DEBUG
SMP_msg("Beginning IDAP_run.\n");
#endif
SMP_msg("IDA SDK version: %d \n", IDA_SDK_VERSION);
// Report the build environment. _WIN32 is the macro Windows compilers
// predefine; the previous test of `__win32` could never be true.
#if __unix__
SMP_msg("INFO: Compiled in Unix environment.\n");
#elif defined(_WIN32)
SMP_msg("INFO: Compiled in Windows 32 or 64 environment.\n");
#else
SMP_msg("INFO: Compiled in unknown environment.\n");
#endif
// Log the sizes of the core analysis objects.
DefOrUse DummyRef;
STARSOpndType DummyOperand;
std::size_t RefObjectSize = sizeof(DummyRef), OpndSize = sizeof(DummyOperand);
SMP_msg("INFO: Size of DefOrUse: %zu Size of op_t: %zu \n", RefObjectSize, OpndSize);
SMP_msg("INFO: Size of STARS_ea_t: %zu Size of uintptr_t: %zu \n", sizeof(STARS_ea_t), sizeof(uintptr_t));
SMP_msg("INFO: Size of SMPInstr: %zu Size of SMPBasicBlock: %zu \n", sizeof(SMPInstr), sizeof(SMPBasicBlock));
// NOTE(review): the `if` that selects the 64-bit branch and the start of its
// processor-type expression are not visible here.
global_STARS_program->Set64BitBinary();
throw invalid_argument("Unknown machine type");
global_STARS_program->SetProcessorType(pt);
SMP_msg("INFO: 64-bit binary detected.\n");
}
else {
global_STARS_program->Set32BitBinary();
// Map the IDA processor id to the STARS processor type; unknown ids throw.
const auto pt = (ph.id == PLFM_386) ? ptX86_32 :
(ph.id == PLFM_ARM) ? ptARM :
(ph.id == PLFM_MIPS) ? ptMIPS :
throw invalid_argument("Unknown machine type");
global_STARS_program->SetProcessorType(pt);
SMP_msg("INFO: 32-bit binary detected.\n");
}
global_STARS_program->InitData();
// See if command-line options for our plugin got passed to IDA Pro.
const char *PluginOptionsString = ::get_plugin_options("SMPStaticAnalyzer");
if (nullptr != PluginOptionsString) {
SMP_msg("INFO: Command-line plugin options string: %s\n", PluginOptionsString);
// "Reduce2" is tested before "Reduced"; at most one of the two is applied.
if (nullptr != strstr(PluginOptionsString, "Reduce2")) {
global_STARS_program->SetLevel2ReducedProcessingFlag(true);
SMP_msg("INFO: Level 2 Reduced processing requested via command-line options.\n");
}
else if (nullptr != strstr(PluginOptionsString, "Reduced")) {
global_STARS_program->SetReducedProcessingFlag(true);
SMP_msg("INFO: Reduced processing requested via command-line options.\n");
}
else {
SMP_msg("INFO: Command-line options did not include the Reduced option.\n");
}
// The remaining options are independent substring matches.
if (nullptr != strstr(PluginOptionsString, "ShadowFuncPtrs")) {
global_STARS_program->SetFuncPtrShadowingFlag(true);
SMP_msg("INFO: Function pointer shadowing requested via command-line options.\n");
}
if (nullptr != strstr(PluginOptionsString, "ShadowArgs")) {
global_STARS_program->SetArgShadowingFlag(true);
SMP_msg("INFO: Critical argument shadowing requested via command-line options.\n");
}
if (nullptr != strstr(PluginOptionsString, "ImproveCFG")) {
global_STARS_program->SetCFGImprovementFlag(true);
SMP_msg("INFO: Maximum CFG improvement requested via command-line options.\n");
}
if (nullptr != strstr(PluginOptionsString, "AnalyzeLoops")) {
global_STARS_program->SetLoopAnalysesFlag(true);
SMP_msg("INFO: Deep loop analyses requested via command-line options.\n");
}
if (nullptr != strstr(PluginOptionsString, "SCCP")) {
global_STARS_program->SetConstantPropagationFlag(true);
SMP_msg("INFO: Constant propagation via SCCP requested via command-line options.\n");
}
if (nullptr != strstr(PluginOptionsString, "SPARK")) {
global_STARS_program->SetSPARKFlag(true);
SMP_msg("INFO: Translation to SPARK Ada requested via command-line options.\n");
}
if (nullptr != strstr(PluginOptionsString, "NoGenAsm")) {
GenAsmFlag = false;
SMP_msg("INFO: ASM file generation suppressed via command-line options.\n");
}
// NOTE(review): the brace closing the options-present branch is not visible;
// the else below reports the no-options case.
else {
SMP_msg("INFO: No command-line options found.\n");
}
global_STARS_program->DetermineRootFileName();
// read the Profiler generated information into a new prof_info class
CurrProg = new SMPProgram();
// Open the files for Xrefs from data, etc. Wait for profiling data to be read from the
// main annotation file before reopening it for output.
if (!(global_STARS_program->OpenSecondaryFilesOnly())) {
SMP_msg("FATAL ERROR: At least one file could not be opened.\n");
error("FATAL ERROR: At least one file could not be opened.\n");
delete global_STARS_program;
delete global_stars_interface;
#if (IDA_SDK_VERSION < 700)
return;
#else
return false;
#endif
}
// Phase 0 timing: StartTime -> Time1 covers data analysis, Time1 -> Time2
// covers code-addresses-taken, and EndTime closes the whole phase.
time_t StartTime = time(nullptr);
time_t EndTime, Time1, Time2;
CurrProg->AnalyzeData(); // Analyze static data in the executable
Time1 = time(nullptr);
if (!global_STARS_program->IsSharedObject()) {
global_STARS_program->FindCodeAddressesTaken(CurrProg); // find code addresses in read-only data segments
}
Time2 = time(nullptr);
// Note: ProfilerInformation must come after the call above to AnalyzeData().
ProfilerInformation *prof_info = new ProfilerInformation(global_STARS_program->GetAnnotFileName().c_str(), CurrProg);
EndTime = time(nullptr);
double TimeDiff = difftime(EndTime, StartTime);
SMP_msg("INFO: TIME: Phase 0: Data, CodeAddressesTaken, ProfileInfo: %7.2f\n", TimeDiff);
TimeDiff = difftime(Time1, StartTime);
SMP_msg("INFO: TIME: Phase 0A: Data: %7.2f\n", TimeDiff);
TimeDiff = difftime(Time2, Time1);
SMP_msg("INFO: TIME: Phase 0B: CodeAddressesTaken: %7.2f\n", TimeDiff);
if (!global_STARS_program->OpenMainAnnotationFile()) {
SMP_msg("FATAL ERROR: At least one file could not be opened.\n");
error("FATAL ERROR: At least one file could not be opened.\n");
delete global_STARS_program;
delete global_stars_interface;
// NOTE(review): unlike the first failure path above, only a bare `return;`
// is visible here; the SDK-version conditional may be missing.
return;
}
// Output file names are the root file name plus ".asm" / ".dif".
string AsmFileName(global_STARS_program->GetRootFileName());
string AsmFileSuffix(".asm");
AsmFileName += AsmFileSuffix;
string DifFileName(global_STARS_program->GetRootFileName());
string DifFileSuffix(".dif");
DifFileName += DifFileSuffix;
// For debugging, we can add a delay loop so we have time to attach gdb to the
// running process and set a breakpoint.
#if SMP_IDAP_RUN_DELAY
time_t start;
time_t current;
time(&start);
printf("delay for 15 seconds.\n");
do {
time(&current);
} while(difftime(current,start) < 15.0);
#endif
STARS_ea_t RecentAddr;
#if SMP_DEBUG_CODE_ORPHANS
CodeOrphans.clear();
RecentAddr = STARS_BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
Debug_FindOrphanedCode(seg, true);
}
// NOTE(review): the #endif closing SMP_DEBUG_CODE_ORPHANS is not visible here.
// Read the Zephyr Security Toolkit system call security policies, if available.
global_STARS_program->ZST_InitPolicies();
try { // We will catch memory exhaustion errors.
#if SMP_DEBUG_DATA_ONLY
SMP_fclose(SymsFile);
delete CurrProg;
delete global_STARS_program;
delete global_stars_interface;
return;
#endif
// Pre-audit the IDA database by seeing if the distinction
// between code and data can be improved, and if all branches
// and calls have proper code targets and code cross references.
#if SMP_FIXUP_IDB
#endif
// Infer data granularity only when profiler annotations were read.
if (0 < prof_info->GetProfilerAnnotationCount()) {
SMP_msg("Calling InferDataGranularity\n");
SMP_msg("ptr to MemoryAccessInfo: %p\n", prof_info->GetMemoryAccessInfo());
prof_info->GetMemoryAccessInfo()->InferDataGranularity();
SMP_msg("Returned from InferDataGranularity\n");
}
CurrProg->ProfGranularityFinished(global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
// Visit every code segment before the main analysis.
RecentAddr = STARS_BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
FindLinksFromOrphanedCode(seg);
}
CurrProg->Analyze(prof_info, global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
if (global_STARS_program->ShouldSTARSPerformFullAnalysis()) {
CurrProg->EmitAnnotations(global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
}
// Process the instructions that are not in functions (generally, an IDA problem, or just no-ops for
// alignment purposes).
#if SMP_DEBUG_CODE_ORPHANS
RecentAddr = STARS_BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
Debug_FindOrphanedCode(seg, true);
}
#endif
RecentAddr = STARS_BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (seg->IsCodeSegment())
FindOrphanedCode(seg, global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile());
}
// Output statistics.
for (int OptType = 0; OptType <= LAST_OPT_CATEGORY; ++OptType) {
SMP_msg("Optimization Category Count %d: %d Annotations: %d\n",
OptType, global_STARS_program->GetOptCount(OptType), global_STARS_program->GetAnnotationCount(OptType));
// NOTE(review): the brace closing the statistics loop is not visible here.
SMP_fprintf(global_STARS_program->GetInfoAnnotFile(), " 8000000 2 SUCCESS ANALYSISCOMPLETED\n");
global_STARS_program->CloseFiles();
if (!global_STARS_program->ShouldSTARSPerformReducedAnalysis()) {
if (GenAsmFlag) {
AsmFile = SMP_fopen(AsmFileName.c_str(), "w");
if (nullptr == AsmFile) {
error("FATAL ERROR: Cannot open ASM file %s\n", AsmFileName.c_str());
}
else {
int FileResult = gen_file(OFILE_ASM, AsmFile, LowestCodeAddress, 0xffffffff, (GENFLG_MAPSEG | GENFLG_ASMTYPE));
if (0 >= FileResult) {
SMP_msg("ERROR: Could not generate ASM file.\n");
}
SMP_fclose(AsmFile);
}
#if STARS_GENERATE_DIF_FILE
DifFile = SMP_fopen(DifFileName.c_str(), "w");
if (nullptr == DifFile) {
error("FATAL ERROR: Cannot open DIF file %s\n", DifFileName.c_str());
}
else {
int FileResult = gen_file(OFILE_DIF, DifFile, LowestCodeAddress, STARS_BADADDR, 0);
if (0 >= FileResult) {
SMP_msg("ERROR: Could not generate DIF file.\n");
}
SMP_fclose(DifFile);
} // end if not reduced analysis
SMP_msg("INFO: Files closed, freeing memory and exiting.\n");
// NOTE(review): no `delete prof_info;` is visible before this message.
SMP_msg("INFO: Deleted prof_info.\n");
delete CurrProg;
// Collect and print object-creation statistics from the interface layer.
stringstream sout;
STARS_Instruction_t::DumpCreationStats(sout);
STARS_op_t::DumpCreationStats(sout);
SMP_msg("Interface stats: \n %s", sout.str().c_str());
SMP_msg("INFO: Deleted CurrProg. Returning to IDA Pro.\n");
#if (IDA_SDK_VERSION > 749)
SMP_msg("Deleting PluginModuleStruct and exiting IDAP_run.\n");
delete PluginModuleStruct;
#endif
delete global_STARS_program;
delete global_stars_interface;
// NOTE(review): the end of the try block and its catch clause are not visible;
// the statements below look like the memory-exhaustion handler.
if (nullptr != InfoAnnotFile) {
SMP_fprintf(InfoAnnotFile, " 8000000 2 ERROR MEMORYEXHAUSTED\n");
SMP_fclose(InfoAnnotFile);
}
if (nullptr != AnnotFile) {
delete CurrProg;
delete prof_info;
delete global_STARS_program;
delete global_stars_interface;
} // end IDAP_run()
// Descriptive strings referenced by the PLUGIN descriptor below.
char IDAP_comment[] = "Zephyr Software STARS (Static Analyzer for Reliability and Security)";
char IDAP_help[] = "Good luck";
char IDAP_name[] = "STARS";
char IDAP_hotkey[] = "Alt-J";
// Plugin descriptor object. The initializer lists the interface version, a
// flags entry, the term/run entries, and then the four strings defined above.
// NOTE(review): the #if lines that pair with the two #else directives, the
// first alternative of the flags entry, and what is presumably the init entry
// (IDAP_init) appear to be missing from the visible text -- confirm against
// the original file.
plugin_t PLUGIN = {
IDP_INTERFACE_VERSION,
#else
PLUGIN_MULTI | PLUGIN_PROC | PLUGIN_MOD,
#endif
// First alternative: pass the term and run functions directly.
IDAP_term,
IDAP_run,
#else
nullptr, // term() is called from plugmod_t destructor
nullptr, // run() is called from plugmod_t
#endif
IDAP_comment,
IDAP_help,
IDAP_name,
IDAP_hotkey
};
clc5q
committed
// Find all code addresses in the IDA database and enter them into
// IDAProLocs. Find all code addresses identified by the external
// disassembler (e.g. objdump) and enter them into DisasmLocs.
void FindCodeAddresses(void) {
// Read in code addresses as found by an external disassembler.
STARS_ea_t CurrDisasmAddr;
string DisasmFileName(global_STARS_program->GetRootFileName());
clc5q
committed
string FileSuffix(".SMPobjdump");
DisasmFileName += FileSuffix;
clc5q
committed
FILE *DisasmFile = SMP_fopen(DisasmFileName.c_str(), "r");
if (nullptr == DisasmFile) {
clc5q
committed
error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str());
return;
}
#define DISASM_RESERVE_SIZE 50000
DisasmLocs.reserve(DISASM_RESERVE_SIZE);
unsigned long TempAddr;
int ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
CurrDisasmAddr = (STARS_ea_t) TempAddr;
clc5q
committed
while (1 == ScanReturn) {
int NextChar;
DisasmLocs.push_back(CurrDisasmAddr);
// Swallow the rest of the input line and get the next address.
do {
NextChar = qfgetc(DisasmFile);
} while ((EOF != NextChar) && ('\n' != NextChar));
ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr);
CurrDisasmAddr = (STARS_ea_t) TempAddr;
clc5q
committed
} // end while (1 == ScanReturn)
if (0 >= DisasmLocs.size()) {
clc5q
committed
SMP_msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str());
SMP_fclose(DisasmFile);
clc5q
committed
return;
}
else {
clc5q
committed
SMP_msg("%zu Disasm addresses read from %s\n", DisasmLocs.size(),
clc5q
committed
DisasmFileName.c_str());
clc5q
committed
SMP_fclose(DisasmFile);
clc5q
committed
}
// Find all the code locs in the IDA Pro database. As we find
// them, store them in IDAProLocs.
STARS_ea_t RecentAddr = STARS_BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (!seg->IsCodeSegment())
clc5q
committed
continue;
for (STARS_ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = SMP_get_item_end(addr)) {
if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
clc5q
committed
IDAProLocs.push_back(addr);
clc5q
committed
if ((0x806cda4 <= addr) && (0x806cf99 >= addr))
clc5q
committed
SMP_msg("IDA code addr: %x\n", addr);
} // end if (SMP_isHead(addr) && SMP_isCode(addr)
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) {
if (!SMP_isHead(InstrFlags))
clc5q
committed
SMP_msg("Weirdness: not isHead at %x\n", addr);
clc5q
committed
SMP_msg("Weirdness: isUnknown at %x\n", addr);
clc5q
committed
}
}
#endif
} // end for (STARS_ea_t addr = seg->startEA; ...)
clc5q
committed
return;
} // end FindCodeAddresses()
// Return true if addr is not a proper beginning address for an instruction.
// Return false otherwise.
// Currently, we claim that an instruction is misaligned if DisasmLocs does
// not contain it. This function is useful for dealing with errors in IDA
// code identification, in which a large code section is identified as data
// and some instructions in the middle of the "data" are identified as code,
// but IDA often starts on the wrong boundary in these cases.
//
// An empty DisasmLocs contains no address, so every addr is then reported
// as misaligned.
bool IsCodeMisaligned(STARS_ea_t addr) {
	// Do a binary search for addr within DisasmLocs, which is sorted
	// in ascending address order because of the way in which it was
	// generated. The search range is the half-open interval [low, high),
	// so DisasmLocs[high] is never accessed and an empty vector is safe.
	size_t low = 0;
	size_t high = DisasmLocs.size();
	while (low < high) {
		const size_t mid = low + (high - low) / 2;
		if (addr == DisasmLocs[mid])
			return false; // found: addr is a known instruction start
		if (addr < DisasmLocs[mid])
			high = mid;
		else // must be addr > DisasmLocs[mid]
			low = mid + 1;
	}
	return true;
} // end of IsCodeMisaligned()
// Remove addr from IDAProLocs if it is present; do nothing otherwise
// (including when IDAProLocs is empty).
void RemoveIDACodeAddr(STARS_ea_t addr) {
	// Do a binary search for addr within IDAProLocs, which is sorted
	// in ascending address order because of the way in which it was
	// generated. Delete the element of IDAProLocs if found.
	// The search range is the half-open interval [low, high), so
	// IDAProLocs[high] is never accessed and an empty vector is safe.
	size_t low = 0;
	size_t high = IDAProLocs.size();
	while (low < high) {
		const size_t mid = low + (high - low) / 2;
		if (addr == IDAProLocs[mid]) {
			// IDAProLocs[mid] contains addr.
			vector<STARS_ea_t>::iterator RemovalIterator = IDAProLocs.begin();
			RemovalIterator += mid;
			IDAProLocs.erase(RemovalIterator);
			return;
		}
		if (addr < IDAProLocs[mid])
			high = mid;
		else // must be addr > IDAProLocs[mid]
			low = mid + 1;
	}
} // end of RemoveIDACodeAddr()
// Driver for all other fixing functions. Upon its return, the IDA
// database (IDB file) should be fixed up as much as we can fix it.
//
// Collects the code addresses, audits code targets, data in code and tail
// chunk ownership, and -- only when external disassembler addresses were
// read -- fixes code identification and new code chunks. Both address
// vectors are emptied before returning.
void FixupIDB(void) {
	FindCodeAddresses();
#if SMP_DEBUG_FIXUP_IDB
	SpecialDebugOutput();
#endif
	AuditCodeTargets();
	FindDataInCode();
	AuditTailChunkOwnership();
	if (!DisasmLocs.empty()) {
		FixCodeIdentification();
		int fixes = FixupNewCodeChunks();
		// NOTE(review): the debug section below is empty in the visible text; it
		// presumably reported `fixes`. The cast keeps the otherwise unused
		// local from triggering a warning.
		(void) fixes;
#if SMP_DEBUG_FIXUP_IDB
#endif
	}
	DisasmLocs.clear();
	IDAProLocs.clear();
} // end of FixupIDB()
// Find and print all data head addresses in code segments.
// If an isolated code instruction is found in the midst of a run
// of data bytes and has no code xrefs jumping to it, it is not
// reachable as code and is undoubtedly a mixup by IDA. Possibly
// the whole data region will be converted to code later, in which
// case the isolated code is not necessarily properly aligned and
// parsed at its present address, so we are glad to convert it into
// data anyway so that FindDataToConvert() will succeed on it later.
// Data to code conversion, and isolated code detection, are inhibited
// by IDA identifying several consecutive instructions in the midst
// of a data region, with the code addresses not agreeing with the
// external disassembler's code addresses. We will convert these
// misaligned instructions to data as we detect them. We will also
// convert unexplored bytes (SMP_isUnknown(flags) == true) into data if
// they are in the midst of a data sequence.
#define MIN_DATARUN_LEN 24 // #bytes on either side of "isolated" code
void FindDataInCode(void) {
size_t DataRunLen = 0; // How many data bytes in a row have we seen?
bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
// Now looking for data
STARS_ea_t IsolatedCodeAddr = STARS_BADADDR;
int IsolatedCodeLen = 0;
clc5q
committed
int InstrLen;
clc5q
committed
bool InstOK;
insn_t LocalCmd;
uint32 LocalFeatures;
clc5q
committed
STARS_ea_t RecentAddr = STARS_BADADDR;
for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
RecentAddr = seg->get_startEA();
if (!seg->IsCodeSegment())
clc5q
committed
continue;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
char SegName[MAXSTR];
STARS_ssize_t SegNameSize = SMP_get_segm_name(seg, SegName, sizeof(SegName) - 1);
clc5q
committed
SMP_msg("Non-code addresses for code segment %s from %x to %x\n",
clc5q
committed
SegName, seg->startEA, seg->endEA);
#endif
for (STARS_ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = SMP_get_item_end(addr)) {
if (SMP_isHead(AddrFlags)) {
clc5q
committed
DataRunLen += get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Data: %x\n", addr);
clc5q
committed
#endif
if (MIN_DATARUN_LEN <= DataRunLen) {
if (IsolatedCodeTrigger) {
// Saw data, then one isolated code, then data
clc5q
committed
RemoveIDACodeAddr(IsolatedCodeAddr);
clc5q
committed
IsolatedCodeLen, BADNODE)) {
SMP_msg("Converted isolated code to data: %lx\n",
(unsigned long) IsolatedCodeAddr);
clc5q
committed
}
else {
SMP_msg("Failed to convert isolated code to data: %llx len: %x\n",
(uint64_t) IsolatedCodeAddr, IsolatedCodeLen);
clc5q
committed
}
IsolatedCodeTrigger = false;
} // end if (IsolatedCodeTrigger)
} // end if (MIN_DATARUN_LEN <= DataRunLen)
} // end if (SMP_isData(AddrFlags)
else if (SMP_isUnknown(AddrFlags)) {
clc5q
committed
// Just in case; unknown usually means not head or tail
// If in a data run, convert to data.
InstrLen = get_item_size(addr);
clc5q
committed
SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q
committed
if (0 < DataRunLen) {
if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen += InstrLen;
}
else {
clc5q
committed
SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen = 0;
IsolatedCodeTrigger = false;
}
}
}
else if (SMP_isCode(AddrFlags)) { // must be true
clc5q
committed
if (MIN_DATARUN_LEN <= DataRunLen) {
clc5q
committed
SMP_msg("DataRunLen: %d at %x\n", DataRunLen, addr);
SMPInstr TempInst(addr);
TempInst.Analyze();
clc5q
committed
InstrLen = (int) TempInst.GetSize();
clc5q
committed
// We don't check the returned InstrLen for validity because IsCodeMisaligned()
// will check for validity immediately below.
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Calling IsCodeMisaligned: len %d\n", InstrLen);