/* * SMPStaticAnalyzer.cpp - <see below>. * * Copyright (c) 2000, 2001, 2010 - University of Virginia * * This file is part of the Memory Error Detection System (MEDS) infrastructure. * This file may be used and modified for non-commercial purposes as long as * all copyright, permission, and nonwarranty notices are preserved. * Redistribution is prohibited without prior written consent from the University * of Virginia. * * Please contact the authors for restrictions applying to commercial use. * * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Author: University of Virginia * e-mail: jwd@virginia.com * URL : http://www.cs.virginia.edu/ * * Additional copyrights 2010, 2011, 2012, 2013, 2014, 2015 by Zephyr Software LLC * e-mail: {clc,jwd}@zephyr-software.com * URL : http://www.zephyr-software.com/ */ // // SMPStaticAnalyzer.cpp // // This plugin performs the static analyses needed for the SMP project // (Software Memory Protection). 
// #include <list> #include <vector> #include <set> #include <string> #include <sstream> #include <ctime> #include <interfaces/idapro/all.h> #if 0 // should all be in interfaces/idapro/all.h #include <pro.h> #include <ida.hpp> #include <idp.hpp> #include <ua.hpp> #include <bytes.hpp> #include <nalt.hpp> #endif #include <loader.hpp> // for plugin_t #include "interfaces/STARSTypes.h" #include "interfaces/STARSIDATypes.h" #include "interfaces/SMPDBInterface.h" #include "base/SMPStaticAnalyzer.h" #include "base/SMPDataFlowAnalysis.h" #include "base/SMPProgram.h" #include "base/SMPFunction.h" #include "base/SMPInstr.h" #include "base/ProfilerInformation.h" #include "interfaces/abstract/STARSOp.h" #include "interfaces/abstract/STARSInterface.h" #include "interfaces/idapro/STARSInterface.h" #include "interfaces/idapro/STARSProgram.h" using namespace std; #define SMP_DEBUG_DELAY 0 // for setting an early breakpoint #define SMP_DELAY_TIME 25.0 // 25 seconds // Set to 1 for debugging output #define SMP_DEBUG 1 #define SMP_DEBUG2 0 // verbose #define SMP_DEBUG3 0 // verbose #define SMP_DEBUG_MEM 0 // print memory operands #define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0 #define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc. #define SMP_DEBUG_DATA_ONLY 0 // Find & fix data addresses in code segments // Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find // which function is causing a problem. #define SMP_BINARY_DEBUG 0 #define SMP_DEBUG_COUNT 356 // How many funcs to process in problem search int FuncsProcessed = 0; #define SMP_FIXUP_IDB 0 // Try to fix the IDA database? NOTE: Needs lots of updating before re-enabling. #define SMP_DEBUG_FIXUP_IDB 0 // debugging output for FixupIDB chain #define SMP_FIND_ORPHANS 1 // find code outside of functions #define SMP_DEBUG_CODE_ORPHANS 0 // Detect whether we are causing code to be orphaned #define SMP_IDAP_RUN_DELAY 0 // Delay in IDAP_run() so we can attach debugger to process. 
#define STARS_GENERATE_DIF_FILE STARS_SCCP_CONVERT_UNREACHABLE_BLOCKS // If we optimize, generate DIF file typedef op_t STARSOpndType; static SMPProgram *CurrProg = nullptr; STARS_Interface_t* global_stars_interface = nullptr; STARS_Program_t *global_STARS_program = nullptr; set<STARS_ea_t> CodeOrphans; // Should we convert the x86 LOCK prefix byte to a no-op to avoid // IDA Pro problems with instructions that jump past the LOCK // prefix and look like they are jumping into the middle of an // instruction? #define STARS_REMOVE_LOCK_PREFIX 0 // Lock prefix for x86 code; jumping around this prefix conditionally looks like jumping // into the middle of an instruction to IDA Pro, causing it to not collect instructions // into a procedure. We replace these bytes with no-op opcodes because none of our analyses // care about LOCK prefices. We store the addresses where we have done the replacement in a // set in case we ever care. #define X86_LOCK_PREFIX 0xF0 set<STARS_ea_t> LockPreficesRemoved; // Addresses where x86 LOCK prefix byte was turned into a no-op by STARS_custom_ana() callback. static unsigned long CustomAnaCallCount = 0; // Code addresses identified by a disassembler, such as objdump on // Linux. These can be used to improve the code vs. data identification // of IDA Pro. vector<STARS_ea_t> DisasmLocs; // Code addresses as identified by IDA Pro, to be compared to DisasmLocs. vector<STARS_ea_t> IDAProLocs; // List of functions that need to be reanalyzed after all the code fixup // and code discovery is complete. Kept as a list of addresses; any address // within the function is good enough to designate it. list<STARS_ea_t> FuncReanalyzeList; // A code region that has been converted from data but has code addresses that // need to be reanalyzed. This is usually because a former data address is // now a jump to a code target that is still a data address. We have to wait // until the target has become code before IDA will accept the jump as valid. 
class FixupRegion { public: FixupRegion(SMP_bounds_t); inline STARS_ea_t GetStart(void) const { return CodeRegion.startEA; }; inline STARS_ea_t GetEnd(void) const { return CodeRegion.endEA; }; inline void SetStart(STARS_ea_t addr) { CodeRegion.startEA = addr; }; list<STARS_ea_t> FixupInstrs; // easier to expose than to encapsulate private: SMP_bounds_t CodeRegion; }; FixupRegion::FixupRegion(SMP_bounds_t Range) { this->CodeRegion = Range; return; } // List of code regions that were not completely analysed because of jump to // data considerations. list<FixupRegion> CodeReanalyzeList; // Map library function names to their system call type. map<string, ZST_SysCallType> ZST_FuncTypeMap; // Map system call types to their Zephyr Security Toolkit security policy. map<ZST_SysCallType, ZST_Policy> ZST_TypePolicyMap; // Set of whitelisted file locations. set<string> ZST_FileLocWhitelist; // Set of whitelisted network locations. set<string> ZST_NetworkLocWhitelist; // Set of blacklisted file locations. set<string> ZST_FileLocBlacklist; // Set of blacklisted network locations. set<string> ZST_NetworkLocBlacklist; // Set of system call names whose returned values should be trusted to have only benign numeric errors. set<string> ZST_SystemCallNumericWhitelist; #if (IDA_SDK_VERSION < 700) void idaapi IDAP_run(int); #else bool idaapi IDAP_run(std::size_t); #endif void IDAP_term(void); // Functions for diagnosing and/or fixing problems in the IDA database. void FixupIDB(void); // Driver for all other fixing functions. void FindDataInCode(void); void AuditTailChunkOwnership(void); void FindOrphanedCode(STARS_Segment_t *, FILE *, FILE *); void Debug_FindOrphanedCode(STARS_Segment_t *, bool); void FindLinksFromOrphanedCode(STARS_Segment_t *); void FixCodeIdentification(void); int FixupNewCodeChunks(void); void AuditCodeTargets(void); void SpecialDebugOutput(void); void RemoveIDACodeAddr(STARS_ea_t); static unsigned long DebugCounter = 0; // Turn LOCK prefix into no-op when detected. 
Each is one byte in length. bool STARS_custom_ana(STARS_ea_t CurrentAddr) { // static_assert(sizeof(STARS_ea_t) == sizeof(uintptr_t), "Sizeof mismatch between STARS_ea_t and uintptr_t"); int code = ::get_byte(CurrentAddr); ++CustomAnaCallCount; if (X86_LOCK_PREFIX != code) { return false; } else { #define STARS_ANA_DEBUG_DELAY 0 #if STARS_ANA_DEBUG_DELAY if (DebugCounter == 0) { time_t start; time_t current; time(&start); printf("delay for 15 seconds.\n"); do { time(¤t); } while(difftime(current,start) < 15.0); ++DebugCounter; } #endif pair<set<STARS_ea_t>::iterator, bool> InsertResult; InsertResult = LockPreficesRemoved.insert(CurrentAddr); assert(InsertResult.second); #if (IDA_SDK_VERSION < 700) cmd.itype = NN_nop; // make it a no-op cmd.size = 1; // one-byte no-op #if 0 cmd.auxpref = 0; // clear prefix and flags fields cmd.segpref = 0; cmd.insnpref = 0; cmd.flags = 0; #endif #endif return true; } } // end of STARS_custom_ana() #if (IDA_SDK_VERSION > 749) struct STARS_plugmod_t : public plugmod_t { bool idaapi run(std::size_t arg) { return IDAP_run(arg); } ~STARS_plugmod_t() { IDAP_term(); } }; static STARS_plugmod_t *PluginModuleStruct; #endif #if (IDA_SDK_VERSION < 700) static int idaapi idp_callback(void *, int event_id, va_list va) { #else static ssize_t idaapi idp_callback(void *, int event_id, va_list va) { #endif bool success = 0; #if STARS_REMOVE_LOCK_PREFIX if (event_id == processor_t::custom_ana) { STARS_ea_t CurrentAddr = cmd.ea; #if 1 int code = ua_next_byte(); ++CustomAnaCallCount; if (X86_LOCK_PREFIX == code) { pair<set<STARS_ea_t>::iterator, bool> InsertResult; InsertResult = LockPreficesRemoved.insert(CurrentAddr); cmd.itype = NN_nop; // make it a no-op return (int) (cmd.size + 1); } else { return 0; } #else if (STARS_custom_ana(CurrentAddr)) { return 1; // handled event } #endif } #endif // STARS_REMOVE_LOCK_PREFIX #if (IDA_SDK_VERSION < 700) int CheckCode = ph.auto_empty_finally; #else idb_event::event_code_t CheckCode = 
idb_event::auto_empty_finally; #endif if (event_id == (int) CheckCode) { // IDA analysis is done // Ensure correct working environment. // Handle Cyber Grand Challenge project variant of ELF. #define STARS_CGC_FILETYPE 16705 bool CGCFile = (inf.filetype == STARS_CGC_FILETYPE); if ((inf.filetype != f_ELF) && (inf.filetype != f_PE) && (!CGCFile)) { SMP_msg("ERROR: FileType is %d \n", inf.filetype); error("Executable format must be PE, ELF or CGC."); return 0; } else if (CGCFile) { global_stars_interface->SetCGCBinary(); } #if (IDA_SDK_VERSION < 700) IDAP_run(0); #elif (IDA_SDK_VERSION < 750) success = IDAP_run(0); #else success = PluginModuleStruct->run(0); #endif #if STARS_REMOVE_LOCK_PREFIX SMP_msg("INFO: Calls to STARS_custom_ana: %lu \n", CustomAnaCallCount); SMP_msg("INFO: Number of LOCK prefices eliminated: %zu \n", LockPreficesRemoved.size()); LockPreficesRemoved.clear(); #endif // STARS_REMOVE_LOCK_PREFIX // qexit(0); } return (ssize_t)success; } // end of idp_callback() void IDAP_term(void) { #if (IDA_SDK_VERSION < 700) ::unhook_from_notification_point(HT_IDP, idp_callback, nullptr); #else // auto_empty_finally is now hooked to HT_IDB instead of HT_IDP ::unhook_from_notification_point(HT_IDB, idp_callback, nullptr); #endif return; } // end of IDAP_term() #if (IDA_SDK_VERSION < 750) int IDAP_init(void) { #else static plugmod_t * idaapi IDAP_init(void) { PluginModuleStruct = new STARS_plugmod_t; #endif /* init the interface */ global_stars_interface = new STARS_IDA_Interface_t; global_STARS_program = new STARS_IDA_Program_t; SMP_msg("IDAP_init entered.\n"); // Ensure correct working environment. 
if (ph.id != PLFM_386 && ph.id != PLFM_ARM && ph.id != PLFM_MIPS) { error("Processor must be x86, ARM or MIPS."); #if (IDA_SDK_VERSION < 750) return PLUGIN_SKIP; #else return nullptr; #endif } #if (IDA_SDK_VERSION < 700) bool hooked = hook_to_notification_point(HT_IDP, idp_callback, nullptr); #else // auto_empty_finally is now hooked to HT_IDB instead of HT_IDP bool hooked = hook_to_notification_point(HT_IDB, idp_callback, nullptr); #endif #ifdef STARS_IRDB_INTERFACE SMPLogFile = nullptr; #endif SMP_msg("Exiting IDAP_init.\n"); #if (IDA_SDK_VERSION < 750) return PLUGIN_KEEP; #else return PluginModuleStruct; #endif } // end of IDAP_init #if (IDA_SDK_VERSION < 700) void IDAP_run(int arg) { #else bool idaapi IDAP_run(std::size_t arg) { #endif FILE *AnnotFile = nullptr; FILE *InfoAnnotFile = nullptr; FILE *AsmFile; FILE *DifFile; // For debugging, we can add a delay loop so we have time to attach gdb to the // running process and set a breakpoint. #if SMP_DEBUG_DELAY time_t start; time_t current; time(&start); SMP_msg("delay for 25 seconds.\n"); printf("delay for 25 seconds.\n"); do { time(¤t); } while(difftime(current,start) < SMP_DELAY_TIME); #endif #if SMP_DEBUG SMP_msg("Beginning IDAP_run.\n"); #endif SMP_msg("IDA SDK version: %d \n", IDA_SDK_VERSION); #if __unix__ SMP_msg("INFO: Compiled in Unix environment.\n"); #elif __win32 SMP_msg("INFO: Compiled in Windows 32 or 64 environment.\n"); #else SMP_msg("INFO: Compiled in unknown environment.\n"); #endif DefOrUse DummyRef; STARSOpndType DummyOperand; std::size_t RefObjectSize = sizeof(DummyRef), OpndSize = sizeof(DummyOperand); SMP_msg("INFO: Size of DefOrUse: %zu Size of op_t: %zu \n", RefObjectSize, OpndSize); SMP_msg("INFO: Size of STARS_ea_t: %zu Size of uintptr_t: %zu \n", sizeof(STARS_ea_t), sizeof(uintptr_t)); SMP_msg("INFO: Size of SMPInstr: %zu Size of SMPBasicBlock: %zu \n", sizeof(SMPInstr), sizeof(SMPBasicBlock)); if (SMP_is_64bit()) { global_STARS_program->Set64BitBinary(); const auto pt = (ph.id == 
PLFM_386) ? ptX86_64 : (ph.id == PLFM_ARM) ? ptARM64 : throw invalid_argument("Unknown machine type"); global_STARS_program->SetProcessorType(pt); SMP_msg("INFO: 64-bit binary detected.\n"); } else { global_STARS_program->Set32BitBinary(); const auto pt = (ph.id == PLFM_386) ? ptX86_32 : (ph.id == PLFM_ARM) ? ptARM : (ph.id == PLFM_MIPS) ? ptMIPS : throw invalid_argument("Unknown machine type"); global_STARS_program->SetProcessorType(pt); SMP_msg("INFO: 32-bit binary detected.\n"); } global_STARS_program->InitData(); global_STARS_program->SetIDAProDriverFlag(); // See if command-line options for our plugin got passed to IDA Pro. const char *PluginOptionsString = ::get_plugin_options("SMPStaticAnalyzer"); bool GenAsmFlag = true; if (nullptr != PluginOptionsString) { SMP_msg("INFO: Command-line plugin options string: %s\n", PluginOptionsString); if (nullptr != strstr(PluginOptionsString, "Reduce2")) { global_STARS_program->SetLevel2ReducedProcessingFlag(true); SMP_msg("INFO: Level 2 Reduced processing requested via command-line options.\n"); } else if (nullptr != strstr(PluginOptionsString, "Reduced")) { global_STARS_program->SetReducedProcessingFlag(true); SMP_msg("INFO: Reduced processing requested via command-line options.\n"); } else { SMP_msg("INFO: Command-line options did not include the Reduced option.\n"); } if (nullptr != strstr(PluginOptionsString, "ShadowFuncPtrs")) { global_STARS_program->SetFuncPtrShadowingFlag(true); SMP_msg("INFO: Function pointer shadowing requested via command-line options.\n"); } if (nullptr != strstr(PluginOptionsString, "ShadowArgs")) { global_STARS_program->SetArgShadowingFlag(true); SMP_msg("INFO: Critical argument shadowing requested via command-line options.\n"); } if (nullptr != strstr(PluginOptionsString, "ImproveCFG")) { global_STARS_program->SetCFGImprovementFlag(true); SMP_msg("INFO: Maximum CFG improvement requested via command-line options.\n"); } if (nullptr != strstr(PluginOptionsString, "AnalyzeLoops")) { 
global_STARS_program->SetLoopAnalysesFlag(true); SMP_msg("INFO: Deep loop analyses requested via command-line options.\n"); } if (nullptr != strstr(PluginOptionsString, "SCCP")) { global_STARS_program->SetConstantPropagationFlag(true); SMP_msg("INFO: Constant propagation via SCCP requested via command-line options.\n"); } if (nullptr != strstr(PluginOptionsString, "SPARK")) { global_STARS_program->SetSPARKFlag(true); SMP_msg("INFO: Translation to SPARK Ada requested via command-line options.\n"); } if (nullptr != strstr(PluginOptionsString, "NoGenAsm")) { GenAsmFlag = false; SMP_msg("INFO: ASM file generation suppressed via command-line options.\n"); } } else { SMP_msg("INFO: No command-line options found.\n"); } global_STARS_program->DetermineRootFileName(); // read the Profiler generated information into a new prof_info class CurrProg = new SMPProgram(); // Open the files for Xrefs from data, etc. Wait for profiling data to be read from the // main annotation file before reopening it for output. if (!(global_STARS_program->OpenSecondaryFilesOnly())) { SMP_msg("FATAL ERROR: At least one file could not be opened.\n"); error("FATAL ERROR: At least one file could not be opened.\n"); delete global_STARS_program; delete global_stars_interface; #if (IDA_SDK_VERSION < 700) return; #else return false; #endif } time_t StartTime = time(nullptr); time_t EndTime, Time1, Time2; CurrProg->AnalyzeData(); // Analyze static data in the executable Time1 = time(nullptr); if (!global_STARS_program->IsSharedObject()) { global_STARS_program->FindCodeAddressesTaken(CurrProg); // find code addresses in read-only data segments } Time2 = time(nullptr); // Note: ProfilerInformation must come after the call above to AnalyzeData(). 
ProfilerInformation *prof_info = new ProfilerInformation(global_STARS_program->GetAnnotFileName().c_str(), CurrProg); EndTime = time(nullptr); double TimeDiff = difftime(EndTime, StartTime); SMP_msg("INFO: TIME: Phase 0: Data, CodeAddressesTaken, ProfileInfo: %7.2f\n", TimeDiff); TimeDiff = difftime(Time1, StartTime); SMP_msg("INFO: TIME: Phase 0A: Data: %7.2f\n", TimeDiff); TimeDiff = difftime(Time2, Time1); SMP_msg("INFO: TIME: Phase 0B: CodeAddressesTaken: %7.2f\n", TimeDiff); if (!global_STARS_program->OpenMainAnnotationFile()) { SMP_msg("FATAL ERROR: At least one file could not be opened.\n"); error("FATAL ERROR: At least one file could not be opened.\n"); delete global_STARS_program; delete global_stars_interface; #if (IDA_SDK_VERSION < 700) return; #else return false; #endif } string AsmFileName(global_STARS_program->GetRootFileName()); string AsmFileSuffix(".asm"); AsmFileName += AsmFileSuffix; string DifFileName(global_STARS_program->GetRootFileName()); string DifFileSuffix(".dif"); DifFileName += DifFileSuffix; // For debugging, we can add a delay loop so we have time to attach gdb to the // running process and set a breakpoint. #if SMP_IDAP_RUN_DELAY time_t start; time_t current; time(&start); printf("delay for 15 seconds.\n"); do { time(¤t); } while(difftime(current,start) < 15.0); #endif STARS_ea_t RecentAddr; #if 0 #if SMP_DEBUG_CODE_ORPHANS CodeOrphans.clear(); RecentAddr = STARS_BADADDR; for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) { RecentAddr = seg->get_startEA(); if (seg->IsCodeSegment()) Debug_FindOrphanedCode(seg, true); } #endif #endif // Read the Zephyr Security Toolkit system call security policies, if available. global_STARS_program->ZST_InitPolicies(); try { // We will catch memory exhaustion errors. 
#if SMP_DEBUG_DATA_ONLY FindDataInCode(); FixCodeIdentification(); SMP_fclose(SymsFile); delete prof_info; delete CurrProg; delete global_STARS_program; delete global_stars_interface; #if (IDA_SDK_VERSION < 700) return; #else return true; #endif #endif // Pre-audit the IDA database by seeing if the distinction // between code and data can be improved, and if all branches // and calls have proper code targets and code cross references. #if SMP_FIXUP_IDB FixupIDB(); #endif if (0 < prof_info->GetProfilerAnnotationCount()) { SMP_msg("Calling InferDataGranularity\n"); SMP_msg("ptr to MemoryAccessInfo: %p\n", prof_info->GetMemoryAccessInfo()); prof_info->GetMemoryAccessInfo()->InferDataGranularity(); SMP_msg("Returned from InferDataGranularity\n"); } CurrProg->ProfGranularityFinished(global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile()); RecentAddr = STARS_BADADDR; for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) { RecentAddr = seg->get_startEA(); if (seg->IsCodeSegment()) FindLinksFromOrphanedCode(seg); } CurrProg->Analyze(prof_info, global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile()); if (global_STARS_program->ShouldSTARSPerformFullAnalysis()) { CurrProg->EmitAnnotations(global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile()); } // Process the instructions that are not in functions (generally, an IDA problem, or just no-ops for // alignment purposes). 
#if SMP_DEBUG_CODE_ORPHANS RecentAddr = STARS_BADADDR; for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) { RecentAddr = seg->get_startEA(); if (seg->IsCodeSegment()) Debug_FindOrphanedCode(seg, true); } #endif #if SMP_FIND_ORPHANS RecentAddr = STARS_BADADDR; for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) { RecentAddr = seg->get_startEA(); if (seg->IsCodeSegment()) FindOrphanedCode(seg, global_STARS_program->GetAnnotFile(), global_STARS_program->GetInfoAnnotFile()); } #endif // Output statistics. for (int OptType = 0; OptType <= LAST_OPT_CATEGORY; ++OptType) { SMP_msg("Optimization Category Count %d: %d Annotations: %d\n", OptType, global_STARS_program->GetOptCount(OptType), global_STARS_program->GetAnnotationCount(OptType)); } SMP_fprintf(global_STARS_program->GetInfoAnnotFile(), " 8000000 2 SUCCESS ANALYSISCOMPLETED\n"); global_STARS_program->CloseFiles(); if (!global_STARS_program->ShouldSTARSPerformReducedAnalysis()) { if (GenAsmFlag) { AsmFile = SMP_fopen(AsmFileName.c_str(), "w"); if (nullptr == AsmFile) { error("FATAL ERROR: Cannot open ASM file %s\n", AsmFileName.c_str()); } else { int FileResult = gen_file(OFILE_ASM, AsmFile, LowestCodeAddress, 0xffffffff, (GENFLG_MAPSEG | GENFLG_ASMTYPE)); if (0 >= FileResult) { SMP_msg("ERROR: Could not generate ASM file.\n"); } SMP_fclose(AsmFile); } } #if STARS_GENERATE_DIF_FILE DifFile = SMP_fopen(DifFileName.c_str(), "w"); if (nullptr == DifFile) { error("FATAL ERROR: Cannot open DIF file %s\n", DifFileName.c_str()); } else { int FileResult = gen_file(OFILE_DIF, DifFile, LowestCodeAddress, STARS_BADADDR, 0); if (0 >= FileResult) { SMP_msg("ERROR: Could not generate DIF file.\n"); } SMP_fclose(DifFile); } #endif } // end if not reduced analysis SMP_msg("INFO: Files closed, freeing memory and exiting.\n"); delete prof_info; SMP_msg("INFO: Deleted prof_info.\n"); delete CurrProg; stringstream sout; 
STARS_Instruction_t::DumpCreationStats(sout); STARS_op_t::DumpCreationStats(sout); SMP_msg("Interface stats: \n %s", sout.str().c_str()); SMP_msg("INFO: Deleted CurrProg. Returning to IDA Pro.\n"); #if (IDA_SDK_VERSION > 749) SMP_msg("Deleting PluginModuleStruct and exiting IDAP_run.\n"); delete PluginModuleStruct; #endif delete global_STARS_program; delete global_stars_interface; #if (IDA_SDK_VERSION < 700) return; #else return true; #endif } catch (const std::bad_alloc &) { SMP_msg("FATAL ERROR: Memory exhausted.\n"); if (nullptr != InfoAnnotFile) { SMP_fprintf(InfoAnnotFile, " 8000000 2 ERROR MEMORYEXHAUSTED\n"); SMP_fclose(InfoAnnotFile); } if (nullptr != AnnotFile) { SMP_fclose(AnnotFile); } delete CurrProg; delete prof_info; delete global_STARS_program; delete global_stars_interface; #if (IDA_SDK_VERSION < 700) return; #else return true; #endif } } // end IDAP_run() char IDAP_comment[] = "Zephyr Software STARS (Static Analyzer for Reliability and Security)"; char IDAP_help[] = "Good luck"; char IDAP_name[] = "STARS"; char IDAP_hotkey[] = "Alt-J"; plugin_t PLUGIN = { IDP_INTERFACE_VERSION, #if (IDA_SDK_VERSION < 750) PLUGIN_PROC | PLUGIN_MOD, #else PLUGIN_MULTI | PLUGIN_PROC | PLUGIN_MOD, #endif IDAP_init, #if (IDA_SDK_VERSION < 750) IDAP_term, IDAP_run, #else nullptr, // term() is called from plugmod_t destructor nullptr, // run() is called from plugmod_t #endif IDAP_comment, IDAP_help, IDAP_name, IDAP_hotkey }; // Find all code addresses in the IDA database and enter them into // IDAProLocs. Find all code addresses identified by the external // disassembler (e.g. objdump) and enter them into DisasmLocs. void FindCodeAddresses(void) { // Read in code addresses as found by an external disassembler. 
STARS_ea_t CurrDisasmAddr; string DisasmFileName(global_STARS_program->GetRootFileName()); string FileSuffix(".SMPobjdump"); DisasmFileName += FileSuffix; FILE *DisasmFile = SMP_fopen(DisasmFileName.c_str(), "r"); if (nullptr == DisasmFile) { error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str()); return; } #define DISASM_RESERVE_SIZE 50000 DisasmLocs.reserve(DISASM_RESERVE_SIZE); unsigned long TempAddr; int ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr); CurrDisasmAddr = (STARS_ea_t) TempAddr; while (1 == ScanReturn) { int NextChar; DisasmLocs.push_back(CurrDisasmAddr); // Swallow the rest of the input line and get the next address. do { NextChar = qfgetc(DisasmFile); } while ((EOF != NextChar) && ('\n' != NextChar)); ScanReturn = qfscanf(DisasmFile, "%lx", &TempAddr); CurrDisasmAddr = (STARS_ea_t) TempAddr; } // end while (1 == ScanReturn) if (0 >= DisasmLocs.size()) { SMP_msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str()); SMP_fclose(DisasmFile); DisasmLocs.clear(); return; } else { SMP_msg("%zu Disasm addresses read from %s\n", DisasmLocs.size(), DisasmFileName.c_str()); SMP_fclose(DisasmFile); } // Find all the code locs in the IDA Pro database. As we find // them, store them in IDAProLocs. 
STARS_ea_t RecentAddr = STARS_BADADDR; for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) { RecentAddr = seg->get_startEA(); if (!seg->IsCodeSegment()) continue; for (STARS_ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) { IDAProLocs.push_back(addr); #if 0 if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) SMP_msg("IDA code addr: %x\n", addr); #endif } // end if (SMP_isHead(addr) && SMP_isCode(addr) #if SMP_DEBUG_FIXUP_IDB else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) { if (!SMP_isHead(InstrFlags)) SMP_msg("Weirdness: not isHead at %x\n", addr); if (SMP_isUnknown(InstrFlags)) { SMP_msg("Weirdness: isUnknown at %x\n", addr); } } #endif } // end for (STARS_ea_t addr = seg->startEA; ...) } // end for all segments return; } // end FindCodeAddresses() // Return true if addr is not a proper beginning address for an instruction. // Return false otherwise. // Currently, we claim that an instruction is misaligned if DisasmLocs does // not contain it. This function is useful for dealing with errors in IDA // code identification, in which a large code section is identified as data, // but some instructions in the middle of the "data" are identified as // code but IDA often starts on the wrong boundary in these cases. bool IsCodeMisaligned(STARS_ea_t addr) { // Do a binary search for addr within DisasmLocs, which is sorted // in ascending address order because of the way in which it was // generated. 
size_t min = 0; size_t max = DisasmLocs.size(); // don't access DisasmLocs[max] size_t index = (min + max) / 2; while (addr != DisasmLocs[index]) { if (min >= (max - 1)) return true; #if 0 SMP_msg("min: %d max: %d index: %d\n", min, max, index); #endif if (addr < DisasmLocs[index]) max = index; else // must be addr > DisasmLocs[index]; min = index; index = (min + max) / 2; } return false; } // end of IsCodeMisaligned() void RemoveIDACodeAddr(STARS_ea_t addr) { // Do a binary search for addr within IDAProLocs, which is sorted // in ascending address order because of the way in which it was // generated. Delete the element of IDAProLocs if found. size_t min = 0; size_t max = IDAProLocs.size(); // don't access IDAProLocs[max] size_t index = (min + max) / 2; while (addr != IDAProLocs[index]) { if (min >= (max - 1)) return; #if 0 SMP_msg("min: %d max: %d index: %d\n", min, max, index); #endif if (addr < IDAProLocs[index]) max = index; else // must be addr > IDAProLocs[index]; min = index; index = (min + max) / 2; } // IDAProLocs[index] contains addr. vector<STARS_ea_t>::iterator RemovalIterator = IDAProLocs.begin(); RemovalIterator += index; RemovalIterator = IDAProLocs.erase(RemovalIterator); return; } // end of RemoveIDACodeAddr() // Driver for all other fixing functions. Upon its return, the IDA // database (IDB file) should be fixed up as much as we can fix it. void FixupIDB(void) { FindCodeAddresses(); #if SMP_DEBUG_FIXUP_IDB SpecialDebugOutput(); #endif AuditCodeTargets(); FindDataInCode(); AuditTailChunkOwnership(); if (DisasmLocs.size() > 0) { FixCodeIdentification(); int fixes = FixupNewCodeChunks(); #if SMP_DEBUG_FIXUP_IDB SpecialDebugOutput(); #endif } DisasmLocs.clear(); IDAProLocs.clear(); } // end of FixupIDB() // Find and print all data head addresses in code segments. // If an isolated code instruction is found in the midst of a run // of data bytes and has no code xrefs jumping to it, it is not // reachable as code and is undoubtedly a mixup by IDA. 
// Possibly the whole data region will be converted to code later, in which
// case the isolated code is not necessarily properly aligned and
// parsed at its present address, so we are glad to convert it into
// data anyway so that FindDataToConvert() will succeed on it later.
// Data to code conversion, and isolated code detection, are inhibited
// by IDA identifying several consecutive instructions in the midst
// of a data region, with the code addresses not agreeing with the
// external disassembler's code addresses. We will convert these
// misaligned instructions to data as we detect them. We will also
// convert unexplored bytes (SMP_isUnknown(flags) == true) into data if
// they are in the midst of a data sequence.
#define MIN_DATARUN_LEN 24 // #bytes on either side of "isolated" code

// Walk every item of every code segment, tracking the length of the current
// run of data bytes (DataRunLen). Three conversions to byte data happen:
//  1. a single instruction with no references that sits between two data
//     runs of at least MIN_DATARUN_LEN bytes ("isolated" code);
//  2. an instruction that follows such a data run and that
//     IsCodeMisaligned() reports as misaligned;
//  3. unknown bytes encountered while a data run is in progress.
// Converted code addresses are also removed from IDAProLocs.
void FindDataInCode(void) {
    size_t DataRunLen = 0; // How many data bytes in a row have we seen?
    bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
                                      // Now looking for data
    STARS_ea_t IsolatedCodeAddr = STARS_BADADDR;
    int IsolatedCodeLen = 0;
    int InstrLen;
    // NOTE(review): the next three locals are not referenced in this function.
    bool InstOK;
    insn_t LocalCmd;
    uint32 LocalFeatures;

    // DataRunLen and IsolatedCodeTrigger are not reset between segments.
    STARS_ea_t RecentAddr = STARS_BADADDR;
    for (STARS_Segment_t *seg = SMP_get_first_seg(); nullptr != seg; seg = SMP_get_next_seg(RecentAddr)) {
        RecentAddr = seg->get_startEA();
        if (!seg->IsCodeSegment())
            continue;
#if SMP_DEBUG_FIXUP_IDB
        char SegName[MAXSTR];
        STARS_ssize_t SegNameSize = SMP_get_segm_name(seg, SegName, sizeof(SegName) - 1);
        SMP_msg("Non-code addresses for code segment %s from %x to %x\n", SegName, seg->startEA, seg->endEA);
#endif
        for (STARS_ea_t addr = seg->get_startEA(); addr < seg->get_endEA(); addr = SMP_get_item_end(addr)) {
            flags_t AddrFlags = SMP_getFlags(addr);
            if (SMP_isHead(AddrFlags)) {
                if (SMP_isData(AddrFlags)) {
                    // Data item: extend the current data run.
                    DataRunLen += get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
                    SMP_msg("Data: %x\n", addr);
#endif
                    if (MIN_DATARUN_LEN <= DataRunLen) {
                        if (IsolatedCodeTrigger) {
                            // Saw data, then one isolated code, then data
                            SMP_del_items(IsolatedCodeAddr, IsolatedCodeLen, 0);
                            RemoveIDACodeAddr(IsolatedCodeAddr);
                            if (SMP_create_data(IsolatedCodeAddr, SMP_byteflag(), IsolatedCodeLen, BADNODE)) {
                                SMP_msg("Converted isolated code to data: %lx\n", (unsigned long) IsolatedCodeAddr);
                            }
                            else {
                                SMP_msg("Failed to convert isolated code to data: %llx len: %x\n", (uint64_t) IsolatedCodeAddr, IsolatedCodeLen);
                            }
                            IsolatedCodeTrigger = false;
                        } // end if (IsolatedCodeTrigger)
                    } // end if (MIN_DATARUN_LEN <= DataRunLen)
                } // end if (SMP_isData(AddrFlags)
                else if (SMP_isUnknown(AddrFlags)) {
                    // Just in case; unknown usually means not head or tail
                    // If in a data run, convert to data.
                    InstrLen = get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
                    SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
#endif
                    if (0 < DataRunLen) {
                        if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
#if SMP_DEBUG_FIXUP_IDB
                            SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
#endif
                            DataRunLen += InstrLen;
                        }
                        else {
                            // Conversion failed, so the data run is broken here.
#if SMP_DEBUG_FIXUP_IDB
                            SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
#endif
                            DataRunLen = 0;
                            IsolatedCodeTrigger = false;
                        }
                    }
                }
                else if (SMP_isCode(AddrFlags)) { // must be true
                    if (MIN_DATARUN_LEN <= DataRunLen) {
                        // Code right after a long data run: candidate for
                        // misaligned or isolated code.
#if SMP_DEBUG_FIXUP_IDB
                        SMP_msg("DataRunLen: %d at %x\n", DataRunLen, addr);
#endif
                        SMPInstr TempInst(addr);
                        TempInst.Analyze();
                        InstrLen = (int) TempInst.GetSize();
                        // We don't check the returned InstrLen for validity because IsCodeMisaligned()
                        // will check for validity immediately below.
#if SMP_DEBUG_FIXUP_IDB
                        SMP_msg("Calling IsCodeMisaligned: len %d\n", InstrLen);
#endif
                        if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
                            SMP_msg("Code was misaligned.\n");
#endif
                            SMP_del_items(addr, InstrLen, 0);
                            RemoveIDACodeAddr(addr);
                            if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
#if SMP_DEBUG_FIXUP_IDB
                                SMP_msg("Converted misaligned code to data at %x : len: %x\n", addr, InstrLen);
#endif
                                // Step back so data gets processed.
                                // The new data item counts toward the run; the
                                // continue bypasses "DataRunLen = 0" below.
                                DataRunLen += get_item_size(addr);
                                continue; // skip reset of DataRunLen
                            }
                            else {
#if SMP_DEBUG_FIXUP_IDB
                                SMP_msg("Misaligned code left as unknown at %x : len: %x\n", addr, InstrLen);
#endif
                                IsolatedCodeTrigger = false;
                            }
                        } // end if (IsCodeMisaligned() ...)
                        else if (!SMP_hasRef(AddrFlags)) {
                            // No references at all --> isolated code.
                            IsolatedCodeTrigger = true;
                            IsolatedCodeAddr = addr;
                            IsolatedCodeLen = InstrLen;
                        }
                        else {
                            // Flags claim a reference; confirm that at least
                            // one xref to addr actually exists.
                            SMP_xref_t xb;
                            bool ok = xb.SMP_first_to(addr, XREF_ALL);
                            if (!ok) {
                                // No code xrefs to this target addr.
                                IsolatedCodeTrigger = true;
                                IsolatedCodeAddr = addr;
                                IsolatedCodeLen = InstrLen;
                            }
                        }
                    } // end if (MIN_DATARUN_LEN <= DataRunLen)
                    else if (IsolatedCodeTrigger) {
                        // Two instructions in a row does not fit the pattern.
                        IsolatedCodeTrigger = false;
                    }
                    // Any code item that was not converted ends the data run.
                    DataRunLen = 0;
                } // end if (SMP_isData) ... else if (isUnknown) ... else SMP_isCode
            } // end if (SMP_isHead)
            else if (SMP_isUnknown(AddrFlags)) {
                // If in a data run, convert to data.
                InstrLen = get_item_size(addr);
#if SMP_DEBUG_FIXUP_IDB
                SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
#endif
                if (0 < DataRunLen) {
                    if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
#if SMP_DEBUG_FIXUP_IDB
                        SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
#endif
                        DataRunLen += InstrLen;
                    }
                    else {
#if SMP_DEBUG_FIXUP_IDB
                        SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
#endif
                        DataRunLen = 0;
                        IsolatedCodeTrigger = false;
                    }
                }
            }
        } // end for (STARS_ea_t addr = seg->startEA; ...)
    } // end for all segments
    return;
} // end of FindDataInCode()

// The choices that IDA makes for deciding which parent function of a
// TAIL chunk is the primary owner of the tail can be counterintuitive.
// A function entry can both fall into and jump to a tail chunk that
// is contiguous with it, yet the "owner" might be a function that is
// far below it in the executable address space. This function will
// change the ownership to a more sensible arrangement.
void AuditTailChunkOwnership(void) { SMP_AuditTailChunkOwnership(); } // end of AuditTailChunkOwnership() // If the addresses signified from DisasmIndex to IDAProIndex are // all considered data and do NOT follow a return instruction, // return false and update AreaSize to reflect the area to be // converted. // Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes. bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) { STARS_ea_t PrevIDAAddr; STARS_ea_t NextIDAAddr; size_t ShadowDisasmIndex = DisasmIndex - 1; STARS_ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex]; bool CannotConvert = false; // return value bool DebugAddress = false; #if SMP_DEBUG_FIXUP_IDB DebugAddress = (DisasmAddr == 0x806c19a); #endif if (DebugAddress) { SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex); SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(), DisasmLocs.size()); } if (IDAProIndex >= IDAProLocs.size()) { // Have already processed the last IDA address. if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n"); return true; } else if (DisasmIndex >= DisasmLocs.size()) { // Strange. Last Disasm address is only one to convert, and // IDA still has addresses after that? if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n"); return true; } else if (IDAProIndex < 2) { // We have Disasm addrs before the very first IDA addr. We // don't trust this boundary case. if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n"); return true; } NextIDAAddr = IDAProLocs[IDAProIndex - 1]; PrevIDAAddr = IDAProLocs[IDAProIndex - 2]; if (DebugAddress) SMP_msg(" PrevIDAAddr: %lx NextIDAAddr: %lx\n", (unsigned long) PrevIDAAddr, (unsigned long) NextIDAAddr); // See if previous IDA address was a return. 
flags_t PrevFlags = SMP_getFlags(PrevIDAAddr); if (!SMP_isCode(PrevFlags) || !SMP_isHead(PrevFlags)) { SMP_msg("PrevIDAAddr %lx not isCode or not isHead.\n", (unsigned long) PrevIDAAddr); return true; } SMPInstr PrevInstr(PrevIDAAddr); PrevInstr.Analyze(); if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n"); if (PrevInstr.HasReturnOpcode()) { // Right after a return come no-ops and 2-byte no-ops // that are just for alignment. IDA does not seem to be // happy when we convert all those to code. if (DebugAddress) SMP_msg(" Data followed a return instruction.\n"); return true; } // Now, see if the area from DisasmAddr to NextIDAAddr is all data // according to IDA. while (DisasmAddr < NextIDAAddr) { flags_t DataFlags = SMP_getFlags(DisasmAddr); if (SMP_isTail(DataFlags)) { if (DebugAddress) SMP_msg(" tail byte: %lx\n", (unsigned long) DisasmAddr); DisasmAddr = SMP_get_item_end(DisasmAddr); } else if (SMP_isData(DataFlags)) { if (DebugAddress) SMP_msg(" data byte: %lx\n", (unsigned long) DisasmAddr); DisasmAddr = SMP_get_item_end(DisasmAddr); } else if (SMP_isCode(DataFlags)) { // How could this ever happen? if (DebugAddress) SMP_msg(" isCode: %lx\n", (unsigned long) DisasmAddr); return true; } else { // must be isUnknown() // Very conservative here; only want to convert when the whole // region is data, because that is a symptom of IDA missing // a piece of code within a function (usually a piece of code // that is only reachable via an indirect jump). if (DebugAddress) SMP_msg(" Not isData: %lx\n", (unsigned long) DisasmAddr); return true; } if (DebugAddress) SMP_msg(" new DisasmAddr: %lx\n", (unsigned long) DisasmAddr); } // end while (DisasmAddr < NextIDAAddr) if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert); if (!CannotConvert) { // Success. DisasmAddr = DisasmLocs[ShadowDisasmIndex]; AreaSize = NextIDAAddr - DisasmAddr; if (DebugAddress) { SMP_msg(" Success! 
AreaSize: %x Old index: %zu new index: %zu\n", AreaSize, ShadowDisasmIndex, DisasmIndex); SMP_msg(" exiting FindDataToConvert()\n"); SMP_msg("\n"); } } // end if (!CannotConvert) return CannotConvert; } // end of FindDataToConvert() // Does a converted code region look like a function prologue? If so, // we should not include it in the previous function. bool IsFunctionPrologue(STARS_ea_t StartAddr, STARS_ea_t EndAddr) { return false; // **!!** TODO } // end of IsFunctionPrologue() // Patch program bytes that could not be converted from // data to code, if it can be determined that the bytes represent code // that IDA has a hard time with. // Currently limited to finding "call near ptr 0" instructions, which // often are found in optimized glibc code because gcc was able to // determine that a function pointer was zero and did constant propagation, // but unfortunately was not able to determine that the code was unreachable. // IDA will not succeed in ua_code() for "call 0", but there is no danger // of a working program ever executing this code. Replacing the call with // no-ops permits us to continue converting a contiguous range of data to // code, and permits IDA to reanalyze the function later. // Returns true if program bytes were patched. bool MDPatchUnconvertedBytes(STARS_ea_t CurrDisasmAddr) { flags_t AddrFlags = SMP_getFlags(CurrDisasmAddr); if (SMP_isData(AddrFlags) || SMP_isTail(AddrFlags)) { // Bytes should have been converted to unknown already. 
#if SMP_DEBUG_FIXUP_IDB SMP_msg("Cannot patch data bytes or tail bytes at %x\n", CurrDisasmAddr); #endif return false; } SMPInstr PatchInstr(CurrDisasmAddr); PatchInstr.Analyze(); int InstrLen = (int) PatchInstr.GetSize(); if (0 >= InstrLen) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("decode_insn() failed on patch location %x\n", CurrDisasmAddr); #endif return false; } else { if (PatchInstr.GetIDAOpcode() != NN_call) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("Cannot patch non-call instruction at %x\n", CurrDisasmAddr); #endif return false; } PatchInstr.PrintOperands(); STARSOpndTypePtr CallDest = PatchInstr.GetFirstUse()->GetOp(); if ((! CallDest->IsNearPointer()) || (0 != CallDest->GetAddr())) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("Cannot patch call unless it is call near ptr 0 at %x", CurrDisasmAddr); #endif return false; } STARS_ea_t PatchAddr = CurrDisasmAddr; for (int i = 0; i < InstrLen; ++i) { bool ok = patch_byte(PatchAddr, 0x90); // x86 no-op if (!ok) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("patch_byte() failed at %x\n", PatchAddr); #endif return false; } ++PatchAddr; } #if SMP_DEBUG_FIXUP_IDB SMP_msg("Patched %d bytes successfully at %x\n", InstrLen, CurrDisasmAddr); #endif InstrLen = create_insn(CurrDisasmAddr); if (0 >= InstrLen) { #if SMP_DEBUG_FIXUP_IDB SMP_msg(" ... but ua_code() still failed!\n"); #endif return false; } } // end if (0 >= InstrLen) ... else ... return true; } // end of MDPatchUnconvertedBytes() // Use the lists of code addresses identified by IDA Pro (in IDAProLocs) // and an external disassembler (in DisasmLocs). Compare the lists and // try to convert addresses to code that are found in DisasmLocs but // not in IDAProLocs. Emit warnings when IDAProLocs has a code address // not found in DisasmLocs. 
void FixCodeIdentification(void) { size_t DisasmIndex = 0; STARS_ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++]; size_t IDAProIndex = 0; STARS_ea_t CurrAddr = IDAProLocs[IDAProIndex++]; while (DisasmIndex <= DisasmLocs.size()) { // If the current address is less than the current // external disasm address, we have the rare case in // which IDA Pro has identified an address as code // but the external disasm has not. Emit a warning // message and go on to the next IDA address. if (CurrAddr < CurrDisasmAddr) { SMPInstr TempInstr(CurrAddr); TempInstr.Analyze(); SMP_msg("AUDIT: Address %lx is code in IDB but not in external disassembler: %s\n", (unsigned long) CurrAddr, TempInstr.GetDisasm()); if (IDAProIndex < IDAProLocs.size()) CurrAddr = IDAProLocs[IDAProIndex++]; else { // Last IDA addr; might still process Disasm addrs // after loop exit. break; } } else if (CurrAddr == CurrDisasmAddr) { // If equal, no problem, we are moving through the // code addresses in lockstep. Grab the next address // from each source. if (DisasmIndex < DisasmLocs.size()) { CurrDisasmAddr = DisasmLocs[DisasmIndex++]; } else { ++DisasmIndex; // cause loop exit; skip cleanup loop } if (IDAProIndex < IDAProLocs.size()) CurrAddr = IDAProLocs[IDAProIndex++]; else { // Last IDA addr; might still process Disasm addrs // after loop exit in cleanup loop. break; } } else { // We must have CurrAddr > CurrDisasmAddr. That means // IDA has jumped over some code addresses in // DisasmLocs. We need to try to convert addresses // to code until we can reach the current addr. // For now, we will address only the case in which IDA // has identified addresses as data bytes, and the // external disassembler(e.g. objdump) has identified // the same addresses as code. We only want to deal with // contiguous areas of data-to-code conversion that do NOT // follow a return statement. 
int AreaSize = 0; STARS_ea_t AreaStart = CurrDisasmAddr; STARS_ea_t AreaEnd; #if SMP_DEBUG_FIXUP_IDB SMP_msg("CurrDisasmAddr: %x CurrAddr: %x\n", CurrDisasmAddr, CurrAddr); #endif bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize); if (SkipArea) { // Skip over the extra external disasm addresses. while (CurrDisasmAddr < CurrAddr) CurrDisasmAddr = DisasmLocs[DisasmIndex++]; } else { // Convert the overlooked code region to unexplored. AreaEnd = CurrDisasmAddr + AreaSize; #if SMP_DEBUG_FIXUP_IDB SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd); #endif SMP_del_items(AreaStart, AreaSize, 0); SMP_bounds_t ConvertRegion; ConvertRegion.startEA = AreaStart; ConvertRegion.endEA = AreaEnd; FixupRegion CurrRegion(ConvertRegion); CodeReanalyzeList.push_back(CurrRegion); bool AllConverted = true; bool AllNops = true; do { flags_t InstrFlags = SMP_getFlags(CurrDisasmAddr); if (!SMP_isUnknown(InstrFlags)) { SMP_msg("Sync problem in FixCodeID: %lx\n", (unsigned long) CurrDisasmAddr); } else { int InstrLen = create_insn(CurrDisasmAddr); if (InstrLen > 0) { // Successfully converted to code SMPInstr NewInstr(CurrDisasmAddr); NewInstr.Analyze(); if (!NewInstr.IsNop()) AllNops = false; #if SMP_DEBUG_FIXUP_IDB #if 0 SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr, InstrLen, NewInstr.GetDisasm()); #endif #endif } else { if (MDPatchUnconvertedBytes(CurrDisasmAddr)) { ; #if SMP_DEBUG_FIXUP_IDB SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr); #endif } else { CurrRegion.FixupInstrs.push_back(CurrDisasmAddr); AllConverted = false; #if SMP_DEBUG_FIXUP_IDB SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr); #endif } } } // end if (SMP_isCode(InstrFlags) ... else ... 
if (DisasmIndex < DisasmLocs.size()) { CurrDisasmAddr = DisasmLocs[DisasmIndex++]; } else { // cause loops to exit CurrDisasmAddr = CurrAddr; ++DisasmIndex; // skip cleanup loop } } while (CurrDisasmAddr < CurrAddr); if (AllConverted && AllNops) { // We want to convert the region back to unexplored bytes // and take it off the work list. Regions that are all nops // create data flow analysis problems sometimes. The region // is often unreachable code and produces a basic block with // no predecessors within a function. This often happens when // an optimizing compiler uses nops as padding to align jump // targets on cache line bounaries. With no fall through into // the nops, they are unreachable and should be left as unknown. #if SMP_DEBUG_FIXUP_IDB SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(), CurrRegion.GetEnd()); #endif SMP_del_items(CurrRegion.GetStart(), CurrRegion.GetEnd() - CurrRegion.GetStart(), 0); CodeReanalyzeList.pop_back(); } } // end if (SkipArea) ... else ... } // end if (addr < CurrDisasmAddr) .. else if ... else ... } // end while (DisasmIndex <= DisasmLocs.size() #if 0 // Make this code use FindDataToConvert() **!!** // Cleanup loop: // If there are still Disasm addrs to process, try to turn them // into code in the IDB. while (DisasmIndex <= DisasmLocs.size()) { flags_t InstrFlags = SMP_getFlags(CurrDisasmAddr); if (SMP_isCode(InstrFlags)) { SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr); } else { // Clear bytes to unexplored. segment_t *seg = SMP_getseg(CurrDisasmAddr); if (SEG_CODE == seg->type) { SMP_del_items(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, 0); } else { // Might be safest to just discontinue processing // if we wander into a non-code segment. // DisasmLocs should not have an entire code segment // that IDA Pro missed. 
break; } int InstrLen = ua_code(CurrDisasmAddr); if (InstrLen > 0) { // Successfully converted to code SMPInstr NewInstr(CurrDisasmAddr); NewInstr.Analyze(); SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr, NewInstr.GetDisasm()); } else { SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr); } } // end if (SMP_isCode(InstrFlags) ... else ... if (DisasmIndex < DisasmLocs.size()) { CurrDisasmAddr = DisasmLocs[DisasmIndex++]; } else { ++DisasmIndex; // cause loop to exit } } // end while (DisasmIndex <= DisasmLocs.size() #endif return; } // end of FixCodeIdentification() // Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList. // Earlier failures are usually because the instruction branches to an address that has not // yet been converted from data to code, so ua_code() failed. Now that all data to code // conversions have completed, ua_code() should succeed. // Return the number of instructions successfully analyzed. int FixupNewCodeChunks(void) { list<FixupRegion>::iterator CurrRegion; int changes = 0; for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) { bool AllConverted = true; bool AllNops = true; bool NoFixups = (0 == CurrRegion->FixupInstrs.size()); if (NoFixups) { CurrRegion->SetStart(STARS_BADADDR); // mark for removal continue; // skip to next region } list<STARS_ea_t>::iterator CurrInstr; for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr) { int InstrLen = ::create_insn(*CurrInstr); if (InstrLen > 0) { // Successfully converted to code SMPInstr NewInstr(*CurrInstr); NewInstr.Analyze(); #if SMP_DEBUG_FIXUP_IDB SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen); #endif if (!NewInstr.IsNop()) { AllNops = false; *CurrInstr = STARS_BADADDR; // mark for removal } } else { AllConverted = false; #if SMP_DEBUG_FIXUP_IDB SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr); #endif } } 
// end for all instrs in CurrRegion if (AllConverted && !AllNops) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("FixupNewCodeChunks success for region from %x to %x\n", CurrRegion->GetStart(), CurrRegion->GetEnd()); #endif CurrRegion->SetStart(STARS_BADADDR); // mark for removal } else if (AllConverted && AllNops) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n", CurrRegion->GetStart(), CurrRegion->GetEnd()); #endif SMP_del_items(CurrRegion->GetStart(), CurrRegion->GetEnd() - CurrRegion->GetStart(), 0); CurrRegion->SetStart(STARS_BADADDR); // mark for removal } else { // Remove only the instructions that were fixed up. CurrInstr = CurrRegion->FixupInstrs.begin(); while (CurrInstr != CurrRegion->FixupInstrs.end()) { if (STARS_BADADDR == *CurrInstr) { CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr); } else { ++CurrInstr; } } } } // end for all regions in the CodeReanalyzeList // Remove completed regions from the CodeReanalyzeList CurrRegion = CodeReanalyzeList.begin(); while (CurrRegion != CodeReanalyzeList.end()) { if (STARS_BADADDR == CurrRegion->GetStart()) CurrRegion = CodeReanalyzeList.erase(CurrRegion); else ++CurrRegion; } #if 0 if (AllConverted) { if (IsFunctionPrologue(AreaStart, AreaEnd)) { // Create a new function entry chunk here. // **!!** TODO ; } else { // Extend the previous chunk to include the // converted code. STARS_ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2]; STARS_Function_t *PrevChunk = get_fchunk(PrevIDAAddr); #if SMP_DEBUG_FIXUP_IDB SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr); SMP_msg(" STARS_Function_t pointer for chunk: %x\n", PrevChunk); #endif #if 0 // temporary for debugging if (is_func_entry(PrevChunk)) { // Extend the func entry to contain the new code. 
if (func_setend(PrevIDAAddr, AreaEnd)) { SMP_msg("Func extended to include code from %x to %x\n", AreaStart, AreaEnd); FuncReanalyzeList.push_back(PrevIDAAddr); } else { SMP_msg("Failed to extend func from %x to %x\n", AreaStart, AreaEnd); } } else { // tail // See if this works for function tails, also. // Extend the func entry to contain the new code. if (func_setend(PrevIDAAddr, AreaEnd)) { SMP_msg("Tail extended to include code from %x to %x\n", AreaStart, AreaEnd); STARS_Function_t *TailOwner = get_func(PrevChunk->owner); FuncReanalyzeList.push_back(PrevIDAAddr); } else { SMP_msg("Failed to extend tail from %x to %x\n", AreaStart, AreaEnd); } } // end if (is_func_entry()) ... else ... #endif } // end if (IsFunctionPrologue()) ... else ... } // end if (AllConverted) else { SMP_msg("not AllConverted; cannot include new code in previous chunk.\n"); } #endif return changes; } // end of FixupNewCodeChunks() // Audit the IDA code database by looking at all instructions in the // code segment and printing all those that are not contained in a // function. Emit the context-free annotations that we are able to // emit on a per-instruction basis. 
void FindOrphanedCode(STARS_Segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) { char disasm[MAXSTR]; qstring *Qdisasm; bool PrefetchInstSeenLast = false; // inst before current inst was a prefetch bool UndefinedOpcodeSeenLast = false; // inst before current inst was an undefined opcode for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA(); addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isTail(InstrFlags)) continue; if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) { STARS_ea_t FirstFuncAddr; if (!(CurrProg->IsInstAddrStillInFunction(addr, FirstFuncAddr))) { SMPInstr CurrInst(addr); CurrInst.Analyze(); #if SMP_DEBUG_FIXUP_IDB SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm()); #endif // TODO: If there are code xrefs to the orphan code, // see what kind. If a CALL, and orphan code looks // like a prologue, make a function. If a JUMP of // some kind, then make a function chunk and make // it a tail of all functions that jump to it. **!!** // Do machine-dependent fixes for DEF and USE lists. // The fixes can help produce better annotations. CurrInst.MDFixupDefUseLists(); // If instruction is still not included in a code chunk, // emit annotations for it in isolation. if (CurrInst.IsAnalyzeable()) { CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg); // If instruction is an indirect branch, emit an XREF // annotation for each of its targets. SMPitype CurrDataFlow = CurrInst.GetDataFlowType(); bool IndirCallFlag = (CurrDataFlow == INDIR_CALL); if ((CurrDataFlow == INDIR_JUMP) || IndirCallFlag) { global_STARS_program->PrintAllCodeToCodeXrefs(addr, CurrInst.GetSize(), IndirCallFlag, false); } // If the address of this instruction appeared in data, emit an IBT annotation. 
if (global_STARS_program->IsCodeAddressTaken(addr)) { global_STARS_program->PrintUnknownCodeXref(addr, CurrInst.GetSize(), ZST_CODEADDRESSTAKEN); } } // end if inst is Analyzeable // Emit IBT annotations for instructions that fit computed-goto patterns in libc/glibc, such // as prefetch instructions and the instructions that follow them (computed goto often chooses // between going to the prefetch or jumping just past it, and IDA Pro cannot analyze these libc // macro-generated computed gotos even if they are not orphaned code). Likewise, an undefined opcode // often separates an indirect jump and its first target, so inst after undefined opcode is IBT. bool EmitIBTAnnotation = (PrefetchInstSeenLast || UndefinedOpcodeSeenLast); if (CurrInst.MDIsPrefetchOpcode()) { PrefetchInstSeenLast = true; UndefinedOpcodeSeenLast = false; EmitIBTAnnotation = true; } else if (CurrInst.MDIsUndefinedOpcode()) { UndefinedOpcodeSeenLast = true; PrefetchInstSeenLast = false; } else { PrefetchInstSeenLast = false; UndefinedOpcodeSeenLast = false; } if (EmitIBTAnnotation) { global_STARS_program->PrintUnknownCodeXref(addr, CurrInst.GetSize(), ZST_COMPUTEDGOTO); } // mark addresses that are taken via the instruction global_STARS_program->PrintAllAddressTakenXrefs(addr, CurrInst.GetSize()); } } else if (SMP_isUnknown(InstrFlags)) { #if SMP_DEBUG_FIXUP_IDB SMP_msg("Unanalyzed byte at %x\n", addr); #endif // Can IDA analyze this to be code? int InstrLen; InstrLen = create_insn(addr); if (InstrLen > 0) { bool IDAsuccess = SMP_generate_disasm_line(addr, disasm, sizeof(disasm) - 1); if (IDAsuccess) { // Remove interactive color-coding tags. STARS_ssize_t StringLen = SMP_tag_remove(disasm, disasm, sizeof(disasm) - 1); if (-1 >= StringLen) { SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr); } else { #if SMP_DEBUG_FIXUP_IDB SMP_msg("Successfully analyzed! %s\n", disasm); #endif SMPInstr UnknownInstr(addr); UnknownInstr.Analyze(); // TODO: Get new code into a chunk. 
**!!** // If instruction is still not included in a code chunk, // emit annotations for it in isolation. UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg); } } else { SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr); } } } } // end for (ea_t addr = CurrSeg->startEA; ...) } // end of FindOrphanedCode() // Find calls and jumps from orphaned code to functions. Mark those functions // so that we will know that the program CFG is not complete. void FindLinksFromOrphanedCode(STARS_Segment_t *CurrSeg) { char disasm[MAXSTR]; for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA(); addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) { // Does IDA Pro think addr is inside a function? func_t* CurrIDAFunc = ::get_func(addr); if (nullptr == CurrIDAFunc) { SMPInstr CurrInst(addr); CurrInst.Analyze(); if (CurrInst.IsAnalyzeable()) { // If instruction is control flow, see if it reaches an addr in a function. SMPitype CurrDataFlow = CurrInst.GetDataFlowType(); if (IsBranchOrCall(CurrDataFlow)) { STARS_ea_t TargetAddr = CurrInst.GetCallTarget(); if (STARS_BADADDR == TargetAddr) { TargetAddr = CurrInst.GetJumpTarget(); } if (STARS_BADADDR != TargetAddr) { // See if TargetAddr is inside a function. 
func_t* TargetFunc = ::get_func(TargetAddr); if (nullptr != TargetFunc) { #if (IDA_SDK_VERSION < 700) STARS_ea_t FirstAddrInFunc = TargetFunc->startEA; #else STARS_ea_t FirstAddrInFunc = TargetFunc->start_ea; #endif if (FirstAddrInFunc != TargetAddr) { SMP_msg("WARNING: Orphaned code at %llx calls %llx which is inside func starting at %llx\n", (unsigned long long) addr, (unsigned long long) TargetAddr, (unsigned long long) FirstAddrInFunc); } CurrProg->SetFuncAddrCalledFromOrphanedCode(FirstAddrInFunc); SMP_msg("INFO: Orphaned code at %llx calls func at %llx\n", (unsigned long long) addr, (unsigned long long) FirstAddrInFunc); } } } } } // end if (nullptr == CurrFunc) } // end if addr is code and isHead } // end for (ea_t addr = CurrSeg->startEA; ...) return; } // end of FindLinksFromOrphanedCode() // Version of FindOrphanedCode that does not emit annotations but can be used // to determine at what point in time code becomes orphaned. void Debug_FindOrphanedCode(STARS_Segment_t *CurrSeg, bool FirstRun) { STARS_ea_t DebugAddr = 0x8050db0; for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA(); addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) { STARS_Function_t *CurrFunc = SMP_get_func(addr); if (nullptr == CurrFunc) { // Code not in a func; orphaned pair<set<STARS_ea_t>::iterator, bool> pairib; pairib = CodeOrphans.insert(addr); if (DebugAddr == addr) { SMP_msg("DEBUG: Orphaned code addr %lx found.\n", (unsigned long) addr); } if ((!FirstRun) && (pairib.second)) { SMP_msg("SERIOUS WARNING: Newly orphaned code at %lx \n", (unsigned long) addr); } } } } // end for (STARS_ea_t addr = CurrSeg->startEA; ...) } // end of Debug_FindOrphanedCode() // Audit the IDA database with respect to branches and calls. They should // each have valid code targets (not data or unknown bytes) and the code // cross references should reflect the linkage. 
void AuditCodeTargets(void) { SMP_AuditCodeTargets(); } // end of AuditCodeTargets() void SpecialDebugOutput(void) { char disasm[MAXSTR]; vector<STARS_ea_t> ProblemAddrs; ProblemAddrs.push_back(0x8066d08); bool IDAsuccess; int InstLen; STARS_ssize_t StringLen; insn_t LocalCmd; uint32 LocalFeatures; for (size_t index = 0; index < ProblemAddrs.size(); ++index) { STARS_ea_t addr = ProblemAddrs[index]; flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isCode(InstrFlags) && SMP_isHead(InstrFlags)) { SMPInstr TempInst(addr); IDAsuccess = TempInst.FillCmd(); // Emits ERROR messages if there are failures } } return; } // end of SpecialDebugOutput()