diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp index 28c3f82797ce23f98e944afbb08827cbb3f66ad0..e1f632317e2dfadb2015c13f90b46cfcd2f6e045 100644 --- a/SMPStaticAnalyzer.cpp +++ b/SMPStaticAnalyzer.cpp @@ -27,6 +27,7 @@ #define SMP_DEBUG3 0 // verbose #define SMP_DEBUG_MEM 0 // print memory operands #define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0 +#define SMP_DEBUG_ORPHANS 1 // find code outside of functions // Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find // which function is causing a problem. @@ -60,11 +61,11 @@ static char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR", "STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN", "UNKNOWN", "UNKNOWN", 0}; - void IDAP_run(int); - - - +void FindOrphanedCode(segment_t *, FILE *); +void AuditCodeTargets(void); +ea_t FindNewFuncLimit(ea_t); +void SpecialDebugOutput(void); static int idaapi idp_callback(void *, int event_id, va_list va) { if (event_id == ph.auto_empty_finally) { // IDA analysis is done @@ -128,12 +129,19 @@ void IDAP_run(int arg) { (void) memset(OptCount, 0, sizeof(OptCount)); (void) memset(AnnotationCount, 0, sizeof(AnnotationCount)); + // Pre-audit the IDA database by seeing if all branches and calls + // have proper code targets and code cross references. + SpecialDebugOutput(); + AuditCodeTargets(); + // First, examine the data segments and print info about static // data, such as name/address/size. Do the same for functions in // code segments. // Loop through all segments. for (int SegIndex = 0; SegIndex < get_segm_qty(); ++SegIndex) { + char SegName[MAXSTR]; seg = getnseg(SegIndex); + ssize_t SegNameSize = get_segm_name(seg, SegName, sizeof(SegName) - 1); // We are only interested in the data segments of type // SEG_DATA, SEG_BSS and SEG_COMM. @@ -143,7 +151,11 @@ void IDAP_run(int arg) { // examining all data objects (effective addresses). 
ReadOnlyFlag = ((seg->perm & SEGPERM_READ) && (!(seg->perm & SEGPERM_WRITE))); #if SMP_DEBUG - msg("Starting data segment of type %d\n", seg->type); + msg("Starting data segment of type %d", seg->type); + if (SegNameSize > 0) + msg(" SegName: %s\n", SegName); + else + msg("\n"); if (ReadOnlyFlag) { msg("Read-only data segment.\n"); } @@ -185,7 +197,11 @@ void IDAP_run(int arg) { } // end if (seg->type == SEG_DATA ...) else if (seg->type == SEG_CODE) { #if SMP_DEBUG - msg("Starting code segment.\n"); + msg("Starting code segment"); + if (SegNameSize > 0) + msg(" SegName: %s\n", SegName); + else + msg("\n"); #endif #if SMP_DEBUG2 if (!FuncsDumped) { @@ -275,10 +291,14 @@ void IDAP_run(int arg) { delete CurrFunc; CurrFunc = NULL; } // end for (size_t FuncIndex = 0; ...) +#if SMP_DEBUG_ORPHANS + FindOrphanedCode(seg, SymsFile); +#endif } // end else if (seg->type === SEG_CODE) else { #if SMP_DEBUG - msg("Not processing segment of type %d \n", seg->type); + msg("Not processing segment of type %d SegName: %s\n", + seg->type, SegName); // NOTE(review): SegName is printed here without the SegNameSize > 0 check used at the two sites above -- confirm the buffer is valid when get_segm_name() fails #endif } } // end for (int SegIndex = 0; ... ) @@ -310,6 +330,193 @@ plugin_t PLUGIN = { }; +// Audit the IDA code database by looking at all instructions in the +// code segment and printing all those that are not contained in a +// function. Emit the context-free annotations that we are able to +// emit on a per-instruction basis. +void FindOrphanedCode(segment_t *CurrSeg, FILE *AnnotFile) { + char disasm[MAXSTR]; + for (ea_t addr = CurrSeg->startEA; addr < CurrSeg->endEA; + addr = get_item_end(addr)) { + flags_t InstrFlags = getFlags(addr); + if (isHead(InstrFlags) && isCode(InstrFlags)) { + func_t *CurrFunc = get_func(addr); + if (NULL == CurrFunc) { + SMPInstr CurrInst(addr); + CurrInst.Analyze(); + msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm()); + // TODO: If there are code xrefs to the orphan code, + // see what kind. If a CALL, and orphan code looks + // like a prologue, make a function. 
If a JUMP of + // some kind, then make a function chunk and make + // it a tail of all functions that jump to it. **!!** + + // If instruction is still not included in a code chunk, + // emit annotations for it in isolation. + CurrInst.EmitAnnotations(true, false, AnnotFile); + } + } + else if (isUnknown(InstrFlags)) { + msg("Unanalyzed byte at %x\n", addr); + // Can IDA analyze this to be code? + int InstrLen = ua_code(addr); + if (InstrLen > 0) { + (void) generate_disasm_line(addr, disasm, sizeof(disasm) - 1); + // Remove interactive color-coding tags. + tag_remove(disasm, disasm, 0); + msg("Successfully analyzed! %s\n", disasm); + // TODO: Get new code into a chunk. **!!** + } + } + } // end for (ea_t addr = CurrSeg->startEA; ...) +} // end of FindOrphanedCode() + +// Audit the IDA database with respect to branches and calls. They should +// each have valid code targets (not data or unknown bytes) and the code +// cross references should reflect the linkage. +void AuditCodeTargets(void) { + // Cover all the code that IDA has grouped into functions by iterating + // through all function chunks in the program. + for (size_t ChunkIndex = 0; ChunkIndex < get_fchunk_qty(); ++ChunkIndex) { + func_t *ChunkInfo = getn_fchunk((int) ChunkIndex); // NOTE(review): dereferenced below without a NULL check -- confirm getn_fchunk() cannot fail for an in-range index + char FuncName[MAXSTR]; + get_func_name(ChunkInfo->startEA, FuncName, sizeof(FuncName) - 1); + + // First, see if any calls to this function (if this chunk is + // an entry point) are not coming from within functions. 
+ if (is_func_entry(ChunkInfo)) { + xrefblk_t xb; + ea_t addr = ChunkInfo->startEA; + for (bool ok = xb.first_to(addr, XREF_ALL); ok; ok = xb.next_to()) { + uchar XrefType = xb.type & XREF_MASK; + if (xb.iscode) { + if ((XrefType == fl_U) || (XrefType == fl_USobsolete)) { + msg("Bad xref type: %x %s\n", addr, FuncName); + } + else if ((XrefType == fl_JF) || (XrefType == fl_JN)) { + msg("Jump to func: %x %s from: %x\n", + addr, FuncName, xb.from); + } + else if (XrefType == fl_F) { + msg("Fall through to func: %x %s from: %x\n", + addr, FuncName, xb.from); + } + else if ((XrefType == fl_CF) || (XrefType == fl_CN)) { + // Far call or Near call + func_t *CallingFunc = get_func(xb.from); + if (NULL == CallingFunc) { + msg("Call to %x Func %s from %x not in function.\n", + addr, FuncName, xb.from); + } + } + } // end if (xb.iscode) + else { // DATA xref + if (XrefType == dr_O) { + msg("Data xref to %x Func %s from %x\n", + addr, FuncName, xb.from); + } + else { + msg("Strange data xref %d to %x Func %s from %x\n", + XrefType, addr, FuncName, xb.from); + } + } + } // end for (bool ok = xb.first_to(); ...) + } // end if (is_func_entry(ChunkInfo)) + + // Next, see if any call or branch in this chunk references + // a target address that is not in a function. If so, and the + // callee address code looks like a function prologue, then + // create a function for the contiguous code starting at that + // address and ask IDA to analyze it and store it in the + // IDA database. If it is a branch target, not a call target, + // create a new TAIL chunk for the current parent functions. NOTE(review): the loop below does not yet create anything; it only computes the span with FindNewFuncLimit() and reports it via msg(). 
+ for (ea_t addr = ChunkInfo->startEA; addr < ChunkInfo->endEA; + addr = get_item_end(addr)) { + flags_t InstrFlags = getFlags(addr); + if (isCode(InstrFlags) && isHead(InstrFlags)) { + SMPInstr CurrInst(addr); + CurrInst.Analyze(); + if ((CALL|JUMP|COND_BRANCH) & CurrInst.GetDataFlowType()) { + xrefblk_t xb; + for (bool ok = xb.first_from(addr, XREF_FAR); ok; ok = xb.next_from()) { + if (xb.iscode) { + func_t *FuncInfo = get_func(xb.to); + if (NULL == FuncInfo) { + // Found call or branch to addr that is not in a func. + ea_t FirstAddr = xb.to; + // Find limits of contiguous code starting at FirstAddr. + ea_t LastAddr = FindNewFuncLimit(xb.to); + if (CALL == CurrInst.GetDataFlowType()) + msg("Found new func from %x to %x\n", + FirstAddr, LastAddr); + else + msg("Found new chunk from %x to %x\n", + FirstAddr, LastAddr); + } + } + } + } + } + } + } // end for (size_t ChunkIndex = 0; ... ) + + return; +} // end of AuditCodeTargets() + +// Find the span of contiguous code that is not contained within any +// function, starting at StartAddr, which should already be an example +// of an instruction address that is outside of a function. +ea_t FindNewFuncLimit(ea_t StartAddr) { + ea_t LimitAddr = StartAddr; + segment_t *seg = getseg(StartAddr); + if (NULL == seg) + return LimitAddr; + ea_t SegLimit = seg->endEA; + + for (ea_t addr = get_item_end(StartAddr); addr < SegLimit; addr = get_item_end(addr)) { + flags_t InstrFlags = getFlags(addr); + if (isCode(InstrFlags) && isHead(InstrFlags)) { + LimitAddr = addr; // NOTE(review): assigned before the get_func() check, so when an existing function is hit the returned limit is that function's address; on the non-code exit it is the last orphan code head + func_t *FuncInfo = get_func(addr); + if (NULL != FuncInfo) + break; // ran into an existing function + } + else // Not a code head; time to stop. 
+ break; + } + return LimitAddr; +} // end of FindNewFuncLimit() + +void SpecialDebugOutput(void) { // Print the disassembly of each address in a hard-coded list, if it is a code head. NOTE(review): the addresses look specific to one binary under investigation -- confirm before leaving this call enabled + char disasm[MAXSTR]; + vector<ea_t> ProblemAddrs; + ProblemAddrs.push_back(0x8048175); + ProblemAddrs.push_back(0x80481c6); + ProblemAddrs.push_back(0x80481e4); + ProblemAddrs.push_back(0x804847A); + ProblemAddrs.push_back(0x080486A8); + ProblemAddrs.push_back(0x08048A2A); + ProblemAddrs.push_back(0x08048CE1); + ProblemAddrs.push_back(0x08048DC0); + ProblemAddrs.push_back(0x08048E90); + ProblemAddrs.push_back(0x08049087); + ProblemAddrs.push_back(0x08049148); + ProblemAddrs.push_back(0x08049231); + + for (size_t index = 0; index < ProblemAddrs.size(); ++index) { + ea_t addr = ProblemAddrs[index]; + flags_t InstrFlags = getFlags(addr); + if (isCode(InstrFlags) && isHead(InstrFlags)) { + ua_ana0(addr); + generate_disasm_line(addr, disasm, sizeof(disasm) - 1); + tag_remove(disasm, disasm, 0); + msg("Problem addr %x : %s\n", addr, disasm); + } + } + return; +} // end of SpecialDebugOutput() + + // Initialize the OptCategory[] array to define how we emit // optimizing annotations.