#include <list> #include <cstdint> #include <interfaces/idapro/all.h> // #include "interfaces/STARSTypes.h" #include "interfaces/SMPDBInterface.h" #include "interfaces/STARSIDATypes.h" #include "interfaces/abstract/all.h" #include "interfaces/idapro/all.h" #include "base/SMPDataFlowAnalysis.h" #include "base/SMPInstr.h" #include "base/SMPBasicBlock.h" #include "base/SMPFunction.h" #include "base/SMPProgram.h" #if (IDA_SDK_VERSION < 700) #include <area.hpp> #else #include <range.hpp> #endif #include <funcs.hpp> #include <struct.hpp> #include <xref.hpp> #pragma GCC diagnostic ignored "-Wstrict-aliasing" #include <tryblks.hpp> #pragma GCC diagnostic pop using namespace std; #define STARS_DEBUG_MEMORY_CORRUPTION 0 // leave this on; serious errors reported #define SMP_DEBUG_BUILD_RTL 1 // Expand chunks to include interleaved regions of insts not in any other function #define STARS_INTEGRATE_ORPHANED_REGIONS 1 // Used for binary search by function number in SMPStaticAnalyzer.cpp // to trigger debugging output and find which instruction in which // function is causing a crash. bool SMPBinaryDebug = false; bool STARS_IDA_Function_t::IsChunkUnshared(STARS_ea_t ChunkAddr, STARS_ea_t FuncHeadStart, STARS_ea_t FuncHeadEnd) { bool Unshared = true; SMP_xref_t CurrXrefs; // See if any xref reaches this ChunkAddr from outside the FuncHead range. for (bool ok = CurrXrefs.SMP_first_to(ChunkAddr, XREF_ALL); ok; ok = CurrXrefs.SMP_next_to()) { STARS_ea_t FromAddr = CurrXrefs.GetFrom(); if ((FromAddr != 0) && (CurrXrefs.GetIscode())) { // We found a code xref that comes to the ChunkAddr. Whether it is a fall-through or // a jump/call, it is unshared only if it comes from within the FuncHead address range. if ((FromAddr < FuncHeadStart) || (FromAddr >= FuncHeadEnd)) { Unshared = false; break; } } } return Unshared; } // end of STARS_IDA_Function_t::IsChunkUnshared() // Is InstID in this function? For IDA Pro, InstID is a code address. bool STARS_IDA_Function_t::IsInstIDInFunc(STARS_ea_t InstID) { bool HasChunks = (this->HasSharedChunks() || this->UnsharedChunks); bool Found = false; if (HasChunks) { func_tail_iterator_t FuncTail(this->the_func); for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const STARS_area_t &CurrChunk = FuncTail.chunk(); STARS_ea_t CurrChunkStartAddr; STARS_ea_t CurrChunkLastAddr; #if (IDA_SDK_VERSION < 700) CurrChunkLastAddr = CurrChunk.endEA; #else CurrChunkLastAddr = CurrChunk.end_ea; #endif #if (IDA_SDK_VERSION < 700) CurrChunkStartAddr = CurrChunk.startEA; #else CurrChunkStartAddr = CurrChunk.start_ea; #endif if ((InstID >= CurrChunkStartAddr) && (InstID < CurrChunkLastAddr)) { Found = true; break; } } // end for all chunks in function } else { Found = ((InstID >= this->get_startEA()) && (InstID < this->get_endEA())); } return Found; } // end of STARS_IDA_Function_t::IsInstIDInFunc() void STARS_IDA_Function_t::MarkSharedChunks(void) { char name[STARS_MAXSTR]; STARS_ea_t FirstEA = this->get_startEA(); (void) this->GetFunctionName(name, sizeof(name)); // Determine if we are dealing with shared chunks. STARS_ea_t FuncHeadLastAddr = 0; std::size_t ChunkCounter = 0; // func_tail_iterator_t FuncTail((func_t*)*dynamic_cast<STARS_IDA_Function_t*>(this->GetFuncInfo())); func_tail_iterator_t FuncTail(this->the_func); // (func_t*)*(dynamic_cast<STARS_IDA_Function_t*>(this->GetFuncInfo()))); for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const STARS_area_t &CurrChunk = FuncTail.chunk(); ++ChunkCounter; if (1 == ChunkCounter) { // head chunk #if (IDA_SDK_VERSION < 700) FuncHeadLastAddr = CurrChunk.endEA; #else FuncHeadLastAddr = CurrChunk.end_ea; #endif } else { // a tail chunk #if (IDA_SDK_VERSION < 700) STARS_ea_t CurrChunkStartAddr = CurrChunk.startEA; #else STARS_ea_t CurrChunkStartAddr = CurrChunk.start_ea; #endif if (this->IsChunkUnshared(CurrChunkStartAddr, FirstEA, FuncHeadLastAddr)) { this->UnsharedChunks = true; // SMP_msg("INFO: Interface Found unshared tail chunk for %s at %lx\n", // name, (unsigned long) CurrChunkStartAddr); } else { this->SharedChunks = true; // SMP_msg("INFO: Interface Found tail chunk for %s at %lx\n", // name, (unsigned long) CurrChunkStartAddr); } } } #if STARS_INTEGRATE_ORPHANED_REGIONS if (1 < ChunkCounter) { // search for interleaved orphaned instructions and integrate them list<pair<STARS_ea_t, STARS_ea_t> > ExpansionWorkList; // expand after iterating through chunks; // IDA Pro seems to have interferences via global variables STARS_Segment_t *CurrSeg = global_stars_interface->getseg(FirstEA); assert(NULL != CurrSeg); STARS_ea_t SegAddrLimit = CurrSeg->get_endEA(); bool ExpansionDebugFlag = (FirstEA == 0x49cf0); if (ExpansionDebugFlag) { for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const STARS_area_t &CurrChunk = FuncTail.chunk(); #if (IDA_SDK_VERSION < 700) STARS_ea_t CurrChunkStartAddr = CurrChunk.startEA; STARS_ea_t CurrChunkEndAddr = CurrChunk.endEA; #else STARS_ea_t CurrChunkStartAddr = CurrChunk.start_ea; STARS_ea_t CurrChunkEndAddr = CurrChunk.end_ea; #endif SMP_msg("DEBUG: Chunk from %llx to %llx\n", (uint64_t) CurrChunkStartAddr, (uint64_t) CurrChunkEndAddr); } } for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const STARS_area_t &CurrChunk = FuncTail.chunk(); // Go through addresses after this chunk and find the extent of instructions // that are orphans (not part of any function) and expand the current chunk to // include the orphans. STARS_ea_t UpperLimit = 0; STARS_ea_t ChunkStartAddr = GetChunkStart(CurrChunk); STARS_ea_t ChunkEndAddr = GetChunkEnd(CurrChunk); for (STARS_ea_t addr = ChunkEndAddr; addr < SegAddrLimit; addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (!SMP_isHead(InstrFlags)) { continue; } if (SMP_isCode(InstrFlags)) { func_t *NextFunc = ::get_func(addr); if (NULL == NextFunc) { // inst is not in any func UpperLimit = SMP_get_item_end(addr); } else { if (ExpansionDebugFlag) { #if (IDA_SDK_VERSION < 700) if (NextFunc->startEA == FirstEA) { #else if (NextFunc->start_ea == FirstEA) { #endif SMP_msg("DEBUG: Chunk expansion terminated at %llx by code from same function.\n", (uint64_t) addr); } else { SMP_msg("DEBUG: Chunk expansion terminated at %llx by code from different function.\n", (uint64_t) addr); } } break; } } } // for all addrs after CurrChunk if (UpperLimit > 0) { // need to expand earlier chunk pair<STARS_ea_t, STARS_ea_t> WorkListItem(ChunkStartAddr, UpperLimit); ExpansionWorkList.push_back(WorkListItem); } else if (ExpansionDebugFlag) { SMP_msg("DEBUG: Chunk from %llx to %llx not Expanded\n", (uint64_t) ChunkStartAddr, (uint64_t) ChunkEndAddr); } } // end for all chunks for (list<pair<STARS_ea_t, STARS_ea_t> >::iterator WorkIter = ExpansionWorkList.begin(); WorkIter != ExpansionWorkList.end(); ++WorkIter) { STARS_ea_t StartAddr = (*WorkIter).first; STARS_ea_t LimitAddr = (*WorkIter).second; #if (IDA_SDK_VERSION < 700) bool success = ::func_setend(StartAddr, LimitAddr); #else bool success = ::set_func_end(StartAddr, LimitAddr); #endif if (success) { SMP_msg("INFO: Expanded chunk limit at %llx to new limit at %llx to include orphan code.\n", (unsigned long long) StartAddr, (unsigned long long) LimitAddr); } else { SMP_msg("ERROR: Failed to expand chunk limit at %llx to new limit at %llx to include orphan code.\n", (unsigned long long) StartAddr, (unsigned long long) LimitAddr); } } // end for ExpansionWorkList items ExpansionWorkList.clear(); } // end if (1 < ChunkCounter) #endif // STARS_INTEGRATE_ORPHANED_REGIONS return; } // end of STARS_IDA_Function_t::MarkSharedChunks() bool STARS_IDA_Function_t::IsMultiEntry(bool HasIndirectJumps) { STARS_ea_t FirstEA = this->get_startEA(); func_tail_iterator_t FuncTail(the_func); // (func_t*)*(dynamic_cast<STARS_IDA_Function_t*>(this->GetFuncInfo()))); size_t CallTargetCount = 0; // how many addresses in this function are called? size_t AddressTakenCount = 0; // how many instructions in this function have their address taken? bool HasChunks = (this->HasSharedChunks() || this->UnsharedChunks); for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { const STARS_area_t &CurrChunk = FuncTail.chunk(); // Start with just the beginning addrs of chunks, as IDA will break into chunks when calls target an inst. (???) STARS_ea_t addr = GetChunkStart(CurrChunk); if (addr != FirstEA) { // Determine whether the instruction is a call target by looking // at its cross references and seeing if it has "TO" code xrefs. SMP_xref_t xrefs; for (bool ok = xrefs.SMP_first_to(addr, XREF_FAR); ok; ok = xrefs.SMP_next_to()) { STARS_ea_t DistantAddr = xrefs.GetFrom(); if ((DistantAddr != 0) && (xrefs.GetIscode())) { // Now we see if the distant instruction is in another function. STARS_Function_t *SourceFunc = SMP_get_func(DistantAddr); if (NULL != SourceFunc) { if (HasChunks && (SourceFunc->get_startEA() != FirstEA)) { ++CallTargetCount; SMP_msg("INFO: MultiEntry due to call from %llx to %llx FirstEA: %llx\n", (unsigned long long) DistantAddr, (unsigned long long) addr, (unsigned long long) FirstEA); break; } } } } // end for all XREF_FAR xrefs } #if 0 // Code below does not work, because the dr_O xref type, "data reference : offset," // is used for loads of data addresses, e.g. in frame_dummy(): // mov dword ptr [esp], offset __JCR_LIST__ // We have to go through read-only data segments and look for code addresses. if (!HasIndirectJumps) { // switch tables produce false positives; get more precise later. do { if (FirstEA != addr) { for (bool ok = xrefs.SMP_first_from(addr, XREF_DATA); ok; ok = xrefs.SMP_next_from()) { STARS_ea_t DistantAddr = xrefs.GetTo(); if ((DistantAddr != 0) && (xrefs.GetType() == dr_O)) { ++AddressTakenCount; SMP_msg("INFO: MultiEntry: Address of inst at %llx taken in data at %llx.\n", (unsigned long long) addr, (unsigned long long) DistantAddr); break; } } // end for all XREF_FAR xrefs } addr = SMP_get_item_end(addr); } while ((addr != CurrChunk.endEA) && (0 == AddressTakenCount)); } #endif } // end for all chunks // A single-entry function has the entry point code-xrefed, and no addresses taken other than perhaps the entry point, which we skipped bool MultiEntryFound = ((CallTargetCount > 0) || (AddressTakenCount > 0)); return MultiEntryFound; } // end of STARS_IDA_Function_t::IsMultiEntry() void STARS_IDA_Function_t::UpdateXrefs() { size_t ChunkCounter = 0; func_tail_iterator_t FuncTail(the_func); enum cref_t NearJump = (cref_t)(fl_JN | XREF_USER); enum cref_t FarJump = (cref_t)(fl_JF | XREF_USER); for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { #if (IDA_SDK_VERSION < 700) const area_t &CurrChunk = FuncTail.chunk(); #else const range_t &CurrChunk = FuncTail.chunk(); #endif // Find the instructions for each chunk, audit the xrefs. STARS_ea_t ChunkStartAddr = GetChunkStart(CurrChunk); STARS_ea_t ChunkEndAddr = GetChunkEnd(CurrChunk); for (STARS_ea_t addr = ChunkStartAddr; addr < ChunkEndAddr; addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) { // Fill cmd structure with disassembly of instr SMPInstr TempInst(addr); if (!TempInst.FillCmd()) { SMP_msg("ERROR: SMPGetCmd failed from MDAuditJumpXrefs at %llx\n", (uint64_t) addr); } else { // Determine whether the instruction is a jump target by looking // at its cross references and seeing if it has "TO" code xrefs. SMP_xref_t xrefs, Distant_xrefs; for (bool ok = xrefs.SMP_first_to(addr, XREF_FAR); ok; ok = xrefs.SMP_next_to()) { STARS_ea_t DistantAddr = xrefs.GetFrom(); if ((DistantAddr != 0) && (xrefs.GetIscode())) { // Now we see if the distant instruction has an xref to this instruction. bool FoundDistantXref = false; for (bool ok2 = Distant_xrefs.SMP_first_from(DistantAddr, XREF_FAR); ok2; ok2 = Distant_xrefs.SMP_next_from()) { STARS_ea_t TargetAddr = Distant_xrefs.GetTo(); if (TargetAddr == addr) { FoundDistantXref = true; break; } } if (!FoundDistantXref) { SMP_msg("WARNING: Missing code Xref from %lx to %lx\n", (unsigned long) DistantAddr, (unsigned long) addr); long SignedAddrDiff = (long) (DistantAddr - addr); if ((SignedAddrDiff < -128) || (SignedAddrDiff > 127)) { add_cref(DistantAddr, addr, FarJump); } else { add_cref(DistantAddr, addr, NearJump); } } } } // end for all "to" xrefs // Now check the "from" xrefs to see if the target inst has the corresponding "to" xref. for (bool ok = xrefs.SMP_first_from(addr, XREF_FAR); ok; ok = xrefs.SMP_next_from()) { STARS_ea_t DistantAddr = xrefs.GetTo(); if ((DistantAddr != 0) && (xrefs.GetIscode())) { // Now we see if the distant instruction has an xref to this instruction. bool FoundDistantXref = false; for (bool ok2 = Distant_xrefs.SMP_first_to(DistantAddr, XREF_FAR); ok2; ok2 = Distant_xrefs.SMP_next_to()) { STARS_ea_t SourceAddr = Distant_xrefs.GetFrom(); if (SourceAddr == addr) { FoundDistantXref = true; break; } } if (!FoundDistantXref) { SMP_msg("WARNING: Missing code Xref to %lx from %lx\n", (unsigned long) DistantAddr, (unsigned long) addr); long SignedAddrDiff = (long) (DistantAddr - addr); if ((SignedAddrDiff < -128) || (SignedAddrDiff > 127)) { add_cref(DistantAddr, addr, FarJump); } else { add_cref(DistantAddr, addr, NearJump); } } } } // end for all "from" xrefs } // end if (!TempInst.Fill() ... else ... } // end if (IsHead() and IsCode()) } // end for all addrs in chunk } // end for all chunks return; } // end of STARS_IDA_Function_t::UpdateXrefs() void STARS_IDA_Function_t::BuildFuncIR(SMPFunction *func) { bool FoundAllCallers = false; list<SMPInstr *>::iterator FirstInBlock = func->Instrs.end(); // For starting a basic block list<SMPInstr *>::iterator LastInBlock = func->Instrs.end(); // Terminating a basic block set<STARS_ea_t> FragmentWorkList; // Distant code fragments that belong to this function and need processing STARS_ea_t InstAddr; // grab address to help in debugging, conditional breakpoints, etc. STARS_ea_t PreviousIndirJumpAddr = BADADDR; enum cref_t NearJump = (cref_t)(fl_JN | XREF_USER); enum cref_t FarJump = (cref_t)(fl_JF | XREF_USER); func_tail_iterator_t FuncTail(the_func); // Cycle through all chunks that belong to the function. size_t ChunkCounter = 0; bool GoodRTL; func->BuiltRTLs = true; for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { #if (IDA_SDK_VERSION < 700) const area_t &CurrChunk = FuncTail.chunk(); #else const range_t &CurrChunk = FuncTail.chunk(); #endif ++ChunkCounter; STARS_ea_t ChunkStartAddr = GetChunkStart(CurrChunk); STARS_ea_t ChunkEndAddr = GetChunkEnd(CurrChunk); #if 0 if (ChunkStartAddr < func->GetFirstFuncAddr()) { func->FirstEA = ChunkStartAddr; } #endif #if STARS_DEBUG_MEMORY_CORRUPTION bool DebugFlag = (0 == strcmp("sub_8063BE0", func->GetFuncName())); #endif // Build the instruction and block lists for the function. bool PreviousInstWasCall = false; for (STARS_ea_t addr = ChunkStartAddr; addr < ChunkEndAddr; addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (!SMP_isHead(InstrFlags)) { continue; } if (!SMP_isCode(InstrFlags)) { //data if (PreviousInstWasCall) { // Detect no-ops after calls that IDA Pro classifies as data. // This data classification is harmful to IRDB analysis, and // prevents STARS/IRDB from finding the fall-through of a fixed call. int InstrLen = ::create_insn(addr); if (InstrLen > 0) { // Successfully converted to code SMPInstr *CurrInst = new SMPInstr(addr); // Fill in the instruction data members. CurrInst->Analyze(); GoodRTL = CurrInst->HasGoodRTL(); func->BuiltRTLs = (func->BuiltRTLs && GoodRTL); #if SMP_DEBUG_BUILD_RTL if (!GoodRTL) { static auto msg_count=0u; if(msg_count<100) { SMP_msg("ERROR: Cannot build RTL at %llx for %s\n", (unsigned long long) CurrInst->GetAddr(), CurrInst->GetDisasm()); msg_count++; if(msg_count==100) { SMP_msg("ERROR: Eliding future Cannot build RTL msgs\n"); msg_count++; } } } #endif if (CurrInst->IsNop()) { SMP_msg("INFO: Found nop marked as data after CALL, converted to code at %p\n", addr); // Insert instruction at end of list. func->Instrs.push_back(CurrInst); } else { PreviousInstWasCall = false; // stop searching when nop sequence ends } } } else { // NOTE: The code below seems to produce nothing. It would seem that !xrefs.GetIscode() should // be xrefs.GetIscode() but that fix only seems to find rare garbage cases in which IDA Pro // has data in a code section because of failure to identify code, and then we get random // code addresses that are mistakenly found in the false "data" areas. !!!!****!!!! // Check for code xrefs from the data. SMP_xref_t xrefs; for (bool ok = xrefs.SMP_first_from(addr, XREF_ALL); ok; ok = xrefs.SMP_next_from()) { if ((xrefs.GetTo() != 0) && (!xrefs.GetIscode())) { // Found a code target, with its address in xrefs.to global_STARS_program->PrintDataToCodeXref(addr, xrefs.GetTo(), 0); } } } } else { // code SMPInstr *CurrInst = new SMPInstr(addr); // Fill in the instruction data members. #if SMP_DEBUG_CONTROLFLOW SMP_msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n"); #endif CurrInst->Analyze(); if (SMPBinaryDebug) { SMP_msg("Disasm: %s \n", CurrInst->GetDisasm()); } #if SMP_COUNT_MEMORY_ALLOCATIONS SMPInstBytes += sizeof(*CurrInst); #endif #if SMP_USE_SSA_FNOP_MARKER if (func->Instrs.empty()) { // First instruction in function. We want to create a pseudo-instruction // at the top of the function that can hold SSA DEFs for LiveIn names // to the function. We use a floating point no-op as the pseudo-inst. // The code address is one less than the start address of the function. SMPInstr *MarkerInst = new SMPInstr(STARS_SSA_MARKER_PSEUDO_ID); MarkerInst->AnalyzeMarker(); assert(NN_fnop == MarkerInst->GetIDAOpcode()); func->BuiltRTLs = (func->BuiltRTLs && MarkerInst->HasGoodRTL()); assert(FirstInBlock == func->Instrs.end()); func->Instrs.push_back(MarkerInst); #if SMP_COUNT_MEMORY_ALLOCATIONS SMPInstBytes += sizeof(*MarkerInst); #endif } #endif // Find all functions that call the current function. SMP_xref_t CurrXrefs; if (!FoundAllCallers) { for (bool ok = CurrXrefs.SMP_first_to(addr, XREF_ALL); ok; ok = CurrXrefs.SMP_next_to()) { STARS_ea_t FromAddr = CurrXrefs.GetFrom(); if (FromAddr != 0) { if (CurrXrefs.GetIscode()) { // Make sure it is not a fall-through. Must be a // control-flow instruction of some sort, including // direct or indirect calls or tail calls. SMPInstr CallInst(FromAddr); CallInst.Analyze(); SMPitype CallType = CallInst.GetDataFlowType(); if ((JUMP <= CallType) && (RETURN >= CallType)) { // Found a caller, with its call address in CurrXrefs.from func->AddCallSource(FromAddr); } else if (DEFAULT == CallType) { SMP_msg("ERROR: Fallthrough from %llx to func at %llx with xref type %d\n", (uint64_t) FromAddr, (uint64_t) addr, CurrXrefs.GetType()); } else { SMP_msg("ERROR: Bad CallInst.Analyze() at %llx to %llx with xref type %d\n", (uint64_t) FromAddr, (uint64_t) addr, CurrXrefs.GetType()); } } else { // Data xref global_STARS_program->PrintDataToCodeXref(FromAddr, addr, 0); func->PossibleIndirectCallTarget = true; SMP_msg("INFO: Func at %llx has its address in data address %llx\n", (unsigned long long) addr, (unsigned long long) FromAddr); // Code xrefs all precede the first data xref, so // we do not need to process any more xrefs. One // indirect data reference is enough to change the // status of the function; additional references are // not important. break; } } } FoundAllCallers = true; // only do this for first inst } SMPitype DataFlowType = CurrInst->GetDataFlowType(); PreviousInstWasCall = false; if ((DataFlowType == INDIR_CALL) || (DataFlowType == CALL)) { PreviousInstWasCall = true; // See if IDA has determined the target of the call. #if 0 // Phase ordering: Do this later. CurrInst->AnalyzeCallInst(func->GetFirstFuncAddr()); #endif STARS_ea_t TargetAddr = CurrInst->GetCallTarget(); bool LinkedToTarget = (BADADDR != TargetAddr); if (LinkedToTarget) { if (0 == TargetAddr) { SMP_msg("WARNING: Ignoring NULL call target (unreachable) at %lx\n", (unsigned long) CurrInst->GetAddr()); } else { if (INDIR_CALL == DataFlowType) { pair<set<STARS_ea_t>::iterator, bool> InsertResult = func->IndirectCallTargets.insert(TargetAddr); if (InsertResult.second) { func->AllCallTargets.push_back(TargetAddr); } } else { (void) func->AddDirectCallTarget(TargetAddr); } } } if (DataFlowType == INDIR_CALL) { func->SetHasIndirectCalls(); if (!LinkedToTarget && (!CurrInst->MDIsSystemCall())) { func->SetHasUnresolvedIndirectCalls(); } } } // end if INDIR_CALL or CALL else if (DataFlowType == INDIR_JUMP) { func->IndirectJumps = true; #if STARS_AUDIT_INDIR_JUMP_XREFS PreviousIndirJumpAddr = addr; #endif } else if (DataFlowType == RETURN) { func->HasReturnInst = true; } // Add call targets for tail call jumps. else if (CurrInst->IsBranchToOtherFunc()) { STARS_ea_t FarTargetAddr = CurrInst->GetJumpTarget(); if (BADADDR != FarTargetAddr) { assert((RETURN == DataFlowType) || (JUMP == DataFlowType) || (COND_BRANCH == DataFlowType)); // Optimized tail calls, where the stack frame is down to zero at the call point, // get RETURN as their DataFlowType. Might have to revisit that idea at some point. !!!!****!!!! if (func->FindDistantCodeFragment(FarTargetAddr)) { if (func->GetProg()->InsertUnsharedFragment(FarTargetAddr)) { // Fragment address was inserted in SMPProgram set, was not already there. pair<set<STARS_ea_t>::iterator, bool> InsertResult; InsertResult = FragmentWorkList.insert(FarTargetAddr); if (InsertResult.second) { SMP_msg("INFO: Found distant code fragment at %llx that can be added to func, reached from %llx\n", (unsigned long long) FarTargetAddr, (unsigned long long) addr); #if 0 if (FarTargetAddr < func->GetFirstFuncAddr()) { func->FirstEA = FarTargetAddr; } #endif } else { // These kind of fragments are generally only jumped to from one place, // and jump back into the function that jumped into them. Very suspicious // to encounter such a fragment more than once, and even if it happens, // the insertion into the SMPProgram set should have failed due to already // being present. This message and assertion should never be reached. SMP_msg("FATAL ERROR: Distant fragment at %lx reached from %lx already reached from same function.\n", (unsigned long) FarTargetAddr, (unsigned long) addr); assert(InsertResult.second); // sanity lost; shut down } } else { // Fragment address was already in SMPProgram set ; // Probably added in loop at beginning that found unshared fragments. #if 0 // These kind of fragments are generally only jumped to from one place, // and jump back into the function that jumped into them. Very suspicious // to encounter such a fragment more than once. SMP_msg("WARNING: Distant fragment at %x reached from %x has already been processed.\n", FarTargetAddr, addr); #endif } } else if (!func->GetProg()->IsUnsharedFragment(FarTargetAddr)) { (void) func->AddDirectCallTarget(FarTargetAddr); } } } #if STARS_AUDIT_INDIR_JUMP_XREFS if (FirstInBlock == func->Instrs.end()) { // CurrInst will start a block if (CurrInst->HasNoCodeXrefs() && (BADADDR != PreviousIndirJumpAddr) && (addr != PreviousIndirJumpAddr)) { bool CatchBlockFound = global_STARS_program->IsCatchBlockAddr(addr); if (!CatchBlockFound) { #if 1 // IDA Pro is not likely to have this problem in 7.x, unlike 5.x when this code was added. // So we no longer perform this speculative operation, unless it needs resurrecting later. // This block appears unreachable, but it can probably be reached by // the most recent indirect jump. IDA Pro sometimes thinks it has // resolved an indirect jump completely but has only done so partially. SMP_msg("WARNING: Adding possible missing indirect jump code Xref to %llx from %llx\n", (uint64_t)addr, (uint64_t)PreviousIndirJumpAddr); long SignedAddrDiff = (long)(addr - PreviousIndirJumpAddr); if ((SignedAddrDiff < -128) || (SignedAddrDiff > 127)) { SMP_add_cref(PreviousIndirJumpAddr, addr, FarJump); } else { SMP_add_cref(PreviousIndirJumpAddr, addr, NearJump); } CurrInst->SetJumpTarget(); #endif } } } #endif // Before we insert the instruction into the instruction // list, determine if it is a jump target that does not // follow a basic block terminator. This is the special case // of a CASE in a SWITCH that falls through into another // CASE, for example. The first sequence of statements // was not terminated by a C "break;" statement, so it // looks like straight line code, but there is an entry // point at the beginning of the second CASE sequence and // we have to split basic blocks at the entry point. if ((FirstInBlock != func->Instrs.end()) && CurrInst->IsJumpTarget()) { #if SMP_DEBUG_CONTROLFLOW SMP_msg("SMPFunction::Analyze: hit special jump target case.\n"); #endif LastInBlock = --(func->Instrs.end()); SMPBasicBlock *NewBlock = new SMPBasicBlock(func, FirstInBlock, LastInBlock); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { NewBlock->SetShared(); } FirstInBlock = func->Instrs.end(); LastInBlock = func->Instrs.end(); func->Blocks.push_back(NewBlock); func->BlockCount += 1; } GoodRTL = CurrInst->HasGoodRTL(); func->BuiltRTLs = (func->BuiltRTLs && GoodRTL); #if SMP_DEBUG_BUILD_RTL if (!GoodRTL) { SMP_msg("ERROR: Cannot build RTL at %llx for %s\n", (unsigned long long) CurrInst->GetAddr(), CurrInst->GetDisasm()); } #endif #if SMP_DEBUG_CONTROLFLOW SMP_msg("SMPFunction::Analyze: putting CurrInst on list.\n"); #endif // Insert instruction at end of list. func->Instrs.push_back(CurrInst); // Find basic block leaders and terminators. if (FirstInBlock == func->Instrs.end()) { #if SMP_DEBUG_CONTROLFLOW SMP_msg("SMPFunction::Analyze: setting FirstInBlock.\n"); #endif #if SMP_USE_SSA_FNOP_MARKER if (2 == func->Instrs.size()) { // Just pushed first real instruction, after the fnop marker. FirstInBlock = func->Instrs.begin(); } else { FirstInBlock = --(func->Instrs.end()); } #else FirstInBlock = --(func->Instrs.end()); #endif } if (CurrInst->IsBasicBlockTerminator()) { #if SMP_DEBUG_CONTROLFLOW SMP_msg("SMPFunction::Analyze: found block terminator.\n"); #endif PreviousInstWasCall = false; // call to NORET func terminates block LastInBlock = --(func->Instrs.end()); SMPBasicBlock *NewBlock = new SMPBasicBlock(func, FirstInBlock, LastInBlock); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { NewBlock->SetShared(); } FirstInBlock = func->Instrs.end(); LastInBlock = func->Instrs.end(); func->Blocks.push_back(NewBlock); func->BlockCount += 1; } } // end if (isCode(InstrFlags)) } // end for (STARS_ea_t addr = CurrChunk.startEA; ... ) // Handle the special case in which a function does not terminate // with a return instruction or any other basic block terminator. // Sometimes IDA Pro sees a call to a NORET function and decides // to not include the dead code after it in the function. That // dead code includes the return instruction, so the function no // longer includes a return instruction and terminates with a CALL. if (FirstInBlock != func->Instrs.end()) { LastInBlock = --(func->Instrs.end()); SMPBasicBlock *NewBlock = new SMPBasicBlock(func, FirstInBlock, LastInBlock); // If not the first chunk in the function, it is a shared // tail chunk. if (ChunkCounter > 1) { NewBlock->SetShared(); } FirstInBlock = func->Instrs.end(); LastInBlock = func->Instrs.end(); func->Blocks.push_back(NewBlock); func->BlockCount += 1; } } // end for (bool ChunkOK = ...) return; } // end of STARS_IDA_Function_t::BuildFuncIR() bool STARS_IDA_Function_t::FindDistantCodeFragment(SMPFunction* func, STARS_ea_t TargetAddr) { STARS_ea_t FirstEA=this->get_startEA(); bool PrivateFragment = false; bool AlreadyFound = func->GetProg()->IsUnsharedFragment(TargetAddr); STARS_Function_t *TargetFunc = SMP_get_func(TargetAddr); if ((!AlreadyFound) && TargetFunc) { // Determine if we are dealing with shared chunks. size_t ChunkCounter = 0; func_tail_iterator_t FuncTail((func_t*)*(dynamic_cast<STARS_IDA_Function_t*>(TargetFunc))); for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { ++ChunkCounter; } if (1 < ChunkCounter) { SMP_msg("INFO: Code fragment at %lx is shared chunk.\n", (unsigned long) TargetAddr); } else { bool JumpsBackIntoCurrentFunc = false; bool HasReturnInstruction = false; bool AllocatesStackFrame = false; for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { #if (IDA_SDK_VERSION < 700) const area_t &CurrChunk = FuncTail.chunk(); #else const range_t &CurrChunk = FuncTail.chunk(); #endif ++ChunkCounter; STARS_ea_t ChunkStartAddr = GetChunkStart(CurrChunk); STARS_ea_t ChunkEndAddr = GetChunkEnd(CurrChunk); // Analyze the instructions in the chunk. for (STARS_ea_t addr = ChunkStartAddr; addr < ChunkEndAddr; addr = SMP_get_item_end(addr)) { flags_t InstrFlags = SMP_getFlags(addr); if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) { SMPInstr *CurrInst = new SMPInstr(addr); // Fill in the instruction data members. CurrInst->Analyze(); // Search for two negative indicators (stack allocations and returns) // and one positive indicator (jump back into this function). if (CurrInst->HasReturnOpcode()) { HasReturnInstruction = true; break; } else if (CurrInst->MDIsFrameAllocInstr()) { AllocatesStackFrame = true; break; } else { SMPitype FlowType = CurrInst->GetDataFlowType(); if ((JUMP == FlowType) || (INDIR_JUMP == FlowType)) { if (CurrInst->HasGoodRTL()) { STARS_ea_t FragmentJumpTarget = CurrInst->GetJumpTarget(); if ((FragmentJumpTarget >= FirstEA) && (FragmentJumpTarget < this->get_endEA())) { JumpsBackIntoCurrentFunc = true; } } } } } // end if isHead() and isCode() } // end for all addrs in chunk } // end for all chunks (there will only be one) PrivateFragment = (JumpsBackIntoCurrentFunc && (!HasReturnInstruction) && (!AllocatesStackFrame)); } // end if (1 < ChunkCounter) ... else ... } // end if (!AlreadyFound && TargetFunc) return PrivateFragment; } // end of STARSFunction::FindDistantCodeFragment() // Limit damage from garbage stack offset values produced by IDA Pro. #define IDAPRO_KLUDGE_STACK_FRAME_SIZE_LIMIT 5000000 void STARS_IDA_Function_t::FillInLocalVarTable(SMPFunction *CurrFunc) { bool DebugFlag = false; bool FoundReturnAddress = false; #if SMP_DEBUG_STACK_GRANULARITY DebugFlag |= (0 == strcmp("qSort3", this->GetFuncName())); #endif struc_t *StackFrame = SMP_get_frame(this->the_func); if (NULL == StackFrame) { SMP_msg("WARNING: No stack frame info from get_frame for %s\n", CurrFunc->GetFuncName()); CurrFunc->SetLocalVarOffsetLimit(0); // return; } else { member_t *Member = StackFrame->members; for (std::size_t i = 0; i < StackFrame->memqty; ++i, ++Member) { long offset; char MemberName[MAXSMPVARSTR] = { '\0' }; if (NULL == Member) { SMP_msg("NULL stack frame member pointer in %s\n", CurrFunc->GetFuncName()); break; } qstring TempName; #if (IDA_SDK_VERSION < 700) STARS_ssize_t NameLen = ::get_member_name2(&TempName, Member->id); #else STARS_ssize_t NameLen = ::get_member_name(&TempName, Member->id); #endif ::qstrncpy(MemberName, TempName.c_str(), sizeof(MemberName)); if (MemberName[0] == '\0') { #if SMP_DEBUG_STACK_GRANULARITY SMP_msg("NULL stack frame member in %s\n", CurrFunc->GetFuncName()); #endif continue; } if (Member->unimem()) { // Not a separate variable; name for member of a union. // The union itself should have a separate entry, so we skip this. SMP_msg("STACK INFO: Skipping union member %s frame member %zu in stack frame for %s\n", MemberName, i, CurrFunc->GetFuncName()); continue; } offset = (long) Member->get_soff(); // Would be 0 for union member, so we skipped them above. if (DebugFlag) { SMP_msg("%s local var %s at offset %ld\n", CurrFunc->GetFuncName(), MemberName, offset); } if (offset > IDAPRO_KLUDGE_STACK_FRAME_SIZE_LIMIT) { SMP_msg("ERROR: Rejected enormous stack offset %ld for var %s in func %s\n", offset, MemberName, CurrFunc->GetFuncName()); continue; } if (!FoundReturnAddress && (2 == strlen(MemberName)) && (0 == strncmp(" r", MemberName, 2))) { FoundReturnAddress = true; CurrFunc->SetIDAReturnAddressOffset(offset); } struct LocalVar TempLocal; TempLocal.offset = offset; TempLocal.size = Member->eoff - Member->soff; // audit later SMP_strncpy(TempLocal.VarName, MemberName, sizeof(TempLocal.VarName) - 1); CurrFunc->PushBackLocalVarEntry(TempLocal); if ((offset + (long) TempLocal.size) >= CurrFunc->GetLocalVarOffsetLimit()) { CurrFunc->SetLocalVarOffsetLimit((long)(TempLocal.offset + TempLocal.size)); } } // end for all stack frame members } return; } // end of STARS_IDA_Function_t::FillInLocalVarTable() void STARS_IDA_Function_t::FindEHCatchBlocks(void) { // Use the tryblks.hpp interface from IDA Pro to parse the EH_FRAME // section in the ELF binary, or equivalent section in other binaries. // // Algorithm: // 1. Call get_tryblks() for the address range of current func. // 2. for each tryblk_t in the qvector returned: // if the kind field is TB_CPP, use cpp() to get a catchvec_t cast. // 3. for each catch_t in the catchvec_t, extract the start_ea field // from each element and record as the beginning of a reachable catch block. // Step 1. tryblks_t *TryBlockVec = new tryblks_t(); range_t FuncRange(this->the_func->start_ea, this->the_func->end_ea); std::size_t NumTryBlks = get_tryblks(TryBlockVec, FuncRange); // Step 2. for (std::size_t TryIndex = 0; TryIndex < NumTryBlks; ++TryIndex) { tryblk_t CurrTryBlk = TryBlockVec->at(TryIndex); if (CurrTryBlk.is_cpp()) { catchvec_t CatchBlksVec = CurrTryBlk.cpp(); // Step 3. for (std::size_t CatchIndex = 0; CatchIndex < CatchBlksVec.size(); ++CatchIndex) { catch_t CurrCatchBlk = CatchBlksVec.at(CatchIndex); for (std::size_t CatchIndex2 = 0; CatchIndex2 < CurrCatchBlk.size(); ++CatchIndex2) { STARS_ea_t CatchAddr = CurrCatchBlk.at(CatchIndex2).start_ea; global_STARS_program->SetCatchBlockAddr(CatchAddr); SMP_msg("INFO: EH: Catch block at address %llx for func at %llx\n", (uint64_t) CatchAddr, (uint64_t) FuncRange.start_ea); } } } } delete TryBlockVec; return; } // end of STARS_IDA_Function_t::FindEHCatchBlocks() // return success or failure of analysis bool STARS_IDA_Function_t::AnalyzeInstAsCallTarget(SMPFunction *CurrFunc, bool &IsIndirectCallTarget, bool &IsTailCallTarget) { bool success = true; STARS_ea_t FirstAddr = CurrFunc->GetFirstFuncAddr(); SMP_xref_t xrefs; for (bool ok = xrefs.SMP_first_to(FirstAddr, XREF_ALL); ok; ok = xrefs.SMP_next_to()) { STARS_ea_t FromAddr = xrefs.GetFrom(); if (FromAddr != 0) { if (!xrefs.GetIscode()) { // found data xref IsIndirectCallTarget = true; // addr of func appears in data; assume indirect calls to func SMP_msg("INFO: Assuming IndirectCallTarget func at %llx due to its addr in data at %llx\n", (uint64_t)FirstAddr, (uint64_t)FromAddr); break; // One indirect reference is enough; all code xrefs come before data xrefs. } else { // found code xref; see if it is a jump used as a tail call // These tail calls could be a problem for fast returns if they go from unsafe to safe functions. SMPInstr TempInst(FromAddr); bool CmdOK = TempInst.FillCmd(); if (!CmdOK) { // Better be conservative and assume it could be a tail call. IsTailCallTarget = true; SMP_msg("ERROR: Could not decode instruction at %llx from within MarkFunctionSafe(); assuming tail call\n", (uint64_t)FromAddr); } else if (TempInst.GetIDAOpcode() != MD_CALL_INSTRUCTION) { // not a call instruction; must be jump of some sort IsTailCallTarget = true; SMP_msg("INFO: Tail call from %llx to %llx\n", (uint64_t)FromAddr, (uint64_t)FirstAddr); } } } } return success; } // end of STARS_IDA_Function_t::AnalyzeInstAsCallTarget()