diff --git a/include/interfaces/abstract/STARSFunction.h b/include/interfaces/abstract/STARSFunction.h index c2df4de7e549e20b3f550435009d9d24f9b98d8b..5b3cc79fc864382206e2816a608ffa0bcefb7ab8 100644 --- a/include/interfaces/abstract/STARSFunction.h +++ b/include/interfaces/abstract/STARSFunction.h @@ -25,7 +25,7 @@ class STARS_Function_t virtual bool IsStackPointerAnalyzed() = 0; virtual STARS_sval_t get_spd(STARS_ea_t ea) = 0; virtual bool HasReturnPoints() = 0; - virtual bool IsMultiEntry() = 0; + virtual bool IsMultiEntry(bool HasIndirectJumps) = 0; virtual void MarkSharedChunks() = 0; virtual bool HasSharedChunks() const = 0; virtual void SetSharedChunks(bool v) = 0; diff --git a/include/interfaces/abstract/STARSProgram.h b/include/interfaces/abstract/STARSProgram.h index f4906ece6d41fd2b93aec2f25cbd0dd645c36ba6..fd1c8d3eb01b1b4ef5195144afec3c201c587153 100644 --- a/include/interfaces/abstract/STARSProgram.h +++ b/include/interfaces/abstract/STARSProgram.h @@ -80,6 +80,7 @@ class STARS_Program_t // its returned values are used in arithmetic? (i.e. it is a trusted input) bool IsNumericSafeSystemCall(std::string CallName) const; virtual bool AreInstIDsInSameFunction(const STARS_ea_t InstID1, const STARS_ea_t InstID2) const; + virtual bool IsCodeAddressTaken(const STARS_ea_t InstAddr) const { return false; }; // Printing methods @@ -96,6 +97,7 @@ class STARS_Program_t virtual void ReportTotalCodeSize(unsigned long long TotalCodeSize) = 0; // Set flags, take actions based on code size. virtual void InitStaticDataTable(SMPProgram *CurrProg) = 0; // Process global static data sections virtual void GetBlockSuccessorTargets(bool CallFlag, STARS_InstructionID_t LastBlockInst, std::size_t InstSize, std::list<STARS_InstructionID_t> &SuccList) = 0; // Get successors for LastBlockInst, omitting call targets + virtual void FindCodeAddressesTaken(void) = 0; protected: // We maintain a list of the caller-saved regs for the current binary's ABI. diff --git a/include/interfaces/idapro/STARSFunction.h b/include/interfaces/idapro/STARSFunction.h index d90a4fc82d86b644608e9aad37cb6d79c31576df..89ad00c8e0ed7f5a3dda5f165f4f5a0b424c422e 100644 --- a/include/interfaces/idapro/STARSFunction.h +++ b/include/interfaces/idapro/STARSFunction.h @@ -51,7 +51,7 @@ public: virtual bool IsLibraryFunction() { return (0 != (the_func->flags & FUNC_LIB)); } virtual bool IsStackPointerAnalyzed() { return the_func->analyzed_sp(); } virtual bool HasReturnPoints() { return the_func->does_return(); } - virtual bool IsMultiEntry(); + virtual bool IsMultiEntry(bool HasIndirectJumps); virtual bool IsChunkUnshared(STARS_ea_t ChunkAddr, STARS_ea_t FuncHeadStart, STARS_ea_t FuncHeadEnd); virtual bool IsInstIDInFunc(STARS_ea_t InstID); diff --git a/include/interfaces/idapro/STARSProgram.h b/include/interfaces/idapro/STARSProgram.h index d0bf5ec6803603d5947585987493f8e6b9bda528..8ba17512699270be83412ccb858bc51257d9d39b 100644 --- a/include/interfaces/idapro/STARSProgram.h +++ b/include/interfaces/idapro/STARSProgram.h @@ -29,6 +29,9 @@ public: // Set (mutator) methods // Query methods + virtual bool IsCodeAddressTaken(const STARS_ea_t InstAddr) const { + return (CodeAddressesTaken.find(InstAddr) != CodeAddressesTaken.cend()); + }; // Printing methods virtual void PrintAllCodeToCodeXrefs(STARS_ea_t InstAddr, std::size_t InstSize, bool CallFlag); @@ -43,10 +46,12 @@ public: void ReportTotalCodeSize(unsigned long long TotalCodeSize); void InitStaticDataTable(SMPProgram *CurrProg); void GetBlockSuccessorTargets(bool CallFlag, STARS_InstructionID_t LastBlockInst, std::size_t InstSize, std::list<STARS_InstructionID_t> &SuccList); // Get successors for LastBlockInst, omitting call targets + virtual void FindCodeAddressesTaken(void); // find code addresses in data private: // Data std::size_t STARS_Platform_Bitwidth; // not binary, but the system OS we are running on + std::set<STARS_ea_t> CodeAddressesTaken; // instruction addresses found in data segments // Methods void ComputeGlobalFieldOffsets(struct GlobalVar &CurrGlobal); diff --git a/include/interfaces/irdb/STARSFunction.h b/include/interfaces/irdb/STARSFunction.h index 863e160c1fffd5eb4d702d91f2f3e7089c9ffa8a..1352f760073f271274b9d96f6037cbcea8b04262 100644 --- a/include/interfaces/irdb/STARSFunction.h +++ b/include/interfaces/irdb/STARSFunction.h @@ -86,7 +86,7 @@ return 0; virtual bool IsStackPointerAnalyzed(){ return false; assert(0); } virtual STARS_sval_t get_spd(STARS_ea_t ea){ return 0; assert(0); } virtual bool HasReturnPoints(){ return true; assert(0); } - virtual bool IsMultiEntry(){ return false; assert(0); } + virtual bool IsMultiEntry(bool HasIndirectJumps){ return false; assert(0); } virtual void MarkSharedChunks(){ return; assert(0); } virtual bool HasSharedChunks() const { return false; assert(0); } virtual void SetSharedChunks(bool v) { return; assert(0); } diff --git a/include/interfaces/irdb/STARSProgram.h b/include/interfaces/irdb/STARSProgram.h index fa44b6957201640c5e5d26e893781cb86f9fde47..bce6fbebc2eaa8c6cd3addd0e67e1ae4a7af3bbe 100644 --- a/include/interfaces/irdb/STARSProgram.h +++ b/include/interfaces/irdb/STARSProgram.h @@ -33,6 +33,7 @@ public: virtual void PrintAllCodeToCodeXrefs(STARS_ea_t, std::size_t, bool CallFlag) {} virtual void GetBlockSuccessorTargets(bool, STARS_InstructionID_t, std::size_t, std::list<STARS_InstructionID_t>&); + virtual void FindCodeAddressesTaken(void) { return; }; private: diff --git a/src/base/SMPFunction.cpp b/src/base/SMPFunction.cpp index 072e77009d9d99b7d755318e31e0153e4dabf8e3..1e789c188e913dfe56b0af170bec7f8aeb58878f 100644 --- a/src/base/SMPFunction.cpp +++ b/src/base/SMPFunction.cpp @@ -3738,7 +3738,7 @@ void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, SMPInstr *Instr) { // Mark functions with multiple entry points. These will be unsafe for fast returns and // are probably IDA Pro disassembly problems rather than true multi-entry functions. void SMPFunction::DetectMultiEntryFunction(void) { - this->MultipleEntryPoints = GetFuncInfo()->IsMultiEntry(); + this->MultipleEntryPoints = GetFuncInfo()->IsMultiEntry(this->HasIndirectJumps()); } // end of SMPFunction::DetectMultiEntryFunction() // Audit and fix the IDA Pro code cross references for jumps and jump targets. @@ -9863,6 +9863,9 @@ void SMPFunction::MarkFunctionSafe() { this->PossibleIndirectCallTarget |= IsIndirectCallTarget; this->PossibleTailCallTarget = IsTailCallTarget; + this->DetectMultiEntryFunction(); + bool SearchForCodeAddressesTaken = (!(this->PossibleIndirectCallTarget || this->MultipleEntryPoints)); + list<SMPInstr *>::iterator Instructions = Instrs.begin(); SMPInstr *CurrInst; #if SMP_USE_SSA_FNOP_MARKER @@ -9876,6 +9879,26 @@ void SMPFunction::MarkFunctionSafe() { bool XferESPtoEBP = false; for ( ; Instructions != Instrs.end(); ++Instructions) { CurrInst = (*Instructions); + STARS_ea_t address = CurrInst->GetAddr(); + // If we do not already have a MultiEntryFunction determination, search + // for addresses beyond the entry point that have their address taken. + if (SearchForCodeAddressesTaken && (address != FirstAddr)) { + // Could optimize this later by searching the CodeAddressesTaken set + // for the range of addresses in this function. + if (global_STARS_program->IsCodeAddressTaken(address)) { + this->MultipleEntryPoints = true; + SearchForCodeAddressesTaken = false; // don't need to look for more + SMP_msg("INFO: Func at %llx becoming multi-entry because code addr %llx found in data.\n", + (unsigned long long) FirstAddr, (unsigned long long) address); + } + else if (this->GetProg()->IsCodeXrefFromData(address)) { + this->MultipleEntryPoints = true; + SearchForCodeAddressesTaken = false; // don't need to look for more + SMP_msg("INFO: Func at %llx becoming multi-entry because code addr %llx found in CodeXrefsFromData.\n", + (unsigned long long) FirstAddr, (unsigned long long) address); + } + } + if (!CurrInst->IsAnalyzeable()) continue; #if SMP_VERBOSE_DEBUG_FUNC @@ -9895,7 +9918,6 @@ void SMPFunction::MarkFunctionSafe() { continue; } } - STARS_ea_t address = CurrInst->GetAddr(); if ((CurrInst->IsTailCall()) || (CurrInst->IsCondTailCall())) { // Moved up here because a tail call can be the point at which the stack // frame returns to its prior state, making it the DeallocInstr. @@ -10052,9 +10074,6 @@ void SMPFunction::MarkFunctionSafe() { this->HasIndirectWrites = this->HasIndirectWrites || (HasIndexedStackWrite || HasIndirectWrite || WritesAboveLocalFrameIndirect || HasIndirectGlobalWrite); - this->DetectMultiEntryFunction(); - - #if 1 bool UnsafeReturnAddr = (Unsafe || AccessesReturnAddress || this->HasUnsafeIndirectWrites || (!this->AnalyzedSP) || this->MultipleEntryPoints); #else diff --git a/src/drivers/idapro/SMPStaticAnalyzer.cpp b/src/drivers/idapro/SMPStaticAnalyzer.cpp index aa73d8b93bb19e431f37d2a55cd909810d78c8f7..0c10fa8ae898c1adc06edf9b3060b802350212b3 100644 --- a/src/drivers/idapro/SMPStaticAnalyzer.cpp +++ b/src/drivers/idapro/SMPStaticAnalyzer.cpp @@ -352,6 +352,8 @@ void IDAP_run(int arg) { CurrProg->AnalyzeData(); // Analyze static data in the executable + global_STARS_program->FindCodeAddressesTaken(); // find code addresses in read-only data segments + // Note: ProfilerInformation must come after the call above to AnalyzeData(). ProfilerInformation *prof_info = new ProfilerInformation(global_STARS_program->GetAnnotFileName().c_str(), CurrProg); diff --git a/src/interfaces/idapro/STARSFunction.cpp b/src/interfaces/idapro/STARSFunction.cpp index 21edda8f11416969a31db8af4cd906c1e8a3995a..5a6d37b68ab8f217e4887253871bdf9238b4676c 100644 --- a/src/interfaces/idapro/STARSFunction.cpp +++ b/src/interfaces/idapro/STARSFunction.cpp @@ -162,37 +162,67 @@ void STARS_IDA_Function_t::MarkSharedChunks(void) { return; } // end of STARS_IDA_Function_t::MarkSharedChunks() -bool STARS_IDA_Function_t::IsMultiEntry() -{ - STARS_ea_t FirstEA=this->get_startEA(); - func_tail_iterator_t FuncTail(the_func);// (func_t*)*(dynamic_cast<STARS_IDA_Function_t*>(this->GetFuncInfo()))); - size_t CallTargetCount = 0; // how many addresses in this function are called? - - if (this->HasSharedChunks() || this->UnsharedChunks) { - for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { - const area_t &CurrChunk = FuncTail.chunk(); - STARS_ea_t addr = CurrChunk.startEA; // Start with just the beginning addrs of chunks. - // Determine whether the instruction is a call target by looking - // at its cross references and seeing if it has "TO" code xrefs. - SMP_xref_t xrefs, Distant_xrefs; - for (bool ok = xrefs.SMP_first_to(addr, XREF_FAR); ok; ok = xrefs.SMP_next_to()) { - STARS_ea_t DistantAddr = xrefs.GetFrom(); - if ((DistantAddr != 0) && (xrefs.GetIscode())) { - // Now we see if the distant instruction is in another function. - STARS_Function_t *SourceFunc = SMP_get_func(DistantAddr); - if (NULL != SourceFunc) { - if (SourceFunc->get_startEA() != FirstEA) { - ++CallTargetCount; - break; - } - } - } - } // end for all xrefs - } // end for all chunks +bool STARS_IDA_Function_t::IsMultiEntry(bool HasIndirectJumps) { + STARS_ea_t FirstEA = this->get_startEA(); + func_tail_iterator_t FuncTail(the_func); // (func_t*)*(dynamic_cast<STARS_IDA_Function_t*>(this->GetFuncInfo()))); + size_t CallTargetCount = 0; // how many addresses in this function are called? + size_t AddressTakenCount = 0; // how many instructions in this function have their address taken? + bool HasChunks = (this->HasSharedChunks() || this->UnsharedChunks); + + for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) { + const area_t &CurrChunk = FuncTail.chunk(); + // Start with just the beginning addrs of chunks, as IDA will break into chunks when calls target an inst. (???) + STARS_ea_t addr = CurrChunk.startEA; + + if (addr != FirstEA) { + // Determine whether the instruction is a call target by looking + // at its cross references and seeing if it has "TO" code xrefs. + SMP_xref_t xrefs; + for (bool ok = xrefs.SMP_first_to(addr, XREF_FAR); ok; ok = xrefs.SMP_next_to()) { + STARS_ea_t DistantAddr = xrefs.GetFrom(); + if ((DistantAddr != 0) && (xrefs.GetIscode())) { + // Now we see if the distant instruction is in another function. + STARS_Function_t *SourceFunc = SMP_get_func(DistantAddr); + if (NULL != SourceFunc) { + if (HasChunks && (SourceFunc->get_startEA() != FirstEA)) { + ++CallTargetCount; + SMP_msg("INFO: MultiEntry due to call from %llx to %llx FirstEA: %llx\n", + (unsigned long long) DistantAddr, (unsigned long long) addr, (unsigned long long) FirstEA); + break; + } + } + } + } // end for all XREF_FAR xrefs } +#if 0 + // Code below does not work, because the dr_O xref type, "data reference : offset," + // is used for loads of data addresses, e.g. in frame_dummy(): + // mov dword ptr [esp], offset __JCR_LIST__ + // We have to go through read-only data segments and look for code addresses. + if (!HasIndirectJumps) { // switch tables produce false positives; get more precise later. + do { + if (FirstEA != addr) { + for (bool ok = xrefs.SMP_first_from(addr, XREF_DATA); ok; ok = xrefs.SMP_next_from()) { + STARS_ea_t DistantAddr = xrefs.GetTo(); + if ((DistantAddr != 0) && (xrefs.GetType() == dr_O)) { + ++AddressTakenCount; + SMP_msg("INFO: MultiEntry: Address of inst at %llx taken in data at %llx.\n", + (unsigned long long) addr, (unsigned long long) DistantAddr); + break; + } + } // end for all XREF_FAR xrefs + } + addr = SMP_get_item_end(addr); + } while ((addr != CurrChunk.endEA) && (0 == AddressTakenCount)); + } +#endif + } // end for all chunks - return CallTargetCount > 1; -} + // A single-entry function has the entry point code-xrefed, and no addresses taken other than perhaps the entry point, which we skipped + bool MultiEntryFound = ((CallTargetCount > 0) || (AddressTakenCount > 0)); + + return MultiEntryFound; +} // end of STARS_IDA_Function_t::IsMultiEntry() void STARS_IDA_Function_t::UpdateXrefs() { @@ -276,7 +306,7 @@ void STARS_IDA_Function_t::UpdateXrefs() } // end for all chunks return; -} +} // end of STARS_IDA_Function_t::UpdateXrefs() void STARS_IDA_Function_t::BuildFuncIR(SMPFunction *func) { @@ -603,9 +633,8 @@ void STARS_IDA_Function_t::BuildFuncIR(SMPFunction *func) func->BlockCount += 1; } } // end for (bool ChunkOK = ...) -} - - + return; +} // end of STARS_IDA_Function_t::BuildFuncIR() bool STARS_IDA_Function_t::FindDistantCodeFragment(SMPFunction* func, STARS_ea_t TargetAddr) { diff --git a/src/interfaces/idapro/STARSIDAProgram.cpp b/src/interfaces/idapro/STARSIDAProgram.cpp index fcbfdaa2054db18c68c2bd5de19f1d3aad3f6b3c..25085b2fbe77d33f6b34f883de57e4b75c39d041 100644 --- a/src/interfaces/idapro/STARSIDAProgram.cpp +++ b/src/interfaces/idapro/STARSIDAProgram.cpp @@ -459,3 +459,46 @@ void STARS_IDA_Program_t::GetBlockSuccessorTargets(bool CallFlag, STARS_Instruct } // end for all xrefs return; } // end of STARS_IDA_Program_t::GetBlockSuccessorTargets() + +// find code addresses in data +void STARS_IDA_Program_t::FindCodeAddressesTaken(void) { + STARS_ea_t RecentAddr = STARS_BADADDR; + bool Need64BitCodeAddresses = (4 < global_STARS_program->GetSTARS_ISA_Bytewidth()) && (0xffffffff < HighestCodeAddress); + bool Need32BitCodeAddresses = (0xffffffff >= LowestCodeAddress); + bool success; + uint32_t Value32; + uint64_t Value64; + + for (STARS_Segment_t *seg = SMP_get_first_seg(); NULL != seg; seg = SMP_get_next_seg(RecentAddr)) { + STARS_ea_t ea = seg->get_startEA(); + RecentAddr = ea; + bool ReadOnlyFlag = ((seg->IsReadableSegment()) && (!(seg->IsWriteableSegment()))); + if (ReadOnlyFlag && ((seg->IsDataSegment()) || (seg->IsBSSSegment()) || (seg->IsCommonSegment()))) { + // Loop through each of the segments we are interested in, + // examining all data objects (effective addresses). + // Make sure we start on a multiple of 4 bytes + if (0 != (ea & 0x3)) { + SMP_msg("WARNING: Data segment unaligned: begins at %llx\n", (unsigned long long) ea); + ea -= (ea & 0x3); // zero out last two bits + ea += 4; // Get back inside data segment + } + STARS_ea_t EndEA = seg->get_endEA(); + while (ea < EndEA) { + if (Need32BitCodeAddresses) { + success = seg->GetReadOnlyMem32BitValue(ea, Value32); + if (success && IsAddressInCodeRange((STARS_ea_t) Value32)) { + this->CodeAddressesTaken.insert((STARS_ea_t) Value32); + } + } + if (Need64BitCodeAddresses) { + success = seg->GetReadOnlyMem64BitValue(ea, Value64); + if (success && IsAddressInCodeRange((STARS_ea_t) Value64)) { + this->CodeAddressesTaken.insert((STARS_ea_t) Value64); + } + } + ea += 4; // even looking for 64-bit addresses, start on every 4-byte aligned address + } + } // end if read-only data segment + } // end for all segments + return; +} // STARS_IDA_Program_t::FindCodeAddressesTaken() diff --git a/tests/commit/trimmed-sorted-save-busybox.psexe.annot.REMOVED.git-id b/tests/commit/trimmed-sorted-save-busybox.psexe.annot.REMOVED.git-id index 2b14ce10efd23d3c775fdf34a28a90a752b95138..a07cf46e64739d26995f4ad614b438491481c98b 100644 --- a/tests/commit/trimmed-sorted-save-busybox.psexe.annot.REMOVED.git-id +++ b/tests/commit/trimmed-sorted-save-busybox.psexe.annot.REMOVED.git-id @@ -1 +1 @@ -28adac0358add90dfe737a8c413d8d8235480d95 \ No newline at end of file +7ec5c05dc4cc2def07ff892d01e7f19c33637ccf \ No newline at end of file diff --git a/tests/commit/trimmed-sorted-save-ffmpeg.psexe.annot.REMOVED.git-id b/tests/commit/trimmed-sorted-save-ffmpeg.psexe.annot.REMOVED.git-id index 198829877b1ce4791200ce810306c9f0a994a834..d46348d6625e689adbdf523039d416445a6857cc 100644 --- a/tests/commit/trimmed-sorted-save-ffmpeg.psexe.annot.REMOVED.git-id +++ b/tests/commit/trimmed-sorted-save-ffmpeg.psexe.annot.REMOVED.git-id @@ -1 +1 @@ -383119d4bb5a64105db45001e444b7432ef1cf5b \ No newline at end of file +f1ee538ffeb6cbee02524ab8af2d91bb777899d0 \ No newline at end of file diff --git a/tests/commit/trimmed-sorted-save-gimp.psexe.annot.REMOVED.git-id b/tests/commit/trimmed-sorted-save-gimp.psexe.annot.REMOVED.git-id index 811d457cc9543c022a4843437d6aceab9f5b08e3..3d5b05b5b5df621b3e187d703476aeed9cbda9a5 100644 --- a/tests/commit/trimmed-sorted-save-gimp.psexe.annot.REMOVED.git-id +++ b/tests/commit/trimmed-sorted-save-gimp.psexe.annot.REMOVED.git-id @@ -1 +1 @@ -0e4980b04210be002a664c26b2c003f7692f97fc \ No newline at end of file +b8acf73cc67e90fa6ae1a816ed52beee50631f2f \ No newline at end of file diff --git a/tests/commit/trimmed-sorted-save-less.psexe.annot.REMOVED.git-id b/tests/commit/trimmed-sorted-save-less.psexe.annot.REMOVED.git-id index 621e4a58cbbcdcab07e277f0c3e460dbb9f03d53..5b77050bc3ccc369706ceb5d49de96cfad6930b6 100644 --- a/tests/commit/trimmed-sorted-save-less.psexe.annot.REMOVED.git-id +++ b/tests/commit/trimmed-sorted-save-less.psexe.annot.REMOVED.git-id @@ -1 +1 @@ -7969f0646e5677afba5f1c4f7f1bd08684a295fa \ No newline at end of file +50fc27ed978524a905a68ccaf136eeadf09905c5 \ No newline at end of file diff --git a/tests/commit/trimmed-sorted-save-nginx.psexe.annot.REMOVED.git-id b/tests/commit/trimmed-sorted-save-nginx.psexe.annot.REMOVED.git-id index 782f3ea002e091ed4e3204e4b6d466c66109a726..581824a0abba5793bce262d38d67563565517b8d 100644 --- a/tests/commit/trimmed-sorted-save-nginx.psexe.annot.REMOVED.git-id +++ b/tests/commit/trimmed-sorted-save-nginx.psexe.annot.REMOVED.git-id @@ -1 +1 @@ -e4311455e661a99443b757ed1ac4b2f191ce5fc0 \ No newline at end of file +4535d22e8c6b11afb5e01f54367bd21d28bc4864 \ No newline at end of file diff --git a/tests/commit/trimmed-sorted-save-openssl.psexe.annot.REMOVED.git-id b/tests/commit/trimmed-sorted-save-openssl.psexe.annot.REMOVED.git-id index fd03fc11f7d1c7f90cf224a66e8fca4da254a593..829008d3dc5a00be3e91680ed407ad4785563c82 100644 --- a/tests/commit/trimmed-sorted-save-openssl.psexe.annot.REMOVED.git-id +++ b/tests/commit/trimmed-sorted-save-openssl.psexe.annot.REMOVED.git-id @@ -1 +1 @@ -17cb2a52f7b222b277876455c3bcfc51cb79b876 \ No newline at end of file +5058c3135613ee7be870aad3222f75150b3fb62d \ No newline at end of file