Newer
Older
#include "interfaces/STARSTypes.h"
#include "interfaces/STARSIDATypes.h"
#include "interfaces/SMPDBInterface.h"
#include "interfaces/abstract/all.h"
#include "base/SMPBasicBlock.h"
#include "base/SMPFunction.h"
#include "base/SMPProgram.h"
#if STARS_USE_EHP_LIB
#include <ehp.hpp> // security_transforms/libehp/include/ehp.hpp
#endif
clc5q
committed
#include <pro.h>
#include <fpro.h>
#include <bytes.hpp>
#include <kernwin.hpp>
#include <xref.hpp>
clc5q
committed
Clark Coleman
committed
#include <funcs.hpp>
Clark Coleman
committed
#if (IDA_SDK_VERSION < 700)
#include <area.hpp>
#else
#include <range.hpp>
#endif
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include <tryblks.hpp>
#pragma GCC diagnostic pop
#if STARS_USE_EHP_LIB
using namespace EHP;
#endif
// Formatted write to fp. The variadic arguments are packaged into a
// va_list and handed to qvfprintf(); its return value is passed back
// to the caller unchanged.
int STARS_IDA_Interface_t::STARS_fprintf(FILE *fp, const char *format, ...) {
    va_list ArgList;
    va_start(ArgList, format);
    const int Result = qvfprintf(fp, format, ArgList);
    va_end(ArgList);
    return Result;
}
// Formatted read from fp. The variadic arguments are packaged into a
// va_list and handed to qvfscanf(); its return value is passed back
// to the caller unchanged.
int STARS_IDA_Interface_t::STARS_fscanf(FILE *fp, const char *format, ...) {
    va_list ArgList;
    va_start(ArgList, format);
    const int Result = qvfscanf(fp, format, ArgList);
    va_end(ArgList);
    return Result;
}
// Report the current position of fp as returned by qftell(), converted
// to long.
long STARS_IDA_Interface_t::STARS_ftell(FILE *fp) {
    const long Position = static_cast<long>(qftell(fp));
    return Position;
}
// Read a line from fp into buffer (capacity buflen) by delegating to
// qfgets(); the pointer qfgets() returns is handed back unchanged.
char * STARS_IDA_Interface_t::STARS_fgets(char *buffer, int buflen, FILE *fp) {
    char *const LineRead = qfgets(buffer, buflen, fp);
    return LineRead;
}
// Reposition fp by delegating to qfseek() with the given offset and
// whence; the status qfseek() reports is returned unchanged.
int STARS_IDA_Interface_t::STARS_fseek(FILE *fp, long offset, int whence) {
    const int Status = qfseek(fp, offset, whence);
    return Status;
}
// Emit a formatted message through vmsg(). The variadic arguments are
// packaged into a va_list; the value vmsg() returns is passed back.
int STARS_IDA_Interface_t::STARS_msg(const char *format, ...) {
    va_list ArgList;
    va_start(ArgList, format);
    const int BytesWritten = vmsg(format, ArgList);
    va_end(ArgList);
    return BytesWritten;
}
// Format into buffer (capacity n) by forwarding the variadic arguments
// to qvsnprintf(); its return value is passed back unchanged.
int STARS_IDA_Interface_t::STARS_snprintf(char *buffer, std::size_t n, const char *format, ...) {
    va_list ArgList;
    va_start(ArgList, format);
    const int Result = qvsnprintf(buffer, n, format, ArgList);
    va_end(ArgList);
    return Result;
}
// The choices that IDA makes for deciding which parent function of a
// TAIL chunk is the primary owner of the tail can be counterintuitive.
// A function entry can both fall into and jump to a tail chunk that
// is contiguous with it, yet the "owner" might be a function that is
// far below it in the executable address space. This function will
// change the ownership to a more sensible arrangement.
//
// Outline of the visible logic: walk every function chunk; for each TAIL
// chunk, look for the highest-addressed parent that lies below the tail;
// if that differs from the current owner, call set_tail_owner() and
// reanalyze_function() on the new owner. When no such parent exists,
// scan the program's function bounds for the nearest preceding entry.
//
// NOTE(review): this listing appears to be missing several statements
// and braces (flagged inline below); confirm against the complete file
// before relying on the control flow as shown.
void STARS_IDA_Interface_t::AuditTailChunkOwnership(void)
{
// Scratch buffer for function names; only read inside SMP_DEBUG_CHUNKS code.
char FuncName[MAXSTR];
// Iterate through all chunks in the program.
std::size_t NumChunks = get_fchunk_qty();
for (std::size_t ChunkIndex = 0; ChunkIndex < NumChunks; ++ChunkIndex) {
// NOTE(review): ChunkInfo is used from here on, but no declaration is
// visible in this listing. AuditCodeTargets() obtains it with
// getn_fchunk((int) ChunkIndex); presumably the same is intended here.
#if (IDA_SDK_VERSION < 700)
STARS_ea_t ChunkStartAddr = ChunkInfo->startEA;
#else
STARS_ea_t ChunkStartAddr = ChunkInfo->start_ea;
#endif
if (is_func_tail(ChunkInfo)) {
// For each TAIL chunk, find all the parent chunks. Find the last
// parent chunk with an address less than the TAIL chunk address.
// Zero doubles as the "no candidate found" marker (tested below).
STARS_ea_t BestCandidate = 0;
func_parent_iterator_t FuncParent(ChunkInfo);
#if SMP_DEBUG_CHUNKS
#endif
for (bool ok = FuncParent.first(); ok; ok = FuncParent.next()) {
STARS_ea_t parent = FuncParent.parent();
#if SMP_DEBUG_CHUNKS
SMP_msg(" parent: %x ", parent);
#endif
// NOTE(review): as shown, this test has an empty body, so BestCandidate
// is never updated from a parent, and the closing brace of the enclosing
// for loop is not visible. Presumably "BestCandidate = parent;" and a
// loop-closing brace belong here; confirm.
if ((parent > BestCandidate) && (parent < ChunkStartAddr)) {
}
#if SMP_DEBUG_CHUNKS
SMP_msg("\n");
#endif
// Make the best parent chunk the owner of the TAIL chunk if it is
// not already the owner.
if (ChunkInfo->owner != BestCandidate) {
if (0 < BestCandidate) {
if (set_tail_owner(ChunkInfo, BestCandidate)) {
func_t *FuncInfo = ::get_func(BestCandidate);
SMP_msg("Set %llx as new owner of tail %llx\n",
(uint64_t) BestCandidate, (uint64_t) ChunkStartAddr);
// Reanalyze the parent function (and all its
// tail chunks) now that the structure has changed.
reanalyze_function(FuncInfo);
}
else {
SMP_msg("set_tail_owner failed for tail %llx and parent %llx\n",
(uint64_t) ChunkStartAddr, (uint64_t) BestCandidate);
}
}
else {
// No parent chunk lies below the tail; start from the current owner.
func_t *FuncInfo = ::get_func(ChunkInfo->owner);
#if SMP_DEBUG_CHUNKS
::get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
SMP_msg("No good parent candidate before tail at %llx\n",
(uint64_t) ChunkStartAddr);
SMP_msg("Current parent is %x: %s\n", FuncInfo->startEA, FuncName);
#endif
// Find out if a function entry chunk that comes before the
// tail is a better candidate for the owner (i.e. it falls
// through to the tail, or jumps to it).
BestCandidate = 0;
#if SMP_DEBUG_CHUNKS
SMP_msg("Finding parent func candidates for %x:", ChunkInfo->startEA);
#endif
SMP_bounds_t CurrFunc;
// Keep the highest function start address that is still below the tail.
for (std::size_t FuncIndex = 0; FuncIndex < global_STARS_program->GetFuncBoundsSize(); ++FuncIndex) {
CurrFunc = global_STARS_program->GetFuncBounds(FuncIndex);
STARS_ea_t FuncStartAddr = CurrFunc.startEA;
if ((FuncStartAddr < ChunkStartAddr)
&& (FuncStartAddr > BestCandidate)) {
BestCandidate = FuncStartAddr;
// NOTE(review): both the "candidate" and "not a candidate" messages
// are emitted on the same path as shown; presumably an else branch
// and/or SMP_DEBUG_CHUNKS guards separated them originally. Confirm.
SMP_msg(" candidate: %llx tail: %llx", (uint64_t) BestCandidate,
(uint64_t) ChunkStartAddr);
SMP_msg(" not a candidate: %llx tail: %llx best: %llx\n",
(uint64_t) FuncStartAddr, (uint64_t) ChunkStartAddr, (uint64_t) BestCandidate);
} // end for (std::size_t FuncIndex = 0; ...)
if (0 >= BestCandidate) { // highly unlikely
SMP_msg("No good func entry parent candidate.\n");
}
else {
// As shown, the candidate's func_t is only consulted by the debug code
// below; no ownership change is visible on this path.
FuncInfo = ::get_func(BestCandidate);
#if SMP_DEBUG_CHUNKS
// NOTE(review): FuncStartAddr is declared inside the for loop above and
// is out of scope here; BestCandidate looks like the intended argument.
::get_func_name(FuncStartAddr, FuncName, sizeof(FuncName) - 1);
SMP_msg("Best func entry parent candidate: %s at %llx",
FuncName, (uint64_t) BestCandidate);
if (FuncInfo->endEA == ChunkInfo->startEA)
SMP_msg(" Function endEA == tail chunk startEA");
SMP_msg("\n");
#endif
}
}
} // end if (ChunkInfo->owner != BestCandidate)
#if SMP_DEBUG_CHUNKS
else {
SMP_msg("Already best parent for %x is %x\n", ChunkInfo->startEA,
ChunkInfo->owner);
}
#endif
} // end if (is_func_tail(ChunkInfo))
} // end for (std::size_t ChunkIndex = 0; ...)
return;
} // end of AuditTailChunkOwnership()
// Find the span of contiguous code that is not contained within any
// function, starting at StartAddr, which should already be an example
// of an instruction address that is outside of a function.
//
// Returns the address of the last code head examined: either the last
// head before a non-code item or the segment end, or the first head that
// already belongs to a function. Returns StartAddr itself when no segment
// contains StartAddr or when the item after StartAddr is not a code head.
static STARS_ea_t FindNewFuncLimit(STARS_ea_t StartAddr)
{
    STARS_ea_t LimitAddr = StartAddr;
    STARS_Segment_t *seg = SMP_getseg(StartAddr);
    if (nullptr == seg)
        return LimitAddr;

    const STARS_ea_t SegLimit = seg->get_endEA();
    for (STARS_ea_t addr = SMP_get_item_end(StartAddr); addr < SegLimit; addr = SMP_get_item_end(addr)) {
        // Fetch the flags for the current item; the tests below classify
        // it as a code head or not.
        const flags_t InstrFlags = SMP_getFlags(addr);
        if (!(SMP_isCode(InstrFlags) && SMP_isHead(InstrFlags)))
            break; // Not a code head; time to stop.

        LimitAddr = addr;
        if (nullptr != get_func(addr))
            break; // ran into an existing function
    }
    return LimitAddr;
} // end of FindNewFuncLimit()
// Returns true when no cross reference TO the instruction is a code xref
// with a nonzero source address; returns false as soon as one is found.
bool STARS_IDA_Interface_t::InstHasNoCodeXrefs(STARS_InstructionID_t InstID) const {
    SMP_xref_t InXref;
    const STARS_ea_t TargetAddr = InstID.GetIDWithinFile();

    bool ok = InXref.SMP_first_to(TargetAddr, XREF_ALL);
    while (ok) {
        const STARS_ea_t SourceAddr = InXref.GetFrom();
        if ((0 != SourceAddr) && InXref.GetIscode()) {
            return false; // at least one code xref reaches this instruction
        }
        ok = InXref.SMP_next_to();
    }
    return true;
} // end of STARS_IDA_Interface_t::InstHasNoCodeXrefs()
// Determine whether the instruction is a jump target by looking
// at its cross references and seeing if it has "TO" code xrefs.
// Returns true on the first XREF_FAR code xref whose source address
// is nonzero; false when the xref list holds no such entry.
bool STARS_IDA_Interface_t::IsInstJumpTarget(STARS_InstructionID_t InstID) const {
    SMP_xref_t xrefs;
    // TODO: Why XREF_FAR? What about near jumps?
    for (bool ok = xrefs.SMP_first_to(InstID.GetIDWithinFile(), XREF_FAR); ok; ok = xrefs.SMP_next_to()) {
        if ((xrefs.GetFrom() != 0) && (xrefs.GetIscode())) {
            return true;
        }
    }
    return false;
}
// Scan the xrefs FROM the call instruction and return the first code
// target that is not the ordinary fall-through (fl_F) edge. A call has
// two code targets: the next instruction and the callee; only the callee
// is wanted. Yields an ID built from STARS_BADADDR, after logging a
// warning, when no such target exists.
STARS_InstructionID_t STARS_IDA_Interface_t::FindFirstCallTarget(STARS_InstructionID_t CallInstID) const {
    SMP_xref_t OutXref;
    STARS_ea_t TargetAddr = STARS_BADADDR;
    const STARS_ea_t CallSiteAddr = CallInstID.GetIDWithinFile();

    for (bool ok = OutXref.SMP_first_from(CallSiteAddr, XREF_ALL); ok; ok = OutXref.SMP_next_from()) {
        // Skip anything that is not a code xref with a nonzero destination.
        if ((0 == OutXref.GetTo()) || !OutXref.GetIscode()) {
            continue;
        }
        // Skip the fall-through edge to the instruction after the call.
        if (OutXref.GetIscode() && (fl_F == OutXref.GetType())) {
            continue;
        }
        // First remaining code xref is taken as the callee.
        TargetAddr = OutXref.GetTo();
        SMP_msg("Found indirect call target %lx at %lx\n", (unsigned long) TargetAddr, (unsigned long) CallSiteAddr);
        break;
    } // end for all code xrefs

    if (STARS_BADADDR == TargetAddr) {
        SMP_msg("WARNING: Did not find indirect call target at %lx\n", (unsigned long) CallSiteAddr);
    }
    return STARS_InstructionID_t(TargetAddr);
} // end of STARS_IDA_Interface_t::FindFirstCallTarget()
// Audit the IDA database with respect to branches and calls. They should
// each have valid code targets (not data or unknown bytes) and the code
// cross references should reflect the linkage.
void STARS_IDA_Interface_t::AuditCodeTargets(void)
{
// Cover all the code that IDA has grouped into functions by iterating
// through all function chunks in the program.
std::size_t NumChunks = get_fchunk_qty();
for (std::size_t ChunkIndex = 0; ChunkIndex < NumChunks; ++ChunkIndex) {
func_t *ChunkInfo = getn_fchunk((int) ChunkIndex);
char FuncName[MAXSTR];
#if (IDA_SDK_VERSION < 700)
STARS_ea_t ChunkStartAddr = ChunkInfo->startEA;
STARS_ea_t ChunkEndAddr = ChunkInfo->endEA;
#else
STARS_ea_t ChunkStartAddr = ChunkInfo->start_ea;
STARS_ea_t ChunkEndAddr = ChunkInfo->end_ea;
#endif
get_func_name(ChunkStartAddr, FuncName, sizeof(FuncName) - 1);
// First, see if any calls to this function (if this chunk is
// an entry point) are not coming from within functions.
if (is_func_entry(ChunkInfo)) {
SMP_xref_t xb;
for (bool ok = xb.SMP_first_to(addr, XREF_ALL); ok; ok = xb.SMP_next_to()) {
uchar XrefType = xb.GetType() & XREF_MASK;
if (xb.GetIscode()) {
if ((XrefType == fl_U) || (XrefType == fl_USobsolete)) {
SMP_msg("AUDIT: Bad xref type: %llx %s\n", (uint64_t) addr, FuncName);
}
#if SMP_DEBUG_FIXUP_IDB
else if ((XrefType == fl_JF) || (XrefType == fl_JN)) {
SMP_msg("Jump to func: %llx %s from: %llx\n",
(uint64_t) addr, FuncName, (uint64_t) xb.GetFrom());
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
}
#endif
else if (XrefType == fl_F) {
SMP_msg("AUDIT: Fall through to func: %lx %s from: %lx\n",
(unsigned long) addr, FuncName, (unsigned long) xb.GetFrom());
}
else if ((XrefType == fl_CF) || (XrefType == fl_CN)) {
// Far call or Near call
func_t *CallingFunc = ::get_func(xb.GetFrom());
if (NULL == CallingFunc) {
;
#if SMP_DEBUG_FIXUP_IDB
SMP_msg("Call to %x Func %s from %x not in function.\n",
addr, FuncName, xb.GetFrom());
#endif
}
}
} // end if (xb.GetIscode())
else { // DATA xref
if (XrefType == dr_O) {
#if SMP_DEBUG_FIXUP_IDB
SMP_msg("Data xref to %x Func %s from %x\n",
addr, FuncName, xb.GetFrom());
#endif
}
else {
SMP_msg("AUDIT: Strange data xref %d to %lx Func %s from %lx\n",
XrefType, (unsigned long) addr, FuncName, (unsigned long) xb.GetFrom());
}
}
} // end for (bool ok = xb.SMP_first_to(); ...)
} // end if (is_func_entry(ChunkInfo))
// Next, see if any call or branch in this chunk references
// a target address that is not in a function. If so, and the
// callee address code looks like a function prologue, then
// create a function for the contiguous code starting at that
// address and ask IDA to analyze it and store it in the
// IDA database. If it is a branch target, not a call target,
// create a new TAIL chunk for the current parent functions.
for (STARS_ea_t addr = ChunkStartAddr; addr < ChunkEndAddr;
addr = SMP_get_item_end(addr)) {
flags_t InstrFlags = SMP_getFlags(addr);
if (SMP_isCode(InstrFlags) && SMP_isHead(InstrFlags)) {
SMPInstr CurrInst(addr);
CurrInst.Analyze();
if ((CALL|JUMP|COND_BRANCH) & CurrInst.GetDataFlowType()) {
SMP_xref_t xb;
for (bool ok = xb.SMP_first_from(addr, XREF_FAR); ok; ok = xb.SMP_next_from()) {
if (xb.GetIscode()) {
STARS_ea_t FirstAddr = xb.GetTo();
func_t *FuncInfo = ::get_func(FirstAddr);
if (NULL == FuncInfo) {
// Found call to addr that is not in a func.
// Find limits of contiguous code starting at FirstAddr.
STARS_ea_t LastAddr = FindNewFuncLimit(FirstAddr);
if (CALL == CurrInst.GetDataFlowType())
SMP_msg("AUDIT: Found new func from %lx to %lx\n",
(unsigned long) FirstAddr, (unsigned long) LastAddr);
else
SMP_msg("AUDIT: Found new chunk from %lx to %lx\n",
(unsigned long) FirstAddr, (unsigned long) LastAddr);
}
}
}
}
}
}
} // end for (std::size_t ChunkIndex = 0; ... )
Clark Coleman
committed
// Detect IDA Pro func boundary problems, if code segment; true if problems found
bool STARS_IDA_Interface_t::AuditFunctionBoundaries(const STARS_ea_t startEA, const STARS_ea_t endEA) const {
bool ProblemFound = false;
// First, use the tryblks.hpp interface from IDA Pro to parse the EH_FRAME
Clark Coleman
committed
// section in the ELF binary, or equivalent section in other binaries.
//
// Algorithm:
// 1. Call get_tryblks() for the address range of current seg, passed in as args.
// 2. for each tryblk_t in the qvector returned:
// if the kind field is TB_CPP, examine the parent struct rangevec_t, which holds try blocks.
// 3. for each range_t in the rangevec_t, extract the start_ea and end_ea fields
// from each element and note a problem if start_ea and (end_ea - 1) are not in the
// same func.
// Step 1.
tryblks_t *TryBlockVec = new tryblks_t();
range_t FuncRange(startEA, endEA);
std::size_t NumTryBlks = ::get_tryblks(TryBlockVec, FuncRange);
// Step 2.
for (std::size_t TryIndex = 0; TryIndex < NumTryBlks; ++TryIndex) {
tryblk_t CurrTryBlk = TryBlockVec->at(TryIndex);
if (CurrTryBlk.is_cpp()) { // C++ try/catch type
// Step 3.
for (std::size_t TryIndex2 = 0; TryIndex2 < CurrTryBlk.size(); ++TryIndex2) {
range_t CurrTryBlk2 = CurrTryBlk.at(TryIndex2);
// !!!!****!!!! TryIndex3 not used; review data structures
for (std::size_t TryIndex3 = 0; TryIndex3 < CurrTryBlk2.size(); ++TryIndex3) {
Clark Coleman
committed
STARS_ea_t CurrStartEA = CurrTryBlk2.start_ea;
STARS_ea_t CurrEndEA = CurrTryBlk2.end_ea;
#if 1
SMP_msg("INFO: EHSEG: Try block from %llx to %llx \n",
(uint64_t) CurrStartEA, (uint64_t) (CurrEndEA - 1));
#endif
// See if start and end of try block are in the same func.
STARS_Function_t *StartFunc = SMP_get_func(CurrStartEA);
STARS_Function_t *EndFunc = SMP_get_func(CurrEndEA - 1);
if (StartFunc != EndFunc) {
ProblemFound = true;
SMP_msg("ERROR: FUNCBOUNDS: Try block from %llx to %llx spans functions\n",
(uint64_t) CurrStartEA, (uint64_t) (CurrEndEA - 1));
}
} // end for TryIndex3
} // end for TryIndex2
Clark Coleman
committed
}
} // end for TryIndex
Clark Coleman
committed
delete TryBlockVec;
return ProblemFound;
} // end of STARS_IDA_Interface_t::AuditFunctionBoundaries()
// Detect IDA Pro func boundary problems using EH_FRAME FDE info; true if problems found
//
// When STARS_USE_EHP_LIB is enabled and the build is 64-bit (__X64__),
// every FDE reported by the EHP parser for the root executable is
// checked: its first and last addresses must map to the same IDA
// function. In all other build configurations the function returns false.
bool STARS_IDA_Interface_t::AuditEHFunctionBoundaries(void) const {
    bool ProblemFound = false;
#if STARS_USE_EHP_LIB
#ifdef __X64__ // stub out for 32-bit plugins; libehp is 64 bits
    // Use the FDEs (Frame Descriptor Entries) from the eh_frame section
    // to perform the same algorithm as above: an FDE should contain only one func.
    const std::string ExeFileName = global_STARS_program->GetRootFileName();
    auto EHParser = EHP::EHFrameParser_t::factory(ExeFileName);
    const auto FDEvecptr = EHParser->getFDEs();
    // Bind each entry by reference so the handle is not copied per iteration.
    for (const auto &FDEveciter : *FDEvecptr) {
        const uint64_t startAddr = FDEveciter->getStartAddress();
        const uint64_t endAddr = FDEveciter->getEndAddress();

        // See if start and end of FDE landing pad are in the same IDA Pro func.
        const STARS_ea_t CurrStartEA = (STARS_ea_t) startAddr;
        const STARS_ea_t CurrEndEA = (STARS_ea_t) endAddr;
        func_t *StartFunc = ::get_func(CurrStartEA);
        func_t *EndFunc = ::get_func(CurrEndEA - 1);
        if (StartFunc != EndFunc) {
            ProblemFound = true;
            SMP_msg("ERROR: FUNCBOUNDS: FDE range from %llx to %llx spans functions\n",
                (uint64_t) CurrStartEA, (uint64_t) (CurrEndEA - 1));
        }
    } // end for (const auto &FDEveciter : *FDEvecptr)
#endif // __X64__
#endif // STARS_USE_EHP_LIB
    return ProblemFound;
} // end of STARS_IDA_Interface_t::AuditEHFunctionBoundaries()