Newer
Older
clc5q
committed
if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Code was misaligned.\n");
clc5q
committed
#endif
clc5q
committed
RemoveIDACodeAddr(addr);
if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted misaligned code to data at %x : len: %x\n",
clc5q
committed
addr, InstrLen);
clc5q
committed
// Step back so data gets processed.
DataRunLen += get_item_size(addr);
continue; // skip reset of DataRunLen
}
else {
clc5q
committed
SMP_msg("Misaligned code left as unknown at %x : len: %x\n",
clc5q
committed
addr, InstrLen);
clc5q
committed
IsolatedCodeTrigger = false;
}
} // end if (IsCodeMisaligned() ...)
clc5q
committed
// No references at all --> isolated code.
IsolatedCodeTrigger = true;
IsolatedCodeAddr = addr;
IsolatedCodeLen = InstrLen;
}
else {
clc5q
committed
SMP_xref_t xb;
bool ok = xb.SMP_first_to(addr, XREF_ALL);
clc5q
committed
if (!ok) {
// No code xrefs to this target addr.
IsolatedCodeTrigger = true;
IsolatedCodeAddr = addr;
IsolatedCodeLen = InstrLen;
}
}
} // end if (MIN_DATARUN_LEN <= DataRunLen)
else if (IsolatedCodeTrigger) {
// Two instructions in a row does not fit the pattern.
IsolatedCodeTrigger = false;
}
DataRunLen = 0;
} // end if (SMP_isData) ... else if (isUnknown) ... else SMP_isCode
} // end if (SMP_isHead)
clc5q
committed
// If in a data run, convert to data.
InstrLen = get_item_size(addr);
clc5q
committed
SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q
committed
if (0 < DataRunLen) {
if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q
committed
SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen += InstrLen;
}
else {
clc5q
committed
SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q
committed
DataRunLen = 0;
IsolatedCodeTrigger = false;
}
}
}
} // end for (STARS_ea_t addr = seg->startEA; ...)
clc5q
committed
return;
} // end of FindDataInCode()
// The choices that IDA makes for deciding which parent function of a
// TAIL chunk is the primary owner of the tail can be counterintuitive.
// A function entry can both fall into and jump to a tail chunk that
// is contiguous with it, yet the "owner" might be a function that is
// far below it in the executable address space. This function will
// change the ownership to a more sensible arrangement.
// Takes no arguments and returns nothing.
// NOTE(review): no statements are visible in the body below, so as shown
// here the function changes nothing -- confirm whether the implementation
// was elided from this copy or is still to be written.
void AuditTailChunkOwnership(void) {
clc5q
committed
} // end of AuditTailChunkOwnership()
// If the addresses signified from DisasmIndex to IDAProIndex are
// all considered data and do NOT follow a return instruction,
// return false and update AreaSize to reflect the area to be
// converted.
// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
STARS_ea_t PrevIDAAddr;
STARS_ea_t NextIDAAddr;
clc5q
committed
size_t ShadowDisasmIndex = DisasmIndex - 1;
STARS_ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
clc5q
committed
bool CannotConvert = false; // return value
bool DebugAddress = false;
#if SMP_DEBUG_FIXUP_IDB
DebugAddress = (DisasmAddr == 0x806c19a);
#endif
if (DebugAddress) {
clc5q
committed
SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex);
SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(),
clc5q
committed
DisasmLocs.size());
}
if (IDAProIndex >= IDAProLocs.size()) {
// Have already processed the last IDA address.
clc5q
committed
if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n");
clc5q
committed
return true;
}
else if (DisasmIndex >= DisasmLocs.size()) {
// Strange. Last Disasm address is only one to convert, and
// IDA still has addresses after that?
clc5q
committed
if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n");
clc5q
committed
return true;
}
else if (IDAProIndex < 2) {
// We have Disasm addrs before the very first IDA addr. We
// don't trust this boundary case.
clc5q
committed
if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n");
clc5q
committed
return true;
}
NextIDAAddr = IDAProLocs[IDAProIndex - 1];
PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
if (DebugAddress) SMP_msg(" PrevIDAAddr: %lx NextIDAAddr: %lx\n", (unsigned long) PrevIDAAddr, (unsigned long) NextIDAAddr);
clc5q
committed
// See if previous IDA address was a return.
if (!SMP_isCode(PrevFlags) || !SMP_isHead(PrevFlags)) {
SMP_msg("PrevIDAAddr %lx not isCode or not isHead.\n", (unsigned long) PrevIDAAddr);
clc5q
committed
return true;
}
SMPInstr PrevInstr(PrevIDAAddr);
PrevInstr.Analyze();
clc5q
committed
if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n");
if (PrevInstr.HasReturnOpcode()) {
clc5q
committed
// Right after a return come no-ops and 2-byte no-ops
// that are just for alignment. IDA does not seem to be
// happy when we convert all those to code.
clc5q
committed
if (DebugAddress) SMP_msg(" Data followed a return instruction.\n");
clc5q
committed
return true;
}
// Now, see if the area from DisasmAddr to NextIDAAddr is all data
// according to IDA.
while (DisasmAddr < NextIDAAddr) {
flags_t DataFlags = SMP_getFlags(DisasmAddr);
if (SMP_isTail(DataFlags)) {
if (DebugAddress) SMP_msg(" tail byte: %lx\n", (unsigned long) DisasmAddr);
DisasmAddr = SMP_get_item_end(DisasmAddr);
clc5q
committed
}
if (DebugAddress) SMP_msg(" data byte: %lx\n", (unsigned long) DisasmAddr);
DisasmAddr = SMP_get_item_end(DisasmAddr);
clc5q
committed
}
else if (SMP_isCode(DataFlags)) {
clc5q
committed
// How could this ever happen?
if (DebugAddress) SMP_msg(" isCode: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
return true;
}
else { // must be isUnknown()
// Very conservative here; only want to convert when the whole
// region is data, because that is a symptom of IDA missing
// a piece of code within a function (usually a piece of code
// that is only reachable via an indirect jump).
if (DebugAddress) SMP_msg(" Not isData: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
return true;
}
if (DebugAddress) SMP_msg(" new DisasmAddr: %lx\n", (unsigned long) DisasmAddr);
clc5q
committed
} // end while (DisasmAddr < NextIDAAddr)
clc5q
committed
if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
clc5q
committed
if (!CannotConvert) {
// Success.
DisasmAddr = DisasmLocs[ShadowDisasmIndex];
AreaSize = NextIDAAddr - DisasmAddr;
if (DebugAddress) {
clc5q
committed
SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
clc5q
committed
AreaSize, ShadowDisasmIndex, DisasmIndex);
clc5q
committed
SMP_msg(" exiting FindDataToConvert()\n");
SMP_msg("\n");
clc5q
committed
}
} // end if (!CannotConvert)
return CannotConvert;
} // end of FindDataToConvert()
// Does a converted code region look like a function prologue? If so,
// we should not include it in the previous function.
// StartAddr and EndAddr delimit the converted region.
// Prologue detection is not implemented yet, so the answer is always false.
bool IsFunctionPrologue(STARS_ea_t StartAddr, STARS_ea_t EndAddr) {
	// Both bounds are unused until the detection logic is written.
	(void) StartAddr;
	(void) EndAddr;
	return false; // **!!** TODO
} // end of IsFunctionPrologue()
// Patch program bytes that could not be converted from
// data to code, if it can be determined that the bytes represent code
// that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
// often are found in optimized glibc code because gcc was able to
// determine that a function pointer was zero and did constant propagation,
// but unfortunately was not able to determine that the code was unreachable.
// IDA will not succeed in ua_code() for "call 0", but there is no danger
// of a working program ever executing this code. Replacing the call with
// no-ops permits us to continue converting a contiguous range of data to
// code, and permits IDA to reanalyze the function later.
// CurrDisasmAddr is the address of the instruction that failed to convert.
// Returns true if program bytes were patched AND the patched bytes were
// then successfully turned into an instruction; false in every other case.
// NOTE: if patch_byte() fails part way through, the bytes already
// overwritten are left patched.
bool MDPatchUnconvertedBytes(STARS_ea_t CurrDisasmAddr) {
	// Addresses are logged through an explicit unsigned long so that the
	// format specifier matches the argument whatever width STARS_ea_t has.
	const unsigned long PrintableAddr = static_cast<unsigned long>(CurrDisasmAddr);

	const flags_t AddrFlags = SMP_getFlags(CurrDisasmAddr);
	if (SMP_isData(AddrFlags) || SMP_isTail(AddrFlags)) {
		// Bytes should have been converted to unknown already.
		SMP_msg("Cannot patch data bytes or tail bytes at %lx\n", PrintableAddr);
		return false;
	}

	SMPInstr PatchInstr(CurrDisasmAddr);
	PatchInstr.Analyze();
	int InstrLen = (int) PatchInstr.GetSize();
	if (0 >= InstrLen) {
		SMP_msg("decode_insn() failed on patch location %lx\n", PrintableAddr);
		return false;
	}
	if (PatchInstr.GetIDAOpcode() != NN_call) {
		SMP_msg("Cannot patch non-call instruction at %lx\n", PrintableAddr);
		return false;
	}

	PatchInstr.PrintOperands();
	STARSOpndTypePtr CallDest = PatchInstr.GetFirstUse()->GetOp();
	if ((! CallDest->IsNearPointer()) || (0 != CallDest->GetAddr())) {
		SMP_msg("Cannot patch call unless it is call near ptr 0 at %lx\n",
			PrintableAddr);
		return false;
	}

	// Overwrite every byte of the call with a one-byte no-op.
	STARS_ea_t PatchAddr = CurrDisasmAddr;
	for (int i = 0; i < InstrLen; ++i, ++PatchAddr) {
		if (!patch_byte(PatchAddr, 0x90)) { // x86 no-op
			SMP_msg("patch_byte() failed at %lx\n", static_cast<unsigned long>(PatchAddr));
			return false;
		}
	}
	SMP_msg("Patched %d bytes successfully at %lx\n", InstrLen, PrintableAddr);

	// The patched bytes should now be convertible to code.
	InstrLen = create_insn(CurrDisasmAddr);
	if (0 >= InstrLen) {
		SMP_msg(" ... but ua_code() still failed!\n");
		return false;
	}
	return true;
} // end of MDPatchUnconvertedBytes()
// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
clc5q
committed
// and an external disassembler (in DisasmLocs). Compare the lists and
// try to convert addresses to code that are found in DisasmLocs but
// not in IDAProLocs. Emit warnings when IDAProLocs has a code address
// not found in DisasmLocs.
void FixCodeIdentification(void) {
size_t DisasmIndex = 0;
STARS_ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
clc5q
committed
size_t IDAProIndex = 0;
STARS_ea_t CurrAddr = IDAProLocs[IDAProIndex++];
clc5q
committed
while (DisasmIndex <= DisasmLocs.size()) {
// If the current address is less than the current
// external disasm address, we have the rare case in
// which IDA Pro has identified an address as code
// but the external disasm has not. Emit a warning
// message and go on to the next IDA address.
if (CurrAddr < CurrDisasmAddr) {
SMPInstr TempInstr(CurrAddr);
TempInstr.Analyze();
SMP_msg("AUDIT: Address %lx is code in IDB but not in external disassembler: %s\n",
(unsigned long) CurrAddr, TempInstr.GetDisasm());
clc5q
committed
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
if (IDAProIndex < IDAProLocs.size())
CurrAddr = IDAProLocs[IDAProIndex++];
else {
// Last IDA addr; might still process Disasm addrs
// after loop exit.
break;
}
}
else if (CurrAddr == CurrDisasmAddr) {
// If equal, no problem, we are moving through the
// code addresses in lockstep. Grab the next address
// from each source.
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
++DisasmIndex; // cause loop exit; skip cleanup loop
}
if (IDAProIndex < IDAProLocs.size())
CurrAddr = IDAProLocs[IDAProIndex++];
else {
// Last IDA addr; might still process Disasm addrs
// after loop exit in cleanup loop.
break;
}
}
else {
// We must have CurrAddr > CurrDisasmAddr. That means
// IDA has jumped over some code addresses in
// DisasmLocs. We need to try to convert addresses
// to code until we can reach the current addr.
// For now, we will address only the case in which IDA
// has identified addresses as data bytes, and the
// external disassembler(e.g. objdump) has identified
// the same addresses as code. We only want to deal with
// contiguous areas of data-to-code conversion that do NOT
// follow a return statement.
int AreaSize = 0;
STARS_ea_t AreaStart = CurrDisasmAddr;
STARS_ea_t AreaEnd;
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("CurrDisasmAddr: %x CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
clc5q
committed
#endif
bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
if (SkipArea) {
// Skip over the extra external disasm addresses.
while (CurrDisasmAddr < CurrAddr)
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
// Convert the overlooked code region to unexplored.
AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
clc5q
committed
#endif
SMP_bounds_t ConvertRegion;
ConvertRegion.startEA = AreaStart;
ConvertRegion.endEA = AreaEnd;
FixupRegion CurrRegion(ConvertRegion);
CodeReanalyzeList.push_back(CurrRegion);
clc5q
committed
bool AllConverted = true;
bool AllNops = true;
clc5q
committed
do {
flags_t InstrFlags = SMP_getFlags(CurrDisasmAddr);
if (!SMP_isUnknown(InstrFlags)) {
SMP_msg("Sync problem in FixCodeID: %lx\n", (unsigned long) CurrDisasmAddr);
clc5q
committed
}
else {
int InstrLen = create_insn(CurrDisasmAddr);
clc5q
committed
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(CurrDisasmAddr);
NewInstr.Analyze();
if (!NewInstr.IsNop())
AllNops = false;
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
#if 0
clc5q
committed
SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
clc5q
committed
InstrLen, NewInstr.GetDisasm());
#endif
clc5q
committed
#endif
}
else {
if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q
committed
SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q
committed
AllConverted = false;
clc5q
committed
SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q
committed
}
}
} // end if (SMP_isCode(InstrFlags) ... else ...
clc5q
committed
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
// cause loops to exit
CurrDisasmAddr = CurrAddr;
++DisasmIndex; // skip cleanup loop
}
} while (CurrDisasmAddr < CurrAddr);
if (AllConverted && AllNops) {
// We want to convert the region back to unexplored bytes
// and take it off the work list. Regions that are all nops
// create data flow analysis problems sometimes. The region
// is often unreachable code and produces a basic block with
// no predecessors within a function. This often happens when
// an optimizing compiler uses nops as padding to align jump
// targets on cache line bounaries. With no fall through into
// the nops, they are unreachable and should be left as unknown.
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
CurrRegion.GetEnd());
SMP_del_items(CurrRegion.GetStart(),
CurrRegion.GetEnd() - CurrRegion.GetStart(), 0);
CodeReanalyzeList.pop_back();
}
clc5q
committed
} // end if (SkipArea) ... else ...
} // end if (addr < CurrDisasmAddr) .. else if ... else ...
} // end while (DisasmIndex <= DisasmLocs.size()
#if 0 // Make this code use FindDataToConvert() **!!**
// Cleanup loop:
// If there are still Disasm addrs to process, try to turn them
// into code in the IDB.
while (DisasmIndex <= DisasmLocs.size()) {
if (SMP_isCode(InstrFlags)) {
clc5q
committed
SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
// Clear bytes to unexplored.
clc5q
committed
segment_t *seg = SMP_getseg(CurrDisasmAddr);
clc5q
committed
if (SEG_CODE == seg->type) {
SMP_del_items(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, 0);
clc5q
committed
}
else {
// Might be safest to just discontinue processing
// if we wander into a non-code segment.
// DisasmLocs should not have an entire code segment
// that IDA Pro missed.
break;
}
int InstrLen = ua_code(CurrDisasmAddr);
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(CurrDisasmAddr);
NewInstr.Analyze();
clc5q
committed
SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
clc5q
committed
NewInstr.GetDisasm());
}
else {
clc5q
committed
SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q
committed
}
} // end if (SMP_isCode(InstrFlags) ... else ...
clc5q
committed
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
++DisasmIndex; // cause loop to exit
}
} // end while (DisasmIndex <= DisasmLocs.size()
#endif
return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
// Earlier failures are usually because the instruction branches to an address that has not
// yet been converted from data to code, so ua_code() failed. Now that all data to code
// conversions have completed, ua_code() should succeed.
// Return the number of instructions successfully analyzed.
int FixupNewCodeChunks(void) {
list<FixupRegion>::iterator CurrRegion;
int changes = 0;
for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
bool AllConverted = true;
bool AllNops = true;
bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
if (NoFixups) {
CurrRegion->SetStart(STARS_BADADDR); // mark for removal
continue; // skip to next region
}
list<STARS_ea_t>::iterator CurrInstr;
for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr) {
Clark Coleman
committed
int InstrLen = ::create_insn(*CurrInstr);
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(*CurrInstr);
NewInstr.Analyze();
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
#endif
if (!NewInstr.IsNop()) {
AllNops = false;
*CurrInstr = STARS_BADADDR; // mark for removal
}
else {
AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
}
} // end for all instrs in CurrRegion
if (AllConverted && !AllNops) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
CurrRegion->SetStart(STARS_BADADDR); // mark for removal
}
else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
SMP_del_items(CurrRegion->GetStart(),
CurrRegion->GetEnd() - CurrRegion->GetStart(), 0);
CurrRegion->SetStart(STARS_BADADDR); // mark for removal
else {
// Remove only the instructions that were fixed up.
CurrInstr = CurrRegion->FixupInstrs.begin();
while (CurrInstr != CurrRegion->FixupInstrs.end()) {
if (STARS_BADADDR == *CurrInstr) {
CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
}
else {
++CurrInstr;
}
}
}
} // end for all regions in the CodeReanalyzeList
// Remove completed regions from the CodeReanalyzeList
CurrRegion = CodeReanalyzeList.begin();
while (CurrRegion != CodeReanalyzeList.end()) {
if (STARS_BADADDR == CurrRegion->GetStart())
CurrRegion = CodeReanalyzeList.erase(CurrRegion);
else
++CurrRegion;
}
#if 0
if (AllConverted) {
if (IsFunctionPrologue(AreaStart, AreaEnd)) {
// Create a new function entry chunk here.
// **!!** TODO
;
}
else {
// Extend the previous chunk to include the
// converted code.
STARS_ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
STARS_Function_t *PrevChunk = get_fchunk(PrevIDAAddr);
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
SMP_msg(" STARS_Function_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0 // temporary for debugging
if (is_func_entry(PrevChunk)) {
// Extend the func entry to contain the new code.
if (func_setend(PrevIDAAddr, AreaEnd)) {
clc5q
committed
SMP_msg("Func extended to include code from %x to %x\n",
AreaStart, AreaEnd);
FuncReanalyzeList.push_back(PrevIDAAddr);
}
else {
clc5q
committed
SMP_msg("Failed to extend func from %x to %x\n",
AreaStart, AreaEnd);
}
}
else { // tail
// See if this works for function tails, also.
// Extend the func entry to contain the new code.
if (func_setend(PrevIDAAddr, AreaEnd)) {
clc5q
committed
SMP_msg("Tail extended to include code from %x to %x\n",
AreaStart, AreaEnd);
STARS_Function_t *TailOwner = get_func(PrevChunk->owner);
FuncReanalyzeList.push_back(PrevIDAAddr);
}
else {
clc5q
committed
SMP_msg("Failed to extend tail from %x to %x\n",
AreaStart, AreaEnd);
}
} // end if (is_func_entry()) ... else ...
#endif
} // end if (IsFunctionPrologue()) ... else ...
} // end if (AllConverted)
else {
clc5q
committed
SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
}
#endif
return changes;
// Audit the IDA code database by looking at all instructions in the
// code segment and printing all those that are not contained in a
// function. Emit the context-free annotations that we are able to
// emit on a per-instruction basis.
// CurrSeg supplies the address range that is walked one item at a time,
// from get_startEA() up to (not including) get_endEA().
// AnnotFile and InfoAnnotFile are passed through to EmitAnnotations().
// NOTE(review): in this copy the braces below do not balance and `disasm`
// is used without a visible declaration, so some lines (closing braces and
// at least one branch header) appear to be missing -- confirm against the
// repository before trusting the nesting suggested by the comments here.
void FindOrphanedCode(STARS_Segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
bool PrefetchInstSeenLast = false; // inst before current inst was a prefetch
bool UndefinedOpcodeSeenLast = false; // inst before current inst was an undefined opcode
for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
addr = SMP_get_item_end(addr)) {
flags_t InstrFlags = SMP_getFlags(addr);
// Tail bytes are skipped outright.
if (SMP_isTail(InstrFlags))
clc5q
committed
continue;
if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
// FirstFuncAddr is filled in by IsInstAddrStillInFunction(); it is not
// read again in the visible code.
STARS_ea_t FirstFuncAddr;
if (!(CurrProg->IsInstAddrStillInFunction(addr, FirstFuncAddr))) {
SMPInstr CurrInst(addr);
CurrInst.Analyze();
clc5q
committed
SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm());
// TODO: If there are code xrefs to the orphan code,
// see what kind. If a CALL, and orphan code looks
// like a prologue, make a function. If a JUMP of
// some kind, then make a function chunk and make
// it a tail of all functions that jump to it. **!!**
clc5q
committed
// Do machine-dependent fixes for DEF and USE lists.
// The fixes can help produce better annotations.
CurrInst.MDFixupDefUseLists();
// If instruction is still not included in a code chunk,
// emit annotations for it in isolation.
if (CurrInst.IsAnalyzeable()) {
CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
// If instruction is an indirect branch, emit an XREF
// annotation for each of its targets.
SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
bool IndirCallFlag = (CurrDataFlow == INDIR_CALL);
if ((CurrDataFlow == INDIR_JUMP) || IndirCallFlag) {
global_STARS_program->PrintAllCodeToCodeXrefs(addr, CurrInst.GetSize(), IndirCallFlag, false);
clc5q
committed
// If the address of this instruction appeared in data, emit an IBT annotation.
if (global_STARS_program->IsCodeAddressTaken(addr)) {
global_STARS_program->PrintUnknownCodeXref(addr, CurrInst.GetSize(), ZST_CODEADDRESSTAKEN);
clc5q
committed
}
} // end if inst is Analyzeable
// Emit IBT annotations for instructions that fit computed-goto patterns in libc/glibc, such
// as prefetch instructions and the instructions that follow them (computed goto often chooses
// between going to the prefetch or jumping just past it, and IDA Pro cannot analyze these libc
// macro-generated computed gotos even if they are not orphaned code). Likewise, an undefined opcode
// often separates an indirect jump and its first target, so inst after undefined opcode is IBT.
// EmitIBTAnnotation starts from what the PREVIOUS instruction was; the
// flags are then updated to describe the current instruction.
bool EmitIBTAnnotation = (PrefetchInstSeenLast || UndefinedOpcodeSeenLast);
if (CurrInst.MDIsPrefetchOpcode()) {
PrefetchInstSeenLast = true;
UndefinedOpcodeSeenLast = false;
// A prefetch is annotated itself, not only the instruction after it.
EmitIBTAnnotation = true;
}
else if (CurrInst.MDIsUndefinedOpcode()) {
UndefinedOpcodeSeenLast = true;
PrefetchInstSeenLast = false;
}
else {
PrefetchInstSeenLast = false;
UndefinedOpcodeSeenLast = false;
}
if (EmitIBTAnnotation) {
global_STARS_program->PrintUnknownCodeXref(addr, CurrInst.GetSize(), ZST_COMPUTEDGOTO);
// mark addresses that are taken via the instruction
global_STARS_program->PrintAllAddressTakenXrefs(addr, CurrInst.GetSize());
clc5q
committed
// NOTE(review): the branch header that selects the unanalyzed-byte path
// below is not visible in this copy -- presumably an else-branch for
// addresses that are not code heads; confirm.
SMP_msg("Unanalyzed byte at %x\n", addr);
// Can IDA analyze this to be code?
// NOTE(review): InstrLen is assigned but not read in the visible code.
int InstrLen;
InstrLen = create_insn(addr);
bool IDAsuccess = SMP_generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
if (IDAsuccess) {
// Remove interactive color-coding tags.
STARS_ssize_t StringLen = SMP_tag_remove(disasm, disasm, sizeof(disasm) - 1);
// NOTE(review): as shown, this ERROR message is unconditional; the check
// on StringLen that presumably guards it is not visible -- confirm.
SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
clc5q
committed
SMP_msg("Successfully analyzed! %s\n", disasm);
SMPInstr UnknownInstr(addr);
UnknownInstr.Analyze();
// TODO: Get new code into a chunk. **!!**
// If instruction is still not included in a code chunk,
// emit annotations for it in isolation.
UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
// NOTE(review): this message presumably belongs to the failure branch of
// the IDAsuccess test, whose else-header is not visible here -- confirm.
SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
}
}
} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()
// Find calls and jumps from orphaned code to functions. Mark those functions
// so that we will know that the program CFG is not complete.
// CurrSeg supplies the address range that is walked one item at a time,
// from get_startEA() up to (not including) get_endEA().
void FindLinksFromOrphanedCode(STARS_Segment_t *CurrSeg) {
// NOTE(review): disasm is not referenced anywhere in the visible body.
char disasm[MAXSTR];
for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA(); addr = SMP_get_item_end(addr)) {
// NOTE(review): InstrFlags is read here but no declaration is visible in
// this copy -- presumably the flags of addr; confirm.
if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
// Does IDA Pro think addr is inside a function?
func_t* CurrIDAFunc = ::get_func(addr);
if (nullptr == CurrIDAFunc) {
SMPInstr CurrInst(addr);
CurrInst.Analyze();
if (CurrInst.IsAnalyzeable()) {
// If instruction is control flow, see if it reaches an addr in a function.
SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
if (IsBranchOrCall(CurrDataFlow)) {
// Try the call target first and fall back to the jump target.
STARS_ea_t TargetAddr = CurrInst.GetCallTarget();
if (STARS_BADADDR == TargetAddr) {
TargetAddr = CurrInst.GetJumpTarget();
}
if (STARS_BADADDR != TargetAddr) {
// See if TargetAddr is inside a function.
func_t* TargetFunc = ::get_func(TargetAddr);
if (nullptr != TargetFunc) {
// NOTE(review): the #if that pairs with the #else/#endif below is not
// visible in this copy; the two alternatives differ only in the field
// name read from TargetFunc (startEA vs start_ea).
STARS_ea_t FirstAddrInFunc = TargetFunc->startEA;
#else
STARS_ea_t FirstAddrInFunc = TargetFunc->start_ea;
#endif
// A target that is not the function's first address gets an extra warning.
if (FirstAddrInFunc != TargetAddr) {
SMP_msg("WARNING: Orphaned code at %llx calls %llx which is inside func starting at %llx\n",
(unsigned long long) addr, (unsigned long long) TargetAddr, (unsigned long long) FirstAddrInFunc);
}
// The function is recorded by its first address in either case.
CurrProg->SetFuncAddrCalledFromOrphanedCode(FirstAddrInFunc);
SMP_msg("INFO: Orphaned code at %llx calls func at %llx\n",
(unsigned long long) addr, (unsigned long long) FirstAddrInFunc);
}
}
}
}
} // end if (nullptr == CurrFunc)
} // end if addr is code and isHead
} // end for (ea_t addr = CurrSeg->startEA; ...)
return;
} // end of FindLinksFromOrphanedCode()
// Version of FindOrphanedCode that does not emit annotations but can be used
// to determine at what point in time code becomes orphaned.
// CurrSeg supplies the address range that is walked one item at a time.
// FirstRun suppresses the "newly orphaned" warning when it is true.
// NOTE(review): in this copy the braces do not balance and InstrFlags has
// no visible declaration, so some lines appear to be missing -- confirm
// against the repository.
void Debug_FindOrphanedCode(STARS_Segment_t *CurrSeg, bool FirstRun) {
// Hard-coded address that triggers an extra DEBUG message when it is found orphaned.
STARS_ea_t DebugAddr = 0x8050db0;
for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
addr = SMP_get_item_end(addr)) {
if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
STARS_Function_t *CurrFunc = SMP_get_func(addr);
if (nullptr == CurrFunc) { // Code not in a func; orphaned
// Every orphan address is inserted into CodeOrphans. pairib.second
// presumably reports whether addr was newly inserted (std::set::insert
// semantics, assuming CodeOrphans is a std::set<STARS_ea_t>).
pair<set<STARS_ea_t>::iterator, bool> pairib;
pairib = CodeOrphans.insert(addr);
if (DebugAddr == addr) {
SMP_msg("DEBUG: Orphaned code addr %lx found.\n", (unsigned long) addr);
}
// Warn only on later runs, and only for addresses inserted on this run.
if ((!FirstRun) && (pairib.second)) {
SMP_msg("SERIOUS WARNING: Newly orphaned code at %lx \n", (unsigned long) addr);
} // end for (STARS_ea_t addr = CurrSeg->startEA; ...)
} // end of Debug_FindOrphanedCode()
// Audit the IDA database with respect to branches and calls. They should
// each have valid code targets (not data or unknown bytes) and the code
// cross references should reflect the linkage.
// NOTE(review): the body below is empty, so as shown here no audit is
// performed -- confirm whether the implementation was elided from this
// copy or is still to be written.
void AuditCodeTargets(void) {
} // end of AuditCodeTargets()
void SpecialDebugOutput(void) {
char disasm[MAXSTR];
vector<STARS_ea_t> ProblemAddrs;
ProblemAddrs.push_back(0x8066d08);
bool IDAsuccess;
int InstLen;
STARS_ssize_t StringLen;
clc5q
committed
insn_t LocalCmd;
uint32 LocalFeatures;
for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
STARS_ea_t addr = ProblemAddrs[index];
if (SMP_isCode(InstrFlags) && SMP_isHead(InstrFlags)) {
SMPInstr TempInst(addr);
IDAsuccess = TempInst.FillCmd(); // Emits ERROR messages if there are failures