Newer
Older
for (size_t ChunkIndex = 0; ChunkIndex < NumChunks; ++ChunkIndex) {
clc5q
committed
func_t *ChunkInfo = getn_fchunk((int) ChunkIndex);
if (is_func_tail(ChunkInfo)) {
// For each TAIL chunk, find all the parent chunks. Find the last
// parent chunk with an address less than the TAIL chunk address.
ea_t BestCandidate = 0;
func_parent_iterator_t FuncParent(ChunkInfo);
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg("Tail chunk: %x ", ChunkInfo->startEA);
clc5q
committed
#endif
for (bool ok = FuncParent.first(); ok; ok = FuncParent.next()) {
ea_t parent = FuncParent.parent();
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg(" parent: %x ", parent);
clc5q
committed
#endif
if ((parent > BestCandidate) && (parent < ChunkInfo->startEA))
BestCandidate = parent;
}
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg("\n");
clc5q
committed
#endif
// Make the best parent chunk the owner of the TAIL chunk if it is
// not already the owner.
if (ChunkInfo->owner != BestCandidate) {
if (0 < BestCandidate) {
if (set_tail_owner(ChunkInfo, BestCandidate)) {
func_t *FuncInfo = get_func(BestCandidate);
clc5q
committed
SMP_msg("Set %x as new owner of tail %x\n",
clc5q
committed
BestCandidate, ChunkInfo->startEA);
// Reanalyze the parent function (and all its
// tail chunks) now that the structure has changed.
reanalyze_function(FuncInfo);
}
else {
clc5q
committed
SMP_msg("set_tail_owner failed for tail %x and parent %x\n",
clc5q
committed
ChunkInfo->startEA, BestCandidate);
}
}
else {
func_t *FuncInfo = get_func(ChunkInfo->owner);
get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg("No good parent candidate before tail at %x\n",
clc5q
committed
ChunkInfo->startEA);
clc5q
committed
SMP_msg("Current parent is %x: %s\n", FuncInfo->startEA, FuncName);
clc5q
committed
#endif
// Find out if a function entry chunk that comes before the
// tail is a better candidate for the owner (i.e. it falls
// through to the tail, or jumps to it).
BestCandidate = 0;
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg("Finding parent func candidates for %x:", ChunkInfo->startEA);
clc5q
committed
#endif
SMP_bounds_t CurrFunc;
for (size_t FuncIndex = 0; FuncIndex < FuncBounds.size(); ++FuncIndex) {
CurrFunc = FuncBounds[FuncIndex];
if ((CurrFunc.startEA < ChunkInfo->startEA)
&& (CurrFunc.startEA > BestCandidate)) {
BestCandidate = CurrFunc.startEA;
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg(" candidate: %x tail: %x", BestCandidate,
clc5q
committed
ChunkInfo->startEA);
#endif
}
else {
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg(" not a candidate: %x tail: %x best: %x\n",
clc5q
committed
CurrFunc.startEA, ChunkInfo->startEA, BestCandidate);
#endif
break;
}
} // end for (size_t FuncIndex = 0; ...)
if (0 >= BestCandidate) { // highly unlikely
clc5q
committed
SMP_msg("No good func entry parent candidate.\n");
clc5q
committed
}
else {
FuncInfo = get_func(BestCandidate);
get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
#if SMP_DEBUG_CHUNKS
clc5q
committed
SMP_msg("Best func entry parent candidate: %s at %x",
clc5q
committed
FuncName, BestCandidate);
if (FuncInfo->endEA == ChunkInfo->startEA)
clc5q
committed
SMP_msg(" Function endEA == tail chunk startEA");
SMP_msg("\n");
clc5q
committed
#endif
}
}
} // end if (ChunkInfo->owner != BestCandidate)
#if SMP_DEBUG_CHUNKS
else {
clc5q
committed
SMP_msg("Already best parent for %x is %x\n", ChunkInfo->startEA,
clc5q
committed
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
ChunkInfo->owner);
}
#endif
} // end if (is_func_tail(ChunkInfo))
} // end for (size_t ChunkIndex = 0; ...)
return;
} // end of AuditTailChunkOwnership()
// If the addresses signified from DisasmIndex to IDAProIndex are
// all considered data and do NOT follow a return instruction,
// return false and update AreaSize to reflect the area to be
// converted.
// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
ea_t PrevIDAAddr;
ea_t NextIDAAddr;
size_t ShadowDisasmIndex = DisasmIndex - 1;
ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
bool CannotConvert = false; // return value
bool DebugAddress = false;
#if SMP_DEBUG_FIXUP_IDB
DebugAddress = (DisasmAddr == 0x806c19a);
#endif
if (DebugAddress) {
clc5q
committed
SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex);
SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(),
clc5q
committed
DisasmLocs.size());
}
if (IDAProIndex >= IDAProLocs.size()) {
// Have already processed the last IDA address.
clc5q
committed
if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n");
clc5q
committed
return true;
}
else if (DisasmIndex >= DisasmLocs.size()) {
// Strange. Last Disasm address is only one to convert, and
// IDA still has addresses after that?
clc5q
committed
if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n");
clc5q
committed
return true;
}
else if (IDAProIndex < 2) {
// We have Disasm addrs before the very first IDA addr. We
// don't trust this boundary case.
clc5q
committed
if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n");
clc5q
committed
return true;
}
NextIDAAddr = IDAProLocs[IDAProIndex - 1];
PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
clc5q
committed
if (DebugAddress) SMP_msg(" PrevIDAAddr: %x NextIDAAddr: %x\n", PrevIDAAddr, NextIDAAddr);
clc5q
committed
// See if previous IDA address was a return.
flags_t PrevFlags = getFlags(PrevIDAAddr);
if (!isCode(PrevFlags) || !isHead(PrevFlags)) {
clc5q
committed
SMP_msg("PrevIDAAddr %x not isCode or not isHead.\n", PrevIDAAddr);
clc5q
committed
return true;
}
SMPInstr PrevInstr(PrevIDAAddr);
PrevInstr.Analyze();
clc5q
committed
if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n");
clc5q
committed
if (PrevInstr.MDIsReturnInstr()) {
// Right after a return come no-ops and 2-byte no-ops
// that are just for alignment. IDA does not seem to be
// happy when we convert all those to code.
clc5q
committed
if (DebugAddress) SMP_msg(" Data followed a return instruction.\n");
clc5q
committed
return true;
}
// Now, see if the area from DisasmAddr to NextIDAAddr is all data
// according to IDA.
while (DisasmAddr < NextIDAAddr) {
flags_t DataFlags = getFlags(DisasmAddr);
if (isTail(DataFlags)) {
clc5q
committed
if (DebugAddress) SMP_msg(" tail byte: %x\n", DisasmAddr);
clc5q
committed
DisasmAddr = get_item_end(DisasmAddr);
}
else if (isData(DataFlags)) {
clc5q
committed
if (DebugAddress) SMP_msg(" data byte: %x\n", DisasmAddr);
clc5q
committed
DisasmAddr = get_item_end(DisasmAddr);
}
else if (isCode(DataFlags)) {
// How could this ever happen?
clc5q
committed
if (DebugAddress) SMP_msg(" isCode: %x\n", DisasmAddr);
clc5q
committed
return true;
}
else { // must be isUnknown()
// Very conservative here; only want to convert when the whole
// region is data, because that is a symptom of IDA missing
// a piece of code within a function (usually a piece of code
// that is only reachable via an indirect jump).
clc5q
committed
if (DebugAddress) SMP_msg(" Not isData: %x\n", DisasmAddr);
clc5q
committed
return true;
}
clc5q
committed
if (DebugAddress) SMP_msg(" new DisasmAddr: %x\n", DisasmAddr);
clc5q
committed
} // end while (DisasmAddr < NextIDAAddr)
clc5q
committed
if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
clc5q
committed
if (!CannotConvert) {
// Success.
DisasmAddr = DisasmLocs[ShadowDisasmIndex];
AreaSize = NextIDAAddr - DisasmAddr;
if (DebugAddress) {
clc5q
committed
SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
clc5q
committed
AreaSize, ShadowDisasmIndex, DisasmIndex);
clc5q
committed
SMP_msg(" exiting FindDataToConvert()\n");
SMP_msg("\n");
clc5q
committed
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
}
} // end if (!CannotConvert)
return CannotConvert;
} // end of FindDataToConvert()
// Intended to report whether the converted code region [StartAddr, EndAddr)
// looks like a function prologue, in which case it should not be merged into
// the previous function. Not implemented yet: both arguments are ignored and
// the answer is always false.
bool IsFunctionPrologue(ea_t StartAddr, ea_t EndAddr) {
	(void) StartAddr;
	(void) EndAddr;
	const bool LooksLikePrologue = false; // **!!** TODO
	return LooksLikePrologue;
} // end of IsFunctionPrologue()
// Patch program bytes that could not be converted from
// data to code, if it can be determined that the bytes represent code
// that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
// often are found in optimized glibc code because gcc was able to
// determine that a function pointer was zero and did constant propagation,
// but unfortunately was not able to determine that the code was unreachable.
// IDA will not succeed in ua_code() for "call 0", but there is no danger
// of a working program ever executing this code. Replacing the call with
// no-ops permits us to continue converting a contiguous range of data to
// code, and permits IDA to reanalyze the function later.
//
// CurrDisasmAddr: address of the instruction that failed to convert.
// Returns true only if every byte of the instruction was patched to 0x90 and
// the patched location was then successfully converted to code; returns
// false (after logging the reason) in every other case.
bool MDPatchUnconvertedBytes(ea_t CurrDisasmAddr) {
	flags_t AddrFlags = getFlags(CurrDisasmAddr);
	if (isData(AddrFlags) || isTail(AddrFlags)) {
		// Bytes should have been converted to unknown already.
		SMP_msg("Cannot patch data bytes or tail bytes at %x\n", CurrDisasmAddr);
		return false;
	}

	SMPInstr PatchInstr(CurrDisasmAddr);
	PatchInstr.Analyze();
	int InstrLen = PatchInstr.GetCmd().size;
	if (0 >= InstrLen) {
		SMP_msg("decode_insn() failed on patch location %x\n", CurrDisasmAddr);
		return false;
	}
	else {
		if (PatchInstr.GetCmd().itype != NN_call) {
			SMP_msg("Cannot patch non-call instruction at %x\n", CurrDisasmAddr);
			return false;
		}
		PatchInstr.PrintOperands();
		op_t CallDest = PatchInstr.GetFirstUse()->GetOp();
		if ((o_near != CallDest.type) || (0 != CallDest.addr)) {
			SMP_msg("Cannot patch call unless it is call near ptr 0 at %x\n",
				CurrDisasmAddr);
			return false;
		}

		// Overwrite each byte of the call with a one-byte no-op.
		ea_t PatchAddr = CurrDisasmAddr;
		for (int i = 0; i < InstrLen; ++i) {
			bool ok = patch_byte(PatchAddr, 0x90); // x86 no-op
			if (!ok) {
				SMP_msg("patch_byte() failed at %x\n", PatchAddr);
				return false;
			}
			++PatchAddr;
		}
		SMP_msg("Patched %d bytes successfully at %x\n", InstrLen, CurrDisasmAddr);

		// Retry the data-to-code conversion now that the bytes are no-ops.
#if IDA_SDK_VERSION < 600
		InstrLen = ua_code(CurrDisasmAddr);
#else
		InstrLen = create_insn(CurrDisasmAddr);
#endif
		if (0 >= InstrLen) {
			SMP_msg(" ... but ua_code() still failed!\n");
			return false;
		}
	} // end if (0 >= InstrLen) ... else ...
	return true;
} // end of MDPatchUnconvertedBytes()
// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
clc5q
committed
// and an external disassembler (in DisasmLocs). Compare the lists and
// try to convert addresses to code that are found in DisasmLocs but
// not in IDAProLocs. Emit warnings when IDAProLocs has a code address
// not found in DisasmLocs.
void FixCodeIdentification(void) {
size_t DisasmIndex = 0;
ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
size_t IDAProIndex = 0;
ea_t CurrAddr = IDAProLocs[IDAProIndex++];
while (DisasmIndex <= DisasmLocs.size()) {
// If the current address is less than the current
// external disasm address, we have the rare case in
// which IDA Pro has identified an address as code
// but the external disasm has not. Emit a warning
// message and go on to the next IDA address.
if (CurrAddr < CurrDisasmAddr) {
SMPInstr TempInstr(CurrAddr);
TempInstr.Analyze();
clc5q
committed
SMP_msg("Address %x is code in IDB but not in external disassembler: %s\n",
clc5q
committed
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
CurrAddr, TempInstr.GetDisasm());
if (IDAProIndex < IDAProLocs.size())
CurrAddr = IDAProLocs[IDAProIndex++];
else {
// Last IDA addr; might still process Disasm addrs
// after loop exit.
break;
}
}
else if (CurrAddr == CurrDisasmAddr) {
// If equal, no problem, we are moving through the
// code addresses in lockstep. Grab the next address
// from each source.
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
++DisasmIndex; // cause loop exit; skip cleanup loop
}
if (IDAProIndex < IDAProLocs.size())
CurrAddr = IDAProLocs[IDAProIndex++];
else {
// Last IDA addr; might still process Disasm addrs
// after loop exit in cleanup loop.
break;
}
}
else {
// We must have CurrAddr > CurrDisasmAddr. That means
// IDA has jumped over some code addresses in
// DisasmLocs. We need to try to convert addresses
// to code until we can reach the current addr.
// For now, we will address only the case in which IDA
// has identified addresses as data bytes, and the
// external disassembler(e.g. objdump) has identified
// the same addresses as code. We only want to deal with
// contiguous areas of data-to-code conversion that do NOT
// follow a return statement.
int AreaSize = 0;
ea_t AreaStart = CurrDisasmAddr;
ea_t AreaEnd;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("CurrDisasmAddr: %x CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
clc5q
committed
#endif
bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
if (SkipArea) {
// Skip over the extra external disasm addresses.
while (CurrDisasmAddr < CurrAddr)
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
// Convert the overlooked code region to unexplored.
AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
clc5q
committed
#endif
do_unknown_range(AreaStart, AreaSize, DOUNK_SIMPLE);
SMP_bounds_t ConvertRegion;
ConvertRegion.startEA = AreaStart;
ConvertRegion.endEA = AreaEnd;
FixupRegion CurrRegion(ConvertRegion);
CodeReanalyzeList.push_back(CurrRegion);
clc5q
committed
bool AllConverted = true;
bool AllNops = true;
clc5q
committed
do {
flags_t InstrFlags = getFlags(CurrDisasmAddr);
if (!isUnknown(InstrFlags)) {
clc5q
committed
SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
int InstrLen = ua_code(CurrDisasmAddr);
int InstrLen = create_insn(CurrDisasmAddr);
clc5q
committed
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(CurrDisasmAddr);
NewInstr.Analyze();
if (!NewInstr.IsNop())
AllNops = false;
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
#if 0
clc5q
committed
SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
clc5q
committed
InstrLen, NewInstr.GetDisasm());
#endif
clc5q
committed
#endif
}
else {
if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q
committed
SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q
committed
AllConverted = false;
clc5q
committed
SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q
committed
}
}
} // end if (isCode(InstrFlags) ... else ...
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
// cause loops to exit
CurrDisasmAddr = CurrAddr;
++DisasmIndex; // skip cleanup loop
}
} while (CurrDisasmAddr < CurrAddr);
if (AllConverted && AllNops) {
// We want to convert the region back to unexplored bytes
// and take it off the work list. Regions that are all nops
// create data flow analysis problems sometimes. The region
// is often unreachable code and produces a basic block with
// no predecessors within a function. This often happens when
// an optimizing compiler uses nops as padding to align jump
// targets on cache line bounaries. With no fall through into
// the nops, they are unreachable and should be left as unknown.
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
CurrRegion.GetEnd());
do_unknown_range(CurrRegion.GetStart(),
CurrRegion.GetEnd() - CurrRegion.GetStart(), DOUNK_SIMPLE);
CodeReanalyzeList.pop_back();
}
clc5q
committed
} // end if (SkipArea) ... else ...
} // end if (addr < CurrDisasmAddr) .. else if ... else ...
} // end while (DisasmIndex <= DisasmLocs.size()
#if 0 // Make this code use FindDataToConvert() **!!**
// Cleanup loop:
// If there are still Disasm addrs to process, try to turn them
// into code in the IDB.
while (DisasmIndex <= DisasmLocs.size()) {
flags_t InstrFlags = getFlags(CurrDisasmAddr);
if (isCode(InstrFlags)) {
clc5q
committed
SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
clc5q
committed
}
else {
// Clear bytes to unexplored.
clc5q
committed
segment_t *seg = SMP_getseg(CurrDisasmAddr);
clc5q
committed
if (SEG_CODE == seg->type) {
do_unknown_range(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, DOUNK_SIMPLE);
}
else {
// Might be safest to just discontinue processing
// if we wander into a non-code segment.
// DisasmLocs should not have an entire code segment
// that IDA Pro missed.
break;
}
int InstrLen = ua_code(CurrDisasmAddr);
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(CurrDisasmAddr);
NewInstr.Analyze();
clc5q
committed
SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
clc5q
committed
NewInstr.GetDisasm());
}
else {
clc5q
committed
SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q
committed
}
} // end if (isCode(InstrFlags) ... else ...
if (DisasmIndex < DisasmLocs.size()) {
CurrDisasmAddr = DisasmLocs[DisasmIndex++];
}
else {
++DisasmIndex; // cause loop to exit
}
} // end while (DisasmIndex <= DisasmLocs.size()
#endif
return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
// Earlier failures are usually because the instruction branches to an address that has not
// yet been converted from data to code, so ua_code() failed. Now that all data to code
// conversions have completed, ua_code() should succeed.
// Return the number of instructions successfully analyzed.
int FixupNewCodeChunks(void) {
list<FixupRegion>::iterator CurrRegion;
int changes = 0;
for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
bool AllConverted = true;
bool AllNops = true;
bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
if (NoFixups) {
CurrRegion->SetStart(BADADDR); // mark for removal
continue; // skip to next region
}
list<ea_t>::iterator CurrInstr;
for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr) {
#if IDA_SDK_VERSION < 600
int InstrLen = ua_code(*CurrInstr);
#else
int InstrLen = create_insn(*CurrInstr);
if (InstrLen > 0) { // Successfully converted to code
SMPInstr NewInstr(*CurrInstr);
NewInstr.Analyze();
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
#endif
if (!NewInstr.IsNop()) {
AllNops = false;
*CurrInstr = BADADDR; // mark for removal
}
}
else {
AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
}
} // end for all instrs in CurrRegion
if (AllConverted && !AllNops) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
CurrRegion->SetStart(BADADDR); // mark for removal
}
else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
do_unknown_range(CurrRegion->GetStart(),
CurrRegion->GetEnd() - CurrRegion->GetStart(), DOUNK_SIMPLE);
CurrRegion->SetStart(BADADDR); // mark for removal
}
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
else {
// Remove only the instructions that were fixed up.
CurrInstr = CurrRegion->FixupInstrs.begin();
while (CurrInstr != CurrRegion->FixupInstrs.end()) {
if (BADADDR == *CurrInstr) {
CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
}
else {
++CurrInstr;
}
}
}
} // end for all regions in the CodeReanalyzeList
// Remove completed regions from the CodeReanalyzeList
CurrRegion = CodeReanalyzeList.begin();
while (CurrRegion != CodeReanalyzeList.end()) {
if (BADADDR == CurrRegion->GetStart())
CurrRegion = CodeReanalyzeList.erase(CurrRegion);
else
++CurrRegion;
}
#if 0
if (AllConverted) {
if (IsFunctionPrologue(AreaStart, AreaEnd)) {
// Create a new function entry chunk here.
// **!!** TODO
;
}
else {
// Extend the previous chunk to include the
// converted code.
ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
func_t *PrevChunk = get_fchunk(PrevIDAAddr);
#if SMP_DEBUG_FIXUP_IDB
clc5q
committed
SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
SMP_msg(" func_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0 // temporary for debugging
if (is_func_entry(PrevChunk)) {
// Extend the func entry to contain the new code.
if (func_setend(PrevIDAAddr, AreaEnd)) {
clc5q
committed
SMP_msg("Func extended to include code from %x to %x\n",
AreaStart, AreaEnd);
FuncReanalyzeList.push_back(PrevIDAAddr);
}
else {
clc5q
committed
SMP_msg("Failed to extend func from %x to %x\n",
AreaStart, AreaEnd);
}
}
else { // tail
// See if this works for function tails, also.
// Extend the func entry to contain the new code.
if (func_setend(PrevIDAAddr, AreaEnd)) {
clc5q
committed
SMP_msg("Tail extended to include code from %x to %x\n",
AreaStart, AreaEnd);
func_t *TailOwner = get_func(PrevChunk->owner);
FuncReanalyzeList.push_back(PrevIDAAddr);
}
else {
clc5q
committed
SMP_msg("Failed to extend tail from %x to %x\n",
AreaStart, AreaEnd);
}
} // end if (is_func_entry()) ... else ...
#endif
} // end if (IsFunctionPrologue()) ... else ...
} // end if (AllConverted)
else {
clc5q
committed
SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
}
#endif
return changes;
} // end of FixupNewCodeChunnks()
// Audit the IDA code database by looking at all instructions in the
// code segment and printing all those that are not contained in a
// function. Emit the context-free annotations that we are able to
// emit on a per-instruction basis.
//
// CurrSeg: segment whose items are walked from startEA to endEA.
// AnnotFile, InfoAnnotFile: passed through to SMPInstr::EmitAnnotations().
//
// Unknown (unexplored) bytes are also reported, and an attempt is made to
// convert them to code and emit annotations for the result.
//
// NOTE(review): the if/else nesting around the tag_remove() and
// generate_disasm_line() error messages was reconstructed from brace
// balance; confirm against the upstream file.
void FindOrphanedCode(segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
	char disasm[MAXSTR];
	for (ea_t addr = CurrSeg->startEA; addr < CurrSeg->endEA;
		addr = get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isTail(InstrFlags))
			continue;
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			func_t *CurrFunc = get_func(addr);
			if (NULL == CurrFunc) {
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
				SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm());
				// TODO: If there are code xrefs to the orphan code,
				// see what kind. If a CALL, and orphan code looks
				// like a prologue, make a function. If a JUMP of
				// some kind, then make a function chunk and make
				// it a tail of all functions that jump to it. **!!**

				// Do machine-dependent fixes for DEF and USE lists.
				// The fixes can help produce better annotations.
				CurrInst.MDFixupDefUseLists();
				// If instruction is still not included in a code chunk,
				// emit annotations for it in isolation.
				CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile);
			}
		}
		else if (isUnknown(InstrFlags)) {
			SMP_msg("Unanalyzed byte at %x\n", addr);
			// Can IDA analyze this to be code?
			int InstrLen;
#if IDA_SDK_VERSION < 600
			InstrLen = ua_code(addr);
#else
			InstrLen = create_insn(addr);
#endif
			// InstrLen is not consulted; success is judged by whether a
			// disassembly line can be generated.
			(void) InstrLen;
			bool IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
			if (IDAsuccess) {
				// Remove interactive color-coding tags.
				ssize_t StringLen = tag_remove(disasm, disasm, 0);
				if (-1 >= StringLen) {
					SMP_msg("ERROR: tag_remove failed at addr %x \n", addr);
				}
				else {
					SMP_msg("Successfully analyzed! %s\n", disasm);
					SMPInstr UnknownInstr(addr);
					UnknownInstr.Analyze();
					// TODO: Get new code into a chunk. **!!**
					// If instruction is still not included in a code chunk,
					// emit annotations for it in isolation.
					UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile);
				}
			}
			else {
				SMP_msg("ERROR: generate_disasm_line failed at addr %x \n", addr);
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()
// Version of FindOrphanedCode that does not emit annotations but can be used
// to determine at what point in time code becomes orphaned.
//
// Every code head in CurrSeg that is not inside a function is inserted into
// the CodeOrphans set. When FirstRun is false, an address that was not
// already in the set is reported as newly orphaned.
void Debug_FindOrphanedCode(segment_t *CurrSeg, bool FirstRun) {
	ea_t DebugAddr = 0x8050db0; // hard-coded address that gets an extra trace message
	for (ea_t addr = CurrSeg->startEA; addr < CurrSeg->endEA;
		addr = get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			func_t *CurrFunc = get_func(addr);
			if (NULL == CurrFunc) { // Code not in a func; orphaned
				pair<set<ea_t>::iterator, bool> pairib;
				pairib = CodeOrphans.insert(addr);
				if (DebugAddr == addr) {
					SMP_msg("DEBUG: Orphaned code addr %x found.\n", addr);
				}
				// pairib.second is true only when addr was not yet in the set.
				if ((!FirstRun) && (pairib.second)) {
					SMP_msg("SERIOUS WARNING: Newly orphaned code at %x \n", addr);
				}
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of Debug_FindOrphanedCode()
// Audit the IDA database with respect to branches and calls. They should
// each have valid code targets (not data or unknown bytes) and the code
// cross references should reflect the linkage.
void AuditCodeTargets(void) {
// Cover all the code that IDA has grouped into functions by iterating
// through all function chunks in the program.
size_t NumChunks = get_fchunk_qty();
for (size_t ChunkIndex = 0; ChunkIndex < NumChunks; ++ChunkIndex) {
func_t *ChunkInfo = getn_fchunk((int) ChunkIndex);
char FuncName[MAXSTR];
get_func_name(ChunkInfo->startEA, FuncName, sizeof(FuncName) - 1);
// First, see if any calls to this function (if this chunk is
// an entry point) are not coming from within functions.
if (is_func_entry(ChunkInfo)) {
clc5q
committed
SMP_xref_t xb;
ea_t addr = ChunkInfo->startEA;
clc5q
committed
for (bool ok = xb.SMP_first_to(addr, XREF_ALL); ok; ok = xb.SMP_next_to()) {
uchar XrefType = xb.GetType() & XREF_MASK;
if (xb.GetIscode()) {
if ((XrefType == fl_U) || (XrefType == fl_USobsolete)) {
clc5q
committed
SMP_msg("Bad xref type: %x %s\n", addr, FuncName);
clc5q
committed
#if SMP_DEBUG_FIXUP_IDB
else if ((XrefType == fl_JF) || (XrefType == fl_JN)) {
clc5q
committed
SMP_msg("Jump to func: %x %s from: %x\n",
addr, FuncName, xb.GetFrom());
clc5q
committed
#endif
clc5q
committed
SMP_msg("Fall through to func: %x %s from: %x\n",
addr, FuncName, xb.GetFrom());
}
else if ((XrefType == fl_CF) || (XrefType == fl_CN)) {
// Far call or Near call
clc5q
committed
func_t *CallingFunc = get_func(xb.GetFrom());
clc5q
committed
SMP_msg("Call to %x Func %s from %x not in function.\n",
addr, FuncName, xb.GetFrom());
clc5q
committed
} // end if (xb.GetIscode())
else { // DATA xref
if (XrefType == dr_O) {
clc5q
committed
SMP_msg("Data xref to %x Func %s from %x\n",
addr, FuncName, xb.GetFrom());
clc5q
committed
SMP_msg("Strange data xref %d to %x Func %s from %x\n",
XrefType, addr, FuncName, xb.GetFrom());
clc5q
committed
} // end for (bool ok = xb.SMP_first_to(); ...)
} // end if (is_func_entry(ChunkInfo))
// Next, see if any call or branch in this chunk references
// a target address that is not in a function. If so, and the
// callee address code looks like a function prologue, then
// create a function for the contiguous code starting at that
// address and ask IDA to analyze it and store it in the
// IDA database. If it is a branch target, not a call target,
// create a new TAIL chunk for the current parent functions.
for (ea_t addr = ChunkInfo->startEA; addr < ChunkInfo->endEA;
addr = get_item_end(addr)) {
flags_t InstrFlags = getFlags(addr);
if (isCode(InstrFlags) && isHead(InstrFlags)) {
SMPInstr CurrInst(addr);
CurrInst.Analyze();
if ((CALL|JUMP|COND_BRANCH) & CurrInst.GetDataFlowType()) {
clc5q
committed
SMP_xref_t xb;
for (bool ok = xb.SMP_first_from(addr, XREF_FAR); ok; ok = xb.SMP_next_from()) {
if (xb.GetIscode()) {
ea_t FirstAddr = xb.GetTo();
func_t *FuncInfo = get_func(FirstAddr);
if (NULL == FuncInfo) {
// Found call to addr that is not in a func.
// Find limits of contiguous code starting at FirstAddr.
clc5q
committed
ea_t LastAddr = FindNewFuncLimit(FirstAddr);
if (CALL == CurrInst.GetDataFlowType())
clc5q
committed
SMP_msg("Found new func from %x to %x\n",
FirstAddr, LastAddr);
else
clc5q
committed
SMP_msg("Found new chunk from %x to %x\n",
FirstAddr, LastAddr);
}
}
}
}
}
}
} // end for (size_t ChunkIndex = 0; ... )
return;
} // end of AuditCodeTargets()
// Find the span of contiguous code that is not contained within any
// function, starting at StartAddr, which should already be an example
// of an instruction address that is outside of a function.
//
// Returns the address of the last code head reached while walking forward
// item by item from StartAddr within its segment. The walk stops at the
// first item that is not a code head, at the segment end, or at the first
// code head that belongs to an existing function; in the last case that
// address itself is the value returned. Returns StartAddr unchanged if no
// segment contains it.
ea_t FindNewFuncLimit(ea_t StartAddr) {
	ea_t LimitAddr = StartAddr;
	segment_t *seg = SMP_getseg(StartAddr);
	if (NULL == seg)
		return LimitAddr;
	ea_t SegLimit = seg->endEA;
	for (ea_t addr = get_item_end(StartAddr); addr < SegLimit; addr = get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isCode(InstrFlags) && isHead(InstrFlags)) {
			LimitAddr = addr;
			func_t *FuncInfo = get_func(addr);
			if (NULL != FuncInfo)
				break; // ran into an existing function
		}
		else // Not a code head; time to stop.
			break;
	}
	return LimitAddr;
} // end of FindNewFuncLimit()
void SpecialDebugOutput(void) {
char disasm[MAXSTR];
vector<ea_t> ProblemAddrs;
ProblemAddrs.push_back(0x8066d08);
bool IDAsuccess;
int InstLen;
ssize_t StringLen;
clc5q
committed
insn_t LocalCmd;
ulong LocalFeatures;
for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
ea_t addr = ProblemAddrs[index];
flags_t InstrFlags = getFlags(addr);
if (isCode(InstrFlags) && isHead(InstrFlags)) {
clc5q
committed
IDAsuccess = SMPGetCmd(addr, LocalCmd, LocalFeatures);
InstLen = (int) LocalCmd.size;
if ((IDAsuccess) && (0 < InstLen)) {
IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
if (IDAsuccess) {
StringLen = tag_remove(disasm, disasm, 0);
if (-1 < StringLen)
clc5q
committed
SMP_msg("Problem addr %x : %s\n", addr, disasm);
clc5q
committed
SMP_msg("ERROR: tag_remove failed at addr %x \n", addr);
clc5q
committed
SMP_msg("ERROR: generate_disasm_line failed at addr %x \n", addr);
clc5q
committed
SMP_msg("ERROR: decode_insn failed at addr %x \n", addr);
}
}
return;
} // end of SpecialDebugOutput()
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
// Map a call-type keyword from the policy file to its ZST_SysCallType:
//   "PRIVILEGECALLS" -> ZST_HIGHPRIVILEGE_CALL
//   "FILECALLS"      -> ZST_FILE_CALL
//   "NETWORKCALLS"   -> ZST_NETWORK_CALL
// Any other string yields ZST_UNMONITORED_CALL. The comparison is exact and
// case-sensitive. Str2 must be a valid NUL-terminated string.
ZST_SysCallType ConvertStringToCallType(char *Str2) {
	if (strcmp(Str2, "PRIVILEGECALLS") == 0)
		return ZST_HIGHPRIVILEGE_CALL;
	if (strcmp(Str2, "FILECALLS") == 0)
		return ZST_FILE_CALL;
	if (strcmp(Str2, "NETWORKCALLS") == 0)
		return ZST_NETWORK_CALL;
	return ZST_UNMONITORED_CALL;
} // end of ConvertStringToCallType()
// Map a policy keyword from the policy file to its ZST_Policy value:
//   "DISALLOW"  -> ZST_DISALLOW
//   "WHITELIST" -> ZST_WHITELIST
//   "BLACKLIST" -> ZST_BLACKLIST
// Any other string yields ZST_ALLOWALL; callers are expected to have done
// their error handling before calling. The comparison is exact and
// case-sensitive. Str3 must be a valid NUL-terminated string.
ZST_Policy ConvertStringToPolicy(char *Str3) {
	if (strcmp(Str3, "DISALLOW") == 0)
		return ZST_DISALLOW;
	if (strcmp(Str3, "WHITELIST") == 0)
		return ZST_WHITELIST;
	if (strcmp(Str3, "BLACKLIST") == 0)
		return ZST_BLACKLIST;
	// error handling precedes calls to this function
	return ZST_ALLOWALL;
} // end of ConvertStringToPolicy()
// Look up the Zephyr Security Toolkit call type registered for a function
// name in ZST_FuncTypeMap. A name that is not in the map (it might not even
// be a system call) is reported as ZST_UNMONITORED_CALL.
ZST_SysCallType GetCallTypeFromFuncName(string SysCallName) {
	map<string, ZST_SysCallType>::iterator Entry = ZST_FuncTypeMap.find(SysCallName);
	const bool Known = (Entry != ZST_FuncTypeMap.end());
	return Known ? Entry->second : ZST_UNMONITORED_CALL;
} // end of GetCallTypeFromFuncName()
// Look up the user-specified security policy for a call type in
// ZST_TypePolicyMap. A call type with no entry defaults to ZST_ALLOWALL.
ZST_Policy GetPolicyFromCallType(ZST_SysCallType CallType) {
	map<ZST_SysCallType, ZST_Policy>::iterator Entry = ZST_TypePolicyMap.find(CallType);
	const bool HasPolicy = (Entry != ZST_TypePolicyMap.end());
	// Policy not found; default to ALLOW_ALL
	return HasPolicy ? Entry->second : ZST_ALLOWALL;
} // end of GetPolicyFromCallType()
// Report whether LocationName is on the location whitelist that belongs to
// CallType: ZST_FileLocWhitelist for ZST_FILE_CALL, ZST_NetworkLocWhitelist
// for ZST_NETWORK_CALL. Any other call type is not expected here and yields
// false. Only exact matches are recognized.
// NOTE: HANDLE CASE IN WHICH WHITELISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationWhitelisted(ZST_SysCallType CallType, string LocationName) {
	if (ZST_FILE_CALL == CallType)
		return (ZST_FileLocWhitelist.count(LocationName) != 0);
	if (ZST_NETWORK_CALL == CallType)
		return (ZST_NetworkLocWhitelist.count(LocationName) != 0);
	return false; // should not be here
} // end of IsLocationWhitelisted()
// Report whether LocationName is on the location blacklist that belongs to
// CallType: ZST_FileLocBlacklist for ZST_FILE_CALL, ZST_NetworkLocBlacklist
// for ZST_NETWORK_CALL. Any other call type is not expected here and yields
// false. Only exact matches are recognized.
// NOTE: HANDLE CASE IN WHICH BLACKLISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationBlacklisted(ZST_SysCallType CallType, string LocationName) {
	if (ZST_FILE_CALL == CallType)
		return (ZST_FileLocBlacklist.count(LocationName) != 0);
	if (ZST_NETWORK_CALL == CallType)
		return (ZST_NetworkLocBlacklist.count(LocationName) != 0);
	return false; // should not be here
} // end of IsLocationBlacklisted()
// These two constants should agree with their counterparts in ZST-policy.c.
#define ZST_MAX_FILE_NAME_LEN 1024
#define ZST_MAX_CALL_NAME_LEN 64
// Read the foo.exe.policy file to initialize our security policies for system calls.
void ZST_InitPolicies(const char *PolicyFileName) {
clc5q
committed
FILE *PolicyFile = SMP_fopen(PolicyFileName, "r");
char Str1[ZST_MAX_CALL_NAME_LEN], Str2[ZST_MAX_CALL_NAME_LEN], Str3[ZST_MAX_FILE_NAME_LEN];
if (NULL != PolicyFile) {
clc5q
committed
while (!SMP_feof(PolicyFile)) {
int ItemsRead = qfscanf(PolicyFile, "%63s %63s %1023s", Str1, Str2, Str3);
if (3 != ItemsRead) {
clc5q
committed
SMP_msg("ERROR: Line in %s had %d items instead of the required 3; line ignored.\n", PolicyFileName, ItemsRead);