// SMPStaticAnalyzer.cpp
				else {
					func_t *FuncInfo = get_func(ChunkInfo->owner);
					get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
#if SMP_DEBUG_CHUNKS
					SMP_msg("No good parent candidate before tail at %x\n",
					SMP_msg("Current parent is %x: %s\n", FuncInfo->startEA, FuncName);
#endif
					// Find out if a function entry chunk that comes before the
					//  tail is a better candidate for the owner (i.e. it falls
					//  through to the tail, or jumps to it).
					BestCandidate = 0;
#if SMP_DEBUG_CHUNKS
					SMP_msg("Finding parent func candidates for %x:", ChunkInfo->startEA);
#endif
					SMP_bounds_t CurrFunc;
					for (size_t FuncIndex = 0; FuncIndex < FuncBounds.size(); ++FuncIndex) {
						CurrFunc = FuncBounds[FuncIndex];
						if ((CurrFunc.startEA < ChunkInfo->startEA)
							&& (CurrFunc.startEA > BestCandidate)) {
							BestCandidate = CurrFunc.startEA;
#if SMP_DEBUG_CHUNKS
							SMP_msg(" candidate: %x tail: %x", BestCandidate,
							SMP_msg(" not a candidate: %x tail: %x best: %x\n",
								CurrFunc.startEA, ChunkInfo->startEA, BestCandidate);
#endif
							break;
						}
					} // end for (size_t FuncIndex = 0; ...)
					if (0 >= BestCandidate) { // highly unlikely
						SMP_msg("No good func entry parent candidate.\n");
					}
					else {
						FuncInfo = get_func(BestCandidate);
						get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
#if SMP_DEBUG_CHUNKS
						SMP_msg("Best func entry parent candidate: %s at %x",
							FuncName, BestCandidate);
						if (FuncInfo->endEA == ChunkInfo->startEA)
							SMP_msg(" Function endEA == tail chunk startEA");
						SMP_msg("\n");
#endif
					}
				}
			} // end if (ChunkInfo->owner != BestCandidate)
#if SMP_DEBUG_CHUNKS
			else {
				SMP_msg("Already best parent for %x is %x\n", ChunkInfo->startEA,
					ChunkInfo->owner);
			}
#endif
		} // end if (is_func_tail(ChunkInfo))
	} // end for (size_t ChunkIndex = 0; ...)

	return;
} // end of AuditTailChunkOwnership()

// Decide whether the gap between the current external-disassembler address
//  and the next IDA Pro code address should be converted from data to code.
// If the addresses signified from DisasmIndex to IDAProIndex are
//  all considered data and do NOT follow a return instruction,
//  return false and update AreaSize to reflect the area to be
//  converted.
// Parameters:
//  IDAProIndex - index into IDAProLocs; entry [IDAProIndex - 1] is used as the
//                next IDA address and entry [IDAProIndex - 2] as the previous one.
//  DisasmIndex - index into DisasmLocs; entry [DisasmIndex - 1] is the first
//                address examined.
//  AreaSize    - output; on the success path it is set to the distance from
//                that first address to the next IDA address.
// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
// NOTE(review): the text of this function appears to have lost lines (the
//  final else branch and the while loop are never closed, and one SMP_msg
//  call has no arguments). CannotConvert is never set to true in the text
//  that is visible here; confirm against the repository copy.
bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
	ea_t PrevIDAAddr;
	ea_t NextIDAAddr;
	// NOTE(review): DisasmIndex - 1 wraps if DisasmIndex is 0, and DisasmLocs is
	//  indexed here before any of the bounds checks below run.
	size_t ShadowDisasmIndex = DisasmIndex - 1;
	ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
	bool CannotConvert = false;  // return value
	bool DebugAddress = false;
#if SMP_DEBUG_FIXUP_IDB
	// Trace output is enabled only for this one hard-coded address.
	DebugAddress = (DisasmAddr == 0x806c19a);
#endif

	if (DebugAddress) {
		SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex);
		SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(),
			DisasmLocs.size());
	}
	// Boundary cases: each one answers true (skip, do not convert).
	if (IDAProIndex >= IDAProLocs.size()) {
		// Have already processed the last IDA address.
		if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n");
		return true;
	}
	else if (DisasmIndex >= DisasmLocs.size()) {
		// Strange. Last Disasm address is only one to convert, and
		//  IDA still has addresses after that?
		if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n");
		return true;
	}
	else if (IDAProIndex < 2) {
		// We have Disasm addrs before the very first IDA addr. We
		//  don't trust this boundary case.
		if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n");
		return true;
	}
	// IDAProIndex >= 2 is guaranteed here, so both subscripts are in range.
	NextIDAAddr = IDAProLocs[IDAProIndex - 1];
	PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
	if (DebugAddress) SMP_msg(" PrevIDAAddr: %x NextIDAAddr: %x\n", PrevIDAAddr, NextIDAAddr);

	// See if previous IDA address was a return.
	flags_t PrevFlags = getFlags(PrevIDAAddr);
	if (!isCode(PrevFlags) || !isHead(PrevFlags)) {
		// This message is emitted unconditionally, not only for DebugAddress.
		SMP_msg("PrevIDAAddr %x not isCode or not isHead.\n", PrevIDAAddr);
		return true;
	}
	SMPInstr PrevInstr(PrevIDAAddr);
	PrevInstr.Analyze();
	if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n");
	if (PrevInstr.MDIsReturnInstr()) {
		// Right after a return come no-ops and 2-byte no-ops
		//  that are just for alignment. IDA does not seem to be
		//  happy when we convert all those to code.
		if (DebugAddress) SMP_msg(" Data followed a return instruction.\n");
		return true;
	}
	// Now, see if the area from DisasmAddr to NextIDAAddr is all data
	//  according to IDA.
	while (DisasmAddr < NextIDAAddr) {
		flags_t DataFlags = getFlags(DisasmAddr);
		// Tail and data items are stepped over one whole item at a time.
		if (isTail(DataFlags)) {
			if (DebugAddress) SMP_msg(" tail byte: %x\n", DisasmAddr);
			DisasmAddr = get_item_end(DisasmAddr);
		}
		else if (isData(DataFlags)) {
			if (DebugAddress) SMP_msg(" data byte: %x\n", DisasmAddr);
			DisasmAddr = get_item_end(DisasmAddr);
		}
		else if (isCode(DataFlags)) {
			// How could this ever happen?
			if (DebugAddress) SMP_msg(" isCode: %x\n", DisasmAddr);
			return true;
		}
		else { // must be isUnknown()
			// Very conservative here; only want to convert when the whole
			//  region is data, because that is a symptom of IDA missing
			//  a piece of code within a function (usually a piece of code
			//  that is only reachable via an indirect jump).
			if (DebugAddress) SMP_msg(" Not isData: %x\n", DisasmAddr);
		// NOTE(review): the end of the else branch and the end of the while loop
		//  are not present in this text; presumably lines were lost here.
		if (DebugAddress) SMP_msg(" new DisasmAddr: %x\n", DisasmAddr);
	if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
	if (!CannotConvert) {
		// Success.
		DisasmAddr = DisasmLocs[ShadowDisasmIndex];
		AreaSize = NextIDAAddr - DisasmAddr;
		if (DebugAddress) { 
			// NOTE(review): the argument list of the next SMP_msg call is missing.
			SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
			SMP_msg(" exiting FindDataToConvert()\n");
			SMP_msg("\n");
		}
	} // end if (!CannotConvert)
	return CannotConvert;
} // end of FindDataToConvert()

// Predicate intended to answer whether the converted code region
//  [StartAddr, EndAddr) looks like a function prologue, in which case it
//  should not be merged into the previous function.
// Currently a placeholder: both arguments are ignored and the answer is
//  always false.
bool IsFunctionPrologue(ea_t StartAddr, ea_t EndAddr) {
	// **!!** TODO: no prologue recognition has been implemented yet.
	const bool LooksLikePrologue = false;
	return LooksLikePrologue;
} // end of IsFunctionPrologue()

// Patch program bytes that could not be converted from
//  data to code, if it can be determined that the bytes represent code
//  that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
//  often are found in optimized glibc code because gcc was able to
//  determine that a function pointer was zero and did constant propagation,
//  but unfortunately was not able to determine that the code was unreachable.
//  IDA will not succeed in ua_code() for "call 0", but there is no danger
//  of a working program ever executing this code. Replacing the call with
//  no-ops permits us to continue converting a contiguous range of data to
//  code, and permits IDA to reanalyze the function later.
// Parameter: CurrDisasmAddr is the address of the instruction that failed
//  to convert.
// Returns true if program bytes were patched and the patched address could
//  then be converted to code; false in every other case.
// NOTE(review): this body was reconstructed from a damaged copy of the file.
//  The early returns, the PatchAddr increment and the final failure test were
//  restored from the surrounding messages and the documented contract;
//  compare with the repository copy before relying on it.
bool MDPatchUnconvertedBytes(ea_t CurrDisasmAddr) {
	flags_t AddrFlags = getFlags(CurrDisasmAddr);
	if (isData(AddrFlags) || isTail(AddrFlags)) {
		// Bytes should have been converted to unknown already.
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Cannot patch data bytes or tail bytes at %x\n", CurrDisasmAddr);
#endif
		return false;
	}
	SMPInstr PatchInstr(CurrDisasmAddr);
	PatchInstr.Analyze();
	int InstrLen = PatchInstr.GetCmd().size;
	if (0 >= InstrLen) {
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("decode_insn() failed on patch location %x\n", CurrDisasmAddr);
#endif
		return false;
	}
	else {
		// Only call instructions are candidates for patching.
		if (PatchInstr.GetCmd().itype != NN_call) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch non-call instruction at %x\n", CurrDisasmAddr);
#endif
			return false;
		}
		// ... and only when the call target is the near address zero.
		op_t CallDest = PatchInstr.GetFirstUse()->GetOp();
		if ((o_near != CallDest.type) || (0 != CallDest.addr)) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch call unless it is call near ptr 0 at %x",
				CurrDisasmAddr);
#endif
			return false;
		}
		// Overwrite every byte of the instruction with a one-byte no-op.
		ea_t PatchAddr = CurrDisasmAddr;
		for (int i = 0; i < InstrLen; ++i) {
			bool ok = patch_byte(PatchAddr, 0x90);  // x86 no-op
			if (!ok) {
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("patch_byte() failed at %x\n", PatchAddr);
#endif
				return false;
			}
			++PatchAddr;
		}
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Patched %d bytes successfully at %x\n", InstrLen, CurrDisasmAddr);
#endif
		// Ask IDA to turn the freshly patched bytes into an instruction.
#if IDA_SDK_VERSION < 600
		InstrLen = ua_code(CurrDisasmAddr);
#else
		InstrLen = create_insn(CurrDisasmAddr);
#endif
		if (0 >= InstrLen) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg(" ... but ua_code() still failed!\n");
#endif
			return false;
		}
	} // end if (0 >= InstrLen) ... else ...
	return true;
} // end of MDPatchUnconvertedBytes()

// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
//  and an external disassembler (in DisasmLocs). Compare the lists and
//  try to convert addresses to code that are found in DisasmLocs but
//  not in IDAProLocs. Emit warnings when IDAProLocs has a code address
//  not found in DisasmLocs.
// Both indexes are post-incremented whenever an address is fetched, so each
//  index is always one past the entry currently held in CurrDisasmAddr or
//  CurrAddr.
// NOTE(review): DisasmLocs[0] and IDAProLocs[0] are read unconditionally, so
//  this function presumably expects both lists to be non-empty; verify.
// NOTE(review): the text of this function appears to have lost lines:
//  AllConverted and AllNops are used without a visible declaration, several
//  SMP_msg calls are cut short, and the preprocessor conditionals in the
//  conversion loop are not balanced. Compare with the repository copy.
void FixCodeIdentification(void) {
	size_t DisasmIndex = 0;
	ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
	size_t IDAProIndex = 0;
	ea_t CurrAddr = IDAProLocs[IDAProIndex++];

	// The bound is <= because DisasmIndex is pushed one past size() on purpose
	//  to signal that the last disasm address has been consumed.
	while (DisasmIndex <= DisasmLocs.size()) {
		// If the current address is less than the current
		//  external disasm address, we have the rare case in
		//  which IDA Pro has identified an address as code
		//  but the external disasm has not. Emit a warning
		//  message and go on to the next IDA address.
		if (CurrAddr < CurrDisasmAddr) {
			SMPInstr TempInstr(CurrAddr);
			TempInstr.Analyze();
			SMP_msg("Address %x is code in IDB but not in external disassembler: %s\n",
				CurrAddr, TempInstr.GetDisasm());
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit.
				break;
			}
		}
		else if (CurrAddr == CurrDisasmAddr) {
			// If equal, no problem, we are moving through the
			//  code addresses in lockstep. Grab the next address
			//  from each source.
			if (DisasmIndex < DisasmLocs.size()) {
				CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else {
				++DisasmIndex;  // cause loop exit; skip cleanup loop
			}
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit in cleanup loop.
				break;
			}
		}
		else {
			// We must have CurrAddr > CurrDisasmAddr. That means
			//  IDA has jumped over some code addresses in
			//  DisasmLocs. We need to try to convert addresses
			//  to code until we can reach the current addr.
			int InstrLen;
			// For now, we will address only the case in which IDA
			//  has identified addresses as data bytes, and the
			//  external disassembler(e.g. objdump) has identified
			//  the same addresses as code. We only want to deal with
			//  contiguous areas of data-to-code conversion that do NOT
			//  follow a return statement.
			int AreaSize = 0;
			ea_t AreaStart = CurrDisasmAddr;
			ea_t AreaEnd;
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("CurrDisasmAddr: %x  CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
#endif
			bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
			if (SkipArea) {
				// Skip over the extra external disasm addresses.
				// NOTE(review): DisasmIndex is not checked against DisasmLocs.size()
				//  inside this loop; confirm it cannot run past the end.
				while (CurrDisasmAddr < CurrAddr)
					CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else { 
				// Convert the overlooked code region to unexplored.
				AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
#endif
				do_unknown_range(AreaStart, AreaSize, DOUNK_SIMPLE);
				// Record the region on the reanalysis work list.
				// NOTE(review): push_back stores a copy of CurrRegion; later
				//  additions to CurrRegion.FixupInstrs go to the local object.
				SMP_bounds_t ConvertRegion;
				ConvertRegion.startEA = AreaStart;
				ConvertRegion.endEA = AreaEnd;
				FixupRegion CurrRegion(ConvertRegion);
				CodeReanalyzeList.push_back(CurrRegion);
				do {
					flags_t InstrFlags = getFlags(CurrDisasmAddr);
					if (!isUnknown(InstrFlags)) {
						SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
#if IDA_SDK_VERSION < 600
						InstrLen = ua_code(CurrDisasmAddr);
#else
						InstrLen = create_insn(CurrDisasmAddr);
						if (InstrLen > 0) { // Successfully converted to code
							SMPInstr NewInstr(CurrDisasmAddr);
							NewInstr.Analyze();
							SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
#endif
						}
						else {
							if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q's avatar
clc5q committed
								;
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
								CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
							}
						}
					} // end if (isCode(InstrFlags) ... else ...
					if (DisasmIndex < DisasmLocs.size()) {
						CurrDisasmAddr = DisasmLocs[DisasmIndex++];
					}
					else {
						// cause loops to exit
						CurrDisasmAddr = CurrAddr;
						++DisasmIndex; // skip cleanup loop
					}
				} while (CurrDisasmAddr < CurrAddr);
				if (AllConverted && AllNops) {
					// We want to convert the region back to unexplored bytes
					//  and take it off the work list. Regions that are all nops
					//  create data flow analysis problems sometimes. The region
					//  is often unreachable code and produces a basic block with
					//  no predecessors within a function. This often happens when
					//  an optimizing compiler uses nops as padding to align jump
					//  targets on cache line boundaries. With no fall through into
					//  the nops, they are unreachable and should be left as unknown.
					SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
					do_unknown_range(CurrRegion.GetStart(),
						CurrRegion.GetEnd() - CurrRegion.GetStart(), DOUNK_SIMPLE);
					CodeReanalyzeList.pop_back();
				}
			} // end if (SkipArea) ... else ...
		} // end if (addr < CurrDisasmAddr) .. else if ... else ...
	} // end while (DisasmIndex <= DisasmLocs.size()

#if 0  // Make this code use FindDataToConvert()  **!!**
	// Cleanup loop:
	// If there are still Disasm addrs to process, try to turn them
	//  into code in the IDB.
	while (DisasmIndex <= DisasmLocs.size()) {
		flags_t InstrFlags = getFlags(CurrDisasmAddr);
		if (isCode(InstrFlags)) {
			SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
			segment_t *seg = SMP_getseg(CurrDisasmAddr);
			if (SEG_CODE == seg->type) {
				do_unknown_range(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, DOUNK_SIMPLE);
			}
			else {
				// Might be safest to just discontinue processing
				//  if we wander into a non-code segment.
				//  DisasmLocs should not have an entire code segment
				//  that IDA Pro missed.
				break;
			}
			int InstrLen = ua_code(CurrDisasmAddr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(CurrDisasmAddr);
				NewInstr.Analyze();
				SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
				SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
			}
		} // end if (isCode(InstrFlags) ... else ...
		if (DisasmIndex < DisasmLocs.size()) {
			CurrDisasmAddr = DisasmLocs[DisasmIndex++];
		}
		else {
			++DisasmIndex; // cause loop to exit
		}
	} // end while (DisasmIndex <= DisasmLocs.size()
#endif

	return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
//  Earlier failures are usually because the instruction branches to an address that has not
//  yet been converted from data to code, so ua_code() failed. Now that all data to code
//  conversions have completed, ua_code() should succeed.
// Return the number of instructions successfully analyzed.
// Regions that are finished (no fixups, fully converted, or all nops) have their
//  start address set to BADADDR and are erased in a second pass; regions that
//  are only partly converted keep just the instructions that still failed.
// NOTE(review): in the text visible here the variable changes is initialized to
//  zero and never modified, so the returned count is always zero. The text also
//  appears to have lost lines (the preprocessor conditionals are unbalanced and
//  nothing visible can leave AllNops true after a successful conversion).
//  Compare with the repository copy.
int FixupNewCodeChunks(void) {
	list<FixupRegion>::iterator CurrRegion;
	int changes = 0;
	for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
		bool AllConverted = true;
		bool AllNops = true;
		bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
		if (NoFixups) {
			// Nothing was left to fix in this region.
			CurrRegion->SetStart(BADADDR);  // mark for removal
			continue;  // skip to next region
		}
		list<ea_t>::iterator CurrInstr;
		for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr)  {
#if IDA_SDK_VERSION < 600
			int InstrLen = ua_code(*CurrInstr);
#else
			int InstrLen = create_insn(*CurrInstr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(*CurrInstr);
				NewInstr.Analyze();
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
					AllNops = false;
					*CurrInstr = BADADDR; // mark for removal
				}
			}
			else {
				AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
			}
		} // end for all instrs in CurrRegion
		if (AllConverted && !AllNops) {
			SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			CurrRegion->SetStart(BADADDR); // mark for removal
		}
		else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			// Put an all-nops region back to unexplored bytes before dropping it.
			do_unknown_range(CurrRegion->GetStart(),
				CurrRegion->GetEnd() - CurrRegion->GetStart(), DOUNK_SIMPLE);
			CurrRegion->SetStart(BADADDR); // mark for removal
		}
		else {
			// Remove only the instructions that were fixed up.
			// erase() returns the iterator to the following element, so the
			//  iterator is advanced explicitly only when nothing was erased.
			CurrInstr = CurrRegion->FixupInstrs.begin(); 
			while (CurrInstr != CurrRegion->FixupInstrs.end()) {
				if (BADADDR == *CurrInstr) {
					CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
				}
				else {
					++CurrInstr;
				}
			}
		}
	} // end for all regions in the CodeReanalyzeList

	// Remove completed regions from the CodeReanalyzeList
	CurrRegion = CodeReanalyzeList.begin();
	while (CurrRegion != CodeReanalyzeList.end()) {
		if (BADADDR == CurrRegion->GetStart())
			CurrRegion = CodeReanalyzeList.erase(CurrRegion);
		else
			++CurrRegion;
	}

	// The disabled section below refers to names (AreaStart, AreaEnd, IDAProIndex)
	//  that are not declared in this function.
#if 0
	if (AllConverted) {
					if (IsFunctionPrologue(AreaStart, AreaEnd)) {
						// Create a new function entry chunk here.
						//  **!!** TODO
						;
					}
					else {
						// Extend the previous chunk to include the
						//  converted code.
						ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
						func_t *PrevChunk = get_fchunk(PrevIDAAddr);
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
						SMP_msg(" func_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0  // temporary for debugging
						if (is_func_entry(PrevChunk)) {
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Func extended to include code from %x to %x\n",
									AreaStart, AreaEnd);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend func from %x to %x\n",
									AreaStart, AreaEnd);
							}
						}
						else { // tail
							// See if this works for function tails, also.
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Tail extended to include code from %x to %x\n",
									AreaStart, AreaEnd);
								func_t *TailOwner = get_func(PrevChunk->owner);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend tail from %x to %x\n",
									AreaStart, AreaEnd);
							}
						} // end if (is_func_entry()) ... else ...
#endif
					} // end if (IsFunctionPrologue()) ... else ...
				} // end if (AllConverted)
				else {
					SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
				}
#endif

	return changes;
} // end of FixupNewCodeChunks()

// Audit the IDA code database by looking at all instructions in the
//  code segment and printing all those that are not contained in a
//  function. Emit the context-free annotations that we are able to
//  emit on a per-instruction basis.
// Parameters:
//  CurrSeg       - segment to scan, from startEA up to (not including) endEA.
//  AnnotFile     - passed through to SMPInstr::EmitAnnotations().
//  InfoAnnotFile - passed through to SMPInstr::EmitAnnotations().
// Unknown (unexplored) items are also offered to IDA for conversion to code;
//  when that succeeds, annotations are emitted for the new instruction.
// NOTE(review): this body was reconstructed from a damaged copy of the file.
//  The else branches and closing braces of the unknown-bytes case were
//  restored from the indentation and the error messages, and a debug-only
//  block that had lost its contents was dropped; compare with the
//  repository copy.
void FindOrphanedCode(segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
	char disasm[MAXSTR];
	for (ea_t addr = CurrSeg->startEA; addr < CurrSeg->endEA;
		addr = get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			func_t *CurrFunc = get_func(addr);
			if (NULL == CurrFunc) {
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm());
#endif
				// TODO: If there are code xrefs to the orphan code,
				//  see what kind. If a CALL, and orphan code looks
				//  like a prologue, make a function. If a JUMP of
				//  some kind, then make a function chunk and make
				//  it a tail of all functions that jump to it. **!!**

				// If instruction is still not included in a code chunk,
				//  emit annotations for it in isolation.
				CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile);
			}
		}
		else if (isUnknown(InstrFlags)) {
			// Can IDA analyze this to be code?
			int InstrLen;
#if IDA_SDK_VERSION < 600
			InstrLen = ua_code(addr);
#else
			InstrLen = create_insn(addr);
#endif
			if (InstrLen > 0) {
				bool IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					// Remove interactive color-coding tags.
					ssize_t StringLen = tag_remove(disasm, disasm, 0);
					if (-1 >= StringLen) {
						SMP_msg("ERROR: tag_remove failed at addr %x \n", addr);
					}
					else {
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Successfully analyzed!  %s\n", disasm);
#endif
						SMPInstr UnknownInstr(addr);
						UnknownInstr.Analyze();
						// TODO: Get new code into a chunk.  **!!**
						// If instruction is still not included in a code chunk,
						//  emit annotations for it in isolation.
						UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile);
					}
				}
				else {
					SMP_msg("ERROR: generate_disasm_line failed at addr %x \n", addr);
				}
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()

// Version of FindOrphanedCode that does not emit annotations but can be used
//  to determine at what point in time code becomes orphaned.
// Every code head in CurrSeg that is not inside any function is inserted into
//  the CodeOrphans set.
// Parameters:
//  CurrSeg  - segment to scan, from startEA up to (not including) endEA.
//  FirstRun - when false, an address that was not already in CodeOrphans is
//             reported as newly orphaned.
void Debug_FindOrphanedCode(segment_t *CurrSeg, bool FirstRun) {
	// Hard-coded address that gets an extra trace message when it is orphaned.
	const ea_t DebugAddr = 0x8050db0;
	for (ea_t addr = CurrSeg->startEA; addr < CurrSeg->endEA;
		addr = get_item_end(addr)) {

		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			func_t *CurrFunc = get_func(addr);
			if (NULL == CurrFunc) {  // Code not in a func; orphaned
				// pairib.second is true only when addr was not in the set before.
				pair<set<ea_t>::iterator, bool> pairib = CodeOrphans.insert(addr);
				if (DebugAddr == addr) {
					SMP_msg("DEBUG: Orphaned code addr %x found.\n", addr);
				}
				if ((!FirstRun) && (pairib.second)) {
					SMP_msg("SERIOUS WARNING: Newly orphaned code at %x \n", addr);
				}
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of Debug_FindOrphanedCode()

// Audit the IDA database with respect to branches and calls. They should
//  each have valid code targets (not data or unknown bytes) and the code
//  cross references should reflect the linkage.
// The audit has two passes per function chunk: the cross references that
//  arrive at a function entry point, then the far code cross references that
//  leave each call, jump or conditional branch inside the chunk.
// NOTE(review): the text of this function appears to have lost lines: xb is
//  used in the first pass with no declaration visible before it, several if
//  and else branches are never closed, and the two calls that report a newly
//  found code range have lost everything but their argument lists. Compare
//  with the repository copy.
void AuditCodeTargets(void) {
	// Cover all the code that IDA has grouped into functions by iterating
	//  through all function chunks in the program.
	size_t NumChunks = get_fchunk_qty();
	for (size_t ChunkIndex = 0; ChunkIndex < NumChunks; ++ChunkIndex) {
		// NOTE(review): the result of getn_fchunk() is dereferenced without a
		//  null check.
		func_t *ChunkInfo = getn_fchunk((int) ChunkIndex);
		char FuncName[MAXSTR];
		get_func_name(ChunkInfo->startEA, FuncName, sizeof(FuncName) - 1);

		// First, see if any calls to this function (if this chunk is
		//  an entry point) are not coming from within functions.
		if (is_func_entry(ChunkInfo)) {
			ea_t addr = ChunkInfo->startEA;
			for (bool ok = xb.SMP_first_to(addr, XREF_ALL);	ok; ok = xb.SMP_next_to()) {
				uchar XrefType = xb.GetType() & XREF_MASK;
				if (xb.GetIscode()) {
					if ((XrefType == fl_U) || (XrefType == fl_USobsolete)) {
						SMP_msg("Bad xref type: %x %s\n", addr, FuncName);
					else if ((XrefType == fl_JF) || (XrefType == fl_JN)) {
						SMP_msg("Jump to func: %x %s from: %x\n",
							addr, FuncName, xb.GetFrom());
					else if (XrefType == fl_F) {
						SMP_msg("Fall through to func: %x %s from: %x\n",
							addr, FuncName, xb.GetFrom());
					}
					else if ((XrefType == fl_CF) || (XrefType == fl_CN)) {
						// Far call or Near call
						func_t *CallingFunc = get_func(xb.GetFrom());
						if (NULL == CallingFunc) {
clc5q's avatar
clc5q committed
							;
#if SMP_DEBUG_FIXUP_IDB
							SMP_msg("Call to %x Func %s from %x not in function.\n",
								addr, FuncName, xb.GetFrom());
clc5q's avatar
clc5q committed
#endif
				else { // DATA xref
					if (XrefType == dr_O) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Data xref to %x Func %s from %x\n",
							addr, FuncName, xb.GetFrom());
clc5q's avatar
clc5q committed
#endif
						SMP_msg("Strange data xref %d to %x Func %s from %x\n",
							XrefType, addr, FuncName, xb.GetFrom());
			} // end for (bool ok = xb.SMP_first_to(); ...)
		} // end if (is_func_entry(ChunkInfo))

		// Next, see if any call or branch in this chunk references
		//  a target address that is not in a function. If so, and the
		//  callee address code looks like a function prologue, then
		//  create a function for the contiguous code starting at that
		//  address and ask IDA to analyze it and store it in the
		//  IDA database. If it is a branch target, not a call target,
		//  create a new TAIL chunk for the current parent functions.
		// NOTE(review): the code visible below only locates the extent of such
		//  code with FindNewFuncLimit(); no function or chunk creation call is
		//  visible in this text.
		for (ea_t addr = ChunkInfo->startEA; addr < ChunkInfo->endEA;
			addr = get_item_end(addr)) {
			flags_t InstrFlags = getFlags(addr);
			if (isCode(InstrFlags) && isHead(InstrFlags)) {
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
				if ((CALL|JUMP|COND_BRANCH) & CurrInst.GetDataFlowType()) {
					SMP_xref_t xb;
					for (bool ok = xb.SMP_first_from(addr, XREF_FAR); ok; ok = xb.SMP_next_from()) {
						if (xb.GetIscode()) {
							ea_t FirstAddr = xb.GetTo();
							func_t *FuncInfo = get_func(FirstAddr);
							if (NULL == FuncInfo) {
								// Found call to addr that is not in a func.
								// Find limits of contiguous code starting at FirstAddr.
								ea_t LastAddr = FindNewFuncLimit(FirstAddr);
								if (CALL == CurrInst.GetDataFlowType())
										FirstAddr, LastAddr);
								else
										FirstAddr, LastAddr);
							}
						}
					}
				}
			}
		}
	} // end for (size_t ChunkIndex = 0; ... )

	return;
} // end of AuditCodeTargets()

// Find the span of contiguous code that is not contained within any
//  function, starting at StartAddr, which should already be an example
//  of an instruction address that is outside of a function.
// Returns the address of the last code head reached. The scan stops at the
//  first item that is not a code head, at the first address that belongs to
//  an existing function (that address is the value returned), or at the end
//  of the segment. StartAddr itself is returned when no segment is found for
//  it or when nothing after it qualifies.
// NOTE(review): the copy of the file this was taken from used seg without a
//  visible declaration; the SMP_getseg() lookup on StartAddr below restores
//  what appears to be a lost line. Compare with the repository copy.
ea_t FindNewFuncLimit(ea_t StartAddr) {
	ea_t LimitAddr = StartAddr;
	segment_t *seg = SMP_getseg(StartAddr);
	if (NULL == seg)
		return LimitAddr;
	ea_t SegLimit = seg->endEA;

	for (ea_t addr = get_item_end(StartAddr); addr < SegLimit; addr = get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isCode(InstrFlags) && isHead(InstrFlags)) {
			// LimitAddr is updated before the function test, so an address that
			//  already belongs to a function is still reported as the limit.
			LimitAddr = addr;
			func_t *FuncInfo = get_func(addr);
			if (NULL != FuncInfo)
				break; // ran into an existing function
		}
		else // Not a code head; time to stop.
			break;
	}
	return LimitAddr;
} // end of FindNewFuncLimit()

void SpecialDebugOutput(void) {
	char disasm[MAXSTR];
	vector<ea_t> ProblemAddrs;
	ProblemAddrs.push_back(0x8066d08);
	bool IDAsuccess;
	int InstLen;
	ssize_t StringLen;

	for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
		ea_t addr = ProblemAddrs[index];
		flags_t InstrFlags = getFlags(addr);
		if (isCode(InstrFlags) && isHead(InstrFlags)) {
			IDAsuccess = SMPGetCmd(addr, LocalCmd, LocalFeatures);
			InstLen = (int) LocalCmd.size;

			if ((IDAsuccess) && (0 < InstLen)) {
				IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					StringLen = tag_remove(disasm, disasm, 0);
					if (-1 < StringLen)
						SMP_msg("Problem addr %x : %s\n", addr, disasm);
						SMP_msg("ERROR: tag_remove failed at addr %x \n", addr);
					SMP_msg("ERROR: generate_disasm_line failed at addr %x \n", addr);
				SMP_msg("ERROR: decode_insn failed at addr %x \n", addr);
		}
	}
	return;
} // end of SpecialDebugOutput()

// Map a call type keyword from the policy file onto its ZST_SysCallType:
//   "PRIVILEGECALLS" -> ZST_HIGHPRIVILEGE_CALL
//   "FILECALLS"      -> ZST_FILE_CALL
//   "NETWORKCALLS"   -> ZST_NETWORK_CALL
//  Any other string yields ZST_UNMONITORED_CALL. The comparison is exact
//  and case-sensitive.
ZST_SysCallType ConvertStringToCallType(char *Str2) {
	if (strcmp(Str2, "PRIVILEGECALLS") == 0)
		return ZST_HIGHPRIVILEGE_CALL;
	if (strcmp(Str2, "FILECALLS") == 0)
		return ZST_FILE_CALL;
	if (strcmp(Str2, "NETWORKCALLS") == 0)
		return ZST_NETWORK_CALL;
	// Unrecognized keyword.
	return ZST_UNMONITORED_CALL;
} // end of ConvertStringToCallType()

// Map a policy keyword from the policy file onto its ZST_Policy value:
//   "DISALLOW"  -> ZST_DISALLOW
//   "WHITELIST" -> ZST_WHITELIST
//   "BLACKLIST" -> ZST_BLACKLIST
//  Any other string yields ZST_ALLOWALL; error handling precedes calls to
//  this function. The comparison is exact and case-sensitive.
ZST_Policy ConvertStringToPolicy(char *Str3) {
	if (strcmp(Str3, "DISALLOW") == 0)
		return ZST_DISALLOW;
	if (strcmp(Str3, "WHITELIST") == 0)
		return ZST_WHITELIST;
	if (strcmp(Str3, "BLACKLIST") == 0)
		return ZST_BLACKLIST;
	// Unrecognized keyword.
	return ZST_ALLOWALL;
} // end of ConvertStringToPolicy()

// Look up the Zephyr Security Toolkit call type recorded for a function name
//  in ZST_FuncTypeMap. A name with no entry (it might not even be a system
//  call) is reported as ZST_UNMONITORED_CALL.
ZST_SysCallType GetCallTypeFromFuncName(string SysCallName) {
	map<string, ZST_SysCallType>::iterator MapPos = ZST_FuncTypeMap.find(SysCallName);
	const bool NameIsKnown = (MapPos != ZST_FuncTypeMap.end());
	return NameIsKnown ? MapPos->second : ZST_UNMONITORED_CALL;
} // end of GetCallTypeFromFuncName()

// Look up the user-specified security policy for a call type in
//  ZST_TypePolicyMap. A call type with no entry defaults to ZST_ALLOWALL.
ZST_Policy GetPolicyFromCallType(ZST_SysCallType CallType) {
	map<ZST_SysCallType, ZST_Policy>::iterator MapPos = ZST_TypePolicyMap.find(CallType);
	const bool PolicyIsKnown = (MapPos != ZST_TypePolicyMap.end());
	return PolicyIsKnown ? MapPos->second : ZST_ALLOWALL;
} // end of GetPolicyFromCallType()

// Report whether LocationName is on the location whitelist that belongs to
//  CallType: ZST_FileLocWhitelist for file calls, ZST_NetworkLocWhitelist for
//  network calls. Any other call type has no whitelist and yields false.
//  Only exact string matches count.
// NOTE: HANDLE CASE IN WHICH WHITELISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationWhitelisted(ZST_SysCallType CallType, string LocationName) {
	set<string> *Whitelist = NULL;
	if (ZST_FILE_CALL == CallType) {
		Whitelist = &ZST_FileLocWhitelist;
	}
	else if (ZST_NETWORK_CALL == CallType) {
		Whitelist = &ZST_NetworkLocWhitelist;
	}

	if (NULL == Whitelist) { // should not be here
		return false;
	}
	return (Whitelist->find(LocationName) != Whitelist->end());
} // end of IsLocationWhitelisted()

// Report whether LocationName is on the location blacklist that belongs to
//  CallType: ZST_FileLocBlacklist for file calls, ZST_NetworkLocBlacklist for
//  network calls. Any other call type has no blacklist and yields false.
//  Only exact string matches count.
// NOTE: HANDLE CASE IN WHICH BLACKLISTED LOCATION IS A PREFIX, TERMINATING in a slash.
bool IsLocationBlacklisted(ZST_SysCallType CallType, string LocationName) {
	set<string> *Blacklist = NULL;
	if (ZST_FILE_CALL == CallType) {
		Blacklist = &ZST_FileLocBlacklist;
	}
	else if (ZST_NETWORK_CALL == CallType) {
		Blacklist = &ZST_NetworkLocBlacklist;
	}

	if (NULL == Blacklist) { // should not be here
		return false;
	}
	return (Blacklist->find(LocationName) != Blacklist->end());
} // end of IsLocationBlacklisted()

// These two constants should agree with their counterparts in ZST-policy.c.
#define ZST_MAX_FILE_NAME_LEN 1024
#define ZST_MAX_CALL_NAME_LEN 64
// Read the foo.exe.policy file to initialize our security policies for system calls.
void ZST_InitPolicies(const char *PolicyFileName) {
	FILE *PolicyFile = SMP_fopen(PolicyFileName, "r");
	char Str1[ZST_MAX_CALL_NAME_LEN], Str2[ZST_MAX_CALL_NAME_LEN], Str3[ZST_MAX_FILE_NAME_LEN];

	if (NULL != PolicyFile) {
			int ItemsRead = qfscanf(PolicyFile, "%63s %63s %1023s", Str1, Str2, Str3);
			if (3 != ItemsRead) {
				SMP_msg("ERROR: Line in %s had %d items instead of the required 3; line ignored.\n", PolicyFileName, ItemsRead);
			}
			else {
				string FirstStr(Str1), SecondStr(Str2), ThirdStr(Str3);
				pair<set<string>::iterator, bool> SetInsertResult;
				if (0 == strcmp(Str1, "SECURITYPOLICY")) {
					ZST_SysCallType TempCallType = ConvertStringToCallType(Str2);
					ZST_Policy TempPolicy = ConvertStringToPolicy(Str3);
					pair<map<ZST_SysCallType, ZST_Policy>::iterator, bool> InsertResult;
					pair<ZST_SysCallType, ZST_Policy> TempPair(TempCallType, TempPolicy);
					InsertResult = ZST_TypePolicyMap.insert(TempPair);
					if (!(InsertResult.second)) {
						SMP_msg("ERROR: Could not insert security policy %s for %s. Possible duplicate or conflicting policies.\n",
							Str3, Str2);
					}
				}
				else if (0 == strcmp(Str1, "FILELOCATION")) {
					if (0 == strcmp(Str2, "WHITELIST")) {
						SetInsertResult = ZST_FileLocWhitelist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate file whitelist location %s ignored.\n", Str3);
						}
					}
					else if (0 == strcmp(Str2, "BLACKLIST")) {
						SetInsertResult = ZST_FileLocBlacklist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate file blacklist location %s ignored.\n", Str3);
						SMP_msg("ERROR: Unknown second field value in policy line: %s %s %s ; ignored\n", Str1, Str2, Str3);
					}
				}
				else if (0 == strcmp(Str1, "NETWORKLOCATION")) {
					if (0 == strcmp(Str2, "WHITELIST")) {
						SetInsertResult = ZST_NetworkLocWhitelist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate network whitelist location %s ignored.\n", Str3);
						}
					}
					else if (0 == strcmp(Str2, "BLACKLIST")) {
						SetInsertResult = ZST_NetworkLocBlacklist.insert(ThirdStr);
						if (!(SetInsertResult.second)) {
							SMP_msg("WARNING: Duplicate network blacklist location %s ignored.\n", Str3);