Skip to content
Snippets Groups Projects
SMPStaticAnalyzer.cpp 61.7 KiB
Newer Older
						if (IsCodeMisaligned(addr)) {
#if SMP_DEBUG_FIXUP_IDB
clc5q's avatar
clc5q committed
							SMP_del_items(addr, InstrLen, 0);
clc5q's avatar
clc5q committed
							if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("Converted misaligned code to data at %x : len: %x\n",
clc5q's avatar
clc5q committed
#endif
								// Step back so data gets processed.
								DataRunLen += get_item_size(addr);
								continue; // skip reset of DataRunLen
							}
							else {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("Misaligned code left as unknown at %x : len: %x\n",
clc5q's avatar
clc5q committed
#endif
								IsolatedCodeTrigger = false;
							}
						} // end if (IsCodeMisaligned() ...)
clc5q's avatar
clc5q committed
						else if (!SMP_hasRef(AddrFlags)) {
							// No references at all --> isolated code.
							IsolatedCodeTrigger = true;
							IsolatedCodeAddr = addr;
							IsolatedCodeLen = InstrLen;
						}
						else {
							SMP_xref_t xb;
							bool ok = xb.SMP_first_to(addr, XREF_ALL);
							if (!ok) {
								// No code xrefs to this target addr.
								IsolatedCodeTrigger = true;
								IsolatedCodeAddr = addr;
								IsolatedCodeLen = InstrLen;
							}
						}
					} // end if (MIN_DATARUN_LEN <= DataRunLen)
					else if (IsolatedCodeTrigger) {
						// Two instructions in a row does not fit the pattern.
						IsolatedCodeTrigger = false;
					}
					DataRunLen = 0;
clc5q's avatar
clc5q committed
				} // end if (SMP_isData) ... else if (isUnknown) ... else SMP_isCode
clc5q's avatar
clc5q committed
			else if (SMP_isUnknown(AddrFlags)) {
				// If in a data run, convert to data.
				InstrLen = get_item_size(addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Unknown: %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
					if (SMP_create_data(addr, SMP_byteflag(), InstrLen, BADNODE)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
clc5q's avatar
clc5q committed
#endif
		} // end for (STARS_ea_t addr =  seg->startEA; ...)
	} // end for all segments
	return;
} // end of FindDataInCode()


// The choices that IDA makes for deciding which parent function of a
//  TAIL chunk is the primary owner of the tail can be counterintuitive.
//  A function entry can both fall into and jump to a tail chunk that
//  is contiguous with it, yet the "owner" might be a function that is
//  far below it in the executable address space. This function will
//  change the ownership to a more sensible arrangement.
void AuditTailChunkOwnership(void) {
jdh8d's avatar
jdh8d committed
	SMP_AuditTailChunkOwnership();
} // end of AuditTailChunkOwnership()

// Decide whether the gap between the current external-disassembler code
//  address and the next IDA code address should be converted to code.
// If the addresses signified from DisasmIndex to IDAProIndex are
//  all considered data and do NOT follow a return instruction,
//  return false and update AreaSize to reflect the area to be
//  converted.
// Parameters:
//  IDAProIndex - index into IDAProLocs; the entries at IDAProIndex - 1 and
//                IDAProIndex - 2 are read as the next and previous IDA
//                code addresses around the gap.
//  DisasmIndex - index into DisasmLocs; in the visible code it is only
//                bounds-checked and printed in debug output.
//  AreaSize    - output; set to NextIDAAddr - DisasmAddr on success.
// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
// Visible early "skip" (true) results: either index is past the end of its
//  list, IDAProIndex < 2, the previous IDA address is not a code head, the
//  previous instruction has a return opcode, or a code item lies in the gap.
// NOTE(review): this listing looks incomplete -- ShadowDisasmIndex is used
//  but never declared here, no visible statement sets CannotConvert to true,
//  several closing braces are absent, one SMP_msg() call is cut short, and
//  the "... avatar" / "... committed" lines are not C++. Verify against the
//  repository copy before changing the code.
bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
	STARS_ea_t PrevIDAAddr;
	STARS_ea_t NextIDAAddr;
	STARS_ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
	bool CannotConvert = false;  // return value
	bool DebugAddress = false;
#if SMP_DEBUG_FIXUP_IDB
	DebugAddress = (DisasmAddr == 0x806c19a);
#endif

	if (DebugAddress) {
		SMP_msg("IDAProIndex: %zu DisasmIndex: %zu\n", IDAProIndex, DisasmIndex);
		SMP_msg("IDA locs size %zu Disasm locs size %zu\n", IDAProLocs.size(),
			DisasmLocs.size());
	}
	if (IDAProIndex >= IDAProLocs.size()) {
		// Have already processed the last IDA address.
		if (DebugAddress) SMP_msg(" Already done with IDAProLocs.\n");
		return true;
	}
	else if (DisasmIndex >= DisasmLocs.size()) {
		// Strange. Last Disasm address is only one to convert, and
		//  IDA still has addresses after that?
		if (DebugAddress) SMP_msg(" Already done with DisasmLocs.\n");
		return true;
	}
	else if (IDAProIndex < 2) {
		// We have Disasm addrs before the very first IDA addr. We
		//  don't trust this boundary case.
		if (DebugAddress) SMP_msg(" Boundary case with IDAProLocs.\n");
		return true;
	}
	NextIDAAddr = IDAProLocs[IDAProIndex - 1];
	PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
	if (DebugAddress) SMP_msg(" PrevIDAAddr: %lx NextIDAAddr: %lx\n", (unsigned long) PrevIDAAddr, (unsigned long) NextIDAAddr);
clc5q's avatar
clc5q committed
	flags_t PrevFlags = SMP_getFlags(PrevIDAAddr);
	if (!SMP_isCode(PrevFlags) || !SMP_isHead(PrevFlags)) {
		SMP_msg("PrevIDAAddr %lx not isCode or not isHead.\n", (unsigned long) PrevIDAAddr);
		return true;
	}
	SMPInstr PrevInstr(PrevIDAAddr);
	PrevInstr.Analyze();
	if (DebugAddress) SMP_msg("Finished PrevInstr.Analyze()\n");
	if (PrevInstr.HasReturnOpcode()) {
		// Right after a return come no-ops and 2-byte no-ops
		//  that are just for alignment. IDA does not seem to be
		//  happy when we convert all those to code.
		if (DebugAddress) SMP_msg(" Data followed a return instruction.\n");
		return true;
	}
	// Now, see if the area from DisasmAddr to NextIDAAddr is all data
	//  according to IDA.
	while (DisasmAddr < NextIDAAddr) {
clc5q's avatar
clc5q committed
		flags_t DataFlags = SMP_getFlags(DisasmAddr);
		if (SMP_isTail(DataFlags)) {
			if (DebugAddress) SMP_msg(" tail byte: %lx\n", (unsigned long) DisasmAddr);
			DisasmAddr = SMP_get_item_end(DisasmAddr);
clc5q's avatar
clc5q committed
		else if (SMP_isData(DataFlags)) {
			if (DebugAddress) SMP_msg(" data byte: %lx\n", (unsigned long) DisasmAddr);
			DisasmAddr = SMP_get_item_end(DisasmAddr);
		else if (SMP_isCode(DataFlags)) {
			if (DebugAddress) SMP_msg(" isCode: %lx\n", (unsigned long) DisasmAddr);
			return true;
		}
		else { // must be isUnknown()
			// Very conservative here; only want to convert when the whole
			//  region is data, because that is a symptom of IDA missing
			//  a piece of code within a function (usually a piece of code
			//  that is only reachable via an indirect jump).
			if (DebugAddress) SMP_msg(" Not isData: %lx\n", (unsigned long) DisasmAddr);
		if (DebugAddress) SMP_msg(" new DisasmAddr: %lx\n", (unsigned long) DisasmAddr);
	if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
	if (!CannotConvert) {
		// Success.
		DisasmAddr = DisasmLocs[ShadowDisasmIndex];
		AreaSize = NextIDAAddr - DisasmAddr;
		if (DebugAddress) { 
			SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
			SMP_msg(" exiting FindDataToConvert()\n");
			SMP_msg("\n");
		}
	} // end if (!CannotConvert)
	return CannotConvert;
} // end of FindDataToConvert()

// Answers whether the converted code region delimited by StartAddr and
//  EndAddr looks like a function prologue; such a region should not be
//  folded into the previous function.
// The analysis is not implemented yet, so the answer is always false.
bool IsFunctionPrologue(STARS_ea_t StartAddr, STARS_ea_t EndAddr) {
	const bool LooksLikePrologue = false;  // **!!** TODO: placeholder result
	return LooksLikePrologue;
} // end of IsFunctionPrologue()

// Patch program bytes that could not be converted from
//  data to code, if it can be determined that the bytes represent code
//  that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
//  often are found in optimized glibc code because gcc was able to
//  determine that a function pointer was zero and did constant propagation,
//  but unfortunately was not able to determine that the code was unreachable.
//  IDA will not succeed in ua_code() for "call 0", but there is no danger
//  of a working program ever executing this code. Replacing the call with
//  no-ops permits us to continue converting a contiguous range of data to
//  code, and permits IDA to reanalyze the function later.
// Parameter: CurrDisasmAddr - address of the bytes to examine and possibly patch.
// Visible flow: addresses flagged as data or tail are rejected (false); the
//  instruction at the address is analyzed and must be an NN_call whose first
//  USE operand is a near pointer with address 0; patch_byte() is then called
//  InstrLen times with the x86 no-op value 0x90 and create_insn() is retried.
// Returns true if program bytes were patched.
// NOTE(review): this listing looks incomplete -- InstrLen is used but never
//  declared here, the advance of PatchAddr inside the loop is not visible,
//  several closing braces and "return false" statements appear to be absent,
//  one SMP_msg() call is cut short, and the "... avatar" / "... committed"
//  lines are not C++. Verify against the repository copy before editing.
bool MDPatchUnconvertedBytes(STARS_ea_t CurrDisasmAddr) {
clc5q's avatar
clc5q committed
	flags_t AddrFlags = SMP_getFlags(CurrDisasmAddr);
	if (SMP_isData(AddrFlags) || SMP_isTail(AddrFlags)) {
		// Bytes should have been converted to unknown already.
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Cannot patch data bytes or tail bytes at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		return false;
	}
	SMPInstr PatchInstr(CurrDisasmAddr);
	PatchInstr.Analyze();
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("decode_insn() failed on patch location %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		if (PatchInstr.GetIDAOpcode() != NN_call) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch non-call instruction at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		STARSOpndTypePtr CallDest = PatchInstr.GetFirstUse()->GetOp();
		if ((! CallDest->IsNearPointer()) || (0 != CallDest->GetAddr())) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch call unless it is call near ptr 0 at %x",
clc5q's avatar
clc5q committed
#endif
		STARS_ea_t PatchAddr = CurrDisasmAddr;
		for (int i = 0; i < InstrLen; ++i) {
			bool ok = patch_byte(PatchAddr, 0x90);  // x86 no-op
			if (!ok) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("patch_byte() failed at %x\n", PatchAddr);
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Patched %d bytes successfully at %x\n", InstrLen, CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
		InstrLen = create_insn(CurrDisasmAddr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg(" ... but ua_code() still failed!\n");
clc5q's avatar
clc5q committed
#endif
			return false;
		}
	} // end if (0 >= InstrLen) ... else ...
	return true;
} // end of MDPatchUnconvertedBytes()

// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
//  and an external disassembler (in DisasmLocs). Compare the lists and
//  try to convert addresses to code that are found in DisasmLocs but
//  not in IDAProLocs. Emit warnings when IDAProLocs has a code address
//  not found in DisasmLocs.
// The main loop walks both lists in step, comparing CurrAddr (IDA) with
//  CurrDisasmAddr (external disassembler):
//   CurrAddr <  CurrDisasmAddr : emit an AUDIT message, advance the IDA list.
//   CurrAddr == CurrDisasmAddr : advance both lists.
//   CurrAddr >  CurrDisasmAddr : ask FindDataToConvert() whether to skip the
//                                extra Disasm addresses or to convert the area;
//                                converted areas are queued on CodeReanalyzeList.
// Takes no arguments and returns nothing.
// NOTE(review): this listing looks incomplete -- IDAProIndex, AllConverted and
//  AllNops are used but never declared here, the opening of the do-loop that
//  ends at "} while (CurrDisasmAddr < CurrAddr);" is absent, some #endif lines
//  have no matching #if, two SMP_msg() calls are cut short, and the
//  "... avatar" / "... committed" lines are not C++. Verify against the
//  repository copy before changing the code.
void FixCodeIdentification(void) {
	size_t DisasmIndex = 0;
	STARS_ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
	STARS_ea_t CurrAddr = IDAProLocs[IDAProIndex++];

	while (DisasmIndex <= DisasmLocs.size()) {
		// If the current address is less than the current
		//  external disasm address, we have the rare case in
		//  which IDA Pro has identified an address as code
		//  but the external disasm has not. Emit a warning
		//  message and go on to the next IDA address.
		if (CurrAddr < CurrDisasmAddr) {
			SMPInstr TempInstr(CurrAddr);
			TempInstr.Analyze();
			SMP_msg("AUDIT: Address %lx is code in IDB but not in external disassembler: %s\n",
				(unsigned long) CurrAddr, TempInstr.GetDisasm());
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit.
				break;
			}
		}
		else if (CurrAddr == CurrDisasmAddr) {
			// If equal, no problem, we are moving through the
			//  code addresses in lockstep. Grab the next address
			//  from each source.
			if (DisasmIndex < DisasmLocs.size()) {
				CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else {
				++DisasmIndex;  // cause loop exit; skip cleanup loop
			}
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit in cleanup loop.
				break;
			}
		}
		else {
			// We must have CurrAddr > CurrDisasmAddr. That means
			//  IDA has jumped over some code addresses in
			//  DisasmLocs. We need to try to convert addresses
			//  to code until we can reach the current addr.
			// For now, we will address only the case in which IDA
			//  has identified addresses as data bytes, and the
			//  external disassembler(e.g. objdump) has identified
			//  the same addresses as code. We only want to deal with
			//  contiguous areas of data-to-code conversion that do NOT
			//  follow a return statement.
			int AreaSize = 0;
			STARS_ea_t AreaStart = CurrDisasmAddr;
			STARS_ea_t AreaEnd;
			SMP_msg("CurrDisasmAddr: %x  CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
#endif
			bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
			if (SkipArea) {
				// Skip over the extra external disasm addresses.
				while (CurrDisasmAddr < CurrAddr)
					CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else { 
				// Convert the overlooked code region to unexplored.
				AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
clc5q's avatar
clc5q committed
				SMP_del_items(AreaStart, AreaSize, 0);
				SMP_bounds_t ConvertRegion;
				ConvertRegion.startEA = AreaStart;
				ConvertRegion.endEA = AreaEnd;
				FixupRegion CurrRegion(ConvertRegion);
				CodeReanalyzeList.push_back(CurrRegion);
clc5q's avatar
clc5q committed
					flags_t InstrFlags = SMP_getFlags(CurrDisasmAddr);
					if (!SMP_isUnknown(InstrFlags)) {
						SMP_msg("Sync problem in FixCodeID: %lx\n", (unsigned long) CurrDisasmAddr);
						int InstrLen = create_insn(CurrDisasmAddr);
						if (InstrLen > 0) { // Successfully converted to code
							SMPInstr NewInstr(CurrDisasmAddr);
							NewInstr.Analyze();
							SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
#endif
						}
						else {
							if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q's avatar
clc5q committed
								;
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
								CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
					} // end if (SMP_isCode(InstrFlags) ... else ...
					if (DisasmIndex < DisasmLocs.size()) {
						CurrDisasmAddr = DisasmLocs[DisasmIndex++];
					}
					else {
						// cause loops to exit
						CurrDisasmAddr = CurrAddr;
						++DisasmIndex; // skip cleanup loop
					}
				} while (CurrDisasmAddr < CurrAddr);
				if (AllConverted && AllNops) {
					// We want to convert the region back to unexplored bytes
					//  and take it off the work list. Regions that are all nops
					//  create data flow analysis problems sometimes. The region
					//  is often unreachable code and produces a basic block with
					//  no predecessors within a function. This often happens when
					//  an optimizing compiler uses nops as padding to align jump
					//  targets on cache line boundaries. With no fall through into
					//  the nops, they are unreachable and should be left as unknown.
					SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
clc5q's avatar
clc5q committed
					SMP_del_items(CurrRegion.GetStart(),
						CurrRegion.GetEnd() - CurrRegion.GetStart(), 0);
			} // end if (SkipArea) ... else ...
		} // end if (addr < CurrDisasmAddr) .. else if ... else ...
	} // end while (DisasmIndex <= DisasmLocs.size()

#if 0  // Make this code use FindDataToConvert()  **!!**
	// Cleanup loop:
	// If there are still Disasm addrs to process, try to turn them
	//  into code in the IDB.
	while (DisasmIndex <= DisasmLocs.size()) {
clc5q's avatar
clc5q committed
		flags_t InstrFlags = SMP_getFlags(CurrDisasmAddr);
		if (SMP_isCode(InstrFlags)) {
			SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
			segment_t *seg = SMP_getseg(CurrDisasmAddr);
clc5q's avatar
clc5q committed
				SMP_del_items(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, 0);
			}
			else {
				// Might be safest to just discontinue processing
				//  if we wander into a non-code segment.
				//  DisasmLocs should not have an entire code segment
				//  that IDA Pro missed.
				break;
			}
			int InstrLen = ua_code(CurrDisasmAddr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(CurrDisasmAddr);
				NewInstr.Analyze();
				SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
				SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
		} // end if (SMP_isCode(InstrFlags) ... else ...
		if (DisasmIndex < DisasmLocs.size()) {
			CurrDisasmAddr = DisasmLocs[DisasmIndex++];
		}
		else {
			++DisasmIndex; // cause loop to exit
		}
	} // end while (DisasmIndex <= DisasmLocs.size()
#endif

	return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
//  Earlier failures are usually because the instruction branches to an address that has not
//  yet been converted from data to code, so ua_code() failed. Now that all data to code
//  conversions have completed, ua_code() should succeed.
// Bookkeeping visible in the code: a region with an empty FixupInstrs list, or whose
//  instructions were all converted, gets its start set to STARS_BADADDR; an individual
//  fixed-up instruction is overwritten with STARS_BADADDR and later erased from FixupInstrs;
//  the final loop erases every region whose start is STARS_BADADDR from CodeReanalyzeList.
// Return the number of instructions successfully analyzed.
// NOTE(review): this listing looks incomplete -- InstrLen is used but never declared here,
//  nothing visible updates AllNops or "changes", no return statement is visible, closing
//  braces and #if lines are absent in several places, one SMP_msg() call is cut short,
//  the trailing "#if 0" fragments are unbalanced, and the "... avatar" / "... committed"
//  lines are not C++. Verify against the repository copy before changing the code.
int FixupNewCodeChunks(void) {
	list<FixupRegion>::iterator CurrRegion;
	int changes = 0;
	for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
		bool AllConverted = true;
		bool AllNops = true;
		bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
		if (NoFixups) {
			CurrRegion->SetStart(STARS_BADADDR);  // mark for removal
			continue;  // skip to next region
		}
		list<STARS_ea_t>::iterator CurrInstr;
		for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr)  {
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(*CurrInstr);
				NewInstr.Analyze();
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
					*CurrInstr = STARS_BADADDR; // mark for removal
			}
			else {
				AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
			}
		} // end for all instrs in CurrRegion
		if (AllConverted && !AllNops) {
			SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			CurrRegion->SetStart(STARS_BADADDR); // mark for removal
		else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
clc5q's avatar
clc5q committed
			SMP_del_items(CurrRegion->GetStart(),
				CurrRegion->GetEnd() - CurrRegion->GetStart(), 0);
			CurrRegion->SetStart(STARS_BADADDR); // mark for removal
		else {
			// Remove only the instructions that were fixed up.
			CurrInstr = CurrRegion->FixupInstrs.begin(); 
			while (CurrInstr != CurrRegion->FixupInstrs.end()) {
				if (STARS_BADADDR == *CurrInstr) {
					CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
				}
				else {
					++CurrInstr;
				}
			}
		}
	} // end for all regions in the CodeReanalyzeList

	// Remove completed regions from the CodeReanalyzeList
	CurrRegion = CodeReanalyzeList.begin();
	while (CurrRegion != CodeReanalyzeList.end()) {
		if (STARS_BADADDR == CurrRegion->GetStart())
			CurrRegion = CodeReanalyzeList.erase(CurrRegion);
		else
			++CurrRegion;
	}

#if 0
	if (AllConverted) {
					if (IsFunctionPrologue(AreaStart, AreaEnd)) {
						// Create a new function entry chunk here.
						//  **!!** TODO
						;
					}
					else {
						// Extend the previous chunk to include the
						//  converted code.
						STARS_ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
						STARS_Function_t *PrevChunk = get_fchunk(PrevIDAAddr);
						SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
						SMP_msg(" STARS_Function_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0  // temporary for debugging
						if (is_func_entry(PrevChunk)) {
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Func extended to include code from %x to %x\n",
									AreaStart, AreaEnd);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend func from %x to %x\n",
									AreaStart, AreaEnd);
							}
						}
						else { // tail
							// See if this works for function tails, also.
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Tail extended to include code from %x to %x\n",
								STARS_Function_t *TailOwner = get_func(PrevChunk->owner);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend tail from %x to %x\n",
									AreaStart, AreaEnd);
							}
						} // end if (is_func_entry()) ... else ...
#endif
					} // end if (IsFunctionPrologue()) ... else ...
				} // end if (AllConverted)
				else {
					SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
} // end of FixupNewCodeChunks()
// Audit the IDA code database by looking at all instructions in the
//  code segment and printing all those that are not contained in a
//  function. Emit the context-free annotations that we are able to
//  emit on a per-instruction basis.
// Parameters:
//  CurrSeg       - segment to walk, item by item, from get_startEA() up to
//                  (not including) get_endEA(), stepping with SMP_get_item_end().
//  AnnotFile     - passed through to EmitAnnotations().
//  InfoAnnotFile - passed through to EmitAnnotations().
// For unknown bytes, create_insn() is attempted; on success the new
//  instruction is analyzed and annotated in isolation.
// NOTE(review): this listing looks incomplete -- FirstFuncAddr is used but
//  never declared here, the "if (SMP_isTail(InstrFlags))" line has lost its
//  own statement, many closing braces and else branches are absent, and the
//  "... avatar" / "... committed" lines are not C++. Verify against the
//  repository copy before changing the code.
jdh8d's avatar
jdh8d committed
void FindOrphanedCode(STARS_Segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
	char disasm[MAXSTR];
clc5q's avatar
clc5q committed
	qstring *Qdisasm;
	bool PrefetchInstSeenLast = false; // inst before current inst was a prefetch
	bool UndefinedOpcodeSeenLast = false; // inst before current inst was an undefined opcode
	for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
		addr = SMP_get_item_end(addr)) {
clc5q's avatar
clc5q committed
		flags_t InstrFlags = SMP_getFlags(addr);
		if (SMP_isTail(InstrFlags))
		if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
			if (!(CurrProg->IsInstAddrStillInFunction(addr, FirstFuncAddr))) {
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm());
clc5q's avatar
clc5q committed
#endif
				// TODO: If there are code xrefs to the orphan code,
				//  see what kind. If a CALL, and orphan code looks
				//  like a prologue, make a function. If a JUMP of
				//  some kind, then make a function chunk and make
				//  it a tail of all functions that jump to it. **!!**

				// Do machine-dependent fixes for DEF and USE lists.
				//  The fixes can help produce better annotations.
				CurrInst.MDFixupDefUseLists();

				// If instruction is still not included in a code chunk,
				//  emit annotations for it in isolation.
				if (CurrInst.IsAnalyzeable()) {
					CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);

					// If instruction is an indirect branch, emit an XREF
					//  annotation for each of its targets.
					SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
					bool IndirCallFlag = (CurrDataFlow == INDIR_CALL);
					if ((CurrDataFlow == INDIR_JUMP) || IndirCallFlag) {
						global_STARS_program->PrintAllCodeToCodeXrefs(addr, CurrInst.GetSize(), IndirCallFlag, false);
					// If the address of this instruction appeared in data, emit an IBT annotation.
					if (global_STARS_program->IsCodeAddressTaken(addr)) {
						global_STARS_program->PrintUnknownCodeXref(addr, CurrInst.GetSize(), ZST_CODEADDRESSTAKEN);
				// Emit IBT annotations for instructions that fit computed-goto patterns in libc/glibc, such
				//  as prefetch instructions and the instructions that follow them (computed goto often chooses
				//  between going to the prefetch or jumping just past it, and IDA Pro cannot analyze these libc
				//  macro-generated computed gotos even if they are not orphaned code). Likewise, an undefined opcode
				//  often separates an indirect jump and its first target, so inst after undefined opcode is IBT.
				bool EmitIBTAnnotation = (PrefetchInstSeenLast || UndefinedOpcodeSeenLast);
				if (CurrInst.MDIsPrefetchOpcode()) {
					PrefetchInstSeenLast = true;
					UndefinedOpcodeSeenLast = false;
					EmitIBTAnnotation = true;
				}
				else if (CurrInst.MDIsUndefinedOpcode()) {
					UndefinedOpcodeSeenLast = true;
					PrefetchInstSeenLast = false;
				}
				else {
					PrefetchInstSeenLast = false;
					UndefinedOpcodeSeenLast = false;
				}
				if (EmitIBTAnnotation) {
					global_STARS_program->PrintUnknownCodeXref(addr, CurrInst.GetSize(), ZST_COMPUTEDGOTO);
				// mark addresses that are taken via the instruction
				global_STARS_program->PrintAllAddressTakenXrefs(addr, CurrInst.GetSize());
clc5q's avatar
clc5q committed
		else if (SMP_isUnknown(InstrFlags)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
clc5q's avatar
clc5q committed
#endif
			// Can IDA analyze this to be code?
			int InstrLen;
			InstrLen = create_insn(addr);
			if (InstrLen > 0) {
				bool IDAsuccess = SMP_generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					// Remove interactive color-coding tags.
					STARS_ssize_t StringLen = SMP_tag_remove(disasm, disasm, sizeof(disasm) - 1);
					if (-1 >= StringLen) {
						SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Successfully analyzed!  %s\n", disasm);
clc5q's avatar
clc5q committed
#endif
						SMPInstr UnknownInstr(addr);
						UnknownInstr.Analyze();
						// TODO: Get new code into a chunk.  **!!**
						// If instruction is still not included in a code chunk,
						//  emit annotations for it in isolation.
						UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
					SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()

// Find calls and jumps from orphaned code to functions. Mark those functions
//  so that we will know that the program CFG is not complete.
// Parameter: CurrSeg - segment to walk, item by item, from get_startEA() up
//  to (not including) get_endEA(), stepping with SMP_get_item_end().
// For each analyzeable branch or call, the call target is tried first and the
//  jump target second; the first address of the function found at the target
//  is passed to CurrProg->SetFuncAddrCalledFromOrphanedCode(), with a WARNING
//  logged when the target is not that first address.
// NOTE(review): this listing looks incomplete -- the conditions that open the
//  blocks closed at "end if (nullptr == CurrFunc)" and after the INFO message
//  (presumably null checks on CurrIDAFunc and TargetFunc) are absent, and the
//  "... avatar" / "... committed" lines are not C++. Verify against the
//  repository copy before changing the code.
void FindLinksFromOrphanedCode(STARS_Segment_t *CurrSeg) {
	char disasm[MAXSTR];
	for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA(); addr = SMP_get_item_end(addr)) {
clc5q's avatar
clc5q committed
		flags_t InstrFlags = SMP_getFlags(addr);
		if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
			// Does IDA Pro think addr is inside a function?
			func_t* CurrIDAFunc = ::get_func(addr);
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
				if (CurrInst.IsAnalyzeable()) {
					// If instruction is control flow, see if it reaches an addr in a function.
					SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
					if (IsBranchOrCall(CurrDataFlow)) {
						STARS_ea_t TargetAddr = CurrInst.GetCallTarget();
						if (STARS_BADADDR == TargetAddr) {
							TargetAddr = CurrInst.GetJumpTarget();
						}
						if (STARS_BADADDR != TargetAddr) {
							// See if TargetAddr is inside a function.
							func_t* TargetFunc = ::get_func(TargetAddr);
clc5q's avatar
clc5q committed
#if (IDA_SDK_VERSION < 700)
								STARS_ea_t FirstAddrInFunc = TargetFunc->startEA;
clc5q's avatar
clc5q committed
#else
								STARS_ea_t FirstAddrInFunc = TargetFunc->start_ea;
#endif
								if (FirstAddrInFunc != TargetAddr) {
									SMP_msg("WARNING: Orphaned code at %llx calls %llx which is inside func starting at %llx\n",
										(unsigned long long) addr, (unsigned long long) TargetAddr, (unsigned long long) FirstAddrInFunc);
								}
								CurrProg->SetFuncAddrCalledFromOrphanedCode(FirstAddrInFunc);
								SMP_msg("INFO: Orphaned code at %llx calls func at %llx\n",
									(unsigned long long) addr, (unsigned long long) FirstAddrInFunc);
							}
						}
					}
				}
			} // end if (nullptr == CurrFunc)
		} // end if addr is code and isHead
	} // end for (ea_t addr = CurrSeg->startEA; ...)
	return;
} // end of FindLinksFromOrphanedCode()

// Version of FindOrphanedCode that does not emit annotations but can be used
//  to determine at what point in time code becomes orphaned.
// Parameters:
//  CurrSeg  - segment to walk, item by item, from get_startEA() up to
//             (not including) get_endEA(), stepping with SMP_get_item_end().
//  FirstRun - when false, an address that is newly inserted into CodeOrphans
//             triggers a "SERIOUS WARNING" message; when true, addresses are
//             only recorded.
// Every code head for which SMP_get_func() returns nullptr is inserted into
//  CodeOrphans. DebugAddr is a hard-coded address that gets an extra message.
// NOTE(review): this listing looks incomplete -- the closing braces after the
//  SERIOUS WARNING message are absent and the "... avatar" / "... committed"
//  lines are not C++. Verify against the repository copy before editing.
jdh8d's avatar
jdh8d committed
void Debug_FindOrphanedCode(STARS_Segment_t *CurrSeg, bool FirstRun) {
	STARS_ea_t DebugAddr = 0x8050db0;
	for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
		addr = SMP_get_item_end(addr)) {
clc5q's avatar
clc5q committed
		flags_t InstrFlags = SMP_getFlags(addr);
		if (SMP_isHead(InstrFlags) && SMP_isCode(InstrFlags)) {
			STARS_Function_t *CurrFunc = SMP_get_func(addr);
			if (nullptr == CurrFunc) {  // Code not in a func; orphaned
				pair<set<STARS_ea_t>::iterator, bool> pairib;
				pairib = CodeOrphans.insert(addr);
				if (DebugAddr == addr) {
					SMP_msg("DEBUG: Orphaned code addr %lx found.\n", (unsigned long) addr);
				}
				if ((!FirstRun) && (pairib.second)) {
					SMP_msg("SERIOUS WARNING: Newly orphaned code at %lx \n", (unsigned long) addr);
	} // end for (STARS_ea_t addr = CurrSeg->startEA; ...)
} // end of Debug_FindOrphanedCode()

// Entry point for auditing branches and calls in the IDA database: each
//  one should have a valid code target (not data or unknown bytes), and
//  the code cross references should reflect the linkage.
// The work is delegated entirely to SMP_AuditCodeTargets().
void AuditCodeTargets(void) {
	SMP_AuditCodeTargets();
	return;
} // end of AuditCodeTargets()


void SpecialDebugOutput(void) {
	char disasm[MAXSTR];
	ProblemAddrs.push_back(0x8066d08);
	bool IDAsuccess;
	int InstLen;

	for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
		STARS_ea_t addr = ProblemAddrs[index];
clc5q's avatar
clc5q committed
		flags_t InstrFlags = SMP_getFlags(addr);
		if (SMP_isCode(InstrFlags) && SMP_isHead(InstrFlags)) {
			SMPInstr TempInst(addr);
			IDAsuccess = TempInst.FillCmd(); // Emits ERROR messages if there are failures
		}
	}
	return;
} // end of SpecialDebugOutput()