// SMPStaticAnalyzer.cpp
		if (DebugAddress) SMP_msg(" new DisasmAddr: %lx\n", (unsigned long) DisasmAddr);
	if (DebugAddress) SMP_msg(" loop exit CannotConvert: %d\n", CannotConvert);
	if (!CannotConvert) {
		// Success.
		DisasmAddr = DisasmLocs[ShadowDisasmIndex];
		AreaSize = NextIDAAddr - DisasmAddr;
		if (DebugAddress) { 
			SMP_msg(" Success! AreaSize: %x Old index: %zu new index: %zu\n",
			SMP_msg(" exiting FindDataToConvert()\n");
			SMP_msg("\n");
		}
	} // end if (!CannotConvert)
	return CannotConvert;
} // end of FindDataToConvert()

// Decide whether a freshly converted code region, delimited by StartAddr and
//  EndAddr, looks like the prologue of a function. A region that does should
//  not be merged into the function that precedes it.
// Prologue recognition is not implemented yet, so every region is currently
//  reported as "not a prologue".
bool IsFunctionPrologue(STARS_ea_t StartAddr, STARS_ea_t EndAddr) {
	// Neither bound is consulted until the check is written.
	(void) StartAddr;
	(void) EndAddr;
	return false;  // **!!** TODO 
} // end of IsFunctionPrologue()

// Patch program bytes that could not be converted from
//  data to code, if it can be determined that the bytes represent code
//  that IDA has a hard time with.
// Currently limited to finding "call near ptr 0" instructions, which
//  often are found in optimized glibc code because gcc was able to
//  determine that a function pointer was zero and did constant propagation,
//  but unfortunately was not able to determine that the code was unreachable.
//  IDA will not succeed in ua_code() for "call 0", but there is no danger
//  of a working program ever executing this code. Replacing the call with
//  no-ops permits us to continue converting a contiguous range of data to
//  code, and permits IDA to reanalyze the function later.
// CurrDisasmAddr: address of the instruction that could not be converted.
// Returns true if program bytes were patched and the address was then
//  accepted as code; returns false (leaving any partial patch in place)
//  on every failure path.
bool MDPatchUnconvertedBytes(STARS_ea_t CurrDisasmAddr) {
	flags_t AddrFlags = getFlags(CurrDisasmAddr);
	if (isData(AddrFlags) || isTail(AddrFlags)) {
		// Bytes should have been converted to unknown already.
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Cannot patch data bytes or tail bytes at %lx\n", (unsigned long) CurrDisasmAddr);
#endif
		return false;
	}
	SMPInstr PatchInstr(CurrDisasmAddr);
	PatchInstr.Analyze();
	int InstrLen = PatchInstr.GetSize();
	if (0 >= InstrLen) {
		// Nothing was decoded, so there is no instruction length to patch over.
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("decode_insn() failed on patch location %lx\n", (unsigned long) CurrDisasmAddr);
#endif
		return false;
	}
	else {
		if (PatchInstr.GetIDAOpcode() != NN_call) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch non-call instruction at %lx\n", (unsigned long) CurrDisasmAddr);
#endif
			return false;
		}
		// Only a near call whose target address is zero is eligible.
		STARSOpndTypePtr CallDest = PatchInstr.GetFirstUse()->GetOp();
		if ((! CallDest->IsNearPointer()) || (0 != CallDest->GetAddr())) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("Cannot patch call unless it is call near ptr 0 at %lx\n",
				(unsigned long) CurrDisasmAddr);
#endif
			return false;
		}
		// Overwrite each byte of the call, one address at a time.
		for (int i = 0; i < InstrLen; ++i) {
			STARS_ea_t PatchAddr = CurrDisasmAddr + i;
			bool ok = patch_byte(PatchAddr, 0x90);  // x86 no-op
			if (!ok) {
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("patch_byte() failed at %lx\n", (unsigned long) PatchAddr);
#endif
				return false;
			}
		}
#if SMP_DEBUG_FIXUP_IDB
		SMP_msg("Patched %d bytes successfully at %lx\n", InstrLen, (unsigned long) CurrDisasmAddr);
#endif
		// Ask IDA to turn the patched bytes into an instruction.
#if IDA_SDK_VERSION < 600
		InstrLen = ua_code(CurrDisasmAddr);
#else
		InstrLen = create_insn(CurrDisasmAddr);
#endif
		if (0 >= InstrLen) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg(" ... but ua_code() still failed!\n");
#endif
			return false;
		}
	} // end if (0 >= InstrLen) ... else ...
	return true;
} // end of MDPatchUnconvertedBytes()

// Use the lists of code addresses identified by IDA Pro (in IDAProLocs)
//  and an external disassembler (in DisasmLocs). Compare the lists and
//  try to convert addresses to code that are found in DisasmLocs but
//  not in IDAProLocs. Emit warnings when IDAProLocs has a code address
//  not found in DisasmLocs.
// Each region that is converted is queued on CodeReanalyzeList as a
//  FixupRegion; an all-nops region is reset to unknown and dequeued again.
// NOTE(review): this copy of the function looks incomplete. Several names
//  are used without a visible declaration, some preprocessor conditionals
//  are unbalanced, and two SMP_msg() calls end in mid-argument-list.
//  Compare against version control before relying on it.
void FixCodeIdentification(void) {
	size_t DisasmIndex = 0;
	// NOTE(review): element 0 of each list is read with no visible emptiness
	//  check -- confirm callers guarantee both lists are non-empty.
	STARS_ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
	// NOTE(review): IDAProIndex has no visible declaration in this function;
	//  presumably a "size_t IDAProIndex = 0;" line is missing -- confirm.
	STARS_ea_t CurrAddr = IDAProLocs[IDAProIndex++];

	// DisasmIndex is always one past the entry held in CurrDisasmAddr, so the
	//  loop runs until it is pushed beyond DisasmLocs.size().
	while (DisasmIndex <= DisasmLocs.size()) {
		// If the current address is less than the current
		//  external disasm address, we have the rare case in
		//  which IDA Pro has identified an address as code
		//  but the external disasm has not. Emit a warning
		//  message and go on to the next IDA address.
		if (CurrAddr < CurrDisasmAddr) {
			SMPInstr TempInstr(CurrAddr);
			TempInstr.Analyze();
			SMP_msg("AUDIT: Address %lx is code in IDB but not in external disassembler: %s\n",
				(unsigned long) CurrAddr, TempInstr.GetDisasm());
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit.
				break;
			}
		}
		else if (CurrAddr == CurrDisasmAddr) {
			// If equal, no problem, we are moving through the
			//  code addresses in lockstep. Grab the next address
			//  from each source.
			if (DisasmIndex < DisasmLocs.size()) {
				CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else {
				++DisasmIndex;  // cause loop exit; skip cleanup loop
			}
			if (IDAProIndex < IDAProLocs.size())
				CurrAddr = IDAProLocs[IDAProIndex++];
			else {
				// Last IDA addr; might still process Disasm addrs
				//  after loop exit in cleanup loop.
				break;
			}
		}
		else {
			// We must have CurrAddr > CurrDisasmAddr. That means
			//  IDA has jumped over some code addresses in
			//  DisasmLocs. We need to try to convert addresses
			//  to code until we can reach the current addr.
			// For now, we will address only the case in which IDA
			//  has identified addresses as data bytes, and the
			//  external disassembler(e.g. objdump) has identified
			//  the same addresses as code. We only want to deal with
			//  contiguous areas of data-to-code conversion that do NOT
			//  follow a return statement.
			int AreaSize = 0;
			STARS_ea_t AreaStart = CurrDisasmAddr;
			STARS_ea_t AreaEnd;
			// NOTE(review): the #endif below has no visible matching #if;
			//  presumably "#if SMP_DEBUG_FIXUP_IDB" preceded this message.
			SMP_msg("CurrDisasmAddr: %x  CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
#endif
			// FindDataToConvert() returns its CannotConvert flag, so a true
			//  result means the area must be skipped rather than converted.
			//  AreaSize is presumably filled in by reference -- confirm.
			bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
			if (SkipArea) {
				// Skip over the extra external disasm addresses.
				// NOTE(review): DisasmIndex is not bounds-checked in this
				//  loop; verify it cannot run past the end of DisasmLocs.
				while (CurrDisasmAddr < CurrAddr)
					CurrDisasmAddr = DisasmLocs[DisasmIndex++];
			}
			else { 
				// Convert the overlooked code region to unexplored.
				AreaEnd = CurrDisasmAddr + AreaSize;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
#endif
				do_unknown_range(AreaStart, AreaSize, DOUNK_SIMPLE);
				SMP_bounds_t ConvertRegion;
				ConvertRegion.startEA = AreaStart;
				ConvertRegion.endEA = AreaEnd;
				FixupRegion CurrRegion(ConvertRegion);
				// NOTE(review): the region is queued here, before the loop
				//  below appends to CurrRegion.FixupInstrs. If push_back()
				//  stores a copy, the queued entry will not see those
				//  addresses -- verify.
				CodeReanalyzeList.push_back(CurrRegion);
				// Walk every external-disassembler address inside the gap,
				//  until we catch up with the current IDA address.
				do {
					flags_t InstrFlags = getFlags(CurrDisasmAddr);
					// NOTE(review): from here to the closing brace commented
					//  "end if (isCode(InstrFlags) ... else ..." the text is
					//  visibly damaged: the else that should follow the sync
					//  message, the #else/#endif around the two InstrLen
					//  declarations, the tail of the success message and the
					//  else before the FixupInstrs push are not present.
					if (!isUnknown(InstrFlags)) {
						SMP_msg("Sync problem in FixCodeID: %lx\n", (unsigned long) CurrDisasmAddr);
#if IDA_SDK_VERSION < 600
						int InstrLen = ua_code(CurrDisasmAddr);
						int InstrLen = create_insn(CurrDisasmAddr);
						if (InstrLen > 0) { // Successfully converted to code
							SMPInstr NewInstr(CurrDisasmAddr);
							NewInstr.Analyze();
							SMP_msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
#endif
						}
						else {
							// Conversion failed; try patching the bytes.
							if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
clc5q's avatar
clc5q committed
								;
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg(" Patched bytes at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
								// Record the address on the region's fixup list.
								CurrRegion.FixupInstrs.push_back(CurrDisasmAddr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
								SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
clc5q's avatar
clc5q committed
#endif
							}
						}
					} // end if (isCode(InstrFlags) ... else ...
					// Advance to the next external-disassembler address.
					if (DisasmIndex < DisasmLocs.size()) {
						CurrDisasmAddr = DisasmLocs[DisasmIndex++];
					}
					else {
						// cause loops to exit
						CurrDisasmAddr = CurrAddr;
						++DisasmIndex; // skip cleanup loop
					}
				} while (CurrDisasmAddr < CurrAddr);
				// NOTE(review): AllConverted and AllNops have no visible
				//  declaration or assignment in this function -- confirm
				//  where they are defined and updated.
				if (AllConverted && AllNops) {
					// We want to convert the region back to unexplored bytes
					//  and take it off the work list. Regions that are all nops
					//  create data flow analysis problems sometimes. The region
					//  is often unreachable code and produces a basic block with
					//  no predecessors within a function. This often happens when
					//  an optimizing compiler uses nops as padding to align jump
					//  targets on cache line boundaries. With no fall through into
					//  the nops, they are unreachable and should be left as unknown.
					SMP_msg("FixCodeID nops region from %x to %x\n", CurrRegion.GetStart(),
					do_unknown_range(CurrRegion.GetStart(),
						CurrRegion.GetEnd() - CurrRegion.GetStart(), DOUNK_SIMPLE);
					// Drops the entry queued for this region above.
					CodeReanalyzeList.pop_back();
				}
			} // end if (SkipArea) ... else ...
		} // end if (addr < CurrDisasmAddr) .. else if ... else ...
	} // end while (DisasmIndex <= DisasmLocs.size()

	// The cleanup loop below is compiled out.
#if 0  // Make this code use FindDataToConvert()  **!!**
	// Cleanup loop:
	// If there are still Disasm addrs to process, try to turn them
	//  into code in the IDB.
	while (DisasmIndex <= DisasmLocs.size()) {
		flags_t InstrFlags = getFlags(CurrDisasmAddr);
		if (isCode(InstrFlags)) {
			SMP_msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
			segment_t *seg = SMP_getseg(CurrDisasmAddr);
			if (SEG_CODE == seg->type) {
				do_unknown_range(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, DOUNK_SIMPLE);
			}
			else {
				// Might be safest to just discontinue processing
				//  if we wander into a non-code segment.
				//  DisasmLocs should not have an entire code segment
				//  that IDA Pro missed.
				break;
			}
			int InstrLen = ua_code(CurrDisasmAddr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(CurrDisasmAddr);
				NewInstr.Analyze();
				SMP_msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
				SMP_msg("FixCodeID failure at %x\n", CurrDisasmAddr);
			}
		} // end if (isCode(InstrFlags) ... else ...
		if (DisasmIndex < DisasmLocs.size()) {
			CurrDisasmAddr = DisasmLocs[DisasmIndex++];
		}
		else {
			++DisasmIndex; // cause loop to exit
		}
	} // end while (DisasmIndex <= DisasmLocs.size()
#endif

	return;
} // end of FixCodeIdentification()
// Analyze instructions that could not be analyzed earlier and were placed on the CodeReanalyzeList.
//  Earlier failures are usually because the instruction branches to an address that has not
//  yet been converted from data to code, so ua_code() failed. Now that all data to code
//  conversions have completed, ua_code() should succeed.
// Regions that are finished (no fixups, fully converted, or all nops) are marked by
//  setting their start address to BADADDR and are erased from the list at the end.
// Return the number of instructions successfully analyzed.
// NOTE(review): no visible statement modifies "changes" after its initialization, so as
//  shown the function always returns 0 -- confirm whether an increment is missing.
// NOTE(review): this copy of the function looks incomplete; several preprocessor
//  conditionals are unbalanced. Compare against version control.
int FixupNewCodeChunks(void) {
	list<FixupRegion>::iterator CurrRegion;
	int changes = 0;
	for (CurrRegion = CodeReanalyzeList.begin(); CurrRegion != CodeReanalyzeList.end(); ++CurrRegion) {
		bool AllConverted = true;
		bool AllNops = true;
		bool NoFixups = (0 == CurrRegion->FixupInstrs.size());
		if (NoFixups) {
			CurrRegion->SetStart(BADADDR);  // mark for removal
			continue;  // skip to next region
		}
		// Retry the conversion for each address recorded on the region.
		list<STARS_ea_t>::iterator CurrInstr;
		for (CurrInstr = CurrRegion->FixupInstrs.begin(); CurrInstr != CurrRegion->FixupInstrs.end(); ++CurrInstr)  {
			// NOTE(review): the #endif that should close this version check is
			//  not visible before the "if (InstrLen > 0)" below.
#if IDA_SDK_VERSION < 600
			int InstrLen = ua_code(*CurrInstr);
#else
			int InstrLen = create_insn(*CurrInstr);
			if (InstrLen > 0) { // Successfully converted to code
				SMPInstr NewInstr(*CurrInstr);
				NewInstr.Analyze();
				// NOTE(review): the two statements after the message below sit
				//  one level deeper and are followed by an extra closing brace;
				//  the #endif and the condition that opened that block are not
				//  visible -- confirm what test guards "AllNops = false".
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks success at %x: len: %d\n", *CurrInstr, InstrLen);
					AllNops = false;
					*CurrInstr = BADADDR; // mark for removal
				}
			}
			else {
				AllConverted = false;
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("FixupNewCodeChunks failure at %x\n", *CurrInstr);
#endif
			}
		} // end for all instrs in CurrRegion
		if (AllConverted && !AllNops) {
			// NOTE(review): the #endif after this message has no visible
			//  matching #if; presumably "#if SMP_DEBUG_FIXUP_IDB" is missing.
			SMP_msg("FixupNewCodeChunks success for region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			CurrRegion->SetStart(BADADDR); // mark for removal
		}
		else if (AllConverted && AllNops) {
#if SMP_DEBUG_FIXUP_IDB
			SMP_msg("FixupNewCodeChunks re-converting nops region from %x to %x\n",
				CurrRegion->GetStart(), CurrRegion->GetEnd());
#endif
			// Reset the region to unknown bytes, using the extent computed
			//  before the start address is overwritten with BADADDR.
			do_unknown_range(CurrRegion->GetStart(),
				CurrRegion->GetEnd() - CurrRegion->GetStart(), DOUNK_SIMPLE);
			CurrRegion->SetStart(BADADDR); // mark for removal
		}
		else {
			// Remove only the instructions that were fixed up.
			//  erase() returns the following iterator, so the loop advances
			//  explicitly only when nothing was erased.
			CurrInstr = CurrRegion->FixupInstrs.begin(); 
			while (CurrInstr != CurrRegion->FixupInstrs.end()) {
				if (BADADDR == *CurrInstr) {
					CurrInstr = CurrRegion->FixupInstrs.erase(CurrInstr);
				}
				else {
					++CurrInstr;
				}
			}
		}
	} // end for all regions in the CodeReanalyzeList

	// Remove completed regions from the CodeReanalyzeList
	CurrRegion = CodeReanalyzeList.begin();
	while (CurrRegion != CodeReanalyzeList.end()) {
		if (BADADDR == CurrRegion->GetStart())
			CurrRegion = CodeReanalyzeList.erase(CurrRegion);
		else
			++CurrRegion;
	}

	// The chunk-extension logic below is intended to be compiled out.
	// NOTE(review): the #if/#endif nesting inside this region is unbalanced as
	//  shown (an #endif appears before the nested "#if 0"), and the names it
	//  uses (AllConverted, AreaStart, AreaEnd, IDAProIndex) are not declared at
	//  this scope -- verify against version control.
#if 0
	if (AllConverted) {
					if (IsFunctionPrologue(AreaStart, AreaEnd)) {
						// Create a new function entry chunk here.
						//  **!!** TODO
						;
					}
					else {
						// Extend the previous chunk to include the
						//  converted code.
						STARS_ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
						STARS_Function_t *PrevChunk = get_fchunk(PrevIDAAddr);
						SMP_msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
						SMP_msg(" STARS_Function_t pointer for chunk: %x\n", PrevChunk);
#endif
#if 0  // temporary for debugging
						if (is_func_entry(PrevChunk)) {
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Func extended to include code from %x to %x\n",
									AreaStart, AreaEnd);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend func from %x to %x\n",
									AreaStart, AreaEnd);
							}
						}
						else { // tail
							// See if this works for function tails, also.
							// Extend the func entry to contain the new code.
							if (func_setend(PrevIDAAddr, AreaEnd)) {
								SMP_msg("Tail extended to include code from %x to %x\n",
								STARS_Function_t *TailOwner = get_func(PrevChunk->owner);
								FuncReanalyzeList.push_back(PrevIDAAddr);
							}
							else {
								SMP_msg("Failed to extend tail from %x to %x\n",
									AreaStart, AreaEnd);
							}
						} // end if (is_func_entry()) ... else ...
#endif
					} // end if (IsFunctionPrologue()) ... else ...
				} // end if (AllConverted)
				else {
					SMP_msg("not AllConverted; cannot include new code in previous chunk.\n");
				}
#endif

	return changes;
} // end of FixupNewCodeChunks()

// Audit the IDA code database by looking at all instructions in the
//  code segment and printing all those that are not contained in a
//  function. Emit the context-free annotations that we are able to
//  emit on a per-instruction basis.
void FindOrphanedCode(STARS_Segment_t *CurrSeg, FILE *AnnotFile, FILE *InfoAnnotFile) {
	// CurrSeg: segment whose address range is scanned.
	// AnnotFile, InfoAnnotFile: passed through to EmitAnnotations().
	// NOTE(review): this copy of the function looks incomplete (missing closing
	//  braces and else branches); compare against version control.
	char disasm[MAXSTR];  // receives the disassembly text for unknown bytes
	// Step through the segment one item at a time.
	for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
		addr = SMP_get_item_end(addr)) {
		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			// NOTE(review): FirstFuncAddr has no visible declaration in this
			//  function -- confirm where it is defined.
			if (!(CurrProg->IsInstAddrStillInFunction(addr, FirstFuncAddr))) {
				SMPInstr CurrInst(addr);
				CurrInst.Analyze();
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
				SMP_msg("Orphan code at %x : %s\n", addr, CurrInst.GetDisasm());
clc5q's avatar
clc5q committed
#endif
				// TODO: If there are code xrefs to the orphan code,
				//  see what kind. If a CALL, and orphan code looks
				//  like a prologue, make a function. If a JUMP of
				//  some kind, then make a function chunk and make
				//  it a tail of all functions that jump to it. **!!**

				// Do machine-dependent fixes for DEF and USE lists.
				//  The fixes can help produce better annotations.
				CurrInst.MDFixupDefUseLists();

				// If instruction is still not included in a code chunk,
				//  emit annotations for it in isolation.
				if (CurrInst.IsAnalyzeable()) {
					CurrInst.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);

					// If instruction is an indirect branch, emit an XREF
					//  annotation for each of its targets.
					SMPitype CurrDataFlow = CurrInst.GetDataFlowType();
					if ((CurrDataFlow == INDIR_JUMP) || (CurrDataFlow == INDIR_CALL)) {
						SMP_xref_t xrefs;
						for (bool ok = xrefs.SMP_first_from(addr, XREF_ALL); ok; ok = xrefs.SMP_next_from()) {
							// Skip zero targets, non-code xrefs and xrefs of type fl_F.
							if (xrefs.GetTo() != 0) {
								if (xrefs.GetIscode() && (xrefs.GetType() != fl_F)) {
									// Found a code target, with its address in xrefs.to
									global_STARS_program->PrintCodeToCodeXref(addr, xrefs.GetTo(), CurrInst.GetSize());
			// NOTE(review): the closing braces for the nested blocks opened
			//  above are not all visible before the two braces that follow.
			}
		}
		else if (isUnknown(InstrFlags)) {
			// NOTE(review): the debug conditional below is empty as shown;
			//  presumably it once wrapped a message.
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
clc5q's avatar
clc5q committed
#endif
			// Can IDA analyze this to be code?
			int InstrLen;
#if IDA_SDK_VERSION < 600
			InstrLen = ua_code(addr);
#else
			InstrLen = create_insn(addr);
#endif
			if (InstrLen > 0) {
				bool IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					// Remove interactive color-coding tags.
					STARS_ssize_t StringLen = tag_remove(disasm, disasm, 0);
					// NOTE(review): the brace/else separating the tag_remove
					//  failure message from the success path below, and the
					//  else that should precede the generate_disasm_line error
					//  message, are not visible.
					if (-1 >= StringLen) {
						SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
clc5q's avatar
clc5q committed
#if SMP_DEBUG_FIXUP_IDB
						SMP_msg("Successfully analyzed!  %s\n", disasm);
clc5q's avatar
clc5q committed
#endif
						SMPInstr UnknownInstr(addr);
						UnknownInstr.Analyze();
						// TODO: Get new code into a chunk.  **!!**
						// If instruction is still not included in a code chunk,
						//  emit annotations for it in isolation.
						UnknownInstr.EmitAnnotations(true, false, true, AnnotFile, InfoAnnotFile, CurrProg);
					SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
			}
		}
	} // end for (ea_t addr = CurrSeg->startEA; ...)
} // end of FindOrphanedCode()

// Version of FindOrphanedCode that does not emit annotations but can be used
//  to determine at what point in time code becomes orphaned.
jdh8d's avatar
jdh8d committed
void Debug_FindOrphanedCode(STARS_Segment_t *CurrSeg, bool FirstRun) {
	STARS_ea_t DebugAddr = 0x8050db0;
	for (STARS_ea_t addr = CurrSeg->get_startEA(); addr < CurrSeg->get_endEA();
		addr = SMP_get_item_end(addr)) {

		flags_t InstrFlags = getFlags(addr);
		if (isHead(InstrFlags) && isCode(InstrFlags)) {
			STARS_Function_t *CurrFunc = SMP_get_func(addr);
			if (NULL == CurrFunc) {  // Code not in a func; orphaned
				pair<set<STARS_ea_t>::iterator, bool> pairib;
				pairib = CodeOrphans.insert(addr);
				if (DebugAddr == addr) {
					SMP_msg("DEBUG: Orphaned code addr %lx found.\n", (unsigned long) addr);
				}
				if ((!FirstRun) && (pairib.second)) {
					SMP_msg("SERIOUS WARNING: Newly orphaned code at %lx \n", (unsigned long) addr);
	} // end for (STARS_ea_t addr = CurrSeg->startEA; ...)
} // end of Debug_FindOrphanedCode()

// Entry point for auditing branches and calls in the IDA database: each
//  should have a valid code target (not data or unknown bytes), and the
//  code cross references should reflect that linkage.
// This function only forwards to SMP_AuditCodeTargets().
void AuditCodeTargets(void) {
	SMP_AuditCodeTargets();
	return;
} // end of AuditCodeTargets()


// Debugging aid: print the disassembly of each address on ProblemAddrs that
//  IDA currently flags as the head of a code item.
// NOTE(review): this copy of the function looks incomplete. ProblemAddrs,
//  LocalCmd, LocalFeatures and StringLen have no visible declarations, and
//  the else branches that should separate each success message from its
//  ERROR message are not visible. Compare against version control.
void SpecialDebugOutput(void) {
	char disasm[MAXSTR];  // receives the disassembly text
	// Hard-coded address to report on.
	ProblemAddrs.push_back(0x8066d08);
	bool IDAsuccess;
	int InstLen;

	for (size_t index = 0; index < ProblemAddrs.size(); ++index) {
		STARS_ea_t addr = ProblemAddrs[index];
		flags_t InstrFlags = getFlags(addr);
		// Addresses that are not code heads are skipped.
		if (isCode(InstrFlags) && isHead(InstrFlags)) {
			IDAsuccess = SMPGetCmd(addr, LocalCmd, LocalFeatures);
			InstLen = (int) LocalCmd.size;

			if ((IDAsuccess) && (0 < InstLen)) {
				IDAsuccess = generate_disasm_line(addr, disasm, sizeof(disasm) - 1);
				if (IDAsuccess) {
					// tag_remove() is applied in place; a result of -1 or
					//  lower is treated as failure.
					StringLen = tag_remove(disasm, disasm, 0);
					if (-1 < StringLen)
						SMP_msg("Problem addr %lx : %s\n", (unsigned long) addr, disasm);
						SMP_msg("ERROR: tag_remove failed at addr %lx \n", (unsigned long) addr);
					SMP_msg("ERROR: generate_disasm_line failed at addr %lx \n", (unsigned long) addr);
				SMP_msg("ERROR: decode_insn failed at addr %lx \n", (unsigned long) addr);
		}
	}
	return;
} // end of SpecialDebugOutput()