From 9595c27848fa3aa43378f1ba8f2906cf0d38b042 Mon Sep 17 00:00:00 2001
From: clc5q <clc5q@git.zephyr-software.com>
Date: Thu, 13 Dec 2007 19:45:43 +0000
Subject: [PATCH] Added lots of code to improve IDA code identification using
 code addresses detected by objdump.

---
 SMPStaticAnalyzer.cpp | 869 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 860 insertions(+), 9 deletions(-)

diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp
index e1f63231..0be81e5c 100644
--- a/SMPStaticAnalyzer.cpp
+++ b/SMPStaticAnalyzer.cpp
@@ -5,6 +5,11 @@
 //   (Software Memory Protection).
 //
 
+#include <list>
+#include <vector>
+
+#include <string>
+
 #include <ida.hpp>
 #include <idp.hpp>
 #include <allins.hpp>
@@ -14,6 +19,7 @@
 #include <intel.hpp>
 #include <loader.hpp>
 #include <lines.hpp>
+#include <nalt.hpp>
 #include <name.hpp>
 #include <ua.hpp>
 
@@ -23,11 +29,13 @@
 
 // Set to 1 for debugging output
 #define SMP_DEBUG 1
-#define SMP_DEBUG2 1   // verbose
+#define SMP_DEBUG2 0   // verbose
 #define SMP_DEBUG3 0   // verbose
 #define SMP_DEBUG_MEM 0 // print memory operands
 #define SMP_DEBUG_TYPE0 0 // Output instr info for OptType = 0
 #define SMP_DEBUG_ORPHANS 1  // find code outside of functions
+#define SMP_DEBUG_CHUNKS 0 // restructuring tail chunks, shared chunks, etc.
+#define SMP_DEBUG_DATA_ONLY 0  // Find & fix data addresses in code segments
 
 // Set to 1 when doing a binary search using SMP_DEBUG_COUNT to find
 //  which function is causing a problem.
@@ -35,6 +43,8 @@
 #define SMP_DEBUG_COUNT 356  // How many funcs to process in problem search
 int FuncsProcessed = 0;
 
+#define SMP_FIXUP_IDB 1  // Try to fix the IDA database?
+#define SMP_DEBUG_FIXUP_IDB 0  // debugging output for FixupIDB chain
 
 // Define optimization categories for instructions.
 int OptCategory[NN_last+1];
@@ -61,11 +71,46 @@ static char *DataTypes[] = { "VOID", "NUMHEX", "NUMDEC", "CHAR",
 		"STRUCTOFFSET", "STACKVAR", "NUMFLOAT", "UNKNOWN", 
 		"UNKNOWN", "UNKNOWN", 0};
 
+// Filename (not including path) of executable being analyzed.
+static char RootFileName[MAXSTR];
+
+// Code addresses identified by a disassembler, such as objdump on
+//  Linux. These can be used to improve the code vs. data identification
+//  of IDA Pro.
+vector<ea_t> DisasmLocs;
+// Code addresses as identified by IDA Pro, to be compared to DisasmLocs.
+vector<ea_t> IDAProLocs;
+
+// Function start and end addresses (for function entry chunks only).
+//  Kept here because IDA Pro 5.1 seems to have a memory overwriting
+//  problem when iterating through all functions in the program. An existing
+//  func_t *ChunkInfo data structure was getting overwritten by one of the 
+//  function func_t data structures, causing changes of startEA and endEA among
+//  other things.
+struct SMP_bounds_t {
+	ea_t startEA;  // copy of func_t::startEA taken in IDAP_run()
+	ea_t endEA;    // copy of func_t::endEA taken in IDAP_run()
+};
+
+vector<SMP_bounds_t> FuncBounds;
+
+// List of functions that need to be reanalyzed after all the code fixup
+//  and code discovery is complete. Kept as a list of addresses; any address
+//  within the function is good enough to designate it.
+list<ea_t> ReanalyzeList;
+
 void IDAP_run(int);
+
+// Functions for diagnosing and/or fixing problems in the IDA database.
+void FixupIDB(void);  // Driver for all other fixing functions.
+void FindDataInCode(void);
+void AuditTailChunkOwnership(void);
 void FindOrphanedCode(segment_t *, FILE *);
+void FixCodeIdentification(void);
 void AuditCodeTargets(void);
 ea_t FindNewFuncLimit(ea_t);
 void SpecialDebugOutput(void);
+void RemoveIDACodeAddr(ea_t);
 
 static int idaapi idp_callback(void *, int event_id, va_list va) {
 	if (event_id == ph.auto_empty_finally) {   // IDA analysis is done
@@ -105,34 +150,61 @@ void IDAP_term(void) {
 }
 
 void IDAP_run(int arg) {
-
 	segment_t *seg;
 	char buf[MAXSTR];
 	ea_t ea;
 	flags_t ObjFlags;
 	bool ReadOnlyFlag;
 	FILE *SymsFile;
-	char FuncName[MAXSTR];			
 	SMPFunction *CurrFunc = NULL;
 	bool FuncsDumped = false;
 
+#if SMP_DEBUG2
+	char FuncName[MAXSTR];			
+#endif
+
 #if SMP_DEBUG
 	msg("Beginning IDAP_run.\n");
 #endif
 	// Open the output file.
-	SymsFile = qfopen("SMP.annot", "w");
+	ssize_t FileLen;
+	FileLen = get_root_filename(RootFileName, sizeof(RootFileName) - 1);
+	string SymsFileName(RootFileName);
+	string FileSuffix(".annot");
+	SymsFileName += FileSuffix;
+	SymsFile = qfopen(SymsFileName.c_str(), "w");
 	if (NULL == SymsFile) {
-		error("FATAL: Cannot open output file SMP.annot\n");
+		error("FATAL: Cannot open output file %s\n", SymsFileName.c_str());
 		return;
 	}
 
 	(void) memset(OptCount, 0, sizeof(OptCount));
 	(void) memset(AnnotationCount, 0, sizeof(AnnotationCount));
 
-	// Pre-audit the IDA database by seeing if all branches and calls
-	//  have proper code targets and code cross references.
-	SpecialDebugOutput();
-	AuditCodeTargets();
+	// Record the start and end addresses for all function entry
+	//  chunks in the program.
+	FuncBounds.reserve(10 + get_func_qty());
+	for (size_t FuncIndex = 0; FuncIndex < get_func_qty(); ++FuncIndex) {
+		func_t *FuncInfo = getn_func(FuncIndex);
+		SMP_bounds_t temp;
+		temp.startEA = FuncInfo->startEA;
+		temp.endEA = FuncInfo->endEA;
+		FuncBounds.push_back(temp);
+	}
+
+#if SMP_DEBUG_DATA_ONLY
+	FindDataInCode();
+	FixCodeIdentification();
+	qfclose(SymsFile);
+	return;
+#endif
+
+	// Pre-audit the IDA database by seeing if the distinction
+	//  between code and data can be improved, and if all branches
+	//  and calls have proper code targets and code cross references.
+#if SMP_FIXUP_IDB
+	FixupIDB();
+#endif
 
 	// First, examine the data segments and print info about static
 	//   data, such as name/address/size. Do the same for functions in
@@ -291,9 +363,11 @@ void IDAP_run(int arg) {
 				delete CurrFunc;
 				CurrFunc = NULL;
 			} // end for (size_t FuncIndex = 0; ...) 
+
 #if SMP_DEBUG_ORPHANS
 			FindOrphanedCode(seg, SymsFile);
 #endif
+
 		} // end else if (seg->type === SEG_CODE)
 		else {
 #if SMP_DEBUG
@@ -329,6 +403,779 @@ plugin_t PLUGIN = {
 	IDAP_hotkey
 };
 
+// Load the two global code-address lists that the fixup passes compare:
+//  DisasmLocs from the external disassembler listing <root>.SMPobjdump
+//  (leading hex address on each line) and IDAProLocs from the IDB itself.
+void FindCodeAddresses(void) {
+	// "%x" stores an unsigned int, which need not be as wide as ea_t, so
+	//  scan into a temporary of exactly that type and convert afterwards.
+	unsigned int ScannedAddr = 0;
+	string DisasmFileName(RootFileName);
+	string FileSuffix(".SMPobjdump");
+	DisasmFileName += FileSuffix;
+	FILE *DisasmFile = qfopen(DisasmFileName.c_str(), "r");
+	if (NULL == DisasmFile) {
+		error("FATAL: Cannot open input file %s\n", DisasmFileName.c_str());
+		return;
+	}
+
+#define DISASM_RESERVE_SIZE  50000
+	DisasmLocs.reserve(DISASM_RESERVE_SIZE);
+	int ScanReturn = qfscanf(DisasmFile, "%x", &ScannedAddr);
+	while (1 == ScanReturn) {
+		int NextChar;
+		DisasmLocs.push_back((ea_t) ScannedAddr);
+		// Swallow the rest of the input line and get the next address.
+		do {
+			NextChar = qfgetc(DisasmFile);
+		} while ((EOF != NextChar) && ('\n' != NextChar));
+		ScanReturn = qfscanf(DisasmFile, "%x", &ScannedAddr);
+	} // end while (1 == ScanReturn)
+	qfclose(DisasmFile);  // single close shared by both outcomes below
+	if (DisasmLocs.empty()) {
+		msg("ERROR: No addresses read from %s\n", DisasmFileName.c_str());
+		return;
+	}
+	msg("%d Disasm addresses read from %s\n", (int) DisasmLocs.size(),
+		DisasmFileName.c_str());
+
+	// Find all the code locs in the IDA Pro database. As we find
+	//  them, store them in IDAProLocs.
+	for (int SegIndex = 0; SegIndex < get_segm_qty(); ++SegIndex) {
+		segment_t *seg = getnseg(SegIndex);
+		if ((NULL == seg) || (SEG_CODE != seg->type))
+			continue;
+
+		for (ea_t addr = seg->startEA; addr < seg->endEA; addr = get_item_end(addr)) {
+			flags_t InstrFlags = getFlags(addr);
+			if (isHead(InstrFlags) && isCode(InstrFlags)) {
+				IDAProLocs.push_back(addr);
+#if SMP_DEBUG_FIXUP_IDB
+				if ((0x806cda4 <= addr) && (0x806cf99 >= addr))
+					msg("IDA code addr: %x\n", addr);
+#endif
+			} // end if (isHead(addr) && isCode(addr)
+#if SMP_DEBUG_FIXUP_IDB
+			else if ((0x806cda4 <= addr) && (0x806cf99 >= addr)) {
+				if (!isHead(InstrFlags))
+					msg("Weirdness: not isHead at %x\n", addr);
+				if (isUnknown(InstrFlags)) {
+					msg("Weirdness: isUnknown at %x\n", addr);
+				}
+			}
+#endif
+		} // end for (ea_t addr = seg->startEA; ...)
+	} // end for (int SegIndex = 0; ...)
+	return;
+} // end FindCodeAddresses()
+
+// Return true if addr is not a proper beginning address for an instruction.
+// Return false otherwise.
+// Currently, we claim that an instruction is misaligned if DisasmLocs does
+//  not contain it. This function is useful for dealing with errors in IDA
+//  code identification, in which a large code section is identified as data,
+//  but some instructions in the middle of the "data" are identified as
+//  code but IDA often starts on the wrong boundary in these cases.
+bool IsCodeMisaligned(ea_t addr) {
+	// Without any external disassembly there is no evidence of misalignment,
+	//  and DisasmLocs[0] below would read past the end of an empty vector.
+	if (DisasmLocs.empty())
+		return false;
+
+	// Binary search of DisasmLocs (assumed to be in ascending address
+	//  order) over the half-open index range [min, max).
+	size_t min = 0;
+	size_t max = DisasmLocs.size();  // don't access DisasmLocs[max]
+	size_t index = (min + max) / 2;
+
+	while (addr != DisasmLocs[index]) {
+		if (min >= (max - 1))
+			return true;  // range is down to one element and it is not addr
+		if (addr < DisasmLocs[index])
+			max = index;
+		else // must be addr > DisasmLocs[index];
+			min = index;
+		index = (min + max) / 2;
+	}
+
+	return false;
+} // end of IsCodeMisaligned()
+
+void RemoveIDACodeAddr(ea_t addr) {
+	// Delete addr from IDAProLocs if it is present; otherwise do nothing.
+	// An empty list cannot contain addr, and IDAProLocs[0] below would
+	//  read past the end of it.
+	if (IDAProLocs.empty())
+		return;
+
+	// Binary search of IDAProLocs, which is in ascending address order
+	//  because of the way in which it was generated.
+	size_t min = 0;
+	size_t max = IDAProLocs.size();  // don't access IDAProLocs[max]
+	size_t index = (min + max) / 2;
+
+	while (addr != IDAProLocs[index]) {
+		if (min >= (max - 1))
+			return;  // not found
+		if (addr < IDAProLocs[index])
+			max = index;
+		else // must be addr > IDAProLocs[index];
+			min = index;
+		index = (min + max) / 2;
+	}
+
+	// IDAProLocs[index] contains addr.
+	vector<ea_t>::iterator RemovalIterator = IDAProLocs.begin() + index;
+	IDAProLocs.erase(RemovalIterator);
+	return;
+} // end of RemoveIDACodeAddr()
+
+// Driver for all other fixing functions. Upon its return, the IDA
+//  database (IDB file) should be fixed up as much as we can fix it.
+void FixupIDB(void) {
+	FindCodeAddresses();        // fill DisasmLocs and IDAProLocs first
+#if SMP_DEBUG_FIXUP_IDB
+	SpecialDebugOutput();
+#endif
+	AuditCodeTargets();
+	FindDataInCode();           // may delete entries from IDAProLocs
+	AuditTailChunkOwnership();
+	FixCodeIdentification();    // compares DisasmLocs against IDAProLocs
+} // end of FixupIDB()
+
+// Walk every code segment looking for items that IDA has mislabelled.
+// If an isolated code instruction is found in the midst of a run
+//  of data bytes and nothing references it, it is not
+//  reachable as code and is undoubtedly a mixup by IDA. Possibly
+//  the whole data region will be converted to code later, in which
+//  case the isolated code is not necessarily properly aligned and
+//  parsed at its present address, so we are glad to convert it into
+//  data anyway so that FindDataToConvert() will succeed on it later.
+// Data to code conversion, and isolated code detection, are inhibited
+//  by IDA identifying several consecutive instructions in the midst
+//  of a data region, with the code addresses not agreeing with the
+//  external disassembler's code addresses. We will convert these
+//  misaligned instructions to data as we detect them. We will also
+//  convert unexplored bytes (isUnknown(flags) == true) into data if
+//  they are in the midst of a data sequence.
+#define MIN_DATARUN_LEN 24  // #bytes on either side of "isolated" code
+void FindDataInCode(void) {
+	size_t DataRunLen = 0; // How many data bytes in a row have we seen?
+	bool IsolatedCodeTrigger = false; // Have seen data, then isolated code
+									// Now looking for data
+	ea_t IsolatedCodeAddr = BADADDR;  // meaningful only while trigger is set
+	int IsolatedCodeLen = 0;          // meaningful only while trigger is set
+	int InstrLen;
+
+	for (int SegIndex = 0; SegIndex < get_segm_qty(); ++SegIndex) {
+		char SegName[MAXSTR];
+		segment_t *seg = getnseg(SegIndex);
+		ssize_t SegNameSize = get_segm_name(seg, SegName, sizeof(SegName) - 1);
+		if (SEG_CODE != seg->type)
+			continue;
+#if SMP_DEBUG_FIXUP_IDB
+		msg("Non-code addresses for code segment %s from %x to %x\n",
+			SegName, seg->startEA, seg->endEA);
+#endif
+		for (ea_t addr = seg->startEA; addr < seg->endEA; addr = get_item_end(addr)) {
+			flags_t AddrFlags = getFlags(addr);
+			if (isHead(AddrFlags)) {
+				if (isData(AddrFlags)) {
+					DataRunLen += get_item_size(addr);
+#if SMP_DEBUG_FIXUP_IDB
+					msg("Data: %x\n", addr);
+#endif
+					if (MIN_DATARUN_LEN <= DataRunLen) {
+						if (IsolatedCodeTrigger) {
+							// Saw data, then one isolated code, then data
+							do_unknown_range(IsolatedCodeAddr, IsolatedCodeLen, DOUNK_SIMPLE);
+							RemoveIDACodeAddr(IsolatedCodeAddr);
+							if (do_data_ex(IsolatedCodeAddr, byteflag(),
+								IsolatedCodeLen, BADNODE)) {
+									msg("Converted isolated code to data: %x\n",
+										IsolatedCodeAddr);
+							}
+							else {
+								msg("Failed to convert isolated code to data: %x len: %x\n",
+									IsolatedCodeAddr, IsolatedCodeLen);
+							}
+							IsolatedCodeTrigger = false;
+						} // end if (IsolatedCodeTrigger)
+					} // end if (MIN_DATARUN_LEN <= DataRunLen)
+				} // end if (isData(AddrFlags)
+				else if (isUnknown(AddrFlags)) {
+					// Just in case; unknown usually means not head or tail
+					// If in a data run, convert to data.
+					InstrLen = get_item_size(addr);
+					msg("Unknown: %x len: %x\n", addr, InstrLen);
+					if (0 < DataRunLen) {
+						if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
+							msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
+							DataRunLen += InstrLen;
+						}
+						else {
+							msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
+							DataRunLen = 0;
+							IsolatedCodeTrigger = false;
+						}
+					}
+				}
+				else if (isCode(AddrFlags)) {  // must be true
+					if (MIN_DATARUN_LEN <= DataRunLen) {
+						msg("DataRunLen: %d at %x\n", DataRunLen, addr);
+						InstrLen = ua_ana0(addr);
+#if SMP_DEBUG_FIXUP_IDB
+						msg("Calling IsCodeMisaligned: len %d\n", InstrLen);
+#endif
+						if (IsCodeMisaligned(addr)) {
+#if SMP_DEBUG_FIXUP_IDB
+							msg("Code was misaligned.\n");
+#endif
+							do_unknown_range(addr, InstrLen, DOUNK_SIMPLE);
+							RemoveIDACodeAddr(addr);
+							if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
+								msg("Converted misaligned code to data at %x : len: %x\n",
+									addr, InstrLen);
+								// Count the new data item as part of the current run.
+								DataRunLen += get_item_size(addr);
+								continue; // skip reset of DataRunLen
+							}
+							else {
+								msg("Misaligned code left as unknown at %x : len: %x\n",
+									addr, InstrLen);
+								IsolatedCodeTrigger = false;
+							}
+						} // end if (IsCodeMisaligned() ...)
+						else if (!hasRef(AddrFlags)) {
+							// No references at all --> isolated code.
+							IsolatedCodeTrigger = true;
+							IsolatedCodeAddr = addr;
+							IsolatedCodeLen = InstrLen;
+						}
+						else {
+							xrefblk_t xb;
+							bool ok = xb.first_to(addr, XREF_ALL);  // query THIS instruction
+							if (!ok) {
+								// No xref of any type targets addr after all.
+								IsolatedCodeTrigger = true;
+								IsolatedCodeAddr = addr;
+								IsolatedCodeLen = InstrLen;
+							}
+						}
+					} // end if (MIN_DATARUN_LEN <= DataRunLen)
+					else if (IsolatedCodeTrigger) {
+						// Two instructions in a row does not fit the pattern.
+						IsolatedCodeTrigger = false;
+					}
+					DataRunLen = 0;
+				} // end if (isData) ... else if (isUnknown) ... else isCode
+			} // end if (isHead)
+			else if (isUnknown(AddrFlags)) {
+				// If in a data run, convert to data.
+				InstrLen = get_item_size(addr);
+				msg("Unknown: %x len: %x\n", addr, InstrLen);
+				if (0 < DataRunLen) {
+					if (do_data_ex(addr, byteflag(), InstrLen, BADNODE)) {
+						msg("Converted unknown to data at %x len: %x\n", addr, InstrLen);
+						DataRunLen += InstrLen;
+					}
+					else {
+						msg("Failed to convert unknown to data at %x len: %x\n", addr, InstrLen);
+						DataRunLen = 0;
+						IsolatedCodeTrigger = false;
+					}
+				}
+			}
+		} // end for (ea_t addr =  seg->startEA; ...)
+	} // end for (int SegIndex = 0; ...)
+	return;
+} // end of FindDataInCode()
+
+
+// The choices that IDA makes for deciding which parent function of a
+//  TAIL chunk is the primary owner of the tail can be counterintuitive.
+//  A function entry can both fall into and jump to a tail chunk that
+//  is contiguous with it, yet the "owner" might be a function that is
+//  far below it in the executable address space. This function makes the
+//  owner the highest-addressed parent that starts before the tail.
+void AuditTailChunkOwnership(void) {
+	char FuncName[MAXSTR];
+	// Iterate through all chunks in the program.
+	for (size_t ChunkIndex = 0; ChunkIndex < get_fchunk_qty(); ++ChunkIndex) {
+		func_t *ChunkInfo = getn_fchunk((int) ChunkIndex);
+		if (is_func_tail(ChunkInfo)) {
+			// For each TAIL chunk, find all the parent chunks. Find the last
+			//  parent chunk with an address less than the TAIL chunk address.
+			ea_t BestCandidate = 0;  // 0 doubles as "no candidate found"
+			func_parent_iterator_t FuncParent(ChunkInfo);
+#if SMP_DEBUG_CHUNKS
+			msg("Tail chunk: %x ", ChunkInfo->startEA);
+#endif
+			for (bool ok = FuncParent.first(); ok; ok = FuncParent.next()) {
+				ea_t parent = FuncParent.parent();
+#if SMP_DEBUG_CHUNKS
+				msg(" parent: %x ", parent);
+#endif
+				if ((parent > BestCandidate) && (parent < ChunkInfo->startEA))
+					BestCandidate = parent;
+			}
+#if SMP_DEBUG_CHUNKS
+			msg("\n");
+#endif
+			//  Make the best parent chunk the owner of the TAIL chunk if it is
+			//  not already the owner.
+			if (ChunkInfo->owner != BestCandidate) {
+				if (0 < BestCandidate) {
+					if (set_tail_owner(ChunkInfo, BestCandidate)) {
+						func_t *FuncInfo = get_func(BestCandidate);
+						msg("Set %x as new owner of tail %x\n",
+							BestCandidate, ChunkInfo->startEA);
+						// Reanalyze the parent function (and all its
+						//  tail chunks) now that the structure has changed.
+						reanalyze_function(FuncInfo);
+					}
+					else {
+						msg("set_tail_owner failed for tail %x and parent %x\n",
+							ChunkInfo->startEA, BestCandidate);
+					}
+				}
+				else {
+					// No parent starts before the tail. This branch only
+					//  reports a candidate; it never changes the owner.
+					func_t *FuncInfo = get_func(ChunkInfo->owner);
+					get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
+#if SMP_DEBUG_CHUNKS
+					msg("No good parent candidate before tail at %x\n",
+						ChunkInfo->startEA);
+					msg("Current parent is %x: %s\n", FuncInfo->startEA, FuncName);
+#endif
+					// Find out if a function entry chunk that comes before the
+					//  tail is a better candidate for the owner (i.e. it falls
+					//  through to the tail, or jumps to it).
+					BestCandidate = 0;
+#if SMP_DEBUG_CHUNKS
+					msg("Finding parent func candidates for %x:", ChunkInfo->startEA);
+#endif
+					SMP_bounds_t CurrFunc;
+					// NOTE(review): the break below assumes FuncBounds is in
+					//  ascending address order -- confirm.
+					for (size_t FuncIndex = 0; FuncIndex < FuncBounds.size(); ++FuncIndex) {
+						CurrFunc = FuncBounds[FuncIndex];
+						if ((CurrFunc.startEA < ChunkInfo->startEA)
+							&& (CurrFunc.startEA > BestCandidate)) {
+							BestCandidate = CurrFunc.startEA;
+#if SMP_DEBUG_CHUNKS
+							msg(" candidate: %x tail: %x", BestCandidate,
+								ChunkInfo->startEA);
+#endif
+						}
+						else {
+#if SMP_DEBUG_CHUNKS
+							msg(" not a candidate: %x tail: %x best: %x\n",
+								CurrFunc.startEA, ChunkInfo->startEA, BestCandidate);
+#endif
+							break;
+						}
+					} // end for (size_t FuncIndex = 0; ...)
+					if (0 >= BestCandidate) { // highly unlikely
+						msg("No good func entry parent candidate.\n");
+					}
+					else {
+						FuncInfo = get_func(BestCandidate);
+						get_func_name(FuncInfo->startEA, FuncName, sizeof(FuncName) - 1);
+#if SMP_DEBUG_CHUNKS
+						msg("Best func entry parent candidate: %s at %x",
+							FuncName, BestCandidate);
+						if (FuncInfo->endEA == ChunkInfo->startEA)
+							msg(" Function endEA == tail chunk startEA");
+						msg("\n");
+#endif
+					}
+				}
+			} // end if (ChunkInfo->owner != BestCandidate)
+#if SMP_DEBUG_CHUNKS
+			else {
+				msg("Already best parent for %x is %x\n", ChunkInfo->startEA,
+					ChunkInfo->owner);
+			}
+#endif
+		} // end if (is_func_tail(ChunkInfo))
+	} // end for (size_t ChunkIndex = 0; ...)
+
+	return;
+} // end of AuditTailChunkOwnership()
+
+// Both indices are one past the entries being compared (see the "- 1"
+//  below). If the bytes from DisasmLocs[DisasmIndex - 1] up to
+//  IDAProLocs[IDAProIndex - 1] are all data or tail bytes and the previous
+//  IDA instruction is not a return, set AreaSize to that byte count.
+// Return value: true -> skip to IDAProIndex; false -> convert AreaSize bytes.
+bool FindDataToConvert(size_t IDAProIndex, size_t DisasmIndex, int &AreaSize) {
+	ea_t PrevIDAAddr;
+	ea_t NextIDAAddr;
+	size_t ShadowDisasmIndex = DisasmIndex - 1;
+	ea_t DisasmAddr = DisasmLocs[ShadowDisasmIndex];
+	bool CannotConvert = false;  // return value; every failure returns early
+	bool DebugAddress = false;
+#if SMP_DEBUG_FIXUP_IDB
+	DebugAddress = (DisasmAddr == 0x806c19a);
+#endif
+
+	if (DebugAddress) {
+		msg("IDAProIndex: %d DisasmIndex: %d\n", IDAProIndex, DisasmIndex);
+		msg("IDA locs size %d Disasm locs size %d\n", IDAProLocs.size(),
+			DisasmLocs.size());
+	}
+	if (IDAProIndex >= IDAProLocs.size()) {
+		// Have already processed the last IDA address.
+		if (DebugAddress) msg(" Already done with IDAProLocs.\n");
+		return true;
+	}
+	else if (DisasmIndex >= DisasmLocs.size()) {
+		// Strange. Last Disasm address is only one to convert, and
+		//  IDA still has addresses after that?
+		if (DebugAddress) msg(" Already done with DisasmLocs.\n");
+		return true;
+	}
+	else if (IDAProIndex < 2) {
+		// We have Disasm addrs before the very first IDA addr. We
+		//  don't trust this boundary case.
+		if (DebugAddress) msg(" Boundary case with IDAProLocs.\n");
+		return true;
+	}
+	NextIDAAddr = IDAProLocs[IDAProIndex - 1];
+	PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
+	if (DebugAddress) msg(" PrevIDAAddr: %x NextIDAAddr: %x\n", PrevIDAAddr, NextIDAAddr);
+
+	// See if previous IDA address was a return.
+	flags_t PrevFlags = getFlags(PrevIDAAddr);
+	if (!isCode(PrevFlags) || !isHead(PrevFlags)) {
+		msg("PrevIDAAddr %x not isCode or not isHead.\n", PrevIDAAddr);
+		return true;
+	}
+	SMPInstr PrevInstr(PrevIDAAddr);
+	PrevInstr.Analyze();
+	if (DebugAddress) msg("Finished PrevInstr.Analyze()\n");
+	if (PrevInstr.MDIsReturnInstr()) {
+		// Right after a return come no-ops and 2-byte no-ops
+		//  that are just for alignment. IDA does not seem to be
+		//  happy when we convert all those to code.
+		if (DebugAddress) msg(" Data followed a return instruction.\n");
+		return true;
+	}
+	// Now, see if the area from DisasmAddr to NextIDAAddr is all data
+	//  according to IDA.
+	while (DisasmAddr < NextIDAAddr) {
+		flags_t DataFlags = getFlags(DisasmAddr);
+		if (isTail(DataFlags)) {
+			if (DebugAddress) msg(" tail byte: %x\n", DisasmAddr);
+			DisasmAddr = get_item_end(DisasmAddr);
+		}
+		else if (isData(DataFlags)) {
+			if (DebugAddress) msg(" data byte: %x\n", DisasmAddr);
+			DisasmAddr = get_item_end(DisasmAddr);
+		}
+		else if (isCode(DataFlags)) {
+			// How could this ever happen?
+			if (DebugAddress) msg(" isCode: %x\n", DisasmAddr);
+			return true;
+		}
+		else { // must be isUnknown()
+			// Very conservative here; only want to convert when the whole
+			//  region is data, because that is a symptom of IDA missing
+			//  a piece of code within a function (usually a piece of code
+			//  that is only reachable via an indirect jump).
+			if (DebugAddress) msg(" Not isData: %x\n", DisasmAddr);
+			return true;
+		}
+		if (DebugAddress) msg(" new DisasmAddr: %x\n", DisasmAddr);
+	} // end while (DisasmAddr < NextIDAAddr)
+	if (DebugAddress) msg(" loop exit CannotConvert: %d\n", CannotConvert);
+	if (!CannotConvert) {
+		// Success. Measure from the first Disasm address again, because
+		//  the loop above advanced DisasmAddr.
+		DisasmAddr = DisasmLocs[ShadowDisasmIndex];
+		AreaSize = NextIDAAddr - DisasmAddr;
+		if (DebugAddress) { 
+			msg(" Success! AreaSize: %x Old index: %d new index: %d\n",
+				AreaSize, ShadowDisasmIndex, DisasmIndex);
+			msg(" exiting FindDataToConvert()\n");
+			msg("\n");
+		}
+	} // end if (!CannotConvert)
+	return CannotConvert;
+} // end of FindDataToConvert()
+
+// Does a converted code region look like a function prologue? If so,
+//  we should not include it in the previous function. Not implemented yet.
+bool IsFunctionPrologue(ea_t StartAddr, ea_t EndAddr) {
+	return false;  // **!!** TODO: stub; both arguments are ignored for now
+} // end of IsFunctionPrologue()
+
+// Patch program bytes that could not be converted from
+//  data to code, if it can be determined that the bytes represent code
+//  that IDA has a hard time with.
+// Currently limited to finding "call near ptr 0" instructions, which
+//  often are found in optimized glibc code because gcc was able to
+//  determine that a function pointer was zero and did constant propagation,
+//  but unfortunately was not able to determine that the code was unreachable.
+//  IDA will not succeed in ua_code() for "call 0", but there is no danger
+//  of a working program ever executing this code. Replacing the call with
+//  no-ops permits us to continue converting a contiguous range of data to
+//  code, and permits IDA to reanalyze the function later.
+// Returns true only if the bytes were patched AND ua_code() then succeeded.
+bool MDPatchUnconvertedBytes(ea_t CurrDisasmAddr) {
+	flags_t AddrFlags = getFlags(CurrDisasmAddr);
+	if (isData(AddrFlags) || isTail(AddrFlags)) {
+		// Bytes should have been converted to unknown already.
+		msg("Cannot patch data bytes or tail bytes at %x\n", CurrDisasmAddr);
+		return false;
+	}
+	SMPInstr PatchInstr(CurrDisasmAddr);
+	PatchInstr.Analyze();
+	int InstrLen = PatchInstr.GetCmd().size;
+	if (0 >= InstrLen) {
+		msg("ua_ana0() failed on patch location %x\n", CurrDisasmAddr);
+		return false;
+	}
+	else {
+		if (PatchInstr.GetCmd().itype != NN_call) {
+			msg("Cannot patch non-call instruction at %x\n", CurrDisasmAddr);
+			return false;
+		}
+		PatchInstr.PrintOperands();
+		op_t CallDest = PatchInstr.GetUse(0);
+		if ((o_near != CallDest.type) || (0 != CallDest.addr)) {
+			msg("Cannot patch call unless it is call near ptr 0 at %x\n",
+				CurrDisasmAddr);
+			return false;
+		}
+		ea_t PatchAddr = CurrDisasmAddr;
+		for (int i = 0; i < InstrLen; ++i) {
+			bool ok = patch_byte(PatchAddr, 0x90);  // x86 no-op
+			if (!ok) {
+				msg("patch_byte() failed at %x\n", PatchAddr);
+				return false;
+			}
+			++PatchAddr;
+		}
+		msg("Patched %d bytes successfully at %x\n", InstrLen, CurrDisasmAddr);
+		InstrLen = ua_code(CurrDisasmAddr);  // retry the conversion on the no-ops
+		if (0 >= InstrLen) {
+			msg(" ... but ua_code() still failed!\n");
+			return false;
+		}
+	} // end if (0 >= InstrLen) ... else ...
+	return true;
+} // end of MDPatchUnconvertedBytes()
+
+// Walk the code addresses identified by IDA Pro (IDAProLocs) and by an
+//  external disassembler (DisasmLocs) in lockstep. Try to convert to code
+//  the addresses that are found in DisasmLocs but not in IDAProLocs. Emit
+//  warnings when IDAProLocs has a code address not found in DisasmLocs.
+//  Does nothing when either list is empty.
+void FixCodeIdentification(void) {
+	if (DisasmLocs.empty() || IDAProLocs.empty())
+		return;  // nothing to compare, and element [0] below would not exist
+	size_t DisasmIndex = 0;
+	ea_t CurrDisasmAddr = DisasmLocs[DisasmIndex++];
+	size_t IDAProIndex = 0;
+	ea_t CurrAddr = IDAProLocs[IDAProIndex++];
+
+	while (DisasmIndex <= DisasmLocs.size()) {
+		// If the current address is less than the current
+		//  external disasm address, we have the rare case in
+		//  which IDA Pro has identified an address as code
+		//  but the external disasm has not. Emit a warning
+		//  message and go on to the next IDA address.
+		if (CurrAddr < CurrDisasmAddr) {
+			SMPInstr TempInstr(CurrAddr);
+			TempInstr.Analyze();
+			msg("Address %x is code in IDB but not in external disassembler: %s\n",
+				CurrAddr, TempInstr.GetDisasm());
+			if (IDAProIndex < IDAProLocs.size())
+				CurrAddr = IDAProLocs[IDAProIndex++];
+			else {
+				// Last IDA addr; might still process Disasm addrs
+				//  after loop exit.
+				break;
+			}
+		}
+		else if (CurrAddr == CurrDisasmAddr) {
+			// If equal, no problem, we are moving through the
+			//  code addresses in lockstep. Grab the next address
+			//  from each source.
+			if (DisasmIndex < DisasmLocs.size()) {
+				CurrDisasmAddr = DisasmLocs[DisasmIndex++];
+			}
+			else {
+				++DisasmIndex;  // cause loop exit; skip cleanup loop
+			}
+			if (IDAProIndex < IDAProLocs.size())
+				CurrAddr = IDAProLocs[IDAProIndex++];
+			else {
+				// Last IDA addr; might still process Disasm addrs
+				//  after loop exit in cleanup loop.
+				break;
+			}
+		}
+		else {
+			// We must have CurrAddr > CurrDisasmAddr. That means
+			//  IDA has jumped over some code addresses in
+			//  DisasmLocs. We need to try to convert addresses
+			//  to code until we can reach the current addr.
+			// For now, we will address only the case in which IDA
+			//  has identified addresses as data bytes, and the
+			//  external disassembler(e.g. objdump) has identified
+			//  the same addresses as code. We only want to deal with
+			//  contiguous areas of data-to-code conversion that do NOT
+			//  follow a return statement.
+			int AreaSize = 0;
+			ea_t AreaStart = CurrDisasmAddr;
+#if SMP_DEBUG_FIXUP_IDB
+			msg("CurrDisasmAddr: %x  CurrAddr: %x\n", CurrDisasmAddr, CurrAddr);
+#endif
+			bool SkipArea = FindDataToConvert(IDAProIndex, DisasmIndex, AreaSize);
+			if (SkipArea) {
+				// Skip over the extra external disasm addresses.
+				while ((CurrDisasmAddr < CurrAddr) && (DisasmIndex < DisasmLocs.size()))
+					CurrDisasmAddr = DisasmLocs[DisasmIndex++];
+				if (CurrDisasmAddr < CurrAddr)
+					break;  // Disasm addrs ran out before reaching CurrAddr
+			}
+			else { 
+				// Convert the overlooked code region to unexplored.
+				ea_t AreaEnd = CurrDisasmAddr + AreaSize;
+#if SMP_DEBUG_FIXUP_IDB
+				msg("Found data to convert: %x to %x\n", AreaStart, AreaEnd);
+#endif
+				do_unknown_range(AreaStart, AreaSize, DOUNK_SIMPLE);
+				bool AllConverted = true;
+				do {
+					flags_t InstrFlags = getFlags(CurrDisasmAddr);
+					if (!isUnknown(InstrFlags)) {
+						msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
+					}
+					else {
+						int InstrLen = ua_code(CurrDisasmAddr);
+						if (InstrLen > 0) { // Successfully converted to code
+							SMPInstr NewInstr(CurrDisasmAddr);
+							NewInstr.Analyze();
+#if SMP_DEBUG_FIXUP_IDB
+							msg("FixCodeID success at %x: len: %d %s\n", CurrDisasmAddr,
+									InstrLen, NewInstr.GetDisasm());
+#endif
+						}
+						else {
+							if (MDPatchUnconvertedBytes(CurrDisasmAddr)) {
+								msg(" Patched bytes at %x\n", CurrDisasmAddr);
+							}
+							else {
+								AllConverted = false;
+								msg("FixCodeID failure at %x\n", CurrDisasmAddr);
+							}
+						}
+					} // end if (isCode(InstrFlags) ... else ...
+					if (DisasmIndex < DisasmLocs.size()) {
+						CurrDisasmAddr = DisasmLocs[DisasmIndex++];
+					}
+					else {
+						// cause loops to exit
+						CurrDisasmAddr = CurrAddr;
+						++DisasmIndex; // skip cleanup loop
+					}
+				} while (CurrDisasmAddr < CurrAddr);
+				if (AllConverted) {
+					if (IsFunctionPrologue(AreaStart, AreaEnd)) {
+						// Create a new function entry chunk here.
+						//  **!!** TODO
+						;
+					}
+					else {
+						// Extend the previous chunk to include the
+						//  converted code.
+						ea_t PrevIDAAddr = IDAProLocs[IDAProIndex - 2];
+						func_t *PrevChunk = get_fchunk(PrevIDAAddr);
+#if SMP_DEBUG_FIXUP_IDB
+						msg(" addr in chunk to extend: %x\n", PrevIDAAddr);
+						msg(" func_t pointer for chunk: %x\n", PrevChunk);
+#endif
+#if 0  // temporary for debugging
+						if (is_func_entry(PrevChunk)) {
+							// Extend the func entry to contain the new code.
+							if (func_setend(PrevIDAAddr, AreaEnd)) {
+								msg("Func extended to include code from %x to %x\n",
+									AreaStart, AreaEnd);
+								ReanalyzeList.push_back(PrevIDAAddr);
+							}
+							else {
+								msg("Failed to extend func from %x to %x\n",
+									AreaStart, AreaEnd);
+							}
+						}
+						else { // tail
+							// See if this works for function tails, also.
+							// Extend the func entry to contain the new code.
+							if (func_setend(PrevIDAAddr, AreaEnd)) {
+								msg("Tail extended to include code from %x to %x\n",
+									AreaStart, AreaEnd);
+								func_t *TailOwner = get_func(PrevChunk->owner);
+								ReanalyzeList.push_back(PrevIDAAddr);
+							}
+							else {
+								msg("Failed to extend tail from %x to %x\n",
+									AreaStart, AreaEnd);
+							}
+						} // end if (is_func_entry()) ... else ...
+#endif
+					} // end if (IsFunctionPrologue()) ... else ...
+				} // end if (AllConverted)
+				else {
+					msg("not AllConverted; cannot include new code in previous chunk.\n");
+				}
+			} // end if (SkipArea) ... else ...
+		} // end if (addr < CurrDisasmAddr) .. else if ... else ...
+	} // end while (DisasmIndex <= DisasmLocs.size()
+
+#if 0  // Make this code use FindDataToConvert()  **!!**
+	// Cleanup loop:
+	// If there are still Disasm addrs to process, try to turn them
+	//  into code in the IDB.
+	while (DisasmIndex <= DisasmLocs.size()) {
+		flags_t InstrFlags = getFlags(CurrDisasmAddr);
+		if (isCode(InstrFlags)) {
+			msg("Sync problem in FixCodeID: %x\n", CurrDisasmAddr);
+		}
+		else {
+			// Clear bytes to unexplored.
+			segment_t *seg = getseg(CurrDisasmAddr);
+			if (SEG_CODE == seg->type) {
+				do_unknown_range(CurrDisasmAddr, seg->endEA - CurrDisasmAddr, DOUNK_SIMPLE);
+			}
+			else {
+				// Might be safest to just discontinue processing
+				//  if we wander into a non-code segment.
+				//  DisasmLocs should not have an entire code segment
+				//  that IDA Pro missed.
+				break;
+			}
+			int InstrLen = ua_code(CurrDisasmAddr);
+			if (InstrLen > 0) { // Successfully converted to code
+				SMPInstr NewInstr(CurrDisasmAddr);
+				NewInstr.Analyze();
+				msg("FixCodeID success at %x: %s\n", CurrDisasmAddr,
+						NewInstr.GetDisasm());
+			}
+			else {
+				msg("FixCodeID failure at %x\n", CurrDisasmAddr);
+			}
+		} // end if (isCode(InstrFlags) ... else ...
+		if (DisasmIndex < DisasmLocs.size()) {
+			CurrDisasmAddr = DisasmLocs[DisasmIndex++];
+		}
+		else {
+			++DisasmIndex; // cause loop to exit
+		}
+	} // end while (DisasmIndex <= DisasmLocs.size()
+#endif
+
+	return;
+} // end of FixCodeIdentification()
 
 // Audit the IDA code database by looking at all instructions in the
 //  code segment and printing all those that are not contained in a
@@ -339,6 +1186,8 @@ void FindOrphanedCode(segment_t *CurrSeg, FILE *AnnotFile) {
 	for (ea_t addr = CurrSeg->startEA; addr < CurrSeg->endEA;
 		addr = get_item_end(addr)) {
 		flags_t InstrFlags = getFlags(addr);
+		if (isTail(InstrFlags))
+			continue;
 		if (isHead(InstrFlags) && isCode(InstrFlags)) {
 			func_t *CurrFunc = get_func(addr);
 			if (NULL == CurrFunc) {
@@ -393,10 +1242,12 @@ void AuditCodeTargets(void) {
 					if ((XrefType == fl_U) || (XrefType == fl_USobsolete)) {
 						msg("Bad xref type: %x %s\n", addr, FuncName);
 					}
+#if SMP_DEBUG_FIXUP_IDB
 					else if ((XrefType == fl_JF) || (XrefType == fl_JN)) {
 						msg("Jump to func: %x %s from: %x\n",
 							addr, FuncName, xb.from);
 					}
+#endif
 					else if (XrefType == fl_F) {
 						msg("Fall through to func: %x %s from: %x\n",
 							addr, FuncName, xb.from);
-- 
GitLab