From d678a840fc978443aed6261322ae2a9f914fb0ac Mon Sep 17 00:00:00 2001
From: Jason Hiser <jdhiser@gmail.com>
Date: Tue, 27 Jul 2021 21:17:53 +0000
Subject: [PATCH] ZS-86 issue in parsing dynsym section.

The dynsym section is parsed by piping objdump output through grep.  This
is fragile and needs to be reimplemented in C++ to reduce our dependence on
external tools (and their output formats) behaving as we expect.

In this case, the crude pattern matching with grep caused a symbol
defined in the binary to be missed, because `grep -v UND` was used
to determine whether a symbol "mattered".  This is wrong for two reasons:
1) symbol names might contain "UND" and get eliminated accidentally, and
2) (what really happened) the symbol was defined in a section without a
"link" field in the dynsym section, so objdump displayed the section name
as UND and the code therefore skipped the symbol.  Don't use objdump to
parse ELF files!
---
 irdb-libs/ir_builders/fill_in_indtargs.cpp | 18 +++++++++++-------
 zipr                                       |  2 +-
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/irdb-libs/ir_builders/fill_in_indtargs.cpp b/irdb-libs/ir_builders/fill_in_indtargs.cpp
index 02ff29676..44e6ea531 100644
--- a/irdb-libs/ir_builders/fill_in_indtargs.cpp
+++ b/irdb-libs/ir_builders/fill_in_indtargs.cpp
@@ -619,16 +619,20 @@ void infer_targets(FileIR_t *firp, section* shdr, EXEIO::exeio* exeiop)
 
 	cout<<"Checking section "<<shdr->get_name() <<endl;
 
-	const char* data=shdr->get_data() ; // C(char*)malloc(shdr->sh_size);
+	const char* data=shdr->get_data() ; 
 
 	assert(arch_ptr_bytes()==4 || arch_ptr_bytes()==8);
 	// assume pointers need to be at least 4-byte aligned.
-	for(auto i=0u;i+arch_ptr_bytes()<=(size_t)shdr->get_size();i+=4)
+	for(auto i=0u; i+arch_ptr_bytes() <= (size_t)shdr->get_size(); i+=4)
 	{
-		// even on 64-bit, pointers might be stored as 32-bit, as a 
-		// elf object has the 32-bit limitations.
-		// there's no real reason to look for 64-bit pointers 
+		// Even on 64-bit, pointers might be stored as 32-bit, as an
+		// ELF object has 32-bit limitations.  E.g., reloc tables, PLTs, GOTs, etc.
+		// are still stored in 32-bit format.
+		// Thus, we don't bother looking for 64-bit pointers.
+		// FIXME: Prior comment does not match the code.  Clarify which model is needed,
+		// and make comment match code.
 		const auto ptr_val = uint64_t(
+		                // cptrtoh<uint32_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) 
 		                (arch_ptr_bytes()==4) ?  cptrtoh<uint32_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) :
 		                (arch_ptr_bytes()==8) ?  cptrtoh<uint64_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) :
 		                throw invalid_argument("Cannot map architecture size to bit width")
@@ -3055,10 +3059,10 @@ void read_stars_xref_file(FileIR_t* firp)
 
 void process_dynsym(FileIR_t* firp)
 {
-	auto dynsymfile = popen("$PS_OBJDUMP -T a.ncexe | $PS_GREP '^[0-9]\\+' | $PS_GREP -v UND | awk '{print $1;}' | $PS_GREP -v '^$'", "r");
+	auto dynsymfile = popen("$PS_OBJDUMP -T a.ncexe | $PS_GREP '^[0-9]\\+' | awk '{print $1;}' | $PS_GREP -v '^$'", "r");
 	if(!dynsymfile)
 	{
-		perror("Cannot open readeh_tmp_file.exe");
+		perror("Cannot start pipe to $PS_OBJDUMP a.ncexe");
 		exit(2);
 	}
 	auto target=(unsigned int)0;
diff --git a/zipr b/zipr
index 540600644..69b50a5a9 160000
--- a/zipr
+++ b/zipr
@@ -1 +1 @@
-Subproject commit 540600644bea29fef62c6e08ff96ae527d328b7c
+Subproject commit 69b50a5a9ada750e286854e2ecc7e4ad68cd94d6
-- 
GitLab