From d678a840fc978443aed6261322ae2a9f914fb0ac Mon Sep 17 00:00:00 2001 From: Jason Hiser <jdhiser@gmail.com> Date: Tue, 27 Jul 2021 21:17:53 +0000 Subject: [PATCH] ZS-86 issue in parsing dynsym section. The dynsym section is parsed by piping objdump output through grep. This is terrible and needs to be reimplemented in C++ so that we no longer depend on external tools and their output formats. In this case, the terrible pattern matching with grep caused a symbol defined in the binary to be missed, because `grep -v UND` was used to determine whether a symbol "mattered". That filter has two problems: 1) a symbol whose name happens to contain UND gets eliminated accidentally, and 2) (what really happened) the symbol was defined in a section without a "link" field in the dynsym section, so objdump displayed the section name as UND and the code skipped the symbol. Don't use objdump to parse ELF files! --- irdb-libs/ir_builders/fill_in_indtargs.cpp | 18 +++++++++++------- zipr | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/irdb-libs/ir_builders/fill_in_indtargs.cpp b/irdb-libs/ir_builders/fill_in_indtargs.cpp index 02ff29676..44e6ea531 100644 --- a/irdb-libs/ir_builders/fill_in_indtargs.cpp +++ b/irdb-libs/ir_builders/fill_in_indtargs.cpp @@ -619,16 +619,20 @@ void infer_targets(FileIR_t *firp, section* shdr, EXEIO::exeio* exeiop) cout<<"Checking section "<<shdr->get_name() <<endl; - const char* data=shdr->get_data() ; // C(char*)malloc(shdr->sh_size); + const char* data=shdr->get_data() ; assert(arch_ptr_bytes()==4 || arch_ptr_bytes()==8); // assume pointers need to be at least 4-byte aligned. - for(auto i=0u;i+arch_ptr_bytes()<=(size_t)shdr->get_size();i+=4) + for(auto i=0u; i+arch_ptr_bytes() <= (size_t)shdr->get_size(); i+=4) { - // even on 64-bit, pointers might be stored as 32-bit, as a - // elf object has the 32-bit limitations. + // Even on 64-bit, pointers might be stored as 32-bit, as a + // elf object has the 32-bit limitations. 
E.g., reloc tables, PLTs, GOTs, etc. + // are still stored in 32-bit format. + // Thus, we don't bother looking for 64-bit pointers + // FIXME: Prior comment does not match the code. Clarify which model is needed, + // and make comment match code. const auto ptr_val = uint64_t( + // cptrtoh<uint32_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) (arch_ptr_bytes()==4) ? cptrtoh<uint32_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) : (arch_ptr_bytes()==8) ? cptrtoh<uint64_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) : throw invalid_argument("Cannot map architecture size to bit width") @@ -3055,10 +3059,10 @@ void read_stars_xref_file(FileIR_t* firp) void process_dynsym(FileIR_t* firp) { - auto dynsymfile = popen("$PS_OBJDUMP -T a.ncexe | $PS_GREP '^[0-9]\\+' | $PS_GREP -v UND | awk '{print $1;}' | $PS_GREP -v '^$'", "r"); + auto dynsymfile = popen("$PS_OBJDUMP -T a.ncexe | $PS_GREP '^[0-9]\\+' | awk '{print $1;}' | $PS_GREP -v '^$'", "r"); if(!dynsymfile) { - perror("Cannot open readeh_tmp_file.exe"); + perror("Cannot start pipe to $PS_OBJDUMP a.ncexe"); exit(2); } auto target=(unsigned int)0; diff --git a/zipr b/zipr index 540600644..69b50a5a9 160000 --- a/zipr +++ b/zipr @@ -1 +1 @@ -Subproject commit 540600644bea29fef62c6e08ff96ae527d328b7c +Subproject commit 69b50a5a9ada750e286854e2ecc7e4ad68cd94d6 -- GitLab