From 54189aeec7386340f8b6460d2250c496fb9ecb17 Mon Sep 17 00:00:00 2001 From: Jason Hiser <jdhiser@gmail.com> Date: Wed, 21 Jun 2023 20:39:08 +0000 Subject: [PATCH] Adding arm support. not fully working yet. --- SConscript | 4 +- include/ehp.hpp | 9 ++ src/arm_ehp.cpp | 255 ++++++++++++++++++++++++++++++++++++++ src/ehp.cpp | 43 +++++-- src/ehp_priv.hpp | 157 ++++++++++++++++++++++- src/scoop_replacement.hpp | 1 + test/test.cpp | 1 + 7 files changed, 458 insertions(+), 12 deletions(-) create mode 100644 src/arm_ehp.cpp diff --git a/SConscript b/SConscript index 490c039..dadac26 100644 --- a/SConscript +++ b/SConscript @@ -19,8 +19,8 @@ import os Import('env') env.Replace(debug=ARGUMENTS.get("debug",0)) -env.Append(CFLAGS=" -DUSE_ELFIO=1 ") -env.Append(CXXFLAGS=" -DUSE_ELFIO=1 ") +env.Append(CFLAGS=" -DUSE_ELFIO=1 -Wno-psabi ") +env.Append(CXXFLAGS=" -DUSE_ELFIO=1 -Wno-psabi ") if int(env['debug']) == 1: print("Setting debug mode") env.Append(CFLAGS=" -g ") diff --git a/include/ehp.hpp b/include/ehp.hpp index 198824a..e5cf55f 100644 --- a/include/ehp.hpp +++ b/include/ehp.hpp @@ -218,6 +218,15 @@ class EHFrameParser_t const string eh_frame_hdr_data, const uint64_t eh_frame_hdr_data_start_addr, const string gcc_except_table_data, const uint64_t gcc_except_table_data_start_addr ); + + static unique_ptr<const EHFrameParser_t> arm_factory( + uint8_t ptrsize, + EHPEndianness_t endian_style, + const string &extab_sec, const uint64_t extab_addr, + const string &exidx_sec, const uint64_t exidx_addr, + const string &exidx_lnk_sec, const uint64_t lnk_addr + ); + }; // e.g. 
diff --git a/src/arm_ehp.cpp b/src/arm_ehp.cpp
new file mode 100644
index 0000000..7170daf
--- /dev/null
+++ b/src/arm_ehp.cpp
@@ -0,0 +1,281 @@
+// @HEADER_COMPONENT libehp
+// @HEADER_LANG C++
+// @HEADER_BEGIN
+
+
+// Sign-extend an ARM EHABI prel31 field: bit 31 is ignored and bit 30 is the sign bit.
+int32_t handle_pcrel31(int32_t rel)
+{
+	// Stay on the unsigned bit pattern: left-shifting a signed value into its sign bit is undefined.
+	const auto low31 = static_cast<uint32_t>(rel) & 0x7fffffffu;
+	if(low31 & 0x40000000u)
+		return static_cast<int32_t>(low31 - 0x40000000u) - 0x40000000;
+	return static_cast<int32_t>(low31);
+}
+
+// Assemble the 32-bit word stored at byte offset `offset` of data; is_be selects big-endian order.
+// Throws out_of_range when the four bytes do not lie entirely inside data.
+static uint32_t read_arm_word(const string &data, const uint64_t offset, const bool is_be)
+{
+	if(offset > data.size() || data.size() - offset < sizeof(uint32_t))
+		throw out_of_range("Cannot read a 4-byte word at offset " + to_hex_string(offset));
+	const auto byte_at = [&](const uint64_t i) { return static_cast<uint32_t>(static_cast<uint8_t>(data[offset+i])); };
+	return is_be ?
+		(byte_at(0) << 24) | (byte_at(1) << 16) | (byte_at(2) << 8) | byte_at(3) :
+		(byte_at(3) << 24) | (byte_at(2) << 16) | (byte_at(1) << 8) | byte_at(0) ;
+}
+
+// Walk the .ARM.exidx index table and record one FDE per 8-byte entry.
+// Entries that point into .ARM.extab have their unwind bytes collected; for the generic model the
+// personality routine address and the LSDA are recorded too.  is_be is true for big-endian data.
+// Returns true; out-of-bounds or unrecognised table contents are reported by throwing.
+template <int ptrsize>
+bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be)
+{
+	// Copies, not references: both buffers are indexed by raw offset for the whole parse.
+	const auto exidx_data  = exidx_scoop->getContents();
+	const auto extab_data  = extab_scoop->getContents();
+	const auto exidx_len   = exidx_scoop->getLength();
+	const auto extab_start = extab_scoop->getStart();
+	auto current_address   = exidx_scoop->getStart();
+	vector<arm_fde_contents_t<ptrsize> > local_fdes;
+
+	// Fetch the extab word at virtual address addr, refusing anything outside the section data.
+	const auto extab_word = [&](const uint64_t addr) -> uint32_t
+	{
+		const auto offset = addr - extab_start;
+		if(addr < extab_start || offset > extab_data.size() || extab_data.size() - offset < sizeof(uint32_t))
+			throw out_of_range("Cannot parse lsda at " + to_hex_string(addr));
+		return read_arm_word(extab_data, offset, is_be);
+	};
+
+	for(auto entry_offset = uint64_t{0}; entry_offset + 8 <= exidx_len; entry_offset += 8, current_address += 8)
+	{
+		const auto first_entry  = handle_pcrel31(static_cast<int32_t>(read_arm_word(exidx_data, entry_offset, is_be)));
+		const auto fde_start    = current_address + first_entry;
+		const auto second_entry = read_arm_word(exidx_data, entry_offset+4, is_be);
+		const auto can_unwind   = second_entry != 0x1; // can't unwind == 1:
+		const auto contains_inline_unwind_entry = can_unwind && (second_entry>>31) != 0; // is inline if bit 31 set, and not special pattern cant_unwind.
+		const auto inline_unwind_entry = second_entry & 0x7fffffff; // the EH unwind table entry itself if it can be encoded in 31 bits.
+		// Only an unwindable, non-inline entry holds a prel31 offset (relative to this second word) into extab.
+		const auto lsda_addr = (can_unwind && !contains_inline_unwind_entry) ?
+			current_address + 4 + handle_pcrel31(static_cast<int32_t>(second_entry)) :
+			0;
+
+		// update the prior fde to end just before this one starts.
+		if(!local_fdes.empty())
+			local_fdes.back().setEndAddress(fde_start-1);
+
+		auto fde = arm_fde_contents_t<ptrsize>{fde_start,lsda_addr,can_unwind};
+		/*
+		cout << hex ;
+		cout << "\tFde ("<< fde.getStartAddress();
+		cout << "-" << fde.getEndAddress();
+		cout << ")\tlsda_addr=" << fde.getLSDAAddress();
+		cout << "\tcan_unwind=" << boolalpha << fde.getCanUnwind() << endl;
+		*/
+		auto unwind_pgm =vector<uint8_t>();
+		if(lsda_addr)
+		{
+			throw_assert(extab_scoop->getStart() <= lsda_addr && lsda_addr <= extab_scoop->getEnd());
+
+			// the top bit of the first word selects the arm-specific (set) or generic (clear) layout.
+			const auto first_word = extab_word(lsda_addr);
+			if(first_word >> 31) // check the top bit.
+			{
+				const auto byte1 = (first_word >> 0)&0xff;
+				const auto byte2 = (first_word >> 8)&0xff;
+				const auto byte3 = (first_word >> 16)&0xff;
+				const auto byte4 = (first_word >> 24)&0xff;
+				const auto personality_index = byte4 & 0xf;
+				cout << "Found arm32-specific model = " << hex << personality_index << endl;
+				switch(personality_index)
+				{
+					case 0:
+					{
+						unwind_pgm.push_back(byte3);
+						unwind_pgm.push_back(byte2);
+						unwind_pgm.push_back(byte1);
+						break;
+					}
+					case 1:
+					case 2:
+					{
+						const auto words_following = byte3;
+						unwind_pgm.push_back(byte2);
+						unwind_pgm.push_back(byte1);
+						for(auto i = 0u; i < words_following; i++)
+						{
+							const auto next_word = extab_word(lsda_addr+4+i*4);
+							unwind_pgm.push_back((next_word >> 24)&0xff);
+							unwind_pgm.push_back((next_word >> 16)&0xff);
+							unwind_pgm.push_back((next_word >> 8 )&0xff);
+							unwind_pgm.push_back((next_word >> 0 )&0xff);
+						}
+
+						break;
+					}
+					default:
+						// throw by value: "throw new" would throw a pointer that no out_of_range handler catches.
+						throw out_of_range("Unknown personality index: "+ to_string(personality_index));
+				}
+
+			}
+			else
+			{
+				// generic version.
+				const auto offset_to_personality_routine = handle_pcrel31(static_cast<int32_t>(first_word));
+				const auto personality_routine_addr=lsda_addr+offset_to_personality_routine;
+				fde.setPersonality(personality_routine_addr);
+				cout << "Found generic model with personality = " << hex << personality_routine_addr << endl;
+				const auto second_word = extab_word(lsda_addr+4);
+				const auto byte1 = (second_word >> 0 )&0xff;
+				const auto byte2 = (second_word >> 8 )&0xff;
+				const auto byte3 = (second_word >> 16)&0xff;
+				const auto byte4 = (second_word >> 24)&0xff;
+				const auto words_following = byte4;
+				unwind_pgm.push_back(byte3);
+				unwind_pgm.push_back(byte2);
+				unwind_pgm.push_back(byte1);
+				for(auto i = 0u; i < words_following; i++)
+				{
+					// the extra words start after the personality word and the length word, i.e. at +8.
+					const auto next_word = extab_word(lsda_addr+8+i*4);
+					unwind_pgm.push_back((next_word >> 24)&0xff);
+					unwind_pgm.push_back((next_word >> 16)&0xff);
+					unwind_pgm.push_back((next_word >> 8 )&0xff);
+					unwind_pgm.push_back((next_word >> 0 )&0xff);
+				}
+
+				// 4 for personality routine, + 1 for a length specifier + the length in bytes.
+				fde.parse_lsda(lsda_addr+8+words_following*4,extab_scoop.get(), fde_start, is_be);
+			}
+		}
+		if(contains_inline_unwind_entry )
+		{
+			const auto byte1 = (inline_unwind_entry >> 0)&0xff;
+			const auto byte2 = (inline_unwind_entry >> 8)&0xff;
+			const auto byte3 = (inline_unwind_entry >> 16)&0xff;
+			const auto byte4 = (inline_unwind_entry >> 24)&0x7f; // drop high bit of high word for pcrel31.
+			// NOTE(review): byte4 looks like the model index rather than an unwind byte -- confirm against the EHABI.
+			unwind_pgm.push_back(byte4);
+			unwind_pgm.push_back(byte3);
+			unwind_pgm.push_back(byte2);
+			unwind_pgm.push_back(byte1);
+			cout << "Found arm32-specific inline_entra with 4 instructions:" << endl << hex;
+			/*
+			cout
+				<< "\t\t" << byte1 << endl
+				<< "\t\t" << byte2 << endl
+				<< "\t\t" << byte3 << endl
+				<< "\t\t" << byte4 << endl;
+			*/
+		}
+		cout << "\tFde ("<< fde.getStartAddress();
+		cout << "Unwind pgm = " << hex << endl;
+		for(auto byte : unwind_pgm)
+		{
+			cout << "\t" << +byte << endl;
+		}
+		local_fdes.push_back(fde);
+	}
+
+	// last fde goes to the end of the linked section; an empty index table leaves nothing to close.
+	if(!local_fdes.empty())
+		local_fdes.back().setEndAddress(lnk_scoop->getEnd());
+
+/*
+	for(const auto& fde: local_fdes)
+	{
+		cout << hex ;
+		cout << "\tFde ("<< fde.getStartAddress();
+		cout << "-" << fde.getEndAddress();
+		cout << ")\tlsda_addr=" << fde.getLSDAAddress();
+		cout << "\tcan_unwind=" << boolalpha << fde.getCanUnwind() << endl;
+		fde.getLSDA()->print();
+
+	}
+	*/
+	fdes.insert(ALLOF(local_fdes));
+
+	return true;
+}
+
+// Build an ARM parser from raw section images: extab_* and exidx_* are the contents and addresses
+// of .ARM.extab and .ARM.exidx, lnk_* those of the section the index table is linked to.
+// Throws out_of_range unless ptrsize is 4 or 8.
+unique_ptr<const EHFrameParser_t> EHFrameParser_t::arm_factory(
+	uint8_t ptrsize,
+	EHPEndianness_t endian_type,
+	const string &extab_sec, const uint64_t extab_addr,
+	const string &exidx_sec, const uint64_t exidx_addr,
+	const string &lnk_sec, const uint64_t lnk_addr
+	)
+{
+	const auto extab_scoop=ScoopReplacement_t(extab_sec, extab_addr);
+	const auto exidx_scoop=ScoopReplacement_t(exidx_sec,exidx_addr);
+	const auto lnk_scoop =ScoopReplacement_t(lnk_sec, lnk_addr);
+
+	// Owned from the moment of allocation so that a throwing parse() cannot leak the parser.
+	auto ret_val=unique_ptr<EHFrameParser_t>();
+	if(ptrsize==4)
+		ret_val.reset(new split_arm_eh_frame_impl_t<4>(extab_scoop,exidx_scoop,lnk_scoop));
+	else if(ptrsize==8)
+		ret_val.reset(new split_arm_eh_frame_impl_t<8>(extab_scoop,exidx_scoop,lnk_scoop));
+	else
+		throw out_of_range("ptrsize must be 4 or 8");
+
+
+	const auto is_big_endian = [] () -> bool
+	{
+		union
+		{
+			uint32_t i;
+			char c[4];
+		} bint = {0x01020304};
+
+		return bint.c[0] == 1;
+	};
+
+	const auto is_be = endian_type == BIG || ( is_big_endian() && endian_type == HOST) ;
+
+	ret_val->parse(is_be);
+
+	return unique_ptr<const EHFrameParser_t>(std::move(ret_val));
+
+}
+
+
+// Return the vector of pointers to the parsed FDEs, filling the cache on first use.
+template <int ptrsize>
+const FDEVector_t* split_arm_eh_frame_impl_t<ptrsize>::getFDEs() const
+{
+	if(fdes_cache.size()==0)
+	{
+		transform(ALLOF(fdes), back_inserter(fdes_cache), [](const arm_fde_contents_t<ptrsize> &a) { return &a; });
+	}
+	return &fdes_cache;
+}
+
+// Return the vector of pointers to each FDE's CIE object, filling the cache on first use.
+template <int ptrsize>
+const CIEVector_t* split_arm_eh_frame_impl_t<ptrsize>::getCIEs() const
+{
+	if(cies_cache.size()==0)
+	{
+		transform(ALLOF(fdes), back_inserter(cies_cache), [](const arm_fde_contents_t<ptrsize> &a) { return &a.getCIE(); });
+	}
+	return &cies_cache;
+}
+
+// Return the FDE whose address range contains addr, or nullptr when none does.
+template <int ptrsize>
+const FDEContents_t* split_arm_eh_frame_impl_t<ptrsize>::findFDE(uint64_t addr) const
+{
+	// ARM FDE end addresses are inclusive, so the probe is the one-byte range [addr,addr].
+	const auto tofind=arm_fde_contents_t<ptrsize>( addr, addr);
+	const auto fde_it=fdes.find(tofind);
+	const auto raw_ret_ptr = (fde_it==fdes.end()) ? nullptr : &*fde_it;
+	return raw_ret_ptr;
+}
+
diff --git a/src/ehp.cpp b/src/ehp.cpp
index 74c33ca..3af7c69 100644
--- a/src/ehp.cpp
+++ b/src/ehp.cpp
@@ -96,6 +96,14 @@ using namespace ELFIO;
 
 #define ALLOF(s) begin(s), end(s)
 
+template <class T>
+std::string to_hex_string(const T& in)
+{
+	stringstream s;
+	s << hex << in;
+	return s.str();
+}
+
 template <int ptrsize>
 template <class T>
 bool eh_frame_util_t<ptrsize>::read_type(T &value, uint64_t &position, const uint8_t* const data, const uint64_t max, const bool is_be)
@@ -2040,21 +2048,33 @@ unique_ptr<const EHFrameParser_t> EHFrameParser_t::factory(const string filename
 		throw invalid_argument(string() + "Cannot open file: " + filename);
 	}
 
-	auto get_info=[&](const string name) -> pair<string,uint64_t>
+	// return data, address, lnk field
+	struct info_t
+	{
+		string contents;
+		uint64_t address;
+		uint32_t lnk;
+	};
+	auto get_info=[&](const string name) -> info_t
 		{
 			const auto &sec=elfiop->sections[name.c_str()];
 			if(sec==nullptr)
-				return {"",0};
+				return {"",0,0};
 
 			auto contents=string(sec->get_data(), sec->get_size());
 			auto addr=sec->get_address();
-			return {contents,addr};
+			auto lnk=sec->get_link();
+			return {contents,addr,lnk};
 
 		};
 
 	const auto eh_frame_section=get_info(".eh_frame");
 	const auto eh_frame_hdr_section=get_info(".eh_frame_hdr");
 	const auto gcc_except_table_section=get_info(".gcc_except_table");
+	const auto extab=get_info(".ARM.extab");
+	const auto exidx=get_info(".ARM.exidx");
+	const auto is_arm = exidx.contents.length() != 0 ;
+	const auto ex_lnk=is_arm ? get_info(elfiop->sections[exidx.lnk]->get_name()) : info_t{"",0,0};
 
 	const auto ptrsize = elfiop->get_class()==ELFCLASS64 ? 8 :
 		elfiop->get_class()==ELFCLASS32 ? 4 :
@@ -2069,10 +2089,17 @@ unique_ptr<const EHFrameParser_t> EHFrameParser_t::factory(const string filename
 	if(ptrsize==0)
 		throw invalid_argument(string() + "Invalid ELF class in : " + filename);
 
-	return EHFrameParser_t::factory(ptrsize, file_endianness,
-		eh_frame_section.first, eh_frame_section.second,
-		eh_frame_hdr_section.first, eh_frame_hdr_section.second,
-		gcc_except_table_section.first, gcc_except_table_section.second);
+	return is_arm ?
+		EHFrameParser_t::arm_factory(ptrsize, file_endianness,
+			extab.contents, extab.address,
+			exidx.contents, exidx.address,
+			ex_lnk.contents, ex_lnk.address)
+
+		:
+		EHFrameParser_t::factory(ptrsize, file_endianness,
+			eh_frame_section.contents, eh_frame_section.address,
+			eh_frame_hdr_section.contents, eh_frame_hdr_section.address,
+			gcc_except_table_section.contents, gcc_except_table_section.address);
 
 }
 #endif
@@ -2116,6 +2143,6 @@ unique_ptr<const EHFrameParser_t> EHFrameParser_t::factory(
 
 }
 
-
+#include "arm_ehp.cpp"
 
 
diff --git a/src/ehp_priv.hpp b/src/ehp_priv.hpp
index f5ef9d1..21fae42 100644
--- a/src/ehp_priv.hpp
+++ b/src/ehp_priv.hpp
@@ -33,6 +33,8 @@
 #include <algorithm>
 #include <memory>
 #include <set>
+#include <functional>
+#include <stdexcept>
 
 #include "ehp_dwarf2.hpp"
 #include "scoop_replacement.hpp"
@@ -381,7 +383,6 @@ class lsda_t : public LSDA_t, private eh_frame_util_t<ptrsize>
 
 };
 
-
 template <int ptrsize>
 class fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize>
 {
@@ -424,7 +425,7 @@ class fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize>
 	const eh_program_t<ptrsize>& getProgram() const ;
 	eh_program_t<ptrsize>& getProgram() ;
 
-	const LSDA_t* getLSDA() const { return &lsda; } // shared_ptr<LSDA_t>(new lsda_t<ptrsize>(lsda)) ; }
+	const LSDA_t* getLSDA() const { return &lsda; }
 	const lsda_t<ptrsize>& getLSDAInternal() const { return lsda; }
 	uint64_t getLSDAAddress() const { return lsda_addr; }
 
@@ -486,6 +487,7 @@ class split_eh_frame_impl_t : public EHFrameParser_t
 	}
 
 	bool parse(const bool is_be);
+	bool parse_arm(const bool is_be);
 	void print() const;
 
 	virtual const FDEVector_t* getFDEs() const;
@@ -496,6 +498,164 @@ class split_eh_frame_impl_t : public EHFrameParser_t
 
 };
 
+// overrides for arm-specific classes.
+/*
+template <int ptrsize>
+class arm_lsda_t : public lsda_t<ptrsize>
+{
+	private:
+
+
+	public:
+	arm_lsda_t() : lsda_t<ptrsize>() {}
+	bool parse_lsda(const uint64_t lsda_addr,
+			const ScoopReplacement_t* exidx_scoop,
+			const uint64_t fde_region_start,
+			const bool is_be
+			);
+	void print() const { lsda_t::print(); }
+
+};
+*/
+
+// CIE stand-in for ARM tables: only the personality routine address is stored; every other
+// CIEContents_t accessor throws std::runtime_error.
+template <int ptrsize>
+class arm_cie_contents_t : public CIEContents_t, private eh_frame_util_t<ptrsize>
+{
+	private:
+	uint64_t personality=0; // stays 0 until setPersonality() is called.
+
+	public:
+
+	arm_cie_contents_t() {};
+
+	const eh_program_t<ptrsize>& getProgram() const { throw std::runtime_error( " not implimented"); }
+	uint64_t getPosition() const { throw std::runtime_error( " not implimented"); }
+	uint64_t getLength() const { throw std::runtime_error( " not implimented"); }
+	uint64_t getCAF() const { throw std::runtime_error( " not implimented"); }
+	int64_t getDAF() const { throw std::runtime_error( " not implimented"); }
+	uint8_t getPersonalityEncoding() const { throw std::runtime_error( " not implimented"); }
+	uint64_t getPersonality() const { return personality; }
+	uint64_t getPersonalityPointerPosition() const { throw std::runtime_error( " not implimented"); }
+	uint64_t getPersonalityPointerSize() const { throw std::runtime_error( " not implimented"); }
+	uint64_t getReturnRegister() const { throw std::runtime_error( " not implimented"); }
+	string getAugmentation() const { throw std::runtime_error( " not implimented"); }
+	uint8_t getLSDAEncoding() const { throw std::runtime_error( " not implimented"); }
+	uint8_t getFDEEncoding() const { throw std::runtime_error( " not implimented"); }
+
+	virtual void print(const uint64_t startAddr) const
+	{
+		cout << "personality=" << hex << personality << endl;
+	}
+
+	void setPersonality(uint64_t pers) { personality=pers; }
+
+};
+
+// One ARM index-table entry exposed through the FDEContents_t interface.  Start and end addresses
+// are both inclusive (getLength() is end-start+1); accessors with no ARM equivalent throw.
+template <int ptrsize>
+class arm_fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize>
+{
+	uint64_t fde_start_addr=0;
+	uint64_t fde_end_addr=0;
+	uint64_t fde_lsda_addr=0;
+	bool can_unwind=false;
+	lsda_t<ptrsize> lsda;
+	eh_program_t<ptrsize> eh_pgm;
+	arm_cie_contents_t<ptrsize> cie;
+	public:
+	arm_fde_contents_t( uint64_t fde_start,uint64_t lsda_addr,bool p_can_unwind) :
+		fde_start_addr(fde_start),
+		fde_lsda_addr(lsda_addr),
+		can_unwind(p_can_unwind)
+	{}
+	arm_fde_contents_t(const uint64_t start_addr, const uint64_t end_addr)
+		:
+		fde_start_addr(start_addr),
+		fde_end_addr(end_addr)
+	{}
+
+	virtual uint64_t getPosition() const { throw std::runtime_error( " not implimented"); }
+	virtual uint64_t getLength() const { return fde_end_addr-fde_start_addr+1; }
+	virtual uint64_t getStartAddress() const { return fde_start_addr; }
+	virtual uint64_t getEndAddress() const { return fde_end_addr; }
+	virtual uint64_t getFDEStartAddress() const { return fde_start_addr; }
+	virtual uint64_t getFDEEndAddress() const {return fde_end_addr; }
+	virtual const CIEContents_t& getCIE() const { return cie; }
+	virtual const EHProgram_t& getProgram() const { throw std::runtime_error(" not implimented"); }
+	virtual const LSDA_t* getLSDA() const { return &lsda; }
+	virtual uint64_t getLSDAAddress() const { return fde_lsda_addr; }
+	virtual uint64_t getStartAddressPosition() const { throw std::runtime_error(" not implimented"); }
+	virtual uint64_t getEndAddressPosition() const { throw std::runtime_error(" not implimented"); }
+	virtual uint64_t getEndAddressSize() const { throw std::runtime_error(" not implimented"); }
+	virtual uint64_t getLSDAAddressPosition() const { throw std::runtime_error(" not implimented"); }
+	virtual uint64_t getLSDAAddressSize() const { throw std::runtime_error(" not implimented"); }
+	virtual void print() const { throw std::runtime_error(" not implimented"); }
+
+	void setEndAddress(uint64_t end) { fde_end_addr = end; }
+	bool getCanUnwind() const { return can_unwind; }
+	void setPersonality(uint64_t pers) { cie.setPersonality(pers); }
+
+	bool parse_lsda(const uint64_t lsda_addr,
+			const ScoopReplacement_t* extab_scoop,
+			const uint64_t fde_region_start,
+			const bool is_be
+			)
+	{
+		return lsda.parse_lsda(lsda_addr,extab_scoop,fde_region_start,is_be);
+	}
+
+
+};
+
+// Orders FDEs by address range; end addresses are inclusive, so a precedes b only if it ends before b starts.
+template <int ptrsize>
+bool operator<(const arm_fde_contents_t<ptrsize>& a, const arm_fde_contents_t<ptrsize>& b) { return a.getFDEEndAddress() < b.getFDEStartAddress(); }
+
+// EHFrameParser_t implementation backed by the ARM tables: owns copies of the extab, exidx and
+// linked sections and the set of FDEs that parse() extracts from them.
+template <int ptrsize>
+class split_arm_eh_frame_impl_t : public EHFrameParser_t
+{
+	private:
+
+	unique_ptr<ScoopReplacement_t> extab_scoop;
+	unique_ptr<ScoopReplacement_t> exidx_scoop;
+	unique_ptr<ScoopReplacement_t> lnk_scoop;
+	set<arm_fde_contents_t <ptrsize> > fdes;
+
+	mutable CIEVector_t cies_cache;
+	mutable FDEVector_t fdes_cache;
+
+	public:
+
+	split_arm_eh_frame_impl_t
+		(
+		 const ScoopReplacement_t &extab,
+		 const ScoopReplacement_t &exidx,
+		 const ScoopReplacement_t &lnk
+		)
+		:
+			extab_scoop(new ScoopReplacement_t(extab)),
+			exidx_scoop(new ScoopReplacement_t(exidx)),
+			lnk_scoop (new ScoopReplacement_t(lnk))
+	{
+	}
+
+	bool parse(const bool is_be);
+	void print() const {};
+
+	virtual const FDEVector_t* getFDEs() const ;
+	virtual const CIEVector_t* getCIEs() const ;
+	virtual const FDEContents_t* findFDE(uint64_t addr) const ;
+
+
+
+};
+
+
 
 }
 #endif
diff --git a/src/scoop_replacement.hpp b/src/scoop_replacement.hpp
index cafcd7a..ccdb01a 100644
--- a/src/scoop_replacement.hpp
+++ b/src/scoop_replacement.hpp
@@ -50,6 +50,7 @@ class ScoopReplacement_t
 
 		addr_t getEnd() const { return end; }
 		addr_t getStart() const { return start; }
+		uint32_t getLength() const { return end-start+1; }
 
 	private:
 		string data;
diff --git a/test/test.cpp b/test/test.cpp
index 572bb2f..81f7b45 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -40,6 +40,7 @@ void print_lps(const EHFrameParser_t* ehp)
 		cout<<"Found FDE at : " << fde->getStartAddress() << "-"<<fde->getEndAddress()<<endl;
 		const auto lsda=fde->getLSDA();
 		assert(lsda);
+		lsda->print();
 		const auto callsites=lsda->getCallSites();
 		assert(callsites);
 
-- 
GitLab