From 99dc59d8e2a9d31909cf354e7d10c0439afe17b9 Mon Sep 17 00:00:00 2001 From: Jason Hiser <jdhiser@gmail.com> Date: Thu, 22 Jun 2023 02:53:30 +0000 Subject: [PATCH] Working parser?? --- src/arm_ehp.cpp | 303 ++++++++++++++++++++++++++++++++--------------- src/ehp_priv.hpp | 52 +++++++- test/test.cpp | 1 + 3 files changed, 258 insertions(+), 98 deletions(-) diff --git a/src/arm_ehp.cpp b/src/arm_ehp.cpp index 7170daf..723f237 100644 --- a/src/arm_ehp.cpp +++ b/src/arm_ehp.cpp @@ -27,7 +27,7 @@ bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be) const auto can_unwind = second_entry != 0x1; // can't unwind == 1: const auto offset_to_start = handle_pcrel31(second_entry); const auto contains_inline_unwind_entry = can_unwind && (second_entry>>31); // is inline if bit 31 set, and not special pattern cant_unwind. - const auto inline_unwind_entry = second_entry & 0x7fffffff; // the EH unwind table entry itself if it can be encoded in 31 bits. + //const auto inline_unwind_entry = second_entry & 0x7fffffff; // the EH unwind table entry itself if it can be encoded in 31 bits. const auto lsda_addr = !can_unwind ? 0 : // the special pattern 0x1 indicating can't unwind. contains_inline_unwind_entry ? 0 : // no lsda addr if the entry is inline @@ -54,103 +54,13 @@ bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be) { // fetch the first word of the lsda. throw_assert(extab_scoop->getStart() <= lsda_addr && lsda_addr <= extab_scoop->getEnd()); - - // note: do not make reference as we are going to do unsafe stuff. - // and need a copy that won't change. 
- const auto contents_str = extab_scoop->getContents(); - const auto contents = reinterpret_cast<const char*>(contents_str.data()); - const auto start_offset = lsda_addr - extab_scoop->getStart(); - - // fetch 4 bytes to detect type - if(lsda_addr + sizeof(uint32_t) > extab_scoop->getEnd()) - throw out_of_range("Cannot parse lsda at " + to_hex_string(lsda_addr)); - const auto first_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset]); - if(first_word >> 31) // check the top bit. - { - const auto byte1 = (first_word >> 0)&0xff; - const auto byte2 = (first_word >> 8)&0xff; - const auto byte3 = (first_word >> 16)&0xff; - const auto byte4 = (first_word >> 24)&0xff; - const auto personality_index = byte4 & 0xf; - cout << "Found arm32-specific model = " << hex << personality_index << endl; - switch(personality_index) - { - case 0: - { - unwind_pgm.push_back(byte3); - unwind_pgm.push_back(byte2); - unwind_pgm.push_back(byte1); - break; - } - case 1: - case 2: - { - const auto words_following = byte3; - unwind_pgm.push_back(byte2); - unwind_pgm.push_back(byte1); - for(auto i = 0u; i < words_following; i++) - { - const auto next_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4+i*4]); - unwind_pgm.push_back((next_word >> 24)&0xff); - unwind_pgm.push_back((next_word >> 16)&0xff); - unwind_pgm.push_back((next_word >> 8 )&0xff); - unwind_pgm.push_back((next_word >> 0 )&0xff); - } - - break; - } - default: - throw new out_of_range("Unknown personality index: "+ to_string(personality_index)); - } - - } - else - { - // generic version. 
- const auto offset_to_personality_routine = handle_pcrel31(first_word); - const auto personality_routine_addr=lsda_addr+offset_to_personality_routine; - fde.setPersonality(personality_routine_addr); - cout << "Found generic model with personality = " << hex << personality_routine_addr << endl; - const auto second_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4]); - const auto byte1 = (second_word >> 0 )&0xff; - const auto byte2 = (second_word >> 8 )&0xff; - const auto byte3 = (second_word >> 16)&0xff; - const auto byte4 = (second_word >> 24)&0xff; - const auto words_following = byte4; - unwind_pgm.push_back(byte3); - unwind_pgm.push_back(byte2); - unwind_pgm.push_back(byte1); - for(auto i = 0u; i < words_following; i++) - { - const auto next_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4+i*4]); - unwind_pgm.push_back((next_word >> 24)&0xff); - unwind_pgm.push_back((next_word >> 16)&0xff); - unwind_pgm.push_back((next_word >> 8 )&0xff); - unwind_pgm.push_back((next_word >> 0 )&0xff); - } - - // 4 for personality routine, + 1 for a length specifier + the length in bytes. - fde.parse_lsda(lsda_addr+8+words_following*4,extab_scoop.get(), fde_start, is_be); - } + cout << "Found out-of-line unwind info." << endl << hex; + unwind_pgm=parse_arm_eh_pgm(lsda_addr,extab_scoop.get(),fde, is_be); } if(contains_inline_unwind_entry ) { - const auto byte1 = (inline_unwind_entry >> 0)&0xff; - const auto byte2 = (inline_unwind_entry >> 8)&0xff; - const auto byte3 = (inline_unwind_entry >> 16)&0xff; - const auto byte4 = (inline_unwind_entry >> 24)&0x7f; // drop high bit of high word for pcrel31. 
- unwind_pgm.push_back(byte4); - unwind_pgm.push_back(byte3); - unwind_pgm.push_back(byte2); - unwind_pgm.push_back(byte1); - cout << "Found arm32-specific inline_entra with 4 instructions:" << endl << hex; - /* - cout - << "\t\t" << byte1 << endl - << "\t\t" << byte2 << endl - << "\t\t" << byte3 << endl - << "\t\t" << byte4 << endl; - */ + cout << "Found inline_entry:" << endl << hex; + unwind_pgm=parse_arm_eh_pgm(current_address+4,exidx_scoop.get(),fde, is_be); } cout << "\tFde ("<< fde.getStartAddress(); cout << "Unwind pgm = " << hex << endl; @@ -158,6 +68,7 @@ bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be) { cout << "\t" << +byte << endl; } + fde.setProgram(arm_eh_program_t<ptrsize>{unwind_pgm}); local_fdes.push_back(fde); fde_idx += 2; @@ -165,7 +76,7 @@ bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be) } // last fde goes to the end of the linked section. local_fdes[local_fdes.size()-1].setEndAddress(lnk_scoop->getEnd()); - + /* for(const auto& fde: local_fdes) { @@ -183,6 +94,92 @@ bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be) return true; } +template <int ptrsize> +vector<uint8_t> split_arm_eh_frame_impl_t<ptrsize>::parse_arm_eh_pgm(const uint64_t lsda_addr, const ScoopReplacement_t *lsda_scoop, arm_fde_contents_t<ptrsize> &fde, const bool is_be) +{ + auto unwind_pgm=vector<uint8_t>(); + const auto fde_start=fde.getStartAddress(); + + // note: do not make reference as we are going to do unsafe stuff. + // and need a copy that won't change. 
+ const auto contents_str = lsda_scoop->getContents(); + const auto contents = reinterpret_cast<const char*>(contents_str.data()); + const auto start_offset = lsda_addr - lsda_scoop->getStart(); + + // fetch 4 bytes to detect type + if(lsda_addr + sizeof(uint32_t) > lsda_scoop->getEnd()) + throw out_of_range("Cannot parse lsda at " + to_hex_string(lsda_addr)); + const auto first_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset]); + if(first_word >> 31) // check the top bit. + { + const auto byte1 = (first_word >> 0)&0xff; + const auto byte2 = (first_word >> 8)&0xff; + const auto byte3 = (first_word >> 16)&0xff; + const auto byte4 = (first_word >> 24)&0xff; + const auto personality_index = byte4 & 0xf; + cout << "Found arm32-specific personality routine, pr" << hex << personality_index << endl; + switch(personality_index) + { + case 0: + { + unwind_pgm.push_back(byte3); + unwind_pgm.push_back(byte2); + unwind_pgm.push_back(byte1); + break; + } + case 1: + case 2: + { + const auto words_following = byte3; + unwind_pgm.push_back(byte2); + unwind_pgm.push_back(byte1); + for(auto i = 0u; i < words_following; i++) + { + const auto next_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4+i*4]); + unwind_pgm.push_back((next_word >> 24)&0xff); + unwind_pgm.push_back((next_word >> 16)&0xff); + unwind_pgm.push_back((next_word >> 8 )&0xff); + unwind_pgm.push_back((next_word >> 0 )&0xff); + } + + break; + } + default: + throw out_of_range("Unknown personality index: "+ to_string(personality_index)); // throw by value, not a heap-allocated pointer + } + + } + else + { + // generic version. 
+ const auto offset_to_personality_routine = handle_pcrel31(first_word); + const auto personality_routine_addr=lsda_addr+offset_to_personality_routine; + fde.setPersonality(personality_routine_addr); + cout << "Found generic model with personality = " << hex << personality_routine_addr << endl; + const auto second_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4]); + const auto byte1 = (second_word >> 0 )&0xff; + const auto byte2 = (second_word >> 8 )&0xff; + const auto byte3 = (second_word >> 16)&0xff; + const auto byte4 = (second_word >> 24)&0xff; + const auto words_following = byte4; + unwind_pgm.push_back(byte3); + unwind_pgm.push_back(byte2); + unwind_pgm.push_back(byte1); + for(auto i = 0u; i < words_following; i++) + { + const auto next_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+8+i*4]); + unwind_pgm.push_back((next_word >> 24)&0xff); + unwind_pgm.push_back((next_word >> 16)&0xff); + unwind_pgm.push_back((next_word >> 8 )&0xff); + unwind_pgm.push_back((next_word >> 0 )&0xff); + } + + // 4 for personality routine, + 1 for a length specifier + the length in bytes. 
+ fde.parse_lsda(lsda_addr+8+words_following*4,lsda_scoop, fde_start, is_be); + } + return unwind_pgm; +} + unique_ptr<const EHFrameParser_t> EHFrameParser_t::arm_factory( uint8_t ptrsize, EHPEndianness_t endian_type, @@ -253,3 +250,117 @@ const FDEContents_t* split_arm_eh_frame_impl_t<ptrsize>::findFDE(uint64_t addr) return raw_ret_ptr; } +template <int ptrsize> +const EHProgramInstructionVector_t* arm_eh_program_t<ptrsize>::getInstructions() const +{ + if(instructions_cache.size()==0) + { + transform(ALLOF(instructions), back_inserter(instructions_cache), [](const arm_eh_program_insn_t<ptrsize> &a) { return &a;}); + } + return &instructions_cache; +} + +template <int ptrsize> +arm_eh_program_t<ptrsize>::arm_eh_program_t(const vector<uint8_t>& unwind_pgm) +{ + auto unwind_idx=0u; + const auto unwind_pgm_data = unwind_pgm.data(); + while(unwind_idx < unwind_pgm.size()) + { + const auto opcode_byte1=unwind_pgm[unwind_idx]; + const auto top_two=opcode_byte1>>6; + const auto top_four=opcode_byte1>>4; + const auto top_five=opcode_byte1>>3; + const auto bottom_three=opcode_byte1&0b111; + const auto bits543=(opcode_byte1>>3)&0b111; + + // see https://github.com/ARM-software/abi-aa/blob/main/ehabi32/ehabi32.rst#frame-unwinding-instructions + if( + top_two == 0b00 || // vsp=vsp+6-bit-immed + top_two == 0b01 || // vsp=vsp-6-bit-immed + top_four==0b1001 || // set vsp=r[immed] || reserved if immed==13||15 + top_four==0b1010 || // pop r4-r[4+immed] || pop r4-r[4+immed]+r14 + opcode_byte1 == 0xb0 || // finish + opcode_byte1 == 0b10110100 || // Pop Return Address Authentication Code pseudo-register (see remark g) + opcode_byte1 == 0b10110101 || // Use current vsp as modifier in Return Address Authentication (see remark h) + opcode_byte1 == 0b10110110 || // Spare (was Pop FPA) + opcode_byte1 == 0b10110111 || // Spare (was Pop FPA) + top_five == 0b10111 || // Pop VFP double-precision registers D[8]-D[8+nnn] saved (as if) by FSTMFDX (see remark d) + (top_five == 0b11000 && 
bottom_three != 6 && bottom_three !=7) + || // Intel Wireless MMX pop wR[10]-wR[10+nnn] + (top_five == 0b11001 && bottom_three != 0 && bottom_three !=1) + || // Spare (yyy != 000, 001) + top_five == 0b11010 || // Pop VFP double-precision registers D[8]-D[8+nnn] saved (as if) by VPUSH (see remark d) + (top_two == 0b11&& bits543 != 0b000 && bits543 != 0b001 && bits543 != 0b010) + // Spare (xxx != 000, 001, 010) + + + ) + { + // cout << "Found 1 byte unwind arm insn" << endl; + instructions.push_back(arm_eh_program_insn_t<ptrsize>{string(1, static_cast<char>(opcode_byte1))}); // (count, char) ctor: exactly one byte + unwind_idx++; + } + else if( + top_four == 0b1000 || // 12-bit immed==0 ? refuse to unwind : pop registers indicated by immed + opcode_byte1 == 0b10110001 || // spare || pop registers + opcode_byte1 == 0b10110011 || // pop vfp double registers. + (top_five == 0b11000 && bottom_three == 6) + || // Intel Wireless MMX pop wR[ssss]-wR[ssss+cccc] (see remark e) + (top_five == 0b11000 && bottom_three == 7) + || // Spare || Intel Wireless MMX pop wCGR registers under mask {wCGR3,2,1,0} || Spare (xxxx != 0000) + opcode_byte1 == 0b11001000 || // Pop VFP double precision registers D[16+ssss]-D[16+ssss+cccc] saved (as if) by VPUSH (see remarks d,e) + opcode_byte1 == 0b11001001 // Pop VFP double precision registers D[ssss]-D[ssss+cccc] saved (as if) by VPUSH (see remark d) + ) + { + // cout << "Found 2 byte arm insn" << endl; + unwind_idx++; + if(unwind_idx>=unwind_pgm.size()) + throw runtime_error("Cannot decode arm32 unwind instruction with prefix 0b1000"); + const auto opcode_byte2=unwind_pgm[unwind_idx]; + instructions.push_back(arm_eh_program_insn_t<ptrsize>{string{static_cast<char>(opcode_byte1),static_cast<char>(opcode_byte2)}}); + unwind_idx++; + } + else if ( + opcode_byte1 == 0b10110010 // vsp += uleb128 + ) + { + // declare vars needed to call uleb routine. 
+ const auto initial_pos=uint64_t{unwind_idx+1}; + const auto max=uint64_t{unwind_pgm.size()}; // exclusive bound on the index into unwind_pgm_data + auto final_pos=initial_pos; // updated by read_uleb + auto res=uint64_t{0}; // ignore output of read_uleb, just need length + // read uleb128 and sanity check. + const auto fail = eh_frame_util_t<ptrsize>::read_uleb128(res,final_pos,unwind_pgm_data,max); + if(fail) + throw out_of_range("Unable to read uleb128 in unwind_pgm"); + + // calc uleb length and record instructions.. + const auto uleb_len=final_pos-initial_pos; + instructions.push_back(arm_eh_program_insn_t<ptrsize>(string(reinterpret_cast<const char*>(unwind_pgm_data+unwind_idx), 1+uleb_len))); + const auto insn_len = 1+uleb_len; + // cout << "Found multi-byte ( " << insn_len << " bytes) arm32 instructions" << endl; + unwind_idx+=insn_len; + } + else + throw out_of_range("Cannot determine arm32 unwind instruction length"); + } +} + +template <int ptrsize> +void arm_eh_program_t<ptrsize>::print(const uint64_t pc, const int64_t caf) const +{ + auto tmp_pc=pc; + for(const auto &insn : instructions) + insn.print(tmp_pc,caf); +} + +template <int ptrsize> +void arm_eh_program_insn_t<ptrsize>::print(uint64_t &pc, int64_t caf) const +{ + cout <<"arm32 unwind insn len=" << dec << program_bytes.size() << "bytes = "; + for(const auto byte : program_bytes) + cout << hex << +byte << ", "; + cout << endl; +} + diff --git a/src/ehp_priv.hpp b/src/ehp_priv.hpp index 21fae42..dbbb026 100644 --- a/src/ehp_priv.hpp +++ b/src/ehp_priv.hpp @@ -551,6 +551,51 @@ class arm_cie_contents_t : public CIEContents_t, private eh_frame_util_t<ptrsize }; +template <int ptrsize> +class arm_eh_program_insn_t : public EHProgramInstruction_t +{ + public: + + arm_eh_program_insn_t() ; + arm_eh_program_insn_t(const string &s) + : + program_bytes(begin(s),end(s)) + { + } + + + virtual ~arm_eh_program_insn_t() {} + virtual void print(uint64_t &pc, int64_t caf=1) const ; + virtual tuple<string, 
int64_t, int64_t> decode() const { throw 
std::runtime_error("not implemented"); } + virtual uint64_t getSize() const { return program_bytes.size(); } + virtual bool isNop() const { return false; } + virtual bool isDefCFAOffset() const { return false; } + virtual bool isRestoreState() const { return false; } + virtual bool isRememberState() const { return false; } + virtual const EHProgramInstructionByteVector_t& getBytes() const { return program_bytes; } + virtual bool advance(uint64_t &cur_addr, uint64_t CAF) const { throw std::runtime_error("not implemented"); } + + private: + + vector<uint8_t> program_bytes; +}; + +template <int ptrsize> +class arm_eh_program_t : public EHProgram_t +{ + public: + arm_eh_program_t(const vector<uint8_t>& unwind_pgm={}); + virtual const EHProgramInstructionVector_t* getInstructions() const; + vector<arm_eh_program_insn_t <ptrsize> >& getInstructionsInternal() { return instructions; } + const vector<arm_eh_program_insn_t <ptrsize> >& getInstructionsInternal() const { return instructions; } + void print(const uint64_t start_addr, const int64_t caf) const; + + private: + vector<arm_eh_program_insn_t <ptrsize> > instructions; + mutable EHProgramInstructionVector_t instructions_cache; +}; + + template <int ptrsize> class arm_fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize> { @@ -559,7 +604,7 @@ class arm_fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize> uint64_t fde_lsda_addr=0; uint32_t can_unwind=false; lsda_t<ptrsize> lsda; - eh_program_t<ptrsize> eh_pgm; + arm_eh_program_t<ptrsize> eh_pgm; arm_cie_contents_t<ptrsize> cie; public: arm_fde_contents_t( uint64_t fde_start,uint64_t lsda_addr,bool p_can_unwind) : @@ -580,7 +625,7 @@ class arm_fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize> virtual uint64_t getFDEStartAddress() const { return fde_start_addr; } virtual uint64_t getFDEEndAddress() const {return fde_end_addr; } virtual const CIEContents_t& getCIE() const { return cie; } - virtual const EHProgram_t& getProgram() const { throw 
std::runtime_error(" not implimented"); } + virtual const EHProgram_t& getProgram() const { return eh_pgm; } virtual const LSDA_t* getLSDA() const { return &lsda; } virtual uint64_t getLSDAAddress() const { return fde_lsda_addr; } virtual uint64_t getStartAddressPosition() const { throw std::runtime_error(" not implimented"); } @@ -593,6 +638,7 @@ class arm_fde_contents_t : public FDEContents_t, eh_frame_util_t<ptrsize> void setEndAddress(uint64_t end) { fde_end_addr = end; } bool getCanUnwind() const { return can_unwind; } void setPersonality(uint64_t pers) { cie.setPersonality(pers); } + void setProgram(const arm_eh_program_t<ptrsize>& pgm) { eh_pgm=pgm; } bool parse_lsda(const uint64_t lsda_addr, const ScoopReplacement_t* extab_scoop, @@ -622,6 +668,8 @@ class split_arm_eh_frame_impl_t : public EHFrameParser_t mutable CIEVector_t cies_cache; mutable FDEVector_t fdes_cache; + vector<uint8_t> parse_arm_eh_pgm(const uint64_t lsda_addr, const ScoopReplacement_t *lsda_scoop, arm_fde_contents_t<ptrsize> &fde, const bool is_be); + public: split_arm_eh_frame_impl_t diff --git a/test/test.cpp b/test/test.cpp index 81f7b45..8924267 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -38,6 +38,7 @@ void print_lps(const EHFrameParser_t* ehp) for(const auto fde : *fdes) { cout<<"Found FDE at : " << fde->getStartAddress() << "-"<<fde->getEndAddress()<<endl; + fde->getProgram().print(fde->getStartAddress(),1); const auto lsda=fde->getLSDA(); assert(lsda); lsda->print(); -- GitLab