// @HEADER_COMPONENT libehp // @HEADER_LANG C++ // @HEADER_BEGIN int32_t handle_pcrel31(int32_t rel) { return (rel << 1) >> 1; } template <int ptrsize> bool split_arm_eh_frame_impl_t<ptrsize>::parse(const bool is_be) { const auto contents = exidx_scoop->getContents(); // do not make reference, as we take a c_str() of this in a moemnt. const auto fde_tab = reinterpret_cast<const uint32_t*>(contents.c_str()); const auto fde_end = reinterpret_cast<const uint32_t*>(reinterpret_cast<const uint8_t*>(fde_tab)+exidx_scoop->getLength()); auto fde_idx=0u; auto current_address = exidx_scoop->getStart(); vector<arm_fde_contents_t<ptrsize> > local_fdes; while(&fde_tab[fde_idx+2] <= fde_end) { const auto first_entry = handle_pcrel31(fde_tab[fde_idx]); const auto fde_start = current_address + first_entry; const auto second_entry = fde_tab[fde_idx+1]; const auto upper_bit = second_entry >> 31; const auto can_unwind = second_entry != 0x1; // can't unwind == 1: const auto offset_to_start = handle_pcrel31(second_entry); const auto contains_inline_unwind_entry = can_unwind && (second_entry>>31); // is inline if bit 31 set, and not special pattern cant_unwind. //const auto inline_unwind_entry = second_entry & 0x7fffffff; // the EH unwind table entry itself if it can be encoded in 31 bits. const auto lsda_addr = !can_unwind ? 0 : // the special pattern 0x1 indicating can't unwind. contains_inline_unwind_entry ? 0 : // no lsda addr if the entry is inline upper_bit == 0 ? current_address + 4 + offset_to_start : // pcrel31 offset if bit 31 is clear. throw runtime_error("Unexpected/logic error"); // report error if this is messed up. // update the prior fde to end just before this one starts. if(local_fdes.size() != 0) { local_fdes[local_fdes.size()-1].setEndAddress(fde_start-1); } auto fde = arm_fde_contents_t<ptrsize>{fde_start,lsda_addr,can_unwind}; /* cout << hex ; cout << "\tFde ("<< fde.getStartAddress(); cout << "-" << fde.getEndAddress(); cout << ")\tlsda_addr=" << fde.getLSDAAddress(); cout << "\tcan_unwind=" << boolalpha << fde.getCanUnwind() << endl; */ auto unwind_pgm =vector<uint8_t>(); if(lsda_addr) { // fetch the first word of the lsda. throw_assert(extab_scoop->getStart() <= lsda_addr && lsda_addr <= extab_scoop->getEnd()); // cout << "Found out-of-line unwind info." << endl << hex; unwind_pgm=parse_arm_eh_pgm(lsda_addr,extab_scoop.get(),fde, is_be); } if(contains_inline_unwind_entry ) { // cout << "Found inline_entry:" << endl << hex; unwind_pgm=parse_arm_eh_pgm(current_address+4,exidx_scoop.get(),fde, is_be); } //cout << "\tFde ("<< fde.getStartAddress(); //cout << "Unwind pgm = " << hex << endl; //for(auto byte : unwind_pgm) //{ //cout << "\t" << +byte << endl; //} fde.setProgram(arm_eh_program_t<ptrsize>{unwind_pgm}); local_fdes.push_back(fde); fde_idx += 2; current_address += 8; } // last fde goes to the end of the linked section. local_fdes[local_fdes.size()-1].setEndAddress(lnk_scoop->getEnd()); /* for(const auto& fde: local_fdes) { cout << hex ; cout << "\tFde ("<< fde.getStartAddress(); cout << "-" << fde.getEndAddress(); cout << ")\tlsda_addr=" << fde.getLSDAAddress(); cout << "\tcan_unwind=" << boolalpha << fde.getCanUnwind() << endl; fde.getLSDA()->print(); } */ fdes.insert(ALLOF(local_fdes)); return true; } template <int ptrsize> vector<uint8_t> split_arm_eh_frame_impl_t<ptrsize>::parse_arm_eh_pgm(const uint64_t lsda_addr, const ScoopReplacement_t *lsda_scoop, arm_fde_contents_t<ptrsize> &fde, const bool is_be) { auto unwind_pgm=vector<uint8_t>(); const auto fde_start=fde.getStartAddress(); // note: do not make reference as we are going to do unsafe stuff. // and need a copy that won't change. const auto contents_str = lsda_scoop->getContents(); const auto contents = reinterpret_cast<const uint8_t*>(contents_str.data()); const auto start_offset = lsda_addr - lsda_scoop->getStart(); // fetch 4 bytes to detect type if(lsda_addr + sizeof(uint32_t) > lsda_scoop->getEnd()) throw out_of_range("Cannot parse lsda at " + to_hex_string(lsda_addr)); const auto first_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset]); if(first_word >> 31) // check the top bit. { const auto byte1 = (first_word >> 0)&0xff; const auto byte2 = (first_word >> 8)&0xff; const auto byte3 = (first_word >> 16)&0xff; const auto byte4 = (first_word >> 24)&0xff; const auto personality_index = byte4 & 0xf; // cout << "Found arm32-specific personality routine, pr" << hex << personality_index << endl; switch(personality_index) { case 0: { unwind_pgm.push_back(byte3); unwind_pgm.push_back(byte2); unwind_pgm.push_back(byte1); break; } case 1: case 2: { const auto words_following = byte3; unwind_pgm.push_back(byte2); unwind_pgm.push_back(byte1); for(auto i = 0u; i < words_following; i++) { const auto next_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4+i*4]); unwind_pgm.push_back((next_word >> 24)&0xff); unwind_pgm.push_back((next_word >> 16)&0xff); unwind_pgm.push_back((next_word >> 8 )&0xff); unwind_pgm.push_back((next_word >> 0 )&0xff); } break; } default: throw new out_of_range("Unknown personality index: "+ to_string(personality_index)); } } else { // generic version. const auto offset_to_personality_routine = handle_pcrel31(first_word); const auto personality_routine_addr=lsda_addr+offset_to_personality_routine; fde.setPersonality(personality_routine_addr); // cout << "Found generic model with personality = " << hex << personality_routine_addr << endl; const auto second_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+4]); const auto byte1 = (second_word >> 0 )&0xff; const auto byte2 = (second_word >> 8 )&0xff; const auto byte3 = (second_word >> 16)&0xff; const auto byte4 = (second_word >> 24)&0xff; const auto words_following = byte4; unwind_pgm.push_back(byte3); unwind_pgm.push_back(byte2); unwind_pgm.push_back(byte1); for(auto i = 0u; i < words_following; i++) { const auto next_word = *reinterpret_cast<const uint32_t*>(&contents[start_offset+8+i*4]); unwind_pgm.push_back((next_word >> 24)&0xff); unwind_pgm.push_back((next_word >> 16)&0xff); unwind_pgm.push_back((next_word >> 8 )&0xff); unwind_pgm.push_back((next_word >> 0 )&0xff); } // 4 for personality routine, + 1 for a length specifier + the length in bytes. fde.parse_lsda(lsda_addr+8+words_following*4,lsda_scoop, fde_start, is_be); } return unwind_pgm; } unique_ptr<const EHFrameParser_t> EHFrameParser_t::arm_factory( uint8_t ptrsize, EHPEndianness_t endian_type, const string &extab_sec, const uint64_t extab_addr, const string &exidx_sec, const uint64_t exidx_addr, const string &lnk_sec, const uint64_t lnk_addr ) { const auto extab_scoop=ScoopReplacement_t(extab_sec, extab_addr); const auto exidx_scoop=ScoopReplacement_t(exidx_sec,exidx_addr); const auto lnk_scoop =ScoopReplacement_t(lnk_sec, lnk_addr); auto ret_val=(EHFrameParser_t*)nullptr; if(ptrsize==4) ret_val=new split_arm_eh_frame_impl_t<4>(extab_scoop,exidx_scoop,lnk_scoop); else if(ptrsize==8) ret_val=new split_arm_eh_frame_impl_t<8>(extab_scoop,exidx_scoop,lnk_scoop); else throw out_of_range("ptrsize must be 4 or 8"); const auto is_big_endian = [] () -> bool { union { uint32_t i; char c[4]; } bint = {0x01020304}; return bint.c[0] == 1; }; const auto is_be = endian_type == BIG || ( is_big_endian() && endian_type == HOST) ; ret_val->parse(is_be); return unique_ptr<const EHFrameParser_t>(ret_val); } template <int ptrsize> const FDEVector_t* split_arm_eh_frame_impl_t<ptrsize>::getFDEs() const { if(fdes_cache.size()==0) { transform(ALLOF(fdes), back_inserter(fdes_cache), [](const arm_fde_contents_t<ptrsize> &a) { return &a; }); } return &fdes_cache; } template <int ptrsize> const CIEVector_t* split_arm_eh_frame_impl_t<ptrsize>::getCIEs() const { if(cies_cache.size()==0) { transform(ALLOF(fdes), back_inserter(cies_cache), [](const arm_fde_contents_t<ptrsize> &a) { return &a.getCIE(); }); } return &cies_cache; } template <int ptrsize> const FDEContents_t* split_arm_eh_frame_impl_t<ptrsize>::findFDE(uint64_t addr) const { const auto tofind=arm_fde_contents_t<ptrsize>( addr, addr+1); const auto fde_it=fdes.find(tofind); const auto raw_ret_ptr = (fde_it==fdes.end()) ? nullptr : &*fde_it; return raw_ret_ptr; } template <int ptrsize> const EHProgramInstructionVector_t* arm_eh_program_t<ptrsize>::getInstructions() const { if(instructions_cache.size()==0) { transform(ALLOF(instructions), back_inserter(instructions_cache), [](const arm_eh_program_insn_t<ptrsize> &a) { return &a;}); } return &instructions_cache; } template <int ptrsize> arm_eh_program_t<ptrsize>::arm_eh_program_t(const vector<uint8_t>& unwind_pgm) { auto unwind_idx=0u; const auto unwind_pgm_data = unwind_pgm.data(); while(unwind_idx < unwind_pgm.size()) { const auto opcode_byte1=unwind_pgm[unwind_idx]; const auto top_two=opcode_byte1>>6; const auto top_four=opcode_byte1>>4; const auto top_five=opcode_byte1>>3; const auto bottom_three=opcode_byte1&0b111; const auto bits543=(opcode_byte1>>3)&0b111; // see https://github.com/ARM-software/abi-aa/blob/main/ehabi32/ehabi32.rst#frame-unwinding-instructions if( top_two == 0b00 || // vsp=vsp+6-bit-immed top_two == 0b01 || // vsp=vsp-6-bit-immed top_four==0b1001 || // set vsp=r[immed] || reserved if immed==13||15 top_four==0b1010 || // pop r4-r[4+immed] || pop r4-r[4+immed]+r14 opcode_byte1 == 0xb0 || // finish opcode_byte1 == 0b10110100 || // Pop Return Address Authentication Code pseudo-register (see remark g) opcode_byte1 == 0b10110101 || // Use current vsp as modifier in Return Addresss Authentication (see remark h) opcode_byte1 == 0b10110110 || // Spare (was Pop FPA) opcode_byte1 == 0b10110111 || // Spare (was Pop FPA) top_five == 0b10111 || // Pop VFP double-precision registers D[8]-D[8+nnn] saved (as if) by FSTMFDX (see remark d) (top_five == 0b11000 && bottom_three != 6 && bottom_three !=7) || // Intel Wireless MMX pop wR[10]-wR[10+nnn] (top_five == 0b11001 && bottom_three != 0 && bottom_three !=1) || // Spare (yyy != 000, 001) top_five == 0b10111 || // Pop VFP double-precision registers D[8]-D[8+nnn] saved (as if) by VPUSH (see remark d) (top_two == 0b11&& bits543 != 0b000 && bits543 != 0b001 && bits543 != 0b010) // Spare (xxx != 000, 001, 010) ) { // cout << "Found 1 byte unwind arm insn" << endl; instructions.push_back(arm_eh_program_insn_t<ptrsize>({opcode_byte1})); unwind_idx++; } else if( top_four == 0b1000 || // 12-bit immed==0 ? refuse to unwind : pop registers indicated by immed opcode_byte1 == 0b10110001 || // spare || pop registers opcode_byte1 == 0b10110011 || // pop vfp double registers. (top_five == 0b11000 && bottom_three == 6) || // Intel Wireless MMX pop wR[ssss]-wR[ssss+cccc] (see remark e) (top_five == 0b11000 && bottom_three == 7) || // Spare || Intel Wireless MMX pop wCGR registers under mask {wCGR3,2,1,0} || Spare (xxxx != 0000) opcode_byte1 == 0b11001000 || // Pop VFP double precision registers D[16+ssss]-D[16+ssss+cccc] saved (as if) by VPUSH (see remarks d,e) opcode_byte1 == 0b11001001 // Pop VFP double precision registers D[ssss]-D[ssss+cccc] saved (as if) by VPUSH (see remark d) ) { // cout << "Found 2 byte arm insn" << endl; unwind_idx++; if(unwind_idx>=unwind_pgm.size()) throw runtime_error("Cannot decode arm32 unwind instruction with prefix 0b1000"); const auto opcode_byte2=unwind_pgm[unwind_idx]; instructions.push_back(arm_eh_program_insn_t<ptrsize>({opcode_byte1,opcode_byte2})); unwind_idx++; } else if ( opcode_byte1 == 0b10110010 // vsp += uleb128 ) { // declare vars needed to call uleb routine. const auto initial_pos=uint64_t{unwind_idx+1}; const auto max=initial_pos+unwind_pgm.size(); auto final_pos=initial_pos; // updated by read_uleb auto res=uint64_t{0}; // ignore output of read_uleb, just need length // read uleb128 and sanity check. const auto fail = eh_frame_util_t<ptrsize>::read_uleb128(res,final_pos,unwind_pgm_data,max); if(fail) throw new out_of_range("Unable to read uleb128 in unwind_pgm"); // calc uleb length and record instructions.. const auto uleb_len=final_pos-initial_pos; const auto unwind_pgm_start = unwind_pgm_data+unwind_idx; const auto unwind_pgm_end = unwind_pgm_data+unwind_idx+1+uleb_len; instructions.push_back(arm_eh_program_insn_t<ptrsize>(vector<uint8_t>({unwind_pgm_start,unwind_pgm_end}))); const auto insn_len = 1+uleb_len; // cout << "Found multi-byte ( " << insn_len << " bytes) arm32 instructions" << endl; unwind_idx+=insn_len; } else throw new out_of_range("Cannot determine arm32 unwind instruction length"); } } template <int ptrsize> void arm_eh_program_t<ptrsize>::print(const uint64_t pc, const int64_t caf) const { auto tmp_pc=pc; for(const auto &insn : instructions) insn.print(tmp_pc,caf); } template <int ptrsize> void arm_eh_program_insn_t<ptrsize>::print(uint64_t &pc, int64_t caf) const { cout <<"arm32 unwind insn len=" << dec << program_bytes.size() << "bytes = "; for(const auto byte : program_bytes) cout << hex << +byte << ", "; cout << endl; } template <int ptrsize> void arm_fde_contents_t<ptrsize>::print() const { cout << "start_addr = " << hex << fde_start_addr << endl; cout << "end_addr = " << hex << fde_end_addr << endl; cout << "lsda_addr = " << hex << fde_lsda_addr << endl; cout << "can_unwind = " << boolalpha << can_unwind << endl; // lsda_t<ptrsize> lsda; // arm_eh_program_t<ptrsize> eh_pgm; // arm_cie_contents_t<ptrsize> cie; }