Newer
Older
/*
* Copyright (c) 2014 - Zephyr Software LLC
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from Zephyr
* Software.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: Zephyr Software
* e-mail: jwd@zephyr-software.com
* URL : http://www.zephyr-software.com/
*
*/
#include <limits>
#include <string>
#include <stdlib.h>
#include <string.h>
#include <map>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <regex.h>

Jason Hiser
committed
#include <cctype>
#include "check_thunks.hpp"
#include "back_search.hpp"

Jason Hiser
committed
#define ALLOF(a) begin(a),end(a)
extern void read_ehframe(FileIR_t* firp, EXEIO::exeio* );
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
 * cptrtoh - read one pointer-sized constant stored in the target binary's byte
 * order and return it in host byte order.
 *
 * firp - IR of the file being analyzed; supplies the pointer width
 *        (getArchitectureBitWidth()/8) and the machine type that selects
 *        little- vs. big-endian decoding.
 * cptr - address of the first byte of the constant.  Exactly 4 bytes are read
 *        when the pointer width is 4 and exactly 8 bytes when it is 8; the
 *        address does not need to be aligned.
 *
 * Returns the decoded constant converted to T.
 * Throws invalid_argument when the pointer width is neither 4 nor 8, or when
 * the machine type is not one of those handled for that width.
 */
template<typename T>
static inline T cptrtoh(FileIR_t* firp, const uint8_t* cptr)
{
	const auto ptrsize = firp->getArchitectureBitWidth() / 8 ;
	const auto mt      = firp->getArchitecture()->getMachineType();

	switch(ptrsize)
	{
		case 4:
		{
			// 4-byte pointers: load exactly 32 bits.  memcpy is used instead of
			// dereferencing a cast pointer so that an unaligned cptr is safe and
			// no bytes beyond the constant are touched.
			uint32_t raw_const = 0;
			memcpy(&raw_const, cptr, sizeof(raw_const));
			switch(mt)
			{
				case admtI386:
				case admtArm32:
					return le32toh(raw_const);
				case admtMips32:
					return be32toh(raw_const);
				default:
					throw invalid_argument("Cannot detect machine type");
			}
		}
		case 8:
		{
			// 8-byte pointers: load the full 64 bits so the upper half of the
			// constant reaches the byte-order conversion.
			uint64_t raw_const = 0;
			memcpy(&raw_const, cptr, sizeof(raw_const));
			switch(mt)
			{
				case admtX86_64:
				case admtAarch64:
					return le64toh(raw_const);
				case admtMips64:
					return be64toh(raw_const);
				default:
					throw invalid_argument("Cannot detect machine type");
			}
		}
		default:
		{
			throw invalid_argument("Cannot detect pointer size");
		}
	}
}

Jason Hiser
committed
class PopulateIndTargs_t : public TransformStep_t
// record all full addresses and page-addresses found per function (or null for no function)
using PerFuncAddrSet_t=set<VirtualOffset_t>;

Jason Hiser
committed
map<Function_t*,PerFuncAddrSet_t> all_adrp_results;
map<Function_t*,PerFuncAddrSet_t> all_add_adrp_results;
// record all full addresses and page-addresses found that are spilled to the stack
using SpillPoint_t = pair<Function_t*, VirtualOffset_t>;

Jason Hiser
committed
map<SpillPoint_t,PerFuncAddrSet_t> spilled_add_adrp_results;
map<SpillPoint_t,PerFuncAddrSet_t> spilled_adrps;
// record all full addresses found that are spilled to a floating-point register (e.g., D10)
using DregSpillPoint_t = pair<Function_t*, string>;
map<DregSpillPoint_t, PerFuncAddrSet_t> spilled_to_dreg;
map<string,PerFuncAddrSet_t> per_reg_add_adrp_results;

Jason Hiser
committed
set< pair <VirtualOffset_t,VirtualOffset_t> > bounds;
map<VirtualOffset_t,ibt_provenance_t> targets;
// the set of ranges represented by the eh_frame section, could be empty for non-elf files.
set< pair< VirtualOffset_t, VirtualOffset_t> > ranges;
map< Instruction_t*, fii_icfs > jmptables;
map<VirtualOffset_t,Instruction_t*> lookupInstructionMap;
// the set of things that are partially unpinned already.
set<Instruction_t*> already_unpinned;
Jason Hiser
committed
/*
 * registerToSearchString - spell a register id the way it is searched for:
 * the text produced by registerToString(reg), with every character passed
 * through tolower.
 */
string registerToSearchString(const RegisterID_t& reg)
{
	auto lowered = registerToString(reg);
	for(auto& ch : lowered)
		ch = ::tolower(ch);
	return lowered;
}
void range(VirtualOffset_t start, VirtualOffset_t end)
pair<VirtualOffset_t,VirtualOffset_t> foo(start,end);
/*
* is_in_range - determine if an address is referenced by the eh_frame section
*/
for(auto bound : ranges)
auto start=bound.first;
auto end=bound.second;
if(start<=p && p<=end)
return true;
}
return false;
}
* process_range - do nothing now -- fix calls deals with this.
/*
 * possible_target - report a candidate indirect-branch target.
 *
 * p         - candidate target address (printed in hex by the verbose messages).
 * from_addr - address the candidate was found at (printed by the first message).
 * prov      - provenance of the candidate (printed by both messages).
 *
 * NOTE(review): as it appears in this view, the body prints both verbose
 * messages whenever IB_VERBOSE is set and then always returns true, leaving
 * the trailing "return false" unreachable.  The condition guarding the inner
 * block and the choice between the two messages look like they are missing
 * from this view -- confirm against the complete file before relying on this.
 */
bool possible_target(VirtualOffset_t p, VirtualOffset_t from_addr, ibt_provenance_t prov)
{
{
// tracing is switched on by the mere presence of IB_VERBOSE in the environment
if(getenv("IB_VERBOSE")!=nullptr)
{
// message naming the location the candidate came from
cout<<"Found IB target address 0x"<<std::hex<<p<<" at 0x"<<from_addr<<std::dec<<", prov="<<prov<<endl;
// message for a candidate with no known source location
cout<<"Found IB target address 0x"<<std::hex<<p<<" from unknown location, prov="<<prov<<endl;
}
return true;
}
return false;
}
/*
 * is_possible_target - test whether an address lies inside any recorded bound.
 *
 * p    - address to test.
 * addr - not used by the code visible here.
 *
 * Returns true when p falls within some (start,end) pair stored in `bounds`,
 * with both ends treated as inclusive; false when no pair contains it.
 *
 * NOTE(review): the function's opening brace is not present in this view.
 */
bool is_possible_target(VirtualOffset_t p, VirtualOffset_t addr)
for(auto bound : bounds)
{
auto start=bound.first;
auto end=bound.second;
// inclusive on both ends: p==end is accepted
if(start<=p && p<=end)
{
return true;
}
}
return false;
}

Jason Hiser
committed
EXEIO::section* find_section(VirtualOffset_t addr, EXEIO::exeio *exeiop)

Jason Hiser
committed
for ( int i = 0; i < exeiop->sections.size(); ++i )

Jason Hiser
committed
EXEIO::section* pSec = exeiop->sections[i];
assert(pSec);
if(pSec->get_address() > addr)
continue;
if(addr >= pSec->get_address()+pSec->get_size())
return nullptr;
void handle_argument(
const DecodedInstruction_t& decoded_insn,
Instruction_t* insn,
ibt_provenance_t::provtype_t pt = ibt_provenance_t::ibtp_text
)
if(arg.isMemory() && decoded_insn.getMnemonic()=="lea")
assert(insn->getAddress());
possible_target(arg.getMemoryDisplacement() + insn->getAddress()->getVirtualOffset() +
insn->getDataBits().length(), insn->getAddress()->getVirtualOffset(), pt);
possible_target(arg.getMemoryDisplacement(), insn->getAddress()->getVirtualOffset(), pt);
void lookupInstruction_init(FileIR_t *firp)
lookupInstructionMap.clear();
const auto addr=insn->getAddress()->getVirtualOffset();
lookupInstructionMap[addr]=insn;
Instruction_t *lookupInstruction(FileIR_t *firp, VirtualOffset_t virtual_offset)
{
if(lookupInstructionMap.find(virtual_offset)!=lookupInstructionMap.end())
return lookupInstructionMap[virtual_offset];
return nullptr;
const auto addr=insn->getAddress()->getVirtualOffset();
/* lookup in the list of targets */
if(targets.find(addr)!=targets.end())

Jason Hiser
committed
const auto isret = targets[addr].areOnlyTheseSet(ibt_provenance_t::ibtp_ret);
const auto isprintf = targets[addr].areOnlyTheseSet(ibt_provenance_t::ibtp_stars_data|ibt_provenance_t::ibtp_texttoprintf) &&
targets[addr].isFullySet (ibt_provenance_t::ibtp_stars_data|ibt_provenance_t::ibtp_texttoprintf);
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Skipping pin for ret at "<<hex<<addr<<endl;
}
else if(isprintf)
{
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Skipping pin for text to printf at "<<hex<<addr<<endl;
}
else if(firp->findScoop(addr))
{
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Skipping pin data_in_text "<<hex<<addr<<endl;
}
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Setting pin at "<<hex<<addr<<endl;
auto newaddr=firp->addNewAddress(insn->getAddress()->getFileID(), insn->getAddress()->getVirtualOffset());
insn->setIndirectBranchTargetAddress(newaddr);
/*
 * CallToPrintfFollows - follow the fallthrough chain after insn looking for a
 * call whose target function has "printf" in its name.
 *
 * firp    - file IR; not referenced by the code visible here.
 * insn    - instruction to start from; scanning begins at its fallthrough.
 * arg_str - text identifying the argument; scanning gives up as soon as an
 *           instruction's disassembly contains it.
 *
 * Returns true only when a "call" is reached whose target belongs to a
 * function whose name contains "printf"; returns false when the chain ends,
 * when the call's target has no function, or when arg_str is mentioned first.
 *
 * NOTE(review): the loop's opening brace and the condition guarding the
 * "check we have a target" return are not present in this view.
 */
bool CallToPrintfFollows(FileIR_t *firp, Instruction_t* insn, const string& arg_str)
{
// walk straight-line successors until the chain runs out
for(auto ptr=insn->getFallthrough(); ptr!=nullptr; ptr=ptr->getFallthrough())
auto d=DecodedInstruction_t ::factory(ptr);
// the first call reached decides the answer
if(d->getMnemonic() == string("call"))
{
// check we have a target
return false;
// check the target has a function
if(ptr->getTarget()->getFunction()==nullptr)
return false;
// check if we're calling printf.
// (substring match: any function name containing "printf" is accepted)
if(ptr->getTarget()->getFunction()->getName().find("printf")==string::npos)
return false;
// found it
return true;
}
// found reference to argstring, assume it's a write and exit
if(d->getDisassembly().find(arg_str)!= string::npos)
return false;
}
return false;
}
/*
 * texttoprintf - decide whether insn writes a parameter that is later handed
 * to a printf-style call.
 *
 * Returns true only when isParameterWrite() accepts insn and
 * CallToPrintfFollows() then succeeds for the argument it reported.
 */
bool texttoprintf(FileIR_t *firp,Instruction_t* insn)
{
	// filled in by isParameterWrite, then consumed by CallToPrintfFollows
	string written_arg="";

	if(!isParameterWrite(firp,insn, written_arg))
		return false;

	return CallToPrintfFollows(firp,insn,written_arg);
}

Jason Hiser
committed
void get_instruction_targets(FileIR_t *firp, EXEIO::exeio* exeiop, const set<VirtualOffset_t>& thunk_bases)
auto disasm=DecodedInstruction_t::factory(insn);
VirtualOffset_t instr_len = disasm->length(); // Disassemble(insn,disasm);
assert(instr_len==insn->getDataBits().size());
const auto mt=firp->getArchitecture()->getMachineType();
if(mt==admtX86_64 || mt==admtI386)
{
// work for both 32- and 64-bit.

Jason Hiser
committed
check_for_PIC_switch_table32_type2(firp, insn, *disasm, exeiop, thunk_bases);
check_for_PIC_switch_table32_type3(firp, insn, *disasm, exeiop, thunk_bases);

Jason Hiser
committed
check_for_PIC_switch_table32(firp, insn, *disasm, exeiop, thunk_bases);
else if (firp->getArchitectureBitWidth()==64)

Jason Hiser
committed
check_for_PIC_switch_table64(firp, insn, *disasm, exeiop);

Jason Hiser
committed
check_for_nonPIC_switch_table(firp, insn, *disasm, exeiop);
check_for_nonPIC_switch_table_pattern2(firp, insn, *disasm, exeiop);
}
else if(mt==admtAarch64)
{
check_for_arm64_switch_type1(firp,insn, *disasm, exeiop);
}
else if(mt==admtArm32)
{
check_for_arm32_switch_type1(firp,insn, *disasm, exeiop);
check_for_arm32_switch_type2(firp,insn, *disasm, exeiop);
check_for_arm32_switch_type3(firp,insn, *disasm, exeiop);
/* no reason to look for pc-rel constants in mips */
if(firp->getArchitecture()->getFileType() == adftELFSO)
continue;;
else
throw invalid_argument("Cannot determine machine type");
/* other branches can't indicate an indirect branch target */
if(disasm->isBranch()) // disasm.Instruction.BranchType)
ibt_provenance_t::provtype_t prov=0;
if(!texttoprintf(firp,insn))
{
prov=ibt_provenance_t::ibtp_text;
}
else
{
cout<<"TextToPrintf analysis of '"<<disasm->getDisassembly()<<"' successful at " <<hex<<insn->getAddress()->getVirtualOffset()<<endl;
prov=ibt_provenance_t::ibtp_texttoprintf;
/* otherwise, any immediate is a possible branch target */
{
if(op->isConstant())
possible_target(op->getConstant(), 0, prov);
}
const auto op=disasm->getOperand(i);
handle_argument(*disasm, *op, insn, prov);
return;
/* loaded, and contains instruction, record the bounds */
VirtualOffset_t first=shdr->get_address();
VirtualOffset_t second=shdr->get_address()+shdr->get_size();
bounds.insert(pair<VirtualOffset_t,VirtualOffset_t>(first,second));
/* check for a not loaded section */
if( ! shdr->isLoadable())
/* check for a loaded, but contains instruction section.
* we'll look through the VariantIR for this section. */
if( shdr->isExecutable() )
/* if the type is NOBITS, then there's no actual data to look through */
if(shdr->isBSS() )
return;
// skip .dynsym section -- process-dynsym does this.
if(shdr->get_name()==".dynsym")
return;
cout<<"Checking section "<<shdr->get_name() <<endl;
// assume pointers need to be at least 4-byte aligned.
for(auto i=0u;i+arch_ptr_bytes()<=(size_t)shdr->get_size();i+=4)
// even on 64-bit, pointers might be stored as 32-bit, as a
// elf object has the 32-bit limitations.
// there's no real reason to look for 64-bit pointers
const auto ptr_val = uint64_t(
(arch_ptr_bytes()==4) ? cptrtoh<uint32_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) :
(arch_ptr_bytes()==8) ? cptrtoh<uint64_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) :
throw invalid_argument("Cannot map architecture size to bit width")
);
const auto ptr_addr = i+shdr->get_address();
const auto ptr_prov =
(shdr->get_name()==".init_array") ? ibt_provenance_t::ibtp_initarray :
(shdr->get_name()==".fini_array") ? ibt_provenance_t::ibtp_finiarray :
(shdr->get_name()==".got.plt") ? ibt_provenance_t::ibtp_gotplt :
(shdr->get_name()==".got") ? ibt_provenance_t::ibtp_got :
(shdr->get_name()==".symtab") ? ibt_provenance_t::ibtp_symtab :
(shdr->isWriteable()) ? ibt_provenance_t::ibtp_data :
ibt_provenance_t::ibtp_rodata;
possible_target(ptr_val, ptr_addr, ptr_prov);
void handle_scoop_scanning(FileIR_t* firp)
{
// check for addresses in scoops in the text section.
{

Jason Hiser
committed
if(scoop->getName() == ".ctor" || scoop->getName() == ".dtor" )
{
const auto &scoop_contents = scoop->getContents();

Jason Hiser
committed
for(auto i = 0u; i + ptrsize < scoop_contents.size(); i += ptrsize)
{
const auto ptr = cptrtoh<uint64_t>(firp, reinterpret_cast<const uint8_t*>(scoop_contents.c_str() + i));

Jason Hiser
committed
possible_target(ptr, scoop->getStart()->getVirtualOffset() + i, ibt_provenance_t::ibtp_data);
}
}
// test if scoop was added by fill_in_cfg -- make this test better.
if(scoop->getName().find("data_in_text_")==string::npos) continue;

Jason Hiser
committed
// at the moment, FIC only creates 4-, 8-, and 16-byte scoops
// change this code if FIC changes.

Jason Hiser
committed
{
// may be a 4-byter, which can't hold an address.
continue;
}

Jason Hiser
committed
{
// check to see if the scoop has an IBTA
const auto addr=*(uint64_t*)(scoop->getContents().c_str());
possible_target(addr, scoop->getStart()->getVirtualOffset(), ibt_provenance_t::ibtp_unknown);

Jason Hiser
committed
}
else
{
// we may see 16 indicating that a ldr q-word happened.
// this isn't likely an IBT, so we skip scanning it.

Jason Hiser
committed
}
}
}
/*
 * find_in_function - collect the instructions of a function whose disassembly
 * matches a regular expression.
 *
 * needle   - POSIX extended regular expression (compiled with REG_EXTENDED).
 * haystack - function whose instructions are examined.
 *
 * Returns the set of instructions in haystack whose disassembly text matches.
 * Throws invalid_argument when needle cannot be compiled (debug builds still
 * stop on the assertion first).
 */
set<Instruction_t*> find_in_function(string needle, Function_t *haystack)
{
	regex_t preg;

	// The compile must not live inside assert(): with NDEBUG the assert
	// argument is never evaluated, which would leave preg uninitialised for
	// the regexec/regfree calls below.
	const auto compile_status = regcomp(&preg, needle.c_str(), REG_EXTENDED);
	assert(compile_status == 0);
	if(compile_status != 0)
		throw invalid_argument("Cannot compile regex: " + needle);

	set<Instruction_t*> found_instructions;
	for (auto candidate : haystack->getInstructions())
	{
		auto disasm=DecodedInstruction_t::factory(candidate);

		// keep the instruction when its disassembly matches the pattern
		if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0)
		{
			found_instructions.insert(candidate);
		}
	}

	regfree(&preg);
	return found_instructions;
}

Jason Hiser
committed
const DecodedInstruction_t &d10,
EXEIO::exeio* exeiop)
{
const auto prov=ibt_provenance_t::ibtp_switchtable_type1;
/*
* Check for hand-written assembly for divsi and udivsi that has this dispatch insn: addne pc, pc, <reg> lsl #2
*/
const auto d = DecodedInstruction_t::factory(insn);
const auto is_addne = d->getMnemonic() == "addne" ;
if(is_addne)
{
const auto is_op0_pc = d->getOperand(0)->getString()=="pc";
const auto is_op1_pc = d->getOperand(1)->getString()=="pc";
if(is_op0_pc && is_op1_pc)
{
cout << "Found gcc addne pc,pc idiom" << endl;
for(auto i=1u;i<32u;i++)
{
const auto ibta = insn->getAddress()->getVirtualOffset() + 8 + i*12;
possible_target(ibta,0,prov);
}
}
}
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
/*
 * check_for_arm32_switch_type2 - recognise an ARM32 switch that dispatches by
 * loading pc from a table placed right after the dispatch instruction.
 *
 * Shapes being looked for:
 *
 *     I9:  cmp   r2, #4
 *     I10: ldrls pc, [pc, r2, lsl #2]
 *
 * or:
 *
 *     I8:  ldr   r3, [pc, #k]
 *     I9:  cmp   r2, r3
 *     I10: ldrls pc, [pc, r2, lsl #2]
 *
 * firp   - file IR, used to look up instructions and to add the table scoop.
 * i10    - candidate dispatch instruction.
 * d10    - decoded form of i10.
 * exeiop - executable being analysed; used to find the table's section.
 *
 * On a match, every table entry examined is reported through possible_target,
 * entries that resolve to an instruction are added to jmptables[i10], a data
 * scoop is added for the table, and the analysis is marked complete.
 */
void check_for_arm32_switch_type2(
	FileIR_t *firp,
	Instruction_t* i10,
	const DecodedInstruction_t &d10,
	EXEIO::exeio* exeiop)
{
	// Only a dispatch whose disassembly starts with the ldrls-into-pc form qualifies;
	// seeing it is enough to conclude this is a switch.
	const auto dispatch_text = d10.getDisassembly();
	if(dispatch_text.find("ldrls pc, [pc") != 0)
		return;

	const auto prov=ibt_provenance_t::ibtp_switchtable_type2;

	// Table bound; stays at "unbounded" unless the compare supplies a constant.
	uint32_t table_limit = numeric_limits<uint32_t>::max();

	// The index register is cut out of the disassembly text: three characters
	// from offset 15, dropping a trailing comma when the name is two characters.
	const auto reg_field = dispatch_text.substr(15,3);
	auto index_reg = reg_field;
	if(reg_field[2] == ',')
		index_reg = reg_field.substr(0,2);

	// Search backwards from I10 for the compare on the index register (I9),
	// giving up if the register is redefined first.
	Instruction_t* i9 = nullptr;
	const auto search_ok = backup_until(
		string()+"cmp "+index_reg+",",
		i9,
		i10,
		"^"+index_reg+"$");
	if(!search_ok)
		return;

	if(i9 != nullptr)
	{
		const auto cmp_decoded = DecodedInstruction_t::factory(i9);
		const auto bound_operand = cmp_decoded->getOperand(1);

		// A constant second operand is the table bound.  A register bound would
		// need a further backwards search for its load from .text -- TBD.
		if(bound_operand->isConstant())
			table_limit = bound_operand->getConstant();
	}

	// The addressing mode of I10 places the table at "pc+8".
	const auto table_start = i10->getAddress()->getVirtualOffset() + 8u;
	const auto table_section = find_section(table_start,exeiop);
	assert(table_section);
	const auto section_bytes = table_section->get_data();
	const auto section_start = table_section->get_address();
	const auto section_end = section_start + table_section->get_size();

	const auto entry_bytes = 4u;
	auto entry_index = 0u;
	for( ; ; ++entry_index)
	{
		const auto entry_addr = table_start + entry_index * entry_bytes;

		// never read past the end of the section holding the table
		if(entry_addr + entry_bytes > section_end)
			break;

		const auto entry_value = * reinterpret_cast<const uint32_t*>(&section_bytes[entry_addr - section_start]);

		// the entry is recorded as a candidate target before it is validated
		possible_target(entry_value, entry_addr, prov);

		// an entry that does not name a known instruction ends the table
		const auto case_insn = lookupInstruction(firp, entry_value);
		if(case_insn == nullptr)
			break;

		cout << "Found ARM32 switch (ldrls -- type2)@0x" << hex << i10->getAddress()->getVirtualOffset()
		     << " table_entry[" << dec << entry_index << "]=" << hex << entry_value << "@0x " << entry_addr
		     << " to " << case_insn->getBaseID() << ":" << case_insn->getDisassembly()
		     << "@" << case_insn->getAddress()->getVirtualOffset() << endl;

		jmptables[i10].insert(case_insn);

		// the bound taken from the compare has been reached; the index is left
		// on the last entry processed
		if(entry_index+1 > table_limit)
			break;
	}

	// cover the table with a data scoop
	cout << "Detected " << dec << entry_index << "entries in this table. adding data scoop for table" << endl;
	addSwitchTableScoop(firp, entry_index + 1, 4, table_start, exeiop, nullptr, 0, false);

	// every target of this indirect branch is now accounted for
	jmptables[i10].setAnalysisStatus(iasAnalysisComplete);
}
/*
 * check_for_arm32_switch_type3 - recognise an ARM32 switch that dispatches by
 * adding a scaled index to pc, landing in a run of unconditional branches
 * placed right after the dispatch instruction.
 *
 * Shapes being looked for (I10's disassembly must begin "addls pc, pc" and
 * its operand 2 names the index register):
 *
 *     I9:  cmp   r2, #4
 *     I10: addls pc, pc, <index_reg>, ...
 *
 * or:
 *
 *     I8:  ldr   r3, [pc, #k]
 *     I9:  cmp   r2, r3
 *     I10: addls pc, pc, <index_reg>, ...
 *
 * firp   - file IR, used to look up instructions.
 * i10    - candidate dispatch instruction.
 * d10    - decoded form of i10.
 * exeiop - executable being analysed; not used by this function.
 *
 * On a match, each 4-byte slot starting at pc+8 that holds a "b" instruction
 * is reported through possible_target and added to jmptables[i10], and the
 * analysis is marked complete.
 */
void check_for_arm32_switch_type3(
	FileIR_t *firp,
	Instruction_t* i10,
	const DecodedInstruction_t &d10,
	EXEIO::exeio* exeiop)
{
	// Only a dispatch whose disassembly starts with the addls-into-pc form qualifies;
	// seeing it is enough to conclude this is a switch.
	const auto dispatch_text = d10.getDisassembly();
	if(dispatch_text.find("addls pc, pc") != 0)
		return;

	const auto prov=ibt_provenance_t::ibtp_switchtable_type3;

	// Table bound; stays at "unbounded" unless the compare supplies a constant.
	uint32_t table_limit = numeric_limits<uint32_t>::max();

	// The index register is the third operand of the addls instruction.
	const auto index_reg = d10.getOperand(2)->getString();

	// Search backwards from I10 for the compare on the index register (I9),
	// giving up if the register is redefined first.
	Instruction_t* i9 = nullptr;
	const auto search_ok = backup_until(
		string()+"cmp "+index_reg+",",
		i9,
		i10,
		"^"+index_reg+"$");
	if(!search_ok)
		return;

	if(i9 != nullptr)
	{
		const auto cmp_decoded = DecodedInstruction_t::factory(i9);
		const auto bound_operand = cmp_decoded->getOperand(1);

		// A constant second operand is the table bound.  A register bound would
		// need a further backwards search for its load from .text -- TBD.
		if(bound_operand->isConstant())
			table_limit = bound_operand->getConstant();
	}

	// The addressing mode of I10 places the first slot at "pc+8".
	const auto table_start = i10->getAddress()->getVirtualOffset() + 8u;
	const auto slot_bytes = 4u;
	auto slot_index = 0u;
	for( ; ; ++slot_index)
	{
		// a slot that is not a known instruction ends the table
		const auto slot_addr = table_start + slot_index * slot_bytes;
		const auto slot_insn = lookupInstruction(firp, slot_addr);
		if(slot_insn == nullptr)
			break;

		// so does a slot that is anything other than an unconditional branch
		const auto slot_decoded = DecodedInstruction_t::factory(slot_insn);
		if(slot_decoded->getMnemonic() != "b")
			break;

		// the slot itself is what the dispatch can land on
		possible_target(slot_addr, 0, prov);

		cout << "Found ARM32 switch (addls -- type2)@0x" << hex << i10->getAddress()->getVirtualOffset()
		     << " to " << slot_insn->getBaseID() << ":" << slot_insn->getDisassembly()
		     << "@" << slot_insn->getAddress()->getVirtualOffset() << endl;

		jmptables[i10].insert(slot_insn);

		// the bound taken from the compare has been reached; the index is left
		// on the last slot processed
		if(slot_index+1 > table_limit)
			break;
	}

	// report the table size; the slots are instructions, so no scoop is added here
	cout << "Detected " << dec << slot_index << "entries in this table. adding data scoop for table" << endl;

	// every target of this indirect branch is now accounted for
	jmptables[i10].setAnalysisStatus(iasAnalysisComplete);
}
FileIR_t *firp,
Instruction_t* i10,
const DecodedInstruction_t &d10,

Jason Hiser
committed
EXEIO::exeio* exeiop)
const auto prov=ibt_provenance_t::ibtp_switchtable_type1;
#if 0
Sample code for this branch type:

Jason Hiser
committed
; x2 gets the value of x0
; this probably is not normal or required, but we are not checking
; it anyhow. This is just to understand the example.

Jason Hiser
committed
i1: 0x4039c4: cmp table_index_reg, #0x3
i2: 0x4039c8: b.hi 0x4039d4 ; may also be a b.ls
// generate switch table base address
// this code may be hard to find if the compiler optimizes it
// outside the block with the rest of the dispatch code, and/or
// spills a register.
// thus, we allow for it not to be found, and instead use any "unk"
// we return true if we've found the entry to avoid looking at unks
// if we don't need to.
i5: 0x40449c: adrp table_page_reg, 0x415000 // table page

Jason Hiser
committed
i6: 0x4044a0: add table_base_reg, table_page_reg, #0x2b0 // table offset
// table=table_page+table_offset
//
// load from switch table

Jason Hiser
committed
i7: 0x4044a4: ldrh table_entry_reg, [table_base_reg,table_index_reg,uxtw #1]
or
i7: 0x4044a4: ldrb table_entry_reg, [table_base_reg,table_index_reg,uxtw ]
// calculate branch_addr+4+table[i]*4

Jason Hiser
committed
i8: 0x4044a8: adr branch_reg, 0x4044b4 // jump base addr
i9: 0x4044ac: add i10_reg, branch_reg, table_entry_reg, sxth #2
// actually take the branch
i10: 0x4044b0: br i10_reg
i11: 0x4044b4:
notes:
1) jump table entries are 2-bytes
2) jump table entries specify an offset from the byte after dispatch branch
3) jump table entries dont store the lower 2 bits of the offset, as they
have to be 0 due to instruction alignment
#endif
// sanity check the jump

Jason Hiser
committed
if(d10.getMnemonic() != "br") return;
const auto i10_reg=d10.getOperand(0)->getString();
// try to find I9
auto i9=(Instruction_t*)nullptr;

Jason Hiser
committed
/* search for extender=sxth or sxtb */
if(!backup_until( string()+"(add "+i10_reg+",.* sxth #2)|(add "+i10_reg+",.* sxtb #2)", /* look for this pattern. */
i9, /* find i9 */
i10, /* before I10 */

Jason Hiser
committed
{
return;
}
// Extract the I9 fields.
assert(i9);
const auto d9p = DecodedInstruction_t::factory(i9);
const auto &d9 = *d9p;
const auto offset_reg = d9.getOperand(1)->getString();
const auto table_entry_reg = d9.getOperand(2)->getString();
// try to find I8
auto i8=(Instruction_t*)nullptr;

Jason Hiser
committed
if(!backup_until(string()+"adr "+offset_reg+",", /* look for this pattern. */
i8, /* find i8 */
i9, /* before I9 */
"^"+offset_reg+"$" /* stop if offste_reg set */

Jason Hiser
committed
return;
// extract the I8 fields
assert(i8);
const auto d8p = DecodedInstruction_t::factory(i8);
const auto &d8 = *d8p;
const auto jump_base_addr = d8.getOperand(1)->getConstant();
// try to find I7
auto i7=(Instruction_t*)nullptr;

Jason Hiser
committed
if(!backup_until(string()+ "(ldrh "+table_entry_reg+",)|(ldrb "+table_entry_reg+",)", /* look for this pattern. */
i7, /* find i7 */
i9, /* before I9 */
"^"+table_entry_reg+"$" /* stop if index_reg set */

Jason Hiser
committed
return;
// extract the I7 fields
assert(i7);
const auto d7p = DecodedInstruction_t::factory(i7);
const auto &d7 = *d7p;
const auto memory_op_string = d7.getOperand(1)->getString();

Jason Hiser
committed
const auto plus_pos = memory_op_string.find(" +");
const auto table_base_reg = memory_op_string.substr(0,plus_pos);
const auto table_index_reg = memory_op_string.substr(plus_pos+3);
const auto table_entry_size =
d7.getMnemonic()=="ldrb" ? 1 :
d7.getMnemonic()=="ldrh" ? 2 :
throw invalid_argument("Unable to detected switch table entry size for ARM64");
// now we try to find the table base in I5 and I6
// but we may fail due to compiler opts. Prepare for such failures

Jason Hiser
committed
// by creating a set of possible table bases.
// If we find i5/i6 or a reload of a spilled table address,
// we will refine our guess.
auto all_table_bases= per_reg_add_adrp_results[table_base_reg];
// try to find I6
auto i6=(Instruction_t*)nullptr;

Jason Hiser
committed
if(backup_until(string()+"add "+table_base_reg+",", /* look for this pattern. */
i6, /* find i6 */
i7, /* before I7 */
"^"+table_base_reg+"$", /* stop if table_base_reg set */

Jason Hiser
committed
true, /* look hard -- recursely examine up to 10k instructions and 500 blocks */
10000,
500
))
{
// extract the I6 fields