Newer
Older
/*
* Copyright (c) 2014 - Zephyr Software LLC
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from Zephyr
* Software.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: Zephyr Software
* e-mail: jwd@zephyr-software.com
* URL : http://www.zephyr-software.com/
*
*/
#include <limits>
#include <string>
#include <stdlib.h>
#include <string.h>
#include <map>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <regex.h>

Jason Hiser
committed
#include <cctype>
#include "check_thunks.hpp"
#include "back_search.hpp"

Jason Hiser
committed
#define ALLOF(a) begin(a),end(a)
extern void read_ehframe(FileIR_t* firp, EXEIO::exeio* );

Jason Hiser
committed
class PopulateIndTargs_t : public TransformStep_t
// record all full addresses and page-addresses found per function (or null for no function)
using PerFuncAddrSet_t=set<VirtualOffset_t>;

Jason Hiser
committed
map<Function_t*,PerFuncAddrSet_t> all_adrp_results;
map<Function_t*,PerFuncAddrSet_t> all_add_adrp_results;
// record all full addresses and page-addresses found that are spilled to the stack
using SpillPoint_t = pair<Function_t*, VirtualOffset_t>;

Jason Hiser
committed
map<SpillPoint_t,PerFuncAddrSet_t> spilled_add_adrp_results;
map<SpillPoint_t,PerFuncAddrSet_t> spilled_adrps;
// record all full addresses found that are spilled to a floating-point register (e.g., D10)
using DregSpillPoint_t = pair<Function_t*, string>;
map<DregSpillPoint_t, PerFuncAddrSet_t> spilled_to_dreg;
map<string,PerFuncAddrSet_t> per_reg_add_adrp_results;

Jason Hiser
committed
set< pair <VirtualOffset_t,VirtualOffset_t> > bounds;
map<VirtualOffset_t,ibt_provenance_t> targets;
// the set of ranges represented by the eh_frame section, could be empty for non-elf files.
set< pair< VirtualOffset_t, VirtualOffset_t> > ranges;
map< Instruction_t*, fii_icfs > jmptables;
map<VirtualOffset_t,Instruction_t*> lookupInstructionMap;
// the set of things that are partially unpinned already.
set<Instruction_t*> already_unpinned;
Jason Hiser
committed
/*
 * registerToSearchString - convert a register id to a lower-case string.
 *
 * reg: the register id to name.
 * returns: the result of registerToString(reg) with every character passed
 *          through tolower.
 */
string registerToSearchString(const RegisterID_t& reg)
{
    auto str=registerToString(reg);
    // The <cctype> functions are undefined for negative char values, so each
    // character is widened through unsigned char before calling tolower.
    transform(begin(str), end(str), begin(str),
        [](const unsigned char c) { return static_cast<char>(::tolower(c)); });
    return str;
}
void range(VirtualOffset_t start, VirtualOffset_t end)
pair<VirtualOffset_t,VirtualOffset_t> foo(start,end);
/*
* is_in_range - determine if an address is referenced by the eh_frame section
*/
for(auto bound : ranges)
auto start=bound.first;
auto end=bound.second;
if(start<=p && p<=end)
return true;
}
return false;
}
* process_range - do nothing now -- fix calls deals with this.
bool possible_target(VirtualOffset_t p, VirtualOffset_t from_addr, ibt_provenance_t prov)
{
{
if(getenv("IB_VERBOSE")!=nullptr)
{
cout<<"Found IB target address 0x"<<std::hex<<p<<" at 0x"<<from_addr<<std::dec<<", prov="<<prov<<endl;
cout<<"Found IB target address 0x"<<std::hex<<p<<" from unknown location, prov="<<prov<<endl;
}
return true;
}
return false;
}
/*
 * is_possible_target - test whether an address falls inside any recorded bound.
 *
 * p:    the address to test.
 * addr: not used by this function.
 * returns: true if some (start,end) pair in the global `bounds` set satisfies
 *          start <= p <= end (both ends inclusive), false otherwise.
 */
bool is_possible_target(VirtualOffset_t p, VirtualOffset_t addr)
{
    // iterate by const reference; the pairs do not need to be copied.
    for(const auto &bound : bounds)
    {
        if(bound.first<=p && p<=bound.second)
            return true;
    }
    return false;
}

Jason Hiser
committed
EXEIO::section* find_section(VirtualOffset_t addr, EXEIO::exeio *exeiop)

Jason Hiser
committed
for ( int i = 0; i < exeiop->sections.size(); ++i )

Jason Hiser
committed
EXEIO::section* pSec = exeiop->sections[i];
assert(pSec);
if(pSec->get_address() > addr)
continue;
if(addr >= pSec->get_address()+pSec->get_size())
return nullptr;
void handle_argument(
const DecodedInstruction_t& decoded_insn,
Instruction_t* insn,
ibt_provenance_t::provtype_t pt = ibt_provenance_t::ibtp_text
)
if(arg.isMemory() && decoded_insn.getMnemonic()=="lea")
assert(insn->getAddress());
possible_target(arg.getMemoryDisplacement() + insn->getAddress()->getVirtualOffset() +
insn->getDataBits().length(), insn->getAddress()->getVirtualOffset(), pt);
possible_target(arg.getMemoryDisplacement(), insn->getAddress()->getVirtualOffset(), pt);
void lookupInstruction_init(FileIR_t *firp)
lookupInstructionMap.clear();
const auto addr=insn->getAddress()->getVirtualOffset();
lookupInstructionMap[addr]=insn;
/*
 * lookupInstruction - find the instruction recorded at a virtual offset.
 *
 * firp:           not used by this function.
 * virtual_offset: key to look up in the global lookupInstructionMap.
 * returns: the mapped instruction, or nullptr when the offset has no entry.
 */
Instruction_t *lookupInstruction(FileIR_t *firp, VirtualOffset_t virtual_offset)
{
    // a single find() both tests for presence and yields the value.
    const auto it=lookupInstructionMap.find(virtual_offset);
    if(it==lookupInstructionMap.end())
        return nullptr;
    return it->second;
}
const auto addr=insn->getAddress()->getVirtualOffset();
/* lookup in the list of targets */
if(targets.find(addr)!=targets.end())

Jason Hiser
committed
const auto isret = targets[addr].areOnlyTheseSet(ibt_provenance_t::ibtp_ret);
const auto isprintf = targets[addr].areOnlyTheseSet(ibt_provenance_t::ibtp_stars_data|ibt_provenance_t::ibtp_texttoprintf) &&
targets[addr].isFullySet (ibt_provenance_t::ibtp_stars_data|ibt_provenance_t::ibtp_texttoprintf);
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Skipping pin for ret at "<<hex<<addr<<endl;
}
else if(isprintf)
{
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Skipping pin for text to printf at "<<hex<<addr<<endl;
}
else if(firp->findScoop(addr))
{
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Skipping pin data_in_text "<<hex<<addr<<endl;
}
if(getenv("IB_VERBOSE")!=nullptr)
cout<<"Setting pin at "<<hex<<addr<<endl;
auto newaddr=firp->addNewAddress(insn->getAddress()->getFileID(), insn->getAddress()->getVirtualOffset());
insn->setIndirectBranchTargetAddress(newaddr);
/*
 * CallToPrintfFollows - walk the fallthrough chain after insn looking for a call to a
 * function whose name contains "printf".
 *
 * firp:    not referenced in the visible body.
 * insn:    instruction whose fallthrough successors are examined.
 * arg_str: text identifying the argument; a later instruction whose disassembly
 *          contains it ends the search with false.
 * returns: true only via the "found it" path below; false on every other exit,
 *          including reaching the end of the fallthrough chain.
 *
 * NOTE(review): in this view the for-loop has no opening brace and the return under
 * "check we have a target" has no visible condition -- lines appear to be missing;
 * confirm against the complete file before relying on the control flow shown here.
 */
bool CallToPrintfFollows(FileIR_t *firp, Instruction_t* insn, const string& arg_str)
{
// visit each fallthrough successor of insn until the chain ends (nullptr).
for(auto ptr=insn->getFallthrough(); ptr!=nullptr; ptr=ptr->getFallthrough())
auto d=DecodedInstruction_t ::factory(ptr);
// the search is decided at the first instruction whose mnemonic is "call".
if(d->getMnemonic() == string("call"))
{
// check we have a target
// NOTE(review): presumably guarded by a test that ptr->getTarget() is null -- the guard is not visible here.
return false;
// check the target has a function
if(ptr->getTarget()->getFunction()==nullptr)
return false;
// check if we're calling printf.
// (substring match: any function name containing "printf" is accepted)
if(ptr->getTarget()->getFunction()->getName().find("printf")==string::npos)
return false;
// found it
return true;
}
// found reference to argstring, assume it's a write and exit
if(d->getDisassembly().find(arg_str)!= string::npos)
return false;
}
// ran off the end of the fallthrough chain without seeing a call.
return false;
}
/*
 * texttoprintf - report whether insn is a parameter write that is followed by a
 * call to printf.
 *
 * Returns true only when isParameterWrite() accepts insn and
 * CallToPrintfFollows() then succeeds for the destination it reported.
 */
bool texttoprintf(FileIR_t *firp,Instruction_t* insn)
{
    // filled in by isParameterWrite (output), then handed to CallToPrintfFollows (input).
    string written_param;

    if(!isParameterWrite(firp, insn, written_param))
        return false;

    return CallToPrintfFollows(firp, insn, written_param);
}

Jason Hiser
committed
void get_instruction_targets(FileIR_t *firp, EXEIO::exeio* exeiop, const set<VirtualOffset_t>& thunk_bases)
auto disasm=DecodedInstruction_t::factory(insn);
VirtualOffset_t instr_len = disasm->length(); // Disassemble(insn,disasm);
assert(instr_len==insn->getDataBits().size());
const auto mt=firp->getArchitecture()->getMachineType();
if(mt==admtX86_64 || mt==admtI386)
{
// work for both 32- and 64-bit.

Jason Hiser
committed
check_for_PIC_switch_table32_type2(firp, insn, *disasm, exeiop, thunk_bases);
check_for_PIC_switch_table32_type3(firp, insn, *disasm, exeiop, thunk_bases);

Jason Hiser
committed
check_for_PIC_switch_table32(firp, insn, *disasm, exeiop, thunk_bases);
else if (firp->getArchitectureBitWidth()==64)

Jason Hiser
committed
check_for_PIC_switch_table64(firp, insn, *disasm, exeiop);

Jason Hiser
committed
check_for_nonPIC_switch_table(firp, insn, *disasm, exeiop);
check_for_nonPIC_switch_table_pattern2(firp, insn, *disasm, exeiop);
}
else if(mt==admtAarch64)
{
check_for_arm64_switch_type1(firp,insn, *disasm, exeiop);
}
else if(mt==admtArm32)
{
check_for_arm32_switch_type1(firp,insn, *disasm, exeiop);
check_for_arm32_switch_type2(firp,insn, *disasm, exeiop);
check_for_arm32_switch_type3(firp,insn, *disasm, exeiop);
}
else
throw invalid_argument("Cannot determine machine type");
/* other branches can't indicate an indirect branch target */
if(disasm->isBranch()) // disasm.Instruction.BranchType)
ibt_provenance_t::provtype_t prov=0;
if(!texttoprintf(firp,insn))
{
prov=ibt_provenance_t::ibtp_text;
}
else
{
cout<<"TextToPrintf analysis of '"<<disasm->getDisassembly()<<"' successful at " <<hex<<insn->getAddress()->getVirtualOffset()<<endl;
prov=ibt_provenance_t::ibtp_texttoprintf;
/* otherwise, any immediate is a possible branch target */
{
if(op->isConstant())
possible_target(op->getConstant(), 0, prov);
}
const auto op=disasm->getOperand(i);
handle_argument(*disasm, *op, insn, prov);
return;
/* loaded, and contains instruction, record the bounds */
VirtualOffset_t first=shdr->get_address();
VirtualOffset_t second=shdr->get_address()+shdr->get_size();
bounds.insert(pair<VirtualOffset_t,VirtualOffset_t>(first,second));
/* check for a not loaded section */
if( ! shdr->isLoadable())
/* check for a loaded, but contains instruction section.
* we'll look through the VariantIR for this section. */
if( shdr->isExecutable() )
/* if the type is NOBITS, then there's no actual data to look through */
if(shdr->isBSS() )
return;
// skip .dynsym section -- process-dynsym does this.
if(shdr->get_name()==".dynsym")
return;
cout<<"Checking section "<<shdr->get_name() <<endl;
// assume pointers need to be at least 4-byte aligned.
for(auto i=0u;i+arch_ptr_bytes()<=(size_t)shdr->get_size();i+=4)
// even on 64-bit, pointers might be stored as 32-bit, as a
// elf object has the 32-bit limitations.
// there's no real reason to look for 64-bit pointers
uintptr_t p=0;
if(arch_ptr_bytes()==4)
p=*(int*)&data[i];
else
p=*(VirtualOffset_t*)&data[i]; // 64 or 32-bit depending on sizeof uintptr_t, may need porting for cross platform analysis.
if(shdr->get_name()==".init_array")
prov=ibt_provenance_t::ibtp_initarray;
else if(shdr->get_name()==".fini_array")
prov=ibt_provenance_t::ibtp_finiarray;
else if(shdr->get_name()==".got.plt")
prov=ibt_provenance_t::ibtp_gotplt;
else if(shdr->get_name()==".got")
prov=ibt_provenance_t::ibtp_got;
else if(shdr->get_name()==".symtab")
prov=ibt_provenance_t::ibtp_symtab;
prov=ibt_provenance_t::ibtp_data;
else
prov=ibt_provenance_t::ibtp_rodata;
possible_target(p, i+shdr->get_address(), prov);
void handle_scoop_scanning(FileIR_t* firp)
{
// check for addresses in scoops in the text section.
{
// test if scoop was added by fill_in_cfg -- make this test better.
if(scoop->getName().find("data_in_text_")==string::npos) continue;

Jason Hiser
committed
// at the moment, FIC only creates 4-, 8-, and 16-byte scoops
// change this code if FIC changes.

Jason Hiser
committed
{
// may be a 4-byter, which can't hold an address.
continue;
}

Jason Hiser
committed
{
// check to see if the scoop has an IBTA
const auto addr=*(uint64_t*)(scoop->getContents().c_str());
possible_target(addr, scoop->getStart()->getVirtualOffset(), ibt_provenance_t::ibtp_unknown);

Jason Hiser
committed
}
else
{
// we may see 16 indicating that a ldr q-word happened.
// this isn't likely an IBT, so we skip scanning it.

Jason Hiser
committed
}
}
}
/*
 * find_in_function - collect the instructions of a function whose disassembly
 * matches a regular expression.
 *
 * needle:   POSIX extended regular expression (compiled with REG_EXTENDED).
 * haystack: function whose instructions are searched.
 * returns:  the set of instructions in haystack for which regexec() reports a
 *           match against the instruction's disassembly text.
 */
set<Instruction_t*> find_in_function(string needle, Function_t *haystack)
{
    regex_t preg;
    set<Instruction_t*> found_instructions;

    // Compile outside of assert(): an expression inside assert() is not
    // evaluated when NDEBUG is defined, which would leave preg uninitialized.
    const auto compile_status = regcomp(&preg, needle.c_str(), REG_EXTENDED);
    assert(compile_status == 0);
    (void)compile_status;

    for (auto candidate : haystack->getInstructions())
    {
        auto disasm=DecodedInstruction_t::factory(candidate);

        // check it's the requested type
        if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0)
        {
            found_instructions.insert(candidate);
        }
    }

    // release the storage regcomp attached to preg.
    regfree(&preg);
    return found_instructions;
}

Jason Hiser
committed

Jason Hiser
committed
bool backup_until(const string &insn_type_regex_str,
Instruction_t *& prev,
Instruction_t* orig,
const string & stop_if_set="",
bool recursive=false,
uint32_t max_insns=10000u,
uint32_t max_recursions=5u)

Jason Hiser
committed
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
// find_or_build_regex - return the compiled regex for pattern s.
// Each distinct non-empty pattern is compiled once (REG_EXTENDED) and kept in a
// function-static map, so repeated calls with the same pattern return the same
// regex_t object.  An empty pattern returns a static, never-compiled regex_t.
// The lambda reads only its parameter and its own statics, so it captures nothing.
const auto find_or_build_regex=[] (const std::string& s) -> regex_t&
{
    // declare a freer for regexs so they go away when the program ends.
    auto regex_freer=[](regex_t* to_free) -> void
    {
        regfree(to_free);
        delete to_free;
    };

    // keep the map safe from anyone but me using it.
    using regex_unique_ptr_t=std::unique_ptr<regex_t, decltype(regex_freer)>;
    static std::map<std::string, regex_unique_ptr_t> regexs_used;

    if(s=="")
    {
        static regex_t empty;
        return empty;
    }

    auto it=regexs_used.find(s);
    if(it==regexs_used.end())
    {
        // allocate a new regex, remember it, and keep the iterator so no
        // further lookups are needed.
        it=regexs_used.emplace(s, regex_unique_ptr_t(new regex_t, regex_freer)).first;

        // and compile it.
        const auto ret=regcomp(it->second.get(), s.c_str(), REG_EXTENDED);

        // error check
        assert(ret==0);
        (void)ret;
    }
    return *it->second;
};

Jason Hiser
committed
// build regexs.
const auto &preg = find_or_build_regex(insn_type_regex_str);
const auto &stop_expression = find_or_build_regex(stop_if_set);

Jason Hiser
committed
prev=orig;

Jason Hiser
committed
while(preds[prev].size()==1 && max_insns > 0)

Jason Hiser
committed
// dec max for next loop
max_insns--;
// get I7's disassembly
const auto disasm=DecodedInstruction_t::factory(prev);
// check it's the requested type
if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0)

Jason Hiser
committed
if(stop_if_set!="")

Jason Hiser
committed
{
for(const auto operand : disasm->getOperands())
{
if(operand->isWritten() && regexec(&stop_expression, operand->getString().c_str(), 0, nullptr, 0) == 0)
return false;
}

Jason Hiser
committed
}
// otherwise, try backing up again.
}

Jason Hiser
committed
if(recursive && max_insns > 0 && max_recursions > 0 )

Jason Hiser
committed
const auto myprev=prev;
// can't just use prev because recursive call will update it.

Jason Hiser
committed
const auto &mypreds=preds[myprev];
for(const auto pred : mypreds)

Jason Hiser
committed
prev=pred;// mark that we are here, in case we return true here.
const auto disasm=DecodedInstruction_t::factory(pred);
// check it's the requested type
if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0)
return true;
if(stop_if_set!="")

Jason Hiser
committed
{
for(const auto operand : disasm->getOperands())
{
if(operand->isWritten() && regexec(&stop_expression, operand->getString().c_str(), 0, nullptr, 0) == 0)
return false;
}

Jason Hiser
committed
if(backup_until(insn_type_regex_str, prev, pred, stop_if_set, recursive, max_insns, max_recursions/mypreds.size()))

Jason Hiser
committed
return true;
// reset for next call
prev=myprev;
}
}
const DecodedInstruction_t &d10,
EXEIO::exeio* exeiop)
{
const auto prov=ibt_provenance_t::ibtp_switchtable_type1;
/*
* Check for hand-written assembly for divsi and udivsi that has this dispatch insn: addeq pc, pc, <reg> lsl #2
*/
const auto d = DecodedInstruction_t::factory(insn);
const auto is_addne = d->getMnemonic() == "addne" ;
if(is_addne)
{
const auto is_op0_pc = d->getOperand(0)->getString()=="pc";
const auto is_op1_pc = d->getOperand(1)->getString()=="pc";
if(is_op0_pc && is_op1_pc)
{
cout << "Found gcc addne pc,pc idiom" << endl;
for(auto i=1u;i<32u;i++)
{
const auto ibta = insn->getAddress()->getVirtualOffset() + 8 + i*12;
possible_target(ibta,0,prov);
}
}
}
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
/*
 * check_for_arm32_switch_type2 - recognise an ARM32 switch dispatched by loading pc
 * from a table that follows the dispatch instruction.
 *
 * Looking for this pattern:
 *     I9:  cmp r2, #4
 *     I10: ldrls pc, [pc, r2, lsl #2]
 * or this:
 *     I8:  ldr r3, [pc, #k]
 *     I9:  cmp r2, r3
 *     I10: ldrls pc, [pc, r2, lsl #2]
 *
 * firp:   IR handed to lookupInstruction and addSwitchTableScoop.
 * i10:    the candidate dispatch instruction.
 * d10:    decoded form of i10.
 * exeiop: executable image used to locate the section holding the table.
 *
 * When i10 matches and a "cmp <index_reg>," is found before it, every 4-byte
 * table entry is passed to possible_target(); entries that name a known
 * instruction are added to jmptables[i10].  The walk stops at the end of the
 * section, at the first entry that is not a known instruction, or once the
 * constant from the cmp has been exceeded.  A data scoop is then requested for
 * the table and jmptables[i10] is marked iasAnalysisComplete.
 */
void check_for_arm32_switch_type2(
    FileIR_t *firp,
    Instruction_t* i10,
    const DecodedInstruction_t &d10,
    EXEIO::exeio* exeiop)
{
    const auto prov=ibt_provenance_t::ibtp_switchtable_type2;

    // check that i10 is what we need
    const auto i10_dis = d10.getDisassembly();
    const auto is_i10_ldrls = i10_dis.find("ldrls pc, [pc")==0;
    if(!is_i10_ldrls) return;

    // this is sufficient to determine we have a switch dispatch.
    // now try to figure out the table size.
    auto jt_size = numeric_limits<uint32_t>::max();

    // and we do that by looking for a cmp on the dispatch register.
    // the dispatch register is the index register in the ldrls instruction.
    // which we find via string extraction.
    //
    // The register name starts at column 15 ("ldrls pc, [pc, " is 15 characters).
    // Without at least two characters there, substr() throws or the [2] probe
    // below reads out of range, so a shorter string is treated as "no match".
    const auto index_reg_pos = string::size_type(15);
    if(i10_dis.size() < index_reg_pos + 2) return;

    const auto i10_dis_15_3 = i10_dis.substr(index_reg_pos,3); // reg or reg,
    const auto i10_index_reg = i10_dis_15_3[2] == ',' ? i10_dis_15_3.substr(0,2) : i10_dis_15_3;

    // look for i9
    auto i9=(Instruction_t*)nullptr;
    if(!backup_until( string()+"cmp "+i10_index_reg+",", /* look for this pattern. */
                      i9,                                /* find i9 */
                      i10,                               /* before I10 */
                      "^"+i10_index_reg+"$"              /* stop if i10_reg set */
                    ))
    {
        return;
    }

    if(i9 != nullptr)
    {
        // decode i9
        const auto d9 = DecodedInstruction_t::factory(i9);
        const auto d9_op1 = d9->getOperand(1);

        // look for a constant in the 2nd operand.
        if( d9_op1->isConstant())
            jt_size=d9_op1->getConstant();
        else
        {
            // check if it's a register
            // and look backwards for a load of the register from the .text seg
            // TBD
        }
    }

    // extract the jump table -- this is simple as the addressing mode in i10 says it is at "pc+8".
    const auto jt_addr = i10->getAddress()->getVirtualOffset() + 8u;
    const auto jt_section = find_section(jt_addr,exeiop);
    assert(jt_section);
    const auto jt_secdata = jt_section->get_data();
    const auto jt_secaddr = jt_section->get_address();
    const auto jt_secendaddr = jt_secaddr + jt_section->get_size();

    auto jt_entry_no=0u;
    while(true)
    {
        // calculate some stuff about the jump table entry we're looking at
        const auto jte_size = 4u;
        const auto jte_offset = jt_entry_no * jte_size;
        const auto jte_addr = jt_addr + jte_offset;

        // stop if we've exceeded the section size
        if(jte_addr + jte_size > jt_secendaddr) break;

        // extract the table entry.  memcpy is used because nothing guarantees the
        // section bytes are suitably aligned for a direct uint32_t load.
        auto jte = uint32_t(0);
        memcpy(&jte, &jt_secdata[jte_addr - jt_secaddr], sizeof(jte));

        // mark the instruction at jte as an ibt
        possible_target(jte, jte_addr, prov);

        // check to see if the entry is valid.  if not, exit.
        const auto ibtarget = lookupInstruction(firp, jte);
        if(ibtarget == nullptr) break;

        cout << "Found ARM32 switch (ldrls -- type2)@0x" << hex << i10->getAddress()->getVirtualOffset()
             << " table_entry[" << dec << jt_entry_no << "]=" << hex << jte << "@0x " << jte_addr
             << " to " << ibtarget->getBaseID() << ":" << ibtarget->getDisassembly()
             << "@" << ibtarget->getAddress()->getVirtualOffset() << endl;

        // add to i10
        jmptables[i10].insert(ibtarget);

        // stop if we've exceeded the number of table entries we found.
        if(jt_entry_no+1 > jt_size) break;
        jt_entry_no++;
    }

    // add a data scoop for the switch table.
    cout << "Detected " << dec << jt_entry_no << "entries in this table. adding data scoop for table" << endl;
    addSwitchTableScoop(firp, jt_entry_no + 1 , 4, jt_addr, exeiop, nullptr, 0, false);

    // mark that we figured out all possible targets for this ib.
    jmptables[i10].setAnalysisStatus(iasAnalysisComplete);
}
/*
 * check_for_arm32_switch_type3 - recognise an ARM32 switch dispatched by adding a
 * scaled index to pc, where the "table" is a run of unconditional branches that
 * follows the dispatch instruction.
 *
 * Looking for this pattern:
 *     I9:  cmp r2, #4
 *     I10: addls pc, pc, r2, lsl #2
 * or this:
 *     I8:  ldr r3, [pc, #k]
 *     I9:  cmp r2, r3
 *     I10: addls pc, pc, r2, lsl #2
 *
 * Each 4-byte slot starting at i10's address + 8 that holds a known instruction
 * with mnemonic "b" is reported through possible_target() and added to
 * jmptables[i10]; the walk ends at the first slot that does not qualify or once
 * the constant found in the cmp has been exceeded.  jmptables[i10] is then
 * marked iasAnalysisComplete.  exeiop is not used here.
 */
void check_for_arm32_switch_type3(
    FileIR_t *firp,
    Instruction_t* i10,
    const DecodedInstruction_t &d10,
    EXEIO::exeio* exeiop)
{
    const auto prov=ibt_provenance_t::ibtp_switchtable_type3;

    // only an "addls pc, pc, ..." dispatch is of interest; that alone tells us
    // this is a switch.
    const auto dispatch_text = d10.getDisassembly();
    if(dispatch_text.find("addls pc, pc") != 0)
        return;

    // the table size is unknown until a cmp against the index register is found.
    auto table_limit = numeric_limits<uint32_t>::max();

    // the index register is the third operand of the addls.
    const auto index_reg = d10.getOperand(2)->getString();

    // search backwards from the dispatch for "cmp <index_reg>,", giving up if the
    // index register is written first.
    Instruction_t* cmp_insn = nullptr;
    const auto cmp_found = backup_until(string()+"cmp "+index_reg+",", cmp_insn, i10, "^"+index_reg+"$");
    if(!cmp_found)
        return;

    if(cmp_insn != nullptr)
    {
        const auto cmp_decoded = DecodedInstruction_t::factory(cmp_insn);
        const auto bound_operand = cmp_decoded->getOperand(1);

        // a constant second operand is taken as the table bound; a register
        // bound (loaded from the .text segment) is not handled yet -- TBD.
        if(bound_operand->isConstant())
            table_limit = bound_operand->getConstant();
    }

    // the addressing mode of the dispatch puts the first slot at "pc+8".
    const auto table_start = i10->getAddress()->getVirtualOffset() + 8u;
    const auto slot_bytes = 4u;

    auto slot_index = 0u;
    for(;; slot_index++)
    {
        // the slot must hold a known instruction...
        const auto slot_addr = table_start + slot_index * slot_bytes;
        const auto slot_insn = lookupInstruction(firp, slot_addr);
        if(slot_insn == nullptr)
            break;

        // ...and that instruction must be an unconditional branch.
        const auto slot_decoded = DecodedInstruction_t::factory(slot_insn);
        if(slot_decoded->getMnemonic() != "b")
            break;

        // record the slot as an indirect branch target of the dispatch.
        possible_target(slot_addr, 0, prov);
        cout << "Found ARM32 switch (addls -- type2)@0x" << hex << i10->getAddress()->getVirtualOffset()
             << " to " << slot_insn->getBaseID() << ":" << slot_insn->getDisassembly()
             << "@" << slot_insn->getAddress()->getVirtualOffset() << endl;
        jmptables[i10].insert(slot_insn);

        // leave without advancing once the bound from the cmp has been exceeded.
        if(slot_index+1 > table_limit)
            break;
    }

    // report how far the walk got; no scoop is created here.
    cout << "Detected " << dec << slot_index << "entries in this table. adding data scoop for table" << endl;

    // mark that we figured out all possible targets for this ib.
    jmptables[i10].setAnalysisStatus(iasAnalysisComplete);
}
FileIR_t *firp,
Instruction_t* i10,
const DecodedInstruction_t &d10,

Jason Hiser
committed
EXEIO::exeio* exeiop)
const auto prov=ibt_provenance_t::ibtp_switchtable_type1;
#if 0
Sample code for this branch type:

Jason Hiser
committed
; x2 gets the value of x0
; this probably is not normal or required, but we are not checking
; it anyhow. This is just to understand the example.

Jason Hiser
committed
i1: 0x4039c4: cmp table_index_reg, #0x3
i2: 0x4039c8: b.hi 0x4039d4 ; may also be a b.ls
// generate switch table base address
// this code may be hard to find if the compiler optimizes it
// outside the block with the rest of the dispatch code, and/or
// spills a register.
// thus, we allow for it not to be found, and instead use any "unk"
// we return true if we've found the entry to avoid looking at unks
// if we don't need to.
i5: 0x40449c: adrp table_page_reg, 0x415000 // table page

Jason Hiser
committed
i6: 0x4044a0: add table_base_reg, table_page_reg, #0x2b0 // table offset
// table=table_page+table_offset
//
// load from switch table

Jason Hiser
committed
i7: 0x4044a4: ldrh table_entry_reg, [table_base_reg,table_index_reg,uxtw #1]
or
i7: 0x4044a4: ldrb table_entry_reg, [table_base_reg,table_index_reg,uxtw ]
// calculate branch_addr+4+table[i]*4

Jason Hiser
committed
i8: 0x4044a8: adr branch_reg, 0x4044b4 // jump base addr
i9: 0x4044ac: add i10_reg, branch_reg, table_entry_reg, sxth #2
// actually take the branch
i10: 0x4044b0: br i10_reg
i11: 0x4044b4:
notes:
1) jump table entries are 2-bytes
2) jump table entries specify an offset from the byte after dispatch branch
3) jump table entries dont store the lower 2 bits of the offset, as they
have to be 0 due to instruction alignment
#endif
// sanity check the jump

Jason Hiser
committed
if(d10.getMnemonic() != "br") return;
const auto i10_reg=d10.getOperand(0)->getString();
// try to find I9
auto i9=(Instruction_t*)nullptr;

Jason Hiser
committed
/* search for extender=sxth or sxtb */
if(!backup_until( string()+"(add "+i10_reg+",.* sxth #2)|(add "+i10_reg+",.* sxtb #2)", /* look for this pattern. */
i9, /* find i9 */
i10, /* before I10 */

Jason Hiser
committed
{
return;
}
// Extract the I9 fields.
assert(i9);
const auto d9p = DecodedInstruction_t::factory(i9);
const auto &d9 = *d9p;
const auto offset_reg = d9.getOperand(1)->getString();
const auto table_entry_reg = d9.getOperand(2)->getString();
// try to find I8
auto i8=(Instruction_t*)nullptr;

Jason Hiser
committed
if(!backup_until(string()+"adr "+offset_reg+",", /* look for this pattern. */
i8, /* find i8 */
i9, /* before I9 */
"^"+offset_reg+"$" /* stop if offste_reg set */

Jason Hiser
committed
return;
// extract the I8 fields
assert(i8);
const auto d8p = DecodedInstruction_t::factory(i8);