Skip to content
Snippets Groups Projects
fill_in_indtargs.cpp 132 KiB
Newer Older
/*
 * Copyright (c) 2014 - Zephyr Software LLC
 *
 * This file may be used and modified for non-commercial purposes as long as
 * all copyright, permission, and nonwarranty notices are preserved.
 * Redistribution is prohibited without prior written consent from Zephyr
 * Software.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: Zephyr Software
 * e-mail: jwd@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
 */
jdh8d's avatar
jdh8d committed

#include <irdb-core>
#include <irdb-util>
jdh8d's avatar
jdh8d committed
#include <iostream>
jdh8d's avatar
 
jdh8d committed
#include <fstream>
#include <algorithm>
jdh8d's avatar
jdh8d committed
#include <stdlib.h>
#include <string.h>
#include <map>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <regex.h>
jdh8d's avatar
jdh8d committed
#include <ctype.h>
#include <list>
jdh8d's avatar
jdh8d committed
#include <stdio.h>
jdh8d's avatar
 
jdh8d committed
#include <elf.h>
jdh8d's avatar
jdh8d committed
#include <exeio.h>
jdh8d's avatar
 
jdh8d committed
#include "fill_in_indtargs.hpp"
#include "libMEDSAnnotation.h"
jdh8d's avatar
jdh8d committed

using namespace IRDB_SDK;
using namespace std;
jdh8d's avatar
jdh8d committed
using namespace EXEIO;
jdh8d's avatar
 
jdh8d committed
using namespace MEDS_Annotation;
jdh8d's avatar
 
jdh8d committed
/*
 * defines 
 */
jdh8d's avatar
 
jdh8d committed

extern void read_ehframe(FileIR_t* firp, EXEIO::exeio* );

Jason Hiser's avatar
Jason Hiser committed
/*
 * cptrtoh - read one pointer-sized integer from cptr, stored in the byte order of the
 * binary described by firp, and return it converted to host byte order.
 *
 * firp - supplies the architecture bit width (selects a 4- or 8-byte read) and the 
 *        machine type (selects little- vs. big-endian conversion).
 * cptr - address of the first byte to read; 4 or 8 bytes must be readable there.
 *
 * Returns the converted value, implicitly converted to T.
 * Throws invalid_argument if the pointer size is not 4 or 8 bytes, or if the machine
 * type is not one handled for that pointer size.
 */
template<typename T>
static inline T cptrtoh(FileIR_t* firp, const uint8_t* cptr)
{
	const auto ptrsize = firp->getArchitectureBitWidth() / 8 ;
	const auto mt      = firp->getArchitecture()->getMachineType();

	switch(ptrsize)
	{
		case 4:
		{
			// read exactly 4 bytes; memcpy keeps the load legal even when cptr is unaligned.
			auto raw_const = uint32_t(0);
			memcpy(&raw_const, cptr, sizeof(raw_const));
			switch(mt)
			{
				case admtI386:
				case admtArm32:
					return le32toh(raw_const);
				case admtMips32:
					return be32toh(raw_const);

				default:
					throw invalid_argument("Cannot detect machine type");
			}
		}
		case 8:
		{
			// read the full 8 bytes so the upper half of a 64-bit pointer is not lost.
			auto raw_const = uint64_t(0);
			memcpy(&raw_const, cptr, sizeof(raw_const));
			switch(mt)
			{
				case admtX86_64:
				case admtAarch64:
					return le64toh(raw_const);
				case admtMips64:
					return be64toh(raw_const);

				default:
					throw invalid_argument("Cannot detect machine type");
			}
		}
		default:
		{
			throw invalid_argument("Cannot detect pointer size");
		}
	}
}

class PopulateIndTargs_t : public TransformStep_t
// record all full addresses and page-addresses found per function (or null for no function)
using PerFuncAddrSet_t=set<VirtualOffset_t>;
map<Function_t*,PerFuncAddrSet_t> all_adrp_results;
map<Function_t*,PerFuncAddrSet_t> all_add_adrp_results;

// record all full addresses and page-addresses found that are spilled to the stack
// a spill point is a (function, offset) pair -- NOTE(review): presumably the offset is the stack slot; confirm.
using SpillPoint_t = pair<Function_t*, VirtualOffset_t>;
map<SpillPoint_t,PerFuncAddrSet_t> spilled_add_adrp_results;
map<SpillPoint_t,PerFuncAddrSet_t> spilled_adrps;

// record all full addresses found that are spilled to a floating-point register (e.g., D10) 
// the string half of the key names the register.
using DregSpillPoint_t = pair<Function_t*, string>;
map<DregSpillPoint_t, PerFuncAddrSet_t> spilled_to_dreg;

// full addresses keyed by a register-name string; the arm64 switch analysis uses 
// the entry for the table-base register as its initial set of candidate table bases.
map<string,PerFuncAddrSet_t> per_reg_add_adrp_results;
jdh8d's avatar
 
jdh8d committed
/* 
 * class variables 
jdh8d's avatar
 
jdh8d committed
 */
jdh8d's avatar
jdh8d committed


jdh8d's avatar
 
jdh8d committed
// the bounds of the executable sections in the pgm.
set< pair <VirtualOffset_t,VirtualOffset_t>  > bounds;
jdh8d's avatar
jdh8d committed

jdh8d's avatar
 
jdh8d committed
// the set of (possible) targets we've found.
map<VirtualOffset_t,ibt_provenance_t> targets;
jdh8d's avatar
 
jdh8d committed

// the set of ranges represented by the eh_frame section, could be empty for non-elf files.
set< pair< VirtualOffset_t, VirtualOffset_t> > ranges;
jdh8d's avatar
 
jdh8d committed
// a way to map an instruction to its set of (direct) predecessors. 
jdh8d's avatar
jdh8d committed
map< Instruction_t* , InstructionSet_t > preds;
an7s's avatar
an7s committed
// keep track of jmp tables
map< Instruction_t*, fii_icfs > jmptables;
jdh8d's avatar
 
jdh8d committed

// a map of virtual offset -> instruction for quick access.
map<VirtualOffset_t,Instruction_t*> lookupInstructionMap;
jdh8d's avatar
 
jdh8d committed

// the set of things that are partially unpinned already.
set<Instruction_t*> already_unpinned;

long total_unpins=0;
jdh8d's avatar
 
jdh8d committed

/*
 * registerToSearchString - return the name of a register, as produced by 
 * registerToString, with every character passed through tolower.
 */
string registerToSearchString(const RegisterID_t& reg)
{
	auto lowered=registerToString(reg);
	for(auto &ch : lowered)
		ch = ::tolower(ch);
	return lowered;
}
/*
 * range - remember the address range [start,end] in the global set of ranges.
 *
 * start - first address of the range.
 * end   - last address of the range (is_in_range treats it as inclusive).
 */
void range(VirtualOffset_t start, VirtualOffset_t end)
{
	ranges.insert(pair<VirtualOffset_t,VirtualOffset_t>(start,end));
}

jdh8d's avatar
 
jdh8d committed

/*   
 * is_in_range - determine if an address is referenced by the eh_frame section 
 *
 * p - the address to test.
 *
 * Returns true if p lies within any recorded range (both endpoints inclusive),
 * false otherwise, including when no ranges have been recorded.
 */
bool is_in_range(VirtualOffset_t p)
{
	for(const auto &bound : ranges)
	{
		const auto start=bound.first;
		const auto end=bound.second;
		if(start<=p && p<=end)
			return true;
	}
	return false;
}

jdh8d's avatar
 
jdh8d committed
/*
 * process_range -  do nothing now -- fix calls deals with this.
jdh8d's avatar
 
jdh8d committed
 */
void process_ranges(FileIR_t* firp)
jdh8d's avatar
 
jdh8d committed

bool possible_target(VirtualOffset_t p, VirtualOffset_t from_addr, ibt_provenance_t prov)
jdh8d's avatar
 
jdh8d committed
	if(is_possible_target(p,from_addr))
		if(getenv("IB_VERBOSE")!=nullptr)
jdh8d's avatar
 
jdh8d committed
			if(from_addr!=0)
				cout<<"Found IB target address 0x"<<std::hex<<p<<" at 0x"<<from_addr<<std::dec<<", prov="<<prov<<endl;
				cout<<"Found IB target address 0x"<<std::hex<<p<<" from unknown location, prov="<<prov<<endl;
jdh8d's avatar
 
jdh8d committed
		targets[p].add(prov);
/*
 * is_possible_target - check whether an address falls inside one of the recorded
 * executable-section bounds.
 *
 * p    - the candidate target address.
 * addr - where the candidate was found; not consulted by this check.
 *
 * Returns true if p is within any [start,end] pair in bounds (endpoints inclusive).
 */
bool is_possible_target(VirtualOffset_t p, VirtualOffset_t addr)
{
	for(const auto &bound : bounds)
	{
		const auto start=bound.first;
		const auto end=bound.second;
		if(start<=p && p<=end)
		{
			return true;
		}
	}
	return false;

}
jdh8d's avatar
jdh8d committed

/*
 * find_section - locate the section whose address range contains addr.
 *
 * addr   - the address to look up.
 * exeiop - the executable whose section list is searched.
 *
 * Returns the first section with get_address() <= addr < get_address()+get_size(),
 * or nullptr if no section contains addr.
 */
EXEIO::section*  find_section(VirtualOffset_t addr, EXEIO::exeio *exeiop)
{
	// take the count once, as an int, so the index comparison is not signed-vs-unsigned.
	const auto num_sections = static_cast<int>(exeiop->sections.size());
	for ( int i = 0; i < num_sections; ++i )
	{
		EXEIO::section* pSec = exeiop->sections[i];
		assert(pSec);

		// section starts after addr
		if(pSec->get_address() > addr)
			continue;
		// section ends at or before addr
		if(addr >= pSec->get_address()+pSec->get_size())
			continue;

		return pSec;
	}

	// callers test the result against null, so report "not found" explicitly.
	return nullptr;
}

void handle_argument(
		     const DecodedInstruction_t& decoded_insn, 
		     const DecodedOperand_t &arg, 
		     Instruction_t* insn, 
		     ibt_provenance_t::provtype_t pt = ibt_provenance_t::ibtp_text
		    )
jdh8d's avatar
jdh8d committed
{
	if(arg.isMemory() && decoded_insn.getMnemonic()=="lea") 
jdh8d's avatar
 
jdh8d committed
	{
		if(arg.isPcrel()) 
jdh8d's avatar
 
jdh8d committed
		{
			assert(insn);
			assert(insn->getAddress());
			possible_target(arg.getMemoryDisplacement() + insn->getAddress()->getVirtualOffset() +
				insn->getDataBits().length(), insn->getAddress()->getVirtualOffset(), pt);
jdh8d's avatar
 
jdh8d committed
		}
		else
			possible_target(arg.getMemoryDisplacement(), insn->getAddress()->getVirtualOffset(), pt);
jdh8d's avatar
 
jdh8d committed
	}
jdh8d's avatar
jdh8d committed
}


/*
 * lookupInstruction_init - fill lookupInstructionMap so that instructions can be
 * found by virtual offset.
 *
 * firp - the IR whose instructions are indexed.
 */
void lookupInstruction_init(FileIR_t *firp)
{
	for(auto insn : firp->getInstructions())
	{
		const auto addr=insn->getAddress()->getVirtualOffset();
		// record the instruction under its address; a later instruction at the 
		// same address replaces an earlier one.
		lookupInstructionMap[addr]=insn;
	}
}
an7s's avatar
an7s committed

/*
 * lookupInstruction - find the instruction recorded at a virtual offset.
 *
 * firp           - not consulted; the lookup uses lookupInstructionMap only.
 * virtual_offset - the address to look up.
 *
 * Returns the instruction recorded for virtual_offset, or nullptr if there is none.
 */
Instruction_t *lookupInstruction(FileIR_t *firp, VirtualOffset_t virtual_offset)
{
	// one search serves both the presence test and the fetch.
	const auto found = lookupInstructionMap.find(virtual_offset);
	if(found != lookupInstructionMap.end())
		return found->second;
	return nullptr;
}

void mark_targets(FileIR_t *firp)
jdh8d's avatar
jdh8d committed
{
        for(auto insn : firp->getInstructions())
		const auto addr=insn->getAddress()->getVirtualOffset();
jdh8d's avatar
jdh8d committed

		/* lookup in the list of targets */
		if(targets.find(addr)!=targets.end())
an7s's avatar
an7s committed
		{
			const auto isret    = targets[addr].areOnlyTheseSet(ibt_provenance_t::ibtp_ret);
			const auto isprintf = targets[addr].areOnlyTheseSet(ibt_provenance_t::ibtp_stars_data|ibt_provenance_t::ibtp_texttoprintf) && 
			                      targets[addr].isFullySet     (ibt_provenance_t::ibtp_stars_data|ibt_provenance_t::ibtp_texttoprintf);
				if(getenv("IB_VERBOSE")!=nullptr)
					cout<<"Skipping pin for ret at "<<hex<<addr<<endl;
			}
			else if(isprintf)
			{
				if(getenv("IB_VERBOSE")!=nullptr)
					cout<<"Skipping pin for text to printf at "<<hex<<addr<<endl;
			}
Jason Hiser's avatar
Jason Hiser committed
			else if(firp->findScoop(addr))
			{
				if(getenv("IB_VERBOSE")!=nullptr)
					cout<<"Skipping pin data_in_text "<<hex<<addr<<endl;
			}
				if(getenv("IB_VERBOSE")!=nullptr)
					cout<<"Setting pin at "<<hex<<addr<<endl;
				auto newaddr=firp->addNewAddress(insn->getAddress()->getFileID(), insn->getAddress()->getVirtualOffset());
				insn->setIndirectBranchTargetAddress(newaddr);
an7s's avatar
an7s committed
		}
jdh8d's avatar
jdh8d committed
	}
}
an7s's avatar
an7s committed


/*
 * CallToPrintfFollows - walk the fallthrough chain after insn looking for a call 
 * to a function whose name contains "printf".
 *
 * firp    - not consulted.
 * insn    - the instruction to start after.
 * arg_str - text identifying the argument location; the walk gives up at the first 
 *           non-call instruction whose disassembly mentions it.
 *
 * Returns true only if the first call reached has a target, that target belongs to 
 * a function, and the function's name contains "printf".  Returns false if the 
 * fallthrough chain ends first or arg_str is referenced first.
 */
bool CallToPrintfFollows(FileIR_t *firp, Instruction_t* insn, const string& arg_str)
{
	for(auto ptr=insn->getFallthrough(); ptr!=nullptr; ptr=ptr->getFallthrough())
	{
		const auto d=DecodedInstruction_t::factory(ptr);
		if(d->getMnemonic() == string("call"))
		{
			// check the call has a (direct) target
			if(ptr->getTarget()==nullptr)
				return false;

			// check the target has a function 
			if(ptr->getTarget()->getFunction()==nullptr)
				return false;

			// check if we're calling printf.
			if(ptr->getTarget()->getFunction()->getName().find("printf")==string::npos)
				return false;

			// found it
			return true;
		}

		// found reference to argstring, assume it's a write and exit
		if(d->getDisassembly().find(arg_str)!= string::npos)
			return false;
	}

	return false;
}

bool texttoprintf(FileIR_t *firp,Instruction_t* insn)
{
	string dst="";
	// note that dst is an output parameter of IsParameterWrite and an input parameter to CallFollows
	if(isParameterWrite(firp,insn, dst) && CallToPrintfFollows(firp,insn,dst))
void get_instruction_targets(FileIR_t *firp, EXEIO::exeio* exeiop, const set<VirtualOffset_t>& thunk_bases)
jdh8d's avatar
jdh8d committed
{
        for(auto insn : firp->getInstructions())
jdh8d's avatar
jdh8d committed
        {
		auto disasm=DecodedInstruction_t::factory(insn);
                VirtualOffset_t instr_len = disasm->length(); // Disassemble(insn,disasm);
jdh8d's avatar
jdh8d committed

                assert(instr_len==insn->getDataBits().size());
jdh8d's avatar
jdh8d committed

		const auto mt=firp->getArchitecture()->getMachineType();
jdh8d's avatar
 
jdh8d committed

		if(mt==admtX86_64 || mt==admtI386)
		{
			// work for both 32- and 64-bit.
			check_for_PIC_switch_table32_type2(firp, insn, *disasm, exeiop, thunk_bases);
			check_for_PIC_switch_table32_type3(firp, insn, *disasm, exeiop, thunk_bases);
			if (firp->getArchitectureBitWidth()==32)
				check_for_PIC_switch_table32(firp, insn, *disasm, exeiop, thunk_bases);
			else if (firp->getArchitectureBitWidth()==64)
				check_for_PIC_switch_table64(firp, insn, *disasm, exeiop);
			check_for_nonPIC_switch_table(firp, insn, *disasm, exeiop);
			check_for_nonPIC_switch_table_pattern2(firp, insn, *disasm, exeiop);
		}
		else if(mt==admtAarch64)
		{
Jason Hiser's avatar
Jason Hiser committed
			check_for_arm64_switch_type1(firp,insn,  *disasm, exeiop);
		}
		else if(mt==admtArm32)
		{
			check_for_arm32_switch_type1(firp,insn,  *disasm, exeiop);
Jason Hiser's avatar
Jason Hiser committed
			check_for_arm32_switch_type2(firp,insn,  *disasm, exeiop);
			check_for_arm32_switch_type3(firp,insn,  *disasm, exeiop);
Jason Hiser's avatar
Jason Hiser committed
		else if(mt==admtMips32)
		{
Jason Hiser's avatar
Jason Hiser committed
			/* no reason to look for pc-rel constants in mips */
			if(firp->getArchitecture()->getFileType() == adftELFSO)
				continue;;
Jason Hiser's avatar
Jason Hiser committed
		}
		else
			throw invalid_argument("Cannot determine machine type");
jdh8d's avatar
jdh8d committed
		/* other branches can't indicate an indirect branch target */
		if(disasm->isBranch()) // disasm.Instruction.BranchType)
jdh8d's avatar
jdh8d committed
			continue;

		ibt_provenance_t::provtype_t prov=0;
			prov=ibt_provenance_t::ibtp_text;
			cout<<"TextToPrintf analysis of '"<<disasm->getDisassembly()<<"' successful at " <<hex<<insn->getAddress()->getVirtualOffset()<<endl;
			prov=ibt_provenance_t::ibtp_texttoprintf;
		/* otherwise, any immediate is a possible branch target */
		for(const auto& op: disasm->getOperands())
			if(op->isConstant())
				possible_target(op->getConstant(), 0, prov);
jdh8d's avatar
jdh8d committed

		for(auto i=0;i<4;i++)
		{
			if(disasm->hasOperand(i))
				const auto op=disasm->getOperand(i);
				handle_argument(*disasm, *op, insn, prov);
jdh8d's avatar
jdh8d committed
	}
}

jdh8d's avatar
 
jdh8d committed
/*
 * get_executable_bounds - if a section is both loadable and executable, record 
 * its address range in bounds.
 *
 * firp - not consulted.
 * shdr - the section to examine.
 */
void get_executable_bounds(FileIR_t *firp, const section* shdr)
{

	/* not a loaded section */
	if( !shdr->isLoadable()) 
		return;

	/* loaded, but contains no instructions */
	if( !shdr->isExecutable() )
		return;

	/* loaded, and contains instructions, record the bounds */
	const VirtualOffset_t first=shdr->get_address();
	const VirtualOffset_t second=shdr->get_address()+shdr->get_size();

	bounds.insert(pair<VirtualOffset_t,VirtualOffset_t>(first,second));
}
jdh8d's avatar
 
jdh8d committed
void infer_targets(FileIR_t *firp, section* shdr)
jdh8d's avatar
jdh8d committed
{
Jason Hiser's avatar
Jason Hiser committed
	/* check for a not loaded section */
	if( ! shdr->isLoadable()) 
jdh8d's avatar
jdh8d committed
		return;

Jason Hiser's avatar
Jason Hiser committed
	/* skip a loaded section that contains instructions;
	 * we'll look through the VariantIR for such a section instead. */
	if( shdr->isExecutable() ) 
jdh8d's avatar
jdh8d committed
		return;

	/* if the type is NOBITS, then there's no actual data to look through */
Jason Hiser's avatar
Jason Hiser committed
	if(shdr->isBSS() ) 
		return;
	// skip .dynsym section -- process-dynsym does this.
	if(shdr->get_name()==".dynsym")

	cout<<"Checking section "<<shdr->get_name() <<endl;

jdh8d's avatar
 
jdh8d committed
	const char* data=shdr->get_data() ; // C(char*)malloc(shdr->sh_size);
jdh8d's avatar
jdh8d committed

jdh8d's avatar
 
jdh8d committed
	assert(arch_ptr_bytes()==4 || arch_ptr_bytes()==8);
Jason Hiser's avatar
Jason Hiser committed
	// assume pointers need to be at least 4-byte aligned.
	for(auto i=0u;i+arch_ptr_bytes()<=(size_t)shdr->get_size();i+=4)
jdh8d's avatar
jdh8d committed
	{
		// even on 64-bit, pointers might be stored as 32-bit, as a 
		// elf object has the 32-bit limitations.
		// there's no real reason to look for 64-bit pointers 
		const auto ptr_val = uint64_t(
		                (arch_ptr_bytes()==4) ?  cptrtoh<uint32_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) :
		                (arch_ptr_bytes()==8) ?  cptrtoh<uint64_t>(firp, reinterpret_cast<const uint8_t*>(&data[i])) :
		                throw invalid_argument("Cannot map architecture size to bit width")
                	       );

		const auto ptr_addr = i+shdr->get_address();

		const auto ptr_prov = 
			(shdr->get_name()==".init_array") ? ibt_provenance_t::ibtp_initarray :
			(shdr->get_name()==".fini_array") ? ibt_provenance_t::ibtp_finiarray :
			(shdr->get_name()==".got.plt") ?    ibt_provenance_t::ibtp_gotplt    :
			(shdr->get_name()==".got") ?        ibt_provenance_t::ibtp_got       :
			(shdr->get_name()==".symtab") ?     ibt_provenance_t::ibtp_symtab    :
			(shdr->isWriteable()) ?             ibt_provenance_t::ibtp_data      :
			ibt_provenance_t::ibtp_rodata;

		possible_target(ptr_val, ptr_addr, ptr_prov);
void handle_scoop_scanning(FileIR_t* firp)
{
	// check for addresses in scoops in the text section. 
	for(auto scoop : firp->getDataScoops())
		if(scoop->getName() == ".ctor" || scoop->getName() == ".dtor" )
		{
			const auto &scoop_contents = scoop->getContents();
Jason Hiser's avatar
Jason Hiser committed
			const auto  ptrsize = firp->getArchitectureBitWidth() / 8 ;
			for(auto i = 0u; i + ptrsize < scoop_contents.size(); i += ptrsize)
			{
Jason Hiser's avatar
Jason Hiser committed
				const auto ptr = cptrtoh<uint64_t>(firp, reinterpret_cast<const uint8_t*>(scoop_contents.c_str() + i));
				possible_target(ptr, scoop->getStart()->getVirtualOffset() + i, ibt_provenance_t::ibtp_data);
			}

		}

		// test if scoop was added by fill_in_cfg -- make this test better.
		if(scoop->getName().find("data_in_text_")==string::npos) continue;
		// at the moment, FIC only creates 4-, 8-, and 16-byte scoops.
		// change this code if FIC changes.
		if(scoop->getSize() == 4 ) 
		if(scoop->getSize() == 8 )
			const auto addr=*(uint64_t*)(scoop->getContents().c_str());
			possible_target(addr, scoop->getStart()->getVirtualOffset(), ibt_provenance_t::ibtp_unknown);
		}
		else
		{
			// we may see 16 indicating that a ldr q-word happened.
			// this isn't likely an IBT, so we skip scanning it.
			assert(scoop->getSize() == 16 );	 
jdh8d's avatar
jdh8d committed

void print_targets()
{
	int j=1;
	for(auto  p : targets )
jdh8d's avatar
jdh8d committed
	{
		const auto target=p.first;
jdh8d's avatar
jdh8d committed
	
		cout<<hex<<target;
jdh8d's avatar
jdh8d committed
		if(j%10 == 0)
			cout<<endl; 
		else
			cout<<", ";
jdh8d's avatar
jdh8d committed
	}

	cout<<endl;
}

/*
 * find_in_function - collect the instructions of a function whose disassembly 
 * matches a POSIX extended regular expression.
 *
 * needle   - the regular expression (compiled with REG_EXTENDED).
 * haystack - the function whose instructions are searched.
 *
 * Returns the set of matching instructions (possibly empty).
 * Throws invalid_argument if needle does not compile and assertions are disabled;
 * with assertions enabled, a bad pattern trips the assert as before.
 */
set<Instruction_t*> find_in_function(string needle, Function_t *haystack)
{
	regex_t preg;
	set<Instruction_t*> found_instructions;

	// compile outside of assert() so the pattern is still compiled under NDEBUG.
	const auto compile_status = regcomp(&preg, needle.c_str(), REG_EXTENDED);
	assert(0 == compile_status);
	if(compile_status != 0)
		throw invalid_argument("Cannot compile regular expression: " + needle);

	for (auto candidate :  haystack->getInstructions())
	{
		const auto disasm=DecodedInstruction_t::factory(candidate);

		// check it's the requested type
		if(regexec(&preg, disasm->getDisassembly().c_str(), 0, nullptr, 0) == 0)
		{
			found_instructions.insert(candidate);
		}
	}
	regfree(&preg);
	return found_instructions;
}

Jason Hiser's avatar
Jason Hiser committed
void check_for_arm32_switch_type1(
		FileIR_t *firp, 
Jason Hiser's avatar
Jason Hiser committed
		Instruction_t* insn, 
Jason Hiser's avatar
Jason Hiser committed
		const DecodedInstruction_t &d10, 
		EXEIO::exeio* exeiop)
{
Jason Hiser's avatar
Jason Hiser committed
	const auto prov=ibt_provenance_t::ibtp_switchtable_type1;

	/*
	 * Check for hand-written assembly for divsi and udivsi that has this dispatch insn: addeq pc, pc, <reg> lsl #2
	 */
	const auto d        = DecodedInstruction_t::factory(insn);
	const auto is_addne = d->getMnemonic() == "addne" ;
	if(is_addne) 
	{
		const auto is_op0_pc = d->getOperand(0)->getString()=="pc";
		const auto is_op1_pc = d->getOperand(1)->getString()=="pc";
		if(is_op0_pc && is_op1_pc)
		{
			cout << "Found gcc addne pc,pc idiom" << endl;
			for(auto i=1u;i<32u;i++)
			{
				const auto ibta = insn->getAddress()->getVirtualOffset() + 8 + i*12;
				possible_target(ibta,0,prov);
			}
		}
	}
Jason Hiser's avatar
Jason Hiser committed
	return;
}
Jason Hiser's avatar
Jason Hiser committed
/*
 * check_for_arm32_switch_type2 - recognize a switch dispatch of the form 
 * "ldrls pc, [pc, <reg>, ...]" and record the entries of the table that follows it.
 *
 * firp   - the IR; used to look up the instruction at each table entry and to add the scoop.
 * i10    - the candidate dispatch instruction.
 * d10    - decoded form of i10; only its disassembly string is consulted.
 * exeiop - the executable; used to find the section holding the table.
 *
 * On a match, each table entry is reported via possible_target, each entry that 
 * maps to an instruction is added to jmptables[i10], a data scoop is added for the 
 * table, and jmptables[i10] is marked analysis-complete.  Returns without effect if 
 * i10 is not the dispatch instruction or if backup_until reports failure.
 */
void check_for_arm32_switch_type2(
		FileIR_t *firp, 
		Instruction_t* i10, 
		const DecodedInstruction_t &d10, 
		EXEIO::exeio* exeiop)
{
#if 0

Looking for this pattern:

I9:	cmp	r2, #4
I10:	ldrls	pc, [pc, r2, lsl #2]

or this:

I8:	ldr	r3, [pc, #k]
I9:	cmp	r2, r3
I10:	ldrls	pc, [pc, r2, lsl #2]

#endif

	const auto prov=ibt_provenance_t::ibtp_switchtable_type2;

	// check that i10 is what we need
	const auto i10_dis       = d10.getDisassembly();
	const auto is_i10_ldrls  = i10_dis.find("ldrls pc, [pc")==0;
	if(!is_i10_ldrls) return;

	// this is sufficient to determine we have a switch dispatch.
	// now try to figure out the table size.
	// (until a bound is found, the size is "unlimited".)
	auto jt_size = numeric_limits<uint32_t>::max();

	// and we do that by looking  for a cmp on the dispatch register.
	// the dispatch register is the index register in the ldrls instruction.
	// which we find via string extraction.
	// NOTE(review): substr(15,3) assumes the disassembly is at least 15 characters long 
	// and that the index register starts at column 15 -- confirm for all decoder outputs.
	const auto i10_dis_15_3  = i10_dis.substr(15,3);  // reg or reg,
	const auto i10_index_reg = i10_dis_15_3[2] == ',' ? i10_dis_15_3.substr(0,2) : i10_dis_15_3;

	// look for i9
	auto i9=(Instruction_t*)nullptr;
	if(!backup_until( string()+"cmp "+i10_index_reg+",", /* look for this pattern. */
				i9,                          /* find i9 */
				i10,                         /* before I10 */
				"^"+i10_index_reg+"$"        /* stop if i10_reg set */
				))
	{
		return; 
	}


	if(i9 != nullptr)
	{
		// decode i9
		const auto d9     = DecodedInstruction_t::factory(i9);
		const auto d9_op1 = d9->getOperand(1);

		// look for a constant in the 2nd operand.
		if( d9_op1->isConstant())
			jt_size=d9_op1->getConstant();
		else
		{
			// check if it's a register 
			// and look backwards for a load of the register from the .text seg
			// TBD
		}
	}

	// extract the jump table -- this is simple as the addressing mode in i10 says it is at "pc+8".
	const auto jt_addr    = i10->getAddress()->getVirtualOffset() + 8u;
	const auto jt_section = find_section(jt_addr,exeiop);
	assert(jt_section);
	const auto jt_secdata = jt_section->get_data();
	const auto jt_secaddr = jt_section->get_address();
	const auto jt_secendaddr = jt_secaddr + jt_section->get_size();

	auto jt_entry_no=0u;
	while(true)
	{
		// calculate some stuff about the jump table entry we're looking at
		const auto jte_size   = 4u;
		const auto jte_offset = jt_entry_no * jte_size;  
		const auto jte_addr   = jt_addr + jte_offset;

		// stop if we've exceeded the section size
		if(jte_addr + jte_size > jt_secendaddr)  break;

		// extract the table entry
		// NOTE(review): the entry is loaded as a host-order uint32_t with no byte-order 
		// conversion -- confirm that is intended for every ARM32 input.
		const auto jte = * reinterpret_cast<const uint32_t*>(&jt_secdata[jte_addr - jt_secaddr]);

		// mark the instruction at jte as an ibt
		// (this happens before the validity check below, so the entry that ends the scan is reported too.)
		possible_target(jte, jte_addr, prov);

		// check to see if the entry is valid.  if not, exit.
		const auto ibtarget = lookupInstruction(firp, jte);
		if(ibtarget == nullptr) break;

		cout << "Found ARM32 switch (ldrls -- type2)@0x" << hex << i10->getAddress()->getVirtualOffset()
		     << " table_entry[" << dec << jt_entry_no << "]=" << hex << jte << "@0x " << jte_addr
		     << " to " << ibtarget->getBaseID() << ":" << ibtarget->getDisassembly() 
		     << "@" << ibtarget->getAddress()->getVirtualOffset() << endl;

		// add to i10
		jmptables[i10].insert(ibtarget);

		// stop if we've exceeded the number of table entries we found.
		if(jt_entry_no+1 > jt_size) break;

		jt_entry_no++;

	}

	// add a data scoop for the switch table.
	// the scoop is sized for one entry more than the count printed here.
	cout << "Detected " << dec << jt_entry_no << "entries in this table.  adding data scoop for table" << endl;
	addSwitchTableScoop(firp, jt_entry_no + 1 , 4, jt_addr, exeiop, nullptr, 0, false);

	// mark that we figured out all possible targets for this ib.
	jmptables[i10].setAnalysisStatus(iasAnalysisComplete);
}

/*
 * check_for_arm32_switch_type3 - recognize a switch dispatch of the form 
 * "addls pc, pc, <reg>, ..." whose table is a run of unconditional branch 
 * instructions placed after the dispatch.
 *
 * firp   - the IR; used to look up the instruction in each table slot.
 * i10    - the candidate dispatch instruction.
 * d10    - decoded form of i10; its disassembly and operand 2 are consulted.
 * exeiop - not consulted.
 *
 * On a match, every consecutive 4-byte slot starting at i10's address + 8 that holds 
 * a "b" instruction is reported via possible_target and added to jmptables[i10], and 
 * jmptables[i10] is marked analysis-complete.  Returns without effect if i10 is not 
 * the dispatch instruction or if backup_until reports failure.
 */
void check_for_arm32_switch_type3(
		FileIR_t *firp, 
		Instruction_t* i10, 
		const DecodedInstruction_t &d10, 
		EXEIO::exeio* exeiop)
{
#if 0

Looking for this pattern:

I9:	cmp	r2, #4
I10:	addls	pc, [pc, r2, lsl #2]

or this:

I8:	ldr	r3, [pc, #k]
I9:	cmp	r2, r3
I10:	addls	pc, [pc, r2, lsl #2]

#endif

	const auto prov=ibt_provenance_t::ibtp_switchtable_type3;

	// check that i10 is what we need
	// (despite the variable's name, the instruction matched here is an addls.)
	const auto i10_dis       = d10.getDisassembly();
	const auto is_i10_ldrls  = i10_dis.find("addls pc, pc")==0;
	if(!is_i10_ldrls) return;

	// this is sufficient to determine we have a switch dispatch.
	// now try to figure out the table size.
	// (until a bound is found, the size is "unlimited".)
	auto jt_size = numeric_limits<uint32_t>::max();

	// and we do that by looking  for a cmp on the dispatch register.
	// the dispatch register is the index register in the addls instruction,
	// which is taken from the string form of operand 2.
	const auto i10_index_reg = d10.getOperand(2)->getString();

	// look for i9
	auto i9=(Instruction_t*)nullptr;
	if(!backup_until( string()+"cmp "+i10_index_reg+",", /* look for this pattern. */
				i9,                          /* find i9 */
				i10,                         /* before I10 */
				"^"+i10_index_reg+"$"        /* stop if i10_reg set */
				))
	{
		return; 
	}


	if(i9 != nullptr)
	{
		// decode i9
		const auto d9     = DecodedInstruction_t::factory(i9);
		const auto d9_op1 = d9->getOperand(1);

		// look for a constant in the 2nd operand.
		if( d9_op1->isConstant())
			jt_size=d9_op1->getConstant();
		else
		{
			// check if it's a register 
			// and look backwards for a load of the register from the .text seg
			// TBD
		}
	}

	// locate the jump table -- this is simple as the addressing mode in i10 says it is at "pc+8".
	const auto jt_addr       = i10->getAddress()->getVirtualOffset() + 8u;
	const auto jt_entry_size = 4u;

	auto jt_entry_no=0u;
	while(true)
	{
		// check to see if the entry is valid.  if not, exit.
		// here jte is the address of the table slot itself, not a value read from memory.
		const auto jte = jt_addr + jt_entry_no * jt_entry_size;
		const auto ibtarget = lookupInstruction(firp, jte);
		if(ibtarget == nullptr) break;

		// check if it's an uncond branch
		const auto ibt_dis = DecodedInstruction_t::factory(ibtarget);
		if(ibt_dis->getMnemonic() != "b") break;

		// mark the instruction at jte as an ibt
		possible_target(jte, 0, prov);

		// NOTE(review): the message below says "type2" although this is the type3 check -- looks like a copy/paste leftover.
		cout << "Found ARM32 switch (addls -- type2)@0x" << hex << i10->getAddress()->getVirtualOffset()
		     << " to " << ibtarget->getBaseID() << ":" << ibtarget->getDisassembly() 
		     << "@" << ibtarget->getAddress()->getVirtualOffset() << endl;

		// add to i10
		jmptables[i10].insert(ibtarget);

		// stop if we've exceeded the number of table entries we found.
		if(jt_entry_no+1 > jt_size) break;

		jt_entry_no++;

	}

	// report how many slots were found.
	// NOTE(review): the message mentions adding a data scoop, but no scoop is added in this function.
	cout << "Detected " << dec << jt_entry_no << "entries in this table.  adding data scoop for table" << endl;

	// mark that we figured out all possible targets for this ib.
	jmptables[i10].setAnalysisStatus(iasAnalysisComplete);
}
Jason Hiser's avatar
Jason Hiser committed
void check_for_arm64_switch_type1(
		FileIR_t *firp, 
		Instruction_t* i10, 
		const DecodedInstruction_t &d10, 
Jason Hiser's avatar
Jason Hiser committed
	const auto prov=ibt_provenance_t::ibtp_switchtable_type1;

#if 0
Sample code for this branch type:

       ; x2 gets the value of x0
       ; this probably is not normal or required, but we are not checking 
       ; it anyhow.  This is just to understand the example.
i1:    0x4039c4:    cmp     table_index_reg, #0x3
i2:    0x4039c8:    b.hi    0x4039d4	 ; may also be a b.ls

       // generate switch table base address
       // this code may be hard to find if the compiler optimizes it
       // outside the block with the rest of the dispatch code, and/or
       // spills a register.
       // thus, we allow for it not to be found, and instead us any "unk"
       // we return true if we've found the entry to avoid looking at unks
       // if we don't need to.
i5:    0x40449c:    adrp    table_page_reg, 0x415000          // table page 
i6:    0x4044a0:    add     table_base_reg, table_page_reg, #0x2b0 // table offset 
       // table=table_page+table_offset
       //
       // load from switch table
i7:    0x4044a4:    ldrh    table_entry_reg, [table_base_reg,table_index_reg,uxtw #1]
or
i7:    0x4044a4:    ldrb    table_entry_reg, [table_base_reg,table_index_reg,uxtw ]

       // calculate branch_addr+4+table[i]*4
i8:    0x4044a8:    adr     branch_reg, 0x4044b4 // jump base addr
i9:    0x4044ac:    add     i10_reg, branch_reg, table_entry_reg, sxth #2
       // actually take the branch
i10:   0x4044b0:    br      i10_reg
i11:   0x4044b4:    


notes:

1) jump table entries are 2-bytes
2) jump table entries specify an offset from the byte after dispatch branch
3) jump table entries dont store the lower 2 bits of the offset, as they 
   have to be 0 due to instruction alignment

	
#endif
	// sanity check the jump 
	const auto i10_reg=d10.getOperand(0)->getString();

	// try to find I9
	auto i9=(Instruction_t*)nullptr;
	/* search for externder=sxth or sxtb */
	if(!backup_until( string()+"(add "+i10_reg+",.* sxth #2)|(add "+i10_reg+",.* sxtb #2)", /* look for this pattern. */
				i9,                            /* find i9 */
				i10,                           /* before I10 */
				"^"+i10_reg+"$"                /* stop if i10_reg set */

	// Extract the I9 fields.
	assert(i9);
	const auto d9p             = DecodedInstruction_t::factory(i9);
	const auto &d9             = *d9p;
	const auto offset_reg      = d9.getOperand(1)->getString();
	const auto table_entry_reg = d9.getOperand(2)->getString();


	// try to find I8
	auto i8=(Instruction_t*)nullptr;
	if(!backup_until(string()+"adr "+offset_reg+",", /* look for this pattern. */
				i8,                      /* find i8 */
				i9,                      /* before I9 */
				"^"+offset_reg+"$"       /* stop if offste_reg set */


	// extract the I8 fields
	assert(i8);
	const auto d8p            = DecodedInstruction_t::factory(i8);
	const auto &d8            = *d8p;
	const auto jump_base_addr = d8.getOperand(1)->getConstant();

	// try to find I7
	auto i7=(Instruction_t*)nullptr;
	if(!backup_until(string()+ "(ldrh "+table_entry_reg+",)|(ldrb "+table_entry_reg+",)", /* look for this pattern. */
				i7,                                                           /* find i7 */
				i9,                                                           /* before I9 */
				"^"+table_entry_reg+"$"                                       /* stop if index_reg set */


	// extract the I7 fields
	assert(i7);
	const auto d7p              = DecodedInstruction_t::factory(i7);
	const auto &d7              = *d7p;
	const auto memory_op_string = d7.getOperand(1)->getString();
	const auto plus_pos         = memory_op_string.find(" +");
	const auto table_base_reg   = memory_op_string.substr(0,plus_pos);
	const auto table_index_reg  = memory_op_string.substr(plus_pos+3);
	const auto table_entry_size = 
		d7.getMnemonic()=="ldrb" ? 1 :
		d7.getMnemonic()=="ldrh" ? 2 :
		throw invalid_argument("Unable to detected switch table entry size for ARM64");

	// now we try to find the table base in I5 and I6
	// but we may fail due to compiler opts.  Prepare for such failures
	// by creating a set of possible table bases.
	// If we find i5/i6 or a reload of a spilled table address,
	// we will refine our guess.
	auto all_table_bases= per_reg_add_adrp_results[table_base_reg];

	// try to find I6
	auto i6=(Instruction_t*)nullptr;
	if(backup_until(string()+"add "+table_base_reg+",",  /* look for this pattern. */
	                i6,                                  /* find i6 */
	                i7,                                  /* before I7 */
	                "^"+table_base_reg+"$",              /* stop if table_base_reg set */
	                true,			             /* look hard -- recursely examine up to 10k instructions and 500 blocks */
	                10000,
	                500
			))
	{


		// extract the I6 fields