Skip to content
Snippets Groups Projects
fill_in_cfg.cpp 33.5 KiB
Newer Older
/*
 * Copyright (c) 2014 - Zephyr Software LLC
 *
 * This file may be used and modified for non-commercial purposes as long as
 * all copyright, permission, and nonwarranty notices are preserved.
 * Redistribution is prohibited without prior written consent from Zephyr
 * Software.
 *
 * Please contact the authors for restrictions applying to commercial use.
 *
 * THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author: Zephyr Software
 * e-mail: jwd@zephyr-software.com
 * URL   : http://www.zephyr-software.com/
 *
 */

jdh8d's avatar
 
jdh8d committed
#include <fstream>
#include <string.h>
#include <assert.h>
jdh8d's avatar
jdh8d committed
#include <sys/mman.h>
#include <ctype.h>
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include <pe_bliss.h>
#include <pe_structures.h>
#pragma GCC diagnostic pop


using namespace IRDB_SDK;
using namespace PopCFG;

/*
 * Format a value as text: a literal "0x" followed by the value streamed
 * with the hexadecimal base flag set (integral values print in base 16).
 */
template <typename T>
static inline std::string to_hex_string(const T& n)
{
	std::ostringstream formatted;
	formatted.setf(std::ios_base::hex, std::ios_base::basefield);
	formatted << "0x";
	formatted << n;
	return formatted.str();
}

void PopulateCFG::populate_instruction_map
		InstructionMap_t &insnMap,
		FileIR_t *firp
jdh8d's avatar
jdh8d committed
	/* start from scratch each time */
	insnMap.clear();

	/* for each instruction in the IR */
	for(auto insn : firp->getInstructions())
		const auto fileID= insn->getAddress()->getFileID();
		const auto vo    = insn->getAddress()->getVirtualOffset();
		const auto p     = pair<DatabaseID_t,VirtualOffset_t>(fileID,vo);

		assert(insnMap[p]==NULL);
		insnMap[p]=insn;
	}

}

/*
 * set_fallthrough - link insn to the instruction that directly follows it.
 *
 * The successor address is the virtual offset of insn plus the size of its
 * data bits, in the same file.  On non-MIPS targets an unconditional branch
 * or a return gets no fallthrough; on MIPS the link is always attempted.
 * A successor that is found is stored with setFallthrough() and counted in
 * fallthroughs_set.  One that is not found has its address recorded in
 * missed_instructions; if that address is nonzero it is also logged and
 * counted in bad_fallthrough_count.
 *
 * NOTE(review): this listing appears to have lost lines.  The parentheses of
 * the parameter list, several braces, the body of the first if, and the
 * statement that defines fallthrough_insn (presumably a lookup of p in
 * insnMap) are not present.  Confirm against the original file.
 */
void PopulateCFG::set_fallthrough
	InstructionMap_t &insnMap,
	DecodedInstruction_t *disasm, 
	Instruction_t *insn, 
	FileIR_t *firp
	if(insn->getFallthrough())
	const auto is_mips = firp->getArchitecture()->getMachineType() == admtMips32;	 
	// always set fallthrough for mips
	// other platforms can end fallthroughs for uncond branches and returns
	if(!is_mips) 
		// check for branches with targets 
		if(
			(disasm->isUnconditionalBranch() ) ||	// it is a unconditional branch 
			(disasm->isReturn())			// or a return
		  )
		{
			// this is a branch with no fallthrough instruction
			return;
		}
	}

	/* get the address of the next instruction */
	
	auto virtual_offset=insn->getAddress()->getVirtualOffset() + insn->getDataBits().size();

	/* create a pair of offset/file */
	auto p=pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset);
	
	/* lookup the target insn from the map */

	/* sanity, note we may see odd control transfers to 0x0 */
	if(fallthrough_insn==NULL &&   virtual_offset!=0)
	{
		cout<<"Cannot set fallthrough for "<<std::hex<<insn->getAddress()->getVirtualOffset();
an7s's avatar
an7s committed
		cout<< " : "<<insn->getDisassembly()<<endl;
		bad_fallthrough_count++;
	}

	/* set the target for this insn */
	if(fallthrough_insn!=0)
Anh Nguyen-Tuong's avatar
Anh Nguyen-Tuong committed
	{
		fallthroughs_set++;
		insn->setFallthrough(fallthrough_insn);
Anh Nguyen-Tuong's avatar
Anh Nguyen-Tuong committed
	}
jdh8d's avatar
jdh8d committed
	else
		missed_instructions.insert(pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset));
/*
 * set_delay_slots - on MIPS32, tie each branch to its delay-slot instruction.
 *
 * For every instruction that decodes as a branch, the instruction located
 * 4 bytes after it in the same file is looked up in insnMap and attached to
 * the branch through a "delay_slot1" relocation.  Other architectures are
 * left untouched.
 *
 * insnMap - (file ID, virtual offset) -> instruction index
 * disasm  - not used here; insn is decoded again locally
 * insn    - the instruction to examine
 * firp    - IR that receives the new relocation
 *
 * A branch whose delay-slot instruction is not in insnMap trips the assert.
 */
void PopulateCFG::set_delay_slots
	(
	InstructionMap_t &insnMap,
	DecodedInstruction_t *disasm, 
	Instruction_t *insn, 
	FileIR_t *firp
	)
{
	const auto is_mips = firp->getArchitecture()->getMachineType() == admtMips32;
	if(!is_mips)
		return;

	const auto d=DecodedInstruction_t::factory(insn);
	if(d->isBranch())
	{
		const auto branch_addr     = insn->getAddress();
		const auto delay_slot_insn = insnMap[ {branch_addr->getFileID(), branch_addr->getVirtualOffset() + 4}];
		assert(delay_slot_insn);
		(void)firp->addNewRelocation(insn,0,"delay_slot1", delay_slot_insn, 0);
	}
}
/*
 * set_target - resolve the target instruction of a direct branch.
 *
 * For each operand of a branch that is not a return and whose operand is a
 * constant, the address reported by disasm->getAddress() is paired with the
 * file ID of insn and used as the key of the target instruction.
 *
 * When no instruction is known at that address, one special case is tried:
 * a jump that lands exactly 1 byte past the end of insn, where the already
 * computed fallthrough starts with byte 0xf0 (a lock prefix), is pointed at
 * the fallthrough and counted in odd_target_count.  Other failures are logged.
 * A resolved target is stored with setTarget(); the code also records
 * addresses in missed_instructions.
 *
 * NOTE(review): this listing appears to have lost lines.  The parentheses of
 * the parameter list, the body of the first if, the close of the branch
 * condition, the statement that defines target_insn (presumably a lookup of p
 * in insnMap), several braces and at least one else are not present, so the
 * exact nesting of the logging and of the missed_instructions insert cannot be
 * read from here.  Confirm against the original file.
 */
void PopulateCFG::set_target
	InstructionMap_t &insnMap,
	DecodedInstruction_t *disasm, 
	Instruction_t *insn, 
	FileIR_t *firp
	if(insn->getTarget())
	const auto &operands = disasm->getOperands();
	for(auto operand : operands)
	{	
		// check for branches with targets 
		if(
			disasm->isBranch() &&		// it is a branch 
			!disasm->isReturn() && 		// and not a return
			operand->isConstant()		// and has a constant argument 
	//		cout<<"Found direct jump with addr=" << insn->getAddress()->getVirtualOffset() <<
	//			" disasm="<<disasm->CompleteInstr<<" ArgMnemonic="<<
	//			disasm->Argument1.ArgMnemonic<<"."<<endl;

			/* get the offset */
			auto virtual_offset=disasm->getAddress();

			/* create a pair of offset/file */
			auto p=pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset);
		
			/* lookup the target insn from the map */

			/* sanity, note we may see odd control transfers to 0x0 */
			if(target_insn==NULL)
				unsigned char first_byte=0;
				if(insn->getFallthrough())
					first_byte=(insn->getFallthrough()->getDataBits().c_str())[0];
				VirtualOffset_t jump_dist=virtual_offset-(insn->getAddress()->getVirtualOffset()+(insn->getDataBits()).size());
				if(	
					// jump 1 byte forward
					jump_dist == 1 &&

					// and we calculated the fallthrough
					insn->getFallthrough()!=NULL &&

					// and the fallthrough starts with a lock prefix
					first_byte==0xf0
				  )
				{
					odd_target_count++;
					target_insn=insn->getFallthrough();
						cout<<"Cannot set target (target="<< std::hex << virtual_offset << ") for "<<std::hex<<insn->getAddress()->getVirtualOffset()<<"."<<endl;

			/* set the target for this insn */
			if(target_insn!=0)
				insn->setTarget(target_insn);
				missed_instructions.insert( pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset));
Anh Nguyen-Tuong's avatar
Anh Nguyen-Tuong committed
		}
/*
 * find_file - return the File_t that corresponds to fileid.
 *
 * Only the file held by firp can be returned: fileid must equal that
 * file's base ID, which is checked by the assert.
 */
File_t* PopulateCFG::find_file(FileIR_t* firp, DatabaseID_t fileid)
{
	assert(firp->getFile()->getBaseID()==fileid);
	(void)fileid; /* read only by the assert; keeps NDEBUG builds free of unused-parameter warnings */
	return firp->getFile();
}

jdh8d's avatar
 
jdh8d committed

/*
 * add_new_instructions - create IR instructions for addresses that were
 * referenced but had no instruction.
 *
 * For a missed (file, address) pair the code scans the sections from index 1,
 * skipping those that are not loadable or not executable, and looks for the
 * section whose [address, address+size) range contains the missed address.
 * The bytes at that address are decoded; on success a string of instr_len
 * bytes is copied out of the section data, a new address and a new
 * instruction (comment: disassembly plus  from fill_in_cfg ) are added to
 * firp, and found_instructions is incremented.  Fallthrough and target of the
 * new instruction are not set here.  An address found in no section
 * increments failed_target_count and is logged.  Setting the environment
 * variable VERBOSE_CFG adds per-address logging.
 *
 * NOTE(review): this listing appears to have lost lines.  The loop over the
 * missed addresses, the declarations of missed_address, missed_fileid,
 * secnum, secndx and instr_len, the validity test on the decoded instruction
 * and the closing brace of the function are not present.  Presumably the
 * outer loop walks missed_instructions; confirm against the original file.
 */
void PopulateCFG::add_new_instructions(FileIR_t *firp)
jdh8d's avatar
jdh8d committed
{
	int found_instructions=0;
jdh8d's avatar
jdh8d committed
	{
		/* get the address we've missed */
jdh8d's avatar
jdh8d committed

		/* get the address ID of the instruction that's missing the missed addressed */
jdh8d's avatar
jdh8d committed
		
		/* figure out which file we're looking at */
		auto filep=find_file(firp,missed_fileid);
jdh8d's avatar
jdh8d committed
		assert(filep);
jdh8d's avatar
jdh8d committed


jdh8d's avatar
jdh8d committed

		bool found=false;
	
        	/* look through each section and find the missing target*/
        	for (secndx=1; secndx<secnum; secndx++)
		{
        		/* not a loaded section */
        		if( !exeiop->sections[secndx]->isLoadable()) 
jdh8d's avatar
jdh8d committed
                		continue;
jdh8d's avatar
jdh8d committed
        		/* loaded, and contains instruction, record the bounds */
        		if( !exeiop->sections[secndx]->isExecutable()) 
jdh8d's avatar
jdh8d committed
                		continue;
		
        		VirtualOffset_t first=exeiop->sections[secndx]->get_address();
        		VirtualOffset_t second=exeiop->sections[secndx]->get_address()+exeiop->sections[secndx]->get_size();
jdh8d's avatar
jdh8d committed

			/* is the missed instruction in this section */
jdh8d's avatar
 
jdh8d committed
			if(first<=missed_address && missed_address<second)
jdh8d's avatar
jdh8d committed
			{
				const char* data=exeiop->sections[secndx]->get_data();
jdh8d's avatar
 
jdh8d committed
				// second=data?
				VirtualOffset_t offset_into_section=missed_address-exeiop->sections[secndx]->get_address();
jdh8d's avatar
jdh8d committed
	
				/* disassemble the instruction */
				auto disasm_p=DecodedInstruction_t::factory(missed_address, (void*)&data[offset_into_section], exeiop->sections[secndx]->get_size()-offset_into_section );
				auto &disasm=*disasm_p;
jdh8d's avatar
jdh8d committed


				/* if we found the instruction, but can't disassemble it, then we skip out for now */
jdh8d's avatar
jdh8d committed
				{
jdh8d's avatar
 
jdh8d committed
					if(getenv("VERBOSE_CFG"))
						cout<<"Found invalid insn at "<<missed_address<<endl;
jdh8d's avatar
jdh8d committed
				}
jdh8d's avatar
 
jdh8d committed
				else if(getenv("VERBOSE_CFG"))
					cout<<"Found valid insn at "<<missed_address<<": "<<disasm.getDisassembly()<<endl;
				/* intel instructions have a max size of 16 */
				assert(1<=instr_len && instr_len<=16);


				/* here we are certain we found the instruction  */
				found=true;

jdh8d's avatar
jdh8d committed
				/* get the new bits for an instruction */
				string newinsnbits;
				newinsnbits.resize(instr_len);
				for(auto i=0U;i<instr_len;i++)
jdh8d's avatar
jdh8d committed
					newinsnbits[i]=data[offset_into_section+i];

				/* create a new address */
				auto newaddr=firp->addNewAddress(missed_fileid,missed_address);
jdh8d's avatar
jdh8d committed

				/* create a new instruction */
				auto newinsn=firp->addNewInstruction(newaddr, nullptr, newinsnbits, disasm.getDisassembly()+string(" from fill_in_cfg "), nullptr);
				(void)newinsn;// just add to IR

jdh8d's avatar
jdh8d committed
				/* fallthrough/target/is indirect will be set later */

				/* insert into the IR */


				cout<<"Found new instruction, "<<newinsn->getComment()<<", at "<<std::hex<<newinsn->getAddress()->getVirtualOffset()<<" in file "<<"<no name yet>"<<"."<<endl; 
jdh8d's avatar
jdh8d committed
				found_instructions++;
			}
		
		}
		if(!found)
		{
			failed_target_count++;
	
			cout<<"Cannot find address "<<std::hex<<missed_address<<" in file "<<"<no name yet>"<<"."<<endl; 
		} 
jdh8d's avatar
 
jdh8d committed
	}	
jdh8d's avatar
jdh8d committed
	cout<<"Found a total of "<<std::dec<<found_instructions<<" new instructions."<<endl;
/*
 * fill_in_cfg - iteratively set fallthrough and target links for every
 * instruction in firp, adding instructions that turn out to be missing.
 *
 * Each round resets bad_target_count, bad_fallthrough_count,
 * failed_target_count and missed_instructions, rebuilds the instruction map,
 * then decodes every instruction (asserting that the decoded length matches
 * the size of its data bits) and calls set_fallthrough, set_target and
 * set_delay_slots on it.  After logging the counters, add_new_instructions
 * is called.  Rounds repeat while missed_instructions.size() is greater than
 * failed_target_count.
 *
 * Afterwards the addresses still unresolved are printed, base IDs are
 * assigned with setBaseIDS(), and each instruction has its original address
 * ID set to the base ID of its address.
 *
 * NOTE(review): this listing appears to have lost lines.  The opening brace
 * of the function, the loop header that introduces p in the unresolved
 * address printout, and the closing braces are not present.  Confirm against
 * the original file.
 */
void PopulateCFG::fill_in_cfg(FileIR_t *firp)
jdh8d's avatar
jdh8d committed
	int round=0;
	
	do
	{
		bad_target_count=0;
		bad_fallthrough_count=0;
		failed_target_count=0;
		missed_instructions.clear();

		auto insnMap = InstructionMap_t();
		populate_instruction_map(insnMap, firp);
jdh8d's avatar
jdh8d committed

		cout << "Found "<<firp->getInstructions().size()<<" instructions." <<endl;
jdh8d's avatar
jdh8d committed

		/* for each instruction, disassemble it and set the target/fallthrough */
		for(auto insn : firp->getInstructions())
jdh8d's avatar
jdh8d committed
		{
      			auto disasm=DecodedInstruction_t::factory(insn);
jdh8d's avatar
jdh8d committed
	
      			const auto instr_len = disasm->length();
jdh8d's avatar
jdh8d committed
	
			assert(instr_len==insn->getDataBits().size());
jdh8d's avatar
jdh8d committed
	
			set_fallthrough(insnMap, disasm.get(), insn, firp);
			set_target(insnMap, disasm.get(), insn, firp);
			set_delay_slots(insnMap, disasm.get(), insn, firp);
jdh8d's avatar
jdh8d committed
			
		}
		if(bad_target_count>0)
			cout<<std::dec<<"Found "<<bad_target_count<<" bad targets at round "<<round<<endl;
		if(bad_fallthrough_count>0)
			cout<<"Found "<<bad_fallthrough_count<<" bad fallthroughs at round "<<round<<endl;
		cout<<"Missed instruction count="<<missed_instructions.size()<<endl;
		add_new_instructions(firp);
jdh8d's avatar
jdh8d committed

		round++;

	/* keep trying this while we're resolving targets.  if at any point we fail to resolve a new target/fallthrough address, then we give up */
	} while(missed_instructions.size()>failed_target_count);

an7s's avatar
an7s committed
	cout<<"Caution: Was unable to find instructions for these addresses:"<<hex<<endl;
jdh8d's avatar
jdh8d committed
		/* get the address we've missed */
		VirtualOffset_t missed_address=p.second;
jdh8d's avatar
jdh8d committed
		cout << missed_address << ", ";
an7s's avatar
an7s committed
	cout<<dec<<endl;
jdh8d's avatar
jdh8d committed
	/* set the base IDs for all instructions */
	firp->setBaseIDS();
jdh8d's avatar
jdh8d committed

	/* for each instruction, set the original address id to be that of the address id, as fill_in_cfg is 
	 * designed to work on only original programs.
	 */
	for(auto insn : firp->getInstructions())
		insn->setOriginalAddressID(insn->getAddress()->getBaseID());
bool PopulateCFG::is_in_relro_segment(const int secndx)
	ELFIO::elfio *real_exeiop = reinterpret_cast<ELFIO::elfio*>(exeiop->get_elfio()); 
	if(!real_exeiop)
	int segnum = real_exeiop->segments.size();
	VirtualOffset_t sec_start=(VirtualOffset_t)(exeiop->sections[secndx]->get_address());
	VirtualOffset_t sec_end=(VirtualOffset_t)(exeiop->sections[secndx]->get_address() + exeiop->sections[secndx]->get_size() - 1 );

	/* look through each section */
	for (int segndx=1; segndx<segnum; segndx++)
	{
		ELFIO::Elf_Word type=real_exeiop->segments[segndx]->get_type();
#ifndef PT_GNU_RELRO
#define PT_GNU_RELRO    0x6474e552      /* Read-only after relocation */
#endif

		if(type==PT_GNU_RELRO)
		{
			VirtualOffset_t seg_start=(VirtualOffset_t)(real_exeiop->segments[segndx]->get_virtual_address());
			VirtualOffset_t seg_end=(VirtualOffset_t)(real_exeiop->segments[segndx]->get_virtual_address() + real_exeiop->segments[segndx]->get_memory_size() - 1 );

			// check if start lies within 
			if(seg_start <= sec_start  && sec_start <= seg_end)
				return true;

			// check if end lies within 
			if(seg_start <= sec_end  && sec_end <= seg_end)
				return true;
			
			// check if crosses
			if(sec_start < seg_start  && seg_end < sec_end)
				return true;
		}
	}

	return false;
}
/*
 * fill_in_scoops - create the data scoops for the file held by firp.
 *
 * Pass 1 walks the sections from index 1 and creates one scoop for every
 * loadable, non-executable section.  The scoop carries the section name, its
 * first and last address, read/write/execute permission bits (R=4, W=2, X=1),
 * the relro flag from is_in_relro_segment(), and the section bytes (zeros
 * when the section has no data, as with .bss).  Every section that is both
 * writeable and executable is reported in warning.txt.
 *
 * Pass 2 creates one scoop for every (start, end) range in extra_scoops,
 * named user_added_<hex start>, with the permissions of the section that
 * contains the start address and relro set to false.
 *
 * Scoops receive consecutive base IDs starting at firp->getMaxBaseID().
 */
void PopulateCFG::fill_in_scoops(FileIR_t *firp)
{
	auto max_base_id=firp->getMaxBaseID();

	/* the first W+X warning of a run truncates warning.txt; later ones must
	 * append, otherwise only the last offending section would be reported. */
	auto wx_warning_written=false;

	/* look through each section */
	/* NOTE(review): secnum has no declaration in the visible part of this
	 * function -- presumably the number of sections; confirm. */
	for (auto secndx=1; secndx<secnum; secndx++)
	{
		const auto sec=exeiop->sections[secndx];

		/* not a loaded section, try next section */
		if(!sec->isLoadable()) 
		{
			cout << "Skipping scoop for section (not loadable) " << sec->get_name() << endl;
			continue;
		}

		if(sec->isWriteable() && sec->isExecutable()) 
		{
			auto open_mode=ofstream::trunc;
			if(wx_warning_written)
				open_mode=ofstream::app;
			ofstream fout("warning.txt", open_mode);
			fout << "Found that section " << sec->get_name() << " is both writeable and executable.  Program is inherently unsafe!" << endl;
			wx_warning_written=true;
		}

		/* executable sections handled by zipr/spri. */
		if(sec->isExecutable()) 
		{
			cout << "Skipping scoop for section (executable) " << sec->get_name() << endl;
			continue;
		}

		/* name */
		const auto name=string(sec->get_name());

		/* start and end address */
		auto startaddr = firp->addNewAddress(firp->getFile()->getBaseID(), sec->get_address());
		auto endaddr   = firp->addNewAddress(firp->getFile()->getBaseID(), sec->get_address() + sec->get_size()-1);

		auto the_contents=string(sec->get_size(), '\0'); 
		// deal with .bss segments that are 0 init'd.
		if (sec->get_data()) 
			the_contents.assign(sec->get_data(),sec->get_size());

		/* permissions */
		int permissions= 
			( sec->isReadable()   << 2 ) | 
			( sec->isWriteable()  << 1 ) | 
			( sec->isExecutable() << 0 ) ;

		bool is_relro=is_in_relro_segment(secndx);
		scoops_detected++;
		auto newscoop=firp->addNewDataScoop( name, startaddr, endaddr, NULL, permissions, is_relro, the_contents, max_base_id++ );
		(void)newscoop; // just give it to the IR

		cout << "Allocated new scoop for section " << name
		     << "(" << hex << startaddr->getVirtualOffset() << "-"
		     << hex << endaddr->getVirtualOffset() << ")"
		     << " perms=" << permissions << " relro=" << boolalpha << is_relro << endl;
	}

	for(const auto &extra_scoop : extra_scoops)
	{
		const auto start_vo        = extra_scoop.first;                                           // start and end offsets in this file 
		const auto end_vo          = extra_scoop.second;
		assert(start_vo<=end_vo);                                                                 // an inverted range would wrap the size below
		auto startaddr             = firp->addNewAddress(firp->getFile()->getBaseID(), start_vo); // start and end address 
		auto endaddr               = firp->addNewAddress(firp->getFile()->getBaseID(), end_vo);
		const auto sec             = exeiop->sections.findByAddress(start_vo);                    // section that contains the data 
		assert(sec);
		const auto sec_data        = sec->get_data();                                             // data, may be absent
		const auto scoop_size      = end_vo-start_vo+1;

		// zero-fill when the section has no data, the same way the section pass does.
		// NOTE(review): end_vo is not checked against the end of the section.
		auto the_contents          = string(scoop_size, '\0');
		if(sec_data)
			the_contents.assign(sec_data+(start_vo-sec->get_address()), scoop_size);

		const auto name            = string("user_added_")+to_hex_string(start_vo);               // name of new segment
		const auto permissions     =                                                              // permissions and relro bit.
			( sec->isReadable()   << 2 ) |  
			( sec->isWriteable()  << 1 ) | 
			( sec->isExecutable() << 0 ) ;
		const auto is_relro       = false;

		// finally, create the new scoop
		firp->addNewDataScoop( name, startaddr, endaddr, NULL, permissions, is_relro, the_contents, max_base_id++ );
	}
}
/*
 * detect_scoops_in_code - on ARM32/AArch64, create data scoops for constants
 * that load instructions read from executable sections.
 *
 * Every instruction whose mnemonic starts with ldr, ldrd or vldr is examined:
 *  - an ldrd whose operand 2 is not pc-relative, has no index register and a
 *    zero displacement is traced back (backup_until) to the add ..., pc, #imm
 *    that set its base register; the referenced address comes from that add.
 *  - otherwise operand 1 must be a pc-relative memory operand without an index
 *    register; the referenced address comes from its displacement.
 * On ARM32 the address of the instruction plus 8 is added to the value.
 *
 * The referenced bytes (size chosen from the destination register name) become
 * a read-only scoop named data_in_text_<hex address>, created once per address
 * and only when the address lies in an executable section.  On the ARM32
 * pc-relative path the scoop is placed at address 0 and a pcrel relocation
 * from the instruction to the scoop is added.
 *
 * Throws domain_error when the destination register name is not recognised.
 * Other architectures return immediately after calc_preds().
 */
void PopulateCFG::detect_scoops_in_code(FileIR_t *firp)
{
	// make sure preds are up to date for this
	calc_preds(firp);

	auto already_scoopified=map<VirtualOffset_t,DataScoop_t*>();
	const auto is_arm64       = firp->getArchitecture()->getMachineType() == admtAarch64;
	const auto is_arm32       = firp->getArchitecture()->getMachineType() == admtArm32;
	const auto is_arm_variant = is_arm32 || is_arm64;

	if(!is_arm_variant) return;

	// check each insn for an ldr with a pcrel operand.
	for(auto insn : firp->getInstructions())
	{
		// look for ldr's with a pcrel operand
		const auto d               = DecodedInstruction_t::factory(insn);
		const auto mnemonic        = d->getMnemonic();
		const auto is_ldrd_variant  = mnemonic.substr(0,4) == "ldrd";
		const auto is_ldr_variant  = mnemonic.substr(0,3) == "ldr";
		const auto is_vldr_variant = mnemonic.substr(0,4) == "vldr";
		const auto is_relevant_ldr = is_ldr_variant || is_vldr_variant || is_ldrd_variant;
		if(!is_relevant_ldr) continue;	 
		const auto op0             = d->getOperand(0);
		// the address we detect as referenced by this instruction.
		auto referenced_address = VirtualOffset_t(0);
		auto do_unpin       = is_arm32;

		if(is_ldrd_variant && !d->getOperand(2)->isPcrel())
		{
			const auto mem_op = d->getOperand(2);
			if( mem_op->hasIndexRegister() ) 	continue;
			if( mem_op->getMemoryDisplacement() != 0 ) continue;

			const auto mem_str         = mem_op->getString();
			const auto end_of_base_reg = mem_str.find(" ");
			const auto find_reg        = mem_str.substr(0,end_of_base_reg);

			// find the instruction that sets the base reg.
			auto add_pc_insn = (Instruction_t*)nullptr;
			if(!backup_until( string()+"add.* "+find_reg+", pc, #",  /* look for this pattern. */
						add_pc_insn,        /* store the matching instruction in add_pc_insn */
						insn,               /* start backing up from this instruction */
						"^"+find_reg+"$"    /* stop if find_reg set */
						))
			{
				continue;
			}

			const auto add_pc_insn_d = DecodedInstruction_t::factory(add_pc_insn);

			// the scoop keeps its original address on this path, so no unpin relocation is made.
			referenced_address = add_pc_insn_d -> getOperand(2)->getConstant() + (is_arm32 ? add_pc_insn->getAddress()->getVirtualOffset() + 8 : 0); 
			do_unpin           = false;

		}
		else
		{

			// capstone reports ldrd instructions as having 2 "dest" operands.
			// so we skip to the 3rd operand to get the memory op.  
			// todo:  fix libirdb-core to fix this and skip the odd operand.
			// todo:  report to capstone that they are broken.
			// NOTE(review): the note above describes operand 2, yet this branch reads
			// operand 1.  An ldrd whose operand 2 IS pc-relative also lands here and is
			// then skipped by the isPcrel() test below -- confirm that this is intended.
			const auto mem_op = d->getOperand(1);
			if( !mem_op->isPcrel()) continue;

			// if there is an indexing operation, skip this instruction.
			if( mem_op->hasIndexRegister()) continue;

			// sanity check that it's a memory operation, and extract fields
			assert(mem_op->isMemory());

			
			referenced_address = mem_op->getMemoryDisplacement() + (is_arm32 ? insn->getAddress()->getVirtualOffset() + 8 : 0); 
		}
		// no address found.
		if(referenced_address == 0) continue;

		const auto name           = "data_in_text_"+to_hex_string(referenced_address);
		const auto op0_str            = op0->getString();

		// if op0 is the PC, the instruction is some switch dispatch that we have to detect in more depth.  skip here.  see check_arm32_switch...
		if(is_arm32 && op0_str == "pc" ) continue;

		const auto referenced_size    =  // could use API call?
			is_ldrd_variant             ? 8  : // special case for load int reg pair.
			is_arm64 && op0_str[0]=='w' ? 4  : // arm64 regs
			is_arm64 && op0_str[0]=='x' ? 8  : 
			is_arm64 && op0_str[0]=='s' ? 4  : 
			is_arm64 && op0_str[0]=='d' ? 8  : 
			is_arm64 && op0_str[0]=='q' ? 16 : 
			is_arm32 && op0_str[0]=='s' ? 4  : // arm32 regs
			is_arm32 && op0_str[0]=='r' ? 4  : // arm32 regs
			is_arm32 && op0_str   =="sb"? 4  : // special arm32 regs
			is_arm32 && op0_str   =="sl"? 4  : 
			is_arm32 && op0_str   =="lr"? 4  : 
			is_arm32 && op0_str   =="fp"? 4  : 
			is_arm32 && op0_str   =="sp"? 4  : 
			is_arm32 && op0_str   =="ip"? 4  : 
			is_arm32 && op0_str[0]=='d' ? 8  : // arm32 regs
			throw domain_error("Cannot decode instruction size");
			;

		// check if we've seen this address already
		const auto already_seen_it = already_scoopified.find(referenced_address);
		if(already_seen_it == end(already_scoopified)) 
		{

			// find section and sanity check.
			const auto sec=exeiop->sections.findByAddress(referenced_address);
			if(sec==nullptr) continue; 

			// only trying to do this for executable chunks, other code deals with
			// scoops not in the .text section.
			if(!sec->isExecutable()) continue;

			const auto sec_data       = sec->get_data();
			const auto sec_start      = sec->get_address();
			const auto sec_end        = VirtualOffset_t(sec_start + sec->get_size());

			// never copy bytes the section does not provide: skip sections without
			// data and references that run past the end of the section.
			if(sec_data==nullptr) continue;
			if(referenced_address+referenced_size > sec_end) continue;

			const auto new_scoop_addr = do_unpin ?  VirtualOffset_t(0u) : referenced_address;
			const auto the_contents   = string(&sec_data[referenced_address-sec_start],referenced_size);
			const auto fileid         = firp->getFile()->getBaseID();
			auto new_start_addr       = firp->addNewAddress(fileid,new_scoop_addr);
			auto new_end_addr         = firp->addNewAddress(fileid,new_scoop_addr+referenced_size-1);
			const auto permissions    = 0x4; /* R-- */
			const auto is_relro       = false;

			// create the new scoop
			already_scoopified[referenced_address] = firp->addNewDataScoop(name, new_start_addr, 
					new_end_addr, NULL, permissions, is_relro, the_contents);
		}

		auto newscoop = already_scoopified[referenced_address] ;
		assert(newscoop);
		const auto start_addr = newscoop -> getStart();
		const auto end_addr   = newscoop -> getEnd();

		if(do_unpin)
		{
			const auto new_addend = -uint32_t(insn->getAddress()->getVirtualOffset());
			(void)firp->addNewRelocation(insn,0,"pcrel",newscoop, new_addend);
		}
		cout << "Allocated data in text segment " << name << "=(" << start_addr->getVirtualOffset() << "-"
		     << end_addr->getVirtualOffset() << ")" 
		     << " for " << d->getDisassembly() << "@" << hex << insn->getAddress()->getVirtualOffset() << endl;
	}
}
/*
 * ctor_detection - look for constructor/destructor pointer tables stored at
 * the end of the .text section and turn them into data scoops.
 *
 * Helpers defined inside:
 *  - create_text_scoop(name, start_vo, end_vo): adds a scoop covering
 *    [start_vo, end_vo] with the bytes and the permission bits of the section
 *    that contains start_vo, relro false, and logs it.
 *  - gnustyle(): returns false at once when do_ctor_detection is cdt_NO, or
 *    when it is cdt_PE32AUTO and the file type is not adftPE.  Otherwise it
 *    treats the end of .text as the end of a destructor table, finds where
 *    that table starts, creates the .dtor scoop, then repeats the search
 *    ending at the destructor table start for the .ctor scoop.  It returns
 *    true only when both tables were found.
 *  - find_ctor_start(sec, end_of_ctor): requires a zero pointer-sized word
 *    just before end_of_ctor, then walks backwards one pointer at a time.
 *    It returns the address of the first word equal to -1, or 0 when it
 *    runs off the start of the section or meets a word that lies outside
 *    [section start, section start + size].
 *  - winstyle() and scoopify_data_dir_sections() follow an #if 0 and both of
 *    their call sites are commented out.
 *
 * When gnustyle() returns false a caution is written to cerr.
 *
 * NOTE(review): this listing appears to have lost lines.  The terminator of
 * the create_text_scoop lambda, the #endif that matches #if 0, and the
 * closing brace of the function are not present, so the exact extent of the
 * disabled region cannot be read from here.  Confirm against the original.
 */
void PopulateCFG::ctor_detection(FileIR_t *firp)
{
	const auto create_text_scoop=[&](const string& name, const VirtualOffset_t& start_vo, const VirtualOffset_t& end_vo)
		{
			auto startaddr             = firp->addNewAddress(firp->getFile()->getBaseID(), start_vo); // start and end address 
			auto endaddr               = firp->addNewAddress(firp->getFile()->getBaseID(), end_vo);
			const auto sec             = exeiop->sections.findByAddress(start_vo);                    // section that contains the data 
			assert(sec);
			const auto sec_data        = sec->get_data();                                             // data
			const auto scoop_start_ptr = sec_data+(start_vo-sec->get_address());                      // relevant start of data
			const auto the_contents    = string(scoop_start_ptr, end_vo-start_vo+1); 
			const auto permissions     =                                                              // permissions and relro bit.
				( sec->isReadable()   << 2 ) |  
				( sec->isWriteable()  << 1 ) | 
				( sec->isExecutable() << 0 ) ;
			const auto is_relro       = false;

			// finally, create the new scoop
			firp->addNewDataScoop( name, startaddr, endaddr, NULL, permissions, is_relro, the_contents);
			cout << "Added ctor/dtor scoop called " << name << " at " << hex << start_vo << "-" << end_vo << endl;
	const auto gnustyle = [&]() -> bool
	{
		assert(firp);
		if(do_ctor_detection == cdt_NO) return false;

		const auto is_pe = firp->getArchitecture()->getFileType() == adftPE ;
		if(do_ctor_detection == cdt_PE32AUTO && !is_pe) return false;

		// is either a PE file and we are in auto detect mode
		// or ctor_detection is yes.
		assert(is_pe || do_ctor_detection == cdt_YES);

		auto find_ctor_start=[&](const exeio_section_t* sec, const VirtualOffset_t end_of_ctor) -> VirtualOffset_t
			{
				// values needed later for various things
				const auto ptrsize   = firp->getArchitectureBitWidth() / 8 ;
				const auto sec_data  = sec->get_data();
				const auto sec_start = sec->get_address();

				// check for a null terminator at the stated end of table
				const auto null_term_addr=end_of_ctor-ptrsize;
				const auto null_term_value =
					ptrsize == 8 ?  *(uint64_t*)(sec_data+null_term_addr-sec_start) :
					ptrsize == 4 ?  *(uint32_t*)(sec_data+null_term_addr-sec_start) :
					throw invalid_argument("Unknown ptrsize");

				// not found, return this isn't sane.
				if(null_term_value!=0) return 0;

				// now scan the table in reverse order for 1) valid entries, or 2) a -1 terminator.
				auto next_addr=null_term_addr-ptrsize;
				while(true)
				{
					// check for flowing
					if(next_addr<sec_start) return 0;

					// get the table entry
					const auto ctor_entry_value =
						ptrsize == 8 ?  *(uint64_t*)(sec_data+next_addr-sec_start) :
						ptrsize == 4 ?  *(uint32_t*)(sec_data+next_addr-sec_start) :
						throw invalid_argument("Unknown ptrsize");

					// check for the -1 terminator
					if(ptrsize == 8 && (int64_t)ctor_entry_value==int64_t(-1)) return next_addr;
					if(ptrsize == 4 && (int32_t)ctor_entry_value==int32_t(-1)) return next_addr;

					// check if the table entry isn't a valid address
					const auto is_before_start = ctor_entry_value <  sec->get_address() ;
					const auto is_after_end    = ctor_entry_value > (sec->get_address()+sec->get_size());
					if(is_before_start || is_after_end) return 0;

					next_addr -= ptrsize;
				}
			};


		const auto text_sec = exeiop->sections[".text"];
		if(text_sec == nullptr) return false;
		const auto text_end_addr = text_sec->get_address()+text_sec->get_size();
		const auto dtor_end      = text_end_addr-1;
		const auto dtor_start    = find_ctor_start(text_sec,text_end_addr);
		if(dtor_start==0) return false;
		create_text_scoop(".dtor", dtor_start, dtor_end);

		const auto ctor_end   = dtor_start-1;
		const auto ctor_start = find_ctor_start(text_sec,dtor_start);
		if(ctor_start==0) return false;
		create_text_scoop(".ctor", ctor_start, ctor_end);
		return true;
	};

Jason Hiser's avatar
Jason Hiser committed
#if 0
	const auto winstyle = [&]() -> bool
	{
		const auto  reloc_sec = exeiop->sections[".reloc"];
		if(reloc_sec == nullptr) return false;
		const auto reloc_data = string(reloc_sec->get_data(), reloc_sec->get_size());
		const auto peblissp = static_cast<pe_bliss::pe_base*>(exeiop->get_pebliss());
		if(peblissp == nullptr) return false;
		const auto pe_image_base = peblissp->get_image_base_64();

		// for each block in the reloc section
		using reloc_block_header_t = 
			struct 
			{
				uint32_t page;
				uint32_t block_size;
			};
		using reloc_block_t = 
			struct
			{
				uint16_t offset:12; 	// lower bits 
				uint16_t type:4;	// higher bits than offset?
			};
		static_assert(sizeof(reloc_block_header_t)==8, "size of reloc_block_header is wrong");
		static_assert(sizeof(reloc_block_t)==2, "size of reloc_block is wrong");
		// start at block 0
		auto current_offset = VirtualOffset_t(0);
		while(true)
		{

			// stop if we fall off the end of the section.
			if(current_offset + sizeof(reloc_block_header_t) > static_cast<size_t>(reloc_sec->get_size()))
				break;

			// read the header
			auto header = reloc_block_header_t();
			memcpy(&header, &reloc_data.c_str()[current_offset], sizeof(header));

			// start counting the length after we've read the header.
			auto current_length = size_t(sizeof(header));

			// for each entry in the block
			while(true)
			{
				// stop if we exceed the block
				if(current_length + sizeof(reloc_block_t) > header.block_size)
					break;
				// stop if we exceed the section
				if(current_offset + current_length + sizeof(reloc_block_t) > static_cast<size_t>(reloc_sec->get_size()))
					break;
				
				// read the block
				auto block  = reloc_block_t();
				memcpy(&block, &reloc_data.c_str()[current_offset+current_length], sizeof(block));
				current_length += sizeof(block);

				// for now, just log
				const auto rva = pe_image_base + header.page + block.offset;
				cout << "reloc rva = " << hex << rva << " type = " << block.type << '\n';

				switch(block.type)
				{
					case pe_bliss::pe_win::image_rel_based_absolute: /* ignored */
						break;
					case pe_bliss::pe_win::image_rel_based_dir64: /* dir64 */
					{
						const auto relocd_sec = exeiop->sections.findByAddress(rva);
						if(!relocd_sec) throw runtime_error("Unexpected relocation address?");

						const auto relocd_sec_name = relocd_sec->get_name();
						if(relocd_sec_name != ".text") // no need to pin anything besides the text seg.
							continue;   // try next block

						const auto new_name = "winstyle_scoop_"+to_string(rva);
						create_text_scoop(new_name, rva, rva+8);

						break;
					}
					default:
						throw runtime_error("Cannot handle PE relocation.");

				}
			}
			// update to the next block
			current_offset += current_length;
		}
		return true;


	};


	const auto scoopify_data_dir_sections = [&]() 
	{
		const auto peblissp = static_cast<pe_bliss::pe_base*>(exeiop->get_pebliss());

		for(auto dir=0u; dir<15; dir++)
		{
			if(!peblissp->directory_exists(dir))
				continue;

			const auto dir_rva  = peblissp->get_directory_rva (dir);
			const auto dir_size = peblissp->get_directory_size(dir);

			cout << "Directory " << dec << dir << " is at " << hex << dir_rva << " - " << dir_rva+dir_size << '\n';

			
		}
	};

	
//	scoopify_data_dir_sections();

	if(gnustyle()) return;
//	if(winstyle()) return;

	cerr << "Caution!  Could not find any reloc handling for ctor/dtor and/or plts." << endl;
/*
 * fill_in_landing_pads - attach exception landing pads that belong to no
 * function to a function.
 *
 * The eh-frame data of firp is parsed (and printed when the environment
 * variable EHIR_VERBOSE is set).  Landing pads returned by find_lp() that
 * have no function are collected in insns_to_add_to_funcs, grouped under a
 * function.  Each group is then processed as a worklist: an instruction
 * taken from the set that already has a function is skipped; otherwise
 * setFunction(func) is called on it, it is logged, and its target and
 * fallthrough are added to the worklist.  A summary line is printed for
 * each function.
 *
 * NOTE(review): this listing appears to have lost lines.  The braces of the
 * first loop, the body of its first if (presumably a continue), the body of
 * the if that follows the second find_lp() call, any update of insn_count,
 * and the closing braces of the function are not present.  Confirm against
 * the original file.
 */
void PopulateCFG::fill_in_landing_pads(FileIR_t *firp)
{
	const auto eh_frame_rep_ptr = split_eh_frame_t::factory(firp);
	if(getenv("EHIR_VERBOSE"))
		eh_frame_rep_ptr->print();
	cout<<"Completed eh-frame parsing"<<endl;

	map<Function_t*,set<Instruction_t*> > insns_to_add_to_funcs;

	for(const auto t : firp->getInstructions())
		if(t->getFunction()==NULL)
		auto lp=eh_frame_rep_ptr->find_lp(t);
		if(lp && lp->getFunction()==NULL)
			insns_to_add_to_funcs[t->getFunction()].insert(lp);
	for(const auto & p : insns_to_add_to_funcs)
	{
		auto & func=p.first; 	
		auto insns=p.second; 	/* copy */
		auto insn_count=0;

		while(insns.size()>0 )
		{
			auto it=insns.begin();
			auto insn=*it;
			insns.erase(it);

			assert(insn);
			if(insn->getFunction()!=NULL)
				continue;

			auto lp=eh_frame_rep_ptr->find_lp(insn);
			if(lp && lp->getFunction()==NULL)
			insn->setFunction(func);
			cout<<"	Adding "<<insn->getBaseID()<<":"<<insn->getDisassembly()<<"@"<<hex<<insn->getAddress()->getVirtualOffset()<<dec<<endl;
			auto target=insn->getTarget();
			auto fallthru=insn->getFallthrough();

			if(target) insns.insert(target);
			if(fallthru) insns.insert(fallthru);
		}
		cout<<"Found LP outside of function "<<func->getName()<<" added "<<insn_count<<" instructions"<<endl;
int PopulateCFG::parseArgs(const vector<string> step_args)
Jason Hiser's avatar
Jason Hiser committed
    for (auto i = 0u; i < step_args.size(); ++i)
            if (step_args[i]=="--fix-landing-pads")
                    fix_landing_pads = true;
            else if (step_args[i]=="--no-fix-landing-pads")
                    fix_landing_pads = false;
            else if (step_args[i]=="--extra-scoop")
	    {
		    i++;
		    const auto extra_scoops_str=step_args[i];
		    stringstream ss(extra_scoops_str);
		    auto start_addr = VirtualOffset_t(0);
		    auto end_addr   = VirtualOffset_t(0);
		    auto c          = uint8_t(0);
		    ss>>hex>>start_addr>>c>>end_addr;
		    assert(c=='-');
		    extra_scoops.insert({start_addr,end_addr});
		    cout << "Recognizing request to add " << hex << start_addr << "-" << end_addr << " as a scoop."<<endl;
	    }
            else if (step_args[i]=="--do-ctor-detection")
	    {
		    do_ctor_detection=cdt_YES;
	    }
            else if (step_args[i]=="--no-ctor-detection")
	    {
		    do_ctor_detection=cdt_NO;
	    }
Matthew McGill's avatar
Matthew McGill committed

    cout<<"fix_landing_pads="<<fix_landing_pads<<endl;