Newer
Older
/*
* Copyright (c) 2014 - Zephyr Software LLC
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from Zephyr
* Software.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: Zephyr Software
* e-mail: jwd@zephyr-software.com
* URL : http://www.zephyr-software.com/
*
*/
Matthew McGill
committed
#include "fill_in_cfg.hpp"
#include <iostream>
#include <string.h>
#include <assert.h>
#include "elfio/elfio.hpp"

Jason Hiser
committed
#include "split_eh_frame.hpp"
#include "back_search.hpp"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include <pe_bliss.h>
#include <pe_structures.h>
#pragma GCC diagnostic pop
using namespace std;
using namespace EXEIO;

Jason Hiser
committed
using namespace PopCFG;
/*
 * Format a value as text: the literal prefix "0x" followed by the value
 * written through a stream that is in hexadecimal mode.
 *
 * n - the value to format; anything that can be streamed with operator<<.
 *
 * Returns the formatted string.
 */
template <typename T>
static inline std::string to_hex_string(const T& n)
{
	std::ostringstream hex_out;
	hex_out << "0x";
	hex_out << std::hex << n;
	return hex_out.str();
}
/* start from scratch each time */
insnMap.clear();
/* for each instruction in the IR */
const auto fileID= insn->getAddress()->getFileID();
const auto vo = insn->getAddress()->getVirtualOffset();
const auto p = pair<DatabaseID_t,VirtualOffset_t>(fileID,vo);
assert(insnMap[p]==NULL);
insnMap[p]=insn;
}
}
DecodedInstruction_t *disasm,
Instruction_t *insn,
FileIR_t *firp
)
{
assert(disasm);
assert(insn);
const auto is_mips = firp->getArchitecture()->getMachineType() == admtMips32;
// always set fallthrough for mips
// other platforms can end fallthroughs ofr uncond branches and returns
if(!is_mips)
// check for branches with targets
if(
(disasm->isUnconditionalBranch() ) || // it is a unconditional branch
(disasm->isReturn()) // or a return
)
{
// this is a branch with no fallthrough instruction
return;
}
}
/* get the address of the next instrution */
auto virtual_offset=insn->getAddress()->getVirtualOffset() + insn->getDataBits().size();
/* create a pair of offset/file */
auto p=pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset);
/* lookup the target insn from the map */
auto fallthrough_insn=insnMap[p];
/* sanity, note we may see odd control transfers to 0x0 */
if(fallthrough_insn==NULL && virtual_offset!=0)
{
cout<<"Cannot set fallthrough for "<<std::hex<<insn->getAddress()->getVirtualOffset();
bad_fallthrough_count++;
}
/* set the target for this insn */
if(fallthrough_insn!=0)
missed_instructions.insert(pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset));
/*
 * On MIPS32, attach a "delay_slot1" relocation from a branch instruction to
 * the instruction found 4 bytes after it.  Returns immediately for any other
 * machine type.
 *
 * insnMap - lookup from {file ID, virtual offset} to instruction.
 * disasm  - not referenced in the lines visible here.
 * insn    - the instruction being examined.
 * firp    - the IR queried for the machine type and given the new relocation.
 *
 * NOTE(review): the braces around the branch handling and the end of this
 * function are not visible in this view of the file.
 */
void PopulateCFG::set_delay_slots
(
InstructionMap_t &insnMap,
DecodedInstruction_t *disasm,
Instruction_t *insn,
FileIR_t *firp
)
{
const auto is_mips = firp->getArchitecture()->getMachineType() == admtMips32;
// nothing to do unless the machine type is MIPS32
if(!is_mips)
return;
// using df=DecodedInstruction_t::factory;
// decode insn afresh; the disasm parameter is not consulted
const auto d=DecodedInstruction_t::factory(insn);
if(d->isBranch())
const auto branch_addr = insn->getAddress();
// the delay slot is looked up in the same file, 4 bytes past the branch
const auto delay_slot_insn = insnMap[ {branch_addr->getFileID(), branch_addr->getVirtualOffset() + 4}];
// a branch whose following instruction is not in the map is treated as fatal
assert(delay_slot_insn);
// record the link in the IR; the returned relocation is not needed here
(void)firp->addNewRelocation(insn,0,"delay_slot1", delay_slot_insn, 0);
InstructionMap_t &insnMap,
DecodedInstruction_t *disasm,
Instruction_t *insn,
FileIR_t *firp
)
{
assert(insn);
assert(disasm);
const auto &operands = disasm->getOperands();
for(auto operand : operands)
{
// check for branches with targets
if(
disasm->isBranch() && // it is a branch
!disasm->isReturn() && // and not a return
operand->isConstant() // and has a constant argument
// cout<<"Found direct jump with addr=" << insn->getAddress()->getVirtualOffset() <<
// " disasm="<<disasm->CompleteInstr<<" ArgMnemonic="<<
// disasm->Argument1.ArgMnemonic<<"."<<endl;
/* get the offset */
auto virtual_offset=disasm->getAddress();
/* create a pair of offset/file */
auto p=pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset);
/* lookup the target insn from the map */
auto target_insn=insnMap[p];
/* sanity, note we may see odd control transfers to 0x0 */
if(target_insn==NULL)
unsigned char first_byte=0;
if(insn->getFallthrough())
first_byte=(insn->getFallthrough()->getDataBits().c_str())[0];
VirtualOffset_t jump_dist=virtual_offset-(insn->getAddress()->getVirtualOffset()+(insn->getDataBits()).size());
if(
// jump 1 byte forward
jump_dist == 1 &&
// and we calculated the fallthrough
// and the fallthrough starts with a lock prefix
first_byte==0xf0
)
{
odd_target_count++;
}
else
{
if(virtual_offset!=0)
cout<<"Cannot set target (target="<< std::hex << virtual_offset << ") for "<<std::hex<<insn->getAddress()->getVirtualOffset()<<"."<<endl;
bad_target_count++;
}
/* set the target for this insn */
if(target_insn!=0)
missed_instructions.insert( pair<DatabaseID_t,VirtualOffset_t>(insn->getAddress()->getFileID(),virtual_offset));
/*
 * Return the file held by firp, asserting first that its base ID equals fileid.
 *
 * firp   - the IR whose file is returned.
 * fileid - the ID the caller expects that file to have.
 *
 * NOTE(review): the braces of this function body are not visible in this view.
 */
File_t* PopulateCFG::find_file(FileIR_t* firp, DatabaseID_t fileid)
assert(firp->getFile()->getBaseID()==fileid);
return firp->getFile();
/*
 * Try to materialize an instruction for every {file ID, address} pair recorded
 * in missed_instructions: scan the executable's sections for the address,
 * decode the bytes found there, and add the decoded instruction to the IR.
 * Addresses that cannot be resolved are counted in failed_target_count.
 *
 * firp - the IR that receives the new addresses and instructions.
 *
 * NOTE(review): this view of the function is missing lines (braces, the
 * statements guarded by several `if`s, the loop that copies bytes using `i`,
 * and the binding of `disasm`), and it contains stray "Jason Hiser" /
 * "committed" text that looks like version-control blame residue, not code.
 */
void PopulateCFG::add_new_instructions(FileIR_t *firp)
for(auto p : missed_instructions)
// p.second is the address half of the pair, p.first the file ID half
auto missed_address=p.second;
/* get the file ID of the instruction that's missing at the missed address */
auto missed_fileid=p.first;
auto filep=find_file(firp,missed_fileid);

Jason Hiser
committed
int secnum = exeiop->sections.size();
int secndx=0;
bool found=false;
/* look through each section and find the missing target*/
// the scan starts at index 1, so section 0 is never examined
for (secndx=1; secndx<secnum; secndx++)
{
/* not a loaded section */

Jason Hiser
committed
if( !exeiop->sections[secndx]->isLoadable())

Jason Hiser
committed
if( !exeiop->sections[secndx]->isExecutable())

Jason Hiser
committed
// first/second: start address and one-past-the-end address of this section
VirtualOffset_t first=exeiop->sections[secndx]->get_address();
VirtualOffset_t second=exeiop->sections[secndx]->get_address()+exeiop->sections[secndx]->get_size();

Jason Hiser
committed
const char* data=exeiop->sections[secndx]->get_data();

Jason Hiser
committed
VirtualOffset_t offset_into_section=missed_address-exeiop->sections[secndx]->get_address();

Jason Hiser
committed
// decode at the missed address, limited to the bytes that remain in the section
auto disasm_p=DecodedInstruction_t::factory(missed_address, (void*)&data[offset_into_section], exeiop->sections[secndx]->get_size()-offset_into_section );
/* if we found the instruction, but can't disassemble it, then we skip out for now */
// NOTE(review): `disasm` is presumably bound to *disasm_p on a line not visible here
if(!disasm.valid())
if(getenv("VERBOSE_CFG"))
cout<<"Found invalid insn at "<<missed_address<<endl;
cout<<"Found valid insn at "<<missed_address<<": "<<disasm.getDisassembly()<<endl;
const auto instr_len = disasm.length();
/* intel instructions have a max size of 16 */
assert(1<=instr_len && instr_len<=16);
/* here we are certain we found the instruction */
found=true;
/* get the new bits for an instruction */
string newinsnbits;
newinsnbits.resize(instr_len);
// NOTE(review): the loop that declares and advances `i` is not visible here
newinsnbits[i]=data[offset_into_section+i];
/* create a new address */
auto newaddr=firp->addNewAddress(missed_fileid,missed_address);
auto newinsn=firp->addNewInstruction(newaddr, nullptr, newinsnbits, disasm.getDisassembly()+string(" from fill_in_cfg "), nullptr);
(void)newinsn;// just add to IR
/* fallthrough/target/is indirect will be set later */
/* insert into the IR */
cout<<"Found new instruction, "<<newinsn->getComment()<<", at "<<std::hex<<newinsn->getAddress()->getVirtualOffset()<<" in file "<<"<no name yet>"<<"."<<endl;
found_instructions++;
}
}
// no section yielded an instruction for this address
if(!found)
{
failed_target_count++;
cout<<"Cannot find address "<<std::hex<<missed_address<<" in file "<<"<no name yet>"<<"."<<endl;
}
cout<<"Found a total of "<<std::dec<<found_instructions<<" new instructions."<<endl;
/*
 * Repeatedly rebuild the instruction map and set fallthroughs, targets and
 * delay slots for instructions in the IR, looping for as long as
 * missed_instructions holds more entries than failed_target_count.
 * Afterwards every instruction's original address ID is set to the base ID of
 * its current address.
 *
 * firp - the IR being completed.
 *
 * NOTE(review): this view is missing lines, including the function's braces,
 * the loop header that declares `insn`, the definition of `instr_len`, and
 * the body of the loop that reports unresolved addresses.
 */
void PopulateCFG::fill_in_cfg(FileIR_t *firp)
int round=0;
do
{
// reset the per-round counters and the list of addresses still to resolve
bad_target_count=0;
bad_fallthrough_count=0;
failed_target_count=0;
missed_instructions.clear();
populate_instruction_map(insnMap, firp);
cout << "Found "<<firp->getInstructions().size()<<" instructions." <<endl;
/* for each instruction, disassemble it and set the target/fallthrough */
auto disasm=DecodedInstruction_t::factory(insn);
// the decoded length must agree with the number of bytes stored for the instruction
assert(instr_len==insn->getDataBits().size());
set_fallthrough(insnMap, disasm.get(), insn, firp);
set_target(insnMap, disasm.get(), insn, firp);
set_delay_slots(insnMap, disasm.get(), insn, firp);
}
if(bad_target_count>0)
cout<<std::dec<<"Found "<<bad_target_count<<" bad targets at round "<<round<<endl;
if(bad_fallthrough_count>0)
cout<<"Found "<<bad_fallthrough_count<<" bad fallthroughs at round "<<round<<endl;
cout<<"Missed instruction count="<<missed_instructions.size()<<endl;
round++;
/* keep trying this while we're resolving targets. if at any point we fail to resolve a new target/fallthrough address, then we give up */
} while(missed_instructions.size()>failed_target_count);
cout<<"Caution: Was unable to find instructions for these addresses:"<<hex<<endl;
for(auto p : missed_instructions)
/* for each instruction, set the original address id to be that of the address id, as fill_in_cfg is
* designed to work on only original programs.
*/
for(auto insn : firp->getInstructions())
insn->setOriginalAddressID(insn->getAddress()->getBaseID());
bool PopulateCFG::is_in_relro_segment(const int secndx)
{

Jason Hiser
committed
ELFIO::elfio *real_exeiop = reinterpret_cast<ELFIO::elfio*>(exeiop->get_elfio());
if(!real_exeiop)
return false;

Jason Hiser
committed
int segnum = real_exeiop->segments.size();

Jason Hiser
committed
VirtualOffset_t sec_start=(VirtualOffset_t)(exeiop->sections[secndx]->get_address());
VirtualOffset_t sec_end=(VirtualOffset_t)(exeiop->sections[secndx]->get_address() + exeiop->sections[secndx]->get_size() - 1 );
/* look through each section */
for (int segndx=1; segndx<segnum; segndx++)
{

Jason Hiser
committed
ELFIO::Elf_Word type=real_exeiop->segments[segndx]->get_type();
#ifndef PT_GNU_RELRO
#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */
#endif
if(type==PT_GNU_RELRO)
{

Jason Hiser
committed
VirtualOffset_t seg_start=(VirtualOffset_t)(real_exeiop->segments[segndx]->get_virtual_address());
VirtualOffset_t seg_end=(VirtualOffset_t)(real_exeiop->segments[segndx]->get_virtual_address() + real_exeiop->segments[segndx]->get_memory_size() - 1 );
// check if start lies within
if(seg_start <= sec_start && sec_start <= seg_end)
return true;
// check if end lies within
if(seg_start <= sec_end && sec_end <= seg_end)
return true;
// check if crosses
if(sec_start < seg_start && seg_end < sec_end)
return true;
}
}
return false;
}
/*
 * Add data scoops to the IR: one per section of the executable (section
 * indices 1 and up), then one per user-requested address range held in
 * extra_scoops.
 *
 * firp - the IR that receives the new addresses and scoops.
 *
 * The log messages below indicate that non-loadable and executable sections
 * are skipped, but the statements that do the skipping are not visible here.
 *
 * NOTE(review): this view is missing lines (braces, the bodies of several
 * `if`s, and the declaration of `permissions` for the per-section scoop), and
 * it contains stray "Jason Hiser" / "committed" text that looks like
 * version-control blame residue, not code.
 */
void PopulateCFG::fill_in_scoops(FileIR_t *firp)

Jason Hiser
committed
auto secnum = exeiop->sections.size();

Jason Hiser
committed
// the scan starts at index 1, so section 0 is never examined
for (auto secndx=1; secndx<secnum; secndx++)

Jason Hiser
committed
if(!exeiop->sections[secndx]->isLoadable())

Jason Hiser
committed
cout << "Skipping scoop for section (not loadable) " << exeiop->sections[secndx]->get_name() << endl;

Jason Hiser
committed
// a section that is both writeable and executable is reported through fout
if(exeiop->sections[secndx]->isWriteable() && exeiop->sections[secndx]->isExecutable())

Jason Hiser
committed
fout << "Found that section " << exeiop->sections[secndx]->get_name() << " is both writeable and executable. Program is inherently unsafe!" << endl;
/* executable sections handled by zipr/spri. */

Jason Hiser
committed
if(exeiop->sections[secndx]->isExecutable())

Jason Hiser
committed
cout << "Skipping scoop for section (executable) " << exeiop->sections[secndx]->get_name() << endl;

Jason Hiser
committed
const auto name=string(exeiop->sections[secndx]->get_name());

Jason Hiser
committed
/* start and end address */
// the end address is inclusive: the last byte of the section
auto startaddr = firp->addNewAddress(firp->getFile()->getBaseID(), exeiop->sections[secndx]->get_address());
auto endaddr = firp->addNewAddress(firp->getFile()->getBaseID(), exeiop->sections[secndx]->get_address() + exeiop->sections[secndx]->get_size()-1);
// start from an all-zero buffer of the section's size
auto the_contents=string(exeiop->sections[secndx]->get_size(), '\0');
// deal with .bss segments that are 0 init'd.
if (exeiop->sections[secndx]->get_data())
the_contents.assign(exeiop->sections[secndx]->get_data(),exeiop->sections[secndx]->get_size());

Jason Hiser
committed
// permission bits: readable=4, writeable=2, executable=1
( exeiop->sections[secndx]->isReadable() << 2 ) |
( exeiop->sections[secndx]->isWriteable() << 1 ) |
( exeiop->sections[secndx]->isExecutable() << 0 ) ;
bool is_relro=is_in_relro_segment(secndx);
auto newscoop=firp->addNewDataScoop( name, startaddr, endaddr, NULL, permissions, is_relro, the_contents, max_base_id++ );
(void)newscoop; // just give it to the IR

Jason Hiser
committed
cout << "Allocated new scoop for section " << name
<< "(" << hex << startaddr->getVirtualOffset() << "-"
<< hex << endaddr->getVirtualOffset() << ")"
<< " perms=" << permissions << " relro=" << boolalpha << is_relro << endl;

Jason Hiser
committed
// user-requested scoops: each entry is a {start, end} pair of virtual offsets, end inclusive
for(auto extra_scoop : extra_scoops)
{
const auto start_vo = extra_scoop.first; // start and end offsets in this file
const auto end_vo = extra_scoop.second;
auto startaddr = firp->addNewAddress(firp->getFile()->getBaseID(), start_vo); // start and end address
auto endaddr = firp->addNewAddress(firp->getFile()->getBaseID(), end_vo);
const auto sec = exeiop->sections.findByAddress(start_vo); // section that contains the data
assert(sec);
const auto sec_data = sec->get_data(); // data
const auto scoop_start_ptr = sec_data+(start_vo-sec->get_address()); // relevant start of data
const auto the_contents = string(scoop_start_ptr, end_vo-start_vo+1);
const auto name = string("user_added_")+to_hex_string(start_vo); // name of new segment
const auto permissions = // permissions and relro bit.
( sec->isReadable() << 2 ) |
( sec->isWriteable() << 1 ) |
( sec->isExecutable() << 0 ) ;
const auto is_relro = false;
// finally, create the new scoop
firp->addNewDataScoop( name, startaddr, endaddr, NULL, permissions, is_relro, the_contents, max_base_id++ );
}
/*
 * Find data that load instructions read out of executable sections and
 * register it in the IR as read-only data scoops.
 *
 * An instruction is considered when its mnemonic starts with "ldr", "vldr" or
 * "ldrd".  The referenced address comes either from a PC-relative memory
 * operand, or (for an ldrd whose third operand is not PC-relative) from an
 * earlier "add ..., pc, #" instruction that sets the base register.  Each
 * referenced address gets at most one scoop; when do_unpin is set, a "pcrel"
 * relocation from the instruction to the scoop is added as well.
 *
 * firp - the IR that is scanned and that receives the new scoops.
 *
 * NOTE(review): this view is missing lines, including the loop header that
 * declares `insn` and the declarations of `op0_str` and `referenced_size`.
 * The bare numbers and the "Jason Hiser" / "committed" lines look like
 * residue from a version-control blame view, not code.
 */
void PopulateCFG::detect_scoops_in_code(FileIR_t *firp)
{
// make sure preds are up to date for this
calc_preds(firp);

Jason Hiser
committed
// data for this function
// maps a referenced address to the scoop already created for it
auto already_scoopified=map<VirtualOffset_t,DataScoop_t*>();

Jason Hiser
committed
const auto is_arm64 = firp->getArchitecture()->getMachineType() == admtAarch64;
const auto is_arm32 = firp->getArchitecture()->getMachineType() == admtArm32;
const auto is_arm_variant = is_arm32 || is_arm64;
// only valid for arm64
// check each insn for an ldr with a pcrel operand.
{
// look for ldr's with a pcrel operand
const auto d = DecodedInstruction_t::factory(insn);
const auto mnemonic = d->getMnemonic();
const auto is_ldrd_variant = mnemonic.substr(0,4) == "ldrd";
const auto is_ldr_variant = mnemonic.substr(0,3) == "ldr";
const auto is_vldr_variant = mnemonic.substr(0,4) == "vldr";
const auto is_relevant_ldr = is_ldr_variant || is_vldr_variant || is_ldrd_variant;
// extract op0
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
// the address we detect as referenced by this instruction.
auto referenced_address = VirtualOffset_t(0);
// by default a pcrel relocation is added on arm32 only; the ldrd path below turns it off
auto do_unpin = is_arm32;
if(is_ldrd_variant && !d->getOperand(2)->isPcrel())
{
const auto mem_op = d->getOperand(2);
if( mem_op->hasIndexRegister() ) continue;
if( mem_op->getMemoryDisplacement() != 0 ) continue;
// the base register name is the operand text up to the first space
const auto mem_str = mem_op->getString();
const auto end_of_base_reg = mem_str.find(" ");
const auto find_reg = mem_str.substr(0,end_of_base_reg);
// find the instruction that sets the base reg.
auto add_pc_insn = (Instruction_t*)nullptr;
if(!backup_until( string()+"add.* "+find_reg+", pc, #", /* look for this pattern. */
add_pc_insn, /* strong instruction in add_pc_insn */
insn, /* insn I10 */
"^"+find_reg+"$" /* stop if find_reg set */
))
{
continue;
}
const auto add_pc_insn_d = DecodedInstruction_t::factory(add_pc_insn);
// record to unpin something.
// on arm32 the constant is taken relative to the add's address plus 8; otherwise it is used as-is
referenced_address = add_pc_insn_d -> getOperand(2)->getConstant() + (is_arm32 ? add_pc_insn->getAddress()->getVirtualOffset() + 8 : 0);
do_unpin = false;
}
else
{
// capstone reports ldrd instructions as having 2 "dest" operands.
// so we skip to the 3rd operand to get the memory op.
// todo: fix libirdb-core to fix this and skip the odd operand.
// todo: report to capstone that they are broken.
const auto mem_op = d->getOperand(1);
if( !mem_op->isPcrel()) continue;
// if there is an indexing operation, skip this instruction.
if( mem_op->hasIndexRegister()) continue;
// sanity check that it's a memory operation, and extract fields
assert(mem_op->isMemory());
// on arm32 the displacement is taken relative to the instruction's address plus 8; otherwise it is used as-is
referenced_address = mem_op->getMemoryDisplacement() + (is_arm32 ? insn->getAddress()->getVirtualOffset() + 8 : 0);
}
// no address found.
if(referenced_address == 0) continue;
const auto name = "data_in_text_"+to_hex_string(referenced_address);
// if op0 is the PC, the instruction is some switch dispatch that we have to detect in more depth. skip here. see check_arm32_switch...
if(is_arm32 && op0_str == "pc" ) continue;
// NOTE(review): the chain below presumably initializes `referenced_size` (bytes loaded, chosen from the destination register name); its declaration is not visible here
is_ldrd_variant ? 8 : // special case for load int reg pair.
is_arm64 && op0_str[0]=='w' ? 4 : // arm64 regs
is_arm64 && op0_str[0]=='x' ? 8 :
is_arm64 && op0_str[0]=='s' ? 4 :
is_arm64 && op0_str[0]=='d' ? 8 :
is_arm64 && op0_str[0]=='q' ? 16 :
is_arm32 && op0_str[0]=='s' ? 4 : // arm32 regs
is_arm32 && op0_str[0]=='r' ? 4 : // arm32 regs
is_arm32 && op0_str =="sb"? 4 : // special arm32 regs
is_arm32 && op0_str =="sl"? 4 :
is_arm32 && op0_str =="lr"? 4 :
is_arm32 && op0_str =="fp"? 4 :
is_arm32 && op0_str =="sp"? 4 :
is_arm32 && op0_str =="ip"? 4 :
is_arm32 && op0_str[0]=='d' ? 8 : // arm32 regs

Jason Hiser
committed
throw domain_error("Cannot decode instruction size");
;
// check if we've seen this address already
const auto already_seen_it = already_scoopified.find(referenced_address);
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
if(already_seen_it == end(already_scoopified))
{
// find section and sanity check.
const auto sec=exeiop->sections.findByAddress(referenced_address);
if(sec==nullptr) continue;
// only trying to do this for executable chunks, other code deals with
// scoops not in the .text section.
if(!sec->isExecutable()) continue;
// when unpinning, the scoop is given start address 0 instead of the referenced address
const auto new_scoop_addr = do_unpin ? VirtualOffset_t(0u) : referenced_address;
const auto sec_data = sec->get_data();
const auto sec_start = sec->get_address();
// copy referenced_size bytes starting at the referenced address
const auto the_contents = string(&sec_data[referenced_address-sec_start],referenced_size);
const auto fileid = firp->getFile()->getBaseID();
auto new_start_addr = firp->addNewAddress(fileid,new_scoop_addr);
auto new_end_addr = firp->addNewAddress(fileid,new_scoop_addr+referenced_size-1);
const auto permissions = 0x4; /* R-- */
const auto is_relro = false;
// create the new scoop
already_scoopified[referenced_address] = firp->addNewDataScoop(name, new_start_addr,
new_end_addr, NULL, permissions, is_relro, the_contents);
}
auto newscoop = already_scoopified[referenced_address] ;
assert(newscoop);
const auto start_addr = newscoop -> getStart();
const auto end_addr = newscoop -> getEnd();
if(do_unpin)
{
// the addend is the negated (32-bit) virtual offset of the instruction
const auto new_addend = -uint32_t(insn->getAddress()->getVirtualOffset());
(void)firp->addNewRelocation(insn,0,"pcrel",newscoop, new_addend);
}
cout << "Allocated data in text segment " << name << "=(" << start_addr->getVirtualOffset() << "-"
<< end_addr->getVirtualOffset() << ")"
<< " for " << d->getDisassembly() << "@" << hex << insn->getAddress()->getVirtualOffset() << endl;
}
}

Jason Hiser
committed
/*
 * Look for constructor/destructor pointer tables stored at the end of the
 * .text section and register each table found as a data scoop in the IR.
 *
 * Only the GNU-style detector is run.  The PE .reloc walker (winstyle) and
 * the PE data-directory dump (scoopify_data_dir_sections) are defined here
 * but intentionally not invoked.  When the GNU-style detector does not
 * succeed, a caution is written to cerr.
 *
 * firp - the IR that receives the new addresses and scoops.
 *
 * Throws invalid_argument when a table is scanned and the architecture's
 * pointer size is neither 4 nor 8 bytes.
 */
void PopulateCFG::ctor_detection(FileIR_t *firp)
{
	/*
	 * Create a scoop called `name` covering [start_vo, end_vo] (both ends
	 * inclusive).  Contents and permissions are taken from the section
	 * that contains start_vo.
	 */
	const auto create_text_scoop=[&](const string& name, const VirtualOffset_t& start_vo, const VirtualOffset_t& end_vo)
	{
		auto startaddr = firp->addNewAddress(firp->getFile()->getBaseID(), start_vo); // start and end address
		auto endaddr   = firp->addNewAddress(firp->getFile()->getBaseID(), end_vo);
		const auto sec = exeiop->sections.findByAddress(start_vo);                   // section that contains the data
		assert(sec);
		const auto sec_data        = sec->get_data();                                 // data
		const auto scoop_start_ptr = sec_data+(start_vo-sec->get_address());          // relevant start of data
		const auto the_contents    = string(scoop_start_ptr, end_vo-start_vo+1);
		const auto permissions     =                                                  // permissions and relro bit.
			( sec->isReadable()   << 2 ) |
			( sec->isWriteable()  << 1 ) |
			( sec->isExecutable() << 0 ) ;
		const auto is_relro = false;

		// finally, create the new scoop
		firp->addNewDataScoop( name, startaddr, endaddr, NULL, permissions, is_relro, the_contents);

		cout << "Added ctor/dtor scoop called " << name << " at " << hex << start_vo << "-" << end_vo << endl;
	};

	/*
	 * GNU-style detection: the end of .text is expected to hold a .dtor
	 * table directly preceded by a .ctor table, each laid out as
	 * [-1, entries..., 0].  Returns true only when both tables are found.
	 */
	const auto gnustyle = [&]() -> bool
	{
		assert(firp);
		if(do_ctor_detection == cdt_NO) return false;

		const auto is_pe = firp->getArchitecture()->getFileType() == adftPE ;
		if(do_ctor_detection == cdt_PE32AUTO && !is_pe) return false;

		// is either a PE file and we are in auto detect mode
		// or ctor_detection is yes.
		assert(is_pe || do_ctor_detection == cdt_YES);

		/*
		 * Scan backwards from end_of_ctor (one past the table's null
		 * terminator) and return the address of the table's -1 marker,
		 * or 0 when the bytes do not look like a table.
		 */
		const auto find_ctor_start=[&](const exeio_section_t* sec, const VirtualOffset_t end_of_ctor) -> VirtualOffset_t
		{
			// values needed later for various things
			const auto ptrsize = VirtualOffset_t(firp->getArchitectureBitWidth() / 8);
			if(ptrsize != 8 && ptrsize != 4)
				throw invalid_argument("Unknown ptrsize");

			const auto sec_data  = sec->get_data();
			const auto sec_start = VirtualOffset_t(sec->get_address());
			const auto sec_size  = VirtualOffset_t(sec->get_size());

			// nothing to scan when the section has no bytes, or when the
			// stated end does not leave room for a terminator inside it.
			if(sec_data == nullptr) return 0;
			if(end_of_ctor < sec_start || end_of_ctor - sec_start > sec_size) return 0;
			if(end_of_ctor - sec_start < ptrsize) return 0;

			// read one pointer-sized table entry.  memcpy is used because
			// the bytes are not guaranteed to be suitably aligned for a
			// direct uint32_t/uint64_t load.
			const auto read_entry=[&](const VirtualOffset_t addr) -> uint64_t
			{
				const auto src = sec_data + (addr - sec_start);
				if(ptrsize == 8)
				{
					auto wide = uint64_t(0);
					memcpy(&wide, src, sizeof(wide));
					return wide;
				}
				auto narrow = uint32_t(0);
				memcpy(&narrow, src, sizeof(narrow));
				return narrow;
			};

			// the all-ones value that marks the start of a table
			const auto minus_one = ptrsize == 8 ? ~uint64_t(0) : uint64_t(~uint32_t(0));

			// check for a null terminator at the stated end of table
			const auto null_term_addr = end_of_ctor-ptrsize;
			// not found, return this isn't sane.
			if(read_entry(null_term_addr) != 0) return 0;

			// now scan the table in reverse order for 1) valid entries, or 2) a -1 terminator.
			auto entry_addr = null_term_addr;
			while(true)
			{
				// stop rather than step in front of the section; phrased
				// as a distance so unsigned wrap-around cannot slip by.
				if(entry_addr - sec_start < ptrsize) return 0;
				entry_addr -= ptrsize;

				// get the table entry
				const auto ctor_entry_value = read_entry(entry_addr);

				// check for the -1 terminator
				if(ctor_entry_value == minus_one) return entry_addr;

				// check if the table entry isn't a valid address
				const auto is_before_start = ctor_entry_value < sec_start;
				const auto is_after_end    = ctor_entry_value > (sec_start + sec_size);
				if(is_before_start || is_after_end) return 0;
			}
		};

		const auto text_sec = exeiop->sections[".text"];
		if(text_sec == nullptr) return false;

		// the .dtor table is expected to occupy the very end of .text
		const auto text_end_addr = text_sec->get_address()+text_sec->get_size();
		const auto dtor_end      = text_end_addr-1;
		const auto dtor_start    = find_ctor_start(text_sec,text_end_addr);
		if(dtor_start==0) return false;
		create_text_scoop(".dtor", dtor_start, dtor_end);

		// the .ctor table is expected to end where the .dtor table begins
		const auto ctor_end   = dtor_start-1;
		const auto ctor_start = find_ctor_start(text_sec,dtor_start);
		if(ctor_start==0) return false;
		create_text_scoop(".ctor", ctor_start, ctor_end);

		return true;
	};

	/*
	 * Windows-style detection (currently not invoked): walk the blocks of
	 * the .reloc section and create a scoop for every 64-bit relocation
	 * that lands in .text.  Throws runtime_error for a relocation type it
	 * does not handle or an address outside every section.
	 */
	const auto winstyle = [&]() -> bool
	{
		const auto reloc_sec = exeiop->sections[".reloc"];
		if(reloc_sec == nullptr) return false;
		if(reloc_sec->get_data() == nullptr) return false;

		const auto reloc_data = string(reloc_sec->get_data(), reloc_sec->get_size());
		const auto reloc_size = reloc_data.size();

		const auto peblissp = static_cast<pe_bliss::pe_base*>(exeiop->get_pebliss());
		if(peblissp == nullptr) return false;
		const auto pe_image_base = peblissp->get_image_base_64();

		// for each block in the reloc section
		using reloc_block_header_t =
			struct
			{
				uint32_t page;
				uint32_t block_size;
			};
		using reloc_block_t =
			struct
			{
				uint16_t offset:12; // lower bits
				uint16_t type:4;    // higher bits than offset?
			};
		static_assert(sizeof(reloc_block_header_t)==8, "size of reloc_block_header is wrong");
		static_assert(sizeof(reloc_block_t)==2, "size of reloc_block is wrong");

		// a dir64 relocation patches one 8-byte pointer
		const auto dir64_size = 8;

		// start at block 0
		auto current_offset = VirtualOffset_t(0);
		while(true)
		{
			// stop if we fall off the end of the section.
			if(current_offset + sizeof(reloc_block_header_t) > reloc_size)
				break;

			// read the header
			auto header = reloc_block_header_t();
			memcpy(&header, reloc_data.data() + current_offset, sizeof(header));

			// start counting the length after we've read the header.
			auto current_length = size_t(sizeof(header));

			// for each entry in the block
			while(true)
			{
				// stop if we exceed the block
				if(current_length + sizeof(reloc_block_t) > header.block_size)
					break;
				// stop if we exceed the section
				if(current_offset + current_length + sizeof(reloc_block_t) > reloc_size)
					break;

				// read the entry
				auto block = reloc_block_t();
				memcpy(&block, reloc_data.data() + current_offset + current_length, sizeof(block));
				current_length += sizeof(block);

				// for now, just log
				const auto rva = pe_image_base + header.page + block.offset;
				cout << "reloc rva = " << hex << rva << " type = " << block.type << '\n';

				switch(block.type)
				{
					case pe_bliss::pe_win::image_rel_based_absolute: /* ignored */
						break;
					case pe_bliss::pe_win::image_rel_based_dir64: /* dir64 */
					{
						const auto relocd_sec = exeiop->sections.findByAddress(rva);
						if(!relocd_sec) throw runtime_error("Unexpected relocation address?");

						const auto relocd_sec_name = relocd_sec->get_name();
						if(relocd_sec_name != ".text") // no need to pin anything besides the text seg.
							continue; // try the next entry of this block

						// create_text_scoop takes an inclusive end address, so
						// the pointer occupies rva .. rva+7.
						const auto new_name = "winstyle_scoop_"+to_string(rva);
						create_text_scoop(new_name, rva, rva+dir64_size-1);
						break;
					}
					default:
						throw runtime_error("Cannot handle PE relocation.");
				}
			}

			// update to the next block
			current_offset += current_length;
		}
		return true;
	};

	/*
	 * Debug aid (currently not invoked): print the address range of each
	 * PE data directory that exists.
	 */
	const auto scoopify_data_dir_sections = [&]()
	{
		const auto peblissp = static_cast<pe_bliss::pe_base*>(exeiop->get_pebliss());
		if(peblissp == nullptr) return;

		for(auto dir=0u; dir<15; dir++)
		{
			if(!peblissp->directory_exists(dir))
				continue;

			const auto dir_rva  = peblissp->get_directory_rva (dir);
			const auto dir_size = peblissp->get_directory_size(dir);
			cout << "Directory " << dec << dir << " is at " << hex << dir_rva << " - " << dir_rva+dir_size << '\n';
		}
	};

	// the two PE helpers are kept for reference but are not wired in;
	// mention them so they do not count as unused.
	(void)winstyle;
	(void)scoopify_data_dir_sections;

	// scoopify_data_dir_sections();
	if(gnustyle()) return;
	// if(winstyle()) return;

	cerr << "Caution! Could not find any reloc handling for ctor/dtor and/or plts." << endl;
}
/*
 * Use the parsed exception-handling frame to pull landing-pad code into
 * functions.  First, for each instruction in the IR, its landing pad is
 * looked up; a landing pad that has no function is queued under the function
 * of the instruction it belongs to.  Then, per function, the queued
 * instructions are assigned to that function, and each one's own landing
 * pad, target and fallthrough are queued in turn.
 *
 * firp - the IR whose instructions are examined and updated.
 *
 * NOTE(review): this view is missing lines; in particular both `continue;`
 * statements below are presumably guarded by conditions that are not visible
 * here, as is the opening brace of the first loop.
 */
void PopulateCFG::fill_in_landing_pads(FileIR_t *firp)
{
const auto eh_frame_rep_ptr = split_eh_frame_t::factory(firp);
// dump the parsed representation only when EHIR_VERBOSE is set in the environment
if(getenv("EHIR_VERBOSE"))
eh_frame_rep_ptr->print();
cout<<"Completed eh-frame parsing"<<endl;
// for each function, the landing-pad instructions that should be added to it
map<Function_t*,set<Instruction_t*> > insns_to_add_to_funcs;
for(const auto t : firp->getInstructions())
continue;
auto lp=eh_frame_rep_ptr->find_lp(t);
// only landing pads that are not in any function yet are queued
if(lp && lp->getFunction()==NULL)
insns_to_add_to_funcs[t->getFunction()].insert(lp);
};
for(const auto & p : insns_to_add_to_funcs)
{
auto & func=p.first;
auto insns=p.second; /* copy */
auto insn_count=0;
// worklist: take one instruction out, then queue the instructions reachable from it
while(insns.size()>0 )
{
auto it=insns.begin();
auto insn=*it;
insns.erase(it);
assert(insn);
continue;
auto lp=eh_frame_rep_ptr->find_lp(insn);
insns.insert(lp);
insn->setFunction(func);
cout<<" Adding "<<insn->getBaseID()<<":"<<insn->getDisassembly()<<"@"<<hex<<insn->getAddress()->getVirtualOffset()<<dec<<endl;
insn_count++;
auto target=insn->getTarget();
auto fallthru=insn->getFallthrough();
if(target) insns.insert(target);
if(fallthru) insns.insert(fallthru);
}
cout<<"Found LP outside of function "<<func->getName()<<" added "<<insn_count<<" instructions"<<endl;
};
}
int PopulateCFG::parseArgs(const vector<string> step_args)
Matthew McGill
committed
Matthew McGill
committed
{
if (step_args[i]=="--fix-landing-pads")
Matthew McGill
committed
{
fix_landing_pads = true;
Matthew McGill
committed
}
else if (step_args[i]=="--no-fix-landing-pads")
Matthew McGill
committed
{
fix_landing_pads = false;
Matthew McGill
committed
}

Jason Hiser
committed
else if (step_args[i]=="--extra-scoop")
{
i++;
const auto extra_scoops_str=step_args[i];
stringstream ss(extra_scoops_str);
auto start_addr = VirtualOffset_t(0);
auto end_addr = VirtualOffset_t(0);
auto c = uint8_t(0);
ss>>hex>>start_addr>>c>>end_addr;
assert(c=='-');
extra_scoops.insert({start_addr,end_addr});
cout << "Recognizing request to add " << hex << start_addr << "-" << end_addr << " as a scoop."<<endl;
}
else if (step_args[i]=="--do-ctor-detection")
{
do_ctor_detection=cdt_YES;
}
else if (step_args[i]=="--no-ctor-detection")
{
do_ctor_detection=cdt_NO;
}
Matthew McGill
committed
}
cout<<"fix_landing_pads="<<fix_landing_pads<<endl;
Matthew McGill
committed