Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • opensrc/zipr
  • whh8b/zipr
2 results
Show changes
Commits on Source (17)
Showing
with 126 additions and 4789 deletions
......@@ -21,28 +21,42 @@ stages:
- ./cicd_testing/do-clean.sh
# per os items
do-nightly-clean-ubuntu18:
<<: *do-nightly-clean
tags:
- ubuntu18
variables:
OS: 'ubuntu18'
# per os items
do-nightly-clean-ubuntu16:
#do-nightly-clean-ubuntu18:
# <<: *do-nightly-clean
# tags:
# - ubuntu18
# variables:
# OS: 'ubuntu18'
#
## per os items
#do-nightly-clean-ubuntu16:
# <<: *do-nightly-clean
# tags:
# - ubuntu16
# variables:
# OS: 'ubuntu16'
#
## per os items
#do-nightly-clean-centos75:
# <<: *do-nightly-clean
# tags:
# - centos75
# variables:
# OS: 'centos75'
#
do-nightly-clean-arm32:
<<: *do-nightly-clean
tags:
- ubuntu16
- arm32
variables:
OS: 'ubuntu16'
OS: 'arm32'
# per os items
do-nightly-clean-centos75:
do-nightly-clean-arm64:
<<: *do-nightly-clean
tags:
- centos75
- arm64
variables:
OS: 'centos75'
OS: 'arm64'
#
......@@ -56,26 +70,40 @@ do-nightly-clean-centos75:
# per os items
do-build-ubuntu18:
<<: *do-build
tags:
- ubuntu18
variables:
OS: 'ubuntu18'
do-build-ubuntu16:
#do-build-ubuntu18:
# <<: *do-build
# tags:
# - ubuntu18
# variables:
# OS: 'ubuntu18'
#
#do-build-ubuntu16:
# <<: *do-build
# tags:
# - ubuntu16
# variables:
# OS: 'ubuntu16'
#
#do-build-centos75:
# <<: *do-build
# tags:
# - centos75
# variables:
# OS: 'centos75'
#
do-build-arm32:
<<: *do-build
tags:
- ubuntu16
- arm32
variables:
OS: 'ubuntu16'
OS: 'arm32'
do-build-centos75:
do-build-arm64:
<<: *do-build
tags:
- centos75
- arm64
variables:
OS: 'centos75'
OS: 'arm64'
#
......@@ -86,20 +114,20 @@ do-build-centos75:
script:
- ./cicd_testing/eh-tests.sh
basic_eh-tests-ubuntu18:
<<: *basic_eh-tests
tags:
- ubuntu18
basic_eh-tests-ubuntu16:
<<: *basic_eh-tests
tags:
- ubuntu16
basic_eh-tests-centos75:
<<: *basic_eh-tests
tags:
- centos75
#basic_eh-tests-ubuntu18:
# <<: *basic_eh-tests
# tags:
# - ubuntu18
#
#basic_eh-tests-ubuntu16:
# <<: *basic_eh-tests
# tags:
# - ubuntu16
#
#basic_eh-tests-centos75:
# <<: *basic_eh-tests
# tags:
# - centos75
#
# test_cmds.sh fixcalls
......@@ -109,64 +137,64 @@ basic_eh-tests-centos75:
script:
- ./cicd_testing/builtin-xforms.sh
builtin-xforms-ubuntu18:
<<: *builtin-xforms
tags:
- ubuntu18
builtin-xforms-ubuntu16:
<<: *builtin-xforms
tags:
- ubuntu16
builtin-xforms-centos75:
<<: *builtin-xforms
tags:
- centos75
variables:
OS: 'centos75'
#builtin-xforms-ubuntu18:
# <<: *builtin-xforms
# tags:
# - ubuntu18
#
#builtin-xforms-ubuntu16:
# <<: *builtin-xforms
# tags:
# - ubuntu16
#
#builtin-xforms-centos75:
# <<: *builtin-xforms
# tags:
# - centos75
# variables:
# OS: 'centos75'
#
# elfdep test
#
# template
.elfdep: &elfdep
stage: test
script:
- ./cicd_testing/elfdep.sh
elfdep-ubuntu18:
<<: *elfdep
tags:
- ubuntu18
variables:
OS: 'ubuntu18'
elfdep-ubuntu16:
<<: *elfdep
tags:
- ubuntu16
variables:
OS: 'ubuntu16'
elfdep-centos75:
<<: *elfdep
tags:
- centos75
variables:
OS: 'centos75'
#.elfdep: &elfdep
# stage: test
# script:
# - ./cicd_testing/elfdep.sh
#
#elfdep-ubuntu18:
# <<: *elfdep
# tags:
# - ubuntu18
# variables:
# OS: 'ubuntu18'
#
#elfdep-ubuntu16:
# <<: *elfdep
# tags:
# - ubuntu16
# variables:
# OS: 'ubuntu16'
#
#elfdep-centos75:
# <<: *elfdep
# tags:
# - centos75
# variables:
# OS: 'centos75'
#
# deploy a docker image
#
deploy-u18:
stage: deploy
script:
- ./cicd_testing/deploy.sh
tags:
- ubuntu18
variables:
OS: 'ubuntu18'
#deploy-u18:
# stage: deploy
# script:
# - ./cicd_testing/deploy.sh
# tags:
# - ubuntu18
# variables:
# OS: 'ubuntu18'
......@@ -53,8 +53,6 @@ Export('env')
# list of zipr plugins and irdb xforms to build
transformDirs='''
builtin_xforms/add_lib
builtin_xforms/move_globals
builtin_xforms/p1transform
builtin_xforms/resolve_callbacks
zipr_push64_reloc_plugin
zipr
......
Subproject commit 73b402e2b8ad059acc19cbdd37c41a3ea30097f4
Subproject commit 3539f5fc632060d890c77b2b294f3edfc4691a2e
......@@ -6,7 +6,12 @@ if [[ "$*" =~ "--debug" ]]; then
SCONSDEBUG=" debug=1 "
fi
scons $SCONSDEBUG -j 3
if [[ $(uname -m) == 'armv7l' ]] || [[ $(uname -m) == 'aarch64' ]]; then
scons $SCONSDEBUG
else
scons $SCONSDEBUG -j 3
fi
exit
......
# SCons build script for the move_globals plugin: compiles every .cpp file in this
# directory into the shared library move_globals.so and installs it under
# $SECURITY_TRANSFORMS_HOME/plugins_install/.
# NOTE(review): `os` is imported but not referenced anywhere in this script.
import os
Import('env')
# import and create a copy of the environment so we don't screw up anyone else's env.
myenv=env.Clone()
# Header search paths, one per line (split into a list below).
cpppath='''
$SECURITY_TRANSFORMS_HOME/third_party/elfio-code
$PEASOUP_HOME/irdb-libs/libEXEIO/include
$IRDB_SDK/include
'''
# Sources: every .cpp file in the directory that holds this script.
files=Glob( Dir('.').srcnode().abspath+"/*.cpp")
pgm="move_globals.so"
# Library search path and the libraries the plugin is linked against.
LIBPATH="$SECURITY_TRANSFORMS_HOME/lib"
LIBS=Split("irdb-core irdb-cfg irdb-util irdb-transform irdb-deep StructDiv EXEIO ")
myenv.Append(CPPPATH=Split(cpppath))
# From here on `pgm` is the SharedLibrary build node rather than the file name.
pgm=myenv.SharedLibrary(pgm, files, LIBPATH=LIBPATH, LIBS=LIBS)
install=myenv.Install("$SECURITY_TRANSFORMS_HOME/plugins_install/", pgm)
# Make the install step the default target and hand it back to the calling script.
Default(install)
Return('install')
# Stand-alone entry point: create a default construction environment, export it
# under the name 'env', and run the SConscript in this directory.
env=Environment()
Export('env')
lib=SConscript("SConscript")
#include "mg.hpp"
#include <assert.h>
#include <stdexcept>
#include <unistd.h>
#include <memory>
#include <inttypes.h>
#include <algorithm>
#include <elf.h>
#include <cctype>
#include <iomanip>
#include <cstdlib>
#include <random>
using namespace std;
using namespace IRDB_SDK;
using namespace EXEIO;
// Expands to the begin/end iterator pair of `s`, so a whole container can be passed
// to an STL algorithm.  `s` appears twice in the expansion, so it must be free of
// side effects.
#define ALLOF(s) begin(s), end(s)
// Binary predicate that reports whether a scoop has a given name.
// The binary_function base supplies the argument/result typedefs that adaptors
// such as bind2nd(finder, name) rely on, so it is kept for existing call sites.
static struct ScoopFinder : binary_function<DataScoop_t*,string,bool>
{
    // Returns true when the name of `scoop` equals `word`.
    // `word` is taken by const reference so the string is not copied on every comparison.
    bool operator()(const DataScoop_t* scoop, const string& word) const
    {
        return scoop->getName() == word;
    }
} finder;
// Reports whether `value` occurs anywhere in `container`, using a linear search
// from container.begin() to container.end().
template<class S, class T> inline
static bool contains(const S &container, const T& value)
{
    const auto last = container.end();
    const auto hit = std::find(container.begin(), last, value);
    return hit != last;
}
// True when the operand is a memory operand (pc-relative or not).
static bool arg_has_memory(const DecodedOperand_t &arg)
{
    return arg.isMemory();
}
// True when the operand is a memory operand that is also pc-relative.
static bool arg_has_relative(const DecodedOperand_t &arg)
{
    return arg.isMemory() && arg.isPcrel();
}
// Examines at most the first four operands and returns an iterator to the LAST one
// that is a memory operand, or operands.end() when none of them is.
static DecodedOperandVector_t::iterator find_memory_operand(DecodedOperandVector_t &operands)
{
    auto chosen = operands.end();
    const auto limit = min<size_t>(operands.size(), 4);
    for (size_t idx = 0; idx < limit; ++idx)
    {
        // later matches deliberately overwrite earlier ones
        if (arg_has_memory(*operands[idx]))
            chosen = next(operands.begin(), idx);
    }
    return chosen;
}
// Formats `i` through a string stream in hexadecimal mode, prefixed with "0x".
template< typename T >
static std::string to_hex_string( T i )
{
    std::stringstream out;
    out << "0x";
    out << std::hex;
    out << i;
    return out.str();
}
// Constructs the transform.  The IR is handed to the Transform_t base class, the
// option values are stored in the corresponding members, the tie counters are
// zeroed and the executable reader starts out null (it is created in execute()).
// p_variantID is accepted but not used by this constructor.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::MoveGlobals_t(
    VariantID_t *p_variantID,
    FileIR_t *p_variantIR,
    const string &p_dont_move,
    const string &p_move_only,
    const int p_max_mov,
    const bool p_random,
    const bool p_aggressive,
    const bool p_use_stars)
    : Transform_t(p_variantIR)
    , exe_reader(nullptr)
    , tied_unpinned(0)
    , tied_pinned(0)
    , tied_nochange(0)
    , ties_for_folded_constants(0)
    , dont_move(p_dont_move)        // whitespace-separated scoop names (see FilterScoops)
    , move_only(p_move_only)        // whitespace-separated scoop names (see FilterScoops)
    , max_moveables(p_max_mov)
    , random(p_random)
    , aggressive(p_aggressive)
    , m_use_stars(p_use_stars)
{
    // nothing else to do; all work happens in execute()
}
// Disabled (compiled out): accessor that returned the annotation set from
// m_annotationParser and asserted that STARS usage was enabled.
#if 0
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
MEDS_Annotations_t& MoveGlobals_t<T_Sym, T_Rela, T_Rel, T_Dyn, T_Extractor>::getAnnotations()
{
assert(m_use_stars);
return m_annotationParser->getAnnotations();
}
#endif
// Entry point of the transform.  Loads the executable, optionally runs the deep
// (STARS) analysis, then runs the scoop pipeline in a fixed order: ParseSyms,
// SetupScoopMap, FilterScoops, TieScoops, FindInstructionReferences,
// FindDataReferences, FilterAndCoalesceTiedScoops, UpdateScoopLocations, PrintStats.
// Always returns 0.
// pqxx_interface is only referenced by the commented-out / #if 0 code below.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
int MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::execute(pqxxDB_t &pqxx_interface)
{
// read the executable file
/*
int elfoid = this->getFileIR()->getFile()->getELFOID(); // from Transform base class
pqxx::largeobject lo(elfoid);
lo.to_file(pqxx_interface.getTransaction(),"readeh_tmp_file.exe");
*/
// load the executable.
// The file name "a.ncexe" is hard-coded.
// NOTE(review): exe_reader is allocated with new here; no matching delete is visible in this part of the file.
this->exe_reader = new EXEIO::exeio;
assert(this->exe_reader);
this->exe_reader->load((char*)"a.ncexe");
// Disabled: older, direct STARS integration, superseded by the DeepAnalysis_t path below.
#if 0
STARS::IRDB_Interface_t STARS_analysis_engine(pqxx_interface);
STARS_analysis_engine.GetSTARSOptions().SetDeepLoopAnalyses(true);
STARS_analysis_engine.GetSTARSOptions().SetConstantPropagation(true);
if(m_use_stars)
{
STARS_analysis_engine.do_STARS(this->getFileIR());
this->m_annotationParser = &STARS_analysis_engine.getAnnotations();
assert(getenv("SELF_VALIDATE")==nullptr || getAnnotations().size() > 15);
}
cout << "move_globals execute(): enter" << endl;
const auto annot_size = m_use_stars ? (size_t)this->getAnnotations().size() : (size_t)0 ;
cout << "size of annotation set: " << annot_size << endl;
#endif
// When STARS is enabled, fetch the static global ranges and range sentinels that
// DetectAnnotationScoop() consults later, and report their sizes.
if(m_use_stars)
{
// NOTE(review): deep_analysis goes out of scope at the end of this block while the two
// results are kept in members -- confirm they do not depend on its lifetime.
auto deep_analysis=DeepAnalysis_t::factory(getFileIR(), aeSTARS, {"SetDeepLoopAnalyses=true", "SetConstantPropagation=true"});
deep_global_static_ranges = deep_analysis -> getStaticGlobalRanges();
sentinels = deep_analysis -> getRangeSentinels();
cout<<dec;
cout<<"#ATTRIBUTE "<<deep_global_static_ranges->size() <<" num_global_static_range_annotations" <<endl;
cout<<"#ATTRIBUTE "<<sentinels->size() <<" num_sentinel_annotations" <<endl;
}
// The pipeline: each step relies on state built by the previous ones.
this->ParseSyms(exe_reader);
this->SetupScoopMap();
this->FilterScoops();
this->TieScoops();
this->FindInstructionReferences(); // may record some scoops are tied together
this->FindDataReferences();
this->FilterAndCoalesceTiedScoops();
this->UpdateScoopLocations();
this->PrintStats();
return 0;
}
// Index every data scoop in the IR by its (start, end) virtual-offset pair, so that
// findScoopByAddress() can look scoops up through scoop_map.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::SetupScoopMap()
{
    for(const auto &scoop : getFileIR()->getDataScoops())
    {
        const auto range_start = scoop->getStart()->getVirtualOffset();
        const auto range_end   = scoop->getEnd()->getVirtualOffset();
        // operator[] on purpose: a later scoop with an equivalent key replaces the earlier entry
        scoop_map[RangePair_t(range_start, range_end)] = scoop;
    }
}
// Looks `a` up in scoop_map using the single-address range (a, a) as the key.
// Returns the mapped scoop, or a null pointer when the map has no matching entry.
// NOTE(review): this presumably relies on scoop_map's ordering treating a range
// that contains `a` as equivalent to (a, a) -- confirm against RangePair_t's comparator.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::findScoopByAddress(const IRDB_SDK::VirtualOffset_t a) const
{
    const auto found = scoop_map.find(RangePair_t(a, a));
    return (found != scoop_map.end()) ? found->second : nullptr;
}
// Returns true when no scoop is found at any address strictly between the end of
// the lower-addressed scoop and the start of the higher-addressed one.
// The two scoops may be passed in either order.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
bool MoveGlobals_t<T_Sym, T_Rela, T_Rel, T_Dyn, T_Extractor>::AreScoopsAdjacent(const DataScoop_t *a, const DataScoop_t *b) const
{
    const IRDB_SDK::VirtualOffset_t aStart = a->getStart()->getVirtualOffset();
    const IRDB_SDK::VirtualOffset_t aEnd   = a->getEnd()->getVirtualOffset();
    const IRDB_SDK::VirtualOffset_t bStart = b->getStart()->getVirtualOffset();
    const IRDB_SDK::VirtualOffset_t bEnd   = b->getEnd()->getVirtualOffset();

    // work out which scoop comes first in the address space
    const bool b_comes_first = aStart > bStart;
    const IRDB_SDK::VirtualOffset_t gap_begin = (b_comes_first ? bEnd : aEnd) + 1;
    const IRDB_SDK::VirtualOffset_t gap_end   = b_comes_first ? aStart : bStart;

    // probe every address in the gap; any hit means a third scoop sits in between
    for (IRDB_SDK::VirtualOffset_t addr = gap_begin; addr < gap_end; ++addr)
    {
        if (this->findScoopByAddress(addr) != nullptr)
            return false;
    }
    return true;
} // end of AreScoopsAdjacent()
// Go through the .symtab and .dynsym sections of the ELF file and split a dedicated
// scoop out of the enclosing data scoop for each object symbol.
//
// Steps, in order:
//   1. Register scoops whose name is on a fixed white list of ELF sections as moveable.
//   2. For every STT_OBJECT symbol with LOCAL or GLOBAL binding, a non-zero value and
//      a non-zero size: split the scoop containing it, name the new scoop after the
//      symbol and register it as moveable.
//   3. Rename scoops (appending "-renamed") until every scoop name is unique.
//
// Environment:
//   MG_VERBOSE     - if set, print extra diagnostics.
//   MG_MAX_SPLITS  - if set, stop scanning a symbol section once this many splits
//                    have been performed.
//
// readerp must be non-null and wrap an ELFIO reader (both are asserted).
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ParseSyms(EXEIO::exeio * readerp)
{
    auto max_id=getFileIR()->getMaxBaseID();

    // The environment does not change while we run, so query it once instead of
    // once per scoop / section / symbol.
    const auto verbose = getenv("MG_VERBOSE") != nullptr;
    const auto max_splits_env = getenv("MG_MAX_SPLITS");
    const auto split_limit = max_splits_env ? strtoul(max_splits_env, nullptr, 0) : 0ul;

    // white list of scoops that are moveable, despite the symbol table
    const auto moveable_sections=set<string>({
        ".interp",
        ".note.ABI-tag",
        ".note.gnu.build-id",
        ".gnu.hash",
        ".dynsym",
        ".dynstr",
        ".gnu.version",
        ".gnu.version_r",
        ".rel.dyn",
        ".rel.plt",
        ".rela.dyn",
        ".rela.plt",
        ".init_array",
        ".fini_array",
        ".jcr",
        ".dynamic",
        ".got",
        ".got.plt"
        });

    if(verbose)
        cout<<"Initial scoops:"<<endl;
    for(const auto &scoop : getFileIR()->getDataScoops())
    {
        if(verbose)
        {
            cout<<"scoop: "<<scoop->getName()<<" ("<<hex<<scoop->getStart()->getVirtualOffset()
                <<"-"<<scoop->getEnd()->getVirtualOffset()<<")"<<endl;
        }

        if(moveable_sections.find(scoop->getName())!=moveable_sections.end())
        {
            cout<<"Register scoop "<<scoop->getName()<<" as movable"<<endl;
            moveable_scoops.insert(scoop);
        }
    }

    assert(readerp);
    auto elfiop=reinterpret_cast<ELFIO::elfio*>(readerp->get_elfio());
    assert(elfiop);
    auto &reader=*elfiop;

    auto splits=0u;

    // for each section in the elf file.
    const auto n = (Elf_Half) reader.sections.size();
    for ( auto sec_idx = (Elf_Half ) 0; sec_idx < n; ++sec_idx )
    {
        auto sec = reader.sections[sec_idx];

        // only symbol-table sections are of interest
        if ( SHT_SYMTAB != sec->get_type() && SHT_DYNSYM != sec->get_type() )
            continue;

        auto symbols = ELFIO::symbol_section_accessor ( reader, sec );

        // for each symbol in the section
        auto sym_no = symbols.get_symbols_num();
        for (auto sym_idx = (decltype(sym_no))0; sym_idx < sym_no; ++sym_idx )
        {
            // check to see if we've been directed to not split everything up.
            if (max_splits_env && (splits >= split_limit))
                break;

            auto name=std::string();
            auto value=(Elf64_Addr)0; // note: elf64_addr OK for 32-bit machines still.
            auto size=(Elf_Xword)0;
            auto bind=(unsigned char)0;
            auto type=(unsigned char)0;
            auto section=(Elf_Half)0;
            auto other=(unsigned char)0;

            // elfio always takes a value of type Elf64-Addr regardless of mach type.
            symbols.get_symbol( sym_idx, name, value, size, bind, type, section, other );

            // only symbols that describe an object (as opposed to a binding, or a function or a ...)
            const auto is_object = type==STT_OBJECT && (bind==STB_LOCAL || bind==STB_GLOBAL) && value!=0 && size!=0;
            if(!is_object)
                continue;

            auto tosplit=getFileIR()->findScoop(value);

            // something went wrong if we can't find the scoop for this object.
            if(tosplit==nullptr)
                continue;

            cout << "Section: "<<sec->get_name() << " name="<< name << " size="
                 <<hex<<size<< " addr="<<hex<<value<<" scoop: "<<tosplit->getName()<<endl;

            if(verbose)
            {
                cout<<"\ttosplit: "<<hex<<tosplit->getStart()->getVirtualOffset()<<"-"
                    <<tosplit->getEnd()->getVirtualOffset();
            }

            // the symbol must lie entirely inside the scoop that holds its start address
            if(value+size-1 > tosplit->getEnd()->getVirtualOffset())
            {
                cout<<"Skipping symbol "<<name<<" due to an object that's already split?"<<endl;
                cout<<"Start (but not end) of "<<name<<" is in in object " <<
                    tosplit->getName()<<":("<<hex<<tosplit->getStart()->getVirtualOffset()<<"-" <<
                    tosplit->getEnd()->getVirtualOffset()<<")"<<endl;;
                continue; // try next symbol
            }

            if(moveable_scoops.find(tosplit)!=end(moveable_scoops))
            {
                cout<<"Avoiding resplit of "<<name<<" due to an object that's already split?"<<endl;
                // don't re-split something that's already moveable.
                continue;
            }

            DataScoop_t* before=nullptr;
            DataScoop_t* containing=nullptr;
            DataScoop_t* after=nullptr;
            getFileIR()->splitScoop(tosplit, value, size, before,containing,after,&max_id);

            // check before the first dereference (the verbose print below uses it)
            assert(containing);

            if(verbose)
            {
                if(before)
                {
                    cout<<"\tBefore: "<<hex<<before->getStart()->getVirtualOffset()
                        <<"-"<<before->getEnd()->getVirtualOffset();
                }
                cout<<"\tContaining: "<<hex<<containing->getStart()->getVirtualOffset()
                    <<"-"<<containing->getEnd()->getVirtualOffset();
                if(after)
                {
                    cout<<"\tAfter: "<<hex<<after->getStart()->getVirtualOffset()
                        <<"-"<<after->getEnd()->getVirtualOffset();
                }
                cout<<endl;
            }

            containing->setName(name);
            moveable_scoops.insert(containing);
            splits++;
        }
        cout << std::endl;
    }

    // guarantee unique scoop names
    auto scoop_names=set<string>();
    for(auto & s : getFileIR()->getDataScoops())
    {
        while(scoop_names.find(s->getName())!=scoop_names.end())
        {
            cout<<"Rename scoop because of name conflict: "<<s->getName()<<" --> ";
            s->setName(s->getName()+"-renamed");
            cout<<s->getName()<<endl;
        }
        scoop_names.insert(s->getName());
    }

    cout<<"# ATTRIBUTE Non-Overlapping_Globals::data_scoop_splits_performed="<<dec<<splits<<endl;
}
// Narrow down moveable_scoops according to the user options:
//   1. move_only  - if non-empty, keep only the moveable scoops named in this
//                   whitespace-separated list.
//   2. dont_move  - remove every moveable scoop named in this whitespace-separated list.
//   3. max_moveables - if positive, erase scoops until at most that many remain;
//                   victims are picked at random when `random` is set, otherwise
//                   the last element of the set is erased each time.
// MG_VERBOSE (if set) prints the surviving scoops after steps 1 and 2.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FilterScoops()
{
    // Finds a moveable scoop by name; yields moveable_scoops.end() when absent.
    // A lambda replaces std::bind2nd, which was removed from the standard in C++17.
    const auto find_moveable_by_name = [this](const string &name)
    {
        return find_if(ALLOF(moveable_scoops), [&name](const DataScoop_t* scoop)
            {
                return finder(scoop, name);
            });
    };

    const auto print_moveables = [this](const char* heading)
    {
        cout<<heading<<endl;
        for(auto &s : moveable_scoops)
            cout<<s->getName()<<endl;
        cout<<endl;
    };

    // filter using the move_only option
    DataScoopSet_t move_only_scoops;
    istringstream mo_ss(move_only);
    for(string word; mo_ss >> word; )
    {
        const auto it=find_moveable_by_name(word);
        if(it!=moveable_scoops.end())
            move_only_scoops.insert(*it);
    }

    // update the moveable_scoops based on the move_only set.
    if(move_only != "" )
    {
        moveable_scoops.clear();
        moveable_scoops.insert(ALLOF(move_only_scoops));

        if(getenv("MG_VERBOSE"))
            print_moveables("Moveable Scoops after move_only filter:");
    }

    // filter based on the dont_move option
    istringstream dm_ss(dont_move);
    for(string word; dm_ss >> word; )
    {
        const auto it=find_moveable_by_name(word);
        if(it!=moveable_scoops.end())
            moveable_scoops.erase(it);
    }

    if(dont_move!="" && getenv("MG_VERBOSE"))
        print_moveables("Moveable Scoops after dont_move filter:");

    // enforce the cap on the number of moveable scoops
    if(max_moveables>0)
    {
        mt19937 generator(time(0));
        uniform_real_distribution<double> distribution(0.0,1.0);
        while(moveable_scoops.size() > (unsigned)max_moveables)
        {
            if (random)
            {
                const double rand_num = distribution(generator);
                // clamp so a draw that rounds up to 1.0 can never index one past the end
                const auto rand_idx = min<size_t>((size_t)(rand_num * moveable_scoops.size()),
                                                  moveable_scoops.size() - 1);
                auto it = moveable_scoops.begin();
                advance(it, rand_idx);
                moveable_scoops.erase(it);
            }
            else
            {
                moveable_scoops.erase(prev(moveable_scoops.end()));
            }
        }
    }
}
// Tie together pairs of relocation-section scoops that must not be separated.
// For each pair: if both scoops are moveable they are recorded in tied_scoops;
// if only one of them is moveable, that one is renamed to
// "<first> coalesced w/<second>" for easier management later.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::TieScoops()
{
    struct scoop_pairs_t
    {
        string first, second;
    };
    const scoop_pairs_t scoop_pairs[] = {
        { ".rel.dyn", ".rel.plt" }, // the dynamic linker goes through both sections together when LD_BIND_NOW is set.
        { ".rela.dyn", ".rela.plt" }
        // can't tie .got and .got.plt because of relro differences.
        // can make insanity happen.
        // { ".got", ".got.plt" }
    };

    // Finds a moveable scoop by name; yields moveable_scoops.end() when absent.
    // A lambda replaces std::bind2nd, which was removed from the standard in C++17.
    const auto find_moveable_by_name = [this](const string &name)
    {
        return find_if(ALLOF(moveable_scoops), [&name](const DataScoop_t* scoop)
            {
                return finder(scoop, name);
            });
    };

    for(const auto &pair : scoop_pairs)
    {
        const auto it1=find_moveable_by_name(pair.first);
        const auto it2=find_moveable_by_name(pair.second);
        const auto have_first  = it1!=moveable_scoops.end();
        const auto have_second = it2!=moveable_scoops.end();

        if(have_first && have_second)
        {
            // both exist, tie together.
            tied_scoops.insert(ScoopPair_t(*it1,*it2));
        }
        else if(have_first)
        {
            // only the first exists, rename for easier management later.
            (*it1)->setName(pair.first+" coalesced w/"+ pair.second);
        }
        else if(have_second)
        {
            // only the second exists, rename for easier management later.
            (*it2)->setName(pair.first+" coalesced w/"+ pair.second);
        }
        // otherwise neither exists and there is nothing to do.
    }
}
// Classify the memory operand `the_arg` of `insn` and, when it refers to a moveable,
// non-executable scoop, record the reference for later rewriting:
//   - in pcrel_refs_to_scoops when the instruction carries a "pcrel" relocation,
//   - in absolute_refs_to_scoops otherwise.
// Nothing is recorded when there is no memory operand, when the binary is a DLL and
// the operand is not pc-relative, when the displacement is not encoded in 4 bytes,
// or when no suitable scoop is found.  MG_VERBOSE (if set) logs each decision.
//   disasm            - decoded form of insn.
//   the_arg           - iterator to the memory operand, or the_arg_container.end() if none.
//   insn              - the instruction being examined.
//   the_arg_container - the operand vector that the_arg iterates over.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::HandleMemoryOperand(DecodedInstruction_t& disasm, const DecodedOperandVector_t::iterator the_arg, Instruction_t* insn, const DecodedOperandVector_t &the_arg_container)
{
// no mem arg.
if(the_arg==the_arg_container.end())
{
if(getenv("MG_VERBOSE"))
{
cout << "Note: "<<hex<<" no memory op in:";
cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
cout << endl;
}
return;
}
// shared objects don't need this, you have to use a pcrel addressing mode.
if(!arg_has_relative(**the_arg) && exe_reader->isDLL())
{
if(getenv("MG_VERBOSE"))
{
cout << "Note: "<<hex<<" no dll-style address in:";
cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
cout << endl;
}
return;
}
// Addresses whose magnitude is below this threshold only affect which log message
// is printed further down: 10 for DLLs, 10 pages of 4096 bytes otherwise.
const auto small_memory_threshold= exe_reader->isDLL() ? 10 : 4096*10;
auto to1 = (DataScoop_t*) NULL;
// examine the memory operation to see if there's a pc-rel
// (only displacements encoded in exactly 4 bytes are considered)
if ((*the_arg)->isMemory() &&
(*the_arg)->hasMemoryDisplacement() &&
(*the_arg)->getMemoryDisplacementEncodingSize() == 4
)
{
auto rel_addr1 = (VirtualOffset_t)(*the_arg)->getMemoryDisplacement() /*Memory.Displacement*/;
// NOTE(review): only the instruction length is added for pc-relative operands, not the
// instruction's address -- presumably the displacement was already rebased by an
// earlier pass; confirm.
if (arg_has_relative(*(*the_arg)))
rel_addr1 += insn->getDataBits().size();
// Must be called before the checks below: DetectProperScoop may also update
// tied_scoops / moveable_scoops as a side effect.
to1 = DetectProperScoop(disasm, the_arg, insn, rel_addr1, false, the_arg_container);
// disp_offset / disp_size are only used by the sanity assert that follows.
auto disp_offset = disasm.getMemoryDisplacementOffset(the_arg->get(),insn); // the_arg->Memory.DisplacementAddr-disasm.EIP;
auto disp_size = (*the_arg)->getMemoryDisplacementEncodingSize(); // the_arg->Memory.DisplacementSize;
assert((0 < disp_offset) && (disp_offset <= (insn->getDataBits().size() - disp_size)));
// skip if not found, executable, or not moveable.
if (to1 && (to1->isExecuteable() || moveable_scoops.find(to1) == moveable_scoops.end()))
{
// do nothing, no log or action is necessary for pointers to code.
if(getenv("MG_VERBOSE"))
{
cout<<"Skipping (scoop exists, but exe scoop, or not moveable scoop) pcrel mem op in insn: "
<< hex << insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
<< to1->getName()<<" ("
<<hex<<to1->getStart()->getVirtualOffset()<<"-"
<<hex<<to1->getEnd()->getVirtualOffset()<<")"<<endl;
}
}
else if(to1)
{
// look for any pcrel relative relocs from fix_calls
Relocation_t* pcrel_reloc=FindRelocationWithType(insn,"pcrel");
if(pcrel_reloc)
{
if(getenv("MG_VERBOSE"))
{
cout<<"Setting pcrel mem op in insn: "
<< hex <<insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
<< to1->getName()<<" ("
<<hex<<to1->getStart()->getVirtualOffset()<<"-"
<<hex<<to1->getEnd()->getVirtualOffset()<<")"<<endl;
}
// the relocation itself is applied later; here the reference is only recorded
//ApplyPcrelMemoryRelocation(insn,to1);
pcrel_refs_to_scoops.insert({insn,to1});
}
else
{
if(getenv("MG_VERBOSE"))
{
cout<<"Absolute mem-op to scoop in insn: "
<< hex << insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
<< to1->getName()<<" ("
<<hex<<to1->getStart()->getVirtualOffset()<<"-"
<<hex<<to1->getEnd()->getVirtualOffset()<<")"<<endl;
}
// the relocation itself is applied later; here the reference is only recorded
//ApplyAbsoluteMemoryRelocation(insn,to1);
absolute_refs_to_scoops.insert({insn,to1});
}
}
// no scoop found: the two branches below differ only in the diagnostic they print
else if ( -small_memory_threshold < (int)rel_addr1 && (int)rel_addr1 < small_memory_threshold )
{
if((0 != rel_addr1) && getenv("MG_VERBOSE"))
{
cout << "Note: "<<hex<<rel_addr1<<" not declared address in (low addr thresh) :";
cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
cout << endl;
}
}
else
{
if ((0 != rel_addr1) && getenv("MG_VERBOSE"))
{
cout << "Note: "<<hex<<rel_addr1<<" not declared address in (no scoop):";
cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
cout << endl;
}
}
}
else
{
if(getenv("MG_VERBOSE"))
{
cout << "Note: "<<hex<<" no address in:";
cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
cout << endl;
}
}
}
// Rewrite the pc-relative memory operand of `insn` so that it is expressed relative
// to the start of scoop `to`:
//   - the instruction's existing "pcrel" relocation gets `to` as its WRT object, and
//   - the 4-byte displacement in the instruction bytes is replaced with
//     (original displacement - start address of `to`).
// Nothing is changed when `to` is no longer in moveable_scoops (re-pinned scoop).
// Preconditions (asserted): insn has a memory operand with a 4-byte displacement
// and carries a "pcrel" relocation.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyPcrelMemoryRelocation(Instruction_t* insn, DataScoop_t* to)
{
    const auto disasmp=DecodedInstruction_t::factory(insn);
    const auto &disasm=*disasmp;
    auto operands=disasm.getOperands();

    // don't change instructions that reference re-pinned scoops.
    // This was necessary because we were not getting the zipr_unpin_plugin
    // to undo our changes to the instruction in the case of a re-pinned scoop.
    // That problem is fixed, but it is more efficient and safer to
    // avoid editing instructions that reference re-pinned scoops.
    if (this->moveable_scoops.find(to) == this->moveable_scoops.cend())
    {
        if (getenv("MG_VERBOSE"))
        {
            cout << "Avoiding editing of insn at " << hex << insn->getBaseID() << " after repinning scoop "
                 << to->getName() << endl;
        }
        return;
    }

    auto the_arg=find_memory_operand(operands);
    assert(the_arg!=operands.end());

    unsigned int disp_offset=disasm.getMemoryDisplacementOffset(the_arg->get(),insn);
    unsigned int disp_size=(*the_arg)->getMemoryDisplacementEncodingSize();

    // note about this case: the pcrel reloc already exists for the
    // case where an instruction is moving.
    // now the relocs WRT field indicates that the target might move too.
    // will have to edit push_relocs.zpi to handle this.
    Relocation_t* pcrel_reloc=FindRelocationWithType(insn,"pcrel");
    assert(pcrel_reloc); // callers only queue instructions that have a pcrel reloc; fail loudly otherwise
    pcrel_reloc->setWRT(to);

    assert(0<disp_offset && disp_offset<=(insn->getDataBits().size() - disp_size));
    assert(disp_size==4);

    // make the displacement relative to the start of the scoop and patch it in place
    unsigned int new_disp=(*the_arg)->getMemoryDisplacement() - to->getStart()->getVirtualOffset();
    insn->setDataBits(insn->getDataBits().replace(disp_offset, disp_size, (char*)&new_disp, disp_size));
}
// Rewrite the absolute memory operand of `insn` so that it is expressed relative to
// the start of scoop `to`:
//   - a new "absoluteptr_to_scoop" relocation WRT `to` is added to the instruction, and
//   - the 4-byte displacement in the instruction bytes is replaced with
//     (original displacement - start address of `to`).
// Nothing is changed when `to` is no longer in moveable_scoops (re-pinned scoop).
// Preconditions (asserted): insn has a memory operand with a 4-byte displacement.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyAbsoluteMemoryRelocation(Instruction_t* insn, DataScoop_t* to)
{
    const auto disasmp=DecodedInstruction_t::factory(insn);
    const auto &disasm=*disasmp;
    auto operands=disasm.getOperands();

    // don't change instructions that reference re-pinned scoops.
    // This was necessary because we were not getting the zipr_unpin_plugin
    // to undo our changes to the instruction in the case of a re-pinned scoop.
    // That problem is fixed, but it is more efficient and safer to
    // avoid editing instructions that reference re-pinned scoops.
    if (this->moveable_scoops.find(to) == this->moveable_scoops.cend())
    {
        if (getenv("MG_VERBOSE"))
        {
            cout << "Avoiding editing of insn at " << hex << insn->getBaseID() << " after repinning scoop "
                 << to->getName() << endl;
        }
        return;
    }

    auto the_arg = find_memory_operand(operands);
    // same guard as the pcrel variant: never dereference the end iterator
    assert(the_arg!=operands.end());

    unsigned int disp_offset=disasm.getMemoryDisplacementOffset(the_arg->get(),insn);
    unsigned int disp_size=(*the_arg)->getMemoryDisplacementEncodingSize();
    assert(0<disp_offset && disp_offset<=(insn->getDataBits().size() - disp_size));

    auto reloc=getFileIR()->addNewRelocation(insn,0, "absoluteptr_to_scoop",to);
    (void)reloc; // just giving to the ir

    assert(disp_size==4);

    // make the displacement relative to the start of the scoop and patch it in place
    unsigned int new_disp=(*the_arg)->getMemoryDisplacement() - to->getStart()->getVirtualOffset();
    insn->setDataBits(insn->getDataBits().replace(disp_offset, disp_size, (char*)&new_disp, disp_size));
}
// Ask the STARS results which scoop `insn` references.
// Returns a scoop only when STARS use is enabled AND the instruction has both a
// static-global-range entry and a sentinel entry; the scoop is then looked up by
// the address stored in the range entry.  Returns nullptr in every other case.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym, T_Rela, T_Rel, T_Dyn, T_Extractor>::DetectAnnotationScoop(Instruction_t* insn)
{
    if (!m_use_stars)
        return nullptr;

    const auto range_it    = deep_global_static_ranges->find(insn);
    const auto sentinel_it = sentinels->find(insn);

    const auto has_range    = range_it != deep_global_static_ranges->end();
    const auto has_sentinel = sentinel_it != sentinels->end();
    if (!(has_range && has_sentinel))
        return nullptr;

    return findScoopByAddress(range_it->second);
} // end of DetectAnnotationScoop()
// Decide which data scoop the address `insn_addr`, referenced by `insn`, belongs to.
//   disasm            - decoded form of insn.
//   the_arg           - iterator to the memory operand; may be the_arg_container.end()
//                       only when `immed` is true (asserted).
//   insn              - the referencing instruction (must be non-null).
//   insn_addr         - the address (or immediate value) being resolved.
//   immed             - true when insn_addr comes from an immediate rather than a memory operand.
//   the_arg_container - the operand vector that the_arg iterates over.
// Returns the chosen scoop, or NULL when no scoop is found.
// Side effects: when the STARS answer and the heuristic answer disagree, the two
// scoops are either tied together (if adjacent) or both removed from moveable_scoops.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::DetectProperScoop(const DecodedInstruction_t& disasm, const DecodedOperandVector_t::iterator the_arg, Instruction_t* insn, VirtualOffset_t insn_addr, bool immed, const DecodedOperandVector_t &the_arg_container)
{
assert(insn);
assert(immed || (the_arg != the_arg_container.end())); // immeds don't need an argument, but memory ops do.
if (immed && (0 == insn_addr))
return NULL; // immed value of zero is not a scoop address
// Immediates below this threshold are not passed to the STARS lookup (see retSTARS below):
// 10 for DLLs, 10 pages of 4096 bytes otherwise.
const int small_memory_threshold = exe_reader->isDLL() ? 10 : 4096 * 10;
bool ValidImmed = immed && (small_memory_threshold <= ((int)insn_addr));
DataScoop_t *ret = this->findScoopByAddress(insn_addr);
// so far, we haven't run into any problems with not finding a scoop. we could later.
if (!ret)
{
// Both loops start at i == 0, so the exact address is probed again before the neighbours.
// check for things that _just_ run off the end of a scoop.
for (auto i = 0; (i < 8) && (ret == NULL); i++)
ret = findScoopByAddress(insn_addr - i);
// check for things that just miss the beginning of a scoop
for (auto i = 0; (i < 8) && (ret == NULL); i++)
ret = findScoopByAddress(insn_addr + i);
}
// See if STARS analyzed the instruction and determined which scoop it references.
DataScoop_t *retSTARS = (immed && (!ValidImmed)) ? nullptr : this->DetectAnnotationScoop(insn);
#if 1
// No scoop from the address lookup: give up (returns null), and only log if STARS had an answer.
if (!ret)
{
if (nullptr != retSTARS)
{
cout << "Detected proper scoop using annotation, not using after DetectProperScoop failure for insn at " << hex << insn->getBaseID() << endl;
}
return ret;
}
#endif
/* check to see if it's an elftable */
if (find(ALLOF(elftable_nocodeptr_names), ret->getName()) != elftable_nocodeptr_names.end())
{
/* it's an elftable, so we don't need to look so hard because */
/* we probably aren't pointing to an elf table from an instruction */
/* find middle of table */
auto mid_of_table = (ret->getStart()->getVirtualOffset() / 2) + (ret->getEnd()->getVirtualOffset() / 2);
/* look forward if above middle, else look backwards */
/* (the two captureless lambdas are combined by the conditional operator into one callable) */
const auto op = (insn_addr < mid_of_table)
? [](const VirtualOffset_t i, const VirtualOffset_t j) { return i - j; }
: [](const VirtualOffset_t i, const VirtualOffset_t j) { return i + j; }
;
/* start at begin/end of table depending on direction */
const auto addr = (insn_addr < mid_of_table)
? ret->getStart()->getVirtualOffset()
: ret->getEnd()->getVirtualOffset()
;
/* scan 128 bytes looking for a relevant scoop */
const auto thres = 128;
for (auto i = 1; i < thres; i++)
{
/* check what's here */
auto candidate = findScoopByAddress(op(addr, i));
if (candidate != NULL)
return candidate;
}
/* didn't find anything */
} /* if elftable */
/* Not an elf table use conservative and/or aggressive heuristics*/
ret = DetectProperScoop_ConsiderEndOfPrev(disasm, the_arg, insn, insn_addr, immed, ret, the_arg_container);
/* the start-of-next heuristic is only applied when the aggressive option is off */
if (!aggressive)
ret = DetectProperScoop_ConsiderStartOfNext(disasm, the_arg, insn, insn_addr, immed, ret, the_arg_container);
// Reconcile the heuristic result with the STARS result, if STARS produced one.
if (nullptr != retSTARS)
{
if (nullptr == ret)
{
// ret = retSTARS; // Dangerous to use; e.g. mov [rdi+0x200],rax will cause edit of 0x200 because RDI was resolved by STARS to a scoop address
cout << "Detected proper scoop using annotation, not using after DetectProperScoop final failure for insn at " << hex << insn->getBaseID() << endl;
}
else if (retSTARS != ret)
{
// We have two different non-null choices. We will tie the two scoops
// together if they are adjacent, and pin them both otherwise.
if (this->AreScoopsAdjacent(ret, retSTARS)) // tie adjacent scoops
{
cout << "Tieing adjacent scoops due to STARS vs. DetectProperScoop conflict for insn at " << hex << insn->getBaseID() << endl;
// the pair is always stored with the lower-addressed scoop first
if (ret->getStart()->getVirtualOffset() < retSTARS->getStart()->getVirtualOffset()) {
ScoopPair_t TiedPair(ret, retSTARS);
(void) this->tied_scoops.insert(TiedPair);
}
else {
ScoopPair_t TiedPair(retSTARS, ret);
(void) this->tied_scoops.insert(TiedPair);
}
}
else // not adjacent; must pin
{
cout << "Pinning non-adjacent scoops due to STARS vs. DetectProperScoop conflict for insn at " << hex << insn->getBaseID() << endl;
(void) this->moveable_scoops.erase(ret);
(void) this->moveable_scoops.erase(retSTARS);
}
}
}
return ret;
} // end of DetectProperScoop()
// Given an address-like operand (an immediate, or the displacement of a memory
// operand) that resolves to candidate_scoop, tie candidate_scoop to neighbouring
// scoops that the instruction might really be addressing through a folded
// constant (e.g. base+stride).
//
//   disasm            - decoded form of the instruction under analysis
//   mem_arg           - iterator to the memory operand, or mem_arg_container.end()
//                       when the value being analysed is an immediate
//   insn              - the instruction under analysis
//   insn_addr         - the address-like value found in the instruction
//   immed             - true when insn_addr came from an immediate
//   candidate_scoop   - scoop that insn_addr resolved to (may be null)
//   mem_arg_container - operand vector that mem_arg iterates over
//
// Returns candidate_scoop unchanged; the only effect is on tied_scoops and the
// ties_for_folded_constants counter.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::DetectProperScoop_ConsiderStartOfNext(
    const DecodedInstruction_t& disasm,
    const DecodedOperandVector_t::iterator mem_arg,
    Instruction_t* insn,
    VirtualOffset_t insn_addr,
    bool immed,
    DataScoop_t* candidate_scoop,
    const DecodedOperandVector_t &mem_arg_container
    )
{
    assert(immed || mem_arg!=mem_arg_container.end()); // immeds don't need an argument, but memory ops do.

    // Nothing can be tied to a scoop that was never found.  Without this guard
    // the tie loop below would hand a null scoop to insert_scoop_pair, which
    // dereferences both members of the pair.
    if(candidate_scoop==nullptr)
        return candidate_scoop;

    const auto is_lea=disasm.getMnemonic()==string("lea");
    const auto has_mem_arg=mem_arg!=mem_arg_container.end();
    const auto contains_base_reg  = has_mem_arg && (*mem_arg)->hasBaseRegister();
    const auto contains_index_reg = has_mem_arg && (*mem_arg)->hasIndexRegister();
    const auto contains_reg = contains_base_reg || contains_index_reg;
    const auto memory_access = has_mem_arg && !is_lea;
    const auto is_direct_memory_access = memory_access && !contains_reg;

    // A direct memory access (no registers involved) cannot be offset by a
    // stride, so there is nothing to tie.  Checked before the per-function
    // decode loop below so that work is skipped when it cannot matter.
    if(is_direct_memory_access)
    {
        return candidate_scoop;
    }

    // Strides to consider: every power-of-two access size for lea/immediates,
    // otherwise just the size of the memory operand itself.
    const auto consider_multiple_sizes= is_lea || immed;
    auto strides= consider_multiple_sizes ? set<int>({1,2,4,8}) : set<int>({ (int)(*mem_arg)->getArgumentSizeInBytes() });

    // get other strides from the containing function: small add/sub immediates
    // and lea displacements are treated as potential strides, in both directions.
    if(insn->getFunction())
    {
        for(Instruction_t* func_insn : insn->getFunction()->getInstructions())
        {
            const auto dp=DecodedInstruction_t::factory(func_insn);
            const auto &d=*dp;
            const auto mnemonic=d.getMnemonic();
            auto potential_stride=0;
            if( mnemonic=="add" || mnemonic=="sub")
            {
                potential_stride=d.getImmediate();
            }
            if(mnemonic=="lea")
            {
                potential_stride=d.getOperand(1)->getMemoryDisplacement();
            }
            if(abs(potential_stride)<500 && potential_stride!=0)
            {
                strides.insert(potential_stride);
                strides.insert(-potential_stride);
            }
        }
    }

    // every stride is considered in both directions.
    auto candidate_offsets=set<int>();
    for(const auto stride : strides)
    {
        candidate_offsets.insert(stride);
        candidate_offsets.insert(-stride);
    }

    // how to tie two scoops; a must end immediately before b starts.
    auto insert_scoop_pair=[&](DataScoop_t* a, DataScoop_t* b, int i, int offset)
    {
        const auto tied_scoop_pair = ScoopPair_t(a,b) ;
        assert(tied_scoop_pair.first->getEnd()->getVirtualOffset()+1 == tied_scoop_pair.second->getStart()->getVirtualOffset());
        tied_scoops.insert(tied_scoop_pair);
        cout<<" Tieing scoops "<<tied_scoop_pair.first->getName()<<" and "<<tied_scoop_pair.second->getName()<<" for i="<<dec<<i<<" offset="<<offset<<endl;
        ties_for_folded_constants++;
    };

    // how to decide if a scoop at offset i should be tied.
    //      no scoop -> no tie
    //      un-tie-able scoop -> no tie
    //      else tie
    auto tieable_scoop_at=[&](const int i) -> DataScoop_t*
    {
        DataScoop_t *this_scoop=findScoopByAddress(insn_addr+i);
        // no scoop at this addr?
        if(this_scoop==NULL)
            return NULL;
        // un-tie-able scoop at this addr?
        if(find(ALLOF(elftable_nocodeptr_names), this_scoop->getName())!=elftable_nocodeptr_names.end())
            return NULL;
        return this_scoop;
    };

    // check each offset for a scoop that needs tying to this one.
    for(const auto offset : candidate_offsets)
    {
        assert(offset!=0);
        const auto candidate_offset_scoop=findScoopByAddress(insn_addr+offset);

        // only interesting if the offset lands in a different scoop
        if(candidate_scoop == candidate_offset_scoop)
            continue;

        // tie all scoops between the start and end together, walking one byte
        // at a time away from insn_addr; stop at the first gap or untieable scoop.
        const auto step = offset<0 ? -1 : 1;
        auto prev_scoop=candidate_scoop;
        for(auto i=step; i!=offset+step; i+=step)
        {
            const auto this_scoop=tieable_scoop_at(i);
            if(!this_scoop)
                break;
            if(this_scoop!=prev_scoop)
            {
                // the lower-addressed scoop always goes first in the pair.
                if(step<0)
                    insert_scoop_pair(this_scoop,prev_scoop, i, offset);
                else
                    insert_scoop_pair(prev_scoop,this_scoop, i, offset);
            }
            prev_scoop=this_scoop;
        }
    }
    return candidate_scoop;
}
// Handle an immediate or lea whose address is exactly the first byte of scoop
// `ret`: such an address may really be the one-past-the-end address of the
// scoop that precedes it.  When the containing function also holds an
// address-generating reference to that preceding scoop, the two scoops are
// tied together so they are kept adjacent.
//
//   disasm    - decoded form of the instruction under analysis
//   insn      - the instruction under analysis
//   insn_addr - the address-like value found in the instruction
//   immed     - true when insn_addr came from an immediate
//   ret       - scoop that insn_addr resolved to (may be null)
//   the_arg / the_arg_container - not used by this function
//
// Always returns ret unchanged; the only effect is on tied_scoops.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::DetectProperScoop_ConsiderEndOfPrev(
    const DecodedInstruction_t& disasm,
    const DecodedOperandVector_t::iterator the_arg,
    Instruction_t* insn,
    VirtualOffset_t insn_addr,
    bool immed,
    DataScoop_t* ret,
    const DecodedOperandVector_t &the_arg_container
    )
{
    // no scoop, no boundary to reason about (and ret is dereferenced below).
    if(ret==nullptr)
        return ret;

    // possibility for future work:  identify cases where
    // [addr+rbx*8] that came from something like =a[i-1].  And addr==a[-1].
    // for now, memory operands that actually access memory need no additional analysis;
    // this filters out cmp, mov, test, add, etc. with a memory operand.
    const auto is_lea = disasm.getMnemonic()==string("lea");
    if(!immed && !is_lea)
        return ret;

    // now we have an immediate or an lea that's pointing to a scoop.
    // If it is not at the very start of the scoop, it is not a boundary case.
    if(insn_addr!=ret->getStart()->getVirtualOffset())
        return ret;

    // look for a scoop that abuts this one from below;
    // if there is none, we aren't in a boundary case.
    DataScoop_t *scoop_for_prev=findScoopByAddress(insn_addr-1);
    if(!scoop_for_prev)
        return ret;

    /* check to see if the immediate next instruction dereferences the destination of an lea. */
    Instruction_t* next_insn=insn->getFallthrough();
    if(next_insn == NULL)
        next_insn=insn->getTarget();
    if(next_insn && is_lea)
    {
        const auto lea_disasmp=DecodedInstruction_t::factory(insn);
        const auto &lea_disasm=*lea_disasmp;
        const string dstreg=lea_disasm.getOperand(0)->getString();

        const auto next_disasmp=DecodedInstruction_t::factory(next_insn);
        const auto &next_disasm=*next_disasmp;
        auto memarg_container=next_disasm.getOperands();
        const auto memarg=find_memory_operand(memarg_container);

        // if we found a memory operation that uses the register, with no indexing, then conclude that
        // we must access the variable after the address (not the variable before the address)
        if(memarg!=memarg_container.end() && next_disasm.getMnemonic()!="lea" && (*memarg)->getString()==dstreg )
            return ret;
    }

    // if we're in a function, check that function for other
    // address-generating references to scoop_for_prev
    if(insn->getFunction())
    {
        const auto references_prev_scoop=[&](Instruction_t* func_insn) -> bool
        {
            const auto func_insn_disasmp=DecodedInstruction_t::factory(func_insn);
            const auto &func_insn_disasm=*func_insn_disasmp;
            auto func_insn_disasm_operands=func_insn_disasm.getOperands();

            // enter instructions have 2 immediates, so we can't just "getImmediate()"
            if(func_insn_disasm.getMnemonic()=="enter")
                return false;

            // check the immediate: does it land inside scoop_for_prev?
            const auto immediate=(VirtualOffset_t)func_insn_disasm.getImmediate();
            if(scoop_for_prev->getStart()->getVirtualOffset() <= immediate &&
               immediate <= scoop_for_prev->getEnd()->getVirtualOffset())
                return true;

            // don't bother with the memory check unless we're an LEA
            if(func_insn_disasm.getMnemonic()!=string("lea"))
                return false;

            // check the memory -- find the argument that's the mem ref;
            const auto func_mem_arg=find_memory_operand(func_insn_disasm_operands);
            if(func_mem_arg!=func_insn_disasm_operands.end())
            {
                // see if the lea has a scoop reference.
                VirtualOffset_t addr=(*func_mem_arg)->getMemoryDisplacement();
                // The displacement belongs to func_insn, so the length added for a
                // relative operand must be func_insn's own length.  (The previous
                // code added the length of the outer instruction `insn`.)
                if(arg_has_relative(*(*func_mem_arg)))
                    addr+=func_insn->getDataBits().size();
                if(getFileIR()->findScoop(addr) == scoop_for_prev)
                    return true;
            }

            // not found in this insn
            return false;
        };

        const auto found_insn_it=find_if(ALLOF(insn->getFunction()->getInstructions()), references_prev_scoop);

        // no reference to scoop_for_prev found, just return;
        if(found_insn_it==insn->getFunction()->getInstructions().end())
        {
            return ret;
        }
    }

    // if we make it this far, a single function has sketchy (aka address-generating) references
    // to both scoop_for_prev and ret (or insn is in no function at all, in which case we are conservative).
    // We can't tell which way the sketchy refs go, so keep the two scoops together.
    cout<<"Boundary note: instruction "<<insn->getBaseID()<<":"<<disasm.getDisassembly()<<" has immed/lea that points at boundary case.";
    if(insn->getFunction())
        cout<<" In "<<insn->getFunction()->getName()<<".";
    cout<<endl;
    cout<<"Keep together "<<
        scoop_for_prev->getName()<<" ("<<hex<< scoop_for_prev->getStart()->getVirtualOffset()<<"-"<<scoop_for_prev->getEnd()->getVirtualOffset()<<") and "<<
        ret->getName()<<" ("<<hex<< ret->getStart()->getVirtualOffset()<<"-"<<ret->getEnd()->getVirtualOffset()<<")"<<endl;
    tied_scoops.insert(ScoopPair_t(scoop_for_prev,ret));
    return ret;
}
// Rewrite the immediate of `insn` so that it holds an offset relative to the
// start of scoop `to`, and attach an "immedptr_to_scoop" relocation (addend 0)
// from the instruction to that scoop.  Instructions that reference a scoop
// which is no longer in moveable_scoops are left untouched.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyImmediateRelocation(Instruction_t *insn, DataScoop_t* to)
{
    const auto decoded_ptr=DecodedInstruction_t::factory(insn);
    const auto &decoded=*decoded_ptr;
    const VirtualOffset_t immediate=decoded.getImmediate();

    // don't change instructions that reference re-pinned scoops.
    // This was necessary because we were not getting the zipr_unpin_plugin
    // to undo our changes to the instruction in the case of a re-pinned scoop.
    // That problem is fixed, but it is more efficient and safer to
    // avoid editing instructions that reference re-pinned scoops.
    const auto target_is_moveable = this->moveable_scoops.find(to) != this->moveable_scoops.cend();
    if (!target_is_moveable)
    {
        if (getenv("MG_VERBOSE"))
        {
            cout << "Avoiding editing of insn at " << hex << insn->getBaseID() << " after repinning scoop "
                 << to->getName() << endl;
        }
        return;
    }

    // the IR owns the relocation; we don't need the handle.
    (void)getFileIR()->addNewRelocation(insn,0, "immedptr_to_scoop", to);

    // fixme: insn bits changed here
    // sanity check: the second operand, parsed as a number, is the immediate.
    assert(strtoumax(decoded.getOperand(1)->getString().c_str(), NULL, 0) == immediate);

    // overwrite the last four bytes of the instruction with the scoop-relative value.
    const VirtualOffset_t scoop_relative = immediate - to->getStart()->getVirtualOffset();
    auto patched_bits=insn->getDataBits();
    assert(4 < patched_bits.size());
    patched_bits.replace(patched_bits.size()-4, 4, reinterpret_cast<const char*>(&scoop_relative), 4);
    insn->setDataBits(patched_bits);

    cout<<"Non-Overlapping_Globals::ApplyImmediateReloc::Setting "<<hex<<insn->getBaseID()<<" to "<<insn->getDisassembly()<<endl;
}
// Record `insn` in immed_refs_to_scoops when its immediate looks like a pointer
// into a moveable, non-executable data scoop and the instruction is one of
// mov/cmp/test/add/sub.  Does nothing for shared objects and for `enter`.
//
//   disasm  - decoded form of insn
//   the_arg - not used by this function
//   insn    - the instruction under analysis
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::HandleImmediateOperand(const DecodedInstruction_t& disasm, const DecodedOperandVector_t::iterator the_arg, Instruction_t* insn)
{
    // shared objects don't need this, you have to use a pcrel addressing mode.
    if(exe_reader->isDLL())
    {
        return;
    }

    // enter instructions have 2 immediates, so we can't just "getImmediate()"
    if(disasm.getMnemonic()=="enter")
        return;

    const VirtualOffset_t rel_addr2=disasm.getImmediate();
    auto operands=disasm.getOperands();
    DataScoop_t *to2=DetectProperScoop(disasm, operands.end(), insn, rel_addr2, true, operands);

    // immediate doesn't resolve to any scoop: only worth a note in verbose mode.
    // (The previous size-threshold test around this message could never change
    // the outcome, because the message itself required MG_VERBOSE.)
    if(to2==nullptr)
    {
        if ((0 != rel_addr2) && getenv("MG_VERBOSE"))
        {
            cout << "Note: " << hex << rel_addr2 << " not declared address in:";
            cout << insn->getBaseID() << ":" << disasm.getDisassembly();
            cout << endl;
        }
        return;
    }

    // do nothing, no log or action is necessary for (potential) pointers to code or
    // (potential) pointers to non-moveable data.
    if(to2->isExecuteable() || moveable_scoops.find(to2) == moveable_scoops.end())
        return;

    // there's no need to find pointers in other types of instructions,
    // such as mul or vfmasubadd231 (yes, that's a real instruction on x86)
    // note:  yes other instructions may have a memory operand with a pointer, but that's handled elsewhere.
    // this is for instruction's immediate fields, not their memory operand's displacement.
    //
    // compares, tests are often used because the compiler strength reduces.
    // moves are used to load addresses into a register.
    // adds are used to load addresses plus an offset into a register.
    // here's an example where sub is used with a pointer:
    //
    // DegenCount[strchr(Alphabet,iupac)-Alphabet] = ...
    //
    //   0x0000000000402a99 <+25>:    call   0x401620 <strchr@plt>
    //   0x0000000000402a9e <+30>:    mov    rbp <- rax
    //   0x0000000000402aa1 <+33>:    mov    rdi <- rbx
    //   0x0000000000402aa4 <+36>:    sub    rbp <- 0x65b500   # note:  constant is a pointer here!
    //   0x0000000000402aab <+43>:    eax <- ...
    //   0x0000000000402ab0 <+48>:    mov    DWORD PTR [rbp*4+0x65b520] <- eax
    const auto mnemonic=disasm.getMnemonic();
    const auto may_carry_pointer =
        mnemonic == string("mov")  ||
        mnemonic == string("cmp")  ||
        mnemonic == string("test") ||
        mnemonic == string("add")  ||
        mnemonic == string("sub");
    if(!may_carry_pointer)
        return;

    if(getenv("MG_VERBOSE"))
    {
        cout<<"Found non-mem ref in insn: "<<insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
            << to2->getName() <<"("
            <<hex<<to2->getStart()->getVirtualOffset()<<"-"
            <<hex<<to2->getEnd()->getVirtualOffset()<<")"<<endl;
    }

    // each instruction is expected to be recorded at most once.
    const unsigned int size=immed_refs_to_scoops.size();
    immed_refs_to_scoops.insert({insn,to2});
    assert( (size+1)==immed_refs_to_scoops.size());
    (void)size; // only read by the assert
}
// put in links between scoops and any references to them:
// every instruction in the IR is decoded once and handed to the
// memory-operand handler and then to the immediate-operand handler.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FindInstructionReferences()
{
    for(Instruction_t* const insn : getFileIR()->getInstructions())
    {
        auto decoded_ptr=DecodedInstruction_t::factory(insn);
        auto &decoded=*decoded_ptr;
        auto operands=decoded.getOperands();

        // locate the memory operand (operands.end() when there is none).
        const auto mem_operand=find_memory_operand(operands);

        if(getenv("MG_VERBOSE"))
        {
            cout<<"Considering "<<hex<<insn->getBaseID()<<":"<<decoded.getDisassembly()<<endl;
        }

        HandleMemoryOperand(decoded, mem_operand, insn, operands);
        HandleImmediateOperand(decoded, mem_operand, insn);
    }
}
// Turn the pointer-sized value stored at `offset` inside scoop `from` into an
// offset relative to the start of scoop `to`, and attach a "dataptr_to_scoop"
// relocation at that offset so the absolute address can be re-created later.
//
//   from   - scoop whose contents hold the pointer (must be non-null)
//   offset - byte offset of the pointer within from's contents
//   to     - scoop the pointer refers to (must be non-null)
//
// The pointer width is the IR's architecture bit width / 8 and must be 4 or 8.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyDataRelocation(DataScoop_t *from, unsigned int offset, DataScoop_t* to)
{
    assert(to && from);

    const unsigned int byte_width=getFileIR()->getArchitectureBitWidth()/8;

    // Work on one copy of the contents for both the read and the write.
    auto str=from->getContents();

    // Read the stored pointer with a byte copy instead of dereferencing a
    // casted char pointer: the slot is not guaranteed to be suitably aligned
    // for int / long long, and the cast-and-dereference is a type-punned load.
    // The signed intermediate types match what the previous code read through.
    VirtualOffset_t val=0;
    if(byte_width==4)
    {
        int raw=0;
        str.copy(reinterpret_cast<char*>(&raw), sizeof(raw), offset);
        val=raw;
    }
    else if(byte_width==8)
    {
        long long raw=0;
        str.copy(reinterpret_cast<char*>(&raw), sizeof(raw), offset);
        val=raw;
    }
    else
        assert(0);

    // the IR owns the relocation; we don't need the handle.
    (void)getFileIR()->addNewRelocation(from,offset, "dataptr_to_scoop", to);

    // create new value for pointer.
    const VirtualOffset_t newval=val-to->getStart()->getVirtualOffset();
    if(byte_width==4)
    {
        const unsigned int intnewval=(unsigned int)newval; // 64->32 narrowing OK.
        str.replace(offset, byte_width, reinterpret_cast<const char*>(&intnewval), byte_width);
    }
    else if(byte_width==8)
    {
        str.replace(offset, byte_width, reinterpret_cast<const char*>(&newval), byte_width);
    }
    else
        assert(0);

    from->setContents(str);
}
//
// check if val is a pointer or part of a string that mimics a pointer
//
// Returns true when `val`, found at byte `offset` of scoop `from` and resolving
// to scoop `to`, looks like the tail of a NUL-terminated string rather than an
// address.  `to` is only used for the verbose log message.
//
static inline bool is_part_of_string(VirtualOffset_t val, const DataScoop_t* from, const DataScoop_t* to, int offset)
{
    assert(from && to);

    // locate strings that look like pointers but aren't.  e.g.:  "ion\0" and "ren\0".  Note that both are null terminated.
    // this is a problem on 64-bit code because we screw up the string.
    // the byte at bits 24..31 must be 0, and the three bytes below it must be printable.
    // the least significant byte is special.  In a valid pointer, it's almost always 00 or 01 for 64-bit code or shared libraries,
    // and 0x08 0x09 for 32-bit main executables.   Very very rarely is it anything else.
    // however, 0x01, 0x08, and 0x09 aren't printable, so we don't confuse these bytes in a string for an address.
    const auto byte_of=[val](const int index) -> int
    {
        return static_cast<int>((val >> (8*index)) & 0xff);
    };

    if(byte_of(3) != 0)
        return false;
    for(const auto index : {2, 1, 0})
    {
        if(!isprint(byte_of(index)))
            return false;
    }

    // number of bytes inspected to disambiguate a string's end from a pointer.
    const int string_preheader_size=4;

    // if we don't have enough bytes of preheader, skip it.
    if(offset < string_preheader_size)
        return false;

    // every byte in the window [offset-3, offset] must be printable.
    // NOTE(review): the window includes the byte at `offset` itself, so only
    // three bytes strictly before the candidate pointer are examined even
    // though string_preheader_size is 4 -- confirm whether that is intended.
    const auto &contents=from->getContents();
    for(auto pos=offset; pos>offset-string_preheader_size; --pos)
    {
        const unsigned char b=contents[pos];
        if(!isprint(b))
            return false;
    }

    // we found enough string chars around the (candidate) pointer value, so we think that a string is here, not a pointer.
    if(getenv("MG_VERBOSE"))
    {
        cout<<"Found string as non-ref "<<hex<<val<<" at "<<from->getName()<<"+"<<offset<<" ("
            <<hex<<from->getStart()->getVirtualOffset()<<"-"
            <<hex<<from->getEnd()->getVirtualOffset()<<") to "
            <<to->getName()<<" ("
            <<hex<<to->getStart()->getVirtualOffset()<<"-"
            <<hex<<to->getEnd()->getVirtualOffset()<<")"<<endl;
    }
    return true;
}
// put in links between scoops and any references to them.
//
// Every data scoop in the IR is scanned for values that resolve to a scoop
// (via findScoopByAddress); each accepted reference is recorded in
// data_refs_to_scoops as {scoop, offset-in-scoop, target scoop}.
// A handful of ELF tables (.dynsym, .got*, .rel*, .rela*, .dynamic) get a
// scanner that understands their entry layout; everything else goes through
// the default scanner, which looks at each pointer-sized slot.
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FindDataReferences()
{
    const unsigned int byte_width=getFileIR()->getArchitectureBitWidth()/8;
    typedef function<void (DataScoop_t*)> ScannerFunction_t;

    // Print one "<prefix><val> at <from>+<offset> (<range><joiner><to> (<range>)" line.
    const auto log_ref=[](const char* prefix, const unsigned long long val, DataScoop_t* from,
                          const unsigned int offset, const char* joiner, DataScoop_t* to)
    {
        cout<<prefix<<hex<<val<<" at "<<from->getName()<<"+"<<offset<<" ("
            <<hex<<from->getStart()->getVirtualOffset()<<"-"
            <<hex<<from->getEnd()->getVirtualOffset()<<joiner
            <<to->getName()<<" ("
            <<hex<<to->getStart()->getVirtualOffset()<<"-"
            <<hex<<to->getEnd()->getVirtualOffset()<<")"<<endl;
    };

    // Record a reference (logging it first in verbose mode).
    const auto record_ref=[&](const char* prefix, const unsigned long long val, DataScoop_t* from,
                              const unsigned int offset, DataScoop_t* to)
    {
        if(getenv("MG_VERBOSE"))
            log_ref(prefix, val, from, offset, ") to ", to);
        data_refs_to_scoops.insert({from,offset,to});
    };

    // Read one pointer-sized value at byte position pos.  A byte copy is used
    // rather than dereferencing a casted char pointer (alignment / aliasing).
    // The 4-byte case is read as a signed int, as before.
    const auto read_bytewidth=[&](const string& contents, const unsigned int pos) -> long long
    {
        if(byte_width==4)
        {
            int raw=0;
            contents.copy(reinterpret_cast<char*>(&raw), sizeof(raw), pos);
            return raw;
        }
        if(byte_width==8)
        {
            long long raw=0;
            contents.copy(reinterpret_cast<char*>(&raw), sizeof(raw), pos);
            return raw;
        }
        assert(0);
        return 0;
    };

    // Offset within the scoop of a field of the table entry that starts at pos.
    const auto field_offset=[](const string::size_type pos, const void* entry, const void* field) -> unsigned int
    {
        return static_cast<unsigned int>(pos + (static_cast<const char*>(field)-static_cast<const char*>(entry)));
    };

    ScannerFunction_t got_scanner=[&](DataScoop_t* scoop)
    {
        // got scanner doesn't scan data section for shared objects since they can't have a constant address
        if(exe_reader->isDLL())
            return;

        const auto &contents=scoop->getContents();
        const auto len=contents.size();
        for ( auto i=0u; i+byte_width-1<len; i+=byte_width)
        {
            const auto val=read_bytewidth(contents,i);
            const auto to=findScoopByAddress(val);
            if(to)
                record_ref("Found ref ", val, scoop, i, to);
        }
    };

    ScannerFunction_t default_scanner=[&](DataScoop_t* scoop)
    {
        // default scanner doesn't scan data section for shared objects since they can't have a constant address
        if(exe_reader->isDLL())
            return;

        const auto &contents=scoop->getContents();
        const auto len=contents.size();

        // try not to overrun the array
        for ( auto i=0u; i+byte_width-1<len; i+=byte_width)
        {
            const auto val=read_bytewidth(contents,i);
            const auto to=findScoopByAddress(val);
            if(!to)
            {
                if((0 != val) && getenv("MG_VERBOSE"))
                {
                    cout<<"Constant "<<hex<<val<<" at "<<scoop->getName()<<"+"<<i<<" ("
                        <<hex<<scoop->getStart()->getVirtualOffset()<<"-"
                        <<hex<<scoop->getEnd()->getVirtualOffset()<<") doesn't point at scoop."<<endl;
                }
                continue;
            }

            // Pointers to code or to already-pinned data are never moved and
            // never disqualify anything.
            const auto points_at_moveable_data =
                !to->isExecuteable() && moveable_scoops.find(to) != moveable_scoops.end();

            // aggressive mode: accept the reference unless it looks like the tail of a string.
            // conservative mode: a pointer-looking constant in plain data is treated as
            //   inconclusive; never rewrite it, and re-pin the scoop it appears to point at.
            const auto move_ok = aggressive && points_at_moveable_data && !is_part_of_string(val,scoop,to,i);
            const auto disqualifies_to = !aggressive && points_at_moveable_data;

            if(move_ok)
            {
                record_ref("Found ref ", val, scoop, i, to);
            }
            else if(getenv("MG_VERBOSE"))
            {
                log_ref("Found ref-looking-constant ", val, scoop, i, ") which would otherwise be to ", to);
            }

            if(disqualifies_to)
            {
                if(getenv("MG_VERBOSE"))
                    log_ref("Ref-looking-constant ", val, scoop, i, ") is inconclusive. Repinning ", to);
                moveable_scoops.erase(to);
            }
        }
    };

    // The table scanners below copy each complete entry out of the scoop and
    // ignore any trailing bytes that do not form a whole entry.

    ScannerFunction_t dynsym_scanner=[&](DataScoop_t* scoop)
    {
        const auto &table=scoop->getContents();
        T_Sym sym;
        for(string::size_type pos=0; pos+sizeof(sym)<=table.size(); pos+=sizeof(sym))
        {
            table.copy(reinterpret_cast<char*>(&sym), sizeof(sym), pos);
            const VirtualOffset_t val=sym.st_value;
            const auto to=findScoopByAddress(val);
            if(to)
                record_ref("Found dynsym:st_value ref ", val, scoop, field_offset(pos,&sym,&sym.st_value), to);
        }
    };

    ScannerFunction_t rel_scanner=[&](DataScoop_t* scoop)
    {
        // .rel.* sections hold T_Rel entries; walking them with the T_Rela
        // entry size (as the previous code did) steps by the wrong stride.
        // The log label is left as it was.
        const auto &table=scoop->getContents();
        T_Rel rel;
        for(string::size_type pos=0; pos+sizeof(rel)<=table.size(); pos+=sizeof(rel))
        {
            table.copy(reinterpret_cast<char*>(&rel), sizeof(rel), pos);

            // handle offset field
            const VirtualOffset_t val=rel.r_offset;
            const auto to=findScoopByAddress(val);
            if(to)
                record_ref("Found rela:r_offset ref ", val, scoop, field_offset(pos,&rel,&rel.r_offset), to);
        }
    };

    ScannerFunction_t rela_scanner=[&](DataScoop_t* scoop)
    {
        const auto &table=scoop->getContents();
        T_Rela rela;
        for(string::size_type pos=0; pos+sizeof(rela)<=table.size(); pos+=sizeof(rela))
        {
            table.copy(reinterpret_cast<char*>(&rela), sizeof(rela), pos);

            // handle addend field
            {
                const VirtualOffset_t val=rela.r_addend;
                const auto to=findScoopByAddress(val);
                if(to)
                    record_ref("Found rela:r_added ref ", val, scoop, field_offset(pos,&rela,&rela.r_addend), to);
            }

            // handle offset field
            {
                const VirtualOffset_t val=rela.r_offset;
                const auto to=findScoopByAddress(val);
                if(to)
                    record_ref("Found rela:r_offset ref ", val, scoop, field_offset(pos,&rela,&rela.r_offset), to);
            }
        }
    };

    ScannerFunction_t dynamic_scanner=[&](DataScoop_t* scoop)
    {
        const auto &table=scoop->getContents();
        T_Dyn dyn;
        for(string::size_type pos=0; pos+sizeof(dyn)<=table.size(); pos+=sizeof(dyn))
        {
            table.copy(reinterpret_cast<char*>(&dyn), sizeof(dyn), pos);

            // only these tags are scanned for a scoop address in d_val.
            switch(dyn.d_tag)
            {
                case DT_INIT_ARRAY:
                case DT_FINI_ARRAY:
                case DT_GNU_HASH:
                case DT_STRTAB:
                case DT_SYMTAB:
                case DT_PLTGOT:
                case DT_JMPREL:
                case DT_RELA:
                case DT_VERNEED:
                case DT_VERSYM:
                {
                    const auto val=dyn.d_un.d_val;
                    const auto to=findScoopByAddress(val);
                    if(to)
                        record_ref("Found .dynamic:d_val ref ", val, scoop, field_offset(pos,&dyn,&dyn.d_un.d_val), to);
                    break;
                }
                default: // do nothing
                    break;
            }
        }
    };

    // special scanners for special sections
    const struct scoop_scanners_t
    {
        string name;
        ScannerFunction_t scanner_fn;
    } scoop_scanners[] = {
        { ".dynsym", dynsym_scanner },
        { ".got", got_scanner },
        { ".got.plt", got_scanner },
        { ".rel.dyn", rel_scanner },
        { ".rel.plt", rel_scanner },
        { ".rel.dyn coalesced w/.rel.plt", rel_scanner },
        { ".rela.dyn", rela_scanner },
        { ".rela.plt", rela_scanner },
        { ".rela.dyn coalesced w/.rela.plt", rela_scanner },
        { ".dynamic", dynamic_scanner }
    };

    // main algorithm: apply the right scanner for each scoop
    for(DataScoop_t* scoop : getFileIR()->getDataScoops())
    {
        const auto scanner=find_if(ALLOF(scoop_scanners), [&](const scoop_scanners_t &candidate)
            {
                return candidate.name==scoop->getName();
            });
        if(scanner!=end(scoop_scanners))
            scanner->scanner_fn(scoop);
        else
            default_scanner(scoop);
    }
}
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FilterAndCoalesceTiedScoops()
{
const auto is_in_dont_coalesce_scoops = [](const DataScoop_t* to_find) -> bool
{
const string dont_coalesce_scoops[] =
{
".dynamic",
".jcr"
};
const auto a_binder = bind1st(finder, to_find);
const auto it=find_if(ALLOF(dont_coalesce_scoops), a_binder);
return (it!=end(dont_coalesce_scoops));
};
// step 1: find everything that's tied to a pinned scoop and pin it.
// repeat until no changes.
bool changed=true;
while(changed)
{
changed=false;
for(auto it=tied_scoops.begin(); it!=tied_scoops.end(); /* nop */)
{
auto current=it++;
const ScoopPair_t& p=*current;
DataScoop_t* s1=p.first;
DataScoop_t* s2=p.second;
bool s1_moveable=contains(moveable_scoops, s1);
bool s2_moveable=contains(moveable_scoops, s2);
if(is_in_dont_coalesce_scoops(s1) || is_in_dont_coalesce_scoops(s2))
{
cout<<"Skipping coalesce of "<<s1->getName()<<" and "<<s2->getName()<<endl;
tied_scoops.erase(current);
continue;
}
if(s1_moveable && s2_moveable)
{
// do nothing if they're both unpinned.
tied_unpinned++;
}
else if(s1_moveable)
{
tied_pinned++;
// s1 is pinned to an unmoveable, so it's unmoveable.
cout<<"Re-pinning "<<s1->getName()<<endl;
moveable_scoops.erase(s1);
tied_scoops.erase(current);
changed=true;
}
else if(s2_moveable)
{
cout<<"Re-pinning "<<s2->getName()<<endl;
tied_pinned++;
// s2 is pinned to an unmoveable.
moveable_scoops.erase(s2);
tied_scoops.erase(current);
changed=true;
}
else
{
tied_nochange++;
tied_scoops.erase(current);
}
}
}
// step 2, coalesce
changed=true;
while(changed)
{
changed=false;
for(auto it=tied_scoops.begin(); it!=tied_scoops.end(); )
{
auto current=it++;
const ScoopPair_t& p=*current;
DataScoop_t* s1=p.first;
DataScoop_t* s2=p.second;
if(is_in_dont_coalesce_scoops(s1) || is_in_dont_coalesce_scoops(s2))
{
cout<<"Skipping coalesce of "<<s1->getName()<<" and "<<s2->getName()<<endl;
continue;
}
bool s1_moveable=contains(moveable_scoops, s1);
bool s2_moveable=contains(moveable_scoops, s2);
// we previously removed anything that's pinned from moveable
if(s1_moveable && s2_moveable)
{
// assert order is right
assert(s1->getStart()->getVirtualOffset() < s2->getStart()->getVirtualOffset());
// check if these are adjacent.
if(s1->getEnd()->getVirtualOffset()+1 < s2->getStart()->getVirtualOffset())
{
// pad s1 to fill hole
string new_contents=s1->getContents();
new_contents.resize(s2->getStart()->getVirtualOffset()-s1->getStart()->getVirtualOffset());
s1->getEnd()->setVirtualOffset(s2->getStart()->getVirtualOffset()-1);
}
else if(s1->getEnd()->getVirtualOffset()+1 == s2->getStart()->getVirtualOffset())
{
// do nothing if they fit perfectly.
}
else
assert(0); // overlapping scoops?
cout<<"Coalescing 2-tied, but unpinned scoops "<<s1->getName()<<" and "<<s2->getName()<<"."<<endl;
// update our inteneral data structures for how to apply relocs.
auto insn_fixup_updater=[s1,s2](set<Insn_fixup_t> &the_set)
{
unsigned int size=the_set.size();
set<Insn_fixup_t> new_elements;
auto it=the_set.begin();
while(it!=the_set.end())
{
auto current = it++;
auto replacer=*current;
if(replacer.to == s2)
{
the_set.erase(current);
replacer.to=s1;
new_elements.insert(replacer);
}
}
the_set.insert(new_elements.begin(), new_elements.end());
assert(size==the_set.size());
};
insn_fixup_updater(pcrel_refs_to_scoops);
insn_fixup_updater(absolute_refs_to_scoops);
insn_fixup_updater(immed_refs_to_scoops);
auto scoop_fixup_updater=[s1,s2](set<Scoop_fixup_t> &the_set)
{
set<Scoop_fixup_t> new_elements;
auto it=the_set.begin();
while(it!=the_set.end())
{
auto current = it++;
if(current->to == s2 || current->from==s2)
{
auto replacer=*current;
if(replacer.to==s2)
replacer.to=s1;
if(replacer.from==s2)
{
replacer.from=s1;
cout<<"Updating data_ref_to_scoops offset from "<<hex<<replacer.offset<<" to "<<replacer.offset+s1->getSize()<<endl;
replacer.offset+=s1->getSize();
}
the_set.erase(current);
new_elements.insert(replacer);
}
}
the_set.insert(new_elements.begin(), new_elements.end());
};
scoop_fixup_updater(data_refs_to_scoops);
for(auto &r : getFileIR()->getRelocations())
{
// s2 just came into existence, didn't it?
// assert(r->getWRT()!=s2);
// yes, but there may be relocs pointing at the s2 part of
// a split object, and so the reloc might get updated to point to s2 instead.
if( r->getWRT()==s2)
{
r->setWRT(s1);
r->setAddend(r->getAddend()+s1->getSize());
}
}
/*
don't remove scoop here, as it will delete s2. this bit is moved later.
getFileIR()->getAddresses().erase(s1->getEnd());
getFileIR()->getAddresses().erase(s2->getStart());
getFileIR()->getDataScoops().erase(s2); // remove s2 from the IR
*/
// s2's end addresss is about to go away, so
// update s1's end VO instead of using s2 end addr.
s1->getEnd()->setVirtualOffset(s2->getEnd()->getVirtualOffset());
moveable_scoops.erase(s2); // remove it from our analysis
unsigned int old_s1_size=s1->getContents().size();
s1->setContents(s1->getContents()+s2->getContents());
s1->setName(s1->getName()+" coalesced w/"+ s2->getName());
if(!s2->isRelRo())
s1->clearRelRo();
s1->setRawPerms( s1->getRawPerms() | s2->getRawPerms());
// we just created s2 in this pass, right?
// no, s2 could be one of the sections from the orig binary that we've been asked to move
// and it might have relocs for unpinning
//assert(s2->getRelocations().size()==0); // assert no relocs that're part of s2.
// add s2's relocs to s1.
for(auto reloc : s2->getRelocations())
{
cout<<"Adjusting reloc "<< s2->getName()<<"+"<<reloc->getOffset()<<":"<<reloc->getType()<<" to ";
reloc->setOffset(reloc->getOffset()+old_s1_size);
auto s1_relocs=s1->getRelocations();
s1_relocs.insert(reloc);
s1->setRelocations(s1_relocs);
cout << s1->getName()<<"+"<<reloc->getOffset()<<":"<<reloc->getType()<<endl;
}
// tell s2 it has no relocs so when we remove it, they don't go away.
s2->setRelocations({});
// we've processed this one.
tied_scoops.erase(current);
auto scoop_pair_first_finder=
[s2](const ScoopPair_t& p2)
{
return (p2.first==s2);
};
auto found=find_if(ALLOF(tied_scoops), scoop_pair_first_finder);
if( found!=tied_scoops.end())
{
ScoopPair_t p2=*found;
p2.first=s1;
tied_scoops.erase(found);
tied_scoops.insert(p2);
}
assert(find_if(ALLOF(tied_scoops), scoop_pair_first_finder) ==tied_scoops.end());
// finally remove s2 from the IR.
getFileIR()->removeScoop(s2);
changed=true;
break;
}
else
assert(0); // why are there pinned scoops still?
}
}
// ensure we handled eveything.
assert(tied_scoops.size()==0);
}
/**
 * Apply every fixup recorded during analysis, then unpin the moveable scoops.
 *
 * The four fixup sets (pc-relative, absolute, immediate and data references)
 * are walked in that order and each entry is handed to the matching Apply*
 * helper.  Afterwards every scoop in moveable_scoops has its start offset set
 * to 0 and its end offset set to (old end - old start).
 *
 * Setting MG_VERBOSE in the environment prints one line per applied fixup.
 */
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::UpdateScoopLocations()
{
	// The environment does not change while we iterate, so query it once
	// instead of once per fixup.
	const auto verbose = getenv("MG_VERBOSE") != nullptr;

	// apply all 3 types of relocs for instructions
	for (const auto &fixup : pcrel_refs_to_scoops)
	{
		if (verbose)
			cout << "Applying pcrel w/wrt from " << fixup.from->getDisassembly() << " to " << fixup.to->getName() << " at " << hex << fixup.from->getBaseID() << endl;
		ApplyPcrelMemoryRelocation(fixup.from, fixup.to);
	}

	for (const auto &fixup : absolute_refs_to_scoops)
	{
		if (verbose)
			cout << "Applying absptr_to_scoop from " << fixup.from->getDisassembly() << " to " << fixup.to->getName() << " at " << hex << fixup.from->getBaseID() << endl;
		ApplyAbsoluteMemoryRelocation(fixup.from, fixup.to);
	}

	for (const auto &fixup : immed_refs_to_scoops)
	{
		if (verbose)
			cout << "Applying immedptr_to_scoop from " << fixup.from->getDisassembly() << " to " << fixup.to->getName() << " at " << hex << fixup.from->getBaseID() << endl;
		ApplyImmediateRelocation(fixup.from, fixup.to);
	}

	// data-to-scoop references carry the offset inside the referencing scoop
	for (const auto &fixup : data_refs_to_scoops)
	{
		if (verbose)
			cout << "Applying dataptr_to_scoop from " << fixup.from->getName() << " to " << fixup.to->getName() << " at " << hex << fixup.offset << endl;
		ApplyDataRelocation(fixup.from, fixup.offset, fixup.to);
	}

	// unpin all the moveable scoops.
	for (auto scoop : moveable_scoops)
	{
		// compute the rebased end before the start is overwritten
		const VirtualOffset_t rebased_end = scoop->getEnd()->getVirtualOffset() - scoop->getStart()->getVirtualOffset();
		scoop->getEnd()->setVirtualOffset(rebased_end);
		scoop->getStart()->setVirtualOffset(0);
	}
}
// would be nice to have a FindRelocation function that takes a parameterized type.
/**
 * Look up a relocation on an object by its type string.
 *
 * @param obj   object whose relocation set is searched
 * @param type  relocation type to match against Relocation_t::getType()
 * @return the first relocation (in set iteration order) whose type equals
 *         type, or nullptr when none matches
 */
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
Relocation_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FindRelocationWithType(BaseObj_t* obj, std::string type)
{
	// range-for evaluates getRelocations() exactly once, so begin and end are
	// guaranteed to come from the same container object.
	for (Relocation_t* reloc : obj->getRelocations())
	{
		if (reloc->getType() == type)
			return reloc;
	}
	return nullptr;
}
/**
 * Report what the transform did on stdout.
 *
 * Scoops in the IR that are not in moveable_scoops are counted as unmoveable.
 * With MG_VERBOSE set, both groups are listed first; the leading bytes of each
 * moveable scoop are dumped, capped by MG_MAX_SCOOP_CONTENT_PRINT (default 16).
 * The "# ATTRIBUTE" / "#ATTRIBUTE" lines that follow are always printed.
 * When SELF_VALIDATE is set, asserts require a minimum amount of work.
 */
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::PrintStats()
{
	// set_difference needs both inputs in the same order, so copy the moveable
	// scoops into a DataScoopSet_t before subtracting them from the IR's scoops.
	const auto moveable_resorted=DataScoopSet_t(ALLOF(moveable_scoops));
	DataScoopSet_t unmoveable_scoops;
	set_difference(
		ALLOF(getFileIR()->getDataScoops()),
		ALLOF(moveable_resorted),
		inserter(unmoveable_scoops,unmoveable_scoops.end()));

	if(getenv("MG_VERBOSE"))
	{
		cout<<"Moveable scoops: "<<endl;
		for(DataScoop_t* scoop : moveable_scoops)
		{
			cout<<"\t"<<scoop->getName()<<", contents: "<<endl;

			const auto limit_env=getenv("MG_MAX_SCOOP_CONTENT_PRINT");
			const auto limit=limit_env ? strtoul(limit_env,nullptr,0) : 16ul;

			// dump 8 bytes at a time while more than 8 bytes remain ...
			auto pos=0u;
			while(pos+8<scoop->getSize() && pos<limit)
			{
				cout<<"\t\tat:"<<hex<<pos<<" value:0x"<<hex<<*reinterpret_cast<const uint64_t*>(&scoop->getContents().c_str()[pos])<<" "<<endl;
				pos+=8;
			}
			// ... then finish byte by byte (widened to int so it prints as a number).
			while(pos<scoop->getSize() && pos<limit)
			{
				cout<<"\t\tat:"<<hex<<pos<<" value:0x"<<hex<<static_cast<int>(*reinterpret_cast<const uint8_t*>(&scoop->getContents().c_str()[pos]))<<" "<<endl;
				pos++;
			}
		}

		cout<<"Not moveable scoops: "<<endl;
		for(DataScoop_t* scoop : unmoveable_scoops)
			cout<<"\t"<<scoop->getName()<<" at "<<hex<<scoop->getStart()->getVirtualOffset()<<endl;
	}

	// byte totals for each group and overall
	unsigned long long moveable_scoop_bytes=0;
	for(DataScoop_t* scoop : moveable_scoops)
		moveable_scoop_bytes+=scoop->getSize();

	unsigned long long unmoveable_scoop_bytes=0;
	for(DataScoop_t* scoop : unmoveable_scoops)
		unmoveable_scoop_bytes+=scoop->getSize();

	const unsigned long long total_scoop_bytes=moveable_scoop_bytes+unmoveable_scoop_bytes;

	assert(getenv("SELF_VALIDATE")==nullptr || moveable_scoops.size() >= 5);
	assert(getenv("SELF_VALIDATE")==nullptr || (immed_refs_to_scoops.size() + pcrel_refs_to_scoops.size()+absolute_refs_to_scoops.size()) > 5);

	const auto total_items=unmoveable_scoops.size()+moveable_scoops.size();
	const auto pct_items_moveable=(static_cast<float>(moveable_scoops.size())/static_cast<float>(total_items))*100.00;
	const auto pct_bytes_moved=(static_cast<double>(moveable_scoop_bytes)/static_cast<double>(total_scoop_bytes))*100.00;
	const auto pct_bytes_not_moved=(static_cast<double>(unmoveable_scoop_bytes)/static_cast<double>(total_scoop_bytes))*100.00;

	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Total_data_items="<<dec<<total_items<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Unmoveable_data_items="<<dec<<unmoveable_scoops.size()<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Moveable_data_items="<<dec<<moveable_scoops.size()<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Percent_data_items_moveable="<<std::fixed<<std::setprecision(1)<<pct_items_moveable<<"%"<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Unmoveable_data_items_in_bytes="<<dec<<unmoveable_scoop_bytes<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Moveable_data_items_in_bytes="<<dec<<moveable_scoop_bytes<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Total_data_items_in_bytes="<<dec<<total_scoop_bytes<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Percent_data_item_bytes_moved="<<std::fixed<<std::setprecision(1)<<pct_bytes_moved<<"%"<<endl;
	cout<<"# ATTRIBUTE ASSURANCE_Non-Overlapping_Globals::Percent_data_item_bytes_not_moved="<<std::fixed<<std::setprecision(1)<<pct_bytes_not_moved<<"%"<<endl;

	cout<<"# ATTRIBUTE Non-Overlapping_Globals::tied_scoops="<<dec<<tied_scoops.size()<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::pcrel_refs="<<dec<<pcrel_refs_to_scoops.size()<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::abs_refs="<<dec<<absolute_refs_to_scoops.size()<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::imm_refs="<<dec<<immed_refs_to_scoops.size()<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::data_refs="<<dec<<data_refs_to_scoops.size()<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::coalesced_scoops="<<dec<<tied_unpinned<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::repinned_scoops="<<dec<<tied_pinned<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::ties_for_folded_constants="<<dec<<ties_for_folded_constants<<endl;
	cout<<"# ATTRIBUTE Non-Overlapping_Globals::tied_scoop_pairs_that_were_already_pinned="<<dec<<tied_nochange<<endl;

	// the same counters again in the short "mg::" form
	cout<<"#ATTRIBUTE mg::unmoveable_scoops="<<dec<<unmoveable_scoops.size()<<endl;
	cout<<"#ATTRIBUTE mg::moveable_scoops="<<dec<<moveable_scoops.size()<<endl;
	cout<<"#ATTRIBUTE mg::pcrel_refs="<<dec<<pcrel_refs_to_scoops.size()<<endl;
	cout<<"#ATTRIBUTE mg::abs_refs="<<dec<<absolute_refs_to_scoops.size()<<endl;
	cout<<"#ATTRIBUTE mg::imm_refs="<<dec<<immed_refs_to_scoops.size()<<endl;
	cout<<"#ATTRIBUTE mg::data_refs="<<dec<<data_refs_to_scoops.size()<<endl;
	cout<<"#ATTRIBUTE mg::coalesced_scoops="<<dec<<tied_unpinned<<endl;
	cout<<"#ATTRIBUTE mg::repinned_scoops="<<dec<<tied_pinned<<endl;
	cout<<"#ATTRIBUTE mg::ties_for_folded_constants="<<dec<<ties_for_folded_constants<<endl;
	cout<<"#ATTRIBUTE mg::tied_scoop_pairs_that_were_already_pinned="<<dec<<tied_nochange<<endl;
}
// Explicit instantiations for the ELF32 and ELF64 flavors, so the member
// templates defined in this file are emitted for both.
template class MoveGlobals_t<Elf32_Sym, Elf32_Rela, Elf32_Rel, Elf32_Dyn, Extractor32_t>;
template class MoveGlobals_t<Elf64_Sym, Elf64_Rela, Elf64_Rel, Elf64_Dyn, Extractor64_t>;
#ifndef _LIBTRANSFORM_INSTRUCTIONCOUNT_H_
#define _LIBTRANSFORM_INSTRUCTIONCOUNT_H_
#include <irdb-transform>
#include <irdb-deep>
#include <memory>
#include <map>
#include <set>
#include <tuple>
#include <exeio.h>
#include <elfio/elfio.hpp>
#include <elfio/elfio_symbols.hpp>
#include <elf.h>
// Fixed-width aliases for ELF field types; Elf_Xword is the value type used by
// Extractor64_t's relocation-info accessors below.
using Elf_Xword = uint64_t;
using Elf_Half = uint16_t;
/**
 * Transform that decides which data scoops may be relocated ("moved"),
 * records every reference to them, and rewrites those references.
 *
 * The template parameters select the ELF record types (symbol, rela, rel,
 * dynamic) and an Extractor that wraps the matching ELF32_* / ELF64_* macros;
 * see MoveGlobals32_t and MoveGlobals64_t for the two supported combinations.
 */
template <class T_Sym, class T_Rela, class T_Rel, class T_Dyn, class Extractor>
class MoveGlobals_t : public IRDB_SDK::Transform_t
{
	public:
		MoveGlobals_t(IRDB_SDK::VariantID_t *p_variantID,
			IRDB_SDK::FileIR_t*p_variantIR,
			const std::string &p_dont_move,
			const std::string &p_move_only,
			const int p_max_moveables,
			const bool p_random,
			const bool p_aggressive,
			const bool p_use_stars=false);

		// Run the transform; the driver treats a non-zero return as failure.
		int execute(IRDB_SDK::pqxxDB_t &pqxx_interface);

	private:

		// MEDS_Annotation::MEDS_Annotations_t& getAnnotations();

		// Analysis phases.
		void ParseSyms(EXEIO::exeio * reader);
		void SetupScoopMap();
		void FilterScoops();
		void TieScoops();
		void FindInstructionReferences();
		void FindDataReferences();
		// Applies all recorded fixups, then unpins every moveable scoop.
		void UpdateScoopLocations();
		void FilterAndCoalesceTiedScoops();
		// Returns the first relocation on obj whose type equals type, or null.
		IRDB_SDK::Relocation_t* FindRelocationWithType(IRDB_SDK::BaseObj_t* obj, std::string type);
		// Prints the "ATTRIBUTE" statistics lines to stdout.
		void PrintStats();

		// Per-operand handlers.
		void HandleMemoryOperand(IRDB_SDK::DecodedInstruction_t &disasm, const IRDB_SDK::DecodedOperandVector_t::iterator the_arg,
			IRDB_SDK::Instruction_t* insn, const IRDB_SDK::DecodedOperandVector_t &the_arg_container);
		void HandleImmediateOperand(const IRDB_SDK::DecodedInstruction_t& disasm, const IRDB_SDK::DecodedOperandVector_t::iterator the_arg, IRDB_SDK::Instruction_t* insn);

		// Scoop detection helpers.
		IRDB_SDK::DataScoop_t* DetectAnnotationScoop(IRDB_SDK::Instruction_t* insn);
		IRDB_SDK::DataScoop_t* DetectProperScoop(const IRDB_SDK::DecodedInstruction_t& disasm, const IRDB_SDK::DecodedOperandVector_t::iterator the_arg, IRDB_SDK::Instruction_t* insn,
			IRDB_SDK::VirtualOffset_t immed_addr, bool immed, const IRDB_SDK::DecodedOperandVector_t &the_arg_container);

		IRDB_SDK::DataScoop_t* DetectProperScoop_ConsiderEndOfPrev(const IRDB_SDK::DecodedInstruction_t& disasm, const IRDB_SDK::DecodedOperandVector_t::iterator the_arg, IRDB_SDK::Instruction_t* insn,
			IRDB_SDK::VirtualOffset_t insn_addr, bool immed, IRDB_SDK::DataScoop_t* ret, const IRDB_SDK::DecodedOperandVector_t &the_arg_container);
		IRDB_SDK::DataScoop_t* DetectProperScoop_ConsiderStartOfNext(const IRDB_SDK::DecodedInstruction_t& disasm, const IRDB_SDK::DecodedOperandVector_t::iterator the_arg, IRDB_SDK::Instruction_t* insn,
			IRDB_SDK::VirtualOffset_t insn_addr, bool immed, IRDB_SDK::DataScoop_t* cand_scoop, const IRDB_SDK::DecodedOperandVector_t &the_arg_container);

		// Fixup appliers, one per reference kind; called from UpdateScoopLocations().
		void ApplyImmediateRelocation(IRDB_SDK::Instruction_t *insn, IRDB_SDK::DataScoop_t* to);
		void ApplyAbsoluteMemoryRelocation(IRDB_SDK::Instruction_t *insn, IRDB_SDK::DataScoop_t* to);
		void ApplyPcrelMemoryRelocation(IRDB_SDK::Instruction_t *insn, IRDB_SDK::DataScoop_t* to);
		void ApplyDataRelocation(IRDB_SDK::DataScoop_t *from, unsigned int offset, IRDB_SDK::DataScoop_t* to);

		IRDB_SDK::DataScoop_t* findScoopByAddress(const IRDB_SDK::VirtualOffset_t a) const;
		bool AreScoopsAdjacent(const IRDB_SDK::DataScoop_t *a, const IRDB_SDK::DataScoop_t *b) const;

		std::vector<T_Sym> static_symbols;
		std::vector<T_Sym> dynamic_symbols;
		EXEIO::exeio* exe_reader;

		// Orders scoops by name rather than by pointer value.
		struct cmpByName {
			bool operator()(const IRDB_SDK::DataScoop_t* a, const IRDB_SDK::DataScoop_t* b) const {
				return (a->getName() < b->getName());
			}
		};

		// Scoops currently considered safe to move.
		std::set<IRDB_SDK::DataScoop_t*> moveable_scoops;
		std::map<IRDB_SDK::DataScoop_t*,unsigned int> reasons;

		// Pairs of scoops that must stay together; the coalescing pass asserts
		// that first starts at a lower address than second.
		using ScoopPair_t = std::pair<IRDB_SDK::DataScoop_t*,IRDB_SDK::DataScoop_t*>;
		std::set<ScoopPair_t> tied_scoops;

		// sets to record what insns need to be fixed later.
		struct Insn_fixup
		{
			IRDB_SDK::Instruction_t* from; IRDB_SDK::DataScoop_t* to;

			// lexicographic order on (from, to) so the struct can live in a std::set
			bool operator <(const struct Insn_fixup& rhs) const
			{
				return std::tie(from, to) < std::tie(rhs.from, rhs.to);
			}
		};
		using Insn_fixup_t = struct Insn_fixup;
		std::set<Insn_fixup_t> pcrel_refs_to_scoops, absolute_refs_to_scoops, immed_refs_to_scoops;

		// data references to scoops
		struct Scoop_fixup
		{
			IRDB_SDK::DataScoop_t* from; unsigned int offset; IRDB_SDK::DataScoop_t* to;

			// lexicographic order on (from, offset, to)
			bool operator <(const struct Scoop_fixup & rhs) const
			{
				return std::tie(from, offset, to) < std::tie(rhs.from, rhs.offset, rhs.to);
			}
		};
		using Scoop_fixup_t = struct Scoop_fixup;
		std::set<Scoop_fixup_t> data_refs_to_scoops;

		// Counters reported by PrintStats() as coalesced_scoops, repinned_scoops,
		// tied_scoop_pairs_that_were_already_pinned and ties_for_folded_constants.
		int tied_unpinned;
		int tied_pinned;
		int tied_nochange;
		int ties_for_folded_constants;

		const std::string dont_move;
		const std::string move_only;

		// Address range keyed map of scoops.  cmpByRange orders a before b only
		// when a ends before b begins, so overlapping ranges compare equivalent.
		using RangePair_t = std::pair<IRDB_SDK::VirtualOffset_t,IRDB_SDK::VirtualOffset_t>;
		struct cmpByRange
		{
			bool operator()(const RangePair_t& a, const RangePair_t& b) const {
				return (a.second < b.first);
			}
		};
		std::map<RangePair_t, IRDB_SDK::DataScoop_t*, cmpByRange> scoop_map;

		const int max_moveables;
		const bool random;
		const bool aggressive;
		const bool m_use_stars;

		std::unique_ptr<IRDB_SDK::StaticGlobalStartMap_t > deep_global_static_ranges;
		std::unique_ptr<IRDB_SDK::RangeSentinelSet_t> sentinels;
};
// Wraps the ELF64_* field-extraction macros in member functions so they can
// be supplied to MoveGlobals_t as a template argument.
class Extractor64_t
{
	public:
		// ELF64_R_SYM applied to a relocation info value.
		Elf_Xword elf_r_sym(Elf_Xword a)
		{
			const Elf_Xword sym = ELF64_R_SYM(a);
			return sym;
		}

		// ELF64_R_TYPE applied to a relocation info value.
		Elf_Xword elf_r_type(Elf_Xword a)
		{
			const Elf_Xword type = ELF64_R_TYPE(a);
			return type;
		}

		// ELF64_ST_BIND applied to a symbol info byte.
		unsigned char elf_st_bind(unsigned char a)
		{
			const unsigned char bind = ELF64_ST_BIND(a);
			return bind;
		}

		// ELF64_ST_TYPE applied to a symbol info byte.
		unsigned char elf_st_type(unsigned char a)
		{
			const unsigned char type = ELF64_ST_TYPE(a);
			return type;
		}
};
// Wraps the ELF32_* field-extraction macros in member functions so they can
// be supplied to MoveGlobals_t as a template argument.
class Extractor32_t
{
	public:
		// ELF32_R_SYM applied to a relocation info value.
		Elf32_Word elf_r_sym(Elf32_Word a)
		{
			const Elf32_Word sym = ELF32_R_SYM(a);
			return sym;
		}

		// ELF32_R_TYPE applied to a relocation info value.
		Elf32_Word elf_r_type(Elf32_Word a)
		{
			const Elf32_Word type = ELF32_R_TYPE(a);
			return type;
		}

		// ELF32_ST_BIND applied to a symbol info byte.
		unsigned char elf_st_bind(unsigned char a)
		{
			const unsigned char bind = ELF32_ST_BIND(a);
			return bind;
		}

		// ELF32_ST_TYPE applied to a symbol info byte.
		unsigned char elf_st_type(unsigned char a)
		{
			const unsigned char type = ELF32_ST_TYPE(a);
			return type;
		}
};
// Section names treated as "ELF tables"; the driver's --elftables-only option
// appends every entry to its move-only list.
// The version-requirements section is spelled ".gnu.version_r" (with a dot);
// the previous ".gnu_version_r" entry could never match a real section name.
const static auto elftable_names= std::set<std::string> ({".dynamic",".got",".got.plt",".dynstr",".dynsym",".rel.dyn",".rela.dyn",".rel.plt",".rela.plt", ".gnu.version", ".gnu.version_r"});
// NOTE(review): presumably the ELF tables known to hold no code pointers; the
// users of this set are outside this header -- confirm.
const static auto elftable_nocodeptr_names= std::set<std::string> ({".dynamic"});
// Convenience names for the two supported instantiations: each pairs the
// ELF32 or ELF64 record types with the extractor of the same width.
using MoveGlobals32_t = class MoveGlobals_t<Elf32_Sym, Elf32_Rela, Elf32_Rel, Elf32_Dyn, Extractor32_t>;
using MoveGlobals64_t = class MoveGlobals_t<Elf64_Sym, Elf64_Rela, Elf64_Rel, Elf64_Dyn, Extractor64_t>;
#endif
#include <stdlib.h>
#include <fstream>
#include <irdb-core>
#include "mg.hpp"
#include <getopt.h>
using namespace std;
using namespace IRDB_SDK;
#define ALLOF(a) begin(a),end(a)
/**
 * Transform-step wrapper around MoveGlobals_t.
 *
 * parseArgs() turns the step's option strings into settings, and
 * executeStep() runs a 32-bit or 64-bit MoveGlobals_t (chosen by the file's
 * architecture bit width) on every file of the variant.
 */
class MoveGlobalsDriver_t : public TransformStep_t
{
	// Print the option summary.  The first line goes to stderr, the option
	// list to stdout.
	void usage(string programName)
	{
		auto et_names=string();
		for(const auto &str : elftable_names )
			et_names+=str+" ";

		cerr << "Usage: " << programName << " <variant id> <annotation file>" <<endl;
		cout<<"--elftables-only,-o Move sections titled \""<<et_names<<"\""<<endl;
		cout<<"--aggressive Use aggressive heuristics to move more variables (does not affect elftables) "<<endl;
		cout<<"--no-conservative alias for --aggressive"<<endl;
		cout<<"--conservative (default) Use conservative heuristics to increase reliability (does not affect elftables) "<<endl;
		cout<<"--no-aggressive alias for --conservative"<<endl;
		cout<<endl;
		cout<<"---- debugging options (power users only) "<<endl;
		cout<<"--use-stars Enable STARS deep analysis for more analysis precision (current default). "<<endl;
		cout<<"--no-use-stars Disable STARS deep analysis for more analysis precision. "<<endl;
		cout<<"--move,-m Move only the given objects."<<endl;
		// the option that restricts the move set is spelled --move, not --only
		cout<<"--dont,-d Dont move the listed objects (overrides --move)."<<endl;
		cout<<"--number,-n Max number of scoops to move."<<endl;
		cout<<"--random,-r Randomly select scoops to move."<<endl;
		cout<<"--help,--usage,-?,-h Display this message"<<endl;
	}

	DatabaseID_t variantID = BaseObj_t::NOT_IN_DATABASE;
	string dont_move = "";
	string move_only = "";
	size_t max_moveables = 0;
	bool random = false;
	bool aggressive = false;
	VariantID_t* pidp = nullptr;
	const string programName=string("libmove_globals.so");
	bool use_stars=true;

	/**
	 * Parse the step's options into the members above.
	 *
	 * @param step_args  option strings for this step (no program name)
	 * @return 0 on success, 1 when help was requested or an option was not
	 *         recognized (usage is printed in both cases)
	 */
	int parseArgs(const vector<string> step_args)
	{
		// getopt_long wants a C-style argv: program name first, then the
		// options.  The pointers borrow from step_args, which outlives the loop.
		auto argv = vector<char*>({const_cast<char*>("libmove_globals.so")});
		transform(ALLOF(step_args), back_inserter(argv), [](const string &s) -> char* { return const_cast<char*>(s.c_str()); } );
		const auto argc = static_cast<int>(argv.size());
		argv.push_back(nullptr); // conventional argv[argc] terminator

		// Parse some options for the transform
		const static struct option long_options[] = {
			{"elftables-only", no_argument, 0, 'o'},
			{"use-stars", no_argument, 0, 's'},
			{"no-use-stars", no_argument, 0, 't'},
			{"move", required_argument, 0, 'm'},
			{"dont", required_argument, 0, 'd'},
			{"number", required_argument, 0, 'n'},
			{"aggressive", no_argument, 0, 'a'},
			{"no-aggressive", no_argument, 0, 'A'},
			{"conservative", no_argument, 0, 'A'},
			{"no-conservative", no_argument, 0, 'a'},
			{"random", no_argument, 0, 'r'},
			{"help", no_argument, 0, 'h'},
			{"usage", no_argument, 0, '?'},
			{0,0,0,0}
		};
		// 'r' is listed so that -r works as usage() advertises.  "b:" has no
		// handler; it is kept only so an existing "-b <arg>" is still ignored
		// rather than rejected.
		const auto short_opts="b:oh?m:d:n:raAst";

		while(true)
		{
			const int c = getopt_long(argc, argv.data(), short_opts, long_options, nullptr);
			if (c == -1)
				break;

			switch(c)
			{
				case 0:
					break;
				case 'o':
					// add elftables to move only list
					for(const auto &str : elftable_names )
						move_only+= str+" ";
					break;
				case 's':
					use_stars=true;
					break;
				case 't':
					use_stars=false;
					break;
				case 'm':
					move_only+=string(" ") + optarg;
					break;
				case 'd':
					dont_move+=string(" ") + optarg;
					break;
				case 'n':
					// repeated -n options accumulate
					max_moveables+=strtoll(optarg,nullptr,0);
					break;
				case 'r':
					random=true;
					break;
				case 'a':
					cout<<"Setting aggressive mode"<<endl;
					aggressive=true;
					break;
				case 'A':
					cout<<"Setting conservative mode"<<endl;
					aggressive=false;
					break;
				case '?': // also what getopt_long returns for an unknown option
				case 'h':
					usage(programName);
					return 1;
				default:
					break;
			}
		}
		return 0;
	}

	/**
	 * Run move_globals on every file of the variant.
	 *
	 * @return 0 when every file transformed cleanly, 2 when at least one
	 *         transform reported failure, 1 when an exception was caught
	 */
	int executeStep()
	{
		variantID=getVariantID();
		auto irdb_objects=getIRDBObjects();
		auto exit_code = 0;

		/* setup the interface to the sql server */
		const auto pqxx_interface=irdb_objects->getDBInterface();
		BaseObj_t::setInterface(pqxx_interface);

		// get the variant info from the database
		pidp=irdb_objects->addVariant(variantID); // new VariantID_t(variantID);
		assert(pidp && pidp->isRegistered()==true);

		for(auto this_file : pidp->getFiles())
		{
			try
			{
				/* read the IR from the DB */
				auto firp = irdb_objects->addFileIR(variantID, this_file->getBaseID()); // new FileIR_t(*pidp, this_file);
				cout<<"Transforming "<<this_file->getURL()<<endl;
				assert(firp && pidp);

				/*
				 * Create a transformation and then
				 * invoke its execution.
				 */
				auto transformExitCode = 0;
				if (firp->getArchitectureBitWidth() == 64)
				{
					MoveGlobals64_t mg(pidp, firp, dont_move, move_only, max_moveables, random, aggressive, use_stars);
					transformExitCode = mg.execute(*pqxx_interface);
				}
				else
				{
					MoveGlobals32_t mg(pidp, firp, dont_move, move_only, max_moveables, random, aggressive, use_stars);
					transformExitCode = mg.execute(*pqxx_interface);
				}

				/*
				 * A failed transform is reported and remembered, but the
				 * remaining files are still processed.
				 */
				if (transformExitCode != 0)
				{
					cerr << programName << ": transform failed. Check logs." << endl;
					exit_code=2;
				}
			}
			catch (const DatabaseError_t &pnide)
			{
				cerr << programName << ": Unexpected database error: " << pnide << endl;
				return 1;
			}
			catch (const std::exception &exc)
			{
				// catch anything thrown within try block that derives from std::exception
				std::cerr << "Unexpected exception: " << exc.what() << endl;
				return 1;
			}
			catch (...)
			{
				cerr << programName << ": Unexpected error" << endl;
				return 1;
			}
		}
		return exit_code;
	}

	// Name under which this step is known.
	std::string getStepName(void) const override
	{
		return std::string("move_globals");
	}
};
// Factory entry point exported with C linkage: returns a freshly constructed
// move_globals step.  make_shared replaces the naked new.
extern "C"
shared_ptr<TransformStep_t> getTransformStep(void)
{
	return make_shared<MoveGlobalsDriver_t>();
}
#!/bin/bash
# the bad boys
#benchmarks="
# 400.perlbench
# 403.gcc
# 445.gobmk
# 450.soplex
# 453.povray
# 458.sjeng
# 464.h264ref
# 465.tonto
# 471.omnetpp
# 481.wrf
# 482.sphinx3
# 483.xalancbmk
# "
# all: every SPEC CPU2006 benchmark; main passes this list to each run_test call
all_benchmarks="400.perlbench 401.bzip2 403.gcc 410.bwaves 416.gamess 429.mcf 433.milc 434.zeusmp 435.gromacs 436.cactusADM 437.leslie3d 444.namd 445.gobmk 450.soplex 453.povray 454.calculix 456.hmmer 458.sjeng 459.GemsFDTD 462.libquantum 464.h264ref 465.tonto 470.lbm 471.omnetpp 473.astar 481.wrf 482.sphinx3 483.xalancbmk"
# value handed to runspec's -n option in run_test
number=1
# Fetch the SPEC2006 tree if it is missing, make sure gfortran is installed,
# then enter the tree, source its shrc and run bin/relocate.
# Returns non-zero without touching anything else when the tree cannot be
# entered (e.g. the clone failed), instead of sourcing shrc from the wrong
# directory.
setup()
{
	if [ ! -d spec2006 ]; then
		# svn co ^/spec2006/trunk spec2006
		git clone --depth 1 http://git.zephyr-software.com/allzp/spec2006.git spec2006
	fi

	if [[ ! -f /usr/bin/gfortran ]]; then
		sudo apt-get install gfortran -y
	fi

	if ! cd spec2006/; then
		echo "setup: cannot enter spec2006/" >&2
		return 1
	fi

	if [ ! -L bin ]; then
		ln -s bin.power/ bin
	fi
	source shrc
	bin/relocate
}
# Run one SPEC configuration unless its results already exist.
#   $1  name for this run; results are kept in $SPEC/result.<name>
#   $2  runspec config file
#   $3  space-separated benchmark list
# Side effects relied on by the get_raw_* reporters: sets the global
# "benchmarks" and appends the name to the global all_configs_that_were_run.
# A fresh run recreates the $PGDATABASE database and wipes $SPEC/result.
run_test()
{
	local config_name=$1
	local config=$2
	benchmarks="$3"
	all_configs_that_were_run="$all_configs_that_were_run $config_name"

	# Never continue to the "rm -Rf result/*" below from the wrong directory.
	if [[ -z $SPEC ]] || ! cd "$SPEC"; then
		echo "run_test: cannot enter SPEC directory '$SPEC'; skipping $config_name" >&2
		return 1
	fi

	if [ ! -d "result.$config_name" ]; then
		dropdb $PGDATABASE
		createdb $PGDATABASE
		$PEASOUP_HOME/tools/db/pdb_setup.sh
		rm -Rf result/*
		# $benchmarks is intentionally unquoted: one argument per benchmark
		runspec --action scrub --config "$config" $benchmarks

		echo
		echo "**************************************************************************"
		echo "Starting test of $config_name"
		echo "**************************************************************************"
		echo
		runspec --action validate --config "$config" -n $number $benchmarks

		# keep the executables and the per-benchmark peasoup logs with the results
		cp benchspec/CPU2006/*/exe/* result
		mv result "result.$config_name"
		local bench
		for bench in $benchmarks
		do
			mv benchspec/CPU2006/$bench/run/build*/peasoup*/logs "result.$config_name/$bench.log"
		done
	fi
}
# Print the size in bytes of file $1, or 0 when it does not exist.
# The path is quoted so an empty or space-containing argument is reported as
# "0" instead of slipping past the existence test.
get_size_result()
{
	local bench=$1
	local size

	if [ -e "$bench" ]; then
		size=$(stat --printf="%s" "$bench")
		# earlier attempts at thousands-grouping, kept for reference:
		#echo -n "$size"
		#LC_ALL= numfmt --grouping $size
		#LC_ALL= printf "%'d" $size
		#LC_NUMERIC=en_US printf "%'d" $size
		#LC_NUMERIC=en_US printf "%'f" $size
		#LC_NUMERIC=en_US printf "%'.f" $size
		#LC_NUMERIC=en_US printf "%'10.10f" $size
		#LC_NUMERIC=en_US /usr/bin/printf "%'d" $size
		echo $size
	else
		echo -n "0"
	fi
}
# Print (without a trailing newline) the mean SPEC ratio of benchmark $1 in
# configuration $2, taken from the "Success ... ratio=" lines of
# $SPEC/result.<config>/CPU2006.002.log.  Prints 0 when there is no usable
# result.  All working variables are local so the caller's are not clobbered.
get_result()
{
	local bench=$1
	local config=$2
	local results res
	local sum=0
	local count=0

	results=$(grep Success "$SPEC/result.$config/CPU2006.002.log"|grep -- "$bench"|grep ratio=|sed 's/.*ratio=//'|sed 's/,.*//')

	for res in $results
	do
		# ratios are decimals, so the sum has to go through bc
		sum=$(echo $sum + $res | bc)
		count=$((count + 1))
	done
	#echo sum=$sum
	#echo count=$count

	# with no results this divides by zero; bc's complaint is discarded and
	# res ends up empty, which is reported as 0 below
	res=$(echo "scale=2; $sum / $count" | bc 2> /dev/null )
	if [ "$(echo $res|wc -w)" -eq 1 ]; then
		echo -n $res
	else
		echo -n "0"
	fi
}
# global: space-separated names of every configuration passed to run_test so
# far; run_test appends to it and get_raw_results reads it
all_configs_that_were_run=""
# Print every result table for the configurations recorded by run_test.
get_raw_results()
{
	local configs="$all_configs_that_were_run"

	# $configs is deliberately unquoted so each configuration name becomes its
	# own argument; the table functions take "$1" as the first configuration,
	# which was the whole list when it was passed as a single quoted word.
	get_raw_perf_results $configs
	get_raw_size_results $configs
	get_raw_mg_results $configs
	#get_raw_fde_results $configs
}
# Print the performance table: a header naming the configurations, then one
# row per entry of the global $benchmarks with get_result's value for each
# configuration.  Also records the first argument in the global first_config.
get_raw_perf_results()
{
	configs=$*
	first_config=$1

	echo "--------------------------------------------------------------"
	echo "Performance results are:"
	echo "--------------------------------------------------------------"
	echo benchmark $configs

	for bench in $benchmarks; do
		printf '%s ' "$bench"
		for config in $configs; do
			get_result $bench $config
			printf ' '
		done
		echo
	done
}
# Print the executable-size table: one row per *_base.amd64-m64-gcc42-nn
# executable found in the first configuration's result directory, one column
# per configuration.  The "baseline" column is measured on a stripped copy.
get_raw_size_results()
{
	echo "--------------------------------------------------------------"
	echo "Size results are:"
	echo "--------------------------------------------------------------"
	configs=$*
	echo benchmark $configs

	# Work out the first configuration here instead of relying on a global
	# left behind by another function.  Re-splitting also copes with callers
	# that pass the whole list as one argument.
	set -- $configs
	local first_config=$1
	local bench config file res

	for bench in "$SPEC/result.$first_config"/*_base.amd64-m64-gcc42-nn
	do
		echo -n "$(basename "$bench" _base.amd64-m64-gcc42-nn) "
		for config in $configs
		do
			file="$SPEC/result.$config/$(basename "$bench")"
			if [[ $config == "baseline" ]]; then
				# compare against a stripped copy of the original binary
				cp "$file" /tmp/foo.exe
				strip /tmp/foo.exe
				file="/tmp/foo.exe"
			fi
			res=$(get_size_result "$file")
			#printf "%15s" $res
			echo -n " $res"
		done
		echo
	done
}
# Print one table of a move_globals log attribute.
#   $1      table title
#   $2      attribute name, e.g. mg::moveable_scoops
#   $3...   configuration names
# Each row is a benchmark from the global $benchmarks; a cell is the value
# after "=" on the matching "ATTRIBUTE <name>=" line of
# $SPEC/result.<config>/<bench>.log/move_globals.log, or N/A if the log is missing.
_print_mg_attribute_table()
{
	local title=$1
	local attribute=$2
	shift 2
	local bench config file res

	echo "--------------------------------------------------------------"
	echo "$title"
	echo "--------------------------------------------------------------"
	echo benchmark $*
	for bench in $benchmarks
	do
		echo -n $bench
		for config in $*
		do
			file="$SPEC/result.$config/${bench}.log/move_globals.log"
			res="N/A"
			if [[ -f $file ]]; then
				res=$(grep "ATTRIBUTE $attribute=" "$file"|sed "s|.*=||")
			fi
			echo -n " $res"
		done
		echo
	done
}

# Print the unmoveable and moveable scoop-count tables for the given
# configurations.
get_raw_mg_results()
{
	_print_mg_attribute_table "Move_globals unmoveable scoop count:" "mg::unmoveable_scoops" "$@"
	_print_mg_attribute_table "Move_globals moveable scoop count:" "mg::moveable_scoops" "$@"
}
# Print the FDE-count table: for every *_base.amd64-m64-gcc42-nn executable of
# the first configuration, the number of lines containing "FDE" in
# "readelf -w" output, one column per configuration.
get_raw_fde_results()
{
	echo "--------------------------------------------------------------"
	echo "FDE results are:"
	echo "--------------------------------------------------------------"
	configs=$*
	echo benchmark $configs

	# Work out the first configuration here instead of relying on a global
	# left behind by another function.
	set -- $configs
	local first_config=$1
	local bench config file res

	for bench in "$SPEC/result.$first_config"/*_base.amd64-m64-gcc42-nn
	do
		#printf "%-20s" $(basename $bench _base.amd64-m64-gcc42-nn)
		echo -n $(basename "$bench" _base.amd64-m64-gcc42-nn)
		for config in $configs
		do
			file="$SPEC/result.$config/$(basename "$bench")"
			res=$(readelf -w "$file" |grep FDE|wc -l )
			#if [[ $config == "baseline" ]]; then
			#else
			#fi
			#printf "%15s" $res
			echo -n " $res"
		done
		echo
	done
}
# Set up SPEC, run every configuration under test, then print the tables.
# PSOPTS is set only for the duration of each run_test call.
main()
{
	local zipr_flags=" --backend zipr --step-option zipr:--add-sections --step-option zipr:true"
	local trace_flags=" --step-option zipr:--traceplacement:on --step-option zipr:true"
	# The next five flag sets are not used by the runs below; they are kept
	# for ad-hoc experiments.
	local relax_flags=" --step-option zipr:--relax:on --step-option zipr:true --step-option zipr:--unpin:on --step-option zipr:false"
	local nounpin_flags=" --step-option zipr:--unpin:on --step-option zipr:false"
	local split_flags=" --step-option fill_in_indtargs:--split-eh-frame "
	local icall_flags=" --step-option fix_calls:--no-fix-icalls "
	local p1flags=" --critical-step p1transform=on "
	local mg_flags_elftables=" --critical-step move_globals=on --step-option move_globals:--elftables"
	local mg_flags_conservative=" --critical-step move_globals=on "
	local mg_aggressive_flags=" --critical-step move_globals=on --step-option move_globals:--aggressive "

	local start_dir=$(pwd)
	setup

	run_test baseline $SPEC/config/ubuntu14.04lts-64bit.cfg "$all_benchmarks"
	run_test baseline-fpic $SPEC/config/ubuntu14.04lts-64bit-fpic.cfg "$all_benchmarks"

	# should be 100% success, tested by jdh on 8/28/17 as 100% success.
	#PSOPTS="$zipr_flags " run_test zipr $SPEC/config/ubuntu14.04lts-64bit-withps.cfg "$all_benchmarks"
	PSOPTS="$zipr_flags $trace_flags " run_test zipr-trace $SPEC/config/ubuntu14.04lts-64bit-withps.cfg "$all_benchmarks"
	PSOPTS="$zipr_flags $trace_flags " run_test zipr-trace-fpic $SPEC/config/ubuntu14.04lts-64bit-fpic-withps.cfg "$all_benchmarks"

	PSOPTS="$zipr_flags $trace_flags $mg_flags_conservative" run_test mg-conservative-trace $SPEC/config/ubuntu14.04lts-64bit-withps.cfg "$all_benchmarks"
	PSOPTS="$zipr_flags $trace_flags $mg_flags_conservative" run_test mg-conservative-trace-fpic $SPEC/config/ubuntu14.04lts-64bit-fpic-withps.cfg "$all_benchmarks"

	# The aggressive runs used to reference $mg_flags_all as well, which is
	# never defined and therefore expanded to nothing; it has been dropped.
	PSOPTS="$zipr_flags $trace_flags $mg_aggressive_flags" run_test mg-aggressive-trace $SPEC/config/ubuntu14.04lts-64bit-withps.cfg "$all_benchmarks"
	PSOPTS="$zipr_flags $trace_flags $mg_aggressive_flags" run_test mg-aggressive-trace-fpic $SPEC/config/ubuntu14.04lts-64bit-fpic-withps.cfg "$all_benchmarks"

	PSOPTS="$zipr_flags $trace_flags $mg_flags_elftables" run_test mgelftables-trace $SPEC/config/ubuntu14.04lts-64bit-withps.cfg "$all_benchmarks"

	get_raw_results
	# get_raw_results baseline zipr zipr-trace split-no-fix-icalls split-no-fix-icalls-trace zipr-trace-p1 split-trace-p1 split-no-fix-icalls-trace-p1 zipr-trace-p1-rerun
}
main "$@"
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#include "AnnotationBoundaryGenerator.hpp"
#include <map>
#include <iostream>
using namespace std;
using namespace IRDB_SDK;
//using namespace MEDS_Annotation;
/**
 * Not implemented: the MEDS-annotation based implementation is compiled out
 * (see the #if 0 block), so calling this is a programming error and trips the
 * assert.
 *
 * @param func  function whose boundaries were requested (unused)
 * @return an empty vector; only reachable when asserts are compiled out
 */
vector<Range> AnnotationBoundaryGenerator::GetBoundaries(IRDB_SDK::Function_t *func)
{
	assert(0);
#if 0
	vector<Range> ranges;

	// std::multimap<VirtualOffset, MEDS_AnnotationBase>
	MEDS_Annotations_t annotations = annotParser->getAnnotations();

	for(
		set<Instruction_t*>::const_iterator it=func->getInstructions().begin();
		it!=func->getInstructions().end();
		++it
		)
	{
		Instruction_t* instr = *it;
		VirtualOffset_t irdb_vo = instr->getAddress()->getVirtualOffset();

		if (irdb_vo == 0) continue;

		VirtualOffset vo(irdb_vo);

		//std::pair<std::multimap<VirtualOffset, MEDS_AnnotationBase>::iterator,std::multimap<VirtualOffset, MEDS_AnnotationBase>::iterator> ret;
		std::pair<MEDS_Annotations_t::iterator,MEDS_Annotations_t::iterator> ret;
		ret = annotations.equal_range(vo);
		MEDS_InstructionCheckAnnotation annotation;
		MEDS_InstructionCheckAnnotation* p_annotation;
		for (MEDS_Annotations_t::iterator it = ret.first; it != ret.second; ++it)
		{
			p_annotation=dynamic_cast<MEDS_InstructionCheckAnnotation*>(it->second);
			if(p_annotation==NULL)
				continue;
			annotation = *p_annotation;
			if (annotation.isValid() && annotation.isMemset())
			{
				//cerr<<"Memset annot found"<<endl;

				int objectSize = annotation.getObjectSize();
				int offset = annotation.getStackOffset();

				Range cur;
				cur.SetOffset(offset);
				cur.SetSize(objectSize);

				if (annotation.isEbpOffset())
				{
					if(offset < 0)
					{
						ranges.push_back(cur);
					}
				} else if (annotation.isEspOffset())
				{
					if(offset >= 0)
					{
						ranges.push_back(cur);
					}
				} else
				{
					// something went wrong
					assert(0);
				}
			}
		}
	}

	return ranges;
#endif
	// With NDEBUG the assert above disappears; return an empty result rather
	// than flowing off the end of a non-void function (undefined behavior).
	return vector<Range>();
}
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#ifndef __ANNOTBOUNDGEN
#define __ANNOTBOUNDGEN
#include "PrecedenceBoundaryGenerator.hpp"
//#include "MEDS_AnnotationParser.hpp"
//#include "MEDS_AnnotationBase.hpp"
//#include "MEDS_InstructionCheckAnnotation.hpp"
// #include "VirtualOffset.hpp"
#include <fstream>
// Boundary generator that was written to derive stack-object ranges from MEDS
// annotations.  The annotation-parser member and the constructor that took it
// are commented out below, so the class currently carries no state of its own.
class AnnotationBoundaryGenerator : public PrecedenceBoundaryGenerator
{
protected:
// Disabled: parser that used to supply the MEDS annotations.
// MEDS_Annotation::MEDS_AnnotationParser *annotParser;
public:
// Disabled: constructor that used to receive the annotation parser.
// AnnotationBoundaryGenerator(MEDS_Annotation::MEDS_AnnotationParser *annotParser) : annotParser(annotParser){}
// Returns the list of ranges found for 'func'.
virtual std::vector<Range> GetBoundaries(IRDB_SDK::Function_t *func);
};
#endif
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#include "DirectOffsetInference.hpp"
using namespace std;
using namespace IRDB_SDK;
// Wrap an existing OffsetInference; the pointer is stored as-is and must not be NULL.
DirectOffsetInference::DirectOffsetInference(OffsetInference *offset_inference)
	: offset_inference(offset_inference)
{
	//TODO: throw exception
	assert(this->offset_inference != NULL);
}
// Forward the request to the wrapped OffsetInference and hand back its
// direct-access layout for 'func'.
PNStackLayout* DirectOffsetInference::GetPNStackLayout(Function_t *func)
{
	PNStackLayout* const direct_layout = offset_inference->GetDirectAccessLayout(func);
	return direct_layout;
}
// Human-readable label identifying this inference.
std::string DirectOffsetInference::GetInferenceName() const
{
	const std::string inference_name("Direct Offset Inference");
	return inference_name;
}
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#ifndef __DIRECTOFFSETINFERENCE
#define __DIRECTOFFSETINFERENCE
#include "OffsetInference.hpp"
#include "PNStackLayoutInference.hpp"
// PNStackLayoutInference implementation that delegates layout queries to an
// OffsetInference object supplied at construction time.
class DirectOffsetInference : public PNStackLayoutInference
{
protected:
// Inference object the queries are forwarded to (set by the constructor).
OffsetInference *offset_inference;
public:
// Stores 'offset_inference' for later queries.
DirectOffsetInference(OffsetInference *offset_inference);
// Returns the stack layout inferred for 'func'.
virtual PNStackLayout* GetPNStackLayout(IRDB_SDK::Function_t *func);
// Returns the display name of this inference.
virtual std::string GetInferenceName() const;
};
#endif
#include "EhUpdater.hpp"
#include <string>
#include <map>
using namespace std;
using namespace IRDB_SDK;
extern map<Function_t*, set<Instruction_t*> > inserted_instr; //used to undo inserted instructions
// see https://en.wikipedia.org/wiki/LEB128
/*
 * read_uleb128 - decode one unsigned LEB128 value from a byte buffer.
 *
 * result   - receives the decoded value (reset to 0 first).
 * position - in/out index into 'data'; on return it points just past the
 *            last byte consumed.
 * data     - buffer holding the encoded value.
 * max      - number of valid bytes in 'data'.
 *
 * Returns true when decoding consumed the buffer up to (or past) 'max',
 * i.e. position >= max on exit; false when bytes remain after the value.
 *
 * Each 7-bit group is widened to 64 bits before shifting so that encodings
 * longer than four bytes are decoded correctly; groups that would land at
 * bit 64 or above are consumed but ignored rather than shifted out of range.
 */
static bool read_uleb128
	( uint64_t &result,
	uint32_t& position,
	const uint8_t* const data,
	const uint32_t max)
{
	result = 0;
	auto shift = 0U;
	while( position < max )
	{
		const auto byte = data[position];
		position++;
		if( shift < 64 )
			result |= ( static_cast<uint64_t>( byte & 0x7f ) << shift );
		if ( ( byte & 0x80) == 0)
			break;
		shift += 7;
	}
	return ( position >= max );
}
/*
 * to_uleb128 - encode 'value' as unsigned LEB128.
 *
 * Emits the value seven bits at a time, least-significant group first; every
 * byte except the last has its high bit set.  Zero encodes as a single 0 byte.
 */
static string to_uleb128 (uint64_t value)
{
	string encoded;
	for(;;)
	{
		const uint64_t remaining = value >> 7;
		const uint8_t low_bits = static_cast<uint8_t>(value & 0x7f);
		const bool more_to_come = (remaining != 0);

		// continuation bit marks that another group follows
		encoded.push_back(static_cast<char>(more_to_come ? (low_bits | 0x80) : low_bits));

		value = remaining;
		if(!more_to_come)
			break;
	}
	return encoded;
}
/* transform any eh handling info for the FDE program*/
/*
 * Works on a copy of the FDE program obtained from 'ehpgm', rewrites the
 * offset operand of selected DWARF CFA instructions (DW_CFA_offset,
 * DW_CFA_def_cfa_offset, DW_CFA_offset_extended) to account for the change
 * from the original to the altered frame allocation size reported by
 * m_layout, and stores the modified program back with setFDEProgram().
 * Asserts on DW_CFA_def_cfa_register and DW_CFA_def_cfa_offset_sf.
 * Always returns true.
 */
bool EhUpdater_t::update_program(EhProgram_t* ehpgm)
{
assert(ehpgm);
const auto daf=ehpgm->getDataAlignmentFactor();
const auto saved_reg_size=m_layout->GetSavedRegsSize();
const auto orig_frame_size=m_layout->GetOriginalAllocSize();
const auto altered_frame_size=m_layout->GetAlteredAllocSize();
/* change the offset, as needed, in a dwarf instruction. the offset is at location 'pos' */
/* offset_pos: number of leading bytes of dwarf_insn kept unchanged in the rewritten instruction */
/* factored:   when true the stored value is multiplied by daf after decoding and divided by daf before re-encoding */
const auto change_offset=[&](string &dwarf_insn, const uint32_t offset_pos, const bool factored) -> void
{
/* handle */
/* format: [(char)(opcode+reg#)] [(uleb128) offset/data_alignment_factor] */
/* we need to adjust factored offset if it's greater than the saved reg size */
auto factored_offset=uint64_t(0);
/* NOTE(review): decoding always starts at byte 1, independent of offset_pos; for the offset_pos==2 caller this would decode the byte at position 1 rather than the offset -- confirm intent */
auto pos_to_read=(uint32_t)1;
const auto data=reinterpret_cast<const uint8_t*>(dwarf_insn.data());
const auto res=read_uleb128(factored_offset,pos_to_read,data, dwarf_insn.size());
/* read_uleb128 returns true when the decode consumed the instruction through its last byte */
assert(res);
auto offset=factored_offset;
if(factored)
offset*=daf;
/* only offsets beyond the saved-register area are shifted by the frame size delta */
if(offset>saved_reg_size)
{
const auto new_offset=offset+(altered_frame_size-orig_frame_size);
auto factored_new_offset=new_offset;
if(factored)
factored_new_offset/=daf;
const auto encoded_factored_new_offset=to_uleb128(factored_new_offset);
/* rebuild: first offset_pos bytes of the old instruction followed by the re-encoded offset */
auto new_dwarf_insn=string("");
for(auto i=0U;i<offset_pos;i++)
new_dwarf_insn.push_back(dwarf_insn[i]);
new_dwarf_insn+=encoded_factored_new_offset;
dwarf_insn=new_dwarf_insn;
}
};
/* tmppgm is declared with plain 'auto', so edits below happen on a local object until setFDEProgram() */
auto tmppgm = ehpgm->getFDEProgram();
for(auto &dwarf_insn : tmppgm)
{
/* NOTE(review): dwarf_insn[0] is a plain char; where char is signed, opcodes >= 0x80 are negative and (opcode >> 6) cast to uint8_t would not equal 0x2 -- confirm on each target */
auto opcode=dwarf_insn[0];
auto opcode_upper2=(uint8_t)(opcode >> 6);
auto opcode_lower6=(uint8_t)(opcode & (0x3f));
switch(opcode_upper2)
{
/* case DW_CFA_offset: */
/* reg should be restored from CFA+(offset*daf) */
case 0x2: /* DW_CFA_offset: */
{
change_offset(dwarf_insn,1, true);
break;
};
/* upper two bits clear: the opcode is carried in the lower six bits */
case 0:
{
switch(opcode_lower6)
{
/* sanitize */
case 0xd: /* DW_CFA_def_cfa_register: */
{
/* [ (char)opcode ] [ (uleb)register ] */
/* assert if register != sp (if bp not used) or bp (if bp is used) */
/* never update this insn */
/* NOTE(review): no break after the assert; with asserts disabled control falls into the 0xe case below */
assert(0);
}
/* handle */
case 0xe: /* DW_CFA_def_cfa_offset: */
{
/* [(char)opcode] [(uleb)offset] */
/* if offset > saved reg size, new_offset=offset+(new_frame_size-old_frame_size) */
change_offset(dwarf_insn,1,false);
break;
}
case 0x11: /*DW_CFA_def_cfa_offset_sf: */
{
/* [(char)opcode] [(sleb)offset/data_alignment_factor] */
/* if offset > saved reg size, new_offset=offset+(new_frame_size-old_frame_size) */
/* NOTE(review): no break after the assert; with asserts disabled control falls into the 0x5 case below */
assert(0);
}
case 0x5: /*DW_CFA_offset_extended: */
{
/* [ (char)opcode ] [ (uleb)reg # ] [ (uleb) offset] */
/* we need to adjust factored offset if it's greater than the saved reg size */
change_offset(dwarf_insn,2,true);
break;
}
default:
break;
}
/* no break here: control continues into the outer default, which only breaks */
}
default:
break;
}
}
ehpgm->setFDEProgram(tmppgm);
return true;
}
/*
 * Update the EH information attached to a single instruction.
 *
 * Instructions without an EH program need no work.  Otherwise the program is
 * copied through the FileIR, the copy is attached to the instruction, and
 * the copy is rewritten by update_program().
 */
bool EhUpdater_t::update_instructions(Instruction_t* insn)
{
	const auto current_pgm=insn->getEhProgram();
	if(current_pgm!=NULL)
	{
		const auto private_pgm=m_firp->copyEhProgram(*current_pgm);
		insn->setEhProgram(private_pgm);
		return update_program(private_pgm);
	}

	/* no program == no update */
	return true;
}
/*
 * Update the EH information of every instruction in m_func.
 * Per-instruction results are not inspected; the function always reports success.
 */
bool EhUpdater_t::update_instructions()
{
	// Instructions inserted for this function are already part of
	// m_func->getInstructions(), so a second pass over inserted_instr[m_func]
	// is not performed (it may have updated the same frame twice).
	const auto &function_insns=m_func->getInstructions();
	for(const auto& each_insn : function_insns)
	{
		update_instructions(each_insn);
	}
	return true;
}
/*
 * Entry point: update the EH information for m_func.
 *
 * Returns true without doing anything when the function uses a frame
 * pointer, false when the layout is not a P1 layout, and otherwise the
 * result of updating every instruction.
 */
bool EhUpdater_t::execute()
{
	if( ! m_func->getUseFramePointer() )
	{
		/* can only update for p1 functions */
		return m_layout->IsP1() ? update_instructions() : false;
	}

	/* no updates needed if a frame pointer is used */
	return true;
}
#ifndef EhUpdater
#define EhUpdater
#include <irdb-core>
#include "PNStackLayout.hpp"
// Rewrites the exception-handling (EH) programs attached to the instructions
// of one function, using the stack layout supplied at construction.
class EhUpdater_t
{
public:
// Stores the three pointers as given; none of them is dereferenced here.
EhUpdater_t(IRDB_SDK::FileIR_t* p_firp, IRDB_SDK::Function_t* p_func, PNStackLayout* p_layout)
:
m_firp(p_firp),
m_func(p_func),
m_layout(p_layout)
{
}
// Runs the update for the function passed to the constructor.
bool execute();
private:
// Updates every instruction of m_func.
bool update_instructions();
// Updates the EH program attached to a single instruction.
bool update_instructions(IRDB_SDK::Instruction_t* insn);
// Rewrites the FDE program of one EH program.
bool update_program(IRDB_SDK::EhProgram_t* ehpgm);
IRDB_SDK::FileIR_t* m_firp; // IR used to copy EH programs
IRDB_SDK::Function_t* m_func; // function being updated
PNStackLayout* m_layout; // layout queried for frame and saved-register sizes
};
#endif
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#include "General_Utility.hpp"
#include <limits.h>
#include <cstdlib>
#include <cerrno>
using namespace std;
/*
 * str2int - convert the C string 's' to an int using strtol in 'base'.
 *
 * Returns STR2_OVERFLOW / STR2_UNDERFLOW when the value does not fit in an
 * int, STR2_INCONVERTIBLE when 's' is empty or has unparsed trailing
 * characters, and STR2_SUCCESS otherwise.  'i' is written only on success.
 */
STR2NUM_ERROR str2int (int &i, char const *s, int base)
{
	errno = 0;
	char *stop = NULL;
	const long parsed = strtol(s, &stop, base);
	const bool range_error = (errno == ERANGE);

	if (parsed > INT_MAX || (range_error && parsed == LONG_MAX))
		return STR2_OVERFLOW;

	if (parsed < INT_MIN || (range_error && parsed == LONG_MIN))
		return STR2_UNDERFLOW;

	const bool empty_input = (*s == '\0');
	const bool trailing_text = (*stop != '\0');
	if (empty_input || trailing_text)
		return STR2_INCONVERTIBLE;

	i = parsed;
	return STR2_SUCCESS;
}
//TODO: what if the string represents a negative number? strtoul accepts a
//leading '-' and returns the negated value as an unsigned long, so such input
//is not rejected explicitly here. This could be made an inconvertible
//situation.
/*
 * str2uint - convert the C string 's' to an unsigned int using strtoul in 'base'.
 *
 * Returns STR2_OVERFLOW when the value does not fit in an unsigned int,
 * STR2_INCONVERTIBLE when 's' is empty or has unparsed trailing characters,
 * and STR2_SUCCESS otherwise.  'i' is written only on success.
 */
STR2NUM_ERROR str2uint (unsigned int &i, char const *s, int base)
{
	errno = 0;
	char *stop = NULL;
	const unsigned long parsed = strtoul(s, &stop, base);
	const bool range_error = (errno == ERANGE);

	if (parsed > UINT_MAX || (range_error && parsed == ULONG_MAX))
		return STR2_OVERFLOW;

	const bool empty_input = (*s == '\0');
	const bool trailing_text = (*stop != '\0');
	if (empty_input || trailing_text)
		return STR2_INCONVERTIBLE;

	i = parsed;
	return STR2_SUCCESS;
}
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#ifndef _GENERAL_UTILITY
#define _GENERAL_UTILITY
// Result codes returned by the string-to-number helpers below.
enum STR2NUM_ERROR { STR2_SUCCESS, STR2_OVERFLOW, STR2_UNDERFLOW, STR2_INCONVERTIBLE };
// Parse 's' into 'i'; 'base' defaults to 0 and is passed through to the conversion routine.
STR2NUM_ERROR str2int (int &i, char const *s, int base = 0);
// Unsigned counterpart of str2int.
STR2NUM_ERROR str2uint (unsigned int &i, char const *s, int base = 0);
#endif
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#include "OffsetInference.hpp"
#include "General_Utility.hpp"
//#include "beaengine/BeaEngine.h"
#include <cassert>
#include <iostream>
#include <cstdlib>
#include <set>
#include <fstream>
#include <vector>
#include "globals.h"
using namespace std;
using namespace IRDB_SDK;
// Return the first relocation attached to 'insn' whose type string equals
// 'type', or NULL when the instruction has no such relocation.
static Relocation_t* FindRelocation(Instruction_t* insn, string type)
{
	for(Relocation_t* candidate : insn->getRelocations())
	{
		if(candidate->getType()==type)
			return candidate;
	}
	return NULL;
}
extern int get_saved_reg_size();
//TODO: Use cfg entry point only, then use func instructions,
//TODO: matching reg expressions use max match constant
//TODO: negative offsets?
//TODO: what if func is null?
//TODO: everything operates on regex because when I first wrote this, I didn't
//know DISASM had much of this information. We should migrate to using
//this struct more. That goes for the entire PN code base as well.
//TODO: The inferences generated are highly conservative in what functions
//are considered transformable. Look at how the dealloc_flag and alloc_count
//variables are used in FindAllOffsets().
// Release every layout stored in the direct, scaled and all_offsets maps.
OffsetInference::~OffsetInference()
{
	//It is assumed that all pointers in the maps are unique
	//this is supposed to be guaranteed by the mechanisms of this
	//object
	//TODO: add some asserts to ensure no double delete
	for(auto &direct_entry : direct)
		delete direct_entry.second;

	for(auto &scaled_entry : scaled)
		delete scaled_entry.second;

	for(auto &all_entry : all_offsets)
		delete all_entry.second;
}
/*
void OffsetInference::getInstructions(vector<Instruction_t*> &instructions,libIRDB::BasicBlock_t *block,set<libIRDB::BasicBlock_t*> &block_set)
{
instructions.insert(instructions.end(),block->getInstructions().begin(),block->getInstructions().end());
block_set.insert(block);
// cerr<<"OffsetInference: getInstructions(): predecessors = "<<block->GetPredecessors().size()<<" successors = "<<block->GetSuccessors().size()<<endl;
for(
set<libIRDB::BasicBlock_t*>::const_iterator it = block->GetSuccessors().begin();
it != block->GetSuccessors().end();
++it
)
{
if(block_set.find(*it) == block_set.end())
getInstructions(instructions,*it,block_set);
}
for(
set<libIRDB::BasicBlock_t*>::const_iterator it = block->GetPredecessors().begin();
it != block->GetPredecessors().end();
++it
)
{
if(block_set.find(*it) == block_set.end())
getInstructions(instructions,*it,block_set);
}
}
*/
/*
 * SetupLayout - scan the prologue of 'func' and build its base StackLayout.
 *
 * Starting at the function's entry point, the fallthrough chain is followed
 * while instructions still belong to 'func'.  Every visited instruction is
 * recorded in in_prologue.  The disassembly of each instruction is matched
 * against the PN regular expressions to recognise: a push of the frame
 * pointer, the frame-pointer save, any other push (counted as a saved
 * register unless it matches the push/jmp pattern produced by fix calls),
 * and the stack allocation.
 *
 * Returns a newly allocated StackLayout as soon as a stack allocation with
 * an integral size is found.  Returns NULL when no such allocation is found,
 * when the allocation size is not an integer constant, or when the out-args
 * region is larger than the allocated frame.
 *
 * The regex match buffer is owned by a std::vector so it is released on
 * every return path, and the string-to-int conversion of the pushed constant
 * is performed outside of assert() so it still happens when asserts are
 * compiled out.
 */
StackLayout* OffsetInference::SetupLayout(Function_t *func)
{
	unsigned int stack_frame_size = 0;
	int saved_regs_size = 0;
	int out_args_size = func->getOutArgsRegionSize();
	bool push_frame_pointer = false;
	bool save_frame_pointer = false;
	Instruction_t *entry = func->getEntryPoint();

	if(pn_regex==NULL)
		pn_regex=new PNRegularExpressions;

	int max = PNRegularExpressions::MAX_MATCHES;

	// Value-initialised (zeroed) match buffer, freed automatically on return.
	std::vector<regmatch_t> pmatch_storage(max);
	regmatch_t *pmatch=pmatch_storage.data();

	assert(out_args_size >=0);

	//TODO: find the fallthrough of the entry block, and loop to it if necessary.

	string disasm_str;

	//loop through fallthroughs of the entry (entry will be update on every iteration)
	//until entry is null, or entry has left the function.
	while(entry != NULL && (entry->getFunction()==func))
	{
		in_prologue[entry]=true;
		string matched;

		Instruction_t* instr = entry;
		const auto disasmp=DecodedInstruction_t::factory(instr);
		const auto &disasm=*disasmp;
		disasm_str = disasm.getDisassembly();

		if(verbose_log)
			cerr << "OffsetInference: SetupLayout(): disassembled line = "<<disasm_str<< endl;

		//TODO: find push ebp, then count pushes to sub esp, stack alloc size and pushed size are fed to layout objects
		//TODO: for now I assume all pushes are 32 bits, is this a correct assumption?
		if(regexec(&(pn_regex->regex_push_ebp), disasm_str.c_str(), max, pmatch, 0)==0)
		{
			if(verbose_log)
				cerr << "OffsetInference: SetupLayout(): Push EBP Found"<<endl;

			push_frame_pointer = true;

			if(stack_frame_size != 0)
			{
				//TODO: handle this better
				if(verbose_log)
					cerr<<"OffsetInference: SetupLayout(): Stack Frame Already Allocated, Ignoring Push EBP"<<endl;

				entry = entry->getFallthrough();
				continue;
			}

			//TODO: ignoring this code for now, although it appears this code no longer
			//makes sense anyway. Don't reset the saved regs if you see another push ebp
			//just ignore it for now. EBP is usually pushed first, if it isn't
			//it is likely not going to be used as a base pointer, in which case I really
			//don't want to reset the saved regs count anyway. If it is a base pointer
			//and pushed other than first, then I don't know how this func will work
			//anyway.
		}
		else if(regexec(&(pn_regex->regex_save_fp), disasm_str.c_str(), max, pmatch, 0)==0)
		{
			save_frame_pointer = true;
		}
		else if(regexec(&(pn_regex->regex_push_anything), disasm_str.c_str(), max, pmatch, 0)==0)
		{
			if(verbose_log)
				cerr<<"OffsetInference: SetupLayout(): Push (anything) Found"<<endl;

			if(stack_frame_size != 0)
			{
				//TODO: handle this better
				if(verbose_log)
					cerr<<"OffsetInference: SetupLayout(): Stack Frame Already Allocated, Ignoring Push Instruction"<<endl;

				entry = entry->getFallthrough();
				continue;
			}

			//if the push is a constant, then check if the next instruction
			//is an unconditional jmp, if so, ignore the push, assume
			//the push is part of fixed calls.
			if(disasm.getOperand(0)->isConstant() )
			{
				//Grab the pushed value
				assert(pmatch[1].rm_so >=0 && pmatch[1].rm_eo >=0);
				int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
				matched = disasm_str.substr(pmatch[1].rm_so,mlen);

				if(entry->getFallthrough() != NULL)
				{
					Instruction_t* next = entry->getFallthrough();
					const auto next_disasmp=DecodedInstruction_t::factory(next);
					const auto &next_disasm=*next_disasmp;

					if(next_disasm.isUnconditionalBranch())
					{
						if(verbose_log)
							cerr<<"OffsetInference: SetupLayout(): Found push matching fix calls pattern, ignoring the push (i.e., not recording the bytes pushed)."<<endl;

						//find the indirect branch target instruction, and reset entry to this instruction, then continue execution of the loop.

						// The conversion is done outside assert() so it is not
						// dropped when asserts are compiled out.
						int target_addr_offset = 0;
						const STR2NUM_ERROR target_status = str2int(target_addr_offset, matched.c_str());
						assert(target_status==STR2_SUCCESS);
						(void)target_status;

						//TODO: it is better to make a map of ind branch targets, but this efficient enough for now.

						//Setting entry to null is a primitive way of checking if the target is in the same function
						//if it isn't, entry will be NULL at the end of the loop.
						auto found_reloc=false;
						for(Relocation_t* reloc : instr->getRelocations())
						{
							if(reloc->getType()==string("32-bit") || reloc->getType()==string("push64"))
							{
								found_reloc=true;
								if(reloc->getWRT()==NULL)
								{
									break;
								}
								else
								{
									// getWRT returns an BaseObj, but this reloc type expects an instruction
									// safe cast and check.
									Instruction_t* wrt_insn=dynamic_cast<Instruction_t*>(reloc->getWRT());
									assert(wrt_insn);
									if(wrt_insn->getFunction() == func)
									{
										entry = wrt_insn;
										break;
									}
								}
							}
						}

						// NOTE(review): when a relocation was found, any entry chosen from the
						// relocation's WRT above is overwritten by the address search below --
						// confirm this is the intended precedence.
						if(found_reloc)
						{
							entry=NULL;
							for(Instruction_t* cur : func->getInstructions())
							{
								if(cur->getIndirectBranchTargetAddress() == NULL)
									continue;

								int cur_ibta = (int)cur->getIndirectBranchTargetAddress()->getVirtualOffset();

								//The target instruction is found, set entry to point to this instruction
								//continue analysis from this instruction.
								if(cur_ibta == target_addr_offset)
								{
									entry = cur;
									break;
								}
							}
							continue;
						}
					}
				}
			}

			//else the push value is registered
			//TODO: assuming 4 bytes here for saved regs
			saved_regs_size += get_saved_reg_size();
		}
		else if(regexec(&(pn_regex->regex_stack_alloc), disasm_str.c_str(), max, pmatch, 0)==0)
		{
			if(verbose_log)
				cerr << "OffsetInference: FindAllOffsets(): Found Stack Alloc"<<endl;

			//TODO: Is this the way this situation should be handled?
			//The first esp sub instruction is considered the stack allocation, all other subs are ignored
			//Given that I return when the first one is found, this is probably a useless check.
			if(stack_frame_size != 0)
			{
				if(verbose_log)
					cerr <<"OffsetInference: FindAllOffsets(): Stack Alloc Previously Found, Ignoring Instruction"<<endl;

				entry = entry->getFallthrough();
				continue;
			}

			//extract K from: sub esp, K
			if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
			{
				int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
				matched = disasm_str.substr(pmatch[1].rm_so,mlen);

				//extract K
				if(str2uint(stack_frame_size, matched.c_str())!= STR2_SUCCESS)
				{
					//If this occurs, then the found stack size is not a
					//constant integer, so it must be a register.

					//TODO: is this what I really want to do?
					if(verbose_log)
						cerr<<"OffsetInference: LayoutSetup(): Found non-integral stack allocation ("<<matched<<") before integral stack allocation, generating a null layout inference for function "<<func->getName()<<endl;

					return NULL;
				}

				//else
				if(verbose_log)
					cerr<<"OffsetInference: LayoutSetup(): Stack alloc Size = "<<stack_frame_size<<
						" Saved Regs Size = "<<saved_regs_size<<" out args size = "<<out_args_size<<endl;

				//TODO: with the new code for determine if a frame pointer exists
				//I don't consider the case where the frame pointer is pushed but
				//ebp is not setup like a frame pointer. In this case, ebp acts
				//like a real general purpose register.
				//The hack for now is to check if ebp is pushed, but the frame pointer
				//is not saved. In this case, consider ebp as another saved reg
				//and add to the save of the saved regs.
				//When you fix this, look at PNTransformDriver in the canary_rewrite
				//subroutine. You will see a case where there is a check for the frame
				//pointer, and an additional 4 bytes is added. This should be removed
				//in the future to only use the size of the saved regs, but
				//this changes any ebp relative offset calculations to remove 4 bytes.
				//Confusing, I know.
				if(push_frame_pointer&&!save_frame_pointer)
					saved_regs_size +=get_saved_reg_size();

				//There is now enough information to create the PNStackLayout objects
				if((unsigned)stack_frame_size<(unsigned)out_args_size) // what?
				{
					cerr<<"****************************************************************"<<endl;
					cerr<<"****************************************************************"<<endl;
					cerr<<"**Insanity coming from STARS, out_args_size > stack_frame_size**"<<endl;
					cerr<<"****************************************************************"<<endl;
					cerr<<"****************************************************************"<<endl;
					return NULL;
				}

				return new StackLayout("All Offset Layout",func->getName(),stack_frame_size,saved_regs_size,(push_frame_pointer&&save_frame_pointer),out_args_size);
			}
		}

		entry = entry->getFallthrough();
	}

	return NULL;
}
// Should we punt on the P1 transform when we see lea reg,[rsp+k] where
// the offset k takes us above the local stack frame, into saved regs or inargs
// or the return address? It is often the case that the address is used as a
// loop sentinel and no memory access occurs there, but failing to take into
// account the padding that gets inserted below that address causes problems.
// More precise analyses from STARS could be used to avoid punting on P1 altogether,
// but not many functions are affected in the typical binary.
#define PN_PUNT_ON_LEA_RSP_ABOVE_STACK_FRAME 1
//TODO: what about moving esp into a register?
//TODO: Try catches for exceptions thrown by PNStackLayout, for now asserts will fail in PNStackLayout
void OffsetInference::FindAllOffsets(Function_t *func)
{
StackLayout *pn_all_offsets = NULL;
StackLayout *pn_direct_offsets = NULL;
StackLayout *pn_scaled_offsets = NULL;
StackLayout *pn_p1_offsets = NULL;
int max = PNRegularExpressions::MAX_MATCHES;
//regmatch_t pmatch[max];
regmatch_t *pmatch = new regmatch_t[max];
assert(pmatch);
memset(pmatch, 0, sizeof(regmatch_t) * max);
unsigned int stack_frame_size = 0;
unsigned int saved_regs_size = 0;
int ret_cnt = 0;
bool lea_sanitize = false;
//TODO: hack for T&E to make inferences more conservative
bool dealloc_flag = false;
bool has_frame_pointer = false;
int alloc_count = 0;
//TODO: a hack for when ebp is used as an index. If found
//only p1 should be attempted.
bool PN_safe = true;
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets(): Looking at Function = " << func->getName() << endl;
// libIRDB::ControlFlowGraph_t cfg(func);
// libIRDB::BasicBlock_t *block = cfg.getEntry();
//TODO: this is an addition for TNE to detect direct recursion,
//in the future the call graph should be analyzed to find all recursion.
// Instruction_t *first_instr = *(block->getInstructions().begin());
Instruction_t *first_instr = func->getEntryPoint();
// pn_all_offsets = SetupLayout(block,func);
pn_all_offsets = SetupLayout(func);
int out_args_size = func->getOutArgsRegionSize();
if (pn_all_offsets != NULL)
{
stack_frame_size = pn_all_offsets->GetAllocSize();
saved_regs_size = pn_all_offsets->GetSavedRegsSize();
has_frame_pointer = pn_all_offsets->HasFramePointer();
assert(out_args_size >= 0);
pn_direct_offsets = new StackLayout("Direct Offset Inference", func->getName(), stack_frame_size, saved_regs_size, has_frame_pointer, out_args_size);
pn_scaled_offsets = new StackLayout("Scaled Offset Inference", func->getName(), stack_frame_size, saved_regs_size, has_frame_pointer, out_args_size);
//do not consider out args for p1
pn_p1_offsets = new StackLayout("P1 Offset Inference", func->getName(), stack_frame_size, saved_regs_size, has_frame_pointer, 0);
}
else
{
direct[func] = NULL;
scaled[func] = NULL;
all_offsets[func] = NULL;
p1[func] = NULL;
return;
}
//Just checking that the entry point has no predecessors
//assert(block->GetPredecessors().size() !=0);
#if 0
//put all instructions into one vector
vector<Instruction_t*> instructions;
set<libIRDB::BasicBlock_t*> block_set;
getInstructions(instructions,block,block_set);
if(instructions.size() != func->getInstructions().size())
{
cerr<<"OffsetInference: FindAllOffsets(): Number of CFG found instructions does not equal Function_t found instructions"<<endl;
}
//Checking that getInstructions hasn't screwed up
assert(instructions.size() != 0);
#endif
//TODO: should I start modifying at the entry point?
for(
set<Instruction_t*>::const_iterator it = func->getInstructions().begin();
it!=func->getInstructions().end();
++it
)
{
string matched;
Instruction_t* instr = *it;
DatabaseID_t InstID = instr->getBaseID();
string disasm_str;
const auto disasmp = DecodedInstruction_t::factory(instr);
const auto &disasm = *disasmp;
disasm_str = disasm.getDisassembly() /*CompleteInstr*/;
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets(): ID =" << InstID << " disassembled line = " << disasm_str << endl;
#if 0
//TODO: find push ebp, then count pushes to sub esp, stack alloc size and pushed size are fed to layout objects
//TODO: for now I assume all pushes are 32 bits, is this a correct assumption?
if(regexec(&(pn_regex->regex_push_ebp), disasm_str.c_str(), max, pmatch, 0)==0)
{
cerr << "OffsetInference: FindAllOffsets(): Push EBP Found"<<endl;
if(stack_frame_size != 0)
{
//TODO: handle this better
cerr<<"OffsetInference: FindAllOffsets(): Stack Frame Already Allocated, Ignoring Push EBP"<<endl;
continue;
}
else
{
saved_regs_size = 0;
}
}
else if(regexec(&(pn_regex->regex_push_anything), disasm_str.c_str(), max, pmatch, 0)==0)
{
cerr<<"OffsetInference: FindAllOffsets(): Push (anything) Found"<<endl;
if(stack_frame_size != 0)
{
//TODO: handle this better
cerr<<"OffsetInference: FindAllOffsets(): Stack Frame Already Allocated, Ignoring Push Instruction"<<endl;
continue;
}
else
{
//TODO: assuming 4 bytes here for saved regs
saved_regs_size += get_saved_reg_size();
}
}
else if(regexec(&(pn_regex->regex_stack_alloc), disasm_str.c_str(), max, pmatch, 0)==0)
{
cerr << "OffsetInference: FindAllOffsets(): Found Stack Alloc"<<endl;
//TODO: Is this the way this situation should be handled?
//The first esp sub instruction is considered the stack allocation, all other subs are ignored
if(stack_frame_size != 0)
{
cerr <<"OffsetInference: FindAllOffsets(): Stack Alloc Previously Found, Ignoring Instruction"<<endl;
continue;
}
//extract K from: sub esp, K
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so,mlen);
//extract K
stack_frame_size = strtol(matched.c_str(),NULL,0);
cerr<<"OffsetInference: FindAllOffsets(): Stack alloc Size = "<<stack_frame_size<<
" Saved Regs Size = "<<saved_regs_size<<" out args size = "<<out_args_size<<endl;
//There is now enough information to create the PNStackLayout objects
pn_all_offsets = new PNStackLayout("All Offset Layout",func->getName(),stack_frame_size,saved_regs_size,out_args_size);
pn_direct_offsets = new PNStackLayout("Direct Offset Layout",func->getName(),stack_frame_size,saved_regs_size,out_args_size);
pn_scaled_offsets = new PNStackLayout("Scaled Offset Layout", func->getName(),stack_frame_size,saved_regs_size,out_args_size);
pn_p1_offsets = new PNStackLayout("P1 Layout",func->getName(),stack_frame_size,saved_regs_size,out_args_size);
}
}
else
#endif
if (regexec(&(pn_regex->regex_push_anything), disasm_str.c_str(), max, pmatch, 0) == 0)
{
Instruction_t* ft = instr->getFallthrough();
const auto reloc1 = FindRelocation(instr, "32-bit");
const auto reloc2 = FindRelocation(instr, "push64");
if (reloc1 != NULL || reloc2 != NULL)
{
/* definite a push from a fixed calls */
}
else if (ft && !ft->getFallthrough() &&
(ft->getTarget() == NULL || ft->getTarget()->getFunction() != instr->getFunction()))
{
/* probably a push/jmp converted by fix calls */
/* can ignore this push */
}
else if (!in_prologue[instr])
{
cerr << "Found push instruction not in prologue, marking as not canary safe";
cerr << "Insn =" << disasm_str << " ID = " << InstID << endl;
pn_direct_offsets->SetCanarySafe(false);
pn_scaled_offsets->SetCanarySafe(false);
pn_all_offsets->SetCanarySafe(false);
pn_p1_offsets->SetCanarySafe(false);
}
} // end if push anything
/* check for an lea with an rsp in it -- needs to be done before other regex's */
if (regexec(&(pn_regex->regex_lea_rsp), disasm_str.c_str(), 5, pmatch, 0) == 0)
{
if (verbose_log)
cerr << "OffsetInference: lea_rsp found: ID = " << InstID << endl;
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
if (verbose_log)
cerr << "OffsetInference: lea_rsp found const" << endl;
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract displacement
int offset = disasm.getOperand(1)->getMemoryDisplacement() /*Argument2.Memory.Displacement*/;
if (offset < 0)
{
if (verbose_log)
cerr << "OffsetInference: lea_rsp neg offset sanitize" << endl;
lea_sanitize = true;
}
unsigned int uoffset = (unsigned int) offset;
/* if this lea is pointing to saved regs */
if (uoffset >= stack_frame_size)
{
if (uoffset < (saved_regs_size + stack_frame_size)) {
if (verbose_log)
cerr << "OffsetInference: lea_rsp found in saved regs area" << endl;
lea_sanitize = true;
}
else {
if (verbose_log)
cerr << "OffsetInference: lea_rsp found above saved regs area BLAH BLAH" << endl;
}
#if PN_PUNT_ON_LEA_RSP_ABOVE_STACK_FRAME
direct[func] = NULL;
scaled[func] = NULL;
all_offsets[func] = NULL;
p1[func] = NULL;
cerr << "OffsetInference: lea_rsp above local frame, punting on P1 transform for func " << func->getName() << endl;
return;
#endif
}
else if (verbose_log) {
cerr << "OffsetInference: lea_rsp found in local stack frame" << endl;
}
}
} // end if lea_rsp
// now, on to doing offset identification
if (regexec(&(pn_regex->regex_stack_dealloc_implicit), disasm_str.c_str(), max, pmatch, 0) == 0)
{
dealloc_flag = true;
//TODO: there needs to be a check of lea esp, [ebp-<const>] to make sure const is not in the current stack frame.
}
else if (regexec(&(pn_regex->regex_ret), disasm_str.c_str(), max, pmatch, 0) == 0)
{
++ret_cnt;
}
else if(regexec(&(pn_regex->regex_and_esp), disasm_str.c_str(), max, pmatch, 0) == 0)
{
//TODO: decide how to better handle this option.
//Right now I am going to enforce in PNTransformDriver that
//the alignment instruction is removed.
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets(): Layout is not canary safe" << endl;
pn_direct_offsets->SetCanarySafe(false);
pn_scaled_offsets->SetCanarySafe(false);
pn_all_offsets->SetCanarySafe(false);
pn_p1_offsets->SetCanarySafe(false);
} // end if AND RSP with mask for stack alignment
else if (regexec(&(pn_regex->regex_stack_alloc), disasm_str.c_str(), max, pmatch, 0) == 0)
{
//check if the stack allocation uses an integral offset.
//extract K from: sub esp, K
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract K
unsigned int scheck;
if (str2uint(scheck, matched.c_str()) != STR2_SUCCESS)
{
// If this occurs, then the found stack size is not a
// constant integer, so it must be a register.
// Even though I am specifying only p1 should be performed
// I am still going to set this flag for all transforms.
pn_direct_offsets->SetStaticStack(false);
pn_scaled_offsets->SetStaticStack(false);
pn_all_offsets->SetStaticStack(false);
pn_p1_offsets->SetStaticStack(false);
PN_safe = false;
// Consider this case not canary safe for now
// TODO: can I make this canary safe?
pn_direct_offsets->SetCanarySafe(false);
pn_scaled_offsets->SetCanarySafe(false);
pn_all_offsets->SetCanarySafe(false);
pn_p1_offsets->SetCanarySafe(false);
if (verbose_log)
cerr << "OffsetInference: instruction contains a dynamic stack allocation, not pn_safe" << endl;
// TODO: this output should be removed after TNE
// Only used to give Jason an indication that a
// non-static func has been detected.
ofstream dynstackfile;
dynstackfile.open("dynamic_stack.log", fstream::out|fstream::app);
if (dynstackfile.is_open())
{
// I don't think this can happen, but I really don't want
// to add a null pointer exception to TNE
if (instr == NULL || instr->getAddress() == NULL)
{
dynstackfile<<func->getName() << " : " << disasm_str << endl;
}
else
{
dynstackfile << func->getName() << " : " << hex << instr->getAddress()->getVirtualOffset() << " : " << disasm_str << endl;
}
dynstackfile.close();
}
continue;
}
}
++alloc_count;
if (alloc_count > 1)
{
if (verbose_log)
cerr << "OffsetInference: integral stack allocations exceeded 1, abandon inference" << endl;
break;
}
} // end if stack allocation instruction
// TODO: hack for TNE 2, if we see a jmp to an esp or ebp relative address, ignore this function entirely
// The reason is fix calls will fix an esp/ebp relative call by adding 4 to the original address and pushing
// before the inserted jmp. This gives the false impression that there is a boundary at this location
// and also gives a false impression that the location should be modified using the wrong boundary, even if
// p1 is used only. Specifically this occurred when the frame size was 0x20, and the call was to esp+0x1c
// the fix call because a jmp esp+0x20 which was outside the frame, and PN corrected by changing the offset
// to reflect the padding.
else if (disasm.isUnconditionalBranch() /*Instruction.BranchType == JmpType*/)
{
if (regexec(&(pn_regex->regex_esp_scaled), disasm_str.c_str(), max, pmatch, 0) == 0 ||
regexec(&(pn_regex->regex_esp_direct), disasm_str.c_str(), max, pmatch, 0) == 0 ||
regexec(&(pn_regex->regex_ebp_scaled), disasm_str.c_str(), max, pmatch, 0) == 0 ||
regexec(&(pn_regex->regex_ebp_direct), disasm_str.c_str(), max, pmatch, 0) == 0)
{
cerr << "OffsetInference: FindAllOffsets(): Layout contains a jmp relative to esp or ebp, ignore function for now" << endl;
direct[func] = NULL;
scaled[func] = NULL;
all_offsets[func] = NULL;
p1[func] = NULL;
// TODO: cleanup memory, since this is all so ugly at the moment, I'm inclined to leak memory than
// to risk a segfault deleting a pointer.
return;
}
} // end if unconditional branch
else if (regexec(&(pn_regex->regex_esp_scaled), disasm_str.c_str(), max, pmatch, 0) == 0)
{
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets(): Found ESP Scaled Instruction" << endl;
#if 0
if(stack_frame_size <=0)
{
cerr<<"OffsetInference: FindAllOffsets(): Frame Alloc Not Found, Aborting Offset Search"<<endl;
break;
}
#endif
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract displacement
int offset = strtol(matched.c_str(), NULL, 0);
if (pn_all_offsets != NULL)
{
pn_all_offsets->InsertESPOffset(offset);
}
if (pn_scaled_offsets != NULL)
{
pn_scaled_offsets->InsertESPOffset(offset);
}
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets(): ESP Offset = " << offset << endl;
}
} // end if esp scaled
else if (regexec(&(pn_regex->regex_esp_direct), disasm_str.c_str(), max, pmatch, 0) == 0)
{
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets: Found ESP Direct Instruction" << endl;
#if 0
if (stack_frame_size <= 0)
{
cerr << "OffsetInference: FindAllOffsets(): Frame Alloc Not Found, Aborting Offset Search" << endl;
break;
}
#endif
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract displacement
int offset = strtol(matched.c_str(), NULL, 0);
if (pn_all_offsets != NULL)
{
pn_all_offsets->InsertESPOffset(offset);
}
if (pn_direct_offsets != NULL)
{
pn_direct_offsets->InsertESPOffset(offset);
}
if (verbose_log)
cerr << "OffsetInference: FindAllOffsets(): ESP Offset = " << offset << endl;
}
} // end if esp direct access
else if (regexec(&(pn_regex->regex_ebp_scaled), disasm_str.c_str(), max, pmatch, 0) == 0)
{
if (verbose_log) {
cerr << "OffsetInference: FindAllOffsets(): Found EBP Scaled Instruction" << endl;
}
#if 0
if(stack_frame_size <=0)
{
cerr<<"OffsetInference: FindAllOffsets(): Frame Alloc Not Found, Aborting Offset Search"<<endl;
break;
}
#endif
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract displacement
int offset = strtol(matched.c_str(), NULL, 0);
#if 0
if(stack_frame_size - offset < 0)
{
cerr<<"OffsetInference: FindAllOffsets: Detected Negative ESP Offset, Aborting Offset Search"<<endl;
pn_all_offsets = NULL;
pn_scaled_offsets = NULL;
pn_direct_offsets = NULL;
break;
}
#endif
if (!has_frame_pointer && verbose_log) {
cerr << "BOGUS processing of EBP offset; not a frame pointer." << endl;
}
if (pn_all_offsets != NULL)
{
pn_all_offsets->InsertEBPOffset(offset);
}
if (pn_scaled_offsets != NULL)
{
pn_scaled_offsets->InsertEBPOffset(offset);
}
}
} // end if ebp scaled
else if(regexec(&(pn_regex->regex_ebp_direct), disasm_str.c_str(), max, pmatch, 0) == 0)
{
if (verbose_log) {
cerr << "OffsetInference: FindAllOffsets(): Found EBP Direct Instruction" << endl;
}
#if 0
if(stack_frame_size <=0)
{
cerr<<"OffsetInference: FindAllOffsets(): Frame Alloc Not Found, Aborting Offset Search"<<endl;
break;
}
#endif
if (pmatch[1].rm_so >= 0 && pmatch[1].rm_eo >= 0)
{
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract displacement
int offset = strtol(matched.c_str(), NULL, 0);
#if 0
if (stack_frame_size - offset < 0)
{
cerr << "OffsetInference: FindAllOffsets: Detected Negative ESP Offset, Aborting Offset Search" << endl;
pn_all_offsets = NULL;
pn_scaled_offsets = NULL;
pn_direct_offsets = NULL;
break;
}
#endif
if (verbose_log) {
cerr << "OffsetInference: FindAllOffsets(): Extracted EBP offset = " << offset << endl;
if (!has_frame_pointer) {
cerr << "BOGUS processing of EBP offset; not a frame pointer." << endl;
}
}
if (pn_all_offsets != NULL)
{
pn_all_offsets->InsertEBPOffset(offset);
}
if (pn_direct_offsets != NULL)
{
pn_direct_offsets->InsertEBPOffset(offset);
}
}
} // end if ebp direct
else if(regexec(&(pn_regex->regex_stack_dealloc), disasm_str.c_str(), max, pmatch, 0) == 0)
{
// if we find a dealloc, set a flag indicating as such
dealloc_flag = true;
//TODO: if the amount to dealloc is not equal to the stack frame size
//exit inference
int mlen = pmatch[1].rm_eo - pmatch[1].rm_so;
matched = disasm_str.substr(pmatch[1].rm_so, mlen);
// extract displacement
int offset = strtol(matched.c_str(), NULL, 0);
//NOTE: I have seen cases where there is an add esp, 0x0000000
//in unoptimized code. In this case, the compiler must have
//restored the stack already, ignore the instruction.
//TODO: casting stack_frame_size, make sure it isn't larger than
//max int, I don't know what to do if I see this.
if(offset != (int)stack_frame_size && offset != 0)
{
if(verbose_log)
cerr<<"OffsetInference: stack deallocation detected with different size of allocation, abandon inference"<<endl;
//dealloc_flag = false;
//TODO: hacked in for TNE, rewrite.
direct[func] = NULL;
scaled[func] = NULL;
all_offsets[func] = NULL;
p1[func] = NULL;
return;
}
}
//TODO: this is a hack for cases when ebp is used as an index,
//in these cases, only attempt P1 for now, but in the future
//dynamic checks can be used to determine what object is referred to.
else if(regexec(&(pn_regex->regex_scaled_ebp_index), disasm_str.c_str(), 5, pmatch, 0)==0)
{
PN_safe = false;
if(verbose_log)
cerr<<"OffsetInference: instruction contains an ebp index, not pn_safe"<<endl;
//TODO: at this point I could probably break the loop,
}
//TODO: a hack for TNE to check for direct recursion to dial down padding
else if(regexec(&(pn_regex->regex_call), disasm_str.c_str(), 5, pmatch, 0)==0)
{
if(instr->getTarget() != NULL && instr->getTarget()->getAddress() != NULL)
{
if(instr->getTarget()->getAddress()->getVirtualOffset() == first_instr->getAddress()->getVirtualOffset())
{
if(verbose_log)
cerr<<"OffsetInference: function contains a direct recursive call"<<endl;
pn_direct_offsets->SetRecursive(true);
pn_scaled_offsets->SetRecursive(true);
pn_all_offsets->SetRecursive(true);
pn_p1_offsets->SetRecursive(true);
}
}
}
else
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: No Pattern Match"<<endl;
}
} // end for all instructions
//TODO: everything is horribly hacked and messy, redo this function.
//if no dealloc is found, set all inferences to null
//TODO: this was hacked together quickly, one flag is preferable.
//TODO: there might be a memory leak here, see the objects deleted
//at the end of this function.
if(alloc_count>1 || lea_sanitize)
{
if(lea_sanitize)
cerr<<"OffsetInference: FindAllOffsets: lea_rsp that points to saved regs found "<<endl;
else if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: Multiple integral stack allocations found, returning null inferences"<<endl;
direct[func] = NULL;
scaled[func] = NULL;
all_offsets[func] = NULL;
p1[func] = NULL;
return;
}
else
{
if(!dealloc_flag && ret_cnt == 0)
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: Function is missing stack deallocaiton, but does not return, assuming transformable"<<endl;
dealloc_flag = true;
}
//TODO: I need to revisit this such that you can pass a pointer to PNStackLayout,
//and handle NULL accordingly.
//TODO: this has become too hacky, redo.
if(!dealloc_flag)
{
pn_direct_offsets->SetPaddingSafe(false);
pn_scaled_offsets->SetPaddingSafe(false);
pn_all_offsets->SetPaddingSafe(false);
pn_p1_offsets->SetPaddingSafe(false);
}
unsigned int aoi_size = pn_all_offsets->GetRanges().size();
//TODO: causes a memory leak since I may reset to NULL, redo
//if the size of aoi is the same as any other inference
//assume they are the same (insert a null layout entry)
if(pn_direct_offsets->GetRanges().size() != aoi_size)
direct[func] = new PNStackLayout(*pn_direct_offsets, func);
else
direct[func] = NULL;
if(pn_scaled_offsets->GetRanges().size() != aoi_size)
scaled[func] = new PNStackLayout(*pn_scaled_offsets, func);
else
scaled[func] = NULL;
//TODO: BIG TODO: There is quite a delema here. If p1 is the same as
//AOI, I don't want to generate it to save time, but what if a function
//has no coverage, so p1 is used, if I set it null here because the
//layouts are the same, I wont have any modification for that function.
p1[func] = new PNStackLayout(*pn_p1_offsets, func);
all_offsets[func] = new PNStackLayout(*pn_all_offsets, func);
if(!dealloc_flag)
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: No Stack Deallocation Found"<<endl;
if(direct[func] != NULL && !direct[func]->IsShuffleSafe())
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: direct offset inference cannot be shuffled, generating null inference"<<endl;
direct[func] = NULL;
}
if(scaled[func] != NULL && !scaled[func]->IsShuffleSafe())
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: scaled offset inference cannot be shuffled, generating null inference"<<endl;
scaled[func] = NULL;
}
if(all_offsets[func] != NULL && !all_offsets[func]->IsShuffleSafe())
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: all offset inference cannot be shuffled, generating null inference"<<endl;
all_offsets[func] = NULL;
}
p1[func] = NULL;
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: p1 inference by default cannot be shuffled, generating null inference"<<endl;
}
if(!PN_safe)
{
if(verbose_log)
cerr<<"OffsetInference: FindAllOffsets: Function not pn_safe, using only p1 (p1 may have been previously disabled)"<<endl;
direct[func] = NULL;
scaled[func] = NULL;
all_offsets[func] = NULL;
}
}
//memory clean up
delete pn_direct_offsets;
delete pn_scaled_offsets;
delete pn_all_offsets;
delete pn_p1_offsets;
} // end of OffsetInference::FindAllOffsets()
// Accessor for the "all offsets" layout of a function.
// The lookup is delegated to GetLayout(): when the all_offsets map has no
// entry for func yet, FindAllOffsets() is run first. The stored entry is
// returned as-is and is NULL when no layout could be made.
PNStackLayout* OffsetInference::GetPNStackLayout(Function_t *func)
{
    PNStackLayout* const layout = this->GetLayout(this->all_offsets, func);
    return layout;
}
// Accessor for the layout kept in the `direct` map for a function.
// Goes through GetLayout(), so the entry is computed by FindAllOffsets() on
// first request; the returned pointer is whatever the map holds (may be NULL).
PNStackLayout* OffsetInference::GetDirectAccessLayout(Function_t *func)
{
    PNStackLayout* const layout = this->GetLayout(this->direct, func);
    return layout;
}
// Accessor for the layout kept in the `scaled` map for a function.
// Goes through GetLayout(), so the entry is computed by FindAllOffsets() on
// first request; the returned pointer is whatever the map holds (may be NULL).
PNStackLayout* OffsetInference::GetScaledAccessLayout(Function_t *func)
{
    PNStackLayout* const layout = this->GetLayout(this->scaled, func);
    return layout;
}
// Accessor for the layout kept in the `p1` map for a function.
// Goes through GetLayout(), so the entry is computed by FindAllOffsets() on
// first request; the returned pointer is whatever the map holds (may be NULL).
PNStackLayout* OffsetInference::GetP1AccessLayout(Function_t *func)
{
    PNStackLayout* const layout = this->GetLayout(this->p1, func);
    return layout;
}
// Return the layout stored in `mymap` for `func`, inferring it on demand.
//
//   mymap - the per-function layout map to consult (the public getters pass
//           direct, scaled, all_offsets or p1)
//   func  - function whose layout is requested
//
// When `mymap` has no entry for `func`, FindAllOffsets(func) is run; it is
// expected to add one. The stored pointer is returned unchanged and may be
// NULL, which means no layout could be made for that function.
PNStackLayout* OffsetInference::GetLayout(map<Function_t*,PNStackLayout*> &mymap,Function_t *func)
{
    auto entry = mymap.find(func);

    // No layout recorded yet: find all offset boundaries, then look again.
    if (entry == mymap.end())
    {
        FindAllOffsets(func);
        entry = mymap.find(func);
    }

    // At this point an entry should have been made for the function.
    assert(entry != mymap.end());

    // assert() disappears when NDEBUG is defined; never dereference end().
    // A missing entry is reported the same way as "no layout": NULL.
    if (entry == mymap.end())
    {
        return NULL;
    }

    return entry->second;
}
// Human-readable name identifying this inference.
string OffsetInference::GetInferenceName() const
{
    string inference_name = "All Offsets Inference";
    return inference_name;
}
/*
* Copyright (c) 2013, 2014 - University of Virginia
*
* This file may be used and modified for non-commercial purposes as long as
* all copyright, permission, and nonwarranty notices are preserved.
* Redistribution is prohibited without prior written consent from the University
* of Virginia.
*
* Please contact the authors for restrictions applying to commercial use.
*
* THIS SOURCE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* Author: University of Virginia
* e-mail: jwd@virginia.com
* URL : http://www.cs.virginia.edu/
*
*/
#ifndef __OFFSETSTACKLAYOUTINFERENCE
#define __OFFSETSTACKLAYOUTINFERENCE
#include "PNStackLayoutInference.hpp"
#include "PNRegularExpressions.hpp"
#include <map>
#include <string>
// Stack layout inference based on the stack offsets referenced by a
// function's instructions. FindAllOffsets() produces up to four layout
// variants per function (direct, scaled, all offsets, p1) and records them in
// the per-function maps below; the public getters return those entries,
// computing them on first request.
class OffsetInference : public PNStackLayoutInference
{
protected:
// Per-instruction flag consulted by FindAllOffsets(): a push whose flag is
// false is treated as being outside the prologue.
std::map<IRDB_SDK::Instruction_t*, bool> in_prologue;
// Per-function layout entries. A NULL value means that no layout of that kind
// is available for the function.
// NOTE(review): the entries are allocated with new in FindAllOffsets(); who
// frees them is not visible in this header -- confirm in the destructor.
std::map<IRDB_SDK::Function_t*,PNStackLayout*> direct;
std::map<IRDB_SDK::Function_t*,PNStackLayout*> scaled;
std::map<IRDB_SDK::Function_t*,PNStackLayout*> all_offsets;
std::map<IRDB_SDK::Function_t*,PNStackLayout*> p1;
// Regular expressions matched against disassembly strings in
// FindAllOffsets(). Starts out NULL (see the constructor); where it is
// assigned is not visible in this header.
PNRegularExpressions *pn_regex;
// Analyses func and records its entries in direct, scaled, all_offsets and p1.
virtual void FindAllOffsets(IRDB_SDK::Function_t *func);
// Returns the entry of mymap for func, running FindAllOffsets(func) first
// when the map has no entry for it yet. The result may be NULL.
virtual PNStackLayout* GetLayout(std::map<IRDB_SDK::Function_t*,PNStackLayout*> &mymap, IRDB_SDK::Function_t *func);
// virtual void getInstructions(std::vector<IRDB_SDK::Instruction_t*> &instructions,IRDB_SDK::libIRDB::BasicBlock_t *block,std::set<IRDB_SDK::libIRDB::BasicBlock_t*> &block_set);
// NOTE(review): presumably builds the initial StackLayout for func; the
// definition is not visible here -- confirm against the implementation.
virtual StackLayout* SetupLayout(IRDB_SDK::Function_t *func);
public:
// Starts with no regular expressions attached.
OffsetInference() : pn_regex(NULL) {}
// Defined out of line.
virtual ~OffsetInference();
// Each getter returns the entry of one map for func via GetLayout():
// all_offsets, direct, scaled and p1 respectively. The result may be NULL.
virtual PNStackLayout* GetPNStackLayout(IRDB_SDK::Function_t *func);
virtual PNStackLayout* GetDirectAccessLayout(IRDB_SDK::Function_t *func);
virtual PNStackLayout* GetScaledAccessLayout(IRDB_SDK::Function_t *func);
virtual PNStackLayout* GetP1AccessLayout(IRDB_SDK::Function_t *func);
// Returns the name of this inference ("All Offsets Inference").
virtual std::string GetInferenceName() const;
};
#endif