// mg.cpp -- member-function definitions for MoveGlobals_t (declared in mg.hpp).
#include "mg.hpp"


#include <assert.h>
#include <stdexcept>
#include <unistd.h>
#include <memory>
#include <inttypes.h>
#include <algorithm>
#include <elf.h>
#include <cctype>
#include <iomanip>
#include <cstdlib>
#include <random>


using namespace std;
using namespace IRDB_SDK;
using namespace EXEIO;

#define ALLOF(s) begin(s), end(s)

// Predicate that decides whether a data scoop carries a given name.
// It stays an adaptable binary functor (binary_function supplies the argument
// typedefs) so it can still be used through bind2nd(finder, name), and it can
// also be called directly as finder(scoop, name).
static struct ScoopFinder : binary_function<DataScoop_t*,string,bool>
{
	// Returns true when the name of `scoop` equals `word`.
	// `word` is taken by const reference so a comparison never copies the string.
	bool operator()(const DataScoop_t* scoop, const string& word)  const
	{
		return scoop->getName() == word;
	}
} finder;

// Linear membership test: true when some element of `container` compares
// equal to `value`.
template<class S, class T> inline
static bool contains(const S &container, const T& value)
{
	const auto last = container.end();
	const auto hit  = find(container.begin(), last, value);
	return hit != last;
}



// True when the operand is a memory operand (of any addressing form).
static bool arg_has_memory(const DecodedOperand_t &arg)
{
	return arg.isMemory() ? true : false;
}

// True only for memory operands that are also pc-relative.
static bool arg_has_relative(const DecodedOperand_t &arg)
{
	const bool is_pcrel_memory = arg.isMemory() && arg.isPcrel();
	return is_pcrel_memory;
}

// Locate the memory operand of an instruction.
// Only the first four operand slots are examined; when several of them are
// memory operands the highest-numbered one is returned.  Returns
// operands.end() when none of the examined slots is a memory operand.
static DecodedOperandVector_t::iterator find_memory_operand(DecodedOperandVector_t &operands)
{
	const size_t slots_examined = 4;

	auto found = operands.end();
	for(size_t slot = 0; slot < slots_examined && slot < operands.size(); ++slot)
	{
		// a later match deliberately overrides an earlier one
		if(arg_has_memory(*operands[slot]))
			found = next(operands.begin(), slot);
	}
	return found;
}


// Format `i` with the stream's hexadecimal formatting, prefixed by "0x".
template< typename T >
static std::string to_hex_string( T i )
{
	std::ostringstream formatted;
	formatted << "0x";
	formatted << std::hex << i;
	return formatted.str();
}


// MoveGlobals_t member definitions follow.
// True when the scoop's name appears in elftable_names.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
bool MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::is_elftable(DataScoop_t* ret)
{
	const auto scoop_name = ret->getName();
	return contains(elftable_names, scoop_name);
}

// True when the scoop's name appears in elftable_nocodeptr_names.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
bool MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::is_noptr_table(DataScoop_t* ret)
{
	const auto scoop_name = ret->getName();
	return contains(elftable_nocodeptr_names, scoop_name);
}

// Construct a MoveGlobals_t from its command-line style options.
//   p_dont_move   whitespace-separated scoop names that must not be moved
//   p_move_only   whitespace-separated scoop names; when non-empty only these may move
//   p_max_mov     cap on the number of moveable scoops (values <= 0 mean "no cap")
//   p_random      pick the scoops dropped by the cap at random
//   p_aggressive  stored in `aggressive`; when false DetectProperScoop runs the extra
//                 start-of-next tying heuristic
//   p_use_stars   consult STARS deep-analysis annotations
// Verbose logging is enabled when the MG_VERBOSE environment variable is set.
// NOTE(review): p_variantID and p_variantIR are not used by this initializer
// list -- confirm the base class is initialised the way it is meant to be.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::MoveGlobals_t(
	VariantID_t *p_variantID,
	FileIR_t *p_variantIR,
	const string &p_dont_move,
	const string &p_move_only,
	const int p_max_mov,
	const bool p_random,
	const bool p_aggressive,
	const bool p_use_stars)
	: exe_reader(nullptr)
	, tied_unpinned(0)
	, tied_pinned(0)
	, tied_nochange(0)
	, ties_for_folded_constants(0)
	, dont_move(p_dont_move)
	, move_only(p_move_only)
	, max_moveables(p_max_mov)
	, random(p_random)
	, aggressive(p_aggressive)
	, m_use_stars(p_use_stars)
	, m_verbose(getenv("MG_VERBOSE") != nullptr)
{
}

// Run the complete transformation: load the executable "a.ncexe", optionally
// collect the STARS deep-analysis annotations, then run each phase in order
// (symbol-driven scoop splitting, filtering, tying, reference discovery,
// coalescing, location update, statistics).  Always returns 0.
// `pqxx_interface` is not used by this function.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
int MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::execute(pqxxDB_t &pqxx_interface)
{
	// load the executable.
	exe_reader = new EXEIO::exeio;
	assert(exe_reader);
	exe_reader->load((char*)"a.ncexe");

	if(m_use_stars)
	{
		// NOTE(review): the range/sentinel results are kept after deep_analysis goes
		// out of scope -- confirm they do not depend on the analysis object's lifetime.
		auto deep_analysis=DeepAnalysis_t::factory(getFileIR(), aeSTARS,  {"SetDeepLoopAnalyses=true", "SetConstantPropagation=true"});
		deep_global_static_ranges = deep_analysis -> getStaticGlobalRanges();
		sentinels                 = deep_analysis -> getRangeSentinels();
		cout<<dec;
		cout<<"#ATTRIBUTE "<<deep_global_static_ranges->size() <<" num_global_static_range_annotations" <<endl;
		cout<<"#ATTRIBUTE "<<sentinels->size()                 <<" num_sentinel_annotations"            <<endl;
	}

	ParseSyms(exe_reader);
	SetupScoopMap();
	FilterScoops();
	TieScoops();
	FindInstructionReferences();	// may record some scoops are tied together
	FindDataReferences();
	FilterAndCoalesceTiedScoops();
	UpdateScoopLocations();
	PrintStats();

	return 0;
}

// Build scoop_map: an index from each data scoop's [start,end] address range
// to the scoop itself.  Scoops that start at virtual offset 0 and the scoops
// named ".tdata" / ".tbss" are left out of the index.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::SetupScoopMap()
{
	for(auto &scoop : getFileIR()->getDataScoops())
	{
		const auto first_addr = scoop->getStart()->getVirtualOffset();
		if(first_addr == 0)
			continue;

		const auto is_excluded_by_name = scoop->getName() == ".tdata" || scoop->getName() == ".tbss";
		if(is_excluded_by_name)
			continue;

		const RangePair_t range(first_addr, scoop->getEnd()->getVirtualOffset());
		scoop_map[range]=scoop;
	}
}

// Look up the scoop registered in scoop_map for address `a` (queried as the
// one-byte range [a,a]).  Returns nullptr when no entry matches.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::findScoopByAddress(const IRDB_SDK::VirtualOffset_t a) const
{
	const auto hit = scoop_map.find(RangePair_t(a,a));
	if(hit != scoop_map.end())
		return hit->second;
	return nullptr;
}

// Decide whether two scoops are adjacent, i.e. no address strictly between the
// end of the lower scoop and the start of the higher scoop belongs to any
// scoop known to scoop_map.  The scoops are ordered by start address first.
// Note: the gap is probed one address at a time, so the cost grows with the
// distance between the two scoops.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
bool MoveGlobals_t<T_Sym, T_Rela, T_Rel, T_Dyn, T_Extractor>::AreScoopsAdjacent(const DataScoop_t *a, const DataScoop_t *b) const
{
	const IRDB_SDK::VirtualOffset_t aStart = a->getStart()->getVirtualOffset();
	const IRDB_SDK::VirtualOffset_t aEnd = a->getEnd()->getVirtualOffset();
	const IRDB_SDK::VirtualOffset_t bStart = b->getStart()->getVirtualOffset();
	const IRDB_SDK::VirtualOffset_t bEnd = b->getEnd()->getVirtualOffset();

	// order the pair so that "first" is the scoop with the lower start address
	const bool b_is_first = aStart > bStart;
	const IRDB_SDK::VirtualOffset_t FirstEnd    = b_is_first ? bEnd   : aEnd;
	const IRDB_SDK::VirtualOffset_t SecondStart = b_is_first ? aStart : bStart;

	for (IRDB_SDK::VirtualOffset_t i = FirstEnd + 1; i < SecondStart; ++i)
	{
		if (findScoopByAddress(i) != nullptr)
			return false; // found intervening scoop before SecondStart
	}

	return true;
} // end of AreScoopsAdjacent()

// Go through the .symtab and .dynsym sections of the ELF file and split the
// existing data scoops so that every object symbol gets a scoop of its own.
//
// Steps:
//   1. log the initial scoops and register a fixed whitelist of ELF sections
//      as moveable, regardless of the symbol table;
//   2. for each local/global STT_OBJECT symbol with a non-zero address and
//      size, split the scoop that contains it and register the new scoop
//      (renamed after the symbol) as moveable;
//   3. rename scoops until every scoop name is unique.
//
// The MG_MAX_SPLITS environment variable, when set, caps the total number of
// splits.  When it is not set there is no cap.  The cap does not depend on the
// verbosity setting.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ParseSyms(EXEIO::exeio * readerp)
{
	// scoops with these names are moveable despite the symbol table
	const auto moveable_sections=set<string>({
					".interp",
					".note.ABI-tag",
					".note.gnu.build-id",
					".gnu.hash",
					".dynsym",
					".dynstr",
					".gnu.version",
					".gnu.version_r",
					".rel.dyn",
					".rel.plt",
					".rela.dyn",
					".rela.plt",
					".init_array",
					".fini_array",
					".jcr",
					".dynamic",
					".got",
					".got.plt"
					});

	auto max_id=getFileIR()->getMaxBaseID();

	cout<<"Initial scoops:"<<endl;
	for(const auto &scoop : getFileIR()->getDataScoops())
	{
		cout<<"scoop: "<<scoop->getName()<<" ("<<hex<<scoop->getStart()->getVirtualOffset()
			<<"-"<<scoop->getEnd()->getVirtualOffset()<<")"<<endl;

		// white list some scoops as moveable, despite the symbol table
		if(moveable_sections.find(scoop->getName())!=moveable_sections.end())
		{
			cout<<"Register scoop "<<scoop->getName()<<" as movable"<<endl;
			moveable_scoops.insert(scoop);
		}
	}

	assert(readerp);
	auto elfiop=reinterpret_cast<ELFIO::elfio*>(readerp->get_elfio());
	assert(elfiop);
	auto &reader=*elfiop;

	auto splits=0u;

	// optional cap on the number of splits; parsed once instead of once per symbol
	const char* const max_splits = getenv("MG_MAX_SPLITS");
	const auto has_split_limit = max_splits != nullptr;
	const auto split_limit = has_split_limit ? strtoul(max_splits, nullptr, 0) : 0ul;

	// for each section in the elf file.
	const auto n = (Elf_Half) reader.sections.size();
	for ( auto sec_no = (Elf_Half ) 0; sec_no < n; ++sec_no )
	{
		auto sec = reader.sections[sec_no];

		// only symbol-table sections are of interest
		if ( SHT_SYMTAB != sec->get_type() && SHT_DYNSYM != sec->get_type() )
			continue;

		auto symbols = ELFIO::symbol_section_accessor ( reader, sec );

		// for each symbol in the section
		const auto sym_no = symbols.get_symbols_num();
		for (auto sym_idx = (decltype(symbols.get_symbols_num()))0; sym_idx < sym_no; ++sym_idx )
		{
			// check to see if we've been directed to not split everything up.
			if (has_split_limit && (splits >= split_limit))
				break;

			auto name=std::string();
			auto value=(Elf64_Addr)0;	// note:  elf64_addr OK for 32-bit machines still.
			auto size=(Elf_Xword)0;
			auto bind=(unsigned char)0;
			auto type=(unsigned char)0;
			auto section=(Elf_Half)0;
			auto other=(unsigned char)0;

			// elfio always takes a value of type Elf64-Addr regardless of mach type.
			symbols.get_symbol( sym_idx, name, value, size, bind, type, section, other );

			// only symbols that describe an object (as opposed to a binding, or a function or a ...)
			const auto is_object_symbol = type==STT_OBJECT && (bind==STB_LOCAL || bind==STB_GLOBAL) && value!=0 && size!=0;
			if(!is_object_symbol)
				continue;

			auto tosplit=getFileIR()->findScoop(value);

			// something went wrong if we can't find the scoop for this object.
			if(tosplit==nullptr)
				continue;

			cout << "Section: "<<sec->get_name() << " name="<<  name << " size="
				 <<hex<<size<< " addr="<<hex<<value<<" scoop: "<<tosplit->getName()<<endl;

			cout<<"\ttosplit: "<<hex<<tosplit->getStart()->getVirtualOffset()<<"-"
				<<tosplit->getEnd()->getVirtualOffset();

			// the symbol must lie entirely inside the scoop that holds its first byte
			if(value+size-1 > tosplit->getEnd()->getVirtualOffset())
			{
				cout<<"Skipping symbol "<<name<<" due to an object that's already split?"<<endl;
				cout<<"Start (but not end) of "<<name<<" is in in object " <<
					tosplit->getName()<<":("<<hex<<tosplit->getStart()->getVirtualOffset()<<"-" <<
					tosplit->getEnd()->getVirtualOffset()<<")"<<endl;
				continue; // try next symbol
			}

			if(moveable_scoops.find(tosplit)!=end(moveable_scoops))
			{
				cout<<"Avoiding resplit of "<<name<<" due to an object that's already split?"<<endl;
				// don't re-split something that's already moveable.
				continue;
			}

			auto before=(DataScoop_t*)nullptr, containing=(DataScoop_t*)nullptr, after=(DataScoop_t*)nullptr;
			getFileIR()->splitScoop(tosplit, value, size, before,containing,after,&max_id);

			// check the result before it is dereferenced by the logging below
			assert(containing);

			if(before)
			{
				cout<<"\tBefore: "<<hex<<before->getStart()->getVirtualOffset()
					<<"-"<<before->getEnd()->getVirtualOffset();
			}
			cout<<"\tContaining: "<<hex<<containing->getStart()->getVirtualOffset()
				<<"-"<<containing->getEnd()->getVirtualOffset();
			if(after)
			{
				cout<<"\tAfter: "<<hex<<after->getStart()->getVirtualOffset()
					<<"-"<<after->getEnd()->getVirtualOffset();
			}
			cout<<endl;

			containing->setName(name);
			moveable_scoops.insert(containing);

			splits++;
		}
		cout << std::endl;
	}

	// guarantee unique scoop names
	auto scoop_names=set<string>();
	for(auto & s : getFileIR()->getDataScoops())
	{
		while(scoop_names.find(s->getName())!=scoop_names.end())
		{
			cout<<"Rename scoop because of name conflict: "<<s->getName()<<" --> ";
			s->setName(s->getName()+"-renamed"+to_string(rand()));
			cout<<s->getName()<<endl;
		}
		scoop_names.insert(s->getName());
	}

	cout<<"# ATTRIBUTE Non-Overlapping_Globals::data_scoop_splits_performed="<<dec<<splits<<endl;
}

// Narrow down the set of moveable scoops according to the user's options:
//   1. move_only  - when non-empty, keep only the moveable scoops named in it;
//   2. dont_move  - remove every scoop named in it;
//   3. max_moveables - when positive, drop scoops (at random if `random` is
//      set, otherwise the last one in set order) until the cap is met.
// Both option strings are whitespace-separated lists of scoop names.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FilterScoops()
{
	const auto mg_env = m_verbose;

	// find the first moveable scoop with the given name (end() when there is none)
	const auto find_moveable_by_name = [&](const string &word)
	{
		return find_if(ALLOF(moveable_scoops), [&word](const DataScoop_t* scoop) { return finder(scoop, word); });
	};

	// filter using the move_only option
	DataScoopSet_t move_only_scoops;
	istringstream mo_ss(move_only);
	for(string word; mo_ss >> word; )
	{
		const auto it=find_moveable_by_name(word);
		// if found, insert into the move_only set.
		if(it!=moveable_scoops.end())
		{
			if(mg_env)
				cout<<"Keeping scoop (for mo_ss) "<< word << endl;
			move_only_scoops.insert(*it);
		}
		else
		{
			if(mg_env)
				cout<<"Skipping scoop (for mo_ss) "<< word << endl;
		}
	}

	// update the moveable_scoops based on the move_only set.
	if(move_only != "" )
	{
		moveable_scoops.clear();
		moveable_scoops.insert(ALLOF(move_only_scoops));

		if(mg_env)
		{
			cout<<"Moveable Scoops after move_only filter:"<<endl;
			for(auto &s : moveable_scoops)
				cout<<s->getName()<<endl;
			cout<<endl;
		}
	}

	// filter based on the dont_move option
	istringstream dm_ss(dont_move);
	for(string word; dm_ss >> word; )
	{
		const auto it=find_moveable_by_name(word);
		if(it!=moveable_scoops.end())
			moveable_scoops.erase(it);
	}
	if(dont_move!="")
	{
		cout<<"Moveable Scoops after dont_move filter:"<<endl;
		for(auto &s : moveable_scoops)
			cout<<s->getName()<<endl;
		cout<<endl;
	}

	// enforce the cap on the number of moveable scoops
	if(max_moveables>0)
	{
		mt19937 generator(time(0));
		uniform_real_distribution<double> distribution(0.0,1.0);
		while(moveable_scoops.size() > (unsigned)max_moveables)
		{
			// default victim: the last scoop in set order
			auto victim = prev(moveable_scoops.end());
			if (random)
			{
				// the distribution yields [0,1), so the index stays below size()
				const double rand_num = distribution(generator);
				const int rand_idx = (int) (rand_num * moveable_scoops.size());
				victim = moveable_scoops.begin();
				advance(victim, rand_idx);
			}
			moveable_scoops.erase(victim);
		}
	}
}

// Tie together pairs of scoops that must stay next to each other.
// For each known pair of names:
//   - both scoops moveable  -> record the pair in tied_scoops;
//   - only one is moveable  -> rename that scoop to "<first> coalesced w/<second>";
//   - neither is moveable   -> nothing to do.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::TieScoops()
{
	struct scoop_pairs_t
	{
		string first, second;
	}scoop_pairs[] = {
		{ ".rel.dyn", ".rel.plt" }, // the dynamic linker goes through both sections together when LD_BIND_NOW is set.
		{ ".rela.dyn", ".rela.plt" }
// can't tie .got and .got.plt because of relro differences.
// can make insanity happen.
//		{ ".got", ".got.plt" }
	};

	// find the first moveable scoop with the given name (end() when there is none)
	const auto find_moveable_by_name = [this](const string &word)
	{
		return find_if(ALLOF(moveable_scoops), [&word](const DataScoop_t* scoop) { return finder(scoop, word); });
	};

	for(const auto &pair : scoop_pairs)
	{
		const auto it1=find_moveable_by_name(pair.first);
		const auto it2=find_moveable_by_name(pair.second);
		const auto has_first  = it1!=moveable_scoops.end();
		const auto has_second = it2!=moveable_scoops.end();

		// both exist, tie together.
		if(has_first && has_second)
			tied_scoops.insert(ScoopPair_t(*it1,*it2));

		// first exists, rename for easier management later.
		else if(has_first)
			(*it1)->setName(pair.first+" coalesced w/"+ pair.second);

		// second exists, rename for easier management later.
		else if(has_second)
			(*it2)->setName(pair.first+" coalesced w/"+ pair.second);

		// or, none exists at all.
	}
}


// Examine the memory operand of `insn` and record which scoop it refers to.
//   disasm             decoded form of `insn`
//   the_arg            iterator to the memory operand, or the_arg_container.end()
//   the_arg_container  the operand vector `the_arg` points into
// Only operands with a 4-byte encoded displacement are considered.  A
// reference to a moveable, non-executable scoop is recorded in
// pcrel_refs_to_scoops (when the instruction has a "pcrel" relocation) or in
// absolute_refs_to_scoops (otherwise, unless the scoop is a no-pointer table).
// Everything else is only logged.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::HandleMemoryOperand(DecodedInstruction_t& disasm, const DecodedOperandVector_t::iterator the_arg, Instruction_t* insn, const DecodedOperandVector_t &the_arg_container)
{
	// no mem arg.
	if(the_arg==the_arg_container.end())
	{
		cout << "Note:  "<<hex<<" no memory op in:";
		cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
		cout << endl;
		return;
	}

	// shared objects don't need this, you have to use a pcrel addressing mode.
	if(!arg_has_relative(**the_arg) && exe_reader->isDLL())
	{
		cout << "Note:  "<<hex<<" no dll-style address in:";
		cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
		cout << endl;
		return;
	}

	// addresses whose magnitude is below this threshold get their own log message
	const auto small_memory_threshold= exe_reader->isDLL() ? 10 : 4096*10;

	auto to1 = (DataScoop_t*) nullptr;
	// examine the memory operation to see if there's a pc-rel
	if ((*the_arg)->isMemory() &&
	    (*the_arg)->hasMemoryDisplacement() &&
	    (*the_arg)->getMemoryDisplacementEncodingSize() == 4
	   )
	{
		auto rel_addr1 = (VirtualOffset_t)(*the_arg)->getMemoryDisplacement();
		// a pc-relative displacement is adjusted by the size of the instruction
		if (arg_has_relative(*(*the_arg)))
			rel_addr1 += insn->getDataBits().size();
		to1 = DetectProperScoop(disasm, the_arg, insn, rel_addr1, false, the_arg_container);

		// the displacement must lie inside the instruction's bytes, after the first byte
		auto disp_offset = disasm.getMemoryDisplacementOffset(the_arg->get(),insn);
		auto disp_size = (*the_arg)->getMemoryDisplacementEncodingSize();
		assert((0 < disp_offset) && (disp_offset <= (insn->getDataBits().size() - disp_size)));

		// skip if not found, executable, or not moveable.
		if (to1 && (to1->isExecuteable() || moveable_scoops.find(to1) == moveable_scoops.end()))
		{
			// do nothing, no action is necessary for pointers to code.
			cout<<"Skipping (scoop exists, but exe scoop, or not moveable scoop) pcrel mem op in insn: "
				<< hex << insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
				<< to1->getName()<<" ("
				<<hex<<to1->getStart()->getVirtualOffset()<<"-"
				<<hex<<to1->getEnd()->getVirtualOffset()<<")"<<endl;
		}
		else if(to1)
		{
			// look for any pcrel relative relocs from fix_calls
			Relocation_t* pcrel_reloc=FindRelocationWithType(insn,"pcrel");
			if(pcrel_reloc)
			{
				cout<<"Setting pcrel mem op in insn: "
					<< hex <<insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
					<< to1->getName()<<" ("
					<<hex<<to1->getStart()->getVirtualOffset()<<"-"
					<<hex<<to1->getEnd()->getVirtualOffset()<<")"<<endl;
				pcrel_refs_to_scoops.insert({insn,to1});
			}
			else
			{
				cout<<"Absolute mem-op to scoop in insn: "
					<< hex << insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
					<< to1->getName()<<" ("
					<<hex<<to1->getStart()->getVirtualOffset()<<"-"
					<<hex<<to1->getEnd()->getVirtualOffset()<<")"<<endl;
				if(!is_noptr_table(to1))
					absolute_refs_to_scoops.insert({insn,to1});
			}
		}
		else if ( -small_memory_threshold < (int)rel_addr1 && (int)rel_addr1 < small_memory_threshold )
		{
			if((0 != rel_addr1) && m_verbose)
			{
				cout << "Note:  "<<hex<<rel_addr1<<" not declared address in (low addr thresh) :";
				cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
				cout << endl;
			}
		}
		else
		{
			if ((0 != rel_addr1) && m_verbose)
			{
				cout << "Note:  "<<hex<<rel_addr1<<" not declared address in (no scoop):";
				cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
				cout << endl;
			}
		}
	}
	else
	{
		cout << "Note:  "<<hex<<" no address in:";
		cout << insn->getBaseID()<<":"<<disasm.getDisassembly();
		cout << endl;
	}
}

// Rewrite the pc-relative memory operand of `insn` so that it is expressed
// relative to the start of scoop `to`: the instruction's existing "pcrel"
// relocation is pointed (WRT) at the scoop and the 4-byte displacement is
// replaced by the original displacement minus the scoop's start address.
// Instructions that reference a scoop which is no longer moveable are left
// untouched.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyPcrelMemoryRelocation(Instruction_t* insn, DataScoop_t* to)
{
	const auto disasmp=DecodedInstruction_t::factory(insn);
	const auto &disasm=*disasmp;
	auto operands=disasm.getOperands();

	// don't change instructions that reference re-pinned scoops.
	// This was necessary because we were not getting the zipr_unpin_plugin
	//  to undo our changes to the instruction in the case of a re-pinned scoop.
	//  That problem is fixed, but it is more efficient and safer to
	//  avoid editing instructions that reference re-pinned scoops.
	if (moveable_scoops.find(to) == moveable_scoops.cend())
	{
		if (m_verbose)
		{
			cout << "Avoiding editing of insn at " << hex << insn->getBaseID() << " after repinning scoop "
				<< to->getName() << endl;
		}
		return;
	}

	auto the_arg=find_memory_operand(operands);
	assert(the_arg!=operands.end());
	const unsigned int disp_offset=disasm.getMemoryDisplacementOffset(the_arg->get(),insn);
	const unsigned int disp_size=(*the_arg)->getMemoryDisplacementEncodingSize();

	// note about this case:  the pcrel reloc already exists for the
	// case where an instruction is moving.
	// now the relocs WRT field indicates that the target might move too.
	// will have to edit push_relocs.zpi to handle this.
	Relocation_t* pcrel_reloc=FindRelocationWithType(insn,"pcrel");
	assert(pcrel_reloc);	// the relocation is dereferenced right below
	pcrel_reloc->setWRT(to);

	assert(0<disp_offset && disp_offset<=(insn->getDataBits().size() - disp_size));
	assert(disp_size==4);

	// the stored displacement becomes an offset from the start of the scoop
	unsigned int new_disp=(*the_arg)->getMemoryDisplacement() - to->getStart()->getVirtualOffset();
	insn->setDataBits(insn->getDataBits().replace(disp_offset, disp_size, (char*)&new_disp, disp_size));
}

// Rewrite the absolute memory operand of `insn` so that it is expressed
// relative to the start of scoop `to`: an "absoluteptr_to_scoop" relocation
// is added to the instruction and the 4-byte displacement is replaced by the
// original displacement minus the scoop's start address.
// Instructions that reference a scoop which is no longer moveable are left
// untouched.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyAbsoluteMemoryRelocation(Instruction_t* insn, DataScoop_t* to)
{
	const auto disasmp=DecodedInstruction_t::factory(insn);
	const auto &disasm=*disasmp;
	auto operands=disasm.getOperands();

	// don't change instructions that reference re-pinned scoops.
	// This was necessary because we were not getting the zipr_unpin_plugin
	//  to undo our changes to the instruction in the case of a re-pinned scoop.
	//  That problem is fixed, but it is more efficient and safer to
	//  avoid editing instructions that reference re-pinned scoops.
	if (moveable_scoops.find(to) == moveable_scoops.cend())
	{
		if (m_verbose)
		{
			cout << "Avoiding editing of insn at " << hex << insn->getBaseID() << " after repinning scoop "
				<< to->getName() << endl;
		}
		return;
	}

	auto the_arg = find_memory_operand(operands);
	assert(the_arg!=operands.end());	// the operand is dereferenced right below
	const unsigned int disp_offset=disasm.getMemoryDisplacementOffset(the_arg->get(),insn);
	const unsigned int disp_size=(*the_arg)->getMemoryDisplacementEncodingSize();
	assert(0<disp_offset && disp_offset<=(insn->getDataBits().size() - disp_size));
	assert(disp_size==4);

	auto reloc=getFileIR()->addNewRelocation(insn,0, "absoluteptr_to_scoop",to);
	(void)reloc; // just giving to the ir

	// the stored displacement becomes an offset from the start of the scoop
	unsigned int new_disp=(*the_arg)->getMemoryDisplacement() - to->getStart()->getVirtualOffset();
	insn->setDataBits(insn->getDataBits().replace(disp_offset, disp_size, (char*)&new_disp, disp_size));
}

// Ask the STARS annotations which scoop `insn` references.
// A scoop is reported only when STARS is enabled and the instruction has both
// a static-global-range entry and a sentinel entry; the scoop is then looked
// up by the address stored in the range entry.  Returns nullptr otherwise
// (including when no scoop is registered at that address).
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym, T_Rela, T_Rel, T_Dyn, T_Extractor>::DetectAnnotationScoop(Instruction_t* insn)
{
	if (!m_use_stars)
		return nullptr;

	const auto range_entry    = deep_global_static_ranges->find(insn);
	const auto sentinel_entry = sentinels->find(insn);

	const auto has_range    = range_entry    != deep_global_static_ranges->end();
	const auto has_sentinel = sentinel_entry != sentinels->end();
	if (!has_range || !has_sentinel)
		return nullptr;

	return findScoopByAddress(range_entry->second);
} // end of DetectAnnotationScoop()

// Work out which data scoop an address found in `insn` refers to.
//   disasm             decoded form of `insn`
//   the_arg            the memory operand (may be end() only when `immed` is true)
//   insn_addr          the address taken from the operand or immediate
//   immed              true when the address comes from an immediate
//   the_arg_container  the operand vector `the_arg` points into
// Returns the chosen scoop or nullptr.  Side effects: may add pairs to
// tied_scoops (directly or through the helper heuristics) and may remove
// scoops from moveable_scoops when STARS and the heuristics disagree.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::DetectProperScoop(const DecodedInstruction_t& disasm, const DecodedOperandVector_t::iterator the_arg, Instruction_t* insn, VirtualOffset_t insn_addr, bool immed, const DecodedOperandVector_t &the_arg_container)
{
	assert(insn);
	assert(immed || (the_arg != the_arg_container.end()));	// immeds don't need an argument, but memory ops do.
	if (immed && (0 == insn_addr))
		return nullptr; // immed value of zero is not a scoop address

	const auto small_memory_threshold = exe_reader->isDLL() ? 10 : 4096 * 10;
	const auto ValidImmed             = immed && (small_memory_threshold <= ((int)insn_addr));
	auto ret = findScoopByAddress(insn_addr);

	// so far, we haven't run into any problems with not finding a scoop.  we could later.
	if (!ret)
	{
		// check for things that _just_ run off the end of a scoop.
		for (auto i = 0; (i < 8) && (ret == nullptr); i++)
			ret = findScoopByAddress(insn_addr - i);
		// check for things that just miss the beginning of a scoop
		for (auto i = 0; (i < 8) && (ret == nullptr); i++)
			ret = findScoopByAddress(insn_addr + i);
	}

	// See if STARS analyzed the instruction and determined which scoop it references.
	// Immediates below the small-memory threshold are never looked up.
	const auto retSTARS = (immed && (!ValidImmed)) ? (DataScoop_t*)nullptr : DetectAnnotationScoop(insn);

	if (!ret)
	{
		if (nullptr != retSTARS)
		{
			cout << "Detected proper scoop using annotation, not using after DetectProperScoop failure for insn at " << hex << insn->getBaseID() << endl;
		}
		return ret;
	}

	/* check to see if it's directly pointing at an elftable that isn't allowed to have pointers */
	if (is_noptr_table(ret))
	{
		/* it's an elftable, so we don't need to look so hard because */
		/* we probably aren't pointing to an elf table from an instruction */
		/* find middle of table */
		const auto mid_of_table = (ret->getStart()->getVirtualOffset() / 2) + (ret->getEnd()->getVirtualOffset() / 2);

		/* below the middle: scan downwards from the table start; otherwise scan upwards from the table end */
		const auto scan_downwards = insn_addr < mid_of_table;
		const auto op = [scan_downwards](const VirtualOffset_t i, const VirtualOffset_t j)
		{
			return scan_downwards ? i - j : i + j;
		};

		/* start at begin/end of table depending on direction */
		const auto addr = scan_downwards
			? ret->getStart()->getVirtualOffset()
			: ret->getEnd()->getVirtualOffset()
			;

		/* scan 128 bytes looking for a relevant scoop */
		const auto thres = 128;
		for (auto i = 1; i < thres; i++)
		{
			/* check what's here */
			auto candidate = findScoopByAddress(op(addr, i));
			if (candidate != nullptr)
				return candidate;
		}
		/* didn't find anything */
	} /* if elftable */

	/* Not an elf table use conservative and/or aggressive heuristics*/
	ret = DetectProperScoop_ConsiderEndOfPrev(disasm, the_arg, insn, insn_addr, immed, ret, the_arg_container);

	if (!aggressive)
		ret = DetectProperScoop_ConsiderStartOfNext(disasm, the_arg, insn, insn_addr, immed, ret, the_arg_container);

	if (nullptr != retSTARS)
	{
		if (nullptr == ret)
		{
			// ret = retSTARS; // Dangerous to use; e.g. mov [rdi+0x200],rax will cause edit of 0x200 because RDI was resolved by STARS to a scoop address
			cout << "Detected proper scoop using annotation, not using after DetectProperScoop final failure for insn at " << hex << insn->getBaseID() << endl;
		}
		else if (retSTARS != ret)
		{
			// We have two different non-null choices. We will tie the two scoops
			//  together if they are adjacent, and pin them both otherwise.
			if (AreScoopsAdjacent(ret, retSTARS)) // tie adjacent scoops
			{
				cout << "Tieing adjacent scoops due to STARS vs. DetectProperScoop conflict for insn at " << hex << insn->getBaseID() << endl;
				// the pair is stored with the lower-addressed scoop first
				if (ret->getStart()->getVirtualOffset() < retSTARS->getStart()->getVirtualOffset())
					tied_scoops.insert({ret, retSTARS});
				else
					tied_scoops.insert({retSTARS, ret});
			}
			else // not adjacent; must pin
			{
				cout << "Pinning non-adjacent scoops due to STARS vs. DetectProperScoop conflict for insn at " << hex << insn->getBaseID() << endl;
				if(!is_elftable(ret))
					moveable_scoops.erase(ret);
				if(!is_elftable(retSTARS))
					moveable_scoops.erase(retSTARS);
			}
		}
	}
	return ret;
} // end of DetectProperScoop()

// Heuristic: an address taken by `insn` may later be adjusted by a small
// constant (a "stride") and so end up in a neighbouring scoop.  For every
// candidate offset, if insn_addr+offset falls into a different scoop than
// `candidate_scoop`, the scoops between the two are tied together (recorded in
// tied_scoops) so that they keep their relative positions.
//
// Candidate strides are the operand size (or 1,2,4,8 for lea/immediates) plus
// the small (< 500 in magnitude) constants of add/sub/lea instructions of the
// containing function; each is tried in both directions.
// Direct memory accesses (no base or index register, not a lea) are returned
// unchanged.  Always returns `candidate_scoop`.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::DetectProperScoop_ConsiderStartOfNext(
	const DecodedInstruction_t& disasm,
	const DecodedOperandVector_t::iterator mem_arg,
	Instruction_t* insn,
	VirtualOffset_t insn_addr,
	bool immed,
	DataScoop_t* candidate_scoop,
	const DecodedOperandVector_t &mem_arg_container
	)
{

	assert(immed || mem_arg!=mem_arg_container.end());	// immeds don't need an argument, but memory ops do.

	const auto is_lea=disasm.getMnemonic() == string("lea");
	const auto consider_multiple_sizes= is_lea || immed;

	auto strides= consider_multiple_sizes ? set<int>({1,2,4,8}) : set<int>({ (int)(*mem_arg)->getArgumentSizeInBytes() });

	// get other strides from the containing function
	if(insn->getFunction())
	{
		for(auto func_insn : insn->getFunction()->getInstructions())
		{
			const auto dp=DecodedInstruction_t::factory(func_insn);
			const auto &d=*dp;

			auto potential_stride=0;
			if( d.getMnemonic()=="add" || d.getMnemonic()=="sub")
			{
				potential_stride=d.getImmediate();
			}

			if(d.getMnemonic()=="lea")
			{
				potential_stride=d.getOperand(1)->getMemoryDisplacement();
			}

			// only small, non-zero constants count as strides; record both directions
			if(abs(potential_stride)<500  && potential_stride!=0)
			{
				strides.insert(potential_stride);
				strides.insert(-potential_stride);
			}
		}
	}

	const auto stride_multipliers= set<int>({-1,1});

	const auto contains_base_reg  = mem_arg != mem_arg_container.end() && (*mem_arg)->hasBaseRegister();
	const auto contains_index_reg = mem_arg != mem_arg_container.end() && (*mem_arg)->hasIndexRegister();
	const auto contains_reg       = contains_base_reg || contains_index_reg;
	const auto memory_access      = mem_arg!=mem_arg_container.end() && !is_lea;
	const auto is_direct_memory_access = memory_access && !contains_reg;

	// check for a direct memory access
	if(is_direct_memory_access)
	{
		return candidate_scoop;
	}


	// calculate each offset=stride*multiplier pair
	auto candidate_offsets=set<int>();
	for(auto stride : strides)
	{
		for(auto multiplier : stride_multipliers)
		{
			candidate_offsets.insert(stride*multiplier);
		}
	}

	// how to tie two scoops; `a` must end exactly one byte before `b` starts
	auto insert_scoop_pair=[&](DataScoop_t* a, DataScoop_t* b, int i, int offset)
	{
		const auto tied_scoop_pair = ScoopPair_t(a,b) ;
		assert(tied_scoop_pair.first->getEnd()->getVirtualOffset()+1 == tied_scoop_pair.second->getStart()->getVirtualOffset());
		tied_scoops.insert(tied_scoop_pair);
		cout<<"	Tieing scoops "<<tied_scoop_pair.first->getName()<<" and "<<tied_scoop_pair.second->getName()<<" for i="<<dec<<i<<" offset="<<offset<<endl;
		ties_for_folded_constants++;
	};

	// how to decide if a scoop at offset i should be tied.
	// no scoop ->  no tie
	// un-tie-able scoop -> no tie
	// else tie
	auto should_tie=[&](const int i, DataScoop_t* prev_scoop) -> DataScoop_t*
	{
		DataScoop_t *this_scoop=findScoopByAddress(insn_addr+i);
		// no scoop at this addr?
		if(this_scoop==nullptr)
			return nullptr;
		// un-tie-able scoop at this addr?
		if(is_noptr_table(this_scoop))
			return nullptr;

		// if both scoops are already pinned, no reason to tie.
		const auto is_prev_moveable = moveable_scoops.find(prev_scoop)!=moveable_scoops.end();
		const auto is_this_moveable = moveable_scoops.find(this_scoop)!=moveable_scoops.end();
		if(!is_prev_moveable && !is_this_moveable)
			return nullptr;

		// else, tie
		return this_scoop;
	};


	// check each offset for a scoop that needs tying to this one.
	for(auto offset : candidate_offsets)
	{
		assert(offset!=0);
		auto candidate_offset_scoop=findScoopByAddress(insn_addr+offset) ;

		// check to see if the offset is in a different scoop
		if(candidate_scoop != candidate_offset_scoop)
		{

			// yes, therefore we have to tie all scoops between the start and end together.
			// stop if there's an untieable scoop in the way.
			auto prev_scoop=candidate_scoop;
			if(offset < 0 )
			{
				// walk downwards one byte at a time; the scoop found is the lower of each pair
				for(auto i=(int)-1;i>=offset; i--)
				{
					auto this_scoop=should_tie(i,prev_scoop);
					if(this_scoop)
					{
						if(this_scoop!=prev_scoop)
							insert_scoop_pair(this_scoop,prev_scoop, i, offset);
						prev_scoop=this_scoop;
					}
					else
						break;
				}
			}
			else
			{
				// walk upwards one byte at a time; the scoop found is the higher of each pair
				for(auto i=(int)1;i<=offset; i++)
				{
					auto this_scoop=should_tie(i,prev_scoop);
					if(this_scoop)
					{
						if(this_scoop!=prev_scoop)
							insert_scoop_pair(prev_scoop,this_scoop, i, offset);
						prev_scoop=this_scoop;
					}
					else
						break;
				}
			}
		}
	}

	return candidate_scoop;
}

template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
DataScoop_t* MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::DetectProperScoop_ConsiderEndOfPrev(