Skip to content
Snippets Groups Projects
mg.cpp 71.5 KiB
Newer Older
	const DecodedOperandVector_t &the_arg_container
	)
{

	// possibility for future work:  identify cases where 
	// 	[addr+rbx*8] that came from something like =a[i-1].  And addr==a[-1].
	// for now, memory operands that actually access memory, there's no additional analysis needed 
	if(!immed && disasm.getMnemonic()!=string("lea"))	
		// this should filter out cmp, move, test, add,  with a memory operation
		return ret;

	// now we have an immediate or an lea (i.e., no cmp reg, [mem] operations)
	// that's pointing to a scoop.   Let's check if it's a boundary between two scoops
	if(insn_addr!=ret->getStart()->getVirtualOffset())	
		// it's not, so just continue.
		return ret;


	// now look to see if there's a scoop regsitered that abuts this scoop; 
	DataScoop_t *scoop_for_prev=findScoopByAddress(insn_addr-1);	

	// if not found, we know we aren't in a boundary case.
	if(!scoop_for_prev)
		return ret;

	/* check to see if the immediate next instruction dereferences the destination of an lea. */
	Instruction_t* next_insn=insn->getFallthrough();
	if(next_insn == NULL) 
		next_insn=insn->getTarget();

	if(next_insn && disasm.getMnemonic() == string("lea"))	
	{
		const auto lea_disasmp=DecodedInstruction_t::factory(insn);
		const auto &lea_disasm=*lea_disasmp;;
		string dstreg=lea_disasm.getOperand(0)->getString(); 

		const auto next_disasmp=DecodedInstruction_t::factory(next_insn);
		const auto &next_disasm=*next_disasmp;
		auto memarg_container=next_disasm.getOperands();
		const auto memarg=find_memory_operand(memarg_container);

		// if we found a memory operation that uses the register, with no indexing, then conclude that 
		// we must access the variable after the address (not the variable before the address) 
		// if(memarg && string(next_disasm.Instruction.Mnemonic)!="lea " && string(memarg->ArgMnemonic)==dstreg )
		if(memarg!=memarg_container.end() && next_disasm.getMnemonic()!="lea" && (*memarg)->getString()==dstreg )
			return ret;
		
	}
	

	// if we're in a function
	// check that function for other references to scoop_for_prev
	if(insn->getFunction())
	{
		auto found_insn_it=find_if(
			ALLOF(insn->getFunction()->getInstructions()), 
			[&](Instruction_t* func_insn)
			{
				// disassemble instruction 
				const auto func_insn_disasmp=DecodedInstruction_t::factory(func_insn);
				const auto &func_insn_disasm=*func_insn_disasmp;
				auto func_insn_disasm_operands=func_insn_disasm.getOperands();

				// enter instructions have 2 immediates, so we can't just "getImmediate()"
				if(func_insn_disasm.getMnemonic()=="enter")
					return false;

				// check the immediate
				// if(getFileIR()->findScoop(func_insn_disasm.Instruction.Immediat) == scoop_for_prev)	
				 if(scoop_for_prev->getStart()->getVirtualOffset() <= (VirtualOffset_t)func_insn_disasm.getImmediate() && 
				    (VirtualOffset_t)func_insn_disasm.getImmediate() <= scoop_for_prev->getEnd()->getVirtualOffset())	
					return true;	// return from lamba that we found an insn.

				// don't bother with the memory check unless we're an LEA
				//if(func_insn_disasm.Instruction.Mnemonic!=string("lea "))
				if(func_insn_disasm.getMnemonic()!=string("lea"))
					return false; 

				// check the memory -- find the argument that's the mem ref;
				const auto the_arg=find_memory_operand(func_insn_disasm_operands);
				if(the_arg!=func_insn_disasm_operands.end())
				{
					// see if the lea has a scoop reference.
					VirtualOffset_t addr=(*the_arg)->getMemoryDisplacement();
					if(arg_has_relative(*(*the_arg)))
						addr+=insn->getDataBits().size();
		
					if(findScoopByAddress(addr) == scoop_for_prev)	
						return true;  // return from lamba
					
				}

				// not found in this insn
				return false; // lambda return
				

			});

		// no reference to prev_scoop found, just return;
		if(found_insn_it==insn->getFunction()->getInstructions().end())
		{
			return ret;
		}

	}


	// if we make it this far, we note that a single function has sketchy (aka address-generating) references
	// to both scoop_for_prev and ret;
	// in this case, we need to make keep these two scoops together since we can't tell which way the sketchy ref's go.
	// for now, just record the sketchy refs.

	cout<<"Boundary note:  instruction "<<insn->getBaseID()<<":"<<disasm.getDisassembly()<<" has immed/lea that points at boundary case.";
	if(insn->getFunction())
		cout<<" In "<<insn->getFunction()->getName()<<".";
	cout<<endl;
	cout<<"Keep together "<<
		scoop_for_prev->getName()<<" ("<<hex<< scoop_for_prev->getStart()->getVirtualOffset()<<"-"<<scoop_for_prev->getEnd()->getVirtualOffset()<<") and "<<
		ret->getName()<<" ("<<hex<< ret->getStart()->getVirtualOffset()<<"-"<<ret->getEnd()->getVirtualOffset()<<")"<<endl;

	tied_scoops.insert(ScoopPair_t(scoop_for_prev,ret));
	return ret;
}



// Convert the absolute address held in insn's immediate into an offset relative to the start
// of scoop `to`, and register an "immedptr_to_scoop" relocation on the instruction.
//
//   insn - instruction whose immediate references `to`; its data bits are rewritten.
//   to   - scoop the immediate points into.
//
// Instructions that reference a scoop which is no longer in moveable_scoops are left untouched.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyImmediateRelocation(Instruction_t *insn, DataScoop_t* to)
{
	const auto disasmp=DecodedInstruction_t::factory(insn);
	const auto &disasm=*disasmp;
	const VirtualOffset_t rel_addr2=disasm.getImmediate();

#if 1   // don't change instructions that reference re-pinned scoops.
	// This was necessary because we were not getting the zipr_unpin_plugin
	//  to undo our changes to the instruction in the case of a re-pinned scoop.
	//  That problem is fixed, but it is more efficient and safer to
	//  avoid editing instructions that reference re-pinned scoops.
	if (moveable_scoops.find(to) == moveable_scoops.cend()) {
		// NOTE(review): a line appears to have been lost just above this message; it may have
		// gated the message (e.g. on a verbosity flag).  Confirm against the upstream file.
		cout << "Avoiding editing of insn at " << hex << insn->getBaseID() << " after repinning scoop "
			<< to->getName() << endl;
		return;
	}
#endif

	getFileIR()->addNewRelocation(insn,0, "immedptr_to_scoop", to);

	// fixme: insn bits changed here 
	// sanity check: the decoded immediate must match the textual form of operand 1.
	assert(strtoumax(disasm.getOperand(1)->getString().c_str(), NULL, 0) ==  rel_addr2);

	// the new immediate is the distance from the start of the target scoop.
	const VirtualOffset_t new_addr = rel_addr2 - to->getStart()->getVirtualOffset();

	// overwrite the last 4 bytes of the instruction with the first 4 bytes of new_addr.
	// NOTE(review): this assumes the immediate is encoded in the trailing 4 bytes and that the
	// host is little-endian (so those are the low-order bytes) -- confirm.
	auto new_bits=insn->getDataBits();
	assert(4 < new_bits.size());
	new_bits.replace(new_bits.size()-4, 4, (const char*)&new_addr, 4);
	insn->setDataBits(new_bits);

	cout<<"Non-Overlapping_Globals::ApplyImmediateReloc::Setting "<<hex<<insn->getBaseID()<<" to "<<insn->getDisassembly()<<endl;
}


// Look at an instruction's immediate field and, when it appears to hold the address of a
// moveable, non-executable scoop, remember the (instruction, scoop) pair in immed_refs_to_scoops.
//
//   disasm  - decoded form of insn.
//   the_arg - iterator to the instruction's memory operand (not consulted here).
//   insn    - the instruction being examined.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::HandleImmediateOperand(const DecodedInstruction_t& disasm, const DecodedOperandVector_t::iterator the_arg, Instruction_t* insn)
{
	// shared objects have to use pc-relative addressing, so their immediates are never pointers.
	if(exe_reader->isDLL())
		return;

	// NOTE(review): because of the early return above, this can only select 4096*10
	// (assuming isDLL() answers consistently).
	const int small_memory_threshold= exe_reader->isDLL() ? 10 : 4096*10;

	// "enter" carries two immediates, so a single getImmediate() is not meaningful for it.
	const auto mnemonic=disasm.getMnemonic();
	if(mnemonic=="enter")
		return;

	VirtualOffset_t immed_value=disasm.getImmediate();
	auto all_operands=disasm.getOperands();
	DataScoop_t* const target=DetectProperScoop(disasm, all_operands.end(), insn, immed_value, true, all_operands);

	// nothing to do when the immediate doesn't land in any scoop.
	if(!target)
		return;

	// no log or action is necessary for (potential) pointers to code or
	// (potential) pointers to non-moveable data.
	if(target->isExecuteable() || moveable_scoops.find(target) == moveable_scoops.end())
		return;

	// Only a handful of instruction kinds are expected to carry a pointer in their immediate;
	// others (mul, vfmasubadd231, ...) are ignored.  This concerns the immediate field only --
	// displacements of memory operands are not examined in this routine.
	//
	//   cmp/test : commonly produced when the compiler strength-reduces.
	//   mov      : loads an address into a register.
	//   add      : loads an address plus an offset into a register.
	//   sub      : e.g.   DegenCount[strchr(Alphabet,iupac)-Alphabet] = ...
	//
	//	0x0000000000402a99 <+25>:	call   0x401620 <strchr@plt>
	//	0x0000000000402a9e <+30>:	mov    rbp <- rax
	//	0x0000000000402aa1 <+33>:	mov    rdi <- rbx
	//	0x0000000000402aa4 <+36>:	sub    rbp <- 0x65b500  # note:  constant is a pointer here!
	//	0x0000000000402aab <+43>:	eax <-  ...
	//	0x0000000000402ab0 <+48>:	mov    DWORD PTR [rbp*4+0x65b520] <- eax
	const bool may_hold_pointer =
		mnemonic == "mov"  ||
		mnemonic == "cmp"  ||
		mnemonic == "test" ||
		mnemonic == "add"  ||
		mnemonic == "sub";
	if(!may_hold_pointer)
		return;

	cout<<"Found non-mem ref in insn: "<<insn->getBaseID()<<":"<<disasm.getDisassembly()<<" to "
		<< target->getName() <<"("
		<<hex<<target->getStart()->getVirtualOffset()<<"-" 
		<<hex<<target->getEnd()->getVirtualOffset()<<")"<<endl; 

	// tables known not to contain pointers never get an immediate fixup recorded.
	if(!is_noptr_table(target))
		immed_refs_to_scoops.insert({insn,target});

	const bool outside_small_range =
		(int)immed_value < -small_memory_threshold || (int) immed_value > small_memory_threshold;
	if(outside_small_range || m_verbose)
	{
		if ((0 != immed_value) && m_verbose)
		{
			cout << "Note:  " << hex << immed_value << " not declared address in:";
			cout << insn->getBaseID() << ":" << disasm.getDisassembly();
			cout << endl;
		}
	}
}

// Visit every instruction in the IR and let the memory-operand and immediate-operand
// handlers record any references from that instruction to a scoop.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FindInstructionReferences()
{
	for(auto cur_insn : getFileIR()->getInstructions())
	{
		// decode once; both handlers share the decoded form and the operand list.
		auto decoded_ptr=DecodedInstruction_t::factory(cur_insn);
		auto &decoded=*decoded_ptr;
		auto operand_list=decoded.getOperands();

		// locate the memory operand (if any) up front.
		const auto mem_operand_it=find_memory_operand(operand_list);

		cout<<"Considering "<<hex<<cur_insn->getBaseID()<<":"<<decoded.getDisassembly()<<endl;

		HandleMemoryOperand(decoded, mem_operand_it, cur_insn, operand_list);
		HandleImmediateOperand(decoded, mem_operand_it, cur_insn);
	}
}


// Rewrite the pointer-sized value stored at `offset` inside scoop `from` so that it holds an
// offset from the start of scoop `to`, and register a "dataptr_to_scoop" relocation for it.
//
//   from   - scoop that contains the pointer; its bytes are modified.
//   offset - byte offset of the pointer within from's contents.
//   to     - scoop the pointer refers to.
//
// The pointer width is the architecture bit width / 8; only 4 and 8 are supported.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::ApplyDataRelocation(DataScoop_t *from, unsigned int offset, DataScoop_t* to)
{
	assert(to && from);

	const unsigned int byte_width=getFileIR()->getArchitectureBitWidth()/8;
	VirtualOffset_t val=0;

	// Fetch the current value.  The bytes are copied out rather than read through a casted
	// pointer because `offset` need not be suitably aligned.  The contents are bound by
	// reference so no pointer into a temporary is held, and the reference is dropped before
	// the scoop is modified below.
	{
		const auto &contents=from->getContents();
		assert(offset<=contents.size() && byte_width<=contents.size()-offset);

		if(byte_width==4)
		{
			int raw=0;	// signed: a 32-bit value is sign-extended when widened, as before.
			contents.copy(reinterpret_cast<char*>(&raw), sizeof(raw), offset);
			val=raw;
		}
		else if(byte_width==8)
		{
			long long raw=0;
			contents.copy(reinterpret_cast<char*>(&raw), sizeof(raw), offset);
			val=raw;
		}
		else
			assert(0);
	}
		
	auto reloc=getFileIR()->addNewRelocation(from,offset, "dataptr_to_scoop", to);
	(void)reloc; // just giving to ir

	// the stored value becomes the distance from the start of the target scoop.
	const VirtualOffset_t newval=val-to->getStart()->getVirtualOffset();

	// write the new value back over the old pointer.
	if(byte_width==4)
	{
		const auto intnewval=(unsigned int)newval;	 // 64->32 narrowing OK. 
		from->replaceBytes(offset, string(reinterpret_cast<const char*>(&intnewval), byte_width));
	}
	else if(byte_width==8)
	{
		from->replaceBytes(offset,string(reinterpret_cast<const char*>(&newval),byte_width));
	}
}



//
// check if val is a pointer or part of a string that mimics a pointer
//
//   val    - candidate pointer value that was read from `from` at `offset`.
//   from   - scoop the value was read from.
//   to     - scoop the value would point into (only used for the log message).
//   offset - byte offset of the value within from's contents.
//
// returns true when the value looks like the tail of a string rather than a pointer:
// bits 24-31 of val are zero, the low three bytes are printable, and the
// string_preheader_size bytes immediately before `offset` are printable too.
//
static inline bool is_part_of_string(VirtualOffset_t val, const DataScoop_t* from,  const DataScoop_t* to, int offset)
{
	assert(from && to);

	// locate strings that look like pointers but aren't.  e.g.:  "ion\0" and "ren\0".  Note that both are null terminated. 
	// this is a problem on 64-bit code because we screw up the string.
	
	// note:  the most significant byte is 0, and the lower 3 significant bytes are printable.

	// the least significant byte is special.  In a valid pointer, it's almost always 00 or 01 for 64-bit code or shared libraries, 
	// and 0x08 0x09 for 32-bit main executables.   Very very rarely is it anything else.
	// however, 0x01, 0x08, and 0x09 aren't printable, so we don't confuse these bytes in a string for an address and we don't need to detect this.
	const auto byte_is_printable=[val](const int shift) -> bool
	{
		return isprint(static_cast<int>((val >> shift) & 0xff)) != 0;
	};

	if ( ((val >> 24) & 0xff) != 0 )	// check for non-0
		return false;
	if ( !byte_is_printable(16) || !byte_is_printable(8) || !byte_is_printable(0) )	// and 3 printable characters.
		return false;

	// number of bytes that must precede the pointer and be string bytes to disambiguate a string's end from a pointer.
	const int string_preheader_size=4;

	// if we don't have enough bytes of preheader, skip it.
	if( offset < string_preheader_size ) 
		return false;

	// fetch the contents once; never index beyond what the scoop actually holds.
	const auto &contents=from->getContents();
	if( (size_t)offset > contents.size() )
		return false;

	// check each byte preceding the candidate pointer to see if it's printable.
	// the walk starts at offset-1: the byte at `offset` itself belongs to the candidate,
	// so starting there would examine only string_preheader_size-1 preceding bytes.
	for(auto i=1;i<=string_preheader_size;i++)
	{
		const unsigned char b=contents[offset-i];
		if(!isprint(b))
			return false;
	}

	// we found enough string chars before the (candidate) pointer value, so we think that a string is here, not a pointer.
	cout<<"Found string as non-ref "<<hex<<val<<" at "<<from->getName()<<"+"<<offset<<" ("
		<<hex<<from->getStart()->getVirtualOffset()<<"-" 
		<<hex<<from->getEnd()->getVirtualOffset()<<") to "
		<<to->getName()<<" ("
		<<hex<<to->getStart()->getVirtualOffset()<<"-" 
		<<hex<<to->getEnd()->getVirtualOffset()<<")"<<endl;
	return true;
}

// put in links between scoops and any references to them.
//
// Every data scoop is handed to a scanner chosen by the scoop's name.  Scoops named like ELF
// tables (.dynsym, .got*, .rel*, .rela*, .dynamic) get a scanner that walks their records;
// every other scoop is scanned one pointer-sized word at a time by the default scanner.
// Each reference found is recorded in data_refs_to_scoops.  The default scanner can also
// remove a target scoop from moveable_scoops ("repin" it) when a word merely looks like a
// pointer into it.
template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FindDataReferences()
{
	const unsigned int byte_width=getFileIR()->getArchitectureBitWidth()/8;

	typedef function<void (DataScoop_t*)> ScannerFunction_t;

	// outcome of deciding what to do with a word that lands inside scoop `to`.
	struct MoveDecision_t
	{
		bool move_ok;		// record the word as a reference to `to`
		bool disqualifies_to;	// the word is ambiguous, so `to` should stop being moveable
	};

	// read one pointer-sized value at byte position `pos`.  The bytes are copied out (instead of
	// dereferencing a casted pointer) so that no alignment is required.  A 4-byte value is
	// sign-extended when widened.
	const auto read_bytewidth=[byte_width](const string &contents, const unsigned int pos) -> long long
	{
		if(byte_width==4)
		{
			int raw=0;
			contents.copy(reinterpret_cast<char*>(&raw), sizeof(raw), pos);
			return raw;
		}
		if(byte_width==8)
		{
			long long raw=0;
			contents.copy(reinterpret_cast<char*>(&raw), sizeof(raw), pos);
			return raw;
		}
		assert(0);
		return 0;
	};

	// print "<prefix><val> at <from>+<offset> (<from range><joiner><to> (<to range>)".
	const auto log_ref=[](const char* prefix, const VirtualOffset_t val, const DataScoop_t* from,
			      const unsigned int offset, const char* joiner, const DataScoop_t* to)
	{
		cout<<prefix<<hex<<val<<" at "<<from->getName()<<"+"<<offset<<" ("
			<<hex<<from->getStart()->getVirtualOffset()<<"-" 
			<<hex<<from->getEnd()->getVirtualOffset()<<joiner
			<<to->getName()<<" ("
			<<hex<<to->getStart()->getVirtualOffset()<<"-" 
			<<hex<<to->getEnd()->getVirtualOffset()<<")"<<endl;
	};

	// if `val` is an address inside some scoop, log it and record the reference.
	const auto record_ref=[&](const char* prefix, const VirtualOffset_t val, DataScoop_t* scoop, const unsigned int offset)
	{
		DataScoop_t* to=findScoopByAddress(val);
		if(!to)
			return;
		log_ref(prefix, val, scoop, offset, ") to ", to);
		data_refs_to_scoops.insert({scoop,offset,to}); 
	};

	// aggressive policy: a word is a reference when it points into a moveable, non-executable
	// scoop and doesn't look like the tail of a string.  It never disqualifies the target.
	const auto aggressive_qualify_for_moving = [this](const DataScoop_t* from, DataScoop_t* to,
						       const VirtualOffset_t addr, const unsigned int offset_in_scoop) -> MoveDecision_t
	{
		const bool move_ok=
			!to->isExecuteable() && 
			moveable_scoops.find(to) != moveable_scoops.end() && 
			!is_part_of_string(addr,from,to,offset_in_scoop);
		return MoveDecision_t{move_ok, false};
	};

	// conservative policy: never treat a word as a reference; a word that points into a
	// moveable, non-executable scoop disqualifies that scoop instead.
	const auto qualify_for_moving = [this](const DataScoop_t* from, DataScoop_t* to,
					    const VirtualOffset_t addr, const unsigned int offset_in_scoop) -> MoveDecision_t
	{
		(void)from; (void)addr; (void)offset_in_scoop;	// only needed by the disabled heuristics below.

		// if points at executable scoop, we aren't doing that here!
		if(to->isExecuteable())
			return MoveDecision_t{false, false};

		// if not moveable, we aren't doing that here.	
		if ( moveable_scoops.find(to) == moveable_scoops.end())
			return MoveDecision_t{false, false};

		/* the heuristics below worked ok-ish, but not great.  trying this method to be more conservative */
		return MoveDecision_t{false, true};

/*
		// if this constant appears to be part of a string, skip it!
		if(is_part_of_string(addr,from,to,offset_in_scoop))
		{ move_ok=false;  disqualifies_to=false; return ; }

		// very few variables start at an address that ends in 0x000 and often address-looking constants do
		// if we see such an address, pin-and-win.
		if ( (addr&0xfff) == 0x000 && addr==to->getStart()->getVirtualOffset())
		{ move_ok=false;  disqualifies_to=true; return ; }

		// if we point at the start of a scoop, it's OK to move.
		if(addr==to->getStart()->getVirtualOffset())
		{ move_ok=true;  disqualifies_to=false; return ; }

		// if it points near a scoop, but not directly at it, it's hard to tell if it's moveable or not
		if(abs((long)addr-(long)to->getStart()->getVirtualOffset()) < 16 )
		{ move_ok=false;  disqualifies_to=true; return ; }

		// else, it's pointing in the middle of a scoop, so it's probably not a 
		// pointer at all.
		{ move_ok=false;  disqualifies_to=false; return ; }
*/
	};

	ScannerFunction_t got_scanner=[&](DataScoop_t* scoop)
	{
		// got scanner doesn't scan data section for shared objects since they can't have a constant address
		if(exe_reader->isDLL())
			return;

		const auto &contents=scoop->getContents();
		const auto len=contents.size();

		// only whole words are read, so the scan never runs off the end of the contents.
		for ( unsigned int i=0; i+byte_width<=len; i+=byte_width)
			record_ref("Found ref ", read_bytewidth(contents,i), scoop, i);
	};

	ScannerFunction_t default_scanner=[&](DataScoop_t* scoop)
	{
		// default scanner doesn't scan data section for shared objects since they can't have a constant address
		if(exe_reader->isDLL())
			return;

		const auto &contents=scoop->getContents();
		const auto len=contents.size();

		// try not to overrun the array
		for ( unsigned int i=0; i+byte_width<=len; i+=byte_width)
		{
			const auto val=read_bytewidth(contents,i);
			DataScoop_t* to=findScoopByAddress(val);	

			if(to)
			{
				const MoveDecision_t decision= aggressive ?
					aggressive_qualify_for_moving(scoop, to, val, i) :
					qualify_for_moving(scoop, to, val, i);

				if(decision.move_ok)
				{
					log_ref("Found ref ", val, scoop, i, ") to ", to);
					data_refs_to_scoops.insert({scoop,i,to}); 
				}
				else
				{
					log_ref("Found ref-looking-constant ", val, scoop, i, ") which would otherwise be to ", to);
				}

				if(decision.disqualifies_to)
				{
					// NOTE(review): this region contained web-scrape residue and unbalanced
					// braces; the nesting was rebuilt from the indentation and messages.
					// Confirm against the upstream file.
					if(!is_elftable(to))
					{
						log_ref("Ref-looking-constant ", val, scoop, i, ") is inconclusive.  Repinning ", to);
						moveable_scoops.erase(to);
					}
					else
					{
						log_ref("Ref-looking-constant ", val, scoop, i, ") is inconclusive.  Not repinning because is elftable ", to);
					}
				}
			}
			else
			{
				if((0 != val) && m_verbose)
				{
					cout<<"Constant "<<hex<<val<<" at "<<scoop->getName()<<"+"<<i<<" ("
						<<hex<<scoop->getStart()->getVirtualOffset()<<"-" 
						<<hex<<scoop->getEnd()->getVirtualOffset()<<") doesn't point at scoop."<<endl;
				}
			}
		}
	};

	// The table scanners below view the scoop contents as an array of fixed-size records and
	// visit only records that lie completely inside the contents.  The offset recorded for a
	// reference is the record's offset plus the offset of the field within the record.

	ScannerFunction_t dynsym_scanner=[&](DataScoop_t* scoop) 
	{ 
		const auto &contents=scoop->getContents();
		const auto table=(const T_Sym*)contents.c_str();
		const auto count=contents.size()/sizeof(T_Sym);

		for(size_t idx=0; idx<count; idx++)
		{
			const T_Sym &entry=table[idx];
			const auto offset=(unsigned int)((const char*)&entry.st_value-(const char*)table);
			record_ref("Found dynsym:st_value ref ", entry.st_value, scoop, offset);
		}
	};

	ScannerFunction_t rel_scanner=[&](DataScoop_t* scoop) 
	{  
		// .rel.* sections hold T_Rel records (no addend), so they must be walked with
		// sizeof(T_Rel); walking them as T_Rela uses the wrong record size.
		// NOTE(review): assumes T_Rel is the ELF Rel record type with an r_offset member -- confirm.
		const auto &contents=scoop->getContents();
		const auto table=(const T_Rel*)contents.c_str();
		const auto count=contents.size()/sizeof(T_Rel);

		for(size_t idx=0; idx<count; idx++)
		{
			const T_Rel &entry=table[idx];
			const auto offset=(unsigned int)((const char*)&entry.r_offset-(const char*)table);
			record_ref("Found rela:r_offset ref ", entry.r_offset, scoop, offset);
		}
	}; 

	ScannerFunction_t rela_scanner=[&](DataScoop_t* scoop)
	{ 
		const auto &contents=scoop->getContents();
		const auto table=(const T_Rela*)contents.c_str();
		const auto count=contents.size()/sizeof(T_Rela);

		for(size_t idx=0; idx<count; idx++)
		{
			const T_Rela &entry=table[idx];

			// handle addend field
			const auto addend_offset=(unsigned int)((const char*)&entry.r_addend-(const char*)table);
			record_ref("Found rela:r_added ref ", entry.r_addend, scoop, addend_offset);

			// handle offset field
			const auto offset_offset=(unsigned int)((const char*)&entry.r_offset-(const char*)table);
			record_ref("Found rela:r_offset ref ", entry.r_offset, scoop, offset_offset);
		}
	};

	ScannerFunction_t dynamic_scanner=[&](DataScoop_t* scoop)
	{ 
		const auto &contents=scoop->getContents();
		const auto table=(const T_Dyn*)contents.c_str();
		const auto count=contents.size()/sizeof(T_Dyn);

		for(size_t idx=0; idx<count; idx++)
		{
			const T_Dyn &entry=table[idx];

			// only the tags listed here are examined; their d_val is checked against the scoops.
			switch(entry.d_tag)
			{
				case DT_INIT_ARRAY:
				case DT_FINI_ARRAY:
				case DT_GNU_HASH:
				case DT_STRTAB:
				case DT_SYMTAB:
				case DT_PLTGOT:
				case DT_JMPREL:
				case DT_RELA:
				case DT_VERNEED:
				case DT_VERSYM:
				{
					const auto offset=(unsigned int)((const char*)&entry.d_un.d_val-(const char*)table);
					record_ref("Found .dynamic:d_val ref ", entry.d_un.d_val, scoop, offset);
					break;
				}
				default:  // do nothing
					break;
			}
		}
	};


	// special scanners for special sections
	const struct scoop_scanners_t
	{	string name;
		ScannerFunction_t scanner_fn;
	} scoop_scanners[] = {
		{ ".dynsym", dynsym_scanner }, 
		{ ".got", got_scanner }, 
		{ ".got.plt", got_scanner }, 
		{ ".rel.dyn", rel_scanner }, 
		{ ".rel.plt", rel_scanner }, 
		{ ".rel.dyn coalesced w/.rel.plt", rel_scanner }, 
		{ ".rela.dyn", rela_scanner }, 
		{ ".rela.plt", rela_scanner }, 
		{ ".rela.dyn coalesced w/.rela.plt", rela_scanner }, 
		{ ".dynamic", dynamic_scanner } 
	};

	// main algorithm:  apply the right scanner for each scoop
	for_each(ALLOF(getFileIR()->getDataScoops()), [&](DataScoop_t* scoop)
	{
		// take the table entry by reference: copying it would copy a string and a std::function.
		auto scanner=find_if(ALLOF(scoop_scanners), [&](const scoop_scanners_t &candidate)
		{
			return candidate.name==scoop->getName();
		});
		if(scanner!=end(scoop_scanners))
			scanner->scanner_fn(scoop);
		else
			default_scanner(scoop);
	});
}


template <class T_Sym, class  T_Rela, class T_Rel, class T_Dyn, class T_Extractor>
void MoveGlobals_t<T_Sym,T_Rela,T_Rel,T_Dyn,T_Extractor>::FilterAndCoalesceTiedScoops()
{
	const auto is_in_dont_coalesce_scoops = [](const DataScoop_t* to_find) -> bool
		{

			const string dont_coalesce_scoops[] =
			{
				".dynamic",
				".jcr"
			};
			const auto a_binder = bind1st(finder, to_find);
			const auto it=find_if(ALLOF(dont_coalesce_scoops), a_binder);

			return (it!=end(dont_coalesce_scoops));
		};



	// step 1:  find everything that's tied to a pinned scoop and pin it.
	// repeat until no changes.
	bool changed=true;
	while(changed)
	{
		changed=false;
		for(auto it=tied_scoops.begin(); it!=tied_scoops.end(); /* nop */)
		{
			auto current=it++;
			const ScoopPair_t& p=*current;
			DataScoop_t* s1=p.first;
			DataScoop_t* s2=p.second;
			bool s1_moveable=contains(moveable_scoops, s1);
			bool s2_moveable=contains(moveable_scoops, s2);

			if(is_in_dont_coalesce_scoops(s1) || is_in_dont_coalesce_scoops(s2)) 
			{
				cout<<"Skipping coalesce of "<<s1->getName()<<" and "<<s2->getName()<<endl;
				tied_scoops.erase(current);
				continue;
			}

			if(s1_moveable && s2_moveable)
			{
				// do nothing if they're both unpinned.
				tied_unpinned++;
			}
			else  if(s1_moveable)
			{
				tied_pinned++;

				// s1 is pinned to an unmoveable, so it's unmoveable.
				cout<<"Re-pinning "<<s1->getName()<<endl;
				moveable_scoops.erase(s1);	 
				tied_scoops.erase(current);
				changed=true;
			}
			else  if(s2_moveable)
			{
				cout<<"Re-pinning "<<s2->getName()<<endl;
				tied_pinned++;
				// s2 is pinned to an unmoveable.
				moveable_scoops.erase(s2); 
				tied_scoops.erase(current);
				changed=true;
			}
			else
			{
				tied_nochange++;
				tied_scoops.erase(current);
			}


		}
	}

	// step 2, coalesce
	changed=true;
	while(changed)
	{
		changed=false;
		for(auto it=tied_scoops.begin(); it!=tied_scoops.end(); )
		{
			auto current=it++; 
			const ScoopPair_t& p=*current;
			DataScoop_t* s1=p.first;
			DataScoop_t* s2=p.second;


			if(is_in_dont_coalesce_scoops(s1) || is_in_dont_coalesce_scoops(s2)) 
			{
				cout<<"Skipping coalesce of "<<s1->getName()<<" and "<<s2->getName()<<endl;
				continue;
			}

			bool s1_moveable=contains(moveable_scoops, s1);
			bool s2_moveable=contains(moveable_scoops, s2);

			// we previously removed anything that's pinned from moveable 
			if(s1_moveable && s2_moveable)
			{
				// assert order is right
				assert(s1->getStart()->getVirtualOffset() < s2->getStart()->getVirtualOffset());

				// check if these are adjacent.
				if(s1->getEnd()->getVirtualOffset()+1 < s2->getStart()->getVirtualOffset())
				{
					// pad s1 to fill hole	
					string new_contents=s1->getContents();
					new_contents.resize(s2->getStart()->getVirtualOffset()-s1->getStart()->getVirtualOffset());
					s1->getEnd()->setVirtualOffset(s2->getStart()->getVirtualOffset()-1);
				}
				else if(s1->getEnd()->getVirtualOffset()+1 == s2->getStart()->getVirtualOffset())
				{
					// do nothing if they fit perfectly.
				}
				else
					assert(0); // overlapping scoops?

				cout<<"Coalescing 2-tied, but unpinned scoops "<<s1->getName()<<" and "<<s2->getName()<<"."<<endl;


				// update our inteneral data structures for how to apply relocs.
				auto insn_fixup_updater=[s1,s2](set<Insn_fixup_t> &the_set)
					{
						unsigned int size=the_set.size();
						set<Insn_fixup_t> new_elements;
						auto it=the_set.begin();
						while(it!=the_set.end())
						{
							auto current = it++;
							auto replacer=*current;
							if(replacer.to == s2) 
							{
								the_set.erase(current);
								replacer.to=s1;
								new_elements.insert(replacer);
							}
						}
						the_set.insert(new_elements.begin(), new_elements.end());
						assert(size==the_set.size());
					};
				insn_fixup_updater(pcrel_refs_to_scoops);
				insn_fixup_updater(absolute_refs_to_scoops);
				insn_fixup_updater(immed_refs_to_scoops);

				auto scoop_fixup_updater=[s1,s2](set<Scoop_fixup_t> &the_set)
					{
						set<Scoop_fixup_t> new_elements;
						auto it=the_set.begin();
						while(it!=the_set.end())
						{
							auto current = it++;
							if(current->to == s2 || current->from==s2) 
							{
								auto replacer=*current;
								if(replacer.to==s2)
									replacer.to=s1;
							
								if(replacer.from==s2)
								{
									replacer.from=s1;
									cout<<"Updating data_ref_to_scoops offset from "<<hex<<replacer.offset<<" to "<<replacer.offset+s1->getSize()<<endl;
									replacer.offset+=s1->getSize();
								}
								the_set.erase(current);
								new_elements.insert(replacer);
							}
						}
						the_set.insert(new_elements.begin(), new_elements.end());
					};
				scoop_fixup_updater(data_refs_to_scoops);

				for(auto &r : getFileIR()->getRelocations())
				{
					// s2 just came into existence, didn't it?
					// assert(r->getWRT()!=s2);
					// yes, but there may be relocs pointing at the s2 part of 
					// a split object, and so the reloc might get updated to point to s2 instead.
					if( r->getWRT()==s2)
					{
						r->setWRT(s1);
						r->setAddend(r->getAddend()+s1->getSize());
					}
				}


				/*
				don't remove scoop here, as it will delete s2.  this bit is moved later.	
				*/
				// s2's end addresss is about to go away, so
				// update s1's end VO instead of using s2 end addr.
				s1->getEnd()->setVirtualOffset(s2->getEnd()->getVirtualOffset()); 
				moveable_scoops.erase(s2);		// remove it from our analysis
				unsigned int old_s1_size=s1->getContents().size();
				s1->setContents(s1->getContents()+s2->getContents());
				s1->setName(s1->getName()+" coalesced w/"+ s2->getName());
				if(!s2->isRelRo())
					s1->clearRelRo();
				s1->setRawPerms( s1->getRawPerms() | s2->getRawPerms());

				// we just created s2 in this pass, right?
				// no, s2 could be one of the sections from the orig binary that we've been asked to move
				// and it might have relocs for unpinning
				//assert(s2->getRelocations().size()==0); // assert no relocs that're part of s2.

				// add s2's relocs to s1.