Skip to content
Snippets Groups Projects
SMPFunction.cpp 205 KiB
Newer Older
	for (index = 0; index < this->IDom.size(); ++index) {
		pair<int, list<int> > DomTreeEntry;
		DomTreeEntry.first = this->IDom.at(index);
		DomTreeEntry.second.clear();
		this->DomTree.push_back(DomTreeEntry);
	}
	// Now, push the children onto the appropriate lists.
	for (index = 0; index < this->IDom.size(); ++index) {
		// E.g. if block 5 has block 3 as a parent, then we fetch the number 3
		//  using the expression this->DomTree.at(index).first, which was just
		//  initialized in the previous loop. Then we go to DomTree entry 3 and push
		//  the number 5 on its child list.
		int parent = this->DomTree.at(index).first;
clc5q's avatar
clc5q committed
		if (parent != (int) index) // block can dominate itself, but not in DomTree!
			this->DomTree.at(parent).second.push_back((int) index);
	}

	return;
} // end of SMPFunction::BuildDominatorTree()

// Helper for SSA subscript renumbering: return the next SSA number for the global name
//  and increment the SSACounter to prepare the next number. Push the returned number onto
//  the SSAStack for the global name.
int SMPFunction::SSANewNumber(size_t GlobNameIndex) {
	int Subscript = this->SSACounter.at(GlobNameIndex);
	++(this->SSACounter[GlobNameIndex]);
	this->SSAStack[GlobNameIndex].push_back(Subscript);
	return Subscript;
} // end of SMPFunction::SSANewNumber()

// Main helper for SSA subscript renumbering. Renumber within block throughout its phi
//  functions, then its DEFs and USEs, then its phi successors. Recurse then on all
//  successors in the dominator tree.
void SMPFunction::SSARename(int BlockNumber) {
	assert(0 <= BlockNumber);
	assert(BlockNumber < this->BlockCount);
	list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at((size_t) BlockNumber);
	DumpFlag |=	(0 == strcmp("main", this->GetFuncName()));
	DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName()));
	DumpFlag |= (0 == strcmp("uw_frame_state_for", this->GetFuncName()));
	DumpFlag |= (0 == strcmp("_IO_sputbackc", this->GetFuncName()));
clc5q's avatar
clc5q committed
#endif

	if (DumpFlag) msg("Entered SSARename for block number %d\n", BlockNumber);

	// For each phi function at the top of the block, rename the DEF of the phi function
	//  using SSANewNumber() on the global name index.
	set<SMPPhiFunction, LessPhi>::iterator CurrPhi;
	list<SMPPhiFunction> TempPhiList;
	int GlobalNameIndex;
	for (CurrPhi = CurrBlock->GetFirstPhi(); CurrPhi != CurrBlock->GetLastPhi(); ++CurrPhi) {
		op_t PhiDefOp = CurrPhi->GetAnyOp();
		GlobalNameIndex = CurrPhi->GetIndex();
		assert(0 <= GlobalNameIndex);
		int NewSSANum = this->SSANewNumber((size_t) GlobalNameIndex);

		// Cannot change the C++ STL set item directly, as sets might become unordered.
		SMPPhiFunction TempPhi = (*CurrPhi);
		TempPhi.SetSSADef(NewSSANum);
		TempPhiList.push_back(TempPhi);
		if (o_reg == PhiDefOp.type) {
			if (DumpFlag && DefOp.is_reg(R_ax)) {
				msg("New EAX Phi Def SSANum: %d Block %d\n", NewSSANum, BlockNumber);
			}
			// Map the final SSA number to the block number.
			int DefHashValue = HashGlobalNameAndSSA(PhiDefOp, NewSSANum);
			pair<int, ea_t> DefMapEntry(DefHashValue, CurrBlock->GetNumber());
			pair<map<int, ea_t>::iterator, bool> MapReturnValue;
			MapReturnValue = this->GlobalDefAddrBySSA.insert(DefMapEntry);
			assert(MapReturnValue.second);
		}
	}
	// Go back through the Phi function set and replace the items that need to be updated.
	list<SMPPhiFunction>::iterator TempIter;
	for (TempIter = TempPhiList.begin(); TempIter != TempPhiList.end(); ++TempIter) {
		// Use the op_t from the first phi use, because they are all the same.
		bool Erased = CurrBlock->ErasePhi(TempIter->GetPhiRef(0).GetOp());
		assert(Erased);
		// Now we can add back the phi function that had the DEF SSA number changed.
		bool Added = CurrBlock->AddPhi(*TempIter);
		assert(Added);
	}
	TempPhiList.clear();
	if (DumpFlag) msg("Processed phi functions at top.\n");

	// For each instruction in the block, rename all global USEs and then all global DEFs.
	list<list<SMPInstr>::iterator>::iterator CurrInst;
	for (CurrInst = CurrBlock->GetFirstInstr(); CurrInst != CurrBlock->GetLastInstr(); ++CurrInst) {
		set<DefOrUse, LessDefUse>::iterator CurrUse = (*CurrInst)->GetFirstUse();
		ea_t InstAddr = (*CurrInst)->GetAddr(); // for debugging break points
		while (CurrUse != (*CurrInst)->GetLastUse()) {
			// See if Use is a global name.
			UseOp = CurrUse->GetOp();
			set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(UseOp);
			if (GlobIter != this->GlobalNames.end()) { // found it
				unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter);
				if (GlobIndex > this->SSAStack.size()) {
					// Get some debug info out to the log file before we crash.
					msg("Bad GlobIndex: %d at %x in %s\n", GlobIndex, InstAddr, this->GetFuncName());
					exit(EXIT_FAILURE);
				}
				// Set the SSA number for this use to the top of stack SSA # (back())
				int NewSSANum;
				if (this->SSAStack.at(GlobIndex).empty()) {
					// No top of stack entry to read.
#if SMP_DEBUG_UNINITIALIZED_SSA_NAMES
					if (!(*CurrInst)->MDIsPopInstr() && (o_reg == UseOp.type)) {
						// POP uses the stack offset and generates spurious
						//  uninitialized variable messages for [esp+0].
						msg("WARNING: function %s : Use of uninitialized variable: ",
							this->GetFuncName());
						msg(" Variable: ");
						PrintListOperand(*GlobIter);
						msg(" Block number: %d Address: %x Instruction: %s\n", BlockNumber,
							(*CurrInst)->GetAddr(), (*CurrInst)->GetDisasm());
					}
#endif
					NewSSANum = SMP_SSA_UNINIT;
				}
				else {
					NewSSANum = this->SSAStack.at(GlobIndex).back();
				}
				CurrUse = (*CurrInst)->SetUseSSA(UseOp, NewSSANum);
				if (DumpFlag && (o_reg == UseOp.type) && UseOp.is_reg(R_ax)) {
					msg("New EAX Use SSANum: %d at %x\n", NewSSANum, (*CurrInst)->GetAddr());
				}
		set<DefOrUse, LessDefUse>::iterator CurrDef = (*CurrInst)->GetFirstDef();
		while (CurrDef != (*CurrInst)->GetLastDef()) {
			// See if Def is a global name.
			DefOp = CurrDef->GetOp();
			set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(DefOp);
			if (GlobIter != this->GlobalNames.end()) { // found it
				unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter);
				// Set the SSA number for this DEF to the SSANewNumber top of stack
				int NewSSANum = this->SSANewNumber(GlobIndex);
				CurrDef = (*CurrInst)->SetDefSSA(DefOp, NewSSANum);
				if (o_reg == DefOp.type) {
					if (DumpFlag && DefOp.is_reg(R_ax)) {
						msg("New EAX Def SSANum: %d at %x\n", NewSSANum, DefAddr);
					}

					// Map the final SSA number to the DEF address.
					int DefHashValue = HashGlobalNameAndSSA(DefOp, NewSSANum);
					pair<int, ea_t> DefMapEntry(DefHashValue, DefAddr);
					pair<map<int, ea_t>::iterator, bool> MapReturnValue;
					MapReturnValue = this->GlobalDefAddrBySSA.insert(DefMapEntry);
					assert(MapReturnValue.second);
				}
		} //  end for all DEFs
	} // end for all instructions
	if (DumpFlag) msg("Processed all instructions.\n");

	// For all control flow graph (not dominator tree) successors, fill in the current
	//  (outgoing) SSA number in the corresponding USE slot in the phi function, for all
	//  global names appearing in phi functions.
	list<list<SMPBasicBlock>::iterator>::iterator SuccIter;
	for (SuccIter = CurrBlock->GetFirstSucc(); SuccIter != CurrBlock->GetLastSucc(); ++SuccIter) {
		// What position in the Preds list of this successor is CurrBlock?
		int ListPos = (*SuccIter)->GetPredPosition(BlockNumber);
		assert(0 <= ListPos);

		// Go through all phi functions in this successor. At ListPos position in the
		//  incoming arguments for that phi function, set the SSA number to the SSA number
		//  in the top of stack entry for the global name associated with that phi function.
		set<SMPPhiFunction, LessPhi>::iterator CurrPhi;
		for (CurrPhi = (*SuccIter)->GetFirstPhi(); CurrPhi != (*SuccIter)->GetLastPhi(); ++CurrPhi) {
			int GlobIndex = CurrPhi->GetIndex();
			int CurrSSA;
			if (this->SSAStack.at(GlobIndex).empty()) {
				// No top of stack entry to read.
#if SMP_DEBUG_UNINITIALIZED_SSA_NAMES
				msg("WARNING: function %s : Path to use of uninitialized variable: ",
					this->GetFuncName());
				msg(" Variable: ");
				PrintListOperand(CurrPhi->GetAnyOp());
				msg(" Block number: %d Successor block number: %d\n", BlockNumber,
					(*SuccIter)->GetNumber());
#endif
				CurrSSA = SMP_SSA_UNINIT;
			}
			else {
				CurrSSA = this->SSAStack.at(GlobIndex).back();  // fetch from top of stack
			}
			SMPPhiFunction TempPhi = (*CurrPhi);
			TempPhi.SetSSARef(ListPos, CurrSSA);
			TempPhiList.push_back(TempPhi);
			if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) {
				msg("BlockNumber: %d  ListPos: %d\n", BlockNumber, ListPos);
			}
		} // end for all phi functions in successor
		// Go back through the Phi function set and replace the items that need to be updated.
		for (TempIter = TempPhiList.begin(); TempIter != TempPhiList.end(); ++TempIter) {
			if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) {
				msg("Special before phi dump:\n");
				set<SMPPhiFunction, LessPhi>::iterator FoundPhi;
				FoundPhi = (*SuccIter)->FindPhi(TempIter->GetAnyOp());
				FoundPhi->Dump();
			}
			// Use the op_t from the first phi use, because they are all the same.
			bool Erased = (*SuccIter)->ErasePhi(TempIter->GetPhiRef(0).GetOp());
			assert(Erased);
			// Now we can add back the phi function that had one SSA number changed.
			bool Added = (*SuccIter)->AddPhi(*TempIter);
			assert(Added);
			if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) {
				msg("Special after phi dump:\n");
				set<SMPPhiFunction, LessPhi>::iterator FoundPhi;
				FoundPhi = (*SuccIter)->FindPhi(TempIter->GetAnyOp());
				FoundPhi->Dump();
			}
		}
		TempPhiList.clear();
	} // end for all successors of CurrBlock
	if (DumpFlag) msg("Processed successor phi functions.\n");

	// For each successor in the dominator tree, recurse.
	list<int>::iterator ChildIter;
	for (ChildIter = this->DomTree[BlockNumber].second.begin();
		ChildIter != this->DomTree[BlockNumber].second.end();
		++ChildIter) {
			this->SSARename(*ChildIter);
	}
	if (DumpFlag) msg("Finished recursion.\n");

	// Pop off all SSAStack entries pushed during this block. I.e. for each global name,
	//  pop its SSAStack once per DEF and once per phi function in this block.
	for (CurrPhi = CurrBlock->GetFirstPhi(); CurrPhi != CurrBlock->GetLastPhi(); ++CurrPhi) {
		GlobalNameIndex = CurrPhi->GetIndex();
		this->SSAStack.at((size_t) GlobalNameIndex).pop_back();
	}
	if (DumpFlag) msg("Popped off entries due to phi functions.\n");
	for (CurrInst = CurrBlock->GetFirstInstr(); CurrInst != CurrBlock->GetLastInstr(); ++CurrInst) {
		set<DefOrUse, LessDefUse>::iterator CurrDef;
		for (CurrDef = (*CurrInst)->GetFirstDef(); CurrDef !=(*CurrInst)->GetLastDef(); ++CurrDef) {
			// See if DEF is a global name.
			set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(CurrDef->GetOp());
			if (GlobIter != this->GlobalNames.end()) { // found it
				unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter);
				this->SSAStack.at((size_t) GlobIndex).pop_back();
			}
		} //  end for all DEFs
	} // end for all instructions
	if (DumpFlag) { 
		msg("Popped off entries due to instructions.\n");
	}

	return;
} // end of SMPFunction::SSARename()

// Main driver of SSA subscript renumbering.
void SMPFunction::SSARenumber(void) {
clc5q's avatar
clc5q committed
#if 0
	DumpFlag |= (0 == strcmp("_IO_sputbackc", this->GetFuncName()));
#endif

	if (0 >= this->GlobalNames.size())
		return;  // no names to renumber

	// Initialize stacks and counters of SSA numbers.
	size_t GlobIndex;
	assert(0 == this->SSACounter.size());
	for (GlobIndex = 0; GlobIndex < this->GlobalNames.size(); ++GlobIndex) {
		list<int> DummyList;
		this->SSACounter.push_back(0);
		this->SSAStack.push_back(DummyList);
	}
	// Recurse through the dominator tree starting with node 0.
	this->SSARename(0);
} // end of SMPFunction::SSARenumber()

// Main driver for the type inference system.
void SMPFunction::InferTypes(bool FirstIter) {
	// The type inference system is an iteration over four analysis steps, until
	//  a fixed point is reached:
	// 1) Within an instruction, set types of operators based on the operator type,
	//     the operand types, and the instruction type category, and propagate the
	//     type of the SMP_ASSIGN operator to its DEF.
	// 2) Propagate the type of a DEF along its SSA chain to all USEs of that SSA name.
	// 3) If all USEs of an SSA name have the same type, but the DEF has no type,
clc5q's avatar
clc5q committed
	//     then infer that the DEF must have the same type.
	// 4) If all references to a memory location have the same type, mark that memory
	//     location as having that type, if no aliasing occurs.
	//
	// The type inference system will mark DEFs and USEs in each instruction's DEF and USE
	//  sets with an inferred type. This inference on USEs is not conclusive for other USEs
	//  outside of that instruction. For example, a pointer could be read in from memory
	//  and used as a pointer, then hashed using an arithmetic operation. If the arithmetic
	//  operation always treats its source operands as NUMERIC and produces a NUMERIC
	//  result, e.g. SMP_BITWISE_XOR, then the USE of that pointer is NUMERIC within
	//  this xor instruction. If the DEF at the beginning of the SSA chain for the pointer
	//  is eventually marked as POINTER, then all USEs in the chain will be marked POINTER
	//  as well (see step 2 above). This inconsistency along the USE chain is perfectly
	//  acceptable in our type system. It is important to mark the USEs according to how
	//  we observe them being used, because consistent USEs will propagate back up to
	//  the DEF in step 3 above.

	bool changed;
clc5q's avatar
clc5q committed
	bool NewChange = false;
	bool DebugFlag = false;
#if SMP_DEBUG_TYPE_INFERENCE
	DebugFlag |= (0 == strcmp("__libc_csu_init", this->GetFuncName()));
	list<SMPInstr>::iterator CurrInst;
clc5q's avatar
clc5q committed
	set<DefOrUse, LessDefUse>::iterator CurrDef;
	set<DefOrUse, LessDefUse>::iterator NextDef;
	list<SMPBasicBlock>::iterator CurrBlock;
	if (DebugFlag) {
		this->Dump();
	}
	// One time only: Set the types of immediate values, flags register, stack and frame
	//  pointers, and floating point registers.
	if (FirstIter) {
		for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
			if (DebugFlag) {
				msg("SetImmedTypes for inst at %x: %s\n", CurrInst->GetAddr(), CurrInst->GetDisasm());
			}
			CurrInst->SetImmedTypes(this->UseFP);
			// Infer signedness, bit width, and other info from the nature of the instruction
			//  (e.g. loads from stack locations whose signedness has been inferred earlier
			//  in FindOutGoingArgSize(), or inherently signed arithmetic opcodes like signed
			//  or unsigned multiplies and divides).
			CurrInst->MDSetWidthSignInfo(this->UseFP);
		}
		// Check for signedness inferences from conditional branches at the end of blocks.
		for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
			CurrBlock->MarkBranchSignedness();
	// Iterate until no more changes: set types in DEF and USE lists based on RTL
clc5q's avatar
clc5q committed
	//  operators and the instruction category, SSA DEF-USE chains, etc.
		do {
			changed = false;
			// Step one: Infer types within instructions, context free.
			// Step two, propagating DEF types to all USEs, happens within step one
			//  whenever a DEF type is set for the first time.
			for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
				if (DebugFlag) msg("Inferring types for %s\n", CurrInst->GetDisasm());
				NewChange = CurrInst->InferTypes();
				changed = (changed || NewChange);
			}
		} while (changed);
clc5q's avatar
clc5q committed
		if (DebugFlag) msg("Finished type inference steps 1 and 2.\n");
		// Step three: If all USEs of an SSA name have the same type, but the DEF has no
		//  type, then infer that the DEF must have the same type.
		this->TypedDefs = 0;
		this->UntypedDefs = 0;
		this->TypedPhiDefs = 0;
		this->UntypedPhiDefs = 0;
clc5q's avatar
clc5q committed
		for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
			// Find any DEF that still has type UNINIT.
			CurrDef = CurrInst->GetFirstDef();
			while (CurrDef != CurrInst->GetLastDef()) {
				// Set erase() and insert() are needed to change types of DEFs, so
				//  get hold of the next iterator value now.
				NextDef = CurrDef;
				++NextDef;
				if (UNINIT != CurrDef->GetType()) {
					++(this->TypedDefs);
				}
				else {
clc5q's avatar
clc5q committed
					op_t DefOp = CurrDef->GetOp();
					bool MemDef = (DefOp.type != o_reg);
					bool AliasedMemWrite = (MemDef && CurrDef->HasIndirectWrite());
					++(this->UntypedDefs);
					if (MDIsIndirectMemoryOpnd(DefOp, this->UseFP)  // relax this?
						|| (o_mem == DefOp.type)
						// Don't want to infer along DEF-USE chains for indirect
						//  memory accesses until we have alias analysis.
						++CurrDef;
						continue;
					}
clc5q's avatar
clc5q committed
					ea_t DefAddr = CurrInst->GetAddr();
					// Call inference method based on whether it is a block-local
					//  name or a global name.
					if (CurrInst->GetBlock()->IsLocalName(DefOp)) {
						set<op_t, LessOp>::iterator NameIter;
						NameIter = CurrInst->GetBlock()->FindLocalName(DefOp);
						assert(CurrInst->GetBlock()->GetLastLocalName() != NameIter);
						unsigned int LocIndex = ExtractGlobalIndex(*NameIter);
						NewChange = CurrInst->GetBlock()->InferLocalDefType(DefOp, LocIndex, DefAddr);
						if (NewChange) {
							--(this->UntypedDefs);
							++(this->TypedDefs);
						}
clc5q's avatar
clc5q committed
						changed = (changed || NewChange);
					}
					else {
						// global name
						bool CallInst = ((CALL == CurrInst->GetDataFlowType())
							|| (INDIR_CALL == CurrInst->GetDataFlowType()));
						SMPOperandType DefType = UNINIT;
						DefType = this->InferGlobalDefType(DefOp,
							CurrDef->GetSSANum(), CurrInst->GetBlock(), CallInst);
						if (IsNotEqType(UNINIT, DefType)) {
							CurrDef = CurrInst->SetDefType(DefOp, DefType);
							--(this->UntypedDefs);
							++(this->TypedDefs);
							NewChange = true;
clc5q's avatar
clc5q committed
						changed = (changed || NewChange);
					} // end if local name ... else ...
				} // end if (UNINIT != CurrDef->GetType()) .. else ...
clc5q's avatar
clc5q committed
				CurrDef = NextDef;
			} // end while all DEFs in the DEF set
		} // end for all instructions
		if (DebugFlag) msg("Finished type inference step 3.\n");
		if (!changed) { // Check for Phi function DEFs that are still UNINIT
			for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
				changed |= CurrBlock->InferAllPhiDefTypes();
			}
		}
		if (DebugFlag) msg("Finished unconditional phi type inference.\n");

#if SMP_CONDITIONAL_TYPE_PROPAGATION
		if (!changed) { // Try conditional type propagation
			changed |= this->ConditionalTypePropagation();
			if (DebugFlag)
				msg("changed = %d after conditional type propagation.\n", changed);
		}
#endif

	} while (changed);
	// With type inference finished, infer signedness from the types, e.g.
	//  POINTER and CODEPOINTER types must be UNSIGNED.
	if (FirstIter) { // Don't want profiler-dependent signedness in the system yet.
		for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
			CurrInst->InferSignednessFromSMPTypes(this->UsesFramePointer());
		}
	}

	// Record the meet of all register types that reach RETURN instructions.
	this->MDFindReturnTypes();
	return;
} // end of SMPFunction::InferTypes()
// determine signedness and width info for all operands
void SMPFunction::InferFGInfo(void) {
	bool changed, NewChange;
	unsigned short IterCount = 0;
	list<SMPInstr>::iterator CurrInst;
	list<SMPBasicBlock>::iterator CurrBlock;

	do {
		changed = false;
		++IterCount;
		for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
			NewChange = CurrInst->InferFGInfo(IterCount);
			changed = (changed || NewChange);
		}
		if (changed) {
			for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
				CurrBlock->PropagatePhiFGInfo();
			}
		}
	} while (changed);
	return;
} // end of SMPFunction::InferFGInfo()

// Apply the profiler information to this function once we've inferred everything we can about it.
void SMPFunction::ApplyProfilerInformation(ProfilerInformation* pi)
{
	assert(pi);

	// If no profiler annotations are available, save time.
	if (0 == pi->GetProfilerAnnotationCount())
		return;

	SetIsSpeculative(true);	

	list<SMPInstr>::iterator CurrInst;
	set<DefOrUse, LessDefUse>::iterator CurrDef, NextDef;
	
	bool DebugFlag = false;
clc5q's avatar
clc5q committed
#if SMP_DEBUG_PROFILED_TYPE_INFERENCE
	DebugFlag |= (0 == strcmp("dohanoi", this->GetFuncName()));
#endif

	// for each instruction in this function 
clc5q's avatar
clc5q committed
	for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
		// lookup whether a load at this instruction was profiled as always numeric 
		InstructionInformation* ii = pi->GetInfo( CurrInst->GetAddr());
clc5q's avatar
clc5q committed
		if (ii && DebugFlag)
			msg("Found instruction information for %x\n", CurrInst->GetAddr());
clc5q's avatar
clc5q committed
		if (ii && ii->isNumeric()) {
#if SMP_DEBUG_PROFILED_TYPE_INFERENCE
			msg("Found instruction information for %x and it's numeric!\n", CurrInst->GetAddr());
clc5q's avatar
clc5q committed
#endif
			CurrInst->UpdateMemLoadTypes((SMPOperandType)(NUMERIC|PROF_BASE));
		}

		// lookup whether this instruction has been profiled as an indirect call
clc5q's avatar
clc5q committed
		set<ea_t> indirect_call_targets = pi->GetIndirectCallTargets(CurrInst->GetAddr());
clc5q's avatar
clc5q committed
		for (set<ea_t>::iterator ict_iter = indirect_call_targets.begin();
			ict_iter != indirect_call_targets.end();
			++ict_iter)
clc5q's avatar
clc5q committed
			ea_t target = *ict_iter;
			if (vector_exists(target, IndirectCallTargets))
				IndirectCallTargets.push_back(target);
			if (vector_exists(target, AllCallTargets))
				AllCallTargets.push_back(target);

		}

}	// end of SMPFunction::ApplyProfilerInformation

clc5q's avatar
clc5q committed
// For the UNINIT type DEF DefOp, see if all its USEs have
//  a single type. If so, set the DEF to that type and return type,
//  else return UNINIT.
SMPOperandType SMPFunction::InferGlobalDefType(op_t DefOp, int SSANum, SMPBasicBlock *DefBlock, bool CallInst) {
	bool DebugFlag = false;
	bool FoundNumeric = false;
	bool FoundPointer = false;
	bool FoundUnknown = false;
	bool FoundUninit = false;

#if SMP_DEBUG_TYPE_INFERENCE
	DebugFlag |= (0 == strcmp("mem_init", this->GetFuncName()));
clc5q's avatar
clc5q committed

	if (DebugFlag) {
		msg("InferGlobalDefType for SSANum %d of ", SSANum);
		PrintOperand(DefOp);
		msg("\n");
	}

clc5q's avatar
clc5q committed
	list<list<SMPInstr>::iterator>::iterator InstIter;
	list<SMPBasicBlock>::iterator BlockIter;
clc5q's avatar
clc5q committed

	assert(0 <= SSANum);
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	// Go through all instructions in the function and find the instructions
clc5q's avatar
clc5q committed
	//  that have USEs of DefOp with SSANum. If all USEs in the chain have
	//  a single type (other than UNINIT), change the DEF type to match the
	//  USE type and set changed to true.
clc5q's avatar
clc5q committed
	set<op_t, LessOp>::iterator NameIter = this->FindGlobalName(DefOp);
	unsigned int NameIndex = ExtractGlobalIndex(*NameIter);
clc5q's avatar
clc5q committed
	bool FirstUseSeen = false;
	SMPOperandType UseType = UNINIT;
	SMPOperandType PtrType = UNINIT;
clc5q's avatar
clc5q committed
	for (BlockIter = this->Blocks.begin(); BlockIter != this->Blocks.end(); ++BlockIter) {
		if (!(BlockIter->IsGlobalReferenced(NameIndex)))
			continue;
		// Found a block that at least has references to the global name, although we don't
		//  know if it has references to the particular SSANum we are looking for. We can
		//  look at each instruction in the block.
		for (InstIter = BlockIter->GetFirstInstr(); InstIter != BlockIter->GetLastInstr(); ++InstIter) {
			CurrUse = (*InstIter)->FindUse(DefOp);
			if (CurrUse != (*InstIter)->GetLastUse()) { // found a USE of DefOp
				if (CurrUse->GetSSANum() == SSANum) { // matched SSA number
					if (!FirstUseSeen) {
						FirstUseSeen = true;
					}
					UseType = CurrUse->GetType();
					FoundNumeric |= (IsNumeric(UseType));
					FoundUnknown |= (IsUnknown(UseType));
					FoundUninit |= (IsEqType(UNINIT, UseType));
					if (IsDataPtr(UseType)) {
						if (FoundPointer) {
							if (IsNotEqType(PtrType, UseType)) {
clc5q's avatar
clc5q committed
#if SMP_DEBUG_TYPE_INFERENCE
clc5q's avatar
clc5q committed
								msg("WARNING: Differing ptr types in global chain:");
								msg(" Prev: %d Current: %d %s\n", PtrType, UseType,
									(*InstIter)->GetDisasm());
clc5q's avatar
clc5q committed
#endif
clc5q's avatar
clc5q committed
								PtrType = POINTER;
							}
						}
						else {
							FoundPointer = true;
							PtrType = UseType;
clc5q's avatar
clc5q committed
				} // end if matched SSA #
			} // end if found a USE of DefOp
		} // end for all instructions
	} // end for all blocks
clc5q's avatar
clc5q committed

	// Now, we need to check the phi functions and see if there are Phi USEs of the DefOp.
	set<SMPPhiFunction, LessPhi>::iterator UsePhi;
	for (BlockNum = 0; BlockNum < (size_t) this->BlockCount; ++BlockNum) {
		UsePhi = this->RPOBlocks.at(BlockNum)->FindPhi(DefOp);
		if (UsePhi != this->RPOBlocks.at(BlockNum)->GetLastPhi()) {
			// Found phi function for DefOp. See if we can find a USE
			//  with USE SSANum corresponding to our DEF SSANum.
			for (size_t PhiIndex = 0; PhiIndex < UsePhi->GetPhiListSize(); ++PhiIndex) {
				if (UsePhi->GetUseSSANum(PhiIndex) == SSANum) {
					// We have a Phi USE that matches our DEF.
					if (!FirstUseSeen) {
						FirstUseSeen = true;
					UseType = UsePhi->GetUseType(PhiIndex);
					FoundNumeric |= (IsNumeric(UseType));
					FoundUnknown |= (IsUnknown(UseType));
					FoundUninit |= (IsEqType(UNINIT, UseType));
					if (IsDataPtr(UseType)) {
						if (FoundPointer) {
							if (IsNotEqType(PtrType, UseType)) {
clc5q's avatar
clc5q committed
								;
#if SMP_DEBUG_TYPE_INFERENCE
								msg("WARNING: Differing ptr types in global chain at Phi:");
								msg(" Prev: %d Current: %d BlockNum: %d\n",
									PtrType, UseType, BlockNum);
clc5q's avatar
clc5q committed
#endif
							}
							PtrType = POINTER;
						}
						else {
							FoundPointer = true;
							PtrType = UseType;
						}
				} // end if matched SSA #
			} // end for all Phi USEs
		} // end if found matching Phi function for DefOp
	} // end for all block numbers in the function
	if (FirstUseSeen) {
		// Do we have a consistent type?
		// If we see any definite POINTER uses, we must set the DEF
		//  to type POINTER or a refinement of it.
		if (FoundPointer)
			UseType = PtrType;
		else if (FoundNumeric && !FoundUninit && !FoundUnknown)
			UseType = NUMERIC;
		else
			return UNINIT; // no POINTER, but no consistent type
clc5q's avatar
clc5q committed
		assert(UNINIT != UseType);
		if (DebugFlag) msg("Inferring global DEF of type %d\n", UseType);
		return UseType;
clc5q's avatar
clc5q committed
	}
	else { // not FirstUseSeen
		// If the block returns, then the DEFs could be used in the caller.
		if (!(DefBlock->HasReturn())) {
			UseType = UNINIT;
			// We probably want to set the DEF type to NUMERIC if there are no uses.
			//  Have to check these cases out manually in the *.asm first.  **!!**
clc5q's avatar
clc5q committed
			//  If they are memory DEFs, we cannot optimize, so we might want to see
			//  if we can find a reg DEF with no USEs here. We also want to exclude
			//  warning messages for the caller-saved reg DEFs generated for CALLs.
			if ((o_reg == DefOp.type) && (!CallInst)) {
				;
#if SMP_WARN_UNUSED_DEFS
				msg("WARNING: global DEF with no USEs for SSANum %d DefOp: ",
					SSANum);
clc5q's avatar
clc5q committed
				PrintOperand(DefOp);
				msg("\n");
clc5q's avatar
clc5q committed
			}
clc5q's avatar
clc5q committed
	}
	return UseType;
clc5q's avatar
clc5q committed
} // end of SMPFunction::InferGlobalDefType()

#define SMP_SIMPLE_CONDITIONAL_TYPE_PROPAGATION 1
#if SMP_SIMPLE_CONDITIONAL_TYPE_PROPAGATION
// The simple form of conditional type propagation observes that we
//  simply need to apply the meet operator over Phi function USEs and
//  then propagate any DEF type changes using PropagateGlobalDefType().
//  The outermost iteration over all type inference methods in InferTypes()
//  will take care of all the propagation that is handled by the work list
//  processing in the textbook algorithm.
// Iteration convergence might be slower in the simple approach, but the code
//  is much simpler to debug.
bool SMPFunction::ConditionalTypePropagation(void) {
	bool changed = false;
	list<SMPBasicBlock>::iterator CurrBlock;
	vector<list<SMPBasicBlock>::iterator>::iterator CurrRPO;
	set<SMPPhiFunction, LessPhi>::iterator CurrPhi;

	for (CurrRPO = this->RPOBlocks.begin(); CurrRPO != this->RPOBlocks.end(); ++CurrRPO) {
		CurrBlock = *CurrRPO;
		SMPOperandType MeetType;
		for (CurrPhi = CurrBlock->GetFirstPhi(); CurrPhi != CurrBlock->GetLastPhi(); ++CurrPhi) {
			MeetType = CurrPhi->ConditionalMeetType();
			// Here we use a straight equality test, not our macros,
			//  because we consider it a change if the MeetType is
			//  profiler derived and the DEFType is not.
			if (MeetType == CurrPhi->GetDefType())
				continue;
			// Change the DEF type to the MeetType and propagate.
			op_t DefOp = CurrPhi->GetAnyOp();
			bool IsMemOp = (o_reg != DefOp.type);
			CurrPhi = CurrBlock->SetPhiDefType(DefOp, MeetType);
			changed = true;
			this->ResetProcessedBlocks();
			changed |= CurrBlock->PropagateGlobalDefType(DefOp,
				MeetType, CurrPhi->GetDefSSANum(), IsMemOp);
		} // end for all phi functions in the current block
	} // end for all blocks

	return changed;
} // end of SMPFunction::ConditionalTypePropagation()

#else  // not SMP_SIMPLE_CONDITIONAL_TYPE_PROPAGATION

// Apply the SCC (Sparse Conditional Constant) propagation algorithm to
//  propagate types starting from unresolved Phi DEFs.
bool SMPFunction::ConditionalTypePropagation(void) {
	bool changed = false;

	// Collections of Phi functions and instructions that have a DEF
	//  with type UNINIT for the current global name.
	map<int, set<SMPPhiFunction, LessPhi>::iterator> UninitDEFPhis;
	vector<list<SMPInstr>::iterator> UninitDEFInsts;

	// Work lists of Phi functions and instructions that need to be processed
	//  according to the SCC algorithm.
	list<map<int, set<SMPPhiFunction, LessPhi>::iterator>::iterator> PhiWorkList;
	list<vector<list<SMPInstr>::iterator>::iterator> InstWorkList;

	// Iterate through all global names that are either (1) registers
	//  or (2) stack locations in SAFE functions.
	set<op_t, LessOp>::iterator CurrGlob;
	for (CurrGlob = this->GetFirstGlobalName(); CurrGlob != this->GetLastGlobalName(); ++CurrGlob) {
		op_t GlobalOp = *CurrGlob;
		list<SMPBasicBlock>::iterator CurrBlock;
		vector<list<SMPBasicBlock>::iterator>::iterator CurrRPO;
		if (MDIsIndirectMemoryOpnd(GlobalOp, this->UseFP))
			continue; // need alias analysis to process indirect accesses
		if ((GlobalOp.type != o_reg)
			&& (!((this->ReturnAddrStatus == FUNC_SAFE) && MDIsStackAccessOpnd(GlobalOp, this->UseFP))))
			continue; // not register, not safe stack access

		// Set up a map (indexed by SSANum) of iterators to Phi functions
		//  for the current global name that have UNINIT as the Phi DEF type.
		UninitDEFPhis.clear();
		UninitDEFInsts.clear();
		for (CurrRPO = this->RPOBlocks.begin(); CurrRPO != this->RPOBlocks.end(); ++CurrRPO) {
			CurrBlock = *CurrRPO;
			set<SMPPhiFunction, LessPhi>::iterator CurrPhi;
			CurrPhi = CurrBlock->FindPhi(GlobalOp);
			if (CurrPhi != CurrBlock->GetLastPhi()) {
				// Found Phi function for current global name.
				if (IsEqType(CurrPhi->GetDefType(), UNINIT)) {
					// Phi DEF is UNINIT; add Phi to the map.
					pair<int, set<SMPPhiFunction, LessPhi>::iterator> TempPair(CurrPhi->GetDefSSANum(), CurrPhi);
					bool Inserted = false;
					map<int, set<SMPPhiFunction, LessPhi>::iterator>::iterator WhereIns;
					pair<map<int, set<SMPPhiFunction, LessPhi>::iterator>::iterator, bool> Result(WhereIns, Inserted);
					Result = UninitDEFPhis.insert(TempPair);
					assert(Result.second == true);
				}
			}
		} // end for all blocks

		// If any Phi DEF had UNINIT as its type, set up a vector of
		//  iterators to instructions that have UNINIT as the DEF type
		//  for the current global name.
		if (UninitDEFPhis.empty())
			continue;
		list<SMPInstr>::iterator CurrInst;
		for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
			set<DefOrUse, LessDefUse>::iterator CurrDef = CurrInst->FindDef(GlobalOp);
			if (CurrDef != CurrInst->GetLastDef()) {
				// Found DEF of current global name.
				if (IsEqType(UNINIT, CurrDef->GetType())) {
					UninitDEFInsts.push_back(CurrInst);
				}
			}
		} // end for all instructions

		// Put all UNINIT Phi DEFs that have at least one USE
		//  that is not UNINIT onto the PhiWorkList.
		map<int, set<SMPPhiFunction, LessPhi>::iterator>::iterator CurrUnPhi;
		for (CurrUnPhi = UninitDEFPhis.begin(); CurrUnPhi != UninitDEFPhis.end(); ++CurrUnPhi) {
			pair<int, set<SMPPhiFunction, LessPhi>::iterator> PhiDefPair(*CurrUnPhi);
			if (PhiDefPair.second->HasTypedUses()) {
				PhiWorkList.push_back(CurrUnPhi);
			}
		}

		// Iterate until both work lists are empty:
		while (!(PhiWorkList.empty() && InstWorkList.empty())) {
			// Process Phi items first.
			while (!PhiWorkList.empty()) {
				// If applying the meet operator over the Phi USE types
				//  would produce a new DEF type, change the DEF type and
				//  propagate it, adding Phi functions and instructions that
				//  received the propagated type to their respective work lists.
				map<int, set<SMPPhiFunction, LessPhi>::iterator>::iterator MapIter;
				MapIter = PhiWorkList.front();
				PhiWorkList.pop_front();  // remove from work list
				pair<int, set<SMPPhiFunction, LessPhi>::iterator> PhiDefPair;
				PhiDefPair.first = MapIter->first;
				PhiDefPair.second = MapIter->second;
				set<SMPPhiFunction, LessPhi>::iterator CurrPhi = PhiDefPair.second;
				SMPOperandType MeetType = CurrPhi->ConditionalMeetType();
				// Here we use a straight equality test, not our macros,
				//  because we consider it a change if the MeetType is
				//  profiler derived and the DEFType is not.
				if (MeetType == CurrPhi->GetDefType())
					continue;
				// At this point, we need to set the DEFType to the MeetType
				//  and propagate the change. We have a map of all the
				//  critical Phi functions for this global name, as well
				//  as a vector of the relevant instructions for this name.
				CurrPhi->SetDefType(MeetType);
				changed = true;
				int DefSSANum = CurrPhi->GetDefSSANum();
				map<int, set<SMPPhiFunction, LessPhi>::iterator>::iterator PhiIter;
				vector<list<SMPInstr>::iterator>::iterator InstIter;
				// Propagate to Phi functions first.
				for (PhiIter = UninitDEFPhis.begin(); PhiIter != UninitDEFPhis.end(); ++PhiIter) {
					if (DefSSANum == PhiIter->first)
						continue;  // Skip the Phi that we just changed
					for (size_t index = 0; index < PhiIter->second->GetPhiListSize(); ++index) {
						if (DefSSANum == PhiIter->second->GetUseSSANum(index)) {
							// Matched SSA # to USE. Propagate new type.
							PhiIter->second->SetRefType(index, MeetType);
							// Add this phi function to the work list.
							PhiWorkList.push_back(PhiIter);
						}
					}
				}
#define SMP_COND_TYPE_PROP_TO_INSTS 0
#if SMP_COND_TYPE_PROP_TO_INSTS
				// Propagate to instructions with uninit DEFs of global name.
				//  The idea is that the instructions that hold up type propagation
				//  are the ones that USE and then DEF the same global name.
				//  For example, "increment EAX" has to know the type of
				//  the USE of EAX in order to set the type of the DEF.
#endif
			} // end while the PhiWorkList is not empty
#if SMP_COND_TYPE_PROP_TO_INSTS
			// The PhiWorkList is empty at this point, so process
			//  instructions on the InstWorkList.
#endif
		} // end while both work lists are not empty

	} // end for all global names
	return changed;
} // end of SMPFunction::ConditionalTypePropagation()
#endif  // end if SMP_SIMPLE_CONDITIONAL_TYPE_PROPAGATION else ...

// Emit all annotations for the function, including all per-instruction
//  annotations.
void SMPFunction::EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile) {
	// Emit annotation for the function as a whole.
	bool FuncHasProblems = ((!this->AnalyzedSP) || (!this->HasGoodRTLs()) || (this->HasUnresolvedIndirectCalls())
		|| (this->HasUnresolvedIndirectJumps()) || (this->HasSharedChunks()));

	if (this->StaticFunc) {
		qfprintf(AnnotFile,	"%10x %6u FUNC LOCAL  %s ", this->FuncInfo.startEA,
			this->Size, this->GetFuncName());
		qfprintf(AnnotFile,	"%10x %6u FUNC GLOBAL %s ", this->FuncInfo.startEA,
			this->Size, this->GetFuncName());
	switch (this->ReturnAddrStatus)
	{
		case FUNC_UNKNOWN:
		{
			qfprintf(AnnotFile, "FUNC_UNKNOWN ");
			break;
		}
		case FUNC_SAFE:
		{
			qfprintf(AnnotFile, "FUNC_SAFE ");
			break;
		}
		case FUNC_UNSAFE:
		{
			qfprintf(AnnotFile, "FUNC_UNSAFE ");
			break;
		}
		default:
			assert(0);	
	}
	if (this->UseFP) {
		qfprintf(AnnotFile, "USEFP ");
	}
	else {
		qfprintf(AnnotFile, "NOFP ");
	}
	if (this->FuncInfo.does_return()) {
		qfprintf(AnnotFile, "RET ");
		qfprintf(AnnotFile, "NORET ");
	if (this->IsLeaf())
		qfprintf(AnnotFile, "FUNC_LEAF ");
	// store the return address
	qfprintf(AnnotFile,"%10x ", this->FuncInfo.endEA - 1);

	if (this->IsLibFunc())
		qfprintf(AnnotFile, "LIBRARY ");
	qfprintf(AnnotFile, "\n");
jdh8d's avatar
jdh8d committed
	// Emit annotations about how to restore register values
	qfprintf(AnnotFile, "%10x %6d FUNC FRAMERESTORE ", this->FuncInfo.startEA, 0);
jdh8d's avatar
jdh8d committed
	for(int i=R_ax; i<=R_di; i++)
	{
		qfprintf(AnnotFile, "%d %d %d ", i, this->SavedRegLoc[i], this->ReturnRegTypes[i]);
	}
	qfprintf(AnnotFile, "ZZ\n");

	qfprintf(AnnotFile, "%10x %6d FUNC MMSAFENESS ", this->FuncInfo.startEA, 0);
clc5q's avatar
clc5q committed
	if (!IsSpecSafe())
		qfprintf(AnnotFile, "UNSAFE\n");
clc5q's avatar
clc5q committed
	else if (!IsSafe())
		qfprintf(AnnotFile, "SPECSAFE\n");
clc5q's avatar
clc5q committed
	else {
		assert(IsSafe());
		qfprintf(AnnotFile, "SAFE\n");
	}

	// If function has problems that limited our analyses, emit an information annotation so that
	//  other tools can be aware of which analyses will be sound.
	if (FuncHasProblems) {
		qfprintf(InfoAnnotFile,	"%10x %6u FUNC PROBLEM %s ", this->FuncInfo.startEA,
			this->Size, this->GetFuncName());
		if (!this->AnalyzedSP) {
			qfprintf(InfoAnnotFile, "STACKANALYSIS ");
		}
		if (this->HasSharedChunks()) {
			qfprintf(InfoAnnotFile, "CHUNKS ");
		}
		if (this->HasUnresolvedIndirectJumps()) {
			qfprintf(InfoAnnotFile, "JUMPUNRESOLVED ");
		}
		if (this->HasUnresolvedIndirectCalls()) {
			qfprintf(InfoAnnotFile, "CALLUNRESOLVED ");
		}
		if (!this->HasGoodRTLs()) {
			qfprintf(InfoAnnotFile, "BADRTLS ");
		}
		qfprintf(InfoAnnotFile, "\n");
	}

	// Loop through all instructions in the function.
	// Output optimization annotations for those
	//  instructions that do not require full computation
	//  of their memory metadata by the Memory Monitor SDT.
	list<SMPInstr>::iterator CurrInst = Instrs.begin();
	++CurrInst;  // skip marker instruction
	bool AllocSeen = false; // Reached LocalVarsAllocInstr yet?
	bool DeallocTrigger = false;
	for ( ; CurrInst != Instrs.end(); ++CurrInst) {
		ea_t addr = CurrInst->GetAddr();
		qfprintf(AnnotFile, "%10x %6d INSTR BELONGTO %x \n", addr, 0, GetStartAddr());
		if (this->LocalVarsAllocInstr == addr) {
			AllocSeen = true;
			if (this->NeedsStackReferent)
				this->EmitStackFrameAnnotations(AnnotFile, CurrInst);
				int OptType = CurrInst->GetOptType(); 
				if (5 == OptType) { // ADD or SUB
					// Prevent mmStrata from extending the caller's stack frame
					//  to include the new allocation.
					qfprintf(AnnotFile, "%10x %6d INSTR LOCAL SafeFrameAlloc %s \n",
						addr, -1, CurrInst->GetDisasm());
				}
				else if (CurrInst->MDIsPushInstr()) {
					qfprintf(AnnotFile, "%10x %6d INSTR LOCAL NoWarn %s \n",
						addr, -3, CurrInst->GetDisasm());
				}
				// mmStrata ignores the DATAREF annotations anyway, so even though
				//  they are not needed, emit them for use by Strata and other tools
				//  in other projects besides MEDS.