Skip to content
Snippets Groups Projects
SMPInstr.cpp 293 KiB
Newer Older
				break;  // treat as category 0
			}
#endif
			if (SecondSrcOperandImmNum 
				&& !this->MDIsFrameAllocInstr()
#if SPECIAL_CASE_CARRY_BORROW
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n",
						addr, -1, OptExplanation[this->OptType], disasm);
				++AnnotationCount[this->OptType];
			else if (IsEqType(NUMERIC, this->AddSubSourceType)
#if SPECIAL_CASE_CARRY_BORROW
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL 2ndSrcNumeric %s \n",
						addr, -1, disasm);
				++AnnotationCount[this->OptType];
			else if (NumericDEFs) {
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n",
jdh8d's avatar
jdh8d committed
						addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm);
				++AnnotationCount[this->OptType];
#if SMP_OPTIMIZE_ADD_TO_NUMERIC
			else if ((NN_add == this->SMPcmd.itype) && (!MemSrc)
				&& IsNumeric(this->AddSubUseType)) {
				// reg1 := reg1 + reg2, where reg1 comes in as NUMERIC,
				//  means that reg1 will get DEFed to the type of reg2,
				//  whatever it is. If reg2 were known to be NUMERIC,
				//  we would have hit one of the annotation cases above.
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s := %s ZZ AddToNumeric %s \n",
					addr, -5, RegNames[this->AddSubUseOp.reg],
					RegNames[this->AddSubSourceOp.reg], disasm);
				++AnnotationCount[this->OptType];
			}
#endif
			else {
				SDTInstrumentation = true;
			}
			break;

		case 6: // Only OS code should include these; problem for SDT
				SDTInstrumentation = true;
				break;  // treat as category 0
			}
			qfprintf(AnnotFile, "%10x %6d INSTR LOCAL AlwaysPTR %s \n",
					addr, -OptType, disasm);
			++AnnotationCount[this->OptType];
			break;

		case 8: // Implicitly writes to EDX:EAX, always numeric.
			if (this->OptType == 10) { // writes to ECX also
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ECX ZZ %s %s \n",
						addr, -2, OptExplanation[this->OptType], disasm);
			}
			else {
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n EDX EAX ZZ %s %s \n",
						addr, -2, OptExplanation[this->OptType], disasm);
			}
			++AnnotationCount[this->OptType];
			SDTInstrumentation = true;
			break;

		case 9:  // Either writes to FP reg (cat. 1) or memory (cat. 0)
			if (MemDest) {
				SDTInstrumentation = true;
#if 0
				if (NumericDEFs) {
					qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n",
jdh8d's avatar
jdh8d committed
						addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm);
					++AnnotationCount[this->OptType];
				}
#endif
			}
			else {
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n",
						addr, -1, OptExplanation[this->OptType], disasm);
				++AnnotationCount[this->OptType];
		case 10: // AND, OR, etc.: If all DEFs have been inferred to be
				 //  NUMERIC, then output optimizing annotation.
			SDTInstrumentation = true;
			if (MemDest) { // **!!** optimize with numeric annotation in future
				break;  // treat as category 0
			}
			else if (NumericDEFs) { // NUMERIC result because of NUMERIC sources
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n",
jdh8d's avatar
jdh8d committed
						addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm);
				++AnnotationCount[this->OptType];
			}
			break;

		case 12: // Exchange, exchange and add, conditional exchange: All NUMERIC
				 //  sources ==> NUMERIC DEFs, so nothing for mmStrata to do.
			if (MemDest) { // **!!** optimize with numeric annotation in future
				SDTInstrumentation = true;
				break;  // treat as category 0
			}
			else if (NumericDEFs) { // NUMERIC result because of NUMERIC sources
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr,
jdh8d's avatar
jdh8d committed
						ProfiledDEFs ? -256-1 : -1, OptExplanation[TypeGroup], disasm);
				++AnnotationCount[this->OptType];
			}
			else 
				SDTInstrumentation = true;
			break;

		case 13:
		case 15: // Floating point, NUMERIC, possible memory destination.
				 //  If not memory destination, fpreg dest, so nothing for mmStrata to do.
			if (MemDest) { // **!!** optimize with numeric annotation in future
				SDTInstrumentation = true;
				break;  // treat as category 0
			}
			else { // NUMERIC floating register result; these regs are always NUMERIC
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL %s %s \n", addr,
						-1, OptExplanation[TypeGroup], disasm);
				++AnnotationCount[this->OptType];
			}
			break;

		default: // 2,3,7: Optimization possibilities depend on operands 
			SDTInstrumentation = true;
			if (MemDest) {
				break;  // treat as category 0
			}
			if ((OptType == 2) || (OptType == 7) || SecondSrcOperandImmNum) {
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s %s %s \n",
						addr, -2, this->DestString(this->OptType), 
						OptExplanation[this->OptType], disasm);
				++AnnotationCount[this->OptType];
			}
			else if (NumericDEFs) { // NUMERIC move instruction
				qfprintf(AnnotFile, "%10x %6d INSTR LOCAL n %s NumericDEFs %s \n",
jdh8d's avatar
jdh8d committed
						addr, ProfiledDEFs ? -256-2 : -2, this->DestString(this->OptType), disasm);
				++AnnotationCount[this->OptType];
jdh8d's avatar
jdh8d committed

	// always annotate stack constants for the profiler, etc.
	this->AnnotateStackConstants(UseFP, AnnotFile);

	// If mmStrata is going to have to deal with the
	//  instruction, then we can annotate EBP and ESP
	//  relative constant offsets. If we have emitted
	//  an annotation of type -1, there is no point
	//  in telling mmStrata about these constants.
clc5q's avatar
clc5q committed
	// Likewise, we can tell mmStrata if a MemDest is an
	//  non-directly-accessed child object.
	int ChildOffset, ChildSize;
		if (strlen(this->DeadRegsString) > 0) {
			// Optimize by informing mmStrata of dead registers. It can avoid saving
			//  and restoring dead state. This is particularly important for EFLAGS,
			//  as restoring the flags is a pipeline serializing instruction.
			qfprintf(AnnotFile, "%10x %6d INSTR DEADREGS %s ZZ %s \n",
				addr, this->SMPcmd.size, this->DeadRegsString, disasm);
		}
clc5q's avatar
clc5q committed
		if (MemDest && ProfInfo->GetMemoryAccessInfo()->ComputeNonDirectAccessRegion(addr,
			ChildOffset, ChildSize)) {
			qfprintf(AnnotFile, "%10x %6d INSTR CHILDACCESS %d %d ZZ %s \n",
				addr, this->SMPcmd.size, ChildOffset, ChildSize, disasm);
		}
#if SMP_IDENTIFY_POINTER_ADDRESS_REG
		if (MemDest) {
			assert(o_void != this->DestMemOp.type);
			set<DefOrUse, LessDefUse>::iterator PtrUse;
			PtrUse = this->GetPointerAddressReg(this->DestMemOp);
			if (PtrUse != this->GetLastUse()) { // found POINTER addr reg USE
				if (PtrUse->GetOp().type == o_reg) {
					ushort PtrReg = PtrUse->GetOp().reg;
					qfprintf(AnnotFile, "%10x %6d INSTR POINTER reg %s ZZ %s \n",
						addr, this->SMPcmd.size, RegNames[PtrReg], disasm);
				}
			}
		}
#endif
	}
	return;
} // end of SMPInstr::EmitTypeAnnotations()

// emit check annotations for signedness, overflow, truncation, etc.
void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) {
	set<DefOrUse, LessDefUse>::iterator UseIter, DefIter;
	op_t UseOp, DefOp;
	unsigned short UseWidthInfo, DefWidthInfo, SourceDefWidthInfo;
	unsigned short UseSignInfo, DefSignInfo, SourceDefSignInfo;
	unsigned short UseSignMask, DefSignMask, SourceDefSignMask;
	struct FineGrainedInfo UseFGInfo, DefFGInfo, SourceDefFGInfo;
	size_t UseBitWidth, DefBitWidth, UseMaxBitWidth, SourceDefBitWidth;
	ea_t DefAddr;
	int UseHashValue, DefHashValue;
	bool OverflowOpcode = this->MDIsOverflowingOpcode();
	bool UnderflowOpcode = this->MDIsUnderflowingOpcode();
	bool CheckForOverflow;
	bool UseIsSigned, DefIsSigned, UseIsUnsigned, DefIsUnsigned, SourceDefIsSigned, SourceDefIsUnsigned;
	bool UseSignMixed, SourceDefSignMixed; // inconsistent signedness
	bool UseFP = this->BasicBlock->GetFunc()->UsesFramePointer();
	bool SignednessCheckEmitted = false;

	// Case 1: Overflow on addition.
	// Case 2: Underflow on subtraction.
	if (OverflowOpcode || UnderflowOpcode) {
		// If the flags register DEF is dead, we need a CHECK OVERFLOW/UNDERFLOW annotation.
		DefOp = InitOp;
		DefOp.type = o_reg;
		DefOp.reg = MD_FLAGS_REG;
		DefIter = this->FindDef(DefOp);
		assert(DefIter != this->GetLastDef());
		if (this->BasicBlock->IsDefDead(this->address, DefOp)) {
			DefIter = this->GetFirstNonFlagsDef();
			assert(DefIter != this->GetLastDef());
			DefOp = DefIter->GetOp();
			// Don't worry about stack space allocation instructions. The
			//  program will crash long before the stack pointer underflows
			//  below zero.
			if (!((o_reg == DefOp.type) && DefOp.is_reg(MD_STACK_POINTER_REG))) {
				DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
				if (o_reg == DefOp.type) {
					if (this->BasicBlock->IsLocalName(DefOp)) {
						// Local name, find in basic block maps.
						DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue);
					}
					else { // Global name, find in global maps.
						DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue);
					}
				}
				else if (MDIsStackAccessOpnd(DefOp, UseFP)) {
					bool success = this->BasicBlock->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo);
					assert(success);
				}
				else { // non-stack memory address; we know nothing about it.
					DefFGInfo.SignMiscInfo = 0;
					DefFGInfo.SizeInfo = 0;
				}
				DefSignInfo = DefFGInfo.SignMiscInfo;
				DefSignMask = DefSignInfo & FG_MASK_SIGNEDNESS_BITS;
				DefWidthInfo = DefFGInfo.SizeInfo;
				DefBitWidth = LargestBitWidthFromMask(DefWidthInfo);
				if (0 == DefBitWidth) {
					// Could happen for non-stack memory operands, for example.
					DefBitWidth = MD_NORMAL_MACHINE_BITWIDTH;
				}
				if (OverflowOpcode) {
					qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW %s %d ",
						this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth,
						MDGetRegName(DefOp), disasm);
				}
				else { // must be UnderflowOpcode
					qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK UNDERFLOW %s %d ",
						this->address, this->SMPcmd.size, SignednessStrings[DefSignMask], DefBitWidth,
						MDGetRegName(DefOp), disasm);
				}
				AnnotPrintOperand(DefOp, InfoAnnotFile);
				qfprintf(InfoAnnotFile, " ZZ %s \n", this->disasm);
			}
		} // end if flags reg is dead

	} // end cases 1-2

	// Case 3: Overflow on multiplication with upper bits discarded.
		// There are four overflow sub-cases for x86: (A) the multiplication result
		//  can go into EDX:EAX for 32x32=>64 bit multiplication; (B) the result
		//  can go into DX:AX for 16x16=>32 bit; (C) the result can be in AX
		//  for 8x8=>16 bit; (D) see below. The latter case (C) will be detected most easily
		//  as a truncation in a later instruction, i.e. if only AL gets stored
		//  later, then we check the AH bits at that time for a truncation
		//  error. Because our SSA numbering lumps AL, AH, AX, and EAX into
		//  a single canonicalized register, we would have a hard time using
		//  SSA-based def-use chains to determine if AH is dead.
		// For the other two sub-cases, the question is whether EDX becomes dead
		//  starting with the DEF of EDX in the multiply instruction.
		// Case (D) is where the multiply instruction discards the upper bits
		// Sub-cases A&B are detected by checking if EDX is dead, and if so, then
		//  emitting an annotation to check for the overflow flag. The x86 sets
		//  overflow and carry flags on multiplication instructions based on whether
		//  the result carries out of the lower half of the result to the upper half.
		// Sub-case D is also detected using flags, but we don't need to check whether EDX
		//  is dead. We just need to detect that EDX is not in the DEF set in the
		//  first place. We have a private member flag for that case.
		if (this->MultiplicationBitsDiscarded) { // Sub-case D
			CheckForOverflow = true;
			assert(this->RTL.GetCount() > 0);
			DefOp = this->RTL.GetRT(0)->GetLeftOperand();
			DefIter = this->FindDef(DefOp);
			assert(DefIter != this->GetLastDef());
			DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
		}
		else {
			// If the instruction were EDX:=EDX*foo, then it would be
			//  the multiplication bits discarded case and would not
			//  reach this else clause. Therefore, if we find EDX in
			//  the DEF set, it is holding upper result bits of the
			//  multiplication and we have the potential for sub-cases A&B
			//  but not sub-case C. So, we check to see if the DEF of EDX
			//  is dead.
			DefOp = InitOp;
			DefOp.type = o_reg;
			DefOp.reg = R_dx;
			DefIter = this->FindDef(DefOp);
			if (DefIter != this->GetLastDef()) {
				// We found DEF of EDX, so it is not AX:=AL*op8 sub-case C.
				// Now, is DEF of EDX dead (i.e. no uses?)
				CheckForOverflow = this->BasicBlock->IsDefDead(this->address, DefOp);
				if (CheckForOverflow) {
					DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum());
				}
			}
		} // end if sub-case D else if sub-case A or B

		if (CheckForOverflow) { // need an annotation
			if (this->BasicBlock->IsLocalName(DefOp)) {
				// Local name, find in basic block maps.
				DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue);
			}
			else { // Global name, find in global maps.
				DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue);
			}

			DefWidthInfo = DefFGInfo.SizeInfo;
			DefBitWidth = LargestBitWidthFromMask(DefWidthInfo);

			if (this->MDIsUnsignedArithmetic()) {
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW UNSIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm);
			}
			else {
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK OVERFLOW SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, DefBitWidth, RegNames[DefOp.reg], disasm);
			}
		}

	// Case 4: Signedness error on move.
	// Case 5: Truncation error on move.
	UseOp = this->GetMoveSource();
	if ((3 == this->OptType) && (o_reg == UseOp.type)) {
		// Possibilities for a move: reg to reg, mem to reg, or reg to mem. If we load
		//  from memory into a register, we cannot track signedness in memory unless it
		//  is a stack location. In that case, we record the signedness in the stack
		//  map and transfer it to the reg DEF in SMPInstr.MDSetWidthSignInfo(). That
		//  determines the signedness of the reg DEF and it cannot be in conflict with
		//  the stack memory USE. The load from stack to reg also determines width
		//  of the stack operand and we cannot have a truncation. So, we can restrict
		//  our analysis of cases 4-5 to register-source move instructions, as we
		//  have done in the condition above.
		//
		//  Similarly, we cannot detect a signedness conflict if the destination is a 
		//  memory location that is not known to be a particular stack offset location.
		//
		// So, we only concern ourselves with signedness errors
		//  when the USE operand of the move is a register, and the destination is another
		//  register or a stack location.
		//
		// We can have a truncation error and a signedness error on a single instruction, so
		//  we group them into common code. For example, move the lower half of a 32-bit unsigned
		//  into a 16-bit signed destination. Upper bits set to 1 and discarded would be a 
		//  truncation, and setting the sign bit of the 16-bit signed destination would be a
		//  signedness error.
		//
		// NOTE: Signedness errors are different from overflow and truncation errors. We
		//  can have incomplete knowledge about an instructions operands and still determine
		//  that truncation occurred. For example, if we do not know whether register EAX
		//  is signed or unsigned, we can still say that storing only AX is a truncation error
		//  if the upper half of EAX is a mixture of one and zero bits. If EAX is unsigned,
		//  we could be more specific and insist that the upper half be all zero bits; if EAX
		//  is signed, we could insist that the upper half of EAX be the sign-extension of AX.
		//  We can avoid false positives by only declaring a truncation error when the upper
		//  half of EAX is not all zero bits or all one bits. This approach allows a few
		//  potential false negatives. With signedness, if we don't know the signedness
		//  of one of the operands, we can only avoid false positives by doing no checks at
		//  all.
		UseIter = this->FindUse(UseOp);
		assert(UseIter != this->GetLastUse());
		UseBitWidth = 8 * GetOpDataSize(UseOp);

		// Now, the question is: Are we storing fewer bits than
		//  we were using in our computations in this DEF-USE chain?
		//  E.g. if we computed using 32 bits and then only store 16,
		//  we have potential truncation error. But if we computed
		//  using 16 bits all along, we have already checked for 16-bit
		//  overflows on arithmetic in the DU chain and there can be no
		//  truncation on this store.
		op_t SearchOp = UseOp;
		// Canonicalize sub-regs for searching DEFs and USEs.
		SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg);
		SearchOp.dtyp = dt_dword;
		UseHashValue = HashGlobalNameAndSSA(SearchOp, UseIter->GetSSANum());

		if (this->BasicBlock->IsLocalName(SearchOp)) {
			// Local name, find in basic block maps.
			SourceDefFGInfo = this->BasicBlock->GetDefFGInfo(UseHashValue);
			UseFGInfo = this->BasicBlock->GetUseFGInfo(UseHashValue);
		}
		else { // Global name, find in global maps.
			SourceDefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(UseHashValue);
			UseFGInfo = this->BasicBlock->GetFunc()->GetUseFGInfo(UseHashValue);
		}

		SourceDefWidthInfo = SourceDefFGInfo.SizeInfo;
		UseWidthInfo = UseFGInfo.SizeInfo;
		SourceDefBitWidth = LargestBitWidthFromMask(SourceDefWidthInfo);
		UseMaxBitWidth = LargestBitWidthFromMask(UseWidthInfo);
		UseSignMask = (UseFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS);
		SourceDefSignInfo = SourceDefFGInfo.SignMiscInfo;
		SourceDefSignMask = (SourceDefSignInfo & FG_MASK_SIGNEDNESS_BITS);
		// Next four statements exclude the inconsistent sign case and the no sign info known case.
		UseIsSigned = (FG_MASK_SIGNED == UseSignMask);  // exact, not bit-mask-AND
		UseIsUnsigned = (FG_MASK_UNSIGNED == UseSignMask);  // exact, not bit-mask-AND
		SourceDefIsSigned = (FG_MASK_SIGNED == SourceDefSignMask);  // exact, not bit-mask-AND
		SourceDefIsUnsigned = (FG_MASK_UNSIGNED == SourceDefSignMask);  // exact, not bit-mask-AND
		UseSignMixed = (FG_MASK_INCONSISTENT_SIGN == UseSignMask); // exclude uninit sign case
		SourceDefSignMixed = (FG_MASK_INCONSISTENT_SIGN == SourceDefSignMask); // exclude uninit sign case

		// Not only the CHECK SIGNEDNESS annotations depend on the signedness of the
		//  source and destination operands. The CHECK TRUNCATION annotations come
		//  in SIGNED, UNSIGNED, and UNKNOWNSIGN variants, so we need to get the
		//  signedness of the destination operand before we proceeed.
		DefOp = this->RTL.GetRT(0)->GetLeftOperand(); // RTL must be dest := rhs
		op_t DestSearchOp = DefOp;
		bool StackDestination;
		if (o_reg == DestSearchOp.type) {
			StackDestination = false;
			DestSearchOp.reg = MDCanonicalizeSubReg(DefOp.reg);
			DestSearchOp.dtyp = dt_dword;
		}
		else if (!(MDIsStackAccessOpnd(DefOp, UseFP))) {
			// If destination of move is not a register and is not
			//  a stack location, we cannot track its signedness and width.
			return;
		}
		else {
			StackDestination = true;
		}
		DefIter = this->FindDef(DestSearchOp);

		if (StackDestination) {
			// Fetch FG info from stack map.
			bool success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, DefOp, DefFGInfo);
			assert(success);
		}
		else {
			// Fetch FG info from register FG info maps.
			DefHashValue = HashGlobalNameAndSSA(DestSearchOp, DefIter->GetSSANum());
			if (this->BasicBlock->IsLocalName(DestSearchOp)) {
				// Local name, find in basic block maps.
				DefFGInfo = this->BasicBlock->GetDefFGInfo(DefHashValue);
			}
			else { // Global name, find in global maps.
				DefFGInfo = this->BasicBlock->GetFunc()->GetDefFGInfo(DefHashValue);
		DefSignMask = (DefFGInfo.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS);
		// Next two statements exclude the inconsistent sign case and the no sign info known case.
		DefIsSigned = (FG_MASK_SIGNED == DefSignMask);  // exact, not bit-mask-AND
		DefIsUnsigned = (FG_MASK_UNSIGNED == DefSignMask);  // exact, not bit-mask-AND

		// If we set the (source) DEF bit width to 0, it means we wanted to have the USEs determine
		//  the width. This happens on sign-extended and zero-extended loads. If we zero-extend
		//  a 16-bit value to 32 bits, then immediately store the lower 16 bits to a 16-bit location,
		//  then the upper bits cannot have any overflow info yet. But if we do 32-bit arithmetic
		//  on the zero-extended value, and then store the lower 16 bits, we need to check for
		//  truncation. So, the key is whether the value ever got used as a 32-bit value. If it
		//  did, check for truncation; if not, there is no need to check.
		if ((SourceDefBitWidth > UseBitWidth) 
			|| ((SourceDefBitWidth == 0) && (UseMaxBitWidth > UseBitWidth))) {
			// Original DEF (or subsequent USE) was wider than what we are storing now.
			unsigned short SourceDefReg = SearchOp.reg;
			unsigned short UseReg = UseOp.reg;
			if (SourceDefBitWidth == 0) { // Convert for printing annotation.
				SourceDefBitWidth = 8 * GetOpDataSize(SearchOp);
			}
			// OK, we need to check for possible truncation. But, how we check depends on the
			//  signedness combinations of the source and destination operands of the move.
			//  Each operand can be signed, unsigned, or of unknown sign (and we lump the
			//  inconsistent sign case into the unknown sign case). So, we have a set of 3x3=9
			//  possible combinations of signedness. 
			// Now we have the DefSignMask to compare to the UseSignMask. The nine possible
			//  combinations, and the annotations we want to emit for each, are shown below.
			//  S = SIGNED, U = UNSIGNED, and ? = unknown or inconsistent sign.
			//  S => U indicates a SIGNED being stored into an UNSIGNED, for example.
			//  Assume without loss of generality that register EAX is the source of
			//   all the move instructions, and that only subword register AX is being stored.
			//   We can perform all truncation and signedness checks on EAX just prior to
			//   the move instruction, which is cheaper than performing checks on the 
			//   destination if the destination is in memory.
			//
			//  U => U
			//  U => S
			//  S => U
			//  U => ?
			//  ? => U
			//
			//  In these first five cases, EAX must be the zero-extension of AX else there is
			//   a truncation error. In the three cases in which the source (EAX/AX) is UNSIGNED,
			//   discarding upper bits that are not zero is obviously truncation. In the case
			//   of S => U, if the upper bits of EAX are not all zeroes, then we either have
			//   a large positive value of EAX that is being truncated, or EAX is negative and
			//   the lower bits will be misinterpreted in the unsigned destination. Finally,
			//   the ? => U case must be either U => U or S => U, and these two cases already
			//   share the demand that EAX be the zero-extension of AX. So, these five cases
			//   will receive the annotation: CHECK TRUNCATION UNSIGNED 32 EAX 16 AX which
			//   means that EAX is tested against AX to see if it is the 32-bit zero-extension
			//   of 16-bit reg AX.
			//  In the U => S case, we can have a signedness error as well as truncation. Even
			//   if the truncation check passes (all upper half bits of EAX are zero), the top
			//   bit of AX might be 1, and this will be misinterpreted as a sign bit in the
			//   destination. So, this case receives a second annotation: CHECK SIGNEDNESS SIGNED 16 AX.
			//  In the two cases that involve signedness uncertainty, there are possible signedness
			//   errors that we are not checking ar tun-time, because we do not have enough information
			//   to perform the checks without generating many more false positives than true positives.
			//   As a result, false negatives on signedness can occur.
			//
			// On to more of the 9 combinations:
			//
			// S => S
			//
			// In this case, EAX must be the sign-extension of AX. Because the destination is also
			//  signed, nothing is lost if the sign-extension bits (all zeroes or all ones) are dropped.
			//  We emit a CHECK TRUNCATION SIGNED 32 EAX 16 AX annotation to test EAX == sign-extended AX.
			//
			// S => ?
			// ? => S
			// ? => ?
			//
			// These final three cases all involve at least one operand of unknown signedness, and no
			//  operands that are known to be unsigned. In each case, there are two possibilities:
			//  either EAX must be the sign-extension of AX, or EAX must be the zero-extension of AX.
			//  Because of the uncertainty that is represented by the question marks, we are not sure
			//  which of these two cases we are dealing with. However, rather than just give up and
			//  perform no run-time checks (to avoid false positives), we can still perform a run-time
			//  check that will catch (perhaps most) true positives while causing no false positives.
			//  We can insist that EAX must be EITHER the sign-extension or the zero-extension of AX.
			//  To be neither type of extension of AX implies that some sort of truncation is happening.
			//  So, we emit a CHECK TRUNCATION UNKNOWNSIGN 32 EAX 16 AX annotation, and the Strata
			//  instrumentation will check for either EAX == sign-extended AX or EAX == zero-extended AX
			//  being true. If neither is true, we raise a true positive alert. False negatives on
			//  signedness errors are the result of the uncertainty, but all truncations are detected
			//  for all nine cases.

			if (DefIsUnsigned || UseIsUnsigned) {
				// First five cases above: any UNSIGNED operand leads to CHECK TRUNCATION UNSIGNED annotation.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION UNSIGNED %d %s %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, SourceDefBitWidth,
					MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm);
				if (UseIsUnsigned && DefIsSigned) {
					qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
						this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
				}
			else if (DefIsSigned && UseIsSigned) {
				// S => S case above. Emit CHECK TRUNCATION SIGNED annotation.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION SIGNED %d %s %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, SourceDefBitWidth,
					MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm);
			}
			else {
				// S => ?, ? => S, ? => ? cases above: CHECK TRUNCATION UNKNOWNSIGN annotation.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION UNKNOWNSIGN %d %s %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, SourceDefBitWidth,
					MDGetRegName(SearchOp), UseBitWidth, MDGetRegName(UseOp), disasm);
			}
#if 1
			// Now check for signedness conflicts between the UseOp USEs and its DEF.
			if (UseIsSigned && SourceDefIsUnsigned) {
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
			else if (UseIsUnsigned && SourceDefIsSigned) {
				// Currently same annotation, but might differ in the future for better forensics
				//  and more precise diagnostic messages.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
			else if ((!SourceDefSignMixed) && UseSignMixed) {
				// DEF has consistent and known signedness, USE is inconsistent.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
#endif
		} // end if truncation
		else { // still need to check for signedness errors even if no truncation
			if (UseIsSigned && DefIsUnsigned) {
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
			else if (UseIsUnsigned && DefIsSigned) {
				// Currently same annotation, but might differ in the future for better forensics
				//  and more precise diagnostic messages.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
#if 1
			else if (UseIsSigned && SourceDefIsUnsigned) {
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
			else if (UseIsUnsigned && SourceDefIsSigned) {
				// Currently same annotation, but might differ in the future for better forensics
				//  and more precise diagnostic messages.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
			else if ((!SourceDefSignMixed) && UseSignMixed) {
				// DEF has consistent and known signedness, USE is inconsistent.
				qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK SIGNEDNESS SIGNED %d %s ZZ %s \n",
					this->address, this->SMPcmd.size, UseBitWidth, MDGetRegName(UseOp), disasm);
			}
#endif
		} // end if truncation else check signedness

	} // end of cases 4-5, (3 == OptType) checking for TRUNCATION and SIGNEDNESS errors
	return;
} // end of SMPInstr::EmitIntegerErrorAnnotations()

clc5q's avatar
clc5q committed
// Go through the PUSH RTL and get the operand pushed.
op_t SMPInstr::GetPushedOpnd(void) {
clc5q's avatar
clc5q committed

	if (NN_push == this->SMPcmd.itype) {
		for (size_t OpNum = 0; OpNum < UA_MAXOP; ++OpNum) {
			op_t TempOp = this->SMPcmd.Operands[OpNum];
			if (this->features & UseMacros[OpNum]) { // USE
				return TempOp;
			}
		}
		msg("ERROR: Could not find PUSH operand at %x %s\n", this->address,
			this->GetDisasm());
		return VoidOp;
	}
	else {
		return VoidOp;
	}
} // end of SMPInstr::GetPushedOpnd()

// Get the immediate value used in the instruction. Return zero
//  if no immediate was used.
int SMPInstr::MDGetImmedUse(void) {
	int ImmedVal = 0;
	set<DefOrUse, LessDefUse>::iterator CurrUse;
	for (CurrUse = this->GetFirstUse(); CurrUse != this->GetLastUse(); ++CurrUse) {
		op_t UseOp = CurrUse->GetOp();
		if (o_imm == UseOp.type) {
			ImmedVal = (int) UseOp.value;
			break;
		}
	}
	return ImmedVal;
} // end of SMPInstr::MDGetImmedUse()

// Build the RTL for an instruction with a unary opcode
bool SMPInstr::BuildUnaryRTL(SMPoperator UnaryOp) {
	size_t OpNum;
	bool DestFound = false;
	SMPRegTransfer *TempRT = NULL;

	FPRegOp.type = o_fpreg;  // floating point register stack

	FlagsOp.type = o_reg;
	FlagsOp.reg = X86_FLAGS_REG;

	// Handle special cases first
	if (SMP_UNARY_FLOATING_ARITHMETIC == UnaryOp) {
		// Use of the floating register stack top is implicit
		DestFound = true;
		TempRT = new SMPRegTransfer;
		TempRT->SetLeftOperand(FPRegOp);
		TempRT->SetOperator(SMP_ASSIGN);
		SMPRegTransfer *RightRT = new SMPRegTransfer;
		RightRT->SetLeftOperand(FPRegOp);
		RightRT->SetOperator(UnaryOp);
		RightRT->SetRightOperand(VoidOp);
		TempRT->SetRightTree(RightRT);
		this->RTL.push_back(TempRT);
	}
	else if ((NN_clc == this->SMPcmd.itype) || (NN_cld == this->SMPcmd.itype)
		|| (NN_cmc == this->SMPcmd.itype) || (NN_stc == this->SMPcmd.itype)
		|| (NN_std == this->SMPcmd.itype)) {
		// Flags register is implicit destination.
		DestFound = true;
		TempRT = new SMPRegTransfer;
		TempRT->SetLeftOperand(FlagsOp);
		TempRT->SetOperator(SMP_ASSIGN);
		SMPRegTransfer *RightRT = new SMPRegTransfer;
		if (NN_cmc == this->SMPcmd.itype) { // complement carry flag USEs old carry flag
			RightRT->SetLeftOperand(FlagsOp);
			RightRT->SetOperator(SMP_BITWISE_NOT);
		}
		else {
			RightRT->SetLeftOperand(VoidOp);
			RightRT->SetOperator(UnaryOp);
		}
		RightRT->SetRightOperand(VoidOp);
		TempRT->SetRightTree(RightRT);
		this->RTL.push_back(TempRT);
	}

	for (OpNum = 0; !DestFound && (OpNum < UA_MAXOP); ++OpNum) {
		op_t TempOp = this->SMPcmd.Operands[OpNum];
		if (this->features & DefMacros[OpNum]) { // DEF
			if (MDKnownOperandType(TempOp)) {
				DestFound = true;
				TempRT = new SMPRegTransfer;
				TempRT->SetLeftOperand(TempOp);
				TempRT->SetOperator(SMP_ASSIGN);
				SMPRegTransfer *RightRT = new SMPRegTransfer;
				RightRT->SetLeftOperand(TempOp);
				RightRT->SetOperator(UnaryOp);
				RightRT->SetRightOperand(VoidOp);
				TempRT->SetRightTree(RightRT);
				this->RTL.push_back(TempRT);
			}
		}
	} // end for (OpNum = 0; ...)

#if SMP_DEBUG_BUILD_RTL
	if (!DestFound) {
		msg("ERROR: Could not find unary operand at %x for %s\n", this->GetAddr(), this->GetDisasm());
	}
#endif
	return DestFound;
} // end of SMPInstr::BuildUnaryRTL()

// Build the RTL for an instruction with a binary arithmetic opcode
bool SMPInstr::BuildBinaryRTL(SMPoperator BinaryOp) {
	size_t OpNum;
	bool DestFound = false;
	bool SourceFound = false;
	bool MemSrc = this->HasSourceMemoryOperand();
	bool MemDest = this->HasDestMemoryOperand();
	// Work around IDA pro error; they assumed that the pcmpeq and pcmpgt
	//  families of instructions were just compares, so they do not tag
	//  either operand as a DEF. Actually, the first operand has byte or
	//  word or dword fields set to all 1's or all 0's based on the result
	//  of the comparison.
	bool SrcIsReallyDest = ((SMP_COMPARE_EQ_AND_SET == BinaryOp) 
		|| (SMP_COMPARE_GT_AND_SET == BinaryOp));
	SMPRegTransfer *TempRT = NULL;
	SMPRegTransfer *RightRT = new SMPRegTransfer;

	FPRegOp.type = o_fpreg;  // floating point register stack

	// Handle special cases first
	if (SMP_BINARY_FLOATING_ARITHMETIC == BinaryOp) {
		// Use of the floating register stack top is implicit
		DestFound = true;
		TempRT = new SMPRegTransfer;
		TempRT->SetLeftOperand(FPRegOp);
		TempRT->SetOperator(SMP_ASSIGN);
		RightRT->SetLeftOperand(FPRegOp);
		RightRT->SetOperator(BinaryOp);
		RightRT->SetRightOperand(VoidOp);
		TempRT->SetRightTree(RightRT);
	}

	for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) {
		op_t TempOp = this->SMPcmd.Operands[OpNum];
		if ((this->features & DefMacros[OpNum]) 
			|| (SrcIsReallyDest && (0 == OpNum))) { // DEF
			if (!DestFound && MDKnownOperandType(TempOp)) {
				// See comments just below for floating point sources. FP stores
				//  are analogous to FP loads.
				if (!MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) {
					DestFound = true;
					TempRT = new SMPRegTransfer;
					TempRT->SetLeftOperand(TempOp);
					TempRT->SetOperator(SMP_ASSIGN);
clc5q's avatar
clc5q committed
					if (this->RegClearIdiom) {
clc5q's avatar
clc5q committed
						ImmOp.type = o_imm;
						ImmOp.value = 0;
						TempRT->SetRightOperand(ImmOp);
						SourceFound = true; // cause loop exit
					}
					else {
						RightRT->SetLeftOperand(TempOp);
						RightRT->SetOperator(BinaryOp);
						TempRT->SetRightTree(RightRT);
					}
				}
				else {
					;
#if SMP_VERBOSE_DEBUG_BUILD_RTL
					msg("WARNING: Skipping DEF operand: ");
					PrintOperand(TempOp);
					msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm());
#endif
				}
			}
			else if (DestFound && (SMP_BINARY_FLOATING_ARITHMETIC != BinaryOp)) {
				;
#if SMP_VERBOSE_DEBUG_BUILD_RTL
				msg("ERROR: Found two DEF operands: ");
				PrintOperand(TempOp);
				msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm());
#endif
			if (!SourceFound && MDKnownOperandType(TempOp)) {
				// If this is a floating point instruction with the fpregs listed as
				//  a USE and a memory operand also listed as a USE, then we want to
				//  ignore the irrelevant USE of the fpreg stack.
				// Note that MemDest AND MemSrc means something like add mem,reg is being
				//  processed, where the memory operand is both DEF and USE.
				if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) {
					SourceFound = true;
					RightRT->SetRightOperand(TempOp);
				}
			if (!(this->features & UseMacros[OpNum])) {
				;
#if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE
				msg("WARNING: Operand neither DEF nor USE: ");
				PrintOperand(TempOp);
				msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm());
#endif
			}
		} // end if DEF ... else ...
	} // end for (OpNum = 0; ...)

	if (!DestFound || !SourceFound) {
		assert(NULL != RightRT);
		if (DestFound && (NULL != TempRT))
#if SMP_DEBUG_BUILD_RTL
		if (!DestFound) {
			msg("ERROR: Could not find binary DEF operand at %x for %s\n", this->GetAddr(),
				this->GetDisasm());
		}
		else {
			msg("ERROR: Could not find binary operand at %x for %s\n", this->GetAddr(),
				this->GetDisasm());
			this->PrintOperands();
		}
#endif
	}
	else {
		this->RTL.push_back(TempRT);
	}
	return (DestFound && SourceFound);
} // end of SMPInstr::BuildBinaryRTL()

// Build the RTL for a load-effective-address instruction.
bool SMPInstr::BuildLeaRTL(void) {
	size_t OpNum;
	bool DestFound = false;
	bool SourceFound = false;
	SMPRegTransfer *AssignRT = NULL;
	int BaseReg;
	int IndexReg;
	ushort ScaleFactor;
	ea_t offset;
	bool ScaledIndexReg;

	for (OpNum = 0; !(DestFound && SourceFound) && (OpNum < UA_MAXOP); ++OpNum) {
		op_t TempOp = this->SMPcmd.Operands[OpNum];
		if (this->features & DefMacros[OpNum]) { // DEF
			DefOp = TempOp;
			DestFound = true;
			assert(o_reg == DefOp.type);
		}
		else { // USE
			if (!SourceFound && MDKnownOperandType(TempOp)) {
				if ((TempOp.type >= o_mem) && (TempOp.type <= o_displ)) {
					SourceFound = true;
					MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset);
				}
				else {
					;
#if SMP_VERBOSE_DEBUG_BUILD_RTL
					msg("WARNING: Skipping USE operand: ");
					PrintOperand(TempOp);
					msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm());
#endif
				}
			}

			if (!(this->features & UseMacros[OpNum])) {
				;
#if SMP_VERBOSE_DEBUG_BUILD_RTL_DEF_USE
				msg("WARNING: Operand neither DEF nor USE: ");
				PrintOperand(TempOp);
				msg(" at %x in %s\n", this->GetAddr(), this->GetDisasm());
#endif
			}
		} // end if DEF ... else ...
	} // end for (OpNum = 0; ...)

	if (!DestFound || !SourceFound) {
#if SMP_DEBUG_BUILD_RTL
		if (!DestFound) {
			msg("ERROR: Could not find lea DEF operand at %x for %s\n", this->GetAddr(),
				this->GetDisasm());
		}
		else {
			msg("ERROR: Could not find lea USE operand at %x for %s\n", this->GetAddr(),
				this->GetDisasm());
			this->PrintOperands();
		}
#endif
	}
	else { // Ready to build the RTL
		// We build the RTL down to the right, in reverse order, with any multiplication
		//  of the index register by a scale factor at the bottom of the RTL tree.
		// Note that almost any combination of BaseReg, IndexReg, and offset can be present
		//  or absent.
		AssignRT = new SMPRegTransfer;
		AssignRT->SetLeftOperand(DefOp);
		AssignRT->SetOperator(SMP_ASSIGN);

		ScaledIndexReg = ((ScaleFactor > 0) && (IndexReg != R_none));
		op_t BaseOp = InitOp, IndexOp = InitOp, OffsetOp = InitOp, ScaleOp = InitOp;
		BaseOp.type = o_reg;
		BaseOp.reg = (ushort) BaseReg;
		IndexOp.type = o_reg;
		IndexOp.reg = (ushort) IndexReg;
		OffsetOp.type = o_imm;
		OffsetOp.value = (uval_t) offset;
		ScaleOp.type = o_imm;
		ScaleOp.value = (uval_t) ScaleFactor;

		if (ScaledIndexReg) {
			// First, build the subtree to scale the IndexReg.
			SMPRegTransfer *MultRT = new SMPRegTransfer;
			MultRT->SetLeftOperand(IndexOp);
			MultRT->SetOperator(SMP_U_LEFT_SHIFT);
			MultRT->SetRightOperand(ScaleOp);
			// Now, case on the possibilities for existence of the other address fields.
			if (0 != offset) {
				// Add the offset to the scaled index subtree.
				SMPRegTransfer *AddOffRT = new SMPRegTransfer;
				AddOffRT->SetLeftOperand(OffsetOp);
				AddOffRT->SetOperator(SMP_ADD);
				AddOffRT->SetRightTree(MultRT);
				// Add a BaseReg, if any.
				if (R_none != BaseReg) {
					SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
					AddBaseRT->SetLeftOperand(BaseOp);
					AddBaseRT->SetOperator(SMP_ADD);
					AddBaseRT->SetRightTree(AddOffRT);
					// Link into assignment root tree.
					AssignRT->SetRightTree(AddBaseRT);
				}
				else { // no BaseReg
					AssignRT->SetRightTree(AddOffRT);
				}
			} // end if nonzero offset
			else { // no offset to add
				// Add a BaseReg, if any.
				if (R_none != BaseReg) {
					SMPRegTransfer *AddBaseRT = new SMPRegTransfer;
					AddBaseRT->SetLeftOperand(BaseOp);
					AddBaseRT->SetOperator(SMP_ADD);
					AddBaseRT->SetRightTree(MultRT);
					// Link into assignment root tree.
					AssignRT->SetRightTree(AddBaseRT);
				}
				else { // no BaseReg
					AssignRT->SetRightTree(MultRT);
				}
			}
		} // end if ScaleIndexReg
		else { // no scaled index register
			if (0 != offset) {
				if (R_none != IndexReg) {
					SMPRegTransfer *AddOffRT = new SMPRegTransfer;
					AddOffRT->SetLeftOperand(OffsetOp);