diff --git a/SMPBasicBlock.cpp b/SMPBasicBlock.cpp index 0961c6716dc9ccc174610926cc27a54b78a9ed05..43d5e669c0c1833c4156e6163b39f65c0fdffc32 100644 --- a/SMPBasicBlock.cpp +++ b/SMPBasicBlock.cpp @@ -85,6 +85,7 @@ SMPBasicBlock::SMPBasicBlock(SMPFunction *Func, list<SMPInstr>::iterator First, this->LastFlagsUse = First->GetAddr() - 1; this->Instrs.clear(); + this->AddrInstMap.clear(); this->Predecessors.clear(); this->Successors.clear(); this->KillSet.clear(); @@ -95,21 +96,29 @@ SMPBasicBlock::SMPBasicBlock(SMPFunction *Func, list<SMPInstr>::iterator First, this->PhiFunctions.clear(); this->LocalNames.clear(); + ea_t InstAddr; + list<SMPInstr>::iterator CurrInst = First; while (CurrInst != Last) { this->Instrs.push_back(CurrInst); - if (CurrInst->HasFlagsDef() && (CurrInst->GetAddr() < this->FirstFlagsDef)) - this->FirstFlagsDef = CurrInst->GetAddr(); - if (CurrInst->HasFlagsUse() && (CurrInst->GetAddr() > this->LastFlagsUse)) - this->LastFlagsUse = CurrInst->GetAddr(); + InstAddr = CurrInst->GetAddr(); + pair<ea_t, list<SMPInstr>::iterator> MapItem(InstAddr, CurrInst); + this->AddrInstMap.insert(MapItem); + if (CurrInst->HasFlagsDef() && (InstAddr < this->FirstFlagsDef)) + this->FirstFlagsDef = InstAddr; + if (CurrInst->HasFlagsUse() && (InstAddr > this->LastFlagsUse)) + this->LastFlagsUse = InstAddr; ++CurrInst; } // Now process the last instruction. 
- if (Last->HasFlagsDef() && (Last->GetAddr() < this->FirstFlagsDef)) - this->FirstFlagsDef = Last->GetAddr(); - if (Last->HasFlagsUse() && (Last->GetAddr() > this->LastFlagsUse)) - this->LastFlagsUse = Last->GetAddr(); + InstAddr = Last->GetAddr(); + pair<ea_t, list<SMPInstr>::iterator> MapItem2(InstAddr, Last); + this->AddrInstMap.insert(MapItem2); + if (Last->HasFlagsDef() && (InstAddr < this->FirstFlagsDef)) + this->FirstFlagsDef = InstAddr; + if (Last->HasFlagsUse() && (InstAddr > this->LastFlagsUse)) + this->LastFlagsUse = InstAddr; Last->SetTerminatesBlock(); this->Instrs.push_back(Last); // Add last instruction } @@ -119,6 +128,52 @@ ea_t SMPBasicBlock::GetFirstAddr(void) const { return this->FirstAddr; } +// Four methods to get values from the maps of global reg/SSA to FG info. +// For global names, see corresponding methods in SMPFunction. +unsigned short SMPBasicBlock::GetDefSignMiscInfo(int DefHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalDefFGInfoBySSA.find(DefHashValue); + if (MapIter != this->LocalDefFGInfoBySSA.end()) + return MapIter->second.SignMiscInfo; + else + return 0; +} // end of SMPBasicBlock::GetDefSignMiscInfo() + +unsigned short SMPBasicBlock::GetUseSignMiscInfo(int UseHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalUseFGInfoBySSA.find(UseHashValue); + if (MapIter != this->LocalUseFGInfoBySSA.end()) + return MapIter->second.SignMiscInfo; + else + return 0; +} // end of SMPBasicBlock::GetUseSignMiscInfo() + +unsigned short SMPBasicBlock::GetDefWidthTypeInfo(int DefHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalDefFGInfoBySSA.find(DefHashValue); + if (MapIter != this->LocalDefFGInfoBySSA.end()) + return 
MapIter->second.SizeInfo; + else + return 0; +} // end of SMPBasicBlock::GetDefWidthTypeInfo() + +unsigned short SMPBasicBlock::GetUseWidthTypeInfo(int UseHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalUseFGInfoBySSA.find(UseHashValue); + if (MapIter != this->LocalUseFGInfoBySSA.end()) + return MapIter->second.SizeInfo; + else + return 0; +} // end of SMPBasicBlock::GetUseWidthTypeInfo() + bool SMPBasicBlock::IsLocalName(op_t CurrOp) const { return (!this->LocalNames.empty() && (this->LocalNames.end() != this->LocalNames.find(CurrOp))); @@ -256,11 +311,9 @@ void SMPBasicBlock::Dump(void) { msg("DEF-USE chains for block-local names: \n"); this->LocalDUChains.Dump(); -#if SMP_FULL_LIVENESS_ANALYSIS msg("\n"); msg("DEF-USE chains for block-local use of global names: \n"); this->GlobalDUChains.Dump(); -#endif msg("\n"); if (this->IndirectJump) @@ -489,6 +542,24 @@ set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::FindPhi(op_t FindOp) { return this->PhiFunctions.end(); } // end of SMPBasicBlock::FindPhi() +// Get an iterator to the instruction at address InstAddr. +// Will assert if it does not find the instruction. +list<SMPInstr>::iterator SMPBasicBlock::FindInstr(ea_t InstAddr) { + list<SMPInstr>::iterator InstIter; + map<ea_t, list<SMPInstr>::iterator>::iterator MapEntry; + + MapEntry = this->AddrInstMap.find(InstAddr); + if (MapEntry == this->AddrInstMap.end()) { + msg("ERROR: Failure on AddrInstMap. Size: %d Addr: %x \n", + this->AddrInstMap.size(), InstAddr); + msg("First addr in block: %x \n", this->FirstAddr); + } + assert(MapEntry != this->AddrInstMap.end()); + InstIter = MapEntry->second; + + return InstIter; +} // end of SMPBasicBlock::FindInstr() + // The predecessor order is used in SSA phi functions. Get the ordinal number // of a given block number in the predecessor list. 
int SMPBasicBlock::GetPredPosition(int BlockNum) { @@ -503,6 +574,85 @@ int SMPBasicBlock::GetPredPosition(int BlockNum) { return -1; } +// Set the DEF iterator from the DefAddr and DefOp. Assert if not found. +set<DefOrUse, LessDefUse>::iterator SMPBasicBlock::GetGlobalDefIterFromDefAddr(op_t DefOp, ea_t DefAddr) { + map<ea_t, list<SMPInstr>::iterator>::iterator MapIter = this->AddrInstMap.find(DefAddr); + assert(MapIter != this->AddrInstMap.end()); + set<DefOrUse, LessDefUse>::iterator DefIter = MapIter->second->FindDef(DefOp); + assert(DefIter != MapIter->second->GetLastDef()); + return DefIter; +} // end of SMPBasicBlock::GetGlobalDefIterFromDefAddr() + +// Given a USE operand and the address of its instr, return DEF using DU chains or Phi func. +set<DefOrUse, LessDefUse>::iterator SMPBasicBlock::GetDefFromOpAddr(op_t UseOp, ea_t InstAddr, int SSANum) { + ea_t DefAddr; + list<SMPInstr>::iterator DefInstr; + set<DefOrUse, LessDefUse>::iterator DefIter; + + if (this->IsLocalName(UseOp)) { + // Search in the local DU chains. + unsigned int LocalDUIndex = this->GetLocalDUIndex(UseOp, SSANum); + DefAddr = this->LocalDUChains.ChainsByName.at(LocalDUIndex).DUChains.at(SSANum).GetDef(); + DefInstr = this->FindInstr(DefAddr); + DefIter = DefInstr->FindDef(UseOp); + assert(DefIter != DefInstr->GetLastDef()); + } + else { + // Global DEF for this SSANum must be in the Phi functions or within a block. + DefAddr = this->MyFunc->GetGlobalDefAddr(UseOp, SSANum); + if (DefAddr == BADADDR) { // Could not find it anywhere. + msg("ERROR: Failure in GetDefFromOpAddr(): InstAddr %x SSANum %d\n", + InstAddr, SSANum); + assert(DefAddr != BADADDR); // kablooey! + } + else if (DefAddr < this->MyFunc->GetNumBlocks()) { + // A block number was returned, not an instruction address. + // The DEF for this SSANum is only in a Phi function for that + // block number. 
+ size_t BlockNumber = (size_t) DefAddr; + set<SMPPhiFunction, LessPhi>::iterator PhiIter = this->MyFunc->GetPhiIterForPhiDef(BlockNumber, UseOp, SSANum); + // PhiIter is guaranteed to be good, else we assert in GetPhiIterForPhiDef(). + assert(SSANum == PhiIter->GetDefSSANum()); + DefOrUse DefCopy = PhiIter->GetDefCopy(); + DefOrUseSet DummySet; + DefIter = DummySet.InsertRef(DefCopy); // NOTE(review): DummySet goes out of scope at the closing brace below; if InsertRef() returns an iterator into DummySet, the DefIter returned to the caller dangles -- TODO confirm and rework. + } + else { // Must be a DEF within a block, not in a Phi function. + // NOTE: We might be looking within the current block with this code; perfectly OK. + list<SMPBasicBlock>::iterator DefBlockIter = this->MyFunc->GetBlockFromInstAddr(DefAddr); + DefIter = DefBlockIter->GetGlobalDefIterFromDefAddr(UseOp, DefAddr); + } + } + + return DefIter; +} // end of SMPBasicBlock::GetDefFromOpAddr() + +// Given a USE operand and the address of its instr, return DEF addr using DU chains or Phi func. +// If DEF is in a Phi function, we return the block number, which never conflicts with instruction +// addresses. +ea_t SMPBasicBlock::GetDefAddrFromUseAddr(op_t UseOp, ea_t InstAddr, int SSANum, bool LocalName) { + ea_t DefAddr; + list<SMPInstr>::iterator DefInstr; + set<DefOrUse, LessDefUse>::iterator DefIter; + + if (LocalName) { + // Search in the local DU chains. + unsigned int LocalDUIndex = this->GetLocalDUIndex(UseOp, SSANum); + DefAddr = this->LocalDUChains.ChainsByName.at(LocalDUIndex).DUChains.at(SSANum).GetDef(); + } + else { + // Global DEF for this SSANum must be in the Phi functions or within a block. + DefAddr = this->MyFunc->GetGlobalDefAddr(UseOp, SSANum); + if (DefAddr == BADADDR) { // Could not find it anywhere. + msg("ERROR: Failure in GetDefAddrFromUseAddr(): InstAddr %x SSANum %d\n", + InstAddr, SSANum); + assert(DefAddr != BADADDR); // kablooey! + } + } + + return DefAddr; +} // end of SMPBasicBlock::GetDefAddrFromUseAddr() + // Update the LiveOut set for the block. // Return true if it changed, false otherwise. 
bool SMPBasicBlock::UpdateLiveOut(void) { @@ -577,7 +727,6 @@ void SMPBasicBlock::SetLocalNames(void) { return; } // end of SMPBasicBlock::SetLocalNames() -#if SMP_FULL_LIVENESS_ANALYSIS // Create local def-use chains for all global names // Important to remember that a chain can start without a def because the variable is Live-In; // in this case we give the chain a pseudo-def at address (first addr in block - 1) **!!** @@ -607,7 +756,7 @@ void SMPBasicBlock::CreateGlobalChains() { LocalUseIndex.push_back(-1); // init LocalUse indices to -1; first DEF will make it 0 // Set the local name for each DU chain array. if (LocalIndex > this->GlobalDUChains.ChainsByName.size()) - msg("LocalIndex %d out of bounds in CreateGlobalChains.\n", LocalIndex); + msg("ERROR: LocalIndex %d out of bounds in CreateGlobalChains.\n", LocalIndex); if (DebugFlag) msg("Setting name for LocalIndex = %d\n", LocalIndex); this->GlobalDUChains.ChainsByName.at(LocalIndex).SetName(*NameIter); @@ -674,7 +823,6 @@ void SMPBasicBlock::CreateGlobalChains() { msg("Exiting CreateGlobalChains()\n"); return; } // end of SMPBasicBlock::CreateGlobalChains() -#endif // Create local DEF-USE chains and renumber all references to all names in LocalNames. void SMPBasicBlock::SSALocalRenumber(void) { @@ -851,6 +999,174 @@ bool SMPBasicBlock::IsLastGlobalChain(op_t DefOp, ea_t DefAddr) { return (DefAddr == LastDefAddr); } // end of SMPBasicBlock::IsLastGlobalChain() +// If branch at block end is signed/unsigned, propagate to operands that set flags before it. +void SMPBasicBlock::MarkBranchSignedness(void) { + unsigned short SignMask; + set<DefOrUse, LessDefUse>::iterator UseIter; + op_t UseOp; + ea_t DefAddr; // for flags USE in branch + int SSANum; // for flags USE in branch + bool LocalFlags; // is flags register a local name? 
+ list<list<SMPInstr>::iterator>::reverse_iterator InstIter; + + InstIter = this->Instrs.rbegin(); // Any conditional branch would be last instruction + if ((*InstIter)->MDIsUnsignedBranch()) { + SignMask = FG_MASK_UNSIGNED; + } + else if ((*InstIter)->MDIsSignedBranch()) { + SignMask = FG_MASK_SIGNED; + } + else + return; // Last instruction is not a signed or unsigned branch. + + // Find the flags USE. + UseOp.type = o_reg; // set up a dummy op for searching + UseOp.reg = X86_FLAGS_REG; + UseIter = (*InstIter)->FindUse(UseOp); + assert(UseIter != (*InstIter)->GetLastUse()); + UseOp = UseIter->GetOp(); // get full info in all fields + SSANum = UseIter->GetSSANum(); + LocalFlags = this->IsLocalName(UseOp); + + DefAddr = this->GetDefAddrFromUseAddr(UseOp, (*InstIter)->GetAddr(), SSANum, LocalFlags); + // Pass DefAddr to recursive helper function. + this->PropagateBranchSignedness(DefAddr, UseOp, SignMask); + + return; +} // end of SMPBasicBlock::MarkBranchSignedness() + +// For the DefAddr of the flags, propagate SignMask to all DEFs and USEs in the +// instruction or Phi function indicated by DefAddr. +// We recurse only if we encounter Phi functions, terminating the recursion as +// soon as we can propagate the SignMask to the USEs and DEFS of an instruction. +void SMPBasicBlock::PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsigned short SignMask) { + list<SMPInstr>::iterator DefInstIter; + list<SMPBasicBlock>::iterator DefBlockIter; + set<DefOrUse, LessDefUse>::iterator UseIter; + set<DefOrUse, LessDefUse>::iterator DefIter; + op_t DefOp, UseOp; + int DefHashValue, UseHashValue; + ea_t PhiUseDefAddr; // for a Phi USE, where was it DEFed? + ea_t UseDefAddr; // for an instruction USE, where was it DEFed? 
+ int SSANum; + bool LocalDef = this->IsLocalName(SearchOp); + + if (SearchOp.type == o_reg) + SearchOp.reg = MDCanonicalizeSubReg(SearchOp.reg); + else + return; // Limit to registers for now + + if (DefAddr < this->MyFunc->GetNumBlocks()) { // found in Phi DEF; DefAddr is block # + // We need to propagate the SignMask to the Phi DEF, and then to the Phi USEs, and + // then we need to propagate the SignMask to the DEF of each Phi USE. It is possible + // that a Phi USE has its DEF in another block's Phi function, so this might recurse. + size_t BlockNum = (size_t) DefAddr; + if (IsMemOperand(SearchOp)) + return; // end recursion + DefBlockIter = this->MyFunc->GetBlockByNum(BlockNum); + set<SMPPhiFunction, LessPhi>::iterator PhiIter = DefBlockIter->FindPhi(SearchOp); + assert(PhiIter != DefBlockIter->GetLastPhi()); + SSANum = PhiIter->GetDefSSANum(); + DefHashValue = HashGlobalNameAndSSA(SearchOp, SSANum); + this->MyFunc->UpdateDefSignMiscInfo(DefHashValue, SignMask); + + size_t UseIndex, UseLimit; + UseLimit = PhiIter->GetPhiListSize(); + for (UseIndex = 0; UseIndex < UseLimit; ++UseIndex) { + int UseSSANum = PhiIter->GetUseSSANum(UseIndex); + UseHashValue = HashGlobalNameAndSSA(SearchOp, UseSSANum); + // Avoid propagation and recursion if no change will be made. + unsigned short OldSignInfo = this->MyFunc->GetUseSignMiscInfo(UseHashValue); + if (OldSignInfo != (OldSignInfo | SignMask)) { // sign info will change + this->MyFunc->UpdateUseSignMiscInfo(UseHashValue, SignMask); + PhiUseDefAddr = this->MyFunc->GetGlobalDefAddr(SearchOp, UseSSANum); + assert(BADADDR != PhiUseDefAddr); + if (PhiUseDefAddr < this->MyFunc->GetNumBlocks()) { + // DEF of current Phi USE is in another Phi function. + DefBlockIter = this->MyFunc->GetBlockByNum((size_t) PhiUseDefAddr); + } + else { // DEF is in an instruction. 
+ DefBlockIter = this->MyFunc->GetBlockFromInstAddr(PhiUseDefAddr); + } + if (PhiUseDefAddr != DefAddr) { // Don't recurse infinitely + DefBlockIter->PropagateBranchSignedness(PhiUseDefAddr, SearchOp, SignMask); // recurse + } + } + } + } + else { // DEF must be in an instruction + if (LocalDef) { // DEF must be in a local instruction + DefInstIter = this->FindInstr(DefAddr); + } + else { // DEF must be in a non-local instruction + DefBlockIter = this->MyFunc->GetBlockFromInstAddr(DefAddr); + // DefBlockIter must be good, else GetBlockFromInstAddr() would assert. + DefInstIter = DefBlockIter->FindInstr(DefAddr); + } + // Now we should have a good DefInstIter for the Inst that DEFs SearchOp. + // Loop through the DEFs and USEs in this Inst and OR in the SignMask for non-memory operands. + for (DefIter = DefInstIter->GetFirstDef(); DefIter != DefInstIter->GetLastDef(); ++DefIter) { + DefOp = DefIter->GetOp(); + if (DefOp.type == o_reg) { + DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); + DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); + if ((LocalDef && this->IsLocalName(DefOp)) + || (!LocalDef && DefBlockIter->IsLocalName(DefOp))) { + // DefOp is local to a block, must use block-local maps. + if (LocalDef) { // still within current block + this->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + else { // DEF in another block; use DefBlockIter + DefBlockIter->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + } + else { // DefOp is global, use SMPFunction global maps. + this->MyFunc->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + } + } // end for all DEFs + // A USE in the instruction that DEFed the flags used in our original condition branch + // could be local to the block containing the instruction, else it is a global name. + // We only want to update the sign info of the USE. Propagation to the DEF of that USE + // will happen during the final propagation processing in SMPFunction::PropagateFGInfo(). 
+ for (UseIter = DefInstIter->GetFirstUse(); UseIter != DefInstIter->GetLastUse(); ++UseIter) { // walk the USE list of the flags-defining instruction, not its DEF list + UseOp = UseIter->GetOp(); + if (UseOp.type == o_reg) { + UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); + if (UseOp.is_reg(X86_FLAGS_REG)) { + // don't need to propagate to flags after initial call to this method + continue; // save time + } + int UseSSANum = UseIter->GetSSANum(); + UseHashValue = HashGlobalNameAndSSA(UseOp, UseSSANum); + if (LocalDef) { + if (this->IsLocalName(UseOp)) {// USE must be in a local instruction + this->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + else { // USE is global name + this->MyFunc->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + } + else if (DefBlockIter->IsLocalName(UseOp)) { // USE is local to DEF block + DefBlockIter->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + else { // USE is global name + this->MyFunc->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + } + else if (MDIsStackAccessOpnd(UseOp, this->MyFunc->UsesFramePointer())) { + // Arithmetic using the stack as a source operand. We need to update + // our stack frame FG info sign inferences, in case we have some + // odd code that never loads from this stack location but uses it + // as a source operand for arithmetic. + // NOTE: Figure out how to do this later. !!!!!!!!! + ; + } + } // end for all USEs + } // end if (Phi DEF) else ... + return; +} // end of SMPBasicBlock::PropagateBranchSignedness() + // For the newly defined type DefType for DefOp at instruction DefAddr, propagate the // type to all USEs in the local SSA chain for the DEF. If any USE types change, // return true. @@ -1499,7 +1815,6 @@ set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::InferPhiDefType(set<SMPPhi return ReturnPhi; } // end of SMPBasicBlock::InferPhiDefType() -#if SMP_FULL_LIVENESS_ANALYSIS // Extension of IsRegDead for Global names. 
Check and see if it overlaps with a DEF-USE chain bool SMPBasicBlock::IsGlobalRegDead(ea_t InstAddr, op_t Operand, unsigned int RegIndex) const { bool FoundInLiveInSet = (LiveInSet.end() != LiveInSet.find(Operand)); @@ -1591,7 +1906,6 @@ bool SMPBasicBlock::IsGlobalRegDead(ea_t InstAddr, op_t Operand, unsigned int Re if (DebugFlag) msg("H"); return true; // no DEF-USE chains overlapped the instrumentation point } // end of SMPBasicBlock::IsGlobalRegDead() -#endif // If no DEF-USE chains overlap the instrumentation point for InstAddr (which logically // falls between the USEs of the instruction at InstAddr and its DEFs), for register RegIndex, @@ -1671,7 +1985,6 @@ void SMPBasicBlock::MarkDeadRegs(void) { } } else { -#if SMP_FULL_LIVENESS_ANALYSIS assert(FlagNameIter != this->MyFunc->GetLastGlobalName()); unsigned int RegIndex = ExtractGlobalIndex(*FlagNameIter); bool newCatch, oldCatch; @@ -1689,17 +2002,7 @@ void SMPBasicBlock::MarkDeadRegs(void) { if (newCatch && oldCatch) msg("Not a new success.\n"); #endif -#else - if (this->AreGlobalFlagsDead(InstAddr, FlagsOp)) { - qstrncat(DeadString, " EFLAGS", sizeof(DeadString) - 1); - } - else { - ; -#if SMP_DEBUG_OPTIMIZATIONS - msg("Global EFLAGS in %s\n", this->MyFunc->GetFuncName()); -#endif - } -#endif // end if SMP_FULL_LIVENESS_ANALYSIS ... else ... + } // Now, process all other local regs and skip EFLAGS. @@ -1726,7 +2029,6 @@ void SMPBasicBlock::MarkDeadRegs(void) { } } // end for all local names -#if SMP_FULL_LIVENESS_ANALYSIS // Now, process all other global regs and skip EFLAGS. //msg(" Done. Analyzing global regs..."); size_t NumGlobals = this->MyFunc->NumGlobalNames(); @@ -1765,7 +2067,6 @@ void SMPBasicBlock::MarkDeadRegs(void) { } } // end for all global names } -#endif //msg("Done. 
Next instruction.\n"); if (strlen(DeadString) > 1) { @@ -1935,6 +2236,92 @@ set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::SetPhiDefMetadata(op_t Def return CurrPhi; } // end of SMPBasicBlock::SetPhiDefMetadata() +// Four methods to get values into the maps of local reg/SSA to FG info. +// For global names, see corresponding methods in SMPFunction. +void SMPBasicBlock::UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair< map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalDefFGInfoBySSA.find(DefHashValue); + if (MapIter == this->LocalDefFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = NewInfo; + NewFGInfo.SizeInfo = 0; + pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFGInfo); + MapResult = this->LocalDefFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. + MapIter->second.SignMiscInfo |= NewInfo; + } + + return; +} // end of SMPBasicBlock::UpdateDefSignMiscInfo() + +void SMPBasicBlock::UpdateUseSignMiscInfo(int UseHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalUseFGInfoBySSA.find(UseHashValue); + if (MapIter == this->LocalUseFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = NewInfo; + NewFGInfo.SizeInfo = 0; + pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFGInfo); + MapResult = this->LocalUseFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. 
+ MapIter->second.SignMiscInfo |= NewInfo; + } + + return; +} // end of SMPBasicBlock::UpdateUseSignMiscInfo() + +void SMPBasicBlock::UpdateDefWidthTypeInfo(int DefHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalDefFGInfoBySSA.find(DefHashValue); + if (MapIter == this->LocalDefFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = 0; + NewFGInfo.SizeInfo = NewInfo; + pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFGInfo); + MapResult = this->LocalDefFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. + MapIter->second.SizeInfo |= NewInfo; + } + + return; +} // end of SMPBasicBlock::UpdateDefWidthTypeInfo() + +void SMPBasicBlock::UpdateUseWidthTypeInfo(int UseHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->LocalUseFGInfoBySSA.find(UseHashValue); + if (MapIter == this->LocalUseFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = 0; + NewFGInfo.SizeInfo = NewInfo; + pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFGInfo); + MapResult = this->LocalUseFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. + MapIter->second.SizeInfo |= NewInfo; + } + + return; +} // end of SMPBasicBlock::UpdateUseWidthTypeInfo() + // Find consecutive DEFs of the same name within the block, both having // live metadata, that have the same type, making the second DEF // redundant metadata. 
diff --git a/SMPBasicBlock.h b/SMPBasicBlock.h index b4bcb364cb305fa83f826b1f26c1f82e74a5eba5..9e9ea178f0ba5efd496bccdfd388de2294295164 100644 --- a/SMPBasicBlock.h +++ b/SMPBasicBlock.h @@ -46,9 +46,6 @@ using namespace std; -// Do Full Liveness Analysis? Experimental by KBinswanger -#define SMP_FULL_LIVENESS_ANALYSIS 1 - // Value for a block number before it is initialized. #define SMP_BLOCKNUM_UNINIT (-1) @@ -103,6 +100,18 @@ public: int GetPredPosition(int BlockNum); // What is position # within Preds of BlockNum? set<SMPPhiFunction, LessPhi>::iterator FindPhi(op_t FindOp); set<op_t, LessOp>::iterator FindLocalName(op_t FindOp); + // Given a USE operand and the address of its instr, return DEF from DU chains or Phi func. + set<DefOrUse, LessDefUse>::iterator GetDefFromOpAddr(op_t UseOp, ea_t InstAddr, int SSANum); + set<DefOrUse, LessDefUse>::iterator GetGlobalDefIterFromDefAddr(op_t DefOp, ea_t DefAddr); + ea_t GetDefAddrFromUseAddr(op_t UseOp, ea_t InstAddr, int SSANum, bool LocalName); + list<SMPInstr>::iterator FindInstr(ea_t InstAddr); + + // Four methods to get values from the maps of local reg/SSA to FG info. + // For global names, see corresponding methods in SMPFunction. + unsigned short GetDefSignMiscInfo(int DefHashValue); + unsigned short GetUseSignMiscInfo(int UseHashValue); + unsigned short GetDefWidthTypeInfo(int DefHashValue); + unsigned short GetUseWidthTypeInfo(int UseHashValue); // Set methods inline void SetProcessed(bool flag) { Processed = flag; }; @@ -123,6 +132,13 @@ public: bool SetPhiUseType(op_t DefOp, size_t index, SMPOperandType Type); set<SMPPhiFunction, LessPhi>::iterator SetPhiDefMetadata(op_t DefOp, SMPMetadataType Status); + // Four methods to set values into the maps of local reg/SSA to FG info. + // For global names, see corresponding methods in SMPFunction. 
+ void UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo); + void UpdateUseSignMiscInfo(int UseHashValue, unsigned short NewInfo); + void UpdateDefWidthTypeInfo(int DefHashValue, unsigned short NewInfo); + void UpdateUseWidthTypeInfo(int UseHashValue, unsigned short NewInfo); + // Query methods inline bool IsProcessed(void) const { return Processed; }; inline bool HasIndirectJump(void) const { return IndirectJump; }; @@ -153,10 +169,8 @@ public: void AddToDomFrontier(int); // Add RPO block number to DomFrontier set. void SetLocalNames(void); // Fille the LocalNames member set void SSALocalRenumber(void); // Renumber references to local names -#if SMP_FULL_LIVENESS_ANALYSIS void CreateGlobalChains(void); // Create DEF-USE chains for global names used here bool IsGlobalRegDead(ea_t InstAddr, op_t Operand, unsigned int RegIndex) const; // Is global reg dead at InstAddr? -#endif bool IsRegDead(ea_t InstAddr, unsigned int RegIndex) const; // Is local reg dead at InstAddr? void MarkDeadRegs(void); // Find dead registers for each mmStrata-instrumented instruction bool PropagateLocalDefType(op_t DefOp, SMPOperandType DefType, ea_t DefAddr, int SSANum, bool IsMemOp); // to all uses @@ -169,6 +183,8 @@ public: bool GetLocalDUChainIndWrite(op_t DefOp, int SSANum); // Get IndWrite flag for DefOp chain bool GetGlobalDUChainIndWrite(op_t DefOp, ea_t DefAddr); // Get IndWrite flag for DefOp chain bool IsLastGlobalChain(op_t DefOp, ea_t DefAddr); // DU-chain is last one for global DefOp + void MarkBranchSignedness(void); // If branch at block end is signed/unsigned, propagate to operands that set flags before it. 
+ private: // Data ea_t FirstAddr; @@ -188,6 +204,7 @@ private: ea_t LastFlagsUse; // addr of last instr that USEs the flags list<list<SMPInstr>::iterator> Instrs; + map<ea_t, list<SMPInstr>::iterator> AddrInstMap; list<list<SMPBasicBlock>::iterator> Predecessors; list<list<SMPBasicBlock>::iterator> Successors; // Four sets used in live variable analysis @@ -200,9 +217,17 @@ private: set<SMPPhiFunction, LessPhi> PhiFunctions; // SSA incoming edge phi functions set<op_t, LessOp> LocalNames; // non-global names referenced in this block SMPCompleteDUChains LocalDUChains; // def-use chains for local names -#if SMP_FULL_LIVENESS_ANALYSIS SMPCompleteDUChains GlobalDUChains; // def-use chains for global names -#endif + // NOTE: The GlobalChains.ChainsByName.at(GlobalIndex).DUChains are indexed + // starting at zero. The indices into the DUChains have nothing to do + // with the SSA Numbers, unlike the LocalDUCHains. + + map<int, struct FineGrainedInfo> LocalDefFGInfoBySSA; // map hash of local name & SSANum to DEF FG info. + // NOTE: We are currently limiting this map to registers, not all local names. + + map<int, struct FineGrainedInfo> LocalUseFGInfoBySSA; // map hash of local name & SSANum to USE FG info. + // NOTE: We are currently limiting this map to registers, not all local names. + // Methods bool MDAlreadyKilled(op_t) const; // Was op_t killed by something already in KillSet? 
@@ -210,6 +235,7 @@ private: set<SMPPhiFunction, LessPhi>::iterator InferPhiDefType(set<SMPPhiFunction, LessPhi>::iterator DefPhi, bool &changed); // infer, propagate to all uses unsigned int GetLocalDUIndex(op_t DefOp, int SSANum); unsigned int GetGlobalDUIndex(op_t DefOp, ea_t DefAddr); + void PropagateBranchSignedness(ea_t DefAddr, op_t SearchOp, unsigned short SignMask); }; #endif diff --git a/SMPDataFlowAnalysis.cpp b/SMPDataFlowAnalysis.cpp index 9897f171fc3c16cb7b9f61ac2f2d9a7038d38f2d..2badfd2f64a2fde1ea68f96178929eb58b5ec9a8 100644 --- a/SMPDataFlowAnalysis.cpp +++ b/SMPDataFlowAnalysis.cpp @@ -84,6 +84,20 @@ const char *RegNames[MAX_IDA_REG + 1] = "MXCSR" }; +const unsigned char RegSizes[MAX_IDA_REG + 1] = + { 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 1, 1, 1, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 8, 8, 8, + 8, 8, 8, 8, 8, 4, 4, 4, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 4 + }; + // Define instruction categories for data flow analysis. SMPitype DFACategory[NN_last+1]; // Define instruction categories for data type analysis. @@ -93,9 +107,24 @@ int SMPTypeCategory[NN_last+1]; bool SMPDefsFlags[NN_last + 1]; bool SMPUsesFlags[NN_last + 1]; +// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map +int HashGlobalNameAndSSA(op_t DefOp, int SSANum) { + return ((SSANum << 16) | (DefOp.reg)); +} + // Get the size in bytes of the data type of an operand. 
size_t GetOpDataSize(op_t DataOp) { size_t DataSize; + if (o_reg == DataOp.type) { + DataSize = RegSizes[DataOp.reg]; + if (DataOp.dtyp == dt_word) { + DataSize = 2; +#if 0 + // msg("Found 16-bit register using dtyp field.\n"); +#endif + } + return DataSize; + } switch (DataOp.dtyp) { case dt_byte: DataSize = 1; @@ -118,6 +147,7 @@ size_t GetOpDataSize(op_t DataOp) { DataSize = 12; break; case dt_byte16: + case dt_ldbl: DataSize = 16; break; case dt_fword: @@ -127,7 +157,7 @@ size_t GetOpDataSize(op_t DataOp) { DataSize = 3; break; default: - msg("WARNING: unexpected data type %d in GetOpDataSize() :", DataOp.dtyp); + msg("ERROR: unexpected data type %d in GetOpDataSize() :", DataOp.dtyp); PrintOperand(DataOp); msg("\n"); DataSize = 4; @@ -136,6 +166,66 @@ size_t GetOpDataSize(op_t DataOp) { return DataSize; } // end of GetOpDataSize() +// Return one of the bit width masks for the current operand. +// Pass in DataSize in bytes if known, else pass in DataSize = 0. +unsigned int ComputeOperandBitWidthMask(op_t CurrOp, size_t DataSize) { + unsigned int BitWidthMask = 32; + if (0 == DataSize) + DataSize = GetOpDataSize(CurrOp); + if (4 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_32; + else if (8 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_64; + else if (1 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_8; + else if (2 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_16; + else if (16 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_128; + else if (3 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_24; + else if (6 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_48; + else if (12 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_96; + else if (32 == DataSize) + BitWidthMask = FG_MASK_BITWIDTH_256; + else { + msg("ERROR: Unknown DataSize: %d bytes Operand: ", DataSize); + PrintOperand(CurrOp); + msg("\n"); + } + return BitWidthMask; +} // end of ComputeOperandBitWidthMask() + +// Compute largest bit width from a SignMiscInfo bit mask. 
+size_t LargestBitWidthFromMask(unsigned short WidthTypeInfo) { + unsigned short BitWidthMask = WidthTypeInfo & FG_MASK_BITWIDTH_FIELDS; + size_t LargestWidth = 0; + + // Go from highest bit width to lowest. + if (BitWidthMask & FG_MASK_BITWIDTH_256) + LargestWidth = 256; + else if (BitWidthMask & FG_MASK_BITWIDTH_128) + LargestWidth = 128; + else if (BitWidthMask & FG_MASK_BITWIDTH_96) + LargestWidth = 96; + else if (BitWidthMask & FG_MASK_BITWIDTH_64) + LargestWidth = 64; + else if (BitWidthMask & FG_MASK_BITWIDTH_48) + LargestWidth = 48; + else if (BitWidthMask & FG_MASK_BITWIDTH_32) + LargestWidth = 32; + else if (BitWidthMask & FG_MASK_BITWIDTH_24) + LargestWidth = 24; + else if (BitWidthMask & FG_MASK_BITWIDTH_16) + LargestWidth = 16; + else if (BitWidthMask & FG_MASK_BITWIDTH_8) + LargestWidth = 8; + + return LargestWidth; +} // end of LargestBitWidthFromMask() + // We need to make subword registers equal to their containing registers when we // do comparisons, so that we will realize that register EAX is killed by a prior DEF // of register AL, for example, and vice versa. To keep sets ordered strictly, @@ -266,6 +356,16 @@ void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Sca return; } // end of MDExtractAddressFields() +// Is CurrOp a memory operand? +bool IsMemOperand(op_t CurrOp) { + return ((o_mem == CurrOp.type) || (o_displ == CurrOp.type) || (o_phrase == CurrOp.type)); +} + +// MACHINE DEPENDENT: Is CurrOp the flags register? +bool MDIsFlagsReg(op_t CurrOp) { + return ((o_reg == CurrOp.type) && CurrOp.is_reg(X86_FLAGS_REG)); +} + // MACHINE DEPENDENT: Is operand a stack memory access? bool MDIsStackAccessOpnd(op_t CurrOp, bool UseFP) { int BaseReg; @@ -627,6 +727,14 @@ set<DefOrUse, LessDefUse>::iterator DefOrUseSet::FindRef(op_t SearchOp) { return CurrRef; } +// Insert a new DEF or USE; must be new, insert must succeed else we assert. 
+set<DefOrUse, LessDefUse>::iterator DefOrUseSet::InsertRef(DefOrUse Ref) { + pair<set<DefOrUse, LessDefUse>::iterator, bool> InsertResult; + InsertResult = this->Refs.insert(Ref); + assert(InsertResult.second); + return InsertResult.first; +} + // Set a Def or Use into the list, along with its type. void DefOrUseSet::SetRef(op_t Ref, SMPOperandType Type, int SSASub) { DefOrUse CurrRef(Ref, Type, SSASub); @@ -815,6 +923,11 @@ SMPPhiFunction::SMPPhiFunction(int GlobIndex, const DefOrUse &Def) { return; } +DefOrUse SMPPhiFunction::GetDefCopy(void) const { + DefOrUse DefCopy(this->DefName); + return DefCopy; +} + // Add a phi item to the list void SMPPhiFunction::PushBack(DefOrUse Ref) { this->SubscriptedOps.SetRef(Ref.GetOp(), Ref.GetType(), Ref.GetSSANum()); @@ -960,12 +1073,7 @@ SMPDefUseChain::SMPDefUseChain(void) { } SMPDefUseChain::SMPDefUseChain(op_t Name, ea_t Def) { - if (o_reg == Name.type) { - // We want to map AH, AL, and AX to EAX, etc. throughout our data flow analysis - // and type inference systems. - Name.reg = MDCanonicalizeSubReg(Name.reg); - } - this->SSAName = Name; + this->SetName(Name); this->RefInstrs.push_back(Def); this->IndWrite = false; return; diff --git a/SMPDataFlowAnalysis.h b/SMPDataFlowAnalysis.h index 1fb52fb1a05e1eda6086b1975ad817b20394549d..1d76ded05445122da75fa1a07710ca4e42480cb5 100644 --- a/SMPDataFlowAnalysis.h +++ b/SMPDataFlowAnalysis.h @@ -96,6 +96,12 @@ void PrintOperand(op_t Opnd); // MACHINE DEPENDENT: Could operand be an indirect memory access? bool MDIsIndirectMemoryOpnd(op_t CurrOp, bool UseFP); +// Is CurrOp a memory operand? +bool IsMemOperand(op_t CurrOp); + +// MACHINE DEPENDENT: Is CurrOp the flags register? +bool MDIsFlagsReg(op_t CurrOp); + // MACHINE DEPENDENT: Is operand a stack memory access? 
bool MDIsStackAccessOpnd(op_t CurrOp, bool UseFP); @@ -108,9 +114,15 @@ void MDExtractAddressFields(op_t MemOp, int &BaseReg, int &IndexReg, ushort &Sca // MACHINE DEPENDENT: Is operand type a known type that we want to analyze? bool MDKnownOperandType(op_t TempOp); +// MACHINE DEPENEDENT: Convert subword register to its enclosing register. ushort MDCanonicalizeSubReg(const ushort Reg1); + +// MACHINE DEPENDENT: Ordering function for register enum values, to use in set containers. bool MDLessReg(const ushort Reg1, const ushort Reg2); +// Hash a global name and SSA number into an int, for use in SMPFunction.GlobalDefAddrBySSA map +int HashGlobalNameAndSSA(op_t DefOp, int SSANum); + // MACHINE DEPENDENT: comparison class to permit sorting of op_t operands. class LessOp { public: @@ -241,21 +253,67 @@ enum SMPMetadataType { DEF_METADATA_PROF_REDUNDANT = 8 // redundant, based on profiler-derived types }; +// Encoding of info for fine-grained analysis of stack frames, integer error analysis, etc. +struct FineGrainedInfo { + unsigned short SizeInfo; // 8-bit, 16-bit, ...128-bit, int, float, string + unsigned short SignMiscInfo; // signed, unsigned, read, written, FP-relative, etc +}; + +// Masks for FineGrainedInfo.SignMiscInfo +#define FG_MASK_SIGNED 1 +#define FG_MASK_UNSIGNED 2 +#define FG_MASK_INCONSISTENT_SIGN 3 +#define FG_MASK_SIGNEDNESS_BITS 3 // better name for some uses +#define FG_MASK_READ 4 +#define FG_MASK_WRITTEN 8 +#define FG_MASK_SP_RELATIVE 16 +#define FG_MASK_FP_RELATIVE 32 +#define FG_MASK_ADDRESS_TAKEN 64 + +// Masks for FineGrainedInfo.SizeInfo +#define FG_MASK_INTEGER 1 +#define FG_MASK_STRING 2 // string data +#define FG_MASK_FLOAT_MMX 4 // floating point or MMX/SSE/packed +// remaining bit fields are the bit widths, e.g. 8 => 8-bit, ... 
128 => 128-bit +#define FG_MASK_BITWIDTH_8 8 +#define FG_MASK_BITWIDTH_16 16 +#define FG_MASK_BITWIDTH_32 32 +#define FG_MASK_BITWIDTH_64 64 +#define FG_MASK_BITWIDTH_128 128 +#define FG_MASK_BITWIDTH_256 256 +#define FG_MASK_BITWIDTH_96 512 +#define FG_MASK_BITWIDTH_48 1024 +#define FG_MASK_BITWIDTH_24 2048 +#define FG_MASK_BITWIDTH_FIELDS (8|16|32|64|128|256|512|1024|2048) +// miscellaneous type info +#define FG_MASK_CODEPOINTER 4096 +#define FG_MASK_DATAPOINTER 8192 + +// Return one of the bit width masks above for the current operand. +unsigned int ComputeOperandBitWidthMask(op_t CurrOp, size_t DataSize); + +// Compute largest bit width from a SignMiscInfo bit mask. +size_t LargestBitWidthFromMask(unsigned short WidthTypeInfo); + class DefOrUse { public: // Constructors DefOrUse(void); DefOrUse(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); DefOrUse(const DefOrUse &CopyIn); + // Operators DefOrUse &operator=(const DefOrUse &rhs); + // Get methods inline op_t GetOp(void) const { return Operand; }; inline SMPOperandType GetType(void) const { return OpType; }; inline int GetSSANum(void) const { return SSANumber; }; inline SMPMetadataType GetMetadataStatus(void) const { return MetadataStatus; }; + // Query methods. inline bool HasIndirectWrite(void) const { return IndWrite; }; + // Set methods inline void SetSSANum(int Num) { SSANumber = Num; }; void SetType(SMPOperandType Type, const SMPInstr* instr); @@ -263,6 +321,7 @@ public: MetadataStatus = NewStatus; }; void SetIndWrite(bool IndMemWrite); + // Printing methods void Dump(void) const; private: @@ -294,13 +353,16 @@ public: DefOrUseSet(void); // Destructor. 
~DefOrUseSet(); + // Get methods - // DefOrUse GetRef(size_t index) const; inline size_t GetSize(void) const { return (size_t) Refs.size(); }; inline set<DefOrUse, LessDefUse>::iterator GetFirstRef(void) { return Refs.begin(); }; inline set<DefOrUse, LessDefUse>::iterator GetLastRef(void) { return Refs.end(); }; set<DefOrUse, LessDefUse>::iterator FindRef(op_t SearchOp); + // Set methods + // Insert a new DEF or USE; must be new, insert must succeed else we assert. + set<DefOrUse, LessDefUse>::iterator InsertRef(DefOrUse Ref); void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); set<DefOrUse, LessDefUse>::iterator SetSSANum(op_t CurrOp, int NewSSASub); set<DefOrUse, LessDefUse>::iterator SetType(op_t CurrOp, SMPOperandType Type, const SMPInstr* Instr); @@ -311,10 +373,12 @@ public: inline void EraseRef(set<DefOrUse, LessDefUse>::iterator RefIter) { Refs.erase(RefIter); return; } + // Printing methods void Dump(void); + // Analysis methods - bool TypesAgreeNoFlags(void); // Are all types consistent, ignoring flags registers? + bool TypesAgreeNoFlags(void); // Are all types consistent, ignoring flag registers? 
private: // Data set<DefOrUse, LessDefUse> Refs; // Defined or used operand with type and SSA subscript @@ -325,6 +389,7 @@ class DefOrUseList { public: // Constructors DefOrUseList(void); + // Get methods DefOrUse GetRef(size_t index) const; inline size_t GetSize(void) const { return (size_t) Refs.size(); }; @@ -333,6 +398,7 @@ public: inline vector<DefOrUse>::iterator GetLastRef(void) { return Refs.end(); }; inline int GetRefSSANum(size_t index) const { return Refs.at(index).GetSSANum(); }; inline SMPOperandType GetRefType(size_t index) const { return Refs.at(index).GetType(); }; + // Set methods void SetRef(op_t Ref, SMPOperandType Type = UNINIT, int SSASub = SMP_SSA_UNINIT); void SetSSANum(size_t index, int NewSSASub); @@ -344,16 +410,18 @@ public: void EraseDuplicates(void); // in case SMPInstr::MDFixupDefUseLists() adds duplicate private: // Data - vector<DefOrUse> Refs; // Defined or used operand with type and SSA subscript + vector<DefOrUse> Refs; // Defined or used operand with type, SSA subscript, etc. 
}; // end class DefOrUseList class SMPPhiFunction { public: // Constructors SMPPhiFunction(int GlobIndex, const DefOrUse &Def); + // Get methods inline int GetIndex(void) const { return index; }; inline size_t GetPhiListSize(void) const { return this->SubscriptedOps.GetSize(); }; + DefOrUse GetDefCopy(void) const; inline DefOrUse GetPhiRef(size_t i) const { return this->SubscriptedOps.GetRef(i); }; inline DefOrUse *GetRefNum(size_t index) { return SubscriptedOps.GetRefNum(index); }; inline vector<DefOrUse>::iterator GetFirstOp(void) { return SubscriptedOps.GetFirstRef(); }; @@ -364,6 +432,7 @@ public: inline SMPOperandType GetDefType(void) const { return DefName.GetType(); }; inline SMPOperandType GetUseType(size_t index) const { return SubscriptedOps.GetRefType(index); }; inline SMPMetadataType GetDefMetadata(void) const { return DefName.GetMetadataStatus(); }; + // Set methods void PushBack(DefOrUse Ref); // add inputs to phi function void SetSSADef(int NewSSASub); @@ -371,6 +440,7 @@ public: void SetDefType(SMPOperandType Type, const SMPInstr* instr); void SetRefType(size_t index, SMPOperandType Type, const SMPInstr* instr); void SetDefMetadata(SMPMetadataType Status); + // Query methods bool HasTypedUses(void); // true ==> at least one USE is not UNINIT type // Printing methods @@ -413,8 +483,11 @@ public: else return BADADDR; }; + inline size_t GetNumUses(void) const { return RefInstrs.size() - 1; }; + // Query methods. inline bool HasIndirectWrite(void) { return IndWrite; }; + // Set methods void SetName(op_t Name); void SetDef(ea_t Def); @@ -438,7 +511,7 @@ public: // Printing methods. void Dump(void); // Data (public for convenience) - vector<SMPDefUseChain> DUChains; // indexed by SSA number + vector<SMPDefUseChain> DUChains; // indexed by SSA number for local chains private: op_t SSAName; // What variable is used in all chains in the array? 
}; // end class SMPDUChainArray diff --git a/SMPFunction.cpp b/SMPFunction.cpp index 1cfa1a616673ad406e5c2ffc2338472cbfa8eeeb..7c2a71502e4140e0470f78bb2ede3428267d19df 100644 --- a/SMPFunction.cpp +++ b/SMPFunction.cpp @@ -198,11 +198,13 @@ SMPFunction::SMPFunction(func_t *Info, SMPProgram* pgm) { this->SSAStack.clear(); this->LocalVarTable.clear(); this->StackFrameMap.clear(); + this->FineGrainedStackTable.clear(); this->SavedRegLoc.clear(); this->ReturnRegTypes.clear(); this->LiveInSet.clear(); this->LiveOutSet.clear(); this->KillSet.clear(); + this->GlobalDefAddrBySSA.clear(); for (int RegIndex = R_ax; RegIndex <= R_di; ++RegIndex) { this->SavedRegLoc.push_back(0); // zero offset means reg not saved @@ -258,6 +260,52 @@ set<op_t, LessOp>::iterator SMPFunction::GetLastVarKill(void) { return this->KillSet.end(); } +// Four methods to get values from the maps of global reg/SSA to FG info. +// For local names, see corresponding methods in SMPBasicBlock. +unsigned short SMPFunction::GetDefSignMiscInfo(int DefHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue); + if (MapIter != this->GlobalDefFGInfoBySSA.end()) + return MapIter->second.SignMiscInfo; + else + return 0; +} // end of SMPFunction::GetDefSignMiscInfo() + +unsigned short SMPFunction::GetUseSignMiscInfo(int UseHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue); + if (MapIter != this->GlobalUseFGInfoBySSA.end()) + return MapIter->second.SignMiscInfo; + else + return 0; +} // end of SMPFunction::GetUseSignMiscInfo() + +unsigned short SMPFunction::GetDefWidthTypeInfo(int DefHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = 
this->GlobalDefFGInfoBySSA.find(DefHashValue); + if (MapIter != this->GlobalDefFGInfoBySSA.end()) + return MapIter->second.SizeInfo; + else + return 0; +} // end of SMPFunction::GetDefWidthTypeInfo() + +unsigned short SMPFunction::GetUseWidthTypeInfo(int UseHashValue) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue); + if (MapIter != this->GlobalUseFGInfoBySSA.end()) + return MapIter->second.SizeInfo; + else + return 0; +} // end of SMPFunction::GetUseWidthTypeInfo() + // Add a caller to the list of all callers of this function. void SMPFunction::AddCallSource(ea_t addr) { // Convert call instruction address to beginning address of the caller. @@ -272,6 +320,92 @@ void SMPFunction::AddCallSource(ea_t addr) { return; } // end of SMPFunction::AddCallSource() +// Four methods to get values into the maps of global reg/SSA to FG info. +// For local names, see corresponding methods in SMPBasicBlock. +void SMPFunction::UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue); + if (MapIter == this->GlobalDefFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = NewInfo; + NewFGInfo.SizeInfo = 0; + pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFGInfo); + MapResult = this->GlobalDefFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. 
+ MapIter->second.SignMiscInfo |= NewInfo; + } + + return; +} // end of SMPFunction::UpdateDefSignMiscInfo() + +void SMPFunction::UpdateUseSignMiscInfo(int UseHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue); + if (MapIter == this->GlobalUseFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = NewInfo; + NewFGInfo.SizeInfo = 0; + pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFGInfo); + MapResult = this->GlobalUseFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. + MapIter->second.SignMiscInfo |= NewInfo; + } + + return; +} // end of SMPFunction::UpdateUseSignMiscInfo() + +void SMPFunction::UpdateDefWidthTypeInfo(int DefHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalDefFGInfoBySSA.find(DefHashValue); + if (MapIter == this->GlobalDefFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = 0; + NewFGInfo.SizeInfo = NewInfo; + pair<int, struct FineGrainedInfo> MapItem(DefHashValue, NewFGInfo); + MapResult = this->GlobalDefFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. 
+ MapIter->second.SizeInfo |= NewInfo; + } + + return; +} // end of SMPFunction::UpdateDefWidthTypeInfo() + +void SMPFunction::UpdateUseWidthTypeInfo(int UseHashValue, unsigned short NewInfo) { + map<int, struct FineGrainedInfo>::iterator MapIter; + pair<map<int, struct FineGrainedInfo>::iterator, bool> MapResult; + + MapIter = this->GlobalUseFGInfoBySSA.find(UseHashValue); + if (MapIter == this->GlobalUseFGInfoBySSA.end()) { + // Not found; insert first. + struct FineGrainedInfo NewFGInfo; + NewFGInfo.SignMiscInfo = 0; + NewFGInfo.SizeInfo = NewInfo; + pair<int, struct FineGrainedInfo> MapItem(UseHashValue, NewFGInfo); + MapResult = this->GlobalUseFGInfoBySSA.insert(MapItem); + assert(MapResult.second); // Was not previously found, insertion must work. + } + else { // found; just OR in the new bits. + MapIter->second.SizeInfo |= NewInfo; + } + + return; +} // end of SMPFunction::UpdateUseWidthTypeInfo() + // Figure out the different regions of the stack frame, and find the // instructions that allocate and deallocate the local variables space // on the stack frame. @@ -1035,6 +1169,7 @@ void SMPFunction::FindOutgoingArgsSize(void) { // Compute the lowest value reached by the stack pointer. 
list<SMPInstr>::iterator CurrInst; this->MinStackDelta = 20000; // Final value should be negative + unsigned int BitWidthMask; bool DebugFlag = false; #if SMP_DEBUG_STACK_GRANULARITY DebugFlag = (0 == strcmp("error_for_asm", this->GetFuncName())); @@ -1083,6 +1218,7 @@ void SMPFunction::FindOutgoingArgsSize(void) { #endif for (int i = this->MinStackDelta; i < limit; ++i) { struct StackFrameEntry TempEntry; + struct FineGrainedInfo TempFineGrained; TempEntry.VarPtr = NULL; TempEntry.offset = (long) i; TempEntry.Read = false; @@ -1091,6 +1227,9 @@ void SMPFunction::FindOutgoingArgsSize(void) { TempEntry.ESPRelativeAccess = false; TempEntry.EBPRelativeAccess = false; this->StackFrameMap.push_back(TempEntry); + TempFineGrained.SignMiscInfo = 0; + TempFineGrained.SizeInfo = 0; + this->FineGrainedStackTable.push_back(TempFineGrained); } // Fill in the VarPtr fields for each StackFrameMap entry. @@ -1134,13 +1273,16 @@ void SMPFunction::FindOutgoingArgsSize(void) { ea_t offset; size_t DataSize; bool UsedFramePointer; + bool SignedMove; + bool UnsignedMove; if (CurrInst->HasDestMemoryOperand()) { set<DefOrUse, LessDefUse>::iterator CurrDef; for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) { op_t TempOp = CurrDef->GetOp(); if (TempOp.type != o_phrase && TempOp.type != o_displ) continue; - if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) { + if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, sp_delta, offset, DataSize, UsedFramePointer, + SignedMove, UnsignedMove)) { assert(0 <= offset); if (offset >= this->FuncInfo.frsize) continue; // limit processing to outgoing args and locals @@ -1151,12 +1293,25 @@ void SMPFunction::FindOutgoingArgsSize(void) { assert((offset + DataSize) <= this->StackFrameMap.size()); for (int j = 0; j < (int) DataSize; ++j) { this->StackFrameMap[offset + j].Written = true; - if (!UsedFramePointer) + if (!UsedFramePointer) { this->StackFrameMap[offset + j].ESPRelativeAccess 
= true; - else + } + else { this->StackFrameMap[offset + j].EBPRelativeAccess = true; + } } - } + BitWidthMask = ComputeOperandBitWidthMask(TempOp, DataSize); + this->FineGrainedStackTable.at(offset).SizeInfo |= BitWidthMask; + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_WRITTEN; + if (!UsedFramePointer) { + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_SP_RELATIVE; + } + else { + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_FP_RELATIVE; + } + // We will process the signedness of stores later, so that loads can take precedence + // over stores in determining signedness. + } // end if MDGetStackOffsetAndSize() } // end for all DEFs } // end if DestMemoryOperand @@ -1166,7 +1321,8 @@ void SMPFunction::FindOutgoingArgsSize(void) { op_t TempOp = CurrUse->GetOp(); if (TempOp.type != o_phrase && TempOp.type != o_displ) continue; - if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) { + if (this->MDGetStackOffsetAndSize(CurrInst, TempOp, sp_delta, offset, DataSize, UsedFramePointer, + SignedMove, UnsignedMove)) { assert(0 <= offset); if (offset >= this->FuncInfo.frsize) continue; // limit processing to outgoing args and locals @@ -1182,7 +1338,22 @@ void SMPFunction::FindOutgoingArgsSize(void) { else this->StackFrameMap[offset + j].EBPRelativeAccess = true; } - } + BitWidthMask = ComputeOperandBitWidthMask(TempOp, DataSize); + this->FineGrainedStackTable.at(offset).SizeInfo |= BitWidthMask; + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_READ; + if (!UsedFramePointer) { + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_SP_RELATIVE; + } + else { + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_FP_RELATIVE; + } + if (SignedMove) { + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_SIGNED; + } + else if (UnsignedMove) { + this->FineGrainedStackTable.at(offset).SignMiscInfo |= FG_MASK_UNSIGNED; + } + } // end if 
MDGetStackOffsetAndSize() } // end for all USEs } // end if SourceMemoryOperand // NOTE: Detect taking the address of stack locations. **!!** @@ -1239,13 +1410,16 @@ void SMPFunction::FindOutgoingArgsSize(void) { } // end of SMPFunction::FindOutgoingArgsSize() // If TempOp reads or writes to a stack location, return the offset (relative to the initial -// stack pointer value) and the size in bytes of the data access. +// stack pointer value) and the size in bytes of the data access. Also return whether the +// access was frame-pointer-relative, and whether signedness can be inferred due to a load +// from the stack being zero-extended or sign-extended. // NOTE: TempOp must be of type o_displ or o_phrase, as no other operand type could be a // stack memory access. // sp_delta is the stack pointer delta of the current instruction, relative to the initial // stack pointer value for the function. // Return true if a stack memory access was found in TempOp, false otherwise. -bool SMPFunction::MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP) { +bool SMPFunction::MDGetStackOffsetAndSize(list<SMPInstr>::iterator Instr, op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP, + bool &Signed, bool &Unsigned) { int BaseReg; int IndexReg; ushort ScaleFactor; @@ -1263,6 +1437,9 @@ bool SMPFunction::MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &of // Get size of data written DataSize = GetOpDataSize(TempOp); FP = false; + unsigned short opcode = Instr->GetCmd().itype; + Unsigned = (opcode == NN_movzx); + Signed = (opcode == NN_movsx); return true; } else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) { @@ -1270,12 +1447,87 @@ bool SMPFunction::MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &of offset -= this->MinStackDelta; // convert to StackFrameMap index DataSize = GetOpDataSize(TempOp); FP = true; + unsigned short opcode = Instr->GetCmd().itype; + Unsigned = (opcode == 
NN_movzx); + Signed = (opcode == NN_movsx); return true; } else { return false; } } // end of SMPFunction::MDGetStackOffsetAndSize() + +// Return fine grained stack entry for stack op TempOp from instruction at InstAddr +bool SMPFunction::MDGetFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct FineGrainedInfo &FGEntry) { + int BaseReg; + int IndexReg; + ushort ScaleFactor; + ea_t offset; + int SignedOffset; + + assert((o_displ == TempOp.type) || (o_phrase == TempOp.type)); + MDExtractAddressFields(TempOp, BaseReg, IndexReg, ScaleFactor, offset); + sval_t sp_delta = get_spd(this->GetFuncInfo(), InstAddr); + + SignedOffset = (int) offset; + + if (TempOp.type == o_phrase) { + assert(offset == 0); // implicit zero, as in [esp] ==> [esp+0] + } + if ((BaseReg == R_sp) || (IndexReg == R_sp)) { + // ESP-relative constant offset + offset += sp_delta; // base offsets from entry ESP value + offset -= this->MinStackDelta; // convert to StackFrameMap index + } + else if (this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp))) { + offset -= this->FuncInfo.frregs; // base offsets from entry ESP value + offset -= this->MinStackDelta; // convert to StackFrameMap index + } + else { + return false; + } + // We did not return false, so we should have a good offset. Use it to + // pass back the fine grained stack table entry for that offset. 
+ if ((0 > offset) || (offset >= this->FineGrainedStackTable.size())) { + msg("ERROR: FG stack table index out of range in MDGetFGStackLocInfo at %x\n", InstAddr); + FGEntry.SignMiscInfo = 0; + FGEntry.SizeInfo = 0; + } + else { + FGEntry = this->FineGrainedStackTable.at(offset); + } + return true; +} // end of SMPFunction::MDGetFGStackLocInfo() + +// retrieve DEF addr from GlobalDefAddrBySSA or return BADADDR +ea_t SMPFunction::GetGlobalDefAddr(op_t DefOp, int SSANum) { + map<int, ea_t>::iterator DefAddrMapIter; + map<int, ea_t>::iterator MapResult; + ea_t DefAddr = BADADDR; // BADADDR means we did not find it + + int HashedName = HashGlobalNameAndSSA(DefOp, SSANum); + MapResult = this->GlobalDefAddrBySSA.find(HashedName); + if (MapResult != this->GlobalDefAddrBySSA.end()) { // Found it. + DefAddr = (ea_t) MapResult->second; + } + return DefAddr; +} // end of SMPFunction::GetGlobalDefAddr() + +// Retrieve block iterator for InstAddr from InstBlockMap; assert if failure +list<SMPBasicBlock>::iterator SMPFunction::GetBlockFromInstAddr(ea_t InstAddr) { + map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry; + MapEntry = this->InstBlockMap.find(InstAddr); + assert(MapEntry != this->InstBlockMap.end()); + return MapEntry->second; +} + +// Given block # and PhiDef op_t and SSANum, return the Phi iterator or assert. +set<SMPPhiFunction, LessPhi>::iterator SMPFunction::GetPhiIterForPhiDef(size_t BlockNumber, op_t DefOp, int SSANum) { + list<SMPBasicBlock>::iterator DefBlock = this->RPOBlocks.at(BlockNumber); + set<SMPPhiFunction, LessPhi>::iterator PhiIter = DefBlock->FindPhi(DefOp); + assert(PhiIter != DefBlock->GetLastPhi()); + return PhiIter; +} // Is DestOp within the outgoing args area? Assume it must be an ESP-relative // DEF operand in order to be a write to the outgoing args area. 
@@ -1754,6 +2006,15 @@ void SMPFunction::Analyze(void) { this->SetStackFrameInfo(); } + // We can finally search for stack loads now that UseFP has been fixed by + // GetStackFrameInfo(). Otherwise, we would do this in SMPInstr::Analyze(), + // but the UseFP flag is not ready that early. + list<SMPInstr>::iterator StLoadInstIter = this->Instrs.begin(); + while (StLoadInstIter != this->Instrs.end()) { + StLoadInstIter->MDFindLoadFromStack(this->UseFP); + ++StLoadInstIter; + } + // Audit the call instructions and call targets. if ((!this->AllCallTargets.empty()) || this->UnresolvedIndirectCalls) { bool FoundBadCallTarget = false; @@ -2245,10 +2506,9 @@ void SMPFunction::ComputeSSA(void) { CurrBlock->SSALocalRenumber(); if (DumpFlag) CurrBlock->Dump(); -#if SMP_FULL_LIVENESS_ANALYSIS if (DebugFlag) msg("Computing global chains.\n"); CurrBlock->CreateGlobalChains(); -#endif + #if 1 if (DebugFlag) msg("Marking dead registers.\n"); CurrBlock->MarkDeadRegs(); @@ -2586,7 +2846,7 @@ void SMPFunction::SetLinks(void) { #if SMP_USE_SWITCH_TABLE_INFO if (!(this->HasUnresolvedIndirectJumps() || this->HasUnresolvedIndirectCalls())) { #else - if (!(this->HasIndirectJumps())) { + if (!(this->HasIndirectJumps() || this->HasIndirectCalls())) { #endif bool changed; bool NoPredecessors; @@ -3242,7 +3502,9 @@ int SMPFunction::SSANewNumber(size_t GlobNameIndex) { void SMPFunction::SSARename(int BlockNumber) { assert(0 <= BlockNumber); assert(BlockNumber < this->BlockCount); + list<SMPBasicBlock>::iterator CurrBlock = this->RPOBlocks.at((size_t) BlockNumber); + op_t UseOp, DefOp; bool DumpFlag = false; #if SMP_DEBUG_DATAFLOW_VERBOSE @@ -3250,6 +3512,7 @@ void SMPFunction::SSARename(int BlockNumber) { DumpFlag |= (0 == strcmp("dohanoi", this->GetFuncName())); DumpFlag |= (0 == strcmp("uw_frame_state_for", this->GetFuncName())); #endif + DumpFlag |= (0 == strcmp("_IO_sputbackc", this->GetFuncName())); if (DumpFlag) msg("Entered SSARename for block number %d\n", BlockNumber); @@ -3259,6 
+3522,7 @@ void SMPFunction::SSARename(int BlockNumber) { list<SMPPhiFunction> TempPhiList; int GlobalNameIndex; for (CurrPhi = CurrBlock->GetFirstPhi(); CurrPhi != CurrBlock->GetLastPhi(); ++CurrPhi) { + op_t PhiDefOp = CurrPhi->GetAnyOp(); GlobalNameIndex = CurrPhi->GetIndex(); assert(0 <= GlobalNameIndex); int NewSSANum = this->SSANewNumber((size_t) GlobalNameIndex); @@ -3268,9 +3532,19 @@ void SMPFunction::SSARename(int BlockNumber) { TempPhi.SetSSADef(NewSSANum); TempPhiList.push_back(TempPhi); + if (o_reg == PhiDefOp.type) { + if (DumpFlag && PhiDefOp.is_reg(R_ax)) { + msg("New EAX Phi Def SSANum: %d Block %d\n", NewSSANum, BlockNumber); + } + // Map the final SSA number to the block number (a Phi DEF has no instruction address, so the block number stands in for it). + int DefHashValue = HashGlobalNameAndSSA(PhiDefOp, NewSSANum); + pair<int, ea_t> DefMapEntry(DefHashValue, CurrBlock->GetNumber()); + pair<map<int, ea_t>::iterator, bool> MapReturnValue; + MapReturnValue = this->GlobalDefAddrBySSA.insert(DefMapEntry); + assert(MapReturnValue.second); + } } // Go back through the Phi function set and replace the items that need to be updated. - // Thank you g++ for being a pain. list<SMPPhiFunction>::iterator TempIter; for (TempIter = TempPhiList.begin(); TempIter != TempPhiList.end(); ++TempIter) { // Use the op_t from the first phi use, because they are all the same. @@ -3289,7 +3563,8 @@ void SMPFunction::SSARename(int BlockNumber) { set<DefOrUse, LessDefUse>::iterator CurrUse = (*CurrInst)->GetFirstUse(); while (CurrUse != (*CurrInst)->GetLastUse()) { // See if Use is a global name. 
- set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(CurrUse->GetOp()); + UseOp = CurrUse->GetOp(); + set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(UseOp); if (GlobIter != this->GlobalNames.end()) { // found it unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter); if (GlobIndex > this->SSAStack.size()) { @@ -3303,7 +3578,7 @@ void SMPFunction::SSARename(int BlockNumber) { if (this->SSAStack.at(GlobIndex).empty()) { // No top of stack entry to read. #if SMP_DEBUG_UNINITIALIZED_SSA_NAMES - if (!(*CurrInst)->MDIsPopInstr() && (o_reg == GlobIter->type)) { + if (!(*CurrInst)->MDIsPopInstr() && (o_reg == UseOp.type)) { // POP uses the stack offset and generates spurious // uninitialized variable messages for [esp+0]. msg("WARNING: function %s : Use of uninitialized variable: ", @@ -3319,18 +3594,36 @@ void SMPFunction::SSARename(int BlockNumber) { else { NewSSANum = this->SSAStack.at(GlobIndex).back(); } - CurrUse = (*CurrInst)->SetUseSSA(CurrUse->GetOp(), NewSSANum); + CurrUse = (*CurrInst)->SetUseSSA(UseOp, NewSSANum); + if (DumpFlag && (o_reg == UseOp.type) && UseOp.is_reg(R_ax)) { + msg("New EAX Use SSANum: %d at %x\n", NewSSANum, (*CurrInst)->GetAddr()); + } } ++CurrUse; } // end for all USEs set<DefOrUse, LessDefUse>::iterator CurrDef = (*CurrInst)->GetFirstDef(); while (CurrDef != (*CurrInst)->GetLastDef()) { // See if Def is a global name. 
- set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(CurrDef->GetOp()); + DefOp = CurrDef->GetOp(); + set<op_t, LessOp>::iterator GlobIter = this->GlobalNames.find(DefOp); if (GlobIter != this->GlobalNames.end()) { // found it unsigned int GlobIndex = ExtractGlobalIndex(*GlobIter); // Set the SSA number for this DEF to the SSANewNumber top of stack - CurrDef = (*CurrInst)->SetDefSSA(CurrDef->GetOp(), this->SSANewNumber(GlobIndex)); + int NewSSANum = this->SSANewNumber(GlobIndex); + CurrDef = (*CurrInst)->SetDefSSA(DefOp, NewSSANum); + if (o_reg == DefOp.type) { + ea_t DefAddr = (*CurrInst)->GetAddr(); + if (DumpFlag && DefOp.is_reg(R_ax)) { + msg("New EAX Def SSANum: %d at %x\n", NewSSANum, DefAddr); + } + + // Map the final SSA number to the DEF address. + int DefHashValue = HashGlobalNameAndSSA(DefOp, NewSSANum); + pair<int, ea_t> DefMapEntry(DefHashValue, DefAddr); + pair<map<int, ea_t>::iterator, bool> MapReturnValue; + MapReturnValue = this->GlobalDefAddrBySSA.insert(DefMapEntry); + assert(MapReturnValue.second); + } } ++CurrDef; } // end for all DEFs @@ -3368,31 +3661,23 @@ void SMPFunction::SSARename(int BlockNumber) { else { CurrSSA = this->SSAStack.at(GlobIndex).back(); // fetch from top of stack } -#if 0 - // g++ is a pain in the neck and won't allow changes to the set item - // through CurrPhi, which it types as a const iterator, so this next line does - // not compile in g++. C++ does not know how to distinguish between changing - // the field that ordering is based on, and other fields, so g++ has to be - // strict, I guess. - CurrPhi->SetSSARef(ListPos, CurrSSA); -#else SMPPhiFunction TempPhi = (*CurrPhi); TempPhi.SetSSARef(ListPos, CurrSSA); TempPhiList.push_back(TempPhi); if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) { msg("BlockNumber: %d ListPos: %d\n", BlockNumber, ListPos); } -#endif } // end for all phi functions in successor // Go back through the Phi function set and replace the items that need to be updated. 
- // Thank you g++ for being a pain. for (TempIter = TempPhiList.begin(); TempIter != TempPhiList.end(); ++TempIter) { +#if 0 if (DumpFlag && (BlockNumber >= 3) && (BlockNumber <= 4)) { msg("Special before phi dump:\n"); set<SMPPhiFunction, LessPhi>::iterator FoundPhi; FoundPhi = (*SuccIter)->FindPhi(TempIter->GetAnyOp()); FoundPhi->Dump(); } +#endif // Use the op_t from the first phi use, because they are all the same. bool Erased = (*SuccIter)->ErasePhi(TempIter->GetPhiRef(0).GetOp()); assert(Erased); @@ -3437,13 +3722,20 @@ void SMPFunction::SSARename(int BlockNumber) { } } // end for all DEFs } // end for all instructions - if (DumpFlag) msg("Popped off entries due to instructions.\n"); + if (DumpFlag) { + msg("Popped off entries due to instructions.\n"); + } return; } // end of SMPFunction::SSARename() // Main driver of SSA subscript renumbering. void SMPFunction::SSARenumber(void) { + bool DumpFlag = false; +#if 1 + DumpFlag |= (0 == strcmp("_IO_sputbackc", this->GetFuncName())); +#endif + if (0 >= this->GlobalNames.size()) return; // no names to renumber @@ -3458,6 +3750,8 @@ void SMPFunction::SSARenumber(void) { // Recurse through the dominator tree starting with node 0. this->SSARename(0); + if (DumpFlag) + this->Dump(); return; } // end of SMPFunction::SSARenumber() @@ -3491,7 +3785,7 @@ void SMPFunction::InferTypes(bool FirstIter) { bool NewChange = false; bool DebugFlag = false; #if SMP_DEBUG_TYPE_INFERENCE - DebugFlag |= (0 == strcmp("InputMove", this->GetFuncName())); + DebugFlag |= (0 == strcmp("__libc_csu_init", this->GetFuncName())); #endif list<SMPInstr>::iterator CurrInst; set<DefOrUse, LessDefUse>::iterator CurrDef; @@ -3509,6 +3803,15 @@ void SMPFunction::InferTypes(bool FirstIter) { msg("SetImmedTypes for inst at %x: %s\n", CurrInst->GetAddr(), CurrInst->GetDisasm()); } CurrInst->SetImmedTypes(this->UseFP); + // Infer signedness, bit width, and other info from the nature of the instruction + // (e.g. 
loads from stack locations whose signedness has been inferred earlier + // in FindOutGoingArgSize(), or inherently signed arithmetic opcodes like signed + // or unsigned multiplies and divides). + CurrInst->MDSetWidthSignInfo(this->UseFP); + } + // Check for signedness inferences from conditional branches at the end of blocks. + for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) { + CurrBlock->MarkBranchSignedness(); } } @@ -3992,7 +4295,7 @@ bool SMPFunction::ConditionalTypePropagation(void) { // Emit all annotations for the function, including all per-instruction // annotations. -void SMPFunction::EmitAnnotations(FILE *AnnotFile) { +void SMPFunction::EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile) { // Emit annotation for the function as a whole. if (this->StaticFunc) { qfprintf(AnnotFile, "%10x %6d FUNC LOCAL %s ", this->FuncInfo.startEA, @@ -4117,8 +4420,9 @@ void SMPFunction::EmitAnnotations(FILE *AnnotFile) { DeallocTrigger = false; } - if (this->HasGoodRTLs()) { + if (this->HasGoodRTLs() && !this->HasUnresolvedIndirectJumps() && !this->HasSharedChunks()) { CurrInst->EmitTypeAnnotations(this->UseFP, AllocSeen, this->NeedsStackReferent, AnnotFile); + CurrInst->EmitIntegerErrorAnnotations(InfoAnnotFile); } else { CurrInst->EmitAnnotations(this->UseFP, AllocSeen, this->NeedsStackReferent, AnnotFile); diff --git a/SMPFunction.h b/SMPFunction.h index 9d17242c72b9718e05f909da27c6aba25b488786..50ad0907df6c03271e49d38dbbcb523c8a7b4f97 100644 --- a/SMPFunction.h +++ b/SMPFunction.h @@ -80,7 +80,6 @@ enum FuncType { FUNC_UNSAFE = 2 }; - // Class encapsulating all that the SMP static analyzer cares to know // about a function. 
class SMPFunction { @@ -113,6 +112,21 @@ public: inline const vector<ea_t> GetCallTargets() const { return AllCallTargets; }; bool GetIsSpeculative() { return IsSpeculative; } inline size_t GetNumCallers(void) const { return AllCallSources.size(); }; + bool MDGetFGStackLocInfo(ea_t InstAddr, op_t TempOp, struct FineGrainedInfo &FGEntry); + // Return fine grained stack entry for stack op TempOp from instruction at InstAddr + ea_t GetGlobalDefAddr(op_t DefOp, int SSANum); // retrieve from GlobalDefAddrBySSA or return BADADDR + list<SMPBasicBlock>::iterator GetBlockFromInstAddr(ea_t InstAddr); // retrieve from InstBlockMap or assert + inline list<SMPBasicBlock>::iterator GetBlockByNum(size_t BlockIndex) const { return RPOBlocks.at(BlockIndex); }; + + inline size_t GetNumBlocks(void) const { return Blocks.size(); }; + set<SMPPhiFunction, LessPhi>::iterator GetPhiIterForPhiDef(size_t BlockNumber, op_t DefOp, int SSANum); + // Given block # and PhiDef op_t and SSANum, return the Phi iterator or assert. + // Four methods to get values from the maps of global reg/SSA to FG info. + // For local names, see corresponding methods in SMPBasicBlock. + unsigned short GetDefSignMiscInfo(int DefHashValue); + unsigned short GetUseSignMiscInfo(int UseHashValue); + unsigned short GetDefWidthTypeInfo(int DefHashValue); + unsigned short GetUseWidthTypeInfo(int UseHashValue); // Set methods inline void IncTypedPhiDefs(void) { ++TypedPhiDefs; return; }; @@ -126,9 +140,16 @@ public: inline void SetSpecFuncSafe(bool Status) { SpecSafeFunc = Status; return; }; inline void SetNeedsFrame(bool Status) { NeedsStackReferent = Status; return; }; inline void SetSpecNeedsFrame(bool Status) { SpecNeedsStackReferent = Status; return; }; - void SetIsSpeculative(bool IsS) { IsSpeculative = IsS; } + inline void SetIsSpeculative(bool IsS) { IsSpeculative = IsS; } void AddCallSource(ea_t addr); + // Four methods to get values into the maps of global reg/SSA to FG info. 
+ // For local names, see corresponding methods in SMPBasicBlock. + void UpdateDefSignMiscInfo(int DefHashValue, unsigned short NewInfo); + void UpdateUseSignMiscInfo(int UseHashValue, unsigned short NewInfo); + void UpdateDefWidthTypeInfo(int DefHashValue, unsigned short NewInfo); + void UpdateUseWidthTypeInfo(int UseHashValue, unsigned short NewInfo); + // Query methods inline bool HasIndirectCalls(void) const { return IndirectCalls; }; inline bool HasUnresolvedIndirectCalls(void) const { return UnresolvedIndirectCalls; }; @@ -166,7 +187,7 @@ public: void ResetProcessedBlocks(void); // Set Processed flag to false in all blocks bool ComputeGlobalSets(void); // return true if LiveIn, LiveOut, Kill sets change void Analyze(void); // Analyze all instructions in function - void EmitAnnotations(FILE *AnnotFile); + void EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile); void RPONumberBlocks(void); void SetLinks(void); // Link basic blocks and map instructions to blocks void LiveVariableAnalysis(void); // Perform Live Variable Analysis across all blocks @@ -244,10 +265,21 @@ private: vector<list<int> > SSAStack; // SSA stack of most recent SSA number, indexed by global # vector<struct LocalVar> LocalVarTable; // offset-sorted list of local vars / outgoing args vector<struct StackFrameEntry> StackFrameMap; // memory map of every byte on stack frame + vector<struct FineGrainedInfo> FineGrainedStackTable; // built using opcode analysis, not IDA stack info vector<int> SavedRegLoc; // indexed by reg #; offset from return address of callee-saved reg vector<SMPOperandType> ReturnRegTypes; // indexed by reg #; inferred types upon return + map<int, ea_t> GlobalDefAddrBySSA; // map hash of global name & SSANum to DEF inst addr + // If global DEF for that SSA is found in a Phi function, we use block number instead of inst addr + // Instruction addresses should never overlap block #s, as block #s start at 0 and top out at a few hundred. 
+ // NOTE: We are currently limiting this map to registers, not all global names. + + map<int, struct FineGrainedInfo> GlobalDefFGInfoBySSA; // map hash of global name & SSANum to DEF FG info. + // NOTE: We are currently limiting this map to registers, not all global names. + + map<int, struct FineGrainedInfo> GlobalUseFGInfoBySSA; // map hash of global name & SSANum to USE FG info. + // NOTE: We are currently limiting this map to registers, not all global names. - // Four sets used in live variable analysis + // Three sets used in live variable analysis set<op_t, LessOp> KillSet; // registers killed in this function set<op_t, LessOp> LiveOutSet; // Live-Out registers in this function set<op_t, LessOp> LiveInSet; // registers live in to this function @@ -263,9 +295,9 @@ private: bool WritesToOutgoingArgs(op_t DestOp); // Does DestOp fall within outgoing args area? bool WritesAboveLocalFrame(op_t DestOp); // Is DestOp direct stack write to caller's frame? bool IndexedWritesAboveLocalFrame(op_t DestOp); // Is DestOp direct stack write to caller's frame? - bool MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, - bool &FP); // Find any stack memory access in TempOp, return offset, size, and whether - // the Frame Pointer was used. + bool MDGetStackOffsetAndSize(list<SMPInstr>::iterator Instr, op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, + bool &FP, bool &Signed, bool &Unsigned); // Find any stack memory access in TempOp, return offset, size, + // whether the Frame Pointer was used and signedness (if sign-extended or zero-extended). 
bool FindAlloca(void); // true if found evidence of alloca() allocations void MDFindSavedRegs(void); // Fill in SavedRegLoc[] offsets void EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr); diff --git a/SMPInstr.cpp b/SMPInstr.cpp index 99a8548ec1d185deb6872898eddc81bba0ebcef5..8284a3e4206aec3596609ef775ba599160f9cd01 100644 --- a/SMPInstr.cpp +++ b/SMPInstr.cpp @@ -253,8 +253,10 @@ SMPInstr::SMPInstr(ea_t addr) { this->SrcMemOp = InitOp; this->DEFMemOp = InitOp; this->USEMemOp = InitOp; + this->MoveSource = InitOp; this->IndirectMemRead = false; this->IndirectMemWrite = false; + this->LoadFromStack = false; this->TypeInferenceComplete = false; this->CategoryInferenceComplete = false; this->BasicBlock = NULL; @@ -720,6 +722,9 @@ bool SMPInstr::MDIsFrameDeallocInstr(bool UseFP, asize_t LocalVarsSize) { bool SMPInstr::MDIsNop(void) const { bool IsNop = false; ushort opcode = this->SMPcmd.itype; + + // NOTE: More examples have arisen, e.g. xchg reg with itself. !!!!!! + if (NN_nop == opcode) IsNop = true; else if (NN_mov == opcode) { @@ -792,6 +797,21 @@ bool SMPInstr::MDIsConditionalMoveInstr(void) const { && (this->SMPcmd.itype <= MD_LAST_COND_MOVE_INSTR)); } +// MACHINE DEPENDENT: Is instruction a conditional jump based on an unsigned condition? +bool SMPInstr::MDIsUnsignedBranch(void) const { + unsigned short opcode = this->SMPcmd.itype; + return ((NN_ja == opcode) || (NN_jae == opcode) || (NN_jb == opcode) || (NN_jbe == opcode) + || (NN_jna == opcode) || (NN_jnae == opcode) || (NN_jnb == opcode) || (NN_jnbe == opcode)); +} + +// MACHINE DEPENDENT: Is instruction a conditional jump based on a signed condition? 
+bool SMPInstr::MDIsSignedBranch(void) const { + unsigned short opcode = this->SMPcmd.itype; + return ((NN_jg == opcode) || (NN_jge == opcode) || (NN_jl == opcode) || (NN_jle == opcode) + || (NN_jng == opcode) || (NN_jnge == opcode) || (NN_jnl == opcode) || (NN_jnle == opcode) + || (NN_js == opcode) || (NN_jns == opcode)); +} + // MACHINE DEPENDENT: Does instruction use a callee-saved register? bool SMPInstr::MDUsesCalleeSavedReg(void) { set<DefOrUse, LessDefUse>::iterator CurrUse; @@ -1493,8 +1513,12 @@ void SMPInstr::MDFixupDefUseLists(void) { #if 1 if (this->MDIsNop()) { // Clear the DEFs and USEs for no-ops. + // These include machine idioms for no-ops, e.g. mov esi,esi + // or xchg ax,ax or lea esi,[esi]. this->Defs.clear(); this->Uses.clear(); + this->MoveSource = InitOp; + this->OptType = 1; } #endif @@ -1800,6 +1824,170 @@ void SMPInstr::SetImmedTypes(bool UseFP) { return; } // end of SMPInstr::SetImmedTypes() +// Is the instruction a load from the stack? +void SMPInstr::MDFindLoadFromStack(bool UseFP) { + set<DefOrUse, LessDefUse>::iterator UseIter; + op_t UseOp; + + if ((3 == this->OptType) && (this->HasSourceMemoryOperand())) { + // Loads and stores are OptCategory 3. We want only loads from the stack. + for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { + UseOp = UseIter->GetOp(); + if (MDIsStackAccessOpnd(UseOp, UseFP)) { + this->LoadFromStack = true; + break; + } + } + } + return; +} // end of SMPInstr::MDFindLoadFromStack() + +// Determine if instr is inherently signed load instruction. +// True if sign or zero-extended; pass out mask bits if true. 
+bool SMPInstr::MDIsSignedLoad(unsigned short &SignMask) { + unsigned short opcode = this->SMPcmd.itype; + if (NN_movzx == opcode) { + SignMask = FG_MASK_UNSIGNED; + } + else if (NN_movsx == opcode) { + SignMask = FG_MASK_SIGNED; + } + else { + return false; + } + return true; +} + +// Infer sign, bit width, other type info for simple cases where all the info needed is +// within the instruction or can be read from the FineGrainedStackTable in the SMPFunction. +// NOTE: Must be called after SSA analysis is complete. +void SMPInstr::MDSetWidthSignInfo(bool UseFP) { + set<DefOrUse, LessDefUse>::iterator UseIter; + set<DefOrUse, LessDefUse>::iterator DefIter; + op_t UseOp, DefOp; + struct FineGrainedInfo FGEntry; + bool ValueWillChange; + unsigned short SignMask, TempSign; + int DefHashValue, UseHashValue; + + // Case 1: Load from stack location. + if (this->IsLoadFromStack()) { + bool success = false; + for (UseIter = this->GetFirstUse(); UseIter != this->GetLastUse(); ++UseIter) { + UseOp = UseIter->GetOp(); + if (MDIsStackAccessOpnd(UseOp, UseFP)) { + // Found the stack location being loaded into a register. Now we need + // to get the sign and width info from the fine grained stack frame + // analysis. + success = this->GetBlock()->GetFunc()->MDGetFGStackLocInfo(this->address, UseOp, FGEntry); + assert(success); + // Now we have signedness info in FGEntry. We need to OR it into the register target of the load. 
+ for (DefIter = this->GetFirstDef(); DefIter != this->GetLastDef(); ++DefIter) { + DefOp = DefIter->GetOp(); + if (o_reg == DefOp.type) { + DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); + TempSign = FGEntry.SignMiscInfo & FG_MASK_SIGNEDNESS_BITS; // Get both sign bit flags + DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); + if (this->BasicBlock->IsLocalName(DefOp)) { + this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, TempSign); + } + else { + this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, TempSign); + } + break; // Should be only one register target for stack load, and no flags are set. + } + } + break; // Only concerned with the stack operand + } + } + assert(success); + } // end if this->IsLoadFromStack() + + // Case 2: Loads that are sign-extended or zero-extended imply signed and unsigned, respectively. + // NOTE: If from the stack, they were handled in Case 1, and the signedness of the stack location + // was recorded a long time ago in SMPFunction::FindOutgoingArgsSize(); + else if (this->MDIsSignedLoad(SignMask)) { + DefIter = this->GetFirstDef(); + while (DefIter != this->GetLastDef()) { + // All non-memory DEFs besides the flags register should get the new SignMask ORed in. + // On x86, there should only be one DEF for this move, and no flags, but we will generalize + // in case other architectures are odd. + DefOp = DefIter->GetOp(); + if (!(IsMemOperand(DefOp) || MDIsFlagsReg(DefOp))) { + DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); + DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); + if (this->BasicBlock->IsLocalName(DefOp)) { + this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + else { + this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + } + ++DefIter; + } + + UseIter = this->GetFirstUse(); + while (UseIter != this->GetLastUse()) { + // All non-memory USEs besides the flags register should get the new SignMask ORed in. 
+ UseOp = UseIter->GetOp(); + if (!(IsMemOperand(UseOp) || MDIsFlagsReg(UseOp))) { + UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); + UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); + if (this->BasicBlock->IsLocalName(UseOp)) { + this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + else { + this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + } + ++UseIter; + } + } + + // Case 3: multiplies and divides are signed or unsigned. + else if (7 == this->OptType) { // Multiplies and divides are type 7. + SignMask = FG_MASK_SIGNED; + if (this->MDIsUnsignedArithmetic()) { + SignMask = FG_MASK_UNSIGNED; + } + DefIter = this->GetFirstDef(); + while (DefIter != this->GetLastDef()) { + // All DEFs besides the flags register should get the new SignMask ORed in. + DefOp = DefIter->GetOp(); + if (!(DefOp.is_reg(X86_FLAGS_REG))) { + DefOp.reg = MDCanonicalizeSubReg(DefOp.reg); + DefHashValue = HashGlobalNameAndSSA(DefOp, DefIter->GetSSANum()); + if (this->BasicBlock->IsLocalName(DefOp)) { + this->BasicBlock->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + else { + this->BasicBlock->GetFunc()->UpdateDefSignMiscInfo(DefHashValue, SignMask); + } + } + ++DefIter; + } + + UseIter = this->GetFirstUse(); + while (UseIter != this->GetLastUse()) { + // All USEs besides the flags register should get the new SignMask ORed in. 
+ UseOp = UseIter->GetOp(); + if (!(UseOp.is_reg(X86_FLAGS_REG))) { + UseOp.reg = MDCanonicalizeSubReg(UseOp.reg); + UseHashValue = HashGlobalNameAndSSA(UseOp, UseIter->GetSSANum()); + if (this->BasicBlock->IsLocalName(UseOp)) { + this->BasicBlock->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + else { + this->BasicBlock->GetFunc()->UpdateUseSignMiscInfo(UseHashValue, SignMask); + } + } + ++UseIter; + } + } + + return; +} // end of SMPInstr::MDSetWidthSignInfo() + // Infer DEF, USE, and RTL SMPoperator types within the instruction based on the type // of operator, the type category of the instruction, and the previously known types // of the operands. @@ -3280,7 +3468,6 @@ void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, } // end switch (OptType) - // always annotate stack constants for the profiler, etc. this->AnnotateStackConstants(UseFP, AnnotFile); @@ -3323,6 +3510,76 @@ void SMPInstr::EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, return; } // end of SMPInstr::EmitTypeAnnotations() + +// We don't worry about truncation unless we see operands less than normal width. +// This would change if we port to 64-bit machines. +#define MD_NORMAL_MACHINE_BITWIDTH 32 + +// emit check annotations for signedness, overflow, truncation, etc. +void SMPInstr::EmitIntegerErrorAnnotations(FILE *InfoAnnotFile) { + set<DefOrUse, LessDefUse>::iterator UseIter, DefIter; + op_t UseOp, DefOp; + unsigned short UseWidthInfo, DefWidthInfo; + size_t UseBitWidth, DefBitWidth; + ea_t DefAddr; + int UseHashValue, DefHashValue; + + // Case 1: Overflow on addition. + + // Case 2: Underflow on subtraction. + + // Case 3: Overflow on multiplication with upper bits discarded. + + // Case 4: Signedness error on move. + + // Case 5: Truncation error on move. + if (3 == this->OptType) { + // Get data width of source operand. 
+ UseOp = this->GetMoveSource(); + UseIter = this->FindUse(UseOp); + assert(UseIter != this->GetLastUse()); + UseBitWidth = 8 * GetOpDataSize(UseOp); + if ((UseBitWidth < MD_NORMAL_MACHINE_BITWIDTH) && (o_reg == UseOp.type)) { + // Now, the question is: Are we storing fewer bits than + // we were using in our computations in this DEF-USE chain? + // E.g. if we computed using 32 bits and then only store 16, + // we have potential truncation error. But if we computed + // using 16 bits all along, we have already checked for 16-bit + // overflows on arithmetic in the DU chain and there can be no + // truncation on this store. + op_t SearchOp = UseOp; + // Canonicalize sub-regs for searching DEFs and USEs. + SearchOp.reg = MDCanonicalizeSubReg(UseOp.reg); + UseHashValue = HashGlobalNameAndSSA(SearchOp, UseIter->GetSSANum()); + + if (this->BasicBlock->IsLocalName(SearchOp)) { + // Local name, find in basic block maps. + DefWidthInfo = this->BasicBlock->GetDefWidthTypeInfo(UseHashValue); + } + else { // Global name, find in global maps. + DefWidthInfo = this->BasicBlock->GetFunc()->GetDefWidthTypeInfo(UseHashValue); + } + + DefBitWidth = LargestBitWidthFromMask(DefWidthInfo); + + if (DefBitWidth > UseBitWidth) { + // Original DEF was wider than what we are storing now; the DEF register is the canonicalized SearchOp (DefOp is never assigned in this method). + if ((o_reg == UseOp.type) && (o_reg == SearchOp.type)) { + unsigned short DefReg = SearchOp.reg; + unsigned short UseReg = UseOp.reg; + qfprintf(InfoAnnotFile, "%10x %6d INSTR CHECK TRUNCATION %d %s %d %s ZZ %s \n", + this->address, this->SMPcmd.size, (int) DefBitWidth, RegNames[DefReg], (int) UseBitWidth, + RegNames[UseReg], disasm); + } + else { + msg("ERROR: Non-reg operands in TRUNCATION annotation.\n"); + } + } + } + } // end of case 5, (3 == OptType) checking for TRUNCATION + return; +} // end of SMPInstr::EmitIntegerErrorAnnotations() + // Go through the PUSH RTL and get the operand pushed. 
op_t SMPInstr::GetPushedOpnd(void) { op_t VoidOp = InitOp; @@ -3965,6 +4222,8 @@ bool SMPInstr::BuildUnary2OpndRTL(SMPoperator UnaryOp) { SMPRegTransfer *TempRT = new SMPRegTransfer; SMPRegTransfer *RightRT = new SMPRegTransfer; int opcode = this->SMPcmd.itype; + bool ExtendedMove = ((NN_movsx == opcode) || (NN_movzx == opcode)); + op_t VoidOp = InitOp; op_t FlagsOp = InitOp; @@ -4025,6 +4284,8 @@ bool SMPInstr::BuildUnary2OpndRTL(SMPoperator UnaryOp) { } else { RightRT->SetLeftOperand(TempOp); + if (ExtendedMove) + this->MoveSource = TempOp; } } } @@ -4143,7 +4404,7 @@ bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) { this->GetDisasm()); } else { // IDA incorrectly lists [EDI] as both DEF and USE, because reg EDI - // is both DEF and USE. + // is both DEF and USE in NN_stos. msg("WARNING: MemDest and MemSrc in move at %x for %s\n", this->GetAddr(), this->GetDisasm()); } @@ -4273,6 +4534,7 @@ bool SMPInstr::BuildMoveRTL(SMPoperator GuardOp) { if (!MemSrc || MemDest || ((TempOp.type >= o_mem) && (TempOp.type <= o_displ))) { SourceFound = true; TempRT->SetRightOperand(TempOp); + this->MoveSource = TempOp; } } if (this->features & UseMacros[OpNum]) { diff --git a/SMPInstr.h b/SMPInstr.h index d1a3245032fc6c6b68eab695e29a1663aec2c6f2..194dbcc43d1f287fecfb9598c2aa7edffaa49113 100644 --- a/SMPInstr.h +++ b/SMPInstr.h @@ -210,6 +210,7 @@ public: inline SMPitype GetDataFlowType(void) const { return type; }; op_t MDGetMemUseOp(void); op_t MDGetMemDefOp(void); + inline op_t GetMoveSource(void) const { return MoveSource; }; // Set methods inline void SetBlock(SMPBasicBlock *Block) { BasicBlock = Block; }; @@ -264,6 +265,10 @@ public: bool MDIsFrameAllocInstr(void); bool MDIsFrameDeallocInstr(bool UseFP, asize_t LocSize); bool MDUsesCalleeSavedReg(void); + inline bool MDIsUnsignedArithmetic(void) const { return ((NN_mul == SMPcmd.itype) || (NN_div == SMPcmd.itype)); }; + bool MDIsUnsignedBranch(void) const; + bool MDIsSignedBranch(void) const; + inline bool 
IsLoadFromStack(void) const { return LoadFromStack; }; inline bool HasFlagsDef(void) const { return DefsFlags; }; inline bool HasFlagsUse(void) const { return UsesFlags; }; bool AllDEFsTyped(void); // No DEF is UNINIT @@ -281,17 +286,22 @@ public: void AnalyzeMarker(void); // Fill in basic data for top of function pseudo-instruction. void AnalyzeCallInst(ea_t FirstFuncAddr, ea_t LastFuncAddr); // Detect pseudo-calls void AnalyzeIndirectRefs(bool UseFP); // Detect indirect memory operands + void MDFindLoadFromStack(bool UseFP); // determine if instruction is load from stack loc + bool MDIsSignedLoad(unsigned short &SignMask); // true if sign or zero-extended; pass out mask bits bool BuildRTL(void); // Build RTL trees; return true if successfully built. void SyncAllRTs(void); // calls SyncRTLDefUse() for all RTs in RTL op_t GetPushedOpnd(void); // Extract source operand from PUSH RTL int MDGetImmedUse(void); // Get immed value from USE list of inst void SetImmedTypes(bool UseFP); // type all immediate operands as NUMERIC, CODEPTR, GLOBALPTR // and set other context-free types (ESP == STACKPTR, etc.) + void MDSetWidthSignInfo(bool UseFP); // Infer sign, bit width, etc. in simple cases within one instr + bool InferTypes(void); // return true if any DEF-USE or RTL operator types were updated. void AnnotateStackConstants(bool UseFP, FILE *AnnotFile); void EmitAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE *AnnotFile); // No RTLs available void EmitTypeAnnotations(bool UseFP, bool AllocSeen, bool NeedsFrame, FILE *AnnotFile); // Use RTL types void EmitSafeReturn(FILE *AnnotFile); // emit annotation to denote that the return belongs to a safe function. + void EmitIntegerErrorAnnotations(FILE *InfoAnnotFile); // emit check annotations for signedness, overflow, truncation, etc. 
void UpdateMemLoadTypes(SMPOperandType newType); private: @@ -321,12 +331,19 @@ private: op_t AddSubSourceOp; // operand corresponding to AddSubSourceType SMPOperandType AddSubUseType; // type of USE that is also DEFed by add /sub op_t AddSubUseOp; // operand corresponding to AddSubUseType + + // !!!! Look at redundancy and lack of caching of the four members below and optimize !!! op_t DestMemOp; // memory DEF operand from SMPcmd.Opnd list, if any op_t SrcMemOp; // memory USE operand from SMPcmd.Opnd list, if any op_t DEFMemOp; // memory DEF list opnd, if any op_t USEMemOp; // memory USE list opnd, if any + + op_t MoveSource; // source operand for any move instruction, including + // zero-extended or sign-extended. + bool IndirectMemWrite; // Writes through indirect memory operand bool IndirectMemRead; // Reads through indirect memory operand + bool LoadFromStack; // memory load from stack location bool TypeInferenceComplete; // All types have been resolved // For some type categories, inference just based on the category is done one time // only, so this is a variable that is set to true after the first inference diff --git a/SMPProgram.cpp b/SMPProgram.cpp index 66124a217abfe484ae565c13deec26e6d8213268..6c94467102784636f67dba2f2ee2b3bbcf3f3d4f 100644 --- a/SMPProgram.cpp +++ b/SMPProgram.cpp @@ -327,9 +327,10 @@ void SMPProgram::AnalyzeData(void) { } // end of SMPProgram::AnalyzeData() // Notification from ProfilerInformation that granularity inference is complete. -void SMPProgram::ProfGranularityFinished(FILE *AnnotFile) { +void SMPProgram::ProfGranularityFinished(FILE *AnnotFile, FILE *InfoAnnotFile) { this->ProfilerGranularityComplete = true; this->AnnotationFile = AnnotFile; + this->InfoAnnotationFile = InfoAnnotFile; return; } @@ -421,7 +422,7 @@ void SMPProgram::Analyze(ProfilerInformation *pi) { // cannot presently apply type inference info to the global data table. If we // want to do that in the future, we will have to redesign. 
assert(true == this->ProfilerGranularityComplete); - this->EmitDataAnnotations(this->AnnotationFile); + this->EmitDataAnnotations(this->AnnotationFile, this->InfoAnnotationFile); this->GlobalVarTable.clear(); this->GlobalNameMap.clear(); @@ -505,9 +506,9 @@ void SMPProgram::Analyze(ProfilerInformation *pi) { #if SMP_INFER_TYPES #if SMP_USE_SWITCH_TABLE_INFO - if (CurrFunc->HasGoodRTLs() && !CurrFunc->HasUnresolvedIndirectJumps()) { + if (CurrFunc->HasGoodRTLs() && !CurrFunc->HasUnresolvedIndirectJumps() && !CurrFunc->HasSharedChunks()) { #else - if (CurrFunc->HasGoodRTLs() && !CurrFunc->HasIndirectJumps()) { + if (CurrFunc->HasGoodRTLs() && !CurrFunc->HasIndirectJumps() && !CurrFunc->HasSharedChunks()) { #endif #if SMP_DEBUG_OPTIMIZATIONS msg("Inferring types for function %s\n", CurrFunc->GetFuncName()); @@ -548,7 +549,7 @@ void SMPProgram::Analyze(ProfilerInformation *pi) { } // end of SMPProgram::Analyze() // Emit global data annotations. -void SMPProgram::EmitDataAnnotations(FILE *AnnotFile) { +void SMPProgram::EmitDataAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile) { // Emit global static data annotations first. map<ea_t, struct GlobalVar>::iterator GlobalIter; for (GlobalIter = this->GlobalVarTable.begin(); GlobalIter != this->GlobalVarTable.end(); ++GlobalIter) { @@ -637,7 +638,7 @@ void SMPProgram::EmitDataAnnotations(FILE *AnnotFile) { } // Emit all annotations for the program. 
-void SMPProgram::EmitAnnotations(FILE *AnnotFile) { +void SMPProgram::EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile) { long TotalSafeBlocks = 0; // basic blocks with no unsafe writes long TotalUnsafeBlocks = 0; // basic blocks with unsafe writes @@ -646,7 +647,7 @@ void SMPProgram::EmitAnnotations(FILE *AnnotFile) { for (FuncIter = this->FuncMap.begin(); FuncIter != this->FuncMap.end(); ++FuncIter) { SMPFunction *TempFunc = FuncIter->second; if (TempFunc == NULL) continue; - TempFunc->EmitAnnotations(AnnotFile); + TempFunc->EmitAnnotations(AnnotFile, InfoAnnotFile); TotalSafeBlocks += TempFunc->GetSafeBlocks(); TotalUnsafeBlocks += TempFunc->GetUnsafeBlocks(); } // end for all functions diff --git a/SMPProgram.h b/SMPProgram.h index 5d911ce3fa9cbe9bcd1a312c15fd93943ced4f84..62b9aabb9fa68bf6d1ef21c31d007e95afeeee75 100644 --- a/SMPProgram.h +++ b/SMPProgram.h @@ -113,15 +113,15 @@ public: } inline ProfilerInformation *GetProfInfo(void) { return ProfInfo; }; // Set methods - void ProfGranularityFinished(FILE *AnnotFile); // notification from ProfilerInformation + void ProfGranularityFinished(FILE *AnnotFile, FILE *InfoAnnotFile); // notification from ProfilerInformation // Query methods // Printing methods void Dump(void); // debug dump // Analysis methods void AnalyzeData(void); // Analyze static data in the program. 
void Analyze(ProfilerInformation* pi); // Analyze all functions in the program - void EmitDataAnnotations(FILE *AnnotFile); // Emit annotations for global data - void EmitAnnotations(FILE *AnnotFile); // Emit annotations for all functions + void EmitDataAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile); // Emit annotations for global data + void EmitAnnotations(FILE *AnnotFile, FILE *InfoAnnotFile); // Emit annotations for all functions SMPFunction* FindFunction(ea_t addr) { return FuncMap[addr]; } private: @@ -129,6 +129,7 @@ private: bool ProfilerGranularityComplete; // Profiler-based granularity inference complete ProfilerInformation *ProfInfo; FILE *AnnotationFile; // need to store temporarily to pass to EmitDataAnnotations + FILE *InfoAnnotationFile; // need to store temporarily to pass to EmitDataAnnotations map<ea_t, SMPFunction *> FuncMap; // all functions in the program map<ea_t, struct GlobalVar> GlobalVarTable; // all global static variables map<string, ea_t> GlobalNameMap; // map global name to address diff --git a/SMPStaticAnalyzer.cpp b/SMPStaticAnalyzer.cpp index 28361f70f9f00db662eb488473549041617b60c0..fdc49959fe4e4040b137ec55dd4627beeb562246 100644 --- a/SMPStaticAnalyzer.cpp +++ b/SMPStaticAnalyzer.cpp @@ -230,7 +230,8 @@ void IDAP_term(void) { } void IDAP_run(int arg) { - FILE *SymsFile; + FILE *AnnotFile; + FILE *InfoAnnotFile; #if SMP_DEBUG msg("Beginning IDAP_run.\n"); @@ -241,9 +242,12 @@ void IDAP_run(int arg) { // Open the output file. 
ssize_t FileLen; FileLen = get_root_filename(RootFileName, sizeof(RootFileName) - 1); - string SymsFileName(RootFileName); + string AnnotFileName(RootFileName); string FileSuffix(".annot"); - SymsFileName += FileSuffix; + AnnotFileName += FileSuffix; + string InfoAnnotFileName(RootFileName); + string InfoFileSuffix(".infoannot"); + InfoAnnotFileName += InfoFileSuffix; ea_t RecentAddr; #if SMP_DEBUG_CODE_ORPHANS @@ -265,11 +269,17 @@ void IDAP_run(int arg) { CurrProg->AnalyzeData(); // Analyze static data in the executable // read the Profiler generated information into a new prof_info class - ProfilerInformation *prof_info = new ProfilerInformation(SymsFileName.c_str(), CurrProg); + ProfilerInformation *prof_info = new ProfilerInformation(AnnotFileName.c_str(), CurrProg); - SymsFile = qfopen(SymsFileName.c_str(), "w"); - if (NULL == SymsFile) { - error("FATAL ERROR: Cannot open output file %s\n", SymsFileName.c_str()); + AnnotFile = qfopen(AnnotFileName.c_str(), "w"); + if (NULL == AnnotFile) { + error("FATAL ERROR: Cannot open output file %s\n", AnnotFileName.c_str()); + delete prof_info; + return; + } + InfoAnnotFile = qfopen(InfoAnnotFileName.c_str(), "w"); + if (NULL == InfoAnnotFile) { + error("FATAL ERROR: Cannot open output file %s\n", InfoAnnotFileName.c_str()); delete prof_info; return; } @@ -309,9 +319,9 @@ void IDAP_run(int arg) { prof_info->GetMemoryAccessInfo()->InferDataGranularity(); msg("Returned from InferDataGranularity\n"); } - CurrProg->ProfGranularityFinished(SymsFile); + CurrProg->ProfGranularityFinished(AnnotFile, InfoAnnotFile); CurrProg->Analyze(prof_info); - CurrProg->EmitAnnotations(SymsFile); + CurrProg->EmitAnnotations(AnnotFile, InfoAnnotFile); #if SMP_DEBUG_CODE_ORPHANS RecentAddr = BADADDR; @@ -337,7 +347,7 @@ void IDAP_run(int arg) { RecentAddr = seg->startEA; #endif if (seg->type == SEG_CODE) - FindOrphanedCode(seg, SymsFile); + FindOrphanedCode(seg, AnnotFile); } #endif @@ -346,13 +356,14 @@ void IDAP_run(int arg) { OptType, 
OptCount[OptType], AnnotationCount[OptType]); } - qfclose(SymsFile); + qfclose(AnnotFile); + qfclose(InfoAnnotFile); delete CurrProg; delete prof_info; return; } // end IDAP_run() -char IDAP_comment[] = "UVa SMP/NICECAP Project"; +char IDAP_comment[] = "ZephyrSoftware STARS (Static Analyzer for Reliability and Security)"; char IDAP_help[] = "Good luck"; char IDAP_name[] = "SMPStaticAnalyzer"; char IDAP_hotkey[] = "Alt-J"; @@ -1598,7 +1609,8 @@ void InitOptCategory(void) { // Category 1 instructions never need updating of their memory // metadata by the Memory Monitor SDT. Currently, this is because // these instructions only have effects on registers we do not maintain - // metadata for, such as the EIP and the FLAGS, e.g. jumps, compares. + // metadata for, such as the EIP and the FLAGS, e.g. jumps, compares, + // or because they are no-ops, including machine-dependent no-op idioms. // Category 2 instructions always have a result type of 'n' (number). // Category 3 instructions have a result type of 'n' (number) // whenever the second source operand is an operand of type 'n'.