Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
}
break;
}
case 6: // Only OS code should include these; problem for SDT
{ if (MemDest) {
SDTInstrumentation = true;
break; // treat as category 0
}
qfprintf(AnnotFile, "%x %d INSTR LOCAL AlwaysPTR %s \n",
addr, -OptType, disasm);
++AnnotationCount[OptType];
break;
}
case 8: // Implicitly writes to EDX:EAX, always numeric.
{ qfprintf(AnnotFile, "%x %d INSTR LOCAL n EDX EAX ZZ %s %s \n",
addr, -2, OptExplanation[OptType], disasm);
++AnnotationCount[OptType];
SDTInstrumentation = true;
break;
}
case 9: // Either writes to FP reg (cat. 1) or memory (cat. 0)
{ if (MemDest) {
#if SMP_DEBUG
// MemDest seems to happen too much.
msg("Floating point MemDest: %s \n", disasm);
#endif
SDTInstrumentation = true;
break; // treat as category 0
}
qfprintf(AnnotFile, "%x %d INSTR LOCAL %s %s \n",
addr, -1, OptExplanation[OptType], disasm);
++AnnotationCount[OptType];
break;
}
default: // 2,3,7: Optimization possibilities depend on operands
{
#if SMP_DEBUG2
if (OptType == 3) { // MOV instr class
if (MemDest) {
msg("MemDest on MOV: %s\n", disasm);
}
else if (!SecondSrcOperandNum) {
msg("MOV: not 2nd op numeric: %s\n", disasm);
this->PrintOperands();
}
}
#endif
SDTInstrumentation = true;
if (MemDest) {
#if SMP_DEBUG_XOR
if (OptType == 2)
msg("MemDest on OptType 2: %s\n", disasm);
#endif
break; // treat as category 0
}
if ((OptType == 2) || (OptType == 7) || SecondSrcOperandNum) {
qfprintf(AnnotFile, "%x %d INSTR LOCAL n %s %s %s \n",
addr, -2, this->DestString(OptType),
OptExplanation[OptType], disasm);
++AnnotationCount[OptType];
}
break;
}
} // end switch (OptType)
// If mmStrata is going to have to deal with the
// instruction, then we can annotate EBP and ESP
// relative constant offsets. If we have emitted
// an annotation of type -1, there is no point
// in telling mmStrata about these constants.
if (SDTInstrumentation) {
this->AnnotateStackConstants(UseFP, AnnotFile);
}
return;
} // end of SMPInstr::EmitAnnotations()
// *****************************************************************
// Class SMPBasicBlock
// *****************************************************************
// Constructor
SMPBasicBlock::SMPBasicBlock(list<SMPInstr>::iterator First, list<SMPInstr>::iterator Last) {
this->IndirectJump = false;
this->Returns = false;
this->SharedTailChunk = false;
this->BlockNum = SMP_BLOCKNUM_UNINIT;
this->FirstAddr = First->GetAddr();
list<SMPInstr>::iterator CurrInst = First;
while (CurrInst != Last) {
this->Instrs.push_back(CurrInst);
++CurrInst;
}
this->Instrs.push_back(CurrInst); // Add last instruction
}
// Get address of first instruction in the block.
ea_t SMPBasicBlock::GetFirstAddr(void) const {
return this->FirstAddr;
}
// Equality operator for SMPBasicBlock. Key field is address of first instruction.
int SMPBasicBlock::operator==(const SMPBasicBlock &rhs) const {
if (rhs.GetFirstAddr() != this->FirstAddr)
return 0;
else
return 1;
}
// Link to predecessor block.
void SMPBasicBlock::LinkToPred(list<SMPBasicBlock>::iterator Predecessor) {
this->Predecessors.push_back(Predecessor);
return;
}
// Link to successor block.
void SMPBasicBlock::LinkToSucc(list<SMPBasicBlock>::iterator Successor) {
this->Successors.push_back(Successor);
return;
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
// See if all predecessors have set their ordering number.
bool SMPBasicBlock::AllPredecessorsNumbered(void) {
list<list<SMPBasicBlock>::iterator>::iterator CurrPred;
for (CurrPred = this->Predecessors.begin(); CurrPred != this->Predecessors.end(); ++CurrPred) {
// Don't count current block, in case we have a one-block loop with this block
// as its own predecessor.
if (**CurrPred == *this)
continue;
if ((*CurrPred)->GetNumber() == SMP_BLOCKNUM_UNINIT)
return false;
}
return true;
} // end of SMPBasicBlock::AllPredecessorsNumbered()
// Are all instructions in the block no-ops?
bool SMPBasicBlock::AllNops(void) {
size_t NopCount = 0;
size_t GoodCount = 0; // non-nop instructions
list<list<SMPInstr>::iterator>::iterator CurrInst;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
if ((*CurrInst)->MDIsNop())
++NopCount;
else
++GoodCount;
}
return ((0 == GoodCount) && (0 < NopCount));
} // end of SMPBasicBlock::AllNops()
// Analyze basic block and fill data members.
void SMPBasicBlock::Analyze() {
if (Instrs.back()->GetDataFlowType() == INDIR_JUMP) {
else if (Instrs.back()->MDIsReturnInstr()) {
this->Returns = true;
}
} // end of SMPBasicBlock::Analyze()
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
// DEBUG dump of block
void SMPBasicBlock::Dump(void) {
msg("Dump of basic block %d\n", this->BlockNum);
// Dump dataflow analysis sets and links before dumping instructions.
list<list<SMPBasicBlock>::iterator>::iterator CurrLink;
msg("Predecessors: ");
for (CurrLink = this->Predecessors.begin(); CurrLink != this->Predecessors.end(); ++CurrLink) {
msg("%d ", (*CurrLink)->GetNumber());
}
msg("\n");
msg("Successors: ");
for (CurrLink = this->Successors.begin(); CurrLink != this->Successors.end(); ++CurrLink) {
msg("%d ", (*CurrLink)->GetNumber());
}
msg("\n");
set<op_t, LessOp>::iterator SetItem;
msg("VarKill set: ");
for (SetItem = this->KillSet.begin(); SetItem != this->KillSet.end(); ++SetItem) {
PrintOneOperand(*SetItem, 0, -1);
}
msg("\n");
msg("UpExposed set: ");
for (SetItem = this->UpExposedSet.begin(); SetItem != this->UpExposedSet.end(); ++SetItem) {
PrintOneOperand(*SetItem, 0, -1);
}
msg("\n");
msg("LiveIn set: ");
for (SetItem = this->LiveInSet.begin(); SetItem != this->LiveInSet.end(); ++SetItem) {
PrintOneOperand(*SetItem, 0, -1);
}
msg("\n");
msg("LiveOut set: ");
for (SetItem = this->LiveOutSet.begin(); SetItem != this->LiveOutSet.end(); ++SetItem) {
PrintOneOperand(*SetItem, 0, -1);
}
msg("\n");
msg("Dominance frontier: ");
set<int>::iterator DomIter;
for (DomIter = this->DomFrontier.begin(); DomIter != this->DomFrontier.end(); ++DomIter) {
msg("%d ", *DomIter);
}
msg("\n");
set<SMPPhiFunction, LessPhi>::iterator PhiIter;
for (PhiIter = this->PhiFunctions.begin(); PhiIter != this->PhiFunctions.end(); ++PhiIter) {
msg("Phi function for %d : ", PhiIter->GetIndex());
#if 0 // cannot make this compile on linux/g++
// Dump out all phi operands
vector<DefOrUse>::iterator DefIter;
for (DefIter = PhiIter->GetFirstOp(); DefIter != PhiIter->GetLastOp(); ++DefIter) {
PrintOneOperand(DefIter->GetOp(), 0, -1);
msg(" SSAnum %d ", DefIter->GetSSANum());
}
#else // see if the compiler likes it this way!
for (size_t i = 0; i < PhiIter->GetPhiListSize(); ++i) {
DefOrUse PhiRef = PhiIter->GetPhiRef(i);
PrintOneOperand(PhiRef.GetOp(), 0, -1);
msg(" SSAnum %d ", PhiRef.GetSSANum());
}
#endif
msg("\n");
}
if (this->IndirectJump)
msg("Has indirect jump. ");
if (this->Returns)
msg("Has return. ");
if (this->SharedTailChunk)
msg("Is shared tail chunk block. ");
msg("\n");
// Now, dump all the instructions.
list<list<SMPInstr>::iterator>::iterator CurrInst;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
msg("%x : %s\n", (*CurrInst)->GetAddr(), (*CurrInst)->GetDisasm());
(*CurrInst)->PrintOperands();
}
msg("\n");
return;
} // end of SMPBasicBlock::Dump()
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
// Return true if anything already in the KillSet would kill the operand value.
bool SMPBasicBlock::MDAlreadyKilled(op_t Opnd1) const {
// We have assembly language operands that can be complex, such as
// [ebx + esi*4 + 04h]. If ebx or esi have been killed, then this memory
// phrase should be considered killed. We could be even more conservative
// with base addresses, declaring an entire array killed whenever its base
// address appears in a definition, for example. We will do that if it proves
// to be necessary.
bool FoundInKillSet = (KillSet.end() != KillSet.find(Opnd1));
switch (Opnd1.type) {
// Some types are simple to test for equality.
case o_void:
case o_reg:
case o_mem:
case o_imm:
case o_far:
case o_near:
// Look in KillSet. These simple types should be found there with
// no complicated comparisons needed.
return FoundInKillSet;
case o_phrase:
case o_displ:
// If found directly in KillSet, return true. Otherwise, see if any registers
// used in the memory addressing expression were killed.
if (FoundInKillSet)
return true;
else {
// Should we add Opnd1 to the KillSet every time we return true below? **!!**
op_t TempOp;
if (Opnd1.hasSIB) {
int BaseReg = sib_base(Opnd1);
short IndexReg = sib_index(Opnd1);
TempOp.type = o_reg;
TempOp.reg = (ushort) BaseReg;
if (this->KillSet.end() != this->KillSet.find(TempOp))
return true;
if (R_sp != IndexReg) { // Cannot have ESP index reg in SIB
TempOp.reg = (ushort) IndexReg;
if (this->KillSet.end() != this->KillSet.find(TempOp))
return true;
}
}
else { // no SIB
ushort BaseReg;
if (Opnd1.type == o_phrase)
BaseReg = Opnd1.phrase;
else // o_displ
BaseReg = Opnd1.reg;
TempOp.type = o_reg;
TempOp.reg = BaseReg;
if (this->KillSet.end() != this->KillSet.find(TempOp))
return true;
} // end if SIB ... else ...
} // end if (FoundInKillSet) ... else ...
msg("Unknown operand type %d in MDAlreadyKilled, block %d\n", Opnd1.type, this->BlockNum);
} // end of switch on Opnd1.type
return false;
} // end of SMPBasicBlock::MDAlreadyKilled()
// Initialize the KilledSet and UpExposedSet for live variable analysis.
void SMPBasicBlock::InitKilledExposed(void) {
// Find all upwardly exposed operands and killed operands in this block.
list<list<SMPInstr>::iterator>::iterator CurrIter;
for (CurrIter = this->Instrs.begin(); CurrIter != this->Instrs.end(); ++CurrIter) {
list<SMPInstr>::iterator CurrInst = *CurrIter;
// Dataflow equation for upward exposed variables: If a variable has not been
// killed yet in this block, starting from the top of the block, and it is used
// in the current instruction, then it is upwardly exposed.
size_t limit = CurrInst->NumUses();
for (size_t index = 0; index < limit; ++index) {
op_t UseOp = CurrInst->GetUse(index).GetOp();
// Only add non-immediate operands that are not already killed in this block.
// o_near and o_far operands are code addresses in immediate form, e.g.
// call _printf might be call 0x8048040, with o_near = 0x8048040.
if ((!(this->MDAlreadyKilled(UseOp)))
&& (UseOp.type != o_imm) && (UseOp.type != o_near) && (UseOp.type != o_far))
this->UpExposedSet.insert(CurrInst->GetUse(index).GetOp());
}
// Dataflow equation for killed variables: If a variable is defined in any
// instruction in the block, it is killed by this block (i.e. prior definitions
// of that variable will not make it through the block).
limit = CurrInst->NumDefs();
for (size_t index = 0; index < limit; ++index) {
this->KillSet.insert(CurrInst->GetDef(index).GetOp());
}
} // end for all instrs in block
this->IsLiveInStale = true; // Would need to compute LiveInSet for first time
return;
} // end of SMPBasicBlock::InitKilledExposed()
// Return an iterator for the beginning of the LiveInSet. If the set is stale,
// recompute it first.
set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveIn(void) {
if (this->IsLiveInStale) {
// Dataflow equation: A variable is live-in to this block if it
// is upwardly exposed from this block, or if it passes through
// the block unchanged (i.e. it is not killed and is live out).
this->LiveInSet.clear();
set<op_t, LessOp>::iterator OutIter;
for (OutIter = this->UpExposedSet.begin(); OutIter != this->UpExposedSet.end(); ++OutIter) {
this->LiveInSet.insert(*OutIter);
}
for (OutIter = this->LiveOutSet.begin(); OutIter != this->LiveOutSet.end(); ++OutIter) {
if (KillSet.end() == this->KillSet.find(*OutIter)) // Found live out but not killed
this->LiveInSet.insert(*OutIter);
}
this->IsLiveInStale = false;
}
return this->LiveInSet.begin();
} // end of SMPBasicBlock::GetFirstLiveIn()
// Get termination iterator marker for the LiveIn set, for use by predecessors.
set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveIn(void) {
// Does not matter if it is stale or not; end marker is the same
return this->LiveInSet.end();
}
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
// Get iterator for the start of the LiveOut set.
set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveOut(void) {
return this->LiveOutSet.begin();
}
// Get termination iterator marker for the LiveOut set.
set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveOut(void) {
return this->LiveOutSet.end();
}
// Get iterator for the start of the VarKill set.
set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstVarKill(void) {
return this->KillSet.begin();
}
// Get termination iterator marker for the VarKill set.
set<op_t, LessOp>::iterator SMPBasicBlock::GetLastVarKill(void) {
return this->KillSet.end();
}
// Get iterator for the start of the UpExposed set.
set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstUpExposed(void) {
return this->UpExposedSet.begin();
}
// Get termination iterator marker for the UpExposed set.
set<op_t, LessOp>::iterator SMPBasicBlock::GetLastUpExposed(void) {
return this->UpExposedSet.end();
}
// Get iterator for the start of the DomFrontier set.
set<int>::iterator SMPBasicBlock::GetFirstDomFrontier(void) {
return this->DomFrontier.begin();
}
// Get termination iterator marker for the DomFrontier set.
set<int>::iterator SMPBasicBlock::GetLastDomFrontier(void) {
return this->DomFrontier.end();
}
// Get iterator for first Phi function.
set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::GetFirstPhi(void) {
return this->PhiFunctions.begin();
}
// Get termination iterator marker for Phi functions set.
set<SMPPhiFunction, LessPhi>::iterator SMPBasicBlock::GetLastPhi(void) {
return this->PhiFunctions.end();
}
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
// Update the LiveOut set for the block.
// Return true if it changed, false otherwise.
bool SMPBasicBlock::UpdateLiveOut(void) {
bool changed = false;
set<op_t, LessOp> OldLiveOut(this->LiveOutSet); // save copy of old LiveOutSet
this->LiveOutSet.clear(); // Clear it and rebuild it
// Dataflow equation for LiveOutSet: If a variable is live-in for any successor
// block, it is live out for this block.
list<list<SMPBasicBlock>::iterator>::iterator SuccIter;
for (SuccIter = this->Successors.begin(); SuccIter != this->Successors.end(); ++SuccIter) {
set<op_t, LessOp>::iterator InSuccIter;
for (InSuccIter = (*SuccIter)->GetFirstLiveIn(); InSuccIter != (*SuccIter)->GetLastLiveIn(); ++InSuccIter) {
this->LiveOutSet.insert(*InSuccIter);
}
}
// Only remaining question: Did the LiveOutSet change?
// Short cut: If the set cardinality changed, then the set changed.
if (this->LiveOutSet.size() != OldLiveOut.size()) {
changed = true;
}
else { // Same # of elements; move through in lockstep and compare.
set<op_t, LessOp>::iterator NewIter = this->LiveOutSet.begin();
set<op_t, LessOp>::iterator OldIter = OldLiveOut.begin();
set<op_t, LessOp>::value_compare OpComp = OldLiveOut.value_comp(); // LessOp()
while (OldIter != OldLiveOut.end()) { // both iters terminate simultaneously
if (OpComp(*OldIter, *NewIter) || OpComp(*NewIter, *OldIter)) {
changed = true;
break;
}
++OldIter;
++NewIter;
}
}
if (changed)
this->IsLiveInStale = true;
OldLiveOut.clear();
return changed;
} // end of SMPBasicBlock::UpdateLiveOut()
// Insert RPO number block into the dominance frontier set.
void SMPBasicBlock::AddToDomFrontier(int block) {
this->DomFrontier.insert(block);
return;
} // end of SMPBasicBlock::AddToDomFrontier()
// Add a phi function to the list of phi functions entering this block.
// If phi function for this global name already existed in the block,
// return false because no new phi function was added; else return true.
bool SMPBasicBlock::AddPhi(SMPPhiFunction NewPhi) {
if (this->PhiFunctions.end() == this->PhiFunctions.find(NewPhi)) {
this->PhiFunctions.insert(NewPhi);
return true;
}
else
return false;
} // end of SMPBasicBlock::AddPhi()
// *****************************************************************
// Class SMPFunction
// *****************************************************************
// Constructor
SMPFunction::SMPFunction(func_t *Info) {
this->IndirectCalls = false;
this->SharedChunks = false;
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
return;
}
// Figure out the different regions of the stack frame, and find the
// instructions that allocate and deallocate the local variables space
// on the stack frame.
// The stack frame info will be used to emit stack
// annotations when Analyze() reaches the stack allocation
// instruction that sets aside space for local vars.
// Set the address of the instruction at which these
// annotations should be emitted. This should normally
// be an instruction such as: sub esp,48
// However, for a function with no local variables at all,
// we will need to determine which instruction should be
// considered to be the final instruction of the function
// prologue and return its address.
// Likewise, we find the stack deallocating instruction in
// the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
bool FoundAllocInstr = false;
bool FoundDeallocInstr = false;
// The sizes of the three regions of the stack frame other than the
// return address are stored in the function structure.
this->LocalVarsSize = this->FuncInfo.frsize;
this->CalleeSavedRegsSize = this->FuncInfo.frregs;
this->IncomingArgsSize = this->FuncInfo.argsize;
// The return address size can be obtained in a machine independent
// way by calling get_frame_retsize().
this->RetAddrSize = get_frame_retsize(&(this->FuncInfo));
// IDA Pro has trouble with functions that do not have any local
// variables. Unfortunately, the C library has plenty of these
// functions. IDA usually claims that frregs is zero and frsize
// is N, when the values should have been reversed. We can attempt
// to detect this and fix it.
bool FrameInfoFixed = this->MDFixFrameInfo();
#if SMP_DEBUG_FRAMEFIXUP
if (FrameInfoFixed) {
msg("Fixed stack frame size info: %s\n", this->FuncName);
SMPBasicBlock CurrBlock = this->Blocks.front();
msg("First basic block:\n");
for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr();
CurrInstr != CurrBlock.GetLastInstr();
++CurrInstr) {
msg("%s\n", (*CurrInstr)->GetDisasm());
}
}
#endif
// Now, if LocalVarsSize is not zero, we need to find the instruction
// in the function prologue that allocates space on the stack for
// local vars. This code could be made more robust in the future
// by matching LocalVarsSize to the immediate value in the allocation
// instruction. However, IDA Pro is sometimes a little off on this
// number. **!!**
if (0 < this->LocalVarsSize) {
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
ea_t addr = CurrInstr->GetAddr();
// Keep the most recent instruction in the DeallocInstr
// in case we reach the return without seeing a dealloc.
if (!FoundDeallocInstr) {
this->LocalVarsDeallocInstr = addr;
}
if (!FoundAllocInstr
&& CurrInstr->MDIsFrameAllocInstr()) {
this->LocalVarsAllocInstr = addr;
FoundAllocInstr = true;
// As soon as we have found the local vars allocation,
// we can try to fix incorrect sets of UseFP by IDA.
// NOTE: We might want to extend this in the future to
// handle functions that have no locals. **!!**
bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
if (FixedUseFP) {
msg("Fixed UseFP in %s\n", this->FuncName);
}
#endif
}
else if (FoundAllocInstr) {
// We can now start searching for the DeallocInstr.
if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
// Keep saving the most recent addr that looks
// like the DeallocInstr until we reach the
// end of the function. Last one to look like
// it is used as the DeallocInstr.
this->LocalVarsDeallocInstr = addr;
FoundDeallocInstr = true;
}
}
} // end for (list<SMPInstr>::iterator CurrInstr ... )
if (!FoundAllocInstr) {
// Could not find the frame allocating instruction. Bad.
// Emit diagnostic and use the first instruction in the
// function as a pseudo-allocation instruction to emit
// some stack frame info (return address, etc.)
this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize);
#if SMP_DEBUG_FRAMEFIXUP
if (BADADDR == this->LocalVarsAllocInstr) {
msg("ERROR: Could not find stack frame allocation in %s\n",
FuncName);
msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n",
LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
}
else {
msg("FindAllocPoint found %x for function %s\n",
this->LocalVarsAllocInstr, this->GetFuncName());
}
#endif
#if SMP_DEBUG_FIX_FRAMEINFO
if (!FoundDeallocInstr) {
// Could not find the frame deallocating instruction. Bad.
// Emit diagnostic and use the last instruction in the
// function.
msg("ERROR: Could not find stack frame deallocation in %s\n",
FuncName);
}
#endif
}
// else LocalVarsSize was zero, meaning that we need to search
// for the end of the function prologue code and emit stack frame
// annotations from that address (i.e. this method returns that
// address). We will approximate this by finding the end of the
// sequence of PUSH instructions at the beginning of the function.
// The last PUSH instruction should be the last callee-save-reg
// instruction. We can make this more robust in the future by
// making sure that we do not count a PUSH of anything other than
// a register. **!!**
// NOTE: 2nd prologue instr is usually mov ebp,esp
// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
else {
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
insn_t CurrCmd = CurrInstr->GetCmd();
ea_t addr = CurrInstr->GetAddr();
if (CurrCmd.itype == NN_push)
SaveAddr = addr;
else
break;
}
this->LocalVarsAllocInstr = SaveAddr;
this->LocalVarsDeallocInstr = 0;
} // end if (LocalVarsSize > 0) ... else ...
#if 0
// Now we need to do the corresponding operations from the
// end of the function to find the DeallocInstr in the
// function epilogue. Because there is no addition to the
// stack pointer to deallocate the local vars region, the
// function epilogue will consist of (optional) pops of
// callee-saved regs, followed by the return instruction.
// Working backwards, we should find a return and then
// stop when we do not find any more pops.
if (0 >= LocalVarsSize) {
this->LocalVarsDeallocInstr = NULL;
}
else {
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
bool FoundRet = false;
do {
ea_t addr = get_item_head(SaveAddr);
flags_t InstrFlags = getFlags(addr);
if (isCode(addr) && isHead(addr)) {
ua_ana0(addr);
if (!FoundRet) { // Just starting out.
if (MDIsReturnInstr(cmd)) {
FoundRet = true;
SaveAddr = addr - 1;
}
else {
msg("ERROR: Last instruction not a return.\n");
}
}
else { // Should be 0 or more POPs before the return.
if (MDIsPopInstr(cmd)) {
SaveAddr = addr - 1;
}
else if (FrameAllocInstr(cmd, this->LocalVarsSize)) {
this->LocalVarsDeallocInstr = addr;
}
else {
msg("ERROR: Frame deallocation not prior to POPs.\n");
this->LocalVarsDeallocInstr = SaveAddr + 1;
}
} // end if (!FoundRet) ... else ...
}
else {
--SaveAddr;
} // end if (isCode(addr) && isHead(addr))
} while (NULL == this->LocalVarsDeallocInstr);
} // end if (0 >= this->LocalVarsSize)
#endif // 0
return;
} // end of SMPFunction::SetStackFrameInfo()
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
// IDA Pro defines the sizes of regions in the stack frame in a way
// that suits its purposes but not ours. the frsize field of the func_info_t
// structure measures the distance between the stack pointer and the
// frame pointer (ESP and EBP in the x86). This region includes some
// of the callee-saved registers. So, the frregs field only includes
// the callee-saved registers that are above the frame pointer.
// x86 standard prologue on gcc/linux:
// push ebp ; save old frame pointer
// mov ebp,esp ; new frame pointer = current stack pointer
// push esi ; callee save reg
// push edi ; callee save reg
// sub esp,34h ; allocate 52 bytes for local variables
//
// Notice that EBP acquires its final frame pointer value AFTER the
// old EBP has been pushed. This means that, of the three callee saved
// registers, one is above where EBP points and two are below.
// IDA Pro is concerned with generating readable addressing expressions
// for items on the stack. None of the callee-saved regs will ever
// be addressed in the function; they will be dormant until they are popped
// off the stack in the function epilogue. In order to create readable
// disassembled code, IDA defines named constant offsets for locals. These
// offsets are negative values (x86 stack grows downward from EBP toward
// ESP). When ESP_relative addressing occurs, IDA converts a statement:
// mov eax,[esp+12]
// into the statement:
// mov eax,[esp+3Ch+var_30]
// Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
// var_30 is defined to ahve the value -30h == -48 decimal. So, the
// "frame size" in IDA Pro is 60 bytes, and a certain local can be
// addressed in ESP-relative manner as shown, or as [ebp+var_30] for
// EBP-relative addressing. The interactive IDA user can then edit
// the name var_30 to something mnemonic, such as "virus_size", and IDA
// will replace all occurrences with the new name, so that code references
// automatically become [ebp+virus_size]. As the user proceeds
// interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
// our analyses have a compiler perspective as well as a memory access
// defense perspective. SMP distinguishes between callee saved regs,
// which should not be overwritten in the function body, and local
// variables, which can be written. We view the stack frame in logical
// pieces: here are the saved regs, here are the locals, here is the
// return address, etc. We don't care which direction from EBP the
// callee-saved registers lie; we don't want to lump them in with the
// local variables. We also don't like the fact that IDA Pro will take
// the function prologue code shown above and declare frregs=4 and
// frsize=60, because frsize no longer matches the stack allocation
// statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
// private data members for the function, while leaving the IDA database
// alone because IDA needs to maintain its own definitions of these
// variables.
// Fixing means we will update the data members LocalVarsSize and
// CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
// The prologue and epilogue code generated by gcc-linux is as discussed
// above, while on Visual Studio and other Windows x86 compilers, the
// saving of registers other than EBP happens AFTER local stack allocation.
// A Windows version of the function would expect to see the pushing
// of ESI and EDI AFTER the sub esp,34h statement.
int SavedRegsSize = 0;
int OtherPushesSize = 0; // besides callee-saved regs
int NewLocalsSize = 0;
int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
bool Changed = false;
// Iterate through the first basic block in the function. If we find
// a frame allocating Instr in it, then we have local vars. If not,
// we don't, and LocalVarsSize should have been zero. Count the callee
// register saves leading up to the local allocation. Set data members
// according to what we found if the values of the data members would
// change.
for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr();
CurrIter != CurrBlock.GetLastInstr();
++CurrIter) {
list<SMPInstr>::iterator CurrInstr = *CurrIter;
if (CurrInstr->MDIsPushInstr()) {
// We will make the gcc-linux assumption that a PUSH in
// the first basic block, prior to the stack allocating
// instruction, is a callee register save. To make this
// more robust, we ensure that the register is from
// the callee saved group of registers, and that it has
// not been defined thus far in the function (else it might
// be a push of an outgoing argument to a call that happens
// in the first block when there are no locals). **!!!!**
if (CurrInstr->MDUsesCalleeSavedReg()
&& !CurrInstr->HasSourceMemoryOperand()) {
SavedRegsSize += 4; // **!!** should check the size
}
else {
// Pushes of outgoing args can be scheduled so that
// they are mixed with the pushes of callee saved regs.
OtherPushesSize += 4;
}
}
else if (CurrInstr->MDIsFrameAllocInstr()) {
SavedRegsSize += OtherPushesSize;
// Get the size being allocated.
for (size_t index = 0; index < CurrInstr->NumUses(); ++index) {
// Find the immediate operand.
if (o_imm == CurrInstr->GetUse(index).GetOp().type) {
// Get its value into LocalVarsSize.
long AllocValue = (signed long) CurrInstr->GetUse(index).GetOp().value;
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
// One compiler might have sub esp,24 and another
// might have add esp,-24. Take the absolute value.
if (0 > AllocValue)
AllocValue = -AllocValue;
if (AllocValue != (long) this->LocalVarsSize) {
Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
if (AllocValue + SavedRegsSize != OldFrameTotal)
msg("Total frame size changed: %s\n", this->FuncName);
#endif
this->LocalVarsSize = (asize_t) AllocValue;
this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
NewLocalsSize = this->LocalVarsSize;
}
else { // Old value was correct; no change.
NewLocalsSize = this->LocalVarsSize;
if (SavedRegsSize != this->CalleeSavedRegsSize) {
this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
msg("Only callee regs size changed: %s\n", this->FuncName);
#endif
}
}
} // end if (o_imm == ...)
} // end for all uses
break; // After frame allocation instr, we are done
} // end if (push) .. elsif frame allocating instr
} // end for all instructions in the first basic block
// If we did not find an allocating instruction, see if it would keep
// the total size the same to set LocalVarsSize to 0 and to set
// CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
// might be better off to leave the numbers alone.
if (!Changed && (NewLocalsSize == 0)) {
if (OldFrameTotal == SavedRegsSize) {
this->CalleeSavedRegsSize = SavedRegsSize;
this->LocalVarsSize = 0;
Changed = true;
}
#if SMP_DEBUG_FRAMEFIXUP
else {
msg("Could not update frame sizes: %s\n", this->FuncName);
}
#endif
#if SMP_DEBUG_FRAMEFIXUP
if ((0 < OtherPushesSize) && (0 < NewLocalsSize))
msg("Extra pushes found of size %d in %s\n", OtherPushesSize,
this->FuncName);
#endif
return Changed;
// Some functions have difficult to find stack allocations. For example, in some
// version of glibc, strpbrk() zeroes out register ECX and then pushes it more than
// 100 times in order to allocate zero-ed out local vars space for a character translation
// table. We will use the stack pointer analysis of IDA to find out if there is a point
// in the first basic block at which the stack pointer reaches the allocation total
// that IDA is expecting for the local vars region.
// If so, we return the address of the instruction at which ESP reaches its value, else
// we return BADADDR.
ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
bool DebugFlag = (0 == strncmp("strpbrk", this->GetFuncName(), 7));
sval_t TargetSize = - ((sval_t) OriginalLocSize); // negate; stack grows down
#if SMP_DEBUG_FRAMEFIXUP
if (DebugFlag)
msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize);
#endif
// Limit our analysis to the first basic block in the function.
list<SMPInstr>::iterator TempIter = *(--(this->Blocks.front().GetLastInstr()));
ea_t AddrLimit = TempIter->GetAddr();
for (list<list<SMPInstr>::iterator>::iterator CurrIter = this->Blocks.front().GetFirstInstr();
CurrIter != this->Blocks.front().GetLastInstr();
++CurrIter) {
list<SMPInstr>::iterator CurrInstr = *CurrIter;
ea_t addr = CurrInstr->GetAddr();
// get_spd() returns a cumulative delta of ESP
sval_t sp_delta = get_spd(&(this->FuncInfo), addr);
#if SMP_DEBUG_FRAMEFIXUP
if (DebugFlag)
msg("strpbrk delta: %d at %x\n", sp_delta, addr);
#endif
if (sp_delta == TargetSize) {
// Previous instruction hit the frame size.
if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) {
return BADADDR; // cannot back up from first instruction
}
else {
return (--CurrInstr)->GetAddr();
}
}
}
// SP delta is marked at the beginning of an instruction to show the SP
// after the effects of the previous instruction. Maybe the last instruction
// is the first time the SP achieves its desired value, which will not be shown
// until the first instruction of the next basic block if it just falls through.
// We can compute the delta AFTER the last instruction using get_spd+get_sp_delta.
list<SMPInstr>::iterator FinalInstr = *(--(this->Blocks.front().GetLastInstr()));
ea_t FinalAddr = FinalInstr->GetAddr();
sval_t FinalDelta = get_spd(&(this->FuncInfo), FinalAddr);
if (!FinalInstr->IsBasicBlockTerminator()) {
// Special case. The basic block does not terminate with a branch or
// return, but falls through to the start of a loop, most likely.
// Thus, the last instruction CAN increase the sp_delta, unlike
// a jump or branch, and the sp_delta would not hit the target until
// the first instruction in the second block. We can examine the
// effect on the stack pointer of this last instruction to see if it
// causes the SP delta to hit the OriginalLocSize.
sval_t LastInstrDelta = get_sp_delta(&(this->FuncInfo), FinalAddr);
if (TargetSize == (FinalDelta + LastInstrDelta)) {
// Return very last instruction (don't back up 1 here)
return FinalAddr;
}
}
#if SMP_DEBUG_FRAMEFIXUP
else {
msg("analyzed_sp() is false for %s\n", this->GetFuncName());
}
#endif
return BADADDR;
} // end of SMPFunction::FindAllocPoint()
// IDA Pro is sometimes confused by a function that uses the frame pointer
// register for other purposes. For the x86, a function that uses EBP
// as a frame pointer would begin with: push ebp; mov ebp,esp to save
// the old value of EBP and give it a new value as a frame pointer. The
// allocation of local variable space would have to come AFTER the move
// instruction. A function that begins: push ebp; push esi; sub esp,24
// is obviously not using EBP as a frame pointer. IDA is apparently
// confused by the push ebp instruction being the first instruction
// in the function. We will reset UseFP to false in this case.
// NOTE: This logic should work for both Linux and Windows x86 prologues.
bool SMPFunction::MDFixUseFP(void) {
list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
ea_t addr = CurrInstr->GetAddr();
if (!UseFP)
return false; // Only looking to reset true to false.
while (addr < this->LocalVarsAllocInstr) {
size_t DefIndex = 0;
while (DefIndex < CurrInstr->NumDefs()) {
if (CurrInstr->GetDef(DefIndex).GetOp().is_reg(R_bp))
return false; // EBP got set before locals were allocated
++DefIndex;
}
++CurrInstr;
addr = CurrInstr->GetAddr();
}
// If we found no defs of the frame pointer before the local vars
// allocation, then the frame pointer register is not being used
// as a frame pointer, just as a general callee-saved register.
this->UseFP = false;
return true;
} // end of SMPFunction::MDFixUseFP()
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
// Emit the annotations describing the regions of the stack frame.
void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) {
ea_t addr = Instr->GetAddr();
if (0 < IncomingArgsSize)
qfprintf(AnnotFile, "%x %d INARGS STACK esp + %d %s \n",
addr, IncomingArgsSize,
(LocalVarsSize + CalleeSavedRegsSize + RetAddrSize),
Instr->GetDisasm());
if (0 < RetAddrSize)
qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d ReturnAddress \n",
addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize));
if (0 < CalleeSavedRegsSize)
qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n",
addr, CalleeSavedRegsSize, LocalVarsSize);
if (0 < LocalVarsSize)
qfprintf(AnnotFile, "%x %d LOCALFRAME STACK esp + %d LocalVars \n",
addr, LocalVarsSize, 0);
return;
} // end of SMPFunction::EmitStackFrameAnnotations()
// Main data flow analysis driver. Goes through the function and
// fills all objects for instructions, basic blocks, and the function
// itself.
void SMPFunction::Analyze(void) {
list<SMPInstr>::iterator FirstInBlock = this->Instrs.end();
// For starting a basic block
list<SMPInstr>::iterator LastInBlock = this->Instrs.end();
// Terminating a basic block
#if SMP_DEBUG_CONTROLFLOW
msg("Entering SMPFunction::Analyze.\n");
#endif
// Get some basic info from the FuncInfo structure.
this->Size = this->FuncInfo.endEA - this->FuncInfo.startEA;
this->UseFP = (0 != (this->FuncInfo.flags & (FUNC_FRAME | FUNC_BOTTOMBP)));
this->StaticFunc = (0 != (this->FuncInfo.flags & FUNC_STATIC));
get_func_name(this->FuncInfo.startEA, this->FuncName,