Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
// Find all upwardly exposed operands and killed operands in this block.
list<list<SMPInstr>::iterator>::iterator CurrIter;
for (CurrIter = this->Instrs.begin(); CurrIter != this->Instrs.end(); ++CurrIter) {
list<SMPInstr>::iterator CurrInst = *CurrIter;
// Dataflow equation for upward exposed variables: If a variable has not been
// killed yet in this block, starting from the top of the block, and it is used
// in the current instruction, then it is upwardly exposed.
size_t limit = CurrInst->NumUses();
for (size_t index = 0; index < limit; ++index) {
if (this->MDAlreadyKilled(CurrInst->GetUse(index)))
this->UpExposedSet.insert(CurrInst->GetUse(index));
}
// Dataflow equation for killed variables: If a variable is defined in any
// instruction in the block, it is killed by this block (i.e. prior definitions
// of that variable will not make it through the block).
limit = CurrInst->NumDefs();
for (size_t index = 0; index < limit; ++index) {
this->KillSet.insert(CurrInst->GetDef(index));
}
} // end for all instrs in block
this->IsLiveInStale = true; // Would need to compute LiveInSet for first time
return;
} // end of SMPBasicBlock::InitKilledExposed()
// Return an iterator for the beginning of the LiveInSet. If the set is stale,
// recompute it first.
set<op_t, LessOp>::iterator SMPBasicBlock::GetFirstLiveIn(void) {
if (this->IsLiveInStale) {
// Dataflow equation: A variable is live-in to this block if it
// is upwardly exposed from this block, or if it passes through
// the block unchanged (i.e. it is not killed and is live out).
this->LiveInSet.clear();
set<op_t, LessOp>::iterator OutIter;
this->LiveInSet.insert(this->UpExposedSet.begin(), this->UpExposedSet.end());
for (OutIter = this->LiveOutSet.begin(); OutIter != this->LiveOutSet.end(); ++OutIter) {
if (KillSet.end() != this->KillSet.find(*OutIter)) // Found live out but not killed
this->LiveInSet.insert(*OutIter);
}
this->IsLiveInStale = false;
}
return this->LiveInSet.begin();
} // end of SMPBasicBlock::GetFirstLiveIn()
// Get termination iterator marker for the LiveIn set, for use by predecessors.
set<op_t, LessOp>::iterator SMPBasicBlock::GetLastLiveIn(void) {
// Does not matter if it is stale or not; end marker is the same
return this->LiveInSet.end();
}
// Update the LiveOut set for the block.
// Return true if it changed, false otherwise.
bool SMPBasicBlock::UpdateLiveOut(void) {
bool changed = false;
set<op_t, LessOp> OldLiveOut(this->LiveOutSet); // save copy of old LiveOutSet
this->LiveOutSet.clear(); // Clear it and rebuild it
// Dataflow equation for LiveOutSet: If a variable is live-in for any successor
// block, it is live out for this block.
list<list<SMPBasicBlock>::iterator>::iterator SuccIter;
for (SuccIter = this->Successors.begin(); SuccIter != this->Successors.end(); ++SuccIter) {
set<op_t, LessOp>::iterator InSuccIter;
for (InSuccIter = (*SuccIter)->GetFirstLiveIn(); InSuccIter != (*SuccIter)->GetLastLiveIn(); ++InSuccIter) {
this->LiveOutSet.insert(*InSuccIter);
}
}
// Only remaining question: Did the LiveOutSet change?
// Short cut: If the set cardinality changed, then the set changed.
if (this->LiveOutSet.size() != OldLiveOut.size()) {
changed = true;
}
else { // Same # of elements; move through in lockstep and compare.
set<op_t, LessOp>::iterator NewIter = this->LiveOutSet.begin();
set<op_t, LessOp>::iterator OldIter = OldLiveOut.begin();
set<op_t, LessOp>::value_compare OpComp = OldLiveOut.value_comp(); // LessOp()
while (OldIter != OldLiveOut.end()) { // both iters terminate simultaneously
if (OpComp(*OldIter, *NewIter) || OpComp(*NewIter, *OldIter)) {
changed = true;
break;
}
++OldIter;
++NewIter;
}
}
if (changed)
this->IsLiveInStale = true;
OldLiveOut.clear();
return changed;
} // end of SMPBasicBlock::UpdateLiveOut()
// *****************************************************************
// Class SMPFunction
// *****************************************************************
// Constructor
SMPFunction::SMPFunction(func_t *Info) {
this->FuncInfo = Info;
this->IndirectCalls = false;
this->SharedChunks = false;
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
return;
}
// Figure out the different regions of the stack frame, and find the
// instructions that allocate and deallocate the local variables space
// on the stack frame.
// The stack frame info will be used to emit stack
// annotations when Analyze() reaches the stack allocation
// instruction that sets aside space for local vars.
// Set the address of the instruction at which these
// annotations should be emitted. This should normally
// be an instruction such as: sub esp,48
// However, for a function with no local variables at all,
// we will need to determine which instruction should be
// considered to be the final instruction of the function
// prologue and return its address.
// Likewise, we find the stack deallocating instruction in
// the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
bool FoundAllocInstr = false;
bool FoundDeallocInstr = false;
// The sizes of the three regions of the stack frame other than the
// return address are stored in the function structure.
this->LocalVarsSize = this->FuncInfo->frsize;
this->CalleeSavedRegsSize = this->FuncInfo->frregs;
this->IncomingArgsSize = this->FuncInfo->argsize;
// The return address size can be obtained in a machine independent
// way by calling get_frame_retsize().
this->RetAddrSize = get_frame_retsize(this->FuncInfo);
// IDA Pro has trouble with functions that do not have any local
// variables. Unfortunately, the C library has plenty of these
// functions. IDA usually claims that frregs is zero and frsize
// is N, when the values should have been reversed. We can attempt
// to detect this and fix it.
bool FrameInfoFixed = this->MDFixFrameInfo();
#if SMP_DEBUG_FRAMEFIXUP
if (FrameInfoFixed) {
msg("Fixed stack frame size info: %s\n", this->FuncName);
SMPBasicBlock CurrBlock = this->Blocks.front();
msg("First basic block:\n");
for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr();
CurrInstr != CurrBlock.GetLastInstr();
++CurrInstr) {
msg("%s\n", (*CurrInstr)->GetDisasm());
}
}
#endif
// Now, if LocalVarsSize is not zero, we need to find the instruction
// in the function prologue that allocates space on the stack for
// local vars. This code could be made more robust in the future
// by matching LocalVarsSize to the immediate value in the allocation
// instruction. However, IDA Pro is sometimes a little off on this
// number. **!!**
if (0 < this->LocalVarsSize) {
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
ea_t addr = CurrInstr->GetAddr();
// Keep the most recent instruction in the DeallocInstr
// in case we reach the return without seeing a dealloc.
if (!FoundDeallocInstr) {
this->LocalVarsDeallocInstr = addr;
}
if (!FoundAllocInstr
&& CurrInstr->MDIsFrameAllocInstr()) {
this->LocalVarsAllocInstr = addr;
FoundAllocInstr = true;
// As soon as we have found the local vars allocation,
// we can try to fix incorrect sets of UseFP by IDA.
// NOTE: We might want to extend this in the future to
// handle functions that have no locals. **!!**
bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_FRAMEFIXUP
if (FixedUseFP) {
msg("Fixed UseFP in %s\n", this->FuncName);
}
#endif
}
else if (FoundAllocInstr) {
// We can now start searching for the DeallocInstr.
if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
// Keep saving the most recent addr that looks
// like the DeallocInstr until we reach the
// end of the function. Last one to look like
// it is used as the DeallocInstr.
this->LocalVarsDeallocInstr = addr;
FoundDeallocInstr = true;
}
}
} // end for (list<SMPInstr>::iterator CurrInstr ... )
if (!FoundAllocInstr) {
// Could not find the frame allocating instruction. Bad.
// Emit diagnostic and use the first instruction in the
// function as a pseudo-allocation instruction to emit
// some stack frame info (return address, etc.)
this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo->frsize);
#if SMP_DEBUG_FRAMEFIXUP
if (BADADDR == this->LocalVarsAllocInstr) {
msg("ERROR: Could not find stack frame allocation in %s\n",
FuncName);
msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n",
LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
}
else {
msg("FindAllocPoint found %x for function %s\n",
this->LocalVarsAllocInstr, this->GetFuncName());
}
#endif
#if SMP_DEBUG_FIX_FRAMEINFO
if (!FoundDeallocInstr) {
// Could not find the frame deallocating instruction. Bad.
// Emit diagnostic and use the last instruction in the
// function.
msg("ERROR: Could not find stack frame deallocation in %s\n",
FuncName);
}
#endif
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
}
// else LocalVarsSize was zero, meaning that we need to search
// for the end of the function prologue code and emit stack frame
// annotations from that address (i.e. this method returns that
// address). We will approximate this by finding the end of the
// sequence of PUSH instructions at the beginning of the function.
// The last PUSH instruction should be the last callee-save-reg
// instruction. We can make this more robust in the future by
// making sure that we do not count a PUSH of anything other than
// a register. **!!**
// NOTE: 2nd prologue instr is usually mov ebp,esp
// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
else {
ea_t SaveAddr = FuncInfo->startEA;
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
insn_t CurrCmd = CurrInstr->GetCmd();
ea_t addr = CurrInstr->GetAddr();
if (CurrCmd.itype == NN_push)
SaveAddr = addr;
else
break;
}
this->LocalVarsAllocInstr = SaveAddr;
this->LocalVarsDeallocInstr = 0;
} // end if (LocalVarsSize > 0) ... else ...
#if 0
// Now we need to do the corresponding operations from the
// end of the function to find the DeallocInstr in the
// function epilogue. Because there is no addition to the
// stack pointer to deallocate the local vars region, the
// function epilogue will consist of (optional) pops of
// callee-saved regs, followed by the return instruction.
// Working backwards, we should find a return and then
// stop when we do not find any more pops.
if (0 >= LocalVarsSize) {
this->LocalVarsDeallocInstr = NULL;
}
else {
SaveAddr = FuncInfo->endEA - 1;
bool FoundRet = false;
do {
ea_t addr = get_item_head(SaveAddr);
flags_t InstrFlags = getFlags(addr);
if (isCode(addr) && isHead(addr)) {
ua_ana0(addr);
if (!FoundRet) { // Just starting out.
if (MDIsReturnInstr(cmd)) {
FoundRet = true;
SaveAddr = addr - 1;
}
else {
msg("ERROR: Last instruction not a return.\n");
}
}
else { // Should be 0 or more POPs before the return.
if (MDIsPopInstr(cmd)) {
SaveAddr = addr - 1;
}
else if (FrameAllocInstr(cmd, this->LocalVarsSize)) {
this->LocalVarsDeallocInstr = addr;
}
else {
msg("ERROR: Frame deallocation not prior to POPs.\n");
this->LocalVarsDeallocInstr = SaveAddr + 1;
}
} // end if (!FoundRet) ... else ...
}
else {
--SaveAddr;
} // end if (isCode(addr) && isHead(addr))
} while (NULL == this->LocalVarsDeallocInstr);
} // end if (0 >= this->LocalVarsSize)
#endif // 0
return;
} // end of SMPFunction::SetStackFrameInfo()
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
// IDA Pro defines the sizes of regions in the stack frame in a way
// that suits its purposes but not ours. the frsize field of the func_info_t
// structure measures the distance between the stack pointer and the
// frame pointer (ESP and EBP in the x86). This region includes some
// of the callee-saved registers. So, the frregs field only includes
// the callee-saved registers that are above the frame pointer.
// x86 standard prologue on gcc/linux:
// push ebp ; save old frame pointer
// mov ebp,esp ; new frame pointer = current stack pointer
// push esi ; callee save reg
// push edi ; callee save reg
// sub esp,34h ; allocate 52 bytes for local variables
//
// Notice that EBP acquires its final frame pointer value AFTER the
// old EBP has been pushed. This means that, of the three callee saved
// registers, one is above where EBP points and two are below.
// IDA Pro is concerned with generating readable addressing expressions
// for items on the stack. None of the callee-saved regs will ever
// be addressed in the function; they will be dormant until they are popped
// off the stack in the function epilogue. In order to create readable
// disassembled code, IDA defines named constant offsets for locals. These
// offsets are negative values (x86 stack grows downward from EBP toward
// ESP). When ESP_relative addressing occurs, IDA converts a statement:
// mov eax,[esp+12]
// into the statement:
// mov eax,[esp+3Ch+var_30]
// Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
// var_30 is defined to ahve the value -30h == -48 decimal. So, the
// "frame size" in IDA Pro is 60 bytes, and a certain local can be
// addressed in ESP-relative manner as shown, or as [ebp+var_30] for
// EBP-relative addressing. The interactive IDA user can then edit
// the name var_30 to something mnemonic, such as "virus_size", and IDA
// will replace all occurrences with the new name, so that code references
// automatically become [ebp+virus_size]. As the user proceeds
// interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
// our analyses have a compiler perspective as well as a memory access
// defense perspective. SMP distinguishes between callee saved regs,
// which should not be overwritten in the function body, and local
// variables, which can be written. We view the stack frame in logical
// pieces: here are the saved regs, here are the locals, here is the
// return address, etc. We don't care which direction from EBP the
// callee-saved registers lie; we don't want to lump them in with the
// local variables. We also don't like the fact that IDA Pro will take
// the function prologue code shown above and declare frregs=4 and
// frsize=60, because frsize no longer matches the stack allocation
// statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
// private data members for the function, while leaving the IDA database
// alone because IDA needs to maintain its own definitions of these
// variables.
// Fixing means we will update the data members LocalVarsSize and
// CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
// The prologue and epilogue code generated by gcc-linux is as discussed
// above, while on Visual Studio and other Windows x86 compilers, the
// saving of registers other than EBP happens AFTER local stack allocation.
// A Windows version of the function would expect to see the pushing
// of ESI and EDI AFTER the sub esp,34h statement.
int SavedRegsSize = 0;
int OtherPushesSize = 0; // besides callee-saved regs
int NewLocalsSize = 0;
int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
bool Changed = false;
// Iterate through the first basic block in the function. If we find
// a frame allocating Instr in it, then we have local vars. If not,
// we don't, and LocalVarsSize should have been zero. Count the callee
// register saves leading up to the local allocation. Set data members
// according to what we found if the values of the data members would
// change.
for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr();
CurrIter != CurrBlock.GetLastInstr();
++CurrIter) {
list<SMPInstr>::iterator CurrInstr = *CurrIter;
if (CurrInstr->MDIsPushInstr()) {
// We will make the gcc-linux assumption that a PUSH in
// the first basic block, prior to the stack allocating
// instruction, is a callee register save. To make this
// more robust, we ensure that the register is from
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
// the callee saved group of registers, and that it has
// not been defined thus far in the function (else it might
// be a push of an outgoing argument to a call that happens
// in the first block when there are no locals). **!!!!**
if (CurrInstr->MDUsesCalleeSavedReg()
&& !CurrInstr->HasSourceMemoryOperand()) {
SavedRegsSize += 4; // **!!** should check the size
}
else {
// Pushes of outgoing args can be scheduled so that
// they are mixed with the pushes of callee saved regs.
OtherPushesSize += 4;
}
}
else if (CurrInstr->MDIsFrameAllocInstr()) {
SavedRegsSize += OtherPushesSize;
// Get the size being allocated.
for (size_t index = 0; index < CurrInstr->NumUses(); ++index) {
// Find the immediate operand.
if (o_imm == CurrInstr->GetUse(index).type) {
// Get its value into LocalVarsSize.
long AllocValue = (signed long) CurrInstr->GetUse(index).value;
// One compiler might have sub esp,24 and another
// might have add esp,-24. Take the absolute value.
if (0 > AllocValue)
AllocValue = -AllocValue;
if (AllocValue != (long) this->LocalVarsSize) {
Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
if (AllocValue + SavedRegsSize != OldFrameTotal)
msg("Total frame size changed: %s\n", this->FuncName);
#endif
this->LocalVarsSize = (asize_t) AllocValue;
this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
NewLocalsSize = this->LocalVarsSize;
}
else { // Old value was correct; no change.
NewLocalsSize = this->LocalVarsSize;
if (SavedRegsSize != this->CalleeSavedRegsSize) {
this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
msg("Only callee regs size changed: %s\n", this->FuncName);
#endif
}
}
} // end if (o_imm == ...)
} // end for all uses
break; // After frame allocation instr, we are done
} // end if (push) .. elsif frame allocating instr
} // end for all instructions in the first basic block
// If we did not find an allocating instruction, see if it would keep
// the total size the same to set LocalVarsSize to 0 and to set
// CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
// might be better off to leave the numbers alone.
if (!Changed && (NewLocalsSize == 0)) {
if (OldFrameTotal == SavedRegsSize) {
this->CalleeSavedRegsSize = SavedRegsSize;
this->LocalVarsSize = 0;
Changed = true;
}
#if SMP_DEBUG_FRAMEFIXUP
else {
msg("Could not update frame sizes: %s\n", this->FuncName);
}
#endif
#if SMP_DEBUG_FRAMEFIXUP
if ((0 < OtherPushesSize) && (0 < NewLocalsSize))
msg("Extra pushes found of size %d in %s\n", OtherPushesSize,
this->FuncName);
#endif
return Changed;
// Some functions have difficult to find stack allocations. For example, in some
// version of glibc, strpbrk() zeroes out register ECX and then pushes it more than
// 100 times in order to allocate zero-ed out local vars space for a character translation
// table. We will use the stack pointer analysis of IDA to find out if there is a point
// in the first basic block at which the stack pointer reaches the allocation total
// that IDA is expecting for the local vars region.
// If so, we return the address of the instruction at which ESP reaches its value, else
// we return BADADDR.
ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
bool DebugFlag = (0 == strncmp("strpbrk", this->GetFuncName(), 7));
sval_t TargetSize = - ((sval_t) OriginalLocSize); // negate; stack grows down
#if SMP_DEBUG_FRAMEFIXUP
if (DebugFlag)
msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize);
#endif
if (this->FuncInfo->analyzed_sp()) {
// Limit our analysis to the first basic block in the function.
list<SMPInstr>::iterator TempIter = *(--(this->Blocks.front().GetLastInstr()));
ea_t AddrLimit = TempIter->GetAddr();
for (list<list<SMPInstr>::iterator>::iterator CurrIter = this->Blocks.front().GetFirstInstr();
CurrIter != this->Blocks.front().GetLastInstr();
++CurrIter) {
list<SMPInstr>::iterator CurrInstr = *CurrIter;
ea_t addr = CurrInstr->GetAddr();
// get_spd() returns a cumulative delta of ESP
sval_t sp_delta = get_spd(this->FuncInfo, addr);
#if SMP_DEBUG_FRAMEFIXUP
if (DebugFlag)
msg("strpbrk delta: %d at %x\n", sp_delta, addr);
#endif
if (sp_delta == TargetSize) {
// Previous instruction hit the frame size.
if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) {
return BADADDR; // cannot back up from first instruction
}
else {
return (--CurrInstr)->GetAddr();
}
}
}
// SP delta is marked at the beginning of an instruction to show the SP
// after the effects of the previous instruction. Maybe the last instruction
// is the first time the SP achieves its desired value, which will not be shown
// until the first instruction of the next basic block if it just falls through.
// We can compute the delta AFTER the last instruction using get_spd+get_sp_delta.
list<SMPInstr>::iterator FinalInstr = *(--(this->Blocks.front().GetLastInstr()));
ea_t FinalAddr = FinalInstr->GetAddr();
sval_t FinalDelta = get_spd(this->FuncInfo, FinalAddr);
if (!FinalInstr->IsBasicBlockTerminator()) {
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
// Special case. The basic block does not terminate with a branch or
// return, but falls through to the start of a loop, most likely.
// Thus, the last instruction CAN increase the sp_delta, unlike
// a jump or branch, and the sp_delta would not hit the target until
// the first instruction in the second block. We can examine the
// effect on the stack pointer of this last instruction to see if it
// causes the SP delta to hit the OriginalLocSize.
sval_t LastInstrDelta = get_sp_delta(this->FuncInfo, FinalAddr);
if (TargetSize == (FinalDelta + LastInstrDelta)) {
// Return very last instruction (don't back up 1 here)
return FinalAddr;
}
}
} // end if (this->FuncInfo->analyzed_sp())
#if SMP_DEBUG_FRAMEFIXUP
else {
msg("analyzed_sp() is false for %s\n", this->GetFuncName());
}
#endif
return BADADDR;
} // end of SMPFunction::FindAllocPoint()
// IDA Pro is sometimes confused by a function that uses the frame pointer
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
// register for other purposes. For the x86, a function that uses EBP
// as a frame pointer would begin with: push ebp; mov ebp,esp to save
// the old value of EBP and give it a new value as a frame pointer. The
// allocation of local variable space would have to come AFTER the move
// instruction. A function that begins: push ebp; push esi; sub esp,24
// is obviously not using EBP as a frame pointer. IDA is apparently
// confused by the push ebp instruction being the first instruction
// in the function. We will reset UseFP to false in this case.
// NOTE: This logic should work for both Linux and Windows x86 prologues.
bool SMPFunction::MDFixUseFP(void) {
list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
ea_t addr = CurrInstr->GetAddr();
if (!UseFP)
return false; // Only looking to reset true to false.
while (addr < this->LocalVarsAllocInstr) {
size_t DefIndex = 0;
while (DefIndex < CurrInstr->NumDefs()) {
if (CurrInstr->GetDef(DefIndex).is_reg(R_bp))
return false; // EBP got set before locals were allocated
++DefIndex;
}
++CurrInstr;
addr = CurrInstr->GetAddr();
}
// If we found no defs of the frame pointer before the local vars
// allocation, then the frame pointer register is not being used
// as a frame pointer, just as a general callee-saved register.
this->UseFP = false;
return true;
} // end of SMPFunction::MDFixUseFP()
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
// Emit the annotations describing the regions of the stack frame.
void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) {
ea_t addr = Instr->GetAddr();
if (0 < IncomingArgsSize)
qfprintf(AnnotFile, "%x %d INARGS STACK esp + %d %s \n",
addr, IncomingArgsSize,
(LocalVarsSize + CalleeSavedRegsSize + RetAddrSize),
Instr->GetDisasm());
if (0 < RetAddrSize)
qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d ReturnAddress \n",
addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize));
if (0 < CalleeSavedRegsSize)
qfprintf(AnnotFile, "%x %d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n",
addr, CalleeSavedRegsSize, LocalVarsSize);
if (0 < LocalVarsSize)
qfprintf(AnnotFile, "%x %d LOCALFRAME STACK esp + %d LocalVars \n",
addr, LocalVarsSize, 0);
return;
} // end of SMPFunction::EmitStackFrameAnnotations()
// Main data flow analysis driver. Goes through the function and
// fills all objects for instructions, basic blocks, and the function
// itself.
void SMPFunction::Analyze(void) {
list<SMPInstr>::iterator FirstInBlock = this->Instrs.end();
// For starting a basic block
list<SMPInstr>::iterator LastInBlock = this->Instrs.end();
// Terminating a basic block
#if SMP_DEBUG_CONTROLFLOW
msg("Entering SMPFunction::Analyze.\n");
#endif
// Get some basic info from the FuncInfo structure.
this->Size = this->FuncInfo->endEA - this->FuncInfo->startEA;
this->UseFP = (0 != (this->FuncInfo->flags & (FUNC_FRAME | FUNC_BOTTOMBP)));
this->StaticFunc = (0 != (this->FuncInfo->flags & FUNC_STATIC));
get_func_name(this->FuncInfo->startEA, this->FuncName,
sizeof(this->FuncName) - 1);
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: got basic info.\n");
#endif
// Cycle through all chunks that belong to the function.
func_tail_iterator_t FuncTail(this->FuncInfo);
size_t ChunkCounter = 0;
for (bool ChunkOK = FuncTail.main(); ChunkOK; ChunkOK = FuncTail.next()) {
const area_t &CurrChunk = FuncTail.chunk();
++ChunkCounter;
if (1 < ChunkCounter) {
this->SharedChunks = true;
msg("Found tail chunk for %s at %x\n", this->FuncName, CurrChunk.startEA);
// Build the instruction and block lists for the function.
for (ea_t addr = CurrChunk.startEA; addr < CurrChunk.endEA;
addr = get_item_end(addr)) {
flags_t InstrFlags = getFlags(addr);
if (isHead(InstrFlags) && isCode(InstrFlags)) {
SMPInstr CurrInst = SMPInstr(addr);
// Fill in the instruction data members.
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: calling CurrInst::Analyze.\n");
#endif
CurrInst.Analyze();
if (SMPBinaryDebug) {
msg("Disasm: %s \n", CurrInst.GetDisasm());
}
if (CurrInst.GetDataFlowType() == INDIR_CALL)
this->IndirectCalls = true;
// Before we insert the instruction into the instruction
// list, determine if it is a jump target that does not
// follow a basic block terminator. This is the special case
// of a CASE in a SWITCH that falls through into another
// CASE, for example. The first sequence of statements
// was not terminated by a C "break;" statement, so it
// looks like straight line code, but there is an entry
// point at the beginning of the second CASE sequence and
// we have to split basic blocks at the entry point.
if ((FirstInBlock != this->Instrs.end())
&& CurrInst.IsJumpTarget()) {
msg("SMPFunction::Analyze: hit special jump target case.\n");
LastInBlock = --(this->Instrs.end());
SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock,
LastInBlock);
CurrBlock.Analyze();
// If not the first chunk in the function, it is a shared
// tail chunk.
if (ChunkCounter > 1) {
CurrBlock.SetShared();
}
FirstInBlock = this->Instrs.end();
LastInBlock = this->Instrs.end();
this->Blocks.push_back(CurrBlock);
}
msg("SMPFunction::Analyze: putting CurrInst on list.\n");
// Insert instruction at end of list.
this->Instrs.push_back(CurrInst);
// Find basic block leaders and terminators.
if (FirstInBlock == this->Instrs.end()) {
FirstInBlock = --(this->Instrs.end());
}
if (CurrInst.IsBasicBlockTerminator()) {
LastInBlock = --(this->Instrs.end());
SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock);
CurrBlock.Analyze();
// If not the first chunk in the function, it is a shared
// tail chunk.
if (ChunkCounter > 1) {
CurrBlock.SetShared();
}
FirstInBlock = this->Instrs.end();
LastInBlock = this->Instrs.end();
this->Blocks.push_back(CurrBlock);
// Is the instruction a branch to a target outside the function? If
// so, this function has shared tail chunks.
if (CurrInst.IsBranchToFarChunk()) {
this->SharedChunks = true;
}
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
}
} // end if (isHead(InstrFlags) && isCode(InstrFlags)
} // end for (ea_t addr = FuncInfo->startEA; ... )
// Handle the special case in which a function does not terminate
// with a return instruction or any other basic block terminator.
// Sometimes IDA Pro sees a call to a NORET function and decides
// to not include the dead code after it in the function. That
// dead code includes the return instruction, so the function no
// longer includes a return instruction and terminates with a CALL.
if (FirstInBlock != this->Instrs.end()) {
LastInBlock = --(this->Instrs.end());
SMPBasicBlock CurrBlock = SMPBasicBlock(FirstInBlock, LastInBlock);
CurrBlock.Analyze();
// If not the first chunk in the function, it is a shared
// tail chunk.
if (ChunkCounter > 1) {
CurrBlock.SetShared();
}
FirstInBlock = this->Instrs.end();
LastInBlock = this->Instrs.end();
this->Blocks.push_back(CurrBlock);
}
} // end for (bool ChunkOK = ...)
// Set up basic block links and map of instructions to blocks.
if (!(this->HasSharedChunks())) {
this->SetLinks();
this->LiveVariableAnalysis();
}
#if SMP_DEBUG_CONTROLFLOW
msg("SMPFunction::Analyze: set stack frame info.\n");
#endif
// Figure out the stack frame and related info.
this->SetStackFrameInfo();
return;
} // end of SMPFunction::Analyze()
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
// Link basic blocks to their predecessors and successors, and build the map
// of instruction addresses to basic blocks.
void SMPFunction::SetLinks(void) {
list<SMPBasicBlock>::iterator CurrBlock;
#if SMP_DEBUG_DATAFLOW
msg("SetLinks called for %s\n", this->GetFuncName());
#endif
// First, set up the map of instructions to basic blocks.
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
list<list<SMPInstr>::iterator>::iterator CurrInst;
for (CurrInst = CurrBlock->GetFirstInstr();
CurrInst != CurrBlock->GetLastInstr();
++CurrInst) {
pair<ea_t, list<SMPBasicBlock>::iterator> MapItem((*CurrInst)->GetAddr(),CurrBlock);
InstBlockMap.insert(MapItem);
}
}
#if SMP_DEBUG_DATAFLOW
msg("SetLinks finished mapping: %s\n", this->GetFuncName());
#endif
// Next, set successors of each basic block, also setting up the predecessors in the
// process.
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
list<SMPInstr>::iterator CurrInst = *(--(CurrBlock->GetLastInstr()));
// Last instruction in block; set successors
xrefblk_t CurrXrefs;
for (bool ok = CurrXrefs.first_from(CurrInst->GetAddr(), XREF_ALL);
ok;
ok = CurrXrefs.next_from()) {
if ((CurrXrefs.to != 0) && (CurrXrefs.iscode)) {
// Found a code target, with its address in CurrXrefs.to
map<ea_t, list<SMPBasicBlock>::iterator>::iterator MapEntry;
MapEntry = this->InstBlockMap.find(CurrXrefs.to);
if (MapEntry == this->InstBlockMap.end()) {
msg("WARNING: addr %x not found in map for %s\n", CurrXrefs.to,
this->GetFuncName());
}
else {
list<SMPBasicBlock>::iterator Target = MapEntry->second;
// Make target block a successor of current block.
CurrBlock->LinkToSucc(Target);
// Make current block a predecessor of target block.
Target->LinkToPred(CurrBlock);
}
}
} // end for all xrefs
} // end for all blocks
return;
} // end of SMPFunction::SetLinks()
// Perform live variable analysis on all blocks in the function.
// See chapter 9 of Cooper/Torczon, Engineering a Compiler, for the algorithm.
void SMPFunction::LiveVariableAnalysis(void) {
list<SMPBasicBlock>::iterator CurrBlock;
msg("LiveVariableAnalysis for %s\n", this->GetFuncName());
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
// Initialize the Killed and UpwardExposed sets for each block.
CurrBlock->InitKilledExposed();
}
bool changed;
// Iterate over each block, updating LiveOut sets until no more changes are made.
// NOTE: Would be more efficient if we computed a reverse post-order list of blocks
// and traversed this loop in reverse post-order. **!!**
do {
changed = false;
for (CurrBlock = this->Blocks.begin(); CurrBlock != this->Blocks.end(); ++CurrBlock) {
changed |= CurrBlock->UpdateLiveOut();
}
} while (changed);
return;
} // end of SMPFunction::LiveVariableAnalysis()
// Emit all annotations for the function, including all per-instruction
// annotations.
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
void SMPFunction::EmitAnnotations(FILE *AnnotFile) {
// Emit annotation for the function as a whole.
if (this->StaticFunc) {
qfprintf(AnnotFile, "%x %d FUNC LOCAL %s ", FuncInfo->startEA,
this->Size, this->FuncName);
}
else {
qfprintf(AnnotFile, "%x %d FUNC GLOBAL %s ", FuncInfo->startEA,
this->Size, this->FuncName);
}
if (this->UseFP) {
qfprintf(AnnotFile, "USEFP ");
}
else {
qfprintf(AnnotFile, "NOFP ");
}
if (this->FuncInfo->does_return()) {
qfprintf(AnnotFile, "\n");
}
else {
qfprintf(AnnotFile, "NORET \n");
}
// Loop through all instructions in the function.
// Output optimization annotations for those
// instructions that do not require full computation
// of their memory metadata by the Memory Monitor SDT.
list<SMPInstr>::iterator CurrInst;
bool AllocSeen = false; // Reached LocalVarsAllocInstr yet?
bool DeallocTrigger = false;
for (CurrInst = Instrs.begin(); CurrInst != Instrs.end(); ++CurrInst) {
ea_t addr = CurrInst->GetAddr();
if (this->LocalVarsAllocInstr == addr) {
this->EmitStackFrameAnnotations(AnnotFile, CurrInst);
}
// If this is the instruction which deallocated space
// for local variables, we set a flag to remind us to
// emit an annotation on the next instruction.
// mmStrata wants the instruction AFTER the
// deallocating instruction, so that it processes
// the deallocation after it happens. It inserts
// instrumentation before an instruction, not
// after, so it will insert the deallocating
// instrumentation before the first POP of callee-saved regs,
// if there are any, or before the return, otherwise.
if (addr == LocalVarsDeallocInstr) {
DeallocTrigger = true;
}
else if (DeallocTrigger) { // Time for annotation
qfprintf(AnnotFile, "%x %d DEALLOC STACK esp - %d %s\n", addr,
LocalVarsSize, LocalVarsSize, CurrInst->GetDisasm());
CurrInst->EmitAnnotations(this->UseFP, AllocSeen, AnnotFile);
} // end for (ea_t addr = FuncInfo->startEA; ...)
return;
} // end of SMPFunction::EmitAnnotations()
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
// Initialize the DFACategory[] array to define instruction classes
// for the purposes of data flow analysis.
void InitDFACategory(void) {
// Default category is 0, not the start or end of a basic block.
(void) memset(DFACategory, 0, sizeof(DFACategory));
DFACategory[NN_call] = CALL; // Call Procedure
DFACategory[NN_callfi] = INDIR_CALL; // Indirect Call Far Procedure
DFACategory[NN_callni] = INDIR_CALL; // Indirect Call Near Procedure
DFACategory[NN_hlt] = HALT; // Halt
DFACategory[NN_int] = CALL; // Call to Interrupt Procedure
DFACategory[NN_into] = CALL; // Call to Interrupt Procedure if Overflow Flag = 1
DFACategory[NN_int3] = CALL; // Trap to Debugger
DFACategory[NN_iretw] = RETURN; // Interrupt Return
DFACategory[NN_iret] = RETURN; // Interrupt Return
DFACategory[NN_iretd] = RETURN; // Interrupt Return (use32)
DFACategory[NN_iretq] = RETURN; // Interrupt Return (use64)
DFACategory[NN_ja] = COND_BRANCH; // Jump if Above (CF=0 & ZF=0)
DFACategory[NN_jae] = COND_BRANCH; // Jump if Above or Equal (CF=0)
DFACategory[NN_jb] = COND_BRANCH; // Jump if Below (CF=1)
DFACategory[NN_jbe] = COND_BRANCH; // Jump if Below or Equal (CF=1 | ZF=1)
DFACategory[NN_jc] = COND_BRANCH; // Jump if Carry (CF=1)
DFACategory[NN_jcxz] = COND_BRANCH; // Jump if CX is 0
DFACategory[NN_jecxz] = COND_BRANCH; // Jump if ECX is 0
DFACategory[NN_jrcxz] = COND_BRANCH; // Jump if RCX is 0
DFACategory[NN_je] = COND_BRANCH; // Jump if Equal (ZF=1)
DFACategory[NN_jg] = COND_BRANCH; // Jump if Greater (ZF=0 & SF=OF)
DFACategory[NN_jge] = COND_BRANCH; // Jump if Greater or Equal (SF=OF)
DFACategory[NN_jl] = COND_BRANCH; // Jump if Less (SF!=OF)
DFACategory[NN_jle] = COND_BRANCH; // Jump if Less or Equal (ZF=1 | SF!=OF)
DFACategory[NN_jna] = COND_BRANCH; // Jump if Not Above (CF=1 | ZF=1)
DFACategory[NN_jnae] = COND_BRANCH; // Jump if Not Above or Equal (CF=1)
DFACategory[NN_jnb] = COND_BRANCH; // Jump if Not Below (CF=0)
DFACategory[NN_jnbe] = COND_BRANCH; // Jump if Not Below or Equal (CF=0 & ZF=0)
DFACategory[NN_jnc] = COND_BRANCH; // Jump if Not Carry (CF=0)
DFACategory[NN_jne] = COND_BRANCH; // Jump if Not Equal (ZF=0)
DFACategory[NN_jng] = COND_BRANCH; // Jump if Not Greater (ZF=1 | SF!=OF)
DFACategory[NN_jnge] = COND_BRANCH; // Jump if Not Greater or Equal (ZF=1)
DFACategory[NN_jnl] = COND_BRANCH; // Jump if Not Less (SF=OF)
DFACategory[NN_jnle] = COND_BRANCH; // Jump if Not Less or Equal (ZF=0 & SF=OF)
DFACategory[NN_jno] = COND_BRANCH; // Jump if Not Overflow (OF=0)
DFACategory[NN_jnp] = COND_BRANCH; // Jump if Not Parity (PF=0)
DFACategory[NN_jns] = COND_BRANCH; // Jump if Not Sign (SF=0)
DFACategory[NN_jnz] = COND_BRANCH; // Jump if Not Zero (ZF=0)
DFACategory[NN_jo] = COND_BRANCH; // Jump if Overflow (OF=1)
DFACategory[NN_jp] = COND_BRANCH; // Jump if Parity (PF=1)
DFACategory[NN_jpe] = COND_BRANCH; // Jump if Parity Even (PF=1)
DFACategory[NN_jpo] = COND_BRANCH; // Jump if Parity Odd (PF=0)
DFACategory[NN_js] = COND_BRANCH; // Jump if Sign (SF=1)
DFACategory[NN_jz] = COND_BRANCH; // Jump if Zero (ZF=1)
DFACategory[NN_jmp] = JUMP; // Jump
DFACategory[NN_jmpfi] = INDIR_JUMP; // Indirect Far Jump
DFACategory[NN_jmpni] = INDIR_JUMP; // Indirect Near Jump
DFACategory[NN_jmpshort] = JUMP; // Jump Short (only in 64-bit mode)
DFACategory[NN_loopw] = COND_BRANCH; // Loop while ECX != 0
DFACategory[NN_loop] = COND_BRANCH; // Loop while CX != 0
DFACategory[NN_loopd] = COND_BRANCH; // Loop while ECX != 0
DFACategory[NN_loopq] = COND_BRANCH; // Loop while RCX != 0
DFACategory[NN_loopwe] = COND_BRANCH; // Loop while CX != 0 and ZF=1
DFACategory[NN_loope] = COND_BRANCH; // Loop while rCX != 0 and ZF=1
DFACategory[NN_loopde] = COND_BRANCH; // Loop while ECX != 0 and ZF=1
DFACategory[NN_loopqe] = COND_BRANCH; // Loop while RCX != 0 and ZF=1
DFACategory[NN_loopwne] = COND_BRANCH; // Loop while CX != 0 and ZF=0
DFACategory[NN_loopne] = COND_BRANCH; // Loop while rCX != 0 and ZF=0
DFACategory[NN_loopdne] = COND_BRANCH; // Loop while ECX != 0 and ZF=0
DFACategory[NN_loopqne] = COND_BRANCH; // Loop while RCX != 0 and ZF=0
DFACategory[NN_retn] = RETURN; // Return Near from Procedure
DFACategory[NN_retf] = RETURN; // Return Far from Procedure
//
// Pentium instructions
//
DFACategory[NN_rsm] = HALT; // Resume from System Management Mode
// Pentium II instructions
DFACategory[NN_sysenter] = CALL; // Fast Transition to System Call Entry Point
DFACategory[NN_sysexit] = CALL; // Fast Transition from System Call Entry Point
// AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual
DFACategory[NN_syscall] = CALL; // Low latency system call
DFACategory[NN_sysret] = CALL; // Return from system call
// VMX instructions
DFACategory[NN_vmcall] = INDIR_CALL; // Call to VM Monitor
return;
} // end InitDFACategory()