//
// SMPFunction.cpp
//
// This module performs the fundamental data flow analyses needed for the
// SMP project (Software Memory Protection) at the function level.
//
#include <utility>
#include <list>
#include <set>
#include <vector>
#include <algorithm>
#include <cstring>
#include <cstdlib>
#include <pro.h>
#include <assert.h>
#include <ida.hpp>
#include <idp.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <allins.hpp>
#include <intel.hpp>
#include <name.hpp>
#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"
#include "SMPFunction.h"
#include "SMPBasicBlock.h"
#include "SMPInstr.h"
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0
#define SMP_DEBUG_DATAFLOW 0
#define SMP_DEBUG_STACK_GRANULARITY 0
#define SMP_DEBUG_BUILD_RTL 1
// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008
#define SMP_COMPUTE_LVA_SSA 1
// Compute fine-grained stack boundaries?
#define SMP_COMPUTE_STACK_GRANULARITY 1
// Basic block number 0 is the top of the CFG lattice.
#define SMP_TOP_BLOCK 0
// Set SharedTailChunks to TRUE for entire printf family
// After we restructure the parent/tail structure of the database, this
// will go away.
#define KLUDGE_VFPRINTF_FAMILY 1
// Used for binary search by function number in SMPStaticAnalyzer.cpp
// to trigger debugging output and find which instruction in which
// function is causing a crash.
bool SMPBinaryDebug = false;
// *****************************************************************
// Class SMPFunction
// *****************************************************************
// Constructor
SMPFunction::SMPFunction(func_t *Info) {
    // Keep a private copy of the IDA function descriptor; the Info pointer
    // itself is not retained.
    this->FuncInfo = *Info;

    // Nothing has been analyzed yet, so every "found it" flag starts false
    // and the outgoing-args region is assumed empty.
    this->IndirectCalls = false;
    this->SharedChunks = false;
    this->CallsAlloca = false;
    this->OutgoingArgsSize = 0;

    // Start with empty containers. LocalVarTable is populated by
    // SemiNaiveLocalVarID() and StackFrameMap by FindOutgoingArgsSize().
    this->LocalVarTable.clear();
    this->StackFrameMap.clear();
    this->DirectCallTargets.clear();
    return;
}
// Figure out the different regions of the stack frame, and find the
// instructions that allocate and deallocate the local variables space
// on the stack frame.
// The stack frame info will be used to emit stack
// annotations when Analyze() reaches the stack allocation
// instruction that sets aside space for local vars.
// Set the address of the instruction at which these
// annotations should be emitted. This should normally
// be an instruction such as: sub esp,48
// However, for a function with no local variables at all,
// we will need to determine which instruction should be
// considered to be the final instruction of the function
// prologue and return its address.
// Likewise, we find the stack deallocating instruction in
// the function epilogue.
void SMPFunction::SetStackFrameInfo(void) {
bool FoundAllocInstr = false;
bool FoundDeallocInstr = false;
// The sizes of the three regions of the stack frame other than the
// return address are stored in the function structure.
this->LocalVarsSize = this->FuncInfo.frsize;
this->CalleeSavedRegsSize = this->FuncInfo.frregs;
this->IncomingArgsSize = this->FuncInfo.argsize;
// The return address size can be obtained in a machine independent
// way by calling get_frame_retsize().
this->RetAddrSize = get_frame_retsize(&(this->FuncInfo));
// IDA Pro has trouble with functions that do not have any local
// variables. Unfortunately, the C library has plenty of these
// functions. IDA usually claims that frregs is zero and frsize
// is N, when the values should have been reversed. We can attempt
// to detect this and fix it.
bool FrameInfoFixed = this->MDFixFrameInfo();
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDFixFrameInfo()\n");
#endif
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#if SMP_DEBUG_FRAMEFIXUP
if (FrameInfoFixed) {
msg("Fixed stack frame size info: %s\n", this->FuncName);
SMPBasicBlock CurrBlock = this->Blocks.front();
msg("First basic block:\n");
for (list<list<SMPInstr>::iterator>::iterator CurrInstr = CurrBlock.GetFirstInstr();
CurrInstr != CurrBlock.GetLastInstr();
++CurrInstr) {
msg("%s\n", (*CurrInstr)->GetDisasm());
}
}
#endif
// Now, if LocalVarsSize is not zero, we need to find the instruction
// in the function prologue that allocates space on the stack for
// local vars. This code could be made more robust in the future
// by matching LocalVarsSize to the immediate value in the allocation
// instruction. However, IDA Pro is sometimes a little off on this
// number. **!!**
if (0 < this->LocalVarsSize) {
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
ea_t addr = CurrInstr->GetAddr();
// Keep the most recent instruction in the DeallocInstr
// in case we reach the return without seeing a dealloc.
if (!FoundDeallocInstr) {
this->LocalVarsDeallocInstr = addr;
}
if (!FoundAllocInstr
&& CurrInstr->MDIsFrameAllocInstr()) {
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDIsFrameAllocInstr()\n");
#endif
this->LocalVarsAllocInstr = addr;
FoundAllocInstr = true;
// As soon as we have found the local vars allocation,
// we can try to fix incorrect sets of UseFP by IDA.
// NOTE: We might want to extend this in the future to
// handle functions that have no locals. **!!**
bool FixedUseFP = MDFixUseFP();
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDFixUseFP()\n");
#endif
#if SMP_DEBUG_FRAMEFIXUP
if (FixedUseFP) {
msg("Fixed UseFP in %s\n", this->FuncName);
}
#endif
}
else if (FoundAllocInstr) {
// We can now start searching for the DeallocInstr.
if (CurrInstr->MDIsFrameDeallocInstr(UseFP, this->LocalVarsSize)) {
// Keep saving the most recent addr that looks
// like the DeallocInstr until we reach the
// end of the function. Last one to look like
// it is used as the DeallocInstr.
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from MDIsFrameDeallocInstr()\n");
#endif
this->LocalVarsDeallocInstr = addr;
FoundDeallocInstr = true;
}
}
} // end for (list<SMPInstr>::iterator CurrInstr ... )
if (!FoundAllocInstr) {
// Could not find the frame allocating instruction. Bad.
// See if we can find the point at which the stack allocation reaches
// a total of FuncInfo.frsize, regardless of whether it happened by push
// instructions or some other means.
this->LocalVarsAllocInstr = this->FindAllocPoint(this->FuncInfo.frsize + this->FuncInfo.frregs);
#if SMP_DEBUG_CONTROLFLOW
msg("Returned from FindAllocPoint()\n");
#endif
#if SMP_DEBUG_FRAMEFIXUP
if (BADADDR == this->LocalVarsAllocInstr) {
msg("ERROR: Could not find stack frame allocation in %s\n",
FuncName);
msg("LocalVarsSize: %d SavedRegsSize: %d ArgsSize: %d\n",
LocalVarsSize, CalleeSavedRegsSize, IncomingArgsSize);
}
else {
msg("FindAllocPoint found %x for function %s\n",
this->LocalVarsAllocInstr, this->GetFuncName());
}
#endif
}
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
if (!FoundDeallocInstr) {
// Could not find the frame deallocating instruction. Bad.
// Emit diagnostic and use the last instruction in the
// function.
msg("ERROR: Could not find stack frame deallocation in %s\n",
FuncName);
}
#endif
}
// else LocalVarsSize was zero, meaning that we need to search
// for the end of the function prologue code and emit stack frame
// annotations from that address (i.e. this method returns that
// address). We will approximate this by finding the end of the
// sequence of PUSH instructions at the beginning of the function.
// The last PUSH instruction should be the last callee-save-reg
// instruction. We can make this more robust in the future by
// making sure that we do not count a PUSH of anything other than
// a register. **!!**
// NOTE: 2nd prologue instr is usually mov ebp,esp
// THE ASSUMPTION THAT WE HAVE ONLY PUSH INSTRUCTIONS BEFORE
// THE ALLOCATING INSTR IS ONLY TRUE WHEN LOCALVARSSIZE == 0;
else {
ea_t SaveAddr = this->FuncInfo.startEA;
for (list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
CurrInstr != this->Instrs.end();
++CurrInstr) {
insn_t CurrCmd = CurrInstr->GetCmd();
ea_t addr = CurrInstr->GetAddr();
if (CurrCmd.itype == NN_push)
SaveAddr = addr;
else
break;
}
this->LocalVarsAllocInstr = SaveAddr;
this->LocalVarsDeallocInstr = 0;
} // end if (LocalVarsSize > 0) ... else ...
#if 0
// Now we need to do the corresponding operations from the
// end of the function to find the DeallocInstr in the
// function epilogue. Because there is no addition to the
// stack pointer to deallocate the local vars region, the
// function epilogue will consist of (optional) pops of
// callee-saved regs, followed by the return instruction.
// Working backwards, we should find a return and then
// stop when we do not find any more pops.
if (0 >= LocalVarsSize) {
this->LocalVarsDeallocInstr = NULL;
}
else {
SaveAddr = this->FuncInfo.endEA - 1;
bool FoundRet = false;
do {
ea_t addr = get_item_head(SaveAddr);
flags_t InstrFlags = getFlags(addr);
if (isCode(addr) && isHead(addr)) {
ua_ana0(addr);
if (!FoundRet) { // Just starting out.
if (MDIsReturnInstr(cmd)) {
FoundRet = true;
SaveAddr = addr - 1;
}
else {
msg("ERROR: Last instruction not a return.\n");
}
}
else { // Should be 0 or more POPs before the return.
if (MDIsPopInstr(cmd)) {
SaveAddr = addr - 1;
}
else if (FrameAllocInstr(cmd, this->LocalVarsSize)) {
this->LocalVarsDeallocInstr = addr;
}
else {
msg("ERROR: Frame deallocation not prior to POPs.\n");
this->LocalVarsDeallocInstr = SaveAddr + 1;
}
} // end if (!FoundRet) ... else ...
}
else {
--SaveAddr;
} // end if (isCode(addr) && isHead(addr))
} while (NULL == this->LocalVarsDeallocInstr);
} // end if (0 >= this->LocalVarsSize)
#endif // 0
this->CallsAlloca = this->FindAlloca();
#if SMP_COMPUTE_STACK_GRANULARITY
// Now, find the boundaries between local variables.
this->BuildLocalVarTable();
#endif
return;
} // end of SMPFunction::SetStackFrameInfo()
// IDA Pro defines the sizes of regions in the stack frame in a way
// that suits its purposes but not ours. The frsize field of the func_info_t
// structure measures the distance between the stack pointer and the
// frame pointer (ESP and EBP in the x86). This region includes some
// of the callee-saved registers. So, the frregs field only includes
// the callee-saved registers that are above the frame pointer.
// x86 standard prologue on gcc/linux:
// push ebp ; save old frame pointer
// mov ebp,esp ; new frame pointer = current stack pointer
// push esi ; callee save reg
// push edi ; callee save reg
// sub esp,34h ; allocate 52 bytes for local variables
//
// Notice that EBP acquires its final frame pointer value AFTER the
// old EBP has been pushed. This means that, of the three callee saved
// registers, one is above where EBP points and two are below.
// IDA Pro is concerned with generating readable addressing expressions
// for items on the stack. None of the callee-saved regs will ever
// be addressed in the function; they will be dormant until they are popped
// off the stack in the function epilogue. In order to create readable
// disassembled code, IDA defines named constant offsets for locals. These
// offsets are negative values (x86 stack grows downward from EBP toward
// ESP). When ESP_relative addressing occurs, IDA converts a statement:
// mov eax,[esp+12]
// into the statement:
// mov eax,[esp+3Ch+var_30]
// Here, 3Ch == 60 decimal is the distance between ESP and EBP, and
// var_30 is defined to have the value -30h == -48 decimal. So, the
// "frame size" in IDA Pro is 60 bytes, and a certain local can be
// addressed in ESP-relative manner as shown, or as [ebp+var_30] for
// EBP-relative addressing. The interactive IDA user can then edit
// the name var_30 to something mnemonic, such as "virus_size", and IDA
// will replace all occurrences with the new name, so that code references
// automatically become [ebp+virus_size]. As the user proceeds
// interactively, he eventually produces very understandable code.
// This all makes sense for producing readable assembly text. However,
// our analyses have a compiler perspective as well as a memory access
// defense perspective. SMP distinguishes between callee saved regs,
// which should not be overwritten in the function body, and local
// variables, which can be written. We view the stack frame in logical
// pieces: here are the saved regs, here are the locals, here is the
// return address, etc. We don't care which direction from EBP the
// callee-saved registers lie; we don't want to lump them in with the
// local variables. We also don't like the fact that IDA Pro will take
// the function prologue code shown above and declare frregs=4 and
// frsize=60, because frsize no longer matches the stack allocation
// statement sub esp,34h == sub esp,52. We prefer frsize=52 and frregs=12.
// So, the task of this function is to fix these stack sizes in our
// private data members for the function, while leaving the IDA database
// alone because IDA needs to maintain its own definitions of these
// variables.
// Fixing means we will update the data members LocalVarsSize and
// CalleeSavedRegsSize.
// NOTE: This function is both machine dependent and platform dependent.
// The prologue and epilogue code generated by gcc-linux is as discussed
// above, while on Visual Studio and other Windows x86 compilers, the
// saving of registers other than EBP happens AFTER local stack allocation.
// A Windows version of the function would expect to see the pushing
// of ESI and EDI AFTER the sub esp,34h statement.
bool SMPFunction::MDFixFrameInfo(void) {
int SavedRegsSize = 0;
int OtherPushesSize = 0; // besides callee-saved regs
int NewLocalsSize = 0;
int OldFrameTotal = this->CalleeSavedRegsSize + this->LocalVarsSize;
bool Changed = false;
// Iterate through the first basic block in the function. If we find
// a frame allocating Instr in it, then we have local vars. If not,
// we don't, and LocalVarsSize should have been zero. Count the callee
// register saves leading up to the local allocation. Set data members
// according to what we found if the values of the data members would
// change.
SMPBasicBlock CurrBlock = this->Blocks.front();
for (list<list<SMPInstr>::iterator>::iterator CurrIter = CurrBlock.GetFirstInstr();
CurrIter != CurrBlock.GetLastInstr();
++CurrIter) {
list<SMPInstr>::iterator CurrInstr = *CurrIter;
if (CurrInstr->MDIsPushInstr()) {
// We will make the gcc-linux assumption that a PUSH in
// the first basic block, prior to the stack allocating
// instruction, is a callee register save. To make this
// more robust, we ensure that the register is from
// the callee saved group of registers, and that it has
// not been defined thus far in the function (else it might
// be a push of an outgoing argument to a call that happens
// in the first block when there are no locals). **!!!!**
if (CurrInstr->MDUsesCalleeSavedReg()
&& !CurrInstr->HasSourceMemoryOperand()) {
SavedRegsSize += 4; // **!!** should check the size
}
else {
// Pushes of outgoing args can be scheduled so that
// they are mixed with the pushes of callee saved regs.
OtherPushesSize += 4;
}
}
else if (CurrInstr->MDIsFrameAllocInstr()) {
SavedRegsSize += OtherPushesSize;
// Get the size being allocated.
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = CurrInstr->GetFirstUse(); CurrUse != CurrInstr->GetLastUse(); ++CurrUse) {
// Find the immediate operand.
if (o_imm == CurrUse->GetOp().type) {
// Get its value into LocalVarsSize.
long AllocValue = (signed long) CurrUse->GetOp().value;
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
// One compiler might have sub esp,24 and another
// might have add esp,-24. Take the absolute value.
if (0 > AllocValue)
AllocValue = -AllocValue;
if (AllocValue != (long) this->LocalVarsSize) {
Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
if (AllocValue + SavedRegsSize != OldFrameTotal)
msg("Total frame size changed: %s\n", this->FuncName);
#endif
this->LocalVarsSize = (asize_t) AllocValue;
this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
NewLocalsSize = this->LocalVarsSize;
}
else { // Old value was correct; no change.
NewLocalsSize = this->LocalVarsSize;
if (SavedRegsSize != this->CalleeSavedRegsSize) {
this->CalleeSavedRegsSize = (ushort) SavedRegsSize;
Changed = true;
#if SMP_DEBUG_FRAMEFIXUP
msg("Only callee regs size changed: %s\n", this->FuncName);
#endif
}
}
} // end if (o_imm == ...)
} // end for all uses
break; // After frame allocation instr, we are done
} // end if (push) .. elsif frame allocating instr
} // end for all instructions in the first basic block
// If we did not find an allocating instruction, see if it would keep
// the total size the same to set LocalVarsSize to 0 and to set
// CalleeSavedRegsSize to SavedRegsSize. If so, do it. If not, we
// might be better off to leave the numbers alone.
if (!Changed && (NewLocalsSize == 0)) {
if (OldFrameTotal == SavedRegsSize) {
this->CalleeSavedRegsSize = SavedRegsSize;
this->LocalVarsSize = 0;
Changed = true;
}
#if SMP_DEBUG_FRAMEFIXUP
else {
msg("Could not update frame sizes: %s\n", this->FuncName);
}
#endif
}
#if SMP_DEBUG_FRAMEFIXUP
if ((0 < OtherPushesSize) && (0 < NewLocalsSize))
msg("Extra pushes found of size %d in %s\n", OtherPushesSize,
this->FuncName);
#endif
return Changed;
} // end of SMPFunction::MDFixFrameInfo()
// Some functions have difficult to find stack allocations. For example, in some
// version of glibc, strpbrk() zeroes out register ECX and then pushes it more than
// 100 times in order to allocate zero-ed out local vars space for a character translation
// table. We will use the stack pointer analysis of IDA to find out if there is a point
// in the first basic block at which the stack pointer reaches the allocation total
// that IDA is expecting for the local vars region.
// If so, we return the address of the instruction at which ESP reaches its value, else
// we return BADADDR.
ea_t SMPFunction::FindAllocPoint(asize_t OriginalLocSize) {
    // OriginalLocSize: total number of bytes the stack pointer must have
    // dropped by at the allocation point.
    // Returns the address of the instruction that brings the stack pointer
    // delta to -OriginalLocSize, or BADADDR if no such point is found.
    bool DebugFlag = (0 == strncmp("strpbrk", this->GetFuncName(), 7));
    sval_t TargetSize = - ((sval_t) OriginalLocSize); // negate; stack grows down
#if SMP_DEBUG_FRAMEFIXUP
    if (DebugFlag)
        msg("strpbrk OriginalLocSize: %d\n", OriginalLocSize);
#endif

    // The search relies on IDA's stack pointer deltas, so it is only
    // attempted when AnalyzedSP is set.
    if (this->AnalyzedSP) {
        // Limit our analysis to the first basic block in the function.
        list<SMPInstr>::iterator CurrInstr;
        for (CurrInstr = this->Instrs.begin(); CurrInstr != this->Instrs.end(); ++CurrInstr) {
            ea_t addr = CurrInstr->GetAddr();
            // get_spd() returns a cumulative delta of ESP
            sval_t sp_delta = get_spd(&(this->FuncInfo), addr);
#if SMP_DEBUG_FRAMEFIXUP
            if (DebugFlag)
                msg("strpbrk delta: %d at %x\n", sp_delta, addr);
#endif
            if (sp_delta == TargetSize) {
                // Previous instruction hit the frame size.
                if (CurrInstr == *(this->Blocks.front().GetFirstInstr())) {
                    return BADADDR; // cannot back up from first instruction
                }
                else {
                    return (--CurrInstr)->GetAddr();
                }
            }
            if (CurrInstr->IsLastInBlock()) {
                // It could be that the current instruction will cause the stack pointer
                // delta to reach the TargetSize. sp_delta is not updated until after the
                // current instruction, so we need to look ahead one instruction if the
                // current block falls through. On the other hand, if the current block
                // ends with a jump or return, we cannot hit TargetSize.
                if (CurrInstr->IsBasicBlockTerminator())
                    return BADADDR;
                list<SMPInstr>::iterator NextInstr = CurrInstr;
                ++NextInstr;
                if (NextInstr == this->Instrs.end())
                    return BADADDR;
                sp_delta = get_spd(&(this->FuncInfo), NextInstr->GetAddr());
                if (sp_delta == TargetSize) {
                    // CurrInstr will cause stack pointer delta to hit TargetSize.
                    return addr;
                }
                else {
                    return BADADDR;
                }
            } // end if LastInBlock
        } // end for all instructions
    }
#if SMP_DEBUG_FRAMEFIXUP
    else {
        msg("AnalyzedSP is false for %s\n", this->GetFuncName());
    }
#endif
    return BADADDR;
} // end of SMPFunction::FindAllocPoint()
// IDA Pro is sometimes confused by a function that uses the frame pointer
// register for other purposes. For the x86, a function that uses EBP
// as a frame pointer would begin with: push ebp; mov ebp,esp to save
// the old value of EBP and give it a new value as a frame pointer. The
// allocation of local variable space would have to come AFTER the move
// instruction. A function that begins: push ebp; push esi; sub esp,24
// is obviously not using EBP as a frame pointer. IDA is apparently
// confused by the push ebp instruction being the first instruction
// in the function. We will reset UseFP to false in this case.
// The inverse problem happens with a function that begins with instructions
// other than push ebp; mov ebp,esp; ... etc. but eventually has those
// instructions in the first basic block. For example, a C compiler generates
// for the first block of main():
// lea ecx,[esp+arg0]
// and esp, 0xfffffff0
// push dword ptr [ecx-4]
// push ebp
// mov ebp,esp
// push ecx
// sub esp,<framesize>
//
// This function is obviously using EBP as a frame pointer, but IDA Pro marks
// the function as not using a frame pointer. We will reset UseFP to true in
// this case.
// NOTE: This logic should work for both Linux and Windows x86 prologues.
bool SMPFunction::MDFixUseFP(void) {
    // Returns true if this->UseFP was changed, false if it was left alone.
    list<SMPInstr>::iterator CurrInstr = this->Instrs.begin();
    if (CurrInstr == this->Instrs.end()) {
        // No instructions to inspect; leave UseFP as IDA reported it.
        return false;
    }
    ea_t addr = CurrInstr->GetAddr();

    if (!(this->UseFP)) {
        // See if we can detect the instruction "push ebp" followed by the instruction
        // "mov ebp,esp" in the first basic block. The instructions do not have to be
        // consecutive. If we find them, we will reset UseFP to true.
        bool FirstBlockProcessed = false;
        bool EBPSaved = false;
        bool ESPintoEBP = false;
        do {
            FirstBlockProcessed = CurrInstr->IsLastInBlock();
            if (!EBPSaved) { // still looking for "push ebp"
                if (CurrInstr->MDIsPushInstr() && CurrInstr->GetCmd().Operands[0].is_reg(R_bp)) {
                    EBPSaved = true;
                }
            }
            else if (!ESPintoEBP) { // found "push ebp", looking for "mov ebp,esp"
                insn_t CurrCmd = CurrInstr->GetCmd();
                if ((CurrCmd.itype == NN_mov)
                    && (CurrInstr->GetFirstDef()->GetOp().is_reg(R_bp))
                    && (CurrInstr->GetFirstUse()->GetOp().is_reg(R_sp))) {
                    ESPintoEBP = true;
                    FirstBlockProcessed = true; // exit loop
                }
            }
            ++CurrInstr;
            if (CurrInstr == this->Instrs.end()) {
                // Ran off the end of the function; never dereference end().
                break;
            }
            addr = CurrInstr->GetAddr();
            // We must get EBP set to its frame pointer value before we reach the
            // local frame allocation instruction (i.e. the subtraction of locals space
            // from the stack pointer).
            FirstBlockProcessed |= (addr >= this->LocalVarsAllocInstr);
        } while (!FirstBlockProcessed);
        // If we found ESPintoEBP, we also found EBPSaved first, and we need to change
        // this->UseFP to true and return true. Otherwise, return false.
        this->UseFP = ESPintoEBP;
        return ESPintoEBP;
    } // end if (!(this->UseFP))

    // At this point, this->UseFP must have been true on entry to this method and we will
    // check whether it should be reset to false.
    while (addr < this->LocalVarsAllocInstr) {
        // Visit every DEF of this instruction; the iterator must advance
        // or the scan would spin forever on a non-EBP first DEF.
        for (set<DefOrUse, LessDefUse>::iterator CurrDef = CurrInstr->GetFirstDef();
            CurrDef != CurrInstr->GetLastDef();
            ++CurrDef) {
            if (CurrDef->GetOp().is_reg(R_bp))
                return false; // EBP got set before locals were allocated
        }
        ++CurrInstr;
        if (CurrInstr == this->Instrs.end()) {
            // Allocation address not reached before the instruction list
            // ended; stop rather than dereference end().
            break;
        }
        addr = CurrInstr->GetAddr();
    }
    // If we found no defs of the frame pointer before the local vars
    // allocation, then the frame pointer register is not being used
    // as a frame pointer, just as a general callee-saved register.
    this->UseFP = false;
    return true;
} // end of SMPFunction::MDFixUseFP()
// Determine local variable boundaries in the stack frame.
void SMPFunction::BuildLocalVarTable(void) {
// Currently we just use the info that IDA Pro has inferred from the direct
// addressing of stack locations.
this->SemiNaiveLocalVarID();
return;
} // end of SMPFunction::BuildLocalVarTable()
// Use the local variable offset list from IDA's stack frame structure to compute
// the table of local variable boundaries.
void SMPFunction::SemiNaiveLocalVarID(void) {
// NOTE: We use IDA Pro's offsets from this->FuncInfo (e.g. frsize) and NOT
// our own corrected values in our private data members. The offsets we
// read from the stack frame structure returned by get_frame() are consistent
// with other IDA Pro values, not with our corrected values.
bool DebugFlag = false;
#if SMP_DEBUG_STACK_GRANULARITY
DebugFlag |= (0 == strcmp("qSort3", this->GetFuncName()));
#endif
func_t *FuncPtr = get_func(this->FuncInfo.startEA);
if (NULL == FuncPtr) {
msg("ERROR in SMPFunction::SemiNaiveLocalVarID; no func ptr\n");
}
assert(NULL != FuncPtr);
struc_t *StackFrame = get_frame(FuncPtr);
if (NULL == StackFrame) {
msg("WARNING: No stack frame info from get_frame for %s\n", this->GetFuncName());
return;
}
member_t *Member = StackFrame->members;
for (size_t i = 0; i < StackFrame->memqty; ++i, ++Member) {
long offset;
char MemberName[MAXSTR] = {'\0'};
if (NULL == Member) {
msg("NULL stack frame member pointer in %s\n", this->GetFuncName());
break;
}
get_member_name(Member->id, MemberName, MAXSTR - 1);
if (MemberName == NULL) {
#if SMP_DEBUG_STACK_GRANULARITY
msg("NULL stack frame member in %s\n", this->GetFuncName());
continue;
}
offset = Member->soff;
if (MemberName[0] == ' ') {
#if SMP_DEBUG_STACK_GRANULARITY
msg("NULL stack frame name at offset %d in %s\n", offset, this->GetFuncName());
MemberName[1] = '\0';
}
if (DebugFlag) {
msg("%s local var %s at offset %d\n", this->GetFuncName(), MemberName, offset);
}
if (offset >= (long) this->LocalVarsSize)
break; // Stop after processing locals and outgoing args
#if 0
// We want the offset from the stack pointer after local frame allocation.
// This subtraction would make it relative to the original stack pointer.
offset -= this->FuncInfo.frsize;
#endif
struct LocalVar TempLocal;
TempLocal.offset = offset;
TempLocal.size = -1; // compute later
qstrncpy(TempLocal.VarName, MemberName, MAXSTR - 1);
this->LocalVarTable.push_back(TempLocal);
} // end for all stack frame members
if (this->LocalVarTable.empty())
return;
#if SMP_DEBUG_STACK_GRANULARITY
msg("Computing %d local var sizes\n", this->LocalVarTable.size());
// Now we want to fill in the size field for each local
for (size_t VarIndex = 0; VarIndex < (this->LocalVarTable.size() - 1); ++VarIndex) {
this->LocalVarTable[VarIndex].size = this->LocalVarTable[VarIndex + 1].offset
- this->LocalVarTable[VarIndex].offset;
}
#if SMP_DEBUG_STACK_GRANULARITY
msg("Computing last local var size for frsize %d\n", this->FuncInfo.frsize);
#endif
// Size of last local is total frsize minus savedregs in frame minus offset of last local
if (this->LocalVarTable.size() > 0) {
size_t SavedRegsSpace = 0; // portion of frsize that is saved regs, not locals.
if (this->CalleeSavedRegsSize > this->FuncInfo.frregs) {
// IDA Pro counts the save of EBP in frregs, but then EBP gets its new
// value and callee saved regs other than the old EBP push get counted
// in frsize rather than frregs. CalleeSavedRegsSize includes all saved
// regs on the stack, both above and below the current EBP offset.
// NOTE: For windows, this has to be done differently, as callee saved regs
// happen at the bottom of the local frame, not the top.
#if 0
SavedRegsSpace = this->CalleeSavedRegsSize - this->FuncInfo.frregs;
#else
SavedRegsSpace = this->FuncInfo.frsize - this->LocalVarsSize;
#endif
this->LocalVarTable[this->LocalVarTable.size() - 1].size = this->FuncInfo.frsize
- SavedRegsSpace - this->LocalVarTable[this->LocalVarTable.size() - 1].offset;
}
this->LocalVarOffsetLimit = this->LocalVarTable.back().offset
+ (adiff_t) this->LocalVarTable.back().size;
assert(this->LocalVarOffsetLimit <= (adiff_t) this->FuncInfo.frsize);
// Find out how many of the locals are really outgoing args.
if (this->AnalyzedSP && !this->CallsAlloca && (BADADDR != this->LocalVarsAllocInstr)) {
this->FindOutgoingArgsSize();
}
else {
msg("FindOutgoingArgsSize not called for %s ", this->GetFuncName());
msg("AnalyzedSP: %d CallsAlloca: %d LocalVarsAllocInstr: %x \n",
this->AnalyzedSP, this->CallsAlloca, this->LocalVarsAllocInstr);
}
return;
} // end of SMPFunction::SemiNaiveLocalVarID()
// Determine how many bytes at the bottom of the stack frame (i.e. at bottom of
// this->LocalVarsSize) are used for outgoing args. This is the case when the cdecl
// calling convention is used, e.g. gcc/linux allocates local var space + out args space
// in a single allocation and then writes outarg values directly to ESP+0, ESP+4, etc.
void SMPFunction::FindOutgoingArgsSize(void) {
// Compute the lowest value reached by the stack pointer.
list<SMPInstr>::iterator CurrInst;
this->MinStackDelta = 20000; // Final value should be negative
bool DebugFlag = false;
#if SMP_DEBUG_STACK_GRANULARITY
DebugFlag = (0 == strcmp("simtest", this->GetFuncName()));
#endif
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
ea_t addr = CurrInst->GetAddr();
sval_t sp_delta = get_spd(&(this->FuncInfo), addr);
if (sp_delta < this->MinStackDelta)
this->MinStackDelta = sp_delta;
if (addr == this->LocalVarsAllocInstr) {
// Total stack pointer delta is sp_delta for the next instruction,
// because IDA updates the sp delta AFTER each instruction.
list<SMPInstr>::iterator NextInst = CurrInst;
++NextInst;
sp_delta = get_spd(&(this->FuncInfo), NextInst->GetAddr());
this->AllocPointDelta = sp_delta;
}
}
#if SMP_DEBUG_STACK_GRANULARITY
msg("AllocPointDelta: %d MinStackDelta: %d\n", this->AllocPointDelta, this->MinStackDelta);
#endif
assert(0 > this->MinStackDelta);
// Allocate a vector of stack frame entries, one for each byte of the stack frame.
// This will be our memory map for analyzing stack usage.
int limit = 0;
#if 1
if (this->LocalVarOffsetLimit > 0)
limit = this->LocalVarOffsetLimit;
#endif
for (int i = this->MinStackDelta; i < limit; ++i) {
struct StackFrameEntry TempEntry;
TempEntry.VarPtr = NULL;
TempEntry.offset = (long) i;
TempEntry.Read = false;
TempEntry.Written = false;
TempEntry.AddressTaken = false;
TempEntry.ESPRelativeAccess = false;
TempEntry.EBPRelativeAccess = false;
this->StackFrameMap.push_back(TempEntry);
}
// Fill in the VarPtr fields for each StackFrameMap entry.
assert(0 > this->AllocPointDelta);
for (size_t i = 0; i < this->LocalVarTable.size(); ++i) {
assert(this->LocalVarTable.at(i).offset >= 0);
// Picture that AllocPointDelta is -200, MinStackDelta is -210, and
// the LocalVarTable[i].offset is +8 (i.e. 8 bytes above alloc point).
// Then base = 8 + (-200 - -210) = 8 + 10 = 18, the proper offset into
// the StackFrameMap.
size_t base = (size_t) (this->LocalVarTable.at(i).offset
+ (this->AllocPointDelta - this->MinStackDelta));
size_t limit = base + this->LocalVarTable.at(i).size;
if (limit > this->StackFrameMap.size()) {
msg("ERROR: base = %d limit = %d StackFrameMap size = %d\n", base, limit,
this->StackFrameMap.size());
}
assert(limit <= this->StackFrameMap.size());
for (size_t MapIndex = base; MapIndex < limit; ++MapIndex) {
this->StackFrameMap[MapIndex].VarPtr = &(this->LocalVarTable.at(i));
}
}
// Iterate through all instructions and record stack frame accesses in the StackFrameMap.
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
sval_t sp_delta = get_spd(&(this->FuncInfo), CurrInst->GetAddr());
if (0 < sp_delta) {
// Stack underflow; about to assert
msg("Stack underflow at %x %s sp_delta: %d\n", CurrInst->GetAddr(),
CurrInst->GetDisasm(), sp_delta);
}
assert(0 >= sp_delta);
ea_t offset;
size_t DataSize;
bool UsedFramePointer;
if (CurrInst->HasDestMemoryOperand()) {
set<DefOrUse, LessDefUse>::iterator CurrDef;
for (CurrDef = CurrInst->GetFirstDef(); CurrDef != CurrInst->GetLastDef(); ++CurrDef) {
op_t TempOp = CurrDef->GetOp();
if (TempOp.type != o_phrase && TempOp.type != o_displ)
continue;
if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) {
assert(0 <= offset);
if (offset >= this->FuncInfo.frsize)
continue; // limit processing to outgoing args and locals
if ((offset + DataSize) > this->StackFrameMap.size()) {
msg("ERROR: offset = %d DataSize = %d FrameMapSize = %d\n",
offset, DataSize, this->StackFrameMap.size());
}
assert((offset + DataSize) <= this->StackFrameMap.size());
for (int j = 0; j < (int) DataSize; ++j) {
this->StackFrameMap[offset + j].Written = true;
if (!UsedFramePointer)
this->StackFrameMap[offset + j].ESPRelativeAccess = true;
else
this->StackFrameMap[offset + j].EBPRelativeAccess = true;
}
}
} // end for all DEFs
} // end if DestMemoryOperand
if (CurrInst->HasSourceMemoryOperand()) {
set<DefOrUse, LessDefUse>::iterator CurrUse;
for (CurrUse = CurrInst->GetFirstUse(); CurrUse != CurrInst->GetLastUse(); ++CurrUse) {
op_t TempOp = CurrUse->GetOp();
if (TempOp.type != o_phrase && TempOp.type != o_displ)
continue;
if (this->MDGetStackOffsetAndSize(TempOp, sp_delta, offset, DataSize, UsedFramePointer)) {
assert(0 <= offset);
if (offset >= this->FuncInfo.frsize)
continue; // limit processing to outgoing args and locals
if ((offset + DataSize) > this->StackFrameMap.size()) {
msg("ERROR: offset = %d DataSize = %d FrameMapSize = %d\n",
offset, DataSize, this->StackFrameMap.size());
}
assert((offset + DataSize) <= this->StackFrameMap.size());
for (int j = 0; j < (int) DataSize; ++j) {
this->StackFrameMap[offset + j].Read = true;
if (!UsedFramePointer)
this->StackFrameMap[offset + j].ESPRelativeAccess = true;
else
this->StackFrameMap[offset + j].EBPRelativeAccess = true;
}
}
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
} // end if SourceMemoryOperand
// NOTE: Detect taking the address of stack locations. **!!**
} // end for all instructions
// If function is a leaf function, set OutgoingArgsSize to zero and return.
if (this->IsLeaf()) {
this->OutgoingArgsSize = 0;
return;
}
// For non-leaf functions, set the OutgoingArgsSize to the write-only, ESP-relative
// region of the bottom of the StackFrameMap.
for (size_t MapIndex = 0; MapIndex < this->StackFrameMap.size(); ++MapIndex) {
// Some of the bottom of the stack frame might be below the local frame allocation.
// These are pushes that happened after allocation, etc. We skip over these
// locations and define the outgoing args region to start strictly at the bottom
// of the local frame allocation.
struct StackFrameEntry TempEntry = this->StackFrameMap.at(MapIndex);
if (DebugFlag) {
msg("StackFrameMap entry %d: offset: %d Read: %d Written: %d ESP: %d EBP: %d\n",
MapIndex, TempEntry.offset, TempEntry.Read, TempEntry.Written,
TempEntry.ESPRelativeAccess, TempEntry.EBPRelativeAccess);
}
if (TempEntry.offset < this->AllocPointDelta)
continue;
if (TempEntry.Read || TempEntry.EBPRelativeAccess || !TempEntry.Written
|| !TempEntry.ESPRelativeAccess)
break;
this->OutgoingArgsSize++;
}
// Sometimes we encounter unused stack space above the outgoing args. Lump this space
// in with the outgoing args. We detect this by noting when the outgoing args space
// has only partially used the space assigned to a local var.
if ((0 < this->OutgoingArgsSize) && (this->OutgoingArgsSize < this->FuncInfo.frsize)) {
long MapIndex = (this->AllocPointDelta - this->MinStackDelta);
assert(0 <= MapIndex);
MapIndex += (((long) this->OutgoingArgsSize) - 1);
struct StackFrameEntry TempEntry = this->StackFrameMap.at((size_t) MapIndex);
if (this->OutgoingArgsSize < (TempEntry.VarPtr->offset + TempEntry.VarPtr->size)) {
msg("OutGoingArgsSize = %d", this->OutgoingArgsSize);
this->OutgoingArgsSize = TempEntry.VarPtr->offset + TempEntry.VarPtr->size;
msg(" adjusted to %d\n", this->OutgoingArgsSize);
}
}
return;
} // end of SMPFunction::FindOutgoingArgsSize()
// Determine whether the memory operand TempOp is addressed through the stack pointer
//  or (when this function uses a frame pointer) through the frame pointer.
// sp_delta is the stack pointer delta of the current instruction, relative to the
//  initial stack pointer value for the function.
// On a true return:
//  offset   is the operand displacement rebased by subtracting MinStackDelta, which
//           makes it an index into the StackFrameMap;
//  DataSize is the value returned by GetOpDataSize() for the operand;
//  FP       is false for a stack-pointer-based access, true for a frame-pointer-based one.
// On a false return, offset has still been overwritten with the raw displacement, and
//  DataSize and FP are untouched.
// NOTE(review): sib_index() may report the "no index" SIB encoding with the same value
//  as R_sp; if so, the index comparison below also matches SIB operands that have no
//  index register -- confirm against the IDA SDK.
bool SMPFunction::MDGetStackOffsetAndSize(op_t TempOp, sval_t sp_delta, ea_t &offset, size_t &DataSize, bool &FP) {
	// A displacement operand carries an explicit constant; a bare phrase such as
	//  [esp] has an implicit displacement of zero.
	offset = (TempOp.type == o_displ) ? TempOp.addr : 0;

	// Take the registers from the SIB byte when there is one; otherwise the operand
	//  names a single base register and has no index.
	ushort BaseReg = TempOp.reg;
	ushort IndexReg = R_none;
	if (TempOp.hasSIB) {
		BaseReg = sib_base(TempOp);
		IndexReg = sib_index(TempOp);
	}

	// Stack pointer addressing takes priority over frame pointer addressing.
	const bool StackPtrBased = (BaseReg == R_sp) || (IndexReg == R_sp);
	const bool FramePtrBased = this->UseFP && ((BaseReg == R_bp) || (IndexReg == R_bp));
	if (!StackPtrBased && !FramePtrBased)
		return false;

	// First base the offset from the entry ESP value ...
	if (StackPtrBased) {
		offset += sp_delta;
	}
	else {
		offset -= this->FuncInfo.frregs;
	}
	// ... then convert it to a StackFrameMap index.
	offset -= this->MinStackDelta;

	DataSize = GetOpDataSize(TempOp);
	FP = !StackPtrBased;
	return true;
} // end of SMPFunction::MDGetStackOffsetAndSize()
// Find evidence of calls to alloca(), which appear as stack space allocations (i.e.
// subtractions from the stack pointer) AFTER the local frame allocation instruction
// for this function.
// Return true if such an allocation is found and false otherwise.
bool SMPFunction::FindAlloca(void) {
list<SMPInstr>::iterator CurrInst;
for (CurrInst = this->Instrs.begin(); CurrInst != this->Instrs.end(); ++CurrInst) {
if ((CurrInst->GetAddr() > this->LocalVarsAllocInstr) && CurrInst->MDIsFrameAllocInstr()) {
return true;
}
}
return false;
} // end of SMPFunction::FindAlloca()
// Emit the annotations describing the regions of the stack frame.
void SMPFunction::EmitStackFrameAnnotations(FILE *AnnotFile, list<SMPInstr>::iterator Instr) {
ea_t addr = Instr->GetAddr();
#if 0
if (0 < IncomingArgsSize) {
qfprintf(AnnotFile, "%10x %6d INARGS STACK esp + %d %s \n",
addr, IncomingArgsSize,
(LocalVarsSize + CalleeSavedRegsSize + RetAddrSize),
Instr->GetDisasm());
}
#endif
if (0 < RetAddrSize) {
qfprintf(AnnotFile, "%10x %6d MEMORYHOLE STACK esp + %d ReturnAddress \n",
addr, RetAddrSize, (LocalVarsSize + CalleeSavedRegsSize));
}
if (0 < CalleeSavedRegsSize) {
qfprintf(AnnotFile, "%10x %6d MEMORYHOLE STACK esp + %d CalleeSavedRegs \n",
addr, CalleeSavedRegsSize, LocalVarsSize);
if (0 < LocalVarsSize) {
unsigned long ParentReferentID = DataReferentID++;
qfprintf(AnnotFile, "%10x %6d DATAREF STACK %d esp + %d PARENT LocalFrame LOCALFRAME\n",
addr, LocalVarsSize, ParentReferentID, 0);
#if SMP_COMPUTE_STACK_GRANULARITY
if (this->AnalyzedSP && !this->CallsAlloca && (BADADDR != this->LocalVarsAllocInstr)) {