Newer
Older
// This module contains common types an helper classes needed for the
#include <vector>
#include <algorithm>
#include <cstring>
#include <ida.hpp>
#include <idp.hpp>
#include <allins.hpp>
#include <auto.hpp>
#include <bytes.hpp>
#include <funcs.hpp>
#include <intel.hpp>
#include <loader.hpp>
#include <lines.hpp>
#include <name.hpp>
#include "SMPDataFlowAnalysis.h"
#include "SMPStaticAnalyzer.h"
// Set to 1 for debugging output
#define SMP_DEBUG 1
#define SMP_DEBUG2 0 // verbose
#define SMP_DEBUG3 0 // verbose
#define SMP_DEBUG_CONTROLFLOW 0 // tells what processing stage is entered
#define SMP_DEBUG_XOR 0
#define SMP_DEBUG_CHUNKS 1 // tracking down tail chunks for functions
#define SMP_DEBUG_FRAMEFIXUP 0
#define SMP_DEBUG_DATAFLOW 0
// Compute LVA/SSA or not? Turn it off for NICECAP demo on 31-JAN-2008
#define SMP_COMPUTE_LVA_SSA 0
char *RegNames[R_of + 1] =
{ "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
"AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH",
"SPL", "BPL", "SIL", "DIL", "EIP", "ES", "CS", "SS",
"DS", "FS", "GS", "CF", "ZF", "SF", "OF"
};
SMPitype DFACategory[NN_last+1];
// We need to make subword registers equal to their containing registers when we
// do comparisons, so that we will realize that register EAX is killed by a prior DEF
// of register AL, for example, and vice versa. To keep sets ordered strictly,
// we also have to make AL and AH be equal to each other as well as equal to EAX.
#define FIRST_x86_SUBWORD_REG R_al
#define LAST_x86_SUBWORD_REG R_bh
bool MDLessReg(const ushort Reg1, const ushort Reg2) {
bool FirstSubword = ((Reg1 >= FIRST_x86_SUBWORD_REG) && (Reg1 <= LAST_x86_SUBWORD_REG));
bool SecondSubword = ((Reg2 >= FIRST_x86_SUBWORD_REG) && (Reg2 <= LAST_x86_SUBWORD_REG));
ushort SReg1 = Reg1;
ushort SReg2 = Reg2;
if (FirstSubword) {
// See enumeration RegNo in intel.hpp.
if (SReg1 < 20) // AL, CL, DL or BL
SReg1 -= 16;
else // AH, CH, DH or BH
SReg1 -= 20;
}
if (SecondSubword) {
if (SReg2 < 20)
SReg2 -= 16;
else
SReg2 -= 20;
return (SReg1 < SReg2);
} // end of MDLessReg()
// In SSA computations, we are storing the GlobalNames index into the op_t fields
// n, offb, and offo. This function extracts an unsigned int from these three 8-bit
// fields.
unsigned int ExtractGlobalIndex(op_t GlobalOp) {
unsigned int index = 0;
index |= (((unsigned int) GlobalOp.offo) & 0x000000ff);
index <<= 8;
index |= (((unsigned int) GlobalOp.offb) & 0x000000ff);
index <<= 8;
index |= (((unsigned int) GlobalOp.n) & 0x000000ff);
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
// DEBUG Print DEF and/or USE for an operand.
void PrintDefUse(ulong feature, int OpNum) {
// CF_ macros number the operands from 1 to 6, while OpNum
// is a 0 to 5 index into the insn_t.Operands[] array.
// OpNum == -1 is a signal that this is a DEF or USE or VarKillSet etc.
// operand and not an instruction operand.
if (-1 == OpNum)
return;
switch (OpNum) {
case 0:
if (feature & CF_CHG1)
msg(" DEF");
if (feature & CF_USE1)
msg(" USE");
break;
case 1:
if (feature & CF_CHG2)
msg(" DEF");
if (feature & CF_USE2)
msg(" USE");
break;
case 2:
if (feature & CF_CHG3)
msg(" DEF");
if (feature & CF_USE3)
msg(" USE");
break;
case 3:
if (feature & CF_CHG4)
msg(" DEF");
if (feature & CF_USE4)
msg(" USE");
break;
case 4:
if (feature & CF_CHG5)
msg(" DEF");
if (feature & CF_USE5)
msg(" USE");
break;
case 5:
if (feature & CF_CHG6)
msg(" DEF");
if (feature & CF_USE6)
msg(" USE");
break;
}
return;
} // end PrintDefUse()
// DEBUG print SIB info for an operand.
void PrintSIB(op_t Opnd) {
int BaseReg = sib_base(Opnd);
short IndexReg = sib_index(Opnd);
int ScaleFactor = sib_scale(Opnd);
#define NAME_LEN 5
char BaseName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
char IndexName[NAME_LEN] = {'N', 'o', 'n', 'e', '\0'};
#if 0
if (BaseReg != R_bp) // SIB code for NO BASE REG
#endif
qstrncpy(BaseName, RegNames[BaseReg], NAME_LEN - 1);
if (IndexReg != R_sp) { // SIB code for NO INDEX REG
qstrncpy(IndexName, RegNames[IndexReg], NAME_LEN -1);
}
msg(" Base %s Index %s Scale %d", BaseName, IndexName, ScaleFactor);
} // end PrintSIB()
// Debug: print one operand from an instruction or DEF or USE list.
void PrintOneOperand(op_t Opnd, ulong features, int OpNum) {
if (Opnd.type == o_void)
return;
else if (Opnd.type == o_mem) {
msg(" Operand %d : memory : addr: %x", OpNum, Opnd.addr);
PrintDefUse(features, OpNum);
if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem?
msg(" Found SIB byte for o_mem operand ");
PrintSIB(Opnd);
}
}
else if (Opnd.type == o_phrase) {
msg(" Operand %d : memory phrase :", OpNum);
PrintDefUse(features, OpNum);
if (Opnd.hasSIB) { // has SIB info
PrintSIB(Opnd);
}
else { // no SIB info
ushort BaseReg = Opnd.phrase;
msg(" reg %s", RegNames[BaseReg]);
}
if (Opnd.addr != 0) {
msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr);
}
}
else if (Opnd.type == o_displ) {
msg(" Operand %d : memory displ :", OpNum);
ea_t offset = Opnd.addr;
PrintDefUse(features, OpNum);
if (Opnd.hasSIB) {
PrintSIB(Opnd);
msg(" displ %d", offset);
}
else {
ushort BaseReg = Opnd.reg;
msg(" reg %s displ %d", RegNames[BaseReg], offset);
}
}
else if (Opnd.type == o_reg) {
msg(" Operand %d : register %s", OpNum, RegNames[Opnd.reg]);
PrintDefUse(features, OpNum);
}
else if (Opnd.type == o_imm) {
msg(" Operand %d : immed", OpNum);
PrintDefUse(features, OpNum);
}
else if (Opnd.type == o_far) {
msg(" Operand %d : FarPtrImmed", OpNum);
msg(" addr: %x", Opnd.addr);
PrintDefUse(features, OpNum);
}
else if (Opnd.type == o_near) {
msg(" Operand %d : NearPtrImmed", OpNum);
msg(" addr: %x", Opnd.addr);
PrintDefUse(features, OpNum);
}
else {
msg(" Operand %d : unknown", OpNum);
PrintDefUse(features, OpNum);
}
if (!(Opnd.showed()))
msg(" HIDDEN ");
return;
} // end of PrintOneOperand()
// Print an operand that has no features flags or operand position number, such
// as the op_t types found in lists and sets throughout the blocks, phi functions, etc.
void PrintListOperand(op_t Opnd, int SSANum) {
if (Opnd.type == o_void)
return;
else if (Opnd.type == o_mem) {
msg(" Operand : memory : addr: %x", Opnd.addr);
if (Opnd.hasSIB) { // has SIB info -- is this possible for o_mem?
msg(" Found SIB byte for o_mem operand ");
PrintSIB(Opnd);
}
}
else if (Opnd.type == o_phrase) {
msg(" Operand : memory phrase :");
if (Opnd.hasSIB) { // has SIB info
PrintSIB(Opnd);
}
else { // no SIB info
ushort BaseReg = Opnd.phrase;
msg(" reg %s", RegNames[BaseReg]);
}
if (Opnd.addr != 0) {
msg(" \n WARNING: addr for o_phrase type: %d\n", Opnd.addr);
}
}
else if (Opnd.type == o_displ) {
msg(" Operand : memory displ :");
ea_t offset = Opnd.addr;
if (Opnd.hasSIB) {
PrintSIB(Opnd);
msg(" displ %d", offset);
}
else {
ushort BaseReg = Opnd.reg;
msg(" reg %s displ %d", RegNames[BaseReg], offset);
}
}
else if (Opnd.type == o_reg) {
msg(" Operand : register: %s", RegNames[Opnd.reg]);
}
else if (Opnd.type == o_imm) {
msg(" Operand : immed %d", Opnd.value);
}
else if (Opnd.type == o_far) {
msg(" Operand : FarPtrImmed addr: %x", Opnd.addr);
}
else if (Opnd.type == o_near) {
msg(" Operand : NearPtrImmed addr: %x", Opnd.addr);
}
else {
msg(" Operand : unknown");
}
msg(" SSANum: %d ", SSANum);
if (!(Opnd.showed()))
msg(" HIDDEN ");
return;
} // end of PrintListOperand()
// MACHINE DEPENDENT: Is operand type a known type that we want to analyze?
bool MDKnownOperandType(op_t TempOp) {
return ((TempOp.type >= o_reg) && (TempOp.type <= o_near));
}
// *****************************************************************
// Class DefOrUse
// *****************************************************************
// Default constructor to make the compilers happy.
DefOrUse::DefOrUse(void) {
this->OpType = UNINIT;
this->SSANumber = -2;
return;
}
// Constructor.
DefOrUse::DefOrUse(op_t Ref, SMPOperandType Type, int SSASub) {
this->Operand = Ref;
this->OpType = Type;
this->SSANumber = SSASub;
return;
}
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
// Copy constructor.
DefOrUse::DefOrUse(const DefOrUse &CopyIn) {
*this = CopyIn;
return;
}
// Assignment operator for copy constructor use.
DefOrUse &DefOrUse::operator=(const DefOrUse &rhs) {
this->Operand = rhs.Operand;
this->OpType = rhs.OpType;
this->SSANumber = rhs.SSANumber;
return *this;
}
// Debug printing.
void DefOrUse::Dump(void) const {
PrintListOperand(this->Operand, this->SSANumber);
if (this->OpType == NUMERIC)
msg("N ");
else if (this->OpType == POINTER)
msg("P ");
else if (this->OpType == UNKNOWN)
msg("U ");
// Don't write anything for UNINIT OpType
return;
} // end of DefOrUse::Dump()
// *****************************************************************
// Class DefOrUseList
// *****************************************************************
// Default constructor.
DefOrUseList::DefOrUseList(void) {
return;
}
// Set a Def or Use into the list, along with its type.
void DefOrUseList::SetRef(op_t Ref, SMPOperandType Type, int SSASub) {
DefOrUse CurrRef(Ref, Type, SSASub);
this->Refs.push_back(CurrRef);
DefOrUse DefOrUseList::GetRef(size_t index) const {
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
// Change the SSA subscript for a reference.
void DefOrUseList::SetSSANum(size_t index, int NewSSASub) {
this->Refs[index].SetSSANum(NewSSASub);
return;
}
// Change the operand type for a reference.
void DefOrUseList::SetType(size_t index, SMPOperandType Type) {
this->Refs[index].SetType(Type);
return;
}
// Debug printing.
void DefOrUseList::Dump(void) const {
for (size_t index = 0; index < this->Refs.size(); ++index) {
Refs[index].Dump();
}
msg("\n");
return;
}
// Erase duplicate entries, in case SMPInstr::MDFixupDefUseLists() adds one.
void DefOrUseList::EraseDuplicates(void) {
set<op_t, LessOp> TempRefs; // Use STL set to find duplicates
set<op_t, LessOp>::iterator TempIter;
vector<DefOrUse>::iterator RefIter;
RefIter = this->Refs.begin();
while (RefIter != this->Refs.end()) {
TempIter = TempRefs.find(RefIter->GetOp());
if (TempIter == TempRefs.end()) { // not already in set
TempRefs.insert(RefIter->GetOp());
++RefIter;
}
else { // found it in set already
RefIter = this->Refs.erase(RefIter);
}
}
return;
} // end of DefOrUseList::EraseDuplicates()
// *****************************************************************
// Class SMPPhiFunction
// *****************************************************************
// Constructor
SMPPhiFunction::SMPPhiFunction(int GlobIndex, const DefOrUse &Def) {
this->DefName = Def;
// Add a phi item to the list
void SMPPhiFunction::PushBack(DefOrUse Ref) {
this->SubscriptedOps.SetRef(Ref.GetOp(), Ref.GetType(), Ref.GetSSANum());
return;
}
// Set the SSA number of the defined variable.
void SMPPhiFunction::SetSSADef(int NewSSASub) {
this->DefName.SetSSANum(NewSSASub);
return;
}
// Set the SSA number of the input variable.
void SMPPhiFunction::SetSSARef(size_t index, int NewSSASub) {
this->SubscriptedOps.SetSSANum(index, NewSSASub);
return;
}
// Debug printing.
void SMPPhiFunction::Dump(void) const {
msg(" DEF: ");
this->DefName.Dump();
msg(" USEs: ");
this->SubscriptedOps.Dump();
return;
}
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
// Initialize the DFACategory[] array to define instruction classes
// for the purposes of data flow analysis.
void InitDFACategory(void) {
// Default category is 0, not the start or end of a basic block.
(void) memset(DFACategory, 0, sizeof(DFACategory));
DFACategory[NN_call] = CALL; // Call Procedure
DFACategory[NN_callfi] = INDIR_CALL; // Indirect Call Far Procedure
DFACategory[NN_callni] = INDIR_CALL; // Indirect Call Near Procedure
DFACategory[NN_hlt] = HALT; // Halt
DFACategory[NN_int] = CALL; // Call to Interrupt Procedure
DFACategory[NN_into] = CALL; // Call to Interrupt Procedure if Overflow Flag = 1
DFACategory[NN_int3] = CALL; // Trap to Debugger
DFACategory[NN_iretw] = RETURN; // Interrupt Return
DFACategory[NN_iret] = RETURN; // Interrupt Return
DFACategory[NN_iretd] = RETURN; // Interrupt Return (use32)
DFACategory[NN_iretq] = RETURN; // Interrupt Return (use64)
DFACategory[NN_ja] = COND_BRANCH; // Jump if Above (CF=0 & ZF=0)
DFACategory[NN_jae] = COND_BRANCH; // Jump if Above or Equal (CF=0)
DFACategory[NN_jb] = COND_BRANCH; // Jump if Below (CF=1)
DFACategory[NN_jbe] = COND_BRANCH; // Jump if Below or Equal (CF=1 | ZF=1)
DFACategory[NN_jc] = COND_BRANCH; // Jump if Carry (CF=1)
DFACategory[NN_jcxz] = COND_BRANCH; // Jump if CX is 0
DFACategory[NN_jecxz] = COND_BRANCH; // Jump if ECX is 0
DFACategory[NN_jrcxz] = COND_BRANCH; // Jump if RCX is 0
DFACategory[NN_je] = COND_BRANCH; // Jump if Equal (ZF=1)
DFACategory[NN_jg] = COND_BRANCH; // Jump if Greater (ZF=0 & SF=OF)
DFACategory[NN_jge] = COND_BRANCH; // Jump if Greater or Equal (SF=OF)
DFACategory[NN_jl] = COND_BRANCH; // Jump if Less (SF!=OF)
DFACategory[NN_jle] = COND_BRANCH; // Jump if Less or Equal (ZF=1 | SF!=OF)
DFACategory[NN_jna] = COND_BRANCH; // Jump if Not Above (CF=1 | ZF=1)
DFACategory[NN_jnae] = COND_BRANCH; // Jump if Not Above or Equal (CF=1)
DFACategory[NN_jnb] = COND_BRANCH; // Jump if Not Below (CF=0)
DFACategory[NN_jnbe] = COND_BRANCH; // Jump if Not Below or Equal (CF=0 & ZF=0)
DFACategory[NN_jnc] = COND_BRANCH; // Jump if Not Carry (CF=0)
DFACategory[NN_jne] = COND_BRANCH; // Jump if Not Equal (ZF=0)
DFACategory[NN_jng] = COND_BRANCH; // Jump if Not Greater (ZF=1 | SF!=OF)
DFACategory[NN_jnge] = COND_BRANCH; // Jump if Not Greater or Equal (ZF=1)
DFACategory[NN_jnl] = COND_BRANCH; // Jump if Not Less (SF=OF)
DFACategory[NN_jnle] = COND_BRANCH; // Jump if Not Less or Equal (ZF=0 & SF=OF)
DFACategory[NN_jno] = COND_BRANCH; // Jump if Not Overflow (OF=0)
DFACategory[NN_jnp] = COND_BRANCH; // Jump if Not Parity (PF=0)
DFACategory[NN_jns] = COND_BRANCH; // Jump if Not Sign (SF=0)
DFACategory[NN_jnz] = COND_BRANCH; // Jump if Not Zero (ZF=0)
DFACategory[NN_jo] = COND_BRANCH; // Jump if Overflow (OF=1)
DFACategory[NN_jp] = COND_BRANCH; // Jump if Parity (PF=1)
DFACategory[NN_jpe] = COND_BRANCH; // Jump if Parity Even (PF=1)
DFACategory[NN_jpo] = COND_BRANCH; // Jump if Parity Odd (PF=0)
DFACategory[NN_js] = COND_BRANCH; // Jump if Sign (SF=1)
DFACategory[NN_jz] = COND_BRANCH; // Jump if Zero (ZF=1)
DFACategory[NN_jmp] = JUMP; // Jump
DFACategory[NN_jmpfi] = INDIR_JUMP; // Indirect Far Jump
DFACategory[NN_jmpni] = INDIR_JUMP; // Indirect Near Jump
DFACategory[NN_jmpshort] = JUMP; // Jump Short (only in 64-bit mode)
DFACategory[NN_loopw] = COND_BRANCH; // Loop while ECX != 0
DFACategory[NN_loop] = COND_BRANCH; // Loop while CX != 0
DFACategory[NN_loopd] = COND_BRANCH; // Loop while ECX != 0
DFACategory[NN_loopq] = COND_BRANCH; // Loop while RCX != 0
DFACategory[NN_loopwe] = COND_BRANCH; // Loop while CX != 0 and ZF=1
DFACategory[NN_loope] = COND_BRANCH; // Loop while rCX != 0 and ZF=1
DFACategory[NN_loopde] = COND_BRANCH; // Loop while ECX != 0 and ZF=1
DFACategory[NN_loopqe] = COND_BRANCH; // Loop while RCX != 0 and ZF=1
DFACategory[NN_loopwne] = COND_BRANCH; // Loop while CX != 0 and ZF=0
DFACategory[NN_loopne] = COND_BRANCH; // Loop while rCX != 0 and ZF=0
DFACategory[NN_loopdne] = COND_BRANCH; // Loop while ECX != 0 and ZF=0
DFACategory[NN_loopqne] = COND_BRANCH; // Loop while RCX != 0 and ZF=0
DFACategory[NN_retn] = RETURN; // Return Near from Procedure
DFACategory[NN_retf] = RETURN; // Return Far from Procedure
//
// Pentium instructions
//
DFACategory[NN_rsm] = HALT; // Resume from System Management Mode
// Pentium II instructions
DFACategory[NN_sysenter] = CALL; // Fast Transition to System Call Entry Point
DFACategory[NN_sysexit] = CALL; // Fast Transition from System Call Entry Point
// AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual
DFACategory[NN_syscall] = CALL; // Low latency system call
DFACategory[NN_sysret] = CALL; // Return from system call
// VMX instructions
DFACategory[NN_vmcall] = INDIR_CALL; // Call to VM Monitor
return;
} // end InitDFACategory()