Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
// Undocumented Deschutes processor instructions
//
OptCategory[NN_fxsave] = 1; // Fast save FP context ** to where?
OptCategory[NN_fxrstor] = 1; // Fast restore FP context ** from where?
// Pentium II instructions
OptCategory[NN_sysenter] = 1; // Fast Transition to System Call Entry Point
OptCategory[NN_sysexit] = 1; // Fast Transition from System Call Entry Point
// 3DNow! instructions
OptCategory[NN_pavgusb] = 1; // Packed 8-bit Unsigned Integer Averaging
OptCategory[NN_pfadd] = 1; // Packed Floating-Point Addition
OptCategory[NN_pfsub] = 1; // Packed Floating-Point Subtraction
OptCategory[NN_pfsubr] = 1; // Packed Floating-Point Reverse Subtraction
OptCategory[NN_pfacc] = 1; // Packed Floating-Point Accumulate
OptCategory[NN_pfcmpge] = 1; // Packed Floating-Point Comparison, Greater or Equal
OptCategory[NN_pfcmpgt] = 1; // Packed Floating-Point Comparison, Greater
OptCategory[NN_pfcmpeq] = 1; // Packed Floating-Point Comparison, Equal
OptCategory[NN_pfmin] = 1; // Packed Floating-Point Minimum
OptCategory[NN_pfmax] = 1; // Packed Floating-Point Maximum
OptCategory[NN_pi2fd] = 1; // Packed 32-bit Integer to Floating-Point
OptCategory[NN_pf2id] = 1; // Packed Floating-Point to 32-bit Integer
OptCategory[NN_pfrcp] = 1; // Packed Floating-Point Reciprocal Approximation
OptCategory[NN_pfrsqrt] = 1; // Packed Floating-Point Reciprocal Square Root Approximation
OptCategory[NN_pfmul] = 1; // Packed Floating-Point Multiplication
OptCategory[NN_pfrcpit1] = 1; // Packed Floating-Point Reciprocal First Iteration Step
OptCategory[NN_pfrsqit1] = 1; // Packed Floating-Point Reciprocal Square Root First Iteration Step
OptCategory[NN_pfrcpit2] = 1; // Packed Floating-Point Reciprocal Second Iteration Step
OptCategory[NN_pmulhrw] = 1; // Packed 16-bit Integer Multiply with Rounding
OptCategory[NN_femms] = 1; // Faster entry/exit of the MMX or floating-point state
OptCategory[NN_prefetch] = 1; // Prefetch at least a 32-byte line into L1 data cache
OptCategory[NN_prefetchw] = 1; // Prefetch processor cache line into L1 data cache (mark as modified)
// Pentium III instructions
OptCategory[NN_addps] = 1; // Packed Single-FP Add
OptCategory[NN_addss] = 1; // Scalar Single-FP Add
OptCategory[NN_andnps] = 1; // Bitwise Logical And Not for Single-FP
OptCategory[NN_andps] = 1; // Bitwise Logical And for Single-FP
OptCategory[NN_cmpps] = 1; // Packed Single-FP Compare
OptCategory[NN_cmpss] = 1; // Scalar Single-FP Compare
OptCategory[NN_comiss] = 1; // Scalar Ordered Single-FP Compare and Set EFLAGS
OptCategory[NN_cvtpi2ps] = 1; // Packed signed INT32 to Packed Single-FP conversion
OptCategory[NN_cvtps2pi] = 1; // Packed Single-FP to Packed INT32 conversion
OptCategory[NN_cvtsi2ss] = 1; // Scalar signed INT32 to Single-FP conversion
OptCategory[NN_cvtss2si] = 2; // Scalar Single-FP to signed INT32 conversion
OptCategory[NN_cvttps2pi] = 1; // Packed Single-FP to Packed INT32 conversion (truncate)
OptCategory[NN_cvttss2si] = 2; // Scalar Single-FP to signed INT32 conversion (truncate)
OptCategory[NN_divps] = 1; // Packed Single-FP Divide
OptCategory[NN_divss] = 1; // Scalar Single-FP Divide
OptCategory[NN_ldmxcsr] = 1; // Load Streaming SIMD Extensions Technology Control/Status Register
OptCategory[NN_maxps] = 1; // Packed Single-FP Maximum
OptCategory[NN_maxss] = 1; // Scalar Single-FP Maximum
OptCategory[NN_minps] = 1; // Packed Single-FP Minimum
OptCategory[NN_minss] = 1; // Scalar Single-FP Minimum
OptCategory[NN_movaps] = 9; // Move Aligned Four Packed Single-FP ** infer memsrc 'n'?
OptCategory[NN_movhlps] = 1; // Move High to Low Packed Single-FP
OptCategory[NN_movhps] = 1; // Move High Packed Single-FP
OptCategory[NN_movlhps] = 1; // Move Low to High Packed Single-FP
OptCategory[NN_movlps] = 1; // Move Low Packed Single-FP
OptCategory[NN_movmskps] = 1; // Move Mask to Register
OptCategory[NN_movss] = 9; // Move Scalar Single-FP
OptCategory[NN_movups] = 9; // Move Unaligned Four Packed Single-FP
OptCategory[NN_mulps] = 1; // Packed Single-FP Multiply
OptCategory[NN_mulss] = 1; // Scalar Single-FP Multiply
OptCategory[NN_orps] = 1; // Bitwise Logical OR for Single-FP Data
OptCategory[NN_rcpps] = 1; // Packed Single-FP Reciprocal
OptCategory[NN_rcpss] = 1; // Scalar Single-FP Reciprocal
OptCategory[NN_rsqrtps] = 1; // Packed Single-FP Square Root Reciprocal
OptCategory[NN_rsqrtss] = 1; // Scalar Single-FP Square Root Reciprocal
OptCategory[NN_shufps] = 1; // Shuffle Single-FP
OptCategory[NN_sqrtps] = 1; // Packed Single-FP Square Root
OptCategory[NN_sqrtss] = 1; // Scalar Single-FP Square Root
OptCategory[NN_stmxcsr] = 0; // Store Streaming SIMD Extensions Technology Control/Status Register ** Infer dest is 'n'
OptCategory[NN_subps] = 1; // Packed Single-FP Subtract
OptCategory[NN_subss] = 1; // Scalar Single-FP Subtract
OptCategory[NN_ucomiss] = 1; // Scalar Unordered Single-FP Compare and Set EFLAGS
OptCategory[NN_unpckhps] = 1; // Unpack High Packed Single-FP Data
OptCategory[NN_unpcklps] = 1; // Unpack Low Packed Single-FP Data
OptCategory[NN_xorps] = 1; // Bitwise Logical XOR for Single-FP Data
OptCategory[NN_pavgb] = 1; // Packed Average (Byte)
OptCategory[NN_pavgw] = 1; // Packed Average (Word)
OptCategory[NN_pextrw] = 2; // Extract Word
OptCategory[NN_pinsrw] = 1; // Insert Word
OptCategory[NN_pmaxsw] = 1; // Packed Signed Integer Word Maximum
OptCategory[NN_pmaxub] = 1; // Packed Unsigned Integer Byte Maximum
OptCategory[NN_pminsw] = 1; // Packed Signed Integer Word Minimum
OptCategory[NN_pminub] = 1; // Packed Unsigned Integer Byte Minimum
OptCategory[NN_pmovmskb] = 1; // Move Byte Mask to Integer
OptCategory[NN_pmulhuw] = 1; // Packed Multiply High Unsigned
OptCategory[NN_psadbw] = 1; // Packed Sum of Absolute Differences
OptCategory[NN_pshufw] = 1; // Packed Shuffle Word
OptCategory[NN_maskmovq] = 0; // Byte Mask write ** Infer dest is 'n'
OptCategory[NN_movntps] = 0; // Move Aligned Four Packed Single-FP Non Temporal * infer dest is 'n'
OptCategory[NN_movntq] = 0; // Move 64 Bits Non Temporal ** Infer dest is 'n'
OptCategory[NN_prefetcht0] = 1; // Prefetch to all cache levels
OptCategory[NN_prefetcht1] = 1; // Prefetch to all cache levels
OptCategory[NN_prefetcht2] = 1; // Prefetch to L2 cache
OptCategory[NN_prefetchnta] = 1; // Prefetch to L1 cache
OptCategory[NN_sfence] = 1; // Store Fence
// Pentium III Pseudo instructions
OptCategory[NN_cmpeqps] = 1; // Packed Single-FP Compare EQ
OptCategory[NN_cmpltps] = 1; // Packed Single-FP Compare LT
OptCategory[NN_cmpleps] = 1; // Packed Single-FP Compare LE
OptCategory[NN_cmpunordps] = 1; // Packed Single-FP Compare UNORD
OptCategory[NN_cmpneqps] = 1; // Packed Single-FP Compare NOT EQ
OptCategory[NN_cmpnltps] = 1; // Packed Single-FP Compare NOT LT
OptCategory[NN_cmpnleps] = 1; // Packed Single-FP Compare NOT LE
OptCategory[NN_cmpordps] = 1; // Packed Single-FP Compare ORDERED
OptCategory[NN_cmpeqss] = 1; // Scalar Single-FP Compare EQ
OptCategory[NN_cmpltss] = 1; // Scalar Single-FP Compare LT
OptCategory[NN_cmpless] = 1; // Scalar Single-FP Compare LE
OptCategory[NN_cmpunordss] = 1; // Scalar Single-FP Compare UNORD
OptCategory[NN_cmpneqss] = 1; // Scalar Single-FP Compare NOT EQ
OptCategory[NN_cmpnltss] = 1; // Scalar Single-FP Compare NOT LT
OptCategory[NN_cmpnless] = 1; // Scalar Single-FP Compare NOT LE
OptCategory[NN_cmpordss] = 1; // Scalar Single-FP Compare ORDERED
// AMD K7 instructions
// Revisit AMD if we port to it.
OptCategory[NN_pf2iw] = 0; // Packed Floating-Point to Integer with Sign Extend
OptCategory[NN_pfnacc] = 0; // Packed Floating-Point Negative Accumulate
OptCategory[NN_pfpnacc] = 0; // Packed Floating-Point Mixed Positive-Negative Accumulate
OptCategory[NN_pi2fw] = 0; // Packed 16-bit Integer to Floating-Point
OptCategory[NN_pswapd] = 0; // Packed Swap Double Word
// Undocumented FP instructions (thanks to norbert.juffa@adm.com)
OptCategory[NN_fstp1] = 9; // Alias of Store Real and Pop
OptCategory[NN_fcom2] = 1; // Alias of Compare Real
OptCategory[NN_fcomp3] = 1; // Alias of Compare Real and Pop
OptCategory[NN_fxch4] = 1; // Alias of Exchange Registers
OptCategory[NN_fcomp5] = 1; // Alias of Compare Real and Pop
OptCategory[NN_ffreep] = 1; // Free Register and Pop
OptCategory[NN_fxch7] = 1; // Alias of Exchange Registers
OptCategory[NN_fstp8] = 9; // Alias of Store Real and Pop
OptCategory[NN_fstp9] = 9; // Alias of Store Real and Pop
// Pentium 4 instructions
OptCategory[NN_addpd] = 1; // Add Packed Double-Precision Floating-Point Values
OptCategory[NN_addsd] = 1; // Add Scalar Double-Precision Floating-Point Values
OptCategory[NN_andnpd] = 1; // Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values
OptCategory[NN_andpd] = 1; // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
OptCategory[NN_clflush] = 1; // Flush Cache Line
OptCategory[NN_cmppd] = 1; // Compare Packed Double-Precision Floating-Point Values
OptCategory[NN_cmpsd] = 1; // Compare Scalar Double-Precision Floating-Point Values
OptCategory[NN_comisd] = 1; // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS
OptCategory[NN_cvtdq2pd] = 1; // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values
OptCategory[NN_cvtdq2ps] = 1; // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values
OptCategory[NN_cvtpd2dq] = 1; // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
OptCategory[NN_cvtpd2pi] = 1; // Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
OptCategory[NN_cvtpd2ps] = 1; // Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values
OptCategory[NN_cvtpi2pd] = 1; // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values
OptCategory[NN_cvtps2dq] = 1; // Convert Packed Single-Precision Floating-Point Values to Packed Doubleword Integers
OptCategory[NN_cvtps2pd] = 1; // Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values
OptCategory[NN_cvtsd2si] = 2; // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer
OptCategory[NN_cvtsd2ss] = 1; // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
OptCategory[NN_cvtsi2sd] = 1; // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
OptCategory[NN_cvtss2sd] = 1; // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
OptCategory[NN_cvttpd2dq] = 1; // Convert With Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
OptCategory[NN_cvttpd2pi] = 1; // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers
OptCategory[NN_cvttps2dq] = 1; // Convert With Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers
OptCategory[NN_cvttsd2si] = 2; // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
OptCategory[NN_divpd] = 1; // Divide Packed Double-Precision Floating-Point Values
OptCategory[NN_divsd] = 1; // Divide Scalar Double-Precision Floating-Point Values
OptCategory[NN_lfence] = 1; // Load Fence
OptCategory[NN_maskmovdqu] = 0; // Store Selected Bytes of Double Quadword ** Infer dest is 'n'
OptCategory[NN_maxpd] = 1; // Return Maximum Packed Double-Precision Floating-Point Values
OptCategory[NN_maxsd] = 1; // Return Maximum Scalar Double-Precision Floating-Point Value
OptCategory[NN_mfence] = 1; // Memory Fence
OptCategory[NN_minpd] = 1; // Return Minimum Packed Double-Precision Floating-Point Values
OptCategory[NN_minsd] = 1; // Return Minimum Scalar Double-Precision Floating-Point Value
OptCategory[NN_movapd] = 9; // Move Aligned Packed Double-Precision Floating-Point Values ** Infer dest is 'n'
OptCategory[NN_movdq2q] = 1; // Move Quadword from XMM to MMX Register
OptCategory[NN_movdqa] = 9; // Move Aligned Double Quadword ** Infer dest is 'n'
OptCategory[NN_movdqu] = 9; // Move Unaligned Double Quadword ** Infer dest is 'n'
OptCategory[NN_movhpd] = 9; // Move High Packed Double-Precision Floating-Point Values ** Infer dest is 'n'
OptCategory[NN_movlpd] = 9; // Move Low Packed Double-Precision Floating-Point Values ** Infer dest is 'n'
OptCategory[NN_movmskpd] = 2; // Extract Packed Double-Precision Floating-Point Sign Mask
OptCategory[NN_movntdq] = 0; // Store Double Quadword Using Non-Temporal Hint
OptCategory[NN_movnti] = 0; // Store Doubleword Using Non-Temporal Hint
OptCategory[NN_movntpd] = 0; // Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint
OptCategory[NN_movq2dq] = 1; // Move Quadword from MMX to XMM Register
OptCategory[NN_movsd] = 9; // Move Scalar Double-Precision Floating-Point Values
OptCategory[NN_movupd] = 9; // Move Unaligned Packed Double-Precision Floating-Point Values
OptCategory[NN_mulpd] = 1; // Multiply Packed Double-Precision Floating-Point Values
OptCategory[NN_mulsd] = 1; // Multiply Scalar Double-Precision Floating-Point Values
OptCategory[NN_orpd] = 1; // Bitwise Logical OR of Double-Precision Floating-Point Values
OptCategory[NN_paddq] = 1; // Add Packed Quadword Integers
OptCategory[NN_pause] = 1; // Spin Loop Hint
OptCategory[NN_pmuludq] = 1; // Multiply Packed Unsigned Doubleword Integers
OptCategory[NN_pshufd] = 1; // Shuffle Packed Doublewords
OptCategory[NN_pshufhw] = 1; // Shuffle Packed High Words
OptCategory[NN_pshuflw] = 1; // Shuffle Packed Low Words
OptCategory[NN_pslldq] = 1; // Shift Double Quadword Left Logical
OptCategory[NN_psrldq] = 1; // Shift Double Quadword Right Logical
OptCategory[NN_psubq] = 1; // Subtract Packed Quadword Integers
OptCategory[NN_punpckhqdq] = 1; // Unpack High Data
OptCategory[NN_punpcklqdq] = 1; // Unpack Low Data
OptCategory[NN_shufpd] = 1; // Shuffle Packed Double-Precision Floating-Point Values
OptCategory[NN_sqrtpd] = 1; // Compute Square Roots of Packed Double-Precision Floating-Point Values
OptCategory[NN_sqrtsd] = 1; // Compute Square Root of Scalar Double-Precision Floating-Point Value
OptCategory[NN_subpd] = 1; // Subtract Packed Double-Precision Floating-Point Values
OptCategory[NN_subsd] = 1; // Subtract Scalar Double-Precision Floating-Point Values
OptCategory[NN_ucomisd] = 1; // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS
OptCategory[NN_unpckhpd] = 1; // Unpack and Interleave High Packed Double-Precision Floating-Point Values
OptCategory[NN_unpcklpd] = 1; // Unpack and Interleave Low Packed Double-Precision Floating-Point Values
OptCategory[NN_xorpd] = 1; // Bitwise Logical XOR of Double-Precision Floating-Point Values
// AMD syscall/sysret instructions NOTE: not AMD, found in Intel manual
OptCategory[NN_syscall] = 1; // Low latency system call
OptCategory[NN_sysret] = 1; // Return from system call
// AMD64 instructions NOTE: not AMD, found in Intel manual
OptCategory[NN_swapgs] = 1; // Exchange GS base with KernelGSBase MSR
// New Pentium instructions (SSE3)
OptCategory[NN_movddup] = 9; // Move One Double-FP and Duplicate
OptCategory[NN_movshdup] = 9; // Move Packed Single-FP High and Duplicate
OptCategory[NN_movsldup] = 9; // Move Packed Single-FP Low and Duplicate
// Missing AMD64 instructions NOTE: also found in Intel manual
OptCategory[NN_movsxd] = 2; // Move with Sign-Extend Doubleword
OptCategory[NN_cmpxchg16b] = 0; // Compare and Exchange 16 Bytes
// SSE3 instructions
OptCategory[NN_addsubpd] = 1; // Add /Sub packed DP FP numbers
OptCategory[NN_addsubps] = 1; // Add /Sub packed SP FP numbers
OptCategory[NN_haddpd] = 1; // Add horizontally packed DP FP numbers
OptCategory[NN_haddps] = 1; // Add horizontally packed SP FP numbers
OptCategory[NN_hsubpd] = 1; // Sub horizontally packed DP FP numbers
OptCategory[NN_hsubps] = 1; // Sub horizontally packed SP FP numbers
OptCategory[NN_monitor] = 1; // Set up a linear address range to be monitored by hardware
OptCategory[NN_mwait] = 1; // Wait until write-back store performed within the range specified by the MONITOR instruction
OptCategory[NN_fisttp] = 2; // Store ST in intXX (chop) and pop
OptCategory[NN_lddqu] = 0; // Load unaligned integer 128-bit
// SSSE3 instructions
OptCategory[NN_psignb] = 1; // Packed SIGN Byte
OptCategory[NN_psignw] = 1; // Packed SIGN Word
OptCategory[NN_psignd] = 1; // Packed SIGN Doubleword
OptCategory[NN_pshufb] = 1; // Packed Shuffle Bytes
OptCategory[NN_pmulhrsw] = 1; // Packed Multiply High with Round and Scale
OptCategory[NN_pmaddubsw] = 1; // Multiply and Add Packed Signed and Unsigned Bytes
OptCategory[NN_phsubsw] = 1; // Packed Horizontal Subtract and Saturate
OptCategory[NN_phaddsw] = 1; // Packed Horizontal Add and Saturate
OptCategory[NN_phaddw] = 1; // Packed Horizontal Add Word
OptCategory[NN_phaddd] = 1; // Packed Horizontal Add Doubleword
OptCategory[NN_phsubw] = 1; // Packed Horizontal Subtract Word
OptCategory[NN_phsubd] = 1; // Packed Horizontal Subtract Doubleword
OptCategory[NN_palignr] = 1; // Packed Align Right
OptCategory[NN_pabsb] = 1; // Packed Absolute Value Byte
OptCategory[NN_pabsw] = 1; // Packed Absolute Value Word
OptCategory[NN_pabsd] = 1; // Packed Absolute Value Doubleword
// VMX instructions
OptCategory[NN_vmcall] = 1; // Call to VM Monitor
OptCategory[NN_vmclear] = 0; // Clear Virtual Machine Control Structure
OptCategory[NN_vmlaunch] = 1; // Launch Virtual Machine
OptCategory[NN_vmresume] = 1; // Resume Virtual Machine
OptCategory[NN_vmptrld] = 6; // Load Pointer to Virtual Machine Control Structure
OptCategory[NN_vmptrst] = 0; // Store Pointer to Virtual Machine Control Structure
OptCategory[NN_vmread] = 0; // Read Field from Virtual Machine Control Structure
OptCategory[NN_vmwrite] = 0; // Write Field to Virtual Machine Control Structure
OptCategory[NN_vmxoff] = 1; // Leave VMX Operation
OptCategory[NN_vmxon] = 1; // Enter VMX Operation
OptCategory[NN_last] = 1;
return;
} // end InitOptCategory()