From 3a1d4bcd4eac840da2cded2ab77c671cc07418b5 Mon Sep 17 00:00:00 2001 From: jdh8d <jdh8d@git.zephyr-software.com> Date: Wed, 4 May 2016 16:12:53 +0000 Subject: [PATCH] added intel and AMD FMA instructions Former-commit-id: 596fbe6d6515b795cce7114c28ee4030e6b5442e --- .gitattributes | 2 + .../Includes/Routines_Disasm.c | 132 ++++++- .../Includes/instr_set/Data_opcode.h | 42 +-- .../Includes/instr_set/opcodes_VEX.c | 343 +++++++++++++++++- .../Includes/internal_datas.h | 10 + beaengine/beaengineSources/Includes/protos.h | 72 +++- beaengine/jdh_tests/fma_amd.nasm | 107 ++++++ beaengine/jdh_tests/fma_intel.nasm | 237 ++++++++++++ 8 files changed, 911 insertions(+), 34 deletions(-) create mode 100644 beaengine/jdh_tests/fma_amd.nasm create mode 100644 beaengine/jdh_tests/fma_intel.nasm diff --git a/.gitattributes b/.gitattributes index 4f98e22b9..e85e03e95 100644 --- a/.gitattributes +++ b/.gitattributes @@ -175,6 +175,8 @@ beaengine/jdh_tests/Makefile -text beaengine/jdh_tests/avx1.nasm -text beaengine/jdh_tests/avx2.nasm -text beaengine/jdh_tests/dbea.c -text +beaengine/jdh_tests/fma_amd.nasm -text +beaengine/jdh_tests/fma_intel.nasm -text beaengine/jdh_tests/insns.s -text beaengine/jdh_tests/insns_avx.s -text /build.sh -text diff --git a/beaengine/beaengineSources/Includes/Routines_Disasm.c b/beaengine/beaengineSources/Includes/Routines_Disasm.c index fd9b6141d..f5b3bfb84 100644 --- a/beaengine/beaengineSources/Includes/Routines_Disasm.c +++ b/beaengine/beaengineSources/Includes/Routines_Disasm.c @@ -114,6 +114,56 @@ int __bea_callspec__ InitVariables (PDISASM pMyDisasm) { * ==================================================================== */ void __bea_callspec__ FixArgSizeForMemoryOperand (PDISASM pMyDisasm) { + if (GV.MemDecoration == Arg4byte) { + (*pMyDisasm).Argument4.ArgSize = 8; + } + else if (GV.MemDecoration == Arg4word) { + (*pMyDisasm).Argument4.ArgSize = 16; + } + else if (GV.MemDecoration == Arg4dword) { + (*pMyDisasm).Argument4.ArgSize = 32; + } + 
else if (GV.MemDecoration == Arg4qword) { + (*pMyDisasm).Argument4.ArgSize = 64; + } + else if (GV.MemDecoration == Arg4multibytes) { + (*pMyDisasm).Argument4.ArgSize = 0; + } + else if (GV.MemDecoration == Arg4tbyte) { + (*pMyDisasm).Argument4.ArgSize = 80; + } + else if (GV.MemDecoration == Arg4fword) { + (*pMyDisasm).Argument4.ArgSize = 48; + } + else if (GV.MemDecoration == Arg4dqword) { + (*pMyDisasm).Argument4.ArgSize = 128; + } + + if (GV.MemDecoration == Arg3byte) { + (*pMyDisasm).Argument3.ArgSize = 8; + } + else if (GV.MemDecoration == Arg3word) { + (*pMyDisasm).Argument3.ArgSize = 16; + } + else if (GV.MemDecoration == Arg3dword) { + (*pMyDisasm).Argument3.ArgSize = 32; + } + else if (GV.MemDecoration == Arg3qword) { + (*pMyDisasm).Argument3.ArgSize = 64; + } + else if (GV.MemDecoration == Arg3multibytes) { + (*pMyDisasm).Argument3.ArgSize = 0; + } + else if (GV.MemDecoration == Arg3tbyte) { + (*pMyDisasm).Argument3.ArgSize = 80; + } + else if (GV.MemDecoration == Arg3fword) { + (*pMyDisasm).Argument3.ArgSize = 48; + } + else if (GV.MemDecoration == Arg3dqword) { + (*pMyDisasm).Argument3.ArgSize = 128; + } + if (GV.MemDecoration == Arg2byte) { (*pMyDisasm).Argument2.ArgSize = 8; } @@ -1075,11 +1125,83 @@ void __bea_callspec__ BuildCompleteInstruction(PDISASM pMyDisasm) } } /* =============== if Arg4.Exists */ - if (GV.forth_arg != 0) { - (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, ", "); - i += 2; - (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, (char*) &(*pMyDisasm).Argument4.ArgMnemonic); - i = strlen((char*) &(*pMyDisasm).CompleteInstr); + if (GV.forth_arg != 0) + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, ", "); + i += 2; + if ((GV.MemDecoration >300) && (GV.MemDecoration <399)) + { + GV.MemDecoration -= 300; + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, NasmPrefixes[GV.MemDecoration-1]); + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + if (GV.SYNTAX_ == NasmSyntax) + { + i = strlen((char*) 
&(*pMyDisasm).CompleteInstr); + if ((GV.SEGMENTREGS != 0) || (GV.SEGMENTFS != 0)) + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "["); + i++; + if (GV.SEGMENTREGS != 0) + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, + SegmentRegs[(*pMyDisasm).Argument4.SegmentReg]); + } + else + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, SegmentRegs[3]); + } + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + } + else + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "["); + i++; + } + } + else + { + if (GV.SYNTAX_ == MasmSyntax) + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, MasmPrefixes[GV.MemDecoration-1]); + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + } + else + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, GoAsmPrefixes[GV.MemDecoration-1]); + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + } + if ((GV.SEGMENTREGS != 0) || (GV.SEGMENTFS != 0)) + { + if (GV.SEGMENTREGS != 0) + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, SegmentRegs[(*pMyDisasm).Argument4.SegmentReg]); + } + else + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, SegmentRegs[3]); + } + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "["); + i++; + } + else + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "["); + i++; + } + } + /* =============== add Arg4.ArgMnemonic */ + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, (char*) &(*pMyDisasm).Argument4.ArgMnemonic); + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "]"); + i++; + } + else + { + (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, (char*) &(*pMyDisasm).Argument4.ArgMnemonic); + i = strlen((char*) &(*pMyDisasm).CompleteInstr); + } + } diff --git a/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h b/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h index c48187339..bc6437996 100644 --- 
a/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h +++ b/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h @@ -79,23 +79,23 @@ void (__bea_callspec__ *opcode_map2[])(PDISASM) = { /* */ /* =============================================================================== */ void (__bea_callspec__ *opcode_map3[])(PDISASM) = { -/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -/*0*/pshufb_ , phaddw_ , phaddd_ , phaddsw_ , pmaddubsw_, phsubw_ , phsubd_ , phsubsw_ , psignb_ , psignw_ , psignd_ , pmulhrsw_ , vpermilps1, FailDecode, FailDecode, FailDecode, -/*1*/pblendvb_ , FailDecode, FailDecode, FailDecode, blendvps_ , blendvpd_ , FailDecode, ptest_ , vbrdcstss , vbrdcstsd , FailDecode, FailDecode, pabsb_ , pabsw_ , pabsd_ , FailDecode, -/*2*/pmovsxbw_ , pmovsxbd_ , pmovsxbq_ , pmovsxwd_ , pmovsxwq_ , pmovsxdq_ , FailDecode, FailDecode, pmuldq_ , pcmpeqq_ , movntdqa_ , packusdw_ , FailDecode, FailDecode, FailDecode, FailDecode, -/*3*/pmovzxbw_ , pmovzxbd_ , pmovzxbq_ , pmovzxwd_ , pmovzxwq_ , pmovzxdq_ , FailDecode, pcmpgtq_ , pminsb_ , pminsd_ , pminuw_ , pminud_ , pmaxsb_ , pmaxsd_ , pmaxuw_ , pmaxud_ , -/*4*/pmulld_ ,phminposuw_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*5*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*6*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*7*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*8*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, 
FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*9*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*a*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*b*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*c*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*d*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, aesimc , aesenc , aesenclast, aesdec , aesdeclast, -/*e*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*e*/crc32_GvEb, crc32_GvEv, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +/*0*/pshufb_ , phaddw_ , phaddd_ , phaddsw_ , pmaddubsw_, phsubw_ , phsubd_ , phsubsw_ , psignb_ , psignw_ , psignd_ , pmulhrsw_ , vpermilps1, FailDecode, FailDecode, FailDecode, +/*1*/pblendvb_ , FailDecode, FailDecode, FailDecode, blendvps_ , blendvpd_ , FailDecode, ptest_ , vbrdcstss , vbrdcstsd , FailDecode, FailDecode, pabsb_ , pabsw_ , pabsd_ , FailDecode, +/*2*/pmovsxbw_ , pmovsxbd_ , pmovsxbq_ , pmovsxwd_ , pmovsxwq_ , pmovsxdq_ , FailDecode, 
FailDecode, pmuldq_ , pcmpeqq_ , movntdqa_ , packusdw_ , FailDecode, FailDecode, FailDecode, FailDecode, +/*3*/pmovzxbw_ , pmovzxbd_ , pmovzxbq_ , pmovzxwd_ , pmovzxwq_ , pmovzxdq_ , FailDecode, pcmpgtq_ , pminsb_ , pminsd_ , pminuw_ , pminud_ , pmaxsb_ , pmaxsd_ , pmaxuw_ , pmaxud_ , +/*4*/pmulld_ ,phminposuw_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*5*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*6*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*7*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*8*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*9*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, fmaddsub132p,fmsubadd132p,fmadd132p, fmadd132s , fmsub132p, fmsub132s, fnmadd132p, fnmadd132s, fnmsub132p, fnmsub132s, +/*a*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, fmaddsub213p,fmsubadd213p,fmadd213p, fmadd213s , fmsub213p, fmsub213s, fnmadd213p, fnmadd213s, fnmsub213p, fnmsub213s, +/*b*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, fmaddsub231p,fmsubadd231p,fmadd231p, fmadd231s , fmsub231p, fmsub231s, fnmadd231p, fnmadd231s, fnmsub231p, fnmsub231s, +/*c*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, 
FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*d*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, aesimc , aesenc , aesenclast, aesdec , aesdeclast, +/*e*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*e*/crc32_GvEb, crc32_GvEv, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, }; /* =============================================================================== */ @@ -111,10 +111,10 @@ void (__bea_callspec__ *opcode_map2[])(PDISASM) = { /*1*/ FailDecode, FailDecode, FailDecode, FailDecode, pextrb_ , pextrw2_ , pextrd_ , extractps_, vinsrtf128, vextraf128, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, /*2*/ pinsrb_ , insertps_ , pinsrd_ , FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, /*3*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*4*/ dpps_ , dppd_ , mpsadbw_ , FailDecode, pclmulqdq_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vblendvpd, FailDecode, FailDecode, FailDecode, FailDecode, -/*5*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*6*/ pcmpestrm_, pcmpestri_, pcmpistrm_, pcmpistri_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, -/*7*/ 
FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, +/*4*/ dpps_ , dppd_ , mpsadbw_ , FailDecode, pclmulqdq_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vblendvpd , FailDecode, FailDecode, FailDecode, FailDecode, +/*5*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vfmaddsubps,vfmaddsubpd,vfmsubaddps,vfmsubaddpd, +/*6*/ pcmpestrm_, pcmpestri_, pcmpistrm_, pcmpistri_, FailDecode, FailDecode, FailDecode, FailDecode, vfmaddps , vfmaddpd , vfmaddss , vfmaddsd , vfmsubps , vfmsubpd , vfmsubss , vfmsubsd , +/*7*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vfnmaddps , vfnmaddpd , vfnmaddss , vfnmaddsd , vfnmsubps , vfnmsubpd , vfnmsubss , vfnmsubsd , /*8*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, /*9*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, /*a*/ FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, diff --git a/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c b/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c index adeae0936..a7510e31a 100644 --- a/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c +++ b/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c @@ -95,6 +95,8 @@ instructions that leave the upper half of the register unchanged, for reasons of #include <assert.h> +#define TRUE 
1 +#define FALSE 0 static void finish_vex(PDISASM pMyDisasm) { @@ -360,22 +362,52 @@ void L_reg(ARGTYPE* arg, PDISASM pMyDisasm) GV.EIP_++; } -void VxHxWxLx(PDISASM pMyDisasm) + +void FourOpFMAW0(PDISASM pMyDisasm) { if(!GV.VEX.has_vex) FailDecode(pMyDisasm); + GV.MemDecoration=Arg3fword; + GV.AVX_=GV.VEX.length; + GV.SSE_=!GV.VEX.length; + V_reg(&(*pMyDisasm).Argument2, pMyDisasm); MOD_RM(&(*pMyDisasm).Argument3, pMyDisasm); Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm); GV.EIP_ += GV.DECALAGE_EIP+2; L_reg(&(*pMyDisasm).Argument4, pMyDisasm); + GV.AVX_=0; + GV.SSE_=0; + GV.third_arg=1; GV.forth_arg=1; } -void VyHyWyLy(PDISASM pMyDisasm) +void FourOpFMAW1(PDISASM pMyDisasm) +{ + if(!GV.VEX.has_vex) + FailDecode(pMyDisasm); + + GV.MemDecoration=Arg4fword; + GV.AVX_=GV.VEX.length; + GV.SSE_=!GV.VEX.length; + + V_reg(&(*pMyDisasm).Argument2, pMyDisasm); + MOD_RM(&(*pMyDisasm).Argument4, pMyDisasm); + Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm); + GV.EIP_ += GV.DECALAGE_EIP+2; + L_reg(&(*pMyDisasm).Argument3, pMyDisasm); + + GV.AVX_=0; + GV.SSE_=0; + + GV.third_arg=1; + GV.forth_arg=1; +} + +void VxHxWxLx(PDISASM pMyDisasm) { if(!GV.VEX.has_vex) FailDecode(pMyDisasm); @@ -388,8 +420,21 @@ void VyHyWyLy(PDISASM pMyDisasm) GV.third_arg=1; GV.forth_arg=1; +} +void VyHyWyLy(PDISASM pMyDisasm) +{ + if(!GV.VEX.has_vex) + FailDecode(pMyDisasm); + V_reg(&(*pMyDisasm).Argument2, pMyDisasm); + MOD_RM(&(*pMyDisasm).Argument3, pMyDisasm); + Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm); + GV.EIP_ += GV.DECALAGE_EIP+2; + L_reg(&(*pMyDisasm).Argument4, pMyDisasm); + + GV.third_arg=1; + GV.forth_arg=1; } /* @@ -625,9 +670,303 @@ void vpermilps1 (PDISASM pMyDisasm) FailDecode(pMyDisasm); } + +void xmm_xmm_xmmmem(PDISASM pMyDisasm, const char* opcode) +{ + + (*pMyDisasm).Instruction.Category = AVX_INSTRUCTION; + #ifndef BEA_LIGHT_DISASSEMBLY + if(GV.VEX.has_vex) + { + (void) strcpy ((*pMyDisasm).Instruction.Mnemonic, "v"); + } + (void) strcat ((*pMyDisasm).Instruction.Mnemonic, 
opcode); + if (GV.VEX.has_vex && GV.VEX.W==0) + { + // VEX.W=0: append "s" (single-precision suffix) + (void) strcat ((*pMyDisasm).Instruction.Mnemonic, "s"); + } + else + { + // otherwise (VEX.W=1): append "d" (double-precision suffix) + (void) strcat ((*pMyDisasm).Instruction.Mnemonic, "d"); + } + (void) strcat ((*pMyDisasm).Instruction.Mnemonic, " "); + #endif + + GV.MemDecoration=Arg3fword; + GV.AVX_=GV.VEX.length; + GV.SSE_=!GV.VEX.length; + MOD_RM(&(*pMyDisasm).Argument3, pMyDisasm); + V_reg( &(*pMyDisasm).Argument2, pMyDisasm); + Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm); + GV.EIP_ += GV.DECALAGE_EIP+2; + GV.third_arg=1; + GV.AVX_=0; + GV.SSE_=0; +} + /* 0f 3a 04 */ void vpermilps2 (PDISASM pMyDisasm) { assert(pMyDisasm); /* avoids warning */ assert(0); } + +void FMA_3op(PDISASM pMyDisasm, const char* opcode, int allow_ymms) +{ + if (GV.VEX.has_vex && GV.VEX.length==1 && !allow_ymms) + { + FailDecode(pMyDisasm); + return; + } + xmm_xmm_xmmmem(pMyDisasm, opcode); +} + +#if 0 +/* 0f 38 99 */ +void fmadd132sd(PDISASM pMyDisasm) +{ + if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1) + { + xmm_xmm_xmmmem(pMyDisasm, "fmadd132sd"); + } + else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0) + { + xmm_xmm_xmmmem(pMyDisasm, "fmadd132ss"); + } + else + FailDecode(pMyDisasm); +} + +/* 0f 38 a9 */ +void fmadd213sd(PDISASM pMyDisasm) +{ + if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1) + { + xmm_xmm_xmmmem(pMyDisasm, "fmadd213sd"); + } + else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0) + { + xmm_xmm_xmmmem(pMyDisasm, "fmadd213ss"); + } + else + FailDecode(pMyDisasm); +} +/* 0f 38 b9 */ +void fmadd231sd(PDISASM pMyDisasm) +{ + if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1) + { + xmm_xmm_xmmmem(pMyDisasm, "fmadd231sd"); + } + else if(GV.VEX.has_vex && 
GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0) + { + xmm_xmm_xmmmem(pMyDisasm, "fmadd231ss"); + } + else + FailDecode(pMyDisasm); +} +#endif + +void fmadd132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmadd213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmadd231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmaddsub132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmaddsub213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmaddsub231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmsub132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmsub213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmsub231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmsubadd132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmsubadd213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fmsubadd231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fnmadd132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fnmadd213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fnmadd231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fnmsub132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fnmsub213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } +void fnmsub231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); } + +void fmadd132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fmadd213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fmadd231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fmsub132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fmsub213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fmsub231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); 
} +void fnmadd132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fnmadd213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fnmadd231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fnmsub132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fnmsub213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } +void fnmsub231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); } + + + + + + + +// ************************** +// AMD 4-op FMA instructions +// ************************** + +void FourOpFMA(PDISASM pMyDisasm, const char* opcode, int allow_ymms) +{ + #ifndef BEA_LIGHT_DISASSEMBLY + if(GV.VEX.has_vex) + { + (void) strcpy ((*pMyDisasm).Instruction.Mnemonic, "v"); + } + (void) strcat ((*pMyDisasm).Instruction.Mnemonic, opcode); + (void) strcat ((*pMyDisasm).Instruction.Mnemonic, " "); + #endif + + // mmmmmm==0x3 to get to this function, see opcode comment above func. + if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0) + { + FourOpFMAW0(pMyDisasm); + } + else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1) + { + FourOpFMAW1(pMyDisasm); + } + else if(GV.VEX.has_vex && GV.VEX.length==1 && allow_ymms && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0) + { + FourOpFMAW0(pMyDisasm); + } + else if(GV.VEX.has_vex && GV.VEX.length==1 && allow_ymms && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1) + { + FourOpFMAW1(pMyDisasm); /* W=1: ModRM r/m is the 4th operand, same as the 128-bit W=1 case */ + } + else + FailDecode(pMyDisasm); + +} + + +/* 0f 3a 69 */ +void vfmaddpd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmaddpd", TRUE); +} + +/* 0f 3a 68 */ +void vfmaddps(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmaddps", TRUE); +} + +/* 0f 3a 6b */ +void vfmaddsd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmaddsd", FALSE); +} + +/* 0f 3a 6a */ +void vfmaddss(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmaddss", FALSE); +} + +/* 0f 3a 5d */ +void 
vfmaddsubpd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmaddsubpd", TRUE); +} + +/* 0f 3a 5c */ +void vfmaddsubps(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmaddsubps", TRUE); +} + + +/* 0f 3a 5f */ +void vfmsubaddpd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmsubaddpd", TRUE); +} + +/* 0f 3a 5e */ +void vfmsubaddps(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmsubaddps", TRUE); +} + + +/* 0f 3a 6d */ +void vfmsubpd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmsubpd", TRUE); +} + +/* 0f 3a 6c */ +void vfmsubps(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmsubps", TRUE); +} + +/* 0f 3a 6f */ +void vfmsubsd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmsubsd", FALSE); + +} + +/* 0f 3a 6e */ +void vfmsubss(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fmsubss", FALSE); + +} + +/* 0f 3a 79 */ +void vfnmaddpd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmaddpd", TRUE); +} + +/* 0f 3a 78 */ +void vfnmaddps(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmaddps", TRUE); +} + +/* 0f 3a 7b */ +void vfnmaddsd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmaddsd", FALSE); +} + +/* 0f 3a 7a */ +void vfnmaddss(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmaddss", FALSE); +} + +/* 0f 3a 7d */ +void vfnmsubpd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmsubpd", TRUE); +} + +/* 0f 3a 7c */ +void vfnmsubps(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmsubps", TRUE); +} + +/* 0f 3a 7f */ +void vfnmsubsd(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmsubsd", FALSE); +} + +/* 0f 3a 7e */ +void vfnmsubss(PDISASM pMyDisasm) +{ + FourOpFMA(pMyDisasm,"fnmsubss", FALSE); +} + + diff --git a/beaengine/beaengineSources/Includes/internal_datas.h b/beaengine/beaengineSources/Includes/internal_datas.h index 31771f878..2dbca3444 100644 --- a/beaengine/beaengineSources/Includes/internal_datas.h +++ b/beaengine/beaengineSources/Includes/internal_datas.h @@ -55,6 +55,16 @@ #define Arg3oword 209 #define Arg3yword 210 +#define Arg4byte 301 +#define Arg4word 302 +#define 
Arg4dword 303 +#define Arg4qword 304 +#define Arg4multibytes 305 +#define Arg4tbyte 306 +#define Arg4fword 307 +#define Arg4dqword 308 +#define Arg4oword 309 +#define Arg4yword 310 EFLStruct EFLAGS_TABLE[] = { {UN_, UN_, UN_, MO_, UN_, MO_, 0 , 0 , 0 , 0 , 0, 0}, /* 0-AAA */ diff --git a/beaengine/beaengineSources/Includes/protos.h b/beaengine/beaengineSources/Includes/protos.h index b9a489eba..512688bcf 100644 --- a/beaengine/beaengineSources/Includes/protos.h +++ b/beaengine/beaengineSources/Includes/protos.h @@ -729,9 +729,69 @@ void vinsrtf128 (PDISASM pMyDisasm); void vperm2f128 (PDISASM pMyDisasm); void vpermilps1 (PDISASM pMyDisasm); void vpermilps2 (PDISASM pMyDisasm); - - - - - - +void fmadd132sd(PDISASM pMyDisasm); +void fmadd213sd(PDISASM pMyDisasmd); +void fmadd231sd(PDISASM pMyDisasmd); + + +// amd 4-op fma's +void vfmaddpd(PDISASM pMyDisasm); +void vfmaddps(PDISASM pMyDisasm); +void vfmaddsd(PDISASM pMyDisasm); +void vfmaddss(PDISASM pMyDisasm); + +void vfmaddsubpd(PDISASM pMyDisasm); +void vfmaddsubps(PDISASM pMyDisasm); + +void vfmsubaddpd(PDISASM pMyDisasm); +void vfmsubaddps(PDISASM pMyDisasm); + +void vfmsubpd(PDISASM pMyDisasm); +void vfmsubps(PDISASM pMyDisasm); +void vfmsubsd(PDISASM pMyDisasm); +void vfmsubss(PDISASM pMyDisasm); + +void vfnmaddpd(PDISASM pMyDisasm); +void vfnmaddps(PDISASM pMyDisasm); +void vfnmaddsd(PDISASM pMyDisasm); +void vfnmaddss(PDISASM pMyDisasm); + +void vfnmsubpd(PDISASM pMyDisasm); +void vfnmsubps(PDISASM pMyDisasm); +void vfnmsubsd(PDISASM pMyDisasm); +void vfnmsubss(PDISASM pMyDisasm); + + + +// intel 3-op fma's +void fmadd132p(PDISASM pMyDisasm); +void fmadd213p(PDISASM pMyDisasm); +void fmadd231p(PDISASM pMyDisasm); +void fmaddsub132p(PDISASM pMyDisasm); +void fmaddsub213p(PDISASM pMyDisasm); +void fmaddsub231p(PDISASM pMyDisasm); +void fmsub132p(PDISASM pMyDisasm); +void fmsub213p(PDISASM pMyDisasm); +void fmsub231p(PDISASM pMyDisasm); +void fmsubadd132p(PDISASM pMyDisasm); +void fmsubadd213p(PDISASM 
pMyDisasm); +void fmsubadd231p(PDISASM pMyDisasm); +void fnmadd132p(PDISASM pMyDisasm); +void fnmadd213p(PDISASM pMyDisasm); +void fnmadd231p(PDISASM pMyDisasm); +void fnmsub132p(PDISASM pMyDisasm); +void fnmsub213p(PDISASM pMyDisasm); +void fnmsub231p(PDISASM pMyDisasm); + +void fmadd132s(PDISASM pMyDisasm); +void fmadd213s(PDISASM pMyDisasm); +void fmadd231s(PDISASM pMyDisasm); +void fmsub132s(PDISASM pMyDisasm); +void fmsub213s(PDISASM pMyDisasm); +void fmsub231s(PDISASM pMyDisasm); +void fnmadd132s(PDISASM pMyDisasm); +void fnmadd213s(PDISASM pMyDisasm); +void fnmadd231s(PDISASM pMyDisasm); +void fnmsub132s(PDISASM pMyDisasm); +void fnmsub213s(PDISASM pMyDisasm); +void fnmsub231s(PDISASM pMyDisasm); diff --git a/beaengine/jdh_tests/fma_amd.nasm b/beaengine/jdh_tests/fma_amd.nasm new file mode 100644 index 000000000..acf3d4483 --- /dev/null +++ b/beaengine/jdh_tests/fma_amd.nasm @@ -0,0 +1,107 @@ +bits 64 +global main +section .text +main: + vfmaddpd xmm0, xmm1, xmm2, xmm3 + vfmaddps xmm0, xmm1, xmm2, xmm3 + vfmaddsd xmm0, xmm1, xmm2, xmm3 + vfmaddss xmm0, xmm1, xmm2, xmm3 + vfmaddsubpd xmm0, xmm1, xmm2, xmm3 + vfmaddsubps xmm0, xmm1, xmm2, xmm3 + vfmsubaddpd xmm0, xmm1, xmm2, xmm3 + vfmsubaddps xmm0, xmm1, xmm2, xmm3 + vfmsubpd xmm0, xmm1, xmm2, xmm3 + vfmsubps xmm0, xmm1, xmm2, xmm3 + vfmsubsd xmm0, xmm1, xmm2, xmm3 + vfmsubss xmm0, xmm1, xmm2, xmm3 + vfnmaddpd xmm0, xmm1, xmm2, xmm3 + vfnmaddps xmm0, xmm1, xmm2, xmm3 + vfnmaddsd xmm0, xmm1, xmm2, xmm3 + vfnmaddss xmm0, xmm1, xmm2, xmm3 + vfnmsubpd xmm0, xmm1, xmm2, xmm3 + vfnmsubps xmm0, xmm1, xmm2, xmm3 + vfnmsubsd xmm0, xmm1, xmm2, xmm3 + vfnmsubss xmm0, xmm1, xmm2, xmm3 + + vfmaddpd xmm0, xmm1, [rsp+1024], xmm3 + vfmaddps xmm0, xmm1, [rsp+1024], xmm3 + vfmaddsd xmm0, xmm1, [rsp+1024], xmm3 + vfmaddss xmm0, xmm1, [rsp+1024], xmm3 + vfmaddsubpd xmm0, xmm1, [rsp+1024], xmm3 + vfmaddsubps xmm0, xmm1, [rsp+1024], xmm3 + vfmsubaddpd xmm0, xmm1, [rsp+1024], xmm3 + vfmsubaddps xmm0, xmm1, [rsp+1024], xmm3 + 
vfmsubpd xmm0, xmm1, [rsp+1024], xmm3 + vfmsubps xmm0, xmm1, [rsp+1024], xmm3 + vfmsubsd xmm0, xmm1, [rsp+1024], xmm3 + vfmsubss xmm0, xmm1, [rsp+1024], xmm3 + vfnmaddpd xmm0, xmm1, [rsp+1024], xmm3 + vfnmaddps xmm0, xmm1, [rsp+1024], xmm3 + vfnmaddsd xmm0, xmm1, [rsp+1024], xmm3 + vfnmaddss xmm0, xmm1, [rsp+1024], xmm3 + vfnmsubpd xmm0, xmm1, [rsp+1024], xmm3 + vfnmsubps xmm0, xmm1, [rsp+1024], xmm3 + vfnmsubsd xmm0, xmm1, [rsp+1024], xmm3 + vfnmsubss xmm0, xmm1, [rsp+1024], xmm3 + + vfmaddpd xmm0, xmm1, xmm2, [rsp+1024] + vfmaddps xmm0, xmm1, xmm2, [rsp+1024] + vfmaddsd xmm0, xmm1, xmm2, [rsp+1024] + vfmaddss xmm0, xmm1, xmm2, [rsp+1024] + vfmaddsubpd xmm0, xmm1, xmm2, [rsp+1024] + vfmaddsubps xmm0, xmm1, xmm2, [rsp+1024] + vfmsubaddpd xmm0, xmm1, xmm2, [rsp+1024] + vfmsubaddps xmm0, xmm1, xmm2, [rsp+1024] + vfmsubpd xmm0, xmm1, xmm2, [rsp+1024] + vfmsubps xmm0, xmm1, xmm2, [rsp+1024] + vfmsubsd xmm0, xmm1, xmm2, [rsp+1024] + vfmsubss xmm0, xmm1, xmm2, [rsp+1024] + vfnmaddpd xmm0, xmm1, xmm2, [rsp+1024] + vfnmaddps xmm0, xmm1, xmm2, [rsp+1024] + vfnmaddsd xmm0, xmm1, xmm2, [rsp+1024] + vfnmaddss xmm0, xmm1, xmm2, [rsp+1024] + vfnmsubpd xmm0, xmm1, xmm2, [rsp+1024] + vfnmsubps xmm0, xmm1, xmm2, [rsp+1024] + vfnmsubsd xmm0, xmm1, xmm2, [rsp+1024] + vfnmsubss xmm0, xmm1, xmm2, [rsp+1024] + +; test ymm versions + vfmaddpd ymm0, ymm1, ymm2, ymm3 + vfmaddps ymm0, ymm1, ymm2, ymm3 + vfmaddsubpd ymm0, ymm1, ymm2, ymm3 + vfmaddsubps ymm0, ymm1, ymm2, ymm3 + vfmsubaddpd ymm0, ymm1, ymm2, ymm3 + vfmsubaddps ymm0, ymm1, ymm2, ymm3 + vfmsubpd ymm0, ymm1, ymm2, ymm3 + vfmsubps ymm0, ymm1, ymm2, ymm3 + vfnmaddpd ymm0, ymm1, ymm2, ymm3 + vfnmaddps ymm0, ymm1, ymm2, ymm3 + vfnmsubpd ymm0, ymm1, ymm2, ymm3 + vfnmsubps ymm0, ymm1, ymm2, ymm3 + + vfmaddpd ymm0, ymm1, [rsp+1024], ymm3 + vfmaddps ymm0, ymm1, [rsp+1024], ymm3 + vfmaddsubpd ymm0, ymm1, [rsp+1024], ymm3 + vfmaddsubps ymm0, ymm1, [rsp+1024], ymm3 + vfmsubaddpd ymm0, ymm1, [rsp+1024], ymm3 + vfmsubaddps ymm0, ymm1, 
[rsp+1024], ymm3 + vfmsubpd ymm0, ymm1, [rsp+1024], ymm3 + vfmsubps ymm0, ymm1, [rsp+1024], ymm3 + vfnmaddpd ymm0, ymm1, [rsp+1024], ymm3 + vfnmaddps ymm0, ymm1, [rsp+1024], ymm3 + vfnmsubpd ymm0, ymm1, [rsp+1024], ymm3 + vfnmsubps ymm0, ymm1, [rsp+1024], ymm3 + + vfmaddpd ymm0, ymm1, ymm2, [rsp+1024] + vfmaddps ymm0, ymm1, ymm2, [rsp+1024] + vfmaddsubpd ymm0, ymm1, ymm2, [rsp+1024] + vfmaddsubps ymm0, ymm1, ymm2, [rsp+1024] + vfmsubaddpd ymm0, ymm1, ymm2, [rsp+1024] + vfmsubaddps ymm0, ymm1, ymm2, [rsp+1024] + vfmsubpd ymm0, ymm1, ymm2, [rsp+1024] + vfmsubps ymm0, ymm1, ymm2, [rsp+1024] + vfnmaddpd ymm0, ymm1, ymm2, [rsp+1024] + vfnmaddps ymm0, ymm1, ymm2, [rsp+1024] + vfnmsubpd ymm0, ymm1, ymm2, [rsp+1024] + vfnmsubps ymm0, ymm1, ymm2, [rsp+1024] + diff --git a/beaengine/jdh_tests/fma_intel.nasm b/beaengine/jdh_tests/fma_intel.nasm new file mode 100644 index 000000000..90dea376b --- /dev/null +++ b/beaengine/jdh_tests/fma_intel.nasm @@ -0,0 +1,237 @@ +bits 64 +global main +section .text +main: + +; 3 register pd+ps + +vfmadd132pd xmm0, xmm1, xmm2 +vfmadd132ps xmm0, xmm1, xmm2 +vfmadd213pd xmm0, xmm1, xmm2 +vfmadd213ps xmm0, xmm1, xmm2 +vfmadd231pd xmm0, xmm1, xmm2 +vfmadd231ps xmm0, xmm1, xmm2 +vfmaddsub132pd xmm0, xmm1, xmm2 +vfmaddsub132ps xmm0, xmm1, xmm2 +vfmaddsub213pd xmm0, xmm1, xmm2 +vfmaddsub213ps xmm0, xmm1, xmm2 +vfmaddsub231pd xmm0, xmm1, xmm2 +vfmaddsub231ps xmm0, xmm1, xmm2 +vfmsub132pd xmm0, xmm1, xmm2 +vfmsub132ps xmm0, xmm1, xmm2 +vfmsub213pd xmm0, xmm1, xmm2 +vfmsub213ps xmm0, xmm1, xmm2 +vfmsub231pd xmm0, xmm1, xmm2 +vfmsub231ps xmm0, xmm1, xmm2 +vfmsubadd132pd xmm0, xmm1, xmm2 +vfmsubadd132ps xmm0, xmm1, xmm2 +vfmsubadd213pd xmm0, xmm1, xmm2 +vfmsubadd213ps xmm0, xmm1, xmm2 +vfmsubadd231pd xmm0, xmm1, xmm2 +vfmsubadd231ps xmm0, xmm1, xmm2 +vfnmadd132pd xmm0, xmm1, xmm2 +vfnmadd132ps xmm0, xmm1, xmm2 +vfnmadd213pd xmm0, xmm1, xmm2 +vfnmadd213ps xmm0, xmm1, xmm2 +vfnmadd231pd xmm0, xmm1, xmm2 +vfnmadd231ps xmm0, xmm1, xmm2 +vfnmsub132pd xmm0, 
xmm1, xmm2 +vfnmsub132ps xmm0, xmm1, xmm2 +vfnmsub213pd xmm0, xmm1, xmm2 +vfnmsub213ps xmm0, xmm1, xmm2 +vfnmsub231pd xmm0, xmm1, xmm2 +vfnmsub231ps xmm0, xmm1, xmm2 + + + +; 2 register+mem pd+ps + +vfmadd132pd xmm0, xmm1, [rsp+1024] +vfmadd132ps xmm0, xmm1, [rsp+1024] +vfmadd213pd xmm0, xmm1, [rsp+1024] +vfmadd213ps xmm0, xmm1, [rsp+1024] +vfmadd231pd xmm0, xmm1, [rsp+1024] +vfmadd231ps xmm0, xmm1, [rsp+1024] +vfmaddsub132pd xmm0, xmm1, [rsp+1024] +vfmaddsub132ps xmm0, xmm1, [rsp+1024] +vfmaddsub213pd xmm0, xmm1, [rsp+1024] +vfmaddsub213ps xmm0, xmm1, [rsp+1024] +vfmaddsub231pd xmm0, xmm1, [rsp+1024] +vfmaddsub231ps xmm0, xmm1, [rsp+1024] +vfmsub132pd xmm0, xmm1, [rsp+1024] +vfmsub132ps xmm0, xmm1, [rsp+1024] +vfmsub213pd xmm0, xmm1, [rsp+1024] +vfmsub213ps xmm0, xmm1, [rsp+1024] +vfmsub231pd xmm0, xmm1, [rsp+1024] +vfmsub231ps xmm0, xmm1, [rsp+1024] +vfmsubadd132pd xmm0, xmm1, [rsp+1024] +vfmsubadd132ps xmm0, xmm1, [rsp+1024] +vfmsubadd213pd xmm0, xmm1, [rsp+1024] +vfmsubadd213ps xmm0, xmm1, [rsp+1024] +vfmsubadd231pd xmm0, xmm1, [rsp+1024] +vfmsubadd231ps xmm0, xmm1, [rsp+1024] +vfnmadd132pd xmm0, xmm1, [rsp+1024] +vfnmadd132ps xmm0, xmm1, [rsp+1024] +vfnmadd213pd xmm0, xmm1, [rsp+1024] +vfnmadd213ps xmm0, xmm1, [rsp+1024] +vfnmadd231pd xmm0, xmm1, [rsp+1024] +vfnmadd231ps xmm0, xmm1, [rsp+1024] +vfnmsub132pd xmm0, xmm1, [rsp+1024] +vfnmsub132ps xmm0, xmm1, [rsp+1024] +vfnmsub213pd xmm0, xmm1, [rsp+1024] +vfnmsub213ps xmm0, xmm1, [rsp+1024] +vfnmsub231pd xmm0, xmm1, [rsp+1024] +vfnmsub231ps xmm0, xmm1, [rsp+1024] + + +; 3 register pd+ps as y-regs + +vfmadd132pd ymm0, ymm1, ymm2 +vfmadd132ps ymm0, ymm1, ymm2 +vfmadd213pd ymm0, ymm1, ymm2 +vfmadd213ps ymm0, ymm1, ymm2 +vfmadd231pd ymm0, ymm1, ymm2 +vfmadd231ps ymm0, ymm1, ymm2 +vfmaddsub132pd ymm0, ymm1, ymm2 +vfmaddsub132ps ymm0, ymm1, ymm2 +vfmaddsub213pd ymm0, ymm1, ymm2 +vfmaddsub213ps ymm0, ymm1, ymm2 +vfmaddsub231pd ymm0, ymm1, ymm2 +vfmaddsub231ps ymm0, ymm1, ymm2 +vfmsub132pd ymm0, ymm1, ymm2 +vfmsub132ps 
ymm0, ymm1, ymm2 +vfmsub213pd ymm0, ymm1, ymm2 +vfmsub213ps ymm0, ymm1, ymm2 +vfmsub231pd ymm0, ymm1, ymm2 +vfmsub231ps ymm0, ymm1, ymm2 +vfmsubadd132pd ymm0, ymm1, ymm2 +vfmsubadd132ps ymm0, ymm1, ymm2 +vfmsubadd213pd ymm0, ymm1, ymm2 +vfmsubadd213ps ymm0, ymm1, ymm2 +vfmsubadd231pd ymm0, ymm1, ymm2 +vfmsubadd231ps ymm0, ymm1, ymm2 +vfnmadd132pd ymm0, ymm1, ymm2 +vfnmadd132ps ymm0, ymm1, ymm2 +vfnmadd213pd ymm0, ymm1, ymm2 +vfnmadd213ps ymm0, ymm1, ymm2 +vfnmadd231pd ymm0, ymm1, ymm2 +vfnmadd231ps ymm0, ymm1, ymm2 +vfnmsub132pd ymm0, ymm1, ymm2 +vfnmsub132ps ymm0, ymm1, ymm2 +vfnmsub213pd ymm0, ymm1, ymm2 +vfnmsub213ps ymm0, ymm1, ymm2 +vfnmsub231pd ymm0, ymm1, ymm2 +vfnmsub231ps ymm0, ymm1, ymm2 + +; 2 register+mem pd+ps as y-regs + +vfmadd132pd ymm0, ymm1, [rsp+1024] +vfmadd132ps ymm0, ymm1, [rsp+1024] +vfmadd213pd ymm0, ymm1, [rsp+1024] +vfmadd213ps ymm0, ymm1, [rsp+1024] +vfmadd231pd ymm0, ymm1, [rsp+1024] +vfmadd231ps ymm0, ymm1, [rsp+1024] +vfmaddsub132pd ymm0, ymm1, [rsp+1024] +vfmaddsub132ps ymm0, ymm1, [rsp+1024] +vfmaddsub213pd ymm0, ymm1, [rsp+1024] +vfmaddsub213ps ymm0, ymm1, [rsp+1024] +vfmaddsub231pd ymm0, ymm1, [rsp+1024] +vfmaddsub231ps ymm0, ymm1, [rsp+1024] +vfmsub132pd ymm0, ymm1, [rsp+1024] +vfmsub132ps ymm0, ymm1, [rsp+1024] +vfmsub213pd ymm0, ymm1, [rsp+1024] +vfmsub213ps ymm0, ymm1, [rsp+1024] +vfmsub231pd ymm0, ymm1, [rsp+1024] +vfmsub231ps ymm0, ymm1, [rsp+1024] +vfmsubadd132pd ymm0, ymm1, [rsp+1024] +vfmsubadd132ps ymm0, ymm1, [rsp+1024] +vfmsubadd213pd ymm0, ymm1, [rsp+1024] +vfmsubadd213ps ymm0, ymm1, [rsp+1024] +vfmsubadd231pd ymm0, ymm1, [rsp+1024] +vfmsubadd231ps ymm0, ymm1, [rsp+1024] +vfnmadd132pd ymm0, ymm1, [rsp+1024] +vfnmadd132ps ymm0, ymm1, [rsp+1024] +vfnmadd213pd ymm0, ymm1, [rsp+1024] +vfnmadd213ps ymm0, ymm1, [rsp+1024] +vfnmadd231pd ymm0, ymm1, [rsp+1024] +vfnmadd231ps ymm0, ymm1, [rsp+1024] +vfnmsub132pd ymm0, ymm1, [rsp+1024] +vfnmsub132ps ymm0, ymm1, [rsp+1024] +vfnmsub213pd ymm0, ymm1, [rsp+1024] +vfnmsub213ps ymm0, 
ymm1, [rsp+1024] +vfnmsub231pd ymm0, ymm1, [rsp+1024] +vfnmsub231ps ymm0, ymm1, [rsp+1024] + + + + + + + + + + + + + + + + + + + +; 3 register sd+ss + +vfmadd132sd xmm0, xmm1, xmm2 +vfmadd132ss xmm0, xmm1, xmm2 +vfmadd213sd xmm0, xmm1, xmm2 +vfmadd213ss xmm0, xmm1, xmm2 +vfmadd231sd xmm0, xmm1, xmm2 +vfmadd231ss xmm0, xmm1, xmm2 +vfmsub132sd xmm0, xmm1, xmm2 +vfmsub132ss xmm0, xmm1, xmm2 +vfmsub213sd xmm0, xmm1, xmm2 +vfmsub213ss xmm0, xmm1, xmm2 +vfmsub231sd xmm0, xmm1, xmm2 +vfmsub231ss xmm0, xmm1, xmm2 +vfnmadd132sd xmm0, xmm1, xmm2 +vfnmadd132ss xmm0, xmm1, xmm2 +vfnmadd213sd xmm0, xmm1, xmm2 +vfnmadd213ss xmm0, xmm1, xmm2 +vfnmadd231sd xmm0, xmm1, xmm2 +vfnmadd231ss xmm0, xmm1, xmm2 +vfnmsub132sd xmm0, xmm1, xmm2 +vfnmsub132ss xmm0, xmm1, xmm2 +vfnmsub213sd xmm0, xmm1, xmm2 +vfnmsub213ss xmm0, xmm1, xmm2 +vfnmsub231sd xmm0, xmm1, xmm2 +vfnmsub231ss xmm0, xmm1, xmm2 + + + +; 2 register+mem sd+ss + +vfmadd132sd xmm0, xmm1, [rsp+1024] +vfmadd132ss xmm0, xmm1, [rsp+1024] +vfmadd213sd xmm0, xmm1, [rsp+1024] +vfmadd213ss xmm0, xmm1, [rsp+1024] +vfmadd231sd xmm0, xmm1, [rsp+1024] +vfmadd231ss xmm0, xmm1, [rsp+1024] +vfmsub132sd xmm0, xmm1, [rsp+1024] +vfmsub132ss xmm0, xmm1, [rsp+1024] +vfmsub213sd xmm0, xmm1, [rsp+1024] +vfmsub213ss xmm0, xmm1, [rsp+1024] +vfmsub231sd xmm0, xmm1, [rsp+1024] +vfmsub231ss xmm0, xmm1, [rsp+1024] +vfnmadd132sd xmm0, xmm1, [rsp+1024] +vfnmadd132ss xmm0, xmm1, [rsp+1024] +vfnmadd213sd xmm0, xmm1, [rsp+1024] +vfnmadd213ss xmm0, xmm1, [rsp+1024] +vfnmadd231sd xmm0, xmm1, [rsp+1024] +vfnmadd231ss xmm0, xmm1, [rsp+1024] +vfnmsub132sd xmm0, xmm1, [rsp+1024] +vfnmsub132ss xmm0, xmm1, [rsp+1024] +vfnmsub213sd xmm0, xmm1, [rsp+1024] +vfnmsub213ss xmm0, xmm1, [rsp+1024] +vfnmsub231sd xmm0, xmm1, [rsp+1024] +vfnmsub231ss xmm0, xmm1, [rsp+1024] -- GitLab