From 3a1d4bcd4eac840da2cded2ab77c671cc07418b5 Mon Sep 17 00:00:00 2001
From: jdh8d <jdh8d@git.zephyr-software.com>
Date: Wed, 4 May 2016 16:12:53 +0000
Subject: [PATCH] added intel and AMD FMA instructions

Former-commit-id: 596fbe6d6515b795cce7114c28ee4030e6b5442e
---
 .gitattributes                                |   2 +
 .../Includes/Routines_Disasm.c                | 132 ++++++-
 .../Includes/instr_set/Data_opcode.h          |  42 +--
 .../Includes/instr_set/opcodes_VEX.c          | 343 +++++++++++++++++-
 .../Includes/internal_datas.h                 |  10 +
 beaengine/beaengineSources/Includes/protos.h  |  72 +++-
 beaengine/jdh_tests/fma_amd.nasm              | 107 ++++++
 beaengine/jdh_tests/fma_intel.nasm            | 237 ++++++++++++
 8 files changed, 911 insertions(+), 34 deletions(-)
 create mode 100644 beaengine/jdh_tests/fma_amd.nasm
 create mode 100644 beaengine/jdh_tests/fma_intel.nasm

diff --git a/.gitattributes b/.gitattributes
index 4f98e22b9..e85e03e95 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -175,6 +175,8 @@ beaengine/jdh_tests/Makefile -text
 beaengine/jdh_tests/avx1.nasm -text
 beaengine/jdh_tests/avx2.nasm -text
 beaengine/jdh_tests/dbea.c -text
+beaengine/jdh_tests/fma_amd.nasm -text
+beaengine/jdh_tests/fma_intel.nasm -text
 beaengine/jdh_tests/insns.s -text
 beaengine/jdh_tests/insns_avx.s -text
 /build.sh -text
diff --git a/beaengine/beaengineSources/Includes/Routines_Disasm.c b/beaengine/beaengineSources/Includes/Routines_Disasm.c
index fd9b6141d..f5b3bfb84 100644
--- a/beaengine/beaengineSources/Includes/Routines_Disasm.c
+++ b/beaengine/beaengineSources/Includes/Routines_Disasm.c
@@ -114,6 +114,56 @@ int __bea_callspec__ InitVariables (PDISASM pMyDisasm) {
  * ==================================================================== */
 void __bea_callspec__ FixArgSizeForMemoryOperand (PDISASM pMyDisasm) {
 
+    if (GV.MemDecoration == Arg4byte) {
+        (*pMyDisasm).Argument4.ArgSize = 8;
+    }
+    else if (GV.MemDecoration == Arg4word) {
+        (*pMyDisasm).Argument4.ArgSize = 16;
+    }
+    else if (GV.MemDecoration == Arg4dword) {
+        (*pMyDisasm).Argument4.ArgSize = 32;
+    }
+    else if (GV.MemDecoration == Arg4qword) {
+        (*pMyDisasm).Argument4.ArgSize = 64;
+    }
+    else if (GV.MemDecoration == Arg4multibytes) {
+        (*pMyDisasm).Argument4.ArgSize = 0;
+    }
+    else if (GV.MemDecoration == Arg4tbyte) {
+        (*pMyDisasm).Argument4.ArgSize = 80;
+    }
+    else if (GV.MemDecoration == Arg4fword) {
+        (*pMyDisasm).Argument4.ArgSize = 48;
+    }
+    else if (GV.MemDecoration == Arg4dqword) {
+        (*pMyDisasm).Argument4.ArgSize = 128;
+    }
+
+    if (GV.MemDecoration == Arg3byte) {
+        (*pMyDisasm).Argument3.ArgSize = 8;
+    }
+    else if (GV.MemDecoration == Arg3word) {
+        (*pMyDisasm).Argument3.ArgSize = 16;
+    }
+    else if (GV.MemDecoration == Arg3dword) {
+        (*pMyDisasm).Argument3.ArgSize = 32;
+    }
+    else if (GV.MemDecoration == Arg3qword) {
+        (*pMyDisasm).Argument3.ArgSize = 64;
+    }
+    else if (GV.MemDecoration == Arg3multibytes) {
+        (*pMyDisasm).Argument3.ArgSize = 0;
+    }
+    else if (GV.MemDecoration == Arg3tbyte) {
+        (*pMyDisasm).Argument3.ArgSize = 80;
+    }
+    else if (GV.MemDecoration == Arg3fword) {
+        (*pMyDisasm).Argument3.ArgSize = 48;
+    }
+    else if (GV.MemDecoration == Arg3dqword) {
+        (*pMyDisasm).Argument3.ArgSize = 128;
+    }
+
     if (GV.MemDecoration == Arg2byte) {
         (*pMyDisasm).Argument2.ArgSize = 8;
     }
@@ -1075,11 +1125,83 @@ void __bea_callspec__ BuildCompleteInstruction(PDISASM pMyDisasm)
 		}
     	}
     /* =============== if Arg4.Exists */
-    if (GV.forth_arg != 0) {
-        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, ", ");
-        i += 2;
-        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, (char*) &(*pMyDisasm).Argument4.ArgMnemonic);
-        i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+    if (GV.forth_arg != 0)
+    {
+        /* Append ", " and operand 4; an Arg4* memory decoration adds a size prefix, segment register text and brackets. */
+        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, ", ");
+        i += 2;
+        if ((GV.MemDecoration >= Arg4byte) && (GV.MemDecoration <= Arg4yword))
+        {
+            GV.MemDecoration -= 300;
+            if (GV.SYNTAX_ == NasmSyntax)
+            {
+                /* take the size prefix from the table of the selected syntax only */
+                (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, NasmPrefixes[GV.MemDecoration-1]);
+                i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+                if ((GV.SEGMENTREGS != 0) || (GV.SEGMENTFS != 0))
+                {
+                    (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "[");
+                    i++;
+                    if (GV.SEGMENTREGS != 0)
+                    {
+                        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i,
+                            SegmentRegs[(*pMyDisasm).Argument4.SegmentReg]);
+                    }
+                    else
+                    {
+                        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, SegmentRegs[3]);
+                    }
+                    i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+                }
+                else
+                {
+                    (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "[");
+                    i++;
+                }
+            }
+            else
+            {
+                if (GV.SYNTAX_ == MasmSyntax)
+                {
+                    (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, MasmPrefixes[GV.MemDecoration-1]);
+                    i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+                }
+                else
+                {
+                    (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, GoAsmPrefixes[GV.MemDecoration-1]);
+                    i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+                }
+                if ((GV.SEGMENTREGS != 0) || (GV.SEGMENTFS != 0))
+                {
+                    if (GV.SEGMENTREGS != 0)
+                    {
+                        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, SegmentRegs[(*pMyDisasm).Argument4.SegmentReg]);
+                    }
+                    else
+                    {
+                        (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, SegmentRegs[3]);
+                    }
+                    i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+                    (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "[");
+                    i++;
+                }
+                else
+                {
+                    (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "[");
+                    i++;
+                }
+            }
+            /* =============== add Arg4.ArgMnemonic */
+            (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, (char*) &(*pMyDisasm).Argument4.ArgMnemonic);
+            i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+            (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, "]");
+            i++;
+        }
+        else
+        {
+            (void) strcpy ((char*) &(*pMyDisasm).CompleteInstr+i, (char*) &(*pMyDisasm).Argument4.ArgMnemonic);
+            i = strlen((char*) &(*pMyDisasm).CompleteInstr);
+        }
     }
 
 
diff --git a/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h b/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h
index c48187339..bc6437996 100644
--- a/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h
+++ b/beaengine/beaengineSources/Includes/instr_set/Data_opcode.h
@@ -79,23 +79,23 @@ void (__bea_callspec__ *opcode_map2[])(PDISASM) = {
 /*														                            */
 /* ===============================================================================  */
  void (__bea_callspec__ *opcode_map3[])(PDISASM) = {
-/*   0           1  	     2	         3           4           5           6           7           8           9           a           b           c           d           e           f */
-/*0*/pshufb_   , phaddw_   , phaddd_   , phaddsw_  , pmaddubsw_, phsubw_   , phsubd_   , phsubsw_  , psignb_   , psignw_   , psignd_   , pmulhrsw_ , vpermilps1, FailDecode, FailDecode, FailDecode,
-/*1*/pblendvb_ , FailDecode, FailDecode, FailDecode, blendvps_ , blendvpd_ , FailDecode, ptest_    , vbrdcstss , vbrdcstsd , FailDecode, FailDecode, pabsb_    , pabsw_    , pabsd_    , FailDecode,
-/*2*/pmovsxbw_ , pmovsxbd_ , pmovsxbq_ , pmovsxwd_ , pmovsxwq_ , pmovsxdq_ , FailDecode, FailDecode, pmuldq_   , pcmpeqq_  , movntdqa_ , packusdw_ , FailDecode, FailDecode, FailDecode, FailDecode,
-/*3*/pmovzxbw_ , pmovzxbd_ , pmovzxbq_ , pmovzxwd_ , pmovzxwq_ , pmovzxdq_ , FailDecode, pcmpgtq_  , pminsb_   , pminsd_   , pminuw_   , pminud_   , pmaxsb_   , pmaxsd_   , pmaxuw_   , pmaxud_   ,
-/*4*/pmulld_   ,phminposuw_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*5*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*6*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*7*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*8*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*9*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*a*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*b*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*c*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*d*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, aesimc    , aesenc    , aesenclast, aesdec    , aesdeclast,
-/*e*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*e*/crc32_GvEb, crc32_GvEv, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*   0           1  	     2	         3           4           5           6            7             8           9           a           b           c           d           e           f */
+/*0*/pshufb_   , phaddw_   , phaddd_   , phaddsw_  , pmaddubsw_, phsubw_   , phsubd_   ,  phsubsw_  ,  psignb_   , psignw_   , psignd_   , pmulhrsw_ , vpermilps1, FailDecode, FailDecode, FailDecode,
+/*1*/pblendvb_ , FailDecode, FailDecode, FailDecode, blendvps_ , blendvpd_ , FailDecode,  ptest_    ,  vbrdcstss , vbrdcstsd , FailDecode, FailDecode, pabsb_    , pabsw_    , pabsd_    , FailDecode,
+/*2*/pmovsxbw_ , pmovsxbd_ , pmovsxbq_ , pmovsxwd_ , pmovsxwq_ , pmovsxdq_ , FailDecode,  FailDecode,  pmuldq_   , pcmpeqq_  , movntdqa_ , packusdw_ , FailDecode, FailDecode, FailDecode, FailDecode,
+/*3*/pmovzxbw_ , pmovzxbd_ , pmovzxbq_ , pmovzxwd_ , pmovzxwq_ , pmovzxdq_ , FailDecode,  pcmpgtq_  ,  pminsb_   , pminsd_   , pminuw_   , pminud_   , pmaxsb_   , pmaxsd_   , pmaxuw_   , pmaxud_   ,
+/*4*/pmulld_   ,phminposuw_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*5*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*6*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*7*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*8*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*9*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, fmaddsub132p,fmsubadd132p,fmadd132p,  fmadd132s , fmsub132p,  fmsub132s,  fnmadd132p, fnmadd132s, fnmsub132p, fnmsub132s,
+/*a*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, fmaddsub213p,fmsubadd213p,fmadd213p,  fmadd213s , fmsub213p,  fmsub213s,  fnmadd213p, fnmadd213s, fnmsub213p, fnmsub213s,
+/*b*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, fmaddsub231p,fmsubadd231p,fmadd231p,  fmadd231s , fmsub231p,  fmsub231s,  fnmadd231p, fnmadd231s, fnmsub231p, fnmsub231s,
+/*c*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*d*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, aesimc    , aesenc    , aesenclast, aesdec    , aesdeclast,
+/*e*/FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*f*/crc32_GvEb, crc32_GvEv, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,  FailDecode,  FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
  };
 
 /* ===============================================================================  */
@@ -111,10 +111,10 @@ void (__bea_callspec__ *opcode_map2[])(PDISASM) = {
 /*1*/    FailDecode, FailDecode, FailDecode, FailDecode, pextrb_   , pextrw2_  , pextrd_   , extractps_, vinsrtf128, vextraf128, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
 /*2*/    pinsrb_   , insertps_ , pinsrd_   , FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
 /*3*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*4*/    dpps_     , dppd_     , mpsadbw_  , FailDecode, pclmulqdq_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vblendvpd, FailDecode, FailDecode, FailDecode, FailDecode,
-/*5*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*6*/    pcmpestrm_, pcmpestri_, pcmpistrm_, pcmpistri_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
-/*7*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
+/*4*/    dpps_     , dppd_     , mpsadbw_  , FailDecode, pclmulqdq_, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vblendvpd , FailDecode, FailDecode, FailDecode, FailDecode,
+/*5*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vfmaddsubps,vfmaddsubpd,vfmsubaddps,vfmsubaddpd,
+/*6*/    pcmpestrm_, pcmpestri_, pcmpistrm_, pcmpistri_, FailDecode, FailDecode, FailDecode, FailDecode, vfmaddps  , vfmaddpd  , vfmaddss  , vfmaddsd  , vfmsubps  , vfmsubpd  , vfmsubss  , vfmsubsd  ,
+/*7*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, vfnmaddps , vfnmaddpd , vfnmaddss , vfnmaddsd , vfnmsubps , vfnmsubpd , vfnmsubss , vfnmsubsd ,
 /*8*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
 /*9*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
 /*a*/    FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode, FailDecode,
diff --git a/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c b/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c
index adeae0936..a7510e31a 100644
--- a/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c
+++ b/beaengine/beaengineSources/Includes/instr_set/opcodes_VEX.c
@@ -95,6 +95,8 @@ instructions that leave the upper half of the register unchanged, for reasons of
 
 #include <assert.h>
 
+#define TRUE 1
+#define FALSE 0
 
 static void finish_vex(PDISASM pMyDisasm)
 {
@@ -360,22 +362,52 @@ void L_reg(ARGTYPE* arg, PDISASM pMyDisasm)
 	GV.EIP_++;
 }
 
-void VxHxWxLx(PDISASM pMyDisasm)
+
+void FourOpFMAW0(PDISASM pMyDisasm)
 {
 	if(!GV.VEX.has_vex)
 		FailDecode(pMyDisasm);
 
+	GV.MemDecoration=Arg3fword;
+	GV.AVX_=GV.VEX.length;
+	GV.SSE_=!GV.VEX.length;
+
 	V_reg(&(*pMyDisasm).Argument2, pMyDisasm);
     	MOD_RM(&(*pMyDisasm).Argument3, pMyDisasm);
     	Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm);
     	GV.EIP_ += GV.DECALAGE_EIP+2;
 	L_reg(&(*pMyDisasm).Argument4, pMyDisasm);
 
+	GV.AVX_=0;
+	GV.SSE_=0;
+
 	GV.third_arg=1;
 	GV.forth_arg=1;
 }
 
-void VyHyWyLy(PDISASM pMyDisasm)
+void FourOpFMAW1(PDISASM pMyDisasm) /* operand layout FourOpFMA selects for VEX.W==1: MOD_RM() fills Argument4, L_reg() fills Argument3 */
+{
+	if(!GV.VEX.has_vex)
+		FailDecode(pMyDisasm); /* NOTE(review): no return follows, so the statements below still run -- confirm intended */
+
+	GV.MemDecoration=Arg4fword; /* decoration is attached to operand 4, the MOD_RM() operand here */
+	GV.AVX_=GV.VEX.length; /* AVX_/SSE_ are set only around the operand decoding and cleared below */
+	GV.SSE_=!GV.VEX.length;
+
+	V_reg(&(*pMyDisasm).Argument2, pMyDisasm);
+    	MOD_RM(&(*pMyDisasm).Argument4, pMyDisasm);
+    	Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm);
+    	GV.EIP_ += GV.DECALAGE_EIP+2;
+	L_reg(&(*pMyDisasm).Argument3, pMyDisasm);
+
+	GV.AVX_=0;
+	GV.SSE_=0;
+
+	GV.third_arg=1; /* tell the instruction builder that operands 3 and 4 are present */
+	GV.forth_arg=1;
+}
+
+void VxHxWxLx(PDISASM pMyDisasm)
 {
 	if(!GV.VEX.has_vex)
 		FailDecode(pMyDisasm);
@@ -388,8 +420,21 @@ void VyHyWyLy(PDISASM pMyDisasm)
 
 	GV.third_arg=1;
 	GV.forth_arg=1;
+}
 
+void VyHyWyLy(PDISASM pMyDisasm)
+{
+	if(!GV.VEX.has_vex)
+		FailDecode(pMyDisasm);
 
+	V_reg(&(*pMyDisasm).Argument2, pMyDisasm);
+    	MOD_RM(&(*pMyDisasm).Argument3, pMyDisasm);
+    	Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm);
+    	GV.EIP_ += GV.DECALAGE_EIP+2;
+	L_reg(&(*pMyDisasm).Argument4, pMyDisasm);
+
+	GV.third_arg=1;
+	GV.forth_arg=1;
 }
 
 /*
@@ -625,9 +670,303 @@ void vpermilps1 (PDISASM pMyDisasm)
 		FailDecode(pMyDisasm);
 }
 
+/* Three-operand decode: Argument1 via Reg_Opcode(), Argument2 via V_reg(), Argument3 via MOD_RM(); mnemonic is ["v"] + opcode + "s"/"d" + " ". */
+void xmm_xmm_xmmmem(PDISASM pMyDisasm, const char* opcode)
+{
+
+	(*pMyDisasm).Instruction.Category = AVX_INSTRUCTION;
+        #ifndef BEA_LIGHT_DISASSEMBLY
+        if(GV.VEX.has_vex)
+        {
+                (void) strcpy ((*pMyDisasm).Instruction.Mnemonic, "v");
+        }
+	(void) strcat ((*pMyDisasm).Instruction.Mnemonic, opcode);
+	if (GV.VEX.has_vex && GV.VEX.W==0)
+	{
+		// packed single or scalar single if w=0
+		(void) strcat ((*pMyDisasm).Instruction.Mnemonic, "s");
+	}
+	else
+	{
+		// packed double or scalar double if w=1 (this branch is also taken when there is no VEX prefix)
+		(void) strcat ((*pMyDisasm).Instruction.Mnemonic, "d");
+	}
+	(void) strcat ((*pMyDisasm).Instruction.Mnemonic, " ");
+	#endif
+
+	GV.MemDecoration=Arg3fword; /* NOTE(review): fword decoration on the MOD_RM() operand -- confirm this is the intended size */
+	GV.AVX_=GV.VEX.length; /* AVX_/SSE_ are set only around the operand decoding and cleared below */
+	GV.SSE_=!GV.VEX.length;
+	MOD_RM(&(*pMyDisasm).Argument3, pMyDisasm);
+	V_reg( &(*pMyDisasm).Argument2, pMyDisasm);
+	Reg_Opcode(&(*pMyDisasm).Argument1, pMyDisasm);
+	GV.EIP_ += GV.DECALAGE_EIP+2;
+	GV.third_arg=1; /* tell the instruction builder that operand 3 is present */
+	GV.AVX_=0;
+	GV.SSE_=0;
+}
+
 /* 0f 3a 04 */
 void vpermilps2 (PDISASM pMyDisasm)
 {
 	assert(pMyDisasm); /* avoids warning */
 	assert(0);
 }
+/* Intel 3-operand FMA entry point: fails unless VEX-encoded with the 66 prefix; forms with allow_ymms==0 also fail on VEX.length==1. */
+void FMA_3op(PDISASM pMyDisasm, const char* opcode, int allow_ymms)
+{
+	if (!GV.VEX.has_vex || GV.VEX.implicit_prefixes!=1 /* 66 */ || (GV.VEX.length==1 && !allow_ymms))
+	{
+		FailDecode(pMyDisasm);
+		return;
+	}
+	xmm_xmm_xmmmem(pMyDisasm, opcode); /* appends the s/d suffix from VEX.W and decodes the three operands */
+}
+
+#if 0
+/* 0f 38 99 */
+void fmadd132sd(PDISASM pMyDisasm)
+{
+	if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1)
+	{
+		xmm_xmm_xmmmem(pMyDisasm, "fmadd132sd");
+	}
+	else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0)
+	{
+		xmm_xmm_xmmmem(pMyDisasm, "fmadd132ss");
+	}
+	else
+		FailDecode(pMyDisasm);
+}
+
+/* 0f 38 a9 */
+void fmadd213sd(PDISASM pMyDisasm)
+{
+	if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1)
+	{
+		xmm_xmm_xmmmem(pMyDisasm, "fmadd213sd");
+	}
+	else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0)
+	{
+		xmm_xmm_xmmmem(pMyDisasm, "fmadd213ss");
+	}
+	else
+		FailDecode(pMyDisasm);
+}
+/* 0f 38 b9 */
+void fmadd231sd(PDISASM pMyDisasm)
+{
+	if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1)
+	{
+		xmm_xmm_xmmmem(pMyDisasm, "fmadd231sd");
+	}
+	else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0)
+	{
+		xmm_xmm_xmmmem(pMyDisasm, "fmadd231ss");
+	}
+	else
+		FailDecode(pMyDisasm);
+}
+#endif
+
+void fmadd132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmadd213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmadd231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmaddsub132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmaddsub213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmaddsub231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmsub132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmsub213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmsub231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmsubadd132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmsubadd213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fmsubadd231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fnmadd132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fnmadd213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fnmadd231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fnmsub132p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fnmsub213p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+void fnmsub231p(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, TRUE); }
+
+void fmadd132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fmadd213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fmadd231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fmsub132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fmsub213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fmsub231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fnmadd132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fnmadd213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fnmadd231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fnmsub132s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fnmsub213s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+void fnmsub231s(PDISASM pMyDisasm) { FMA_3op(pMyDisasm, __func__, FALSE); }
+
+
+
+
+
+
+
+//  **************************
+//  AMD 4-op FMA instructions
+//  **************************
+/* AMD 4-operand FMA decoder: writes ["v"] + opcode + " ", then VEX.W selects the operand layout (FourOpFMAW0 / FourOpFMAW1); anything else fails. */
+void FourOpFMA(PDISASM pMyDisasm, const char* opcode, int allow_ymms)
+{
+        #ifndef BEA_LIGHT_DISASSEMBLY
+        if(GV.VEX.has_vex)
+        {
+                (void) strcpy ((*pMyDisasm).Instruction.Mnemonic, "v");
+        }
+	(void) strcat ((*pMyDisasm).Instruction.Mnemonic, opcode);
+	(void) strcat ((*pMyDisasm).Instruction.Mnemonic, " ");
+	#endif
+
+	// mmmmmm==0x3 to get to this function, see opcode comment above func.
+	if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0)
+	{
+		FourOpFMAW0(pMyDisasm); /* W=0: MOD_RM() operand is Argument3 */
+	}
+	else if(GV.VEX.has_vex && GV.VEX.length==0 && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1)
+	{
+		FourOpFMAW1(pMyDisasm); /* W=1: MOD_RM() operand is Argument4 */
+	}
+	else if(GV.VEX.has_vex && GV.VEX.length==1 && allow_ymms && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==0)
+	{
+		FourOpFMAW0(pMyDisasm);
+	}
+	else if(GV.VEX.has_vex && GV.VEX.length==1 && allow_ymms && GV.VEX.implicit_prefixes==1 /* 66 */ && GV.VEX.W==1)
+	{
+		FourOpFMAW1(pMyDisasm); /* the W=1 layout applies to the length==1 forms as well */
+	}
+	else
+		FailDecode(pMyDisasm);
+
+}
+
+
+/* 0f 3a 69 */
+void vfmaddpd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmaddpd", TRUE);
+}
+
+/* 0f 3a 68 */
+void vfmaddps(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmaddps", TRUE);
+}
+
+/* 0f 3a 6b */
+void vfmaddsd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmaddsd", FALSE);
+}
+
+/* 0f 3a 6a */
+void vfmaddss(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmaddss", FALSE);
+}
+
+/* 0f 3a 5d */
+void vfmaddsubpd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmaddsubpd", TRUE);
+}
+
+/* 0f 3a 5c */
+void vfmaddsubps(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmaddsubps", TRUE);
+}
+
+
+/* 0f 3a 5f */
+void vfmsubaddpd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmsubaddpd", TRUE);
+}
+
+/* 0f 3a 5e */
+void vfmsubaddps(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmsubaddps", TRUE);
+}
+
+
+/* 0f 3a 6d */
+void vfmsubpd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmsubpd", TRUE);
+}
+
+/* 0f 3a 6c -- packed-single form; the mnemonic passed on must end in "ps" */
+void vfmsubps(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmsubps", TRUE);
+}
+
+/* 0f 3a 6f */
+void vfmsubsd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmsubsd", FALSE);
+
+}
+
+/* 0f 3a 6e */
+void vfmsubss(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fmsubss", FALSE);
+
+}
+
+/* 0f 3a 79 */
+void vfnmaddpd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmaddpd", TRUE);
+}
+
+/* 0f 3a 78 */
+void vfnmaddps(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmaddps", TRUE);
+}
+
+/* 0f 3a 7b */
+void vfnmaddsd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmaddsd", FALSE);
+}
+
+/* 0f 3a 7a */
+void vfnmaddss(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmaddss", FALSE);
+}
+
+/* 0f 3a 7d */
+void vfnmsubpd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmsubpd", TRUE);
+}
+
+/* 0f 3a 7c */
+void vfnmsubps(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmsubps", TRUE);
+}
+
+/* 0f 3a 7f */
+void vfnmsubsd(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmsubsd", FALSE);
+}
+
+/* 0f 3a 7e */
+void vfnmsubss(PDISASM pMyDisasm)
+{
+	FourOpFMA(pMyDisasm,"fnmsubss", FALSE);
+}
+
+
diff --git a/beaengine/beaengineSources/Includes/internal_datas.h b/beaengine/beaengineSources/Includes/internal_datas.h
index 31771f878..2dbca3444 100644
--- a/beaengine/beaengineSources/Includes/internal_datas.h
+++ b/beaengine/beaengineSources/Includes/internal_datas.h
@@ -55,6 +55,16 @@
 #define     Arg3oword       209
 #define     Arg3yword       210
 
+#define     Arg4byte        301
+#define     Arg4word        302
+#define     Arg4dword       303
+#define     Arg4qword       304
+#define     Arg4multibytes  305
+#define     Arg4tbyte       306
+#define     Arg4fword       307
+#define     Arg4dqword      308
+#define     Arg4oword       309
+#define     Arg4yword       310
 
 EFLStruct EFLAGS_TABLE[] = {
     {UN_, UN_, UN_, MO_, UN_, MO_, 0  , 0  , 0  , 0  , 0, 0},  /* 0-AAA */
diff --git a/beaengine/beaengineSources/Includes/protos.h b/beaengine/beaengineSources/Includes/protos.h
index b9a489eba..512688bcf 100644
--- a/beaengine/beaengineSources/Includes/protos.h
+++ b/beaengine/beaengineSources/Includes/protos.h
@@ -729,9 +729,69 @@ void vinsrtf128 (PDISASM pMyDisasm);
 void vperm2f128 (PDISASM pMyDisasm);
 void vpermilps1 (PDISASM pMyDisasm);
 void vpermilps2 (PDISASM pMyDisasm);
-
-
-
-
-
-
+void fmadd132sd(PDISASM pMyDisasm); /* definition is currently compiled out (#if 0) in opcodes_VEX.c */
+void fmadd213sd(PDISASM pMyDisasm); /* definition is currently compiled out (#if 0) in opcodes_VEX.c */
+void fmadd231sd(PDISASM pMyDisasm); /* definition is currently compiled out (#if 0) in opcodes_VEX.c */
+
+
+// amd 4-op fma's
+void vfmaddpd(PDISASM pMyDisasm);
+void vfmaddps(PDISASM pMyDisasm);
+void vfmaddsd(PDISASM pMyDisasm);
+void vfmaddss(PDISASM pMyDisasm);
+
+void vfmaddsubpd(PDISASM pMyDisasm);
+void vfmaddsubps(PDISASM pMyDisasm);
+
+void vfmsubaddpd(PDISASM pMyDisasm);
+void vfmsubaddps(PDISASM pMyDisasm);
+
+void vfmsubpd(PDISASM pMyDisasm);
+void vfmsubps(PDISASM pMyDisasm);
+void vfmsubsd(PDISASM pMyDisasm);
+void vfmsubss(PDISASM pMyDisasm);
+
+void vfnmaddpd(PDISASM pMyDisasm);
+void vfnmaddps(PDISASM pMyDisasm);
+void vfnmaddsd(PDISASM pMyDisasm);
+void vfnmaddss(PDISASM pMyDisasm);
+
+void vfnmsubpd(PDISASM pMyDisasm);
+void vfnmsubps(PDISASM pMyDisasm);
+void vfnmsubsd(PDISASM pMyDisasm);
+void vfnmsubss(PDISASM pMyDisasm);
+
+
+
+// intel 3-op fma's
+void fmadd132p(PDISASM pMyDisasm);
+void fmadd213p(PDISASM pMyDisasm);
+void fmadd231p(PDISASM pMyDisasm);
+void fmaddsub132p(PDISASM pMyDisasm);
+void fmaddsub213p(PDISASM pMyDisasm);
+void fmaddsub231p(PDISASM pMyDisasm);
+void fmsub132p(PDISASM pMyDisasm);
+void fmsub213p(PDISASM pMyDisasm);
+void fmsub231p(PDISASM pMyDisasm);
+void fmsubadd132p(PDISASM pMyDisasm);
+void fmsubadd213p(PDISASM pMyDisasm);
+void fmsubadd231p(PDISASM pMyDisasm);
+void fnmadd132p(PDISASM pMyDisasm);
+void fnmadd213p(PDISASM pMyDisasm);
+void fnmadd231p(PDISASM pMyDisasm);
+void fnmsub132p(PDISASM pMyDisasm);
+void fnmsub213p(PDISASM pMyDisasm);
+void fnmsub231p(PDISASM pMyDisasm);
+
+void fmadd132s(PDISASM pMyDisasm);
+void fmadd213s(PDISASM pMyDisasm);
+void fmadd231s(PDISASM pMyDisasm);
+void fmsub132s(PDISASM pMyDisasm);
+void fmsub213s(PDISASM pMyDisasm);
+void fmsub231s(PDISASM pMyDisasm);
+void fnmadd132s(PDISASM pMyDisasm);
+void fnmadd213s(PDISASM pMyDisasm);
+void fnmadd231s(PDISASM pMyDisasm);
+void fnmsub132s(PDISASM pMyDisasm);
+void fnmsub213s(PDISASM pMyDisasm);
+void fnmsub231s(PDISASM pMyDisasm);
diff --git a/beaengine/jdh_tests/fma_amd.nasm b/beaengine/jdh_tests/fma_amd.nasm
new file mode 100644
index 000000000..acf3d4483
--- /dev/null
+++ b/beaengine/jdh_tests/fma_amd.nasm
@@ -0,0 +1,107 @@
+bits 64
+global main
+section .text
+main:
+	vfmaddpd xmm0, xmm1, xmm2, xmm3	
+	vfmaddps xmm0, xmm1, xmm2, xmm3	
+	vfmaddsd xmm0, xmm1, xmm2, xmm3	
+	vfmaddss xmm0, xmm1, xmm2, xmm3	
+	vfmaddsubpd xmm0, xmm1, xmm2, xmm3	
+	vfmaddsubps xmm0, xmm1, xmm2, xmm3	
+	vfmsubaddpd xmm0, xmm1, xmm2, xmm3	
+	vfmsubaddps xmm0, xmm1, xmm2, xmm3	
+	vfmsubpd xmm0, xmm1, xmm2, xmm3	
+	vfmsubps xmm0, xmm1, xmm2, xmm3	
+	vfmsubsd xmm0, xmm1, xmm2, xmm3	
+	vfmsubss xmm0, xmm1, xmm2, xmm3	
+	vfnmaddpd xmm0, xmm1, xmm2, xmm3
+	vfnmaddps xmm0, xmm1, xmm2, xmm3
+	vfnmaddsd xmm0, xmm1, xmm2, xmm3
+	vfnmaddss xmm0, xmm1, xmm2, xmm3
+	vfnmsubpd xmm0, xmm1, xmm2, xmm3
+	vfnmsubps xmm0, xmm1, xmm2, xmm3
+	vfnmsubsd xmm0, xmm1, xmm2, xmm3
+	vfnmsubss xmm0, xmm1, xmm2, xmm3
+
+	vfmaddpd xmm0, xmm1, [rsp+1024], xmm3	
+	vfmaddps xmm0, xmm1, [rsp+1024], xmm3	
+	vfmaddsd xmm0, xmm1, [rsp+1024], xmm3	
+	vfmaddss xmm0, xmm1, [rsp+1024], xmm3	
+	vfmaddsubpd xmm0, xmm1, [rsp+1024], xmm3	
+	vfmaddsubps xmm0, xmm1, [rsp+1024], xmm3	
+	vfmsubaddpd xmm0, xmm1, [rsp+1024], xmm3	
+	vfmsubaddps xmm0, xmm1, [rsp+1024], xmm3	
+	vfmsubpd xmm0, xmm1, [rsp+1024], xmm3	
+	vfmsubps xmm0, xmm1, [rsp+1024], xmm3	
+	vfmsubsd xmm0, xmm1, [rsp+1024], xmm3	
+	vfmsubss xmm0, xmm1, [rsp+1024], xmm3	
+	vfnmaddpd xmm0, xmm1, [rsp+1024], xmm3
+	vfnmaddps xmm0, xmm1, [rsp+1024], xmm3
+	vfnmaddsd xmm0, xmm1, [rsp+1024], xmm3
+	vfnmaddss xmm0, xmm1, [rsp+1024], xmm3
+	vfnmsubpd xmm0, xmm1, [rsp+1024], xmm3
+	vfnmsubps xmm0, xmm1, [rsp+1024], xmm3
+	vfnmsubsd xmm0, xmm1, [rsp+1024], xmm3
+	vfnmsubss xmm0, xmm1, [rsp+1024], xmm3
+
+	vfmaddpd xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmaddps xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmaddsd xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmaddss xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmaddsubpd xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmaddsubps xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmsubaddpd xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmsubaddps xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmsubpd xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmsubps xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmsubsd xmm0, xmm1, xmm2, [rsp+1024]	
+	vfmsubss xmm0, xmm1, xmm2, [rsp+1024]	
+	vfnmaddpd xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmaddps xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmaddsd xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmaddss xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmsubpd xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmsubps xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmsubsd xmm0, xmm1, xmm2, [rsp+1024]
+	vfnmsubss xmm0, xmm1, xmm2, [rsp+1024]
+
+; test ymm versions
+	vfmaddpd ymm0, ymm1, ymm2, ymm3	
+	vfmaddps ymm0, ymm1, ymm2, ymm3	
+	vfmaddsubpd ymm0, ymm1, ymm2, ymm3	
+	vfmaddsubps ymm0, ymm1, ymm2, ymm3	
+	vfmsubaddpd ymm0, ymm1, ymm2, ymm3	
+	vfmsubaddps ymm0, ymm1, ymm2, ymm3	
+	vfmsubpd ymm0, ymm1, ymm2, ymm3	
+	vfmsubps ymm0, ymm1, ymm2, ymm3	
+	vfnmaddpd ymm0, ymm1, ymm2, ymm3
+	vfnmaddps ymm0, ymm1, ymm2, ymm3
+	vfnmsubpd ymm0, ymm1, ymm2, ymm3
+	vfnmsubps ymm0, ymm1, ymm2, ymm3
+
+	vfmaddpd ymm0, ymm1, [rsp+1024], ymm3	
+	vfmaddps ymm0, ymm1, [rsp+1024], ymm3	
+	vfmaddsubpd ymm0, ymm1, [rsp+1024], ymm3	
+	vfmaddsubps ymm0, ymm1, [rsp+1024], ymm3	
+	vfmsubaddpd ymm0, ymm1, [rsp+1024], ymm3	
+	vfmsubaddps ymm0, ymm1, [rsp+1024], ymm3	
+	vfmsubpd ymm0, ymm1, [rsp+1024], ymm3	
+	vfmsubps ymm0, ymm1, [rsp+1024], ymm3	
+	vfnmaddpd ymm0, ymm1, [rsp+1024], ymm3
+	vfnmaddps ymm0, ymm1, [rsp+1024], ymm3
+	vfnmsubpd ymm0, ymm1, [rsp+1024], ymm3
+	vfnmsubps ymm0, ymm1, [rsp+1024], ymm3
+
+	vfmaddpd ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmaddps ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmaddsubpd ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmaddsubps ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmsubaddpd ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmsubaddps ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmsubpd ymm0, ymm1, ymm2, [rsp+1024]	
+	vfmsubps ymm0, ymm1, ymm2, [rsp+1024]	
+	vfnmaddpd ymm0, ymm1, ymm2, [rsp+1024]
+	vfnmaddps ymm0, ymm1, ymm2, [rsp+1024]
+	vfnmsubpd ymm0, ymm1, ymm2, [rsp+1024]
+	vfnmsubps ymm0, ymm1, ymm2, [rsp+1024]
+
diff --git a/beaengine/jdh_tests/fma_intel.nasm b/beaengine/jdh_tests/fma_intel.nasm
new file mode 100644
index 000000000..90dea376b
--- /dev/null
+++ b/beaengine/jdh_tests/fma_intel.nasm
@@ -0,0 +1,237 @@
+bits 64
+global main
+section .text
+main:
+
+; 3 register operands, pd + ps (xmm)
+
+vfmadd132pd xmm0, xmm1, xmm2
+vfmadd132ps xmm0, xmm1, xmm2
+vfmadd213pd xmm0, xmm1, xmm2
+vfmadd213ps xmm0, xmm1, xmm2
+vfmadd231pd xmm0, xmm1, xmm2
+vfmadd231ps xmm0, xmm1, xmm2
+vfmaddsub132pd xmm0, xmm1, xmm2
+vfmaddsub132ps xmm0, xmm1, xmm2
+vfmaddsub213pd xmm0, xmm1, xmm2
+vfmaddsub213ps xmm0, xmm1, xmm2
+vfmaddsub231pd xmm0, xmm1, xmm2
+vfmaddsub231ps xmm0, xmm1, xmm2
+vfmsub132pd xmm0, xmm1, xmm2
+vfmsub132ps xmm0, xmm1, xmm2
+vfmsub213pd xmm0, xmm1, xmm2
+vfmsub213ps xmm0, xmm1, xmm2
+vfmsub231pd xmm0, xmm1, xmm2
+vfmsub231ps xmm0, xmm1, xmm2
+vfmsubadd132pd xmm0, xmm1, xmm2
+vfmsubadd132ps xmm0, xmm1, xmm2
+vfmsubadd213pd xmm0, xmm1, xmm2
+vfmsubadd213ps xmm0, xmm1, xmm2
+vfmsubadd231pd xmm0, xmm1, xmm2
+vfmsubadd231ps xmm0, xmm1, xmm2
+vfnmadd132pd xmm0, xmm1, xmm2
+vfnmadd132ps xmm0, xmm1, xmm2
+vfnmadd213pd xmm0, xmm1, xmm2
+vfnmadd213ps xmm0, xmm1, xmm2
+vfnmadd231pd xmm0, xmm1, xmm2
+vfnmadd231ps xmm0, xmm1, xmm2
+vfnmsub132pd xmm0, xmm1, xmm2
+vfnmsub132ps xmm0, xmm1, xmm2
+vfnmsub213pd xmm0, xmm1, xmm2
+vfnmsub213ps xmm0, xmm1, xmm2
+vfnmsub231pd xmm0, xmm1, xmm2
+vfnmsub231ps xmm0, xmm1, xmm2
+
+
+
+; 2 register operands + memory operand, pd + ps (xmm)
+
+vfmadd132pd xmm0, xmm1, [rsp+1024]
+vfmadd132ps xmm0, xmm1, [rsp+1024]
+vfmadd213pd xmm0, xmm1, [rsp+1024]
+vfmadd213ps xmm0, xmm1, [rsp+1024]
+vfmadd231pd xmm0, xmm1, [rsp+1024]
+vfmadd231ps xmm0, xmm1, [rsp+1024]
+vfmaddsub132pd xmm0, xmm1, [rsp+1024]
+vfmaddsub132ps xmm0, xmm1, [rsp+1024]
+vfmaddsub213pd xmm0, xmm1, [rsp+1024]
+vfmaddsub213ps xmm0, xmm1, [rsp+1024]
+vfmaddsub231pd xmm0, xmm1, [rsp+1024]
+vfmaddsub231ps xmm0, xmm1, [rsp+1024]
+vfmsub132pd xmm0, xmm1, [rsp+1024]
+vfmsub132ps xmm0, xmm1, [rsp+1024]
+vfmsub213pd xmm0, xmm1, [rsp+1024]
+vfmsub213ps xmm0, xmm1, [rsp+1024]
+vfmsub231pd xmm0, xmm1, [rsp+1024]
+vfmsub231ps xmm0, xmm1, [rsp+1024]
+vfmsubadd132pd xmm0, xmm1, [rsp+1024]
+vfmsubadd132ps xmm0, xmm1, [rsp+1024]
+vfmsubadd213pd xmm0, xmm1, [rsp+1024]
+vfmsubadd213ps xmm0, xmm1, [rsp+1024]
+vfmsubadd231pd xmm0, xmm1, [rsp+1024]
+vfmsubadd231ps xmm0, xmm1, [rsp+1024]
+vfnmadd132pd xmm0, xmm1, [rsp+1024]
+vfnmadd132ps xmm0, xmm1, [rsp+1024]
+vfnmadd213pd xmm0, xmm1, [rsp+1024]
+vfnmadd213ps xmm0, xmm1, [rsp+1024]
+vfnmadd231pd xmm0, xmm1, [rsp+1024]
+vfnmadd231ps xmm0, xmm1, [rsp+1024]
+vfnmsub132pd xmm0, xmm1, [rsp+1024]
+vfnmsub132ps xmm0, xmm1, [rsp+1024]
+vfnmsub213pd xmm0, xmm1, [rsp+1024]
+vfnmsub213ps xmm0, xmm1, [rsp+1024]
+vfnmsub231pd xmm0, xmm1, [rsp+1024]
+vfnmsub231ps xmm0, xmm1, [rsp+1024]
+
+
+; 3 register operands, pd + ps, using ymm registers
+
+vfmadd132pd ymm0, ymm1, ymm2
+vfmadd132ps ymm0, ymm1, ymm2
+vfmadd213pd ymm0, ymm1, ymm2
+vfmadd213ps ymm0, ymm1, ymm2
+vfmadd231pd ymm0, ymm1, ymm2
+vfmadd231ps ymm0, ymm1, ymm2
+vfmaddsub132pd ymm0, ymm1, ymm2
+vfmaddsub132ps ymm0, ymm1, ymm2
+vfmaddsub213pd ymm0, ymm1, ymm2
+vfmaddsub213ps ymm0, ymm1, ymm2
+vfmaddsub231pd ymm0, ymm1, ymm2
+vfmaddsub231ps ymm0, ymm1, ymm2
+vfmsub132pd ymm0, ymm1, ymm2
+vfmsub132ps ymm0, ymm1, ymm2
+vfmsub213pd ymm0, ymm1, ymm2
+vfmsub213ps ymm0, ymm1, ymm2
+vfmsub231pd ymm0, ymm1, ymm2
+vfmsub231ps ymm0, ymm1, ymm2
+vfmsubadd132pd ymm0, ymm1, ymm2
+vfmsubadd132ps ymm0, ymm1, ymm2
+vfmsubadd213pd ymm0, ymm1, ymm2
+vfmsubadd213ps ymm0, ymm1, ymm2
+vfmsubadd231pd ymm0, ymm1, ymm2
+vfmsubadd231ps ymm0, ymm1, ymm2
+vfnmadd132pd ymm0, ymm1, ymm2
+vfnmadd132ps ymm0, ymm1, ymm2
+vfnmadd213pd ymm0, ymm1, ymm2
+vfnmadd213ps ymm0, ymm1, ymm2
+vfnmadd231pd ymm0, ymm1, ymm2
+vfnmadd231ps ymm0, ymm1, ymm2
+vfnmsub132pd ymm0, ymm1, ymm2
+vfnmsub132ps ymm0, ymm1, ymm2
+vfnmsub213pd ymm0, ymm1, ymm2
+vfnmsub213ps ymm0, ymm1, ymm2
+vfnmsub231pd ymm0, ymm1, ymm2
+vfnmsub231ps ymm0, ymm1, ymm2
+
+; 2 register operands + memory operand, pd + ps, using ymm registers
+
+vfmadd132pd ymm0, ymm1, [rsp+1024]
+vfmadd132ps ymm0, ymm1, [rsp+1024]
+vfmadd213pd ymm0, ymm1, [rsp+1024]
+vfmadd213ps ymm0, ymm1, [rsp+1024]
+vfmadd231pd ymm0, ymm1, [rsp+1024]
+vfmadd231ps ymm0, ymm1, [rsp+1024]
+vfmaddsub132pd ymm0, ymm1, [rsp+1024]
+vfmaddsub132ps ymm0, ymm1, [rsp+1024]
+vfmaddsub213pd ymm0, ymm1, [rsp+1024]
+vfmaddsub213ps ymm0, ymm1, [rsp+1024]
+vfmaddsub231pd ymm0, ymm1, [rsp+1024]
+vfmaddsub231ps ymm0, ymm1, [rsp+1024]
+vfmsub132pd ymm0, ymm1, [rsp+1024]
+vfmsub132ps ymm0, ymm1, [rsp+1024]
+vfmsub213pd ymm0, ymm1, [rsp+1024]
+vfmsub213ps ymm0, ymm1, [rsp+1024]
+vfmsub231pd ymm0, ymm1, [rsp+1024]
+vfmsub231ps ymm0, ymm1, [rsp+1024]
+vfmsubadd132pd ymm0, ymm1, [rsp+1024]
+vfmsubadd132ps ymm0, ymm1, [rsp+1024]
+vfmsubadd213pd ymm0, ymm1, [rsp+1024]
+vfmsubadd213ps ymm0, ymm1, [rsp+1024]
+vfmsubadd231pd ymm0, ymm1, [rsp+1024]
+vfmsubadd231ps ymm0, ymm1, [rsp+1024]
+vfnmadd132pd ymm0, ymm1, [rsp+1024]
+vfnmadd132ps ymm0, ymm1, [rsp+1024]
+vfnmadd213pd ymm0, ymm1, [rsp+1024]
+vfnmadd213ps ymm0, ymm1, [rsp+1024]
+vfnmadd231pd ymm0, ymm1, [rsp+1024]
+vfnmadd231ps ymm0, ymm1, [rsp+1024]
+vfnmsub132pd ymm0, ymm1, [rsp+1024]
+vfnmsub132ps ymm0, ymm1, [rsp+1024]
+vfnmsub213pd ymm0, ymm1, [rsp+1024]
+vfnmsub213ps ymm0, ymm1, [rsp+1024]
+vfnmsub231pd ymm0, ymm1, [rsp+1024]
+vfnmsub231ps ymm0, ymm1, [rsp+1024]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 register operands, sd + ss (xmm)
+
+vfmadd132sd xmm0, xmm1, xmm2
+vfmadd132ss xmm0, xmm1, xmm2
+vfmadd213sd xmm0, xmm1, xmm2
+vfmadd213ss xmm0, xmm1, xmm2
+vfmadd231sd xmm0, xmm1, xmm2
+vfmadd231ss xmm0, xmm1, xmm2
+vfmsub132sd xmm0, xmm1, xmm2
+vfmsub132ss xmm0, xmm1, xmm2
+vfmsub213sd xmm0, xmm1, xmm2
+vfmsub213ss xmm0, xmm1, xmm2
+vfmsub231sd xmm0, xmm1, xmm2
+vfmsub231ss xmm0, xmm1, xmm2
+vfnmadd132sd xmm0, xmm1, xmm2
+vfnmadd132ss xmm0, xmm1, xmm2
+vfnmadd213sd xmm0, xmm1, xmm2
+vfnmadd213ss xmm0, xmm1, xmm2
+vfnmadd231sd xmm0, xmm1, xmm2
+vfnmadd231ss xmm0, xmm1, xmm2
+vfnmsub132sd xmm0, xmm1, xmm2
+vfnmsub132ss xmm0, xmm1, xmm2
+vfnmsub213sd xmm0, xmm1, xmm2
+vfnmsub213ss xmm0, xmm1, xmm2
+vfnmsub231sd xmm0, xmm1, xmm2
+vfnmsub231ss xmm0, xmm1, xmm2
+
+
+
+; 2 register operands + memory operand, sd + ss (xmm)
+
+vfmadd132sd xmm0, xmm1, [rsp+1024]
+vfmadd132ss xmm0, xmm1, [rsp+1024]
+vfmadd213sd xmm0, xmm1, [rsp+1024]
+vfmadd213ss xmm0, xmm1, [rsp+1024]
+vfmadd231sd xmm0, xmm1, [rsp+1024]
+vfmadd231ss xmm0, xmm1, [rsp+1024]
+vfmsub132sd xmm0, xmm1, [rsp+1024]
+vfmsub132ss xmm0, xmm1, [rsp+1024]
+vfmsub213sd xmm0, xmm1, [rsp+1024]
+vfmsub213ss xmm0, xmm1, [rsp+1024]
+vfmsub231sd xmm0, xmm1, [rsp+1024]
+vfmsub231ss xmm0, xmm1, [rsp+1024]
+vfnmadd132sd xmm0, xmm1, [rsp+1024]
+vfnmadd132ss xmm0, xmm1, [rsp+1024]
+vfnmadd213sd xmm0, xmm1, [rsp+1024]
+vfnmadd213ss xmm0, xmm1, [rsp+1024]
+vfnmadd231sd xmm0, xmm1, [rsp+1024]
+vfnmadd231ss xmm0, xmm1, [rsp+1024]
+vfnmsub132sd xmm0, xmm1, [rsp+1024]
+vfnmsub132ss xmm0, xmm1, [rsp+1024]
+vfnmsub213sd xmm0, xmm1, [rsp+1024]
+vfnmsub213ss xmm0, xmm1, [rsp+1024]
+vfnmsub231sd xmm0, xmm1, [rsp+1024]
+vfnmsub231ss xmm0, xmm1, [rsp+1024]
-- 
GitLab