Diffstat (limited to 'src/core/arm/interpreter/vfp')
-rw-r--r-- | src/core/arm/interpreter/vfp/asm_vfp.h | 84
-rw-r--r-- | src/core/arm/interpreter/vfp/vfp.cpp | 357
-rw-r--r-- | src/core/arm/interpreter/vfp/vfp.h | 111
-rw-r--r-- | src/core/arm/interpreter/vfp/vfp_helper.h | 541
-rw-r--r-- | src/core/arm/interpreter/vfp/vfpdouble.cpp | 1263
-rw-r--r-- | src/core/arm/interpreter/vfp/vfpinstr.cpp | 5123
-rw-r--r-- | src/core/arm/interpreter/vfp/vfpsingle.cpp | 1278
7 files changed, 8757 insertions, 0 deletions
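Every coprocessor entry point in vfp.cpp below begins by slicing the instruction word into its encoding fields with the interpreter's BITS()/BIT() macros before dispatching on the coprocessor number (10 for single precision, 11 for double). A minimal stand-alone sketch of that decode step; the bits() helper here is a hypothetical local stand-in for the BITS() macro from armdefs.h, which is not part of this diff:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the interpreter's BITS(lo, hi) macro,
       which implicitly reads the current instruction word. */
    static uint32_t bits(uint32_t instr, int lo, int hi)
    {
        return (instr >> lo) & ((1u << (hi - lo + 1)) - 1);
    }

    int main(void)
    {
        uint32_t instr = 0xEEF11A10; /* VMRS r1, FPSCR: MRC p10, 7, r1, c1, c0, 0 */
        printf("CoProc %u, OPC_1 %u, Rt %u, CRn %u, CRm %u, OPC_2 %u\n",
               bits(instr, 8, 11), bits(instr, 21, 23), bits(instr, 12, 15),
               bits(instr, 16, 19), bits(instr, 0, 3), bits(instr, 5, 7));
        return 0; /* prints CoProc 10, OPC_1 7, Rt 1, CRn 1, CRm 0, OPC_2 0 */
    }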
diff --git a/src/core/arm/interpreter/vfp/asm_vfp.h b/src/core/arm/interpreter/vfp/asm_vfp.h new file mode 100644 index 000000000..f4ab34fd4 --- /dev/null +++ b/src/core/arm/interpreter/vfp/asm_vfp.h @@ -0,0 +1,84 @@ +/* + * arch/arm/include/asm/vfp.h + * + * VFP register definitions. + * First, the standard VFP set. + */ + +#define FPSID cr0 +#define FPSCR cr1 +#define MVFR1 cr6 +#define MVFR0 cr7 +#define FPEXC cr8 +#define FPINST cr9 +#define FPINST2 cr10 + +/* FPSID bits */ +#define FPSID_IMPLEMENTER_BIT (24) +#define FPSID_IMPLEMENTER_MASK (0xff << FPSID_IMPLEMENTER_BIT) +#define FPSID_SOFTWARE (1<<23) +#define FPSID_FORMAT_BIT (21) +#define FPSID_FORMAT_MASK (0x3 << FPSID_FORMAT_BIT) +#define FPSID_NODOUBLE (1<<20) +#define FPSID_ARCH_BIT (16) +#define FPSID_ARCH_MASK (0xF << FPSID_ARCH_BIT) +#define FPSID_PART_BIT (8) +#define FPSID_PART_MASK (0xFF << FPSID_PART_BIT) +#define FPSID_VARIANT_BIT (4) +#define FPSID_VARIANT_MASK (0xF << FPSID_VARIANT_BIT) +#define FPSID_REV_BIT (0) +#define FPSID_REV_MASK (0xF << FPSID_REV_BIT) + +/* FPEXC bits */ +#define FPEXC_EX (1 << 31) +#define FPEXC_EN (1 << 30) +#define FPEXC_DEX (1 << 29) +#define FPEXC_FP2V (1 << 28) +#define FPEXC_VV (1 << 27) +#define FPEXC_TFV (1 << 26) +#define FPEXC_LENGTH_BIT (8) +#define FPEXC_LENGTH_MASK (7 << FPEXC_LENGTH_BIT) +#define FPEXC_IDF (1 << 7) +#define FPEXC_IXF (1 << 4) +#define FPEXC_UFF (1 << 3) +#define FPEXC_OFF (1 << 2) +#define FPEXC_DZF (1 << 1) +#define FPEXC_IOF (1 << 0) +#define FPEXC_TRAP_MASK (FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF) + +/* FPSCR bits */ +#define FPSCR_DEFAULT_NAN (1<<25) +#define FPSCR_FLUSHTOZERO (1<<24) +#define FPSCR_ROUND_NEAREST (0<<22) +#define FPSCR_ROUND_PLUSINF (1<<22) +#define FPSCR_ROUND_MINUSINF (2<<22) +#define FPSCR_ROUND_TOZERO (3<<22) +#define FPSCR_RMODE_BIT (22) +#define FPSCR_RMODE_MASK (3 << FPSCR_RMODE_BIT) +#define FPSCR_STRIDE_BIT (20) +#define FPSCR_STRIDE_MASK (3 << FPSCR_STRIDE_BIT) +#define FPSCR_LENGTH_BIT (16) +#define FPSCR_LENGTH_MASK (7 << FPSCR_LENGTH_BIT) +#define FPSCR_IOE (1<<8) +#define FPSCR_DZE (1<<9) +#define FPSCR_OFE (1<<10) +#define FPSCR_UFE (1<<11) +#define FPSCR_IXE (1<<12) +#define FPSCR_IDE (1<<15) +#define FPSCR_IOC (1<<0) +#define FPSCR_DZC (1<<1) +#define FPSCR_OFC (1<<2) +#define FPSCR_UFC (1<<3) +#define FPSCR_IXC (1<<4) +#define FPSCR_IDC (1<<7) + +/* MVFR0 bits */ +#define MVFR0_A_SIMD_BIT (0) +#define MVFR0_A_SIMD_MASK (0xf << MVFR0_A_SIMD_BIT) + +/* Bit patterns for decoding the packaged operation descriptors */ +#define VFPOPDESC_LENGTH_BIT (9) +#define VFPOPDESC_LENGTH_MASK (0x07 << VFPOPDESC_LENGTH_BIT) +#define VFPOPDESC_UNUSED_BIT (24) +#define VFPOPDESC_UNUSED_MASK (0xFF << VFPOPDESC_UNUSED_BIT) +#define VFPOPDESC_OPDESC_MASK (~(VFPOPDESC_LENGTH_MASK | VFPOPDESC_UNUSED_MASK)) diff --git a/src/core/arm/interpreter/vfp/vfp.cpp b/src/core/arm/interpreter/vfp/vfp.cpp new file mode 100644 index 000000000..eea5e24a9 --- /dev/null +++ b/src/core/arm/interpreter/vfp/vfp.cpp @@ -0,0 +1,357 @@ +/* + armvfp.c - ARM VFPv3 emulation unit + Copyright (C) 2003 Skyeye Develop Group + for help please send mail to <skyeye-developer@lists.gro.clinux.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* Note: this file handles interface with arm core and vfp registers */ + +/* Opens debug for classic interpreter only */ +//#define DEBUG + +#include "common/common.h" + +#include "core/arm/interpreter/armdefs.h" +#include "core/arm/interpreter/vfp/vfp.h" + +//ARMul_State* persistent_state; /* function calls from SoftFloat lib don't have an access to ARMul_state. */ + +unsigned +VFPInit (ARMul_State *state) +{ + state->VFP[VFP_OFFSET(VFP_FPSID)] = VFP_FPSID_IMPLMEN<<24 | VFP_FPSID_SW<<23 | VFP_FPSID_SUBARCH<<16 | + VFP_FPSID_PARTNUM<<8 | VFP_FPSID_VARIANT<<4 | VFP_FPSID_REVISION; + state->VFP[VFP_OFFSET(VFP_FPEXC)] = 0; + state->VFP[VFP_OFFSET(VFP_FPSCR)] = 0; + + //persistent_state = state; + /* Reset only specifies VFP_FPEXC_EN = '0' */ + + return No_exp; +} + +unsigned +VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value) +{ + /* MRC<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int OPC_1 = BITS (21, 23); + int Rt = BITS (12, 15); + int CRn = BITS (16, 19); + int CRm = BITS (0, 3); + int OPC_2 = BITS (5, 7); + + /* TODO check access permission */ + + /* CRn/opc1 CRm/opc2 */ + + if (CoProc == 10 || CoProc == 11) + { + #define VFP_MRC_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_MRC_TRANS + } + DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", + instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2); + + return ARMul_CANT; +} + +unsigned +VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value) +{ + /* MCR<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int OPC_1 = BITS (21, 23); + int Rt = BITS (12, 15); + int CRn = BITS (16, 19); + int CRm = BITS (0, 3); + int OPC_2 = BITS (5, 7); + + /* TODO check access permission */ + + /* CRn/opc1 CRm/opc2 */ + if (CoProc == 10 || CoProc == 11) + { + #define VFP_MCR_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_MCR_TRANS + } + DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", + instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2); + + return ARMul_CANT; +} + +unsigned +VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2) +{ + /* MRRC<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int OPC_1 = BITS (4, 7); + int Rt = BITS (12, 15); + int Rt2 = BITS (16, 19); + int CRm = BITS (0, 3); + + if (CoProc == 10 || CoProc == 11) + { + #define VFP_MRRC_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_MRRC_TRANS + } + DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", + instr, CoProc, OPC_1, Rt, Rt2, CRm); + + return ARMul_CANT; +} + +unsigned +VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2) +{ + /* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int OPC_1 = BITS (4, 7); + int Rt = BITS (12, 15); + int Rt2 = BITS (16, 19); + int CRm = BITS (0, 3); + + /*
TODO check access permission */ + + /* CRn/opc1 CRm/opc2 */ + + if (CoProc == 11 || CoProc == 10) + { + #define VFP_MCRR_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_MCRR_TRANS + } + DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", + instr, CoProc, OPC_1, Rt, Rt2, CRm); + + return ARMul_CANT; +} + +unsigned +VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value) +{ + /* STC{L}<c> <coproc>,<CRd>,[<Rn>],<option> */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int CRd = BITS (12, 15); + int Rn = BITS (16, 19); + int imm8 = BITS (0, 7); + int P = BIT(24); + int U = BIT(23); + int D = BIT(22); + int W = BIT(21); + + /* TODO check access permission */ + + /* VSTM */ + if ( (P|U|D|W) == 0 ) + { + DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1); + } + if (CoProc == 10 || CoProc == 11) + { + #if 1 + if (P == 0 && U == 0 && W == 0) + { + DEBUG_LOG(ARM11, "VSTM Related encodings\n"); exit(-1); + } + if (P == U && W == 1) + { + DEBUG_LOG(ARM11, "UNDEFINED\n"); exit(-1); + } + #endif + + #define VFP_STC_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_STC_TRANS + } + DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", + instr, CoProc, CRd, Rn, imm8, P, U, D, W); + + return ARMul_CANT; +} + +unsigned +VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value) +{ + /* LDC{L}<c> <coproc>,<CRd>,[<Rn>] */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int CRd = BITS (12, 15); + int Rn = BITS (16, 19); + int imm8 = BITS (0, 7); + int P = BIT(24); + int U = BIT(23); + int D = BIT(22); + int W = BIT(21); + + /* TODO check access permission */ + + if ( (P|U|D|W) == 0 ) + { + DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1); + } + if (CoProc == 10 || CoProc == 11) + { + #define VFP_LDC_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_LDC_TRANS + } + DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", + instr, CoProc, CRd, Rn, imm8, P, U, D, W); + + return ARMul_CANT; +} + +unsigned +VFPCDP (ARMul_State * state, unsigned type, ARMword instr) +{ + /* CDP<c> <coproc>,<opc1>,<CRd>,<CRn>,<CRm>,<opc2> */ + int CoProc = BITS (8, 11); /* 10 or 11 */ + int OPC_1 = BITS (20, 23); + int CRd = BITS (12, 15); + int CRn = BITS (16, 19); + int CRm = BITS (0, 3); + int OPC_2 = BITS (5, 7); + + /* TODO check access permission */ + + /* CRn/opc1 CRm/opc2 */ + + if (CoProc == 10 || CoProc == 11) + { + #define VFP_CDP_TRANS + #include "core/arm/interpreter/vfp/vfpinstr.cpp" + #undef VFP_CDP_TRANS + + int exceptions = 0; + if (CoProc == 10) + exceptions = vfp_single_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + exceptions = vfp_double_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]); + + vfp_raise_exceptions(state, exceptions, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]); + + return ARMul_DONE; + } + DEBUG_LOG(ARM11, "Can't identify %x\n", instr); + return ARMul_CANT; +} + + +/* ----------- MRC ------------ */ +#define VFP_MRC_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_MRC_IMPL + +#define VFP_MRRC_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_MRRC_IMPL + + +/* ----------- MCR ------------ */ +#define VFP_MCR_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_MCR_IMPL + +#define VFP_MCRR_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_MCRR_IMPL + +/* Memory operations are not
inlined, as the old interpreter and the fast interpreter + do not share the same memory operation interface. + The old interpreter framework performs one coprocessor access per data word and + already handles the data write, as well as the address computation, + which is not the case for the fast interpreter. Therefore, the vfp memory + instructions are implemented separately for the two interpreters. */ + +/* ----------- STC ------------ */ +#define VFP_STC_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_STC_IMPL + + +/* ----------- LDC ------------ */ +#define VFP_LDC_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_LDC_IMPL + + +/* ----------- CDP ------------ */ +#define VFP_CDP_IMPL +#include "core/arm/interpreter/vfp/vfpinstr.cpp" +#undef VFP_CDP_IMPL + +/* Miscellaneous functions */ +int32_t vfp_get_float(arm_core_t* state, unsigned int reg) +{ + DBG("VFP get float: s%d=[%08x]\n", reg, state->ExtReg[reg]); + return state->ExtReg[reg]; +} + +void vfp_put_float(arm_core_t* state, int32_t val, unsigned int reg) +{ + DBG("VFP put float: s%d <= [%08x]\n", reg, val); + state->ExtReg[reg] = val; +} + +uint64_t vfp_get_double(arm_core_t* state, unsigned int reg) +{ + uint64_t result; + result = ((uint64_t) state->ExtReg[reg*2+1])<<32 | state->ExtReg[reg*2]; + DBG("VFP get double: s[%d-%d]=[%016llx]\n", reg*2+1, reg*2, result); + return result; +} + +void vfp_put_double(arm_core_t* state, uint64_t val, unsigned int reg) +{ + DBG("VFP put double: s[%d-%d] <= [%08x-%08x]\n", reg*2+1, reg*2, (uint32_t) (val>>32), (uint32_t) (val & 0xffffffff)); + state->ExtReg[reg*2] = (uint32_t) (val & 0xffffffff); + state->ExtReg[reg*2+1] = (uint32_t) (val>>32); +} + + + +/* + * Process bitmask of exception conditions. (from vfpmodule.c) + */ +void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpscr) +{ + int si_code = 0; + + vfpdebug("VFP: raising exceptions %08x\n", exceptions); + + if (exceptions == VFP_EXCEPTION_ERROR) { + DEBUG_LOG(ARM11, "unhandled bounce %x\n", inst); + exit(-1); + return; + } + + /* + * If any of the status flags are set, update the FPSCR. + * Comparison instructions always return at least one of + * these flags set. + */ + if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) + fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); + + fpscr |= exceptions; + + state->VFP[VFP_OFFSET(VFP_FPSCR)] = fpscr; +} diff --git a/src/core/arm/interpreter/vfp/vfp.h b/src/core/arm/interpreter/vfp/vfp.h new file mode 100644 index 000000000..f738a615b --- /dev/null +++ b/src/core/arm/interpreter/vfp/vfp.h @@ -0,0 +1,111 @@ +/* + vfp/vfp.h - ARM VFPv3 emulation unit - vfp interface + Copyright (C) 2003 Skyeye Develop Group + for help please send mail to <skyeye-developer@lists.gro.clinux.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __VFP_H__ +#define __VFP_H__ + +#define DBG(...)
DEBUG_LOG(ARM11, __VA_ARGS__) + +#define vfpdebug //printf + +#include "core/arm/interpreter/vfp/vfp_helper.h" /* for references to cdp SoftFloat functions */ + +unsigned VFPInit (ARMul_State *state); +unsigned VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value); +unsigned VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value); +unsigned VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2); +unsigned VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2); +unsigned VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value); +unsigned VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value); +unsigned VFPCDP (ARMul_State * state, unsigned type, ARMword instr); + +/* FPSID Information */ +#define VFP_FPSID_IMPLMEN 0 /* should be the same as cp15 0 c0 0*/ +#define VFP_FPSID_SW 0 +#define VFP_FPSID_SUBARCH 0x2 /* VFP version. Current is v3 (not strict) */ +#define VFP_FPSID_PARTNUM 0x1 +#define VFP_FPSID_VARIANT 0x1 +#define VFP_FPSID_REVISION 0x1 + +/* FPEXC Flags */ +#define VFP_FPEXC_EX 1<<31 +#define VFP_FPEXC_EN 1<<30 + +/* FPSCR Flags */ +#define VFP_FPSCR_NFLAG 1<<31 +#define VFP_FPSCR_ZFLAG 1<<30 +#define VFP_FPSCR_CFLAG 1<<29 +#define VFP_FPSCR_VFLAG 1<<28 + +#define VFP_FPSCR_AHP 1<<26 /* Alternative Half Precision */ +#define VFP_FPSCR_DN 1<<25 /* Default NaN */ +#define VFP_FPSCR_FZ 1<<24 /* Flush-to-zero */ +#define VFP_FPSCR_RMODE 3<<22 /* Rounding Mode */ +#define VFP_FPSCR_STRIDE 3<<20 /* Stride (vector) */ +#define VFP_FPSCR_LEN 7<<16 /* Length (vector) */ + +#define VFP_FPSCR_IDE 1<<15 /* Input Denormal exc */ +#define VFP_FPSCR_IXE 1<<12 /* Inexact exc */ +#define VFP_FPSCR_UFE 1<<11 /* Underflow exc */ +#define VFP_FPSCR_OFE 1<<10 /* Overflow exc */ +#define VFP_FPSCR_DZE 1<<9 /* Division by Zero exc */ +#define VFP_FPSCR_IOE 1<<8 /* Invalid Operation exc */ + +#define VFP_FPSCR_IDC 1<<7 /* Input Denormal cum exc */ +#define VFP_FPSCR_IXC 1<<4 /* Inexact cum exc */ +#define VFP_FPSCR_UFC 1<<3 /* Underflow cum exc */ +#define VFP_FPSCR_OFC 1<<2 /* Overflow cum exc */ +#define VFP_FPSCR_DZC 1<<1 /* Division by Zero cum exc */ +#define VFP_FPSCR_IOC 1<<0 /* Invalid Operation cum exc */ + +/* Inline instructions.
Note: Used in a cpp file as well */ +#ifdef __cplusplus + extern "C" { +#endif +int32_t vfp_get_float(ARMul_State * state, unsigned int reg); +void vfp_put_float(ARMul_State * state, int32_t val, unsigned int reg); +uint64_t vfp_get_double(ARMul_State * state, unsigned int reg); +void vfp_put_double(ARMul_State * state, uint64_t val, unsigned int reg); +void vfp_raise_exceptions(ARMul_State * state, uint32_t exceptions, uint32_t inst, uint32_t fpscr); +u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr); +u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr); + +/* MRC */ +inline void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword *value); +inline void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value); +inline void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2); +inline void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm); +inline void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword imm); +/* MCR */ +inline void VMSR(ARMul_State * state, ARMword reg, ARMword Rt); +/* STC */ +inline int VSTM(ARMul_State * state, int type, ARMword instr, ARMword* value); +inline int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword* value); +inline int VSTR(ARMul_State * state, int type, ARMword instr, ARMword* value); +/* LDC */ +inline int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value); +inline int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value); +inline int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value); + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/src/core/arm/interpreter/vfp/vfp_helper.h b/src/core/arm/interpreter/vfp/vfp_helper.h new file mode 100644 index 000000000..80f9a93f4 --- /dev/null +++ b/src/core/arm/interpreter/vfp/vfp_helper.h @@ -0,0 +1,541 @@ +/* + vfp/vfp_helper.h - ARM VFPv3 emulation unit - SoftFloat lib helper + Copyright (C) 2003 Skyeye Develop Group + for help please send mail to <skyeye-developer@lists.gro.clinux.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* + * The following code is derived from the Linux Android kernel vfp + * floating point support. + * + * Copyright (C) 2004 ARM Limited. + * Written by Deep Blue Solutions Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#ifndef __VFP_HELPER_H__ +#define __VFP_HELPER_H__ + +/* Custom edit */ + +#include <stdint.h> +#include <stdio.h> + +#include "core/arm/interpreter/armdefs.h" + +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t +#define s16 int16_t +#define s32 int32_t +#define s64 int64_t + +#define pr_info //printf +#define pr_debug //printf + +static u32 fls(int x); +#define do_div(n, base) {n/=base;} + +/* From vfpinstr.h */ + +#define INST_CPRTDO(inst) (((inst) & 0x0f000000) == 0x0e000000) +#define INST_CPRT(inst) ((inst) & (1 << 4)) +#define INST_CPRT_L(inst) ((inst) & (1 << 20)) +#define INST_CPRT_Rd(inst) (((inst) & (15 << 12)) >> 12) +#define INST_CPRT_OP(inst) (((inst) >> 21) & 7) +#define INST_CPNUM(inst) ((inst) & 0xf00) +#define CPNUM(cp) ((cp) << 8) + +#define FOP_MASK (0x00b00040) +#define FOP_FMAC (0x00000000) +#define FOP_FNMAC (0x00000040) +#define FOP_FMSC (0x00100000) +#define FOP_FNMSC (0x00100040) +#define FOP_FMUL (0x00200000) +#define FOP_FNMUL (0x00200040) +#define FOP_FADD (0x00300000) +#define FOP_FSUB (0x00300040) +#define FOP_FDIV (0x00800000) +#define FOP_EXT (0x00b00040) + +#define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) + +#define FEXT_MASK (0x000f0080) +#define FEXT_FCPY (0x00000000) +#define FEXT_FABS (0x00000080) +#define FEXT_FNEG (0x00010000) +#define FEXT_FSQRT (0x00010080) +#define FEXT_FCMP (0x00040000) +#define FEXT_FCMPE (0x00040080) +#define FEXT_FCMPZ (0x00050000) +#define FEXT_FCMPEZ (0x00050080) +#define FEXT_FCVT (0x00070080) +#define FEXT_FUITO (0x00080000) +#define FEXT_FSITO (0x00080080) +#define FEXT_FTOUI (0x000c0000) +#define FEXT_FTOUIZ (0x000c0080) +#define FEXT_FTOSI (0x000d0000) +#define FEXT_FTOSIZ (0x000d0080) + +#define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) + +#define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) +#define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) +#define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) +#define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) +#define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) +#define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) + +#define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) + +#define FPSCR_N (1 << 31) +#define FPSCR_Z (1 << 30) +#define FPSCR_C (1 << 29) +#define FPSCR_V (1 << 28) + +/* -------------- */ + +/* From asm/include/vfp.h */ + +/* FPSCR bits */ +#define FPSCR_DEFAULT_NAN (1<<25) +#define FPSCR_FLUSHTOZERO (1<<24) +#define FPSCR_ROUND_NEAREST (0<<22) +#define FPSCR_ROUND_PLUSINF (1<<22) +#define FPSCR_ROUND_MINUSINF (2<<22) +#define FPSCR_ROUND_TOZERO (3<<22) +#define FPSCR_RMODE_BIT (22) +#define FPSCR_RMODE_MASK (3 << FPSCR_RMODE_BIT) +#define FPSCR_STRIDE_BIT (20) +#define FPSCR_STRIDE_MASK (3 << FPSCR_STRIDE_BIT) +#define FPSCR_LENGTH_BIT (16) +#define FPSCR_LENGTH_MASK (7 << FPSCR_LENGTH_BIT) +#define FPSCR_IOE (1<<8) +#define FPSCR_DZE (1<<9) +#define FPSCR_OFE (1<<10) +#define FPSCR_UFE (1<<11) +#define FPSCR_IXE (1<<12) +#define FPSCR_IDE (1<<15) +#define FPSCR_IOC (1<<0) +#define FPSCR_DZC (1<<1) +#define FPSCR_OFC (1<<2) +#define FPSCR_UFC (1<<3) +#define FPSCR_IXC (1<<4) +#define FPSCR_IDC (1<<7) + +/* ---------------- */ + +static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) +{ + if (shift) { + if (shift < 32) + val = val >> shift | ((val << (32 - shift)) != 0); + else + val = val != 0; + } + return val; +} + 
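The vfp_shiftright32jamming() helper above (and its 64-bit twin just below) is SoftFloat's "sticky" right shift: any bit shifted out of the bottom is OR-ed into the least significant bit of the result, so the rounding logic further on can still tell that low-order precision was lost. A small self-contained check, duplicating the function for illustration:

    #include <assert.h>
    #include <stdint.h>

    /* Copy of vfp_shiftright32jamming() above, for a stand-alone test. */
    static uint32_t shiftright32jamming(uint32_t val, unsigned int shift)
    {
        if (shift) {
            if (shift < 32)
                val = val >> shift | ((val << (32 - shift)) != 0);
            else
                val = val != 0;
        }
        return val;
    }

    int main(void)
    {
        /* 0x101 >> 4 discards a set bit, so the result is jammed to odd:
           0x10 | 1 == 0x11. A plain shift would return 0x10 and lose the
           fact that the value was inexact. */
        assert(shiftright32jamming(0x101, 4) == 0x11);
        /* No bits lost, nothing jammed. */
        assert(shiftright32jamming(0x100, 4) == 0x10);
        /* Shifts of 32 or more collapse to a "was it non-zero" flag. */
        assert(shiftright32jamming(0x00000001, 32) == 1);
        return 0;
    }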
+static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) +{ + if (shift) { + if (shift < 64) + val = val >> shift | ((val << (64 - shift)) != 0); + else + val = val != 0; + } + return val; +} + +static inline u32 vfp_hi64to32jamming(u64 val) +{ + u32 v; + u32 highval = val >> 32; + u32 lowval = val & 0xffffffff; + + if (lowval >= 1) + v = highval | 1; + else + v = highval; + + return v; +} + +static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) +{ + *resl = nl + ml; + *resh = nh + mh; + if (*resl < nl) + *resh += 1; +} + +static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) +{ + *resl = nl - ml; + *resh = nh - mh; + if (*resl > nl) + *resh -= 1; +} + +static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) +{ + u32 nh, nl, mh, ml; + u64 rh, rma, rmb, rl; + + nl = n; + ml = m; + rl = (u64)nl * ml; + + nh = n >> 32; + rma = (u64)nh * ml; + + mh = m >> 32; + rmb = (u64)nl * mh; + rma += rmb; + + rh = (u64)nh * mh; + rh += ((u64)(rma < rmb) << 32) + (rma >> 32); + + rma <<= 32; + rl += rma; + rh += (rl < rma); + + *resl = rl; + *resh = rh; +} + +static inline void shift64left(u64 *resh, u64 *resl, u64 n) +{ + *resh = n >> 63; + *resl = n << 1; +} + +static inline u64 vfp_hi64multiply64(u64 n, u64 m) +{ + u64 rh, rl; + mul64to128(&rh, &rl, n, m); + return rh | (rl != 0); +} + +static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) +{ + u64 mh, ml, remh, reml, termh, terml, z; + + if (nh >= m) + return ~0ULL; + mh = m >> 32; + if (mh << 32 <= nh) { + z = 0xffffffff00000000ULL; + } else { + z = nh; + do_div(z, mh); + z <<= 32; + } + mul64to128(&termh, &terml, m, z); + sub128(&remh, &reml, nh, nl, termh, terml); + ml = m << 32; + while ((s64)remh < 0) { + z -= 0x100000000ULL; + add128(&remh, &reml, remh, reml, mh, ml); + } + remh = (remh << 32) | (reml >> 32); + if (mh << 32 <= remh) { + z |= 0xffffffff; + } else { + do_div(remh, mh); + z |= remh; + } + return z; +} + +/* + * Operations on unpacked elements + */ +#define vfp_sign_negate(sign) (sign ^ 0x8000) + +/* + * Single-precision + */ +struct vfp_single { + s16 exponent; + u16 sign; + u32 significand; +}; + +#ifdef __cplusplus + extern "C" { +#endif +extern s32 vfp_get_float(ARMul_State * state, unsigned int reg); +extern void vfp_put_float(ARMul_State * state, s32 val, unsigned int reg); +#ifdef __cplusplus + } +#endif + +/* + * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa + * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent + * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand + * which are not propagated to the float upon packing. + */ +#define VFP_SINGLE_MANTISSA_BITS (23) +#define VFP_SINGLE_EXPONENT_BITS (8) +#define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) +#define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) + +/* + * The bit in an unpacked float which indicates that it is a quiet NaN + */ +#define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) + +/* + * Operations on packed single-precision numbers + */ +#define vfp_single_packed_sign(v) ((v) & 0x80000000) +#define vfp_single_packed_negate(v) ((v) ^ 0x80000000) +#define vfp_single_packed_abs(v) ((v) & ~0x80000000) +#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) +#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) + +/* + * Unpack a single-precision float. 
Note that this returns the magnitude + * of the single-precision float mantissa with the 1. if necessary, + * aligned to bit 30. + */ +static inline void vfp_single_unpack(struct vfp_single *s, s32 val) +{ + u32 significand; + + s->sign = vfp_single_packed_sign(val) >> 16, + s->exponent = vfp_single_packed_exponent(val); + + significand = (u32) val; + significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; + if (s->exponent && s->exponent != 255) + significand |= 0x40000000; + s->significand = significand; +} + +/* + * Re-pack a single-precision float. This assumes that the float is + * already normalised such that the MSB is bit 30, _not_ bit 31. + */ +static inline s32 vfp_single_pack(struct vfp_single *s) +{ + u32 val; + val = (s->sign << 16) + + (s->exponent << VFP_SINGLE_MANTISSA_BITS) + + (s->significand >> VFP_SINGLE_LOW_BITS); + return (s32)val; +} + +#define VFP_NUMBER (1<<0) +#define VFP_ZERO (1<<1) +#define VFP_DENORMAL (1<<2) +#define VFP_INFINITY (1<<3) +#define VFP_NAN (1<<4) +#define VFP_NAN_SIGNAL (1<<5) + +#define VFP_QNAN (VFP_NAN) +#define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) + +static inline int vfp_single_type(struct vfp_single *s) +{ + int type = VFP_NUMBER; + if (s->exponent == 255) { + if (s->significand == 0) + type = VFP_INFINITY; + else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) + type = VFP_QNAN; + else + type = VFP_SNAN; + } else if (s->exponent == 0) { + if (s->significand == 0) + type |= VFP_ZERO; + else + type |= VFP_DENORMAL; + } + return type; +} + + +u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); + +/* + * Double-precision + */ +struct vfp_double { + s16 exponent; + u16 sign; + u64 significand; +}; + +/* + * VFP_REG_ZERO is a special register number for vfp_get_double + * which returns (double)0.0. This is useful for the compare with + * zero instructions. + */ +#ifdef CONFIG_VFPv3 +#define VFP_REG_ZERO 32 +#else +#define VFP_REG_ZERO 16 +#endif +#ifdef __cplusplus + extern "C" { +#endif +extern u64 vfp_get_double(ARMul_State * state, unsigned int reg); +extern void vfp_put_double(ARMul_State * state, u64 val, unsigned int reg); +#ifdef __cplusplus + } +#endif +#define VFP_DOUBLE_MANTISSA_BITS (52) +#define VFP_DOUBLE_EXPONENT_BITS (11) +#define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) +#define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) + +/* + * The bit in an unpacked double which indicates that it is a quiet NaN + */ +#define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) + +/* + * Operations on packed double-precision numbers + */ +#define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) +#define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) +#define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) +#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) +#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) + +/* + * Unpack a double-precision float. Note that this returns the magnitude + * of the double-precision float mantissa with the 1. if necessary, + * aligned to bit 62.
+ */ +static inline void vfp_double_unpack(struct vfp_double *s, s64 val) +{ + u64 significand; + + s->sign = vfp_double_packed_sign(val) >> 48; + s->exponent = vfp_double_packed_exponent(val); + + significand = (u64) val; + significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; + if (s->exponent && s->exponent != 2047) + significand |= (1ULL << 62); + s->significand = significand; +} + +/* + * Re-pack a double-precision float. This assumes that the float is + * already normalised such that the MSB is bit 62, _not_ bit 63. + */ +static inline s64 vfp_double_pack(struct vfp_double *s) +{ + u64 val; + val = ((u64)s->sign << 48) + + ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + + (s->significand >> VFP_DOUBLE_LOW_BITS); + return (s64)val; +} + +static inline int vfp_double_type(struct vfp_double *s) +{ + int type = VFP_NUMBER; + if (s->exponent == 2047) { + if (s->significand == 0) + type = VFP_INFINITY; + else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) + type = VFP_QNAN; + else + type = VFP_SNAN; + } else if (s->exponent == 0) { + if (s->significand == 0) + type |= VFP_ZERO; + else + type |= VFP_DENORMAL; + } + return type; +} + +u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); + +u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); + +/* + * A special flag to tell the normalisation code not to normalise. + */ +#define VFP_NAN_FLAG 0x100 + +/* + * A bit pattern used to indicate the initial (unset) value of the + * exception mask, in case nothing handles an instruction. This + * doesn't include the NAN flag, which gets masked out before + * we check for an error. + */ +#define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) + +/* + * Flags describing the vfp instruction type. + * OP_SCALAR - this operation always operates in scalar mode + * OP_SD - the instruction exceptionally writes to a single precision result. + * OP_DD - the instruction exceptionally writes to a double precision result. + * OP_SM - the instruction exceptionally reads from a single precision operand. + */ +#define OP_SCALAR (1 << 0) +#define OP_SD (1 << 1) +#define OP_DD (1 << 1) +#define OP_SM (1 << 2) + +struct op { + u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); + u32 flags; +}; + +static inline u32 fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; + +} + +#endif diff --git a/src/core/arm/interpreter/vfp/vfpdouble.cpp b/src/core/arm/interpreter/vfp/vfpdouble.cpp new file mode 100644 index 000000000..cd5b5afa4 --- /dev/null +++ b/src/core/arm/interpreter/vfp/vfpdouble.cpp @@ -0,0 +1,1263 @@ +/* + vfp/vfpdouble.c - ARM VFPv3 emulation unit - SoftFloat double instruction + Copyright (C) 2003 Skyeye Develop Group + for help please send mail to <skyeye-developer@lists.gro.clinux.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* + * This code is derived in part from : + * - Android kernel + * - John R. Housers softfloat library, which + * carries the following notice: + * + * =========================================================================== + * This C source file is part of the SoftFloat IEC/IEEE Floating-point + * Arithmetic Package, Release 2. + * + * Written by John R. Hauser. This work was made possible in part by the + * International Computer Science Institute, located at Suite 600, 1947 Center + * Street, Berkeley, California 94704. Funding was partially provided by the + * National Science Foundation under grant MIP-9311980. The original version + * of this code was written as part of a project to build a fixed-point vector + * processor in collaboration with the University of California at Berkeley, + * overseen by Profs. Nelson Morgan and John Wawrzynek. More information + * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ + * arithmetic/softfloat.html'. + * + * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort + * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT + * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO + * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY + * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + * + * Derivative works are acceptable, even for commercial purposes, so long as + * (1) they include prominent notice that the work is derivative, and (2) they + * include prominent notice akin to these three paragraphs for those parts of + * this code that are retained. + * =========================================================================== + */ + +#include "core/arm/interpreter/vfp/vfp.h" +#include "core/arm/interpreter/vfp/vfp_helper.h" +#include "core/arm/interpreter/vfp/asm_vfp.h" + +static struct vfp_double vfp_double_default_qnan = { + //.exponent = 2047, + //.sign = 0, + //.significand = VFP_DOUBLE_SIGNIFICAND_QNAN, +}; + +static void vfp_double_dump(const char *str, struct vfp_double *d) +{ + pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", + str, d->sign != 0, d->exponent, d->significand); +} + +static void vfp_double_normalise_denormal(struct vfp_double *vd) +{ + int bits = 31 - fls(vd->significand >> 32); + if (bits == 31) + bits = 63 - fls(vd->significand); + + vfp_double_dump("normalise_denormal: in", vd); + + if (bits) { + vd->exponent -= bits - 1; + vd->significand <<= bits; + } + + vfp_double_dump("normalise_denormal: out", vd); +} + +u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) +{ + u64 significand, incr; + int exponent, shift, underflow; + u32 rmode; + + vfp_double_dump("pack: in", vd); + + /* + * Infinities and NaNs are a special case. + */ + if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) + goto pack; + + /* + * Special-case zero. 
+ */ + if (vd->significand == 0) { + vd->exponent = 0; + goto pack; + } + + exponent = vd->exponent; + significand = vd->significand; + + shift = 32 - fls(significand >> 32); + if (shift == 32) + shift = 64 - fls(significand); + if (shift) { + exponent -= shift; + significand <<= shift; + } + +#if 1 + vd->exponent = exponent; + vd->significand = significand; + vfp_double_dump("pack: normalised", vd); +#endif + + /* + * Tiny number? + */ + underflow = exponent < 0; + if (underflow) { + significand = vfp_shiftright64jamming(significand, -exponent); + exponent = 0; +#if 1 + vd->exponent = exponent; + vd->significand = significand; + vfp_double_dump("pack: tiny number", vd); +#endif + if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) + underflow = 0; + } + + /* + * Select rounding increment. + */ + incr = 0; + rmode = fpscr & FPSCR_RMODE_MASK; + + if (rmode == FPSCR_ROUND_NEAREST) { + incr = 1ULL << VFP_DOUBLE_LOW_BITS; + if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) + incr -= 1; + } else if (rmode == FPSCR_ROUND_TOZERO) { + incr = 0; + } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) + incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; + + pr_debug("VFP: rounding increment = 0x%08llx\n", incr); + + /* + * Is our rounding going to overflow? + */ + if ((significand + incr) < significand) { + exponent += 1; + significand = (significand >> 1) | (significand & 1); + incr >>= 1; +#if 1 + vd->exponent = exponent; + vd->significand = significand; + vfp_double_dump("pack: overflow", vd); +#endif + } + + /* + * If any of the low bits (which will be shifted out of the + * number) are non-zero, the result is inexact. + */ + if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) + exceptions |= FPSCR_IXC; + + /* + * Do our rounding. + */ + significand += incr; + + /* + * Infinity? + */ + if (exponent >= 2046) { + exceptions |= FPSCR_OFC | FPSCR_IXC; + if (incr == 0) { + vd->exponent = 2045; + vd->significand = 0x7fffffffffffffffULL; + } else { + vd->exponent = 2047; /* infinity */ + vd->significand = 0; + } + } else { + if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) + exponent = 0; + if (exponent || significand > 0x8000000000000000ULL) + underflow = 0; + if (underflow) + exceptions |= FPSCR_UFC; + vd->exponent = exponent; + vd->significand = significand >> 1; + } + + pack: + vfp_double_dump("pack: final", vd); + { + s64 d = vfp_double_pack(vd); + pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, + dd, d, exceptions); + vfp_put_double(state, d, dd); + } + return exceptions; +} + +/* + * Propagate the NaN, setting exceptions if it is signalling. + * 'n' is always a NaN. 'm' may be a number, NaN or infinity. + */ +static u32 +vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, + struct vfp_double *vdm, u32 fpscr) +{ + struct vfp_double *nan; + int tn, tm = 0; + + tn = vfp_double_type(vdn); + + if (vdm) + tm = vfp_double_type(vdm); + + if (fpscr & FPSCR_DEFAULT_NAN) + /* + * Default NaN mode - always returns a quiet NaN + */ + nan = &vfp_double_default_qnan; + else { + /* + * Contemporary mode - select the first signalling + * NAN, or if neither are signalling, the first + * quiet NAN. + */ + if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) + nan = vdn; + else + nan = vdm; + /* + * Make the NaN quiet. + */ + nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; + } + + *vdd = *nan; + + /* + * If one was a signalling NAN, raise invalid operation. + */ + return tn == VFP_SNAN || tm == VFP_SNAN ? 
FPSCR_IOC : VFP_NAN_FLAG; +} + +/* + * Extended operations + */ +static u32 vfp_double_fabs(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + vfp_put_double(state, vfp_double_packed_abs(vfp_get_double(state, dm)), dd); + return 0; +} + +static u32 vfp_double_fcpy(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + vfp_put_double(state, vfp_get_double(state, dm), dd); + return 0; +} + +static u32 vfp_double_fneg(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + vfp_put_double(state, vfp_double_packed_negate(vfp_get_double(state, dm)), dd); + return 0; +} + +static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + struct vfp_double vdm, vdd, *vdp; + int ret, tm; + + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + tm = vfp_double_type(&vdm); + if (tm & (VFP_NAN|VFP_INFINITY)) { + vdp = &vdd; + + if (tm & VFP_NAN) + ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); + else if (vdm.sign == 0) { + sqrt_copy: + vdp = &vdm; + ret = 0; + } else { + sqrt_invalid: + vdp = &vfp_double_default_qnan; + ret = FPSCR_IOC; + } + vfp_put_double(state, vfp_double_pack(vdp), dd); + return ret; + } + + /* + * sqrt(+/- 0) == +/- 0 + */ + if (tm & VFP_ZERO) + goto sqrt_copy; + + /* + * Normalise a denormalised number + */ + if (tm & VFP_DENORMAL) + vfp_double_normalise_denormal(&vdm); + + /* + * sqrt(<0) = invalid + */ + if (vdm.sign) + goto sqrt_invalid; + + vfp_double_dump("sqrt", &vdm); + + /* + * Estimate the square root. + */ + vdd.sign = 0; + vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; + vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; + + vfp_double_dump("sqrt estimate1", &vdd); + + vdm.significand >>= 1 + (vdm.exponent & 1); + vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); + + vfp_double_dump("sqrt estimate2", &vdd); + + /* + * And now adjust. 
+ */ + if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { + if (vdd.significand < 2) { + vdd.significand = ~0ULL; + } else { + u64 termh, terml, remh, reml; + vdm.significand <<= 2; + mul64to128(&termh, &terml, vdd.significand, vdd.significand); + sub128(&remh, &reml, vdm.significand, 0, termh, terml); + while ((s64)remh < 0) { + vdd.significand -= 1; + shift64left(&termh, &terml, vdd.significand); + terml |= 1; + add128(&remh, &reml, remh, reml, termh, terml); + } + vdd.significand |= (remh | reml) != 0; + } + } + vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); + + return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fsqrt"); +} + +/* + * Equal := ZC + * Less than := N + * Greater than := C + * Unordered := CV + */ +static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u32 fpscr) +{ + s64 d, m; + u32 ret = 0; + + pr_debug("In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr); + m = vfp_get_double(state, dm); + if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { + ret |= FPSCR_C | FPSCR_V; + if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) + /* + * Signalling NaN, or signalling on quiet NaN + */ + ret |= FPSCR_IOC; + } + + d = vfp_get_double(state, dd); + if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { + ret |= FPSCR_C | FPSCR_V; + if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) + /* + * Signalling NaN, or signalling on quiet NaN + */ + ret |= FPSCR_IOC; + } + + if (ret == 0) { + //printf("In %s, d=%lld, m =%lld\n ", __FUNCTION__, d, m); + if (d == m || vfp_double_packed_abs(d | m) == 0) { + /* + * equal + */ + ret |= FPSCR_Z | FPSCR_C; + //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret); + } else if (vfp_double_packed_sign(d ^ m)) { + /* + * different signs + */ + if (vfp_double_packed_sign(d)) + /* + * d is negative, so d < m + */ + ret |= FPSCR_N; + else + /* + * d is positive, so d > m + */ + ret |= FPSCR_C; + } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { + /* + * d < m + */ + ret |= FPSCR_N; + } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { + /* + * d > m + */ + ret |= FPSCR_C; + } + } + pr_debug("In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret); + + return ret; +} + +static u32 vfp_double_fcmp(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_compare(state, dd, 0, dm, fpscr); +} + +static u32 vfp_double_fcmpe(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_compare(state, dd, 1, dm, fpscr); +} + +static u32 vfp_double_fcmpz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_compare(state, dd, 0, VFP_REG_ZERO, fpscr); +} + +static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr); +} + +static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) +{ + struct vfp_double vdm; + struct vfp_single vsd; + int tm; + u32 exceptions = 0; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + + tm = vfp_double_type(&vdm); + + /* + * If we have a signalling NaN, signal invalid operation. 
+ */ + if (tm == VFP_SNAN) + exceptions = FPSCR_IOC; + + if (tm & VFP_DENORMAL) + vfp_double_normalise_denormal(&vdm); + + vsd.sign = vdm.sign; + vsd.significand = vfp_hi64to32jamming(vdm.significand); + + /* + * If we have an infinity or a NaN, the exponent must be 255 + */ + if (tm & (VFP_INFINITY|VFP_NAN)) { + vsd.exponent = 255; + if (tm == VFP_QNAN) + vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; + goto pack_nan; + } else if (tm & VFP_ZERO) + vsd.exponent = 0; + else + vsd.exponent = vdm.exponent - (1023 - 127); + + return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts"); + + pack_nan: + vfp_put_float(state, vfp_single_pack(&vsd), sd); + return exceptions; +} + +static u32 vfp_double_fuito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + struct vfp_double vdm; + u32 m = vfp_get_float(state, dm); + + pr_debug("In %s\n", __FUNCTION__); + vdm.sign = 0; + vdm.exponent = 1023 + 63 - 1; + vdm.significand = (u64)m; + + return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fuito"); +} + +static u32 vfp_double_fsito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + struct vfp_double vdm; + u32 m = vfp_get_float(state, dm); + + pr_debug("In %s\n", __FUNCTION__); + vdm.sign = (m & 0x80000000) >> 16; + vdm.exponent = 1023 + 63 - 1; + vdm.significand = vdm.sign ? -m : m; + + return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fsito"); +} + +static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) +{ + struct vfp_double vdm; + u32 d, exceptions = 0; + int rmode = fpscr & FPSCR_RMODE_MASK; + int tm; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + + /* + * Do we have a denormalised number? + */ + tm = vfp_double_type(&vdm); + if (tm & VFP_DENORMAL) + exceptions |= FPSCR_IDC; + + if (tm & VFP_NAN) + vdm.sign = 0; + + if (vdm.exponent >= 1023 + 32) { + d = vdm.sign ? 
0 : 0xffffffff; + exceptions = FPSCR_IOC; + } else if (vdm.exponent >= 1023 - 1) { + int shift = 1023 + 63 - vdm.exponent; + u64 rem, incr = 0; + + /* + * 2^0 <= m < 2^32-2^8 + */ + d = (vdm.significand << 1) >> shift; + rem = vdm.significand << (65 - shift); + + if (rmode == FPSCR_ROUND_NEAREST) { + incr = 0x8000000000000000ULL; + if ((d & 1) == 0) + incr -= 1; + } else if (rmode == FPSCR_ROUND_TOZERO) { + incr = 0; + } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { + incr = ~0ULL; + } + + if ((rem + incr) < rem) { + if (d < 0xffffffff) + d += 1; + else + exceptions |= FPSCR_IOC; + } + + if (d && vdm.sign) { + d = 0; + exceptions |= FPSCR_IOC; + } else if (rem) + exceptions |= FPSCR_IXC; + } else { + d = 0; + if (vdm.exponent | vdm.significand) { + exceptions |= FPSCR_IXC; + if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) + d = 1; + else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { + d = 0; + exceptions |= FPSCR_IOC; + } + } + } + + pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); + + vfp_put_float(state, d, sd); + + return exceptions; +} + +static u32 vfp_double_ftouiz(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_double_ftoui(state, sd, unused, dm, FPSCR_ROUND_TOZERO); +} + +static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) +{ + struct vfp_double vdm; + u32 d, exceptions = 0; + int rmode = fpscr & FPSCR_RMODE_MASK; + int tm; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_dump("VDM", &vdm); + + /* + * Do we have denormalised number? + */ + tm = vfp_double_type(&vdm); + if (tm & VFP_DENORMAL) + exceptions |= FPSCR_IDC; + + if (tm & VFP_NAN) { + d = 0; + exceptions |= FPSCR_IOC; + } else if (vdm.exponent >= 1023 + 32) { + d = 0x7fffffff; + if (vdm.sign) + d = ~d; + exceptions |= FPSCR_IOC; + } else if (vdm.exponent >= 1023 - 1) { + int shift = 1023 + 63 - vdm.exponent; /* 58 */ + u64 rem, incr = 0; + + d = (vdm.significand << 1) >> shift; + rem = vdm.significand << (65 - shift); + + if (rmode == FPSCR_ROUND_NEAREST) { + incr = 0x8000000000000000ULL; + if ((d & 1) == 0) + incr -= 1; + } else if (rmode == FPSCR_ROUND_TOZERO) { + incr = 0; + } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { + incr = ~0ULL; + } + + if ((rem + incr) < rem && d < 0xffffffff) + d += 1; + if (d > 0x7fffffff + (vdm.sign != 0)) { + d = 0x7fffffff + (vdm.sign != 0); + exceptions |= FPSCR_IOC; + } else if (rem) + exceptions |= FPSCR_IXC; + + if (vdm.sign) + d = -d; + } else { + d = 0; + if (vdm.exponent | vdm.significand) { + exceptions |= FPSCR_IXC; + if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) + d = 1; + else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) + d = -1; + } + } + + pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); + + vfp_put_float(state, (s32)d, sd); + + return exceptions; +} + +static u32 vfp_double_ftosiz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_double_ftosi(state, dd, unused, dm, FPSCR_ROUND_TOZERO); +} + +static struct op fops_ext[] = { + { vfp_double_fcpy, 0 }, //0x00000000 - FEXT_FCPY + { vfp_double_fabs, 0 }, //0x00000001 - FEXT_FABS + { vfp_double_fneg, 0 }, //0x00000002 - FEXT_FNEG + { vfp_double_fsqrt, 0 }, //0x00000003 - FEXT_FSQRT + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { vfp_double_fcmp, OP_SCALAR }, //0x00000008 - FEXT_FCMP + { vfp_double_fcmpe, 
OP_SCALAR }, //0x00000009 - FEXT_FCMPE + { vfp_double_fcmpz, OP_SCALAR }, //0x0000000A - FEXT_FCMPZ + { vfp_double_fcmpez, OP_SCALAR }, //0x0000000B - FEXT_FCMPEZ + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { vfp_double_fcvts, OP_SCALAR|OP_DD }, //0x0000000F - FEXT_FCVT + { vfp_double_fuito, OP_SCALAR }, //0x00000010 - FEXT_FUITO + { vfp_double_fsito, OP_SCALAR }, //0x00000011 - FEXT_FSITO + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { vfp_double_ftoui, OP_SCALAR }, //0x00000018 - FEXT_FTOUI + { vfp_double_ftouiz, OP_SCALAR }, //0x00000019 - FEXT_FTOUIZ + { vfp_double_ftosi, OP_SCALAR }, //0x0000001A - FEXT_FTOSI + { vfp_double_ftosiz, OP_SCALAR }, //0x0000001B - FEXT_FTOSIZ +}; + + + + +static u32 +vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, + struct vfp_double *vdm, u32 fpscr) +{ + struct vfp_double *vdp; + u32 exceptions = 0; + int tn, tm; + + tn = vfp_double_type(vdn); + tm = vfp_double_type(vdm); + + if (tn & tm & VFP_INFINITY) { + /* + * Two infinities. Are they different signs? + */ + if (vdn->sign ^ vdm->sign) { + /* + * different signs -> invalid + */ + exceptions = FPSCR_IOC; + vdp = &vfp_double_default_qnan; + } else { + /* + * same signs -> valid + */ + vdp = vdn; + } + } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { + /* + * One infinity and one number -> infinity + */ + vdp = vdn; + } else { + /* + * 'n' is a NaN of some type + */ + return vfp_propagate_nan(vdd, vdn, vdm, fpscr); + } + *vdd = *vdp; + return exceptions; +} + +static u32 +vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, + struct vfp_double *vdm, u32 fpscr) +{ + u32 exp_diff; + u64 m_sig; + + if (vdn->significand & (1ULL << 63) || + vdm->significand & (1ULL << 63)) { + pr_info("VFP: bad FP values\n"); + vfp_double_dump("VDN", vdn); + vfp_double_dump("VDM", vdm); + } + + /* + * Ensure that 'n' is the largest magnitude number. Note that + * if 'n' and 'm' have equal exponents, we do not swap them. + * This ensures that NaN propagation works correctly. + */ + if (vdn->exponent < vdm->exponent) { + struct vfp_double *t = vdn; + vdn = vdm; + vdm = t; + } + + /* + * Is 'n' an infinity or a NaN? Note that 'm' may be a number, + * infinity or a NaN here. + */ + if (vdn->exponent == 2047) + return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); + + /* + * We have two proper numbers, where 'vdn' is the larger magnitude. + * + * Copy 'n' to 'd' before doing the arithmetic. + */ + *vdd = *vdn; + + /* + * Align 'm' with the result. + */ + exp_diff = vdn->exponent - vdm->exponent; + m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); + + /* + * If the signs are different, we are really subtracting. + */ + if (vdn->sign ^ vdm->sign) { + m_sig = vdn->significand - m_sig; + if ((s64)m_sig < 0) { + vdd->sign = vfp_sign_negate(vdd->sign); + m_sig = -m_sig; + } else if (m_sig == 0) { + vdd->sign = (fpscr & FPSCR_RMODE_MASK) == + FPSCR_ROUND_MINUSINF ? 0x8000 : 0; + } + } else { + m_sig += vdn->significand; + } + vdd->significand = m_sig; + + return 0; +} + +static u32 +vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, + struct vfp_double *vdm, u32 fpscr) +{ + vfp_double_dump("VDN", vdn); + vfp_double_dump("VDM", vdm); + + /* + * Ensure that 'n' is the largest magnitude number. Note that + * if 'n' and 'm' have equal exponents, we do not swap them. + * This ensures that NaN propagation works correctly. 
+ */ + if (vdn->exponent < vdm->exponent) { + struct vfp_double *t = vdn; + vdn = vdm; + vdm = t; + pr_debug("VFP: swapping M <-> N\n"); + } + + vdd->sign = vdn->sign ^ vdm->sign; + + /* + * If 'n' is an infinity or NaN, handle it. 'm' may be anything. + */ + if (vdn->exponent == 2047) { + if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) + return vfp_propagate_nan(vdd, vdn, vdm, fpscr); + if ((vdm->exponent | vdm->significand) == 0) { + *vdd = vfp_double_default_qnan; + return FPSCR_IOC; + } + vdd->exponent = vdn->exponent; + vdd->significand = 0; + return 0; + } + + /* + * If 'm' is zero, the result is always zero. In this case, + * 'n' may be zero or a number, but it doesn't matter which. + */ + if ((vdm->exponent | vdm->significand) == 0) { + vdd->exponent = 0; + vdd->significand = 0; + return 0; + } + + /* + * We add 2 to the destination exponent for the same reason + * as the addition case - though this time we have +1 from + * each input operand. + */ + vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; + vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); + + vfp_double_dump("VDD", vdd); + return 0; +} + +#define NEG_MULTIPLY (1 << 0) +#define NEG_SUBTRACT (1 << 1) + +static u32 +vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) +{ + struct vfp_double vdd, vdp, vdn, vdm; + u32 exceptions; + + vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + if (vdn.exponent == 0 && vdn.significand) + vfp_double_normalise_denormal(&vdn); + + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + if (vdm.exponent == 0 && vdm.significand) + vfp_double_normalise_denormal(&vdm); + + exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); + if (negate & NEG_MULTIPLY) + vdp.sign = vfp_sign_negate(vdp.sign); + + vfp_double_unpack(&vdn, vfp_get_double(state, dd)); + if (negate & NEG_SUBTRACT) + vdn.sign = vfp_sign_negate(vdn.sign); + + exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); + + return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, func); +} + +/* + * Standard operations + */ + +/* + * sd = sd + (sn * sm) + */ +static u32 vfp_double_fmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, 0, "fmac"); +} + +/* + * sd = sd - (sn * sm) + */ +static u32 vfp_double_fnmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); +} + +/* + * sd = -sd + (sn * sm) + */ +static u32 vfp_double_fmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); +} + +/* + * sd = -sd - (sn * sm) + */ +static u32 vfp_double_fnmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + pr_debug("In %s\n", __FUNCTION__); + return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); +} + +/* + * sd = sn * sm + */ +static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + struct vfp_double vdd, vdn, vdm; + u32 exceptions; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + if (vdn.exponent == 0 && vdn.significand) + vfp_double_normalise_denormal(&vdn); + + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + if 
(vdm.exponent == 0 && vdm.significand) + vfp_double_normalise_denormal(&vdm); + + exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); + return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fmul"); +} + +/* + * sd = -(sn * sm) + */ +static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + struct vfp_double vdd, vdn, vdm; + u32 exceptions; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + if (vdn.exponent == 0 && vdn.significand) + vfp_double_normalise_denormal(&vdn); + + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + if (vdm.exponent == 0 && vdm.significand) + vfp_double_normalise_denormal(&vdm); + + exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); + vdd.sign = vfp_sign_negate(vdd.sign); + + return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fnmul"); +} + +/* + * sd = sn + sm + */ +static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + struct vfp_double vdd, vdn, vdm; + u32 exceptions; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + if (vdn.exponent == 0 && vdn.significand) + vfp_double_normalise_denormal(&vdn); + + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + if (vdm.exponent == 0 && vdm.significand) + vfp_double_normalise_denormal(&vdm); + + exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); + + return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fadd"); +} + +/* + * sd = sn - sm + */ +static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + struct vfp_double vdd, vdn, vdm; + u32 exceptions; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + if (vdn.exponent == 0 && vdn.significand) + vfp_double_normalise_denormal(&vdn); + + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + if (vdm.exponent == 0 && vdm.significand) + vfp_double_normalise_denormal(&vdm); + + /* + * Subtraction is like addition, but with a negated operand. + */ + vdm.sign = vfp_sign_negate(vdm.sign); + + exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); + + return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fsub"); +} + +/* + * sd = sn / sm + */ +static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr) +{ + struct vfp_double vdd, vdn, vdm; + u32 exceptions = 0; + int tm, tn; + + pr_debug("In %s\n", __FUNCTION__); + vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + + vdd.sign = vdn.sign ^ vdm.sign; + + tn = vfp_double_type(&vdn); + tm = vfp_double_type(&vdm); + + /* + * Is n a NAN? + */ + if (tn & VFP_NAN) + goto vdn_nan; + + /* + * Is m a NAN? + */ + if (tm & VFP_NAN) + goto vdm_nan; + + /* + * If n and m are infinity, the result is invalid + * If n and m are zero, the result is invalid + */ + if (tm & tn & (VFP_INFINITY|VFP_ZERO)) + goto invalid; + + /* + * If n is infinity, the result is infinity + */ + if (tn & VFP_INFINITY) + goto infinity; + + /* + * If m is zero, raise div0 exceptions + */ + if (tm & VFP_ZERO) + goto divzero; + + /* + * If m is infinity, or n is zero, the result is zero + */ + if (tm & VFP_INFINITY || tn & VFP_ZERO) + goto zero; + + if (tn & VFP_DENORMAL) + vfp_double_normalise_denormal(&vdn); + if (tm & VFP_DENORMAL) + vfp_double_normalise_denormal(&vdm); + + /* + * Ok, we have two numbers, we can perform division. 
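+ *
+ * The quotient is formed estimate-first: vfp_estimate_div128to64()
+ * returns an approximation of (vdn.significand:0) / vdm.significand,
+ * and only when the low bits sit close to a rounding boundary (the
+ * "(vdd.significand & 0x1ff) <= 2" test below) is the exact 128-bit
+ * remainder recomputed and the estimate stepped down until the
+ * remainder goes non-negative. The trailing
+ * "significand |= (reml != 0)" folds any leftover remainder into the
+ * sticky bit so vfp_double_normaliseround() still rounds correctly.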
+ */ + vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; + vdm.significand <<= 1; + if (vdm.significand <= (2 * vdn.significand)) { + vdn.significand >>= 1; + vdd.exponent++; + } + vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); + if ((vdd.significand & 0x1ff) <= 2) { + u64 termh, terml, remh, reml; + mul64to128(&termh, &terml, vdm.significand, vdd.significand); + sub128(&remh, &reml, vdn.significand, 0, termh, terml); + while ((s64)remh < 0) { + vdd.significand -= 1; + add128(&remh, &reml, remh, reml, 0, vdm.significand); + } + vdd.significand |= (reml != 0); + } + return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fdiv"); + + vdn_nan: + exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); + pack: + vfp_put_double(state, vfp_double_pack(&vdd), dd); + return exceptions; + + vdm_nan: + exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); + goto pack; + + zero: + vdd.exponent = 0; + vdd.significand = 0; + goto pack; + + divzero: + exceptions = FPSCR_DZC; + infinity: + vdd.exponent = 2047; + vdd.significand = 0; + goto pack; + + invalid: + vfp_put_double(state, vfp_double_pack(&vfp_double_default_qnan), dd); + return FPSCR_IOC; +} + +static struct op fops[] = { + { vfp_double_fmac, 0 }, + { vfp_double_fmsc, 0 }, + { vfp_double_fmul, 0 }, + { vfp_double_fadd, 0 }, + { vfp_double_fnmac, 0 }, + { vfp_double_fnmsc, 0 }, + { vfp_double_fnmul, 0 }, + { vfp_double_fsub, 0 }, + { vfp_double_fdiv, 0 }, +}; + +#define FREG_BANK(x) ((x) & 0x0c) +#define FREG_IDX(x) ((x) & 3) + +u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr) +{ + u32 op = inst & FOP_MASK; + u32 exceptions = 0; + unsigned int dest; + unsigned int dn = vfp_get_dn(inst); + unsigned int dm; + unsigned int vecitr, veclen, vecstride; + struct op *fop; + + pr_debug("In %s\n", __FUNCTION__); + vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)); + + fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)]; + + /* + * fcvtds takes an sN register number as destination, not dN. + * It also always operates on scalars. + */ + if (fop->flags & OP_SD) + dest = vfp_get_sd(inst); + else + dest = vfp_get_dd(inst); + + /* + * f[us]ito takes a sN operand, not a dN operand. + */ + if (fop->flags & OP_SM) + dm = vfp_get_sm(inst); + else + dm = vfp_get_dm(inst); + + /* + * If destination bank is zero, vector length is always '1'. + * ARM DDI0100F C5.1.3, C5.3.2. + */ + if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0)) + veclen = 0; + else + veclen = fpscr & FPSCR_LENGTH_MASK; + + pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, + (veclen >> FPSCR_LENGTH_BIT) + 1); + + if (!fop->fn) { + printf("VFP: could not find double op %d\n", FEXT_TO_IDX(inst)); + goto invalid; + } + + for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { + u32 except; + char type; + + type = fop->flags & OP_SD ? 's' : 'd'; + if (op == FOP_EXT) + pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n", + vecitr >> FPSCR_LENGTH_BIT, + type, dest, dn, dm); + else + pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n", + vecitr >> FPSCR_LENGTH_BIT, + type, dest, dn, FOP_TO_IDX(op), dm); + + except = fop->fn(state, dest, dn, dm, fpscr); + pr_debug("VFP: itr%d: exceptions=%08x\n", + vecitr >> FPSCR_LENGTH_BIT, except); + + exceptions |= except; + + /* + * CHECK: It appears to be undefined whether we stop when + * we encounter an exception. We continue. 
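+ *
+ * The register stepping below implements the FPSCR vector-mode walk:
+ * a register number splits into a bank (FREG_BANK, bits 3:2) and an
+ * index (FREG_IDX, bits 1:0), and only the index advances by
+ * 'vecstride', wrapping inside its 4-register bank. With stride 2,
+ * for example, a destination walks d4, d6, d4, ... while 'dm' is
+ * left alone whenever it lives in bank 0, the scalar-operand case
+ * from the ARM DDI0100F rules cited earlier.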
+ */ + dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3); + dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3); + if (FREG_BANK(dm) != 0) + dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3); + } + return exceptions; + + invalid: + return ~0; +} diff --git a/src/core/arm/interpreter/vfp/vfpinstr.cpp b/src/core/arm/interpreter/vfp/vfpinstr.cpp new file mode 100644 index 000000000..a57047911 --- /dev/null +++ b/src/core/arm/interpreter/vfp/vfpinstr.cpp @@ -0,0 +1,5123 @@ +/* + vfp/vfpinstr.c - ARM VFPv3 emulation unit - Individual instructions data + Copyright (C) 2003 Skyeye Develop Group + for help please send mail to <skyeye-developer@lists.gro.clinux.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* Notice: this file should not be compiled as is, and is meant to be + included in other files only. */ + +/* ----------------------------------------------------------------------- */ +/* CDP instructions */ +/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */ + +/* ----------------------------------------------------------------------- */ +/* VMLA */ +/* cond 1110 0D00 Vn-- Vd-- 101X N0M0 Vm-- */ +#define vfpinstr vmla +#define vfpinstr_inst vmla_inst +#define VFPLABEL_INST VMLA_INST +#ifdef VFP_DECODE +{"vmla", 4, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x0, 9, 11, 0x5, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmla", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmla_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VMLA :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 0) +{ + DBG("VMLA :\n"); +} +#endif +#ifdef 
VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int add = (BIT(6) == 0); + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG32(tmp); + mm = FR32(d); + tmp = FPADD(mm,tmp); + //LETS(d,tmp); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + //mm = SITOFP(32,RSPR(m)); + //LETS(d,tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * m))); + nn = ZEXT64(IBITCAST32(FR32(2 * m + 1))); + tmp = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(tmp); + tmp = ZEXT64(IBITCAST32(FR32(2 * n))); + nn = ZEXT64(IBITCAST32(FR32(2 * n + 1))); + nn = OR(SHL(nn,CONST64(32)),tmp); + nn = FPBITCAST64(nn); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG64(tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * d))); + nn = ZEXT64(IBITCAST32(FR32(2 * d + 1))); + mm = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(mm); + tmp = FPADD(mm,tmp); + mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32))); + nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff))); + LETFPS(2*d ,FPBITCAST32(nn)); + LETFPS(d*2 + 1 , FPBITCAST32(mm)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VNMLS */ +/* cond 1110 0D00 Vn-- Vd-- 101X N1M0 Vm-- */ +#define vfpinstr vmls +#define vfpinstr_inst vmls_inst +#define VFPLABEL_INST VMLS_INST +#ifdef VFP_DECODE +{"vmls", 7, ARMVFP2, 28 , 31, 0xF, 25, 27, 0x1, 23, 23, 1, 11, 11, 0, 8, 9, 0x2, 6, 6, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmls", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmls_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VMLS :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, 
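+ /*
+  * BIT(8) of the encoding (cp11 vs cp10, the 'X' in 101X) is what
+  * dp_operation stores: it routes the packed instruction to the
+  * double- or single-precision soft-float pipeline. FPSCR rides
+  * along so the helpers can honour the rounding mode and return
+  * exception flags for CHECK_VFP_CDP_RET to fold back in.
+  */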
cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 2) +{ + DBG("VMLS :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s VMLS instruction is executed out of here.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int add = (BIT(6) == 0); + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG32(tmp); + mm = FR32(d); + tmp = FPADD(mm,tmp); + //LETS(d,tmp); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + //mm = SITOFP(32,RSPR(m)); + //LETS(d,tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * m))); + nn = ZEXT64(IBITCAST32(FR32(2 * m + 1))); + tmp = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(tmp); + tmp = ZEXT64(IBITCAST32(FR32(2 * n))); + nn = ZEXT64(IBITCAST32(FR32(2 * n + 1))); + nn = OR(SHL(nn,CONST64(32)),tmp); + nn = FPBITCAST64(nn); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG64(tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * d))); + nn = ZEXT64(IBITCAST32(FR32(2 * d + 1))); + mm = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(mm); + tmp = FPADD(mm,tmp); + mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32))); + nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff))); + LETFPS(2*d ,FPBITCAST32(nn)); + LETFPS(d*2 + 1 , FPBITCAST32(mm)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VNMLA */ +/* cond 1110 0D01 Vn-- Vd-- 101X N1M0 Vm-- */ +#define vfpinstr vnmla +#define vfpinstr_inst vnmla_inst +#define VFPLABEL_INST VNMLA_INST +#ifdef VFP_DECODE +//{"vnmla", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +{"vnmla", 4, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x1, 9, 11, 0x5, 4, 4, 0}, +{"vnmla", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +//{"vnmla", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vnmla", 0, ARMVFP2, 0}, +{"vnmla", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vnmla_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + 
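+ /*
+  * One variable-sized cache entry per translated instruction: the
+  * arm_inst header is followed inline by this instruction's payload
+  * (the *_inst struct holding 'instr' and 'dp_operation'), which is
+  * why the size is the sum of the two structs and why the
+  * interpreter later advances with INC_PC(sizeof(vfpinstr_inst)).
+  */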
sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VNMLA :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 2) +{ + DBG("VNMLA :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s VNMLA instruction is executed out of here.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int add = (BIT(6) == 0); + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG32(tmp); + mm = FR32(d); + tmp = FPADD(FPNEG32(mm),tmp); + //LETS(d,tmp); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + //mm = SITOFP(32,RSPR(m)); + //LETS(d,tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * m))); + nn = ZEXT64(IBITCAST32(FR32(2 * m + 1))); + tmp = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(tmp); + tmp = ZEXT64(IBITCAST32(FR32(2 * n))); + nn = ZEXT64(IBITCAST32(FR32(2 * n + 1))); + nn = OR(SHL(nn,CONST64(32)),tmp); + nn = FPBITCAST64(nn); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG64(tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * d))); + nn = ZEXT64(IBITCAST32(FR32(2 * d + 1))); + mm = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(mm); + tmp = FPADD(FPNEG64(mm),tmp); + mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32))); + nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff))); + LETFPS(2*d ,FPBITCAST32(nn)); + LETFPS(d*2 + 1 , FPBITCAST32(mm)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VNMLS */ +/* cond 1110 0D01 Vn-- Vd-- 101X N0M0 Vm-- */ +#define vfpinstr vnmls +#define vfpinstr_inst vnmls_inst +#define VFPLABEL_INST VNMLS_INST +#ifdef VFP_DECODE +{"vnmls", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x1, 9, 11, 0x5, 6, 6, 0, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vnmls", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif 
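+/*
+ * Note on the layout of this file: it is included textually many
+ * times, each pass with exactly one VFP_* selector macro defined
+ * (VFP_DECODE, VFP_INTERPRETER_TABLE, VFP_INTERPRETER_STRUCT,
+ * VFP_INTERPRETER_TRANS, VFP_INTERPRETER_IMPL, VFP_DYNCOM_TABLE and
+ * so on), so each pass extracts one parallel slice of every
+ * instruction: a decode row, a dispatch-table entry, an operand
+ * struct, a translator, or an executable label. A consumer looks
+ * roughly like this (a sketch, not the core's exact includer):
+ *
+ *      #define VFP_INTERPRETER_TABLE
+ *      #include "vfpinstr.cpp"
+ *      #undef VFP_INTERPRETER_TABLE
+ *
+ * Keeping every slice of one instruction inside a single
+ * #define/#undef pair of 'vfpinstr' is what keeps the generated
+ * tables in matching order.
+ */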
+#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vnmls_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VNMLS :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 0) +{ + DBG("VNMLS :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int add = (BIT(6) == 0); + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG32(tmp); + mm = FR32(d); + tmp = FPADD(FPNEG32(mm),tmp); + //LETS(d,tmp); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + //mm = SITOFP(32,RSPR(m)); + //LETS(d,tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * m))); + nn = ZEXT64(IBITCAST32(FR32(2 * m + 1))); + tmp = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(tmp); + tmp = ZEXT64(IBITCAST32(FR32(2 * n))); + nn = ZEXT64(IBITCAST32(FR32(2 * n + 1))); + nn = OR(SHL(nn,CONST64(32)),tmp); + nn = FPBITCAST64(nn); + tmp = FPMUL(nn,mm); + if(!add) + tmp = FPNEG64(tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * d))); + nn = ZEXT64(IBITCAST32(FR32(2 * d + 1))); + mm = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(mm); + tmp = FPADD(FPNEG64(mm),tmp); + mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32))); + nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff))); + LETFPS(2*d ,FPBITCAST32(nn)); + LETFPS(d*2 + 1 , FPBITCAST32(mm)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VNMUL */ +/* cond 1110 0D10 Vn-- Vd-- 101X 
N0M0 Vm-- */ +#define vfpinstr vnmul +#define vfpinstr_inst vnmul_inst +#define VFPLABEL_INST VNMUL_INST +#ifdef VFP_DECODE +{"vnmul", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vnmul", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vnmul_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VNMUL :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 2) +{ + DBG("VNMUL :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int add = (BIT(6) == 0); + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPMUL(nn,mm); + //LETS(d,tmp); + LETFPS(d,FPNEG32(tmp)); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + //mm = SITOFP(32,RSPR(m)); + //LETS(d,tmp); + mm = ZEXT64(IBITCAST32(FR32(2 * m))); + nn = ZEXT64(IBITCAST32(FR32(2 * m + 1))); + tmp = OR(SHL(nn,CONST64(32)),mm); + mm = FPBITCAST64(tmp); + tmp = ZEXT64(IBITCAST32(FR32(2 * n))); + nn = ZEXT64(IBITCAST32(FR32(2 * n + 1))); + nn = OR(SHL(nn,CONST64(32)),tmp); + nn = FPBITCAST64(nn); + tmp = FPMUL(nn,mm); + tmp = FPNEG64(tmp); + mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32))); + nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff))); + LETFPS(2*d ,FPBITCAST32(nn)); + LETFPS(d*2 + 1 , FPBITCAST32(mm)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* 
----------------------------------------------------------------------- */ +/* VMUL */ +/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */ +#define vfpinstr vmul +#define vfpinstr_inst vmul_inst +#define VFPLABEL_INST VMUL_INST +#ifdef VFP_DECODE +{"vmul", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 0, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmul", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmul_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VMUL :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 0) +{ + DBG("VMUL :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //printf("\n\n\t\tin %s instruction is executed out.\n\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + //mm = SITOFP(32,FR(m)); + //nn = SITOFP(32,FRn)); + mm = FR32(m); + nn = FR32(n); + tmp = FPMUL(nn,mm); + //LETS(d,tmp); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + //mm = SITOFP(32,RSPR(m)); + //LETS(d,tmp); + Value *lo = FR32(2 * m); + Value *hi = FR32(2 * m + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + Value *hi64 = ZEXT64(hi); + Value* lo64 = ZEXT64(lo); + Value* v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value* m0 = FPBITCAST64(v64); + lo = FR32(2 * n); + hi = FR32(2 * n + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + hi64 = ZEXT64(hi); + lo64 = ZEXT64(lo); + v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value *n0 = 
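+ /*
+  * Same pair-merging idiom as for m0 above: dyncom models a VFP
+  * double as two 32-bit float registers, so a double operand is
+  * rebuilt by bitcasting both halves to integers, widening, and
+  * OR-ing the high half up by 32 before bitcasting back to f64.
+  * In plain C terms, roughly:
+  *
+  *      u64 bits = ((u64)hi32 << 32) | lo32;    // then bits -> double
+  */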
FPBITCAST64(v64); + tmp = FPMUL(n0,m0); + Value *val64 = IBITCAST64(tmp); + hi = LSHR(val64,CONST64(32)); + lo = AND(val64,CONST64(0xffffffff)); + hi = TRUNC32(hi); + lo = TRUNC32(lo); + hi = FPBITCAST32(hi); + lo = FPBITCAST32(lo); + LETFPS(2*d ,lo); + LETFPS(d*2 + 1 , hi); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VADD */ +/* cond 1110 0D11 Vn-- Vd-- 101X N0M0 Vm-- */ +#define vfpinstr vadd +#define vfpinstr_inst vadd_inst +#define VFPLABEL_INST VADD_INST +#ifdef VFP_DECODE +{"vadd", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x3, 9, 11, 0x5, 6, 6, 0, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vadd", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vadd_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VADD :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 0) +{ + DBG("VADD :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction will implement out of JIT.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPADD(nn,mm); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + Value *lo = FR32(2 * m); + Value *hi = FR32(2 * m + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + Value *hi64 = ZEXT64(hi); + Value* lo64 = ZEXT64(lo); + Value* v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value* m0 = FPBITCAST64(v64); + lo = 
FR32(2 * n); + hi = FR32(2 * n + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + hi64 = ZEXT64(hi); + lo64 = ZEXT64(lo); + v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value *n0 = FPBITCAST64(v64); + tmp = FPADD(n0,m0); + Value *val64 = IBITCAST64(tmp); + hi = LSHR(val64,CONST64(32)); + lo = AND(val64,CONST64(0xffffffff)); + hi = TRUNC32(hi); + lo = TRUNC32(lo); + hi = FPBITCAST32(hi); + lo = FPBITCAST32(lo); + LETFPS(2*d ,lo); + LETFPS(d*2 + 1 , hi); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VSUB */ +/* cond 1110 0D11 Vn-- Vd-- 101X N1M0 Vm-- */ +#define vfpinstr vsub +#define vfpinstr_inst vsub_inst +#define VFPLABEL_INST VSUB_INST +#ifdef VFP_DECODE +{"vsub", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x3, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vsub", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vsub_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VSUB :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 2) +{ + DBG("VSUB :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instr=0x%x, instruction is executed out of JIT.\n", __FUNCTION__, instr); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPSUB(nn,mm); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + Value *lo = FR32(2 * m); + Value *hi = FR32(2 * m + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + Value *hi64 = 
ZEXT64(hi); + Value* lo64 = ZEXT64(lo); + Value* v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value* m0 = FPBITCAST64(v64); + lo = FR32(2 * n); + hi = FR32(2 * n + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + hi64 = ZEXT64(hi); + lo64 = ZEXT64(lo); + v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value *n0 = FPBITCAST64(v64); + tmp = FPSUB(n0,m0); + Value *val64 = IBITCAST64(tmp); + hi = LSHR(val64,CONST64(32)); + lo = AND(val64,CONST64(0xffffffff)); + hi = TRUNC32(hi); + lo = TRUNC32(lo); + hi = FPBITCAST32(hi); + lo = FPBITCAST32(lo); + LETFPS(2*d ,lo); + LETFPS(d*2 + 1 , hi); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VDIV */ +/* cond 1110 1D00 Vn-- Vd-- 101X N0M0 Vm-- */ +#define vfpinstr vdiv +#define vfpinstr_inst vdiv_inst +#define VFPLABEL_INST VDIV_INST +#ifdef VFP_DECODE +{"vdiv", 5, ARMVFP2, 23, 27, 0x1d, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 0, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vdiv", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vdiv_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VDIV :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xA && (OPC_2 & 0x2) == 0) +{ + DBG("VDIV :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int m; + int n; + int d ; + int s = BIT(8) == 0; + Value *mm; + Value *nn; + Value *tmp; + if(s){ + m = BIT(5) | BITS(0,3) << 1; + n = BIT(7) | BITS(16,19) << 1; + d = BIT(22) | BITS(12,15) << 1; + mm = FR32(m); + nn = FR32(n); + tmp = FPDIV(nn,mm); + LETFPS(d,tmp); + }else { + m = BITS(0,3) | BIT(5) << 4; + n = 
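+ /*
+  * Register numbers decode differently per precision: a single Sx
+  * index appends its extra bit at the bottom (Vm:M, Vn:N, Vd:D),
+  * while a double Dx index, as here, puts the extra bit on top
+  * (M:Vm and so on), matching the FR32(2*m)/FR32(2*m+1) banked
+  * accesses used below to reach each half of the double.
+  */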
BITS(16,19) | BIT(7) << 4; + d = BIT(22) << 4 | BITS(12,15); + Value *lo = FR32(2 * m); + Value *hi = FR32(2 * m + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + Value *hi64 = ZEXT64(hi); + Value* lo64 = ZEXT64(lo); + Value* v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value* m0 = FPBITCAST64(v64); + lo = FR32(2 * n); + hi = FR32(2 * n + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + hi64 = ZEXT64(hi); + lo64 = ZEXT64(lo); + v64 = OR(SHL(hi64,CONST64(32)),lo64); + Value *n0 = FPBITCAST64(v64); + tmp = FPDIV(n0,m0); + Value *val64 = IBITCAST64(tmp); + hi = LSHR(val64,CONST64(32)); + lo = AND(val64,CONST64(0xffffffff)); + hi = TRUNC32(hi); + lo = TRUNC32(lo); + hi = FPBITCAST32(hi); + lo = FPBITCAST32(lo); + LETFPS(2*d ,lo); + LETFPS(d*2 + 1 , hi); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMOVI move immediate */ +/* cond 1110 1D11 im4H Vd-- 101X 0000 im4L */ +/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */ +#define vfpinstr vmovi +#define vfpinstr_inst vmovi_inst +#define VFPLABEL_INST VMOVI_INST +#ifdef VFP_DECODE +{"vmov(i)", 4, ARMVFP3, 23, 27, 0x1d, 20, 21, 0x3, 9, 11, 0x5, 4, 7, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmov(i)", 0, ARMVFP3, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovi_inst { + unsigned int single; + unsigned int d; + unsigned int imm; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->d = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4); + unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3); + if (inst_cream->single) + inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19; + else + inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16; + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ( (OPC_1 & 0xb) == 0xb && BITS(4, 7) == 0) +{ + unsigned int single = BIT(8) == 0; + unsigned int d = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4); + unsigned int imm; + instr = BITS(16, 19) << 4 | BITS(0, 3); /* FIXME dirty workaround to get a correct imm */ + if (single) { + imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0x1f : 0)<<25 | BITS(0, 5)<<19; + } else { + imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 
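+ /*
+  * Standard VFP immediate expansion: with imm8 = abcdefgh the
+  * constant is sign 'a', exponent NOT(b) followed by 'b' replicated
+  * (five copies for singles, eight for doubles, hence 0x1f vs 0xff),
+  * then fraction cdefgh padded with zeros. Worked single-precision
+  * example:
+  *
+  *      imm8  = 0x70            // a=0 b=1 cdefgh=110000
+  *      imm32 = 0<<31 | 0<<30 | 0x1f<<25 | 0x30<<19
+  *            = 0x3f800000      // 1.0f
+  */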
0xff : 0)<<22 | BITS(0, 5)<<16; + } + VMOVI(state, single, d, imm); + return ARMul_DONE; +} +#endif +#ifdef VFP_CDP_IMPL +void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm) +{ + DBG("VMOV(I) :\n"); + + if (single) + { + DBG("\ts%d <= [%x]\n", d, imm); + state->ExtReg[d] = imm; + } + else + { + /* Check endian please */ + DBG("\ts[%d-%d] <= [%x-%x]\n", d*2+1, d*2, imm, 0); + state->ExtReg[d*2+1] = imm; + state->ExtReg[d*2] = 0; + } +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int single = (BIT(8) == 0); + int d; + int imm32; + Value *v; + Value *tmp; + v = CONST32(BITS(0,3) | BITS(16,19) << 4); + //v = CONST64(0x3ff0000000000000); + if(single){ + d = BIT(22) | BITS(12,15) << 1; + }else { + d = BITS(12,15) | BIT(22) << 4; + } + if(single){ + LETFPS(d,FPBITCAST32(v)); + }else { + //v = UITOFP(64,v); + //tmp = IBITCAST64(v); + LETFPS(d*2 ,FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff))))); + LETFPS(d * 2 + 1,FPBITCAST32(TRUNC32(LSHR(v,CONST64(32))))); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMOVR move register */ +/* cond 1110 1D11 0000 Vd-- 101X 01M0 Vm-- */ +/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */ +#define vfpinstr vmovr +#define vfpinstr_inst vmovr_inst +#define VFPLABEL_INST VMOVR_INST +#ifdef VFP_DECODE +{"vmov(r)", 5, ARMVFP3, 23, 27, 0x1d, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmov(r)", 0, ARMVFP3, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovr_inst { + unsigned int single; + unsigned int d; + unsigned int m; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + VFP_DEBUG_UNTESTED(VMOVR); + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->d = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4); + inst_cream->m = (inst_cream->single ? 
BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4); + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ( (OPC_1 & 0xb) == 0xb && CRn == 0 && (OPC_2 & 0x6) == 0x2 ) +{ + unsigned int single = BIT(8) == 0; + unsigned int d = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4); + unsigned int m = (single ? BITS( 0, 3)<<1 | BIT( 5) : BITS( 0, 3) | BIT( 5)<<4);; + VMOVR(state, single, d, m); + return ARMul_DONE; +} +#endif +#ifdef VFP_CDP_IMPL +void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword m) +{ + DBG("VMOV(R) :\n"); + + if (single) + { + DBG("\ts%d <= s%d[%x]\n", d, m, state->ExtReg[m]); + state->ExtReg[d] = state->ExtReg[m]; + } + else + { + /* Check endian please */ + DBG("\ts[%d-%d] <= s[%d-%d][%x-%x]\n", d*2+1, d*2, m*2+1, m*2, state->ExtReg[m*2+1], state->ExtReg[m*2]); + state->ExtReg[d*2+1] = state->ExtReg[m*2+1]; + state->ExtReg[d*2] = state->ExtReg[m*2]; + } +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc); + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s VMOV \n", __FUNCTION__); + int single = BIT(8) == 0; + int d = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15)); + int m = (single ? 
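+ /*
+  * A double-precision VMOV(R) is lowered below as two independent
+  * 32-bit copies (low word then high word), mirroring the
+  * interpreter's VMOVR() above, which moves ExtReg[m*2] and
+  * ExtReg[m*2+1] and carries the same endianness caveat.
+  */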
BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4); + + if (single) + { + LETFPS(d, FR32(m)); + } + else + { + /* Check endian please */ + LETFPS((d*2 + 1), FR32(m*2 + 1)); + LETFPS((d * 2), FR32(m * 2)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VABS */ +/* cond 1110 1D11 0000 Vd-- 101X 11M0 Vm-- */ +#define vfpinstr vabs +#define vfpinstr_inst vabs_inst +#define VFPLABEL_INST VABS_INST +#ifdef VFP_DECODE +{"vabs", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 3, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vabs", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vabs_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VABS); + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VABS :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn == 0 && (OPC_2 & 0x7) == 6) +{ + DBG("VABS :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int single = BIT(8) == 0; + int d = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15)); + int m = (single ? 
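+ /*
+  * The |x| lowering below uses compare-and-select,
+  * (x < 0.0 ? -x : x), on the reassembled value. Clearing the sign
+  * bit (bit 31, or bit 63 for doubles) would be the cheaper
+  * bit-exact alternative and is how the hardware instruction is
+  * usually described; the select form is what this JIT emits.
+  */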
BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4); + Value* m0; + if (single) + { + m0 = FR32(m); + m0 = SELECT(FPCMP_OLT(m0,FPCONST32(0.0)),FPNEG32(m0),m0); + LETFPS(d,m0); + } + else + { + /* Check endian please */ + Value *lo = FR32(2 * m); + Value *hi = FR32(2 * m + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + Value *hi64 = ZEXT64(hi); + Value* lo64 = ZEXT64(lo); + Value* v64 = OR(SHL(hi64,CONST64(32)),lo64); + m0 = FPBITCAST64(v64); + m0 = SELECT(FPCMP_OLT(m0,FPCONST64(0.0)),FPNEG64(m0),m0); + Value *val64 = IBITCAST64(m0); + hi = LSHR(val64,CONST64(32)); + lo = AND(val64,CONST64(0xffffffff)); + hi = TRUNC32(hi); + lo = TRUNC32(lo); + hi = FPBITCAST32(hi); + lo = FPBITCAST32(lo); + LETFPS(2*d ,lo); + LETFPS(d*2 + 1 , hi); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VNEG */ +/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */ +#define vfpinstr vneg +#define vfpinstr_inst vneg_inst +#define VFPLABEL_INST VNEG_INST +#ifdef VFP_DECODE +//{"vneg", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 1, 4, 4, 0}, +{"vneg", 5, ARMVFP2, 23, 27, 0x1d, 17, 21, 0x18, 9, 11, 0x5, 6, 7, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vneg", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vneg_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VNEG); + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VNEG :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 2) +{ + DBG("VNEG :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int single = BIT(8) == 0; + int d = 
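+ /*
+  * As with VABS above, negation is done with FPNEG on the
+  * reassembled value; flipping only the sign bit would be the
+  * bit-exact alternative, including for NaN inputs.
+  */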
(single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15)); + int m = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4); + Value* m0; + if (single) + { + m0 = FR32(m); + m0 = FPNEG32(m0); + LETFPS(d,m0); + } + else + { + /* Check endian please */ + Value *lo = FR32(2 * m); + Value *hi = FR32(2 * m + 1); + hi = IBITCAST32(hi); + lo = IBITCAST32(lo); + Value *hi64 = ZEXT64(hi); + Value* lo64 = ZEXT64(lo); + Value* v64 = OR(SHL(hi64,CONST64(32)),lo64); + m0 = FPBITCAST64(v64); + m0 = FPNEG64(m0); + Value *val64 = IBITCAST64(m0); + hi = LSHR(val64,CONST64(32)); + lo = AND(val64,CONST64(0xffffffff)); + hi = TRUNC32(hi); + lo = TRUNC32(lo); + hi = FPBITCAST32(hi); + lo = FPBITCAST32(lo); + LETFPS(2*d ,lo); + LETFPS(d*2 + 1 , hi); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VSQRT */ +/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */ +#define vfpinstr vsqrt +#define vfpinstr_inst vsqrt_inst +#define VFPLABEL_INST VSQRT_INST +#ifdef VFP_DECODE +{"vsqrt", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x31, 9, 11, 0x5, 6, 7, 3, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vsqrt", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vsqrt_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VSQRT :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 6) +{ + DBG("VSQRT :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int dp_op = (BIT(8) == 1); + int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1; + int m = dp_op ? 
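+/* Note on the single-precision VSQRT path below: the IR widens to double,
+ * takes the square root there, and truncates back (FPEXT(64) -> FPSQRT ->
+ * FPTRUNC(32)), i.e. roughly this C expression (sketch, default rounding
+ * assumed):
+ *
+ *   float r = (float)sqrt((double)s);   // needs <math.h>
+ */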
BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1; + Value* v; + Value* tmp; + if(dp_op){ + v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32)); + tmp = ZEXT64(IBITCAST32(FR32(2 * m))); + v = OR(v,tmp); + v = FPSQRT(FPBITCAST64(v)); + tmp = TRUNC32(LSHR(IBITCAST64(v),CONST64(32))); + v = TRUNC32(AND(IBITCAST64(v),CONST64( 0xffffffff))); + LETFPS(2 * d , FPBITCAST32(v)); + LETFPS(2 * d + 1, FPBITCAST32(tmp)); + }else { + v = FR32(m); + v = FPSQRT(FPEXT(64,v)); + v = FPTRUNC(32,v); + LETFPS(d,v); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VCMP VCMPE */ +/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 1 */ +#define vfpinstr vcmp +#define vfpinstr_inst vcmp_inst +#define VFPLABEL_INST VCMP_INST +#ifdef VFP_DECODE +{"vcmp", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x34, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vcmp", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vcmp_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VCMP(1) :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn == 4 && (OPC_2 & 0x2) == 2) +{ + DBG("VCMP(1) :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int dp_op = (BIT(8) == 1); + int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1; + int m = dp_op ? 
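+/* The flag computation below implements the FCMP condition-flag mapping:
+ * N = less than, Z = equal, C = greater/equal or unordered, V = unordered,
+ * packed into FPSCR[31:28] while the low 28 bits are preserved. As a scalar
+ * sketch of the same logic:
+ *
+ *   uint32_t nzcv = (lt << 31) | (eq << 30) | ((ge || uno) << 29) | (uno << 28);
+ *   fpscr = (fpscr & 0x0fffffff) | nzcv;
+ */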
BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1; + Value* v; + Value* tmp; + Value* n; + Value* z; + Value* c; + Value* vt; + Value* v1; + Value* nzcv; + if(dp_op){ + v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32)); + tmp = ZEXT64(IBITCAST32(FR32(2 * m))); + v1 = OR(v,tmp); + v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32)); + tmp = ZEXT64(IBITCAST32(FR32(2 * d))); + v = OR(v,tmp); + z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1)); + n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1)); + c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); + tmp = FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1)); + v1 = tmp; + c = OR(c,tmp); + n = SHL(ZEXT32(n),CONST32(31)); + z = SHL(ZEXT32(z),CONST32(30)); + c = SHL(ZEXT32(c),CONST32(29)); + v1 = SHL(ZEXT32(v1),CONST(28)); + nzcv = OR(OR(OR(n,z),c),v1); + v = R(VFP_FPSCR); + tmp = OR(nzcv,AND(v,CONST32(0x0fffffff))); + LET(VFP_FPSCR,tmp); + }else { + z = FPCMP_OEQ(FR32(d),FR32(m)); + n = FPCMP_OLT(FR32(d),FR32(m)); + c = FPCMP_OGE(FR32(d),FR32(m)); + tmp = FPCMP_UNO(FR32(d),FR32(m)); + c = OR(c,tmp); + v1 = tmp; + n = SHL(ZEXT32(n),CONST32(31)); + z = SHL(ZEXT32(z),CONST32(30)); + c = SHL(ZEXT32(c),CONST32(29)); + v1 = SHL(ZEXT32(v1),CONST(28)); + nzcv = OR(OR(OR(n,z),c),v1); + v = R(VFP_FPSCR); + tmp = OR(nzcv,AND(v,CONST32(0x0fffffff))); + LET(VFP_FPSCR,tmp); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VCMP VCMPE */ +/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 2 */ +#define vfpinstr vcmp2 +#define vfpinstr_inst vcmp2_inst +#define VFPLABEL_INST VCMP2_INST +#ifdef VFP_DECODE +{"vcmp2", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x35, 9, 11, 0x5, 0, 6, 0x40}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vcmp2", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vcmp2_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VCMP(2) :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn == 5 && (OPC_2 & 0x2) == 2 && CRm == 0) +{ + DBG("VCMP(2) :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) 
+{ + int instr_size = INSTR_SIZE; + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction will be executed out of JIT.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int dp_op = (BIT(8) == 1); + int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1; + //int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1; + Value* v; + Value* tmp; + Value* n; + Value* z; + Value* c; + Value* vt; + Value* v1; + Value* nzcv; + if(dp_op){ + v1 = CONST64(0); + v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32)); + tmp = ZEXT64(IBITCAST32(FR32(2 * d))); + v = OR(v,tmp); + z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1)); + n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1)); + c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); + tmp = FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1)); + v1 = tmp; + c = OR(c,tmp); + n = SHL(ZEXT32(n),CONST32(31)); + z = SHL(ZEXT32(z),CONST32(30)); + c = SHL(ZEXT32(c),CONST32(29)); + v1 = SHL(ZEXT32(v1),CONST(28)); + nzcv = OR(OR(OR(n,z),c),v1); + v = R(VFP_FPSCR); + tmp = OR(nzcv,AND(v,CONST32(0x0fffffff))); + LET(VFP_FPSCR,tmp); + }else { + v1 = CONST(0); + v1 = FPBITCAST32(v1); + z = FPCMP_OEQ(FR32(d),v1); + n = FPCMP_OLT(FR32(d),v1); + c = FPCMP_OGE(FR32(d),v1); + tmp = FPCMP_UNO(FR32(d),v1); + c = OR(c,tmp); + v1 = tmp; + n = SHL(ZEXT32(n),CONST32(31)); + z = SHL(ZEXT32(z),CONST32(30)); + c = SHL(ZEXT32(c),CONST32(29)); + v1 = SHL(ZEXT32(v1),CONST(28)); + nzcv = OR(OR(OR(n,z),c),v1); + v = R(VFP_FPSCR); + tmp = OR(nzcv,AND(v,CONST32(0x0fffffff))); + LET(VFP_FPSCR,tmp); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VCVTBDS between double and single */ +/* cond 1110 1D11 0111 Vd-- 101X 11M0 Vm-- */ +#define vfpinstr vcvtbds +#define vfpinstr_inst vcvtbds_inst +#define VFPLABEL_INST VCVTBDS_INST +#ifdef VFP_DECODE +{"vcvt(bds)", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x37, 9, 11, 0x5, 6, 7, 3, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vcvt(bds)", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vcvtbds_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VCVT(BDS) :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr,
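+/* Reference for the interpreter bodies in this file: bit 8 of the encoding
+ * (dp_operation) routes the raw instruction word to the double- or
+ * single-precision soft-float pipeline, whose return value carries the
+ * exception state consumed by CHECK_VFP_CDP_RET. Compressed (cf.
+ * vcvtbfi_instr_impl further down):
+ *
+ *   int ex = dp ? vfp_double_cpdo(cpu, instr, fpscr)
+ *               : vfp_single_cpdo(cpu, instr, fpscr);
+ *   vfp_raise_exceptions(cpu, ex, instr, fpscr);
+ */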
cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn == 7 && (OPC_2 & 0x6) == 6) +{ + DBG("VCVT(BDS) :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int dp_op = (BIT(8) == 1); + int d = dp_op ? BITS(12,15) << 1 | BIT(22) : BIT(22) << 4 | BITS(12,15); + int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1; + int d2s = dp_op; + Value* v; + Value* tmp; + Value* v1; + if(d2s){ + v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32)); + tmp = ZEXT64(IBITCAST32(FR32(2 * m))); + v1 = OR(v,tmp); + tmp = FPTRUNC(32,FPBITCAST64(v1)); + LETFPS(d,tmp); + }else { + v = FR32(m); + tmp = FPEXT(64,v); + v = IBITCAST64(tmp); + tmp = TRUNC32(AND(v,CONST64(0xffffffff))); + v1 = TRUNC32(LSHR(v,CONST64(32))); + LETFPS(2 * d, FPBITCAST32(tmp) ); + LETFPS(2 * d + 1, FPBITCAST32(v1)); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VCVTBFF between floating point and fixed point */ +/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */ +#define vfpinstr vcvtbff +#define vfpinstr_inst vcvtbff_inst +#define VFPLABEL_INST VCVTBFF_INST +#ifdef VFP_DECODE +{"vcvt(bff)", 6, ARMVFP3, 23, 27, 0x1d, 19, 21, 0x7, 17, 17, 0x1, 9, 11, 0x5, 6, 6, 1}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vcvt(bff)", 0, ARMVFP3, 4, 4, 1}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vcvtbff_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VCVTBFF); + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VCVT(BFF) :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 &
0xB) == 0xB && CRn >= 0xA && (OPC_2 & 0x2) == 2) +{ + DBG("VCVT(BFF) :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arch_arm_undef(cpu, bb, instr); + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VCVTBFI between floating point and integer */ +/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */ +#define vfpinstr vcvtbfi +#define vfpinstr_inst vcvtbfi_inst +#define VFPLABEL_INST VCVTBFI_INST +#ifdef VFP_DECODE +{"vcvt(bfi)", 5, ARMVFP2, 23, 27, 0x1d, 19, 21, 0x7, 9, 11, 0x5, 6, 6, 1, 4, 4, 0}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vcvt(bfi)", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vcvtbfi_inst { + unsigned int instr; + unsigned int dp_operation; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->dp_operation = BIT(inst, 8); + inst_cream->instr = inst; + + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + DBG("VCVT(BFI) :\n"); + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + int ret; + + if (inst_cream->dp_operation) + ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + CHECK_VFP_CDP_RET; + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_CDP_TRANS +if ((OPC_1 & 0xB) == 0xB && CRn > 7 && (OPC_2 & 0x2) == 2) +{ + DBG("VCVT(BFI) :\n"); +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + unsigned int opc2 = BITS(16,18); + int to_integer 
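+/* VCVT(BFI) decode, spelled out: opc2 (bits 18:16) separates the
+ * float-to-integer forms (opc2 >> 2 == 1) from integer-to-float; bit 16
+ * then picks signed vs. unsigned for FPTOSI/FPTOUI, and bit 7 does the
+ * same for SITOFP/UITOFP on the way in. */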
= ((opc2 >> 2) == 1); + int dp_op = (BIT(8) == 1); + unsigned int op = BIT(7); + int m,d; + Value* v; + Value* hi; + Value* lo; + Value* v64; + if(to_integer){ + d = BIT(22) | (BITS(12,15) << 1); + if(dp_op) + m = BITS(0,3) | BIT(5) << 4; + else + m = BIT(5) | BITS(0,3) << 1; + }else { + m = BIT(5) | BITS(0,3) << 1; + if(dp_op) + d = BITS(12,15) | BIT(22) << 4; + else + d = BIT(22) | BITS(12,15) << 1; + } + if(to_integer){ + if(dp_op){ + lo = FR32(m * 2); + hi = FR32(m * 2 + 1); + hi = ZEXT64(IBITCAST32(hi)); + lo = ZEXT64(IBITCAST32(lo)); + v64 = OR(SHL(hi,CONST64(32)),lo); + if(BIT(16)){ + v = FPTOSI(32,FPBITCAST64(v64)); + } + else + v = FPTOUI(32,FPBITCAST64(v64)); + + v = FPBITCAST32(v); + LETFPS(d,v); + }else { + v = FR32(m); + if(BIT(16)){ + + v = FPTOSI(32,v); + } + else + v = FPTOUI(32,v); + LETFPS(d,FPBITCAST32(v)); + } + }else { + if(dp_op){ + v = IBITCAST32(FR32(m)); + if(BIT(7)) + v64 = SITOFP(64,v); + else + v64 = UITOFP(64,v); + v = IBITCAST64(v64); + hi = FPBITCAST32(TRUNC32(LSHR(v,CONST64(32)))); + lo = FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff)))); + LETFPS(2 * d , lo); + LETFPS(2 * d + 1, hi); + }else { + v = IBITCAST32(FR32(m)); + if(BIT(7)) + v = SITOFP(32,v); + else + v = UITOFP(32,v); + LETFPS(d,v); + } + } + return No_exp; +} + +/** +* @brief C implementation of the vcvtbfi instruction for dyncom +* +* @param cpu +* @param instr +* +* @return 0 +*/ +int vcvtbfi_instr_impl(arm_core_t* cpu, uint32 instr){ + int dp_operation = BIT(8); + int ret; + if (dp_operation) + ret = vfp_double_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + else + ret = vfp_single_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + + vfp_raise_exceptions(cpu, ret, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + return 0; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* MRC / MCR instructions */ +/* cond 1110 AAAL XXXX XXXX 101C XBB1 XXXX */ +/* cond 1110 op11 CRn- Rt-- copr op21 CRm- */ + +/* ----------------------------------------------------------------------- */ +/* VMOVBRS between register and single precision */ +/* cond 1110 000o Vn-- Rt-- 1010 N001 0000 */ +/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */ +#define vfpinstr vmovbrs +#define vfpinstr_inst vmovbrs_inst +#define VFPLABEL_INST VMOVBRS_INST +#ifdef VFP_DECODE +{"vmovbrs", 3, ARMVFP2, 21, 27, 0x70, 8, 11, 0xA, 0, 6, 0x10}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmovbrs", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovbrs_inst { + unsigned int to_arm; + unsigned int t; + unsigned int n; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->to_arm = BIT(inst, 20) == 1; + inst_cream->t = BITS(inst, 12, 15); + inst_cream->n = BIT(inst, 7) | BITS(inst, 16, 19)<<1; + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + +
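+/* VMOVBRS is a raw 32-bit transfer between an ARM core register and a
+ * single-precision register; no format conversion is involved, only the bit
+ * pattern moves, in the direction selected by bit 20 (to_arm). Conceptually:
+ * to_arm ? Reg[t] = S[n] : S[n] = Reg[t]. */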
vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t])); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MRC_TRANS +if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0) +{ + /* VMOV r to s */ + /* Transferring Rt is not mandatory, as the value of interest is pointed to by value */ + VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, value); + return ARMul_DONE; +} +#endif +#ifdef VFP_MCR_TRANS +if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0) +{ + /* VMOV s to r */ + /* Transferring Rt is not mandatory, as the value of interest is pointed to by value */ + VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, &value); + return ARMul_DONE; +} +#endif +#ifdef VFP_MRC_IMPL +void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value) +{ + DBG("VMOV(BRS) :\n"); + if (to_arm) + { + DBG("\tr%d <= s%d=[%x]\n", t, n, state->ExtReg[n]); + *value = state->ExtReg[n]; + } + else + { + DBG("\ts%d <= r%d=[%x]\n", n, t, *value); + state->ExtReg[n] = *value; + } +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("VMOV(BRS) :\n"); + int to_arm = BIT(20) == 1; + int t = BITS(12, 15); + int n = BIT(7) | BITS(16, 19)<<1; + + if (to_arm) + { + DBG("\tr%d <= s%d\n", t, n); + LET(t, IBITCAST32(FR32(n))); + } + else + { + DBG("\ts%d <= r%d\n", n, t); + LETFPS(n, FPBITCAST32(R(t))); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMSR */ +/* cond 1110 1110 reg- Rt-- 1010 0001 0000 */ +/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */ +#define vfpinstr vmsr +#define vfpinstr_inst vmsr_inst +#define VFPLABEL_INST VMSR_INST +#ifdef VFP_DECODE +{"vmsr", 2, ARMVFP2, 20, 27, 0xEE, 0, 11, 0xA10}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmsr", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmsr_inst { + unsigned int reg; + unsigned int Rd; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->reg = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled, + and in privileged mode */ + /* Exceptions must be checked, according to v7 ref manual */ +
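+/* VMSR writes a core register into a VFP system register: reg (bits 19:16)
+ * selects FPSCR for 1 and FPEXC for 8, as the VMSR helper below shows;
+ * everything else is subarchitecture defined. Sketch:
+ *
+ *   if (reg == 1) VFP[FPSCR] = Reg[Rt]; else if (reg == 8) VFP[FPEXC] = Reg[Rt];
+ */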
CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VMSR(cpu, inst_cream->reg, inst_cream->Rd); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MCR_TRANS +if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0) +{ + VMSR(state, CRn, Rt); + return ARMul_DONE; +} +#endif +#ifdef VFP_MCR_IMPL +void VMSR(ARMul_State * state, ARMword reg, ARMword Rt) +{ + if (reg == 1) + { + DBG("VMSR :\tfpscr <= r%d=[%x]\n", Rt, state->Reg[Rt]); + state->VFP[VFP_OFFSET(VFP_FPSCR)] = state->Reg[Rt]; + } + else if (reg == 8) + { + DBG("VMSR :\tfpexc <= r%d=[%x]\n", Rt, state->Reg[Rt]); + state->VFP[VFP_OFFSET(VFP_FPEXC)] = state->Reg[Rt]; + } +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + DBG("VMSR :"); + if(RD == 15) { + printf("in %s is not implemented.\n", __FUNCTION__); + exit(-1); + } + + Value *data = NULL; + int reg = RN; + int Rt = RD; + if (reg == 1) + { + LET(VFP_FPSCR, R(Rt)); + DBG("\tfpscr <= r%d\n", Rt); + } + else + { + switch (reg) + { + case 8: + LET(VFP_FPEXC, R(Rt)); + DBG("\tfpexc <= r%d \n", Rt); + break; + default: + DBG("\tSUBARCHITECTURE DEFINED\n"); + break; + } + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMOVBRC register to scalar */ +/* cond 1110 0XX0 Vd-- Rt-- 1011 DXX1 0000 */ +/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */ +#define vfpinstr vmovbrc +#define vfpinstr_inst vmovbrc_inst +#define VFPLABEL_INST VMOVBRC_INST +#ifdef VFP_DECODE +{"vmovbrc", 4, ARMVFP2, 23, 27, 0x1C, 20, 20, 0x0, 8,11,0xB, 0,4,0x10}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmovbrc", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovbrc_inst { + unsigned int esize; + unsigned int index; + unsigned int d; + unsigned int t; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->d = BITS(inst, 16, 19)|BIT(inst, 7)<<4; + inst_cream->t = BITS(inst, 12, 15); + /* VFP variant of instruction */ + inst_cream->esize = 32; + inst_cream->index = BIT(inst, 21); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst
*)inst_base->component; + + VFP_DEBUG_UNIMPLEMENTED(VMOVBRC); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MCR_TRANS +if ((OPC_1 & 0x4) == 0 && CoProc == 11 && CRm == 0) +{ + VFP_DEBUG_UNIMPLEMENTED(VMOVBRC); + return ARMul_DONE; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arch_arm_undef(cpu, bb, instr); + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMRS */ +/* cond 1110 1111 CRn- Rt-- 1010 0001 0000 */ +/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */ +#define vfpinstr vmrs +#define vfpinstr_inst vmrs_inst +#define VFPLABEL_INST VMRS_INST +#ifdef VFP_DECODE +{"vmrs", 2, ARMVFP2, 20, 27, 0xEF, 0, 11, 0xa10}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmrs", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmrs_inst { + unsigned int reg; + unsigned int Rt; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->reg = BITS(inst, 16, 19); + inst_cream->Rt = BITS(inst, 12, 15); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled, + and in privileged mode */ + /* Exceptions must be checked, according to v7 ref manual */ + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + DBG("VMRS :"); + + if (inst_cream->reg == 1) /* FPSCR */ + { + if (inst_cream->Rt != 15) + { + cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSCR)]; + DBG("\tr%d <= fpscr[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); + } + else + { + cpu->NFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 31) & 1; + cpu->ZFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 30) & 1; + cpu->CFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 29) & 1; + cpu->VFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 28) & 1; + DBG("\tflags <= fpscr[%1xxxxxxxx]\n", cpu->VFP[VFP_OFFSET(VFP_FPSCR)]>>28); + } + } + else + { + switch (inst_cream->reg) + { + case 0: + cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSID)]; + DBG("\tr%d <= fpsid[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSID)]); + break; + case 6: + /* MVFR1, VFPv3 only ? */ + DBG("\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt); + break; + case 7: + /* MVFR0, VFPv3 only?
*/ + DBG("\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt); + break; + case 8: + cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPEXC)]; + DBG("\tr%d <= fpexc[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPEXC)]); + break; + default: + DBG("\tSUBARCHITECTURE DEFINED\n"); + break; + } + } + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MRC_TRANS +if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0) +{ + VMRS(state, CRn, Rt, value); + return ARMul_DONE; +} +#endif +#ifdef VFP_MRC_IMPL +void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword * value) +{ + DBG("VMRS :"); + if (reg == 1) + { + if (Rt != 15) + { + *value = state->VFP[VFP_OFFSET(VFP_FPSCR)]; + DBG("\tr%d <= fpscr[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSCR)]); + } + else + { + *value = state->VFP[VFP_OFFSET(VFP_FPSCR)] ; + DBG("\tflags <= fpscr[%1xxxxxxxx]\n", state->VFP[VFP_OFFSET(VFP_FPSCR)]>>28); + } + } + else + { + switch (reg) + { + case 0: + *value = state->VFP[VFP_OFFSET(VFP_FPSID)]; + DBG("\tr%d <= fpsid[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSID)]); + break; + case 6: + /* MVFR1, VFPv3 only ? */ + DBG("\tr%d <= MVFR1 unimplemented\n", Rt); + break; + case 7: + /* MVFR0, VFPv3 only? */ + DBG("\tr%d <= MVFR0 unimplemented\n", Rt); + break; + case 8: + *value = state->VFP[VFP_OFFSET(VFP_FPEXC)]; + DBG("\tr%d <= fpexc[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPEXC)]); + break; + default: + DBG("\tSUBARCHITECTURE DEFINED\n"); + break; + } + } +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + DBG("\t\tin %s .\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + + Value *data = NULL; + int reg = BITS(16, 19);; + int Rt = BITS(12, 15); + DBG("VMRS : reg=%d, Rt=%d\n", reg, Rt); + if (reg == 1) + { + if (Rt != 15) + { + LET(Rt, R(VFP_FPSCR)); + DBG("\tr%d <= fpscr\n", Rt); + } + else + { + //LET(Rt, R(VFP_FPSCR)); + update_cond_from_fpscr(cpu, instr, bb, pc); + DBG("In %s, \tflags <= fpscr\n", __FUNCTION__); + } + } + else + { + switch (reg) + { + case 0: + LET(Rt, R(VFP_FPSID)); + DBG("\tr%d <= fpsid\n", Rt); + break; + case 6: + /* MVFR1, VFPv3 only ? */ + DBG("\tr%d <= MVFR1 unimplemented\n", Rt); + break; + case 7: + /* MVFR0, VFPv3 only? 
*/ + DBG("\tr%d <= MVFR0 unimplemented\n", Rt); + break; + case 8: + LET(Rt, R(VFP_FPEXC)); + DBG("\tr%d <= fpexc\n", Rt); + break; + default: + DBG("\tSUBARCHITECTURE DEFINED\n"); + break; + } + } + + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMOVBCR scalar to register */ +/* cond 1110 XXX1 Vd-- Rt-- 1011 NXX1 0000 */ +/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MCR */ +#define vfpinstr vmovbcr +#define vfpinstr_inst vmovbcr_inst +#define VFPLABEL_INST VMOVBCR_INST +#ifdef VFP_DECODE +{"vmovbcr", 4, ARMVFP2, 24, 27, 0xE, 20, 20, 1, 8, 11,0xB, 0,4, 0x10}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmovbcr", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovbcr_inst { + unsigned int esize; + unsigned int index; + unsigned int d; + unsigned int t; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->d = BITS(inst, 16, 19)|BIT(inst, 7)<<4; + inst_cream->t = BITS(inst, 12, 15); + /* VFP variant of instruction */ + inst_cream->esize = 32; + inst_cream->index = BIT(inst, 21); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VFP_DEBUG_UNIMPLEMENTED(VMOVBCR); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MCR_TRANS +if (CoProc == 11 && CRm == 0) +{ + VFP_DEBUG_UNIMPLEMENTED(VMOVBCR); + return ARMul_DONE; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arch_arm_undef(cpu, bb, instr); + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* MRRC / MCRR instructions */ +/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */ +/* cond 1100 0100 Rt2- Rt-- copr opc1 CRm- MCRR */ + +/* ----------------------------------------------------------------------- */ +/* VMOVBRRSS between 2 registers to 2 singles */ +/* cond 1100 010X Rt2- Rt-- 1010 00X1 Vm-- */ +/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */ +#define vfpinstr vmovbrrss +#define vfpinstr_inst vmovbrrss_inst +#define VFPLABEL_INST VMOVBRRSS_INST +#ifdef VFP_DECODE +{"vmovbrrss", 3, ARMVFP2, 21, 27, 0x62, 8, 11, 0xA, 4, 4, 1}, +#endif +#ifdef 
VFP_DECODE_EXCLUSION +{"vmovbrrss", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovbrrss_inst { + unsigned int to_arm; + unsigned int t; + unsigned int t2; + unsigned int m; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->to_arm = BIT(inst, 20) == 1; + inst_cream->t = BITS(inst, 12, 15); + inst_cream->t2 = BITS(inst, 16, 19); + inst_cream->m = BITS(inst, 0, 3)<<1|BIT(inst, 5); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MCRR_TRANS +if (CoProc == 10 && (OPC_1 & 0xD) == 1) +{ + VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS); + return ARMul_DONE; +} +#endif +#ifdef VFP_MRRC_TRANS +if (CoProc == 10 && (OPC_1 & 0xD) == 1) +{ + VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS); + return ARMul_DONE; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arch_arm_undef(cpu, bb, instr); + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VMOVBRRD between 2 registers and 1 double */ +/* cond 1100 010X Rt2- Rt-- 1011 00X1 Vm-- */ +/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */ +#define vfpinstr vmovbrrd +#define vfpinstr_inst vmovbrrd_inst +#define VFPLABEL_INST VMOVBRRD_INST +#ifdef VFP_DECODE +{"vmovbrrd", 3, ARMVFP2, 21, 27, 0x62, 6, 11, 0x2c, 4, 4, 1}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vmovbrrd", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vmovbrrd_inst { + unsigned int to_arm; + unsigned int t; + unsigned int t2; + unsigned int m; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->to_arm = 
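+/* VMOVBRRD moves one double (the consecutive ExtReg words m*2 and m*2+1)
+ * to or from the core register pair Rt/Rt2; m is reassembled as M:Vm,
+ * i.e. BIT(5)<<4 | BITS(0,3), just below, and bit 20 again gives the
+ * direction of the transfer. */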
BIT(inst, 20) == 1; + inst_cream->t = BITS(inst, 12, 15); + inst_cream->t2 = BITS(inst, 16, 19); + inst_cream->m = BIT(inst, 5)<<4 | BITS(inst, 0, 3); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, + &(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2])); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_MCRR_TRANS +if (CoProc == 11 && (OPC_1 & 0xD) == 1) +{ + /* Transferring Rt and Rt2 is not mandatory, as the value of interest is pointed to by value1 and value2 */ + VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, &value1, &value2); + return ARMul_DONE; +} +#endif +#ifdef VFP_MRRC_TRANS +if (CoProc == 11 && (OPC_1 & 0xD) == 1) +{ + /* Transferring Rt and Rt2 is not mandatory, as the value of interest is pointed to by value1 and value2 */ + VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, value1, value2); + return ARMul_DONE; +} +#endif +#ifdef VFP_MRRC_IMPL +void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2) +{ + DBG("VMOV(BRRD) :\n"); + if (to_arm) + { + DBG("\tr[%d-%d] <= s[%d-%d]=[%x-%x]\n", t2, t, n*2+1, n*2, state->ExtReg[n*2+1], state->ExtReg[n*2]); + *value2 = state->ExtReg[n*2+1]; + *value1 = state->ExtReg[n*2]; + } + else + { + DBG("\ts[%d-%d] <= r[%d-%d]=[%x-%x]\n", n*2+1, n*2, t2, t, *value2, *value1); + state->ExtReg[n*2+1] = *value2; + state->ExtReg[n*2] = *value1; + } +} + +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int to_arm = BIT(20) == 1; + int t = BITS(12, 15); + int t2 = BITS(16, 19); + int n = BIT(5)<<4 | BITS(0, 3); + if(to_arm){ + LET(t, IBITCAST32(FR32(n * 2))); + LET(t2, IBITCAST32(FR32(n * 2 + 1))); + } + else{ + LETFPS(n * 2, FPBITCAST32(R(t))); + LETFPS(n * 2 + 1, FPBITCAST32(R(t2))); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* LDC/STC between 2 registers and 1 double */ +/* cond 110X XXX1 Rn-- CRd- copr imm- imm- LDC */ +/* cond 110X XXX0 Rn-- CRd- copr imm8 imm8 STC */ + +/* ----------------------------------------------------------------------- */ +/* VSTR */ +/* cond 1101 UD00 Rn-- Vd-- 101X imm8 imm8 */ +#define vfpinstr vstr +#define vfpinstr_inst vstr_inst +#define VFPLABEL_INST VSTR_INST +#ifdef VFP_DECODE +{"vstr", 3, ARMVFP2, 24, 27, 0xd, 20, 21, 0, 9, 11, 0x5}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vstr", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef
VFP_INTERPRETER_STRUCT +typedef struct _vstr_inst { + unsigned int single; + unsigned int n; + unsigned int d; + unsigned int imm32; + unsigned int add; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->add = BIT(inst, 23); + inst_cream->imm32 = BITS(inst, 0,7) << 2; + inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4); + inst_cream->n = BITS(inst, 16, 19); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]); + addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32); + DBG("VSTR :\n"); + + + if (inst_cream->single) + { + fault = check_address_validity(cpu, addr, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d, cpu->ExtReg[inst_cream->d]); + } + else + { + fault = check_address_validity(cpu, addr, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + + /* Check endianness */ + fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d*2], 32); + if (fault) goto MMU_EXCEPTION; + + fault = check_address_validity(cpu, addr + 4, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[inst_cream->d*2+1], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, inst_cream->d*2+1, inst_cream->d*2, cpu->ExtReg[inst_cream->d*2+1], cpu->ExtReg[inst_cream->d*2]); + } + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_STC_TRANS +if (P == 1 && W == 0) +{ + return VSTR(state, type, instr, value); +} +#endif +#ifdef VFP_STC_IMPL +int VSTR(ARMul_State * state, int type, ARMword instr, ARMword * value) +{ + static int i = 0; + static int single_reg, add, d, n, imm32, regs; + if (type == ARMul_FIRST) + { + single_reg = BIT(8) == 0; /* Double precision */ + add = BIT(23); /* */ + imm32 = BITS(0,7)<<2; /* may not be used */ + d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */ + n = BITS(16, 19); /* destination register */ + + DBG("VSTR :\n"); + + i = 0; + regs = 1; + + return ARMul_DONE; + } + else if (type == ARMul_DATA) + { + if (single_reg) + { + *value = state->ExtReg[d+i]; + DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d+i]); + i++; + if (i < regs) + return ARMul_INC; + else + return ARMul_DONE; + } + else + { + /* FIXME Careful of endianness, may need to rework this */ + *value = state->ExtReg[d*2+i]; + DBG("\taddr[?] 
<= s[%d]=[%x]\n", d*2+i, state->ExtReg[d*2+i]); + i++; + if (i < regs*2) + return ARMul_INC; + else + return ARMul_DONE; + } + } + + return -1; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc); + *tag |= TAG_NEW_BB; + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + int single = BIT(8) == 0; + int add = BIT(23); + int imm32 = BITS(0,7) << 2; + int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4)); + int n = BITS(16, 19); + + Value* base = (n == 15) ? ADD(AND(R(n), CONST(0xFFFFFFFC)), CONST(8)): R(n); + Value* Addr = add ? ADD(base, CONST(imm32)) : SUB(base, CONST(imm32)); + DBG("VSTR :\n"); + //if(single) + // bb = arch_check_mm(cpu, bb, Addr, 4, 0, cpu->dyncom_engine->bb_trap); + //else + // bb = arch_check_mm(cpu, bb, Addr, 8, 0, cpu->dyncom_engine->bb_trap); + //Value* phys_addr; + if(single){ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR(d), 32); + #endif + //memory_write(cpu, bb, Addr, RSPR(d), 32); + memory_write(cpu, bb, Addr, IBITCAST32(FR32(d)), 32); + bb = cpu->dyncom_engine->bb; + } + else{ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2), 32); + #endif + //memory_write(cpu, bb, Addr, RSPR(d * 2), 32); + memory_write(cpu, bb, Addr, IBITCAST32(FR32(d * 2)), 32); + bb = cpu->dyncom_engine->bb; + #if 0 + phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2 + 1), 32); + #endif + //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR(d * 2 + 1), 32); + memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32(d * 2 + 1)), 32); + bb = cpu->dyncom_engine->bb; + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VPUSH */ +/* cond 1101 0D10 1101 Vd-- 101X imm8 imm8 */ +#define vfpinstr vpush +#define vfpinstr_inst vpush_inst +#define VFPLABEL_INST VPUSH_INST +#ifdef VFP_DECODE +{"vpush", 3, ARMVFP2, 23, 27, 0x1a, 16, 21, 0x2d, 9, 11, 0x5}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vpush", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vpush_inst { + unsigned int single; + unsigned int d; + unsigned int imm32; + unsigned int regs; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->d = (inst_cream->single ? 
BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4); + inst_cream->imm32 = BITS(inst, 0, 7)<<2; + inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + int i; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + DBG("VPUSH :\n"); + + addr = cpu->Reg[R13] - inst_cream->imm32; + + + for (i = 0; i < inst_cream->regs; i++) + { + if (inst_cream->single) + { + fault = check_address_validity(cpu, addr, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]); + addr += 4; + } + else + { + /* Careful of endianness, little by default */ + fault = check_address_validity(cpu, addr, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32); + if (fault) goto MMU_EXCEPTION; + + fault = check_address_validity(cpu, addr + 4, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]); + addr += 8; + } + } + DBG("\tsp[%x]", cpu->Reg[R13]); + cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32; + DBG("=>[%x]\n", cpu->Reg[R13]); + + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vpush_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_STC_TRANS +if (P == 1 && U == 0 && W == 1 && Rn == 0xD) +{ + return VPUSH(state, type, instr, value); +} +#endif +#ifdef VFP_STC_IMPL +int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword * value) +{ + static int i = 0; + static int single_regs, add, wback, d, n, imm32, regs; + if (type == ARMul_FIRST) + { + single_regs = BIT(8) == 0; /* Single precision */ + d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */ + imm32 = BITS(0,7)<<2; /* may not be used */ + regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FSTMX if regs is odd */ + + DBG("VPUSH :\n"); + DBG("\tsp[%x]", state->Reg[R13]); + state->Reg[R13] = state->Reg[R13] - imm32; + DBG("=>[%x]\n", state->Reg[R13]); + + i = 0; + + return ARMul_DONE; + } + else if (type == ARMul_DATA) + { + if (single_regs) + { + *value = state->ExtReg[d + i]; + DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]); + i++; + if (i < regs) + return ARMul_INC; + else + return ARMul_DONE; + } + else + { + /* FIXME Careful of endianness, may need to rework this */ + *value = state->ExtReg[d*2 + i]; + DBG("\taddr[?] 
<= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]); + i++; + if (i < regs*2) + return ARMul_INC; + else + return ARMul_DONE; + } + } + + return -1; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc); + *tag |= TAG_NEW_BB; + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + int single = BIT(8) == 0; + int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4)); + int imm32 = BITS(0, 7)<<2; + int regs = (single ? BITS(0, 7) : BITS(1, 7)); + + DBG("\t\tin %s \n", __FUNCTION__); + Value* Addr = SUB(R(13), CONST(imm32)); + //if(single) + // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap); + //else + // bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap); + //Value* phys_addr; + int i; + for (i = 0; i < regs; i++) + { + if (single) + { + //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32); + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32); + #endif + //memory_write(cpu, bb, Addr, RSPR(d + i), 32); + memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)), 32); + bb = cpu->dyncom_engine->bb; + Addr = ADD(Addr, CONST(4)); + } + else + { + /* Careful of endianness, little by default */ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32); + #endif + //memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32); + memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)), 32); + bb = cpu->dyncom_engine->bb; + #if 0 + phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32); + #endif + //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32); + memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32); + bb = cpu->dyncom_engine->bb; + + Addr = ADD(Addr, CONST(8)); + } + } + LET(13, SUB(R(13), CONST(imm32))); + + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VSTM */ +/* cond 110P UDW0 Rn-- Vd-- 101X imm8 imm8 */ +#define vfpinstr vstm +#define vfpinstr_inst vstm_inst +#define VFPLABEL_INST VSTM_INST +#ifdef VFP_DECODE +{"vstm", 3, ARMVFP2, 25, 27, 0x6, 20, 20, 0, 9, 11, 0x5}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vstm", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vstm_inst { + unsigned int single; + unsigned int add; + unsigned int wback; + unsigned int d; + unsigned int n; + unsigned int imm32; + unsigned int regs; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + 
sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->add = BIT(inst, 23); + inst_cream->wback = BIT(inst, 21); + inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4); + inst_cream->n = BITS(inst, 16, 19); + inst_cream->imm32 = BITS(inst, 0, 7)<<2; + inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: /* encoding 1 */ +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + int i; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32); + DBG("VSTM : addr[%x]\n", addr); + + + for (i = 0; i < inst_cream->regs; i++) + { + if (inst_cream->single) + { + fault = check_address_validity(cpu, addr, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]); + addr += 4; + } + else + { + /* Careful of endianness, little by default */ + fault = check_address_validity(cpu, addr, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32); + if (fault) goto MMU_EXCEPTION; + + fault = check_address_validity(cpu, addr + 4, &phys_addr, 0); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]); + addr += 8; + } + } + if (inst_cream->wback){ + cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : + cpu->Reg[inst_cream->n] - inst_cream->imm32); + DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]); + } + + } + cpu->Reg[15] += 4; + INC_PC(sizeof(vstm_inst)); + + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_STC_TRANS +/* Should be the last operation of STC */ +return VSTM(state, type, instr, value); +#endif +#ifdef VFP_STC_IMPL +int VSTM(ARMul_State * state, int type, ARMword instr, ARMword * value) +{ + static int i = 0; + static int single_regs, add, wback, d, n, imm32, regs; + if (type == ARMul_FIRST) + { + single_regs = BIT(8) == 0; /* Single precision */ + add = BIT(23); /* */ + wback = BIT(21); /* write-back */ + d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */ + n = BITS(16, 19); /* destination register */ + imm32 = BITS(0,7) * 4; /* may not be used */ + regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FSTMX if regs is odd */ + + DBG("VSTM :\n"); + + if (wback) { + state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32); + DBG("\twback r%d[%x]\n", n, state->Reg[n]); + } + + i = 0; + + return ARMul_DONE; + } + else if (type == ARMul_DATA) + { + if (single_regs) + { + *value = state->ExtReg[d + i]; + DBG("\taddr[?] 
<= s%d=[%x]\n", d+i, state->ExtReg[d + i]); + i++; + if (i < regs) + return ARMul_INC; + else + return ARMul_DONE; + } + else + { + /* FIXME Careful of endianness, may need to rework this */ + *value = state->ExtReg[d*2 + i]; + DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]); + i++; + if (i < regs*2) + return ARMul_INC; + else + return ARMul_DONE; + } + } + + return -1; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc); + *tag |= TAG_NEW_BB; + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + //arch_arm_undef(cpu, bb, instr); + int single = BIT(8) == 0; + int add = BIT(23); + int wback = BIT(21); + int d = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4); + int n = BITS(16, 19); + int imm32 = BITS(0, 7)<<2; + int regs = single ? BITS(0, 7) : BITS(1, 7); + + Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32))); + DBG("VSTM \n"); + //if(single) + // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap); + //else + // bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap); + + int i; + Value* phys_addr; + for (i = 0; i < regs; i++) + { + if (single) + { + + //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32); + /* if R(i) is R15? */ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32); + #endif + //memory_write(cpu, bb, Addr, RSPR(d + i), 32); + memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)),32); + bb = cpu->dyncom_engine->bb; + //if (fault) goto MMU_EXCEPTION; + //DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]); + Addr = ADD(Addr, CONST(4)); + } + else + { + + //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32); + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32); + #endif + //memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32); + memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)),32); + bb = cpu->dyncom_engine->bb; + //if (fault) goto MMU_EXCEPTION; + + //fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32); + #if 0 + phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0); + bb = cpu->dyncom_engine->bb; + arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32); + #endif + //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32); + memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32); + bb = cpu->dyncom_engine->bb; + //if (fault) goto MMU_EXCEPTION; + //DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]); + //addr += 8; + Addr = ADD(Addr, CONST(8)); + } + } + if (wback){ + //cpu->Reg[n] = (add ? 
cpu->Reg[n] + imm32 : + // cpu->Reg[n] - imm32); + LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32)))); + DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32); + } + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VPOP */ +/* cond 1100 1D11 1101 Vd-- 101X imm8 imm8 */ +#define vfpinstr vpop +#define vfpinstr_inst vpop_inst +#define VFPLABEL_INST VPOP_INST +#ifdef VFP_DECODE +{"vpop", 3, ARMVFP2, 23, 27, 0x19, 16, 21, 0x3d, 9, 11, 0x5}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vpop", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vpop_inst { + unsigned int single; + unsigned int d; + unsigned int imm32; + unsigned int regs; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->d = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4)); + inst_cream->imm32 = BITS(inst, 0, 7)<<2; + inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + int i; + unsigned int value1, value2; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + DBG("VPOP :\n"); + + addr = cpu->Reg[R13]; + + + for (i = 0; i < inst_cream->regs; i++) + { + if (inst_cream->single) + { + fault = check_address_validity(cpu, addr, &phys_addr, 1); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_read_memory(core, addr, phys_addr, value1, 32); + if (fault) goto MMU_EXCEPTION; + DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, value1, addr); + cpu->ExtReg[inst_cream->d+i] = value1; + addr += 4; + } + else + { + /* Careful of endianness, little by default */ + fault = check_address_validity(cpu, addr, &phys_addr, 1); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_read_memory(core, addr, phys_addr, value1, 32); + if (fault) goto MMU_EXCEPTION; + + fault = check_address_validity(cpu, addr + 4, &phys_addr, 1); + if (fault) goto MMU_EXCEPTION; + + fault = interpreter_read_memory(core, addr + 4, phys_addr, value2, 32); + if (fault) goto MMU_EXCEPTION; + DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, value2, value1, addr+4, addr); + cpu->ExtReg[(inst_cream->d+i)*2] = value1; + cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2; + addr += 8; + } + } + DBG("\tsp[%x]", cpu->Reg[R13]); + cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32; + DBG("=>[%x]\n", cpu->Reg[R13]); + + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vpop_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_LDC_TRANS +if (P == 0 && U == 1 && W == 1 && Rn == 0xD) +{ + return VPOP(state, type, instr, value); +} +#endif +#ifdef VFP_LDC_IMPL +int VPOP(ARMul_State * state, int type, ARMword instr, 
ARMword value) +{ + static int i = 0; + static int single_regs, add, wback, d, n, imm32, regs; + if (type == ARMul_FIRST) + { + single_regs = BIT(8) == 0; /* Single precision */ + d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */ + imm32 = BITS(0,7)<<2; /* may not be used */ + regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FLDMX if regs is odd */ + + DBG("VPOP :\n"); + DBG("\tsp[%x]", state->Reg[R13]); + state->Reg[R13] = state->Reg[R13] + imm32; + DBG("=>[%x]\n", state->Reg[R13]); + + i = 0; + + return ARMul_DONE; + } + else if (type == ARMul_TRANSFER) + { + return ARMul_DONE; + } + else if (type == ARMul_DATA) + { + if (single_regs) + { + state->ExtReg[d + i] = value; + DBG("\ts%d <= [%x]\n", d + i, value); + i++; + if (i < regs) + return ARMul_INC; + else + return ARMul_DONE; + } + else + { + /* FIXME Careful of endianness, may need to rework this */ + state->ExtReg[d*2 + i] = value; + DBG("\ts%d <= [%x]\n", d*2 + i, value); + i++; + if (i < regs*2) + return ARMul_INC; + else + return ARMul_DONE; + } + } + + return -1; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + /* Should check if PC is destination register */ + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc); + *tag |= TAG_NEW_BB; + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + DBG("\t\tin %s instruction .\n", __FUNCTION__); + //arch_arm_undef(cpu, bb, instr); + int single = BIT(8) == 0; + int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4)); + int imm32 = BITS(0, 7)<<2; + int regs = (single ? 
BITS(0, 7) : BITS(1, 7)); + + int i; + unsigned int value1, value2; + + DBG("VPOP :\n"); + + Value* Addr = R(13); + Value* val; + //if(single) + // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap); + //else + // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap); + //Value* phys_addr; + for (i = 0; i < regs; i++) + { + if (single) + { + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 1); + bb = cpu->dyncom_engine->bb; + val = arch_read_memory(cpu,bb,phys_addr,0,32); + #endif + memory_read(cpu, bb, Addr, 0, 32); + bb = cpu->dyncom_engine->bb; + val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb); + LETFPS(d + i, FPBITCAST32(val)); + Addr = ADD(Addr, CONST(4)); + } + else + { + /* Careful of endianness, little by default */ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 1); + bb = cpu->dyncom_engine->bb; + val = arch_read_memory(cpu,bb,phys_addr,0,32); + #endif + memory_read(cpu, bb, Addr, 0, 32); + bb = cpu->dyncom_engine->bb; + val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb); + LETFPS((d + i) * 2, FPBITCAST32(val)); + #if 0 + phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1); + bb = cpu->dyncom_engine->bb; + val = arch_read_memory(cpu,bb,phys_addr,0,32); + #endif + memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32); + bb = cpu->dyncom_engine->bb; + val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb); + LETFPS((d + i) * 2 + 1, FPBITCAST32(val)); + + Addr = ADD(Addr, CONST(8)); + } + } + LET(13, ADD(R(13), CONST(imm32))); + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VLDR */ +/* cond 1101 UD01 Rn-- Vd-- 101X imm8 imm8 */ +#define vfpinstr vldr +#define vfpinstr_inst vldr_inst +#define VFPLABEL_INST VLDR_INST +#ifdef VFP_DECODE +{"vldr", 3, ARMVFP2, 24, 27, 0xd, 20, 21, 0x1, 9, 11, 0x5}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vldr", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vldr_inst { + unsigned int single; + unsigned int n; + unsigned int d; + unsigned int imm32; + unsigned int add; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->add = BIT(inst, 23); + inst_cream->imm32 = BITS(inst, 0,7) << 2; + inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4); + inst_cream->n = BITS(inst, 16, 19); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]); + addr = (inst_cream->add ? 
base + inst_cream->imm32 : base - inst_cream->imm32);
+        DBG("VLDR : addr[%x]\n", addr);
+
+        if (inst_cream->single)
+        {
+            fault = check_address_validity(cpu, addr, &phys_addr, 1);
+            if (fault) goto MMU_EXCEPTION;
+            fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
+            if (fault) goto MMU_EXCEPTION;
+            DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d, cpu->ExtReg[inst_cream->d], addr);
+        }
+        else
+        {
+            unsigned int word1, word2;
+            fault = check_address_validity(cpu, addr, &phys_addr, 1);
+            if (fault) goto MMU_EXCEPTION;
+            fault = interpreter_read_memory(core, addr, phys_addr, word1, 32);
+            if (fault) goto MMU_EXCEPTION;
+
+            fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+            if (fault) goto MMU_EXCEPTION;
+            fault = interpreter_read_memory(core, addr + 4, phys_addr, word2, 32);
+            if (fault) goto MMU_EXCEPTION;
+            /* Check endianness */
+            cpu->ExtReg[inst_cream->d*2] = word1;
+            cpu->ExtReg[inst_cream->d*2+1] = word2;
+            DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", inst_cream->d*2+1, inst_cream->d*2, word2, word1, addr+4, addr);
+        }
+    }
+    cpu->Reg[15] += GET_INST_SIZE(cpu);
+    INC_PC(sizeof(vldr_inst));
+    FETCH_INST;
+    GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+if (P == 1 && W == 0)
+{
+    return VLDR(state, type, instr, value);
+}
+#endif
+#ifdef VFP_LDC_IMPL
+int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+    static int i = 0;
+    static int single_reg, add, d, n, imm32, regs;
+    if (type == ARMul_FIRST)
+    {
+        single_reg = BIT(8) == 0; /* Single precision */
+        add = BIT(23); /* */
+        imm32 = BITS(0,7)<<2; /* may not be used */
+        d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Source/destination FP register */
+        n = BITS(16, 19); /* Base register */
+
+        DBG("VLDR :\n");
+
+        i = 0;
+        regs = 1;
+
+        return ARMul_DONE;
+    }
+    else if (type == ARMul_TRANSFER)
+    {
+        return ARMul_DONE;
+    }
+    else if (type == ARMul_DATA)
+    {
+        if (single_reg)
+        {
+            state->ExtReg[d+i] = value;
+            DBG("\ts%d <= [%x]\n", d+i, value);
+            i++;
+            if (i < regs)
+                return ARMul_INC;
+            else
+                return ARMul_DONE;
+        }
+        else
+        {
+            /* FIXME Careful of endianness, may need to rework this */
+            state->ExtReg[d*2+i] = value;
+            DBG("\ts[%d] <= [%x]\n", d*2+i, value);
+            i++;
+            if (i < regs*2)
+                return ARMul_INC;
+            else
+                return ARMul_DONE;
+        }
+    }
+
+    return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+    int instr_size = INSTR_SIZE;
+    //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+    //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+    /* Should check if PC is destination register */
+    arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+    DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+    *tag |= TAG_NEW_BB;
+    if(instr >> 28 != 0xe)
+        *tag |= TAG_CONDITIONAL;
+
+    return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+    int single = BIT(8) == 0;
+    int add = BIT(23);
+    int wback = BIT(21);
+    int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+    int n = BITS(16, 19);
+    int imm32 = BITS(0, 7)<<2;
+    int regs = (single ? BITS(0, 7) : BITS(1, 7));
+    Value* base = R(n);
+    DBG("\t\tin %s .\n", __FUNCTION__);
+    if(n == 15){
+        base = ADD(AND(base, CONST(0xFFFFFFFC)), CONST(8));
+    }
+    Value* Addr = add ?
(ADD(base, CONST(imm32))) : (SUB(base, CONST(imm32))); + //if(single) + // bb = arch_check_mm(cpu, bb, Addr, 4, 1, cpu->dyncom_engine->bb_trap); + //else + // bb = arch_check_mm(cpu, bb, Addr, 8, 1, cpu->dyncom_engine->bb_trap); + //Value* phys_addr; + Value* val; + if(single){ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 1); + bb = cpu->dyncom_engine->bb; + val = arch_read_memory(cpu,bb,phys_addr,0,32); + #endif + memory_read(cpu, bb, Addr, 0, 32); + bb = cpu->dyncom_engine->bb; + val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb); + //LETS(d, val); + LETFPS(d,FPBITCAST32(val)); + } + else{ + #if 0 + phys_addr = get_phys_addr(cpu, bb, Addr, 1); + bb = cpu->dyncom_engine->bb; + val = arch_read_memory(cpu,bb,phys_addr,0,32); + #endif + memory_read(cpu, bb, Addr, 0, 32); + bb = cpu->dyncom_engine->bb; + val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb); + //LETS(d * 2, val); + LETFPS(d * 2,FPBITCAST32(val)); + #if 0 + phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1); + bb = cpu->dyncom_engine->bb; + val = arch_read_memory(cpu,bb,phys_addr,0,32); + #endif + memory_read(cpu, bb, ADD(Addr, CONST(4)), 0,32); + bb = cpu->dyncom_engine->bb; + val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb); + //LETS(d * 2 + 1, val); + LETFPS( d * 2 + 1,FPBITCAST32(val)); + } + + return No_exp; +} +#endif +#undef vfpinstr +#undef vfpinstr_inst +#undef VFPLABEL_INST + +/* ----------------------------------------------------------------------- */ +/* VLDM */ +/* cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8 */ +#define vfpinstr vldm +#define vfpinstr_inst vldm_inst +#define VFPLABEL_INST VLDM_INST +#ifdef VFP_DECODE +{"vldm", 3, ARMVFP2, 25, 27, 0x6, 20, 20, 1, 9, 11, 0x5}, +#endif +#ifdef VFP_DECODE_EXCLUSION +{"vldm", 0, ARMVFP2, 0}, +#endif +#ifdef VFP_INTERPRETER_TABLE +INTERPRETER_TRANSLATE(vfpinstr), +#endif +#ifdef VFP_INTERPRETER_LABEL +&&VFPLABEL_INST, +#endif +#ifdef VFP_INTERPRETER_STRUCT +typedef struct _vldm_inst { + unsigned int single; + unsigned int add; + unsigned int wback; + unsigned int d; + unsigned int n; + unsigned int imm32; + unsigned int regs; +} vfpinstr_inst; +#endif +#ifdef VFP_INTERPRETER_TRANS +ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index) +{ + VFP_DEBUG_TRANSLATE; + + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst)); + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->single = BIT(inst, 8) == 0; + inst_cream->add = BIT(inst, 23); + inst_cream->wback = BIT(inst, 21); + inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4); + inst_cream->n = BITS(inst, 16, 19); + inst_cream->imm32 = BITS(inst, 0, 7)<<2; + inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7)); + + return inst_base; +} +#endif +#ifdef VFP_INTERPRETER_IMPL +VFPLABEL_INST: +{ + INC_ICOUNTER; + if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + CHECK_VFP_ENABLED; + + int i; + + vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component; + + addr = (inst_cream->add ? 
cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32); + DBG("VLDM : addr[%x]\n", addr); + + for (i = 0; i < inst_cream->regs; i++) + { + if (inst_cream->single) + { + fault = check_address_validity(cpu, addr, &phys_addr, 1); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, cpu->ExtReg[inst_cream->d+i], addr); + addr += 4; + } + else + { + /* Careful of endianness, little by default */ + fault = check_address_validity(cpu, addr, &phys_addr, 1); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32); + if (fault) goto MMU_EXCEPTION; + + fault = check_address_validity(cpu, addr + 4, &phys_addr, 1); + if (fault) goto MMU_EXCEPTION; + fault = interpreter_read_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32); + if (fault) goto MMU_EXCEPTION; + DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2], addr+4, addr); + addr += 8; + } + } + if (inst_cream->wback){ + cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : + cpu->Reg[inst_cream->n] - inst_cream->imm32); + DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]); + } + + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(vfpinstr_inst)); + FETCH_INST; + GOTO_NEXT_INST; +} +#endif +#ifdef VFP_LDC_TRANS +/* Should be the last operation of LDC */ +return VLDM(state, type, instr, value); +#endif +#ifdef VFP_LDC_IMPL +int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value) +{ + static int i = 0; + static int single_regs, add, wback, d, n, imm32, regs; + if (type == ARMul_FIRST) + { + single_regs = BIT(8) == 0; /* Single precision */ + add = BIT(23); /* */ + wback = BIT(21); /* write-back */ + d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */ + n = BITS(16, 19); /* destination register */ + imm32 = BITS(0,7) * 4; /* may not be used */ + regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FLDMX if regs is odd */ + + DBG("VLDM :\n"); + + if (wback) { + state->Reg[n] = (add ? 
state->Reg[n] + imm32 : state->Reg[n] - imm32); + DBG("\twback r%d[%x]\n", n, state->Reg[n]); + } + + i = 0; + + return ARMul_DONE; + } + else if (type == ARMul_DATA) + { + if (single_regs) + { + state->ExtReg[d + i] = value; + DBG("\ts%d <= [%x] addr[?]\n", d+i, state->ExtReg[d + i]); + i++; + if (i < regs) + return ARMul_INC; + else + return ARMul_DONE; + } + else + { + /* FIXME Careful of endianness, may need to rework this */ + state->ExtReg[d*2 + i] = value; + DBG("\ts[%d] <= [%x] addr[?]\n", d*2 + i, state->ExtReg[d*2 + i]); + i++; + if (i < regs*2) + return ARMul_INC; + else + return ARMul_DONE; + } + } + + return -1; +} +#endif +#ifdef VFP_DYNCOM_TABLE +DYNCOM_FILL_ACTION(vfpinstr), +#endif +#ifdef VFP_DYNCOM_TAG +int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc) +{ + int instr_size = INSTR_SIZE; + //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__); + //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc); + arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc); + DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc); + *tag |= TAG_NEW_BB; + if(instr >> 28 != 0xe) + *tag |= TAG_CONDITIONAL; + + return instr_size; +} +#endif +#ifdef VFP_DYNCOM_TRANS +int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){ + int single = BIT(8) == 0; + int add = BIT(23); + int wback = BIT(21); + int d = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|BIT(22)<<4; + int n = BITS(16, 19); + int imm32 = BITS(0, 7)<<2; + int regs = single ? BITS(0, 7) : BITS(1, 7); + + Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32))); + //if(single) + // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap); + //else + // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap); + + DBG("VLDM \n"); + int i; + //Value* phys_addr; + Value* val; + for (i = 0; i < regs; i++) + { + if (single) + { + + //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32); + /* if R(i) is R15? 
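+       (Editor's note: this loop only ever writes FP registers s[d+i]
+       via LETFPS, never an ARM core register, so the PC cannot be a
+       load destination here; the question would only matter for the
+       base register R(n).)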
*/
+#if 0
+            phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+            bb = cpu->dyncom_engine->bb;
+            val = arch_read_memory(cpu,bb,phys_addr,0,32);
+#endif
+            memory_read(cpu, bb, Addr, 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            //LETS(d + i, val);
+            LETFPS(d + i, FPBITCAST32(val));
+            //if (fault) goto MMU_EXCEPTION;
+            //DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+            Addr = ADD(Addr, CONST(4));
+        }
+        else
+        {
+#if 0
+            phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+            bb = cpu->dyncom_engine->bb;
+            val = arch_read_memory(cpu,bb,phys_addr,0,32);
+#endif
+            memory_read(cpu, bb, Addr, 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS((d + i) * 2, FPBITCAST32(val));
+#if 0
+            phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+            bb = cpu->dyncom_engine->bb;
+            val = arch_read_memory(cpu,bb,phys_addr,0,32);
+#endif
+            /* The second word of the double lives at Addr + 4, as in the
+               reference code above; reading Addr twice would load the same
+               word into both halves. */
+            memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
+            bb = cpu->dyncom_engine->bb;
+            val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+            LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+            //fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+            //DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+            //addr += 8;
+            Addr = ADD(Addr, CONST(8));
+        }
+    }
+    if (wback){
+        //cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 :
+        //               cpu->Reg[n] - imm32);
+        LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+        DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+    }
+    return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+#define VFP_DEBUG_TRANSLATE DBG("in func %s, %x\n", __FUNCTION__, inst);
+#define VFP_DEBUG_UNIMPLEMENTED(x) printf("in func %s, " #x " unimplemented\n", __FUNCTION__); exit(-1);
+#define VFP_DEBUG_UNTESTED(x) printf("in func %s, " #x " untested\n", __FUNCTION__);
+
+#define CHECK_VFP_ENABLED
+
+#define CHECK_VFP_CDP_RET vfp_raise_exceptions(cpu, ret, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); //if (ret == -1) {printf("VFP CDP FAILURE %x\n", inst_cream->instr); exit(-1);}
diff --git a/src/core/arm/interpreter/vfp/vfpsingle.cpp b/src/core/arm/interpreter/vfp/vfpsingle.cpp
new file mode 100644
index 000000000..05279f5ce
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpsingle.cpp
@@ -0,0 +1,1278 @@
+/*
+    vfp/vfpsingle.c - ARM VFPv3 emulation unit - SoftFloat single instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R.
Housers softfloat library, which + * carries the following notice: + * + * =========================================================================== + * This C source file is part of the SoftFloat IEC/IEEE Floating-point + * Arithmetic Package, Release 2. + * + * Written by John R. Hauser. This work was made possible in part by the + * International Computer Science Institute, located at Suite 600, 1947 Center + * Street, Berkeley, California 94704. Funding was partially provided by the + * National Science Foundation under grant MIP-9311980. The original version + * of this code was written as part of a project to build a fixed-point vector + * processor in collaboration with the University of California at Berkeley, + * overseen by Profs. Nelson Morgan and John Wawrzynek. More information + * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ + * arithmetic/softfloat.html'. + * + * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort + * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT + * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO + * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY + * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + * + * Derivative works are acceptable, even for commercial purposes, so long as + * (1) they include prominent notice that the work is derivative, and (2) they + * include prominent notice akin to these three paragraphs for those parts of + * this code that are retained. + * =========================================================================== + */ + +#include "core/arm/interpreter/vfp/vfp_helper.h" +#include "core/arm/interpreter/vfp/asm_vfp.h" +#include "core/arm/interpreter/vfp/vfp.h" + +static struct vfp_single vfp_single_default_qnan = { + //.exponent = 255, + //.sign = 0, + //.significand = VFP_SINGLE_SIGNIFICAND_QNAN, +}; + +static void vfp_single_dump(const char *str, struct vfp_single *s) +{ + pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", + str, s->sign != 0, s->exponent, s->significand); +} + +static void vfp_single_normalise_denormal(struct vfp_single *vs) +{ + int bits = 31 - fls(vs->significand); + + vfp_single_dump("normalise_denormal: in", vs); + + if (bits) { + vs->exponent -= bits - 1; + vs->significand <<= bits; + } + + vfp_single_dump("normalise_denormal: out", vs); +} + + +u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) +{ + u32 significand, incr, rmode; + int exponent, shift, underflow; + + vfp_single_dump("pack: in", vs); + + /* + * Infinities and NaNs are a special case. + */ + if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) + goto pack; + + /* + * Special-case zero. + */ + if (vs->significand == 0) { + vs->exponent = 0; + goto pack; + } + + exponent = vs->exponent; + significand = vs->significand; + + /* + * Normalise first. Note that we shift the significand up to + * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least + * significant bit. + */ + shift = 32 - fls(significand); + if (shift < 32 && shift) { + exponent -= shift; + significand <<= shift; + } + +#if 1 + vs->exponent = exponent; + vs->significand = significand; + vfp_single_dump("pack: normalised", vs); +#endif + + /* + * Tiny number? 
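+ */
+
+	/*
+	 * Editor's note -- illustrative, not part of the original commit:
+	 * "tiny" means the biased exponent went negative, i.e. the value
+	 * is below the smallest normal number. The significand is then
+	 * shifted right with "jamming": a sticky shift that ORs the
+	 * shifted-out bits into bit 0, so the rounding code below can
+	 * still tell the value was inexact. A minimal sketch of the
+	 * assumed semantics of vfp_shiftright32jamming():
+	 */
+#if 0 /* documentation-only sketch; never compiled */
+static u32 example_shiftright32jamming(u32 val, unsigned int shift)
+{
+	if (shift == 0)
+		return val;
+	if (shift >= 32)
+		return val != 0;	/* everything shifted out -> sticky bit */
+	return (val >> shift) | ((val << (32 - shift)) != 0);
+}
+#endif
+	/*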
+ */ + underflow = exponent < 0; + if (underflow) { + significand = vfp_shiftright32jamming(significand, -exponent); + exponent = 0; +#if 1 + vs->exponent = exponent; + vs->significand = significand; + vfp_single_dump("pack: tiny number", vs); +#endif + if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) + underflow = 0; + } + + /* + * Select rounding increment. + */ + incr = 0; + rmode = fpscr & FPSCR_RMODE_MASK; + + if (rmode == FPSCR_ROUND_NEAREST) { + incr = 1 << VFP_SINGLE_LOW_BITS; + if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) + incr -= 1; + } else if (rmode == FPSCR_ROUND_TOZERO) { + incr = 0; + } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) + incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; + + pr_debug("VFP: rounding increment = 0x%08x\n", incr); + + /* + * Is our rounding going to overflow? + */ + if ((significand + incr) < significand) { + exponent += 1; + significand = (significand >> 1) | (significand & 1); + incr >>= 1; +#if 1 + vs->exponent = exponent; + vs->significand = significand; + vfp_single_dump("pack: overflow", vs); +#endif + } + + /* + * If any of the low bits (which will be shifted out of the + * number) are non-zero, the result is inexact. + */ + if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) + exceptions |= FPSCR_IXC; + + /* + * Do our rounding. + */ + significand += incr; + + /* + * Infinity? + */ + if (exponent >= 254) { + exceptions |= FPSCR_OFC | FPSCR_IXC; + if (incr == 0) { + vs->exponent = 253; + vs->significand = 0x7fffffff; + } else { + vs->exponent = 255; /* infinity */ + vs->significand = 0; + } + } else { + if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) + exponent = 0; + if (exponent || significand > 0x80000000) + underflow = 0; + if (underflow) + exceptions |= FPSCR_UFC; + vs->exponent = exponent; + vs->significand = significand >> 1; + } + + pack: + vfp_single_dump("pack: final", vs); + { + s32 d = vfp_single_pack(vs); +#if 1 + pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, + sd, d, exceptions); +#endif + vfp_put_float(state, d, sd); + } + + return exceptions; +} + +/* + * Propagate the NaN, setting exceptions if it is signalling. + * 'n' is always a NaN. 'm' may be a number, NaN or infinity. + */ +static u32 +vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, + struct vfp_single *vsm, u32 fpscr) +{ + struct vfp_single *nan; + int tn, tm = 0; + + tn = vfp_single_type(vsn); + + if (vsm) + tm = vfp_single_type(vsm); + + if (fpscr & FPSCR_DEFAULT_NAN) + /* + * Default NaN mode - always returns a quiet NaN + */ + nan = &vfp_single_default_qnan; + else { + /* + * Contemporary mode - select the first signalling + * NAN, or if neither are signalling, the first + * quiet NAN. + */ + if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) + nan = vsn; + else + nan = vsm; + /* + * Make the NaN quiet. + */ + nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; + } + + *vsd = *nan; + + /* + * If one was a signalling NAN, raise invalid operation. + */ + return tn == VFP_SNAN || tm == VFP_SNAN ? 
FPSCR_IOC : VFP_NAN_FLAG; +} + + +/* + * Extended operations + */ +static u32 vfp_single_fabs(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + vfp_put_float(state, vfp_single_packed_abs(m), sd); + return 0; +} + +static u32 vfp_single_fcpy(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + vfp_put_float(state, m, sd); + return 0; +} + +static u32 vfp_single_fneg(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + vfp_put_float(state, vfp_single_packed_negate(m), sd); + return 0; +} + +static const u16 sqrt_oddadjust[] = { + 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, + 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 +}; + +static const u16 sqrt_evenadjust[] = { + 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, + 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 +}; + +u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) +{ + int index; + u32 z, a; + + if ((significand & 0xc0000000) != 0x40000000) { + pr_debug("VFP: estimate_sqrt: invalid significand\n"); + } + + a = significand << 1; + index = (a >> 27) & 15; + if (exponent & 1) { + z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; + z = ((a / z) << 14) + (z << 15); + a >>= 1; + } else { + z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; + z = a / z + z; + z = (z >= 0x20000) ? 0xffff8000 : (z << 15); + if (z <= a) + return (s32)a >> 1; + } + { + u64 v = (u64)a << 31; + do_div(v, z); + return v + (z >> 1); + } +} + +static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + struct vfp_single vsm, vsd, *vsp; + int ret, tm; + + vfp_single_unpack(&vsm, m); + tm = vfp_single_type(&vsm); + if (tm & (VFP_NAN|VFP_INFINITY)) { + vsp = &vsd; + + if (tm & VFP_NAN) + ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); + else if (vsm.sign == 0) { + sqrt_copy: + vsp = &vsm; + ret = 0; + } else { + sqrt_invalid: + vsp = &vfp_single_default_qnan; + ret = FPSCR_IOC; + } + vfp_put_float(state, vfp_single_pack(vsp), sd); + return ret; + } + + /* + * sqrt(+/- 0) == +/- 0 + */ + if (tm & VFP_ZERO) + goto sqrt_copy; + + /* + * Normalise a denormalised number + */ + if (tm & VFP_DENORMAL) + vfp_single_normalise_denormal(&vsm); + + /* + * sqrt(<0) = invalid + */ + if (vsm.sign) + goto sqrt_invalid; + + vfp_single_dump("sqrt", &vsm); + + /* + * Estimate the square root. + */ + vsd.sign = 0; + vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; + vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; + + vfp_single_dump("sqrt estimate", &vsd); + + /* + * And now adjust. 
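+ * (Editor's note, illustrative: the estimate above can be a few ULPs
+ * off. When its low bits look suspicious, the code squares the
+ * estimate, subtracts that from the input significand, and steps the
+ * estimate down until the remainder is non-negative; a non-zero
+ * remainder is then folded in as a sticky bit so the final rounding
+ * stays correct.)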
+ */ + if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { + if (vsd.significand < 2) { + vsd.significand = 0xffffffff; + } else { + u64 term; + s64 rem; + vsm.significand <<= !(vsm.exponent & 1); + term = (u64)vsd.significand * vsd.significand; + rem = ((u64)vsm.significand << 32) - term; + + pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); + + while (rem < 0) { + vsd.significand -= 1; + rem += ((u64)vsd.significand << 1) | 1; + } + vsd.significand |= rem != 0; + } + } + vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); + + return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fsqrt"); +} + +/* + * Equal := ZC + * Less than := N + * Greater than := C + * Unordered := CV + */ +static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u32 fpscr) +{ + s32 d; + u32 ret = 0; + + d = vfp_get_float(state, sd); + if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { + ret |= FPSCR_C | FPSCR_V; + if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) + /* + * Signalling NaN, or signalling on quiet NaN + */ + ret |= FPSCR_IOC; + } + + if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { + ret |= FPSCR_C | FPSCR_V; + if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) + /* + * Signalling NaN, or signalling on quiet NaN + */ + ret |= FPSCR_IOC; + } + + if (ret == 0) { + if (d == m || vfp_single_packed_abs(d | m) == 0) { + /* + * equal + */ + ret |= FPSCR_Z | FPSCR_C; + } else if (vfp_single_packed_sign(d ^ m)) { + /* + * different signs + */ + if (vfp_single_packed_sign(d)) + /* + * d is negative, so d < m + */ + ret |= FPSCR_N; + else + /* + * d is positive, so d > m + */ + ret |= FPSCR_C; + } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { + /* + * d < m + */ + ret |= FPSCR_N; + } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { + /* + * d > m + */ + ret |= FPSCR_C; + } + } + return ret; +} + +static u32 vfp_single_fcmp(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + return vfp_compare(state, sd, 0, m, fpscr); +} + +static u32 vfp_single_fcmpe(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + return vfp_compare(state, sd, 1, m, fpscr); +} + +static u32 vfp_single_fcmpz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + return vfp_compare(state, sd, 0, 0, fpscr); +} + +static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + return vfp_compare(state, sd, 1, 0, fpscr); +} + +static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr) +{ + struct vfp_single vsm; + struct vfp_double vdd; + int tm; + u32 exceptions = 0; + + vfp_single_unpack(&vsm, m); + + tm = vfp_single_type(&vsm); + + /* + * If we have a signalling NaN, signal invalid operation. + */ + if (tm == VFP_SNAN) + exceptions = FPSCR_IOC; + + if (tm & VFP_DENORMAL) + vfp_single_normalise_denormal(&vsm); + + vdd.sign = vsm.sign; + vdd.significand = (u64)vsm.significand << 32; + + /* + * If we have an infinity or NaN, the exponent must be 2047. 
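+ * Otherwise the exponent is rebased below from the single-precision
+ * bias (127) to the double-precision bias (1023); e.g. 1.0f, with
+ * biased exponent 127, carries biased exponent 1023 as a double.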
+ */ + if (tm & (VFP_INFINITY|VFP_NAN)) { + vdd.exponent = 2047; + if (tm == VFP_QNAN) + vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; + goto pack_nan; + } else if (tm & VFP_ZERO) + vdd.exponent = 0; + else + vdd.exponent = vsm.exponent + (1023 - 127); + + return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fcvtd"); + + pack_nan: + vfp_put_double(state, vfp_double_pack(&vdd), dd); + return exceptions; +} + +static u32 vfp_single_fuito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + struct vfp_single vs; + + vs.sign = 0; + vs.exponent = 127 + 31 - 1; + vs.significand = (u32)m; + + return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fuito"); +} + +static u32 vfp_single_fsito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + struct vfp_single vs; + + vs.sign = (m & 0x80000000) >> 16; + vs.exponent = 127 + 31 - 1; + vs.significand = vs.sign ? -m : m; + + return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fsito"); +} + +static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + struct vfp_single vsm; + u32 d, exceptions = 0; + int rmode = fpscr & FPSCR_RMODE_MASK; + int tm; + + vfp_single_unpack(&vsm, m); + vfp_single_dump("VSM", &vsm); + + /* + * Do we have a denormalised number? + */ + tm = vfp_single_type(&vsm); + if (tm & VFP_DENORMAL) + exceptions |= FPSCR_IDC; + + if (tm & VFP_NAN) + vsm.sign = 0; + + if (vsm.exponent >= 127 + 32) { + d = vsm.sign ? 0 : 0xffffffff; + exceptions = FPSCR_IOC; + } else if (vsm.exponent >= 127 - 1) { + int shift = 127 + 31 - vsm.exponent; + u32 rem, incr = 0; + + /* + * 2^0 <= m < 2^32-2^8 + */ + d = (vsm.significand << 1) >> shift; + rem = vsm.significand << (33 - shift); + + if (rmode == FPSCR_ROUND_NEAREST) { + incr = 0x80000000; + if ((d & 1) == 0) + incr -= 1; + } else if (rmode == FPSCR_ROUND_TOZERO) { + incr = 0; + } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { + incr = ~0; + } + + if ((rem + incr) < rem) { + if (d < 0xffffffff) + d += 1; + else + exceptions |= FPSCR_IOC; + } + + if (d && vsm.sign) { + d = 0; + exceptions |= FPSCR_IOC; + } else if (rem) + exceptions |= FPSCR_IXC; + } else { + d = 0; + if (vsm.exponent | vsm.significand) { + exceptions |= FPSCR_IXC; + if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) + d = 1; + else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { + d = 0; + exceptions |= FPSCR_IOC; + } + } + } + + pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); + + vfp_put_float(state, d, sd); + + return exceptions; +} + +static u32 vfp_single_ftouiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + return vfp_single_ftoui(state, sd, unused, m, FPSCR_ROUND_TOZERO); +} + +static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + struct vfp_single vsm; + u32 d, exceptions = 0; + int rmode = fpscr & FPSCR_RMODE_MASK; + int tm; + + vfp_single_unpack(&vsm, m); + vfp_single_dump("VSM", &vsm); + + /* + * Do we have a denormalised number? 
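+ * If so, only the cumulative "input denormal" flag (FPSCR_IDC) is
+ * raised; the conversion still proceeds. Under FPSCR_ROUND_NEAREST
+ * the rounding below is round-to-nearest-even, e.g. 2.5f converts
+ * to 2 while 3.5f converts to 4.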
+ */ + tm = vfp_single_type(&vsm); + if (vfp_single_type(&vsm) & VFP_DENORMAL) + exceptions |= FPSCR_IDC; + + if (tm & VFP_NAN) { + d = 0; + exceptions |= FPSCR_IOC; + } else if (vsm.exponent >= 127 + 32) { + /* + * m >= 2^31-2^7: invalid + */ + d = 0x7fffffff; + if (vsm.sign) + d = ~d; + exceptions |= FPSCR_IOC; + } else if (vsm.exponent >= 127 - 1) { + int shift = 127 + 31 - vsm.exponent; + u32 rem, incr = 0; + + /* 2^0 <= m <= 2^31-2^7 */ + d = (vsm.significand << 1) >> shift; + rem = vsm.significand << (33 - shift); + + if (rmode == FPSCR_ROUND_NEAREST) { + incr = 0x80000000; + if ((d & 1) == 0) + incr -= 1; + } else if (rmode == FPSCR_ROUND_TOZERO) { + incr = 0; + } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { + incr = ~0; + } + + if ((rem + incr) < rem && d < 0xffffffff) + d += 1; + if (d > 0x7fffffff + (vsm.sign != 0)) { + d = 0x7fffffff + (vsm.sign != 0); + exceptions |= FPSCR_IOC; + } else if (rem) + exceptions |= FPSCR_IXC; + + if (vsm.sign) + d = -d; + } else { + d = 0; + if (vsm.exponent | vsm.significand) { + exceptions |= FPSCR_IXC; + if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) + d = 1; + else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) + d = -1; + } + } + + pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); + + vfp_put_float(state, (s32)d, sd); + + return exceptions; +} + +static u32 vfp_single_ftosiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr) +{ + return vfp_single_ftosi(state, sd, unused, m, FPSCR_ROUND_TOZERO); +} + +static struct op fops_ext[] = { + { vfp_single_fcpy, 0 }, //0x00000000 - FEXT_FCPY + { vfp_single_fabs, 0 }, //0x00000001 - FEXT_FABS + { vfp_single_fneg, 0 }, //0x00000002 - FEXT_FNEG + { vfp_single_fsqrt, 0 }, //0x00000003 - FEXT_FSQRT + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { vfp_single_fcmp, OP_SCALAR }, //0x00000008 - FEXT_FCMP + { vfp_single_fcmpe, OP_SCALAR }, //0x00000009 - FEXT_FCMPE + { vfp_single_fcmpz, OP_SCALAR }, //0x0000000A - FEXT_FCMPZ + { vfp_single_fcmpez, OP_SCALAR }, //0x0000000B - FEXT_FCMPEZ + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { vfp_single_fcvtd, OP_SCALAR|OP_DD }, //0x0000000F - FEXT_FCVT + { vfp_single_fuito, OP_SCALAR }, //0x00000010 - FEXT_FUITO + { vfp_single_fsito, OP_SCALAR }, //0x00000011 - FEXT_FSITO + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, + { vfp_single_ftoui, OP_SCALAR }, //0x00000018 - FEXT_FTOUI + { vfp_single_ftouiz, OP_SCALAR }, //0x00000019 - FEXT_FTOUIZ + { vfp_single_ftosi, OP_SCALAR }, //0x0000001A - FEXT_FTOSI + { vfp_single_ftosiz, OP_SCALAR }, //0x0000001B - FEXT_FTOSIZ +}; + + + + + +static u32 +vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, + struct vfp_single *vsm, u32 fpscr) +{ + struct vfp_single *vsp; + u32 exceptions = 0; + int tn, tm; + + tn = vfp_single_type(vsn); + tm = vfp_single_type(vsm); + + if (tn & tm & VFP_INFINITY) { + /* + * Two infinities. Are they different signs? 
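+ * (+Inf) + (-Inf) has no meaningful value, so it raises the invalid
+ * operation flag (FPSCR_IOC) and returns the default quiet NaN;
+ * same-sign infinities simply propagate that infinity.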
+ */ + if (vsn->sign ^ vsm->sign) { + /* + * different signs -> invalid + */ + exceptions = FPSCR_IOC; + vsp = &vfp_single_default_qnan; + } else { + /* + * same signs -> valid + */ + vsp = vsn; + } + } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { + /* + * One infinity and one number -> infinity + */ + vsp = vsn; + } else { + /* + * 'n' is a NaN of some type + */ + return vfp_propagate_nan(vsd, vsn, vsm, fpscr); + } + *vsd = *vsp; + return exceptions; +} + +static u32 +vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, + struct vfp_single *vsm, u32 fpscr) +{ + u32 exp_diff, m_sig; + + if (vsn->significand & 0x80000000 || + vsm->significand & 0x80000000) { + pr_info("VFP: bad FP values\n"); + vfp_single_dump("VSN", vsn); + vfp_single_dump("VSM", vsm); + } + + /* + * Ensure that 'n' is the largest magnitude number. Note that + * if 'n' and 'm' have equal exponents, we do not swap them. + * This ensures that NaN propagation works correctly. + */ + if (vsn->exponent < vsm->exponent) { + struct vfp_single *t = vsn; + vsn = vsm; + vsm = t; + } + + /* + * Is 'n' an infinity or a NaN? Note that 'm' may be a number, + * infinity or a NaN here. + */ + if (vsn->exponent == 255) + return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); + + /* + * We have two proper numbers, where 'vsn' is the larger magnitude. + * + * Copy 'n' to 'd' before doing the arithmetic. + */ + *vsd = *vsn; + + /* + * Align both numbers. + */ + exp_diff = vsn->exponent - vsm->exponent; + m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); + + /* + * If the signs are different, we are really subtracting. + */ + if (vsn->sign ^ vsm->sign) { + m_sig = vsn->significand - m_sig; + if ((s32)m_sig < 0) { + vsd->sign = vfp_sign_negate(vsd->sign); + m_sig = -m_sig; + } else if (m_sig == 0) { + vsd->sign = (fpscr & FPSCR_RMODE_MASK) == + FPSCR_ROUND_MINUSINF ? 0x8000 : 0; + } + } else { + m_sig = vsn->significand + m_sig; + } + vsd->significand = m_sig; + + return 0; +} + +static u32 +vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) +{ + vfp_single_dump("VSN", vsn); + vfp_single_dump("VSM", vsm); + + /* + * Ensure that 'n' is the largest magnitude number. Note that + * if 'n' and 'm' have equal exponents, we do not swap them. + * This ensures that NaN propagation works correctly. + */ + if (vsn->exponent < vsm->exponent) { + struct vfp_single *t = vsn; + vsn = vsm; + vsm = t; + pr_debug("VFP: swapping M <-> N\n"); + } + + vsd->sign = vsn->sign ^ vsm->sign; + + /* + * If 'n' is an infinity or NaN, handle it. 'm' may be anything. + */ + if (vsn->exponent == 255) { + if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) + return vfp_propagate_nan(vsd, vsn, vsm, fpscr); + if ((vsm->exponent | vsm->significand) == 0) { + *vsd = vfp_single_default_qnan; + return FPSCR_IOC; + } + vsd->exponent = vsn->exponent; + vsd->significand = 0; + return 0; + } + + /* + * If 'm' is zero, the result is always zero. In this case, + * 'n' may be zero or a number, but it doesn't matter which. + */ + if ((vsm->exponent | vsm->significand) == 0) { + vsd->exponent = 0; + vsd->significand = 0; + return 0; + } + + /* + * We add 2 to the destination exponent for the same reason as + * the addition case - though this time we have +1 from each + * input operand. 
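+ * (Editor's note, illustrative: each unpacked significand keeps its
+ * leading one at bit 30, i.e. it encodes a value sig * 2^-30 in
+ * [1,2). The high 32 bits of the 64-bit product therefore represent
+ * the result at a 2^-28 scale, two bits coarser, which the "+ 2"
+ * below compensates for.)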
+ */ + vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; + vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); + + vfp_single_dump("VSD", vsd); + return 0; +} + +#define NEG_MULTIPLY (1 << 0) +#define NEG_SUBTRACT (1 << 1) + +static u32 +vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) +{ + struct vfp_single vsd, vsp, vsn, vsm; + u32 exceptions; + s32 v; + + v = vfp_get_float(state, sn); + pr_debug("VFP: s%u = %08x\n", sn, v); + vfp_single_unpack(&vsn, v); + if (vsn.exponent == 0 && vsn.significand) + vfp_single_normalise_denormal(&vsn); + + vfp_single_unpack(&vsm, m); + if (vsm.exponent == 0 && vsm.significand) + vfp_single_normalise_denormal(&vsm); + + exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); + if (negate & NEG_MULTIPLY) + vsp.sign = vfp_sign_negate(vsp.sign); + + v = vfp_get_float(state, sd); + pr_debug("VFP: s%u = %08x\n", sd, v); + vfp_single_unpack(&vsn, v); + if (negate & NEG_SUBTRACT) + vsn.sign = vfp_sign_negate(vsn.sign); + + exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); + + return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, func); +} + +/* + * Standard operations + */ + +/* + * sd = sd + (sn * sm) + */ +static u32 vfp_single_fmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) +{ + pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd); + return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, 0, "fmac"); +} + +/* + * sd = sd - (sn * sm) + */ +static u32 vfp_single_fnmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) +{ + pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sd, sn); + return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); +} + +/* + * sd = -sd + (sn * sm) + */ +static u32 vfp_single_fmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) +{ + pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd); + return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); +} + +/* + * sd = -sd - (sn * sm) + */ +static u32 vfp_single_fnmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) +{ + pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd); + return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); +} + +/* + * sd = sn * sm + */ +static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) +{ + struct vfp_single vsd, vsn, vsm; + u32 exceptions; + s32 n = vfp_get_float(state, sn); + + pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, n); + + vfp_single_unpack(&vsn, n); + if (vsn.exponent == 0 && vsn.significand) + vfp_single_normalise_denormal(&vsn); + + vfp_single_unpack(&vsm, m); + if (vsm.exponent == 0 && vsm.significand) + vfp_single_normalise_denormal(&vsm); + + exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); + return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fmul"); +} + +/* + * sd = -(sn * sm) + */ +static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) +{ + struct vfp_single vsd, vsn, vsm; + u32 exceptions; + s32 n = vfp_get_float(state, sn); + + pr_debug("VFP: s%u = %08x\n", sn, n); + + vfp_single_unpack(&vsn, n); + if (vsn.exponent == 0 && vsn.significand) + vfp_single_normalise_denormal(&vsn); + + vfp_single_unpack(&vsm, m); + if (vsm.exponent == 0 && vsm.significand) + vfp_single_normalise_denormal(&vsm); + + exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); + vsd.sign = vfp_sign_negate(vsd.sign); + 
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * sd = sn + sm
+ */
+static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    struct vfp_single vsd, vsn, vsm;
+    u32 exceptions;
+    s32 n = vfp_get_float(state, sn);
+
+    pr_debug("VFP: s%u = %08x\n", sn, n);
+
+    /*
+     * Unpack and normalise denormals.
+     */
+    vfp_single_unpack(&vsn, n);
+    if (vsn.exponent == 0 && vsn.significand)
+        vfp_single_normalise_denormal(&vsn);
+
+    vfp_single_unpack(&vsm, m);
+    if (vsm.exponent == 0 && vsm.significand)
+        vfp_single_normalise_denormal(&vsm);
+
+    exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
+
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fadd");
+}
+
+/*
+ * sd = sn - sm
+ */
+static u32 vfp_single_fsub(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    pr_debug("In %s: sd=s%u, sn=s%u\n", __FUNCTION__, sd, sn);
+    /*
+     * Subtraction is addition with one sign inverted.
+     */
+    return vfp_single_fadd(state, sd, sn, vfp_single_packed_negate(m), fpscr);
+}
+
+/*
+ * sd = sn / sm
+ */
+static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+    struct vfp_single vsd, vsn, vsm;
+    u32 exceptions = 0;
+    s32 n = vfp_get_float(state, sn);
+    int tm, tn;
+
+    pr_debug("VFP: s%u = %08x\n", sn, n);
+
+    vfp_single_unpack(&vsn, n);
+    vfp_single_unpack(&vsm, m);
+
+    vsd.sign = vsn.sign ^ vsm.sign;
+
+    tn = vfp_single_type(&vsn);
+    tm = vfp_single_type(&vsm);
+
+    /*
+     * Is n a NaN?
+     */
+    if (tn & VFP_NAN)
+        goto vsn_nan;
+
+    /*
+     * Is m a NaN?
+     */
+    if (tm & VFP_NAN)
+        goto vsm_nan;
+
+    /*
+     * If n and m are both infinities, or both zeros, the result
+     * is invalid.
+     */
+    if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+        goto invalid;
+
+    /*
+     * If n is infinity, the result is infinity.
+     */
+    if (tn & VFP_INFINITY)
+        goto infinity;
+
+    /*
+     * If m is zero, raise the div0 exception.
+     */
+    if (tm & VFP_ZERO)
+        goto divzero;
+
+    /*
+     * If m is infinity, or n is zero, the result is zero.
+     */
+    if (tm & VFP_INFINITY || tn & VFP_ZERO)
+        goto zero;
+
+    if (tn & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&vsn);
+    if (tm & VFP_DENORMAL)
+        vfp_single_normalise_denormal(&vsm);
+
+    /*
+     * OK, both operands are numbers; we can perform the division.
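+     *
+     * The quotient's exponent is the difference of the operands'
+     * unbiased exponents, re-biased by 127, minus one to leave room
+     * for the alignment step below; the divisor is then aligned so
+     * the 32-bit integer quotient of the shifted dividend comes out
+     * normalised.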
+     */
+    vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
+    vsm.significand <<= 1;
+    if (vsm.significand <= (2 * vsn.significand)) {
+        vsn.significand >>= 1;
+        vsd.exponent++;
+    }
+    {
+        u64 significand = (u64)vsn.significand << 32;
+        do_div(significand, vsm.significand);
+        vsd.significand = significand;
+    }
+    /*
+     * If the low quotient bits are clean, set the sticky bit whenever
+     * the division left a remainder, so rounding still sees it.
+     */
+    if ((vsd.significand & 0x3f) == 0)
+        vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
+
+    return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fdiv");
+
+ vsn_nan:
+    exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
+ pack:
+    vfp_put_float(state, vfp_single_pack(&vsd), sd);
+    return exceptions;
+
+ vsm_nan:
+    exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
+    goto pack;
+
+ zero:
+    vsd.exponent = 0;
+    vsd.significand = 0;
+    goto pack;
+
+ divzero:
+    exceptions = FPSCR_DZC;
+ infinity:
+    vsd.exponent = 255;
+    vsd.significand = 0;
+    goto pack;
+
+ invalid:
+    vfp_put_float(state, vfp_single_pack(&vfp_single_default_qnan), sd);
+    return FPSCR_IOC;
+}
+
+static struct op fops[] = {
+    { vfp_single_fmac,  0 },
+    { vfp_single_fmsc,  0 },
+    { vfp_single_fmul,  0 },
+    { vfp_single_fadd,  0 },
+    { vfp_single_fnmac, 0 },
+    { vfp_single_fnmsc, 0 },
+    { vfp_single_fnmul, 0 },
+    { vfp_single_fsub,  0 },
+    { vfp_single_fdiv,  0 },
+};
+
+#define FREG_BANK(x) ((x) & 0x18)
+#define FREG_IDX(x) ((x) & 7)
+
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{
+    u32 op = inst & FOP_MASK;
+    u32 exceptions = 0;
+    unsigned int dest;
+    unsigned int sn = vfp_get_sn(inst);
+    unsigned int sm = vfp_get_sm(inst);
+    unsigned int vecitr, veclen, vecstride;
+    struct op *fop;
+
+    pr_debug("In %s\n", __FUNCTION__);
+
+    vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
+
+    fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
+
+    /*
+     * fcvtsd takes a dN register number as destination, not sN.
+     * Technically, if bit 0 of dd is set, this is an invalid
+     * instruction. However, we ignore this for efficiency.
+     * It also only operates on scalars.
+     */
+    if (fop->flags & OP_DD)
+        dest = vfp_get_dd(inst);
+    else
+        dest = vfp_get_sd(inst);
+
+    /*
+     * If the destination bank is zero, the vector length is always '1'.
+     * ARM DDI0100F C5.1.3, C5.3.2.
+     */
+    if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
+        veclen = 0;
+    else
+        veclen = fpscr & FPSCR_LENGTH_MASK;
+
+    pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
+             (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+    if (!fop->fn) {
+        printf("VFP: could not find single op %d, inst=0x%x@0x%x\n",
+               FEXT_TO_IDX(inst), inst, state->Reg[15]);
+        goto invalid;
+    }
+
+    for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
+        s32 m = vfp_get_float(state, sm);
+        u32 except;
+        char type;
+
+        type = fop->flags & OP_DD ? 'd' : 's';
+        if (op == FOP_EXT)
+            pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
+                     vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+                     sm, m);
+        else
+            pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
+                     vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+                     FOP_TO_IDX(op), sm, m);
+
+        except = fop->fn(state, dest, sn, m, fpscr);
+        pr_debug("VFP: itr%d: exceptions=%08x\n",
+                 vecitr >> FPSCR_LENGTH_BIT, except);
+
+        exceptions |= except;
+
+        /*
+         * CHECK: It appears to be undefined whether we stop when
+         * we encounter an exception. We continue.
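+         *
+         * Each register index below advances by the vector stride but
+         * wraps within its own 8-register bank; a scalar sm (bank 0)
+         * is not stepped (ARM DDI0100F C5.1.3).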
+         */
+        dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
+        sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
+        if (FREG_BANK(sm) != 0)
+            sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
+    }
+    return exceptions;
+
+ invalid:
+    return (u32)-1;
+}
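Editor's note: the multiply path above narrows the 64-bit product of two 32-bit significands with vfp_hi64to32jamming(). Below is a minimal standalone C sketch of that sticky-bit ("jamming") narrowing; hi64to32jamming() is an illustrative stand-in for the emulator's helper, not the exact implementation.

    #include <stdint.h>
    #include <stdio.h>

    /* Keep the high 32 bits of a 64-bit value and OR ("jam") any nonzero
     * low bits into bit 0, so later rounding still sees the lost precision. */
    static uint32_t hi64to32jamming(uint64_t v)
    {
        return (uint32_t)(v >> 32) | ((uint32_t)v != 0 ? 1 : 0);
    }

    int main(void)
    {
        /* 0x80000000 * 0x80000001 = 0x4000000080000000: the discarded low
         * half is nonzero, so the sticky bit must survive the narrowing. */
        uint64_t product = (uint64_t)0x80000000u * 0x80000001u;
        printf("%08x\n", hi64to32jamming(product)); /* prints 40000001 */
        return 0;
    }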
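And a similar sketch of the short-vector register stepping at the bottom of the loop: indices advance by the stride but wrap inside their 8-register bank, so a vector operation never crosses banks. The starting register s14 and stride 2 are arbitrary example values.

    #include <stdio.h>

    #define FREG_BANK(x) ((x) & 0x18)   /* which 8-register bank */
    #define FREG_IDX(x)  ((x) & 7)      /* position within the bank */

    int main(void)
    {
        unsigned int sd = 14, vecstride = 2, i;   /* s14 lives in the s8-s15 bank */
        for (i = 0; i < 4; i++) {
            printf("s%u\n", sd);
            sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
        }
        return 0;   /* prints s14, s8, s10, s12: the walk stays inside the bank */
    }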