14 files changed, 11033 insertions, 0 deletions
diff --git a/src/core/arm/skyeye_common/arm_regformat.h b/src/core/arm/skyeye_common/arm_regformat.h
new file mode 100644
index 000000000..0ca62780b
--- /dev/null
+++ b/src/core/arm/skyeye_common/arm_regformat.h
@@ -0,0 +1,103 @@
+#ifndef __ARM_REGFORMAT_H__
+#define __ARM_REGFORMAT_H__
+
+enum arm_regno{
+    R0 = 0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    R13,
+    LR,
+    R15, //PC,
+    CPSR_REG,
+    SPSR_REG,
+#if 1
+    PHYS_PC,
+    R13_USR,
+    R14_USR,
+    R13_SVC,
+    R14_SVC,
+    R13_ABORT,
+    R14_ABORT,
+    R13_UNDEF,
+    R14_UNDEF,
+    R13_IRQ,
+    R14_IRQ,
+    R8_FIRQ,
+    R9_FIRQ,
+    R10_FIRQ,
+    R11_FIRQ,
+    R12_FIRQ,
+    R13_FIRQ,
+    R14_FIRQ,
+    SPSR_INVALID1,
+    SPSR_INVALID2,
+    SPSR_SVC,
+    SPSR_ABORT,
+    SPSR_UNDEF,
+    SPSR_IRQ,
+    SPSR_FIRQ,
+    MODE_REG, /* That is the cpsr[4 : 0], just for calculation easily */
+    BANK_REG,
+    EXCLUSIVE_TAG,
+    EXCLUSIVE_STATE,
+    EXCLUSIVE_RESULT,
+    CP15_BASE,
+    CP15_C0 = CP15_BASE,
+    CP15_C0_C0 = CP15_C0,
+    CP15_MAIN_ID = CP15_C0_C0,
+    CP15_CACHE_TYPE,
+    CP15_TCM_STATUS,
+    CP15_TLB_TYPE,
+    CP15_C0_C1,
+    CP15_PROCESSOR_FEATURE_0 = CP15_C0_C1,
+    CP15_PROCESSOR_FEATURE_1,
+    CP15_DEBUG_FEATURE_0,
+    CP15_AUXILIARY_FEATURE_0,
+    CP15_C1_C0,
+    CP15_CONTROL = CP15_C1_C0,
+    CP15_AUXILIARY_CONTROL,
+    CP15_COPROCESSOR_ACCESS_CONTROL,
+    CP15_C2,
+    CP15_C2_C0 = CP15_C2,
+    CP15_TRANSLATION_BASE = CP15_C2_C0,
+    CP15_TRANSLATION_BASE_TABLE_0 = CP15_TRANSLATION_BASE,
+    CP15_TRANSLATION_BASE_TABLE_1,
+    CP15_TRANSLATION_BASE_CONTROL,
+    CP15_DOMAIN_ACCESS_CONTROL,
+    CP15_RESERVED,
+    /* Fault status */
+    CP15_FAULT_STATUS,
+    CP15_INSTR_FAULT_STATUS,
+    CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
+    CP15_INST_FSR,
+    /* Fault Address register */
+    CP15_FAULT_ADDRESS,
+    CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
+    CP15_WFAR,
+    CP15_IFAR,
+    CP15_PID,
+    CP15_CONTEXT_ID,
+    CP15_THREAD_URO,
+    CP15_TLB_FAULT_ADDR, /* defined by SkyEye */
+    CP15_TLB_FAULT_STATUS, /* defined by SkyEye */
+    /* VFP registers */
+    VFP_BASE,
+    VFP_FPSID = VFP_BASE,
+    VFP_FPSCR,
+    VFP_FPEXC,
+#endif    
+    MAX_REG_NUM,
+};
+
+#define VFP_OFFSET(x) (x - VFP_BASE)
+#endif
diff --git a/src/core/arm/skyeye_common/armcpu.h b/src/core/arm/skyeye_common/armcpu.h
new file mode 100644
index 000000000..6b5ea8566
--- /dev/null
+++ b/src/core/arm/skyeye_common/armcpu.h
@@ -0,0 +1,83 @@
+/*
+ *    arm
+ *    armcpu.h
+ *
+ *    Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License version 2 as
+ *    published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ARM_CPU_H__
+#define __ARM_CPU_H__
+//#include <skyeye_thread.h>
+//#include <skyeye_obj.h>
+//#include <skyeye_mach.h>
+//#include <skyeye_exec.h>
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include "common/thread.h"
+
+
+typedef struct ARM_CPU_State_s {
+    ARMul_State * core;
+    uint32_t core_num;
+    /* The core id that boot from
+     */
+    uint32_t boot_core_id;
+}ARM_CPU_State;
+
+//static ARM_CPU_State* get_current_cpu(){
+//    machine_config_t* mach = get_current_mach();
+//    /* Casting a conf_obj_t to ARM_CPU_State type */
+//    ARM_CPU_State* cpu = (ARM_CPU_State*)mach->cpu_data->obj;
+//
+//    return cpu;
+//}
+
+/**
+* @brief Get the core instance boot from
+*
+* @return
+*/
+//static ARMul_State* get_boot_core(){
+//    ARM_CPU_State* cpu = get_current_cpu();
+//    return &cpu->core[cpu->boot_core_id];
+//}
+/**
+* @brief Get the instance of running core
+*
+* @return the core instance
+*/
+//static ARMul_State* get_current_core(){
+//    /* Casting a conf_obj_t to ARM_CPU_State type */
+//    int id = Common::CurrentThreadId();
+//    /* If thread is not in running mode, we should give the boot core */
+//    if(get_thread_state(id) != Running_state){
+//        return get_boot_core();
+//    }
+//    /* Judge if we are running in paralell or sequenial */
+//    if(thread_exist(id)){
+//        conf_object_t* conf_obj = get_current_exec_priv(id);
+//        return (ARMul_State*)get_cast_conf_obj(conf_obj, "arm_core_t");
+//    }
+//
+//    return NULL;
+//}
+
+#define CURRENT_CORE get_current_core()
+
+#endif
+
diff --git a/src/core/arm/skyeye_common/armdefs.h b/src/core/arm/skyeye_common/armdefs.h
new file mode 100644
index 000000000..fd574a7a1
--- /dev/null
+++ b/src/core/arm/skyeye_common/armdefs.h
@@ -0,0 +1,931 @@
+/*  armdefs.h -- ARMulator common definitions:  ARM6 Instruction Emulator.
+    Copyright (C) 1994 Advanced RISC Machines Ltd.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _ARMDEFS_H_
+#define _ARMDEFS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "common/platform.h"
+
+//teawater add for arm2x86 2005.02.14-------------------------------------------
+// koodailar remove it for mingw 2005.12.18----------------
+//anthonylee modify it for portable 2007.01.30
+//#include "portable/mman.h"
+
+#include "arm_regformat.h"
+#include "common/platform.h"
+#include "core/arm/skyeye_common/skyeye_defs.h"
+
+//AJ2D--------------------------------------------------------------------------
+
+//teawater add for arm2x86 2005.07.03-------------------------------------------
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if EMU_PLATFORM == PLATFORM_LINUX
+#include <unistd.h>
+#endif
+#include <errno.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+//#include <memory_space.h>
+//AJ2D--------------------------------------------------------------------------
+#if 0
+#if 0
+#define DIFF_STATE 1
+#define __FOLLOW_MODE__ 0
+#else
+#define DIFF_STATE 0
+#define __FOLLOW_MODE__ 1
+#endif
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#define TRUE 1
+#endif
+
+#define LOW 0
+#define HIGH 1
+#define LOWHIGH 1
+#define HIGHLOW 2
+
+//teawater add DBCT_TEST_SPEED 2005.10.04---------------------------------------
+#include <signal.h>
+
+#include "common/platform.h"
+
+#if EMU_PLATFORM == PLATFORM_LINUX
+#include <sys/time.h>
+#endif
+
+//#define DBCT_TEST_SPEED
+#define DBCT_TEST_SPEED_SEC    10
+//AJ2D--------------------------------------------------------------------------
+
+//teawater add compile switch for DBCT GDB RSP function 2005.10.21--------------
+//#define DBCT_GDBRSP
+//AJ2D--------------------------------------------------------------------------
+
+//#include <skyeye_defs.h>
+//#include <skyeye_types.h>
+
+#define ARM_BYTE_TYPE         0
+#define ARM_HALFWORD_TYPE     1
+#define ARM_WORD_TYPE         2
+
+//the define of cachetype
+#define NONCACHE  0
+#define DATACACHE  1
+#define INSTCACHE  2
+
+#ifndef __STDC__
+typedef char *VoidStar;
+#endif
+
+typedef unsigned long long ARMdword;    /* must be 64 bits wide */
+typedef unsigned int ARMword;    /* must be 32 bits wide */
+typedef unsigned char ARMbyte;    /* must be 8 bits wide */
+typedef unsigned short ARMhword;    /* must be 16 bits wide */
+typedef struct ARMul_State ARMul_State;
+typedef struct ARMul_io ARMul_io;
+typedef struct ARMul_Energy ARMul_Energy;
+
+//teawater add for arm2x86 2005.06.24-------------------------------------------
+#include <stdint.h>
+//AJ2D--------------------------------------------------------------------------
+/*
+//chy 2005-05-11
+#ifndef __CYGWIN__
+//teawater add for arm2x86 2005.02.14-------------------------------------------
+typedef unsigned char           uint8_t;
+typedef unsigned short          uint16_t;
+typedef unsigned int            u32;
+#if defined (__x86_64__)
+typedef unsigned long           uint64_t;
+#else
+typedef unsigned long long      uint64_t;
+#endif
+////AJ2D--------------------------------------------------------------------------
+#endif
+*/
+
+#include "core/arm/skyeye_common/armmmu.h"
+//#include "lcd/skyeye_lcd.h"
+
+
+//#include "skyeye.h"
+//#include "skyeye_device.h"
+//#include "net/skyeye_net.h"
+//#include "skyeye_config.h"
+
+
+typedef unsigned ARMul_CPInits (ARMul_State * state);
+typedef unsigned ARMul_CPExits (ARMul_State * state);
+typedef unsigned ARMul_LDCs (ARMul_State * state, unsigned type,
+                 ARMword instr, ARMword value);
+typedef unsigned ARMul_STCs (ARMul_State * state, unsigned type,
+                 ARMword instr, ARMword * value);
+typedef unsigned ARMul_MRCs (ARMul_State * state, unsigned type,
+                 ARMword instr, ARMword * value);
+typedef unsigned ARMul_MCRs (ARMul_State * state, unsigned type,
+                 ARMword instr, ARMword value);
+typedef unsigned ARMul_MRRCs (ARMul_State * state, unsigned type,
+                 ARMword instr, ARMword * value1, ARMword * value2);
+typedef unsigned ARMul_MCRRs (ARMul_State * state, unsigned type,
+                 ARMword instr, ARMword value1, ARMword value2);
+typedef unsigned ARMul_CDPs (ARMul_State * state, unsigned type,
+                 ARMword instr);
+typedef unsigned ARMul_CPReads (ARMul_State * state, unsigned reg,
+                ARMword * value);
+typedef unsigned ARMul_CPWrites (ARMul_State * state, unsigned reg,
+                 ARMword value);
+
+
+//added by ksh,2004-3-5
+struct ARMul_io
+{
+    ARMword *instr;        //to display the current interrupt state
+    ARMword *net_flag;    //to judge if network is enabled
+    ARMword *net_int;    //netcard interrupt
+
+    //ywc,2004-04-01
+    ARMword *ts_int;
+    ARMword *ts_is_enable;
+    ARMword *ts_addr_begin;
+    ARMword *ts_addr_end;
+    ARMword *ts_buffer;
+};
+
+/* added by ksh,2004-11-26,some energy profiling */
+struct ARMul_Energy
+{
+    int energy_prof;    /* <tktan>  BUG200103282109 : for energy profiling */
+    int enable_func_energy;    /* <tktan> BUG200105181702 */
+    char *func_energy;
+    int func_display;    /* <tktan> BUG200103311509 : for function call display */
+    int func_disp_start;    /* <tktan> BUG200104191428 : to start func profiling */
+    char *start_func;    /* <tktan> BUG200104191428 */
+
+    FILE *outfile;        /* <tktan> BUG200105201531 : direct console to file */
+    long long tcycle, pcycle;
+    float t_energy;
+    void *cur_task;        /* <tktan> BUG200103291737 */
+    long long t_mem_cycle, t_idle_cycle, t_uart_cycle;
+    long long p_mem_cycle, p_idle_cycle, p_uart_cycle;
+    long long p_io_update_tcycle;
+    /*record CCCR,to get current core frequency */
+    ARMword cccr;
+};
+#if 0
+#define MAX_BANK 8
+#define MAX_STR  1024
+
+typedef struct mem_bank
+{
+    ARMword (*read_byte) (ARMul_State * state, ARMword addr);
+    void (*write_byte) (ARMul_State * state, ARMword addr, ARMword data);
+      ARMword (*read_halfword) (ARMul_State * state, ARMword addr);
+    void (*write_halfword) (ARMul_State * state, ARMword addr,
+                ARMword data);
+      ARMword (*read_word) (ARMul_State * state, ARMword addr);
+    void (*write_word) (ARMul_State * state, ARMword addr, ARMword data);
+    unsigned int addr, len;
+    char filename[MAX_STR];
+    unsigned type;        //chy 2003-09-21: maybe io,ram,rom
+} mem_bank_t;
+typedef struct
+{
+    int bank_num;
+    int current_num;    /*current num of bank */
+    mem_bank_t mem_banks[MAX_BANK];
+} mem_config_t;
+#endif
+#define VFP_REG_NUM 64
+struct ARMul_State
+{
+    ARMword Emulate;    /* to start and stop emulation */
+    unsigned EndCondition;    /* reason for stopping */
+    unsigned ErrorCode;    /* type of illegal instruction */
+
+    /* Order of the following register should not be modified */
+    ARMword Reg[16];    /* the current register file */
+    ARMword Cpsr;        /* the current psr */
+    ARMword Spsr_copy;
+    ARMword phys_pc;
+    ARMword Reg_usr[2];
+    ARMword Reg_svc[2]; /* R13_SVC R14_SVC */
+    ARMword Reg_abort[2]; /* R13_ABORT R14_ABORT */
+    ARMword Reg_undef[2]; /* R13 UNDEF R14 UNDEF */
+    ARMword Reg_irq[2];   /* R13_IRQ R14_IRQ */
+    ARMword Reg_firq[7];  /* R8---R14 FIRQ */
+    ARMword Spsr[7];    /* the exception psr's */
+    ARMword Mode;        /* the current mode */
+    ARMword Bank;        /* the current register bank */
+    ARMword exclusive_tag;
+    ARMword exclusive_state;
+    ARMword exclusive_result;
+    ARMword CP15[VFP_BASE - CP15_BASE];
+    ARMword VFP[3]; /* FPSID, FPSCR, and FPEXC */
+    /* VFPv2 and VFPv3-D16 has 16 doubleword registers (D0-D16 or S0-S31).
+       VFPv3-D32/ASIMD may have up to 32 doubleword registers (D0-D31),
+        and only 32 singleword registers are accessible (S0-S31). */
+    ARMword ExtReg[VFP_REG_NUM];
+    /* ---- End of the ordered registers ---- */
+    
+    ARMword RegBank[7][16];    /* all the registers */
+    //chy:2003-08-19, used in arm xscale
+    /* 40 bit accumulator.  We always keep this 64 bits wide,
+       and move only 40 bits out of it in an MRA insn.  */
+    ARMdword Accumulator;
+
+    ARMword NFlag, ZFlag, CFlag, VFlag, IFFlags;    /* dummy flags for speed */
+        unsigned long long int icounter, debug_icounter, kernel_icounter;
+        unsigned int shifter_carry_out;
+        //ARMword translate_pc;
+
+    /* add armv6 flags dyf:2010-08-09 */
+    ARMword GEFlag, EFlag, AFlag, QFlags;
+    //chy:2003-08-19, used in arm v5e|xscale
+    ARMword SFlag;
+#ifdef MODET
+    ARMword TFlag;        /* Thumb state */
+#endif
+    ARMword instr, pc, temp;    /* saved register state */
+    ARMword loaded, decoded;    /* saved pipeline state */
+    //chy 2006-04-12 for ICE breakpoint
+    ARMword loaded_addr, decoded_addr;    /* saved pipeline state addr*/
+    unsigned int NumScycles, NumNcycles, NumIcycles, NumCcycles, NumFcycles;    /* emulated cycles used */
+    unsigned long long NumInstrs;    /* the number of instructions executed */
+    unsigned NumInstrsToExecute;
+
+    ARMword currentexaddr;
+    ARMword currentexval;
+    ARMword servaddr;
+
+    unsigned NextInstr;
+    unsigned VectorCatch;    /* caught exception mask */
+    unsigned CallDebug;    /* set to call the debugger */
+    unsigned CanWatch;    /* set by memory interface if its willing to suffer the
+                   overhead of checking for watchpoints on each memory
+                   access */
+    unsigned int StopHandle;
+
+    char *CommandLine;    /* Command Line from ARMsd */
+
+    ARMul_CPInits *CPInit[16];    /* coprocessor initialisers */
+    ARMul_CPExits *CPExit[16];    /* coprocessor finalisers */
+    ARMul_LDCs *LDC[16];    /* LDC instruction */
+    ARMul_STCs *STC[16];    /* STC instruction */
+    ARMul_MRCs *MRC[16];    /* MRC instruction */
+    ARMul_MCRs *MCR[16];    /* MCR instruction */
+    ARMul_MRRCs *MRRC[16];    /* MRRC instruction */
+    ARMul_MCRRs *MCRR[16];    /* MCRR instruction */
+    ARMul_CDPs *CDP[16];    /* CDP instruction */
+    ARMul_CPReads *CPRead[16];    /* Read CP register */
+    ARMul_CPWrites *CPWrite[16];    /* Write CP register */
+    unsigned char *CPData[16];    /* Coprocessor data */
+    unsigned char const *CPRegWords[16];    /* map of coprocessor register sizes */
+
+    unsigned EventSet;    /* the number of events in the queue */
+    unsigned int Now;    /* time to the nearest cycle */
+    struct EventNode **EventPtr;    /* the event list */
+
+    unsigned Debug;        /* show instructions as they are executed */
+    unsigned NresetSig;    /* reset the processor */
+    unsigned NfiqSig;
+    unsigned NirqSig;
+
+    unsigned abortSig;
+    unsigned NtransSig;
+    unsigned bigendSig;
+    unsigned prog32Sig;
+    unsigned data32Sig;
+    unsigned syscallSig;
+
+/* 2004-05-09 chy
+----------------------------------------------------------
+read ARM Architecture Reference Manual
+2.6.5 Data Abort
+There are three Abort Model in ARM arch.
+
+Early Abort Model: used in some ARMv3 and earlier implementations. In this
+model, base register wirteback occurred for LDC,LDM,STC,STM instructions, and
+the base register was unchanged for all other instructions. (oldest)
+
+Base Restored Abort Model: If a Data Abort occurs in an instruction which
+specifies base register writeback, the value in the base register is
+unchanged. (strongarm, xscale)
+
+Base Updated Abort Model: If a Data Abort occurs in an instruction which
+specifies base register writeback, the base register writeback still occurs.
+(arm720T)
+
+read PART B
+chap2 The System Control Coprocessor  CP15
+2.4 Register1:control register
+L(bit 6): in some ARMv3 and earlier implementations, the abort model of the
+processor could be configured:
+0=early Abort Model Selected(now obsolete)
+1=Late Abort Model selceted(same as Base Updated Abort Model)
+
+on later processors, this bit reads as 1 and ignores writes.
+-------------------------------------------------------------
+So, if lateabtSig=1, then it means Late Abort Model(Base Updated Abort Model)
+    if lateabtSig=0, then it means Base Restored Abort Model
+*/
+    unsigned lateabtSig;
+
+    ARMword Vector;        /* synthesize aborts in cycle modes */
+    ARMword Aborted;    /* sticky flag for aborts */
+    ARMword Reseted;    /* sticky flag for Reset */
+    ARMword Inted, LastInted;    /* sticky flags for interrupts */
+    ARMword Base;        /* extra hand for base writeback */
+    ARMword AbortAddr;    /* to keep track of Prefetch aborts */
+
+    const struct Dbg_HostosInterface *hostif;
+
+    int verbose;        /* non-zero means print various messages like the banner */
+
+    mmu_state_t mmu;
+    int mmu_inited;
+    //mem_state_t mem;
+    /*remove io_state to skyeye_mach_*.c files */
+    //io_state_t io;
+    /* point to a interrupt pending register. now for skyeye-ne2k.c
+     * later should move somewhere. e.g machine_config_t*/
+
+
+    //chy: 2003-08-11, for different arm core type
+    unsigned is_v4;        /* Are we emulating a v4 architecture (or higher) ?  */
+    unsigned is_v5;        /* Are we emulating a v5 architecture ?  */
+    unsigned is_v5e;    /* Are we emulating a v5e architecture ?  */
+    unsigned is_v6;        /* Are we emulating a v6 architecture ?  */
+    unsigned is_v7;        /* Are we emulating a v7 architecture ?  */
+    unsigned is_XScale;    /* Are we emulating an XScale architecture ?  */
+    unsigned is_iWMMXt;    /* Are we emulating an iWMMXt co-processor ?  */
+    unsigned is_ep9312;    /* Are we emulating a Cirrus Maverick co-processor ?  */
+    //chy 2005-09-19
+    unsigned is_pxa27x;    /* Are we emulating a Intel PXA27x co-processor ?  */
+    //chy: seems only used in xscale's CP14
+    unsigned int LastTime;    /* Value of last call to ARMul_Time() */
+    ARMword CP14R0_CCD;    /* used to count 64 clock cycles with CP14 R0 bit 3 set */
+
+
+//added by ksh:for handle different machs io 2004-3-5
+    ARMul_io mach_io;
+
+/*added by ksh,2004-11-26,some energy profiling*/
+    ARMul_Energy energy;
+
+//teawater add for next_dis 2004.10.27-----------------------
+    int disassemble;
+//AJ2D------------------------------------------
+
+//teawater add for arm2x86 2005.02.15-------------------------------------------
+    u32 trap;
+    u32 tea_break_addr;
+    u32 tea_break_ok;
+    int tea_pc;
+//AJ2D--------------------------------------------------------------------------
+//teawater add for arm2x86 2005.07.03-------------------------------------------
+
+    /*
+     * 2007-01-24 removed the term-io functions by Anthony Lee,
+     * moved to "device/uart/skyeye_uart_stdio.c".
+     */
+
+//AJ2D--------------------------------------------------------------------------
+//teawater add for arm2x86 2005.07.05-------------------------------------------
+    //arm_arm A2-18
+    int abort_model;    //0 Base Restored Abort Model, 1 the Early Abort Model, 2 Base Updated Abort Model 
+//AJ2D--------------------------------------------------------------------------
+//teawater change for return if running tb dirty 2005.07.09---------------------
+    void *tb_now;
+//AJ2D--------------------------------------------------------------------------
+
+//teawater add for record reg value to ./reg.txt 2005.07.10---------------------
+    FILE *tea_reg_fd;
+//AJ2D--------------------------------------------------------------------------
+
+/*added by ksh in 2005-10-1*/
+    cpu_config_t *cpu;
+    //mem_config_t *mem_bank;
+
+/* added LPC remap function */
+    int vector_remap_flag;
+    u32 vector_remap_addr;
+    u32 vector_remap_size;
+
+    u32 step;
+    u32 cycle;
+    int stop_simulator;
+    conf_object_t *dyncom_cpu;
+//teawater add DBCT_TEST_SPEED 2005.10.04---------------------------------------
+#ifdef DBCT_TEST_SPEED
+    uint64_t    instr_count;
+#endif    //DBCT_TEST_SPEED
+//    FILE * state_log;
+//diff log
+//#if DIFF_STATE
+    FILE * state_log;
+//#endif
+    /* monitored memory for exclusice access */
+    ARMword exclusive_tag_array[128];
+    /* 1 means exclusive access and 0 means open access */
+    ARMword exclusive_access_state;
+
+    memory_space_intf space;    
+    u32 CurrInstr;
+    u32 last_pc; /* the last pc executed */
+    u32 last_instr; /* the last inst executed */
+    u32 WriteAddr[17];
+    u32 WriteData[17];
+    u32 WritePc[17];
+    u32 CurrWrite;
+};
+#define DIFF_WRITE 0
+
+typedef ARMul_State arm_core_t;
+#define ResetPin NresetSig
+#define FIQPin NfiqSig
+#define IRQPin NirqSig
+#define AbortPin abortSig
+#define TransPin NtransSig
+#define BigEndPin bigendSig
+#define Prog32Pin prog32Sig
+#define Data32Pin data32Sig
+#define LateAbortPin lateabtSig
+
+/***************************************************************************\
+*                        Types of ARM we know about                         *
+\***************************************************************************/
+
+/* The bitflags */
+#define ARM_Fix26_Prop   0x01
+#define ARM_Nexec_Prop   0x02
+#define ARM_Debug_Prop   0x10
+#define ARM_Isync_Prop   ARM_Debug_Prop
+#define ARM_Lock_Prop    0x20
+//chy 2003-08-11 
+#define ARM_v4_Prop      0x40
+#define ARM_v5_Prop      0x80
+/*jeff.du 2010-08-05 */
+#define ARM_v6_Prop      0xc0
+
+#define ARM_v5e_Prop     0x100
+#define ARM_XScale_Prop  0x200
+#define ARM_ep9312_Prop  0x400
+#define ARM_iWMMXt_Prop  0x800
+//chy 2005-09-19
+#define ARM_PXA27X_Prop  0x1000
+#define ARM_v7_Prop      0x2000
+
+/* ARM2 family */
+#define ARM2    (ARM_Fix26_Prop)
+#define ARM2as  ARM2
+#define ARM61   ARM2
+#define ARM3    ARM2
+
+#ifdef ARM60            /* previous definition in armopts.h */
+#undef ARM60
+#endif
+
+/* ARM6 family */
+#define ARM6    (ARM_Lock_Prop)
+#define ARM60   ARM6
+#define ARM600  ARM6
+#define ARM610  ARM6
+#define ARM620  ARM6
+
+
+/***************************************************************************\
+*                   Macros to extract instruction fields                    *
+\***************************************************************************/
+
+#define BIT(n) ( (ARMword)(instr>>(n))&1)    /* bit n of instruction */
+#define BITS(m,n) ( (ARMword)(instr<<(31-(n))) >> ((31-(n))+(m)) )    /* bits m to n of instr */
+#define TOPBITS(n) (instr >> (n))    /* bits 31 to n of instr */
+
+/***************************************************************************\
+*                      The hardware vector addresses                        *
+\***************************************************************************/
+
+#define ARMResetV 0L
+#define ARMUndefinedInstrV 4L
+#define ARMSWIV 8L
+#define ARMPrefetchAbortV 12L
+#define ARMDataAbortV 16L
+#define ARMAddrExceptnV 20L
+#define ARMIRQV 24L
+#define ARMFIQV 28L
+#define ARMErrorV 32L        /* This is an offset, not an address ! */
+
+#define ARMul_ResetV ARMResetV
+#define ARMul_UndefinedInstrV ARMUndefinedInstrV
+#define ARMul_SWIV ARMSWIV
+#define ARMul_PrefetchAbortV ARMPrefetchAbortV
+#define ARMul_DataAbortV ARMDataAbortV
+#define ARMul_AddrExceptnV ARMAddrExceptnV
+#define ARMul_IRQV ARMIRQV
+#define ARMul_FIQV ARMFIQV
+
+/***************************************************************************\
+*                          Mode and Bank Constants                          *
+\***************************************************************************/
+
+#define USER26MODE 0L
+#define FIQ26MODE 1L
+#define IRQ26MODE 2L
+#define SVC26MODE 3L
+#define USER32MODE 16L
+#define FIQ32MODE 17L
+#define IRQ32MODE 18L
+#define SVC32MODE 19L
+#define ABORT32MODE 23L
+#define UNDEF32MODE 27L
+//chy 2006-02-15 add system32 mode
+#define SYSTEM32MODE 31L
+
+#define ARM32BITMODE (state->Mode > 3)
+#define ARM26BITMODE (state->Mode <= 3)
+#define ARMMODE (state->Mode)
+#define ARMul_MODEBITS 0x1fL
+#define ARMul_MODE32BIT ARM32BITMODE
+#define ARMul_MODE26BIT ARM26BITMODE
+
+#define USERBANK 0
+#define FIQBANK 1
+#define IRQBANK 2
+#define SVCBANK 3
+#define ABORTBANK 4
+#define UNDEFBANK 5
+#define DUMMYBANK 6
+#define SYSTEMBANK USERBANK
+#define BANK_CAN_ACCESS_SPSR(bank)  \
+  ((bank) != USERBANK && (bank) != SYSTEMBANK && (bank) != DUMMYBANK)
+
+
+/***************************************************************************\
+*                  Definitons of things in the emulator                     *
+\***************************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern void ARMul_EmulateInit (void);
+extern void ARMul_Reset (ARMul_State * state);
+#ifdef __cplusplus
+    }
+#endif
+extern ARMul_State *ARMul_NewState (ARMul_State * state);
+extern ARMword ARMul_DoProg (ARMul_State * state);
+extern ARMword ARMul_DoInstr (ARMul_State * state);
+/***************************************************************************\
+*                Definitons of things for event handling                    *
+\***************************************************************************/
+
+extern void ARMul_ScheduleEvent (ARMul_State * state, unsigned int delay,
+                 unsigned (*func) ());
+extern void ARMul_EnvokeEvent (ARMul_State * state);
+extern unsigned int ARMul_Time (ARMul_State * state);
+
+/***************************************************************************\
+*                          Useful support routines                          *
+\***************************************************************************/
+
+extern ARMword ARMul_GetReg (ARMul_State * state, unsigned mode,
+                 unsigned reg);
+extern void ARMul_SetReg (ARMul_State * state, unsigned mode, unsigned reg,
+              ARMword value);
+extern ARMword ARMul_GetPC (ARMul_State * state);
+extern ARMword ARMul_GetNextPC (ARMul_State * state);
+extern void ARMul_SetPC (ARMul_State * state, ARMword value);
+extern ARMword ARMul_GetR15 (ARMul_State * state);
+extern void ARMul_SetR15 (ARMul_State * state, ARMword value);
+
+extern ARMword ARMul_GetCPSR (ARMul_State * state);
+extern void ARMul_SetCPSR (ARMul_State * state, ARMword value);
+extern ARMword ARMul_GetSPSR (ARMul_State * state, ARMword mode);
+extern void ARMul_SetSPSR (ARMul_State * state, ARMword mode, ARMword value);
+
+/***************************************************************************\
+*                  Definitons of things to handle aborts                    *
+\***************************************************************************/
+
+extern void ARMul_Abort (ARMul_State * state, ARMword address);
+#ifdef MODET
+#define ARMul_ABORTWORD (state->TFlag ? 0xefffdfff : 0xefffffff)    /* SWI -1 */
+#define ARMul_PREFETCHABORT(address) if (state->AbortAddr == 1) \
+                                        state->AbortAddr = (address & (state->TFlag ? ~1L : ~3L))
+#else
+#define ARMul_ABORTWORD 0xefffffff    /* SWI -1 */
+#define ARMul_PREFETCHABORT(address) if (state->AbortAddr == 1) \
+                                        state->AbortAddr = (address & ~3L)
+#endif
+#define ARMul_DATAABORT(address) state->abortSig = HIGH ; \
+                                 state->Aborted = ARMul_DataAbortV ;
+#define ARMul_CLEARABORT state->abortSig = LOW
+
+/***************************************************************************\
+*              Definitons of things in the memory interface                 *
+\***************************************************************************/
+
+extern unsigned ARMul_MemoryInit (ARMul_State * state,
+                  unsigned int initmemsize);
+extern void ARMul_MemoryExit (ARMul_State * state);
+
+extern ARMword ARMul_LoadInstrS (ARMul_State * state, ARMword address,
+                 ARMword isize);
+extern ARMword ARMul_LoadInstrN (ARMul_State * state, ARMword address,
+                 ARMword isize);
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern ARMword ARMul_ReLoadInstr (ARMul_State * state, ARMword address,
+                  ARMword isize);
+#ifdef __cplusplus
+    }
+#endif
+extern ARMword ARMul_LoadWordS (ARMul_State * state, ARMword address);
+extern ARMword ARMul_LoadWordN (ARMul_State * state, ARMword address);
+extern ARMword ARMul_LoadHalfWord (ARMul_State * state, ARMword address);
+extern ARMword ARMul_LoadByte (ARMul_State * state, ARMword address);
+
+extern void ARMul_StoreWordS (ARMul_State * state, ARMword address,
+                  ARMword data);
+extern void ARMul_StoreWordN (ARMul_State * state, ARMword address,
+                  ARMword data);
+extern void ARMul_StoreHalfWord (ARMul_State * state, ARMword address,
+                 ARMword data);
+extern void ARMul_StoreByte (ARMul_State * state, ARMword address,
+                 ARMword data);
+
+extern ARMword ARMul_SwapWord (ARMul_State * state, ARMword address,
+                   ARMword data);
+extern ARMword ARMul_SwapByte (ARMul_State * state, ARMword address,
+                   ARMword data);
+
+extern void ARMul_Icycles (ARMul_State * state, unsigned number,
+               ARMword address);
+extern void ARMul_Ccycles (ARMul_State * state, unsigned number,
+               ARMword address);
+
+extern ARMword ARMul_ReadWord (ARMul_State * state, ARMword address);
+extern ARMword ARMul_ReadByte (ARMul_State * state, ARMword address);
+extern void ARMul_WriteWord (ARMul_State * state, ARMword address,
+                 ARMword data);
+extern void ARMul_WriteByte (ARMul_State * state, ARMword address,
+                 ARMword data);
+
+extern ARMword ARMul_MemAccess (ARMul_State * state, ARMword, ARMword,
+                ARMword, ARMword, ARMword, ARMword, ARMword,
+                ARMword, ARMword, ARMword);
+
+/***************************************************************************\
+*            Definitons of things in the co-processor interface             *
+\***************************************************************************/
+
+#define ARMul_FIRST 0
+#define ARMul_TRANSFER 1
+#define ARMul_BUSY 2
+#define ARMul_DATA 3
+#define ARMul_INTERRUPT 4
+#define ARMul_DONE 0
+#define ARMul_CANT 1
+#define ARMul_INC 3
+
+#define ARMul_CP13_R0_FIQ       0x1
+#define ARMul_CP13_R0_IRQ       0x2
+#define ARMul_CP13_R8_PMUS      0x1
+
+#define ARMul_CP14_R0_ENABLE    0x0001
+#define ARMul_CP14_R0_CLKRST    0x0004
+#define ARMul_CP14_R0_CCD       0x0008
+#define ARMul_CP14_R0_INTEN0    0x0010
+#define ARMul_CP14_R0_INTEN1    0x0020
+#define ARMul_CP14_R0_INTEN2    0x0040
+#define ARMul_CP14_R0_FLAG0     0x0100
+#define ARMul_CP14_R0_FLAG1     0x0200
+#define ARMul_CP14_R0_FLAG2     0x0400
+#define ARMul_CP14_R10_MOE_IB   0x0004
+#define ARMul_CP14_R10_MOE_DB   0x0008
+#define ARMul_CP14_R10_MOE_BT   0x000c
+#define ARMul_CP15_R1_ENDIAN    0x0080
+#define ARMul_CP15_R1_ALIGN     0x0002
+#define ARMul_CP15_R5_X         0x0400
+#define ARMul_CP15_R5_ST_ALIGN  0x0001
+#define ARMul_CP15_R5_IMPRE     0x0406
+#define ARMul_CP15_R5_MMU_EXCPT 0x0400
+#define ARMul_CP15_DBCON_M      0x0100
+#define ARMul_CP15_DBCON_E1     0x000c
+#define ARMul_CP15_DBCON_E0     0x0003
+
+extern unsigned ARMul_CoProInit (ARMul_State * state);
+extern void ARMul_CoProExit (ARMul_State * state);
+extern void ARMul_CoProAttach (ARMul_State * state, unsigned number,
+                   ARMul_CPInits * init, ARMul_CPExits * exit,
+                   ARMul_LDCs * ldc, ARMul_STCs * stc,
+                   ARMul_MRCs * mrc, ARMul_MCRs * mcr,
+                   ARMul_MRRCs * mrrc, ARMul_MCRRs * mcrr,
+                   ARMul_CDPs * cdp,
+                   ARMul_CPReads * read, ARMul_CPWrites * write);
+extern void ARMul_CoProDetach (ARMul_State * state, unsigned number);
+
+/***************************************************************************\
+*               Definitons of things in the host environment                *
+\***************************************************************************/
+
+extern unsigned ARMul_OSInit (ARMul_State * state);
+extern void ARMul_OSExit (ARMul_State * state);
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+extern unsigned ARMul_OSHandleSWI (ARMul_State * state, ARMword number);
+#ifdef __cplusplus
+}
+#endif
+
+
+extern ARMword ARMul_OSLastErrorP (ARMul_State * state);
+
+extern ARMword ARMul_Debug (ARMul_State * state, ARMword pc, ARMword instr);
+extern unsigned ARMul_OSException (ARMul_State * state, ARMword vector,
+                   ARMword pc);
+extern int rdi_log;
+
+/***************************************************************************\
+*                            Host-dependent stuff                           *
+\***************************************************************************/
+
+#ifdef macintosh
+pascal void SpinCursor (short increment);    /* copied from CursorCtl.h */
+# define HOURGLASS           SpinCursor( 1 )
+# define HOURGLASS_RATE      1023    /* 2^n - 1 */
+#endif
+
+//teawater add for arm2x86 2005.02.14-------------------------------------------
+/*ywc 2005-03-31*/
+/*
+#include "arm2x86.h"
+#include "arm2x86_dp.h"
+#include "arm2x86_movl.h"
+#include "arm2x86_psr.h"
+#include "arm2x86_shift.h"
+#include "arm2x86_mem.h"
+#include "arm2x86_mul.h"
+#include "arm2x86_test.h"
+#include "arm2x86_other.h"
+#include "list.h"
+#include "tb.h"
+*/
+#define EQ 0
+#define NE 1
+#define CS 2
+#define CC 3
+#define MI 4
+#define PL 5
+#define VS 6
+#define VC 7
+#define HI 8
+#define LS 9
+#define GE 10
+#define LT 11
+#define GT 12
+#define LE 13
+#define AL 14
+#define NV 15
+
+#ifndef NFLAG
+#define NFLAG    state->NFlag
+#endif //NFLAG
+
+#ifndef ZFLAG
+#define ZFLAG    state->ZFlag
+#endif //ZFLAG
+
+#ifndef CFLAG
+#define CFLAG    state->CFlag
+#endif //CFLAG
+
+#ifndef VFLAG
+#define VFLAG    state->VFlag
+#endif //VFLAG
+
+#ifndef IFLAG
+#define IFLAG    (state->IFFlags >> 1)
+#endif //IFLAG
+
+#ifndef FFLAG
+#define FFLAG    (state->IFFlags & 1)
+#endif //FFLAG
+
+#ifndef IFFLAGS
+#define IFFLAGS    state->IFFlags
+#endif //VFLAG
+
+#define FLAG_MASK    0xf0000000
+#define NBIT_SHIFT    31
+#define ZBIT_SHIFT    30
+#define CBIT_SHIFT    29
+#define VBIT_SHIFT    28
+#ifdef DBCT
+//teawater change for local tb branch directly jump 2005.10.18------------------
+#include "dbct/list.h"
+#include "dbct/arm2x86.h"
+#include "dbct/arm2x86_dp.h"
+#include "dbct/arm2x86_movl.h"
+#include "dbct/arm2x86_psr.h"
+#include "dbct/arm2x86_shift.h"
+#include "dbct/arm2x86_mem.h"
+#include "dbct/arm2x86_mul.h"
+#include "dbct/arm2x86_test.h"
+#include "dbct/arm2x86_other.h"
+#include "dbct/arm2x86_coproc.h"
+#include "dbct/tb.h"
+#endif
+//AJ2D--------------------------------------------------------------------------
+//AJ2D--------------------------------------------------------------------------
+#define SKYEYE_OUTREGS(fd) { fprintf ((fd), "R %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,C %x,S %x,%x,%x,%x,%x,%x,%x,M %x,B %x,E %x,I %x,P %x,T %x,L %x,D %x,",\
+                         state->Reg[0],state->Reg[1],state->Reg[2],state->Reg[3], \
+                         state->Reg[4],state->Reg[5],state->Reg[6],state->Reg[7], \
+                         state->Reg[8],state->Reg[9],state->Reg[10],state->Reg[11], \
+                         state->Reg[12],state->Reg[13],state->Reg[14],state->Reg[15], \
+             state->Cpsr,   state->Spsr[0], state->Spsr[1], state->Spsr[2],\
+                         state->Spsr[3],state->Spsr[4], state->Spsr[5], state->Spsr[6],\
+             state->Mode,state->Bank,state->ErrorCode,state->instr,state->pc,\
+             state->temp,state->loaded,state->decoded);}
+
+#define SKYEYE_OUTMOREREGS(fd) { fprintf ((fd),"\
+RUs %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,\
+RF  %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,\
+RI  %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,\
+RS  %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,\
+RA  %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,\
+RUn %x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",\
+                         state->RegBank[0][0],state->RegBank[0][1],state->RegBank[0][2],state->RegBank[0][3], \
+                         state->RegBank[0][4],state->RegBank[0][5],state->RegBank[0][6],state->RegBank[0][7], \
+                         state->RegBank[0][8],state->RegBank[0][9],state->RegBank[0][10],state->RegBank[0][11], \
+                         state->RegBank[0][12],state->RegBank[0][13],state->RegBank[0][14],state->RegBank[0][15], \
+                         state->RegBank[1][0],state->RegBank[1][1],state->RegBank[1][2],state->RegBank[1][3], \
+                         state->RegBank[1][4],state->RegBank[1][5],state->RegBank[1][6],state->RegBank[1][7], \
+                         state->RegBank[1][8],state->RegBank[1][9],state->RegBank[1][10],state->RegBank[1][11], \
+                         state->RegBank[1][12],state->RegBank[1][13],state->RegBank[1][14],state->RegBank[1][15], \
+                         state->RegBank[2][0],state->RegBank[2][1],state->RegBank[2][2],state->RegBank[2][3], \
+                         state->RegBank[2][4],state->RegBank[2][5],state->RegBank[2][6],state->RegBank[2][7], \
+                         state->RegBank[2][8],state->RegBank[2][9],state->RegBank[2][10],state->RegBank[2][11], \
+                         state->RegBank[2][12],state->RegBank[2][13],state->RegBank[2][14],state->RegBank[2][15], \
+                         state->RegBank[3][0],state->RegBank[3][1],state->RegBank[3][2],state->RegBank[3][3], \
+                         state->RegBank[3][4],state->RegBank[3][5],state->RegBank[3][6],state->RegBank[3][7], \
+                         state->RegBank[3][8],state->RegBank[3][9],state->RegBank[3][10],state->RegBank[3][11], \
+                         state->RegBank[3][12],state->RegBank[3][13],state->RegBank[3][14],state->RegBank[3][15], \
+                         state->RegBank[4][0],state->RegBank[4][1],state->RegBank[4][2],state->RegBank[4][3], \
+                         state->RegBank[4][4],state->RegBank[4][5],state->RegBank[4][6],state->RegBank[4][7], \
+                         state->RegBank[4][8],state->RegBank[4][9],state->RegBank[4][10],state->RegBank[4][11], \
+                         state->RegBank[4][12],state->RegBank[4][13],state->RegBank[4][14],state->RegBank[4][15], \
+                         state->RegBank[5][0],state->RegBank[5][1],state->RegBank[5][2],state->RegBank[5][3], \
+                         state->RegBank[5][4],state->RegBank[5][5],state->RegBank[5][6],state->RegBank[5][7], \
+                         state->RegBank[5][8],state->RegBank[5][9],state->RegBank[5][10],state->RegBank[5][11], \
+                         state->RegBank[5][12],state->RegBank[5][13],state->RegBank[5][14],state->RegBank[5][15] \
+        );}
+
+
+#define SA1110        0x6901b110
+#define SA1100        0x4401a100
+#define PXA250          0x69052100
+#define PXA270           0x69054110
+//#define PXA250              0x69052903
+// 0x69052903;  //PXA250 B1 from intel 278522-001.pdf
+
+
+extern void ARMul_UndefInstr (ARMul_State *, ARMword);
+extern void ARMul_FixCPSR (ARMul_State *, ARMword, ARMword);
+extern void ARMul_FixSPSR (ARMul_State *, ARMword, ARMword);
+extern void ARMul_ConsolePrint (ARMul_State *, const char *, ...);
+extern void ARMul_SelectProcessor (ARMul_State *, unsigned);
+
+#define DIFF_LOG 0
+#define SAVE_LOG 0
+
+#endif /* _ARMDEFS_H_ */
diff --git a/src/core/arm/skyeye_common/armemu.h b/src/core/arm/skyeye_common/armemu.h
new file mode 100644
index 000000000..c0f0270fe
--- /dev/null
+++ b/src/core/arm/skyeye_common/armemu.h
@@ -0,0 +1,656 @@
+/*  armemu.h -- ARMulator emulation macros:  ARM6 Instruction Emulator.
+    Copyright (C) 1994 Advanced RISC Machines Ltd.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+#ifndef __ARMEMU_H__
+#define __ARMEMU_H__
+
+
+#include "core/arm/skyeye_common/armdefs.h"
+//#include "skyeye.h"
+
+//extern ARMword isize;
+
+#define DEBUG(...) DEBUG_LOG(ARM11, __VA_ARGS__)
+
+/* Condition code values.  */
+#define EQ 0
+#define NE 1
+#define CS 2
+#define CC 3
+#define MI 4
+#define PL 5
+#define VS 6
+#define VC 7
+#define HI 8
+#define LS 9
+#define GE 10
+#define LT 11
+#define GT 12
+#define LE 13
+#define AL 14
+#define NV 15
+
+/* Shift Opcodes.  */
+#define LSL 0
+#define LSR 1
+#define ASR 2
+#define ROR 3
+
+/* Macros to twiddle the status flags and mode.  */
+#define NBIT ((unsigned)1L << 31)
+#define ZBIT (1L << 30)
+#define CBIT (1L << 29)
+#define VBIT (1L << 28)
+#define SBIT (1L << 27)
+#define IBIT (1L << 7)
+#define FBIT (1L << 6)
+#define IFBITS (3L << 6)
+#define R15IBIT (1L << 27)
+#define R15FBIT (1L << 26)
+#define R15IFBITS (3L << 26)
+
+#define POS(i) ( (~(i)) >> 31 )
+#define NEG(i) ( (i) >> 31 )
+
+#ifdef MODET			/* Thumb support.  */
+/* ??? This bit is actually in the low order bit of the PC in the hardware.
+   It isn't clear if the simulator needs to model that or not.  */
+#define TBIT (1L << 5)
+#define TFLAG state->TFlag
+#define SETT state->TFlag = 1
+#define CLEART state->TFlag = 0
+#define ASSIGNT(res) state->TFlag = res
+#define INSN_SIZE (TFLAG ? 2 : 4)
+#else
+#define INSN_SIZE 4
+#endif
+
+/*add armv6 CPSR  feature*/
+#define EFLAG state->EFlag
+#define SETE state->EFlag = 1
+#define CLEARE state->EFlag = 0
+#define ASSIGNE(res) state->NFlag = res
+
+#define AFLAG state->AFlag
+#define SETA  state->AFlag = 1
+#define CLEARA state->AFlag = 0
+#define ASSIGNA(res) state->NFlag = res
+
+#define QFLAG state->QFlag
+#define SETQ state->QFlag = 1
+#define CLEARQ state->AFlag = 0
+#define ASSIGNQ(res) state->QFlag = res
+
+/* add end */
+
+#define NFLAG state->NFlag
+#define SETN state->NFlag = 1
+#define CLEARN state->NFlag = 0
+#define ASSIGNN(res) state->NFlag = res
+
+#define ZFLAG state->ZFlag
+#define SETZ state->ZFlag = 1
+#define CLEARZ state->ZFlag = 0
+#define ASSIGNZ(res) state->ZFlag = res
+
+#define CFLAG state->CFlag
+#define SETC state->CFlag = 1
+#define CLEARC state->CFlag = 0
+#define ASSIGNC(res) state->CFlag = res
+
+#define VFLAG state->VFlag
+#define SETV state->VFlag = 1
+#define CLEARV state->VFlag = 0
+#define ASSIGNV(res) state->VFlag = res
+
+#define SFLAG state->SFlag
+#define SETS state->SFlag = 1
+#define CLEARS state->SFlag = 0
+#define ASSIGNS(res) state->SFlag = res
+
+#define IFLAG (state->IFFlags >> 1)
+#define FFLAG (state->IFFlags & 1)
+#define IFFLAGS state->IFFlags
+#define ASSIGNINT(res) state->IFFlags = (((res) >> 6) & 3)
+#define ASSIGNR15INT(res) state->IFFlags = (((res) >> 26) & 3) ;
+
+#define PSR_FBITS (0xff000000L)
+#define PSR_SBITS (0x00ff0000L)
+#define PSR_XBITS (0x0000ff00L)
+#define PSR_CBITS (0x000000ffL)
+
+#if defined MODE32 || defined MODET
+#define CCBITS (0xf8000000L)
+#else
+#define CCBITS (0xf0000000L)
+#endif
+
+#define INTBITS (0xc0L)
+
+#if defined MODET && defined MODE32
+#define PCBITS (0xffffffffL)
+#else
+#define PCBITS (0xfffffffcL)
+#endif
+
+#define MODEBITS (0x1fL)
+#define R15INTBITS (3L << 26)
+
+#if defined MODET && defined MODE32
+#define R15PCBITS (0x03ffffffL)
+#else
+#define R15PCBITS (0x03fffffcL)
+#endif
+
+#define R15PCMODEBITS (0x03ffffffL)
+#define R15MODEBITS (0x3L)
+
+#ifdef MODE32
+#define PCMASK PCBITS
+#define PCWRAP(pc) (pc)
+#else
+#define PCMASK R15PCBITS
+#define PCWRAP(pc) ((pc) & R15PCBITS)
+#endif
+
+#define PC (state->Reg[15] & PCMASK)
+#define R15CCINTMODE (state->Reg[15] & (CCBITS | R15INTBITS | R15MODEBITS))
+#define R15INT (state->Reg[15] & R15INTBITS)
+#define R15INTPC (state->Reg[15] & (R15INTBITS | R15PCBITS))
+#define R15INTPCMODE (state->Reg[15] & (R15INTBITS | R15PCBITS | R15MODEBITS))
+#define R15INTMODE (state->Reg[15] & (R15INTBITS | R15MODEBITS))
+#define R15PC (state->Reg[15] & R15PCBITS)
+#define R15PCMODE (state->Reg[15] & (R15PCBITS | R15MODEBITS))
+#define R15MODE (state->Reg[15] & R15MODEBITS)
+
+#define ECC ((NFLAG << 31) | (ZFLAG << 30) | (CFLAG << 29) | (VFLAG << 28) | (SFLAG << 27))
+#define EINT (IFFLAGS << 6)
+#define ER15INT (IFFLAGS << 26)
+#define EMODE (state->Mode)
+
+#ifdef MODET
+#define CPSR (ECC | EINT | EMODE | (TFLAG << 5))
+#else
+#define CPSR (ECC | EINT | EMODE)
+#endif
+
+#ifdef MODE32
+#define PATCHR15
+#else
+#define PATCHR15 state->Reg[15] = ECC | ER15INT | EMODE | R15PC
+#endif
+
+#define GETSPSR(bank) (ARMul_GetSPSR (state, EMODE))
+#define SETPSR_F(d,s) d = ((d) & ~PSR_FBITS) | ((s) & PSR_FBITS)
+#define SETPSR_S(d,s) d = ((d) & ~PSR_SBITS) | ((s) & PSR_SBITS)
+#define SETPSR_X(d,s) d = ((d) & ~PSR_XBITS) | ((s) & PSR_XBITS)
+#define SETPSR_C(d,s) d = ((d) & ~PSR_CBITS) | ((s) & PSR_CBITS)
+
+#define SETR15PSR(s) 								\
+  do										\
+    {										\
+      if (state->Mode == USER26MODE)						\
+        {									\
+          state->Reg[15] = ((s) & CCBITS) | R15PC | ER15INT | EMODE;		\
+          ASSIGNN ((state->Reg[15] & NBIT) != 0);				\
+          ASSIGNZ ((state->Reg[15] & ZBIT) != 0);				\
+          ASSIGNC ((state->Reg[15] & CBIT) != 0);				\
+          ASSIGNV ((state->Reg[15] & VBIT) != 0);				\
+        }									\
+      else									\
+        {									\
+          state->Reg[15] = R15PC | ((s) & (CCBITS | R15INTBITS | R15MODEBITS));	\
+          ARMul_R15Altered (state);						\
+       }									\
+    }										\
+  while (0)
+
+#define SETABORT(i, m, d)						\
+  do									\
+    { 									\
+      int SETABORT_mode = (m);						\
+									\
+      ARMul_SetSPSR (state, SETABORT_mode, ARMul_GetCPSR (state));	\
+      ARMul_SetCPSR (state, ((ARMul_GetCPSR (state) & ~(EMODE | TBIT))	\
+			     | (i) | SETABORT_mode));			\
+      state->Reg[14] = temp - (d);					\
+    }									\
+  while (0)
+
+#ifndef MODE32
+#define VECTORS 0x20
+#define LEGALADDR 0x03ffffff
+#define VECTORACCESS(address) (address < VECTORS && ARMul_MODE26BIT && state->prog32Sig)
+#define ADDREXCEPT(address)   (address > LEGALADDR && !state->data32Sig)
+#endif
+
+#define INTERNALABORT(address)			\
+  do						\
+    {						\
+      if (address < VECTORS)			\
+	state->Aborted = ARMul_DataAbortV;	\
+      else					\
+	state->Aborted = ARMul_AddrExceptnV;	\
+    }						\
+  while (0)
+
+#ifdef MODE32
+#define TAKEABORT ARMul_Abort (state, ARMul_DataAbortV)
+#else
+#define TAKEABORT 					\
+  do							\
+    {							\
+      if (state->Aborted == ARMul_AddrExceptnV) 	\
+	ARMul_Abort (state, ARMul_AddrExceptnV); 	\
+      else 						\
+	ARMul_Abort (state, ARMul_DataAbortV);		\
+    }							\
+  while (0)
+#endif
+
+#define CPTAKEABORT					\
+  do							\
+    {							\
+      if (!state->Aborted)				\
+	ARMul_Abort (state, ARMul_UndefinedInstrV); 	\
+      else if (state->Aborted == ARMul_AddrExceptnV) 	\
+	ARMul_Abort (state, ARMul_AddrExceptnV); 	\
+      else 						\
+	ARMul_Abort (state, ARMul_DataAbortV);		\
+    }							\
+  while (0);
+
+
+/* Different ways to start the next instruction.  */
+#define SEQ           0
+#define NONSEQ        1
+#define PCINCEDSEQ    2
+#define PCINCEDNONSEQ 3
+#define PRIMEPIPE     4
+#define RESUME        8
+
+/************************************/
+/* shenoubang 2012-3-11 */
+/* for armv7 DBG DMB DSB instr*/
+/************************************/
+#define MBReqTypes_Writes	0
+#define MBReqTypes_All		1
+
+#define NORMALCYCLE state->NextInstr = 0
+#define BUSUSEDN    state->NextInstr |= 1	/* The next fetch will be an N cycle.  */
+#define BUSUSEDINCPCS						\
+  do								\
+    {								\
+      if (! state->is_v4)					\
+        {							\
+	  /* A standard PC inc and an S cycle.  */		\
+      state->Reg[15] += INSN_SIZE;				\
+	  state->NextInstr = (state->NextInstr & 0xff) | 2;	\
+	}							\
+    }								\
+  while (0)
+
+#define BUSUSEDINCPCN					\
+  do							\
+    {							\
+      if (state->is_v4)					\
+	BUSUSEDN;					\
+      else						\
+	{						\
+	  /* A standard PC inc and an N cycle.  */	\
+      state->Reg[15] += INSN_SIZE;			\
+	  state->NextInstr |= 3;			\
+	}						\
+    }							\
+  while (0)
+
+#define INCPC 			\
+  do				\
+    {				\
+      /* A standard PC inc.  */	\
+      state->Reg[15] += INSN_SIZE;	\
+      state->NextInstr |= 2;	\
+    }				\
+  while (0)
+
+#define FLUSHPIPE state->NextInstr |= PRIMEPIPE
+
+/* Cycle based emulation.  */
+
+#define OUTPUTCP(i,a,b)
+#define NCYCLE
+#define SCYCLE
+#define ICYCLE
+#define CCYCLE
+#define NEXTCYCLE(c)
+
+/* Macros to extract parts of instructions.  */
+#define DESTReg (BITS (12, 15))
+#define LHSReg  (BITS (16, 19))
+#define RHSReg  (BITS ( 0,  3))
+
+#define DEST (state->Reg[DESTReg])
+
+#ifdef MODE32
+#ifdef MODET
+#define LHS ((LHSReg == 15) ? (state->Reg[15] & 0xFFFFFFFC) : (state->Reg[LHSReg]))
+#define RHS ((RHSReg == 15) ? (state->Reg[15] & 0xFFFFFFFC) : (state->Reg[RHSReg]))
+#else
+#define LHS (state->Reg[LHSReg])
+#define RHS (state->Reg[RHSReg])
+#endif
+#else
+#define LHS ((LHSReg == 15) ? R15PC : (state->Reg[LHSReg]))
+#define RHS ((RHSReg == 15) ? R15PC : (state->Reg[RHSReg]))
+#endif
+
+#define MULDESTReg (BITS (16, 19))
+#define MULLHSReg  (BITS ( 0,  3))
+#define MULRHSReg  (BITS ( 8, 11))
+#define MULACCReg  (BITS (12, 15))
+
+#define DPImmRHS (ARMul_ImmedTable[BITS(0, 11)])
+#define DPSImmRHS temp = BITS(0,11) ; \
+                  rhs = ARMul_ImmedTable[temp] ; \
+                  if (temp > 255) /* There was a shift.  */ \
+                     ASSIGNC (rhs >> 31) ;
+
+#ifdef MODE32
+#define DPRegRHS  ((BITS (4,11) == 0) ? state->Reg[RHSReg] \
+                                      : GetDPRegRHS (state, instr))
+#define DPSRegRHS ((BITS (4,11) == 0) ? state->Reg[RHSReg] \
+                                      : GetDPSRegRHS (state, instr))
+#else
+#define DPRegRHS  ((BITS (0, 11) < 15) ? state->Reg[RHSReg] \
+                                       : GetDPRegRHS (state, instr))
+#define DPSRegRHS ((BITS (0, 11) < 15) ? state->Reg[RHSReg] \
+                                       : GetDPSRegRHS (state, instr))
+#endif
+
+#define LSBase state->Reg[LHSReg]
+#define LSImmRHS (BITS(0,11))
+
+#ifdef MODE32
+#define LSRegRHS ((BITS (4, 11) == 0) ? state->Reg[RHSReg] \
+                                      : GetLSRegRHS (state, instr))
+#else
+#define LSRegRHS ((BITS (0, 11) < 15) ? state->Reg[RHSReg] \
+                                      : GetLSRegRHS (state, instr))
+#endif
+
+#define LSMNumRegs ((ARMword) ARMul_BitList[BITS (0, 7)] + \
+                    (ARMword) ARMul_BitList[BITS (8, 15)] )
+#define LSMBaseFirst ((LHSReg == 0 && BIT (0)) || \
+                      (BIT (LHSReg) && BITS (0, LHSReg - 1) == 0))
+
+#define SWAPSRC (state->Reg[RHSReg])
+
+#define LSCOff (BITS (0, 7) << 2)
+#define CPNum   BITS (8, 11)
+
+/* Determine if access to coprocessor CP is permitted.
+   The XScale has a register in CP15 which controls access to CP0 - CP13.  */
+//chy 2003-09-03, new CP_ACCESS_ALLOWED
+/*
+#define CP_ACCESS_ALLOWED(STATE, CP)			\
+    (   ((CP) >= 14)					\
+     || (! (STATE)->is_XScale)				\
+     || (read_cp15_reg (15, 0, 1) & (1 << (CP))))
+*/
+#define CP_ACCESS_ALLOWED(STATE, CP)			\
+    (   ((CP) >= 14)	)				\
+
+/* Macro to rotate n right by b bits.  */
+#define ROTATER(n, b) (((n) >> (b)) | ((n) << (32 - (b))))
+
+/* Macros to store results of instructions.  */
+#define WRITEDEST(d)				\
+  do						\
+    {						\
+      if (DESTReg == 15) 			\
+	WriteR15 (state, d); 			\
+      else 					\
+	DEST = d;				\
+    }						\
+  while (0)
+
+#define WRITESDEST(d)				\
+  do						\
+    {						\
+      if (DESTReg == 15)			\
+	WriteSR15 (state, d);			\
+      else					\
+	{					\
+	  DEST = d;				\
+	  ARMul_NegZero (state, d);		\
+	}					\
+    }						\
+  while (0)
+
+#define WRITEDESTB(d)				\
+  do						\
+    {						\
+      if (DESTReg == 15){			\
+	WriteR15Branch (state, d);		\
+      }						\
+      else{					\
+	DEST = d;				\
+      }						\
+    }						\
+  while (0)
+
+#define BYTETOBUS(data) ((data & 0xff) | \
+                        ((data & 0xff) << 8) | \
+                        ((data & 0xff) << 16) | \
+                        ((data & 0xff) << 24))
+
+#define BUSTOBYTE(address, data)				\
+  do								\
+    {								\
+      if (state->bigendSig) 					\
+	temp = (data >> (((address ^ 3) & 3) << 3)) & 0xff;	\
+      else							\
+	temp = (data >> ((address & 3) << 3)) & 0xff;		\
+    }								\
+  while (0)
+
+#define LOADMULT(instr,   address, wb)  LoadMult   (state, instr, address, wb)
+#define LOADSMULT(instr,  address, wb)  LoadSMult  (state, instr, address, wb)
+#define STOREMULT(instr,  address, wb)  StoreMult  (state, instr, address, wb)
+#define STORESMULT(instr, address, wb)  StoreSMult (state, instr, address, wb)
+
+#define POSBRANCH ((instr & 0x7fffff) << 2)
+#define NEGBRANCH ((0xff000000 |(instr & 0xffffff)) << 2)
+
+
+/* Values for Emulate.  */
+#define STOP            0	/* stop */
+#define CHANGEMODE      1	/* change mode */
+#define ONCE            2	/* execute just one interation */
+#define RUN             3	/* continuous execution */
+
+/* Stuff that is shared across modes.  */
+extern unsigned ARMul_MultTable[];	/* Number of I cycles for a mult.  */
+extern ARMword ARMul_ImmedTable[];	/* Immediate DP LHS values.  */
+extern char ARMul_BitList[];	/* Number of bits in a byte table.  */
+
+#define EVENTLISTSIZE 1024L
+
+/* Thumb support.  */
+typedef enum
+{
+	t_undefined,		/* Undefined Thumb instruction.  */
+	t_decoded,		/* Instruction decoded to ARM equivalent.  */
+	t_branch		/* Thumb branch (already processed).  */
+}
+tdstate;
+
+/*********************************************************************************
+ * Check all the possible undef or unpredict behavior, Some of them probably is
+ * out-of-updated with the newer ISA.
+ * -- Michael.Kang
+ ********************************************************************************/
+#define UNDEF_WARNING WARN_LOG(ARM11, "undefined or unpredicted behavior for arm instruction.\n");
+
+/* Macros to scrutinize instructions.  */
+#define UNDEF_Test UNDEF_WARNING
+//#define UNDEF_Test
+
+//#define UNDEF_Shift UNDEF_WARNING
+#define UNDEF_Shift
+
+//#define UNDEF_MSRPC UNDEF_WARNING
+#define UNDEF_MSRPC
+
+//#define UNDEF_MRSPC UNDEF_WARNING
+#define UNDEF_MRSPC
+
+#define UNDEF_MULPCDest UNDEF_WARNING
+//#define UNDEF_MULPCDest
+
+#define UNDEF_MULDestEQOp1 UNDEF_WARNING
+//#define UNDEF_MULDestEQOp1
+
+//#define UNDEF_LSRBPC UNDEF_WARNING
+#define UNDEF_LSRBPC
+
+//#define UNDEF_LSRBaseEQOffWb UNDEF_WARNING
+#define UNDEF_LSRBaseEQOffWb
+
+//#define UNDEF_LSRBaseEQDestWb UNDEF_WARNING
+#define UNDEF_LSRBaseEQDestWb
+
+//#define UNDEF_LSRPCBaseWb UNDEF_WARNING
+#define UNDEF_LSRPCBaseWb
+
+//#define UNDEF_LSRPCOffWb UNDEF_WARNING
+#define UNDEF_LSRPCOffWb
+
+//#define UNDEF_LSMNoRegs UNDEF_WARNING
+#define UNDEF_LSMNoRegs
+
+//#define UNDEF_LSMPCBase UNDEF_WARNING
+#define UNDEF_LSMPCBase
+
+//#define UNDEF_LSMUserBankWb UNDEF_WARNING
+#define UNDEF_LSMUserBankWb
+
+//#define UNDEF_LSMBaseInListWb UNDEF_WARNING
+#define UNDEF_LSMBaseInListWb
+
+#define UNDEF_SWPPC UNDEF_WARNING
+//#define UNDEF_SWPPC
+
+#define UNDEF_CoProHS UNDEF_WARNING
+//#define UNDEF_CoProHS
+
+#define UNDEF_MCRPC UNDEF_WARNING
+//#define UNDEF_MCRPC
+
+//#define UNDEF_LSCPCBaseWb UNDEF_WARNING
+#define UNDEF_LSCPCBaseWb
+
+#define UNDEF_UndefNotBounced UNDEF_WARNING
+//#define UNDEF_UndefNotBounced
+
+#define UNDEF_ShortInt UNDEF_WARNING
+//#define UNDEF_ShortInt
+
+#define UNDEF_IllegalMode UNDEF_WARNING
+//#define UNDEF_IllegalMode
+
+#define UNDEF_Prog32SigChange UNDEF_WARNING
+//#define UNDEF_Prog32SigChange
+
+#define UNDEF_Data32SigChange UNDEF_WARNING
+//#define UNDEF_Data32SigChange
+
+/* Prototypes for exported functions.  */
+extern unsigned ARMul_NthReg (ARMword, unsigned);
+extern int AddOverflow (ARMword, ARMword, ARMword);
+extern int SubOverflow (ARMword, ARMword, ARMword);
+/* Prototypes for exported functions.  */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+extern ARMword ARMul_Emulate26 (ARMul_State *);
+extern ARMword ARMul_Emulate32 (ARMul_State *);
+#ifdef __cplusplus
+ }
+#endif
+extern unsigned IntPending (ARMul_State *);
+extern void ARMul_CPSRAltered (ARMul_State *);
+extern void ARMul_R15Altered (ARMul_State *);
+extern ARMword ARMul_GetPC (ARMul_State *);
+extern ARMword ARMul_GetNextPC (ARMul_State *);
+extern ARMword ARMul_GetR15 (ARMul_State *);
+extern ARMword ARMul_GetCPSR (ARMul_State *);
+extern void ARMul_EnvokeEvent (ARMul_State *);
+extern unsigned int ARMul_Time (ARMul_State *);
+extern void ARMul_NegZero (ARMul_State *, ARMword);
+extern void ARMul_SetPC (ARMul_State *, ARMword);
+extern void ARMul_SetR15 (ARMul_State *, ARMword);
+extern void ARMul_SetCPSR (ARMul_State *, ARMword);
+extern ARMword ARMul_GetSPSR (ARMul_State *, ARMword);
+extern void ARMul_Abort26 (ARMul_State *, ARMword);
+extern void ARMul_Abort32 (ARMul_State *, ARMword);
+extern ARMword ARMul_MRC (ARMul_State *, ARMword);
+extern void ARMul_MRRC (ARMul_State *, ARMword, ARMword *, ARMword *);
+extern void ARMul_CDP (ARMul_State *, ARMword);
+extern void ARMul_LDC (ARMul_State *, ARMword, ARMword);
+extern void ARMul_STC (ARMul_State *, ARMword, ARMword);
+extern void ARMul_MCR (ARMul_State *, ARMword, ARMword);
+extern void ARMul_MCRR (ARMul_State *, ARMword, ARMword, ARMword);
+extern void ARMul_SetSPSR (ARMul_State *, ARMword, ARMword);
+extern ARMword ARMul_SwitchMode (ARMul_State *, ARMword, ARMword);
+extern ARMword ARMul_Align (ARMul_State *, ARMword, ARMword);
+extern ARMword ARMul_SwitchMode (ARMul_State *, ARMword, ARMword);
+extern void ARMul_MSRCpsr (ARMul_State *, ARMword, ARMword);
+extern void ARMul_SubOverflow (ARMul_State *, ARMword, ARMword, ARMword);
+extern void ARMul_AddOverflow (ARMul_State *, ARMword, ARMword, ARMword);
+extern void ARMul_SubCarry (ARMul_State *, ARMword, ARMword, ARMword);
+extern void ARMul_AddCarry (ARMul_State *, ARMword, ARMword, ARMword);
+extern tdstate ARMul_ThumbDecode (ARMul_State *, ARMword, ARMword, ARMword *);
+extern ARMword ARMul_GetReg (ARMul_State *, unsigned, unsigned);
+extern void ARMul_SetReg (ARMul_State *, unsigned, unsigned, ARMword);
+extern void ARMul_ScheduleEvent (ARMul_State *, unsigned int,
+				 unsigned (*)(ARMul_State *));
+/* Coprocessor support functions.  */
+extern unsigned ARMul_CoProInit (ARMul_State *);
+extern void ARMul_CoProExit (ARMul_State *);
+extern void ARMul_CoProAttach (ARMul_State *, unsigned, ARMul_CPInits *,
+			       ARMul_CPExits *, ARMul_LDCs *, ARMul_STCs *,
+			       ARMul_MRCs *, ARMul_MCRs *, ARMul_MRRCs *, ARMul_MCRRs *, 
+			       ARMul_CDPs *, ARMul_CPReads *, ARMul_CPWrites *);
+extern void ARMul_CoProDetach (ARMul_State *, unsigned);
+extern ARMword read_cp15_reg (unsigned, unsigned, unsigned);
+
+extern unsigned DSPLDC4 (ARMul_State *, unsigned, ARMword, ARMword);
+extern unsigned DSPMCR4 (ARMul_State *, unsigned, ARMword, ARMword);
+extern unsigned DSPMRC4 (ARMul_State *, unsigned, ARMword, ARMword *);
+extern unsigned DSPSTC4 (ARMul_State *, unsigned, ARMword, ARMword *);
+extern unsigned DSPCDP4 (ARMul_State *, unsigned, ARMword);
+extern unsigned DSPMCR5 (ARMul_State *, unsigned, ARMword, ARMword);
+extern unsigned DSPMRC5 (ARMul_State *, unsigned, ARMword, ARMword *);
+extern unsigned DSPLDC5 (ARMul_State *, unsigned, ARMword, ARMword);
+extern unsigned DSPSTC5 (ARMul_State *, unsigned, ARMword, ARMword *);
+extern unsigned DSPCDP5 (ARMul_State *, unsigned, ARMword);
+extern unsigned DSPMCR6 (ARMul_State *, unsigned, ARMword, ARMword);
+extern unsigned DSPMRC6 (ARMul_State *, unsigned, ARMword, ARMword *);
+extern unsigned DSPCDP6 (ARMul_State *, unsigned, ARMword);
+
+
+#endif
diff --git a/src/core/arm/skyeye_common/armmmu.h b/src/core/arm/skyeye_common/armmmu.h
new file mode 100644
index 000000000..818108c9c
--- /dev/null
+++ b/src/core/arm/skyeye_common/armmmu.h
@@ -0,0 +1,254 @@
+/*
+    armmmu.c - Memory Management Unit emulation.
+    ARMulator extensions for the ARM7100 family.
+    Copyright (C) 1999  Ben Williamson
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef _ARMMMU_H_
+#define _ARMMMU_H_
+
+
+#define WORD_SHT			2
+#define WORD_SIZE			(1<<WORD_SHT)
+/* The MMU is accessible with MCR and MRC operations to copro 15: */
+
+#define MMU_COPRO			(15)
+
+/* Register numbers in the MMU: */
+
+typedef enum mmu_regnum_t
+{
+	MMU_ID = 0,
+	MMU_CONTROL = 1,
+	MMU_TRANSLATION_TABLE_BASE = 2,
+	MMU_DOMAIN_ACCESS_CONTROL = 3,
+	MMU_FAULT_STATUS = 5,
+	MMU_FAULT_ADDRESS = 6,
+	MMU_CACHE_OPS = 7,
+	MMU_TLB_OPS = 8,
+	MMU_CACHE_LOCKDOWN = 9,
+	MMU_TLB_LOCKDOWN = 10,
+	MMU_PID = 13,
+
+	/*MMU_V4 */
+	MMU_V4_CACHE_OPS = 7,
+	MMU_V4_TLB_OPS = 8,
+
+	/*MMU_V3 */
+	MMU_V3_FLUSH_TLB = 5,
+	MMU_V3_FLUSH_TLB_ENTRY = 6,
+	MMU_V3_FLUSH_CACHE = 7,
+
+	/*MMU Intel SA-1100 */
+	MMU_SA_RB_OPS = 9,
+	MMU_SA_DEBUG = 14,
+	MMU_SA_CP15_R15 = 15,
+	//chy 2003-08-24
+	/*Intel xscale CP15 */
+	XSCALE_CP15_CACHE_TYPE = 0,
+	XSCALE_CP15_AUX_CONTROL = 1,
+	XSCALE_CP15_COPRO_ACCESS = 15,
+
+} mmu_regnum_t;
+
+/* Bits in the control register */
+
+#define CONTROL_MMU			(1<<0)
+#define CONTROL_ALIGN_FAULT		(1<<1)
+#define CONTROL_CACHE			(1<<2)
+#define CONTROL_DATA_CACHE		(1<<2)
+#define CONTROL_WRITE_BUFFER		(1<<3)
+#define CONTROL_BIG_ENDIAN		(1<<7)
+#define CONTROL_SYSTEM			(1<<8)
+#define CONTROL_ROM			(1<<9)
+#define CONTROL_UNDEFINED               (1<<10)
+#define CONTROL_BRANCH_PREDICT          (1<<11)
+#define CONTROL_INSTRUCTION_CACHE       (1<<12)
+#define CONTROL_VECTOR                  (1<<13)
+#define CONTROL_RR                      (1<<14)
+#define CONTROL_L4                      (1<<15)
+#define CONTROL_XP                      (1<<23)
+#define CONTROL_EE                      (1<<25)
+
+/*Macro defines for MMU state*/
+#define MMU_CTL (state->mmu.control)
+#define MMU_Enabled (state->mmu.control & CONTROL_MMU)
+#define MMU_Disabled (!(MMU_Enabled))
+#define MMU_Aligned (state->mmu.control & CONTROL_ALIGN_FAULT)
+
+#define MMU_ICacheEnabled (MMU_CTL & CONTROL_INSTRUCTION_CACHE)
+#define MMU_ICacheDisabled (!(MMU_ICacheDisabled))
+
+#define MMU_DCacheEnabled (MMU_CTL & CONTROL_DATA_CACHE)
+#define MMU_DCacheDisabled (!(MMU_DCacheEnabled))
+
+#define MMU_CacheEnabled (MMU_CTL & CONTROL_CACHE)
+#define MMU_CacheDisabled (!(MMU_CacheEnabled))
+
+#define MMU_WBEnabled (MMU_CTL & CONTROL_WRITE_BUFFER)
+#define MMU_WBDisabled (!(MMU_WBEnabled))
+
+/*virt_addr exchange according to CP15.R13(process id virtul mapping)*/
+#define PID_VA_MAP_MASK	0xfe000000
+//#define mmu_pid_va_map(va) ({\
+//	ARMword ret; \
+//	if ((va) & PID_VA_MAP_MASK)\
+//		ret = (va); \
+//	else \
+//		ret = ((va) | (state->mmu.process_id & PID_VA_MAP_MASK));\
+//	ret;\
+//})
+#define mmu_pid_va_map(va) ((va) & PID_VA_MAP_MASK) ? (va) : ((va) | (state->mmu.process_id & PID_VA_MAP_MASK))
+
+/* FS[3:0] in the fault status register: */
+
+typedef enum fault_t
+{
+	NO_FAULT = 0x0,
+	ALIGNMENT_FAULT = 0x1,
+
+	SECTION_TRANSLATION_FAULT = 0x5,
+	PAGE_TRANSLATION_FAULT = 0x7,
+	SECTION_DOMAIN_FAULT = 0x9,
+	PAGE_DOMAIN_FAULT = 0xB,
+	SECTION_PERMISSION_FAULT = 0xD,
+	SUBPAGE_PERMISSION_FAULT = 0xF,
+
+	/* defined by skyeye */
+	TLB_READ_MISS = 0x30,
+	TLB_WRITE_MISS = 0x40,
+
+} fault_t;
+
+typedef struct mmu_ops_s
+{
+	/*initilization */
+	int (*init) (ARMul_State * state);
+	/*free on exit */
+	void (*exit) (ARMul_State * state);
+	/*read byte data */
+	  fault_t (*read_byte) (ARMul_State * state, ARMword va,
+				ARMword * data);
+	/*write byte data */
+	  fault_t (*write_byte) (ARMul_State * state, ARMword va,
+				 ARMword data);
+	/*read halfword data */
+	  fault_t (*read_halfword) (ARMul_State * state, ARMword va,
+				    ARMword * data);
+	/*write halfword data */
+	  fault_t (*write_halfword) (ARMul_State * state, ARMword va,
+				     ARMword data);
+	/*read word data */
+	  fault_t (*read_word) (ARMul_State * state, ARMword va,
+				ARMword * data);
+	/*write word data */
+	  fault_t (*write_word) (ARMul_State * state, ARMword va,
+				 ARMword data);
+	/*load instr */
+	  fault_t (*load_instr) (ARMul_State * state, ARMword va,
+				 ARMword * instr);
+	/*mcr */
+	  ARMword (*mcr) (ARMul_State * state, ARMword instr, ARMword val);
+	/*mrc */
+	  ARMword (*mrc) (ARMul_State * state, ARMword instr, ARMword * val);
+
+	/*ywc 2005-04-16 convert virtual address to physics address */
+	int (*v2p_dbct) (ARMul_State * state, ARMword virt_addr,
+			 ARMword * phys_addr);
+} mmu_ops_t;
+
+
+#include "core/arm/interpreter/mmu/tlb.h"
+#include "core/arm/interpreter/mmu/rb.h"
+#include "core/arm/interpreter/mmu/wb.h"
+#include "core/arm/interpreter/mmu/cache.h"
+
+/*special process mmu.h*/
+#include "core/arm/interpreter/mmu/sa_mmu.h"
+//#include "core/arm/interpreter/mmu/arm7100_mmu.h"
+//#include "core/arm/interpreter/mmu/arm920t_mmu.h"
+//#include "core/arm/interpreter/mmu/arm926ejs_mmu.h"
+#include "core/arm/interpreter/mmu/arm1176jzf_s_mmu.h"
+//#include "core/arm/interpreter/mmu/cortex_a9_mmu.h"
+
+typedef struct mmu_state_t
+{
+	ARMword control;
+	ARMword translation_table_base;
+/* dyf 201-08-11 for arm1176 */
+	ARMword auxiliary_control;
+	ARMword coprocessor_access_control;
+	ARMword translation_table_base0;
+	ARMword translation_table_base1;
+	ARMword translation_table_ctrl;
+/* arm1176 end */
+
+	ARMword domain_access_control;
+	ARMword fault_status;
+	ARMword fault_statusi;  /* prefetch fault status */
+	ARMword fault_address;
+	ARMword last_domain;
+	ARMword process_id;
+	ARMword context_id;
+	ARMword thread_uro_id;
+	ARMword cache_locked_down;
+	ARMword tlb_locked_down;
+//chy 2003-08-24 for xscale
+	ARMword cache_type;	// 0
+	ARMword aux_control;	// 1
+	ARMword copro_access;	// 15
+
+	mmu_ops_t ops;
+	union
+	{
+		sa_mmu_t sa_mmu;
+		//arm7100_mmu_t arm7100_mmu;
+		//arm920t_mmu_t arm920t_mmu;
+		//arm926ejs_mmu_t arm926ejs_mmu;
+	} u;
+} mmu_state_t;
+
+int mmu_init (ARMul_State * state);
+int mmu_reset (ARMul_State * state);
+void mmu_exit (ARMul_State * state);
+
+fault_t mmu_read_word (ARMul_State * state, ARMword virt_addr,
+		       ARMword * data);
+fault_t mmu_write_word (ARMul_State * state, ARMword virt_addr, ARMword data);
+fault_t mmu_load_instr (ARMul_State * state, ARMword virt_addr,
+			ARMword * instr);
+
+ARMword mmu_mrc (ARMul_State * state, ARMword instr, ARMword * value);
+void mmu_mcr (ARMul_State * state, ARMword instr, ARMword value);
+
+/*ywc 20050416*/
+int mmu_v2p_dbct (ARMul_State * state, ARMword virt_addr,
+		  ARMword * phys_addr);
+
+fault_t
+mmu_read_byte (ARMul_State * state, ARMword virt_addr, ARMword * data);
+fault_t
+mmu_read_halfword (ARMul_State * state, ARMword virt_addr, ARMword * data);
+fault_t
+mmu_read_word (ARMul_State * state, ARMword virt_addr, ARMword * data);
+fault_t
+mmu_write_byte (ARMul_State * state, ARMword virt_addr, ARMword data);
+fault_t
+mmu_write_halfword (ARMul_State * state, ARMword virt_addr, ARMword data);
+fault_t
+mmu_write_word (ARMul_State * state, ARMword virt_addr, ARMword data);
+#endif /* _ARMMMU_H_ */
diff --git a/src/core/arm/skyeye_common/armos.h b/src/core/arm/skyeye_common/armos.h
new file mode 100644
index 000000000..4b58801ad
--- /dev/null
+++ b/src/core/arm/skyeye_common/armos.h
@@ -0,0 +1,138 @@
+/*  armos.h -- ARMulator OS definitions:  ARM6 Instruction Emulator.
+    Copyright (C) 1994 Advanced RISC Machines Ltd.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+ 
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+//#include "bank_defs.h"
+//#include "dyncom/defines.h"
+
+//typedef struct mmap_area{
+//	mem_bank_t bank;
+//	void *mmap_addr;
+//	struct mmap_area *next;
+//}mmap_area_t;
+
+#if FAST_MEMORY
+/* in user mode, mmap_base will be on initial brk,
+   set at the first mmap request */
+#define mmap_base -1
+#else
+#define mmap_base 0x50000000
+#endif
+static long mmap_next_base = mmap_base;
+
+//static mmap_area_t* new_mmap_area(int sim_addr, int len);
+static char mmap_mem_write(short size, int addr, uint32_t value);
+static char mmap_mem_read(short size, int addr, uint32_t * value);
+
+/***************************************************************************\
+*                               SWI numbers                                 *
+\***************************************************************************/
+
+#define SWI_Syscall                0x0
+#define SWI_Exit                   0x1
+#define SWI_Read                   0x3
+#define SWI_Write                  0x4
+#define SWI_Open                   0x5
+#define SWI_Close                  0x6
+#define SWI_Seek                   0x13
+#define SWI_Rename                 0x26
+#define SWI_Break                  0x11
+
+#define SWI_Times		   0x2b
+#define SWI_Brk			   0x2d
+
+#define SWI_Mmap                   0x5a
+#define SWI_Munmap                 0x5b
+#define SWI_Mmap2                  0xc0
+
+#define SWI_GetUID32               0xc7
+#define SWI_GetGID32               0xc8
+#define SWI_GetEUID32              0xc9
+#define SWI_GetEGID32              0xca
+
+#define SWI_ExitGroup		   0xf8
+
+#if 0
+#define SWI_Time                   0xd
+#define SWI_Clock                  0x61
+#define SWI_Time                   0x63
+#define SWI_Remove                 0x64
+#define SWI_Rename                 0x65
+#define SWI_Flen                   0x6c
+#endif
+
+#define SWI_Uname		   0x7a
+#define SWI_Fcntl                  0xdd 
+#define SWI_Fstat64  		   0xc5
+#define SWI_Gettimeofday           0x4e
+#define SWI_Set_tls                0xf0005
+
+#define SWI_Breakpoint             0x180000	/* see gdb's tm-arm.h */
+
+/***************************************************************************\
+*                             SWI structures                                *
+\***************************************************************************/
+
+/* Arm binaries (for now) only support 32 bit, and expect to receive
+   32-bit compliant structure in return of a systen call. Because
+   we use host system calls to emulate system calls, the returned
+   structure can be 32-bit compliant or 64-bit compliant, depending
+   on the OS running skyeye. Therefore, we need a fixed size structure
+   adapted to arm.*/
+
+/* Borrowed from qemu */
+struct target_stat64 {
+	unsigned short	st_dev;
+	unsigned char	__pad0[10];
+	uint32_t	__st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+	uint32_t	st_uid;
+	uint32_t	st_gid;
+	unsigned short	st_rdev;
+	unsigned char	__pad3[10];
+	unsigned char	__pad31[4];
+	long long	st_size;
+	uint32_t	st_blksize;
+	unsigned char	__pad32[4];
+	uint32_t	st_blocks;
+	uint32_t	__pad4;
+	uint32_t	st32_atime;
+	uint32_t	__pad5;
+	uint32_t	st32_mtime;
+	uint32_t	__pad6;
+	uint32_t	st32_ctime;
+	uint32_t	__pad7;
+	unsigned long long	st_ino;
+};// __attribute__((packed));
+
+struct target_tms32 {
+    uint32_t tms_utime;
+    uint32_t tms_stime;
+    uint32_t tms_cutime;
+    uint32_t tms_cstime;
+};
+
+struct target_timeval32 {
+	uint32_t tv_sec;     /* seconds */
+	uint32_t tv_usec;    /* microseconds */
+};
+
+struct target_timezone32 {
+	int32_t tz_minuteswest;     /* minutes west of Greenwich */
+	int32_t tz_dsttime;         /* type of DST correction */
+};
+
diff --git a/src/core/arm/skyeye_common/skyeye_defs.h b/src/core/arm/skyeye_common/skyeye_defs.h
new file mode 100644
index 000000000..b6713ebad
--- /dev/null
+++ b/src/core/arm/skyeye_common/skyeye_defs.h
@@ -0,0 +1,111 @@
+#ifndef CORE_ARM_SKYEYE_DEFS_H_
+#define CORE_ARM_SKYEYE_DEFS_H_
+
+#include "common/common.h"
+
+#define MODE32
+#define MODET
+
+typedef struct
+{
+	const char *cpu_arch_name;	/*cpu architecture version name.e.g. armv4t */
+	const char *cpu_name;	/*cpu name. e.g. arm7tdmi or arm720t */
+	u32 cpu_val;	/*CPU value; also call MMU ID or processor id;see
+				   ARM Architecture Reference Manual B2-6 */
+	u32 cpu_mask;	/*cpu_val's mask. */
+	u32 cachetype;	/*this cpu has what kind of cache */
+} cpu_config_t;
+
+typedef struct conf_object_s{
+	char* objname;
+	void* obj;
+	char* class_name;
+}conf_object_t;
+
+typedef enum{
+	/* No exception */
+	No_exp = 0,
+	/* Memory allocation exception */
+	Malloc_exp,
+	/* File open exception */
+	File_open_exp,
+	/* DLL open exception */
+	Dll_open_exp,
+	/* Invalid argument exception */
+	Invarg_exp,
+	/* Invalid module exception */
+	Invmod_exp,
+	/* wrong format exception for config file parsing */
+	Conf_format_exp,
+	/* some reference excess the predefiend range. Such as the index out of array range */
+	Excess_range_exp,
+	/* Can not find the desirable result */
+	Not_found_exp,
+
+	/* Unknown exception */
+	Unknown_exp
+}exception_t;
+
+typedef enum {
+	Align = 0,
+	UnAlign	
+}align_t;
+
+typedef enum {
+	Little_endian = 0,
+	Big_endian
+}endian_t;
+//typedef int exception_t;
+
+typedef enum{
+	Phys_addr = 0,
+	Virt_addr
+}addr_type_t;
+
+typedef exception_t(*read_byte_t)(conf_object_t* target, u32 addr, void *buf, size_t count);
+typedef exception_t(*write_byte_t)(conf_object_t* target, u32 addr, const void *buf, size_t count);
+
+typedef struct memory_space{
+	conf_object_t* conf_obj;
+	read_byte_t read;
+	write_byte_t write;
+}memory_space_intf;
+
+
+/*
+ * a running instance for a specific archteciture.
+ */
+typedef struct generic_arch_s
+{
+	char* arch_name;
+	void (*init) (void);
+	void (*reset) (void);
+	void (*step_once) (void);
+	void (*set_pc)(u32 addr);
+	u32 (*get_pc)(void);
+	u32 (*get_step)(void);
+	//chy 2004-04-15 
+	//int (*ICE_write_byte) (u32 addr, uint8_t v);
+	//int (*ICE_read_byte)(u32 addr, uint8_t *pv);
+	u32 (*get_regval_by_id)(int id);
+	u32 (*get_regnum)(void);
+	char* (*get_regname_by_id)(int id);
+	exception_t (*set_regval_by_id)(int id, u32 value);
+	/*
+	 * read a data by virtual address.
+	 */
+	exception_t (*mmu_read)(short size, u32 addr, u32 * value);
+	/*
+	 * write a data by a virtual address.
+	 */
+	exception_t (*mmu_write)(short size, u32 addr, u32 value);
+	/**
+	 * get a signal from external
+	 */
+	//exception_t (*signal)(interrupt_signal_t* signal);
+
+	endian_t endianess;
+	align_t alignment;
+} generic_arch_t;
+
+#endif
+\ No newline at end of file
diff --git a/src/core/arm/skyeye_common/vfp/asm_vfp.h b/src/core/arm/skyeye_common/vfp/asm_vfp.h
new file mode 100644
index 000000000..f4ab34fd4
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/asm_vfp.h
@@ -0,0 +1,84 @@
+/*
+ * arch/arm/include/asm/vfp.h
+ *
+ * VFP register definitions.
+ * First, the standard VFP set.
+ */
+
+#define FPSID			cr0
+#define FPSCR			cr1
+#define MVFR1			cr6
+#define MVFR0			cr7
+#define FPEXC			cr8
+#define FPINST			cr9
+#define FPINST2			cr10
+
+/* FPSID bits */
+#define FPSID_IMPLEMENTER_BIT	(24)
+#define FPSID_IMPLEMENTER_MASK	(0xff << FPSID_IMPLEMENTER_BIT)
+#define FPSID_SOFTWARE		(1<<23)
+#define FPSID_FORMAT_BIT	(21)
+#define FPSID_FORMAT_MASK	(0x3  << FPSID_FORMAT_BIT)
+#define FPSID_NODOUBLE		(1<<20)
+#define FPSID_ARCH_BIT		(16)
+#define FPSID_ARCH_MASK		(0xF  << FPSID_ARCH_BIT)
+#define FPSID_PART_BIT		(8)
+#define FPSID_PART_MASK		(0xFF << FPSID_PART_BIT)
+#define FPSID_VARIANT_BIT	(4)
+#define FPSID_VARIANT_MASK	(0xF  << FPSID_VARIANT_BIT)
+#define FPSID_REV_BIT		(0)
+#define FPSID_REV_MASK		(0xF  << FPSID_REV_BIT)
+
+/* FPEXC bits */
+#define FPEXC_EX		(1 << 31)
+#define FPEXC_EN		(1 << 30)
+#define FPEXC_DEX		(1 << 29)
+#define FPEXC_FP2V		(1 << 28)
+#define FPEXC_VV		(1 << 27)
+#define FPEXC_TFV		(1 << 26)
+#define FPEXC_LENGTH_BIT	(8)
+#define FPEXC_LENGTH_MASK	(7 << FPEXC_LENGTH_BIT)
+#define FPEXC_IDF		(1 << 7)
+#define FPEXC_IXF		(1 << 4)
+#define FPEXC_UFF		(1 << 3)
+#define FPEXC_OFF		(1 << 2)
+#define FPEXC_DZF		(1 << 1)
+#define FPEXC_IOF		(1 << 0)
+#define FPEXC_TRAP_MASK		(FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF)
+
+/* FPSCR bits */
+#define FPSCR_DEFAULT_NAN	(1<<25)
+#define FPSCR_FLUSHTOZERO	(1<<24)
+#define FPSCR_ROUND_NEAREST	(0<<22)
+#define FPSCR_ROUND_PLUSINF	(1<<22)
+#define FPSCR_ROUND_MINUSINF	(2<<22)
+#define FPSCR_ROUND_TOZERO	(3<<22)
+#define FPSCR_RMODE_BIT		(22)
+#define FPSCR_RMODE_MASK	(3 << FPSCR_RMODE_BIT)
+#define FPSCR_STRIDE_BIT	(20)
+#define FPSCR_STRIDE_MASK	(3 << FPSCR_STRIDE_BIT)
+#define FPSCR_LENGTH_BIT	(16)
+#define FPSCR_LENGTH_MASK	(7 << FPSCR_LENGTH_BIT)
+#define FPSCR_IOE		(1<<8)
+#define FPSCR_DZE		(1<<9)
+#define FPSCR_OFE		(1<<10)
+#define FPSCR_UFE		(1<<11)
+#define FPSCR_IXE		(1<<12)
+#define FPSCR_IDE		(1<<15)
+#define FPSCR_IOC		(1<<0)
+#define FPSCR_DZC		(1<<1)
+#define FPSCR_OFC		(1<<2)
+#define FPSCR_UFC		(1<<3)
+#define FPSCR_IXC		(1<<4)
+#define FPSCR_IDC		(1<<7)
+
+/* MVFR0 bits */
+#define MVFR0_A_SIMD_BIT	(0)
+#define MVFR0_A_SIMD_MASK	(0xf << MVFR0_A_SIMD_BIT)
+
+/* Bit patterns for decoding the packaged operation descriptors */
+#define VFPOPDESC_LENGTH_BIT	(9)
+#define VFPOPDESC_LENGTH_MASK	(0x07 << VFPOPDESC_LENGTH_BIT)
+#define VFPOPDESC_UNUSED_BIT	(24)
+#define VFPOPDESC_UNUSED_MASK	(0xFF << VFPOPDESC_UNUSED_BIT)
+#define VFPOPDESC_OPDESC_MASK	(~(VFPOPDESC_LENGTH_MASK | VFPOPDESC_UNUSED_MASK))
diff --git a/src/core/arm/skyeye_common/vfp/vfp.cpp b/src/core/arm/skyeye_common/vfp/vfp.cpp
new file mode 100644
index 000000000..e4fa3c20a
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/vfp.cpp
@@ -0,0 +1,357 @@
+/*
+    armvfp.c - ARM VFPv3 emulation unit
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Note: this file handles interface with arm core and vfp registers */
+
+/* Opens debug for classic interpreter only */
+//#define DEBUG
+
+#include "common/common.h"
+
+#include "core/arm/skyeye_common/armdefs.h"
+#include "core/arm/skyeye_common/vfp/vfp.h"
+
+//ARMul_State* persistent_state; /* function calls from SoftFloat lib don't have an access to ARMul_state. */
+
+unsigned
+VFPInit (ARMul_State *state)
+{
+	state->VFP[VFP_OFFSET(VFP_FPSID)] = VFP_FPSID_IMPLMEN<<24 | VFP_FPSID_SW<<23 | VFP_FPSID_SUBARCH<<16 | 
+		VFP_FPSID_PARTNUM<<8 | VFP_FPSID_VARIANT<<4 | VFP_FPSID_REVISION;
+	state->VFP[VFP_OFFSET(VFP_FPEXC)] = 0;
+	state->VFP[VFP_OFFSET(VFP_FPSCR)] = 0;
+	
+	//persistent_state = state;
+	/* Reset only specify VFP_FPEXC_EN = '0' */
+
+	return No_exp;
+}
+
+unsigned
+VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
+{
+	/* MRC<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (21, 23);
+	int Rt = BITS (12, 15);
+	int CRn = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	int OPC_2 = BITS (5, 7);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+	
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_MRC_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_MRC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", 
+	       instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+{
+	/* MCR<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (21, 23);
+	int Rt = BITS (12, 15);
+	int CRn = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	int OPC_2 = BITS (5, 7);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_MCR_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_MCR_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n", 
+	       instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2)
+{
+	/* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (4, 7);
+	int Rt = BITS (12, 15);
+	int Rt2 = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_MRRC_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_MRRC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", 
+	       instr, CoProc, OPC_1, Rt, Rt2, CRm);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2)
+{
+	/* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (4, 7);
+	int Rt = BITS (12, 15);
+	int Rt2 = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+	
+	if (CoProc == 11 || CoProc == 10)
+	{
+		#define VFP_MCRR_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_MCRR_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n", 
+	       instr, CoProc, OPC_1, Rt, Rt2, CRm);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
+{
+	/* STC{L}<c> <coproc>,<CRd>,[<Rn>],<option> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int CRd = BITS (12, 15);
+	int Rn = BITS (16, 19);
+	int imm8 = BITS (0, 7);
+	int P = BIT(24);
+	int U = BIT(23);
+	int D = BIT(22);
+	int W = BIT(21);
+	
+	/* TODO check access permission */
+	
+	/* VSTM */
+	if ( (P|U|D|W) == 0 )
+	{
+		DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
+	}
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#if 1
+		if (P == 0 && U == 0 && W == 0)
+		{
+			DEBUG_LOG(ARM11, "VSTM Related encodings\n"); exit(-1);
+		}
+		if (P == U && W == 1)
+		{
+			DEBUG_LOG(ARM11, "UNDEFINED\n"); exit(-1);
+		}
+		#endif
+
+		#define VFP_STC_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_STC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", 
+	       instr, CoProc, CRd, Rn, imm8, P, U, D, W);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
+{
+	/* LDC{L}<c> <coproc>,<CRd>,[<Rn>] */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int CRd = BITS (12, 15);
+	int Rn = BITS (16, 19);
+	int imm8 = BITS (0, 7);
+	int P = BIT(24);
+	int U = BIT(23);
+	int D = BIT(22);
+	int W = BIT(21);
+	
+	/* TODO check access permission */
+	
+	if ( (P|U|D|W) == 0 )
+	{
+		DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
+	}
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_LDC_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_LDC_TRANS
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n", 
+	       instr, CoProc, CRd, Rn, imm8, P, U, D, W);
+
+	return ARMul_CANT;
+}
+
+unsigned
+VFPCDP (ARMul_State * state, unsigned type, ARMword instr)
+{
+	/* CDP<c> <coproc>,<opc1>,<CRd>,<CRn>,<CRm>,<opc2> */
+	int CoProc = BITS (8, 11); /* 10 or 11 */
+	int OPC_1 = BITS (20, 23);
+	int CRd = BITS (12, 15);
+	int CRn = BITS (16, 19);
+	int CRm = BITS (0, 3);
+	int OPC_2 = BITS (5, 7);
+	
+	/* TODO check access permission */
+	
+	/* CRn/opc1 CRm/opc2 */
+
+	if (CoProc == 10 || CoProc == 11)
+	{
+		#define VFP_CDP_TRANS
+		#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+		#undef VFP_CDP_TRANS
+		
+		int exceptions = 0;
+		if (CoProc == 10)
+			exceptions = vfp_single_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else 
+			exceptions = vfp_double_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		vfp_raise_exceptions(state, exceptions, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		return ARMul_DONE;
+	}
+	DEBUG_LOG(ARM11, "Can't identify %x\n", instr);
+	return ARMul_CANT;
+}
+
+
+/* ----------- MRC ------------ */
+#define VFP_MRC_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_MRC_IMPL
+
+#define VFP_MRRC_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_MRRC_IMPL
+
+
+/* ----------- MCR ------------ */
+#define VFP_MCR_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_MCR_IMPL
+
+#define VFP_MCRR_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_MCRR_IMPL
+
+/* Memory operation are not inlined, as old Interpreter and Fast interpreter
+   don't have the same memory operation interface.
+   Old interpreter framework does one access to coprocessor per data, and
+   handles already data write, as well as address computation,
+   which is not the case for Fast interpreter. Therefore, implementation
+   of vfp instructions in old interpreter and fast interpreter are separate. */
+
+/* ----------- STC ------------ */
+#define VFP_STC_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_STC_IMPL
+
+
+/* ----------- LDC ------------ */
+#define VFP_LDC_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_LDC_IMPL
+
+
+/* ----------- CDP ------------ */
+#define VFP_CDP_IMPL
+#include "core/arm/skyeye_common/vfp/vfpinstr.cpp"
+#undef VFP_CDP_IMPL
+
+/* Miscellaneous functions */
+int32_t vfp_get_float(arm_core_t* state, unsigned int reg)
+{
+	DBG("VFP get float: s%d=[%08x]\n", reg, state->ExtReg[reg]);
+	return state->ExtReg[reg];
+}
+
+void vfp_put_float(arm_core_t* state, int32_t val, unsigned int reg)
+{
+	DBG("VFP put float: s%d <= [%08x]\n", reg, val);
+	state->ExtReg[reg] = val;
+}
+
+uint64_t vfp_get_double(arm_core_t* state, unsigned int reg)
+{
+	uint64_t result;
+	result = ((uint64_t) state->ExtReg[reg*2+1])<<32 | state->ExtReg[reg*2];
+	DBG("VFP get double: s[%d-%d]=[%016llx]\n", reg*2+1, reg*2, result);
+	return result;
+}
+
+void vfp_put_double(arm_core_t* state, uint64_t val, unsigned int reg)
+{
+	DBG("VFP put double: s[%d-%d] <= [%08x-%08x]\n", reg*2+1, reg*2, (uint32_t) (val>>32), (uint32_t) (val & 0xffffffff));
+	state->ExtReg[reg*2] = (uint32_t) (val & 0xffffffff);
+	state->ExtReg[reg*2+1] = (uint32_t) (val>>32);
+}
+
+
+
+/*
+ * Process bitmask of exception conditions. (from vfpmodule.c)
+ */
+void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpscr)
+{
+	int si_code = 0;
+
+	vfpdebug("VFP: raising exceptions %08x\n", exceptions);
+
+	if (exceptions == VFP_EXCEPTION_ERROR) {
+		DEBUG_LOG(ARM11, "unhandled bounce %x\n", inst);
+		exit(-1);
+		return;
+	}
+
+	/*
+	 * If any of the status flags are set, update the FPSCR.
+	 * Comparison instructions always return at least one of
+	 * these flags set.
+	 */
+	if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
+		fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
+
+	fpscr |= exceptions;
+
+	state->VFP[VFP_OFFSET(VFP_FPSCR)] = fpscr;
+}
diff --git a/src/core/arm/skyeye_common/vfp/vfp.h b/src/core/arm/skyeye_common/vfp/vfp.h
new file mode 100644
index 000000000..ed627d41f
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/vfp.h
@@ -0,0 +1,111 @@
+/* 
+    vfp/vfp.h - ARM VFPv3 emulation unit - vfp interface
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef __VFP_H__
+#define __VFP_H__
+
+#define DBG(...) //DEBUG_LOG(ARM11, __VA_ARGS__)
+
+#define vfpdebug //printf
+
+#include "core/arm/skyeye_common/vfp/vfp_helper.h" /* for references to cdp SoftFloat functions */
+
+unsigned VFPInit (ARMul_State *state);
+unsigned VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value);
+unsigned VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value);
+unsigned VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2);
+unsigned VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2);
+unsigned VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value);
+unsigned VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value);
+unsigned VFPCDP (ARMul_State * state, unsigned type, ARMword instr);
+
+/* FPSID Information */
+#define VFP_FPSID_IMPLMEN 0 	/* should be the same as cp15 0 c0 0*/
+#define VFP_FPSID_SW 0
+#define VFP_FPSID_SUBARCH 0x2 	/* VFP version. Current is v3 (not strict) */
+#define VFP_FPSID_PARTNUM 0x1
+#define VFP_FPSID_VARIANT 0x1
+#define VFP_FPSID_REVISION 0x1
+
+/* FPEXC Flags */
+#define VFP_FPEXC_EX 1<<31
+#define VFP_FPEXC_EN 1<<30
+
+/* FPSCR Flags */
+#define VFP_FPSCR_NFLAG 1<<31
+#define VFP_FPSCR_ZFLAG 1<<30
+#define VFP_FPSCR_CFLAG 1<<29
+#define VFP_FPSCR_VFLAG 1<<28
+
+#define VFP_FPSCR_AHP 1<<26 	/* Alternative Half Precision */
+#define VFP_FPSCR_DN 1<<25 	/* Default NaN */
+#define VFP_FPSCR_FZ 1<<24 	/* Flush-to-zero */
+#define VFP_FPSCR_RMODE 3<<22 	/* Rounding Mode */
+#define VFP_FPSCR_STRIDE 3<<20 	/* Stride (vector) */
+#define VFP_FPSCR_LEN 7<<16 	/* Stride (vector) */
+
+#define VFP_FPSCR_IDE 1<<15	/* Input Denormal exc */
+#define VFP_FPSCR_IXE 1<<12	/* Inexact exc */
+#define VFP_FPSCR_UFE 1<<11	/* Undeflow exc */
+#define VFP_FPSCR_OFE 1<<10	/* Overflow exc */
+#define VFP_FPSCR_DZE 1<<9	/* Division by Zero exc */
+#define VFP_FPSCR_IOE 1<<8	/* Invalid Operation exc */
+
+#define VFP_FPSCR_IDC 1<<7	/* Input Denormal cum exc */
+#define VFP_FPSCR_IXC 1<<4	/* Inexact cum exc */
+#define VFP_FPSCR_UFC 1<<3	/* Undeflow cum exc */
+#define VFP_FPSCR_OFC 1<<2	/* Overflow cum exc */
+#define VFP_FPSCR_DZC 1<<1	/* Division by Zero cum exc */
+#define VFP_FPSCR_IOC 1<<0	/* Invalid Operation cum exc */
+
+/* Inline instructions. Note: Used in a cpp file as well */
+#ifdef __cplusplus
+ extern "C" {
+#endif
+int32_t vfp_get_float(ARMul_State * state, unsigned int reg);
+void vfp_put_float(ARMul_State * state, int32_t val, unsigned int reg);
+uint64_t vfp_get_double(ARMul_State * state, unsigned int reg);
+void vfp_put_double(ARMul_State * state, uint64_t val, unsigned int reg);
+void vfp_raise_exceptions(ARMul_State * state, uint32_t exceptions, uint32_t inst, uint32_t fpscr);
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr);
+u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr);
+
+/* MRC */
+inline void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword *value);
+inline void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value);
+inline void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2);
+inline void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm);
+inline void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword imm);
+/* MCR */
+inline void VMSR(ARMul_State * state, ARMword reg, ARMword Rt);
+/* STC */
+inline int VSTM(ARMul_State * state, int type, ARMword instr, ARMword* value);
+inline int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword* value);
+inline int VSTR(ARMul_State * state, int type, ARMword instr, ARMword* value);
+/* LDC */
+inline int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value);
+inline int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value);
+inline int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif
diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h
new file mode 100644
index 000000000..a55bf875c
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h
@@ -0,0 +1,541 @@
+/*
+    vfp/vfp.h - ARM VFPv3 emulation unit - SoftFloat lib helper
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* 
+ *  The following code is derivative from Linux Android kernel vfp
+ *  floating point support.
+ * 
+ *  Copyright (C) 2004 ARM Limited.
+ *  Written by Deep Blue Solutions Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __VFP_HELPER_H__
+#define __VFP_HELPER_H__
+
+/* Custom edit */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "core/arm/skyeye_common/armdefs.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+#define s16 int16_t
+#define s32 int32_t
+#define s64 int64_t
+
+#define pr_info //printf
+#define pr_debug //printf
+
+static u32 vfp_fls(int x);
+#define do_div(n, base) {n/=base;}
+
+/* From vfpinstr.h */
+
+#define INST_CPRTDO(inst)	(((inst) & 0x0f000000) == 0x0e000000)
+#define INST_CPRT(inst)		((inst) & (1 << 4))
+#define INST_CPRT_L(inst)	((inst) & (1 << 20))
+#define INST_CPRT_Rd(inst)	(((inst) & (15 << 12)) >> 12)
+#define INST_CPRT_OP(inst)	(((inst) >> 21) & 7)
+#define INST_CPNUM(inst)	((inst) & 0xf00)
+#define CPNUM(cp)		((cp) << 8)
+
+#define FOP_MASK	(0x00b00040)
+#define FOP_FMAC	(0x00000000)
+#define FOP_FNMAC	(0x00000040)
+#define FOP_FMSC	(0x00100000)
+#define FOP_FNMSC	(0x00100040)
+#define FOP_FMUL	(0x00200000)
+#define FOP_FNMUL	(0x00200040)
+#define FOP_FADD	(0x00300000)
+#define FOP_FSUB	(0x00300040)
+#define FOP_FDIV	(0x00800000)
+#define FOP_EXT		(0x00b00040)
+
+#define FOP_TO_IDX(inst)	((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4)
+
+#define FEXT_MASK	(0x000f0080)
+#define FEXT_FCPY	(0x00000000)
+#define FEXT_FABS	(0x00000080)
+#define FEXT_FNEG	(0x00010000)
+#define FEXT_FSQRT	(0x00010080)
+#define FEXT_FCMP	(0x00040000)
+#define FEXT_FCMPE	(0x00040080)
+#define FEXT_FCMPZ	(0x00050000)
+#define FEXT_FCMPEZ	(0x00050080)
+#define FEXT_FCVT	(0x00070080)
+#define FEXT_FUITO	(0x00080000)
+#define FEXT_FSITO	(0x00080080)
+#define FEXT_FTOUI	(0x000c0000)
+#define FEXT_FTOUIZ	(0x000c0080)
+#define FEXT_FTOSI	(0x000d0000)
+#define FEXT_FTOSIZ	(0x000d0080)
+
+#define FEXT_TO_IDX(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
+
+#define vfp_get_sd(inst)	((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22)
+#define vfp_get_dd(inst)	((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18)
+#define vfp_get_sm(inst)	((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5)
+#define vfp_get_dm(inst)	((inst & 0x0000000f) | (inst & (1 << 5)) >> 1)
+#define vfp_get_sn(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
+#define vfp_get_dn(inst)	((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3)
+
+#define vfp_single(inst)	(((inst) & 0x0000f00) == 0xa00)
+
+#define FPSCR_N	(1 << 31)
+#define FPSCR_Z	(1 << 30)
+#define FPSCR_C (1 << 29)
+#define FPSCR_V	(1 << 28)
+
+/* -------------- */
+
+/* From asm/include/vfp.h */
+
+/* FPSCR bits */
+#define FPSCR_DEFAULT_NAN	(1<<25)
+#define FPSCR_FLUSHTOZERO	(1<<24)
+#define FPSCR_ROUND_NEAREST	(0<<22)
+#define FPSCR_ROUND_PLUSINF	(1<<22)
+#define FPSCR_ROUND_MINUSINF	(2<<22)
+#define FPSCR_ROUND_TOZERO	(3<<22)
+#define FPSCR_RMODE_BIT		(22)
+#define FPSCR_RMODE_MASK	(3 << FPSCR_RMODE_BIT)
+#define FPSCR_STRIDE_BIT	(20)
+#define FPSCR_STRIDE_MASK	(3 << FPSCR_STRIDE_BIT)
+#define FPSCR_LENGTH_BIT	(16)
+#define FPSCR_LENGTH_MASK	(7 << FPSCR_LENGTH_BIT)
+#define FPSCR_IOE		(1<<8)
+#define FPSCR_DZE		(1<<9)
+#define FPSCR_OFE		(1<<10)
+#define FPSCR_UFE		(1<<11)
+#define FPSCR_IXE		(1<<12)
+#define FPSCR_IDE		(1<<15)
+#define FPSCR_IOC		(1<<0)
+#define FPSCR_DZC		(1<<1)
+#define FPSCR_OFC		(1<<2)
+#define FPSCR_UFC		(1<<3)
+#define FPSCR_IXC		(1<<4)
+#define FPSCR_IDC		(1<<7)
+
+/* ---------------- */
+
+static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
+{
+	if (shift) {
+		if (shift < 32)
+			val = val >> shift | ((val << (32 - shift)) != 0);
+		else
+			val = val != 0;
+	}
+	return val;
+}
+
+static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
+{
+	if (shift) {
+		if (shift < 64)
+			val = val >> shift | ((val << (64 - shift)) != 0);
+		else
+			val = val != 0;
+	}
+	return val;
+}
+
+static inline u32 vfp_hi64to32jamming(u64 val)
+{
+	u32 v;
+	u32 highval = val >> 32;
+	u32 lowval = val & 0xffffffff;
+
+	if (lowval >= 1)
+		v = highval | 1;
+	else
+		v = highval;
+
+	return v;
+}
+
+static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
+{
+	*resl = nl + ml;
+	*resh = nh + mh;
+	if (*resl < nl)
+		*resh += 1;
+}
+
+static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
+{
+	*resl = nl - ml;
+	*resh = nh - mh;
+	if (*resl > nl)
+		*resh -= 1;
+}
+
+static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
+{
+	u32 nh, nl, mh, ml;
+	u64 rh, rma, rmb, rl;
+
+	nl = n;
+	ml = m;
+	rl = (u64)nl * ml;
+
+	nh = n >> 32;
+	rma = (u64)nh * ml;
+
+	mh = m >> 32;
+	rmb = (u64)nl * mh;
+	rma += rmb;
+
+	rh = (u64)nh * mh;
+	rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
+
+	rma <<= 32;
+	rl += rma;
+	rh += (rl < rma);
+
+	*resl = rl;
+	*resh = rh;
+}
+
+static inline void shift64left(u64 *resh, u64 *resl, u64 n)
+{
+	*resh = n >> 63;
+	*resl = n << 1;
+}
+
+static inline u64 vfp_hi64multiply64(u64 n, u64 m)
+{
+	u64 rh, rl;
+	mul64to128(&rh, &rl, n, m);
+	return rh | (rl != 0);
+}
+
+static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
+{
+	u64 mh, ml, remh, reml, termh, terml, z;
+
+	if (nh >= m)
+		return ~0ULL;
+	mh = m >> 32;
+	if (mh << 32 <= nh) {
+		z = 0xffffffff00000000ULL;
+	} else {
+		z = nh;
+		do_div(z, mh);
+		z <<= 32;
+	}
+	mul64to128(&termh, &terml, m, z);
+	sub128(&remh, &reml, nh, nl, termh, terml);
+	ml = m << 32;
+	while ((s64)remh < 0) {
+		z -= 0x100000000ULL;
+		add128(&remh, &reml, remh, reml, mh, ml);
+	}
+	remh = (remh << 32) | (reml >> 32);
+	if (mh << 32 <= remh) {
+		z |= 0xffffffff;
+	} else {
+		do_div(remh, mh);
+		z |= remh;
+	}
+	return z;
+}
+
+/*
+ * Operations on unpacked elements
+ */
+#define vfp_sign_negate(sign)	(sign ^ 0x8000)
+
+/*
+ * Single-precision
+ */
+struct vfp_single {
+	s16	exponent;
+	u16	sign;
+	u32	significand;
+};
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+extern s32 vfp_get_float(ARMul_State * state, unsigned int reg);
+extern void vfp_put_float(ARMul_State * state, s32 val, unsigned int reg);
+#ifdef __cplusplus
+ }
+#endif
+
+/*
+ * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
+ * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
+ * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
+ *  which are not propagated to the float upon packing.
+ */
+#define VFP_SINGLE_MANTISSA_BITS	(23)
+#define VFP_SINGLE_EXPONENT_BITS	(8)
+#define VFP_SINGLE_LOW_BITS		(32 - VFP_SINGLE_MANTISSA_BITS - 2)
+#define VFP_SINGLE_LOW_BITS_MASK	((1 << VFP_SINGLE_LOW_BITS) - 1)
+
+/*
+ * The bit in an unpacked float which indicates that it is a quiet NaN
+ */
+#define VFP_SINGLE_SIGNIFICAND_QNAN	(1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
+
+/*
+ * Operations on packed single-precision numbers
+ */
+#define vfp_single_packed_sign(v)	((v) & 0x80000000)
+#define vfp_single_packed_negate(v)	((v) ^ 0x80000000)
+#define vfp_single_packed_abs(v)	((v) & ~0x80000000)
+#define vfp_single_packed_exponent(v)	(((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
+#define vfp_single_packed_mantissa(v)	((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
+
+/*
+ * Unpack a single-precision float.  Note that this returns the magnitude
+ * of the single-precision float mantissa with the 1. if necessary,
+ * aligned to bit 30.
+ */
+static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
+{
+	u32 significand;
+
+	s->sign = vfp_single_packed_sign(val) >> 16,
+	s->exponent = vfp_single_packed_exponent(val);
+
+	significand = (u32) val;
+	significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
+	if (s->exponent && s->exponent != 255)
+		significand |= 0x40000000;
+	s->significand = significand;
+}
+
+/*
+ * Re-pack a single-precision float.  This assumes that the float is
+ * already normalised such that the MSB is bit 30, _not_ bit 31.
+ */
+static inline s32 vfp_single_pack(struct vfp_single *s)
+{
+	u32 val;
+	val = (s->sign << 16) +
+	      (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
+	      (s->significand >> VFP_SINGLE_LOW_BITS);
+	return (s32)val;
+}
+
+#define VFP_NUMBER		(1<<0)
+#define VFP_ZERO		(1<<1)
+#define VFP_DENORMAL		(1<<2)
+#define VFP_INFINITY		(1<<3)
+#define VFP_NAN			(1<<4)
+#define VFP_NAN_SIGNAL		(1<<5)
+
+#define VFP_QNAN		(VFP_NAN)
+#define VFP_SNAN		(VFP_NAN|VFP_NAN_SIGNAL)
+
+static inline int vfp_single_type(struct vfp_single *s)
+{
+	int type = VFP_NUMBER;
+	if (s->exponent == 255) {
+		if (s->significand == 0)
+			type = VFP_INFINITY;
+		else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
+			type = VFP_QNAN;
+		else
+			type = VFP_SNAN;
+	} else if (s->exponent == 0) {
+		if (s->significand == 0)
+			type |= VFP_ZERO;
+		else
+			type |= VFP_DENORMAL;
+	}
+	return type;
+}
+
+
+u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
+
+/*
+ * Double-precision
+ */
+struct vfp_double {
+	s16	exponent;
+	u16	sign;
+	u64	significand;
+};
+
+/*
+ * VFP_REG_ZERO is a special register number for vfp_get_double
+ * which returns (double)0.0.  This is useful for the compare with
+ * zero instructions.
+ */
+#ifdef CONFIG_VFPv3
+#define VFP_REG_ZERO	32
+#else
+#define VFP_REG_ZERO	16
+#endif
+#ifdef __cplusplus
+ extern "C" {
+#endif
+extern u64 vfp_get_double(ARMul_State * state, unsigned int reg);
+extern void vfp_put_double(ARMul_State * state, u64 val, unsigned int reg);
+#ifdef __cplusplus
+ }
+#endif
+#define VFP_DOUBLE_MANTISSA_BITS	(52)
+#define VFP_DOUBLE_EXPONENT_BITS	(11)
+#define VFP_DOUBLE_LOW_BITS		(64 - VFP_DOUBLE_MANTISSA_BITS - 2)
+#define VFP_DOUBLE_LOW_BITS_MASK	((1 << VFP_DOUBLE_LOW_BITS) - 1)
+
+/*
+ * The bit in an unpacked double which indicates that it is a quiet NaN
+ */
+#define VFP_DOUBLE_SIGNIFICAND_QNAN	(1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
+
+/*
+ * Operations on packed single-precision numbers
+ */
+#define vfp_double_packed_sign(v)	((v) & (1ULL << 63))
+#define vfp_double_packed_negate(v)	((v) ^ (1ULL << 63))
+#define vfp_double_packed_abs(v)	((v) & ~(1ULL << 63))
+#define vfp_double_packed_exponent(v)	(((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
+#define vfp_double_packed_mantissa(v)	((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
+
+/*
+ * Unpack a double-precision float.  Note that this returns the magnitude
+ * of the double-precision float mantissa with the 1. if necessary,
+ * aligned to bit 62.
+ */
+static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
+{
+	u64 significand;
+
+	s->sign = vfp_double_packed_sign(val) >> 48;
+	s->exponent = vfp_double_packed_exponent(val);
+
+	significand = (u64) val;
+	significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
+	if (s->exponent && s->exponent != 2047)
+		significand |= (1ULL << 62);
+	s->significand = significand;
+}
+
+/*
+ * Re-pack a double-precision float.  This assumes that the float is
+ * already normalised such that the MSB is bit 30, _not_ bit 31.
+ */
+static inline s64 vfp_double_pack(struct vfp_double *s)
+{
+	u64 val;
+	val = ((u64)s->sign << 48) +
+	      ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
+	      (s->significand >> VFP_DOUBLE_LOW_BITS);
+	return (s64)val;
+}
+
+static inline int vfp_double_type(struct vfp_double *s)
+{
+	int type = VFP_NUMBER;
+	if (s->exponent == 2047) {
+		if (s->significand == 0)
+			type = VFP_INFINITY;
+		else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
+			type = VFP_QNAN;
+		else
+			type = VFP_SNAN;
+	} else if (s->exponent == 0) {
+		if (s->significand == 0)
+			type |= VFP_ZERO;
+		else
+			type |= VFP_DENORMAL;
+	}
+	return type;
+}
+
+u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
+
+u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
+
+/*
+ * A special flag to tell the normalisation code not to normalise.
+ */
+#define VFP_NAN_FLAG	0x100
+
+/*
+ * A bit pattern used to indicate the initial (unset) value of the
+ * exception mask, in case nothing handles an instruction.  This
+ * doesn't include the NAN flag, which get masked out before
+ * we check for an error.
+ */
+#define VFP_EXCEPTION_ERROR	((u32)-1 & ~VFP_NAN_FLAG)
+
+/*
+ * A flag to tell vfp instruction type.
+ *  OP_SCALAR - this operation always operates in scalar mode
+ *  OP_SD - the instruction exceptionally writes to a single precision result.
+ *  OP_DD - the instruction exceptionally writes to a double precision result.
+ *  OP_SM - the instruction exceptionally reads from a single precision operand.
+ */
+#define OP_SCALAR	(1 << 0)
+#define OP_SD		(1 << 1)
+#define OP_DD		(1 << 1)
+#define OP_SM		(1 << 2)
+
+struct op {
+	u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr);
+	u32 flags;
+};
+
+static u32 vfp_fls(int x)
+{
+	int r = 32;
+
+	if (!x)
+		return 0;
+	if (!(x & 0xffff0000u)) {
+		x <<= 16;
+		r -= 16;
+	}
+	if (!(x & 0xff000000u)) {
+		x <<= 8;
+		r -= 8;
+	}
+	if (!(x & 0xf0000000u)) {
+		x <<= 4;
+		r -= 4;
+	}
+	if (!(x & 0xc0000000u)) {
+		x <<= 2;
+		r -= 2;
+	}
+	if (!(x & 0x80000000u)) {
+		x <<= 1;
+		r -= 1;
+	}
+	return r;
+
+}
+
+#endif
diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
new file mode 100644
index 000000000..13411ad80
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
@@ -0,0 +1,1263 @@
+/*
+    vfp/vfpdouble.c - ARM VFPv3 emulation unit - SoftFloat double instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R. Housers softfloat library, which
+ * carries the following notice:
+ *
+ * ===========================================================================
+ * This C source file is part of the SoftFloat IEC/IEEE Floating-point
+ * Arithmetic Package, Release 2.
+ *
+ * Written by John R. Hauser.  This work was made possible in part by the
+ * International Computer Science Institute, located at Suite 600, 1947 Center
+ * Street, Berkeley, California 94704.  Funding was partially provided by the
+ * National Science Foundation under grant MIP-9311980.  The original version
+ * of this code was written as part of a project to build a fixed-point vector
+ * processor in collaboration with the University of California at Berkeley,
+ * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+ * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+ * arithmetic/softfloat.html'.
+ *
+ * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+ * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+ * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+ * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+ * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+ *
+ * Derivative works are acceptable, even for commercial purposes, so long as
+ * (1) they include prominent notice that the work is derivative, and (2) they
+ * include prominent notice akin to these three paragraphs for those parts of
+ * this code that are retained.
+ * ===========================================================================
+ */
+ 
+#include "core/arm/skyeye_common/vfp/vfp.h"
+#include "core/arm/skyeye_common/vfp/vfp_helper.h"
+#include "core/arm/skyeye_common/vfp/asm_vfp.h"
+
+static struct vfp_double vfp_double_default_qnan = {
+	//.exponent	= 2047,
+	//.sign		= 0,
+	//.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
+};
+
+static void vfp_double_dump(const char *str, struct vfp_double *d)
+{
+	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
+		 str, d->sign != 0, d->exponent, d->significand);
+}
+
+static void vfp_double_normalise_denormal(struct vfp_double *vd)
+{
+	int bits = 31 - vfp_fls(vd->significand >> 32);
+	if (bits == 31)
+		bits = 63 - vfp_fls(vd->significand);
+
+	vfp_double_dump("normalise_denormal: in", vd);
+
+	if (bits) {
+		vd->exponent -= bits - 1;
+		vd->significand <<= bits;
+	}
+
+	vfp_double_dump("normalise_denormal: out", vd);
+}
+
+u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
+{
+	u64 significand, incr;
+	int exponent, shift, underflow;
+	u32 rmode;
+
+	vfp_double_dump("pack: in", vd);
+
+	/*
+	 * Infinities and NaNs are a special case.
+	 */
+	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
+		goto pack;
+
+	/*
+	 * Special-case zero.
+	 */
+	if (vd->significand == 0) {
+		vd->exponent = 0;
+		goto pack;
+	}
+
+	exponent = vd->exponent;
+	significand = vd->significand;
+
+	shift = 32 - vfp_fls(significand >> 32);
+	if (shift == 32)
+		shift = 64 - vfp_fls(significand);
+	if (shift) {
+		exponent -= shift;
+		significand <<= shift;
+	}
+
+#if 1
+	vd->exponent = exponent;
+	vd->significand = significand;
+	vfp_double_dump("pack: normalised", vd);
+#endif
+
+	/*
+	 * Tiny number?
+	 */
+	underflow = exponent < 0;
+	if (underflow) {
+		significand = vfp_shiftright64jamming(significand, -exponent);
+		exponent = 0;
+#if 1
+		vd->exponent = exponent;
+		vd->significand = significand;
+		vfp_double_dump("pack: tiny number", vd);
+#endif
+		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
+			underflow = 0;
+	}
+
+	/*
+	 * Select rounding increment.
+	 */
+	incr = 0;
+	rmode = fpscr & FPSCR_RMODE_MASK;
+
+	if (rmode == FPSCR_ROUND_NEAREST) {
+		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
+		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
+			incr -= 1;
+	} else if (rmode == FPSCR_ROUND_TOZERO) {
+		incr = 0;
+	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
+		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
+
+	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
+
+	/*
+	 * Is our rounding going to overflow?
+	 */
+	if ((significand + incr) < significand) {
+		exponent += 1;
+		significand = (significand >> 1) | (significand & 1);
+		incr >>= 1;
+#if 1
+		vd->exponent = exponent;
+		vd->significand = significand;
+		vfp_double_dump("pack: overflow", vd);
+#endif
+	}
+
+	/*
+	 * If any of the low bits (which will be shifted out of the
+	 * number) are non-zero, the result is inexact.
+	 */
+	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
+		exceptions |= FPSCR_IXC;
+
+	/*
+	 * Do our rounding.
+	 */
+	significand += incr;
+
+	/*
+	 * Infinity?
+	 */
+	if (exponent >= 2046) {
+		exceptions |= FPSCR_OFC | FPSCR_IXC;
+		if (incr == 0) {
+			vd->exponent = 2045;
+			vd->significand = 0x7fffffffffffffffULL;
+		} else {
+			vd->exponent = 2047;		/* infinity */
+			vd->significand = 0;
+		}
+	} else {
+		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
+			exponent = 0;
+		if (exponent || significand > 0x8000000000000000ULL)
+			underflow = 0;
+		if (underflow)
+			exceptions |= FPSCR_UFC;
+		vd->exponent = exponent;
+		vd->significand = significand >> 1;
+	}
+
+ pack:
+	vfp_double_dump("pack: final", vd);
+	{
+		s64 d = vfp_double_pack(vd);
+		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
+			 dd, d, exceptions);
+		vfp_put_double(state, d, dd);
+	}
+	return exceptions;
+}
+
+/*
+ * Propagate the NaN, setting exceptions if it is signalling.
+ * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
+ */
+static u32
+vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
+		  struct vfp_double *vdm, u32 fpscr)
+{
+	struct vfp_double *nan;
+	int tn, tm = 0;
+
+	tn = vfp_double_type(vdn);
+
+	if (vdm)
+		tm = vfp_double_type(vdm);
+
+	if (fpscr & FPSCR_DEFAULT_NAN)
+		/*
+		 * Default NaN mode - always returns a quiet NaN
+		 */
+		nan = &vfp_double_default_qnan;
+	else {
+		/*
+		 * Contemporary mode - select the first signalling
+		 * NAN, or if neither are signalling, the first
+		 * quiet NAN.
+		 */
+		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
+			nan = vdn;
+		else
+			nan = vdm;
+		/*
+		 * Make the NaN quiet.
+		 */
+		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+	}
+
+	*vdd = *nan;
+
+	/*
+	 * If one was a signalling NAN, raise invalid operation.
+	 */
+	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
+}
+
+/*
+ * Extended operations
+ */
+static u32 vfp_double_fabs(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_put_double(state, vfp_double_packed_abs(vfp_get_double(state, dm)), dd);
+	return 0;
+}
+
+static u32 vfp_double_fcpy(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_put_double(state, vfp_get_double(state, dm), dd);
+	return 0;
+}
+
+static u32 vfp_double_fneg(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_put_double(state, vfp_double_packed_negate(vfp_get_double(state, dm)), dd);
+	return 0;
+}
+
+static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	struct vfp_double vdm, vdd, *vdp;
+	int ret, tm;
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	tm = vfp_double_type(&vdm);
+	if (tm & (VFP_NAN|VFP_INFINITY)) {
+		vdp = &vdd;
+
+		if (tm & VFP_NAN)
+			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
+		else if (vdm.sign == 0) {
+ sqrt_copy:
+			vdp = &vdm;
+			ret = 0;
+		} else {
+ sqrt_invalid:
+			vdp = &vfp_double_default_qnan;
+			ret = FPSCR_IOC;
+		}
+		vfp_put_double(state, vfp_double_pack(vdp), dd);
+		return ret;
+	}
+
+	/*
+	 * sqrt(+/- 0) == +/- 0
+	 */
+	if (tm & VFP_ZERO)
+		goto sqrt_copy;
+
+	/*
+	 * Normalise a denormalised number
+	 */
+	if (tm & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdm);
+
+	/*
+	 * sqrt(<0) = invalid
+	 */
+	if (vdm.sign)
+		goto sqrt_invalid;
+
+	vfp_double_dump("sqrt", &vdm);
+
+	/*
+	 * Estimate the square root.
+	 */
+	vdd.sign = 0;
+	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
+	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
+
+	vfp_double_dump("sqrt estimate1", &vdd);
+
+	vdm.significand >>= 1 + (vdm.exponent & 1);
+	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
+
+	vfp_double_dump("sqrt estimate2", &vdd);
+
+	/*
+	 * And now adjust.
+	 */
+	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
+		if (vdd.significand < 2) {
+			vdd.significand = ~0ULL;
+		} else {
+			u64 termh, terml, remh, reml;
+			vdm.significand <<= 2;
+			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
+			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
+			while ((s64)remh < 0) {
+				vdd.significand -= 1;
+				shift64left(&termh, &terml, vdd.significand);
+				terml |= 1;
+				add128(&remh, &reml, remh, reml, termh, terml);
+			}
+			vdd.significand |= (remh | reml) != 0;
+		}
+	}
+	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fsqrt");
+}
+
+/*
+ * Equal	:= ZC
+ * Less than	:= N
+ * Greater than	:= C
+ * Unordered	:= CV
+ */
+static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u32 fpscr)
+{
+	s64 d, m;
+	u32 ret = 0;
+
+	pr_debug("In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr);
+	m = vfp_get_double(state, dm);
+	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	d = vfp_get_double(state, dd);
+	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	if (ret == 0) {
+		//printf("In %s, d=%lld, m =%lld\n ", __FUNCTION__, d, m);
+		if (d == m || vfp_double_packed_abs(d | m) == 0) {
+			/*
+			 * equal
+			 */
+			ret |= FPSCR_Z | FPSCR_C;
+			//printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret);
+		} else if (vfp_double_packed_sign(d ^ m)) {
+			/*
+			 * different signs
+			 */
+			if (vfp_double_packed_sign(d))
+				/*
+				 * d is negative, so d < m
+				 */
+				ret |= FPSCR_N;
+			else
+				/*
+				 * d is positive, so d > m
+				 */
+				ret |= FPSCR_C;
+		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
+			/*
+			 * d < m
+			 */
+			ret |= FPSCR_N;
+		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
+			/*
+			 * d > m
+			 */
+			ret |= FPSCR_C;
+		}
+	}
+	pr_debug("In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret);
+
+	return ret;
+}
+
+static u32 vfp_double_fcmp(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 0, dm, fpscr);
+}
+
+static u32 vfp_double_fcmpe(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 1, dm, fpscr);
+}
+
+static u32 vfp_double_fcmpz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 0, VFP_REG_ZERO, fpscr);
+}
+
+static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr);
+}
+
+static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	struct vfp_single vsd;
+	int tm;
+	u32 exceptions = 0;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+
+	tm = vfp_double_type(&vdm);
+
+	/*
+	 * If we have a signalling NaN, signal invalid operation.
+	 */
+	if (tm == VFP_SNAN)
+		exceptions = FPSCR_IOC;
+
+	if (tm & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdm);
+
+	vsd.sign = vdm.sign;
+	vsd.significand = vfp_hi64to32jamming(vdm.significand);
+
+	/*
+	 * If we have an infinity or a NaN, the exponent must be 255
+	 */
+	if (tm & (VFP_INFINITY|VFP_NAN)) {
+		vsd.exponent = 255;
+		if (tm == VFP_QNAN)
+			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
+		goto pack_nan;
+	} else if (tm & VFP_ZERO)
+		vsd.exponent = 0;
+	else
+		vsd.exponent = vdm.exponent - (1023 - 127);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts");
+
+ pack_nan:
+	vfp_put_float(state, vfp_single_pack(&vsd), sd);
+	return exceptions;
+}
+
+static u32 vfp_double_fuito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 m = vfp_get_float(state, dm);
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vdm.sign = 0;
+	vdm.exponent = 1023 + 63 - 1;
+	vdm.significand = (u64)m;
+
+	return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fuito");
+}
+
+static u32 vfp_double_fsito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 m = vfp_get_float(state, dm);
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vdm.sign = (m & 0x80000000) >> 16;
+	vdm.exponent = 1023 + 63 - 1;
+	vdm.significand = vdm.sign ? -m : m;
+
+	return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fsito");
+}
+
+static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+
+	/*
+	 * Do we have a denormalised number?
+	 */
+	tm = vfp_double_type(&vdm);
+	if (tm & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN)
+		vdm.sign = 0;
+
+	if (vdm.exponent >= 1023 + 32) {
+		d = vdm.sign ? 0 : 0xffffffff;
+		exceptions = FPSCR_IOC;
+	} else if (vdm.exponent >= 1023 - 1) {
+		int shift = 1023 + 63 - vdm.exponent;
+		u64 rem, incr = 0;
+
+		/*
+		 * 2^0 <= m < 2^32-2^8
+		 */
+		d = (vdm.significand << 1) >> shift;
+		rem = vdm.significand << (65 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x8000000000000000ULL;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
+			incr = ~0ULL;
+		}
+
+		if ((rem + incr) < rem) {
+			if (d < 0xffffffff)
+				d += 1;
+			else
+				exceptions |= FPSCR_IOC;
+		}
+
+		if (d && vdm.sign) {
+			d = 0;
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+	} else {
+		d = 0;
+		if (vdm.exponent | vdm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
+				d = 0;
+				exceptions |= FPSCR_IOC;
+			}
+		}
+	}
+
+	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_double_ftouiz(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_ftoui(state, sd, unused, dm, FPSCR_ROUND_TOZERO);
+}
+
+static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
+{
+	struct vfp_double vdm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	vfp_double_dump("VDM", &vdm);
+
+	/*
+	 * Do we have denormalised number?
+	 */
+	tm = vfp_double_type(&vdm);
+	if (tm & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN) {
+		d = 0;
+		exceptions |= FPSCR_IOC;
+	} else if (vdm.exponent >= 1023 + 32) {
+		d = 0x7fffffff;
+		if (vdm.sign)
+			d = ~d;
+		exceptions |= FPSCR_IOC;
+	} else if (vdm.exponent >= 1023 - 1) {
+		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
+		u64 rem, incr = 0;
+
+		d = (vdm.significand << 1) >> shift;
+		rem = vdm.significand << (65 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x8000000000000000ULL;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
+			incr = ~0ULL;
+		}
+
+		if ((rem + incr) < rem && d < 0xffffffff)
+			d += 1;
+		if (d > 0x7fffffff + (vdm.sign != 0)) {
+			d = 0x7fffffff + (vdm.sign != 0);
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+
+		if (vdm.sign)
+			d = -d;
+	} else {
+		d = 0;
+		if (vdm.exponent | vdm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
+				d = -1;
+		}
+	}
+
+	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, (s32)d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_double_ftosiz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_ftosi(state, dd, unused, dm, FPSCR_ROUND_TOZERO);
+}
+
+static struct op fops_ext[] = {
+    { vfp_double_fcpy,   0 },                 //0x00000000 - FEXT_FCPY
+    { vfp_double_fabs,   0 },                 //0x00000001 - FEXT_FABS
+    { vfp_double_fneg,   0 },                 //0x00000002 - FEXT_FNEG
+    { vfp_double_fsqrt,  0 },                 //0x00000003 - FEXT_FSQRT
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_double_fcmp,   OP_SCALAR },         //0x00000008 - FEXT_FCMP
+    { vfp_double_fcmpe,  OP_SCALAR },         //0x00000009 - FEXT_FCMPE
+    { vfp_double_fcmpz,  OP_SCALAR },         //0x0000000A - FEXT_FCMPZ
+    { vfp_double_fcmpez, OP_SCALAR },         //0x0000000B - FEXT_FCMPEZ
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_double_fcvts,  OP_SCALAR|OP_DD },   //0x0000000F - FEXT_FCVT
+    { vfp_double_fuito,  OP_SCALAR },         //0x00000010 - FEXT_FUITO
+    { vfp_double_fsito,  OP_SCALAR },         //0x00000011 - FEXT_FSITO
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_double_ftoui,  OP_SCALAR },         //0x00000018 - FEXT_FTOUI
+    { vfp_double_ftouiz, OP_SCALAR },         //0x00000019 - FEXT_FTOUIZ
+    { vfp_double_ftosi,  OP_SCALAR },         //0x0000001A - FEXT_FTOSI
+    { vfp_double_ftosiz, OP_SCALAR },         //0x0000001B - FEXT_FTOSIZ
+};
+
+
+
+
+static u32
+vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
+			  struct vfp_double *vdm, u32 fpscr)
+{
+	struct vfp_double *vdp;
+	u32 exceptions = 0;
+	int tn, tm;
+
+	tn = vfp_double_type(vdn);
+	tm = vfp_double_type(vdm);
+
+	if (tn & tm & VFP_INFINITY) {
+		/*
+		 * Two infinities.  Are they different signs?
+		 */
+		if (vdn->sign ^ vdm->sign) {
+			/*
+			 * different signs -> invalid
+			 */
+			exceptions = FPSCR_IOC;
+			vdp = &vfp_double_default_qnan;
+		} else {
+			/*
+			 * same signs -> valid
+			 */
+			vdp = vdn;
+		}
+	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
+		/*
+		 * One infinity and one number -> infinity
+		 */
+		vdp = vdn;
+	} else {
+		/*
+		 * 'n' is a NaN of some type
+		 */
+		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
+	}
+	*vdd = *vdp;
+	return exceptions;
+}
+
+static u32
+vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
+	       struct vfp_double *vdm, u32 fpscr)
+{
+	u32 exp_diff;
+	u64 m_sig;
+
+	if (vdn->significand & (1ULL << 63) ||
+	    vdm->significand & (1ULL << 63)) {
+		pr_info("VFP: bad FP values\n");
+		vfp_double_dump("VDN", vdn);
+		vfp_double_dump("VDM", vdm);
+	}
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vdn->exponent < vdm->exponent) {
+		struct vfp_double *t = vdn;
+		vdn = vdm;
+		vdm = t;
+	}
+
+	/*
+	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
+	 * infinity or a NaN here.
+	 */
+	if (vdn->exponent == 2047)
+		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
+
+	/*
+	 * We have two proper numbers, where 'vdn' is the larger magnitude.
+	 *
+	 * Copy 'n' to 'd' before doing the arithmetic.
+	 */
+	*vdd = *vdn;
+
+	/*
+	 * Align 'm' with the result.
+	 */
+	exp_diff = vdn->exponent - vdm->exponent;
+	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
+
+	/*
+	 * If the signs are different, we are really subtracting.
+	 */
+	if (vdn->sign ^ vdm->sign) {
+		m_sig = vdn->significand - m_sig;
+		if ((s64)m_sig < 0) {
+			vdd->sign = vfp_sign_negate(vdd->sign);
+			m_sig = -m_sig;
+		} else if (m_sig == 0) {
+			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
+		}
+	} else {
+		m_sig += vdn->significand;
+	}
+	vdd->significand = m_sig;
+
+	return 0;
+}
+
+static u32
+vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
+		    struct vfp_double *vdm, u32 fpscr)
+{
+	vfp_double_dump("VDN", vdn);
+	vfp_double_dump("VDM", vdm);
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vdn->exponent < vdm->exponent) {
+		struct vfp_double *t = vdn;
+		vdn = vdm;
+		vdm = t;
+		pr_debug("VFP: swapping M <-> N\n");
+	}
+
+	vdd->sign = vdn->sign ^ vdm->sign;
+
+	/*
+	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
+	 */
+	if (vdn->exponent == 2047) {
+		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
+			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
+		if ((vdm->exponent | vdm->significand) == 0) {
+			*vdd = vfp_double_default_qnan;
+			return FPSCR_IOC;
+		}
+		vdd->exponent = vdn->exponent;
+		vdd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * If 'm' is zero, the result is always zero.  In this case,
+	 * 'n' may be zero or a number, but it doesn't matter which.
+	 */
+	if ((vdm->exponent | vdm->significand) == 0) {
+		vdd->exponent = 0;
+		vdd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * We add 2 to the destination exponent for the same reason
+	 * as the addition case - though this time we have +1 from
+	 * each input operand.
+	 */
+	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
+	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
+
+	vfp_double_dump("VDD", vdd);
+	return 0;
+}
+
+#define NEG_MULTIPLY	(1 << 0)
+#define NEG_SUBTRACT	(1 << 1)
+
+static u32
+vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, const char *func)
+{
+	struct vfp_double vdd, vdp, vdn, vdm;
+	u32 exceptions;
+
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
+	if (negate & NEG_MULTIPLY)
+		vdp.sign = vfp_sign_negate(vdp.sign);
+
+	vfp_double_unpack(&vdn, vfp_get_double(state, dd));
+	if (negate & NEG_SUBTRACT)
+		vdn.sign = vfp_sign_negate(vdn.sign);
+
+	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, func);
+}
+
+/*
+ * Standard operations
+ */
+
+/*
+ * sd = sd + (sn * sm)
+ */
+static u32 vfp_double_fmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, 0, "fmac");
+}
+
+/*
+ * sd = sd - (sn * sm)
+ */
+static u32 vfp_double_fnmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
+}
+
+/*
+ * sd = -sd + (sn * sm)
+ */
+static u32 vfp_double_fmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
+}
+
+/*
+ * sd = -sd - (sn * sm)
+ */
+static u32 vfp_double_fnmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	pr_debug("In %s\n", __FUNCTION__);
+	return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
+}
+
+/*
+ * sd = sn * sm
+ */
+static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fmul");
+}
+
+/*
+ * sd = -(sn * sm)
+ */
+static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
+	vdd.sign = vfp_sign_negate(vdd.sign);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * sd = sn + sm
+ */
+static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fadd");
+}
+
+/*
+ * sd = sn - sm
+ */
+static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	if (vdn.exponent == 0 && vdn.significand)
+		vfp_double_normalise_denormal(&vdn);
+
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+	if (vdm.exponent == 0 && vdm.significand)
+		vfp_double_normalise_denormal(&vdm);
+
+	/*
+	 * Subtraction is like addition, but with a negated operand.
+	 */
+	vdm.sign = vfp_sign_negate(vdm.sign);
+
+	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fsub");
+}
+
+/*
+ * sd = sn / sm
+ */
+static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
+{
+	struct vfp_double vdd, vdn, vdm;
+	u32 exceptions = 0;
+	int tm, tn;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vfp_double_unpack(&vdn, vfp_get_double(state, dn));
+	vfp_double_unpack(&vdm, vfp_get_double(state, dm));
+
+	vdd.sign = vdn.sign ^ vdm.sign;
+
+	tn = vfp_double_type(&vdn);
+	tm = vfp_double_type(&vdm);
+
+	/*
+	 * Is n a NAN?
+	 */
+	if (tn & VFP_NAN)
+		goto vdn_nan;
+
+	/*
+	 * Is m a NAN?
+	 */
+	if (tm & VFP_NAN)
+		goto vdm_nan;
+
+	/*
+	 * If n and m are infinity, the result is invalid
+	 * If n and m are zero, the result is invalid
+	 */
+	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+		goto invalid;
+
+	/*
+	 * If n is infinity, the result is infinity
+	 */
+	if (tn & VFP_INFINITY)
+		goto infinity;
+
+	/*
+	 * If m is zero, raise div0 exceptions
+	 */
+	if (tm & VFP_ZERO)
+		goto divzero;
+
+	/*
+	 * If m is infinity, or n is zero, the result is zero
+	 */
+	if (tm & VFP_INFINITY || tn & VFP_ZERO)
+		goto zero;
+
+	if (tn & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdn);
+	if (tm & VFP_DENORMAL)
+		vfp_double_normalise_denormal(&vdm);
+
+	/*
+	 * Ok, we have two numbers, we can perform division.
+	 */	
+	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
+	vdm.significand <<= 1;
+	if (vdm.significand <= (2 * vdn.significand)) {
+		vdn.significand >>= 1;
+		vdd.exponent++;
+	}
+	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
+	if ((vdd.significand & 0x1ff) <= 2) {
+		u64 termh, terml, remh, reml;
+		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
+		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
+		while ((s64)remh < 0) {
+			vdd.significand -= 1;
+			add128(&remh, &reml, remh, reml, 0, vdm.significand);
+		}
+		vdd.significand |= (reml != 0);
+	}
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fdiv");
+
+ vdn_nan:
+	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
+ pack:
+	vfp_put_double(state, vfp_double_pack(&vdd), dd);
+	return exceptions;
+
+ vdm_nan:
+	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
+	goto pack;
+
+ zero:
+	vdd.exponent = 0;
+	vdd.significand = 0;
+	goto pack;
+
+ divzero:
+	exceptions = FPSCR_DZC;
+ infinity:
+	vdd.exponent = 2047;
+	vdd.significand = 0;
+	goto pack;
+
+ invalid:
+	vfp_put_double(state, vfp_double_pack(&vfp_double_default_qnan), dd);
+	return FPSCR_IOC;
+}
+
+static struct op fops[] = {
+	{ vfp_double_fmac,  0 },
+	{ vfp_double_fmsc,  0 },
+	{ vfp_double_fmul,  0 },
+	{ vfp_double_fadd,  0 },
+	{ vfp_double_fnmac, 0 },
+	{ vfp_double_fnmsc, 0 },
+	{ vfp_double_fnmul, 0 },
+	{ vfp_double_fsub,  0 },
+	{ vfp_double_fdiv,  0 },
+};
+
+#define FREG_BANK(x)	((x) & 0x0c)
+#define FREG_IDX(x)	((x) & 3)
+
+u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{
+	u32 op = inst & FOP_MASK;
+	u32 exceptions = 0;
+	unsigned int dest;
+	unsigned int dn = vfp_get_dn(inst);
+	unsigned int dm;
+	unsigned int vecitr, veclen, vecstride;
+	struct op *fop;
+
+	pr_debug("In %s\n", __FUNCTION__);
+	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
+
+	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
+
+	/*
+	 * fcvtds takes an sN register number as destination, not dN.
+	 * It also always operates on scalars.
+	 */
+	if (fop->flags & OP_SD)
+		dest = vfp_get_sd(inst);
+	else
+		dest = vfp_get_dd(inst);
+
+	/*
+	 * f[us]ito takes a sN operand, not a dN operand.
+	 */
+	if (fop->flags & OP_SM)
+		dm = vfp_get_sm(inst);
+	else
+		dm = vfp_get_dm(inst);
+
+	/*
+	 * If destination bank is zero, vector length is always '1'.
+	 * ARM DDI0100F C5.1.3, C5.3.2.
+	 */
+	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
+		veclen = 0;
+	else
+		veclen = fpscr & FPSCR_LENGTH_MASK;
+
+	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
+		 (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+	if (!fop->fn) {
+		printf("VFP: could not find double op %d\n", FEXT_TO_IDX(inst));
+		goto invalid;
+	}
+
+	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
+		u32 except;
+		char type;
+
+		type = fop->flags & OP_SD ? 's' : 'd';
+		if (op == FOP_EXT)
+			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
+				 vecitr >> FPSCR_LENGTH_BIT,
+				 type, dest, dn, dm);
+		else
+			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
+				 vecitr >> FPSCR_LENGTH_BIT,
+				 type, dest, dn, FOP_TO_IDX(op), dm);
+
+		except = fop->fn(state, dest, dn, dm, fpscr);
+		pr_debug("VFP: itr%d: exceptions=%08x\n",
+			 vecitr >> FPSCR_LENGTH_BIT, except);
+
+		exceptions |= except;
+
+		/*
+		 * CHECK: It appears to be undefined whether we stop when
+		 * we encounter an exception.  We continue.
+		 */
+		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
+		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
+		if (FREG_BANK(dm) != 0)
+			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
+	}
+	return exceptions;
+
+ invalid:
+	return ~0;
+}
diff --git a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
new file mode 100644
index 000000000..a57047911
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
@@ -0,0 +1,5123 @@
+/*
+    vfp/vfpinstr.c - ARM VFPv3 emulation unit - Individual instructions data
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Notice: this file should not be compiled as is, and is meant to be
+   included in other files only. */
+
+/* ----------------------------------------------------------------------- */
+/* CDP instructions */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+
+/* ----------------------------------------------------------------------- */
+/* VMLA */
+/* cond 1110 0D00 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vmla
+#define vfpinstr_inst 	vmla_inst
+#define VFPLABEL_INST 	VMLA_INST
+#ifdef VFP_DECODE
+{"vmla",        4,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x0,    9, 11, 0x5,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmla",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmla_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VMLA :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VMLA :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(mm,tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(mm,tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMLS */
+/* cond 1110 0D00 Vn-- Vd-- 101X N1M0 Vm-- */
+#define vfpinstr 	vmls
+#define vfpinstr_inst 	vmls_inst
+#define VFPLABEL_INST 	VMLS_INST
+#ifdef VFP_DECODE
+{"vmls",        7,      ARMVFP2,    28 , 31, 0xF, 25, 27, 0x1,   23, 23, 1,  11, 11, 0,  8, 9, 0x2,  6, 6, 1,  4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmls",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmls_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VMLS :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VMLS :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s VMLS instruction is executed out of here.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(mm,tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(mm,tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}	
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMLA */
+/* cond 1110 0D01 Vn-- Vd-- 101X N1M0 Vm-- */
+#define vfpinstr 	vnmla
+#define vfpinstr_inst 	vnmla_inst
+#define VFPLABEL_INST 	VNMLA_INST
+#ifdef VFP_DECODE
+//{"vnmla",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x0,    9, 11, 0x5,     6, 6, 1,        4, 4, 0},
+{"vnmla",       4,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x1,    9, 11, 0x5,     4, 4, 0},
+{"vnmla",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x2,    9, 11, 0x5,     6, 6, 1,  4, 4, 0},
+//{"vnmla",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x2,    9, 11, 0x5,     6, 6, 1,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vnmla",       0,      ARMVFP2, 0},
+{"vnmla",       0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vnmla_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VNMLA :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VNMLA :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s VNMLA instruction is executed out of here.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(FPNEG32(mm),tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(FPNEG64(mm),tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));	
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMLS */
+/* cond 1110 0D01 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vnmls
+#define vfpinstr_inst 	vnmls_inst
+#define VFPLABEL_INST 	VNMLS_INST
+#ifdef VFP_DECODE
+{"vnmls",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x1,    9, 11, 0x5,     6, 6, 0,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vnmls",       0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vnmls_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VNMLS :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VNMLS :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG32(tmp);
+		mm = FR32(d);
+		tmp = FPADD(FPNEG32(mm),tmp);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		if(!add)
+			tmp = FPNEG64(tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * d)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * d  + 1)));
+		mm = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(mm);
+		tmp = FPADD(FPNEG64(mm),tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}	
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNMUL */
+/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vnmul
+#define vfpinstr_inst 	vnmul_inst
+#define VFPLABEL_INST 	VNMUL_INST
+#ifdef VFP_DECODE
+{"vnmul",       5,      ARMVFP2,        23, 27, 0x1c,   20, 21, 0x2,    9, 11, 0x5,     6, 6, 1,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vnmul",       0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vnmul_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VNMUL :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VNMUL :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}		
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int add = (BIT(6) == 0);
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		//LETS(d,tmp);
+		LETFPS(d,FPNEG32(tmp));
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		mm = ZEXT64(IBITCAST32(FR32(2 * m)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * m  + 1)));
+		tmp = OR(SHL(nn,CONST64(32)),mm);
+		mm = FPBITCAST64(tmp);
+		tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
+		nn = ZEXT64(IBITCAST32(FR32(2 * n  + 1)));
+		nn = OR(SHL(nn,CONST64(32)),tmp);
+		nn = FPBITCAST64(nn);
+		tmp = FPMUL(nn,mm);
+		tmp = FPNEG64(tmp);
+		mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
+		nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));	
+		LETFPS(2*d ,FPBITCAST32(nn));
+		LETFPS(d*2 + 1 , FPBITCAST32(mm));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMUL */
+/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vmul
+#define vfpinstr_inst 	vmul_inst
+#define VFPLABEL_INST 	VMUL_INST
+#ifdef VFP_DECODE
+{"vmul",        5,      ARMVFP2,        23, 27, 0x1c,  20, 21, 0x2,     9, 11, 0x5,      6, 6, 0,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmul",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmul_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VMUL :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VMUL :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//printf("\n\n\t\tin %s instruction is executed out.\n\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		//mm = SITOFP(32,FR(m));
+		//nn = SITOFP(32,FRn));
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPMUL(nn,mm);
+		//LETS(d,tmp);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		//mm = SITOFP(32,RSPR(m));
+		//LETS(d,tmp);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPMUL(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VADD */
+/* cond 1110 0D11 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vadd
+#define vfpinstr_inst 	vadd_inst
+#define VFPLABEL_INST 	VADD_INST
+#ifdef VFP_DECODE
+{"vadd",        5,      ARMVFP2,        23, 27, 0x1c,  20, 21, 0x3,     9, 11, 0x5,      6, 6, 0,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vadd",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vadd_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VADD :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 0)
+{
+	DBG("VADD :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction will implement out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPADD(nn,mm);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPADD(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VSUB */
+/* cond 1110 0D11 Vn-- Vd-- 101X N1M0 Vm-- */
+#define vfpinstr 	vsub
+#define vfpinstr_inst 	vsub_inst
+#define VFPLABEL_INST 	VSUB_INST
+#ifdef VFP_DECODE
+{"vsub",        5,      ARMVFP2,        23, 27, 0x1c,  20, 21, 0x3,     9, 11, 0x5,      6, 6, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vsub",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vsub_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VSUB :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VSUB :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instr=0x%x, instruction is executed out of JIT.\n", __FUNCTION__, instr);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPSUB(nn,mm);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPSUB(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	} 
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VDIV */
+/* cond 1110 1D00 Vn-- Vd-- 101X N0M0 Vm-- */
+#define vfpinstr 	vdiv
+#define vfpinstr_inst 	vdiv_inst
+#define VFPLABEL_INST 	VDIV_INST
+#ifdef VFP_DECODE
+{"vdiv",        5,      ARMVFP2,        23, 27, 0x1d,  20, 21, 0x0,     9, 11, 0x5,      6, 6, 0,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vdiv",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vdiv_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VDIV :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xA && (OPC_2 & 0x2) == 0)
+{
+	DBG("VDIV :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int m;
+	int n;
+	int d ;
+	int s = BIT(8) == 0;
+	Value *mm;
+	Value *nn;
+	Value *tmp;
+	if(s){
+		m = BIT(5) | BITS(0,3) << 1;
+		n = BIT(7) | BITS(16,19) << 1;
+		d = BIT(22) | BITS(12,15) << 1;
+		mm = FR32(m);
+		nn = FR32(n);
+		tmp = FPDIV(nn,mm);
+		LETFPS(d,tmp);
+	}else {
+		m = BITS(0,3) | BIT(5) << 4;
+		n = BITS(16,19) | BIT(7) << 4;
+		d = BIT(22) << 4 | BITS(12,15);
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value* m0 = FPBITCAST64(v64);
+		lo = FR32(2 * n);
+		hi = FR32(2 * n + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		hi64 = ZEXT64(hi);
+		lo64 = ZEXT64(lo);
+		v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		Value *n0 = FPBITCAST64(v64); 
+		tmp = FPDIV(n0,m0);
+		Value *val64 = IBITCAST64(tmp);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	} 		
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVI move immediate */
+/* cond 1110 1D11 im4H Vd-- 101X 0000 im4L */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+#define vfpinstr 	vmovi
+#define vfpinstr_inst 	vmovi_inst
+#define VFPLABEL_INST 	VMOVI_INST
+#ifdef VFP_DECODE
+{"vmov(i)",       4,      ARMVFP3,        23, 27, 0x1d,   20, 21, 0x3,    9, 11, 0x5,     4, 7, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmov(i)",       0,      ARMVFP3, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovi_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int imm;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single   = BIT(inst, 8) == 0;
+	inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
+	unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3);
+	if (inst_cream->single)
+		inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19;
+	else
+		inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16;
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ( (OPC_1 & 0xb) == 0xb && BITS(4, 7) == 0)
+{
+	unsigned int single   = BIT(8) == 0;
+	unsigned int d        = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4);
+	unsigned int imm;
+	instr = BITS(16, 19) << 4 | BITS(0, 3); /* FIXME dirty workaround to get a correct imm */
+	if (single) {
+		imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0x1f : 0)<<25 | BITS(0, 5)<<19;
+	} else {
+		imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0xff : 0)<<22 | BITS(0, 5)<<16;
+	}
+	VMOVI(state, single, d, imm);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_CDP_IMPL
+void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm)
+{
+	DBG("VMOV(I) :\n");
+		
+	if (single)
+	{
+		DBG("\ts%d <= [%x]\n", d, imm);
+		state->ExtReg[d] = imm;
+	}
+	else
+	{
+		/* Check endian please */
+		DBG("\ts[%d-%d] <= [%x-%x]\n", d*2+1, d*2, imm, 0);
+		state->ExtReg[d*2+1] = imm;
+		state->ExtReg[d*2] = 0;
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single = (BIT(8) == 0);
+	int d;
+	int imm32;
+	Value *v;
+	Value *tmp;
+	v = CONST32(BITS(0,3) | BITS(16,19) << 4);
+	//v = CONST64(0x3ff0000000000000);
+	if(single){
+		d = BIT(22) | BITS(12,15) << 1;
+	}else {
+		d = BITS(12,15) | BIT(22) << 4;
+	}
+	if(single){
+		LETFPS(d,FPBITCAST32(v));
+	}else {
+		//v = UITOFP(64,v);
+		//tmp = IBITCAST64(v);
+		LETFPS(d*2 ,FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff)))));
+		LETFPS(d * 2 + 1,FPBITCAST32(TRUNC32(LSHR(v,CONST64(32)))));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVR move register */
+/* cond 1110 1D11 0000 Vd-- 101X 01M0 Vm-- */
+/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
+#define vfpinstr 	vmovr
+#define vfpinstr_inst 	vmovr_inst
+#define VFPLABEL_INST 	VMOVR_INST
+#ifdef VFP_DECODE
+{"vmov(r)",       5,      ARMVFP3,        23, 27, 0x1d,   16, 21, 0x30,    9, 11, 0x5,    6, 7, 1,        4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmov(r)",       0,      ARMVFP3, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovr_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int m;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	VFP_DEBUG_UNTESTED(VMOVR);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single   = BIT(inst, 8) == 0;
+	inst_cream->d        = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
+	inst_cream->m        = (inst_cream->single ? BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4);
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ( (OPC_1 & 0xb) == 0xb && CRn == 0 && (OPC_2 & 0x6) == 0x2 )
+{
+	unsigned int single   = BIT(8) == 0;
+	unsigned int d        = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4);
+	unsigned int m        = (single ? BITS( 0, 3)<<1 | BIT( 5) : BITS( 0, 3) | BIT( 5)<<4);;
+	VMOVR(state, single, d, m);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_CDP_IMPL
+void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword m)
+{
+	DBG("VMOV(R) :\n");
+		
+	if (single)
+	{
+		DBG("\ts%d <= s%d[%x]\n", d, m, state->ExtReg[m]);
+		state->ExtReg[d] = state->ExtReg[m];
+	}
+	else
+	{
+		/* Check endian please */
+		DBG("\ts[%d-%d] <= s[%d-%d][%x-%x]\n", d*2+1, d*2, m*2+1, m*2, state->ExtReg[m*2+1], state->ExtReg[m*2]);
+		state->ExtReg[d*2+1] = state->ExtReg[m*2+1];
+		state->ExtReg[d*2] = state->ExtReg[m*2];
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s VMOV \n", __FUNCTION__);
+	int single   = BIT(8) == 0;
+	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+
+	if (single)
+	{
+		LETFPS(d, FR32(m));
+	}
+	else
+	{
+		/* Check endian please */
+		LETFPS((d*2 + 1), FR32(m*2 + 1));
+		LETFPS((d * 2), FR32(m * 2));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VABS */
+/* cond 1110 1D11 0000 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vabs
+#define vfpinstr_inst 	vabs_inst
+#define VFPLABEL_INST 	VABS_INST
+#ifdef VFP_DECODE
+{"vabs",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x30,    9, 11, 0x5,      6, 7, 3,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vabs",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vabs_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VABS);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VABS :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 0 && (OPC_2 & 0x7) == 6)
+{
+	DBG("VABS :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single   = BIT(8) == 0;
+	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+	Value* m0;
+	if (single)
+	{
+		m0 =  FR32(m);
+		m0 = SELECT(FPCMP_OLT(m0,FPCONST32(0.0)),FPNEG32(m0),m0);
+		LETFPS(d,m0);
+	}
+	else
+	{
+		/* Check endian please */
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		m0 = FPBITCAST64(v64);
+		m0 = SELECT(FPCMP_OLT(m0,FPCONST64(0.0)),FPNEG64(m0),m0);
+		Value *val64 = IBITCAST64(m0);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VNEG */
+/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vneg
+#define vfpinstr_inst 	vneg_inst
+#define VFPLABEL_INST 	VNEG_INST
+#ifdef VFP_DECODE
+//{"vneg",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x30,    9, 11, 0x5,      6, 7, 1,     4, 4, 0},
+{"vneg",        5,      ARMVFP2,        23, 27, 0x1d,  17, 21, 0x18,    9, 11, 0x5,      6, 7, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vneg",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vneg_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VNEG);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+
+		DBG("VNEG :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 2)
+{
+	DBG("VNEG :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single   = BIT(8) == 0;
+	int d        = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
+	int m        = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
+	Value* m0;
+	if (single)
+	{
+		m0 =  FR32(m);
+		m0 = FPNEG32(m0);
+		LETFPS(d,m0);
+	}
+	else
+	{
+		/* Check endian please */
+		Value *lo = FR32(2 * m);
+		Value *hi = FR32(2 * m + 1);
+		hi = IBITCAST32(hi);
+		lo = IBITCAST32(lo);
+		Value *hi64 = ZEXT64(hi);
+		Value* lo64 = ZEXT64(lo);
+		Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
+		m0 = FPBITCAST64(v64);
+		m0 = FPNEG64(m0);
+		Value *val64 = IBITCAST64(m0);
+		hi = LSHR(val64,CONST64(32));
+		lo = AND(val64,CONST64(0xffffffff));
+		hi = TRUNC32(hi);
+		lo  = TRUNC32(lo);
+		hi = FPBITCAST32(hi);
+		lo = FPBITCAST32(lo);		
+		LETFPS(2*d ,lo);
+		LETFPS(d*2 + 1 , hi);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VSQRT */
+/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vsqrt
+#define vfpinstr_inst 	vsqrt_inst
+#define VFPLABEL_INST 	VSQRT_INST
+#ifdef VFP_DECODE
+{"vsqrt",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x31,    9, 11, 0x5,      6, 7, 3,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vsqrt",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vsqrt_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VSQRT :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 6)
+{
+	DBG("VSQRT :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	Value* v;
+	Value* tmp;
+	if(dp_op){
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+		v = OR(v,tmp);
+		v = FPSQRT(FPBITCAST64(v));
+		tmp = TRUNC32(LSHR(IBITCAST64(v),CONST64(32)));
+		v = TRUNC32(AND(IBITCAST64(v),CONST64( 0xffffffff)));		
+		LETFPS(2 * d , FPBITCAST32(v));
+		LETFPS(2 * d + 1, FPBITCAST32(tmp));
+	}else {
+		v = FR32(m);
+		v = FPSQRT(FPEXT(64,v));
+		v = FPTRUNC(32,v);
+		LETFPS(d,v);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCMP VCMPE */
+/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 1 */
+#define vfpinstr 	vcmp
+#define vfpinstr_inst 	vcmp_inst
+#define VFPLABEL_INST 	VCMP_INST
+#ifdef VFP_DECODE
+{"vcmp",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x34,    9, 11, 0x5,      6, 6, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcmp",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcmp_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+
+		DBG("VCMP(1) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 4 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VCMP(1) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	Value* v;
+	Value* tmp;
+	Value* n;
+	Value* z;
+	Value* c;
+	Value* vt;
+	Value* v1;
+	Value* nzcv;
+	if(dp_op){
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+		v1 = OR(v,tmp);
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
+		v = OR(v,tmp);
+		z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
+		n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
+		c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
+		tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
+		v1 = tmp;
+		c = OR(c,tmp);
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}else {
+		z = FPCMP_OEQ(FR32(d),FR32(m));
+		n = FPCMP_OLT(FR32(d),FR32(m));
+		c = FPCMP_OGE(FR32(d),FR32(m)); 
+		tmp = FPCMP_UNO(FR32(d),FR32(m));
+		c = OR(c,tmp);
+		v1 = tmp;
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCMP VCMPE */
+/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 2 */
+#define vfpinstr 	vcmp2
+#define vfpinstr_inst 	vcmp2_inst
+#define VFPLABEL_INST 	VCMP2_INST
+#ifdef VFP_DECODE
+{"vcmp2",        5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x35,    9, 11, 0x5,     0, 6, 0x40},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcmp2",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcmp2_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCMP(2) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 5 && (OPC_2 & 0x2) == 2 && CRm == 0)
+{
+	DBG("VCMP(2) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction will executed out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
+	//int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	Value* v;
+	Value* tmp;
+	Value* n;
+	Value* z;
+	Value* c;
+	Value* vt;
+	Value* v1;
+	Value* nzcv;
+	if(dp_op){
+		v1 = CONST64(0);
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
+		v = OR(v,tmp);
+		z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
+		n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
+		c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1)); 
+		tmp =  FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
+		v1 = tmp;
+		c = OR(c,tmp);
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}else {
+		v1 = CONST(0);
+		v1 = FPBITCAST32(v1);
+		z = FPCMP_OEQ(FR32(d),v1);
+		n = FPCMP_OLT(FR32(d),v1);
+		c = FPCMP_OGE(FR32(d),v1); 
+		tmp = FPCMP_UNO(FR32(d),v1);
+		c = OR(c,tmp);
+		v1 = tmp;
+		n = SHL(ZEXT32(n),CONST32(31));
+		z = SHL(ZEXT32(z),CONST32(30));
+		c = SHL(ZEXT32(c),CONST32(29));
+		v1 = SHL(ZEXT32(v1),CONST(28));
+		nzcv = OR(OR(OR(n,z),c),v1);	
+		v = R(VFP_FPSCR);
+		tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
+		LET(VFP_FPSCR,tmp);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBDS between double and single */
+/* cond 1110 1D11 0111 Vd-- 101X 11M0 Vm-- */
+#define vfpinstr 	vcvtbds
+#define vfpinstr_inst 	vcvtbds_inst
+#define VFPLABEL_INST 	VCVTBDS_INST
+#ifdef VFP_DECODE
+{"vcvt(bds)",   5,      ARMVFP2,        23, 27, 0x1d,  16, 21, 0x37,    9, 11, 0x5,      6, 7, 3,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcvt(bds)",        0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcvtbds_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCVT(BDS) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn == 7 && (OPC_2 & 0x6) == 6)
+{
+	DBG("VCVT(BDS) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is executed out.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int dp_op = (BIT(8) == 1);
+	int d = dp_op ? BITS(12,15) << 1 | BIT(22) : BIT(22) << 4 | BITS(12,15);
+	int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
+	int d2s = dp_op;
+	Value* v;
+	Value* tmp;
+	Value* v1;
+	if(d2s){
+		v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
+		tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
+		v1 = OR(v,tmp);
+		tmp = FPTRUNC(32,FPBITCAST64(v1));
+		LETFPS(d,tmp);	
+	}else {
+		v = FR32(m);
+		tmp = FPEXT(64,v);
+		v = IBITCAST64(tmp);
+		tmp = TRUNC32(AND(v,CONST64(0xffffffff)));
+		v1 = TRUNC32(LSHR(v,CONST64(32)));
+		LETFPS(2 * d, FPBITCAST32(tmp) );
+		LETFPS(2 * d + 1, FPBITCAST32(v1));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBFF between floating point and fixed point */
+/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
+#define vfpinstr 	vcvtbff
+#define vfpinstr_inst 	vcvtbff_inst
+#define VFPLABEL_INST 	VCVTBFF_INST
+#ifdef VFP_DECODE
+{"vcvt(bff)",   6,      ARMVFP3,        23, 27, 0x1d,  19, 21, 0x7,     17, 17, 0x1,      9, 11, 0x5,  	6, 6, 1},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcvt(bff)",   0,      ARMVFP3,         4, 4, 1},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcvtbff_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VCVTBFF);
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCVT(BFF) :\n");
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn >= 0xA && (OPC_2 & 0x2) == 2)
+{
+	DBG("VCVT(BFF) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VCVTBFI between floating point and integer */
+/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
+#define vfpinstr 	vcvtbfi
+#define vfpinstr_inst 	vcvtbfi_inst
+#define VFPLABEL_INST 	VCVTBFI_INST
+#ifdef VFP_DECODE
+{"vcvt(bfi)",   5,      ARMVFP2,        23, 27, 0x1d,  19, 21, 0x7,     9, 11, 0x5,      6, 6, 1,     4, 4, 0},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vcvt(bfi)",   0,      ARMVFP2, 0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vcvtbfi_inst {
+	unsigned int instr;
+	unsigned int dp_operation;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->dp_operation = BIT(inst, 8);
+	inst_cream->instr = inst;
+
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		DBG("VCVT(BFI) :\n");
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		int ret;
+		
+		if (inst_cream->dp_operation)
+			ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		else
+			ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+		CHECK_VFP_CDP_RET;
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_CDP_TRANS
+if ((OPC_1 & 0xB) == 0xB && CRn > 7 && (OPC_2 & 0x2) == 2)
+{
+	DBG("VCVT(BFI) :\n");
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	unsigned int opc2 = BITS(16,18);
+	int to_integer = ((opc2 >> 2) == 1);	
+	int dp_op =  (BIT(8) == 1);
+	unsigned int op = BIT(7);
+	int m,d;
+	Value* v;
+	Value* hi;
+	Value* lo;
+	Value* v64; 
+	if(to_integer){
+		d = BIT(22) | (BITS(12,15) << 1);
+		if(dp_op)
+			m = BITS(0,3) | BIT(5) << 4;
+		else
+			m = BIT(5) | BITS(0,3) << 1;
+	}else {
+		m = BIT(5) | BITS(0,3) << 1;
+		if(dp_op)
+			d = BITS(12,15) | BIT(22) << 4;
+ 		else
+			d  = BIT(22) | BITS(12,15) << 1;		
+	}
+	if(to_integer){
+		if(dp_op){
+			lo = FR32(m * 2);
+		        hi = FR32(m * 2 + 1);	
+			hi = ZEXT64(IBITCAST32(hi));
+			lo = ZEXT64(IBITCAST32(lo));
+			v64 = OR(SHL(hi,CONST64(32)),lo);	
+			if(BIT(16)){
+				v = FPTOSI(32,FPBITCAST64(v64));
+			}
+			else
+				v = FPTOUI(32,FPBITCAST64(v64));
+				
+				v = FPBITCAST32(v);
+				LETFPS(d,v);
+		}else {
+			v = FR32(m);
+			if(BIT(16)){
+				
+				v = FPTOSI(32,v);
+			}
+			else
+				v = FPTOUI(32,v);
+				LETFPS(d,FPBITCAST32(v));
+		}
+	}else {
+		if(dp_op){	
+			v = IBITCAST32(FR32(m));
+			if(BIT(7))
+				v64 = SITOFP(64,v); 
+			else
+				v64 = UITOFP(64,v);
+			v = IBITCAST64(v64);
+			hi = FPBITCAST32(TRUNC32(LSHR(v,CONST64(32))));
+			lo = FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff))));
+			LETFPS(2 * d , lo);
+			LETFPS(2 * d + 1, hi);
+		}else {
+			v = IBITCAST32(FR32(m));
+			if(BIT(7))
+				v = SITOFP(32,v);
+			else
+				v = UITOFP(32,v);
+				LETFPS(d,v);
+		}
+	}
+	return No_exp;
+}
+
+/**
+* @brief The implementation of c language for vcvtbfi instruction of dyncom
+*
+* @param cpu
+* @param instr
+*
+* @return 
+*/
+int vcvtbfi_instr_impl(arm_core_t* cpu, uint32 instr){
+	int dp_operation = BIT(8);
+	int ret;
+	if (dp_operation)
+		ret = vfp_double_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+	else
+		ret = vfp_single_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+
+	vfp_raise_exceptions(cpu, ret, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+	return 0;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* MRC / MCR instructions */
+/* cond 1110 AAAL XXXX XXXX 101C XBB1 XXXX */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- */
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRS between register and single precision */
+/* cond 1110 000o Vn-- Rt-- 1010 N001 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
+#define vfpinstr 	vmovbrs
+#define vfpinstr_inst 	vmovbrs_inst
+#define VFPLABEL_INST 	VMOVBRS_INST
+#ifdef VFP_DECODE
+{"vmovbrs",    3,    ARMVFP2,    21, 27, 0x70,    8, 11, 0xA,    0, 6, 0x10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrs",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrs_inst {
+	unsigned int to_arm;
+	unsigned int t;
+	unsigned int n;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->to_arm   = BIT(inst, 20) == 1;
+	inst_cream->t        = BITS(inst, 12, 15);
+	inst_cream->n        = BIT(inst, 7) | BITS(inst, 16, 19)<<1;
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+           
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t]));
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MRC_TRANS
+if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0)
+{
+	/* VMOV r to s */
+	/* Transfering Rt is not mandatory, as the value of interest is pointed by value */
+	VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, value);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0)
+{
+	/* VMOV s to r */
+	/* Transfering Rt is not mandatory, as the value of interest is pointed by value */
+	VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, &value);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRC_IMPL
+void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value)
+{
+	DBG("VMOV(BRS) :\n");
+	if (to_arm)
+	{
+		DBG("\tr%d <= s%d=[%x]\n", t, n, state->ExtReg[n]);
+		*value = state->ExtReg[n];
+	}
+	else
+	{
+		DBG("\ts%d <= r%d=[%x]\n", n, t, *value);
+		state->ExtReg[n] = *value;
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("VMOV(BRS) :\n");
+	int to_arm   = BIT(20) == 1;
+	int t        = BITS(12, 15);
+	int n        = BIT(7) | BITS(16, 19)<<1;
+
+	if (to_arm)
+	{
+		DBG("\tr%d <= s%d\n", t, n);
+		LET(t, IBITCAST32(FR32(n)));
+	}
+	else
+	{
+		DBG("\ts%d <= r%d\n", n, t);
+		LETFPS(n, FPBITCAST32(R(t)));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMSR */
+/* cond 1110 1110 reg- Rt-- 1010 0001 0000 */
+/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
+#define vfpinstr 	vmsr
+#define vfpinstr_inst 	vmsr_inst
+#define VFPLABEL_INST 	VMSR_INST
+#ifdef VFP_DECODE
+{"vmsr",    2,    ARMVFP2,    20, 27, 0xEE,    0, 11, 0xA10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmsr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmsr_inst {
+	unsigned int reg;
+	unsigned int Rd;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->reg  = BITS(inst, 16, 19);
+	inst_cream->Rd   = BITS(inst, 12, 15);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		/* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled ,
+		   and in privilegied mode */
+		/* Exceptions must be checked, according to v7 ref manual */
+		CHECK_VFP_ENABLED;
+           
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		VMSR(cpu, inst_cream->reg, inst_cream->Rd);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0)
+{
+	VMSR(state, CRn, Rt);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MCR_IMPL
+void VMSR(ARMul_State * state, ARMword reg, ARMword Rt)
+{
+	if (reg == 1)
+	{
+		DBG("VMSR :\tfpscr <= r%d=[%x]\n", Rt, state->Reg[Rt]);
+		state->VFP[VFP_OFFSET(VFP_FPSCR)] = state->Reg[Rt];
+	}
+	else if (reg == 8)
+	{
+		DBG("VMSR :\tfpexc <= r%d=[%x]\n", Rt, state->Reg[Rt]);
+		state->VFP[VFP_OFFSET(VFP_FPEXC)] = state->Reg[Rt];
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	DBG("VMSR :");
+	if(RD == 15) {
+		printf("in %s is not implementation.\n", __FUNCTION__);
+		exit(-1);
+	}
+	
+	Value *data = NULL;
+	int reg = RN;
+	int Rt   = RD;
+	if (reg == 1)
+	{
+		LET(VFP_FPSCR, R(Rt));
+		DBG("\tflags <= fpscr\n");
+	}
+	else
+	{
+		switch (reg)
+		{
+		case 8:
+			LET(VFP_FPEXC, R(Rt));
+			DBG("\tfpexc <= r%d \n", Rt);
+			break;
+		default:
+			DBG("\tSUBARCHITECTURE DEFINED\n");
+			break;
+		}
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRC register to scalar */
+/* cond 1110 0XX0 Vd-- Rt-- 1011 DXX1 0000 */
+/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
+#define vfpinstr 	vmovbrc
+#define vfpinstr_inst 	vmovbrc_inst
+#define VFPLABEL_INST 	VMOVBRC_INST
+#ifdef VFP_DECODE
+{"vmovbrc",    4,    ARMVFP2,    23, 27, 0x1C,    20, 20, 0x0,    8,11,0xB,    0,4,0x10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrc",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrc_inst {
+	unsigned int esize;
+	unsigned int index;
+	unsigned int d;
+	unsigned int t;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+	inst_cream->t     = BITS(inst, 12, 15);
+	/* VFP variant of instruction */
+	inst_cream->esize = 32;
+	inst_cream->index = BIT(inst, 21);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if ((OPC_1 & 0x4) == 0 && CoProc == 11 && CRm == 0)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMRS */
+/* cond 1110 1111 CRn- Rt-- 1010 0001 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
+#define vfpinstr 	vmrs
+#define vfpinstr_inst 	vmrs_inst
+#define VFPLABEL_INST 	VMRS_INST
+#ifdef VFP_DECODE
+{"vmrs",        2,      ARMVFP2,        20, 27, 0xEF,     0, 11, 0xa10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmrs",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmrs_inst {
+	unsigned int reg;
+	unsigned int Rt;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->reg  = BITS(inst, 16, 19);
+	inst_cream->Rt	 = BITS(inst, 12, 15);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		/* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
+		   and in privilegied mode */
+		/* Exceptions must be checked, according to v7 ref manual */
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		DBG("VMRS :");
+	
+		if (inst_cream->reg == 1) /* FPSCR */
+		{
+			if (inst_cream->Rt != 15)
+			{	
+				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSCR)];
+				DBG("\tr%d <= fpscr[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
+			}
+			else
+			{	
+				cpu->NFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 31) & 1;
+				cpu->ZFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 30) & 1;
+				cpu->CFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 29) & 1;
+				cpu->VFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 28) & 1;
+				DBG("\tflags <= fpscr[%1xxxxxxxx]\n", cpu->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
+			}
+		} 
+		else
+		{
+			switch (inst_cream->reg)
+			{
+			case 0:
+				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSID)];
+				DBG("\tr%d <= fpsid[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSID)]);
+				break;
+			case 6:
+				/* MVFR1, VFPv3 only ? */
+				DBG("\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt);
+				break;
+			case 7:
+				/* MVFR0, VFPv3 only? */
+				DBG("\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt);
+				break;
+			case 8:
+				cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPEXC)];
+				DBG("\tr%d <= fpexc[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPEXC)]);
+				break;
+			default:
+				DBG("\tSUBARCHITECTURE DEFINED\n");
+				break;
+			}
+		}
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MRC_TRANS
+if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0)
+{
+	VMRS(state, CRn, Rt, value);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRC_IMPL
+void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword * value)
+{
+	DBG("VMRS :");
+	if (reg == 1)
+	{
+		if (Rt != 15)
+		{
+			*value = state->VFP[VFP_OFFSET(VFP_FPSCR)];
+			DBG("\tr%d <= fpscr[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
+		}
+		else
+		{
+			*value = state->VFP[VFP_OFFSET(VFP_FPSCR)] ;
+			DBG("\tflags <= fpscr[%1xxxxxxxx]\n", state->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
+		}
+	}
+	else
+	{
+		switch (reg)
+		{
+		case 0:
+			*value = state->VFP[VFP_OFFSET(VFP_FPSID)];
+			DBG("\tr%d <= fpsid[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSID)]);
+			break;
+		case 6:
+			/* MVFR1, VFPv3 only ? */
+			DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
+			break;
+		case 7:
+			/* MVFR0, VFPv3 only? */
+			DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
+			break;
+		case 8:
+			*value = state->VFP[VFP_OFFSET(VFP_FPEXC)];
+			DBG("\tr%d <= fpexc[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPEXC)]);
+			break;
+		default:
+			DBG("\tSUBARCHITECTURE DEFINED\n");
+			break;
+		}
+	}
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	DBG("\t\tin %s .\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	
+	Value *data = NULL;
+	int reg = BITS(16, 19);;
+	int Rt   = BITS(12, 15);
+	DBG("VMRS : reg=%d, Rt=%d\n", reg, Rt);
+	if (reg == 1)
+	{
+		if (Rt != 15)
+		{
+			LET(Rt, R(VFP_FPSCR));
+			DBG("\tr%d <= fpscr\n", Rt);
+		}
+		else
+		{
+			//LET(Rt, R(VFP_FPSCR));
+			update_cond_from_fpscr(cpu, instr, bb, pc);
+			DBG("In %s, \tflags <= fpscr\n", __FUNCTION__);
+		}
+	}
+	else
+	{
+		switch (reg)
+		{
+		case 0:
+			LET(Rt, R(VFP_FPSID));
+			DBG("\tr%d <= fpsid\n", Rt);
+			break;
+		case 6:
+			/* MVFR1, VFPv3 only ? */
+			DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
+			break;
+		case 7:
+			/* MVFR0, VFPv3 only? */
+			DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
+			break;
+		case 8:
+			LET(Rt, R(VFP_FPEXC));
+			DBG("\tr%d <= fpexc\n", Rt);
+			break;
+		default:
+			DBG("\tSUBARCHITECTURE DEFINED\n");
+			break;
+		}
+	}
+
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBCR scalar to register */
+/* cond 1110 XXX1 Vd-- Rt-- 1011 NXX1 0000 */
+/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MCR */
+#define vfpinstr 	vmovbcr
+#define vfpinstr_inst 	vmovbcr_inst
+#define VFPLABEL_INST 	VMOVBCR_INST
+#ifdef VFP_DECODE
+{"vmovbcr",    4,    ARMVFP2,    24, 27, 0xE,    20, 20, 1,    8, 11,0xB,    0,4, 0x10},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbcr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbcr_inst {
+	unsigned int esize;
+	unsigned int index;
+	unsigned int d;
+	unsigned int t;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->d     = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
+	inst_cream->t     = BITS(inst, 12, 15);
+	/* VFP variant of instruction */
+	inst_cream->esize = 32;
+	inst_cream->index = BIT(inst, 21);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCR_TRANS
+if (CoProc == 11 && CRm == 0)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* MRRC / MCRR instructions */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+/* cond 1100 0100 Rt2- Rt-- copr opc1 CRm- MCRR */
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRRSS between 2 registers to 2 singles */
+/* cond 1100 010X Rt2- Rt-- 1010 00X1 Vm-- */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+#define vfpinstr 	vmovbrrss
+#define vfpinstr_inst 	vmovbrrss_inst
+#define VFPLABEL_INST 	VMOVBRRSS_INST
+#ifdef VFP_DECODE
+{"vmovbrrss",    3,    ARMVFP2,    21, 27, 0x62,    8, 11, 0xA,    4, 4, 1},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrrss",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrrss_inst {
+	unsigned int to_arm;
+	unsigned int t;
+	unsigned int t2;
+	unsigned int m;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->to_arm     = BIT(inst, 20) == 1;
+	inst_cream->t          = BITS(inst, 12, 15);
+	inst_cream->t2         = BITS(inst, 16, 19);
+	inst_cream->m          = BITS(inst, 0, 3)<<1|BIT(inst, 5);
+   
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCRR_TRANS
+if (CoProc == 10 && (OPC_1 & 0xD) == 1)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRRC_TRANS
+if (CoProc == 10 && (OPC_1 & 0xD) == 1)
+{
+	VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arch_arm_undef(cpu, bb, instr);
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VMOVBRRD between 2 registers and 1 double */
+/* cond 1100 010X Rt2- Rt-- 1011 00X1 Vm-- */
+/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
+#define vfpinstr 	vmovbrrd
+#define vfpinstr_inst 	vmovbrrd_inst
+#define VFPLABEL_INST 	VMOVBRRD_INST
+#ifdef VFP_DECODE
+{"vmovbrrd",    3,    ARMVFP2,    21, 27, 0x62,    6, 11, 0x2c,    4, 4, 1},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vmovbrrd",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vmovbrrd_inst {
+	unsigned int to_arm;
+	unsigned int t;
+	unsigned int t2;
+	unsigned int m;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->to_arm   = BIT(inst, 20) == 1;
+	inst_cream->t        = BITS(inst, 12, 15);
+	inst_cream->t2       = BITS(inst, 16, 19);
+	inst_cream->m        = BIT(inst, 5)<<4 | BITS(inst, 0, 3);
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m, 
+				&(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2]));
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_MCRR_TRANS
+if (CoProc == 11 && (OPC_1 & 0xD) == 1)
+{
+	/* Transfering Rt and Rt2 is not mandatory, as the value of interest is pointed by value1 and value2 */
+	VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, &value1, &value2);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRRC_TRANS
+if (CoProc == 11 && (OPC_1 & 0xD) == 1)
+{
+	/* Transfering Rt and Rt2 is not mandatory, as the value of interest is pointed by value1 and value2 */
+	VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, value1, value2);
+	return ARMul_DONE;
+}
+#endif
+#ifdef VFP_MRRC_IMPL
+void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2)
+{
+	DBG("VMOV(BRRD) :\n");
+	if (to_arm)
+	{
+		DBG("\tr[%d-%d] <= s[%d-%d]=[%x-%x]\n", t2, t, n*2+1, n*2, state->ExtReg[n*2+1], state->ExtReg[n*2]);
+		*value2 = state->ExtReg[n*2+1];
+		*value1 = state->ExtReg[n*2];
+	}
+	else
+	{
+		DBG("\ts[%d-%d] <= r[%d-%d]=[%x-%x]\n", n*2+1, n*2, t2, t, *value2, *value1);
+		state->ExtReg[n*2+1] = *value2;
+		state->ExtReg[n*2] = *value1;
+	}
+}
+
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int to_arm   = BIT(20) == 1;
+	int t        = BITS(12, 15);
+	int t2       = BITS(16, 19);
+	int n        = BIT(5)<<4 | BITS(0, 3);
+	if(to_arm){
+		LET(t, IBITCAST32(FR32(n * 2)));
+		LET(t2, IBITCAST32(FR32(n * 2 + 1)));
+	}
+	else{
+		LETFPS(n * 2, FPBITCAST32(R(t)));
+		LETFPS(n * 2 + 1, FPBITCAST32(R(t2)));
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* LDC/STC between 2 registers and 1 double */
+/* cond 110X XXX1 Rn-- CRd- copr imm- imm- LDC */
+/* cond 110X XXX0 Rn-- CRd- copr imm8 imm8 STC */
+
+/* ----------------------------------------------------------------------- */
+/* VSTR */
+/* cond 1101 UD00 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vstr
+#define vfpinstr_inst 	vstr_inst
+#define VFPLABEL_INST 	VSTR_INST
+#ifdef VFP_DECODE
+{"vstr",        3,      ARMVFP2,        24, 27, 0xd,   20, 21, 0,       9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vstr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vstr_inst {
+	unsigned int single;
+	unsigned int n;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int add;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add	   = BIT(inst, 23);
+	inst_cream->imm32  = BITS(inst, 0,7) << 2;
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n	   = BITS(inst, 16, 19);
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
+		addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
+		DBG("VSTR :\n");
+		
+		
+		if (inst_cream->single)
+		{
+			fault = check_address_validity(cpu, addr, &phys_addr, 0);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
+			if (fault) goto MMU_EXCEPTION;
+			DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d, cpu->ExtReg[inst_cream->d]);
+		}
+		else
+		{
+			fault = check_address_validity(cpu, addr, &phys_addr, 0);
+			if (fault) goto MMU_EXCEPTION;
+
+			/* Check endianness */
+			fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d*2], 32);
+			if (fault) goto MMU_EXCEPTION;
+
+			fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
+			if (fault) goto MMU_EXCEPTION;
+
+			fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[inst_cream->d*2+1], 32);
+			if (fault) goto MMU_EXCEPTION;
+			DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, inst_cream->d*2+1, inst_cream->d*2, cpu->ExtReg[inst_cream->d*2+1], cpu->ExtReg[inst_cream->d*2]);
+		}
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_STC_TRANS
+if (P == 1 && W == 0)
+{
+	return VSTR(state, type, instr, value);
+}
+#endif
+#ifdef VFP_STC_IMPL
+int VSTR(ARMul_State * state, int type, ARMword instr, ARMword * value)
+{
+	static int i = 0;
+	static int single_reg, add, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_reg = BIT(8) == 0;	/* Double precision */
+		add = BIT(23);		/* */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		
+		DBG("VSTR :\n");
+		
+		i = 0;
+		regs = 1;
+		
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_DATA)
+	{
+		if (single_reg)
+		{
+			*value = state->ExtReg[d+i];
+			DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d+i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			*value = state->ExtReg[d*2+i];
+			DBG("\taddr[?] <= s[%d]=[%x]\n", d*2+i, state->ExtReg[d*2+i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single = BIT(8) == 0;
+	int add	   = BIT(23);
+	int imm32  = BITS(0,7) << 2;
+	int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int n	   = BITS(16, 19);
+
+	Value* base = (n == 15) ? ADD(AND(R(n), CONST(0xFFFFFFFC)), CONST(8)): R(n);
+	Value* Addr = add ? ADD(base, CONST(imm32)) : SUB(base, CONST(imm32));
+	DBG("VSTR :\n");
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, 4, 0, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, 8, 0, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;
+	if(single){
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+		bb = cpu->dyncom_engine->bb;
+		arch_write_memory(cpu, bb, phys_addr, RSPR(d), 32);
+		#endif
+		//memory_write(cpu, bb, Addr, RSPR(d), 32);
+		memory_write(cpu, bb, Addr, IBITCAST32(FR32(d)), 32);
+		bb = cpu->dyncom_engine->bb;
+	}
+	else{
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+		bb = cpu->dyncom_engine->bb;
+		arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2), 32);
+		#endif
+		//memory_write(cpu, bb, Addr, RSPR(d * 2), 32);
+		memory_write(cpu, bb, Addr, IBITCAST32(FR32(d * 2)), 32);
+		bb = cpu->dyncom_engine->bb;
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
+		bb = cpu->dyncom_engine->bb;
+		arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2 + 1), 32);
+		#endif
+		//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR(d * 2 + 1), 32);
+		memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32(d * 2 + 1)), 32);
+		bb = cpu->dyncom_engine->bb;
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VPUSH */
+/* cond 1101 0D10 1101 Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vpush
+#define vfpinstr_inst 	vpush_inst
+#define VFPLABEL_INST 	VPUSH_INST
+#ifdef VFP_DECODE
+{"vpush",       3,      ARMVFP2,        23, 27, 0x1a,  16, 21, 0x2d,    9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vpush",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vpush_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single  = BIT(inst, 8) == 0;
+	inst_cream->d       = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+	inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+				
+		int i;
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+		DBG("VPUSH :\n");
+			
+		addr = cpu->Reg[R13] - inst_cream->imm32;
+
+
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+				addr += 8;
+			}
+		}
+		DBG("\tsp[%x]", cpu->Reg[R13]);
+		cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32;
+		DBG("=>[%x]\n", cpu->Reg[R13]);
+	
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vpush_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_STC_TRANS
+if (P == 1 && U == 0 && W == 1 && Rn == 0xD)
+{
+	return VPUSH(state, type, instr, value);
+}
+#endif
+#ifdef VFP_STC_IMPL
+int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword * value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FSTMX if regs is odd */
+
+		DBG("VPUSH :\n");
+		DBG("\tsp[%x]", state->Reg[R13]);
+		state->Reg[R13] = state->Reg[R13] - imm32;
+		DBG("=>[%x]\n", state->Reg[R13]);
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	} 
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			*value = state->ExtReg[d + i];
+			DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			*value = state->ExtReg[d*2 + i];
+			DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single  = BIT(8) == 0;
+	int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int imm32   = BITS(0, 7)<<2;
+	int regs    = (single ? BITS(0, 7) : BITS(1, 7));
+
+	DBG("\t\tin %s \n", __FUNCTION__);
+	Value* Addr = SUB(R(13), CONST(imm32));
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;
+	int i;
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR(d + i), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)), 32);
+			bb = cpu->dyncom_engine->bb;
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+			/* Careful of endianness, little by default */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)), 32);
+			bb = cpu->dyncom_engine->bb;
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
+			#endif
+			//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
+			memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
+			bb = cpu->dyncom_engine->bb;
+
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	LET(13, SUB(R(13), CONST(imm32)));
+
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VSTM */
+/* cond 110P UDW0 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vstm
+#define vfpinstr_inst 	vstm_inst
+#define VFPLABEL_INST 	VSTM_INST
+#ifdef VFP_DECODE
+{"vstm",	3,	ARMVFP2,	25, 27, 0x6,	20, 20, 0,	9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vstm",	0,	ARMVFP2,	0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vstm_inst {
+	unsigned int single;
+	unsigned int add;
+	unsigned int wback;
+	unsigned int d;
+	unsigned int n;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add    = BIT(inst, 23);
+	inst_cream->wback  = BIT(inst, 21);
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n      = BITS(inst, 16, 19);
+	inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+	inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST: /* encoding 1 */
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		int i;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
+		DBG("VSTM : addr[%x]\n", addr);
+		
+		
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+				addr += 8;
+			}
+		}
+		if (inst_cream->wback){
+			cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
+						   cpu->Reg[inst_cream->n] - inst_cream->imm32);
+			DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
+		}
+
+	}
+	cpu->Reg[15] += 4;
+	INC_PC(sizeof(vstm_inst));
+
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_STC_TRANS
+/* Should be the last operation of STC */
+return VSTM(state, type, instr, value);
+#endif
+#ifdef VFP_STC_IMPL
+int VSTM(ARMul_State * state, int type, ARMword instr, ARMword * value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		add = BIT(23);		/* */
+		wback = BIT(21);	/* write-back */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		imm32 = BITS(0,7) * 4;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FSTMX if regs is odd */
+
+		DBG("VSTM :\n");
+		
+		if (wback) {
+			state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32);
+			DBG("\twback r%d[%x]\n", n, state->Reg[n]);
+		}
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	} 
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			*value = state->ExtReg[d + i];
+			DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			*value = state->ExtReg[d*2 + i];
+			DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	//arch_arm_undef(cpu, bb, instr);
+	int single = BIT(8) == 0;
+	int add    = BIT(23);
+	int wback  = BIT(21);
+	int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4);
+	int n      = BITS(16, 19);
+	int imm32  = BITS(0, 7)<<2;
+	int regs   = single ? BITS(0, 7) : BITS(1, 7);
+
+	Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
+	DBG("VSTM \n");
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
+
+	int i;	
+	Value* phys_addr;
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+			/* if R(i) is R15? */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR(d + i), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)),32);
+			bb = cpu->dyncom_engine->bb;
+			//if (fault) goto MMU_EXCEPTION;
+			//DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+		
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
+			#endif
+			//memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
+			memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)),32);
+			bb = cpu->dyncom_engine->bb;
+			//if (fault) goto MMU_EXCEPTION;
+
+			//fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
+			bb = cpu->dyncom_engine->bb;
+			arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
+			#endif
+			//memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
+			memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
+			bb = cpu->dyncom_engine->bb;
+			//if (fault) goto MMU_EXCEPTION;
+			//DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+			//addr += 8;
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	if (wback){
+		//cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
+		//			   cpu->Reg[n] - imm32);
+		LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+		DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VPOP */
+/* cond 1100 1D11 1101 Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vpop
+#define vfpinstr_inst 	vpop_inst
+#define VFPLABEL_INST 	VPOP_INST
+#ifdef VFP_DECODE
+{"vpop",        3,      ARMVFP2,        23, 27, 0x19,  16, 21, 0x3d,    9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vpop",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vpop_inst {
+	unsigned int single;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single  = BIT(inst, 8) == 0;
+	inst_cream->d       = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4));
+	inst_cream->imm32   = BITS(inst, 0, 7)<<2;
+	inst_cream->regs    = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+	
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		int i;
+		unsigned int value1, value2;
+
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		DBG("VPOP :\n");
+		
+		addr = cpu->Reg[R13];
+		
+
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_read_memory(core, addr, phys_addr, value1, 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, value1, addr);
+				cpu->ExtReg[inst_cream->d+i] = value1;
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_read_memory(core, addr, phys_addr, value1, 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = interpreter_read_memory(core, addr + 4, phys_addr, value2, 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, value2, value1, addr+4, addr);
+				cpu->ExtReg[(inst_cream->d+i)*2] = value1;
+				cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2;
+				addr += 8;
+			}
+		}
+		DBG("\tsp[%x]", cpu->Reg[R13]);
+		cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32;
+		DBG("=>[%x]\n", cpu->Reg[R13]);
+		
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vpop_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+if (P == 0 && U == 1 && W == 1 && Rn == 0xD)
+{
+	return VPOP(state, type, instr, value);
+}
+#endif
+#ifdef VFP_LDC_IMPL
+int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FLDMX if regs is odd */
+
+		DBG("VPOP :\n");
+		DBG("\tsp[%x]", state->Reg[R13]);
+		state->Reg[R13] = state->Reg[R13] + imm32;
+		DBG("=>[%x]\n", state->Reg[R13]);
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_TRANSFER)
+	{
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			state->ExtReg[d + i] = value;
+			DBG("\ts%d <= [%x]\n", d + i, value);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			state->ExtReg[d*2 + i] = value;
+			DBG("\ts%d <= [%x]\n", d*2 + i, value);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	/* Should check if PC is destination register */
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	DBG("\t\tin %s instruction .\n", __FUNCTION__);
+	//arch_arm_undef(cpu, bb, instr);
+	int single  = BIT(8) == 0;
+	int d       = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int imm32   = BITS(0, 7)<<2;
+	int regs    = (single ? BITS(0, 7) : BITS(1, 7));
+
+	int i;
+	unsigned int value1, value2;
+
+	DBG("VPOP :\n");
+		
+	Value* Addr = R(13);
+	Value* val;
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;	
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS(d + i, FPBITCAST32(val));
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+			/* Careful of endianness, little by default */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2, FPBITCAST32(val));
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	LET(13, ADD(R(13), CONST(imm32)));
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VLDR */
+/* cond 1101 UD01 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vldr
+#define vfpinstr_inst 	vldr_inst
+#define VFPLABEL_INST 	VLDR_INST
+#ifdef VFP_DECODE
+{"vldr",        3,      ARMVFP2,        24, 27, 0xd,   20, 21, 0x1,     9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vldr",    0,    ARMVFP2,    0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vldr_inst {
+	unsigned int single;
+	unsigned int n;
+	unsigned int d;
+	unsigned int imm32;
+	unsigned int add;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx	 = index;
+	inst_base->br	 = NON_BRANCH;
+	inst_base->load_r15 = 0;
+	
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add	   = BIT(inst, 23);
+	inst_cream->imm32  = BITS(inst, 0,7) << 2;
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n	   = BITS(inst, 16, 19);
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
+		addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
+		DBG("VLDR :\n", addr);
+		
+		
+		if (inst_cream->single)
+		{
+			fault = check_address_validity(cpu, addr, &phys_addr, 1);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
+			if (fault) goto MMU_EXCEPTION;
+			DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d, cpu->ExtReg[inst_cream->d], addr);
+		}
+		else
+		{
+			unsigned int word1, word2;
+			fault = check_address_validity(cpu, addr, &phys_addr, 1);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_read_memory(core, addr, phys_addr, word1, 32);
+			if (fault) goto MMU_EXCEPTION;
+
+			fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+			if (fault) goto MMU_EXCEPTION;
+			fault = interpreter_read_memory(core, addr + 4, phys_addr, word2, 32);
+			if (fault) goto MMU_EXCEPTION;
+			/* Check endianness */
+			cpu->ExtReg[inst_cream->d*2] = word1;
+			cpu->ExtReg[inst_cream->d*2+1] = word2;
+			DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", inst_cream->d*2+1, inst_cream->d*2, word2, word1, addr+4, addr);
+		}
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vldr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+if (P == 1 && W == 0)
+{
+	return VLDR(state, type, instr, value);
+}
+#endif
+#ifdef VFP_LDC_IMPL
+int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+	static int i = 0;
+	static int single_reg, add, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_reg = BIT(8) == 0;	/* Double precision */
+		add = BIT(23);		/* */
+		imm32 = BITS(0,7)<<2;	/* may not be used */
+		d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		
+		DBG("VLDR :\n");
+		
+		i = 0;
+		regs = 1;
+		
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_TRANSFER)
+	{
+		return ARMul_DONE;
+	}
+	else if (type == ARMul_DATA)
+	{
+		if (single_reg)
+		{
+			state->ExtReg[d+i] = value;
+			DBG("\ts%d <= [%x]\n", d+i, value);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			state->ExtReg[d*2+i] = value;
+			DBG("\ts[%d] <= [%x]\n", d*2+i, value);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	/* Should check if PC is destination register */
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single = BIT(8) == 0;
+	int add    = BIT(23);
+	int wback  = BIT(21);
+	int d      = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
+	int n      = BITS(16, 19);
+	int imm32  = BITS(0, 7)<<2;
+	int regs   = (single ? BITS(0, 7) : BITS(1, 7));
+	Value* base = R(n);
+	DBG("\t\tin %s .\n", __FUNCTION__);
+	if(n == 15){
+		base = ADD(AND(base, CONST(0xFFFFFFFC)), CONST(8));
+	}
+	Value* Addr = add ? (ADD(base, CONST(imm32))) : (SUB(base, CONST(imm32)));
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, 4, 1, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, 8, 1, cpu->dyncom_engine->bb_trap);
+	//Value* phys_addr;
+	Value* val;
+	if(single){
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+		bb = cpu->dyncom_engine->bb;
+		val = arch_read_memory(cpu,bb,phys_addr,0,32);
+		#endif
+		memory_read(cpu, bb, Addr, 0, 32);
+		bb = cpu->dyncom_engine->bb;
+		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+		//LETS(d, val);
+		LETFPS(d,FPBITCAST32(val));
+	}
+	else{
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+		bb = cpu->dyncom_engine->bb;
+		val = arch_read_memory(cpu,bb,phys_addr,0,32);
+		#endif
+		memory_read(cpu, bb, Addr, 0, 32);
+		bb = cpu->dyncom_engine->bb;
+		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+		//LETS(d * 2, val);
+		LETFPS(d * 2,FPBITCAST32(val));
+		#if 0
+		phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+		bb = cpu->dyncom_engine->bb;
+		val = arch_read_memory(cpu,bb,phys_addr,0,32);
+		#endif
+		memory_read(cpu, bb, ADD(Addr, CONST(4)), 0,32);
+		bb = cpu->dyncom_engine->bb;
+		val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+		//LETS(d * 2 + 1, val);
+		LETFPS( d * 2 + 1,FPBITCAST32(val));
+	}
+
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+/* ----------------------------------------------------------------------- */
+/* VLDM */
+/* cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8 */
+#define vfpinstr 	vldm
+#define vfpinstr_inst 	vldm_inst
+#define VFPLABEL_INST 	VLDM_INST
+#ifdef VFP_DECODE
+{"vldm",	3,	ARMVFP2,	25, 27, 0x6,	20, 20, 1,	9, 11, 0x5},
+#endif
+#ifdef VFP_DECODE_EXCLUSION
+{"vldm",	0,	ARMVFP2,	0},
+#endif
+#ifdef VFP_INTERPRETER_TABLE
+INTERPRETER_TRANSLATE(vfpinstr),
+#endif
+#ifdef VFP_INTERPRETER_LABEL
+&&VFPLABEL_INST,
+#endif
+#ifdef VFP_INTERPRETER_STRUCT
+typedef struct _vldm_inst {
+	unsigned int single;
+	unsigned int add;
+	unsigned int wback;
+	unsigned int d;
+	unsigned int n;
+	unsigned int imm32;
+	unsigned int regs;
+} vfpinstr_inst;
+#endif
+#ifdef VFP_INTERPRETER_TRANS
+ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
+{
+	VFP_DEBUG_TRANSLATE;
+	
+	arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
+	vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+
+	inst_base->cond  = BITS(inst, 28, 31);
+	inst_base->idx     = index;
+	inst_base->br     = NON_BRANCH;
+	inst_base->load_r15 = 0;
+
+	inst_cream->single = BIT(inst, 8) == 0;
+	inst_cream->add    = BIT(inst, 23);
+	inst_cream->wback  = BIT(inst, 21);
+	inst_cream->d      = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
+	inst_cream->n      = BITS(inst, 16, 19);
+	inst_cream->imm32  = BITS(inst, 0, 7)<<2;
+	inst_cream->regs   = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
+
+	return inst_base;
+}
+#endif
+#ifdef VFP_INTERPRETER_IMPL
+VFPLABEL_INST:
+{
+	INC_ICOUNTER;
+	if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
+		CHECK_VFP_ENABLED;
+		
+		int i;
+		
+		vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
+		
+		addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
+		DBG("VLDM : addr[%x]\n", addr);
+		
+		for (i = 0; i < inst_cream->regs; i++)
+		{
+			if (inst_cream->single)
+			{
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, cpu->ExtReg[inst_cream->d+i], addr);
+				addr += 4;
+			}
+			else
+			{
+				/* Careful of endianness, little by default */
+				fault = check_address_validity(cpu, addr, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
+				if (fault) goto MMU_EXCEPTION;
+
+				fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
+				if (fault) goto MMU_EXCEPTION;
+				fault = interpreter_read_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+				if (fault) goto MMU_EXCEPTION;
+				DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2], addr+4, addr);
+				addr += 8;
+			}
+		}
+		if (inst_cream->wback){
+			cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 : 
+						   cpu->Reg[inst_cream->n] - inst_cream->imm32);
+			DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
+		}
+
+	}
+	cpu->Reg[15] += GET_INST_SIZE(cpu);
+	INC_PC(sizeof(vfpinstr_inst));
+	FETCH_INST;
+	GOTO_NEXT_INST;
+}
+#endif
+#ifdef VFP_LDC_TRANS
+/* Should be the last operation of LDC */
+return VLDM(state, type, instr, value);
+#endif
+#ifdef VFP_LDC_IMPL
+int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value)
+{
+	static int i = 0;
+	static int single_regs, add, wback, d, n, imm32, regs;
+	if (type == ARMul_FIRST)
+	{
+		single_regs = BIT(8) == 0;	/* Single precision */
+		add = BIT(23);		/* */
+		wback = BIT(21);	/* write-back */
+		d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Base register */
+		n = BITS(16, 19);	/* destination register */
+		imm32 = BITS(0,7) * 4;	/* may not be used */
+		regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FLDMX if regs is odd */
+
+		DBG("VLDM :\n");
+		
+		if (wback) {
+			state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32);
+			DBG("\twback r%d[%x]\n", n, state->Reg[n]);
+		}
+		
+		i = 0;
+		
+		return ARMul_DONE;
+	} 
+	else if (type == ARMul_DATA)
+	{
+		if (single_regs)
+		{
+			state->ExtReg[d + i] = value;
+			DBG("\ts%d <= [%x] addr[?]\n", d+i, state->ExtReg[d + i]);
+			i++;
+			if (i < regs)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+		else
+		{
+			/* FIXME Careful of endianness, may need to rework this */
+			state->ExtReg[d*2 + i] = value;
+			DBG("\ts[%d] <= [%x] addr[?]\n", d*2 + i, state->ExtReg[d*2 + i]);
+			i++;
+			if (i < regs*2)
+				return ARMul_INC;
+			else
+				return ARMul_DONE;
+		}
+	}
+
+	return -1;
+}
+#endif
+#ifdef VFP_DYNCOM_TABLE
+DYNCOM_FILL_ACTION(vfpinstr),
+#endif
+#ifdef VFP_DYNCOM_TAG
+int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
+{
+	int instr_size = INSTR_SIZE;
+	//DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
+	//arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
+	arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
+	DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
+	*tag |= TAG_NEW_BB;
+	if(instr >> 28 != 0xe)
+		*tag |= TAG_CONDITIONAL;
+
+	return instr_size;
+}
+#endif
+#ifdef VFP_DYNCOM_TRANS
+int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
+	int single = BIT(8) == 0;
+	int add    = BIT(23);
+	int wback  = BIT(21);
+	int d      = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|BIT(22)<<4;
+	int n      = BITS(16, 19);
+	int imm32  = BITS(0, 7)<<2;
+	int regs   = single ? BITS(0, 7) : BITS(1, 7);
+
+	Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
+	//if(single)
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+	//else
+	//	bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
+
+	DBG("VLDM \n");
+	int i;	
+	//Value* phys_addr;
+	Value* val;
+	for (i = 0; i < regs; i++)
+	{
+		if (single)
+		{
+			
+			//fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
+			/* if R(i) is R15? */
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			//LETS(d + i, val);
+			LETFPS(d + i, FPBITCAST32(val));
+			//if (fault) goto MMU_EXCEPTION;
+			//DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
+			Addr = ADD(Addr, CONST(4));
+		}
+		else
+		{
+			#if 0	
+			phys_addr = get_phys_addr(cpu, bb, Addr, 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2, FPBITCAST32(val));
+			#if 0
+			phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
+			bb = cpu->dyncom_engine->bb;
+			val = arch_read_memory(cpu,bb,phys_addr,0,32);
+			#endif
+			memory_read(cpu, bb, Addr, 0, 32);
+			bb = cpu->dyncom_engine->bb;
+			val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
+			LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
+
+			//fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
+			//DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
+			//addr += 8;
+			Addr = ADD(Addr, CONST(8));
+		}
+	}
+	if (wback){
+		//cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 : 
+		//			   cpu->Reg[n] - imm32);
+		LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
+		DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
+	}
+	return No_exp;
+}
+#endif
+#undef vfpinstr
+#undef vfpinstr_inst
+#undef VFPLABEL_INST
+
+#define VFP_DEBUG_TRANSLATE DBG("in func %s, %x\n", __FUNCTION__, inst);
+#define VFP_DEBUG_UNIMPLEMENTED(x) printf("in func %s, " #x " unimplemented\n", __FUNCTION__); exit(-1);
+#define VFP_DEBUG_UNTESTED(x) printf("in func %s, " #x " untested\n", __FUNCTION__);
+
+#define CHECK_VFP_ENABLED	
+	
+#define CHECK_VFP_CDP_RET	vfp_raise_exceptions(cpu, ret, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); //if (ret == -1) {printf("VFP CDP FAILURE %x\n", inst_cream->instr); exit(-1);}
diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
new file mode 100644
index 000000000..8bcbd4fe9
--- /dev/null
+++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
@@ -0,0 +1,1278 @@
+/*
+    vfp/vfpsingle.c - ARM VFPv3 emulation unit - SoftFloat single instruction
+    Copyright (C) 2003 Skyeye Develop Group
+    for help please send mail to <skyeye-developer@lists.gro.clinux.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/*
+ * This code is derived in part from :
+ * - Android kernel
+ * - John R. Housers softfloat library, which
+ * carries the following notice:
+ *
+ * ===========================================================================
+ * This C source file is part of the SoftFloat IEC/IEEE Floating-point
+ * Arithmetic Package, Release 2.
+ *
+ * Written by John R. Hauser.  This work was made possible in part by the
+ * International Computer Science Institute, located at Suite 600, 1947 Center
+ * Street, Berkeley, California 94704.  Funding was partially provided by the
+ * National Science Foundation under grant MIP-9311980.  The original version
+ * of this code was written as part of a project to build a fixed-point vector
+ * processor in collaboration with the University of California at Berkeley,
+ * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+ * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+ * arithmetic/softfloat.html'.
+ *
+ * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+ * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+ * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+ * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+ * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+ *
+ * Derivative works are acceptable, even for commercial purposes, so long as
+ * (1) they include prominent notice that the work is derivative, and (2) they
+ * include prominent notice akin to these three paragraphs for those parts of
+ * this code that are retained.
+ * ===========================================================================
+ */
+
+#include "core/arm/skyeye_common/vfp/vfp_helper.h"
+#include "core/arm/skyeye_common/vfp/asm_vfp.h"
+#include "core/arm/skyeye_common/vfp/vfp.h"
+
+static struct vfp_single vfp_single_default_qnan = {
+	//.exponent	= 255,
+	//.sign		= 0,
+	//.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
+};
+
+static void vfp_single_dump(const char *str, struct vfp_single *s)
+{
+	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
+		 str, s->sign != 0, s->exponent, s->significand);
+}
+
+static void vfp_single_normalise_denormal(struct vfp_single *vs)
+{
+	int bits = 31 - vfp_fls(vs->significand);
+
+	vfp_single_dump("normalise_denormal: in", vs);
+
+	if (bits) {
+		vs->exponent -= bits - 1;
+		vs->significand <<= bits;
+	}
+
+	vfp_single_dump("normalise_denormal: out", vs);
+}
+
+
+u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
+{
+	u32 significand, incr, rmode;
+	int exponent, shift, underflow;
+
+	vfp_single_dump("pack: in", vs);
+
+	/*
+	 * Infinities and NaNs are a special case.
+	 */
+	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
+		goto pack;
+
+	/*
+	 * Special-case zero.
+	 */
+	if (vs->significand == 0) {
+		vs->exponent = 0;
+		goto pack;
+	}
+
+	exponent = vs->exponent;
+	significand = vs->significand;
+
+	/*
+	 * Normalise first.  Note that we shift the significand up to
+	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
+	 * significant bit.
+	 */
+	shift = 32 - vfp_fls(significand);
+	if (shift < 32 && shift) {
+		exponent -= shift;
+		significand <<= shift;
+	}
+
+#if 1
+	vs->exponent = exponent;
+	vs->significand = significand;
+	vfp_single_dump("pack: normalised", vs);
+#endif
+
+	/*
+	 * Tiny number?
+	 */
+	underflow = exponent < 0;
+	if (underflow) {
+		significand = vfp_shiftright32jamming(significand, -exponent);
+		exponent = 0;
+#if 1
+		vs->exponent = exponent;
+		vs->significand = significand;
+		vfp_single_dump("pack: tiny number", vs);
+#endif
+		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
+			underflow = 0;
+	}
+
+	/*
+	 * Select rounding increment.
+	 */
+	incr = 0;
+	rmode = fpscr & FPSCR_RMODE_MASK;
+
+	if (rmode == FPSCR_ROUND_NEAREST) {
+		incr = 1 << VFP_SINGLE_LOW_BITS;
+		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
+			incr -= 1;
+	} else if (rmode == FPSCR_ROUND_TOZERO) {
+		incr = 0;
+	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
+		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
+
+	pr_debug("VFP: rounding increment = 0x%08x\n", incr);
+
+	/*
+	 * Is our rounding going to overflow?
+	 */
+	if ((significand + incr) < significand) {
+		exponent += 1;
+		significand = (significand >> 1) | (significand & 1);
+		incr >>= 1;
+#if 1
+		vs->exponent = exponent;
+		vs->significand = significand;
+		vfp_single_dump("pack: overflow", vs);
+#endif
+	}
+
+	/*
+	 * If any of the low bits (which will be shifted out of the
+	 * number) are non-zero, the result is inexact.
+	 */
+	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
+		exceptions |= FPSCR_IXC;
+
+	/*
+	 * Do our rounding.
+	 */
+	significand += incr;
+
+	/*
+	 * Infinity?
+	 */
+	if (exponent >= 254) {
+		exceptions |= FPSCR_OFC | FPSCR_IXC;
+		if (incr == 0) {
+			vs->exponent = 253;
+			vs->significand = 0x7fffffff;
+		} else {
+			vs->exponent = 255;		/* infinity */
+			vs->significand = 0;
+		}
+	} else {
+		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
+			exponent = 0;
+		if (exponent || significand > 0x80000000)
+			underflow = 0;
+		if (underflow)
+			exceptions |= FPSCR_UFC;
+		vs->exponent = exponent;
+		vs->significand = significand >> 1;
+	}
+
+ pack:
+	vfp_single_dump("pack: final", vs);
+	{
+		s32 d = vfp_single_pack(vs);
+#if 1
+		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
+			 sd, d, exceptions);
+#endif
+		vfp_put_float(state, d, sd);
+	}
+
+	return exceptions;
+}
+
+/*
+ * Propagate the NaN, setting exceptions if it is signalling.
+ * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
+ */
+static u32
+vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
+		  struct vfp_single *vsm, u32 fpscr)
+{
+	struct vfp_single *nan;
+	int tn, tm = 0;
+
+	tn = vfp_single_type(vsn);
+
+	if (vsm)
+		tm = vfp_single_type(vsm);
+
+	if (fpscr & FPSCR_DEFAULT_NAN)
+		/*
+		 * Default NaN mode - always returns a quiet NaN
+		 */
+		nan = &vfp_single_default_qnan;
+	else {
+		/*
+		 * Contemporary mode - select the first signalling
+		 * NAN, or if neither are signalling, the first
+		 * quiet NAN.
+		 */
+		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
+			nan = vsn;
+		else
+			nan = vsm;
+		/*
+		 * Make the NaN quiet.
+		 */
+		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
+	}
+
+	*vsd = *nan;
+
+	/*
+	 * If one was a signalling NAN, raise invalid operation.
+	 */
+	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
+}
+
+
+/*
+ * Extended operations
+ */
+static u32 vfp_single_fabs(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	vfp_put_float(state, vfp_single_packed_abs(m), sd);
+	return 0;
+}
+
+static u32 vfp_single_fcpy(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	vfp_put_float(state, m, sd);
+	return 0;
+}
+
+static u32 vfp_single_fneg(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	vfp_put_float(state, vfp_single_packed_negate(m), sd);
+	return 0;
+}
+
+static const u16 sqrt_oddadjust[] = {
+	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
+	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
+};
+
+static const u16 sqrt_evenadjust[] = {
+	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
+	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
+};
+
+u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
+{
+	int index;
+	u32 z, a;
+
+	if ((significand & 0xc0000000) != 0x40000000) {
+		pr_debug("VFP: estimate_sqrt: invalid significand\n");
+	}
+
+	a = significand << 1;
+	index = (a >> 27) & 15;
+	if (exponent & 1) {
+		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
+		z = ((a / z) << 14) + (z << 15);
+		a >>= 1;
+	} else {
+		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
+		z = a / z + z;
+		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
+		if (z <= a)
+			return (s32)a >> 1;
+	}
+	{
+		u64 v = (u64)a << 31;
+		do_div(v, z);
+		return v + (z >> 1);
+	}
+}
+
+static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm, vsd, *vsp;
+	int ret, tm;
+
+	vfp_single_unpack(&vsm, m);
+	tm = vfp_single_type(&vsm);
+	if (tm & (VFP_NAN|VFP_INFINITY)) {
+		vsp = &vsd;
+
+		if (tm & VFP_NAN)
+			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
+		else if (vsm.sign == 0) {
+ sqrt_copy:
+			vsp = &vsm;
+			ret = 0;
+		} else {
+ sqrt_invalid:
+			vsp = &vfp_single_default_qnan;
+			ret = FPSCR_IOC;
+		}
+		vfp_put_float(state, vfp_single_pack(vsp), sd);
+		return ret;
+	}
+
+	/*
+	 * sqrt(+/- 0) == +/- 0
+	 */
+	if (tm & VFP_ZERO)
+		goto sqrt_copy;
+
+	/*
+	 * Normalise a denormalised number
+	 */
+	if (tm & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsm);
+
+	/*
+	 * sqrt(<0) = invalid
+	 */
+	if (vsm.sign)
+		goto sqrt_invalid;
+
+	vfp_single_dump("sqrt", &vsm);
+
+	/*
+	 * Estimate the square root.
+	 */
+	vsd.sign = 0;
+	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
+	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
+
+	vfp_single_dump("sqrt estimate", &vsd);
+
+	/*
+	 * And now adjust.
+	 */
+	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
+		if (vsd.significand < 2) {
+			vsd.significand = 0xffffffff;
+		} else {
+			u64 term;
+			s64 rem;
+			vsm.significand <<= !(vsm.exponent & 1);
+			term = (u64)vsd.significand * vsd.significand;
+			rem = ((u64)vsm.significand << 32) - term;
+
+			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
+
+			while (rem < 0) {
+				vsd.significand -= 1;
+				rem += ((u64)vsd.significand << 1) | 1;
+			}
+			vsd.significand |= rem != 0;
+		}
+	}
+	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fsqrt");
+}
+
+/*
+ * Equal	:= ZC
+ * Less than	:= N
+ * Greater than	:= C
+ * Unordered	:= CV
+ */
+static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u32 fpscr)
+{
+	s32 d;
+	u32 ret = 0;
+
+	d = vfp_get_float(state, sd);
+	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
+		ret |= FPSCR_C | FPSCR_V;
+		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
+			/*
+			 * Signalling NaN, or signalling on quiet NaN
+			 */
+			ret |= FPSCR_IOC;
+	}
+
+	if (ret == 0) {
+		if (d == m || vfp_single_packed_abs(d | m) == 0) {
+			/*
+			 * equal
+			 */
+			ret |= FPSCR_Z | FPSCR_C;
+		} else if (vfp_single_packed_sign(d ^ m)) {
+			/*
+			 * different signs
+			 */
+			if (vfp_single_packed_sign(d))
+				/*
+				 * d is negative, so d < m
+				 */
+				ret |= FPSCR_N;
+			else
+				/*
+				 * d is positive, so d > m
+				 */
+				ret |= FPSCR_C;
+		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
+			/*
+			 * d < m
+			 */
+			ret |= FPSCR_N;
+		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
+			/*
+			 * d > m
+			 */
+			ret |= FPSCR_C;
+		}
+	}
+	return ret;
+}
+
+static u32 vfp_single_fcmp(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 0, m, fpscr);
+}
+
+static u32 vfp_single_fcmpe(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 1, m, fpscr);
+}
+
+static u32 vfp_single_fcmpz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 0, 0, fpscr);
+}
+
+static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_compare(state, sd, 1, 0, fpscr);
+}
+
+static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm;
+	struct vfp_double vdd;
+	int tm;
+	u32 exceptions = 0;
+
+	vfp_single_unpack(&vsm, m);
+
+	tm = vfp_single_type(&vsm);
+
+	/*
+	 * If we have a signalling NaN, signal invalid operation.
+	 */
+	if (tm == VFP_SNAN)
+		exceptions = FPSCR_IOC;
+
+	if (tm & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsm);
+
+	vdd.sign = vsm.sign;
+	vdd.significand = (u64)vsm.significand << 32;
+
+	/*
+	 * If we have an infinity or NaN, the exponent must be 2047.
+	 */
+	if (tm & (VFP_INFINITY|VFP_NAN)) {
+		vdd.exponent = 2047;
+		if (tm == VFP_QNAN)
+			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
+		goto pack_nan;
+	} else if (tm & VFP_ZERO)
+		vdd.exponent = 0;
+	else
+		vdd.exponent = vsm.exponent + (1023 - 127);
+
+	return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fcvtd");
+
+ pack_nan:
+	vfp_put_double(state, vfp_double_pack(&vdd), dd);
+	return exceptions;
+}
+
+static u32 vfp_single_fuito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vs;
+
+	vs.sign = 0;
+	vs.exponent = 127 + 31 - 1;
+	vs.significand = (u32)m;
+
+	return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fuito");
+}
+
+static u32 vfp_single_fsito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vs;
+
+	vs.sign = (m & 0x80000000) >> 16;
+	vs.exponent = 127 + 31 - 1;
+	vs.significand = vs.sign ? -m : m;
+
+	return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fsito");
+}
+
+static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	vfp_single_unpack(&vsm, m);
+	vfp_single_dump("VSM", &vsm);
+
+	/*
+	 * Do we have a denormalised number?
+	 */
+	tm = vfp_single_type(&vsm);
+	if (tm & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN)
+		vsm.sign = 0;
+
+	if (vsm.exponent >= 127 + 32) {
+		d = vsm.sign ? 0 : 0xffffffff;
+		exceptions = FPSCR_IOC;
+	} else if (vsm.exponent >= 127 - 1) {
+		int shift = 127 + 31 - vsm.exponent;
+		u32 rem, incr = 0;
+
+		/*
+		 * 2^0 <= m < 2^32-2^8
+		 */
+		d = (vsm.significand << 1) >> shift;
+		rem = vsm.significand << (33 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x80000000;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
+			incr = ~0;
+		}
+
+		if ((rem + incr) < rem) {
+			if (d < 0xffffffff)
+				d += 1;
+			else
+				exceptions |= FPSCR_IOC;
+		}
+
+		if (d && vsm.sign) {
+			d = 0;
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+	} else {
+		d = 0;
+		if (vsm.exponent | vsm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
+				d = 0;
+				exceptions |= FPSCR_IOC;
+			}
+		}
+	}
+
+	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_single_ftouiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_single_ftoui(state, sd, unused, m, FPSCR_ROUND_TOZERO);
+}
+
+static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	struct vfp_single vsm;
+	u32 d, exceptions = 0;
+	int rmode = fpscr & FPSCR_RMODE_MASK;
+	int tm;
+
+	vfp_single_unpack(&vsm, m);
+	vfp_single_dump("VSM", &vsm);
+
+	/*
+	 * Do we have a denormalised number?
+	 */
+	tm = vfp_single_type(&vsm);
+	if (vfp_single_type(&vsm) & VFP_DENORMAL)
+		exceptions |= FPSCR_IDC;
+
+	if (tm & VFP_NAN) {
+		d = 0;
+		exceptions |= FPSCR_IOC;
+	} else if (vsm.exponent >= 127 + 32) {
+		/*
+		 * m >= 2^31-2^7: invalid
+		 */
+		d = 0x7fffffff;
+		if (vsm.sign)
+			d = ~d;
+		exceptions |= FPSCR_IOC;
+	} else if (vsm.exponent >= 127 - 1) {
+		int shift = 127 + 31 - vsm.exponent;
+		u32 rem, incr = 0;
+
+		/* 2^0 <= m <= 2^31-2^7 */
+		d = (vsm.significand << 1) >> shift;
+		rem = vsm.significand << (33 - shift);
+
+		if (rmode == FPSCR_ROUND_NEAREST) {
+			incr = 0x80000000;
+			if ((d & 1) == 0)
+				incr -= 1;
+		} else if (rmode == FPSCR_ROUND_TOZERO) {
+			incr = 0;
+		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
+			incr = ~0;
+		}
+
+		if ((rem + incr) < rem && d < 0xffffffff)
+			d += 1;
+		if (d > 0x7fffffff + (vsm.sign != 0)) {
+			d = 0x7fffffff + (vsm.sign != 0);
+			exceptions |= FPSCR_IOC;
+		} else if (rem)
+			exceptions |= FPSCR_IXC;
+
+		if (vsm.sign)
+			d = -d;
+	} else {
+		d = 0;
+		if (vsm.exponent | vsm.significand) {
+			exceptions |= FPSCR_IXC;
+			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
+				d = 1;
+			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
+				d = -1;
+		}
+	}
+
+	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
+
+	vfp_put_float(state, (s32)d, sd);
+
+	return exceptions;
+}
+
+static u32 vfp_single_ftosiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
+{
+	return vfp_single_ftosi(state, sd, unused, m, FPSCR_ROUND_TOZERO);
+}
+
+static struct op fops_ext[] = {
+    { vfp_single_fcpy,   0 },                 //0x00000000 - FEXT_FCPY
+    { vfp_single_fabs,   0 },                 //0x00000001 - FEXT_FABS
+    { vfp_single_fneg,   0 },                 //0x00000002 - FEXT_FNEG
+    { vfp_single_fsqrt,  0 },                 //0x00000003 - FEXT_FSQRT
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_single_fcmp,   OP_SCALAR },         //0x00000008 - FEXT_FCMP
+    { vfp_single_fcmpe,  OP_SCALAR },         //0x00000009 - FEXT_FCMPE
+    { vfp_single_fcmpz,  OP_SCALAR },         //0x0000000A - FEXT_FCMPZ
+    { vfp_single_fcmpez, OP_SCALAR },         //0x0000000B - FEXT_FCMPEZ
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_single_fcvtd,  OP_SCALAR|OP_DD },   //0x0000000F - FEXT_FCVT
+    { vfp_single_fuito,  OP_SCALAR },         //0x00000010 - FEXT_FUITO
+    { vfp_single_fsito,  OP_SCALAR },         //0x00000011 - FEXT_FSITO
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { NULL, 0 },
+    { vfp_single_ftoui,  OP_SCALAR },         //0x00000018 - FEXT_FTOUI
+    { vfp_single_ftouiz, OP_SCALAR },         //0x00000019 - FEXT_FTOUIZ
+    { vfp_single_ftosi,  OP_SCALAR },         //0x0000001A - FEXT_FTOSI
+    { vfp_single_ftosiz, OP_SCALAR },         //0x0000001B - FEXT_FTOSIZ
+};
+
+
+
+
+
+static u32
+vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
+			  struct vfp_single *vsm, u32 fpscr)
+{
+	struct vfp_single *vsp;
+	u32 exceptions = 0;
+	int tn, tm;
+
+	tn = vfp_single_type(vsn);
+	tm = vfp_single_type(vsm);
+
+	if (tn & tm & VFP_INFINITY) {
+		/*
+		 * Two infinities.  Are they different signs?
+		 */
+		if (vsn->sign ^ vsm->sign) {
+			/*
+			 * different signs -> invalid
+			 */
+			exceptions = FPSCR_IOC;
+			vsp = &vfp_single_default_qnan;
+		} else {
+			/*
+			 * same signs -> valid
+			 */
+			vsp = vsn;
+		}
+	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
+		/*
+		 * One infinity and one number -> infinity
+		 */
+		vsp = vsn;
+	} else {
+		/*
+		 * 'n' is a NaN of some type
+		 */
+		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
+	}
+	*vsd = *vsp;
+	return exceptions;
+}
+
+static u32
+vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
+	       struct vfp_single *vsm, u32 fpscr)
+{
+	u32 exp_diff, m_sig;
+
+	if (vsn->significand & 0x80000000 ||
+	    vsm->significand & 0x80000000) {
+		pr_info("VFP: bad FP values\n");
+		vfp_single_dump("VSN", vsn);
+		vfp_single_dump("VSM", vsm);
+	}
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vsn->exponent < vsm->exponent) {
+		struct vfp_single *t = vsn;
+		vsn = vsm;
+		vsm = t;
+	}
+
+	/*
+	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
+	 * infinity or a NaN here.
+	 */
+	if (vsn->exponent == 255)
+		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
+
+	/*
+	 * We have two proper numbers, where 'vsn' is the larger magnitude.
+	 *
+	 * Copy 'n' to 'd' before doing the arithmetic.
+	 */
+	*vsd = *vsn;
+
+	/*
+	 * Align both numbers.
+	 */
+	exp_diff = vsn->exponent - vsm->exponent;
+	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
+
+	/*
+	 * If the signs are different, we are really subtracting.
+	 */
+	if (vsn->sign ^ vsm->sign) {
+		m_sig = vsn->significand - m_sig;
+		if ((s32)m_sig < 0) {
+			vsd->sign = vfp_sign_negate(vsd->sign);
+			m_sig = -m_sig;
+		} else if (m_sig == 0) {
+			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
+		}
+	} else {
+		m_sig = vsn->significand + m_sig;
+	}
+	vsd->significand = m_sig;
+
+	return 0;
+}
+
+static u32
+vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
+{
+	vfp_single_dump("VSN", vsn);
+	vfp_single_dump("VSM", vsm);
+
+	/*
+	 * Ensure that 'n' is the largest magnitude number.  Note that
+	 * if 'n' and 'm' have equal exponents, we do not swap them.
+	 * This ensures that NaN propagation works correctly.
+	 */
+	if (vsn->exponent < vsm->exponent) {
+		struct vfp_single *t = vsn;
+		vsn = vsm;
+		vsm = t;
+		pr_debug("VFP: swapping M <-> N\n");
+	}
+
+	vsd->sign = vsn->sign ^ vsm->sign;
+
+	/*
+	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
+	 */
+	if (vsn->exponent == 255) {
+		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
+			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
+		if ((vsm->exponent | vsm->significand) == 0) {
+			*vsd = vfp_single_default_qnan;
+			return FPSCR_IOC;
+		}
+		vsd->exponent = vsn->exponent;
+		vsd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * If 'm' is zero, the result is always zero.  In this case,
+	 * 'n' may be zero or a number, but it doesn't matter which.
+	 */
+	if ((vsm->exponent | vsm->significand) == 0) {
+		vsd->exponent = 0;
+		vsd->significand = 0;
+		return 0;
+	}
+
+	/*
+	 * We add 2 to the destination exponent for the same reason as
+	 * the addition case - though this time we have +1 from each
+	 * input operand.
+	 */
+	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
+	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
+
+	vfp_single_dump("VSD", vsd);
+	return 0;
+}
+
+#define NEG_MULTIPLY	(1 << 0)
+#define NEG_SUBTRACT	(1 << 1)
+
+static u32
+vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, const char *func)
+{
+	struct vfp_single vsd, vsp, vsn, vsm;
+	u32 exceptions;
+	s32 v;
+
+	v = vfp_get_float(state, sn);
+	pr_debug("VFP: s%u = %08x\n", sn, v);
+	vfp_single_unpack(&vsn, v);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
+	if (negate & NEG_MULTIPLY)
+		vsp.sign = vfp_sign_negate(vsp.sign);
+
+	v = vfp_get_float(state, sd);
+	pr_debug("VFP: s%u = %08x\n", sd, v);
+	vfp_single_unpack(&vsn, v);
+	if (negate & NEG_SUBTRACT)
+		vsn.sign = vfp_sign_negate(vsn.sign);
+
+	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, func);
+}
+
+/*
+ * Standard operations
+ */
+
+/*
+ * sd = sd + (sn * sm)
+ */
+static u32 vfp_single_fmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, 0, "fmac");
+}
+
+/*
+ * sd = sd - (sn * sm)
+ */
+static u32 vfp_single_fnmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sd, sn);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
+}
+
+/*
+ * sd = -sd + (sn * sm)
+ */
+static u32 vfp_single_fmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
+}
+
+/*
+ * sd = -sd - (sn * sm)
+ */
+static u32 vfp_single_fnmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
+}
+
+/*
+ * sd = sn * sm
+ */
+static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions;
+	s32 n = vfp_get_float(state, sn);
+
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, n);
+
+	vfp_single_unpack(&vsn, n);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fmul");
+}
+
+/*
+ * sd = -(sn * sm)
+ */
+static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions;
+	s32 n = vfp_get_float(state, sn);
+
+	pr_debug("VFP: s%u = %08x\n", sn, n);
+
+	vfp_single_unpack(&vsn, n);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
+	vsd.sign = vfp_sign_negate(vsd.sign);
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fnmul");
+}
+
+/*
+ * sd = sn + sm
+ */
+static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions;
+	s32 n = vfp_get_float(state, sn);
+
+	pr_debug("VFP: s%u = %08x\n", sn, n);
+
+	/*
+	 * Unpack and normalise denormals.
+	 */
+	vfp_single_unpack(&vsn, n);
+	if (vsn.exponent == 0 && vsn.significand)
+		vfp_single_normalise_denormal(&vsn);
+
+	vfp_single_unpack(&vsm, m);
+	if (vsm.exponent == 0 && vsm.significand)
+		vfp_single_normalise_denormal(&vsm);
+
+	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fadd");
+}
+
+/*
+ * sd = sn - sm
+ */
+static u32 vfp_single_fsub(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	pr_debug("In %sVFP: s%u = %08x\n", __FUNCTION__, sn, sd);
+	/*
+	 * Subtraction is addition with one sign inverted.
+	 */
+	return vfp_single_fadd(state, sd, sn, vfp_single_packed_negate(m), fpscr);
+}
+
+/*
+ * sd = sn / sm
+ */
+static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
+{
+	struct vfp_single vsd, vsn, vsm;
+	u32 exceptions = 0;
+	s32 n = vfp_get_float(state, sn);
+	int tm, tn;
+
+	pr_debug("VFP: s%u = %08x\n", sn, n);
+
+	vfp_single_unpack(&vsn, n);
+	vfp_single_unpack(&vsm, m);
+
+	vsd.sign = vsn.sign ^ vsm.sign;
+
+	tn = vfp_single_type(&vsn);
+	tm = vfp_single_type(&vsm);
+
+	/*
+	 * Is n a NAN?
+	 */
+	if (tn & VFP_NAN)
+		goto vsn_nan;
+
+	/*
+	 * Is m a NAN?
+	 */
+	if (tm & VFP_NAN)
+		goto vsm_nan;
+
+	/*
+	 * If n and m are infinity, the result is invalid
+	 * If n and m are zero, the result is invalid
+	 */
+	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
+		goto invalid;
+
+	/*
+	 * If n is infinity, the result is infinity
+	 */
+	if (tn & VFP_INFINITY)
+		goto infinity;
+
+	/*
+	 * If m is zero, raise div0 exception
+	 */
+	if (tm & VFP_ZERO)
+		goto divzero;
+
+	/*
+	 * If m is infinity, or n is zero, the result is zero
+	 */
+	if (tm & VFP_INFINITY || tn & VFP_ZERO)
+		goto zero;
+
+	if (tn & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsn);
+	if (tm & VFP_DENORMAL)
+		vfp_single_normalise_denormal(&vsm);
+
+	/*
+	 * Ok, we have two numbers, we can perform division.
+	 */
+	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
+	vsm.significand <<= 1;
+	if (vsm.significand <= (2 * vsn.significand)) {
+		vsn.significand >>= 1;
+		vsd.exponent++;
+	}
+	{
+		u64 significand = (u64)vsn.significand << 32;
+		do_div(significand, vsm.significand);
+		vsd.significand = significand;
+	}
+	if ((vsd.significand & 0x3f) == 0)
+		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
+
+	return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fdiv");
+
+ vsn_nan:
+	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
+ pack:
+	vfp_put_float(state, vfp_single_pack(&vsd), sd);
+	return exceptions;
+
+ vsm_nan:
+	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
+	goto pack;
+
+ zero:
+	vsd.exponent = 0;
+	vsd.significand = 0;
+	goto pack;
+
+ divzero:
+	exceptions = FPSCR_DZC;
+ infinity:
+	vsd.exponent = 255;
+	vsd.significand = 0;
+	goto pack;
+
+ invalid:
+	vfp_put_float(state, vfp_single_pack(&vfp_single_default_qnan), sd);
+	return FPSCR_IOC;
+}
+
+static struct op fops[] = {
+	{ vfp_single_fmac,  0 },
+	{ vfp_single_fmsc,  0 },
+	{ vfp_single_fmul,  0 },
+	{ vfp_single_fadd,  0 },
+	{ vfp_single_fnmac, 0 },
+	{ vfp_single_fnmsc, 0 },
+	{ vfp_single_fnmul, 0 },
+	{ vfp_single_fsub,  0 },
+	{ vfp_single_fdiv,  0 },
+};
+
+#define FREG_BANK(x)	((x) & 0x18)
+#define FREG_IDX(x)	((x) & 7)
+
+u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
+{
+	u32 op = inst & FOP_MASK;
+	u32 exceptions = 0;
+	unsigned int dest;
+	unsigned int sn = vfp_get_sn(inst);
+	unsigned int sm = vfp_get_sm(inst);
+	unsigned int vecitr, veclen, vecstride;
+	struct op *fop;
+	pr_debug("In %s\n", __FUNCTION__);
+
+	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
+
+	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
+
+	/*
+	 * fcvtsd takes a dN register number as destination, not sN.
+	 * Technically, if bit 0 of dd is set, this is an invalid
+	 * instruction.  However, we ignore this for efficiency.
+	 * It also only operates on scalars.
+	 */
+	if (fop->flags & OP_DD)
+		dest = vfp_get_dd(inst);
+	else
+		dest = vfp_get_sd(inst);
+
+	/*
+	 * If destination bank is zero, vector length is always '1'.
+	 * ARM DDI0100F C5.1.3, C5.3.2.
+	 */
+	if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
+		veclen = 0;
+	else
+		veclen = fpscr & FPSCR_LENGTH_MASK;
+
+	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
+		 (veclen >> FPSCR_LENGTH_BIT) + 1);
+
+	if (!fop->fn) {
+		printf("VFP: could not find single op %d, inst=0x%x@0x%x\n", FEXT_TO_IDX(inst), inst, state->Reg[15]);
+		exit(-1);
+		goto invalid;
+	}
+
+	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
+		s32 m = vfp_get_float(state, sm);
+		u32 except;
+		char type;
+
+		type = fop->flags & OP_DD ? 'd' : 's';
+		if (op == FOP_EXT)
+			pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
+				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+				 sm, m);
+		else
+			pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
+				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
+				 FOP_TO_IDX(op), sm, m);
+
+		except = fop->fn(state, dest, sn, m, fpscr);
+		pr_debug("VFP: itr%d: exceptions=%08x\n",
+			 vecitr >> FPSCR_LENGTH_BIT, except);
+
+		exceptions |= except;
+
+		/*
+		 * CHECK: It appears to be undefined whether we stop when
+		 * we encounter an exception.  We continue.
+		 */
+		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
+		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
+		if (FREG_BANK(sm) != 0)
+			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
+	}
+	return exceptions;
+
+ invalid:
+	return (u32)-1;
+}