author    Maurus Cuelenaere <mcuelenaere@gmail.com>  2008-12-31 01:57:07 +0000
committer Maurus Cuelenaere <mcuelenaere@gmail.com>  2008-12-31 01:57:07 +0000
commit    54919ae9176bd9cbffc8412f0c831f471b8ac4d5 (patch)
tree      3022bfec4d40b896ac889a7df8ac6a1d31aa2ef1
parent    e25505daa48d02f7696f5bd225a60d7bc137b4f7 (diff)
download  rockbox-54919ae9176bd9cbffc8412f0c831f471b8ac4d5.tar.gz
          rockbox-54919ae9176bd9cbffc8412f0c831f471b8ac4d5.zip
Ingenic Jz4740:
* Clean up header file a bit
* Add information about the IPU
* Add original license
* Add Ingenic Media Extension Instruction Set header file

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19621 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--  firmware/export/jz4740.h   309
-rw-r--r--  firmware/export/jz_mxu.h  1806
2 files changed, 2064 insertions, 51 deletions
diff --git a/firmware/export/jz4740.h b/firmware/export/jz4740.h
index 7fbfba283f..4b6008a9ac 100644
--- a/firmware/export/jz4740.h
+++ b/firmware/export/jz4740.h
@@ -1,3 +1,39 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Maurus Cuelenaere
11 * Copyright (C) 2006-2007 by Ingenic Semiconductor Inc.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ****************************************************************************/
22
23/*
24 * linux/include/asm-mips/mach-jz4740/jz4740.h
25 *
26 * JZ4740 common definition.
27 *
28 * Copyright (C) 2006 - 2007 Ingenic Semiconductor Inc.
29 *
30 * Author: <lhhuang@ingenic.cn>
31 *
32 * This program is free software; you can redistribute it and/or modify
33 * it under the terms of the GNU General Public License version 2 as
34 * published by the Free Software Foundation.
35 */
36
1/* 37/*
2 * Include file for Ingenic Semiconductor's JZ4740 CPU. 38 * Include file for Ingenic Semiconductor's JZ4740 CPU.
3 */ 39 */
@@ -6,50 +42,50 @@
6 42
7#ifndef __ASSEMBLY__ 43#ifndef __ASSEMBLY__
8 44
9#define REG8(addr) (*(volatile unsigned char *)(addr)) 45#define REG8(addr) (*(volatile unsigned char *)(addr))
10#define REG16(addr) (*(volatile unsigned short *)(addr)) 46#define REG16(addr) (*(volatile unsigned short *)(addr))
11#define REG32(addr) (*(volatile unsigned int *)(addr)) 47#define REG32(addr) (*(volatile unsigned int *)(addr))
12 48
13#endif /* !ASSEMBLY */ 49#endif /* !ASSEMBLY */
14 50
15//---------------------------------------------------------------------- 51/*************************************************************************
16// Boot ROM Specification 52 * Boot ROM Specification
17// 53 */
18 54
19/* NOR Boot config */ 55/* NOR Boot config */
20#define JZ4740_NORBOOT_8BIT 0x00000000 /* 8-bit data bus flash */ 56#define JZ4740_NORBOOT_8BIT 0x00000000 /* 8-bit data bus flash */
21#define JZ4740_NORBOOT_16BIT 0x10101010 /* 16-bit data bus flash */ 57#define JZ4740_NORBOOT_16BIT 0x10101010 /* 16-bit data bus flash */
22#define JZ4740_NORBOOT_32BIT 0x20202020 /* 32-bit data bus flash */ 58#define JZ4740_NORBOOT_32BIT 0x20202020 /* 32-bit data bus flash */
23 59
24/* NAND Boot config */ 60/* NAND Boot config */
25#define JZ4740_NANDBOOT_B8R3 0xffffffff /* 8-bit bus & 3 row cycles */ 61#define JZ4740_NANDBOOT_B8R3 0xffffffff /* 8-bit bus & 3 row cycles */
26#define JZ4740_NANDBOOT_B8R2 0xf0f0f0f0 /* 8-bit bus & 2 row cycles */ 62#define JZ4740_NANDBOOT_B8R2 0xf0f0f0f0 /* 8-bit bus & 2 row cycles */
27#define JZ4740_NANDBOOT_B16R3 0x0f0f0f0f /* 16-bit bus & 3 row cycles */ 63#define JZ4740_NANDBOOT_B16R3 0x0f0f0f0f /* 16-bit bus & 3 row cycles */
28#define JZ4740_NANDBOOT_B16R2 0x00000000 /* 16-bit bus & 2 row cycles */ 64#define JZ4740_NANDBOOT_B16R2 0x00000000 /* 16-bit bus & 2 row cycles */
29 65
30 66
31//---------------------------------------------------------------------- 67/*************************************************************************
32// Register Definitions 68 * Register Definitions
33// 69 */
34#define CPM_BASE 0xB0000000 70#define CPM_BASE 0xB0000000
35#define INTC_BASE 0xB0001000 71#define INTC_BASE 0xB0001000
36#define TCU_BASE 0xB0002000 72#define TCU_BASE 0xB0002000
37#define WDT_BASE 0xB0002000 73#define WDT_BASE 0xB0002000
38#define RTC_BASE 0xB0003000 74#define RTC_BASE 0xB0003000
39#define GPIO_BASE 0xB0010000 75#define GPIO_BASE 0xB0010000
40#define AIC_BASE 0xB0020000 76#define AIC_BASE 0xB0020000
41#define ICDC_BASE 0xB0020000 77#define ICDC_BASE 0xB0020000
42#define MSC_BASE 0xB0021000 78#define MSC_BASE 0xB0021000
43#define UART0_BASE 0xB0030000 79#define UART0_BASE 0xB0030000
44#define I2C_BASE 0xB0042000 80#define I2C_BASE 0xB0042000
45#define SSI_BASE 0xB0043000 81#define SSI_BASE 0xB0043000
46#define SADC_BASE 0xB0070000 82#define SADC_BASE 0xB0070000
47#define EMC_BASE 0xB3010000 83#define EMC_BASE 0xB3010000
48#define DMAC_BASE 0xB3020000 84#define DMAC_BASE 0xB3020000
49#define UHC_BASE 0xB3030000 85#define UHC_BASE 0xB3030000
50#define UDC_BASE 0xB3040000 86#define UDC_BASE 0xB3040000
51#define LCD_BASE 0xB3050000 87#define LCD_BASE 0xB3050000
52#define SLCD_BASE 0xB3050000 88#define SLCD_BASE 0xB3050000
53#define CIM_BASE 0xB3060000 89#define CIM_BASE 0xB3060000
54#define ETH_BASE 0xB3100000 90#define ETH_BASE 0xB3100000
55 91
@@ -59,43 +95,43 @@
59 *************************************************************************/ 95 *************************************************************************/
60#define INTC_ISR (INTC_BASE + 0x00) 96#define INTC_ISR (INTC_BASE + 0x00)
61#define INTC_IMR (INTC_BASE + 0x04) 97#define INTC_IMR (INTC_BASE + 0x04)
62#define INTC_IMSR (INTC_BASE + 0x08) 98#define INTC_IMSR (INTC_BASE + 0x08)
63#define INTC_IMCR (INTC_BASE + 0x0c) 99#define INTC_IMCR (INTC_BASE + 0x0c)
64#define INTC_IPR (INTC_BASE + 0x10) 100#define INTC_IPR (INTC_BASE + 0x10)
65 101
66#define REG_INTC_ISR REG32(INTC_ISR) 102#define REG_INTC_ISR REG32(INTC_ISR)
67#define REG_INTC_IMR REG32(INTC_IMR) 103#define REG_INTC_IMR REG32(INTC_IMR)
68#define REG_INTC_IMSR REG32(INTC_IMSR) 104#define REG_INTC_IMSR REG32(INTC_IMSR)
69#define REG_INTC_IMCR REG32(INTC_IMCR) 105#define REG_INTC_IMCR REG32(INTC_IMCR)
70#define REG_INTC_IPR REG32(INTC_IPR) 106#define REG_INTC_IPR REG32(INTC_IPR)
71 107
72// 1st-level interrupts 108// 1st-level interrupts
73#define IRQ_I2C 1 109#define IRQ_I2C 1
74#define IRQ_EMC 2 110#define IRQ_EMC 2
75#define IRQ_UHC 3 111#define IRQ_UHC 3
76#define IRQ_UART0 9 112#define IRQ_UART0 9
77#define IRQ_SADC 12 113#define IRQ_SADC 12
78#define IRQ_MSC 14 114#define IRQ_MSC 14
79#define IRQ_RTC 15 115#define IRQ_RTC 15
80#define IRQ_SSI 16 116#define IRQ_SSI 16
81#define IRQ_CIM 17 117#define IRQ_CIM 17
82#define IRQ_AIC 18 118#define IRQ_AIC 18
83#define IRQ_ETH 19 119#define IRQ_ETH 19
84#define IRQ_DMAC 20 120#define IRQ_DMAC 20
85#define IRQ_TCU2 21 121#define IRQ_TCU2 21
86#define IRQ_TCU1 22 122#define IRQ_TCU1 22
87#define IRQ_TCU0 23 123#define IRQ_TCU0 23
88#define IRQ_UDC 24 124#define IRQ_UDC 24
89#define IRQ_GPIO3 25 125#define IRQ_GPIO3 25
90#define IRQ_GPIO2 26 126#define IRQ_GPIO2 26
91#define IRQ_GPIO1 27 127#define IRQ_GPIO1 27
92#define IRQ_GPIO0 28 128#define IRQ_GPIO0 28
93#define IRQ_IPU 29 129#define IRQ_IPU 29
94#define IRQ_LCD 30 130#define IRQ_LCD 30
95 131
96// 2nd-level interrupts 132// 2nd-level interrupts
97#define IRQ_DMA_0 32 /* 32 to 37 for DMAC channel 0 to 5 */ 133#define IRQ_DMA_0 32 /* 32 to 37 for DMAC channel 0 to 5 */
98#define IRQ_GPIO_0 48 /* 48 to 175 for GPIO pin 0 to 127 */ 134#define IRQ_GPIO_0 48 /* 48 to 175 for GPIO pin 0 to 127 */
99 135
100 136
101/************************************************************************* 137/*************************************************************************
@@ -103,15 +139,15 @@
103 *************************************************************************/ 139 *************************************************************************/
104#define RTC_RCR (RTC_BASE + 0x00) /* RTC Control Register */ 140#define RTC_RCR (RTC_BASE + 0x00) /* RTC Control Register */
105#define RTC_RSR (RTC_BASE + 0x04) /* RTC Second Register */ 141#define RTC_RSR (RTC_BASE + 0x04) /* RTC Second Register */
106#define RTC_RSAR (RTC_BASE + 0x08) /* RTC Second Alarm Register */ 142#define RTC_RSAR (RTC_BASE + 0x08) /* RTC Second Alarm Register */
107#define RTC_RGR (RTC_BASE + 0x0c) /* RTC Regulator Register */ 143#define RTC_RGR (RTC_BASE + 0x0c) /* RTC Regulator Register */
108 144
109#define RTC_HCR (RTC_BASE + 0x20) /* Hibernate Control Register */ 145#define RTC_HCR (RTC_BASE + 0x20) /* Hibernate Control Register */
110#define RTC_HWFCR (RTC_BASE + 0x24) /* Hibernate Wakeup Filter Counter Reg */ 146#define RTC_HWFCR (RTC_BASE + 0x24) /* Hibernate Wakeup Filter Counter Reg */
111#define RTC_HRCR (RTC_BASE + 0x28) /* Hibernate Reset Counter Register */ 147#define RTC_HRCR (RTC_BASE + 0x28) /* Hibernate Reset Counter Register */
112#define RTC_HWCR (RTC_BASE + 0x2c) /* Hibernate Wakeup Control Register */ 148#define RTC_HWCR (RTC_BASE + 0x2c) /* Hibernate Wakeup Control Register */
113#define RTC_HWRSR (RTC_BASE + 0x30) /* Hibernate Wakeup Status Register */ 149#define RTC_HWRSR (RTC_BASE + 0x30) /* Hibernate Wakeup Status Register */
114#define RTC_HSPR (RTC_BASE + 0x34) /* Hibernate Scratch Pattern Register */ 150#define RTC_HSPR (RTC_BASE + 0x34) /* Hibernate Scratch Pattern Register */
115 151
116#define REG_RTC_RCR REG32(RTC_RCR) 152#define REG_RTC_RCR REG32(RTC_RCR)
117#define REG_RTC_RSR REG32(RTC_RSR) 153#define REG_RTC_RSR REG32(RTC_RSR)
@@ -1447,14 +1483,14 @@
1447 #define SSI_CR1_RTRG_120 (15<< SSI_CR1_RTRG_BIT) 1483 #define SSI_CR1_RTRG_120 (15<< SSI_CR1_RTRG_BIT)
1448#define SSI_CR1_FLEN_BIT 4 1484#define SSI_CR1_FLEN_BIT 4
1449#define SSI_CR1_FLEN_MASK (0xf << SSI_CR1_FLEN_BIT) 1485#define SSI_CR1_FLEN_MASK (0xf << SSI_CR1_FLEN_BIT)
1450 #define SSI_CR1_FLEN_2BIT (0x0 << SSI_CR1_FLEN_BIT) 1486 #define SSI_CR1_FLEN_2BIT (0x0 << SSI_CR1_FLEN_BIT)
1451 #define SSI_CR1_FLEN_3BIT (0x1 << SSI_CR1_FLEN_BIT) 1487 #define SSI_CR1_FLEN_3BIT (0x1 << SSI_CR1_FLEN_BIT)
1452 #define SSI_CR1_FLEN_4BIT (0x2 << SSI_CR1_FLEN_BIT) 1488 #define SSI_CR1_FLEN_4BIT (0x2 << SSI_CR1_FLEN_BIT)
1453 #define SSI_CR1_FLEN_5BIT (0x3 << SSI_CR1_FLEN_BIT) 1489 #define SSI_CR1_FLEN_5BIT (0x3 << SSI_CR1_FLEN_BIT)
1454 #define SSI_CR1_FLEN_6BIT (0x4 << SSI_CR1_FLEN_BIT) 1490 #define SSI_CR1_FLEN_6BIT (0x4 << SSI_CR1_FLEN_BIT)
1455 #define SSI_CR1_FLEN_7BIT (0x5 << SSI_CR1_FLEN_BIT) 1491 #define SSI_CR1_FLEN_7BIT (0x5 << SSI_CR1_FLEN_BIT)
1456 #define SSI_CR1_FLEN_8BIT (0x6 << SSI_CR1_FLEN_BIT) 1492 #define SSI_CR1_FLEN_8BIT (0x6 << SSI_CR1_FLEN_BIT)
1457 #define SSI_CR1_FLEN_9BIT (0x7 << SSI_CR1_FLEN_BIT) 1493 #define SSI_CR1_FLEN_9BIT (0x7 << SSI_CR1_FLEN_BIT)
1458 #define SSI_CR1_FLEN_10BIT (0x8 << SSI_CR1_FLEN_BIT) 1494 #define SSI_CR1_FLEN_10BIT (0x8 << SSI_CR1_FLEN_BIT)
1459 #define SSI_CR1_FLEN_11BIT (0x9 << SSI_CR1_FLEN_BIT) 1495 #define SSI_CR1_FLEN_11BIT (0x9 << SSI_CR1_FLEN_BIT)
1460 #define SSI_CR1_FLEN_12BIT (0xA << SSI_CR1_FLEN_BIT) 1496 #define SSI_CR1_FLEN_12BIT (0xA << SSI_CR1_FLEN_BIT)
@@ -4945,4 +4981,175 @@ do{ \
4945 4981
4946#endif /* !__ASSEMBLY__ */ 4982#endif /* !__ASSEMBLY__ */
4947 4983
4984
4985#ifndef _IPU_H_
4986#define _IPU_H_
4987
4988// IPU_REG_BASE
4989#define IPU_P_BASE 0x13080000
4990#define IPU__OFFSET 0x13080000
4991#define IPU__SIZE 0x00001000
4992
4993struct ipu_module
4994{
4995 unsigned int reg_ctrl; // 0x0
4996 unsigned int reg_status; // 0x4
4997 unsigned int reg_d_fmt; // 0x8
4998 unsigned int reg_y_addr; // 0xc
4999 unsigned int reg_u_addr; // 0x10
5000 unsigned int reg_v_addr; // 0x14
5001 unsigned int reg_in_fm_gs; // 0x18
5002 unsigned int reg_y_stride; // 0x1c
5003 unsigned int reg_uv_stride; // 0x20
5004 unsigned int reg_out_addr; // 0x24
5005 unsigned int reg_out_gs; // 0x28
5006 unsigned int reg_out_stride; // 0x2c
5007 unsigned int rsz_coef_index; // 0x30
5008 unsigned int reg_csc_c0_coef; // 0x34
5009 unsigned int reg_csc_c1_coef; // 0x38
5010 unsigned int reg_csc_c2_coef; // 0x3c
5011 unsigned int reg_csc_c3_coef; // 0x40
5012 unsigned int reg_csc_c4_coef; // 0x44
5013 unsigned int hrsz_coef_lut[20]; // 0x48
5014 unsigned int vrsz_coef_lut[20]; // 0x98
5015};
5016
5017typedef struct
5018{
5019 unsigned int coef;
5020 unsigned short int in_n;
5021 unsigned short int out_n;
5022} rsz_lut;
5023
5024struct Ration2m
5025{
5026 float ratio;
5027 int n, m;
5028};
5029
5030
5031// Register offset
5032#define REG_CTRL 0x0
5033#define REG_STATUS 0x4
5034#define REG_D_FMT 0x8
5035#define REG_Y_ADDR 0xc
5036#define REG_U_ADDR 0x10
5037#define REG_V_ADDR 0x14
5038#define REG_IN_FM_GS 0x18
5039#define REG_Y_STRIDE 0x1c
5040#define REG_UV_STRIDE 0x20
5041#define REG_OUT_ADDR 0x24
5042#define REG_OUT_GS 0x28
5043#define REG_OUT_STRIDE 0x2c
5044#define REG_RSZ_COEF_INDEX 0x30
5045#define REG_CSC_C0_COEF 0x34
5046#define REG_CSC_C1_COEF 0x38
5047#define REG_CSC_C2_COEF 0x3c
5048#define REG_CSC_C3_COEF 0x40
5049#define REG_CSC_C4_COEF 0x44
5050#define HRSZ_LUT_BASE 0x48
5051#define VRSZ_LUT_BASE 0x98
5052
5053// REG_CTRL field define
5054#define IPU_EN (1 << 0)
5055#define RSZ_EN (1 << 1)
5056#define FM_IRQ_EN (1 << 2)
5057#define IPU_RESET (1 << 3)
5058#define H_UP_SCALE (1 << 8)
5059#define V_UP_SCALE (1 << 9)
5060#define H_SCALE_SHIFT (8)
5061#define V_SCALE_SHIFT (9)
5062
5063// REG_STATUS field define
5064#define OUT_END (1 << 0)
5065
5066// REG_D_FMT field define
5067#define INFMT_YUV420 (0 << 0)
5068#define INFMT_YUV422 (1 << 0)
5069#define INFMT_YUV444 (2 << 0)
5070#define INFMT_YUV411 (3 << 0)
5071#define INFMT_YCbCr420 (4 << 0)
5072#define INFMT_YCbCr422 (5 << 0)
5073#define INFMT_YCbCr444 (6 << 0)
5074#define INFMT_YCbCr411 (7 << 0)
5075
5076#define OUTFMT_RGB555 (0 << 16)
5077#define OUTFMT_RGB565 (1 << 16)
5078#define OUTFMT_RGB888 (2 << 16)
5079
5080// REG_IN_FM_GS field define
5081#define IN_FM_W(val) ((val) << 16)
5082#define IN_FM_H(val) ((val) << 0)
5083
 5084// REG_OUT_GS field define
5085#define OUT_FM_W(val) ((val) << 16)
5086#define OUT_FM_H(val) ((val) << 0)
5087
5088// REG_UV_STRIDE field define
5089#define U_STRIDE(val) ((val) << 16)
5090#define V_STRIDE(val) ((val) << 0)
5091
5092
5093#define VE_IDX_SFT 0
5094#define HE_IDX_SFT 16
5095
5096// RSZ_LUT_FIELD
5097#define OUT_N_SFT 0
5098#define OUT_N_MSK 0x1
5099#define IN_N_SFT 1
5100#define IN_N_MSK 0x1
5101#define W_COEF_SFT 2
5102#define W_COEF_MSK 0xFF
5103
5104// function about REG_CTRL
5105#define stop_ipu(IPU_V_BASE) \
5106 REG32(IPU_V_BASE + REG_CTRL) &= ~IPU_EN;
5107
5108#define run_ipu(IPU_V_BASE) \
5109 REG32(IPU_V_BASE + REG_CTRL) |= IPU_EN;
5110
5111#define reset_ipu(IPU_V_BASE) \
5112 REG32(IPU_V_BASE + REG_CTRL) |= IPU_RESET;
5113
5114#define disable_irq(IPU_V_BASE) \
5115 REG32(IPU_V_BASE + REG_CTRL) &= ~FM_IRQ_EN;
5116
5117#define disable_rsize(IPU_V_BASE) \
5118 REG32(IPU_V_BASE + REG_CTRL) &= ~RSZ_EN;
5119
5120#define enable_rsize(IPU_V_BASE) \
5121 REG32(IPU_V_BASE + REG_CTRL) |= RSZ_EN;
5122
5123#define ipu_is_enable(IPU_V_BASE) \
5124 (REG32(IPU_V_BASE + REG_CTRL) & IPU_EN)
5125
5126// function about REG_STATUS
5127#define clear_end_flag(IPU_V_BASE) \
5128 REG32(IPU_V_BASE + REG_STATUS) &= ~OUT_END;
5129
5130#define polling_end_flag(IPU_V_BASE) \
5131 (REG32(IPU_V_BASE + REG_STATUS) & OUT_END)
5132
5133/* parameter
5134 R = 1.164 * (Y - 16) + 1.596 * (cr - 128) {C0, C1}
 5135 G = 1.164 * (Y - 16) - 0.392 * (cb - 128) - 0.813 * (cr - 128) {C0, C2, C3}
5136 B = 1.164 * (Y - 16) + 2.017 * (cb - 128) {C0, C4}
5137*/
5138
5139#if 1
5140#define YUV_CSC_C0 0x4A8 /* 1.164 * 1024 */
5141#define YUV_CSC_C1 0x662 /* 1.596 * 1024 */
5142#define YUV_CSC_C2 0x191 /* 0.392 * 1024 */
5143#define YUV_CSC_C3 0x341 /* 0.813 * 1024 */
5144#define YUV_CSC_C4 0x811 /* 2.017 * 1024 */
5145#else
5146#define YUV_CSC_C0 0x400
5147#define YUV_CSC_C1 0x59C
5148#define YUV_CSC_C2 0x161
5149#define YUV_CSC_C3 0x2DC
5150#define YUV_CSC_C4 0x718
5151#endif
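/* These look like the usual BT.601 YCbCr->RGB coefficients in Q10 fixed point,
 * i.e. rounded after scaling by 1024: 1.164*1024 ~= 1192 = 0x4A8,
 * 1.596*1024 ~= 1634 = 0x662, 0.392*1024 ~= 401 = 0x191,
 * 0.813*1024 ~= 833 = 0x341 and 2.017*1024 ~= 2065 = 0x811; the #else branch
 * appears to hold the full-range variant with C0 = 1.0 (0x400).
 */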
5152
5153#endif /* _IPU_H_ */
5154
4948#endif /* __JZ4740_H__ */ 5155#endif /* __JZ4740_H__ */
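As a rough, hypothetical illustration of how the IPU registers and control macros added above fit together (the function name, its parameters and the register programming order are assumptions for the sketch, not something taken from this commit), a single YUV420-to-RGB565 frame conversion might be driven like this:

    /* Minimal sketch: convert one YUV420 frame to RGB565, no resizing.
       ipu_base, the buffer addresses and the strides are assumed to be
       supplied by the caller in whatever form the hardware expects. */
    static void ipu_convert_frame(unsigned long ipu_base,
                                  unsigned long y_addr, unsigned long u_addr,
                                  unsigned long v_addr, unsigned long out_addr,
                                  int width, int height,
                                  int y_stride, int out_stride)
    {
        REG32(ipu_base + REG_D_FMT)      = INFMT_YUV420 | OUTFMT_RGB565;
        REG32(ipu_base + REG_Y_ADDR)     = y_addr;
        REG32(ipu_base + REG_U_ADDR)     = u_addr;
        REG32(ipu_base + REG_V_ADDR)     = v_addr;
        REG32(ipu_base + REG_IN_FM_GS)   = IN_FM_W(width) | IN_FM_H(height);
        REG32(ipu_base + REG_Y_STRIDE)   = y_stride;
        REG32(ipu_base + REG_UV_STRIDE)  = U_STRIDE(y_stride / 2) | V_STRIDE(y_stride / 2);
        REG32(ipu_base + REG_OUT_ADDR)   = out_addr;
        REG32(ipu_base + REG_OUT_GS)     = OUT_FM_W(width) | OUT_FM_H(height);
        REG32(ipu_base + REG_OUT_STRIDE) = out_stride;

        disable_rsize(ipu_base);       /* 1:1, so no resize coefficients needed */
        disable_irq(ipu_base);         /* poll instead of taking IRQ_IPU */
        clear_end_flag(ipu_base);
        run_ipu(ipu_base);
        while (!polling_end_flag(ipu_base))
            ;                          /* busy-wait for the frame-done flag */
        stop_ipu(ipu_base);
    }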
diff --git a/firmware/export/jz_mxu.h b/firmware/export/jz_mxu.h
new file mode 100644
index 0000000000..b833aedceb
--- /dev/null
+++ b/firmware/export/jz_mxu.h
@@ -0,0 +1,1806 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Maurus Cuelenaere
11 * Copyright (C) 2006-2007 by Ingenic Semiconductor Inc.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ****************************************************************************/
22
23/* Jz47xx Ingenic Media Extension Instruction Set
24
25 These are ~60 SIMD instructions for the Jz47xx MIPS core.
26
27 To compile assembly files using these instructions, they
28 must be piped through a bash script called mxu_as.
29*/
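/* A minimal usage sketch (illustrative only; src1, src2 and dst are assumed
   to be word-aligned pointers): average two words of packed 8-bit pixels.

       S32LDD(xr1, src1, 0);     load the word at src1 + 0 into xr1
       S32LDD(xr2, src2, 0);     load the word at src2 + 0 into xr2
       Q8AVGR(xr3, xr1, xr2);    per-byte rounded average
       S32STD(xr3, dst, 0);      store xr3 to the word at dst + 0

   With C_VERSION defined the same source builds as plain C (the xrN names
   become ordinary ints), which can be used to check an algorithm on targets
   without the MXU. */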
30
31#ifndef JZ_MXU_H_
32#define JZ_MXU_H_
33
34#define ptn0 0
35#define ptn1 1
36#define ptn2 2
37#define ptn3 3
38
39#ifdef C_VERSION
40
41/* MXU registers */
42
43#ifndef MXU_REGS_USE_ARRAY
44
45#define xr0 0
46static int xr1, xr2, xr3, xr4, xr5, xr6, xr7, xr8, xr9;
47static int xr10, xr11, xr12, xr13, xr14, xr15, xr16;
48
49#else
50
51static int mxu_xr[17] = {0};
52
53#define xr0 mxu_xr[ 0]
54#define xr1 mxu_xr[ 1]
55#define xr2 mxu_xr[ 2]
56#define xr3 mxu_xr[ 3]
57#define xr4 mxu_xr[ 4]
58#define xr5 mxu_xr[ 5]
59#define xr6 mxu_xr[ 6]
60#define xr7 mxu_xr[ 7]
61#define xr8 mxu_xr[ 8]
62#define xr9 mxu_xr[ 9]
63#define xr10 mxu_xr[10]
64#define xr11 mxu_xr[11]
65#define xr12 mxu_xr[12]
66#define xr13 mxu_xr[13]
67#define xr14 mxu_xr[14]
68#define xr15 mxu_xr[15]
69#define xr16 mxu_xr[16]
70
71#endif
72
73#else /* C_VERSION */
74
75#define xr0 0
76#define xr1 1
77#define xr2 2
78#define xr3 3
79#define xr4 4
80#define xr5 5
81#define xr6 6
82#define xr7 7
83#define xr8 8
84#define xr9 9
85#define xr10 10
86#define xr11 11
87#define xr12 12
88#define xr13 13
89#define xr14 14
90#define xr15 15
91#define xr16 16
92
93#endif /* C_VERSION */
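/* Note on the C model: the "if (&xr != mxu_xr) ..." guards used by the macros
   below appear to emulate xr0 being hard-wired to zero (writes discarded);
   with the array model &xr0 == mxu_xr, so only stores to xr0 are skipped.
   Since mxu_xr only exists in the array model, the C_VERSION macros
   effectively assume MXU_REGS_USE_ARRAY is defined. */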
94
95#ifdef C_VERSION
96
97#define S32I2M(xr, r) if (&xr != mxu_xr) xr = r
98#define S32M2I(xr) xr
99#define S32LDD(xr, p, o) if (&xr != mxu_xr) xr = *(long*)((unsigned long)p + o)
100#define S32STD(xr, p, o) *(long*)((unsigned long)p + o) = xr
101
102#define S32LDDV(xr, p, o, s) if (&xr != mxu_xr) xr = *(long*)((unsigned long)p + ((o) << s))
103#define S32STDV(xr, p, o, s) *(long*)((unsigned long)p + ((o) << s)) = xr
104
105#define S32LDIV(xra, rb, rc, strd2) \
106{\
107 if (&xra != mxu_xr) xra = *(long*)((unsigned long)rb + ((rc) << strd2));\
108 rb = (char*)rb + ((rc) << strd2);\
109}
110
111#define S32SDIV(xra, rb, rc, strd2) \
112{\
113 *(long*)((unsigned long)rb + ((rc) << strd2)) = xra;\
114 rb = (char*)rb + ((rc) << strd2);\
115}
116
117#define S32LDI(xra, rb, o) \
118{\
119 if (&xra != mxu_xr) xra = *(long*)((unsigned long)rb + o);\
120 rb = (char*)rb + o;\
121}
122
123#define S32SDI(xra, rb, o) \
124{\
125 *(long*)((unsigned long)rb + o) = xra;\
126 rb = (char*)rb + o;\
127}
128
129#define S32LDIV(xra, rb, rc, strd2) \
130{\
131 if (&xra != mxu_xr) xra = *(long*)((unsigned long)rb + ((rc) << strd2));\
132 rb = (char*)rb + ((rc) << strd2);\
133}
134
135#define S32SDIV(xra, rb, rc, strd2) \
136{\
137 *(long*)((unsigned long)rb + ((rc) << strd2)) = xra;\
138 rb = (char*)rb + ((rc) << strd2);\
139}
140
141#define Q16ADD_AS_WW(a, b, c, d) \
142{\
143 short bh = b >> 16;\
144 short bl = b & 0xFFFF;\
145 short ch = c >> 16;\
146 short cl = c & 0xFFFF;\
147 int ah = bh + ch;\
148 int al = bl + cl;\
149 int dh = bh - ch;\
150 int dl = bl - cl;\
151 if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\
152 if (&d != mxu_xr) d = (dh << 16) | (dl & 0xFFFF);\
153}
154
155#define Q16ADD_AS_XW(a, b, c, d) \
156{\
157 short bh = b >> 16;\
158 short bl = b & 0xFFFF;\
159 short ch = c >> 16;\
160 short cl = c & 0xFFFF;\
161 int ah = bl + ch;\
162 int al = bh + cl;\
163 int dh = bl - ch;\
164 int dl = bh - cl;\
165 if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\
166 if (&d != mxu_xr) d = (dh << 16) | (dl & 0xFFFF);\
167}
168
169#define Q16ADD_AA_WW(a, b, c, d) \
170{\
171 short bh = b >> 16;\
172 short bl = b & 0xFFFF;\
173 short ch = c >> 16;\
174 short cl = c & 0xFFFF;\
175 int ah = bh + ch;\
176 int al = bl + cl;\
177 if (&a != mxu_xr) a = (ah << 16) | (al & 0xFFFF);\
178 if (&d != mxu_xr) d = (ah << 16) | (al & 0xFFFF);\
179}
180
181#define D16MUL_LW(a, b, c, d)\
182{\
183 short bl = b & 0xFFFF;\
184 short cl = c & 0xFFFF;\
185 short ch = c >> 16;\
186 if (&a != mxu_xr) a = ch * bl;\
187 if (&d != mxu_xr) d = cl * bl;\
188}
189
190#define D16MUL_WW(a, b, c, d)\
191{\
192 short bh = b >> 16;\
193 short bl = b & 0xFFFF;\
194 short ch = c >> 16;\
195 short cl = c & 0xFFFF;\
196 if (&a != mxu_xr) a = ch * bh;\
197 if (&d != mxu_xr) d = cl * bl;\
198}
199
200#define D16MAC_AA_LW(a, b, c, d)\
201{\
202 short bl = b & 0xFFFF;\
203 short cl = c & 0xFFFF;\
204 short ch = c >> 16;\
205 if (&a != mxu_xr) a += ch * bl;\
206 if (&d != mxu_xr) d += cl * bl;\
207}
208
209#define D16MUL_HW(a, b, c, d)\
210{\
211 short bh = b >> 16;\
212 short cl = c & 0xFFFF;\
213 short ch = c >> 16;\
214 if (&a != mxu_xr) a = ch * bh;\
215 if (&d != mxu_xr) d = cl * bh;\
216}
217
218#define D16MAC_AA_HW(a, b, c, d)\
219{\
220 short bh = b >> 16;\
221 short cl = c & 0xFFFF;\
222 short ch = c >> 16;\
223 if (&a != mxu_xr) a += ch * bh;\
224 if (&d != mxu_xr) d += cl * bh;\
225}
226
227#define D32SLL(a, b, c, d, sft)\
228{\
229 if (&a != mxu_xr) a = b << sft;\
230 if (&d != mxu_xr) d = c << sft;\
231}
232
233#define D32SARL(a, b, c, sft) if (&a != mxu_xr) a = (((long)b >> sft) << 16) | (((long)c >> sft) & 0xFFFF)
234
235#define S32SFL(a, b, c, d, ptn) \
236{\
237 unsigned char b3 = (unsigned char)((unsigned long)b >> 24);\
238 unsigned char b2 = (unsigned char)((unsigned long)b >> 16);\
239 unsigned char b1 = (unsigned char)((unsigned long)b >> 8);\
240 unsigned char b0 = (unsigned char)((unsigned long)b >> 0);\
241 unsigned char c3 = (unsigned char)((unsigned long)c >> 24);\
242 unsigned char c2 = (unsigned char)((unsigned long)c >> 16);\
243 unsigned char c1 = (unsigned char)((unsigned long)c >> 8);\
244 unsigned char c0 = (unsigned char)((unsigned long)c >> 0);\
245 unsigned char a3, a2, a1, a0, d3, d2, d1, d0;\
246 if (ptn0 == ptn) \
247 {\
248 a3 = b3;\
249 a2 = c3;\
250 a1 = b2;\
251 a0 = c2;\
252 d3 = b1;\
253 d2 = c1;\
254 d1 = b0;\
255 d0 = c0;\
256 }\
257 else if (ptn1 == ptn)\
258 {\
259 a3 = b3;\
260 a2 = b1;\
261 a1 = c3;\
262 a0 = c1;\
263 d3 = b2;\
264 d2 = b0;\
265 d1 = c2;\
266 d0 = c0;\
267 }\
268 else if (ptn2 == ptn)\
269 {\
270 a3 = b3;\
271 a2 = c3;\
272 a1 = b1;\
273 a0 = c1;\
274 d3 = b2;\
275 d2 = c2;\
276 d1 = b0;\
277 d0 = c0;\
278 }\
279 else if (ptn3 == ptn)\
280 {\
281 a3 = b3;\
282 a2 = b2;\
283 a1 = c3;\
284 a0 = c2;\
285 d3 = b1;\
286 d2 = b0;\
287 d1 = c1;\
288 d0 = c0;\
289 }\
290 if (&a != mxu_xr) a = ((unsigned long)a3 << 24) | ((unsigned long)a2 << 16) | ((unsigned long)a1 << 8) | (unsigned long)a0;\
291 if (&d != mxu_xr) d = ((unsigned long)d3 << 24) | ((unsigned long)d2 << 16) | ((unsigned long)d1 << 8) | (unsigned long)d0;\
292}
293
294#define D32SAR(a, b, c, d, sft)\
295{\
296 if (&a != mxu_xr) a = (long)b >> sft;\
297 if (&d != mxu_xr) d = (long)c >> sft;\
298}
299
300#define D32SLR(a, b, c, d, sft)\
301{\
302 if (&a != mxu_xr) a = (unsigned long)b >> sft;\
303 if (&d != mxu_xr) d = (unsigned long)c >> sft;\
304}
 305#define Q16SLL(a, b, c, d, sft)\
 306{\
 307 short bh = b >> 16;\
 308 short bl = b & 0xffff;\
 309 short ch = c >> 16;\
 310 short cl = c & 0xffff;\
 311 if (&a != mxu_xr) a = ((bh << sft) << 16) | (((long)bl << sft) & 0xffff);\
 312 if (&d != mxu_xr) d = ((ch << sft) << 16) | (((long)cl << sft) & 0xffff);\
 313}
 314
 315#define Q16SAR(a, b, c, d, sft)\
 316{\
 317 short bh = b >> 16;\
 318 short bl = b & 0xffff;\
 319 short ch = c >> 16;\
 320 short cl = c & 0xffff;\
 321 if (&a != mxu_xr) a = (((short)bh >> sft) << 16) | ((long)((short)bl >> sft) & 0xffff);\
 322 if (&d != mxu_xr) d = (((short)ch >> sft) << 16) | ((long)((short)cl >> sft) & 0xffff);\
 323}
324
325#define D32ACC_AA(a, b, c, d)\
326{\
327 int _b = b;\
328 int _c = c;\
329 int _a = a;\
330 int _d = d;\
331 if (&a != mxu_xr) a = _a + _b + _c;\
332 if (&d != mxu_xr) d = _d + _b + _c;\
333}
334
335#define D32ACC_AS(a, b, c, d)\
336{\
337 int _b = b;\
338 int _c = c;\
339 int _a = a;\
340 int _d = d;\
341 if (&a != mxu_xr) a = _a + _b + _c;\
342 if (&d != mxu_xr) d = _d + _b - _c;\
343}
344
345#define D32ADD_AS(a, b, c, d)\
346{\
347 int _b = b;\
348 int _c = c;\
349 if (&a != mxu_xr) a = _b + _c;\
350 if (&d != mxu_xr) d = _b - _c;\
351}
352
353#define D32ADD_SS(a, b, c, d)\
354{\
355 int _b = b;\
356 int _c = c;\
357 if (&a != mxu_xr) a = _b - _c;\
358 if (&d != mxu_xr) d = _b - _c;\
359}
360
361#define D32ADD_AA(a, b, c, d)\
362{\
363 int _b = b;\
364 int _c = c;\
365 if (&a != mxu_xr) a = _b + _c;\
366 if (&d != mxu_xr) d = _b + _c;\
367}
368
369#define D16MADL_AA_WW(a, b, c, d) \
370 do { \
371 short _ah = a >> 16;\
372 short _al = (a << 16) >> 16;\
373 short _bh = b >> 16;\
374 short _bl = (b << 16) >> 16;\
375 short _ch = c >> 16;\
376 short _cl = (c << 16) >> 16;\
377 int L32, R32; \
378 L32 = _bh * _ch;\
379 R32 = _bl * _cl; \
380 _ah += (L32 << 16) >> 16; \
381 _al += (R32 << 16) >> 16; \
382 if (&d != mxu_xr) d = (_ah << 16) + (_al & 0xffff);\
383 } while (0)
384
385#define D16MACF_AA_WW(a, b, c, d) \
386 do { \
387 short _bh = b >> 16;\
388 short _bl = (b << 16) >> 16;\
389 short _ch = c >> 16;\
390 short _cl = (c << 16) >> 16;\
391 int L32, R32; \
392 L32 = (_bh * _ch) << 1;\
393 R32 = (_bl * _cl) << 1; \
394 L32 = a + L32; \
395 R32 = d + R32; \
396 if (&a != mxu_xr) a = ((((L32 >> 15) + 1) >> 1) << 16) + ((((R32 >> 15) + 1) >> 1) & 0xffff);\
397 } while (0)
398
399#define D16MAC_AA_WW(a, b, c, d) \
400do { \
401 short _bh = b >> 16;\
402 short _bl = (b << 16) >> 16;\
403 short _ch = c >> 16;\
404 short _cl = (c << 16) >> 16;\
405 int L32, R32; \
406 L32 = (_bh * _ch);\
407 R32 = (_bl * _cl); \
408 if (&a != mxu_xr) a = a + L32;\
409 if (&d != mxu_xr) d = d + R32;\
410 } while (0)
411
412#define D16MAC_SS_WW(a, b, c, d) \
413do { \
414 short _bh = b >> 16;\
415 short _bl = (b << 16) >> 16;\
416 short _ch = c >> 16;\
417 short _cl = (c << 16) >> 16;\
418 int L32, R32; \
419 L32 = (_bh * _ch);\
420 R32 = (_bl * _cl); \
421 if (&a != mxu_xr) a = a - L32;\
422 if (&d != mxu_xr) d = d - R32;\
423 } while (0)
424
425#define D16MAC_SA_HW(a, b, c, d) \
426do { \
427 short _bh = b >> 16;\
428 short _bl = (b << 16) >> 16;\
429 short _ch = c >> 16;\
430 short _cl = (c << 16) >> 16;\
431 int L32, R32; \
432 L32 = (_bh * _ch);\
433 R32 = (_bh * _cl); \
434 if (&a != mxu_xr) a = a - L32;\
435 if (&d != mxu_xr) d = d + R32;\
436 } while (0)
437
438#define D16MAC_SS_HW(a, b, c, d) \
439do { \
440 short _bh = b >> 16;\
441 short _bl = (b << 16) >> 16;\
442 short _ch = c >> 16;\
443 short _cl = (c << 16) >> 16;\
444 int L32, R32; \
445 L32 = (_bh * _ch);\
446 R32 = (_bh * _cl); \
447 if (&a != mxu_xr) a = a - L32;\
448 if (&d != mxu_xr) d = d - R32;\
449 } while (0)
450
451#define D16MAC_AS_HW(a, b, c, d) \
452do { \
453 short _bh = b >> 16;\
454 short _bl = (b << 16) >> 16;\
455 short _ch = c >> 16;\
456 short _cl = (c << 16) >> 16;\
457 int L32, R32; \
458 L32 = (_bh * _ch);\
459 R32 = (_bh * _cl); \
460 if (&a != mxu_xr) a = a + L32;\
461 if (&d != mxu_xr) d = d - R32;\
462 } while (0)
463
464#define D16MAC_AS_LW(a, b, c, d) \
465do { \
466 short _bh = b >> 16;\
467 short _bl = (b << 16) >> 16;\
468 short _ch = c >> 16;\
469 short _cl = (c << 16) >> 16;\
470 int L32, R32; \
471 L32 = (_bl * _ch);\
472 R32 = (_bl * _cl); \
473 if (&a != mxu_xr) a = a + L32;\
474 if (&d != mxu_xr) d = d - R32;\
475 } while (0)
476
477
478#define D16MAC_SA_LW(a, b, c, d) \
479do { \
480 short _bh = b >> 16;\
481 short _bl = (b << 16) >> 16;\
482 short _ch = c >> 16;\
483 short _cl = (c << 16) >> 16;\
484 int L32, R32; \
485 L32 = (_bl * _ch);\
486 R32 = (_bl * _cl); \
487 if (&a != mxu_xr) a = a - L32;\
488 if (&d != mxu_xr) d = d + R32;\
489 } while (0)
490
491#define D16MAC_SS_LW(a, b, c, d) \
492do { \
493 short _bh = b >> 16;\
494 short _bl = (b << 16) >> 16;\
495 short _ch = c >> 16;\
496 short _cl = (c << 16) >> 16;\
497 int L32, R32; \
498 L32 = (_bl * _ch);\
499 R32 = (_bl * _cl); \
500 if (&a != mxu_xr) a = a - L32;\
501 if (&d != mxu_xr) d = d - R32;\
502 } while (0)
503
504
505#define Q8ADDE_AA(xra, xrb, xrc, xrd) \
506{\
507 unsigned char b3 = (unsigned char)((unsigned long)xrb >> 24);\
508 unsigned char b2 = (unsigned char)((unsigned long)xrb >> 16);\
509 unsigned char b1 = (unsigned char)((unsigned long)xrb >> 8);\
510 unsigned char b0 = (unsigned char)((unsigned long)xrb >> 0);\
511 unsigned char c3 = (unsigned char)((unsigned long)xrc >> 24);\
512 unsigned char c2 = (unsigned char)((unsigned long)xrc >> 16);\
513 unsigned char c1 = (unsigned char)((unsigned long)xrc >> 8);\
514 unsigned char c0 = (unsigned char)((unsigned long)xrc >> 0);\
515 short ah, al, dh, dl;\
516 ah = b3 + c3;\
517 al = b2 + c2;\
518 dh = b1 + c1;\
519 dl = b0 + c0;\
520 if (&xra != mxu_xr) xra = ((unsigned long)ah << 16) | (unsigned short)al;\
521 if (&xrd != mxu_xr) xrd = ((unsigned long)dh << 16) | (unsigned short)dl;\
522}
523
524#define Q16SAT(xra, xrb, xrc) \
525{\
526 short bh = xrb >> 16;\
527 short bl = xrb & 0xFFFF;\
528 short ch = xrc >> 16;\
529 short cl = xrc & 0xFFFF;\
530 if (bh > 255) bh = 255;\
531 if (bh < 0) bh = 0;\
532 if (bl > 255) bl = 255;\
533 if (bl < 0) bl = 0;\
534 if (ch > 255) ch = 255;\
535 if (ch < 0) ch = 0;\
536 if (cl > 255) cl = 255;\
537 if (cl < 0) cl = 0;\
538 if (&xra != mxu_xr) xra = ((unsigned)bh << 24) | ((unsigned)bl << 16) | ((unsigned)ch << 8) | (unsigned)cl;\
539}
540
541#define Q8SAD(xra, xrb, xrc, xrd) \
542{\
543 short b3 = (unsigned char)((unsigned long)xrb >> 24);\
544 short b2 = (unsigned char)((unsigned long)xrb >> 16);\
545 short b1 = (unsigned char)((unsigned long)xrb >> 8);\
546 short b0 = (unsigned char)((unsigned long)xrb >> 0);\
547 short c3 = (unsigned char)((unsigned long)xrc >> 24);\
548 short c2 = (unsigned char)((unsigned long)xrc >> 16);\
549 short c1 = (unsigned char)((unsigned long)xrc >> 8);\
550 short c0 = (unsigned char)((unsigned long)xrc >> 0);\
551 int int0, int1, int2, int3;\
552 int3 = labs(b3 - c3);\
553 int2 = labs(b2 - c2);\
554 int1 = labs(b1 - c1);\
555 int0 = labs(b0 - c0);\
556 if (&xra != mxu_xr) xra = int0 + int1 + int2 + int3;\
557 if (&xrd != mxu_xr) xrd += int0 + int1 + int2 + int3;\
558}
559
560#define Q8AVGR(xra, xrb, xrc) \
561{\
562 short b3 = (unsigned char)((unsigned long)xrb >> 24);\
563 short b2 = (unsigned char)((unsigned long)xrb >> 16);\
564 short b1 = (unsigned char)((unsigned long)xrb >> 8);\
565 short b0 = (unsigned char)((unsigned long)xrb >> 0);\
566 short c3 = (unsigned char)((unsigned long)xrc >> 24);\
567 short c2 = (unsigned char)((unsigned long)xrc >> 16);\
568 short c1 = (unsigned char)((unsigned long)xrc >> 8);\
569 short c0 = (unsigned char)((unsigned long)xrc >> 0);\
570 unsigned char a3, a2, a1, a0;\
571 a3 = (unsigned char)((b3 + c3 + 1) >> 1);\
572 a2 = (unsigned char)((b2 + c2 + 1) >> 1);\
573 a1 = (unsigned char)((b1 + c1 + 1) >> 1);\
574 a0 = (unsigned char)((b0 + c0 + 1) >> 1);\
575 if (&xra != mxu_xr) xra = ((unsigned long)a3 << 24) | ((unsigned long)a2 << 16) | ((unsigned long)a1 << 8) | (unsigned long)a0;\
576}
577
578#define S32ALN(xra, xrb, xrc, rs) \
579{\
580 if (0 == rs)\
581 {\
582 if (&xra != mxu_xr) xra = xrb;\
583 }\
584 else if (1 == rs)\
585 {\
586 if (&xra != mxu_xr) xra = (xrb << 8) | ((unsigned long)xrc >> 24);\
587 }\
588 else if (2 == rs)\
589 {\
590 if (&xra != mxu_xr) xra = (xrb << 16) | ((unsigned long)xrc >> 16);\
591 }\
592 else if (3 == rs)\
593 {\
594 if (&xra != mxu_xr) xra = (xrb << 24) | ((unsigned long)xrc >> 8);\
595 }\
596 else if (4 == rs)\
597 {\
598 if (&xra != mxu_xr) xra = xrc;\
599 }\
600}
601
602#else /* C_VERSION */
603
604/***********************************LD/SD***********************************/
605#define S32LDD(xra,rb,s12) \
606 do { \
607 __asm__ __volatile ("S32LDD xr%0,%z1,%2" \
608 : \
609 :"K"(xra),"d" (rb),"I"(s12)); \
610 } while (0)
611
612#define S32STD(xra,rb,s12) \
613 do { \
614 __asm__ __volatile ("S32STD xr%0,%z1,%2" \
615 : \
616 :"K"(xra),"d" (rb),"I"(s12):"memory"); \
617 } while (0)
618
619#define S32LDDV(xra,rb,rc,strd2) \
620 do { \
621 __asm__ __volatile ("S32LDDV xr%0,%z1,%z2,%3" \
622 : \
623 :"K"(xra),"d" (rb),"d"(rc),"K"(strd2)); \
624 } while (0)
625
626#define S32STDV(xra,rb,rc,strd2) \
627 do { \
628 __asm__ __volatile ("S32STDV xr%0,%z1,%z2,%3" \
629 : \
630 :"K"(xra),"d" (rb),"d"(rc),"K"(strd2):"memory"); \
631 } while (0)
632
633#define S32LDI(xra,rb,s12) \
634 do { \
635 __asm__ __volatile ("S32LDI xr%1,%z0,%2" \
636 :"+d" (rb) \
637 :"K"(xra),"I"(s12)); \
638 } while (0)
639
640#define S32SDI(xra,rb,s12) \
641 do { \
642 __asm__ __volatile ("S32SDI xr%1,%z0,%2" \
643 :"+d" (rb) \
644 :"K"(xra),"I"(s12):"memory"); \
645 } while (0)
646
647#define S32LDIV(xra,rb,rc,strd2) \
648 do { \
649 __asm__ __volatile ("S32LDIV xr%1,%z0,%z2,%3" \
650 :"+d" (rb) \
651 :"K"(xra),"d"(rc),"K"(strd2)); \
652 } while (0)
653
654#define S32SDIV(xra,rb,rc,strd2) \
655 do { \
656 __asm__ __volatile ("S32SDIV xr%1,%z0,%z2,%3" \
657 :"+d" (rb) \
658 :"K"(xra),"d"(rc),"K"(strd2):"memory"); \
659 } while (0)
660
661/***********************************D16MUL***********************************/
662#define D16MUL_WW(xra,xrb,xrc,xrd) \
663 do { \
664 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,WW" \
665 : \
666 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
667 } while (0)
668
669#define D16MUL_LW(xra,xrb,xrc,xrd) \
670 do { \
671 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,LW" \
672 : \
673 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
674 } while (0)
675
676#define D16MUL_HW(xra,xrb,xrc,xrd) \
677 do { \
678 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,HW" \
679 : \
680 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
681 } while (0)
682
683#define D16MUL_XW(xra,xrb,xrc,xrd) \
684 do { \
685 __asm__ __volatile ("D16MUL xr%0,xr%1,xr%2,xr%3,XW" \
686 : \
687 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
688 } while (0)
689
690/**********************************D16MULF*******************************/
691#define D16MULF_WW(xra,xrb,xrc) \
692 do { \
693 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,WW" \
694 : \
695 :"K"(xra),"K"(xrb),"K"(xrc)); \
696 } while (0)
697
698#define D16MULF_LW(xra,xrb,xrc) \
699 do { \
700 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,LW" \
701 : \
702 :"K"(xra),"K"(xrb),"K"(xrc)); \
703 } while (0)
704
705#define D16MULF_HW(xra,xrb,xrc) \
706 do { \
707 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,HW" \
708 : \
709 :"K"(xra),"K"(xrb),"K"(xrc)); \
710 } while (0)
711
712#define D16MULF_XW(xra,xrb,xrc) \
713 do { \
714 __asm__ __volatile ("D16MULF xr%0,xr%1,xr%2,XW" \
715 : \
716 :"K"(xra),"K"(xrb),"K"(xrc)); \
717 } while (0)
718
719/***********************************D16MAC********************************/
720#define D16MAC_AA_WW(xra,xrb,xrc,xrd) \
721 do { \
722 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,WW" \
723 : \
724 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
725 } while (0)
726
727#define D16MAC_AA_LW(xra,xrb,xrc,xrd) \
728 do { \
729 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,LW" \
730 : \
731 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
732 } while (0)
733
734#define D16MAC_AA_HW(xra,xrb,xrc,xrd) \
735 do { \
736 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,HW" \
737 : \
738 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
739 } while (0)
740
741#define D16MAC_AA_XW(xra,xrb,xrc,xrd) \
742 do { \
743 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AA,XW" \
744 : \
745 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
746 } while (0)
747
748#define D16MAC_AS_WW(xra,xrb,xrc,xrd) \
749 do { \
750 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,WW" \
751 : \
752 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
753 } while (0)
754
755#define D16MAC_AS_LW(xra,xrb,xrc,xrd) \
756 do { \
757 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,LW" \
758 : \
759 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
760 } while (0)
761
762#define D16MAC_AS_HW(xra,xrb,xrc,xrd) \
763 do { \
764 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,HW" \
765 : \
766 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
767 } while (0)
768
769#define D16MAC_AS_XW(xra,xrb,xrc,xrd) \
770 do { \
771 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,AS,XW" \
772 : \
773 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
774 } while (0)
775
776#define D16MAC_SA_WW(xra,xrb,xrc,xrd) \
777 do { \
778 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,WW" \
779 : \
780 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
781 } while (0)
782
783#define D16MAC_SA_LW(xra,xrb,xrc,xrd) \
784 do { \
785 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,LW" \
786 : \
787 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
788 } while (0)
789
790#define D16MAC_SA_HW(xra,xrb,xrc,xrd) \
791 do { \
792 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,HW" \
793 : \
794 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
795 } while (0)
796
797#define D16MAC_SA_XW(xra,xrb,xrc,xrd) \
798 do { \
799 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SA,XW" \
800 : \
801 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
802 } while (0)
803
804#define D16MAC_SS_WW(xra,xrb,xrc,xrd) \
805 do { \
806 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,WW" \
807 : \
808 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
809 } while (0)
810
811#define D16MAC_SS_LW(xra,xrb,xrc,xrd) \
812 do { \
813 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,LW" \
814 : \
815 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
816 } while (0)
817
818#define D16MAC_SS_HW(xra,xrb,xrc,xrd) \
819 do { \
820 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,HW" \
821 : \
822 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
823 } while (0)
824
825#define D16MAC_SS_XW(xra,xrb,xrc,xrd) \
826 do { \
827 __asm__ __volatile ("D16MAC xr%0,xr%1,xr%2,xr%3,SS,XW" \
828 : \
829 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
830 } while (0)
831
832/**********************************D16MACF*******************************/
833#define D16MACF_AA_WW(xra,xrb,xrc,xrd) \
834 do { \
835 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,WW" \
836 : \
837 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
838 } while (0)
839
840#define D16MACF_AA_LW(xra,xrb,xrc,xrd) \
841 do { \
842 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,LW" \
843 : \
844 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
845 } while (0)
846
847#define D16MACF_AA_HW(xra,xrb,xrc,xrd) \
848 do { \
849 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,HW" \
850 : \
851 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
852 } while (0)
853
854#define D16MACF_AA_XW(xra,xrb,xrc,xrd) \
855 do { \
856 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AA,XW" \
857 : \
858 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
859 } while (0)
860
861#define D16MACF_AS_WW(xra,xrb,xrc,xrd) \
862 do { \
863 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,WW" \
864 : \
865 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
866 } while (0)
867
868#define D16MACF_AS_LW(xra,xrb,xrc,xrd) \
869 do { \
870 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,LW" \
871 : \
872 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
873 } while (0)
874
875#define D16MACF_AS_HW(xra,xrb,xrc,xrd) \
876 do { \
877 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,HW" \
878 : \
879 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
880 } while (0)
881
882#define D16MACF_AS_XW(xra,xrb,xrc,xrd) \
883 do { \
884 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,AS,XW" \
885 : \
886 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
887 } while (0)
888
889#define D16MACF_SA_WW(xra,xrb,xrc,xrd) \
890 do { \
891 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,WW" \
892 : \
893 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
894 } while (0)
895
896#define D16MACF_SA_LW(xra,xrb,xrc,xrd) \
897 do { \
898 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,LW" \
899 : \
900 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
901 } while (0)
902
903#define D16MACF_SA_HW(xra,xrb,xrc,xrd) \
904 do { \
905 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,HW" \
906 : \
907 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
908 } while (0)
909
910#define D16MACF_SA_XW(xra,xrb,xrc,xrd) \
911 do { \
912 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SA,XW" \
913 : \
914 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
915 } while (0)
916
917#define D16MACF_SS_WW(xra,xrb,xrc,xrd) \
918 do { \
919 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,WW" \
920 : \
921 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
922 } while (0)
923
924#define D16MACF_SS_LW(xra,xrb,xrc,xrd) \
925 do { \
926 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,LW" \
927 : \
928 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
929 } while (0)
930
931#define D16MACF_SS_HW(xra,xrb,xrc,xrd) \
932 do { \
933 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,HW" \
934 : \
935 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
936 } while (0)
937
938#define D16MACF_SS_XW(xra,xrb,xrc,xrd) \
939 do { \
940 __asm__ __volatile ("D16MACF xr%0,xr%1,xr%2,xr%3,SS,XW" \
941 : \
942 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
943 } while (0)
944
945/**********************************D16MADL*******************************/
946#define D16MADL_AA_WW(xra,xrb,xrc,xrd) \
947 do { \
948 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,WW" \
949 : \
950 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
951 } while (0)
952
953#define D16MADL_AA_LW(xra,xrb,xrc,xrd) \
954 do { \
955 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,LW" \
956 : \
957 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
958 } while (0)
959
960#define D16MADL_AA_HW(xra,xrb,xrc,xrd) \
961 do { \
962 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,HW" \
963 : \
964 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
965 } while (0)
966
967#define D16MADL_AA_XW(xra,xrb,xrc,xrd) \
968 do { \
969 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AA,XW" \
970 : \
971 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
972 } while (0)
973
974#define D16MADL_AS_WW(xra,xrb,xrc,xrd) \
975 do { \
976 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,WW" \
977 : \
978 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
979 } while (0)
980
981#define D16MADL_AS_LW(xra,xrb,xrc,xrd) \
982 do { \
983 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,LW" \
984 : \
985 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
986 } while (0)
987
988#define D16MADL_AS_HW(xra,xrb,xrc,xrd) \
989 do { \
990 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,HW" \
991 : \
992 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
993 } while (0)
994
995#define D16MADL_AS_XW(xra,xrb,xrc,xrd) \
996 do { \
997 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,AS,XW" \
998 : \
999 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1000 } while (0)
1001
1002#define D16MADL_SA_WW(xra,xrb,xrc,xrd) \
1003 do { \
1004 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,WW" \
1005 : \
1006 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1007 } while (0)
1008
1009#define D16MADL_SA_LW(xra,xrb,xrc,xrd) \
1010 do { \
1011 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,LW" \
1012 : \
1013 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1014 } while (0)
1015
1016#define D16MADL_SA_HW(xra,xrb,xrc,xrd) \
1017 do { \
1018 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,HW" \
1019 : \
1020 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1021 } while (0)
1022
1023#define D16MADL_SA_XW(xra,xrb,xrc,xrd) \
1024 do { \
1025 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SA,XW" \
1026 : \
1027 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1028 } while (0)
1029
1030#define D16MADL_SS_WW(xra,xrb,xrc,xrd) \
1031 do { \
1032 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,WW" \
1033 : \
1034 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1035 } while (0)
1036
1037#define D16MADL_SS_LW(xra,xrb,xrc,xrd) \
1038 do { \
1039 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,LW" \
1040 : \
1041 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1042 } while (0)
1043
1044#define D16MADL_SS_HW(xra,xrb,xrc,xrd) \
1045 do { \
1046 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,HW" \
1047 : \
1048 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1049 } while (0)
1050
1051#define D16MADL_SS_XW(xra,xrb,xrc,xrd) \
1052 do { \
1053 __asm__ __volatile ("D16MADL xr%0,xr%1,xr%2,xr%3,SS,XW" \
1054 : \
1055 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1056 } while (0)
1057
1058/***********************************S16MAD*******************************/
1059#define S16MAD_A_HH(xra,xrb,xrc,xrd) \
1060 do { \
1061 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,0" \
1062 : \
1063 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1064 } while (0)
1065
1066#define S16MAD_A_LL(xra,xrb,xrc,xrd) \
1067 do { \
1068 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,1" \
1069 : \
1070 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1071 } while (0)
1072
1073#define S16MAD_A_HL(xra,xrb,xrc,xrd) \
1074 do { \
1075 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,2" \
1076 : \
1077 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1078 } while (0)
1079
1080#define S16MAD_A_LH(xra,xrb,xrc,xrd) \
1081 do { \
1082 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,A,3" \
1083 : \
1084 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1085 } while (0)
1086
1087#define S16MAD_S_HH(xra,xrb,xrc,xrd) \
1088 do { \
1089 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,0" \
1090 : \
1091 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1092 } while (0)
1093
1094#define S16MAD_S_LL(xra,xrb,xrc,xrd) \
1095 do { \
1096 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,1" \
1097 : \
1098 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1099 } while (0)
1100
1101#define S16MAD_S_HL(xra,xrb,xrc,xrd) \
1102 do { \
1103 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,2" \
1104 : \
1105 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1106 } while (0)
1107
1108#define S16MAD_S_LH(xra,xrb,xrc,xrd) \
1109 do { \
1110 __asm__ __volatile ("S16MAD xr%0,xr%1,xr%2,xr%3,S,3" \
1111 : \
1112 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1113 } while (0)
1114
1115/***********************************Q8MUL********************************/
1116#define Q8MUL(xra,xrb,xrc,xrd) \
1117 do { \
1118 __asm__ __volatile ("Q8MUL xr%0,xr%1,xr%2,xr%3" \
1119 : \
1120 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1121 } while (0)
1122
1123/***********************************Q8MAC********************************/
1124#define Q8MAC_AA(xra,xrb,xrc,xrd) \
1125 do { \
1126 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,AA" \
1127 : \
1128 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1129 } while (0)
1130
1131#define Q8MAC_AS(xra,xrb,xrc,xrd) \
1132 do { \
1133 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,AS" \
1134 : \
1135 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1136 } while (0)
1137
1138#define Q8MAC_SA(xra,xrb,xrc,xrd) \
1139 do { \
1140 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,SA" \
1141 : \
1142 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1143 } while (0)
1144
1145#define Q8MAC_SS(xra,xrb,xrc,xrd) \
1146 do { \
1147 __asm__ __volatile ("Q8MAC xr%0,xr%1,xr%2,xr%3,SS" \
1148 : \
1149 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1150 } while (0)
1151
1152/***********************************Q8MADL********************************/
1153#define Q8MADL_AA(xra,xrb,xrc,xrd) \
1154 do { \
1155 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,AA" \
1156 : \
1157 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1158 } while (0)
1159
1160#define Q8MADL_AS(xra,xrb,xrc,xrd) \
1161 do { \
1162 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,AS" \
1163 : \
1164 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1165 } while (0)
1166
1167#define Q8MADL_SA(xra,xrb,xrc,xrd) \
1168 do { \
1169 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,SA" \
1170 : \
1171 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1172 } while (0)
1173
1174#define Q8MADL_SS(xra,xrb,xrc,xrd) \
1175 do { \
1176 __asm__ __volatile ("Q8MADL xr%0,xr%1,xr%2,xr%3,SS" \
1177 : \
1178 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1179 } while (0)
1180
1181/***********************************D32ADD********************************/
1182#define D32ADD_AA(xra,xrb,xrc,xrd) \
1183 do { \
1184 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,AA" \
1185 : \
1186 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1187 } while (0)
1188
1189#define D32ADD_AS(xra,xrb,xrc,xrd) \
1190 do { \
1191 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,AS" \
1192 : \
1193 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1194 } while (0)
1195
1196#define D32ADD_SA(xra,xrb,xrc,xrd) \
1197 do { \
1198 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,SA" \
1199 : \
1200 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1201 } while (0)
1202
1203#define D32ADD_SS(xra,xrb,xrc,xrd) \
1204 do { \
1205 __asm__ __volatile ("D32ADD xr%0,xr%1,xr%2,xr%3,SS" \
1206 : \
1207 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1208 } while (0)
1209
1210/***********************************D32ACC********************************/
1211#define D32ACC_AA(xra,xrb,xrc,xrd) \
1212 do { \
1213 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,AA" \
1214 : \
1215 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1216 } while (0)
1217
1218#define D32ACC_AS(xra,xrb,xrc,xrd) \
1219 do { \
1220 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,AS" \
1221 : \
1222 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1223 } while (0)
1224
1225#define D32ACC_SA(xra,xrb,xrc,xrd) \
1226 do { \
1227 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,SA" \
1228 : \
1229 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1230 } while (0)
1231
1232#define D32ACC_SS(xra,xrb,xrc,xrd) \
1233 do { \
1234 __asm__ __volatile ("D32ACC xr%0,xr%1,xr%2,xr%3,SS" \
1235 : \
1236 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1237 } while (0)
1238
1239/***********************************S32CPS********************************/
1240#define S32CPS(xra,xrb,xrc) \
1241 do { \
1242 __asm__ __volatile ("S32CPS xr%0,xr%1,xr%2" \
1243 : \
1244 :"K"(xra),"K"(xrb),"K"(xrc)); \
1245 } while (0)
1246
1247#define S32ABS(xra,xrb) \
1248 do { \
1249 __asm__ __volatile ("S32CPS xr%0,xr%1,xr%2" \
1250 : \
1251 :"K"(xra),"K"(xrb),"K"(xrb)); \
1252 } while (0)
1253
1254/***********************************Q16ADD********************************/
1255#define Q16ADD_AA_WW(xra,xrb,xrc,xrd) \
1256 do { \
1257 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,WW" \
1258 : \
1259 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1260 } while (0)
1261
1262#define Q16ADD_AA_LW(xra,xrb,xrc,xrd) \
1263 do { \
1264 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,LW" \
1265 : \
1266 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1267 } while (0)
1268
1269#define Q16ADD_AA_HW(xra,xrb,xrc,xrd) \
1270 do { \
1271 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,HW" \
1272 : \
1273 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1274 } while (0)
1275
1276#define Q16ADD_AA_XW(xra,xrb,xrc,xrd) \
1277 do { \
1278 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AA,XW" \
1279 : \
1280 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1281 } while (0)
1282#define Q16ADD_AS_WW(xra,xrb,xrc,xrd) \
1283 do { \
1284 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,WW" \
1285 : \
1286 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1287 } while (0)
1288
1289#define Q16ADD_AS_LW(xra,xrb,xrc,xrd) \
1290 do { \
1291 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,LW" \
1292 : \
1293 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1294 } while (0)
1295
1296#define Q16ADD_AS_HW(xra,xrb,xrc,xrd) \
1297 do { \
1298 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,HW" \
1299 : \
1300 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1301 } while (0)
1302
1303#define Q16ADD_AS_XW(xra,xrb,xrc,xrd) \
1304 do { \
1305 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,AS,XW" \
1306 : \
1307 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1308 } while (0)
1309
1310#define Q16ADD_SA_WW(xra,xrb,xrc,xrd) \
1311 do { \
1312 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,WW" \
1313 : \
1314 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1315 } while (0)
1316
1317#define Q16ADD_SA_LW(xra,xrb,xrc,xrd) \
1318 do { \
1319 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,LW" \
1320 : \
1321 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1322 } while (0)
1323
1324#define Q16ADD_SA_HW(xra,xrb,xrc,xrd) \
1325 do { \
1326 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,HW" \
1327 : \
1328 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1329 } while (0)
1330
1331#define Q16ADD_SA_XW(xra,xrb,xrc,xrd) \
1332 do { \
1333 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SA,XW" \
1334 : \
1335 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1336 } while (0)
1337
1338#define Q16ADD_SS_WW(xra,xrb,xrc,xrd) \
1339 do { \
1340 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,WW" \
1341 : \
1342 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1343 } while (0)
1344
1345#define Q16ADD_SS_LW(xra,xrb,xrc,xrd) \
1346 do { \
1347 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,LW" \
1348 : \
1349 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1350 } while (0)
1351
1352#define Q16ADD_SS_HW(xra,xrb,xrc,xrd) \
1353 do { \
1354 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,HW" \
1355 : \
1356 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1357 } while (0)
1358
1359#define Q16ADD_SS_XW(xra,xrb,xrc,xrd) \
1360 do { \
1361 __asm__ __volatile ("Q16ADD xr%0,xr%1,xr%2,xr%3,SS,XW" \
1362 : \
1363 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1364 } while (0)
1365
1366/***********************************Q16ACC********************************/
1367#define Q16ACC_AA(xra,xrb,xrc,xrd) \
1368 do { \
1369 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,AA" \
1370 : \
1371 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1372 } while (0)
1373
1374#define Q16ACC_AS(xra,xrb,xrc,xrd) \
1375 do { \
1376 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,AS" \
1377 : \
1378 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1379 } while (0)
1380
1381#define Q16ACC_SA(xra,xrb,xrc,xrd) \
1382 do { \
1383 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,SA" \
1384 : \
1385 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1386 } while (0)
1387
1388#define Q16ACC_SS(xra,xrb,xrc,xrd) \
1389 do { \
1390 __asm__ __volatile ("Q16ACC xr%0,xr%1,xr%2,xr%3,SS" \
1391 : \
1392 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1393 } while (0)
1394
1395/***********************************D16CPS********************************/
1396#define D16CPS(xra,xrb,xrc) \
1397 do { \
1398 __asm__ __volatile ("D16CPS xr%0,xr%1,xr%2" \
1399 : \
1400 :"K"(xra),"K"(xrb),"K"(xrc)); \
1401 } while (0)
1402
1403#define D16ABS(xra,xrb) \
1404 do { \
1405 __asm__ __volatile ("D16CPS xr%0,xr%1,xr%2" \
1406 : \
1407 :"K"(xra),"K"(xrb),"K"(xrb)); \
1408 } while (0)
1409
1410/*******************************D16AVG/D16AVGR*****************************/
1411#define D16AVG(xra,xrb,xrc) \
1412 do { \
1413 __asm__ __volatile ("D16AVG xr%0,xr%1,xr%2" \
1414 : \
1415 :"K"(xra),"K"(xrb),"K"(xrc)); \
1416 } while (0)
1417#define D16AVGR(xra,xrb,xrc) \
1418 do { \
1419 __asm__ __volatile ("D16AVGR xr%0,xr%1,xr%2" \
1420 : \
1421 :"K"(xra),"K"(xrb),"K"(xrc)); \
1422 } while (0)
1423
1424/************************************Q8ADD********************************/
1425#define Q8ADD_AA(xra,xrb,xrc) \
1426 do { \
1427 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,AA" \
1428 : \
1429 :"K"(xra),"K"(xrb),"K"(xrc)); \
1430 } while (0)
1431
1432#define Q8ADD_AS(xra,xrb,xrc) \
1433 do { \
1434 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,AS" \
1435 : \
1436 :"K"(xra),"K"(xrb),"K"(xrc)); \
1437 } while (0)
1438
1439#define Q8ADD_SA(xra,xrb,xrc) \
1440 do { \
1441 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,SA" \
1442 : \
1443 :"K"(xra),"K"(xrb),"K"(xrc)); \
1444 } while (0)
1445
1446#define Q8ADD_SS(xra,xrb,xrc) \
1447 do { \
1448 __asm__ __volatile ("Q8ADD xr%0,xr%1,xr%2,SS" \
1449 : \
1450 :"K"(xra),"K"(xrb),"K"(xrc)); \
1451 } while (0)
1452
1453/************************************Q8ADDE********************************/
1454#define Q8ADDE_AA(xra,xrb,xrc,xrd) \
1455 do { \
1456 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,AA" \
1457 : \
1458 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1459 } while (0)
1460
1461#define Q8ADDE_AS(xra,xrb,xrc,xrd) \
1462 do { \
1463 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,AS" \
1464 : \
1465 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1466 } while (0)
1467
1468#define Q8ADDE_SA(xra,xrb,xrc,xrd) \
1469 do { \
1470 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,SA" \
1471 : \
1472 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1473 } while (0)
1474
1475#define Q8ADDE_SS(xra,xrb,xrc,xrd) \
1476 do { \
1477 __asm__ __volatile ("Q8ADDE xr%0,xr%1,xr%2,xr%3,SS" \
1478 : \
1479 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1480 } while (0)
1481
1482/************************************Q8ACCE********************************/
1483#define Q8ACCE_AA(xra,xrb,xrc,xrd) \
1484 do { \
1485 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,AA" \
1486 : \
1487 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1488 } while (0)
1489
1490#define Q8ACCE_AS(xra,xrb,xrc,xrd) \
1491 do { \
1492 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,AS" \
1493 : \
1494 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1495 } while (0)
1496
1497#define Q8ACCE_SA(xra,xrb,xrc,xrd) \
1498 do { \
1499 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,SA" \
1500 : \
1501 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1502 } while (0)
1503
1504#define Q8ACCE_SS(xra,xrb,xrc,xrd) \
1505 do { \
1506 __asm__ __volatile ("Q8ACCE xr%0,xr%1,xr%2,xr%3,SS" \
1507 : \
1508 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1509 } while (0)
1510
1511/************************************Q8ABD********************************/
1512#define Q8ABD(xra,xrb,xrc) \
1513 do { \
1514 __asm__ __volatile ("Q8ABD xr%0,xr%1,xr%2" \
1515 : \
1516 :"K"(xra),"K"(xrb),"K"(xrc)); \
1517 } while (0)
1518
1519/************************************Q8SLT********************************/
1520#define Q8SLT(xra,xrb,xrc) \
1521 do { \
1522 __asm__ __volatile ("Q8SLT xr%0,xr%1,xr%2" \
1523 : \
1524 :"K"(xra),"K"(xrb),"K"(xrc)); \
1525 } while (0)
1526
1527/************************************Q8SAD********************************/
1528#define Q8SAD(xra,xrb,xrc,xrd) \
1529 do { \
1530 __asm__ __volatile ("Q8SAD xr%0,xr%1,xr%2,xr%3" \
1531 : \
1532 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd)); \
1533 } while (0)
1534
1535/********************************Q8AVG/Q8AVGR*****************************/
1536#define Q8AVG(xra,xrb,xrc) \
1537 do { \
1538 __asm__ __volatile ("Q8AVG xr%0,xr%1,xr%2" \
1539 : \
1540 :"K"(xra),"K"(xrb),"K"(xrc)); \
1541 } while (0)
1542#define Q8AVGR(xra,xrb,xrc) \
1543 do { \
1544 __asm__ __volatile ("Q8AVGR xr%0,xr%1,xr%2" \
1545 : \
1546 :"K"(xra),"K"(xrb),"K"(xrc)); \
1547 } while (0)
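/*
 * Illustrative usage sketch: Q8AVGR averages each unsigned byte of xrb with
 * the corresponding byte of xrc, assuming the usual rounded form
 * (a + b + 1) >> 1; this is the typical building block for half-pixel
 * interpolation.  Register numbers below are arbitrary examples.
 *
 *     Q8AVGR(1, 2, 3);   // xr1[i] = (xr2[i] + xr3[i] + 1) >> 1, i = 0..3
 */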
1548
1549/**********************************D32SHIFT******************************/
1550#define D32SLL(xra,xrb,xrc,xrd,SFT4) \
1551 do { \
1552 __asm__ __volatile ("D32SLL xr%0,xr%1,xr%2,xr%3,%4" \
1553 : \
1554 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1555 } while (0)
1556
1557#define D32SLR(xra,xrb,xrc,xrd,SFT4) \
1558 do { \
1559 __asm__ __volatile ("D32SLR xr%0,xr%1,xr%2,xr%3,%4" \
1560 : \
1561 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1562 } while (0)
1563
1564#define D32SAR(xra,xrb,xrc,xrd,SFT4) \
1565 do { \
1566 __asm__ __volatile ("D32SAR xr%0,xr%1,xr%2,xr%3,%4" \
1567 : \
1568 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1569 } while (0)
1570
1571#define D32SARL(xra,xrb,xrc,SFT4) \
1572 do { \
1573 __asm__ __volatile ("D32SARL xr%0,xr%1,xr%2,%3" \
1574 : \
1575 :"K"(xra),"K"(xrb),"K"(xrc),"K"(SFT4)); \
1576 } while (0)
1577
1578#define D32SLLV(xra,xrd,rb) \
1579 do { \
1580 __asm__ __volatile ("D32SLLV xr%0,xr%1,%z2" \
1581 : \
1582 :"K"(xra),"K"(xrd),"d"(rb)); \
1583 } while (0)
1584
1585#define D32SLRV(xra,xrd,rb) \
1586 do { \
1587 __asm__ __volatile ("D32SLRV xr%0,xr%1,%z2" \
1588 : \
1589 :"K"(xra),"K"(xrd),"d"(rb)); \
1590 } while (0)
1591
1592#define D32SARV(xra,xrd,rb) \
1593 do { \
1594 __asm__ __volatile ("D32SARV xr%0,xr%1,%z2" \
1595 : \
1596 :"K"(xra),"K"(xrd),"d"(rb)); \
1597 } while (0)
1598
1599#define D32SARW(xra,xrb,xrc,rb) \
1600 do { \
1601 __asm__ __volatile ("D32SARW xr%0,xr%1,xr%2,%3" \
1602 : \
1603 :"K"(xra),"K"(xrb),"K"(xrc),"d"(rb)); \
1604 } while (0)
1605
1606/**********************************Q16SHIFT******************************/
1607#define Q16SLL(xra,xrb,xrc,xrd,SFT4) \
1608 do { \
1609 __asm__ __volatile ("Q16SLL xr%0,xr%1,xr%2,xr%3,%4" \
1610 : \
1611 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1612 } while (0)
1613
1614#define Q16SLR(xra,xrb,xrc,xrd,SFT4) \
1615 do { \
1616 __asm__ __volatile ("Q16SLR xr%0,xr%1,xr%2,xr%3,%4" \
1617 : \
1618 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1619 } while (0)
1620
1621#define Q16SAR(xra,xrb,xrc,xrd,SFT4) \
1622 do { \
1623 __asm__ __volatile ("Q16SAR xr%0,xr%1,xr%2,xr%3,%4" \
1624 : \
1625 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(SFT4)); \
1626 } while (0)
1627
1628#define Q16SLLV(xra,xrd,rb) \
1629 do { \
1630 __asm__ __volatile ("Q16SLLV xr%0,xr%1,%z2" \
1631 : \
1632 :"K"(xra),"K"(xrd),"d"(rb)); \
1633 } while (0)
1634
1635#define Q16SLRV(xra,xrd,rb) \
1636 do { \
1637 __asm__ __volatile ("Q16SLRV xr%0,xr%1,%z2" \
1638 : \
1639 :"K"(xra),"K"(xrd),"d"(rb)); \
1640 } while (0)
1641
1642#define Q16SARV(xra,xrd,rb) \
1643 do { \
1644 __asm__ __volatile ("Q16SARV xr%0,xr%1,%z2" \
1645 : \
1646 :"K"(xra),"K"(xrd),"d"(rb)); \
1647 } while (0)
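/*
 * Illustrative usage sketch: scaling four 16-bit samples down by 3 bits,
 * assuming Q16SAR arithmetically shifts the halfwords of xrb and xrc and
 * writes the results to xra and xrd.  Register numbers are arbitrary.
 *
 *     Q16SAR(1, 2, 3, 4, 3);   // xr1:xr4 <- (xr2:xr3) >> 3 per 16-bit lane
 */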
1648
1649/*********************************MAX/MIN*********************************/
1650#define S32MAX(xra,xrb,xrc) \
1651 do { \
1652 __asm__ __volatile ("S32MAX xr%0,xr%1,xr%2" \
1653 : \
1654 :"K"(xra),"K"(xrb),"K"(xrc)); \
1655 } while (0)
1656
1657#define S32MIN(xra,xrb,xrc) \
1658 do { \
1659 __asm__ __volatile ("S32MIN xr%0,xr%1,xr%2" \
1660 : \
1661 :"K"(xra),"K"(xrb),"K"(xrc)); \
1662 } while (0)
1663
1664#define D16MAX(xra,xrb,xrc) \
1665 do { \
1666 __asm__ __volatile ("D16MAX xr%0,xr%1,xr%2" \
1667 : \
1668 :"K"(xra),"K"(xrb),"K"(xrc)); \
1669 } while (0)
1670
1671#define D16MIN(xra,xrb,xrc) \
1672 do { \
1673 __asm__ __volatile ("D16MIN xr%0,xr%1,xr%2" \
1674 : \
1675 :"K"(xra),"K"(xrb),"K"(xrc)); \
1676 } while (0)
1677
1678#define Q8MAX(xra,xrb,xrc) \
1679 do { \
1680 __asm__ __volatile ("Q8MAX xr%0,xr%1,xr%2" \
1681 : \
1682 :"K"(xra),"K"(xrb),"K"(xrc)); \
1683 } while (0)
1684
1685#define Q8MIN(xra,xrb,xrc) \
1686 do { \
1687 __asm__ __volatile ("Q8MIN xr%0,xr%1,xr%2" \
1688 : \
1689 :"K"(xra),"K"(xrb),"K"(xrc)); \
1690 } while (0)
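/*
 * Illustrative usage sketch: clamping the signed word in xr1 to the range
 * [xr2, xr3], assuming S32MAX/S32MIN write max(xrb, xrc)/min(xrb, xrc)
 * into xra.  Register numbers are arbitrary.
 *
 *     S32MAX(1, 1, 2);   // xr1 = max(xr1, xr2) -- apply lower bound
 *     S32MIN(1, 1, 3);   // xr1 = min(xr1, xr3) -- apply upper bound
 */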
1691
1692/*************************************MOVE********************************/
1693#define S32I2M(xra,rb) \
1694 do { \
1695 __asm__ __volatile ("S32I2M xr%0,%z1" \
1696 : \
1697 :"K"(xra),"d"(rb)); \
1698 } while (0)
1699
1700#define S32M2I(xra) \
1701__extension__ ({ \
1702 int __d; \
1703 __asm__ __volatile ("S32M2I xr%1, %0" \
1704 :"=d"(__d) \
1705 :"K"(xra)); \
1706 __d; \
1707})
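/*
 * Illustrative usage sketch: moving a word from a general-purpose register
 * into MXU register xr1 and back.  The register index must be a
 * compile-time constant because of the "K" constraint.
 *
 *     int in = 0x12345678, out;
 *     S32I2M(1, in);      // xr1 <- in
 *     out = S32M2I(1);    // out <- xr1, so out == 0x12345678
 */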
1708
1709/*********************************S32SFL**********************************/
1710#define S32SFL(xra,xrb,xrc,xrd,optn2) \
1711 do { \
1712 __asm__ __volatile ("S32SFL xr%0,xr%1,xr%2,xr%3,ptn%4" \
1713 : \
1714 :"K"(xra),"K"(xrb),"K"(xrc),"K"(xrd),"K"(optn2)); \
1715 } while (0)
1716
1717/*********************************S32ALN**********************************/
1718#define S32ALN(xra,xrb,xrc,rs) \
1719 do { \
1720 __asm__ __volatile ("S32ALN xr%0,xr%1,xr%2,%z3" \
1721 : \
1722 :"K"(xra),"K"(xrb),"K"(xrc),"d"(rs)); \
1723 } while (0)
1724
1725/*********************************Q16SAT**********************************/
1726#define Q16SAT(xra,xrb,xrc) \
1727 do { \
1728 __asm__ __volatile ("Q16SAT xr%0,xr%1,xr%2" \
1729 : \
1730 :"K"(xra),"K"(xrb),"K"(xrc)); \
1731 } while (0)
1732
1733// cache ops
1734
1735// CACHE instruction op codes
1736#define Index_Invalidate_I 0x00
1737#define Index_Writeback_Inv_D 0x01
1738#define Index_Load_Tag_I 0x04
1739#define Index_Load_Tag_D 0x05
1740#define Index_Store_Tag_I 0x08
1741#define Index_Store_Tag_D 0x09
1742#define Hit_Invalidate_I 0x10
1743#define Hit_Invalidate_D 0x11
1744#define Hit_Writeback_Inv_D 0x15
1745#define Hit_Writeback_I 0x18
1746#define Hit_Writeback_D 0x19
1747
1748// PREF instruction hint codes
1749#define PrefLoad 0
1750#define PrefStore 1
1751#define PrefLoadStreamed 4
1752#define PrefStoreStreamed 5
1753#define PrefLoadRetained 6
1754#define PrefStoreRetained 7
1755#define PrefWBInval 25 /* MIPS32 hint 25: writeback invalidate */
1756#define PrefNudge 25 /* "nudge" shares hint code 25 with WBInval */
1757#define PrefPreForStore 30
1758
1759#define mips_pref(base, offset, op) \
1760 __asm__ __volatile__( \
1761 " .set noreorder \n" \
1762 " pref %1, %2(%0) \n" \
1763 " .set reorder" \
1764 : \
1765 : "r" (base), "i" (op), "i" (offset))
1766
1767#define cache_op(op, addr) \
1768 __asm__ __volatile__( \
1769 " .set noreorder \n" \
1770 " cache %0, %1 \n" \
1771 " .set reorder" \
1772 : \
1773 : "i" (op), "m" (*(unsigned char *)(addr)))
1774
1775#define i_pref(hint,base,offset) \
1776 ({ __asm__ __volatile__("pref %0,%2(%1)"::"i"(hint),"r"(base),"i"(offset):"memory");})
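/*
 * Illustrative usage sketch: writing back and invalidating the D-cache line
 * that holds `buf', then prefetching the following line for reading.
 * CACHE_LINE_SIZE is a placeholder for the target's real cache line size.
 *
 *     cache_op(Hit_Writeback_Inv_D, buf);
 *     mips_pref(buf, CACHE_LINE_SIZE, PrefLoad);
 */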
1777
1778struct unaligned_32 { unsigned int l; } __attribute__((packed));
1779#define LD32(a) (((const struct unaligned_32 *) (a))->l)
1780#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
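/*
 * Illustrative usage sketch: LD32/ST32 go through a packed struct so the
 * compiler can emit unaligned-safe accesses instead of a plain lw/sw,
 * which would trap on a misaligned address.
 *
 *     unsigned char buf[8];
 *     ST32(buf + 1, 0x11223344);        // unaligned store
 *     unsigned int v = LD32(buf + 1);   // v == 0x11223344
 */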
1781
1782#define REVERSE_LD32(xra, xrb, rb, s12) \
1783__extension__ ({ \
1784 int __d; \
1785 __asm__ __volatile ("S32LDD xr%1,%z3,%4\n\t" \
1786 "S32SFL xr%1,xr%1, xr%1, xr%2, ptn0\n\t" \
1787 "S32SFL xr%1,xr%2, xr%1, xr%2, ptn3\n\t" \
1788 "S32SFL xr%1,xr%2, xr%1, xr%2, ptn2\n\t" \
1789 "S32M2I xr%1,%0" \
1790 :"=d"(__d) \
1791 :"K"(xra), "K"(xrb), "d"(rb), "I"(s12)); \
1792 __d; \
1793})
1794
1795#define IU_CLZ(rb) \
1796__extension__ ({ \
1797 int __d; \
1798 __asm__ __volatile ("clz %0, %1" \
1799 :"=d"(__d) \
1800 :"d"(rb)); \
1801 __d; \
1802})
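/*
 * Illustrative usage sketch: IU_CLZ wraps the MIPS32 `clz' instruction and
 * returns the number of leading zero bits in a 32-bit word.
 *
 *     int n = IU_CLZ(0x00ffffff);   // n == 8
 *     int m = IU_CLZ(1);            // m == 31
 */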
1803
1804#endif /* C_VERSION */
1805
1806#endif /* JZ_MXU_H_ */