From f40bfc9267b13b54e6379dfe7539447662879d24 Mon Sep 17 00:00:00 2001
From: Sean Bartell
Date: Sat, 25 Jun 2011 21:32:25 -0400
Subject: Add codecs to librbcodec.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius
Tested-by: Nils Wallménius
---
 .../codecs/demac/libdemac/vector_math16_mmx.h | 234 +++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h

diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
new file mode 100644
index 0000000000..2177fe88ea
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
@@ -0,0 +1,234 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+MMX vector math copyright (C) 2010 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH
+
+#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
+#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
+#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
+
+#if ORDER == 16     /* 3 times */
+#define REPEAT_MB(x) REPEAT_MB3(x, 8)
+#elif ORDER == 32   /* 7 times */
+#define REPEAT_MB(x) REPEAT_MB7(x, 8)
+#elif ORDER == 64   /* 5*3 == 15 times */
+#define REPEAT_MB(x) REPEAT_MB3(x, 8)  REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
+                     REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
+#elif ORDER == 256  /* 9*7 == 63 times */
+#define REPEAT_MB(x) REPEAT_MB7(x, 8)   REPEAT_MB7(x, 64)  REPEAT_MB7(x, 120) \
+                     REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
+                     REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
+#elif ORDER == 1280 /* 8*8 == 64 times */
+#define REPEAT_MB(x) REPEAT_MB8(x, 0)   REPEAT_MB8(x, 64)  REPEAT_MB8(x, 128) \
+                     REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
+                     REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
+#else
+#error unsupported order
+#endif
+
+
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
+{
+    int res, t;
+#if ORDER > 256
+    int cnt = ORDER>>8;
+#endif
+
+    asm volatile (
+#if ORDER > 256
+        "pxor     %%mm2, %%mm2          \n"
+    "1:                                 \n"
+#else
+        "movq     (%[v1]), %%mm2        \n"
+        "movq     %%mm2, %%mm0          \n"
+        "pmaddwd  (%[f2]), %%mm2        \n"
+        "paddw    (%[s2]), %%mm0        \n"
+        "movq     %%mm0, (%[v1])        \n"
+#endif
+
+#define SP_ADD_BLOCK(n)                      \
+        "movq    " #n "(%[v1]), %%mm1   \n"  \
+        "movq     %%mm1, %%mm0          \n"  \
+        "pmaddwd " #n "(%[f2]), %%mm1   \n"  \
+        "paddw   " #n "(%[s2]), %%mm0   \n"  \
+        "movq     %%mm0, " #n "(%[v1])  \n"  \
+        "paddd    %%mm1, %%mm2          \n"

+REPEAT_MB(SP_ADD_BLOCK)
+
+#if ORDER > 256
+        "add      $512, %[v1]           \n"
+        "add      $512, %[s2]           \n"
+        "add      $512, %[f2]           \n"
+        "dec      %[cnt]                \n"
+        "jne      1b                    \n"
+#endif
+
+        "movd     %%mm2, %[t]           \n"
+        "psrlq    $32, %%mm2            \n"
+        "movd     %%mm2, %[res]         \n"
+        "add      %[t], %[res]          \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [s2] "+r"(s2),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"2"(v1),
+        [f2]"3"(f2)
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [f2]"r"(f2),
+        [s2]"r"(s2)
+#endif
+        : /* clobbers */
+        "mm0", "mm1", "mm2"
+    );
+    return res;
+}
+
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
+{
+    int res, t;
+#if ORDER > 256
+    int cnt = ORDER>>8;
+#endif
+
+    asm volatile (
+#if ORDER > 256
+        "pxor     %%mm2, %%mm2          \n"
+    "1:                                 \n"
+#else
+        "movq     (%[v1]), %%mm2        \n"
+        "movq     %%mm2, %%mm0          \n"
+        "pmaddwd  (%[f2]), %%mm2        \n"
+        "psubw    (%[s2]), %%mm0        \n"
+        "movq     %%mm0, (%[v1])        \n"
+#endif
+
+#define SP_SUB_BLOCK(n)                      \
+        "movq    " #n "(%[v1]), %%mm1   \n"  \
+        "movq     %%mm1, %%mm0          \n"  \
+        "pmaddwd " #n "(%[f2]), %%mm1   \n"  \
+        "psubw   " #n "(%[s2]), %%mm0   \n"  \
+        "movq     %%mm0, " #n "(%[v1])  \n"  \
+        "paddd    %%mm1, %%mm2          \n"

+REPEAT_MB(SP_SUB_BLOCK)
+
+#if ORDER > 256
+        "add      $512, %[v1]           \n"
+        "add      $512, %[s2]           \n"
+        "add      $512, %[f2]           \n"
+        "dec      %[cnt]                \n"
+        "jne      1b                    \n"
+#endif
+
+        "movd     %%mm2, %[t]           \n"
+        "psrlq    $32, %%mm2            \n"
+        "movd     %%mm2, %[res]         \n"
+        "add      %[t], %[res]          \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [s2] "+r"(s2),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"2"(v1),
+        [f2]"3"(f2)
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [f2]"r"(f2),
+        [s2]"r"(s2)
+#endif
+        : /* clobbers */
+        "mm0", "mm1", "mm2"
+    );
+    return res;
+}
+
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res, t;
+#if ORDER > 256
+    int cnt = ORDER>>8;
+#endif
+
+    asm volatile (
+#if ORDER > 256
+        "pxor     %%mm1, %%mm1          \n"
+    "1:                                 \n"
+#else
+        "movq     (%[v1]), %%mm1        \n"
+        "pmaddwd  (%[v2]), %%mm1        \n"
+#endif
+
+#define SP_BLOCK(n)                          \
+        "movq    " #n "(%[v1]), %%mm0   \n"  \
+        "pmaddwd " #n "(%[v2]), %%mm0   \n"  \
+        "paddd    %%mm0, %%mm1          \n"

+REPEAT_MB(SP_BLOCK)
+
+#if ORDER > 256
+        "add      $512, %[v1]           \n"
+        "add      $512, %[v2]           \n"
+        "dec      %[cnt]                \n"
+        "jne      1b                    \n"
+#endif
+
+        "movd     %%mm1, %[t]           \n"
+        "psrlq    $32, %%mm1            \n"
+        "movd     %%mm1, %[res]         \n"
+        "add      %[t], %[res]          \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"1"(v1),
+        [v2]"2"(v2)
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [v2]"r"(v2)
+#endif
+        : /* clobbers */
+        "mm0", "mm1"
+    );
+    return res;
+}
--
cgit v1.2.3