From 1334738071b89982cc23b3a246fc5deba7a6fc3c Mon Sep 17 00:00:00 2001 From: Michael Giacomelli Date: Thu, 17 Jul 2008 17:43:49 +0000 Subject: Use Tremor IMDCT for WMA. Gives a ~20% speedup on ARM and coldfire. All my test samples gave equivilent or better accuracy (>>16 bit), but there may still be problems. Code becomes somewhat less readable since the IMDCT has bits of Vorbis in it, but this is not a serious issue and the old ffmpeg transform remains in SVN if anyone wants it. Also, WMA now passes Vorbis as the fastest transform codec on ARM, although MPC remains the fastest lossy codec overall. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18084 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libfaad/SOURCES | 1 + apps/codecs/libfaad/filtbank.c | 9 +- apps/codecs/libwma/SOURCES | 6 +- apps/codecs/libwma/asm_arm.h | 343 ++++++++++++++++++++++++ apps/codecs/libwma/asm_mcf5249.h | 327 +++++++++++++++++++++++ apps/codecs/libwma/mdct2.c | 522 +++++++++++++++++++++++++++++++++++++ apps/codecs/libwma/mdct2.h | 85 ++++++ apps/codecs/libwma/mdct_arm.S | 429 ++++++++++++++++++++++++++++++ apps/codecs/libwma/mdct_lookup.h | 544 +++++++++++++++++++++++++++++++++++++++ apps/codecs/libwma/misc.h | 291 +++++++++++++++++++++ apps/codecs/libwma/wmadeci.c | 35 ++- apps/codecs/libwma/wmafixed.h | 64 ----- apps/codecs/wma.c | 6 +- apps/debug_menu.c | 12 +- apps/plugins/SOURCES | 1 + 15 files changed, 2584 insertions(+), 91 deletions(-) create mode 100644 apps/codecs/libwma/asm_arm.h create mode 100644 apps/codecs/libwma/asm_mcf5249.h create mode 100644 apps/codecs/libwma/mdct2.c create mode 100644 apps/codecs/libwma/mdct2.h create mode 100644 apps/codecs/libwma/mdct_arm.S create mode 100644 apps/codecs/libwma/mdct_lookup.h create mode 100644 apps/codecs/libwma/misc.h diff --git a/apps/codecs/libfaad/SOURCES b/apps/codecs/libfaad/SOURCES index 4c7a34d210..f97e14d9e4 100644 --- a/apps/codecs/libfaad/SOURCES +++ b/apps/codecs/libfaad/SOURCES @@ -33,3 +33,4 @@ sbr_tf_grid.c specrec.c syntax.c tns.c +mdct2.c \ No newline at end of file diff --git a/apps/codecs/libfaad/filtbank.c b/apps/codecs/libfaad/filtbank.c index 9b1bc854ad..724110ecc5 100644 --- a/apps/codecs/libfaad/filtbank.c +++ b/apps/codecs/libfaad/filtbank.c @@ -1,19 +1,19 @@ /* ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com -** +** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 2 of the License, or ** (at your option) any later version. -** +** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. -** +** ** You should have received a copy of the GNU General Public License -** along with this program; if not, write to the Free Software +** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** ** Any non-GPL usage of this software or parts of this software is strictly @@ -42,6 +42,7 @@ #include "kbd_win.h" #include "sine_win.h" #include "mdct.h" +#include "mdct2.h" fb_info *filter_bank_init(uint16_t frame_len) diff --git a/apps/codecs/libwma/SOURCES b/apps/codecs/libwma/SOURCES index b9d4cc1882..213e1da12e 100644 --- a/apps/codecs/libwma/SOURCES +++ b/apps/codecs/libwma/SOURCES @@ -1,5 +1,7 @@ wmadeci.c wmafixed.c bitstream.c -fft.c -mdct.c +mdct2.c +#ifdef CPU_ARM +mdct_arm.S +#endif \ No newline at end of file diff --git a/apps/codecs/libwma/asm_arm.h b/apps/codecs/libwma/asm_arm.h new file mode 100644 index 0000000000..e0ab3e7e20 --- /dev/null +++ b/apps/codecs/libwma/asm_arm.h @@ -0,0 +1,343 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * + * * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * + * * + ******************************************************************** + + function: arm7 and later wide math functions + + ********************************************************************/ +#ifdef CPU_ARM + +#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_) +#define _V_WIDE_MATH + +static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { + int lo,hi; + asm volatile("smull\t%0, %1, %2, %3" + : "=&r"(lo),"=&r"(hi) + : "%r"(x),"r"(y) + : "cc"); + return(hi); +} + +static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { + return MULT32(x,y)<<1; +} + +static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { + int lo,hi; + asm volatile("smull %0, %1, %2, %3\n\t" + "movs %0, %0, lsr #15\n\t" + "adc %1, %0, %1, lsl #17\n\t" + : "=&r"(lo),"=&r"(hi) + : "%r"(x),"r"(y) + : "cc"); + return(hi); +} + +#define MB() asm volatile ("" : : : "memory") + +#define XPROD32(a, b, t, v, x, y) \ +{ \ + long l; \ + asm( "smull %0, %1, %4, %6\n\t" \ + "smlal %0, %1, %5, %7\n\t" \ + "rsb %3, %4, #0\n\t" \ + "smull %0, %2, %5, %6\n\t" \ + "smlal %0, %2, %3, %7" \ + : "=&r" (l), "=&r" (x), "=&r" (y), "=r" ((a)) \ + : "3" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) \ + : "cc" ); \ +} + +static inline void XPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + int x1, y1, l; + asm( "smull %0, %1, %4, %6\n\t" + "smlal %0, %1, %5, %7\n\t" + "rsb %3, %4, #0\n\t" + "smull %0, %2, %5, %6\n\t" + "smlal %0, %2, %3, %7" + : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a) + : "3" (a), "r" (b), "r" (t), "r" (v) + : "cc" ); + *x = x1 << 1; + MB(); + *y = y1 << 1; +} + +static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + int x1, y1, l; + asm( "rsb %2, %4, #0\n\t" + "smull %0, %1, %3, %5\n\t" + "smlal %0, %1, %2, %6\n\t" + "smull %0, %2, %4, %5\n\t" + "smlal %0, %2, %3, %6" + : "=&r" (l), "=&r" (x1), "=&r" (y1) + : "r" (a), "r" (b), "r" (t), "r" (v) + : "cc" ); + *x = x1 << 1; + MB(); + *y = y1 << 1; +} + +#ifndef _V_VECT_OPS +#define _V_VECT_OPS + +/* asm versions of vector operations for block.c, window.c */ +static inline +void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[x], {r0, r1, r2, r3};" + "ldmia %[y]!, {r4, r5, r6, r7};" + "add r0, r0, r4;" + "add r1, r1, r5;" + "add r2, r2, r6;" + "add r3, r3, r7;" + "stmia %[x]!, {r0, r1, r2, r3};" + : [x] "+r" (x), [y] "+r" (y) + : : "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", + "memory"); + n -= 4; + } + /* add final elements */ + while (n>0) { + *x++ += *y++; + n--; + } +} + +static inline +void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};" + "stmia %[x]!, {r0, r1, r2, r3};" + : [x] "+r" (x), [y] "+r" (y) + : : "r0", "r1", "r2", "r3", + "memory"); + n -= 4; + } + /* copy final elements */ + while (n>0) { + *x++ = *y++; + n--; + } +} + +static inline +void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[d], {r0, r1, r2, r3};" + "ldmia %[w]!, {r4, r5, r6, r7};" + "smull r8, r9, r0, r4;" + "mov r0, r9, lsl #1;" + "smull r8, r9, r1, r5;" + "mov r1, r9, lsl #1;" + "smull r8, r9, r2, r6;" + "mov r2, r9, lsl #1;" + "smull r8, r9, r3, r7;" + "mov r3, r9, lsl #1;" + "stmia %[d]!, {r0, r1, r2, r3};" + : [d] "+r" (data), [w] "+r" (window) + : : "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", "r8", "r9", + "memory", "cc"); + n -= 4; + } + while(n>0) { + *data = MULT31(*data, *window); + data++; + window++; + n--; + } +} + +static inline +void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while (n>=4) { + asm volatile ("ldmia %[d], {r0, r1, r2, r3};" + "ldmda %[w]!, {r4, r5, r6, r7};" + "smull r8, r9, r0, r7;" + "mov r0, r9, lsl #1;" + "smull r8, r9, r1, r6;" + "mov r1, r9, lsl #1;" + "smull r8, r9, r2, r5;" + "mov r2, r9, lsl #1;" + "smull r8, r9, r3, r4;" + "mov r3, r9, lsl #1;" + "stmia %[d]!, {r0, r1, r2, r3};" + : [d] "+r" (data), [w] "+r" (window) + : : "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", "r8", "r9", + "memory", "cc"); + n -= 4; + } + while(n>0) { + *data = MULT31(*data, *window); + data++; + window--; + n--; + } +} + +#endif + +#endif + +#ifndef _V_CLIP_MATH +#define _V_CLIP_MATH + +static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) { + int tmp; + asm volatile("subs %1, %0, #32768\n\t" + "movpl %0, #0x7f00\n\t" + "orrpl %0, %0, #0xff\n" + "adds %1, %0, #32768\n\t" + "movmi %0, #0x8000" + : "+r"(x),"=r"(tmp) + : + : "cc"); + return(x); +} + +#endif + +#ifndef _V_LSP_MATH_ASM +#define _V_LSP_MATH_ASM + +static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, + ogg_int32_t *qexpp, + ogg_int32_t *ilsp,ogg_int32_t wi, + ogg_int32_t m){ + + ogg_uint32_t qi=*qip,pi=*pip; + ogg_int32_t qexp=*qexpp; + + asm("mov r0,%3;" + "mov r1,%5,asr#1;" + "add r0,r0,r1,lsl#3;" + "1:" + + "ldmdb r0!,{r1,r3};" + "subs r1,r1,%4;" //ilsp[j]-wi + "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi) + "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi) + + "subs r1,r3,%4;" //ilsp[j+1]-wi + "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi) + "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi) + + "cmn r2,r3;" // shift down 16? + "beq 0f;" + "add %2,%2,#16;" + "mov %0,%0,lsr #16;" + "orr %0,%0,r2,lsl #16;" + "mov %1,%1,lsr #16;" + "orr %1,%1,r3,lsl #16;" + "0:" + "cmp r0,%3;\n" + "bhi 1b;\n" + + // odd filter assymetry + "ands r0,%5,#1;\n" + "beq 2f;\n" + "add r0,%3,%5,lsl#2;\n" + + "ldr r1,[r0,#-4];\n" + "mov r0,#0x4000;\n" + + "subs r1,r1,%4;\n" //ilsp[j]-wi + "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi) + "umull %0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi) + "umull %1,r3,r0,%1;\n" //pi*=labs(ilsp[j+1]-wi) + + "cmn r2,r3;\n" // shift down 16? + "beq 2f;\n" + "add %2,%2,#16;\n" + "mov %0,%0,lsr #16;\n" + "orr %0,%0,r2,lsl #16;\n" + "mov %1,%1,lsr #16;\n" + "orr %1,%1,r3,lsl #16;\n" + + //qi=(pi>>shift)*labs(ilsp[j]-wi); + //pi=(qi>>shift)*labs(ilsp[j+1]-wi); + //qexp+=shift; + + //} + + /* normalize to max 16 sig figs */ + "2:" + "mov r2,#0;" + "orr r1,%0,%1;" + "tst r1,#0xff000000;" + "addne r2,r2,#8;" + "movne r1,r1,lsr #8;" + "tst r1,#0x00f00000;" + "addne r2,r2,#4;" + "movne r1,r1,lsr #4;" + "tst r1,#0x000c0000;" + "addne r2,r2,#2;" + "movne r1,r1,lsr #2;" + "tst r1,#0x00020000;" + "addne r2,r2,#1;" + "movne r1,r1,lsr #1;" + "tst r1,#0x00010000;" + "addne r2,r2,#1;" + "mov %0,%0,lsr r2;" + "mov %1,%1,lsr r2;" + "add %2,%2,r2;" + + : "+r"(qi),"+r"(pi),"+r"(qexp) + : "r"(ilsp),"r"(wi),"r"(m) + : "r0","r1","r2","r3","cc"); + + *qip=qi; + *pip=pi; + *qexpp=qexp; +} + +static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){ + + ogg_uint32_t qi=*qip; + ogg_int32_t qexp=*qexpp; + + asm("tst %0,#0x0000ff00;" + "moveq %0,%0,lsl #8;" + "subeq %1,%1,#8;" + "tst %0,#0x0000f000;" + "moveq %0,%0,lsl #4;" + "subeq %1,%1,#4;" + "tst %0,#0x0000c000;" + "moveq %0,%0,lsl #2;" + "subeq %1,%1,#2;" + "tst %0,#0x00008000;" + "moveq %0,%0,lsl #1;" + "subeq %1,%1,#1;" + : "+r"(qi),"+r"(qexp) + : + : "cc"); + *qip=qi; + *qexpp=qexp; +} + +#endif +#endif + diff --git a/apps/codecs/libwma/asm_mcf5249.h b/apps/codecs/libwma/asm_mcf5249.h new file mode 100644 index 0000000000..6b5c600685 --- /dev/null +++ b/apps/codecs/libwma/asm_mcf5249.h @@ -0,0 +1,327 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * + * Copyright (C) 2005 by Pedro Vasconcelos + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ +/* asm routines for wide math on the MCF5249 */ + +//#include "os_types.h" + +#if defined(CPU_COLDFIRE) + +/* attribute for 16-byte alignment */ +#define LINE_ATTR __attribute__ ((aligned (16))) + +#ifndef _V_WIDE_MATH +#define _V_WIDE_MATH + +#define MB() + +static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { + + asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */ + "movclr.l %%acc0, %[x];" /* move & clear acc */ + "asr.l #1, %[x];" /* no overflow test */ + : [x] "+&d" (x) + : [y] "r" (y) + : "cc"); + return x; +} + +static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { + + asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */ + "movclr.l %%acc0, %[x];" /* move and clear */ + : [x] "+&r" (x) + : [y] "r" (y) + : "cc"); + return x; +} + + +static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { + ogg_int32_t r; + + asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */ + "mulu.l %[y], %[x];" /* get lower half, avoid emac stall */ + "movclr.l %%acc0, %[r];" /* get higher half */ + "asl.l #8, %[r];" /* hi<<16, plus one free */ + "asl.l #8, %[r];" + "lsr.l #8, %[x];" /* (unsigned)lo >> 15 */ + "lsr.l #7, %[x];" + "or.l %[x], %[r];" /* logical-or results */ + : [r] "=&d" (r), [x] "+d" (x) + : [y] "d" (y) + : "cc"); + return r; +} + + +static inline +void XPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + asm volatile ("mac.l %[a], %[t], %%acc0;" + "mac.l %[b], %[v], %%acc0;" + "mac.l %[b], %[t], %%acc1;" + "msac.l %[a], %[v], %%acc1;" + "movclr.l %%acc0, %[a];" + "move.l %[a], (%[x]);" + "movclr.l %%acc1, %[a];" + "move.l %[a], (%[y]);" + : [a] "+&r" (a) + : [x] "a" (x), [y] "a" (y), + [b] "r" (b), [t] "r" (t), [v] "r" (v) + : "cc", "memory"); +} + + +static inline +void XNPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + asm volatile ("mac.l %[a], %[t], %%acc0;" + "msac.l %[b], %[v], %%acc0;" + "mac.l %[b], %[t], %%acc1;" + "mac.l %[a], %[v], %%acc1;" + "movclr.l %%acc0, %[a];" + "move.l %[a], (%[x]);" + "movclr.l %%acc1, %[a];" + "move.l %[a], (%[y]);" + : [a] "+&r" (a) + : [x] "a" (x), [y] "a" (y), + [b] "r" (b), [t] "r" (t), [v] "r" (v) + : "cc", "memory"); +} + + +#if 0 /* canonical Tremor definition */ +#define XPROD32(_a, _b, _t, _v, _x, _y) \ + { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \ + (_y)=MULT32(_b,_t)-MULT32(_a,_v); } +#endif + +/* this could lose the LSB by overflow, but i don't think it'll ever happen. + if anyone think they can hear a bug caused by this, please try the above + version. */ +#define XPROD32(_a, _b, _t, _v, _x, _y) \ + asm volatile ("mac.l %[a], %[t], %%acc0;" \ + "mac.l %[b], %[v], %%acc0;" \ + "mac.l %[b], %[t], %%acc1;" \ + "msac.l %[a], %[v], %%acc1;" \ + "movclr.l %%acc0, %[x];" \ + "asr.l #1, %[x];" \ + "movclr.l %%acc1, %[y];" \ + "asr.l #1, %[y];" \ + : [x] "=&d" (_x), [y] "=&d" (_y) \ + : [a] "r" (_a), [b] "r" (_b), \ + [t] "r" (_t), [v] "r" (_v) \ + : "cc"); + +#ifndef _V_VECT_OPS +#define _V_VECT_OPS + +/* asm versions of vector operations for block.c, window.c */ +/* assumes MAC is initialized & accumulators cleared */ +static inline +void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + /* align to 16 bytes */ + while(n>0 && (int)x&16) { + *x++ += *y++; + n--; + } + asm volatile ("bra 1f;" + "0:" /* loop start */ + "movem.l (%[x]), %%d0-%%d3;" /* fetch values */ + "movem.l (%[y]), %%a0-%%a3;" + /* add */ + "add.l %%a0, %%d0;" + "add.l %%a1, %%d1;" + "add.l %%a2, %%d2;" + "add.l %%a3, %%d3;" + /* store and advance */ + "movem.l %%d0-%%d3, (%[x]);" + "lea.l (4*4, %[x]), %[x];" + "lea.l (4*4, %[y]), %[y];" + "subq.l #4, %[n];" /* done 4 elements */ + "1: cmpi.l #4, %[n];" + "bge 0b;" + : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) + : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", + "cc", "memory"); + /* add final elements */ + while (n>0) { + *x++ += *y++; + n--; + } +} + +static inline +void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + /* align to 16 bytes */ + while(n>0 && (int)x&16) { + *x++ = *y++; + n--; + } + asm volatile ("bra 1f;" + "0:" /* loop start */ + "movem.l (%[y]), %%d0-%%d3;" /* fetch values */ + "movem.l %%d0-%%d3, (%[x]);" /* store */ + "lea.l (4*4, %[x]), %[x];" /* advance */ + "lea.l (4*4, %[y]), %[y];" + "subq.l #4, %[n];" /* done 4 elements */ + "1: cmpi.l #4, %[n];" + "bge 0b;" + : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) + : : "%d0", "%d1", "%d2", "%d3", "cc", "memory"); + /* copy final elements */ + while (n>0) { + *x++ = *y++; + n--; + } +} + + +static inline +void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + /* ensure data is aligned to 16-bytes */ + while(n>0 && (int)data%16) { + *data = MULT31(*data, *window); + data++; + window++; + n--; + } + asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */ + "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */ + "lea.l (4*4, %[w]), %[w];" + "bra 1f;" /* jump to loop condition */ + "0:" /* loop body */ + /* multiply and load next window values */ + "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;" + "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;" + "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;" + "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;" + "movclr.l %%acc0, %%d0;" /* get the products */ + "movclr.l %%acc1, %%d1;" + "movclr.l %%acc2, %%d2;" + "movclr.l %%acc3, %%d3;" + /* store and advance */ + "movem.l %%d0-%%d3, (%[d]);" + "lea.l (4*4, %[d]), %[d];" + "movem.l (%[d]), %%d0-%%d3;" + "subq.l #4, %[n];" /* done 4 elements */ + "1: cmpi.l #4, %[n];" + "bge 0b;" + /* multiply final elements */ + "tst.l %[n];" + "beq 1f;" /* n=0 */ + "mac.l %%d0, %%a0, %%acc0;" + "movclr.l %%acc0, %%d0;" + "move.l %%d0, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" /* n=1 */ + "mac.l %%d1, %%a1, %%acc0;" + "movclr.l %%acc0, %%d1;" + "move.l %%d1, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" /* n=2 */ + /* otherwise n = 3 */ + "mac.l %%d2, %%a2, %%acc0;" + "movclr.l %%acc0, %%d2;" + "move.l %%d2, (%[d])+;" + "1:" + : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) + : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", + "cc", "memory"); +} + +static inline +void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + /* ensure at least data is aligned to 16-bytes */ + while(n>0 && (int)data%16) { + *data = MULT31(*data, *window); + data++; + window--; + n--; + } + asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */ + "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */ + "movem.l (%[w]), %%a0-%%a3;" + "bra 1f;" /* jump to loop condition */ + "0:" /* loop body */ + /* multiply and load next window value */ + "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;" + "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;" + "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;" + "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;" + "movclr.l %%acc0, %%d0;" /* get the products */ + "movclr.l %%acc1, %%d1;" + "movclr.l %%acc2, %%d2;" + "movclr.l %%acc3, %%d3;" + /* store and advance */ + "movem.l %%d0-%%d3, (%[d]);" + "lea.l (4*4, %[d]), %[d];" + "movem.l (%[d]), %%d0-%%d3;" + "subq.l #4, %[n];" /* done 4 elements */ + "1: cmpi.l #4, %[n];" + "bge 0b;" + /* multiply final elements */ + "tst.l %[n];" + "beq 1f;" /* n=0 */ + "mac.l %%d0, %%a3, %%acc0;" + "movclr.l %%acc0, %%d0;" + "move.l %%d0, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" /* n=1 */ + "mac.l %%d1, %%a2, %%acc0;" + "movclr.l %%acc0, %%d1;" + "move.l %%d1, (%[d])+;" + "subq.l #1, %[n];" + "beq 1f;" /* n=2 */ + /* otherwise n = 3 */ + "mac.l %%d2, %%a1, %%acc0;" + "movclr.l %%acc0, %%d2;" + "move.l %%d2, (%[d])+;" + "1:" + : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) + : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", + "cc", "memory"); +} + +#endif + +#endif + +#ifndef _V_CLIP_MATH +#define _V_CLIP_MATH + +/* this is portable C and simple; why not use this as default? */ +static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) { + register ogg_int32_t hi=32767, lo=-32768; + return (x>=hi ? hi : (x<=lo ? lo : x)); +} + +#endif +#else +#define LINE_ATTR +#endif diff --git a/apps/codecs/libwma/mdct2.c b/apps/codecs/libwma/mdct2.c new file mode 100644 index 0000000000..c492403b41 --- /dev/null +++ b/apps/codecs/libwma/mdct2.c @@ -0,0 +1,522 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * + * * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * + * * + ******************************************************************** + + function: normalized modified discrete cosine transform + power of two length transform only [64 <= n ] + + + Original algorithm adapted long ago from _The use of multirate filter + banks for coding of high quality digital audio_, by T. Sporer, + K. Brandenburg and B. Edler, collection of the European Signal + Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp + 211-214 + + The below code implements an algorithm that no longer looks much like + that presented in the paper, but the basic structure remains if you + dig deep enough to see it. + + This module DOES NOT INCLUDE code to generate/apply the window + function. Everybody has their own weird favorite including me... I + happen to like the properties of y=sin(.5PI*sin^2(x)), but others may + vehemently disagree. + + ********************************************************************/ + +/*Tremor IMDCT adapted for use with libwmai*/ + + +#include "mdct2.h" +#include "mdct_lookup.h" +#include + +#if defined(CPU_ARM) && CONFIG_CPU != S3C2440 +/* C code is faster on S3C2440 */ + +extern void mdct_butterfly_32(ogg_int32_t *x); +extern void mdct_butterfly_generic_loop(ogg_int32_t *x1, ogg_int32_t *x2, + LOOKUP_T *T0, int step, + LOOKUP_T *Ttop); + +static inline void mdct_butterfly_generic(ogg_int32_t *x,int points, int step){ + mdct_butterfly_generic_loop(x + points, x + (points>>1), + sincos_lookup0, step, sincos_lookup0+1024); +} + +#else + +/* 8 point butterfly (in place) */ +static inline void mdct_butterfly_8(ogg_int32_t *x){ + register ogg_int32_t r0 = x[4] + x[0]; + register ogg_int32_t r1 = x[4] - x[0]; + register ogg_int32_t r2 = x[5] + x[1]; + register ogg_int32_t r3 = x[5] - x[1]; + register ogg_int32_t r4 = x[6] + x[2]; + register ogg_int32_t r5 = x[6] - x[2]; + register ogg_int32_t r6 = x[7] + x[3]; + register ogg_int32_t r7 = x[7] - x[3]; + + x[0] = r5 + r3; + x[1] = r7 - r1; + x[2] = r5 - r3; + x[3] = r7 + r1; + x[4] = r4 - r0; + x[5] = r6 - r2; + x[6] = r4 + r0; + x[7] = r6 + r2; + MB(); +} + +/* 16 point butterfly (in place, 4 register) */ +static inline void mdct_butterfly_16(ogg_int32_t *x){ + + register ogg_int32_t r0, r1; + + r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0]; + r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1]; + x[ 0] = MULT31((r0 + r1) , cPI2_8); + x[ 1] = MULT31((r1 - r0) , cPI2_8); + MB(); + + r0 = x[10] - x[ 2]; x[10] += x[ 2]; + r1 = x[ 3] - x[11]; x[11] += x[ 3]; + x[ 2] = r1; x[ 3] = r0; + MB(); + + r0 = x[12] - x[ 4]; x[12] += x[ 4]; + r1 = x[13] - x[ 5]; x[13] += x[ 5]; + x[ 4] = MULT31((r0 - r1) , cPI2_8); + x[ 5] = MULT31((r0 + r1) , cPI2_8); + MB(); + + r0 = x[14] - x[ 6]; x[14] += x[ 6]; + r1 = x[15] - x[ 7]; x[15] += x[ 7]; + x[ 6] = r0; x[ 7] = r1; + MB(); + + mdct_butterfly_8(x); + mdct_butterfly_8(x+8); +} + +/* 32 point butterfly (in place, 4 register) */ +static inline void mdct_butterfly_32(ogg_int32_t *x){ + + register ogg_int32_t r0, r1; + + r0 = x[30] - x[14]; x[30] += x[14]; + r1 = x[31] - x[15]; x[31] += x[15]; + x[14] = r0; x[15] = r1; + MB(); + + r0 = x[28] - x[12]; x[28] += x[12]; + r1 = x[29] - x[13]; x[29] += x[13]; + XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] ); + MB(); + + r0 = x[26] - x[10]; x[26] += x[10]; + r1 = x[27] - x[11]; x[27] += x[11]; + x[10] = MULT31((r0 - r1) , cPI2_8); + x[11] = MULT31((r0 + r1) , cPI2_8); + MB(); + + r0 = x[24] - x[ 8]; x[24] += x[ 8]; + r1 = x[25] - x[ 9]; x[25] += x[ 9]; + XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] ); + MB(); + + r0 = x[22] - x[ 6]; x[22] += x[ 6]; + r1 = x[ 7] - x[23]; x[23] += x[ 7]; + x[ 6] = r1; x[ 7] = r0; + MB(); + + r0 = x[ 4] - x[20]; x[20] += x[ 4]; + r1 = x[ 5] - x[21]; x[21] += x[ 5]; + XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] ); + MB(); + + r0 = x[ 2] - x[18]; x[18] += x[ 2]; + r1 = x[ 3] - x[19]; x[19] += x[ 3]; + x[ 2] = MULT31((r1 + r0) , cPI2_8); + x[ 3] = MULT31((r1 - r0) , cPI2_8); + MB(); + + r0 = x[ 0] - x[16]; x[16] += x[ 0]; + r1 = x[ 1] - x[17]; x[17] += x[ 1]; + XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] ); + MB(); + + mdct_butterfly_16(x); + mdct_butterfly_16(x+16); +} + +/* N/stage point generic N stage butterfly (in place, 4 register) */ +void mdct_butterfly_generic(ogg_int32_t *x,int points, int step) + ICODE_ATTR_TREMOR_MDCT; +void mdct_butterfly_generic(ogg_int32_t *x,int points, int step){ + LOOKUP_T *T = sincos_lookup0; + ogg_int32_t *x1 = x + points - 8; + ogg_int32_t *x2 = x + (points>>1) - 8; + register ogg_int32_t r0; + register ogg_int32_t r1; + register ogg_int32_t r2; + register ogg_int32_t r3; + + do{ + r0 = x1[6] - x2[6]; x1[6] += x2[6]; + r1 = x2[7] - x1[7]; x1[7] += x2[7]; + r2 = x1[4] - x2[4]; x1[4] += x2[4]; + r3 = x2[5] - x1[5]; x1[5] += x2[5]; + XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step; + XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step; + + r0 = x1[2] - x2[2]; x1[2] += x2[2]; + r1 = x2[3] - x1[3]; x1[3] += x2[3]; + r2 = x1[0] - x2[0]; x1[0] += x2[0]; + r3 = x2[1] - x1[1]; x1[1] += x2[1]; + XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step; + XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step; + + x1-=8; x2-=8; + }while(Tsincos_lookup0); + do{ + r0 = x2[6] - x1[6]; x1[6] += x2[6]; + r1 = x2[7] - x1[7]; x1[7] += x2[7]; + r2 = x2[4] - x1[4]; x1[4] += x2[4]; + r3 = x2[5] - x1[5]; x1[5] += x2[5]; + XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step; + XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step; + + r0 = x2[2] - x1[2]; x1[2] += x2[2]; + r1 = x2[3] - x1[3]; x1[3] += x2[3]; + r2 = x2[0] - x1[0]; x1[0] += x2[0]; + r3 = x2[1] - x1[1]; x1[1] += x2[1]; + XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step; + XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step; + + x1-=8; x2-=8; + }while(Tsincos_lookup0); +} + +#endif /* CPU_ARM */ + +static inline void mdct_butterflies(ogg_int32_t *x,int points,int shift) { + + int stages=8-shift; + int i,j; + + for(i=0;--stages>0;i++){ + for(j=0;j<(1<>i)*j,points>>i,4<<(i+shift)); + } + + for(j=0;j>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8); +} + +static inline void mdct_bitreverse(ogg_int32_t *x,int n,int step,int shift) { + + int bit = 0; + ogg_int32_t *w0 = x; + ogg_int32_t *w1 = x = w0+(n>>1); + LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; + LOOKUP_T *Ttop = T+1024; + register ogg_int32_t r2; + + do{ + register ogg_int32_t r3 = bitrev12(bit++); + ogg_int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1; + ogg_int32_t *x1 = x + (r3>>shift); + + register ogg_int32_t r0 = x0[0] + x1[0]; + register ogg_int32_t r1 = x1[1] - x0[1]; + + XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step; + + w1 -= 4; + + r0 = (x0[1] + x1[1])>>1; + r1 = (x0[0] - x1[0])>>1; + w0[0] = r0 + r2; + w0[1] = r1 + r3; + w1[2] = r0 - r2; + w1[3] = r3 - r1; + + r3 = bitrev12(bit++); + x0 = x + ((r3 ^ 0xfff)>>shift) -1; + x1 = x + (r3>>shift); + + r0 = x0[0] + x1[0]; + r1 = x1[1] - x0[1]; + + XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step; + + r0 = (x0[1] + x1[1])>>1; + r1 = (x0[0] - x1[0])>>1; + w0[2] = r0 + r2; + w0[3] = r1 + r3; + w1[0] = r0 - r2; + w1[1] = r3 - r1; + + w0 += 4; + }while(T>shift) -1; + ogg_int32_t *x1 = x + (r3>>shift); + + register ogg_int32_t r0 = x0[0] + x1[0]; + register ogg_int32_t r1 = x1[1] - x0[1]; + + T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 ); + + w1 -= 4; + + r0 = (x0[1] + x1[1])>>1; + r1 = (x0[0] - x1[0])>>1; + w0[0] = r0 + r2; + w0[1] = r1 + r3; + w1[2] = r0 - r2; + w1[3] = r3 - r1; + + r3 = bitrev12(bit++); + x0 = x + ((r3 ^ 0xfff)>>shift) -1; + x1 = x + (r3>>shift); + + r0 = x0[0] + x1[0]; + r1 = x1[1] - x0[1]; + + T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 ); + + r0 = (x0[1] + x1[1])>>1; + r1 = (x0[0] - x1[0])>>1; + w0[2] = r0 + r2; + w0[3] = r1 + r3; + w1[0] = r0 - r2; + w1[1] = r3 - r1; + + w0 += 4; + }while(w0>1; + int n4=n>>2; + ogg_int32_t *iX; + ogg_int32_t *oX; + LOOKUP_T *T; + LOOKUP_T *V; + int shift; + int step; + for (shift=6;!(n&(1<=in+n4); + do{ + oX-=4; + XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step; + XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step; + iX-=8; + }while(iX>=in); + + iX = in+n2-8; + oX = out+n2+n4; + T = sincos_lookup0; + + do{ + T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] ); + T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] ); + iX-=8; + oX+=4; + }while(iX>=in+n4); + do{ + T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] ); + T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] ); + iX-=8; + oX+=4; + }while(iX>=in); + + mdct_butterflies(out+n2,n2,shift); + mdct_bitreverse(out,n,step,shift); + /* rotate + window */ + + step>>=2; + { + ogg_int32_t *oX1=out+n2+n4; + ogg_int32_t *oX2=out+n2+n4; + ogg_int32_t *iX =out; + + switch(step) { + default: { + T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; + do{ + oX1-=4; + XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step; + XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step; + XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step; + XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step; + oX2+=4; + iX+=8; + }while(iX>1; + t1 = (*T++)>>1; + do{ + oX1-=4; + + t0 += (v0 = (*V++)>>1); + t1 += (v1 = (*V++)>>1); + XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); + v0 += (t0 = (*T++)>>1); + v1 += (t1 = (*T++)>>1); + XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] ); + t0 += (v0 = (*V++)>>1); + t1 += (v1 = (*V++)>>1); + XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] ); + v0 += (t0 = (*T++)>>1); + v1 += (t1 = (*T++)>>1); + XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); + + oX2+=4; + iX+=8; + }while(iX>2); + t1 += (q1 = (v1-t1)>>2); + XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); + t0 = v0-q0; + t1 = v1-q1; + XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] ); + + t0 = *T++; + t1 = *T++; + v0 += (q0 = (t0-v0)>>2); + v1 += (q1 = (t1-v1)>>2); + XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] ); + v0 = t0-q0; + v1 = t1-q1; + XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); + + oX2+=4; + iX+=8; + }while(iXoX2); + } +} diff --git a/apps/codecs/libwma/mdct2.h b/apps/codecs/libwma/mdct2.h new file mode 100644 index 0000000000..a1c3a0b01d --- /dev/null +++ b/apps/codecs/libwma/mdct2.h @@ -0,0 +1,85 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * + * * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * + * * + ******************************************************************** + + function: modified discrete cosine transform prototypes + + ********************************************************************/ + +#ifndef _OGG_mdct_H_ +#define _OGG_mdct_H_ + +typedef short ogg_int16_t; +typedef int ogg_int32_t; +typedef unsigned int ogg_uint32_t; +typedef long long ogg_int64_t; + + +#ifdef _LOW_ACCURACY_ +# define X(n) (((((n)>>22)+1)>>1) - ((((n)>>22)+1)>>9)) +# define LOOKUP_T const unsigned char +#else +# define X(n) (n) +# define LOOKUP_T const ogg_int32_t +#endif + +//#include "ivorbiscodec.h" + +#include +#include "asm_arm.h" +#include "asm_mcf5249.h" +#include "misc.h" + +#ifndef ICONST_ATTR_TREMOR_WINDOW +#define ICONST_ATTR_TREMOR_WINDOW ICONST_ATTR +#endif + +#ifndef ICODE_ATTR_TREMOR_MDCT +#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR +#endif + +#ifndef ICODE_ATTR_TREMOR_NOT_MDCT +#define ICODE_ATTR_TREMOR_NOT_MDCT ICODE_ATTR +#endif + + +#define DATA_TYPE ogg_int32_t +#define REG_TYPE register ogg_int32_t + +#ifdef _LOW_ACCURACY_ +#define cPI3_8 (0x0062) +#define cPI2_8 (0x00b5) +#define cPI1_8 (0x00ed) +#else +#define cPI3_8 (0x30fbc54d) +#define cPI2_8 (0x5a82799a) +#define cPI1_8 (0x7641af3d) +#endif + +//extern void mdct_forward(int n, DATA_TYPE *in, DATA_TYPE *out); +extern void mdct_backward(int n, ogg_int32_t *in, DATA_TYPE *out); +//extern void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift); +//extern void mdct_butterflies(DATA_TYPE *x,int points,int shift); + +#endif + + + + + + + + + + + + diff --git a/apps/codecs/libwma/mdct_arm.S b/apps/codecs/libwma/mdct_arm.S new file mode 100644 index 0000000000..f262951ea8 --- /dev/null +++ b/apps/codecs/libwma/mdct_arm.S @@ -0,0 +1,429 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2007 by Tomasz Malesinski + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" +/* Codecs should not normally do this, but we need to check a macro, and + * codecs.h would confuse the assembler. */ + +#define cPI3_8 (0x30fbc54d) +#define cPI2_8 (0x5a82799a) +#define cPI1_8 (0x7641af3d) + +#ifdef USE_IRAM + .section .icode,"ax",%progbits +#else + .text +#endif + .align + + .global mdct_butterfly_32 + .global mdct_butterfly_generic_loop + +mdct_butterfly_8: + add r9, r5, r1 @ x4 + x0 + sub r5, r5, r1 @ x4 - x0 + add r7, r6, r2 @ x5 + x1 + sub r6, r6, r2 @ x5 - x1 + add r8, r10, r3 @ x6 + x2 + sub r10, r10, r3 @ x6 - x2 + add r12, r11, r4 @ x7 + x3 + sub r11, r11, r4 @ x7 - x3 + + add r1, r10, r6 @ y0 = (x6 - x2) + (x5 - x1) + sub r2, r11, r5 @ y1 = (x7 - x3) - (x4 - x0) + sub r3, r10, r6 @ y2 = (x6 - x2) - (x5 - x1) + add r4, r11, r5 @ y3 = (x7 - x3) + (x4 - x0) + sub r5, r8, r9 @ y4 = (x6 + x2) - (x4 + x0) + sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1) + add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0) + add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1) + stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} + + mov pc, lr + +mdct_butterfly_16: + str lr, [sp, #-4]! + add r1, r0, #8*4 + + ldmia r0, {r2, r3, r4, r5} + ldmia r1, {r6, r7, r8, r9} + add r6, r6, r2 @ y8 = x8 + x0 + rsb r2, r6, r2, asl #1 @ x0 - x8 + add r7, r7, r3 @ y9 = x9 + x1 + rsb r3, r7, r3, asl #1 @ x1 - x9 + add r8, r8, r4 @ y10 = x10 + x2 + sub r11, r8, r4, asl #1 @ x10 - x2 + add r9, r9, r5 @ y11 = x11 + x3 + rsb r10, r9, r5, asl #1 @ x3 - x11 + + stmia r1!, {r6, r7, r8, r9} + + add r2, r2, r3 @ (x0 - x8) + (x1 - x9) + rsb r3, r2, r3, asl #1 @ (x1 - x9) - (x0 - x8) + + ldr r12, =cPI2_8 + smull r8, r5, r2, r12 + mov r5, r5, asl #1 + smull r8, r6, r3, r12 + mov r6, r6, asl #1 + + stmia r0!, {r5, r6, r10, r11} + + ldmia r0, {r2, r3, r4, r5} + ldmia r1, {r6, r7, r8, r9} + add r6, r6, r2 @ y12 = x12 + x4 + sub r2, r6, r2, asl #1 @ x12 - x4 + add r7, r7, r3 @ y13 = x13 + x5 + sub r3, r7, r3, asl #1 @ x13 - x5 + add r8, r8, r4 @ y10 = x14 + x6 + sub r10, r8, r4, asl #1 @ x14 - x6 + add r9, r9, r5 @ y11 = x15 + x7 + sub r11, r9, r5, asl #1 @ x15 - x7 + + stmia r1, {r6, r7, r8, r9} + + sub r2, r2, r3 @ (x12 - x4) - (x13 - x5) + add r3, r2, r3, asl #1 @ (x12 - x4) + (x13 - x5) + + smull r8, r5, r2, r12 + mov r5, r5, asl #1 + smull r8, r6, r3, r12 + mov r6, r6, asl #1 + @ no stmia here, r5, r6, r10, r11 are passed to mdct_butterfly_8 + + sub r0, r0, #4*4 + ldmia r0, {r1, r2, r3, r4} + bl mdct_butterfly_8 + add r0, r0, #8*4 + ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} + bl mdct_butterfly_8 + + ldr pc, [sp], #4 + +mdct_butterfly_32: + stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + + add r1, r0, #16*4 + + ldmia r0, {r2, r3, r4, r5} + ldmia r1, {r6, r7, r8, r9} + add r6, r6, r2 @ y16 = x16 + x0 + rsb r2, r6, r2, asl #1 @ x0 - x16 + add r7, r7, r3 @ y17 = x17 + x1 + rsb r3, r7, r3, asl #1 @ x1 - x17 + add r8, r8, r4 @ y18 = x18 + x2 + rsb r4, r8, r4, asl #1 @ x2 - x18 + add r9, r9, r5 @ y19 = x19 + x3 + rsb r5, r9, r5, asl #1 @ x3 - x19 + + stmia r1!, {r6, r7, r8, r9} + + ldr r12, =cPI1_8 + ldr lr, =cPI3_8 + smull r10, r6, r2, r12 + smlal r10, r6, r3, lr + rsb r2, r2, #0 + smull r10, r7, r3, r12 + smlal r10, r7, r2, lr + mov r6, r6, asl #1 + mov r7, r7, asl #1 + + add r4, r4, r5 @ (x3 - x19) + (x2 - x18) + rsb r5, r4, r5, asl #1 @ (x3 - x19) - (x2 - x18) + + ldr r11, =cPI2_8 + smull r10, r8, r4, r11 + mov r8, r8, asl #1 + smull r10, r9, r5, r11 + mov r9, r9, asl #1 + + stmia r0!, {r6, r7, r8, r9} + + ldmia r0, {r2, r3, r4, r5} + ldmia r1, {r6, r7, r8, r9} + add r6, r6, r2 @ y20 = x20 + x4 + rsb r2, r6, r2, asl #1 @ x4 - x20 + add r7, r7, r3 @ y21 = x21 + x5 + rsb r3, r7, r3, asl #1 @ x5 - x21 + add r8, r8, r4 @ y22 = x22 + x6 + sub r4, r8, r4, asl #1 @ x22 - x6 + add r9, r9, r5 @ y23 = x23 + x7 + rsb r5, r9, r5, asl #1 @ x7 - x23 + + stmia r1!, {r6, r7, r8, r9} + + smull r10, r6, r2, lr + smlal r10, r6, r3, r12 + rsb r2, r2, #0 + smull r10, r7, r3, lr + smlal r10, r7, r2, r12 + mov r6, r6, asl #1 + mov r7, r7, asl #1 + + mov r8, r5 + mov r9, r4 + stmia r0!, {r6, r7, r8, r9} + + ldmia r0, {r2, r3, r4, r5} + ldmia r1, {r6, r7, r8, r9} + add r6, r6, r2 @ y24 = x24 + x8 + sub r2, r6, r2, asl #1 @ x24 - x8 + add r7, r7, r3 @ y25 = x25 + x9 + sub r3, r7, r3, asl #1 @ x25 - x9 + add r8, r8, r4 @ y26 = x26 + x10 + sub r4, r8, r4, asl #1 @ x26 - x10 + add r9, r9, r5 @ y27 = x27 + x11 + sub r5, r9, r5, asl #1 @ x27 - x11 + + stmia r1!, {r6, r7, r8, r9} + + smull r10, r7, r2, r12 + smlal r10, r7, r3, lr + rsb r3, r3, #0 + smull r10, r6, r3, r12 + smlal r10, r6, r2, lr + mov r6, r6, asl #1 + mov r7, r7, asl #1 + + sub r4, r4, r5 @ (x26 - x10) - (x27 - x11) + add r5, r4, r5, asl #1 @ (x26 - x10) + (x27 - x11) + + ldr r11, =cPI2_8 + smull r10, r8, r4, r11 + mov r8, r8, asl #1 + smull r10, r9, r5, r11 + mov r9, r9, asl #1 + + stmia r0!, {r6, r7, r8, r9} + + ldmia r0, {r2, r3, r4, r5} + ldmia r1, {r6, r7, r8, r9} + add r6, r6, r2 @ y28 = x28 + x12 + sub r2, r6, r2, asl #1 @ x28 - x12 + add r7, r7, r3 @ y29 = x29 + x13 + sub r3, r7, r3, asl #1 @ x29 - x13 + add r8, r8, r4 @ y30 = x30 + x14 + sub r4, r8, r4, asl #1 @ x30 - x14 + add r9, r9, r5 @ y31 = x31 + x15 + sub r5, r9, r5, asl #1 @ x31 - x15 + + stmia r1, {r6, r7, r8, r9} + + smull r10, r7, r2, lr + smlal r10, r7, r3, r12 + rsb r3, r3, #0 + smull r10, r6, r3, lr + smlal r10, r6, r2, r12 + mov r6, r6, asl #1 + mov r7, r7, asl #1 + + mov r8, r4 + mov r9, r5 + stmia r0, {r6, r7, r8, r9} + + sub r0, r0, #12*4 + str r0, [sp, #-4]! + bl mdct_butterfly_16 + + ldr r0, [sp], #4 + add r0, r0, #16*4 + bl mdct_butterfly_16 + + ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + + @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop) +mdct_butterfly_generic_loop: + stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + str r2, [sp, #-4] + ldr r4, [sp, #40] +1: + ldmdb r0, {r6, r7, r8, r9} + ldmdb r1, {r10, r11, r12, r14} + + add r6, r6, r10 + sub r10, r6, r10, asl #1 + add r7, r7, r11 + rsb r11, r7, r11, asl #1 + add r8, r8, r12 + sub r12, r8, r12, asl #1 + add r9, r9, r14 + rsb r14, r9, r14, asl #1 + + stmdb r0!, {r6, r7, r8, r9} + + ldmia r2, {r6, r7} + smull r5, r8, r14, r6 + smlal r5, r8, r12, r7 + rsb r14, r14, #0 + smull r5, r9, r12, r6 + smlal r5, r9, r14, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + add r2, r2, r3, asl #2 + + ldmia r2, {r6, r7} + smull r5, r8, r11, r6 + smlal r5, r8, r10, r7 + rsb r11, r11, #0 + smull r5, r9, r10, r6 + smlal r5, r9, r11, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + add r2, r2, r3, asl #2 + + cmp r2, r4 + blo 1b + + ldr r4, [sp, #-4] +1: + ldmdb r0, {r6, r7, r8, r9} + ldmdb r1, {r10, r11, r12, r14} + + add r6, r6, r10 + sub r10, r6, r10, asl #1 + add r7, r7, r11 + sub r11, r7, r11, asl #1 + add r8, r8, r12 + sub r12, r8, r12, asl #1 + add r9, r9, r14 + sub r14, r9, r14, asl #1 + + stmdb r0!, {r6, r7, r8, r9} + + ldmia r2, {r6, r7} + smull r5, r9, r14, r6 + smlal r5, r9, r12, r7 + rsb r14, r14, #0 + smull r5, r8, r12, r6 + smlal r5, r8, r14, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + sub r2, r2, r3, asl #2 + + ldmia r2, {r6, r7} + smull r5, r9, r11, r6 + smlal r5, r9, r10, r7 + rsb r11, r11, #0 + smull r5, r8, r10, r6 + smlal r5, r8, r11, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + sub r2, r2, r3, asl #2 + + cmp r2, r4 + bhi 1b + + ldr r4, [sp, #40] +1: + ldmdb r0, {r6, r7, r8, r9} + ldmdb r1, {r10, r11, r12, r14} + + add r6, r6, r10 + rsb r10, r6, r10, asl #1 + add r7, r7, r11 + rsb r11, r7, r11, asl #1 + add r8, r8, r12 + rsb r12, r8, r12, asl #1 + add r9, r9, r14 + rsb r14, r9, r14, asl #1 + + stmdb r0!, {r6, r7, r8, r9} + + ldmia r2, {r6, r7} + smull r5, r8, r12, r6 + smlal r5, r8, r14, r7 + rsb r12, r12, #0 + smull r5, r9, r14, r6 + smlal r5, r9, r12, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + add r2, r2, r3, asl #2 + + ldmia r2, {r6, r7} + smull r5, r8, r10, r6 + smlal r5, r8, r11, r7 + rsb r10, r10, #0 + smull r5, r9, r11, r6 + smlal r5, r9, r10, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + add r2, r2, r3, asl #2 + + cmp r2, r4 + blo 1b + + ldr r4, [sp, #-4] +1: + ldmdb r0, {r6, r7, r8, r9} + ldmdb r1, {r10, r11, r12, r14} + + add r6, r6, r10 + sub r10, r6, r10, asl #1 + add r7, r7, r11 + rsb r11, r7, r11, asl #1 + add r8, r8, r12 + sub r12, r8, r12, asl #1 + add r9, r9, r14 + rsb r14, r9, r14, asl #1 + + stmdb r0!, {r6, r7, r8, r9} + + ldmia r2, {r6, r7} + smull r5, r9, r12, r6 + smlal r5, r9, r14, r7 + rsb r12, r12, #0 + smull r5, r8, r14, r6 + smlal r5, r8, r12, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + sub r2, r2, r3, asl #2 + + ldmia r2, {r6, r7} + smull r5, r9, r10, r6 + smlal r5, r9, r11, r7 + rsb r10, r10, #0 + smull r5, r8, r11, r6 + smlal r5, r8, r10, r7 + + mov r8, r8, asl #1 + mov r9, r9, asl #1 + stmdb r1!, {r8, r9} + sub r2, r2, r3, asl #2 + + cmp r2, r4 + bhi 1b + + ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + diff --git a/apps/codecs/libwma/mdct_lookup.h b/apps/codecs/libwma/mdct_lookup.h new file mode 100644 index 0000000000..1c568af736 --- /dev/null +++ b/apps/codecs/libwma/mdct_lookup.h @@ -0,0 +1,544 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * + * * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * + * * + ******************************************************************** + + function: sin,cos lookup tables + + ********************************************************************/ + + + + + + + +/* {sin(2*i*PI/4096), cos(2*i*PI/4096)}, with i = 0 to 512 */ +static const int sincos_lookup0[1026] ICONST_ATTR = { + X(0x00000000), X(0x7fffffff), X(0x003243f5), X(0x7ffff621), + X(0x006487e3), X(0x7fffd886), X(0x0096cbc1), X(0x7fffa72c), + X(0x00c90f88), X(0x7fff6216), X(0x00fb5330), X(0x7fff0943), + X(0x012d96b1), X(0x7ffe9cb2), X(0x015fda03), X(0x7ffe1c65), + X(0x01921d20), X(0x7ffd885a), X(0x01c45ffe), X(0x7ffce093), + X(0x01f6a297), X(0x7ffc250f), X(0x0228e4e2), X(0x7ffb55ce), + X(0x025b26d7), X(0x7ffa72d1), X(0x028d6870), X(0x7ff97c18), + X(0x02bfa9a4), X(0x7ff871a2), X(0x02f1ea6c), X(0x7ff75370), + X(0x03242abf), X(0x7ff62182), X(0x03566a96), X(0x7ff4dbd9), + X(0x0388a9ea), X(0x7ff38274), X(0x03bae8b2), X(0x7ff21553), + X(0x03ed26e6), X(0x7ff09478), X(0x041f6480), X(0x7feeffe1), + X(0x0451a177), X(0x7fed5791), X(0x0483ddc3), X(0x7feb9b85), + X(0x04b6195d), X(0x7fe9cbc0), X(0x04e8543e), X(0x7fe7e841), + X(0x051a8e5c), X(0x7fe5f108), X(0x054cc7b1), X(0x7fe3e616), + X(0x057f0035), X(0x7fe1c76b), X(0x05b137df), X(0x7fdf9508), + X(0x05e36ea9), X(0x7fdd4eec), X(0x0615a48b), X(0x7fdaf519), + X(0x0647d97c), X(0x7fd8878e), X(0x067a0d76), X(0x7fd6064c), + X(0x06ac406f), X(0x7fd37153), X(0x06de7262), X(0x7fd0c8a3), + X(0x0710a345), X(0x7fce0c3e), X(0x0742d311), X(0x7fcb3c23), + X(0x077501be), X(0x7fc85854), X(0x07a72f45), X(0x7fc560cf), + X(0x07d95b9e), X(0x7fc25596), X(0x080b86c2), X(0x7fbf36aa), + X(0x083db0a7), X(0x7fbc040a), X(0x086fd947), X(0x7fb8bdb8), + X(0x08a2009a), X(0x7fb563b3), X(0x08d42699), X(0x7fb1f5fc), + X(0x09064b3a), X(0x7fae7495), X(0x09386e78), X(0x7faadf7c), + X(0x096a9049), X(0x7fa736b4), X(0x099cb0a7), X(0x7fa37a3c), + X(0x09cecf89), X(0x7f9faa15), X(0x0a00ece8), X(0x7f9bc640), + X(0x0a3308bd), X(0x7f97cebd), X(0x0a6522fe), X(0x7f93c38c), + X(0x0a973ba5), X(0x7f8fa4b0), X(0x0ac952aa), X(0x7f8b7227), + X(0x0afb6805), X(0x7f872bf3), X(0x0b2d7baf), X(0x7f82d214), + X(0x0b5f8d9f), X(0x7f7e648c), X(0x0b919dcf), X(0x7f79e35a), + X(0x0bc3ac35), X(0x7f754e80), X(0x0bf5b8cb), X(0x7f70a5fe), + X(0x0c27c389), X(0x7f6be9d4), X(0x0c59cc68), X(0x7f671a05), + X(0x0c8bd35e), X(0x7f62368f), X(0x0cbdd865), X(0x7f5d3f75), + X(0x0cefdb76), X(0x7f5834b7), X(0x0d21dc87), X(0x7f531655), + X(0x0d53db92), X(0x7f4de451), X(0x0d85d88f), X(0x7f489eaa), + X(0x0db7d376), X(0x7f434563), X(0x0de9cc40), X(0x7f3dd87c), + X(0x0e1bc2e4), X(0x7f3857f6), X(0x0e4db75b), X(0x7f32c3d1), + X(0x0e7fa99e), X(0x7f2d1c0e), X(0x0eb199a4), X(0x7f2760af), + X(0x0ee38766), X(0x7f2191b4), X(0x0f1572dc), X(0x7f1baf1e), + X(0x0f475bff), X(0x7f15b8ee), X(0x0f7942c7), X(0x7f0faf25), + X(0x0fab272b), X(0x7f0991c4), X(0x0fdd0926), X(0x7f0360cb), + X(0x100ee8ad), X(0x7efd1c3c), X(0x1040c5bb), X(0x7ef6c418), + X(0x1072a048), X(0x7ef05860), X(0x10a4784b), X(0x7ee9d914), + X(0x10d64dbd), X(0x7ee34636), X(0x11082096), X(0x7edc9fc6), + X(0x1139f0cf), X(0x7ed5e5c6), X(0x116bbe60), X(0x7ecf1837), + X(0x119d8941), X(0x7ec8371a), X(0x11cf516a), X(0x7ec14270), + X(0x120116d5), X(0x7eba3a39), X(0x1232d979), X(0x7eb31e78), + X(0x1264994e), X(0x7eabef2c), X(0x1296564d), X(0x7ea4ac58), + X(0x12c8106f), X(0x7e9d55fc), X(0x12f9c7aa), X(0x7e95ec1a), + X(0x132b7bf9), X(0x7e8e6eb2), X(0x135d2d53), X(0x7e86ddc6), + X(0x138edbb1), X(0x7e7f3957), X(0x13c0870a), X(0x7e778166), + X(0x13f22f58), X(0x7e6fb5f4), X(0x1423d492), X(0x7e67d703), + X(0x145576b1), X(0x7e5fe493), X(0x148715ae), X(0x7e57dea7), + X(0x14b8b17f), X(0x7e4fc53e), X(0x14ea4a1f), X(0x7e47985b), + X(0x151bdf86), X(0x7e3f57ff), X(0x154d71aa), X(0x7e37042a), + X(0x157f0086), X(0x7e2e9cdf), X(0x15b08c12), X(0x7e26221f), + X(0x15e21445), X(0x7e1d93ea), X(0x16139918), X(0x7e14f242), + X(0x16451a83), X(0x7e0c3d29), X(0x1676987f), X(0x7e0374a0), + X(0x16a81305), X(0x7dfa98a8), X(0x16d98a0c), X(0x7df1a942), + X(0x170afd8d), X(0x7de8a670), X(0x173c6d80), X(0x7ddf9034), + X(0x176dd9de), X(0x7dd6668f), X(0x179f429f), X(0x7dcd2981), + X(0x17d0a7bc), X(0x7dc3d90d), X(0x1802092c), X(0x7dba7534), + X(0x183366e9), X(0x7db0fdf8), X(0x1864c0ea), X(0x7da77359), + X(0x18961728), X(0x7d9dd55a), X(0x18c7699b), X(0x7d9423fc), + X(0x18f8b83c), X(0x7d8a5f40), X(0x192a0304), X(0x7d808728), + X(0x195b49ea), X(0x7d769bb5), X(0x198c8ce7), X(0x7d6c9ce9), + X(0x19bdcbf3), X(0x7d628ac6), X(0x19ef0707), X(0x7d58654d), + X(0x1a203e1b), X(0x7d4e2c7f), X(0x1a517128), X(0x7d43e05e), + X(0x1a82a026), X(0x7d3980ec), X(0x1ab3cb0d), X(0x7d2f0e2b), + X(0x1ae4f1d6), X(0x7d24881b), X(0x1b161479), X(0x7d19eebf), + X(0x1b4732ef), X(0x7d0f4218), X(0x1b784d30), X(0x7d048228), + X(0x1ba96335), X(0x7cf9aef0), X(0x1bda74f6), X(0x7ceec873), + X(0x1c0b826a), X(0x7ce3ceb2), X(0x1c3c8b8c), X(0x7cd8c1ae), + X(0x1c6d9053), X(0x7ccda169), X(0x1c9e90b8), X(0x7cc26de5), + X(0x1ccf8cb3), X(0x7cb72724), X(0x1d00843d), X(0x7cabcd28), + X(0x1d31774d), X(0x7ca05ff1), X(0x1d6265dd), X(0x7c94df83), + X(0x1d934fe5), X(0x7c894bde), X(0x1dc4355e), X(0x7c7da505), + X(0x1df5163f), X(0x7c71eaf9), X(0x1e25f282), X(0x7c661dbc), + X(0x1e56ca1e), X(0x7c5a3d50), X(0x1e879d0d), X(0x7c4e49b7), + X(0x1eb86b46), X(0x7c4242f2), X(0x1ee934c3), X(0x7c362904), + X(0x1f19f97b), X(0x7c29fbee), X(0x1f4ab968), X(0x7c1dbbb3), + X(0x1f7b7481), X(0x7c116853), X(0x1fac2abf), X(0x7c0501d2), + X(0x1fdcdc1b), X(0x7bf88830), X(0x200d888d), X(0x7bebfb70), + X(0x203e300d), X(0x7bdf5b94), X(0x206ed295), X(0x7bd2a89e), + X(0x209f701c), X(0x7bc5e290), X(0x20d0089c), X(0x7bb9096b), + X(0x21009c0c), X(0x7bac1d31), X(0x21312a65), X(0x7b9f1de6), + X(0x2161b3a0), X(0x7b920b89), X(0x219237b5), X(0x7b84e61f), + X(0x21c2b69c), X(0x7b77ada8), X(0x21f3304f), X(0x7b6a6227), + X(0x2223a4c5), X(0x7b5d039e), X(0x225413f8), X(0x7b4f920e), + X(0x22847de0), X(0x7b420d7a), X(0x22b4e274), X(0x7b3475e5), + X(0x22e541af), X(0x7b26cb4f), X(0x23159b88), X(0x7b190dbc), + X(0x2345eff8), X(0x7b0b3d2c), X(0x23763ef7), X(0x7afd59a4), + X(0x23a6887f), X(0x7aef6323), X(0x23d6cc87), X(0x7ae159ae), + X(0x24070b08), X(0x7ad33d45), X(0x243743fa), X(0x7ac50dec), + X(0x24677758), X(0x7ab6cba4), X(0x2497a517), X(0x7aa8766f), + X(0x24c7cd33), X(0x7a9a0e50), X(0x24f7efa2), X(0x7a8b9348), + X(0x25280c5e), X(0x7a7d055b), X(0x2558235f), X(0x7a6e648a), + X(0x2588349d), X(0x7a5fb0d8), X(0x25b84012), X(0x7a50ea47), + X(0x25e845b6), X(0x7a4210d8), X(0x26184581), X(0x7a332490), + X(0x26483f6c), X(0x7a24256f), X(0x26783370), X(0x7a151378), + X(0x26a82186), X(0x7a05eead), X(0x26d809a5), X(0x79f6b711), + X(0x2707ebc7), X(0x79e76ca7), X(0x2737c7e3), X(0x79d80f6f), + X(0x27679df4), X(0x79c89f6e), X(0x27976df1), X(0x79b91ca4), + X(0x27c737d3), X(0x79a98715), X(0x27f6fb92), X(0x7999dec4), + X(0x2826b928), X(0x798a23b1), X(0x2856708d), X(0x797a55e0), + X(0x288621b9), X(0x796a7554), X(0x28b5cca5), X(0x795a820e), + X(0x28e5714b), X(0x794a7c12), X(0x29150fa1), X(0x793a6361), + X(0x2944a7a2), X(0x792a37fe), X(0x29743946), X(0x7919f9ec), + X(0x29a3c485), X(0x7909a92d), X(0x29d34958), X(0x78f945c3), + X(0x2a02c7b8), X(0x78e8cfb2), X(0x2a323f9e), X(0x78d846fb), + X(0x2a61b101), X(0x78c7aba2), X(0x2a911bdc), X(0x78b6fda8), + X(0x2ac08026), X(0x78a63d11), X(0x2aefddd8), X(0x789569df), + X(0x2b1f34eb), X(0x78848414), X(0x2b4e8558), X(0x78738bb3), + X(0x2b7dcf17), X(0x786280bf), X(0x2bad1221), X(0x7851633b), + X(0x2bdc4e6f), X(0x78403329), X(0x2c0b83fa), X(0x782ef08b), + X(0x2c3ab2b9), X(0x781d9b65), X(0x2c69daa6), X(0x780c33b8), + X(0x2c98fbba), X(0x77fab989), X(0x2cc815ee), X(0x77e92cd9), + X(0x2cf72939), X(0x77d78daa), X(0x2d263596), X(0x77c5dc01), + X(0x2d553afc), X(0x77b417df), X(0x2d843964), X(0x77a24148), + X(0x2db330c7), X(0x7790583e), X(0x2de2211e), X(0x777e5cc3), + X(0x2e110a62), X(0x776c4edb), X(0x2e3fec8b), X(0x775a2e89), + X(0x2e6ec792), X(0x7747fbce), X(0x2e9d9b70), X(0x7735b6af), + X(0x2ecc681e), X(0x77235f2d), X(0x2efb2d95), X(0x7710f54c), + X(0x2f29ebcc), X(0x76fe790e), X(0x2f58a2be), X(0x76ebea77), + X(0x2f875262), X(0x76d94989), X(0x2fb5fab2), X(0x76c69647), + X(0x2fe49ba7), X(0x76b3d0b4), X(0x30133539), X(0x76a0f8d2), + X(0x3041c761), X(0x768e0ea6), X(0x30705217), X(0x767b1231), + X(0x309ed556), X(0x76680376), X(0x30cd5115), X(0x7654e279), + X(0x30fbc54d), X(0x7641af3d), X(0x312a31f8), X(0x762e69c4), + X(0x3158970e), X(0x761b1211), X(0x3186f487), X(0x7607a828), + X(0x31b54a5e), X(0x75f42c0b), X(0x31e39889), X(0x75e09dbd), + X(0x3211df04), X(0x75ccfd42), X(0x32401dc6), X(0x75b94a9c), + X(0x326e54c7), X(0x75a585cf), X(0x329c8402), X(0x7591aedd), + X(0x32caab6f), X(0x757dc5ca), X(0x32f8cb07), X(0x7569ca99), + X(0x3326e2c3), X(0x7555bd4c), X(0x3354f29b), X(0x75419de7), + X(0x3382fa88), X(0x752d6c6c), X(0x33b0fa84), X(0x751928e0), + X(0x33def287), X(0x7504d345), X(0x340ce28b), X(0x74f06b9e), + X(0x343aca87), X(0x74dbf1ef), X(0x3468aa76), X(0x74c7663a), + X(0x34968250), X(0x74b2c884), X(0x34c4520d), X(0x749e18cd), + X(0x34f219a8), X(0x7489571c), X(0x351fd918), X(0x74748371), + X(0x354d9057), X(0x745f9dd1), X(0x357b3f5d), X(0x744aa63f), + X(0x35a8e625), X(0x74359cbd), X(0x35d684a6), X(0x74208150), + X(0x36041ad9), X(0x740b53fb), X(0x3631a8b8), X(0x73f614c0), + X(0x365f2e3b), X(0x73e0c3a3), X(0x368cab5c), X(0x73cb60a8), + X(0x36ba2014), X(0x73b5ebd1), X(0x36e78c5b), X(0x73a06522), + X(0x3714f02a), X(0x738acc9e), X(0x37424b7b), X(0x73752249), + X(0x376f9e46), X(0x735f6626), X(0x379ce885), X(0x73499838), + X(0x37ca2a30), X(0x7333b883), X(0x37f76341), X(0x731dc70a), + X(0x382493b0), X(0x7307c3d0), X(0x3851bb77), X(0x72f1aed9), + X(0x387eda8e), X(0x72db8828), X(0x38abf0ef), X(0x72c54fc1), + X(0x38d8fe93), X(0x72af05a7), X(0x39060373), X(0x7298a9dd), + X(0x3932ff87), X(0x72823c67), X(0x395ff2c9), X(0x726bbd48), + X(0x398cdd32), X(0x72552c85), X(0x39b9bebc), X(0x723e8a20), + X(0x39e6975e), X(0x7227d61c), X(0x3a136712), X(0x7211107e), + X(0x3a402dd2), X(0x71fa3949), X(0x3a6ceb96), X(0x71e35080), + X(0x3a99a057), X(0x71cc5626), X(0x3ac64c0f), X(0x71b54a41), + X(0x3af2eeb7), X(0x719e2cd2), X(0x3b1f8848), X(0x7186fdde), + X(0x3b4c18ba), X(0x716fbd68), X(0x3b78a007), X(0x71586b74), + X(0x3ba51e29), X(0x71410805), X(0x3bd19318), X(0x7129931f), + X(0x3bfdfecd), X(0x71120cc5), X(0x3c2a6142), X(0x70fa74fc), + X(0x3c56ba70), X(0x70e2cbc6), X(0x3c830a50), X(0x70cb1128), + X(0x3caf50da), X(0x70b34525), X(0x3cdb8e09), X(0x709b67c0), + X(0x3d07c1d6), X(0x708378ff), X(0x3d33ec39), X(0x706b78e3), + X(0x3d600d2c), X(0x70536771), X(0x3d8c24a8), X(0x703b44ad), + X(0x3db832a6), X(0x7023109a), X(0x3de4371f), X(0x700acb3c), + X(0x3e10320d), X(0x6ff27497), X(0x3e3c2369), X(0x6fda0cae), + X(0x3e680b2c), X(0x6fc19385), X(0x3e93e950), X(0x6fa90921), + X(0x3ebfbdcd), X(0x6f906d84), X(0x3eeb889c), X(0x6f77c0b3), + X(0x3f1749b8), X(0x6f5f02b2), X(0x3f430119), X(0x6f463383), + X(0x3f6eaeb8), X(0x6f2d532c), X(0x3f9a5290), X(0x6f1461b0), + X(0x3fc5ec98), X(0x6efb5f12), X(0x3ff17cca), X(0x6ee24b57), + X(0x401d0321), X(0x6ec92683), X(0x40487f94), X(0x6eaff099), + X(0x4073f21d), X(0x6e96a99d), X(0x409f5ab6), X(0x6e7d5193), + X(0x40cab958), X(0x6e63e87f), X(0x40f60dfb), X(0x6e4a6e66), + X(0x4121589b), X(0x6e30e34a), X(0x414c992f), X(0x6e174730), + X(0x4177cfb1), X(0x6dfd9a1c), X(0x41a2fc1a), X(0x6de3dc11), + X(0x41ce1e65), X(0x6dca0d14), X(0x41f93689), X(0x6db02d29), + X(0x42244481), X(0x6d963c54), X(0x424f4845), X(0x6d7c3a98), + X(0x427a41d0), X(0x6d6227fa), X(0x42a5311b), X(0x6d48047e), + X(0x42d0161e), X(0x6d2dd027), X(0x42faf0d4), X(0x6d138afb), + X(0x4325c135), X(0x6cf934fc), X(0x4350873c), X(0x6cdece2f), + X(0x437b42e1), X(0x6cc45698), X(0x43a5f41e), X(0x6ca9ce3b), + X(0x43d09aed), X(0x6c8f351c), X(0x43fb3746), X(0x6c748b3f), + X(0x4425c923), X(0x6c59d0a9), X(0x4450507e), X(0x6c3f055d), + X(0x447acd50), X(0x6c242960), X(0x44a53f93), X(0x6c093cb6), + X(0x44cfa740), X(0x6bee3f62), X(0x44fa0450), X(0x6bd3316a), + X(0x452456bd), X(0x6bb812d1), X(0x454e9e80), X(0x6b9ce39b), + X(0x4578db93), X(0x6b81a3cd), X(0x45a30df0), X(0x6b66536b), + X(0x45cd358f), X(0x6b4af279), X(0x45f7526b), X(0x6b2f80fb), + X(0x4621647d), X(0x6b13fef5), X(0x464b6bbe), X(0x6af86c6c), + X(0x46756828), X(0x6adcc964), X(0x469f59b4), X(0x6ac115e2), + X(0x46c9405c), X(0x6aa551e9), X(0x46f31c1a), X(0x6a897d7d), + X(0x471cece7), X(0x6a6d98a4), X(0x4746b2bc), X(0x6a51a361), + X(0x47706d93), X(0x6a359db9), X(0x479a1d67), X(0x6a1987b0), + X(0x47c3c22f), X(0x69fd614a), X(0x47ed5be6), X(0x69e12a8c), + X(0x4816ea86), X(0x69c4e37a), X(0x48406e08), X(0x69a88c19), + X(0x4869e665), X(0x698c246c), X(0x48935397), X(0x696fac78), + X(0x48bcb599), X(0x69532442), X(0x48e60c62), X(0x69368bce), + X(0x490f57ee), X(0x6919e320), X(0x49389836), X(0x68fd2a3d), + X(0x4961cd33), X(0x68e06129), X(0x498af6df), X(0x68c387e9), + X(0x49b41533), X(0x68a69e81), X(0x49dd282a), X(0x6889a4f6), + X(0x4a062fbd), X(0x686c9b4b), X(0x4a2f2be6), X(0x684f8186), + X(0x4a581c9e), X(0x683257ab), X(0x4a8101de), X(0x68151dbe), + X(0x4aa9dba2), X(0x67f7d3c5), X(0x4ad2a9e2), X(0x67da79c3), + X(0x4afb6c98), X(0x67bd0fbd), X(0x4b2423be), X(0x679f95b7), + X(0x4b4ccf4d), X(0x67820bb7), X(0x4b756f40), X(0x676471c0), + X(0x4b9e0390), X(0x6746c7d8), X(0x4bc68c36), X(0x67290e02), + X(0x4bef092d), X(0x670b4444), X(0x4c177a6e), X(0x66ed6aa1), + X(0x4c3fdff4), X(0x66cf8120), X(0x4c6839b7), X(0x66b187c3), + X(0x4c9087b1), X(0x66937e91), X(0x4cb8c9dd), X(0x6675658c), + X(0x4ce10034), X(0x66573cbb), X(0x4d092ab0), X(0x66390422), + X(0x4d31494b), X(0x661abbc5), X(0x4d595bfe), X(0x65fc63a9), + X(0x4d8162c4), X(0x65ddfbd3), X(0x4da95d96), X(0x65bf8447), + X(0x4dd14c6e), X(0x65a0fd0b), X(0x4df92f46), X(0x65826622), + X(0x4e210617), X(0x6563bf92), X(0x4e48d0dd), X(0x6545095f), + X(0x4e708f8f), X(0x6526438f), X(0x4e984229), X(0x65076e25), + X(0x4ebfe8a5), X(0x64e88926), X(0x4ee782fb), X(0x64c99498), + X(0x4f0f1126), X(0x64aa907f), X(0x4f369320), X(0x648b7ce0), + X(0x4f5e08e3), X(0x646c59bf), X(0x4f857269), X(0x644d2722), + X(0x4faccfab), X(0x642de50d), X(0x4fd420a4), X(0x640e9386), + X(0x4ffb654d), X(0x63ef3290), X(0x50229da1), X(0x63cfc231), + X(0x5049c999), X(0x63b0426d), X(0x5070e92f), X(0x6390b34a), + X(0x5097fc5e), X(0x637114cc), X(0x50bf031f), X(0x635166f9), + X(0x50e5fd6d), X(0x6331a9d4), X(0x510ceb40), X(0x6311dd64), + X(0x5133cc94), X(0x62f201ac), X(0x515aa162), X(0x62d216b3), + X(0x518169a5), X(0x62b21c7b), X(0x51a82555), X(0x6292130c), + X(0x51ced46e), X(0x6271fa69), X(0x51f576ea), X(0x6251d298), + X(0x521c0cc2), X(0x62319b9d), X(0x524295f0), X(0x6211557e), + X(0x5269126e), X(0x61f1003f), X(0x528f8238), X(0x61d09be5), + X(0x52b5e546), X(0x61b02876), X(0x52dc3b92), X(0x618fa5f7), + X(0x53028518), X(0x616f146c), X(0x5328c1d0), X(0x614e73da), + X(0x534ef1b5), X(0x612dc447), X(0x537514c2), X(0x610d05b7), + X(0x539b2af0), X(0x60ec3830), X(0x53c13439), X(0x60cb5bb7), + X(0x53e73097), X(0x60aa7050), X(0x540d2005), X(0x60897601), + X(0x5433027d), X(0x60686ccf), X(0x5458d7f9), X(0x604754bf), + X(0x547ea073), X(0x60262dd6), X(0x54a45be6), X(0x6004f819), + X(0x54ca0a4b), X(0x5fe3b38d), X(0x54efab9c), X(0x5fc26038), + X(0x55153fd4), X(0x5fa0fe1f), X(0x553ac6ee), X(0x5f7f8d46), + X(0x556040e2), X(0x5f5e0db3), X(0x5585adad), X(0x5f3c7f6b), + X(0x55ab0d46), X(0x5f1ae274), X(0x55d05faa), X(0x5ef936d1), + X(0x55f5a4d2), X(0x5ed77c8a), X(0x561adcb9), X(0x5eb5b3a2), + X(0x56400758), X(0x5e93dc1f), X(0x566524aa), X(0x5e71f606), + X(0x568a34a9), X(0x5e50015d), X(0x56af3750), X(0x5e2dfe29), + X(0x56d42c99), X(0x5e0bec6e), X(0x56f9147e), X(0x5de9cc33), + X(0x571deefa), X(0x5dc79d7c), X(0x5742bc06), X(0x5da5604f), + X(0x57677b9d), X(0x5d8314b1), X(0x578c2dba), X(0x5d60baa7), + X(0x57b0d256), X(0x5d3e5237), X(0x57d5696d), X(0x5d1bdb65), + X(0x57f9f2f8), X(0x5cf95638), X(0x581e6ef1), X(0x5cd6c2b5), + X(0x5842dd54), X(0x5cb420e0), X(0x58673e1b), X(0x5c9170bf), + X(0x588b9140), X(0x5c6eb258), X(0x58afd6bd), X(0x5c4be5b0), + X(0x58d40e8c), X(0x5c290acc), X(0x58f838a9), X(0x5c0621b2), + X(0x591c550e), X(0x5be32a67), X(0x594063b5), X(0x5bc024f0), + X(0x59646498), X(0x5b9d1154), X(0x598857b2), X(0x5b79ef96), + X(0x59ac3cfd), X(0x5b56bfbd), X(0x59d01475), X(0x5b3381ce), + X(0x59f3de12), X(0x5b1035cf), X(0x5a1799d1), X(0x5aecdbc5), + X(0x5a3b47ab), X(0x5ac973b5), X(0x5a5ee79a), X(0x5aa5fda5), + X(0x5a82799a), X(0x5a82799a) + }; + + /* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)}, with i = 0 to 511 */ +static LOOKUP_T sincos_lookup1[1024] ICONST_ATTR = { + X(0x001921fb), X(0x7ffffd88), X(0x004b65ee), X(0x7fffe9cb), + X(0x007da9d4), X(0x7fffc251), X(0x00afeda8), X(0x7fff8719), + X(0x00e23160), X(0x7fff3824), X(0x011474f6), X(0x7ffed572), + X(0x0146b860), X(0x7ffe5f03), X(0x0178fb99), X(0x7ffdd4d7), + X(0x01ab3e97), X(0x7ffd36ee), X(0x01dd8154), X(0x7ffc8549), + X(0x020fc3c6), X(0x7ffbbfe6), X(0x024205e8), X(0x7ffae6c7), + X(0x027447b0), X(0x7ff9f9ec), X(0x02a68917), X(0x7ff8f954), + X(0x02d8ca16), X(0x7ff7e500), X(0x030b0aa4), X(0x7ff6bcf0), + X(0x033d4abb), X(0x7ff58125), X(0x036f8a51), X(0x7ff4319d), + X(0x03a1c960), X(0x7ff2ce5b), X(0x03d407df), X(0x7ff1575d), + X(0x040645c7), X(0x7fefcca4), X(0x04388310), X(0x7fee2e30), + X(0x046abfb3), X(0x7fec7c02), X(0x049cfba7), X(0x7feab61a), + X(0x04cf36e5), X(0x7fe8dc78), X(0x05017165), X(0x7fe6ef1c), + X(0x0533ab20), X(0x7fe4ee06), X(0x0565e40d), X(0x7fe2d938), + X(0x05981c26), X(0x7fe0b0b1), X(0x05ca5361), X(0x7fde7471), + X(0x05fc89b8), X(0x7fdc247a), X(0x062ebf22), X(0x7fd9c0ca), + X(0x0660f398), X(0x7fd74964), X(0x06932713), X(0x7fd4be46), + X(0x06c5598a), X(0x7fd21f72), X(0x06f78af6), X(0x7fcf6ce8), + X(0x0729bb4e), X(0x7fcca6a7), X(0x075bea8c), X(0x7fc9ccb2), + X(0x078e18a7), X(0x7fc6df08), X(0x07c04598), X(0x7fc3dda9), + X(0x07f27157), X(0x7fc0c896), X(0x08249bdd), X(0x7fbd9fd0), + X(0x0856c520), X(0x7fba6357), X(0x0888ed1b), X(0x7fb7132b), + X(0x08bb13c5), X(0x7fb3af4e), X(0x08ed3916), X(0x7fb037bf), + X(0x091f5d06), X(0x7facac7f), X(0x09517f8f), X(0x7fa90d8e), + X(0x0983a0a7), X(0x7fa55aee), X(0x09b5c048), X(0x7fa1949e), + X(0x09e7de6a), X(0x7f9dbaa0), X(0x0a19fb04), X(0x7f99ccf4), + X(0x0a4c1610), X(0x7f95cb9a), X(0x0a7e2f85), X(0x7f91b694), + X(0x0ab0475c), X(0x7f8d8de1), X(0x0ae25d8d), X(0x7f895182), + X(0x0b147211), X(0x7f850179), X(0x0b4684df), X(0x7f809dc5), + X(0x0b7895f0), X(0x7f7c2668), X(0x0baaa53b), X(0x7f779b62), + X(0x0bdcb2bb), X(0x7f72fcb4), X(0x0c0ebe66), X(0x7f6e4a5e), + X(0x0c40c835), X(0x7f698461), X(0x0c72d020), X(0x7f64aabf), + X(0x0ca4d620), X(0x7f5fbd77), X(0x0cd6da2d), X(0x7f5abc8a), + X(0x0d08dc3f), X(0x7f55a7fa), X(0x0d3adc4e), X(0x7f507fc7), + X(0x0d6cda53), X(0x7f4b43f2), X(0x0d9ed646), X(0x7f45f47b), + X(0x0dd0d01f), X(0x7f409164), X(0x0e02c7d7), X(0x7f3b1aad), + X(0x0e34bd66), X(0x7f359057), X(0x0e66b0c3), X(0x7f2ff263), + X(0x0e98a1e9), X(0x7f2a40d2), X(0x0eca90ce), X(0x7f247ba5), + X(0x0efc7d6b), X(0x7f1ea2dc), X(0x0f2e67b8), X(0x7f18b679), + X(0x0f604faf), X(0x7f12b67c), X(0x0f923546), X(0x7f0ca2e7), + X(0x0fc41876), X(0x7f067bba), X(0x0ff5f938), X(0x7f0040f6), + X(0x1027d784), X(0x7ef9f29d), X(0x1059b352), X(0x7ef390ae), + X(0x108b8c9b), X(0x7eed1b2c), X(0x10bd6356), X(0x7ee69217), + X(0x10ef377d), X(0x7edff570), X(0x11210907), X(0x7ed94538), + X(0x1152d7ed), X(0x7ed28171), X(0x1184a427), X(0x7ecbaa1a), + X(0x11b66dad), X(0x7ec4bf36), X(0x11e83478), X(0x7ebdc0c6), + X(0x1219f880), X(0x7eb6aeca), X(0x124bb9be), X(0x7eaf8943), + X(0x127d7829), X(0x7ea85033), X(0x12af33ba), X(0x7ea1039b), + X(0x12e0ec6a), X(0x7e99a37c), X(0x1312a230), X(0x7e922fd6), + X(0x13445505), X(0x7e8aa8ac), X(0x137604e2), X(0x7e830dff), + X(0x13a7b1bf), X(0x7e7b5fce), X(0x13d95b93), X(0x7e739e1d), + X(0x140b0258), X(0x7e6bc8eb), X(0x143ca605), X(0x7e63e03b), + X(0x146e4694), X(0x7e5be40c), X(0x149fe3fc), X(0x7e53d462), + X(0x14d17e36), X(0x7e4bb13c), X(0x1503153a), X(0x7e437a9c), + X(0x1534a901), X(0x7e3b3083), X(0x15663982), X(0x7e32d2f4), + X(0x1597c6b7), X(0x7e2a61ed), X(0x15c95097), X(0x7e21dd73), + X(0x15fad71b), X(0x7e194584), X(0x162c5a3b), X(0x7e109a24), + X(0x165dd9f0), X(0x7e07db52), X(0x168f5632), X(0x7dff0911), + X(0x16c0cef9), X(0x7df62362), X(0x16f2443e), X(0x7ded2a47), + X(0x1723b5f9), X(0x7de41dc0), X(0x17552422), X(0x7ddafdce), + X(0x17868eb3), X(0x7dd1ca75), X(0x17b7f5a3), X(0x7dc883b4), + X(0x17e958ea), X(0x7dbf298d), X(0x181ab881), X(0x7db5bc02), + X(0x184c1461), X(0x7dac3b15), X(0x187d6c82), X(0x7da2a6c6), + X(0x18aec0db), X(0x7d98ff17), X(0x18e01167), X(0x7d8f4409), + X(0x19115e1c), X(0x7d85759f), X(0x1942a6f3), X(0x7d7b93da), + X(0x1973ebe6), X(0x7d719eba), X(0x19a52ceb), X(0x7d679642), + X(0x19d669fc), X(0x7d5d7a74), X(0x1a07a311), X(0x7d534b50), + X(0x1a38d823), X(0x7d4908d9), X(0x1a6a0929), X(0x7d3eb30f), + X(0x1a9b361d), X(0x7d3449f5), X(0x1acc5ef6), X(0x7d29cd8c), + X(0x1afd83ad), X(0x7d1f3dd6), X(0x1b2ea43a), X(0x7d149ad5), + X(0x1b5fc097), X(0x7d09e489), X(0x1b90d8bb), X(0x7cff1af5), + X(0x1bc1ec9e), X(0x7cf43e1a), X(0x1bf2fc3a), X(0x7ce94dfb), + X(0x1c240786), X(0x7cde4a98), X(0x1c550e7c), X(0x7cd333f3), + X(0x1c861113), X(0x7cc80a0f), X(0x1cb70f43), X(0x7cbcccec), + X(0x1ce80906), X(0x7cb17c8d), X(0x1d18fe54), X(0x7ca618f3), + X(0x1d49ef26), X(0x7c9aa221), X(0x1d7adb73), X(0x7c8f1817), + X(0x1dabc334), X(0x7c837ad8), X(0x1ddca662), X(0x7c77ca65), + X(0x1e0d84f5), X(0x7c6c06c0), X(0x1e3e5ee5), X(0x7c602fec), + X(0x1e6f342c), X(0x7c5445e9), X(0x1ea004c1), X(0x7c4848ba), + X(0x1ed0d09d), X(0x7c3c3860), X(0x1f0197b8), X(0x7c3014de), + X(0x1f325a0b), X(0x7c23de35), X(0x1f63178f), X(0x7c179467), + X(0x1f93d03c), X(0x7c0b3777), X(0x1fc4840a), X(0x7bfec765), + X(0x1ff532f2), X(0x7bf24434), X(0x2025dcec), X(0x7be5ade6), + X(0x205681f1), X(0x7bd9047c), X(0x208721f9), X(0x7bcc47fa), + X(0x20b7bcfe), X(0x7bbf7860), X(0x20e852f6), X(0x7bb295b0), + X(0x2118e3dc), X(0x7ba59fee), X(0x21496fa7), X(0x7b989719), + X(0x2179f64f), X(0x7b8b7b36), X(0x21aa77cf), X(0x7b7e4c45), + X(0x21daf41d), X(0x7b710a49), X(0x220b6b32), X(0x7b63b543), + X(0x223bdd08), X(0x7b564d36), X(0x226c4996), X(0x7b48d225), + X(0x229cb0d5), X(0x7b3b4410), X(0x22cd12bd), X(0x7b2da2fa), + X(0x22fd6f48), X(0x7b1feee5), X(0x232dc66d), X(0x7b1227d3), + X(0x235e1826), X(0x7b044dc7), X(0x238e646a), X(0x7af660c2), + X(0x23beab33), X(0x7ae860c7), X(0x23eeec78), X(0x7ada4dd8), + X(0x241f2833), X(0x7acc27f7), X(0x244f5e5c), X(0x7abdef25), + X(0x247f8eec), X(0x7aafa367), X(0x24afb9da), X(0x7aa144bc), + X(0x24dfdf20), X(0x7a92d329), X(0x250ffeb7), X(0x7a844eae), + X(0x25401896), X(0x7a75b74f), X(0x25702cb7), X(0x7a670d0d), + X(0x25a03b11), X(0x7a584feb), X(0x25d0439f), X(0x7a497feb), + X(0x26004657), X(0x7a3a9d0f), X(0x26304333), X(0x7a2ba75a), + X(0x26603a2c), X(0x7a1c9ece), X(0x26902b39), X(0x7a0d836d), + X(0x26c01655), X(0x79fe5539), X(0x26effb76), X(0x79ef1436), + X(0x271fda96), X(0x79dfc064), X(0x274fb3ae), X(0x79d059c8), + X(0x277f86b5), X(0x79c0e062), X(0x27af53a6), X(0x79b15435), + X(0x27df1a77), X(0x79a1b545), X(0x280edb23), X(0x79920392), + X(0x283e95a1), X(0x79823f20), X(0x286e49ea), X(0x797267f2), + X(0x289df7f8), X(0x79627e08), X(0x28cd9fc1), X(0x79528167), + X(0x28fd4140), X(0x79427210), X(0x292cdc6d), X(0x79325006), + X(0x295c7140), X(0x79221b4b), X(0x298bffb2), X(0x7911d3e2), + X(0x29bb87bc), X(0x790179cd), X(0x29eb0957), X(0x78f10d0f), + X(0x2a1a847b), X(0x78e08dab), X(0x2a49f920), X(0x78cffba3), + X(0x2a796740), X(0x78bf56f9), X(0x2aa8ced3), X(0x78ae9fb0), + X(0x2ad82fd2), X(0x789dd5cb), X(0x2b078a36), X(0x788cf94c), + X(0x2b36ddf7), X(0x787c0a36), X(0x2b662b0e), X(0x786b088c), + X(0x2b957173), X(0x7859f44f), X(0x2bc4b120), X(0x7848cd83), + X(0x2bf3ea0d), X(0x7837942b), X(0x2c231c33), X(0x78264849), + X(0x2c52478a), X(0x7814e9df), X(0x2c816c0c), X(0x780378f1), + X(0x2cb089b1), X(0x77f1f581), X(0x2cdfa071), X(0x77e05f91), + X(0x2d0eb046), X(0x77ceb725), X(0x2d3db928), X(0x77bcfc3f), + X(0x2d6cbb10), X(0x77ab2ee2), X(0x2d9bb5f6), X(0x77994f11), + X(0x2dcaa9d5), X(0x77875cce), X(0x2df996a3), X(0x7775581d), + X(0x2e287c5a), X(0x776340ff), X(0x2e575af3), X(0x77511778), + X(0x2e863267), X(0x773edb8b), X(0x2eb502ae), X(0x772c8d3a), + X(0x2ee3cbc1), X(0x771a2c88), X(0x2f128d99), X(0x7707b979), + X(0x2f41482e), X(0x76f5340e), X(0x2f6ffb7a), X(0x76e29c4b), + X(0x2f9ea775), X(0x76cff232), X(0x2fcd4c19), X(0x76bd35c7), + X(0x2ffbe95d), X(0x76aa670d), X(0x302a7f3a), X(0x76978605), + X(0x30590dab), X(0x768492b4), X(0x308794a6), X(0x76718d1c), + X(0x30b61426), X(0x765e7540), X(0x30e48c22), X(0x764b4b23), + X(0x3112fc95), X(0x76380ec8), X(0x31416576), X(0x7624c031), + X(0x316fc6be), X(0x76115f63), X(0x319e2067), X(0x75fdec60), + X(0x31cc7269), X(0x75ea672a), X(0x31fabcbd), X(0x75d6cfc5), + X(0x3228ff5c), X(0x75c32634), X(0x32573a3f), X(0x75af6a7b), + X(0x32856d5e), X(0x759b9c9b), X(0x32b398b3), X(0x7587bc98), + X(0x32e1bc36), X(0x7573ca75), X(0x330fd7e1), X(0x755fc635), + X(0x333debab), X(0x754bafdc), X(0x336bf78f), X(0x7537876c), + X(0x3399fb85), X(0x75234ce8), X(0x33c7f785), X(0x750f0054), + X(0x33f5eb89), X(0x74faa1b3), X(0x3423d78a), X(0x74e63108), + X(0x3451bb81), X(0x74d1ae55), X(0x347f9766), X(0x74bd199f), + X(0x34ad6b32), X(0x74a872e8), X(0x34db36df), X(0x7493ba34), + X(0x3508fa66), X(0x747eef85), X(0x3536b5be), X(0x746a12df), + X(0x356468e2), X(0x74552446), X(0x359213c9), X(0x744023bc), + X(0x35bfb66e), X(0x742b1144), X(0x35ed50c9), X(0x7415ece2), + X(0x361ae2d3), X(0x7400b69a), X(0x36486c86), X(0x73eb6e6e), + X(0x3675edd9), X(0x73d61461), X(0x36a366c6), X(0x73c0a878), + X(0x36d0d746), X(0x73ab2ab4), X(0x36fe3f52), X(0x73959b1b), + X(0x372b9ee3), X(0x737ff9ae), X(0x3758f5f2), X(0x736a4671), + X(0x37864477), X(0x73548168), X(0x37b38a6d), X(0x733eaa96), + X(0x37e0c7cc), X(0x7328c1ff), X(0x380dfc8d), X(0x7312c7a5), + X(0x383b28a9), X(0x72fcbb8c), X(0x38684c19), X(0x72e69db7), + X(0x389566d6), X(0x72d06e2b), X(0x38c278d9), X(0x72ba2cea), + X(0x38ef821c), X(0x72a3d9f7), X(0x391c8297), X(0x728d7557), + X(0x39497a43), X(0x7276ff0d), X(0x39766919), X(0x7260771b), + X(0x39a34f13), X(0x7249dd86), X(0x39d02c2a), X(0x72333251), + X(0x39fd0056), X(0x721c7580), X(0x3a29cb91), X(0x7205a716), + X(0x3a568dd4), X(0x71eec716), X(0x3a834717), X(0x71d7d585), + X(0x3aaff755), X(0x71c0d265), X(0x3adc9e86), X(0x71a9bdba), + X(0x3b093ca3), X(0x71929789), X(0x3b35d1a5), X(0x717b5fd3), + X(0x3b625d86), X(0x7164169d), X(0x3b8ee03e), X(0x714cbbeb), + X(0x3bbb59c7), X(0x71354fc0), X(0x3be7ca1a), X(0x711dd220), + X(0x3c143130), X(0x7106430e), X(0x3c408f03), X(0x70eea28e), + X(0x3c6ce38a), X(0x70d6f0a4), X(0x3c992ec0), X(0x70bf2d53), + X(0x3cc5709e), X(0x70a7589f), X(0x3cf1a91c), X(0x708f728b), + X(0x3d1dd835), X(0x70777b1c), X(0x3d49fde1), X(0x705f7255), + X(0x3d761a19), X(0x70475839), X(0x3da22cd7), X(0x702f2ccd), + X(0x3dce3614), X(0x7016f014), X(0x3dfa35c8), X(0x6ffea212), + X(0x3e262bee), X(0x6fe642ca), X(0x3e52187f), X(0x6fcdd241), + X(0x3e7dfb73), X(0x6fb5507a), X(0x3ea9d4c3), X(0x6f9cbd79), + X(0x3ed5a46b), X(0x6f841942), X(0x3f016a61), X(0x6f6b63d8), + X(0x3f2d26a0), X(0x6f529d40), X(0x3f58d921), X(0x6f39c57d), + X(0x3f8481dd), X(0x6f20dc92), X(0x3fb020ce), X(0x6f07e285), + X(0x3fdbb5ec), X(0x6eeed758), X(0x40074132), X(0x6ed5bb10), + X(0x4032c297), X(0x6ebc8db0), X(0x405e3a16), X(0x6ea34f3d), + X(0x4089a7a8), X(0x6e89ffb9), X(0x40b50b46), X(0x6e709f2a), + X(0x40e064ea), X(0x6e572d93), X(0x410bb48c), X(0x6e3daaf8), + X(0x4136fa27), X(0x6e24175c), X(0x416235b2), X(0x6e0a72c5), + X(0x418d6729), X(0x6df0bd35), X(0x41b88e84), X(0x6dd6f6b1), + X(0x41e3abbc), X(0x6dbd1f3c), X(0x420ebecb), X(0x6da336dc), + X(0x4239c7aa), X(0x6d893d93), X(0x4264c653), X(0x6d6f3365), + X(0x428fbabe), X(0x6d551858), X(0x42baa4e6), X(0x6d3aec6e), + X(0x42e584c3), X(0x6d20afac), X(0x43105a50), X(0x6d066215), + X(0x433b2585), X(0x6cec03af), X(0x4365e65b), X(0x6cd1947c), + X(0x43909ccd), X(0x6cb71482), X(0x43bb48d4), X(0x6c9c83c3), + X(0x43e5ea68), X(0x6c81e245), X(0x44108184), X(0x6c67300b), + X(0x443b0e21), X(0x6c4c6d1a), X(0x44659039), X(0x6c319975), + X(0x449007c4), X(0x6c16b521), X(0x44ba74bd), X(0x6bfbc021), + X(0x44e4d71c), X(0x6be0ba7b), X(0x450f2edb), X(0x6bc5a431), + X(0x45397bf4), X(0x6baa7d49), X(0x4563be60), X(0x6b8f45c7), + X(0x458df619), X(0x6b73fdae), X(0x45b82318), X(0x6b58a503), + X(0x45e24556), X(0x6b3d3bcb), X(0x460c5cce), X(0x6b21c208), + X(0x46366978), X(0x6b0637c1), X(0x46606b4e), X(0x6aea9cf8), + X(0x468a624a), X(0x6acef1b2), X(0x46b44e65), X(0x6ab335f4), + X(0x46de2f99), X(0x6a9769c1), X(0x470805df), X(0x6a7b8d1e), + X(0x4731d131), X(0x6a5fa010), X(0x475b9188), X(0x6a43a29a), + X(0x478546de), X(0x6a2794c1), X(0x47aef12c), X(0x6a0b7689), + X(0x47d8906d), X(0x69ef47f6), X(0x48022499), X(0x69d3090e), + X(0x482badab), X(0x69b6b9d3), X(0x48552b9b), X(0x699a5a4c), + X(0x487e9e64), X(0x697dea7b), X(0x48a805ff), X(0x69616a65), + X(0x48d16265), X(0x6944da10), X(0x48fab391), X(0x6928397e), + X(0x4923f97b), X(0x690b88b5), X(0x494d341e), X(0x68eec7b9), + X(0x49766373), X(0x68d1f68f), X(0x499f8774), X(0x68b5153a), + X(0x49c8a01b), X(0x689823bf), X(0x49f1ad61), X(0x687b2224), + X(0x4a1aaf3f), X(0x685e106c), X(0x4a43a5b0), X(0x6840ee9b), + X(0x4a6c90ad), X(0x6823bcb7), X(0x4a957030), X(0x68067ac3), + X(0x4abe4433), X(0x67e928c5), X(0x4ae70caf), X(0x67cbc6c0), + X(0x4b0fc99d), X(0x67ae54ba), X(0x4b387af9), X(0x6790d2b6), + X(0x4b6120bb), X(0x677340ba), X(0x4b89badd), X(0x67559eca), + X(0x4bb24958), X(0x6737ecea), X(0x4bdacc28), X(0x671a2b20), + X(0x4c034345), X(0x66fc596f), X(0x4c2baea9), X(0x66de77dc), + X(0x4c540e4e), X(0x66c0866d), X(0x4c7c622d), X(0x66a28524), + X(0x4ca4aa41), X(0x66847408), X(0x4ccce684), X(0x6666531d), + X(0x4cf516ee), X(0x66482267), X(0x4d1d3b7a), X(0x6629e1ec), + X(0x4d455422), X(0x660b91af), X(0x4d6d60df), X(0x65ed31b5), + X(0x4d9561ac), X(0x65cec204), X(0x4dbd5682), X(0x65b0429f), + X(0x4de53f5a), X(0x6591b38c), X(0x4e0d1c30), X(0x657314cf), + X(0x4e34ecfc), X(0x6554666d), X(0x4e5cb1b9), X(0x6535a86b), + X(0x4e846a60), X(0x6516dacd), X(0x4eac16eb), X(0x64f7fd98), + X(0x4ed3b755), X(0x64d910d1), X(0x4efb4b96), X(0x64ba147d), + X(0x4f22d3aa), X(0x649b08a0), X(0x4f4a4f89), X(0x647bed3f), + X(0x4f71bf2e), X(0x645cc260), X(0x4f992293), X(0x643d8806), + X(0x4fc079b1), X(0x641e3e38), X(0x4fe7c483), X(0x63fee4f8), + X(0x500f0302), X(0x63df7c4d), X(0x50363529), X(0x63c0043b), + X(0x505d5af1), X(0x63a07cc7), X(0x50847454), X(0x6380e5f6), + X(0x50ab814d), X(0x63613fcd), X(0x50d281d5), X(0x63418a50), + X(0x50f975e6), X(0x6321c585), X(0x51205d7b), X(0x6301f171), + X(0x5147388c), X(0x62e20e17), X(0x516e0715), X(0x62c21b7e), + X(0x5194c910), X(0x62a219aa), X(0x51bb7e75), X(0x628208a1), + X(0x51e22740), X(0x6261e866), X(0x5208c36a), X(0x6241b8ff), + X(0x522f52ee), X(0x62217a72), X(0x5255d5c5), X(0x62012cc2), + X(0x527c4bea), X(0x61e0cff5), X(0x52a2b556), X(0x61c06410), + X(0x52c91204), X(0x619fe918), X(0x52ef61ee), X(0x617f5f12), + X(0x5315a50e), X(0x615ec603), X(0x533bdb5d), X(0x613e1df0), + X(0x536204d7), X(0x611d66de), X(0x53882175), X(0x60fca0d2), + X(0x53ae3131), X(0x60dbcbd1), X(0x53d43406), X(0x60bae7e1), + X(0x53fa29ed), X(0x6099f505), X(0x542012e1), X(0x6078f344), + X(0x5445eedb), X(0x6057e2a2), X(0x546bbdd7), X(0x6036c325), + X(0x54917fce), X(0x601594d1), X(0x54b734ba), X(0x5ff457ad), + X(0x54dcdc96), X(0x5fd30bbc), X(0x5502775c), X(0x5fb1b104), + X(0x55280505), X(0x5f90478a), X(0x554d858d), X(0x5f6ecf53), + X(0x5572f8ed), X(0x5f4d4865), X(0x55985f20), X(0x5f2bb2c5), + X(0x55bdb81f), X(0x5f0a0e77), X(0x55e303e6), X(0x5ee85b82), + X(0x5608426e), X(0x5ec699e9), X(0x562d73b2), X(0x5ea4c9b3), + X(0x565297ab), X(0x5e82eae5), X(0x5677ae54), X(0x5e60fd84), + X(0x569cb7a8), X(0x5e3f0194), X(0x56c1b3a1), X(0x5e1cf71c), + X(0x56e6a239), X(0x5dfade20), X(0x570b8369), X(0x5dd8b6a7), + X(0x5730572e), X(0x5db680b4), X(0x57551d80), X(0x5d943c4e), + X(0x5779d65b), X(0x5d71e979), X(0x579e81b8), X(0x5d4f883b), + X(0x57c31f92), X(0x5d2d189a), X(0x57e7afe4), X(0x5d0a9a9a), + X(0x580c32a7), X(0x5ce80e41), X(0x5830a7d6), X(0x5cc57394), + X(0x58550f6c), X(0x5ca2ca99), X(0x58796962), X(0x5c801354), + X(0x589db5b3), X(0x5c5d4dcc), X(0x58c1f45b), X(0x5c3a7a05), + X(0x58e62552), X(0x5c179806), X(0x590a4893), X(0x5bf4a7d2), + X(0x592e5e19), X(0x5bd1a971), X(0x595265df), X(0x5bae9ce7), + X(0x59765fde), X(0x5b8b8239), X(0x599a4c12), X(0x5b68596d), + X(0x59be2a74), X(0x5b452288), X(0x59e1faff), X(0x5b21dd90), + X(0x5a05bdae), X(0x5afe8a8b), X(0x5a29727b), X(0x5adb297d), + X(0x5a4d1960), X(0x5ab7ba6c), X(0x5a70b258), X(0x5a943d5e), +}; + diff --git a/apps/codecs/libwma/misc.h b/apps/codecs/libwma/misc.h new file mode 100644 index 0000000000..59760bf885 --- /dev/null +++ b/apps/codecs/libwma/misc.h @@ -0,0 +1,291 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * + * * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * + * * + ******************************************************************** + + function: miscellaneous math and prototypes + + ********************************************************************/ + +//#include "config-tremor.h" + +#ifndef _V_RANDOM_H_ +#define _V_RANDOM_H_ +//#include "ivorbiscodec.h" +//#include "os_types.h" + +//#include "asm_arm.h" +//#include "asm_mcf5249.h" + + +/* Some prototypes that were not defined elsewhere */ +//void *_vorbis_block_alloc(vorbis_block *vb,long bytes); +//void _vorbis_block_ripcord(vorbis_block *vb); +//extern int _ilog(unsigned int v); + +#ifndef _V_WIDE_MATH +#define _V_WIDE_MATH + +#ifndef _LOW_ACCURACY_ +/* 64 bit multiply */ +/* #include */ + +#if BYTE_ORDER==LITTLE_ENDIAN +union magic { + struct { + ogg_int32_t lo; + ogg_int32_t hi; + } halves; + ogg_int64_t whole; +}; +#elif BYTE_ORDER==BIG_ENDIAN +union magic { + struct { + ogg_int32_t hi; + ogg_int32_t lo; + } halves; + ogg_int64_t whole; +}; +#endif + +static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { + union magic magic; + magic.whole = (ogg_int64_t)x * y; + return magic.halves.hi; +} +static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { + return MULT32(x,y)<<1; +} + +static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { + union magic magic; + magic.whole = (ogg_int64_t)x * y; + return ((ogg_uint32_t)(magic.halves.lo)>>15) | ((magic.halves.hi)<<17); +} + +#else +/* 32 bit multiply, more portable but less accurate */ + +/* + * Note: Precision is biased towards the first argument therefore ordering + * is important. Shift values were chosen for the best sound quality after + * many listening tests. + */ + +/* + * For MULT32 and MULT31: The second argument is always a lookup table + * value already preshifted from 31 to 8 bits. We therefore take the + * opportunity to save on text space and use unsigned char for those + * tables in this case. + */ + +static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { + return (x >> 9) * y; /* y preshifted >>23 */ +} + +static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { + return (x >> 8) * y; /* y preshifted >>23 */ +} + +static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { + return (x >> 6) * y; /* y preshifted >>9 */ +} +#endif + +/* + * This should be used as a memory barrier, forcing all cached values in + * registers to wr writen back to memory. Might or might not be beneficial + * depending on the architecture and compiler. + */ +#define MB() + +/* + * The XPROD functions are meant to optimize the cross products found all + * over the place in mdct.c by forcing memory operation ordering to avoid + * unnecessary register reloads as soon as memory is being written to. + * However this is only beneficial on CPUs with a sane number of general + * purpose registers which exclude the Intel x86. On Intel, better let the + * compiler actually reload registers directly from original memory by using + * macros. + */ + +/* replaced XPROD32 with a macro to avoid memory reference + _x, _y are the results (must be l-values) */ +#define XPROD32(_a, _b, _t, _v, _x, _y) \ + { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \ + (_y)=MULT32(_b,_t)-MULT32(_a,_v); } + + +#ifdef __i386__ + +#define XPROD31(_a, _b, _t, _v, _x, _y) \ + { *(_x)=MULT31(_a,_t)+MULT31(_b,_v); \ + *(_y)=MULT31(_b,_t)-MULT31(_a,_v); } +#define XNPROD31(_a, _b, _t, _v, _x, _y) \ + { *(_x)=MULT31(_a,_t)-MULT31(_b,_v); \ + *(_y)=MULT31(_b,_t)+MULT31(_a,_v); } + +#else + +static inline void XPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + *x = MULT31(a, t) + MULT31(b, v); + *y = MULT31(b, t) - MULT31(a, v); +} + +static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, + ogg_int32_t t, ogg_int32_t v, + ogg_int32_t *x, ogg_int32_t *y) +{ + *x = MULT31(a, t) - MULT31(b, v); + *y = MULT31(b, t) + MULT31(a, v); +} +#endif + +#ifndef _V_VECT_OPS +#define _V_VECT_OPS + +static inline +void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>0) { + *x++ += *y++; + n--; + } +} + +static inline +void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) +{ + while (n>0) { + *x++ = *y++; + n--; + } +} + +static inline +void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while(n>0) { + *data = MULT31(*data, *window); + data++; + window++; + n--; + } +} + +static inline +void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) +{ + while(n>0) { + *data = MULT31(*data, *window); + data++; + window--; + n--; + } +} +#endif + +#endif + +#ifndef _V_CLIP_MATH +#define _V_CLIP_MATH + +static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) { + int ret=x; + ret-= ((x<=32767)-1)&(x-32767); + ret-= ((x>=-32768)-1)&(x+32768); + return(ret); +} + +#endif + +static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap, + ogg_int32_t b,ogg_int32_t bp, + ogg_int32_t *p){ + if(a && b){ +#ifndef _LOW_ACCURACY_ + *p=ap+bp+32; + return MULT32(a,b); +#else + *p=ap+bp+31; + return (a>>15)*(b>>16); +#endif + }else + return 0; +} + +/*static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap, + ogg_int32_t i, + ogg_int32_t *p){ + + int ip=_ilog(abs(i))-31; + return VFLOAT_MULT(a,ap,i<<-ip,ip,p); +} +*/ +static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap, + ogg_int32_t b,ogg_int32_t bp, + ogg_int32_t *p){ + + if(!a){ + *p=bp; + return b; + }else if(!b){ + *p=ap; + return a; + } + + /* yes, this can leak a bit. */ + if(ap>bp){ + int shift=ap-bp+1; + *p=ap+1; + a>>=1; + if(shift<32){ + b=(b+(1<<(shift-1)))>>shift; + }else{ + b=0; + } + }else{ + int shift=bp-ap+1; + *p=bp+1; + b>>=1; + if(shift<32){ + a=(a+(1<<(shift-1)))>>shift; + }else{ + a=0; + } + } + + a+=b; + if((a&0xc0000000)==0xc0000000 || + (a&0xc0000000)==0){ + a<<=1; + (*p)--; + } + return(a); +} + +#ifdef __GNUC__ +#if __GNUC__ >= 3 +#define EXPECT(a, b) __builtin_expect((a), (b)) +#else +#define EXPECT(a, b) (a) +#endif +#else +#define EXPECT(a, b) (a) +#endif + +#endif + + + diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index a9c2ee8929..b09dd0edd2 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c @@ -28,6 +28,7 @@ #include "wmadec.h" #include "wmafixed.h" #include "bitstream.h" +#include "mdct2.h" #define VLCBITS 7 /*7 is the lowest without glitching*/ @@ -589,12 +590,16 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) } } - mdct_init_global(); + /*Not using the ffmpeg IMDCT anymore*/ + + /* mdct_init_global(); for(i = 0; i < s->nb_block_sizes; ++i) { ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); + } + */ /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage. * In the worst case you can have 5 window sizes, 128 doubling up 2048 @@ -1207,8 +1212,10 @@ static int wma_decode_block(WMADecodeContext *s) n=0; /* - * Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could - * overflow here as well, so switch to 17.15 during coefs calculation. + * The calculation of coefs has a shift right by 2 built in. This prepares samples + * for the Tremor IMDCT which uses a slightly different fixed format then the ffmpeg one. + * If the old ffmpeg imdct is used, each shift storing into coefs should be reduced by 1. + * See SVN logs for details. */ @@ -1223,7 +1230,7 @@ static int wma_decode_block(WMADecodeContext *s) /* very low freqs : noise */ for(i = 0;i < s->coefs_start; ++i) { - *coefs++ = fixmul32( (fixmul32(s->noise_table[s->noise_index],(*exponents++))>>4),Fixed32From64(mult1)) >>1; + *coefs++ = fixmul32( (fixmul32(s->noise_table[s->noise_index],(*exponents++))>>4),Fixed32From64(mult1)) >>2; s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); } @@ -1284,7 +1291,7 @@ static int wma_decode_block(WMADecodeContext *s) { noise = s->noise_table[s->noise_index]; s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); - *coefs++ = fixmul32((fixmul32(*exponents,noise)>>4),Fixed32From64(mult1)) >>1; + *coefs++ = fixmul32((fixmul32(*exponents,noise)>>4),Fixed32From64(mult1)) >>2; ++exponents; } } @@ -1299,7 +1306,7 @@ static int wma_decode_block(WMADecodeContext *s) /*don't forget to renormalize the noise*/ temp1 = (((int32_t)*coefs1++)<<16) + (noise>>4); - temp2 = fixmul32(*exponents, mult>>17); + temp2 = fixmul32(*exponents, mult>>18); *coefs++ = fixmul32(temp1, temp2); ++exponents; } @@ -1311,8 +1318,8 @@ static int wma_decode_block(WMADecodeContext *s) mult2 = fixmul32(mult>>16,exponents[-1]) ; /*the work around for 32.32 vars are getting stupid*/ for (i = 0; i < n; ++i) { - /*renormalize the noise product and then reduce to 17.15 precison*/ - *coefs++ = fixmul32(s->noise_table[s->noise_index],mult2) >>5; + /*renormalize the noise product and then reduce to 14.18 precison*/ + *coefs++ = fixmul32(s->noise_table[s->noise_index],mult2) >>6; s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); } @@ -1334,7 +1341,7 @@ static int wma_decode_block(WMADecodeContext *s) for(i = 0;i < n; ++i) { - atemp = (coefs1[i] * mult3)>>1; + atemp = (coefs1[i] * mult3)>>2; /*ffmpeg imdct needs 15.17, while tremor 14.18*/ *coefs++=fixmul32(atemp,exponents[i<>esize]); } n = s->block_len - s->coefs_end[bsize]; @@ -1379,9 +1386,13 @@ static int wma_decode_block(WMADecodeContext *s) n = s->block_len; n4 = s->block_len >>1; - ff_imdct_calc(&s->mdct_ctx[bsize], - output, - (*(s->coefs))[ch]); + /*faster IMDCT from Vorbis*/ + mdct_backward( (1 << (12-bsize)), (int*)(*(s->coefs))[ch], (int*)output); + + /*slower but more easily understood IMDCT from FFMPEG*/ + //ff_imdct_calc(&s->mdct_ctx[bsize], + // output, + // (*(s->coefs))[ch]); /* add in the frame */ diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h index 198e79e335..fb31cf8bfa 100644 --- a/apps/codecs/libwma/wmafixed.h +++ b/apps/codecs/libwma/wmafixed.h @@ -142,67 +142,3 @@ static inline fixed32 fixmul32b(fixed32 x, fixed32 y) #endif -#ifdef CPU_ARM -static inline -void CMUL(fixed32 *x, fixed32 *y, - fixed32 a, fixed32 b, - fixed32 t, fixed32 v) -{ - /* This version loses one bit of precision. Could be solved at the cost - * of 2 extra cycles if it becomes an issue. */ - int x1, y1, l; - asm( - "smull %[l], %[y1], %[b], %[t] \n" - "smlal %[l], %[y1], %[a], %[v] \n" - "rsb %[b], %[b], #0 \n" - "smull %[l], %[x1], %[a], %[t] \n" - "smlal %[l], %[x1], %[b], %[v] \n" - : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b) - : [a] "r" (a), [t] "r" (t), [v] "r" (v) - : "cc" - ); - *x = x1 << 1; - *y = y1 << 1; -} -#elif defined CPU_COLDFIRE -static inline -void CMUL(fixed32 *x, fixed32 *y, - fixed32 a, fixed32 b, - fixed32 t, fixed32 v) -{ - asm volatile ("mac.l %[a], %[t], %%acc0;" - "msac.l %[b], %[v], %%acc0;" - "mac.l %[b], %[t], %%acc1;" - "mac.l %[a], %[v], %%acc1;" - "movclr.l %%acc0, %[a];" - "move.l %[a], (%[x]);" - "movclr.l %%acc1, %[a];" - "move.l %[a], (%[y]);" - : [a] "+&r" (a) - : [x] "a" (x), [y] "a" (y), - [b] "r" (b), [t] "r" (t), [v] "r" (v) - : "cc", "memory"); -} -#else -static inline -void CMUL(fixed32 *pre, - fixed32 *pim, - fixed32 are, - fixed32 aim, - fixed32 bre, - fixed32 bim) -{ - //int64_t x,y; - fixed32 _aref = are; - fixed32 _aimf = aim; - fixed32 _bref = bre; - fixed32 _bimf = bim; - fixed32 _r1 = fixmul32b(_bref, _aref); - fixed32 _r2 = fixmul32b(_bimf, _aimf); - fixed32 _r3 = fixmul32b(_bref, _aimf); - fixed32 _r4 = fixmul32b(_bimf, _aref); - *pre = _r1 - _r2; - *pim = _r3 + _r4; - -} -#endif diff --git a/apps/codecs/wma.c b/apps/codecs/wma.c index f395139674..8019f877c2 100644 --- a/apps/codecs/wma.c +++ b/apps/codecs/wma.c @@ -392,7 +392,7 @@ static int get_timestamp(int *duration) datap += 4; *duration = get_short_le(datap); - /*the get_timestamp function advances us 12-13 bytes past the packet start, + /*the get_timestamp function advances us 12-13 bytes past the packet start, need to undo this here so that we stay synced with the packet*/ ci->seek_buffer(ci->curpos-bytesread); @@ -470,7 +470,7 @@ enum codec_status codec_main(void) /* Generic codec initialisation */ ci->configure(CODEC_SET_FILEBUF_WATERMARK, 1024*512); - ci->configure(DSP_SET_SAMPLE_DEPTH, 30); + ci->configure(DSP_SET_SAMPLE_DEPTH, 29); next_track: @@ -505,7 +505,7 @@ restart_track: if (resume_offset > ci->id3->first_frame_offset) { /* Get start of current packet */ - int packet_offset = (resume_offset - ci->id3->first_frame_offset) + int packet_offset = (resume_offset - ci->id3->first_frame_offset) % wfx.packet_size; ci->seek_buffer(resume_offset - packet_offset); elapsedtime = get_timestamp(&i); diff --git a/apps/debug_menu.c b/apps/debug_menu.c index b2144e2169..8c624dcc9a 100644 --- a/apps/debug_menu.c +++ b/apps/debug_menu.c @@ -303,7 +303,7 @@ static bool dbg_buffering_thread(void) lcd_setfont(FONT_SYSFIXED); while(!done) { - button = get_action(CONTEXT_STD,HZ/5); + button = get_action(CONTEXT_STD,HZ*2); switch(button) { case ACTION_STD_NEXT: @@ -2041,9 +2041,9 @@ static int database_callback(int btn, struct gui_synclist *lists) simplelist_addline(SIMPLELIST_ADD_LINE, "Commit delayed: %s", stat->commit_delayed ? "Yes" : "No"); - simplelist_addline(SIMPLELIST_ADD_LINE, "Queue length: %d", + simplelist_addline(SIMPLELIST_ADD_LINE, "Queue length: %d", stat->queue_length); - + if (synced) { synced = false; @@ -2068,7 +2068,7 @@ static bool dbg_tagcache_info(void) info.action_callback = database_callback; info.hide_selection = true; info.scroll_all = true; - + /* Don't do nonblock here, must give enough processing time for tagcache thread. */ /* info.timeout = TIMEOUT_NOBLOCK; */ @@ -2428,7 +2428,7 @@ static bool dbg_isp1583(void) struct simplelist_info isp1583; isp1583.scroll_all = true; simplelist_info_init(&isp1583, "ISP1583", dbg_usb_num_items(), NULL); - isp1583.timeout = HZ/100; + isp1583.timeout = HZ/100; isp1583.hide_selection = true; isp1583.get_name = dbg_usb_item; isp1583.action_callback = isp1583_action_callback; @@ -2453,7 +2453,7 @@ static bool dbg_pic(void) struct simplelist_info pic; pic.scroll_all = true; simplelist_info_init(&pic, "PIC", pic_dbg_num_items(), NULL); - pic.timeout = HZ/100; + pic.timeout = HZ/100; pic.hide_selection = true; pic.get_name = pic_dbg_item; pic.action_callback = pic_action_callback; diff --git a/apps/plugins/SOURCES b/apps/plugins/SOURCES index 4bcc144ed8..ac9e1c3977 100644 --- a/apps/plugins/SOURCES +++ b/apps/plugins/SOURCES @@ -20,6 +20,7 @@ stats.c stopwatch.c vbrfix.c viewer.c +test_codec.c #ifdef HAVE_BACKLIGHT lamp.c #endif /* HAVE_BACKLIGHT */ -- cgit v1.2.3