From f40bfc9267b13b54e6379dfe7539447662879d24 Mon Sep 17 00:00:00 2001 From: Sean Bartell Date: Sat, 25 Jun 2011 21:32:25 -0400 Subject: Add codecs to librbcodec. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97 Reviewed-on: http://gerrit.rockbox.org/137 Reviewed-by: Nils Wallménius Tested-by: Nils Wallménius --- apps/codecs/lib/mdct.c | 644 ------------------------------------------------- 1 file changed, 644 deletions(-) delete mode 100644 apps/codecs/lib/mdct.c (limited to 'apps/codecs/lib/mdct.c') diff --git a/apps/codecs/lib/mdct.c b/apps/codecs/lib/mdct.c deleted file mode 100644 index 777aec4a55..0000000000 --- a/apps/codecs/lib/mdct.c +++ /dev/null @@ -1,644 +0,0 @@ -/* - * Fixed Point IMDCT - * Copyright (c) 2002 The FFmpeg Project. - * Copyright (c) 2010 Dave Hooper, Mohamed Tarek, Michael Giacomelli - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "codeclib.h" -#include "mdct.h" -#include "codeclib_misc.h" -#include "mdct_lookup.h" - -#ifndef ICODE_ATTR_TREMOR_MDCT -#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR -#endif - -/** - * Compute the middle half of the inverse MDCT of size N = 2^nbits - * thus excluding the parts that can be derived by symmetry - * @param output N/2 samples - * @param input N/2 samples - * - * NOTE - CANNOT CURRENTLY OPERATE IN PLACE (input and output must - * not overlap or intersect at all) - */ -void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT; -void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) -{ - int n8, n4, n2, n, j; - const fixed32 *in1, *in2; - (void)j; - n = 1 << nbits; - - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - - FFTComplex *z = (FFTComplex *)output; - - /* pre rotation */ - in1 = input; - in2 = input + n2 - 1; - - /* revtab comes from the fft; revtab table is sized for N=4096 size fft = 2^12. - The fft is size N/4 so s->nbits-2, so our shift needs to be (12-(nbits-2)) */ - const int revtab_shift = (14- nbits); - - /* bitreverse reorder the input and rotate; result here is in OUTPUT ... */ - /* (note that when using the current split radix, the bitreverse ordering is - complex, meaning that this reordering cannot easily be done in-place) */ - /* Using the following pdf, you can see that it is possible to rearrange - the 'classic' pre/post rotate with an alternative one that enables - us to use fewer distinct twiddle factors. - http://www.eurasip.org/Proceedings/Eusipco/Eusipco2006/papers/1568980508.pdf - - For prerotation, the factors are just sin,cos(2PI*i/N) - For postrotation, the factors are sin,cos(2PI*(i+1/4)/N) - - Therefore, prerotation can immediately reuse the same twiddles as fft - (for postrotation it's still a bit complex, we reuse the fft trig tables - where we can, or a special table for N=2048, or interpolate between - trig tables for N>2048) - */ - const int32_t *T = sincos_lookup0; - const int step = 2<<(12-nbits); - const uint16_t * p_revtab=revtab; - { - const uint16_t * const p_revtab_end = p_revtab + n8; -#ifdef CPU_COLDFIRE - asm volatile ("move.l (%[in2]), %%d0\n\t" - "move.l (%[in1]), %%d1\n\t" - "bra.s 1f\n\t" - "0:\n\t" - "movem.l (%[T]), %%d2-%%d3\n\t" - - "addq.l #8, %[in1]\n\t" - "subq.l #8, %[in2]\n\t" - - "lea (%[step]*4, %[T]), %[T]\n\t" - - "mac.l %%d0, %%d3, (%[T]), %%d4, %%acc0;" - "msac.l %%d1, %%d2, (4, %[T]), %%d5, %%acc0;" - "mac.l %%d1, %%d3, (%[in1]), %%d1, %%acc1;" - "mac.l %%d0, %%d2, (%[in2]), %%d0, %%acc1;" - - "addq.l #8, %[in1]\n\t" - "subq.l #8, %[in2]\n\t" - - "mac.l %%d0, %%d5, %%acc2;" - "msac.l %%d1, %%d4, (%[p_revtab])+, %%d2, %%acc2;" - "mac.l %%d1, %%d5, (%[in1]), %%d1, %%acc3;" - "mac.l %%d0, %%d4, (%[in2]), %%d0, %%acc3;" - - "clr.l %%d3\n\t" - "move.w %%d2, %%d3\n\t" - "eor.l %%d3, %%d2\n\t" - "swap %%d2\n\t" - "lsr.l %[revtab_shift], %%d2\n\t" - - "movclr.l %%acc0, %%d4;" - "movclr.l %%acc1, %%d5;" - "lsl.l #3, %%d2\n\t" - "lea (%%d2, %[z]), %%a1\n\t" - "movem.l %%d4-%%d5, (%%a1)\n\t" - - "lsr.l %[revtab_shift], %%d3\n\t" - - "movclr.l %%acc2, %%d4;" - "movclr.l %%acc3, %%d5;" - "lsl.l #3, %%d3\n\t" - "lea (%%d3, %[z]), %%a1\n\t" - "movem.l %%d4-%%d5, (%%a1)\n\t" - - "lea (%[step]*4, %[T]), %[T]\n\t" - - "1:\n\t" - "cmp.l %[p_revtab_end], %[p_revtab]\n\t" - "bcs.s 0b\n\t" - : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T), - [p_revtab] "+a" (p_revtab) - : [z] "a" (z), [step] "d" (step), [revtab_shift] "d" (revtab_shift), - [p_revtab_end] "r" (p_revtab_end) - : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory"); -#else - while(LIKELY(p_revtab < p_revtab_end)) - { - j = (*p_revtab)>>revtab_shift; - XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im ); - T += step; - in1 += 2; - in2 -= 2; - p_revtab++; - j = (*p_revtab)>>revtab_shift; - XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im ); - T += step; - in1 += 2; - in2 -= 2; - p_revtab++; - } -#endif - } - { - const uint16_t * const p_revtab_end = p_revtab + n8; -#ifdef CPU_COLDFIRE - asm volatile ("move.l (%[in2]), %%d0\n\t" - "move.l (%[in1]), %%d1\n\t" - "bra.s 1f\n\t" - "0:\n\t" - "movem.l (%[T]), %%d2-%%d3\n\t" - - "addq.l #8, %[in1]\n\t" - "subq.l #8, %[in2]\n\t" - - "lea (%[step]*4, %[T]), %[T]\n\t" - - "mac.l %%d0, %%d2, (%[T]), %%d4, %%acc0;" - "msac.l %%d1, %%d3, (4, %[T]), %%d5, %%acc0;" - "mac.l %%d1, %%d2, (%[in1]), %%d1, %%acc1;" - "mac.l %%d0, %%d3, (%[in2]), %%d0, %%acc1;" - - "addq.l #8, %[in1]\n\t" - "subq.l #8, %[in2]\n\t" - - "mac.l %%d0, %%d4, %%acc2;" - "msac.l %%d1, %%d5, (%[p_revtab])+, %%d2, %%acc2;" - "mac.l %%d1, %%d4, (%[in1]), %%d1, %%acc3;" - "mac.l %%d0, %%d5, (%[in2]), %%d0, %%acc3;" - - "clr.l %%d3\n\t" - "move.w %%d2, %%d3\n\t" - "eor.l %%d3, %%d2\n\t" - "swap %%d2\n\t" - "lsr.l %[revtab_shift], %%d2\n\t" - - "movclr.l %%acc0, %%d4;" - "movclr.l %%acc1, %%d5;" - "lsl.l #3, %%d2\n\t" - "lea (%%d2, %[z]), %%a1\n\t" - "movem.l %%d4-%%d5, (%%a1)\n\t" - - "lsr.l %[revtab_shift], %%d3\n\t" - - "movclr.l %%acc2, %%d4;" - "movclr.l %%acc3, %%d5;" - "lsl.l #3, %%d3\n\t" - "lea (%%d3, %[z]), %%a1\n\t" - "movem.l %%d4-%%d5, (%%a1)\n\t" - - "lea (%[step]*4, %[T]), %[T]\n\t" - - "1:\n\t" - "cmp.l %[p_revtab_end], %[p_revtab]\n\t" - "bcs.s 0b\n\t" - : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T), - [p_revtab] "+a" (p_revtab) - : [z] "a" (z), [step] "d" (-step), [revtab_shift] "d" (revtab_shift), - [p_revtab_end] "r" (p_revtab_end) - : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory"); -#else - while(LIKELY(p_revtab < p_revtab_end)) - { - j = (*p_revtab)>>revtab_shift; - XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im); - T -= step; - in1 += 2; - in2 -= 2; - p_revtab++; - j = (*p_revtab)>>revtab_shift; - XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im); - T -= step; - in1 += 2; - in2 -= 2; - p_revtab++; - } -#endif - } - - - /* ... and so fft runs in OUTPUT buffer */ - ff_fft_calc_c(nbits-2, z); - - /* post rotation + reordering. now keeps the result within the OUTPUT buffer */ - switch( nbits ) - { - default: - { - fixed32 * z1 = (fixed32 *)(&z[0]); - int magic_step = step>>2; - int newstep; - if(n<=1024) - { - T = sincos_lookup0 + magic_step; - newstep = step>>1; - } - else - { - T = sincos_lookup1; - newstep = 2; - } - -#ifdef CPU_COLDFIRE - fixed32 * z2 = (fixed32 *)(&z[n4]); - int c = n4; - if (newstep == 2) - { - asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t" - "addq.l #8, %[z1]\n\t" - "movem.l (%[T]), %%d2-%%d3\n\t" - "addq.l #8, %[T]\n\t" - "bra.s 1f\n\t" - "0:\n\t" - "msac.l %%d1, %%d2, (%[T])+, %%a3, %%acc0\n\t" - "mac.l %%d0, %%d3, (%[T])+, %%a4, %%acc0\n\t" - - "msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t" - "msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t" - - "msac.l %%d1, %%a4, (%[T])+, %%d2, %%acc2\n\t" - "mac.l %%d0, %%a3, (%[T])+, %%d3, %%acc2\n\t" - "msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t" - "msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t" - - "movclr.l %%acc0, %%a3\n\t" - "movclr.l %%acc3, %%a4\n\t" - "movem.l %%a3-%%a4, (-16, %[z1])\n\t" - - "movclr.l %%acc1, %%a4\n\t" - "movclr.l %%acc2, %%a3\n\t" - "movem.l %%a3-%%a4, (%[z2])\n\t" - - "subq.l #2, %[n]\n\t" - "1:\n\t" - "bhi.s 0b\n\t" - : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c) - : - : "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory"); - } - else - { - asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t" - "addq.l #8, %[z1]\n\t" - "movem.l (%[T]), %%d2-%%d3\n\t" - "lea (%[newstep]*4, %[T]), %[T]\n\t" - "bra.s 1f\n\t" - "0:\n\t" - "msac.l %%d1, %%d2, (%[T]), %%a3, %%acc0\n\t" - "mac.l %%d0, %%d3, (4, %[T]), %%a4, %%acc0\n\t" - "msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t" - "msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t" - - "lea (%[newstep]*4, %[T]), %[T]\n\t" - "msac.l %%d1, %%a4, (%[T]), %%d2, %%acc2\n\t" - "mac.l %%d0, %%a3, (4, %[T]), %%d3, %%acc2\n\t" - "msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t" - "msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t" - - "lea (%[newstep]*4, %[T]), %[T]\n\t" - - "movclr.l %%acc0, %%a3\n\t" - "movclr.l %%acc3, %%a4\n\t" - "movem.l %%a3-%%a4, (-16, %[z1])\n\t" - - "movclr.l %%acc1, %%a4\n\t" - "movclr.l %%acc2, %%a3\n\t" - "movem.l %%a3-%%a4, (%[z2])\n\t" - - "subq.l #2, %[n]\n\t" - "1:\n\t" - "bhi.s 0b\n\t" - : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c) - : [newstep] "d" (newstep) - : "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory"); - } -#else - fixed32 * z2 = (fixed32 *)(&z[n4-1]); - while(z1>1; t1=T[1]>>1; - - while(z1>1)); - t1 += (v1 = (V[1]>>1)); - XNPROD31_R(z1[1], z1[0], t0, t1, r0, i1 ); - T+=2; - v0 += (t0 = (T[0]>>1)); - v1 += (t1 = (T[1]>>1)); - XNPROD31_R(z2[1], z2[0], v1, v0, r1, i0 ); - z1[0] = -r0; - z1[1] = -i0; - z2[0] = -r1; - z2[1] = -i1; - z1+=2; - z2-=2; - V+=2; - } - - break; - } - - case 13: /* n = 8192 */ - { - /* weight linear interpolation between sincos_lookup0 and sincos_lookup1 - specifically: 25:75 for first twiddle and 75:25 for second twiddle */ - const int32_t * V = sincos_lookup1; - T = sincos_lookup0; - int32_t t0,t1,v0,v1,q0,q1; - fixed32 * z1 = (fixed32 *)(&z[0]); - fixed32 * z2 = (fixed32 *)(&z[n4-1]); - - t0 = T[0]; t1=T[1]; - - while(z1>1); - t1 += (q1 = (v1-t1)>>1); - XNPROD31_R(z1[1], z1[0], t0, t1, r0, i1 ); - t0 = v0-q0; - t1 = v1-q1; - XNPROD31_R(z2[1], z2[0], t1, t0, r1, i0 ); - z1[0] = -r0; - z1[1] = -i0; - z2[0] = -r1; - z2[1] = -i1; - z1+=2; - z2-=2; - T+=2; - - t0 = T[0]; t1 = T[1]; - v0 += (q0 = (t0-v0)>>1); - v1 += (q1 = (t1-v1)>>1); - XNPROD31_R(z1[1], z1[0], v0, v1, r0, i1 ); - v0 = t0-q0; - v1 = t1-q1; - XNPROD31_R(z2[1], z2[0], v1, v0, r1, i0 ); - z1[0] = -r0; - z1[1] = -i0; - z2[0] = -r1; - z2[1] = -i1; - z1+=2; - z2-=2; - V+=2; - } - - break; - } - } -} - -/** - * Compute inverse MDCT of size N = 2^nbits - * @param output N samples - * @param input N/2 samples - * "In-place" processing can be achieved provided that: - * [0 .. N/2-1 | N/2 .. N-1 ] - * <----input----> - * <-----------output-----------> - * - * The result of ff_imdct_half is to put the 'half' imdct here - * - * N/2 N-1 - * <--half imdct--> - * - * We want it here for the full imdct: - * N/4 3N/4-1 - * <--------------> - * - * In addition we need to apply two symmetries to get the full imdct: - * - * - * - * - * D is a reflection of C - * A is a reflection of B (but with sign flipped) - * - * We process the symmetries at the same time as we 'move' the half imdct - * from [N/2,N-1] to [N/4,3N/4-1] - * - * TODO: find a way to make ff_imdct_half put the result in [N/4..3N/4-1] - * This would require being able to use revtab 'inplace' (since the input - * and output of imdct_half would then overlap somewhat) - */ -void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT; -#ifndef CPU_ARM -void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input) -{ - const int n = (1<>1); - const int n4 = (n>>2); - - /* tell imdct_half to put the output in [N/2..3N/4-1] i.e. output+n2 */ - ff_imdct_half(nbits,output+n2,input); - - fixed32 * in_r, * in_r2, * out_r, * out_r2; - - /* Copy BBBB to AAAA, reflected and sign-flipped. - Also copy BBBB to its correct destination (from [N/2..3N/4-1] to [N/4..N/2-1]) */ - out_r = output; - out_r2 = output+n2-8; - in_r = output+n2+n4-8; - while(out_r <- ^b ^c -> <- ^d - * - * #1: copy from ^c to ^a - * #2: copy from ^d to ^b - * #3: swap ^c and ^d in place - */ - /* #1 pt1 : load 4 words from ^c. */ - t0=in_r[0]; t1=in_r[1]; t2=in_r[2]; t3=in_r[3]; - /* #1 pt2 : write to ^a */ - out_r[0]=t0;out_r[1]=t1;out_r[2]=t2;out_r[3]=t3; - /* #2 pt1 : load 4 words from ^d */ - s0=in_r2[0];s1=in_r2[1];s2=in_r2[2];s3=in_r2[3]; - /* #2 pt2 : write to ^b */ - out_r2[0]=s0;out_r2[1]=s1;out_r2[2]=s2;out_r2[3]=s3; - /* #3 pt1 : write words from #2 to ^c */ - in_r[0]=s3;in_r[1]=s2;in_r[2]=s1;in_r[3]=s0; - /* #3 pt2 : write words from #1 to ^d */ - in_r2[0]=t3;in_r2[1]=t2;in_r2[2]=t1;in_r2[3]=t0; - - in_r += 4; - in_r2 -= 4; - out_r += 4; - out_r2 -= 4; -#endif - } -} -#else -/* Follows the same structure as the canonical version above */ -void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input) -{ - const int n = (1<>1); - const int n4 = (n>>2); - - ff_imdct_half(nbits,output+n2,input); - - fixed32 * in_r, * in_r2, * out_r, * out_r2; - - out_r = output; - out_r2 = output+n2; - in_r = output+n2+n4; - while(out_r