summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Giacomelli <giac2000@hotmail.com>2009-07-28 22:19:53 +0000
committerMichael Giacomelli <giac2000@hotmail.com>2009-07-28 22:19:53 +0000
commit5bd339c9f902749c8d88deb3be0992e0e0fe6ee9 (patch)
tree46d1359b9e511becd40f392d4ce4b6da604e27d7
parent30e3350030a4ec46d17e6e3fb412a7022fedd145 (diff)
downloadrockbox-5bd339c9f902749c8d88deb3be0992e0e0fe6ee9.tar.gz
rockbox-5bd339c9f902749c8d88deb3be0992e0e0fe6ee9.zip
Clean up a lot of old and ugly comments. Wrap some long lines. Remove unneeded fixed point function. Move some fixed point math functions out of wmadeci. No functional changes.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22077 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libwma/wmadec.h19
-rw-r--r--apps/codecs/libwma/wmadeci.c217
-rw-r--r--apps/codecs/libwma/wmafixed.c131
-rw-r--r--apps/codecs/libwma/wmafixed.h1
4 files changed, 182 insertions, 186 deletions
diff --git a/apps/codecs/libwma/wmadec.h b/apps/codecs/libwma/wmadec.h
index aaa06ee2a6..d22e435304 100644
--- a/apps/codecs/libwma/wmadec.h
+++ b/apps/codecs/libwma/wmadec.h
@@ -64,6 +64,25 @@
64#endif 64#endif
65#endif 65#endif
66 66
67#define VLCBITS 7 /*7 is the lowest without glitching*/
68#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
69
70#define EXPVLCBITS 7
71#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)
72
73#define HGAINVLCBITS 9
74#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
75
76
77typedef struct CoefVLCTable
78{
79 int n; /* total number of codes */
80 const uint32_t *huffcodes; /* VLC bit values */
81 const uint8_t *huffbits; /* VLC bit size */
82 const uint16_t *levels; /* table to build run/level tables */
83}
84CoefVLCTable;
85
67typedef struct WMADecodeContext 86typedef struct WMADecodeContext
68{ 87{
69 GetBitContext gb; 88 GetBitContext gb;
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index aa81b5b81f..d1baca4c99 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -28,39 +28,29 @@
28#include "wmadec.h" 28#include "wmadec.h"
29#include "wmafixed.h" 29#include "wmafixed.h"
30#include "bitstream.h" 30#include "bitstream.h"
31#include "wmadata.h"
31 32
32 33
33#define VLCBITS 7 /*7 is the lowest without glitching*/
34#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
35
36#define EXPVLCBITS 7
37#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)
38
39#define HGAINVLCBITS 9
40#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
41
42
43typedef struct CoefVLCTable
44{
45 int n; /* total number of codes */
46 const uint32_t *huffcodes; /* VLC bit values */
47 const uint8_t *huffbits; /* VLC bit size */
48 const uint16_t *levels; /* table to build run/level tables */
49}
50CoefVLCTable;
51
52static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); 34static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len);
35inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
36 const fixed32 *window, int n);
37inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
38 const fixed32 *src1, int len);
39
40/*declarations of statically allocated variables used to remove malloc calls*/
53 41
54fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR; 42fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
55/*decode and window into IRAM on targets with at least 80KB of codec IRAM*/ 43/*decode and window into IRAM on targets with at least 80KB of codec IRAM*/
56fixed32 frame_out_buf[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] IBSS_ATTR_WMA_LARGE_IRAM; 44fixed32 frame_out_buf[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] IBSS_ATTR_WMA_LARGE_IRAM;
57 45
58//static variables that replace malloced stuff 46/*MDCT reconstruction windows*/
59fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128]; //these are the MDCT reconstruction windows 47fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128];
60 48
61uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables 49/*VLC lookup tables*/
50uint16_t *runtabarray[2], *levtabarray[2];
62 51
63uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336 52/*these could be made smaller since only one can be 1336*/
53uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336];
64 54
65#define VLCBUF1SIZE 4598 55#define VLCBUF1SIZE 4598
66#define VLCBUF2SIZE 3574 56#define VLCBUF2SIZE 3574
@@ -76,141 +66,6 @@ VLC_TYPE vlcbuf4[VLCBUF4SIZE][2];
76 66
77 67
78 68
79#include "wmadata.h" // PJJ
80
81
82
83/*
84 * Helper functions for wma_window.
85 *
86 *
87 */
88
89#ifdef CPU_ARM
90static inline
91void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
92 const fixed32 *window, int n)
93{
94 /* Block sizes are always power of two */
95 asm volatile (
96 "0:"
97 "ldmia %[d]!, {r0, r1};"
98 "ldmia %[w]!, {r4, r5};"
99 /* consume the first data and window value so we can use those
100 * registers again */
101 "smull r8, r9, r0, r4;"
102 "ldmia %[dst], {r0, r4};"
103 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
104 "smull r8, r9, r1, r5;"
105 "add r1, r4, r9, lsl #1;"
106 "stmia %[dst]!, {r0, r1};"
107 "subs %[n], %[n], #2;"
108 "bne 0b;"
109 : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
110 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
111}
112
113static inline
114void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
115 int len)
116{
117 /* Block sizes are always power of two */
118 asm volatile (
119 "add %[s1], %[s1], %[n], lsl #2;"
120 "0:"
121 "ldmia %[s0]!, {r0, r1};"
122 "ldmdb %[s1]!, {r4, r5};"
123 "smull r8, r9, r0, r5;"
124 "mov r0, r9, lsl #1;"
125 "smull r8, r9, r1, r4;"
126 "mov r1, r9, lsl #1;"
127 "stmia %[dst]!, {r0, r1};"
128 "subs %[n], %[n], #2;"
129 "bne 0b;"
130 : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
131 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
132}
133
134#elif defined(CPU_COLDFIRE)
135
136static inline
137void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
138 const fixed32 *window, int n)
139{
140 /* Block sizes are always power of two. Smallest block is always way bigger
141 * than four too.*/
142 asm volatile (
143 "0:"
144 "movem.l (%[d]), %%d0-%%d3;"
145 "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
146 "mac.l %%d0, %%d4, %%acc0;"
147 "mac.l %%d1, %%d5, %%acc1;"
148 "mac.l %%d2, %%a0, %%acc2;"
149 "mac.l %%d3, %%a1, %%acc3;"
150 "lea.l (16, %[d]), %[d];"
151 "lea.l (16, %[w]), %[w];"
152 "movclr.l %%acc0, %%d0;"
153 "movclr.l %%acc1, %%d1;"
154 "movclr.l %%acc2, %%d2;"
155 "movclr.l %%acc3, %%d3;"
156 "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
157 "add.l %%d4, %%d0;"
158 "add.l %%d5, %%d1;"
159 "add.l %%a0, %%d2;"
160 "add.l %%a1, %%d3;"
161 "movem.l %%d0-%%d3, (%[dst]);"
162 "lea.l (16, %[dst]), %[dst];"
163 "subq.l #4, %[n];"
164 "jne 0b;"
165 : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
166 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
167}
168
169static inline
170void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
171 int len)
172{
173 /* Block sizes are always power of two. Smallest block is always way bigger
174 * than four too.*/
175 asm volatile (
176 "lea.l (-16, %[s1], %[n]*4), %[s1];"
177 "0:"
178 "movem.l (%[s0]), %%d0-%%d3;"
179 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
180 "mac.l %%d0, %%a1, %%acc0;"
181 "mac.l %%d1, %%a0, %%acc1;"
182 "mac.l %%d2, %%d5, %%acc2;"
183 "mac.l %%d3, %%d4, %%acc3;"
184 "lea.l (16, %[s0]), %[s0];"
185 "lea.l (-16, %[s1]), %[s1];"
186 "movclr.l %%acc0, %%d0;"
187 "movclr.l %%acc1, %%d1;"
188 "movclr.l %%acc2, %%d2;"
189 "movclr.l %%acc3, %%d3;"
190 "movem.l %%d0-%%d3, (%[dst]);"
191 "lea.l (16, %[dst]), %[dst];"
192 "subq.l #4, %[n];"
193 "jne 0b;"
194 : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
195 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
196}
197
198#else
199
200static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
201 int i;
202 for(i=0; i<len; i++)
203 dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
204}
205
206static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
207 int i;
208 src1 += len-1;
209 for(i=0; i<len; i++)
210 dst[i] = fixmul32b(src0[i], src1[-i]);
211}
212
213#endif
214 69
215/** 70/**
216 * Apply MDCT window and add into output. 71 * Apply MDCT window and add into output.
@@ -227,7 +82,9 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
227 int block_len, bsize, n; 82 int block_len, bsize, n;
228 83
229 /* left part */ 84 /* left part */
230 /*previous block was larger, so we'll use the size of the current block to set the window size*/ 85
86 /* previous block was larger, so we'll use the size of the current
87 * block to set the window size*/
231 if (s->block_len_bits <= s->prev_block_len_bits) { 88 if (s->block_len_bits <= s->prev_block_len_bits) {
232 block_len = s->block_len; 89 block_len = s->block_len;
233 bsize = s->frame_len_bits - s->block_len_bits; 90 bsize = s->frame_len_bits - s->block_len_bits;
@@ -314,7 +171,7 @@ static void init_coef_vlc(VLC *vlc,
314 171
315int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) 172int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
316{ 173{
317 //WMADecodeContext *s = avctx->priv_data; 174
318 int i, flags1, flags2; 175 int i, flags1, flags2;
319 fixed32 *window; 176 fixed32 *window;
320 uint8_t *extradata; 177 uint8_t *extradata;
@@ -608,10 +465,11 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
608 } 465 }
609 */ 466 */
610 467
611 /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage. 468 /* ffmpeg uses malloc to only allocate as many window sizes as needed.
612 * In the worst case you can have 5 window sizes, 128 doubling up to 2048 469 * However, we're really only interested in the worst case memory usage.
613 * Smaller windows are handled differently. 470 * In the worst case you can have 5 window sizes, 128 doubling up to 2048
614 * Since we don't have malloc, just statically allocate this 471 * Smaller windows are handled differently.
472 * Since we don't have malloc, just statically allocate this
615 */ 473 */
616 fixed32 *temp[5]; 474 fixed32 *temp[5];
617 temp[0] = stat0; 475 temp[0] = stat0;
@@ -626,19 +484,15 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
626 int n, j; 484 int n, j;
627 fixed32 alpha; 485 fixed32 alpha;
628 n = 1 << (s->frame_len_bits - i); 486 n = 1 << (s->frame_len_bits - i);
629 //window = av_malloc(sizeof(fixed32) * n);
630 window = temp[i]; 487 window = temp[i];
631 488
632 //fixed32 n2 = itofix32(n<<1); //2x the window length 489 /* this calculates 0.5/(2*n) */
633 //alpha = fixdiv32(M_PI_F, n2); //PI / (2x Window length) == PI<<(s->frame_len_bits - i+1) 490 alpha = (1<<15)>>(s->frame_len_bits - i+1);
634
635 //alpha = M_PI_F>>(s->frame_len_bits - i+1);
636 alpha = (1<<15)>>(s->frame_len_bits - i+1); /* this calculates 0.5/(2*n) */
637 for(j=0;j<n;++j) 491 for(j=0;j<n;++j)
638 { 492 {
639 fixed32 j2 = itofix32(j) + 0x8000; 493 fixed32 j2 = itofix32(j) + 0x8000;
640 window[j] = fsincos(fixmul32(j2,alpha)<<16, 0); //alpha between 0 and pi/2 494 /*alpha between 0 and pi/2*/
641 495 window[j] = fsincos(fixmul32(j2,alpha)<<16, 0);
642 } 496 }
643 s->windows[i] = window; 497 s->windows[i] = window;
644 498
@@ -663,6 +517,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
663 s->noise_table = noisetable_exp; 517 s->noise_table = noisetable_exp;
664 } 518 }
665#if 0 519#if 0
520/* We use a lookup table computed in advance, so no need to do this*/
666 { 521 {
667 unsigned int seed; 522 unsigned int seed;
668 fixed32 norm; 523 fixed32 norm;
@@ -836,7 +691,9 @@ static void wma_lsp_to_curve(WMADecodeContext *s,
836 *val_max_ptr = val_max; 691 *val_max_ptr = val_max;
837} 692}
838 693
839/* decode exponents coded with LSP coefficients (same idea as Vorbis) */ 694/* decode exponents coded with LSP coefficients (same idea as Vorbis)
695 * only used for low bitrate (< 16kbps) files
696 */
840static void decode_exp_lsp(WMADecodeContext *s, int ch) 697static void decode_exp_lsp(WMADecodeContext *s, int ch)
841{ 698{
842 fixed32 lsp_coefs[NB_LSP_COEFS]; 699 fixed32 lsp_coefs[NB_LSP_COEFS];
@@ -858,7 +715,7 @@ static void decode_exp_lsp(WMADecodeContext *s, int ch)
858 lsp_coefs); 715 lsp_coefs);
859} 716}
860 717
861/* decode exponents coded with VLC codes */ 718/* decode exponents coded with VLC codes - used for bitrate >= 32kbps*/
862static int decode_exp_vlc(WMADecodeContext *s, int ch) 719static int decode_exp_vlc(WMADecodeContext *s, int ch)
863{ 720{
864 int last_exp, n, code; 721 int last_exp, n, code;
@@ -879,7 +736,7 @@ static int decode_exp_vlc(WMADecodeContext *s, int ch)
879 if (s->version == 1) //wmav1 only 736 if (s->version == 1) //wmav1 only
880 { 737 {
881 last_exp = get_bits(&s->gb, 5) + 10; 738 last_exp = get_bits(&s->gb, 5) + 10;
882 /* XXX: use a table */ 739
883 v = pow_10_to_yover16_ptr[last_exp]; 740 v = pow_10_to_yover16_ptr[last_exp];
884 max_scale = v; 741 max_scale = v;
885 n = *ptr++; 742 n = *ptr++;
@@ -901,7 +758,7 @@ static int decode_exp_vlc(WMADecodeContext *s, int ch)
901 } 758 }
902 /* NOTE: this offset is the same as MPEG4 AAC ! */ 759 /* NOTE: this offset is the same as MPEG4 AAC ! */
903 last_exp += code - 60; 760 last_exp += code - 60;
904 /* XXX: use a table */ 761
905 v = pow_10_to_yover16_ptr[last_exp]; 762 v = pow_10_to_yover16_ptr[last_exp];
906 if (v > max_scale) 763 if (v > max_scale)
907 { 764 {
@@ -1136,7 +993,7 @@ static int wma_decode_block(WMADecodeContext *s, int32_t *scratch_buffer)
1136 for(;;) 993 for(;;)
1137 { 994 {
1138 code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX); 995 code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX);
1139 //code = get_vlc(&s->gb, coef_vlc); 996
1140 if (code < 0) 997 if (code < 0)
1141 { 998 {
1142 return -8; 999 return -8;
@@ -1228,7 +1085,9 @@ static int wma_decode_block(WMADecodeContext *s, int32_t *scratch_buffer)
1228 1085
1229 1086
1230 if (s->use_noise_coding) 1087 if (s->use_noise_coding)
1231 { 1088 {
1089 /*This case is only used for low bitrates (typically less than 32kbps)*/
1090
1232 /*TODO: mult should be converted to 32 bit to speed up noise coding*/ 1091 /*TODO: mult should be converted to 32 bit to speed up noise coding*/
1233 1092
1234 mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch])); 1093 mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c
index 3c96700076..5569309145 100644
--- a/apps/codecs/libwma/wmafixed.c
+++ b/apps/codecs/libwma/wmafixed.c
@@ -63,20 +63,139 @@ fixed64 Fixed32To64(fixed32 x)
63 return (fixed64)x; 63 return (fixed64)x;
64} 64}
65 65
66
66/* 67/*
67 Not performance sensitive code here 68 * Helper functions for wma_window.
69 *
70 *
71 */
68 72
69*/ 73#ifdef CPU_ARM
74inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
75 const fixed32 *window, int n)
76{
77 /* Block sizes are always power of two */
78 asm volatile (
79 "0:"
80 "ldmia %[d]!, {r0, r1};"
81 "ldmia %[w]!, {r4, r5};"
82 /* consume the first data and window value so we can use those
83 * registers again */
84 "smull r8, r9, r0, r4;"
85 "ldmia %[dst], {r0, r4};"
86 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
87 "smull r8, r9, r1, r5;"
88 "add r1, r4, r9, lsl #1;"
89 "stmia %[dst]!, {r0, r1};"
90 "subs %[n], %[n], #2;"
91 "bne 0b;"
92 : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
93 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
94}
95
96inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
97 int len)
98{
99 /* Block sizes are always power of two */
100 asm volatile (
101 "add %[s1], %[s1], %[n], lsl #2;"
102 "0:"
103 "ldmia %[s0]!, {r0, r1};"
104 "ldmdb %[s1]!, {r4, r5};"
105 "smull r8, r9, r0, r5;"
106 "mov r0, r9, lsl #1;"
107 "smull r8, r9, r1, r4;"
108 "mov r1, r9, lsl #1;"
109 "stmia %[dst]!, {r0, r1};"
110 "subs %[n], %[n], #2;"
111 "bne 0b;"
112 : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
113 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
114}
70 115
116#elif defined(CPU_COLDFIRE)
71 117
72fixed64 fixmul64byfixed(fixed64 x, fixed32 y) 118inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
119 const fixed32 *window, int n)
73{ 120{
121 /* Block sizes are always power of two. Smallest block is always way bigger
122 * than four too.*/
123 asm volatile (
124 "0:"
125 "movem.l (%[d]), %%d0-%%d3;"
126 "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
127 "mac.l %%d0, %%d4, %%acc0;"
128 "mac.l %%d1, %%d5, %%acc1;"
129 "mac.l %%d2, %%a0, %%acc2;"
130 "mac.l %%d3, %%a1, %%acc3;"
131 "lea.l (16, %[d]), %[d];"
132 "lea.l (16, %[w]), %[w];"
133 "movclr.l %%acc0, %%d0;"
134 "movclr.l %%acc1, %%d1;"
135 "movclr.l %%acc2, %%d2;"
136 "movclr.l %%acc3, %%d3;"
137 "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
138 "add.l %%d4, %%d0;"
139 "add.l %%d5, %%d1;"
140 "add.l %%a0, %%d2;"
141 "add.l %%a1, %%d3;"
142 "movem.l %%d0-%%d3, (%[dst]);"
143 "lea.l (16, %[dst]), %[dst];"
144 "subq.l #4, %[n];"
145 "jne 0b;"
146 : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
147 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
148}
74 149
75 //return x * y; 150inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
76 return (x * y); 151 int len)
77 // return (fixed64) fixmul32(Fixed32From64(x),y); 152{
153 /* Block sizes are always power of two. Smallest block is always way bigger
154 * than four too.*/
155 asm volatile (
156 "lea.l (-16, %[s1], %[n]*4), %[s1];"
157 "0:"
158 "movem.l (%[s0]), %%d0-%%d3;"
159 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
160 "mac.l %%d0, %%a1, %%acc0;"
161 "mac.l %%d1, %%a0, %%acc1;"
162 "mac.l %%d2, %%d5, %%acc2;"
163 "mac.l %%d3, %%d4, %%acc3;"
164 "lea.l (16, %[s0]), %[s0];"
165 "lea.l (-16, %[s1]), %[s1];"
166 "movclr.l %%acc0, %%d0;"
167 "movclr.l %%acc1, %%d1;"
168 "movclr.l %%acc2, %%d2;"
169 "movclr.l %%acc3, %%d3;"
170 "movem.l %%d0-%%d3, (%[dst]);"
171 "lea.l (16, %[dst]), %[dst];"
172 "subq.l #4, %[n];"
173 "jne 0b;"
174 : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
175 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
78} 176}
79 177
178#else
179
180inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
181 int i;
182 for(i=0; i<len; i++)
183 dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
184}
185
186inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
187 int i;
188 src1 += len-1;
189 for(i=0; i<len; i++)
190 dst[i] = fixmul32b(src0[i], src1[-i]);
191}
192
193#endif
194
195/*
196 Not performance sensitive code here
197
198*/
80 199
81fixed32 fixdiv32(fixed32 x, fixed32 y) 200fixed32 fixdiv32(fixed32 x, fixed32 y)
82{ 201{
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index fb31cf8bfa..6b5137e044 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -47,7 +47,6 @@ fixed64 IntTo64(int x);
47int IntFrom64(fixed64 x); 47int IntFrom64(fixed64 x);
48fixed32 Fixed32From64(fixed64 x); 48fixed32 Fixed32From64(fixed64 x);
49fixed64 Fixed32To64(fixed32 x); 49fixed64 Fixed32To64(fixed32 x);
50fixed64 fixmul64byfixed(fixed64 x, fixed32 y);
51fixed32 fixdiv32(fixed32 x, fixed32 y); 50fixed32 fixdiv32(fixed32 x, fixed32 y);
52fixed64 fixdiv64(fixed64 x, fixed64 y); 51fixed64 fixdiv64(fixed64 x, fixed64 y);
53fixed32 fixsqrt32(fixed32 x); 52fixed32 fixsqrt32(fixed32 x);