summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org> 2011-09-22 11:47:51 +0000
committer: Nils Wallménius <nils@rockbox.org> 2011-09-22 11:47:51 +0000
commit: 412cdd6cf7d6c9025532fe226ed2500403890203 (patch)
tree: 7fe6b8f48a2401027cac16403b040b153e190d61
parent: b1298c2c442ebe1db5d58cbea5ffab6d2e302d74 (diff)
download: rockbox-412cdd6cf7d6c9025532fe226ed2500403890203.tar.gz
download: rockbox-412cdd6cf7d6c9025532fe226ed2500403890203.zip
libtremor: port over ffmpeg's windowing code
Use the windowing approach from ffmpeg in tremor, which does the MDCT doubling, windowing and overlap-add in one go. It also uses less memory, so all the processing buffers now fit in IRAM for the common blocksizes (256/2048) on targets with small IRAM.

Speeds up decoding of vorbis files by 3MHz for 256/2048 and 20MHz for 512/4096 files on h300.
Speeds up decoding of vorbis files by 3MHz for 256/2048 and 4.5MHz for 512/4096 on the beast.
Speeds up decoding of vorbis files by 0.3MHz for 256/2048 and 1MHz for 512/4096 on c200v1.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30580 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libtremor/asm_arm.h 168
-rw-r--r-- apps/codecs/libtremor/asm_mcf5249.h 32
-rw-r--r-- apps/codecs/libtremor/block.c 181
-rw-r--r-- apps/codecs/libtremor/config-tremor.h 8
-rw-r--r-- apps/codecs/libtremor/ffmpeg_render_line.h 71
-rw-r--r-- apps/codecs/libtremor/ivorbiscodec.h 16
-rw-r--r-- apps/codecs/libtremor/mapping0.c 28
-rw-r--r-- apps/codecs/libtremor/misc.h 27
-rw-r--r-- apps/codecs/libtremor/synthesis.c 20
-rw-r--r-- apps/codecs/libtremor/window.c 4
-rw-r--r-- apps/codecs/libtremor/window.h 4
11 files changed, 143 insertions, 416 deletions
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index eb0d3ca789..42f82ec088 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -17,174 +17,6 @@
17 17
18#ifdef _ARM_ASSEM_ 18#ifdef _ARM_ASSEM_
19 19
20#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
21#define _V_WIDE_MATH
22#ifndef _TREMOR_VECT_OPS
23#define _TREMOR_VECT_OPS
24/* asm versions of vector operations for block.c, window.c */
25/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
26 NOT do a final shift, meaning that the result of vect_mult_bw is
27 only 31 bits not 32. This is so that we can do the shift in-place
28 in vect_add_xxxx instead to save one instruction for each mult on arm */
29static inline
30void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
31{
32 /* first arg is right subframe of previous frame and second arg
33 is left subframe of current frame. overlap left onto right overwriting
34 the right subframe */
35
36 do{
37 asm volatile (
38 "ldmia %[x], {r0, r1, r2, r3};"
39 "ldmia %[y]!, {r4, r5, r6, r7};"
40 "add r0, r4, r0, lsl #1;"
41 "add r1, r5, r1, lsl #1;"
42 "add r2, r6, r2, lsl #1;"
43 "add r3, r7, r3, lsl #1;"
44 "stmia %[x]!, {r0, r1, r2, r3};"
45 "ldmia %[x], {r0, r1, r2, r3};"
46 "ldmia %[y]!, {r4, r5, r6, r7};"
47 "add r0, r4, r0, lsl #1;"
48 "add r1, r5, r1, lsl #1;"
49 "add r2, r6, r2, lsl #1;"
50 "add r3, r7, r3, lsl #1;"
51 "stmia %[x]!, {r0, r1, r2, r3};"
52 : [x] "+r" (x), [y] "+r" (y)
53 : : "r0", "r1", "r2", "r3",
54 "r4", "r5", "r6", "r7",
55 "memory");
56 n -= 8;
57 } while (n);
58}
59
60static inline
61void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
62{
63 /* first arg is left subframe of current frame and second arg
64 is right subframe of previous frame. overlap right onto left overwriting
65 the LEFT subframe */
66 do{
67 asm volatile (
68 "ldmia %[x], {r0, r1, r2, r3};"
69 "ldmia %[y]!, {r4, r5, r6, r7};"
70 "add r0, r0, r4, lsl #1;"
71 "add r1, r1, r5, lsl #1;"
72 "add r2, r2, r6, lsl #1;"
73 "add r3, r3, r7, lsl #1;"
74 "stmia %[x]!, {r0, r1, r2, r3};"
75 "ldmia %[x], {r0, r1, r2, r3};"
76 "ldmia %[y]!, {r4, r5, r6, r7};"
77 "add r0, r0, r4, lsl #1;"
78 "add r1, r1, r5, lsl #1;"
79 "add r2, r2, r6, lsl #1;"
80 "add r3, r3, r7, lsl #1;"
81 "stmia %[x]!, {r0, r1, r2, r3};"
82 : [x] "+r" (x), [y] "+r" (y)
83 : : "r0", "r1", "r2", "r3",
84 "r4", "r5", "r6", "r7",
85 "memory");
86 n -= 8;
87 } while (n);
88}
89
90#if ARM_ARCH >= 6
91static inline
92void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
93{
94 /* Note, mult_fw uses MULT31 */
95 do{
96 asm volatile (
97 "ldmia %[d], {r0, r1, r2, r3};"
98 "ldmia %[w]!, {r4, r5, r6, r7};"
99 "smmul r0, r4, r0;"
100 "smmul r1, r5, r1;"
101 "smmul r2, r6, r2;"
102 "smmul r3, r7, r3;"
103 "mov r0, r0, lsl #1;"
104 "mov r1, r1, lsl #1;"
105 "mov r2, r2, lsl #1;"
106 "mov r3, r3, lsl #1;"
107 "stmia %[d]!, {r0, r1, r2, r3};"
108 : [d] "+r" (data), [w] "+r" (window)
109 : : "r0", "r1", "r2", "r3",
110 "r4", "r5", "r6", "r7",
111 "memory" );
112 n -= 4;
113 } while (n);
114}
115#else
116static inline
117void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
118{
119 /* Note, mult_fw uses MULT31 */
120 do{
121 asm volatile (
122 "ldmia %[d], {r0, r1, r2, r3};"
123 "ldmia %[w]!, {r4, r5, r6, r7};"
124 "smull r8, r0, r4, r0;"
125 "mov r0, r0, lsl #1;"
126 "smull r8, r1, r5, r1;"
127 "mov r1, r1, lsl #1;"
128 "smull r8, r2, r6, r2;"
129 "mov r2, r2, lsl #1;"
130 "smull r8, r3, r7, r3;"
131 "mov r3, r3, lsl #1;"
132 "stmia %[d]!, {r0, r1, r2, r3};"
133 : [d] "+r" (data), [w] "+r" (window)
134 : : "r0", "r1", "r2", "r3",
135 "r4", "r5", "r6", "r7", "r8",
136 "memory" );
137 n -= 4;
138 } while (n);
139}
140#endif
141
142#if ARM_ARCH >= 6
143static inline
144void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
145{
146 /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
147 /* On ARM, we can do the shift at the same time as the overlap-add */
148 do{
149 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
150 "ldmda %[w]!, {r4, r5, r6, r7};"
151 "smmul r0, r7, r0;"
152 "smmul r1, r6, r1;"
153 "smmul r2, r5, r2;"
154 "smmul r3, r4, r3;"
155 "stmia %[d]!, {r0, r1, r2, r3};"
156 : [d] "+r" (data), [w] "+r" (window)
157 : : "r0", "r1", "r2", "r3",
158 "r4", "r5", "r6", "r7",
159 "memory" );
160 n -= 4;
161 } while (n);
162}
163#else
164static inline
165void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
166{
167 /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
168 /* On ARM, we can do the shift at the same time as the overlap-add */
169 do{
170 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
171 "ldmda %[w]!, {r4, r5, r6, r7};"
172 "smull r8, r0, r7, r0;"
173 "smull r7, r1, r6, r1;"
174 "smull r6, r2, r5, r2;"
175 "smull r5, r3, r4, r3;"
176 "stmia %[d]!, {r0, r1, r2, r3};"
177 : [d] "+r" (data), [w] "+r" (window)
178 : : "r0", "r1", "r2", "r3",
179 "r4", "r5", "r6", "r7", "r8",
180 "memory" );
181 n -= 4;
182 } while (n);
183}
184#endif
185#endif
186#endif
187
188#ifndef _V_LSP_MATH_ASM 20#ifndef _V_LSP_MATH_ASM
189#define _V_LSP_MATH_ASM 21#define _V_LSP_MATH_ASM
190 22
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 3e7d46983e..66de07615f 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -28,37 +28,5 @@
28 28
29#define MB() 29#define MB()
30 30
31#ifndef _TREMOR_VECT_OPS
32#define _TREMOR_VECT_OPS
33static inline
34void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
35{
36 /* coldfire asm has symmetrical versions of vect_add_right_left
37 and vect_add_left_right (since symmetrical versions of
38 vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
39 vect_add(x, y, n );
40}
41
42static inline
43void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
44{
45 /* coldfire asm has symmetrical versions of vect_add_right_left
46 and vect_add_left_right (since symmetrical versions of
47 vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
48 vect_add(x, y, n );
49}
50
51static inline
52void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
53{
54 vect_mult_fw(data, window, n);
55}
56
57static inline
58void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
59{
60 vect_mult_bw(data, window, n);
61}
62#endif
63#endif 31#endif
64#endif 32#endif
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c
index d678719cbe..b7ce5adc4e 100644
--- a/apps/codecs/libtremor/block.c
+++ b/apps/codecs/libtremor/block.c
@@ -25,6 +25,7 @@
25#include "window.h" 25#include "window.h"
26#include "registry.h" 26#include "registry.h"
27#include "misc.h" 27#include "misc.h"
28#include "ffmpeg_render_line.h"
28//#include <codecs/lib/codeclib.h> 29//#include <codecs/lib/codeclib.h>
29 30
30static int ilog(unsigned int v){ 31static int ilog(unsigned int v){
@@ -37,11 +38,10 @@ static int ilog(unsigned int v){
37 return(ret); 38 return(ret);
38} 39}
39 40
40static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
41static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR; 41static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
42static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR; 42static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
43/* save original pointers returned by malloc so we can free it easily */ 43/* save original pointers returned by malloc so we can free it easily */
44static ogg_int32_t* pcm_copy[CHANNELS] = {NULL}; 44static ogg_int32_t* malloc_pointers[3] = {NULL};
45 45
46/* pcm accumulator examples (not exhaustive): 46/* pcm accumulator examples (not exhaustive):
47 47
@@ -153,14 +153,13 @@ int vorbis_block_clear(vorbis_block *vb){
153static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ 153static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
154 int i; 154 int i;
155 long b_size[2]; 155 long b_size[2];
156 156
157 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 157 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
158 private_state *b=NULL; 158 private_state *b=NULL;
159 159
160 if(ci==NULL) return 1; 160 if(ci==NULL) return 1;
161 161
162 memset(v,0,sizeof(*v)); 162 memset(v,0,sizeof(*v));
163 v->reset_pcmb=true;
164 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); 163 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
165 164
166 v->vi=vi; 165 v->vi=vi;
@@ -169,16 +168,42 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
169#ifdef TREMOR_USE_IRAM 168#ifdef TREMOR_USE_IRAM
170 /* allocate IRAM buffer for the PCM data generated by synthesis */ 169 /* allocate IRAM buffer for the PCM data generated by synthesis */
171 iram_malloc_init(); 170 iram_malloc_init();
172 v->first_pcm = iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t)); 171
173 /* when can't allocate IRAM buffer, allocate normal RAM buffer */ 172 v->floors = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
174 if(v->first_pcm == NULL) 173 v->residues[0] = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
174 /* if we can get away with it, put a double buffer into IRAM too, so that
175 overlap-add runs iram-to-iram and we avoid needing to memcpy */
176 v->residues[1] = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
177 if (v->residues[1] == NULL)
178 v->saved = iram_malloc(vi->channels*ci->blocksizes[1]/4*sizeof(ogg_int32_t));
179
175#endif 180#endif
176 { 181
177 pcm_copy[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t)); 182 if (v->residues[0] == NULL) {
178 v->first_pcm = pcm_copy[0]; 183 malloc_pointers[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
184 v->residues[0] = malloc_pointers[0];
185 }
186
187 if (v->residues[1] == NULL && v->saved == NULL) {
188 malloc_pointers[1] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
189 v->residues[1] = malloc_pointers[1];
179 } 190 }
180 191
181 v->centerW=0; 192 if (v->floors == NULL) {
193 malloc_pointers[2] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
194 v->floors = malloc_pointers[2];
195 }
196
197 /* needed for the first overlap/add */
198 if (v->saved) {
199 memset(v->saved, 0, vi->channels*ci->blocksizes[1]/4*sizeof(ogg_int32_t));
200 for (i = 0; i < vi->channels; i++)
201 v->saved_ptr[i] = v->saved + i*ci->blocksizes[1]/4;
202 } else {
203 memset(v->residues[1], 0, vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
204 for (i = 0; i < vi->channels; i++)
205 v->saved_ptr[i] = v->residues[1] + i*ci->blocksizes[1]/2;
206 }
182 207
183 /* Vorbis I uses only window type 0 */ 208 /* Vorbis I uses only window type 0 */
184 b_size[0]=ci->blocksizes[0]/2; 209 b_size[0]=ci->blocksizes[0]/2;
@@ -214,37 +239,13 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
214 } 239 }
215 } 240 }
216 241
217 /* if we can get away with it, put a double buffer into IRAM too, so that
218 overlap-add runs iram-to-iram and we avoid needing to memcpy */
219 v->pcm_storage=ci->blocksizes[1]; 242 v->pcm_storage=ci->blocksizes[1];
220 v->pcm=_pcmp;
221 v->pcmret=_pcmret; 243 v->pcmret=_pcmret;
222 v->pcmb=_pcmbp; 244 v->pcmb=_pcmbp;
223 245
224 _pcmp[0]=NULL;
225 _pcmp[1]=NULL;
226 _pcmbp[0]=NULL; 246 _pcmbp[0]=NULL;
227 _pcmbp[1]=NULL; 247 _pcmbp[1]=NULL;
228 248
229#ifdef TREMOR_USE_IRAM
230 if(NULL != (v->iram_double_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
231 {
232 /* one-time initialisation at codec start or on switch from
233 blocksizes greater than IRAM_PCM_END to sizes that fit */
234 for(i=0;i<vi->channels;i++)
235 v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
236 }
237 else
238#endif
239 {
240 /* one-time initialisation at codec start or on switch from
241 blocksizes that fit in IRAM_PCM_END to those that don't */
242 /* save copy of the pointer so we can free it easily later */
243 pcm_copy[1] = _ogg_calloc(vi->channels*v->pcm_storage,sizeof(*v->pcm[i]));
244 for(i=0;i<vi->channels;i++)
245 v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
246 }
247
248 /* all 1 (large block) or 0 (small block) */ 249 /* all 1 (large block) or 0 (small block) */
249 /* explicitly set for the sake of clarity */ 250 /* explicitly set for the sake of clarity */
250 v->lW=0; /* previous window size */ 251 v->lW=0; /* previous window size */
@@ -274,35 +275,19 @@ abort_books:
274int vorbis_synthesis_restart(vorbis_dsp_state *v){ 275int vorbis_synthesis_restart(vorbis_dsp_state *v){
275 vorbis_info *vi=v->vi; 276 vorbis_info *vi=v->vi;
276 codec_setup_info *ci; 277 codec_setup_info *ci;
277 int i;
278 278
279 if(!v->backend_state)return -1; 279 if(!v->backend_state)return -1;
280 if(!vi)return -1; 280 if(!vi)return -1;
281 ci=vi->codec_setup; 281 ci=vi->codec_setup;
282 if(!ci)return -1; 282 if(!ci)return -1;
283 283
284 v->centerW=0;
285 v->pcm_current=0; 284 v->pcm_current=0;
286 285
287 v->pcm_returned=-1; 286 v->pcm_returned=-1;
288 v->granulepos=-1; 287 v->granulepos=-1;
289 v->sequence=-1; 288 v->sequence=-1;
290 ((private_state *)(v->backend_state))->sample_count=-1; 289 ((private_state *)(v->backend_state))->sample_count=-1;
291 290
292 /* indicate to synthesis code that buffer pointers no longer valid
293 (if we're using double pcm buffer) and will need to reset them */
294 v->reset_pcmb = true;
295 /* also reset our copy of the double buffer pointers if we have one */
296#ifdef TREMOR_USE_IRAM
297 if(v->iram_double_pcm)
298 {
299 for(i=0;i<vi->channels;i++)
300 v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
301 }
302#else
303 for(i=0;i<vi->channels;i++)
304 v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
305#endif
306 return(0); 291 return(0);
307} 292}
308 293
@@ -323,11 +308,10 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
323 if(vi != NULL) 308 if(vi != NULL)
324 { 309 {
325 /* pcm buffer came from oggmalloc rather than iram */ 310 /* pcm buffer came from oggmalloc rather than iram */
326 for(i=0;i<2;i++) 311 for(i=0;i<3;i++)
327 if(pcm_copy[i]) 312 if(malloc_pointers[i]) {
328 { 313 _ogg_free(malloc_pointers[i]);
329 _ogg_free(pcm_copy[i]); 314 malloc_pointers[i] = NULL;
330 pcm_copy[i] = NULL;
331 } 315 }
332 } 316 }
333 317
@@ -359,10 +343,6 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
359 vorbis_info *vi=v->vi; 343 vorbis_info *vi=v->vi;
360 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 344 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
361 private_state *b=v->backend_state; 345 private_state *b=v->backend_state;
362 int j;
363#ifdef TREMOR_USE_IRAM
364 bool iram_pcm_doublebuffer = (NULL != v->iram_double_pcm);
365#endif
366 346
367 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); 347 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
368 348
@@ -380,79 +360,11 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
380 int n=ci->blocksizes[v->W]/2; 360 int n=ci->blocksizes[v->W]/2;
381 int ln=ci->blocksizes[v->lW]/2; 361 int ln=ci->blocksizes[v->lW]/2;
382 362
383 if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly 363 if(LIKELY(vb->pcmend != 0)){ /* no pcm to process if vorbis_synthesis_trackonly
384 was called on block */ 364 was called on block */
385 int prevCenter; 365 window_overlap_add(ci->blocksizes[v->W], ci->blocksizes[v->lW],
386 int n0=ci->blocksizes[0]/2; 366 ci->blocksizes[0], ci->blocksizes[1], vi->channels,
387 int n1=ci->blocksizes[1]/2; 367 b->window[v->W & v->lW], v);
388
389#ifdef TREMOR_USE_IRAM
390 if(!iram_pcm_doublebuffer)
391 {
392 prevCenter = v->centerW;
393 v->centerW = n1 - v->centerW;
394 }
395 else
396#endif
397 prevCenter = ln;
398
399 /* overlap/add PCM */
400 /* nb nothing to overlap with on first block so don't bother */
401 if(LIKELY(v->pcm_returned!=-1))
402 {
403 for(j=0;j<vi->channels;j++)
404 {
405 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
406 ogg_int32_t *p=vb->pcm[j];
407
408 /* the overlap/add section */
409 if(v->lW == v->W)
410 {
411 /* large/large or small/small */
412 vect_add_right_left(pcm,p,n);
413 v->pcmb[j]=pcm;
414 }
415 else if (!v->W)
416 {
417 /* large/small */
418 vect_add_right_left(pcm + (n1-n0)/2, p, n0);
419 v->pcmb[j]=pcm;
420 }
421 else
422 {
423 /* small/large */
424 p += (n1-n0)/2;
425 vect_add_left_right(p,pcm,n0);
426 v->pcmb[j]=p;
427 }
428 }
429 }
430#ifdef TREMOR_USE_IRAM
431 /* the copy section */
432 if(!iram_pcm_doublebuffer)
433 {
434 for(j=0;j<vi->channels;j++)
435 {
436 /* at best only vb->pcm is in iram, and that's where we do the
437 synthesis, so we copy out the right-hand subframe of last
438 synthesis into (noniram) local buffer so we can still do
439 synth in iram */
440 vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
441 }
442 }
443 else
444#endif
445 {
446 /* just flip the pointers over as we have a double buffer in iram */
447 ogg_int32_t *p;
448 p=v->pcm[0];
449 v->pcm[0]=vb->pcm[0];
450 vb->pcm[0] = p;
451 p=v->pcm[1];
452 v->pcm[1]=vb->pcm[1];
453 vb->pcm[1] = p;
454 }
455
456 /* deal with initial packet state; we do this using the explicit 368 /* deal with initial packet state; we do this using the explicit
457 pcm_returned==-1 flag otherwise we're sensitive to first block 369 pcm_returned==-1 flag otherwise we're sensitive to first block
458 being short or long */ 370 being short or long */
@@ -464,7 +376,6 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
464 v->pcm_returned=0; 376 v->pcm_returned=0;
465 v->pcm_current=(n+ln)/2; 377 v->pcm_current=(n+ln)/2;
466 } 378 }
467
468 } 379 }
469 380
470 /* track the frame number... This is for convenience, but also 381 /* track the frame number... This is for convenience, but also
diff --git a/apps/codecs/libtremor/config-tremor.h b/apps/codecs/libtremor/config-tremor.h
index eba0fe0912..cf2dbe977d 100644
--- a/apps/codecs/libtremor/config-tremor.h
+++ b/apps/codecs/libtremor/config-tremor.h
@@ -40,10 +40,10 @@
40 40
41/* Define CPU of Normal IRAM (96KB) */ 41/* Define CPU of Normal IRAM (96KB) */
42#else 42#else
43/* PCM_BUFFER : 16384 Byte (2048*2*4) * 43/* floor and double residue buffer : 24576 Byte (2048/2*4*2*3) *
44 * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) * 44 * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
45 * TOTAL : 20992 */ 45 * TOTAL : 29184 */
46#define IRAM_IBSS_SIZE 20992 46#define IRAM_IBSS_SIZE 29184
47#endif 47#endif
48#endif 48#endif
49 49
diff --git a/apps/codecs/libtremor/ffmpeg_render_line.h b/apps/codecs/libtremor/ffmpeg_render_line.h
index 1b760ae20e..a08952d95d 100644
--- a/apps/codecs/libtremor/ffmpeg_render_line.h
+++ b/apps/codecs/libtremor/ffmpeg_render_line.h
@@ -21,6 +21,9 @@
21 */ 21 */
22 22
23/* render_line and friend taken from ffmpeg (libavcodec/vorbis.c) */ 23/* render_line and friend taken from ffmpeg (libavcodec/vorbis.c) */
24
25#include "misc.h"
26
24static inline void render_line_unrolled(int x, int y, int x1, 27static inline void render_line_unrolled(int x, int y, int x1,
25 int sy, int ady, int adx, 28 int sy, int ady, int adx,
26 const ogg_int32_t *lookup, ogg_int32_t *buf) 29 const ogg_int32_t *lookup, ogg_int32_t *buf)
@@ -45,8 +48,8 @@ static inline void render_line_unrolled(int x, int y, int x1,
45 } 48 }
46} 49}
47 50
48static void render_line(int x0, int y0, int x1, int y1, 51static inline void render_line(int x0, int y0, int x1, int y1,
49 const ogg_int32_t *lookup, ogg_int32_t *buf) 52 const ogg_int32_t *lookup, ogg_int32_t *buf)
50{ 53{
51 int dy = y1 - y0; 54 int dy = y1 - y0;
52 int adx = x1 - x0; 55 int adx = x1 - x0;
@@ -72,3 +75,67 @@ static void render_line(int x0, int y0, int x1, int y1,
72 } 75 }
73 } 76 }
74} 77}
78
79#ifndef INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
80#define INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
81static inline void ff_vector_fmul_window_c(ogg_int32_t *dst, const ogg_int32_t *src0,
82 const ogg_int32_t *src1, const ogg_int32_t *win, int len){
83 int i,j;
84 dst += len;
85 win += len;
86 src0+= len;
87 for(i=-len, j=len-1; i<0; i++, j--) {
88 ogg_int32_t s0 = src0[i];
89 ogg_int32_t s1 = src1[j];
90 ogg_int32_t wi = win[i];
91 ogg_int32_t wj = win[j];
92 XNPROD31(s0, s1, wj, wi, &dst[i], &dst[j]);
93 /*
94 dst[i] = MULT31(s0,wj) - MULT31(s1,wi);
95 dst[j] = MULT31(s0,wi) + MULT31(s1,wj);
96 */
97 }
98}
99#endif
100
101static inline void copy_normalize(ogg_int32_t *dst, ogg_int32_t *src, int len)
102{
103 memcpy(dst, src, len * sizeof(ogg_int32_t));
104}
105
106static inline void window_overlap_add(unsigned int blocksize, unsigned int lastblock,
107 unsigned int bs0, unsigned int bs1, int ch,
108 const ogg_int32_t *win, vorbis_dsp_state *v)
109{
110 unsigned retlen = (blocksize + lastblock) / 4;
111 int j;
112 for (j = 0; j < ch; j++) {
113 ogg_int32_t *residue = v->residues[v->ri] + j * blocksize / 2;
114 ogg_int32_t *saved;
115 saved = v->saved_ptr[j];
116 ogg_int32_t *ret = v->floors + j * retlen;
117 ogg_int32_t *buf = residue;
118
119 if (v->W == v->lW) {
120 ff_vector_fmul_window_c(ret, saved, buf, win, blocksize / 4);
121 } else if (v->W > v->lW) {
122 ff_vector_fmul_window_c(ret, saved, buf, win, bs0 / 4);
123 copy_normalize(ret+bs0/2, buf+bs0/4, (bs1-bs0)/4);
124 } else {
125 copy_normalize(ret, saved, (bs1 - bs0) / 4);
126 ff_vector_fmul_window_c(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
127 }
128 if (v->residues[1] == NULL) {
129 memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(ogg_int32_t));
130 v->saved_ptr[j] = v->saved + j * bs1 / 4;
131 } else {
132 v->saved_ptr[j] = buf + blocksize / 4;
133 }
134
135 v->pcmb[j] = ret;
136 }
137
138 if (v->residues[1] != NULL) {
139 v->ri ^= 1;
140 }
141}
diff --git a/apps/codecs/libtremor/ivorbiscodec.h b/apps/codecs/libtremor/ivorbiscodec.h
index 23b62c48bd..73ba4aeb2a 100644
--- a/apps/codecs/libtremor/ivorbiscodec.h
+++ b/apps/codecs/libtremor/ivorbiscodec.h
@@ -59,7 +59,13 @@ typedef struct vorbis_info{
59typedef struct vorbis_dsp_state{ 59typedef struct vorbis_dsp_state{
60 vorbis_info *vi; 60 vorbis_info *vi;
61 61
62 ogg_int32_t **pcm; 62 ogg_int32_t *residues[2];
63 ogg_int32_t *floors;
64 ogg_int32_t *saved;
65 ogg_int32_t *saved_ptr[CHANNELS];
66
67 int ri;
68
63 ogg_int32_t **pcmb; 69 ogg_int32_t **pcmb;
64 ogg_int32_t **pcmret; 70 ogg_int32_t **pcmret;
65 int pcm_storage; 71 int pcm_storage;
@@ -71,23 +77,15 @@ typedef struct vorbis_dsp_state{
71 long lW; 77 long lW;
72 long W; 78 long W;
73 long nW; 79 long nW;
74 long centerW;
75 80
76 ogg_int64_t granulepos; 81 ogg_int64_t granulepos;
77 ogg_int64_t sequence; 82 ogg_int64_t sequence;
78 83
79 void *backend_state; 84 void *backend_state;
80
81 ogg_int32_t *first_pcm; /* PCM buffer (for normal RAM or IRAM)*/
82#ifdef TREMOR_USE_IRAM
83 ogg_int32_t *iram_double_pcm; /* PCM 2nd buffer for IRAM */
84#endif
85 bool reset_pcmb;
86} vorbis_dsp_state; 85} vorbis_dsp_state;
87 86
88typedef struct vorbis_block{ 87typedef struct vorbis_block{
89 /* necessary stream state for linking to the framing abstraction */ 88 /* necessary stream state for linking to the framing abstraction */
90 ogg_int32_t **pcm; /* this is a pointer into local storage */
91 oggpack_buffer opb; 89 oggpack_buffer opb;
92 90
93 long lW; 91 long lW;
diff --git a/apps/codecs/libtremor/mapping0.c b/apps/codecs/libtremor/mapping0.c
index 9042b9c174..084d5e076d 100644
--- a/apps/codecs/libtremor/mapping0.c
+++ b/apps/codecs/libtremor/mapping0.c
@@ -302,7 +302,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
302 vorbis_dsp_state *vd=vb->vd; 302 vorbis_dsp_state *vd=vb->vd;
303 vorbis_info *vi=vd->vi; 303 vorbis_info *vi=vd->vi;
304 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 304 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
305 private_state *b=(private_state *)vd->backend_state;
306 vorbis_look_mapping0 *look=(vorbis_look_mapping0 *)l; 305 vorbis_look_mapping0 *look=(vorbis_look_mapping0 *)l;
307 vorbis_info_mapping0 *info=look->map; 306 vorbis_info_mapping0 *info=look->map;
308 307
@@ -329,8 +328,8 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
329 if(floormemo[i]) 328 if(floormemo[i])
330 nonzero[i]=1; 329 nonzero[i]=1;
331 else 330 else
332 nonzero[i]=0; 331 nonzero[i]=0;
333 memset(vb->pcm[i],0,sizeof(*vb->pcm[i])*n/2); 332 memset(vd->floors + i * ci->blocksizes[vb->W]/2,0,sizeof(ogg_int32_t)*n/2);
334 } 333 }
335 334
336 /* channel coupling can 'dirty' the nonzero listing */ 335 /* channel coupling can 'dirty' the nonzero listing */
@@ -351,7 +350,7 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
351 zerobundle[ch_in_bundle]=1; 350 zerobundle[ch_in_bundle]=1;
352 else 351 else
353 zerobundle[ch_in_bundle]=0; 352 zerobundle[ch_in_bundle]=0;
354 pcmbundle[ch_in_bundle++]=vb->pcm[j]; 353 pcmbundle[ch_in_bundle++] = vd->floors + j * ci->blocksizes[vb->W]/2;
355 } 354 }
356 } 355 }
357 356
@@ -365,8 +364,8 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
365 364
366 /* channel coupling */ 365 /* channel coupling */
367 for(i=info->coupling_steps-1;i>=0;i--){ 366 for(i=info->coupling_steps-1;i>=0;i--){
368 ogg_int32_t *pcmM=vb->pcm[info->coupling_mag[i]]; 367 ogg_int32_t *pcmM = vd->floors + info->coupling_mag[i] * ci->blocksizes[vb->W]/2;
369 ogg_int32_t *pcmA=vb->pcm[info->coupling_ang[i]]; 368 ogg_int32_t *pcmA = vd->floors + info->coupling_ang[i] * ci->blocksizes[vb->W]/2;
370 channel_couple(pcmM,pcmA,n); 369 channel_couple(pcmM,pcmA,n);
371 } 370 }
372 371
@@ -378,24 +377,21 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
378 377
379 /* transform the PCM data; takes PCM vector, vb; modifies PCM vector */ 378 /* transform the PCM data; takes PCM vector, vb; modifies PCM vector */
380 /* only MDCT right now.... */ 379 /* only MDCT right now.... */
381
382 for(i=0;i<vi->channels;i++){ 380 for(i=0;i<vi->channels;i++){
383 ogg_int32_t *pcm=vb->pcm[i]; 381 ogg_int32_t *pcm = vd->floors + i*ci->blocksizes[vb->W]/2;
384 int submap=info->chmuxlist[i]; 382 int submap=info->chmuxlist[i];
385 383
386 if(nonzero[i]) { 384 if(nonzero[i]) {
387 /* compute and apply spectral envelope */ 385 /* compute and apply spectral envelope */
388 look->floor_func[submap]-> 386 look->floor_func[submap]->
389 inverse2(vb,look->floor_look[submap],floormemo[i],pcm); 387 inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
390 388
391 ff_imdct_calc(ci->blocksizes_nbits[vb->W], 389 ff_imdct_half(ci->blocksizes_nbits[vb->W],
392 (int32_t*)pcm, 390 (int32_t*)vd->residues[vd->ri] + i*ci->blocksizes[vb->W]/2,
393 (int32_t*)pcm); 391 (int32_t*)&vd->floors[i*ci->blocksizes[vb->W]/2]);
394 /* window the data */
395 _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW);
396 } 392 }
397 else 393 else
398 memset(pcm, 0, sizeof(ogg_int32_t)*n); 394 memset(vd->residues[vd->ri] + i*ci->blocksizes[vb->W]/2, 0, sizeof(ogg_int32_t)*n/2);
399 } 395 }
400 396
401 //for(j=0;j<vi->channels;j++) 397 //for(j=0;j<vi->channels;j++)
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h
index 0b0ff4d3a7..592a60ffd8 100644
--- a/apps/codecs/libtremor/misc.h
+++ b/apps/codecs/libtremor/misc.h
@@ -26,7 +26,6 @@
26 26
27#include "asm_arm.h" 27#include "asm_arm.h"
28#include "asm_mcf5249.h" 28#include "asm_mcf5249.h"
29
30 29
31/* Some prototypes that were not defined elsewhere */ 30/* Some prototypes that were not defined elsewhere */
32void *_vorbis_block_alloc(vorbis_block *vb,long bytes); 31void *_vorbis_block_alloc(vorbis_block *vb,long bytes);
@@ -155,32 +154,6 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
155} 154}
156*/ 155*/
157#endif 156#endif
158#ifndef _TREMOR_VECT_OPS
159#define _TREMOR_VECT_OPS
160static inline
161void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
162{
163 vect_add(x, y, n );
164}
165
166static inline
167void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
168{
169 vect_add(x, y, n );
170}
171
172static inline
173void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
174{
175 vect_mult_fw(data, window, n);
176}
177
178static inline
179void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
180{
181 vect_mult_bw(data, window, n);
182}
183#endif
184 157
185#if 0 158#if 0
186#ifndef _V_VECT_OPS 159#ifndef _V_VECT_OPS
diff --git a/apps/codecs/libtremor/synthesis.c b/apps/codecs/libtremor/synthesis.c
index 657aa74e11..d1ef99ae37 100644
--- a/apps/codecs/libtremor/synthesis.c
+++ b/apps/codecs/libtremor/synthesis.c
@@ -25,15 +25,13 @@
25#include "os.h" 25#include "os.h"
26 26
27 27
28static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR;
29
30static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep){ 28static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep){
31 vorbis_dsp_state *vd= vb ? vb->vd : 0; 29 vorbis_dsp_state *vd= vb ? vb->vd : 0;
32 private_state *b= vd ? (private_state *)vd->backend_state: 0; 30 private_state *b= vd ? (private_state *)vd->backend_state: 0;
33 vorbis_info *vi= vd ? vd->vi : 0; 31 vorbis_info *vi= vd ? vd->vi : 0;
34 codec_setup_info *ci= vi ? (codec_setup_info *)vi->codec_setup : 0; 32 codec_setup_info *ci= vi ? (codec_setup_info *)vi->codec_setup : 0;
35 oggpack_buffer *opb=vb ? &vb->opb : 0; 33 oggpack_buffer *opb=vb ? &vb->opb : 0;
36 int type,mode,i; 34 int type,mode;
37 35
38 if (!vd || !b || !vi || !ci || !opb) { 36 if (!vd || !b || !vi || !ci || !opb) {
39 return OV_EBADPACKET; 37 return OV_EBADPACKET;
@@ -75,22 +73,8 @@ static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep
75 73
76 if(decodep && vi->channels<=CHANNELS) 74 if(decodep && vi->channels<=CHANNELS)
77 { 75 {
78 vb->pcm = ipcm_vect;
79
80 /* set pcm end point */ 76 /* set pcm end point */
81 vb->pcmend=ci->blocksizes[vb->W]; 77 vb->pcmend=ci->blocksizes[vb->W];
82 /* use statically allocated buffer */
83 if(vd->reset_pcmb || vb->pcm[0]==NULL)
84 {
85 /* one-time initialisation at codec start
86 NOT for every block synthesis start
87 allows us to flip between buffers once initialised
88 by simply flipping pointers */
89 for(i=0; i<vi->channels; i++)
90 vb->pcm[i] = &vd->first_pcm[i*ci->blocksizes[1]];
91
92 }
93 vd->reset_pcmb = false;
94 78
95 /* unpack_header enforces range checking */ 79 /* unpack_header enforces range checking */
96 type=ci->map_type[ci->mode_param[mode]->mapping]; 80 type=ci->map_type[ci->mode_param[mode]->mapping];
@@ -98,8 +82,6 @@ static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep
98 }else{ 82 }else{
99 /* no pcm */ 83 /* no pcm */
100 vb->pcmend=0; 84 vb->pcmend=0;
101 vb->pcm=NULL;
102
103 return(0); 85 return(0);
104 } 86 }
105} 87}
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c
index e46008aef0..3bc947f0e5 100644
--- a/apps/codecs/libtremor/window.c
+++ b/apps/codecs/libtremor/window.c
@@ -53,7 +53,7 @@ const void *_vorbis_window(int type, int left){
53 return(0); 53 return(0);
54 } 54 }
55} 55}
56 56#if 0
57void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], 57void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
58 long *blocksizes, 58 long *blocksizes,
59 int lW,int W,int nW){ 59 int lW,int W,int nW){
@@ -79,4 +79,4 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
79 /* Again - memset not needed 79 /* Again - memset not needed
80 memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */ 80 memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */
81} 81}
82 82#endif
diff --git a/apps/codecs/libtremor/window.h b/apps/codecs/libtremor/window.h
index b242ec2354..166d0b8e9c 100644
--- a/apps/codecs/libtremor/window.h
+++ b/apps/codecs/libtremor/window.h
@@ -19,9 +19,9 @@
19#define _V_WINDOW_ 19#define _V_WINDOW_
20 20
21extern const void *_vorbis_window(int type,int left); 21extern const void *_vorbis_window(int type,int left);
22extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2], 22/*extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2],
23 long *blocksizes, 23 long *blocksizes,
24 int lW,int W,int nW); 24 int lW,int W,int nW);*/
25 25
26 26
27#endif 27#endif